From da79b1eecc65171f6ca0cda9b4f1970bd1503c17 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 7 Sep 2020 12:23:15 -0700 Subject: [PATCH 0001/1079] [SelectionDAG][X86][ARM] Teach ExpandIntRes_ABS to use sra+add+xor expansion when ADDCARRY is supported. Rather than using SELECT instructions, use SRA, UADDO/ADDCARRY and XORs to expand ABS. This is the multi-part version of the sequence we use in LegalizeDAG. It's also the same as the Custom sequence uses for i64 on 32-bit and i128 on 64-bit. So we can remove the X86 customization. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D87215 --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 28 +++++- llvm/lib/Target/X86/X86ISelLowering.cpp | 30 +----- llvm/test/CodeGen/Thumb2/mve-abs.ll | 35 +++---- llvm/test/CodeGen/X86/abs.ll | 38 ++++---- llvm/test/CodeGen/X86/iabs.ll | 95 ++++++------------- 5 files changed, 85 insertions(+), 141 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 77a79a0479ef7..e1881c20e5b3b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2789,16 +2789,38 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); + SDValue N0 = N->getOperand(0); + GetExpandedInteger(N0, Lo, Hi); + EVT NVT = Lo.getValueType(); + + // If we have ADDCARRY, use the expanded form of the sra+add+xor sequence we + // use in LegalizeDAG. The ADD part of the expansion is based on + // ExpandIntRes_ADDSUB which also uses ADDCARRY/UADDO after checking that + // ADDCARRY is LegalOrCustom. Each of the pieces here can be further expanded + // if needed. Shift expansion has a special case for filling with sign bits + // so that we will only end up with one SRA. + bool HasAddCarry = TLI.isOperationLegalOrCustom( + ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + if (HasAddCarry) { + EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG); + SDValue Sign = + DAG.getNode(ISD::SRA, dl, NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy)); + SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT)); + Lo = DAG.getNode(ISD::UADDO, dl, VTList, Lo, Sign); + Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Hi, Sign, Lo.getValue(1)); + Lo = DAG.getNode(ISD::XOR, dl, NVT, Lo, Sign); + Hi = DAG.getNode(ISD::XOR, dl, NVT, Hi, Sign); + return; + } + // abs(HiLo) -> (Hi < 0 ? 
-HiLo : HiLo) EVT VT = N->getValueType(0); - SDValue N0 = N->getOperand(0); SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), N0); SDValue NegLo, NegHi; SplitInteger(Neg, NegLo, NegHi); - GetExpandedInteger(N0, Lo, Hi); - EVT NVT = Lo.getValueType(); SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT), DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT); Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ad8704f686c16..2c7c36325f146 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -193,10 +193,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (Subtarget.hasCMov()) { setOperationAction(ISD::ABS , MVT::i16 , Custom); setOperationAction(ISD::ABS , MVT::i32 , Custom); + if (Subtarget.is64Bit()) + setOperationAction(ISD::ABS , MVT::i64 , Custom); } - setOperationAction(ISD::ABS , MVT::i64 , Custom); - if (Subtarget.is64Bit()) - setOperationAction(ISD::ABS , MVT::i128 , Custom); // Funnel shifts. for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) { @@ -29720,31 +29719,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(Res); return; } - case ISD::ABS: { - assert((Subtarget.is64Bit() || N->getValueType(0) == MVT::i64) && - "Unexpected type (!= i64) on ABS."); - assert((!Subtarget.is64Bit() || N->getValueType(0) == MVT::i128) && - "Unexpected type (!= i128) on ABS."); - MVT VT = N->getSimpleValueType(0); - MVT HalfT = VT == MVT::i128 ? MVT::i64 : MVT::i32; - SDValue Lo, Hi, Tmp; - SDVTList VTList = DAG.getVTList(HalfT, MVT::i1); - - Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0), - DAG.getConstant(0, dl, HalfT)); - Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0), - DAG.getConstant(1, dl, HalfT)); - Tmp = DAG.getNode( - ISD::SRA, dl, HalfT, Hi, - DAG.getShiftAmountConstant(HalfT.getSizeInBits() - 1, HalfT, dl)); - Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo); - Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi, - SDValue(Lo.getNode(), 1)); - Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi); - Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo); - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi)); - return; - } // We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32. 
case X86ISD::FMINC: case X86ISD::FMIN: diff --git a/llvm/test/CodeGen/Thumb2/mve-abs.ll b/llvm/test/CodeGen/Thumb2/mve-abs.ll index 0b5dcbced1a56..8a9b8814ef2ec 100644 --- a/llvm/test/CodeGen/Thumb2/mve-abs.ll +++ b/llvm/test/CodeGen/Thumb2/mve-abs.ll @@ -40,33 +40,24 @@ entry: define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) { ; CHECK-LABEL: abs_v2i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: vmov r0, s1 -; CHECK-NEXT: rsbs.w lr, r1, #0 -; CHECK-NEXT: sbc.w r2, r12, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: cset r3, mi -; CHECK-NEXT: ands r3, r3, #1 -; CHECK-NEXT: csel r1, lr, r1, ne -; CHECK-NEXT: csel r0, r2, r0, ne -; CHECK-NEXT: vmov.32 q1[0], r1 -; CHECK-NEXT: vmov r1, s2 -; CHECK-NEXT: vmov.32 q1[1], r0 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: adds.w r1, r1, r0, asr #31 +; CHECK-NEXT: adc.w r2, r0, r0, asr #31 +; CHECK-NEXT: eor.w r2, r2, r0, asr #31 +; CHECK-NEXT: eor.w r0, r1, r0, asr #31 +; CHECK-NEXT: vmov.32 q1[0], r0 ; CHECK-NEXT: vmov r0, s3 -; CHECK-NEXT: rsbs r2, r1, #0 -; CHECK-NEXT: sbc.w r12, r12, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: cset r3, mi -; CHECK-NEXT: ands r3, r3, #1 -; CHECK-NEXT: csel r1, r2, r1, ne -; CHECK-NEXT: csel r0, r12, r0, ne +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: vmov.32 q1[1], r2 +; CHECK-NEXT: adds.w r1, r1, r0, asr #31 +; CHECK-NEXT: eor.w r1, r1, r0, asr #31 ; CHECK-NEXT: vmov.32 q1[2], r1 +; CHECK-NEXT: adc.w r1, r0, r0, asr #31 +; CHECK-NEXT: eor.w r0, r1, r0, asr #31 ; CHECK-NEXT: vmov.32 q1[3], r0 ; CHECK-NEXT: vmov q0, q1 -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: bx lr entry: %0 = icmp slt <2 x i64> %s1, zeroinitializer %1 = sub nsw <2 x i64> zeroinitializer, %s1 diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll index 63faafc10ec8d..8e20b001cc3e8 100644 --- a/llvm/test/CodeGen/X86/abs.ll +++ b/llvm/test/CodeGen/X86/abs.ll @@ -144,35 +144,31 @@ define i128 @test_i128(i128 %a) nounwind { ; ; X86-LABEL: test_i128: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: sarl $31, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: addl %edx, %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: negl %edi -; X86-NEXT: movl $0, %ebx -; X86-NEXT: sbbl %edx, %ebx -; X86-NEXT: movl $0, %ebp -; X86-NEXT: sbbl %ecx, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: sbbl %eax, %esi -; X86-NEXT: testl %eax, %eax -; X86-NEXT: cmovnsl %eax, %esi -; X86-NEXT: cmovnsl %ecx, %ebp -; X86-NEXT: cmovnsl %edx, %ebx -; X86-NEXT: cmovnsl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edi, (%eax) -; X86-NEXT: movl %ebx, 4(%eax) -; X86-NEXT: movl %ebp, 8(%eax) -; X86-NEXT: movl %esi, 12(%eax) +; X86-NEXT: adcl %edx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: adcl %edx, %ebx +; X86-NEXT: adcl %edx, %ecx +; X86-NEXT: xorl %edx, %ecx +; X86-NEXT: xorl %edx, %ebx +; X86-NEXT: xorl %edx, %edi +; X86-NEXT: xorl %edx, %esi +; X86-NEXT: movl %esi, (%eax) +; X86-NEXT: movl %edi, 4(%eax) +; X86-NEXT: movl %ebx, 8(%eax) +; X86-NEXT: movl %ecx, 12(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 %r = call i128 @llvm.abs.i128(i128 %a, i1 false) 
ret i128 %r diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll index f052718d98400..319eb6f5edc32 100644 --- a/llvm/test/CodeGen/X86/iabs.ll +++ b/llvm/test/CodeGen/X86/iabs.ll @@ -121,73 +121,34 @@ define i64 @test_i64(i64 %a) nounwind { } define i128 @test_i128(i128 %a) nounwind { -; X86-NO-CMOV-LABEL: test_i128: -; X86-NO-CMOV: # %bb.0: -; X86-NO-CMOV-NEXT: pushl %ebp -; X86-NO-CMOV-NEXT: pushl %ebx -; X86-NO-CMOV-NEXT: pushl %edi -; X86-NO-CMOV-NEXT: pushl %esi -; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NO-CMOV-NEXT: xorl %ecx, %ecx -; X86-NO-CMOV-NEXT: negl %ebp -; X86-NO-CMOV-NEXT: movl $0, %ebx -; X86-NO-CMOV-NEXT: sbbl %edx, %ebx -; X86-NO-CMOV-NEXT: movl $0, %edi -; X86-NO-CMOV-NEXT: sbbl {{[0-9]+}}(%esp), %edi -; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NO-CMOV-NEXT: sbbl %esi, %ecx -; X86-NO-CMOV-NEXT: testl %esi, %esi -; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NO-CMOV-NEXT: js .LBB4_2 -; X86-NO-CMOV-NEXT: # %bb.1: -; X86-NO-CMOV-NEXT: movl %esi, %ecx -; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NO-CMOV-NEXT: movl %edx, %ebx -; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NO-CMOV-NEXT: .LBB4_2: -; X86-NO-CMOV-NEXT: movl %ebp, (%eax) -; X86-NO-CMOV-NEXT: movl %ebx, 4(%eax) -; X86-NO-CMOV-NEXT: movl %edi, 8(%eax) -; X86-NO-CMOV-NEXT: movl %ecx, 12(%eax) -; X86-NO-CMOV-NEXT: popl %esi -; X86-NO-CMOV-NEXT: popl %edi -; X86-NO-CMOV-NEXT: popl %ebx -; X86-NO-CMOV-NEXT: popl %ebp -; X86-NO-CMOV-NEXT: retl $4 -; -; X86-CMOV-LABEL: test_i128: -; X86-CMOV: # %bb.0: -; X86-CMOV-NEXT: pushl %ebp -; X86-CMOV-NEXT: pushl %ebx -; X86-CMOV-NEXT: pushl %edi -; X86-CMOV-NEXT: pushl %esi -; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-CMOV-NEXT: xorl %esi, %esi -; X86-CMOV-NEXT: negl %edi -; X86-CMOV-NEXT: movl $0, %ebx -; X86-CMOV-NEXT: sbbl %edx, %ebx -; X86-CMOV-NEXT: movl $0, %ebp -; X86-CMOV-NEXT: sbbl %ecx, %ebp -; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-CMOV-NEXT: sbbl %eax, %esi -; X86-CMOV-NEXT: testl %eax, %eax -; X86-CMOV-NEXT: cmovnsl %eax, %esi -; X86-CMOV-NEXT: cmovnsl %ecx, %ebp -; X86-CMOV-NEXT: cmovnsl %edx, %ebx -; X86-CMOV-NEXT: cmovnsl {{[0-9]+}}(%esp), %edi -; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-CMOV-NEXT: movl %edi, (%eax) -; X86-CMOV-NEXT: movl %ebx, 4(%eax) -; X86-CMOV-NEXT: movl %ebp, 8(%eax) -; X86-CMOV-NEXT: movl %esi, 12(%eax) -; X86-CMOV-NEXT: popl %esi -; X86-CMOV-NEXT: popl %edi -; X86-CMOV-NEXT: popl %ebx -; X86-CMOV-NEXT: popl %ebp -; X86-CMOV-NEXT: retl $4 +; X86-LABEL: test_i128: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: sarl $31, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: addl %edx, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: adcl %edx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: adcl %edx, %ebx +; X86-NEXT: adcl %edx, %ecx +; X86-NEXT: xorl %edx, %ecx +; X86-NEXT: xorl %edx, %ebx +; X86-NEXT: xorl %edx, %edi +; X86-NEXT: xorl %edx, %esi +; X86-NEXT: movl %esi, (%eax) +; X86-NEXT: movl %edi, 4(%eax) +; X86-NEXT: movl %ebx, 8(%eax) +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl $4 ; ; X64-LABEL: test_i128: ; X64: # %bb.0: From 
9fb46a452d4e5666828c95610ceac8dcd9e4ce16 Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Sat, 5 Sep 2020 10:27:23 +0200
Subject: [PATCH 0002/1079] [SCCP] Compute ranges for supported intrinsics

For intrinsics supported by ConstantRange, compute the result range
based on the argument ranges. We do this independently of whether some
or all of the input ranges are full, as we can often still constrain
the result in some way.

Differential Revision: https://reviews.llvm.org/D87183
---
 llvm/lib/Transforms/Scalar/SCCP.cpp     | 19 +++++++++++++++++++
 llvm/test/Transforms/SCCP/intrinsics.ll | 18 ++++++------------
 2 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp
index 2afc778ed8214..33ab2907906e0 100644
--- a/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -1350,6 +1350,25 @@ void SCCPSolver::handleCallResult(CallBase &CB) {
       return (void)mergeInValue(IV, &CB, CopyOfVal);
     }
+
+    if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) {
+      // Compute result range for intrinsics supported by ConstantRange.
+      // Do this even if we don't know a range for all operands, as we may
+      // still know something about the result range, e.g. of abs(x).
+      SmallVector<ConstantRange, 2> OpRanges;
+      for (Value *Op : II->args()) {
+        const ValueLatticeElement &State = getValueState(Op);
+        if (State.isConstantRange())
+          OpRanges.push_back(State.getConstantRange());
+        else
+          OpRanges.push_back(
+              ConstantRange::getFull(Op->getType()->getScalarSizeInBits()));
+      }
+
+      ConstantRange Result =
+          ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges);
+      return (void)mergeInValue(II, ValueLatticeElement::getRange(Result));
+    }
   }
 
   // The common case is that we aren't tracking the callee, either because we
diff --git a/llvm/test/Transforms/SCCP/intrinsics.ll b/llvm/test/Transforms/SCCP/intrinsics.ll
index d06b94162b5be..e261a59d3d6bc 100644
--- a/llvm/test/Transforms/SCCP/intrinsics.ll
+++ b/llvm/test/Transforms/SCCP/intrinsics.ll
@@ -12,10 +12,8 @@ define void @abs1(i8* %p) {
 ; CHECK-LABEL: @abs1(
 ; CHECK-NEXT:    [[X:%.*]] = load i8, i8* [[P:%.*]], align 1, [[RNG0:!range !.*]]
 ; CHECK-NEXT:    [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X]], i1 false)
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp sge i8 [[ABS]], 0
-; CHECK-NEXT:    call void @use(i1 [[CMP1]])
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i8 [[ABS]], 10
-; CHECK-NEXT:    call void @use(i1 [[CMP2]])
+; CHECK-NEXT:    call void @use(i1 true)
+; CHECK-NEXT:    call void @use(i1 true)
 ; CHECK-NEXT:    [[CMP3:%.*]] = icmp sge i8 [[ABS]], 1
 ; CHECK-NEXT:    call void @use(i1 [[CMP3]])
 ; CHECK-NEXT:    [[CMP4:%.*]] = icmp slt i8 [[ABS]], 9
@@ -40,8 +38,7 @@ define void @abs1(i8* %p) {
 define void @abs2(i8 %x) {
 ; CHECK-LABEL: @abs2(
 ; CHECK-NEXT:    [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 true)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sge i8 [[ABS]], 0
-; CHECK-NEXT:    call void @use(i1 [[CMP]])
+; CHECK-NEXT:    call void @use(i1 true)
 ; CHECK-NEXT:    ret void
 ;
   %abs = call i8 @llvm.abs.i8(i8 %x, i1 true)
@@ -68,10 +65,8 @@ define void @umax1(i8* %p1, i8* %p2) {
 ; CHECK-NEXT:    [[X1:%.*]] = load i8, i8* [[P1:%.*]], align 1, [[RNG1:!range !.*]]
 ; CHECK-NEXT:    [[X2:%.*]] = load i8, i8* [[P2:%.*]], align 1, [[RNG2:!range !.*]]
 ; CHECK-NEXT:    [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X1]], i8 [[X2]])
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp uge i8 [[M]], 5
-; CHECK-NEXT:    call void @use(i1 [[CMP1]])
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i8 [[M]], 15
-; CHECK-NEXT:    call void @use(i1 [[CMP2]])
+; CHECK-NEXT:    call void @use(i1 true)
+; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[CMP3:%.*]] = icmp uge i8 [[M]], 6 ; CHECK-NEXT: call void @use(i1 [[CMP3]]) ; CHECK-NEXT: [[CMP4:%.*]] = icmp ult i8 [[M]], 14 @@ -95,8 +90,7 @@ define void @umax1(i8* %p1, i8* %p2) { define void @umax2(i8 %x) { ; CHECK-LABEL: @umax2( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 10) -; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[M]], 10 -; CHECK-NEXT: call void @use(i1 [[CMP]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: ret void ; %m = call i8 @llvm.umax.i8(i8 %x, i8 10) From ddab4cd83ea31141aaada424dccf94278482ee88 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 7 Sep 2020 21:07:02 +0200 Subject: [PATCH 0003/1079] [KnownBits] Avoid some copies (NFC) These lambdas don't need copies, use const reference. --- llvm/lib/Support/KnownBits.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp index aad50e1240341..03843687c10a4 100644 --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -115,13 +115,13 @@ KnownBits KnownBits::umax(const KnownBits &LHS, const KnownBits &RHS) { KnownBits KnownBits::umin(const KnownBits &LHS, const KnownBits &RHS) { // Flip the range of values: [0, 0xFFFFFFFF] <-> [0xFFFFFFFF, 0] - auto Flip = [](KnownBits Val) { return KnownBits(Val.One, Val.Zero); }; + auto Flip = [](const KnownBits &Val) { return KnownBits(Val.One, Val.Zero); }; return Flip(umax(Flip(LHS), Flip(RHS))); } KnownBits KnownBits::smax(const KnownBits &LHS, const KnownBits &RHS) { // Flip the range of values: [-0x80000000, 0x7FFFFFFF] <-> [0, 0xFFFFFFFF] - auto Flip = [](KnownBits Val) { + auto Flip = [](const KnownBits &Val) { unsigned SignBitPosition = Val.getBitWidth() - 1; APInt Zero = Val.Zero; APInt One = Val.One; @@ -134,7 +134,7 @@ KnownBits KnownBits::smax(const KnownBits &LHS, const KnownBits &RHS) { KnownBits KnownBits::smin(const KnownBits &LHS, const KnownBits &RHS) { // Flip the range of values: [-0x80000000, 0x7FFFFFFF] <-> [0xFFFFFFFF, 0] - auto Flip = [](KnownBits Val) { + auto Flip = [](const KnownBits &Val) { unsigned SignBitPosition = Val.getBitWidth() - 1; APInt Zero = Val.One; APInt One = Val.Zero; From bb7d3af1139c36270bc9948605e06f40e4c51541 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Mon, 7 Sep 2020 23:54:06 +0300 Subject: [PATCH 0004/1079] Reland [SimplifyCFG][LoopRotate] SimplifyCFG: disable common instruction hoisting by default, enable late in pipeline This was reverted in 503deec2183d466dad64b763bab4e15fd8804239 because it caused gigantic increase (3x) in branch mispredictions in certain benchmarks on certain CPU's, see https://reviews.llvm.org/D84108#2227365. It has since been investigated and here are the results: https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20200907/827578.html > It's an amazingly severe regression, but it's also all due to branch > mispredicts (about 3x without this). The code layout looks ok so there's > probably something else to deal with. I'm not sure there's anything we can > reasonably do so we'll just have to take the hit for now and wait for > another code reorganization to make the branch predictor a bit more happy :) > > Thanks for giving us some time to investigate and feel free to recommit > whenever you'd like. > > -eric So let's just reland this. Original commit message: I've been looking at missed vectorizations in one codebase. 
One particular thing that stands out is that some of the loops reach the
vectorizer in a rather mangled form, with weird PHIs, and some of the
loops aren't even in a rotated form.

After taking a more detailed look, that happened because the loops'
headers were too big by then. It is evident that SimplifyCFG's common
code hoisting transform is at fault there, because the pattern it
handles is precisely the unrotated loop basic block structure.

Surprisingly, `SimplifyCFGOpt::HoistThenElseCodeToIf()` is enabled by
default, and is always run, unlike its friend, the common code sinking
transform, `SinkCommonCodeFromPredecessors()`, which is not enabled by
default and is only run once very late in the pipeline.

I'm proposing to harmonize this, and disable common code hoisting until
//late// in the pipeline. The definition of //late// may vary; here I've
currently picked the same one as for code sinking, but I suppose we
could enable it right after loop rotation happens.

Experimentation shows that this does indeed, unsurprisingly, help: more
loops got rotated, although other issues remain elsewhere.

Now, this undoubtedly seriously shakes phase ordering. This will
undoubtedly be a mixed bag in terms of both compile- and run-time
performance and code size. Since we no longer aggressively
hoist+deduplicate common code, we don't pay the price of said hoisting
(which wasn't big). That may allow more loops to be rotated, so we pay
that price. That, in turn, may enable all the transforms that require
the canonical (rotated) loop form, including but not limited to
vectorization, so we pay that too. And in general, no deduplication
means more [duplicate] instructions going through the optimizations.
But there's still late hoisting, so some of them will be caught late.

As per benchmarks I've run {F12360204}, this is mostly within the
noise; there are some small improvements and some small regressions.
One big regression I saw I fixed in
rG8d487668d09fb0e4e54f36207f07c1480ffabbfd, but I'm sure this will
expose many more pre-existing missed optimizations, as usual :S

llvm-compile-time-tracker.com thoughts on this:
http://llvm-compile-time-tracker.com/compare.php?from=e40315d2b4ed1e38962a8f33ff151693ed4ada63&to=c8289c0ecbf235da9fb0e3bc052e3c0d6bff5cf9&stat=instructions
* this does regress compile-time by +0.5% geomean (unsurprisingly)
* size impact varies; for ThinLTO it's actually an improvement

The largest fallout appears to be in GVN's load partial redundancy
elimination; it spends *much* more time in
`MemoryDependenceResults::getNonLocalPointerDependency()`. Non-local
`MemoryDependenceResults` is widely known to be, uh, costly. There does
not appear to be a proper solution to this issue, other than silencing
the compile-time performance regression by tuning cut-off thresholds in
`MemoryDependenceResults`, at the cost of potentially regressing
run-time performance. D84609 attempts to move in that direction, but
the path is unclear and is going to take some time.
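To make the pattern concrete, here is a minimal C-style sketch of the
kind of loop affected (an illustrative example in the spirit of the
PhaseOrdering test updated below; the f0/f1/f2 names are placeholders,
not code from any benchmark):

    void f0(); void f1(); void f2();

    void loop(int width) {
      for (int i = 0; i < width; ++i) {
        f0(); /* first statement of the loop body ... */
        f1();
      }
      f0();   /* ... and of the exit path: identical leading calls */
      f2();
    }

Before rotation, the loop header's conditional branch has the body and
the exit block as its two successors, and both begin with the call to
f0(). HoistThenElseCodeToIf() merges those two calls into the header
itself, and the now-bigger header exceeds LoopRotate's header size
threshold, so the loop stays unrotated.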
If we look at stats before/after diffs, some excerpts: * RawSpeed (the target) {F12360200} * -14 (-73.68%) loops not rotated due to the header size (yay) * -272 (-0.67%) `"Number of live out of a loop variables"` - good for vectorizer * -3937 (-64.19%) common instructions hoisted * +561 (+0.06%) x86 asm instructions * -2 basic blocks * +2418 (+0.11%) IR instructions * vanilla test-suite + RawSpeed + darktable {F12360201} * -36396 (-65.29%) common instructions hoisted * +1676 (+0.02%) x86 asm instructions * +662 (+0.06%) basic blocks * +4395 (+0.04%) IR instructions It is likely to be sub-optimal for when optimizing for code size, so one might want to change tune pipeline by enabling sinking/hoisting when optimizing for size. Reviewed By: mkazantsev Differential Revision: https://reviews.llvm.org/D84108 This reverts commit 503deec2183d466dad64b763bab4e15fd8804239. --- .../Transforms/Utils/SimplifyCFGOptions.h | 2 +- llvm/lib/Passes/PassBuilder.cpp | 13 +++++---- .../Target/AArch64/AArch64TargetMachine.cpp | 1 + llvm/lib/Target/ARM/ARMTargetMachine.cpp | 3 +- .../Target/Hexagon/HexagonTargetMachine.cpp | 1 + .../lib/Transforms/IPO/PassManagerBuilder.cpp | 3 ++ .../lib/Transforms/Scalar/SimplifyCFGPass.cpp | 4 +-- llvm/test/Transforms/PGOProfile/chr.ll | 7 +++++ .../loop-rotation-vs-common-code-hoisting.ll | 29 +++++++++---------- .../SimplifyCFG/common-code-hoisting.ll | 2 +- 10 files changed, 39 insertions(+), 26 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h index 46f6ca0462f8b..fb3a7490346f4 100644 --- a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h @@ -25,7 +25,7 @@ struct SimplifyCFGOptions { bool ForwardSwitchCondToPhi = false; bool ConvertSwitchToLookupTable = false; bool NeedCanonicalLoop = true; - bool HoistCommonInsts = true; + bool HoistCommonInsts = false; bool SinkCommonInsts = false; bool SimplifyCondBranch = true; bool FoldTwoEntryPHINode = true; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 9df6a985789ea..9a2e895d7b717 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1160,11 +1160,14 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline( // convert to more optimized IR using more aggressive simplify CFG options. // The extra sinking transform can create larger basic blocks, so do this // before SLP vectorization. - OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions(). - forwardSwitchCondToPhi(true). - convertSwitchToLookupTable(true). - needCanonicalLoops(false). - sinkCommonInsts(true))); + // FIXME: study whether hoisting and/or sinking of common instructions should + // be delayed until after SLP vectorizer. + OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions() + .forwardSwitchCondToPhi(true) + .convertSwitchToLookupTable(true) + .needCanonicalLoops(false) + .hoistCommonInsts(true) + .sinkCommonInsts(true))); // Optimize parallel scalar instruction chains into SIMD instructions. 
if (PTO.SLPVectorization) diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 8b15898c1c140..d7a14a3dc7728 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -455,6 +455,7 @@ void AArch64PassConfig::addIRPasses() { .forwardSwitchCondToPhi(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) + .hoistCommonInsts(true) .sinkCommonInsts(true))); // Run LoopDataPrefetch diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 55ac332e2c6a6..5068f9b5a0f46 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -407,7 +407,8 @@ void ARMPassConfig::addIRPasses() { // ldrex/strex loops to simplify this, but it needs tidying up. if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) addPass(createCFGSimplificationPass( - SimplifyCFGOptions().sinkCommonInsts(true), [this](const Function &F) { + SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true), + [this](const Function &F) { const auto &ST = this->TM->getSubtarget(F); return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); })); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index 6728306db3d57..37cf391c99838 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -327,6 +327,7 @@ void HexagonPassConfig::addIRPasses() { .forwardSwitchCondToPhi(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) + .hoistCommonInsts(true) .sinkCommonInsts(true))); if (EnableLoopPrefetch) addPass(createLoopDataPrefetchPass()); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 326d1ab28b60a..caa9a98ecb074 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -784,10 +784,13 @@ void PassManagerBuilder::populateModulePassManager( // convert to more optimized IR using more aggressive simplify CFG options. // The extra sinking transform can create larger basic blocks, so do this // before SLP vectorization. + // FIXME: study whether hoisting and/or sinking of common instructions should + // be delayed until after SLP vectorizer. 
 MPM.add(createCFGSimplificationPass(SimplifyCFGOptions()
                                         .forwardSwitchCondToPhi(true)
                                         .convertSwitchToLookupTable(true)
                                         .needCanonicalLoops(false)
+                                        .hoistCommonInsts(true)
                                         .sinkCommonInsts(true)));
 
   if (SLPVectorize) {
diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index db5211df397a8..b0435bf6e4eac 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -63,8 +63,8 @@ static cl::opt<bool> UserForwardSwitchCond(
     cl::desc("Forward switch condition to phi ops (default = false)"));
 
 static cl::opt<bool> UserHoistCommonInsts(
-    "hoist-common-insts", cl::Hidden, cl::init(true),
-    cl::desc("hoist common instructions (default = true)"));
+    "hoist-common-insts", cl::Hidden, cl::init(false),
+    cl::desc("hoist common instructions (default = false)"));
 
 static cl::opt<bool> UserSinkCommonInsts(
     "sink-common-insts", cl::Hidden, cl::init(false),
diff --git a/llvm/test/Transforms/PGOProfile/chr.ll b/llvm/test/Transforms/PGOProfile/chr.ll
index c2e1ae4f53a0f..1a22d7f0b8498 100644
--- a/llvm/test/Transforms/PGOProfile/chr.ll
+++ b/llvm/test/Transforms/PGOProfile/chr.ll
@@ -2006,9 +2006,16 @@ define i64 @test_chr_22(i1 %i, i64* %j, i64 %v0) !prof !14 {
 ; CHECK-NEXT:  bb0:
 ; CHECK-NEXT:    [[REASS_ADD:%.*]] = shl i64 [[V0:%.*]], 1
 ; CHECK-NEXT:    [[V2:%.*]] = add i64 [[REASS_ADD]], 3
+; CHECK-NEXT:    [[C1:%.*]] = icmp slt i64 [[V2]], 100
+; CHECK-NEXT:    br i1 [[C1]], label [[BB0_SPLIT:%.*]], label [[BB0_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       bb0.split:
 ; CHECK-NEXT:    [[V299:%.*]] = mul i64 [[V2]], 7860086430977039991
 ; CHECK-NEXT:    store i64 [[V299]], i64* [[J:%.*]], align 4
 ; CHECK-NEXT:    ret i64 99
+; CHECK:       bb0.split.nonchr:
+; CHECK-NEXT:    [[V299_NONCHR:%.*]] = mul i64 [[V2]], 7860086430977039991
+; CHECK-NEXT:    store i64 [[V299_NONCHR]], i64* [[J]], align 4
+; CHECK-NEXT:    ret i64 99
 ;
 bb0:
   %v1 = add i64 %v0, 3
diff --git a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll
index 1d8cce6879e9d..314af1c141454 100644
--- a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll
+++ b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll
@@ -5,14 +5,11 @@
 ; RUN: opt -O3 -rotation-max-header-size=1 -S < %s | FileCheck %s --check-prefixes=HOIST,THR1,FALLBACK2
 ; RUN: opt -passes='default<O3>' -rotation-max-header-size=1 -S < %s | FileCheck %s --check-prefixes=HOIST,THR1,FALLBACK3
 
-; RUN: opt -O3 -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefixes=HOIST,THR2,FALLBACK4
-; RUN: opt -passes='default<O3>' -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefixes=HOIST,THR2,FALLBACK5
+; RUN: opt -O3 -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefixes=ROTATED_LATER,ROTATED_LATER_OLDPM,FALLBACK4
+; RUN: opt -passes='default<O3>' -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefixes=ROTATED_LATER,ROTATED_LATER_NEWPM,FALLBACK5
 
-; RUN: opt -O3 -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefixes=ROTATED_LATER,ROTATED_LATER_OLDPM,FALLBACK6
-; RUN: opt -passes='default<O3>' -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefixes=ROTATED_LATER,ROTATED_LATER_NEWPM,FALLBACK7
-
-; RUN: opt -O3 -rotation-max-header-size=4 -S < %s | FileCheck %s --check-prefixes=ROTATE,ROTATE_OLDPM,FALLBACK8
-; RUN: opt -passes='default<O3>' -rotation-max-header-size=4 -S < %s | FileCheck %s --check-prefixes=ROTATE,ROTATE_NEWPM,FALLBACK9
+; RUN: opt 
-O3 -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefixes=ROTATE,ROTATE_OLDPM,FALLBACK6
+; RUN: opt -passes='default<O3>' -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefixes=ROTATE,ROTATE_NEWPM,FALLBACK7
 
 ; This example is produced from a very basic C code:
 ;
@@ -61,8 +58,8 @@ define void @_Z4loopi(i32 %width) {
 ; HOIST-NEXT:    br label [[FOR_COND:%.*]]
 ; HOIST:       for.cond:
 ; HOIST-NEXT:    [[I_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[FOR_COND_PREHEADER]] ]
-; HOIST-NEXT:    tail call void @f0()
 ; HOIST-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[TMP0]]
+; HOIST-NEXT:    tail call void @f0()
 ; HOIST-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
 ; HOIST:       for.cond.cleanup:
 ; HOIST-NEXT:    tail call void @f2()
@@ -80,17 +77,17 @@ define void @_Z4loopi(i32 %width) {
 ; ROTATED_LATER_OLDPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
 ; ROTATED_LATER_OLDPM:       for.cond.preheader:
 ; ROTATED_LATER_OLDPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
-; ROTATED_LATER_OLDPM-NEXT:    tail call void @f0()
 ; ROTATED_LATER_OLDPM-NEXT:    [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0
 ; ROTATED_LATER_OLDPM-NEXT:    br i1 [[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
 ; ROTATED_LATER_OLDPM:       for.cond.cleanup:
+; ROTATED_LATER_OLDPM-NEXT:    tail call void @f0()
 ; ROTATED_LATER_OLDPM-NEXT:    tail call void @f2()
 ; ROTATED_LATER_OLDPM-NEXT:    br label [[RETURN]]
 ; ROTATED_LATER_OLDPM:       for.body:
 ; ROTATED_LATER_OLDPM-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ]
+; ROTATED_LATER_OLDPM-NEXT:    tail call void @f0()
 ; ROTATED_LATER_OLDPM-NEXT:    tail call void @f1()
 ; ROTATED_LATER_OLDPM-NEXT:    [[INC]] = add nuw i32 [[I_04]], 1
-; ROTATED_LATER_OLDPM-NEXT:    tail call void @f0()
 ; ROTATED_LATER_OLDPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]]
 ; ROTATED_LATER_OLDPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
 ; ROTATED_LATER_OLDPM:       return:
@@ -102,19 +99,19 @@ define void @_Z4loopi(i32 %width) {
 ; ROTATED_LATER_NEWPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
 ; ROTATED_LATER_NEWPM:       for.cond.preheader:
 ; ROTATED_LATER_NEWPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
-; ROTATED_LATER_NEWPM-NEXT:    tail call void @f0()
 ; ROTATED_LATER_NEWPM-NEXT:    [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0
 ; ROTATED_LATER_NEWPM-NEXT:    br i1 [[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND_PREHEADER_FOR_BODY_CRIT_EDGE:%.*]]
 ; ROTATED_LATER_NEWPM:       for.cond.preheader.for.body_crit_edge:
 ; ROTATED_LATER_NEWPM-NEXT:    [[INC_1:%.*]] = add nuw i32 0, 1
 ; ROTATED_LATER_NEWPM-NEXT:    br label [[FOR_BODY:%.*]]
 ; ROTATED_LATER_NEWPM:       for.cond.cleanup:
+; ROTATED_LATER_NEWPM-NEXT:    tail call void @f0()
 ; ROTATED_LATER_NEWPM-NEXT:    tail call void @f2()
 ; ROTATED_LATER_NEWPM-NEXT:    br label [[RETURN]]
 ; ROTATED_LATER_NEWPM:       for.body:
 ; ROTATED_LATER_NEWPM-NEXT:    [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_COND_PREHEADER_FOR_BODY_CRIT_EDGE]] ]
-; ROTATED_LATER_NEWPM-NEXT:    tail call void @f1()
 ; ROTATED_LATER_NEWPM-NEXT:    tail call void @f0()
+; ROTATED_LATER_NEWPM-NEXT:    tail call void @f1()
 ; ROTATED_LATER_NEWPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]]
 ; ROTATED_LATER_NEWPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]]
 ; ROTATED_LATER_NEWPM:       for.body.for.body_crit_edge:
@@ -129,19 
+126,19 @@ define void @_Z4loopi(i32 %width) { ; ROTATE_OLDPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATE_OLDPM: for.cond.preheader: ; ROTATE_OLDPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATE_OLDPM-NEXT: tail call void @f0() ; ROTATE_OLDPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; ROTATE_OLDPM: for.body.preheader: ; ROTATE_OLDPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 ; ROTATE_OLDPM-NEXT: br label [[FOR_BODY:%.*]] ; ROTATE_OLDPM: for.cond.cleanup: +; ROTATE_OLDPM-NEXT: tail call void @f0() ; ROTATE_OLDPM-NEXT: tail call void @f2() ; ROTATE_OLDPM-NEXT: br label [[RETURN]] ; ROTATE_OLDPM: for.body: ; ROTATE_OLDPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; ROTATE_OLDPM-NEXT: tail call void @f0() ; ROTATE_OLDPM-NEXT: tail call void @f1() ; ROTATE_OLDPM-NEXT: [[INC]] = add nuw nsw i32 [[I_04]], 1 -; ROTATE_OLDPM-NEXT: tail call void @f0() ; ROTATE_OLDPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]] ; ROTATE_OLDPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; ROTATE_OLDPM: return: @@ -153,19 +150,19 @@ define void @_Z4loopi(i32 %width) { ; ROTATE_NEWPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATE_NEWPM: for.cond.preheader: ; ROTATE_NEWPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATE_NEWPM-NEXT: tail call void @f0() ; ROTATE_NEWPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; ROTATE_NEWPM: for.body.preheader: ; ROTATE_NEWPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 ; ROTATE_NEWPM-NEXT: [[INC_1:%.*]] = add nuw nsw i32 0, 1 ; ROTATE_NEWPM-NEXT: br label [[FOR_BODY:%.*]] ; ROTATE_NEWPM: for.cond.cleanup: +; ROTATE_NEWPM-NEXT: tail call void @f0() ; ROTATE_NEWPM-NEXT: tail call void @f2() ; ROTATE_NEWPM-NEXT: br label [[RETURN]] ; ROTATE_NEWPM: for.body: ; ROTATE_NEWPM-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_BODY_PREHEADER]] ] -; ROTATE_NEWPM-NEXT: tail call void @f1() ; ROTATE_NEWPM-NEXT: tail call void @f0() +; ROTATE_NEWPM-NEXT: tail call void @f1() ; ROTATE_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]] ; ROTATE_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ; ROTATE_NEWPM: for.body.for.body_crit_edge: diff --git a/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll b/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll index b58017ba7ef0b..37cbc4640e415 100644 --- a/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll +++ b/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -simplifycfg -hoist-common-insts=1 -S < %s | FileCheck %s --check-prefixes=HOIST ; RUN: opt -simplifycfg -hoist-common-insts=0 -S < %s | FileCheck %s --check-prefixes=NOHOIST -; RUN: opt -simplifycfg -S < %s | FileCheck %s --check-prefixes=HOIST,DEFAULT +; RUN: opt -simplifycfg -S < %s | FileCheck %s --check-prefixes=NOHOIST,DEFAULT ; This example is produced from a very basic C code: ; From 5f5a0bb0872a9673bad08b38bc0b14c42263902a Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 7 Sep 2020 14:44:53 -0700 Subject: [PATCH 0005/1079] [asan][test] Use --image-base for Linux/asan_prelink_test.cpp if ld is LLD LLD supports -Ttext but with the option 
there is still a PT_LOAD at address zero and thus the Linux kernel will
map it to a different address and the test will fail. Use --image-base
instead.
---
 .../test/asan/TestCases/Linux/asan_prelink_test.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/compiler-rt/test/asan/TestCases/Linux/asan_prelink_test.cpp b/compiler-rt/test/asan/TestCases/Linux/asan_prelink_test.cpp
index e00c215e92b11..9c70b61291b36 100644
--- a/compiler-rt/test/asan/TestCases/Linux/asan_prelink_test.cpp
+++ b/compiler-rt/test/asan/TestCases/Linux/asan_prelink_test.cpp
@@ -1,11 +1,12 @@
 // Test if asan works with prelink.
-// It does not actually use prelink, but relies on ld's flag -Ttext-segment
-// or gold's flag -Ttext (we try the first flag first, if that fails we
+// It does not actually use prelink, but relies on GNU ld's -Ttext-segment,
+// LLD's --image-base, or gold's -Ttext (we try the first flag first, if that fails we
 // try the second flag).
 //
 // RUN: %clangxx_asan -c %s -o %t.o
 // RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared %s -o %t.so -Wl,-Ttext-segment=0x3600000000 ||\
+// RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared %s -o %t.so -Wl,--image-base=0x3600000000 ||\
 // RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared %s -o %t.so -Wl,-Ttext=0x3600000000
 // RUN: %clangxx_asan %t.o %t.so -Wl,-R. -o %t
 // RUN: %env_asan_opts=verbosity=1 %run %t 2>&1 | FileCheck %s
From efb8e156daa120a25f993b3142ef8d6ef766df5a Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Mon, 7 Sep 2020 22:52:10 +0100
Subject: [PATCH 0006/1079] [DSE,MemorySSA] Add an early check for read
 clobbers to traversal.

Depending on the benchmark, this early exit can save a substantial
amount of compile-time:

http://llvm-compile-time-tracker.com/compare.php?from=505f2d817aa8e07ba98e5fd4a8f6ff0666f89df1&to=eb4e441147f9b4b7a5fcbbc57428cadbe9e01f10&stat=instructions
---
 .../Scalar/DeadStoreElimination.cpp           | 12 ++++
 .../MSSA/read-clobber-after-overwrite.ll      | 58 +++++++++++++++++++
 2 files changed, 70 insertions(+)
 create mode 100644 llvm/test/Transforms/DeadStoreElimination/MSSA/read-clobber-after-overwrite.ll

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 109e15d6d7cfc..49e811b298a60 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1901,6 +1901,18 @@ struct DSEState {
       return None;
     }
 
+    // Quick check if there are direct uses that are read-clobbers.
+    if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) {
+          if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
+            return !MSSA.dominates(StartAccess, UseOrDef) &&
+                   isReadClobber(DefLoc, UseOrDef->getMemoryInst());
+          return false;
+        })) {
+      Cache.KnownReads.insert(Current);
+      LLVM_DEBUG(dbgs() << "   ... found a read clobber\n");
+      return None;
+    }
+
    // If Current cannot be analyzed or is not removable, check the next
    // candidate.
if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI)) { diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/read-clobber-after-overwrite.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/read-clobber-after-overwrite.ll new file mode 100644 index 0000000000000..4f704c35a90b1 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/read-clobber-after-overwrite.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -dse -enable-dse-memoryssa -S %s | FileCheck %s + +declare i1 @cond() readnone + +define i32 @test() { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[M0:%.*]] = alloca [4 x i32], align 16 +; CHECK-NEXT: br label [[LOOP_1:%.*]] +; CHECK: loop.1: +; CHECK-NEXT: br label [[LOOP_2:%.*]] +; CHECK: loop.2: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[LOOP_1]] ], [ [[IV_NEXT:%.*]], [[LOOP_2]] ] +; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[M0]], i64 3, i64 [[IV]] +; CHECK-NEXT: [[PTR_2:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[M0]], i64 0, i64 [[IV]] +; CHECK-NEXT: store i32 20, i32* [[PTR_2]], align 4 +; CHECK-NEXT: store i32 30, i32* [[PTR_1]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[C_3:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_1_LATCH:%.*]], label [[LOOP_2]] +; CHECK: loop.1.latch: +; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C_2]], label [[EXIT:%.*]], label [[LOOP_1]] +; CHECK: exit: +; CHECK-NEXT: [[PTR_3:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[M0]], i64 0, i64 1 +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR_3]], align 16 +; CHECK-NEXT: ret i32 [[LV]] +; +entry: + %M0 = alloca [4 x i32], align 16 + br label %loop.1 + +loop.1: + br label %loop.2 + +loop.2: + %iv = phi i64 [ 0, %loop.1 ], [ %iv.next, %loop.2 ] + %ptr.1 = getelementptr inbounds [4 x i32], [4 x i32]* %M0, i64 3, i64 %iv + store i32 10, i32* %ptr.1, align 4 + %ptr.2 = getelementptr inbounds [4 x i32], [4 x i32]* %M0, i64 0, i64 %iv + store i32 20, i32* %ptr.2, align 4 + store i32 30, i32* %ptr.1, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %c.3 = call i1 @cond() + br i1 %c.3, label %loop.1.latch, label %loop.2 + +loop.1.latch: + %c.2 = call i1 @cond() + br i1 %c.2, label %exit, label %loop.1 + +exit: + %ptr.3 = getelementptr inbounds [4 x i32], [4 x i32]* %M0, i64 0, i64 1 + %lv = load i32, i32* %ptr.3, align 16 + ret i32 %lv + + +} From 3e782bf8090c80e6d75e62cd52c9ed32715cbcdd Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Fri, 21 Aug 2020 13:42:20 -0700 Subject: [PATCH 0007/1079] [Sema][MSVC] warn at dynamic_cast when /GR- is given Differential Revision: https://reviews.llvm.org/D86369 --- clang/include/clang/Basic/DiagnosticGroups.td | 2 ++ .../clang/Basic/DiagnosticSemaKinds.td | 6 ++++++ clang/lib/Sema/SemaCast.cpp | 12 +++++++++++ clang/lib/Sema/SemaExprCXX.cpp | 6 ++++++ clang/test/SemaCXX/ms_no_dynamic_cast.cpp | 21 +++++++++++++++++++ clang/test/SemaCXX/no_dynamic_cast.cpp | 21 +++++++++++++++++++ 6 files changed, 68 insertions(+) create mode 100644 clang/test/SemaCXX/ms_no_dynamic_cast.cpp create mode 100644 clang/test/SemaCXX/no_dynamic_cast.cpp diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 6b4dcc850612e..a9bd52b8afcdf 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1235,3 +1235,5 @@ in addition with the pragmas or -fmax-tokens 
flag to get any warnings.
 }
 
 def WebAssemblyExceptionSpec : DiagGroup<"wasm-exception-spec">;
+
+def RTTI : DiagGroup<"rtti">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index d856f784e0eea..e1601da74b735 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -7438,6 +7438,12 @@ def err_no_typeid_with_fno_rtti : Error<
   "use of typeid requires -frtti">;
 def err_no_dynamic_cast_with_fno_rtti : Error<
   "use of dynamic_cast requires -frtti">;
+def warn_no_dynamic_cast_with_rtti_disabled: Warning<
+  "dynamic_cast will not work since RTTI data is disabled by "
+  "%select{-fno-rtti-data|/GR-}0">, InGroup<RTTI>;
+def warn_no_typeid_with_rtti_disabled: Warning<
+  "typeid will not work since RTTI data is disabled by "
+  "%select{-fno-rtti-data|/GR-}0">, InGroup<RTTI>;
 def err_cannot_form_pointer_to_member_of_reference_type : Error<
   "cannot form a pointer-to-member to member %0 of reference type %1">;
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index 726900c59f20e..b213fb756a650 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -890,6 +890,18 @@ void CastOperation::CheckDynamicCast() {
     return;
   }
 
+  // Warns when dynamic_cast is used with RTTI data disabled.
+  if (!Self.getLangOpts().RTTIData) {
+    bool MicrosoftABI =
+        Self.getASTContext().getTargetInfo().getCXXABI().isMicrosoft();
+    bool isClangCL = Self.getDiagnostics().getDiagnosticOptions().getFormat() ==
+                     DiagnosticOptions::MSVC;
+    if (MicrosoftABI || !DestPointee->isVoidType())
+      Self.Diag(OpRange.getBegin(),
+                diag::warn_no_dynamic_cast_with_rtti_disabled)
+          << isClangCL;
+  }
+
   // Done. Everything else is run-time checks.
   Kind = CK_Dynamic;
 }
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index d1fcdf3545278..8f8847e638040 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -646,6 +646,12 @@ Sema::ActOnCXXTypeid(SourceLocation OpLoc, SourceLocation LParenLoc,
     return ExprError(Diag(OpLoc, diag::err_no_typeid_with_fno_rtti));
   }
 
+  // Warns when typeid is used with RTTI data disabled.
+  if (!getLangOpts().RTTIData)
+    Diag(OpLoc, diag::warn_no_typeid_with_rtti_disabled)
+        << (getDiagnostics().getDiagnosticOptions().getFormat() ==
+            DiagnosticOptions::MSVC);
+
   QualType TypeInfoType = Context.getTypeDeclType(CXXTypeInfoDecl);
 
   if (isType) {
diff --git a/clang/test/SemaCXX/ms_no_dynamic_cast.cpp b/clang/test/SemaCXX/ms_no_dynamic_cast.cpp
new file mode 100644
index 0000000000000..d2c007fd8c297
--- /dev/null
+++ b/clang/test/SemaCXX/ms_no_dynamic_cast.cpp
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 %s -triple x86_64-windows -fdiagnostics-format msvc -fno-rtti-data -fsyntax-only -verify
+
+namespace std {
+struct type_info {};
+} // namespace std
+class B {
+public:
+  virtual ~B() = default;
+};
+
+class D1 : public B {
+public:
+  ~D1() = default;
+};
+
+void f() {
+  B* b = new D1();
+  auto d = dynamic_cast<D1 *>(b);    // expected-warning{{dynamic_cast will not work since RTTI data is disabled by /GR-}}
+  void* v = dynamic_cast<void *>(b); // expected-warning{{dynamic_cast will not work since RTTI data is disabled by /GR-}}
+  (void)typeid(int);                 // expected-warning{{typeid will not work since RTTI data is disabled by /GR-}}
+}
diff --git a/clang/test/SemaCXX/no_dynamic_cast.cpp b/clang/test/SemaCXX/no_dynamic_cast.cpp
new file mode 100644
index 0000000000000..4db21d36f4a99
--- /dev/null
+++ b/clang/test/SemaCXX/no_dynamic_cast.cpp
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 %s -fno-rtti-data -fsyntax-only -verify
+
+namespace std {
+struct type_info {};
+} // namespace std
+class B {
+public:
+  virtual ~B() = default;
+};
+
+class D1 : public B {
+public:
+  ~D1() = default;
+};
+
+void f() {
+  B* b = new D1();
+  auto d = dynamic_cast<D1 *>(b); // expected-warning{{dynamic_cast will not work since RTTI data is disabled by -fno-rtti-data}}
+  void* v = dynamic_cast<void *>(b);
+  (void)typeid(int); // expected-warning{{typeid will not work since RTTI data is disabled by -fno-rtti-data}}
+}
From 0a63679267e4a2e81c6b193c25ed2579c65eb824 Mon Sep 17 00:00:00 2001
From: Mehdi Amini
Date: Mon, 7 Sep 2020 23:58:54 +0000
Subject: [PATCH 0008/1079] Add documentation for getDependentDialects() in the
 PassManagement infra docs

Reviewed By: rriddle

Differential Revision: https://reviews.llvm.org/D87181
---
 mlir/docs/PassManagement.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/mlir/docs/PassManagement.md b/mlir/docs/PassManagement.md
index 92ca92218219c..6e577db4501c1 100644
--- a/mlir/docs/PassManagement.md
+++ b/mlir/docs/PassManagement.md
@@ -104,6 +104,15 @@ struct MyOperationPass : public OperationPass {
 };
 ```
 
+### Dependent Dialects
+
+Dialects must be loaded in the MLIRContext before entities from these dialects
+(operations, types, attributes, ...) can be created. Dialects must be loaded
+before starting the multi-threaded pass pipeline execution. To this end, a pass
+that can create an entity from a dialect that isn't already loaded must express
+this by overriding the `getDependentDialects()` method and declaring this list
+of Dialects explicitly.
+
 ## Analysis Management
 
 An important concept, along with transformation passes, are analyses. These are
@@ -684,6 +693,8 @@ It contains the following fields:
 * description
   - A longer, more detailed description of the pass. This is used when
     generating pass documentation.
+* dependentDialects
+  - A list of strings that are the Dialect classes this pass can introduce.
 * constructor
   - A piece of C++ code used to create a default instance of the pass.
* options From 63d1dc66658fa072c6e0caba6c97e00da37555ce Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 8 Sep 2020 00:06:37 +0000 Subject: [PATCH 0009/1079] Add a doc/tutorial on traversing the IR Reviewed By: stephenneuendorffer Differential Revision: https://reviews.llvm.org/D87221 --- .../Tutorials/UnderstandingTheIRStructure.md | 287 ++++++++++++++++++ mlir/docs/includes/img/DefUseChains.svg | 1 + mlir/docs/includes/img/Use-list.svg | 1 + mlir/test/IR/print-ir-defuse.mlir | 31 ++ mlir/test/IR/print-ir-nesting.mlir | 57 ++++ mlir/test/lib/IR/CMakeLists.txt | 2 + mlir/test/lib/IR/TestPrintDefUse.cpp | 71 +++++ mlir/test/lib/IR/TestPrintNesting.cpp | 96 ++++++ mlir/tools/mlir-opt/mlir-opt.cpp | 4 + 9 files changed, 550 insertions(+) create mode 100644 mlir/docs/Tutorials/UnderstandingTheIRStructure.md create mode 100644 mlir/docs/includes/img/DefUseChains.svg create mode 100644 mlir/docs/includes/img/Use-list.svg create mode 100644 mlir/test/IR/print-ir-defuse.mlir create mode 100644 mlir/test/IR/print-ir-nesting.mlir create mode 100644 mlir/test/lib/IR/TestPrintDefUse.cpp create mode 100644 mlir/test/lib/IR/TestPrintNesting.cpp diff --git a/mlir/docs/Tutorials/UnderstandingTheIRStructure.md b/mlir/docs/Tutorials/UnderstandingTheIRStructure.md new file mode 100644 index 0000000000000..8b4f7724741fa --- /dev/null +++ b/mlir/docs/Tutorials/UnderstandingTheIRStructure.md @@ -0,0 +1,287 @@ +# Understanding the IR Structure + +The MLIR Language Reference describes the +[High Level Structure](../LangRef/#high-level-structure), this document +illustrates this structure through examples, and introduces at the same time the +C++ APIs involved in manipulating it. + +We will implement a [pass](../PassManagement/#operation-pass) that traverses any +MLIR input and prints the entity inside the IR. A pass (or in general almost any +piece of IR) is always rooted with an operation. Most of the time the top-level +operation is a `ModuleOp`, the MLIR `PassManager` is actually limited to +operation on a top-level `ModuleOp`. As such a pass starts with an operation, +and so will our traversal: + +``` + void runOnOperation() override { + Operation *op = getOperation(); + resetIndent(); + printOperation(op); + } +``` + +## Traversing the IR Nesting + +The IR is recursively nested, an `Operation` can have one or multiple nested +`Region`s, each of which is actually a list of `Blocks`, each of which itself +wraps a list of `Operation`s. Our traversal will follow this structure with +three methods: `printOperation()`, `printRegion()`, and `printBlock()`. + +The first method inspects the properties of an operation, before iterating on +the nested regions and print them individually: + +```c++ + void printOperation(Operation *op) { + // Print the operation itself and some of its properties + printIndent() << "visiting op: '" << op->getName() << "' with " + << op->getNumOperands() << " operands and " + << op->getNumResults() << " results\n"; + // Print the operation attributes + if (!op->getAttrs().empty()) { + printIndent() << op->getAttrs().size() << " attributes:\n"; + for (NamedAttribute attr : op->getAttrs()) + printIndent() << " - '" << attr.first << "' : '" << attr.second + << "'\n"; + } + + // Recurse into each of the regions attached to the operation. 
+    printIndent() << " " << op->getNumRegions() << " nested regions:\n";
+    auto indent = pushIndent();
+    for (Region &region : op->getRegions())
+      printRegion(region);
+  }
+```
+
+A `Region` does not hold anything other than a list of `Block`s:
+
+```c++
+  void printRegion(Region &region) {
+    // A region does not hold anything by itself other than a list of blocks.
+    printIndent() << "Region with " << region.getBlocks().size()
+                  << " blocks:\n";
+    auto indent = pushIndent();
+    for (Block &block : region.getBlocks())
+      printBlock(block);
+  }
+```
+
+Finally, a `Block` has a list of arguments, and holds a list of `Operation`s:
+
+```c++
+  void printBlock(Block &block) {
+    // Print the block's intrinsic properties (basically: its argument list)
+    printIndent()
+        << "Block with " << block.getNumArguments() << " arguments, "
+        << block.getNumSuccessors()
+        << " successors, and "
+        // Note, this `.size()` is traversing a linked-list and is O(n).
+        << block.getOperations().size() << " operations\n";
+
+    // A block's main role is to hold a list of Operations: let's recurse into
+    // printing each operation.
+    auto indent = pushIndent();
+    for (Operation &op : block.getOperations())
+      printOperation(&op);
+  }
+```
+
+The code for the pass is available
+[here in the repo](https://github.com/llvm/llvm-project/blob/master/mlir/test/lib/IR/TestPrintNesting.cpp)
+and can be exercised with `mlir-opt -test-print-nesting`.
+
+### Example
+
+The Pass introduced in the previous section can be applied on the following IR
+with `mlir-opt -test-print-nesting -allow-unregistered-dialect
+llvm-project/mlir/test/IR/print-ir-nesting.mlir`:
+
+```mlir
+"module"() ( {
+  %0:4 = "dialect.op1"() {"attribute name" = 42 : i32} : () -> (i1, i16, i32, i64)
+  "dialect.op2"() ( {
+    "dialect.innerop1"(%0#0, %0#1) : (i1, i16) -> ()
+  }, {
+    "dialect.innerop2"() : () -> ()
+    "dialect.innerop3"(%0#0, %0#2, %0#3)[^bb1, ^bb2] : (i1, i32, i64) -> ()
+  ^bb1(%1: i32): // pred: ^bb0
+    "dialect.innerop4"() : () -> ()
+    "dialect.innerop5"() : () -> ()
+  ^bb2(%2: i64): // pred: ^bb0
+    "dialect.innerop6"() : () -> ()
+    "dialect.innerop7"() : () -> ()
+  }) {"other attribute" = 42 : i64} : () -> ()
+  "module_terminator"() : () -> ()
+}) : () -> ()
+```
+
+And will yield the following output:
+
+```
+visiting op: 'module' with 0 operands and 0 results
+ 1 nested regions:
+  Region with 1 blocks:
+    Block with 0 arguments, 0 successors, and 3 operations
+      visiting op: 'dialect.op1' with 0 operands and 4 results
+      1 attributes:
+       - 'attribute name' : '42 : i32'
+       0 nested regions:
+      visiting op: 'dialect.op2' with 0 operands and 0 results
+       2 nested regions:
+        Region with 1 blocks:
+          Block with 0 arguments, 0 successors, and 1 operations
+            visiting op: 'dialect.innerop1' with 2 operands and 0 results
+             0 nested regions:
+        Region with 3 blocks:
+          Block with 0 arguments, 2 successors, and 2 operations
+            visiting op: 'dialect.innerop2' with 0 operands and 0 results
+             0 nested regions:
+            visiting op: 'dialect.innerop3' with 3 operands and 0 results
+             0 nested regions:
+          Block with 1 arguments, 0 successors, and 2 operations
+            visiting op: 'dialect.innerop4' with 0 operands and 0 results
+             0 nested regions:
+            visiting op: 'dialect.innerop5' with 0 operands and 0 results
+             0 nested regions:
+          Block with 1 arguments, 0 successors, and 2 operations
+            visiting op: 'dialect.innerop6' with 0 operands and 0 results
+             0 nested regions:
+            visiting op: 'dialect.innerop7' with 0 operands and 0 results
+             0 nested regions:
+      visiting op: 'module_terminator' with 0 operands 
+
+## Other IR Traversal Methods
+
+In many cases, unwrapping the recursive structure of the IR is cumbersome and
+you may be interested in using other helpers.
+
+### Filtered iterator: `getOps()`
+
+For example, the `Block` class exposes a convenient templated method
+`getOps<OpTy>()` that provides a filtered iterator. Here is an example:
+
+```c++
+  auto varOps = entryBlock.getOps<spirv::GlobalVariableOp>();
+  for (spirv::GlobalVariableOp gvOp : varOps) {
+     // process each GlobalVariable Operation in the block.
+     ...
+  }
+```
+
+Similarly, the `Region` class exposes the same `getOps` method that will
+iterate over all the blocks in the region.
+
+### Walkers
+
+The `getOps<OpTy>()` method is useful for iterating over the Operations
+immediately listed inside a single block (or a single region); however, it is
+frequently desirable to traverse the IR in a nested fashion. To this end, MLIR
+exposes the `walk()` helper on `Operation`, `Block`, and `Region`. This helper
+takes a single argument: a callback method that will be invoked for every
+operation recursively nested under the provided entity.
+
+```c++
+  // Recursively traverse all the regions and blocks nested inside the function
+  // and apply the callback on every single operation in post-order.
+  getFunction().walk([&](mlir::Operation *op) {
+    // process Operation `op`.
+  });
+```
+
+The provided callback can be specialized to filter on a particular type of
+Operation; for example, the following will apply the callback only on `LinalgOp`
+operations nested inside the function:
+
+```c++
+  getFunction().walk([](LinalgOp linalgOp) {
+    // process LinalgOp `linalgOp`.
+  });
+```
+
+Finally, the callback can optionally stop the walk by returning a
+`WalkResult::interrupt()` value. For example, the following walk will find all
+`AllocOp` operations nested inside the function and interrupt the traversal if
+one of them does not satisfy a criterion:
+
+```c++
+  WalkResult result = getFunction().walk([&](AllocOp allocOp) {
+    if (!isValid(allocOp))
+      return WalkResult::interrupt();
+    return WalkResult::advance();
+  });
+  if (result.wasInterrupted())
+    // One alloc wasn't matching.
+    ...
+```
+
+## Traversing the def-use chains
+
+Another relationship in the IR is the one that links a `Value` with its users.
+As defined in the
+[language reference](https://mlir.llvm.org/docs/LangRef/#high-level-structure),
+each Value is either a `BlockArgument` or the result of exactly one `Operation`
+(an `Operation` can have multiple results, each of which is a separate
+`Value`). The users of a `Value` are `Operation`s, through their operands: each
+`Operation` operand references a single `Value`.
+
+Here is a code sample that inspects the operands of an `Operation` and prints
+some information about them:
+
+```c++
+  // Print information about the producer of each of the operands.
+  for (Value operand : op->getOperands()) {
+    if (Operation *producer = operand.getDefiningOp()) {
+      llvm::outs() << "  - Operand produced by operation '"
+                   << producer->getName() << "'\n";
+    } else {
+      // If there is no defining op, the Value is necessarily a Block
+      // argument.
+      auto blockArg = operand.cast<BlockArgument>();
+      llvm::outs() << "  - Operand produced by Block argument, number "
+                   << blockArg.getArgNumber() << "\n";
+    }
+  }
+```
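+
+Note that when only one kind of producer is interesting, the null check and the
+type test can be combined with `llvm::dyn_cast_or_null`. A hedged one-liner
+sketch (the choice of `ConstantOp` here is an assumption for illustration):
+
+```c++
+  // Sketch: non-null only when the operand has a defining op of that type;
+  // block arguments and producers of other kinds yield a null result.
+  if (auto constant =
+          llvm::dyn_cast_or_null<ConstantOp>(operand.getDefiningOp())) {
+    // `operand` is known to be produced by a std.constant operation here.
+  }
+```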
+
+Similarly, the following code sample iterates through the result `Value`s
+produced by an `Operation` and, for each result, iterates over its users and
+prints information about them:
+
+```c++
+  // Print information about the users of each result.
+  llvm::outs() << "Has " << op->getNumResults() << " results:\n";
+  for (auto indexedResult : llvm::enumerate(op->getResults())) {
+    Value result = indexedResult.value();
+    llvm::outs() << "  - Result " << indexedResult.index();
+    if (result.use_empty()) {
+      llvm::outs() << " has no uses\n";
+      continue;
+    }
+    if (result.hasOneUse()) {
+      llvm::outs() << " has a single use: ";
+    } else {
+      llvm::outs() << " has "
+                   << std::distance(result.getUses().begin(),
+                                    result.getUses().end())
+                   << " uses:\n";
+    }
+    for (Operation *userOp : result.getUsers()) {
+      llvm::outs() << "    - " << userOp->getName() << "\n";
+    }
+  }
+```
+
+The code illustrating this pass is available
+[here in the repo](https://github.com/llvm/llvm-project/blob/master/mlir/test/lib/IR/TestPrintDefUse.cpp)
+and can be exercised with `mlir-opt -test-print-defuse`.
+
+The chaining of `Value`s and their uses can be viewed as follows:
+
+![Def-use chains](/includes/img/DefUseChains.svg)
+
+The uses of a `Value` (`OpOperand` or `BlockOperand`) are also chained in a
+doubly linked list, which is particularly useful when replacing all uses of a
+`Value` with a new one ("RAUW"):
+
+![Use-list chain](/includes/img/Use-list.svg)
diff --git a/mlir/docs/includes/img/DefUseChains.svg b/mlir/docs/includes/img/DefUseChains.svg
new file mode 100644
index 0000000000000..de74a4e6e82ee
--- /dev/null
+++ b/mlir/docs/includes/img/DefUseChains.svg
@@ -0,0 +1 @@
+ \ No newline at end of file
diff --git a/mlir/docs/includes/img/Use-list.svg b/mlir/docs/includes/img/Use-list.svg
new file mode 100644
index 0000000000000..941ac052fd2e4
--- /dev/null
+++ b/mlir/docs/includes/img/Use-list.svg
@@ -0,0 +1 @@
+ \ No newline at end of file
diff --git a/mlir/test/IR/print-ir-defuse.mlir b/mlir/test/IR/print-ir-defuse.mlir
new file mode 100644
index 0000000000000..78c5804119250
--- /dev/null
+++ b/mlir/test/IR/print-ir-defuse.mlir
@@ -0,0 +1,31 @@
+// RUN: mlir-opt -test-print-defuse -allow-unregistered-dialect %s | FileCheck %s
+
+// CHECK: Visiting op 'dialect.op1' with 0 operands:
+// CHECK: Has 4 results:
+// CHECK:   - Result 0 has a single use:  - dialect.op2
+// CHECK:   - Result 1 has no uses
+// CHECK:   - Result 2 has 2 uses:
+// CHECK:     - dialect.innerop1
+// CHECK:     - dialect.op2
+// CHECK:   - Result 3 has no uses
+// CHECK: Visiting op 'dialect.op2' with 2 operands:
+// CHECK:   - Operand produced by operation 'dialect.op1'
+// CHECK:   - Operand produced by operation 'dialect.op1'
+// CHECK: Has 0 results:
+// CHECK: Visiting op 'dialect.innerop1' with 2 operands:
+// CHECK:   - Operand produced by Block argument, number 0
+// CHECK:   - Operand produced by operation 'dialect.op1'
+// CHECK: Has 0 results:
+// CHECK: Visiting op 'dialect.op3' with 0 operands:
+// CHECK: Has 0 results:
+// CHECK: Visiting op 'module_terminator' with 0 operands:
+// CHECK: Has 0 results:
+// CHECK: Visiting op 'module' with 0 operands:
+// CHECK: Has 0 results:
+
+%results:4 = "dialect.op1"() : () -> (i1, i16, i32, i64)
+"dialect.op2"(%results#0, %results#2) : (i1, i32) -> ()
+"dialect.op3"() ({
+  ^bb0(%arg0 : i1):
+    "dialect.innerop1"(%arg0, %results#2) : (i1, i32) -> ()
+}) : () -> ()
diff --git a/mlir/test/IR/print-ir-nesting.mlir b/mlir/test/IR/print-ir-nesting.mlir
new file mode 100644
index 0000000000000..4682753947550
--- /dev/null
+++ b/mlir/test/IR/print-ir-nesting.mlir
@@ -0,0 +1,57 @@
+// RUN: mlir-opt -test-print-nesting -allow-unregistered-dialect %s | FileCheck %s
+
+// CHECK: visiting op: 'module' with 0 operands and 0 results
+// CHECK:  1 nested regions:
+// CHECK:   Region with 1 blocks:
+// CHECK:    Block with 0 arguments, 0 successors, and 3 operations
+module {
+
+
+// CHECK:     visiting op: 'dialect.op1' with 0 operands and 4 results
+// CHECK:     1 attributes:
+// CHECK:      - 'attribute name' : '42 : i32'
+// CHECK:      0 nested regions:
+  %results:4 = "dialect.op1"() { "attribute name" = 42 : i32 } : () -> (i1, i16, i32, i64)
+
+
+// CHECK:     visiting op: 'dialect.op2' with 0 operands and 0 results
+// CHECK:      2 nested regions:
+  "dialect.op2"() ({
+
+// CHECK:       Region with 1 blocks:
+// CHECK:        Block with 0 arguments, 0 successors, and 1 operations
+// CHECK:         visiting op: 'dialect.innerop1' with 2 operands and 0 results
+// CHECK:          0 nested regions:
+    "dialect.innerop1"(%results#0, %results#1) : (i1, i16) -> ()
+
+// CHECK:       Region with 3 blocks:
+  },{
+
+// CHECK:        Block with 0 arguments, 2 successors, and 2 operations
+// CHECK:         visiting op: 'dialect.innerop2' with 0 operands and 0 results
+// CHECK:          0 nested regions:
+    "dialect.innerop2"() : () -> ()
+// CHECK:         visiting op: 'dialect.innerop3' with 3 operands and 0 results
+// CHECK:          0 nested regions:
+    "dialect.innerop3"(%results#0, %results#2, %results#3)[^bb1, ^bb2] : (i1, i32, i64) -> ()
+// CHECK:        Block with 1 arguments, 0 successors, and 2 operations
+  ^bb1(%arg1 : i32):
+// CHECK:         visiting op: 'dialect.innerop4' with 0 operands and 0 results
+// CHECK:          0 nested regions:
+    "dialect.innerop4"() : () -> ()
+// CHECK:         visiting op: 'dialect.innerop5' with 0 operands and 0 results
+// CHECK:          0 nested regions:
+    "dialect.innerop5"() : () -> ()
+// CHECK:        Block with 1 arguments, 0 successors, and 2 operations
+  ^bb2(%arg2 : i64):
+// CHECK:         visiting op: 'dialect.innerop6' with 0 operands and 0 results
+// CHECK:          0 nested regions:
+    "dialect.innerop6"() : () -> ()
+// CHECK:         visiting op: 'dialect.innerop7' with 0 operands and 0 results
+// CHECK:          0 nested regions:
+    "dialect.innerop7"() : () -> ()
+  }) : () -> ()
+
+// CHECK:     visiting op: 'module_terminator' with 0 operands and 0 results
+
+} // module
diff --git a/mlir/test/lib/IR/CMakeLists.txt b/mlir/test/lib/IR/CMakeLists.txt
index f77b26e5ca184..cf4ecada0f3cb 100644
--- a/mlir/test/lib/IR/CMakeLists.txt
+++ b/mlir/test/lib/IR/CMakeLists.txt
@@ -3,6 +3,8 @@ add_mlir_library(MLIRTestIR
   TestFunc.cpp
   TestInterfaces.cpp
   TestMatchers.cpp
+  TestPrintDefUse.cpp
+  TestPrintNesting.cpp
   TestSideEffects.cpp
   TestSymbolUses.cpp
   TestTypes.cpp
diff --git a/mlir/test/lib/IR/TestPrintDefUse.cpp b/mlir/test/lib/IR/TestPrintDefUse.cpp
new file mode 100644
index 0000000000000..3153a148477a9
--- /dev/null
+++ b/mlir/test/lib/IR/TestPrintDefUse.cpp
@@ -0,0 +1,71 @@
+//===- TestPrintDefUse.cpp - Passes to illustrate the IR def-use chains ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/IR/Function.h"
+#include "mlir/Pass/Pass.h"
+
+using namespace mlir;
+
+namespace {
+/// This pass illustrates the IR def-use chains through printing.
+struct TestPrintDefUsePass
+    : public PassWrapper<TestPrintDefUsePass, OperationPass<>> {
+  void runOnOperation() override {
+    // Recursively traverse the IR nested under the current operation and print
+    // every operation along with its operands and users.
+    getOperation()->walk([](Operation *op) {
+      llvm::outs() << "Visiting op '" << op->getName() << "' with "
+                   << op->getNumOperands() << " operands:\n";
+
+      // Print information about the producer of each of the operands.
+      for (Value operand : op->getOperands()) {
+        if (Operation *producer = operand.getDefiningOp()) {
+          llvm::outs() << "  - Operand produced by operation '"
+                       << producer->getName() << "'\n";
+        } else {
+          // If there is no defining op, the Value is necessarily a Block
+          // argument.
+          auto blockArg = operand.cast<BlockArgument>();
+          llvm::outs() << "  - Operand produced by Block argument, number "
+                       << blockArg.getArgNumber() << "\n";
+        }
+      }
+
+      // Print information about the users of each result.
+      llvm::outs() << "Has " << op->getNumResults() << " results:\n";
+      for (auto indexedResult : llvm::enumerate(op->getResults())) {
+        Value result = indexedResult.value();
+        llvm::outs() << "  - Result " << indexedResult.index();
+        if (result.use_empty()) {
+          llvm::outs() << " has no uses\n";
+          continue;
+        }
+        if (result.hasOneUse()) {
+          llvm::outs() << " has a single use: ";
+        } else {
+          llvm::outs() << " has "
+                       << std::distance(result.getUses().begin(),
+                                        result.getUses().end())
+                       << " uses:\n";
+        }
+        for (Operation *userOp : result.getUsers()) {
+          llvm::outs() << "    - " << userOp->getName() << "\n";
+        }
+      }
+    });
+  }
+};
+} // end anonymous namespace
+
+namespace mlir {
+void registerTestPrintDefUsePass() {
+  PassRegistration<TestPrintDefUsePass>("test-print-defuse",
+                                        "Test various printing.");
+}
+} // namespace mlir
diff --git a/mlir/test/lib/IR/TestPrintNesting.cpp b/mlir/test/lib/IR/TestPrintNesting.cpp
new file mode 100644
index 0000000000000..825d241740fda
--- /dev/null
+++ b/mlir/test/lib/IR/TestPrintNesting.cpp
@@ -0,0 +1,96 @@
+//===- TestPrintNesting.cpp - Passes to illustrate the IR nesting ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/IR/Function.h"
+#include "mlir/Pass/Pass.h"
+
+using namespace mlir;
+
+namespace {
+/// This pass illustrates the IR nesting through printing.
+struct TestPrintNestingPass
+    : public PassWrapper<TestPrintNestingPass, OperationPass<>> {
+  // Entry point for the pass.
+  void runOnOperation() override {
+    Operation *op = getOperation();
+    resetIndent();
+    printOperation(op);
+  }
+
+  /// The three methods below are mutually recursive and follow the nesting of
+  /// the IR: operation->region->block->operation->...
+
+  void printOperation(Operation *op) {
+    // Print the operation itself and some of its properties
+    printIndent() << "visiting op: '" << op->getName() << "' with "
+                  << op->getNumOperands() << " operands and "
+                  << op->getNumResults() << " results\n";
+    // Print the operation attributes
+    if (!op->getAttrs().empty()) {
+      printIndent() << op->getAttrs().size() << " attributes:\n";
+      for (NamedAttribute attr : op->getAttrs())
+        printIndent() << " - '" << attr.first << "' : '" << attr.second
+                      << "'\n";
+    }
+
+    // Recurse into each of the regions attached to the operation.
+    printIndent() << " " << op->getNumRegions() << " nested regions:\n";
+    auto indent = pushIndent();
+    for (Region &region : op->getRegions())
+      printRegion(region);
+  }
+
+  void printRegion(Region &region) {
+    // A region does not hold anything by itself other than a list of blocks.
+    printIndent() << "Region with " << region.getBlocks().size()
+                  << " blocks:\n";
+    auto indent = pushIndent();
+    for (Block &block : region.getBlocks())
+      printBlock(block);
+  }
+
+  void printBlock(Block &block) {
+    // Print the block's intrinsic properties (basically: the argument list)
+    printIndent()
+        << "Block with " << block.getNumArguments() << " arguments, "
+        << block.getNumSuccessors()
+        << " successors, and "
+        // Note, this `.size()` is traversing a linked-list and is O(n).
+        << block.getOperations().size() << " operations\n";
+
+    // A block's main role is to hold a list of Operations: let's recurse.
+    auto indent = pushIndent();
+    for (Operation &op : block.getOperations())
+      printOperation(&op);
+  }
+
+  /// Manages the indentation as we traverse the IR nesting.
+  int indent;
+  struct IdentRAII {
+    int &indent;
+    IdentRAII(int &indent) : indent(indent) {}
+    ~IdentRAII() { --indent; }
+  };
+  void resetIndent() { indent = 0; }
+  IdentRAII pushIndent() { return IdentRAII(++indent); }
+
+  llvm::raw_ostream &printIndent() {
+    for (int i = 0; i < indent; ++i)
+      llvm::outs() << "  ";
+    return llvm::outs();
+  }
+};
+} // end anonymous namespace
+
+namespace mlir {
+void registerTestPrintNestingPass() {
+  PassRegistration<TestPrintNestingPass>("test-print-nesting",
+                                         "Test various printing.");
+}
+} // namespace mlir
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index ad76abed647e7..34e03a5f99201 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -66,6 +66,8 @@ void registerTestMemRefDependenceCheck();
 void registerTestMemRefStrideCalculation();
 void registerTestOpaqueLoc();
 void registerTestPreparationPassWithAllowedMemrefResults();
+void registerTestPrintDefUsePass();
+void registerTestPrintNestingPass();
 void registerTestRecursiveTypesPass();
 void registerTestReducer();
 void registerTestSpirvEntryPointABIPass();
@@ -115,6 +117,8 @@ void registerTestPasses() {
   registerTestMemRefStrideCalculation();
   registerTestOpaqueLoc();
   registerTestPreparationPassWithAllowedMemrefResults();
+  registerTestPrintDefUsePass();
+  registerTestPrintNestingPass();
   registerTestRecursiveTypesPass();
   registerTestReducer();
   registerTestGpuParallelLoopMappingPass();

From 8dcd6ea644cf86aba3dea5b1d3c1af4f350d22ab Mon Sep 17 00:00:00 2001
From: Mehdi Amini
Date: Tue, 8 Sep 2020 00:56:10 +0000
Subject: [PATCH 0010/1079] Update SVG images to be properly cropped (NFC)

---
 mlir/docs/includes/img/DefUseChains.svg | 2 +-
 mlir/docs/includes/img/Use-list.svg     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/docs/includes/img/DefUseChains.svg b/mlir/docs/includes/img/DefUseChains.svg
index de74a4e6e82ee..2d5b75246772a 100644
--- a/mlir/docs/includes/img/DefUseChains.svg
+++ b/mlir/docs/includes/img/DefUseChains.svg
@@ -1 +1 @@
- \ No newline at end of file
+ \ No newline at end of file
diff --git a/mlir/docs/includes/img/Use-list.svg b/mlir/docs/includes/img/Use-list.svg
index 941ac052fd2e4..4840619f06741 100644
--- a/mlir/docs/includes/img/Use-list.svg
+++ b/mlir/docs/includes/img/Use-list.svg
@@ -1 +1 @@
- \ No newline at end of file
+ \ No newline at end of file

From 35f708a3c9ffceacbeaf8abfb0ba5123e346b30e Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 7 Sep 2020 17:57:39 -0700
Subject: [PATCH 0011/1079] [builtins] Inline __paritysi2 into __paritydi2 and
 inline __paritydi2 into __parityti2.

No point in making __parityti2 go through 2 calls to get to
__paritysi2.
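For reference, the fold being inlined reduces a word to 4 bits with xor-shifts
and then indexes the constant 0x6996, which is the 16-entry truth table of
4-bit parity. A standalone, hedged sketch of the same trick (illustration
only, not the compiler-rt source):

  // Hedged sketch: the nibble-parity fold used by __paritydi2/__parityti2
  // below, shown for a plain 32-bit value.
  #include <cassert>
  #include <cstdint>

  static int parity32(uint32_t x) {
    x ^= x >> 16; // fold the top half into the bottom half
    x ^= x >> 8;
    x ^= x >> 4;  // parity of x now equals parity of its low 4 bits
    return (0x6996 >> (x & 0xF)) & 1; // 0x6996: truth table of 4-bit parity
  }

  int main() {
    assert(parity32(0x0) == 0);
    assert(parity32(0x1) == 1);
    assert(parity32(0x80000001u) == 0); // two set bits -> even parity
    return 0;
  }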
Reviewed By: MaskRay, efriedma Differential Revision: https://reviews.llvm.org/D87218 --- compiler-rt/lib/builtins/paritydi2.c | 6 +++++- compiler-rt/lib/builtins/parityti2.c | 8 +++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/compiler-rt/lib/builtins/paritydi2.c b/compiler-rt/lib/builtins/paritydi2.c index 58e85f89e0437..350dceb8cef59 100644 --- a/compiler-rt/lib/builtins/paritydi2.c +++ b/compiler-rt/lib/builtins/paritydi2.c @@ -17,5 +17,9 @@ COMPILER_RT_ABI int __paritydi2(di_int a) { dwords x; x.all = a; - return __paritysi2(x.s.high ^ x.s.low); + su_int x2 = x.s.high ^ x.s.low; + x2 ^= x2 >> 16; + x2 ^= x2 >> 8; + x2 ^= x2 >> 4; + return (0x6996 >> (x2 & 0xF)) & 1; } diff --git a/compiler-rt/lib/builtins/parityti2.c b/compiler-rt/lib/builtins/parityti2.c index 79e920d8a02df..011c8dd455620 100644 --- a/compiler-rt/lib/builtins/parityti2.c +++ b/compiler-rt/lib/builtins/parityti2.c @@ -18,8 +18,14 @@ COMPILER_RT_ABI int __parityti2(ti_int a) { twords x; + dwords x2; x.all = a; - return __paritydi2(x.s.high ^ x.s.low); + x2.all = x.s.high ^ x.s.low; + su_int x3 = x2.s.high ^ x2.s.low; + x3 ^= x3 >> 16; + x3 ^= x3 >> 8; + x3 ^= x3 >> 4; + return (0x6996 >> (x3 & 0xF)) & 1; } #endif // CRT_HAS_128BIT From 4536c6acb3809eaadc836f24f091db1b50b82af9 Mon Sep 17 00:00:00 2001 From: Kiran Kumar T P Date: Tue, 8 Sep 2020 06:52:07 +0530 Subject: [PATCH 0012/1079] [flang][OpenMP] Enhance parser support for atomic construct to OpenMP 5.0 Summary: This patch enhances parser support for atomic construct to OpenMP 5.0. 2.17.7 atomic -> ATOMIC [clause [,]] atomic-clause [[,] clause] | ATOMIC [clause] clause -> memory-order-clause | HINT(hint-expression) memory-order-clause -> SEQ_CST | ACQ_REL | RELEASE | ACQUIRE | RELAXED atomic-clause -> READ | WRITE | UPDATE | CAPTURE The patch includes code changes and testcase modifications. Reviewed By: DavidTruby, kiranchandramohan, sameeranjoshi Differential Revision: https://reviews.llvm.org/D82931 --- flang/docs/OpenMP-4.5-grammar.txt | 2 + flang/include/flang/Parser/dump-parse-tree.h | 16 ++-- flang/include/flang/Parser/parse-tree.h | 57 +++++++----- flang/lib/Parser/openmp-parsers.cpp | 91 ++++++++++++-------- flang/lib/Parser/unparse.cpp | 54 ++++++++---- flang/test/Semantics/omp-atomic.f90 | 22 ++++- 6 files changed, 160 insertions(+), 82 deletions(-) diff --git a/flang/docs/OpenMP-4.5-grammar.txt b/flang/docs/OpenMP-4.5-grammar.txt index c74072ba1ef27..180494bbf509e 100644 --- a/flang/docs/OpenMP-4.5-grammar.txt +++ b/flang/docs/OpenMP-4.5-grammar.txt @@ -344,6 +344,8 @@ ATOMIC [seq_cst] atomic-clause -> READ | WRITE | UPDATE | CAPTURE +2.13.6 end-atomic -> END ATOMIC + 2.13.7 flush -> FLUSH [(variable-name-list)] 2.13.8 ordered -> ORDERED ordered-construct-clause [[[,] ordered-construct-clause]...] 
diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 41ff9631d1011..921e6172bf89b 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -445,6 +445,9 @@ class ParseTreeDumper { NODE(parser, OmpAtomicCapture) NODE(OmpAtomicCapture, Stmt1) NODE(OmpAtomicCapture, Stmt2) + NODE(parser, OmpAtomicMemoryOrderClause) + NODE(parser, OmpAtomicMemoryOrderClauseList) + NODE(parser, OmpAtomicMemoryOrderClausePostList) NODE(parser, OmpAtomicRead) NODE(parser, OmpAtomicUpdate) NODE(parser, OmpAtomicWrite) @@ -464,7 +467,6 @@ class ParseTreeDumper { #include "llvm/Frontend/OpenMP/OMP.cpp.inc" NODE(parser, OmpClauseList) NODE(parser, OmpCriticalDirective) - NODE(OmpCriticalDirective, Hint) NODE(parser, OmpDeclareTargetSpecifier) NODE(parser, OmpDeclareTargetWithClause) NODE(parser, OmpDeclareTargetWithList) @@ -487,6 +489,7 @@ class ParseTreeDumper { NODE(parser, OmpEndCriticalDirective) NODE(parser, OmpEndLoopDirective) NODE(parser, OmpEndSectionsDirective) + NODE(parser, OmpHintExpr) NODE(parser, OmpIfClause) NODE_ENUM(OmpIfClause, DirectiveNameModifier) NODE(parser, OmpLinearClause) @@ -499,10 +502,12 @@ class ParseTreeDumper { NODE(parser, OmpMapType) NODE(OmpMapType, Always) NODE_ENUM(OmpMapType, Type) - NODE(parser, OmpMemoryClause) - NODE_ENUM(OmpMemoryClause, MemoryOrder) - NODE(parser, OmpMemoryClauseList) - NODE(parser, OmpMemoryClausePostList) + NODE(parser, OmpMemoryOrderClause) + static std::string GetNodeName(const llvm::omp::Clause &x) { + return llvm::Twine( + "llvm::omp::Clause = ", llvm::omp::getOpenMPClauseName(x)) + .str(); + } NODE(parser, OmpNowait) NODE(parser, OmpObject) NODE(parser, OmpObjectList) @@ -549,7 +554,6 @@ class ParseTreeDumper { NODE(parser, OpenMPDeclareSimdConstruct) NODE(parser, OpenMPDeclareTargetConstruct) NODE(parser, OmpFlushMemoryClause) - NODE_ENUM(OmpFlushMemoryClause, FlushMemoryOrder) NODE(parser, OpenMPFlushConstruct) NODE(parser, OpenMPLoopConstruct) NODE(parser, OpenMPSimpleStandaloneConstruct) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 166e573b5cec3..a9fb92cf2584b 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3591,12 +3591,14 @@ struct OpenMPDeclarativeConstruct { u; }; +// HINT(hint-expression) +WRAPPER_CLASS(OmpHintExpr, ConstantExpr); + // 2.13.2 CRITICAL [Name] END CRITICAL [Name] struct OmpCriticalDirective { TUPLE_CLASS_BOILERPLATE(OmpCriticalDirective); - WRAPPER_CLASS(Hint, ConstantExpr); CharBlock source; - std::tuple, std::optional> t; + std::tuple, std::optional> t; }; struct OmpEndCriticalDirective { TUPLE_CLASS_BOILERPLATE(OmpEndCriticalDirective); @@ -3608,44 +3610,56 @@ struct OpenMPCriticalConstruct { std::tuple t; }; -// 2.13.6 atomic -> ATOMIC [seq_cst[,]] atomic-clause [[,]seq_cst] | -// ATOMIC [seq_cst] +// 2.17.7 atomic -> ATOMIC [clause[,]] atomic-clause [[,]clause] | +// ATOMIC [clause] +// clause -> memory-order-clause | HINT(hint-expression) +// memory-order-clause -> SEQ_CST | ACQ_REL | RELEASE | ACQUIRE | RELAXED // atomic-clause -> READ | WRITE | UPDATE | CAPTURE // END ATOMIC EMPTY_CLASS(OmpEndAtomic); -// ATOMIC Memory related clause -struct OmpMemoryClause { - ENUM_CLASS(MemoryOrder, SeqCst) - WRAPPER_CLASS_BOILERPLATE(OmpMemoryClause, MemoryOrder); +// Memory order clause +struct OmpMemoryOrderClause { + WRAPPER_CLASS_BOILERPLATE(OmpMemoryOrderClause, llvm::omp::Clause); CharBlock source; }; 
-WRAPPER_CLASS(OmpMemoryClauseList, std::list); -WRAPPER_CLASS(OmpMemoryClausePostList, std::list); +// ATOMIC Memory order clause or hint expression +struct OmpAtomicMemoryOrderClause { + UNION_CLASS_BOILERPLATE(OmpAtomicMemoryOrderClause); + std::variant u; +}; + +WRAPPER_CLASS( + OmpAtomicMemoryOrderClauseList, std::list); +WRAPPER_CLASS( + OmpAtomicMemoryOrderClausePostList, std::list); // ATOMIC READ struct OmpAtomicRead { TUPLE_CLASS_BOILERPLATE(OmpAtomicRead); - std::tuple, std::optional> + std::tuple, + std::optional> t; }; // ATOMIC WRITE struct OmpAtomicWrite { TUPLE_CLASS_BOILERPLATE(OmpAtomicWrite); - std::tuple, std::optional> + std::tuple, + std::optional> t; }; // ATOMIC UPDATE struct OmpAtomicUpdate { TUPLE_CLASS_BOILERPLATE(OmpAtomicUpdate); - std::tuple, std::optional> + std::tuple, + std::optional> t; }; @@ -3654,16 +3668,16 @@ struct OmpAtomicCapture { TUPLE_CLASS_BOILERPLATE(OmpAtomicCapture); WRAPPER_CLASS(Stmt1, Statement); WRAPPER_CLASS(Stmt2, Statement); - std::tuple + std::tuple t; }; // ATOMIC struct OmpAtomic { TUPLE_CLASS_BOILERPLATE(OmpAtomic); - std::tuple, - std::optional> + std::tuple, std::optional> t; }; @@ -3707,8 +3721,7 @@ struct OpenMPCancelConstruct { // release // acquire struct OmpFlushMemoryClause { - ENUM_CLASS(FlushMemoryOrder, AcqRel, Release, Acquire) - WRAPPER_CLASS_BOILERPLATE(OmpFlushMemoryClause, FlushMemoryOrder); + WRAPPER_CLASS_BOILERPLATE(OmpFlushMemoryClause, llvm::omp::Clause); CharBlock source; }; diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index cd5ee0de556dc..a7f4a1ae492c7 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -300,9 +300,9 @@ TYPE_PARSER(sourced(construct(verbatim("CANCEL"_tok), // release // acquire TYPE_PARSER(sourced(construct( - "ACQ_REL" >> pure(OmpFlushMemoryClause::FlushMemoryOrder::AcqRel) || - "RELEASE" >> pure(OmpFlushMemoryClause::FlushMemoryOrder::Release) || - "ACQUIRE" >> pure(OmpFlushMemoryClause::FlushMemoryOrder::Acquire)))) + "ACQ_REL" >> pure(llvm::omp::Clause::OMPC_acq_rel) || + "RELEASE" >> pure(llvm::omp::Clause::OMPC_release) || + "ACQUIRE" >> pure(llvm::omp::Clause::OMPC_acquire)))) TYPE_PARSER(sourced(construct(verbatim("FLUSH"_tok), maybe(Parser{}), @@ -384,51 +384,74 @@ TYPE_PARSER(construct(Parser{}) || construct(Parser{}, parenthesized(optionalList(actualArgSpec)))))) -// 2.13.6 ATOMIC [seq_cst[,]] atomic-clause [[,]seq_cst] | ATOMIC [seq_cst] -// atomic-clause -> READ | WRITE | UPDATE | CAPTURE +// Hint Expression => HINT(hint-expression) +TYPE_PARSER("HINT" >> construct(parenthesized(constantExpr))) + +// 2.17.7 atomic -> ATOMIC [clause [,]] atomic-clause [[,] clause] | +// ATOMIC [clause] +// clause -> memory-order-clause | HINT(hint-expression) +// memory-order-clause -> SEQ_CST | ACQ_REL | RELEASE | ACQUIRE | RELAXED +// atomic-clause -> READ | WRITE | UPDATE | CAPTURE // OMP END ATOMIC TYPE_PARSER(construct(startOmpLine >> "END ATOMIC"_tok)) -// ATOMIC Memory related clause -TYPE_PARSER(sourced(construct( - "SEQ_CST" >> pure(OmpMemoryClause::MemoryOrder::SeqCst)))) +// Memory order clause +TYPE_PARSER(sourced(construct( + "SEQ_CST" >> pure(llvm::omp::Clause::OMPC_seq_cst) || + "ACQ_REL" >> pure(llvm::omp::Clause::OMPC_acq_rel) || + "RELEASE" >> pure(llvm::omp::Clause::OMPC_release) || + "ACQUIRE" >> pure(llvm::omp::Clause::OMPC_acquire) || + "RELAXED" >> pure(llvm::omp::Clause::OMPC_relaxed)))) -// ATOMIC Memory Clause List -TYPE_PARSER(construct( - many(maybe(","_tok) >> Parser{}))) +// ATOMIC 
Memory order clause or Hint expression +TYPE_PARSER( + construct(Parser{}) || + construct(Parser{})) -TYPE_PARSER(construct( - many(maybe(","_tok) >> Parser{}))) +// ATOMIC Memory order Clause List +TYPE_PARSER(construct( + many(maybe(","_tok) >> Parser{}))) -// OMP [SEQ_CST] ATOMIC READ [SEQ_CST] -TYPE_PARSER("ATOMIC" >> - construct(Parser{} / maybe(","_tok), - verbatim("READ"_tok), Parser{} / endOmpLine, - statement(assignmentStmt), maybe(Parser{} / endOmpLine))) +TYPE_PARSER(construct( + many(maybe(","_tok) >> Parser{}))) -// OMP ATOMIC [SEQ_CST] CAPTURE [SEQ_CST] +// OMP ATOMIC [MEMORY-ORDER-CLAUSE-LIST] READ [MEMORY-ORDER-CLAUSE-LIST] TYPE_PARSER("ATOMIC" >> - construct(Parser{} / maybe(","_tok), - verbatim("CAPTURE"_tok), Parser{} / endOmpLine, - statement(assignmentStmt), statement(assignmentStmt), - Parser{} / endOmpLine)) + construct( + Parser{} / maybe(","_tok), + verbatim("READ"_tok), + Parser{} / endOmpLine, + statement(assignmentStmt), maybe(Parser{} / endOmpLine))) -// OMP ATOMIC [SEQ_CST] UPDATE [SEQ_CST] +// OMP ATOMIC [MEMORY-ORDER-CLAUSE-LIST] CAPTURE [MEMORY-ORDER-CLAUSE-LIST] +TYPE_PARSER( + "ATOMIC" >> construct( + Parser{} / maybe(","_tok), + verbatim("CAPTURE"_tok), + Parser{} / endOmpLine, + statement(assignmentStmt), statement(assignmentStmt), + Parser{} / endOmpLine)) + +// OMP ATOMIC [MEMORY-ORDER-CLAUSE-LIST] UPDATE [MEMORY-ORDER-CLAUSE-LIST] TYPE_PARSER("ATOMIC" >> - construct(Parser{} / maybe(","_tok), - verbatim("UPDATE"_tok), Parser{} / endOmpLine, + construct( + Parser{} / maybe(","_tok), + verbatim("UPDATE"_tok), + Parser{} / endOmpLine, statement(assignmentStmt), maybe(Parser{} / endOmpLine))) -// OMP ATOMIC [SEQ_CST] +// OMP ATOMIC [MEMORY-ORDER-CLAUSE-LIST] TYPE_PARSER(construct(verbatim("ATOMIC"_tok), - Parser{} / endOmpLine, statement(assignmentStmt), - maybe(Parser{} / endOmpLine))) + Parser{} / endOmpLine, + statement(assignmentStmt), maybe(Parser{} / endOmpLine))) -// ATOMIC [SEQ_CST] WRITE [SEQ_CST] +// OMP ATOMIC [MEMORY-ORDER-CLAUSE-LIST] WRITE [MEMORY-ORDER-CLAUSE-LIST] TYPE_PARSER("ATOMIC" >> - construct(Parser{} / maybe(","_tok), - verbatim("WRITE"_tok), Parser{} / endOmpLine, + construct( + Parser{} / maybe(","_tok), + verbatim("WRITE"_tok), + Parser{} / endOmpLine, statement(assignmentStmt), maybe(Parser{} / endOmpLine))) // Atomic Construct @@ -444,9 +467,7 @@ TYPE_PARSER(startOmpLine >> verbatim("END CRITICAL"_tok), maybe(parenthesized(name)))) / endOmpLine) TYPE_PARSER(sourced(construct(verbatim("CRITICAL"_tok), - maybe(parenthesized(name)), - maybe("HINT" >> construct( - parenthesized(constantExpr))))) / + maybe(parenthesized(name)), maybe(Parser{}))) / endOmpLine) TYPE_PARSER(construct( diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index e26795d0825bb..ab94aa2e00c26 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2222,19 +2222,36 @@ class UnparseVisitor { break; } } - void Unparse(const OmpMemoryClause &x) { + void Unparse(const OmpHintExpr &x) { Word("HINT("), Walk(x.v), Put(')'); } + void Unparse(const OmpMemoryOrderClause &x) { switch (x.v) { - case OmpMemoryClause::MemoryOrder::SeqCst: + case llvm::omp::Clause::OMPC_seq_cst: Word("SEQ_CST"); break; + case llvm::omp::Clause::OMPC_acq_rel: + Word("ACQ_REL"); + break; + case llvm::omp::Clause::OMPC_release: + Word("RELEASE"); + break; + case llvm::omp::Clause::OMPC_acquire: + Word("ACQUIRE"); + break; + case llvm::omp::Clause::OMPC_relaxed: + Word("RELAXED"); + break; + default: + break; } } - void Unparse(const 
OmpMemoryClauseList &x) { Walk(" ", x.v, " "); } - void Unparse(const OmpMemoryClausePostList &x) { Walk(" ", x.v, " "); } + void Unparse(const OmpAtomicMemoryOrderClauseList &x) { Walk(" ", x.v, " "); } + void Unparse(const OmpAtomicMemoryOrderClausePostList &x) { + Walk(" ", x.v, " "); + } void Unparse(const OmpAtomic &x) { BeginOpenMP(); Word("!$OMP ATOMIC"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Put("\n"); EndOpenMP(); Walk(std::get>(x.t)); @@ -2245,9 +2262,9 @@ class UnparseVisitor { void Unparse(const OmpAtomicCapture &x) { BeginOpenMP(); Word("!$OMP ATOMIC"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Word(" CAPTURE"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Put("\n"); EndOpenMP(); Walk(std::get(x.t)); @@ -2260,9 +2277,9 @@ class UnparseVisitor { void Unparse(const OmpAtomicRead &x) { BeginOpenMP(); Word("!$OMP ATOMIC"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Word(" READ"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Put("\n"); EndOpenMP(); Walk(std::get>(x.t)); @@ -2273,9 +2290,9 @@ class UnparseVisitor { void Unparse(const OmpAtomicUpdate &x) { BeginOpenMP(); Word("!$OMP ATOMIC"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Word(" UPDATE"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Put("\n"); EndOpenMP(); Walk(std::get>(x.t)); @@ -2286,9 +2303,9 @@ class UnparseVisitor { void Unparse(const OmpAtomicWrite &x) { BeginOpenMP(); Word("!$OMP ATOMIC"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Word(" WRITE"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Put("\n"); EndOpenMP(); Walk(std::get>(x.t)); @@ -2300,8 +2317,7 @@ class UnparseVisitor { BeginOpenMP(); Word("!$OMP CRITICAL"); Walk(" (", std::get>(x.t), ")"); - Walk(" HINT(", std::get>(x.t), - ")"); + Walk(std::get>(x.t)); Put("\n"); EndOpenMP(); } @@ -2431,15 +2447,17 @@ class UnparseVisitor { } void Unparse(const OmpFlushMemoryClause &x) { switch (x.v) { - case OmpFlushMemoryClause::FlushMemoryOrder::AcqRel: + case llvm::omp::Clause::OMPC_acq_rel: Word("ACQ_REL "); break; - case OmpFlushMemoryClause::FlushMemoryOrder::Release: + case llvm::omp::Clause::OMPC_release: Word("RELEASE "); break; - case OmpFlushMemoryClause::FlushMemoryOrder::Acquire: + case llvm::omp::Clause::OMPC_acquire: Word("ACQUIRE "); break; + default: + break; } } void Unparse(const OpenMPFlushConstruct &x) { diff --git a/flang/test/Semantics/omp-atomic.f90 b/flang/test/Semantics/omp-atomic.f90 index d5cb87aaba32d..8d3f95a770454 100644 --- a/flang/test/Semantics/omp-atomic.f90 +++ b/flang/test/Semantics/omp-atomic.f90 @@ -1,5 +1,5 @@ ! RUN: %S/test_errors.sh %s %t %f18 -fopenmp - +use omp_lib ! Check OpenMP 2.13.6 atomic Construct a = 1.0 @@ -11,12 +11,32 @@ a = b !$omp end atomic + !$omp atomic read acquire hint(OMP_LOCK_HINT_CONTENDED) + a = b + + !$omp atomic release hint(OMP_LOCK_HINT_UNCONTENDED) write + a = b + !$omp atomic capture seq_cst b = a a = a + 1 !$omp end atomic + !$omp atomic hint(1) acq_rel capture + b = a + a = a + 1 + !$omp end atomic + + !ERROR: expected end of line + !ERROR: expected end of line + !$omp atomic read write + a = a + 1 + !$omp atomic a = a + 1 + + !$omp atomic relaxed + a = a + 1 + !$omp end parallel end From 10af5bad443dd15b79876fbad66d836ab9e9a4ed Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Mon, 7 Sep 2020 18:29:48 -0700 Subject: [PATCH 0013/1079] [llvm-objcopy] Consolidate and unify version tests In this diff the tests which verify version printing functionality are refactored. 
Since they are not specific to a particular format, we move them into
tool-version.test and slightly unify them (similarly to tool-name.test and
tool-help-message.test).

Test plan: make check-all

Differential revision: https://reviews.llvm.org/D87211
---
 .../tools/llvm-objcopy/ELF/objcopy-version.test    |  4 ----
 .../tools/llvm-objcopy/ELF/strip-version.test      |  5 -----
 .../MachO/install-name-tool-version.test           |  2 --
 llvm/test/tools/llvm-objcopy/tool-version.test     | 15 +++++++++++++++
 4 files changed, 15 insertions(+), 11 deletions(-)
 delete mode 100644 llvm/test/tools/llvm-objcopy/ELF/objcopy-version.test
 delete mode 100644 llvm/test/tools/llvm-objcopy/ELF/strip-version.test
 delete mode 100644 llvm/test/tools/llvm-objcopy/MachO/install-name-tool-version.test
 create mode 100644 llvm/test/tools/llvm-objcopy/tool-version.test

diff --git a/llvm/test/tools/llvm-objcopy/ELF/objcopy-version.test b/llvm/test/tools/llvm-objcopy/ELF/objcopy-version.test
deleted file mode 100644
index 7494ccd2866d3..0000000000000
--- a/llvm/test/tools/llvm-objcopy/ELF/objcopy-version.test
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: llvm-objcopy --version | FileCheck %s
-# RUN: llvm-objcopy -V | FileCheck %s
-
-# CHECK: {{ version }}
diff --git a/llvm/test/tools/llvm-objcopy/ELF/strip-version.test b/llvm/test/tools/llvm-objcopy/ELF/strip-version.test
deleted file mode 100644
index 4b2f137ce2aad..0000000000000
--- a/llvm/test/tools/llvm-objcopy/ELF/strip-version.test
+++ /dev/null
@@ -1,5 +0,0 @@
-# RUN: llvm-strip --version | FileCheck %s
-# RUN: llvm-strip -V | FileCheck %s
-
-# CHECK-DAG: {{ version }}
-# CHECK-DAG: GNU strip
diff --git a/llvm/test/tools/llvm-objcopy/MachO/install-name-tool-version.test b/llvm/test/tools/llvm-objcopy/MachO/install-name-tool-version.test
deleted file mode 100644
index 295e573561012..0000000000000
--- a/llvm/test/tools/llvm-objcopy/MachO/install-name-tool-version.test
+++ /dev/null
@@ -1,2 +0,0 @@
-# RUN: llvm-install-name-tool --version | FileCheck %s
-# CHECK: {{ version }}
diff --git a/llvm/test/tools/llvm-objcopy/tool-version.test b/llvm/test/tools/llvm-objcopy/tool-version.test
new file mode 100644
index 0000000000000..5fe33eb8e7173
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/tool-version.test
@@ -0,0 +1,15 @@
+# RUN: llvm-objcopy --version | FileCheck --check-prefix=OBJCOPY %s
+# RUN: llvm-objcopy -V | FileCheck --check-prefix=OBJCOPY %s
+
+# RUN: llvm-strip --version | FileCheck --check-prefix=STRIP %s
+# RUN: llvm-strip -V | FileCheck --check-prefix=STRIP %s
+
+# RUN: llvm-install-name-tool --version | FileCheck %s
+
+# OBJCOPY-DAG: {{ version }}
+# OBJCOPY-DAG: GNU objcopy
+
+# STRIP-DAG: {{ version }}
+# STRIP-DAG: GNU strip
+
+# CHECK: {{ version }}

From 3c0b3250230b3847a2a47dfeacfdb794c2285f02 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan
Date: Tue, 8 Sep 2020 11:03:09 +0800
Subject: [PATCH 0014/1079] [PowerPC] Implement instruction clustering for
 stores

On Power10, it's profitable to schedule some stores with adjacent target
addresses together. This patch implements this feature.
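In essence, two stores become cluster candidates when they have the same
opcode and width, share a base register (or frame index), and the first access
ends exactly where the second begins. A simplified standalone sketch of that
adjacency test (the types and names here are hypothetical stand-ins, not the
actual LLVM API):

  #include <cassert>
  #include <cstdint>

  // Hypothetical, simplified stand-in for the base/offset/width triple the
  // real hook extracts from each MachineInstr.
  struct MemAccess {
    unsigned baseReg; // base register id (or frame index)
    int64_t offset;   // byte offset from the base
    unsigned width;   // access size in bytes
  };

  // Mirrors the core of the new shouldClusterMemOps: same base, same width,
  // and the first access ends exactly where the second begins. The caller is
  // expected to have ordered a/b by offset, as the scheduler hook does.
  static bool shouldCluster(const MemAccess &a, const MemAccess &b) {
    return a.baseReg == b.baseReg && a.width == b.width &&
           a.offset + a.width == b.offset;
  }

  int main() {
    assert(shouldCluster({1, 16, 8}, {1, 24, 8}));  // std 16(r1) + std 24(r1)
    assert(!shouldCluster({1, 16, 8}, {1, 32, 8})); // gap between accesses
    return 0;
  }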
Reviewed By: steven.zhang

Differential Revision: https://reviews.llvm.org/D86754
---
 llvm/lib/Target/PowerPC/PPC.td                |  11 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp      | 109 ++++++-
 llvm/lib/Target/PowerPC/PPCInstrInfo.h        |  13 +
 llvm/lib/Target/PowerPC/PPCSubtarget.cpp      |   1 +
 llvm/lib/Target/PowerPC/PPCSubtarget.h        |   2 +
 llvm/lib/Target/PowerPC/PPCTargetMachine.cpp  |   4 +
 .../test/CodeGen/PowerPC/fusion-load-store.ll | 268 ++++++++++++++++++
 .../PowerPC/pcrel-call-linkage-leaf.ll        |   2 +-
 8 files changed, 405 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/fusion-load-store.ll

diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index a617715d4bd86..1b38a6f1d13d9 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -174,6 +174,9 @@ def FeatureAddisLoadFusion : SubtargetFeature<"fuse-addis-load",
                                               "HasAddisLoadFusion", "true",
                                               "Power8 Addis-Load fusion",
                                               [FeatureFusion]>;
+def FeatureStoreFusion : SubtargetFeature<"fuse-store", "HasStoreFusion", "true",
+                                          "Target supports store clustering",
+                                          [FeatureFusion]>;
 def FeatureUnalignedFloats :
   SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
                    "true", "CPU does not trap on unaligned FP access">;
@@ -345,10 +348,12 @@ def ProcessorFeatures {
   // Power10
   // For P10 CPU we assume that all of the existing features from Power9
   // still exist with the exception of those we know are Power9 specific.
+  list<SubtargetFeature> FusionFeatures = [FeatureStoreFusion];
   list<SubtargetFeature> P10AdditionalFeatures =
-      [DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs,
-       FeaturePCRelativeMemops, FeatureP10Vector, FeatureMMA,
-       FeaturePairedVectorMemops];
+      !listconcat(FusionFeatures, [
+      DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs,
+      FeaturePCRelativeMemops, FeatureP10Vector, FeatureMMA,
+      FeaturePairedVectorMemops]);
   list<SubtargetFeature> P10SpecificFeatures = [];
   list<SubtargetFeature> P10InheritableFeatures =
     !listconcat(P9InheritableFeatures, P10AdditionalFeatures);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 0732e0f0ace36..2c4549899e0c3 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2222,6 +2222,112 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   return true;
 }
 
+bool PPCInstrInfo::getMemOperandsWithOffsetWidth(
+    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
+    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+    const TargetRegisterInfo *TRI) const {
+  const MachineOperand *BaseOp;
+  if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
+    return false;
+  BaseOps.push_back(BaseOp);
+  return true;
+}
+
+static bool isLdStSafeToCluster(const MachineInstr &LdSt,
+                                const TargetRegisterInfo *TRI) {
+  // If this is a volatile load/store, don't mess with it.
+  if (LdSt.hasOrderedMemoryRef())
+    return false;
+
+  if (LdSt.getOperand(2).isFI())
+    return true;
+
+  assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");
+  // Can't cluster if the instruction modifies the base register
+  // or is an update form, e.g. ld r2,3(r2)
+  if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
+    return false;
+
+  return true;
+}
+
+// Only cluster instruction pairs that have the same opcode and are
+// clusterable according to the PowerPC specification.
+static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
+                                     const PPCSubtarget &Subtarget) {
+  switch (FirstOpc) {
+  default:
+    return false;
+  case PPC::STD:
+  case PPC::STFD:
+  case PPC::STXSD:
+  case PPC::DFSTOREf64:
+    return FirstOpc == SecondOpc;
+  // The PowerPC backend has the opcodes STW/STW8 for the "stw" instruction to
+  // deal with 32-bit and 64-bit instruction selection. They are a clusterable
+  // pair even though the opcodes differ.
+  case PPC::STW:
+  case PPC::STW8:
+    return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
+  }
+}
+
+bool PPCInstrInfo::shouldClusterMemOps(
+    ArrayRef<const MachineOperand *> BaseOps1,
+    ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
+    unsigned NumBytes) const {
+
+  assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
+  const MachineOperand &BaseOp1 = *BaseOps1.front();
+  const MachineOperand &BaseOp2 = *BaseOps2.front();
+  assert(BaseOp1.isReg() ||
+         BaseOp1.isFI() &&
+             "Only base registers and frame indices are supported.");
+
+  // NumLoads is the number of memory operations that have already been
+  // clustered. Don't cluster if at least two ops are clustered already.
+  if (NumLoads > 2)
+    return false;
+
+  // Cluster the load/store only when they share the same base
+  // register or FI.
+  if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
+      (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
+      (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
+    return false;
+
+  // Check if the loads/stores are clusterable according to the PowerPC
+  // specification.
+  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
+  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
+  unsigned FirstOpc = FirstLdSt.getOpcode();
+  unsigned SecondOpc = SecondLdSt.getOpcode();
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  // Cluster only when the two ops have the same opcode and form a clusterable
+  // pair according to the PowerPC specification.
+  if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
+    return false;
+
+  // Can't cluster loads/stores that have an ordered or volatile memory
+  // reference.
+  if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
+      !isLdStSafeToCluster(SecondLdSt, TRI))
+    return false;
+
+  int64_t Offset1 = 0, Offset2 = 0;
+  unsigned Width1 = 0, Width2 = 0;
+  const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
+  if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
+      !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
+      Width1 != Width2)
+    return false;
+
+  assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
+         "getMemOperandWithOffsetWidth return incorrect base op");
+  // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
+  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
+  return Offset1 + Width1 == Offset2;
+}
+
 /// GetInstSize - Return the number of bytes of code the specified
 /// instruction may be.  This returns the maximum number of bytes.
 ///
@@ -4664,7 +4770,8 @@ bool PPCInstrInfo::getMemOperandWithOffsetWidth(
     return false;
 
   // Handle only loads/stores with base register followed by immediate offset.
-  if (LdSt.getNumExplicitOperands() != 3)
+  if (!LdSt.getOperand(1).isImm() ||
+      (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
     return false;
   if (!LdSt.getOperand(1).isImm() ||
       (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
     return false;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 75e8224892f4c..2f867b16aa24f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -494,6 +494,19 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                                     int64_t &Offset, unsigned &Width,
                                     const TargetRegisterInfo *TRI) const;
 
+  /// Get the base operand and byte offset of an instruction that reads/writes
+  /// memory.
+  bool getMemOperandsWithOffsetWidth(
+      const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
+      int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+      const TargetRegisterInfo *TRI) const override;
+
+  /// Returns true if the two given memory operations should be scheduled
+  /// adjacent.
+  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+                           ArrayRef<const MachineOperand *> BaseOps2,
+                           unsigned NumLoads, unsigned NumBytes) const override;
+
   /// Return true if two MIs access different memory addresses and false
   /// otherwise
   bool
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 8021cfa4a18c6..05922dbb38fc6 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -108,6 +108,7 @@ void PPCSubtarget::initializeEnvironment() {
   HasHTM = false;
   HasFloat128 = false;
   HasFusion = false;
+  HasStoreFusion = false;
   HasAddiLoadFusion = false;
   HasAddisLoadFusion = false;
   IsISA3_0 = false;
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 76b43dfc7a723..0a134bb83ed2f 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -137,6 +137,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
   bool HasHTM;
   bool HasFloat128;
   bool HasFusion;
+  bool HasStoreFusion;
   bool HasAddiLoadFusion;
   bool HasAddisLoadFusion;
   bool IsISA3_0;
@@ -308,6 +309,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
   bool isISA3_1() const { return IsISA3_1; }
   bool useLongCalls() const { return UseLongCalls; }
   bool hasFusion() const { return HasFusion; }
+  bool hasStoreFusion() const { return HasStoreFusion; }
  bool hasAddiLoadFusion() const { return HasAddiLoadFusion; }
   bool hasAddisLoadFusion() const { return HasAddisLoadFusion; }
   bool needsSwapsForVSXMemOps() const {
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index ea9b37de6ff39..c5671d6c73e05 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -271,6 +271,8 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
                                  std::make_unique<GenericScheduler>(C));
   // add DAG Mutations here.
   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+  if (ST.hasStoreFusion())
+    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
   if (ST.hasFusion())
     DAG->addMutation(createPowerPCMacroFusionDAGMutation());
 
@@ -285,6 +287,8 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler(
                         std::make_unique<PPCPostRASchedStrategy>(C) :
                         std::make_unique<PostGenericScheduler>(C), true);
   // add DAG Mutations here.
+ if (ST.hasStoreFusion()) + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); if (ST.hasFusion()) DAG->addMutation(createPowerPCMacroFusionDAGMutation()); return DAG; diff --git a/llvm/test/CodeGen/PowerPC/fusion-load-store.ll b/llvm/test/CodeGen/PowerPC/fusion-load-store.ll new file mode 100644 index 0000000000000..75b2eca2168c0 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fusion-load-store.ll @@ -0,0 +1,268 @@ +; Test if several consecutive loads/stores can be clustered(fused) by scheduler. The +; scheduler will print "Cluster ld/st SU(x) - SU(y)" if SU(x) and SU(y) are fused. + +; REQUIRES: asserts +; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 \ +; RUN: -mattr=-paired-vector-memops,-pcrelative-memops -verify-misched \ +; RUN: -debug-only=machine-scheduler 2>&1 | FileCheck %s + +define i64 @store_i64(i64* nocapture %P, i64 %v) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i64:%bb.0 +; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) +; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, 24 +; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, 16 +; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, 8 +; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, 32 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i64:%bb.0 +; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) +; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], 16 +; CHECK: SU([[SU1]]): STD renamable $x[[REG]], 8 +; CHECK: SU([[SU2]]): STD renamable $x[[REG]], 24 +; CHECK: SU([[SU3]]): STD renamable $x[[REG]], 32 + %arrayidx = getelementptr inbounds i64, i64* %P, i64 3 + store i64 %v, i64* %arrayidx + %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2 + store i64 %v, i64* %arrayidx1 + %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx2 + %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4 + store i64 %v, i64* %arrayidx3 + ret i64 %v +} + +define i32 @store_i32(i32* nocapture %P, i32 %v) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32:%bb.0 +; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) +; CHECK: SU([[SU2]]): STW %[[REG:[0-9]+]].sub_32:g8rc, 52 +; CHECK: SU([[SU3]]): STW %[[REG]].sub_32:g8rc, 48 +; CHECK: SU([[SU4]]): STW %[[REG]].sub_32:g8rc, 44 +; CHECK: SU([[SU5]]): STW %[[REG]].sub_32:g8rc, 56 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32:%bb.0 +; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) +; CHECK: SU([[SU0]]): STW renamable $r[[REG:[0-9]+]], 48 +; CHECK: SU([[SU1]]): STW renamable $r[[REG]], 44 +; CHECK: SU([[SU2]]): STW renamable $r[[REG]], 52 +; CHECK: SU([[SU3]]): STW renamable $r[[REG]], 56 + %arrayidx = getelementptr inbounds i32, i32* %P, i32 13 + store i32 %v, i32* %arrayidx + %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 12 + store i32 %v, i32* %arrayidx1 + %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 11 + store i32 %v, i32* %arrayidx2 + %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 14 + store i32 %v, i32* %arrayidx3 + ret i32 %v +} + +define void @store_i64_neg(i64* nocapture %P, i64 %v) #0 { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i64_neg:%bb.0 +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - 
SU([[SU5:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) +; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, -24 +; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, -8 +; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, -16 +; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, -32 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i64_neg:%bb.0 +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) +; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], -8 +; CHECK: SU([[SU1]]): STD renamable $x[[REG]], -16 +; CHECK: SU([[SU2]]): STD renamable $x[[REG]], -24 +; CHECK: SU([[SU3]]): STD renamable $x[[REG]], -32 + %arrayidx = getelementptr inbounds i64, i64* %P, i64 -3 + store i64 %v, i64* %arrayidx + %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx1 + %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 -2 + store i64 %v, i64* %arrayidx2 + %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 -4 + store i64 %v, i64* %arrayidx3 + ret void +} + +define void @store_i32_neg(i32* nocapture %P, i32 %v) #0 { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32_neg:%bb.0 +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) +; CHECK: SU([[SU2]]): STW %[[REG:[0-9]+]].sub_32:g8rc, -12 +; CHECK: SU([[SU3]]): STW %[[REG]].sub_32:g8rc, -4 +; CHECK: SU([[SU4]]): STW %[[REG]].sub_32:g8rc, -8 +; CHECK: SU([[SU5]]): STW %[[REG]].sub_32:g8rc, -16 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32_neg:%bb.0 +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) +; CHECK:SU([[SU0]]): STW renamable $r[[REG:[0-9]+]], -4 +; CHECK:SU([[SU1]]): STW renamable $r[[REG]], -8 +; CHECK:SU([[SU2]]): STW renamable $r[[REG]], -12 +; CHECK:SU([[SU3]]): STW renamable $r[[REG]], -16 + %arrayidx = getelementptr inbounds i32, i32* %P, i32 -3 + store i32 %v, i32* %arrayidx + %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 -1 + store i32 %v, i32* %arrayidx1 + %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 -2 + store i32 %v, i32* %arrayidx2 + %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 -4 + store i32 %v, i32* %arrayidx3 + ret void +} + +define void @store_double(double* nocapture %P, double %v) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_double:%bb.0 +; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) +; CHECK: SU([[SU2]]): DFSTOREf64 %[[REG:[0-9]+]]:vsfrc, 24 +; CHECK: SU([[SU3]]): DFSTOREf64 %[[REG]]:vsfrc, 8 +; CHECK: SU([[SU4]]): DFSTOREf64 %[[REG]]:vsfrc, 16 +; CHECK: SU([[SU5]]): DFSTOREf64 %[[REG]]:vsfrc, 32 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_double:%bb.0 +; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) +; CHECK: SU([[SU0]]): STFD renamable $f[[REG:[0-9]+]], 8 +; CHECK: SU([[SU1]]): STFD renamable $f[[REG]], 16 +; CHECK: SU([[SU2]]): STFD renamable $f[[REG]], 24 +; CHECK: SU([[SU3]]): STFD renamable $f[[REG]], 32 + %arrayidx = getelementptr inbounds double, double* %P, i64 3 + store double %v, double* %arrayidx + %arrayidx1 = getelementptr inbounds double, double* %P, i64 1 + store double %v, double* %arrayidx1 + %arrayidx2 = getelementptr inbounds double, double* %P, i64 2 + 
store double %v, double* %arrayidx2 + %arrayidx3 = getelementptr inbounds double, double* %P, i64 4 + store double %v, double* %arrayidx3 + ret void +} + +define void @store_float(float* nocapture %P, float %v) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_float:%bb.0 +; CHECK-NOT: Cluster ld/st +; CHECK-NOT: Cluster ld/st +; CHECK: SU([[SU2]]): DFSTOREf32 %[[REG:[0-9]+]]:vssrc, 12 +; CHECK: SU([[SU3]]): DFSTOREf32 %[[REG]]:vssrc, 4 +; CHECK: SU([[SU4]]): DFSTOREf32 %[[REG]]:vssrc, 8 +; CHECK: SU([[SU5]]): DFSTOREf32 %[[REG]]:vssrc, 16 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_float:%bb.0 +; CHECK-NOT: Cluster ld/st +; CHECK-NOT: Cluster ld/st +; CHECK: SU([[SU0]]): STFS renamable $f[[REG:[0-9]+]], 12 +; CHECK: SU([[SU1]]): STFS renamable $f[[REG]], 4 +; CHECK: SU([[SU2]]): STFS renamable $f[[REG]], 8 +; CHECK: SU([[SU3]]): STFS renamable $f[[REG]], 16 + %arrayidx = getelementptr inbounds float, float* %P, i64 3 + store float %v, float* %arrayidx + %arrayidx1 = getelementptr inbounds float, float* %P, i64 1 + store float %v, float* %arrayidx1 + %arrayidx2 = getelementptr inbounds float, float* %P, i64 2 + store float %v, float* %arrayidx2 + %arrayidx3 = getelementptr inbounds float, float* %P, i64 4 + store float %v, float* %arrayidx3 + ret void +} + +; Cannot fuse the store/load if there is volatile in between +define i64 @store_volatile(i64* nocapture %P, i64 %v) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_volatile:%bb.0 +; CHECK-NOT: Cluster ld/st +; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, 24 +; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, 16 +; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, 8 +; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, 32 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_volatile:%bb.0 +; CHECK-NOT: Cluster ld/st +; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], 24 +; CHECK: SU([[SU1]]): STD renamable $x[[REG]], 16 +; CHECK: SU([[SU2]]): STD renamable $x[[REG]], 8 +; CHECK: SU([[SU3]]): STD renamable $x[[REG]], 32 + %arrayidx = getelementptr inbounds i64, i64* %P, i64 3 + store volatile i64 %v, i64* %arrayidx + %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2 + store volatile i64 %v, i64* %arrayidx1 + %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1 + store volatile i64 %v, i64* %arrayidx2 + %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4 + store volatile i64 %v, i64* %arrayidx3 + ret i64 %v +} + +@p = common local_unnamed_addr global [100 x i32] zeroinitializer, align 4 + +define void @store_i32_stw_stw8(i32 signext %m, i32 signext %n) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32_stw_stw8:%bb.0 +; CHECK: Cluster ld/st SU([[SU5:[0-9]+]]) - SU([[SU8:[0-9]+]]) +; CHECK: SU([[SU5]]): STW8 %{{[0-9]+}}:g8rc, 24 +; CHECK: SU([[SU8]]): STW %{{[0-9]+}}:gprc, 20 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32_stw_stw8:%bb.0 +; CHECK: Cluster ld/st SU([[SU5:[0-9]+]]) - SU([[SU6:[0-9]+]]) +; CHECK: SU([[SU5]]): STW8 renamable $x{{[0-9]+}}, 24 +; CHECK: SU([[SU6]]): STW renamable $r{{[0-9]+}}, 20 + store i32 9, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 6), align 4 + store i32 %n, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 7), align 4 + %add = add nsw i32 %n, %m + store i32 %add, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 5), align 4 + ret void +} + +define void @store_i32_stw8(i32 signext %m, i32 signext %n) { 
+entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32_stw8:%bb.0 +; CHECK: Cluster ld/st SU([[SU4:[0-9]+]]) - SU([[SU5:[0-9]+]]) +; CHECK: SU([[SU4]]): STW8 %{{[0-9]+}}:g8rc, 24 +; CHECK: SU([[SU5]]): STW8 %{{[0-9]+}}:g8rc, 28 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32_stw8:%bb.0 +; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) +; CHECK: SU([[SU3]]): STW8 renamable $x{{[0-9]+}}, 24 +; CHECK: SU([[SU4]]): STW8 renamable $x{{[0-9]+}}, 28 + store i32 9, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 6), align 4 + store i32 %n, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 7), align 4 + ret void +} + +declare void @bar(i64*) + +define void @store_frame_index(i32 %a, i32 %b) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_frame_index:%bb.0 +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) +; CHECK: SU([[SU2]]): STD %{{[0-9]+}}:g8rc, 0, %stack.0.buf +; CHECK: SU([[SU3]]): STD %{{[0-9]+}}:g8rc, 8, %stack.0.buf + %buf = alloca [8 x i64], align 8 + %0 = bitcast [8 x i64]* %buf to i8* + %conv = zext i32 %a to i64 + %arrayidx = getelementptr inbounds [8 x i64], [8 x i64]* %buf, i64 0, i64 0 + store i64 %conv, i64* %arrayidx, align 8 + %conv1 = zext i32 %b to i64 + %arrayidx2 = getelementptr inbounds [8 x i64], [8 x i64]* %buf, i64 0, i64 1 + store i64 %conv1, i64* %arrayidx2, align 8 + call void @bar(i64* nonnull %arrayidx) + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll index 9141fdc735a0e..1623889200848 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll @@ -104,6 +104,7 @@ define dso_local signext i32 @X2IsCallerSaved(i32 signext %a, i32 signext %b, i3 ; CHECK-P9-NOT: .localentry ; CHECK-ALL: # %bb.0: # %entry ; CHECK-S-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-S-NEXT: add r11, r4, r3 ; CHECK-S-NEXT: sub r29, r8, r9 ; CHECK-S-NEXT: add r9, r10, r9 @@ -119,7 +120,6 @@ define dso_local signext i32 @X2IsCallerSaved(i32 signext %a, i32 signext %b, i3 ; CHECK-S-NEXT: mullw r3, r3, r7 ; CHECK-S-NEXT: sub r2, r6, r7 ; CHECK-S-NEXT: mullw r3, r3, r8 -; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-S-NEXT: add r30, r8, r7 ; CHECK-S-NEXT: mullw r3, r3, r2 ; CHECK-S-NEXT: mullw r3, r3, r30 From 7907e5516a418fec29137beed3ff985f40e04f17 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Mon, 7 Sep 2020 20:55:05 -0700 Subject: [PATCH 0015/1079] [Sema] fix /gr warning test case --- clang/test/SemaCXX/no-rtti.cpp | 2 +- clang/test/SemaCXX/no_dynamic_cast.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/SemaCXX/no-rtti.cpp b/clang/test/SemaCXX/no-rtti.cpp index e0b57153c24c9..f8487a0902dda 100644 --- a/clang/test/SemaCXX/no-rtti.cpp +++ b/clang/test/SemaCXX/no-rtti.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -verify -fno-rtti %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only -verify -fno-rtti %s namespace std { class type_info; diff --git a/clang/test/SemaCXX/no_dynamic_cast.cpp b/clang/test/SemaCXX/no_dynamic_cast.cpp index 4db21d36f4a99..074b02f4668bc 100644 --- a/clang/test/SemaCXX/no_dynamic_cast.cpp +++ b/clang/test/SemaCXX/no_dynamic_cast.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -fno-rtti-data -fsyntax-only -verify +// RUN: %clang_cc1 %s -triple 
x86_64-pc-linux-gnu -fno-rtti-data -fsyntax-only -verify namespace std { struct type_info {}; From 247d02396524649a31bc45541f97457e32b8ef48 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Tue, 8 Sep 2020 11:14:36 +0700 Subject: [PATCH 0016/1079] [Test] Auto-generated checks for some IndVarSimplify tests --- .../IndVarSimplify/canonicalize-cmp.ll | 69 +++++++++++++++---- .../IndVarSimplify/lftr-multi-exit.ll | 36 +++++----- .../test/Transforms/IndVarSimplify/pr18223.ll | 20 +++++- 3 files changed, 93 insertions(+), 32 deletions(-) diff --git a/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll b/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll index 2b939767284a4..7c4bad11a5ea5 100644 --- a/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll +++ b/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -indvars < %s | FileCheck %s ; Check that we replace signed comparisons between non-negative values with @@ -6,13 +7,35 @@ target datalayout = "n8:16:32:64" define i32 @test_01(i32 %a, i32 %b, i32* %p) { - ; CHECK-LABEL: @test_01( -; CHECK-NOT: icmp slt -; CHECK: %cmp1 = icmp ult i32 %iv, 100 -; CHECK: %cmp2 = icmp ult i32 %iv, 100 -; CHECK-NOT: %cmp3 -; CHECK: %exitcond = icmp ne i32 %iv.next, 1000 +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_ENTRY:%.*]] +; CHECK: loop.entry: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_BE:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[IV]], 100 +; CHECK-NEXT: br i1 [[CMP1]], label [[B1:%.*]], label [[B2:%.*]] +; CHECK: b1: +; CHECK-NEXT: store i32 [[IV]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: b2: +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[IV]], 100 +; CHECK-NEXT: br i1 [[CMP2]], label [[B3:%.*]], label [[B4:%.*]] +; CHECK: b3: +; CHECK-NEXT: store i32 [[IV]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[LOOP_BE]] +; CHECK: b4: +; CHECK-NEXT: store i32 [[B:%.*]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[LOOP_BE]] +; CHECK: loop.be: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_ENTRY]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret i32 999 +; entry: br label %loop.entry @@ -52,13 +75,35 @@ exit: } define i32 @test_02(i32 %a, i32 %b, i32* %p) { - ; CHECK-LABEL: @test_02( -; CHECK-NOT: icmp sgt -; CHECK: %cmp1 = icmp ugt i32 100, %iv -; CHECK: %cmp2 = icmp ugt i32 100, %iv -; CHECK-NOT: %cmp3 -; CHECK: %exitcond = icmp ne i32 %iv.next, 1000 +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_ENTRY:%.*]] +; CHECK: loop.entry: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_BE:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 100, [[IV]] +; CHECK-NEXT: br i1 [[CMP1]], label [[B1:%.*]], label [[B2:%.*]] +; CHECK: b1: +; CHECK-NEXT: store i32 [[IV]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: b2: +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 100, [[IV]] +; CHECK-NEXT: br i1 [[CMP2]], label [[B3:%.*]], label [[B4:%.*]] +; CHECK: b3: +; CHECK-NEXT: store i32 [[IV]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[LOOP_BE]] +; CHECK: b4: +; CHECK-NEXT: 
store i32 [[B:%.*]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[LOOP_BE]] +; CHECK: loop.be: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_ENTRY]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret i32 999 +; entry: br label %loop.entry diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll b/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll index 66951eda7a575..7dfd4ebc00158 100644 --- a/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll +++ b/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll @@ -19,7 +19,7 @@ define void @analyzeable_early_exit(i32 %n) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store i32 [[IV]], i32* @A +; CHECK-NEXT: store i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -49,12 +49,12 @@ define void @unanalyzeable_early_exit() { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @A +; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @A, align 4 ; CHECK-NEXT: [[EARLYCND:%.*]] = icmp ne i32 [[VOL]], 0 ; CHECK-NEXT: br i1 [[EARLYCND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store i32 [[IV]], i32* @A +; CHECK-NEXT: store i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -89,12 +89,12 @@ define void @multiple_early_exits(i32 %n, i32 %m) { ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[CONTINUE:%.*]], label [[EXIT:%.*]] ; CHECK: continue: -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV]], [[M:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LATCH]], label [[EXIT]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -137,7 +137,7 @@ define void @compound_early_exit(i32 %n, i32 %m) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -174,8 +174,8 @@ define void @unanalyzeable_latch(i32 %n) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: store i32 [[IV]], i32* @A -; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @A +; CHECK-NEXT: store i32 [[IV]], i32* @A, align 4 +; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @A, align 4 ; CHECK-NEXT: [[C:%.*]] = icmp ult 
i32 [[VOL]], 1000 ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -210,7 +210,7 @@ define void @single_exit_no_latch(i32 %n) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: store i32 [[IV]], i32* @A +; CHECK-NEXT: store i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: br label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -243,11 +243,11 @@ define void @no_latch_exit(i32 %n, i32 %m) { ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[CONTINUE:%.*]], label [[EXIT:%.*]] ; CHECK: continue: -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV]], [[M:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LATCH]], label [[EXIT]] ; CHECK: latch: -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br label [[LOOP]] ; CHECK: exit: @@ -287,7 +287,7 @@ define void @combine_ivs(i32 %n) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV_NEXT]], 999 ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -324,7 +324,7 @@ define void @combine_ivs2(i32 %n) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -362,7 +362,7 @@ define void @simplify_exit_test(i32 %n) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV_NEXT]], 65 ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -396,13 +396,13 @@ define void @simplify_exit_test2(i32 %n) { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @A +; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @A, align 4 ; CHECK-NEXT: [[EARLYCND:%.*]] = icmp ne i32 [[VOL]], 0 ; CHECK-NEXT: br i1 [[EARLYCND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[FX:%.*]] = udiv i32 [[IV]], 4 -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[FX]], 1024 ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -442,12 +442,12 @@ define void @nested(i32 %n) { ; CHECK-NEXT: br label [[OUTER:%.*]] ; CHECK: outer: ; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV1_NEXT:%.*]], [[OUTER_LATCH:%.*]] ] -; CHECK-NEXT: store volatile i32 [[IV1]], i32* @A +; CHECK-NEXT: 
store volatile i32 [[IV1]], i32* @A, align 4 ; CHECK-NEXT: [[IV1_NEXT]] = add nuw nsw i32 [[IV1]], 1 ; CHECK-NEXT: br label [[INNER:%.*]] ; CHECK: inner: ; CHECK-NEXT: [[IV2:%.*]] = phi i32 [ 0, [[OUTER]] ], [ [[IV2_NEXT:%.*]], [[INNER_LATCH:%.*]] ] -; CHECK-NEXT: store volatile i32 [[IV2]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV2]], i32* @A, align 4 ; CHECK-NEXT: [[IV2_NEXT]] = add nuw nsw i32 [[IV2]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV2]], 20 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER_LATCH]], label [[EXIT_LOOPEXIT:%.*]] diff --git a/llvm/test/Transforms/IndVarSimplify/pr18223.ll b/llvm/test/Transforms/IndVarSimplify/pr18223.ll index f922aa424a17e..da620c8062198 100644 --- a/llvm/test/Transforms/IndVarSimplify/pr18223.ll +++ b/llvm/test/Transforms/IndVarSimplify/pr18223.ll @@ -1,12 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -indvars -S < %s | FileCheck %s ; indvars should transform the phi node pair from the for-loop -; CHECK-LABEL: @main( -; CHECK: ret = phi i32 [ 0, %entry ], [ 0, {{.*}} ] @c = common global i32 0, align 4 define i32 @main() #0 { +; CHECK-LABEL: @main( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @c, align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: br label [[FOR_INC:%.*]] +; CHECK: for.inc: +; CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[RET:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 0, [[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RET]] +; entry: %0 = load i32, i32* @c, align 4 %tobool = icmp eq i32 %0, 0 From 79651265b2e08e105f3d66d5f75bc9f5fa803e45 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 27 Aug 2020 20:34:07 -0500 Subject: [PATCH 0017/1079] [Attributor][FIX] Properly return changed if the IR was modified Deleting or replacing anything is certainly a modification. This caused a later assertion in IPSCCP when compiling 400.perlbench with the new PM. I'm not sure how to test this. 
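For illustration, the change below amounts to treating any non-empty pending-work container as proof of modification. A condensed sketch of the same logic (a hypothetical refactoring using the member names visible in the diff; not the code the patch actually adds):

    bool AnyPendingWork =
        !ToBeChangedUses.empty() || !ToBeChangedToUnreachableInsts.empty() ||
        !ToBeDeletedFunctions.empty() || !ToBeDeletedBlocks.empty() ||
        !ToBeDeletedInsts.empty() || !InvokeWithDeadSuccessor.empty() ||
        !DeadInsts.empty();
    if (AnyPendingWork)
      ManifestChange = ChangeStatus::CHANGED;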
---
 llvm/lib/Transforms/IPO/Attributor.cpp | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index ea285b51982c1..f020c4aaf1dfd 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -1306,9 +1306,27 @@ ChangeStatus Attributor::cleanupIR() {
     CGUpdater.removeFunction(*Fn);
   }

+  if (!ToBeChangedUses.empty())
+    ManifestChange = ChangeStatus::CHANGED;
+
+  if (!ToBeChangedToUnreachableInsts.empty())
+    ManifestChange = ChangeStatus::CHANGED;
+
   if (!ToBeDeletedFunctions.empty())
     ManifestChange = ChangeStatus::CHANGED;

+  if (!ToBeDeletedBlocks.empty())
+    ManifestChange = ChangeStatus::CHANGED;
+
+  if (!ToBeDeletedInsts.empty())
+    ManifestChange = ChangeStatus::CHANGED;
+
+  if (!InvokeWithDeadSuccessor.empty())
+    ManifestChange = ChangeStatus::CHANGED;
+
+  if (!DeadInsts.empty())
+    ManifestChange = ChangeStatus::CHANGED;
+
   NumFnDeleted += ToBeDeletedFunctions.size();

   LLVM_DEBUG(dbgs() << "[Attributor] Deleted " << NumFnDeleted

From ff70c25d76561d0789743fa9f718dcd520199a7c Mon Sep 17 00:00:00 2001
From: Johannes Doerfert
Date: Thu, 3 Sep 2020 11:08:39 -0500
Subject: [PATCH 0018/1079] [Attributor][NFC] Expand `auto` types
 (clang-fix-it)

---
 llvm/lib/Transforms/IPO/Attributor.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index f020c4aaf1dfd..d5c33f08827d2 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -1449,7 +1449,7 @@ static void createShallowWrapper(Function &F) {
   BasicBlock *EntryBB = BasicBlock::Create(Ctx, "entry", Wrapper);

   SmallVector<Value *, 8> Args;
-  auto FArgIt = F.arg_begin();
+  Argument *FArgIt = F.arg_begin();
   for (Argument &Arg : Wrapper->args()) {
     Args.push_back(&Arg);
     Arg.setName((FArgIt++)->getName());
@@ -1773,8 +1773,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
     assert(Success && "Assumed call site replacement to succeed!");

     // Rewire the arguments.
-    auto OldFnArgIt = OldFn->arg_begin();
-    auto NewFnArgIt = NewFn->arg_begin();
+    Argument *OldFnArgIt = OldFn->arg_begin();
+    Argument *NewFnArgIt = NewFn->arg_begin();
     for (unsigned OldArgNum = 0; OldArgNum < ARIs.size();
          ++OldArgNum, ++OldFnArgIt) {
       if (const std::unique_ptr<ArgumentReplacementInfo> &ARI =

From 8637acac5a3f4688114290b524eb5154a0bcdbdf Mon Sep 17 00:00:00 2001
From: Johannes Doerfert
Date: Sat, 5 Sep 2020 13:26:20 -0500
Subject: [PATCH 0019/1079] [Attributor][NFC] Clang tidy: no else after
 continue

---
 llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index b76e83def6e80..0fa5ad92c299e 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1141,11 +1141,13 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
       RVState RVS({NewRVsMap, Unused, RetValAAIt.second});
       VisitReturnedValue(*CB->getArgOperand(Arg->getArgNo()), RVS, CB);
       continue;
-    } else if (isa<CallBase>(RetVal)) {
+    }
+    if (isa<CallBase>(RetVal)) {
       // Call sites are resolved by the callee attribute over time, no need to
       // do anything for us.
       continue;
-    } else if (isa<Constant>(RetVal)) {
+    }
+    if (isa<Constant>(RetVal)) {
       // Constants are valid everywhere, we can simply take them.
NewRVsMap[RetVal].insert(RIs.begin(), RIs.end()); continue; From e6208849c8d63690ac3489813eb13196df7ed8dc Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 3 Sep 2020 16:13:28 -0500 Subject: [PATCH 0020/1079] [Attributor][NFC] Change variable spelling --- llvm/lib/Transforms/IPO/Attributor.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index d5c33f08827d2..ac9b48a537637 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -132,11 +132,11 @@ static cl::opt PrintDependencies("attributor-print-dep", cl::Hidden, /// Logic operators for the change status enum class. /// ///{ -ChangeStatus llvm::operator|(ChangeStatus l, ChangeStatus r) { - return l == ChangeStatus::CHANGED ? l : r; +ChangeStatus llvm::operator|(ChangeStatus L, ChangeStatus R) { + return L == ChangeStatus::CHANGED ? L : R; } -ChangeStatus llvm::operator&(ChangeStatus l, ChangeStatus r) { - return l == ChangeStatus::UNCHANGED ? l : r; +ChangeStatus llvm::operator&(ChangeStatus L, ChangeStatus R) { + return L == ChangeStatus::UNCHANGED ? L : R; } ///} From 53e4ef7fc25903430436ce456909d97aaa0fd6b2 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 3 Sep 2020 23:42:33 -0500 Subject: [PATCH 0021/1079] [Attributor][NFC] Cleanup internalize test case One run line was different and probably introduced for the manually added function attribute & name checks. We can do this with the script and a check prefix used for the other run lines as well. --- .../test/Transforms/Attributor/internalize.ll | 71 +++++++------------ 1 file changed, 24 insertions(+), 47 deletions(-) diff --git a/llvm/test/Transforms/Attributor/internalize.ll b/llvm/test/Transforms/Attributor/internalize.ll index 8a244b5c998c3..25f16474e8340 100644 --- a/llvm/test/Transforms/Attributor/internalize.ll +++ b/llvm/test/Transforms/Attributor/internalize.ll @@ -12,16 +12,14 @@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=8 -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM,CHECK_ENABLED,NOT_CGSCC_OPM_ENABLED,NOT_CGSCC_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,IS__TUNIT_____ENABLED,IS________NPM_ENABLED,IS__TUNIT_NPM_ENABLED ; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM,CHECK_ENABLED,NOT_TUNIT_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,NOT_CGSCC_NPM_ENABLED,IS__CGSCC_____ENABLED,IS________OPM_ENABLED,IS__CGSCC_OPM_ENABLED ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM,CHECK_ENABLED,NOT_TUNIT_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,NOT_CGSCC_OPM_ENABLED,IS__CGSCC_____ENABLED,IS________NPM_ENABLED,IS__CGSCC_NPM_ENABLED -; RUN: opt -attributor -attributor-cgscc -disable-inlining -attributor-allow-deep-wrappers -S < %s | FileCheck %s --check-prefix=DWRAPPER ; TEST 1: This function is of linkage `linkonce`, we cannot 
internalize this ; function and use information derived from it ; -; DWRAPPER-NOT: Function Attrs -; DWRAPPER-NOT: inner1.internalized +; CHECK-NOT: inner1.internalized define linkonce i32 @inner1(i32 %a, i32 %b) { ; CHECK-LABEL: define {{[^@]+}}@inner1 -; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] ; CHECK-NEXT: ret i32 [[C]] @@ -34,11 +32,10 @@ entry: ; TEST 2: This function is of linkage `weak`, we cannot internalize this function and ; use information derived from it ; -; DWRAPPER-NOT: Function Attrs -; DWRAPPER-NOT: inner2.internalized +; CHECK-NOT: inner2.internalized define weak i32 @inner2(i32 %a, i32 %b) { ; CHECK-LABEL: define {{[^@]+}}@inner2 -; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] ; CHECK-NEXT: ret i32 [[C]] @@ -51,17 +48,12 @@ entry: ; TEST 3: This function is of linkage `linkonce_odr`, which can be internalized using the ; deep wrapper, and the IP information derived from this function can be used ; -; DWRAPPER: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; DWRAPPER: define private i32 @inner3.internalized(i32 %a, i32 %b) -; DWRAPPER-NEXT: entry: -; DWRAPPER-NEXT: %c = add i32 %a, %b -; DWRAPPER-NEXT: ret i32 %c define linkonce_odr i32 @inner3(i32 %a, i32 %b) { -; CHECK-LABEL: define {{[^@]+}}@inner3 -; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) -; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] -; CHECK-NEXT: ret i32 [[C]] +; CHECK_DISABLED-LABEL: define {{[^@]+}}@inner3 +; CHECK_DISABLED-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK_DISABLED-NEXT: entry: +; CHECK_DISABLED-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] +; CHECK_DISABLED-NEXT: ret i32 [[C]] ; entry: %c = add i32 %a, %b @@ -71,17 +63,12 @@ entry: ; TEST 4: This function is of linkage `weak_odr`, which can be internalized using the deep ; wrapper ; -; DWRAPPER: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; DWRAPPER: define private i32 @inner4.internalized(i32 %a, i32 %b) -; DWRAPPER-NEXT: entry: -; DWRAPPER-NEXT: %c = add i32 %a, %b -; DWRAPPER-NEXT: ret i32 %c define weak_odr i32 @inner4(i32 %a, i32 %b) { -; CHECK-LABEL: define {{[^@]+}}@inner4 -; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) -; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] -; CHECK-NEXT: ret i32 [[C]] +; CHECK_DISABLED-LABEL: define {{[^@]+}}@inner4 +; CHECK_DISABLED-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK_DISABLED-NEXT: entry: +; CHECK_DISABLED-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] +; CHECK_DISABLED-NEXT: ret i32 [[C]] ; entry: %c = add i32 %a, %b @@ -91,10 +78,10 @@ entry: ; TEST 5: This function has linkage `linkonce_odr` but is never called (num of use = 0), so there ; is no need to internalize this ; -; DWRAPPER-NOT: inner5.internalized +; CHECK-NOT: inner5.internalized define linkonce_odr i32 @inner5(i32 %a, i32 %b) { ; CHECK-LABEL: define {{[^@]+}}@inner5 -; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] ; CHECK-NEXT: ret i32 [[C]] @@ -109,16 +96,8 @@ entry: ; Since the inner3 is internalized, the use of the original function should be replaced by the ; copied one ; -; DWRAPPER-NOT: call i32 @inner1.internalized -; DWRAPPER: call i32 @inner1 -; DWRAPPER-NOT: call i32 @inner2.internalized -; DWRAPPER: 
call i32 @inner2 -; DWRAPPER-NOT: call i32 @inner3 -; DWRAPPER: call i32 @inner3.internalized -; DWRAPPER-NOT: call i32 @inner4 -; DWRAPPER: call i32 @inner4.internalized define i32 @outer1() { -; CHECK_DISABLED-LABEL: define {{[^@]+}}@outer1() +; CHECK_DISABLED-LABEL: define {{[^@]+}}@outer1() { ; CHECK_DISABLED-NEXT: entry: ; CHECK_DISABLED-NEXT: [[RET1:%.*]] = call i32 @inner1(i32 noundef 1, i32 noundef 2) ; CHECK_DISABLED-NEXT: [[RET2:%.*]] = call i32 @inner2(i32 noundef 1, i32 noundef 2) @@ -126,7 +105,7 @@ define i32 @outer1() { ; CHECK_DISABLED-NEXT: [[RET4:%.*]] = call i32 @inner4(i32 [[RET3]], i32 [[RET3]]) ; CHECK_DISABLED-NEXT: ret i32 [[RET4]] ; -; CHECK_ENABLED-LABEL: define {{[^@]+}}@outer1() +; CHECK_ENABLED-LABEL: define {{[^@]+}}@outer1() { ; CHECK_ENABLED-NEXT: entry: ; CHECK_ENABLED-NEXT: [[RET1:%.*]] = call i32 @inner1(i32 noundef 1, i32 noundef 2) ; CHECK_ENABLED-NEXT: [[RET2:%.*]] = call i32 @inner2(i32 noundef 1, i32 noundef 2) @@ -145,28 +124,26 @@ entry: define linkonce_odr void @unused_arg(i8) { ; CHECK_DISABLED-LABEL: define {{[^@]+}}@unused_arg -; CHECK_DISABLED-SAME: (i8 [[TMP0:%.*]]) +; CHECK_DISABLED-SAME: (i8 [[TMP0:%.*]]) { ; CHECK_DISABLED-NEXT: unreachable ; unreachable } define void @unused_arg_caller() { -; CHECK_DISABLED-LABEL: define {{[^@]+}}@unused_arg_caller() +; CHECK_DISABLED-LABEL: define {{[^@]+}}@unused_arg_caller() { ; CHECK_DISABLED-NEXT: call void @unused_arg(i8 noundef 0) ; CHECK_DISABLED-NEXT: ret void ; ; IS__TUNIT_____ENABLED: Function Attrs: nofree noreturn nosync nounwind readnone willreturn -; IS__TUNIT_____ENABLED-LABEL: define {{[^@]+}}@unused_arg_caller() +; IS__TUNIT_____ENABLED-LABEL: define {{[^@]+}}@unused_arg_caller +; IS__TUNIT_____ENABLED-SAME: () [[ATTR1:#.*]] { ; IS__TUNIT_____ENABLED-NEXT: unreachable ; ; IS__CGSCC_____ENABLED: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn -; IS__CGSCC_____ENABLED-LABEL: define {{[^@]+}}@unused_arg_caller() +; IS__CGSCC_____ENABLED-LABEL: define {{[^@]+}}@unused_arg_caller +; IS__CGSCC_____ENABLED-SAME: () [[ATTR2:#.*]] { ; IS__CGSCC_____ENABLED-NEXT: unreachable -; -; DWRAPPER: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn -; DWRAPPER-LABEL: define {{[^@]+}}@unused_arg_caller() -; DWRAPPER-NEXT: unreachable ; call void @unused_arg(i8 0) ret void From 711bf7dcf9546fefe18d32a5772d48e7b5166f08 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 18 Aug 2020 15:32:21 -0500 Subject: [PATCH 0022/1079] [Attributor][FIX] Don't crash on internalizing linkonce_odr hidden functions The CloneFunctionInto has implicit requirements with regards to the linkage and visibility of the function. We now update these after we did the CloneFunctionInto on the copy with the same linkage and visibility as the original. 
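One way to read those implicit requirements (an assumption drawn from this message together with the LLVM IR rule that local linkage requires default visibility, not something the patch states explicitly): the copy has to keep the original linkage until after the clone. A simplified sketch of the resulting ordering, using the names from the diff below:

    Function *Copied = Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(),
                                        F.getName() + ".internalized");
    CloneFunctionInto(Copied, &F, VMap, /* ModuleLevelChanges */ false, Returns);
    // Only now switch to local linkage; doing it before the clone would let a
    // private function end up with the source's hidden visibility, which is
    // invalid IR.
    Copied->setVisibility(GlobalValue::DefaultVisibility);
    Copied->setLinkage(GlobalValue::PrivateLinkage);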
---
 llvm/lib/Transforms/IPO/Attributor.cpp         | 10 +++++++---
 llvm/test/Transforms/Attributor/internalize.ll | 11 +++++++++++
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index ac9b48a537637..32420e847129f 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -1481,9 +1481,8 @@ static Function *internalizeFunction(Function &F) {
   FunctionType *FnTy = F.getFunctionType();

   // create a copy of the current function
-  Function *Copied =
-      Function::Create(FnTy, GlobalValue::PrivateLinkage, F.getAddressSpace(),
-                       F.getName() + ".internalized");
+  Function *Copied = Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(),
+                                      F.getName() + ".internalized");
   ValueToValueMapTy VMap;
   auto *NewFArgIt = Copied->arg_begin();
   for (auto &Arg : F.args()) {
@@ -1496,6 +1495,11 @@ static Function *internalizeFunction(Function &F) {
   // Copy the body of the original function to the new one
   CloneFunctionInto(Copied, &F, VMap, /* ModuleLevelChanges */ false, Returns);

+  // Set the linkage and visibility late as CloneFunctionInto has some implicit
+  // requirements.
+  Copied->setVisibility(GlobalValue::DefaultVisibility);
+  Copied->setLinkage(GlobalValue::PrivateLinkage);
+
   // Copy metadata
   SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
   F.getAllMetadata(MDs);
diff --git a/llvm/test/Transforms/Attributor/internalize.ll b/llvm/test/Transforms/Attributor/internalize.ll
index 25f16474e8340..3e485382e9be0 100644
--- a/llvm/test/Transforms/Attributor/internalize.ll
+++ b/llvm/test/Transforms/Attributor/internalize.ll
@@ -148,3 +148,14 @@ define void @unused_arg_caller() {
   call void @unused_arg(i8 0)
   ret void
 }
+
+; Don't crash on linkonce_odr hidden functions
+define linkonce_odr hidden void @__clang_call_terminate() {
+; CHECK_DISABLED-LABEL: define {{[^@]+}}@__clang_call_terminate() {
+; CHECK_DISABLED-NEXT:    call void @__clang_call_terminate()
+; CHECK_DISABLED-NEXT:    unreachable
+;
+  call void @__clang_call_terminate()
+  unreachable
+}
+

From e59d9df774ed7d94455b224f0e3f6eaeae707259 Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Mon, 7 Sep 2020 21:44:26 -0700
Subject: [PATCH 0023/1079] [ELF] --symbol-ordering-file: optimize a loop

---
 lld/ELF/Writer.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index b26817b66e271..5ef37e9ecb895 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1346,9 +1346,11 @@ static DenseMap<const InputSectionBase *, int> buildSectionOrder() {
       addSym(*sym);

   for (InputFile *file : objectFiles)
-    for (Symbol *sym : file->getSymbols())
-      if (sym->isLocal())
-        addSym(*sym);
+    for (Symbol *sym : file->getSymbols()) {
+      if (!sym->isLocal())
+        break;
+      addSym(*sym);
+    }

   if (config->warnSymbolOrdering)
     for (auto orderEntry : symbolOrder)

From 78071fb52456f5da9d044588e58a946c0ad96830 Mon Sep 17 00:00:00 2001
From: Andrew Wei
Date: Tue, 8 Sep 2020 13:14:53 +0800
Subject: [PATCH 0024/1079] [LSR] Canonicalize a formula before inserting it
 into the list

In GenerateConstantOffsetsImpl, we may generate a non-canonical Formula
if the BaseRegs of that Formula are updated and include a register that
is a recurrent expression of the current loop while its ScaledReg is not.
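Put differently, a toy model of the rule just described (an editor's sketch with hypothetical names; this is not LSR's actual isCanonical() logic): if any base register is a recurrence of the current loop while the scaled register is not, the formula is no longer canonical.

    #include <vector>

    enum class RegKind { Invariant, LoopRecurrence };

    struct ToyFormula {
      std::vector<RegKind> BaseRegs;
      RegKind ScaledReg = RegKind::Invariant;

      bool isCanonical() const {
        for (RegKind R : BaseRegs)
          if (R == RegKind::LoopRecurrence && ScaledReg != RegKind::LoopRecurrence)
            return false; // exactly the case the patch re-canonicalizes
        return true;
      }
    };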
Patched by: mdchen Reviewed By: qcolombet Differential Revision: https://reviews.llvm.org/D86939 --- .../Transforms/Scalar/LoopStrengthReduce.cpp | 8 +- .../LoopStrengthReduce/AArch64/pr47329.ll | 299 ++++++++++++++++++ 2 files changed, 305 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/LoopStrengthReduce/AArch64/pr47329.ll diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index c3e46c1fadef3..47329fa1f043e 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -3834,10 +3834,14 @@ void LSRInstance::GenerateConstantOffsetsImpl( F.BaseOffset = (uint64_t)F.BaseOffset + Imm; if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) return; - if (IsScaledReg) + if (IsScaledReg) { F.ScaledReg = G; - else + } else { F.BaseRegs[Idx] = G; + // We may generate non canonical Formula if G is a recurrent expr reg + // related with current loop while F.ScaledReg is not. + F.canonicalize(*L); + } (void)InsertFormula(LU, LUIdx, F); } diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr47329.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr47329.ll new file mode 100644 index 0000000000000..bd2d6b4b0b4ca --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr47329.ll @@ -0,0 +1,299 @@ +; RUN: opt < %s -loop-reduce +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +@d = internal unnamed_addr global i32** null, align 8 + +define dso_local i32 @main() local_unnamed_addr { +entry: + %.pre.pre = load i32**, i32*** @d, align 8 + br label %for.body9 + +for.body9: ; preds = %for.body9, %entry + %i = phi i32** [ %.pre.pre, %entry ], [ %incdec.ptr, %for.body9 ] + %incdec.ptr = getelementptr inbounds i32*, i32** %i, i64 -1 + br i1 undef, label %for.body9, label %for.inc + +for.inc: ; preds = %for.body9 + br label %for.body9.118 + +for.body9.1: ; preds = %for.inc.547, %for.body9.1 + %i1 = phi i32** [ %incdec.ptr.1, %for.body9.1 ], [ %incdec.ptr.542, %for.inc.547 ] + %incdec.ptr.1 = getelementptr inbounds i32*, i32** %i1, i64 -1 + br i1 undef, label %for.body9.1, label %for.inc.1 + +for.inc.1: ; preds = %for.body9.1 + br label %for.body9.1.1 + +for.body9.2: ; preds = %for.inc.1.5, %for.body9.2 + %i2 = phi i32** [ %incdec.ptr.2, %for.body9.2 ], [ %incdec.ptr.1.5, %for.inc.1.5 ] + %incdec.ptr.2 = getelementptr inbounds i32*, i32** %i2, i64 -1 + br i1 undef, label %for.body9.2, label %for.inc.2 + +for.inc.2: ; preds = %for.body9.2 + br label %for.body9.2.1 + +for.body9.3: ; preds = %for.inc.2.5, %for.body9.3 + %i3 = phi i32** [ %incdec.ptr.3, %for.body9.3 ], [ %incdec.ptr.2.5, %for.inc.2.5 ] + %incdec.ptr.3 = getelementptr inbounds i32*, i32** %i3, i64 -1 + br i1 undef, label %for.body9.3, label %for.inc.3 + +for.inc.3: ; preds = %for.body9.3 + br label %for.body9.3.1 + +for.body9.4: ; preds = %for.inc.3.5, %for.body9.4 + %i4 = phi i32** [ %incdec.ptr.4, %for.body9.4 ], [ %incdec.ptr.3.5, %for.inc.3.5 ] + %incdec.ptr.4 = getelementptr inbounds i32*, i32** %i4, i64 -1 + br i1 undef, label %for.body9.4, label %for.inc.4 + +for.inc.4: ; preds = %for.body9.4 + br label %for.body9.4.1 + +for.body9.5: ; preds = %for.inc.4.5, %for.body9.5 + %i5 = phi i32** [ %incdec.ptr.5, %for.body9.5 ], [ %incdec.ptr.4.5, %for.inc.4.5 ] + %incdec.ptr.5 = getelementptr inbounds i32*, i32** %i5, i64 -1 + br i1 undef, label %for.body9.5, label %for.inc.5 + +for.inc.5: ; preds 
= %for.body9.5 + br label %for.body9.5.1 + +for.body9.5.1: ; preds = %for.body9.5.1, %for.inc.5 + %i6 = phi i32** [ %incdec.ptr.5.1, %for.body9.5.1 ], [ %incdec.ptr.5, %for.inc.5 ] + %incdec.ptr.5.1 = getelementptr inbounds i32*, i32** %i6, i64 -1 + br i1 undef, label %for.body9.5.1, label %for.inc.5.1 + +for.inc.5.1: ; preds = %for.body9.5.1 + br label %for.body9.5.2 + +for.body9.5.2: ; preds = %for.body9.5.2, %for.inc.5.1 + %i7 = phi i32** [ %incdec.ptr.5.2, %for.body9.5.2 ], [ %incdec.ptr.5.1, %for.inc.5.1 ] + %incdec.ptr.5.2 = getelementptr inbounds i32*, i32** %i7, i64 -1 + br i1 undef, label %for.body9.5.2, label %for.inc.5.2 + +for.inc.5.2: ; preds = %for.body9.5.2 + br label %for.body9.5.3 + +for.body9.5.3: ; preds = %for.body9.5.3, %for.inc.5.2 + %i8 = phi i32** [ %incdec.ptr.5.3, %for.body9.5.3 ], [ %incdec.ptr.5.2, %for.inc.5.2 ] + %incdec.ptr.5.3 = getelementptr inbounds i32*, i32** %i8, i64 -1 + br i1 undef, label %for.body9.5.3, label %for.inc.5.3 + +for.inc.5.3: ; preds = %for.body9.5.3 + br label %for.body9.5.4 + +for.body9.5.4: ; preds = %for.body9.5.4, %for.inc.5.3 + %i9 = phi i32** [ %incdec.ptr.5.4, %for.body9.5.4 ], [ %incdec.ptr.5.3, %for.inc.5.3 ] + %incdec.ptr.5.4 = getelementptr inbounds i32*, i32** %i9, i64 -1 + br i1 undef, label %for.body9.5.4, label %for.inc.5.4 + +for.inc.5.4: ; preds = %for.body9.5.4 + br label %for.body9.5.5 + +for.body9.5.5: ; preds = %for.body9.5.5, %for.inc.5.4 + %i10 = phi i32** [ undef, %for.body9.5.5 ], [ %incdec.ptr.5.4, %for.inc.5.4 ] + %i11 = bitcast i32** %i10 to i64* + %i12 = load i64, i64* %i11, align 8 + br label %for.body9.5.5 + +for.body9.4.1: ; preds = %for.body9.4.1, %for.inc.4 + %i13 = phi i32** [ %incdec.ptr.4.1, %for.body9.4.1 ], [ %incdec.ptr.4, %for.inc.4 ] + %incdec.ptr.4.1 = getelementptr inbounds i32*, i32** %i13, i64 -1 + br i1 undef, label %for.body9.4.1, label %for.inc.4.1 + +for.inc.4.1: ; preds = %for.body9.4.1 + br label %for.body9.4.2 + +for.body9.4.2: ; preds = %for.body9.4.2, %for.inc.4.1 + %i14 = phi i32** [ %incdec.ptr.4.2, %for.body9.4.2 ], [ %incdec.ptr.4.1, %for.inc.4.1 ] + %incdec.ptr.4.2 = getelementptr inbounds i32*, i32** %i14, i64 -1 + br i1 undef, label %for.body9.4.2, label %for.inc.4.2 + +for.inc.4.2: ; preds = %for.body9.4.2 + br label %for.body9.4.3 + +for.body9.4.3: ; preds = %for.body9.4.3, %for.inc.4.2 + %i15 = phi i32** [ %incdec.ptr.4.3, %for.body9.4.3 ], [ %incdec.ptr.4.2, %for.inc.4.2 ] + %incdec.ptr.4.3 = getelementptr inbounds i32*, i32** %i15, i64 -1 + br i1 undef, label %for.body9.4.3, label %for.inc.4.3 + +for.inc.4.3: ; preds = %for.body9.4.3 + br label %for.body9.4.4 + +for.body9.4.4: ; preds = %for.body9.4.4, %for.inc.4.3 + %i16 = phi i32** [ %incdec.ptr.4.4, %for.body9.4.4 ], [ %incdec.ptr.4.3, %for.inc.4.3 ] + %incdec.ptr.4.4 = getelementptr inbounds i32*, i32** %i16, i64 -1 + br i1 undef, label %for.body9.4.4, label %for.inc.4.4 + +for.inc.4.4: ; preds = %for.body9.4.4 + br label %for.body9.4.5 + +for.body9.4.5: ; preds = %for.body9.4.5, %for.inc.4.4 + %i17 = phi i32** [ %incdec.ptr.4.5, %for.body9.4.5 ], [ %incdec.ptr.4.4, %for.inc.4.4 ] + %incdec.ptr.4.5 = getelementptr inbounds i32*, i32** %i17, i64 -1 + br i1 undef, label %for.body9.4.5, label %for.inc.4.5 + +for.inc.4.5: ; preds = %for.body9.4.5 + br label %for.body9.5 + +for.body9.3.1: ; preds = %for.body9.3.1, %for.inc.3 + %i18 = phi i32** [ %incdec.ptr.3.1, %for.body9.3.1 ], [ %incdec.ptr.3, %for.inc.3 ] + %incdec.ptr.3.1 = getelementptr inbounds i32*, i32** %i18, i64 -1 + br i1 undef, label %for.body9.3.1, label 
%for.inc.3.1 + +for.inc.3.1: ; preds = %for.body9.3.1 + br label %for.body9.3.2 + +for.body9.3.2: ; preds = %for.body9.3.2, %for.inc.3.1 + %i19 = phi i32** [ %incdec.ptr.3.2, %for.body9.3.2 ], [ %incdec.ptr.3.1, %for.inc.3.1 ] + %incdec.ptr.3.2 = getelementptr inbounds i32*, i32** %i19, i64 -1 + br i1 undef, label %for.body9.3.2, label %for.inc.3.2 + +for.inc.3.2: ; preds = %for.body9.3.2 + br label %for.body9.3.3 + +for.body9.3.3: ; preds = %for.body9.3.3, %for.inc.3.2 + %i20 = phi i32** [ %incdec.ptr.3.3, %for.body9.3.3 ], [ %incdec.ptr.3.2, %for.inc.3.2 ] + %incdec.ptr.3.3 = getelementptr inbounds i32*, i32** %i20, i64 -1 + br i1 undef, label %for.body9.3.3, label %for.inc.3.3 + +for.inc.3.3: ; preds = %for.body9.3.3 + br label %for.body9.3.4 + +for.body9.3.4: ; preds = %for.body9.3.4, %for.inc.3.3 + %i21 = phi i32** [ %incdec.ptr.3.4, %for.body9.3.4 ], [ %incdec.ptr.3.3, %for.inc.3.3 ] + %incdec.ptr.3.4 = getelementptr inbounds i32*, i32** %i21, i64 -1 + br i1 undef, label %for.body9.3.4, label %for.inc.3.4 + +for.inc.3.4: ; preds = %for.body9.3.4 + br label %for.body9.3.5 + +for.body9.3.5: ; preds = %for.body9.3.5, %for.inc.3.4 + %i22 = phi i32** [ %incdec.ptr.3.5, %for.body9.3.5 ], [ %incdec.ptr.3.4, %for.inc.3.4 ] + %incdec.ptr.3.5 = getelementptr inbounds i32*, i32** %i22, i64 -1 + br i1 undef, label %for.body9.3.5, label %for.inc.3.5 + +for.inc.3.5: ; preds = %for.body9.3.5 + br label %for.body9.4 + +for.body9.2.1: ; preds = %for.body9.2.1, %for.inc.2 + %i23 = phi i32** [ %incdec.ptr.2.1, %for.body9.2.1 ], [ %incdec.ptr.2, %for.inc.2 ] + %incdec.ptr.2.1 = getelementptr inbounds i32*, i32** %i23, i64 -1 + br i1 undef, label %for.body9.2.1, label %for.inc.2.1 + +for.inc.2.1: ; preds = %for.body9.2.1 + br label %for.body9.2.2 + +for.body9.2.2: ; preds = %for.body9.2.2, %for.inc.2.1 + %i24 = phi i32** [ %incdec.ptr.2.2, %for.body9.2.2 ], [ %incdec.ptr.2.1, %for.inc.2.1 ] + %incdec.ptr.2.2 = getelementptr inbounds i32*, i32** %i24, i64 -1 + br i1 undef, label %for.body9.2.2, label %for.inc.2.2 + +for.inc.2.2: ; preds = %for.body9.2.2 + br label %for.body9.2.3 + +for.body9.2.3: ; preds = %for.body9.2.3, %for.inc.2.2 + %i25 = phi i32** [ %incdec.ptr.2.3, %for.body9.2.3 ], [ %incdec.ptr.2.2, %for.inc.2.2 ] + %incdec.ptr.2.3 = getelementptr inbounds i32*, i32** %i25, i64 -1 + br i1 undef, label %for.body9.2.3, label %for.inc.2.3 + +for.inc.2.3: ; preds = %for.body9.2.3 + br label %for.body9.2.4 + +for.body9.2.4: ; preds = %for.body9.2.4, %for.inc.2.3 + %i26 = phi i32** [ %incdec.ptr.2.4, %for.body9.2.4 ], [ %incdec.ptr.2.3, %for.inc.2.3 ] + %incdec.ptr.2.4 = getelementptr inbounds i32*, i32** %i26, i64 -1 + br i1 undef, label %for.body9.2.4, label %for.inc.2.4 + +for.inc.2.4: ; preds = %for.body9.2.4 + br label %for.body9.2.5 + +for.body9.2.5: ; preds = %for.body9.2.5, %for.inc.2.4 + %i27 = phi i32** [ %incdec.ptr.2.5, %for.body9.2.5 ], [ %incdec.ptr.2.4, %for.inc.2.4 ] + %incdec.ptr.2.5 = getelementptr inbounds i32*, i32** %i27, i64 -1 + br i1 undef, label %for.body9.2.5, label %for.inc.2.5 + +for.inc.2.5: ; preds = %for.body9.2.5 + br label %for.body9.3 + +for.body9.1.1: ; preds = %for.body9.1.1, %for.inc.1 + %i28 = phi i32** [ %incdec.ptr.1.1, %for.body9.1.1 ], [ %incdec.ptr.1, %for.inc.1 ] + %incdec.ptr.1.1 = getelementptr inbounds i32*, i32** %i28, i64 -1 + br i1 undef, label %for.body9.1.1, label %for.inc.1.1 + +for.inc.1.1: ; preds = %for.body9.1.1 + br label %for.body9.1.2 + +for.body9.1.2: ; preds = %for.body9.1.2, %for.inc.1.1 + %i29 = phi i32** [ %incdec.ptr.1.2, %for.body9.1.2 
], [ %incdec.ptr.1.1, %for.inc.1.1 ] + %incdec.ptr.1.2 = getelementptr inbounds i32*, i32** %i29, i64 -1 + br i1 undef, label %for.body9.1.2, label %for.inc.1.2 + +for.inc.1.2: ; preds = %for.body9.1.2 + br label %for.body9.1.3 + +for.body9.1.3: ; preds = %for.body9.1.3, %for.inc.1.2 + %i30 = phi i32** [ %incdec.ptr.1.3, %for.body9.1.3 ], [ %incdec.ptr.1.2, %for.inc.1.2 ] + %incdec.ptr.1.3 = getelementptr inbounds i32*, i32** %i30, i64 -1 + br i1 undef, label %for.body9.1.3, label %for.inc.1.3 + +for.inc.1.3: ; preds = %for.body9.1.3 + br label %for.body9.1.4 + +for.body9.1.4: ; preds = %for.body9.1.4, %for.inc.1.3 + %i31 = phi i32** [ %incdec.ptr.1.4, %for.body9.1.4 ], [ %incdec.ptr.1.3, %for.inc.1.3 ] + %incdec.ptr.1.4 = getelementptr inbounds i32*, i32** %i31, i64 -1 + br i1 undef, label %for.body9.1.4, label %for.inc.1.4 + +for.inc.1.4: ; preds = %for.body9.1.4 + br label %for.body9.1.5 + +for.body9.1.5: ; preds = %for.body9.1.5, %for.inc.1.4 + %i32 = phi i32** [ %incdec.ptr.1.5, %for.body9.1.5 ], [ %incdec.ptr.1.4, %for.inc.1.4 ] + %incdec.ptr.1.5 = getelementptr inbounds i32*, i32** %i32, i64 -1 + br i1 undef, label %for.body9.1.5, label %for.inc.1.5 + +for.inc.1.5: ; preds = %for.body9.1.5 + br label %for.body9.2 + +for.body9.118: ; preds = %for.body9.118, %for.inc + %i33 = phi i32** [ %incdec.ptr, %for.inc ], [ %incdec.ptr.114, %for.body9.118 ] + %incdec.ptr.114 = getelementptr inbounds i32*, i32** %i33, i64 -1 + br i1 undef, label %for.body9.118, label %for.inc.119 + +for.inc.119: ; preds = %for.body9.118 + br label %for.body9.225 + +for.body9.225: ; preds = %for.body9.225, %for.inc.119 + %i34 = phi i32** [ %incdec.ptr.114, %for.inc.119 ], [ %incdec.ptr.221, %for.body9.225 ] + %incdec.ptr.221 = getelementptr inbounds i32*, i32** %i34, i64 -1 + %i35 = bitcast i32** %i34 to i64* + %i36 = load i64, i64* %i35, align 8 + br i1 undef, label %for.body9.225, label %for.inc.226 + +for.inc.226: ; preds = %for.body9.225 + br label %for.body9.332 + +for.body9.332: ; preds = %for.body9.332, %for.inc.226 + %i37 = phi i32** [ %incdec.ptr.221, %for.inc.226 ], [ %incdec.ptr.328, %for.body9.332 ] + %incdec.ptr.328 = getelementptr inbounds i32*, i32** %i37, i64 -1 + br i1 undef, label %for.body9.332, label %for.inc.333 + +for.inc.333: ; preds = %for.body9.332 + br label %for.body9.439 + +for.body9.439: ; preds = %for.body9.439, %for.inc.333 + %i38 = phi i32** [ %incdec.ptr.328, %for.inc.333 ], [ %incdec.ptr.435, %for.body9.439 ] + %incdec.ptr.435 = getelementptr inbounds i32*, i32** %i38, i64 -1 + br i1 undef, label %for.body9.439, label %for.inc.440 + +for.inc.440: ; preds = %for.body9.439 + br label %for.body9.546 + +for.body9.546: ; preds = %for.body9.546, %for.inc.440 + %i39 = phi i32** [ %incdec.ptr.435, %for.inc.440 ], [ %incdec.ptr.542, %for.body9.546 ] + %incdec.ptr.542 = getelementptr inbounds i32*, i32** %i39, i64 -1 + br i1 undef, label %for.body9.546, label %for.inc.547 + +for.inc.547: ; preds = %for.body9.546 + br label %for.body9.1 +} From 28b9ace85f6871cdb48f1483314d8342e099b136 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 8 Sep 2020 09:26:39 +0300 Subject: [PATCH 0025/1079] [clang] Remove a stray semicolon, fixing pedantic GCC warnings. NFC. 
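For context, the pedantic warning concerns an extra ';' after a function body; a minimal illustration (not taken from the patch):

    inline int identity(int X) { return X; };  // GCC -Wpedantic: extra ';'
    inline int identity2(int X) { return X; }  // no warning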
---
 clang/include/clang/AST/IgnoreExpr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/include/clang/AST/IgnoreExpr.h b/clang/include/clang/AST/IgnoreExpr.h
index 15d31f3af9954..0aeb547606a2b 100644
--- a/clang/include/clang/AST/IgnoreExpr.h
+++ b/clang/include/clang/AST/IgnoreExpr.h
@@ -19,7 +19,7 @@ namespace clang {
 namespace detail {
 /// Given an expression E and functions Fn_1,...,Fn_n : Expr * -> Expr *,
 /// Return Fn_n(...(Fn_1(E)))
-inline Expr *IgnoreExprNodesImpl(Expr *E) { return E; };
+inline Expr *IgnoreExprNodesImpl(Expr *E) { return E; }
 template <typename FnTy, typename... FnTys>
 Expr *IgnoreExprNodesImpl(Expr *E, FnTy &&Fn, FnTys &&... Fns) {
   return IgnoreExprNodesImpl(Fn(E), std::forward<FnTys>(Fns)...);

From ea795304ec073a63c3c5b4fd0c5579e667201dad Mon Sep 17 00:00:00 2001
From: Mikael Holmen
Date: Tue, 8 Sep 2020 08:05:47 +0200
Subject: [PATCH 0026/1079] [PowerPC] Add parentheses to silence gcc warning

Without this change, gcc 7.4 warns with

../lib/Target/PowerPC/PPCInstrInfo.cpp:2284:25: warning: suggest parentheses around '&&' within '||' [-Wparentheses]
        BaseOp1.isFI() &&
        ~~~~~~~~~~~~~~~^~
            "Only base registers and frame indices are supported.");
            ~
---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 2c4549899e0c3..9afc0308533ec 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2280,9 +2280,8 @@ bool PPCInstrInfo::shouldClusterMemOps(
   assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
   const MachineOperand &BaseOp1 = *BaseOps1.front();
   const MachineOperand &BaseOp2 = *BaseOps2.front();
-  assert(BaseOp1.isReg() ||
-         BaseOp1.isFI() &&
-             "Only base registers and frame indices are supported.");
+  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
+         "Only base registers and frame indices are supported.");
   // The NumLoads means the number of loads that has been clustered.
   // Don't cluster memory op if there are already two ops clustered at least.

From 8ee1419ab688ee2da2ac2cb0cf19db03f4c4742e Mon Sep 17 00:00:00 2001
From: Simon Wallis
Date: Tue, 8 Sep 2020 08:04:52 +0100
Subject: [PATCH 0027/1079] [AARCH64][RegisterCoalescer] clang miscompiles
 zero-extension to long long

Implement the AArch64 variant of shouldCoalesce() to detect a known
failing case and prevent the coalescing of a 32-bit copy into a 64-bit
sign-extending load.

Do not coalesce in the following case: a COPY where the source is the
bottom 32 bits of a 64-bit register and the destination is a 32-bit
subregister of a 64-bit register, i.e. the copy causes the rest of the
destination register to be implicitly set to zero.

A mir test has been added.

In the test case, the 32-bit copy implements a 32 to 64 bit zero
extension and relies on the upper 32 bits being zeroed.

Coalescing to the result of the 64-bit load meant overwriting
the upper 32 bits incorrectly when the loaded byte was negative.
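A rough C++ analogue of the failure mode, reconstructed from the .mir test in the diff below (hypothetical source code; the .mir file is the authoritative reproducer):

    signed char c = -1;

    unsigned long long bug_e(int i) {
      long long wide = c;                   // sign-extending byte load (LDRSBXui)
      unsigned int lo = (unsigned int)wide; // 32-bit copy of the low half
      c = (signed char)i;                   // store kept from the original test
      return lo; // zero-extension: must be 0xFFFFFFFF, not 0xFFFFFFFFFFFFFFFF
    }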
Reviewed By: john.brawn Differential Revision: https://reviews.llvm.org/D85956 --- .../Target/AArch64/AArch64RegisterInfo.cpp | 16 +++++++++ llvm/lib/Target/AArch64/AArch64RegisterInfo.h | 6 ++++ .../CodeGen/AArch64/zext-reg-coalesce.mir | 33 +++++++++++++++++++ 3 files changed, 55 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/zext-reg-coalesce.mir diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 2f1317d8f1ea8..b3694411966b5 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -734,3 +734,19 @@ unsigned AArch64RegisterInfo::getLocalAddressRegister( return getBaseRegister(); return getFrameRegister(MF); } + +/// SrcRC and DstRC will be morphed into NewRC if this returns true +bool AArch64RegisterInfo::shouldCoalesce( + MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, + const TargetRegisterClass *DstRC, unsigned DstSubReg, + const TargetRegisterClass *NewRC, LiveIntervals &LIS) const { + if (MI->isCopy() && + ((DstRC->getID() == AArch64::GPR64RegClassID) || + (DstRC->getID() == AArch64::GPR64commonRegClassID)) && + MI->getOperand(0).getSubReg() && MI->getOperand(1).getSubReg()) + // Do not coalesce in the case of a 32-bit subregister copy + // which implements a 32 to 64 bit zero extension + // which relies on the upper 32 bits being zeroed. + return false; + return true; +} diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h index e3c8a77f433f8..d7580d7b68330 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h @@ -129,6 +129,12 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo { unsigned getLocalAddressRegister(const MachineFunction &MF) const; bool regNeedsCFI(unsigned Reg, unsigned &RegToUseForCFI) const; + + /// SrcRC and DstRC will be morphed into NewRC if this returns true + bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, + unsigned SubReg, const TargetRegisterClass *DstRC, + unsigned DstSubReg, const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const override; }; } // end namespace llvm diff --git a/llvm/test/CodeGen/AArch64/zext-reg-coalesce.mir b/llvm/test/CodeGen/AArch64/zext-reg-coalesce.mir new file mode 100644 index 0000000000000..b31144b409fca --- /dev/null +++ b/llvm/test/CodeGen/AArch64/zext-reg-coalesce.mir @@ -0,0 +1,33 @@ +# RUN: llc -mtriple=aarch64-arm-none-eabi -o - %s \ +# RUN: -run-pass simple-register-coalescing | FileCheck %s + +# In this test case, the 32-bit copy implements a 32 to 64 bit zero extension +# and relies on the upper 32 bits being zeroed. +# Coalescing to the result of the 64-bit load meant overwriting +# the upper 32 bits incorrectly when the loaded byte was negative. + +--- | + @c = local_unnamed_addr global i8 -1, align 4 + + define i64 @bug_e(i32 %i32) local_unnamed_addr { + ret i64 0 + } +... 
+---
+name: bug_e
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0
+
+    %1:gpr32 = COPY $w0
+    %2:gpr64common = ADRP target-flags(aarch64-page) @c
+    %3:gpr64 = LDRSBXui %2, target-flags(aarch64-pageoff, aarch64-nc) @c :: (dereferenceable load 1 from @c, align 4)
+    %0:gpr32 = COPY %3.sub_32
+    ; CHECK: {{.*}}.sub_32:gpr64 = COPY {{.*}}.sub_32
+    STRBBui %1, %2, target-flags(aarch64-pageoff, aarch64-nc) @c :: (store 1 into @c, align 4)
+    %8:gpr64all = SUBREG_TO_REG 0, %0, %subreg.sub_32
+    $x0 = COPY %8
+    ; CHECK: $x0 = COPY
+    RET_ReallyLR implicit $x0
+...

From bb39eb9e7f42ba8d1f86f961d7f887f9d626b733 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan
Date: Tue, 8 Sep 2020 15:30:16 +0800
Subject: [PATCH 0028/1079] [PowerPC] Fix getMemOperandWithOffsetWidth

Commit 3c0b3250 introduced memory operand clustering under the pwr10
target, but a check for the operands was unexpectedly removed. This adds
it back to avoid a regression.

---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 9afc0308533ec..8cb8c82e62833 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -4765,7 +4765,7 @@ MachineInstr *PPCInstrInfo::findLoopInstr(
 bool PPCInstrInfo::getMemOperandWithOffsetWidth(
     const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
     unsigned &Width, const TargetRegisterInfo *TRI) const {
-  if (!LdSt.mayLoadOrStore())
+  if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3)
     return false;

   // Handle only loads/stores with base register followed by immediate offset.

From 046f2402025c2ac93c1efc02acd60c5222e052f7 Mon Sep 17 00:00:00 2001
From: Max Kazantsev
Date: Tue, 8 Sep 2020 14:33:47 +0700
Subject: [PATCH 0029/1079] [Test] More tests where IndVars fails to eliminate
 a range check

---
 .../IndVarSimplify/monotonic_checks.ll        | 82 ++++++++++++++++++-
 1 file changed, 80 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll b/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll
index 988b3923263f6..048254427c5fa 100644
--- a/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll
+++ b/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll
@@ -83,8 +83,8 @@ exit:
   ret i32 0
 }

-; Monotonic incrementing iv. we should be able to prove that %iv.next <s len
-; basing on its nsw and the fact that its starting value <s len.
+; Monotonic decrementing iv. we should be able to prove that %iv.next >s len
+; basing on its nsw and the fact that its starting value >s len.
define i32 @test_02(i32* %p) { ; CHECK-LABEL: @test_02( ; CHECK-NEXT: entry: @@ -164,6 +164,84 @@ exit: ret i32 0 } +define i32 @test_03(i32* %p) { +; CHECK-LABEL: @test_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[P:%.*]], align 4, [[RNG2:!range !.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[RC:%.*]] = icmp ugt i32 [[IV_NEXT]], [[LEN]] +; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 1000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: fail: +; CHECK-NEXT: ret i32 -1 +; CHECK: exit: +; CHECK-NEXT: ret i32 0 +; +entry: + %len = load i32, i32* %p, !range !2 + br label %loop + +loop: + %iv = phi i32 [%len, %entry], [%iv.next, %backedge] + %iv.next = add i32 %iv, 1 + %rc = icmp sgt i32 %iv.next, %len + br i1 %rc, label %backedge, label %fail + +backedge: + %loop.cond = icmp ne i32 %iv, 1000 + br i1 %loop.cond, label %loop, label %exit + +fail: + ret i32 -1 + +exit: + ret i32 0 +} + +define i32 @test_04(i32* %p) { +; CHECK-LABEL: @test_04( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[P:%.*]], align 4, [[RNG2]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], -1 +; CHECK-NEXT: [[RC:%.*]] = icmp slt i32 [[IV_NEXT]], [[LEN]] +; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 0 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: fail: +; CHECK-NEXT: ret i32 -1 +; CHECK: exit: +; CHECK-NEXT: ret i32 0 +; +entry: + %len = load i32, i32* %p, !range !2 + br label %loop + +loop: + %iv = phi i32 [%len, %entry], [%iv.next, %backedge] + %iv.next = add i32 %iv, -1 + %rc = icmp slt i32 %iv.next, %len + br i1 %rc, label %backedge, label %fail + +backedge: + %loop.cond = icmp ne i32 %iv, 0 + br i1 %loop.cond, label %loop, label %exit + +fail: + ret i32 -1 + +exit: + ret i32 0 +} !0 = !{i32 0, i32 2147483647} !1 = !{i32 -2147483648, i32 0} +!2 = !{i32 0, i32 1000} From 69230e75f120141979248becac30ceaca4ab2e87 Mon Sep 17 00:00:00 2001 From: Richard Barton Date: Thu, 3 Sep 2020 11:44:03 +0100 Subject: [PATCH 0030/1079] [flang] Convert release notes to markdown Switch ReleaseNotes from .rst to .md to match the other docs. At the same time, fix the version number for master. --- flang/docs/ReleaseNotes.md | 87 +++++++++++++++++++++++++++++++++ flang/docs/ReleaseNotes.rst | 96 ------------------------------------- 2 files changed, 87 insertions(+), 96 deletions(-) create mode 100644 flang/docs/ReleaseNotes.md delete mode 100644 flang/docs/ReleaseNotes.rst diff --git a/flang/docs/ReleaseNotes.md b/flang/docs/ReleaseNotes.md new file mode 100644 index 0000000000000..b4b00ee65ffb2 --- /dev/null +++ b/flang/docs/ReleaseNotes.md @@ -0,0 +1,87 @@ +# Flang 12.0.0 (In-Progress) Release Notes + +> **warning** +> +> These are in-progress notes for the upcoming LLVM 12.0.0 release. +> Release notes for previous releases can be found on [the Download +> Page](https://releases.llvm.org/download.html). 
+
+## Introduction
+
+This document contains the release notes for the Flang Fortran frontend,
+part of the LLVM Compiler Infrastructure, release 12.0.0. Here we
+describe the status of Flang in some detail, including major
+improvements from the previous release and new feature work. For the
+general LLVM release notes, see [the LLVM
+documentation](https://llvm.org/docs/ReleaseNotes.html). All LLVM
+releases may be downloaded from the [LLVM releases web
+site](https://llvm.org/releases/).
+
+Note that if you are reading this file from a Git checkout, this
+document applies to the *next* release, not the current one. To see the
+release notes for a specific release, please see the [releases
+page](https://llvm.org/releases/).
+
+## Known Issues
+
+These are issues that couldn't be fixed before the release. See the bug
+reports for the latest status.
+
+ * ...
+
+## Introducing Flang
+
+Flang is LLVM's Fortran front end and is new for the LLVM 11 release.
+
+Flang is still a work in progress for this release and is included for
+experimentation and feedback.
+
+Flang is able to parse a comprehensive subset of the Fortran language
+and check it for correctness. Flang is not yet able to generate LLVM IR
+for the source code and thus is unable to compile a running binary.
+
+Flang is able to unparse the input source code into a canonical form and
+emit it to allow testing. Flang can also invoke an external Fortran
+compiler on this canonical input.
+
+Flang's parser has comprehensive support for:
+ * Fortran 2018
+ * OpenMP 4.5
+ * OpenACC 3.0
+
+Interested users are invited to try to compile their Fortran codes with
+flang and report any issues in parsing or semantic checking in
+[bugzilla](https://bugs.llvm.org/enter_bug.cgi?product=flang).
+
+### Major missing features
+
+ * Flang is not supported on Windows platforms.
+
+## Using Flang
+
+Usage: `flang hello.f90 -o hello.bin`
+
+By default, Flang will parse the Fortran file `hello.f90` then unparse it to a
+canonical Fortran source file. Flang will then invoke an external
+Fortran compiler to compile this source file and link it, placing the
+resulting executable in `hello.bin`.
+
+To specify the external Fortran compiler, set the `F18_FC` environment
+variable to the name of the compiler binary and ensure that it is on your
+`PATH`. The default value for `F18_FC` is `gfortran`.
+
+When invoked with no source input, Flang will wait for input on stdin.
+When invoked in this way, Flang performs the same actions as if
+called with `-fdebug-measure-parse-tree -funparse` and does not invoke
+`F18_FC`.
+
+For a full list of options that Flang supports, run `flang --help`.
+
+## Additional Information
+
+Flang's documentation is located in the `flang/docs/` directory in the
+LLVM monorepo.
+
+If you have any questions or comments about Flang, please feel free to
+contact us via the [mailing
+list](https://lists.llvm.org/mailman/listinfo/flang-dev).
diff --git a/flang/docs/ReleaseNotes.rst b/flang/docs/ReleaseNotes.rst
deleted file mode 100644
index bbc7377412d63..0000000000000
--- a/flang/docs/ReleaseNotes.rst
+++ /dev/null
@@ -1,96 +0,0 @@
-========================================
-Flang 11.0.0 (In-Progress) Release Notes
-========================================
-
-.. contents::
-   :local:
-   :depth: 2
-
-.. warning::
-
-   These are in-progress notes for the upcoming LLVM 11.0.0 release.
-   Release notes for previous releases can be found on
-   `the Download Page <https://releases.llvm.org/download.html>`_.
-
-Introduction
-============
-
-This document contains the release notes for the Flang Fortran
-frontend, part of the LLVM Compiler Infrastructure, release 11.0.0. Here we
-describe the status of Flang in some detail, including major
-improvements from the previous release and new feature work. For the
-general LLVM release notes, see `the LLVM
-documentation <https://llvm.org/docs/ReleaseNotes.html>`_. All LLVM
-releases may be downloaded from the `LLVM releases web
-site <https://llvm.org/releases/>`_.
-
-Note that if you are reading this file from a Git checkout, this document
-applies to the *next* release, not
-the current one. To see the release notes for a specific release, please
-see the `releases page <https://llvm.org/releases/>`_.
-
-Known Issues
-============
-
-These are issues that couldn't be fixed before the release. See the bug reports for the latest status.
-
-- ...
-
-Introducing Flang
-=================
-
-Flang is LLVM's Fortran front end and is new for the LLVM 11 release.
-
-Flang is still a work in progress for this release and is included for
-experimentation and feedback.
-
-Flang status
-------------
-
-Flang is able to parse a comprehensive subset of the Fortran language
-and check it for correctness. Flang is not yet able to generate LLVM IR for
-the source code and thus is unable to compile a running binary.
-
-Flang is able to unparse the input source code into a canonical form and emit
-it to allow testing. Flang can also invoke an external Fortran compiler on this
-canonical input.
-
-Flang's parser has comprehensive support for:
-- Fortran 2018
-- OpenMP 4.5
-- OpenACC 3.0
-
-Major missing features
-----------------------
-
-- Flang is not supported on Windows platforms.
-
-Using Flang
-===========
-
-Usage: ``flang hello.f90 -o hello.bin``
-
-Flang will parse the Fortran file ``hello.f90`` then unparse it to a canonical
-Fortran source file. Flang will then invoke an external Fortran compiler to
-compile this source file and link it, placing the resulting executable
-in ``hello.bin``.
-
-To specify the external Fortran compiler, set the ``F18_FC`` environment
-variable to the name of the compiler binary and ensure it is on your ``PATH``.
-The default value for ``F18_FC`` is ``gfortran``.
-
-When invoked with no source input, Flang will wait for input on standard in.
-When invoked in this way, Flang performs the same actions as if called with
-``-fdebug-measure-parse-tree -funparse`` and does not invoke ``F18_FC``.
-
-For a full list of options that Flang supports, run ``flang --help``.
-
-Additional Information
-======================
-
-Flang's documentation is located in the ``flang/docs/`` directory in
-the LLVM monorepo.
-
-If you have any questions or comments about Flang, please feel free to
-contact us via the `mailing
-list <https://lists.llvm.org/mailman/listinfo/flang-dev>`_.

From 3cda69872362526b1672ae23de4ac968b7564c2b Mon Sep 17 00:00:00 2001
From: Xing GUO
Date: Tue, 8 Sep 2020 16:08:42 +0800
Subject: [PATCH 0031/1079] [obj2yaml] Stop parsing the debug_str section when
 it encounters a string without a null terminator.

When obj2yaml encounters a string without a null terminator, it should
stop parsing the debug_str section. This patch addresses comments in
[D86867](https://reviews.llvm.org/D86867#inline-803291).
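To make that contract concrete: the dumper now walks the section with a
DataExtractor and propagates a failure instead of silently splitting on
'\0'. A minimal sketch of the idea, mirroring the dumpDebugStrings()
rewrite in the diff below (the free-standing helper name dumpStrings is
made up for illustration):

  #include "llvm/Support/DataExtractor.h"
  #include "llvm/Support/Error.h"

  // Stop at the first string that is not null-terminated and report the
  // failure to the caller instead of emitting a truncated entry.
  llvm::Error dumpStrings(llvm::DataExtractor &StrData,
                          std::vector<llvm::StringRef> &Out) {
    uint64_t Offset = 0;
    llvm::Error Err = llvm::Error::success();
    while (StrData.isValidOffset(Offset)) {
      const char *CStr = StrData.getCStr(&Offset, &Err);
      if (Err)
        return Err; // Unterminated string: bail out.
      Out.push_back(CStr);
    }
    return Err;
  }

The callers can then fall back to dumping the section as raw content, as
the elf2yaml and macho2yaml changes below do.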
Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D87261 --- .../ObjectYAML/MachO/DWARF-debug_str.yaml | 58 +++++++++++++++++++ .../tools/obj2yaml/ELF/DWARF/debug-str.yaml | 24 ++++++++ llvm/tools/obj2yaml/dwarf2yaml.cpp | 20 ++++--- llvm/tools/obj2yaml/elf2yaml.cpp | 2 +- llvm/tools/obj2yaml/macho2yaml.cpp | 6 +- llvm/tools/obj2yaml/obj2yaml.h | 3 +- 6 files changed, 100 insertions(+), 13 deletions(-) diff --git a/llvm/test/ObjectYAML/MachO/DWARF-debug_str.yaml b/llvm/test/ObjectYAML/MachO/DWARF-debug_str.yaml index 29247b334a1a9..9bb55ea350911 100644 --- a/llvm/test/ObjectYAML/MachO/DWARF-debug_str.yaml +++ b/llvm/test/ObjectYAML/MachO/DWARF-debug_str.yaml @@ -321,3 +321,61 @@ DWARF: # EMPTY-STRING-NEXT: debug_str: # EMPTY-STRING-NEXT: - '' # EMPTY-STRING-NEXT: ... + +## d) Test generating and dumping a __debug_str section which contains a string without a null terminator. + +# RUN: yaml2obj --docnum=3 %s | obj2yaml | FileCheck %s --check-prefix=NO-TERMINATOR + +# NO-TERMINATOR-NOT: DWARF: +# NO-TERMINATOR: Sections: +# NO-TERMINATOR-NEXT: - sectname: __debug_str +# NO-TERMINATOR-NEXT: segname: __DWARF +# NO-TERMINATOR-NEXT: addr: 0x0000000000000000 +# NO-TERMINATOR-NEXT: size: 7 +# NO-TERMINATOR-NEXT: offset: 0x00000210 +# NO-TERMINATOR-NEXT: align: 0 +# NO-TERMINATOR-NEXT: reloff: 0x00000000 +# NO-TERMINATOR-NEXT: nreloc: 0 +# NO-TERMINATOR-NEXT: flags: 0x00000000 +# NO-TERMINATOR-NEXT: reserved1: 0x00000000 +# NO-TERMINATOR-NEXT: reserved2: 0x00000000 +# NO-TERMINATOR-NEXT: reserved3: 0x00000000 +# NO-TERMINATOR-NEXT: content: '61626300616263' +# NO-TERMINATOR-NEXT: ... + +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x0000000A + ncmds: 1 + sizeofcmds: 232 + flags: 0x00000000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DWARF + vmaddr: 0x00 + vmsize: 0x00 + fileoff: 0x00 + filesize: 0x00 + maxprot: 0 + initprot: 0 + nsects: 1 + flags: 0 + Sections: + - sectname: __debug_str + segname: __DWARF + addr: 0x00 + size: 7 + offset: 0x210 + align: 0 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: '61626300616263' ## "abc\0abc" diff --git a/llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml b/llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml index e058642877243..76c1c5c1b3650 100644 --- a/llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml +++ b/llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml @@ -99,3 +99,27 @@ FileHeader: Type: ET_EXEC DWARF: debug_str: [] + +## d) Test that yaml2obj stops parsing the .debug_str section if it encounters a +## string without a null terminator. The output uses a raw content section instead of +## the DWARF tag to represent the broken .debug_str section. + +# RUN: yaml2obj --docnum=3 %s | obj2yaml | FileCheck %s --check-prefix=NO-TERMINATOR + +# NO-TERMINATOR-NOT: DWARF: +# NO-TERMINATOR: Sections: +# NO-TERMINATOR-NEXT: - Name: .debug_str +# NO-TERMINATOR-NEXT: Type: SHT_PROGBITS +# NO-TERMINATOR-NEXT: Flags: [ SHF_MERGE, SHF_STRINGS ] +# NO-TERMINATOR-NEXT: Content: '61626300616263' +# NO-TERMINATOR-NEXT: ... 
+
+--- !ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data: ELFDATA2LSB
+  Type: ET_EXEC
+Sections:
+  - Name: .debug_str
+    Type: SHT_PROGBITS
+    Content: "61626300616263" ## "abc\0abc"
diff --git a/llvm/tools/obj2yaml/dwarf2yaml.cpp b/llvm/tools/obj2yaml/dwarf2yaml.cpp
index 513fa0fdef01d..cef7b699805c8 100644
--- a/llvm/tools/obj2yaml/dwarf2yaml.cpp
+++ b/llvm/tools/obj2yaml/dwarf2yaml.cpp
@@ -46,14 +46,20 @@ void dumpDebugAbbrev(DWARFContext &DCtx, DWARFYAML::Data &Y) {
   }
 }

-void dumpDebugStrings(DWARFContext &DCtx, DWARFYAML::Data &Y) {
-  StringRef RemainingTable = DCtx.getDWARFObj().getStrSection();
-  Y.DebugStrings.emplace();
-  while (RemainingTable.size() > 0) {
-    auto SymbolPair = RemainingTable.split('\0');
-    RemainingTable = SymbolPair.second;
-    Y.DebugStrings->push_back(SymbolPair.first);
+Error dumpDebugStrings(DWARFContext &DCtx, DWARFYAML::Data &Y) {
+  DataExtractor StrData = DCtx.getStringExtractor();
+  uint64_t Offset = 0;
+  std::vector<StringRef> DebugStr;
+  Error Err = Error::success();
+  while (StrData.isValidOffset(Offset)) {
+    const char *CStr = StrData.getCStr(&Offset, &Err);
+    if (Err)
+      return Err;
+    DebugStr.push_back(CStr);
   }
+
+  Y.DebugStrings = DebugStr;
+  return Err;
 }

 Error dumpDebugARanges(DWARFContext &DCtx, DWARFYAML::Data &Y) {
diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp
index 9f524479bb04c..264bc4d1dbf36 100644
--- a/llvm/tools/obj2yaml/elf2yaml.cpp
+++ b/llvm/tools/obj2yaml/elf2yaml.cpp
@@ -415,7 +415,7 @@ Optional<DWARFYAML::Data> ELFDumper<ELFT>::dumpDWARFSections(
     if (RawSec->Name == ".debug_aranges")
       Err = dumpDebugARanges(*DWARFCtx.get(), DWARF);
     else if (RawSec->Name == ".debug_str")
-      dumpDebugStrings(*DWARFCtx.get(), DWARF);
+      Err = dumpDebugStrings(*DWARFCtx.get(), DWARF);

     // If the DWARF section cannot be successfully parsed, emit raw content
     // instead of an entry in the DWARF section of the YAML.
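The caller-side pattern is worth spelling out, since both object formats
rely on it. A sketch of the fallback, where handleFailure() is a
hypothetical stand-in for the existing raw-content path in obj2yaml:

  // If any DWARF dumper reports an error, drop the parsed form and keep
  // the section as raw bytes so the YAML still round-trips.
  if (llvm::Error Err = dumpDebugStrings(DCtx, DWARF)) {
    llvm::consumeError(std::move(Err)); // the reason is not emitted
    handleFailure();                    // hypothetical: emit raw section content
  }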
diff --git a/llvm/tools/obj2yaml/macho2yaml.cpp b/llvm/tools/obj2yaml/macho2yaml.cpp index 3a93d5c6846b5..49347431b9a4f 100644 --- a/llvm/tools/obj2yaml/macho2yaml.cpp +++ b/llvm/tools/obj2yaml/macho2yaml.cpp @@ -154,10 +154,8 @@ static Error dumpDebugSection(StringRef SecName, DWARFContext &DCtx, } if (SecName == "__debug_ranges") return dumpDebugRanges(DCtx, DWARF); - if (SecName == "__debug_str") { - dumpDebugStrings(DCtx, DWARF); - return Error::success(); - } + if (SecName == "__debug_str") + return dumpDebugStrings(DCtx, DWARF); return createStringError(errc::not_supported, "dumping " + SecName + " section is not supported"); } diff --git a/llvm/tools/obj2yaml/obj2yaml.h b/llvm/tools/obj2yaml/obj2yaml.h index 85a7ac9a4787b..66a2d2753622c 100644 --- a/llvm/tools/obj2yaml/obj2yaml.h +++ b/llvm/tools/obj2yaml/obj2yaml.h @@ -47,6 +47,7 @@ void dumpDebugPubSections(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y); void dumpDebugInfo(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y); void dumpDebugLines(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y); llvm::Error dumpDebugRanges(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y); -void dumpDebugStrings(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y); +llvm::Error dumpDebugStrings(llvm::DWARFContext &DCtx, + llvm::DWARFYAML::Data &Y); #endif From 9be6178449555576645ac922e342936319445cac Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Tue, 8 Sep 2020 03:39:23 -0400 Subject: [PATCH 0032/1079] [mlir][Vector] Make VectorToSCF deterministic Differential Revision: https://reviews.llvm.org/D87273 --- mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp | 11 +++++------ mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index 8f7d43829846b..08d0117e6a17c 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -584,9 +584,9 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( steps.push_back(std_constant_index(step)); // 2. Emit alloc-copy-load-dealloc. + MLIRContext *ctx = op->getContext(); Value tmp = setAllocAtFunctionEntry(tmpMemRefType(transfer), transfer); StdIndexedValue local(tmp); - Value vec = vector_type_cast(tmp); loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) { auto ivs = llvm::to_vector<8>(loopIvs); // Swap the ivs which will reorder memory accesses. @@ -595,13 +595,12 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist). SmallVector indices = clip(transfer, memRefBoundsCapture, ivs); ArrayRef indicesRef(indices), ivsRef(ivs); - Value pos = - std_index_cast(IntegerType::get(32, op->getContext()), ivsRef.back()); - Value vector = vector_insert_element(remote(indicesRef), - local(ivsRef.drop_back()), pos); + Value pos = std_index_cast(IntegerType::get(32, ctx), ivsRef.back()); + Value scal = remote(indicesRef); + Value vector = vector_insert_element(scal, local(ivsRef.drop_back()), pos); local(ivsRef.drop_back()) = vector; }); - Value vectorValue = std_load(vec); + Value vectorValue = std_load(vector_type_cast(tmp)); // 3. Propagate. 
rewriter.replaceOp(op, vectorValue); diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir index 240925baf3d8c..5e8aea1f51135 100644 --- a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir @@ -99,8 +99,8 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: %[[L3:.*]] = select // CHECK-NEXT: %[[VIDX:.*]] = index_cast %[[I4]] // - // CHECK-DAG: %[[SCAL:.*]] = load %{{.*}}[%[[L0]], %[[L1]], %[[L2]], %[[L3]]] : memref - // CHECK-DAG: %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK-NEXT: %[[SCAL:.*]] = load %{{.*}}[%[[L0]], %[[L1]], %[[L2]], %[[L3]]] : memref + // CHECK-NEXT: %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> // CHECK-NEXT: store %[[RVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> // CHECK-NEXT: } From 2168dbf4cc766dfb552076d9b1e84b00122b7993 Mon Sep 17 00:00:00 2001 From: Shivanshu Goyal Date: Tue, 8 Sep 2020 10:17:05 +0200 Subject: [PATCH 0033/1079] getClangStripDependencyFileAdjuster(): Do not remove -M args when using MSVC cl driver MSVC's cl.exe has a few command line arguments which start with -M such as "-MD", "-MDd", "-MT", "-MTd", "-MP". These arguments are not dependency file generation related, and these arguments were being removed by getClangStripDependencyFileAdjuster() which was wrong. Differential revision: https://reviews.llvm.org/D86999 --- clang/lib/Tooling/ArgumentsAdjusters.cpp | 34 ++++++++++++++++++------ clang/unittests/Tooling/ToolingTest.cpp | 34 ++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 8 deletions(-) diff --git a/clang/lib/Tooling/ArgumentsAdjusters.cpp b/clang/lib/Tooling/ArgumentsAdjusters.cpp index a857b57fbf7bc..bcfb5b39a0770 100644 --- a/clang/lib/Tooling/ArgumentsAdjusters.cpp +++ b/clang/lib/Tooling/ArgumentsAdjusters.cpp @@ -21,6 +21,16 @@ namespace clang { namespace tooling { +static StringRef getDriverMode(const CommandLineArguments &Args) { + for (const auto &Arg : Args) { + StringRef ArgRef = Arg; + if (ArgRef.consume_front("--driver-mode=")) { + return ArgRef; + } + } + return StringRef(); +} + /// Add -fsyntax-only option and drop options that triggers output generation. ArgumentsAdjuster getClangSyntaxOnlyAdjuster() { return [](const CommandLineArguments &Args, StringRef /*unused*/) { @@ -93,20 +103,28 @@ ArgumentsAdjuster getClangStripSerializeDiagnosticAdjuster() { ArgumentsAdjuster getClangStripDependencyFileAdjuster() { return [](const CommandLineArguments &Args, StringRef /*unused*/) { + auto UsingClDriver = (getDriverMode(Args) == "cl"); + CommandLineArguments AdjustedArgs; for (size_t i = 0, e = Args.size(); i < e; ++i) { StringRef Arg = Args[i]; - // All dependency-file options begin with -M. These include -MM, - // -MF, -MG, -MP, -MT, -MQ, -MD, and -MMD. - if (!Arg.startswith("-M") && !Arg.startswith("/showIncludes") && - !Arg.startswith("-showIncludes")) { - AdjustedArgs.push_back(Args[i]); + + // These flags take an argument: -MX foo. Skip the next argument also. + if (!UsingClDriver && (Arg == "-MF" || Arg == "-MT" || Arg == "-MQ")) { + ++i; continue; } + // When not using the cl driver mode, dependency file generation options + // begin with -M. These include -MM, -MF, -MG, -MP, -MT, -MQ, -MD, and + // -MMD. 
+      if (!UsingClDriver && Arg.startswith("-M"))
+        continue;
+      // Under MSVC's cl driver mode, dependency file generation is controlled
+      // using /showIncludes
+      if (Arg.startswith("/showIncludes") || Arg.startswith("-showIncludes"))
+        continue;

-      if (Arg == "-MF" || Arg == "-MT" || Arg == "-MQ")
-        // These flags take an argument: -MX foo. Skip the next argument also.
-        ++i;
+      AdjustedArgs.push_back(Args[i]);
     }
     return AdjustedArgs;
   };
diff --git a/clang/unittests/Tooling/ToolingTest.cpp b/clang/unittests/Tooling/ToolingTest.cpp
index cc6f453284d71..691a847d5a715 100644
--- a/clang/unittests/Tooling/ToolingTest.cpp
+++ b/clang/unittests/Tooling/ToolingTest.cpp
@@ -563,6 +563,40 @@ TEST(ClangToolTest, StripDependencyFileAdjusterShowIncludes) {
   EXPECT_TRUE(HasFlag("-c"));
 }

+// Check getClangStripDependencyFileAdjuster doesn't strip args when using the
+// MSVC cl.exe driver
+TEST(ClangToolTest, StripDependencyFileAdjusterMsvc) {
+  FixedCompilationDatabase Compilations(
+      "/", {"--driver-mode=cl", "-MD", "-MDd", "-MT", "-O1", "-MTd", "-MP"});
+
+  ClangTool Tool(Compilations, std::vector<std::string>(1, "/a.cc"));
+  Tool.mapVirtualFile("/a.cc", "void a() {}");
+
+  std::unique_ptr<FrontendActionFactory> Action(
+      newFrontendActionFactory<SyntaxOnlyAction>());
+
+  CommandLineArguments FinalArgs;
+  ArgumentsAdjuster CheckFlagsAdjuster =
+      [&FinalArgs](const CommandLineArguments &Args, StringRef /*unused*/) {
+        FinalArgs = Args;
+        return Args;
+      };
+  Tool.clearArgumentsAdjusters();
+  Tool.appendArgumentsAdjuster(getClangStripDependencyFileAdjuster());
+  Tool.appendArgumentsAdjuster(CheckFlagsAdjuster);
+  Tool.run(Action.get());
+
+  auto HasFlag = [&FinalArgs](const std::string &Flag) {
+    return llvm::find(FinalArgs, Flag) != FinalArgs.end();
+  };
+  EXPECT_TRUE(HasFlag("-MD"));
+  EXPECT_TRUE(HasFlag("-MDd"));
+  EXPECT_TRUE(HasFlag("-MT"));
+  EXPECT_TRUE(HasFlag("-O1"));
+  EXPECT_TRUE(HasFlag("-MTd"));
+  EXPECT_TRUE(HasFlag("-MP"));
+}
+
 // Check getClangStripPluginsAdjuster strips plugin related args.
TEST(ClangToolTest, StripPluginsAdjuster) { FixedCompilationDatabase Compilations( From 38778e1087b2825e91b07ce4570c70815b49dcdc Mon Sep 17 00:00:00 2001 From: Serge Guelton Date: Thu, 25 Jun 2020 05:57:01 -0400 Subject: [PATCH 0034/1079] Provide anchor for compiler extensions This patch is cherry-picked from 04b0a4e22e3b4549f9d241f8a9f37eebecb62a31, and amended to prevent an undefined reference to `llvm::EnableABIBreakingChecks' --- llvm/lib/Extensions/Extensions.cpp | 15 +++++++++++++++ llvm/lib/Extensions/LLVMBuild.txt | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Extensions/Extensions.cpp b/llvm/lib/Extensions/Extensions.cpp index e69de29bb2d1d..2fe537f91876a 100644 --- a/llvm/lib/Extensions/Extensions.cpp +++ b/llvm/lib/Extensions/Extensions.cpp @@ -0,0 +1,15 @@ +#include "llvm/Passes/PassPlugin.h" +#define HANDLE_EXTENSION(Ext) \ + llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); +#include "llvm/Support/Extension.def" + + +namespace llvm { + namespace details { + void extensions_anchor() { +#define HANDLE_EXTENSION(Ext) \ + static auto Ext = get##Ext##PluginInfo(); +#include "llvm/Support/Extension.def" + } + } +} diff --git a/llvm/lib/Extensions/LLVMBuild.txt b/llvm/lib/Extensions/LLVMBuild.txt index 2005830a4dd7a..7a98c8f680513 100644 --- a/llvm/lib/Extensions/LLVMBuild.txt +++ b/llvm/lib/Extensions/LLVMBuild.txt @@ -18,4 +18,4 @@ type = Library name = Extensions parent = Libraries -required_libraries = +required_libraries = Support From 67b37f571cc27d5684125f694d719b114ad72a18 Mon Sep 17 00:00:00 2001 From: Jakub Lichman Date: Tue, 8 Sep 2020 08:31:52 +0000 Subject: [PATCH 0035/1079] [mlir] Conv ops vectorization pass In this commit a new way of convolution ops lowering is introduced. The conv op vectorization pass lowers linalg convolution ops into vector contractions. This lowering is possible when conv op is first tiled by 1 along specific dimensions which transforms it into dot product between input and kernel subview memory buffers. This pass converts such conv op into vector contraction and does all necessary vector transfers that make it work. Differential Revision: https://reviews.llvm.org/D86619 --- .../Dialect/Linalg/Transforms/Transforms.h | 51 ++++++ .../Linalg/Transforms/Vectorization.cpp | 95 ++++++++++ .../LinalgToVector/linalg-to-vector.mlir | 167 ++++++++++++++++++ mlir/test/lib/Transforms/CMakeLists.txt | 1 + .../lib/Transforms/TestConvVectorization.cpp | 51 ++++++ mlir/tools/mlir-opt/mlir-opt.cpp | 2 + 6 files changed, 367 insertions(+) create mode 100644 mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir create mode 100644 mlir/test/lib/Transforms/TestConvVectorization.cpp diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index f438b6587c8bc..ce3b5fd2fd247 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -30,6 +30,10 @@ struct TiledLinalgOp { SmallVector loops; }; +/// Populates patterns for vectorization of all ConvN-D ops. +void populateConvVectorizationPatterns(MLIRContext *context, + OwningRewritePatternList &patterns); + /// Performs standalone tiling of a single LinalgOp by `tileSizes`. 
/// and permute the loop nest according to `interchangeVector`
/// The permutation is expressed as a list of integers that specify
@@ -531,6 +535,53 @@ struct AffineMinSCFCanonicalizationPattern
                                 PatternRewriter &rewriter) const override;
 };

+/// Converts Convolution op into vector contraction.
+///
+/// Conversion expects ConvOp to have dimensions marked in the *mask* as
+/// false of size 1. This ensures that the ConvOp can be lowered to vector
+/// contraction of dimensions marked in the *mask* as true.
+///
+/// A good example is ConvNHWCOp which is 2D Conv op with channels as the last
+/// dimension. For this op we contract last 3 dimensions.
+/// The initial op definition looks like this:
+/// ```
+/// linalg.conv_2d_nhwc %arg0, %arg1, %arg2 :
+///   (memref<1x3x3x3xf32>, memref<1x3x3x3xf32>, memref<?x?x?x?xf32>)
+/// ```
+/// This op can be expressed as a dot product between %arg0 (input) and
+/// %arg1 (kernel) which is written into first entry of %arg2 (output). This is
+/// the ConvOp this pass expects and converts into:
+/// ```
+/// #map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
+/// #map1 = affine_map<(d0, d1, d2) -> ()>
+/// .....
+/// %0 = vector.transfer_read %arg0[%c0, %c0, %c0, %c0], %c0_f32
+///   : memref<1x3x3x3xf32>, vector<3x3x3xf32>
+/// %1 = vector.transfer_read %arg1[%c0, %c0, %c0, %c0], %c0_f32
+///   : memref<1x3x3x3xf32>, vector<3x3x3xf32>
+/// %2 = vector.contract {indexing_maps = [#map0, #map0, #map1],
+///   iterator_types = ["reduction", "reduction", "reduction"]} %0, %1,
+///   %c0_f32 : vector<3x3x3xf32>, vector<3x3x3xf32> into f32
+/// store %2, %arg2[%c0, %c0, %c0, %c0] : memref<?x?x?x?xf32>
+/// ```
+/// where first 2 operations read input and kernel memory buffers into vectors.
+/// Subsequently, they are contracted together and the result is written to
+/// the first entry of the output buffer.
+template <typename ConvOp, int N>
+struct ConvOpVectorization : public OpRewritePattern<ConvOp> {
+  using OpRewritePattern<ConvOp>::OpRewritePattern;
+  SmallVector<bool, 4> mask;
+
+  ConvOpVectorization(MLIRContext *context, SmallVector<bool, 4> msk)
+      : OpRewritePattern<ConvOp>(context) {
+    assert(msk.size() == N && "Mask size does not match rank");
+    this->mask = msk;
+  }
+
+  LogicalResult matchAndRewrite(ConvOp minOp,
+                                PatternRewriter &rewriter) const override;
+};
+
 //===----------------------------------------------------------------------===//
 // Support for staged pattern application.
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index ada89f1c82b5c..cd36c753b6f69 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -367,3 +367,98 @@ LogicalResult LinalgCopyVTWForwardingPattern::matchAndRewrite(

   return success();
 }
+
+template <typename ConvOp, int N>
+LogicalResult ConvOpVectorization<ConvOp, N>::matchAndRewrite(
+    ConvOp op, PatternRewriter &rewriter) const {
+  const uint dimSize = 3;
+  Location loc = op.getLoc();
+  MLIRContext *context = op.getContext();
+  edsc::ScopedContext scope(rewriter, loc);
+
+  ShapedType inShapeType = op.getInputShapedType(0);
+  ShapedType kShapeType = op.getInputShapedType(1);
+
+  ArrayRef<int64_t> inShape = inShapeType.getShape();
+  ArrayRef<int64_t> kShape = kShapeType.getShape();
+
+  if (!inShapeType.hasStaticShape() || !kShapeType.hasStaticShape())
+    return failure();
+
+  SmallVector<AffineExpr, 4> mapping;
+  // Fail to apply when the size of not vectorized dimension is not 1 or
+  // when the size of vectorized dimension is not dimSize.
+ for (unsigned i = 0; i < N; i++) { + if (!mask[i] && (inShape[i] != 1 || kShape[i] != 1)) + return failure(); + if (mask[i] && (inShape[i] != dimSize || kShape[i] != dimSize)) + return failure(); + + if (mask[i]) + mapping.push_back(getAffineDimExpr(i, context)); + } + + Value input = op.getInput(0); + Value kernel = op.getInput(1); + Value output = op.getOutputBuffer(0); + + uint rank = inShapeType.getRank(); + uint numDims = mapping.size(); + Type elemType = inShapeType.getElementType(); + + auto map = AffineMap::get(rank, 0, mapping, context); + SmallVector zeros(rank, std_constant_index(0)); + auto vecType = + VectorType::get(SmallVector(numDims, dimSize), elemType); + + auto inputVec = vector_transfer_read(vecType, input, zeros, map); + auto kernelVec = vector_transfer_read(vecType, kernel, zeros, map); + + auto acc = std_constant(elemType, rewriter.getZeroAttr(elemType)); + + std::array indexingMaps{ + AffineMap::getMultiDimIdentityMap(numDims, context), + AffineMap::getMultiDimIdentityMap(numDims, context), + AffineMap::get(numDims, 0, {}, context)}; + + std::vector iteratorTypes(numDims, "reduction"); + + auto result = rewriter.create( + loc, inputVec, kernelVec, acc, + rewriter.getAffineMapArrayAttr(indexingMaps), + rewriter.getStrArrayAttr(iteratorTypes)); + + rewriter.create(loc, result, output, ValueRange(zeros)); + rewriter.eraseOp(op); + return success(); +} + +void mlir::linalg::populateConvVectorizationPatterns( + MLIRContext *context, OwningRewritePatternList &patterns) { + patterns.insert>( + context, SmallVector{true}); + + patterns.insert>( + context, SmallVector{false, true, true}); + + patterns.insert>( + context, SmallVector{false, true, true}); + + patterns.insert>( + context, SmallVector{true, true}); + + patterns.insert>( + context, SmallVector{false, true, true, true}); + + patterns.insert>( + context, SmallVector{false, true, true, true}); + + patterns.insert>( + context, SmallVector{true, true, true}); + + patterns.insert>( + context, SmallVector{false, true, true, true, true}); + + patterns.insert>( + context, SmallVector{false, true, true, true, true}); +} diff --git a/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir b/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir new file mode 100644 index 0000000000000..487718301d005 --- /dev/null +++ b/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir @@ -0,0 +1,167 @@ +// RUN: mlir-opt %s -test-conv-vectorization --cse | FileCheck %s + +// CHECK-DAG: #[[$map0:.*]] = affine_map<(d0) -> (d0)> +// CHECK-DAG: #[[$map1:.*]] = affine_map<(d0) -> ()> +// CHECK-DAG: #[[$map2:.*]] = affine_map<(d0, d1, d2) -> (d1, d2)> +// CHECK-DAG: #[[$map3:.*]] = affine_map<(d0, d1) -> (d0, d1)> +// CHECK-DAG: #[[$map4:.*]] = affine_map<(d0, d1) -> ()> +// CHECK-DAG: #[[$map5:.*]] = affine_map<(d0, d1, d2, d3) -> (d1, d2, d3)> +// CHECK-DAG: #[[$map6:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +// CHECK-DAG: #[[$map7:.*]] = affine_map<(d0, d1, d2) -> ()> +// CHECK-DAG: #[[$map8:.*]] = affine_map<(d0, d1, d2, d3, d4) -> (d1, d2, d3, d4)> +// CHECK-DAG: #[[$map9:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-DAG: #[[$map10:.*]] = affine_map<(d0, d1, d2, d3) -> ()> + +func @conv_1d(%arg0: memref<3xf32>, %arg1: memref<3xf32>, %arg2: memref) { + linalg.conv_1d %arg0, %arg1, %arg2 : (memref<3xf32>, memref<3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_1d +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<3xf32> +// CHECK-SAME: 
%[[arg2:[a-zA-Z0-9]+]]: memref, vector<3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]]], %[[cst]] : memref<3xf32>, vector<3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map0]], #[[$map0]], #[[$map1]]], iterator_types = ["reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3xf32>, vector<3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]]] : memref +// CHECK: return + +func @conv_1d_ncw(%arg0: memref<1x3x3xf32>, %arg1: memref<1x3x3xf32>, %arg2: memref) { + linalg.conv_1d_ncw %arg0, %arg1, %arg2 : (memref<1x3x3xf32>, memref<1x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_1d_ncw +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3xf32>, vector<3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map3]], #[[$map3]], #[[$map4]]], iterator_types = ["reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3xf32>, vector<3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return + + +func @conv_1d_nwc(%arg0: memref<1x3x3xf32>, %arg1: memref<1x3x3xf32>, %arg2: memref) { + linalg.conv_1d_nwc %arg0, %arg1, %arg2 : (memref<1x3x3xf32>, memref<1x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_1d_nwc +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3xf32>, vector<3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map3]], #[[$map3]], #[[$map4]]], iterator_types = ["reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3xf32>, vector<3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return + +func @conv_2d(%arg0: memref<3x3xf32>, %arg1: memref<3x3xf32>, %arg2: memref) { + linalg.conv_2d %arg0, %arg1, %arg2 : (memref<3x3xf32>, memref<3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_2d +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]]], %[[cst]] : memref<3x3xf32>, vector<3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map3]], #[[$map3]], #[[$map4]]], iterator_types = ["reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3xf32>, vector<3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]]] : memref +// CHECK: return + +func @conv_2d_nchw(%arg0: memref<1x3x3x3xf32>, %arg1: memref<1x3x3x3xf32>, %arg2: memref) { + linalg.conv_2d_nchw %arg0, %arg1, %arg2 : (memref<1x3x3x3xf32>, memref<1x3x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_2d_nchw +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3xf32>, vector<3x3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map6]], #[[$map6]], #[[$map7]]], iterator_types = 
["reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3xf32>, vector<3x3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return + +func @conv_2d_nhwc(%arg0: memref<1x3x3x3xf32>, %arg1: memref<1x3x3x3xf32>, %arg2: memref) { + linalg.conv_2d_nhwc %arg0, %arg1, %arg2 : (memref<1x3x3x3xf32>, memref<1x3x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_2d_nhwc +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3xf32>, vector<3x3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map6]], #[[$map6]], #[[$map7]]], iterator_types = ["reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3xf32>, vector<3x3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return + +func @conv_3d(%arg0: memref<3x3x3xf32>, %arg1: memref<3x3x3xf32>, %arg2: memref) { + linalg.conv_3d %arg0, %arg1, %arg2 : (memref<3x3x3xf32>, memref<3x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_3d +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<3x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<3x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<3x3x3xf32>, vector<3x3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map6]], #[[$map6]], #[[$map7]]], iterator_types = ["reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3xf32>, vector<3x3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return + +func @conv_3d_ncdhw(%arg0: memref<1x3x3x3x3xf32>, %arg1: memref<1x3x3x3x3xf32>, %arg2: memref) { + linalg.conv_3d_ncdhw %arg0, %arg1, %arg2 : (memref<1x3x3x3x3xf32>, memref<1x3x3x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_3d_ncdhw +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3x3xf32>, vector<3x3x3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map9]], #[[$map9]], #[[$map10]]], iterator_types = ["reduction", "reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3x3xf32>, vector<3x3x3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return + +func @conv_3d_ndhwc(%arg0: memref<1x3x3x3x3xf32>, %arg1: memref<1x3x3x3x3xf32>, %arg2: memref) { + linalg.conv_3d_ndhwc %arg0, %arg1, %arg2 : (memref<1x3x3x3x3xf32>, memref<1x3x3x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_3d_ndhwc +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3x3xf32>, vector<3x3x3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = 
[#[[$map9]], #[[$map9]], #[[$map10]]], iterator_types = ["reduction", "reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3x3xf32>, vector<3x3x3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt index de894467d63d4..3ac1e7c552350 100644 --- a/mlir/test/lib/Transforms/CMakeLists.txt +++ b/mlir/test/lib/Transforms/CMakeLists.txt @@ -5,6 +5,7 @@ add_mlir_library(MLIRTestTransforms TestExpandTanh.cpp TestCallGraph.cpp TestConstantFold.cpp + TestConvVectorization.cpp TestConvertCallOp.cpp TestConvertGPUKernelToCubin.cpp TestConvertGPUKernelToHsaco.cpp diff --git a/mlir/test/lib/Transforms/TestConvVectorization.cpp b/mlir/test/lib/Transforms/TestConvVectorization.cpp new file mode 100644 index 0000000000000..37e509cbbbe1b --- /dev/null +++ b/mlir/test/lib/Transforms/TestConvVectorization.cpp @@ -0,0 +1,51 @@ +//===- TestConvVectorization.cpp - Linalg to Vector dialect conversion ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/DialectConversion.h" + +using namespace mlir; + +namespace { +/// A pass converting MLIR Linalg ops into Vector ops. +class TestConvVectorization + : public PassWrapper> { + void runOnOperation() override; + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + registry.insert(); + registry.insert(); + } +}; +} // namespace + +void TestConvVectorization::runOnOperation() { + MLIRContext *context = &getContext(); + ModuleOp module = getOperation(); + + ConversionTarget target(*context); + target.addLegalDialect(); + target.addLegalOp(); + target.addLegalOp(); + + OwningRewritePatternList patterns; + linalg::populateConvVectorizationPatterns(context, patterns); + + if (failed(applyPartialConversion(module, target, patterns))) + return signalPassFailure(); +} + +namespace mlir { +void registerTestConvVectorization() { + PassRegistration testTransformPatternsPass( + "test-conv-vectorization", "Test vectorization of convolutions"); +} +} // namespace mlir diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 34e03a5f99201..437b5f4b6f1a6 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -45,6 +45,7 @@ void registerTestAllReduceLoweringPass(); void registerTestBufferPlacementPreparationPass(); void registerTestCallGraphPass(); void registerTestConstantFold(); +void registerTestConvVectorization(); void registerTestConvertGPUKernelToCubinPass(); void registerTestConvertGPUKernelToHsacoPass(); void registerTestDominancePass(); @@ -93,6 +94,7 @@ void registerTestPasses() { registerTestAffineLoopUnswitchingPass(); registerTestLoopPermutationPass(); registerTestCallGraphPass(); + registerTestConvVectorization(); registerTestConstantFold(); #if MLIR_CUDA_CONVERSIONS_ENABLED registerTestConvertGPUKernelToCubinPass(); From 239eff502bca64f544f311e7d7a65fdec01cb9c4 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 7 Sep 2020 17:39:16 +0200 Subject: [PATCH 0036/1079] [mlir][VectorOps] Redo the scalar loop emission in 
VectorToSCF to pad instead of clipping

This replaces the select chain for edge-padding with an scf.if that
performs the memory operation when the index is in bounds and uses the
pad value when it's not. For transfer_write the same mechanism is used,
skipping the store when the index is out of bounds.

The integration test has a bunch of cases of how I believe this should
work.

Differential Revision: https://reviews.llvm.org/D87241
---
 .../Vector/CPU/test-transfer-to-loops.mlir    |  24 +++
 .../VectorToLLVM/ConvertVectorToLLVM.cpp      |   2 +-
 .../Conversion/VectorToSCF/VectorToSCF.cpp    | 186 +++++++++---------
 .../VectorToSCF/vector-to-loops.mlir          |  97 +++------
 4 files changed, 151 insertions(+), 158 deletions(-)

diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir
index 8d965779dfc6d..38cbabc329989 100644
--- a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir
+++ b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir
@@ -4,6 +4,7 @@
 // RUN: FileCheck %s

 #map0 = affine_map<(d0, d1) -> (d1, d0)>
+#map1 = affine_map<(d0, d1) -> (d1)>

 func @print_memref_f32(memref<*xf32>)

@@ -29,6 +30,7 @@ func @main() {
   %c0 = constant 0 : index
   %c1 = constant 1 : index
   %c2 = constant 2 : index
+  %c3 = constant 3 : index
   %c6 = constant 6 : index
   %cst = constant -4.2e+01 : f32
   %0 = call @alloc_2d_filled_f32(%c6, %c6) : (index, index) -> memref<?x?xf32>
@@ -76,6 +78,28 @@ func @main() {
 // CHECK-SAME: ( 205, 305, 405, 505, 504 ),
 // CHECK-SAME: ( 105, 205, 305, 405, 505 ) )

+  %3 = vector.transfer_read %0[%c2, %c3], %cst : memref<?x?xf32>, vector<5x5xf32>
+  vector.print %3 : vector<5x5xf32>
+  // New 5x5 block rooted @{2, 3} in memory.
+  // CHECK-NEXT: ( ( 403, 503, 502, -42, -42 ),
+  // CHECK-SAME: ( 404, 504, 503, -42, -42 ),
+  // CHECK-SAME: ( 405, 505, 504, -42, -42 ),
+  // CHECK-SAME: ( 305, 405, 505, -42, -42 ),
+  // CHECK-SAME: ( -42, -42, -42, -42, -42 ) )
+
+  %4 = vector.transfer_read %0[%c2, %c3], %cst {permutation_map = #map0} : memref<?x?xf32>, vector<5x5xf32>
+  vector.print %4 : vector<5x5xf32>
+  // Transposed 5x5 block rooted @{2, 3} in memory.
+ // CHECK-NEXT: ( ( 403, 404, 405, 305, -42 ), + // CHECK-SAME: ( 503, 504, 505, 405, -42 ), + // CHECK-SAME: ( 502, 503, 504, 505, -42 ), + // CHECK-SAME: ( -42, -42, -42, -42, -42 ), + // CHECK-SAME: ( -42, -42, -42, -42, -42 ) ) + + %5 = vector.transfer_read %0[%c2, %c3], %cst {permutation_map = #map1} : memref, vector<5xf32> + vector.print %5 : vector<5xf32> + // CHECK-NEXT: ( 403, 503, 502, -42, -42 ) + dealloc %0 : memref return } diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index a43bec855ff0a..d51a96dca3849 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -1096,7 +1096,7 @@ static bool isContiguous(MemRefType memRefType, SmallVectorImpl &strides) { int64_t offset; auto successStrides = getStridesAndOffset(memRefType, strides, offset); - bool isContiguous = (strides.back() == 1); + bool isContiguous = strides.empty() || strides.back() == 1; if (isContiguous) { auto sizes = memRefType.getShape(); for (int index = 0, e = strides.size() - 2; index < e; ++index) { diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index 08d0117e6a17c..801ead825ffc9 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -111,15 +111,6 @@ class NDTransferOpHelper { template void emitLoops(Lambda loopBodyBuilder); - /// Operate within the body of `emitLoops` to: - /// 1. Compute the indexings `majorIvs + majorOffsets` and save them in - /// `majorIvsPlusOffsets`. - /// 2. Return a boolean that determines whether the first `majorIvs.rank()` - /// dimensions `majorIvs + majorOffsets` are all within `memrefBounds`. - Value emitInBoundsCondition(ValueRange majorIvs, ValueRange majorOffsets, - MemRefBoundsCapture &memrefBounds, - SmallVectorImpl &majorIvsPlusOffsets); - /// Common state to lower vector transfer ops. PatternRewriter &rewriter; const VectorTransferToSCFOptions &options; @@ -196,11 +187,16 @@ static Value onTheFlyFoldSLT(Value v, Value ub) { return slt(v, ub); } -template -Value NDTransferOpHelper::emitInBoundsCondition( - ValueRange majorIvs, ValueRange majorOffsets, - MemRefBoundsCapture &memrefBounds, - SmallVectorImpl &majorIvsPlusOffsets) { +/// 1. Compute the indexings `majorIvs + majorOffsets` and save them in +/// `majorIvsPlusOffsets`. +/// 2. Return a value of i1 that determines whether the first `majorIvs.rank()` +/// dimensions `majorIvs + majorOffsets` are all within `memrefBounds`. +static Value +emitInBoundsCondition(PatternRewriter &rewriter, + VectorTransferOpInterface xferOp, unsigned leadingRank, + ValueRange majorIvs, ValueRange majorOffsets, + MemRefBoundsCapture &memrefBounds, + SmallVectorImpl &majorIvsPlusOffsets) { Value inBoundsCondition; majorIvsPlusOffsets.reserve(majorIvs.size()); unsigned idx = 0; @@ -271,7 +267,8 @@ LogicalResult NDTransferOpHelper::doReplace() { // context. SmallVector majorIvsPlusOffsets; Value inBoundsCondition = emitInBoundsCondition( - majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets); + rewriter, cast(xferOp.getOperation()), + leadingRank, majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets); if (inBoundsCondition) { // 2. If the condition is not null, we need an IfOp, which may yield @@ -374,7 +371,8 @@ LogicalResult NDTransferOpHelper::doReplace() { // context. 
SmallVector majorIvsPlusOffsets; Value inBoundsCondition = emitInBoundsCondition( - majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets); + rewriter, cast(xferOp.getOperation()), + leadingRank, majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets); if (inBoundsCondition) { // 2.a. If the condition is not null, we need an IfOp, to write @@ -424,60 +422,6 @@ static int computeCoalescedIndex(TransferOpTy transfer) { return coalescedIdx; } -/// Emits remote memory accesses that are clipped to the boundaries of the -/// MemRef. -template -static SmallVector -clip(TransferOpTy transfer, MemRefBoundsCapture &bounds, ArrayRef ivs) { - using namespace mlir::edsc; - - Value zero(std_constant_index(0)), one(std_constant_index(1)); - SmallVector memRefAccess(transfer.indices()); - SmallVector clippedScalarAccessExprs(memRefAccess.size()); - // Indices accessing to remote memory are clipped and their expressions are - // returned in clippedScalarAccessExprs. - for (unsigned memRefDim = 0; memRefDim < clippedScalarAccessExprs.size(); - ++memRefDim) { - // Linear search on a small number of entries. - int loopIndex = -1; - auto exprs = transfer.permutation_map().getResults(); - for (auto en : llvm::enumerate(exprs)) { - auto expr = en.value(); - auto dim = expr.template dyn_cast(); - // Sanity check. - assert( - (dim || expr.template cast().getValue() == 0) && - "Expected dim or 0 in permutationMap"); - if (dim && memRefDim == dim.getPosition()) { - loopIndex = en.index(); - break; - } - } - - // We cannot distinguish atm between unrolled dimensions that implement - // the "always full" tile abstraction and need clipping from the other - // ones. So we conservatively clip everything. - using namespace edsc::op; - auto N = bounds.ub(memRefDim); - auto i = memRefAccess[memRefDim]; - if (loopIndex < 0) { - auto N_minus_1 = N - one; - auto select_1 = std_select(slt(i, N), i, N_minus_1); - clippedScalarAccessExprs[memRefDim] = - std_select(slt(i, zero), zero, select_1); - } else { - auto ii = ivs[loopIndex]; - auto i_plus_ii = i + ii; - auto N_minus_1 = N - one; - auto select_1 = std_select(slt(i_plus_ii, N), i_plus_ii, N_minus_1); - clippedScalarAccessExprs[memRefDim] = - std_select(slt(i_plus_ii, zero), zero, select_1); - } - } - - return clippedScalarAccessExprs; -} - namespace mlir { template @@ -497,6 +441,60 @@ MemRefType VectorTransferRewriter::tmpMemRefType( {}, 0); } +static void emitWithBoundsChecks( + PatternRewriter &rewriter, VectorTransferOpInterface transfer, + ValueRange ivs, MemRefBoundsCapture &memRefBoundsCapture, + function_ref)> inBoundsFun, + function_ref)> outOfBoundsFun = nullptr) { + // Permute the incoming indices according to the permutation map. + SmallVector indices = + linalg::applyMapToValues(rewriter, transfer.getLoc(), + transfer.permutation_map(), transfer.indices()); + + // Generate a bounds check if necessary. + SmallVector majorIvsPlusOffsets; + Value inBoundsCondition = + emitInBoundsCondition(rewriter, transfer, 0, ivs, indices, + memRefBoundsCapture, majorIvsPlusOffsets); + + // Apply the permutation map to the ivs. The permutation map may not use all + // the inputs. + SmallVector scalarAccessExprs(transfer.indices().size()); + for (unsigned memRefDim = 0; memRefDim < transfer.indices().size(); + ++memRefDim) { + // Linear search on a small number of entries. + int loopIndex = -1; + auto exprs = transfer.permutation_map().getResults(); + for (auto en : llvm::enumerate(exprs)) { + auto expr = en.value(); + auto dim = expr.dyn_cast(); + // Sanity check. 
+ assert((dim || expr.cast().getValue() == 0) && + "Expected dim or 0 in permutationMap"); + if (dim && memRefDim == dim.getPosition()) { + loopIndex = en.index(); + break; + } + } + + using namespace edsc::op; + auto i = transfer.indices()[memRefDim]; + scalarAccessExprs[memRefDim] = loopIndex < 0 ? i : i + ivs[loopIndex]; + } + + if (inBoundsCondition) + conditionBuilder( + /* scf.if */ inBoundsCondition, // { + [&] { inBoundsFun(scalarAccessExprs); }, + // } else { + outOfBoundsFun ? [&] { outOfBoundsFun(scalarAccessExprs); } + : function_ref() + // } + ); + else + inBoundsFun(scalarAccessExprs); +} + /// Lowers TransferReadOp into a combination of: /// 1. local memory allocation; /// 2. perfect loop nest over: @@ -588,17 +586,25 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( Value tmp = setAllocAtFunctionEntry(tmpMemRefType(transfer), transfer); StdIndexedValue local(tmp); loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) { - auto ivs = llvm::to_vector<8>(loopIvs); + auto ivsStorage = llvm::to_vector<8>(loopIvs); // Swap the ivs which will reorder memory accesses. if (coalescedIdx >= 0) - std::swap(ivs.back(), ivs[coalescedIdx]); - // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist). - SmallVector indices = clip(transfer, memRefBoundsCapture, ivs); - ArrayRef indicesRef(indices), ivsRef(ivs); - Value pos = std_index_cast(IntegerType::get(32, ctx), ivsRef.back()); - Value scal = remote(indicesRef); - Value vector = vector_insert_element(scal, local(ivsRef.drop_back()), pos); - local(ivsRef.drop_back()) = vector; + std::swap(ivsStorage.back(), ivsStorage[coalescedIdx]); + + ArrayRef ivs(ivsStorage); + Value pos = std_index_cast(IntegerType::get(32, ctx), ivs.back()); + Value inVector = local(ivs.drop_back()); + auto loadValue = [&](ArrayRef indices) { + Value vector = vector_insert_element(remote(indices), inVector, pos); + local(ivs.drop_back()) = vector; + }; + auto loadPadding = [&](ArrayRef) { + Value vector = vector_insert_element(transfer.padding(), inVector, pos); + local(ivs.drop_back()) = vector; + }; + emitWithBoundsChecks( + rewriter, cast(transfer.getOperation()), ivs, + memRefBoundsCapture, loadValue, loadPadding); }); Value vectorValue = std_load(vector_type_cast(tmp)); @@ -674,17 +680,21 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( Value vec = vector_type_cast(tmp); std_store(vectorValue, vec); loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) { - auto ivs = llvm::to_vector<8>(loopIvs); - // Swap the ivs which will reorder memory accesses. + auto ivsStorage = llvm::to_vector<8>(loopIvs); + // Swap the ivsStorage which will reorder memory accesses. if (coalescedIdx >= 0) - std::swap(ivs.back(), ivs[coalescedIdx]); - // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist). - SmallVector indices = clip(transfer, memRefBoundsCapture, ivs); - ArrayRef indicesRef(indices), ivsRef(ivs); + std::swap(ivsStorage.back(), ivsStorage[coalescedIdx]); + + ArrayRef ivs(ivsStorage); Value pos = - std_index_cast(IntegerType::get(32, op->getContext()), ivsRef.back()); - Value scalar = vector_extract_element(local(ivsRef.drop_back()), pos); - remote(indices) = scalar; + std_index_cast(IntegerType::get(32, op->getContext()), ivs.back()); + auto storeValue = [&](ArrayRef indices) { + Value scalar = vector_extract_element(local(ivs.drop_back()), pos); + remote(indices) = scalar; + }; + emitWithBoundsChecks( + rewriter, cast(transfer.getOperation()), ivs, + memRefBoundsCapture, storeValue); }); // 3. Erase. 
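The control flow that replaces the old clipping, reduced to its core
(names as in the transfer_read hunk above):

  // Guard each scalar access with an in-bounds test instead of clamping
  // the indices. The read inserts the pad value on the out-of-bounds
  // branch; the write passes no out-of-bounds callback, so the store is
  // simply skipped.
  emitWithBoundsChecks(
      rewriter, xferOp, ivs, memRefBoundsCapture,
      /*inBoundsFun=*/[&](ArrayRef<Value> indices) { /* load or store */ },
      /*outOfBoundsFun=*/[&](ArrayRef<Value>) { /* insert padding */ });

This is why the test updates below replace the long cmpi/select chains
with scf.if blocks.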
diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir index 5e8aea1f51135..ef1b2e995053c 100644 --- a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir @@ -15,11 +15,13 @@ func @materialize_read_1d() { %ip3 = affine.apply affine_map<(d0) -> (d0 + 3)> (%i1) %f4 = vector.transfer_read %A[%i0, %ip3], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32> // Both accesses in the load must be clipped otherwise %i1 + 2 and %i1 + 3 will go out of bounds. - // CHECK: {{.*}} = select - // CHECK: %[[FILTERED1:.*]] = select - // CHECK: {{.*}} = select - // CHECK: %[[FILTERED2:.*]] = select - // CHECK: %{{.*}} = load {{.*}}[%[[FILTERED1]], %[[FILTERED2]]] : memref<7x42xf32> + // CHECK: scf.if + // CHECK-NEXT: load + // CHECK-NEXT: vector.insertelement + // CHECK-NEXT: store + // CHECK-NEXT: else + // CHECK-NEXT: vector.insertelement + // CHECK-NEXT: store } } return @@ -53,7 +55,6 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d // ----- // CHECK: #[[$ADD:map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)> -// CHECK: #[[$SUB:map[0-9]+]] = affine_map<()[s0] -> (s0 - 1)> // CHECK-LABEL: func @materialize_read(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) { func @materialize_read(%M: index, %N: index, %O: index, %P: index) { @@ -72,37 +73,18 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] { - // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I0]], %[[I4]]) - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index - // CHECK-NEXT: {{.*}} = select - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[L0:.*]] = select - // - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index - // CHECK-NEXT: {{.*}} = select - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[L1:.*]] = select - // - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index - // CHECK-NEXT: {{.*}} = select - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[L2:.*]] = select - // - // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I3]], %[[I6]]) - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index - // CHECK-NEXT: {{.*}} = select - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[L3:.*]] = select - // CHECK-NEXT: %[[VIDX:.*]] = index_cast %[[I4]] - // - // CHECK-NEXT: %[[SCAL:.*]] = load %{{.*}}[%[[L0]], %[[L1]], %[[L2]], %[[L3]]] : memref - // CHECK-NEXT: %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> - // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> - // CHECK-NEXT: store %[[RVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK: %[[VIDX:.*]] = index_cast %[[I4]] + // CHECK: %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK: %[[L0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I4]]) + // CHECK: %[[L3:.*]] = affine.apply 
#[[$ADD]](%[[I3]], %[[I6]]) + // CHECK-NEXT: scf.if + // CHECK-NEXT: %[[SCAL:.*]] = load %{{.*}}[%[[L0]], %[[I1]], %[[I2]], %[[L3]]] : memref + // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> + // CHECK-NEXT: store %[[RVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK-NEXT: } else { + // CHECK-NEXT: %[[CVEC:.*]] = vector.insertelement + // CHECK-NEXT: store %[[CVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } @@ -132,7 +114,6 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // ----- // CHECK: #[[$ADD:map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)> -// CHECK: #[[$SUB:map[0-9]+]] = affine_map<()[s0] -> (s0 - 1)> // CHECK-LABEL:func @materialize_write(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) { func @materialize_write(%M: index, %N: index, %O: index, %P: index) { @@ -153,37 +134,15 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] { - // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I0]], %[[I4]]) - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index - // CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[S0:.*]] = select {{.*}}, %[[C0]], {{.*}} : index - // - // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I1]], %[[I5]]) - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index - // CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[S1:.*]] = select {{.*}}, %[[C0]], {{.*}} : index - // - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", %[[I2]], %{{.*}} : index - // CHECK-NEXT: {{.*}} = select {{.*}}, %[[I2]], {{.*}} : index - // CHECK-NEXT: {{.*}} = cmpi "slt", %[[I2]], %[[C0]] : index - // CHECK-NEXT: %[[S2:.*]] = select {{.*}}, %[[C0]], {{.*}} : index - // - // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I3]], %[[I6]]) - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index - // CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[S3:.*]] = select {{.*}}, %[[C0]], {{.*}} : index - // CHECK-NEXT: %[[VIDX:.*]] = index_cast %[[I4]] - // - // CHECK-NEXT: %[[VEC:.*]] = load {{.*}}[%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> - // CHECK-NEXT: %[[SCAL:.*]] = vector.extractelement %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> - // CHECK-NEXT: store %[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[S2]], %[[S3]]] : memref + // CHECK: %[[VIDX:.*]] = index_cast %[[I4]] + // CHECK: %[[S0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I4]]) + // CHECK: %[[S1:.*]] = affine.apply #[[$ADD]](%[[I1]], %[[I5]]) + // CHECK: %[[S3:.*]] = affine.apply #[[$ADD]](%[[I3]], %[[I6]]) + // CHECK-NEXT: scf.if + // CHECK-NEXT: %[[VEC:.*]] = load {{.*}}[%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK-NEXT: %[[SCAL:.*]] = vector.extractelement %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> + // 
CHECK: store %[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[I2]], %[[S3]]] : memref + // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } From 8d9c13f37d2081c11186718ae8b5aef8b507d152 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Tue, 8 Sep 2020 17:20:00 +0800 Subject: [PATCH 0037/1079] Revert "[PowerPC] Implement instruction clustering for stores" This reverts commit 3c0b3250230b3847a2a47dfeacfdb794c2285f02, (along with ea795304 and bb39eb9e) since it breaks test with UB sanitizer. --- llvm/lib/Target/PowerPC/PPC.td | 11 +- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 108 +------ llvm/lib/Target/PowerPC/PPCInstrInfo.h | 13 - llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 1 - llvm/lib/Target/PowerPC/PPCSubtarget.h | 2 - llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 4 - .../test/CodeGen/PowerPC/fusion-load-store.ll | 268 ------------------ .../PowerPC/pcrel-call-linkage-leaf.ll | 2 +- 8 files changed, 5 insertions(+), 404 deletions(-) delete mode 100644 llvm/test/CodeGen/PowerPC/fusion-load-store.ll diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 1b38a6f1d13d9..a617715d4bd86 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -174,9 +174,6 @@ def FeatureAddisLoadFusion : SubtargetFeature<"fuse-addis-load", "HasAddisLoadFusion", "true", "Power8 Addis-Load fusion", [FeatureFusion]>; -def FeatureStoreFusion : SubtargetFeature<"fuse-store", "HasStoreFusion", "true", - "Target supports store clustering", - [FeatureFusion]>; def FeatureUnalignedFloats : SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess", "true", "CPU does not trap on unaligned FP access">; @@ -348,12 +345,10 @@ def ProcessorFeatures { // Power10 // For P10 CPU we assume that all of the existing features from Power9 // still exist with the exception of those we know are Power9 specific. - list FusionFeatures = [FeatureStoreFusion]; list P10AdditionalFeatures = - !listconcat(FusionFeatures, [ - DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs, - FeaturePCRelativeMemops, FeatureP10Vector, FeatureMMA, - FeaturePairedVectorMemops]); + [DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs, + FeaturePCRelativeMemops, FeatureP10Vector, FeatureMMA, + FeaturePairedVectorMemops]; list P10SpecificFeatures = []; list P10InheritableFeatures = !listconcat(P9InheritableFeatures, P10AdditionalFeatures); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 8cb8c82e62833..2423bca42e805 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2222,111 +2222,6 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, return true; } -bool PPCInstrInfo::getMemOperandsWithOffsetWidth( - const MachineInstr &LdSt, SmallVectorImpl &BaseOps, - int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, - const TargetRegisterInfo *TRI) const { - const MachineOperand *BaseOp; - if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI)) - return false; - BaseOps.push_back(BaseOp); - return true; -} - -static bool isLdStSafeToCluster(const MachineInstr &LdSt, - const TargetRegisterInfo *TRI) { - // If this is a volatile load/store, don't mess with it. - if (LdSt.hasOrderedMemoryRef()) - return false; - - if (LdSt.getOperand(2).isFI()) - return true; - - assert(LdSt.getOperand(2).isReg() && "Expected a reg operand."); - // Can't cluster if the instruction modifies the base register - // or it is update form. e.g. 
ld r2,3(r2) - if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI)) - return false; - - return true; -} - -// Only cluster instruction pair that have the same opcode, and they are -// clusterable according to PowerPC specification. -static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc, - const PPCSubtarget &Subtarget) { - switch (FirstOpc) { - default: - return false; - case PPC::STD: - case PPC::STFD: - case PPC::STXSD: - case PPC::DFSTOREf64: - return FirstOpc == SecondOpc; - // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with - // 32bit and 64bit instruction selection. They are clusterable pair though - // they are different opcode. - case PPC::STW: - case PPC::STW8: - return SecondOpc == PPC::STW || SecondOpc == PPC::STW8; - } -} - -bool PPCInstrInfo::shouldClusterMemOps( - ArrayRef BaseOps1, - ArrayRef BaseOps2, unsigned NumLoads, - unsigned NumBytes) const { - - assert(BaseOps1.size() == 1 && BaseOps2.size() == 1); - const MachineOperand &BaseOp1 = *BaseOps1.front(); - const MachineOperand &BaseOp2 = *BaseOps2.front(); - assert((BaseOp1.isReg() || BaseOp1.isFI()) && - "Only base registers and frame indices are supported."); - - // The NumLoads means the number of loads that has been clustered. - // Don't cluster memory op if there are already two ops clustered at least. - if (NumLoads > 2) - return false; - - // Cluster the load/store only when they have the same base - // register or FI. - if ((BaseOp1.isReg() != BaseOp2.isReg()) || - (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) || - (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex())) - return false; - - // Check if the load/store are clusterable according to the PowerPC - // specification. - const MachineInstr &FirstLdSt = *BaseOp1.getParent(); - const MachineInstr &SecondLdSt = *BaseOp2.getParent(); - unsigned FirstOpc = FirstLdSt.getOpcode(); - unsigned SecondOpc = SecondLdSt.getOpcode(); - const TargetRegisterInfo *TRI = &getRegisterInfo(); - // Cluster the load/store only when they have the same opcode, and they are - // clusterable opcode according to PowerPC specification. - if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget)) - return false; - - // Can't cluster load/store that have ordered or volatile memory reference. - if (!isLdStSafeToCluster(FirstLdSt, TRI) || - !isLdStSafeToCluster(SecondLdSt, TRI)) - return false; - - int64_t Offset1 = 0, Offset2 = 0; - unsigned Width1 = 0, Width2 = 0; - const MachineOperand *Base1 = nullptr, *Base2 = nullptr; - if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) || - !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) || - Width1 != Width2) - return false; - - assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 && - "getMemOperandWithOffsetWidth return incorrect base op"); - // The caller should already have ordered FirstMemOp/SecondMemOp by offset. - assert(Offset1 <= Offset2 && "Caller should have ordered offsets."); - return Offset1 + Width1 == Offset2; -} - /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. /// @@ -4769,8 +4664,7 @@ bool PPCInstrInfo::getMemOperandWithOffsetWidth( return false; // Handle only loads/stores with base register followed by immediate offset. 
- if (!LdSt.getOperand(1).isImm() || - (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI())) + if (LdSt.getNumExplicitOperands() != 3) return false; if (!LdSt.getOperand(1).isImm() || (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI())) diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 2f867b16aa24f..75e8224892f4c 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -494,19 +494,6 @@ class PPCInstrInfo : public PPCGenInstrInfo { int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const; - /// Get the base operand and byte offset of an instruction that reads/writes - /// memory. - bool getMemOperandsWithOffsetWidth( - const MachineInstr &MI, SmallVectorImpl &BaseOps, - int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, - const TargetRegisterInfo *TRI) const override; - - /// Returns true if the two given memory operations should be scheduled - /// adjacent. - bool shouldClusterMemOps(ArrayRef BaseOps1, - ArrayRef BaseOps2, - unsigned NumLoads, unsigned NumBytes) const override; - /// Return true if two MIs access different memory addresses and false /// otherwise bool diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 05922dbb38fc6..8021cfa4a18c6 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -108,7 +108,6 @@ void PPCSubtarget::initializeEnvironment() { HasHTM = false; HasFloat128 = false; HasFusion = false; - HasStoreFusion = false; HasAddiLoadFusion = false; HasAddisLoadFusion = false; IsISA3_0 = false; diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index 0a134bb83ed2f..76b43dfc7a723 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -137,7 +137,6 @@ class PPCSubtarget : public PPCGenSubtargetInfo { bool HasHTM; bool HasFloat128; bool HasFusion; - bool HasStoreFusion; bool HasAddiLoadFusion; bool HasAddisLoadFusion; bool IsISA3_0; @@ -309,7 +308,6 @@ class PPCSubtarget : public PPCGenSubtargetInfo { bool isISA3_1() const { return IsISA3_1; } bool useLongCalls() const { return UseLongCalls; } bool hasFusion() const { return HasFusion; } - bool hasStoreFusion() const { return HasStoreFusion; } bool hasAddiLoadFusion() const { return HasAddiLoadFusion; } bool hasAddisLoadFusion() const { return HasAddisLoadFusion; } bool needsSwapsForVSXMemOps() const { diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index c5671d6c73e05..ea9b37de6ff39 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -271,8 +271,6 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) { std::make_unique(C)); // add DAG Mutations here. DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); - if (ST.hasStoreFusion()) - DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); if (ST.hasFusion()) DAG->addMutation(createPowerPCMacroFusionDAGMutation()); @@ -287,8 +285,6 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler( std::make_unique(C) : std::make_unique(C), true); // add DAG Mutations here. 
- if (ST.hasStoreFusion()) - DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); if (ST.hasFusion()) DAG->addMutation(createPowerPCMacroFusionDAGMutation()); return DAG; diff --git a/llvm/test/CodeGen/PowerPC/fusion-load-store.ll b/llvm/test/CodeGen/PowerPC/fusion-load-store.ll deleted file mode 100644 index 75b2eca2168c0..0000000000000 --- a/llvm/test/CodeGen/PowerPC/fusion-load-store.ll +++ /dev/null @@ -1,268 +0,0 @@ -; Test if several consecutive loads/stores can be clustered(fused) by scheduler. The -; scheduler will print "Cluster ld/st SU(x) - SU(y)" if SU(x) and SU(y) are fused. - -; REQUIRES: asserts -; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 \ -; RUN: -mattr=-paired-vector-memops,-pcrelative-memops -verify-misched \ -; RUN: -debug-only=machine-scheduler 2>&1 | FileCheck %s - -define i64 @store_i64(i64* nocapture %P, i64 %v) { -entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i64:%bb.0 -; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) -; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, 24 -; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, 16 -; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, 8 -; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, 32 -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i64:%bb.0 -; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) -; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], 16 -; CHECK: SU([[SU1]]): STD renamable $x[[REG]], 8 -; CHECK: SU([[SU2]]): STD renamable $x[[REG]], 24 -; CHECK: SU([[SU3]]): STD renamable $x[[REG]], 32 - %arrayidx = getelementptr inbounds i64, i64* %P, i64 3 - store i64 %v, i64* %arrayidx - %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2 - store i64 %v, i64* %arrayidx1 - %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1 - store i64 %v, i64* %arrayidx2 - %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4 - store i64 %v, i64* %arrayidx3 - ret i64 %v -} - -define i32 @store_i32(i32* nocapture %P, i32 %v) { -entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i32:%bb.0 -; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) -; CHECK: SU([[SU2]]): STW %[[REG:[0-9]+]].sub_32:g8rc, 52 -; CHECK: SU([[SU3]]): STW %[[REG]].sub_32:g8rc, 48 -; CHECK: SU([[SU4]]): STW %[[REG]].sub_32:g8rc, 44 -; CHECK: SU([[SU5]]): STW %[[REG]].sub_32:g8rc, 56 -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i32:%bb.0 -; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) -; CHECK: SU([[SU0]]): STW renamable $r[[REG:[0-9]+]], 48 -; CHECK: SU([[SU1]]): STW renamable $r[[REG]], 44 -; CHECK: SU([[SU2]]): STW renamable $r[[REG]], 52 -; CHECK: SU([[SU3]]): STW renamable $r[[REG]], 56 - %arrayidx = getelementptr inbounds i32, i32* %P, i32 13 - store i32 %v, i32* %arrayidx - %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 12 - store i32 %v, i32* %arrayidx1 - %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 11 - store i32 %v, i32* %arrayidx2 - %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 14 - store i32 %v, i32* %arrayidx3 - ret i32 %v -} - -define void @store_i64_neg(i64* nocapture %P, i64 %v) #0 { -entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i64_neg:%bb.0 -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - 
SU([[SU5:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) -; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, -24 -; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, -8 -; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, -16 -; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, -32 -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i64_neg:%bb.0 -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) -; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], -8 -; CHECK: SU([[SU1]]): STD renamable $x[[REG]], -16 -; CHECK: SU([[SU2]]): STD renamable $x[[REG]], -24 -; CHECK: SU([[SU3]]): STD renamable $x[[REG]], -32 - %arrayidx = getelementptr inbounds i64, i64* %P, i64 -3 - store i64 %v, i64* %arrayidx - %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 -1 - store i64 %v, i64* %arrayidx1 - %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 -2 - store i64 %v, i64* %arrayidx2 - %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 -4 - store i64 %v, i64* %arrayidx3 - ret void -} - -define void @store_i32_neg(i32* nocapture %P, i32 %v) #0 { -entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i32_neg:%bb.0 -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) -; CHECK: SU([[SU2]]): STW %[[REG:[0-9]+]].sub_32:g8rc, -12 -; CHECK: SU([[SU3]]): STW %[[REG]].sub_32:g8rc, -4 -; CHECK: SU([[SU4]]): STW %[[REG]].sub_32:g8rc, -8 -; CHECK: SU([[SU5]]): STW %[[REG]].sub_32:g8rc, -16 -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i32_neg:%bb.0 -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) -; CHECK:SU([[SU0]]): STW renamable $r[[REG:[0-9]+]], -4 -; CHECK:SU([[SU1]]): STW renamable $r[[REG]], -8 -; CHECK:SU([[SU2]]): STW renamable $r[[REG]], -12 -; CHECK:SU([[SU3]]): STW renamable $r[[REG]], -16 - %arrayidx = getelementptr inbounds i32, i32* %P, i32 -3 - store i32 %v, i32* %arrayidx - %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 -1 - store i32 %v, i32* %arrayidx1 - %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 -2 - store i32 %v, i32* %arrayidx2 - %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 -4 - store i32 %v, i32* %arrayidx3 - ret void -} - -define void @store_double(double* nocapture %P, double %v) { -entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_double:%bb.0 -; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) -; CHECK: SU([[SU2]]): DFSTOREf64 %[[REG:[0-9]+]]:vsfrc, 24 -; CHECK: SU([[SU3]]): DFSTOREf64 %[[REG]]:vsfrc, 8 -; CHECK: SU([[SU4]]): DFSTOREf64 %[[REG]]:vsfrc, 16 -; CHECK: SU([[SU5]]): DFSTOREf64 %[[REG]]:vsfrc, 32 -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_double:%bb.0 -; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) -; CHECK: SU([[SU0]]): STFD renamable $f[[REG:[0-9]+]], 8 -; CHECK: SU([[SU1]]): STFD renamable $f[[REG]], 16 -; CHECK: SU([[SU2]]): STFD renamable $f[[REG]], 24 -; CHECK: SU([[SU3]]): STFD renamable $f[[REG]], 32 - %arrayidx = getelementptr inbounds double, double* %P, i64 3 - store double %v, double* %arrayidx - %arrayidx1 = getelementptr inbounds double, double* %P, i64 1 - store double %v, double* %arrayidx1 - %arrayidx2 = getelementptr inbounds double, double* %P, i64 2 - 
store double %v, double* %arrayidx2 - %arrayidx3 = getelementptr inbounds double, double* %P, i64 4 - store double %v, double* %arrayidx3 - ret void -} - -define void @store_float(float* nocapture %P, float %v) { -entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_float:%bb.0 -; CHECK-NOT: Cluster ld/st -; CHECK-NOT: Cluster ld/st -; CHECK: SU([[SU2]]): DFSTOREf32 %[[REG:[0-9]+]]:vssrc, 12 -; CHECK: SU([[SU3]]): DFSTOREf32 %[[REG]]:vssrc, 4 -; CHECK: SU([[SU4]]): DFSTOREf32 %[[REG]]:vssrc, 8 -; CHECK: SU([[SU5]]): DFSTOREf32 %[[REG]]:vssrc, 16 -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_float:%bb.0 -; CHECK-NOT: Cluster ld/st -; CHECK-NOT: Cluster ld/st -; CHECK: SU([[SU0]]): STFS renamable $f[[REG:[0-9]+]], 12 -; CHECK: SU([[SU1]]): STFS renamable $f[[REG]], 4 -; CHECK: SU([[SU2]]): STFS renamable $f[[REG]], 8 -; CHECK: SU([[SU3]]): STFS renamable $f[[REG]], 16 - %arrayidx = getelementptr inbounds float, float* %P, i64 3 - store float %v, float* %arrayidx - %arrayidx1 = getelementptr inbounds float, float* %P, i64 1 - store float %v, float* %arrayidx1 - %arrayidx2 = getelementptr inbounds float, float* %P, i64 2 - store float %v, float* %arrayidx2 - %arrayidx3 = getelementptr inbounds float, float* %P, i64 4 - store float %v, float* %arrayidx3 - ret void -} - -; Cannot fuse the store/load if there is volatile in between -define i64 @store_volatile(i64* nocapture %P, i64 %v) { -entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_volatile:%bb.0 -; CHECK-NOT: Cluster ld/st -; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, 24 -; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, 16 -; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, 8 -; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, 32 -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_volatile:%bb.0 -; CHECK-NOT: Cluster ld/st -; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], 24 -; CHECK: SU([[SU1]]): STD renamable $x[[REG]], 16 -; CHECK: SU([[SU2]]): STD renamable $x[[REG]], 8 -; CHECK: SU([[SU3]]): STD renamable $x[[REG]], 32 - %arrayidx = getelementptr inbounds i64, i64* %P, i64 3 - store volatile i64 %v, i64* %arrayidx - %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2 - store volatile i64 %v, i64* %arrayidx1 - %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1 - store volatile i64 %v, i64* %arrayidx2 - %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4 - store volatile i64 %v, i64* %arrayidx3 - ret i64 %v -} - -@p = common local_unnamed_addr global [100 x i32] zeroinitializer, align 4 - -define void @store_i32_stw_stw8(i32 signext %m, i32 signext %n) { -entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i32_stw_stw8:%bb.0 -; CHECK: Cluster ld/st SU([[SU5:[0-9]+]]) - SU([[SU8:[0-9]+]]) -; CHECK: SU([[SU5]]): STW8 %{{[0-9]+}}:g8rc, 24 -; CHECK: SU([[SU8]]): STW %{{[0-9]+}}:gprc, 20 -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i32_stw_stw8:%bb.0 -; CHECK: Cluster ld/st SU([[SU5:[0-9]+]]) - SU([[SU6:[0-9]+]]) -; CHECK: SU([[SU5]]): STW8 renamable $x{{[0-9]+}}, 24 -; CHECK: SU([[SU6]]): STW renamable $r{{[0-9]+}}, 20 - store i32 9, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 6), align 4 - store i32 %n, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 7), align 4 - %add = add nsw i32 %n, %m - store i32 %add, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 5), align 4 - ret void -} - -define void @store_i32_stw8(i32 signext %m, i32 signext %n) { 
-entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i32_stw8:%bb.0 -; CHECK: Cluster ld/st SU([[SU4:[0-9]+]]) - SU([[SU5:[0-9]+]]) -; CHECK: SU([[SU4]]): STW8 %{{[0-9]+}}:g8rc, 24 -; CHECK: SU([[SU5]]): STW8 %{{[0-9]+}}:g8rc, 28 -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i32_stw8:%bb.0 -; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) -; CHECK: SU([[SU3]]): STW8 renamable $x{{[0-9]+}}, 24 -; CHECK: SU([[SU4]]): STW8 renamable $x{{[0-9]+}}, 28 - store i32 9, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 6), align 4 - store i32 %n, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 7), align 4 - ret void -} - -declare void @bar(i64*) - -define void @store_frame_index(i32 %a, i32 %b) { -entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_frame_index:%bb.0 -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) -; CHECK: SU([[SU2]]): STD %{{[0-9]+}}:g8rc, 0, %stack.0.buf -; CHECK: SU([[SU3]]): STD %{{[0-9]+}}:g8rc, 8, %stack.0.buf - %buf = alloca [8 x i64], align 8 - %0 = bitcast [8 x i64]* %buf to i8* - %conv = zext i32 %a to i64 - %arrayidx = getelementptr inbounds [8 x i64], [8 x i64]* %buf, i64 0, i64 0 - store i64 %conv, i64* %arrayidx, align 8 - %conv1 = zext i32 %b to i64 - %arrayidx2 = getelementptr inbounds [8 x i64], [8 x i64]* %buf, i64 0, i64 1 - store i64 %conv1, i64* %arrayidx2, align 8 - call void @bar(i64* nonnull %arrayidx) - ret void -} diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll index 1623889200848..9141fdc735a0e 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll @@ -104,7 +104,6 @@ define dso_local signext i32 @X2IsCallerSaved(i32 signext %a, i32 signext %b, i3 ; CHECK-P9-NOT: .localentry ; CHECK-ALL: # %bb.0: # %entry ; CHECK-S-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-S-NEXT: add r11, r4, r3 ; CHECK-S-NEXT: sub r29, r8, r9 ; CHECK-S-NEXT: add r9, r10, r9 @@ -120,6 +119,7 @@ define dso_local signext i32 @X2IsCallerSaved(i32 signext %a, i32 signext %b, i3 ; CHECK-S-NEXT: mullw r3, r3, r7 ; CHECK-S-NEXT: sub r2, r6, r7 ; CHECK-S-NEXT: mullw r3, r3, r8 +; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-S-NEXT: add r30, r8, r7 ; CHECK-S-NEXT: mullw r3, r3, r2 ; CHECK-S-NEXT: mullw r3, r3, r30 From 7aabb6ad7764366fd3150d18b16da9aef35e6492 Mon Sep 17 00:00:00 2001 From: Sam Tebbs Date: Mon, 7 Sep 2020 10:39:14 +0100 Subject: [PATCH 0038/1079] [ARM][LowOverheadLoops] Remove modifications to the correct element count register After my patch at D86087, code that now uses the mov operand rather than the vctp operand will no longer remove modifications to the vctp operand as they should. This patch fixes that by explicitly removing modifications to the vctp operand rather than the register used as the element count. 
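In sketch form, the lookup now anchors on the VCTP's own element-count operand rather than the traced-back register (illustration only, mirroring the hunk below; the ElemCount local is a hypothetical name):

  // Find the unique reaching def of the element count as the VCTP
  // consumes it. Anchoring on NumElements (the register behind an
  // intervening tMOVr after D86087) could miss the per-iteration SUBS
  // that must be removed once the loop is tail-predicated.
  Register ElemCount = VCTP->getOperand(1).getReg();
  if (MachineInstr *Def = RDA.getUniqueReachingMIDef(&MBB->back(), ElemCount)) {
    // ... collect the chain of instructions feeding only the VCTP and
    // mark it for removal ...
  }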
--- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 7 ++++++- .../CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir | 5 ++--- llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll | 1 - 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index a98590fd79c68..69e188fe5f888 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -527,7 +527,12 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) { }; MBB = VCTP->getParent(); - if (auto *Def = RDA.getUniqueReachingMIDef(&MBB->back(), NumElements)) { + // Remove modifications to the element count since they have no purpose in a + // tail predicated loop. Explicitly refer to the vctp operand no matter which + // register NumElements has been assigned to, since that is what the + // modifications will be using + if (auto *Def = RDA.getUniqueReachingMIDef(&MBB->back(), + VCTP->getOperand(1).getReg())) { SmallPtrSet ElementChain; SmallPtrSet Ignore = { VCTP }; unsigned ExpectedVectorWidth = getTailPredVectorWidth(VCTP->getOpcode()); diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir index 9a5856335dfc6..210eae9e64350 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir @@ -173,11 +173,10 @@ body: | ; CHECK: renamable $r3 = VMOVRS killed renamable $s0, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 - ; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg + ; CHECK: dead $r3 = tMOVr $r1, 14 /* CC::al */, $noreg ; CHECK: bb.3.do.body: ; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000) - ; CHECK: liveins: $lr, $q0, $q1, $r0, $r1, $r2, $r3 - ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg + ; CHECK: liveins: $lr, $q0, $q1, $r0, $r1, $r2 ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.01, align 4) ; CHECK: renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VSUBf32 killed renamable $q2, renamable $q1, 0, $noreg, undef renamable $q2 ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q0, killed renamable $q2, killed renamable $q2, 0, killed $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll index 5a370e5f96e76..1cf101ea5d5f1 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll @@ -27,7 +27,6 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float ; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: .LBB0_3: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vldrw.u32 q2, [r0], #16 ; CHECK-NEXT: vsub.f32 q2, q2, q1 ; CHECK-NEXT: vfma.f32 q0, q2, q2 From 83d82d1fb1cfac06257ebbd7c063a3d2d1af20fb Mon Sep 17 00:00:00 2001 From: Jakub Lichman Date: Tue, 8 Sep 2020 09:42:25 +0000 Subject: [PATCH 0039/1079] [mlir] Fix of broken build on windows caused by using uint --- mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index cd36c753b6f69..51781af9cb304 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -371,7 +371,7 @@ LogicalResult LinalgCopyVTWForwardingPattern::matchAndRewrite( template LogicalResult ConvOpVectorization::matchAndRewrite( ConvOp op, PatternRewriter &rewriter) const { - const uint dimSize = 3; + const unsigned dimSize = 3; Location loc = op.getLoc(); MLIRContext *context = op.getContext(); edsc::ScopedContext scope(rewriter, loc); @@ -402,8 +402,8 @@ LogicalResult ConvOpVectorization::matchAndRewrite( Value kernel = op.getInput(1); Value output = op.getOutputBuffer(0); - uint rank = inShapeType.getRank(); - uint numDims = mapping.size(); + unsigned rank = inShapeType.getRank(); + unsigned numDims = mapping.size(); Type elemType = inShapeType.getElementType(); auto map = AffineMap::get(rank, 0, mapping, context); From 2325d6b42f096bf93d2ab0bed7096759e5c96ce8 Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Thu, 27 Aug 2020 09:43:14 +0000 Subject: [PATCH 0040/1079] [SyntaxTree] Ignore implicit non-leaf `CXXConstructExpr` Differential Revision: https://reviews.llvm.org/D86699 --- clang/lib/Tooling/Syntax/BuildTree.cpp | 27 +- .../Tooling/Syntax/BuildTreeTest.cpp | 324 ++++++++++++++++-- 2 files changed, 325 insertions(+), 26 deletions(-) diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index a9f326439a2a5..e5389ae4eff47 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -13,6 +13,7 @@ #include "clang/AST/DeclarationName.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/IgnoreExpr.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/TypeLoc.h" @@ -44,8 +45,28 @@ using namespace clang; +// Ignores the implicit `CXXConstructExpr` for copy/move constructor calls +// generated by the compiler, as well as in implicit conversions like the one +// wrapping `1` in `X x = 1;`. +static Expr *IgnoreImplicitConstructorSingleStep(Expr *E) { + if (auto *C = dyn_cast(E)) { + auto NumArgs = C->getNumArgs(); + if (NumArgs == 1 || (NumArgs > 1 && isa(C->getArg(1)))) { + Expr *A = C->getArg(0); + if (C->getParenOrBraceRange().isInvalid()) + return A; + } + } + return E; +} + +static Expr *IgnoreImplicit(Expr *E) { + return IgnoreExprNodes(E, IgnoreImplicitSingleStep, + IgnoreImplicitConstructorSingleStep); +} + LLVM_ATTRIBUTE_UNUSED -static bool isImplicitExpr(Expr *E) { return E->IgnoreImplicit() != E; } +static bool isImplicitExpr(Expr *E) { return IgnoreImplicit(E) != E; } namespace { /// Get start location of the Declarator from the TypeLoc. 
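Concretely, the implicit `CXXConstructExpr` wrappers unwrapped here arise from source like the following (illustration only; `X`, `take`, and `test` are hypothetical names, and the updated tests below exercise the same shapes):

  struct X {
    X(int);
  };
  void take(X);
  void test(X x) {
    X y = 1;  // implicit converting construction wraps the literal `1`
    take(x);  // implicit copy construction wraps the argument `x`
  }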
@@ -740,7 +761,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { for (auto *D : DS->decls()) Builder.noticeDeclWithoutSemicolon(D); } else if (auto *E = dyn_cast_or_null(S)) { - return RecursiveASTVisitor::TraverseStmt(E->IgnoreImplicit()); + return RecursiveASTVisitor::TraverseStmt(IgnoreImplicit(E)); } return RecursiveASTVisitor::TraverseStmt(S); } @@ -1579,7 +1600,7 @@ void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { if (!Child) return; - Child = Child->IgnoreImplicit(); + Child = IgnoreImplicit(Child); syntax::Tree *ChildNode = Mapping.find(Child); assert(ChildNode != nullptr); diff --git a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp index aab20008a4974..fe89e0d7d1a2c 100644 --- a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp @@ -1745,19 +1745,15 @@ TEST_P(SyntaxTreeTest, OverloadedOperator_Plus) { struct X { friend X operator+(X, const X&); }; -// FIXME: Remove additional `UnknownExpression` wrapping `x`. For that, ignore -// implicit copy constructor called on `x`. This should've been ignored already, -// as we `IgnoreImplicit` when traversing an `Stmt`. void test(X x, X y) { [[x + y]]; } )cpp", {R"txt( BinaryOperatorExpression Expression -|-UnknownExpression LeftHandSide -| `-IdExpression -| `-UnqualifiedId UnqualifiedId -| `-'x' +|-IdExpression LeftHandSide +| `-UnqualifiedId UnqualifiedId +| `-'x' |-'+' OperatorToken `-IdExpression RightHandSide `-UnqualifiedId UnqualifiedId @@ -3821,26 +3817,137 @@ TranslationUnit Detached )txt")); } +TEST_P(SyntaxTreeTest, InitDeclarator_Equal) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct S { S(int);}; +void test() { + [[S s = 1]]; +} +)cpp", + {R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + |-'s' + |-'=' + `-IntegerLiteralExpression + `-'1' LiteralToken +)txt"})); +} + TEST_P(SyntaxTreeTest, InitDeclarator_Brace) { if (!GetParam().isCXX11OrLater()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( -int a {}; +struct S { + S(); + S(int); + S(int, float); +}; +void test(){ + // FIXME: 's...' is a declarator and '{...}' is initializer + [[S s0{}]]; + [[S s1{1}]]; + [[S s2{1, 2.}]]; +} )cpp", - R"txt( -TranslationUnit Detached -`-SimpleDeclaration - |-'int' - |-SimpleDeclarator Declarator - | |-'a' - | `-UnknownExpression - | `-UnknownExpression - | |-'{' - | `-'}' - `-';' -)txt")); + {R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + `-UnknownExpression + |-'s0' + |-'{' + `-'}' + )txt", + R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + `-UnknownExpression + |-'s1' + |-'{' + |-IntegerLiteralExpression + | `-'1' LiteralToken + `-'}' + )txt", + R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + `-UnknownExpression + |-'s2' + |-'{' + |-IntegerLiteralExpression + | `-'1' LiteralToken + |-',' + |-FloatingLiteralExpression + | `-'2.' 
LiteralToken + `-'}' +)txt"})); +} + +TEST_P(SyntaxTreeTest, InitDeclarator_EqualBrace) { + if (!GetParam().isCXX11OrLater()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct S { + S(); + S(int); + S(int, float); +}; +void test() { + // FIXME: '= {...}' is initializer + [[S s0 = {}]]; + [[S s1 = {1}]]; + [[S s2 = {1, 2.}]]; +} +)cpp", + {R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + |-'s0' + |-'=' + `-UnknownExpression + |-'{' + `-'}' + )txt", + R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + |-'s1' + |-'=' + `-UnknownExpression + |-'{' + |-IntegerLiteralExpression + | `-'1' LiteralToken + `-'}' + )txt", + R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + |-'s2' + |-'=' + `-UnknownExpression + |-'{' + |-IntegerLiteralExpression + | `-'1' LiteralToken + |-',' + |-FloatingLiteralExpression + | `-'2.' LiteralToken + `-'}' +)txt"})); } TEST_P(SyntaxTreeTest, InitDeclarator_Paren) { @@ -3851,15 +3958,134 @@ TEST_P(SyntaxTreeTest, InitDeclarator_Paren) { R"cpp( struct S { S(int); + S(int, float); }; -[[S s(1);]] +// FIXME: 's...' is a declarator and '(...)' is initializer +[[S s1(1);]] +[[S s2(1, 2.);]] )cpp", {R"txt( SimpleDeclaration |-'S' |-SimpleDeclarator Declarator | `-UnknownExpression -| |-'s' +| |-'s1' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| `-')' +`-';' + )txt", + R"txt( +SimpleDeclaration +|-'S' +|-SimpleDeclarator Declarator +| `-UnknownExpression +| |-'s2' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| |-',' +| |-FloatingLiteralExpression +| | `-'2.' LiteralToken +| `-')' +`-';' +)txt"})); +} + +TEST_P(SyntaxTreeTest, ImplicitConversion_Argument) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct X { + X(int); +}; +void TakeX(const X&); +void test() { + [[TakeX(1)]]; +} +)cpp", + {R"txt( +CallExpression Expression +|-IdExpression Callee +| `-UnqualifiedId UnqualifiedId +| `-'TakeX' +|-'(' OpenParen +|-CallArguments Arguments +| `-IntegerLiteralExpression ListElement +| `-'1' LiteralToken +`-')' CloseParen +)txt"})); +} + +TEST_P(SyntaxTreeTest, ImplicitConversion_Return) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct X { + X(int); +}; +X CreateX(){ + [[return 1;]] +} +)cpp", + {R"txt( +ReturnStatement Statement +|-'return' IntroducerKeyword +|-IntegerLiteralExpression ReturnValue +| `-'1' LiteralToken +`-';' +)txt"})); +} + +TEST_P(SyntaxTreeTest, ConstructorCall_ZeroArguments) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct X { + X(); +}; +X test() { + [[return X();]] +} +)cpp", + {R"txt( +ReturnStatement Statement +|-'return' IntroducerKeyword +|-UnknownExpression ReturnValue +| |-'X' +| |-'(' +| `-')' +`-';' +)txt"})); +} + +TEST_P(SyntaxTreeTest, ConstructorCall_OneArgument) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct X { + X(int); +}; +X test() { + // FIXME: Remove `UnknownExpression` due to implicit `CXXFunctionalCastExpr` + [[return X(1);]] +} +)cpp", + {R"txt( +ReturnStatement Statement +|-'return' IntroducerKeyword +|-UnknownExpression ReturnValue +| `-UnknownExpression +| |-'X' | |-'(' | |-IntegerLiteralExpression | | `-'1' LiteralToken @@ -3868,6 +4094,58 @@ SimpleDeclaration )txt"})); } +TEST_P(SyntaxTreeTest, ConstructorCall_MultipleArguments) { + if (!GetParam().isCXX()) { + return; + } + 
EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct X { + X(int, char); +}; +X test() { + [[return X(1, '2');]] +} +)cpp", + {R"txt( +ReturnStatement Statement +|-'return' IntroducerKeyword +|-UnknownExpression ReturnValue +| |-'X' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| |-',' +| |-CharacterLiteralExpression +| | `-''2'' LiteralToken +| `-')' +`-';' +)txt"})); +} + +TEST_P(SyntaxTreeTest, TypeConversion_FunctionalNotation) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +float test() { + [[return float(1);]] +} +)cpp", + {R"txt( +ReturnStatement Statement +|-'return' IntroducerKeyword +|-UnknownExpression ReturnValue +| |-'float' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| `-')' +`-';' +)txt"})); +} + TEST_P(SyntaxTreeTest, ArrayDeclarator_Simple) { EXPECT_TRUE(treeDumpEqual( R"cpp( From 46f4439dc9bf9b8cfee0001b6752c3d074c83b00 Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Thu, 27 Aug 2020 09:44:09 +0000 Subject: [PATCH 0041/1079] [SyntaxTree] Ignore implicit leaf `CXXConstructExpr` Differential Revision: https://reviews.llvm.org/D86700 --- clang/lib/Tooling/Syntax/BuildTree.cpp | 8 ++++++++ clang/unittests/Tooling/Syntax/BuildTreeTest.cpp | 15 ++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index e5389ae4eff47..72083eeefa31c 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -1132,6 +1132,14 @@ class BuildTreeVisitor : public RecursiveASTVisitor { return true; } + bool WalkUpFromCXXConstructExpr(CXXConstructExpr *S) { + // Ignore the implicit calls to default constructors. + if ((S->getNumArgs() == 0 || isa(S->getArg(0))) && + S->getParenOrBraceRange().isInvalid()) + return true; + return RecursiveASTVisitor::WalkUpFromCXXConstructExpr(S); + } + bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) { // To construct a syntax tree of the same shape for calls to built-in and // user-defined operators, ignore the `DeclRefExpr` that refers to the diff --git a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp index fe89e0d7d1a2c..00e18057d7be0 100644 --- a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp @@ -548,9 +548,6 @@ namespace n { struct S { }; } void test() { - // FIXME: Remove the `UnknownExpression` wrapping `s1` and `s2`. This - // `UnknownExpression` comes from a leaf `CXXConstructExpr` in the - // ClangAST. We need to ignore leaf implicit nodes. 
[[::n::S s1]]; [[n::S s2]]; } @@ -564,8 +561,7 @@ SimpleDeclaration | `-'::' ListDelimiter |-'S' `-SimpleDeclarator Declarator - `-UnknownExpression - `-'s1' + `-'s1' )txt", R"txt( SimpleDeclaration @@ -575,8 +571,7 @@ SimpleDeclaration | `-'::' ListDelimiter |-'S' `-SimpleDeclarator Declarator - `-UnknownExpression - `-'s2' + `-'s2' )txt"})); } @@ -608,8 +603,7 @@ SimpleDeclaration | `-'::' ListDelimiter |-'S' `-SimpleDeclarator Declarator - `-UnknownExpression - `-'s1' + `-'s1' )txt", R"txt( SimpleDeclaration @@ -623,8 +617,7 @@ SimpleDeclaration | `-'::' ListDelimiter |-'S' `-SimpleDeclarator Declarator - `-UnknownExpression - `-'s2' + `-'s2' )txt"})); } From 134455a07c1f1de4cff62a6afb4ccd98b98343ec Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Mon, 7 Sep 2020 08:40:49 +0000 Subject: [PATCH 0042/1079] [SyntaxTree] Ignore implicit `CXXFunctionalCastExpr` wrapping constructor Differential Revision: https://reviews.llvm.org/D87229 --- clang/lib/Tooling/Syntax/BuildTree.cpp | 19 ++++++++++++++++++- .../Tooling/Syntax/BuildTreeTest.cpp | 12 +++++------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index 72083eeefa31c..bb2b1494793a1 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -14,6 +14,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/IgnoreExpr.h" +#include "clang/AST/OperationKinds.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/TypeLoc.h" @@ -60,9 +61,25 @@ static Expr *IgnoreImplicitConstructorSingleStep(Expr *E) { return E; } +// In: +// struct X { +// X(int) +// }; +// X x = X(1); +// Ignores the implicit `CXXFunctionalCastExpr` that wraps +// `CXXConstructExpr X(1)`. +static Expr *IgnoreCXXFunctionalCastExprWrappingConstructor(Expr *E) { + if (auto *F = dyn_cast(E)) { + if (F->getCastKind() == CK_ConstructorConversion) + return F->getSubExpr(); + } + return E; +} + static Expr *IgnoreImplicit(Expr *E) { return IgnoreExprNodes(E, IgnoreImplicitSingleStep, - IgnoreImplicitConstructorSingleStep); + IgnoreImplicitConstructorSingleStep, + IgnoreCXXFunctionalCastExprWrappingConstructor); } LLVM_ATTRIBUTE_UNUSED diff --git a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp index 00e18057d7be0..7a106e9297b91 100644 --- a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp @@ -4069,7 +4069,6 @@ struct X { X(int); }; X test() { - // FIXME: Remove `UnknownExpression` due to implicit `CXXFunctionalCastExpr` [[return X(1);]] } )cpp", @@ -4077,12 +4076,11 @@ X test() { ReturnStatement Statement |-'return' IntroducerKeyword |-UnknownExpression ReturnValue -| `-UnknownExpression -| |-'X' -| |-'(' -| |-IntegerLiteralExpression -| | `-'1' LiteralToken -| `-')' +| |-'X' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| `-')' `-';' )txt"})); } From f5087d5c7248104b6580c7b079ed5f227332c2ef Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Mon, 7 Sep 2020 17:47:09 +0000 Subject: [PATCH 0043/1079] [SyntaxTree] Fix crash on functions with default arguments. 
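For context, the shape of code that used to crash the tree builder (illustration; the new tests below exercise exactly these calls):

  void f(int i = 1, char c = '2');
  void test() {
    f();        // all arguments defaulted
    f(1);       // trailing default becomes a CXXDefaultArgExpr
    f(1, '2');  // no defaults used
  }

The fix, in short: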
* Do not visit `CXXDefaultArgExpr` * To build `CallArguments` nodes, just go through non-default arguments Differential Revision: https://reviews.llvm.org/D87249 --- clang/lib/Tooling/Syntax/BuildTree.cpp | 15 +- .../Tooling/Syntax/BuildTreeTest.cpp | 195 ++++++++++++++++++ 2 files changed, 209 insertions(+), 1 deletion(-) diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index bb2b1494793a1..1942290b5abc5 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -154,6 +154,13 @@ struct GetStartLoc : TypeLocVisitor { }; } // namespace +static CallExpr::arg_range dropDefaultArgs(CallExpr::arg_range Args) { + auto firstDefaultArg = std::find_if(Args.begin(), Args.end(), [](auto it) { + return isa(it); + }); + return llvm::make_range(Args.begin(), firstDefaultArg); +} + static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E) { switch (E.getOperator()) { // Comparison @@ -1111,7 +1118,11 @@ class BuildTreeVisitor : public RecursiveASTVisitor { return true; } - syntax::CallArguments *buildCallArguments(CallExpr::arg_range Args) { + /// Builds `CallArguments` syntax node from arguments that appear in source + /// code, i.e. not default arguments. + syntax::CallArguments * + buildCallArguments(CallExpr::arg_range ArgsAndDefaultArgs) { + auto Args = dropDefaultArgs(ArgsAndDefaultArgs); for (const auto &Arg : Args) { Builder.markExprChild(Arg, syntax::NodeRole::ListElement); const auto *DelimiterToken = @@ -1233,6 +1244,8 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } } + bool WalkUpFromCXXDefaultArgExpr(CXXDefaultArgExpr *S) { return true; } + bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { auto Tokens = Builder.getDeclarationRange(S); if (Tokens.front().kind() == tok::coloncolon) { diff --git a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp index 7a106e9297b91..225885437267b 100644 --- a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp @@ -2733,6 +2733,54 @@ CallExpression Expression )txt"})); } +TEST_P(SyntaxTreeTest, CallExpression_DefaultArguments) { + if (!GetParam().isCXX11OrLater()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +void f(int i = 1, char c = '2'); +void test() { + [[f()]]; + [[f(1)]]; + [[f(1, '2')]]; +} +)cpp", + {R"txt( +CallExpression Expression +|-IdExpression Callee +| `-UnqualifiedId UnqualifiedId +| `-'f' +|-'(' OpenParen +`-')' CloseParen + )txt", + R"txt( +CallExpression Expression +|-IdExpression Callee +| `-UnqualifiedId UnqualifiedId +| `-'f' +|-'(' OpenParen +|-CallArguments Arguments +| `-IntegerLiteralExpression ListElement +| `-'1' LiteralToken +`-')' CloseParen + )txt", + R"txt( +CallExpression Expression +|-IdExpression Callee +| `-UnqualifiedId UnqualifiedId +| `-'f' +|-'(' OpenParen +|-CallArguments Arguments +| |-IntegerLiteralExpression ListElement +| | `-'1' LiteralToken +| |-',' ListDelimiter +| `-CharacterLiteralExpression ListElement +| `-''2'' LiteralToken +`-')' CloseParen +)txt"})); +} + TEST_P(SyntaxTreeTest, MultipleDeclaratorsGrouping) { EXPECT_TRUE(treeDumpEqual( R"cpp( @@ -3986,6 +4034,56 @@ SimpleDeclaration )txt"})); } +TEST_P(SyntaxTreeTest, InitDeclarator_Paren_DefaultArguments) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct S { + S(int i = 1, float = 2.); +}; +[[S s0;]] +// FIXME: 's...' 
is a declarator and '(...)' is initializer +[[S s1(1);]] +[[S s2(1, 2.);]] +)cpp", + {R"txt( +SimpleDeclaration +|-'S' +|-SimpleDeclarator Declarator +| `-'s0' +`-';' + )txt", + R"txt( +SimpleDeclaration +|-'S' +|-SimpleDeclarator Declarator +| `-UnknownExpression +| |-'s1' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| `-')' +`-';' + )txt", + R"txt( +SimpleDeclaration +|-'S' +|-SimpleDeclarator Declarator +| `-UnknownExpression +| |-'s2' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| |-',' +| |-FloatingLiteralExpression +| | `-'2.' LiteralToken +| `-')' +`-';' +)txt"})); +} + TEST_P(SyntaxTreeTest, ImplicitConversion_Argument) { if (!GetParam().isCXX()) { return; @@ -4114,6 +4212,48 @@ ReturnStatement Statement )txt"})); } +TEST_P(SyntaxTreeTest, ConstructorCall_DefaultArguments) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct X { + X(int i = 1, char c = '2'); +}; +X test() { + auto x0 = [[X()]]; + auto x1 = [[X(1)]]; + auto x2 = [[X(1, '2')]]; +} +)cpp", + {R"txt( +UnknownExpression +|-'X' +|-'(' +`-')' +)txt", + R"txt( +UnknownExpression +|-'X' +|-'(' +|-IntegerLiteralExpression +| `-'1' LiteralToken +`-')' +)txt", + R"txt( +UnknownExpression +|-'X' +|-'(' +|-IntegerLiteralExpression +| `-'1' LiteralToken +|-',' +|-CharacterLiteralExpression +| `-''2'' LiteralToken +`-')' +)txt"})); +} + TEST_P(SyntaxTreeTest, TypeConversion_FunctionalNotation) { if (!GetParam().isCXX()) { return; @@ -4375,6 +4515,61 @@ TranslationUnit Detached )txt")); } +TEST_P(SyntaxTreeTest, ParametersAndQualifiers_InFreeFunctions_Default_One) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +int func1([[int a = 1]]); +)cpp", + {R"txt( +ParameterDeclarationList Parameters +`-SimpleDeclaration ListElement + |-'int' + `-SimpleDeclarator Declarator + |-'a' + |-'=' + `-IntegerLiteralExpression + `-'1' LiteralToken +)txt"})); +} + +TEST_P(SyntaxTreeTest, + ParametersAndQualifiers_InFreeFunctions_Default_Multiple) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +int func2([[int *ap, int a = 1, char c = '2']]); +)cpp", + {R"txt( +ParameterDeclarationList Parameters +|-SimpleDeclaration ListElement +| |-'int' +| `-SimpleDeclarator Declarator +| |-'*' +| `-'ap' +|-',' ListDelimiter +|-SimpleDeclaration ListElement +| |-'int' +| `-SimpleDeclarator Declarator +| |-'a' +| |-'=' +| `-IntegerLiteralExpression +| `-'1' LiteralToken +|-',' ListDelimiter +`-SimpleDeclaration ListElement + |-'char' + `-SimpleDeclarator Declarator + |-'c' + |-'=' + `-CharacterLiteralExpression + `-''2'' LiteralToken +)txt"})); +} + TEST_P(SyntaxTreeTest, ParametersAndQualifiers_InVariadicFunctionTemplate_ParameterPack) { if (!GetParam().isCXX11OrLater() || GetParam().hasDelayedTemplateParsing()) { From 307dc7b236924b5eeb5bf46b725a67dcb41bcd89 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 8 Sep 2020 11:57:50 +0200 Subject: [PATCH 0044/1079] [mlir][VectorOps] Clean up outdated comments. NFCI. 
While there - De-templatify code that can use function_ref - Make BoundCaptures usable when they're const - Address post-submit review comment (static function into global namespace) --- .../mlir/Dialect/StandardOps/EDSC/Builders.h | 18 ++--- .../Conversion/VectorToSCF/VectorToSCF.cpp | 73 +++++-------------- 2 files changed, 26 insertions(+), 65 deletions(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h b/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h index 36df24f60c704..ffb3ba30b699a 100644 --- a/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h +++ b/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h @@ -20,10 +20,10 @@ namespace edsc { class BoundsCapture { public: unsigned rank() const { return lbs.size(); } - Value lb(unsigned idx) { return lbs[idx]; } - Value ub(unsigned idx) { return ubs[idx]; } - int64_t step(unsigned idx) { return steps[idx]; } - std::tuple range(unsigned idx) { + Value lb(unsigned idx) const { return lbs[idx]; } + Value ub(unsigned idx) const { return ubs[idx]; } + int64_t step(unsigned idx) const { return steps[idx]; } + std::tuple range(unsigned idx) const { return std::make_tuple(lbs[idx], ubs[idx], steps[idx]); } void swapRanges(unsigned i, unsigned j) { @@ -34,9 +34,9 @@ class BoundsCapture { std::swap(steps[i], steps[j]); } - ArrayRef getLbs() { return lbs; } - ArrayRef getUbs() { return ubs; } - ArrayRef getSteps() { return steps; } + ArrayRef getLbs() const { return lbs; } + ArrayRef getUbs() const { return ubs; } + ArrayRef getSteps() const { return steps; } protected: SmallVector lbs; @@ -52,8 +52,6 @@ class BoundsCapture { class MemRefBoundsCapture : public BoundsCapture { public: explicit MemRefBoundsCapture(Value v); - MemRefBoundsCapture(const MemRefBoundsCapture &) = default; - MemRefBoundsCapture &operator=(const MemRefBoundsCapture &) = default; unsigned fastestVarying() const { return rank() - 1; } @@ -69,8 +67,6 @@ class VectorBoundsCapture : public BoundsCapture { public: explicit VectorBoundsCapture(Value v); explicit VectorBoundsCapture(VectorType t); - VectorBoundsCapture(const VectorBoundsCapture &) = default; - VectorBoundsCapture &operator=(const VectorBoundsCapture &) = default; private: Value base; diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index 801ead825ffc9..0eb46f7ba3cfb 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -108,8 +108,10 @@ class NDTransferOpHelper { private: /// Creates the loop nest on the "major" dimensions and calls the /// `loopBodyBuilder` lambda in the context of the loop nest. - template - void emitLoops(Lambda loopBodyBuilder); + void + emitLoops(llvm::function_ref + loopBodyBuilder); /// Common state to lower vector transfer ops. 
PatternRewriter &rewriter; @@ -129,10 +131,13 @@ class NDTransferOpHelper { VectorType minorVectorType; // vector<(minor_dims) x type> MemRefType memRefMinorVectorType; // memref> }; +} // namespace template -template -void NDTransferOpHelper::emitLoops(Lambda loopBodyBuilder) { +void NDTransferOpHelper::emitLoops( + llvm::function_ref + loopBodyBuilder) { /// Loop nest operates on the major dimensions MemRefBoundsCapture memrefBoundsCapture(xferOp.memref()); @@ -195,7 +200,7 @@ static Value emitInBoundsCondition(PatternRewriter &rewriter, VectorTransferOpInterface xferOp, unsigned leadingRank, ValueRange majorIvs, ValueRange majorOffsets, - MemRefBoundsCapture &memrefBounds, + const MemRefBoundsCapture &memrefBounds, SmallVectorImpl &majorIvsPlusOffsets) { Value inBoundsCondition; majorIvsPlusOffsets.reserve(majorIvs.size()); @@ -242,7 +247,7 @@ LogicalResult NDTransferOpHelper::doReplace() { emitLoops([&](ValueRange majorIvs, ValueRange leadingOffsets, ValueRange majorOffsets, ValueRange minorOffsets, - MemRefBoundsCapture &memrefBounds) { + const MemRefBoundsCapture &memrefBounds) { /// Lambda to load 1-D vector in the current loop ivs + offset context. auto load1DVector = [&](ValueRange majorIvsPlusOffsets) -> Value { SmallVector indexing; @@ -341,7 +346,7 @@ LogicalResult NDTransferOpHelper::doReplace() { emitLoops([&](ValueRange majorIvs, ValueRange leadingOffsets, ValueRange majorOffsets, ValueRange minorOffsets, - MemRefBoundsCapture &memrefBounds) { + const MemRefBoundsCapture &memrefBounds) { // Lower to 1-D vector_transfer_write and let recursion handle it. auto emitTransferWrite = [&](ValueRange majorIvsPlusOffsets) { SmallVector indexing; @@ -390,8 +395,6 @@ LogicalResult NDTransferOpHelper::doReplace() { return success(); } -} // namespace - /// Analyzes the `transfer` to find an access dimension along the fastest remote /// MemRef dimension. If such a dimension with coalescing properties is found, /// `pivs` and `vectorBoundsCapture` are swapped so that the invocation of @@ -422,8 +425,6 @@ static int computeCoalescedIndex(TransferOpTy transfer) { return coalescedIdx; } -namespace mlir { - template VectorTransferRewriter::VectorTransferRewriter( VectorTransferToSCFOptions options, MLIRContext *context) @@ -443,7 +444,7 @@ MemRefType VectorTransferRewriter::tmpMemRefType( static void emitWithBoundsChecks( PatternRewriter &rewriter, VectorTransferOpInterface transfer, - ValueRange ivs, MemRefBoundsCapture &memRefBoundsCapture, + ValueRange ivs, const MemRefBoundsCapture &memRefBoundsCapture, function_ref)> inBoundsFun, function_ref)> outOfBoundsFun = nullptr) { // Permute the incoming indices according to the permutation map. @@ -499,43 +500,13 @@ static void emitWithBoundsChecks( /// 1. local memory allocation; /// 2. perfect loop nest over: /// a. scalar load from local buffers (viewed as a scalar memref); -/// a. scalar store to original memref (with clipping). +/// a. scalar store to original memref (with padding). /// 3. vector_load from local buffer (viewed as a memref<1 x vector>); /// 4. local memory deallocation. /// /// Lowers the data transfer part of a TransferReadOp while ensuring no /// out-of-bounds accesses are possible. Out-of-bounds behavior is handled by -/// clipping. This means that a given value in memory can be read multiple -/// times and concurrently. 
-/// -/// Important notes about clipping and "full-tiles only" abstraction: -/// ================================================================= -/// When using clipping for dealing with boundary conditions, the same edge -/// value will appear multiple times (a.k.a edge padding). This is fine if the -/// subsequent vector operations are all data-parallel but **is generally -/// incorrect** in the presence of reductions or extract operations. -/// -/// More generally, clipping is a scalar abstraction that is expected to work -/// fine as a baseline for CPUs and GPUs but not for vector_load and DMAs. -/// To deal with real vector_load and DMAs, a "padded allocation + view" -/// abstraction with the ability to read out-of-memref-bounds (but still within -/// the allocated region) is necessary. -/// -/// Whether using scalar loops or vector_load/DMAs to perform the transfer, -/// junk values will be materialized in the vectors and generally need to be -/// filtered out and replaced by the "neutral element". This neutral element is -/// op-dependent so, in the future, we expect to create a vector filter and -/// apply it to a splatted constant vector with the proper neutral element at -/// each ssa-use. This filtering is not necessary for pure data-parallel -/// operations. -/// -/// In the case of vector_store/DMAs, Read-Modify-Write will be required, which -/// also have concurrency implications. Note that by using clipped scalar stores -/// in the presence of data-parallel only operations, we generate code that -/// writes the same value multiple time on the edge locations. -/// -/// TODO: implement alternatives to clipping. -/// TODO: support non-data-parallel operations. +/// padding. /// Performs the rewrite. template <> @@ -618,19 +589,11 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( /// 2. vector_store to local buffer (viewed as a memref<1 x vector>); /// 3. perfect loop nest over: /// a. scalar load from local buffers (viewed as a scalar memref); -/// a. scalar store to original memref (with clipping). +/// a. scalar store to original memref (if in bounds). /// 4. local memory deallocation. /// /// More specifically, lowers the data transfer part while ensuring no -/// out-of-bounds accesses are possible. Out-of-bounds behavior is handled by -/// clipping. This means that a given value in memory can be written to multiple -/// times and concurrently. -/// -/// See `Important notes about clipping and full-tiles only abstraction` in the -/// description of `readClipped` above. -/// -/// TODO: implement alternatives to clipping. -/// TODO: support non-data-parallel operations. +/// out-of-bounds accesses are possible. template <> LogicalResult VectorTransferRewriter::matchAndRewrite( Operation *op, PatternRewriter &rewriter) const { @@ -702,6 +665,8 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( return success(); } +namespace mlir { + void populateVectorToSCFConversionPatterns( OwningRewritePatternList &patterns, MLIRContext *context, const VectorTransferToSCFOptions &options) { From 58970eb7d1ddd067e98f49fdcfb04373086245bc Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 8 Sep 2020 11:59:38 +0100 Subject: [PATCH 0045/1079] [OpenMP] Fix typo in CodeGenFunction::EmitOMPWorksharingLoop (PR46412) Fixes issue noticed by static analysis where we have a copy+paste typo, testing ScheduleKind.M1 twice instead of ScheduleKind.M2. 
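As a generic illustration of the bug class (a hypothetical reduction, not
the OpenMP code itself): when a condition tests the same field twice, the
second comparison is a no-op and the other field is silently ignored.

  enum Modifier { None, Monotonic, Nonmonotonic };

  struct Schedule {
    Modifier M1 = None;
    Modifier M2 = None;
  };

  static bool anyNonmonotonic(const Schedule &S) {
    // Buggy: M1 was tested twice, so S.M2 == Nonmonotonic was never seen.
    //   return S.M1 == Nonmonotonic || S.M1 == Nonmonotonic;
    return S.M1 == Nonmonotonic || S.M2 == Nonmonotonic; // fixed
  }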
Differential Revision: https://reviews.llvm.org/D87250 --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index c1def6c88f0a6..b9260892bd215 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2982,7 +2982,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( ((ScheduleKind.Schedule == OMPC_SCHEDULE_static || ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) && !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || - ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || + ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; if ((RT.isStaticNonchunked(ScheduleKind.Schedule, From 847299d3f00507f172097bad9dde61dfad0d355b Mon Sep 17 00:00:00 2001 From: Ehsan Toosi Date: Thu, 20 Aug 2020 12:56:19 +0200 Subject: [PATCH 0046/1079] [mlir] remove BufferAssignmentPlacer from BufferAssignmentOpConversionPattern BufferPlacement has been removed, as allocations are no longer placed during the conversion. Differential Revision: https://reviews.llvm.org/D87079 --- .../include/mlir/Transforms/BufferPlacement.h | 52 +++---------------- .../Linalg/Transforms/TensorsToBuffers.cpp | 31 ++++------- mlir/lib/Transforms/BufferPlacement.cpp | 17 ------ .../lib/Transforms/TestBufferPlacement.cpp | 31 ++++------- 4 files changed, 28 insertions(+), 103 deletions(-) diff --git a/mlir/include/mlir/Transforms/BufferPlacement.h b/mlir/include/mlir/Transforms/BufferPlacement.h index b3db7794fd971..6d88ac3599cf1 100644 --- a/mlir/include/mlir/Transforms/BufferPlacement.h +++ b/mlir/include/mlir/Transforms/BufferPlacement.h @@ -24,34 +24,6 @@ namespace mlir { -/// Prepares a buffer placement phase. It can place (user-defined) alloc -/// nodes. This simplifies the integration of the actual buffer-placement -/// pass. Sample usage: -/// BufferAssignmentPlacer baHelper(regionOp); -/// -> determine alloc positions -/// auto allocPosition = baHelper.computeAllocPosition(value); -/// -> place alloc -/// allocBuilder.setInsertionPoint(positions.getAllocPosition()); -/// -/// Note: this class is intended to be used during legalization. In order -/// to move alloc and dealloc nodes into the right places you can use the -/// createBufferPlacementPass() function. -class BufferAssignmentPlacer { -public: - /// Creates a new assignment builder. - explicit BufferAssignmentPlacer(Operation *op); - - /// Returns the operation this analysis was constructed from. - Operation *getOperation() const { return operation; } - - /// Computes the actual position to place allocs for the given result. - OpBuilder::InsertPoint computeAllocPosition(OpResult result); - -private: - /// The operation this analysis was constructed from. - Operation *operation; -}; - /// A helper type converter class for using inside Buffer Assignment operation /// conversion patterns. The default constructor keeps all the types intact /// except for the ranked-tensor types which is converted to memref types. @@ -157,31 +129,20 @@ class BufferAssignmentTypeConverter : public TypeConverter { SmallVector decomposeTypeConversions; }; -/// Helper conversion pattern that encapsulates a BufferAssignmentPlacer -/// instance. Sample usage: -/// class CustomConversionPattern : public -/// BufferAssignmentOpConversionPattern -/// { -/// ... matchAndRewrite(...) 
{ -/// -> Access stored BufferAssignmentPlacer -/// bufferAssignment->computeAllocPosition(resultOp); -/// } -/// }; +/// Helper conversion pattern that encapsulates a BufferAssignmentTypeConverter +/// instance. template class BufferAssignmentOpConversionPattern : public OpConversionPattern { public: explicit BufferAssignmentOpConversionPattern( - MLIRContext *context, BufferAssignmentPlacer *bufferAssignment = nullptr, - BufferAssignmentTypeConverter *converter = nullptr, + MLIRContext *context, BufferAssignmentTypeConverter *converter, PatternBenefit benefit = 1) - : OpConversionPattern(context, benefit), - bufferAssignment(bufferAssignment), converter(converter) { + : OpConversionPattern(context, benefit), converter(converter) { assert(converter && "The type converter has not been defined"); } protected: - BufferAssignmentPlacer *bufferAssignment; BufferAssignmentTypeConverter *converter; }; @@ -282,8 +243,7 @@ class BufferAssignmentCallOpConverter template static void populateWithBufferAssignmentOpConversionPatterns( - MLIRContext *context, BufferAssignmentPlacer *placer, - BufferAssignmentTypeConverter *converter, + MLIRContext *context, BufferAssignmentTypeConverter *converter, OwningRewritePatternList *patterns) { // clang-format off patterns->insert< @@ -291,7 +251,7 @@ static void populateWithBufferAssignmentOpConversionPatterns( BufferAssignmentFuncOpConverter, BufferAssignmentReturnOpConverter - >(context, placer, converter); + >(context, converter); // clang-format on } } // end namespace mlir diff --git a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp index 89a01f9ca6292..6af0067c8928c 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp @@ -51,11 +51,6 @@ class GenericOpConverter return rewriter.notifyMatchFailure( op, "dynamic shapes not currently supported"); auto memrefType = MemRefType::get(type.getShape(), type.getElementType()); - - // Compute alloc position and insert a custom allocation node. - OpBuilder::InsertionGuard guard(rewriter); - rewriter.restoreInsertionPoint( - bufferAssignment->computeAllocPosition(result)); auto alloc = rewriter.create(loc, memrefType); newArgs.push_back(alloc); newResults.push_back(alloc); @@ -99,13 +94,12 @@ class GenericOpConverter /// Populate the given list with patterns to convert Linalg operations on /// tensors to buffers. static void populateConvertLinalgOnTensorsToBuffersPattern( - MLIRContext *context, BufferAssignmentPlacer *placer, - BufferAssignmentTypeConverter *converter, + MLIRContext *context, BufferAssignmentTypeConverter *converter, OwningRewritePatternList *patterns) { populateWithBufferAssignmentOpConversionPatterns< - mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(context, placer, - converter, patterns); - patterns->insert(context, placer, converter); + mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(context, converter, + patterns); + patterns->insert(context, converter); } /// Converts Linalg operations that work on tensor-type operands or results to @@ -119,6 +113,8 @@ struct ConvertLinalgOnTensorsToBuffers // Mark all Standard operations legal. target.addLegalDialect(); + target.addLegalOp(); + target.addLegalOp(); // Mark all Linalg operations illegal as long as they work on tensors. 
auto isLegalOperation = [&](Operation *op) { @@ -144,16 +140,11 @@ struct ConvertLinalgOnTensorsToBuffers converter.setResultConversionKind( BufferAssignmentTypeConverter::AppendToArgumentsList); - // Walk over all the functions to apply buffer assignment. - getOperation().walk([&](FuncOp function) -> WalkResult { - OwningRewritePatternList patterns; - BufferAssignmentPlacer placer(function); - populateConvertLinalgOnTensorsToBuffersPattern(&context, &placer, - &converter, &patterns); - - // Applying full conversion - return applyFullConversion(function, target, patterns); - }); + OwningRewritePatternList patterns; + populateConvertLinalgOnTensorsToBuffersPattern(&context, &converter, + &patterns); + if (failed(applyFullConversion(this->getOperation(), target, patterns))) + this->signalPassFailure(); } }; } // end anonymous namespace diff --git a/mlir/lib/Transforms/BufferPlacement.cpp b/mlir/lib/Transforms/BufferPlacement.cpp index 1ab3e7e2e48dc..0279129758ab8 100644 --- a/mlir/lib/Transforms/BufferPlacement.cpp +++ b/mlir/lib/Transforms/BufferPlacement.cpp @@ -681,20 +681,6 @@ struct BufferPlacementPass : BufferPlacementBase { } // end anonymous namespace -//===----------------------------------------------------------------------===// -// BufferAssignmentPlacer -//===----------------------------------------------------------------------===// - -/// Creates a new assignment placer. -BufferAssignmentPlacer::BufferAssignmentPlacer(Operation *op) : operation(op) {} - -/// Computes the actual position to place allocs for the given value. -OpBuilder::InsertPoint -BufferAssignmentPlacer::computeAllocPosition(OpResult result) { - Operation *owner = result.getOwner(); - return OpBuilder::InsertPoint(owner->getBlock(), Block::iterator(owner)); -} - //===----------------------------------------------------------------------===// // BufferAssignmentTypeConverter //===----------------------------------------------------------------------===// @@ -891,9 +877,6 @@ LogicalResult BufferAssignmentCallOpConverter::matchAndRewrite( resultMapping.addMapping(newResultTypes.size() - 1); } else { // kind = BufferAssignmentTypeConverter::AppendToArgumentsList - OpBuilder::InsertionGuard guard(rewriter); - rewriter.restoreInsertionPoint( - bufferAssignment->computeAllocPosition(result.value())); MemRefType memref = converted.dyn_cast(); if (!memref) return callOp.emitError("Cannot allocate for a non-Memref type"); diff --git a/mlir/test/lib/Transforms/TestBufferPlacement.cpp b/mlir/test/lib/Transforms/TestBufferPlacement.cpp index 14b72b9fc92a0..c338f0f37c4ea 100644 --- a/mlir/test/lib/Transforms/TestBufferPlacement.cpp +++ b/mlir/test/lib/Transforms/TestBufferPlacement.cpp @@ -65,11 +65,6 @@ struct TestBufferPlacementPreparationPass op, "dynamic shapes not currently supported"); auto memrefType = MemRefType::get(type.getShape(), type.getElementType()); - - // Compute alloc position and insert a custom allocation node. 
- OpBuilder::InsertionGuard guard(rewriter); - rewriter.restoreInsertionPoint( - bufferAssignment->computeAllocPosition(result)); auto alloc = rewriter.create(loc, memrefType); newArgs.push_back(alloc); newResults.push_back(alloc); @@ -110,13 +105,12 @@ struct TestBufferPlacementPreparationPass }; void populateTensorLinalgToBufferLinalgConversionPattern( - MLIRContext *context, BufferAssignmentPlacer *placer, - BufferAssignmentTypeConverter *converter, + MLIRContext *context, BufferAssignmentTypeConverter *converter, OwningRewritePatternList *patterns) { populateWithBufferAssignmentOpConversionPatterns< - mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(context, placer, - converter, patterns); - patterns->insert(context, placer, converter); + mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(context, converter, + patterns); + patterns->insert(context, converter); } void getDependentDialects(DialectRegistry ®istry) const override { @@ -133,6 +127,8 @@ struct TestBufferPlacementPreparationPass target.addLegalDialect(); target.addLegalOp(); target.addLegalOp(); + target.addLegalOp(); + target.addLegalOp(); // Mark all Linalg operations illegal as long as they work on tensors. auto isLegalOperation = [&](Operation *op) { @@ -191,16 +187,11 @@ struct TestBufferPlacementPreparationPass return success(); }); - // Walk over all the functions to apply buffer assignment. - this->getOperation().walk([&](FuncOp function) -> WalkResult { - OwningRewritePatternList patterns; - BufferAssignmentPlacer placer(function); - populateTensorLinalgToBufferLinalgConversionPattern( - &context, &placer, &converter, &patterns); - - // Applying full conversion - return applyFullConversion(function, target, patterns); - }); + OwningRewritePatternList patterns; + populateTensorLinalgToBufferLinalgConversionPattern(&context, &converter, + &patterns); + if (failed(applyFullConversion(this->getOperation(), target, patterns))) + this->signalPassFailure(); }; }; } // end anonymous namespace From 25c3fa3f13336b2da7c63162b0d9da164a0a96a1 Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Tue, 8 Sep 2020 19:55:14 +0800 Subject: [PATCH 0047/1079] [DWARFYAML] Make the debug_ranges section optional. This patch makes the debug_ranges section optional. When we specify an empty debug_ranges section, yaml2obj only emits the section header. 
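For reference, a minimal yaml2obj input exercising the new behavior (this
mirrors the ELF test added below; the Mach-O test is analogous):

  --- !ELF
  FileHeader:
    Class: ELFCLASS64
    Data:  ELFDATA2LSB
    Type:  ET_EXEC
  DWARF:
    debug_ranges: []

Omitting the debug_ranges key entirely still suppresses the section, while
the empty list now yields a section containing only the header.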
Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D87263 --- llvm/include/llvm/ObjectYAML/DWARFYAML.h | 2 +- llvm/lib/ObjectYAML/DWARFEmitter.cpp | 2 +- llvm/lib/ObjectYAML/DWARFYAML.cpp | 5 +-- .../ObjectYAML/MachO/DWARF-debug_ranges.yaml | 45 +++++++++++++++++++ .../yaml2obj/ELF/DWARF/debug-ranges.yaml | 14 ++++++ llvm/tools/obj2yaml/dwarf2yaml.cpp | 5 ++- 6 files changed, 67 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/ObjectYAML/DWARFYAML.h b/llvm/include/llvm/ObjectYAML/DWARFYAML.h index 99a7af87d2c78..3e5be41b8fa3b 100644 --- a/llvm/include/llvm/ObjectYAML/DWARFYAML.h +++ b/llvm/include/llvm/ObjectYAML/DWARFYAML.h @@ -214,7 +214,7 @@ struct Data { Optional> DebugStrings; Optional> DebugStrOffsets; Optional> DebugAranges; - std::vector DebugRanges; + Optional> DebugRanges; Optional> DebugAddr; Optional PubNames; Optional PubTypes; diff --git a/llvm/lib/ObjectYAML/DWARFEmitter.cpp b/llvm/lib/ObjectYAML/DWARFEmitter.cpp index bf29f40579ceb..b634f7c123e8d 100644 --- a/llvm/lib/ObjectYAML/DWARFEmitter.cpp +++ b/llvm/lib/ObjectYAML/DWARFEmitter.cpp @@ -190,7 +190,7 @@ Error DWARFYAML::emitDebugAranges(raw_ostream &OS, const DWARFYAML::Data &DI) { Error DWARFYAML::emitDebugRanges(raw_ostream &OS, const DWARFYAML::Data &DI) { const size_t RangesOffset = OS.tell(); uint64_t EntryIndex = 0; - for (auto DebugRanges : DI.DebugRanges) { + for (auto DebugRanges : *DI.DebugRanges) { const size_t CurrOffset = OS.tell() - RangesOffset; if (DebugRanges.Offset && (uint64_t)*DebugRanges.Offset < CurrOffset) return createStringError(errc::invalid_argument, diff --git a/llvm/lib/ObjectYAML/DWARFYAML.cpp b/llvm/lib/ObjectYAML/DWARFYAML.cpp index 353e5058a0e5d..975b9b40b6b18 100644 --- a/llvm/lib/ObjectYAML/DWARFYAML.cpp +++ b/llvm/lib/ObjectYAML/DWARFYAML.cpp @@ -28,7 +28,7 @@ SetVector DWARFYAML::Data::getNonEmptySectionNames() const { SecNames.insert("debug_str"); if (DebugAranges) SecNames.insert("debug_aranges"); - if (!DebugRanges.empty()) + if (DebugRanges) SecNames.insert("debug_ranges"); if (!DebugLines.empty()) SecNames.insert("debug_line"); @@ -95,8 +95,7 @@ void MappingTraits::mapping(IO &IO, DWARFYAML::Data &DWARF) { IO.mapOptional("debug_str", DWARF.DebugStrings); IO.mapOptional("debug_abbrev", DWARF.DebugAbbrev); IO.mapOptional("debug_aranges", DWARF.DebugAranges); - if (!DWARF.DebugRanges.empty() || !IO.outputting()) - IO.mapOptional("debug_ranges", DWARF.DebugRanges); + IO.mapOptional("debug_ranges", DWARF.DebugRanges); IO.mapOptional("debug_pubnames", DWARF.PubNames); IO.mapOptional("debug_pubtypes", DWARF.PubTypes); DWARFCtx.IsGNUPubSec = true; diff --git a/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml b/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml index 8948bf92b7d76..30997ba1144b6 100644 --- a/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml +++ b/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml @@ -239,3 +239,48 @@ DWARF: - AbbrCode: 0x00000000 Values: [] ... + +## Test generating and dumping an empty __debug_ranges section. + +# RUN: yaml2obj --docnum=2 %s | obj2yaml | FileCheck %s --check-prefix=EMPTY + +# EMPTY: DWARF: +# EMPTY-NEXT: debug_ranges: [] +# EMPTY-NEXT: ... 
+ +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x0000000A + ncmds: 1 + sizeofcmds: 232 + flags: 0x00000000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DWARF + vmaddr: 0x00 + vmsize: 0x00 + fileoff: 0x00 + filesize: 0x00 + maxprot: 0 + initprot: 0 + nsects: 1 + flags: 0 + Sections: + - sectname: __debug_ranges + segname: __DWARF + addr: 0x00 + size: [[SIZE=0]] + offset: 0x210 + align: 0 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: [[CONTENT=]] diff --git a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-ranges.yaml b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-ranges.yaml index 6a9cd7a6195e7..f80dd6de53689 100644 --- a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-ranges.yaml +++ b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-ranges.yaml @@ -407,3 +407,17 @@ DWARF: Entries: - LowOffset: 0x1234 HighOffset: 0x5678 + +## l) Test that the .debug_ranges section header is emitted if the "debug_ranges" +## entry is empty. + +# RUN: yaml2obj --docnum=12 %s -o %t12.o +# RUN: llvm-readobj -S %t12.o | FileCheck -DSIZE=0 -DADDRALIGN=1 %s --check-prefix=DWARF-HEADER + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +DWARF: + debug_ranges: [] diff --git a/llvm/tools/obj2yaml/dwarf2yaml.cpp b/llvm/tools/obj2yaml/dwarf2yaml.cpp index cef7b699805c8..1dcf6d42d6ada 100644 --- a/llvm/tools/obj2yaml/dwarf2yaml.cpp +++ b/llvm/tools/obj2yaml/dwarf2yaml.cpp @@ -114,6 +114,7 @@ Error dumpDebugRanges(DWARFContext &DCtx, DWARFYAML::Data &Y) { DCtx.isLittleEndian(), AddrSize); uint64_t Offset = 0; DWARFDebugRangeList DwarfRanges; + std::vector DebugRanges; while (Data.isValidOffset(Offset)) { DWARFYAML::Ranges YamlRanges; @@ -123,8 +124,10 @@ Error dumpDebugRanges(DWARFContext &DCtx, DWARFYAML::Data &Y) { return E; for (const auto &RLE : DwarfRanges.getEntries()) YamlRanges.Entries.push_back({RLE.StartAddress, RLE.EndAddress}); - Y.DebugRanges.push_back(std::move(YamlRanges)); + DebugRanges.push_back(std::move(YamlRanges)); } + + Y.DebugRanges = DebugRanges; return ErrorSuccess(); } From 0729ae367af07c2c75d08cfa881795b325fcf922 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 8 Sep 2020 12:45:08 +0100 Subject: [PATCH 0048/1079] X86DomainReassignment.cpp - improve auto const/pointer/reference qualifiers. NFCI. Fix clang-tidy warnings by ensuring auto variables are more cleanly qualified, or just avoid auto entirely. --- llvm/lib/Target/X86/X86DomainReassignment.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp index 488ee51f1d89b..3a0d6a52ef463 100644 --- a/llvm/lib/Target/X86/X86DomainReassignment.cpp +++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp @@ -141,7 +141,7 @@ class InstrReplacer : public InstrConverterBase { return false; // It's illegal to replace an instruction that implicitly defines a register // with an instruction that doesn't, unless that register dead. 
- for (auto &MO : MI->implicit_operands()) + for (const auto &MO : MI->implicit_operands()) if (MO.isReg() && MO.isDef() && !MO.isDead() && !TII->get(DstOpcode).hasImplicitDefOfPhysReg(MO.getReg())) return false; @@ -180,7 +180,7 @@ class InstrReplacerDstCOPY : public InstrConverterBase { MachineRegisterInfo *MRI) const override { assert(isLegal(MI, TII) && "Cannot convert instruction"); MachineBasicBlock *MBB = MI->getParent(); - auto &DL = MI->getDebugLoc(); + const DebugLoc &DL = MI->getDebugLoc(); Register Reg = MRI->createVirtualRegister( TII->getRegClass(TII->get(DstOpcode), 0, MRI->getTargetRegisterInfo(), @@ -237,7 +237,7 @@ class InstrCOPYReplacer : public InstrReplacer { MachineRegisterInfo *MRI) const override { assert(MI->getOpcode() == TargetOpcode::COPY && "Expected a COPY"); - for (auto &MO : MI->operands()) { + for (const auto &MO : MI->operands()) { // Physical registers will not be converted. Assume that converting the // COPY to the destination domain will eventually result in a actual // instruction. @@ -517,7 +517,7 @@ void X86DomainReassignment::reassign(const Closure &C, RegDomain Domain) const { } } - for (auto MI : ToErase) + for (auto *MI : ToErase) MI->eraseFromParent(); } @@ -537,7 +537,7 @@ static bool usedAsAddr(const MachineInstr &MI, unsigned Reg, for (unsigned MemOpIdx = MemOpStart, MemOpEnd = MemOpStart + X86::AddrNumOperands; MemOpIdx < MemOpEnd; ++MemOpIdx) { - auto &Op = MI.getOperand(MemOpIdx); + const MachineOperand &Op = MI.getOperand(MemOpIdx); if (Op.isReg() && Op.getReg() == Reg) return true; } From fcff2c32c0f3a85f7fce02a120de3f1b5778252c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 8 Sep 2020 12:46:00 +0100 Subject: [PATCH 0049/1079] X86CallLowering.cpp - improve auto const/pointer/reference qualifiers. NFCI. Fix clang-tidy warnings by ensuring auto variables are more cleanly qualified, or just avoid auto entirely. 
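In isolation, the pattern being fixed looks like this (a hypothetical
sketch, not code from the patch):

  #include <vector>

  struct Instr { int Opcode = 0; };

  static int sumOpcodes(const std::vector<Instr *> &Worklist) {
    int Total = 0;
    // clang-tidy flags 'for (auto I : Worklist)': the deduced type is a
    // pointer, so the '*' (and 'const', since I is never modified) should
    // be spelled out at the use site.
    for (const auto *I : Worklist)
      Total += I->Opcode;
    return Total;
  }

Where the pointee really is mutated, plain 'auto *' without const is the
preferred spelling.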
---
 llvm/lib/Target/X86/X86CallLowering.cpp | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/X86/X86CallLowering.cpp b/llvm/lib/Target/X86/X86CallLowering.cpp
index 0286482ac9af8..8342cad45dfd0 100644
--- a/llvm/lib/Target/X86/X86CallLowering.cpp
+++ b/llvm/lib/Target/X86/X86CallLowering.cpp
@@ -148,9 +148,9 @@ struct X86OutgoingValueHandler : public CallLowering::IncomingValueHandler {
     MachineFunction &MF = MIRBuilder.getMF();
     Register ExtReg = extendRegister(ValVReg, VA);

-    auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore,
-                                       VA.getLocVT().getStoreSize(),
-                                       inferAlignFromPtrInfo(MF, MPO));
+    auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore,
+                                        VA.getLocVT().getStoreSize(),
+                                        inferAlignFromPtrInfo(MF, MPO));
     MIRBuilder.buildStore(ExtReg, Addr, *MMO);
   }

@@ -194,7 +194,7 @@ bool X86CallLowering::lowerReturn(
   MachineFunction &MF = MIRBuilder.getMF();
   const Function &F = MF.getFunction();
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  auto &DL = MF.getDataLayout();
+  const DataLayout &DL = MF.getDataLayout();
   LLVMContext &Ctx = Val->getType()->getContext();
   const X86TargetLowering &TLI = *getTLI<X86TargetLowering>();

@@ -245,7 +245,7 @@ struct X86IncomingValueHandler : public CallLowering::IncomingValueHandler {
   void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                             MachinePointerInfo &MPO, CCValAssign &VA) override {
     MachineFunction &MF = MIRBuilder.getMF();
-    auto MMO = MF.getMachineMemOperand(
+    auto *MMO = MF.getMachineMemOperand(
         MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
         inferAlignFromPtrInfo(MF, MPO));
     MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
@@ -337,8 +337,7 @@ bool X86CallLowering::lowerFormalArguments(
   SmallVector<ArgInfo, 8> SplitArgs;
   unsigned Idx = 0;
-  for (auto &Arg : F.args()) {
-
+  for (const auto &Arg : F.args()) {
     // TODO: handle not simple cases.
     if (Arg.hasAttribute(Attribute::ByVal) ||
         Arg.hasAttribute(Attribute::InReg) ||
@@ -377,10 +376,10 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   MachineFunction &MF = MIRBuilder.getMF();
   const Function &F = MF.getFunction();
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  auto &DL = F.getParent()->getDataLayout();
+  const DataLayout &DL = F.getParent()->getDataLayout();
   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
   const TargetInstrInfo &TII = *STI.getInstrInfo();
-  auto TRI = STI.getRegisterInfo();
+  const X86RegisterInfo *TRI = STI.getRegisterInfo();

   // Handle only Linux C, X86_64_SysV calling conventions for now.
   if (!STI.isTargetLinux() || !(Info.CallConv == CallingConv::C ||

From 0729ae367af07c2c75d08cfa881795b325fcf922 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 8 Sep 2020 13:01:09 +0100
Subject: [PATCH 0050/1079] [Codegen][X86] Begin moving X86 specific codegen
 tests into X86 subfolder.

Discussed with @craig.topper and @spatel - this is to try and tidy up the
codegen folder and move the x86 specific tests (as opposed to general tests
that just happen to use x86 triples) into subfolders. It's up to other
targets if they follow suit.

It also helps speed up test iterations as using wildcards on lit commands
often misses some filenames.
--- clang/test/CodeGen/{ => X86}/x86-64-inline-asm.c | 0 clang/test/CodeGen/{ => X86}/x86-GCC-inline-asm-Y-constraints.c | 0 clang/test/CodeGen/{ => X86}/x86-atomic-long_double.c | 0 clang/test/CodeGen/{ => X86}/x86-bswap.c | 0 clang/test/CodeGen/{ => X86}/x86-builtins-vector-width.c | 0 clang/test/CodeGen/{ => X86}/x86-builtins.c | 0 clang/test/CodeGen/{ => X86}/x86-cf-protection.c | 0 clang/test/CodeGen/{ => X86}/x86-crc-builtins.c | 0 clang/test/CodeGen/{ => X86}/x86-enqcmd-builtins.c | 0 clang/test/CodeGen/{ => X86}/x86-inline-asm-min-vector-width.c | 0 clang/test/CodeGen/{ => X86}/x86-inline-asm-v-constraint.c | 0 clang/test/CodeGen/{ => X86}/x86-long-double.cpp | 0 clang/test/CodeGen/{ => X86}/x86-nontemporal.c | 0 clang/test/CodeGen/{ => X86}/x86-serialize-intrin.c | 0 clang/test/CodeGen/{ => X86}/x86-soft-float.c | 0 clang/test/CodeGen/{ => X86}/x86-tsxldtrk-builtins.c | 0 clang/test/CodeGen/{ => X86}/x86-vec-i128.c | 0 clang/test/CodeGen/{ => X86}/x86-vec-struct-packing.c | 0 clang/test/CodeGen/{ => X86}/x86-vector-width.c | 0 clang/test/CodeGen/{ => X86}/x86.c | 0 clang/test/CodeGen/{ => X86}/x86_32-arguments-darwin.c | 0 clang/test/CodeGen/{ => X86}/x86_32-arguments-iamcu.c | 0 clang/test/CodeGen/{ => X86}/x86_32-arguments-linux.c | 0 clang/test/CodeGen/{ => X86}/x86_32-arguments-nommx.c | 0 clang/test/CodeGen/{ => X86}/x86_32-arguments-realign.c | 0 clang/test/CodeGen/{ => X86}/x86_32-arguments-win32.c | 0 clang/test/CodeGen/{ => X86}/x86_32-fpcc-struct-return.c | 0 clang/test/CodeGen/{ => X86}/x86_32-inline-asm.c | 0 clang/test/CodeGen/{ => X86}/x86_32-xsave.c | 0 clang/test/CodeGen/{ => X86}/x86_64-PR42672.c | 0 clang/test/CodeGen/{ => X86}/x86_64-arguments-darwin.c | 0 clang/test/CodeGen/{ => X86}/x86_64-arguments-nacl.c | 0 clang/test/CodeGen/{ => X86}/x86_64-arguments-win32.c | 0 clang/test/CodeGen/{ => X86}/x86_64-arguments.c | 0 clang/test/CodeGen/{ => X86}/x86_64-atomic-128.c | 0 clang/test/CodeGen/{ => X86}/x86_64-floatvectors.c | 0 clang/test/CodeGen/{ => X86}/x86_64-instrument-functions.c | 0 clang/test/CodeGen/{ => X86}/x86_64-longdouble.c | 0 clang/test/CodeGen/{ => X86}/x86_64-mno-sse.c | 0 clang/test/CodeGen/{ => X86}/x86_64-mno-sse2.c | 0 clang/test/CodeGen/{ => X86}/x86_64-profiling-keep-fp.c | 0 clang/test/CodeGen/{ => X86}/x86_64-xsave.c | 0 clang/test/CodeGen/{ => X86}/x86_inlineasm_curly_bracket_escape.c | 0 43 files changed, 0 insertions(+), 0 deletions(-) rename clang/test/CodeGen/{ => X86}/x86-64-inline-asm.c (100%) rename clang/test/CodeGen/{ => X86}/x86-GCC-inline-asm-Y-constraints.c (100%) rename clang/test/CodeGen/{ => X86}/x86-atomic-long_double.c (100%) rename clang/test/CodeGen/{ => X86}/x86-bswap.c (100%) rename clang/test/CodeGen/{ => X86}/x86-builtins-vector-width.c (100%) rename clang/test/CodeGen/{ => X86}/x86-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/x86-cf-protection.c (100%) rename clang/test/CodeGen/{ => X86}/x86-crc-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/x86-enqcmd-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/x86-inline-asm-min-vector-width.c (100%) rename clang/test/CodeGen/{ => X86}/x86-inline-asm-v-constraint.c (100%) rename clang/test/CodeGen/{ => X86}/x86-long-double.cpp (100%) rename clang/test/CodeGen/{ => X86}/x86-nontemporal.c (100%) rename clang/test/CodeGen/{ => X86}/x86-serialize-intrin.c (100%) rename clang/test/CodeGen/{ => X86}/x86-soft-float.c (100%) rename clang/test/CodeGen/{ => X86}/x86-tsxldtrk-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/x86-vec-i128.c (100%) rename 
clang/test/CodeGen/{ => X86}/x86-vec-struct-packing.c (100%) rename clang/test/CodeGen/{ => X86}/x86-vector-width.c (100%) rename clang/test/CodeGen/{ => X86}/x86.c (100%) rename clang/test/CodeGen/{ => X86}/x86_32-arguments-darwin.c (100%) rename clang/test/CodeGen/{ => X86}/x86_32-arguments-iamcu.c (100%) rename clang/test/CodeGen/{ => X86}/x86_32-arguments-linux.c (100%) rename clang/test/CodeGen/{ => X86}/x86_32-arguments-nommx.c (100%) rename clang/test/CodeGen/{ => X86}/x86_32-arguments-realign.c (100%) rename clang/test/CodeGen/{ => X86}/x86_32-arguments-win32.c (100%) rename clang/test/CodeGen/{ => X86}/x86_32-fpcc-struct-return.c (100%) rename clang/test/CodeGen/{ => X86}/x86_32-inline-asm.c (100%) rename clang/test/CodeGen/{ => X86}/x86_32-xsave.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-PR42672.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-arguments-darwin.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-arguments-nacl.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-arguments-win32.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-arguments.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-atomic-128.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-floatvectors.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-instrument-functions.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-longdouble.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-mno-sse.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-mno-sse2.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-profiling-keep-fp.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-xsave.c (100%) rename clang/test/CodeGen/{ => X86}/x86_inlineasm_curly_bracket_escape.c (100%) diff --git a/clang/test/CodeGen/x86-64-inline-asm.c b/clang/test/CodeGen/X86/x86-64-inline-asm.c similarity index 100% rename from clang/test/CodeGen/x86-64-inline-asm.c rename to clang/test/CodeGen/X86/x86-64-inline-asm.c diff --git a/clang/test/CodeGen/x86-GCC-inline-asm-Y-constraints.c b/clang/test/CodeGen/X86/x86-GCC-inline-asm-Y-constraints.c similarity index 100% rename from clang/test/CodeGen/x86-GCC-inline-asm-Y-constraints.c rename to clang/test/CodeGen/X86/x86-GCC-inline-asm-Y-constraints.c diff --git a/clang/test/CodeGen/x86-atomic-long_double.c b/clang/test/CodeGen/X86/x86-atomic-long_double.c similarity index 100% rename from clang/test/CodeGen/x86-atomic-long_double.c rename to clang/test/CodeGen/X86/x86-atomic-long_double.c diff --git a/clang/test/CodeGen/x86-bswap.c b/clang/test/CodeGen/X86/x86-bswap.c similarity index 100% rename from clang/test/CodeGen/x86-bswap.c rename to clang/test/CodeGen/X86/x86-bswap.c diff --git a/clang/test/CodeGen/x86-builtins-vector-width.c b/clang/test/CodeGen/X86/x86-builtins-vector-width.c similarity index 100% rename from clang/test/CodeGen/x86-builtins-vector-width.c rename to clang/test/CodeGen/X86/x86-builtins-vector-width.c diff --git a/clang/test/CodeGen/x86-builtins.c b/clang/test/CodeGen/X86/x86-builtins.c similarity index 100% rename from clang/test/CodeGen/x86-builtins.c rename to clang/test/CodeGen/X86/x86-builtins.c diff --git a/clang/test/CodeGen/x86-cf-protection.c b/clang/test/CodeGen/X86/x86-cf-protection.c similarity index 100% rename from clang/test/CodeGen/x86-cf-protection.c rename to clang/test/CodeGen/X86/x86-cf-protection.c diff --git a/clang/test/CodeGen/x86-crc-builtins.c b/clang/test/CodeGen/X86/x86-crc-builtins.c similarity index 100% rename from clang/test/CodeGen/x86-crc-builtins.c rename to clang/test/CodeGen/X86/x86-crc-builtins.c diff --git 
a/clang/test/CodeGen/x86-enqcmd-builtins.c b/clang/test/CodeGen/X86/x86-enqcmd-builtins.c similarity index 100% rename from clang/test/CodeGen/x86-enqcmd-builtins.c rename to clang/test/CodeGen/X86/x86-enqcmd-builtins.c diff --git a/clang/test/CodeGen/x86-inline-asm-min-vector-width.c b/clang/test/CodeGen/X86/x86-inline-asm-min-vector-width.c similarity index 100% rename from clang/test/CodeGen/x86-inline-asm-min-vector-width.c rename to clang/test/CodeGen/X86/x86-inline-asm-min-vector-width.c diff --git a/clang/test/CodeGen/x86-inline-asm-v-constraint.c b/clang/test/CodeGen/X86/x86-inline-asm-v-constraint.c similarity index 100% rename from clang/test/CodeGen/x86-inline-asm-v-constraint.c rename to clang/test/CodeGen/X86/x86-inline-asm-v-constraint.c diff --git a/clang/test/CodeGen/x86-long-double.cpp b/clang/test/CodeGen/X86/x86-long-double.cpp similarity index 100% rename from clang/test/CodeGen/x86-long-double.cpp rename to clang/test/CodeGen/X86/x86-long-double.cpp diff --git a/clang/test/CodeGen/x86-nontemporal.c b/clang/test/CodeGen/X86/x86-nontemporal.c similarity index 100% rename from clang/test/CodeGen/x86-nontemporal.c rename to clang/test/CodeGen/X86/x86-nontemporal.c diff --git a/clang/test/CodeGen/x86-serialize-intrin.c b/clang/test/CodeGen/X86/x86-serialize-intrin.c similarity index 100% rename from clang/test/CodeGen/x86-serialize-intrin.c rename to clang/test/CodeGen/X86/x86-serialize-intrin.c diff --git a/clang/test/CodeGen/x86-soft-float.c b/clang/test/CodeGen/X86/x86-soft-float.c similarity index 100% rename from clang/test/CodeGen/x86-soft-float.c rename to clang/test/CodeGen/X86/x86-soft-float.c diff --git a/clang/test/CodeGen/x86-tsxldtrk-builtins.c b/clang/test/CodeGen/X86/x86-tsxldtrk-builtins.c similarity index 100% rename from clang/test/CodeGen/x86-tsxldtrk-builtins.c rename to clang/test/CodeGen/X86/x86-tsxldtrk-builtins.c diff --git a/clang/test/CodeGen/x86-vec-i128.c b/clang/test/CodeGen/X86/x86-vec-i128.c similarity index 100% rename from clang/test/CodeGen/x86-vec-i128.c rename to clang/test/CodeGen/X86/x86-vec-i128.c diff --git a/clang/test/CodeGen/x86-vec-struct-packing.c b/clang/test/CodeGen/X86/x86-vec-struct-packing.c similarity index 100% rename from clang/test/CodeGen/x86-vec-struct-packing.c rename to clang/test/CodeGen/X86/x86-vec-struct-packing.c diff --git a/clang/test/CodeGen/x86-vector-width.c b/clang/test/CodeGen/X86/x86-vector-width.c similarity index 100% rename from clang/test/CodeGen/x86-vector-width.c rename to clang/test/CodeGen/X86/x86-vector-width.c diff --git a/clang/test/CodeGen/x86.c b/clang/test/CodeGen/X86/x86.c similarity index 100% rename from clang/test/CodeGen/x86.c rename to clang/test/CodeGen/X86/x86.c diff --git a/clang/test/CodeGen/x86_32-arguments-darwin.c b/clang/test/CodeGen/X86/x86_32-arguments-darwin.c similarity index 100% rename from clang/test/CodeGen/x86_32-arguments-darwin.c rename to clang/test/CodeGen/X86/x86_32-arguments-darwin.c diff --git a/clang/test/CodeGen/x86_32-arguments-iamcu.c b/clang/test/CodeGen/X86/x86_32-arguments-iamcu.c similarity index 100% rename from clang/test/CodeGen/x86_32-arguments-iamcu.c rename to clang/test/CodeGen/X86/x86_32-arguments-iamcu.c diff --git a/clang/test/CodeGen/x86_32-arguments-linux.c b/clang/test/CodeGen/X86/x86_32-arguments-linux.c similarity index 100% rename from clang/test/CodeGen/x86_32-arguments-linux.c rename to clang/test/CodeGen/X86/x86_32-arguments-linux.c diff --git a/clang/test/CodeGen/x86_32-arguments-nommx.c 
b/clang/test/CodeGen/X86/x86_32-arguments-nommx.c similarity index 100% rename from clang/test/CodeGen/x86_32-arguments-nommx.c rename to clang/test/CodeGen/X86/x86_32-arguments-nommx.c diff --git a/clang/test/CodeGen/x86_32-arguments-realign.c b/clang/test/CodeGen/X86/x86_32-arguments-realign.c similarity index 100% rename from clang/test/CodeGen/x86_32-arguments-realign.c rename to clang/test/CodeGen/X86/x86_32-arguments-realign.c diff --git a/clang/test/CodeGen/x86_32-arguments-win32.c b/clang/test/CodeGen/X86/x86_32-arguments-win32.c similarity index 100% rename from clang/test/CodeGen/x86_32-arguments-win32.c rename to clang/test/CodeGen/X86/x86_32-arguments-win32.c diff --git a/clang/test/CodeGen/x86_32-fpcc-struct-return.c b/clang/test/CodeGen/X86/x86_32-fpcc-struct-return.c similarity index 100% rename from clang/test/CodeGen/x86_32-fpcc-struct-return.c rename to clang/test/CodeGen/X86/x86_32-fpcc-struct-return.c diff --git a/clang/test/CodeGen/x86_32-inline-asm.c b/clang/test/CodeGen/X86/x86_32-inline-asm.c similarity index 100% rename from clang/test/CodeGen/x86_32-inline-asm.c rename to clang/test/CodeGen/X86/x86_32-inline-asm.c diff --git a/clang/test/CodeGen/x86_32-xsave.c b/clang/test/CodeGen/X86/x86_32-xsave.c similarity index 100% rename from clang/test/CodeGen/x86_32-xsave.c rename to clang/test/CodeGen/X86/x86_32-xsave.c diff --git a/clang/test/CodeGen/x86_64-PR42672.c b/clang/test/CodeGen/X86/x86_64-PR42672.c similarity index 100% rename from clang/test/CodeGen/x86_64-PR42672.c rename to clang/test/CodeGen/X86/x86_64-PR42672.c diff --git a/clang/test/CodeGen/x86_64-arguments-darwin.c b/clang/test/CodeGen/X86/x86_64-arguments-darwin.c similarity index 100% rename from clang/test/CodeGen/x86_64-arguments-darwin.c rename to clang/test/CodeGen/X86/x86_64-arguments-darwin.c diff --git a/clang/test/CodeGen/x86_64-arguments-nacl.c b/clang/test/CodeGen/X86/x86_64-arguments-nacl.c similarity index 100% rename from clang/test/CodeGen/x86_64-arguments-nacl.c rename to clang/test/CodeGen/X86/x86_64-arguments-nacl.c diff --git a/clang/test/CodeGen/x86_64-arguments-win32.c b/clang/test/CodeGen/X86/x86_64-arguments-win32.c similarity index 100% rename from clang/test/CodeGen/x86_64-arguments-win32.c rename to clang/test/CodeGen/X86/x86_64-arguments-win32.c diff --git a/clang/test/CodeGen/x86_64-arguments.c b/clang/test/CodeGen/X86/x86_64-arguments.c similarity index 100% rename from clang/test/CodeGen/x86_64-arguments.c rename to clang/test/CodeGen/X86/x86_64-arguments.c diff --git a/clang/test/CodeGen/x86_64-atomic-128.c b/clang/test/CodeGen/X86/x86_64-atomic-128.c similarity index 100% rename from clang/test/CodeGen/x86_64-atomic-128.c rename to clang/test/CodeGen/X86/x86_64-atomic-128.c diff --git a/clang/test/CodeGen/x86_64-floatvectors.c b/clang/test/CodeGen/X86/x86_64-floatvectors.c similarity index 100% rename from clang/test/CodeGen/x86_64-floatvectors.c rename to clang/test/CodeGen/X86/x86_64-floatvectors.c diff --git a/clang/test/CodeGen/x86_64-instrument-functions.c b/clang/test/CodeGen/X86/x86_64-instrument-functions.c similarity index 100% rename from clang/test/CodeGen/x86_64-instrument-functions.c rename to clang/test/CodeGen/X86/x86_64-instrument-functions.c diff --git a/clang/test/CodeGen/x86_64-longdouble.c b/clang/test/CodeGen/X86/x86_64-longdouble.c similarity index 100% rename from clang/test/CodeGen/x86_64-longdouble.c rename to clang/test/CodeGen/X86/x86_64-longdouble.c diff --git a/clang/test/CodeGen/x86_64-mno-sse.c b/clang/test/CodeGen/X86/x86_64-mno-sse.c 
similarity index 100% rename from clang/test/CodeGen/x86_64-mno-sse.c rename to clang/test/CodeGen/X86/x86_64-mno-sse.c diff --git a/clang/test/CodeGen/x86_64-mno-sse2.c b/clang/test/CodeGen/X86/x86_64-mno-sse2.c similarity index 100% rename from clang/test/CodeGen/x86_64-mno-sse2.c rename to clang/test/CodeGen/X86/x86_64-mno-sse2.c diff --git a/clang/test/CodeGen/x86_64-profiling-keep-fp.c b/clang/test/CodeGen/X86/x86_64-profiling-keep-fp.c similarity index 100% rename from clang/test/CodeGen/x86_64-profiling-keep-fp.c rename to clang/test/CodeGen/X86/x86_64-profiling-keep-fp.c diff --git a/clang/test/CodeGen/x86_64-xsave.c b/clang/test/CodeGen/X86/x86_64-xsave.c similarity index 100% rename from clang/test/CodeGen/x86_64-xsave.c rename to clang/test/CodeGen/X86/x86_64-xsave.c diff --git a/clang/test/CodeGen/x86_inlineasm_curly_bracket_escape.c b/clang/test/CodeGen/X86/x86_inlineasm_curly_bracket_escape.c similarity index 100% rename from clang/test/CodeGen/x86_inlineasm_curly_bracket_escape.c rename to clang/test/CodeGen/X86/x86_inlineasm_curly_bracket_escape.c From df63eedef64d715ce1f31843f7de9c11fe1e597f Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 8 Sep 2020 14:02:46 +0200 Subject: [PATCH 0051/1079] [mlir][VectorOps] Put back anonymous namespace to work around GCC5 bug. VectorToSCF.cpp:241:61: error: specialization of 'template mlir::LogicalResult {anonymous}::NDTransferOpHelper::doReplace()' in different namespace [-fpermissive] --- mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index 0eb46f7ba3cfb..0a74472a49f6e 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -131,7 +131,6 @@ class NDTransferOpHelper { VectorType minorVectorType; // vector<(minor_dims) x type> MemRefType memRefMinorVectorType; // memref> }; -} // namespace template void NDTransferOpHelper::emitLoops( @@ -395,6 +394,8 @@ LogicalResult NDTransferOpHelper::doReplace() { return success(); } +} // namespace + /// Analyzes the `transfer` to find an access dimension along the fastest remote /// MemRef dimension. If such a dimension with coalescing properties is found, /// `pivs` and `vectorBoundsCapture` are swapped so that the invocation of From 4e9f4d0b9d1dbf2c1d3e389b870a16c3dbd5c302 Mon Sep 17 00:00:00 2001 From: Ehsan Toosi Date: Mon, 24 Aug 2020 13:19:50 +0200 Subject: [PATCH 0052/1079] [mlir] Fix bug in copy removal A crash could happen due to copy removal. The bug is fixed and two more test cases are added. Differential Revision: https://reviews.llvm.org/D87128 --- mlir/lib/Transforms/CopyRemoval.cpp | 37 +++++++++++---- mlir/test/Transforms/copy-removal.mlir | 64 ++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Transforms/CopyRemoval.cpp b/mlir/lib/Transforms/CopyRemoval.cpp index ccfd02630ac28..c5a8da6329568 100644 --- a/mlir/lib/Transforms/CopyRemoval.cpp +++ b/mlir/lib/Transforms/CopyRemoval.cpp @@ -30,16 +30,35 @@ class CopyRemovalPass : public PassWrapper> { reuseCopySourceAsTarget(copyOp); reuseCopyTargetAsSource(copyOp); }); + for (std::pair &pair : replaceList) + pair.first.replaceAllUsesWith(pair.second); for (Operation *op : eraseList) op->erase(); } private: /// List of operations that need to be removed. 
- DenseSet eraseList; + llvm::SmallPtrSet eraseList; + + /// List of values that need to be replaced with their counterparts. + llvm::SmallDenseSet, 4> replaceList; + + /// Returns the allocation operation for `value` in `block` if it exists. + /// nullptr otherwise. + Operation *getAllocationOpInBlock(Value value, Block *block) { + assert(block && "Block cannot be null"); + Operation *op = value.getDefiningOp(); + if (op && op->getBlock() == block) { + auto effects = dyn_cast(op); + if (effects && effects.hasEffect()) + return op; + } + return nullptr; + } /// Returns the deallocation operation for `value` in `block` if it exists. - Operation *getDeallocationInBlock(Value value, Block *block) { + /// nullptr otherwise. + Operation *getDeallocationOpInBlock(Value value, Block *block) { assert(block && "Block cannot be null"); auto valueUsers = value.getUsers(); auto it = llvm::find_if(valueUsers, [&](Operation *op) { @@ -119,9 +138,10 @@ class CopyRemovalPass : public PassWrapper> { Value to = copyOp.getTarget(); Operation *copy = copyOp.getOperation(); + Block *copyBlock = copy->getBlock(); Operation *fromDefiningOp = from.getDefiningOp(); - Operation *fromFreeingOp = getDeallocationInBlock(from, copy->getBlock()); - Operation *toDefiningOp = to.getDefiningOp(); + Operation *fromFreeingOp = getDeallocationOpInBlock(from, copyBlock); + Operation *toDefiningOp = getAllocationOpInBlock(to, copyBlock); if (!fromDefiningOp || !fromFreeingOp || !toDefiningOp || !areOpsInTheSameBlock({fromFreeingOp, toDefiningOp, copy}) || hasUsersBetween(to, toDefiningOp, copy) || @@ -129,7 +149,7 @@ class CopyRemovalPass : public PassWrapper> { hasMemoryEffectOpBetween(copy, fromFreeingOp)) return; - to.replaceAllUsesWith(from); + replaceList.insert({to, from}); eraseList.insert(copy); eraseList.insert(toDefiningOp); eraseList.insert(fromFreeingOp); @@ -169,8 +189,9 @@ class CopyRemovalPass : public PassWrapper> { Value to = copyOp.getTarget(); Operation *copy = copyOp.getOperation(); - Operation *fromDefiningOp = from.getDefiningOp(); - Operation *fromFreeingOp = getDeallocationInBlock(from, copy->getBlock()); + Block *copyBlock = copy->getBlock(); + Operation *fromDefiningOp = getAllocationOpInBlock(from, copyBlock); + Operation *fromFreeingOp = getDeallocationOpInBlock(from, copyBlock); if (!fromDefiningOp || !fromFreeingOp || !areOpsInTheSameBlock({fromFreeingOp, fromDefiningOp, copy}) || hasUsersBetween(to, fromDefiningOp, copy) || @@ -178,7 +199,7 @@ class CopyRemovalPass : public PassWrapper> { hasMemoryEffectOpBetween(copy, fromFreeingOp)) return; - from.replaceAllUsesWith(to); + replaceList.insert({from, to}); eraseList.insert(copy); eraseList.insert(fromDefiningOp); eraseList.insert(fromFreeingOp); diff --git a/mlir/test/Transforms/copy-removal.mlir b/mlir/test/Transforms/copy-removal.mlir index f750dabb18a04..a0d1193b77d58 100644 --- a/mlir/test/Transforms/copy-removal.mlir +++ b/mlir/test/Transforms/copy-removal.mlir @@ -283,3 +283,67 @@ func @test_ReuseCopyTargetAsSource(%arg0: memref<2xf32>){ dealloc %temp : memref<2xf32> return } + +// ----- + +// The only redundant copy is linalg.copy(%4, %5) + +// CHECK-LABEL: func @loop_alloc +func @loop_alloc(%arg0: index, %arg1: index, %arg2: index, %arg3: memref<2xf32>, %arg4: memref<2xf32>) { + // CHECK: %{{.*}} = alloc() + %0 = alloc() : memref<2xf32> + dealloc %0 : memref<2xf32> + // CHECK: %{{.*}} = alloc() + %1 = alloc() : memref<2xf32> + // CHECK: linalg.copy + linalg.copy(%arg3, %1) : memref<2xf32>, memref<2xf32> + %2 = scf.for %arg5 = %arg0 to 
%arg1 step %arg2 iter_args(%arg6 = %1) -> (memref<2xf32>) { + %3 = cmpi "eq", %arg5, %arg1 : index + // CHECK: dealloc + dealloc %arg6 : memref<2xf32> + // CHECK: %[[PERCENT4:.*]] = alloc() + %4 = alloc() : memref<2xf32> + // CHECK-NOT: alloc + // CHECK-NOT: linalg.copy + // CHECK-NOT: dealloc + %5 = alloc() : memref<2xf32> + linalg.copy(%4, %5) : memref<2xf32>, memref<2xf32> + dealloc %4 : memref<2xf32> + // CHECK: %[[PERCENT6:.*]] = alloc() + %6 = alloc() : memref<2xf32> + // CHECK: linalg.copy(%[[PERCENT4]], %[[PERCENT6]]) + linalg.copy(%5, %6) : memref<2xf32>, memref<2xf32> + scf.yield %6 : memref<2xf32> + } + // CHECK: linalg.copy + linalg.copy(%2, %arg4) : memref<2xf32>, memref<2xf32> + dealloc %2 : memref<2xf32> + return +} + +// ----- + +// The linalg.copy operation can be removed in addition to alloc and dealloc +// operations. All uses of %0 is then replaced with %arg2. + +// CHECK-LABEL: func @check_with_affine_dialect +func @check_with_affine_dialect(%arg0: memref<4xf32>, %arg1: memref<4xf32>, %arg2: memref<4xf32>) { + // CHECK-SAME: (%[[ARG0:.*]]: memref<4xf32>, %[[ARG1:.*]]: memref<4xf32>, %[[RES:.*]]: memref<4xf32>) + // CHECK-NOT: alloc + %0 = alloc() : memref<4xf32> + affine.for %arg3 = 0 to 4 { + %5 = affine.load %arg0[%arg3] : memref<4xf32> + %6 = affine.load %arg1[%arg3] : memref<4xf32> + %7 = cmpf "ogt", %5, %6 : f32 + // CHECK: %[[SELECT_RES:.*]] = select + %8 = select %7, %5, %6 : f32 + // CHECK-NEXT: affine.store %[[SELECT_RES]], %[[RES]] + affine.store %8, %0[%arg3] : memref<4xf32> + } + // CHECK-NOT: linalg.copy + // CHECK-NOT: dealloc + "linalg.copy"(%0, %arg2) : (memref<4xf32>, memref<4xf32>) -> () + dealloc %0 : memref<4xf32> + //CHECK: return + return +} From 86bd8f82cc74725a08a40efe176d3d6b9c9cef92 Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Sat, 5 Sep 2020 17:52:23 +0300 Subject: [PATCH 0053/1079] [CMake] Remove dead FindPythonInterp code LLVM has bumped the minimum required CMake version to 3.13.4, so this has become dead code. Reviewed By: #libc, ldionne Differential Revision: https://reviews.llvm.org/D87189 --- clang/CMakeLists.txt | 37 +++++++++--------------------------- compiler-rt/CMakeLists.txt | 33 +++++++++----------------------- libcxx/CMakeLists.txt | 36 +++++++++++------------------------ lld/CMakeLists.txt | 39 ++++++++++---------------------------- llvm/CMakeLists.txt | 37 +++++++++--------------------------- 5 files changed, 48 insertions(+), 134 deletions(-) diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 5ac0e6b6ef0cb..f015951c7ec72 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -136,38 +136,19 @@ if( CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR ) set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} ) if(LLVM_INCLUDE_TESTS) - if(CMAKE_VERSION VERSION_LESS 3.12) - include(FindPythonInterp) - if(NOT PYTHONINTERP_FOUND) - message(FATAL_ERROR - "Unable to find Python interpreter, required for builds and testing. 
- - Please install Python or specify the PYTHON_EXECUTABLE CMake variable.") - endif() - - if( ${PYTHON_VERSION_STRING} VERSION_LESS 2.7 ) - message(FATAL_ERROR "Python 2.7 or newer is required") + find_package(Python3 COMPONENTS Interpreter) + if(NOT Python3_Interpreter_FOUND) + message(WARNING "Python3 not found, using python2 as a fallback") + find_package(Python2 COMPONENTS Interpreter REQUIRED) + if(Python2_VERSION VERSION_LESS 2.7) + message(SEND_ERROR "Python 2.7 or newer is required") endif() + # Treat python2 as python3 add_executable(Python3::Interpreter IMPORTED) set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) - set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) - else() - find_package(Python3 COMPONENTS Interpreter) - if(NOT Python3_Interpreter_FOUND) - message(WARNING "Python3 not found, using python2 as a fallback") - find_package(Python2 COMPONENTS Interpreter REQUIRED) - if(Python2_VERSION VERSION_LESS 2.7) - message(SEND_ERROR "Python 2.7 or newer is required") - endif() - - # Treat python2 as python3 - add_executable(Python3::Interpreter IMPORTED) - set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${Python2_EXECUTABLE}) - set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) - endif() + IMPORTED_LOCATION ${Python2_EXECUTABLE}) + set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) endif() # Check prebuilt llvm/utils. diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt index 0a0294f937dba..9967e293749bd 100644 --- a/compiler-rt/CMakeLists.txt +++ b/compiler-rt/CMakeLists.txt @@ -81,34 +81,19 @@ if (COMPILER_RT_STANDALONE_BUILD) set_target_properties(intrinsics_gen PROPERTIES FOLDER "Compiler-RT Misc") endif() - if(CMAKE_VERSION VERSION_LESS 3.12) - # Find Python interpreter. - include(FindPythonInterp) - if(NOT PYTHONINTERP_FOUND) - message(FATAL_ERROR " - Unable to find Python interpreter required testing. 
Please install Python - or specify the PYTHON_EXECUTABLE CMake variable.") + find_package(Python3 COMPONENTS Interpreter) + if(NOT Python3_Interpreter_FOUND) + message(WARNING "Python3 not found, using python2 as a fallback") + find_package(Python2 COMPONENTS Interpreter REQUIRED) + if(Python2_VERSION VERSION_LESS 2.7) + message(SEND_ERROR "Python 2.7 or newer is required") endif() + # Treat python2 as python3 add_executable(Python3::Interpreter IMPORTED) set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) - set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) - else() - find_package(Python3 COMPONENTS Interpreter) - if(NOT Python3_Interpreter_FOUND) - message(WARNING "Python3 not found, using python2 as a fallback") - find_package(Python2 COMPONENTS Interpreter REQUIRED) - if(Python2_VERSION VERSION_LESS 2.7) - message(SEND_ERROR "Python 2.7 or newer is required") - endif() - - # Treat python2 as python3 - add_executable(Python3::Interpreter IMPORTED) - set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${Python2_EXECUTABLE}) - set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) - endif() + IMPORTED_LOCATION ${Python2_EXECUTABLE}) + set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) endif() # Ensure that fat libraries are built correctly on Darwin diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index ea0aa0a259a22..a5c32d94aea29 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -41,33 +41,19 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXX_STANDALONE_BUIL endif() if (LIBCXX_STANDALONE_BUILD) - if(CMAKE_VERSION VERSION_LESS 3.12) - include(FindPythonInterp) - if( NOT PYTHONINTERP_FOUND ) - message(WARNING "Failed to find python interpreter. " - "The libc++ test suite will be disabled.") - set(LLVM_INCLUDE_TESTS OFF) - else() - add_executable(Python3::Interpreter IMPORTED) - set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) - set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) + find_package(Python3 COMPONENTS Interpreter) + if(NOT Python3_Interpreter_FOUND) + message(WARNING "Python3 not found, using python2 as a fallback") + find_package(Python2 COMPONENTS Interpreter REQUIRED) + if(Python2_VERSION VERSION_LESS 2.7) + message(SEND_ERROR "Python 2.7 or newer is required") endif() - else() - find_package(Python3 COMPONENTS Interpreter) - if(NOT Python3_Interpreter_FOUND) - message(WARNING "Python3 not found, using python2 as a fallback") - find_package(Python2 COMPONENTS Interpreter REQUIRED) - if(Python2_VERSION VERSION_LESS 2.7) - message(SEND_ERROR "Python 2.7 or newer is required") - endif() - # Treat python2 as python3 - add_executable(Python3::Interpreter IMPORTED) - set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${Python2_EXECUTABLE}) - set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) - endif() + # Treat python2 as python3 + add_executable(Python3::Interpreter IMPORTED) + set_target_properties(Python3::Interpreter PROPERTIES + IMPORTED_LOCATION ${Python2_EXECUTABLE}) + set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) endif() endif() diff --git a/lld/CMakeLists.txt b/lld/CMakeLists.txt index 7dae682cdef07..34a7a68da42c5 100644 --- a/lld/CMakeLists.txt +++ b/lld/CMakeLists.txt @@ -57,38 +57,19 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) include(CheckAtomic) if(LLVM_INCLUDE_TESTS) - if(CMAKE_VERSION VERSION_LESS 3.12) - include(FindPythonInterp) - if(NOT PYTHONINTERP_FOUND) - message(FATAL_ERROR - "Unable to find Python 
interpreter, required for testing. - - Please install Python or specify the PYTHON_EXECUTABLE CMake variable.") - endif() - - if(${PYTHON_VERSION_STRING} VERSION_LESS 2.7) - message(FATAL_ERROR "Python 2.7 or newer is required") + find_package(Python3 COMPONENTS Interpreter) + if(NOT Python3_Interpreter_FOUND) + message(WARNING "Python3 not found, using python2 as a fallback") + find_package(Python2 COMPONENTS Interpreter REQUIRED) + if(Python2_VERSION VERSION_LESS 2.7) + message(SEND_ERROR "Python 2.7 or newer is required") endif() - add_executable(Python3::Interpeter IMPORTED) + # Treat python2 as python3 + add_executable(Python3::Interpreter IMPORTED) set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) - set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) - else() - find_package(Python3 COMPONENTS Interpreter) - if(NOT Python3_Interpreter_FOUND) - message(WARNING "Python3 not found, using python2 as a fallback") - find_package(Python2 COMPONENTS Interpreter REQUIRED) - if(Python2_VERSION VERSION_LESS 2.7) - message(SEND_ERROR "Python 2.7 or newer is required") - endif() - - # Treat python2 as python3 - add_executable(Python3::Interpreter IMPORTED) - set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${Python2_EXECUTABLE}) - set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) - endif() + IMPORTED_LOCATION ${Python2_EXECUTABLE}) + set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) endif() # Check prebuilt llvm/utils. diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 4a7639c51121d..410103b0bfd68 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -696,38 +696,19 @@ option(LLVM_ENABLE_PLUGINS "Enable plugin support" ${LLVM_ENABLE_PLUGINS_default include(HandleLLVMOptions) -if(CMAKE_VERSION VERSION_LESS 3.12) - include(FindPythonInterp) - if( NOT PYTHONINTERP_FOUND ) - message(FATAL_ERROR - "Unable to find Python interpreter, required for builds and testing. 
-
-  Please install Python or specify the PYTHON_EXECUTABLE CMake variable.")
-  endif()
-
-  if( ${PYTHON_VERSION_STRING} VERSION_LESS 2.7 )
-    message(FATAL_ERROR "Python 2.7 or newer is required")
+find_package(Python3 COMPONENTS Interpreter)
+if(NOT Python3_Interpreter_FOUND)
+  message(WARNING "Python3 not found, using python2 as a fallback")
+  find_package(Python2 COMPONENTS Interpreter REQUIRED)
+  if(Python2_VERSION VERSION_LESS 2.7)
+    message(SEND_ERROR "Python 2.7 or newer is required")
   endif()
 
+  # Treat python2 as python3
   add_executable(Python3::Interpreter IMPORTED)
   set_target_properties(Python3::Interpreter PROPERTIES
-                        IMPORTED_LOCATION ${PYTHON_EXECUTABLE})
-  set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE})
-else()
-  find_package(Python3 COMPONENTS Interpreter)
-  if(NOT Python3_Interpreter_FOUND)
-    message(WARNING "Python3 not found, using python2 as a fallback")
-    find_package(Python2 COMPONENTS Interpreter REQUIRED)
-    if(Python2_VERSION VERSION_LESS 2.7)
-      message(SEND_ERROR "Python 2.7 or newer is required")
-    endif()
-
-    # Treat python2 as python3
-    add_executable(Python3::Interpreter IMPORTED)
-    set_target_properties(Python3::Interpreter PROPERTIES
-                          IMPORTED_LOCATION ${Python2_EXECUTABLE})
-    set(Python3_EXECUTABLE ${Python2_EXECUTABLE})
-  endif()
+                        IMPORTED_LOCATION ${Python2_EXECUTABLE})
+  set(Python3_EXECUTABLE ${Python2_EXECUTABLE})
 endif()
 
 ######

From e67405141836fcd88183863758eeb42f32e847a6 Mon Sep 17 00:00:00 2001
From: Denys Petrov
Date: Fri, 4 Sep 2020 15:03:09 +0300
Subject: [PATCH 0054/1079] [analyzer] [NFC] Introduce refactoring of
 PthreadLockChecker

Change capitalization of some names due to LLVM naming rules. Change names
of some variables to make them more descriptive. Rework similar bug reports
into one common function. Prepare code for the next patches to reduce
unrelated changes.

Differential Revision: https://reviews.llvm.org/D87138
---
 .../Checkers/PthreadLockChecker.cpp           | 271 ++++++++----------
 1 file changed, 118 insertions(+), 153 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
index 285d2da104f1a..88e80c481a5a7 100644
--- a/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
@@ -83,7 +83,7 @@ class PthreadLockChecker : public Checker<check::PostCall, check::DeadSymbols,
   CallDescriptionMap<FnCheck> PThreadCallbacks = {
       // Init.
       {{"pthread_mutex_init", 2}, &PthreadLockChecker::InitAnyLock},
@@ -167,46 +167,49 @@ class PthreadLockChecker : public Checker<check::PostCall, check::DeadSymbols,
+  void reportBug(CheckerContext &C, std::unique_ptr<BugType> BT[],
+                 const Expr *MtxExpr, CheckerKind CheckKind,
+                 StringRef Desc) const;
   // Init.
   void InitAnyLock(const CallEvent &Call, CheckerContext &C,
-                   CheckerKind checkkind) const;
-  void InitLockAux(const CallEvent &Call, CheckerContext &C, unsigned ArgNo,
-                   SVal Lock, CheckerKind checkkind) const;
+                   CheckerKind CheckKind) const;
+  void InitLockAux(const CallEvent &Call, CheckerContext &C,
+                   const Expr *MtxExpr, SVal MtxVal,
+                   CheckerKind CheckKind) const;
 
   // Lock, Try-lock.
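  // (Editorial aside, not part of the patch: the refactor threads the mutex
  // argument through as a pre-fetched Expr*/SVal pair instead of an argument
  // index, so every duplicated report site in the *Aux helpers collapses
  // into a single call of the shape
  //   reportBug(C, BT_doublelock, MtxExpr, CheckKind,
  //             "This lock has already been acquired");
  // as the hunks below show.)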
void AcquirePthreadLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; + CheckerKind CheckKind) const; void AcquireXNULock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; + CheckerKind CheckKind) const; void TryPthreadLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; + CheckerKind CheckKind) const; void TryXNULock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; + CheckerKind CheckKind) const; void TryFuchsiaLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; + CheckerKind CheckKind) const; void TryC11Lock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; - void AcquireLockAux(const CallEvent &Call, CheckerContext &C, unsigned ArgNo, - SVal lock, bool isTryLock, LockingSemantics semantics, - CheckerKind checkkind) const; + CheckerKind CheckKind) const; + void AcquireLockAux(const CallEvent &Call, CheckerContext &C, + const Expr *MtxExpr, SVal MtxVal, bool IsTryLock, + LockingSemantics Semantics, CheckerKind CheckKind) const; // Release. void ReleaseAnyLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; - void ReleaseLockAux(const CallEvent &Call, CheckerContext &C, unsigned ArgNo, - SVal lock, CheckerKind checkkind) const; + CheckerKind CheckKind) const; + void ReleaseLockAux(const CallEvent &Call, CheckerContext &C, + const Expr *MtxExpr, SVal MtxVal, + CheckerKind CheckKind) const; // Destroy. void DestroyPthreadLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; + CheckerKind CheckKind) const; void DestroyXNULock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; - void DestroyLockAux(const CallEvent &Call, CheckerContext &C, unsigned ArgNo, - SVal Lock, LockingSemantics semantics, - CheckerKind checkkind) const; + CheckerKind CheckKind) const; + void DestroyLockAux(const CallEvent &Call, CheckerContext &C, + const Expr *MtxExpr, SVal MtxVal, + LockingSemantics Semantics, CheckerKind CheckKind) const; public: void checkPostCall(const CallEvent &Call, CheckerContext &C) const; @@ -226,18 +229,18 @@ class PthreadLockChecker : public Checker BT_initlock[CK_NumCheckKinds]; mutable std::unique_ptr BT_lor[CK_NumCheckKinds]; - void initBugType(CheckerKind checkKind) const { - if (BT_doublelock[checkKind]) + void initBugType(CheckerKind CheckKind) const { + if (BT_doublelock[CheckKind]) return; - BT_doublelock[checkKind].reset( - new BugType{CheckNames[checkKind], "Double locking", "Lock checker"}); - BT_doubleunlock[checkKind].reset( - new BugType{CheckNames[checkKind], "Double unlocking", "Lock checker"}); - BT_destroylock[checkKind].reset(new BugType{ - CheckNames[checkKind], "Use destroyed lock", "Lock checker"}); - BT_initlock[checkKind].reset(new BugType{ - CheckNames[checkKind], "Init invalid lock", "Lock checker"}); - BT_lor[checkKind].reset(new BugType{CheckNames[checkKind], + BT_doublelock[CheckKind].reset( + new BugType{CheckNames[CheckKind], "Double locking", "Lock checker"}); + BT_doubleunlock[CheckKind].reset( + new BugType{CheckNames[CheckKind], "Double unlocking", "Lock checker"}); + BT_destroylock[CheckKind].reset(new BugType{ + CheckNames[CheckKind], "Use destroyed lock", "Lock checker"}); + BT_initlock[CheckKind].reset(new BugType{ + CheckNames[CheckKind], "Init invalid lock", "Lock checker"}); + BT_lor[CheckKind].reset(new BugType{CheckNames[CheckKind], "Lock order reversal", "Lock checker"}); } }; @@ -341,53 +344,53 @@ void 
PthreadLockChecker::printState(raw_ostream &Out, ProgramStateRef State, void PthreadLockChecker::AcquirePthreadLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - AcquireLockAux(Call, C, 0, Call.getArgSVal(0), false, PthreadSemantics, - checkKind); + CheckerKind CheckKind) const { + AcquireLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), false, + PthreadSemantics, CheckKind); } void PthreadLockChecker::AcquireXNULock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - AcquireLockAux(Call, C, 0, Call.getArgSVal(0), false, XNUSemantics, - checkKind); + CheckerKind CheckKind) const { + AcquireLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), false, + XNUSemantics, CheckKind); } void PthreadLockChecker::TryPthreadLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - AcquireLockAux(Call, C, 0, Call.getArgSVal(0), true, PthreadSemantics, - checkKind); + CheckerKind CheckKind) const { + AcquireLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), true, + PthreadSemantics, CheckKind); } void PthreadLockChecker::TryXNULock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - AcquireLockAux(Call, C, 0, Call.getArgSVal(0), true, PthreadSemantics, - checkKind); + CheckerKind CheckKind) const { + AcquireLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), true, + PthreadSemantics, CheckKind); } void PthreadLockChecker::TryFuchsiaLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - AcquireLockAux(Call, C, 0, Call.getArgSVal(0), true, PthreadSemantics, - checkKind); + CheckerKind CheckKind) const { + AcquireLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), true, + PthreadSemantics, CheckKind); } void PthreadLockChecker::TryC11Lock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - AcquireLockAux(Call, C, 0, Call.getArgSVal(0), true, PthreadSemantics, - checkKind); + CheckerKind CheckKind) const { + AcquireLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), true, + PthreadSemantics, CheckKind); } void PthreadLockChecker::AcquireLockAux(const CallEvent &Call, - CheckerContext &C, unsigned ArgNo, - SVal lock, bool isTryLock, - enum LockingSemantics semantics, - CheckerKind checkKind) const { - if (!ChecksEnabled[checkKind]) + CheckerContext &C, const Expr *MtxExpr, + SVal MtxVal, bool IsTryLock, + enum LockingSemantics Semantics, + CheckerKind CheckKind) const { + if (!ChecksEnabled[CheckKind]) return; - const MemRegion *lockR = lock.getAsRegion(); + const MemRegion *lockR = MtxVal.getAsRegion(); if (!lockR) return; @@ -398,28 +401,23 @@ void PthreadLockChecker::AcquireLockAux(const CallEvent &Call, if (const LockState *LState = state->get(lockR)) { if (LState->isLocked()) { - ExplodedNode *N = C.generateErrorNode(); - if (!N) - return; - initBugType(checkKind); - auto report = std::make_unique( - *BT_doublelock[checkKind], "This lock has already been acquired", N); - report->addRange(Call.getArgExpr(ArgNo)->getSourceRange()); - C.emitReport(std::move(report)); + reportBug(C, BT_doublelock, MtxExpr, CheckKind, + "This lock has already been acquired"); return; } else if (LState->isDestroyed()) { - reportUseDestroyedBug(Call, C, ArgNo, checkKind); + reportBug(C, BT_destroylock, MtxExpr, CheckKind, + "This lock has already been destroyed"); return; } } ProgramStateRef lockSucc = state; - if (isTryLock) { + if (IsTryLock) { // Bifurcate the state, and allow a mode where the lock acquisition fails. 
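    // (Editorial aside, not part of the patch: under PthreadSemantics a zero
    // return value means the lock was acquired, so the assume() below splits
    // the state into a failing and a succeeding branch; XNU non-try locks
    // return void and always succeed, as the switch on Semantics shows.)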
SVal RetVal = Call.getReturnValue(); if (auto DefinedRetVal = RetVal.getAs()) { ProgramStateRef lockFail; - switch (semantics) { + switch (Semantics) { case PthreadSemantics: std::tie(lockFail, lockSucc) = state->assume(*DefinedRetVal); break; @@ -434,7 +432,7 @@ void PthreadLockChecker::AcquireLockAux(const CallEvent &Call, } // We might want to handle the case when the mutex lock function was inlined // and returned an Unknown or Undefined value. - } else if (semantics == PthreadSemantics) { + } else if (Semantics == PthreadSemantics) { // Assume that the return value was 0. SVal RetVal = Call.getReturnValue(); if (auto DefinedRetVal = RetVal.getAs()) { @@ -447,7 +445,7 @@ void PthreadLockChecker::AcquireLockAux(const CallEvent &Call, // and returned an Unknown or Undefined value. } else { // XNU locking semantics return void on non-try locks - assert((semantics == XNUSemantics) && "Unknown locking semantics"); + assert((Semantics == XNUSemantics) && "Unknown locking semantics"); lockSucc = state; } @@ -459,18 +457,18 @@ void PthreadLockChecker::AcquireLockAux(const CallEvent &Call, void PthreadLockChecker::ReleaseAnyLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - ReleaseLockAux(Call, C, 0, Call.getArgSVal(0), checkKind); + CheckerKind CheckKind) const { + ReleaseLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), CheckKind); } void PthreadLockChecker::ReleaseLockAux(const CallEvent &Call, - CheckerContext &C, unsigned ArgNo, - SVal lock, - CheckerKind checkKind) const { - if (!ChecksEnabled[checkKind]) + CheckerContext &C, const Expr *MtxExpr, + SVal MtxVal, + CheckerKind CheckKind) const { + if (!ChecksEnabled[CheckKind]) return; - const MemRegion *lockR = lock.getAsRegion(); + const MemRegion *lockR = MtxVal.getAsRegion(); if (!lockR) return; @@ -481,18 +479,12 @@ void PthreadLockChecker::ReleaseLockAux(const CallEvent &Call, if (const LockState *LState = state->get(lockR)) { if (LState->isUnlocked()) { - ExplodedNode *N = C.generateErrorNode(); - if (!N) - return; - initBugType(checkKind); - auto Report = std::make_unique( - *BT_doubleunlock[checkKind], "This lock has already been unlocked", - N); - Report->addRange(Call.getArgExpr(ArgNo)->getSourceRange()); - C.emitReport(std::move(Report)); + reportBug(C, BT_doubleunlock, MtxExpr, CheckKind, + "This lock has already been unlocked"); return; } else if (LState->isDestroyed()) { - reportUseDestroyedBug(Call, C, ArgNo, checkKind); + reportBug(C, BT_destroylock, MtxExpr, CheckKind, + "This lock has already been destroyed"); return; } } @@ -502,17 +494,9 @@ void PthreadLockChecker::ReleaseLockAux(const CallEvent &Call, if (!LS.isEmpty()) { const MemRegion *firstLockR = LS.getHead(); if (firstLockR != lockR) { - ExplodedNode *N = C.generateErrorNode(); - if (!N) - return; - initBugType(checkKind); - auto report = std::make_unique( - *BT_lor[checkKind], - "This was not the most recently acquired lock. Possible " - "lock order reversal", - N); - report->addRange(Call.getArgExpr(ArgNo)->getSourceRange()); - C.emitReport(std::move(report)); + reportBug(C, BT_lor, MtxExpr, CheckKind, + "This was not the most recently acquired lock. Possible lock " + "order reversal"); return; } // Record that the lock was released. 
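Editorial aside, not part of the patch: the lock-order-reversal report above
fires when the mutex being released is not the head of the analyzer's lock
stack. A minimal illustration, assuming the plain pthread API the checker
models:

    pthread_mutex_lock(&m1);
    pthread_mutex_lock(&m2);
    pthread_mutex_unlock(&m1); // flagged: not the most recently acquired lock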
@@ -525,25 +509,27 @@ void PthreadLockChecker::ReleaseLockAux(const CallEvent &Call, void PthreadLockChecker::DestroyPthreadLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - DestroyLockAux(Call, C, 0, Call.getArgSVal(0), PthreadSemantics, checkKind); + CheckerKind CheckKind) const { + DestroyLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), + PthreadSemantics, CheckKind); } void PthreadLockChecker::DestroyXNULock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - DestroyLockAux(Call, C, 0, Call.getArgSVal(0), XNUSemantics, checkKind); + CheckerKind CheckKind) const { + DestroyLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), XNUSemantics, + CheckKind); } void PthreadLockChecker::DestroyLockAux(const CallEvent &Call, - CheckerContext &C, unsigned ArgNo, - SVal Lock, - enum LockingSemantics semantics, - CheckerKind checkKind) const { - if (!ChecksEnabled[checkKind]) + CheckerContext &C, const Expr *MtxExpr, + SVal MtxVal, + enum LockingSemantics Semantics, + CheckerKind CheckKind) const { + if (!ChecksEnabled[CheckKind]) return; - const MemRegion *LockR = Lock.getAsRegion(); + const MemRegion *LockR = MtxVal.getAsRegion(); if (!LockR) return; @@ -556,7 +542,7 @@ void PthreadLockChecker::DestroyLockAux(const CallEvent &Call, const LockState *LState = State->get(LockR); // Checking the return value of the destroy method only in the case of // PthreadSemantics - if (semantics == PthreadSemantics) { + if (Semantics == PthreadSemantics) { if (!LState || LState->isUnlocked()) { SymbolRef sym = Call.getReturnValue().getAsSymbol(); if (!sym) { @@ -581,36 +567,26 @@ void PthreadLockChecker::DestroyLockAux(const CallEvent &Call, return; } } - StringRef Message; - if (LState->isLocked()) { - Message = "This lock is still locked"; - } else { - Message = "This lock has already been destroyed"; - } + StringRef Message = LState->isLocked() + ? "This lock is still locked" + : "This lock has already been destroyed"; - ExplodedNode *N = C.generateErrorNode(); - if (!N) - return; - initBugType(checkKind); - auto Report = std::make_unique( - *BT_destroylock[checkKind], Message, N); - Report->addRange(Call.getArgExpr(ArgNo)->getSourceRange()); - C.emitReport(std::move(Report)); + reportBug(C, BT_destroylock, MtxExpr, CheckKind, Message); } void PthreadLockChecker::InitAnyLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - InitLockAux(Call, C, 0, Call.getArgSVal(0), checkKind); + CheckerKind CheckKind) const { + InitLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), CheckKind); } void PthreadLockChecker::InitLockAux(const CallEvent &Call, CheckerContext &C, - unsigned ArgNo, SVal Lock, - CheckerKind checkKind) const { - if (!ChecksEnabled[checkKind]) + const Expr *MtxExpr, SVal MtxVal, + CheckerKind CheckKind) const { + if (!ChecksEnabled[CheckKind]) return; - const MemRegion *LockR = Lock.getAsRegion(); + const MemRegion *LockR = MtxVal.getAsRegion(); if (!LockR) return; @@ -627,35 +603,24 @@ void PthreadLockChecker::InitLockAux(const CallEvent &Call, CheckerContext &C, return; } - StringRef Message; - - if (LState->isLocked()) { - Message = "This lock is still being held"; - } else { - Message = "This lock has already been initialized"; - } + StringRef Message = LState->isLocked() + ? 
"This lock is still being held" + : "This lock has already been initialized"; - ExplodedNode *N = C.generateErrorNode(); - if (!N) - return; - initBugType(checkKind); - auto Report = std::make_unique( - *BT_initlock[checkKind], Message, N); - Report->addRange(Call.getArgExpr(ArgNo)->getSourceRange()); - C.emitReport(std::move(Report)); + reportBug(C, BT_initlock, MtxExpr, CheckKind, Message); } -void PthreadLockChecker::reportUseDestroyedBug(const CallEvent &Call, - CheckerContext &C, - unsigned ArgNo, - CheckerKind checkKind) const { +void PthreadLockChecker::reportBug(CheckerContext &C, + std::unique_ptr BT[], + const Expr *MtxExpr, CheckerKind CheckKind, + StringRef Desc) const { ExplodedNode *N = C.generateErrorNode(); if (!N) return; - initBugType(checkKind); - auto Report = std::make_unique( - *BT_destroylock[checkKind], "This lock has already been destroyed", N); - Report->addRange(Call.getArgExpr(ArgNo)->getSourceRange()); + initBugType(CheckKind); + auto Report = + std::make_unique(*BT[CheckKind], Desc, N); + Report->addRange(MtxExpr->getSourceRange()); C.emitReport(std::move(Report)); } From 4964d75d7078b932ac6b17c1990adaa6eada75c1 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 8 Sep 2020 09:17:01 -0400 Subject: [PATCH 0055/1079] [InstCombine] add bitwise logic fold tests for D86395; NFC --- llvm/test/Transforms/InstCombine/xor.ll | 74 +++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/xor.ll b/llvm/test/Transforms/InstCombine/xor.ll index 312b0125f626f..ba275a6066419 100644 --- a/llvm/test/Transforms/InstCombine/xor.ll +++ b/llvm/test/Transforms/InstCombine/xor.ll @@ -1171,3 +1171,77 @@ define i8 @not_ashr_wrong_const(i8 %x) { %r = xor i8 %a, -2 ret i8 %r } + +; (~A & B) ^ A --> (A | B) +; The division ops are here to thwart complexity-based canonicalization: all ops are binops. + +define i32 @test52(i32 %p1, i32 %p2) { +; CHECK-LABEL: @test52( +; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]] +; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]] +; CHECK-NEXT: [[O:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[R:%.*]] = and i32 [[B]], [[O]] +; CHECK-NEXT: [[Z:%.*]] = xor i32 [[R]], [[A]] +; CHECK-NEXT: ret i32 [[Z]] +; + %a = udiv i32 42, %p1 + %b = udiv i32 42, %p2 + %o = xor i32 %a, -1 + %r = and i32 %o, %b + %z = xor i32 %r, %a + ret i32 %z +} + +; (~B & A) ^ B --> (A | B) +; The division ops are here to thwart complexity-based canonicalization: all ops are binops. 
+ +define i32 @test53(i32 %p1, i32 %p2) { +; CHECK-LABEL: @test53( +; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]] +; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]] +; CHECK-NEXT: [[O:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], [[O]] +; CHECK-NEXT: [[Z:%.*]] = xor i32 [[R]], [[B]] +; CHECK-NEXT: ret i32 [[Z]] +; + %a = udiv i32 42, %p1 + %b = udiv i32 42, %p2 + %o = xor i32 %b, -1 + %r = and i32 %o, %a + %z = xor i32 %r, %b + ret i32 %z +} + +define i32 @test54(i32 %p1, i32 %p2) { +; CHECK-LABEL: @test54( +; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]] +; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]] +; CHECK-NEXT: [[O:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[R:%.*]] = and i32 [[B]], [[O]] +; CHECK-NEXT: [[Z:%.*]] = xor i32 [[R]], [[A]] +; CHECK-NEXT: ret i32 [[Z]] +; + %a = udiv i32 42, %p1 + %b = udiv i32 42, %p2 + %o = xor i32 %a, -1 + %r = and i32 %b, %o + %z = xor i32 %r, %a + ret i32 %z +} + +define i32 @test55(i32 %p1, i32 %p2) { +; CHECK-LABEL: @test55( +; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]] +; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]] +; CHECK-NEXT: [[O:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[R:%.*]] = and i32 [[B]], [[O]] +; CHECK-NEXT: [[Z:%.*]] = xor i32 [[A]], [[R]] +; CHECK-NEXT: ret i32 [[Z]] +; + %a = udiv i32 42, %p1 + %b = udiv i32 42, %p2 + %o = xor i32 %a, -1 + %r = and i32 %o, %b + %z = xor i32 %a, %r + ret i32 %z +} From 156b127945a8c923d141e608b7380427da024376 Mon Sep 17 00:00:00 2001 From: Frank Derry Wanye Date: Tue, 8 Sep 2020 09:35:14 -0400 Subject: [PATCH 0056/1079] Add a new altera check for structure packing and alignment. The altera struct pack align lint check finds structs that are inefficiently packed or aligned and recommends packing/aligning of the structs using the packed and aligned attributes as needed in a warning. --- clang-tools-extra/clang-tidy/CMakeLists.txt | 2 + .../clang-tidy/ClangTidyForceLinker.h | 5 + .../clang-tidy/altera/AlteraTidyModule.cpp | 39 +++++ .../clang-tidy/altera/CMakeLists.txt | 15 ++ .../altera/StructPackAlignCheck.cpp | 144 ++++++++++++++++++ .../clang-tidy/altera/StructPackAlignCheck.h | 41 +++++ clang-tools-extra/docs/ReleaseNotes.rst | 21 +++ .../checks/altera-struct-pack-align.rst | 54 +++++++ .../docs/clang-tidy/checks/list.rst | 1 + clang-tools-extra/docs/clang-tidy/index.rst | 1 + .../checkers/altera-struct-pack-align.cpp | 101 ++++++++++++ 11 files changed, 424 insertions(+) create mode 100644 clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp create mode 100644 clang-tools-extra/clang-tidy/altera/CMakeLists.txt create mode 100644 clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp create mode 100644 clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h create mode 100644 clang-tools-extra/docs/clang-tidy/checks/altera-struct-pack-align.rst create mode 100644 clang-tools-extra/test/clang-tidy/checkers/altera-struct-pack-align.cpp diff --git a/clang-tools-extra/clang-tidy/CMakeLists.txt b/clang-tools-extra/clang-tidy/CMakeLists.txt index 02573534ccaef..923976197ebe8 100644 --- a/clang-tools-extra/clang-tidy/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/CMakeLists.txt @@ -46,6 +46,7 @@ endif() # If you add a check, also add it to ClangTidyForceLinker.h in this directory. 
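# (Editorial aside, not part of the patch: as the diffstat above shows, a new
# module is wired up in three build locations: this subdirectory list, the
# ALL_CLANG_TIDY_CHECKS library list below, and an anchor reference in
# ClangTidyForceLinker.h.)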
 add_subdirectory(android)
 add_subdirectory(abseil)
+add_subdirectory(altera)
 add_subdirectory(boost)
 add_subdirectory(bugprone)
 add_subdirectory(cert)
@@ -71,6 +72,7 @@ add_subdirectory(zircon)
 set(ALL_CLANG_TIDY_CHECKS
   clangTidyAndroidModule
   clangTidyAbseilModule
+  clangTidyAlteraModule
   clangTidyBoostModule
   clangTidyBugproneModule
   clangTidyCERTModule
diff --git a/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h b/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h
index 1d6bd2a4fd621..63e681f878db2 100644
--- a/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h
+++ b/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h
@@ -20,6 +20,11 @@ extern volatile int AbseilModuleAnchorSource;
 static int LLVM_ATTRIBUTE_UNUSED AbseilModuleAnchorDestination =
     AbseilModuleAnchorSource;
 
+// This anchor is used to force the linker to link the AlteraModule.
+extern volatile int AlteraModuleAnchorSource;
+static int LLVM_ATTRIBUTE_UNUSED AlteraModuleAnchorDestination =
+    AlteraModuleAnchorSource;
+
 // This anchor is used to force the linker to link the AndroidModule.
 extern volatile int AndroidModuleAnchorSource;
 static int LLVM_ATTRIBUTE_UNUSED AndroidModuleAnchorDestination =
diff --git a/clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp b/clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp
new file mode 100644
index 0000000000000..d91f67ac14856
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp
@@ -0,0 +1,39 @@
+//===--- AlteraTidyModule.cpp - clang-tidy --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../ClangTidy.h"
+#include "../ClangTidyModule.h"
+#include "../ClangTidyModuleRegistry.h"
+#include "StructPackAlignCheck.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang {
+namespace tidy {
+namespace altera {
+
+class AlteraModule : public ClangTidyModule {
+public:
+  void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override {
+    CheckFactories.registerCheck<StructPackAlignCheck>(
+        "altera-struct-pack-align");
+  }
+};
+
+} // namespace altera
+
+// Register the AlteraTidyModule using this statically initialized variable.
+static ClangTidyModuleRegistry::Add<AlteraModule>
+    X("altera-module", "Adds Altera FPGA OpenCL lint checks.");
+
+// This anchor is used to force the linker to link in the generated object file
+// and thus register the AlteraModule.
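+// (Editorial aside, not part of the patch: without an external reference to
+// this symbol the linker could drop the whole object file, and the static
+// registrar above with it; the ClangTidyForceLinker.h hunk earlier in this
+// commit keeps it alive with
+//   extern volatile int AlteraModuleAnchorSource;
+//   static int LLVM_ATTRIBUTE_UNUSED AlteraModuleAnchorDestination =
+//       AlteraModuleAnchorSource;
+// mirroring the existing module anchors.)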
+volatile int AlteraModuleAnchorSource = 0;
+
+} // namespace tidy
+} // namespace clang
diff --git a/clang-tools-extra/clang-tidy/altera/CMakeLists.txt b/clang-tools-extra/clang-tidy/altera/CMakeLists.txt
new file mode 100644
index 0000000000000..45131c1809a23
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/altera/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(LLVM_LINK_COMPONENTS support)
+
+add_clang_library(clangTidyAlteraModule
+  AlteraTidyModule.cpp
+  StructPackAlignCheck.cpp
+
+  LINK_LIBS
+  clangAnalysis
+  clangAST
+  clangASTMatchers
+  clangBasic
+  clangLex
+  clangTidy
+  clangTidyUtils
+  )
diff --git a/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp
new file mode 100644
index 0000000000000..9f28a22a9d03e
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp
@@ -0,0 +1,144 @@
+//===--- StructPackAlignCheck.cpp - clang-tidy ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "StructPackAlignCheck.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/RecordLayout.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include <math.h>
+#include <sstream>
+
+using namespace clang::ast_matchers;
+
+namespace clang {
+namespace tidy {
+namespace altera {
+
+void StructPackAlignCheck::registerMatchers(MatchFinder *Finder) {
+  Finder->addMatcher(recordDecl(isStruct(), isDefinition(),
+                                unless(isExpansionInSystemHeader()))
+                         .bind("struct"),
+                     this);
+}
+
+CharUnits
+StructPackAlignCheck::computeRecommendedAlignment(CharUnits MinByteSize) {
+  CharUnits NewAlign = CharUnits::fromQuantity(1);
+  if (!MinByteSize.isPowerOfTwo()) {
+    int MSB = (int)MinByteSize.getQuantity();
+    for (; MSB > 0; MSB /= 2) {
+      NewAlign = NewAlign.alignTo(
+          CharUnits::fromQuantity(((int)NewAlign.getQuantity()) * 2));
+      // Abort if the computed alignment meets the maximum configured alignment.
+      if (NewAlign.getQuantity() >= MaxConfiguredAlignment)
+        break;
+    }
+  } else {
+    NewAlign = MinByteSize;
+  }
+  return NewAlign;
+}
+
+void StructPackAlignCheck::check(const MatchFinder::MatchResult &Result) {
+  const auto *Struct = Result.Nodes.getNodeAs<RecordDecl>("struct");
+
+  // Do not trigger on templated struct declarations because the packing and
+  // alignment requirements are unknown.
+  if (Struct->isTemplated())
+    return;
+
+  // Get sizing info for the struct.
+  llvm::SmallVector<std::pair<unsigned int, unsigned int>, 10> FieldSizes;
+  unsigned int TotalBitSize = 0;
+  for (const FieldDecl *StructField : Struct->fields()) {
+    // For each StructField, record how big it is (in bits).
+    // Would be good to use a pair of <Size, FieldIndex> to advise a better
+    // packing order.
+    unsigned int StructFieldWidth =
+        (unsigned int)Result.Context
+            ->getTypeInfo(StructField->getType().getTypePtr())
+            .Width;
+    FieldSizes.emplace_back(StructFieldWidth, StructField->getFieldIndex());
+    // FIXME: Recommend a reorganization of the struct (sort by StructField
+    // size, largest to smallest).
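+    // (Editorial aside, not part of the patch: for the char/double/char
+    // example in the docs, this loop accumulates TotalBitSize = 80 bits, so
+    // MinByteSize below becomes ceil(80 / 8) = 10 bytes, and
+    // computeRecommendedAlignment then doubles 1 -> 2 -> 4 -> 8 -> 16, one
+    // step per halving of 10, yielding the 16-byte recommendation the tests
+    // expect.)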
+ TotalBitSize += StructFieldWidth; + } + + uint64_t CharSize = Result.Context->getCharWidth(); + CharUnits CurrSize = Result.Context->getASTRecordLayout(Struct).getSize(); + CharUnits MinByteSize = + CharUnits::fromQuantity(ceil((float)TotalBitSize / CharSize)); + CharUnits MaxAlign = CharUnits::fromQuantity( + ceil((float)Struct->getMaxAlignment() / CharSize)); + CharUnits CurrAlign = + Result.Context->getASTRecordLayout(Struct).getAlignment(); + CharUnits NewAlign = computeRecommendedAlignment(MinByteSize); + + bool IsPacked = Struct->hasAttr(); + bool NeedsPacking = (MinByteSize < CurrSize) && (MaxAlign != NewAlign) && + (CurrSize != NewAlign); + bool NeedsAlignment = CurrAlign.getQuantity() != NewAlign.getQuantity(); + + if (!NeedsAlignment && !NeedsPacking) + return; + + // If it's using much more space than it needs, suggest packing. + // (Do not suggest packing if it is currently explicitly aligned to what the + // minimum byte size would suggest as the new alignment.) + if (NeedsPacking && !IsPacked) { + diag(Struct->getLocation(), + "accessing fields in struct %0 is inefficient due to padding; only " + "needs %1 bytes but is using %2 bytes") + << Struct << (int)MinByteSize.getQuantity() + << (int)CurrSize.getQuantity() + << FixItHint::CreateInsertion(Struct->getEndLoc().getLocWithOffset(1), + " __attribute__((packed))"); + diag(Struct->getLocation(), + "use \"__attribute__((packed))\" to reduce the amount of padding " + "applied to struct %0", + DiagnosticIDs::Note) + << Struct; + } + + FixItHint FixIt; + AlignedAttr *Attribute = Struct->getAttr(); + std::string NewAlignQuantity = std::to_string((int)NewAlign.getQuantity()); + if (Attribute) { + std::ostringstream FixItString; + FixItString << "aligned(" << NewAlignQuantity << ")"; + FixIt = + FixItHint::CreateReplacement(Attribute->getRange(), FixItString.str()); + } else { + std::ostringstream FixItString; + FixItString << " __attribute__((aligned(" << NewAlignQuantity << ")))"; + FixIt = FixItHint::CreateInsertion(Struct->getEndLoc().getLocWithOffset(1), + FixItString.str()); + } + + // And suggest the minimum power-of-two alignment for the struct as a whole + // (with and without packing). + if (NeedsAlignment) { + diag(Struct->getLocation(), + "accessing fields in struct %0 is inefficient due to poor alignment; " + "currently aligned to %1 bytes, but recommended alignment is %2 bytes") + << Struct << (int)CurrAlign.getQuantity() << NewAlignQuantity << FixIt; + + diag(Struct->getLocation(), + "use \"__attribute__((aligned(%0)))\" to align struct %1 to %0 bytes", + DiagnosticIDs::Note) + << NewAlignQuantity << Struct; + } +} + +void StructPackAlignCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "MaxConfiguredAlignment", MaxConfiguredAlignment); +} + +} // namespace altera +} // namespace tidy +} // namespace clang diff --git a/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h new file mode 100644 index 0000000000000..b903641247e3c --- /dev/null +++ b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h @@ -0,0 +1,41 @@ +//===--- StructPackAlignCheck.h - clang-tidy --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_STRUCTPACKALIGNCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_STRUCTPACKALIGNCHECK_H + +#include "../ClangTidyCheck.h" + +namespace clang { +namespace tidy { +namespace altera { + +/// Finds structs that are inefficiently packed or aligned, and recommends +/// packing and/or aligning of said structs as needed. +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/altera-struct-pack-align.html +class StructPackAlignCheck : public ClangTidyCheck { +public: + StructPackAlignCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + MaxConfiguredAlignment(Options.get("MaxConfiguredAlignment", 128)) {} + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + void storeOptions(ClangTidyOptions::OptionMap &Opts); + +private: + const unsigned MaxConfiguredAlignment; + CharUnits computeRecommendedAlignment(CharUnits MinByteSize); +}; + +} // namespace altera +} // namespace tidy +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_STRUCTPACKALIGNCHECK_H diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 781fef27c4761..53c3894914e52 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -67,6 +67,27 @@ The improvements are... Improvements to clang-tidy -------------------------- +New modules +^^^^^^^^^^^ + +- New :doc:`altera ` module. + + Includes checks related to OpenCL for FPGA coding guidelines, based on the + `Altera SDK for OpenCL: Best Practices Guide + `_. + +New checks +^^^^^^^^^^ + +- New :doc:`altera-struct-pack-align + ` check. + + Finds structs that are inefficiently packed or aligned, and recommends + packing and/or aligning of said structs as needed. + +- New :doc:`bugprone-misplaced-pointer-arithmetic-in-alloc + ` check. + - New :doc:`bugprone-redundant-branch-condition ` check. diff --git a/clang-tools-extra/docs/clang-tidy/checks/altera-struct-pack-align.rst b/clang-tools-extra/docs/clang-tidy/checks/altera-struct-pack-align.rst new file mode 100644 index 0000000000000..b03a4fcf7fcf3 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/altera-struct-pack-align.rst @@ -0,0 +1,54 @@ +.. title:: clang-tidy - altera-struct-pack-align + +altera-struct-pack-align +======================== + +Finds structs that are inefficiently packed or aligned, and recommends +packing and/or aligning of said structs as needed. + +Structs that are not packed take up more space than they should, and accessing +structs that are not well aligned is inefficient. + +Fix-its are provided to fix both of these issues by inserting and/or amending +relevant struct attributes. + +Based on the `Altera SDK for OpenCL: Best Practices Guide +`_. + +.. code-block:: c++ + + // The following struct is originally aligned to 4 bytes, and thus takes up + // 12 bytes of memory instead of 10. Packing the struct will make it use + // only 10 bytes of memory, and aligning it to 16 bytes will make it + // efficient to access. + struct example { + char a; // 1 byte + double b; // 8 bytes + char c; // 1 byte + }; + + // The following struct is arranged in such a way that packing is not needed. 
+  // However, it is aligned to 4 bytes instead of 8, and thus needs to be
+  // explicitly aligned.
+  struct implicitly_packed_example {
+    char a; // 1 byte
+    char b; // 1 byte
+    char c; // 1 byte
+    char d; // 1 byte
+    int e; // 4 bytes
+  };
+
+  // The following struct is explicitly aligned and packed.
+  struct good_example {
+    char a; // 1 byte
+    double b; // 8 bytes
+    char c; // 1 byte
+  } __attribute__((packed)) __attribute__((aligned(16)));
+
+  // Explicitly aligning a struct to the wrong value will result in a warning.
+  // The following example should be aligned to 16 bytes, not 32.
+  struct badly_aligned_example {
+    char a; // 1 byte
+    double b; // 8 bytes
+    char c; // 1 byte
+  } __attribute__((packed)) __attribute__((aligned(32)));
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index 91414ee8c90f3..c569ce704d979 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -30,6 +30,7 @@ Clang-Tidy Checks
    `abseil-time-comparison <abseil-time-comparison.html>`_, "Yes"
    `abseil-time-subtraction <abseil-time-subtraction.html>`_, "Yes"
    `abseil-upgrade-duration-conversions <abseil-upgrade-duration-conversions.html>`_, "Yes"
+   `altera-struct-pack-align <altera-struct-pack-align.html>`_,
    `android-cloexec-accept <android-cloexec-accept.html>`_, "Yes"
    `android-cloexec-accept4 <android-cloexec-accept4.html>`_,
    `android-cloexec-creat <android-cloexec-creat.html>`_, "Yes"
diff --git a/clang-tools-extra/docs/clang-tidy/index.rst b/clang-tools-extra/docs/clang-tidy/index.rst
index b9a4a7d694b4f..a85c721541784 100644
--- a/clang-tools-extra/docs/clang-tidy/index.rst
+++ b/clang-tools-extra/docs/clang-tidy/index.rst
@@ -58,6 +58,7 @@ There are currently the following groups of checks:
 Name prefix            Description
 ====================== =========================================================
 ``abseil-``            Checks related to Abseil library.
+``altera-``            Checks related to OpenCL programming for FPGAs.
 ``android-``           Checks related to Android.
 ``boost-``             Checks related to Boost library.
 ``bugprone-``          Checks that target bugprone code constructs.
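Editorial aside, not part of the patch: the check reads one option,
MaxConfiguredAlignment, with a default of 128, as StructPackAlignCheck.h
earlier in this commit shows. A sketch of tuning it from a .clang-tidy file,
assuming the usual check-namespaced option key:

    Checks: '-*,altera-struct-pack-align'
    CheckOptions:
      - key: altera-struct-pack-align.MaxConfiguredAlignment
        value: 64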
diff --git a/clang-tools-extra/test/clang-tidy/checkers/altera-struct-pack-align.cpp b/clang-tools-extra/test/clang-tidy/checkers/altera-struct-pack-align.cpp new file mode 100644 index 0000000000000..615b6cafe87a2 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/altera-struct-pack-align.cpp @@ -0,0 +1,101 @@ +// RUN: %check_clang_tidy %s altera-struct-pack-align %t -- -header-filter=.* + +// Struct needs both alignment and packing +struct error { + char a; + double b; + char c; +}; +// CHECK-MESSAGES: :[[@LINE-5]]:8: warning: accessing fields in struct 'error' is inefficient due to padding; only needs 10 bytes but is using 24 bytes [altera-struct-pack-align] +// CHECK-MESSAGES: :[[@LINE-6]]:8: note: use "__attribute__((packed))" to reduce the amount of padding applied to struct 'error' +// CHECK-MESSAGES: :[[@LINE-7]]:8: warning: accessing fields in struct 'error' is inefficient due to poor alignment; currently aligned to 8 bytes, but recommended alignment is 16 bytes [altera-struct-pack-align] +// CHECK-MESSAGES: :[[@LINE-8]]:8: note: use "__attribute__((aligned(16)))" to align struct 'error' to 16 bytes +// CHECK-FIXES: __attribute__((packed)) +// CHECK-FIXES: __attribute__((aligned(16))); + +// Struct is explicitly packed, but needs alignment +struct error_packed { + char a; + double b; + char c; +} __attribute__((packed)); +// CHECK-MESSAGES: :[[@LINE-5]]:8: warning: accessing fields in struct 'error_packed' is inefficient due to poor alignment; currently aligned to 1 bytes, but recommended alignment is 16 bytes [altera-struct-pack-align] +// CHECK-MESSAGES: :[[@LINE-6]]:8: note: use "__attribute__((aligned(16)))" to align struct 'error_packed' to 16 bytes +// CHECK-FIXES: __attribute__((aligned(16))) + +// Struct is properly packed, but needs alignment +struct align_only { + char a; + char b; + char c; + char d; + int e; + double f; +}; +// CHECK-MESSAGES: :[[@LINE-8]]:8: warning: accessing fields in struct 'align_only' is inefficient due to poor alignment; currently aligned to 8 bytes, but recommended alignment is 16 bytes [altera-struct-pack-align] +// CHECK-MESSAGES: :[[@LINE-9]]:8: note: use "__attribute__((aligned(16)))" to align struct 'align_only' to 16 bytes +// CHECK-FIXES: __attribute__((aligned(16))); + +// Struct is perfectly packed but wrongly aligned +struct bad_align { + char a; + double b; + char c; +} __attribute__((packed)) __attribute__((aligned(8))); +// CHECK-MESSAGES: :[[@LINE-5]]:8: warning: accessing fields in struct 'bad_align' is inefficient due to poor alignment; currently aligned to 8 bytes, but recommended alignment is 16 bytes [altera-struct-pack-align] +// CHECK-MESSAGES: :[[@LINE-6]]:8: note: use "__attribute__((aligned(16)))" to align struct 'bad_align' to 16 bytes +// CHECK-FIXES: __attribute__((aligned(16))); + +struct bad_align2 { + char a; + double b; + char c; +} __attribute__((packed)) __attribute__((aligned(32))); +// CHECK-MESSAGES: :[[@LINE-5]]:8: warning: accessing fields in struct 'bad_align2' is inefficient due to poor alignment; currently aligned to 32 bytes, but recommended alignment is 16 bytes [altera-struct-pack-align] +// CHECK-MESSAGES: :[[@LINE-6]]:8: note: use "__attribute__((aligned(16)))" to align struct 'bad_align2' to 16 bytes +// CHECK-FIXES: __attribute__((aligned(16))); + +struct bad_align3 { + char a; + double b; + char c; +} __attribute__((packed)) __attribute__((aligned(4))); +// CHECK-MESSAGES: :[[@LINE-5]]:8: warning: accessing fields in struct 'bad_align3' is inefficient due to poor alignment; currently 
aligned to 4 bytes, but recommended alignment is 16 bytes [altera-struct-pack-align]
+// CHECK-MESSAGES: :[[@LINE-6]]:8: note: use "__attribute__((aligned(16)))" to align struct 'bad_align3' to 16 bytes
+// CHECK-FIXES: __attribute__((aligned(16)));
+
+// Struct is both perfectly packed and aligned
+struct success {
+  char a;
+  double b;
+  char c;
+} __attribute__((packed)) __attribute__((aligned(16)));
+//Should take 10 bytes and be aligned to 16 bytes
+
+// Struct is properly packed, and explicitly aligned
+struct success2 {
+  int a;
+  int b;
+  int c;
+} __attribute__((aligned(16)));
+
+// If struct is properly aligned, packing not needed
+struct success3 {
+  char a;
+  double b;
+  char c;
+} __attribute__((aligned(16)));
+
+// If struct is templated, warnings should not be triggered
+template <typename A, typename B>
+struct success4 {
+  A a;
+  B b;
+  int c;
+};
+
+// Warnings should not trigger on struct instantiations
+void no_trigger_on_instantiation() {
+  struct bad_align3 instantiated { 'a', 0.001, 'b' };
+}
+
From 9c9974c3ccb6468cc83f759240293538cf123fcd Mon Sep 17 00:00:00 2001
From: Haojian Wu
Date: Tue, 8 Sep 2020 15:34:52 +0200
Subject: [PATCH 0057/1079] [clang] Limit the maximum level of fold-expr
 expansion.

Introduce a new diagnostic, and respect the bracket-depth (256) by default.

Differential Revision: https://reviews.llvm.org/D86936
---
 clang/include/clang/Basic/DiagnosticSemaKinds.td |  3 +++
 clang/lib/Sema/TreeTransform.h                   | 13 +++++++++++++
 clang/test/SemaCXX/fold_expr_expansion_limit.cpp |  9 +++++++++
 3 files changed, 25 insertions(+)
 create mode 100644 clang/test/SemaCXX/fold_expr_expansion_limit.cpp

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index e1601da74b735..ec0c0fd9fa8ce 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -5092,6 +5092,9 @@ def err_fold_expression_empty : Error<
   "with no fallback value">;
 def err_fold_expression_bad_operand : Error<
   "expression not permitted as operand of fold expression">;
+def err_fold_expression_limit_exceeded: Error<
+  "instantiating fold expression with %0 arguments exceeded expression nesting "
+  "limit of %1">, DefaultFatal, NoSFINAE;
 def err_unexpected_typedef : Error<
   "unexpected type name %0: expected expression">;
 
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 4c8293f3bf4c0..6457b192477e3 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -28,6 +28,7 @@
 #include "clang/AST/StmtCXX.h"
 #include "clang/AST/StmtObjC.h"
 #include "clang/AST/StmtOpenMP.h"
+#include "clang/Basic/DiagnosticParse.h"
 #include "clang/Basic/OpenMPKinds.h"
 #include "clang/Sema/Designator.h"
 #include "clang/Sema/Lookup.h"
@@ -13193,6 +13194,18 @@ TreeTransform<Derived>::TransformCXXFoldExpr(CXXFoldExpr *E) {
                                             E->getEllipsisLoc(), RHS.get(),
                                             E->getEndLoc(), NumExpansions);
   }
 
+  // Formally a fold expression expands to nested parenthesized expressions.
+  // Enforce this limit to avoid creating trees so deep we can't safely traverse
+  // them.
+  if (NumExpansions && SemaRef.getLangOpts().BracketDepth < NumExpansions) {
+    SemaRef.Diag(E->getEllipsisLoc(),
+                 clang::diag::err_fold_expression_limit_exceeded)
+        << *NumExpansions << SemaRef.getLangOpts().BracketDepth
+        << E->getSourceRange();
+    SemaRef.Diag(E->getEllipsisLoc(), diag::note_bracket_depth);
+    return ExprError();
+  }
+
   // The transform has determined that we should perform an elementwise
   // expansion of the pattern. Do so.
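  // (Editorial aside, not part of the patch: the guard above only triggers on
  // the nested, parenthesized expansion path; the new test below compiles with
  // -fbracket-depth 2 and shows a three-element pack hitting the fatal
  // diagnostic instead of building an expression tree of depth three.)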
ExprResult Result = getDerived().TransformExpr(E->getInit()); diff --git a/clang/test/SemaCXX/fold_expr_expansion_limit.cpp b/clang/test/SemaCXX/fold_expr_expansion_limit.cpp new file mode 100644 index 0000000000000..600278da78287 --- /dev/null +++ b/clang/test/SemaCXX/fold_expr_expansion_limit.cpp @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -fsyntax-only -fbracket-depth 2 -verify -std=c++17 %s + +template struct seq { + constexpr bool zero() { return (true && ... && (V == 0)); }; // expected-error {{instantiating fold expression with 3 arguments exceeded expression nesting limit of 2}} \ + expected-note {{use -fbracket-depth}} +}; +constexpr unsigned N = 3; +auto x = __make_integer_seq{}; +static_assert(!x.zero(), ""); // expected-note {{in instantiation of member function}} From 51d30c3429fa0f46bf8c0e4a38840952c11be4f9 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 8 Sep 2020 15:40:14 +0200 Subject: [PATCH 0058/1079] [mlir][VectorOps] Fix more GCC5 weirdness VectorToSCF.cpp:515:47: error: specialization of 'template mlir::LogicalResult mlir::VectorTransferRewriter::matchAndRewrite(mlir::Operation*, mlir::PatternRewriter&) const' in different namespace [-fpermissive] --- mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index 0a74472a49f6e..c0d283d7af451 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -497,6 +497,8 @@ static void emitWithBoundsChecks( inBoundsFun(scalarAccessExprs); } +namespace mlir { + /// Lowers TransferReadOp into a combination of: /// 1. local memory allocation; /// 2. perfect loop nest over: @@ -666,8 +668,6 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( return success(); } -namespace mlir { - void populateVectorToSCFConversionPatterns( OwningRewritePatternList &patterns, MLIRContext *context, const VectorTransferToSCFOptions &options) { From 94cfbef0a74ec3e5490878dc417fea5ecfcf2a6a Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Tue, 8 Sep 2020 14:41:42 +0100 Subject: [PATCH 0059/1079] [NFC][ARM] Precommit test --- .../Thumb2/LowOverheadLoops/remat-vctp.ll | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll new file mode 100644 index 0000000000000..9178217a89e92 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m -mattr=+mve.fp %s -o - | FileCheck %s + +define hidden void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg4, i16 zeroext %arg5) { +; CHECK-LABEL: remat_vctp: +; CHECK: @ %bb.0: @ %bb +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: ldrd lr, r12, [sp, #80] +; CHECK-NEXT: vmvn.i32 q0, #0x80000000 +; CHECK-NEXT: vmov.i32 q1, #0x3f +; CHECK-NEXT: vmov.i32 q2, #0x1 +; CHECK-NEXT: .LBB0_1: @ %bb6 +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vctp.32 r12 +; CHECK-NEXT: subs.w r12, r12, #4 +; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrwt.u32 q4, [r1], #16 +; CHECK-NEXT: vabs.s32 q5, q4 +; CHECK-NEXT: 
vcls.s32 q3, q5 +; CHECK-NEXT: vshl.u32 q5, q5, q3 +; CHECK-NEXT: vadd.i32 q3, q3, q2 +; CHECK-NEXT: vshr.u32 q6, q5, #24 +; CHECK-NEXT: vand q6, q6, q1 +; CHECK-NEXT: vldrw.u32 q7, [lr, q6, uxtw #2] +; CHECK-NEXT: vqrdmulh.s32 q6, q7, q5 +; CHECK-NEXT: vqsub.s32 q6, q0, q6 +; CHECK-NEXT: vqrdmulh.s32 q6, q7, q6 +; CHECK-NEXT: vqshl.s32 q6, q6, #1 +; CHECK-NEXT: vqrdmulh.s32 q5, q6, q5 +; CHECK-NEXT: vqsub.s32 q5, q0, q5 +; CHECK-NEXT: vqrdmulh.s32 q5, q6, q5 +; CHECK-NEXT: vqshl.s32 q5, q5, #1 +; CHECK-NEXT: vpt.s32 lt, q4, zr +; CHECK-NEXT: vnegt.s32 q5, q5 +; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrwt.u32 q4, [r0], #16 +; CHECK-NEXT: vqrdmulh.s32 q4, q4, q5 +; CHECK-NEXT: vpstt +; CHECK-NEXT: vstrwt.32 q4, [r2], #16 +; CHECK-NEXT: vstrwt.32 q3, [r3], #16 +; CHECK-NEXT: bgt .LBB0_1 +; CHECK-NEXT: @ %bb.2: @ %bb44 +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: pop {r7, pc} +bb: + %i = zext i16 %arg5 to i32 + br label %bb6 + +bb6: ; preds = %bb6, %bb + %i7 = phi i32* [ %arg3, %bb ], [ %i38, %bb6 ] + %i8 = phi i32 [ %i, %bb ], [ %i42, %bb6 ] + %i9 = phi i32* [ %arg2, %bb ], [ %i41, %bb6 ] + %i10 = phi i32* [ %arg1, %bb ], [ %i40, %bb6 ] + %i11 = phi i32* [ %arg, %bb ], [ %i39, %bb6 ] + %i12 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i8) + %i13 = bitcast i32* %i11 to <4 x i32>* + %i14 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %i13, i32 4, <4 x i1> %i12, <4 x i32> zeroinitializer) + %i15 = bitcast i32* %i10 to <4 x i32>* + %i16 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %i15, i32 4, <4 x i1> %i12, <4 x i32> zeroinitializer) + %i17 = icmp slt <4 x i32> %i16, zeroinitializer + %i18 = sub <4 x i32> zeroinitializer, %i16 + %i19 = select <4 x i1> %i17, <4 x i32> %i18, <4 x i32> %i16 + %i20 = tail call <4 x i32> @llvm.arm.mve.vcls.v4i32(<4 x i32> %i19) + %i21 = shl <4 x i32> %i19, %i20 + %i22 = add <4 x i32> %i20, + %i23 = lshr <4 x i32> %i21, + %i24 = and <4 x i32> %i23, + %i25 = tail call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %arg4, <4 x i32> %i24, i32 32, i32 2, i32 0) + %i26 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i25, <4 x i32> %i21) + %i27 = tail call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> , <4 x i32> %i26) + %i28 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i25, <4 x i32> %i27) + %i29 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %i28, i32 1, i32 0) + %i30 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i29, <4 x i32> %i21) + %i31 = tail call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> , <4 x i32> %i30) + %i32 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i29, <4 x i32> %i31) + %i33 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %i32, i32 1, i32 0) + %i34 = tail call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> %i33, <4 x i1> %i17, <4 x i32> %i33) + %i35 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i14, <4 x i32> %i34) + %i36 = bitcast i32* %i9 to <4 x i32>* + %i37 = bitcast i32* %i7 to <4 x i32>* + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %i35, <4 x i32>* %i36, i32 4, <4 x i1> %i12) + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %i22, <4 x i32>* %i37, i32 4, <4 x i1> %i12) + %i38 = getelementptr inbounds i32, i32* %i7, i32 4 + %i39 = getelementptr inbounds i32, i32* %i11, i32 4 + %i40 = getelementptr inbounds i32, i32* %i10, i32 4 + %i41 = getelementptr 
inbounds i32, i32* %i9, i32 4 + %i42 = add nsw i32 %i8, -4 + %i43 = icmp sgt i32 %i8, 4 + br i1 %i43, label %bb6, label %bb44 + +bb44: ; preds = %bb6 + ret void +} + +declare <4 x i1> @llvm.arm.mve.vctp32(i32) +declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) +declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) +declare <4 x i32> @llvm.arm.mve.vcls.v4i32(<4 x i32>) +declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32*, <4 x i32>, i32, i32, i32) +declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32>, i32, i32) +declare <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>) From c7b7c32f4a25d15e992215c8524871bef47d959b Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 4 Sep 2020 16:44:58 +0100 Subject: [PATCH 0060/1079] [DSE,MemorySSA] Increase walker limit a bit. This slightly bumps the walker limit so that it covers more cases while not increasing compile-time too much: http://llvm-compile-time-tracker.com/compare.php?from=0fc1c2b51ba0cfb9145139af35be638333865251&to=91144a50ea4fa82c0c877e77784f60371640b263&stat=instructions --- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 49e811b298a60..892ba559e7903 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -114,9 +114,9 @@ static cl::opt cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 100)")); static cl::opt MemorySSAUpwardsStepLimit( - "dse-memoryssa-walklimit", cl::init(70), cl::Hidden, + "dse-memoryssa-walklimit", cl::init(90), cl::Hidden, cl::desc("The maximum number of steps while walking upwards to find " - "MemoryDefs that may be killed (default = 70)")); + "MemoryDefs that may be killed (default = 90)")); static cl::opt MemorySSAPartialStoreLimit( "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden, From e09e1d97c112ef9488b2f88db560d3d459c0652e Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 8 Sep 2020 10:00:24 -0400 Subject: [PATCH 0061/1079] [gn build] (manually) port 156b127945a8 --- .../clang-tools-extra/clang-tidy/BUILD.gn | 1 + .../clang-tidy/altera/BUILD.gn | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/altera/BUILD.gn diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn index 81c9ec0ede11f..18aa728b0db90 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn @@ -42,6 +42,7 @@ group("all-checks") { # If you add a check, also add it to ClangTidyForceLinker.h. 
deps = [ "//clang-tools-extra/clang-tidy/abseil", + "//clang-tools-extra/clang-tidy/altera", "//clang-tools-extra/clang-tidy/android", "//clang-tools-extra/clang-tidy/boost", "//clang-tools-extra/clang-tidy/bugprone", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/altera/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/altera/BUILD.gn new file mode 100644 index 0000000000000..52f2e3d5f23d6 --- /dev/null +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/altera/BUILD.gn @@ -0,0 +1,18 @@ +static_library("altera") { + output_name = "clangTidyAlteraModule" + configs += [ "//llvm/utils/gn/build:clang_code" ] + deps = [ + "//clang-tools-extra/clang-tidy", + "//clang-tools-extra/clang-tidy/utils", + "//clang/lib/AST", + "//clang/lib/ASTMatchers", + "//clang/lib/Analysis", + "//clang/lib/Basic", + "//clang/lib/Lex", + "//llvm/lib/Support", + ] + sources = [ + "AlteraTidyModule.cpp", + "StructPackAlignCheck.cpp", + ] +} From 9933188c90615c9c264ebb69117f09726e909a25 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 8 Sep 2020 10:02:00 -0400 Subject: [PATCH 0062/1079] StructPackAlignCheck: Fix a -Winconsistent-missing-override warning --- clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h index b903641247e3c..510e03030590c 100644 --- a/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h +++ b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h @@ -27,7 +27,7 @@ class StructPackAlignCheck : public ClangTidyCheck { MaxConfiguredAlignment(Options.get("MaxConfiguredAlignment", 128)) {} void registerMatchers(ast_matchers::MatchFinder *Finder) override; void check(const ast_matchers::MatchFinder::MatchResult &Result) override; - void storeOptions(ClangTidyOptions::OptionMap &Opts); + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; private: const unsigned MaxConfiguredAlignment; From 2d9d270e77918dfc19ad9b3150ee7d40eeb8ca79 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 8 Sep 2020 16:09:33 +0200 Subject: [PATCH 0063/1079] Revert 3e782bf809 "[Sema][MSVC] warn at dynamic_cast when /GR- is given" This caused more warnings than expected, see https://crbug.com/1126019 Also reverts the follow-up 7907e5516. > Differential Revision: https://reviews.llvm.org/D86369 --- clang/include/clang/Basic/DiagnosticGroups.td | 2 -- .../clang/Basic/DiagnosticSemaKinds.td | 6 ------ clang/lib/Sema/SemaCast.cpp | 12 ----------- clang/lib/Sema/SemaExprCXX.cpp | 6 ------ clang/test/SemaCXX/ms_no_dynamic_cast.cpp | 21 ------------------- clang/test/SemaCXX/no-rtti.cpp | 2 +- clang/test/SemaCXX/no_dynamic_cast.cpp | 21 ------------------- 7 files changed, 1 insertion(+), 69 deletions(-) delete mode 100644 clang/test/SemaCXX/ms_no_dynamic_cast.cpp delete mode 100644 clang/test/SemaCXX/no_dynamic_cast.cpp diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index a9bd52b8afcdf..6b4dcc850612e 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1235,5 +1235,3 @@ in addition with the pragmas or -fmax-tokens flag to get any warnings. 
} def WebAssemblyExceptionSpec : DiagGroup<"wasm-exception-spec">; - -def RTTI : DiagGroup<"rtti">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index ec0c0fd9fa8ce..46f7ffc97ce77 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7441,12 +7441,6 @@ def err_no_typeid_with_fno_rtti : Error< "use of typeid requires -frtti">; def err_no_dynamic_cast_with_fno_rtti : Error< "use of dynamic_cast requires -frtti">; -def warn_no_dynamic_cast_with_rtti_disabled: Warning< - "dynamic_cast will not work since RTTI data is disabled by " - "%select{-fno-rtti-data|/GR-}0">, InGroup; -def warn_no_typeid_with_rtti_disabled: Warning< - "typeid will not work since RTTI data is disabled by " - "%select{-fno-rtti-data|/GR-}0">, InGroup; def err_cannot_form_pointer_to_member_of_reference_type : Error< "cannot form a pointer-to-member to member %0 of reference type %1">; diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index b213fb756a650..726900c59f20e 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -890,18 +890,6 @@ void CastOperation::CheckDynamicCast() { return; } - // Warns when dynamic_cast is used with RTTI data disabled. - if (!Self.getLangOpts().RTTIData) { - bool MicrosoftABI = - Self.getASTContext().getTargetInfo().getCXXABI().isMicrosoft(); - bool isClangCL = Self.getDiagnostics().getDiagnosticOptions().getFormat() == - DiagnosticOptions::MSVC; - if (MicrosoftABI || !DestPointee->isVoidType()) - Self.Diag(OpRange.getBegin(), - diag::warn_no_dynamic_cast_with_rtti_disabled) - << isClangCL; - } - // Done. Everything else is run-time checks. Kind = CK_Dynamic; } diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 8f8847e638040..d1fcdf3545278 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -646,12 +646,6 @@ Sema::ActOnCXXTypeid(SourceLocation OpLoc, SourceLocation LParenLoc, return ExprError(Diag(OpLoc, diag::err_no_typeid_with_fno_rtti)); } - // Warns when typeid is used with RTTI data disabled. 
- if (!getLangOpts().RTTIData) - Diag(OpLoc, diag::warn_no_typeid_with_rtti_disabled) - << (getDiagnostics().getDiagnosticOptions().getFormat() == - DiagnosticOptions::MSVC); - QualType TypeInfoType = Context.getTypeDeclType(CXXTypeInfoDecl); if (isType) { diff --git a/clang/test/SemaCXX/ms_no_dynamic_cast.cpp b/clang/test/SemaCXX/ms_no_dynamic_cast.cpp deleted file mode 100644 index d2c007fd8c297..0000000000000 --- a/clang/test/SemaCXX/ms_no_dynamic_cast.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// RUN: %clang_cc1 %s -triple x86_64-windows -fdiagnostics-format msvc -fno-rtti-data -fsyntax-only -verify - -namespace std { -struct type_info {}; -} // namespace std -class B { -public: - virtual ~B() = default; -}; - -class D1 : public B { -public: - ~D1() = default; -}; - -void f() { - B* b = new D1(); - auto d = dynamic_cast(b); // expected-warning{{dynamic_cast will not work since RTTI data is disabled by /GR-}} - void* v = dynamic_cast(b); // expected-warning{{dynamic_cast will not work since RTTI data is disabled by /GR-}} - (void)typeid(int); // expected-warning{{typeid will not work since RTTI data is disabled by /GR-}} -} diff --git a/clang/test/SemaCXX/no-rtti.cpp b/clang/test/SemaCXX/no-rtti.cpp index f8487a0902dda..e0b57153c24c9 100644 --- a/clang/test/SemaCXX/no-rtti.cpp +++ b/clang/test/SemaCXX/no-rtti.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only -verify -fno-rtti %s +// RUN: %clang_cc1 -fsyntax-only -verify -fno-rtti %s namespace std { class type_info; diff --git a/clang/test/SemaCXX/no_dynamic_cast.cpp b/clang/test/SemaCXX/no_dynamic_cast.cpp deleted file mode 100644 index 074b02f4668bc..0000000000000 --- a/clang/test/SemaCXX/no_dynamic_cast.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// RUN: %clang_cc1 %s -triple x86_64-pc-linux-gnu -fno-rtti-data -fsyntax-only -verify - -namespace std { -struct type_info {}; -} // namespace std -class B { -public: - virtual ~B() = default; -}; - -class D1 : public B { -public: - ~D1() = default; -}; - -void f() { - B* b = new D1(); - auto d = dynamic_cast(b); // expected-warning{{dynamic_cast will not work since RTTI data is disabled by -fno-rtti-data}} - void* v = dynamic_cast(b); - (void)typeid(int); // expected-warning{{typeid will not work since RTTI data is disabled by -fno-rtti-data}} -} From 32ae37b038b16a1ff9c81428ae4f003377439a22 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Tue, 8 Sep 2020 16:26:48 +0200 Subject: [PATCH 0064/1079] [clang-tidy] Fix dynamic build failures after 156b127945a8c923d141e608b7380427da024376 --- clang-tools-extra/clang-tidy/altera/CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clang-tidy/altera/CMakeLists.txt b/clang-tools-extra/clang-tidy/altera/CMakeLists.txt index 45131c1809a23..878e718c65963 100644 --- a/clang-tools-extra/clang-tidy/altera/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/altera/CMakeLists.txt @@ -5,11 +5,15 @@ add_clang_library(clangTidyAlteraModule StructPackAlignCheck.cpp LINK_LIBS + clangTidy + clangTidyUtils + ) + +clang_target_link_libraries(clangTidyAlteraModule + PRIVATE clangAnalysis clangAST clangASTMatchers clangBasic clangLex - clangTidy - clangTidyUtils ) From 6dc3e22b575267d2ede36f741bb9eb2455f36cff Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Wed, 19 Aug 2020 12:01:03 +0200 Subject: [PATCH 0065/1079] [DAGTypeLegalizer] Handle ZERO_EXTEND of promoted type in WidenVecRes_Convert. 
On SystemZ, a ZERO_EXTEND of an i1 vector handled by WidenVecRes_Convert() always ended up being scalarized, because the type action of the input is promotion which was previously an unhandled case in this method. This fixes https://bugs.llvm.org/show_bug.cgi?id=47132. Differential Revision: https://reviews.llvm.org/D86268 Patch by Eli Friedman. Review: Ulrich Weigand --- .../SelectionDAG/LegalizeVectorTypes.cpp | 23 +++++++++++++++---- llvm/test/CodeGen/SystemZ/vec-zext.ll | 16 +++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 093f7b1680edd..764472e570c04 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3307,19 +3307,34 @@ SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) { } SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { + LLVMContext &Ctx = *DAG.getContext(); SDValue InOp = N->getOperand(0); SDLoc DL(N); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); EVT InVT = InOp.getValueType(); - EVT InEltVT = InVT.getVectorElementType(); - EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); unsigned Opcode = N->getOpcode(); - unsigned InVTNumElts = InVT.getVectorNumElements(); const SDNodeFlags Flags = N->getFlags(); + + // Handle the case of ZERO_EXTEND where the promoted InVT element size does + // not equal that of WidenVT. + if (N->getOpcode() == ISD::ZERO_EXTEND && + getTypeAction(InVT) == TargetLowering::TypePromoteInteger && + TLI.getTypeToTransformTo(Ctx, InVT).getScalarSizeInBits() != + WidenVT.getScalarSizeInBits()) { + InOp = ZExtPromotedInteger(InOp); + InVT = InOp.getValueType(); + if (WidenVT.getScalarSizeInBits() < InVT.getScalarSizeInBits()) + Opcode = ISD::TRUNCATE; + } + + EVT InEltVT = InVT.getVectorElementType(); + EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenNumElts); + unsigned InVTNumElts = InVT.getVectorNumElements(); + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); diff --git a/llvm/test/CodeGen/SystemZ/vec-zext.ll b/llvm/test/CodeGen/SystemZ/vec-zext.ll index b4c8f2307b0b7..cb61d31e5ebe3 100644 --- a/llvm/test/CodeGen/SystemZ/vec-zext.ll +++ b/llvm/test/CodeGen/SystemZ/vec-zext.ll @@ -92,3 +92,19 @@ define <8 x i16> @fun10(<8 x i8> %val1) { ret <8 x i16> %z } +define <2 x i32> @fun11(<2 x i64> %Arg1, <2 x i64> %Arg2) { +; CHECK-LABEL: fun11: +; CHECK: vgbm %v0, 0 +; CHECK-NEXT: vceqg %v1, %v24, %v0 +; CHECK-NEXT: vceqg %v0, %v26, %v0 +; CHECK-NEXT: vo %v0, %v1, %v0 +; CHECK-NEXT: vrepig %v1, 1 +; CHECK-NEXT: vn %v0, %v0, %v1 +; CHECK-NEXT: vpkg %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %i3 = icmp eq <2 x i64> %Arg1, zeroinitializer + %i5 = icmp eq <2 x i64> %Arg2, zeroinitializer + %i6 = or <2 x i1> %i3, %i5 + %i7 = zext <2 x i1> %i6 to <2 x i32> + ret <2 x i32> %i7 +} From 6454140ab34cb29cc0b9de4f1e80199d717f1a97 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 8 Sep 2020 11:17:10 -0400 Subject: [PATCH 0066/1079] [libc++] Make sure we always print all available features Previously, we'd only print the features added through the new config, however printing all the features is important for debugging purposes. 
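Concretely, configure() now always emits one line of the form
`note: All available features: {...}`, built from the full
`self.config.available_features` set via `self.lit_config.note(...)`, rather
than only noting the features the new-style config itself contributed.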
--- libcxx/utils/libcxx/test/config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index d54ee8fa32913..82b696f76eec7 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -148,6 +148,8 @@ def configure(self): self.lit_config ) + self.lit_config.note("All available features: {}".format(self.config.available_features)) + def print_config_info(self): if self.cxx.use_modules: self.lit_config.note('Using modules flags: %s' % From c2f6a0012882ba9b39ccee53f3d7f4f1aedf2181 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 8 Sep 2020 11:29:32 -0400 Subject: [PATCH 0067/1079] [libc++] Allow overriding the cached value of LIBCXX_TEST_CONFIG --- libcxx/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index a5c32d94aea29..8e7df5d19610e 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -96,7 +96,7 @@ option(LIBCXX_INCLUDE_TESTS "Build the libc++ tests." ${LLVM_INCLUDE_TESTS}) option(LIBCXX_ENABLE_PARALLEL_ALGORITHMS "Enable the parallel algorithms library. This requires the PSTL to be available." OFF) option(LIBCXX_TEST_GDB_PRETTY_PRINTERS "Test gdb pretty printers." OFF) set(LIBCXX_TEST_CONFIG "${CMAKE_CURRENT_SOURCE_DIR}/test/configs/legacy.cfg.in" CACHE STRING - "The Lit testing configuration to use when running the tests." FORCE) # TODO: Stop using 'FORCE' once we can assume all CMake build dirs have been re-generated + "The Lit testing configuration to use when running the tests.") set(LIBCXX_TEST_PARAMS "" CACHE STRING "A list of parameters to run the Lit test suite with.") From c81dd3d159ab03d46e4280c458d3c29e56648218 Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Tue, 8 Sep 2020 16:39:11 +0100 Subject: [PATCH 0068/1079] [clang-format] Handle shifts within conditions In some situation shifts can be treated as a template, and is thus formatted as one. So, by doing a couple extra checks to assure that the condition doesn't contain a template, and is in fact a bit shift should solve this problem. This is a fix for [[ https://bugs.llvm.org/show_bug.cgi?id=46969 | bug 46969 ]] Reviewed By: MyDeveloperDay Patch By: Saldivarcher Differential Revision: https://reviews.llvm.org/D86581 --- clang/lib/Format/TokenAnnotator.cpp | 20 +++++++++++++------- clang/unittests/Format/FormatTest.cpp | 15 +++++++++++++++ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 5dd6a7a9da40b..841f0b41e9a7f 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -56,6 +56,13 @@ static bool isLambdaParameterList(const FormatToken *Left) { Left->Previous->MatchingParen->is(TT_LambdaLSquare); } +/// Returns \c true if the token is followed by a boolean condition, \c false +/// otherwise. +static bool isKeywordWithCondition(const FormatToken &Tok) { + return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch, + tok::kw_constexpr, tok::kw_catch); +} + /// A parser that gathers additional information about tokens. /// /// The \c TokenAnnotator tries to match parenthesis and square brakets and @@ -108,6 +115,12 @@ class AnnotatingParser { while (CurrentToken) { if (CurrentToken->is(tok::greater)) { + // Try to do a better job at looking for ">>" within the condition of + // a statement. 
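+ // Heuristic: a second '>' immediately after this one, where this token is
+ // not nested inside another '<' and the line begins with an
+ // if/for/while/switch-style keyword, is treated as a '>>' shift inside a
+ // condition rather than as two template closers.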
+ if (CurrentToken->Next && CurrentToken->Next->is(tok::greater) && + Left->ParentBracket != tok::less && + isKeywordWithCondition(*Line.First)) + return false; Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; // In TT_Proto, we must distignuish between: @@ -2768,13 +2781,6 @@ bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const { Right.ParameterCount > 0); } -/// Returns \c true if the token is followed by a boolean condition, \c false -/// otherwise. -static bool isKeywordWithCondition(const FormatToken &Tok) { - return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch, - tok::kw_constexpr, tok::kw_catch); -} - bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, const FormatToken &Right) { diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index b198efa4af9ec..98e002003159c 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -7565,6 +7565,21 @@ TEST_F(FormatTest, UnderstandsTemplateParameters) { verifyFormat("static_assert(is_convertible::value, \"AAA\");"); verifyFormat("Constructor(A... a) : a_(X{std::forward(a)}...) {}"); verifyFormat("< < < < < < < < < < < < < < < < < < < < < < < < < < < < < <"); + verifyFormat("some_templated_type"); +} + +TEST_F(FormatTest, UnderstandsShiftOperators) { + verifyFormat("if (i < x >> 1)"); + verifyFormat("while (i < x >> 1)"); + verifyFormat("for (unsigned i = 0; i < i; ++i, v = v >> 1)"); + verifyFormat("for (unsigned i = 0; i < x >> 1; ++i, v = v >> 1)"); + verifyFormat( + "for (std::vector::iterator i = 0; i < x >> 1; ++i, v = v >> 1)"); + verifyFormat("Foo.call>()"); + verifyFormat("if (Foo.call>() == 0)"); + verifyFormat("for (std::vector>::iterator i = 0; i < x >> 1; " + "++i, v = v >> 1)"); + verifyFormat("if (w>, 1>::t)"); } TEST_F(FormatTest, BitshiftOperatorWidth) { From 487a80531006add8102d50dbcce4b6fd729ab1f6 Mon Sep 17 00:00:00 2001 From: Ronak Chauhan Date: Mon, 7 Sep 2020 14:40:00 +0530 Subject: [PATCH 0069/1079] [AMDGPU] Support disassembly for AMDGPU kernel descriptors Decode AMDGPU Kernel descriptors as assembler directives. 
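For example (abridged, values illustrative), dumping a kernel descriptor
symbol now produces a block of the form

```
.amdhsa_kernel my_kernel
  .amdhsa_group_segment_fixed_size 0
  .amdhsa_private_segment_fixed_size 0
  .amdhsa_next_free_vgpr 24
  .amdhsa_next_free_sgpr 16
  ...
.end_amdhsa_kernel
```

which the new kd-*.s tests feed back through llvm-mc and diff against the
original object.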
Reviewed By: scott.linder, jhenderson, kzhuravl Differential Revision: https://reviews.llvm.org/D80713 --- .../llvm/Support/AMDHSAKernelDescriptor.h | 70 ++-- .../Disassembler/AMDGPUDisassembler.cpp | 345 ++++++++++++++++++ .../AMDGPU/Disassembler/AMDGPUDisassembler.h | 30 +- llvm/test/CodeGen/AMDGPU/nop-data.ll | 4 +- .../llvm-objdump/ELF/AMDGPU/kd-failure.s | 37 ++ .../tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s | 49 +++ .../tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s | 36 ++ .../llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s | 58 +++ .../llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s | 53 +++ .../llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s | 41 +++ llvm/tools/llvm-objdump/llvm-objdump.cpp | 17 - 11 files changed, 690 insertions(+), 50 deletions(-) create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h index d1c2147536a72..48a09ac48005d 100644 --- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h +++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h @@ -162,39 +162,49 @@ struct kernel_descriptor_t { uint8_t reserved2[6]; }; +enum : uint32_t { + GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0, + PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4, + RESERVED0_OFFSET = 8, + KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16, + RESERVED1_OFFSET = 24, + COMPUTE_PGM_RSRC3_OFFSET = 44, + COMPUTE_PGM_RSRC1_OFFSET = 48, + COMPUTE_PGM_RSRC2_OFFSET = 52, + KERNEL_CODE_PROPERTIES_OFFSET = 56, + RESERVED2_OFFSET = 58, +}; + static_assert( sizeof(kernel_descriptor_t) == 64, "invalid size for kernel_descriptor_t"); -static_assert( - offsetof(kernel_descriptor_t, group_segment_fixed_size) == 0, - "invalid offset for group_segment_fixed_size"); -static_assert( - offsetof(kernel_descriptor_t, private_segment_fixed_size) == 4, - "invalid offset for private_segment_fixed_size"); -static_assert( - offsetof(kernel_descriptor_t, reserved0) == 8, - "invalid offset for reserved0"); -static_assert( - offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == 16, - "invalid offset for kernel_code_entry_byte_offset"); -static_assert( - offsetof(kernel_descriptor_t, reserved1) == 24, - "invalid offset for reserved1"); -static_assert( - offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == 44, - "invalid offset for compute_pgm_rsrc3"); -static_assert( - offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == 48, - "invalid offset for compute_pgm_rsrc1"); -static_assert( - offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == 52, - "invalid offset for compute_pgm_rsrc2"); -static_assert( - offsetof(kernel_descriptor_t, kernel_code_properties) == 56, - "invalid offset for kernel_code_properties"); -static_assert( - offsetof(kernel_descriptor_t, reserved2) == 58, - "invalid offset for reserved2"); +static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) == + GROUP_SEGMENT_FIXED_SIZE_OFFSET, + "invalid offset for group_segment_fixed_size"); +static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) == + PRIVATE_SEGMENT_FIXED_SIZE_OFFSET, + "invalid offset for private_segment_fixed_size"); +static_assert(offsetof(kernel_descriptor_t, reserved0) == 
RESERVED0_OFFSET, + "invalid offset for reserved0"); +static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == + KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET, + "invalid offset for kernel_code_entry_byte_offset"); +static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET, + "invalid offset for reserved1"); +static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == + COMPUTE_PGM_RSRC3_OFFSET, + "invalid offset for compute_pgm_rsrc3"); +static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == + COMPUTE_PGM_RSRC1_OFFSET, + "invalid offset for compute_pgm_rsrc1"); +static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == + COMPUTE_PGM_RSRC2_OFFSET, + "invalid offset for compute_pgm_rsrc2"); +static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) == + KERNEL_CODE_PROPERTIES_OFFSET, + "invalid offset for kernel_code_properties"); +static_assert(offsetof(kernel_descriptor_t, reserved2) == RESERVED2_OFFSET, + "invalid offset for reserved2"); } // end namespace amdhsa } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 9c2f2e7eecd14..840208169168e 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -34,6 +34,7 @@ #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/AMDHSAKernelDescriptor.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -1215,6 +1216,350 @@ bool AMDGPUDisassembler::isGFX10() const { return STI.getFeatureBits()[AMDGPU::FeatureGFX10]; } +//===----------------------------------------------------------------------===// +// AMDGPU specific symbol handling +//===----------------------------------------------------------------------===// +#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ + do { \ + KdStream << Indent << DIRECTIVE " " \ + << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ + } while (0) + +// NOLINTNEXTLINE(readability-identifier-naming) +MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( + uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { + using namespace amdhsa; + StringRef Indent = "\t"; + + // We cannot accurately backward compute #VGPRs used from + // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same + // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we + // simply calculate the inverse of what the assembler does. + + uint32_t GranulatedWorkitemVGPRCount = + (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >> + COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT; + + uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) * + AMDGPU::IsaInfo::getVGPREncodingGranule(&STI); + + KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n'; + + // We cannot backward compute values used to calculate + // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following + // directives can't be computed: + // .amdhsa_reserve_vcc + // .amdhsa_reserve_flat_scratch + // .amdhsa_reserve_xnack_mask + // They take their respective default values if not specified in the assembly. 
+ // + // GRANULATED_WAVEFRONT_SGPR_COUNT + // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK) + // + // We compute the inverse as though all directives apart from NEXT_FREE_SGPR + // are set to 0. So while disassembling we consider that: + // + // GRANULATED_WAVEFRONT_SGPR_COUNT + // = f(NEXT_FREE_SGPR + 0 + 0 + 0) + // + // The disassembler cannot recover the original values of those 3 directives. + + uint32_t GranulatedWavefrontSGPRCount = + (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >> + COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT; + + if (isGFX10() && GranulatedWavefrontSGPRCount) + return MCDisassembler::Fail; + + uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) * + AMDGPU::IsaInfo::getSGPREncodingGranule(&STI); + + KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n'; + KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n'; + KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n'; + KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n"; + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_float_round_mode_32", + COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32); + PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64", + COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64); + PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32", + COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32); + PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64", + COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY) + return MCDisassembler::Fail; + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0) + return MCDisassembler::Fail; + + if (isGFX10()) { + PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode", + COMPUTE_PGM_RSRC1_WGP_MODE); + PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED); + PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS); + } + return MCDisassembler::Success; +} + +// NOLINTNEXTLINE(readability-identifier-naming) +MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2( + uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { + using namespace amdhsa; + StringRef Indent = "\t"; + PRINT_DIRECTIVE( + ".amdhsa_system_sgpr_private_segment_wavefront_offset", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO); + PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id", + COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID); + + if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH) + return MCDisassembler::Fail; + + if (FourByteBuffer & 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY) + return MCDisassembler::Fail; + + if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE( + ".amdhsa_exception_fp_ieee_invalid_op", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION); + PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE); + PRINT_DIRECTIVE( + ".amdhsa_exception_fp_ieee_div_zero", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO); + PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW); + PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW); + PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT); + PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO); + + if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0) + return MCDisassembler::Fail; + + return MCDisassembler::Success; +} + +#undef PRINT_DIRECTIVE + +MCDisassembler::DecodeStatus +AMDGPUDisassembler::decodeKernelDescriptorDirective( + DataExtractor::Cursor &Cursor, ArrayRef Bytes, + raw_string_ostream &KdStream) const { +#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ + do { \ + KdStream << Indent << DIRECTIVE " " \ + << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ + } while (0) + + uint16_t TwoByteBuffer = 0; + uint32_t FourByteBuffer = 0; + uint64_t EightByteBuffer = 0; + + StringRef ReservedBytes; + StringRef Indent = "\t"; + + assert(Bytes.size() == 64); + DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8); + + switch (Cursor.tell()) { + case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer + << '\n'; + return MCDisassembler::Success; + + case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + KdStream << Indent << ".amdhsa_private_segment_fixed_size " + << FourByteBuffer << '\n'; + return MCDisassembler::Success; + + case amdhsa::RESERVED0_OFFSET: + // 8 reserved bytes, must be 0. + EightByteBuffer = DE.getU64(Cursor); + if (EightByteBuffer) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET: + // KERNEL_CODE_ENTRY_BYTE_OFFSET + // So far no directive controls this for Code Object V3, so simply skip for + // disassembly. + DE.skip(Cursor, 8); + return MCDisassembler::Success; + + case amdhsa::RESERVED1_OFFSET: + // 20 reserved bytes, must be 0. + ReservedBytes = DE.getBytes(Cursor, 20); + for (int I = 0; I < 20; ++I) { + if (ReservedBytes[I] != 0) { + return MCDisassembler::Fail; + } + } + return MCDisassembler::Success; + + case amdhsa::COMPUTE_PGM_RSRC3_OFFSET: + // COMPUTE_PGM_RSRC3 + // - Only set for GFX10, GFX6-9 have this to be 0. + // - Currently no directives directly control this. 
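+ // The word is consumed but never printed: no directive controls it, and on
+ // GFX6-9 any non-zero value makes the disassembly fail.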
+ FourByteBuffer = DE.getU32(Cursor); + if (!isGFX10() && FourByteBuffer) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::COMPUTE_PGM_RSRC1_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) == + MCDisassembler::Fail) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::COMPUTE_PGM_RSRC2_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) == + MCDisassembler::Fail) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET: + using namespace amdhsa; + TwoByteBuffer = DE.getU16(Cursor); + + PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); + + if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0) + return MCDisassembler::Fail; + + // Reserved for GFX9 + if (isGFX9() && + (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) { + return MCDisassembler::Fail; + } else if (isGFX10()) { + PRINT_DIRECTIVE(".amdhsa_wavefront_size32", + KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); + } + + if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) + return MCDisassembler::Fail; + + return MCDisassembler::Success; + + case amdhsa::RESERVED2_OFFSET: + // 6 bytes from here are reserved, must be 0. + ReservedBytes = DE.getBytes(Cursor, 6); + for (int I = 0; I < 6; ++I) { + if (ReservedBytes[I] != 0) + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + default: + llvm_unreachable("Unhandled index. Case statements cover everything."); + return MCDisassembler::Fail; + } +#undef PRINT_DIRECTIVE +} + +MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor( + StringRef KdName, ArrayRef Bytes, uint64_t KdAddress) const { + // CP microcode requires the kernel descriptor to be 64 aligned. + if (Bytes.size() != 64 || KdAddress % 64 != 0) + return MCDisassembler::Fail; + + std::string Kd; + raw_string_ostream KdStream(Kd); + KdStream << ".amdhsa_kernel " << KdName << '\n'; + + DataExtractor::Cursor C(0); + while (C && C.tell() < Bytes.size()) { + MCDisassembler::DecodeStatus Status = + decodeKernelDescriptorDirective(C, Bytes, KdStream); + + cantFail(C.takeError()); + + if (Status == MCDisassembler::Fail) + return MCDisassembler::Fail; + } + KdStream << ".end_amdhsa_kernel\n"; + outs() << KdStream.str(); + return MCDisassembler::Success; +} + +Optional +AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, + ArrayRef Bytes, uint64_t Address, + raw_ostream &CStream) const { + // Right now only kernel descriptor needs to be handled. + // We ignore all other symbols for target specific handling. + // TODO: + // Fix the spurious symbol issue for AMDGPU kernels. 
Exists for both Code + // Object V2 and V3 when symbols are marked protected. + + // amd_kernel_code_t for Code Object V2. + if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) { + Size = 256; + return MCDisassembler::Fail; + } + + // Code Object V3 kernel descriptors. + StringRef Name = Symbol.Name; + if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) { + Size = 64; // Size = 64 regardless of success or failure. + return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address); + } + return None; +} + //===----------------------------------------------------------------------===// // AMDGPUSymbolizer //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index f975af409a096..315602c35288c 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -17,10 +17,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCDisassembler/MCSymbolizer.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/DataExtractor.h" #include #include @@ -66,6 +67,33 @@ class AMDGPUDisassembler : public MCDisassembler { DecodeStatus tryDecodeInst(const uint8_t* Table, MCInst &MI, uint64_t Inst, uint64_t Address) const; + Optional onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, + ArrayRef Bytes, + uint64_t Address, + raw_ostream &CStream) const override; + + DecodeStatus decodeKernelDescriptor(StringRef KdName, ArrayRef Bytes, + uint64_t KdAddress) const; + + DecodeStatus + decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, + ArrayRef Bytes, + raw_string_ostream &KdStream) const; + + /// Decode as directives that handle COMPUTE_PGM_RSRC1. + /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC1. + /// \param KdStream - Stream to write the disassembled directives to. + // NOLINTNEXTLINE(readability-identifier-naming) + DecodeStatus decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, + raw_string_ostream &KdStream) const; + + /// Decode as directives that handle COMPUTE_PGM_RSRC2. + /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC2. + /// \param KdStream - Stream to write the disassembled directives to. 
+ // NOLINTNEXTLINE(readability-identifier-naming) + DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, + raw_string_ostream &KdStream) const; + DecodeStatus convertSDWAInst(MCInst &MI) const; DecodeStatus convertDPP8Inst(MCInst &MI) const; DecodeStatus convertMIMGInst(MCInst &MI) const; diff --git a/llvm/test/CodeGen/AMDGPU/nop-data.ll b/llvm/test/CodeGen/AMDGPU/nop-data.ll index 7b6853acce285..e21ca97e8ffca 100644 --- a/llvm/test/CodeGen/AMDGPU/nop-data.ll +++ b/llvm/test/CodeGen/AMDGPU/nop-data.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-code-object-v3 -mcpu=fiji -filetype=obj < %s | llvm-objdump -d - --mcpu=fiji | FileCheck %s ; CHECK: : -; CHECK-NEXT: s_endpgm +; CHECK: s_endpgm define amdgpu_kernel void @kernel0() align 256 { entry: ret void @@ -80,7 +80,7 @@ entry: ; CHECK-EMPTY: ; CHECK-NEXT: : -; CHECK-NEXT: s_endpgm +; CHECK: s_endpgm define amdgpu_kernel void @kernel1(i32 addrspace(1)* addrspace(4)* %ptr.out) align 256 { entry: ret void diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s new file mode 100644 index 0000000000000..eee3fd4b7103e --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s @@ -0,0 +1,37 @@ +;; Failure test. We create a malformed kernel descriptor (KD) by manually +;; setting the bytes, because one can't create a malformed KD using the +;; assembler directives. + +; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t.o + +; RUN: printf ".type my_kernel.kd, @object \nmy_kernel.kd:\n.size my_kernel.kd, 64\n" > %t1.sym_info +; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t.o \ +; RUN: | tail -n +9 > %t1.sym_content +; RUN: cat %t1.sym_info %t1.sym_content > %t1.s + +; RUN: llvm-mc %t1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t-re-assemble.o +; RUN: diff %t.o %t-re-assemble.o + +;; Test failure by setting one of the reserved bytes to non-zero value. + +.type my_kernel.kd, @object +.size my_kernel.kd, 64 +my_kernel.kd: + .long 0x00000000 ;; group_segment_fixed_size + .long 0x00000000 ;; private_segment_fixed_size + .quad 0x00FF000000000000 ;; reserved bytes. + .quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works. + + ;; 20 reserved bytes. + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .long 0x00000000 + + .long 0x00000000 ;; compute_PGM_RSRC3 + .long 0x00000000 ;; compute_PGM_RSRC1 + .long 0x00000000 ;; compute_PGM_RSRC2 + .short 0x0000 ;; additional fields. + + ;; 6 reserved bytes. + .long 0x0000000 + .short 0x0000 diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s new file mode 100644 index 0000000000000..0b798a298d398 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s @@ -0,0 +1,49 @@ +;; Test disassembly for GRANULATED_WAVEFRONT_SGPR_COUNT in the kernel descriptor. 
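+;; The encoding folds the vcc/flat_scratch/xnack reserves into this single
+;; field, so the disassembly always prints the .amdhsa_reserve_* directives as
+;; 0 and a correspondingly larger .amdhsa_next_free_sgpr; each case below must
+;; still round-trip (assemble, disassemble, re-assemble) to identical objects.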
+ +; RUN: split-file %s %t.dir + +; RUN: llvm-mc %t.dir/1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble +; RUN: diff %t1 %t1-re-assemble + +; RUN: llvm-mc %t.dir/2.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble +; RUN: diff %t2 %t2-re-assemble + +; RUN: llvm-mc %t.dir/3.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble +; RUN: diff %t3 %t3-re-assemble + + +;--- 1.s +;; Only set next_free_sgpr. +.amdhsa_kernel my_kernel_1 + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 42 + .amdhsa_reserve_flat_scratch 0 + .amdhsa_reserve_xnack_mask 0 + .amdhsa_reserve_vcc 0 +.end_amdhsa_kernel + +;--- 2.s +;; Only set other directives. +.amdhsa_kernel my_kernel_2 + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_reserve_flat_scratch 1 + .amdhsa_reserve_xnack_mask 1 + .amdhsa_reserve_vcc 1 +.end_amdhsa_kernel + +;--- 3.s +;; Set all affecting directives. +.amdhsa_kernel my_kernel_3 + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 35 + .amdhsa_reserve_flat_scratch 1 + .amdhsa_reserve_xnack_mask 1 + .amdhsa_reserve_vcc 1 +.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s new file mode 100644 index 0000000000000..a8883d2f74be7 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s @@ -0,0 +1,36 @@ +;; Test disassembly for GRANULATED_WORKITEM_VGPR_COUNT in the kernel descriptor. 
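+;; Only a granule count is encoded, so the printed .amdhsa_next_free_vgpr is
+;; the original value rounded up to the VGPR allocation granule; the check is
+;; that re-assembling the dump reproduces the same encoded field.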
+ +; RUN: split-file %s %t.dir + +; RUN: llvm-mc %t.dir/1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble +; RUN: diff %t1 %t1-re-assemble + +; RUN: llvm-mc %t.dir/2.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble +; RUN: diff %t2 %t2-re-assemble + +; RUN: llvm-mc %t.dir/3.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble +; RUN: diff %t3 %t3-re-assemble + +;--- 1.s +.amdhsa_kernel my_kernel_1 + .amdhsa_next_free_vgpr 23 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +;--- 2.s +.amdhsa_kernel my_kernel_2 + .amdhsa_next_free_vgpr 14 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +;--- 3.s +.amdhsa_kernel my_kernel_3 + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s new file mode 100644 index 0000000000000..803507a130c03 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s @@ -0,0 +1,58 @@ +;; Entirely zeroed kernel descriptor (for GFX10). + +; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj -o %t +; RUN: llvm-objdump -s -j .text %t | FileCheck --check-prefix=OBJDUMP %s + +;; TODO: +;; This file and kd-zeroed-raw.s should produce the same output for the kernel +;; descriptor - a block of 64 zeroed bytes. But looks like the assembler sets +;; the FWD_PROGRESS bit in COMPUTE_PGM_RSRC1 to 1 even when the directive +;; mentions 0 (see line 36). + +;; Check the raw bytes right now. 
+ +; OBJDUMP: 0000 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0030 01000000 00000000 00000000 00000000 + +.amdhsa_kernel my_kernel + .amdhsa_group_segment_fixed_size 0 + .amdhsa_private_segment_fixed_size 0 + .amdhsa_next_free_vgpr 8 + .amdhsa_reserve_vcc 0 + .amdhsa_reserve_flat_scratch 0 + .amdhsa_reserve_xnack_mask 0 + .amdhsa_next_free_sgpr 8 + .amdhsa_float_round_mode_32 0 + .amdhsa_float_round_mode_16_64 0 + .amdhsa_float_denorm_mode_32 0 + .amdhsa_float_denorm_mode_16_64 0 + .amdhsa_dx10_clamp 0 + .amdhsa_ieee_mode 0 + .amdhsa_fp16_overflow 0 + .amdhsa_workgroup_processor_mode 0 + .amdhsa_memory_ordered 0 + .amdhsa_forward_progress 0 + .amdhsa_system_sgpr_private_segment_wavefront_offset 0 + .amdhsa_system_sgpr_workgroup_id_x 0 + .amdhsa_system_sgpr_workgroup_id_y 0 + .amdhsa_system_sgpr_workgroup_id_z 0 + .amdhsa_system_sgpr_workgroup_info 0 + .amdhsa_system_vgpr_workitem_id 0 + .amdhsa_exception_fp_ieee_invalid_op 0 + .amdhsa_exception_fp_denorm_src 0 + .amdhsa_exception_fp_ieee_div_zero 0 + .amdhsa_exception_fp_ieee_overflow 0 + .amdhsa_exception_fp_ieee_underflow 0 + .amdhsa_exception_fp_ieee_inexact 0 + .amdhsa_exception_int_div_zero 0 + .amdhsa_user_sgpr_private_segment_buffer 0 + .amdhsa_user_sgpr_dispatch_ptr 0 + .amdhsa_user_sgpr_queue_ptr 0 + .amdhsa_user_sgpr_kernarg_segment_ptr 0 + .amdhsa_user_sgpr_dispatch_id 0 + .amdhsa_user_sgpr_flat_scratch_init 0 + .amdhsa_user_sgpr_private_segment_size 0 + .amdhsa_wavefront_size32 0 +.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s new file mode 100644 index 0000000000000..de4fdf74d88e0 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s @@ -0,0 +1,53 @@ +;; Entirely zeroed kernel descriptor (for GFX9). + +; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \ +; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: diff %t1 %t2 + +; RUN: llvm-objdump -s -j .text %t1 | FileCheck --check-prefix=OBJDUMP %s + +; OBJDUMP: 0000 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0030 00000000 00000000 00000000 00000000 + +;; This file and kd-zeroed-raw.s produce the same output for the kernel +;; descriptor - a block of 64 zeroed bytes. 
+ +.amdhsa_kernel my_kernel + .amdhsa_group_segment_fixed_size 0 + .amdhsa_private_segment_fixed_size 0 + .amdhsa_next_free_vgpr 0 + .amdhsa_reserve_vcc 0 + .amdhsa_reserve_flat_scratch 0 + .amdhsa_reserve_xnack_mask 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_float_round_mode_32 0 + .amdhsa_float_round_mode_16_64 0 + .amdhsa_float_denorm_mode_32 0 + .amdhsa_float_denorm_mode_16_64 0 + .amdhsa_dx10_clamp 0 + .amdhsa_ieee_mode 0 + .amdhsa_fp16_overflow 0 + .amdhsa_system_sgpr_private_segment_wavefront_offset 0 + .amdhsa_system_sgpr_workgroup_id_x 0 + .amdhsa_system_sgpr_workgroup_id_y 0 + .amdhsa_system_sgpr_workgroup_id_z 0 + .amdhsa_system_sgpr_workgroup_info 0 + .amdhsa_system_vgpr_workitem_id 0 + .amdhsa_exception_fp_ieee_invalid_op 0 + .amdhsa_exception_fp_denorm_src 0 + .amdhsa_exception_fp_ieee_div_zero 0 + .amdhsa_exception_fp_ieee_overflow 0 + .amdhsa_exception_fp_ieee_underflow 0 + .amdhsa_exception_fp_ieee_inexact 0 + .amdhsa_exception_int_div_zero 0 + .amdhsa_user_sgpr_private_segment_buffer 0 + .amdhsa_user_sgpr_dispatch_ptr 0 + .amdhsa_user_sgpr_queue_ptr 0 + .amdhsa_user_sgpr_kernarg_segment_ptr 0 + .amdhsa_user_sgpr_dispatch_id 0 + .amdhsa_user_sgpr_flat_scratch_init 0 + .amdhsa_user_sgpr_private_segment_size 0 +.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s new file mode 100644 index 0000000000000..85554209d5d8f --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s @@ -0,0 +1,41 @@ +; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \ +; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: llvm-objdump -s -j .text %t2 | FileCheck --check-prefix=OBJDUMP %s + +;; Not running lit-test over gfx10 (see kd-zeroed-gfx10.s for details). +;; kd-zeroed-raw.s and kd-zeroed-*.s should produce the same output for the +;; kernel descriptor - a block of 64 zeroed bytes. + +;; The disassembly will produce the contents of kd-zeroed-*.s which on being +;; assembled contains additional relocation info. A diff over the entire object +;; will fail in this case. So we check by looking the bytes in .text. + +; OBJDUMP: 0000 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0030 00000000 00000000 00000000 00000000 + +;; The entire object is zeroed out. + +.type my_kernel.kd, @object +.size my_kernel.kd, 64 +my_kernel.kd: + .long 0x00000000 ;; group_segment_fixed_size + .long 0x00000000 ;; private_segment_fixed_size + .quad 0x0000000000000000 ;; reserved bytes. + .quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works. + + ;; 20 reserved bytes. + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .long 0x00000000 + + .long 0x00000000 ;; compute_PGM_RSRC3 + .long 0x00000000 ;; compute_PGM_RSRC1 + .long 0x00000000 ;; compute_PGM_RSRC2 + .short 0x0000 ;; additional fields. + + ;; 6 reserved bytes. 
+ .long 0x0000000 + .short 0x0000 diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index b63d08b90ff51..46ed7414dbb31 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1854,23 +1854,6 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, outs() << SectionName << ":\n"; } - if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) { - if (Symbols[SI].Type == ELF::STT_AMDGPU_HSA_KERNEL) { - // skip amd_kernel_code_t at the begining of kernel symbol (256 bytes) - Start += 256; - } - if (SI == SE - 1 || - Symbols[SI + 1].Type == ELF::STT_AMDGPU_HSA_KERNEL) { - // cut trailing zeroes at the end of kernel - // cut up to 256 bytes - const uint64_t EndAlign = 256; - const auto Limit = End - (std::min)(EndAlign, End - Start); - while (End > Limit && - *reinterpret_cast(&Bytes[End - 4]) == 0) - End -= 4; - } - } - outs() << '\n'; if (!NoLeadingAddr) outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ", From 71133e8b5bceaf68a2cee59af371df570a1aed79 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Tue, 8 Sep 2020 09:20:06 -0700 Subject: [PATCH 0070/1079] [clang-tidy] Fix linking for FrontendOpenMP Without this, builds with `-DBUILD_SHARED_LIBS=ON` fail. --- clang-tools-extra/clang-tidy/altera/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/altera/CMakeLists.txt b/clang-tools-extra/clang-tidy/altera/CMakeLists.txt index 878e718c65963..ed28d9f4892d2 100644 --- a/clang-tools-extra/clang-tidy/altera/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/altera/CMakeLists.txt @@ -1,4 +1,7 @@ -set(LLVM_LINK_COMPONENTS support) +set(LLVM_LINK_COMPONENTS + FrontendOpenMP + support + ) add_clang_library(clangTidyAlteraModule AlteraTidyModule.cpp From e2394245eb28695d5eed5d7c015e99141993c723 Mon Sep 17 00:00:00 2001 From: Lubomir Litchev Date: Thu, 3 Sep 2020 13:15:39 -0700 Subject: [PATCH 0071/1079] Add an option for unrolling loops up to a factor. Currently, there is no option to allow for unrolling a loop up to a specific factor (specified by the user). The code for doing that is there and there are benefits when unrolling is done to smaller loops (smaller than the factor specified). Reviewed By: bondhugula Differential Revision: https://reviews.llvm.org/D87111 --- mlir/include/mlir/Dialect/Affine/Passes.h | 3 ++- mlir/include/mlir/Dialect/Affine/Passes.td | 2 ++ .../Dialect/Affine/Transforms/LoopUnroll.cpp | 14 +++++++----- mlir/lib/Transforms/Utils/LoopUtils.cpp | 1 - mlir/test/Dialect/SCF/loop-unroll.mlir | 22 +++++++++++++++++++ .../test/lib/Transforms/TestLoopUnrolling.cpp | 3 +++ 6 files changed, 38 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h index db1c3bfead94f..580fbf53ae4f2 100644 --- a/mlir/include/mlir/Dialect/Affine/Passes.h +++ b/mlir/include/mlir/Dialect/Affine/Passes.h @@ -61,7 +61,8 @@ std::unique_ptr> createLoopTilingPass(); /// and no callback is provided, anything passed from the command-line (if at /// all) or the default unroll factor is used (LoopUnroll:kDefaultUnrollFactor). std::unique_ptr> createLoopUnrollPass( - int unrollFactor = -1, bool unrollFull = false, + int unrollFactor = -1, bool unrollUpToFactor = false, + bool unrollFull = false, const std::function &getUnrollFactor = nullptr); /// Creates a loop unroll jam pass to unroll jam by the specified factor. 
A diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td index 0e7f3e43661ef..7515dbaa33d86 100644 --- a/mlir/include/mlir/Dialect/Affine/Passes.td +++ b/mlir/include/mlir/Dialect/Affine/Passes.td @@ -71,6 +71,8 @@ def AffineLoopUnroll : FunctionPass<"affine-loop-unroll"> { let options = [ Option<"unrollFactor", "unroll-factor", "unsigned", /*default=*/"4", "Use this unroll factor for all loops being unrolled">, + Option<"unrollUpToFactor", "unroll-up-to-factor", "bool", /*default=*/"false", + "Allow unroling up to the factor specicied">, Option<"unrollFull", "unroll-full", "bool", /*default=*/"false", "Fully unroll loops">, Option<"numRepetitions", "unroll-num-reps", "unsigned", /*default=*/"1", diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp index edb21384080f4..3dc236f3c0686 100644 --- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp @@ -9,7 +9,6 @@ // This file implements loop unrolling. // //===----------------------------------------------------------------------===// - #include "PassDetail.h" #include "mlir/Analysis/LoopAnalysis.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" @@ -45,11 +44,13 @@ struct LoopUnroll : public AffineLoopUnrollBase { : AffineLoopUnrollBase(other), getUnrollFactor(other.getUnrollFactor) {} explicit LoopUnroll( - Optional unrollFactor = None, bool unrollFull = false, + Optional unrollFactor = None, bool unrollUpToFactor = false, + bool unrollFull = false, const std::function &getUnrollFactor = nullptr) : getUnrollFactor(getUnrollFactor) { if (unrollFactor) this->unrollFactor = *unrollFactor; + this->unrollUpToFactor = unrollUpToFactor; this->unrollFull = unrollFull; } @@ -126,13 +127,16 @@ LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) { if (unrollFull) return loopUnrollFull(forOp); // Otherwise, unroll by the given unroll factor. + if (unrollUpToFactor) { + return loopUnrollUpToFactor(forOp, unrollFactor); + } return loopUnrollByFactor(forOp, unrollFactor); } std::unique_ptr> mlir::createLoopUnrollPass( - int unrollFactor, bool unrollFull, + int unrollFactor, bool unrollUpToFactor, bool unrollFull, const std::function &getUnrollFactor) { return std::make_unique( - unrollFactor == -1 ? None : Optional(unrollFactor), unrollFull, - getUnrollFactor); + unrollFactor == -1 ? 
None : Optional(unrollFactor), + unrollUpToFactor, unrollFull, getUnrollFactor); } diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp index db6a071367d6c..7ae45171ddbd3 100644 --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -469,7 +469,6 @@ LogicalResult mlir::loopUnrollFull(AffineForOp forOp) { LogicalResult mlir::loopUnrollUpToFactor(AffineForOp forOp, uint64_t unrollFactor) { Optional mayBeConstantTripCount = getConstantTripCount(forOp); - if (mayBeConstantTripCount.hasValue() && mayBeConstantTripCount.getValue() < unrollFactor) return loopUnrollByFactor(forOp, mayBeConstantTripCount.getValue()); diff --git a/mlir/test/Dialect/SCF/loop-unroll.mlir b/mlir/test/Dialect/SCF/loop-unroll.mlir index 775188bf0ed99..134daa303ed86 100644 --- a/mlir/test/Dialect/SCF/loop-unroll.mlir +++ b/mlir/test/Dialect/SCF/loop-unroll.mlir @@ -2,6 +2,7 @@ // RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=3' | FileCheck %s --check-prefix UNROLL-BY-3 // RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=0' | FileCheck %s --check-prefix UNROLL-OUTER-BY-2 // RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=1' | FileCheck %s --check-prefix UNROLL-INNER-BY-2 +// RUN: mlir-opt %s --affine-loop-unroll='unroll-factor=6 unroll-up-to-factor=true' | FileCheck %s --check-prefix UNROLL-UP-TO func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index, %arg3: memref) { @@ -248,3 +249,24 @@ func @static_loop_unroll_by_3_promote_epilogue(%arg0 : memref) { // UNROLL-BY-3-NEXT: } // UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[C9]]] : memref // UNROLL-BY-3-NEXT: return + + +// Test unroll-up-to functionality. +func @static_loop_unroll_up_to_factor(%arg0 : memref) { + %0 = constant 7.0 : f32 + %lb = constant 0 : index + %ub = constant 2 : index + affine.for %i0 = %lb to %ub { + store %0, %arg0[%i0] : memref + } + return +} +// UNROLL-UP-TO-LABEL: func @static_loop_unroll_up_to_factor +// UNROLL-UP-TO-SAME: %[[MEM:.*0]]: memref +// UNROLL-UP-TO-DAG: %[[C0:.*]] = constant 0 : index +// UNROLL-UP-TO-DAG: %[[C2:.*]] = constant 2 : index +// UNROLL-UP-TO-NEXT: %[[V0:.*]] = affine.apply {{.*}} +// UNROLL-UP-TO-NEXT: store %{{.*}}, %[[MEM]][%[[V0]]] : memref +// UNROLL-UP-TO-NEXT: %[[V1:.*]] = affine.apply {{.*}} +// UNROLL-UP-TO-NEXT: tore %{{.*}}, %[[MEM]][%[[V1]]] : memref +// UNROLL-UP-TO-NEXT: return diff --git a/mlir/test/lib/Transforms/TestLoopUnrolling.cpp b/mlir/test/lib/Transforms/TestLoopUnrolling.cpp index 712fddb97028e..396f08b2cba32 100644 --- a/mlir/test/lib/Transforms/TestLoopUnrolling.cpp +++ b/mlir/test/lib/Transforms/TestLoopUnrolling.cpp @@ -55,6 +55,9 @@ class TestLoopUnrollingPass Option unrollFactor{*this, "unroll-factor", llvm::cl::desc("Loop unroll factor."), llvm::cl::init(1)}; + Option unrollUpToFactor{*this, "unroll-up-to-factor", + llvm::cl::desc("Loop unroll up to factor."), + llvm::cl::init(false)}; Option loopDepth{*this, "loop-depth", llvm::cl::desc("Loop depth."), llvm::cl::init(0)}; }; From 3c83b967cf223ce6a2e0813e48b64f7689512f20 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 8 Sep 2020 17:21:28 +0100 Subject: [PATCH 0072/1079] LiveRegUnits.h - reduce MachineRegisterInfo.h include. NFC. We only need to include MachineInstrBundle.h, but exposes an implicit dependency in MachineOutliner.h. Also, remove duplicate includes from LiveRegUnits.cpp + MachineOutliner.cpp. 
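In effect, MachineOutliner.h had been relying on LiveRegUnits.h to pull in
MachineRegisterInfo.h transitively. A sketch of the header changes applied
below:

```
// LiveRegUnits.h -- include only what this header itself uses:
#include "llvm/CodeGen/MachineInstrBundle.h"  // was: MachineRegisterInfo.h

// MachineOutliner.h -- spell out the dependency it had been inheriting:
#include "llvm/CodeGen/MachineRegisterInfo.h"
```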
--- llvm/include/llvm/CodeGen/LiveRegUnits.h | 2 +- llvm/include/llvm/CodeGen/MachineOutliner.h | 3 ++- llvm/lib/CodeGen/LiveRegUnits.cpp | 4 ---- llvm/lib/CodeGen/MachineOutliner.cpp | 2 -- 4 files changed, 3 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/CodeGen/LiveRegUnits.h b/llvm/include/llvm/CodeGen/LiveRegUnits.h index 1ed091e3bb5e9..e20e04cad35cc 100644 --- a/llvm/include/llvm/CodeGen/LiveRegUnits.h +++ b/llvm/include/llvm/CodeGen/LiveRegUnits.h @@ -15,7 +15,7 @@ #define LLVM_CODEGEN_LIVEREGUNITS_H #include "llvm/ADT/BitVector.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h index 4a1b04ab3e886..a5dbbdb4fdcd2 100644 --- a/llvm/include/llvm/CodeGen/MachineOutliner.h +++ b/llvm/include/llvm/CodeGen/MachineOutliner.h @@ -15,10 +15,11 @@ #ifndef LLVM_MACHINEOUTLINER_H #define LLVM_MACHINEOUTLINER_H +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/LivePhysRegs.h" namespace llvm { namespace outliner { diff --git a/llvm/lib/CodeGen/LiveRegUnits.cpp b/llvm/lib/CodeGen/LiveRegUnits.cpp index b2731aa0e7dbc..ea2075bc139df 100644 --- a/llvm/lib/CodeGen/LiveRegUnits.cpp +++ b/llvm/lib/CodeGen/LiveRegUnits.cpp @@ -11,15 +11,11 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveRegUnits.h" - #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/MC/MCRegisterInfo.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index f9d099e029956..715a2ba4667d2 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -59,10 +59,8 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" From d25c17f3175b344420c1f30040b206a47a512c9d Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Sun, 6 Sep 2020 10:36:07 -0700 Subject: [PATCH 0073/1079] [WebAssembly] Fix fixEndsAtEndOfFunction for try-catch When the function return type is non-void and `end` instructions are at the very end of a function, CFGStackify's `fixEndsAtEndOfFunction` function fixes the corresponding block/loop/try's type to match the function's return type. This is applied to consecutive `end` markers at the end of a function. For example, when the function return type is `i32`, ``` block i32 ;; return type is fixed to i32 ... loop i32 ;; return type is fixed to i32 ... 
end_loop
end_block
end_function
```

But try-catch is a little different, because it consists of two parts: a try
part and a catch part, and both parts' return types should satisfy the
function's return type, which means:
```
try i32      ;; return type is fixed to i32
  ...
  block i32  ;; this should be changed to i32 too!
  ...
  end_block
catch
  ...
end_try
end_function
```
As you can see in this example, it is not sufficient to check only the `end`
instructions at the end of a function; in the case of `try`, we should also
check the instructions before each `catch`, in case their corresponding
`try`'s type has been fixed.

This changes `fixEndsAtEndOfFunction`'s algorithm to use a worklist of
reverse iterators, each of which is a starting point for a new backward
search for `end` instructions.

Fixes https://bugs.llvm.org/show_bug.cgi?id=47413.

Reviewed By: dschuff, tlively

Differential Revision: https://reviews.llvm.org/D87207
---
 .../WebAssembly/WebAssemblyCFGStackify.cpp    | 72 ++++++++++++------
 .../CodeGen/WebAssembly/cfg-stackify-eh.ll    | 48 +++++++++++++
 2 files changed, 96 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index 02330a2dd4afa..d5ee4b3b9440e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -178,6 +178,28 @@ getLatestInsertPos(MachineBasicBlock *MBB,
   return InsertPos;
 }
 
+// Find a catch instruction and its destination register within an EH pad.
+static MachineInstr *findCatch(MachineBasicBlock *EHPad, Register &ExnReg) {
+  assert(EHPad->isEHPad());
+  MachineInstr *Catch = nullptr;
+  for (auto &MI : *EHPad) {
+    switch (MI.getOpcode()) {
+    case WebAssembly::CATCH:
+      Catch = &MI;
+      ExnReg = Catch->getOperand(0).getReg();
+      break;
+    }
+  }
+  assert(Catch && "EH pad does not have a catch");
+  assert(ExnReg != 0 && "Invalid register");
+  return Catch;
+}
+
+static MachineInstr *findCatch(MachineBasicBlock *EHPad) {
+  Register Dummy;
+  return findCatch(EHPad, Dummy);
+}
+
 void WebAssemblyCFGStackify::registerScope(MachineInstr *Begin,
                                            MachineInstr *End) {
   BeginToEnd[Begin] = End;
@@ -1101,25 +1123,8 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
       continue;
 
     MachineBasicBlock *EHPad = P.first;
-
-    // Find 'catch' and 'local.set' or 'drop' instruction that follows the
-    // 'catch'. If -wasm-disable-explicit-locals is not set, 'catch' should be
-    // always followed by either 'local.set' or a 'drop', because 'br_on_exn' is
-    // generated after 'catch' in LateEHPrepare and we don't support blocks
-    // taking values yet.
- MachineInstr *Catch = nullptr; - unsigned ExnReg = 0; - for (auto &MI : *EHPad) { - switch (MI.getOpcode()) { - case WebAssembly::CATCH: - Catch = &MI; - ExnReg = Catch->getOperand(0).getReg(); - break; - } - } - assert(Catch && "EH pad does not have a catch"); - assert(ExnReg != 0 && "Invalid register"); - + Register ExnReg = 0; + MachineInstr *Catch = findCatch(EHPad, ExnReg); auto SplitPos = std::next(Catch->getIterator()); // Create a new BB that's gonna be the destination for branches from the @@ -1371,22 +1376,41 @@ void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) { : WebAssembly::BlockType( WebAssembly::toValType(MFI.getResults().front())); - for (MachineBasicBlock &MBB : reverse(MF)) { - for (MachineInstr &MI : reverse(MBB)) { + SmallVector Worklist; + Worklist.push_back(MF.rbegin()->rbegin()); + + auto Process = [&](MachineBasicBlock::reverse_iterator It) { + auto *MBB = It->getParent(); + while (It != MBB->rend()) { + MachineInstr &MI = *It++; if (MI.isPosition() || MI.isDebugInstr()) continue; switch (MI.getOpcode()) { + case WebAssembly::END_TRY: { + // If a 'try''s return type is fixed, both its try body and catch body + // should satisfy the return type, so we need to search 'end' + // instructions before its corresponding 'catch' too. + auto *EHPad = TryToEHPad.lookup(EndToBegin[&MI]); + assert(EHPad); + Worklist.push_back(std::next(findCatch(EHPad)->getReverseIterator())); + LLVM_FALLTHROUGH; + } case WebAssembly::END_BLOCK: case WebAssembly::END_LOOP: - case WebAssembly::END_TRY: EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType)); continue; default: - // Something other than an `end`. We're done. + // Something other than an `end`. We're done for this BB. return; } } - } + // We've reached the beginning of a BB. Continue the search in the previous + // BB. + Worklist.push_back(MBB->getPrevNode()->rbegin()); + }; + + while (!Worklist.empty()) + Process(Worklist.pop_back_val()); } // WebAssembly functions end with an end instruction, as if the function body diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll index 887dc470b3bc8..f78d56ca0b962 100644 --- a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll +++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll @@ -1023,6 +1023,54 @@ while.end: ; preds = %while.body, %while. ret void } +; When the function return type is non-void and 'end' instructions are at the +; very end of a function, CFGStackify's fixEndsAtEndOfFunction function fixes +; the corresponding block/loop/try's type to match the function's return type. +; But when a `try`'s type is fixed, we should also check `end` instructions +; before its corresponding `catch`, because both `try` and `catch` body should +; satisfy the return type requirements. 
+ +; NOSORT-LABEL: test19 +; NOSORT: try i32 +; NOSORT: loop i32 +; NOSORT: end_loop +; NOSORT: catch +; NOSORT: end_try +; NOSORT-NEXT: end_function +define i32 @test19(i32 %n) personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +entry: + %t = alloca %class.Object, align 1 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, %n + br label %for.body + +for.body: ; preds = %for.cond + %div = sdiv i32 %n, 2 + %cmp1 = icmp eq i32 %i.0, %div + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %call = invoke i32 @baz() + to label %invoke.cont unwind label %ehcleanup + +invoke.cont: ; preds = %if.then + %call2 = call %class.Object* @_ZN6ObjectD2Ev(%class.Object* %t) #4 + ret i32 %call + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +ehcleanup: ; preds = %if.then + %0 = cleanuppad within none [] + %call3 = call %class.Object* @_ZN6ObjectD2Ev(%class.Object* %t) #4 [ "funclet"(token %0) ] + cleanupret from %0 unwind to caller +} + + ; Check if the unwind destination mismatch stats are correct ; NOSORT-STAT: 17 wasm-cfg-stackify - Number of EH pad unwind mismatches found From 1242dd330d9054a57c1403f16d5487f9e3a3a92f Mon Sep 17 00:00:00 2001 From: Volkan Keles Date: Tue, 8 Sep 2020 09:46:38 -0700 Subject: [PATCH 0074/1079] GlobalISel: Combine `op undef, x` to 0 https://reviews.llvm.org/D86611 --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 3 ++ .../include/llvm/Target/GlobalISel/Combine.td | 7 +++++ .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 6 ++++ .../AArch64/GlobalISel/combine-shl.mir | 29 +++++++++++++++++++ 4 files changed, 45 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-shl.mir diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 8607ad02d5063..cff6b496cca27 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -321,6 +321,9 @@ class CombinerHelper { /// Check if operand \p OpIdx is zero. bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx); + /// Check if operand \p OpIdx is undef. + bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx); + /// Erase \p MI bool eraseInst(MachineInstr &MI); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 6a6f97ae78b04..5b940551dad59 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -194,6 +194,12 @@ def undef_to_negative_one: GICombineRule< [{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]), (apply [{ Helper.replaceInstWithConstant(*${root}, -1); }])>; +def binop_left_undef_to_zero: GICombineRule< + (defs root:$root), + (match (wip_match_opcode G_SHL):$root, + [{ return Helper.matchOperandIsUndef(*${root}, 1); }]), + (apply [{ Helper.replaceInstWithConstant(*${root}, 0); }])>; + // Instructions where if any source operand is undef, the instruction can be // replaced with undef. def propagate_undef_any_op: GICombineRule< @@ -384,6 +390,7 @@ def not_cmp_fold : GICombineRule< // FIXME: These should use the custom predicate feature once it lands. 
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                     undef_to_negative_one,
+                                    binop_left_undef_to_zero,
                                     propagate_undef_any_op,
                                     propagate_undef_all_ops,
                                     propagate_undef_shuffle_mask,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 10cd58f17e9aa..d58ba7cf5a8c6 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1989,6 +1989,12 @@ bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
                          MRI);
 }
 
+bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
+  MachineOperand &MO = MI.getOperand(OpIdx);
+  return MO.isReg() &&
+         getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
+}
+
 bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
   Builder.setInstr(MI);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shl.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shl.mir
new file mode 100644
index 0000000000000..fe75f9965bc90
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shl.mir
@@ -0,0 +1,29 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
+---
+name: test_combine_shl_undef_x_s32
+body: |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: test_combine_shl_undef_x_s32
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: $w0 = COPY [[C]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_IMPLICIT_DEF
+    %2:_(s32) = G_SHL %1(s32), %0(s32)
+    $w0 = COPY %2(s32)
+...
+---
+name: test_combine_shl_undef_x_v2s32
+body: |
+  bb.1:
+    liveins: $d0
+    ; CHECK-LABEL: name: test_combine_shl_undef_x_v2s32
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+    ; CHECK: $d0 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s32>) = G_IMPLICIT_DEF
+    %2:_(<2 x s32>) = G_SHL %1(<2 x s32>), %0(<2 x s32>)
+    $d0 = COPY %2(<2 x s32>)
+...
From 514df1b2bb1ecd1a33327001ea38a347fd2d0380 Mon Sep 17 00:00:00 2001
From: Ties Stuij
Date: Fri, 28 Aug 2020 15:08:02 +0100
Subject: [PATCH 0075/1079] [ARM] Follow AAPCS standard for volatile bit-field
 access width

This patch resumes the work of D16586.

According to the AAPCS, volatile bit-fields should be accessed using
containers of the width of their declared type. In such a case:

```
struct S1 {
  short a : 1;
}
```

the bit-field should be accessed using loads and stores of the width of its
declared type (sizeof(short)), whereas the compiler currently loads only the
minimum required width (char in this case). However, as discussed in D16586,
widening the access could overwrite neighbouring members that are not part of
the bit-field, which conflicts with the C and C++ object models by creating
data races on memory locations that are not part of the bit-field, e.g.

```
struct S2 {
  short a;
  int b : 16;
}
```

Accessing `S2.b` with an int-wide container would also access `S2.a`.

The AAPCS Release 2020Q2
(https://documentation-service.arm.com/static/5efb7fbedbdee951c1ccf186?token=)
section 8.1 Data Types, page 36, "Volatile bit-fields - preserving number and
width of container accesses" has been updated to avoid conflict with the C++
Memory Model.
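As a minimal sketch of the race being avoided (the thread bodies and driver
below are illustrative assumptions, not part of this patch):

```cpp
#include <thread>

struct S2 {
  short a;             // not a bit-field: its own memory location in C++11
  volatile int b : 16; // on AAPCS targets this field typically shares the
                       // first 32-bit word with 'a'
};

S2 s;

int main() {
  // Since 'a' is a distinct memory location, these two writes must not
  // race.  If the volatile store to 'b' used a 32-bit read-modify-write of
  // the whole container, it would also write 'a' and introduce a data race.
  std::thread t1([] { s.a = 1; });
  std::thread t2([] { s.b = 2; });
  t1.join();
  t2.join();
}
```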
Now it reads in the note: ``` This ABI does not place any restrictions on the access widths of bit-fields where the container overlaps with a non-bit-field member or where the container overlaps with any zero length bit-field placed between two other bit-fields. This is because the C/C++ memory model defines these as being separate memory locations, which can be accessed by two threads simultaneously. For this reason, compilers must be permitted to use a narrower memory access width (including splitting the access into multiple instructions) to avoid writing to a different memory location. For example, in struct S { int a:24; char b; }; a write to a must not also write to the location occupied by b, this requires at least two memory accesses in all current Arm architectures. In the same way, in struct S { int a:24; int:0; int b:8; };, writes to a or b must not overwrite each other. ``` Patch D16586 was updated to follow such behavior by verifying that we only change volatile bit-field access when: - it won't overlap with any other non-bit-field member - we only access memory inside the bounds of the record - avoid overlapping zero-length bit-fields. Regarding the number of memory accesses, that should be preserved, that will be implemented by D67399. Differential Revision: https://reviews.llvm.org/D72932 The following people contributed to this patch: - Diogo Sampaio - Ties Stuij --- clang/include/clang/Basic/CodeGenOptions.def | 6 +- clang/include/clang/Driver/Options.td | 8 +- clang/lib/CodeGen/CGExpr.cpp | 118 +- clang/lib/CodeGen/CGRecordLayout.h | 17 +- clang/lib/CodeGen/CGRecordLayoutBuilder.cpp | 166 +- clang/lib/Frontend/CompilerInvocation.cpp | 3 + clang/test/CodeGen/aapcs-bitfield.c | 3292 +++++++++++++++++- clang/test/CodeGen/bitfield-2.c | 12 +- 8 files changed, 3519 insertions(+), 103 deletions(-) diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index ec77f68062e7a..f2f29db2334e4 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -392,9 +392,13 @@ CODEGENOPT(Addrsig, 1, 0) /// Whether to emit unused static constants. CODEGENOPT(KeepStaticConsts, 1, 0) -/// Whether to not follow the AAPCS that enforce at least one read before storing to a volatile bitfield +/// Whether to follow the AAPCS enforcing at least one read before storing to a volatile bitfield CODEGENOPT(ForceAAPCSBitfieldLoad, 1, 0) +/// Whether to not follow the AAPCS that enforces volatile bit-field access width to be +/// according to the field declaring type width. +CODEGENOPT(AAPCSBitfieldWidth, 1, 1) + #undef CODEGENOPT #undef ENUM_CODEGENOPT #undef VALUE_CODEGENOPT diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 4ba5d40117e77..81d63330b4279 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2363,9 +2363,15 @@ def mno_neg_immediates: Flag<["-"], "mno-neg-immediates">, Group, Group, Flags<[DriverOption,CC1Option]>, HelpText<"Allow use of CMSE (Armv8-M Security Extensions)">; -def ForceAAPCSBitfieldLoad : Flag<["-"], "fAAPCSBitfieldLoad">, Group, +def ForceAAPCSBitfieldLoad : Flag<["-"], "faapcs-bitfield-load">, Group, Flags<[DriverOption,CC1Option]>, HelpText<"Follows the AAPCS standard that all volatile bit-field write generates at least one load. 
(ARM only).">; +def ForceNoAAPCSBitfieldWidth : Flag<["-"], "fno-aapcs-bitfield-width">, Group, + Flags<[DriverOption,CC1Option]>, + HelpText<"Do not follow the AAPCS standard requirement that volatile bit-field width is dictated by the field container type. (ARM only).">; +def AAPCSBitfieldWidth : Flag<["-"], "faapcs-bitfield-width">, Group, + Flags<[DriverOption,CC1Option]>, + HelpText<"Follow the AAPCS standard requirement stating that volatile bit-field width is dictated by the field container type. (ARM only).">; def mgeneral_regs_only : Flag<["-"], "mgeneral-regs-only">, Group, HelpText<"Generate code which only uses the general purpose registers (AArch64 only)">; diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 7351926035e64..df024a84462db 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1927,22 +1927,27 @@ RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV, llvm::Type *ResLTy = ConvertType(LV.getType()); Address Ptr = LV.getBitFieldAddress(); - llvm::Value *Val = Builder.CreateLoad(Ptr, LV.isVolatileQualified(), "bf.load"); - + llvm::Value *Val = + Builder.CreateLoad(Ptr, LV.isVolatileQualified(), "bf.load"); + + bool UseVolatile = LV.isVolatileQualified() && + Info.VolatileStorageSize != 0 && isAAPCS(CGM.getTarget()); + const unsigned Offset = UseVolatile ? Info.VolatileOffset : Info.Offset; + const unsigned StorageSize = + UseVolatile ? Info.VolatileStorageSize : Info.StorageSize; if (Info.IsSigned) { - assert(static_cast(Info.Offset + Info.Size) <= Info.StorageSize); - unsigned HighBits = Info.StorageSize - Info.Offset - Info.Size; + assert(static_cast(Offset + Info.Size) <= StorageSize); + unsigned HighBits = StorageSize - Offset - Info.Size; if (HighBits) Val = Builder.CreateShl(Val, HighBits, "bf.shl"); - if (Info.Offset + HighBits) - Val = Builder.CreateAShr(Val, Info.Offset + HighBits, "bf.ashr"); + if (Offset + HighBits) + Val = Builder.CreateAShr(Val, Offset + HighBits, "bf.ashr"); } else { - if (Info.Offset) - Val = Builder.CreateLShr(Val, Info.Offset, "bf.lshr"); - if (static_cast(Info.Offset) + Info.Size < Info.StorageSize) - Val = Builder.CreateAnd(Val, llvm::APInt::getLowBitsSet(Info.StorageSize, - Info.Size), - "bf.clear"); + if (Offset) + Val = Builder.CreateLShr(Val, Offset, "bf.lshr"); + if (static_cast(Offset) + Info.Size < StorageSize) + Val = Builder.CreateAnd( + Val, llvm::APInt::getLowBitsSet(StorageSize, Info.Size), "bf.clear"); } Val = Builder.CreateIntCast(Val, ResLTy, Info.IsSigned, "bf.cast"); EmitScalarRangeCheck(Val, LV.getType(), Loc); @@ -2144,39 +2149,43 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, /*isSigned=*/false); llvm::Value *MaskedVal = SrcVal; + const bool UseVolatile = CGM.getCodeGenOpts().AAPCSBitfieldWidth && + Dst.isVolatileQualified() && + Info.VolatileStorageSize != 0 && + isAAPCS(CGM.getTarget()); + const unsigned StorageSize = + UseVolatile ? Info.VolatileStorageSize : Info.StorageSize; + const unsigned Offset = UseVolatile ? Info.VolatileOffset : Info.Offset; // See if there are other bits in the bitfield's storage we'll need to load // and mask together with source before storing. 
- if (Info.StorageSize != Info.Size) { - assert(Info.StorageSize > Info.Size && "Invalid bitfield size."); + if (StorageSize != Info.Size) { + assert(StorageSize > Info.Size && "Invalid bitfield size."); llvm::Value *Val = - Builder.CreateLoad(Ptr, Dst.isVolatileQualified(), "bf.load"); + Builder.CreateLoad(Ptr, Dst.isVolatileQualified(), "bf.load"); // Mask the source value as needed. if (!hasBooleanRepresentation(Dst.getType())) - SrcVal = Builder.CreateAnd(SrcVal, - llvm::APInt::getLowBitsSet(Info.StorageSize, - Info.Size), - "bf.value"); + SrcVal = Builder.CreateAnd( + SrcVal, llvm::APInt::getLowBitsSet(StorageSize, Info.Size), + "bf.value"); MaskedVal = SrcVal; - if (Info.Offset) - SrcVal = Builder.CreateShl(SrcVal, Info.Offset, "bf.shl"); + if (Offset) + SrcVal = Builder.CreateShl(SrcVal, Offset, "bf.shl"); // Mask out the original value. - Val = Builder.CreateAnd(Val, - ~llvm::APInt::getBitsSet(Info.StorageSize, - Info.Offset, - Info.Offset + Info.Size), - "bf.clear"); + Val = Builder.CreateAnd( + Val, ~llvm::APInt::getBitsSet(StorageSize, Offset, Offset + Info.Size), + "bf.clear"); // Or together the unchanged values and the source value. SrcVal = Builder.CreateOr(Val, SrcVal, "bf.set"); } else { - assert(Info.Offset == 0); + assert(Offset == 0); // According to the AACPS: // When a volatile bit-field is written, and its container does not overlap - // with any non-bit-field member, its container must be read exactly once and - // written exactly once using the access width appropriate to the type of the - // container. The two accesses are not atomic. + // with any non-bit-field member, its container must be read exactly once + // and written exactly once using the access width appropriate to the type + // of the container. The two accesses are not atomic. if (Dst.isVolatileQualified() && isAAPCS(CGM.getTarget()) && CGM.getCodeGenOpts().ForceAAPCSBitfieldLoad) Builder.CreateLoad(Ptr, true, "bf.load"); @@ -2191,8 +2200,8 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, // Sign extend the value if needed. if (Info.IsSigned) { - assert(Info.Size <= Info.StorageSize); - unsigned HighBits = Info.StorageSize - Info.Size; + assert(Info.Size <= StorageSize); + unsigned HighBits = StorageSize - Info.Size; if (HighBits) { ResultVal = Builder.CreateShl(ResultVal, HighBits, "bf.result.shl"); ResultVal = Builder.CreateAShr(ResultVal, HighBits, "bf.result.ashr"); @@ -4204,32 +4213,45 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, if (field->isBitField()) { const CGRecordLayout &RL = - CGM.getTypes().getCGRecordLayout(field->getParent()); + CGM.getTypes().getCGRecordLayout(field->getParent()); const CGBitFieldInfo &Info = RL.getBitFieldInfo(field); + const bool UseVolatile = isAAPCS(CGM.getTarget()) && + CGM.getCodeGenOpts().AAPCSBitfieldWidth && + Info.VolatileStorageSize != 0 && + field->getType() + .withCVRQualifiers(base.getVRQualifiers()) + .isVolatileQualified(); Address Addr = base.getAddress(*this); unsigned Idx = RL.getLLVMFieldNo(field); const RecordDecl *rec = field->getParent(); - if (!IsInPreservedAIRegion && - (!getDebugInfo() || !rec->hasAttr())) { - if (Idx != 0) - // For structs, we GEP to the field that the record layout suggests. 
- Addr = Builder.CreateStructGEP(Addr, Idx, field->getName()); - } else { - llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType( - getContext().getRecordType(rec), rec->getLocation()); - Addr = Builder.CreatePreserveStructAccessIndex(Addr, Idx, - getDebugInfoFIndex(rec, field->getFieldIndex()), - DbgInfo); + if (!UseVolatile) { + if (!IsInPreservedAIRegion && + (!getDebugInfo() || !rec->hasAttr())) { + if (Idx != 0) + // For structs, we GEP to the field that the record layout suggests. + Addr = Builder.CreateStructGEP(Addr, Idx, field->getName()); + } else { + llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType( + getContext().getRecordType(rec), rec->getLocation()); + Addr = Builder.CreatePreserveStructAccessIndex( + Addr, Idx, getDebugInfoFIndex(rec, field->getFieldIndex()), + DbgInfo); + } } - + const unsigned SS = + UseVolatile ? Info.VolatileStorageSize : Info.StorageSize; // Get the access type. - llvm::Type *FieldIntTy = - llvm::Type::getIntNTy(getLLVMContext(), Info.StorageSize); + llvm::Type *FieldIntTy = llvm::Type::getIntNTy(getLLVMContext(), SS); if (Addr.getElementType() != FieldIntTy) Addr = Builder.CreateElementBitCast(Addr, FieldIntTy); + if (UseVolatile) { + const unsigned VolatileOffset = Info.VolatileStorageOffset.getQuantity(); + if (VolatileOffset) + Addr = Builder.CreateConstInBoundsGEP(Addr, VolatileOffset); + } QualType fieldType = - field->getType().withCVRQualifiers(base.getVRQualifiers()); + field->getType().withCVRQualifiers(base.getVRQualifiers()); // TODO: Support TBAA for bit fields. LValueBaseInfo FieldBaseInfo(BaseInfo.getAlignmentSource()); return LValue::MakeBitfield(Addr, Info, fieldType, FieldBaseInfo, diff --git a/clang/lib/CodeGen/CGRecordLayout.h b/clang/lib/CodeGen/CGRecordLayout.h index 730ee4c438e7e..e6665b72bcba1 100644 --- a/clang/lib/CodeGen/CGRecordLayout.h +++ b/clang/lib/CodeGen/CGRecordLayout.h @@ -46,7 +46,7 @@ namespace CodeGen { /// }; /// /// This will end up as the following LLVM type. The first array is the -/// bitfield, and the second is the padding out to a 4-byte alignmnet. +/// bitfield, and the second is the padding out to a 4-byte alignment. /// /// %t = type { i8, i8, i8, i8, i8, [3 x i8] } /// @@ -80,8 +80,21 @@ struct CGBitFieldInfo { /// The offset of the bitfield storage from the start of the struct. CharUnits StorageOffset; + /// The offset within a contiguous run of bitfields that are represented as a + /// single "field" within the LLVM struct type, taking into account the AAPCS + /// rules for volatile bitfields. This offset is in bits. + unsigned VolatileOffset : 16; + + /// The storage size in bits which should be used when accessing this + /// bitfield. + unsigned VolatileStorageSize; + + /// The offset of the bitfield storage from the start of the struct. + CharUnits VolatileStorageOffset; + CGBitFieldInfo() - : Offset(), Size(), IsSigned(), StorageSize(), StorageOffset() {} + : Offset(), Size(), IsSigned(), StorageSize(), StorageOffset(), + VolatileOffset(), VolatileStorageSize(), VolatileStorageOffset() {} CGBitFieldInfo(unsigned Offset, unsigned Size, bool IsSigned, unsigned StorageSize, CharUnits StorageOffset) diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index 4e5d1d3f16f65..ce35880106c20 100644 --- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -109,6 +109,14 @@ struct CGRecordLowering { D->isMsStruct(Context); } + /// Helper function to check if we are targeting AAPCS. 
+ bool isAAPCS() const { + return Context.getTargetInfo().getABI().startswith("aapcs"); + } + + /// Helper function to check if the target machine is BigEndian. + bool isBE() const { return Context.getTargetInfo().isBigEndian(); } + /// The Itanium base layout rule allows virtual bases to overlap /// other bases, which complicates layout in specific ways. /// @@ -172,7 +180,8 @@ struct CGRecordLowering { void lowerUnion(); void accumulateFields(); void accumulateBitFields(RecordDecl::field_iterator Field, - RecordDecl::field_iterator FieldEnd); + RecordDecl::field_iterator FieldEnd); + void computeVolatileBitfields(); void accumulateBases(); void accumulateVPtrs(); void accumulateVBases(); @@ -237,6 +246,10 @@ void CGRecordLowering::setBitFieldInfo( // least-significant-bit. if (DataLayout.isBigEndian()) Info.Offset = Info.StorageSize - (Info.Offset + Info.Size); + + Info.VolatileStorageSize = 0; + Info.VolatileOffset = 0; + Info.VolatileStorageOffset = CharUnits::Zero(); } void CGRecordLowering::lower(bool NVBaseType) { @@ -261,15 +274,21 @@ void CGRecordLowering::lower(bool NVBaseType) { // 8) Format the complete list of members in a way that can be consumed by // CodeGenTypes::ComputeRecordLayout. CharUnits Size = NVBaseType ? Layout.getNonVirtualSize() : Layout.getSize(); - if (D->isUnion()) - return lowerUnion(); + if (D->isUnion()) { + lowerUnion(); + computeVolatileBitfields(); + return; + } accumulateFields(); // RD implies C++. if (RD) { accumulateVPtrs(); accumulateBases(); - if (Members.empty()) - return appendPaddingBytes(Size); + if (Members.empty()) { + appendPaddingBytes(Size); + computeVolatileBitfields(); + return; + } if (!NVBaseType) accumulateVBases(); } @@ -281,6 +300,7 @@ void CGRecordLowering::lower(bool NVBaseType) { Members.pop_back(); calculateZeroInit(); fillOutputFields(); + computeVolatileBitfields(); } void CGRecordLowering::lowerUnion() { @@ -418,9 +438,9 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, if (OffsetInRecord < 8 || !llvm::isPowerOf2_64(OffsetInRecord) || !DataLayout.fitsInLegalInteger(OffsetInRecord)) return false; - // Make sure StartBitOffset is natually aligned if it is treated as an + // Make sure StartBitOffset is naturally aligned if it is treated as an // IType integer. - if (StartBitOffset % + if (StartBitOffset % Context.toBits(getAlignment(getIntNType(OffsetInRecord))) != 0) return false; @@ -503,6 +523,123 @@ void CGRecordLowering::accumulateBases() { } } +/// The AAPCS that defines that, when possible, bit-fields should +/// be accessed using containers of the declared type width: +/// When a volatile bit-field is read, and its container does not overlap with +/// any non-bit-field member or any zero length bit-field member, its container +/// must be read exactly once using the access width appropriate to the type of +/// the container. When a volatile bit-field is written, and its container does +/// not overlap with any non-bit-field member or any zero-length bit-field +/// member, its container must be read exactly once and written exactly once +/// using the access width appropriate to the type of the container. The two +/// accesses are not atomic. +/// +/// Enforcing the width restriction can be disabled using +/// -fno-aapcs-bitfield-width. 
+void CGRecordLowering::computeVolatileBitfields() { + if (!isAAPCS() || !Types.getCodeGenOpts().AAPCSBitfieldWidth) + return; + + for (auto &I : BitFields) { + const FieldDecl *Field = I.first; + CGBitFieldInfo &Info = I.second; + llvm::Type *ResLTy = Types.ConvertTypeForMem(Field->getType()); + // If the record alignment is less than the type width, we can't enforce a + // aligned load, bail out. + if ((uint64_t)(Context.toBits(Layout.getAlignment())) < + ResLTy->getPrimitiveSizeInBits()) + continue; + // CGRecordLowering::setBitFieldInfo() pre-adjusts the bit-field offsets + // for big-endian targets, but it assumes a container of width + // Info.StorageSize. Since AAPCS uses a different container size (width + // of the type), we first undo that calculation here and redo it once + // the bit-field offset within the new container is calculated. + const unsigned OldOffset = + isBE() ? Info.StorageSize - (Info.Offset + Info.Size) : Info.Offset; + // Offset to the bit-field from the beginning of the struct. + const unsigned AbsoluteOffset = + Context.toBits(Info.StorageOffset) + OldOffset; + + // Container size is the width of the bit-field type. + const unsigned StorageSize = ResLTy->getPrimitiveSizeInBits(); + // Nothing to do if the access uses the desired + // container width and is naturally aligned. + if (Info.StorageSize == StorageSize && (OldOffset % StorageSize == 0)) + continue; + + // Offset within the container. + unsigned Offset = AbsoluteOffset & (StorageSize - 1); + // Bail out if an aligned load of the container cannot cover the entire + // bit-field. This can happen for example, if the bit-field is part of a + // packed struct. AAPCS does not define access rules for such cases, we let + // clang to follow its own rules. + if (Offset + Info.Size > StorageSize) + continue; + + // Re-adjust offsets for big-endian targets. + if (isBE()) + Offset = StorageSize - (Offset + Info.Size); + + const CharUnits StorageOffset = + Context.toCharUnitsFromBits(AbsoluteOffset & ~(StorageSize - 1)); + const CharUnits End = StorageOffset + + Context.toCharUnitsFromBits(StorageSize) - + CharUnits::One(); + + const ASTRecordLayout &Layout = + Context.getASTRecordLayout(Field->getParent()); + // If we access outside memory outside the record, than bail out. + const CharUnits RecordSize = Layout.getSize(); + if (End >= RecordSize) + continue; + + // Bail out if performing this load would access non-bit-fields members. + bool Conflict = false; + for (const auto *F : D->fields()) { + // Allow sized bit-fields overlaps. + if (F->isBitField() && !F->isZeroLengthBitField(Context)) + continue; + + const CharUnits FOffset = Context.toCharUnitsFromBits( + Layout.getFieldOffset(F->getFieldIndex())); + + // As C11 defines, a zero sized bit-field defines a barrier, so + // fields after and before it should be race condition free. + // The AAPCS acknowledges it and imposes no restritions when the + // natural container overlaps a zero-length bit-field. + if (F->isZeroLengthBitField(Context)) { + if (End > FOffset && StorageOffset < FOffset) { + Conflict = true; + break; + } + } + + const CharUnits FEnd = + FOffset + + Context.toCharUnitsFromBits( + Types.ConvertTypeForMem(F->getType())->getPrimitiveSizeInBits()) - + CharUnits::One(); + // If no overlap, continue. + if (End < FOffset || FEnd < StorageOffset) + continue; + + // The desired load overlaps a non-bit-field member, bail out. + Conflict = true; + break; + } + + if (Conflict) + continue; + // Write the new bit-field access parameters. 
+ // As the storage offset now is defined as the number of elements from the + // start of the structure, we should divide the Offset by the element size. + Info.VolatileStorageOffset = + StorageOffset / Context.toCharUnitsFromBits(StorageSize).getQuantity(); + Info.VolatileStorageSize = StorageSize; + Info.VolatileOffset = Offset; + } +} + void CGRecordLowering::accumulateVPtrs() { if (Layout.hasOwnVFPtr()) Members.push_back(MemberInfo(CharUnits::Zero(), MemberInfo::VFPtr, @@ -848,8 +985,10 @@ CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, llvm::StructType *Ty) { assert(Info.StorageSize <= SL->getSizeInBits() && "Union not large enough for bitfield storage"); } else { - assert(Info.StorageSize == - getDataLayout().getTypeAllocSizeInBits(ElementTy) && + assert((Info.StorageSize == + getDataLayout().getTypeAllocSizeInBits(ElementTy) || + Info.VolatileStorageSize == + getDataLayout().getTypeAllocSizeInBits(ElementTy)) && "Storage size does not match the element type size"); } assert(Info.Size > 0 && "Empty bitfield!"); @@ -897,11 +1036,12 @@ LLVM_DUMP_METHOD void CGRecordLayout::dump() const { void CGBitFieldInfo::print(raw_ostream &OS) const { OS << ""; + << " StorageOffset:" << StorageOffset.getQuantity() + << " VolatileOffset:" << VolatileOffset + << " VolatileStorageSize:" << VolatileStorageSize + << " VolatileStorageOffset:" << VolatileStorageOffset.getQuantity() << ">"; } LLVM_DUMP_METHOD void CGBitFieldInfo::dump() const { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index fbccff11562c1..1fbeb458a9d23 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1453,6 +1453,9 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, std::string(Args.getLastArgValue(OPT_fsymbol_partition_EQ)); Opts.ForceAAPCSBitfieldLoad = Args.hasArg(OPT_ForceAAPCSBitfieldLoad); + Opts.AAPCSBitfieldWidth = Args.hasFlag(OPT_AAPCSBitfieldWidth, + OPT_ForceNoAAPCSBitfieldWidth, + true); return Success; } diff --git a/clang/test/CodeGen/aapcs-bitfield.c b/clang/test/CodeGen/aapcs-bitfield.c index 4fc889bcf379e..13db68d6ae81b 100644 --- a/clang/test/CodeGen/aapcs-bitfield.c +++ b/clang/test/CodeGen/aapcs-bitfield.c @@ -1,8 +1,12 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 | FileCheck %s -check-prefix=LE -// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 | FileCheck %s -check-prefix=BE -// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -fAAPCSBitfieldLoad | FileCheck %s -check-prefixes=LE,LENUMLOADS -// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 -fAAPCSBitfieldLoad | FileCheck %s -check-prefixes=BE,BENUMLOADS +// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -fno-aapcs-bitfield-width | FileCheck %s -check-prefix=LE +// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 -fno-aapcs-bitfield-width | FileCheck %s -check-prefix=BE +// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load -fno-aapcs-bitfield-width | FileCheck %s -check-prefixes=LENUMLOADS +// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load -fno-aapcs-bitfield-width | FileCheck %s -check-prefixes=BENUMLOADS +// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 | FileCheck %s 
-check-prefix=LEWIDTH +// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 | FileCheck %s -check-prefix=BEWIDTH +// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load | FileCheck %s -check-prefixes=LEWIDTHNUM +// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load | FileCheck %s -check-prefixes=BEWIDTHNUM struct st0 { short c : 7; @@ -25,6 +29,57 @@ struct st0 { // BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // +// LENUMLOADS-LABEL: @st0_check_load( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 +// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LENUMLOADS-NEXT: ret i32 [[CONV]] +// +// BENUMLOADS-LABEL: @st0_check_load( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BENUMLOADS-NEXT: ret i32 [[CONV]] +// +// LEWIDTH-LABEL: @st0_check_load( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 +// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LEWIDTH-NEXT: ret i32 [[CONV]] +// +// BEWIDTH-LABEL: @st0_check_load( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTH-NEXT: ret i32 [[CONV]] +// +// LEWIDTHNUM-LABEL: @st0_check_load( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 +// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LEWIDTHNUM-NEXT: ret i32 [[CONV]] +// +// BEWIDTHNUM-LABEL: @st0_check_load( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTHNUM-NEXT: ret i32 [[CONV]] +// int st0_check_load(struct st0 *m) { return m->c; } @@ -47,6 +102,60 @@ int st0_check_load(struct st0 *m) { // BE-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @st0_check_store( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: 
[[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @st0_check_store( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 +// BENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @st0_check_store( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @st0_check_store( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 +// BEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @st0_check_store( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @st0_check_store( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 +// BEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 +// BEWIDTHNUM-NEXT: ret void +// void st0_check_store(struct st0 *m) { m->c = 1; } @@ -73,6 +182,57 @@ struct st1 { // BE-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // +// LENUMLOADS-LABEL: @st1_check_load( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 10 +// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 +// LENUMLOADS-NEXT: ret i32 [[CONV]] +// +// BENUMLOADS-LABEL: @st1_check_load( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 10 +// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 10 +// BENUMLOADS-NEXT: [[CONV:%.*]] = 
sext i16 [[BF_ASHR]] to i32 +// BENUMLOADS-NEXT: ret i32 [[CONV]] +// +// LEWIDTH-LABEL: @st1_check_load( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 10 +// LEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 +// LEWIDTH-NEXT: ret i32 [[CONV]] +// +// BEWIDTH-LABEL: @st1_check_load( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 10 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 10 +// BEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 +// BEWIDTH-NEXT: ret i32 [[CONV]] +// +// LEWIDTHNUM-LABEL: @st1_check_load( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 10 +// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 +// LEWIDTHNUM-NEXT: ret i32 [[CONV]] +// +// BEWIDTHNUM-LABEL: @st1_check_load( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 10 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 10 +// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 +// BEWIDTHNUM-NEXT: ret i32 [[CONV]] +// int st1_check_load(struct st1 *m) { return m->c; } @@ -95,6 +255,60 @@ int st1_check_load(struct st1 *m) { // BE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @st1_check_store( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 1023 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1024 +// LENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @st1_check_store( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -64 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// BENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @st1_check_store( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 1023 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1024 +// LEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @st1_check_store( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], 
%struct.st1* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -64 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// BEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @st1_check_store( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 1023 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1024 +// LEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @st1_check_store( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -64 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// BEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void st1_check_store(struct st1 *m) { m->c = 1; } @@ -121,6 +335,57 @@ struct st2 { // BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // +// LENUMLOADS-LABEL: @st2_check_load( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 +// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LENUMLOADS-NEXT: ret i32 [[CONV]] +// +// BENUMLOADS-LABEL: @st2_check_load( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BENUMLOADS-NEXT: ret i32 [[CONV]] +// +// LEWIDTH-LABEL: @st2_check_load( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 +// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LEWIDTH-NEXT: ret i32 [[CONV]] +// +// BEWIDTH-LABEL: @st2_check_load( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTH-NEXT: ret i32 [[CONV]] +// +// LEWIDTHNUM-LABEL: @st2_check_load( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 
+// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LEWIDTHNUM-NEXT: ret i32 [[CONV]] +// +// BEWIDTHNUM-LABEL: @st2_check_load( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTHNUM-NEXT: ret i32 [[CONV]] +// int st2_check_load(struct st2 *m) { return m->c; } @@ -143,6 +408,60 @@ int st2_check_load(struct st2 *m) { // BE-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @st2_check_store( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @st2_check_store( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 +// BENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @st2_check_store( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @st2_check_store( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 +// BEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @st2_check_store( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @st2_check_store( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 +// BEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 +// BEWIDTHNUM-NEXT: ret void +// void st2_check_store(struct st2 *m) { m->c = 1; } @@ -168,6 +487,57 @@ struct st3 { // BE-NEXT: 
[[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // +// LENUMLOADS-LABEL: @st3_check_load( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 +// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LENUMLOADS-NEXT: ret i32 [[CONV]] +// +// BENUMLOADS-LABEL: @st3_check_load( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2 +// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BENUMLOADS-NEXT: ret i32 [[CONV]] +// +// LEWIDTH-LABEL: @st3_check_load( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 9 +// LEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 +// LEWIDTH-NEXT: ret i32 [[CONV]] +// +// BEWIDTH-LABEL: @st3_check_load( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 9 +// BEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 +// BEWIDTH-NEXT: ret i32 [[CONV]] +// +// LEWIDTHNUM-LABEL: @st3_check_load( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 9 +// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 +// LEWIDTHNUM-NEXT: ret i32 [[CONV]] +// +// BEWIDTHNUM-LABEL: @st3_check_load( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 9 +// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 +// BEWIDTHNUM-NEXT: ret i32 [[CONV]] +// int st3_check_load(struct st3 *m) { return m->c; } @@ -190,6 +560,60 @@ int st3_check_load(struct st3 *m) { // BE-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP0]], align 2 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @st3_check_store( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP0]], align 2 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @st3_check_store( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* 
[[TMP0]], align 2 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 +// BENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP0]], align 2 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @st3_check_store( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -128 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// LEWIDTH-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @st3_check_store( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 511 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512 +// BEWIDTH-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @st3_check_store( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -128 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// LEWIDTHNUM-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @st3_check_store( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 511 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512 +// BEWIDTHNUM-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2 +// BEWIDTHNUM-NEXT: ret void +// void st3_check_store(struct st3 *m) { m->c = 1; } @@ -221,6 +645,68 @@ struct st4 { // BE-NEXT: [[CONV:%.*]] = ashr exact i32 [[SEXT]], 24 // BE-NEXT: ret i32 [[CONV]] // +// LENUMLOADS-LABEL: @st4_check_load( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 2 +// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 11 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = zext i16 [[BF_ASHR]] to i32 +// LENUMLOADS-NEXT: [[SEXT:%.*]] = shl i32 [[BF_CAST]], 24 +// LENUMLOADS-NEXT: [[CONV:%.*]] = ashr exact i32 [[SEXT]], 24 +// LENUMLOADS-NEXT: ret i32 [[CONV]] +// +// BENUMLOADS-LABEL: @st4_check_load( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9 +// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 11 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = zext i16 [[BF_ASHR]] to i32 +// BENUMLOADS-NEXT: [[SEXT:%.*]] = shl i32 [[BF_CAST]], 24 +// BENUMLOADS-NEXT: [[CONV:%.*]] = ashr exact i32 [[SEXT]], 24 +// BENUMLOADS-NEXT: ret i32 [[CONV]] +// +// LEWIDTH-LABEL: @st4_check_load( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* 
[[M:%.*]] to i8* +// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 2 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3 +// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LEWIDTH-NEXT: ret i32 [[CONV]] +// +// BEWIDTH-LABEL: @st4_check_load( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* +// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 +// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3 +// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTH-NEXT: ret i32 [[CONV]] +// +// LEWIDTHNUM-LABEL: @st4_check_load( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 2 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3 +// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LEWIDTHNUM-NEXT: ret i32 [[CONV]] +// +// BEWIDTHNUM-LABEL: @st4_check_load( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 +// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3 +// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTHNUM-NEXT: ret i32 [[CONV]] +// int st4_check_load(struct st4 *m) { return m->c; } @@ -243,6 +729,64 @@ int st4_check_load(struct st4 *m) { // BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @st4_check_store( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -15873 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512 +// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @st4_check_store( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -125 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 4 +// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @st4_check_store( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* +// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -63 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 +// 
LEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @st4_check_store( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* +// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -125 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 4 +// BEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @st4_check_store( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -63 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 +// LEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @st4_check_store( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -125 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 4 +// BEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1 +// BEWIDTHNUM-NEXT: ret void +// void st4_check_store(struct st4 *m) { m->c = 1; } @@ -265,6 +809,60 @@ void st4_check_store(struct st4 *m) { // BE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @st4_check_nonv_store( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -512 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// LENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @st4_check_nonv_store( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 127 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128 +// BENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @st4_check_nonv_store( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -512 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// LEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @st4_check_nonv_store( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: 
[[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 127 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128 +// BEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @st4_check_nonv_store( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -512 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// LEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @st4_check_nonv_store( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 127 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128 +// BEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void st4_check_nonv_store(struct st4 *m) { m->b = 1; } @@ -291,6 +889,57 @@ struct st5 { // BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // +// LENUMLOADS-LABEL: @st5_check_load( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 +// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 +// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LENUMLOADS-NEXT: ret i32 [[CONV]] +// +// BENUMLOADS-LABEL: @st5_check_load( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 +// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BENUMLOADS-NEXT: ret i32 [[CONV]] +// +// LEWIDTH-LABEL: @st5_check_load( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 +// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LEWIDTH-NEXT: ret i32 [[CONV]] +// +// BEWIDTH-LABEL: @st5_check_load( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 +// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTH-NEXT: ret i32 [[CONV]] +// +// LEWIDTHNUM-LABEL: @st5_check_load( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 +// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 
+// LEWIDTHNUM-NEXT: ret i32 [[CONV]] +// +// BEWIDTHNUM-LABEL: @st5_check_load( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 +// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTHNUM-NEXT: ret i32 [[CONV]] +// int st5_check_load(struct st5 *m) { return m->c; } @@ -313,6 +962,60 @@ int st5_check_load(struct st5 *m) { // BE-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @st5_check_store( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @st5_check_store( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 8 +// BENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @st5_check_store( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @st5_check_store( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 8 +// BEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @st5_check_store( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @st5_check_store( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 8 +// BEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 +// BEWIDTHNUM-NEXT: ret void +// void st5_check_store(struct st5 *m) { 
m->c = 1; } @@ -331,7 +1034,7 @@ struct st6 { // LE-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4 // LE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 // LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2 +// LE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 // LE-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 // LE-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] // LE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 @@ -349,7 +1052,7 @@ struct st6 { // BE-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4 // BE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 // BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2 +// BE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 // BE-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 // BE-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] // BE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 @@ -359,6 +1062,114 @@ struct st6 { // BE-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]] // BE-NEXT: ret i32 [[ADD4]] // +// LENUMLOADS-LABEL: @st6_check_load( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 4 +// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 +// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 +// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// LENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] +// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 +// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl i8 [[BF_LOAD1]], 3 +// LENUMLOADS-NEXT: [[BF_ASHR3:%.*]] = ashr exact i8 [[BF_SHL2]], 3 +// LENUMLOADS-NEXT: [[BF_CAST4:%.*]] = sext i8 [[BF_ASHR3]] to i32 +// LENUMLOADS-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], [[BF_CAST4]] +// LENUMLOADS-NEXT: ret i32 [[ADD5]] +// +// BENUMLOADS-LABEL: @st6_check_load( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 +// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 +// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// BENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] +// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 +// BENUMLOADS-NEXT: [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 3 +// BENUMLOADS-NEXT: 
[[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32 +// BENUMLOADS-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]] +// BENUMLOADS-NEXT: ret i32 [[ADD4]] +// +// LEWIDTH-LABEL: @st6_check_load( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 4 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4 +// LEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 +// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// LEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 +// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// LEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] +// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 +// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = shl i8 [[BF_LOAD1]], 3 +// LEWIDTH-NEXT: [[BF_ASHR3:%.*]] = ashr exact i8 [[BF_SHL2]], 3 +// LEWIDTH-NEXT: [[BF_CAST4:%.*]] = sext i8 [[BF_ASHR3]] to i32 +// LEWIDTH-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], [[BF_CAST4]] +// LEWIDTH-NEXT: ret i32 [[ADD5]] +// +// BEWIDTH-LABEL: @st6_check_load( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4 +// BEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 +// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// BEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 +// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// BEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] +// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 +// BEWIDTH-NEXT: [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 3 +// BEWIDTH-NEXT: [[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32 +// BEWIDTH-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]] +// BEWIDTH-NEXT: ret i32 [[ADD4]] +// +// LEWIDTHNUM-LABEL: @st6_check_load( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 4 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4 +// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 +// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 +// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// LEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] +// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 +// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = shl i8 [[BF_LOAD1]], 3 +// LEWIDTHNUM-NEXT: [[BF_ASHR3:%.*]] = ashr exact i8 [[BF_SHL2]], 3 +// LEWIDTHNUM-NEXT: 
[[BF_CAST4:%.*]] = sext i8 [[BF_ASHR3]] to i32 +// LEWIDTHNUM-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], [[BF_CAST4]] +// LEWIDTHNUM-NEXT: ret i32 [[ADD5]] +// +// BEWIDTHNUM-LABEL: @st6_check_load( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4 +// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 +// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 +// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// BEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] +// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 +// BEWIDTHNUM-NEXT: [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 3 +// BEWIDTHNUM-NEXT: [[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32 +// BEWIDTHNUM-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]] +// BEWIDTHNUM-NEXT: ret i32 [[ADD4]] +// int st6_check_load(volatile struct st6 *m) { int x = m->a; x += m->b; @@ -374,7 +1185,7 @@ int st6_check_load(volatile struct st6 *m) { // LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 // LE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 // LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LE-NEXT: store i8 2, i8* [[B]], align 2 +// LE-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 // LE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 // LE-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 // LE-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32 @@ -390,7 +1201,7 @@ int st6_check_load(volatile struct st6 *m) { // BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16 // BE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BE-NEXT: store i8 2, i8* [[B]], align 2 +// BE-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 // BE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 // BE-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 // BE-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7 @@ -398,6 +1209,102 @@ int st6_check_load(volatile struct st6 *m) { // BE-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @st6_check_store( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -4096 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// LENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// LENUMLOADS-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 +// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 +// LENUMLOADS-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32 +// LENUMLOADS-NEXT: [[BF_SET3:%.*]] = or i8 
[[BF_CLEAR2]], 3 +// LENUMLOADS-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @st6_check_store( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 15 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16 +// BENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// BENUMLOADS-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 +// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 +// BENUMLOADS-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7 +// BENUMLOADS-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 24 +// BENUMLOADS-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @st6_check_store( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -4096 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// LEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// LEWIDTH-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 +// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 +// LEWIDTH-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32 +// LEWIDTH-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 3 +// LEWIDTH-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @st6_check_store( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 15 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16 +// BEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// BEWIDTH-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 +// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 +// BEWIDTH-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7 +// BEWIDTH-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 24 +// BEWIDTH-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @st6_check_store( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -4096 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// LEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], 
%struct.st6* [[M]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 +// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 +// LEWIDTHNUM-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32 +// LEWIDTHNUM-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 3 +// LEWIDTHNUM-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @st6_check_store( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 15 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16 +// BEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 +// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 +// BEWIDTHNUM-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7 +// BEWIDTHNUM-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 24 +// BEWIDTHNUM-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 +// BEWIDTHNUM-NEXT: ret void +// void st6_check_store(struct st6 *m) { m->a = 1; m->b = 2; @@ -418,10 +1325,10 @@ struct st7b { // LE-LABEL: @st7_check_load( // LE-NEXT: entry: // LE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LE-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4 +// LE-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 // LE-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 // LE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4 +// LE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 // LE-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 // LE-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] // LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 @@ -435,10 +1342,10 @@ struct st7b { // BE-LABEL: @st7_check_load( // BE-NEXT: entry: // BE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BE-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4 +// BE-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 // BE-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 // BE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4 +// BE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 // BE-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 // BE-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] // BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 @@ -448,6 +1355,105 @@ struct st7b { // BE-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] // BE-NEXT: ret i32 [[ADD3]] // +// LENUMLOADS-LABEL: @st7_check_load( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[TMP0:%.*]] = 
load i8, i8* [[X]], align 4, !tbaa !8 +// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 +// LENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 +// LENUMLOADS-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 +// LENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] +// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 +// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LENUMLOADS-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] +// LENUMLOADS-NEXT: ret i32 [[ADD3]] +// +// BENUMLOADS-LABEL: @st7_check_load( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 +// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 +// BENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 +// BENUMLOADS-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 +// BENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] +// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BENUMLOADS-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] +// BENUMLOADS-NEXT: ret i32 [[ADD3]] +// +// LEWIDTH-LABEL: @st7_check_load( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 +// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 +// LEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// LEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 +// LEWIDTH-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 +// LEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] +// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 +// LEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LEWIDTH-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] +// LEWIDTH-NEXT: ret i32 [[ADD3]] +// +// BEWIDTH-LABEL: @st7_check_load( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 +// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 +// BEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// BEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], 
align 4, !tbaa !11 +// BEWIDTH-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 +// BEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] +// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 +// BEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTH-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] +// BEWIDTH-NEXT: ret i32 [[ADD3]] +// +// LEWIDTHNUM-LABEL: @st7_check_load( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 +// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 +// LEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 +// LEWIDTHNUM-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 +// LEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] +// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 +// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LEWIDTHNUM-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] +// LEWIDTHNUM-NEXT: ret i32 [[ADD3]] +// +// BEWIDTHNUM-LABEL: @st7_check_load( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 +// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 +// BEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 +// BEWIDTHNUM-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 +// BEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] +// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 +// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTHNUM-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] +// BEWIDTHNUM-NEXT: ret i32 [[ADD3]] +// int st7_check_load(struct st7b *m) { int r = m->x; r += m->y.a; @@ -458,9 +1464,9 @@ int st7_check_load(struct st7b *m) { // LE-LABEL: @st7_check_store( // LE-NEXT: entry: // LE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LE-NEXT: store i8 1, i8* [[X]], align 4 +// LE-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 // LE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LE-NEXT: store volatile i8 2, i8* [[A]], align 4 +// LE-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 // LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 // LE-NEXT: 
[[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 @@ -471,9 +1477,9 @@ int st7_check_load(struct st7b *m) { // BE-LABEL: @st7_check_store( // BE-NEXT: entry: // BE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BE-NEXT: store i8 1, i8* [[X]], align 4 +// BE-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 // BE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BE-NEXT: store volatile i8 2, i8* [[A]], align 4 +// BE-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 // BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 // BE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 @@ -481,6 +1487,84 @@ int st7_check_load(struct st7b *m) { // BE-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @st7_check_store( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 +// LENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// LENUMLOADS-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 +// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 3 +// LENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @st7_check_store( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 +// BENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// BENUMLOADS-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 +// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 24 +// BENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @st7_check_store( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 +// LEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// LEWIDTH-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 +// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 3 +// LEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @st7_check_store( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[X:%.*]] = 
getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 +// BEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// BEWIDTH-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 +// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 24 +// BEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @st7_check_store( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 +// LEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// LEWIDTHNUM-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 +// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 3 +// LEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @st7_check_store( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 +// BEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// BEWIDTHNUM-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 +// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 24 +// BEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 +// BEWIDTHNUM-NEXT: ret void +// void st7_check_store(struct st7b *m) { m->x = 1; m->y.a = 2; @@ -504,6 +1588,42 @@ struct st8 { // BE-NEXT: store i16 -1, i16* [[TMP0]], align 4 // BE-NEXT: ret i32 65535 // +// LENUMLOADS-LABEL: @st8_check_assignment( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: store i16 -1, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret i32 65535 +// +// BENUMLOADS-LABEL: @st8_check_assignment( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: store i16 -1, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret i32 65535 +// +// LEWIDTH-LABEL: @st8_check_assignment( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: store i16 -1, i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret i32 65535 +// +// BEWIDTH-LABEL: @st8_check_assignment( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* 
[[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: store i16 -1, i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret i32 65535 +// +// LEWIDTHNUM-LABEL: @st8_check_assignment( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: store i16 -1, i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret i32 65535 +// +// BEWIDTHNUM-LABEL: @st8_check_assignment( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: store i16 -1, i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret i32 65535 +// int st8_check_assignment(struct st8 *m) { return m->f = 0xffff; } @@ -526,6 +1646,50 @@ struct st9{ // BE-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32 // BE-NEXT: ret i32 [[BF_CAST]] // +// LENUMLOADS-LABEL: @read_st9( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32 +// LENUMLOADS-NEXT: ret i32 [[BF_CAST]] +// +// BENUMLOADS-LABEL: @read_st9( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32 +// BENUMLOADS-NEXT: ret i32 [[BF_CAST]] +// +// LEWIDTH-LABEL: @read_st9( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 24 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i32 [[BF_SHL]], 24 +// LEWIDTH-NEXT: ret i32 [[BF_ASHR]] +// +// BEWIDTH-LABEL: @read_st9( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_LOAD]], 24 +// BEWIDTH-NEXT: ret i32 [[BF_ASHR]] +// +// LEWIDTHNUM-LABEL: @read_st9( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 24 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i32 [[BF_SHL]], 24 +// LEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] +// +// BEWIDTHNUM-LABEL: @read_st9( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_LOAD]], 24 +// BEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] +// int read_st9(volatile struct st9 *m) { return m->f; } @@ -533,17 +1697,65 @@ int read_st9(volatile struct st9 *m) { // LE-LABEL: @store_st9( // LE-NEXT: entry: // LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // LE-NEXT: store volatile i8 1, i8* [[TMP0]], align 4 // LE-NEXT: ret void // // BE-LABEL: @store_st9( // BE-NEXT: entry: // BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: 
[[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // BE-NEXT: store volatile i8 1, i8* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @store_st9( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// LENUMLOADS-NEXT: store volatile i8 1, i8* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @store_st9( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// BENUMLOADS-NEXT: store volatile i8 1, i8* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @store_st9( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -256 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 1 +// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @store_st9( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], 16777215 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 16777216 +// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @store_st9( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -256 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 1 +// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @store_st9( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], 16777215 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 16777216 +// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void store_st9(volatile struct st9 *m) { m->f = 1; } @@ -553,7 +1765,6 @@ void store_st9(volatile struct st9 *m) { // LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 // LE-NEXT: ret void // @@ -562,10 +1773,75 @@ void store_st9(volatile struct st9 *m) { // BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 // BE-NEXT: ret 
void
 //
+// LENUMLOADS-LABEL: @increment_st9(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
+// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4
+// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4
+// LENUMLOADS-NEXT: ret void
+//
+// BENUMLOADS-LABEL: @increment_st9(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
+// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4
+// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4
+// BENUMLOADS-NEXT: ret void
+//
+// LEWIDTH-LABEL: @increment_st9(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
+// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255
+// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256
+// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @increment_st9(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216
+// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216
+// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215
+// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
+// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @increment_st9(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
+// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255
+// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256
+// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @increment_st9(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216
+// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216
+// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215
+// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
+// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: ret void
+//
 void increment_st9(volatile struct st9 *m) {
   ++m->f;
 }
@@ -593,6 +1869,56 @@ struct st10{
 // BE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
 // BE-NEXT: ret i32 [[BF_CAST]]
 //
+// LENUMLOADS-LABEL: @read_st10(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
+// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 7
+// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 8
+// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
+// LENUMLOADS-NEXT: ret i32 [[BF_CAST]]
+//
+// BENUMLOADS-LABEL: @read_st10(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
+// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 1
+// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 8
+// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
+// BENUMLOADS-NEXT: ret i32 [[BF_CAST]]
+//
+// LEWIDTH-LABEL: @read_st10(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 23
+// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24
+// LEWIDTH-NEXT: ret i32 [[BF_ASHR]]
+//
+// BEWIDTH-LABEL: @read_st10(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 1
+// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24
+// BEWIDTH-NEXT: ret i32 [[BF_ASHR]]
+//
+// LEWIDTHNUM-LABEL: @read_st10(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 23
+// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24
+// LEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]]
+//
+// BEWIDTHNUM-LABEL: @read_st10(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 1
+// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24
+// BEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]]
+//
 int read_st10(volatile struct st10 *m) {
   return m->f;
 }
@@ -615,6 +1941,60 @@ int read_st10(volatile struct st10 *m) {
 // BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
 // BE-NEXT: ret void
 //
+// LENUMLOADS-LABEL: @store_st10(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
+// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -511
+// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 2
+// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
+// LENUMLOADS-NEXT: ret void
+//
+// BENUMLOADS-LABEL: @store_st10(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
+// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -32641
+// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128
+// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
+// BENUMLOADS-NEXT: ret void
+//
+// LEWIDTH-LABEL: @store_st10(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -511
+// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 2
+// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @store_st10(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -2139095041
+// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 8388608
+// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @store_st10(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -511
+// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 2
+// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @store_st10(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -2139095041
+// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 8388608
+// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: ret void
+//
 void store_st10(volatile struct st10 *m) {
   m->f = 1;
 }
@@ -643,6 +2023,78 @@ void store_st10(volatile struct st10 *m) {
 // BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
 // BE-NEXT: ret void
 //
+// LENUMLOADS-LABEL: @increment_st10(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
+// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4
+// LENUMLOADS-NEXT: [[TMP1:%.*]] = add i16 [[BF_LOAD]], 2
+// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i16 [[TMP1]], 510
+// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD1]], -511
+// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[BF_SHL2]]
+// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
+// LENUMLOADS-NEXT: ret void
+//
+// BENUMLOADS-LABEL: @increment_st10(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
+// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4
+// BENUMLOADS-NEXT: [[TMP1:%.*]] = add i16 [[BF_LOAD]], 128
+// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i16 [[TMP1]], 32640
+// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD1]], -32641
+// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[BF_SHL2]]
+// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
+// BENUMLOADS-NEXT: ret void
+//
+// LEWIDTH-LABEL: @increment_st10(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 2
+// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 510
+// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -511
+// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
+// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @increment_st10(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 8388608
+// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 2139095040
+// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -2139095041
+// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
+// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @increment_st10(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 2
+// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 510
+// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -511
+// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
+// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @increment_st10(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 8388608
+// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 2139095040
+// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -2139095041
+// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
+// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: ret void
+//
 void increment_st10(volatile struct st10 *m) {
   ++m->f;
 }
@@ -666,6 +2118,48 @@ struct st11{
 // BE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32
 // BE-NEXT: ret i32 [[BF_CAST]]
 //
+// LENUMLOADS-LABEL: @read_st11(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
+// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32
+// LENUMLOADS-NEXT: ret i32 [[BF_CAST]]
+//
+// BENUMLOADS-LABEL: @read_st11(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
+// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32
+// BENUMLOADS-NEXT: ret i32 [[BF_CAST]]
+//
+// LEWIDTH-LABEL: @read_st11(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
+// LEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32
+// LEWIDTH-NEXT: ret i32 [[BF_CAST]]
+//
+// BEWIDTH-LABEL: @read_st11(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
+// BEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32
+// BEWIDTH-NEXT: ret i32 [[BF_CAST]]
+//
+// LEWIDTHNUM-LABEL: @read_st11(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
+// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32
+// LEWIDTHNUM-NEXT: ret i32 [[BF_CAST]]
+//
+// BEWIDTHNUM-LABEL: @read_st11(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
+// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32
+// BEWIDTHNUM-NEXT: ret i32 [[BF_CAST]]
+//
 int read_st11(volatile struct st11 *m) {
   return m->f;
 }
@@ -673,17 +2167,55 @@ int read_st11(volatile struct st11 *m) {
 // LE-LABEL: @store_st11(
 // LE-NEXT: entry:
 // LE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
-// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
 // LE-NEXT: store volatile i16 1, i16* [[F]], align 1
 // LE-NEXT: ret void
 //
 // BE-LABEL: @store_st11(
 // BE-NEXT: entry:
 // BE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
-// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
 // BE-NEXT: store volatile i16 1, i16* [[F]], align 1
 // BE-NEXT: ret void
 //
+// LENUMLOADS-LABEL: @store_st11(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
+// LENUMLOADS-NEXT: store volatile i16 1, i16* [[F]], align 1
+// LENUMLOADS-NEXT: ret void
+//
+// BENUMLOADS-LABEL: @store_st11(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
+// BENUMLOADS-NEXT: store volatile i16 1, i16* [[F]], align 1
+// BENUMLOADS-NEXT: ret void
+//
+// LEWIDTH-LABEL: @store_st11(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
+// LEWIDTH-NEXT: store volatile i16 1, i16* [[F]], align 1
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @store_st11(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
+// BEWIDTH-NEXT: store volatile i16 1, i16* [[F]], align 1
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @store_st11(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
+// LEWIDTHNUM-NEXT: store volatile i16 1, i16* [[F]], align 1
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @store_st11(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
+// BEWIDTHNUM-NEXT: store volatile i16 1, i16* [[F]], align 1
+// BEWIDTHNUM-NEXT: ret void
+//
 void store_st11(volatile struct st11 *m) {
   m->f = 1;
 }
@@ -693,7 +2225,6 @@ void store_st11(volatile struct st11 *m) {
 // LE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
 // LE-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1
-// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1
 // LE-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1
 // LE-NEXT: ret void
 //
@@ -702,10 +2233,61 @@ void store_st11(volatile struct st11 *m) {
 // BE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
 // BE-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1
-// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1
 // BE-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1
 // BE-NEXT: ret void
 //
+// LENUMLOADS-LABEL: @increment_st11(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
+// LENUMLOADS-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1
+// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1
+// LENUMLOADS-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1
+// LENUMLOADS-NEXT: ret void
+//
+// BENUMLOADS-LABEL: @increment_st11(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
+// BENUMLOADS-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1
+// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1
+// BENUMLOADS-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1
+// BENUMLOADS-NEXT: ret void
+//
+// LEWIDTH-LABEL: @increment_st11(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
+// LEWIDTH-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1
+// LEWIDTH-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @increment_st11(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
+// BEWIDTH-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1
+// BEWIDTH-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @increment_st11(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
+// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1
+// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1
+// LEWIDTHNUM-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @increment_st11(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
+// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1
+// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1
+// BEWIDTHNUM-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1
+// BEWIDTHNUM-NEXT: ret void
+//
 void increment_st11(volatile struct st11 *m) {
   ++m->f;
 }
@@ -713,19 +2295,67 @@ void increment_st11(volatile struct st11 *m) {
 // LE-LABEL: @increment_e_st11(
 // LE-NEXT: entry:
 // LE-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0
-// LE-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4
+// LE-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12
 // LE-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1
-// LE-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4
+// LE-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12
 // LE-NEXT: ret void
 //
 // BE-LABEL: @increment_e_st11(
 // BE-NEXT: entry:
 // BE-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0
-// BE-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4
+// BE-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12
 // BE-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1
-// BE-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4
+// BE-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12
 // BE-NEXT: ret void
 //
+// LENUMLOADS-LABEL: @increment_e_st11(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0
+// LENUMLOADS-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12
+// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1
+// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12
+// LENUMLOADS-NEXT: ret void
+//
+// BENUMLOADS-LABEL: @increment_e_st11(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0
+// BENUMLOADS-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12
+// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1
+// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12
+// BENUMLOADS-NEXT: ret void
+//
+// LEWIDTH-LABEL: @increment_e_st11(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0
+// LEWIDTH-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12
+// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1
+// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @increment_e_st11(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0
+// BEWIDTH-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12
+// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1
+// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @increment_e_st11(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0
+// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12
+// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1
+// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @increment_e_st11(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0
+// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12
+// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1
+// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12
+// BEWIDTHNUM-NEXT: ret void
+//
 void increment_e_st11(volatile struct st11 *m) {
   ++m->e;
 }
@@ -751,6 +2381,54 @@ struct st12{
 // BE-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16
 // BE-NEXT: ret i32 [[BF_ASHR]]
 //
+// LENUMLOADS-LABEL: @read_st12(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8
+// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16
+// LENUMLOADS-NEXT: ret i32 [[BF_ASHR]]
+//
+// BENUMLOADS-LABEL: @read_st12(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8
+// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16
+// BENUMLOADS-NEXT: ret i32 [[BF_ASHR]]
+//
+// LEWIDTH-LABEL: @read_st12(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8
+// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16
+// LEWIDTH-NEXT: ret i32 [[BF_ASHR]]
+//
+// BEWIDTH-LABEL: @read_st12(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8
+// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16
+// BEWIDTH-NEXT: ret i32 [[BF_ASHR]]
+//
+// LEWIDTHNUM-LABEL: @read_st12(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8
+// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16
+// LEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]]
+//
+// BEWIDTHNUM-LABEL: @read_st12(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8
+// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16
+// BEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]]
+//
 int read_st12(volatile struct st12 *m) {
   return m->f;
 }
@@ -773,6 +2451,60 @@ int read_st12(volatile struct st12 *m) {
 // BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
 // BE-NEXT: ret void
 //
+// LENUMLOADS-LABEL: @store_st12(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961
+// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256
+// LENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// LENUMLOADS-NEXT: ret void
+//
+// BENUMLOADS-LABEL: @store_st12(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961
+// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256
+// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// BENUMLOADS-NEXT: ret void
+//
+// LEWIDTH-LABEL: @store_st12(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961
+// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256
+// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @store_st12(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961
+// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256
+// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @store_st12(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961
+// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256
+// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @store_st12(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961
+// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256
+// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: ret void
+//
 void store_st12(volatile struct st12 *m) {
   m->f = 1;
 }
@@ -801,6 +2533,78 @@ void store_st12(volatile struct st12 *m) {
 // BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
 // BE-NEXT: ret void
 //
+// LENUMLOADS-LABEL: @increment_st12(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LENUMLOADS-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256
+// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960
+// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961
+// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
+// LENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// LENUMLOADS-NEXT: ret void
+//
+// BENUMLOADS-LABEL: @increment_st12(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BENUMLOADS-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256
+// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960
+// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961
+// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
+// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// BENUMLOADS-NEXT: ret void
+//
+// LEWIDTH-LABEL: @increment_st12(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256
+// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960
+// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961
+// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
+// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @increment_st12(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256
+// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960
+// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961
+// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
+// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @increment_st12(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256
+// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960
+// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961
+// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
+// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @increment_st12(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256
+// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960
+// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961
+// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
+// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: ret void
+//
 void increment_st12(volatile struct st12 *m) {
   ++m->f;
 }
@@ -829,6 +2633,78 @@ void increment_st12(volatile struct st12 *m) {
 // BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
 // BE-NEXT: ret void
 //
+// LENUMLOADS-LABEL: @increment_e_st12(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
+// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255
+// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256
+// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// LENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// LENUMLOADS-NEXT: ret void
+//
+// BENUMLOADS-LABEL: @increment_e_st12(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BENUMLOADS-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216
+// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216
+// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215
+// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
+// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// BENUMLOADS-NEXT: ret void
+//
+// LEWIDTH-LABEL: @increment_e_st12(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
+// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255
+// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256
+// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @increment_e_st12(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216
+// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216
+// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215
+// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
+// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @increment_e_st12(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
+// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255
+// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256
+// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @increment_e_st12(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216
+// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216
+// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215
+// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
+// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: ret void
+//
 void increment_e_st12(volatile struct st12 *m) {
   ++m->e;
 }
@@ -866,6 +2742,90 @@ struct st13 {
 // BE-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
 // BE-NEXT: ret void
 //
+// LENUMLOADS-LABEL: @increment_b_st13(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40*
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
+// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8
+// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32
+// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// LENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40
+// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
+// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8
+// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255
+// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]]
+// LENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
+// LENUMLOADS-NEXT: ret void
+//
+// BENUMLOADS-LABEL: @increment_b_st13(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40*
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
+// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32
+// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// BENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40
+// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
+// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296
+// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]]
+// BENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
+// BENUMLOADS-NEXT: ret void
+//
+// LEWIDTH-LABEL: @increment_b_st13(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40*
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
+// LEWIDTH-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8
+// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32
+// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// LEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40
+// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
+// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8
+// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255
+// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]]
+// LEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @increment_b_st13(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40*
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
+// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32
+// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// BEWIDTH-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40
+// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
+// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296
+// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]]
+// BEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @increment_b_st13(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40*
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
+// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8
+// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32
+// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40
+// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
+// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8
+// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255
+// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]]
+// LEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @increment_b_st13(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40*
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
+// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32
+// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40
+// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
+// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296
+// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]]
+// BEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
+// BEWIDTHNUM-NEXT: ret void
+//
 void increment_b_st13(volatile struct st13 *s) {
   s->b++;
 }
@@ -879,7 +2839,6 @@ struct st14 {
 // LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0
 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
 // LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
-// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
 // LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
 // LE-NEXT: ret void
 //
@@ -888,10 +2847,61 @@ struct st14 {
 // BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0
 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
 // BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
-// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
 // BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
 // BE-NEXT: ret void
 //
+// LENUMLOADS-LABEL: @increment_a_st14(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// LENUMLOADS-NEXT: ret void
+//
+// BENUMLOADS-LABEL: @increment_a_st14(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// BENUMLOADS-NEXT: ret void
+//
+// LEWIDTH-LABEL: @increment_a_st14(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @increment_a_st14(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @increment_a_st14(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @increment_a_st14(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// BEWIDTHNUM-NEXT: ret void
+//
 void increment_a_st14(volatile struct st14 *s) {
   s->a++;
 }
@@ -905,7 +2915,6 @@ struct st15 {
 // LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0
 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
 // LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
-// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
 // LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
 // LE-NEXT: ret void
 //
@@ -914,10 +2923,61 @@ struct st15 {
 // BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0
 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
 // BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
-// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
 // BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
 // BE-NEXT: ret void
 //
+// LENUMLOADS-LABEL: @increment_a_st15(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// LENUMLOADS-NEXT: ret void
+//
+// BENUMLOADS-LABEL: @increment_a_st15(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// BENUMLOADS-NEXT: ret void
+//
+// LEWIDTH-LABEL: @increment_a_st15(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @increment_a_st15(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @increment_a_st15(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @increment_a_st15(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// BEWIDTHNUM-NEXT: ret void
+//
 void increment_a_st15(volatile struct st15 *s) {
   s->a++;
 }
@@ -955,6 +3015,84 @@ struct st16 {
 // BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
 // BE-NEXT: ret void
 //
+// LENUMLOADS-LABEL: @increment_a_st16(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
+// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64
+// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296
+// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
+// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// LENUMLOADS-NEXT: ret void
+//
+// BENUMLOADS-LABEL: @increment_a_st16(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
+// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
+// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64
+// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
+// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295
+// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
+// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// BENUMLOADS-NEXT: ret void
+//
+// LEWIDTH-LABEL: @increment_a_st16(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
+// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// LEWIDTH-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64
+// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296
+// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
+// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @increment_a_st16(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
+// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
+// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// BEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64
+// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
+// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295
+// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
+// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @increment_a_st16(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
+// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64
+// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296
+// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
+// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @increment_a_st16(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
+// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
+// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64
+// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
+// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295
+// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
+// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: ret void
+//
 void increment_a_st16(struct st16 *s) {
   s->a++;
 }
@@ -987,6 +3125,90 @@ void increment_a_st16(struct st16 *s) {
 // BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
 // BE-NEXT: ret void
 //
+// LENUMLOADS-LABEL: @increment_b_st16(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
+// LENUMLOADS-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
+// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535
+// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
+// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
+// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361
+// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
+// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// LENUMLOADS-NEXT: ret void
+//
+// BENUMLOADS-LABEL: @increment_b_st16(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
+// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536
+// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536
+// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
+// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761
+// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
+// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// BENUMLOADS-NEXT: ret void
+//
+// LEWIDTH-LABEL: @increment_b_st16(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// LEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
+// LEWIDTH-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
+// LEWIDTH-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535
+// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
+// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
+// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361
+// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
+// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @increment_b_st16(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// BEWIDTH-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
+// BEWIDTH-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536
+// BEWIDTH-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536
+// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
+// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761
+// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
+// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @increment_b_st16(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
+// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
+// LEWIDTHNUM-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535
+// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
+// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
+// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361
+// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
+// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @increment_b_st16(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
+// BEWIDTHNUM-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536
+// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536
+// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
+// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761
+// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
+// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: ret void
+//
 void increment_b_st16(struct st16 *s) {
   s->b++;
 }
@@ -1019,6 +3241,90 @@ void increment_b_st16(struct st16 *s) {
 // BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
 // BE-NEXT: ret void
 //
+// LENUMLOADS-LABEL: @increment_c_st16(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
+// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
+// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64
+// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296
+// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
+// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// LENUMLOADS-NEXT: ret void
+//
+// BENUMLOADS-LABEL: @increment_c_st16(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
+// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
+// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
+// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64
+// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
+// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295
+// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
+// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// BENUMLOADS-NEXT: ret void
+//
+// LEWIDTH-LABEL: @increment_c_st16(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
+// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
+// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// LEWIDTH-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64
+// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296
+// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
+// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @increment_c_st16(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
+// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
+// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
+// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// BEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64
+// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
+// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295
+// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
+// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @increment_c_st16(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
+// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
+// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64
+// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296
+// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
+// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @increment_c_st16(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
+// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
+// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
+// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64
+// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
+// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295
+// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
+// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: ret void
+//
 void increment_c_st16(struct st16 *s) {
   s->c++;
 }
@@ -1053,6 +3359,96 @@ void increment_c_st16(struct st16 *s) {
 // BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
 // BE-NEXT: ret void
 //
+// LENUMLOADS-LABEL: @increment_d_st16(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
+// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
+// LENUMLOADS-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
+// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535
+// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
+// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
+// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361
+// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
+// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// LENUMLOADS-NEXT: ret void
+//
+// BENUMLOADS-LABEL: @increment_d_st16(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
+// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
+// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536
+// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536
+// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
+// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761
+// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
+// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// BENUMLOADS-NEXT: ret void
+//
+// LEWIDTH-LABEL: @increment_d_st16(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
+// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// LEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
+// LEWIDTH-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
+// LEWIDTH-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535
+// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
+// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
+// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361
+// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
+// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @increment_d_st16(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
+// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// BEWIDTH-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
+// BEWIDTH-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536
+// BEWIDTH-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536
+// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
+// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761
+// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
+// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @increment_d_st16(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
+// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
+// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
+// LEWIDTHNUM-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535
+// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
+// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
+// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361
+// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
+// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @increment_d_st16(
+// BEWIDTHNUM-NEXT: entry:
+//
BEWIDTHNUM-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// BEWIDTHNUM-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 +// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 +// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] +// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void increment_d_st16(struct st16 *s) { s->d++; } @@ -1085,6 +3481,68 @@ void increment_d_st16(struct st16 *s) { // BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_v_a_st16( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294967296 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] +// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_v_a_st16( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 +// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], 4294967295 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] +// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_v_a_st16( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_v_a_st16( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_v_a_st16( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// 
LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_v_a_st16( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void increment_v_a_st16(volatile struct st16 *s) { s->a++; } @@ -1119,6 +3577,88 @@ void increment_v_a_st16(volatile struct st16 *s) { // BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_v_b_st16( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// LENUMLOADS-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 +// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 +// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -281470681743361 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] +// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_v_b_st16( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 +// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 +// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294901761 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] +// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_v_b_st16( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_v_b_st16( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast 
%struct.st16* [[S:%.*]] to i32* +// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTH-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536 +// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_v_b_st16( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_v_b_st16( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536 +// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void increment_v_b_st16(volatile struct st16 *s) { s->b++; } @@ -1153,6 +3693,74 @@ void increment_v_b_st16(volatile struct st16 *s) { // BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_v_c_st16( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294967296 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] +// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_v_c_st16( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* 
[[TMP0]], align 4 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 +// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], 4294967295 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] +// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_v_c_st16( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// LEWIDTH-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_v_c_st16( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// BEWIDTH-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_v_c_st16( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_v_c_st16( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void increment_v_c_st16(volatile struct st16 *s) { s->c++; } @@ -1189,6 +3797,90 @@ void increment_v_c_st16(volatile struct st16 *s) { // BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_v_d_st16( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// LENUMLOADS-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 +// LENUMLOADS-NEXT: 
[[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 +// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 +// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -281470681743361 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] +// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_v_d_st16( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 +// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 +// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294901761 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] +// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_v_d_st16( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_v_d_st16( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTH-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536 +// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_v_d_st16( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// 
LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_v_d_st16( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536 +// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void increment_v_d_st16(volatile struct st16 *s) { s->d++; } @@ -1227,6 +3919,90 @@ char c : 8; // BE-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_v_b_st17( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 +// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] +// LENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_v_b_st17( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 +// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] +// BENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_v_b_st17( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 +// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LEWIDTH-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] +// LEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_v_b_st17( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* +// BEWIDTH-NEXT: 
[[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 +// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 +// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 +// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] +// BEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_v_b_st17( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] +// LEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_v_b_st17( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 +// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 +// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] +// BEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// BEWIDTHNUM-NEXT: ret void +// void increment_v_b_st17(volatile struct st17 *s) { s->b++; } @@ -1259,6 +4035,458 @@ void increment_v_b_st17(volatile struct st17 *s) { // BE-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_v_c_st17( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 32 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i8 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_CAST]], 1 +// LENUMLOADS-NEXT: [[TMP2:%.*]] = zext i8 [[INC]] to i40 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 32 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 4294967295 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] +// LENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_v_c_st17( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* +// BENUMLOADS-NEXT: 
[[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
+// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i8
+// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_CAST]], 1
+// BENUMLOADS-NEXT: [[TMP1:%.*]] = zext i8 [[INC]] to i40
+// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
+// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -256
+// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]]
+// BENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
+// BENUMLOADS-NEXT: ret void
+//
+// LEWIDTH-LABEL: @increment_v_c_st17(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @increment_v_c_st17(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @increment_v_c_st17(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @increment_v_c_st17(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// BEWIDTHNUM-NEXT: ret void
+//
void increment_v_c_st17(volatile struct st17 *s) {
s->c++;
}
+
+// A zero-width bitfield should block the merging of accesses, as the C11
+// specification requires a and b to be different memory locations
+struct zero_bitfield {
+ int a : 8;
+ char : 0;
+ int b : 8;
+};
+
+// LE-LABEL: @increment_a_zero_bitfield(
+// LE-NEXT: entry:
+// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0
+// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
+// LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4
+// LE-NEXT: ret void
+//
+// BE-LABEL: @increment_a_zero_bitfield(
+// BE-NEXT: entry:
+// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0
+// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
+// BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4
+// BE-NEXT: ret void
+//
+// LENUMLOADS-LABEL: @increment_a_zero_bitfield(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT:
[[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_a_zero_bitfield( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_a_zero_bitfield( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_a_zero_bitfield( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_a_zero_bitfield( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_a_zero_bitfield( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// +void increment_a_zero_bitfield(volatile struct zero_bitfield *s) { + s->a++; +} + +// LE-LABEL: @increment_b_zero_bitfield( +// LE-NEXT: entry: +// LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 +// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LE-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 +// LE-NEXT: ret void +// +// BE-LABEL: @increment_b_zero_bitfield( +// BE-NEXT: entry: +// BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 +// BE-NEXT: [[BF_LOAD:%.*]] = load volatile 
i8, i8* [[B]], align 1
+// BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// BE-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1
+// BE-NEXT: ret void
+//
+// LENUMLOADS-LABEL: @increment_b_zero_bitfield(
+// LENUMLOADS-NEXT: entry:
+// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1
+// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
+// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1
+// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1
+// LENUMLOADS-NEXT: ret void
+//
+// BENUMLOADS-LABEL: @increment_b_zero_bitfield(
+// BENUMLOADS-NEXT: entry:
+// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1
+// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
+// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1
+// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1
+// BENUMLOADS-NEXT: ret void
+//
+// LEWIDTH-LABEL: @increment_b_zero_bitfield(
+// LEWIDTH-NEXT: entry:
+// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1
+// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
+// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @increment_b_zero_bitfield(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
+// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @increment_b_zero_bitfield(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
+// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1
+// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @increment_b_zero_bitfield(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
+// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1
+// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1
+// BEWIDTHNUM-NEXT: ret void
+//
+void increment_b_zero_bitfield(volatile struct zero_bitfield *s) {
+ s->b++;
+}
+
+// The zero bitfield here does not affect the merging of a and a1 into one
+// access, since it only separates them from b
+struct zero_bitfield_ok {
+ short a : 8;
+ char a1 : 8;
+ long : 0;
+ int b : 24;
+};
+
+// LE-LABEL: @increment_a_zero_bitfield_ok(
+// LE-NEXT: entry:
+// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0
+// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16*
[[TMP0]], align 4 +// LE-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8 +// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LE-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD1]], 8 +// LE-NEXT: [[BF_CAST:%.*]] = trunc i16 [[TMP1]] to i8 +// LE-NEXT: [[ADD:%.*]] = add i8 [[BF_CAST]], [[CONV]] +// LE-NEXT: [[TMP2:%.*]] = zext i8 [[ADD]] to i16 +// LE-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LE-NEXT: [[BF_SHL6:%.*]] = shl nuw i16 [[TMP2]], 8 +// LE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], 255 +// LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_SHL6]], [[BF_CLEAR]] +// LE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LE-NEXT: ret void +// +// BE-LABEL: @increment_a_zero_bitfield_ok( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 +// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BE-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8 +// BE-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8 +// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BE-NEXT: [[SEXT:%.*]] = trunc i16 [[BF_LOAD1]] to i8 +// BE-NEXT: [[ADD:%.*]] = add i8 [[SEXT]], [[CONV]] +// BE-NEXT: [[TMP2:%.*]] = zext i8 [[ADD]] to i16 +// BE-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], -256 +// BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[TMP2]] +// BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BE-NEXT: ret void +// +// LENUMLOADS-LABEL: @increment_a_zero_bitfield_ok( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD1]], 8 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i16 [[TMP1]] to i8 +// LENUMLOADS-NEXT: [[ADD:%.*]] = add i8 [[BF_CAST]], [[CONV]] +// LENUMLOADS-NEXT: [[TMP2:%.*]] = zext i8 [[ADD]] to i16 +// LENUMLOADS-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_SHL6:%.*]] = shl nuw i16 [[TMP2]], 8 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], 255 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_SHL6]], [[BF_CLEAR]] +// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_a_zero_bitfield_ok( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8 +// BENUMLOADS-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[SEXT:%.*]] = trunc i16 [[BF_LOAD1]] to i8 +// BENUMLOADS-NEXT: [[ADD:%.*]] = add i8 [[SEXT]], [[CONV]] +// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i8 [[ADD]] to i16 +// BENUMLOADS-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], -256 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 
[[BF_CLEAR]], [[TMP2]] +// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_a_zero_bitfield_ok( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8 +// LEWIDTH-NEXT: [[TMP1:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8* +// LEWIDTH-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 1 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP2]], align 1 +// LEWIDTH-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]] +// LEWIDTH-NEXT: store volatile i8 [[ADD]], i8* [[TMP2]], align 1 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_a_zero_bitfield_ok( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8 +// BEWIDTH-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8 +// BEWIDTH-NEXT: [[TMP2:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8* +// BEWIDTH-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 1 +// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP3]], align 1 +// BEWIDTH-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]] +// BEWIDTH-NEXT: store volatile i8 [[ADD]], i8* [[TMP3]], align 1 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_a_zero_bitfield_ok( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8 +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8* +// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP2]], align 1 +// LEWIDTHNUM-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]] +// LEWIDTHNUM-NEXT: [[BF_LOAD4:%.*]] = load volatile i8, i8* [[TMP2]], align 1 +// LEWIDTHNUM-NEXT: store volatile i8 [[ADD]], i8* [[TMP2]], align 1 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_a_zero_bitfield_ok( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8 +// BEWIDTHNUM-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8 +// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8* +// BEWIDTHNUM-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP3]], align 1 +// BEWIDTHNUM-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]] +// BEWIDTHNUM-NEXT: [[BF_LOAD4:%.*]] = load volatile i8, i8* [[TMP3]], align 1 +// BEWIDTHNUM-NEXT: store volatile i8 [[ADD]], i8* [[TMP3]], align 1 +// BEWIDTHNUM-NEXT: ret void +// +void increment_a_zero_bitfield_ok(volatile struct zero_bitfield_ok *s) { + s->a1 += s->a; +} + +// LE-LABEL: 
@increment_b_zero_bitfield_ok( +// LE-NEXT: entry: +// LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 +// LE-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* +// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LE-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LE-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215 +// LE-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216 +// LE-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// LE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LE-NEXT: ret void +// +// BE-LABEL: @increment_b_zero_bitfield_ok( +// BE-NEXT: entry: +// BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 +// BE-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* +// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BE-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256 +// BE-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256 +// BE-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255 +// BE-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BE-NEXT: ret void +// +// LENUMLOADS-LABEL: @increment_b_zero_bitfield_ok( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// LENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_b_zero_bitfield_ok( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256 +// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_b_zero_bitfield_ok( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// 
LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215
+// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216
+// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// LEWIDTH-NEXT: ret void
+//
+// BEWIDTH-LABEL: @increment_b_zero_bitfield_ok(
+// BEWIDTH-NEXT: entry:
+// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1
+// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32*
+// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256
+// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256
+// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255
+// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
+// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// BEWIDTH-NEXT: ret void
+//
+// LEWIDTHNUM-LABEL: @increment_b_zero_bitfield_ok(
+// LEWIDTHNUM-NEXT: entry:
+// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1
+// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32*
+// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
+// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215
+// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216
+// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// LEWIDTHNUM-NEXT: ret void
+//
+// BEWIDTHNUM-LABEL: @increment_b_zero_bitfield_ok(
+// BEWIDTHNUM-NEXT: entry:
+// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1
+// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32*
+// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256
+// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256
+// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255
+// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
+// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
+// BEWIDTHNUM-NEXT: ret void
+//
+void increment_b_zero_bitfield_ok(volatile struct zero_bitfield_ok *s) {
+ s->b++;
+}

diff --git a/clang/test/CodeGen/bitfield-2.c b/clang/test/CodeGen/bitfield-2.c
index 9d669575ecd11..661d42683bc27 100644
--- a/clang/test/CodeGen/bitfield-2.c
+++ b/clang/test/CodeGen/bitfield-2.c
@@ -14,7 +14,7 @@
// CHECK-RECORD: LLVMType:%struct.s0 = type { [3 x i8] }
// CHECK-RECORD: IsZeroInitializable:1
// CHECK-RECORD: BitFields:[
-// CHECK-RECORD:
+// CHECK-RECORD:
-// CHECK-RECORD:
+// CHECK-RECORD:
+// CHECK-RECORD:
-// CHECK-RECORD:
+// CHECK-RECORD:

Date: Tue, 8 Sep 2020 11:26:10 -0500
Subject: [PATCH 0076/1079] [GVN] Add testcase that uses masked loads and
 stores, NFC

---
 llvm/test/Transforms/GVN/masked-load-store.ll | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644
llvm/test/Transforms/GVN/masked-load-store.ll diff --git a/llvm/test/Transforms/GVN/masked-load-store.ll b/llvm/test/Transforms/GVN/masked-load-store.ll new file mode 100644 index 0000000000000..8119d77bb76e0 --- /dev/null +++ b/llvm/test/Transforms/GVN/masked-load-store.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -gvn -S < %s | FileCheck %s + +define <128 x i8> @f0(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { +; CHECK-LABEL: @f0( +; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] +; CHECK-NEXT: [[V1:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0:%.*]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) +; CHECK-NEXT: [[V3:%.*]] = add <128 x i8> [[V1]], [[V1]] +; CHECK-NEXT: ret <128 x i8> [[V3]] +; + %v0 = icmp eq <128 x i8> %a1, %a2 + %v1 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + %v2 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + %v3 = add <128 x i8> %v1, %v2 + ret <128 x i8> %v3 +} + +define <128 x i8> @f1(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { +; CHECK-LABEL: @f1( +; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] +; CHECK-NEXT: [[V1:%.*]] = getelementptr <128 x i8>, <128 x i8>* [[A0:%.*]], i32 1 +; CHECK-NEXT: [[V2:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) +; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[A2]], <128 x i8>* [[V1]], i32 4, <128 x i1> [[V0]]) +; CHECK-NEXT: [[V3:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) +; CHECK-NEXT: [[V4:%.*]] = add <128 x i8> [[V2]], [[V3]] +; CHECK-NEXT: ret <128 x i8> [[V4]] +; + %v0 = icmp eq <128 x i8> %a1, %a2 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a0, i32 1 + %v2 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %a2, <128 x i8>* %v1, i32 4, <128 x i1> %v0) + %v3 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + %v4 = add <128 x i8> %v2, %v3 + ret <128 x i8> %v4 +} + +declare <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>*, i32, <128 x i1>, <128 x i8>) +declare void @llvm.masked.store.v128i8.p0v128i8(<128 x i8>, <128 x i8>*, i32, <128 x i1>) + From 97e77ac0ed80877cda58b1dddf98890cc7b0d167 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 8 Sep 2020 16:53:24 +0000 Subject: [PATCH 0077/1079] Add more explicit error message when creating a type or attribute for an unregistered dialect (NFC) Differential Revision: https://reviews.llvm.org/D87177 --- mlir/include/mlir/IR/AttributeSupport.h | 17 +++++++++++++++++ mlir/include/mlir/IR/TypeSupport.h | 15 +++++++++++++++ mlir/include/mlir/Support/StorageUniquer.h | 10 ++++++++++ mlir/lib/Support/StorageUniquer.cpp | 16 ++++++++++++++++ 4 files changed, 58 insertions(+) diff --git a/mlir/include/mlir/IR/AttributeSupport.h b/mlir/include/mlir/IR/AttributeSupport.h index 35084a20493f5..c0e3a0bb9b26e 100644 --- a/mlir/include/mlir/IR/AttributeSupport.h +++ b/mlir/include/mlir/IR/AttributeSupport.h @@ -16,6 +16,7 @@ #include "mlir/IR/MLIRContext.h" #include "mlir/IR/StorageUniquerSupport.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/Twine.h" 
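// A hypothetical usage sketch (an illustration, not a hunk of this patch):
// with the NDEBUG-guarded checks added in the hunks below, constructing a
// type or attribute whose dialect was never loaded now aborts with an
// explicit message instead of failing inside the uniquer. `MyDialectType`
// here is an assumed example type of an unloaded dialect.
//
//   MLIRContext ctx;              // no dialects loaded into this context
//   MyDialectType::get(&ctx);     // debug build: report_fatal_error with
//                                 // "can't create type '...' because storage
//                                 // uniquer isn't initialized: the dialect
//                                 // was likely not loaded."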
namespace mlir { class MLIRContext; @@ -142,6 +143,14 @@ class AttributeUniquer { static typename std::enable_if_t< !std::is_same::value, T> get(MLIRContext *ctx, Args &&...args) { +#ifndef NDEBUG + if (!ctx->getAttributeUniquer().isParametricStorageInitialized( + T::getTypeID())) + llvm::report_fatal_error(llvm::Twine("can't create Attribute '") + + llvm::getTypeName() + + "' because storage uniquer isn't initialized: " + "the dialect was likely not loaded."); +#endif return ctx->getAttributeUniquer().get( [ctx](AttributeStorage *storage) { initializeAttributeStorage(storage, ctx, T::getTypeID()); @@ -153,6 +162,14 @@ class AttributeUniquer { static typename std::enable_if_t< std::is_same::value, T> get(MLIRContext *ctx) { +#ifndef NDEBUG + if (!ctx->getAttributeUniquer().isSingletonStorageInitialized( + T::getTypeID())) + llvm::report_fatal_error(llvm::Twine("can't create Attribute '") + + llvm::getTypeName() + + "' because storage uniquer isn't initialized: " + "the dialect was likely not loaded."); +#endif return ctx->getAttributeUniquer().get(T::getTypeID()); } diff --git a/mlir/include/mlir/IR/TypeSupport.h b/mlir/include/mlir/IR/TypeSupport.h index ace5eaa733454..c1de589579154 100644 --- a/mlir/include/mlir/IR/TypeSupport.h +++ b/mlir/include/mlir/IR/TypeSupport.h @@ -15,6 +15,7 @@ #include "mlir/IR/MLIRContext.h" #include "mlir/IR/StorageUniquerSupport.h" +#include "llvm/ADT/Twine.h" namespace mlir { class Dialect; @@ -126,6 +127,13 @@ struct TypeUniquer { static typename std::enable_if_t< !std::is_same::value, T> get(MLIRContext *ctx, Args &&...args) { +#ifndef NDEBUG + if (!ctx->getTypeUniquer().isParametricStorageInitialized(T::getTypeID())) + llvm::report_fatal_error(llvm::Twine("can't create type '") + + llvm::getTypeName() + + "' because storage uniquer isn't initialized: " + "the dialect was likely not loaded."); +#endif return ctx->getTypeUniquer().get( [&](TypeStorage *storage) { storage->initialize(AbstractType::lookup(T::getTypeID(), ctx)); @@ -137,6 +145,13 @@ struct TypeUniquer { static typename std::enable_if_t< std::is_same::value, T> get(MLIRContext *ctx) { +#ifndef NDEBUG + if (!ctx->getTypeUniquer().isSingletonStorageInitialized(T::getTypeID())) + llvm::report_fatal_error(llvm::Twine("can't create type '") + + llvm::getTypeName() + + "' because storage uniquer isn't initialized: " + "the dialect was likely not loaded."); +#endif return ctx->getTypeUniquer().get(T::getTypeID()); } diff --git a/mlir/include/mlir/Support/StorageUniquer.h b/mlir/include/mlir/Support/StorageUniquer.h index eb04688be1902..d0a6170805bfd 100644 --- a/mlir/include/mlir/Support/StorageUniquer.h +++ b/mlir/include/mlir/Support/StorageUniquer.h @@ -210,6 +210,16 @@ class StorageUniquer { return get(TypeID::get()); } + /// Test if there is a singleton storage uniquer initialized for the provided + /// TypeID. This is only useful for debugging/diagnostic purpose: the uniquer + /// is initialized when a dialect is loaded. + bool isSingletonStorageInitialized(TypeID id); + + /// Test if there is a parametric storage uniquer initialized for the provided + /// TypeID. This is only useful for debugging/diagnostic purpose: the uniquer + /// is initialized when a dialect is loaded. + bool isParametricStorageInitialized(TypeID id); + /// Changes the mutable component of 'storage' by forwarding the trailing /// arguments to the 'mutate' function of the derived class. 
 template <typename Storage, typename... Args>
diff --git a/mlir/lib/Support/StorageUniquer.cpp b/mlir/lib/Support/StorageUniquer.cpp
index 73578b5c91acf..a3e296e99e738 100644
--- a/mlir/lib/Support/StorageUniquer.cpp
+++ b/mlir/lib/Support/StorageUniquer.cpp
@@ -89,6 +89,9 @@ struct StorageUniquerImpl {
   // Parametric Storage
   //===--------------------------------------------------------------------===//
 
+  /// Check if an instance of a parametric storage class exists.
+  bool hasParametricStorage(TypeID id) { return parametricUniquers.count(id); }
+
   /// Get or create an instance of a parametric type.
   BaseStorage *
   getOrCreate(TypeID id, unsigned hashValue,
@@ -176,6 +179,9 @@ struct StorageUniquerImpl {
     return singletonInstance;
   }
 
+  /// Check if an instance of a singleton storage class exists.
+  bool hasSingleton(TypeID id) { return singletonInstances.count(id); }
+
   //===--------------------------------------------------------------------===//
   // Instance Storage
   //===--------------------------------------------------------------------===//
@@ -227,6 +233,16 @@ auto StorageUniquer::getSingletonImpl(TypeID id) -> BaseStorage * {
   return impl->getSingleton(id);
 }
 
+/// Test if the singleton storage is initialized.
+bool StorageUniquer::isSingletonStorageInitialized(TypeID id) {
+  return impl->hasSingleton(id);
+}
+
+/// Test if the parametric storage is initialized.
+bool StorageUniquer::isParametricStorageInitialized(TypeID id) {
+  return impl->hasParametricStorage(id);
+}
+
 /// Implementation for registering an instance of a derived type with default
 /// storage.
 void StorageUniquer::registerSingletonImpl(

From 2d7fd38cf7db18edbbfa0e6dfb7454a255171867 Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Fri, 4 Sep 2020 19:19:20 -0700
Subject: [PATCH 0078/1079] [sanitizers] Remove unneeded MaybeCall*DefaultOptions() and nullptr checks

D28596 added SANITIZER_INTERFACE_WEAK_DEF which can guarantee
`*_default_options` are always defined. The weak attributes on the
`__{asan,lsan,msan,ubsan}_default_options` declarations can thus be removed.
`MaybeCall*DefaultOptions` no longer need nullptr checks, so their call sites
can just be replaced by `__*_default_options`.

Reviewed By: #sanitizers, vitalybuka

Differential Revision: https://reviews.llvm.org/D87175
---
 compiler-rt/lib/asan/asan_flags.cpp            | 10 +++-------
 compiler-rt/lib/asan/asan_interface_internal.h |  4 ++--
 compiler-rt/lib/cfi/cfi.cpp                    |  2 +-
 compiler-rt/lib/hwasan/hwasan.cpp              |  2 +-
 compiler-rt/lib/lsan/lsan.cpp                  |  2 +-
 compiler-rt/lib/lsan/lsan_common.cpp           |  9 ++-------
 compiler-rt/lib/msan/msan.cpp                  | 14 +++++---------
 compiler-rt/lib/msan/msan_interface_internal.h |  4 ++--
 compiler-rt/lib/tsan/rtl/tsan_flags.cpp        |  2 +-
 compiler-rt/lib/ubsan/ubsan_flags.cpp          |  6 +-----
 compiler-rt/lib/ubsan/ubsan_flags.h            |  2 --
 11 files changed, 19 insertions(+), 38 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_flags.cpp b/compiler-rt/lib/asan/asan_flags.cpp
index c5c70eaed737f..cb6a89fe32ce7 100644
--- a/compiler-rt/lib/asan/asan_flags.cpp
+++ b/compiler-rt/lib/asan/asan_flags.cpp
@@ -26,10 +26,6 @@ namespace __asan {
 
 Flags asan_flags_dont_use_directly;  // use via flags().
 
-static const char *MaybeCallAsanDefaultOptions() {
-  return (&__asan_default_options) ? __asan_default_options() : "";
-}
-
 static const char *MaybeUseAsanDefaultOptionsCompileDefinition() {
 #ifdef ASAN_DEFAULT_OPTIONS
   return SANITIZER_STRINGIFY(ASAN_DEFAULT_OPTIONS);
@@ -108,14 +104,14 @@ void InitializeFlags() {
   asan_parser.ParseString(asan_compile_def);
 
   // Override from user-specified string.
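// [Editor's sketch, hedged -- not part of the patch: the user-facing side of
//  this cleanup. Because SANITIZER_INTERFACE_WEAK_DEF guarantees the runtime
//  always carries a weak default definition, an application overrides the
//  defaults simply by providing a strong definition like the one below; the
//  call site above no longer needs to check whether the symbol exists. The
//  flag values are just an example.]
extern "C" const char *__asan_default_options() {
  return "detect_stack_use_after_return=1:strict_string_checks=1";
}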
- const char *asan_default_options = MaybeCallAsanDefaultOptions(); + const char *asan_default_options = __asan_default_options(); asan_parser.ParseString(asan_default_options); #if CAN_SANITIZE_UB - const char *ubsan_default_options = __ubsan::MaybeCallUbsanDefaultOptions(); + const char *ubsan_default_options = __ubsan_default_options(); ubsan_parser.ParseString(ubsan_default_options); #endif #if CAN_SANITIZE_LEAKS - const char *lsan_default_options = __lsan::MaybeCallLsanDefaultOptions(); + const char *lsan_default_options = __lsan_default_options(); lsan_parser.ParseString(lsan_default_options); #endif diff --git a/compiler-rt/lib/asan/asan_interface_internal.h b/compiler-rt/lib/asan/asan_interface_internal.h index f14cbbcb76a35..3e6e660288746 100644 --- a/compiler-rt/lib/asan/asan_interface_internal.h +++ b/compiler-rt/lib/asan/asan_interface_internal.h @@ -173,8 +173,8 @@ extern "C" { SANITIZER_INTERFACE_ATTRIBUTE void __asan_print_accumulated_stats(); - SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE - const char* __asan_default_options(); + SANITIZER_INTERFACE_ATTRIBUTE + const char *__asan_default_options(); SANITIZER_INTERFACE_ATTRIBUTE extern uptr __asan_shadow_memory_dynamic_address; diff --git a/compiler-rt/lib/cfi/cfi.cpp b/compiler-rt/lib/cfi/cfi.cpp index fd48f71643b6f..b75c72b215c27 100644 --- a/compiler-rt/lib/cfi/cfi.cpp +++ b/compiler-rt/lib/cfi/cfi.cpp @@ -379,7 +379,7 @@ void InitializeFlags() { __ubsan::RegisterUbsanFlags(&ubsan_parser, uf); RegisterCommonFlags(&ubsan_parser); - const char *ubsan_default_options = __ubsan::MaybeCallUbsanDefaultOptions(); + const char *ubsan_default_options = __ubsan_default_options(); ubsan_parser.ParseString(ubsan_default_options); ubsan_parser.ParseStringFromEnv("UBSAN_OPTIONS"); #endif diff --git a/compiler-rt/lib/hwasan/hwasan.cpp b/compiler-rt/lib/hwasan/hwasan.cpp index 11b4d3891bc2c..c5322110cb662 100644 --- a/compiler-rt/lib/hwasan/hwasan.cpp +++ b/compiler-rt/lib/hwasan/hwasan.cpp @@ -112,7 +112,7 @@ static void InitializeFlags() { if (__hwasan_default_options) parser.ParseString(__hwasan_default_options()); #if HWASAN_CONTAINS_UBSAN - const char *ubsan_default_options = __ubsan::MaybeCallUbsanDefaultOptions(); + const char *ubsan_default_options = __ubsan_default_options(); ubsan_parser.ParseString(ubsan_default_options); #endif diff --git a/compiler-rt/lib/lsan/lsan.cpp b/compiler-rt/lib/lsan/lsan.cpp index 80a6e2fa70169..c8cc045783d45 100644 --- a/compiler-rt/lib/lsan/lsan.cpp +++ b/compiler-rt/lib/lsan/lsan.cpp @@ -73,7 +73,7 @@ static void InitializeFlags() { RegisterCommonFlags(&parser); // Override from user-specified string. - const char *lsan_default_options = MaybeCallLsanDefaultOptions(); + const char *lsan_default_options = __lsan_default_options(); parser.ParseString(lsan_default_options); parser.ParseStringFromEnv("LSAN_OPTIONS"); diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp index 67f85f2f31de4..93ce0ddc3d68e 100644 --- a/compiler-rt/lib/lsan/lsan_common.cpp +++ b/compiler-rt/lib/lsan/lsan_common.cpp @@ -110,10 +110,6 @@ void InitializeRootRegions() { root_regions = new (placeholder) InternalMmapVector(); } -const char *MaybeCallLsanDefaultOptions() { - return (&__lsan_default_options) ? 
__lsan_default_options() : ""; -} - void InitCommonLsan() { InitializeRootRegions(); if (common_flags()->detect_leaks) { @@ -900,12 +896,11 @@ int __lsan_do_recoverable_leak_check() { return 0; } -#if !SANITIZER_SUPPORTS_WEAK_HOOKS -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -const char * __lsan_default_options() { +SANITIZER_INTERFACE_WEAK_DEF(const char *, __lsan_default_options, void) { return ""; } +#if !SANITIZER_SUPPORTS_WEAK_HOOKS SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE int __lsan_is_turned_off() { return 0; diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp index 3028f79f041c3..d651a376789bd 100644 --- a/compiler-rt/lib/msan/msan.cpp +++ b/compiler-rt/lib/msan/msan.cpp @@ -172,10 +172,9 @@ static void InitializeFlags() { #endif // Override from user-specified string. - if (__msan_default_options) - parser.ParseString(__msan_default_options()); + parser.ParseString(__msan_default_options()); #if MSAN_CONTAINS_UBSAN - const char *ubsan_default_options = __ubsan::MaybeCallUbsanDefaultOptions(); + const char *ubsan_default_options = __ubsan_default_options(); ubsan_parser.ParseString(ubsan_default_options); #endif @@ -726,12 +725,9 @@ void __msan_finish_switch_fiber(const void **bottom_old, uptr *size_old) { } } -#if !SANITIZER_SUPPORTS_WEAK_HOOKS -extern "C" { -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -const char* __msan_default_options() { return ""; } -} // extern "C" -#endif +SANITIZER_INTERFACE_WEAK_DEF(const char *, __msan_default_options, void) { + return ""; +} extern "C" { SANITIZER_INTERFACE_ATTRIBUTE diff --git a/compiler-rt/lib/msan/msan_interface_internal.h b/compiler-rt/lib/msan/msan_interface_internal.h index 17922a888b9c9..1edacbc7504f5 100644 --- a/compiler-rt/lib/msan/msan_interface_internal.h +++ b/compiler-rt/lib/msan/msan_interface_internal.h @@ -129,8 +129,8 @@ void __msan_set_keep_going(int keep_going); SANITIZER_INTERFACE_ATTRIBUTE int __msan_set_poison_in_malloc(int do_poison); -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -/* OPTIONAL */ const char* __msan_default_options(); +SANITIZER_INTERFACE_ATTRIBUTE +const char *__msan_default_options(); // For testing. SANITIZER_INTERFACE_ATTRIBUTE diff --git a/compiler-rt/lib/tsan/rtl/tsan_flags.cpp b/compiler-rt/lib/tsan/rtl/tsan_flags.cpp index 44bf325cd35bb..49e4a9c21da9c 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_flags.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_flags.cpp @@ -87,7 +87,7 @@ void InitializeFlags(Flags *f, const char *env, const char *env_option_name) { // Let a frontend override. parser.ParseString(__tsan_default_options()); #if TSAN_CONTAINS_UBSAN - const char *ubsan_default_options = __ubsan::MaybeCallUbsanDefaultOptions(); + const char *ubsan_default_options = __ubsan_default_options(); ubsan_parser.ParseString(ubsan_default_options); #endif // Override from command line. diff --git a/compiler-rt/lib/ubsan/ubsan_flags.cpp b/compiler-rt/lib/ubsan/ubsan_flags.cpp index 721c2273f133a..25cefd46ce27c 100644 --- a/compiler-rt/lib/ubsan/ubsan_flags.cpp +++ b/compiler-rt/lib/ubsan/ubsan_flags.cpp @@ -21,10 +21,6 @@ namespace __ubsan { -const char *MaybeCallUbsanDefaultOptions() { - return (&__ubsan_default_options) ? __ubsan_default_options() : ""; -} - static const char *GetFlag(const char *flag) { // We cannot call getenv() from inside a preinit array initializer if (SANITIZER_CAN_USE_PREINIT_ARRAY) { @@ -66,7 +62,7 @@ void InitializeFlags() { RegisterUbsanFlags(&parser, f); // Override from user-specified string. 
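// [Editor's sketch, hedged: the link-time mechanism this patch relies on,
//  shown with a plain attribute. demo_default_options is an illustrative
//  name, not the sanitizer macro's actual expansion.]
extern "C" __attribute__((weak)) const char *demo_default_options() {
  return "";  // weak fallback: always defined, so callers need no null check
}
// A strong definition elsewhere, e.g.
//   extern "C" const char *demo_default_options() { return "verbosity=1"; }
// replaces the weak fallback at link time. That is why the address-of checks
// in the MaybeCall*DefaultOptions() helpers, deleted below, became dead code.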
- parser.ParseString(MaybeCallUbsanDefaultOptions()); + parser.ParseString(__ubsan_default_options()); // Override from environment variable. parser.ParseStringFromEnv("UBSAN_OPTIONS"); InitializeCommonFlags(); diff --git a/compiler-rt/lib/ubsan/ubsan_flags.h b/compiler-rt/lib/ubsan/ubsan_flags.h index daa0d7c701e04..c47009bafe539 100644 --- a/compiler-rt/lib/ubsan/ubsan_flags.h +++ b/compiler-rt/lib/ubsan/ubsan_flags.h @@ -34,8 +34,6 @@ inline Flags *flags() { return &ubsan_flags; } void InitializeFlags(); void RegisterUbsanFlags(FlagParser *parser, Flags *f); -const char *MaybeCallUbsanDefaultOptions(); - } // namespace __ubsan extern "C" { From 0dacf3b5ac3a8c4079b781c788f758709345883f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 8 Sep 2020 18:04:41 +0100 Subject: [PATCH 0079/1079] RISCVMatInt.h - remove unnecessary includes. NFCI. Add APInt forward declaration and move include to RISCVMatInt.cpp --- llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp | 4 +--- llvm/lib/Target/RISCV/Utils/RISCVMatInt.h | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp index f390ddb89e3c9..1f3dead610112 100644 --- a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp +++ b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp @@ -8,10 +8,8 @@ #include "RISCVMatInt.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/MachineValueType.h" +#include "llvm/ADT/APInt.h" #include "llvm/Support/MathExtras.h" -#include namespace llvm { diff --git a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h index b12ae2eade999..17ca57458b493 100644 --- a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h +++ b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h @@ -9,12 +9,11 @@ #ifndef LLVM_LIB_TARGET_RISCV_MATINT_H #define LLVM_LIB_TARGET_RISCV_MATINT_H -#include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/MachineValueType.h" #include namespace llvm { +class APInt; namespace RISCVMatInt { struct Inst { From cd5c5c484830e65854cc12cb64a0feb0a9060734 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 8 Sep 2020 18:24:52 +0100 Subject: [PATCH 0080/1079] CFGUpdate.h - remove unused APInt include. NFCI. --- llvm/include/llvm/Support/CFGUpdate.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/Support/CFGUpdate.h b/llvm/include/llvm/Support/CFGUpdate.h index af4cd6ed1f1df..3a12b9d86c18a 100644 --- a/llvm/include/llvm/Support/CFGUpdate.h +++ b/llvm/include/llvm/Support/CFGUpdate.h @@ -14,7 +14,6 @@ #ifndef LLVM_SUPPORT_CFGUPDATE_H #define LLVM_SUPPORT_CFGUPDATE_H -#include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/Compiler.h" From d6f3f612318f31c46b95dd62eee45a75397ccfcf Mon Sep 17 00:00:00 2001 From: Ties Stuij Date: Tue, 8 Sep 2020 18:43:59 +0100 Subject: [PATCH 0081/1079] Revert "[ARM] Follow AACPS standard for volatile bit-fields access width" This reverts commit 514df1b2bb1ecd1a33327001ea38a347fd2d0380. 
Some of the buildbots got llvm-lit errors on CodeGen/volatile.c --- clang/include/clang/Basic/CodeGenOptions.def | 6 +- clang/include/clang/Driver/Options.td | 8 +- clang/lib/CodeGen/CGExpr.cpp | 118 +- clang/lib/CodeGen/CGRecordLayout.h | 17 +- clang/lib/CodeGen/CGRecordLayoutBuilder.cpp | 166 +- clang/lib/Frontend/CompilerInvocation.cpp | 3 - clang/test/CodeGen/aapcs-bitfield.c | 3292 +----------------- clang/test/CodeGen/bitfield-2.c | 12 +- 8 files changed, 103 insertions(+), 3519 deletions(-) diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index f2f29db2334e4..ec77f68062e7a 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -392,13 +392,9 @@ CODEGENOPT(Addrsig, 1, 0) /// Whether to emit unused static constants. CODEGENOPT(KeepStaticConsts, 1, 0) -/// Whether to follow the AAPCS enforcing at least one read before storing to a volatile bitfield +/// Whether to not follow the AAPCS that enforce at least one read before storing to a volatile bitfield CODEGENOPT(ForceAAPCSBitfieldLoad, 1, 0) -/// Whether to not follow the AAPCS that enforces volatile bit-field access width to be -/// according to the field declaring type width. -CODEGENOPT(AAPCSBitfieldWidth, 1, 1) - #undef CODEGENOPT #undef ENUM_CODEGENOPT #undef VALUE_CODEGENOPT diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 81d63330b4279..4ba5d40117e77 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2363,15 +2363,9 @@ def mno_neg_immediates: Flag<["-"], "mno-neg-immediates">, Group, Group, Flags<[DriverOption,CC1Option]>, HelpText<"Allow use of CMSE (Armv8-M Security Extensions)">; -def ForceAAPCSBitfieldLoad : Flag<["-"], "faapcs-bitfield-load">, Group, +def ForceAAPCSBitfieldLoad : Flag<["-"], "fAAPCSBitfieldLoad">, Group, Flags<[DriverOption,CC1Option]>, HelpText<"Follows the AAPCS standard that all volatile bit-field write generates at least one load. (ARM only).">; -def ForceNoAAPCSBitfieldWidth : Flag<["-"], "fno-aapcs-bitfield-width">, Group, - Flags<[DriverOption,CC1Option]>, - HelpText<"Do not follow the AAPCS standard requirement that volatile bit-field width is dictated by the field container type. (ARM only).">; -def AAPCSBitfieldWidth : Flag<["-"], "faapcs-bitfield-width">, Group, - Flags<[DriverOption,CC1Option]>, - HelpText<"Follow the AAPCS standard requirement stating that volatile bit-field width is dictated by the field container type. (ARM only).">; def mgeneral_regs_only : Flag<["-"], "mgeneral-regs-only">, Group, HelpText<"Generate code which only uses the general purpose registers (AArch64 only)">; diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index df024a84462db..7351926035e64 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1927,27 +1927,22 @@ RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV, llvm::Type *ResLTy = ConvertType(LV.getType()); Address Ptr = LV.getBitFieldAddress(); - llvm::Value *Val = - Builder.CreateLoad(Ptr, LV.isVolatileQualified(), "bf.load"); - - bool UseVolatile = LV.isVolatileQualified() && - Info.VolatileStorageSize != 0 && isAAPCS(CGM.getTarget()); - const unsigned Offset = UseVolatile ? Info.VolatileOffset : Info.Offset; - const unsigned StorageSize = - UseVolatile ? 
Info.VolatileStorageSize : Info.StorageSize; + llvm::Value *Val = Builder.CreateLoad(Ptr, LV.isVolatileQualified(), "bf.load"); + if (Info.IsSigned) { - assert(static_cast(Offset + Info.Size) <= StorageSize); - unsigned HighBits = StorageSize - Offset - Info.Size; + assert(static_cast(Info.Offset + Info.Size) <= Info.StorageSize); + unsigned HighBits = Info.StorageSize - Info.Offset - Info.Size; if (HighBits) Val = Builder.CreateShl(Val, HighBits, "bf.shl"); - if (Offset + HighBits) - Val = Builder.CreateAShr(Val, Offset + HighBits, "bf.ashr"); + if (Info.Offset + HighBits) + Val = Builder.CreateAShr(Val, Info.Offset + HighBits, "bf.ashr"); } else { - if (Offset) - Val = Builder.CreateLShr(Val, Offset, "bf.lshr"); - if (static_cast(Offset) + Info.Size < StorageSize) - Val = Builder.CreateAnd( - Val, llvm::APInt::getLowBitsSet(StorageSize, Info.Size), "bf.clear"); + if (Info.Offset) + Val = Builder.CreateLShr(Val, Info.Offset, "bf.lshr"); + if (static_cast(Info.Offset) + Info.Size < Info.StorageSize) + Val = Builder.CreateAnd(Val, llvm::APInt::getLowBitsSet(Info.StorageSize, + Info.Size), + "bf.clear"); } Val = Builder.CreateIntCast(Val, ResLTy, Info.IsSigned, "bf.cast"); EmitScalarRangeCheck(Val, LV.getType(), Loc); @@ -2149,43 +2144,39 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, /*isSigned=*/false); llvm::Value *MaskedVal = SrcVal; - const bool UseVolatile = CGM.getCodeGenOpts().AAPCSBitfieldWidth && - Dst.isVolatileQualified() && - Info.VolatileStorageSize != 0 && - isAAPCS(CGM.getTarget()); - const unsigned StorageSize = - UseVolatile ? Info.VolatileStorageSize : Info.StorageSize; - const unsigned Offset = UseVolatile ? Info.VolatileOffset : Info.Offset; // See if there are other bits in the bitfield's storage we'll need to load // and mask together with source before storing. - if (StorageSize != Info.Size) { - assert(StorageSize > Info.Size && "Invalid bitfield size."); + if (Info.StorageSize != Info.Size) { + assert(Info.StorageSize > Info.Size && "Invalid bitfield size."); llvm::Value *Val = - Builder.CreateLoad(Ptr, Dst.isVolatileQualified(), "bf.load"); + Builder.CreateLoad(Ptr, Dst.isVolatileQualified(), "bf.load"); // Mask the source value as needed. if (!hasBooleanRepresentation(Dst.getType())) - SrcVal = Builder.CreateAnd( - SrcVal, llvm::APInt::getLowBitsSet(StorageSize, Info.Size), - "bf.value"); + SrcVal = Builder.CreateAnd(SrcVal, + llvm::APInt::getLowBitsSet(Info.StorageSize, + Info.Size), + "bf.value"); MaskedVal = SrcVal; - if (Offset) - SrcVal = Builder.CreateShl(SrcVal, Offset, "bf.shl"); + if (Info.Offset) + SrcVal = Builder.CreateShl(SrcVal, Info.Offset, "bf.shl"); // Mask out the original value. - Val = Builder.CreateAnd( - Val, ~llvm::APInt::getBitsSet(StorageSize, Offset, Offset + Info.Size), - "bf.clear"); + Val = Builder.CreateAnd(Val, + ~llvm::APInt::getBitsSet(Info.StorageSize, + Info.Offset, + Info.Offset + Info.Size), + "bf.clear"); // Or together the unchanged values and the source value. SrcVal = Builder.CreateOr(Val, SrcVal, "bf.set"); } else { - assert(Offset == 0); + assert(Info.Offset == 0); // According to the AACPS: // When a volatile bit-field is written, and its container does not overlap - // with any non-bit-field member, its container must be read exactly once - // and written exactly once using the access width appropriate to the type - // of the container. The two accesses are not atomic. 
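// [Editor's illustration, hedged -- not part of the patch: what the AACPS
//  wording quoted above requires when the container overlaps no other
//  member. The IR sketched in the comments is approximate.]
struct S { volatile int f : 3; int : 29; };
void set(struct S *s) { s->f = 5; }
// expected lowering of set(), roughly:
//   %old = load volatile i32, i32* %s    ; container read exactly once
//   %new = or i32 (and i32 %old, -8), 5  ; merge the 3-bit field
//   store volatile i32 %new, i32* %s     ; container written exactly once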
+ // with any non-bit-field member, its container must be read exactly once and + // written exactly once using the access width appropriate to the type of the + // container. The two accesses are not atomic. if (Dst.isVolatileQualified() && isAAPCS(CGM.getTarget()) && CGM.getCodeGenOpts().ForceAAPCSBitfieldLoad) Builder.CreateLoad(Ptr, true, "bf.load"); @@ -2200,8 +2191,8 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, // Sign extend the value if needed. if (Info.IsSigned) { - assert(Info.Size <= StorageSize); - unsigned HighBits = StorageSize - Info.Size; + assert(Info.Size <= Info.StorageSize); + unsigned HighBits = Info.StorageSize - Info.Size; if (HighBits) { ResultVal = Builder.CreateShl(ResultVal, HighBits, "bf.result.shl"); ResultVal = Builder.CreateAShr(ResultVal, HighBits, "bf.result.ashr"); @@ -4213,45 +4204,32 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, if (field->isBitField()) { const CGRecordLayout &RL = - CGM.getTypes().getCGRecordLayout(field->getParent()); + CGM.getTypes().getCGRecordLayout(field->getParent()); const CGBitFieldInfo &Info = RL.getBitFieldInfo(field); - const bool UseVolatile = isAAPCS(CGM.getTarget()) && - CGM.getCodeGenOpts().AAPCSBitfieldWidth && - Info.VolatileStorageSize != 0 && - field->getType() - .withCVRQualifiers(base.getVRQualifiers()) - .isVolatileQualified(); Address Addr = base.getAddress(*this); unsigned Idx = RL.getLLVMFieldNo(field); const RecordDecl *rec = field->getParent(); - if (!UseVolatile) { - if (!IsInPreservedAIRegion && - (!getDebugInfo() || !rec->hasAttr())) { - if (Idx != 0) - // For structs, we GEP to the field that the record layout suggests. - Addr = Builder.CreateStructGEP(Addr, Idx, field->getName()); - } else { - llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType( - getContext().getRecordType(rec), rec->getLocation()); - Addr = Builder.CreatePreserveStructAccessIndex( - Addr, Idx, getDebugInfoFIndex(rec, field->getFieldIndex()), - DbgInfo); - } + if (!IsInPreservedAIRegion && + (!getDebugInfo() || !rec->hasAttr())) { + if (Idx != 0) + // For structs, we GEP to the field that the record layout suggests. + Addr = Builder.CreateStructGEP(Addr, Idx, field->getName()); + } else { + llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType( + getContext().getRecordType(rec), rec->getLocation()); + Addr = Builder.CreatePreserveStructAccessIndex(Addr, Idx, + getDebugInfoFIndex(rec, field->getFieldIndex()), + DbgInfo); } - const unsigned SS = - UseVolatile ? Info.VolatileStorageSize : Info.StorageSize; + // Get the access type. - llvm::Type *FieldIntTy = llvm::Type::getIntNTy(getLLVMContext(), SS); + llvm::Type *FieldIntTy = + llvm::Type::getIntNTy(getLLVMContext(), Info.StorageSize); if (Addr.getElementType() != FieldIntTy) Addr = Builder.CreateElementBitCast(Addr, FieldIntTy); - if (UseVolatile) { - const unsigned VolatileOffset = Info.VolatileStorageOffset.getQuantity(); - if (VolatileOffset) - Addr = Builder.CreateConstInBoundsGEP(Addr, VolatileOffset); - } QualType fieldType = - field->getType().withCVRQualifiers(base.getVRQualifiers()); + field->getType().withCVRQualifiers(base.getVRQualifiers()); // TODO: Support TBAA for bit fields. 
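// [Editor's worked example, hedged: concrete numbers for the bit-field store
//  path in EmitStoreThroughBitfieldLValue above, with Size = 3, Offset = 2,
//  StorageSize = 8.]
//   keep the low source bits:  getLowBitsSet(8, 3)   == 0b00000111
//   position them:             SrcVal << 2           -> bits 2..4
//   clear the old field:       ~getBitsSet(8, 2, 5)  == 0b11100011
//   sign-extend on read-back:  HighBits = 8 - 3 = 5, so (v << 5) >> 5
static_assert((~(0b111 << 2) & 0xFF) == 0b11100011, "clear mask for 3 bits at offset 2 in an i8 container");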
LValueBaseInfo FieldBaseInfo(BaseInfo.getAlignmentSource()); return LValue::MakeBitfield(Addr, Info, fieldType, FieldBaseInfo, diff --git a/clang/lib/CodeGen/CGRecordLayout.h b/clang/lib/CodeGen/CGRecordLayout.h index e6665b72bcba1..730ee4c438e7e 100644 --- a/clang/lib/CodeGen/CGRecordLayout.h +++ b/clang/lib/CodeGen/CGRecordLayout.h @@ -46,7 +46,7 @@ namespace CodeGen { /// }; /// /// This will end up as the following LLVM type. The first array is the -/// bitfield, and the second is the padding out to a 4-byte alignment. +/// bitfield, and the second is the padding out to a 4-byte alignmnet. /// /// %t = type { i8, i8, i8, i8, i8, [3 x i8] } /// @@ -80,21 +80,8 @@ struct CGBitFieldInfo { /// The offset of the bitfield storage from the start of the struct. CharUnits StorageOffset; - /// The offset within a contiguous run of bitfields that are represented as a - /// single "field" within the LLVM struct type, taking into account the AAPCS - /// rules for volatile bitfields. This offset is in bits. - unsigned VolatileOffset : 16; - - /// The storage size in bits which should be used when accessing this - /// bitfield. - unsigned VolatileStorageSize; - - /// The offset of the bitfield storage from the start of the struct. - CharUnits VolatileStorageOffset; - CGBitFieldInfo() - : Offset(), Size(), IsSigned(), StorageSize(), StorageOffset(), - VolatileOffset(), VolatileStorageSize(), VolatileStorageOffset() {} + : Offset(), Size(), IsSigned(), StorageSize(), StorageOffset() {} CGBitFieldInfo(unsigned Offset, unsigned Size, bool IsSigned, unsigned StorageSize, CharUnits StorageOffset) diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index ce35880106c20..4e5d1d3f16f65 100644 --- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -109,14 +109,6 @@ struct CGRecordLowering { D->isMsStruct(Context); } - /// Helper function to check if we are targeting AAPCS. - bool isAAPCS() const { - return Context.getTargetInfo().getABI().startswith("aapcs"); - } - - /// Helper function to check if the target machine is BigEndian. - bool isBE() const { return Context.getTargetInfo().isBigEndian(); } - /// The Itanium base layout rule allows virtual bases to overlap /// other bases, which complicates layout in specific ways. /// @@ -180,8 +172,7 @@ struct CGRecordLowering { void lowerUnion(); void accumulateFields(); void accumulateBitFields(RecordDecl::field_iterator Field, - RecordDecl::field_iterator FieldEnd); - void computeVolatileBitfields(); + RecordDecl::field_iterator FieldEnd); void accumulateBases(); void accumulateVPtrs(); void accumulateVBases(); @@ -246,10 +237,6 @@ void CGRecordLowering::setBitFieldInfo( // least-significant-bit. if (DataLayout.isBigEndian()) Info.Offset = Info.StorageSize - (Info.Offset + Info.Size); - - Info.VolatileStorageSize = 0; - Info.VolatileOffset = 0; - Info.VolatileStorageOffset = CharUnits::Zero(); } void CGRecordLowering::lower(bool NVBaseType) { @@ -274,21 +261,15 @@ void CGRecordLowering::lower(bool NVBaseType) { // 8) Format the complete list of members in a way that can be consumed by // CodeGenTypes::ComputeRecordLayout. CharUnits Size = NVBaseType ? Layout.getNonVirtualSize() : Layout.getSize(); - if (D->isUnion()) { - lowerUnion(); - computeVolatileBitfields(); - return; - } + if (D->isUnion()) + return lowerUnion(); accumulateFields(); // RD implies C++. 
if (RD) { accumulateVPtrs(); accumulateBases(); - if (Members.empty()) { - appendPaddingBytes(Size); - computeVolatileBitfields(); - return; - } + if (Members.empty()) + return appendPaddingBytes(Size); if (!NVBaseType) accumulateVBases(); } @@ -300,7 +281,6 @@ void CGRecordLowering::lower(bool NVBaseType) { Members.pop_back(); calculateZeroInit(); fillOutputFields(); - computeVolatileBitfields(); } void CGRecordLowering::lowerUnion() { @@ -438,9 +418,9 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, if (OffsetInRecord < 8 || !llvm::isPowerOf2_64(OffsetInRecord) || !DataLayout.fitsInLegalInteger(OffsetInRecord)) return false; - // Make sure StartBitOffset is naturally aligned if it is treated as an + // Make sure StartBitOffset is natually aligned if it is treated as an // IType integer. - if (StartBitOffset % + if (StartBitOffset % Context.toBits(getAlignment(getIntNType(OffsetInRecord))) != 0) return false; @@ -523,123 +503,6 @@ void CGRecordLowering::accumulateBases() { } } -/// The AAPCS that defines that, when possible, bit-fields should -/// be accessed using containers of the declared type width: -/// When a volatile bit-field is read, and its container does not overlap with -/// any non-bit-field member or any zero length bit-field member, its container -/// must be read exactly once using the access width appropriate to the type of -/// the container. When a volatile bit-field is written, and its container does -/// not overlap with any non-bit-field member or any zero-length bit-field -/// member, its container must be read exactly once and written exactly once -/// using the access width appropriate to the type of the container. The two -/// accesses are not atomic. -/// -/// Enforcing the width restriction can be disabled using -/// -fno-aapcs-bitfield-width. -void CGRecordLowering::computeVolatileBitfields() { - if (!isAAPCS() || !Types.getCodeGenOpts().AAPCSBitfieldWidth) - return; - - for (auto &I : BitFields) { - const FieldDecl *Field = I.first; - CGBitFieldInfo &Info = I.second; - llvm::Type *ResLTy = Types.ConvertTypeForMem(Field->getType()); - // If the record alignment is less than the type width, we can't enforce a - // aligned load, bail out. - if ((uint64_t)(Context.toBits(Layout.getAlignment())) < - ResLTy->getPrimitiveSizeInBits()) - continue; - // CGRecordLowering::setBitFieldInfo() pre-adjusts the bit-field offsets - // for big-endian targets, but it assumes a container of width - // Info.StorageSize. Since AAPCS uses a different container size (width - // of the type), we first undo that calculation here and redo it once - // the bit-field offset within the new container is calculated. - const unsigned OldOffset = - isBE() ? Info.StorageSize - (Info.Offset + Info.Size) : Info.Offset; - // Offset to the bit-field from the beginning of the struct. - const unsigned AbsoluteOffset = - Context.toBits(Info.StorageOffset) + OldOffset; - - // Container size is the width of the bit-field type. - const unsigned StorageSize = ResLTy->getPrimitiveSizeInBits(); - // Nothing to do if the access uses the desired - // container width and is naturally aligned. - if (Info.StorageSize == StorageSize && (OldOffset % StorageSize == 0)) - continue; - - // Offset within the container. - unsigned Offset = AbsoluteOffset & (StorageSize - 1); - // Bail out if an aligned load of the container cannot cover the entire - // bit-field. This can happen for example, if the bit-field is part of a - // packed struct. 
AAPCS does not define access rules for such cases, we let - // clang to follow its own rules. - if (Offset + Info.Size > StorageSize) - continue; - - // Re-adjust offsets for big-endian targets. - if (isBE()) - Offset = StorageSize - (Offset + Info.Size); - - const CharUnits StorageOffset = - Context.toCharUnitsFromBits(AbsoluteOffset & ~(StorageSize - 1)); - const CharUnits End = StorageOffset + - Context.toCharUnitsFromBits(StorageSize) - - CharUnits::One(); - - const ASTRecordLayout &Layout = - Context.getASTRecordLayout(Field->getParent()); - // If we access outside memory outside the record, than bail out. - const CharUnits RecordSize = Layout.getSize(); - if (End >= RecordSize) - continue; - - // Bail out if performing this load would access non-bit-fields members. - bool Conflict = false; - for (const auto *F : D->fields()) { - // Allow sized bit-fields overlaps. - if (F->isBitField() && !F->isZeroLengthBitField(Context)) - continue; - - const CharUnits FOffset = Context.toCharUnitsFromBits( - Layout.getFieldOffset(F->getFieldIndex())); - - // As C11 defines, a zero sized bit-field defines a barrier, so - // fields after and before it should be race condition free. - // The AAPCS acknowledges it and imposes no restritions when the - // natural container overlaps a zero-length bit-field. - if (F->isZeroLengthBitField(Context)) { - if (End > FOffset && StorageOffset < FOffset) { - Conflict = true; - break; - } - } - - const CharUnits FEnd = - FOffset + - Context.toCharUnitsFromBits( - Types.ConvertTypeForMem(F->getType())->getPrimitiveSizeInBits()) - - CharUnits::One(); - // If no overlap, continue. - if (End < FOffset || FEnd < StorageOffset) - continue; - - // The desired load overlaps a non-bit-field member, bail out. - Conflict = true; - break; - } - - if (Conflict) - continue; - // Write the new bit-field access parameters. - // As the storage offset now is defined as the number of elements from the - // start of the structure, we should divide the Offset by the element size. 
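// [Editor's worked example, hedged: the element arithmetic described just
//  above, for an 'int' bit-field (StorageSize = 32 bits) whose AbsoluteOffset
//  is 41 bits from the start of the struct.]
static_assert((41 & (32 - 1)) == 9, "Offset: bit position inside the container");
static_assert(((41 & ~(32 - 1)) / 8) == 4, "StorageOffset: 4 bytes from the struct start");
static_assert((4 / (32 / 8)) == 1, "VolatileStorageOffset: one 32-bit element");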
- Info.VolatileStorageOffset = - StorageOffset / Context.toCharUnitsFromBits(StorageSize).getQuantity(); - Info.VolatileStorageSize = StorageSize; - Info.VolatileOffset = Offset; - } -} - void CGRecordLowering::accumulateVPtrs() { if (Layout.hasOwnVFPtr()) Members.push_back(MemberInfo(CharUnits::Zero(), MemberInfo::VFPtr, @@ -985,10 +848,8 @@ CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, llvm::StructType *Ty) { assert(Info.StorageSize <= SL->getSizeInBits() && "Union not large enough for bitfield storage"); } else { - assert((Info.StorageSize == - getDataLayout().getTypeAllocSizeInBits(ElementTy) || - Info.VolatileStorageSize == - getDataLayout().getTypeAllocSizeInBits(ElementTy)) && + assert(Info.StorageSize == + getDataLayout().getTypeAllocSizeInBits(ElementTy) && "Storage size does not match the element type size"); } assert(Info.Size > 0 && "Empty bitfield!"); @@ -1036,12 +897,11 @@ LLVM_DUMP_METHOD void CGRecordLayout::dump() const { void CGBitFieldInfo::print(raw_ostream &OS) const { OS << ""; + << " StorageOffset:" << StorageOffset.getQuantity() << ">"; } LLVM_DUMP_METHOD void CGBitFieldInfo::dump() const { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 1fbeb458a9d23..fbccff11562c1 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1453,9 +1453,6 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, std::string(Args.getLastArgValue(OPT_fsymbol_partition_EQ)); Opts.ForceAAPCSBitfieldLoad = Args.hasArg(OPT_ForceAAPCSBitfieldLoad); - Opts.AAPCSBitfieldWidth = Args.hasFlag(OPT_AAPCSBitfieldWidth, - OPT_ForceNoAAPCSBitfieldWidth, - true); return Success; } diff --git a/clang/test/CodeGen/aapcs-bitfield.c b/clang/test/CodeGen/aapcs-bitfield.c index 13db68d6ae81b..4fc889bcf379e 100644 --- a/clang/test/CodeGen/aapcs-bitfield.c +++ b/clang/test/CodeGen/aapcs-bitfield.c @@ -1,12 +1,8 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -fno-aapcs-bitfield-width | FileCheck %s -check-prefix=LE -// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 -fno-aapcs-bitfield-width | FileCheck %s -check-prefix=BE -// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load -fno-aapcs-bitfield-width | FileCheck %s -check-prefixes=LENUMLOADS -// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load -fno-aapcs-bitfield-width | FileCheck %s -check-prefixes=BENUMLOADS -// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 | FileCheck %s -check-prefix=LEWIDTH -// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 | FileCheck %s -check-prefix=BEWIDTH -// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load | FileCheck %s -check-prefixes=LEWIDTHNUM -// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load | FileCheck %s -check-prefixes=BEWIDTHNUM +// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 | FileCheck %s -check-prefix=LE +// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 | FileCheck %s -check-prefix=BE +// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -fAAPCSBitfieldLoad | FileCheck %s -check-prefixes=LE,LENUMLOADS +// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o 
- -O3 -fAAPCSBitfieldLoad | FileCheck %s -check-prefixes=BE,BENUMLOADS struct st0 { short c : 7; @@ -29,57 +25,6 @@ struct st0 { // BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // -// LENUMLOADS-LABEL: @st0_check_load( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 -// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LENUMLOADS-NEXT: ret i32 [[CONV]] -// -// BENUMLOADS-LABEL: @st0_check_load( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BENUMLOADS-NEXT: ret i32 [[CONV]] -// -// LEWIDTH-LABEL: @st0_check_load( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 -// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LEWIDTH-NEXT: ret i32 [[CONV]] -// -// BEWIDTH-LABEL: @st0_check_load( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTH-NEXT: ret i32 [[CONV]] -// -// LEWIDTHNUM-LABEL: @st0_check_load( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 -// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LEWIDTHNUM-NEXT: ret i32 [[CONV]] -// -// BEWIDTHNUM-LABEL: @st0_check_load( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTHNUM-NEXT: ret i32 [[CONV]] -// int st0_check_load(struct st0 *m) { return m->c; } @@ -102,60 +47,6 @@ int st0_check_load(struct st0 *m) { // BE-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @st0_check_store( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @st0_check_store( -// BENUMLOADS-NEXT: entry: 
-// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 -// BENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @st0_check_store( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @st0_check_store( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 -// BEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @st0_check_store( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @st0_check_store( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 -// BEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 -// BEWIDTHNUM-NEXT: ret void -// void st0_check_store(struct st0 *m) { m->c = 1; } @@ -182,57 +73,6 @@ struct st1 { // BE-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // -// LENUMLOADS-LABEL: @st1_check_load( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 10 -// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// LENUMLOADS-NEXT: ret i32 [[CONV]] -// -// BENUMLOADS-LABEL: @st1_check_load( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 10 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 10 -// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// BENUMLOADS-NEXT: ret i32 [[CONV]] -// -// LEWIDTH-LABEL: @st1_check_load( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = 
ashr i16 [[BF_LOAD]], 10 -// LEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// LEWIDTH-NEXT: ret i32 [[CONV]] -// -// BEWIDTH-LABEL: @st1_check_load( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 10 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 10 -// BEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// BEWIDTH-NEXT: ret i32 [[CONV]] -// -// LEWIDTHNUM-LABEL: @st1_check_load( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 10 -// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// LEWIDTHNUM-NEXT: ret i32 [[CONV]] -// -// BEWIDTHNUM-LABEL: @st1_check_load( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 10 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 10 -// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// BEWIDTHNUM-NEXT: ret i32 [[CONV]] -// int st1_check_load(struct st1 *m) { return m->c; } @@ -255,60 +95,6 @@ int st1_check_load(struct st1 *m) { // BE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @st1_check_store( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 1023 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1024 -// LENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @st1_check_store( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -64 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// BENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @st1_check_store( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 1023 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1024 -// LEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @st1_check_store( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -64 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// BEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: 
@st1_check_store( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 1023 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1024 -// LEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @st1_check_store( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -64 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// BEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void st1_check_store(struct st1 *m) { m->c = 1; } @@ -335,57 +121,6 @@ struct st2 { // BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // -// LENUMLOADS-LABEL: @st2_check_load( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 -// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LENUMLOADS-NEXT: ret i32 [[CONV]] -// -// BENUMLOADS-LABEL: @st2_check_load( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BENUMLOADS-NEXT: ret i32 [[CONV]] -// -// LEWIDTH-LABEL: @st2_check_load( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 -// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LEWIDTH-NEXT: ret i32 [[CONV]] -// -// BEWIDTH-LABEL: @st2_check_load( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTH-NEXT: ret i32 [[CONV]] -// -// LEWIDTHNUM-LABEL: @st2_check_load( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 -// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LEWIDTHNUM-NEXT: ret i32 [[CONV]] -// -// BEWIDTHNUM-LABEL: @st2_check_load( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* 
[[C]], align 2 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTHNUM-NEXT: ret i32 [[CONV]] -// int st2_check_load(struct st2 *m) { return m->c; } @@ -408,60 +143,6 @@ int st2_check_load(struct st2 *m) { // BE-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @st2_check_store( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @st2_check_store( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 -// BENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @st2_check_store( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @st2_check_store( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 -// BEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @st2_check_store( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @st2_check_store( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 -// BEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 -// BEWIDTHNUM-NEXT: ret void -// void st2_check_store(struct st2 *m) { m->c = 1; } @@ -487,57 +168,6 @@ struct st3 { // BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // -// LENUMLOADS-LABEL: @st3_check_load( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2 -// 
LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 -// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LENUMLOADS-NEXT: ret i32 [[CONV]] -// -// BENUMLOADS-LABEL: @st3_check_load( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BENUMLOADS-NEXT: ret i32 [[CONV]] -// -// LEWIDTH-LABEL: @st3_check_load( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 9 -// LEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// LEWIDTH-NEXT: ret i32 [[CONV]] -// -// BEWIDTH-LABEL: @st3_check_load( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 9 -// BEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// BEWIDTH-NEXT: ret i32 [[CONV]] -// -// LEWIDTHNUM-LABEL: @st3_check_load( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 9 -// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// LEWIDTHNUM-NEXT: ret i32 [[CONV]] -// -// BEWIDTHNUM-LABEL: @st3_check_load( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 9 -// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// BEWIDTHNUM-NEXT: ret i32 [[CONV]] -// int st3_check_load(struct st3 *m) { return m->c; } @@ -560,60 +190,6 @@ int st3_check_load(struct st3 *m) { // BE-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP0]], align 2 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @st3_check_store( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP0]], align 2 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @st3_check_store( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 -// BENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP0]], align 2 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @st3_check_store( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: 
[[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -128 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// LEWIDTH-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @st3_check_store( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 511 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512 -// BEWIDTH-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @st3_check_store( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -128 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// LEWIDTHNUM-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @st3_check_store( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 511 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512 -// BEWIDTHNUM-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2 -// BEWIDTHNUM-NEXT: ret void -// void st3_check_store(struct st3 *m) { m->c = 1; } @@ -645,68 +221,6 @@ struct st4 { // BE-NEXT: [[CONV:%.*]] = ashr exact i32 [[SEXT]], 24 // BE-NEXT: ret i32 [[CONV]] // -// LENUMLOADS-LABEL: @st4_check_load( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 2 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 11 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = zext i16 [[BF_ASHR]] to i32 -// LENUMLOADS-NEXT: [[SEXT:%.*]] = shl i32 [[BF_CAST]], 24 -// LENUMLOADS-NEXT: [[CONV:%.*]] = ashr exact i32 [[SEXT]], 24 -// LENUMLOADS-NEXT: ret i32 [[CONV]] -// -// BENUMLOADS-LABEL: @st4_check_load( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 11 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = zext i16 [[BF_ASHR]] to i32 -// BENUMLOADS-NEXT: [[SEXT:%.*]] = shl i32 [[BF_CAST]], 24 -// BENUMLOADS-NEXT: [[CONV:%.*]] = ashr exact i32 [[SEXT]], 24 -// BENUMLOADS-NEXT: ret i32 [[CONV]] -// -// LEWIDTH-LABEL: @st4_check_load( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* -// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 2 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3 -// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 
[[BF_ASHR]] to i32 -// LEWIDTH-NEXT: ret i32 [[CONV]] -// -// BEWIDTH-LABEL: @st4_check_load( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* -// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3 -// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTH-NEXT: ret i32 [[CONV]] -// -// LEWIDTHNUM-LABEL: @st4_check_load( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 2 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3 -// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LEWIDTHNUM-NEXT: ret i32 [[CONV]] -// -// BEWIDTHNUM-LABEL: @st4_check_load( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3 -// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTHNUM-NEXT: ret i32 [[CONV]] -// int st4_check_load(struct st4 *m) { return m->c; } @@ -729,64 +243,6 @@ int st4_check_load(struct st4 *m) { // BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @st4_check_store( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -15873 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512 -// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @st4_check_store( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -125 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 4 -// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @st4_check_store( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* -// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -63 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 -// LEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @st4_check_store( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* -// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 -// BEWIDTH-NEXT: 
[[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -125 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 4 -// BEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @st4_check_store( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -63 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 -// LEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @st4_check_store( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -125 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 4 -// BEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void st4_check_store(struct st4 *m) { m->c = 1; } @@ -809,60 +265,6 @@ void st4_check_store(struct st4 *m) { // BE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @st4_check_nonv_store( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -512 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// LENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @st4_check_nonv_store( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 127 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128 -// BENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @st4_check_nonv_store( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -512 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// LEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @st4_check_nonv_store( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 127 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128 -// BEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @st4_check_nonv_store( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], 
%struct.st4* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -512 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// LEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @st4_check_nonv_store( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 127 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128 -// BEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void st4_check_nonv_store(struct st4 *m) { m->b = 1; } @@ -889,57 +291,6 @@ struct st5 { // BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // -// LENUMLOADS-LABEL: @st5_check_load( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 -// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LENUMLOADS-NEXT: ret i32 [[CONV]] -// -// BENUMLOADS-LABEL: @st5_check_load( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 -// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BENUMLOADS-NEXT: ret i32 [[CONV]] -// -// LEWIDTH-LABEL: @st5_check_load( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 -// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LEWIDTH-NEXT: ret i32 [[CONV]] -// -// BEWIDTH-LABEL: @st5_check_load( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 -// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTH-NEXT: ret i32 [[CONV]] -// -// LEWIDTHNUM-LABEL: @st5_check_load( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 -// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LEWIDTHNUM-NEXT: ret i32 [[CONV]] -// -// BEWIDTHNUM-LABEL: @st5_check_load( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = 
ashr i8 [[BF_LOAD]], 3 -// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTHNUM-NEXT: ret i32 [[CONV]] -// int st5_check_load(struct st5 *m) { return m->c; } @@ -962,60 +313,6 @@ int st5_check_load(struct st5 *m) { // BE-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @st5_check_store( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @st5_check_store( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 8 -// BENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @st5_check_store( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @st5_check_store( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 8 -// BEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @st5_check_store( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @st5_check_store( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 8 -// BEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 -// BEWIDTHNUM-NEXT: ret void -// void st5_check_store(struct st5 *m) { m->c = 1; } @@ -1034,7 +331,7 @@ struct st6 { // LE-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4 // LE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 // LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 +// 
LE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2 // LE-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 // LE-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] // LE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 @@ -1052,7 +349,7 @@ struct st6 { // BE-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4 // BE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 // BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 +// BE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2 // BE-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 // BE-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] // BE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 @@ -1062,114 +359,6 @@ struct st6 { // BE-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]] // BE-NEXT: ret i32 [[ADD4]] // -// LENUMLOADS-LABEL: @st6_check_load( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 4 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 -// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 -// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 -// LENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] -// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 -// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl i8 [[BF_LOAD1]], 3 -// LENUMLOADS-NEXT: [[BF_ASHR3:%.*]] = ashr exact i8 [[BF_SHL2]], 3 -// LENUMLOADS-NEXT: [[BF_CAST4:%.*]] = sext i8 [[BF_ASHR3]] to i32 -// LENUMLOADS-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], [[BF_CAST4]] -// LENUMLOADS-NEXT: ret i32 [[ADD5]] -// -// BENUMLOADS-LABEL: @st6_check_load( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 -// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 -// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 -// BENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] -// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 -// BENUMLOADS-NEXT: [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 3 -// BENUMLOADS-NEXT: [[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32 -// BENUMLOADS-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]] -// BENUMLOADS-NEXT: ret i32 [[ADD4]] -// -// LEWIDTH-LABEL: @st6_check_load( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// 
LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 4 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4 -// LEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 -// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 -// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 -// LEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] -// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 -// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = shl i8 [[BF_LOAD1]], 3 -// LEWIDTH-NEXT: [[BF_ASHR3:%.*]] = ashr exact i8 [[BF_SHL2]], 3 -// LEWIDTH-NEXT: [[BF_CAST4:%.*]] = sext i8 [[BF_ASHR3]] to i32 -// LEWIDTH-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], [[BF_CAST4]] -// LEWIDTH-NEXT: ret i32 [[ADD5]] -// -// BEWIDTH-LABEL: @st6_check_load( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4 -// BEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 -// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 -// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 -// BEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] -// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 -// BEWIDTH-NEXT: [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 3 -// BEWIDTH-NEXT: [[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32 -// BEWIDTH-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]] -// BEWIDTH-NEXT: ret i32 [[ADD4]] -// -// LEWIDTHNUM-LABEL: @st6_check_load( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 4 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4 -// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 -// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 -// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 -// LEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] -// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 -// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = shl i8 [[BF_LOAD1]], 3 -// LEWIDTHNUM-NEXT: [[BF_ASHR3:%.*]] = ashr exact i8 [[BF_SHL2]], 3 -// LEWIDTHNUM-NEXT: [[BF_CAST4:%.*]] = sext i8 [[BF_ASHR3]] to i32 -// LEWIDTHNUM-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], [[BF_CAST4]] -// LEWIDTHNUM-NEXT: ret i32 [[ADD5]] -// -// BEWIDTHNUM-LABEL: @st6_check_load( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 
-// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4 -// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 -// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 -// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 -// BEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] -// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 -// BEWIDTHNUM-NEXT: [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 3 -// BEWIDTHNUM-NEXT: [[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32 -// BEWIDTHNUM-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]] -// BEWIDTHNUM-NEXT: ret i32 [[ADD4]] -// int st6_check_load(volatile struct st6 *m) { int x = m->a; x += m->b; @@ -1185,7 +374,7 @@ int st6_check_load(volatile struct st6 *m) { // LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 // LE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 // LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LE-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 +// LE-NEXT: store i8 2, i8* [[B]], align 2 // LE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 // LE-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 // LE-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32 @@ -1201,7 +390,7 @@ int st6_check_load(volatile struct st6 *m) { // BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16 // BE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BE-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 +// BE-NEXT: store i8 2, i8* [[B]], align 2 // BE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 // BE-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 // BE-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7 @@ -1209,102 +398,6 @@ int st6_check_load(volatile struct st6 *m) { // BE-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @st6_check_store( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -4096 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// LENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LENUMLOADS-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 -// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 -// LENUMLOADS-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32 -// LENUMLOADS-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 3 -// LENUMLOADS-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @st6_check_store( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load 
i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 15 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16 -// BENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BENUMLOADS-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 -// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 -// BENUMLOADS-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7 -// BENUMLOADS-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 24 -// BENUMLOADS-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @st6_check_store( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -4096 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// LEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LEWIDTH-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 -// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 -// LEWIDTH-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32 -// LEWIDTH-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 3 -// LEWIDTH-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @st6_check_store( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 15 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16 -// BEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BEWIDTH-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 -// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 -// BEWIDTH-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7 -// BEWIDTH-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 24 -// BEWIDTH-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @st6_check_store( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -4096 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// LEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 -// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 -// LEWIDTHNUM-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32 -// 
LEWIDTHNUM-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 3 -// LEWIDTHNUM-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @st6_check_store( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 15 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16 -// BEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 -// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 -// BEWIDTHNUM-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7 -// BEWIDTHNUM-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 24 -// BEWIDTHNUM-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void st6_check_store(struct st6 *m) { m->a = 1; m->b = 2; @@ -1325,10 +418,10 @@ struct st7b { // LE-LABEL: @st7_check_load( // LE-NEXT: entry: // LE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LE-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 +// LE-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4 // LE-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 // LE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 +// LE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4 // LE-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 // LE-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] // LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 @@ -1342,10 +435,10 @@ struct st7b { // BE-LABEL: @st7_check_load( // BE-NEXT: entry: // BE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BE-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 +// BE-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4 // BE-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 // BE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 +// BE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4 // BE-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 // BE-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] // BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 @@ -1355,105 +448,6 @@ struct st7b { // BE-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] // BE-NEXT: ret i32 [[ADD3]] // -// LENUMLOADS-LABEL: @st7_check_load( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 -// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 -// LENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 -// LENUMLOADS-NEXT: [[CONV1:%.*]] = sext i8 
[[TMP1]] to i32 -// LENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] -// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LENUMLOADS-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] -// LENUMLOADS-NEXT: ret i32 [[ADD3]] -// -// BENUMLOADS-LABEL: @st7_check_load( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 -// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 -// BENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 -// BENUMLOADS-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 -// BENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] -// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BENUMLOADS-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] -// BENUMLOADS-NEXT: ret i32 [[ADD3]] -// -// LEWIDTH-LABEL: @st7_check_load( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 -// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 -// LEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 -// LEWIDTH-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 -// LEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] -// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 -// LEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LEWIDTH-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] -// LEWIDTH-NEXT: ret i32 [[ADD3]] -// -// BEWIDTH-LABEL: @st7_check_load( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 -// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 -// BEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 -// BEWIDTH-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 -// BEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] -// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BEWIDTH-NEXT: 
[[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 -// BEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTH-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] -// BEWIDTH-NEXT: ret i32 [[ADD3]] -// -// LEWIDTHNUM-LABEL: @st7_check_load( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 -// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 -// LEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 -// LEWIDTHNUM-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 -// LEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] -// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 -// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LEWIDTHNUM-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] -// LEWIDTHNUM-NEXT: ret i32 [[ADD3]] -// -// BEWIDTHNUM-LABEL: @st7_check_load( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 -// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 -// BEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 -// BEWIDTHNUM-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 -// BEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] -// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 -// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTHNUM-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] -// BEWIDTHNUM-NEXT: ret i32 [[ADD3]] -// int st7_check_load(struct st7b *m) { int r = m->x; r += m->y.a; @@ -1464,9 +458,9 @@ int st7_check_load(struct st7b *m) { // LE-LABEL: @st7_check_store( // LE-NEXT: entry: // LE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LE-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 +// LE-NEXT: store i8 1, i8* [[X]], align 4 // LE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LE-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 +// LE-NEXT: store volatile i8 2, i8* [[A]], align 4 // LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 // LE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 @@ -1477,9 +471,9 @@ int st7_check_load(struct st7b *m) { // BE-LABEL: @st7_check_store( // BE-NEXT: entry: // BE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BE-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 +// BE-NEXT: store i8 1, i8* [[X]], align 4 
// BE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BE-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 +// BE-NEXT: store volatile i8 2, i8* [[A]], align 4 // BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 // BE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 @@ -1487,84 +481,6 @@ int st7_check_load(struct st7b *m) { // BE-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @st7_check_store( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 -// LENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LENUMLOADS-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 -// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 3 -// LENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @st7_check_store( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 -// BENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BENUMLOADS-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 -// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 24 -// BENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @st7_check_store( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 -// LEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LEWIDTH-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 -// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 3 -// LEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @st7_check_store( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 -// BEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BEWIDTH-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 -// BEWIDTH-NEXT: [[B:%.*]] = 
getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 24 -// BEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @st7_check_store( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 -// LEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LEWIDTHNUM-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 -// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 3 -// LEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @st7_check_store( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 -// BEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BEWIDTHNUM-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 -// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 24 -// BEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void st7_check_store(struct st7b *m) { m->x = 1; m->y.a = 2; @@ -1588,42 +504,6 @@ struct st8 { // BE-NEXT: store i16 -1, i16* [[TMP0]], align 4 // BE-NEXT: ret i32 65535 // -// LENUMLOADS-LABEL: @st8_check_assignment( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: store i16 -1, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret i32 65535 -// -// BENUMLOADS-LABEL: @st8_check_assignment( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: store i16 -1, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret i32 65535 -// -// LEWIDTH-LABEL: @st8_check_assignment( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: store i16 -1, i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret i32 65535 -// -// BEWIDTH-LABEL: @st8_check_assignment( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: store i16 -1, i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret i32 65535 -// -// LEWIDTHNUM-LABEL: @st8_check_assignment( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: store i16 -1, i16* [[TMP0]], align 
4 -// LEWIDTHNUM-NEXT: ret i32 65535 -// -// BEWIDTHNUM-LABEL: @st8_check_assignment( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: store i16 -1, i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret i32 65535 -// int st8_check_assignment(struct st8 *m) { return m->f = 0xffff; } @@ -1646,50 +526,6 @@ struct st9{ // BE-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32 // BE-NEXT: ret i32 [[BF_CAST]] // -// LENUMLOADS-LABEL: @read_st9( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32 -// LENUMLOADS-NEXT: ret i32 [[BF_CAST]] -// -// BENUMLOADS-LABEL: @read_st9( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32 -// BENUMLOADS-NEXT: ret i32 [[BF_CAST]] -// -// LEWIDTH-LABEL: @read_st9( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 24 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i32 [[BF_SHL]], 24 -// LEWIDTH-NEXT: ret i32 [[BF_ASHR]] -// -// BEWIDTH-LABEL: @read_st9( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_LOAD]], 24 -// BEWIDTH-NEXT: ret i32 [[BF_ASHR]] -// -// LEWIDTHNUM-LABEL: @read_st9( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 24 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i32 [[BF_SHL]], 24 -// LEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] -// -// BEWIDTHNUM-LABEL: @read_st9( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_LOAD]], 24 -// BEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] -// int read_st9(volatile struct st9 *m) { return m->f; } @@ -1697,65 +533,17 @@ int read_st9(volatile struct st9 *m) { // LE-LABEL: @store_st9( // LE-NEXT: entry: // LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // LE-NEXT: store volatile i8 1, i8* [[TMP0]], align 4 // LE-NEXT: ret void // // BE-LABEL: @store_st9( // BE-NEXT: entry: // BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // BE-NEXT: store volatile i8 1, i8* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @store_st9( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load 
volatile i8, i8* [[TMP0]], align 4 -// LENUMLOADS-NEXT: store volatile i8 1, i8* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @store_st9( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BENUMLOADS-NEXT: store volatile i8 1, i8* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @store_st9( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -256 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 1 -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @store_st9( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], 16777215 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 16777216 -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @store_st9( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -256 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 1 -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @store_st9( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], 16777215 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 16777216 -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void store_st9(volatile struct st9 *m) { m->f = 1; } @@ -1765,6 +553,7 @@ void store_st9(volatile struct st9 *m) { // LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 // LE-NEXT: ret void // @@ -1773,75 +562,10 @@ void store_st9(volatile struct st9 *m) { // BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_st9( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: 
[[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_st9( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_st9( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_st9( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_st9( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_st9( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_st9(volatile struct st9 *m) { ++m->f; } @@ -1869,56 +593,6 @@ struct st10{ // BE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[BF_CAST]] // -// LENUMLOADS-LABEL: @read_st10( -// LENUMLOADS-NEXT: entry: -// 
LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 7 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 8 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 -// LENUMLOADS-NEXT: ret i32 [[BF_CAST]] -// -// BENUMLOADS-LABEL: @read_st10( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 8 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 -// BENUMLOADS-NEXT: ret i32 [[BF_CAST]] -// -// LEWIDTH-LABEL: @read_st10( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 23 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24 -// LEWIDTH-NEXT: ret i32 [[BF_ASHR]] -// -// BEWIDTH-LABEL: @read_st10( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24 -// BEWIDTH-NEXT: ret i32 [[BF_ASHR]] -// -// LEWIDTHNUM-LABEL: @read_st10( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 23 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24 -// LEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] -// -// BEWIDTHNUM-LABEL: @read_st10( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24 -// BEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] -// int read_st10(volatile struct st10 *m) { return m->f; } @@ -1941,60 +615,6 @@ int read_st10(volatile struct st10 *m) { // BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @store_st10( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -511 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 2 -// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @store_st10( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -32641 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128 -// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], 
i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @store_st10( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -511 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 2 -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @store_st10( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -2139095041 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 8388608 -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @store_st10( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -511 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 2 -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @store_st10( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -2139095041 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 8388608 -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void store_st10(volatile struct st10 *m) { m->f = 1; } @@ -2023,78 +643,6 @@ void store_st10(volatile struct st10 *m) { // BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_st10( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = add i16 [[BF_LOAD]], 2 -// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i16 [[TMP1]], 510 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD1]], -511 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[BF_SHL2]] -// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_st10( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = add i16 [[BF_LOAD]], 128 -// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i16 [[TMP1]], 32640 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD1]], -32641 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[BF_SHL2]] -// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_st10( -// LEWIDTH-NEXT: entry: -// 
LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 2 -// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 510 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -511 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_st10( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 8388608 -// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 2139095040 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -2139095041 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_st10( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 2 -// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 510 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -511 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_st10( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 8388608 -// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 2139095040 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -2139095041 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_st10(volatile struct st10 *m) { ++m->f; } @@ -2118,48 +666,6 @@ struct st11{ // BE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 // BE-NEXT: ret i32 [[BF_CAST]] // -// LENUMLOADS-LABEL: @read_st11( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 -// LENUMLOADS-NEXT: ret i32 [[BF_CAST]] -// -// BENUMLOADS-LABEL: @read_st11( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 -// BENUMLOADS-NEXT: ret i32 [[BF_CAST]] -// -// LEWIDTH-LABEL: @read_st11( -// 
LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// LEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 -// LEWIDTH-NEXT: ret i32 [[BF_CAST]] -// -// BEWIDTH-LABEL: @read_st11( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// BEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 -// BEWIDTH-NEXT: ret i32 [[BF_CAST]] -// -// LEWIDTHNUM-LABEL: @read_st11( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 -// LEWIDTHNUM-NEXT: ret i32 [[BF_CAST]] -// -// BEWIDTHNUM-LABEL: @read_st11( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 -// BEWIDTHNUM-NEXT: ret i32 [[BF_CAST]] -// int read_st11(volatile struct st11 *m) { return m->f; } @@ -2167,55 +673,17 @@ int read_st11(volatile struct st11 *m) { // LE-LABEL: @store_st11( // LE-NEXT: entry: // LE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 // LE-NEXT: store volatile i16 1, i16* [[F]], align 1 // LE-NEXT: ret void // // BE-LABEL: @store_st11( // BE-NEXT: entry: // BE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 // BE-NEXT: store volatile i16 1, i16* [[F]], align 1 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @store_st11( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// LENUMLOADS-NEXT: store volatile i16 1, i16* [[F]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @store_st11( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// BENUMLOADS-NEXT: store volatile i16 1, i16* [[F]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @store_st11( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: store volatile i16 1, i16* [[F]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @store_st11( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: store volatile i16 1, i16* [[F]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @store_st11( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// 
LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// LEWIDTHNUM-NEXT: store volatile i16 1, i16* [[F]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @store_st11( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// BEWIDTHNUM-NEXT: store volatile i16 1, i16* [[F]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void store_st11(volatile struct st11 *m) { m->f = 1; } @@ -2225,6 +693,7 @@ void store_st11(volatile struct st11 *m) { // LE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 // LE-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1 // LE-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 // LE-NEXT: ret void // @@ -2233,61 +702,10 @@ void store_st11(volatile struct st11 *m) { // BE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 // BE-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1 // BE-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_st11( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1 -// LENUMLOADS-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_st11( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// BENUMLOADS-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1 -// BENUMLOADS-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_st11( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// LEWIDTH-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_st11( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// BEWIDTH-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_st11( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// 
LEWIDTHNUM-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1 -// LEWIDTHNUM-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_st11( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1 -// BEWIDTHNUM-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void increment_st11(volatile struct st11 *m) { ++m->f; } @@ -2295,67 +713,19 @@ void increment_st11(volatile struct st11 *m) { // LE-LABEL: @increment_e_st11( // LE-NEXT: entry: // LE-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 -// LE-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 +// LE-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4 // LE-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 -// LE-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 +// LE-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4 // LE-NEXT: ret void // // BE-LABEL: @increment_e_st11( // BE-NEXT: entry: // BE-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 -// BE-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 +// BE-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4 // BE-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 -// BE-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 +// BE-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_e_st11( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 -// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_e_st11( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 -// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 -// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_e_st11( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 -// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 -// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_e_st11( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 -// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 -// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 -// BEWIDTH-NEXT: ret void -// -// 
LEWIDTHNUM-LABEL: @increment_e_st11( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 -// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_e_st11( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 -// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 -// BEWIDTHNUM-NEXT: ret void -// void increment_e_st11(volatile struct st11 *m) { ++m->e; } @@ -2381,54 +751,6 @@ struct st12{ // BE-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 // BE-NEXT: ret i32 [[BF_ASHR]] // -// LENUMLOADS-LABEL: @read_st12( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 -// LENUMLOADS-NEXT: ret i32 [[BF_ASHR]] -// -// BENUMLOADS-LABEL: @read_st12( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 -// BENUMLOADS-NEXT: ret i32 [[BF_ASHR]] -// -// LEWIDTH-LABEL: @read_st12( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 -// LEWIDTH-NEXT: ret i32 [[BF_ASHR]] -// -// BEWIDTH-LABEL: @read_st12( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 -// BEWIDTH-NEXT: ret i32 [[BF_ASHR]] -// -// LEWIDTHNUM-LABEL: @read_st12( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 -// LEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] -// -// BEWIDTHNUM-LABEL: @read_st12( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 -// BEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] -// int read_st12(volatile struct st12 *m) { return m->f; } @@ -2451,60 +773,6 @@ int read_st12(volatile struct st12 *m) { // BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 // BE-NEXT: ret void // -// 
LENUMLOADS-LABEL: @store_st12( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256 -// LENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @store_st12( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256 -// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @store_st12( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256 -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @store_st12( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256 -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @store_st12( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256 -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @store_st12( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256 -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void store_st12(volatile struct st12 *m) { m->f = 1; } @@ -2533,78 +801,6 @@ void store_st12(volatile struct st12 *m) { // BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_st12( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256 -// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// LENUMLOADS-NEXT: store volatile i32 
[[BF_SET]], i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_st12( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256 -// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_st12( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256 -// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_st12( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256 -// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_st12( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256 -// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_st12( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256 -// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_st12(volatile struct st12 *m) { ++m->f; } @@ -2633,78 +829,6 @@ void increment_st12(volatile struct st12 *m) { 
// BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_e_st12( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_e_st12( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_e_st12( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_e_st12( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_e_st12( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_e_st12( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast 
%struct.st12* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_e_st12(volatile struct st12 *m) { ++m->e; } @@ -2742,90 +866,6 @@ struct st13 { // BE-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_b_st13( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] -// LENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_b_st13( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 -// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] -// BENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_b_st13( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LEWIDTH-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 -// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 -// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] -// LEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_b_st13( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 -// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BEWIDTH-NEXT: 
[[TMP1:%.*]] = zext i32 [[INC]] to i40 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] -// BEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_b_st13( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 -// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] -// LEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_b_st13( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] -// BEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void increment_b_st13(volatile struct st13 *s) { s->b++; } @@ -2839,6 +879,7 @@ struct st14 { // LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 // LE-NEXT: ret void // @@ -2847,61 +888,10 @@ struct st14 { // BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_a_st14( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_a_st14( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 -// 
BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_a_st14( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_a_st14( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_a_st14( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_a_st14( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void increment_a_st14(volatile struct st14 *s) { s->a++; } @@ -2915,6 +905,7 @@ struct st15 { // LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 // LE-NEXT: ret void // @@ -2923,61 +914,10 @@ struct st15 { // BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_a_st15( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_a_st15( -// 
BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_a_st15( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_a_st15( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_a_st15( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_a_st15( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void increment_a_st15(volatile struct st15 *s) { s->a++; } @@ -3015,84 +955,6 @@ struct st16 { // BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_a_st16( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] -// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_a_st16( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 -// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 -// BENUMLOADS-NEXT: 
[[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] -// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_a_st16( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LEWIDTH-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] -// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_a_st16( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 -// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] -// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_a_st16( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] -// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_a_st16( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] -// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_a_st16(struct st16 *s) { s->a++; } @@ -3125,90 +987,6 @@ void increment_a_st16(struct st16 *s) { // BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_b_st16( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// LENUMLOADS-NEXT: [[TMP2:%.*]] = 
trunc i64 [[TMP1]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 -// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 -// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 -// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] -// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_b_st16( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 -// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 -// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] -// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_b_st16( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// LEWIDTH-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 -// LEWIDTH-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 -// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 -// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] -// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_b_st16( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// BEWIDTH-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 -// BEWIDTH-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 -// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] -// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_b_st16( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 -// LEWIDTHNUM-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 -// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 -// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] -// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// 
BEWIDTHNUM-LABEL: @increment_b_st16( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// BEWIDTHNUM-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 -// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 -// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] -// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_b_st16(struct st16 *s) { s->b++; } @@ -3241,90 +1019,6 @@ void increment_b_st16(struct st16 *s) { // BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_c_st16( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] -// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_c_st16( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 -// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] -// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_c_st16( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LEWIDTH-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] -// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_c_st16( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// 
BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 -// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] -// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_c_st16( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] -// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_c_st16( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] -// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_c_st16(struct st16 *s) { s->c++; } @@ -3359,96 +1053,6 @@ void increment_c_st16(struct st16 *s) { // BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_d_st16( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// LENUMLOADS-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 -// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 -// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 -// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] -// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_d_st16( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* -// BENUMLOADS-NEXT: 
[[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 -// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 -// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] -// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_d_st16( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// LEWIDTH-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 -// LEWIDTH-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 -// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 -// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] -// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_d_st16( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// BEWIDTH-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 -// BEWIDTH-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 -// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] -// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_d_st16( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 -// LEWIDTHNUM-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 -// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 -// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] -// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_d_st16( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// 
BEWIDTHNUM-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 -// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 -// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] -// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_d_st16(struct st16 *s) { s->d++; } @@ -3481,68 +1085,6 @@ void increment_d_st16(struct st16 *s) { // BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_v_a_st16( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294967296 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] -// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_v_a_st16( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 -// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], 4294967295 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] -// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_v_a_st16( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_v_a_st16( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_v_a_st16( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_v_a_st16( -// BEWIDTHNUM-NEXT: entry: -// 
BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_v_a_st16(volatile struct st16 *s) { s->a++; } @@ -3577,88 +1119,6 @@ void increment_v_a_st16(volatile struct st16 *s) { // BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_v_b_st16( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// LENUMLOADS-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 -// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 -// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -281470681743361 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] -// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_v_b_st16( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 -// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 -// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294901761 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] -// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_v_b_st16( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_v_b_st16( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTH-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536 
-// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_v_b_st16( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_v_b_st16( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_v_b_st16(volatile struct st16 *s) { s->b++; } @@ -3693,74 +1153,6 @@ void increment_v_b_st16(volatile struct st16 *s) { // BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_v_c_st16( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294967296 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] -// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_v_c_st16( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 -// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* 
[[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], 4294967295 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] -// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_v_c_st16( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_v_c_st16( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_v_c_st16( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_v_c_st16( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_v_c_st16(volatile struct st16 *s) { s->c++; } @@ -3797,90 +1189,6 @@ void increment_v_c_st16(volatile struct st16 *s) { // BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_v_d_st16( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// LENUMLOADS-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 -// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 -// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -281470681743361 
-// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] -// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_v_d_st16( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 -// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 -// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294901761 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] -// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_v_d_st16( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_v_d_st16( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTH-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_v_d_st16( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_v_d_st16( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 
3 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_v_d_st16(volatile struct st16 *s) { s->d++; } @@ -3919,90 +1227,6 @@ char c : 8; // BE-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_v_b_st17( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] -// LENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_v_b_st17( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 -// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] -// BENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_v_b_st17( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 -// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LEWIDTH-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] -// LEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_v_b_st17( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 -// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 -// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load 
volatile i40, i40* [[TMP0]], align 1 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] -// BEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_v_b_st17( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] -// LEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_v_b_st17( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 -// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] -// BEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void increment_v_b_st17(volatile struct st17 *s) { s->b++; } @@ -4035,458 +1259,6 @@ void increment_v_b_st17(volatile struct st17 *s) { // BE-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_v_c_st17( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 32 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i8 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_CAST]], 1 -// LENUMLOADS-NEXT: [[TMP2:%.*]] = zext i8 [[INC]] to i40 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 32 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 4294967295 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] -// LENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_v_c_st17( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i8 -// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_CAST]], 1 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = zext i8 [[INC]] to i40 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BENUMLOADS-NEXT: 
[[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -256 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] -// BENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_v_c_st17( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_v_c_st17( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_v_c_st17( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_v_c_st17( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void increment_v_c_st17(volatile struct st17 *s) { s->c++; } - -// A zero bitfield should block, as the C11 specification -// requires a and b to be different memory positions -struct zero_bitfield { - int a : 8; - char : 0; - int b : 8; -}; - -// LE-LABEL: @increment_a_zero_bitfield( -// LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 -// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 -// LE-NEXT: ret void -// -// BE-LABEL: @increment_a_zero_bitfield( -// BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 -// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 -// BE-NEXT: ret void -// -// LENUMLOADS-LABEL: @increment_a_zero_bitfield( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// LENUMLOADS-NEXT: store 
volatile i8 [[INC]], i8* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_a_zero_bitfield( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_a_zero_bitfield( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_a_zero_bitfield( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_a_zero_bitfield( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_a_zero_bitfield( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// -void increment_a_zero_bitfield(volatile struct zero_bitfield *s) { - s->a++; -} - -// LE-LABEL: @increment_b_zero_bitfield( -// LE-NEXT: entry: -// LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 -// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LE-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 -// LE-NEXT: ret void -// -// BE-LABEL: @increment_b_zero_bitfield( -// BE-NEXT: entry: -// BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 -// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BE-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 -// BE-NEXT: ret void -// -// LENUMLOADS-LABEL: @increment_b_zero_bitfield( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* 
[[S:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1 -// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_b_zero_bitfield( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1 -// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_b_zero_bitfield( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_b_zero_bitfield( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_b_zero_bitfield( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1 -// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_b_zero_bitfield( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1 -// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 -// BEWIDTHNUM-NEXT: ret void -// -void increment_b_zero_bitfield(volatile struct zero_bitfield *s) { - s->b++; -} - -// The zero bitfield here does not affect -struct zero_bitfield_ok { - short a : 8; - char a1 : 8; - long : 0; - int b : 24; -}; - -// LE-LABEL: @increment_a_zero_bitfield_ok( -// LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 -// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LE-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8 -// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LE-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD1]], 8 -// LE-NEXT: [[BF_CAST:%.*]] = trunc i16 [[TMP1]] to i8 -// LE-NEXT: [[ADD:%.*]] = add i8 [[BF_CAST]], [[CONV]] -// LE-NEXT: [[TMP2:%.*]] = zext 
i8 [[ADD]] to i16 -// LE-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LE-NEXT: [[BF_SHL6:%.*]] = shl nuw i16 [[TMP2]], 8 -// LE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], 255 -// LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_SHL6]], [[BF_CLEAR]] -// LE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LE-NEXT: ret void -// -// BE-LABEL: @increment_a_zero_bitfield_ok( -// BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 -// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BE-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8 -// BE-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8 -// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BE-NEXT: [[SEXT:%.*]] = trunc i16 [[BF_LOAD1]] to i8 -// BE-NEXT: [[ADD:%.*]] = add i8 [[SEXT]], [[CONV]] -// BE-NEXT: [[TMP2:%.*]] = zext i8 [[ADD]] to i16 -// BE-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], -256 -// BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[TMP2]] -// BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BE-NEXT: ret void -// -// LENUMLOADS-LABEL: @increment_a_zero_bitfield_ok( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD1]], 8 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i16 [[TMP1]] to i8 -// LENUMLOADS-NEXT: [[ADD:%.*]] = add i8 [[BF_CAST]], [[CONV]] -// LENUMLOADS-NEXT: [[TMP2:%.*]] = zext i8 [[ADD]] to i16 -// LENUMLOADS-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_SHL6:%.*]] = shl nuw i16 [[TMP2]], 8 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], 255 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_SHL6]], [[BF_CLEAR]] -// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_a_zero_bitfield_ok( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8 -// BENUMLOADS-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[SEXT:%.*]] = trunc i16 [[BF_LOAD1]] to i8 -// BENUMLOADS-NEXT: [[ADD:%.*]] = add i8 [[SEXT]], [[CONV]] -// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i8 [[ADD]] to i16 -// BENUMLOADS-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], -256 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[TMP2]] -// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_a_zero_bitfield_ok( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 -// 
LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8 -// LEWIDTH-NEXT: [[TMP1:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8* -// LEWIDTH-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 1 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP2]], align 1 -// LEWIDTH-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]] -// LEWIDTH-NEXT: store volatile i8 [[ADD]], i8* [[TMP2]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_a_zero_bitfield_ok( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8 -// BEWIDTH-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8 -// BEWIDTH-NEXT: [[TMP2:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8* -// BEWIDTH-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 1 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP3]], align 1 -// BEWIDTH-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]] -// BEWIDTH-NEXT: store volatile i8 [[ADD]], i8* [[TMP3]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_a_zero_bitfield_ok( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8* -// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP2]], align 1 -// LEWIDTHNUM-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]] -// LEWIDTHNUM-NEXT: [[BF_LOAD4:%.*]] = load volatile i8, i8* [[TMP2]], align 1 -// LEWIDTHNUM-NEXT: store volatile i8 [[ADD]], i8* [[TMP2]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_a_zero_bitfield_ok( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8 -// BEWIDTHNUM-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8 -// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8* -// BEWIDTHNUM-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP3]], align 1 -// BEWIDTHNUM-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]] -// BEWIDTHNUM-NEXT: [[BF_LOAD4:%.*]] = load volatile i8, i8* [[TMP3]], align 1 -// BEWIDTHNUM-NEXT: store volatile i8 [[ADD]], i8* [[TMP3]], align 1 -// BEWIDTHNUM-NEXT: ret void -// -void increment_a_zero_bitfield_ok(volatile struct zero_bitfield_ok *s) { - s->a1 += s->a; -} - -// LE-LABEL: @increment_b_zero_bitfield_ok( -// LE-NEXT: entry: -// LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 -// LE-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* -// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LE-NEXT: [[INC:%.*]] = add i32 
[[BF_LOAD]], 1 -// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LE-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215 -// LE-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216 -// LE-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LE-NEXT: ret void -// -// BE-LABEL: @increment_b_zero_bitfield_ok( -// BE-NEXT: entry: -// BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 -// BE-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* -// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BE-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256 -// BE-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256 -// BE-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255 -// BE-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BE-NEXT: ret void -// -// LENUMLOADS-LABEL: @increment_b_zero_bitfield_ok( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_b_zero_bitfield_ok( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_b_zero_bitfield_ok( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: 
@increment_b_zero_bitfield_ok( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_b_zero_bitfield_ok( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_b_zero_bitfield_ok( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// -void increment_b_zero_bitfield_ok(volatile struct zero_bitfield_ok *s) { - s->b++; -} diff --git a/clang/test/CodeGen/bitfield-2.c b/clang/test/CodeGen/bitfield-2.c index 661d42683bc27..9d669575ecd11 100644 --- a/clang/test/CodeGen/bitfield-2.c +++ b/clang/test/CodeGen/bitfield-2.c @@ -14,7 +14,7 @@ // CHECK-RECORD: LLVMType:%struct.s0 = type { [3 x i8] } // CHECK-RECORD: IsZeroInitializable:1 // CHECK-RECORD: BitFields:[ -// CHECK-RECORD: struct __attribute((packed)) s0 { int f0 : 24; }; @@ -54,8 +54,8 @@ unsigned long long test_0() { // CHECK-RECORD: LLVMType:%struct.s1 = type { [3 x i8] } // CHECK-RECORD: IsZeroInitializable:1 // CHECK-RECORD: BitFields:[ -// CHECK-RECORD: +// CHECK-RECORD: #pragma pack(push) #pragma pack(1) @@ -102,7 +102,7 @@ unsigned long long test_1() { // CHECK-RECORD: LLVMType:%union.u2 = type { i8 } // CHECK-RECORD: IsZeroInitializable:1 // CHECK-RECORD: BitFields:[ -// CHECK-RECORD: union __attribute__((packed)) u2 { unsigned long long f0 : 3; @@ -274,8 +274,8 @@ _Bool test_6() { // CHECK-RECORD: LLVMType:%struct.s7 = type { i32, i32, i32, i8, i32, [12 x i8] } // CHECK-RECORD: IsZeroInitializable:1 // 
CHECK-RECORD: BitFields:[
-// CHECK-RECORD:
+// CHECK-RECORD:

 struct __attribute__((aligned(16))) s7 {
   int a, b, c;

From 1f870bd9284ad55dff96ab6f99afd92fd5f294be Mon Sep 17 00:00:00 2001
From: "Paul C. Anagnostopoulos"
Date: Wed, 2 Sep 2020 11:50:30 -0400
Subject: [PATCH 0082/1079] Add detailed reference for the SearchableTables
 backend.

---
 llvm/docs/TableGen/BackEnds.rst | 381 +++++++++++++++++++++++++++++++-
 1 file changed, 377 insertions(+), 4 deletions(-)

diff --git a/llvm/docs/TableGen/BackEnds.rst b/llvm/docs/TableGen/BackEnds.rst
index 8b31338356689..a93f2ace78808 100644
--- a/llvm/docs/TableGen/BackEnds.rst
+++ b/llvm/docs/TableGen/BackEnds.rst
@@ -226,16 +226,14 @@ SearchableTables

 **Purpose**: Generate custom searchable tables.

-**Output**: Enums, global tables and lookup helper functions.
+**Output**: Enums, global tables, and lookup helper functions.

 **Usage**: This backend allows generating free-form, target-specific
 tables from TableGen records. The ARM and AArch64 targets use this backend
 to generate tables of system registers; the AMDGPU target uses it to
 generate meta-data about complex image and memory buffer instructions.

-More documentation is available in ``include/llvm/TableGen/SearchableTable.td``,
-which also contains the definitions of TableGen classes which must be
-instantiated in order to define the enums and tables emitted by this backend.
+See `SearchableTables Reference`_ for a detailed description.

 CTags
 -----
@@ -438,6 +436,381 @@ used for documenting user-facing attributes.
 General BackEnds
 ================

+SearchableTables Reference
+--------------------------
+
+A TableGen include file, ``SearchableTable.td``, provides classes for
+generating C++ searchable tables. These tables are described in the
+following sections. To generate the C++ code, run ``llvm-tblgen`` with the
+``--gen-searchable-tables`` option, which invokes the backend that generates
+the tables from the records you provide.
+
+Each of the data structures generated for searchable tables is guarded by an
+``#ifdef``. This allows you to include the generated ``.inc`` file and select
+only certain data structures for inclusion. The examples below show the macro
+names used in these guards.
+
+Generic Enumerated Types
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``GenericEnum`` class makes it easy to define a C++ enumerated type and
+the enumerated *elements* of that type. To define the type, define a record
+whose parent class is ``GenericEnum`` and whose name is the desired enum
+type. This class provides three fields, which you can set in the record
+using the ``let`` statement.
+
+* ``string FilterClass``. The enum type will have one element for each record
+  that derives from this class. These records are collected to assemble the
+  complete set of elements.
+
+* ``string NameField``. The name of a field *in the collected records* that
+  specifies the name of the element. If a record has no such field, the
+  record's name will be used.
+
+* ``string ValueField``. The name of a field *in the collected records* that
+  specifies the numerical value of the element. If a record has no such
+  field, it will be assigned an integer value. Values are assigned in
+  alphabetical order starting with 0.
+
+Here is an example where the values of the elements are specified
+explicitly, as a template argument to the ``BEntry`` class. The resulting
+C++ code is shown.
+
+.. code-block:: text
+
+  def BValues : GenericEnum {
+    let FilterClass = "BEntry";
+    let NameField = "Name";
+    let ValueField = "Encoding";
+  }
+
+  class BEntry<bits<16> enc> {
+    string Name = NAME;
+    bits<16> Encoding = enc;
+  }
+
+  def BFoo   : BEntry<0xac>;
+  def BBar   : BEntry<0x14>;
+  def BZoo   : BEntry<0x80>;
+  def BSnork : BEntry<0x4c>;
+
+.. code-block:: text
+
+  #ifdef GET_BValues_DECL
+  enum BValues {
+    BBar = 20,
+    BFoo = 172,
+    BSnork = 76,
+    BZoo = 128,
+  };
+  #endif
+
+In the following example, the values of the elements are assigned
+automatically. Note that values are assigned from 0, in alphabetical order
+by element name.
+
+.. code-block:: text
+
+  def CEnum : GenericEnum {
+    let FilterClass = "CEnum";
+  }
+
+  class CEnum;
+
+  def CFoo : CEnum;
+  def CBar : CEnum;
+  def CBaz : CEnum;
+
+.. code-block:: text
+
+  #ifdef GET_CEnum_DECL
+  enum CEnum {
+    CBar = 0,
+    CBaz = 1,
+    CFoo = 2,
+  };
+  #endif
+
+
+Generic Tables
+~~~~~~~~~~~~~~
+
+The ``GenericTable`` class is used to define a searchable generic table.
+TableGen produces C++ code to define the table entries and also produces
+the declaration and definition of a function to search the table based on a
+primary key. To define the table, define a record whose parent class is
+``GenericTable`` and whose name is the name of the global table of entries.
+This class provides six fields.
+
+* ``string FilterClass``. The table will have one entry for each record
+  that derives from this class.
+
+* ``string CppTypeName``. The name of the C++ struct/class type of the
+  table that holds the entries. If unspecified, the ``FilterClass`` name is
+  used.
+
+* ``list<string> Fields``. A list of the names of the fields in the
+  collected records that contain the data for the table entries. The order
+  of this list determines the order of the values in the C++ initializers.
+  See below for information about the types of these fields.
+
+* ``list<string> PrimaryKey``. The list of fields that make up the
+  primary key.
+
+* ``string PrimaryKeyName``. The name of the generated C++ function
+  that performs a lookup on the primary key.
+
+* ``bit PrimaryKeyEarlyOut``. See the third example below.
+
+TableGen attempts to deduce the type of each of the table fields. It can
+deduce ``bit``, ``bits<n>``, ``string``, ``Intrinsic``, and ``Instruction``.
+These can be used in the primary key. TableGen also deduces ``code``, but it
+cannot be used in the primary key. Any other field types must be specified
+explicitly; this is done as shown in the second example below. Such fields
+cannot be used in the primary key.
+
+Here is an example where TableGen can deduce the field types. Note that the
+table entry records are anonymous; the names of entry records are
+irrelevant.
+
+.. code-block:: text
+
+  def ATable : GenericTable {
+    let FilterClass = "AEntry";
+    let Fields = ["Str", "Val1", "Val2"];
+    let PrimaryKey = ["Val1", "Val2"];
+    let PrimaryKeyName = "lookupATableByValues";
+  }
+
+  class AEntry<string str, bits<8> val1, bits<10> val2> {
+    string Str = str;
+    bits<8> Val1 = val1;
+    bits<10> Val2 = val2;
+  }
+
+  def : AEntry<"Bob",   5, 3>;
+  def : AEntry<"Carol", 2, 6>;
+  def : AEntry<"Ted",   4, 4>;
+  def : AEntry<"Alice", 4, 5>;
+  def : AEntry<"Costa", 2, 1>;
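+
+Before looking at the emitted code, here is a sketch of how client code
+might consume this table. The ``.inc`` file name and the surrounding code
+are illustrative only; they are not part of the generated output:
+
+.. code-block:: text
+
+  // Illustrative client code; only the guard macros and the lookup
+  // function come from the generated file.
+  #define GET_ATable_DECL
+  #define GET_ATable_IMPL
+  #include "ATable.inc"
+
+  // Looks up the entry whose primary key is (Val1 == 4, Val2 == 5); per
+  // the records above this finds the "Alice" entry, or returns nullptr
+  // if no entry matches.
+  const AEntry *Entry = lookupATableByValues(4, 5);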
+Here is the generated C++ code. The declaration of ``lookupATableByValues``
+is guarded by ``GET_ATable_DECL``, while the definitions are guarded by
+``GET_ATable_IMPL``.
+
+.. code-block:: text
+
+  #ifdef GET_ATable_DECL
+  const AEntry *lookupATableByValues(uint8_t Val1, uint16_t Val2);
+  #endif
+
+  #ifdef GET_ATable_IMPL
+  constexpr AEntry ATable[] = {
+    { "Costa", 0x2, 0x1 }, // 0
+    { "Carol", 0x2, 0x6 }, // 1
+    { "Ted", 0x4, 0x4 }, // 2
+    { "Alice", 0x4, 0x5 }, // 3
+    { "Bob", 0x5, 0x3 }, // 4
+  };
+
+  const AEntry *lookupATableByValues(uint8_t Val1, uint16_t Val2) {
+    struct KeyType {
+      uint8_t Val1;
+      uint16_t Val2;
+    };
+    KeyType Key = { Val1, Val2 };
+    auto Table = makeArrayRef(ATable);
+    auto Idx = std::lower_bound(Table.begin(), Table.end(), Key,
+      [](const AEntry &LHS, const KeyType &RHS) {
+        if (LHS.Val1 < RHS.Val1)
+          return true;
+        if (LHS.Val1 > RHS.Val1)
+          return false;
+        if (LHS.Val2 < RHS.Val2)
+          return true;
+        if (LHS.Val2 > RHS.Val2)
+          return false;
+        return false;
+      });
+
+    if (Idx == Table.end() ||
+        Key.Val1 != Idx->Val1 ||
+        Key.Val2 != Idx->Val2)
+      return nullptr;
+    return &*Idx;
+  }
+  #endif
+
+The table entries in ``ATable`` are sorted in order by ``Val1``, and within
+each of those values, by ``Val2``. This allows a binary search of the table,
+which is performed in the lookup function by ``std::lower_bound``. The
+lookup function returns a reference to the found table entry, or the null
+pointer if no entry is found.
+
+The next example includes a field whose type TableGen cannot deduce. The
+``Kind`` field uses the enumerated type ``CEnum`` defined above. To inform
+TableGen of the type, the class derived from ``GenericTable`` must include a
+field named ``TypeOf_``\ *field*, where *field* is the name of the field
+whose type is required.
+
+.. code-block:: text
+
+  def CTable : GenericTable {
+    let FilterClass = "CEntry";
+    let Fields = ["Name", "Kind", "Encoding"];
+    GenericEnum TypeOf_Kind = CEnum;
+    let PrimaryKey = ["Encoding"];
+    let PrimaryKeyName = "lookupCEntryByEncoding";
+  }
+
+  class CEntry<string name, CEnum kind, bits<16> enc> {
+    string Name = name;
+    CEnum Kind = kind;
+    bits<16> Encoding = enc;
+  }
+
+  def : CEntry<"Apple", CFoo, 10>;
+  def : CEntry<"Pear",  CBaz, 15>;
+  def : CEntry<"Apple", CBar, 13>;
+
+Here is the generated C++ code.
+
+.. code-block:: text
+
+  #ifdef GET_CTable_DECL
+  const CEntry *lookupCEntryByEncoding(uint16_t Encoding);
+  #endif
+
+  #ifdef GET_CTable_IMPL
+  constexpr CEntry CTable[] = {
+    { "Apple", CFoo, 0xA }, // 0
+    { "Apple", CBar, 0xD }, // 1
+    { "Pear", CBaz, 0xF }, // 2
+  };
+
+  const CEntry *lookupCEntryByEncoding(uint16_t Encoding) {
+    struct KeyType {
+      uint16_t Encoding;
+    };
+    KeyType Key = { Encoding };
+    auto Table = makeArrayRef(CTable);
+    auto Idx = std::lower_bound(Table.begin(), Table.end(), Key,
+      [](const CEntry &LHS, const KeyType &RHS) {
+        if (LHS.Encoding < RHS.Encoding)
+          return true;
+        if (LHS.Encoding > RHS.Encoding)
+          return false;
+        return false;
+      });
+
+    if (Idx == Table.end() ||
+        Key.Encoding != Idx->Encoding)
+      return nullptr;
+    return &*Idx;
+  }
+
+The ``PrimaryKeyEarlyOut`` field, when set to 1, modifies the lookup
+function so that it tests the first field of the primary key to determine
+whether it is within the range of the collected records' primary keys. If
+not, the function returns the null pointer without performing the binary
+search. This is useful for tables that provide data for only some of the
+elements of a larger enum-based space. The first field of the primary key
+must be an integral type; it cannot be a string.
+
+Adding ``let PrimaryKeyEarlyOut = 1`` to the ``ATable`` above:
+
+.. code-block:: text
+
+  def ATable : GenericTable {
+    let FilterClass = "AEntry";
+    let Fields = ["Str", "Val1", "Val2"];
+    let PrimaryKey = ["Val1", "Val2"];
+    let PrimaryKeyName = "lookupATableByValues";
+    let PrimaryKeyEarlyOut = 1;
+  }
+
+causes the lookup function to change as follows:
+
+.. code-block:: text
+
+  const AEntry *lookupATableByValues(uint8_t Val1, uint16_t Val2) {
+    if ((Val1 < 0x2) ||
+        (Val1 > 0x5))
+      return nullptr;
+
+    struct KeyType {
+    ...
+
+Search Indexes
+~~~~~~~~~~~~~~
+
+The ``SearchIndex`` class is used to define additional lookup functions for
+generic tables. To define an additional function, define a record whose
+parent class is ``SearchIndex`` and whose name is the name of the desired
+lookup function. This class provides three fields.
+
+* ``GenericTable Table``. The name of the table that is to receive another
+  lookup function.
+
+* ``list<string> Key``. The list of fields that make up the secondary key.
+
+* ``bit EarlyOut``. See the third example in `Generic Tables`_.
+
+Here is an example of a secondary key added to the ``CTable`` above. The
+generated function looks up entries based on the ``Name`` and ``Kind``
+fields.
+
+.. code-block:: text
+
+  def lookupCEntry : SearchIndex {
+    let Table = CTable;
+    let Key = ["Name", "Kind"];
+  }
+
+This use of ``SearchIndex`` generates the following additional C++ code.
+
+.. code-block:: text
+
+  const CEntry *lookupCEntry(StringRef Name, unsigned Kind);
+
+  ...
+
+  const CEntry *lookupCEntry(StringRef Name, unsigned Kind) {
+    struct IndexType {
+      const char * Name;
+      unsigned Kind;
+      unsigned _index;
+    };
+    static const struct IndexType Index[] = {
+      { "APPLE", CBar, 1 },
+      { "APPLE", CFoo, 0 },
+      { "PEAR", CBaz, 2 },
+    };
+
+    struct KeyType {
+      std::string Name;
+      unsigned Kind;
+    };
+    KeyType Key = { Name.upper(), Kind };
+    auto Table = makeArrayRef(Index);
+    auto Idx = std::lower_bound(Table.begin(), Table.end(), Key,
+      [](const IndexType &LHS, const KeyType &RHS) {
+        int CmpName = StringRef(LHS.Name).compare(RHS.Name);
+        if (CmpName < 0) return true;
+        if (CmpName > 0) return false;
+        if ((unsigned)LHS.Kind < (unsigned)RHS.Kind)
+          return true;
+        if ((unsigned)LHS.Kind > (unsigned)RHS.Kind)
+          return false;
+        return false;
+      });
+
+    if (Idx == Table.end() ||
+        Key.Name != Idx->Name ||
+        Key.Kind != Idx->Kind)
+      return nullptr;
+    return &CTable[Idx->_index];
+  }
+
 JSON
 ----

From e6bb4c8e7b3e27f214c9665763a2dd09aa96a5ac Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 8 Sep 2020 10:49:32 -0700
Subject: [PATCH 0083/1079] [X86] SSE4_A should only imply SSE3 not SSSE3 in
 the frontend.

SSE4_1 and SSE4_2 do imply SSSE3. So I guess I got confused when switching
the code to being table based in D83273.
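For illustration, implied features expand transitively through the table. A
minimal standalone sketch of that fixed-point expansion (illustrative only,
not the actual X86TargetParser code; the names and the tiny feature set are
made up):

    #include <bitset>
    #include <cstddef>

    constexpr std::size_t NumFeatures = 3;
    enum Feature { SSE3, SSSE3, SSE4_A };

    // Implied[F] holds the features directly implied by F. With the bad
    // edge SSE4_A -> SSSE3, expanding {SSE4_A} would wrongly pull in SSSE3
    // (and thus __SSSE3__ for amdfam10); with SSE4_A -> SSE3 it only pulls
    // in SSE3.
    std::bitset<NumFeatures> Implied[NumFeatures];

    std::bitset<NumFeatures> expand(std::bitset<NumFeatures> Set) {
      bool Changed = true;
      while (Changed) { // iterate until no new implied features appear
        Changed = false;
        for (std::size_t F = 0; F < NumFeatures; ++F)
          if (Set[F] && (Implied[F] & ~Set).any()) {
            Set |= Implied[F];
            Changed = true;
          }
      }
      return Set;
    }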
Fixes PR47464

---
 clang/test/Preprocessor/predefined-arch-macros.c | 2 ++
 llvm/lib/Support/X86TargetParser.cpp             | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index 5326596fee93c..3c369ace32d51 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -2525,6 +2525,7 @@
 // CHECK_AMDFAM10_M32: #define __SSE4A__ 1
 // CHECK_AMDFAM10_M32: #define __SSE_MATH__ 1
 // CHECK_AMDFAM10_M32: #define __SSE__ 1
+// CHECK_AMDFAM10_M32-NOT: #define __SSSE3__ 1
 // CHECK_AMDFAM10_M32: #define __amdfam10 1
 // CHECK_AMDFAM10_M32: #define __amdfam10__ 1
 // CHECK_AMDFAM10_M32: #define __i386 1
@@ -2547,6 +2548,7 @@
 // CHECK_AMDFAM10_M64: #define __SSE4A__ 1
 // CHECK_AMDFAM10_M64: #define __SSE_MATH__ 1
 // CHECK_AMDFAM10_M64: #define __SSE__ 1
+// CHECK_AMDFAM10_M64-NOT: #define __SSSE3__ 1
 // CHECK_AMDFAM10_M64: #define __amd64 1
 // CHECK_AMDFAM10_M64: #define __amd64__ 1
 // CHECK_AMDFAM10_M64: #define __amdfam10 1

diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp
index a5af98582452b..b7d9bd4f865c9 100644
--- a/llvm/lib/Support/X86TargetParser.cpp
+++ b/llvm/lib/Support/X86TargetParser.cpp
@@ -529,7 +529,7 @@ static constexpr FeatureBitset ImpliedFeaturesAVX5124FMAPS = {};
 static constexpr FeatureBitset ImpliedFeaturesAVX5124VNNIW = {};

 // SSE4_A->FMA4->XOP chain.
-static constexpr FeatureBitset ImpliedFeaturesSSE4_A = FeatureSSSE3;
+static constexpr FeatureBitset ImpliedFeaturesSSE4_A = FeatureSSE3;
 static constexpr FeatureBitset ImpliedFeaturesFMA4 = FeatureAVX | FeatureSSE4_A;
 static constexpr FeatureBitset ImpliedFeaturesXOP = FeatureFMA4;

From 59a467ee4faeee5b569960e53a76a0311d050d18 Mon Sep 17 00:00:00 2001
From: Xun Li
Date: Tue, 8 Sep 2020 10:58:35 -0700
Subject: [PATCH 0084/1079] [Coroutine] Make dealing with alloca spills more
 robust

D66230 attempted to fix a problem where allocas are used before CoroBegin.
It keeps allocas and their uses in place if there are no escapes/changes to
the data before CoroBegin. Unfortunately that's incorrect.
Consider this code:

%var = alloca i32
%1 = getelementptr .. %var; stays put
%f = call i8* @llvm.coro.begin
store ... %1

After this fix, %1 will now stay put; however, if a store happens after
coro.begin and hence modifies the content, this change will not be
reflected in the coroutine frame (and will eventually be DCEed).
To generalize the problem: if any alias pointer is created before
coro.begin for an alloca, and that alias pointer is later written into
after coro.begin, it will lead to incorrect behavior.

There are also a few other minor issues, such as an incorrect dominance
condition check in the pointer visitor, unhandled memory intrinsics, etc.
This patch attempts to fix some of these issues and makes the handling of
aliases more robust.

While visiting through the alloca pointer, we also keep track of all
aliases created that will be used after CoroBegin. We track the offset of
each alias, and then recreate these aliases after CoroBegin using those
offsets.

It's worth noting that this is not perfect and there will still be cases we
cannot handle. I think it's impractical to handle all cases given the
current design. This patch makes it more robust and should be a pure win.
In the meantime, we need to think about how to completely eliminate these
issues, likely through the route @rjmccall mentioned in D66230.
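To make the recreation concrete, the following sketch mirrors the
insertSpills hunk below; the names `Off`, `FieldIndex`, `Alloca`, and
`OldAlias` stand in for the patch's local values and are illustrative:

    // For an alias recorded at byte offset Off into the original alloca,
    // rebuild an equivalent pointer from the coroutine frame after
    // coro.begin, then redirect only the uses dominated by coro.begin.
    auto *FramePtr = GetFramePointer(FieldIndex, Alloca);
    auto *FramePtrRaw =
        Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C));
    auto *AliasPtr = Builder.CreateGEP(
        FramePtrRaw, ConstantInt::get(Type::getInt64Ty(C), Off));
    auto *AliasPtrTyped =
        Builder.CreateBitCast(AliasPtr, OldAlias->getType());
    OldAlias->replaceUsesWithIf(
        AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); });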
Differential Revision: https://reviews.llvm.org/D86859

---
 llvm/lib/Transforms/Coroutines/CoroFrame.cpp  | 121 ++++++++++++----
 .../Transforms/Coroutines/coro-param-copy.ll  |  57 ++++++---
 2 files changed, 136 insertions(+), 42 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index b2677b4572e47..acb14b11aba9e 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -625,7 +625,22 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
 // We use a pointer use visitor to discover if there are any writes into an
 // alloca that dominates CoroBegin. If that is the case, insertSpills will copy
 // the value from the alloca into the coroutine frame spill slot corresponding
-// to that alloca.
+// to that alloca. We also collect any aliases pointing to the alloca that are
+// created before CoroBegin but used after CoroBegin. These aliases will be
+// recreated after CoroBegin from the frame address, so that later references
+// point to the frame instead of the stack.
+// Note: We are repurposing PtrUseVisitor's isEscaped() to mean whether the
+// pointer is potentially written into.
+// TODO: If the pointer is really escaped, we are in big trouble because we
+// will be escaping a pointer to a stack address that would no longer exist
+// soon. However most escape analysis isn't good enough to precisely tell,
+// so we are assuming that if a pointer is escaped that it's written into.
+// TODO: Another potential issue is if we are creating an alias through
+// a function call, e.g.:
+//   %a = AllocaInst ...
+//   %b = call @computeAddress(... %a)
+// If %b is an alias of %a and will be used after CoroBegin, this will be
+// broken and there is nothing we can do about it.
 namespace {
 struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
   using Base = PtrUseVisitor<AllocaUseVisitor>;
@@ -633,49 +648,83 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
                    const CoroBeginInst &CB)
       : PtrUseVisitor<AllocaUseVisitor>(DL), DT(DT), CoroBegin(CB) {}

-  // We are only interested in uses that dominate coro.begin.
+  // We are only interested in uses that are not dominated by coro.begin.
   void visit(Instruction &I) {
-    if (DT.dominates(&I, &CoroBegin))
+    if (!DT.dominates(&CoroBegin, &I))
       Base::visit(I);
   }
   // We need to provide this overload as PtrUseVisitor uses a pointer based
   // visiting function.
   void visit(Instruction *I) { return visit(*I); }

-  void visitLoadInst(LoadInst &) {} // Good. Nothing to do.
+  // We cannot handle PHI node and SelectInst because they could be selecting
+  // between two addresses that point to different Allocas.
+  void visitPHINode(PHINode &I) {
+    assert(!usedAfterCoroBegin(I) &&
+           "Unable to handle PHI node of aliases created before CoroBegin but "
+           "used after CoroBegin");
+  }
+
+  void visitSelectInst(SelectInst &I) {
+    assert(!usedAfterCoroBegin(I) &&
+           "Unable to handle Select of aliases created before CoroBegin but "
+           "used after CoroBegin");
+  }
+
+  void visitLoadInst(LoadInst &) {}

   // If the use is an operand, the pointer escaped and anything can write into
   // that memory. If the use is the pointer, we are definitely writing into the
   // alloca and therefore we need to copy.
-  void visitStoreInst(StoreInst &SI) { PI.setAborted(&SI); }
+  void visitStoreInst(StoreInst &SI) { PI.setEscaped(&SI); }

-  // Any other instruction that is not filtered out by PtrUseVisitor, will
-  // result in the copy.
-  void visitInstruction(Instruction &I) { PI.setAborted(&I); }
+  // All mem intrinsics modify the data.
+  void visitMemIntrinsic(MemIntrinsic &MI) { PI.setEscaped(&MI); }
+
+  void visitBitCastInst(BitCastInst &BC) {
+    Base::visitBitCastInst(BC);
+    handleAlias(BC);
+  }
+
+  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+    Base::visitAddrSpaceCastInst(ASC);
+    handleAlias(ASC);
+  }
+
+  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+    // The base visitor will adjust Offset accordingly.
+    Base::visitGetElementPtrInst(GEPI);
+    handleAlias(GEPI);
+  }
+
+  const SmallVector<std::pair<Instruction *, APInt>, 1> &getAliases() const {
+    return Aliases;
+  }

 private:
   const DominatorTree &DT;
   const CoroBeginInst &CoroBegin;
+  // All aliases of the original AllocaInst that are used after CoroBegin.
+  // Each entry contains the instruction and the offset in the original
+  // alloca.
+  SmallVector<std::pair<Instruction *, APInt>, 1> Aliases{};
+
+  bool usedAfterCoroBegin(Instruction &I) {
+    for (auto &U : I.uses())
+      if (DT.dominates(&CoroBegin, U))
+        return true;
+    return false;
+  }
+
+  void handleAlias(Instruction &I) {
+    if (!usedAfterCoroBegin(I))
+      return;
+
+    assert(IsOffsetKnown && "Can only handle alias with known offset created "
+                            "before CoroBegin and used after");
+    Aliases.emplace_back(&I, Offset);
+  }
 };
 } // namespace

-static bool mightWriteIntoAllocaPtr(AllocaInst &A, const DominatorTree &DT,
-                                    const CoroBeginInst &CB) {
-  const DataLayout &DL = A.getModule()->getDataLayout();
-  AllocaUseVisitor Visitor(DL, DT, CB);
-  auto PtrI = Visitor.visitPtr(A);
-  if (PtrI.isEscaped() || PtrI.isAborted()) {
-    auto *PointerEscapingInstr = PtrI.getEscapingInst()
-                                     ? PtrI.getEscapingInst()
-                                     : PtrI.getAbortingInst();
-    if (PointerEscapingInstr) {
-      LLVM_DEBUG(
-          dbgs() << "AllocaInst copy was triggered by instruction: "
-                 << *PointerEscapingInstr << "\n");
-    }
-    return true;
-  }
-  return false;
-}

 // We need to make room to insert a spill after initial PHIs, but before
 // catchswitch instruction. Placing it before violates the requirement that
@@ -955,7 +1004,11 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {

   for (auto &P : Allocas) {
     AllocaInst *const A = P.first;
-    if (mightWriteIntoAllocaPtr(*A, DT, *CB)) {
+    AllocaUseVisitor Visitor(A->getModule()->getDataLayout(), DT, *CB);
+    auto PtrI = Visitor.visitPtr(*A);
+    assert(!PtrI.isAborted());
+    if (PtrI.isEscaped()) {
+      // isEscaped really means potentially modified before CoroBegin.
       if (A->isArrayAllocation())
         report_fatal_error(
             "Coroutines cannot handle copying of array allocas yet");
@@ -964,6 +1017,20 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
       auto *Value = Builder.CreateLoad(A->getAllocatedType(), A);
       Builder.CreateStore(Value, G);
     }
+    // For each alias to the alloca created before CoroBegin but used after
+    // CoroBegin, we recreate it after CoroBegin by applying the offset
+    // to the pointer in the frame.
+ for (const auto &Alias : Visitor.getAliases()) { + auto *FramePtr = GetFramePointer(P.second, A); + auto *FramePtrRaw = + Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C)); + auto *AliasPtr = Builder.CreateGEP( + FramePtrRaw, ConstantInt::get(Type::getInt64Ty(C), Alias.second)); + auto *AliasPtrTyped = + Builder.CreateBitCast(AliasPtr, Alias.first->getType()); + Alias.first->replaceUsesWithIf( + AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); }); + } } } return FramePtr; diff --git a/llvm/test/Transforms/Coroutines/coro-param-copy.ll b/llvm/test/Transforms/Coroutines/coro-param-copy.ll index 5967a05226fdb..da08c4f15e156 100644 --- a/llvm/test/Transforms/Coroutines/coro-param-copy.ll +++ b/llvm/test/Transforms/Coroutines/coro-param-copy.ll @@ -5,22 +5,37 @@ define i8* @f() "coroutine.presplit"="1" { entry: + %a.addr = alloca i64 ; read-only before coro.begin + %a = load i64, i64* %a.addr ; cannot modify the value, don't need to copy + %x.addr = alloca i64 - call void @use(i64* %x.addr) ; might write to %x + call void @use(i64* %x.addr) ; uses %x.addr before coro.begin + %y.addr = alloca i64 - %y = load i64, i64* %y.addr ; cannot modify the value, don't need to copy - call void @print(i64 %y) + %y.cast = bitcast i64* %y.addr to i8* ; alias created and used after coro.begin + + %z.addr = alloca i64 + %flag = call i1 @check() + br i1 %flag, label %flag_true, label %flag_merge + +flag_true: + call void @use(i64* %z.addr) ; conditionally used %z.addr + br label %flag_merge +flag_merge: %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) %size = call i32 @llvm.coro.size.i32() - %alloc = call i8* @myAlloc(i64 %y, i32 %size) + %alloc = call i8* @myAlloc(i32 %size) %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + call void @llvm.memset.p0i8.i32(i8* %y.cast, i8 1, i32 4, i1 false) %0 = call i8 @llvm.coro.suspend(token none, i1 false) switch i8 %0, label %suspend [i8 0, label %resume i8 1, label %cleanup] resume: + call void @use(i64* %a.addr) call void @use(i64* %x.addr) call void @use(i64* %y.addr) + call void @use(i64* %z.addr) br label %cleanup cleanup: @@ -33,26 +48,36 @@ suspend: } ; See that we added both x and y to the frame. -; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i1 } +; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i64, i64, i1 } ; See that all of the uses prior to coro-begin stays put. ; CHECK-LABEL: define i8* @f() { ; CHECK-NEXT: entry: +; CHECK-NEXT: %a.addr = alloca i64 ; CHECK-NEXT: %x.addr = alloca i64 ; CHECK-NEXT: call void @use(i64* %x.addr) ; CHECK-NEXT: %y.addr = alloca i64 -; CHECK-NEXT: %y = load i64, i64* %y.addr -; CHECK-NEXT: call void @print(i64 %y) +; CHECK-NEXT: %z.addr = alloca i64 ; See that we only copy the x as y was not modified prior to coro.begin. -; CHECK: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr -; CHECK-NEXT: %0 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 -; CHECK-NEXT: %1 = load i64, i64* %x.addr -; CHECK-NEXT: store i64 %1, i64* %0 -; CHECK-NEXT: %index.addr1 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 -; CHECK-NEXT: store i1 false, i1* %index.addr1 +; CHECK: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr +; The next 3 instructions are to copy data in %x.addr from stack to frame. 
+; CHECK-NEXT: %0 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3 +; CHECK-NEXT: %1 = load i64, i64* %x.addr, align 4 +; CHECK-NEXT: store i64 %1, i64* %0, align 4 +; The next 2 instructions are to recreate %y.cast in the original IR. +; CHECK-NEXT: %2 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 +; CHECK-NEXT: %3 = bitcast i64* %2 to i8* +; The next 3 instructions are to copy data in %z.addr from stack to frame. +; CHECK-NEXT: %4 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5 +; CHECK-NEXT: %5 = load i64, i64* %z.addr, align 4 +; CHECK-NEXT: store i64 %5, i64* %4, align 4 +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %3, i8 1, i32 4, i1 false) +; CHECK-NEXT: %index.addr1 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 6 +; CHECK-NEXT: store i1 false, i1* %index.addr1, align 1 ; CHECK-NEXT: ret i8* %hdl + declare i8* @llvm.coro.free(token, i8*) declare i32 @llvm.coro.size.i32() declare i8 @llvm.coro.suspend(token, i1) @@ -64,7 +89,9 @@ declare i1 @llvm.coro.alloc(token) declare i8* @llvm.coro.begin(token, i8*) declare i1 @llvm.coro.end(i8*, i1) -declare noalias i8* @myAlloc(i64, i32) -declare void @print(i64) +declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1) + +declare noalias i8* @myAlloc(i32) declare void @use(i64*) declare void @free(i8*) +declare i1 @check() From e97f3b1b4327f9db0ca12cdd7157c304ad206802 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 5 Sep 2020 17:23:48 +0200 Subject: [PATCH 0085/1079] [InstCombine] Fold abs of known negative operand If we know that the abs operand is known negative, we can replace it with a neg. To avoid computing known bits twice, I've removed the fold for the non-negative case from InstSimplify. Both the non-negative and the negative case are handled by InstCombine now, with one known bits call. Differential Revision: https://reviews.llvm.org/D87196 --- llvm/lib/Analysis/InstructionSimplify.cpp | 3 --- .../InstCombine/InstCombineCalls.cpp | 19 +++++++++++++++---- .../Transforms/InstCombine/abs-intrinsic.ll | 7 +++---- .../Transforms/InstSimplify/abs_intrinsic.ll | 17 ++++++++++++----- 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 7c13b41bc7e64..e59c0a84044aa 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5274,9 +5274,6 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, // on the outer abs. if (match(Op0, m_Intrinsic(m_Value(), m_Value()))) return Op0; - // If the sign bit is clear already, then abs does not do anything. 
- if (isKnownNonNegative(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) - return Op0; break; case Intrinsic::smax: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 40f6e9e147d76..11c2367d1608e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -657,6 +657,19 @@ InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) { return nullptr; } +static Optional getKnownSign(Value *Op, Instruction *CxtI, + const DataLayout &DL, AssumptionCache *AC, + DominatorTree *DT) { + KnownBits Known = computeKnownBits(Op, DL, 0, AC, CxtI, DT); + if (Known.isNonNegative()) + return false; + if (Known.isNegative()) + return true; + + return isImpliedByDomCondition( + ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL); +} + /// CallInst simplification. This mostly only handles folding of intrinsic /// instructions. For normal calls, it allows visitCallBase to do the heavy /// lifting. @@ -791,11 +804,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X)))) return replaceOperand(*II, 0, X); - if (Optional Imp = isImpliedByDomCondition( - ICmpInst::ICMP_SGE, IIOperand, - Constant::getNullValue(IIOperand->getType()), II, DL)) { + if (Optional Sign = getKnownSign(IIOperand, II, DL, &AC, &DT)) { // abs(x) -> x if x >= 0 - if (*Imp) + if (!*Sign) return replaceInstUsesWith(*II, IIOperand); // abs(x) -> -x if x < 0 diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll index b00681d44d26c..b5a74f728ac39 100644 --- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll @@ -233,7 +233,7 @@ define i32 @abs_assume_neg(i32 %x) { ; CHECK-LABEL: @abs_assume_neg( ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) +; CHECK-NEXT: [[ABS:%.*]] = sub i32 0, [[X]] ; CHECK-NEXT: ret i32 [[ABS]] ; %cmp = icmp slt i32 %x, 0 @@ -245,9 +245,8 @@ define i32 @abs_assume_neg(i32 %x) { define i32 @abs_known_neg(i16 %x) { ; CHECK-LABEL: @abs_known_neg( ; CHECK-NEXT: [[EXT:%.*]] = zext i16 [[X:%.*]] to i32 -; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[EXT]], -1 -; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[NEG]], i1 false) -; CHECK-NEXT: ret i32 [[ABS]] +; CHECK-NEXT: [[NEG_NEG:%.*]] = add nuw nsw i32 [[EXT]], 1 +; CHECK-NEXT: ret i32 [[NEG_NEG]] ; %ext = zext i16 %x to i32 %neg = sub nsw i32 -1, %ext diff --git a/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll b/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll index 70b50da9f0415..4598c5732e121 100644 --- a/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll +++ b/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll @@ -47,11 +47,14 @@ define i32 @test_abs_abs_3(i32 %x) { } ; If the sign bit is known zero, the abs is not needed. +; These cases are only folded by InstCombine, to avoid computing known bits +; twice, for the non-negative and the negative case. 
define i32 @zext_abs(i31 %x) { ; CHECK-LABEL: @zext_abs( ; CHECK-NEXT: [[ZEXT:%.*]] = zext i31 [[X:%.*]] to i32 -; CHECK-NEXT: ret i32 [[ZEXT]] +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[ZEXT]], i1 false) +; CHECK-NEXT: ret i32 [[ABS]] ; %zext = zext i31 %x to i32 %abs = call i32 @llvm.abs.i32(i32 %zext, i1 false) @@ -61,7 +64,8 @@ define i32 @zext_abs(i31 %x) { define <3 x i82> @lshr_abs(<3 x i82> %x) { ; CHECK-LABEL: @lshr_abs( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i82> [[X:%.*]], -; CHECK-NEXT: ret <3 x i82> [[LSHR]] +; CHECK-NEXT: [[ABS:%.*]] = call <3 x i82> @llvm.abs.v3i82(<3 x i82> [[LSHR]], i1 true) +; CHECK-NEXT: ret <3 x i82> [[ABS]] ; %lshr = lshr <3 x i82> %x, %abs = call <3 x i82> @llvm.abs.v3i82(<3 x i82> %lshr, i1 true) @@ -71,7 +75,8 @@ define <3 x i82> @lshr_abs(<3 x i82> %x) { define i32 @and_abs(i32 %x) { ; CHECK-LABEL: @and_abs( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 2147483644 -; CHECK-NEXT: ret i32 [[AND]] +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[AND]], i1 true) +; CHECK-NEXT: ret i32 [[ABS]] ; %and = and i32 %x, 2147483644 %abs = call i32 @llvm.abs.i32(i32 %and, i1 true) @@ -81,7 +86,8 @@ define i32 @and_abs(i32 %x) { define <3 x i82> @select_abs(<3 x i1> %cond) { ; CHECK-LABEL: @select_abs( ; CHECK-NEXT: [[SEL:%.*]] = select <3 x i1> [[COND:%.*]], <3 x i82> zeroinitializer, <3 x i82> -; CHECK-NEXT: ret <3 x i82> [[SEL]] +; CHECK-NEXT: [[ABS:%.*]] = call <3 x i82> @llvm.abs.v3i82(<3 x i82> [[SEL]], i1 false) +; CHECK-NEXT: ret <3 x i82> [[ABS]] ; %sel = select <3 x i1> %cond, <3 x i82> zeroinitializer, <3 x i82> %abs = call <3 x i82> @llvm.abs.v3i82(<3 x i82> %sel, i1 false) @@ -94,7 +100,8 @@ define i32 @assume_abs(i32 %x) { ; CHECK-LABEL: @assume_abs( ; CHECK-NEXT: [[ASSUME:%.*]] = icmp sge i32 [[X:%.*]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[ASSUME]]) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 true) +; CHECK-NEXT: ret i32 [[ABS]] ; %assume = icmp sge i32 %x, 0 call void @llvm.assume(i1 %assume) From 6eef387ddd863db1afe044e208bbff4366d5dac2 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 8 Sep 2020 20:20:32 +0200 Subject: [PATCH 0086/1079] [InstCombine] Test comparison of abs with int min (NFC) --- .../Transforms/InstCombine/abs-intrinsic.ll | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll index b5a74f728ac39..d63b0a21f217f 100644 --- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s +declare i8 @llvm.abs.i8(i8, i1) declare i32 @llvm.abs.i32(i32, i1) declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) declare <3 x i82> @llvm.abs.v3i82(<3 x i82>, i1) @@ -253,3 +254,43 @@ define i32 @abs_known_neg(i16 %x) { %abs = call i32 @llvm.abs.i32(i32 %neg, i1 false) ret i32 %abs } + +define i1 @abs_eq_int_min_poison(i8 %x) { +; CHECK-LABEL: @abs_eq_int_min_poison( +; CHECK-NEXT: ret i1 false +; + %abs = call i8 @llvm.abs.i8(i8 %x, i1 true) + %cmp = icmp eq i8 %abs, -128 + ret i1 %cmp +} + +define i1 @abs_ne_int_min_poison(i8 %x) { +; CHECK-LABEL: @abs_ne_int_min_poison( +; CHECK-NEXT: ret i1 true +; + %abs = call i8 @llvm.abs.i8(i8 %x, i1 true) + %cmp = icmp ne i8 %abs, -128 + ret i1 %cmp +} + +define i1 @abs_eq_int_min_nopoison(i8 %x) { +; CHECK-LABEL: 
@abs_eq_int_min_nopoison( +; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 false) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[ABS]], -128 +; CHECK-NEXT: ret i1 [[CMP]] +; + %abs = call i8 @llvm.abs.i8(i8 %x, i1 false) + %cmp = icmp eq i8 %abs, -128 + ret i1 %cmp +} + +define i1 @abs_ne_int_min_nopoison(i8 %x) { +; CHECK-LABEL: @abs_ne_int_min_nopoison( +; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 false) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[ABS]], -128 +; CHECK-NEXT: ret i1 [[CMP]] +; + %abs = call i8 @llvm.abs.i8(i8 %x, i1 false) + %cmp = icmp ne i8 %abs, -128 + ret i1 %cmp +} From f6b87da0c73fcf7f8f051151ce62d2e07a466a8e Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 8 Sep 2020 20:23:03 +0200 Subject: [PATCH 0087/1079] [InstCombine] Fold comparison of abs with int min If the abs is poisoning, this is already folded to true/false. For non-poisoning abs, we can convert this to a comparison with the operand. --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 5 +++-- llvm/test/Transforms/InstCombine/abs-intrinsic.ll | 6 ++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 608017b6dca25..74e9525e8ed46 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3090,9 +3090,10 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant( switch (II->getIntrinsicID()) { case Intrinsic::abs: // abs(A) == 0 -> A == 0 - if (C.isNullValue()) + // abs(A) == INT_MIN -> A == INT_MIN + if (C.isNullValue() || C.isMinSignedValue()) return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0), - Constant::getNullValue(Ty)); + ConstantInt::get(Ty, C)); break; case Intrinsic::bswap: diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll index d63b0a21f217f..30e5a9ddab3c6 100644 --- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll @@ -275,8 +275,7 @@ define i1 @abs_ne_int_min_poison(i8 %x) { define i1 @abs_eq_int_min_nopoison(i8 %x) { ; CHECK-LABEL: @abs_eq_int_min_nopoison( -; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[ABS]], -128 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], -128 ; CHECK-NEXT: ret i1 [[CMP]] ; %abs = call i8 @llvm.abs.i8(i8 %x, i1 false) @@ -286,8 +285,7 @@ define i1 @abs_eq_int_min_nopoison(i8 %x) { define i1 @abs_ne_int_min_nopoison(i8 %x) { ; CHECK-LABEL: @abs_ne_int_min_nopoison( -; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[ABS]], -128 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[X:%.*]], -128 ; CHECK-NEXT: ret i1 [[CMP]] ; %abs = call i8 @llvm.abs.i8(i8 %x, i1 false) From d95ef009bd502a1c2c82952d4fa6fd1db836cef9 Mon Sep 17 00:00:00 2001 From: Azharuddin Mohammed Date: Tue, 8 Sep 2020 10:57:06 -0700 Subject: [PATCH 0088/1079] Update clang/test/Driver/darwin-infer-simulator-sdkroot.c - Fix it to work on Apple Silicon - Add testcases for simulators running on Apple Silicon --- .../Driver/darwin-infer-simulator-sdkroot.c | 43 +++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/clang/test/Driver/darwin-infer-simulator-sdkroot.c b/clang/test/Driver/darwin-infer-simulator-sdkroot.c index a084bf6346b62..7d4d4070b81a1 100644 --- 
a/clang/test/Driver/darwin-infer-simulator-sdkroot.c +++ b/clang/test/Driver/darwin-infer-simulator-sdkroot.c @@ -17,7 +17,7 @@ // // RUN: rm -rf %t/SDKs/iPhoneSimulator8.0.sdk // RUN: mkdir -p %t/SDKs/iPhoneSimulator8.0.sdk -// RUN: env SDKROOT=%t/SDKs/iPhoneSimulator8.0.sdk %clang %s -mlinker-version=400 -### 2>&1 \ +// RUN: env SDKROOT=%t/SDKs/iPhoneSimulator8.0.sdk %clang -arch x86_64 %s -mlinker-version=400 -### 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-SIMULATOR %s // // CHECK-SIMULATOR: clang @@ -27,6 +27,18 @@ // CHECK-SIMULATOR: "-ios_simulator_version_min" "8.0.0" // // +// RUN: rm -rf %t/SDKs/iPhoneSimulator14.0.sdk +// RUN: mkdir -p %t/SDKs/iPhoneSimulator14.0.sdk +// RUN: env SDKROOT=%t/SDKs/iPhoneSimulator14.0.sdk %clang -arch arm64 %s -mlinker-version=400 -### 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-SIMULATOR-ARM64 %s +// +// CHECK-SIMULATOR-ARM64: clang +// CHECK-SIMULATOR-ARM64: "-cc1" +// CHECK-SIMULATOR-ARM64: -apple-ios14.0.0-simulator" +// CHECK-SIMULATOR-ARM64: ld +// CHECK-SIMULATOR-ARM64: "-ios_simulator_version_min" "14.0.0" +// +// // RUN: rm -rf %t/SDKs/WatchOS3.0.sdk // RUN: mkdir -p %t/SDKs/WatchOS3.0.sdk // RUN: env SDKROOT=%t/SDKs/WatchOS3.0.sdk %clang %s -mlinker-version=400 -### 2>&1 \ @@ -43,7 +55,7 @@ // // RUN: rm -rf %t/SDKs/WatchSimulator3.0.sdk // RUN: mkdir -p %t/SDKs/WatchSimulator3.0.sdk -// RUN: env SDKROOT=%t/SDKs/WatchSimulator3.0.sdk %clang %s -mlinker-version=400 -### 2>&1 \ +// RUN: env SDKROOT=%t/SDKs/WatchSimulator3.0.sdk %clang -arch x86_64 %s -mlinker-version=400 -### 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-WATCH-SIMULATOR %s // // CHECK-WATCH-SIMULATOR: clang @@ -53,6 +65,18 @@ // CHECK-WATCH-SIMULATOR: "-watchos_simulator_version_min" "3.0.0" // // +// RUN: rm -rf %t/SDKs/WatchSimulator7.0.sdk +// RUN: mkdir -p %t/SDKs/WatchSimulator7.0.sdk +// RUN: env SDKROOT=%t/SDKs/WatchSimulator7.0.sdk %clang -arch arm64 %s -mlinker-version=400 -### 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-WATCH-SIMULATOR-ARM64 %s +// +// CHECK-WATCH-SIMULATOR-ARM64: clang +// CHECK-WATCH-SIMULATOR-ARM64: "-cc1" +// CHECK-WATCH-SIMULATOR-ARM64: -apple-watchos7.0.0-simulator" +// CHECK-WATCH-SIMULATOR-ARM64: ld +// CHECK-WATCH-SIMULATOR-ARM64: "-watchos_simulator_version_min" "7.0.0" +// +// // RUN: rm -rf %t/SDKs/AppleTVOS10.0.sdk // RUN: mkdir -p %t/SDKs/AppleTVOS10.0.sdk // RUN: env SDKROOT=%t/SDKs/AppleTVOS10.0.sdk %clang %s -mlinker-version=400 -### 2>&1 \ @@ -67,7 +91,7 @@ // // RUN: rm -rf %t/SDKs/AppleTVSimulator10.0.sdk // RUN: mkdir -p %t/SDKs/AppleTVSimulator10.0.sdk -// RUN: env SDKROOT=%t/SDKs/AppleTVSimulator10.0.sdk %clang %s -mlinker-version=400 -### 2>&1 \ +// RUN: env SDKROOT=%t/SDKs/AppleTVSimulator10.0.sdk %clang -arch x86_64 %s -mlinker-version=400 -### 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-TV-SIMULATOR %s // // CHECK-TV-SIMULATOR: clang @@ -75,3 +99,16 @@ // CHECK-TV-SIMULATOR: -apple-tvos10.0.0-simulator" // CHECK-TV-SIMULATOR: ld // CHECK-TV-SIMULATOR: "-tvos_simulator_version_min" "10.0.0" +// +// +// RUN: rm -rf %t/SDKs/AppleTVSimulator14.0.sdk +// RUN: mkdir -p %t/SDKs/AppleTVSimulator14.0.sdk +// RUN: env SDKROOT=%t/SDKs/AppleTVSimulator14.0.sdk %clang -arch arm64 %s -mlinker-version=400 -### 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-TV-SIMULATOR-ARM64 %s +// +// CHECK-TV-SIMULATOR-ARM64: clang +// CHECK-TV-SIMULATOR-ARM64: "-cc1" +// CHECK-TV-SIMULATOR-ARM64: -apple-tvos14.0.0-simulator" +// CHECK-TV-SIMULATOR-ARM64: ld +// CHECK-TV-SIMULATOR-ARM64: "-tvos_simulator_version_min" "14.0.0" + 
From ce49b7d9ca01f4abbba1e5a00339d539b0ea563e Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Tue, 8 Sep 2020 10:24:58 -0700 Subject: [PATCH 0089/1079] [llvm-install-name-tool] Add a test with multiple input files This diff adds a test which checks the error-message when multiple input files are passed to llvm-install-name-tool. Test plan: make check-all Differential revision: https://reviews.llvm.org/D87268 --- llvm/test/tools/llvm-objcopy/tool-help-message.test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/test/tools/llvm-objcopy/tool-help-message.test b/llvm/test/tools/llvm-objcopy/tool-help-message.test index 1a0712b7a7ce5..3f99d910ee97e 100644 --- a/llvm/test/tools/llvm-objcopy/tool-help-message.test +++ b/llvm/test/tools/llvm-objcopy/tool-help-message.test @@ -18,6 +18,7 @@ # RUN: not llvm-install-name-tool -abcabc 2>&1 | FileCheck --check-prefix=UNKNOWN-ARG %s # RUN: not llvm-install-name-tool --abcabc 2>&1 | FileCheck --check-prefix=UNKNOWN-ARG %s # RUN: not llvm-install-name-tool -add_rpath @executable 2>&1 | FileCheck %s --check-prefix=NO-INPUT-FILES +# RUN: not llvm-install-name-tool -add_rpath @executable f1 f2 2>&1 | FileCheck %s --check-prefix=MULTIPLE-INPUT-FILES # OBJCOPY-USAGE: USAGE: llvm-objcopy [options] input [output] # OBJCOPY-USAGE: Pass @FILE as argument to read options from FILE. @@ -30,3 +31,4 @@ # UNKNOWN-ARG: unknown argument '{{-+}}abcabc' # NO-INPUT-FILES: no input file specified +# MULTIPLE-INPUT-FILES: expects a single input file From 863aa0a37bd1a57b0720eda6d646f9abd51bf6c2 Mon Sep 17 00:00:00 2001 From: Andrew Ng Date: Mon, 7 Sep 2020 17:36:14 +0100 Subject: [PATCH 0090/1079] [LLD][ELF] Fix performance of MarkLive::scanEhFrameSection MarkLive::scanEhFrameSection is used to retain personality/LSDA functions when --gc-sections is enabled. Improve its performance by only iterating over the .eh_frame relocations that need to be resolved for an EhSectionPiece. This optimization makes the same assumption as elsewhere in LLD that the .eh_frame relocations are sorted by r_offset. This appears to be a performance regression introduced in commit e6c24299d237 (https://reviews.llvm.org/D59800). This change has been seen to reduce link time by up to ~50%. Differential Revision: https://reviews.llvm.org/D87245 --- lld/ELF/MarkLive.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index 28e13e8c1234b..af6c08c215816 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -152,9 +152,9 @@ void MarkLive::scanEhFrameSection(EhInputSection &eh, // a LSDA. We only need to keep the LSDA alive, so ignore anything that // points to executable sections. uint64_t pieceEnd = piece.inputOff + piece.size; - for (size_t j = firstRelI, end2 = rels.size(); j < end2; ++j) - if (rels[j].r_offset < pieceEnd) - resolveReloc(eh, rels[j], true); + for (size_t j = firstRelI, end2 = rels.size(); + j < end2 && rels[j].r_offset < pieceEnd; ++j) + resolveReloc(eh, rels[j], true); } } From 17dce2fe43c9d3335d64936ece576b0e36d8fe31 Mon Sep 17 00:00:00 2001 From: David Stenberg Date: Tue, 8 Sep 2020 18:54:30 +0200 Subject: [PATCH 0091/1079] [UnifyFunctionExitNodes] Remove unused getters, NFC The get{Return,Unwind,Unreachable}Block functions in UnifyFunctionExitNodes have not been used for many years, so just remove them. 
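An equivalent way to see why the bounded scan is cheap: since the
relocations are sorted by r_offset, the bound for each piece could even be
found with a binary search. A hedged sketch of that alternative formulation
(illustrative only, not proposed code; the committed fix uses the simpler
linear early exit, since the cursor never moves backwards across pieces):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Rel { uint64_t r_offset; };

    // Returns the index of the first relocation at or past pieceEnd,
    // starting the search at firstRelI; [firstRelI, bound) is the range
    // scanEhFrameSection needs to visit for one piece.
    size_t relocBound(const std::vector<Rel> &rels, size_t firstRelI,
                      uint64_t pieceEnd) {
      auto it = std::lower_bound(rels.begin() + firstRelI, rels.end(),
                                 pieceEnd, [](const Rel &r, uint64_t end) {
                                   return r.r_offset < end;
                                 });
      return static_cast<size_t>(it - rels.begin());
    }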
Reviewed By: bjope Differential Revision: https://reviews.llvm.org/D87078 --- .../Transforms/Utils/UnifyFunctionExitNodes.h | 16 +------------ .../Utils/UnifyFunctionExitNodes.cpp | 24 +++++-------------- 2 files changed, 7 insertions(+), 33 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h b/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h index ff70446e163d4..ce7cb16b3886d 100644 --- a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h +++ b/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // This pass is used to ensure that functions have at most one return and one -// unwind instruction in them. Additionally, it keeps track of which node is -// the new exit node of the CFG. If there are no return or unwind instructions -// in the function, the getReturnBlock/getUnwindBlock methods will return a null -// pointer. +// unreachable instruction in them. // //===----------------------------------------------------------------------===// @@ -24,10 +21,6 @@ namespace llvm { class BasicBlock; struct UnifyFunctionExitNodes : public FunctionPass { - BasicBlock *ReturnBlock = nullptr; - BasicBlock *UnwindBlock = nullptr; - BasicBlock *UnreachableBlock; - public: static char ID; // Pass identification, replacement for typeid UnifyFunctionExitNodes(); @@ -35,13 +28,6 @@ struct UnifyFunctionExitNodes : public FunctionPass { // We can preserve non-critical-edgeness when we unify function exit nodes void getAnalysisUsage(AnalysisUsage &AU) const override; - // getReturn|Unwind|UnreachableBlock - Return the new single (or nonexistent) - // return, unwind, or unreachable basic blocks in the CFG. - // - BasicBlock *getReturnBlock() const { return ReturnBlock; } - BasicBlock *getUnwindBlock() const { return UnwindBlock; } - BasicBlock *getUnreachableBlock() const { return UnreachableBlock; } - bool runOnFunction(Function &F) override; }; diff --git a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 9af39d9a0dd1c..b124d0536254b 100644 --- a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -6,10 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This pass is used to ensure that functions have at most one return -// instruction in them. Additionally, it keeps track of which node is the new -// exit node of the CFG. If there are no exit nodes in the CFG, the getExitNode -// method will return a null pointer. +// This pass is used to ensure that functions have at most one return and one +// unreachable instruction in them. // //===----------------------------------------------------------------------===// @@ -61,12 +59,8 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { UnreachableBlocks.push_back(&I); // Then unreachable blocks. - if (UnreachableBlocks.empty()) { - UnreachableBlock = nullptr; - } else if (UnreachableBlocks.size() == 1) { - UnreachableBlock = UnreachableBlocks.front(); - } else { - UnreachableBlock = BasicBlock::Create(F.getContext(), + if (UnreachableBlocks.size() > 1) { + BasicBlock *UnreachableBlock = BasicBlock::Create(F.getContext(), "UnifiedUnreachableBlock", &F); new UnreachableInst(F.getContext(), UnreachableBlock); @@ -76,14 +70,9 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { } } - // Now handle return blocks. 
- if (ReturningBlocks.empty()) { - ReturnBlock = nullptr; - return false; // No blocks return - } else if (ReturningBlocks.size() == 1) { - ReturnBlock = ReturningBlocks.front(); // Already has a single return block + // There is nothing more to do if we do not have multiple return blocks. + if (ReturningBlocks.size() <= 1) return false; - } // Otherwise, we need to insert a new basic block into the function, add a PHI // nodes (if the function returns values), and convert all of the return @@ -115,6 +104,5 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { BB->getInstList().pop_back(); // Remove the return insn BranchInst::Create(NewRetBlock, BB); } - ReturnBlock = NewRetBlock; return true; } From 5b2b4f331d78f326e5e29166bec5ad92c864343d Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Tue, 1 Sep 2020 18:52:14 -0700 Subject: [PATCH 0092/1079] Retry of D84974 The test is being disabled on Linux, as lldb-vscode has a bug with --wait-for on LInux. I'm also fixing some compilation warnings. --- .../tools/lldb-vscode/lldbvscode_testcase.py | 14 +- .../test/tools/lldb-vscode/vscode.py | 30 +++- .../tools/lldb-vscode/runInTerminal/Makefile | 3 + .../runInTerminal/TestVSCode_runInTerminal.py | 48 +++++ .../tools/lldb-vscode/runInTerminal/main.c | 11 ++ lldb/tools/lldb-vscode/JSONUtils.cpp | 40 +++++ lldb/tools/lldb-vscode/JSONUtils.h | 12 ++ lldb/tools/lldb-vscode/VSCode.cpp | 70 +++++++- lldb/tools/lldb-vscode/VSCode.h | 45 +++++ lldb/tools/lldb-vscode/lldb-vscode.cpp | 167 ++++++++++-------- lldb/tools/lldb-vscode/package.json | 5 + 11 files changed, 363 insertions(+), 82 deletions(-) create mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile create mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py create mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/main.c diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py index fa5a9c0db1ebd..5710751ec34bf 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py @@ -282,7 +282,7 @@ def launch(self, program=None, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None, sourcePath=None, debuggerRoot=None, launchCommands=None, - sourceMap=None, disconnectAutomatically=True): + sourceMap=None, disconnectAutomatically=True, runInTerminal=False): '''Sending launch request to vscode ''' @@ -316,10 +316,16 @@ def cleanup(): sourcePath=sourcePath, debuggerRoot=debuggerRoot, launchCommands=launchCommands, - sourceMap=sourceMap) + sourceMap=sourceMap, + runInTerminal=runInTerminal) if not (response and response['success']): self.assertTrue(response['success'], 'launch failed (%s)' % (response['message'])) + # We need to trigger a request_configurationDone after we've successfully + # attached a runInTerminal process to finish initialization. 
+ if runInTerminal: + self.vscode.request_configurationDone() + def build_and_launch(self, program, args=None, cwd=None, env=None, stopOnEntry=False, disableASLR=True, @@ -327,7 +333,7 @@ def build_and_launch(self, program, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None, sourcePath=None, - debuggerRoot=None): + debuggerRoot=None, runInTerminal=False): '''Build the default Makefile target, create the VSCode debug adaptor, and launch the process. ''' @@ -337,4 +343,4 @@ def build_and_launch(self, program, args=None, cwd=None, env=None, self.launch(program, args, cwd, env, stopOnEntry, disableASLR, disableSTDIO, shellExpandArguments, trace, initCommands, preRunCommands, stopCommands, exitCommands, - terminateCommands, sourcePath, debuggerRoot) + terminateCommands, sourcePath, debuggerRoot, runInTerminal=runInTerminal) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py index 6b1c1c961b545..834e33ef5c3da 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py @@ -300,12 +300,29 @@ def send_recv(self, command): self.send_packet(command) done = False while not done: - response = self.recv_packet(filter_type='response') - if response is None: + response_or_request = self.recv_packet(filter_type=['response', 'request']) + if response_or_request is None: desc = 'no response for "%s"' % (command['command']) raise ValueError(desc) - self.validate_response(command, response) - return response + if response_or_request['type'] == 'response': + self.validate_response(command, response_or_request) + return response_or_request + else: + if response_or_request['command'] == 'runInTerminal': + subprocess.Popen(response_or_request['arguments']['args'], + env=response_or_request['arguments']['env']) + self.send_packet({ + "type": "response", + "seq": -1, + "request_seq": response_or_request['seq'], + "success": True, + "command": "runInTerminal", + "body": {} + }, set_sequence=False) + else: + desc = 'unkonwn reverse request "%s"' % (response_or_request['command']) + raise ValueError(desc) + return None def wait_for_event(self, filter=None, timeout=None): @@ -599,7 +616,8 @@ def request_launch(self, program, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None ,sourcePath=None, - debuggerRoot=None, launchCommands=None, sourceMap=None): + debuggerRoot=None, launchCommands=None, sourceMap=None, + runInTerminal=False): args_dict = { 'program': program } @@ -638,6 +656,8 @@ def request_launch(self, program, args=None, cwd=None, env=None, args_dict['launchCommands'] = launchCommands if sourceMap: args_dict['sourceMap'] = sourceMap + if runInTerminal: + args_dict['runInTerminal'] = runInTerminal command_dict = { 'command': 'launch', 'type': 'request', diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile b/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile new file mode 100644 index 0000000000000..10495940055b6 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py b/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py new file mode 100644 index 
0000000000000..6a463dfacc1f9 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py @@ -0,0 +1,48 @@ +""" +Test lldb-vscode runInTerminal reverse request +""" + + +import unittest2 +import vscode +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +import lldbvscode_testcase +import time +import os + + +class TestVSCode_runInTerminal(lldbvscode_testcase.VSCodeTestCaseBase): + + mydir = TestBase.compute_mydir(__file__) + + @skipUnlessDarwin + @skipIfRemote + def test_runInTerminal(self): + ''' + Tests the "runInTerminal" reverse request. It makes sure that the IDE can + launch the inferior with the correct environment variables and arguments. + ''' + program = self.getBuildArtifact("a.out") + source = 'main.c' + self.build_and_launch(program, stopOnEntry=True, runInTerminal=True, args=["foobar"], env=["FOO=bar"]) + breakpoint_line = line_number(source, '// breakpoint') + + self.set_source_breakpoints(source, [breakpoint_line]) + self.continue_to_next_stop() + + # We verify we actually stopped inside the loop + counter = int(self.vscode.get_local_variable_value('counter')) + self.assertTrue(counter > 0) + + # We verify we were able to set the launch arguments + argc = int(self.vscode.get_local_variable_value('argc')) + self.assertEqual(argc, 2) + + argv1 = self.vscode.request_evaluate('argv[1]')['body']['result'] + self.assertIn('foobar', argv1) + + # We verify we were able to set the environment + env = self.vscode.request_evaluate('foo')['body']['result'] + self.assertIn('bar', env) diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c b/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c new file mode 100644 index 0000000000000..676bd830e657b --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c @@ -0,0 +1,11 @@ +#include +#include +#include + +int main(int argc, char *argv[]) { + const char *foo = getenv("FOO"); + for (int counter = 1;; counter++) { + sleep(1); // breakpoint + } + return 0; +} diff --git a/lldb/tools/lldb-vscode/JSONUtils.cpp b/lldb/tools/lldb-vscode/JSONUtils.cpp index 36156ca2c42f9..044bfd13ec463 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.cpp +++ b/lldb/tools/lldb-vscode/JSONUtils.cpp @@ -998,4 +998,44 @@ llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit) { return llvm::json::Value(std::move(object)); } +/// See +/// https://microsoft.github.io/debug-adapter-protocol/specification#Reverse_Requests_RunInTerminal +llvm::json::Object +CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request) { + llvm::json::Object reverse_request; + reverse_request.try_emplace("type", "request"); + reverse_request.try_emplace("command", "runInTerminal"); + + llvm::json::Object run_in_terminal_args; + // This indicates the IDE to open an embedded terminal, instead of opening the + // terminal in a new window. 
+  run_in_terminal_args.try_emplace("kind", "integrated");
+
+  auto launch_request_arguments = launch_request.getObject("arguments");
+  std::vector<std::string> args = GetStrings(launch_request_arguments, "args");
+  // The program path must be the first entry in the "args" field
+  args.insert(args.begin(),
+              GetString(launch_request_arguments, "program").str());
+  run_in_terminal_args.try_emplace("args", args);
+
+  const auto cwd = GetString(launch_request_arguments, "cwd");
+  if (!cwd.empty())
+    run_in_terminal_args.try_emplace("cwd", cwd);
+
+  // We need to convert the input list of environment variables into a
+  // dictionary
+  std::vector<std::string> envs = GetStrings(launch_request_arguments, "env");
+  llvm::json::Object environment;
+  for (const std::string &env : envs) {
+    size_t index = env.find("=");
+    environment.try_emplace(env.substr(0, index), env.substr(index + 1));
+  }
+  run_in_terminal_args.try_emplace("env",
+                                   llvm::json::Value(std::move(environment)));
+
+  reverse_request.try_emplace(
+      "arguments", llvm::json::Value(std::move(run_in_terminal_args)));
+  return reverse_request;
+}
+
 } // namespace lldb_vscode
diff --git a/lldb/tools/lldb-vscode/JSONUtils.h b/lldb/tools/lldb-vscode/JSONUtils.h
index df4428f390ba2..88cbef9e5fdd4 100644
--- a/lldb/tools/lldb-vscode/JSONUtils.h
+++ b/lldb/tools/lldb-vscode/JSONUtils.h
@@ -443,6 +443,18 @@ llvm::json::Value CreateVariable(lldb::SBValue v, int64_t variablesReference,

 llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit);

+/// Create a runInTerminal reverse request object
+///
+/// \param[in] launch_request
+///     The original launch_request object whose fields are used to construct
+///     the reverse request object.
+///
+/// \return
+///     A "runInTerminal" JSON object that follows the specification outlined by
+///     Microsoft.
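+///
+/// For example (illustrative values, not taken from a real session), a launch
+/// request whose arguments contain "program": "/tmp/a.out", "args": ["foobar"]
+/// and "env": ["FOO=bar"] yields a reverse request whose arguments contain
+/// "args": ["/tmp/a.out", "foobar"] and "env": {"FOO": "bar"}.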
+llvm::json::Object +CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request); + } // namespace lldb_vscode #endif diff --git a/lldb/tools/lldb-vscode/VSCode.cpp b/lldb/tools/lldb-vscode/VSCode.cpp index 537cae7868631..d57330ce6ff1a 100644 --- a/lldb/tools/lldb-vscode/VSCode.cpp +++ b/lldb/tools/lldb-vscode/VSCode.cpp @@ -38,7 +38,8 @@ VSCode::VSCode() {"swift_catch", "Swift Catch", lldb::eLanguageTypeSwift}, {"swift_throw", "Swift Throw", lldb::eLanguageTypeSwift}}), focus_tid(LLDB_INVALID_THREAD_ID), sent_terminated_event(false), - stop_at_entry(false), is_attach(false) { + stop_at_entry(false), is_attach(false), + reverse_request_seq(0), waiting_for_run_in_terminal(false) { const char *log_file_path = getenv("LLDBVSCODE_LOG"); #if defined(_WIN32) // Windows opens stdout and stdin in text mode which converts \n to 13,10 @@ -362,4 +363,71 @@ void VSCode::SetTarget(const lldb::SBTarget target) { } } +PacketStatus VSCode::GetObject(llvm::json::Object &object) { + std::string json = ReadJSON(); + if (json.empty()) + return PacketStatus::EndOfFile; + + llvm::StringRef json_sref(json); + llvm::Expected json_value = llvm::json::parse(json_sref); + if (!json_value) { + auto error = json_value.takeError(); + if (log) { + std::string error_str; + llvm::raw_string_ostream strm(error_str); + strm << error; + strm.flush(); + *log << "error: failed to parse JSON: " << error_str << std::endl + << json << std::endl; + } + return PacketStatus::JSONMalformed; + } + object = *json_value->getAsObject(); + if (!json_value->getAsObject()) { + if (log) + *log << "error: json packet isn't a object" << std::endl; + return PacketStatus::JSONNotObject; + } + return PacketStatus::Success; +} + +bool VSCode::HandleObject(const llvm::json::Object &object) { + const auto packet_type = GetString(object, "type"); + if (packet_type == "request") { + const auto command = GetString(object, "command"); + auto handler_pos = request_handlers.find(std::string(command)); + if (handler_pos != request_handlers.end()) { + handler_pos->second(object); + return true; // Success + } else { + if (log) + *log << "error: unhandled command \"" << command.data() << std::endl; + return false; // Fail + } + } + return false; +} + +PacketStatus VSCode::SendReverseRequest(llvm::json::Object request, + llvm::json::Object &response) { + request.try_emplace("seq", ++reverse_request_seq); + SendJSON(llvm::json::Value(std::move(request))); + while (true) { + PacketStatus status = GetObject(response); + const auto packet_type = GetString(response, "type"); + if (packet_type == "response") + return status; + else { + // Not our response, we got another packet + HandleObject(response); + } + } + return PacketStatus::EndOfFile; +} + +void VSCode::RegisterRequestCallback(std::string request, + RequestCallback callback) { + request_handlers[request] = callback; +} + } // namespace lldb_vscode diff --git a/lldb/tools/lldb-vscode/VSCode.h b/lldb/tools/lldb-vscode/VSCode.h index 88a0c08de2454..4a20c56c53eb0 100644 --- a/lldb/tools/lldb-vscode/VSCode.h +++ b/lldb/tools/lldb-vscode/VSCode.h @@ -9,6 +9,7 @@ #ifndef LLDB_TOOLS_LLDB_VSCODE_VSCODE_H #define LLDB_TOOLS_LLDB_VSCODE_VSCODE_H +#include #include #include #include @@ -19,6 +20,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/JSON.h" #include "llvm/Support/raw_ostream.h" #include "lldb/API/SBAttachInfo.h" @@ -65,6 +67,15 @@ enum class OutputType { Console, Stdout, Stderr, Telemetry }; enum 
VSCodeBroadcasterBits { eBroadcastBitStopEventThread = 1u << 0 }; +typedef void (*RequestCallback)(const llvm::json::Object &command); + +enum class PacketStatus { + Success = 0, + EndOfFile, + JSONMalformed, + JSONNotObject +}; + struct VSCode { InputStream input; OutputStream output; @@ -91,6 +102,10 @@ struct VSCode { bool sent_terminated_event; bool stop_at_entry; bool is_attach; + uint32_t reverse_request_seq; + std::map request_handlers; + std::condition_variable request_in_terminal_cv; + bool waiting_for_run_in_terminal; // Keep track of the last stop thread index IDs as threads won't go away // unless we send a "thread" event to indicate the thread exited. llvm::DenseSet thread_ids; @@ -152,6 +167,36 @@ struct VSCode { /// Set given target object as a current target for lldb-vscode and start /// listeing for its breakpoint events. void SetTarget(const lldb::SBTarget target); + + const std::map &GetRequestHandlers(); + + PacketStatus GetObject(llvm::json::Object &object); + bool HandleObject(const llvm::json::Object &object); + + /// Send a Debug Adapter Protocol reverse request to the IDE + /// + /// \param[in] request + /// The payload of the request to send. + /// + /// \param[out] response + /// The response of the IDE. It might be undefined if there was an error. + /// + /// \return + /// A \a PacketStatus object indicating the sucess or failure of the + /// request. + PacketStatus SendReverseRequest(llvm::json::Object request, + llvm::json::Object &response); + + /// Registers a callback handler for a Debug Adapter Protocol request + /// + /// \param[in] request + /// The name of the request following the Debug Adapter Protocol + /// specification. + /// + /// \param[in] callback + /// The callback to execute when the given request is triggered by the + /// IDE. + void RegisterRequestCallback(std::string request, RequestCallback callback); }; extern VSCode g_vsc; diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index 54f2e653d0697..ee01822ba6217 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -384,7 +384,12 @@ void EventThreadFunction() { break; case lldb::eStateSuspended: break; - case lldb::eStateStopped: + case lldb::eStateStopped: { + if (g_vsc.waiting_for_run_in_terminal) { + g_vsc.waiting_for_run_in_terminal = false; + g_vsc.request_in_terminal_cv.notify_one(); + } + } // Only report a stopped event if the process was not restarted. if (!lldb::SBProcess::GetRestartedFromEvent(event)) { SendStdOutStdErr(process); @@ -1374,6 +1379,9 @@ void request_initialize(const llvm::json::Object &request) { filters.emplace_back(CreateExceptionBreakpointFilter(exc_bp)); } body.try_emplace("exceptionBreakpointFilters", std::move(filters)); + // The debug adapter supports launching a debugee in intergrated VSCode + // terminal. + body.try_emplace("supportsRunInTerminalRequest", true); // The debug adapter supports stepping back via the stepBack and // reverseContinue requests. body.try_emplace("supportsStepBack", false); @@ -1433,6 +1441,49 @@ void request_initialize(const llvm::json::Object &request) { g_vsc.SendJSON(llvm::json::Value(std::move(response))); } +void request_runInTerminal(const llvm::json::Object &launch_request, + llvm::json::Object &launch_response) { + // We have already created a target that has a valid "program" path to the + // executable. We will attach to the next process whose name matches that + // of the target's. 
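+  // The sequence below, in short: (1) start an asynchronous wait-for-launch
+  // attach on the target, (2) send the runInTerminal reverse request so the
+  // IDE spawns the inferior in its terminal, and (3) block on a condition
+  // variable until the attach produces a stop event or a 10-second timeout
+  // expires.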
+ g_vsc.is_attach = true; + lldb::SBAttachInfo attach_info; + lldb::SBError error; + attach_info.SetWaitForLaunch(true, /*async*/ true); + g_vsc.target.Attach(attach_info, error); + + llvm::json::Object reverse_request = + CreateRunInTerminalReverseRequest(launch_request); + llvm::json::Object reverse_response; + lldb_vscode::PacketStatus status = + g_vsc.SendReverseRequest(reverse_request, reverse_response); + if (status != lldb_vscode::PacketStatus::Success) + error.SetErrorString("Process cannot be launched by IDE."); + + if (error.Success()) { + // Wait for the attach stop event to happen or for a timeout. + g_vsc.waiting_for_run_in_terminal = true; + static std::mutex mutex; + std::unique_lock locker(mutex); + g_vsc.request_in_terminal_cv.wait_for(locker, std::chrono::seconds(10)); + + auto attached_pid = g_vsc.target.GetProcess().GetProcessID(); + if (attached_pid == LLDB_INVALID_PROCESS_ID) + error.SetErrorString("Failed to attach to a process"); + else + SendProcessEvent(Attach); + } + + if (error.Fail()) { + launch_response["success"] = llvm::json::Value(false); + EmplaceSafeString(launch_response, "message", + std::string(error.GetCString())); + } else { + launch_response["success"] = llvm::json::Value(true); + g_vsc.SendJSON(CreateEventObject("initialized")); + } +} + // "LaunchRequest": { // "allOf": [ { "$ref": "#/definitions/Request" }, { // "type": "object", @@ -1505,6 +1556,12 @@ void request_launch(const llvm::json::Object &request) { return; } + if (GetBoolean(arguments, "runInTerminal", false)) { + request_runInTerminal(request, response); + g_vsc.SendJSON(llvm::json::Value(std::move(response))); + return; + } + // Instantiate a launch info instance for the target. auto launch_info = g_vsc.target.GetLaunchInfo(); @@ -2831,39 +2888,35 @@ void request__testGetTargetBreakpoints(const llvm::json::Object &request) { g_vsc.SendJSON(llvm::json::Value(std::move(response))); } -const std::map &GetRequestHandlers() { -#define REQUEST_CALLBACK(name) \ - { #name, request_##name } - static std::map g_request_handlers = { - // VSCode Debug Adaptor requests - REQUEST_CALLBACK(attach), - REQUEST_CALLBACK(completions), - REQUEST_CALLBACK(continue), - REQUEST_CALLBACK(configurationDone), - REQUEST_CALLBACK(disconnect), - REQUEST_CALLBACK(evaluate), - REQUEST_CALLBACK(exceptionInfo), - REQUEST_CALLBACK(getCompileUnits), - REQUEST_CALLBACK(initialize), - REQUEST_CALLBACK(launch), - REQUEST_CALLBACK(next), - REQUEST_CALLBACK(pause), - REQUEST_CALLBACK(scopes), - REQUEST_CALLBACK(setBreakpoints), - REQUEST_CALLBACK(setExceptionBreakpoints), - REQUEST_CALLBACK(setFunctionBreakpoints), - REQUEST_CALLBACK(setVariable), - REQUEST_CALLBACK(source), - REQUEST_CALLBACK(stackTrace), - REQUEST_CALLBACK(stepIn), - REQUEST_CALLBACK(stepOut), - REQUEST_CALLBACK(threads), - REQUEST_CALLBACK(variables), - // Testing requests - REQUEST_CALLBACK(_testGetTargetBreakpoints), - }; -#undef REQUEST_CALLBACK - return g_request_handlers; +void RegisterRequestCallbacks() { + g_vsc.RegisterRequestCallback("attach", request_attach); + g_vsc.RegisterRequestCallback("completions", request_completions); + g_vsc.RegisterRequestCallback("continue", request_continue); + g_vsc.RegisterRequestCallback("configurationDone", request_configurationDone); + g_vsc.RegisterRequestCallback("disconnect", request_disconnect); + g_vsc.RegisterRequestCallback("evaluate", request_evaluate); + g_vsc.RegisterRequestCallback("exceptionInfo", request_exceptionInfo); + g_vsc.RegisterRequestCallback("getCompileUnits", request_getCompileUnits); + 
g_vsc.RegisterRequestCallback("initialize", request_initialize); + g_vsc.RegisterRequestCallback("launch", request_launch); + g_vsc.RegisterRequestCallback("next", request_next); + g_vsc.RegisterRequestCallback("pause", request_pause); + g_vsc.RegisterRequestCallback("scopes", request_scopes); + g_vsc.RegisterRequestCallback("setBreakpoints", request_setBreakpoints); + g_vsc.RegisterRequestCallback("setExceptionBreakpoints", + request_setExceptionBreakpoints); + g_vsc.RegisterRequestCallback("setFunctionBreakpoints", + request_setFunctionBreakpoints); + g_vsc.RegisterRequestCallback("setVariable", request_setVariable); + g_vsc.RegisterRequestCallback("source", request_source); + g_vsc.RegisterRequestCallback("stackTrace", request_stackTrace); + g_vsc.RegisterRequestCallback("stepIn", request_stepIn); + g_vsc.RegisterRequestCallback("stepOut", request_stepOut); + g_vsc.RegisterRequestCallback("threads", request_threads); + g_vsc.RegisterRequestCallback("variables", request_variables); + // Testing requests + g_vsc.RegisterRequestCallback("_testGetTargetBreakpoints", + request__testGetTargetBreakpoints); } } // anonymous namespace @@ -2895,6 +2948,8 @@ int main(int argc, char *argv[]) { // Initialize LLDB first before we do anything. lldb::SBDebugger::Initialize(); + RegisterRequestCallbacks(); + int portno = -1; LLDBVSCodeOptTable T; @@ -2937,49 +2992,17 @@ int main(int argc, char *argv[]) { g_vsc.output.descriptor = StreamDescriptor::from_file(fileno(stdout), false); } - auto request_handlers = GetRequestHandlers(); uint32_t packet_idx = 0; while (!g_vsc.sent_terminated_event) { - std::string json = g_vsc.ReadJSON(); - if (json.empty()) + llvm::json::Object object; + lldb_vscode::PacketStatus status = g_vsc.GetObject(object); + if (status == lldb_vscode::PacketStatus::EndOfFile) break; + if (status != lldb_vscode::PacketStatus::Success) + return 1; // Fatal error - llvm::StringRef json_sref(json); - llvm::Expected json_value = llvm::json::parse(json_sref); - if (!json_value) { - auto error = json_value.takeError(); - if (g_vsc.log) { - std::string error_str; - llvm::raw_string_ostream strm(error_str); - strm << error; - strm.flush(); - - *g_vsc.log << "error: failed to parse JSON: " << error_str << std::endl - << json << std::endl; - } - return 1; - } - - auto object = json_value->getAsObject(); - if (!object) { - if (g_vsc.log) - *g_vsc.log << "error: json packet isn't a object" << std::endl; + if (!g_vsc.HandleObject(object)) return 1; - } - - const auto packet_type = GetString(object, "type"); - if (packet_type == "request") { - const auto command = GetString(object, "command"); - auto handler_pos = request_handlers.find(std::string(command)); - if (handler_pos != request_handlers.end()) { - handler_pos->second(*object); - } else { - if (g_vsc.log) - *g_vsc.log << "error: unhandled command \"" << command.data() - << std::endl; - return 1; - } - } ++packet_idx; } diff --git a/lldb/tools/lldb-vscode/package.json b/lldb/tools/lldb-vscode/package.json index 29ca06dd17d63..9077ab51dd7fa 100644 --- a/lldb/tools/lldb-vscode/package.json +++ b/lldb/tools/lldb-vscode/package.json @@ -175,6 +175,11 @@ "type": "array", "description": "Commands executed at the end of debugging session.", "default": [] + }, + "runInTerminal": { + "type": "boolean", + "description": "Launch the program inside an integrated terminal in the IDE. 
Useful for debugging interactive command line programs", + "default": false } } }, From 8927c900697adf313fb5f11a09a03f1451439403 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 8 Sep 2020 20:57:40 +0200 Subject: [PATCH 0093/1079] [InstCombine] Add tests for known bits for min/max intrinsics (NFC) We already have test coverage for the underlying calculation, this just checked that the folding is wired up... --- .../InstCombine/minmax-intrinsics.ll | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/minmax-intrinsics.ll diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll new file mode 100644 index 0000000000000..d808d5fc42445 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine < %s | FileCheck %s + +declare i8 @llvm.umin.i8(i8, i8) +declare i8 @llvm.umax.i8(i8, i8) +declare i8 @llvm.smin.i8(i8, i8) +declare i8 @llvm.smax.i8(i8, i8) + +define i8 @umin_known_bits(i8 %x, i8 %y) { +; CHECK-LABEL: @umin_known_bits( +; CHECK-NEXT: [[X2:%.*]] = and i8 [[X:%.*]], 127 +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X2]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[M]], -128 +; CHECK-NEXT: ret i8 [[R]] +; + %x2 = and i8 %x, 127 + %m = call i8 @llvm.umin.i8(i8 %x2, i8 %y) + %r = and i8 %m, -128 + ret i8 %r +} + +define i8 @umax_known_bits(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_known_bits( +; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], -128 +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X2]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[M]], -128 +; CHECK-NEXT: ret i8 [[R]] +; + %x2 = or i8 %x, -128 + %m = call i8 @llvm.umax.i8(i8 %x2, i8 %y) + %r = and i8 %m, -128 + ret i8 %r +} + +define i8 @smin_known_bits(i8 %x, i8 %y) { +; CHECK-LABEL: @smin_known_bits( +; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], -128 +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[X2]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[M]], -128 +; CHECK-NEXT: ret i8 [[R]] +; + %x2 = or i8 %x, -128 + %m = call i8 @llvm.smin.i8(i8 %x2, i8 %y) + %r = and i8 %m, -128 + ret i8 %r +} + +define i8 @smax_known_bits(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_known_bits( +; CHECK-NEXT: [[X2:%.*]] = and i8 [[X:%.*]], 127 +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X2]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[M]], -128 +; CHECK-NEXT: ret i8 [[R]] +; + %x2 = and i8 %x, 127 + %m = call i8 @llvm.smax.i8(i8 %x2, i8 %y) + %r = and i8 %m, -128 + ret i8 %r +} From 8453fbf0889e22cf9bbb74c65e36cf8abbcec7b4 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 8 Sep 2020 21:06:59 +0200 Subject: [PATCH 0094/1079] [ValueTracking] Compute known bits of min/max intrinsics Implement known bits for the min/max intrinsics based on the recently added KnownBits primitives. 
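As a minimal sketch of the effect (a hypothetical demo written for this note,
assuming only llvm/Support/KnownBits.h; it is not part of the patch): the
unsigned minimum can never exceed either operand, so a known-zero high bit
survives the intrinsic.

    #include "llvm/Support/KnownBits.h"
    #include <cassert>

    void known_bits_umin_demo() {
      llvm::KnownBits X(8);
      X.Zero.setSignBit();                  // models x & 127: bit 7 known zero
      llvm::KnownBits Y(8);                 // nothing known about y
      llvm::KnownBits M = llvm::KnownBits::umin(X, Y);
      assert(M.Zero[7]);                    // umin(x, y) <= x, so bit 7 stays 0
    }

This is what lets InstCombine fold umin_known_bits in the updated test below
to ret i8 0.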
--- llvm/lib/Analysis/ValueTracking.cpp | 20 +++++++++++++++++++ .../InstCombine/minmax-intrinsics.ll | 20 ++++--------------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 6e5a7195bb194..5eb66e96e1d85 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1739,6 +1739,26 @@ static void computeKnownBitsFromOperator(const Operator *I, } break; } + case Intrinsic::umin: + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + Known = KnownBits::umin(Known, Known2); + break; + case Intrinsic::umax: + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + Known = KnownBits::umax(Known, Known2); + break; + case Intrinsic::smin: + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + Known = KnownBits::smin(Known, Known2); + break; + case Intrinsic::smax: + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + Known = KnownBits::smax(Known, Known2); + break; case Intrinsic::x86_sse42_crc32_64_64: Known.Zero.setBitsFrom(32); break; diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll index d808d5fc42445..797f85d944474 100644 --- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -8,10 +8,7 @@ declare i8 @llvm.smax.i8(i8, i8) define i8 @umin_known_bits(i8 %x, i8 %y) { ; CHECK-LABEL: @umin_known_bits( -; CHECK-NEXT: [[X2:%.*]] = and i8 [[X:%.*]], 127 -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X2]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[R:%.*]] = and i8 [[M]], -128 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 0 ; %x2 = and i8 %x, 127 %m = call i8 @llvm.umin.i8(i8 %x2, i8 %y) @@ -21,10 +18,7 @@ define i8 @umin_known_bits(i8 %x, i8 %y) { define i8 @umax_known_bits(i8 %x, i8 %y) { ; CHECK-LABEL: @umax_known_bits( -; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], -128 -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X2]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[R:%.*]] = and i8 [[M]], -128 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 -128 ; %x2 = or i8 %x, -128 %m = call i8 @llvm.umax.i8(i8 %x2, i8 %y) @@ -34,10 +28,7 @@ define i8 @umax_known_bits(i8 %x, i8 %y) { define i8 @smin_known_bits(i8 %x, i8 %y) { ; CHECK-LABEL: @smin_known_bits( -; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], -128 -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[X2]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[R:%.*]] = and i8 [[M]], -128 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 -128 ; %x2 = or i8 %x, -128 %m = call i8 @llvm.smin.i8(i8 %x2, i8 %y) @@ -47,10 +38,7 @@ define i8 @smin_known_bits(i8 %x, i8 %y) { define i8 @smax_known_bits(i8 %x, i8 %y) { ; CHECK-LABEL: @smax_known_bits( -; CHECK-NEXT: [[X2:%.*]] = and i8 [[X:%.*]], 127 -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X2]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[R:%.*]] = and i8 [[M]], -128 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 0 ; %x2 = and i8 %x, 127 %m = call i8 @llvm.smax.i8(i8 %x2, i8 %y) From 66310aafa0da47dd4664a1200afc7e22cab15b65 Mon Sep 17 00:00:00 2001 From: "Paul C. 
Anagnostopoulos" Date: Sun, 30 Aug 2020 14:00:25 -0400 Subject: [PATCH 0095/1079] fix typos; improve a couple of descriptions; add release note --- llvm/docs/ReleaseNotes.rst | 7 +++++-- llvm/docs/TableGen/ProgRef.rst | 35 ++++++++++++++++++---------------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 59897806c37a5..47ce9fa10d908 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -69,10 +69,13 @@ Changes to building LLVM Changes to TableGen ------------------- +* The new "TableGen Programmer's Reference" replaces the "TableGen Language + Introduction" and "TableGen Language Reference" documents. + * The syntax for specifying an integer range in a range list has changed. The old syntax used a hyphen in the range (e.g., ``{0-9}``). The new syntax - uses the "`...`" range punctuator (e.g., ``{0...9}``). The hyphen syntax - is deprecated. The "TableGen Language Reference" document has been updated. + uses the "`...`" range punctuation (e.g., ``{0...9}``). The hyphen syntax + is deprecated. Changes to the ARM Backend -------------------------- diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst index 83684ab41c280..07f0ba8a54dd0 100644 --- a/llvm/docs/TableGen/ProgRef.rst +++ b/llvm/docs/TableGen/ProgRef.rst @@ -140,7 +140,7 @@ the file is printed for review. The following are the basic punctuation tokens:: - - + [ ] { } ( ) < > : ; . = ? # + - + [ ] { } ( ) < > : ; . ... = ? # Literals -------- @@ -328,8 +328,8 @@ to an entity of type ``bits<4>``. .. warning:: The peculiar last form of :token:`RangePiece` is due to the fact that the "``-``" is included in the :token:`TokInteger`, hence ``1-5`` gets lexed as - two consecutive tokens, with values ``1`` and ``-5``, - instead of "1", "-", and "5". + two consecutive tokens, with values ``1`` and ``-5``, instead of "1", "-", + and "5". The use of hyphen as the range punctuation is deprecated. Simple values ------------- @@ -431,7 +431,7 @@ sense after reading the remainder of this guide. * The iteration variable of a ``foreach``, such as the use of ``i`` in:: - foreach i = 0..5 in + foreach i = 0...5 in def Foo#i; .. productionlist:: @@ -466,11 +466,11 @@ primary value. Here are the possible suffixes for some primary *value*. *value*\ ``{17}`` The final value is bit 17 of the integer *value* (note the braces). -*value*\ ``{8..15}`` +*value*\ ``{8...15}`` The final value is bits 8--15 of the integer *value*. The order of the - bits can be reversed by specifying ``{15..8}``. + bits can be reversed by specifying ``{15...8}``. -*value*\ ``[4..7,17,2..3,4]`` +*value*\ ``[4...7,17,2...3,4]`` The final value is a new list that is a slice of the list *value* (note the brackets). The new list contains elements 4, 5, 6, 7, 17, 2, 3, and 4. Elements may be @@ -827,10 +827,13 @@ template that expands into multiple records. MultiClassID: `TokIdentifier` As with regular classes, the multiclass has a name and can accept template -arguments. The body of the multiclass contains a series of statements that -define records, using :token:`Def` and :token:`Defm`. In addition, -:token:`Defvar`, :token:`Foreach`, and :token:`Let` -statements can be used to factor out even more common elements. +arguments. A multiclass can inherit from other multiclasses, which causes +the other multiclasses to be expanded and contribute to the record +definitions in the inheriting multiclass. 
The body of the multiclass +contains a series of statements that define records, using :token:`Def` and +:token:`Defm`. In addition, :token:`Defvar`, :token:`Foreach`, and +:token:`Let` statements can be used to factor out even more common elements. +The :token:`If` statement can also be used. Also as with regular classes, the multiclass has the implicit template argument ``NAME`` (see NAME_). When a named (non-anonymous) record is @@ -1128,8 +1131,8 @@ the next iteration. The following ``defvar`` will not work:: Variables can also be defined with ``defvar`` in a record body. See `Defvar in Record Body`_ for more details. -``foreach`` --- iterate over a sequence ---------------------------------------- +``foreach`` --- iterate over a sequence of statements +----------------------------------------------------- The ``foreach`` statement iterates over a series of statements, varying a variable over a sequence of values. @@ -1529,7 +1532,7 @@ and non-0 as true. ``!shl(``\ *a*\ ``,`` *count*\ ``)`` This operator shifts *a* left logically by *count* bits and produces the resulting value. The operation is performed on a 64-bit integer; the result - is undefined for shift counts outside 0..63. + is undefined for shift counts outside 0...63. ``!size(``\ *a*\ ``)`` This operator produces the number of elements in the list *a*. @@ -1537,12 +1540,12 @@ and non-0 as true. ``!sra(``\ *a*\ ``,`` *count*\ ``)`` This operator shifts *a* right arithmetically by *count* bits and produces the resulting value. The operation is performed on a 64-bit integer; the result - is undefined for shift counts outside 0..63. + is undefined for shift counts outside 0...63. ``!srl(``\ *a*\ ``,`` *count*\ ``)`` This operator shifts *a* right logically by *count* bits and produces the resulting value. The operation is performed on a 64-bit integer; the result - is undefined for shift counts outside 0..63. + is undefined for shift counts outside 0...63. ``!strconcat(``\ *str1*\ ``,`` *str2*\ ``, ...)`` This operator concatenates the string arguments *str1*, *str2*, etc., and From f4ac79a364f2de7270a3238b176e17b40b036305 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Tue, 8 Sep 2020 20:06:07 +0000 Subject: [PATCH 0096/1079] Sema: extract a check for `isCFError` (NFC) Extract a simple check to check if a `RecordDecl` is a `CFError` Decl. This is a simple refactoring to prepare for an upcoming change. NFC. Patch is extracted from https://github.com/llvm/llvm-project-staging/commit/8afaf3aad2af43cfedca7a24cd817848c4e95c0c. --- clang/include/clang/Sema/Sema.h | 1 + clang/lib/Sema/SemaType.cpp | 52 +++++++++++++++++---------------- 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 53d0285d37027..129ac0355c87f 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12415,6 +12415,7 @@ class Sema final { /// The struct behind the CFErrorRef pointer. RecordDecl *CFError = nullptr; + bool isCFError(RecordDecl *D); /// Retrieve the identifier "NSError". 
 IdentifierInfo *getNSErrorIdent();
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 03442fb03b3aa..d8ea9c0372592 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -4043,32 +4043,9 @@ classifyPointerDeclarator(Sema &S, QualType type, Declarator &declarator,
   if (auto recordType = type->getAs<RecordType>()) {
     RecordDecl *recordDecl = recordType->getDecl();

-    bool isCFError = false;
-    if (S.CFError) {
-      // If we already know about CFError, test it directly.
-      isCFError = (S.CFError == recordDecl);
-    } else {
-      // Check whether this is CFError, which we identify based on its bridge
-      // to NSError. CFErrorRef used to be declared with "objc_bridge" but is
-      // now declared with "objc_bridge_mutable", so look for either one of
-      // the two attributes.
-      if (recordDecl->getTagKind() == TTK_Struct && numNormalPointers > 0) {
-        IdentifierInfo *bridgedType = nullptr;
-        if (auto bridgeAttr = recordDecl->getAttr<ObjCBridgeAttr>())
-          bridgedType = bridgeAttr->getBridgedType();
-        else if (auto bridgeAttr =
-                     recordDecl->getAttr<ObjCBridgeMutableAttr>())
-          bridgedType = bridgeAttr->getBridgedType();
-
-        if (bridgedType == S.getNSErrorIdent()) {
-          S.CFError = recordDecl;
-          isCFError = true;
-        }
-      }
-    }
-    // If this is CFErrorRef*, report it as such.
-    if (isCFError && numNormalPointers == 2 && numTypeSpecifierPointers < 2) {
+    if (numNormalPointers == 2 && numTypeSpecifierPointers < 2 &&
+        S.isCFError(recordDecl)) {
       return PointerDeclaratorKind::CFErrorRefPointer;
     }
     break;
@@ -4092,6 +4069,31 @@ classifyPointerDeclarator(Sema &S, QualType type, Declarator &declarator,
   }
 }

+bool Sema::isCFError(RecordDecl *RD) {
+  // If we already know about CFError, test it directly.
+  if (CFError)
+    return CFError == RD;
+
+  // Check whether this is CFError, which we identify based on its bridge to
+  // NSError. CFErrorRef used to be declared with "objc_bridge" but is now
+  // declared with "objc_bridge_mutable", so look for either one of the two
+  // attributes.
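+  // For illustration (a hypothetical spelling, not quoted from any SDK
+  // header), the declaration pattern being matched looks like:
+  //   typedef struct __attribute__((objc_bridge_mutable(NSError)))
+  //       __CFError *CFErrorRef;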
+  if (RD->getTagKind() == TTK_Struct) {
+    IdentifierInfo *bridgedType = nullptr;
+    if (auto bridgeAttr = RD->getAttr<ObjCBridgeAttr>())
+      bridgedType = bridgeAttr->getBridgedType();
+    else if (auto bridgeAttr = RD->getAttr<ObjCBridgeMutableAttr>())
+      bridgedType = bridgeAttr->getBridgedType();
+
+    if (bridgedType == getNSErrorIdent()) {
+      CFError = RD;
+      return true;
+    }
+  }
+
+  return false;
+}
+
 static FileID getNullabilityCompletenessCheckFileID(Sema &S,
                                                     SourceLocation loc) {
   // If we're anywhere in a function, method, or closure context, don't perform

From 041da0d828e39d849c99adf1391aaa9291f4310f Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu"
Date: Tue, 8 Sep 2020 16:01:30 -0400
Subject: [PATCH 0097/1079] [HIP] Add gfx1031 and gfx1030

Differential Revision: https://reviews.llvm.org/D87324
---
 clang/lib/Basic/Cuda.cpp               |  2 +-
 clang/test/Driver/hip-offload-arch.hip | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Driver/hip-offload-arch.hip

diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 709185707bd9c..2abbe3e81e0a2 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -84,7 +84,7 @@ CudaArchToStringMap arch_names[] = {
   GFX(810),  // stoney
   GFX(900),  // vega, instinct
   GFX(902), GFX(904), GFX(906), GFX(908), GFX(909),
-  GFX(1010), GFX(1011), GFX(1012),
+  GFX(1010), GFX(1011), GFX(1012), GFX(1030), GFX(1031)
   // clang-format on
 };
 #undef SM
diff --git a/clang/test/Driver/hip-offload-arch.hip b/clang/test/Driver/hip-offload-arch.hip
new file mode 100644
index 0000000000000..4cd37b5815f73
--- /dev/null
+++ b/clang/test/Driver/hip-offload-arch.hip
@@ -0,0 +1,10 @@
+// REQUIRES: clang-driver, x86-registered-target, amdgpu-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   --offload-arch=gfx1030 \
+// RUN:   --offload-arch=gfx1031 \
+// RUN:   -nogpuinc -nogpulib \
+// RUN:   %s 2>&1 | FileCheck %s
+
+// CHECK: {{"[^"]*clang[^"]*".* "-target-cpu" "gfx1030"}}
+// CHECK: {{"[^"]*clang[^"]*".* "-target-cpu" "gfx1031"}}

From 5c463d107d3c26fc5573f31b838a8a3a1e4b5065 Mon Sep 17 00:00:00 2001
From: Walter Erquinigo
Date: Tue, 8 Sep 2020 13:40:42 -0700
Subject: [PATCH 0098/1079] Revert "Retry of D84974"

This reverts commit 5b2b4f331d78f326e5e29166bec5ad92c864343d.
This caused a link error in http://lab.llvm.org:8011/builders/lldb-x64-windows-ninja/builds/18794/steps/build/logs/stdio --- .../tools/lldb-vscode/lldbvscode_testcase.py | 14 +- .../test/tools/lldb-vscode/vscode.py | 30 +--- .../tools/lldb-vscode/runInTerminal/Makefile | 3 - .../runInTerminal/TestVSCode_runInTerminal.py | 48 ----- .../tools/lldb-vscode/runInTerminal/main.c | 11 -- lldb/tools/lldb-vscode/JSONUtils.cpp | 40 ----- lldb/tools/lldb-vscode/JSONUtils.h | 12 -- lldb/tools/lldb-vscode/VSCode.cpp | 70 +------- lldb/tools/lldb-vscode/VSCode.h | 45 ----- lldb/tools/lldb-vscode/lldb-vscode.cpp | 167 ++++++++---------- lldb/tools/lldb-vscode/package.json | 5 - 11 files changed, 82 insertions(+), 363 deletions(-) delete mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile delete mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py delete mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/main.c diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py index 5710751ec34bf..fa5a9c0db1ebd 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py @@ -282,7 +282,7 @@ def launch(self, program=None, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None, sourcePath=None, debuggerRoot=None, launchCommands=None, - sourceMap=None, disconnectAutomatically=True, runInTerminal=False): + sourceMap=None, disconnectAutomatically=True): '''Sending launch request to vscode ''' @@ -316,16 +316,10 @@ def cleanup(): sourcePath=sourcePath, debuggerRoot=debuggerRoot, launchCommands=launchCommands, - sourceMap=sourceMap, - runInTerminal=runInTerminal) + sourceMap=sourceMap) if not (response and response['success']): self.assertTrue(response['success'], 'launch failed (%s)' % (response['message'])) - # We need to trigger a request_configurationDone after we've successfully - # attached a runInTerminal process to finish initialization. - if runInTerminal: - self.vscode.request_configurationDone() - def build_and_launch(self, program, args=None, cwd=None, env=None, stopOnEntry=False, disableASLR=True, @@ -333,7 +327,7 @@ def build_and_launch(self, program, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None, sourcePath=None, - debuggerRoot=None, runInTerminal=False): + debuggerRoot=None): '''Build the default Makefile target, create the VSCode debug adaptor, and launch the process. 
''' @@ -343,4 +337,4 @@ def build_and_launch(self, program, args=None, cwd=None, env=None, self.launch(program, args, cwd, env, stopOnEntry, disableASLR, disableSTDIO, shellExpandArguments, trace, initCommands, preRunCommands, stopCommands, exitCommands, - terminateCommands, sourcePath, debuggerRoot, runInTerminal=runInTerminal) + terminateCommands, sourcePath, debuggerRoot) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py index 834e33ef5c3da..6b1c1c961b545 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py @@ -300,29 +300,12 @@ def send_recv(self, command): self.send_packet(command) done = False while not done: - response_or_request = self.recv_packet(filter_type=['response', 'request']) - if response_or_request is None: + response = self.recv_packet(filter_type='response') + if response is None: desc = 'no response for "%s"' % (command['command']) raise ValueError(desc) - if response_or_request['type'] == 'response': - self.validate_response(command, response_or_request) - return response_or_request - else: - if response_or_request['command'] == 'runInTerminal': - subprocess.Popen(response_or_request['arguments']['args'], - env=response_or_request['arguments']['env']) - self.send_packet({ - "type": "response", - "seq": -1, - "request_seq": response_or_request['seq'], - "success": True, - "command": "runInTerminal", - "body": {} - }, set_sequence=False) - else: - desc = 'unkonwn reverse request "%s"' % (response_or_request['command']) - raise ValueError(desc) - + self.validate_response(command, response) + return response return None def wait_for_event(self, filter=None, timeout=None): @@ -616,8 +599,7 @@ def request_launch(self, program, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None ,sourcePath=None, - debuggerRoot=None, launchCommands=None, sourceMap=None, - runInTerminal=False): + debuggerRoot=None, launchCommands=None, sourceMap=None): args_dict = { 'program': program } @@ -656,8 +638,6 @@ def request_launch(self, program, args=None, cwd=None, env=None, args_dict['launchCommands'] = launchCommands if sourceMap: args_dict['sourceMap'] = sourceMap - if runInTerminal: - args_dict['runInTerminal'] = runInTerminal command_dict = { 'command': 'launch', 'type': 'request', diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile b/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile deleted file mode 100644 index 10495940055b6..0000000000000 --- a/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -C_SOURCES := main.c - -include Makefile.rules diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py b/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py deleted file mode 100644 index 6a463dfacc1f9..0000000000000 --- a/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py +++ /dev/null @@ -1,48 +0,0 @@ -""" -Test lldb-vscode runInTerminal reverse request -""" - - -import unittest2 -import vscode -from lldbsuite.test.decorators import * -from lldbsuite.test.lldbtest import * -from lldbsuite.test import lldbutil -import lldbvscode_testcase -import time -import os - - -class TestVSCode_runInTerminal(lldbvscode_testcase.VSCodeTestCaseBase): - - mydir = TestBase.compute_mydir(__file__) - - 
@skipUnlessDarwin - @skipIfRemote - def test_runInTerminal(self): - ''' - Tests the "runInTerminal" reverse request. It makes sure that the IDE can - launch the inferior with the correct environment variables and arguments. - ''' - program = self.getBuildArtifact("a.out") - source = 'main.c' - self.build_and_launch(program, stopOnEntry=True, runInTerminal=True, args=["foobar"], env=["FOO=bar"]) - breakpoint_line = line_number(source, '// breakpoint') - - self.set_source_breakpoints(source, [breakpoint_line]) - self.continue_to_next_stop() - - # We verify we actually stopped inside the loop - counter = int(self.vscode.get_local_variable_value('counter')) - self.assertTrue(counter > 0) - - # We verify we were able to set the launch arguments - argc = int(self.vscode.get_local_variable_value('argc')) - self.assertEqual(argc, 2) - - argv1 = self.vscode.request_evaluate('argv[1]')['body']['result'] - self.assertIn('foobar', argv1) - - # We verify we were able to set the environment - env = self.vscode.request_evaluate('foo')['body']['result'] - self.assertIn('bar', env) diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c b/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c deleted file mode 100644 index 676bd830e657b..0000000000000 --- a/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c +++ /dev/null @@ -1,11 +0,0 @@ -#include -#include -#include - -int main(int argc, char *argv[]) { - const char *foo = getenv("FOO"); - for (int counter = 1;; counter++) { - sleep(1); // breakpoint - } - return 0; -} diff --git a/lldb/tools/lldb-vscode/JSONUtils.cpp b/lldb/tools/lldb-vscode/JSONUtils.cpp index 044bfd13ec463..36156ca2c42f9 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.cpp +++ b/lldb/tools/lldb-vscode/JSONUtils.cpp @@ -998,44 +998,4 @@ llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit) { return llvm::json::Value(std::move(object)); } -/// See -/// https://microsoft.github.io/debug-adapter-protocol/specification#Reverse_Requests_RunInTerminal -llvm::json::Object -CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request) { - llvm::json::Object reverse_request; - reverse_request.try_emplace("type", "request"); - reverse_request.try_emplace("command", "runInTerminal"); - - llvm::json::Object run_in_terminal_args; - // This indicates the IDE to open an embedded terminal, instead of opening the - // terminal in a new window. 
- run_in_terminal_args.try_emplace("kind", "integrated"); - - auto launch_request_arguments = launch_request.getObject("arguments"); - std::vector args = GetStrings(launch_request_arguments, "args"); - // The program path must be the first entry in the "args" field - args.insert(args.begin(), - GetString(launch_request_arguments, "program").str()); - run_in_terminal_args.try_emplace("args", args); - - const auto cwd = GetString(launch_request_arguments, "cwd"); - if (!cwd.empty()) - run_in_terminal_args.try_emplace("cwd", cwd); - - // We need to convert the input list of environments variables into a - // dictionary - std::vector envs = GetStrings(launch_request_arguments, "env"); - llvm::json::Object environment; - for (const std::string &env : envs) { - size_t index = env.find("="); - environment.try_emplace(env.substr(0, index), env.substr(index + 1)); - } - run_in_terminal_args.try_emplace("env", - llvm::json::Value(std::move(environment))); - - reverse_request.try_emplace( - "arguments", llvm::json::Value(std::move(run_in_terminal_args))); - return reverse_request; -} - } // namespace lldb_vscode diff --git a/lldb/tools/lldb-vscode/JSONUtils.h b/lldb/tools/lldb-vscode/JSONUtils.h index 88cbef9e5fdd4..df4428f390ba2 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.h +++ b/lldb/tools/lldb-vscode/JSONUtils.h @@ -443,18 +443,6 @@ llvm::json::Value CreateVariable(lldb::SBValue v, int64_t variablesReference, llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit); -/// Create a runInTerminal reverse request object -/// -/// \param[in] launch_request -/// The original launch_request object whose fields are used to construct -/// the reverse request object. -/// -/// \return -/// A "runInTerminal" JSON object that follows the specification outlined by -/// Microsoft. 
-llvm::json::Object -CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request); - } // namespace lldb_vscode #endif diff --git a/lldb/tools/lldb-vscode/VSCode.cpp b/lldb/tools/lldb-vscode/VSCode.cpp index d57330ce6ff1a..537cae7868631 100644 --- a/lldb/tools/lldb-vscode/VSCode.cpp +++ b/lldb/tools/lldb-vscode/VSCode.cpp @@ -38,8 +38,7 @@ VSCode::VSCode() {"swift_catch", "Swift Catch", lldb::eLanguageTypeSwift}, {"swift_throw", "Swift Throw", lldb::eLanguageTypeSwift}}), focus_tid(LLDB_INVALID_THREAD_ID), sent_terminated_event(false), - stop_at_entry(false), is_attach(false), - reverse_request_seq(0), waiting_for_run_in_terminal(false) { + stop_at_entry(false), is_attach(false) { const char *log_file_path = getenv("LLDBVSCODE_LOG"); #if defined(_WIN32) // Windows opens stdout and stdin in text mode which converts \n to 13,10 @@ -363,71 +362,4 @@ void VSCode::SetTarget(const lldb::SBTarget target) { } } -PacketStatus VSCode::GetObject(llvm::json::Object &object) { - std::string json = ReadJSON(); - if (json.empty()) - return PacketStatus::EndOfFile; - - llvm::StringRef json_sref(json); - llvm::Expected json_value = llvm::json::parse(json_sref); - if (!json_value) { - auto error = json_value.takeError(); - if (log) { - std::string error_str; - llvm::raw_string_ostream strm(error_str); - strm << error; - strm.flush(); - *log << "error: failed to parse JSON: " << error_str << std::endl - << json << std::endl; - } - return PacketStatus::JSONMalformed; - } - object = *json_value->getAsObject(); - if (!json_value->getAsObject()) { - if (log) - *log << "error: json packet isn't a object" << std::endl; - return PacketStatus::JSONNotObject; - } - return PacketStatus::Success; -} - -bool VSCode::HandleObject(const llvm::json::Object &object) { - const auto packet_type = GetString(object, "type"); - if (packet_type == "request") { - const auto command = GetString(object, "command"); - auto handler_pos = request_handlers.find(std::string(command)); - if (handler_pos != request_handlers.end()) { - handler_pos->second(object); - return true; // Success - } else { - if (log) - *log << "error: unhandled command \"" << command.data() << std::endl; - return false; // Fail - } - } - return false; -} - -PacketStatus VSCode::SendReverseRequest(llvm::json::Object request, - llvm::json::Object &response) { - request.try_emplace("seq", ++reverse_request_seq); - SendJSON(llvm::json::Value(std::move(request))); - while (true) { - PacketStatus status = GetObject(response); - const auto packet_type = GetString(response, "type"); - if (packet_type == "response") - return status; - else { - // Not our response, we got another packet - HandleObject(response); - } - } - return PacketStatus::EndOfFile; -} - -void VSCode::RegisterRequestCallback(std::string request, - RequestCallback callback) { - request_handlers[request] = callback; -} - } // namespace lldb_vscode diff --git a/lldb/tools/lldb-vscode/VSCode.h b/lldb/tools/lldb-vscode/VSCode.h index 4a20c56c53eb0..88a0c08de2454 100644 --- a/lldb/tools/lldb-vscode/VSCode.h +++ b/lldb/tools/lldb-vscode/VSCode.h @@ -9,7 +9,6 @@ #ifndef LLDB_TOOLS_LLDB_VSCODE_VSCODE_H #define LLDB_TOOLS_LLDB_VSCODE_VSCODE_H -#include #include #include #include @@ -20,7 +19,6 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/JSON.h" #include "llvm/Support/raw_ostream.h" #include "lldb/API/SBAttachInfo.h" @@ -67,15 +65,6 @@ enum class OutputType { Console, Stdout, Stderr, Telemetry }; enum 
VSCodeBroadcasterBits { eBroadcastBitStopEventThread = 1u << 0 }; -typedef void (*RequestCallback)(const llvm::json::Object &command); - -enum class PacketStatus { - Success = 0, - EndOfFile, - JSONMalformed, - JSONNotObject -}; - struct VSCode { InputStream input; OutputStream output; @@ -102,10 +91,6 @@ struct VSCode { bool sent_terminated_event; bool stop_at_entry; bool is_attach; - uint32_t reverse_request_seq; - std::map request_handlers; - std::condition_variable request_in_terminal_cv; - bool waiting_for_run_in_terminal; // Keep track of the last stop thread index IDs as threads won't go away // unless we send a "thread" event to indicate the thread exited. llvm::DenseSet thread_ids; @@ -167,36 +152,6 @@ struct VSCode { /// Set given target object as a current target for lldb-vscode and start /// listeing for its breakpoint events. void SetTarget(const lldb::SBTarget target); - - const std::map &GetRequestHandlers(); - - PacketStatus GetObject(llvm::json::Object &object); - bool HandleObject(const llvm::json::Object &object); - - /// Send a Debug Adapter Protocol reverse request to the IDE - /// - /// \param[in] request - /// The payload of the request to send. - /// - /// \param[out] response - /// The response of the IDE. It might be undefined if there was an error. - /// - /// \return - /// A \a PacketStatus object indicating the sucess or failure of the - /// request. - PacketStatus SendReverseRequest(llvm::json::Object request, - llvm::json::Object &response); - - /// Registers a callback handler for a Debug Adapter Protocol request - /// - /// \param[in] request - /// The name of the request following the Debug Adapter Protocol - /// specification. - /// - /// \param[in] callback - /// The callback to execute when the given request is triggered by the - /// IDE. - void RegisterRequestCallback(std::string request, RequestCallback callback); }; extern VSCode g_vsc; diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index ee01822ba6217..54f2e653d0697 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -384,12 +384,7 @@ void EventThreadFunction() { break; case lldb::eStateSuspended: break; - case lldb::eStateStopped: { - if (g_vsc.waiting_for_run_in_terminal) { - g_vsc.waiting_for_run_in_terminal = false; - g_vsc.request_in_terminal_cv.notify_one(); - } - } + case lldb::eStateStopped: // Only report a stopped event if the process was not restarted. if (!lldb::SBProcess::GetRestartedFromEvent(event)) { SendStdOutStdErr(process); @@ -1379,9 +1374,6 @@ void request_initialize(const llvm::json::Object &request) { filters.emplace_back(CreateExceptionBreakpointFilter(exc_bp)); } body.try_emplace("exceptionBreakpointFilters", std::move(filters)); - // The debug adapter supports launching a debugee in intergrated VSCode - // terminal. - body.try_emplace("supportsRunInTerminalRequest", true); // The debug adapter supports stepping back via the stepBack and // reverseContinue requests. body.try_emplace("supportsStepBack", false); @@ -1441,49 +1433,6 @@ void request_initialize(const llvm::json::Object &request) { g_vsc.SendJSON(llvm::json::Value(std::move(response))); } -void request_runInTerminal(const llvm::json::Object &launch_request, - llvm::json::Object &launch_response) { - // We have already created a target that has a valid "program" path to the - // executable. We will attach to the next process whose name matches that - // of the target's. 
- g_vsc.is_attach = true; - lldb::SBAttachInfo attach_info; - lldb::SBError error; - attach_info.SetWaitForLaunch(true, /*async*/ true); - g_vsc.target.Attach(attach_info, error); - - llvm::json::Object reverse_request = - CreateRunInTerminalReverseRequest(launch_request); - llvm::json::Object reverse_response; - lldb_vscode::PacketStatus status = - g_vsc.SendReverseRequest(reverse_request, reverse_response); - if (status != lldb_vscode::PacketStatus::Success) - error.SetErrorString("Process cannot be launched by IDE."); - - if (error.Success()) { - // Wait for the attach stop event to happen or for a timeout. - g_vsc.waiting_for_run_in_terminal = true; - static std::mutex mutex; - std::unique_lock locker(mutex); - g_vsc.request_in_terminal_cv.wait_for(locker, std::chrono::seconds(10)); - - auto attached_pid = g_vsc.target.GetProcess().GetProcessID(); - if (attached_pid == LLDB_INVALID_PROCESS_ID) - error.SetErrorString("Failed to attach to a process"); - else - SendProcessEvent(Attach); - } - - if (error.Fail()) { - launch_response["success"] = llvm::json::Value(false); - EmplaceSafeString(launch_response, "message", - std::string(error.GetCString())); - } else { - launch_response["success"] = llvm::json::Value(true); - g_vsc.SendJSON(CreateEventObject("initialized")); - } -} - // "LaunchRequest": { // "allOf": [ { "$ref": "#/definitions/Request" }, { // "type": "object", @@ -1556,12 +1505,6 @@ void request_launch(const llvm::json::Object &request) { return; } - if (GetBoolean(arguments, "runInTerminal", false)) { - request_runInTerminal(request, response); - g_vsc.SendJSON(llvm::json::Value(std::move(response))); - return; - } - // Instantiate a launch info instance for the target. auto launch_info = g_vsc.target.GetLaunchInfo(); @@ -2888,35 +2831,39 @@ void request__testGetTargetBreakpoints(const llvm::json::Object &request) { g_vsc.SendJSON(llvm::json::Value(std::move(response))); } -void RegisterRequestCallbacks() { - g_vsc.RegisterRequestCallback("attach", request_attach); - g_vsc.RegisterRequestCallback("completions", request_completions); - g_vsc.RegisterRequestCallback("continue", request_continue); - g_vsc.RegisterRequestCallback("configurationDone", request_configurationDone); - g_vsc.RegisterRequestCallback("disconnect", request_disconnect); - g_vsc.RegisterRequestCallback("evaluate", request_evaluate); - g_vsc.RegisterRequestCallback("exceptionInfo", request_exceptionInfo); - g_vsc.RegisterRequestCallback("getCompileUnits", request_getCompileUnits); - g_vsc.RegisterRequestCallback("initialize", request_initialize); - g_vsc.RegisterRequestCallback("launch", request_launch); - g_vsc.RegisterRequestCallback("next", request_next); - g_vsc.RegisterRequestCallback("pause", request_pause); - g_vsc.RegisterRequestCallback("scopes", request_scopes); - g_vsc.RegisterRequestCallback("setBreakpoints", request_setBreakpoints); - g_vsc.RegisterRequestCallback("setExceptionBreakpoints", - request_setExceptionBreakpoints); - g_vsc.RegisterRequestCallback("setFunctionBreakpoints", - request_setFunctionBreakpoints); - g_vsc.RegisterRequestCallback("setVariable", request_setVariable); - g_vsc.RegisterRequestCallback("source", request_source); - g_vsc.RegisterRequestCallback("stackTrace", request_stackTrace); - g_vsc.RegisterRequestCallback("stepIn", request_stepIn); - g_vsc.RegisterRequestCallback("stepOut", request_stepOut); - g_vsc.RegisterRequestCallback("threads", request_threads); - g_vsc.RegisterRequestCallback("variables", request_variables); - // Testing requests - 
g_vsc.RegisterRequestCallback("_testGetTargetBreakpoints", - request__testGetTargetBreakpoints); +const std::map &GetRequestHandlers() { +#define REQUEST_CALLBACK(name) \ + { #name, request_##name } + static std::map g_request_handlers = { + // VSCode Debug Adaptor requests + REQUEST_CALLBACK(attach), + REQUEST_CALLBACK(completions), + REQUEST_CALLBACK(continue), + REQUEST_CALLBACK(configurationDone), + REQUEST_CALLBACK(disconnect), + REQUEST_CALLBACK(evaluate), + REQUEST_CALLBACK(exceptionInfo), + REQUEST_CALLBACK(getCompileUnits), + REQUEST_CALLBACK(initialize), + REQUEST_CALLBACK(launch), + REQUEST_CALLBACK(next), + REQUEST_CALLBACK(pause), + REQUEST_CALLBACK(scopes), + REQUEST_CALLBACK(setBreakpoints), + REQUEST_CALLBACK(setExceptionBreakpoints), + REQUEST_CALLBACK(setFunctionBreakpoints), + REQUEST_CALLBACK(setVariable), + REQUEST_CALLBACK(source), + REQUEST_CALLBACK(stackTrace), + REQUEST_CALLBACK(stepIn), + REQUEST_CALLBACK(stepOut), + REQUEST_CALLBACK(threads), + REQUEST_CALLBACK(variables), + // Testing requests + REQUEST_CALLBACK(_testGetTargetBreakpoints), + }; +#undef REQUEST_CALLBACK + return g_request_handlers; } } // anonymous namespace @@ -2948,8 +2895,6 @@ int main(int argc, char *argv[]) { // Initialize LLDB first before we do anything. lldb::SBDebugger::Initialize(); - RegisterRequestCallbacks(); - int portno = -1; LLDBVSCodeOptTable T; @@ -2992,17 +2937,49 @@ int main(int argc, char *argv[]) { g_vsc.output.descriptor = StreamDescriptor::from_file(fileno(stdout), false); } + auto request_handlers = GetRequestHandlers(); uint32_t packet_idx = 0; while (!g_vsc.sent_terminated_event) { - llvm::json::Object object; - lldb_vscode::PacketStatus status = g_vsc.GetObject(object); - if (status == lldb_vscode::PacketStatus::EndOfFile) + std::string json = g_vsc.ReadJSON(); + if (json.empty()) break; - if (status != lldb_vscode::PacketStatus::Success) - return 1; // Fatal error - if (!g_vsc.HandleObject(object)) + llvm::StringRef json_sref(json); + llvm::Expected json_value = llvm::json::parse(json_sref); + if (!json_value) { + auto error = json_value.takeError(); + if (g_vsc.log) { + std::string error_str; + llvm::raw_string_ostream strm(error_str); + strm << error; + strm.flush(); + + *g_vsc.log << "error: failed to parse JSON: " << error_str << std::endl + << json << std::endl; + } + return 1; + } + + auto object = json_value->getAsObject(); + if (!object) { + if (g_vsc.log) + *g_vsc.log << "error: json packet isn't a object" << std::endl; return 1; + } + + const auto packet_type = GetString(object, "type"); + if (packet_type == "request") { + const auto command = GetString(object, "command"); + auto handler_pos = request_handlers.find(std::string(command)); + if (handler_pos != request_handlers.end()) { + handler_pos->second(*object); + } else { + if (g_vsc.log) + *g_vsc.log << "error: unhandled command \"" << command.data() + << std::endl; + return 1; + } + } ++packet_idx; } diff --git a/lldb/tools/lldb-vscode/package.json b/lldb/tools/lldb-vscode/package.json index 9077ab51dd7fa..29ca06dd17d63 100644 --- a/lldb/tools/lldb-vscode/package.json +++ b/lldb/tools/lldb-vscode/package.json @@ -175,11 +175,6 @@ "type": "array", "description": "Commands executed at the end of debugging session.", "default": [] - }, - "runInTerminal": { - "type": "boolean", - "description": "Launch the program inside an integrated terminal in the IDE. 
Useful for debugging interactive command line programs",
- "default": false
 }
 }
 },

From c05095cd6865a95ee848cd95d11643969a81a241 Mon Sep 17 00:00:00 2001
From: Vitaly Buka
Date: Tue, 1 Sep 2020 04:49:49 -0700
Subject: [PATCH 0099/1079] [Asan] Don't crash if metadata is not initialized

Fixes https://github.com/google/sanitizers/issues/1193.

AsanChunk can still be uninitialized just after it is returned from the
secondary allocator. If lsan starts a scan just before the metadata
assignment, it can fail to find the corresponding AsanChunk.

It should be safe to ignore this and let lsan assume that the AsanChunk is
at the beginning of the block. The block comes from the secondary allocator
and was created with mmap, so it should not contain any pointers; at worst
this makes lsan miss some leaks.

Something similar already happens for the primary allocator: if it can't
find the real AsanChunk, it falls back and assumes that the block starts
with an AsanChunk. Then, if the block has already been returned to the
allocator, we have garbage in the AsanChunk and may scan dead memory,
hiding some leaks. I'll fix this in D87135.

Reviewed By: morehouse

Differential Revision: https://reviews.llvm.org/D86931
---
 compiler-rt/lib/asan/asan_allocator.cpp | 22 +++++--------
 .../test/asan/TestCases/lsan_crash.cpp | 31 +++++++++++++++++++
 2 files changed, 39 insertions(+), 14 deletions(-)
 create mode 100644 compiler-rt/test/asan/TestCases/lsan_crash.cpp

diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp
index 7334b7200fc4c..1d8d5bcad1dc0 100644
--- a/compiler-rt/lib/asan/asan_allocator.cpp
+++ b/compiler-rt/lib/asan/asan_allocator.cpp
@@ -730,6 +730,9 @@ struct Allocator {
 // -------------------------- Chunk lookup ----------------------
 // Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg).
+ // Returns nullptr if AsanChunk is not yet initialized just after
+ // get_allocator().Allocate(), or is being destroyed just before
+ // get_allocator().Deallocate().
 AsanChunk *GetAsanChunk(void *alloc_beg) {
 if (!alloc_beg)
 return nullptr;
@@ -1102,26 +1105,17 @@ void GetUserBeginDebug(uptr chunk) {
 uptr GetUserBegin(uptr chunk) {
 __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddrFastLocked(chunk);
- if (!m) {
- Printf(
- "ASAN is about to crash with a CHECK failure.\n"
- "The ASAN developers are trying to chase down this bug,\n"
- "so if you've encountered this bug please let us know.\n"
- "See also: https://github.com/google/sanitizers/issues/1193\n"
- "Internal ref b/149237057\n"
- "chunk: %p caller %p __lsan_current_stage %s\n",
- chunk, GET_CALLER_PC(), __lsan_current_stage);
- GetUserBeginDebug(chunk);
- }
- CHECK(m);
- return m->Beg();
+ return m ? m->Beg() : 0;
 }

 LsanMetadata::LsanMetadata(uptr chunk) {
- metadata_ = reinterpret_cast<void *>(chunk - __asan::kChunkHeaderSize);
+ metadata_ = chunk ?
reinterpret_cast<void *>(chunk - __asan::kChunkHeaderSize)
+ : nullptr;
 }

 bool LsanMetadata::allocated() const {
+ if (!metadata_)
+ return false;
 __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_);
 return atomic_load(&m->chunk_state, memory_order_relaxed) ==
 __asan::CHUNK_ALLOCATED;
diff --git a/compiler-rt/test/asan/TestCases/lsan_crash.cpp b/compiler-rt/test/asan/TestCases/lsan_crash.cpp
new file mode 100644
index 0000000000000..23c2569a0b73c
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/lsan_crash.cpp
@@ -0,0 +1,31 @@
+// RUN: %clangxx_asan -O2 %s -o %t && %run %t

+#include <atomic>
+#include <memory>
+#include <sanitizer/lsan_interface.h>
+#include <thread>
+#include <vector>

+std::atomic<bool> done;

+void foo() {
+ std::unique_ptr<char[]> mem;

+ while (!done)
+ mem.reset(new char[1000000]);
+}

+int main() {
+ std::vector<std::thread> threads;
+ for (int i = 0; i < 10; ++i)
+ threads.emplace_back(foo);

+ for (int i = 0; i < 100; ++i)
+ __lsan_do_recoverable_leak_check();

+ done = true;
+ for (auto &t : threads)
+ t.join();

+ return 0;
+}

From 27650a5fed14a99b5c3640444abb0012ca28f3fb Mon Sep 17 00:00:00 2001
From: Vitaly Buka
Date: Tue, 1 Sep 2020 05:26:53 -0700
Subject: [PATCH 0100/1079] [NFC][Asan] Remove Debug code

Used for https://github.com/google/sanitizers/issues/1193

Reviewed By: morehouse

Differential Revision: https://reviews.llvm.org/D86933
---
 compiler-rt/lib/asan/asan_allocator.cpp | 38 -------------------
 compiler-rt/lib/lsan/lsan_common.cpp | 7 ----
 .../sanitizer_allocator_combined.h | 6 ---
 .../sanitizer_allocator_primary32.h | 1 -
 .../sanitizer_allocator_primary64.h | 24 ------------
 5 files changed, 76 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp
index 1d8d5bcad1dc0..a15c569b42ba0 100644
--- a/compiler-rt/lib/asan/asan_allocator.cpp
+++ b/compiler-rt/lib/asan/asan_allocator.cpp
@@ -750,26 +750,6 @@ struct Allocator {
 return reinterpret_cast<AsanChunk *>(alloc_beg);
 }
- AsanChunk *GetAsanChunkDebug(void *alloc_beg) {
- if (!alloc_beg)
- return nullptr;
- if (!allocator.FromPrimary(alloc_beg)) {
- uptr *meta = reinterpret_cast<uptr *>(allocator.GetMetaData(alloc_beg));
- AsanChunk *m = reinterpret_cast<AsanChunk *>(meta[1]);
- Printf("GetAsanChunkDebug1 alloc_beg %p meta %p m %p\n", alloc_beg, meta,
- m);
- return m;
- }
- uptr *alloc_magic = reinterpret_cast<uptr *>(alloc_beg);
- Printf(
- "GetAsanChunkDebug2 alloc_beg %p alloc_magic %p alloc_magic[0] %p "
- "alloc_magic[1] %p\n",
- alloc_beg, alloc_magic, alloc_magic[0], alloc_magic[1]);
- if (alloc_magic[0] == kAllocBegMagic)
- return reinterpret_cast<AsanChunk *>(alloc_magic[1]);
- return reinterpret_cast<AsanChunk *>(alloc_beg);
- }
-
 AsanChunk *GetAsanChunkByAddr(uptr p) {
 void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast<void *>(p));
 return GetAsanChunk(alloc_beg);
@@ -782,14 +762,6 @@ struct Allocator {
 return GetAsanChunk(alloc_beg);
 }
- AsanChunk *GetAsanChunkByAddrFastLockedDebug(uptr p) {
- void *alloc_beg =
- allocator.GetBlockBeginFastLockedDebug(reinterpret_cast<void *>(p));
- Printf("GetAsanChunkByAddrFastLockedDebug p %p alloc_beg %p\n", p,
- alloc_beg);
- return GetAsanChunkDebug(alloc_beg);
- }
-
 uptr AllocationSize(uptr p) {
 AsanChunk *m = GetAsanChunkByAddr(p);
 if (!m) return 0;
@@ -1093,16 +1065,6 @@ uptr PointsIntoChunk(void* p) {
 return 0;
 }
-// Debug code. Delete once issue #1193 is chased down.
-extern "C" SANITIZER_WEAK_ATTRIBUTE const char *__lsan_current_stage; - -void GetUserBeginDebug(uptr chunk) { - Printf("GetUserBeginDebug1 chunk %p\n", chunk); - __asan::AsanChunk *m = - __asan::instance.GetAsanChunkByAddrFastLockedDebug(chunk); - Printf("GetUserBeginDebug2 m %p\n", m); -} - uptr GetUserBegin(uptr chunk) { __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddrFastLocked(chunk); return m ? m->Beg() : 0; diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp index 93ce0ddc3d68e..41b5ae5483299 100644 --- a/compiler-rt/lib/lsan/lsan_common.cpp +++ b/compiler-rt/lib/lsan/lsan_common.cpp @@ -25,8 +25,6 @@ #include "sanitizer_common/sanitizer_thread_registry.h" #include "sanitizer_common/sanitizer_tls_get_addr.h" -extern "C" const char *__lsan_current_stage = "unknown"; - #if CAN_SANITIZE_LEAKS namespace __lsan { @@ -362,7 +360,6 @@ static void FloodFillTag(Frontier *frontier, ChunkTag tag) { // ForEachChunk callback. If the chunk is marked as leaked, marks all chunks // which are reachable from it as indirectly leaked. static void MarkIndirectlyLeakedCb(uptr chunk, void *arg) { - __lsan_current_stage = "MarkIndirectlyLeakedCb"; chunk = GetUserBegin(chunk); LsanMetadata m(chunk); if (m.allocated() && m.tag() != kReachable) { @@ -375,7 +372,6 @@ static void MarkIndirectlyLeakedCb(uptr chunk, void *arg) { // frontier. static void CollectIgnoredCb(uptr chunk, void *arg) { CHECK(arg); - __lsan_current_stage = "CollectIgnoredCb"; chunk = GetUserBegin(chunk); LsanMetadata m(chunk); if (m.allocated() && m.tag() == kIgnored) { @@ -405,7 +401,6 @@ struct InvalidPCParam { static void MarkInvalidPCCb(uptr chunk, void *arg) { CHECK(arg); InvalidPCParam *param = reinterpret_cast(arg); - __lsan_current_stage = "MarkInvalidPCCb"; chunk = GetUserBegin(chunk); LsanMetadata m(chunk); if (m.allocated() && m.tag() != kReachable && m.tag() != kIgnored) { @@ -481,7 +476,6 @@ static void ClassifyAllChunks(SuspendedThreadsList const &suspended_threads, // ForEachChunk callback. Resets the tags to pre-leak-check state. 
static void ResetTagsCb(uptr chunk, void *arg) {
 (void)arg;
- __lsan_current_stage = "ResetTagsCb";
 chunk = GetUserBegin(chunk);
 LsanMetadata m(chunk);
 if (m.allocated() && m.tag() != kIgnored)
@@ -498,7 +492,6 @@ static void PrintStackTraceById(u32 stack_trace_id) {
 static void CollectLeaksCb(uptr chunk, void *arg) {
 CHECK(arg);
 LeakReport *leak_report = reinterpret_cast<LeakReport *>(arg);
- __lsan_current_stage = "CollectLeaksCb";
 chunk = GetUserBegin(chunk);
 LsanMetadata m(chunk);
 if (!m.allocated()) return;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h
index 0cf483da1e5c8..33f89d6d49928 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h
@@ -142,12 +142,6 @@ class CombinedAllocator {
 return secondary_.GetBlockBeginFastLocked(p);
 }
- void *GetBlockBeginFastLockedDebug(void *p) {
- if (primary_.PointerIsMine(p))
- return primary_.GetBlockBeginDebug(p);
- return secondary_.GetBlockBeginFastLocked(p);
- }
-
 uptr GetActuallyAllocatedSize(void *p) {
 if (primary_.PointerIsMine(p))
 return primary_.GetActuallyAllocatedSize(p);
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h
index 2c25a687c5f08..b90dabbf77692 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h
@@ -211,7 +211,6 @@ class SizeClassAllocator32 {
 uptr res = beg + (n * (u32)size);
 return reinterpret_cast<void *>(res);
 }
- void *GetBlockBeginDebug(const void *p) { return GetBlockBegin(p); }

 uptr GetActuallyAllocatedSize(void *p) {
 CHECK(PointerIsMine(p));
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h
index a6126fc6265eb..774c09e424952 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h
@@ -199,30 +199,6 @@ class SizeClassAllocator64 {
 return nullptr;
 }
- void *GetBlockBeginDebug(const void *p) {
- uptr class_id = GetSizeClass(p);
- uptr size = ClassIdToSize(class_id);
- Printf("GetBlockBeginDebug1 p %p class_id %p size %p\n", p, class_id, size);
- if (!size)
- return nullptr;
- uptr chunk_idx = GetChunkIdx((uptr)p, size);
- uptr reg_beg = GetRegionBegin(p);
- uptr beg = chunk_idx * size;
- uptr next_beg = beg + size;
- Printf(
- "GetBlockBeginDebug2 chunk_idx %p reg_beg %p beg %p next_beg %p "
- "kNumClasses %p\n",
- chunk_idx, reg_beg, beg, next_beg, kNumClasses);
- if (class_id >= kNumClasses)
- return nullptr;
- const RegionInfo *region = AddressSpaceView::Load(GetRegionInfo(class_id));
- Printf("GetBlockBeginDebug3 region %p region->mapped_user %p\n", region,
- region->mapped_user);
- if (region->mapped_user >= next_beg)
- return reinterpret_cast<void *>(reg_beg + beg);
- return nullptr;
- }
-
 uptr GetActuallyAllocatedSize(void *p) {
 CHECK(PointerIsMine(p));
 return ClassIdToSize(GetSizeClass(p));

From d183f472617dfedf23381be90612d713d0f439af Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek
Date: Tue, 8 Sep 2020 14:20:41 -0500
Subject: [PATCH 0101/1079] [Hexagon] Handle widening of truncation's operand
 with legal result

Failing example: v8i8 = truncate v8i32. v8i8 is legal, but v8i32 was
widened to HVX. Make sure that v8i8 does not get altered (even if it's
changed to another legal type).
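
To make the failing shape concrete, a reduced form looks roughly like this
(an illustrative sketch, not the committed reproducer; the actual test added
below is isel-truncate-legal.ll):

  define <8 x i8> @f(<8 x i32> %v) {
    %t = trunc <8 x i32> %v to <8 x i8>
    ret <8 x i8> %t
  }

With HVX widening enabled, the <8 x i32> operand gets widened to an HVX
vector, while the <8 x i8> result is already legal and must be handed back
to standard legalization unchanged.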
---
 llvm/lib/Target/Hexagon/HexagonISelLowering.h | 1 +
 .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 68 +++++++++++++------
 .../Hexagon/autohvx/isel-truncate-legal.ll | 34 ++++++++++
 3 files changed, 84 insertions(+), 19 deletions(-)
 create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/isel-truncate-legal.ll

diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 8473515b3c758..9e7176cd94218 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -487,6 +487,7 @@ class HexagonTargetLowering : public TargetLowering {
 findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT)
 const override;
+ bool shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const;
 bool isHvxOperation(SDNode *N, SelectionDAG &DAG) const;
 SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const;
 void LowerHvxOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results,
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index e5d05cfe64c47..22561691f0e02 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -1939,16 +1939,36 @@ HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const {
 SDValue Op0 = Op.getOperand(0);
 MVT ResTy = ty(Op);
 MVT OpTy = ty(Op0);
+
+ // .-res, op->  Scalar  Illegal         HVX
+ // Scalar       ok      extract(widen)  -
+ // Illegal      -       widen           widen
+ // HVX          -       -               ok
+
 if (Subtarget.isHVXVectorType(OpTy))
 return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Op0);
+ assert(!isTypeLegal(OpTy) && "HVX-widening a truncate of scalar?");
+
 MVT WideOpTy = getWideTy(OpTy);
 SmallVector<SDValue, 4> Concats = {Op0};
 for (int i = 0, e = getFactor(OpTy) - 1; i != e; ++i)
 Concats.push_back(DAG.getUNDEF(OpTy));

 SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideOpTy, Concats);
- return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Cat);
+ SDValue V = DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Cat);
+ // If the original result wasn't legal and was supposed to be widened,
+ // we're done.
+ if (shouldWidenToHvx(ResTy, DAG))
+ return V;
+
+ // The original result type wasn't meant to be widened to HVX, so
+ // leave it as it is. Standard legalization should be able to deal
+ // with it (since now it's a result of a target-independent ISD
+ // node).
+ assert(ResTy.isVector());
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResTy,
+ {V, getZero(dl, MVT::i32, DAG)});
 }

 SDValue
@@ -2029,11 +2049,15 @@ HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
 SDValue Op(N, 0);

 switch (Opc) {
+ case ISD::TRUNCATE: {
+ assert(shouldWidenToHvx(ty(Op.getOperand(0)), DAG) && "Not widening?");
+ SDValue T = WidenHvxTruncate(Op, DAG);
+ Results.push_back(T);
+ break;
+ }
 case ISD::STORE: {
- assert(
- getPreferredHvxVectorAction(ty(cast<StoreSDNode>(N)->getValue())) ==
- TargetLoweringBase::TypeWidenVector &&
- "Not widening?");
+ assert(shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG) &&
+ "Not widening?");
 SDValue Store = WidenHvxStore(SDValue(N, 0), DAG);
 Results.push_back(Store);
 break;
@@ -2061,12 +2085,12 @@ HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
 unsigned Opc = N->getOpcode();
 SDValue Op(N, 0);
 switch (Opc) {
- case ISD::TRUNCATE:
- if (!Subtarget.isHVXVectorType(ty(Op), false)) {
- SDValue T = WidenHvxTruncate(Op, DAG);
- Results.push_back(T);
- }
+ case ISD::TRUNCATE: {
+ assert(shouldWidenToHvx(ty(Op), DAG) && "Not widening?");
+ SDValue T = WidenHvxTruncate(Op, DAG);
+ Results.push_back(T);
 break;
+ }
 case ISD::BITCAST:
 if (isHvxBoolTy(ty(N->getOperand(0)))) {
 SDValue Op(N, 0);
@@ -2103,8 +2127,22 @@ HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
 return SDValue();
 }

+bool
+HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
+ assert(!Subtarget.isHVXVectorType(Ty, true));
+ auto Action = getPreferredHvxVectorAction(Ty);
+ if (Action == TargetLoweringBase::TypeWidenVector) {
+ EVT WideTy = getTypeToTransformTo(*DAG.getContext(), Ty);
+ assert(WideTy.isSimple());
+ return Subtarget.isHVXVectorType(WideTy.getSimpleVT(), true);
+ }
+ return false;
+}
+
 bool
 HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
+ if (!Subtarget.useHVXOps())
+ return false;
 // If the type of any result, or any operand type are HVX vector types,
 // this is an HVX operation.
 auto IsHvxTy = [this](EVT Ty) {
@@ -2122,15 +2160,7 @@ HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
 if (!Op.getValueType().isSimple())
 return false;
 MVT ValTy = ty(Op);
- if (ValTy.isVector()) {
- auto Action = getPreferredVectorAction(ValTy);
- if (Action == TargetLoweringBase::TypeWidenVector) {
- EVT WideTy = getTypeToTransformTo(*DAG.getContext(), ValTy);
- assert(WideTy.isSimple());
- return Subtarget.isHVXVectorType(WideTy.getSimpleVT(), true);
- }
- }
- return false;
+ return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
 };

 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-truncate-legal.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-truncate-legal.ll
new file mode 100644
index 0000000000000..e9c7f9cce771e
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-truncate-legal.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s

+; Truncating a type-to-be-widened to a legal type (v8i8).
+; Check that this compiles successfully.
+; CHECK-LABEL: f0:
+; CHECK: dealloc_return

+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"

+define dllexport void @f0(i8* %a0) local_unnamed_addr #0 {
+b0:
+ %v0 = load i8, i8* undef, align 1
+ %v1 = zext i8 %v0 to i16
+ %v2 = add i16 0, %v1
+ %v3 = icmp sgt i16 %v2, 1
+ %v4 = select i1 %v3, i16 %v2, i16 1
+ %v5 = udiv i16 -32768, %v4
+ %v6 = zext i16 %v5 to i32
+ %v7 = insertelement <8 x i32> undef, i32 %v6, i32 0
+ %v8 = shufflevector <8 x i32> %v7, <8 x i32> undef, <8 x i32> zeroinitializer
+ %v9 = load <8 x i16>, <8 x i16>* undef, align 2
+ %v10 = sext <8 x i16> %v9 to <8 x i32>
+ %v11 = mul nsw <8 x i32> %v8, %v10
+ %v12 = add nsw <8 x i32> %v11, <i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384>
+ %v13 = lshr <8 x i32> %v12, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+ %v14 = trunc <8 x i32> %v13 to <8 x i8>
+ %v15 = getelementptr inbounds i8, i8* %a0, i32 undef
+ %v16 = bitcast i8* %v15 to <8 x i8>*
+ store <8 x i8> %v14, <8 x i8>* %v16, align 1
+ ret void
+}

+attributes #0 = { "target-features"="+hvx,+hvx-length128b" }

From 8893d0816ccdf8998d2e21b5430e9d6abe7ef465 Mon Sep 17 00:00:00 2001
From: Rahul Joshi
Date: Wed, 2 Sep 2020 15:33:19 -0700
Subject: [PATCH 0102/1079] [MLIR] Change Operation::create() methods to use
 Value/Type/Block ranges.

- Introduce a new BlockRange class to represent a range of blocks
 (constructible from an ArrayRef<Block *> or a SuccessorRange);
- Change Operation::create() methods to use TypeRange for result types,
 ValueRange for operands and BlockRange for successors.

Differential Revision: https://reviews.llvm.org/D86985
---
 mlir/include/mlir/IR/BlockSupport.h | 41 +++++++++++++++++++++++++
 mlir/include/mlir/IR/Operation.h | 14 ++++-----
 mlir/include/mlir/IR/OperationSupport.h | 8 ++---
 mlir/lib/IR/Block.cpp | 28 ++++++++++++++++-
 mlir/lib/IR/Operation.cpp | 29 +++++++----------
 mlir/lib/IR/OperationSupport.cpp | 2 +-
 6 files changed, 89 insertions(+), 33 deletions(-)

diff --git a/mlir/include/mlir/IR/BlockSupport.h b/mlir/include/mlir/IR/BlockSupport.h
index f3dd6140420e4..fc16effbba70d 100644
--- a/mlir/include/mlir/IR/BlockSupport.h
+++ b/mlir/include/mlir/IR/BlockSupport.h
@@ -75,6 +75,47 @@ class SuccessorRange final
 friend RangeBaseT;
 };

+//===----------------------------------------------------------------------===//
+// BlockRange
+//===----------------------------------------------------------------------===//

+/// This class provides an abstraction over the different types of ranges over
+/// Blocks. In many cases, this prevents the need to explicitly materialize a
+/// SmallVector/std::vector. This class should be used in places that are not
+/// suitable for a more derived type (e.g. ArrayRef) or a template range
+/// parameter.
+class BlockRange final
+ : public llvm::detail::indexed_accessor_range_base<
+ BlockRange, llvm::PointerUnion<BlockOperand *, Block *const *>,
+ Block *, Block *, Block *> {
+public:
+ using RangeBaseT::RangeBaseT;
+ BlockRange(ArrayRef<Block *> blocks = llvm::None);
+ BlockRange(SuccessorRange successors);
+ template <typename Arg,
+ typename = typename std::enable_if_t<
+ std::is_constructible<ArrayRef<Block *>, Arg>::value>>
+ BlockRange(Arg &&arg)
+ : BlockRange(ArrayRef<Block *>(std::forward<Arg>(arg))) {}
+ BlockRange(std::initializer_list<Block *> blocks)
+ : BlockRange(ArrayRef<Block *>(blocks)) {}

+private:
+ /// The owner of the range is either:
+ /// * A pointer to the first element of an array of block operands.
+ /// * A pointer to the first element of an array of Block *.
+ using OwnerT = llvm::PointerUnion<BlockOperand *, Block *const *>;

+ /// See `llvm::detail::indexed_accessor_range_base` for details.
+ static OwnerT offset_base(OwnerT object, ptrdiff_t index);

+ /// See `llvm::detail::indexed_accessor_range_base` for details.
+ static Block *dereference_iterator(OwnerT object, ptrdiff_t index);

+ /// Allow access to `offset_base` and `dereference_iterator`.
+ friend RangeBaseT;
+};

 //===----------------------------------------------------------------------===//
 // Operation Iterators
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/IR/Operation.h b/mlir/include/mlir/IR/Operation.h
index 5f5e9017ae512..6de7677dbf052 100644
--- a/mlir/include/mlir/IR/Operation.h
+++ b/mlir/include/mlir/IR/Operation.h
@@ -32,25 +32,25 @@ class Operation final
 public:
 /// Create a new Operation with the specific fields.
 static Operation *create(Location location, OperationName name,
- ArrayRef<Type> resultTypes, ArrayRef<Value> operands,
+ TypeRange resultTypes, ValueRange operands,
 ArrayRef<NamedAttribute> attributes,
- ArrayRef<Block *> successors, unsigned numRegions);
+ BlockRange successors, unsigned numRegions);

 /// Overload of create that takes an existing MutableDictionaryAttr to avoid
 /// unnecessarily uniquing a list of attributes.
 static Operation *create(Location location, OperationName name,
- ArrayRef<Type> resultTypes, ArrayRef<Value> operands,
+ TypeRange resultTypes, ValueRange operands,
 MutableDictionaryAttr attributes,
- ArrayRef<Block *> successors, unsigned numRegions);
+ BlockRange successors, unsigned numRegions);

 /// Create a new Operation from the fields stored in `state`.
 static Operation *create(const OperationState &state);

 /// Create a new Operation with the specific fields.
 static Operation *create(Location location, OperationName name,
- ArrayRef<Type> resultTypes, ArrayRef<Value> operands,
+ TypeRange resultTypes, ValueRange operands,
 MutableDictionaryAttr attributes,
- ArrayRef<Block *> successors = {},
+ BlockRange successors = {},
 RegionRange regions = {});

 /// The name of an operation is the key identifier for it.
@@ -633,7 +633,7 @@ class Operation final
 bool hasValidOrder() { return orderIndex != kInvalidOrderIdx; }

 private:
- Operation(Location location, OperationName name, ArrayRef<Type> resultTypes,
+ Operation(Location location, OperationName name, TypeRange resultTypes,
 unsigned numSuccessors, unsigned numRegions,
 const MutableDictionaryAttr &attributes, bool hasOperandStorage);
diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h
index 7fce4b808d2e4..11e85f20af445 100644
--- a/mlir/include/mlir/IR/OperationSupport.h
+++ b/mlir/include/mlir/IR/OperationSupport.h
@@ -29,6 +29,7 @@ namespace mlir {
 class Block;
+class BlockRange;
 class Dialect;
 class Operation;
 struct OperationState;
@@ -42,7 +43,6 @@ class Pattern;
 class Region;
 class ResultRange;
 class RewritePattern;
-class SuccessorRange;
 class Type;
 class Value;
 class ValueRange;
@@ -394,12 +394,8 @@ struct OperationState {
 attributes.append(newAttributes);
 }

- /// Add an array of successors.
- void addSuccessors(ArrayRef<Block *> newSuccessors) {
- successors.append(newSuccessors.begin(), newSuccessors.end());
- }
 void addSuccessors(Block *successor) { successors.push_back(successor); }
- void addSuccessors(SuccessorRange newSuccessors);
+ void addSuccessors(BlockRange newSuccessors);

 /// Create a region that should be attached to the operation.
These regions
 /// can be filled in immediately without waiting for Operation to be
diff --git a/mlir/lib/IR/Block.cpp b/mlir/lib/IR/Block.cpp
index 71f368c49776e..e039b41ae4b77 100644
--- a/mlir/lib/IR/Block.cpp
+++ b/mlir/lib/IR/Block.cpp
@@ -282,7 +282,7 @@ unsigned PredecessorIterator::getSuccessorIndex() const {
 }

 //===----------------------------------------------------------------------===//
-// Successors
+// SuccessorRange
 //===----------------------------------------------------------------------===//

 SuccessorRange::SuccessorRange(Block *block) : SuccessorRange(nullptr, 0) {
@@ -295,3 +295,29 @@ SuccessorRange::SuccessorRange(Operation *term) : SuccessorRange(nullptr, 0) {
 if ((count = term->getNumSuccessors()))
 base = term->getBlockOperands().data();
 }

+//===----------------------------------------------------------------------===//
+// BlockRange
+//===----------------------------------------------------------------------===//

+BlockRange::BlockRange(ArrayRef<Block *> blocks) : BlockRange(nullptr, 0) {
+ if ((count = blocks.size()))
+ base = blocks.data();
+}

+BlockRange::BlockRange(SuccessorRange successors)
+ : BlockRange(successors.begin().getBase(), successors.size()) {}

+/// See `llvm::detail::indexed_accessor_range_base` for details.
+BlockRange::OwnerT BlockRange::offset_base(OwnerT object, ptrdiff_t index) {
+ if (auto *operand = object.dyn_cast<BlockOperand *>())
+ return {operand + index};
+ return {object.dyn_cast<Block *const *>() + index};
+}

+/// See `llvm::detail::indexed_accessor_range_base` for details.
+Block *BlockRange::dereference_iterator(OwnerT object, ptrdiff_t index) {
+ if (const auto *operand = object.dyn_cast<BlockOperand *>())
+ return operand[index].get();
+ return object.dyn_cast<Block *const *>()[index];
+}
diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp
index b8f9e6c9fdfc4..f531a6097c257 100644
--- a/mlir/lib/IR/Operation.cpp
+++ b/mlir/lib/IR/Operation.cpp
@@ -71,29 +71,24 @@ OperationName OperationName::getFromOpaquePointer(void *pointer) {

 /// Create a new Operation with the specific fields.
 Operation *Operation::create(Location location, OperationName name,
- ArrayRef<Type> resultTypes,
- ArrayRef<Value> operands,
+ TypeRange resultTypes, ValueRange operands,
 ArrayRef<NamedAttribute> attributes,
- ArrayRef<Block *> successors,
- unsigned numRegions) {
+ BlockRange successors, unsigned numRegions) {
 return create(location, name, resultTypes, operands,
 MutableDictionaryAttr(attributes), successors, numRegions);
 }

 /// Create a new Operation from operation state.
 Operation *Operation::create(const OperationState &state) {
- return Operation::create(state.location, state.name, state.types,
- state.operands, state.attributes, state.successors,
- state.regions);
+ return create(state.location, state.name, state.types, state.operands,
+ state.attributes, state.successors, state.regions);
 }

 /// Create a new Operation with the specific fields.
 Operation *Operation::create(Location location, OperationName name,
- ArrayRef<Type> resultTypes,
- ArrayRef<Value> operands,
+ TypeRange resultTypes, ValueRange operands,
 MutableDictionaryAttr attributes,
- ArrayRef<Block *> successors,
- RegionRange regions) {
+ BlockRange successors, RegionRange regions) {
 unsigned numRegions = regions.size();
 Operation *op = create(location, name, resultTypes, operands, attributes,
 successors, numRegions);
@@ -106,11 +101,9 @@ Operation *Operation::create(Location location, OperationName name,
 /// Overload of create that takes an existing MutableDictionaryAttr to avoid
 /// unnecessarily uniquing a list of attributes.
Operation *Operation::create(Location location, OperationName name,
- ArrayRef<Type> resultTypes,
- ArrayRef<Value> operands,
+ TypeRange resultTypes, ValueRange operands,
 MutableDictionaryAttr attributes,
- ArrayRef<Block *> successors,
- unsigned numRegions) {
+ BlockRange successors, unsigned numRegions) {
 // We only need to allocate additional memory for a subset of results.
 unsigned numTrailingResults = OpResult::getNumTrailing(resultTypes.size());
 unsigned numInlineResults = OpResult::getNumInline(resultTypes.size());
@@ -167,7 +160,7 @@ Operation *Operation::create(Location location, OperationName name,
 }

 Operation::Operation(Location location, OperationName name,
- ArrayRef<Type> resultTypes, unsigned numSuccessors,
+ TypeRange resultTypes, unsigned numSuccessors,
 unsigned numRegions,
 const MutableDictionaryAttr &attributes,
 bool hasOperandStorage)
@@ -611,8 +604,8 @@ Operation *Operation::cloneWithoutRegions(BlockAndValueMapping &mapper) {
 successors.push_back(mapper.lookupOrDefault(successor));

 // Create the new operation.
- auto *newOp = Operation::create(getLoc(), getName(), getResultTypes(),
- operands, attrs, successors, getNumRegions());
+ auto *newOp = create(getLoc(), getName(), getResultTypes(), operands, attrs,
+ successors, getNumRegions());

 // Remember the mapping of any results.
 for (unsigned i = 0, e = getNumResults(); i != e; ++i)
diff --git a/mlir/lib/IR/OperationSupport.cpp b/mlir/lib/IR/OperationSupport.cpp
index ab84f4e8cf178..69aea3bfcf198 100644
--- a/mlir/lib/IR/OperationSupport.cpp
+++ b/mlir/lib/IR/OperationSupport.cpp
@@ -186,7 +186,7 @@ void OperationState::addOperands(ValueRange newOperands) {
 operands.append(newOperands.begin(), newOperands.end());
 }

-void OperationState::addSuccessors(SuccessorRange newSuccessors) {
+void OperationState::addSuccessors(BlockRange newSuccessors) {
 successors.append(newSuccessors.begin(), newSuccessors.end());
 }

From 76a2c434f2c35fb27913bf59e0acb0435e59f079 Mon Sep 17 00:00:00 2001
From: Nate Voorhies
Date: Tue, 8 Sep 2020 14:19:00 -0700
Subject: [PATCH 0103/1079] Insert missing bracket in docs.

The body of the unrolled loop was missing its opening bracket.

Reviewed By: Meinersbur

Differential Revision: https://reviews.llvm.org/D87329
---
 llvm/docs/TransformMetadata.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/TransformMetadata.rst b/llvm/docs/TransformMetadata.rst
index 817b41b43711d..3c0e10b3eb7a5 100644
--- a/llvm/docs/TransformMetadata.rst
+++ b/llvm/docs/TransformMetadata.rst
@@ -196,7 +196,7 @@ is transformed into (using an unroll factor of 4):
 .. code-block:: c

 int i = 0;
- for (; i + 3 < n; i+=4) // unrolled loop
+ for (; i + 3 < n; i+=4) { // unrolled loop
 Stmt(i);
 Stmt(i+1);
 Stmt(i+2);

From b1e68f885b550cf006f5d84b43aa3a0b2905d4b3 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 8 Sep 2020 15:09:35 -0700
Subject: [PATCH 0104/1079] [SelectionDAGBuilder] Pass fast math flags to
 getNode calls rather than trying to set them after the fact.

This removes the after-the-fact FMF handling from D46854 in favor of
passing fast math flags to getNode. This should be a superset of D87130.

This required adding an SDNodeFlags argument to SelectionDAG::getSetCC.

Now we manage to constant fold some cases involving undef during the
initial getNode call that we previously only handled in later DAG
combines.
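
In outline, the change replaces locating the node after creation and
attaching flags to it with passing the flags directly into getNode (a
simplified sketch of the before/after shape; the exact code is in the
hunks below):

  // Old scheme (removed here): find the DAG node for the IR value after
  // the fact and set the flags on it.
  SDNodeFlags IncomingFlags;
  IncomingFlags.copyFMF(*FPMO);
  if (SDNode *Node = getNodeForIRValue(&I))
    Node->setFlags(IncomingFlags);

  // New scheme: copy the FMF up front and hand them to getNode, so that
  // constant folding during node creation already sees them.
  SDNodeFlags Flags;
  Flags.copyFMF(cast<FPMathOperator>(I));
  setValue(&I, DAG.getNode(Opcode, sdl, VT, Op1, Op2, Flags));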
Differential Revision: https://reviews.llvm.org/D87200 --- llvm/include/llvm/CodeGen/SelectionDAG.h | 6 +- llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 69 ++------ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 +- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 12 +- .../SelectionDAG/LegalizeFloatTypes.cpp | 11 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 150 +++++++++--------- .../SelectionDAG/SelectionDAGBuilder.h | 7 - .../CodeGen/SelectionDAG/TargetLowering.cpp | 2 +- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 4 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- llvm/test/CodeGen/AArch64/fp-const-fold.ll | 16 -- llvm/test/CodeGen/PowerPC/fmf-propagation.ll | 4 +- llvm/test/CodeGen/SystemZ/fp-mul-14.ll | 3 - .../test/CodeGen/Thumb2/mve-vecreduce-fadd.ll | 76 ++------- llvm/test/CodeGen/X86/fp-undef.ll | 25 --- 15 files changed, 130 insertions(+), 263 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 5607e785e349a..8db5249743064 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1049,8 +1049,8 @@ class SelectionDAG { /// Helper function to make it easier to build SetCC's if you just have an /// ISD::CondCode instead of an SDValue. SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, - ISD::CondCode Cond, SDValue Chain = SDValue(), - bool IsSignaling = false) { + ISD::CondCode Cond, SDNodeFlags Flags = SDNodeFlags(), + SDValue Chain = SDValue(), bool IsSignaling = false) { assert(LHS.getValueType().isVector() == RHS.getValueType().isVector() && "Cannot compare scalars to vectors"); assert(LHS.getValueType().isVector() == VT.isVector() && @@ -1060,7 +1060,7 @@ class SelectionDAG { if (Chain) return getNode(IsSignaling ? ISD::STRICT_FSETCCS : ISD::STRICT_FSETCC, DL, {VT, MVT::Other}, {Chain, LHS, RHS, getCondCode(Cond)}); - return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond)); + return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond), Flags); } /// Helper function to make it easier to build Select's if you just have diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 6eef79162f8a7..fa150831bdbd0 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -357,10 +357,6 @@ template<> struct simplify_type { /// the backend. struct SDNodeFlags { private: - // This bit is used to determine if the flags are in a defined state. It is - // only used by SelectionDAGBuilder. - bool AnyDefined : 1; - bool NoUnsignedWrap : 1; bool NoSignedWrap : 1; bool Exact : 1; @@ -382,9 +378,8 @@ struct SDNodeFlags { public: /// Default constructor turns off all optimization flags. SDNodeFlags() - : AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false), - Exact(false), NoNaNs(false), NoInfs(false), - NoSignedZeros(false), AllowReciprocal(false), + : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false), + NoInfs(false), NoSignedZeros(false), AllowReciprocal(false), AllowContract(false), ApproximateFuncs(false), AllowReassociation(false), NoFPExcept(false) {} @@ -399,56 +394,18 @@ struct SDNodeFlags { setAllowReassociation(FPMO.hasAllowReassoc()); } - /// Sets the state of the flags to the defined state. - void setDefined() { AnyDefined = true; } - /// Returns true if the flags are in a defined state. - bool isDefined() const { return AnyDefined; } - // These are mutators for each flag. 
- void setNoUnsignedWrap(bool b) { - setDefined(); - NoUnsignedWrap = b; - } - void setNoSignedWrap(bool b) { - setDefined(); - NoSignedWrap = b; - } - void setExact(bool b) { - setDefined(); - Exact = b; - } - void setNoNaNs(bool b) { - setDefined(); - NoNaNs = b; - } - void setNoInfs(bool b) { - setDefined(); - NoInfs = b; - } - void setNoSignedZeros(bool b) { - setDefined(); - NoSignedZeros = b; - } - void setAllowReciprocal(bool b) { - setDefined(); - AllowReciprocal = b; - } - void setAllowContract(bool b) { - setDefined(); - AllowContract = b; - } - void setApproximateFuncs(bool b) { - setDefined(); - ApproximateFuncs = b; - } - void setAllowReassociation(bool b) { - setDefined(); - AllowReassociation = b; - } - void setNoFPExcept(bool b) { - setDefined(); - NoFPExcept = b; - } + void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; } + void setNoSignedWrap(bool b) { NoSignedWrap = b; } + void setExact(bool b) { Exact = b; } + void setNoNaNs(bool b) { NoNaNs = b; } + void setNoInfs(bool b) { NoInfs = b; } + void setNoSignedZeros(bool b) { NoSignedZeros = b; } + void setAllowReciprocal(bool b) { AllowReciprocal = b; } + void setAllowContract(bool b) { AllowContract = b; } + void setApproximateFuncs(bool b) { ApproximateFuncs = b; } + void setAllowReassociation(bool b) { AllowReassociation = b; } + void setNoFPExcept(bool b) { NoFPExcept = b; } // These are accessors for each flag. bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 37d8cdd695445..e5c5e5341a680 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7398,9 +7398,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (N0.hasOneUse()) { // FIXME Can we handle multiple uses? Could we token factor the chain // results from the new/old setcc? - SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC, - N0.getOperand(0), - N0Opcode == ISD::STRICT_FSETCCS); + SDValue SetCC = + DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC, SDNodeFlags(), + N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS); CombineTo(N, SetCC); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1)); recursivelyDeleteUnusedNodes(N0.getNode()); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f6e4b9363d1a1..7751ebb7705a3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1735,12 +1735,16 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode( if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { // If we aren't the ordered or unorder operation, // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS). 
- SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling); - SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling); + SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, SDNodeFlags(), Chain, + IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, SDNodeFlags(), Chain, + IsSignaling); } else { // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) - SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling); - SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling); + SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, SDNodeFlags(), Chain, + IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, SDNodeFlags(), Chain, + IsSignaling); } if (Chain) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1), diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 09b5f14bdb7b4..2399525de6659 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1777,17 +1777,18 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, // The following can be improved, but not that much. SDValue Tmp1, Tmp2, Tmp3, OutputChain; Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, - RHSHi, ISD::SETOEQ, Chain, IsSignaling); + RHSHi, ISD::SETOEQ, SDNodeFlags(), Chain, IsSignaling); OutputChain = Tmp1->getNumValues() > 1 ? Tmp1.getValue(1) : SDValue(); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo, - RHSLo, CCCode, OutputChain, IsSignaling); + RHSLo, CCCode, SDNodeFlags(), OutputChain, IsSignaling); OutputChain = Tmp2->getNumValues() > 1 ? Tmp2.getValue(1) : SDValue(); Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); - Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, - RHSHi, ISD::SETUNE, OutputChain, IsSignaling); + Tmp1 = + DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, + ISD::SETUNE, SDNodeFlags(), OutputChain, IsSignaling); OutputChain = Tmp1->getNumValues() > 1 ? Tmp1.getValue(1) : SDValue(); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, - RHSHi, CCCode, OutputChain, IsSignaling); + RHSHi, CCCode, SDNodeFlags(), OutputChain, IsSignaling); OutputChain = Tmp2->getNumValues() > 1 ? Tmp2.getValue(1) : SDValue(); Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 5e6cb03f3839c..2d42eb7360663 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1120,27 +1120,6 @@ void SelectionDAGBuilder::visit(const Instruction &I) { visit(I.getOpcode(), I); - if (auto *FPMO = dyn_cast(&I)) { - // ConstrainedFPIntrinsics handle their own FMF. - if (!isa(&I)) { - // Propagate the fast-math-flags of this IR instruction to the DAG node that - // maps to this instruction. - // TODO: We could handle all flags (nsw, etc) here. - // TODO: If an IR instruction maps to >1 node, only the final node will have - // flags set. 
- // TODO: The handling of flags should be improved, see - // https://reviews.llvm.org/D86871 - if (SDNode *Node = getNodeForIRValue(&I)) { - SDNodeFlags IncomingFlags; - IncomingFlags.copyFMF(*FPMO); - if (!Node->getFlags().isDefined()) - Node->setFlags(IncomingFlags); - else - Node->intersectFlagsWith(IncomingFlags); - } - } - } - if (!I.isTerminator() && !HasTailCall && !isa(I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); @@ -3023,9 +3002,10 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap()); Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap()); } - if (auto *ExactOp = dyn_cast(&I)) { + if (auto *ExactOp = dyn_cast(&I)) Flags.setExact(ExactOp->isExact()); - } + if (auto *FPOp = dyn_cast(&I)) + Flags.copyFMF(*FPOp); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -3135,13 +3115,16 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); - auto *FPMO = dyn_cast(&I); - if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath) + auto *FPMO = cast(&I); + if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); + SDNodeFlags Flags; + Flags.copyFMF(*FPMO); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); - setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); + setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition, Flags)); } // Check if the condition of the select has one use or two users that are both @@ -3169,6 +3152,10 @@ void SelectionDAGBuilder::visitSelect(const User &I) { bool IsUnaryAbs = false; + SDNodeFlags Flags; + if (auto *FPOp = dyn_cast(&I)) + Flags.copyFMF(*FPOp); + // Min/max matching is only viable if all output VTs are the same. if (is_splat(ValueVTs)) { EVT VT = ValueVTs[0]; @@ -3272,7 +3259,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); Values[i] = DAG.getNode( OpCode, getCurSDLoc(), - LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops); + LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops, Flags); } } @@ -4876,7 +4863,7 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl, /// expandExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -4892,13 +4879,13 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, } // No special expansion. - return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op, Flags); } /// expandLog - Lower a log intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -4991,13 +4978,13 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, } // No special expansion. 
- return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op, Flags); } /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -5088,13 +5075,13 @@ static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, } // No special expansion. - return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op, Flags); } /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -5178,25 +5165,26 @@ static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, } // No special expansion. - return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op, Flags); } /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) return getLimitedPrecisionExp2(Op, dl, DAG); // No special expansion. - return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op, Flags); } /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, - SelectionDAG &DAG, const TargetLowering &TLI) { + SelectionDAG &DAG, const TargetLowering &TLI, + SDNodeFlags Flags) { bool IsExp10 = false; if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -5219,7 +5207,7 @@ static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, } // No special expansion. - return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); + return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS, Flags); } /// ExpandPowI - Expand a llvm.powi intrinsic. @@ -5640,6 +5628,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DebugLoc dl = getCurDebugLoc(); SDValue Res; + SDNodeFlags Flags; + if (auto *FPOp = dyn_cast(&I)) + Flags.copyFMF(*FPOp); + switch (Intrinsic) { default: // By default, turn this into a target intrinsic node. 
@@ -6054,23 +6046,26 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(1)), DAG)); return; case Intrinsic::log: - setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::log2: - setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, + expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::log10: - setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, + expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::exp: - setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::exp2: - setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, + expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::pow: setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), DAG, TLI)); + getValue(I.getArgOperand(1)), DAG, TLI, Flags)); return; case Intrinsic::sqrt: case Intrinsic::fabs: @@ -6103,7 +6098,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, DAG.getNode(Opcode, sdl, getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)))); + getValue(I.getArgOperand(0)), Flags)); return; } case Intrinsic::lround: @@ -6128,38 +6123,37 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::maxnum: setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::minimum: setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::maximum: setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::fma: - setValue(&I, DAG.getNode(ISD::FMA, sdl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), - getValue(I.getArgOperand(2)))); + setValue(&I, DAG.getNode( + ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), Flags)); return; #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: @@ -6174,17 +6168,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - getValue(I.getArgOperand(2)))); + getValue(I.getArgOperand(2)), Flags)); } else { // TODO: Intrinsic calls 
should have fast-math-flags. - SDValue Mul = DAG.getNode(ISD::FMUL, sdl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1))); + SDValue Mul = DAG.getNode( + ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags); SDValue Add = DAG.getNode(ISD::FADD, sdl, getValue(I.getArgOperand(0)).getValueType(), - Mul, - getValue(I.getArgOperand(2))); + Mul, getValue(I.getArgOperand(2)), Flags); setValue(&I, Add); } return; @@ -7532,8 +7524,12 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, if (!I.onlyReadsMemory()) return false; + SDNodeFlags Flags; + Flags.copyFMF(cast(I)); + SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp)); + setValue(&I, + DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp, Flags)); return true; } @@ -7548,10 +7544,13 @@ bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, if (!I.onlyReadsMemory()) return false; + SDNodeFlags Flags; + Flags.copyFMF(cast(I)); + SDValue Tmp0 = getValue(I.getArgOperand(0)); SDValue Tmp1 = getValue(I.getArgOperand(1)); EVT VT = Tmp0.getValueType(); - setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1)); + setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1, Flags)); return true; } @@ -8952,23 +8951,28 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Res; FastMathFlags FMF; - if (isa(I)) - FMF = I.getFastMathFlags(); + SDNodeFlags SDFlags; + if (auto *FPMO = dyn_cast(&I)) { + FMF = FPMO->getFastMathFlags(); + SDFlags.copyFMF(*FPMO); + } switch (Intrinsic) { case Intrinsic::experimental_vector_reduce_v2_fadd: if (FMF.allowReassoc()) Res = DAG.getNode(ISD::FADD, dl, VT, Op1, - DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2)); + DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags), + SDFlags); else - Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2); + Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2, SDFlags); break; case Intrinsic::experimental_vector_reduce_v2_fmul: if (FMF.allowReassoc()) Res = DAG.getNode(ISD::FMUL, dl, VT, Op1, - DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2)); + DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags), + SDFlags); else - Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2); + Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2, SDFlags); break; case Intrinsic::experimental_vector_reduce_add: Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1); @@ -8998,10 +9002,10 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1); break; case Intrinsic::experimental_vector_reduce_fmax: - Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1); + Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags); break; case Intrinsic::experimental_vector_reduce_fmin: - Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1); + Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags); break; default: llvm_unreachable("Unhandled vector reduce intrinsic"); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 7bad055198140..e51e7bf89f8e7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -518,13 +518,6 @@ class SelectionDAGBuilder { SDValue 
getValue(const Value *V); - /// Return the SDNode for the specified IR value if it exists. - SDNode *getNodeForIRValue(const Value *V) { - if (NodeMap.find(V) == NodeMap.end()) - return nullptr; - return NodeMap[V].getNode(); - } - SDValue getNonRegisterValue(const Value *V); SDValue getValueImpl(const Value *V); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index ae98edb74466d..cbdd027f55fef 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6409,7 +6409,7 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, SDValue Sel; if (Node->isStrictFPOpcode()) { - Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, + Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, SDNodeFlags(), Node->getOperand(0), /*IsSignaling*/ true); Chain = Sel.getValue(1); } else { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index b213abb57aa83..f6b5d2ea987f8 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8219,8 +8219,8 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); EVT DstSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT); - SDValue Sel = - DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, Chain, true); + SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, + SDNodeFlags(), Chain, true); Chain = Sel.getValue(1); SDValue FltOfs = DAG.getSelect( diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2c7c36325f146..1cd928c1de120 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20345,7 +20345,7 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, *DAG.getContext(), TheVT); SDValue Cmp; if (IsStrict) { - Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETLT, + Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETLT, SDNodeFlags(), Chain, /*IsSignaling*/ true); Chain = Cmp.getValue(1); } else { diff --git a/llvm/test/CodeGen/AArch64/fp-const-fold.ll b/llvm/test/CodeGen/AArch64/fp-const-fold.ll index b282c8719ff63..dc3f71001d610 100644 --- a/llvm/test/CodeGen/AArch64/fp-const-fold.ll +++ b/llvm/test/CodeGen/AArch64/fp-const-fold.ll @@ -161,49 +161,33 @@ define double @fmul_nnan_inf_op1(double %x) { ret double %r } -; TODO: Should simplify to undef - define double @fdiv_nnan_undef_op0(double %x) { ; CHECK-LABEL: fdiv_nnan_undef_op0: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #9221120237041090560 -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %r = fdiv nnan double undef, %x ret double %r } -; TODO: Should simplify to undef - define double @fdiv_nnan_undef_op1(double %x) { ; CHECK-LABEL: fdiv_nnan_undef_op1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #9221120237041090560 -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %r = fdiv nnan double %x, undef ret double %r } -; TODO: Should simplify to undef - define double @fdiv_ninf_undef_op0(double %x) { ; CHECK-LABEL: fdiv_ninf_undef_op0: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #9221120237041090560 -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %r = fdiv ninf double undef, %x ret double %r } -; TODO: Should simplify to undef - define double @fdiv_ninf_undef_op1(double %x) { ; CHECK-LABEL: fdiv_ninf_undef_op1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, 
#9221120237041090560 -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %r = fdiv ninf double %x, undef ret double %r diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll index 90ea31b26916e..91745b4b3ea21 100644 --- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -557,13 +557,13 @@ define double @fcmp_nnan(double %a, double %y, double %z) { ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:' ; FMFDEBUG: ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64 ; FMFDEBUG: ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1 -; FMFDEBUG: f64,ch,glue = CopyFromReg afn t16, Register:f64 $f1, t16:1 +; FMFDEBUG: f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:' ; GLOBALDEBUG: ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64 ; GLOBALDEBUG: ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1 -; GLOBALDEBUG: f64,ch,glue = CopyFromReg afn t16, Register:f64 $f1, t16:1 +; GLOBALDEBUG: f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:' declare double @log2(double) diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-14.ll b/llvm/test/CodeGen/SystemZ/fp-mul-14.ll index 8bab2135739c4..363511655ad91 100644 --- a/llvm/test/CodeGen/SystemZ/fp-mul-14.ll +++ b/llvm/test/CodeGen/SystemZ/fp-mul-14.ll @@ -2,9 +2,6 @@ ; ; Check that a multiply-and-add results. -; FIXME: This test is xfailed temporarily -; XFAIL: * - define void @f1(float %arg, float* %Dst) { ; CHECK-LABEL: f1: ; CHECK: maeb diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll index a1f25e0f33342..77f0c77033f95 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll @@ -3,30 +3,11 @@ ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16,+fp64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP define arm_aapcs_vfpcc float @fadd_v2f32(<2 x float> %x, float %y) { -; CHECK-FP-LABEL: fadd_v2f32: -; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vadd.f32 s0, s0, s1 -; CHECK-FP-NEXT: vldr s2, .LCPI0_0 -; CHECK-FP-NEXT: vadd.f32 s0, s0, s2 -; CHECK-FP-NEXT: vadd.f32 s0, s4, s0 -; CHECK-FP-NEXT: bx lr -; CHECK-FP-NEXT: .p2align 2 -; CHECK-FP-NEXT: @ %bb.1: -; CHECK-FP-NEXT: .LCPI0_0: -; CHECK-FP-NEXT: .long 0x00000000 @ float 0 -; -; CHECK-NOFP-LABEL: fadd_v2f32: -; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vadd.f32 s0, s0, s1 -; CHECK-NOFP-NEXT: vldr s2, .LCPI0_0 -; CHECK-NOFP-NEXT: vadd.f32 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f32 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f32 s0, s4, s0 -; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 2 -; CHECK-NOFP-NEXT: @ %bb.1: -; CHECK-NOFP-NEXT: .LCPI0_0: -; CHECK-NOFP-NEXT: .long 0x00000000 @ float 0 +; CHECK-LABEL: fadd_v2f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vadd.f32 s0, s0, s1 +; CHECK-NEXT: vadd.f32 s0, s4, s0 +; CHECK-NEXT: bx lr entry: %z = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float %y, <2 x float> %x) ret float %z @@ -80,34 +61,14 @@ entry: } define arm_aapcs_vfpcc void @fadd_v2f16(<2 x half> %x, half* %yy) { -; CHECK-FP-LABEL: fadd_v2f16: -; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmovx.f16 s4, s0 -; 
CHECK-FP-NEXT: vadd.f16 s0, s0, s4 -; CHECK-FP-NEXT: vldr.16 s2, [r0] -; CHECK-FP-NEXT: vadd.f16 s0, s2, s0 -; CHECK-FP-NEXT: vstr.16 s0, [r0] -; CHECK-FP-NEXT: bx lr -; -; CHECK-NOFP-LABEL: fadd_v2f16: -; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s4 -; CHECK-NOFP-NEXT: vldr.16 s2, .LCPI3_0 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vldr.16 s2, [r0] -; CHECK-NOFP-NEXT: vadd.f16 s0, s2, s0 -; CHECK-NOFP-NEXT: vstr.16 s0, [r0] -; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 1 -; CHECK-NOFP-NEXT: @ %bb.1: -; CHECK-NOFP-NEXT: .LCPI3_0: -; CHECK-NOFP-NEXT: .short 0x0000 @ half 0 +; CHECK-LABEL: fadd_v2f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovx.f16 s4, s0 +; CHECK-NEXT: vadd.f16 s0, s0, s4 +; CHECK-NEXT: vldr.16 s2, [r0] +; CHECK-NEXT: vadd.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr entry: %y = load half, half* %yy %z = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v2f16(half %y, <2 x half> %x) @@ -134,20 +95,11 @@ define arm_aapcs_vfpcc void @fadd_v4f16(<4 x half> %x, half* %yy) { ; CHECK-NOFP-NEXT: vadd.f16 s4, s0, s4 ; CHECK-NOFP-NEXT: vmovx.f16 s0, s1 ; CHECK-NOFP-NEXT: vadd.f16 s4, s4, s1 -; CHECK-NOFP-NEXT: vldr.16 s2, .LCPI4_0 -; CHECK-NOFP-NEXT: vadd.f16 s0, s4, s0 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 ; CHECK-NOFP-NEXT: vldr.16 s2, [r0] +; CHECK-NOFP-NEXT: vadd.f16 s0, s4, s0 ; CHECK-NOFP-NEXT: vadd.f16 s0, s2, s0 ; CHECK-NOFP-NEXT: vstr.16 s0, [r0] ; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 1 -; CHECK-NOFP-NEXT: @ %bb.1: -; CHECK-NOFP-NEXT: .LCPI4_0: -; CHECK-NOFP-NEXT: .short 0x0000 @ half 0 entry: %y = load half, half* %yy %z = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half %y, <4 x half> %x) diff --git a/llvm/test/CodeGen/X86/fp-undef.ll b/llvm/test/CodeGen/X86/fp-undef.ll index d46bea703fdf0..95049d16a7bf4 100644 --- a/llvm/test/CodeGen/X86/fp-undef.ll +++ b/llvm/test/CodeGen/X86/fp-undef.ll @@ -100,7 +100,6 @@ define float @frem_undef_op1(float %x) { define float @fadd_undef_op0_nnan(float %x) { ; ANY-LABEL: fadd_undef_op0_nnan: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fadd nnan float undef, %x ret float %r @@ -109,7 +108,6 @@ define float @fadd_undef_op0_nnan(float %x) { define float @fadd_undef_op1_fast(float %x) { ; ANY-LABEL: fadd_undef_op1_fast: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fadd fast float %x, undef ret float %r @@ -118,7 +116,6 @@ define float @fadd_undef_op1_fast(float %x) { define float @fsub_undef_op0_fast(float %x) { ; ANY-LABEL: fsub_undef_op0_fast: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fsub fast float undef, %x ret float %r @@ -127,7 +124,6 @@ define float @fsub_undef_op0_fast(float %x) { define float @fsub_undef_op1_nnan(float %x) { ; ANY-LABEL: fsub_undef_op1_nnan: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fsub nnan float %x, undef ret float %r @@ -136,7 +132,6 @@ define float @fsub_undef_op1_nnan(float %x) { define float @fmul_undef_op0_nnan(float %x) { ; 
ANY-LABEL: fmul_undef_op0_nnan: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fmul nnan float undef, %x ret float %r @@ -145,7 +140,6 @@ define float @fmul_undef_op0_nnan(float %x) { define float @fmul_undef_op1_fast(float %x) { ; ANY-LABEL: fmul_undef_op1_fast: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fmul fast float %x, undef ret float %r @@ -154,7 +148,6 @@ define float @fmul_undef_op1_fast(float %x) { define float @fdiv_undef_op0_fast(float %x) { ; ANY-LABEL: fdiv_undef_op0_fast: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fdiv fast float undef, %x ret float %r @@ -163,7 +156,6 @@ define float @fdiv_undef_op0_fast(float %x) { define float @fdiv_undef_op1_nnan(float %x) { ; ANY-LABEL: fdiv_undef_op1_nnan: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fdiv nnan float %x, undef ret float %r @@ -172,7 +164,6 @@ define float @fdiv_undef_op1_nnan(float %x) { define float @frem_undef_op0_nnan(float %x) { ; ANY-LABEL: frem_undef_op0_nnan: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = frem nnan float undef, %x ret float %r @@ -181,7 +172,6 @@ define float @frem_undef_op0_nnan(float %x) { define float @frem_undef_op1_fast(float %x) { ; ANY-LABEL: frem_undef_op1_fast: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = frem fast float %x, undef ret float %r @@ -234,7 +224,6 @@ define double @frem_undef_undef(double %x) { define float @fadd_undef_op0_nnan_constant(float %x) { ; ANY-LABEL: fadd_undef_op0_nnan_constant: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fadd nnan float undef, 1.0 ret float %r @@ -252,7 +241,6 @@ define float @fadd_undef_op1_constant(float %x) { define float @fsub_undef_op0_fast_constant(float %x) { ; ANY-LABEL: fsub_undef_op0_fast_constant: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fsub fast float undef, 3.0 ret float %r @@ -270,7 +258,6 @@ define float @fsub_undef_op1_constant(float %x) { define float @fmul_undef_op0_nnan_constant(float %x) { ; ANY-LABEL: fmul_undef_op0_nnan_constant: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fmul nnan float undef, 5.0 ret float %r @@ -288,7 +275,6 @@ define float @fmul_undef_op1_constant(float %x) { define float @fdiv_undef_op0_fast_constant(float %x) { ; ANY-LABEL: fdiv_undef_op0_fast_constant: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fdiv fast float undef, 7.0 ret float %r @@ -306,7 +292,6 @@ define float @fdiv_undef_op1_constant(float %x) { define float @frem_undef_op0_nnan_constant(float %x) { ; ANY-LABEL: frem_undef_op0_nnan_constant: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = frem nnan float undef, 9.0 ret float %r @@ -335,7 +320,6 @@ define double @fadd_undef_op0_constant_nan(double %x) { define double @fadd_undef_op1_fast_constant_nan(double %x) { ; ANY-LABEL: fadd_undef_op1_fast_constant_nan: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fadd fast double 0xFFF0000000000001, undef ret double %r @@ -353,7 +337,6 @@ define double @fsub_undef_op0_constant_nan(double %x) { define double @fsub_undef_op1_nnan_constant_nan(double %x) { ; ANY-LABEL: 
fsub_undef_op1_nnan_constant_nan: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fsub nnan double 0x7FF0000000000011, undef ret double %r @@ -371,7 +354,6 @@ define double @fmul_undef_op0_constant_nan(double %x) { define double @fmul_undef_op1_fast_constant_nan(double %x) { ; ANY-LABEL: fmul_undef_op1_fast_constant_nan: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fmul fast double 0xFFF0000000000101, undef ret double %r @@ -389,7 +371,6 @@ define double @fdiv_undef_op0_constant_nan(double %x) { define double @fdiv_undef_op1_nnan_constant_nan(double %x) { ; ANY-LABEL: fdiv_undef_op1_nnan_constant_nan: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fdiv nnan double 0x7FF0000000000111, undef ret double %r @@ -407,7 +388,6 @@ define double @frem_undef_op0_constant_nan(double %x) { define double @frem_undef_op1_fast_constant_nan(double %x) { ; ANY-LABEL: frem_undef_op1_fast_constant_nan: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = frem fast double 0xFFF0000000001001, undef ret double %r @@ -427,7 +407,6 @@ define double @fadd_undef_op0_constant_inf(double %x) { define double @fadd_undef_op1_fast_constant_inf(double %x) { ; ANY-LABEL: fadd_undef_op1_fast_constant_inf: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fadd fast double 0xFFF0000000000000, undef ret double %r @@ -445,7 +424,6 @@ define double @fsub_undef_op0_constant_inf(double %x) { define double @fsub_undef_op1_ninf_constant_inf(double %x) { ; ANY-LABEL: fsub_undef_op1_ninf_constant_inf: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fsub ninf double 0x7FF0000000000000, undef ret double %r @@ -463,7 +441,6 @@ define double @fmul_undef_op0_constant_inf(double %x) { define double @fmul_undef_op1_fast_constant_inf(double %x) { ; ANY-LABEL: fmul_undef_op1_fast_constant_inf: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fmul fast double 0xFFF0000000000000, undef ret double %r @@ -481,7 +458,6 @@ define double @fdiv_undef_op0_constant_inf(double %x) { define double @fdiv_undef_op1_ninf_constant_inf(double %x) { ; ANY-LABEL: fdiv_undef_op1_ninf_constant_inf: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fdiv ninf double 0x7FF0000000000000, undef ret double %r @@ -499,7 +475,6 @@ define double @frem_undef_op0_constant_inf(double %x) { define double @frem_undef_op1_fast_constant_inf(double %x) { ; ANY-LABEL: frem_undef_op1_fast_constant_inf: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = frem fast double 0xFFF0000000000000, undef ret double %r From 69da27c7496ea373567ce5121e6fe8613846e7a5 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 8 Sep 2020 14:05:20 -0700 Subject: [PATCH 0105/1079] llvm-symbolizer: Add optional "start file" to match "start line" Since a function might have portions of its code coming from multiple different files, "start line" is ambiguous (it can't just be resolved relative to the file/line specified). Add start file to disambiguate it. 
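To illustrate (a consumer-side sketch, not part of this patch; Ctx, Addr
and Spec are assumed names for a caller's DIContext, SectionedAddress and
DILineInfoSpecifier):

    DILineInfo Info = Ctx->getLineInfoForAddress(Addr, Spec);
    // Before this change only StartLine was available; when a function's
    // code comes from several files, (FileName, StartLine) alone does not
    // say in which file the function's definition begins.
    llvm::errs() << Info.StartFileName << ':' << Info.StartLine << '\n';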
--- llvm/include/llvm/DebugInfo/DIContext.h | 18 +++++++++++------ llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h | 1 + llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 20 +++++++++++++++---- llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 11 ++++++++++ llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp | 6 ++++-- llvm/test/tools/llvm-dwarfdump/X86/lookup.s | 6 +++--- .../tools/llvm-symbolizer/sym-verbose.test | 12 +++++++++++ 7 files changed, 59 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/DIContext.h b/llvm/include/llvm/DebugInfo/DIContext.h index 661d30d04c94e..ae78fe912188d 100644 --- a/llvm/include/llvm/DebugInfo/DIContext.h +++ b/llvm/include/llvm/DebugInfo/DIContext.h @@ -35,6 +35,7 @@ struct DILineInfo { static constexpr const char *const Addr2LineBadString = "??"; std::string FileName; std::string FunctionName; + std::string StartFileName; Optional Source; uint32_t Line = 0; uint32_t Column = 0; @@ -43,12 +44,15 @@ struct DILineInfo { // DWARF-specific. uint32_t Discriminator = 0; - DILineInfo() : FileName(BadString), FunctionName(BadString) {} + DILineInfo() + : FileName(BadString), FunctionName(BadString), StartFileName(BadString) { + } bool operator==(const DILineInfo &RHS) const { return Line == RHS.Line && Column == RHS.Column && FileName == RHS.FileName && FunctionName == RHS.FunctionName && - StartLine == RHS.StartLine && Discriminator == RHS.Discriminator; + StartFileName == RHS.StartFileName && StartLine == RHS.StartLine && + Discriminator == RHS.Discriminator; } bool operator!=(const DILineInfo &RHS) const { @@ -56,10 +60,10 @@ struct DILineInfo { } bool operator<(const DILineInfo &RHS) const { - return std::tie(FileName, FunctionName, Line, Column, StartLine, - Discriminator) < - std::tie(RHS.FileName, RHS.FunctionName, RHS.Line, RHS.Column, - RHS.StartLine, RHS.Discriminator); + return std::tie(FileName, FunctionName, StartFileName, Line, Column, + StartLine, Discriminator) < + std::tie(RHS.FileName, RHS.FunctionName, RHS.StartFileName, RHS.Line, + RHS.Column, RHS.StartLine, RHS.Discriminator); } explicit operator bool() const { return *this != DILineInfo(); } @@ -72,6 +76,8 @@ struct DILineInfo { OS << "function '" << FunctionName << "', "; OS << "line " << Line << ", "; OS << "column " << Column << ", "; + if (StartFileName != BadString) + OS << "start file '" << StartFileName << "', "; OS << "start line " << StartLine << '\n'; } }; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h index 05a6056e8e21f..5789421e53044 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -262,6 +262,7 @@ class DWARFDie { /// for this subprogram by resolving DW_AT_sepcification or /// DW_AT_abstract_origin references if necessary. uint64_t getDeclLine() const; + std::string getDeclFile(DILineInfoSpecifier::FileLineInfoKind Kind) const; /// Retrieves values of DW_AT_call_file, DW_AT_call_line and DW_AT_call_column /// from DIE (or zeroes if they are missing). 
This function looks for diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index d31c358798211..47eba48c279dd 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -1036,7 +1036,9 @@ DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) { static bool getFunctionNameAndStartLineForAddress(DWARFCompileUnit *CU, uint64_t Address, FunctionNameKind Kind, + DILineInfoSpecifier::FileLineInfoKind FileNameKind, std::string &FunctionName, + std::string &StartFile, uint32_t &StartLine) { // The address may correspond to instruction in some inlined function, // so we have to build the chain of inlined functions and take the @@ -1053,6 +1055,11 @@ static bool getFunctionNameAndStartLineForAddress(DWARFCompileUnit *CU, FunctionName = Name; FoundResult = true; } + std::string DeclFile = DIE.getDeclFile(FileNameKind); + if (!DeclFile.empty()) { + StartFile = DeclFile; + FoundResult = true; + } if (auto DeclLineResult = DIE.getDeclLine()) { StartLine = DeclLineResult; FoundResult = true; @@ -1224,8 +1231,9 @@ DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address, if (!CU) return Result; - getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind, - Result.FunctionName, Result.StartLine); + getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind, Spec.FLIKind, + Result.FunctionName, + Result.StartFileName, Result.StartLine); if (Spec.FLIKind != FileLineInfoKind::None) { if (const DWARFLineTable *LineTable = getLineTableForUnit(CU)) { LineTable->getFileLineInfoForAddress( @@ -1244,15 +1252,17 @@ DILineInfoTable DWARFContext::getLineInfoForAddressRange( return Lines; uint32_t StartLine = 0; + std::string StartFileName; std::string FunctionName(DILineInfo::BadString); - getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind, - FunctionName, StartLine); + getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind, Spec.FLIKind, + FunctionName, StartFileName, StartLine); // If the Specifier says we don't need FileLineInfo, just // return the top-most function at the starting address. 
if (Spec.FLIKind == FileLineInfoKind::None) { DILineInfo Result; Result.FunctionName = FunctionName; + Result.StartFileName = StartFileName; Result.StartLine = StartLine; Lines.push_back(std::make_pair(Address.Address, Result)); return Lines; @@ -1276,6 +1286,7 @@ DILineInfoTable DWARFContext::getLineInfoForAddressRange( Result.FunctionName = FunctionName; Result.Line = Row.Line; Result.Column = Row.Column; + Result.StartFileName = StartFileName; Result.StartLine = StartLine; Lines.push_back(std::make_pair(Row.Address.Address, Result)); } @@ -1318,6 +1329,7 @@ DWARFContext::getInliningInfoForAddress(object::SectionedAddress Address, Frame.FunctionName = Name; if (auto DeclLineResult = FunctionDIE.getDeclLine()) Frame.StartLine = DeclLineResult; + Frame.StartFileName = FunctionDIE.getDeclFile(Spec.FLIKind); if (Spec.FLIKind != FileLineInfoKind::None) { if (i == 0) { // For the topmost frame, initialize the line table of this diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 116f72a1d58ba..31340077a126d 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -557,6 +557,17 @@ uint64_t DWARFDie::getDeclLine() const { return toUnsigned(findRecursively(DW_AT_decl_line), 0); } +std::string +DWARFDie::getDeclFile(DILineInfoSpecifier::FileLineInfoKind Kind) const { + std::string FileName; + if (auto DeclFile = toUnsigned(findRecursively(DW_AT_decl_file))) { + if (const auto *LT = U->getContext().getLineTableForUnit(U)) { + LT->getFileNameByIndex(*DeclFile, U->getCompilationDir(), Kind, FileName); + } + } + return FileName; +} + void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine, uint32_t &CallColumn, uint32_t &CallDiscriminator) const { diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp index 10352237763c9..01dc31d849657 100644 --- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp @@ -84,8 +84,10 @@ void DIPrinter::print(const DILineInfo &Info, bool Inlined) { return; } OS << " Filename: " << Filename << "\n"; - if (Info.StartLine) - OS << "Function start line: " << Info.StartLine << "\n"; + if (Info.StartLine) { + OS << " Function start filename: " << Info.StartFileName << "\n"; + OS << " Function start line: " << Info.StartLine << "\n"; + } OS << " Line: " << Info.Line << "\n"; OS << " Column: " << Info.Column << "\n"; if (Info.Discriminator) diff --git a/llvm/test/tools/llvm-dwarfdump/X86/lookup.s b/llvm/test/tools/llvm-dwarfdump/X86/lookup.s index 74f3314a4f4ec..fed2271f70a06 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/lookup.s +++ b/llvm/test/tools/llvm-dwarfdump/X86/lookup.s @@ -37,9 +37,9 @@ # LEX: DW_AT_low_pc (0x0000000000000004) # LEX: DW_AT_high_pc (0x0000000000000014) -# A: Line info: file 'foo.c', line 3, column 9, start line 1 -# B: Line info: file 'foo.c', line 4, column 6, start line 1 -# C: Line info: file 'foo.c', line 6, column 1, start line 1 +# A: Line info: file 'foo.c', line 3, column 9, start file 'foo.c', start line 1 +# B: Line info: file 'foo.c', line 4, column 6, start file 'foo.c', start line 1 +# C: Line info: file 'foo.c', line 6, column 1, start file 'foo.c', start line 1 .section __TEXT,__text,regular,pure_instructions .macosx_version_min 10, 13 diff --git a/llvm/test/tools/llvm-symbolizer/sym-verbose.test b/llvm/test/tools/llvm-symbolizer/sym-verbose.test index c12eb3b530e1b..1529290379093 100644 --- a/llvm/test/tools/llvm-symbolizer/sym-verbose.test +++ 
b/llvm/test/tools/llvm-symbolizer/sym-verbose.test @@ -18,11 +18,13 @@ RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/ #CHECK: 0x400590 #CHECK-NEXT: foo #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 4 #CHECK-NEXT: Line: 5 #CHECK-NEXT: Column: 7 #CHECK-NEXT: main #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 9 #CHECK-NEXT: Line: 10 #CHECK-NEXT: Column: 0 @@ -30,12 +32,14 @@ RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/ #CHECK: 0x4005a5 #CHECK-NEXT: foo #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 4 #CHECK-NEXT: Line: 5 #CHECK-NEXT: Column: 17 #CHECK-NEXT: Discriminator: 2 #CHECK-NEXT: main #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 9 #CHECK-NEXT: Line: 10 #CHECK-NEXT: Column: 0 @@ -43,12 +47,14 @@ RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/ #CHECK: 0x4005ad #CHECK-NEXT: foo #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 4 #CHECK-NEXT: Line: 0 #CHECK-NEXT: Column: 30 #CHECK-NEXT: Discriminator: 4 #CHECK-NEXT: main #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 9 #CHECK-NEXT: Line: 10 #CHECK-NEXT: Column: 0 @@ -56,11 +62,13 @@ RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/ #CHECK: 0x4005b9 #CHECK-NEXT: foo #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 4 #CHECK-NEXT: Line: 5 #CHECK-NEXT: Column: 7 #CHECK-NEXT: main #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 9 #CHECK-NEXT: Line: 10 #CHECK-NEXT: Column: 0 @@ -69,12 +77,14 @@ RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/ #CHECK: 0x4005ce #CHECK-NEXT: foo #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 4 #CHECK-NEXT: Line: 5 #CHECK-NEXT: Column: 17 #CHECK-NEXT: Discriminator: 2 #CHECK-NEXT: main #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 9 #CHECK-NEXT: Line: 10 #CHECK-NEXT: Column: 0 @@ -83,12 +93,14 @@ RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/ #CHECK: 0x4005d4 #CHECK-NEXT: foo #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 4 #CHECK-NEXT: Line: 5 #CHECK-NEXT: Column: 30 #CHECK-NEXT: Discriminator: 4 #CHECK-NEXT: main #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 9 #CHECK-NEXT: Line: 10 #CHECK-NEXT: Column: 0 From 88bf133c99c3124842c182a019306f83f2c1b856 Mon Sep 17 00:00:00 2001 From: Ryan Prichard Date: Thu, 27 Aug 2020 23:46:49 -0700 Subject: [PATCH 0106/1079] [libunwind] Replace chain-of-ifdefs for 
dl_iterate_phdr

Define a _LIBUNWIND_USE_DL_ITERATE_PHDR macro in config.h when there is
no other unwind info lookup method. Also define a
_LIBUNWIND_USE_DL_UNWIND_FIND_EXIDX macro to factor out the
(__BIONIC__ && _LIBUNWIND_ARM_EHABI) case.

Differential Revision: https://reviews.llvm.org/D86768
---
 libunwind/src/AddressSpace.hpp                | 59 +++++++------------
 libunwind/src/config.h                        | 11 ++++
 libunwind/test/frameheadercache_test.pass.cpp | 27 ++-------
 3 files changed, 35 insertions(+), 62 deletions(-)

diff --git a/libunwind/src/AddressSpace.hpp b/libunwind/src/AddressSpace.hpp
index e6f2609d679b9..cc298c9bbb838 100644
--- a/libunwind/src/AddressSpace.hpp
+++ b/libunwind/src/AddressSpace.hpp
@@ -98,22 +98,15 @@ extern char __eh_frame_hdr_end;
 extern char __exidx_start;
 extern char __exidx_end;

-#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
-
-// ELF-based systems may use dl_iterate_phdr() to access sections
-// containing unwinding information. The ElfW() macro for pointer-size
-// independent ELF header traversal is not provided by <link.h> on some
-// systems (e.g., FreeBSD). On these systems the data structures are
-// just called Elf_XXX. Define ElfW() locally.
-#ifndef _WIN32
-#include <link.h>
-#else
+#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_WIN32)
+
 #include <windows.h>
 #include <psapi.h>
-#endif
-#if !defined(ElfW)
-#define ElfW(type) Elf_##type
-#endif
+
+#elif defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) || \
+    defined(_LIBUNWIND_USE_DL_UNWIND_FIND_EXIDX)
+
+#include <link.h>

 #endif

@@ -351,23 +344,14 @@ LocalAddressSpace::getEncodedP(pint_t &addr, pint_t end, uint8_t encoding,
   return result;
 }

-#ifdef __APPLE__
-#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL)
-#elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL)
-#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_WIN32)
-#elif defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32)
-#elif defined(_LIBUNWIND_ARM_EHABI) && defined(__BIONIC__)
-// Code inside findUnwindSections handles all these cases.
-//
-// Although the above ifdef chain is ugly, there doesn't seem to be a cleaner
-// way to handle it. The generalized boolean expression is:
-//
-//   A OR (B AND C) OR (D AND C) OR (B AND E) OR (F AND E) OR (D AND G)
-//
-// Running it through various boolean expression simplifiers gives expressions
-// that don't help at all.
-#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
+#if defined(_LIBUNWIND_USE_DL_ITERATE_PHDR)
+// The ElfW() macro for pointer-size independent ELF header traversal is not
+// provided by <link.h> on some systems (e.g., FreeBSD). On these systems the
+// data structures are just called Elf_XXX. Define ElfW() locally.
+#if !defined(ElfW) + #define ElfW(type) Elf_##type +#endif #if !defined(Elf_Half) typedef ElfW(Half) Elf_Half; #endif @@ -482,9 +466,7 @@ static int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, return 0; } -#else // defined(LIBUNWIND_SUPPORT_DWARF_UNWIND) -// Given all the #ifdef's above, the code here is for -// defined(LIBUNWIND_ARM_EHABI) +#elif defined(_LIBUNWIND_ARM_EHABI) static int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, void *data) { @@ -516,8 +498,9 @@ static int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, } return found_obj && found_hdr; } -#endif // defined(LIBUNWIND_SUPPORT_DWARF_UNWIND) -#endif // defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + +#endif +#endif // defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr, @@ -601,16 +584,14 @@ inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr, (void)targetAddr; (void)info; return true; -#elif defined(_LIBUNWIND_ARM_EHABI) && defined(__BIONIC__) - // For ARM EHABI, Bionic didn't implement dl_iterate_phdr until API 21. After - // API 21, dl_iterate_phdr exists, but dl_unwind_find_exidx is much faster. +#elif defined(_LIBUNWIND_USE_DL_UNWIND_FIND_EXIDX) int length = 0; info.arm_section = (uintptr_t)dl_unwind_find_exidx((_Unwind_Ptr)targetAddr, &length); info.arm_section_length = (uintptr_t)length * sizeof(EHABIIndexEntry); if (info.arm_section && info.arm_section_length) return true; -#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +#elif defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) dl_iterate_cb_data cb_data = {this, &info, targetAddr}; int found = dl_iterate_phdr(findUnwindSectionsByPhdr, &cb_data); return static_cast(found); diff --git a/libunwind/src/config.h b/libunwind/src/config.h index fd177dd7338c1..0885dccda07eb 100644 --- a/libunwind/src/config.h +++ b/libunwind/src/config.h @@ -34,7 +34,18 @@ #else #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 #endif +#elif defined(_LIBUNWIND_IS_BAREMETAL) + #if !defined(_LIBUNWIND_ARM_EHABI) + #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 + #define _LIBUNWIND_SUPPORT_DWARF_INDEX 1 + #endif +#elif defined(__BIONIC__) && defined(_LIBUNWIND_ARM_EHABI) + // For ARM EHABI, Bionic didn't implement dl_iterate_phdr until API 21. After + // API 21, dl_iterate_phdr exists, but dl_unwind_find_exidx is much faster. + #define _LIBUNWIND_USE_DL_UNWIND_FIND_EXIDX 1 #else + // Assume an ELF system with a dl_iterate_phdr function. + #define _LIBUNWIND_USE_DL_ITERATE_PHDR 1 #if !defined(_LIBUNWIND_ARM_EHABI) #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 #define _LIBUNWIND_SUPPORT_DWARF_INDEX 1 diff --git a/libunwind/test/frameheadercache_test.pass.cpp b/libunwind/test/frameheadercache_test.pass.cpp index ebbc00464e072..7f2d8e22b9f57 100644 --- a/libunwind/test/frameheadercache_test.pass.cpp +++ b/libunwind/test/frameheadercache_test.pass.cpp @@ -3,27 +3,10 @@ #include "../src/config.h" // Only run this test under supported configurations. -// The frame header cache should work fine for other architectures, -// but the #ifdefs end up being even more complicated than this. -#if defined(__x86_64__) && defined(_LIBUNWIND_USE_FRAME_HEADER_CACHE) - -// This #if chain is ugly, but see the comments in AddressSpace.hpp for -// the reasoning. 
- -#ifdef __APPLE__ -int main() { return 0; } -#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL) -int main() { return 0; } -#elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL) -int main() { return 0; } -#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_WIN32) -int main() { return 0; } -#elif defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32) -int main() { return 0; } -#elif defined(_LIBUNWIND_ARM_EHABI) && defined(__BIONIC__) -int main() { return 0; } -#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +#if defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) && \ + defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) && \ + defined(_LIBUNWIND_USE_FRAME_HEADER_CACHE) #include #include @@ -84,9 +67,7 @@ int main() { abort(); return 0; } -#else -int main() { return 0; } -#endif + #else int main() { return 0;} #endif From 844e94a5026eea19f1f8091121ad05684f28d047 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 8 Sep 2020 15:48:47 -0700 Subject: [PATCH 0107/1079] [SelectionDAGBuilder] Remove Unnecessary FastMathFlags temporary. Use SDNodeFlags instead. NFCI This was a missed simplication in D87200 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2d42eb7360663..7bcbb7ccddc8d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8950,16 +8950,13 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, SDLoc dl = getCurSDLoc(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Res; - FastMathFlags FMF; SDNodeFlags SDFlags; - if (auto *FPMO = dyn_cast(&I)) { - FMF = FPMO->getFastMathFlags(); + if (auto *FPMO = dyn_cast(&I)) SDFlags.copyFMF(*FPMO); - } switch (Intrinsic) { case Intrinsic::experimental_vector_reduce_v2_fadd: - if (FMF.allowReassoc()) + if (SDFlags.hasAllowReassociation()) Res = DAG.getNode(ISD::FADD, dl, VT, Op1, DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags), SDFlags); @@ -8967,7 +8964,7 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2, SDFlags); break; case Intrinsic::experimental_vector_reduce_v2_fmul: - if (FMF.allowReassoc()) + if (SDFlags.hasAllowReassociation()) Res = DAG.getNode(ISD::FMUL, dl, VT, Op1, DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags), SDFlags); From e7b40c5492e5c4b182df421892136d2ee6868124 Mon Sep 17 00:00:00 2001 From: Sergej Jaskiewicz Date: Wed, 9 Sep 2020 01:53:01 +0300 Subject: [PATCH 0108/1079] [llvm] [unittest] Allow getting a C string from the TempDir helper class The TempDir.path() member function returns a StringRef. We've been calling the data() method on that StringRef, which does not guarantee to return a null-terminated string (required by chdir and other POSIX functions). Introduce the c_str() method in the TempDir class, which returns the proper string without the need to create a copy of the path at use site. 
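A minimal usage sketch (mirroring the LockFileManager test updated below;
the directory name "scratch" is illustrative):

    TempDir Dir("scratch");            // managed temporary directory
    // chdir() requires a NUL-terminated path. Dir.path().data() returns a
    // StringRef's data pointer, which makes no such guarantee; the new
    // c_str() accessor does, without copying the path.
    ASSERT_FALSE(chdir(Dir.c_str()));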
--- llvm/include/llvm/Testing/Support/SupportHelpers.h | 3 +++ llvm/unittests/Support/LockFileManagerTest.cpp | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/Testing/Support/SupportHelpers.h b/llvm/include/llvm/Testing/Support/SupportHelpers.h index 3517361041b94..2419fc95d8178 100644 --- a/llvm/include/llvm/Testing/Support/SupportHelpers.h +++ b/llvm/include/llvm/Testing/Support/SupportHelpers.h @@ -152,6 +152,9 @@ class TempDir { /// The path to the temporary directory. StringRef path() const { return Path; } + /// The null-terminated C string pointing to the path. + const char *c_str() { return Path.c_str(); } + /// Creates a new path by appending the argument to the path of the managed /// directory using the native path separator. SmallString<128> path(StringRef component) const { diff --git a/llvm/unittests/Support/LockFileManagerTest.cpp b/llvm/unittests/Support/LockFileManagerTest.cpp index 587e442be1966..0b5a0d982a8fc 100644 --- a/llvm/unittests/Support/LockFileManagerTest.cpp +++ b/llvm/unittests/Support/LockFileManagerTest.cpp @@ -81,7 +81,7 @@ TEST(LockFileManagerTest, RelativePath) { char PathBuf[1024]; const char *OrigPath = getcwd(PathBuf, 1024); - ASSERT_FALSE(chdir(LockFileManagerTestDir.path().data())); + ASSERT_FALSE(chdir(LockFileManagerTestDir.c_str())); TempDir inner("inner"); SmallString<64> LockedFile(inner.path()); From efc17c4bc668ada7d6274879bd5bccdb32436fa2 Mon Sep 17 00:00:00 2001 From: Puyan Lotfi Date: Tue, 8 Sep 2020 19:42:38 -0400 Subject: [PATCH 0109/1079] [NFC] Fixing a gcc compiler warning. warning: type qualifiers ignored on cast result type [-Wignored-qualifiers] Differential Revision: https://reviews.llvm.org/D86952 --- llvm/include/llvm/CodeGen/StableHashing.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/CodeGen/StableHashing.h b/llvm/include/llvm/CodeGen/StableHashing.h index c6113aa93c800..caf27e152e78f 100644 --- a/llvm/include/llvm/CodeGen/StableHashing.h +++ b/llvm/include/llvm/CodeGen/StableHashing.h @@ -40,7 +40,7 @@ inline void stable_hash_append(stable_hash &Hash, const char Value) { inline void stable_hash_append(stable_hash &Hash, stable_hash Value) { for (unsigned I = 0; I < 8; ++I) { - stable_hash_append(Hash, (const char)Value); + stable_hash_append(Hash, static_cast(Value)); Value >>= 8; } } From be561fad1ebe531232dfb2c90577c612d9e08039 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 8 Sep 2020 16:12:46 -0700 Subject: [PATCH 0110/1079] Remove unused variable(s) --- llvm/lib/Extensions/Extensions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Extensions/Extensions.cpp b/llvm/lib/Extensions/Extensions.cpp index 2fe537f91876a..0d25cbda38e00 100644 --- a/llvm/lib/Extensions/Extensions.cpp +++ b/llvm/lib/Extensions/Extensions.cpp @@ -8,7 +8,7 @@ namespace llvm { namespace details { void extensions_anchor() { #define HANDLE_EXTENSION(Ext) \ - static auto Ext = get##Ext##PluginInfo(); + get##Ext##PluginInfo(); #include "llvm/Support/Extension.def" } } From 055d2095898dfbb58b71322c02fbba7e71e8f76a Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 2 Sep 2020 14:05:41 -0500 Subject: [PATCH 0111/1079] Handle masked loads and stores in MemoryLocation/Dependence Differential Revision: https://reviews.llvm.org/D87061 --- .../lib/Analysis/MemoryDependenceAnalysis.cpp | 23 ++++++++++++++++++- llvm/lib/Analysis/MemoryLocation.cpp | 15 ++++++++++++ llvm/test/Transforms/GVN/masked-load-store.ll | 6 +++-- 3 files changed, 41 
insertions(+), 3 deletions(-) diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 2428d57d2809f..a19c1d78526b2 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -166,6 +166,12 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. return ModRefInfo::Mod; + case Intrinsic::masked_load: + Loc = MemoryLocation::getForArgument(II, 0, TLI); + return ModRefInfo::Ref; + case Intrinsic::masked_store: + Loc = MemoryLocation::getForArgument(II, 1, TLI); + return ModRefInfo::Mod; default: break; } @@ -442,7 +448,9 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( if (IntrinsicInst *II = dyn_cast(Inst)) { // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. - if (II->getIntrinsicID() == Intrinsic::lifetime_start) { + Intrinsic::ID ID = II->getIntrinsicID(); + switch (ID) { + case Intrinsic::lifetime_start: // FIXME: This only considers queries directly on the invariant-tagged // pointer, not on query pointers that are indexed off of them. It'd // be nice to handle that at some point (the right approach is to use @@ -450,6 +458,19 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( if (BatchAA.isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc)) return MemDepResult::getDef(II); continue; + case Intrinsic::masked_load: + case Intrinsic::masked_store: { + MemoryLocation Loc; + /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI); + AliasResult R = BatchAA.alias(Loc, MemLoc); + if (R == NoAlias) + continue; + if (R == MustAlias) + return MemDepResult::getDef(II); + if (ID == Intrinsic::masked_load) + continue; + return MemDepResult::getClobber(II); + } } } diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp index 9694036ce4767..fcea03a118bfc 100644 --- a/llvm/lib/Analysis/MemoryLocation.cpp +++ b/llvm/lib/Analysis/MemoryLocation.cpp @@ -176,6 +176,21 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call, cast(II->getArgOperand(0))->getZExtValue()), AATags); + case Intrinsic::masked_load: + assert(ArgIdx == 0 && "Invalid argument index"); + return MemoryLocation( + Arg, + LocationSize::upperBound(DL.getTypeStoreSize(II->getType())), + AATags); + + case Intrinsic::masked_store: + assert(ArgIdx == 1 && "Invalid argument index"); + return MemoryLocation( + Arg, + LocationSize::upperBound( + DL.getTypeStoreSize(II->getArgOperand(0)->getType())), + AATags); + case Intrinsic::invariant_end: // The first argument to an invariant.end is a "descriptor" type (e.g. a // pointer to a empty struct) which is never actually dereferenced. diff --git a/llvm/test/Transforms/GVN/masked-load-store.ll b/llvm/test/Transforms/GVN/masked-load-store.ll index 8119d77bb76e0..0b71a10a067db 100644 --- a/llvm/test/Transforms/GVN/masked-load-store.ll +++ b/llvm/test/Transforms/GVN/masked-load-store.ll @@ -1,6 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -gvn -S < %s | FileCheck %s +; Check that in both cases the second load is recognized as redundant +; and is removed. 
+ define <128 x i8> @f0(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { ; CHECK-LABEL: @f0( ; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] @@ -21,8 +24,7 @@ define <128 x i8> @f1(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { ; CHECK-NEXT: [[V1:%.*]] = getelementptr <128 x i8>, <128 x i8>* [[A0:%.*]], i32 1 ; CHECK-NEXT: [[V2:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) ; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[A2]], <128 x i8>* [[V1]], i32 4, <128 x i1> [[V0]]) -; CHECK-NEXT: [[V3:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) -; CHECK-NEXT: [[V4:%.*]] = add <128 x i8> [[V2]], [[V3]] +; CHECK-NEXT: [[V4:%.*]] = add <128 x i8> [[V2]], [[V2]] ; CHECK-NEXT: ret <128 x i8> [[V4]] ; %v0 = icmp eq <128 x i8> %a1, %a2 From 4013bab9c4a5fe634be6271779a99bc158c3e396 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 8 Sep 2020 16:42:16 -0700 Subject: [PATCH 0112/1079] [NFC][ThinLTO] EmbedBitcodeSection doesn't need the Config Instead, passing in the command line options, initialized to nullptr. In an upcoming patch, we can then use the parameter to pass actual command line options. Differential Revision: https://reviews.llvm.org/D87336 --- llvm/lib/LTO/LTOBackend.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index ca29548a4d7ca..65d8669604950 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -350,7 +350,7 @@ static cl::opt EmbedBitcode( "lto-embed-bitcode", cl::init(false), cl::desc("Embed LLVM bitcode in object files produced by LTO")); -static void EmitBitcodeSection(Module &M, const Config &Conf) { +static void EmitBitcodeSection(Module &M) { if (!EmbedBitcode) return; SmallVector Buffer; @@ -369,7 +369,7 @@ void codegen(const Config &Conf, TargetMachine *TM, AddStreamFn AddStream, if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod)) return; - EmitBitcodeSection(Mod, Conf); + EmitBitcodeSection(Mod); std::unique_ptr DwoOut; SmallString<1024> DwoFile(Conf.SplitDwarfOutput); From 4682f654031c346106463d37ac44e44b0c9856dc Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Wed, 9 Sep 2020 08:48:04 +0800 Subject: [PATCH 0113/1079] [obj2yaml][test] Test generating and dumping a broken debug_ranges section. This patch tests generating and dumping a broken debug_ranges section. Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D87275 --- .../ObjectYAML/MachO/DWARF-debug_ranges.yaml | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml b/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml index 30997ba1144b6..5aea820145cf7 100644 --- a/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml +++ b/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml @@ -284,3 +284,27 @@ LoadCommands: reserved2: 0x00000000 reserved3: 0x00000000 content: [[CONTENT=]] + +## Test generating and dumping a __debug_ranges section whose size isn't a +## multiple of the address size. This test case is to ensure that when the +## parser fails, the content of the __debug_ranges section will be dumped into +## the 'content' entry and the 'debug_ranges' entry will not exist. 
+ +# RUN: yaml2obj --docnum=2 -DSIZE=3 -DCONTENT='010203' %s | obj2yaml | FileCheck %s --check-prefix=FAILS + +# FAILS-NOT: DWARF: +# FAILS: Sections: +# FAILS-NEXT: - sectname: __debug_ranges +# FAILS-NEXT: segname: __DWARF +# FAILS-NEXT: addr: 0x0000000000000000 +# FAILS-NEXT: size: 3 +# FAILS-NEXT: offset: 0x00000210 +# FAILS-NEXT: align: 0 +# FAILS-NEXT: reloff: 0x00000000 +# FAILS-NEXT: nreloc: 0 +# FAILS-NEXT: flags: 0x00000000 +# FAILS-NEXT: reserved1: 0x00000000 +# FAILS-NEXT: reserved2: 0x00000000 +# FAILS-NEXT: reserved3: 0x00000000 +# FAILS-NEXT: content: '010203' +# FAILS-NEXT: ... From 889cf9bedff1e4516c6caea5a8a214adbdde0102 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 8 Sep 2020 19:27:37 -0500 Subject: [PATCH 0114/1079] [EarlyCSE] Add testcase for masked loads and stores, NFC --- .../Transforms/EarlyCSE/masked-intrinsics.ll | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 llvm/test/Transforms/EarlyCSE/masked-intrinsics.ll diff --git a/llvm/test/Transforms/EarlyCSE/masked-intrinsics.ll b/llvm/test/Transforms/EarlyCSE/masked-intrinsics.ll new file mode 100644 index 0000000000000..77183ab97a6b0 --- /dev/null +++ b/llvm/test/Transforms/EarlyCSE/masked-intrinsics.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -early-cse < %s | FileCheck %s + +define <128 x i8> @f0(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { +; CHECK-LABEL: @f0( +; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] +; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[A1]], <128 x i8>* [[A0:%.*]], i32 4, <128 x i1> [[V0]]) +; CHECK-NEXT: [[V1:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) +; CHECK-NEXT: ret <128 x i8> [[V1]] +; + %v0 = icmp eq <128 x i8> %a1, %a2 + call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %a1, <128 x i8>* %a0, i32 4, <128 x i1> %v0) + %v1 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + ret <128 x i8> %v1 +} + +define <128 x i8> @f1(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { +; CHECK-LABEL: @f1( +; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] +; CHECK-NEXT: [[V1:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0:%.*]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) +; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[V1]], <128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]]) +; CHECK-NEXT: ret <128 x i8> [[V1]] +; + %v0 = icmp eq <128 x i8> %a1, %a2 + %v1 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %v1, <128 x i8>* %a0, i32 4, <128 x i1> %v0) + ret <128 x i8> %v1 +} + +define <128 x i8> @f2(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { +; CHECK-LABEL: @f2( +; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] +; CHECK-NEXT: [[V1:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0:%.*]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) +; CHECK-NEXT: [[V3:%.*]] = add <128 x i8> [[V1]], [[V1]] +; CHECK-NEXT: ret <128 x i8> [[V3]] +; + %v0 = icmp eq <128 x i8> %a1, %a2 + %v1 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + %v2 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, 
<128 x i8> undef) + %v3 = add <128 x i8> %v1, %v2 + ret <128 x i8> %v3 +} + +declare <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>*, i32, <128 x i1>, <128 x i8>) +declare void @llvm.masked.store.v128i8.p0v128i8(<128 x i8>, <128 x i8>*, i32, <128 x i1>) From 88b368a1c47bca536f03041f7464235b94ea98a1 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Tue, 8 Sep 2020 21:21:14 -0400 Subject: [PATCH 0115/1079] [PowerPC] Set setMaxAtomicSizeInBitsSupported appropriately for 32-bit PowerPC in PPCTargetLowering Reviewed By: nemanjai Differential Revision: https://reviews.llvm.org/D86165 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 + llvm/test/CodeGen/PowerPC/atomics-indexed.ll | 140 ++++-- llvm/test/CodeGen/PowerPC/atomics.ll | 437 ++++++++++++++++--- 3 files changed, 503 insertions(+), 77 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f6b5d2ea987f8..f542a8018b4f0 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1199,6 +1199,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLibcallName(RTLIB::SRA_I128, nullptr); } + if (!isPPC64) + setMaxAtomicSizeInBitsSupported(32); + setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1); // We have target-specific dag combine patterns for the following nodes: diff --git a/llvm/test/CodeGen/PowerPC/atomics-indexed.ll b/llvm/test/CodeGen/PowerPC/atomics-indexed.ll index b4790adfd9088..cf7225a5fc200 100644 --- a/llvm/test/CodeGen/PowerPC/atomics-indexed.ll +++ b/llvm/test/CodeGen/PowerPC/atomics-indexed.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -verify-machineinstrs -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32 ; FIXME: -verify-machineinstrs currently fail on ppc64 (mismatched register/instruction). 
; This is already checked for in Atomics-64.ll @@ -8,9 +9,25 @@ ; Indexed version of loads define i8 @load_x_i8_seq_cst([100000 x i8]* %mem) { -; CHECK-LABEL: load_x_i8_seq_cst -; CHECK: sync -; CHECK: lbzx [[VAL:r[0-9]+]] +; PPC32-LABEL: load_x_i8_seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: lis r4, 1 +; PPC32-NEXT: sync +; PPC32-NEXT: ori r4, r4, 24464 +; PPC32-NEXT: lbzx r3, r3, r4 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_x_i8_seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: lis r4, 1 +; PPC64-NEXT: sync +; PPC64-NEXT: ori r4, r4, 24464 +; PPC64-NEXT: lbzx r3, r3, r4 +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: blr ; CHECK-PPC32: lwsync ; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]] ; CHECK-PPC64: bne- [[CR]], .+4 @@ -20,8 +37,23 @@ define i8 @load_x_i8_seq_cst([100000 x i8]* %mem) { ret i8 %val } define i16 @load_x_i16_acquire([100000 x i16]* %mem) { -; CHECK-LABEL: load_x_i16_acquire -; CHECK: lhzx [[VAL:r[0-9]+]] +; PPC32-LABEL: load_x_i16_acquire: +; PPC32: # %bb.0: +; PPC32-NEXT: lis r4, 2 +; PPC32-NEXT: ori r4, r4, 48928 +; PPC32-NEXT: lhzx r3, r3, r4 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_x_i16_acquire: +; PPC64: # %bb.0: +; PPC64-NEXT: lis r4, 2 +; PPC64-NEXT: ori r4, r4, 48928 +; PPC64-NEXT: lhzx r3, r3, r4 +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: blr ; CHECK-PPC32: lwsync ; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]] ; CHECK-PPC64: bne- [[CR]], .+4 @@ -31,19 +63,39 @@ define i16 @load_x_i16_acquire([100000 x i16]* %mem) { ret i16 %val } define i32 @load_x_i32_monotonic([100000 x i32]* %mem) { -; CHECK-LABEL: load_x_i32_monotonic -; CHECK: lwzx -; CHECK-NOT: sync +; CHECK-LABEL: load_x_i32_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: lis r4, 5 +; CHECK-NEXT: ori r4, r4, 32320 +; CHECK-NEXT: lwzx r3, r3, r4 +; CHECK-NEXT: blr %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %mem, i64 0, i64 90000 %val = load atomic i32, i32* %ptr monotonic, align 4 ret i32 %val } define i64 @load_x_i64_unordered([100000 x i64]* %mem) { -; CHECK-LABEL: load_x_i64_unordered -; PPC32: __sync_ -; PPC64-NOT: __sync_ -; PPC64: ldx -; CHECK-NOT: sync +; PPC32-LABEL: load_x_i64_unordered: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: addi r3, r3, -896 +; PPC32-NEXT: addis r3, r3, 11 +; PPC32-NEXT: li r4, 0 +; PPC32-NEXT: bl __atomic_load_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_x_i64_unordered: +; PPC64: # %bb.0: +; PPC64-NEXT: lis r4, 10 +; PPC64-NEXT: ori r4, r4, 64640 +; PPC64-NEXT: ldx r3, r3, r4 +; PPC64-NEXT: blr %ptr = getelementptr inbounds [100000 x i64], [100000 x i64]* %mem, i64 0, i64 90000 %val = load atomic i64, i64* %ptr unordered, align 8 ret i64 %val @@ -51,35 +103,69 @@ define i64 @load_x_i64_unordered([100000 x i64]* %mem) { ; Indexed version of stores define void @store_x_i8_seq_cst([100000 x i8]* %mem) { -; CHECK-LABEL: store_x_i8_seq_cst -; CHECK: sync -; CHECK: stbx +; CHECK-LABEL: store_x_i8_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: lis r4, 1 +; CHECK-NEXT: ori r4, r4, 24464 +; CHECK-NEXT: li r5, 42 +; CHECK-NEXT: sync +; CHECK-NEXT: stbx r5, r3, r4 +; CHECK-NEXT: blr %ptr = getelementptr inbounds [100000 x i8], [100000 x i8]* %mem, i64 0, i64 90000 store atomic i8 42, i8* %ptr 
seq_cst, align 1 ret void } define void @store_x_i16_release([100000 x i16]* %mem) { -; CHECK-LABEL: store_x_i16_release -; CHECK: lwsync -; CHECK: sthx +; CHECK-LABEL: store_x_i16_release: +; CHECK: # %bb.0: +; CHECK-NEXT: lis r4, 2 +; CHECK-NEXT: ori r4, r4, 48928 +; CHECK-NEXT: li r5, 42 +; CHECK-NEXT: lwsync +; CHECK-NEXT: sthx r5, r3, r4 +; CHECK-NEXT: blr %ptr = getelementptr inbounds [100000 x i16], [100000 x i16]* %mem, i64 0, i64 90000 store atomic i16 42, i16* %ptr release, align 2 ret void } define void @store_x_i32_monotonic([100000 x i32]* %mem) { -; CHECK-LABEL: store_x_i32_monotonic -; CHECK-NOT: sync -; CHECK: stwx +; CHECK-LABEL: store_x_i32_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: lis r4, 5 +; CHECK-NEXT: ori r4, r4, 32320 +; CHECK-NEXT: li r5, 42 +; CHECK-NEXT: stwx r5, r3, r4 +; CHECK-NEXT: blr %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %mem, i64 0, i64 90000 store atomic i32 42, i32* %ptr monotonic, align 4 ret void } define void @store_x_i64_unordered([100000 x i64]* %mem) { -; CHECK-LABEL: store_x_i64_unordered -; CHECK-NOT: sync -; PPC32: __sync_ -; PPC64-NOT: __sync_ -; PPC64: stdx +; PPC32-LABEL: store_x_i64_unordered: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: addi r3, r3, -896 +; PPC32-NEXT: addis r3, r3, 11 +; PPC32-NEXT: li r5, 0 +; PPC32-NEXT: li r6, 42 +; PPC32-NEXT: li r7, 0 +; PPC32-NEXT: bl __atomic_store_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: store_x_i64_unordered: +; PPC64: # %bb.0: +; PPC64-NEXT: lis r4, 10 +; PPC64-NEXT: ori r4, r4, 64640 +; PPC64-NEXT: li r5, 42 +; PPC64-NEXT: stdx r5, r3, r4 +; PPC64-NEXT: blr %ptr = getelementptr inbounds [100000 x i64], [100000 x i64]* %mem, i64 0, i64 90000 store atomic i64 42, i64* %ptr unordered, align 8 ret void diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll index c964218cb60bf..008cd4c7157c1 100644 --- a/llvm/test/CodeGen/PowerPC/atomics.ll +++ b/llvm/test/CodeGen/PowerPC/atomics.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu -verify-machineinstrs -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32 ; This is already checked for in Atomics-64.ll ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC64 @@ -9,22 +10,35 @@ ; We first check loads, for all sizes from i8 to i64. ; We also vary orderings to check for barriers. 
define i8 @load_i8_unordered(i8* %mem) { -; CHECK-LABEL: load_i8_unordered -; CHECK: lbz -; CHECK-NOT: sync +; CHECK-LABEL: load_i8_unordered: +; CHECK: # %bb.0: +; CHECK-NEXT: lbz r3, 0(r3) +; CHECK-NEXT: blr %val = load atomic i8, i8* %mem unordered, align 1 ret i8 %val } define i16 @load_i16_monotonic(i16* %mem) { -; CHECK-LABEL: load_i16_monotonic -; CHECK: lhz -; CHECK-NOT: sync +; CHECK-LABEL: load_i16_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: blr %val = load atomic i16, i16* %mem monotonic, align 2 ret i16 %val } define i32 @load_i32_acquire(i32* %mem) { -; CHECK-LABEL: load_i32_acquire -; CHECK: lwz [[VAL:r[0-9]+]] +; PPC32-LABEL: load_i32_acquire: +; PPC32: # %bb.0: +; PPC32-NEXT: lwz r3, 0(r3) +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_i32_acquire: +; PPC64: # %bb.0: +; PPC64-NEXT: lwz r3, 0(r3) +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: blr %val = load atomic i32, i32* %mem acquire, align 4 ; CHECK-PPC32: lwsync ; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]] @@ -33,11 +47,28 @@ define i32 @load_i32_acquire(i32* %mem) { ret i32 %val } define i64 @load_i64_seq_cst(i64* %mem) { -; CHECK-LABEL: load_i64_seq_cst -; CHECK: sync -; PPC32: __sync_ -; PPC64-NOT: __sync_ -; PPC64: ld [[VAL:r[0-9]+]] +; PPC32-LABEL: load_i64_seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: li r4, 5 +; PPC32-NEXT: bl __atomic_load_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_i64_seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: sync +; PPC64-NEXT: ld r3, 0(r3) +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: blr %val = load atomic i64, i64* %mem seq_cst, align 8 ; CHECK-PPC32: lwsync ; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]] @@ -48,95 +79,401 @@ define i64 @load_i64_seq_cst(i64* %mem) { ; Stores define void @store_i8_unordered(i8* %mem) { -; CHECK-LABEL: store_i8_unordered -; CHECK-NOT: sync -; CHECK: stb +; CHECK-LABEL: store_i8_unordered: +; CHECK: # %bb.0: +; CHECK-NEXT: li r4, 42 +; CHECK-NEXT: stb r4, 0(r3) +; CHECK-NEXT: blr store atomic i8 42, i8* %mem unordered, align 1 ret void } define void @store_i16_monotonic(i16* %mem) { -; CHECK-LABEL: store_i16_monotonic -; CHECK-NOT: sync -; CHECK: sth +; CHECK-LABEL: store_i16_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: li r4, 42 +; CHECK-NEXT: sth r4, 0(r3) +; CHECK-NEXT: blr store atomic i16 42, i16* %mem monotonic, align 2 ret void } define void @store_i32_release(i32* %mem) { -; CHECK-LABEL: store_i32_release -; CHECK: lwsync -; CHECK: stw +; CHECK-LABEL: store_i32_release: +; CHECK: # %bb.0: +; CHECK-NEXT: li r4, 42 +; CHECK-NEXT: lwsync +; CHECK-NEXT: stw r4, 0(r3) +; CHECK-NEXT: blr store atomic i32 42, i32* %mem release, align 4 ret void } define void @store_i64_seq_cst(i64* %mem) { -; CHECK-LABEL: store_i64_seq_cst -; CHECK: sync -; PPC32: __sync_ -; PPC64-NOT: __sync_ -; PPC64: std +; PPC32-LABEL: store_i64_seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: li r5, 0 +; PPC32-NEXT: li r6, 42 +; PPC32-NEXT: li r7, 5 +; PPC32-NEXT: bl __atomic_store_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 
+; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: store_i64_seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: li r4, 42 +; PPC64-NEXT: sync +; PPC64-NEXT: std r4, 0(r3) +; PPC64-NEXT: blr store atomic i64 42, i64* %mem seq_cst, align 8 ret void } ; Atomic CmpXchg define i8 @cas_strong_i8_sc_sc(i8* %mem) { -; CHECK-LABEL: cas_strong_i8_sc_sc -; CHECK: sync +; PPC32-LABEL: cas_strong_i8_sc_sc: +; PPC32: # %bb.0: +; PPC32-NEXT: rlwinm r8, r3, 3, 27, 28 +; PPC32-NEXT: li r5, 1 +; PPC32-NEXT: li r6, 0 +; PPC32-NEXT: li r7, 255 +; PPC32-NEXT: rlwinm r4, r3, 0, 0, 29 +; PPC32-NEXT: xori r3, r8, 24 +; PPC32-NEXT: slw r5, r5, r3 +; PPC32-NEXT: slw r8, r6, r3 +; PPC32-NEXT: slw r6, r7, r3 +; PPC32-NEXT: and r7, r5, r6 +; PPC32-NEXT: and r8, r8, r6 +; PPC32-NEXT: sync +; PPC32-NEXT: .LBB8_1: +; PPC32-NEXT: lwarx r9, 0, r4 +; PPC32-NEXT: and r5, r9, r6 +; PPC32-NEXT: cmpw r5, r8 +; PPC32-NEXT: bne cr0, .LBB8_3 +; PPC32-NEXT: # %bb.2: +; PPC32-NEXT: andc r9, r9, r6 +; PPC32-NEXT: or r9, r9, r7 +; PPC32-NEXT: stwcx. r9, 0, r4 +; PPC32-NEXT: bne cr0, .LBB8_1 +; PPC32-NEXT: b .LBB8_4 +; PPC32-NEXT: .LBB8_3: +; PPC32-NEXT: stwcx. r9, 0, r4 +; PPC32-NEXT: .LBB8_4: +; PPC32-NEXT: srw r3, r5, r3 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: cas_strong_i8_sc_sc: +; PPC64: # %bb.0: +; PPC64-NEXT: rlwinm r8, r3, 3, 27, 28 +; PPC64-NEXT: li r5, 1 +; PPC64-NEXT: li r6, 0 +; PPC64-NEXT: li r7, 255 +; PPC64-NEXT: rldicr r4, r3, 0, 61 +; PPC64-NEXT: xori r3, r8, 24 +; PPC64-NEXT: slw r5, r5, r3 +; PPC64-NEXT: slw r8, r6, r3 +; PPC64-NEXT: slw r6, r7, r3 +; PPC64-NEXT: and r7, r5, r6 +; PPC64-NEXT: and r8, r8, r6 +; PPC64-NEXT: sync +; PPC64-NEXT: .LBB8_1: +; PPC64-NEXT: lwarx r9, 0, r4 +; PPC64-NEXT: and r5, r9, r6 +; PPC64-NEXT: cmpw r5, r8 +; PPC64-NEXT: bne cr0, .LBB8_3 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: andc r9, r9, r6 +; PPC64-NEXT: or r9, r9, r7 +; PPC64-NEXT: stwcx. r9, 0, r4 +; PPC64-NEXT: bne cr0, .LBB8_1 +; PPC64-NEXT: b .LBB8_4 +; PPC64-NEXT: .LBB8_3: +; PPC64-NEXT: stwcx. r9, 0, r4 +; PPC64-NEXT: .LBB8_4: +; PPC64-NEXT: srw r3, r5, r3 +; PPC64-NEXT: lwsync +; PPC64-NEXT: blr %val = cmpxchg i8* %mem, i8 0, i8 1 seq_cst seq_cst -; CHECK: lwsync %loaded = extractvalue { i8, i1} %val, 0 ret i8 %loaded } define i16 @cas_weak_i16_acquire_acquire(i16* %mem) { -; CHECK-LABEL: cas_weak_i16_acquire_acquire -;CHECK-NOT: sync +; PPC32-LABEL: cas_weak_i16_acquire_acquire: +; PPC32: # %bb.0: +; PPC32-NEXT: li r6, 0 +; PPC32-NEXT: rlwinm r4, r3, 3, 27, 27 +; PPC32-NEXT: li r5, 1 +; PPC32-NEXT: ori r7, r6, 65535 +; PPC32-NEXT: xori r4, r4, 16 +; PPC32-NEXT: slw r8, r5, r4 +; PPC32-NEXT: slw r9, r6, r4 +; PPC32-NEXT: slw r5, r7, r4 +; PPC32-NEXT: rlwinm r3, r3, 0, 0, 29 +; PPC32-NEXT: and r6, r8, r5 +; PPC32-NEXT: and r8, r9, r5 +; PPC32-NEXT: .LBB9_1: +; PPC32-NEXT: lwarx r9, 0, r3 +; PPC32-NEXT: and r7, r9, r5 +; PPC32-NEXT: cmpw r7, r8 +; PPC32-NEXT: bne cr0, .LBB9_3 +; PPC32-NEXT: # %bb.2: +; PPC32-NEXT: andc r9, r9, r5 +; PPC32-NEXT: or r9, r9, r6 +; PPC32-NEXT: stwcx. r9, 0, r3 +; PPC32-NEXT: bne cr0, .LBB9_1 +; PPC32-NEXT: b .LBB9_4 +; PPC32-NEXT: .LBB9_3: +; PPC32-NEXT: stwcx. 
r9, 0, r3 +; PPC32-NEXT: .LBB9_4: +; PPC32-NEXT: srw r3, r7, r4 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: cas_weak_i16_acquire_acquire: +; PPC64: # %bb.0: +; PPC64-NEXT: li r6, 0 +; PPC64-NEXT: rlwinm r4, r3, 3, 27, 27 +; PPC64-NEXT: li r5, 1 +; PPC64-NEXT: ori r7, r6, 65535 +; PPC64-NEXT: xori r4, r4, 16 +; PPC64-NEXT: slw r8, r5, r4 +; PPC64-NEXT: slw r9, r6, r4 +; PPC64-NEXT: slw r5, r7, r4 +; PPC64-NEXT: rldicr r3, r3, 0, 61 +; PPC64-NEXT: and r6, r8, r5 +; PPC64-NEXT: and r8, r9, r5 +; PPC64-NEXT: .LBB9_1: +; PPC64-NEXT: lwarx r9, 0, r3 +; PPC64-NEXT: and r7, r9, r5 +; PPC64-NEXT: cmpw r7, r8 +; PPC64-NEXT: bne cr0, .LBB9_3 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: andc r9, r9, r5 +; PPC64-NEXT: or r9, r9, r6 +; PPC64-NEXT: stwcx. r9, 0, r3 +; PPC64-NEXT: bne cr0, .LBB9_1 +; PPC64-NEXT: b .LBB9_4 +; PPC64-NEXT: .LBB9_3: +; PPC64-NEXT: stwcx. r9, 0, r3 +; PPC64-NEXT: .LBB9_4: +; PPC64-NEXT: srw r3, r7, r4 +; PPC64-NEXT: lwsync +; PPC64-NEXT: blr %val = cmpxchg weak i16* %mem, i16 0, i16 1 acquire acquire -; CHECK: lwsync %loaded = extractvalue { i16, i1} %val, 0 ret i16 %loaded } define i32 @cas_strong_i32_acqrel_acquire(i32* %mem) { -; CHECK-LABEL: cas_strong_i32_acqrel_acquire -; CHECK: lwsync +; CHECK-LABEL: cas_strong_i32_acqrel_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: li r6, 0 +; CHECK-NEXT: lwsync +; CHECK-NEXT: .LBB10_1: +; CHECK-NEXT: lwarx r4, 0, r3 +; CHECK-NEXT: cmpw r6, r4 +; CHECK-NEXT: bne cr0, .LBB10_3 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: stwcx. r5, 0, r3 +; CHECK-NEXT: bne cr0, .LBB10_1 +; CHECK-NEXT: b .LBB10_4 +; CHECK-NEXT: .LBB10_3: +; CHECK-NEXT: stwcx. r4, 0, r3 +; CHECK-NEXT: .LBB10_4: +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: lwsync +; CHECK-NEXT: blr %val = cmpxchg i32* %mem, i32 0, i32 1 acq_rel acquire -; CHECK: lwsync %loaded = extractvalue { i32, i1} %val, 0 ret i32 %loaded } define i64 @cas_weak_i64_release_monotonic(i64* %mem) { -; CHECK-LABEL: cas_weak_i64_release_monotonic -; CHECK: lwsync +; PPC32-LABEL: cas_weak_i64_release_monotonic: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: li r4, 0 +; PPC32-NEXT: stw r4, 12(r1) +; PPC32-NEXT: li r5, 0 +; PPC32-NEXT: stw r4, 8(r1) +; PPC32-NEXT: addi r4, r1, 8 +; PPC32-NEXT: li r6, 1 +; PPC32-NEXT: li r7, 3 +; PPC32-NEXT: li r8, 0 +; PPC32-NEXT: bl __atomic_compare_exchange_8 +; PPC32-NEXT: lwz r4, 12(r1) +; PPC32-NEXT: lwz r3, 8(r1) +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: cas_weak_i64_release_monotonic: +; PPC64: # %bb.0: +; PPC64-NEXT: li r5, 1 +; PPC64-NEXT: li r6, 0 +; PPC64-NEXT: lwsync +; PPC64-NEXT: .LBB11_1: +; PPC64-NEXT: ldarx r4, 0, r3 +; PPC64-NEXT: cmpd r6, r4 +; PPC64-NEXT: bne cr0, .LBB11_4 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: stdcx. r5, 0, r3 +; PPC64-NEXT: bne cr0, .LBB11_1 +; PPC64-NEXT: # %bb.3: +; PPC64-NEXT: mr r3, r4 +; PPC64-NEXT: blr +; PPC64-NEXT: .LBB11_4: +; PPC64-NEXT: stdcx. 
r4, 0, r3 +; PPC64-NEXT: mr r3, r4 +; PPC64-NEXT: blr %val = cmpxchg weak i64* %mem, i64 0, i64 1 release monotonic -; CHECK-NOT: [sync ] %loaded = extractvalue { i64, i1} %val, 0 ret i64 %loaded } ; AtomicRMW define i8 @add_i8_monotonic(i8* %mem, i8 %operand) { -; CHECK-LABEL: add_i8_monotonic -; CHECK-NOT: sync +; PPC32-LABEL: add_i8_monotonic: +; PPC32: # %bb.0: +; PPC32-NEXT: rlwinm r7, r3, 3, 27, 28 +; PPC32-NEXT: li r6, 255 +; PPC32-NEXT: rlwinm r5, r3, 0, 0, 29 +; PPC32-NEXT: xori r3, r7, 24 +; PPC32-NEXT: slw r4, r4, r3 +; PPC32-NEXT: slw r6, r6, r3 +; PPC32-NEXT: .LBB12_1: +; PPC32-NEXT: lwarx r7, 0, r5 +; PPC32-NEXT: add r8, r4, r7 +; PPC32-NEXT: andc r9, r7, r6 +; PPC32-NEXT: and r8, r8, r6 +; PPC32-NEXT: or r8, r8, r9 +; PPC32-NEXT: stwcx. r8, 0, r5 +; PPC32-NEXT: bne cr0, .LBB12_1 +; PPC32-NEXT: # %bb.2: +; PPC32-NEXT: srw r3, r7, r3 +; PPC32-NEXT: blr +; +; PPC64-LABEL: add_i8_monotonic: +; PPC64: # %bb.0: +; PPC64-NEXT: rlwinm r7, r3, 3, 27, 28 +; PPC64-NEXT: li r6, 255 +; PPC64-NEXT: rldicr r5, r3, 0, 61 +; PPC64-NEXT: xori r3, r7, 24 +; PPC64-NEXT: slw r4, r4, r3 +; PPC64-NEXT: slw r6, r6, r3 +; PPC64-NEXT: .LBB12_1: +; PPC64-NEXT: lwarx r7, 0, r5 +; PPC64-NEXT: add r8, r4, r7 +; PPC64-NEXT: andc r9, r7, r6 +; PPC64-NEXT: and r8, r8, r6 +; PPC64-NEXT: or r8, r8, r9 +; PPC64-NEXT: stwcx. r8, 0, r5 +; PPC64-NEXT: bne cr0, .LBB12_1 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: srw r3, r7, r3 +; PPC64-NEXT: blr %val = atomicrmw add i8* %mem, i8 %operand monotonic ret i8 %val } define i16 @xor_i16_seq_cst(i16* %mem, i16 %operand) { -; CHECK-LABEL: xor_i16_seq_cst -; CHECK: sync +; PPC32-LABEL: xor_i16_seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: li r6, 0 +; PPC32-NEXT: rlwinm r7, r3, 3, 27, 27 +; PPC32-NEXT: rlwinm r5, r3, 0, 0, 29 +; PPC32-NEXT: ori r6, r6, 65535 +; PPC32-NEXT: xori r3, r7, 16 +; PPC32-NEXT: slw r4, r4, r3 +; PPC32-NEXT: slw r6, r6, r3 +; PPC32-NEXT: sync +; PPC32-NEXT: .LBB13_1: +; PPC32-NEXT: lwarx r7, 0, r5 +; PPC32-NEXT: xor r8, r4, r7 +; PPC32-NEXT: andc r9, r7, r6 +; PPC32-NEXT: and r8, r8, r6 +; PPC32-NEXT: or r8, r8, r9 +; PPC32-NEXT: stwcx. r8, 0, r5 +; PPC32-NEXT: bne cr0, .LBB13_1 +; PPC32-NEXT: # %bb.2: +; PPC32-NEXT: srw r3, r7, r3 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: xor_i16_seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: li r6, 0 +; PPC64-NEXT: rlwinm r7, r3, 3, 27, 27 +; PPC64-NEXT: rldicr r5, r3, 0, 61 +; PPC64-NEXT: ori r6, r6, 65535 +; PPC64-NEXT: xori r3, r7, 16 +; PPC64-NEXT: slw r4, r4, r3 +; PPC64-NEXT: slw r6, r6, r3 +; PPC64-NEXT: sync +; PPC64-NEXT: .LBB13_1: +; PPC64-NEXT: lwarx r7, 0, r5 +; PPC64-NEXT: xor r8, r4, r7 +; PPC64-NEXT: andc r9, r7, r6 +; PPC64-NEXT: and r8, r8, r6 +; PPC64-NEXT: or r8, r8, r9 +; PPC64-NEXT: stwcx. r8, 0, r5 +; PPC64-NEXT: bne cr0, .LBB13_1 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: srw r3, r7, r3 +; PPC64-NEXT: lwsync +; PPC64-NEXT: blr %val = atomicrmw xor i16* %mem, i16 %operand seq_cst -; CHECK: lwsync ret i16 %val } define i32 @xchg_i32_acq_rel(i32* %mem, i32 %operand) { -; CHECK-LABEL: xchg_i32_acq_rel -; CHECK: lwsync +; CHECK-LABEL: xchg_i32_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: lwsync +; CHECK-NEXT: .LBB14_1: +; CHECK-NEXT: lwarx r5, 0, r3 +; CHECK-NEXT: stwcx. 
r4, 0, r3 +; CHECK-NEXT: bne cr0, .LBB14_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: mr r3, r5 +; CHECK-NEXT: lwsync +; CHECK-NEXT: blr %val = atomicrmw xchg i32* %mem, i32 %operand acq_rel -; CHECK: lwsync ret i32 %val } define i64 @and_i64_release(i64* %mem, i64 %operand) { -; CHECK-LABEL: and_i64_release -; CHECK: lwsync +; PPC32-LABEL: and_i64_release: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: li r7, 3 +; PPC32-NEXT: bl __atomic_fetch_and_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: and_i64_release: +; PPC64: # %bb.0: +; PPC64-NEXT: lwsync +; PPC64-NEXT: .LBB15_1: +; PPC64-NEXT: ldarx r5, 0, r3 +; PPC64-NEXT: and r6, r4, r5 +; PPC64-NEXT: stdcx. r6, 0, r3 +; PPC64-NEXT: bne cr0, .LBB15_1 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: mr r3, r5 +; PPC64-NEXT: blr %val = atomicrmw and i64* %mem, i64 %operand release -; CHECK-NOT: [sync ] ret i64 %val } From b9d086693b5baebc477793af0d86a447bae01b6f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 8 Sep 2020 18:45:11 -0700 Subject: [PATCH 0116/1079] [llvm-cov gcov] Compute unmeasured arc counts by Kirchhoff's circuit law For a CFG G=(V,E), Knuth describes that by Kirchhoff's circuit law, the minimum number of counters necessary is |E|-(|V|-1). The emitted edges form a spanning tree. libgcov-emitted .gcda files leverage this optimization, while clang --coverage's don't. Propagate counts by Kirchhoff's circuit law so that llvm-cov gcov can correctly print line counts of gcc --coverage emitted files and enable the future improvement of clang --coverage. --- ...rprof-gcov-multiple-bbs-single-line.c.gcov | 2 +- llvm/include/llvm/ProfileData/GCOV.h | 10 +-- llvm/lib/ProfileData/GCOV.cpp | 67 ++++++++++++++----- llvm/test/tools/llvm-cov/gcov-4.7.c | 22 +++--- llvm/test/tools/llvm-cov/gcov-8.c | 32 +++++---- llvm/test/tools/llvm-cov/gcov-9.c | 18 +++-- 6 files changed, 91 insertions(+), 60 deletions(-) diff --git a/compiler-rt/test/profile/Inputs/instrprof-gcov-multiple-bbs-single-line.c.gcov b/compiler-rt/test/profile/Inputs/instrprof-gcov-multiple-bbs-single-line.c.gcov index d1104b7f5bbf2..4debf8fc1b680 100644 --- a/compiler-rt/test/profile/Inputs/instrprof-gcov-multiple-bbs-single-line.c.gcov +++ b/compiler-rt/test/profile/Inputs/instrprof-gcov-multiple-bbs-single-line.c.gcov @@ -3,7 +3,7 @@ // CHECK-NEXT: -: 0:Data:instrprof-gcov-multiple-bbs-single-line.gcda // CHECK-NEXT: -: 0:Runs:1 // CHECK-NEXT: -: 0:Programs:1 -// CHECK-NEXT:function main called 1 returned 100% blocks executed 80% +// CHECK-NEXT:function main called 1 returned 100% blocks executed 77% // CHECK-NEXT: 1: 1:int main(void) // CHECK-NEXT: -: 2:{ // CHECK-NEXT: -: 3: int var; diff --git a/llvm/include/llvm/ProfileData/GCOV.h b/llvm/include/llvm/ProfileData/GCOV.h index 7b9ba4410b654..f87eab6d3ead2 100644 --- a/llvm/include/llvm/ProfileData/GCOV.h +++ b/llvm/include/llvm/ProfileData/GCOV.h @@ -212,12 +212,13 @@ class GCOVFile { }; struct GCOVArc { - GCOVArc(GCOVBlock &src, GCOVBlock &dst, bool fallthrough) - : src(src), dst(dst), fallthrough(fallthrough) {} + GCOVArc(GCOVBlock &src, GCOVBlock &dst, uint32_t flags) + : src(src), dst(dst), flags(flags) {} + bool onTree() const; GCOVBlock &src; GCOVBlock &dst; - bool fallthrough; + uint32_t flags; uint64_t Count = 0; uint64_t CyclesCount = 0; }; @@ -234,7 +235,7 @@ class GCOVFunction { StringRef getFilename() 
const; size_t getNumBlocks() const { return Blocks.size(); } uint64_t getEntryCount() const; - uint64_t getExitCount() const; + GCOVBlock &getExitBlock() const; BlockIterator block_begin() const { return Blocks.begin(); } BlockIterator block_end() const { return Blocks.end(); } @@ -242,6 +243,7 @@ class GCOVFunction { return make_range(block_begin(), block_end()); } + uint64_t propagateCounts(const GCOVBlock &v, GCOVArc *arc); void print(raw_ostream &OS) const; void dump() const; void collectLineCounts(FileInfo &FI); diff --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp index 7b97723da60cc..0292e2a09d17c 100644 --- a/llvm/lib/ProfileData/GCOV.cpp +++ b/llvm/lib/ProfileData/GCOV.cpp @@ -108,11 +108,10 @@ bool GCOVFile::readGCNO(GCOVBuffer &buf) { for (uint32_t i = 0, e = (length - 1) / 2; i != e; ++i) { uint32_t dstNo = buf.getWord(), flags = buf.getWord(); GCOVBlock *dst = fn->Blocks[dstNo].get(); - auto arc = - std::make_unique(*src, *dst, flags & GCOV_ARC_FALLTHROUGH); + auto arc = std::make_unique(*src, *dst, flags); src->addDstEdge(arc.get()); dst->addSrcEdge(arc.get()); - if (flags & GCOV_ARC_ON_TREE) + if (arc->onTree()) fn->treeArcs.push_back(std::move(arc)); else fn->arcs.push_back(std::move(arc)); @@ -226,6 +225,17 @@ bool GCOVFile::readGCDA(GCOVBuffer &buf) { if (arc->dst.succ.empty()) arc->dst.Counter += arc->Count; } + + if (fn->Blocks.size() >= 2) { + GCOVBlock &src = *fn->Blocks[0]; + GCOVBlock &sink = + Version < GCOV::V408 ? *fn->Blocks.back() : *fn->Blocks[1]; + auto arc = std::make_unique(sink, src, GCOV_ARC_ON_TREE); + sink.addDstEdge(arc.get()); + src.addSrcEdge(arc.get()); + fn->treeArcs.push_back(std::move(arc)); + fn->propagateCounts(src, nullptr); + } } pos += 4 * length; if (pos < buf.cursor.tell()) @@ -260,6 +270,8 @@ void GCOVFile::collectLineCounts(FileInfo &fi) { fi.setProgramCount(ProgramCount); } +bool GCOVArc::onTree() const { return flags & GCOV_ARC_ON_TREE; } + //===----------------------------------------------------------------------===// // GCOVFunction implementation. @@ -271,10 +283,27 @@ uint64_t GCOVFunction::getEntryCount() const { return Blocks.front()->getCount(); } -/// getExitCount - Get the number of times the function returned by retrieving -/// the exit block's count. -uint64_t GCOVFunction::getExitCount() const { - return Blocks.back()->getCount(); +GCOVBlock &GCOVFunction::getExitBlock() const { + return file.getVersion() < GCOV::V408 ? *Blocks.back() : *Blocks[1]; +} + +// For each basic block, the sum of incoming edge counts equals the sum of +// outgoing edge counts by Kirchhoff's circuit law. If the unmeasured arcs form a +// spanning tree, the count for each unmeasured arc (GCOV_ARC_ON_TREE) can be +// uniquely identified. +uint64_t GCOVFunction::propagateCounts(const GCOVBlock &v, GCOVArc *pred) { + uint64_t excess = 0; + for (GCOVArc *e : v.srcs()) + if (e != pred) + excess += e->onTree() ? propagateCounts(e->src, e) : e->Count; + for (GCOVArc *e : v.dsts()) + if (e != pred) + excess -= e->onTree() ? 
propagateCounts(e->dst, e) : e->Count; + if (int64_t(excess) < 0) + excess = -excess; + if (pred) + pred->Count = excess; + return excess; } void GCOVFunction::print(raw_ostream &OS) const { @@ -322,8 +351,11 @@ void GCOVBlock::print(raw_ostream &OS) const { } if (!succ.empty()) { OS << "\tDestination Edges : "; - for (const GCOVArc *Edge : succ) + for (const GCOVArc *Edge : succ) { + if (Edge->flags & GCOV_ARC_ON_TREE) + OS << '*'; OS << Edge->dst.Number << " (" << Edge->Count << "), "; + } OS << "\n"; } if (!Lines.empty()) { @@ -441,7 +473,7 @@ uint64_t GCOVBlock::getLineCount(const BlockVector &Blocks) { uint64_t Count = 0; for (auto Block : Blocks) { - if (Block->getNumSrcEdges() == 0) { + if (Block->getNumSrcEdges() == 0 || Block->Number == 0) { // The block has no predecessors and a non-null counter // (can be the case with entry block in functions). Count += Block->getCount(); @@ -467,11 +499,13 @@ uint64_t GCOVBlock::getLineCount(const BlockVector &Blocks) { //===----------------------------------------------------------------------===// // FileInfo implementation. -// Safe integer division, returns 0 if numerator is 0. -static uint32_t safeDiv(uint64_t Numerator, uint64_t Divisor) { - if (!Numerator) +// Format dividend/divisor as a percentage. Return 1 if the result is greater +// than 0% and less than 1%. +static uint32_t formatPercentage(uint64_t dividend, uint64_t divisor) { + if (!dividend || !divisor) return 0; - return Numerator / Divisor; + dividend *= 100; + return dividend < divisor ? 1 : dividend / divisor; } // This custom division function mimics gcov's branch ouputs: @@ -794,14 +828,15 @@ void FileInfo::printFunctionSummary(raw_ostream &OS, for (const GCOVFunction *Func : Funcs) { uint64_t EntryCount = Func->getEntryCount(); uint32_t BlocksExec = 0; + const GCOVBlock &ExitBlock = Func->getExitBlock(); for (const GCOVBlock &Block : Func->blocks()) - if (Block.getNumDstEdges() && Block.getCount()) + if (Block.Number != 0 && &Block != &ExitBlock && Block.getCount()) ++BlocksExec; OS << "function " << Func->getName() << " called " << EntryCount - << " returned " << safeDiv(Func->getExitCount() * 100, EntryCount) + << " returned " << formatPercentage(ExitBlock.getCount(), EntryCount) << "% blocks executed " - << safeDiv(BlocksExec * 100, Func->getNumBlocks() - 1) << "%\n"; + << formatPercentage(BlocksExec, Func->getNumBlocks() - 2) << "%\n"; } } diff --git a/llvm/test/tools/llvm-cov/gcov-4.7.c b/llvm/test/tools/llvm-cov/gcov-4.7.c index d92953a6b0b65..211c635f51283 100644 --- a/llvm/test/tools/llvm-cov/gcov-4.7.c +++ b/llvm/test/tools/llvm-cov/gcov-4.7.c @@ -1,27 +1,25 @@ /// Test that llvm-cov supports gcov [4.7,8) compatible format. 
#include #include -int main() { // GCOV: #####: [[@LINE]]:int main - double a[11], result; // GCOV-NEXT: -: [[@LINE]]: - for (int i = 0; i < 11; i++) // GCOV-NEXT: #####: [[@LINE]]: +int main() { // GCOV: 1: [[@LINE]]:int main + double a[11], result; // GCOV-NEXT: -: [[@LINE]]: + for (int i = 0; i < 11; i++) // GCOV-NEXT: 12: [[@LINE]]: scanf("%lf", &a[i]); // GCOV-NEXT: 11: [[@LINE]]: - for (int i = 10; i >= 0; i--) { // GCOV-NEXT: 4: [[@LINE]]: + for (int i = 10; i >= 0; i--) { // GCOV-NEXT: 12: [[@LINE]]: result = sqrt(fabs(a[i])) + 5 * pow(a[i], 3); // GCOV-NEXT: 11: [[@LINE]]: printf("\nf(%lf) = "); // GCOV-NEXT: 11: [[@LINE]]: - if (result > 400) printf("Overflow!"); // GCOV-NEXT: #####: [[@LINE]]: - else printf("%lf", result); // GCOV-NEXT: 4: [[@LINE]]: - } // GCOV-NEXT: -: [[@LINE]]: - return 0; // GCOV-NEXT: #####: [[@LINE]]: -} // GCOV-NEXT: -: [[@LINE]]: -/// FIXME several lines do not match gcov 7 + if (result > 400) printf("Overflow!"); // GCOV-NEXT: 11: [[@LINE]]: + else printf("%lf", result); // GCOV-NEXT: 4: [[@LINE]]: + } // GCOV-NEXT: -: [[@LINE]]: + return 0; // GCOV-NEXT: 1: [[@LINE]]: +} // GCOV-NEXT: -: [[@LINE]]: // RUN: rm -rf %t && mkdir %t && cd %t // RUN: cp %s %p/Inputs/gcov-4.7.gc* . -/// FIXME Lines executed:100.00% of 12 // RUN: llvm-cov gcov gcov-4.7.c | FileCheck %s // CHECK: File 'gcov-4.7.c' -// CHECK-NEXT: Lines executed:55.56% of 9 +// CHECK-NEXT: Lines executed:100.00% of 9 // CHECK-NEXT: Creating 'gcov-4.7.c.gcov' // RUN: FileCheck --input-file=%t/gcov-4.7.c.gcov --check-prefix=HEADER %s diff --git a/llvm/test/tools/llvm-cov/gcov-8.c b/llvm/test/tools/llvm-cov/gcov-8.c index eef3511e93a7c..996e4cbe71b33 100644 --- a/llvm/test/tools/llvm-cov/gcov-8.c +++ b/llvm/test/tools/llvm-cov/gcov-8.c @@ -1,29 +1,27 @@ /// Test that llvm-cov supports gcov 8 compatible format. #include #include -int main() { // GCOV: 1: [[@LINE]]:int main - double a[11], result; // GCOV-NEXT: -: [[@LINE]]: +int main() { // GCOV: 1: [[@LINE]]:int main + double a[11], result; // GCOV-NEXT: -: [[@LINE]]: for (int i = 0; i < 11; i++) // GCOV-NEXT: 12: [[@LINE]]: scanf("%lf", &a[i]); // GCOV-NEXT: 11: [[@LINE]]: - for (int i = 10; i >= 0; i--) { // GCOV-NEXT: 7: [[@LINE]]: + for (int i = 10; i >= 0; i--) { // GCOV-NEXT: 12: [[@LINE]]: result = sqrt(fabs(a[i])) + 5 * pow(a[i], 3); // GCOV-NEXT: 11: [[@LINE]]: printf("\nf(%lf) = "); // GCOV-NEXT: 11: [[@LINE]]: if (result > 400) printf("Overflow!"); // GCOV-NEXT: 11: [[@LINE]]: - else printf("%lf", result); // GCOV-NEXT: #####: [[@LINE]]: - } // GCOV-NEXT: -: [[@LINE]]: - return 0; // GCOV-NEXT: #####: [[@LINE]]: -} // GCOV-NEXT: -: [[@LINE]]: -/// FIXME several lines do not match gcov 8 + else printf("%lf", result); // GCOV-NEXT: 4: [[@LINE]]: + } // GCOV-NEXT: -: [[@LINE]]: + return 0; // GCOV-NEXT: 1: [[@LINE]]: +} // GCOV-NEXT: -: [[@LINE]]: // RUN: rm -rf %t && mkdir %t && cd %t // RUN: cp %s %p/Inputs/gcov-8.gc* . 
-/// FIXME Lines executed:100.00% of 12 // RUN: llvm-cov gcov gcov-8.c | FileCheck %s --check-prefixes=OUT,OUTFILE // OUT: File 'gcov-8.c' -// OUT-NEXT: Lines executed:77.78% of 9 +// OUT-NEXT: Lines executed:100.00% of 9 // OUT-B-NEXT: Branches executed:85.71% of 14 -// OUT-B-NEXT: Taken at least once:42.86% of 14 +// OUT-B-NEXT: Taken at least once:71.43% of 14 // OUT-B-NEXT: No calls // OUTFILE-NEXT: Creating 'gcov-8.c.gcov' // OUT-EMPTY: @@ -51,23 +49,23 @@ int main() { // GCOV: 1: [[@LINE]]:int // I-NEXT:lcount:4,1 // I-NEXT:lcount:6,12 // I-B-NEXT:branch:6,taken -// I-B-NEXT:branch:6,nottaken +// I-B-NEXT:branch:6,taken // I-NEXT:lcount:7,11 // I-B-NEXT:branch:7,taken // I-B-NEXT:branch:7,nottaken -// I-NEXT:lcount:8,7 +// I-NEXT:lcount:8,12 +// I-B-NEXT:branch:8,taken // I-B-NEXT:branch:8,taken -// I-B-NEXT:branch:8,nottaken // I-NEXT:lcount:9,11 // I-NEXT:lcount:10,11 // I-B-NEXT:branch:10,taken // I-B-NEXT:branch:10,nottaken // I-NEXT:lcount:11,11 // I-B-NEXT:branch:11,taken -// I-B-NEXT:branch:11,nottaken +// I-B-NEXT:branch:11,taken // I-B-NEXT:branch:11,taken // I-B-NEXT:branch:11,nottaken -// I-NEXT:lcount:12,0 +// I-NEXT:lcount:12,4 // I-B-NEXT:branch:12,notexec // I-B-NEXT:branch:12,notexec -// I-NEXT:lcount:14,0 +// I-NEXT:lcount:14,1 diff --git a/llvm/test/tools/llvm-cov/gcov-9.c b/llvm/test/tools/llvm-cov/gcov-9.c index 335e6c0663dbe..a2e9cf4749736 100644 --- a/llvm/test/tools/llvm-cov/gcov-9.c +++ b/llvm/test/tools/llvm-cov/gcov-9.c @@ -1,27 +1,25 @@ /// Test that llvm-cov supports gcov 9 compatible format. #include #include -int main() { // GCOV: 1: [[@LINE]]:int main - double a[11], result; // GCOV-NEXT: -: [[@LINE]]: +int main() { // GCOV: 1: [[@LINE]]:int main + double a[11], result; // GCOV-NEXT: -: [[@LINE]]: for (int i = 0; i < 11; i++) // GCOV-NEXT: 12: [[@LINE]]: scanf("%lf", &a[i]); // GCOV-NEXT: 11: [[@LINE]]: - for (int i = 10; i >= 0; i--) { // GCOV-NEXT: 7: [[@LINE]]: + for (int i = 10; i >= 0; i--) { // GCOV-NEXT: 12: [[@LINE]]: result = sqrt(fabs(a[i])) + 5 * pow(a[i], 3); // GCOV-NEXT: 11: [[@LINE]]: printf("\nf(%lf) = "); // GCOV-NEXT: 11: [[@LINE]]: if (result > 400) printf("Overflow!"); // GCOV-NEXT: 11: [[@LINE]]: - else printf("%lf", result); // GCOV-NEXT: #####: [[@LINE]]: - } // GCOV-NEXT: -: [[@LINE]]: - return 0; // GCOV-NEXT: #####: [[@LINE]]: -} // GCOV-NEXT: -: [[@LINE]]: -/// FIXME several lines do not match gcov 9 + else printf("%lf", result); // GCOV-NEXT: 4: [[@LINE]]: + } // GCOV-NEXT: -: [[@LINE]]: + return 0; // GCOV-NEXT: 1: [[@LINE]]: +} // GCOV-NEXT: -: [[@LINE]]: // RUN: rm -rf %t && mkdir %t && cd %t // RUN: cp %s %p/Inputs/gcov-9.gc* . 
-/// FIXME Lines executed:100.00% of 12 // RUN: llvm-cov gcov gcov-9.c | FileCheck %s // CHECK: File 'gcov-9.c' -// CHECK-NEXT: Lines executed:77.78% of 9 +// CHECK-NEXT: Lines executed:100.00% of 9 // CHECK-NEXT: Creating 'gcov-9.c.gcov' // RUN: FileCheck --input-file=%t/gcov-9.c.gcov --check-prefix=HEADER %s From c2b7b9b642b3247061c4850e9c868c903e3b9654 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 8 Sep 2020 22:09:28 -0500 Subject: [PATCH 0117/1079] [Hexagon] Fix order of operands in V6_vdealb4w --- llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index b656a845b1526..c9435cd21c2e0 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -407,7 +407,7 @@ let Predicates = [UseHVX] in { def: Pat<(srl HVI32:$Vs, HVI32:$Vt), (V6_vlsrwv HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(VecI8 (vpackl HVI16:$Vs)), (V6_vdealb HvxVR:$Vs)>; - def: Pat<(VecI8 (vpackl HVI32:$Vs)), (V6_vdealb4w HvxVR:$Vs, (IMPLICIT_DEF))>; + def: Pat<(VecI8 (vpackl HVI32:$Vs)), (V6_vdealb4w (IMPLICIT_DEF), HvxVR:$Vs)>; def: Pat<(VecI16 (vpackl HVI32:$Vs)), (V6_vdealh HvxVR:$Vs)>; def: Pat<(VecI16 (bswap HVI16:$Vs)), From 1bb1eac6b177739429e78703b265e7546792fd64 Mon Sep 17 00:00:00 2001 From: Dokyung Song Date: Wed, 8 Jul 2020 19:30:53 +0000 Subject: [PATCH 0118/1079] [libFuzzer] Add a command-line option for tracing mutation of corpus inputs in the dot graph format. This patch adds a new command-line option -mutation_graph_file=FILE for debugging purposes, which traces how corpus inputs evolve during a fuzzing run. For each new input that is added to the corpus, a new vertex corresponding to the added input and a new edge connecting its base input to the new input are written to the given file. Each vertex is labeled with the filename of the input, and each edge is labeled with the mutation sequence that led to the input w.r.t. its base input. The mutation graph file uses the dot file format. Once prepended and appended with "graph {" and "}", respectively, the graph becomes a valid dot file and can be visualized. 
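For illustration, a sketch of the records this option appends, using the two inputs ("H", then "Hi") from the test added further below; the sha1 vertex names come from that test, while the mutation label is hypothetical (its trailing "-" matches how MutationDispatcher::MutationSequence() joins mutator names):

  "7cf184f4c67ad58283ecb19349720b0cae756829"
  "94dd9e08c129c785f7f256e82fbe0a30e6d1ae40"
  "7cf184f4c67ad58283ecb19349720b0cae756829" -> "94dd9e08c129c785f7f256e82fbe0a30e6d1ae40" [label="InsertByte-"];

One way to render such a file, assuming Graphviz is installed and the flag was given a hypothetical file name graph.txt (note that dot accepts the directed "->" edges only inside a digraph, so "digraph {" is the safer wrapper):

  (echo "digraph {"; cat graph.txt; echo "}") | dot -Tsvg -o graph.svg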
Differential Revision: https://reviews.llvm.org/D86560 --- compiler-rt/lib/fuzzer/FuzzerDriver.cpp | 2 ++ compiler-rt/lib/fuzzer/FuzzerFlags.def | 5 ++++ compiler-rt/lib/fuzzer/FuzzerIO.cpp | 13 ++++++++ compiler-rt/lib/fuzzer/FuzzerIO.h | 3 ++ compiler-rt/lib/fuzzer/FuzzerLoop.cpp | 33 +++++++++++++++++++++ compiler-rt/lib/fuzzer/FuzzerMutate.cpp | 9 ++++++ compiler-rt/lib/fuzzer/FuzzerMutate.h | 2 ++ compiler-rt/lib/fuzzer/FuzzerOptions.h | 1 + compiler-rt/test/fuzzer/mutation-graph.test | 17 +++++++++++ 9 files changed, 85 insertions(+) create mode 100644 compiler-rt/test/fuzzer/mutation-graph.test diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp index caafd1dbb0a7b..57df1238c398c 100644 --- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -755,6 +755,8 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.FeaturesDir = Flags.features_dir; ValidateDirectoryExists(Options.FeaturesDir, Flags.create_missing_dirs); } + if (Flags.mutation_graph_file) + Options.MutationGraphFile = Flags.mutation_graph_file; if (Flags.collect_data_flow) Options.CollectDataFlow = Flags.collect_data_flow; if (Flags.stop_file) diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def index fdb8362cef9d4..c9a787e03833d 100644 --- a/compiler-rt/lib/fuzzer/FuzzerFlags.def +++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def @@ -88,6 +88,11 @@ FUZZER_FLAG_STRING(features_dir, "internal flag. Used to dump feature sets on di "Every time a new input is added to the corpus, a corresponding file in the features_dir" " is created containing the unique features of that input." " Features are stored in binary format.") +FUZZER_FLAG_STRING(mutation_graph_file, "Saves a graph (in DOT format) to" + " mutation_graph_file. The graph contains a vertex for each input that has" + " unique coverage; directed edges are provided between parents and children" + " where the child has unique coverage, and are recorded with the type of" + " mutation that caused the child.") FUZZER_FLAG_INT(use_counters, 1, "Use coverage counters") FUZZER_FLAG_INT(use_memmem, 1, "Use hints from intercepting memmem, strstr, etc") diff --git a/compiler-rt/lib/fuzzer/FuzzerIO.cpp b/compiler-rt/lib/fuzzer/FuzzerIO.cpp index c3330c3425d09..54a7219fc0e0f 100644 --- a/compiler-rt/lib/fuzzer/FuzzerIO.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerIO.cpp @@ -77,6 +77,19 @@ void WriteToFile(const uint8_t *Data, size_t Size, const std::string &Path) { fclose(Out); } +void AppendToFile(const std::string &Data, const std::string &Path) { + AppendToFile(reinterpret_cast(Data.data()), Data.size(), + Path); +} + +void AppendToFile(const uint8_t *Data, size_t Size, const std::string &Path) { + FILE *Out = fopen(Path.c_str(), "a"); + if (!Out) + return; + fwrite(Data, sizeof(Data[0]), Size, Out); + fclose(Out); +} + void ReadDirToVectorOfUnits(const char *Path, Vector *V, long *Epoch, size_t MaxSize, bool ExitOnError) { long E = Epoch ? 
*Epoch : 0; diff --git a/compiler-rt/lib/fuzzer/FuzzerIO.h b/compiler-rt/lib/fuzzer/FuzzerIO.h index 6e3a0b470c5f6..abd25110d07d4 100644 --- a/compiler-rt/lib/fuzzer/FuzzerIO.h +++ b/compiler-rt/lib/fuzzer/FuzzerIO.h @@ -29,6 +29,9 @@ void WriteToFile(const uint8_t *Data, size_t Size, const std::string &Path); void WriteToFile(const std::string &Data, const std::string &Path); void WriteToFile(const Unit &U, const std::string &Path); +void AppendToFile(const uint8_t *Data, size_t Size, const std::string &Path); +void AppendToFile(const std::string &Data, const std::string &Path); + void ReadDirToVectorOfUnits(const char *Path, Vector *V, long *Epoch, size_t MaxSize, bool ExitOnError); diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp index f9986dd8eea51..ce8c2fb747144 100644 --- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -463,6 +463,37 @@ static void RenameFeatureSetFile(const std::string &FeaturesDir, DirPlusFile(FeaturesDir, NewFile)); } +static void WriteEdgeToMutationGraphFile(const std::string &MutationGraphFile, + const InputInfo *II, + const InputInfo *BaseII, + const std::string &MS) { + if (MutationGraphFile.empty()) + return; + + std::string Sha1 = Sha1ToString(II->Sha1); + + std::string OutputString; + + // Add a new vertex. + OutputString.append("\""); + OutputString.append(Sha1); + OutputString.append("\"\n"); + + // Add a new edge if there is base input. + if (BaseII) { + std::string BaseSha1 = Sha1ToString(BaseII->Sha1); + OutputString.append("\""); + OutputString.append(BaseSha1); + OutputString.append("\" -> \""); + OutputString.append(Sha1); + OutputString.append("\" [label=\""); + OutputString.append(MS); + OutputString.append("\"];\n"); + } + + AppendToFile(OutputString, MutationGraphFile); +} + bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, InputInfo *II, bool ForceAddToCorpus, bool *FoundUniqFeatures) { @@ -497,6 +528,8 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, TimeOfUnit, UniqFeatureSetTmp, DFT, II); WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1), NewII->UniqFeatureSet); + WriteEdgeToMutationGraphFile(Options.MutationGraphFile, NewII, II, + MD.MutationSequence()); return true; } if (II && FoundUniqFeaturesOfII && diff --git a/compiler-rt/lib/fuzzer/FuzzerMutate.cpp b/compiler-rt/lib/fuzzer/FuzzerMutate.cpp index df9ada45bb039..121b450e8b8c5 100644 --- a/compiler-rt/lib/fuzzer/FuzzerMutate.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerMutate.cpp @@ -494,6 +494,15 @@ void MutationDispatcher::PrintMutationSequence() { } } +std::string MutationDispatcher::MutationSequence() { + std::string MS; + for (auto M : CurrentMutatorSequence) { + MS += M.Name; + MS += "-"; + } + return MS; +} + size_t MutationDispatcher::Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { return MutateImpl(Data, Size, MaxSize, Mutators); } diff --git a/compiler-rt/lib/fuzzer/FuzzerMutate.h b/compiler-rt/lib/fuzzer/FuzzerMutate.h index 6cbce80276248..3ce3159f6893b 100644 --- a/compiler-rt/lib/fuzzer/FuzzerMutate.h +++ b/compiler-rt/lib/fuzzer/FuzzerMutate.h @@ -26,6 +26,8 @@ class MutationDispatcher { void StartMutationSequence(); /// Print the current sequence of mutations. void PrintMutationSequence(); + /// Return the current sequence of mutations. + std::string MutationSequence(); /// Indicate that the current sequence of mutations was successful. 
void RecordSuccessfulMutationSequence(); /// Mutates data by invoking user-provided mutator. diff --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h index b17a7474d38f0..706e1c64c706c 100644 --- a/compiler-rt/lib/fuzzer/FuzzerOptions.h +++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h @@ -59,6 +59,7 @@ struct FuzzingOptions { std::string DataFlowTrace; std::string CollectDataFlow; std::string FeaturesDir; + std::string MutationGraphFile; std::string StopFile; bool SaveArtifacts = true; bool PrintNEW = true; // Print a status line when new units are found; diff --git a/compiler-rt/test/fuzzer/mutation-graph.test b/compiler-rt/test/fuzzer/mutation-graph.test new file mode 100644 index 0000000000000..7774a500395e0 --- /dev/null +++ b/compiler-rt/test/fuzzer/mutation-graph.test @@ -0,0 +1,17 @@ +REQUIRES: linux, x86_64 +RUN: %cpp_compiler %S/SimpleTest.cpp -o %t-SimpleTest + +RUN: rm -rf %t-SimpleTestGraph + +RUN: not %run %t-SimpleTest -seed=1 -max_len=3 -mutation_graph_file=%t-SimpleTestGraph 2>&1 | FileCheck %s +CHECK: BINGO + +RUN: cat %t-SimpleTestGraph | FileCheck %s --check-prefix=GRAPH + +# A vertex and edge that correspond to the discovery of "H" +GRAPH: "7cf184f4c67ad58283ecb19349720b0cae756829" +GRAPH: {{.*}} -> "7cf184f4c67ad58283ecb19349720b0cae756829" [label="{{.*}}"]; + +# A vertex and edge that correspond to the discovery of "Hi" +GRAPH: "94dd9e08c129c785f7f256e82fbe0a30e6d1ae40" +GRAPH: {{.*}} -> "94dd9e08c129c785f7f256e82fbe0a30e6d1ae40" [label="{{.*}}"]; From 795e4ee9d2db386a45dc12e6ead21f5f3151d05c Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Wed, 9 Sep 2020 11:20:59 +0700 Subject: [PATCH 0119/1079] [NFC] Move function from IndVarSimplify to SCEV This function can be reused in other places. Differential Revision: https://reviews.llvm.org/D87274 Reviewed By: fhahn, lebedev.ri --- llvm/include/llvm/Analysis/ScalarEvolution.h | 5 +++ llvm/lib/Analysis/ScalarEvolution.cpp | 25 +++++++++++++++ llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 32 +------------------ 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 81c5fc9325884..ea841440e1803 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -768,6 +768,11 @@ class ScalarEvolution { return getBackedgeTakenCount(L, ConstantMaximum); } + /// Return a symbolic upper bound for the backedge taken count of the loop. + /// This is more general than getConstantMaxBackedgeTakenCount as it returns + /// an arbitrary expression as opposed to only constants. + const SCEV* computeMaxBackedgeTakenCount(const Loop *L); + /// Return true if the backedge taken count is either the value returned by /// getConstantMaxBackedgeTakenCount or zero. bool isBackedgeTakenCountMaxOrZero(const Loop *L); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 40d89fff04587..11d92bc816e9f 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -12506,3 +12506,28 @@ bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0))); return false; } + +const SCEV* ScalarEvolution::computeMaxBackedgeTakenCount(const Loop *L) { + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + // Form an expression for the maximum exit count possible for this loop. 
We + // merge the max and exact information to approximate a version of + // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. + SmallVector ExitCounts; + for (BasicBlock *ExitingBB : ExitingBlocks) { + const SCEV *ExitCount = getExitCount(L, ExitingBB); + if (isa(ExitCount)) + ExitCount = getExitCount(L, ExitingBB, + ScalarEvolution::ConstantMaximum); + if (!isa(ExitCount)) { + assert(DT.dominates(ExitingBB, L->getLoopLatch()) && + "We should only have known counts for exiting blocks that " + "dominate latch!"); + ExitCounts.push_back(ExitCount); + } + } + if (ExitCounts.empty()) + return getCouldNotCompute(); + return getUMinFromMismatchedTypes(ExitCounts); +} diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 51d12faf712ad..20b85626dced9 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -2329,36 +2329,6 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { return MadeAnyChanges; } -/// Return a symbolic upper bound for the backedge taken count of the loop. -/// This is more general than getConstantMaxBackedgeTakenCount as it returns -/// an arbitrary expression as opposed to only constants. -/// TODO: Move into the ScalarEvolution class. -static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE, - DominatorTree &DT, Loop *L) { - SmallVector ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - - // Form an expression for the maximum exit count possible for this loop. We - // merge the max and exact information to approximate a version of - // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. - SmallVector ExitCounts; - for (BasicBlock *ExitingBB : ExitingBlocks) { - const SCEV *ExitCount = SE.getExitCount(L, ExitingBB); - if (isa(ExitCount)) - ExitCount = SE.getExitCount(L, ExitingBB, - ScalarEvolution::ConstantMaximum); - if (!isa(ExitCount)) { - assert(DT.dominates(ExitingBB, L->getLoopLatch()) && - "We should only have known counts for exiting blocks that " - "dominate latch!"); - ExitCounts.push_back(ExitCount); - } - } - if (ExitCounts.empty()) - return SE.getCouldNotCompute(); - return SE.getUMinFromMismatchedTypes(ExitCounts); -} - bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -2391,7 +2361,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { return false; // Get a symbolic upper bound on the loop backedge taken count. - const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L); + const SCEV *MaxExitCount = SE->computeMaxBackedgeTakenCount(L); if (isa(MaxExitCount)) return false; From c58dfbdc818275dd0e8f34939a95da546c49cdf6 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 8 Sep 2020 21:52:23 -0500 Subject: [PATCH 0120/1079] [flang][msvc] Avoid range-based for over initializer_list. NFC. Msvc crashes with "INTERNAL COMPILER ERROR" when iterating over an `std::initializer_list` in a constexpr constructor. Explicitly use the iterator instead. This patch is part of the series to [[ http://lists.llvm.org/pipermail/flang-dev/2020-July/000448.html | make flang compilable with MS Visual Studio ]]. 
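To make the workaround concrete, here is a minimal self-contained C++ sketch of the pattern; SmallBitSet is invented for illustration and is not flang's EnumSet:

  #include <initializer_list>

  struct SmallBitSet {
    unsigned bits{0};
    constexpr SmallBitSet(const std::initializer_list<int> &enums) {
      // A range-based "for (auto x : enums)" here is the construct that
      // reportedly crashes MSVC during constexpr evaluation; the explicit
      // iterator loop below is semantically identical and side-steps the ICE.
      for (auto it{enums.begin()}; it != enums.end(); ++it) {
        bits |= 1u << *it;
      }
    }
  };

  // Constant evaluation is where the crash appeared.
  static constexpr SmallBitSet set{0, 3, 7};
  static_assert(set.bits == 0b10001001u, "bits 0, 3 and 7 should be set");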
Reviewed By: isuruf Differential Revision: https://reviews.llvm.org/D86425 --- flang/include/flang/Common/enum-set.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flang/include/flang/Common/enum-set.h b/flang/include/flang/Common/enum-set.h index a7bdc757a1c97..5d2eda57aa819 100644 --- a/flang/include/flang/Common/enum-set.h +++ b/flang/include/flang/Common/enum-set.h @@ -37,8 +37,8 @@ template class EnumSet { constexpr EnumSet() {} constexpr EnumSet(const std::initializer_list &enums) { - for (auto x : enums) { - set(x); + for (auto it{enums.begin()}; it != enums.end(); ++it) { + set(*it); } } constexpr EnumSet(const EnumSet &) = default; From d5d75f61e5fbeb290944ee5d28d6cd13fd40f223 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 18 Aug 2020 15:27:41 -0500 Subject: [PATCH 0121/1079] [Attributor] Provide a command line option that limits recursion depth In `MultiSource/Benchmarks/tramp3d-v4/tramp3d-v4.cpp` we initialized attributes until stack frame ~35k caused space to run out. The initial size 1024 is pretty much random. --- llvm/include/llvm/Transforms/IPO/Attributor.h | 14 +++++++-- llvm/lib/Transforms/IPO/Attributor.cpp | 8 +++++ llvm/test/Transforms/Attributor/chain.ll | 31 +++++++++++++++++++ 3 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/Attributor/chain.ll diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 75e7ccde4dba7..4268123841b14 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -133,8 +133,10 @@ struct AAIsDead; class Function; -/// Simple enum classes that forces properties to be spelled out explicitly. -/// +/// The value passed to the line option that defines the maximal initialization +/// chain length. +extern unsigned MaxInitializationChainLength; + ///{ enum class ChangeStatus { CHANGED, @@ -1071,6 +1073,9 @@ struct Attributor { Invalidate |= FnScope->hasFnAttribute(Attribute::Naked) || FnScope->hasFnAttribute(Attribute::OptimizeNone); + // Avoid too many nested initializations to prevent a stack overflow. + Invalidate |= InitializationChainLength > MaxInitializationChainLength; + // Bootstrap the new attribute with an initial update to propagate // information, e.g., function -> call site. If it is not on a given // Allowed we will not perform updates at all. @@ -1081,7 +1086,9 @@ struct Attributor { { TimeTraceScope TimeScope(AA.getName() + "::initialize"); + ++InitializationChainLength; AA.initialize(*this); + --InitializationChainLength; } // Initialize and update is allowed for code outside of the current function @@ -1615,6 +1622,9 @@ struct Attributor { CLEANUP, } Phase = AttributorPhase::SEEDING; + /// The current initialization chain length. Tracked to avoid stack overflows. + unsigned InitializationChainLength = 0; + /// Functions, blocks, and instructions we delete after manifest is done. 
/// ///{ diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 32420e847129f..2a15c6f0b818d 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -73,6 +73,14 @@ static cl::opt MaxFixpointIterations("attributor-max-iterations", cl::Hidden, cl::desc("Maximal number of fixpoint iterations."), cl::init(32)); + +static cl::opt MaxInitializationChainLengthX( + "attributor-max-initialization-chain-length", cl::Hidden, + cl::desc( + "Maximal number of chained initializations (to avoid stack overflows)"), + cl::location(MaxInitializationChainLength), cl::init(1024)); +unsigned llvm::MaxInitializationChainLength; + static cl::opt VerifyMaxFixpointIterations( "attributor-max-iterations-verify", cl::Hidden, cl::desc("Verify that max-iterations is a tight bound for a fixpoint"), diff --git a/llvm/test/Transforms/Attributor/chain.ll b/llvm/test/Transforms/Attributor/chain.ll new file mode 100644 index 0000000000000..0306fe22c0b3c --- /dev/null +++ b/llvm/test/Transforms/Attributor/chain.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-max-initialization-chain-length=1 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK_1 +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-max-initialization-chain-length=1 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK_1 +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-max-initialization-chain-length=1024 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK_5 +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-max-initialization-chain-length=1024 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK_5 + +declare void @foo(i8* dereferenceable(8) %arg) + +define dso_local i32 @bar(i32* %arg) { +; CHECK_1-LABEL: define {{[^@]+}}@bar +; CHECK_1-SAME: (i32* dereferenceable_or_null(8) [[ARG:%.*]]) { +; CHECK_1-NEXT: entry: +; CHECK_1-NEXT: [[BC1:%.*]] = bitcast i32* [[ARG]] to i8* +; CHECK_1-NEXT: call void @foo(i8* dereferenceable_or_null(8) [[BC1]]) +; CHECK_1-NEXT: [[LD:%.*]] = load i32, i32* [[ARG]], align 4 +; CHECK_1-NEXT: ret i32 [[LD]] +; +; CHECK_5-LABEL: define {{[^@]+}}@bar +; CHECK_5-SAME: (i32* nonnull dereferenceable(8) [[ARG:%.*]]) { +; CHECK_5-NEXT: entry: +; CHECK_5-NEXT: [[BC1:%.*]] = bitcast i32* [[ARG]] to i8* +; CHECK_5-NEXT: call void @foo(i8* nonnull dereferenceable(8) [[BC1]]) +; CHECK_5-NEXT: [[LD:%.*]] = load i32, i32* [[ARG]], align 4 +; CHECK_5-NEXT: ret i32 [[LD]] +; +entry: + %bc1 = bitcast i32* %arg to i8* + call void @foo(i8* %bc1) + %ld = load i32, i32* %arg + ret i32 %ld +} From 2600c9e2efce1dc4c64870b00a45ae0082c685fc Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Fri, 4 Sep 2020 11:41:58 -0500 Subject: [PATCH 0122/1079] [Attributor] Re-enable a run line in noalias.ll This was disabled as we were looking for a weird CGSCC problem. I think/hope we fixed it as there were a lot of updates recently. I could never reproduce this locally so I'll use the pre-commit phab builds to confirm this suspicion and if they seem to be happy I'll assume this is fixed. 
Reviewed By: sstefan1 Differential Revision: https://reviews.llvm.org/D87266 --- llvm/test/Transforms/Attributor/noalias.ll | 260 ++++++++++----------- 1 file changed, 127 insertions(+), 133 deletions(-) diff --git a/llvm/test/Transforms/Attributor/noalias.ll b/llvm/test/Transforms/Attributor/noalias.ll index e7e47d42f4566..030089282334c 100644 --- a/llvm/test/Transforms/Attributor/noalias.ll +++ b/llvm/test/Transforms/Attributor/noalias.ll @@ -1,8 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes ; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM -; TODO: The old pass manager cgscc run is disabled as it causes a crash on windows which is under investigation: http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/23151 -; opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM +; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; TEST 1 - negative. @@ -42,10 +41,10 @@ define i8* @return_noalias(){ } define void @nocapture(i8* %a){ -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readnone willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@nocapture -; NOT_CGSCC_NPM-SAME: (i8* nocapture nofree readnone [[A:%.*]]) [[ATTR0:#.*]] { -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@nocapture +; IS__TUNIT____-SAME: (i8* nocapture nofree readnone [[A:%.*]]) [[ATTR0:#.*]] { +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@nocapture @@ -145,10 +144,10 @@ declare i8* @baz(...) nounwind uwtable ; Returning global pointer. Should not be noalias. 
define i8** @getter() { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readnone willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@getter -; NOT_CGSCC_NPM-SAME: () [[ATTR0]] { -; NOT_CGSCC_NPM-NEXT: ret i8** @G +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@getter +; IS__TUNIT____-SAME: () [[ATTR0]] { +; IS__TUNIT____-NEXT: ret i8** @G ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@getter @@ -160,10 +159,10 @@ define i8** @getter() { ; Returning global pointer. Should not be noalias. define i8** @calle1(){ -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readnone willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@calle1 -; NOT_CGSCC_NPM-SAME: () [[ATTR0]] { -; NOT_CGSCC_NPM-NEXT: ret i8** @G +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@calle1 +; IS__TUNIT____-SAME: () [[ATTR0]] { +; IS__TUNIT____-NEXT: ret i8** @G ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@calle1 @@ -410,6 +409,7 @@ define void @test12_3(){ } define void @test12_4(){ +; ; IS________OPM-LABEL: define {{[^@]+}}@test12_4() { ; IS________OPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) ; IS________OPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) @@ -422,17 +422,17 @@ define void @test12_4(){ ; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) ; IS________OPM-NEXT: ret void ; -; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@test12_4() { -; NOT_TUNIT_OPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) -; NOT_TUNIT_OPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) -; NOT_TUNIT_OPM-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 -; NOT_TUNIT_OPM-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 -; NOT_TUNIT_OPM-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* noalias nocapture [[A]], i8* noalias nocapture [[B]]) -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_0]]) -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_1]]) -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) -; NOT_TUNIT_OPM-NEXT: ret void +; IS________NPM-LABEL: define {{[^@]+}}@test12_4() { +; IS________NPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) +; IS________NPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) +; IS________NPM-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 +; IS________NPM-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 +; IS________NPM-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 +; IS________NPM-NEXT: tail call void @two_args(i8* noalias nocapture [[A]], i8* noalias nocapture [[B]]) +; IS________NPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_0]]) +; IS________NPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_1]]) +; IS________NPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) +; IS________NPM-NEXT: ret void ; %A = tail call noalias i8* @malloc(i64 4) %B = tail call noalias i8* @malloc(i64 4) @@ -470,12 +470,6 @@ define void @test13_use_noalias(){ ; CHECK-NEXT: call void @use_i8_internal(i8* noalias 
nocapture [[C2]]) ; CHECK-NEXT: ret void ; -; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test13_use_noalias() -; IS__CGSCC_OPM-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS__CGSCC_OPM-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* -; IS__CGSCC_OPM-NEXT: [[C2:%.*]] = bitcast i16* [[C1]] to i8* -; IS__CGSCC_OPM-NEXT: call void @use_i8_internal(i8* noalias [[C2]]) -; IS__CGSCC_OPM-NEXT: ret void %m1 = tail call noalias i8* @malloc(i64 4) %c1 = bitcast i8* %m1 to i16* %c2 = bitcast i16* %c1 to i8* @@ -504,11 +498,11 @@ define void @test13_use_alias(){ ; TEST 14 i2p casts define internal i32 @p2i(i32* %arg) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readnone willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@p2i -; NOT_CGSCC_NPM-SAME: (i32* noalias nofree readnone [[ARG:%.*]]) [[ATTR0]] { -; NOT_CGSCC_NPM-NEXT: [[P2I:%.*]] = ptrtoint i32* [[ARG]] to i32 -; NOT_CGSCC_NPM-NEXT: ret i32 [[P2I]] +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@p2i +; IS__TUNIT____-SAME: (i32* noalias nofree readnone [[ARG:%.*]]) [[ATTR0]] { +; IS__TUNIT____-NEXT: [[P2I:%.*]] = ptrtoint i32* [[ARG]] to i32 +; IS__TUNIT____-NEXT: ret i32 [[P2I]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@p2i @@ -521,14 +515,14 @@ define internal i32 @p2i(i32* %arg) { } define i32 @i2p(i32* %arg) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readonly willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@i2p -; NOT_CGSCC_NPM-SAME: (i32* nofree readonly [[ARG:%.*]]) [[ATTR4:#.*]] { -; NOT_CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @p2i(i32* noalias nofree readnone [[ARG]]) [[ATTR0]] -; NOT_CGSCC_NPM-NEXT: [[I2P:%.*]] = inttoptr i32 [[C]] to i8* -; NOT_CGSCC_NPM-NEXT: [[BC:%.*]] = bitcast i8* [[I2P]] to i32* -; NOT_CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nocapture nofree readonly align 4 [[BC]]) [[ATTR4]] -; NOT_CGSCC_NPM-NEXT: ret i32 [[CALL]] +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readonly willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@i2p +; IS__TUNIT____-SAME: (i32* nofree readonly [[ARG:%.*]]) [[ATTR4:#.*]] { +; IS__TUNIT____-NEXT: [[C:%.*]] = call i32 @p2i(i32* noalias nofree readnone [[ARG]]) [[ATTR0]] +; IS__TUNIT____-NEXT: [[I2P:%.*]] = inttoptr i32 [[C]] to i8* +; IS__TUNIT____-NEXT: [[BC:%.*]] = bitcast i8* [[I2P]] to i32* +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nocapture nofree readonly align 4 [[BC]]) [[ATTR4]] +; IS__TUNIT____-NEXT: ret i32 [[CALL]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@i2p @@ -546,11 +540,11 @@ define i32 @i2p(i32* %arg) { ret i32 %call } define internal i32 @ret(i32* %arg) { -; NOT_CGSCC_NPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@ret -; NOT_CGSCC_NPM-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) [[ATTR5:#.*]] { -; NOT_CGSCC_NPM-NEXT: [[L:%.*]] = load i32, i32* [[ARG]], align 4 -; NOT_CGSCC_NPM-NEXT: ret i32 [[L]] +; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@ret +; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) [[ATTR5:#.*]] { +; IS__TUNIT____-NEXT: [[L:%.*]] = load i32, i32* [[ARG]], align 4 +; IS__TUNIT____-NEXT: ret i32 [[L]] ; ; 
IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@ret @@ -572,17 +566,17 @@ define internal i32 @ret(i32* %arg) { ; Function Attrs: nounwind optsize define internal fastcc double @strtox(i8* %s, i8** %p, i32 %prec) unnamed_addr { -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@strtox -; NOT_CGSCC_NPM-SAME: (i8* [[S:%.*]]) unnamed_addr { -; NOT_CGSCC_NPM-NEXT: entry: -; NOT_CGSCC_NPM-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 -; NOT_CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* -; NOT_CGSCC_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) [[ATTR10:#.*]] -; NOT_CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) -; NOT_CGSCC_NPM-NEXT: call void @__shlim(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i64 noundef 0) -; NOT_CGSCC_NPM-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i32 noundef 1, i32 noundef 1) -; NOT_CGSCC_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) -; NOT_CGSCC_NPM-NEXT: ret double [[CALL1]] +; IS__TUNIT____-LABEL: define {{[^@]+}}@strtox +; IS__TUNIT____-SAME: (i8* [[S:%.*]]) unnamed_addr { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 +; IS__TUNIT____-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* +; IS__TUNIT____-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) [[ATTR10:#.*]] +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) +; IS__TUNIT____-NEXT: call void @__shlim(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i64 noundef 0) +; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i32 noundef 1, i32 noundef 1) +; IS__TUNIT____-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) +; IS__TUNIT____-NEXT: ret double [[CALL1]] ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@strtox ; IS__CGSCC____-SAME: (i8* noalias [[S:%.*]]) unnamed_addr { @@ -642,11 +636,11 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) @alias_of_p = external global i32* define void @make_alias(i32* %p) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@make_alias -; NOT_CGSCC_NPM-SAME: (i32* nofree writeonly [[P:%.*]]) [[ATTR7:#.*]] { -; NOT_CGSCC_NPM-NEXT: store i32* [[P]], i32** @alias_of_p, align 8 -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@make_alias +; IS__TUNIT____-SAME: (i32* nofree writeonly [[P:%.*]]) [[ATTR7:#.*]] { +; IS__TUNIT____-NEXT: store i32* [[P]], i32** @alias_of_p, align 8 +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@make_alias @@ 
-659,11 +653,11 @@ define void @make_alias(i32* %p) { } define void @only_store(i32* %p) { -; NOT_CGSCC_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@only_store -; NOT_CGSCC_NPM-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) [[ATTR8:#.*]] { -; NOT_CGSCC_NPM-NEXT: store i32 0, i32* [[P]], align 4 -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@only_store +; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) [[ATTR8:#.*]] { +; IS__TUNIT____-NEXT: store i32 0, i32* [[P]], align 4 +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@only_store @@ -676,17 +670,17 @@ define void @only_store(i32* %p) { } define void @test15_caller(i32* noalias %p, i32 %c) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test15_caller -; NOT_CGSCC_NPM-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { -; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 -; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; NOT_CGSCC_NPM: if.then: -; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: br label [[IF_END]] -; NOT_CGSCC_NPM: if.end: -; NOT_CGSCC_NPM-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@test15_caller +; IS__TUNIT____-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { +; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 +; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__TUNIT____: if.then: +; IS__TUNIT____-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: br label [[IF_END]] +; IS__TUNIT____: if.end: +; IS__TUNIT____-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test15_caller @@ -733,23 +727,23 @@ if.end: ; Therefore, only one of the two conditions of if statementes will be fulfilled. 
define internal void @test16_sub(i32* noalias %p, i32 %c1, i32 %c2) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test16_sub -; NOT_CGSCC_NPM-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C1:%.*]], i32 [[C2:%.*]]) [[ATTR7]] { -; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C1]], 0 -; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; NOT_CGSCC_NPM: if.then: -; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: tail call void @make_alias(i32* nofree writeonly align 4 [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: br label [[IF_END]] -; NOT_CGSCC_NPM: if.end: -; NOT_CGSCC_NPM-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C2]], 0 -; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_END3:%.*]] -; NOT_CGSCC_NPM: if.then2: -; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: br label [[IF_END3]] -; NOT_CGSCC_NPM: if.end3: -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@test16_sub +; IS__TUNIT____-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C1:%.*]], i32 [[C2:%.*]]) [[ATTR7]] { +; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C1]], 0 +; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__TUNIT____: if.then: +; IS__TUNIT____-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: tail call void @make_alias(i32* nofree writeonly align 4 [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: br label [[IF_END]] +; IS__TUNIT____: if.end: +; IS__TUNIT____-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C2]], 0 +; IS__TUNIT____-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_END3:%.*]] +; IS__TUNIT____: if.then2: +; IS__TUNIT____-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: br label [[IF_END3]] +; IS__TUNIT____: if.end3: +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test16_sub @@ -790,11 +784,11 @@ if.end3: } define void @test16_caller(i32* %p, i32 %c) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test16_caller -; NOT_CGSCC_NPM-SAME: (i32* nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { -; NOT_CGSCC_NPM-NEXT: tail call void @test16_sub(i32* noalias nofree writeonly [[P]], i32 [[C]], i32 [[C]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@test16_caller +; IS__TUNIT____-SAME: (i32* nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { +; IS__TUNIT____-NEXT: tail call void @test16_sub(i32* noalias nofree writeonly [[P]], i32 [[C]], i32 [[C]]) [[ATTR7]] +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test16_caller @@ -826,20 +820,20 @@ define void @test16_caller(i32* %p, i32 %c) { ; } define void @test17_caller(i32* noalias %p, i32 %c) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly -; 
NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test17_caller -; NOT_CGSCC_NPM-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { -; NOT_CGSCC_NPM-NEXT: entry: -; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 -; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] -; NOT_CGSCC_NPM: l1: -; NOT_CGSCC_NPM-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: br label [[L3:%.*]] -; NOT_CGSCC_NPM: l2: -; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: br label [[L3]] -; NOT_CGSCC_NPM: l3: -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@test17_caller +; IS__TUNIT____-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 +; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] +; IS__TUNIT____: l1: +; IS__TUNIT____-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: br label [[L3:%.*]] +; IS__TUNIT____: l2: +; IS__TUNIT____-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: br label [[L3]] +; IS__TUNIT____: l3: +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test17_caller @@ -884,10 +878,10 @@ l3: ; } define void @noreturn() { -; NOT_CGSCC_NPM: Function Attrs: nofree noreturn nosync nounwind readnone willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@noreturn -; NOT_CGSCC_NPM-SAME: () [[ATTR9:#.*]] { -; NOT_CGSCC_NPM-NEXT: unreachable +; IS__TUNIT____: Function Attrs: nofree noreturn nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@noreturn +; IS__TUNIT____-SAME: () [[ATTR9:#.*]] { +; IS__TUNIT____-NEXT: unreachable ; ; IS__CGSCC____: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@noreturn @@ -899,18 +893,18 @@ define void @noreturn() { } define void @test18_caller(i32* noalias %p, i32 %c) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test18_caller -; NOT_CGSCC_NPM-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { -; NOT_CGSCC_NPM-NEXT: entry: -; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 -; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] -; NOT_CGSCC_NPM: l1: -; NOT_CGSCC_NPM-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: unreachable -; NOT_CGSCC_NPM: l2: -; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@test18_caller +; IS__TUNIT____-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 +; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] +; IS__TUNIT____: l1: +; IS__TUNIT____-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: 
unreachable
+; IS__TUNIT____:       l2:
+; IS__TUNIT____-NEXT:    tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]]
+; IS__TUNIT____-NEXT:    ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly
; IS__CGSCC____-LABEL: define {{[^@]+}}@test18_caller

From c0ab901bddd5cb80c71848a426b7eaa2882b2ef5 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert
Date: Fri, 4 Sep 2020 11:14:33 -0500
Subject: [PATCH 0123/1079] [Attributor] Selectively look at the callee even
 when there are operand bundles

While operand bundles carry unpredictable semantics, we know some of them
and can therefore "ignore" them. In this case we allow looking at the
declaration of `llvm.assume` when asked for the attributes at a call site.
The assume operand bundles we have do not invalidate the declaration
attributes.

We cannot test this in isolation because the llvm.assume attributes are
determined by the parser. However, a follow-up patch will provide test
coverage.

---
 llvm/lib/Transforms/IPO/Attributor.cpp | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 2a15c6f0b818d..4fcea9b5355de 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -325,6 +325,13 @@ const IRPosition
 SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
   IRPositions.emplace_back(IRP);

+  // Helper to determine if operand bundles on a call site are benign or
+  // potentially problematic. We handle only llvm.assume for now.
+  auto CanIgnoreOperandBundles = [](const CallBase &CB) {
+    return (isa<IntrinsicInst>(CB) &&
+            cast<IntrinsicInst>(CB).getIntrinsicID() == Intrinsic::assume);
+  };
+
   const auto *CB = dyn_cast<CallBase>(&IRP.getAnchorValue());
   switch (IRP.getPositionKind()) {
   case IRPosition::IRP_INVALID:
@@ -339,7 +346,7 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
     assert(CB && "Expected call site!");
     // TODO: We need to look at the operand bundles similar to the redirection
     //       in CallBase.
-    if (!CB->hasOperandBundles())
+    if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB))
       if (const Function *Callee = CB->getCalledFunction())
         IRPositions.emplace_back(IRPosition::function(*Callee));
     return;
@@ -347,7 +354,7 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
     assert(CB && "Expected call site!");
     // TODO: We need to look at the operand bundles similar to the redirection
     //       in CallBase.
-    if (!CB->hasOperandBundles()) {
+    if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) {
       if (const Function *Callee = CB->getCalledFunction()) {
         IRPositions.emplace_back(IRPosition::returned(*Callee));
         IRPositions.emplace_back(IRPosition::function(*Callee));
@@ -368,7 +375,7 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
     assert(CB && ArgNo >= 0 && "Expected call site!");
     // TODO: We need to look at the operand bundles similar to the redirection
     //       in CallBase.
-    if (!CB->hasOperandBundles()) {
+    if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) {
       const Function *Callee = CB->getCalledFunction();
       if (Callee && Callee->arg_size() > unsigned(ArgNo))
         IRPositions.emplace_back(IRPosition::argument(*Callee->getArg(ArgNo)));

From cefd2a2c705877feebd909a8537b89a8d1d575cc Mon Sep 17 00:00:00 2001
From: Johannes Doerfert
Date: Sat, 5 Sep 2020 13:20:31 -0500
Subject: [PATCH 0124/1079] [Attributor] Cleanup `IRPosition::getArgNo` usages

As we handle callback calls we need to disambiguate the call site argument
number from the callee argument number. While always equal in non-callback
calls, a callback comes with a partial parameter-argument mapping, so there
is no implicit correspondence.

Here we split `IRPosition::getArgNo()` into two public functions,
`getCallSiteArgNo()` and `getCalleeArgNo()`. Usages are adjusted to pick
the right one for their purpose.

This fixed some problems that would have been exposed as we more
aggressively optimize callbacks.

---
 llvm/include/llvm/Transforms/IPO/Attributor.h | 66 ++++++++++++-----
 llvm/lib/Transforms/IPO/Attributor.cpp        | 17 ++---
 .../Transforms/IPO/AttributorAttributes.cpp   | 25 +++----
 llvm/test/Transforms/Attributor/callbacks.ll  | 19 +++---
 4 files changed, 82 insertions(+), 45 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 4268123841b14..9f021f7dc63e2 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -388,10 +388,11 @@ struct IRPosition {

   /// Return the value this abstract attribute is associated with.
   Value &getAssociatedValue() const {
-    if (getArgNo() < 0 || isa<UndefValue>(&getAnchorValue()))
+    if (getCallSiteArgNo() < 0 || isa<UndefValue>(&getAnchorValue()))
       return getAnchorValue();
     assert(isa<CallBase>(&getAnchorValue()) && "Expected a call base!");
-    return *cast<CallBase>(&getAnchorValue())->getArgOperand(getArgNo());
+    return *cast<CallBase>(&getAnchorValue())
+                ->getArgOperand(getCallSiteArgNo());
   }

   /// Return the type this abstract attribute is associated with.
@@ -401,19 +402,22 @@ struct IRPosition {
     return getAssociatedValue().getType();
   }

-  /// Return the argument number of the associated value if it is an argument or
-  /// call site argument, otherwise a negative value.
-  int getArgNo() const {
-    switch (getPositionKind()) {
-    case IRPosition::IRP_ARGUMENT:
-      return cast<Argument>(getAsValuePtr())->getArgNo();
-    case IRPosition::IRP_CALL_SITE_ARGUMENT: {
-      Use &U = *getAsUsePtr();
-      return cast<CallBase>(U.getUser())->getArgOperandNo(&U);
-    }
-    default:
-      return -1;
-    }
+  /// Return the callee argument number of the associated value if it is an
+  /// argument or call site argument, otherwise a negative value. In contrast
+  /// to `getCallSiteArgNo` this method will always return the "argument
+  /// number" from the perspective of the callee. This may not be the same as
+  /// the call site if this is a callback call.
+  int getCalleeArgNo() const {
+    return getArgNo(/* CallbackCalleeArgIfApplicable */ true);
+  }
+
+  /// Return the call site argument number of the associated value if it is an
+  /// argument or call site argument, otherwise a negative value. In contrast
+  /// to `getCalleeArgNo` this method will always return the "operand number"
+  /// from the perspective of the call site. This may not be the same as the
+  /// callee perspective if this is a callback call.
+ int getCallSiteArgNo() const { + return getArgNo(/* CallbackCalleeArgIfApplicable */ false); } /// Return the index in the attribute list for this position. @@ -430,7 +434,7 @@ struct IRPosition { return AttributeList::ReturnIndex; case IRPosition::IRP_ARGUMENT: case IRPosition::IRP_CALL_SITE_ARGUMENT: - return getArgNo() + AttributeList::FirstArgIndex; + return getCallSiteArgNo() + AttributeList::FirstArgIndex; } llvm_unreachable( "There is no attribute index for a floating or invalid position!"); @@ -515,6 +519,17 @@ struct IRPosition { } } + /// Return true if the position is an argument or call site argument. + bool isArgumentPosition() const { + switch (getPositionKind()) { + case IRPosition::IRP_ARGUMENT: + case IRPosition::IRP_CALL_SITE_ARGUMENT: + return true; + default: + return false; + } + } + /// Special DenseMap key values. /// ///{ @@ -561,6 +576,25 @@ struct IRPosition { verify(); } + /// Return the callee argument number of the associated value if it is an + /// argument or call site argument. See also `getCalleeArgNo` and + /// `getCallSiteArgNo`. + int getArgNo(bool CallbackCalleeArgIfApplicable) const { + if (CallbackCalleeArgIfApplicable) + if (Argument *Arg = getAssociatedArgument()) + return Arg->getArgNo(); + switch (getPositionKind()) { + case IRPosition::IRP_ARGUMENT: + return cast(getAsValuePtr())->getArgNo(); + case IRPosition::IRP_CALL_SITE_ARGUMENT: { + Use &U = *getAsUsePtr(); + return cast(U.getUser())->getArgOperandNo(&U); + } + default: + return -1; + } + } + /// IRPosition for the use \p U. The position kind \p PK needs to be /// IRP_CALL_SITE_ARGUMENT, the anchor value is the user, the associated value /// the used value. diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 4fcea9b5355de..9927bca995552 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -197,7 +197,7 @@ Argument *IRPosition::getAssociatedArgument() const { // Not an Argument and no argument number means this is not a call site // argument, thus we cannot find a callback argument to return. - int ArgNo = getArgNo(); + int ArgNo = getCallSiteArgNo(); if (ArgNo < 0) return nullptr; @@ -371,17 +371,17 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { IRPositions.emplace_back(IRPosition::callsite_function(*CB)); return; case IRPosition::IRP_CALL_SITE_ARGUMENT: { - int ArgNo = IRP.getArgNo(); - assert(CB && ArgNo >= 0 && "Expected call site!"); + assert(CB && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. 
if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) { const Function *Callee = CB->getCalledFunction(); - if (Callee && Callee->arg_size() > unsigned(ArgNo)) - IRPositions.emplace_back(IRPosition::argument(*Callee->getArg(ArgNo))); - if (Callee) + if (Callee) { + if (Argument *Arg = IRP.getAssociatedArgument()) + IRPositions.emplace_back(IRPosition::argument(*Arg)); IRPositions.emplace_back(IRPosition::function(*Callee)); } + } IRPositions.emplace_back(IRPosition::value(IRP.getAssociatedValue())); return; } @@ -518,7 +518,7 @@ void IRPosition::verify() { "Expected call base argument operand for a 'call site argument' " "position"); assert(cast(U->getUser())->getArgOperandNo(U) == - unsigned(getArgNo()) && + unsigned(getCallSiteArgNo()) && "Argument number mismatch!"); assert(U->get() == &getAssociatedValue() && "Associated value mismatch!"); return; @@ -2189,7 +2189,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, IRPosition::Kind AP) { raw_ostream &llvm::operator<<(raw_ostream &OS, const IRPosition &Pos) { const Value &AV = Pos.getAssociatedValue(); return OS << "{" << Pos.getPositionKind() << ":" << AV.getName() << " [" - << Pos.getAnchorValue().getName() << "@" << Pos.getArgNo() << "]}"; + << Pos.getAnchorValue().getName() << "@" << Pos.getCallSiteArgNo() + << "]}"; } raw_ostream &llvm::operator<<(raw_ostream &OS, const IntegerRangeState &S) { diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 0fa5ad92c299e..b7ec899233e41 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -500,7 +500,7 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA, Optional T; // The argument number which is also the call site argument number. - unsigned ArgNo = QueryingAA.getIRPosition().getArgNo(); + unsigned ArgNo = QueryingAA.getIRPosition().getCallSiteArgNo(); auto CallSiteCheck = [&](AbstractCallSite ACS) { const IRPosition &ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo); @@ -2495,7 +2495,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { void initialize(Attributor &A) override { // See callsite argument attribute and callee argument attribute. const auto &CB = cast(getAnchorValue()); - if (CB.paramHasAttr(getArgNo(), Attribute::NoAlias)) + if (CB.paramHasAttr(getCallSiteArgNo(), Attribute::NoAlias)) indicateOptimisticFixpoint(); Value &Val = getAssociatedValue(); if (isa(Val) && @@ -2510,7 +2510,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { const AAMemoryBehavior &MemBehaviorAA, const CallBase &CB, unsigned OtherArgNo) { // We do not need to worry about aliasing with the underlying IRP. - if (this->getArgNo() == (int)OtherArgNo) + if (this->getCalleeArgNo() == (int)OtherArgNo) return false; // If it is not a pointer or pointer vector we do not alias. @@ -2925,7 +2925,7 @@ struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl { /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { CallBase &CB = cast(getAnchorValue()); - Use &U = CB.getArgOperandUse(getArgNo()); + Use &U = CB.getArgOperandUse(getCallSiteArgNo()); assert(!isa(U.get()) && "Expected undef values to be filtered out!"); UndefValue &UV = *UndefValue::get(U->getType()); @@ -4030,7 +4030,7 @@ struct AANoCaptureImpl : public AANoCapture { return; } - const Function *F = getArgNo() >= 0 ? getAssociatedFunction() : AnchorScope; + const Function *F = isArgumentPosition() ? 
getAssociatedFunction() : AnchorScope; // Check what state the associated function can actually capture. if (F) @@ -4049,7 +4049,7 @@ struct AANoCaptureImpl : public AANoCapture { if (!isAssumedNoCaptureMaybeReturned()) return; - if (getArgNo() >= 0) { + if (isArgumentPosition()) { if (isAssumedNoCapture()) Attrs.emplace_back(Attribute::get(Ctx, Attribute::NoCapture)); else if (ManifestInternal) @@ -4085,7 +4085,7 @@ struct AANoCaptureImpl : public AANoCapture { State.addKnownBits(NOT_CAPTURED_IN_RET); // Check existing "returned" attributes. - int ArgNo = IRP.getArgNo(); + int ArgNo = IRP.getCalleeArgNo(); if (F.doesNotThrow() && ArgNo >= 0) { for (unsigned u = 0, e = F.arg_size(); u < e; ++u) if (F.hasParamAttribute(u, Attribute::Returned)) { @@ -4262,12 +4262,12 @@ struct AACaptureUseTracker final : public CaptureTracker { ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) { const IRPosition &IRP = getIRPosition(); const Value *V = - getArgNo() >= 0 ? IRP.getAssociatedArgument() : &IRP.getAssociatedValue(); + isArgumentPosition() ? IRP.getAssociatedArgument() : &IRP.getAssociatedValue(); if (!V) return indicatePessimisticFixpoint(); const Function *F = - getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope(); + isArgumentPosition() ? IRP.getAssociatedFunction() : IRP.getAnchorScope(); assert(F && "Expected a function!"); const IRPosition &FnPos = IRPosition::function(*F); const auto &IsDeadAA = @@ -4613,7 +4613,7 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl { auto PredForCallSite = [&](AbstractCallSite ACS) { const IRPosition &ACSArgPos = - IRPosition::callsite_argument(ACS, getArgNo()); + IRPosition::callsite_argument(ACS, getCallSiteArgNo()); // Check if a coresponding argument was found or if it is on not // associated (which can happen for callback calls). if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID) @@ -4894,7 +4894,8 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating { ? dyn_cast(SimplifiedAssociatedValue.getValue()) : UndefValue::get(V.getType()); if (C) { - Use &U = cast(&getAnchorValue())->getArgOperandUse(getArgNo()); + Use &U = cast(&getAnchorValue()) + ->getArgOperandUse(getCallSiteArgNo()); // We can replace the AssociatedValue with the constant. if (&V != C && V.getType() == C->getType()) { if (A.changeUseAfterManifest(U, *C)) @@ -5213,7 +5214,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { return getAssociatedValue().getType()->getPointerElementType(); Optional Ty; - unsigned ArgNo = getIRPosition().getArgNo(); + unsigned ArgNo = getIRPosition().getCallSiteArgNo(); // Make sure the associated call site argument has the same type at all call // sites and it is an allocation we know is safe to privatize, for now that diff --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll index 03ca89fd1b08a..8fbc526bf46d3 100644 --- a/llvm/test/Transforms/Attributor/callbacks.ll +++ b/llvm/test/Transforms/Attributor/callbacks.ll @@ -115,6 +115,7 @@ declare !callback !0 void @t0_callback_broker(i32*, i32*, void (i32*, i32*, ...) ; we deduce and propagate noalias and others properly. 
define void @t1_caller(i32* noalias %a) { +; ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@t1_caller ; IS__TUNIT_OPM-SAME: (i32* noalias nocapture align 256 [[A:%.*]]) { ; IS__TUNIT_OPM-NEXT: entry: @@ -136,7 +137,7 @@ define void @t1_caller(i32* noalias %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t1_caller @@ -160,7 +161,7 @@ define void @t1_caller(i32* noalias %a) { ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -190,7 +191,7 @@ define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; ; IS________NPM: Function Attrs: nosync ; IS________NPM-LABEL: define {{[^@]+}}@t1_callback_callee -; IS________NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* noalias nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) [[ATTR0:#.*]] { +; IS________NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) [[ATTR0:#.*]] { ; IS________NPM-NEXT: entry: ; IS________NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS________NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -236,7 +237,7 @@ define void @t2_caller(i32* noalias %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t2_caller @@ -260,7 +261,7 @@ define void @t2_caller(i32* noalias %a) { ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -337,8 +338,8 @@ define void @t3_caller(i32* noalias %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t3_caller @@ -363,8 +364,8 @@ define void @t3_caller(i32* noalias %a) { ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: From 849146ba93fe14989ea0b727b055854b23e5c5e5 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Fri, 4 Sep 2020 11:20:28 -0500 Subject: [PATCH 0125/1079] [Attributor] Associate the callback callee with a call site argument (if any) If we have a callback, call site arguments were already associated with the callback callee. Now we also associate the function with the callback callee, thus we know ensure that the following holds true (if all return nonnull): `getAssociatedArgument()->getParent() == getAssociatedFunction()` To test this an early exit from `AAMemoryBehaviorCallSiteArgument::initialize`` is included as well. Without the change to getAssociatedFunction() this kind of early exit for declarations would cause callback call site arguments to miss out. 
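For illustration, a minimal sketch of that invariant, assuming an
`IRPosition` named `Pos` for a call site argument of a callback call
(`Pos` is a hypothetical variable; `getAssociatedArgument` and
`getAssociatedFunction` are the accessors touched below):

  // Sketch only: with the callback callee association in place, the
  // argument view and the function view of a callback call site
  // argument agree (when both are nonnull).
  if (Argument *Arg = Pos.getAssociatedArgument())
    assert(Arg->getParent() == Pos.getAssociatedFunction() &&
           "callback callee should be the associated function");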
---
 llvm/include/llvm/Transforms/IPO/Attributor.h |  8 +++++++-
 .../Transforms/IPO/AttributorAttributes.cpp   | 19 +++++++++++++------
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 9f021f7dc63e2..5c0a90339150f 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -339,8 +339,14 @@ struct IRPosition {

   /// Return the associated function, if any.
   Function *getAssociatedFunction() const {
-    if (auto *CB = dyn_cast<CallBase>(&getAnchorValue()))
+    if (auto *CB = dyn_cast<CallBase>(&getAnchorValue())) {
+      // We reuse the logic that associates callback callees to arguments of a
+      // call site here to identify the callback callee as the associated
+      // function.
+      if (Argument *Arg = getAssociatedArgument())
+        return Arg->getParent();
       return CB->getCalledFunction();
+    }
     return getAnchorScope();
   }

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index b7ec899233e41..97d88895bbfce 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -5936,14 +5936,21 @@ struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument {

   /// See AbstractAttribute::initialize(...).
   void initialize(Attributor &A) override {
-    if (Argument *Arg = getAssociatedArgument()) {
-      if (Arg->hasByValAttr()) {
-        addKnownBits(NO_WRITES);
-        removeKnownBits(NO_READS);
-        removeAssumedBits(NO_READS);
-      }
+    // If we don't have an associated argument this is either a variadic call
+    // or an indirect call, either way, nothing to do here.
+    Argument *Arg = getAssociatedArgument();
+    if (!Arg) {
+      indicatePessimisticFixpoint();
+      return;
+    }
+    if (Arg->hasByValAttr()) {
+      addKnownBits(NO_WRITES);
+      removeKnownBits(NO_READS);
+      removeAssumedBits(NO_READS);
     }
     AAMemoryBehaviorArgument::initialize(A);
+    if (getAssociatedFunction()->isDeclaration())
+      indicatePessimisticFixpoint();
   }

   /// See AbstractAttribute::updateImpl(...).

From 6a9a0bfc3350efc0fc7fabec9a1fef94f4e9cc86 Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Tue, 8 Sep 2020 23:15:37 -0700
Subject: [PATCH 0126/1079] [llvm-cov gcov] Simplify computation of line counts
 and exit block counter

---
 llvm/lib/ProfileData/GCOV.cpp | 45 ++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp
index 0292e2a09d17c..f8c576d305f05 100644
--- a/llvm/lib/ProfileData/GCOV.cpp
+++ b/llvm/lib/ProfileData/GCOV.cpp
@@ -220,10 +220,7 @@ bool GCOVFile::readGCDA(GCOVBuffer &buf) {
     for (std::unique_ptr<GCOVArc> &arc : fn->arcs) {
       if (!buf.readInt64(arc->Count))
         return false;
-      // FIXME Fix counters
       arc->src.Counter += arc->Count;
-      if (arc->dst.succ.empty())
-        arc->dst.Counter += arc->Count;
     }

     if (fn->Blocks.size() >= 2) {
@@ -469,31 +466,28 @@ void GCOVBlock::getCyclesCount(const BlockVector &Blocks, uint64_t &Count) {
 }

 /// Get the count for the list of blocks which lie on the same line.
-uint64_t GCOVBlock::getLineCount(const BlockVector &Blocks) {
-  uint64_t Count = 0;
-
-  for (auto Block : Blocks) {
-    if (Block->getNumSrcEdges() == 0 || Block->Number == 0) {
-      // The block has no predecessors and a non-null counter
-      // (can be the case with entry block in functions).
-      Count += Block->getCount();
+uint64_t GCOVBlock::getLineCount(const BlockVector &blocks) {
+  uint64_t count = 0;
+  for (const GCOVBlock *block : blocks) {
+    if (block->Number == 0) {
+      // For nonstandard control flows, arcs into the exit block may be
+      // duplicately counted (fork) or not be counted (abnormal exit), and thus
+      // the (exit,entry) counter may be inaccurate. Count the entry block with
+      // the outgoing arcs.
+      for (const GCOVArc *arc : block->succ)
+        count += arc->Count;
     } else {
       // Add counts from predecessors that are not on the same line.
-      for (auto E : Block->srcs()) {
-        const GCOVBlock *W = &E->src;
-        if (find(Blocks, W) == Blocks.end()) {
-          Count += E->Count;
-        }
-      }
-    }
-    for (auto E : Block->dsts()) {
-      E->CyclesCount = E->Count;
+      for (const GCOVArc *arc : block->pred)
+        if (!llvm::is_contained(blocks, &arc->src))
+          count += arc->Count;
     }
+    for (GCOVArc *arc : block->succ)
+      arc->CyclesCount = arc->Count;
   }
-  GCOVBlock::getCyclesCount(Blocks, Count);
-
-  return Count;
+  GCOVBlock::getCyclesCount(blocks, count);
+  return count;
 }

 //===----------------------------------------------------------------------===//
@@ -829,12 +823,15 @@ void FileInfo::printFunctionSummary(raw_ostream &OS,
   uint64_t EntryCount = Func->getEntryCount();
   uint32_t BlocksExec = 0;
   const GCOVBlock &ExitBlock = Func->getExitBlock();
+  uint64_t exitCount = 0;
+  for (const GCOVArc *arc : ExitBlock.pred)
+    exitCount += arc->Count;
   for (const GCOVBlock &Block : Func->blocks())
     if (Block.Number != 0 && &Block != &ExitBlock && Block.getCount())
       ++BlocksExec;

   OS << "function " << Func->getName() << " called " << EntryCount
-     << " returned " << formatPercentage(ExitBlock.getCount(), EntryCount)
+     << " returned " << formatPercentage(exitCount, EntryCount)
      << "% blocks executed "
      << formatPercentage(BlocksExec, Func->getNumBlocks() - 2) << "%\n";
 }

From d445b6dfec13cdf9b9cb01582ec93548ea30ed0e Mon Sep 17 00:00:00 2001
From: Johannes Doerfert
Date: Sun, 30 Aug 2020 14:14:33 -0500
Subject: [PATCH 0127/1079] [Attributor] Cleanup `::initialize` of various AAs

This commit cleans up the ::initialize method of various AAs in the
following ways:
  - If an associated function is required, give up on declarations.
    This was discovered as a real problem when lots of llvm.dbg.XXX call
    sites were assumed `noreturn` until proven otherwise. That does not
    make any sense and caused huge regressions and missed deductions.
  - Require more associated declarations for function interface AAs.
  - Use the IRAttribute::initialize to determine if function interface
    AAs can be used in IPO, don't replicate the checks (especially
    isFunctionIPOAmendable) all over the place.

Arguably the function declaration check should be moved to some central
place too.
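In sketch form, the recurring guard this cleanup installs in the
::initialize methods is the following (with `Base` standing in for the
respective superclass; the concrete classes are in the hunks below):

  void initialize(Attributor &A) override {
    Base::initialize(A);
    Function *F = getAssociatedFunction();
    if (!F || F->isDeclaration())
      indicatePessimisticFixpoint();
  }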
--- .../Transforms/IPO/AttributorAttributes.cpp | 62 ++++++++++++------- .../ArgumentPromotion/X86/attributes.ll | 2 +- .../X86/min-legal-vector-width.ll | 34 +++++----- .../ArgumentPromotion/X86/thiscall.ll | 4 +- .../Attributor/ArgumentPromotion/dbg.ll | 4 +- .../Attributor/ArgumentPromotion/profile.ll | 4 +- .../IPConstantProp/multiple_callbacks.ll | 4 +- .../Attributor/IPConstantProp/pthreads.ll | 4 +- llvm/test/Transforms/Attributor/callbacks.ll | 4 +- .../Attributor/dereferenceable-2.ll | 4 +- .../Transforms/Attributor/heap_to_stack.ll | 6 +- llvm/test/Transforms/Attributor/liveness.ll | 24 +++---- llvm/test/Transforms/Attributor/misc.ll | 4 +- llvm/test/Transforms/Attributor/noalias.ll | 38 ++++-------- llvm/test/Transforms/Attributor/nofree.ll | 4 +- llvm/test/Transforms/Attributor/noundef.ll | 4 +- 16 files changed, 106 insertions(+), 100 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 97d88895bbfce..7bec970597038 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -736,7 +736,7 @@ struct AANoUnwindCallSite final : AANoUnwindImpl { void initialize(Attributor &A) override { AANoUnwindImpl::initialize(A); Function *F = getAssociatedFunction(); - if (!F) + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); } @@ -795,7 +795,7 @@ class AAReturnedValuesImpl : public AAReturnedValues, public AbstractState { ReturnedValues.clear(); Function *F = getAssociatedFunction(); - if (!F) { + if (!F || F->isDeclaration()) { indicatePessimisticFixpoint(); return; } @@ -1388,7 +1388,7 @@ struct AANoSyncCallSite final : AANoSyncImpl { void initialize(Attributor &A) override { AANoSyncImpl::initialize(A); Function *F = getAssociatedFunction(); - if (!F) + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); } @@ -1453,7 +1453,7 @@ struct AANoFreeCallSite final : AANoFreeImpl { void initialize(Attributor &A) override { AANoFreeImpl::initialize(A); Function *F = getAssociatedFunction(); - if (!F) + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); } @@ -1900,7 +1900,7 @@ struct AANoRecurseCallSite final : AANoRecurseImpl { void initialize(Attributor &A) override { AANoRecurseImpl::initialize(A); Function *F = getAssociatedFunction(); - if (!F) + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); } @@ -2276,7 +2276,7 @@ struct AAWillReturnImpl : public AAWillReturn { AAWillReturn::initialize(A); Function *F = getAnchorScope(); - if (!F || !A.isFunctionIPOAmendable(*F) || mayContainUnboundedCycle(*F, A)) + if (!F || F->isDeclaration() || mayContainUnboundedCycle(*F, A)) indicatePessimisticFixpoint(); } @@ -2320,9 +2320,9 @@ struct AAWillReturnCallSite final : AAWillReturnImpl { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { - AAWillReturnImpl::initialize(A); + AAWillReturn::initialize(A); Function *F = getAssociatedFunction(); - if (!F) + if (!F || !A.isFunctionIPOAmendable(*F)) indicatePessimisticFixpoint(); } @@ -2675,6 +2675,14 @@ struct AANoAliasReturned final : AANoAliasImpl { AANoAliasReturned(const IRPosition &IRP, Attributor &A) : AANoAliasImpl(IRP, A) {} + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoAliasImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F || F->isDeclaration()) + indicatePessimisticFixpoint(); + } + /// See AbstractAttribute::updateImpl(...). 
virtual ChangeStatus updateImpl(Attributor &A) override { @@ -2716,7 +2724,7 @@ struct AANoAliasCallSiteReturned final : AANoAliasImpl { void initialize(Attributor &A) override { AANoAliasImpl::initialize(A); Function *F = getAssociatedFunction(); - if (!F) + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); } @@ -3865,8 +3873,16 @@ struct AAAlignFloating : AAAlignImpl { /// Align attribute for function return value. struct AAAlignReturned final : AAReturnedFromReturnedValues { - AAAlignReturned(const IRPosition &IRP, Attributor &A) - : AAReturnedFromReturnedValues(IRP, A) {} + using Base = AAReturnedFromReturnedValues; + AAAlignReturned(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + Base::initialize(A); + Function *F = getAssociatedFunction(); + if (!F || F->isDeclaration()) + indicatePessimisticFixpoint(); + } /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(aligned) } @@ -3940,7 +3956,7 @@ struct AAAlignCallSiteReturned final void initialize(Attributor &A) override { Base::initialize(A); Function *F = getAssociatedFunction(); - if (!F) + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); } @@ -3956,7 +3972,7 @@ struct AANoReturnImpl : public AANoReturn { void initialize(Attributor &A) override { AANoReturn::initialize(A); Function *F = getAssociatedFunction(); - if (!F) + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); } @@ -5750,7 +5766,7 @@ struct AAMemoryBehaviorImpl : public AAMemoryBehavior { void initialize(Attributor &A) override { intersectAssumedBits(BEST_STATE); getKnownStateFromValue(getIRPosition(), getState()); - IRAttribute::initialize(A); + AAMemoryBehavior::initialize(A); } /// Return the memory behavior information encoded in the IR for \p IRP. @@ -5981,6 +5997,14 @@ struct AAMemoryBehaviorCallSiteReturned final : AAMemoryBehaviorFloating { AAMemoryBehaviorCallSiteReturned(const IRPosition &IRP, Attributor &A) : AAMemoryBehaviorFloating(IRP, A) {} + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAMemoryBehaviorImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F || F->isDeclaration()) + indicatePessimisticFixpoint(); + } + /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { // We do not annotate returned values. @@ -6030,10 +6054,8 @@ struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl { void initialize(Attributor &A) override { AAMemoryBehaviorImpl::initialize(A); Function *F = getAssociatedFunction(); - if (!F || !A.isFunctionIPOAmendable(*F)) { + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); - return; - } } /// See AbstractAttribute::updateImpl(...). @@ -6310,7 +6332,7 @@ struct AAMemoryLocationImpl : public AAMemoryLocation { void initialize(Attributor &A) override { intersectAssumedBits(BEST_STATE); getKnownStateFromValue(A, getIRPosition(), getState()); - IRAttribute::initialize(A); + AAMemoryLocation::initialize(A); } /// Return the memory behavior information encoded in the IR for \p IRP. 
@@ -6773,10 +6795,8 @@ struct AAMemoryLocationCallSite final : AAMemoryLocationImpl { void initialize(Attributor &A) override { AAMemoryLocationImpl::initialize(A); Function *F = getAssociatedFunction(); - if (!F || !A.isFunctionIPOAmendable(*F)) { + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); - return; - } } /// See AbstractAttribute::updateImpl(...). diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll index 421ddc2bdd396..a50017ac73315 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll index 50d318198e149..310abfba58d55 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s 
--check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM @@ -44,7 +44,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11:#.*]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11:#.*]] ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12:#.*]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -57,7 +57,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11:#.*]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11:#.*]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) [[ATTR12:#.*]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 @@ -138,7 +138,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_OPM-NEXT: call 
fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -151,7 +151,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) [[ATTR12]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 @@ -232,7 +232,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -245,7 +245,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias 
nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) [[ATTR12]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 @@ -326,7 +326,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -339,7 +339,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) [[ATTR12]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 @@ -418,7 +418,7 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* 
[[ARG]], align 2 @@ -431,7 +431,7 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -508,7 +508,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -521,7 +521,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -600,7 +600,7 @@ define void 
@avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -613,7 +613,7 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) [[ATTR12]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 @@ -694,7 +694,7 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -707,7 +707,7 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 
32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) [[ATTR12]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll index 25729fb893335..29f6a1bf6d3f5 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll @@ -4,8 +4,8 @@ ; we don't do that anymore. It also verifies that the combination of ; globalopt and argpromotion is able to optimize the call safely. ; -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll index 5e40294cdb27b..64d5adaa75020 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal 
-attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll index 3584172b242da..932f9197e9ce1 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc 
-attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll b/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll index ee411ec0c857e..91bf46ca2148f 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll index 4d8b20cb1cf3f..5afeb2071d192 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor 
-enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; diff --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll index 8fbc526bf46d3..26e4ce2679ccc 100644 --- a/llvm/test/Transforms/Attributor/callbacks.ll +++ b/llvm/test/Transforms/Attributor/callbacks.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/dereferenceable-2.ll b/llvm/test/Transforms/Attributor/dereferenceable-2.ll index aa3130e4a3190..816e5c47ef35b 100644 --- a/llvm/test/Transforms/Attributor/dereferenceable-2.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-2.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: 
opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll index 3c34419a960d4..27774c525c4e0 100644 --- a/llvm/test/Transforms/Attributor/heap_to_stack.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll @@ -428,9 +428,8 @@ define void @test11() { ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test11() { -; IS________NPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) +; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 ; IS________NPM-NEXT: tail call void @sync_will_return(i8* [[TMP1]]) [[ATTR6]] -; IS________NPM-NEXT: tail call void @free(i8* nocapture [[TMP1]]) ; IS________NPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) @@ -739,10 +738,9 @@ define void @test16c(i8 %v, i8** %P) { ; ; IS________NPM-LABEL: define {{[^@]+}}@test16c ; IS________NPM-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) { -; IS________NPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) +; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 ; IS________NPM-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 ; IS________NPM-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) [[ATTR6]] -; IS________NPM-NEXT: tail call void @free(i8* nocapture [[TMP1]]) ; IS________NPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll index ea36bb5f66e8c..8919cf66cbb9b 100644 --- a/llvm/test/Transforms/Attributor/liveness.ll +++ b/llvm/test/Transforms/Attributor/liveness.ll @@ -854,22 +854,22 @@ define internal void @middle() { ; NOT_CGSCC_NPM-NEXT: call void @non_dead_b3() [[ATTR11]] ; NOT_CGSCC_NPM-NEXT: br label [[BB1:%.*]] ; NOT_CGSCC_NPM: bb1: -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b4() [[ATTR2:#.*]] -; 
NOT_CGSCC_NPM-NEXT: call void @non_dead_b5() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b6() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b7() [[ATTR2]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b4() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b5() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b6() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b7() [[ATTR11]] ; NOT_CGSCC_NPM-NEXT: br label [[BB2:%.*]] ; NOT_CGSCC_NPM: bb2: -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b8() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b9() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b10() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b11() [[ATTR2]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b8() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b9() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b10() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b11() [[ATTR11]] ; NOT_CGSCC_NPM-NEXT: br label [[BB3:%.*]] ; NOT_CGSCC_NPM: bb3: -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b12() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b13() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b14() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b15() [[ATTR2]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b12() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b13() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b14() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b15() [[ATTR11]] ; NOT_CGSCC_NPM-NEXT: br label [[BB4:%.*]] ; NOT_CGSCC_NPM: bb4: ; NOT_CGSCC_NPM-NEXT: call void @non_exact2() diff --git a/llvm/test/Transforms/Attributor/misc.ll b/llvm/test/Transforms/Attributor/misc.ll index 3fa65e07a5162..a5c4556ac0417 100644 --- a/llvm/test/Transforms/Attributor/misc.ll +++ b/llvm/test/Transforms/Attributor/misc.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s 
--check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; diff --git a/llvm/test/Transforms/Attributor/noalias.ll b/llvm/test/Transforms/Attributor/noalias.ll index 030089282334c..a4c05fb4ca29d 100644 --- a/llvm/test/Transforms/Attributor/noalias.ll +++ b/llvm/test/Transforms/Attributor/noalias.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM @@ -566,29 +566,17 @@ define internal i32 @ret(i32* %arg) { ; Function Attrs: nounwind optsize define internal fastcc double @strtox(i8* %s, i8** %p, i32 %prec) unnamed_addr { -; IS__TUNIT____-LABEL: define {{[^@]+}}@strtox -; IS__TUNIT____-SAME: (i8* [[S:%.*]]) unnamed_addr { -; IS__TUNIT____-NEXT: entry: -; IS__TUNIT____-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 -; IS__TUNIT____-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* -; IS__TUNIT____-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) [[ATTR10:#.*]] -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) -; IS__TUNIT____-NEXT: call void @__shlim(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i64 noundef 0) -; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i32 noundef 1, i32 noundef 1) -; IS__TUNIT____-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) -; IS__TUNIT____-NEXT: ret double [[CALL1]] -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@strtox -; IS__CGSCC____-SAME: (i8* noalias [[S:%.*]]) 
unnamed_addr { -; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 -; IS__CGSCC____-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* -; IS__CGSCC____-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) [[ATTR10]] -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) -; IS__CGSCC____-NEXT: call void @__shlim(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i64 noundef 0) -; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i32 noundef 1, i32 noundef 1) -; IS__CGSCC____-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) -; IS__CGSCC____-NEXT: ret double [[CALL1]] +; CHECK-LABEL: define {{[^@]+}}@strtox +; CHECK-SAME: (i8* noalias [[S:%.*]]) unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) [[ATTR10:#.*]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) +; CHECK-NEXT: call void @__shlim(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i64 noundef 0) +; CHECK-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i32 noundef 1, i32 noundef 1) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) +; CHECK-NEXT: ret double [[CALL1]] ; entry: %f = alloca %struct._IO_FILE, align 8 diff --git a/llvm/test/Transforms/Attributor/nofree.ll b/llvm/test/Transforms/Attributor/nofree.ll index 6cbaf71a01e39..b459527fe2eda 100644 --- a/llvm/test/Transforms/Attributor/nofree.ll +++ b/llvm/test/Transforms/Attributor/nofree.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | 
FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/noundef.ll b/llvm/test/Transforms/Attributor/noundef.ll index 34142af9ef8cd..211338eefa0b9 100644 --- a/llvm/test/Transforms/Attributor/noundef.ll +++ b/llvm/test/Transforms/Attributor/noundef.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM From f9ea4501b861ecc987afb4a71266dcc83ae640ca Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 8 Sep 2020 15:58:58 -0500 Subject: [PATCH 0128/1079] [Attributor][NFC] Improve check lines in depgraph.ll This adds the check lines with -NEXT so we see any change in the future. 
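For context, a minimal sketch of the FileCheck semantics this change relies on (the [AAFoo]/[AABar] attribute names below are placeholders for illustration, not entries from the actual depgraph dump):

    ; GRAPH: [AAFoo] for CtxI ...
    ; GRAPH-NEXT: updates [AABar] for CtxI ...
    ; GRAPH-EMPTY:

A plain GRAPH line may match anywhere at or below the previous match, so a line inserted between two matches goes unnoticed; GRAPH-NEXT must match on the immediately following line, and GRAPH-EMPTY requires that line to be empty. With the -NEXT and -EMPTY forms in place, any added, removed, or reordered edge in the printed dependency graph fails the test.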
--- llvm/test/Transforms/Attributor/depgraph.ll | 290 ++++++++++++++------ 1 file changed, 208 insertions(+), 82 deletions(-) diff --git a/llvm/test/Transforms/Attributor/depgraph.ll b/llvm/test/Transforms/Attributor/depgraph.ll index 791af581b22a0..d7dc9d42f49b2 100644 --- a/llvm/test/Transforms/Attributor/depgraph.ll +++ b/llvm/test/Transforms/Attributor/depgraph.ll @@ -51,88 +51,214 @@ define i32* @checkAndAdvance(i32* align 16 %0) { ; Check for graph ; -; GRAPH: [AANoUnwind] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nounwind -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind -; GRAPH: updates [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind -; GRAPH: updates [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: [AANoSync] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nosync -; GRAPH: updates [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync -; GRAPH: updates [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync -; GRAPH: [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nofree -; GRAPH: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree -; GRAPH: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree -; GRAPH: updates [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree -; GRAPH: updates [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree -; GRAPH: [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state readonly -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed 
not-captured-maybe-returned -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument -; GRAPH: updates [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument -; GRAPH: updates [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument -; GRAPH: [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state align<0-16> -; GRAPH: updates [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16> -; GRAPH: updates [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16> -; GRAPH: [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull -; GRAPH: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull -; GRAPH: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull -; GRAPH: [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %6 = call 
i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state readonly -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state readonly -; GRAPH: [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree -; GRAPH: updates [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nofree -; GRAPH: [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind -; GRAPH: updates [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live -; GRAPH: updates [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live -; GRAPH: updates [AANoUnwind] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nounwind -; GRAPH: [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly -; GRAPH: updates [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state readonly -; GRAPH: [AANoCapture] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state readonly -; GRAPH: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument -; GRAPH: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument -; GRAPH: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument -; GRAPH: [AANonNull] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state nonnull -; GRAPH: updates [AANonNull] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state nonnull -; GRAPH: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nonnull -; GRAPH: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull -; GRAPH: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull -; GRAPH: [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync -; GRAPH: updates [AANoSync] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state 
nosync -; GRAPH: [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree -; GRAPH: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nofree -; GRAPH: [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument -; GRAPH: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument -; GRAPH: [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16> -; GRAPH: updates [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state align<0-16> -; GRAPH: [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull -; GRAPH: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull +; GRAPH: [AAIsDead] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state Live[#BB 4/4][#TBEP 0][#KDE 1] +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %3 = icmp eq i32 %2, 0' at position {flt: [@-1]} with state simplified +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAWillReturn] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state may-noreturn +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAUndefinedBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state undefined-behavior +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state simplified +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoUndef] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state may-undef-or-poison +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAReturnedValues] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state returns(#3)[#UC: 1] +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoUnwind] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nounwind +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind +; GRAPH-NEXT: updates [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind +; GRAPH-NEXT: updates [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed 
not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoSync] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nosync +; GRAPH-NEXT: updates [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync +; GRAPH-NEXT: updates [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' %2 = load i32, i32* %0, align 4' at position {flt: [@-1]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %2 = load i32, i32* %0, align 4' at position {flt: [@-1]} with state simplified +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueConstantRange] for CtxI ' %2 = load i32, i32* %0, align 4' at position {flt: [@-1]} with state range(32) +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAPotentialValues] for CtxI ' %2 = load i32, i32* %0, align 4' at position {flt: [@-1]} with state set-state(< {full-set} >) +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' %3 = icmp eq i32 %2, 0' at position {flt: [@-1]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' br i1 %3, label %4, label %7' at position {flt: [@-1]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nofree +; GRAPH-NEXT: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree +; GRAPH-NEXT: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree +; GRAPH-NEXT: updates [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree +; GRAPH-NEXT: updates [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoReturn] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state may-return +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoRecurse] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state may-recurse +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state readonly +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: 
[@-1]} with state readonly +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument +; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument +; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAHeapToStack] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state [H2S] Mallocs: 0 +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state simplified +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state align<0-16> +; GRAPH-NEXT: updates [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16> +; GRAPH-NEXT: updates [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16> +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull +; GRAPH-NEXT: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull +; GRAPH-NEXT: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoAlias] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state may-alias +; GRAPH-EMPTY: +; GRAPH-NEXT: [AADereferenceable] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state unknown-dereferenceable +; GRAPH-EMPTY: +; 
GRAPH-NEXT: [AAIsDead] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoUndef] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state may-undef-or-poison +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nonnull +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoAlias] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state may-alias +; GRAPH-EMPTY: +; GRAPH-NEXT: [AADereferenceable] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state dereferenceable<4-4> +; GRAPH-EMPTY: +; GRAPH-NEXT: [AADereferenceable] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state unknown-dereferenceable +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nonnull +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state align<16-16> +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state align<16-16> +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state readonly +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state readonly +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree +; GRAPH-NEXT: updates [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nofree +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAPrivatizablePtr] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state [no-priv] +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind +; GRAPH-NEXT: updates [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live +; GRAPH-NEXT: updates [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live +; GRAPH-NEXT: updates [AANoUnwind] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nounwind +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly +; GRAPH-NEXT: updates [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with 
state assumed-live +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state readonly +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state simplified +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoUndef] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state may-undef-or-poison +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoCapture] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoAlias] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state may-alias +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state readonly +; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument +; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument +; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nofree +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueConstantRange] for CtxI ' %3 = icmp eq i32 %2, 0' at position {flt: [@-1]} with state range(1) +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueConstantRange] for CtxI <> at position {flt: [@-1]} with state range(32)<[0,1) / [0,1)> +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAPotentialValues] for CtxI ' %3 = icmp eq i32 %2, 0' at position {flt: [@-1]} with state set-state(< {full-set} >) +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoReturn] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state may-return +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoAlias] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state may-alias +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state simplified +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoUndef] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state may-undef-or-poison +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state simplified +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAAlign] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state align<16-16> +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANonNull] 
for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state nonnull +; GRAPH-NEXT: updates [AANonNull] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state nonnull +; GRAPH-NEXT: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nonnull +; GRAPH-NEXT: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull +; GRAPH-NEXT: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' ret i32* %.0' at position {flt: [@-1]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' br label %8' at position {flt: [@-1]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAWillReturn] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state may-noreturn +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoRecurse] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state may-recurse +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync +; GRAPH-NEXT: updates [AANoSync] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nosync +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree +; GRAPH-NEXT: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nofree +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument +; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' br label %8' at position {flt: [@-1]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16> +; GRAPH-NEXT: updates [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state align<0-16> +; GRAPH-EMPTY: +; GRAPH-NEXT: [AADereferenceable] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state unknown-dereferenceable +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull +; GRAPH-NEXT: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull +; GRAPH-EMPTY: +; GRAPH-NEXT: [AADereferenceable] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state unknown-dereferenceable + ; GRAPH-NOT: update ; From 3ebc7552270e632d16e7900dd6933ed467159289 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Wed, 9 Sep 2020 07:32:30 +0100 Subject: [PATCH 0129/1079] [ARM] Try to rematerialize VCTP instructions We really want to 
try to avoid spilling P0, which can be difficult since there's only one register, so try to rematerialize any VCTP instructions. Differential Revision: https://reviews.llvm.org/D87280 --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 9 ++ llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 6 +- llvm/lib/Target/ARM/ARMInstrMVE.td | 1 + .../cond-vector-reduce-mve-codegen.ll | 24 ++- .../Thumb2/LowOverheadLoops/remat-vctp.ll | 139 ++++++++++++++++-- 5 files changed, 150 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index dd7b520effa86..d7d51fdd29ca8 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -6134,3 +6134,12 @@ bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault( MachineFunction &MF) const { return Subtarget.isMClass() && MF.getFunction().hasMinSize(); } + +bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, + AAResults *AA) const { + // Try hard to rematerialize any VCTPs because if we spill P0, it will block + // the tail predication conversion. This means that the element count + // register has to be live for longer, but that has to be better than + // spill/restore and VPT predication. + return isVCTP(&MI) && !isPredicated(MI); +} diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 53c627c209343..5bf6e880056de 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -452,6 +452,9 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { MachineInstr *canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI, const TargetInstrInfo *TII) const; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI, + AAResults *AA) const override; + private: /// Modeling special VFP / NEON fp MLA / MLS hazards. 
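As a rough illustration of how this hook is consulted, here is a minimal sketch under assumptions, not code from the patch: `decideSpillOrRemat` is a hypothetical caller, and the allocator-side plumbing is simplified.

    // TargetInstrInfo::isTriviallyReMaterializable checks the generic
    // criteria and then defers to the target override
    // isReallyTriviallyReMaterializable, which the patch above makes
    // answer true for any unpredicated VCTP, so P0 can be recomputed at
    // the use instead of being spilled and reloaded.
    void decideSpillOrRemat(const TargetInstrInfo &TII,
                            const MachineInstr &DefMI, AAResults *AA) {
      if (TII.isTriviallyReMaterializable(DefMI, AA)) {
        // rematerialize: clone DefMI next to its use
      } else {
        // fall back to spill/restore
      }
    }
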
@@ -635,8 +638,7 @@ static inline unsigned getTailPredVectorWidth(unsigned Opcode) { return 0; } -static inline -bool isVCTP(MachineInstr *MI) { +static inline bool isVCTP(const MachineInstr *MI) { switch (MI->getOpcode()) { default: break; diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 75543093bcbfe..2287edeef7662 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -5710,6 +5710,7 @@ def MVE_VDWDUPu8 : MVE_VxWDUP<"vdwdup", "u8", 0b00, 0b1>; def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>; def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>; +let isReMaterializable = 1 in class MVE_VCTPInst<string suffix, bits<2> size, list<dag> pattern=[]> : MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix, "$Rn", vpred_n, "", pattern> { diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll index 2fa8a4d8ed7ef..459e2c8395997 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll @@ -10,7 +10,6 @@ define dso_local i32 @vpsel_mul_reduce_add(i32* noalias nocapture readonly %a, i ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: add.w r12, r3, #3 ; CHECK-NEXT: mov.w lr, #1 ; CHECK-NEXT: bic r12, r12, #3 @@ -21,28 +20,26 @@ define dso_local i32 @vpsel_mul_reduce_add(i32* noalias nocapture readonly %a, i ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: and r4, r12, #15 -; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill ; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vldrwt.u32 q1, [r2], #16 ; CHECK-NEXT: vldrwt.u32 q2, [r1], #16 ; CHECK-NEXT: vdup.32 q3, r4 ; CHECK-NEXT: vpt.i32 eq, q3, zr ; CHECK-NEXT: vmovt q1, q2 -; CHECK-NEXT: add.w r12, r12, #4 -; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload +; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 ; CHECK-NEXT: vmul.i32 q1, q1, q2 +; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: le lr, .LBB0_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u32 r0, q0 -; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop {r4, pc} entry: %cmp8 = icmp eq i32 %N, 0 @@ -101,8 +98,7 @@ define dso_local i32 @vpsel_mul_reduce_add_2(i32* noalias nocapture readonly %a, ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: ldr.w r12, [sp, #40] +; CHECK-NEXT: ldr.w r12, [sp, #32] ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: beq .LBB1_4 ; CHECK-NEXT: @ %bb.1: @ %vector.ph @@ -116,10 +112,9 @@ define dso_local i32 @vpsel_mul_reduce_add_2(i32* noalias nocapture readonly %a, ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: and r5, r4, #15 -; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: vpsttt ; CHECK-NEXT: vldrwt.u32 q1, [r1], #16 ; CHECK-NEXT: vldrwt.u32 q2, [r3], #16 @@ -127,22 +122,21 @@ define dso_local i32 @vpsel_mul_reduce_add_2(i32* noalias nocapture readonly %a, ; CHECK-NEXT: vdup.32 q4, r5 ; 
CHECK-NEXT: vpt.i32 eq, q4, zr ; CHECK-NEXT: vsubt.i32 q1, q3, q2 -; CHECK-NEXT: adds r4, #4 -; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 ; CHECK-NEXT: vmul.i32 q1, q1, q2 +; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: le lr, .LBB1_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u32 r0, q0 -; CHECK-NEXT: b .LBB1_5 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r4, r5, r7, pc} ; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: .LBB1_5: @ %for.cond.cleanup -; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r7, pc} i32* noalias nocapture readonly %c, i32* noalias nocapture readonly %d, i32 %N) { diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll index 9178217a89e92..6ce2b9f5f1c02 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll @@ -1,21 +1,27 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m -mattr=+mve.fp %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp %s -o - | FileCheck %s -define hidden void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg4, i16 zeroext %arg5) { +define void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg4, i16 zeroext %arg5) { ; CHECK-LABEL: remat_vctp: ; CHECK: @ %bb.0: @ %bb -; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: ldrd lr, r12, [sp, #80] +; CHECK-NEXT: ldrd r5, r12, [sp, #80] +; CHECK-NEXT: cmp.w r12, #4 +; CHECK-NEXT: mov r4, r12 ; CHECK-NEXT: vmvn.i32 q0, #0x80000000 +; CHECK-NEXT: it ge +; CHECK-NEXT: movge r4, #4 ; CHECK-NEXT: vmov.i32 q1, #0x3f +; CHECK-NEXT: sub.w r4, r12, r4 ; CHECK-NEXT: vmov.i32 q2, #0x1 +; CHECK-NEXT: add.w lr, r4, #3 +; CHECK-NEXT: movs r4, #1 +; CHECK-NEXT: add.w lr, r4, lr, lsr #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_1: @ %bb6 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r12 -; CHECK-NEXT: subs.w r12, r12, #4 -; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q4, [r1], #16 ; CHECK-NEXT: vabs.s32 q5, q4 @@ -24,7 +30,7 @@ define hidden void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i3 ; CHECK-NEXT: vadd.i32 q3, q3, q2 ; CHECK-NEXT: vshr.u32 q6, q5, #24 ; CHECK-NEXT: vand q6, q6, q1 -; CHECK-NEXT: vldrw.u32 q7, [lr, q6, uxtw #2] +; CHECK-NEXT: vldrw.u32 q7, [r5, q6, uxtw #2] ; CHECK-NEXT: vqrdmulh.s32 q6, q7, q5 ; CHECK-NEXT: vqsub.s32 q6, q0, q6 ; CHECK-NEXT: vqrdmulh.s32 q6, q7, q6 @@ -35,18 +41,18 @@ define hidden void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i3 ; CHECK-NEXT: vqshl.s32 q5, q5, #1 ; CHECK-NEXT: vpt.s32 lt, q4, zr ; CHECK-NEXT: vnegt.s32 q5, q5 -; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: vctp.32 r12 +; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q4, [r0], #16 ; CHECK-NEXT: vqrdmulh.s32 q4, q4, q5 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vstrwt.32 q4, [r2], #16 ; CHECK-NEXT: vstrwt.32 q3, [r3], #16 -; CHECK-NEXT: bgt .LBB0_1 +; CHECK-NEXT: le lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %bb44 -; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop 
{d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} bb: %i = zext i16 %arg5 to i32 br label %bb6 @@ -97,6 +103,115 @@ bb44: ; preds = %bb6 ret void } +define void @dont_remat_predicated_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg4, i16 zeroext %arg5, i32 %conv.mask) { +; CHECK-LABEL: dont_remat_predicated_vctp: +; CHECK: @ %bb.0: @ %bb +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: ldrd r6, r12, [sp, #88] +; CHECK-NEXT: movs r4, #4 +; CHECK-NEXT: cmp.w r12, #4 +; CHECK-NEXT: vmvn.i32 q0, #0x80000000 +; CHECK-NEXT: csel r5, r12, r4, lt +; CHECK-NEXT: vmov.i32 q1, #0x3f +; CHECK-NEXT: sub.w r5, r12, r5 +; CHECK-NEXT: vmov.i32 q2, #0x1 +; CHECK-NEXT: add.w lr, r5, #3 +; CHECK-NEXT: movs r5, #1 +; CHECK-NEXT: add.w lr, r5, lr, lsr #2 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: .LBB1_1: @ %bb6 +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vctp.32 r12 +; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vpst +; CHECK-NEXT: vctpt.32 r4 +; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrwt.u32 q4, [r1], #16 +; CHECK-NEXT: vabs.s32 q5, q4 +; CHECK-NEXT: vcls.s32 q3, q5 +; CHECK-NEXT: vshl.u32 q5, q5, q3 +; CHECK-NEXT: vadd.i32 q3, q3, q2 +; CHECK-NEXT: vshr.u32 q6, q5, #24 +; CHECK-NEXT: vand q6, q6, q1 +; CHECK-NEXT: vldrw.u32 q7, [r6, q6, uxtw #2] +; CHECK-NEXT: vqrdmulh.s32 q6, q7, q5 +; CHECK-NEXT: vqsub.s32 q6, q0, q6 +; CHECK-NEXT: vqrdmulh.s32 q6, q7, q6 +; CHECK-NEXT: vqshl.s32 q6, q6, #1 +; CHECK-NEXT: vqrdmulh.s32 q5, q6, q5 +; CHECK-NEXT: vqsub.s32 q5, q0, q5 +; CHECK-NEXT: vqrdmulh.s32 q5, q6, q5 +; CHECK-NEXT: vqshl.s32 q5, q5, #1 +; CHECK-NEXT: vpt.s32 lt, q4, zr +; CHECK-NEXT: vnegt.s32 q5, q5 +; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrwt.u32 q4, [r0], #16 +; CHECK-NEXT: vqrdmulh.s32 q4, q4, q5 +; CHECK-NEXT: vpstt +; CHECK-NEXT: vstrwt.32 q4, [r2], #16 +; CHECK-NEXT: vstrwt.32 q3, [r3], #16 +; CHECK-NEXT: le lr, .LBB1_1 +; CHECK-NEXT: @ %bb.2: @ %bb44 +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: pop {r4, r5, r6, pc} +bb: + %i = zext i16 %arg5 to i32 + br label %bb6 + +bb6: ; preds = %bb6, %bb + %i7 = phi i32* [ %arg3, %bb ], [ %i38, %bb6 ] + %i8 = phi i32 [ %i, %bb ], [ %i42, %bb6 ] + %i9 = phi i32* [ %arg2, %bb ], [ %i41, %bb6 ] + %i10 = phi i32* [ %arg1, %bb ], [ %i40, %bb6 ] + %i11 = phi i32* [ %arg, %bb ], [ %i39, %bb6 ] + %i12 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 4) + %mask = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i8) + %pred = and <4 x i1> %i12, %mask + %i13 = bitcast i32* %i11 to <4 x i32>* + %i14 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %i13, i32 4, <4 x i1> %pred, <4 x i32> zeroinitializer) + %i15 = bitcast i32* %i10 to <4 x i32>* + %i16 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %i15, i32 4, <4 x i1> %pred, <4 x i32> zeroinitializer) + %i17 = icmp slt <4 x i32> %i16, zeroinitializer + %i18 = sub <4 x i32> zeroinitializer, %i16 + %i19 = select <4 x i1> %i17, <4 x i32> %i18, <4 x i32> %i16 + %i20 = tail call <4 x i32> @llvm.arm.mve.vcls.v4i32(<4 x i32> %i19) + %i21 = shl <4 x i32> %i19, %i20 + %i22 = add <4 x i32> %i20, <i32 1, i32 1, i32 1, i32 1> + %i23 = lshr <4 x i32> %i21, <i32 24, i32 24, i32 24, i32 24> + %i24 = and <4 x i32> %i23, <i32 63, i32 63, i32 63, i32 63> + %i25 = tail call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %arg4, <4 x i32> %i24, i32 32, i32 2, i32 0) + %i26 
= tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i25, <4 x i32> %i21) + %i27 = tail call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> %i26) + %i28 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i25, <4 x i32> %i27) + %i29 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %i28, i32 1, i32 0) + %i30 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i29, <4 x i32> %i21) + %i31 = tail call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> %i30) + %i32 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i29, <4 x i32> %i31) + %i33 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %i32, i32 1, i32 0) + %i34 = tail call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> %i33, <4 x i1> %i17, <4 x i32> %i33) + %i35 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i14, <4 x i32> %i34) + %i36 = bitcast i32* %i9 to <4 x i32>* + %i37 = bitcast i32* %i7 to <4 x i32>* + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %i35, <4 x i32>* %i36, i32 4, <4 x i1> %pred) + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %i22, <4 x i32>* %i37, i32 4, <4 x i1> %pred) + %i38 = getelementptr inbounds i32, i32* %i7, i32 4 + %i39 = getelementptr inbounds i32, i32* %i11, i32 4 + %i40 = getelementptr inbounds i32, i32* %i10, i32 4 + %i41 = getelementptr inbounds i32, i32* %i9, i32 4 + %i42 = add nsw i32 %i8, -4 + %i43 = icmp sgt i32 %i8, 4 + br i1 %i43, label %bb6, label %bb44 + +bb44: ; preds = %bb6 + ret void +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) declare <4 x i1> @llvm.arm.mve.vctp32(i32) declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) declare <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) From 2a52c3301a5254d4614401b4aa12ab7c841d7340 Mon Sep 17 00:00:00 2001 From: Denis Antrushin Date: Mon, 7 Sep 2020 22:04:07 +0700 Subject: [PATCH 0130/1079] [Statepoints] Properly handle const base pointer. Current code in InstrEmitter assumes all GC pointers are either VRegs or stack slots - hence, each takes only one operand. But it is possible to have a constant base, in which case it occupies two machine operands. Add a convenience function to StackMaps to get the index of the next meta argument and use it in InstrEmitter to properly advance to the next statepoint meta operand. Reviewed By: reames Differential Revision: https://reviews.llvm.org/D87252 --- llvm/include/llvm/CodeGen/StackMaps.h | 4 ++++ .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 20 ++++++---------- llvm/lib/CodeGen/StackMaps.cpp | 23 +++++++++++++++++++ llvm/test/CodeGen/X86/statepoint-vreg.ll | 23 +++++++++++++++++++ 4 files changed, 57 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/CodeGen/StackMaps.h b/llvm/include/llvm/CodeGen/StackMaps.h index ce4eb85d64525..578bc0e161a64 100644 --- a/llvm/include/llvm/CodeGen/StackMaps.h +++ b/llvm/include/llvm/CodeGen/StackMaps.h @@ -261,6 +261,10 @@ class StackMaps { StackMaps(AsmPrinter &AP); + /// Get the index of the next meta operand. + /// Similar to parseOperand, but does not actually parse the operand's meaning. 
+ static unsigned getNextMetaArgIdx(MachineInstr *MI, unsigned CurIdx); + void reset() { CSInfos.clear(); ConstPool.clear(); diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index ff84fdd62075c..e2da367cfe3f6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -89,18 +89,9 @@ static unsigned getStatepointGCArgStartIdx(MachineInstr *MI) { "STATEPOINT node expected"); unsigned OperIdx = StatepointOpers(MI).getNumDeoptArgsIdx(); unsigned NumDeopts = MI->getOperand(OperIdx).getImm(); - // At this point stack references has not been lowered yet, so they - // take single operand. ++OperIdx; - while (NumDeopts--) { - MachineOperand &MO = MI->getOperand(OperIdx); - if (MO.isImm() && MO.getImm() == StackMaps::ConstantOp) { - ++OperIdx; - assert(MI->getOperand(OperIdx).isImm() && - "Unexpected statepoint operand"); - } - ++OperIdx; - } + while (NumDeopts--) + OperIdx = StackMaps::getNextMetaArgIdx(MI, OperIdx); return OperIdx; } @@ -1002,11 +993,14 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, assert(!HasPhysRegOuts && "STATEPOINT mishandled"); MachineInstr *MI = MIB; unsigned Def = 0; - unsigned Use = getStatepointGCArgStartIdx(MI) + 1; + unsigned Use = getStatepointGCArgStartIdx(MI); + Use = StackMaps::getNextMetaArgIdx(MI, Use); // first derived + assert(Use < MI->getNumOperands()); while (Def < NumDefs) { if (MI->getOperand(Use).isReg()) MI->tieOperands(Def++, Use); - Use += 2; + Use = StackMaps::getNextMetaArgIdx(MI, Use); // next base + Use = StackMaps::getNextMetaArgIdx(MI, Use); // next derived } } diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp index 113d477ec80a7..806ba1aa98226 100644 --- a/llvm/lib/CodeGen/StackMaps.cpp +++ b/llvm/lib/CodeGen/StackMaps.cpp @@ -88,6 +88,29 @@ StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { llvm_unreachable("Unsupported stackmap version!"); } +unsigned StackMaps::getNextMetaArgIdx(MachineInstr *MI, unsigned CurIdx) { + assert(CurIdx < MI->getNumOperands() && "Bad meta arg index"); + const auto &MO = MI->getOperand(CurIdx); + if (MO.isImm()) { + switch (MO.getImm()) { + default: + llvm_unreachable("Unrecognized operand type."); + case StackMaps::DirectMemRefOp: + CurIdx += 2; + break; + case StackMaps::IndirectMemRefOp: + CurIdx += 3; + break; + case StackMaps::ConstantOp: + ++CurIdx; + break; + } + } + ++CurIdx; + assert(CurIdx < MI->getNumOperands() && "points past operand list"); + return CurIdx; +} + /// Go up the super-register chain until we hit a valid dwarf register number. 
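To illustrate the new helper, here is a minimal hedged sketch of walking a STATEPOINT's meta operands; `skipMetaArgs` is a hypothetical utility, not part of the patch:

    // Per the switch in getNextMetaArgIdx above, an entry occupies one
    // machine operand for a plain register, two for a ConstantOp pair,
    // three for a DirectMemRefOp and four for an IndirectMemRefOp; the
    // helper hides that encoding, which is exactly what lets InstrEmitter
    // step over constant base pointers correctly.
    unsigned skipMetaArgs(MachineInstr *MI, unsigned StartIdx,
                          unsigned NumEntries) {
      unsigned Idx = StartIdx;
      while (NumEntries--)
        Idx = StackMaps::getNextMetaArgIdx(MI, Idx);
      return Idx;
    }
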
static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) { int RegNum = TRI->getDwarfRegNum(Reg, false); diff --git a/llvm/test/CodeGen/X86/statepoint-vreg.ll b/llvm/test/CodeGen/X86/statepoint-vreg.ll index b613a949c273d..66b984b905364 100644 --- a/llvm/test/CodeGen/X86/statepoint-vreg.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg.ll @@ -47,6 +47,7 @@ entry: call void @consume(i32 addrspace(1)* %rel1) ret i1 %res1 } + ; test pointer variables intermixed with pointer constants define void @test_mixed(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) gc "statepoint-example" { ; CHECK-LABEL: test_mixed: @@ -567,6 +568,28 @@ exceptional_return.right: ret i64 addrspace(1)* %val.relocated3 } +; test ISEL for constant base pointer - must properly tie operands +define void @test_const_base(i32 addrspace(1)* %a) gc "statepoint-example" { +; CHECK-LABEL: test_const_base: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: callq func +; CHECK-NEXT: .Ltmp24: +; CHECK-NEXT: movq %rbx, %rdi +; CHECK-NEXT: callq consume +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +entry: + %token1 = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["deopt" (i32 0, i32 1, i32 7, i32 addrspace(1)* null, i32 9), "gc-live" (i32 addrspace(1)* null, i32 addrspace(1)* %a)] + %rel = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token1, i32 0, i32 1) + call void @consume(i32 addrspace(1)* %rel) + ret void +} + declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...) declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) From 6a494e117cd99fc5b4c728d9f5a78ae817f93434 Mon Sep 17 00:00:00 2001 From: Frederik Gossen Date: Wed, 9 Sep 2020 07:16:45 +0000 Subject: [PATCH 0131/1079] [MLIR] Add debug support for ignored patterns The rewrite engine's cost model may determine some patterns to be irrelevant ahead of their application. These patterns were silently ignored previously and now cause a message in `--debug` mode. Differential Revision: https://reviews.llvm.org/D87290 --- mlir/lib/IR/PatternMatch.cpp | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/mlir/lib/IR/PatternMatch.cpp b/mlir/lib/IR/PatternMatch.cpp index a26bc63ed89d0..d1da8d1d8f263 100644 --- a/mlir/lib/IR/PatternMatch.cpp +++ b/mlir/lib/IR/PatternMatch.cpp @@ -10,9 +10,12 @@ #include "mlir/IR/BlockAndValueMapping.h" #include "mlir/IR/Operation.h" #include "mlir/IR/Value.h" +#include "llvm/Support/Debug.h" using namespace mlir; +#define DEBUG_TYPE "pattern-match" + PatternBenefit::PatternBenefit(unsigned benefit) : representation(benefit) { assert(representation == benefit && benefit != ImpossibleToMatchSentinel && "This pattern match benefit is too large to represent"); @@ -207,8 +210,14 @@ void PatternApplicator::applyCostModel(CostModel model) { anyOpPatterns.clear(); for (const auto &pat : owningPatternList) { // If the pattern is always impossible to match, just ignore it. 
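As a hedged usage sketch (assuming the `PatternApplicator` interface used in the diff above; `applicator` is a hypothetical instance), a cost model that rates a pattern impossible to match now has the drop reported rather than silently applied:

    // Every pattern is rated impossible to match here; with this patch,
    // each dropped pattern is logged under -debug-only=pattern-match
    // (the DEBUG_TYPE defined above) instead of disappearing silently.
    applicator.applyCostModel([](const Pattern &pattern) {
      return PatternBenefit::impossibleToMatch();
    });
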
- if (pat->getBenefit().isImpossibleToMatch()) + if (pat->getBenefit().isImpossibleToMatch()) { + LLVM_DEBUG({ + llvm::dbgs() + << "Ignoring pattern '" << pat->getRootKind() + << "' because it is impossible to match (by pattern benefit)\n"; + }); continue; + } if (Optional<OperationName> opName = pat->getRootKind()) patterns[*opName].push_back(pat.get()); else @@ -223,8 +232,14 @@ void PatternApplicator::applyCostModel(CostModel model) { auto processPatternList = [&](SmallVectorImpl<RewritePattern *> &list) { // Special case for one pattern in the list, which is the most common case. if (list.size() == 1) { - if (model(*list.front()).isImpossibleToMatch()) + if (model(*list.front()).isImpossibleToMatch()) { + LLVM_DEBUG({ + llvm::dbgs() << "Ignoring pattern '" << list.front()->getRootKind() + << "' because it is impossible to match or cannot lead " + "to legal IR (by cost model)\n"; + }); list.clear(); + } return; } @@ -236,8 +251,14 @@ void PatternApplicator::applyCostModel(CostModel model) { // Sort patterns with highest benefit first, and remove those that are // impossible to match. std::stable_sort(list.begin(), list.end(), cmp); - while (!list.empty() && benefits[list.back()].isImpossibleToMatch()) + while (!list.empty() && benefits[list.back()].isImpossibleToMatch()) { + LLVM_DEBUG({ + llvm::dbgs() << "Ignoring pattern '" << list.back()->getRootKind() + << "' because it is impossible to match or cannot lead to " + "legal IR (by cost model)\n"; + }); list.pop_back(); + } }; for (auto &it : patterns) processPatternList(it.second); From 4e4a3feecdb6bd56483b9c6ba9116609c20588aa Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Wed, 9 Sep 2020 09:29:51 +0200 Subject: [PATCH 0132/1079] [lldb][doc] Mention python3-dev instead of python2.7-dev in build docs --- lldb/docs/resources/build.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/docs/resources/build.rst b/lldb/docs/resources/build.rst index c1cb6ec1a9343..b5c1fb8cb0012 100644 --- a/lldb/docs/resources/build.rst +++ b/lldb/docs/resources/build.rst @@ -71,7 +71,7 @@ commands below. :: > yum install libedit-devel libxml2-devel ncurses-devel python-devel swig - > sudo apt-get install build-essential subversion swig python2.7-dev libedit-dev libncurses5-dev + > sudo apt-get install build-essential subversion swig python3-dev libedit-dev libncurses5-dev > pkg install swig python > pkgin install swig python27 cmake ninja-build > brew install swig cmake ninja From c0e5e3fbfa504c3792023d0db9008b08caa6b6d7 Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Tue, 8 Sep 2020 11:32:02 +0000 Subject: [PATCH 0133/1079] [Ignore Expressions] Fix performance regression by inlining `Ignore*SingleStep` We also add a `const` version of `IgnoreExprNodes` Differential Revision: https://reviews.llvm.org/D87278 --- clang/include/clang/AST/IgnoreExpr.h | 118 ++++++++++++++++++++++-- clang/lib/AST/CMakeLists.txt | 1 - clang/lib/AST/IgnoreExpr.cpp | 129 --------------------------- 3 files changed, 109 insertions(+), 139 deletions(-) delete mode 100644 clang/lib/AST/IgnoreExpr.cpp diff --git a/clang/include/clang/AST/IgnoreExpr.h b/clang/include/clang/AST/IgnoreExpr.h index 0aeb547606a2b..1c2b538e5b635 100644 --- a/clang/include/clang/AST/IgnoreExpr.h +++ b/clang/include/clang/AST/IgnoreExpr.h @@ -14,6 +14,7 @@ #define LLVM_CLANG_AST_IGNOREEXPR_H #include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" namespace clang { namespace detail { @@ -38,23 +39,122 @@ template <typename... FnTys> Expr *IgnoreExprNodes(Expr *E, FnTys &&... 
Fns) { return E; } -Expr *IgnoreImplicitCastsSingleStep(Expr *E); +template <typename... FnTys> +const Expr *IgnoreExprNodes(const Expr *E, FnTys &&...Fns) { + return IgnoreExprNodes(const_cast<Expr *>(E), std::forward<FnTys>(Fns)...); +} + +inline Expr *IgnoreImplicitCastsSingleStep(Expr *E) { + if (auto *ICE = dyn_cast<ImplicitCastExpr>(E)) + return ICE->getSubExpr(); + + if (auto *FE = dyn_cast<FullExpr>(E)) + return FE->getSubExpr(); + + return E; +} + +inline Expr *IgnoreImplicitCastsExtraSingleStep(Expr *E) { + // FIXME: Skip MaterializeTemporaryExpr and SubstNonTypeTemplateParmExpr in + // addition to what IgnoreImpCasts() skips to account for the current + // behaviour of IgnoreParenImpCasts(). + Expr *SubE = IgnoreImplicitCastsSingleStep(E); + if (SubE != E) + return SubE; + + if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(E)) + return MTE->getSubExpr(); + + if (auto *NTTP = dyn_cast<SubstNonTypeTemplateParmExpr>(E)) + return NTTP->getReplacement(); + + return E; +} + +inline Expr *IgnoreCastsSingleStep(Expr *E) { + if (auto *CE = dyn_cast<CastExpr>(E)) + return CE->getSubExpr(); + + if (auto *FE = dyn_cast<FullExpr>(E)) + return FE->getSubExpr(); + + if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(E)) + return MTE->getSubExpr(); + + if (auto *NTTP = dyn_cast<SubstNonTypeTemplateParmExpr>(E)) + return NTTP->getReplacement(); + + return E; +} + +inline Expr *IgnoreLValueCastsSingleStep(Expr *E) { + // Skip what IgnoreCastsSingleStep skips, except that only + // lvalue-to-rvalue casts are skipped. + if (auto *CE = dyn_cast<CastExpr>(E)) + if (CE->getCastKind() != CK_LValueToRValue) + return E; -Expr *IgnoreImplicitCastsExtraSingleStep(Expr *E); + return IgnoreCastsSingleStep(E); +} + +inline Expr *IgnoreBaseCastsSingleStep(Expr *E) { + if (auto *CE = dyn_cast<CastExpr>(E)) + if (CE->getCastKind() == CK_DerivedToBase || + CE->getCastKind() == CK_UncheckedDerivedToBase || + CE->getCastKind() == CK_NoOp) + return CE->getSubExpr(); + + return E; +} + +inline Expr *IgnoreImplicitSingleStep(Expr *E) { + Expr *SubE = IgnoreImplicitCastsSingleStep(E); + if (SubE != E) + return SubE; + + if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(E)) + return MTE->getSubExpr(); + + if (auto *BTE = dyn_cast<CXXBindTemporaryExpr>(E)) + return BTE->getSubExpr(); + + return E; +} + +inline Expr *IgnoreImplicitAsWrittenSingleStep(Expr *E) { + if (auto *ICE = dyn_cast<ImplicitCastExpr>(E)) + return ICE->getSubExprAsWritten(); -Expr *IgnoreCastsSingleStep(Expr *E); + return IgnoreImplicitSingleStep(E); +} -Expr *IgnoreLValueCastsSingleStep(Expr *E); +inline Expr *IgnoreParensOnlySingleStep(Expr *E) { + if (auto *PE = dyn_cast<ParenExpr>(E)) + return PE->getSubExpr(); + return E; +} -Expr *IgnoreBaseCastsSingleStep(Expr *E); +inline Expr *IgnoreParensSingleStep(Expr *E) { + if (auto *PE = dyn_cast<ParenExpr>(E)) + return PE->getSubExpr(); -Expr *IgnoreImplicitSingleStep(Expr *E); + if (auto *UO = dyn_cast<UnaryOperator>(E)) { + if (UO->getOpcode() == UO_Extension) + return UO->getSubExpr(); + } -Expr *IgnoreImplicitAsWrittenSingleStep(Expr *E); + else if (auto *GSE = dyn_cast<GenericSelectionExpr>(E)) { + if (!GSE->isResultDependent()) + return GSE->getResultExpr(); + } -Expr *IgnoreParensOnlySingleStep(Expr *E); + else if (auto *CE = dyn_cast<ChooseExpr>(E)) { + if (!CE->isConditionDependent()) + return CE->getChosenSubExpr(); + } -Expr *IgnoreParensSingleStep(Expr *E); + return E; +} } // namespace clang diff --git a/clang/lib/AST/CMakeLists.txt b/clang/lib/AST/CMakeLists.txt index dfd26fd97bc6d..35099fd0dacf8 100644 --- a/clang/lib/AST/CMakeLists.txt +++ b/clang/lib/AST/CMakeLists.txt @@ -55,7 +55,6 @@ add_clang_library(clangAST ExternalASTMerger.cpp ExternalASTSource.cpp FormatString.cpp - IgnoreExpr.cpp InheritViz.cpp Interp/ByteCodeEmitter.cpp Interp/ByteCodeExprGen.cpp diff --git a/clang/lib/AST/IgnoreExpr.cpp 
b/clang/lib/AST/IgnoreExpr.cpp deleted file mode 100644 index 65aaaeb6a1ed0..0000000000000 --- a/clang/lib/AST/IgnoreExpr.cpp +++ /dev/null @@ -1,129 +0,0 @@ -//===--- IgnoreExpr.cpp - Ignore intermediate Expressions -----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements common functions to ignore intermediate expression nodes -// -//===----------------------------------------------------------------------===// - -#include "clang/AST/IgnoreExpr.h" -#include "clang/AST/Expr.h" -#include "clang/AST/ExprCXX.h" - -using namespace clang; - -Expr *clang::IgnoreImplicitCastsSingleStep(Expr *E) { - if (auto *ICE = dyn_cast<ImplicitCastExpr>(E)) - return ICE->getSubExpr(); - - if (auto *FE = dyn_cast<FullExpr>(E)) - return FE->getSubExpr(); - - return E; -} - -Expr *clang::IgnoreImplicitCastsExtraSingleStep(Expr *E) { - // FIXME: Skip MaterializeTemporaryExpr and SubstNonTypeTemplateParmExpr in - // addition to what IgnoreImpCasts() skips to account for the current - // behaviour of IgnoreParenImpCasts(). - Expr *SubE = IgnoreImplicitCastsSingleStep(E); - if (SubE != E) - return SubE; - - if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(E)) - return MTE->getSubExpr(); - - if (auto *NTTP = dyn_cast<SubstNonTypeTemplateParmExpr>(E)) - return NTTP->getReplacement(); - - return E; -} - -Expr *clang::IgnoreCastsSingleStep(Expr *E) { - if (auto *CE = dyn_cast<CastExpr>(E)) - return CE->getSubExpr(); - - if (auto *FE = dyn_cast<FullExpr>(E)) - return FE->getSubExpr(); - - if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(E)) - return MTE->getSubExpr(); - - if (auto *NTTP = dyn_cast<SubstNonTypeTemplateParmExpr>(E)) - return NTTP->getReplacement(); - - return E; -} - -Expr *clang::IgnoreLValueCastsSingleStep(Expr *E) { - // Skip what IgnoreCastsSingleStep skips, except that only - // lvalue-to-rvalue casts are skipped. 
- if (auto *CE = dyn_cast<CastExpr>(E)) - if (CE->getCastKind() != CK_LValueToRValue) - return E; - - return IgnoreCastsSingleStep(E); -} - -Expr *clang::IgnoreBaseCastsSingleStep(Expr *E) { - if (auto *CE = dyn_cast<CastExpr>(E)) - if (CE->getCastKind() == CK_DerivedToBase || - CE->getCastKind() == CK_UncheckedDerivedToBase || - CE->getCastKind() == CK_NoOp) - return CE->getSubExpr(); - - return E; -} - -Expr *clang::IgnoreImplicitSingleStep(Expr *E) { - Expr *SubE = IgnoreImplicitCastsSingleStep(E); - if (SubE != E) - return SubE; - - if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(E)) - return MTE->getSubExpr(); - - if (auto *BTE = dyn_cast<CXXBindTemporaryExpr>(E)) - return BTE->getSubExpr(); - - return E; -} - -Expr *clang::IgnoreImplicitAsWrittenSingleStep(Expr *E) { - if (auto *ICE = dyn_cast<ImplicitCastExpr>(E)) - return ICE->getSubExprAsWritten(); - - return IgnoreImplicitSingleStep(E); -} - -Expr *clang::IgnoreParensOnlySingleStep(Expr *E) { - if (auto *PE = dyn_cast<ParenExpr>(E)) - return PE->getSubExpr(); - return E; -} - -Expr *clang::IgnoreParensSingleStep(Expr *E) { - if (auto *PE = dyn_cast<ParenExpr>(E)) - return PE->getSubExpr(); - - if (auto *UO = dyn_cast<UnaryOperator>(E)) { - if (UO->getOpcode() == UO_Extension) - return UO->getSubExpr(); - } - - else if (auto *GSE = dyn_cast<GenericSelectionExpr>(E)) { - if (!GSE->isResultDependent()) - return GSE->getResultExpr(); - } - - else if (auto *CE = dyn_cast<ChooseExpr>(E)) { - if (!CE->isConditionDependent()) - return CE->getChosenSubExpr(); - } - - return E; -} From fdc8a1aac293084ffb2d7f04b1225c8e2fb3b164 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 9 Sep 2020 07:32:57 +0000 Subject: [PATCH 0134/1079] [gn build] Port c0e5e3fbfa5 --- llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn index bb3d69d046bef..4d645799dbf65 100644 --- a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn @@ -81,7 +81,6 @@ static_library("AST") { "ExternalASTMerger.cpp", "ExternalASTSource.cpp", "FormatString.cpp", - "IgnoreExpr.cpp", "InheritViz.cpp", "Interp/ByteCodeEmitter.cpp", "Interp/ByteCodeExprGen.cpp", From 133322d2e30877d5039643ab5c2ed02f75c29466 Mon Sep 17 00:00:00 2001 From: Frederik Gossen Date: Wed, 9 Sep 2020 07:44:38 +0000 Subject: [PATCH 0135/1079] [MLIR][Standard] Update `tensor_from_elements` assembly format Remove the redundant parentheses, which are used in none of the other operation formats. 
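In short, and as the updated tests below confirm, the assembly format changes like this:

    // Old form, with the parenthesized operand list:
    %0 = tensor_from_elements(%c0, %c1) : tensor<2xindex>
    // New form, consistent with other std operations:
    %0 = tensor_from_elements %c0, %c1 : tensor<2xindex>
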
Differential Revision: https://reviews.llvm.org/D86287 --- .../include/mlir/Dialect/StandardOps/IR/Ops.td | 11 +++-------- mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 18 +++++++++++++----- .../ShapeToStandard/shape-to-standard.mlir | 6 +++--- mlir/test/IR/core-ops.mlir | 12 ++++++------ mlir/test/IR/invalid-ops.mlir | 4 ++-- mlir/test/Transforms/canonicalize.mlir | 2 +- 6 files changed, 28 insertions(+), 25 deletions(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index f326ae5578650..c276818589afe 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -1621,14 +1621,9 @@ def TensorFromElementsOp : Std_Op<"tensor_from_elements", let results = (outs AnyTensor:$result); let skipDefaultBuilders = 1; - let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, ValueRange elements", [{ - assert(!elements.empty() && "expected at least one element"); - result.addOperands(elements); - result.addTypes( - RankedTensorType::get({static_cast<int64_t>(elements.size())}, - *elements.getTypes().begin())); - }]>]; + let builders = [ + OpBuilder<"OpBuilder &b, OperationState &result, ValueRange elements"> + ]; let hasCanonicalizer = 1; } diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 65f8b83d9a718..1c69019870198 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -1744,9 +1744,9 @@ static ParseResult parseTensorFromElementsOp(OpAsmParser &parser, OperationState &result) { SmallVector<OpAsmParser::OperandType, 4> elementsOperands; Type resultType; - if (parser.parseLParen() || parser.parseOperandList(elementsOperands) || - parser.parseRParen() || parser.parseOptionalAttrDict(result.attributes) || - parser.parseColon() || parser.parseType(resultType)) + if (parser.parseOperandList(elementsOperands) || + parser.parseOptionalAttrDict(result.attributes) || + parser.parseColonType(resultType)) return failure(); if (parser.resolveOperands(elementsOperands, @@ -1759,9 +1759,9 @@ static ParseResult parseTensorFromElementsOp(OpAsmParser &parser, } static void print(OpAsmPrinter &p, TensorFromElementsOp op) { - p << "tensor_from_elements(" << op.elements() << ')'; + p << "tensor_from_elements " << op.elements(); p.printOptionalAttrDict(op.getAttrs()); - p << " : " << op.result().getType(); + p << " : " << op.getType(); } static LogicalResult verify(TensorFromElementsOp op) { @@ -1778,6 +1778,14 @@ static LogicalResult verify(TensorFromElementsOp op) { return success(); } +void TensorFromElementsOp::build(OpBuilder &builder, OperationState &result, + ValueRange elements) { + assert(!elements.empty() && "expected at least one element"); + result.addOperands(elements); + result.addTypes(RankedTensorType::get({static_cast<int64_t>(elements.size())}, + *elements.getTypes().begin())); +} + namespace { // Canonicalizes the pattern of the form diff --git a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir index bf8e74e5143ed..4d2437a4877bc 100644 --- a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir +++ b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir @@ -94,7 +94,7 @@ func @const_shape() -> tensor<?xindex> { // CHECK: %[[C1:.*]] = constant 1 : index // CHECK: %[[C2:.*]] = constant 2 : index // CHECK: %[[C3:.*]] = constant 3 : index - // CHECK: %[[TENSOR3:.*]] = 
tensor_from_elements(%[[C1]], %[[C2]], %[[C3]]) + // CHECK: %[[TENSOR3:.*]] = tensor_from_elements %[[C1]], %[[C2]], %[[C3]] // CHECK: %[[RESULT:.*]] = tensor_cast %[[TENSOR3]] : tensor<3xindex> to tensor<?xindex> // CHECK: return %[[RESULT]] : tensor<?xindex> %shape = shape.const_shape [1, 2, 3] : tensor<?xindex> @@ -223,7 +223,7 @@ func @shape_of_stat(%arg : tensor<1x2x3xf32>) { // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index // CHECK-DAG: %[[C3:.*]] = constant 3 : index - // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C2]], %[[C3]]) : tensor<3xindex> + // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements %[[C1]], %[[C2]], %[[C3]] : tensor<3xindex> %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> tensor<?xindex> return } @@ -238,7 +238,7 @@ func @shape_of_dyn(%arg : tensor<1x5x?xf32>) { // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C5:.*]] = constant 5 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index // CHECK-DAG: %[[DYN_DIM:.*]] = dim %[[ARG]], %[[C2]] : tensor<1x5x?xf32> - // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C5]], %[[DYN_DIM]]) : tensor<3xindex> + // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements %[[C1]], %[[C5]], %[[DYN_DIM]] : tensor<3xindex> %shape = shape.shape_of %arg : tensor<1x5x?xf32> -> tensor<?xindex> return } diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir index 69e974bc41734..e4472b444f034 100644 --- a/mlir/test/IR/core-ops.mlir +++ b/mlir/test/IR/core-ops.mlir @@ -661,17 +661,17 @@ func @extract_element(%arg0: tensor<*xi32>, %arg1 : tensor<4x4xf32>) -> i32 { // CHECK-LABEL: func @tensor_from_elements() { func @tensor_from_elements() { %c0 = "std.constant"() {value = 0: index} : () -> index - // CHECK: %0 = tensor_from_elements(%c0) : tensor<1xindex> - %0 = tensor_from_elements(%c0) : tensor<1xindex> + // CHECK: %0 = tensor_from_elements %c0 : tensor<1xindex> + %0 = tensor_from_elements %c0 : tensor<1xindex> %c1 = "std.constant"() {value = 1: index} : () -> index - // CHECK: %1 = tensor_from_elements(%c0, %c1) : tensor<2xindex> - %1 = tensor_from_elements(%c0, %c1) : tensor<2xindex> + // CHECK: %1 = tensor_from_elements %c0, %c1 : tensor<2xindex> + %1 = tensor_from_elements %c0, %c1 : tensor<2xindex> %c0_f32 = "std.constant"() {value = 0.0: f32} : () -> f32 // CHECK: [[C0_F32:%.*]] = constant - // CHECK: %2 = tensor_from_elements([[C0_F32]]) : tensor<1xf32> - %2 = tensor_from_elements(%c0_f32) : tensor<1xf32> + // CHECK: %2 = tensor_from_elements [[C0_F32]] : tensor<1xf32> + %2 = tensor_from_elements %c0_f32 : tensor<1xf32> return } diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir index 55739119aa26d..71b007ef6e39f 100644 --- a/mlir/test/IR/invalid-ops.mlir +++ b/mlir/test/IR/invalid-ops.mlir @@ -597,7 +597,7 @@ func @extract_element_tensor_too_few_indices(%t : tensor<2x3xf32>, %i : index) { func @tensor_from_elements_wrong_result_type() { // expected-error@+2 {{expected result type to be a ranked tensor}} %c0 = constant 0 : i32 - %0 = tensor_from_elements(%c0) : tensor<*xi32> + %0 = tensor_from_elements %c0 : tensor<*xi32> return } @@ -606,7 +606,7 @@ func @tensor_from_elements_wrong_elements_count() { // expected-error@+2 {{expected result type to be a 1D tensor with 1 element}} %c0 = constant 0 : index - %0 = tensor_from_elements(%c0) : tensor<2xindex> + %0 = tensor_from_elements %c0 : tensor<2xindex> return } diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir index 7333446c6e5d9..76fe82588be3e 100644 --- a/mlir/test/Transforms/canonicalize.mlir +++ 
b/mlir/test/Transforms/canonicalize.mlir @@ -981,7 +981,7 @@ func @memref_cast_folding_subview_static(%V: memref<16x16xf32>, %a: index, %b: i func @extract_element_from_tensor_from_elements(%element : index) -> index { // CHECK-SAME: ([[ARG:%.*]]: index) %c0 = constant 0 : index - %tensor = tensor_from_elements(%element) : tensor<1xindex> + %tensor = tensor_from_elements %element : tensor<1xindex> %extracted_element = extract_element %tensor[%c0] : tensor<1xindex> // CHECK: [[ARG]] : index return %extracted_element : index From 5106a8b8f8d0d3dd6c3fc0554f05402d8d9177ef Mon Sep 17 00:00:00 2001 From: Frederik Gossen Date: Wed, 9 Sep 2020 07:53:13 +0000 Subject: [PATCH 0136/1079] [MLIR][Shape] Lower `shape_of` to `dynamic_tensor_from_elements` Take advantage of the new `dynamic_tensor_from_elements` operation in `std`. Instead of stack-allocated memory, we can now lower directly to a single `std` operation. Differential Revision: https://reviews.llvm.org/D86935 --- .../mlir/Dialect/StandardOps/IR/Ops.td | 7 +++++ .../ShapeToStandard/ShapeToStandard.cpp | 27 +++++++------------ mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 16 +++++++++++ .../ShapeToStandard/shape-to-standard.mlir | 13 ++++----- 4 files changed, 37 insertions(+), 26 deletions(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index c276818589afe..44bbb423b2d95 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -1504,6 +1504,13 @@ def DynamicTensorFromElementsOp : Std_Op<"dynamic_tensor_from_elements", let arguments = (ins Variadic<Index>:$dynamicExtents); let results = (outs AnyRankedTensor:$result); let regions = (region SizedRegion<1>:$body); + + let builders = [ + // Build op and populate its body per callback function. + OpBuilder<"OpBuilder &b, OperationState &result, Type resultTy, " + "ValueRange dynamicExtents, " + "function_ref<void(OpBuilder &, Location, ValueRange)>">, + ]; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp index 8c917e08f942c..f3f11e89af02f 100644 --- a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp +++ b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp @@ -422,6 +422,7 @@ LogicalResult ShapeOfOpConversion::matchAndRewrite( return failure(); // For ranked tensor arguments, lower to `tensor_from_elements`. + auto loc = op.getLoc(); ShapeOfOp::Adaptor transformed(operands); Value tensor = transformed.arg(); Type tensorTy = tensor.getType(); @@ -431,7 +432,6 @@ LogicalResult ShapeOfOpConversion::matchAndRewrite( SmallVector extentValues; RankedTensorType rankedTensorTy = tensorTy.cast<RankedTensorType>(); int64_t rank = rankedTensorTy.getRank(); - auto loc = op.getLoc(); for (int64_t i = 0; i < rank; i++) { if (rankedTensorTy.isDynamicDim(i)) { Value extent = rewriter.create<DimOp>(loc, tensor, i); @@ -451,26 +451,17 @@ LogicalResult ShapeOfOpConversion::matchAndRewrite( return success(); } - // Allocate stack memory. - auto loc = op.getLoc(); + // Lower to `dynamic_tensor_from_elements` otherwise. + auto *ctx = rewriter.getContext(); Value rank = rewriter.create<RankOp>(loc, tensor); - Type indexTy = rewriter.getIndexType(); - Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); - Value mem = rewriter.create<AllocaOp>(loc, memTy, ValueRange{rank}); - - // Copy shape extents to stack-allocated memory. 
- Value zero = rewriter.create(loc, 0); - Value one = rewriter.create(loc, 1); - rewriter.create( - loc, zero, rank, one, llvm::None, - [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { - Value dim = rewriter.create(loc, tensor, iv); - rewriter.create(loc, dim, mem, ValueRange{iv}); - rewriter.create(loc); + rewriter.replaceOpWithNewOp( + op, getExtentTensorType(ctx), ValueRange{rank}, + [&](OpBuilder &b, Location loc, ValueRange args) { + Value dim = args.front(); + Value extent = b.create(loc, tensor, dim); + b.create(loc, extent); }); - // Load extents to tensor value. - rewriter.replaceOpWithNewOp(op.getOperation(), mem); return success(); } diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 1c69019870198..a0ad05852e230 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -1694,6 +1694,22 @@ static LogicalResult verify(DynamicTensorFromElementsOp op) { return success(); } +void DynamicTensorFromElementsOp::build( + OpBuilder &b, OperationState &result, Type resultTy, + ValueRange dynamicExtents, + function_ref bodyBuilder) { + build(b, result, resultTy, dynamicExtents); + + // Build and populate body. + OpBuilder::InsertionGuard guard(b); + Region *bodyRegion = result.regions.front().get(); + auto rank = resultTy.cast().getRank(); + SmallVector argumentTypes(rank, b.getIndexType()); + Block *bodyBlock = + b.createBlock(bodyRegion, bodyRegion->end(), argumentTypes); + bodyBuilder(b, result.location, bodyBlock->getArguments()); +} + //===----------------------------------------------------------------------===// // ExtractElementOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir index 4d2437a4877bc..4168634f1240d 100644 --- a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir +++ b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir @@ -191,14 +191,11 @@ func @shape_of(%arg : tensor<*xf32>) { // CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) func @shape_of_unranked(%arg : tensor<*xf32>) { // CHECK: %[[RANK:.*]] = rank %[[ARG]] : tensor<*xf32> - // CHECK: %[[SHAPE_MEM:.*]] = alloca(%[[RANK]]) : memref - // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[C1:.*]] = constant 1 : index - // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] { - // CHECK: %[[DIM:.]] = dim %[[ARG]], %[[I]] : tensor<*xf32> - // CHECK: store %[[DIM]], %[[SHAPE_MEM]][%[[I]]] : memref - // CHECK: } - // CHECK: %[[SHAPE:.*]] = tensor_load %[[SHAPE_MEM]] : memref + // CHECK: %[[SHAPE:.*]] = dynamic_tensor_from_elements %[[RANK]] { + // CHECK: ^bb0(%[[I:.*]]: index): + // CHECK: %[[EXTENT:.*]] = dim %[[ARG]], %[[I]] : tensor<*xf32> + // CHECK: yield %[[EXTENT]] : index + // CHECK: } : tensor %shape = shape.shape_of %arg : tensor<*xf32> -> tensor return } From 32c8da41dc0cb99651823a1a21130c2cbdf688e1 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Wed, 9 Sep 2020 09:54:47 +0200 Subject: [PATCH 0137/1079] [lldb] Don't infinite loop in SemaSourceWithPriorities::CompleteType when trying to complete a forward decl SemaSourceWithPriorities is a special SemaSource that wraps our normal LLDB ExternalASTSource and the ASTReader (which is used for the C++ module loading). It's only active when the `import-std-module` setting is turned on. 
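For context, a rough sketch of that setup (simplified; the container type is
an assumption, but the class, its base, and the `Sources` list are as in
ASTUtils.h):

  class SemaSourceWithPriorities : public clang::ExternalSemaSource {
    /// The sources to query, ordered by decreasing priority.
    llvm::SmallVector<clang::ExternalSemaSource *, 2> Sources;

  public:
    /// Asks each source in order to complete the type.
    void CompleteType(clang::TagDecl *Tag) override;
  };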
The `CompleteType` function in `SemaSourceWithPriorities` loops over all
ExternalASTSources and asks each one to complete the type. However, that loop
is nested in another loop that repeats until the type is complete. If that
function is ever called on a type that is a forward decl, LLDB goes into an
infinite loop.

I remember I added that second loop and the comment because I thought I saw a
similar pattern in some other Clang code, but after some grepping I can't find
that code anywhere and it seems the rest of the code base only calls
CompleteType once (it would also be kinda silly to call it multiple times).
So it seems that's just a silly mistake.

This is implicitly tested by importing `std::pair`, but I also added a simpler
dedicated test that creates a dummy libc++ module with some forward
declarations and then imports them into the scratch AST context. At some point
the ASTImporter will check if one of the forward decls could be completed by
the ExternalASTSource, which will cause the `SemaSourceWithPriorities` to go
into an infinite loop once it receives the `CompleteType` call.

Reviewed By: shafik

Differential Revision: https://reviews.llvm.org/D87289
---
 .../Plugins/ExpressionParser/Clang/ASTUtils.h | 15 +++----
 .../forward_decl_from_module/Makefile         |  9 +++++
 .../TestForwardDeclFromStdModule.py           | 39 +++++++++++++++++++
 .../forward_decl_from_module/main.cpp         |  8 ++++
 .../root/usr/include/c++/v1/module.modulemap  |  3 ++
 .../root/usr/include/c++/v1/vector            | 14 +++++++
 .../root/usr/include/libc_header.h            |  1 +
 7 files changed, 80 insertions(+), 9 deletions(-)
 create mode 100644 lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/Makefile
 create mode 100644 lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/TestForwardDeclFromStdModule.py
 create mode 100644 lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/main.cpp
 create mode 100644 lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/module.modulemap
 create mode 100644 lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/vector
 create mode 100644 lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/libc_header.h

diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h b/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h
index 769b18d54cedd..b70ec223df4df 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h
@@ -359,15 +359,12 @@ class SemaSourceWithPriorities : public clang::ExternalSemaSource {
   }
 
   void CompleteType(clang::TagDecl *Tag) override {
-    while (!Tag->isCompleteDefinition())
-      for (size_t i = 0; i < Sources.size(); ++i) {
-        // FIXME: We are technically supposed to loop here too until
-        // Tag->isCompleteDefinition() is true, but if our low quality source
-        // is failing to complete the tag this code will deadlock.
-        Sources[i]->CompleteType(Tag);
-        if (Tag->isCompleteDefinition())
-          break;
-      }
+    for (clang::ExternalSemaSource *S : Sources) {
+      S->CompleteType(Tag);
+      // Stop after the first source completed the type.
+      if (Tag->isCompleteDefinition())
+        break;
+    }
   }
 
   void CompleteType(clang::ObjCInterfaceDecl *Class) override {
diff --git a/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/Makefile b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/Makefile
new file mode 100644
index 0000000000000..4915cdae87641
--- /dev/null
+++ b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/Makefile
@@ -0,0 +1,9 @@
+# We don't have any standard include directories, so we can't
+# parse the test_common.h header we usually inject as it includes
+# system headers.
+NO_TEST_COMMON_H := 1
+
+CXXFLAGS_EXTRAS = -I $(SRCDIR)/root/usr/include/c++/v1/ -I $(SRCDIR)/root/usr/include/ -nostdinc -nostdinc++
+CXX_SOURCES := main.cpp
+
+include Makefile.rules
diff --git a/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/TestForwardDeclFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/TestForwardDeclFromStdModule.py
new file mode 100644
index 0000000000000..48459abb92668
--- /dev/null
+++ b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/TestForwardDeclFromStdModule.py
@@ -0,0 +1,39 @@
+"""
+Tests forward declarations coming from the `std` module.
+"""
+
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+import os
+
+class TestCase(TestBase):
+
+    mydir = TestBase.compute_mydir(__file__)
+
+    # We only emulate a fake libc++ in this test and don't use the real libc++,
+    # but we still add the libc++ category so that this test is only run in
+    # test configurations where libc++ is actually supposed to be tested.
+    @add_test_categories(["libc++"])
+    @skipIfRemote
+    @skipIf(compiler=no_match("clang"))
+    def test(self):
+        self.build()
+
+        sysroot = os.path.join(os.getcwd(), "root")
+
+        # Set the sysroot where our dummy libc++ exists.
+        self.runCmd("platform select --sysroot '" + sysroot + "' host", CURRENT_EXECUTABLE_SET)
+
+        lldbutil.run_to_source_breakpoint(self,
+            "// Set break point at this line.", lldb.SBFileSpec("main.cpp"))
+
+        self.runCmd("settings set target.import-std-module true")
+
+        # Print the dummy `std::vector`. It only has the dummy member in it
+        # so the standard `std::vector` formatter can't format it. Instead use
+        # the raw output so LLDB has to show the member variable.
+        # Both `std::vector` and the type of the member have forward
+        # declarations before their definitions.
+        self.expect("expr --raw -- v",
+            substrs=['(std::__1::vector<int>) $0 = {', 'f = 0x', '}'])
diff --git a/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/main.cpp b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/main.cpp
new file mode 100644
index 0000000000000..a0b02d5c68141
--- /dev/null
+++ b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/main.cpp
@@ -0,0 +1,8 @@
+#include <vector>
+
+int main(int argc, char **argv) {
+  // Makes sure we have the mock libc headers in the debug information.
+  libc_struct s;
+  std::vector<int> v;
+  return 0; // Set break point at this line.
+}
diff --git a/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/module.modulemap b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/module.modulemap
new file mode 100644
index 0000000000000..f149be7b7d21a
--- /dev/null
+++ b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/module.modulemap
@@ -0,0 +1,3 @@
+module std {
+  module "vector" { header "vector" export * }
+}
diff --git a/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/vector b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/vector
new file mode 100644
index 0000000000000..c2d77aab07110
--- /dev/null
+++ b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/vector
@@ -0,0 +1,14 @@
+#include "libc_header.h"
+
+namespace std {
+  inline namespace __1 {
+    // A forward decl of `vector`.
+    template<typename T> class vector;
+    // Pretend to be a std::vector template we need to instantiate in LLDB
+    // when import-std-module is enabled.
+    template<typename T>
+    struct vector { class F; F *f; };
+    // The definition of our forward declared nested class.
+    template<typename T> class vector<T>::F { int x; };
+  }
+}
diff --git a/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/libc_header.h b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/libc_header.h
new file mode 100644
index 0000000000000..47525c9db3467
--- /dev/null
+++ b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/libc_header.h
@@ -0,0 +1 @@
+struct libc_struct {};

From 2bcc4db761768f1b7431237920f26360549ca268 Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Wed, 9 Sep 2020 09:00:41 +0100
Subject: [PATCH 0138/1079] [EarlyCSE] Explicitly require AAResultsWrapperPass.

The MemorySSAWrapperPass depends on AAResultsWrapperPass; if MemorySSA is
preserved but AAResultsWrapperPass is not, this can lead to a crash when
updating the last user of the MemorySSAWrapperPass.

Alternatively AAResultsWrapperPass could be marked preserved by GVN, but I am
not sure if that would be safe. I am not sure what is required in order to
preserve AAResultsWrapperPass. At the moment, it seems like a couple of passes
that do similar transforms to GVN are preserving it.
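The gist of the EarlyCSE change, as a simplified sketch of the legacy pass's
dependency declaration (see the diff below for the exact context):

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    if (UseMemorySSA) {
      // Explicitly require AA so it is guaranteed to be alive whenever a
      // preserved MemorySSA is reused by this pass.
      AU.addRequired<AAResultsWrapperPass>();
      AU.addRequired<MemorySSAWrapperPass>();
      AU.addPreserved<MemorySSAWrapperPass>();
    }
  }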
Reviewed By: asbirlea

Differential Revision: https://reviews.llvm.org/D87137
---
 llvm/lib/Transforms/Scalar/EarlyCSE.cpp                    | 2 ++
 llvm/lib/Transforms/Scalar/GVN.cpp                         | 1 -
 llvm/test/Transforms/EarlyCSE/reuse-preserved-memoryssa.ll | 7 +++++++
 3 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/EarlyCSE/reuse-preserved-memoryssa.ll

diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 51da10fc48790..b655204d26dd2 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -1463,6 +1463,7 @@ class EarlyCSELegacyCommonPass : public FunctionPass {
     AU.addRequired<TargetLibraryInfoWrapperPass>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
     if (UseMemorySSA) {
+      AU.addRequired<AAResultsWrapperPass>();
       AU.addRequired<MemorySSAWrapperPass>();
       AU.addPreserved<MemorySSAWrapperPass>();
     }
@@ -1504,6 +1505,7 @@ INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
                       "Early CSE w/ MemorySSA", false, false)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index c71038d66f995..036ca1d1054fe 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -2850,7 +2850,6 @@ class llvm::gvn::GVNLegacyPass : public FunctionPass {
     if (Impl.isMemDepEnabled())
       AU.addRequired<MemoryDependenceWrapperPass>();
     AU.addRequired<AAResultsWrapperPass>();
-    AU.addPreserved<MemorySSAWrapperPass>();
 
     AU.addPreserved<DominatorTreeWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
diff --git a/llvm/test/Transforms/EarlyCSE/reuse-preserved-memoryssa.ll b/llvm/test/Transforms/EarlyCSE/reuse-preserved-memoryssa.ll
new file mode 100644
index 0000000000000..744389c24db28
--- /dev/null
+++ b/llvm/test/Transforms/EarlyCSE/reuse-preserved-memoryssa.ll
@@ -0,0 +1,7 @@
+; RUN: opt -memoryssa -gvn -early-cse-memssa %s -S | FileCheck %s
+
+; CHECK: define void @foo(
+
+define void @foo() {
+  ret void
+}

From 7866b91405693df5b4cf6ba770b3a92d48b0c508 Mon Sep 17 00:00:00 2001
From: Raphael Isemann
Date: Wed, 9 Sep 2020 10:16:56 +0200
Subject: [PATCH 0139/1079] [lldb] Fix a crash when the ASTImporter is giving
 us two Imported callbacks for the same target decl

The ASTImporter has an `Imported(From, To)` callback that notifies subclasses
that a declaration has been imported in some way. LLDB uses this in the
`CompleteTagDeclsScope` to see which records have been imported into the
scratch context. If the record was declared inside the expression, then the
`CompleteTagDeclsScope` will forcibly import the full definition of that
record to the scratch context so that the expression AST can safely be
disposed later (otherwise we might end up going back to the deleted AST to
complete the minimally imported record).

The way this is implemented is that there is a list of decls that need to be
imported (`m_decls_to_complete`) and we keep completing the declarations
inside that list until the list is empty. Every `To` Decl we get via the
`Imported` callback will be added to the list of Decls to be completed.

There are some situations where the ASTImporter will actually give us two
`Imported` calls with the same `To` Decl. One way where this happens is if
the ASTImporter decides to merge an imported definition into an already
imported one. Another way is that the ASTImporter just happens to get two
calls to `ASTImporter::Import` for the same Decl.
This for example happens when importing the DeclContext of a Decl requires importing the Decl itself, such as when importing a RecordDecl that was declared inside a function. The bug addressed in this patch is that when we end up getting two `Imported` calls for the same `To` Decl, then we would crash in the `CompleteTagDeclsScope`. That's because the first time we complete the Decl we remove the Origin tracking information (that maps the Decl back to from where it came from). The next time we try to complete the same `To` Decl the Origin tracking information is gone and we hit the `to_context_md->getOrigin(decl).ctx == m_src_ctx` assert (`getOrigin(decl).ctx` is a nullptr the second time as the Origin was deleted). This is actually a regression coming from D72495. Before D72495 `m_decls_to_complete` was actually a set so every declaration in there could only be queued once to be completed. The set was changed to a vector to make the iteration over it deterministic, but that also causes that we now potentially end up trying to complete a Decl twice. This patch essentially just reverts D72495 and makes the `CompleteTagDeclsScope` use a SetVector for the list of declarations to be completed. The SetVector should filter out the duplicates (as the original `set` did) and also ensure that the completion order is deterministic. I actually couldn't find any way to cause LLDB to reproduce this bug by merging declarations (this would require that we for example declare two namespaces in a non-top-level expression which isn't possible). But the bug reproduces very easily by just declaring a class in an expression, so that's what the test is doing. Reviewed By: shafik Differential Revision: https://reviews.llvm.org/D85648 --- .../Clang/ClangASTImporter.cpp | 13 +++++-- .../TestRecordDeclInExpr.py | 34 +++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 lldb/test/API/lang/c/record_decl_in_expr/TestRecordDeclInExpr.py diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp index 73042c205a5ae..e2601a059bb77 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp @@ -216,7 +216,12 @@ namespace { /// imported while completing the original Decls). class CompleteTagDeclsScope : public ClangASTImporter::NewDeclListener { ClangASTImporter::ImporterDelegateSP m_delegate; - llvm::SmallVector m_decls_to_complete; + /// List of declarations in the target context that need to be completed. + /// Every declaration should only be completed once and therefore should only + /// be once in this list. + llvm::SetVector m_decls_to_complete; + /// Set of declarations that already were successfully completed (not just + /// added to m_decls_to_complete). llvm::SmallPtrSet m_decls_already_completed; clang::ASTContext *m_dst_ctx; clang::ASTContext *m_src_ctx; @@ -244,6 +249,9 @@ class CompleteTagDeclsScope : public ClangASTImporter::NewDeclListener { NamedDecl *decl = m_decls_to_complete.pop_back_val(); m_decls_already_completed.insert(decl); + // The decl that should be completed has to be imported into the target + // context from some other context. + assert(to_context_md->hasOrigin(decl)); // We should only complete decls coming from the source context. 
assert(to_context_md->getOrigin(decl).ctx == m_src_ctx); @@ -287,7 +295,8 @@ class CompleteTagDeclsScope : public ClangASTImporter::NewDeclListener { // Check if we already completed this type. if (m_decls_already_completed.count(to_named_decl) != 0) return; - m_decls_to_complete.push_back(to_named_decl); + // Queue this type to be completed. + m_decls_to_complete.insert(to_named_decl); } }; } // namespace diff --git a/lldb/test/API/lang/c/record_decl_in_expr/TestRecordDeclInExpr.py b/lldb/test/API/lang/c/record_decl_in_expr/TestRecordDeclInExpr.py new file mode 100644 index 0000000000000..16bf098dce8f3 --- /dev/null +++ b/lldb/test/API/lang/c/record_decl_in_expr/TestRecordDeclInExpr.py @@ -0,0 +1,34 @@ +""" +Tests declaring RecordDecls in non-top-level expressions. +""" + +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +class TestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + @no_debug_info_test + def test_fwd_decl(self): + # Declare a forward decl and import it to the scratch AST. + self.expect_expr("struct S; S *s = nullptr; s", result_type="S *") + + @no_debug_info_test + def test_struct(self): + # Declare a struct and import it to the scratch AST. + self.expect("expr struct S {}; S s; s", substrs=["= {}"]) + + @no_debug_info_test + def test_struct_with_fwd_decl(self): + # Import the forward decl to the scratch AST. + self.expect_expr("struct S; S *s = nullptr; s", result_type="S *") + # Merge the definition into the scratch AST. + self.expect("expr struct S {}; S s; s", substrs=["= {}"]) + + @no_debug_info_test + def test_struct_with_fwd_decl_same_expr(self): + # Test both a forward decl and a definition in one expression and + # import them into the scratch AST. 
+        self.expect("expr struct S; struct S{}; S s; s", substrs=["= {}"])

From b85222520f861a1812f991d6bd65950dda22f31b Mon Sep 17 00:00:00 2001
From: Raphael Isemann
Date: Wed, 9 Sep 2020 10:35:56 +0200
Subject: [PATCH 0141/1079] [lldb] Enable std::pair in CxxModuleHandler

This adds support for substituting std::pair instantiations when
import-std-module is enabled. With the fixes in the parent revisions we can
currently substitute a single pair (however, an expression whose result is a
second pair currently causes LLDB to crash while importing the second
template instantiation).
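For example, with `settings set target.import-std-module true` and a variable
declared as in the new test:

  std::pair<int, int> pair_int(1234, 5678);

the following expressions now evaluate correctly:

  (lldb) expr pair_int.first   // -> (int) 1234
  (lldb) expr pair_int.second  // -> (int) 5678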
Reviewed By: aprantl

Differential Revision: https://reviews.llvm.org/D85141
---
 .../Clang/CxxModuleHandler.cpp                |  1 +
 .../import-std-module/pair/Makefile           |  3 +++
 .../pair/TestPairFromStdModule.py             | 25 +++++++++++++++++++
 .../import-std-module/pair/main.cpp           |  6 +++++
 4 files changed, 35 insertions(+)
 create mode 100644 lldb/test/API/commands/expression/import-std-module/pair/Makefile
 create mode 100644 lldb/test/API/commands/expression/import-std-module/pair/TestPairFromStdModule.py
 create mode 100644 lldb/test/API/commands/expression/import-std-module/pair/main.cpp

diff --git a/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp b/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp
index 2f8cf1846ee77..38d9f8d1e4b80 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp
@@ -34,6 +34,7 @@ CxxModuleHandler::CxxModuleHandler(ASTImporter &importer, ASTContext *target)
       "weak_ptr",
       // utility
       "allocator",
+      "pair",
   };
   m_supported_templates.insert(supported_names.begin(), supported_names.end());
 }
diff --git a/lldb/test/API/commands/expression/import-std-module/pair/Makefile b/lldb/test/API/commands/expression/import-std-module/pair/Makefile
new file mode 100644
index 0000000000000..f938f7428468a
--- /dev/null
+++ b/lldb/test/API/commands/expression/import-std-module/pair/Makefile
@@ -0,0 +1,3 @@
+USE_LIBCPP := 1
+CXX_SOURCES := main.cpp
+include Makefile.rules
diff --git a/lldb/test/API/commands/expression/import-std-module/pair/TestPairFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/pair/TestPairFromStdModule.py
new file mode 100644
index 0000000000000..4f5b1ea8028b0
--- /dev/null
+++ b/lldb/test/API/commands/expression/import-std-module/pair/TestPairFromStdModule.py
@@ -0,0 +1,25 @@
+"""
+Test basic std::pair functionality.
+"""
+
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+class TestCase(TestBase):
+
+    mydir = TestBase.compute_mydir(__file__)
+
+    @add_test_categories(["libc++"])
+    @skipIf(compiler=no_match("clang"))
+    def test(self):
+        self.build()
+
+        lldbutil.run_to_source_breakpoint(self,
+            "// Set break point at this line.", lldb.SBFileSpec("main.cpp"))
+
+        self.runCmd("settings set target.import-std-module true")
+
+        self.expect_expr("pair_int.first", result_type="int", result_value="1234")
+        self.expect_expr("pair_int.second", result_type="int", result_value="5678")
+        self.expect("expr pair_int", substrs=['first = 1234, second = 5678'])
\ No newline at end of file
diff --git a/lldb/test/API/commands/expression/import-std-module/pair/main.cpp b/lldb/test/API/commands/expression/import-std-module/pair/main.cpp
new file mode 100644
index 0000000000000..1363698f1fc7f
--- /dev/null
+++ b/lldb/test/API/commands/expression/import-std-module/pair/main.cpp
@@ -0,0 +1,6 @@
+#include <utility>
+
+int main(int argc, char **argv) {
+  std::pair<int, int> pair_int(1234, 5678);
+  return 0; // Set break point at this line.
+}

From feb0b9c3bba7db6d547b552c3cdaa838559da664 Mon Sep 17 00:00:00 2001
From: Marcel Koester
Date: Fri, 7 Aug 2020 12:22:45 +0200
Subject: [PATCH 0142/1079] [mlir] Added support for loops to BufferPlacement
 transformation.

The current BufferPlacement transformation cannot handle loops properly.
Buffers passed via backedges will not be freed automatically, which
introduces memory leaks. This CL adds support for loops to overcome these
limitations.
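As an example, condensed from the new `loop_alloc` test: for a loop such as

  %1 = scf.for %i = %lb to %ub step %step
      iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %3 = alloc() : memref<2xf32>
    scf.yield %3 : memref<2xf32>
  }

the transformation now frees the iteration argument %iterBuf before each
yield and copies the yielded buffer, so no allocation leaks across the
backedge.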
Differential Revision: https://reviews.llvm.org/D85513 --- mlir/lib/Transforms/BufferPlacement.cpp | 236 +++++++++++++---- mlir/test/Transforms/buffer-placement.mlir | 292 +++++++++++++++++++++ 2 files changed, 474 insertions(+), 54 deletions(-) diff --git a/mlir/lib/Transforms/BufferPlacement.cpp b/mlir/lib/Transforms/BufferPlacement.cpp index 0279129758ab8..9f2c254f91e51 100644 --- a/mlir/lib/Transforms/BufferPlacement.cpp +++ b/mlir/lib/Transforms/BufferPlacement.cpp @@ -48,11 +48,10 @@ // will be freed in the end. // // TODO: -// The current implementation does not support loops and the resulting code will -// be invalid with respect to program semantics. The only thing that is -// currently missing is a high-level loop analysis that allows us to move allocs -// and deallocs outside of the loop blocks. Furthermore, it doesn't also accept -// functions which return buffers already. +// The current implementation does not support explicit-control-flow loops and +// the resulting code will be invalid with respect to program semantics. +// However, structured control-flow loops are fully supported. Furthermore, it +// doesn't accept functions which return buffers already. // //===----------------------------------------------------------------------===// @@ -77,6 +76,22 @@ static void walkReturnOperations(Region *region, const FuncT &func) { } } +/// Wrapper for the actual `RegionBranchOpInterface.getSuccessorRegions` +/// function that initializes the required `operandAttributes` array. +static void getSuccessorRegions(RegionBranchOpInterface regionInterface, + llvm::Optional index, + SmallVectorImpl &successors) { + // Create a list of null attributes for each operand to comply with the + // `getSuccessorRegions` interface definition that requires a single + // attribute per operand. + SmallVector operandAttributes( + regionInterface.getOperation()->getNumOperands()); + + // Get all successor regions using the temporarily allocated + // `operandAttributes`. + regionInterface.getSuccessorRegions(index, operandAttributes, successors); +} + namespace { //===----------------------------------------------------------------------===// // BufferPlacementAliasAnalysis @@ -166,16 +181,10 @@ class BufferPlacementAliasAnalysis { // Query the RegionBranchOpInterface to find potential successor regions. op->walk([&](RegionBranchOpInterface regionInterface) { - // Create an empty attribute for each operand to comply with the - // `getSuccessorRegions` interface definition that requires a single - // attribute per operand. - SmallVector operandAttributes( - regionInterface.getOperation()->getNumOperands()); - // Extract all entry regions and wire all initial entry successor inputs. SmallVector entrySuccessors; - regionInterface.getSuccessorRegions(/*index=*/llvm::None, - operandAttributes, entrySuccessors); + getSuccessorRegions(regionInterface, /*index=*/llvm::None, + entrySuccessors); for (RegionSuccessor &entrySuccessor : entrySuccessors) { // Wire the entry region's successor arguments with the initial // successor inputs. @@ -191,8 +200,8 @@ class BufferPlacementAliasAnalysis { // Iterate over all successor region entries that are reachable from the // current region. 
SmallVector successorRegions; - regionInterface.getSuccessorRegions( - region.getRegionNumber(), operandAttributes, successorRegions); + getSuccessorRegions(regionInterface, region.getRegionNumber(), + successorRegions); for (RegionSuccessor &successorRegion : successorRegions) { // Iterate over all immediate terminator operations and wire the // successor inputs with the operands of each terminator. @@ -209,6 +218,83 @@ class BufferPlacementAliasAnalysis { ValueMapT aliases; }; +//===----------------------------------------------------------------------===// +// Backedges +//===----------------------------------------------------------------------===// + +/// A straight-forward program analysis which detects loop backedges induced by +/// explicit control flow. +class Backedges { +public: + using BlockSetT = SmallPtrSet; + using BackedgeSetT = llvm::DenseSet>; + +public: + /// Constructs a new backedges analysis using the op provided. + Backedges(Operation *op) { recurse(op, op->getBlock()); } + + /// Returns the number of backedges formed by explicit control flow. + size_t size() const { return edgeSet.size(); } + + /// Returns the start iterator to loop over all backedges. + BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); } + + /// Returns the end iterator to loop over all backedges. + BackedgeSetT::const_iterator end() const { return edgeSet.end(); } + +private: + /// Enters the current block and inserts a backedge into the `edgeSet` if we + /// have already visited the current block. The inserted edge links the given + /// `predecessor` with the `current` block. + bool enter(Block ¤t, Block *predecessor) { + bool inserted = visited.insert(¤t).second; + if (!inserted) + edgeSet.insert(std::make_pair(predecessor, ¤t)); + return inserted; + } + + /// Leaves the current block. + void exit(Block ¤t) { visited.erase(¤t); } + + /// Recurses into the given operation while taking all attached regions into + /// account. + void recurse(Operation *op, Block *predecessor) { + Block *current = op->getBlock(); + // If the current op implements the `BranchOpInterface`, there can be + // cycles in the scope of all successor blocks. + if (isa(op)) { + for (Block *succ : current->getSuccessors()) + recurse(*succ, current); + } + // Recurse into all distinct regions and check for explicit control-flow + // loops. + for (Region ®ion : op->getRegions()) + recurse(region.front(), current); + } + + /// Recurses into explicit control-flow structures that are given by + /// the successor relation defined on the block level. + void recurse(Block &block, Block *predecessor) { + // Try to enter the current block. If this is not possible, we are + // currently processing this block and can safely return here. + if (!enter(block, predecessor)) + return; + + // Recurse into all operations and successor blocks. + for (auto &op : block.getOperations()) + recurse(&op, predecessor); + + // Leave the current block. + exit(block); + } + + /// Stores all blocks that are currently visited and on the processing stack. + BlockSetT visited; + + /// Stores all backedges in the format (source, target). + BackedgeSetT edgeSet; +}; + //===----------------------------------------------------------------------===// // BufferPlacement //===----------------------------------------------------------------------===// @@ -357,9 +443,14 @@ class BufferPlacement { for (Value value : it->second) { if (valuesToFree.count(value) > 0) continue; - // Check whether we have to free this particular block argument. 
- if (!dominators.dominates(definingBlock, value.getParentBlock())) { - toProcess.emplace_back(value, value.getParentBlock()); + Block *parentBlock = value.getParentBlock(); + // Check whether we have to free this particular block argument or + // generic value. We have to free the current alias if it is either + // defined in a non-dominated block or it is defined in the same block + // but the current value is not dominated by the source value. + if (!dominators.dominates(definingBlock, parentBlock) || + (definingBlock == parentBlock && value.isa())) { + toProcess.emplace_back(value, parentBlock); valuesToFree.insert(value); } else if (visitedValues.insert(std::make_tuple(value, definingBlock)) .second) @@ -431,22 +522,42 @@ class BufferPlacement { // argument belongs to the first block in a region and the parent operation // implements the RegionBranchOpInterface. Region *argRegion = block->getParent(); + Operation *parentOp = argRegion->getParentOp(); RegionBranchOpInterface regionInterface; if (!argRegion || &argRegion->front() != block || - !(regionInterface = - dyn_cast(argRegion->getParentOp()))) + !(regionInterface = dyn_cast(parentOp))) return; introduceCopiesForRegionSuccessors( - regionInterface, argRegion->getParentOp()->getRegions(), + regionInterface, argRegion->getParentOp()->getRegions(), blockArg, [&](RegionSuccessor &successorRegion) { // Find a predecessor of our argRegion. return successorRegion.getSuccessor() == argRegion; - }, - [&](RegionSuccessor &successorRegion) { - // The operand index will be the argument number. - return blockArg.getArgNumber(); }); + + // Check whether the block argument belongs to an entry region of the + // parent operation. In this case, we have to introduce an additional copy + // for buffer that is passed to the argument. + SmallVector successorRegions; + getSuccessorRegions(regionInterface, llvm::None, successorRegions); + auto *it = + llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) { + return successorRegion.getSuccessor() == argRegion; + }); + if (it == successorRegions.end()) + return; + + // Determine the actual operand to introduce a copy for and rewire the + // operand to point to the copy instead. + Value operand = + regionInterface.getSuccessorEntryOperands(argRegion->getRegionNumber()) + [llvm::find(it->getSuccessorInputs(), blockArg).getIndex()]; + Value copy = introduceBufferCopy(operand, parentOp); + + auto op = llvm::find(parentOp->getOperands(), operand); + assert(op != parentOp->getOperands().end() && + "parentOp does not contain operand"); + parentOp->setOperand(op.getIndex(), copy); } /// Introduces temporary allocs in front of all associated nested-region @@ -455,42 +566,34 @@ class BufferPlacement { // Get the actual result index in the scope of the parent terminator. Operation *operation = value.getDefiningOp(); auto regionInterface = cast(operation); - introduceCopiesForRegionSuccessors( - regionInterface, operation->getRegions(), - [&](RegionSuccessor &successorRegion) { - // Determine whether this region has a successor entry that leaves - // this region by returning to its parent operation. - return !successorRegion.getSuccessor(); - }, - [&](RegionSuccessor &successorRegion) { - // Find the associated success input index. - return llvm::find(successorRegion.getSuccessorInputs(), value) - .getIndex(); - }); + // Filter successors that return to the parent operation. 
+ auto regionPredicate = [&](RegionSuccessor &successorRegion) { + // If the RegionSuccessor has no associated successor, it will return to + // its parent operation. + return !successorRegion.getSuccessor(); + }; + // Introduce a copy for all region "results" that are returned to the parent + // operation. This is required since the parent's result value has been + // considered critical. Therefore, the algorithm assumes that a copy of a + // previously allocated buffer is returned by the operation (like in the + // case of a block argument). + introduceCopiesForRegionSuccessors(regionInterface, operation->getRegions(), + value, regionPredicate); } /// Introduces buffer copies for all terminators in the given regions. The /// regionPredicate is applied to every successor region in order to restrict - /// the copies to specific regions. Thereby, the operandProvider is invoked - /// for each matching region successor and determines the operand index that - /// requires a buffer copy. - template - void - introduceCopiesForRegionSuccessors(RegionBranchOpInterface regionInterface, - MutableArrayRef regions, - const TPredicate ®ionPredicate, - const TOperandProvider &operandProvider) { - // Create an empty attribute for each operand to comply with the - // `getSuccessorRegions` interface definition that requires a single - // attribute per operand. - SmallVector operandAttributes( - regionInterface.getOperation()->getNumOperands()); + /// the copies to specific regions. + template + void introduceCopiesForRegionSuccessors( + RegionBranchOpInterface regionInterface, MutableArrayRef regions, + Value argValue, const TPredicate ®ionPredicate) { for (Region ®ion : regions) { // Query the regionInterface to get all successor regions of the current // one. SmallVector successorRegions; - regionInterface.getSuccessorRegions(region.getRegionNumber(), - operandAttributes, successorRegions); + getSuccessorRegions(regionInterface, region.getRegionNumber(), + successorRegions); // Try to find a matching region successor. RegionSuccessor *regionSuccessor = llvm::find_if(successorRegions, regionPredicate); @@ -498,7 +601,9 @@ class BufferPlacement { continue; // Get the operand index in the context of the current successor input // bindings. - auto operandIndex = operandProvider(*regionSuccessor); + size_t operandIndex = + llvm::find(regionSuccessor->getSuccessorInputs(), argValue) + .getIndex(); // Iterate over all immediate terminator operations to introduce // new buffer allocations. Thereby, the appropriate terminator operand @@ -518,6 +623,16 @@ class BufferPlacement { /// its content into the newly allocated buffer. The terminator operation is /// used to insert the alloc and copy operations at the right places. Value introduceBufferCopy(Value sourceValue, Operation *terminator) { + // Avoid multiple copies of the same source value. This can happen in the + // presence of loops when a branch acts as a backedge while also having + // another successor that returns to its parent operation. Note: that + // copying copied buffers can introduce memory leaks since the invariant of + // BufferPlacement assumes that a buffer will be only copied once into a + // temporary buffer. Hence, the construction of copy chains introduces + // additional allocations that are not tracked automatically by the + // algorithm. + if (copiedValues.contains(sourceValue)) + return sourceValue; // Create a new alloc at the current location of the terminator. 
auto memRefType = sourceValue.getType().cast(); OpBuilder builder(terminator); @@ -541,6 +656,8 @@ class BufferPlacement { // allocation to the new one. builder.create(terminator->getLoc(), sourceValue, alloc); + // Remember the copy of original source value. + copiedValues.insert(alloc); return alloc; } @@ -652,6 +769,9 @@ class BufferPlacement { /// Maps allocation nodes to their associated blocks. AllocEntryList allocs; + // Stores already copied allocations to avoid additional copies of copies. + ValueSetT copiedValues; + /// The underlying liveness analysis to compute fine grained information /// about alloc and dealloc positions. Liveness liveness; @@ -673,6 +793,14 @@ class BufferPlacement { struct BufferPlacementPass : BufferPlacementBase { void runOnFunction() override { + // Ensure that there are supported loops only. + Backedges backedges(getFunction()); + if (backedges.size()) { + getFunction().emitError( + "Structured control-flow loops are supported only."); + return; + } + // Place all required alloc, copy and dealloc nodes. BufferPlacement placement(getFunction()); placement.place(); diff --git a/mlir/test/Transforms/buffer-placement.mlir b/mlir/test/Transforms/buffer-placement.mlir index e1ed2c4309c3d..dc9ff44bf4838 100644 --- a/mlir/test/Transforms/buffer-placement.mlir +++ b/mlir/test/Transforms/buffer-placement.mlir @@ -1125,3 +1125,295 @@ func @nestedRegionControlFlowAlloca( // CHECK: %[[ALLOCA:.*]] = alloca(%arg0, %arg1) // CHECK-NEXT: scf.yield %[[ALLOC0]] // CHECK: return %[[ALLOC1]] + +// ----- + +// Test Case: structured control-flow loop using a nested alloc. +// The alloc positions of %3 will not be changed, but the iteration argument +// %iterBuf has to be freed before yielding %3 to avoid memory leaks. + +// ----- + +// CHECK-LABEL: func @loop_alloc +func @loop_alloc( + %lb: index, + %ub: index, + %step: index, + %buf: memref<2xf32>, + %res: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + %1 = scf.for %i = %lb to %ub step %step + iter_args(%iterBuf = %buf) -> memref<2xf32> { + %2 = cmpi "eq", %i, %ub : index + %3 = alloc() : memref<2xf32> + scf.yield %3 : memref<2xf32> + } + "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + return +} + +// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: dealloc %[[ALLOC0]] +// CHECK-NEXT: %[[ALLOC1:.*]] = alloc() +// CHECK: linalg.copy(%arg3, %[[ALLOC1]]) +// CHECK: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] = %[[ALLOC1]] +// CHECK: cmpi +// CHECK: dealloc %[[IALLOC]] +// CHECK: %[[ALLOC3:.*]] = alloc() +// CHECK: %[[ALLOC4:.*]] = alloc() +// CHECK: linalg.copy(%[[ALLOC3]], %[[ALLOC4]]) +// CHECK: dealloc %[[ALLOC3]] +// CHECK: scf.yield %[[ALLOC4]] +// CHECK: } +// CHECK: linalg.copy(%[[ALLOC2]], %arg4) +// CHECK-NEXT: dealloc %[[ALLOC2]] + +// ----- + +// Test Case: structured control-flow loop with a nested if operation. +// The loop yields buffers that have been defined outside of the loop and the +// backeges only use the iteration arguments (or one of its aliases). +// Therefore, we do not have to (and are not allowed to) free any buffers +// that are passed via the backedges. 
+ +// CHECK-LABEL: func @loop_nested_if_no_alloc +func @loop_nested_if_no_alloc( + %lb: index, + %ub: index, + %step: index, + %buf: memref<2xf32>, + %res: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + %1 = scf.for %i = %lb to %ub step %step + iter_args(%iterBuf = %buf) -> memref<2xf32> { + %2 = cmpi "eq", %i, %ub : index + %3 = scf.if %2 -> (memref<2xf32>) { + scf.yield %0 : memref<2xf32> + } else { + scf.yield %iterBuf : memref<2xf32> + } + scf.yield %3 : memref<2xf32> + } + "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + return +} + +// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC1:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] = +// CHECK: %[[ALLOC2:.*]] = scf.if +// CHECK: scf.yield %[[ALLOC0]] +// CHECK: scf.yield %[[IALLOC]] +// CHECK: scf.yield %[[ALLOC2]] +// CHECK: linalg.copy(%[[ALLOC1]], %arg4) +// CHECK: dealloc %[[ALLOC0]] + +// ----- + +// Test Case: structured control-flow loop with a nested if operation using +// a deeply nested buffer allocation. +// Since the innermost allocation happens in a divergent branch, we have to +// introduce additional copies for the nested if operation. Since the loop's +// yield operation "returns" %3, it will return a newly allocated buffer. +// Therefore, we have to free the iteration argument %iterBuf before +// "returning" %3. + +// CHECK-LABEL: func @loop_nested_if_alloc +func @loop_nested_if_alloc( + %lb: index, + %ub: index, + %step: index, + %buf: memref<2xf32>) -> memref<2xf32> { + %0 = alloc() : memref<2xf32> + %1 = scf.for %i = %lb to %ub step %step + iter_args(%iterBuf = %buf) -> memref<2xf32> { + %2 = cmpi "eq", %i, %ub : index + %3 = scf.if %2 -> (memref<2xf32>) { + %4 = alloc() : memref<2xf32> + scf.yield %4 : memref<2xf32> + } else { + scf.yield %0 : memref<2xf32> + } + scf.yield %3 : memref<2xf32> + } + return %1 : memref<2xf32> +} + +// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK: %[[ALLOC1:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%arg3, %[[ALLOC1]]) +// CHECK-NEXT: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] = %[[ALLOC1]] +// CHECK: dealloc %[[IALLOC]] +// CHECK: %[[ALLOC3:.*]] = scf.if + +// CHECK: %[[ALLOC4:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC5:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC4]], %[[ALLOC5]]) +// CHECK-NEXT: dealloc %[[ALLOC4]] +// CHECK-NEXT: scf.yield %[[ALLOC5]] + +// CHECK: %[[ALLOC6:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC0]], %[[ALLOC6]]) +// CHECK-NEXT: scf.yield %[[ALLOC6]] + +// CHECK: %[[ALLOC7:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC3:.*]], %[[ALLOC7]]) +// CHECK-NEXT: dealloc %[[ALLOC3]] +// CHECK-NEXT: scf.yield %[[ALLOC7]] + +// CHECK: dealloc %[[ALLOC0]] +// CHECK-NEXT: return %[[ALLOC2]] + +// ----- + +// Test Case: several nested structured control-flow loops with a deeply nested +// buffer allocation inside an if operation. +// Same behavior is an loop_nested_if_alloc: we have to insert deallocations +// before each yield in all loops recursively. 
+ +// CHECK-LABEL: func @loop_nested_alloc +func @loop_nested_alloc( + %lb: index, + %ub: index, + %step: index, + %buf: memref<2xf32>, + %res: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + %1 = scf.for %i = %lb to %ub step %step + iter_args(%iterBuf = %buf) -> memref<2xf32> { + %2 = scf.for %i2 = %lb to %ub step %step + iter_args(%iterBuf2 = %iterBuf) -> memref<2xf32> { + %3 = scf.for %i3 = %lb to %ub step %step + iter_args(%iterBuf3 = %iterBuf2) -> memref<2xf32> { + %4 = alloc() : memref<2xf32> + %5 = cmpi "eq", %i, %ub : index + %6 = scf.if %5 -> (memref<2xf32>) { + %7 = alloc() : memref<2xf32> + scf.yield %7 : memref<2xf32> + } else { + scf.yield %iterBuf3 : memref<2xf32> + } + scf.yield %6 : memref<2xf32> + } + scf.yield %3 : memref<2xf32> + } + scf.yield %2 : memref<2xf32> + } + "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + return +} + +// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: dealloc %[[ALLOC0]] +// CHECK-NEXT: %[[ALLOC1:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%arg3, %[[ALLOC1]]) +// CHECK-NEXT: %[[VAL_7:.*]] = scf.for {{.*}} iter_args(%[[IALLOC0:.*]] = %[[ALLOC1]]) +// CHECK: %[[ALLOC2:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[IALLOC0]], %[[ALLOC2]]) +// CHECK-NEXT: dealloc %[[IALLOC0]] +// CHECK-NEXT: %[[ALLOC3:.*]] = scf.for {{.*}} iter_args(%[[IALLOC1:.*]] = %[[ALLOC2]]) +// CHECK: %[[ALLOC5:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[IALLOC1]], %[[ALLOC5]]) +// CHECK-NEXT: dealloc %[[IALLOC1]] + +// CHECK: %[[ALLOC6:.*]] = scf.for {{.*}} iter_args(%[[IALLOC2:.*]] = %[[ALLOC5]]) +// CHECK: %[[ALLOC8:.*]] = alloc() +// CHECK-NEXT: dealloc %[[ALLOC8]] +// CHECK: %[[ALLOC9:.*]] = scf.if + +// CHECK: %[[ALLOC11:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC12:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC11]], %[[ALLOC12]]) +// CHECK-NEXT: dealloc %[[ALLOC11]] +// CHECK-NEXT: scf.yield %[[ALLOC12]] + +// CHECK: %[[ALLOC13:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[IALLOC2]], %[[ALLOC13]]) +// CHECK-NEXT: scf.yield %[[ALLOC13]] + +// CHECK: dealloc %[[IALLOC2]] +// CHECK-NEXT: %[[ALLOC10:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC9]], %[[ALLOC10]]) +// CHECK-NEXT: dealloc %[[ALLOC9]] +// CHECK-NEXT: scf.yield %[[ALLOC10]] + +// CHECK: %[[ALLOC7:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC6]], %[[ALLOC7]]) +// CHECK-NEXT: dealloc %[[ALLOC6]] +// CHECK-NEXT: scf.yield %[[ALLOC7]] + +// CHECK: %[[ALLOC4:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC3]], %[[ALLOC4]]) +// CHECK-NEXT: dealloc %[[ALLOC3]] +// CHECK-NEXT: scf.yield %[[ALLOC4]] + +// CHECK: linalg.copy(%[[VAL_7]], %arg4) +// CHECK-NEXT: dealloc %[[VAL_7]] + +// ----- + +// Test Case: explicit control-flow loop with a dynamically allocated buffer. +// The BufferPlacement transformation should fail on this explicit +// control-flow loop since they are not supported. 
+
+// CHECK-LABEL: func @loop_dynalloc
+func @loop_dynalloc(
+  %arg0 : i32,
+  %arg1 : i32,
+  %arg2: memref<?xf32>,
+  %arg3: memref<?xf32>) {
+  %const0 = constant 0 : i32
+  br ^loopHeader(%const0, %arg2 : i32, memref<?xf32>)
+
+^loopHeader(%i : i32, %buff : memref<?xf32>):
+  %lessThan = cmpi "slt", %i, %arg1 : i32
+  cond_br %lessThan,
+    ^loopBody(%i, %buff : i32, memref<?xf32>),
+    ^exit(%buff : memref<?xf32>)
+
+^loopBody(%val : i32, %buff2: memref<?xf32>):
+  %const1 = constant 1 : i32
+  %inc = addi %val, %const1 : i32
+  %size = std.index_cast %inc : i32 to index
+  %alloc1 = alloc(%size) : memref<?xf32>
+  br ^loopHeader(%inc, %alloc1 : i32, memref<?xf32>)
+
+^exit(%buff3 : memref<?xf32>):
+  "linalg.copy"(%buff3, %arg3) : (memref<?xf32>, memref<?xf32>) -> ()
+  return
+}
+
+// expected-error@+1 {{Structured control-flow loops are supported only}}
+
+// -----
+
+// Test Case: explicit control-flow loop with a dynamically allocated buffer.
+// The BufferPlacement transformation should fail on this explicit
+// control-flow loop since they are not supported.
+
+// CHECK-LABEL: func @do_loop_alloc
+func @do_loop_alloc(
+  %arg0 : i32,
+  %arg1 : i32,
+  %arg2: memref<2xf32>,
+  %arg3: memref<2xf32>) {
+  %const0 = constant 0 : i32
+  br ^loopBody(%const0, %arg2 : i32, memref<2xf32>)
+
+^loopBody(%val : i32, %buff2: memref<2xf32>):
+  %const1 = constant 1 : i32
+  %inc = addi %val, %const1 : i32
+  %alloc1 = alloc() : memref<2xf32>
+  br ^loopHeader(%inc, %alloc1 : i32, memref<2xf32>)
+
+^loopHeader(%i : i32, %buff : memref<2xf32>):
+  %lessThan = cmpi "slt", %i, %arg1 : i32
+  cond_br %lessThan,
+    ^loopBody(%i, %buff : i32, memref<2xf32>),
+    ^exit(%buff : memref<2xf32>)
+
+^exit(%buff3 : memref<2xf32>):
+  "linalg.copy"(%buff3, %arg3) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+
+// expected-error@+1 {{Structured control-flow loops are supported only}}

From 8427885e27813c457dccb011f65e8ded74444e31 Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Wed, 9 Sep 2020 12:08:46 +0300
Subject: [PATCH 0143/1079] Temporarily revert "Thread safety analysis:
 Consider global variables in scope" & followup

This appears to cause false positives because it started to warn on local
non-global variables. Repro posted to https://reviews.llvm.org/D84604#2262745

This reverts commit 9dcc82f34ea9b623d82d2577b93aaf67d36dabd2.
This reverts commit b2ce79ef66157dd752e3864ece57915e23a73f5d.
---
 clang/lib/Analysis/ThreadSafety.cpp           | 18 ++++--------
 clang/lib/Analysis/ThreadSafetyCommon.cpp     |  2 +-
 .../SemaCXX/warn-thread-safety-analysis.cpp   |  7 ++---
 .../SemaCXX/warn-thread-safety-negative.cpp   | 29 -------------------
 4 files changed, 9 insertions(+), 47 deletions(-)

diff --git a/clang/lib/Analysis/ThreadSafety.cpp b/clang/lib/Analysis/ThreadSafety.cpp
index 5b97265a6d8ae..64e0da9e64b12 100644
--- a/clang/lib/Analysis/ThreadSafety.cpp
+++ b/clang/lib/Analysis/ThreadSafety.cpp
@@ -1266,21 +1266,13 @@ ClassifyDiagnostic(const AttrTy *A) {
 }
 
 bool ThreadSafetyAnalyzer::inCurrentScope(const CapabilityExpr &CapE) {
-  const threadSafety::til::SExpr *SExp = CapE.sexpr();
-  assert(SExp && "Null expressions should be ignored");
-
-  // Global variables are always in scope.
-  if (isa<til::LiteralPtr>(SExp))
-    return true;
-
-  // Members are in scope from methods of the same class.
- if (const auto *P = dyn_cast(SExp)) { - if (!CurrentMethod) + if (!CurrentMethod) return false; - const ValueDecl *VD = P->clangDecl(); - return VD->getDeclContext() == CurrentMethod->getDeclContext(); + if (const auto *P = dyn_cast_or_null(CapE.sexpr())) { + const auto *VD = P->clangDecl(); + if (VD) + return VD->getDeclContext() == CurrentMethod->getDeclContext(); } - return false; } diff --git a/clang/lib/Analysis/ThreadSafetyCommon.cpp b/clang/lib/Analysis/ThreadSafetyCommon.cpp index aee9185760071..1b8c55e56d470 100644 --- a/clang/lib/Analysis/ThreadSafetyCommon.cpp +++ b/clang/lib/Analysis/ThreadSafetyCommon.cpp @@ -274,7 +274,7 @@ til::SExpr *SExprBuilder::translateDeclRefExpr(const DeclRefExpr *DRE, const auto *VD = cast(DRE->getDecl()->getCanonicalDecl()); // Function parameters require substitution and/or renaming. - if (const auto *PV = dyn_cast(VD)) { + if (const auto *PV = dyn_cast_or_null(VD)) { unsigned I = PV->getFunctionScopeIndex(); const DeclContext *D = PV->getDeclContext(); if (Ctx && Ctx->FunArgs) { diff --git a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp index d1520b1decbd3..91bd15def577d 100644 --- a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp +++ b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp @@ -5036,8 +5036,7 @@ void spawn_fake_flight_control_thread(void) { } extern const char *deque_log_msg(void) __attribute__((requires_capability(Logger))); -void logger_entry(void) __attribute__((requires_capability(Logger))) - __attribute__((requires_capability(!FlightControl))) { +void logger_entry(void) __attribute__((requires_capability(Logger))) { const char *msg; while ((msg = deque_log_msg())) { @@ -5045,13 +5044,13 @@ void logger_entry(void) __attribute__((requires_capability(Logger))) } } -void spawn_fake_logger_thread(void) __attribute__((requires_capability(!FlightControl))) { +void spawn_fake_logger_thread(void) { acquire(Logger); logger_entry(); release(Logger); } -int main(void) __attribute__((requires_capability(!FlightControl))) { +int main(void) { spawn_fake_flight_control_thread(); spawn_fake_logger_thread(); diff --git a/clang/test/SemaCXX/warn-thread-safety-negative.cpp b/clang/test/SemaCXX/warn-thread-safety-negative.cpp index 68e30f4a3225b..456fe16e6574e 100644 --- a/clang/test/SemaCXX/warn-thread-safety-negative.cpp +++ b/clang/test/SemaCXX/warn-thread-safety-negative.cpp @@ -81,35 +81,6 @@ class Foo { } // end namespace SimpleTest -Mutex globalMutex; - -namespace ScopeTest { - -void f() EXCLUSIVE_LOCKS_REQUIRED(!globalMutex); -void fq() EXCLUSIVE_LOCKS_REQUIRED(!::globalMutex); - -namespace ns { - Mutex globalMutex; - void f() EXCLUSIVE_LOCKS_REQUIRED(!globalMutex); - void fq() EXCLUSIVE_LOCKS_REQUIRED(!ns::globalMutex); -} - -void testGlobals() EXCLUSIVE_LOCKS_REQUIRED(!ns::globalMutex) { - f(); // expected-warning {{calling function 'f' requires negative capability '!globalMutex'}} - fq(); // expected-warning {{calling function 'fq' requires negative capability '!globalMutex'}} - ns::f(); - ns::fq(); -} - -void testNamespaceGlobals() EXCLUSIVE_LOCKS_REQUIRED(!globalMutex) { - f(); - fq(); - ns::f(); // expected-warning {{calling function 'f' requires negative capability '!globalMutex'}} - ns::fq(); // expected-warning {{calling function 'fq' requires negative capability '!globalMutex'}} -} - -} // end namespace ScopeTest - namespace DoubleAttribute { struct Foo { From 3a577f544618d9713aca5052e55143142d23f427 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Wed, 9 Sep 2020 
07:41:56 +0200 Subject: [PATCH 0144/1079] Rename MemRefDescriptor::getElementType() to MemRefDescriptor::getElementPtrType(). Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D87284 --- .../Conversion/StandardToLLVM/ConvertStandardToLLVM.h | 5 +++-- mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp | 10 ++++++---- .../Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp | 6 +++--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h index 63ffd78373825..ab047a08f404c 100644 --- a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h +++ b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h @@ -34,6 +34,7 @@ class UnrankedMemRefType; namespace LLVM { class LLVMDialect; class LLVMType; +class LLVMPointerType; } // namespace LLVM /// Callback to convert function argument types. It converts a MemRef function @@ -281,8 +282,8 @@ class MemRefDescriptor : public StructBuilder { void setConstantStride(OpBuilder &builder, Location loc, unsigned pos, uint64_t stride); - /// Returns the (LLVM) type this descriptor points to. - LLVM::LLVMType getElementType(); + /// Returns the (LLVM) pointer type this descriptor contains. + LLVM::LLVMPointerType getElementPtrType(); /// Builds IR populating a MemRef descriptor structure from a list of /// individual values composing that descriptor, in the following order: diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index 55a926ef1423d..2aa589a0fb7b2 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -642,9 +642,11 @@ void MemRefDescriptor::setConstantStride(OpBuilder &builder, Location loc, createIndexAttrConstant(builder, loc, indexType, stride)); } -LLVM::LLVMType MemRefDescriptor::getElementType() { - return value.getType().cast().getStructElementType( - kAlignedPtrPosInMemRefDescriptor); +LLVM::LLVMPointerType MemRefDescriptor::getElementPtrType() { + return value.getType() + .cast() + .getStructElementType(kAlignedPtrPosInMemRefDescriptor) + .cast(); } /// Creates a MemRef descriptor structure from a list of individual values @@ -894,7 +896,7 @@ Value ConvertToLLVMPattern::getStridedElementPtr( Value ConvertToLLVMPattern::getDataPtr( Location loc, MemRefType type, Value memRefDesc, ValueRange indices, ConversionPatternRewriter &rewriter) const { - LLVM::LLVMType ptrType = MemRefDescriptor(memRefDesc).getElementType(); + LLVM::LLVMType ptrType = MemRefDescriptor(memRefDesc).getElementPtrType(); int64_t offset; SmallVector strides; auto successStrides = getStridesAndOffset(type, strides, offset); diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index d51a96dca3849..73fd3285ec974 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -198,7 +198,7 @@ static LogicalResult getBasePtr(ConversionPatternRewriter &rewriter, Value base; if (failed(getBase(rewriter, loc, memref, memRefType, base))) return failure(); - auto pType = MemRefDescriptor(memref).getElementType(); + auto pType = MemRefDescriptor(memref).getElementPtrType(); ptr = rewriter.create(loc, pType, base); return success(); } @@ -225,7 +225,7 @@ static LogicalResult 
getIndexedPtrs(ConversionPatternRewriter &rewriter, Value base; if (failed(getBase(rewriter, loc, memref, memRefType, base))) return failure(); - auto pType = MemRefDescriptor(memref).getElementType(); + auto pType = MemRefDescriptor(memref).getElementPtrType(); auto ptrsType = LLVM::LLVMType::getVectorTy(pType, vType.getDimSize(0)); ptrs = rewriter.create(loc, ptrsType, base, indices); return success(); @@ -1151,7 +1151,7 @@ class VectorTypeCastOpConversion : public ConvertToLLVMPattern { // Create descriptor. auto desc = MemRefDescriptor::undef(rewriter, loc, llvmTargetDescriptorTy); - Type llvmTargetElementTy = desc.getElementType(); + Type llvmTargetElementTy = desc.getElementPtrType(); // Set allocated ptr. Value allocated = sourceMemRef.allocatedPtr(rewriter, loc); allocated = From 43af2a6faa272565cde4e3eec7dfeac593d29701 Mon Sep 17 00:00:00 2001 From: Mirko Brkusanin Date: Wed, 9 Sep 2020 11:28:36 +0200 Subject: [PATCH 0145/1079] [AMDGPU] Workaround for LDS Misalignment bug on GFX10 Add subtarget feature check to avoid using ds_read/write_b96/128 with too low alignment if a bug is present on that specific hardware. Add this "feature" to GFX 10.1.1 as it is also affected. Add global-isel test. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 3 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 12 +- .../AMDGPU/GlobalISel/lds-misaligned-bug.ll | 128 ++++++++++++++++++ .../test/CodeGen/AMDGPU/lds-misaligned-bug.ll | 18 ++- 4 files changed, 151 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 37e4b56e9ccf7..3e8cd60b7d77a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -163,7 +163,7 @@ def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug", def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug", "LDSMisalignedBug", "true", - "Some GFX10 bug with misaligned multi-dword LDS access in WGP mode" + "Some GFX10 bug with multi-dword LDS and flat access that is not naturally aligned in WGP mode" >; def FeatureMFMAInlineLiteralBug : SubtargetFeature<"mfma-inline-literal-bug", @@ -929,6 +929,7 @@ def FeatureISAVersion10_1_1 : FeatureSet< FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, + FeatureLdsMisalignedBug, FeatureDoesNotSupportXNACK, FeatureCodeObjectV3])>; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ad9c4d0673476..26fbab63e1ca5 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1417,8 +1417,10 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl( } if (Size == 96) { // ds_read/write_b96 require 16-byte alignment on gfx8 and older. - bool Aligned = - Alignment >= Align(Subtarget->hasUnalignedDSAccess() ? 4 : 16); + bool Aligned = Alignment >= Align((Subtarget->hasUnalignedDSAccess() && + !Subtarget->hasLDSMisalignedBug()) + ? 4 + : 16); if (IsFast) *IsFast = Aligned; @@ -1428,8 +1430,10 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl( // ds_read/write_b128 require 16-byte alignment on gfx8 and older, but we // can do a 8 byte aligned, 16 byte access in a single operation using // ds_read2/write2_b64. - bool Aligned = - Alignment >= Align(Subtarget->hasUnalignedDSAccess() ? 4 : 8); + bool Aligned = Alignment >= Align((Subtarget->hasUnalignedDSAccess() && + !Subtarget->hasLDSMisalignedBug()) + ? 
4 + : 8); if (IsFast) *IsFast = Aligned; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll new file mode 100644 index 0000000000000..7d5a49cfd38dd --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll @@ -0,0 +1,128 @@ +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SPLIT %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SPLIT %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SPLIT %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode < %s | FileCheck -check-prefixes=GCN,VECT %s + +; GCN-LABEL: test_local_misaligned_v2: +; GCN-DAG: ds_read2_b32 +; GCN-DAG: ds_write2_b32 +define amdgpu_kernel void @test_local_misaligned_v2(i32 addrspace(3)* %arg) { +bb: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid + %ptr = bitcast i32 addrspace(3)* %gep to <2 x i32> addrspace(3)* + %load = load <2 x i32>, <2 x i32> addrspace(3)* %ptr, align 4 + %v1 = extractelement <2 x i32> %load, i32 0 + %v2 = extractelement <2 x i32> %load, i32 1 + %v3 = insertelement <2 x i32> undef, i32 %v2, i32 0 + %v4 = insertelement <2 x i32> %v3, i32 %v1, i32 1 + store <2 x i32> %v4, <2 x i32> addrspace(3)* %ptr, align 4 + ret void +} + +; GCN-LABEL: test_local_misaligned_v4: +; VECT-DAG: ds_read_b128 +; VECT-DAG: ds_write_b128 +; SPLIT-DAG: ds_read2_b32 +; SPLIT-DAG: ds_read2_b32 +; SPLIT-DAG: ds_write2_b32 +; SPLIT-DAG: ds_write2_b32 +define amdgpu_kernel void @test_local_misaligned_v4(i32 addrspace(3)* %arg) { +bb: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid + %ptr = bitcast i32 addrspace(3)* %gep to <4 x i32> addrspace(3)* + %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 4 + %v1 = extractelement <4 x i32> %load, i32 0 + %v2 = extractelement <4 x i32> %load, i32 1 + %v3 = extractelement <4 x i32> %load, i32 2 + %v4 = extractelement <4 x i32> %load, i32 3 + %v5 = insertelement <4 x i32> undef, i32 %v4, i32 0 + %v6 = insertelement <4 x i32> %v5, i32 %v3, i32 1 + %v7 = insertelement <4 x i32> %v6, i32 %v2, i32 2 + %v8 = insertelement <4 x i32> %v7, i32 %v1, i32 3 + store <4 x i32> %v8, <4 x i32> addrspace(3)* %ptr, align 4 + ret void +} + +; GCN-LABEL: test_local_misaligned_v3: +; VECT-DAG: ds_read_b96 +; VECT-DAG: ds_write_b96 +; SPLIT-DAG: ds_read2_b32 +; SPLIT-DAG: ds_read_b32 +; SPLIT-DAG: ds_write2_b32 +; SPLIT-DAG: ds_write_b32 +define amdgpu_kernel void @test_local_misaligned_v3(i32 addrspace(3)* %arg) { +bb: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid + %ptr = bitcast i32 addrspace(3)* %gep to <3 x i32> addrspace(3)* + %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 4 + %v1 = extractelement <3 x i32> %load, i32 0 + %v2 = extractelement <3 x i32> %load, i32 1 + %v3 = extractelement <3 x i32> %load, i32 2 + %v5 = insertelement <3 x i32> undef, i32 %v3, i32 0 + %v6 = insertelement <3 x i32> %v5, i32 %v1, i32 1 + %v7 = insertelement <3 x i32> %v6, i32 %v2, i32 2 + store <3 x i32> %v7, <3 x i32> addrspace(3)* %ptr, align 4 + ret void +} + +; GCN-LABEL: test_local_aligned_v2: +; GCN-DAG: ds_read_b64 +; GCN-DAG: ds_write_b64 +define amdgpu_kernel 
void @test_local_aligned_v2(i32 addrspace(3)* %arg) { +bb: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid + %ptr = bitcast i32 addrspace(3)* %gep to <2 x i32> addrspace(3)* + %load = load <2 x i32>, <2 x i32> addrspace(3)* %ptr, align 8 + %v1 = extractelement <2 x i32> %load, i32 0 + %v2 = extractelement <2 x i32> %load, i32 1 + %v3 = insertelement <2 x i32> undef, i32 %v2, i32 0 + %v4 = insertelement <2 x i32> %v3, i32 %v1, i32 1 + store <2 x i32> %v4, <2 x i32> addrspace(3)* %ptr, align 8 + ret void +} + +; GCN-LABEL: test_local_aligned_v3: +; GCN-DAG: ds_read_b96 +; GCN-DAG: ds_write_b96 +define amdgpu_kernel void @test_local_aligned_v3(i32 addrspace(3)* %arg) { +bb: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid + %ptr = bitcast i32 addrspace(3)* %gep to <3 x i32> addrspace(3)* + %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 16 + %v1 = extractelement <3 x i32> %load, i32 0 + %v2 = extractelement <3 x i32> %load, i32 1 + %v3 = extractelement <3 x i32> %load, i32 2 + %v5 = insertelement <3 x i32> undef, i32 %v3, i32 0 + %v6 = insertelement <3 x i32> %v5, i32 %v1, i32 1 + %v7 = insertelement <3 x i32> %v6, i32 %v2, i32 2 + store <3 x i32> %v7, <3 x i32> addrspace(3)* %ptr, align 16 + ret void +} + +; GCN-LABEL: test_local_v4_aligned8: +; GCN-DAG: ds_read_b128 +; GCN-DAG: ds_write_b128 +define amdgpu_kernel void @test_local_v4_aligned8(i32 addrspace(3)* %arg) { +bb: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid + %ptr = bitcast i32 addrspace(3)* %gep to <4 x i32> addrspace(3)* + %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 8 + %v1 = extractelement <4 x i32> %load, i32 0 + %v2 = extractelement <4 x i32> %load, i32 1 + %v3 = extractelement <4 x i32> %load, i32 2 + %v4 = extractelement <4 x i32> %load, i32 3 + %v5 = insertelement <4 x i32> undef, i32 %v4, i32 0 + %v6 = insertelement <4 x i32> %v5, i32 %v3, i32 1 + %v7 = insertelement <4 x i32> %v6, i32 %v2, i32 2 + %v8 = insertelement <4 x i32> %v7, i32 %v1, i32 3 + store <4 x i32> %v8, <4 x i32> addrspace(3)* %ptr, align 8 + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll b/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll index 975e2306cc325..1e5dcffdedd77 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SPLIT %s -; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VECT %s +; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SPLIT %s ; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SPLIT %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode < %s | FileCheck -check-prefixes=GCN,VECT %s @@ -21,8 +21,12 @@ bb: } ; GCN-LABEL: test_local_misaligned_v4: -; GCN-DAG: ds_read_b128 -; GCN-DAG: ds_write_b128 +; VECT-DAG: ds_read_b128 +; VECT-DAG: ds_write_b128 +; SPLIT-DAG: ds_read2_b32 +; SPLIT-DAG: ds_read2_b32 +; SPLIT-DAG: ds_write2_b32 +; SPLIT-DAG: ds_write2_b32 define amdgpu_kernel void @test_local_misaligned_v4(i32 addrspace(3)* %arg) { bb: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() @@ 
-42,8 +46,12 @@ bb: } ; GCN-LABEL: test_local_misaligned_v3: -; GCN-DAG: ds_read_b96 -; GCN-DAG: ds_write_b96 +; VECT-DAG: ds_read_b96 +; VECT-DAG: ds_write_b96 +; SPLIT-DAG: ds_read2_b32 +; SPLIT-DAG: ds_read_b32 +; SPLIT-DAG: ds_write2_b32 +; SPLIT-DAG: ds_write_b32 define amdgpu_kernel void @test_local_misaligned_v3(i32 addrspace(3)* %arg) { bb: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() From 8cb8cea1bd7f03330fc310b8993a3be89da90c1d Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Wed, 9 Sep 2020 10:40:23 +0100 Subject: [PATCH 0146/1079] [ARM] Fixup of a few test cases. NFC. After changing the semantics of get.active.lane.mask, I missed a few tests that should use now the tripcount instead of the backedge taken count. --- .../Thumb2/LowOverheadLoops/reductions.ll | 53 +++++++++---------- .../tail-pred-intrinsic-sub-sat.ll | 6 +-- llvm/test/CodeGen/Thumb2/active_lane_mask.ll | 16 +++--- llvm/test/Verifier/get-active-lane-mask.ll | 10 ++-- 4 files changed, 41 insertions(+), 44 deletions(-) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll index 0554742369fdc..b5cac5d6a3cf8 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll @@ -9,7 +9,7 @@ define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_add_add_v16i8(i8* nocaptur ; CHECK-NEXT: @ %bb.1: @ %vector.ph ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: dlstp.8 lr, r2 -; CHECK: .LBB0_2: @ %vector.body +; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u8 q1, [r1], #16 ; CHECK-NEXT: vldrb.u8 q2, [r0], #16 @@ -75,7 +75,7 @@ define dso_local arm_aapcs_vfpcc signext i16 @one_loop_add_add_v8i16(i8* nocaptu ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #3 ; CHECK-NEXT: dls lr, lr -; CHECK: .LBB1_2: @ %vector.body +; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.16 r2 ; CHECK-NEXT: vmov q0, q1 @@ -148,7 +148,7 @@ define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_sub_add_v16i8(i8* nocaptur ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #4 ; CHECK-NEXT: dls lr, lr -; CHECK: .LBB2_2: @ %vector.body +; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.8 r2 ; CHECK-NEXT: vmov q0, q1 @@ -218,7 +218,7 @@ define dso_local arm_aapcs_vfpcc signext i16 @one_loop_sub_add_v8i16(i8* nocaptu ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #3 ; CHECK-NEXT: dls lr, lr -; CHECK: .LBB3_2: @ %vector.body +; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.16 r2 ; CHECK-NEXT: vmov q0, q1 @@ -290,7 +290,7 @@ define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_mul_add_v16i8(i8* nocaptur ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #4 ; CHECK-NEXT: dls lr, lr -; CHECK: .LBB4_2: @ %vector.body +; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.8 r2 ; CHECK-NEXT: vmov q0, q1 @@ -360,7 +360,7 @@ define dso_local arm_aapcs_vfpcc signext i16 @one_loop_mul_add_v8i16(i8* nocaptu ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #3 ; CHECK-NEXT: dls lr, lr -; CHECK: .LBB5_2: @ %vector.body +; CHECK-NEXT: .LBB5_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.16 r2 ; CHECK-NEXT: vmov q0, q1 @@ -432,7 +432,7 @@ define 
dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture read ; CHECK-NEXT: add.w lr, r3, r6, lsr #2 ; CHECK-NEXT: mov r3, r2 ; CHECK-NEXT: dls lr, lr -; CHECK: .LBB6_2: @ %vector.body +; CHECK-NEXT: .LBB6_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: vmov q0, q1 @@ -454,7 +454,7 @@ define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture read ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vdup.32 q0, r3 ; CHECK-NEXT: vmov.32 q0[0], r12 -; CHECK: .LBB6_5: @ %vector.body46 +; CHECK-NEXT: .LBB6_5: @ %vector.body46 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 ; CHECK-NEXT: vmov q1, q0 @@ -559,7 +559,7 @@ define dso_local arm_aapcs_vfpcc void @two_reductions_mul_add_v8i16(i8* nocaptur ; CHECK-NEXT: mov r3, r0 ; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: dls lr, lr -; CHECK: .LBB7_2: @ %vector.body +; CHECK-NEXT: .LBB7_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.16 r2 ; CHECK-NEXT: vmov q0, q1 @@ -670,32 +670,31 @@ define i32 @wrongop(%struct.date* nocapture readonly %pd) { ; CHECK-NEXT: cmp r1, r2 ; CHECK-NEXT: cset r4, lo ; CHECK-NEXT: .LBB8_4: @ %lor.end -; CHECK-NEXT: ldr.w r3, [r12, #4] -; CHECK-NEXT: cmp r3, #1 +; CHECK-NEXT: ldr.w r1, [r12, #4] +; CHECK-NEXT: cmp r1, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB8_5: @ %vector.ph -; CHECK-NEXT: adds r1, r3, #3 +; CHECK-NEXT: adds r3, r1, #3 ; CHECK-NEXT: movs r2, #1 -; CHECK-NEXT: bic r1, r1, #3 -; CHECK-NEXT: subs r1, #4 -; CHECK-NEXT: add.w lr, r2, r1, lsr #2 -; CHECK-NEXT: movw r1, :lower16:days -; CHECK-NEXT: movt r1, :upper16:days -; CHECK-NEXT: movs r2, #52 -; CHECK-NEXT: mla r1, r4, r2, r1 -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: vdup.32 q0, r2 +; CHECK-NEXT: bic r3, r3, #3 +; CHECK-NEXT: subs r3, #4 +; CHECK-NEXT: add.w lr, r2, r3, lsr #2 +; CHECK-NEXT: movw r2, :lower16:days +; CHECK-NEXT: movt r2, :upper16:days +; CHECK-NEXT: movs r3, #52 +; CHECK-NEXT: mla r2, r4, r3, r2 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: vdup.32 q0, r3 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vmov.32 q0[0], r0 -; CHECK-NEXT: subs r0, r3, #1 -; CHECK: .LBB8_6: @ %vector.body +; CHECK-NEXT: .LBB8_6: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r0 +; CHECK-NEXT: vctp.32 r1 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst -; CHECK-NEXT: vldrwt.u32 q0, [r1], #16 -; CHECK-NEXT: subs r0, #4 +; CHECK-NEXT: vldrwt.u32 q0, [r2], #16 +; CHECK-NEXT: subs r1, #4 ; CHECK-NEXT: vadd.i32 q0, q0, q1 ; CHECK-NEXT: le lr, .LBB8_6 ; CHECK-NEXT: @ %bb.7: @ %middle.block @@ -738,7 +737,7 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ %5, %vector.ph ], [ %8, %vector.body ] %6 = getelementptr inbounds [2 x [13 x i32]], [2 x [13 x i32]]* @days, i32 0, i32 %3, i32 %index - %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %trip.count.minus.1) + %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %4) %7 = bitcast i32* %6 to <4 x i32>* %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* nonnull %7, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %8 = add <4 x i32> %wide.masked.load, %vec.phi diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll index 
5b2f3a7c98e8a..98d48d49539c5 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll @@ -10,7 +10,6 @@ define arm_aapcs_vfpcc void @usub_sat(i16* noalias nocapture readonly %pSrcA, i1 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB0_1: @ %vector.ph -; CHECK-NEXT: subs r3, #1 ; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -36,7 +35,7 @@ vector.body: ; preds = %vector.body, %vecto %next.gep = getelementptr i16, i16* %pSrcA, i32 %index %next.gep20 = getelementptr i16, i16* %pDst, i32 %index %next.gep21 = getelementptr i16, i16* %pSrcB, i32 %index - %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %trip.count.minus.1) + %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %blockSize) %0 = bitcast i16* %next.gep to <8 x i16>* %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) %1 = bitcast i16* %next.gep21 to <8 x i16>* @@ -61,7 +60,6 @@ define arm_aapcs_vfpcc void @ssub_sat(i16* noalias nocapture readonly %pSrcA, i1 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB1_1: @ %vector.ph -; CHECK-NEXT: subs r3, #1 ; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -87,7 +85,7 @@ vector.body: ; preds = %vector.body, %vecto %next.gep = getelementptr i16, i16* %pSrcA, i32 %index %next.gep20 = getelementptr i16, i16* %pDst, i32 %index %next.gep21 = getelementptr i16, i16* %pSrcB, i32 %index - %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %trip.count.minus.1) + %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %blockSize) %0 = bitcast i16* %next.gep to <8 x i16>* %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) %1 = bitcast i16* %next.gep21 to <8 x i16>* diff --git a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll index 116031cb895ff..2a5d32013d473 100644 --- a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll +++ b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve %s -o - | FileCheck %s -define <4 x i32> @v4i32(i32 %index, i32 %BTC, <4 x i32> %V1, <4 x i32> %V2) { +define <4 x i32> @v4i32(i32 %index, i32 %TC, <4 x i32> %V1, <4 x i32> %V2) { ; CHECK-LABEL: v4i32: ; CHECK: @ %bb.0: ; CHECK-NEXT: adr.w r12, .LCPI0_0 @@ -28,12 +28,12 @@ define <4 x i32> @v4i32(i32 %index, i32 %BTC, <4 x i32> %V1, <4 x i32> %V2) { ; CHECK-NEXT: .long 1 @ 0x1 ; CHECK-NEXT: .long 2 @ 0x2 ; CHECK-NEXT: .long 3 @ 0x3 - %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %BTC) + %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC) %select = select <4 x i1> %active.lane.mask, <4 x i32> %V1, <4 x i32> %V2 ret <4 x i32> %select } -define <7 x i32> @v7i32(i32 %index, i32 %BTC, <7 x i32> %V1, <7 x i32> %V2) { +define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) { ; CHECK-LABEL: v7i32: ; CHECK: @ %bb.0: ; CHECK-NEXT: adr r3, .LCPI1_0 @@ -105,12 +105,12 @@ define <7 x i32> 
@v7i32(i32 %index, i32 %BTC, <7 x i32> %V1, <7 x i32> %V2) { ; CHECK-NEXT: .long 5 @ 0x5 ; CHECK-NEXT: .long 6 @ 0x6 ; CHECK-NEXT: .zero 4 - %active.lane.mask = call <7 x i1> @llvm.get.active.lane.mask.v7i1.i32(i32 %index, i32 %BTC) + %active.lane.mask = call <7 x i1> @llvm.get.active.lane.mask.v7i1.i32(i32 %index, i32 %TC) %select = select <7 x i1> %active.lane.mask, <7 x i32> %V1, <7 x i32> %V2 ret <7 x i32> %select } -define <8 x i16> @v8i16(i32 %index, i32 %BTC, <8 x i16> %V1, <8 x i16> %V2) { +define <8 x i16> @v8i16(i32 %index, i32 %TC, <8 x i16> %V1, <8 x i16> %V2) { ; CHECK-LABEL: v8i16: ; CHECK: @ %bb.0: ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} @@ -189,12 +189,12 @@ define <8 x i16> @v8i16(i32 %index, i32 %BTC, <8 x i16> %V1, <8 x i16> %V2) { ; CHECK-NEXT: .long 5 @ 0x5 ; CHECK-NEXT: .long 6 @ 0x6 ; CHECK-NEXT: .long 7 @ 0x7 - %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %BTC) + %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC) %select = select <8 x i1> %active.lane.mask, <8 x i16> %V1, <8 x i16> %V2 ret <8 x i16> %select } -define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) { +define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) { ; CHECK-LABEL: v16i8: ; CHECK: @ %bb.0: ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} @@ -405,7 +405,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) { ; CHECK-NEXT: .long 13 @ 0xd ; CHECK-NEXT: .long 14 @ 0xe ; CHECK-NEXT: .long 15 @ 0xf - %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %BTC) + %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC) %select = select <16 x i1> %active.lane.mask, <16 x i8> %V1, <16 x i8> %V2 ret <16 x i8> %select } diff --git a/llvm/test/Verifier/get-active-lane-mask.ll b/llvm/test/Verifier/get-active-lane-mask.ll index 94d819b5c75b0..c637916faccfc 100644 --- a/llvm/test/Verifier/get-active-lane-mask.ll +++ b/llvm/test/Verifier/get-active-lane-mask.ll @@ -2,20 +2,20 @@ declare <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32, i32) -define <4 x i32> @t1(i32 %IV, i32 %BTC) { +define <4 x i32> @t1(i32 %IV, i32 %TC) { ; CHECK: get_active_lane_mask: element type is not i1 -; CHECK-NEXT: %res = call <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32 %IV, i32 %BTC) +; CHECK-NEXT: %res = call <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32 %IV, i32 %TC) - %res = call <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32 %IV, i32 %BTC) + %res = call <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32 %IV, i32 %TC) ret <4 x i32> %res } declare i32 @llvm.get.active.lane.mask.i32.i32(i32, i32) -define i32 @t2(i32 %IV, i32 %BTC) { +define i32 @t2(i32 %IV, i32 %TC) { ; CHECK: Intrinsic has incorrect return type! ; CHECK-NEXT: i32 (i32, i32)* @llvm.get.active.lane.mask.i32.i32 - %res = call i32 @llvm.get.active.lane.mask.i32.i32(i32 %IV, i32 %BTC) + %res = call i32 @llvm.get.active.lane.mask.i32.i32(i32 %IV, i32 %TC) ret i32 %res } From 3a61bfb027a623807a30adb496ab62203c9b4ba5 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 9 Sep 2020 10:24:49 +0100 Subject: [PATCH 0147/1079] [DomTree] Use SmallVector instead of std::vector. Currentl DomTreeNodeBase is using std::vectot to store it's children. Using SmallVector should be more efficient in terms of compile-time. 
A size of 4 seems to be the sweet-spot in terms of compile-time, according to http://llvm-compile-time-tracker.com/compare.php?from=9933188c90615c9c264ebb69117f09726e909a25&to=d7a801d027648877b20f0e00e822a7a64c58d976&stat=instructions This results in the following geomean improvements ``` geomean insts max rss O3 -0.31 % +0.02 % ReleaseThinLTO -0.35 % -0.12 % ReleaseLTO -0.28 % -0.12 % O0 -0.06 % -0.02 % NewPM O3 -0.36 % +0.05 % ReleaseThinLTO (link only) -0.44 % -0.10 % ReleaseLTO-g (link only): -0.32 % -0.03 % ``` I am not sure if there's any other benefits of using std::vector over SmallVector. Reviewed By: kuhar, asbirlea Differential Revision: https://reviews.llvm.org/D87319 --- llvm/include/llvm/Support/GenericDomTree.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/Support/GenericDomTree.h b/llvm/include/llvm/Support/GenericDomTree.h index 76973f521042c..c77168432058a 100644 --- a/llvm/include/llvm/Support/GenericDomTree.h +++ b/llvm/include/llvm/Support/GenericDomTree.h @@ -38,7 +38,6 @@ #include #include #include -#include namespace llvm { @@ -61,7 +60,7 @@ template class DomTreeNodeBase { NodeT *TheBB; DomTreeNodeBase *IDom; unsigned Level; - std::vector Children; + SmallVector Children; mutable unsigned DFSNumIn = ~0; mutable unsigned DFSNumOut = ~0; @@ -69,9 +68,9 @@ template class DomTreeNodeBase { DomTreeNodeBase(NodeT *BB, DomTreeNodeBase *iDom) : TheBB(BB), IDom(iDom), Level(IDom ? IDom->Level + 1 : 0) {} - using iterator = typename std::vector::iterator; + using iterator = typename SmallVector::iterator; using const_iterator = - typename std::vector::const_iterator; + typename SmallVector::const_iterator; iterator begin() { return Children.begin(); } iterator end() { return Children.end(); } @@ -837,7 +836,7 @@ class DominatorTreeBase { "NewBB should have a single successor!"); NodeRef NewBBSucc = *GraphT::child_begin(NewBB); - std::vector PredBlocks; + SmallVector PredBlocks; for (auto Pred : children>(NewBB)) PredBlocks.push_back(Pred); From b5bc56da8aa23dc57db9d286b0591dbcf9b1bdd3 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 9 Sep 2020 03:06:46 -0700 Subject: [PATCH 0148/1079] [NFC][Asan] Fit ChunkHeader into redzone In code as-is min redzone and ChunkHeader are 16 byte. This patch just makes sure that redzone is calculated correctly if we extend ChunkHeader. --- compiler-rt/lib/asan/asan_allocator.cpp | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index a15c569b42ba0..64796f7526714 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -354,17 +354,18 @@ struct Allocator { // -------------------- Helper methods. ------------------------- uptr ComputeRZLog(uptr user_requested_size) { - u32 rz_log = - user_requested_size <= 64 - 16 ? 0 : - user_requested_size <= 128 - 32 ? 1 : - user_requested_size <= 512 - 64 ? 2 : - user_requested_size <= 4096 - 128 ? 3 : - user_requested_size <= (1 << 14) - 256 ? 4 : - user_requested_size <= (1 << 15) - 512 ? 5 : - user_requested_size <= (1 << 16) - 1024 ? 6 : 7; - u32 min_rz = atomic_load(&min_redzone, memory_order_acquire); - u32 max_rz = atomic_load(&max_redzone, memory_order_acquire); - return Min(Max(rz_log, RZSize2Log(min_rz)), RZSize2Log(max_rz)); + u32 rz_log = user_requested_size <= 64 - 16 ? 0 + : user_requested_size <= 128 - 32 ? 1 + : user_requested_size <= 512 - 64 ? 
2 + : user_requested_size <= 4096 - 128 ? 3 + : user_requested_size <= (1 << 14) - 256 ? 4 + : user_requested_size <= (1 << 15) - 512 ? 5 + : user_requested_size <= (1 << 16) - 1024 ? 6 + : 7; + u32 hdr_log = RZSize2Log(RoundUpToPowerOfTwo(sizeof(ChunkHeader))); + u32 min_log = RZSize2Log(atomic_load(&min_redzone, memory_order_acquire)); + u32 max_log = RZSize2Log(atomic_load(&max_redzone, memory_order_acquire)); + return Min(Max(rz_log, Max(min_log, hdr_log)), Max(max_log, hdr_log)); } static uptr ComputeUserRequestedAlignmentLog(uptr user_requested_alignment) { From 24ecfdac7b7d195795b6cb0e373cba8bfa7911f4 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 9 Sep 2020 10:58:59 +0100 Subject: [PATCH 0149/1079] [APFloat] Fix uninitialized variable in IEEEFloat constructors Some constructors of IEEEFloat do not initialize member variable exponent. Fix it by initializing exponent with the following values: For NaNs, the `exponent` is `maxExponent+1`. For Infinities, the `exponent` is `maxExponent+1`. For Zeroes, the `exponent` is `maxExponent-1`. Patch by: @nullptr.cpp (Yang Fan) Differential Revision: https://reviews.llvm.org/D86997 --- llvm/include/llvm/ADT/APFloat.h | 5 ++- llvm/lib/Support/APFloat.cpp | 68 ++++++++++++++++----------------- 2 files changed, 38 insertions(+), 35 deletions(-) diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index 876e52c150a05..1f9ac22621a6d 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -249,7 +249,7 @@ class IEEEFloat final : public APFloatBase { /// \name Constructors /// @{ - IEEEFloat(const fltSemantics &); // Default construct to 0.0 + IEEEFloat(const fltSemantics &); // Default construct to +0.0 IEEEFloat(const fltSemantics &, integerPart); IEEEFloat(const fltSemantics &, uninitializedTag); IEEEFloat(const fltSemantics &, const APInt &); @@ -539,6 +539,9 @@ class IEEEFloat final : public APFloatBase { roundingMode) const; opStatus roundSignificandWithExponent(const integerPart *, unsigned int, int, roundingMode); + ExponentType exponentNaN() const; + ExponentType exponentInf() const; + ExponentType exponentZero() const; /// @} diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index 569cac790af99..7a4c8bd3639d5 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -755,6 +755,7 @@ void IEEEFloat::copySignificand(const IEEEFloat &rhs) { void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { category = fcNaN; sign = Negative; + exponent = exponentNaN(); integerPart *significand = significandParts(); unsigned numParts = partCount(); @@ -925,8 +926,7 @@ IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { initialize(&ourSemantics); - category = fcZero; - sign = false; + makeZero(false); } // Delegate to the previous constructor, because later copy constructor may @@ -3379,15 +3379,13 @@ void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) { sign = static_cast(i2>>15); if (myexponent == 0 && mysignificand == 0) { - // exponent, significand meaningless - category = fcZero; + makeZero(sign); } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { - // exponent, significand meaningless - category = fcInfinity; + makeInf(sign); } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) || (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) { - // exponent meaningless category = fcNaN; + 
exponent = exponentNaN(); significandParts()[0] = mysignificand; significandParts()[1] = 0; } else { @@ -3438,16 +3436,14 @@ void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { sign = static_cast(i2>>63); if (myexponent==0 && (mysignificand==0 && mysignificand2==0)) { - // exponent, significand meaningless - category = fcZero; + makeZero(sign); } else if (myexponent==0x7fff && (mysignificand==0 && mysignificand2==0)) { - // exponent, significand meaningless - category = fcInfinity; + makeInf(sign); } else if (myexponent==0x7fff && (mysignificand!=0 || mysignificand2 !=0)) { - // exponent meaningless category = fcNaN; + exponent = exponentNaN(); significandParts()[0] = mysignificand; significandParts()[1] = mysignificand2; } else { @@ -3473,14 +3469,12 @@ void IEEEFloat::initFromDoubleAPInt(const APInt &api) { sign = static_cast(i>>63); if (myexponent==0 && mysignificand==0) { - // exponent, significand meaningless - category = fcZero; + makeZero(sign); } else if (myexponent==0x7ff && mysignificand==0) { - // exponent, significand meaningless - category = fcInfinity; + makeInf(sign); } else if (myexponent==0x7ff && mysignificand!=0) { - // exponent meaningless category = fcNaN; + exponent = exponentNaN(); *significandParts() = mysignificand; } else { category = fcNormal; @@ -3504,14 +3498,12 @@ void IEEEFloat::initFromFloatAPInt(const APInt &api) { sign = i >> 31; if (myexponent==0 && mysignificand==0) { - // exponent, significand meaningless - category = fcZero; + makeZero(sign); } else if (myexponent==0xff && mysignificand==0) { - // exponent, significand meaningless - category = fcInfinity; + makeInf(sign); } else if (myexponent==0xff && mysignificand!=0) { - // sign, exponent, significand meaningless category = fcNaN; + exponent = exponentNaN(); *significandParts() = mysignificand; } else { category = fcNormal; @@ -3535,14 +3527,12 @@ void IEEEFloat::initFromBFloatAPInt(const APInt &api) { sign = i >> 15; if (myexponent == 0 && mysignificand == 0) { - // exponent, significand meaningless - category = fcZero; + makeZero(sign); } else if (myexponent == 0xff && mysignificand == 0) { - // exponent, significand meaningless - category = fcInfinity; + makeInf(sign); } else if (myexponent == 0xff && mysignificand != 0) { - // sign, exponent, significand meaningless category = fcNaN; + exponent = exponentNaN(); *significandParts() = mysignificand; } else { category = fcNormal; @@ -3566,14 +3556,12 @@ void IEEEFloat::initFromHalfAPInt(const APInt &api) { sign = i >> 15; if (myexponent==0 && mysignificand==0) { - // exponent, significand meaningless - category = fcZero; + makeZero(sign); } else if (myexponent==0x1f && mysignificand==0) { - // exponent, significand meaningless - category = fcInfinity; + makeInf(sign); } else if (myexponent==0x1f && mysignificand!=0) { - // sign, exponent, significand meaningless category = fcNaN; + exponent = exponentNaN(); *significandParts() = mysignificand; } else { category = fcNormal; @@ -4131,17 +4119,29 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { return result; } +APFloatBase::ExponentType IEEEFloat::exponentNaN() const { + return semantics->maxExponent + 1; +} + +APFloatBase::ExponentType IEEEFloat::exponentInf() const { + return semantics->maxExponent + 1; +} + +APFloatBase::ExponentType IEEEFloat::exponentZero() const { + return semantics->minExponent - 1; +} + void IEEEFloat::makeInf(bool Negative) { category = fcInfinity; sign = Negative; - exponent = semantics->maxExponent + 1; + exponent = exponentInf(); 
APInt::tcSet(significandParts(), 0, partCount()); } void IEEEFloat::makeZero(bool Negative) { category = fcZero; sign = Negative; - exponent = semantics->minExponent-1; + exponent = exponentZero(); APInt::tcSet(significandParts(), 0, partCount()); } From f16b2d83154aed71aaf9a0717fbb0199d027f312 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 9 Sep 2020 11:17:49 +0100 Subject: [PATCH 0150/1079] ARMTargetParser.cpp - use auto const references in for range loops. NFCI. Fix static analysis warnings about unnecessary copies. --- llvm/lib/Support/ARMTargetParser.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Support/ARMTargetParser.cpp b/llvm/lib/Support/ARMTargetParser.cpp index 751f84475f42c..73baac832ee30 100644 --- a/llvm/lib/Support/ARMTargetParser.cpp +++ b/llvm/lib/Support/ARMTargetParser.cpp @@ -255,7 +255,7 @@ ARM::ISAKind ARM::parseArchISA(StringRef Arch) { unsigned ARM::parseFPU(StringRef FPU) { StringRef Syn = getFPUSynonym(FPU); - for (const auto F : FPUNames) { + for (const auto &F : FPUNames) { if (Syn == F.getName()) return F.ID; } @@ -409,7 +409,7 @@ bool ARM::getExtensionFeatures(uint64_t Extensions, if (Extensions == AEK_INVALID) return false; - for (const auto AE : ARCHExtNames) { + for (const auto &AE : ARCHExtNames) { if ((Extensions & AE.ID) == AE.ID && AE.Feature) Features.push_back(AE.Feature); else if (AE.NegFeature) @@ -436,7 +436,7 @@ unsigned ARM::getArchAttr(ARM::ArchKind AK) { } StringRef ARM::getArchExtName(uint64_t ArchExtKind) { - for (const auto AE : ARCHExtNames) { + for (const auto &AE : ARCHExtNames) { if (ArchExtKind == AE.ID) return AE.getName(); } @@ -453,7 +453,7 @@ static bool stripNegationPrefix(StringRef &Name) { StringRef ARM::getArchExtFeature(StringRef ArchExt) { bool Negated = stripNegationPrefix(ArchExt); - for (const auto AE : ARCHExtNames) { + for (const auto &AE : ARCHExtNames) { if (AE.Feature && ArchExt == AE.getName()) return StringRef(Negated ? AE.NegFeature : AE.Feature); } @@ -502,7 +502,7 @@ bool ARM::appendArchExtFeatures(StringRef CPU, ARM::ArchKind AK, if (ID == AEK_INVALID) return false; - for (const auto AE : ARCHExtNames) { + for (const auto &AE : ARCHExtNames) { if (Negated) { if ((AE.ID & ID) == ID && AE.NegFeature) Features.push_back(AE.NegFeature); @@ -535,7 +535,7 @@ bool ARM::appendArchExtFeatures(StringRef CPU, ARM::ArchKind AK, } StringRef ARM::getHWDivName(uint64_t HWDivKind) { - for (const auto D : HWDivNames) { + for (const auto &D : HWDivNames) { if (HWDivKind == D.ID) return D.getName(); } @@ -548,7 +548,7 @@ StringRef ARM::getDefaultCPU(StringRef Arch) { return StringRef(); // Look for multiple AKs to find the default for pair AK+Name. 
- for (const auto CPU : CPUNames) { + for (const auto &CPU : CPUNames) { if (CPU.ArchID == AK && CPU.Default) return CPU.getName(); } @@ -559,7 +559,7 @@ StringRef ARM::getDefaultCPU(StringRef Arch) { uint64_t ARM::parseHWDiv(StringRef HWDiv) { StringRef Syn = getHWDivSynonym(HWDiv); - for (const auto D : HWDivNames) { + for (const auto &D : HWDivNames) { if (Syn == D.getName()) return D.ID; } @@ -567,7 +567,7 @@ uint64_t ARM::parseHWDiv(StringRef HWDiv) { } uint64_t ARM::parseArchExt(StringRef ArchExt) { - for (const auto A : ARCHExtNames) { + for (const auto &A : ARCHExtNames) { if (ArchExt == A.getName()) return A.ID; } @@ -575,7 +575,7 @@ uint64_t ARM::parseArchExt(StringRef ArchExt) { } ARM::ArchKind ARM::parseCPUArch(StringRef CPU) { - for (const auto C : CPUNames) { + for (const auto &C : CPUNames) { if (CPU == C.getName()) return C.ArchID; } From 455cce3e216ba3cac0844b4ee9cf85791c1ac046 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 9 Sep 2020 11:26:21 +0100 Subject: [PATCH 0151/1079] TrigramIndex.cpp - remove unnecessary includes. NFCI. TrigramIndex.h already includes most of these. --- llvm/include/llvm/Support/TrigramIndex.h | 2 +- llvm/lib/Support/TrigramIndex.cpp | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/llvm/include/llvm/Support/TrigramIndex.h b/llvm/include/llvm/Support/TrigramIndex.h index d635694eb5fd3..360ab94597902 100644 --- a/llvm/include/llvm/Support/TrigramIndex.h +++ b/llvm/include/llvm/Support/TrigramIndex.h @@ -27,7 +27,7 @@ #define LLVM_SUPPORT_TRIGRAMINDEX_H #include "llvm/ADT/SmallVector.h" - +#include "llvm/ADT/StringRef.h" #include #include #include diff --git a/llvm/lib/Support/TrigramIndex.cpp b/llvm/lib/Support/TrigramIndex.cpp index 88375e6e78639..1f1f3022b0b30 100644 --- a/llvm/lib/Support/TrigramIndex.cpp +++ b/llvm/lib/Support/TrigramIndex.cpp @@ -15,12 +15,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/TrigramIndex.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" - #include -#include -#include using namespace llvm; From 25ce1e0497259711836f949005297125e92a6e93 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Tue, 8 Sep 2020 11:41:19 +0900 Subject: [PATCH 0152/1079] [ValueTracking] Add UndefOrPoison/Poison-only version of relevant functions This patch adds isGuaranteedNotToBePoison and programUndefinedIfUndefOrPoison. isGuaranteedNotToBePoison will be used at D75808. The latter function is used at isGuaranteedNotToBePoison. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D84242 --- llvm/include/llvm/Analysis/ValueTracking.h | 24 ++-- llvm/lib/Analysis/ScalarEvolution.cpp | 2 +- llvm/lib/Analysis/ValueTracking.cpp | 107 +++++++++++++----- .../Instrumentation/PoisonChecking.cpp | 2 +- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 2 +- llvm/unittests/Analysis/ValueTrackingTest.cpp | 48 +++++++- 6 files changed, 146 insertions(+), 39 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index f9a27a8ec4b09..8ddbcbf4d6433 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -584,25 +584,27 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// if, for all i, r is evaluated to poison or op raises UB if vi = poison. /// To filter out operands that raise UB on poison, you can use /// getGuaranteedNonPoisonOp. 
- bool propagatesPoison(const Instruction *I); + bool propagatesPoison(const Operator *I); /// Insert operands of I into Ops such that I will trigger undefined behavior /// if I is executed and that operand has a poison value. void getGuaranteedNonPoisonOps(const Instruction *I, SmallPtrSetImpl &Ops); - /// Return true if the given instruction must trigger undefined behavior. + /// Return true if the given instruction must trigger undefined behavior /// when I is executed with any operands which appear in KnownPoison holding /// a poison value at the point of execution. bool mustTriggerUB(const Instruction *I, const SmallSet& KnownPoison); - /// Return true if this function can prove that if PoisonI is executed - /// and yields a poison value, then that will trigger undefined behavior. + /// Return true if this function can prove that if Inst is executed + /// and yields a poison value or undef bits, then that will trigger + /// undefined behavior. /// /// Note that this currently only considers the basic block that is - /// the parent of I. - bool programUndefinedIfPoison(const Instruction *PoisonI); + /// the parent of Inst. + bool programUndefinedIfUndefOrPoison(const Instruction *Inst); + bool programUndefinedIfPoison(const Instruction *Inst); /// canCreateUndefOrPoison returns true if Op can create undef or poison from /// non-undef & non-poison operands. @@ -618,9 +620,9 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; bool canCreateUndefOrPoison(const Operator *Op); bool canCreatePoison(const Operator *Op); - /// Return true if this function can prove that V is never undef value - /// or poison value. If V is an aggregate value or vector, check whether all - /// elements (except padding) are not undef or poison. + /// Return true if this function can prove that V does not have undef bits + /// and is never poison. If V is an aggregate value or vector, check whether + /// all elements (except padding) are not undef or poison. /// Note that this is different from canCreateUndefOrPoison because the /// function assumes Op's operands are not poison/undef. /// @@ -631,6 +633,10 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr, unsigned Depth = 0); + bool isGuaranteedNotToBePoison(const Value *V, + const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr, + unsigned Depth = 0); /// Specific patterns of select instructions we can match. 
enum SelectPatternFlavor { diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 11d92bc816e9f..649e8d3733a9b 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -5912,7 +5912,7 @@ bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) { const Instruction *Poison = PoisonStack.pop_back_val(); for (auto *PoisonUser : Poison->users()) { - if (propagatesPoison(cast(PoisonUser))) { + if (propagatesPoison(cast(PoisonUser))) { if (Pushed.insert(cast(PoisonUser)).second) PoisonStack.push_back(cast(PoisonUser)); } else if (auto *BI = dyn_cast(PoisonUser)) { diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 5eb66e96e1d85..469257d91071d 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4860,10 +4860,13 @@ bool llvm::canCreatePoison(const Operator *Op) { return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/true); } -bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, - const Instruction *CtxI, - const DominatorTree *DT, - unsigned Depth) { +static bool programUndefinedIfUndefOrPoison(const Instruction *Inst, + bool PoisonOnly); + +static bool isGuaranteedNotToBeUndefOrPoison(const Value *V, + const Instruction *CtxI, + const DominatorTree *DT, + unsigned Depth, bool PoisonOnly) { if (Depth >= MaxAnalysisRecursionDepth) return false; @@ -4874,14 +4877,15 @@ bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, if (auto *C = dyn_cast(V)) { if (isa(C)) - return false; + return PoisonOnly; if (isa(C) || isa(C) || isa(V) || isa(C) || isa(C)) return true; if (C->getType()->isVectorTy() && !isa(C)) - return !C->containsConstantExpression() && !C->containsUndefElement(); + return (PoisonOnly || !C->containsUndefElement()) && + !C->containsConstantExpression(); } // Strip cast operations from a pointer value. @@ -4898,7 +4902,7 @@ bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, return true; auto OpCheck = [&](const Value *V) { - return isGuaranteedNotToBeUndefOrPoison(V, CtxI, DT, Depth + 1); + return isGuaranteedNotToBeUndefOrPoison(V, CtxI, DT, Depth + 1, PoisonOnly); }; if (auto *Opr = dyn_cast(V)) { @@ -4917,9 +4921,7 @@ bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, } if (auto *I = dyn_cast(V)) { - if (programUndefinedIfPoison(I) && I->getType()->isIntegerTy(1)) - // Note: once we have an agreement that poison is a value-wise concept, - // we can remove the isIntegerTy(1) constraint. 
+ if (programUndefinedIfUndefOrPoison(I, PoisonOnly)) return true; } @@ -4941,12 +4943,24 @@ bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, while (Dominator) { auto *TI = Dominator->getBlock()->getTerminator(); + Value *Cond = nullptr; if (auto BI = dyn_cast(TI)) { - if (BI->isConditional() && BI->getCondition() == V) - return true; + if (BI->isConditional()) + Cond = BI->getCondition(); } else if (auto SI = dyn_cast(TI)) { - if (SI->getCondition() == V) + Cond = SI->getCondition(); + } + + if (Cond) { + if (Cond == V) return true; + else if (PoisonOnly && isa(Cond)) { + // For poison, we can analyze further + auto *Opr = cast(Cond); + if (propagatesPoison(Opr) && + any_of(Opr->operand_values(), [&](Value *Op) { return Op == V; })) + return true; + } } Dominator = Dominator->getIDom(); @@ -4955,6 +4969,18 @@ bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, return false; } +bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, + const Instruction *CtxI, + const DominatorTree *DT, + unsigned Depth) { + return ::isGuaranteedNotToBeUndefOrPoison(V, CtxI, DT, Depth, false); +} + +bool llvm::isGuaranteedNotToBePoison(const Value *V, const Instruction *CtxI, + const DominatorTree *DT, unsigned Depth) { + return ::isGuaranteedNotToBeUndefOrPoison(V, CtxI, DT, Depth, true); +} + OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add, const DataLayout &DL, AssumptionCache *AC, @@ -5048,7 +5074,7 @@ bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, llvm_unreachable("Instruction not contained in its own parent basic block."); } -bool llvm::propagatesPoison(const Instruction *I) { +bool llvm::propagatesPoison(const Operator *I) { switch (I->getOpcode()) { case Instruction::Freeze: case Instruction::Select: @@ -5124,30 +5150,51 @@ bool llvm::mustTriggerUB(const Instruction *I, return false; } - -bool llvm::programUndefinedIfPoison(const Instruction *PoisonI) { - // We currently only look for uses of poison values within the same basic +static bool programUndefinedIfUndefOrPoison(const Instruction *Inst, + bool PoisonOnly) { + // We currently only look for uses of values within the same basic // block, as that makes it easier to guarantee that the uses will be - // executed given that PoisonI is executed. + // executed given that Inst is executed. // // FIXME: Expand this to consider uses beyond the same basic block. To do // this, look out for the distinction between post-dominance and strong // post-dominance. - const BasicBlock *BB = PoisonI->getParent(); + const BasicBlock *BB = Inst->getParent(); + + BasicBlock::const_iterator Begin = Inst->getIterator(), End = BB->end(); + + if (!PoisonOnly) { + // Be conservative & just check whether a value is passed to a noundef + // argument. + // Instructions that raise UB with a poison operand are well-defined + // or have unclear semantics when the input is partially undef. + // For example, 'udiv x, (undef | 1)' isn't UB. + + for (auto &I : make_range(Begin, End)) { + if (const auto *CB = dyn_cast(&I)) { + for (unsigned i = 0; i < CB->arg_size(); ++i) { + if (CB->paramHasAttr(i, Attribute::NoUndef) && + CB->getArgOperand(i) == Inst) + return true; + } + } + if (!isGuaranteedToTransferExecutionToSuccessor(&I)) + break; + } + return false; + } - // Set of instructions that we have proved will yield poison if PoisonI + // Set of instructions that we have proved will yield poison if Inst // does. 
   SmallSet<const Value *, 16> YieldsPoison;
   SmallSet<const BasicBlock *, 4> Visited;
-  YieldsPoison.insert(PoisonI);
-  Visited.insert(PoisonI->getParent());
-
-  BasicBlock::const_iterator Begin = PoisonI->getIterator(), End = BB->end();
+  YieldsPoison.insert(Inst);
+  Visited.insert(Inst->getParent());
 
   unsigned Iter = 0;
   while (Iter++ < MaxAnalysisRecursionDepth) {
     for (auto &I : make_range(Begin, End)) {
-      if (&I != PoisonI) {
+      if (&I != Inst) {
         if (mustTriggerUB(&I, YieldsPoison))
           return true;
         if (!isGuaranteedToTransferExecutionToSuccessor(&I))
@@ -5158,7 +5205,7 @@ bool llvm::programUndefinedIfPoison(const Instruction *PoisonI) {
       if (YieldsPoison.count(&I)) {
         for (const User *User : I.users()) {
           const Instruction *UserI = cast<Instruction>(User);
-          if (propagatesPoison(UserI))
+          if (propagatesPoison(cast<Operator>(UserI)))
             YieldsPoison.insert(User);
         }
       }
@@ -5178,6 +5225,14 @@ bool llvm::programUndefinedIfPoison(const Instruction *PoisonI) {
   return false;
 }
 
+bool llvm::programUndefinedIfUndefOrPoison(const Instruction *Inst) {
+  return ::programUndefinedIfUndefOrPoison(Inst, false);
+}
+
+bool llvm::programUndefinedIfPoison(const Instruction *Inst) {
+  return ::programUndefinedIfUndefOrPoison(Inst, true);
+}
+
 static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
   if (FMF.noNaNs())
     return true;
diff --git a/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
index 6f785687b5045..fc5267261851d 100644
--- a/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
@@ -295,7 +295,7 @@ static bool rewrite(Function &F) {
     }
 
     SmallVector<Value *, 4> Checks;
-    if (propagatesPoison(&I))
+    if (propagatesPoison(cast<Operator>(&I)))
       for (Value *V : I.operands())
         Checks.push_back(getPoisonFor(ValToPoison, V));
 
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 20b85626dced9..f5a74b86ae9d1 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1824,7 +1824,7 @@ static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
 
     // If we can't analyze propagation through this instruction, just skip it
     // and transitive users. Safe as false is a conservative result.
-    if (!propagatesPoison(I) && I != Root)
+    if (!propagatesPoison(cast<Operator>(I)) && I != Root)
       continue;
 
     if (KnownPoison.insert(I).second)
diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp
index 3df5dc1fb82d4..09faad4484599 100644
--- a/llvm/unittests/Analysis/ValueTrackingTest.cpp
+++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp
@@ -10,6 +10,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
@@ -716,12 +717,57 @@ TEST(ValueTracking, propagatesPoison) {
   for (auto &I : BB) {
     if (isa<ReturnInst>(&I))
       break;
-    EXPECT_EQ(propagatesPoison(&I), Data[Index].first)
+    EXPECT_EQ(propagatesPoison(cast<Operator>(&I)), Data[Index].first)
         << "Incorrect answer at instruction " << Index << " = " << I;
     Index++;
   }
 }
 
+TEST_F(ValueTrackingTest, programUndefinedIfPoison) {
+  parseAssembly("declare i32 @any_num()"
+                "define void @test(i32 %mask) {\n"
+                "  %A = call i32 @any_num()\n"
+                "  %B = or i32 %A, %mask\n"
+                "  udiv i32 1, %B"
+                "  ret void\n"
+                "}\n");
+  // If %A was poison, udiv raises UB regardless of %mask's value
+  EXPECT_EQ(programUndefinedIfPoison(A), true);
+}
+
+TEST_F(ValueTrackingTest, programUndefinedIfUndefOrPoison) {
+  parseAssembly("declare i32 @any_num()"
+                "define void @test(i32 %mask) {\n"
+                "  %A = call i32 @any_num()\n"
+                "  %B = or i32 %A, %mask\n"
+                "  udiv i32 1, %B"
+                "  ret void\n"
+                "}\n");
+  // If %A was undef and %mask was 1, udiv does not raise UB
+  EXPECT_EQ(programUndefinedIfUndefOrPoison(A), false);
+}
+
+TEST_F(ValueTrackingTest, isGuaranteedNotToBePoison_exploitBranchCond) {
+  parseAssembly("declare i1 @any_bool()"
+                "define void @test(i1 %y) {\n"
+                "  %A = call i1 @any_bool()\n"
+                "  %cond = and i1 %A, %y\n"
+                "  br i1 %cond, label %BB1, label %BB2\n"
+                "BB1:\n"
+                "  ret void\n"
+                "BB2:\n"
+                "  ret void\n"
+                "}\n");
+  DominatorTree DT(*F);
+  for (auto &BB : *F) {
+    if (&BB == &F->getEntryBlock())
+      continue;
+
+    EXPECT_EQ(isGuaranteedNotToBePoison(A, BB.getTerminator(), &DT), true)
+        << "isGuaranteedNotToBePoison does not hold at "
+        << *BB.getTerminator();
+  }
+}
+
 TEST(ValueTracking, canCreatePoisonOrUndef) {
   std::string AsmHead =
       "declare i32 @g(i32)\n"
From 0fd425af071a9bc5c0891a4db09f4d9a466b7be9 Mon Sep 17 00:00:00 2001
From: Irina Dobrescu
Date: Wed, 9 Sep 2020 11:50:13 +0100
Subject: [PATCH 0153/1079] [flang] Add Semantic Checks for OpenMP Allocate
 Clause

Reviewed By: kiranchandramohan, clementval, kiranktp, raghavendhra

Differential Revision: https://reviews.llvm.org/D86051
---
 flang/include/flang/Semantics/symbol.h        |  6 +-
 flang/lib/Semantics/check-omp-structure.cpp   |  3 +
 flang/lib/Semantics/check-omp-structure.h     |  1 +
 flang/lib/Semantics/resolve-directives.cpp    | 74 ++++++++++++++++++-
 .../test/Semantics/omp-clause-validity01.f90  | 35 +++++++--
 flang/test/Semantics/omp-resolve06.f90        | 54 ++++++++++++++
 6 files changed, 164 insertions(+), 9 deletions(-)
 create mode 100644 flang/test/Semantics/omp-resolve06.f90

diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h
index 981abb8555f8f..5f861d10332ed 100644
--- a/flang/include/flang/Semantics/symbol.h
+++ b/flang/include/flang/Semantics/symbol.h
@@ -501,9 +501,9 @@ class Symbol {
       // OpenMP data-mapping attribute
       OmpMapTo, OmpMapFrom, OmpMapAlloc, OmpMapRelease, OmpMapDelete,
       // OpenMP miscellaneous flags
-      OmpCommonBlock, OmpReduction, OmpDeclareSimd, OmpDeclareTarget,
- OmpThreadprivate, OmpDeclareReduction, OmpFlushed, OmpCriticalLock, - OmpIfSpecified, OmpNone, OmpPreDetermined); + OmpCommonBlock, OmpReduction, OmpAllocate, OmpDeclareSimd, + OmpDeclareTarget, OmpThreadprivate, OmpDeclareReduction, OmpFlushed, + OmpCriticalLock, OmpIfSpecified, OmpNone, OmpPreDetermined); using Flags = common::EnumSet; const Scope &owner() const { return *owner_; } diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 6a4980ebcd544..3e360b8ec4ca4 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -456,6 +456,9 @@ void OmpStructureChecker::Enter(const parser::OmpAlignedClause &x) { } // 2.8.1 TODO: list-item attribute check } +void OmpStructureChecker::Enter(const parser::OmpAllocateClause &) { + CheckAllowed(llvm::omp::Clause::OMPC_allocate); +} void OmpStructureChecker::Enter(const parser::OmpDefaultClause &) { CheckAllowed(llvm::omp::Clause::OMPC_default); } diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 9a0c1e2c0a2d4..fbe95d0ee2e0a 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -150,6 +150,7 @@ class OmpStructureChecker void Enter(const parser::OmpClause::IsDevicePtr &); void Enter(const parser::OmpAlignedClause &); + void Enter(const parser::OmpAllocateClause &); void Enter(const parser::OmpDefaultClause &); void Enter(const parser::OmpDefaultmapClause &); void Enter(const parser::OmpDependClause &); diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index e73bfa7c37ccf..f68bcd1e1fa86 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -13,6 +13,7 @@ #include "resolve-names-utils.h" #include "flang/Common/idioms.h" #include "flang/Evaluate/fold.h" +#include "flang/Evaluate/type.h" #include "flang/Parser/parse-tree-visitor.h" #include "flang/Parser/parse-tree.h" #include "flang/Parser/tools.h" @@ -226,7 +227,8 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { } bool Pre(const parser::OpenMPBlockConstruct &); - void Post(const parser::OpenMPBlockConstruct &) { PopContext(); } + void Post(const parser::OpenMPBlockConstruct &); + void Post(const parser::OmpBeginBlockDirective &) { GetContext().withinConstruct = true; } @@ -254,6 +256,11 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { ResolveOmpObjectList(x.v, Symbol::Flag::OmpPrivate); return false; } + bool Pre(const parser::OmpAllocateClause &x) { + const auto &objectList{std::get(x.t)}; + ResolveOmpObjectList(objectList, Symbol::Flag::OmpAllocate); + return false; + } bool Pre(const parser::OmpClause::Firstprivate &x) { ResolveOmpObjectList(x.v, Symbol::Flag::OmpFirstPrivate); return false; @@ -273,6 +280,10 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { Symbol::Flag::OmpFirstPrivate, Symbol::Flag::OmpLastPrivate, Symbol::Flag::OmpReduction, Symbol::Flag::OmpLinear}; + static constexpr Symbol::Flags privateDataSharingAttributeFlags{ + Symbol::Flag::OmpPrivate, Symbol::Flag::OmpFirstPrivate, + Symbol::Flag::OmpLastPrivate}; + static constexpr Symbol::Flags ompFlagsRequireNewSymbol{ Symbol::Flag::OmpPrivate, Symbol::Flag::OmpLinear, Symbol::Flag::OmpFirstPrivate, Symbol::Flag::OmpLastPrivate, @@ -281,6 +292,21 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { static constexpr Symbol::Flags ompFlagsRequireMark{ Symbol::Flag::OmpThreadprivate}; + 
std::vector allocateNames_; // on one directive + SymbolSet privateDataSharingAttributeObjects_; // on one directive + + void AddAllocateName(const parser::Name *&object) { + allocateNames_.push_back(object); + } + void ClearAllocateNames() { allocateNames_.clear(); } + + void AddPrivateDataSharingAttributeObjects(SymbolRef object) { + privateDataSharingAttributeObjects_.insert(object); + } + void ClearPrivateDataSharingAttributeObjects() { + privateDataSharingAttributeObjects_.clear(); + } + // Predetermined DSA rules void PrivatizeAssociatedLoopIndex(const parser::OpenMPLoopConstruct &); void ResolveSeqLoopIndexInParallelOrTaskConstruct(const parser::Name &); @@ -632,9 +658,49 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPBlockConstruct &x) { break; } ClearDataSharingAttributeObjects(); + ClearPrivateDataSharingAttributeObjects(); + ClearAllocateNames(); return true; } +void OmpAttributeVisitor::Post(const parser::OpenMPBlockConstruct &x) { + const auto &beginBlockDir{std::get(x.t)}; + const auto &beginDir{std::get(beginBlockDir.t)}; + switch (beginDir.v) { + case llvm::omp::Directive::OMPD_parallel: + case llvm::omp::Directive::OMPD_single: + case llvm::omp::Directive::OMPD_target: + case llvm::omp::Directive::OMPD_task: + case llvm::omp::Directive::OMPD_teams: + case llvm::omp::Directive::OMPD_parallel_workshare: + case llvm::omp::Directive::OMPD_target_teams: + case llvm::omp::Directive::OMPD_target_parallel: { + bool hasPrivate; + for (const auto *allocName : allocateNames_) { + hasPrivate = false; + for (auto privateObj : privateDataSharingAttributeObjects_) { + const Symbol &symbolPrivate{*privateObj}; + if (allocName->source == symbolPrivate.name()) { + hasPrivate = true; + break; + } + } + if (!hasPrivate) { + context_.Say(allocName->source, + "The ALLOCATE clause requires that '%s' must be listed in a " + "private " + "data-sharing attribute clause on the same directive"_err_en_US, + allocName->ToString()); + } + } + break; + } + default: + break; + } + PopContext(); +} + bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { const auto &beginLoopDir{std::get(x.t)}; const auto &beginDir{std::get(beginLoopDir.t)}; @@ -879,6 +945,9 @@ void OmpAttributeVisitor::ResolveOmpObject( if (dataSharingAttributeFlags.test(ompFlag)) { CheckMultipleAppearances(*name, *symbol, ompFlag); } + if (ompFlag == Symbol::Flag::OmpAllocate) { + AddAllocateName(name); + } } } else { // Array sections to be changed to substrings as needed @@ -976,6 +1045,9 @@ void OmpAttributeVisitor::CheckMultipleAppearances( name.ToString()); } else { AddDataSharingAttributeObject(*target); + if (privateDataSharingAttributeFlags.test(ompFlag)) { + AddPrivateDataSharingAttributeObjects(*target); + } } } diff --git a/flang/test/Semantics/omp-clause-validity01.f90 b/flang/test/Semantics/omp-clause-validity01.f90 index d3f77a432de86..07f55733c8dc8 100644 --- a/flang/test/Semantics/omp-clause-validity01.f90 +++ b/flang/test/Semantics/omp-clause-validity01.f90 @@ -9,7 +9,7 @@ ! 
TODO: all the internal errors integer :: b = 128 - integer :: c = 32 + integer :: z, c = 32 integer, parameter :: num = 16 real(8) :: arrayA(256), arrayB(512) @@ -39,29 +39,54 @@ enddo !$omp end parallel - !$omp parallel allocate(b) + !$omp parallel private(b) allocate(b) do i = 1, N a = 3.14 enddo !$omp end parallel - !$omp parallel allocate(omp_default_mem_space : b, c) + !$omp parallel private(c, b) allocate(omp_default_mem_space : b, c) do i = 1, N a = 3.14 enddo !$omp end parallel - !$omp parallel allocate(b) allocate(c) + !$omp parallel allocate(b) allocate(c) private(b, c) do i = 1, N a = 3.14 enddo !$omp end parallel - !$omp parallel allocate(xy_alloc :b) + !$omp parallel allocate(xy_alloc :b) private(b) do i = 1, N a = 3.14 enddo !$omp end parallel + + !$omp task private(b) allocate(b) + do i = 1, N + z = 2 + end do + !$omp end task + + !$omp teams private(b) allocate(b) + do i = 1, N + z = 2 + end do + !$omp end teams + + !$omp target private(b) allocate(b) + do i = 1, N + z = 2 + end do + !$omp end target + + !ERROR: ALLOCATE clause is not allowed on the TARGET DATA directive + !$omp target data map(from: b) allocate(b) + do i = 1, N + z = 2 + enddo + !$omp end target data !ERROR: SCHEDULE clause is not allowed on the PARALLEL directive !$omp parallel schedule(static) diff --git a/flang/test/Semantics/omp-resolve06.f90 b/flang/test/Semantics/omp-resolve06.f90 new file mode 100644 index 0000000000000..0909c0f54a576 --- /dev/null +++ b/flang/test/Semantics/omp-resolve06.f90 @@ -0,0 +1,54 @@ +! RUN: %S/test_errors.sh %s %t %f18 -fopenmp +use omp_lib +!2.11.4 Allocate Clause +!For any list item that is specified in the allocate +!clause on a directive, a data-sharing attribute clause +!that may create a private copy of that list item must be +!specified on the same directive. 
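+!For example, '!$omp parallel private(x) allocate(x)' is conforming, while
+!'!$omp parallel allocate(x)' without a private copy of 'x' must be
+!diagnosed, as the cases below check.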
+
+  integer :: N = 2
+
+  !ERROR: The ALLOCATE clause requires that 'x' must be listed in a private data-sharing attribute clause on the same directive
+  !$omp parallel allocate(omp_default_mem_space : x)
+  do i = 1, N
+     x = 2
+  enddo
+  !$omp end parallel
+
+  !ERROR: The ALLOCATE clause requires that 'y' must be listed in a private data-sharing attribute clause on the same directive
+  !$omp parallel allocate(omp_default_mem_space : y) firstprivate(x)
+  do i = 1, N
+     x = 2
+  enddo
+  !$omp end parallel
+
+  !ERROR: The ALLOCATE clause requires that 'x' must be listed in a private data-sharing attribute clause on the same directive
+  !ERROR: The ALLOCATE clause requires that 'x' must be listed in a private data-sharing attribute clause on the same directive
+  !$omp parallel allocate(omp_default_mem_space : x) allocate(omp_default_mem_space : x)
+  do i = 1, N
+     x = 2
+  enddo
+  !$omp end parallel
+
+  !ERROR: The ALLOCATE clause requires that 'f' must be listed in a private data-sharing attribute clause on the same directive
+  !$omp parallel allocate(omp_default_mem_space : f) shared(f)
+  do i = 1, N
+     x = 2
+  enddo
+  !$omp end parallel
+
+  !ERROR: The ALLOCATE clause requires that 'q' must be listed in a private data-sharing attribute clause on the same directive
+  !$omp parallel private(t) allocate(omp_default_mem_space : z, t, q, r) firstprivate(z, r)
+  do i = 1, N
+     x = 2
+  enddo
+  !$omp end parallel
+
+  !ERROR: The ALLOCATE clause requires that 'b' must be listed in a private data-sharing attribute clause on the same directive
+  !ERROR: The ALLOCATE clause requires that 'c' must be listed in a private data-sharing attribute clause on the same directive
+  !$omp parallel allocate(omp_default_mem_space : a, b, c, d) firstprivate(a, d)
+  do i = 1, N
+     x = 2
+  enddo
+  !$omp end parallel
+end
From 36c8621638d18c830efe2c6a2a6d0a0338b0f79d Mon Sep 17 00:00:00 2001
From: Juneyoung Lee
Date: Wed, 9 Sep 2020 20:31:51 +0900
Subject: [PATCH 0154/1079] [BuildLibCalls] Add more noundef to library functions

This patch follows D85345 and adds more noundef attributes to return
values/arguments of library functions that are mostly about accessing the
file system or processes.

A few functions like `chmod` or `times` use the typedefs `mode_t` and
`clock_t`. They are neither structs nor unions, so they cannot contain
undef even if they're lowered to iN in IR. So, it is fine to add noundef
to them.
- clock_t's actual type is size_t (C17, 7.27.1.3), so it isn't a struct
  or union.
- For mode_t, either int or long is used in practice because programmers
  use bit manipulation. So, it is okay that it's never an aggregate in
  practice.

After this patch, the remaining library functions are those that eagerly
participate in optimizations: they can be removed, reordered, or
introduced by a transformation from primitive IR operations. For those,
more testing is needed, since it may no longer be valid to add noundef
even if the C standard says it's okay.
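For illustration, the inferred prototype of `unlink` changes as follows
(the pair below is taken from the updated annotate.ll test in this patch):

  declare i32 @unlink(i8* nocapture readonly)                  ; before
  declare noundef i32 @unlink(i8* nocapture noundef readonly)  ; after
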
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D85894 --- llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 33 ++++++++ .../Transforms/InferFunctionAttrs/annotate.ll | 84 +++++++++---------- 2 files changed, 75 insertions(+), 42 deletions(-) diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index d4d2957efab4c..09ed68a5f6782 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -262,6 +262,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_setbuf: case LibFunc_setvbuf: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; @@ -274,6 +275,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_stat: case LibFunc_statvfs: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); @@ -304,6 +306,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_setitimer: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); Changed |= setDoesNotCapture(F, 2); @@ -311,6 +314,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_system: // May throw; "system" is a valid pthread cancellation point. + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; @@ -369,11 +373,13 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_mkdir: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_mktime: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; @@ -395,11 +401,13 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_rmdir: case LibFunc_remove: case LibFunc_realpath: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_rename: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); @@ -407,6 +415,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_readlink: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); @@ -445,6 +454,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_chmod: case LibFunc_chown: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); @@ -452,6 +462,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_ctermid: case LibFunc_clearerr: case LibFunc_closedir: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= 
setDoesNotCapture(F, 0); return Changed; @@ -464,6 +475,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_access: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); @@ -583,6 +595,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_getlogin_r: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; @@ -592,6 +605,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_getenv: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotCapture(F, 0); @@ -603,10 +617,12 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotThrow(F); return Changed; case LibFunc_getitimer: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_getpwnam: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); @@ -617,21 +633,25 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_uname: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_unlink: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_unsetenv: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_utime: case LibFunc_utimes: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); @@ -669,6 +689,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotThrow(F); return Changed; case LibFunc_popen: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setDoesNotCapture(F, 0); @@ -677,6 +698,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_pclose: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; @@ -733,16 +755,19 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_opendir: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_tmpfile: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_times: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; @@ -754,18 +779,22 @@ bool 
llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotAccessMemory(F); return Changed; case LibFunc_lstat: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_lchown: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_qsort: // May throw; places call through function pointer. + // Cannot give undef pointer/size + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 3); return Changed; case LibFunc_dunder_strdup: @@ -799,6 +828,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_stat64: case LibFunc_lstat64: case LibFunc_statvfs64: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); @@ -828,6 +858,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_tmpfile64: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); return Changed; @@ -847,6 +878,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { // Currently some platforms have the restrict keyword on the arguments to // gettimeofday. To be conservative, do not add noalias to gettimeofday's // arguments. + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); @@ -874,6 +906,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; // int __nvvm_reflect(const char *) case LibFunc_nvvm_reflect: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotAccessMemory(F); Changed |= setDoesNotThrow(F); return Changed; diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll index 85c6e35266b71..7f52bf771769b 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll @@ -11,7 +11,7 @@ declare i8* @_Znwm(i64) ; CHECK: declare noalias nonnull i8* @_Znwm(i64) [[G0]] declare i32 @__nvvm_reflect(i8*) -; CHECK-NVPTX: declare i32 @__nvvm_reflect(i8*) [[G0:#[0-9]+]] +; CHECK-NVPTX: declare noundef i32 @__nvvm_reflect(i8* noundef) [[G0:#[0-9]+]] ; CHECK-NVPTX: attributes [[G0]] = { nofree nounwind readnone } @@ -163,7 +163,7 @@ declare float @__sinpif(float) ; CHECK: declare i32 @abs(i32) [[G0]] declare i32 @abs(i32) -; CHECK: declare i32 @access(i8* nocapture readonly, i32) [[G1:#[0-9]+]] +; CHECK: declare noundef i32 @access(i8* nocapture noundef readonly, i32 noundef) [[G1:#[0-9]+]] declare i32 @access(i8*, i32) ; CHECK: declare double @acos(double) [[G0]] @@ -274,16 +274,16 @@ declare float @ceilf(float) ; CHECK: declare x86_fp80 @ceill(x86_fp80) [[G0]] declare x86_fp80 @ceill(x86_fp80) -; CHECK: declare i32 @chmod(i8* nocapture readonly, i16 zeroext) [[G1]] +; CHECK: declare noundef i32 @chmod(i8* nocapture noundef readonly, i16 noundef zeroext) [[G1]] declare i32 @chmod(i8*, i16 zeroext) -; CHECK: declare i32 @chown(i8* nocapture readonly, i32, i32) [[G1]] +; CHECK: declare noundef i32 @chown(i8* nocapture noundef readonly, i32 noundef, i32 noundef) [[G1]] declare i32 
@chown(i8*, i32, i32) -; CHECK: declare void @clearerr(%opaque* nocapture) [[G1]] +; CHECK: declare void @clearerr(%opaque* nocapture noundef) [[G1]] declare void @clearerr(%opaque*) -; CHECK: declare i32 @closedir(%opaque* nocapture) [[G1]] +; CHECK: declare noundef i32 @closedir(%opaque* nocapture noundef) [[G1]] declare i32 @closedir(%opaque*) ; CHECK: declare double @copysign(double, double) [[G0]] @@ -313,7 +313,7 @@ declare x86_fp80 @coshl(x86_fp80) ; CHECK: declare x86_fp80 @cosl(x86_fp80) [[G0]] declare x86_fp80 @cosl(x86_fp80) -; CHECK: declare i8* @ctermid(i8* nocapture) [[G1]] +; CHECK: declare noundef i8* @ctermid(i8* nocapture noundef) [[G1]] declare i8* @ctermid(i8*) ; CHECK: declare double @exp(double) [[G0]] @@ -520,22 +520,22 @@ declare i32 @getchar() ; CHECK: declare noundef i32 @getchar_unlocked() [[G1]] declare i32 @getchar_unlocked() -; CHECK: declare i8* @getenv(i8* nocapture) [[G2]] +; CHECK: declare noundef i8* @getenv(i8* nocapture noundef) [[G2]] declare i8* @getenv(i8*) -; CHECK: declare i32 @getitimer(i32, %opaque* nocapture) [[G1]] +; CHECK: declare noundef i32 @getitimer(i32 noundef, %opaque* nocapture noundef) [[G1]] declare i32 @getitimer(i32, %opaque*) -; CHECK: declare i32 @getlogin_r(i8* nocapture, i64) [[G1]] +; CHECK: declare noundef i32 @getlogin_r(i8* nocapture noundef, i64 noundef) [[G1]] declare i32 @getlogin_r(i8*, i64) -; CHECK: declare %opaque* @getpwnam(i8* nocapture readonly) [[G1]] +; CHECK: declare noundef %opaque* @getpwnam(i8* nocapture noundef readonly) [[G1]] declare %opaque* @getpwnam(i8*) ; CHECK: declare noundef i8* @gets(i8* noundef) [[G1]] declare i8* @gets(i8*) -; CHECK: declare i32 @gettimeofday(%opaque* nocapture, i8* nocapture) [[G1]] +; CHECK: declare noundef i32 @gettimeofday(%opaque* nocapture noundef, i8* nocapture noundef) [[G1]] declare i32 @gettimeofday(%opaque*, i8*) ; CHECK: declare i32 @isascii(i32) [[G0]] @@ -547,7 +547,7 @@ declare i32 @isdigit(i32) ; CHECK: declare i64 @labs(i64) [[G0]] declare i64 @labs(i64) -; CHECK: declare i32 @lchown(i8* nocapture readonly, i32, i32) [[G1]] +; CHECK: declare noundef i32 @lchown(i8* nocapture noundef readonly, i32 noundef, i32 noundef) [[G1]] declare i32 @lchown(i8*, i32, i32) ; CHECK: declare double @ldexp(double, i32) [[G0]] @@ -607,10 +607,10 @@ declare float @logf(float) ; CHECK: declare x86_fp80 @logl(x86_fp80) [[G0]] declare x86_fp80 @logl(x86_fp80) -; CHECK: declare i32 @lstat(i8* nocapture readonly, %opaque* nocapture) [[G1]] +; CHECK: declare noundef i32 @lstat(i8* nocapture noundef readonly, %opaque* nocapture noundef) [[G1]] declare i32 @lstat(i8*, %opaque*) -; CHECK-LINUX: declare i32 @lstat64(i8* nocapture readonly, %opaque* nocapture) [[G1]] +; CHECK-LINUX: declare noundef i32 @lstat64(i8* nocapture noundef readonly, %opaque* nocapture noundef) [[G1]] declare i32 @lstat64(i8*, %opaque*) ; CHECK: declare noalias i8* @malloc(i64) [[G1]] @@ -642,10 +642,10 @@ declare i8* @memmove(i8*, i8*, i64) ; CHECK: declare i8* @memset(i8*, i32, i64) [[G0]] declare i8* @memset(i8*, i32, i64) -; CHECK: declare i32 @mkdir(i8* nocapture readonly, i16 zeroext) [[G1]] +; CHECK: declare noundef i32 @mkdir(i8* nocapture noundef readonly, i16 noundef zeroext) [[G1]] declare i32 @mkdir(i8*, i16 zeroext) -; CHECK: declare i64 @mktime(%opaque* nocapture) [[G1]] +; CHECK: declare noundef i64 @mktime(%opaque* nocapture noundef) [[G1]] declare i64 @mktime(%opaque*) ; CHECK: declare double @modf(double, double* nocapture) [[G1]] @@ -672,16 +672,16 @@ declare i32 @open(i8*, i32, ...) 
; CHECK-LINUX: declare noundef i32 @open64(i8* nocapture noundef readonly, i32 noundef, ...) [[G0]] declare i32 @open64(i8*, i32, ...) -; CHECK: declare noalias %opaque* @opendir(i8* nocapture readonly) [[G1]] +; CHECK: declare noalias noundef %opaque* @opendir(i8* nocapture noundef readonly) [[G1]] declare %opaque* @opendir(i8*) -; CHECK: declare i32 @pclose(%opaque* nocapture) [[G1]] +; CHECK: declare noundef i32 @pclose(%opaque* nocapture noundef) [[G1]] declare i32 @pclose(%opaque*) ; CHECK: declare void @perror(i8* nocapture noundef readonly) [[G1]] declare void @perror(i8*) -; CHECK: declare noalias %opaque* @popen(i8* nocapture readonly, i8* nocapture readonly) [[G1]] +; CHECK: declare noalias noundef %opaque* @popen(i8* nocapture noundef readonly, i8* nocapture noundef readonly) [[G1]] declare %opaque* @popen(i8*, i8*) ; CHECK: declare i32 @posix_memalign(i8**, i64, i64) [[G0]] @@ -717,13 +717,13 @@ declare i32 @puts(i8*) ; CHECK: declare noundef i64 @pwrite(i32 noundef, i8* nocapture noundef readonly, i64 noundef, i64 noundef) [[G0]] declare i64 @pwrite(i32, i8*, i64, i64) -; CHECK: declare void @qsort(i8*, i64, i64, i32 (i8*, i8*)* nocapture) [[G0]] +; CHECK: declare void @qsort(i8* noundef, i64 noundef, i64 noundef, i32 (i8*, i8*)* nocapture noundef) [[G0]] declare void @qsort(i8*, i64, i64, i32 (i8*, i8*)*) ; CHECK: declare noundef i64 @read(i32 noundef, i8* nocapture noundef, i64 noundef) [[G0]] declare i64 @read(i32, i8*, i64) -; CHECK: declare i64 @readlink(i8* nocapture readonly, i8* nocapture, i64) [[G1]] +; CHECK: declare noundef i64 @readlink(i8* nocapture noundef readonly, i8* nocapture noundef, i64 noundef) [[G1]] declare i64 @readlink(i8*, i8*, i64) ; CHECK: declare noalias i8* @realloc(i8* nocapture, i64) [[G3]] @@ -732,13 +732,13 @@ declare i8* @realloc(i8*, i64) ; CHECK: declare i8* @reallocf(i8*, i64) declare i8* @reallocf(i8*, i64) -; CHECK: declare i8* @realpath(i8* nocapture readonly, i8*) [[G1]] +; CHECK: declare noundef i8* @realpath(i8* nocapture noundef readonly, i8* noundef) [[G1]] declare i8* @realpath(i8*, i8*) -; CHECK: declare i32 @remove(i8* nocapture readonly) [[G1]] +; CHECK: declare noundef i32 @remove(i8* nocapture noundef readonly) [[G1]] declare i32 @remove(i8*) -; CHECK: declare i32 @rename(i8* nocapture readonly, i8* nocapture readonly) [[G1]] +; CHECK: declare noundef i32 @rename(i8* nocapture noundef readonly, i8* nocapture noundef readonly) [[G1]] declare i32 @rename(i8*, i8*) ; CHECK: declare void @rewind(%opaque* nocapture noundef) [[G1]] @@ -753,7 +753,7 @@ declare float @rintf(float) ; CHECK: declare x86_fp80 @rintl(x86_fp80) [[G0]] declare x86_fp80 @rintl(x86_fp80) -; CHECK: declare i32 @rmdir(i8* nocapture readonly) [[G1]] +; CHECK: declare noundef i32 @rmdir(i8* nocapture noundef readonly) [[G1]] declare i32 @rmdir(i8*) ; CHECK: declare double @round(double) [[G0]] @@ -768,13 +768,13 @@ declare x86_fp80 @roundl(x86_fp80) ; CHECK: declare noundef i32 @scanf(i8* nocapture noundef readonly, ...) [[G1]] declare i32 @scanf(i8*, ...) 
-; CHECK: declare void @setbuf(%opaque* nocapture, i8*) [[G1]] +; CHECK: declare void @setbuf(%opaque* nocapture noundef, i8* noundef) [[G1]] declare void @setbuf(%opaque*, i8*) -; CHECK: declare i32 @setitimer(i32, %opaque* nocapture readonly, %opaque* nocapture) [[G1]] +; CHECK: declare noundef i32 @setitimer(i32 noundef, %opaque* nocapture noundef readonly, %opaque* nocapture noundef) [[G1]] declare i32 @setitimer(i32, %opaque*, %opaque*) -; CHECK: declare i32 @setvbuf(%opaque* nocapture, i8*, i32, i64) [[G1]] +; CHECK: declare noundef i32 @setvbuf(%opaque* nocapture noundef, i8* noundef, i32 noundef, i64 noundef) [[G1]] declare i32 @setvbuf(%opaque*, i8*, i32, i64) ; CHECK: declare double @sin(double) [[G0]] @@ -813,16 +813,16 @@ declare x86_fp80 @sqrtl(x86_fp80) ; CHECK: declare noundef i32 @sscanf(i8* nocapture noundef readonly, i8* nocapture noundef readonly, ...) [[G1]] declare i32 @sscanf(i8*, i8*, ...) -; CHECK: declare i32 @stat(i8* nocapture readonly, %opaque* nocapture) [[G1]] +; CHECK: declare noundef i32 @stat(i8* nocapture noundef readonly, %opaque* nocapture noundef) [[G1]] declare i32 @stat(i8*, %opaque*) -; CHECK-LINUX: declare i32 @stat64(i8* nocapture readonly, %opaque* nocapture) [[G1]] +; CHECK-LINUX: declare noundef i32 @stat64(i8* nocapture noundef readonly, %opaque* nocapture noundef) [[G1]] declare i32 @stat64(i8*, %opaque*) -; CHECK: declare i32 @statvfs(i8* nocapture readonly, %opaque* nocapture) [[G1]] +; CHECK: declare noundef i32 @statvfs(i8* nocapture noundef readonly, %opaque* nocapture noundef) [[G1]] declare i32 @statvfs(i8*, %opaque*) -; CHECK-LINUX: declare i32 @statvfs64(i8* nocapture readonly, %opaque* nocapture) [[G1]] +; CHECK-LINUX: declare noundef i32 @statvfs64(i8* nocapture noundef readonly, %opaque* nocapture noundef) [[G1]] declare i32 @statvfs64(i8*, %opaque*) ; CHECK: declare i8* @stpcpy(i8*, i8* nocapture readonly) [[G1]] @@ -918,7 +918,7 @@ declare i64 @strtoull(i8*, i8**, i32) ; CHECK: declare i64 @strxfrm(i8* nocapture, i8* nocapture readonly, i64) [[G1]] declare i64 @strxfrm(i8*, i8*, i64) -; CHECK: declare i32 @system(i8* nocapture readonly) [[G0]] +; CHECK: declare noundef i32 @system(i8* nocapture noundef readonly) [[G0]] declare i32 @system(i8*) ; CHECK: declare double @tan(double) [[G0]] @@ -939,13 +939,13 @@ declare x86_fp80 @tanhl(x86_fp80) ; CHECK: declare x86_fp80 @tanl(x86_fp80) [[G0]] declare x86_fp80 @tanl(x86_fp80) -; CHECK: declare i64 @times(%opaque* nocapture) [[G1]] +; CHECK: declare noundef i64 @times(%opaque* nocapture noundef) [[G1]] declare i64 @times(%opaque*) -; CHECK: declare noalias %opaque* @tmpfile() [[G1]] +; CHECK: declare noalias noundef %opaque* @tmpfile() [[G1]] declare %opaque* @tmpfile() -; CHECK-LINUX: declare noalias %opaque* @tmpfile64() [[G1]] +; CHECK-LINUX: declare noalias noundef %opaque* @tmpfile64() [[G1]] declare %opaque* @tmpfile64() ; CHECK: declare i32 @toascii(i32) [[G0]] @@ -960,22 +960,22 @@ declare float @truncf(float) ; CHECK: declare x86_fp80 @truncl(x86_fp80) [[G0]] declare x86_fp80 @truncl(x86_fp80) -; CHECK: declare i32 @uname(%opaque* nocapture) [[G1]] +; CHECK: declare noundef i32 @uname(%opaque* nocapture noundef) [[G1]] declare i32 @uname(%opaque*) ; CHECK: declare noundef i32 @ungetc(i32 noundef, %opaque* nocapture noundef) [[G1]] declare i32 @ungetc(i32, %opaque*) -; CHECK: declare i32 @unlink(i8* nocapture readonly) [[G1]] +; CHECK: declare noundef i32 @unlink(i8* nocapture noundef readonly) [[G1]] declare i32 @unlink(i8*) -; CHECK: declare i32 @unsetenv(i8* nocapture 
readonly) [[G1]]
+; CHECK: declare noundef i32 @unsetenv(i8* nocapture noundef readonly) [[G1]]
 declare i32 @unsetenv(i8*)
 
-; CHECK: declare i32 @utime(i8* nocapture readonly, %opaque* nocapture readonly) [[G1]]
+; CHECK: declare noundef i32 @utime(i8* nocapture noundef readonly, %opaque* nocapture noundef readonly) [[G1]]
 declare i32 @utime(i8*, %opaque*)
 
-; CHECK: declare i32 @utimes(i8* nocapture readonly, %opaque* nocapture readonly) [[G1]]
+; CHECK: declare noundef i32 @utimes(i8* nocapture noundef readonly, %opaque* nocapture noundef readonly) [[G1]]
 declare i32 @utimes(i8*, %opaque*)
 
 ; CHECK: declare noalias i8* @valloc(i64) [[G1]]
From 48fc781438767bd8337facf2e232c695b0426fb4 Mon Sep 17 00:00:00 2001
From: David Stenberg
Date: Wed, 9 Sep 2020 10:59:41 +0200
Subject: [PATCH 0155/1079] [UnifyFunctionExitNodes] Fix Modified status for
 unreachable blocks

If a function had at most one return block, the pass would return false
regardless of whether a unified unreachable block was created. This patch
fixes that by refactoring runOnFunction into two separate helper functions
that handle the unreachable blocks and the return blocks, respectively,
as suggested by @bjope in a review comment.

This was caught using the check introduced by D80916.

Reviewed By: serge-sans-paille

Differential Revision: https://reviews.llvm.org/D85818
---
 .../Transforms/Utils/UnifyFunctionExitNodes.h |  5 +-
 .../Utils/UnifyFunctionExitNodes.cpp          | 65 ++++++++++--------
 .../unreachable-blocks-status.ll              | 67 +++++++++++++++++++
 3 files changed, 107 insertions(+), 30 deletions(-)
 create mode 100644 llvm/test/Transforms/UnifyFunctionExitNodes/unreachable-blocks-status.ll

diff --git a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h b/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
index ce7cb16b3886d..a9fe808cb4552 100644
--- a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
+++ b/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
@@ -20,7 +20,10 @@ namespace llvm {
 
 class BasicBlock;
 
-struct UnifyFunctionExitNodes : public FunctionPass {
+class UnifyFunctionExitNodes : public FunctionPass {
+  bool unifyUnreachableBlocks(Function &F);
+  bool unifyReturnBlocks(Function &F);
+
 public:
   static char ID; // Pass identification, replacement for typeid
   UnifyFunctionExitNodes();
diff --git a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index b124d0536254b..621e944741b14 100644
--- a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -40,44 +40,41 @@ void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
   AU.addPreservedID(LowerSwitchID);
 }
 
-// UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new
-// BasicBlock, and converting all returns to unconditional branches to this
-// new basic block.  The singular exit node is returned.
-//
-// If there are no return stmts in the Function, a null pointer is returned.
-//
-bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
-  // Loop over all of the blocks in a function, tracking all of the blocks that
-  // return.
-  //
-  std::vector<BasicBlock *> ReturningBlocks;
+bool UnifyFunctionExitNodes::unifyUnreachableBlocks(Function &F) {
   std::vector<BasicBlock *> UnreachableBlocks;
+
   for (BasicBlock &I : F)
-    if (isa<ReturnInst>(I.getTerminator()))
-      ReturningBlocks.push_back(&I);
-    else if (isa<UnreachableInst>(I.getTerminator()))
+    if (isa<UnreachableInst>(I.getTerminator()))
       UnreachableBlocks.push_back(&I);
 
-  // Then unreachable blocks.
-  if (UnreachableBlocks.size() > 1) {
-    BasicBlock *UnreachableBlock = BasicBlock::Create(F.getContext(),
-                                          "UnifiedUnreachableBlock", &F);
-    new UnreachableInst(F.getContext(), UnreachableBlock);
+  if (UnreachableBlocks.size() <= 1)
+    return false;
+
+  BasicBlock *UnreachableBlock =
+      BasicBlock::Create(F.getContext(), "UnifiedUnreachableBlock", &F);
+  new UnreachableInst(F.getContext(), UnreachableBlock);
 
-    for (BasicBlock *BB : UnreachableBlocks) {
-      BB->getInstList().pop_back(); // Remove the unreachable inst.
-      BranchInst::Create(UnreachableBlock, BB);
-    }
+  for (BasicBlock *BB : UnreachableBlocks) {
+    BB->getInstList().pop_back(); // Remove the unreachable inst.
+    BranchInst::Create(UnreachableBlock, BB);
   }
 
-  // There is nothing more to do if we do not have multiple return blocks.
+  return true;
+}
+
+bool UnifyFunctionExitNodes::unifyReturnBlocks(Function &F) {
+  std::vector<BasicBlock *> ReturningBlocks;
+
+  for (BasicBlock &I : F)
+    if (isa<ReturnInst>(I.getTerminator()))
+      ReturningBlocks.push_back(&I);
+
   if (ReturningBlocks.size() <= 1)
     return false;
 
-  // Otherwise, we need to insert a new basic block into the function, add a PHI
-  // nodes (if the function returns values), and convert all of the return
-  // instructions into unconditional branches.
-  //
+  // Insert a new basic block into the function, add PHI nodes (if the function
+  // returns values), and convert all of the return instructions into
+  // unconditional branches.
   BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
                                                "UnifiedReturnBlock", &F);
 
@@ -94,7 +91,6 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
 
   // Loop over all of the blocks, replacing the return instruction with an
   // unconditional branch.
-  //
   for (BasicBlock *BB : ReturningBlocks) {
     // Add an incoming element to the PHI node for every return instruction that
     // is merging into this new block...
@@ -104,5 +100,16 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
     BB->getInstList().pop_back(); // Remove the return insn
     BranchInst::Create(NewRetBlock, BB);
   }
+
   return true;
 }
+
+// Unify all exit nodes of the CFG by creating a new BasicBlock, and converting
+// all returns to unconditional branches to this new basic block. Also, unify
+// all unreachable blocks.
+bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
+  bool Changed = false;
+  Changed |= unifyUnreachableBlocks(F);
+  Changed |= unifyReturnBlocks(F);
+  return Changed;
+}
diff --git a/llvm/test/Transforms/UnifyFunctionExitNodes/unreachable-blocks-status.ll b/llvm/test/Transforms/UnifyFunctionExitNodes/unreachable-blocks-status.ll
new file mode 100644
index 0000000000000..a9169e9ff15e9
--- /dev/null
+++ b/llvm/test/Transforms/UnifyFunctionExitNodes/unreachable-blocks-status.ll
@@ -0,0 +1,67 @@
+; RUN: opt -mergereturn -S < %s | FileCheck %s
+
+; The pass previously did not report the correct Modified status in the case
+; where a function had at most one return block, and a unified unreachable
+; block was created. This was caught by the pass return status check that is
+; hidden under EXPENSIVE_CHECKS.
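+;
+; Both functions below have at most one return block but two unreachable
+; blocks each, so only the unreachable-block unification fires; the pass
+; must nevertheless report that it modified the IR.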
+ +; CHECK: for.foo.body2: +; CHECK-NEXT: br label %UnifiedUnreachableBlock + +; CHECK: for.foo.end: +; CHECK-NEXT: br label %UnifiedUnreachableBlock + +; CHECK: UnifiedUnreachableBlock: +; CHECK-NEXT: unreachable + +define i32 @foo() { +entry: + br label %for.foo.cond + +for.foo.cond: ; preds = %entry + br i1 false, label %for.foo.body, label %for.foo.end3 + +for.foo.body: ; preds = %for.foo.cond + br label %for.foo.cond1 + +for.foo.cond1: ; preds = %for.foo.body + br i1 false, label %for.foo.body2, label %for.foo.end + +for.foo.body2: ; preds = %for.foo.cond1 + unreachable + +for.foo.end: ; preds = %for.foo.cond1 + unreachable + +for.foo.end3: ; preds = %for.foo.cond + ret i32 undef +} + +; CHECK: for.bar.body2: +; CHECK-NEXT: br label %UnifiedUnreachableBlock + +; CHECK: for.bar.end: +; CHECK-NEXT: br label %UnifiedUnreachableBlock + +; CHECK: UnifiedUnreachableBlock: +; CHECK-NEXT: unreachable + +define void @bar() { +entry: + br label %for.bar.cond + +for.bar.cond: ; preds = %entry + br i1 false, label %for.bar.body, label %for.bar.end + +for.bar.body: ; preds = %for.bar.cond + br label %for.bar.cond1 + +for.bar.cond1: ; preds = %for.bar.body + br i1 false, label %for.bar.body2, label %for.bar.end + +for.bar.body2: ; preds = %for.bar.cond1 + unreachable + +for.bar.end: ; preds = %for.bar.cond1 + unreachable +} From edf244217a48b91c8e9c860848885106fbcc5c4b Mon Sep 17 00:00:00 2001 From: Jakub Lichman Date: Tue, 8 Sep 2020 11:26:15 +0000 Subject: [PATCH 0156/1079] [mlir][Linalg] Integration tests for convolutions added. This commit introduces end-to-end integration tests for convolutions that test multiple ways of ConvOps lowering. Differential Revision: https://reviews.llvm.org/D87277 --- .../Linalg/Conv/test-conv-1d-call.mlir | 65 ++++++ .../Linalg/Conv/test-conv-1d-ncw-call.mlir | 71 +++++++ .../Linalg/Conv/test-conv-1d-nwc-call.mlir | 82 ++++++++ .../Linalg/Conv/test-conv-2d-call.mlir | 70 +++++++ .../Linalg/Conv/test-conv-2d-nchw-call.mlir | 84 ++++++++ .../Linalg/Conv/test-conv-2d-nhwc-call.mlir | 130 ++++++++++++ .../Linalg/Conv/test-conv-3d-call.mlir | 87 ++++++++ .../Linalg/Conv/test-conv-3d-ncdhw-call.mlir | 91 +++++++++ .../Linalg/Conv/test-conv-3d-ndhwc-call.mlir | 193 ++++++++++++++++++ 9 files changed, 873 insertions(+) create mode 100644 mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-call.mlir create mode 100644 mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-ncw-call.mlir create mode 100644 mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-nwc-call.mlir create mode 100644 mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-call.mlir create mode 100644 mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-nchw-call.mlir create mode 100644 mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-nhwc-call.mlir create mode 100644 mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-call.mlir create mode 100644 mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-ncdhw-call.mlir create mode 100644 mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-ndhwc-call.mlir diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-call.mlir b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-call.mlir new file mode 100644 index 0000000000000..1b3ee65f13d96 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-call.mlir @@ -0,0 +1,65 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: 
-shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=4" -convert-linalg-to-loops \ +// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1" -test-conv-vectorization \ +// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=4" -linalg-tile="linalg-tile-sizes=1" \ +// RUN: -test-conv-vectorization -convert-linalg-to-loops \ +// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func @print_memref_f32(memref<*xf32>) + +// Creates and returns a 1-D buffer of size %s1 filled with the value %f +func @alloc_1d_filled_f32(%s1 : index, %f : f32) -> memref { + %buf = alloc(%s1) : memref + linalg.fill(%buf, %f) : memref, f32 + return %buf : memref +} + +func @conv_1d(%arg0: memref, %arg1: memref, %arg2: memref) { + linalg.conv_1d %arg0, %arg1, %arg2 : (memref, memref, memref) + return +} + +func @main() { + %c3 = constant 3 : index + %c6 = constant 6 : index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter1D = call @alloc_1d_filled_f32(%c3, %val) : (index, f32) -> (memref) + %in1D = call @alloc_1d_filled_f32(%c8, %val) : (index, f32) -> (memref) + %out1D = call @alloc_1d_filled_f32(%c6, %zero) : (index, f32) -> (memref) + + store %f10, %in1D[%c3] : memref + call @conv_1d(%in1D, %filter1D, %out1D) : (memref, memref, memref) -> () + %out1D_ = memref_cast %out1D : memref to memref<*xf32> + call @print_memref_f32(%out1D_): (memref<*xf32>) -> () + + dealloc %filter1D : memref + dealloc %in1D : memref + dealloc %out1D : memref + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [12, 28, 28, 28, 12, 12] diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-ncw-call.mlir b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-ncw-call.mlir new file mode 100644 index 0000000000000..2647ee3d663c3 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-ncw-call.mlir @@ -0,0 +1,71 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" -convert-linalg-to-loops \ +// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1" -test-conv-vectorization \ +// RUN: -convert-linalg-to-loops 
-test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" -linalg-tile="linalg-tile-sizes=1,1,1" \ +// RUN: -test-conv-vectorization -convert-linalg-to-loops \ +// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func @print_memref_f32(memref<*xf32>) + +// Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f +func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { + %buf = alloc(%s1, %s2, %s3) : memref + linalg.fill(%buf, %f) : memref, f32 + return %buf : memref +} + +func @conv_1d_ncw(%arg0: memref, %arg1: memref, %arg2: memref) { + linalg.conv_1d_ncw %arg0, %arg1, %arg2 : (memref, memref, memref) + return +} + +func @main() { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c3 = constant 3 : index + %c6 = constant 6 : index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c3, %val) : (index, index, index, f32) -> (memref) + %in1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c8, %val) : (index, index, index, f32) -> (memref) + %out1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c6, %zero) : (index, index, index, f32) -> (memref) + + store %f10, %in1D_ncw[%c0, %c0, %c3] : memref + call @conv_1d_ncw(%in1D_ncw, %filter1D_ncw, %out1D_ncw) : (memref, memref, memref) -> () + %out1D_ncw_ = memref_cast %out1D_ncw : memref to memref<*xf32> + call @print_memref_f32(%out1D_ncw_): (memref<*xf32>) -> () + + dealloc %filter1D_ncw : memref + dealloc %in1D_ncw : memref + dealloc %out1D_ncw : memref + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-SAME: [12, 28, 28, 28, 12, 12] +// CHECK-SAME: ] +// CHECK-SAME: ] diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-nwc-call.mlir b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-nwc-call.mlir new file mode 100644 index 0000000000000..5cc4de3844aa6 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-nwc-call.mlir @@ -0,0 +1,82 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,4" -convert-linalg-to-loops \ +// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1" -test-conv-vectorization \ +// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: 
-shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,4" -linalg-tile="linalg-tile-sizes=1,1,1" \ +// RUN: -test-conv-vectorization -convert-linalg-to-loops \ +// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func @print_memref_f32(memref<*xf32>) + +// Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f +func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { + %buf = alloc(%s1, %s2, %s3) : memref + linalg.fill(%buf, %f) : memref, f32 + return %buf : memref +} + +func @conv_1d_nwc(%arg0: memref, %arg1: memref, %arg2: memref) { + linalg.conv_1d_nwc %arg0, %arg1, %arg2 : (memref, memref, memref) + return +} + +func @main() { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c3 = constant 3 : index + %c6 = constant 6 : index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter1D_nwc = call @alloc_3d_filled_f32(%c1, %c3, %c1, %val) : (index, index, index, f32) -> (memref) + %in1D_nwc = call @alloc_3d_filled_f32(%c3, %c8, %c1, %val) : (index, index, index, f32) -> (memref) + %out1D_nwc = call @alloc_3d_filled_f32(%c3, %c6, %c1, %zero) : (index, index, index, f32) -> (memref) + + store %f10, %in1D_nwc[%c0, %c3, %c0] : memref + call @conv_1d_nwc(%in1D_nwc, %filter1D_nwc, %out1D_nwc) : (memref, memref, memref) -> () + %out1D_nwc_ = memref_cast %out1D_nwc : memref to memref<*xf32> + call @print_memref_f32(%out1D_nwc_): (memref<*xf32>) -> () + + dealloc %filter1D_nwc : memref + dealloc %in1D_nwc : memref + dealloc %out1D_nwc : memref + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-SAME: [12], +// CHECK-COUNT-3: [28], +// CHECK-NEXT: [12], +// CHECK-NEXT: [12] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-5: [12], +// CHECK-NEXT: [12] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-5: [12], +// CHECK-NEXT: [12] +// CHECK-SAME: ] +// CHECK-SAME: ] diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-call.mlir b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-call.mlir new file mode 100644 index 0000000000000..38420974ad983 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-call.mlir @@ -0,0 +1,70 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,2" -convert-linalg-to-loops \ +// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1" -test-conv-vectorization \ +// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: 
+// RUN: | FileCheck %s
+
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,2" -linalg-tile="linalg-tile-sizes=1,1" \
+// RUN: -test-conv-vectorization -convert-linalg-to-loops \
+// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \
+// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+func @print_memref_f32(memref<*xf32>)
+
+// Creates and returns a 2-D buffer of size (%s1, %s2) filled with the value %f
+func @alloc_2d_filled_f32(%s1 : index, %s2 : index, %f : f32) -> memref<?x?xf32> {
+  %buf = alloc(%s1, %s2) : memref<?x?xf32>
+  linalg.fill(%buf, %f) : memref<?x?xf32>, f32
+  return %buf : memref<?x?xf32>
+}
+
+func @conv_2d(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
+  linalg.conv_2d %arg0, %arg1, %arg2 : (memref<?x?xf32>, memref<?x?xf32>, memref<?x?xf32>)
+  return
+}
+
+func @main() {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c3 = constant 3 : index
+  %c6 = constant 6 : index
+  %c8 = constant 8 : index
+  %f10 = constant 10.00000e+00 : f32
+  %val = constant 2.00000e+00 : f32
+  %zero = constant 0.00000e+00 : f32
+
+  %filter2D = call @alloc_2d_filled_f32(%c3, %c3, %val) : (index, index, f32) -> (memref<?x?xf32>)
+  %in2D = call @alloc_2d_filled_f32(%c8, %c8, %val) : (index, index, f32) -> (memref<?x?xf32>)
+  %out2D = call @alloc_2d_filled_f32(%c6, %c6, %zero) : (index, index, f32) -> (memref<?x?xf32>)
+
+  store %f10, %in2D[%c0, %c3] : memref<?x?xf32>
+  call @conv_2d(%in2D, %filter2D, %out2D) : (memref<?x?xf32>, memref<?x?xf32>, memref<?x?xf32>) -> ()
+  %out2D_ = memref_cast %out2D : memref<?x?xf32> to memref<*xf32>
+  call @print_memref_f32(%out2D_): (memref<*xf32>) -> ()
+
+  dealloc %filter2D : memref<?x?xf32>
+  dealloc %in2D : memref<?x?xf32>
+  dealloc %out2D : memref<?x?xf32>
+  return
+}
+
+// CHECK: Unranked Memref {{.*}}
+// CHECK-NEXT: [
+// CHECK-SAME:  [36, 52, 52, 52, 36, 36],
+// CHECK-COUNT-5: [36, 36, 36, 36, 36, 36]
+// CHECK-SAME: ]
diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-nchw-call.mlir b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-nchw-call.mlir
new file mode 100644
index 0000000000000..fbd831f6801a9
--- /dev/null
+++ b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-nchw-call.mlir
@@ -0,0 +1,84 @@
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" -convert-linalg-to-loops \
+// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1,1" -test-conv-vectorization \
+// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \
+// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" -linalg-tile="linalg-tile-sizes=1,1,1,1" \
+// RUN: -test-conv-vectorization -convert-linalg-to-loops \
+// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \
+// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+func @print_memref_f32(memref<*xf32>)
+
+// Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f
+func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref<?x?x?x?xf32> {
+  %buf = alloc(%s1, %s2, %s3, %s4) : memref<?x?x?x?xf32>
+  linalg.fill(%buf, %f) : memref<?x?x?x?xf32>, f32
+  return %buf : memref<?x?x?x?xf32>
+}
+
+func @conv_2d_nchw(%arg0: memref<?x?x?x?xf32>, %arg1: memref<?x?x?x?xf32>, %arg2: memref<?x?x?x?xf32>) {
+  linalg.conv_2d_nchw %arg0, %arg1, %arg2 : (memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32>)
+  return
+}
+
+func @main() {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c3 = constant 3 : index
+  %c6 = constant 6 : index
+  %c8 = constant 8 : index
+  %f10 = constant 10.00000e+00 : f32
+  %val = constant 2.00000e+00 : f32
+  %zero = constant 0.00000e+00 : f32
+
+  %filter2D_nchw = call @alloc_4d_filled_f32(%c1, %c1, %c3, %c3, %val) : (index, index, index, index, f32) -> (memref<?x?x?x?xf32>)
+  %in2D_nchw = call @alloc_4d_filled_f32(%c3, %c1, %c8, %c8, %val) : (index, index, index, index, f32) -> (memref<?x?x?x?xf32>)
+  %out2D_nchw = call @alloc_4d_filled_f32(%c3, %c1, %c6, %c6, %zero) : (index, index, index, index, f32) -> (memref<?x?x?x?xf32>)
+
+  store %f10, %in2D_nchw[%c0, %c0, %c0, %c3] : memref<?x?x?x?xf32>
+  call @conv_2d_nchw(%in2D_nchw, %filter2D_nchw, %out2D_nchw) : (memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32>) -> ()
+  %out2D_nchw_ = memref_cast %out2D_nchw : memref<?x?x?x?xf32> to memref<*xf32>
+  call @print_memref_f32(%out2D_nchw_): (memref<*xf32>) -> ()
+
+  dealloc %filter2D_nchw : memref<?x?x?x?xf32>
+  dealloc %in2D_nchw : memref<?x?x?x?xf32>
+  dealloc %out2D_nchw : memref<?x?x?x?xf32>
+  return
+}
+
+// CHECK: Unranked Memref {{.*}}
+// CHECK-NEXT: [
+// CHECK-SAME:  [
+// CHECK-SAME:   [
+// CHECK-SAME:    [36, 52, 52, 52, 36, 36],
+// CHECK-COUNT-5: [36, 36, 36, 36, 36, 36]
+// CHECK-SAME:   ]
+// CHECK-SAME:  ],
+// CHECK-NEXT:  [
+// CHECK-SAME:   [
+// CHECK-COUNT-6: [36, 36, 36, 36, 36, 36]
+// CHECK-SAME:   ]
+// CHECK-SAME:  ],
+// CHECK-NEXT:  [
+// CHECK-SAME:   [
+// CHECK-COUNT-6: [36, 36, 36, 36, 36, 36]
+// CHECK-SAME:   ]
+// CHECK-SAME:  ]
+// CHECK-SAME: ]
diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-nhwc-call.mlir b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-nhwc-call.mlir
new file mode 100644
index 0000000000000..422720da429ef
--- /dev/null
+++ b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-nhwc-call.mlir
@@ -0,0 +1,130 @@
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,3,2" -convert-linalg-to-loops \
+// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1,1" -test-conv-vectorization \
+// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \
+// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,3,2" -linalg-tile="linalg-tile-sizes=1,1,1,1" \
-linalg-tile="linalg-tile-sizes=1,1,1,1" \ +// RUN: -test-conv-vectorization -convert-linalg-to-loops \ +// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func @print_memref_f32(memref<*xf32>) + +// Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f +func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref { + %buf = alloc(%s1, %s2, %s3, %s4) : memref + linalg.fill(%buf, %f) : memref, f32 + return %buf : memref +} + +func @conv_2d_nhwc(%arg0: memref, %arg1: memref, %arg2: memref) { + linalg.conv_2d_nhwc %arg0, %arg1, %arg2 : (memref, memref, memref) + return +} + +func @main() { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c3 = constant 3 : index + %c6 = constant 6 : index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter2D_nhwc = call @alloc_4d_filled_f32(%c1, %c3, %c3, %c3, %val) :(index, index, index, index, f32) -> (memref) + %in2D_nhwc = call @alloc_4d_filled_f32(%c3, %c8, %c8, %c3, %val) : (index, index, index, index, f32) -> (memref) + %out2D_nhwc = call @alloc_4d_filled_f32(%c3, %c6, %c6, %c1, %zero) : (index, index, index, index, f32) -> (memref) + + store %f10, %in2D_nhwc[%c0, %c0, %c3, %c0] : memref + call @conv_2d_nhwc(%in2D_nhwc, %filter2D_nhwc, %out2D_nhwc) : (memref, memref, memref) -> () + %out2D_nhwc_ = memref_cast %out2D_nhwc : memref to memref<*xf32> + call @print_memref_f32(%out2D_nhwc_): (memref<*xf32>) -> () + + dealloc %filter2D_nhwc : memref + dealloc %in2D_nhwc : memref + dealloc %out2D_nhwc : memref + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-SAME: [ +// CHECK-SAME: [108], +// CHECK-COUNT-3: [124], +// CHECK-COUNT-2: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// CHECK-SAME: ] +// CHECK-SAME: ] diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-call.mlir b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-call.mlir new file mode 100644 index 0000000000000..8f38962acf8bb --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-call.mlir @@ -0,0 +1,87 @@ +// RUN: 
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,2,2" -convert-linalg-to-loops \
+// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1" -test-conv-vectorization \
+// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \
+// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,2,2" -linalg-tile="linalg-tile-sizes=1,1,1" \
+// RUN: -test-conv-vectorization -convert-linalg-to-loops \
+// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \
+// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+func @print_memref_f32(memref<*xf32>)
+
+// Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f
+func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref<?x?x?xf32> {
+  %buf = alloc(%s1, %s2, %s3) : memref<?x?x?xf32>
+  linalg.fill(%buf, %f) : memref<?x?x?xf32>, f32
+  return %buf : memref<?x?x?xf32>
+}
+
+func @conv_3d(%arg0: memref<?x?x?xf32>, %arg1: memref<?x?x?xf32>, %arg2: memref<?x?x?xf32>) {
+  linalg.conv_3d %arg0, %arg1, %arg2 : (memref<?x?x?xf32>, memref<?x?x?xf32>, memref<?x?x?xf32>)
+  return
+}
+
+func @main() {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c3 = constant 3 : index
+  %c6 = constant 6 : index
+  %c8 = constant 8 : index
+  %f10 = constant 10.00000e+00 : f32
+  %val = constant 2.00000e+00 : f32
+  %zero = constant 0.00000e+00 : f32
+
+  %filter3D = call @alloc_3d_filled_f32(%c3, %c3, %c3, %val) : (index, index, index, f32) -> (memref<?x?x?xf32>)
+  %in3D = call @alloc_3d_filled_f32(%c8, %c8, %c8, %val) : (index, index, index, f32) -> (memref<?x?x?xf32>)
+  %out3D = call @alloc_3d_filled_f32(%c6, %c6, %c6, %zero) : (index, index, index, f32) -> (memref<?x?x?xf32>)
+
+  store %f10, %in3D[%c0, %c0, %c3] : memref<?x?x?xf32>
+  call @conv_3d(%in3D, %filter3D, %out3D) : (memref<?x?x?xf32>, memref<?x?x?xf32>, memref<?x?x?xf32>) -> ()
+  %out3D_ = memref_cast %out3D : memref<?x?x?xf32> to memref<*xf32>
+  call @print_memref_f32(%out3D_): (memref<*xf32>) -> ()
+
+  dealloc %filter3D : memref<?x?x?xf32>
+  dealloc %in3D : memref<?x?x?xf32>
+  dealloc %out3D : memref<?x?x?xf32>
+  return
+}
+
+// CHECK: Unranked Memref {{.*}}
+// CHECK-NEXT: [
+// CHECK-SAME:  [
+// CHECK-SAME:   [108, 124, 124, 124, 108, 108],
+// CHECK-COUNT-5: [108, 108, 108, 108, 108, 108]
+// CHECK-SAME:  ],
+// CHECK-NEXT:  [
+// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108]
+// CHECK-SAME:  ],
+// CHECK-NEXT:  [
+// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108]
+// CHECK-SAME:  ],
+// CHECK-NEXT:  [
+// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108]
+// CHECK-SAME:  ],
+// CHECK-NEXT:  [
+// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108]
+// CHECK-SAME:  ],
+// CHECK-NEXT:  [
+// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108]
+// CHECK-SAME:  ]
+// CHECK-SAME: ]
diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-ncdhw-call.mlir b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-ncdhw-call.mlir
new file mode 100644
index 0000000000000..2ad2b4fc3465e
--- /dev/null
+++ b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-ncdhw-call.mlir
@@ -0,0 +1,91 @@
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,5,5,5" -convert-linalg-to-loops \
+// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1,1,1" -test-conv-vectorization \
+// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \
+// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,5,5,5" -linalg-tile="linalg-tile-sizes=1,1,1,1,1" \
+// RUN: -test-conv-vectorization -convert-linalg-to-loops \
+// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \
+// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+func @print_memref_f32(memref<*xf32>)
+
+// Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f
+func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref<?x?x?x?x?xf32> {
+  %buf = alloc(%s1, %s2, %s3, %s4, %s5) : memref<?x?x?x?x?xf32>
+  linalg.fill(%buf, %f) : memref<?x?x?x?x?xf32>, f32
+  return %buf : memref<?x?x?x?x?xf32>
+}
+
+func @conv_3d_ncdhw(%arg0: memref<?x?x?x?x?xf32>, %arg1: memref<?x?x?x?x?xf32>, %arg2: memref<?x?x?x?x?xf32>) {
+  linalg.conv_3d_ncdhw %arg0, %arg1, %arg2 : (memref<?x?x?x?x?xf32>, memref<?x?x?x?x?xf32>, memref<?x?x?x?x?xf32>)
+  return
+}
+
+func @main() {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c3 = constant 3 : index
+  %c6 = constant 6 : index
+  %c8 = constant 8 : index
+  %f10 = constant 10.00000e+00 : f32
+  %val = constant 2.00000e+00 : f32
+  %zero = constant 0.00000e+00 : f32
+
+  %filter3D_ncdhw = call @alloc_5d_filled_f32(%c1, %c1, %c3, %c3, %c3, %val) : (index, index, index, index, index, f32) -> (memref<?x?x?x?x?xf32>)
+  %in3D_ncdhw = call @alloc_5d_filled_f32(%c1, %c1, %c8, %c8, %c8, %val) : (index, index, index, index, index, f32) -> (memref<?x?x?x?x?xf32>)
+  %out3D_ncdhw = call @alloc_5d_filled_f32(%c1, %c1, %c6, %c6, %c6, %zero) : (index, index, index, index, index, f32) -> (memref<?x?x?x?x?xf32>)
+
+  store %f10, %in3D_ncdhw[%c0, %c0, %c0, %c0, %c3] : memref<?x?x?x?x?xf32>
+  call @conv_3d_ncdhw(%in3D_ncdhw, %filter3D_ncdhw, %out3D_ncdhw) : (memref<?x?x?x?x?xf32>, memref<?x?x?x?x?xf32>, memref<?x?x?x?x?xf32>) -> ()
+  %out3D_ncdhw_ = memref_cast %out3D_ncdhw : memref<?x?x?x?x?xf32> to memref<*xf32>
+  call @print_memref_f32(%out3D_ncdhw_): (memref<*xf32>) -> ()
+
+  dealloc %filter3D_ncdhw : memref<?x?x?x?x?xf32>
+  dealloc %in3D_ncdhw : memref<?x?x?x?x?xf32>
+  dealloc %out3D_ncdhw : memref<?x?x?x?x?xf32>
+  return
+}
+
+// CHECK: Unranked Memref {{.*}}
+// CHECK-NEXT: [
+// CHECK-SAME:  [
+// CHECK-SAME:   [
+// CHECK-SAME:    [
+// CHECK-SAME:     [108, 124, 124, 124, 108, 108],
+// CHECK-COUNT-5:  [108, 108, 108, 108, 108, 108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108, 108, 108, 108, 108, 108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108, 108, 108, 108, 108, 108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108, 108, 108, 108, 108, 108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108, 108, 108, 108, 108, 108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108, 108, 108, 108, 108, 108]
+// CHECK-SAME:    ]
+// CHECK-SAME:   ]
+// CHECK-SAME:  ]
+// CHECK-SAME: ]
diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-ndhwc-call.mlir b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-ndhwc-call.mlir
new file mode 100644
index 0000000000000..4f1392363bb2d
--- /dev/null
+++ b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-ndhwc-call.mlir
@@ -0,0 +1,193 @@
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,5,5,5" -convert-linalg-to-loops \
+// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1,1,1" -test-conv-vectorization \
+// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \
+// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,5,5,5" -linalg-tile="linalg-tile-sizes=1,1,1,1,1" \
+// RUN: -test-conv-vectorization -convert-linalg-to-loops \
+// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \
+// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+func @print_memref_f32(memref<*xf32>)
+
+// Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f
+func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref<?x?x?x?x?xf32> {
+  %buf = alloc(%s1, %s2, %s3, %s4, %s5) : memref<?x?x?x?x?xf32>
+  linalg.fill(%buf, %f) : memref<?x?x?x?x?xf32>, f32
+  return %buf : memref<?x?x?x?x?xf32>
+}
+
+func @conv_3d_ndhwc(%arg0: memref<?x?x?x?x?xf32>, %arg1: memref<?x?x?x?x?xf32>, %arg2: memref<?x?x?x?x?xf32>) {
+  linalg.conv_3d_ndhwc %arg0, %arg1, %arg2 : (memref<?x?x?x?x?xf32>, memref<?x?x?x?x?xf32>, memref<?x?x?x?x?xf32>)
+  return
+}
+
+
+func @main() {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c3 = constant 3 : index
+  %c6 = constant 6 : index
+  %c8 = constant 8 : index
+  %f10 = constant 10.00000e+00 : f32
+  %val = constant 2.00000e+00 : f32
+  %zero = constant 0.00000e+00 : f32
+
+  %filter3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c3, %c3, %c3, %c1, %val) : (index, index, index, index, index, f32) -> (memref<?x?x?x?x?xf32>)
+  %in3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c8, %c8, %c8, %c1, %val) : (index, index, index, index, index, f32) -> (memref<?x?x?x?x?xf32>)
+  %out3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c6, %c6, %c6, %c1, %zero) : (index, index, index, index, index, f32) -> (memref<?x?x?x?x?xf32>)
+
+  store %f10, %in3D_ndhwc[%c0, %c0, %c0, %c3, %c0] : memref<?x?x?x?x?xf32>
+  call @conv_3d_ndhwc(%in3D_ndhwc, %filter3D_ndhwc, %out3D_ndhwc) : (memref<?x?x?x?x?xf32>, memref<?x?x?x?x?xf32>, memref<?x?x?x?x?xf32>) -> ()
+  %out3D_ndhwc_ = memref_cast %out3D_ndhwc : memref<?x?x?x?x?xf32> to memref<*xf32>
+  call @print_memref_f32(%out3D_ndhwc_): (memref<*xf32>) -> ()
+
+  dealloc %filter3D_ndhwc : memref<?x?x?x?x?xf32>
+  dealloc %in3D_ndhwc : memref<?x?x?x?x?xf32>
+  dealloc %out3D_ndhwc : memref<?x?x?x?x?xf32>
+  return
+}
+
+// CHECK: Unranked Memref {{.*}}
+// CHECK-NEXT: [
+// CHECK-SAME:  [
+// CHECK-SAME:   [
+// CHECK-SAME:    [
+// CHECK-SAME:     [108],
+// CHECK-COUNT-3:  [124],
+// CHECK-COUNT-2:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ]
+// CHECK-SAME:   ],
+// CHECK-NEXT:   [
+// CHECK-SAME:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ]
+// CHECK-SAME:   ],
+// CHECK-NEXT:   [
+// CHECK-SAME:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ]
+// CHECK-SAME:   ],
+// CHECK-NEXT:   [
+// CHECK-SAME:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ]
+// CHECK-SAME:   ],
+// CHECK-NEXT:   [
+// CHECK-SAME:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ]
+// CHECK-SAME:   ],
+// CHECK-NEXT:   [
+// CHECK-SAME:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ],
+// CHECK-NEXT:    [
+// CHECK-COUNT-6:  [108]
+// CHECK-SAME:    ]
+// CHECK-SAME:   ]
+// CHECK-SAME:  ]
+// CHECK-SAME: ]

From d4b88ac1658d681e143482336cac27c6a74b8b24 Mon Sep 17 00:00:00 2001
From: Diana Picus
Date: Thu, 3 Sep 2020 13:39:29 +0200
Subject: [PATCH 0157/1079] [cmake] Use absolute paths for modules search

For out of tree builds, the user generally needs to specify LLVM_DIR and
MLIR_DIR on the command line so that the correct LLVM and MLIR
installations are picked up.

If the provided paths are absolute, everything works fine, however for
buildbots it is customary to work with relative paths, and that makes it
difficult for CMake to find the right modules to include.

This patch changes CMakeLists.txt to convert LLVM_DIR and MLIR_DIR to
absolute paths before adding them to CMAKE_MODULE_PATH.
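In CMake terms the fix is the pattern below, shown here as a sketch of the
hunks in the diff that follows (only the LLVM_DIR case is shown; the
MLIR_DIR case is identical):

  # Resolve a possibly-relative LLVM_DIR before using it for module lookup.
  # REALPATH interprets relative inputs against the current source directory
  # and also resolves symlinks, so CMAKE_MODULE_PATH only ever sees an
  # absolute path.
  get_filename_component(LLVM_DIR_ABSOLUTE ${LLVM_DIR} REALPATH)
  list(APPEND CMAKE_MODULE_PATH ${LLVM_DIR_ABSOLUTE})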
The inputs are assumed to be relative to the source directory
(llvm-project/flang).

Differential Revision: https://reviews.llvm.org/D87083
---
 flang/CMakeLists.txt | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 03440b72ec8ca..707c7235a272a 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -56,7 +56,10 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)

   # We need a pre-built/installed version of LLVM.
   find_package(LLVM REQUIRED HINTS "${LLVM_CMAKE_PATH}")
-  list(APPEND CMAKE_MODULE_PATH ${LLVM_DIR})
+  # If the user specifies a relative path to LLVM_DIR, the calls to include
+  # LLVM modules fail. Append the absolute path to LLVM_DIR instead.
+  get_filename_component(LLVM_DIR_ABSOLUTE ${LLVM_DIR} REALPATH)
+  list(APPEND CMAKE_MODULE_PATH ${LLVM_DIR_ABSOLUTE})

   # If LLVM links to zlib we need the imported targets so we can too.
   if(LLVM_ENABLE_ZLIB)
@@ -78,7 +81,10 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   find_package(MLIR REQUIRED CONFIG)
   # Use SYSTEM for the same reasons as for LLVM includes
   include_directories(SYSTEM ${MLIR_INCLUDE_DIRS})
-  list(APPEND CMAKE_MODULE_PATH ${MLIR_DIR})
+  # If the user specifies a relative path to MLIR_DIR, the calls to include
+  # MLIR modules fail. Append the absolute path to MLIR_DIR instead.
+  get_filename_component(MLIR_DIR_ABSOLUTE ${MLIR_DIR} REALPATH)
+  list(APPEND CMAKE_MODULE_PATH ${MLIR_DIR_ABSOLUTE})
   include(AddMLIR)

   find_program(MLIR_TABLEGEN_EXE "mlir-tblgen" ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)

From 25f3cc0ced1759af1911c2446ac40fab4f5e5571 Mon Sep 17 00:00:00 2001
From: Xing GUO
Date: Wed, 9 Sep 2020 20:06:00 +0800
Subject: [PATCH 0158/1079] [elf2yaml] Fix dumping a debug section whose name
 is not recognized.

If the debug section's name isn't recognized, it should be dumped as a raw
content section.

Reviewed By: jhenderson, grimar

Differential Revision: https://reviews.llvm.org/D87346
---
 .../ELF/DWARF/unrecognized-debug-section.yaml | 19 +++++++++++++++++++
 llvm/tools/obj2yaml/elf2yaml.cpp              |  2 ++
 2 files changed, 21 insertions(+)
 create mode 100644 llvm/test/tools/obj2yaml/ELF/DWARF/unrecognized-debug-section.yaml

diff --git a/llvm/test/tools/obj2yaml/ELF/DWARF/unrecognized-debug-section.yaml b/llvm/test/tools/obj2yaml/ELF/DWARF/unrecognized-debug-section.yaml
new file mode 100644
index 0000000000000..618ac3592b6df
--- /dev/null
+++ b/llvm/test/tools/obj2yaml/ELF/DWARF/unrecognized-debug-section.yaml
@@ -0,0 +1,19 @@
+## Test dumping a debug section when its name is not recognized by obj2yaml.
+
+# RUN: yaml2obj %s | obj2yaml | FileCheck %s
+
+# CHECK:      Sections:
+# CHECK-NEXT:   - Name: .debug_foo
+# CHECK-NEXT:     Type: SHT_PROGBITS
+# CHECK-NEXT:     Content: '01020304'
+# CHECK-NEXT: ...
+
+--- !ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data:  ELFDATA2LSB
+  Type:  ET_EXEC
+Sections:
+  - Name: .debug_foo
+    Type: SHT_PROGBITS
+    Content: '01020304'
diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp
index 264bc4d1dbf36..94819cb8d87d3 100644
--- a/llvm/tools/obj2yaml/elf2yaml.cpp
+++ b/llvm/tools/obj2yaml/elf2yaml.cpp
@@ -416,6 +416,8 @@ Optional<DWARFYAML::Data> ELFDumper<ELFT>::dumpDWARFSections(
       Err = dumpDebugARanges(*DWARFCtx.get(), DWARF);
     else if (RawSec->Name == ".debug_str")
       Err = dumpDebugStrings(*DWARFCtx.get(), DWARF);
+    else
+      continue;

     // If the DWARF section cannot be successfully parsed, emit raw content
     // instead of an entry in the DWARF section of the YAML.
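The shape of the fixed lookup, as a simplified sketch (the names are those
from the diff above; the enclosing per-section loop is abbreviated, so this
is not the exact obj2yaml source):

  // Known .debug_* names get a structured DWARF dump; any other name now
  // skips the DWARF path entirely, so the section is emitted as a plain
  // raw-content section (see the .debug_foo test above).
  if (RawSec->Name == ".debug_aranges")
    Err = dumpDebugARanges(*DWARFCtx.get(), DWARF);
  else if (RawSec->Name == ".debug_str")
    Err = dumpDebugStrings(*DWARFCtx.get(), DWARF);
  else
    continue;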
From 1eaf7babf2dcc3ab8fb429171c991556ffa98291 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 9 Sep 2020 12:21:55 +0100 Subject: [PATCH 0159/1079] APInt.h - return directly from clearUnusedBits in single word cases. NFCI. Consistently use the same pattern of returning *this from the clearUnusedBits() call to allow us to early out from the isSingleWord() path and avoid an else statement. --- llvm/include/llvm/ADT/APInt.h | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index 5e4206732f4df..fdc0850d21eb0 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -794,11 +794,10 @@ class LLVM_NODISCARD APInt { APInt &operator=(uint64_t RHS) { if (isSingleWord()) { U.VAL = RHS; - clearUnusedBits(); - } else { - U.pVal[0] = RHS; - memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); + return clearUnusedBits(); } + U.pVal[0] = RHS; + memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); return *this; } @@ -855,10 +854,9 @@ class LLVM_NODISCARD APInt { APInt &operator|=(uint64_t RHS) { if (isSingleWord()) { U.VAL |= RHS; - clearUnusedBits(); - } else { - U.pVal[0] |= RHS; + return clearUnusedBits(); } + U.pVal[0] |= RHS; return *this; } @@ -885,10 +883,9 @@ class LLVM_NODISCARD APInt { APInt &operator^=(uint64_t RHS) { if (isSingleWord()) { U.VAL ^= RHS; - clearUnusedBits(); - } else { - U.pVal[0] ^= RHS; + return clearUnusedBits(); } + U.pVal[0] ^= RHS; return *this; } From d816499f95d673bbad297d0231cbeaf5efbbc5de Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 9 Sep 2020 13:22:39 +0100 Subject: [PATCH 0160/1079] [KnownBits] Move SelectionDAG::computeKnownBits ISD::ABS handling to KnownBits::abs Move the ISD::ABS handling to a KnownBits::abs handler, to simplify future implementations in ValueTracking/GlobalISel. --- llvm/include/llvm/Support/KnownBits.h | 3 +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 16 +--------------- llvm/lib/Support/KnownBits.cpp | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h index a29e150b904a3..8da6c7d98ba5f 100644 --- a/llvm/include/llvm/Support/KnownBits.h +++ b/llvm/include/llvm/Support/KnownBits.h @@ -278,6 +278,9 @@ struct KnownBits { /// Update known bits based on XORing with RHS. KnownBits &operator^=(const KnownBits &RHS); + /// Compute known bits for the absolute value. + KnownBits abs() const; + KnownBits byteSwap() { return KnownBits(Zero.byteSwap(), One.byteSwap()); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 2350248626c71..1cc2ec77ebceb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3370,21 +3370,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } case ISD::ABS: { Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - - // If the source's MSB is zero then we know the rest of the bits already. - if (Known2.isNonNegative()) { - Known.Zero = Known2.Zero; - Known.One = Known2.One; - break; - } - - // We only know that the absolute values's MSB will be zero iff there is - // a set bit that isn't the sign bit (otherwise it could be INT_MIN). 
- Known2.One.clearSignBit(); - if (Known2.One.getBoolValue()) { - Known.Zero = APInt::getSignMask(BitWidth); - break; - } + Known = Known2.abs(); break; } case ISD::UMIN: { diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp index 03843687c10a4..ed32a80a061db 100644 --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -145,6 +145,24 @@ KnownBits KnownBits::smin(const KnownBits &LHS, const KnownBits &RHS) { return Flip(umax(Flip(LHS), Flip(RHS))); } +KnownBits KnownBits::abs() const { + // If the source's MSB is zero then we know the rest of the bits already. + if (isNonNegative()) + return *this; + + // Assume we know nothing. + KnownBits KnownAbs(getBitWidth()); + + // We only know that the absolute values's MSB will be zero iff there is + // a set bit that isn't the sign bit (otherwise it could be INT_MIN). + APInt Val = One; + Val.clearSignBit(); + if (!Val.isNullValue()) + KnownAbs.Zero.setSignBit(); + + return KnownAbs; +} + KnownBits &KnownBits::operator&=(const KnownBits &RHS) { // Result bit is 0 if either operand bit is 0. Zero |= RHS.Zero; From f078577f31cc96b6e8a064f628f81a376f21e2e2 Mon Sep 17 00:00:00 2001 From: Ronak Chauhan Date: Wed, 9 Sep 2020 18:01:28 +0530 Subject: [PATCH 0161/1079] Revert "[AMDGPU] Support disassembly for AMDGPU kernel descriptors" This reverts commit 487a80531006add8102d50dbcce4b6fd729ab1f6. Tests fail on big endian machines. --- .../llvm/Support/AMDHSAKernelDescriptor.h | 70 ++-- .../Disassembler/AMDGPUDisassembler.cpp | 345 ------------------ .../AMDGPU/Disassembler/AMDGPUDisassembler.h | 30 +- llvm/test/CodeGen/AMDGPU/nop-data.ll | 4 +- .../llvm-objdump/ELF/AMDGPU/kd-failure.s | 37 -- .../tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s | 49 --- .../tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s | 36 -- .../llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s | 58 --- .../llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s | 53 --- .../llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s | 41 --- llvm/tools/llvm-objdump/llvm-objdump.cpp | 17 + 11 files changed, 50 insertions(+), 690 deletions(-) delete mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s delete mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s delete mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s delete mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s delete mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s delete mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h index 48a09ac48005d..d1c2147536a72 100644 --- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h +++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h @@ -162,49 +162,39 @@ struct kernel_descriptor_t { uint8_t reserved2[6]; }; -enum : uint32_t { - GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0, - PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4, - RESERVED0_OFFSET = 8, - KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16, - RESERVED1_OFFSET = 24, - COMPUTE_PGM_RSRC3_OFFSET = 44, - COMPUTE_PGM_RSRC1_OFFSET = 48, - COMPUTE_PGM_RSRC2_OFFSET = 52, - KERNEL_CODE_PROPERTIES_OFFSET = 56, - RESERVED2_OFFSET = 58, -}; - static_assert( sizeof(kernel_descriptor_t) == 64, "invalid size for kernel_descriptor_t"); -static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) == - GROUP_SEGMENT_FIXED_SIZE_OFFSET, - "invalid offset for group_segment_fixed_size"); -static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) == - 
PRIVATE_SEGMENT_FIXED_SIZE_OFFSET, - "invalid offset for private_segment_fixed_size"); -static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET, - "invalid offset for reserved0"); -static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == - KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET, - "invalid offset for kernel_code_entry_byte_offset"); -static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET, - "invalid offset for reserved1"); -static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == - COMPUTE_PGM_RSRC3_OFFSET, - "invalid offset for compute_pgm_rsrc3"); -static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == - COMPUTE_PGM_RSRC1_OFFSET, - "invalid offset for compute_pgm_rsrc1"); -static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == - COMPUTE_PGM_RSRC2_OFFSET, - "invalid offset for compute_pgm_rsrc2"); -static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) == - KERNEL_CODE_PROPERTIES_OFFSET, - "invalid offset for kernel_code_properties"); -static_assert(offsetof(kernel_descriptor_t, reserved2) == RESERVED2_OFFSET, - "invalid offset for reserved2"); +static_assert( + offsetof(kernel_descriptor_t, group_segment_fixed_size) == 0, + "invalid offset for group_segment_fixed_size"); +static_assert( + offsetof(kernel_descriptor_t, private_segment_fixed_size) == 4, + "invalid offset for private_segment_fixed_size"); +static_assert( + offsetof(kernel_descriptor_t, reserved0) == 8, + "invalid offset for reserved0"); +static_assert( + offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == 16, + "invalid offset for kernel_code_entry_byte_offset"); +static_assert( + offsetof(kernel_descriptor_t, reserved1) == 24, + "invalid offset for reserved1"); +static_assert( + offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == 44, + "invalid offset for compute_pgm_rsrc3"); +static_assert( + offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == 48, + "invalid offset for compute_pgm_rsrc1"); +static_assert( + offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == 52, + "invalid offset for compute_pgm_rsrc2"); +static_assert( + offsetof(kernel_descriptor_t, kernel_code_properties) == 56, + "invalid offset for kernel_code_properties"); +static_assert( + offsetof(kernel_descriptor_t, reserved2) == 58, + "invalid offset for reserved2"); } // end namespace amdhsa } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 840208169168e..9c2f2e7eecd14 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -34,7 +34,6 @@ #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/AMDHSAKernelDescriptor.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -1216,350 +1215,6 @@ bool AMDGPUDisassembler::isGFX10() const { return STI.getFeatureBits()[AMDGPU::FeatureGFX10]; } -//===----------------------------------------------------------------------===// -// AMDGPU specific symbol handling -//===----------------------------------------------------------------------===// -#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ - do { \ - KdStream << Indent << DIRECTIVE " " \ - << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ - } while (0) - -// NOLINTNEXTLINE(readability-identifier-naming) 
-MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( - uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { - using namespace amdhsa; - StringRef Indent = "\t"; - - // We cannot accurately backward compute #VGPRs used from - // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same - // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we - // simply calculate the inverse of what the assembler does. - - uint32_t GranulatedWorkitemVGPRCount = - (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >> - COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT; - - uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) * - AMDGPU::IsaInfo::getVGPREncodingGranule(&STI); - - KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n'; - - // We cannot backward compute values used to calculate - // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following - // directives can't be computed: - // .amdhsa_reserve_vcc - // .amdhsa_reserve_flat_scratch - // .amdhsa_reserve_xnack_mask - // They take their respective default values if not specified in the assembly. - // - // GRANULATED_WAVEFRONT_SGPR_COUNT - // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK) - // - // We compute the inverse as though all directives apart from NEXT_FREE_SGPR - // are set to 0. So while disassembling we consider that: - // - // GRANULATED_WAVEFRONT_SGPR_COUNT - // = f(NEXT_FREE_SGPR + 0 + 0 + 0) - // - // The disassembler cannot recover the original values of those 3 directives. - - uint32_t GranulatedWavefrontSGPRCount = - (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >> - COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT; - - if (isGFX10() && GranulatedWavefrontSGPRCount) - return MCDisassembler::Fail; - - uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) * - AMDGPU::IsaInfo::getSGPREncodingGranule(&STI); - - KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n'; - KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n'; - KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n'; - KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n"; - - if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY) - return MCDisassembler::Fail; - - PRINT_DIRECTIVE(".amdhsa_float_round_mode_32", - COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32); - PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64", - COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64); - PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32", - COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32); - PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64", - COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64); - - if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV) - return MCDisassembler::Fail; - - PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP); - - if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE) - return MCDisassembler::Fail; - - PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE); - - if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY) - return MCDisassembler::Fail; - - if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER) - return MCDisassembler::Fail; - - PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL); - - if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0) - return MCDisassembler::Fail; - - if (isGFX10()) { - PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode", - COMPUTE_PGM_RSRC1_WGP_MODE); - PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED); - 
PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS); - } - return MCDisassembler::Success; -} - -// NOLINTNEXTLINE(readability-identifier-naming) -MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2( - uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { - using namespace amdhsa; - StringRef Indent = "\t"; - PRINT_DIRECTIVE( - ".amdhsa_system_sgpr_private_segment_wavefront_offset", - COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET); - PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x", - COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X); - PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y", - COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y); - PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z", - COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z); - PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info", - COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO); - PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id", - COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID); - - if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH) - return MCDisassembler::Fail; - - if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY) - return MCDisassembler::Fail; - - if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE) - return MCDisassembler::Fail; - - PRINT_DIRECTIVE( - ".amdhsa_exception_fp_ieee_invalid_op", - COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION); - PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src", - COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE); - PRINT_DIRECTIVE( - ".amdhsa_exception_fp_ieee_div_zero", - COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO); - PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow", - COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW); - PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow", - COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW); - PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact", - COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT); - PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero", - COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO); - - if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0) - return MCDisassembler::Fail; - - return MCDisassembler::Success; -} - -#undef PRINT_DIRECTIVE - -MCDisassembler::DecodeStatus -AMDGPUDisassembler::decodeKernelDescriptorDirective( - DataExtractor::Cursor &Cursor, ArrayRef Bytes, - raw_string_ostream &KdStream) const { -#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ - do { \ - KdStream << Indent << DIRECTIVE " " \ - << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ - } while (0) - - uint16_t TwoByteBuffer = 0; - uint32_t FourByteBuffer = 0; - uint64_t EightByteBuffer = 0; - - StringRef ReservedBytes; - StringRef Indent = "\t"; - - assert(Bytes.size() == 64); - DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8); - - switch (Cursor.tell()) { - case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET: - FourByteBuffer = DE.getU32(Cursor); - KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer - << '\n'; - return MCDisassembler::Success; - - case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET: - FourByteBuffer = DE.getU32(Cursor); - KdStream << Indent << ".amdhsa_private_segment_fixed_size " - << FourByteBuffer << '\n'; - return MCDisassembler::Success; - - case amdhsa::RESERVED0_OFFSET: - // 8 reserved bytes, must be 0. 
- EightByteBuffer = DE.getU64(Cursor); - if (EightByteBuffer) { - return MCDisassembler::Fail; - } - return MCDisassembler::Success; - - case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET: - // KERNEL_CODE_ENTRY_BYTE_OFFSET - // So far no directive controls this for Code Object V3, so simply skip for - // disassembly. - DE.skip(Cursor, 8); - return MCDisassembler::Success; - - case amdhsa::RESERVED1_OFFSET: - // 20 reserved bytes, must be 0. - ReservedBytes = DE.getBytes(Cursor, 20); - for (int I = 0; I < 20; ++I) { - if (ReservedBytes[I] != 0) { - return MCDisassembler::Fail; - } - } - return MCDisassembler::Success; - - case amdhsa::COMPUTE_PGM_RSRC3_OFFSET: - // COMPUTE_PGM_RSRC3 - // - Only set for GFX10, GFX6-9 have this to be 0. - // - Currently no directives directly control this. - FourByteBuffer = DE.getU32(Cursor); - if (!isGFX10() && FourByteBuffer) { - return MCDisassembler::Fail; - } - return MCDisassembler::Success; - - case amdhsa::COMPUTE_PGM_RSRC1_OFFSET: - FourByteBuffer = DE.getU32(Cursor); - if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) == - MCDisassembler::Fail) { - return MCDisassembler::Fail; - } - return MCDisassembler::Success; - - case amdhsa::COMPUTE_PGM_RSRC2_OFFSET: - FourByteBuffer = DE.getU32(Cursor); - if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) == - MCDisassembler::Fail) { - return MCDisassembler::Fail; - } - return MCDisassembler::Success; - - case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET: - using namespace amdhsa; - TwoByteBuffer = DE.getU16(Cursor); - - PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer", - KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); - PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr", - KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); - PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr", - KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); - PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr", - KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); - PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id", - KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); - PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init", - KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); - PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size", - KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); - - if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0) - return MCDisassembler::Fail; - - // Reserved for GFX9 - if (isGFX9() && - (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) { - return MCDisassembler::Fail; - } else if (isGFX10()) { - PRINT_DIRECTIVE(".amdhsa_wavefront_size32", - KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); - } - - if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) - return MCDisassembler::Fail; - - return MCDisassembler::Success; - - case amdhsa::RESERVED2_OFFSET: - // 6 bytes from here are reserved, must be 0. - ReservedBytes = DE.getBytes(Cursor, 6); - for (int I = 0; I < 6; ++I) { - if (ReservedBytes[I] != 0) - return MCDisassembler::Fail; - } - return MCDisassembler::Success; - - default: - llvm_unreachable("Unhandled index. Case statements cover everything."); - return MCDisassembler::Fail; - } -#undef PRINT_DIRECTIVE -} - -MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor( - StringRef KdName, ArrayRef Bytes, uint64_t KdAddress) const { - // CP microcode requires the kernel descriptor to be 64 aligned. 
-  if (Bytes.size() != 64 || KdAddress % 64 != 0)
-    return MCDisassembler::Fail;
-
-  std::string Kd;
-  raw_string_ostream KdStream(Kd);
-  KdStream << ".amdhsa_kernel " << KdName << '\n';
-
-  DataExtractor::Cursor C(0);
-  while (C && C.tell() < Bytes.size()) {
-    MCDisassembler::DecodeStatus Status =
-        decodeKernelDescriptorDirective(C, Bytes, KdStream);
-
-    cantFail(C.takeError());
-
-    if (Status == MCDisassembler::Fail)
-      return MCDisassembler::Fail;
-  }
-  KdStream << ".end_amdhsa_kernel\n";
-  outs() << KdStream.str();
-  return MCDisassembler::Success;
-}
-
-Optional<MCDisassembler::DecodeStatus>
-AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
-                                  ArrayRef<uint8_t> Bytes, uint64_t Address,
-                                  raw_ostream &CStream) const {
-  // Right now only kernel descriptor needs to be handled.
-  // We ignore all other symbols for target specific handling.
-  // TODO:
-  // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
-  // Object V2 and V3 when symbols are marked protected.
-
-  // amd_kernel_code_t for Code Object V2.
-  if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
-    Size = 256;
-    return MCDisassembler::Fail;
-  }
-
-  // Code Object V3 kernel descriptors.
-  StringRef Name = Symbol.Name;
-  if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) {
-    Size = 64; // Size = 64 regardless of success or failure.
-    return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
-  }
-  return None;
-}
-
 //===----------------------------------------------------------------------===//
 // AMDGPUSymbolizer
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 315602c35288c..f975af409a096 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -17,11 +17,10 @@

 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
 #include "llvm/MC/MCDisassembler/MCSymbolizer.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/Support/DataExtractor.h"
 #include <algorithm>
 #include <memory>

@@ -67,33 +66,6 @@ class AMDGPUDisassembler : public MCDisassembler {
   DecodeStatus tryDecodeInst(const uint8_t* Table, MCInst &MI, uint64_t Inst,
                              uint64_t Address) const;

-  Optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
-                                       ArrayRef<uint8_t> Bytes,
-                                       uint64_t Address,
-                                       raw_ostream &CStream) const override;
-
-  DecodeStatus decodeKernelDescriptor(StringRef KdName, ArrayRef<uint8_t> Bytes,
-                                      uint64_t KdAddress) const;
-
-  DecodeStatus
-  decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor,
-                                  ArrayRef<uint8_t> Bytes,
-                                  raw_string_ostream &KdStream) const;
-
-  /// Decode as directives that handle COMPUTE_PGM_RSRC1.
-  /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC1.
-  /// \param KdStream - Stream to write the disassembled directives to.
-  // NOLINTNEXTLINE(readability-identifier-naming)
-  DecodeStatus decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer,
-                                       raw_string_ostream &KdStream) const;
-
-  /// Decode as directives that handle COMPUTE_PGM_RSRC2.
-  /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC2.
-  /// \param KdStream - Stream to write the disassembled directives to.
- // NOLINTNEXTLINE(readability-identifier-naming) - DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, - raw_string_ostream &KdStream) const; - DecodeStatus convertSDWAInst(MCInst &MI) const; DecodeStatus convertDPP8Inst(MCInst &MI) const; DecodeStatus convertMIMGInst(MCInst &MI) const; diff --git a/llvm/test/CodeGen/AMDGPU/nop-data.ll b/llvm/test/CodeGen/AMDGPU/nop-data.ll index e21ca97e8ffca..7b6853acce285 100644 --- a/llvm/test/CodeGen/AMDGPU/nop-data.ll +++ b/llvm/test/CodeGen/AMDGPU/nop-data.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-code-object-v3 -mcpu=fiji -filetype=obj < %s | llvm-objdump -d - --mcpu=fiji | FileCheck %s ; CHECK: : -; CHECK: s_endpgm +; CHECK-NEXT: s_endpgm define amdgpu_kernel void @kernel0() align 256 { entry: ret void @@ -80,7 +80,7 @@ entry: ; CHECK-EMPTY: ; CHECK-NEXT: : -; CHECK: s_endpgm +; CHECK-NEXT: s_endpgm define amdgpu_kernel void @kernel1(i32 addrspace(1)* addrspace(4)* %ptr.out) align 256 { entry: ret void diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s deleted file mode 100644 index eee3fd4b7103e..0000000000000 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s +++ /dev/null @@ -1,37 +0,0 @@ -;; Failure test. We create a malformed kernel descriptor (KD) by manually -;; setting the bytes, because one can't create a malformed KD using the -;; assembler directives. - -; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t.o - -; RUN: printf ".type my_kernel.kd, @object \nmy_kernel.kd:\n.size my_kernel.kd, 64\n" > %t1.sym_info -; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t.o \ -; RUN: | tail -n +9 > %t1.sym_content -; RUN: cat %t1.sym_info %t1.sym_content > %t1.s - -; RUN: llvm-mc %t1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t-re-assemble.o -; RUN: diff %t.o %t-re-assemble.o - -;; Test failure by setting one of the reserved bytes to non-zero value. - -.type my_kernel.kd, @object -.size my_kernel.kd, 64 -my_kernel.kd: - .long 0x00000000 ;; group_segment_fixed_size - .long 0x00000000 ;; private_segment_fixed_size - .quad 0x00FF000000000000 ;; reserved bytes. - .quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works. - - ;; 20 reserved bytes. - .quad 0x0000000000000000 - .quad 0x0000000000000000 - .long 0x00000000 - - .long 0x00000000 ;; compute_PGM_RSRC3 - .long 0x00000000 ;; compute_PGM_RSRC1 - .long 0x00000000 ;; compute_PGM_RSRC2 - .short 0x0000 ;; additional fields. - - ;; 6 reserved bytes. - .long 0x0000000 - .short 0x0000 diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s deleted file mode 100644 index 0b798a298d398..0000000000000 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s +++ /dev/null @@ -1,49 +0,0 @@ -;; Test disassembly for GRANULATED_WAVEFRONT_SGPR_COUNT in the kernel descriptor. 
- -; RUN: split-file %s %t.dir - -; RUN: llvm-mc %t.dir/1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble -; RUN: diff %t1 %t1-re-assemble - -; RUN: llvm-mc %t.dir/2.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble -; RUN: diff %t2 %t2-re-assemble - -; RUN: llvm-mc %t.dir/3.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble -; RUN: diff %t3 %t3-re-assemble - - -;--- 1.s -;; Only set next_free_sgpr. -.amdhsa_kernel my_kernel_1 - .amdhsa_next_free_vgpr 0 - .amdhsa_next_free_sgpr 42 - .amdhsa_reserve_flat_scratch 0 - .amdhsa_reserve_xnack_mask 0 - .amdhsa_reserve_vcc 0 -.end_amdhsa_kernel - -;--- 2.s -;; Only set other directives. -.amdhsa_kernel my_kernel_2 - .amdhsa_next_free_vgpr 0 - .amdhsa_next_free_sgpr 0 - .amdhsa_reserve_flat_scratch 1 - .amdhsa_reserve_xnack_mask 1 - .amdhsa_reserve_vcc 1 -.end_amdhsa_kernel - -;--- 3.s -;; Set all affecting directives. -.amdhsa_kernel my_kernel_3 - .amdhsa_next_free_vgpr 0 - .amdhsa_next_free_sgpr 35 - .amdhsa_reserve_flat_scratch 1 - .amdhsa_reserve_xnack_mask 1 - .amdhsa_reserve_vcc 1 -.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s deleted file mode 100644 index a8883d2f74be7..0000000000000 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s +++ /dev/null @@ -1,36 +0,0 @@ -;; Test disassembly for GRANULATED_WORKITEM_VGPR_COUNT in the kernel descriptor. 
- -; RUN: split-file %s %t.dir - -; RUN: llvm-mc %t.dir/1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble -; RUN: diff %t1 %t1-re-assemble - -; RUN: llvm-mc %t.dir/2.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble -; RUN: diff %t2 %t2-re-assemble - -; RUN: llvm-mc %t.dir/3.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble -; RUN: diff %t3 %t3-re-assemble - -;--- 1.s -.amdhsa_kernel my_kernel_1 - .amdhsa_next_free_vgpr 23 - .amdhsa_next_free_sgpr 0 -.end_amdhsa_kernel - -;--- 2.s -.amdhsa_kernel my_kernel_2 - .amdhsa_next_free_vgpr 14 - .amdhsa_next_free_sgpr 0 -.end_amdhsa_kernel - -;--- 3.s -.amdhsa_kernel my_kernel_3 - .amdhsa_next_free_vgpr 32 - .amdhsa_next_free_sgpr 0 -.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s deleted file mode 100644 index 803507a130c03..0000000000000 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s +++ /dev/null @@ -1,58 +0,0 @@ -;; Entirely zeroed kernel descriptor (for GFX10). - -; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj -o %t -; RUN: llvm-objdump -s -j .text %t | FileCheck --check-prefix=OBJDUMP %s - -;; TODO: -;; This file and kd-zeroed-raw.s should produce the same output for the kernel -;; descriptor - a block of 64 zeroed bytes. But looks like the assembler sets -;; the FWD_PROGRESS bit in COMPUTE_PGM_RSRC1 to 1 even when the directive -;; mentions 0 (see line 36). - -;; Check the raw bytes right now. 
- -; OBJDUMP: 0000 00000000 00000000 00000000 00000000 -; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -; OBJDUMP-NEXT: 0030 01000000 00000000 00000000 00000000 - -.amdhsa_kernel my_kernel - .amdhsa_group_segment_fixed_size 0 - .amdhsa_private_segment_fixed_size 0 - .amdhsa_next_free_vgpr 8 - .amdhsa_reserve_vcc 0 - .amdhsa_reserve_flat_scratch 0 - .amdhsa_reserve_xnack_mask 0 - .amdhsa_next_free_sgpr 8 - .amdhsa_float_round_mode_32 0 - .amdhsa_float_round_mode_16_64 0 - .amdhsa_float_denorm_mode_32 0 - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_fp16_overflow 0 - .amdhsa_workgroup_processor_mode 0 - .amdhsa_memory_ordered 0 - .amdhsa_forward_progress 0 - .amdhsa_system_sgpr_private_segment_wavefront_offset 0 - .amdhsa_system_sgpr_workgroup_id_x 0 - .amdhsa_system_sgpr_workgroup_id_y 0 - .amdhsa_system_sgpr_workgroup_id_z 0 - .amdhsa_system_sgpr_workgroup_info 0 - .amdhsa_system_vgpr_workitem_id 0 - .amdhsa_exception_fp_ieee_invalid_op 0 - .amdhsa_exception_fp_denorm_src 0 - .amdhsa_exception_fp_ieee_div_zero 0 - .amdhsa_exception_fp_ieee_overflow 0 - .amdhsa_exception_fp_ieee_underflow 0 - .amdhsa_exception_fp_ieee_inexact 0 - .amdhsa_exception_int_div_zero 0 - .amdhsa_user_sgpr_private_segment_buffer 0 - .amdhsa_user_sgpr_dispatch_ptr 0 - .amdhsa_user_sgpr_queue_ptr 0 - .amdhsa_user_sgpr_kernarg_segment_ptr 0 - .amdhsa_user_sgpr_dispatch_id 0 - .amdhsa_user_sgpr_flat_scratch_init 0 - .amdhsa_user_sgpr_private_segment_size 0 - .amdhsa_wavefront_size32 0 -.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s deleted file mode 100644 index de4fdf74d88e0..0000000000000 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s +++ /dev/null @@ -1,53 +0,0 @@ -;; Entirely zeroed kernel descriptor (for GFX9). - -; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 -; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \ -; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 -; RUN: diff %t1 %t2 - -; RUN: llvm-objdump -s -j .text %t1 | FileCheck --check-prefix=OBJDUMP %s - -; OBJDUMP: 0000 00000000 00000000 00000000 00000000 -; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -; OBJDUMP-NEXT: 0030 00000000 00000000 00000000 00000000 - -;; This file and kd-zeroed-raw.s produce the same output for the kernel -;; descriptor - a block of 64 zeroed bytes. 
- -.amdhsa_kernel my_kernel - .amdhsa_group_segment_fixed_size 0 - .amdhsa_private_segment_fixed_size 0 - .amdhsa_next_free_vgpr 0 - .amdhsa_reserve_vcc 0 - .amdhsa_reserve_flat_scratch 0 - .amdhsa_reserve_xnack_mask 0 - .amdhsa_next_free_sgpr 0 - .amdhsa_float_round_mode_32 0 - .amdhsa_float_round_mode_16_64 0 - .amdhsa_float_denorm_mode_32 0 - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_fp16_overflow 0 - .amdhsa_system_sgpr_private_segment_wavefront_offset 0 - .amdhsa_system_sgpr_workgroup_id_x 0 - .amdhsa_system_sgpr_workgroup_id_y 0 - .amdhsa_system_sgpr_workgroup_id_z 0 - .amdhsa_system_sgpr_workgroup_info 0 - .amdhsa_system_vgpr_workitem_id 0 - .amdhsa_exception_fp_ieee_invalid_op 0 - .amdhsa_exception_fp_denorm_src 0 - .amdhsa_exception_fp_ieee_div_zero 0 - .amdhsa_exception_fp_ieee_overflow 0 - .amdhsa_exception_fp_ieee_underflow 0 - .amdhsa_exception_fp_ieee_inexact 0 - .amdhsa_exception_int_div_zero 0 - .amdhsa_user_sgpr_private_segment_buffer 0 - .amdhsa_user_sgpr_dispatch_ptr 0 - .amdhsa_user_sgpr_queue_ptr 0 - .amdhsa_user_sgpr_kernarg_segment_ptr 0 - .amdhsa_user_sgpr_dispatch_id 0 - .amdhsa_user_sgpr_flat_scratch_init 0 - .amdhsa_user_sgpr_private_segment_size 0 -.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s deleted file mode 100644 index 85554209d5d8f..0000000000000 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s +++ /dev/null @@ -1,41 +0,0 @@ -; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 -; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \ -; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 -; RUN: llvm-objdump -s -j .text %t2 | FileCheck --check-prefix=OBJDUMP %s - -;; Not running lit-test over gfx10 (see kd-zeroed-gfx10.s for details). -;; kd-zeroed-raw.s and kd-zeroed-*.s should produce the same output for the -;; kernel descriptor - a block of 64 zeroed bytes. - -;; The disassembly will produce the contents of kd-zeroed-*.s which on being -;; assembled contains additional relocation info. A diff over the entire object -;; will fail in this case. So we check by looking the bytes in .text. - -; OBJDUMP: 0000 00000000 00000000 00000000 00000000 -; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -; OBJDUMP-NEXT: 0030 00000000 00000000 00000000 00000000 - -;; The entire object is zeroed out. - -.type my_kernel.kd, @object -.size my_kernel.kd, 64 -my_kernel.kd: - .long 0x00000000 ;; group_segment_fixed_size - .long 0x00000000 ;; private_segment_fixed_size - .quad 0x0000000000000000 ;; reserved bytes. - .quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works. - - ;; 20 reserved bytes. - .quad 0x0000000000000000 - .quad 0x0000000000000000 - .long 0x00000000 - - .long 0x00000000 ;; compute_PGM_RSRC3 - .long 0x00000000 ;; compute_PGM_RSRC1 - .long 0x00000000 ;; compute_PGM_RSRC2 - .short 0x0000 ;; additional fields. - - ;; 6 reserved bytes. 
-  .long 0x0000000
-  .short 0x0000
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 46ed7414dbb31..b63d08b90ff51 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1854,6 +1854,23 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
       outs() << SectionName << ":\n";
     }
 
+    if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
+      if (Symbols[SI].Type == ELF::STT_AMDGPU_HSA_KERNEL) {
+        // Skip amd_kernel_code_t at the beginning of the kernel symbol (256 bytes).
+        Start += 256;
+      }
+      if (SI == SE - 1 ||
+          Symbols[SI + 1].Type == ELF::STT_AMDGPU_HSA_KERNEL) {
+        // Cut trailing zeroes at the end of the kernel,
+        // up to 256 bytes.
+        const uint64_t EndAlign = 256;
+        const auto Limit = End - (std::min)(EndAlign, End - Start);
+        while (End > Limit &&
+               *reinterpret_cast<const support::ulittle32_t *>(&Bytes[End - 4]) == 0)
+          End -= 4;
+      }
+    }
+
     outs() << '\n';
     if (!NoLeadingAddr)
       outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ",
From b29bdab8c76dbeda7786ef8e0d1bf58376955795 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 9 Sep 2020 14:20:41 +0100
Subject: [PATCH 0162/1079] CommandLine.h - use auto const reference in ValuesClass::apply for range loop. NFCI.

---
 llvm/include/llvm/Support/CommandLine.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h
index 38c588080069c..a367387510e9e 100644
--- a/llvm/include/llvm/Support/CommandLine.h
+++ b/llvm/include/llvm/Support/CommandLine.h
@@ -672,7 +672,7 @@ class ValuesClass {
       : Values(Options) {}
 
   template <class Opt> void apply(Opt &O) const {
-    for (auto Value : Values)
+    for (const auto &Value : Values)
       O.getParser().addLiteralOption(Value.Name, Value.Value,
                                      Value.Description);
   }
From 4358fa782e3def5176f6e70c72de8e65702aeb0f Mon Sep 17 00:00:00 2001
From: Denis Antrushin
Date: Mon, 7 Sep 2020 22:04:07 +0700
Subject: [PATCH 0163/1079] [Statepoints] Update DAG root after emitting statepoint.

Since we always generate CopyToRegs for statepoint results, we must
update the DAG root after emitting a statepoint, so that these copies
are scheduled before any possible local uses.
Note: getControlRoot() flushes all PendingExports, not only those we
generate for relocates. If that becomes a problem, we can change it to
flushing relocate exports only.
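To make the scheduling hazard concrete, here is a standalone C++ toy
model (illustration only: the node names are invented and this is not
SelectionDAG API code). A scheduler may issue nodes in any order that
respects their dependence edges, so unless the result copy is chained
into the root, a local use of the relocated value can legally issue
before the CopyToReg that defines the vreg it reads:

    #include <cstdio>
    #include <vector>

    // Toy chain model: a node is "ready" once all of its predecessors
    // have issued; ready nodes issue in discovery order.
    struct Node {
      const char *Name;
      std::vector<int> Preds; // indices of predecessor nodes
    };

    static void schedule(const std::vector<Node> &G) {
      std::vector<bool> Done(G.size(), false);
      for (size_t Issued = 0; Issued < G.size();) {
        for (size_t I = 0; I < G.size(); ++I) {
          if (Done[I])
            continue;
          bool Ready = true;
          for (int P : G[I].Preds)
            Ready = Ready && Done[P];
          if (Ready) {
            std::printf("  %s\n", G[I].Name);
            Done[I] = true;
            ++Issued;
          }
        }
      }
    }

    int main() {
      // Stale root: the use depends only on the statepoint, so it can
      // issue before the copy that defines the vreg it reads.
      std::vector<Node> StaleRoot = {
          {"STATEPOINT", {}}, {"use(vreg)", {0}}, {"CopyToReg(vreg)", {0}}};
      // Updated root: the copy is flushed into the chain ahead of the use.
      std::vector<Node> UpdatedRoot = {
          {"STATEPOINT", {}}, {"CopyToReg(vreg)", {0}}, {"use(vreg)", {1}}};
      std::printf("stale root:\n");
      schedule(StaleRoot);
      std::printf("updated root:\n");
      schedule(UpdatedRoot);
      return 0;
    }

Updating the root (equivalently, flushing the pending exports) is what
adds the missing chain edge in the second graph.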
Reviewed By: reames Differential Revision: https://reviews.llvm.org/D87251 --- .../SelectionDAG/StatepointLowering.cpp | 7 +- llvm/test/CodeGen/X86/statepoint-vreg.ll | 88 +++++++++++++++++++ 2 files changed, 92 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 7cbeb1016c67b..83c72ca2da39b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -841,7 +841,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( Register Reg = FuncInfo.CreateRegs(RetTy); RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Reg, RetTy, None); - SDValue Chain = DAG.getEntryNode(); + SDValue Chain = DAG.getRoot(); RFV.getCopyToRegs(Relocated, DAG, getCurSDLoc(), Chain, nullptr); PendingExports.push_back(Chain); @@ -919,8 +919,9 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( // Remove original call node DAG.DeleteNode(CallNode); - // DON'T set the root - under the assumption that it's already set past the - // inserted node we created. + // Since we always emit CopyToRegs (even for local relocates), we must + // update root, so that they are emitted before any local uses. + (void)getControlRoot(); // TODO: A better future implementation would be to emit a single variable // argument, variable return value STATEPOINT node here and then hookup the diff --git a/llvm/test/CodeGen/X86/statepoint-vreg.ll b/llvm/test/CodeGen/X86/statepoint-vreg.ll index 66b984b905364..6a65abed57541 100644 --- a/llvm/test/CodeGen/X86/statepoint-vreg.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg.ll @@ -8,8 +8,12 @@ declare i1 @return_i1() declare void @func() declare void @"some_call"(i64 addrspace(1)*) declare void @consume(i32 addrspace(1)*) +declare i32 @consume1(i32) gc "statepoint-example" declare void @consume2(i32 addrspace(1)*, i32 addrspace(1)*) +declare void @consume3(float) gc "statepoint-example" +declare float @consume4(i64) gc "statepoint-example" declare void @consume5(i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*) + declare void @use1(i32 addrspace(1)*, i8 addrspace(1)*) declare i32 @"personality_function"() @@ -590,6 +594,90 @@ entry: ret void } +; test multiple statepoints/relocates within single block. +; relocates must be properly scheduled w.r.t. 
statepoints +define void @test_sched(float %0, i32 %1, i8 addrspace(1)* %2) gc "statepoint-example" { +; CHECK-LABEL: test_sched: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset %rbx, -24 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: callq consume3 +; CHECK-NEXT: .Ltmp25: +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: cvtsi2sd %ebp, %xmm0 +; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: nopl 8(%rax,%rax) +; CHECK-NEXT: .Ltmp26: +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: movss %xmm0, (%rsp) +; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: nopl 8(%rax,%rax) +; CHECK-NEXT: .Ltmp27: +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: movss %xmm0, (%rsp) +; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: nopl 8(%rax,%rax) +; CHECK-NEXT: .Ltmp28: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: xorpd %xmm0, %xmm0 +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero +; CHECK-NEXT: ucomisd %xmm0, %xmm1 +; CHECK-NEXT: movabsq $9223372036854775807, %rdi # imm = 0x7FFFFFFFFFFFFFFF +; CHECK-NEXT: cmovbeq %rax, %rdi +; CHECK-NEXT: movsd %xmm1, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: movss %xmm0, (%rsp) +; CHECK-NEXT: nopl 8(%rax,%rax) +; CHECK-NEXT: .Ltmp29: +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +entry: + %token0 = call token (i64, i32, void (float)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf32f(i64 2, i32 0, void (float)* nonnull @consume3, i32 1, i32 0, float %0, i32 0, i32 0) [ "gc-live"(i8 addrspace(1)* %2) ] + %reloc1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token0, i32 0, i32 0) ; (%2, %2) + %tmp1 = sitofp i32 %1 to double + %to_max.i29 = fcmp ogt double %tmp1, 0.000000e+00 + %token1 = call token (i64, i32, i32 (i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32i32f(i64 2, i32 5, i32 (i32)* nonnull @consume1, i32 1, i32 0, i32 undef, i32 0, i32 0) [ "gc-live"(i8 addrspace(1)* %reloc1) ] + %reloc2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token1, i32 0, i32 0) ; (%reloc1, %reloc1) + %reloc3 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token1, i32 0, i32 0) ; (%reloc1, %reloc1) + %token2 = call token (i64, i32, i32 (i32)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_i32i32f(i64 2, i32 5, i32 (i32)* nonnull @consume1, i32 1, i32 0, i32 undef, i32 0, i32 0) [ "deopt"(float %0, double %tmp1), "gc-live"(i8 addrspace(1)* %reloc2, i8 addrspace(1)* %reloc3) ]
+  %reloc4 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token2, i32 0, i32 0) ; (%reloc3, %reloc2)
+  %reloc5 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token2, i32 1, i32 1) ; (%reloc3, %reloc3)
+  %token3 = call token (i64, i32, void (float)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf32f(i64 2, i32 5, void (float)* nonnull @consume3, i32 1, i32 0, float %0, i32 0, i32 0) [ "deopt"(float %0, double %tmp1), "gc-live"(i8 addrspace(1)* %reloc4, i8 addrspace(1)* %reloc5) ]
+  %reloc6 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token3, i32 1, i32 0) ; (%reloc5, %reloc4)
+  %tmp5 = select i1 %to_max.i29, i64 9223372036854775807, i64 0
+  %token4 = call token (i64, i32, float (i64)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_f32i64f(i64 2, i32 5, float (i64)* nonnull @consume4, i32 1, i32 0, i64 %tmp5, i32 0, i32 0) [ "deopt"(float %0, double %tmp1), "gc-live"() ]
+ret void
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_f32i64f(i64 immarg, i32 immarg, float (i64)*, i32 immarg, i32 immarg, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_i32i32f(i64 immarg, i32 immarg, i32 (i32)*, i32 immarg, i32 immarg, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf32f(i64 immarg, i32 immarg, void (float)*, i32 immarg, i32 immarg, ...)
 declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
 declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
 declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)
From 818cf30b83305fa4a2f75821349210b0f7aff4a4 Mon Sep 17 00:00:00 2001
From: Alon Kom
Date: Wed, 9 Sep 2020 13:17:53 +0000
Subject: [PATCH 0164/1079] [MachinePipeliner] Fix II_setByPragma initialization

II_setByPragma was not reset between two calls of the MachinePipeliner
pass.

Reviewed By: bcahoon

Differential Revision: https://reviews.llvm.org/D87088
---
 llvm/lib/CodeGen/MachinePipeliner.cpp         |  1 +
 .../swp-pragma-initiation-interval-reset.ii   | 85 +++++++++++++++++++
 2 files changed, 86 insertions(+)
 create mode 100644 llvm/test/CodeGen/Hexagon/swp-pragma-initiation-interval-reset.ii

diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 45a5ef71d0fda..7b6f59f0d91ad 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -268,6 +268,7 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
 void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
   // Reset the pragma for the next loop in iteration.
disabledByPragma = false; + II_setByPragma = 0; MachineBasicBlock *LBLK = L.getTopBlock(); diff --git a/llvm/test/CodeGen/Hexagon/swp-pragma-initiation-interval-reset.ii b/llvm/test/CodeGen/Hexagon/swp-pragma-initiation-interval-reset.ii new file mode 100644 index 0000000000000..03c2a13f77f22 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-pragma-initiation-interval-reset.ii @@ -0,0 +1,85 @@ +; RUN: llc -disable-lsr -march=hexagon -enable-pipeliner \ +; RUN: -debug-only=pipeliner < %s 2>&1 > /dev/null -pipeliner-experimental-cg=true | FileCheck %s +; REQUIRES: asserts +; +; Test that checks that the II set by pragma was reset between loops. + +; CHECK: MII = 10 MAX_II = 10 +; CHECK: MII = 1 MAX_II = 11 (rec=1, res=1) +; CHECK-NOT: MII = 10 MAX_II = 10 + +; Function Attrs: nounwind +define void @f0(i32* nocapture %a0, i32 %a1) #0 { +b0: + %v0 = icmp sgt i32 %a1, 1 + br i1 %v0, label %b1, label %b4 + +b1: ; preds = %b0 + %v1 = load i32, i32* %a0, align 4 + %v2 = add i32 %v1, 10 + %v3 = getelementptr i32, i32* %a0, i32 1 + %v4 = add i32 %a1, -1 + br label %b2 + +b2: ; preds = %b2, %b1 + %v5 = phi i32 [ %v12, %b2 ], [ %v4, %b1 ] + %v6 = phi i32* [ %v11, %b2 ], [ %v3, %b1 ] + %v7 = phi i32 [ %v10, %b2 ], [ %v2, %b1 ] + store i32 %v7, i32* %v6, align 4 + %v8 = add i32 %v7, 10 + %v9 = getelementptr i32, i32* %v6, i32 -1 + store i32 %v8, i32* %v9, align 4 + %v10 = add i32 %v7, 10 + %v11 = getelementptr i32, i32* %v6, i32 1 + %v12 = add i32 %v5, -1 + %v13 = icmp eq i32 %v12, 0 + br i1 %v13, label %b3, label %b2 + +b3: ; preds = %b2 + br label %b4 , !llvm.loop !2 + +b4: ; preds = %b3, %b0 + ret void +} + +; Function Attrs: nounwind +define void @f1(i32* nocapture %a0, i32 %a1) #0 { +b0: + %v0 = icmp sgt i32 %a1, 1 + br i1 %v0, label %b1, label %b4 + +b1: ; preds = %b0 + %v1 = load i32, i32* %a0, align 4 + %v2 = add i32 %v1, 10 + %v3 = getelementptr i32, i32* %a0, i32 1 + %v4 = add i32 %a1, -1 + br label %b2 + +b2: ; preds = %b2, %b1 + %v5 = phi i32 [ %v12, %b2 ], [ %v4, %b1 ] + %v6 = phi i32* [ %v11, %b2 ], [ %v3, %b1 ] + %v7 = phi i32 [ %v10, %b2 ], [ %v2, %b1 ] + store i32 %v7, i32* %v6, align 4 + %v8 = add i32 %v7, 10 + %v9 = getelementptr i32, i32* %v6, i32 -1 + store i32 %v8, i32* %v9, align 4 + %v10 = add i32 %v7, 10 + %v11 = getelementptr i32, i32* %v6, i32 1 + %v12 = add i32 %v5, -1 + %v13 = icmp eq i32 %v12, 0 + br i1 %v13, label %b3, label %b2 + +b3: ; preds = %b2 + br label %b4 + +b4: ; preds = %b3, %b0 + ret void +} + +attributes #0 = { nounwind } + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!2, !2, i64 0} +!2 = distinct !{!2, !3} +!3 = !{!"llvm.loop.pipeline.initiationinterval", i32 10} + From 95b7040e43841802e1ccba59b46e7773c47c4ad6 Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Wed, 9 Sep 2020 15:58:12 +0300 Subject: [PATCH 0165/1079] [AMDGPU][MC] Improved diagnostic messages for invalid registers Corrected parser to issue meaningful error messages for invalid and malformed registers. 
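As an illustration of the approach (a standalone sketch with invented
helper names, not the actual AMDGPUAsmParser code), the idea is to
validate each property of a register range like s[LO:HI] separately and
attach a specific message to the first offending token, instead of
failing with a generic "not a valid operand" at the start of the
operand:

    #include <cstdio>
    #include <initializer_list>
    #include <string>

    // Returns true on success; otherwise fills Err with a specific message.
    static bool parseRegRange(const std::string &S, unsigned NumRegs,
                              std::string &Err) {
      size_t LB = S.find('[');
      if (LB == std::string::npos) {
        Err = "missing register index";
        return false;
      }
      if (S.back() != ']') {
        Err = "expected a closing square bracket";
        return false;
      }
      size_t Colon = S.find(':', LB);
      long Lo = 0, Hi = 0;
      try {
        Lo = std::stol(S.substr(LB + 1));
        Hi = (Colon == std::string::npos) ? Lo : std::stol(S.substr(Colon + 1));
      } catch (...) {
        Err = "invalid register index";
        return false;
      }
      if (Lo < 0) {
        Err = "invalid register index";
        return false;
      }
      if (Lo > Hi) {
        Err = "first register index should not exceed second index";
        return false;
      }
      if (Hi >= static_cast<long>(NumRegs)) {
        Err = "register index is out of range";
        return false;
      }
      return true;
    }

    int main() {
      std::string Err;
      for (const char *Op : {"s[0:3]", "s[1:0]", "s[0:999]", "s[0:1"}) {
        if (parseRegRange(Op, 106, Err))
          std::printf("%-10s OK\n", Op);
        else
          std::printf("%-10s error: %s\n", Op, Err.c_str());
      }
      return 0;
    }

The error strings here mirror the new diagnostics exercised by the
updated tests below.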
See bug 41303: https://bugs.llvm.org/show_bug.cgi?id=41303 Reviewers: arsenm, rampitec Differential Revision: https://reviews.llvm.org/D87234 --- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 113 ++++++++++++---- llvm/test/MC/AMDGPU/expressions.s | 4 +- llvm/test/MC/AMDGPU/flat-scratch.s | 12 +- llvm/test/MC/AMDGPU/literals.s | 88 ++++++------ llvm/test/MC/AMDGPU/mtbuf.s | 2 +- llvm/test/MC/AMDGPU/out-of-range-registers.s | 80 ++++++----- llvm/test/MC/AMDGPU/reg-syntax-err.s | 126 ++++++++++++++---- llvm/test/MC/AMDGPU/reg-syntax-extra.s | 24 ++-- llvm/test/MC/AMDGPU/smem.s | 35 +++-- llvm/test/MC/AMDGPU/smrd-err.s | 10 +- llvm/test/MC/AMDGPU/smrd.s | 12 +- llvm/test/MC/AMDGPU/sop1-err.s | 17 +-- llvm/test/MC/AMDGPU/sop1.s | 6 +- llvm/test/MC/AMDGPU/sop2.s | 6 +- llvm/test/MC/AMDGPU/sopk.s | 47 ++++++- llvm/test/MC/AMDGPU/trap.s | 76 ++++++----- llvm/test/MC/AMDGPU/vop3.s | 6 +- llvm/test/MC/AMDGPU/vop_sdwa.s | 27 ++-- llvm/test/MC/AMDGPU/xnack-mask.s | 12 +- 19 files changed, 442 insertions(+), 261 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index db74f8a54c0af..d2eb7c1726e27 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1070,7 +1070,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser { std::string &CollectString); bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, - RegisterKind RegKind, unsigned Reg1); + RegisterKind RegKind, unsigned Reg1, SMLoc Loc); bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, unsigned &RegNum, unsigned &RegWidth, bool RestoreOnFailure = false); @@ -1088,7 +1088,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser { bool ParseRegRange(unsigned& Num, unsigned& Width); unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, - unsigned RegWidth); + unsigned RegWidth, + SMLoc Loc); bool isRegister(); bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; @@ -2065,7 +2066,8 @@ OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, } bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, - RegisterKind RegKind, unsigned Reg1) { + RegisterKind RegKind, unsigned Reg1, + SMLoc Loc) { switch (RegKind) { case IS_SPECIAL: if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { @@ -2098,12 +2100,14 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, RegWidth = 2; return true; } + Error(Loc, "register does not fit in the list"); return false; case IS_VGPR: case IS_SGPR: case IS_AGPR: case IS_TTMP: if (Reg1 != Reg + RegWidth) { + Error(Loc, "registers in a list must have consecutive indices"); return false; } RegWidth++; @@ -2186,7 +2190,8 @@ AMDGPUAsmParser::isRegister() unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum, - unsigned RegWidth) { + unsigned RegWidth, + SMLoc Loc) { assert(isRegularReg(RegKind)); @@ -2197,18 +2202,24 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, AlignSize = std::min(RegWidth, 4u); } - if (RegNum % AlignSize != 0) + if (RegNum % AlignSize != 0) { + Error(Loc, "invalid register alignment"); return AMDGPU::NoRegister; + } unsigned RegIdx = RegNum / AlignSize; int RCID = getRegClass(RegKind, RegWidth); - if (RCID == -1) + if (RCID == -1) { + Error(Loc, "invalid or unsupported register size"); return AMDGPU::NoRegister; + } const MCRegisterInfo *TRI = getContext().getRegisterInfo(); const MCRegisterClass RC = 
TRI->getRegClass(RCID);
-  if (RegIdx >= RC.getNumRegs())
+  if (RegIdx >= RC.getNumRegs()) {
+    Error(Loc, "register index is out of range");
     return AMDGPU::NoRegister;
+  }
 
   return RC.getRegister(RegIdx);
 }
 
@@ -2216,24 +2227,40 @@ bool AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
   int64_t RegLo, RegHi;
-  if (!trySkipToken(AsmToken::LBrac))
+  if (!skipToken(AsmToken::LBrac, "missing register index"))
     return false;
 
+  SMLoc FirstIdxLoc = getLoc();
+  SMLoc SecondIdxLoc;
+
   if (!parseExpr(RegLo))
     return false;
 
   if (trySkipToken(AsmToken::Colon)) {
+    SecondIdxLoc = getLoc();
     if (!parseExpr(RegHi))
       return false;
   } else {
     RegHi = RegLo;
   }
 
-  if (!trySkipToken(AsmToken::RBrac))
+  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
+    return false;
+
+  if (!isUInt<32>(RegLo)) {
+    Error(FirstIdxLoc, "invalid register index");
+    return false;
+  }
+
+  if (!isUInt<32>(RegHi)) {
+    Error(SecondIdxLoc, "invalid register index");
     return false;
+  }
 
-  if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
+  if (RegLo > RegHi) {
+    Error(FirstIdxLoc, "first register index should not exceed second index");
     return false;
+  }
 
   Num = static_cast<unsigned>(RegLo);
   Width = (RegHi - RegLo) + 1;
@@ -2260,10 +2287,14 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                           SmallVectorImpl<AsmToken> &Tokens) {
   assert(isToken(AsmToken::Identifier));
   StringRef RegName = getTokenStr();
+  auto Loc = getLoc();
 
   const RegInfo *RI = getRegularRegInfo(RegName);
-  if (!RI)
+  if (!RI) {
+    Error(Loc, "invalid register name");
     return AMDGPU::NoRegister;
+  }
+
   Tokens.push_back(getToken());
   lex(); // skip register name
 
@@ -2271,8 +2302,10 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
   StringRef RegSuffix = RegName.substr(RI->Name.size());
   if (!RegSuffix.empty()) {
     // Single 32-bit register: vXX.
-    if (!getRegNum(RegSuffix, RegNum))
+    if (!getRegNum(RegSuffix, RegNum)) {
+      Error(Loc, "invalid register index");
       return AMDGPU::NoRegister;
+    }
     RegWidth = 1;
   } else {
     // Range of registers: v[XX:YY]. ":YY" is optional.
@@ -2280,44 +2313,59 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
       return AMDGPU::NoRegister;
   }
 
-  return getRegularReg(RegKind, RegNum, RegWidth);
+  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
 }
 
 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                                        unsigned &RegWidth,
                                        SmallVectorImpl<AsmToken> &Tokens) {
   unsigned Reg = AMDGPU::NoRegister;
+  auto ListLoc = getLoc();
 
-  if (!trySkipToken(AsmToken::LBrac))
+  if (!skipToken(AsmToken::LBrac,
+                 "expected a register or a list of registers")) {
     return AMDGPU::NoRegister;
+  }
 
   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
 
+  auto Loc = getLoc();
   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
     return AMDGPU::NoRegister;
-  if (RegWidth != 1)
+  if (RegWidth != 1) {
+    Error(Loc, "expected a single 32-bit register");
    return AMDGPU::NoRegister;
+  }
 
   for (; trySkipToken(AsmToken::Comma); ) {
     RegisterKind NextRegKind;
     unsigned NextReg, NextRegNum, NextRegWidth;
+    Loc = getLoc();
 
-    if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth,
-                             Tokens))
+    if (!ParseAMDGPURegister(NextRegKind, NextReg,
+                             NextRegNum, NextRegWidth,
+                             Tokens)) {
      return AMDGPU::NoRegister;
-    if (NextRegWidth != 1)
+    }
+    if (NextRegWidth != 1) {
+      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;
-    if (NextRegKind != RegKind)
+    }
+    if (NextRegKind != RegKind) {
+      Error(Loc, "registers in a list must be of the same kind");
      return AMDGPU::NoRegister;
-    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
+    }
+    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
       return AMDGPU::NoRegister;
   }
 
-  if (!trySkipToken(AsmToken::RBrac))
+  if (!skipToken(AsmToken::RBrac,
+                 "expected a comma or a closing square bracket")) {
     return AMDGPU::NoRegister;
+  }
 
   if (isRegularReg(RegKind))
-    Reg = getRegularReg(RegKind, RegNum, RegWidth);
+    Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
 
   return Reg;
 }
@@ -2325,6 +2373,7 @@ unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                           unsigned &RegNum, unsigned &RegWidth,
                                           SmallVectorImpl<AsmToken> &Tokens) {
+  auto Loc = getLoc();
   Reg = AMDGPU::NoRegister;
 
   if (isToken(AsmToken::Identifier)) {
@@ -2336,12 +2385,26 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
   }
 
   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
-  return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
+  if (Reg == AMDGPU::NoRegister) {
+    assert(Parser.hasPendingError());
+    return false;
+  }
+
+  if (!subtargetHasRegister(*TRI, Reg)) {
+    if (Reg == AMDGPU::SGPR_NULL) {
+      Error(Loc, "'null' operand is not supported on this GPU");
+    } else {
+      Error(Loc, "register not available on this GPU");
+    }
+    return false;
+  }
+
+  return true;
 }
 
 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                           unsigned &RegNum, unsigned &RegWidth,
-                                          bool RestoreOnFailure) {
+                                          bool RestoreOnFailure /*=false*/) {
   Reg = AMDGPU::NoRegister;
 
   SmallVector<AsmToken, 1> Tokens;
@@ -2413,8 +2476,6 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
   unsigned Reg, RegNum, RegWidth;
 
   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
-    //FIXME: improve error messages (bug 41303).
- Error(StartLoc, "not a valid operand."); return nullptr; } if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { diff --git a/llvm/test/MC/AMDGPU/expressions.s b/llvm/test/MC/AMDGPU/expressions.s index 57f47d8f0345d..0b7bdcdebb88f 100644 --- a/llvm/test/MC/AMDGPU/expressions.s +++ b/llvm/test/MC/AMDGPU/expressions.s @@ -327,8 +327,8 @@ v_sin_f32 v0, -[ttmp0] s1000=1 v_sin_f32 v0, -s1000 -// NOVI: error: not a valid operand. +// NOVI: error: register index is out of range xnack_mask_lo=1 v_sin_f32 v0, xnack_mask_lo -// NOVI: error: not a valid operand. +// NOVI: error: register not available on this GPU diff --git a/llvm/test/MC/AMDGPU/flat-scratch.s b/llvm/test/MC/AMDGPU/flat-scratch.s index eea2f0d07f3ea..9ff9ee3af7e51 100644 --- a/llvm/test/MC/AMDGPU/flat-scratch.s +++ b/llvm/test/MC/AMDGPU/flat-scratch.s @@ -5,32 +5,32 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=VI %s s_mov_b64 flat_scratch, -1 -// NOSI: error: not a valid operand. +// NOSI: error: register not available on this GPU // CI: s_mov_b64 flat_scratch, -1 ; encoding: [0xc1,0x04,0xe8,0xbe] // VI: s_mov_b64 flat_scratch, -1 ; encoding: [0xc1,0x01,0xe6,0xbe] s_mov_b32 flat_scratch_lo, -1 -// NOSI: error: not a valid operand. +// NOSI: error: register not available on this GPU // CI: s_mov_b32 flat_scratch_lo, -1 ; encoding: [0xc1,0x03,0xe8,0xbe] // VI: s_mov_b32 flat_scratch_lo, -1 ; encoding: [0xc1,0x00,0xe6,0xbe] s_mov_b32 flat_scratch_hi, -1 -// NOSI: error: not a valid operand. +// NOSI: error: register not available on this GPU // CI: s_mov_b32 flat_scratch_hi, -1 ; encoding: [0xc1,0x03,0xe9,0xbe] // VI: s_mov_b32 flat_scratch_hi, -1 ; encoding: [0xc1,0x00,0xe7,0xbe] s_mov_b64 flat_scratch_lo, -1 -// NOSI: error: not a valid operand. +// NOSI: error: register not available on this GPU // NOCI: error: invalid operand for instruction // NOVI: error: invalid operand for instruction s_mov_b64 flat_scratch_hi, -1 -// NOSI: error: not a valid operand. +// NOSI: error: register not available on this GPU // NOCI: error: invalid operand for instruction // NOVI: error: invalid operand for instruction s_mov_b32 flat_scratch, -1 -// NOSI: error: not a valid operand. 
+// NOSI: error: register not available on this GPU // NOCI: error: invalid operand for instruction // NOVI: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s index b666b7d1cb780..ce6893ed057b9 100644 --- a/llvm/test/MC/AMDGPU/literals.s +++ b/llvm/test/MC/AMDGPU/literals.s @@ -640,11 +640,11 @@ v_ceil_f32_sdwa v5, |execz| dst_sel:DWORD src0_sel:DWORD // named inline values: shared_base, shared_limit, private_base, etc //---------------------------------------------------------------------------// -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xeb] buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: s_add_i32 s0, src_shared_base, s0 ; encoding: [0xeb,0x00,0x00,0x81] s_add_i32 s0, src_shared_base, s0 @@ -654,119 +654,127 @@ s_add_i32 s0, src_shared_base, s0 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: s_add_i32 s0, src_shared_limit, s0 ; encoding: [0xec,0x00,0x00,0x81] s_add_i32 s0, src_shared_limit, s0 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: s_add_i32 s0, src_private_base, s0 ; encoding: [0xed,0x00,0x00,0x81] s_add_i32 s0, src_private_base, s0 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: s_add_i32 s0, src_private_limit, s0 ; encoding: [0xee,0x00,0x00,0x81] s_add_i32 s0, src_private_limit, s0 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: s_add_i32 s0, src_pops_exiting_wave_id, s0 ; encoding: [0xef,0x00,0x00,0x81] s_add_i32 s0, src_pops_exiting_wave_id, s0 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_shared_base -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_shared_limit -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_private_base -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_private_limit -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id ; encoding: [0x00,0xef,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: v_add_u16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4c] v_add_u16 v0, src_shared_base, v0 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; 
encoding: [0xf9,0x00,0x00,0x4c,0xeb,0x06,0x86,0x06] v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xd6,0x01,0x4c,0x00,0x06,0x06,0x86] v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: v_add_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x68] v_add_u32 v0, src_shared_base, v0 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: v_add_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x34,0xd1,0xeb,0x00,0x02,0x00] v_add_u32_e64 v0, src_shared_base, v0 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: v_cmp_eq_i64_e32 vcc, src_shared_base, v[0:1] ; encoding: [0xeb,0x00,0xc4,0x7d] v_cmp_eq_i64 vcc, src_shared_base, v[0:1] -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: v_max_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x5a] v_max_f16 v0, src_shared_base, v0 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: v_max_f32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x16] v_max_f32 v0, src_shared_base, v0 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: v_max_f64 v[0:1], src_shared_base, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xeb,0x00,0x02,0x00] v_max_f64 v[0:1], src_shared_base, v[0:1] -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x8f,0xd3,0xeb,0x00,0x02,0x18] v_pk_add_f16 v0, src_shared_base, v0 -// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x85,0xd1,0xeb,0x00,0x00,0x20] +// NOSICI: error: not a valid operand. +// NOVI: error: register not available on this GPU v_ceil_f16 v0, neg(src_shared_base) -// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x85,0xd1,0xeb,0x00,0x00,0x00] +// NOSICI: error: not a valid operand. +// NOVI: error: register not available on this GPU v_ceil_f16 v0, abs(src_shared_base) -// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x58,0xd1,0xeb,0x00,0x00,0x00] +// NOSI: error: not a valid operand. +// NOCIVI: error: register not available on this GPU +// NOVI: error: register not available on this GPU v_ceil_f64 v[5:6], |src_shared_base| -// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x58,0xd1,0xeb,0x00,0x00,0x20] +// NOSI: error: not a valid operand. 
+// NOCIVI: error: register not available on this GPU +// NOVI: error: register not available on this GPU v_ceil_f64 v[5:6], -src_shared_base -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: v_ceil_f32_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x5d,0xd1,0xeb,0x00,0x00,0x20] v_ceil_f32 v0, -src_shared_base -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: v_ceil_f32_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x5d,0xd1,0xeb,0x00,0x00,0x00] v_ceil_f32 v0, |src_shared_base| -// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0xa6,0x00] +// NOSICI: error: not a valid operand. +// NOVI: error: register not available on this GPU v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE -// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0x96,0x00] +// NOSICI: error: not a valid operand. +// NOVI: error: register not available on this GPU v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0x86,0x00] v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD src0_sel:DWORD -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0xa6,0x00] v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD src0_sel:DWORD @@ -774,7 +782,7 @@ v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD src0_sel:DWORD // named inline values compete with other scalars for constant bus access //---------------------------------------------------------------------------// -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // NOGFX9: error: invalid operand (violates constant bus restrictions) v_add_u32 v0, private_base, s0 @@ -783,17 +791,17 @@ v_add_u32 v0, private_base, s0 v_add_u32 v0, scc, s0 // v_div_fmas implicitly reads VCC -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // NOGFX9: error: invalid operand (violates constant bus restrictions) v_div_fmas_f32 v0, shared_base, v0, v1 // v_div_fmas implicitly reads VCC -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // NOGFX9: error: invalid operand (violates constant bus restrictions) v_div_fmas_f32 v0, v0, shared_limit, v1 // v_div_fmas implicitly reads VCC -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // NOGFX9: error: invalid operand (violates constant bus restrictions) v_div_fmas_f32 v0, v0, v1, private_limit @@ -810,29 +818,29 @@ v_div_fmas_f32 v0, v0, scc, v1 v_div_fmas_f32 v0, v0, v1, vccz // v_addc_co_u32 implicitly reads VCC (VOP2) -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // NOGFX9: error: invalid operand (violates constant bus restrictions) v_addc_co_u32 v0, vcc, shared_base, 
v0, vcc -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // NOGFX9: error: invalid operand (violates constant bus restrictions) v_madak_f32 v0, shared_base, v0, 0x11213141 // NOGCN: error: invalid operand (violates constant bus restrictions) v_madak_f32 v0, scc, v0, 0x11213141 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // NOGFX9: error: invalid operand (violates constant bus restrictions) v_cmp_eq_f32 s[0:1], private_base, private_limit -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // NOGFX9: error: invalid operand (violates constant bus restrictions) v_cmp_eq_f32 s[0:1], private_base, s0 // NOGCN: error: invalid operand (violates constant bus restrictions) v_cmp_eq_f32 s[0:1], execz, s0 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // NOGFX9: error: invalid operand (violates constant bus restrictions) v_pk_add_f16 v255, private_base, private_limit diff --git a/llvm/test/MC/AMDGPU/mtbuf.s b/llvm/test/MC/AMDGPU/mtbuf.s index 0653b591d69d7..a405a8824df4a 100644 --- a/llvm/test/MC/AMDGPU/mtbuf.s +++ b/llvm/test/MC/AMDGPU/mtbuf.s @@ -289,7 +289,7 @@ tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], format:[BUF_DATA_FORMAT_32] // Invalid soffset tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], s[255] format:[BUF_NUM_FORMAT_FLOAT] -// GCN-ERR: error: not a valid operand. +// GCN-ERR: error: register index is out of range // Both legacy and symbolic formats are specified tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:1 s0 format:[BUF_NUM_FORMAT_FLOAT] diff --git a/llvm/test/MC/AMDGPU/out-of-range-registers.s b/llvm/test/MC/AMDGPU/out-of-range-registers.s index c7cd03470f9fc..e350fc5de5207 100644 --- a/llvm/test/MC/AMDGPU/out-of-range-registers.s +++ b/llvm/test/MC/AMDGPU/out-of-range-registers.s @@ -4,112 +4,108 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,GFX10-ERR --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=SIVICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=SIVICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefixes=SIVICI,CIVI9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefixes=GFX9,CIVI9 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GFX10 %s s_add_i32 s106, s0, s1 -// GCN-ERR: error: not a valid operand +// GCN-ERR: error: register index is out of range s_add_i32 s104, s0, s1 -// SICIVI9-ERR: error: not a valid operand +// SICIVI9-ERR: error: register not available on this GPU // GFX10: s_add_i32 s104, s0, s1 ; encoding: s_add_i32 s105, s0, s1 -// SICIVI9-ERR: error: not a valid operand +// SICIVI9-ERR: error: register not available on this GPU // GFX10: s_add_i32 s105, s0, s1 ; encoding: v_add_i32 v256, v0, v1 -// GCN-ERR: error: not a valid operand +// GCN-ERR: error: register index is out of range v_add_i32 v257, v0, v1 -// GCN-ERR: error: not a valid operand +// GCN-ERR: error: register index is out of range s_mov_b64 s[0:17], -1 -// GCN-ERR: error: not a valid operand +// GCN-ERR: error: invalid or unsupported register size s_mov_b64 s[103:104], -1 -// GCN-ERR: error: not a 
valid operand +// GCN-ERR: error: invalid register alignment s_mov_b64 s[105:106], -1 -// GCN-ERR: error: not a valid operand +// GCN-ERR: error: invalid register alignment s_mov_b64 s[104:105], -1 -// SICIVI9-ERR: error: not a valid operand +// SICIVI9-ERR: error: register not available on this GPU // GFX10: s_mov_b64 s[104:105], -1 ; encoding: s_load_dwordx4 s[102:105], s[2:3], s4 -// GCN-ERR: error: not a valid operand +// GCN-ERR: error: invalid register alignment s_load_dwordx4 s[104:108], s[2:3], s4 -// GCN-ERR: error: not a valid operand +// GCN-ERR: error: register index is out of range s_load_dwordx4 s[108:112], s[2:3], s4 -// GCN-ERR: error: not a valid operand +// GCN-ERR: error: register index is out of range s_load_dwordx4 s[1:4], s[2:3], s4 -// GCN-ERR: error: not a valid operand +// GCN-ERR: error: invalid register alignment -s_load_dwordx4 s[1:4], s[2:3], s4 -// GCN-ERR: error: not a valid operand +s_load_dwordx4 s[2:5], s[2:3], s4 +// GCN-ERR: error: invalid register alignment s_load_dwordx8 s[104:111], s[2:3], s4 -// GCN-ERR: error: not a valid operand +// GCN-ERR: error: register index is out of range s_load_dwordx8 s[100:107], s[2:3], s4 -// GCN-ERR: error: not a valid operand +// GCN-ERR: error: register index is out of range s_load_dwordx8 s[108:115], s[2:3], s4 -// GCN-ERR: error: not a valid operand +// GCN-ERR: error: register index is out of range s_load_dwordx16 s[92:107], s[2:3], s4 -// GCN-ERR: error: not a valid operand +// GCN-ERR: error: register index is out of range s_load_dwordx16 s[96:111], s[2:3], s4 -// GCN-ERR: error: not a valid operand +// GCN-ERR: error: register index is out of range s_load_dwordx16 s[100:115], s[2:3], s4 -// GCN-ERR: error: not a valid operand +// GCN-ERR: error: register index is out of range s_load_dwordx16 s[104:119], s[2:3], s4 -// GCN-ERR: error: not a valid operand +// GCN-ERR: error: register index is out of range s_load_dwordx16 s[108:123], s[2:3], s4 -// GCN-ERR: error: not a valid operand +// GCN-ERR: error: register index is out of range s_mov_b32 ttmp16, 0 -// GCN-ERR: error: not a valid operand +// GCN-ERR: error: register index is out of range s_mov_b32 ttmp12, 0 -// SICIVI: error: not a valid operand // GFX9: s_mov_b32 ttmp12, 0 ; encoding: // GFX10: s_mov_b32 ttmp12, 0 ; encoding: -// SIVICI-ERR: error: not a valid operand. +// SIVICI-ERR: error: register not available on this GPU s_mov_b32 ttmp15, 0 -// SICIVI: error: not a valid operand // GFX9: s_mov_b32 ttmp15, 0 ; encoding: // GFX10: s_mov_b32 ttmp15, 0 ; encoding: -// SIVICI-ERR: error: not a valid operand. 
+// SIVICI-ERR: error: register not available on this GPU s_mov_b32 flat_scratch_lo, 0 -// SI-ERR: error: not a valid operand -// CIVI9: s_mov_b32 flat_scratch_lo, 0 ; encoding: -// GFX10-ERR: error: not a valid operand -// GFX9: s_mov_b32 flat_scratch_lo, 0 ; encoding: [0x80,0x00,0xe6,0xbe] +// SI-ERR: error: register not available on this GPU +// GFX10-ERR: error: register not available on this GPU +// CIVI9: s_mov_b32 flat_scratch_lo, 0 ; encoding: [0x80,0x00,0xe6,0xbe] s_mov_b32 flat_scratch_hi, 0 -// SI-ERR: error: not a valid operand -// CIVI9: s_mov_b32 flat_scratch_hi, 0 ; encoding: -// GFX10-ERR: error: not a valid operand -// GFX9: s_mov_b32 flat_scratch_hi, 0 ; encoding: [0x80,0x00,0xe7,0xbe] +// SI-ERR: error: register not available on this GPU +// GFX10-ERR: error: register not available on this GPU +// CIVI9: s_mov_b32 flat_scratch_hi, 0 ; encoding: [0x80,0x00,0xe7,0xbe] s_mov_b32 tma_lo, 0 // SIVICI: s_mov_b32 tma_lo, 0 ; encoding: -// GFX9-ERR: error: not a valid operand -// GFX10-ERR: error: not a valid operand +// GFX9-ERR: error: register not available on this GPU +// GFX10-ERR: error: register not available on this GPU s_mov_b32 tba_lo, 0 // SIVICI: s_mov_b32 tba_lo, 0 ; encoding: -// GFX9-ERR: error: not a valid operand -// GFX10-ERR: error: not a valid operand +// GFX9-ERR: error: register not available on this GPU +// GFX10-ERR: error: register not available on this GPU diff --git a/llvm/test/MC/AMDGPU/reg-syntax-err.s b/llvm/test/MC/AMDGPU/reg-syntax-err.s index dce9375a47111..8f2c3e79310ce 100644 --- a/llvm/test/MC/AMDGPU/reg-syntax-err.s +++ b/llvm/test/MC/AMDGPU/reg-syntax-err.s @@ -1,73 +1,151 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s s_mov_b32 s1, s 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction s_mov_b32 s1, s[0 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: expected a closing square bracket s_mov_b32 s1, s[0:0 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: expected a closing square bracket s_mov_b32 s1, [s[0 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: expected a closing square bracket s_mov_b32 s1, [s[0:1] 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: expected a single 32-bit register s_mov_b32 s1, [s0, 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: expected a register or a list of registers s_mov_b32 s1, s999 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: register index is out of range s_mov_b32 s1, s[1:2] 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: invalid register alignment s_mov_b32 s1, s[0:2] 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction s_mov_b32 s1, xnack_mask_lo 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: register not available on this GPU s_mov_b32 s1, s s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction s_mov_b32 s1, s[0 s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: expected a closing square bracket s_mov_b32 s1, s[0:0 s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// NOVI: error: expected a closing square bracket s_mov_b32 s1, [s[0 s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: expected a closing square bracket s_mov_b32 s1, [s[0:1] s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: expected a single 32-bit register s_mov_b32 s1, [s0, s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: registers in a list must have consecutive indices s_mov_b32 s1, s999 s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: register index is out of range s_mov_b32 s1, s[1:2] s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: invalid register alignment s_mov_b32 s1, s[0:2] vcc_lo -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction s_mov_b32 s1, xnack_mask_lo s1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: register not available on this GPU exp mrt0 v1, v2, v3, v4000 off -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: register index is out of range v_add_f64 v[0:1], v[0:1], v[0xF00000001:0x2] -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: invalid register index v_add_f64 v[0:1], v[0:1], v[0x1:0xF00000002] -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: invalid register index s_mov_b32 s1, s[0:-1] -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// NOVI: error: invalid register index + +s_mov_b64 s[10:11], [exec_lo,vcc_hi] +// NOVI: error: register does not fit in the list + +s_mov_b64 s[10:11], [exec_hi,exec_lo] +// NOVI: error: register does not fit in the list + +s_mov_b64 s[10:11], [exec_lo,exec_lo] +// NOVI: error: register does not fit in the list + +s_mov_b64 s[10:11], [exec,exec_lo] +// NOVI: error: register does not fit in the list + +s_mov_b64 s[10:11], [exec_lo,exec] +// NOVI: error: register does not fit in the list + +s_mov_b64 s[10:11], [exec_lo,s0] +// NOVI: error: registers in a list must be of the same kind + +s_mov_b64 s[10:11], [s0,exec_lo] +// NOVI: error: registers in a list must be of the same kind + +s_mov_b64 s[10:11], [s0,exec] +// NOVI: error: registers in a list must be of the same kind + +s_mov_b64 s[10:11], [s0,v1] +// NOVI: error: registers in a list must be of the same kind + +s_mov_b64 s[10:11], [v0,s1] +// NOVI: error: registers in a list must be of the same kind + +s_mov_b64 s[10:11], [s0,s0] +// NOVI: error: registers in a list must have consecutive indices + +s_mov_b64 s[10:11], [s0,s2] +// NOVI: error: registers in a list must have consecutive indices + +s_mov_b64 s[10:11], [s2,s1] +// NOVI: error: registers in a list must have consecutive indices + +s_mov_b64 s[10:11], [a0,a2] +// NOVI: error: registers in a list must have consecutive indices + +s_mov_b64 s[10:11], [a0,v1] +// NOVI: error: registers in a list must be of the same kind + +s_mov_b64 s[10:11], [s +// NOVI: error: missing register index + +s_mov_b64 s[10:11], s[1:0] +// NOVI: error: first register index should not exceed second index + +s_mov_b64 s[10:11], [x0,s1] +// NOVI: error: invalid register name + +s_mov_b64 s[10:11], [s,s1] +// NOVI: error: missing register index + +s_mov_b64 s[10:11], [s01,s1] +// NOVI: error: registers in a list must have consecutive indices + +s_mov_b64 s[10:11], [s0x] +// NOVI: error: invalid register index + +s_mov_b64 s[10:11], [s[0:1],s[2:3]] +// NOVI: error: expected 
a single 32-bit register + +s_mov_b64 s[10:11], [s0,s[2:3]] +// NOVI: error: expected a single 32-bit register + +s_mov_b64 s[10:11], [s0 +// NOVI: error: expected a comma or a closing square bracket + +s_mov_b64 s[10:11], [s0,s1 +// NOVI: error: expected a comma or a closing square bracket + +s_mov_b64 s[10:11], s[1:0] +// NOVI: error: first register index should not exceed second index diff --git a/llvm/test/MC/AMDGPU/reg-syntax-extra.s b/llvm/test/MC/AMDGPU/reg-syntax-extra.s index 528247f562399..1f887118ef8a2 100644 --- a/llvm/test/MC/AMDGPU/reg-syntax-extra.s +++ b/llvm/test/MC/AMDGPU/reg-syntax-extra.s @@ -38,9 +38,9 @@ s_mov_b64 [exec_lo,exec_hi], s[2:3] // GFX10: s_mov_b64 exec, s[2:3] ; encoding: [0x02,0x04,0xfe,0xbe] s_mov_b64 [flat_scratch_lo,flat_scratch_hi], s[2:3] -// NOSICI: error: not a valid operand. +// NOSICI: error: register not available on this GPU // VI: s_mov_b64 flat_scratch, s[2:3] ; encoding: [0x02,0x01,0xe6,0xbe] -// NOGFX10: error: not a valid operand. +// NOGFX10: error: register not available on this GPU s_mov_b64 [vcc_lo,vcc_hi], s[2:3] // SICI: s_mov_b64 vcc, s[2:3] ; encoding: [0x02,0x04,0xea,0xbe] @@ -50,12 +50,12 @@ s_mov_b64 [vcc_lo,vcc_hi], s[2:3] s_mov_b64 [tba_lo,tba_hi], s[2:3] // SICI: s_mov_b64 tba, s[2:3] ; encoding: [0x02,0x04,0xec,0xbe] // VI: s_mov_b64 tba, s[2:3] ; encoding: [0x02,0x01,0xec,0xbe] -// NOGFX10: error: not a valid operand. +// NOGFX10: error: register not available on this GPU s_mov_b64 [tma_lo,tma_hi], s[2:3] // SICI: s_mov_b64 tma, s[2:3] ; encoding: [0x02,0x04,0xee,0xbe] // VI: s_mov_b64 tma, s[2:3] ; encoding: [0x02,0x01,0xee,0xbe] -// NOGFX10: error: not a valid operand. +// NOGFX10: error: register not available on this GPU v_mov_b32_e32 [v1], [v2] // GCN: v_mov_b32_e32 v1, v2 ; encoding: [0x02,0x03,0x02,0x7e] @@ -151,21 +151,21 @@ flat_load_dwordx4 [v[8/2+4],v9,v[10],v[11/2+6]], v[2:3] // NOSICI: error: instruction not supported on this GPU v_mul_f32 v0, null, v2 -// NOSICIVI: error: not a valid operand. +// NOSICIVI: error: 'null' operand is not supported on this GPU // GFX10: v_mul_f32_e32 v0, null, v2 ; encoding: [0x7d,0x04,0x00,0x10] -// NOVI: error: not a valid operand. +// NOVI: error: 'null' operand is not supported on this GPU v_mul_f64 v[0:1], null, null -// NOSICIVI: error: not a valid operand. +// NOSICIVI: error: 'null' operand is not supported on this GPU // GFX10: v_mul_f64 v[0:1], null, null ; encoding: [0x00,0x00,0x65,0xd5,0x7d,0xfa,0x00,0x00] -// NOVI: error: not a valid operand. +// NOVI: error: 'null' operand is not supported on this GPU s_add_u32 null, null, null -// NOSICIVI: error: not a valid operand. +// NOSICIVI: error: 'null' operand is not supported on this GPU // GFX10: s_add_u32 null, null, null ; encoding: [0x7d,0x7d,0x7d,0x80] -// NOVI: error: not a valid operand. +// NOVI: error: 'null' operand is not supported on this GPU s_not_b64 s[2:3], null -// NOSICIVI: error: not a valid operand. +// NOSICIVI: error: 'null' operand is not supported on this GPU // GFX10: s_not_b64 s[2:3], null ; encoding: [0x7d,0x08,0x82,0xbe] -// NOVI: error: not a valid operand. 
+// NOVI: error: 'null' operand is not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/smem.s b/llvm/test/MC/AMDGPU/smem.s index 4d81929b415e0..3bae52d640282 100644 --- a/llvm/test/MC/AMDGPU/smem.s +++ b/llvm/test/MC/AMDGPU/smem.s @@ -47,12 +47,12 @@ s_memrealtime s[4:5] s_memrealtime tba // VI: s_memrealtime tba ; encoding: [0x00,0x1b,0x94,0xc0,0x00,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: not a valid operand. +// NOGFX9: error: register not available on this GPU s_memrealtime tma // VI: s_memrealtime tma ; encoding: [0x80,0x1b,0x94,0xc0,0x00,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: not a valid operand. +// NOGFX9: error: register not available on this GPU s_memrealtime ttmp[0:1] // VI: s_memrealtime ttmp[0:1] ; encoding: [0x00,0x1c,0x94,0xc0,0x00,0x00,0x00,0x00] @@ -84,22 +84,22 @@ s_store_dword s1, s[2:3], s4 glc s_store_dword tba_lo, s[2:3], s4 // VI: s_store_dword tba_lo, s[2:3], s4 ; encoding: [0x01,0x1b,0x40,0xc0,0x04,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: not a valid operand. +// NOGFX9: error: register not available on this GPU s_store_dword tba_hi, s[2:3], s4 // VI: s_store_dword tba_hi, s[2:3], s4 ; encoding: [0x41,0x1b,0x40,0xc0,0x04,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: not a valid operand. +// NOGFX9: error: register not available on this GPU s_store_dword tma_lo, s[2:3], s4 // VI: s_store_dword tma_lo, s[2:3], s4 ; encoding: [0x81,0x1b,0x40,0xc0,0x04,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: not a valid operand. +// NOGFX9: error: register not available on this GPU s_store_dword tma_hi, s[2:3], s4 // VI: s_store_dword tma_hi, s[2:3], s4 ; encoding: [0xc1,0x1b,0x40,0xc0,0x04,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: not a valid operand. +// NOGFX9: error: register not available on this GPU // FIXME: Should error on SI instead of silently ignoring glc s_load_dword s1, s[2:3], 0xfc glc @@ -120,22 +120,22 @@ s_buffer_store_dword s10, s[92:95], m0 s_buffer_store_dword tba_lo, s[92:95], m0 // VI: s_buffer_store_dword tba_lo, s[92:95], m0 ; encoding: [0x2e,0x1b,0x60,0xc0,0x7c,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: not a valid operand. +// NOGFX9: error: register not available on this GPU s_buffer_store_dword tba_hi, s[92:95], m0 // VI: s_buffer_store_dword tba_hi, s[92:95], m0 ; encoding: [0x6e,0x1b,0x60,0xc0,0x7c,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: not a valid operand. +// NOGFX9: error: register not available on this GPU s_buffer_store_dword tma_lo, s[92:95], m0 // VI: s_buffer_store_dword tma_lo, s[92:95], m0 ; encoding: [0xae,0x1b,0x60,0xc0,0x7c,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: not a valid operand. +// NOGFX9: error: register not available on this GPU s_buffer_store_dword tma_hi, s[92:95], m0 // VI: s_buffer_store_dword tma_hi, s[92:95], m0 ; encoding: [0xee,0x1b,0x60,0xc0,0x7c,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: not a valid operand. 
+// NOGFX9: error: register not available on this GPU s_buffer_store_dword ttmp0, s[92:95], m0 // VI: s_buffer_store_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1c,0x60,0xc0,0x7c,0x00,0x00,0x00] @@ -156,33 +156,32 @@ s_buffer_store_dwordx4 s[8:11], s[92:95], m0 glc s_buffer_store_dwordx2 tba, s[92:95], m0 glc // VI: s_buffer_store_dwordx2 tba, s[92:95], m0 glc ; encoding: [0x2e,0x1b,0x65,0xc0,0x7c,0x00,0x00,0x00] // NOSICI: error: invalid operand for instruction -// NOGFX9: error: not a valid operand. +// NOGFX9: error: register not available on this GPU s_buffer_load_dword s10, s[92:95], m0 // GFX89: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x20,0xc0,0x7c,0x00,0x00,0x00] // SICI: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0x7c,0x5c,0x05,0xc2] // GFX10: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x20,0xf4,0x00,0x00,0x00,0xf8] -// SICIGFX10: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0x7c,0x5c,0x05,0xc2] s_buffer_load_dword tba_lo, s[92:95], m0 // VI: s_buffer_load_dword tba_lo, s[92:95], m0 ; encoding: [0x2e,0x1b,0x20,0xc0,0x7c,0x00,0x00,0x00] // SICI: s_buffer_load_dword tba_lo, s[92:95], m0 ; encoding: [0x7c,0x5c,0x36,0xc2] -// NOGFX9: error: not a valid operand. +// NOGFX9: error: register not available on this GPU s_buffer_load_dword tba_hi, s[92:95], m0 // VI: s_buffer_load_dword tba_hi, s[92:95], m0 ; encoding: [0x6e,0x1b,0x20,0xc0,0x7c,0x00,0x00,0x00] // SICI: s_buffer_load_dword tba_hi, s[92:95], m0 ; encoding: [0x7c,0xdc,0x36,0xc2] -// NOGFX9: error: not a valid operand. +// NOGFX9: error: register not available on this GPU s_buffer_load_dword tma_lo, s[92:95], m0 // VI: s_buffer_load_dword tma_lo, s[92:95], m0 ; encoding: [0xae,0x1b,0x20,0xc0,0x7c,0x00,0x00,0x00] // SICI: s_buffer_load_dword tma_lo, s[92:95], m0 ; encoding: [0x7c,0x5c,0x37,0xc2] -// NOGFX9: error: not a valid operand. +// NOGFX9: error: register not available on this GPU s_buffer_load_dword tma_hi, s[92:95], m0 // VI: s_buffer_load_dword tma_hi, s[92:95], m0 ; encoding: [0xee,0x1b,0x20,0xc0,0x7c,0x00,0x00,0x00] // SICI: s_buffer_load_dword tma_hi, s[92:95], m0 ; encoding: [0x7c,0xdc,0x37,0xc2] -// NOGFX9: error: not a valid operand. +// NOGFX9: error: register not available on this GPU s_buffer_load_dword ttmp0, s[92:95], m0 // VI: s_buffer_load_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1c,0x20,0xc0,0x7c,0x00,0x00,0x00] @@ -198,12 +197,12 @@ s_buffer_load_dwordx2 s[10:11], s[92:95], m0 s_buffer_load_dwordx2 tba, s[92:95], m0 // VI: s_buffer_load_dwordx2 tba, s[92:95], m0 ; encoding: [0x2e,0x1b,0x24,0xc0,0x7c,0x00,0x00,0x00] // SICI: s_buffer_load_dwordx2 tba, s[92:95], m0 ; encoding: [0x7c,0x5c,0x76,0xc2] -// NOGFX9: error: not a valid operand. +// NOGFX9: error: register not available on this GPU s_buffer_load_dwordx2 tma, s[92:95], m0 // VI: s_buffer_load_dwordx2 tma, s[92:95], m0 ; encoding: [0xae,0x1b,0x24,0xc0,0x7c,0x00,0x00,0x00] // SICI: s_buffer_load_dwordx2 tma, s[92:95], m0 ; encoding: [0x7c,0x5c,0x77,0xc2] -// NOGFX9: error: not a valid operand. 
+// NOGFX9: error: register not available on this GPU s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 // VI: s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 ; encoding: [0x2e,0x1c,0x24,0xc0,0x7c,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/smrd-err.s b/llvm/test/MC/AMDGPU/smrd-err.s index 68f2ac6570c90..5017a1ac59e3a 100644 --- a/llvm/test/MC/AMDGPU/smrd-err.s +++ b/llvm/test/MC/AMDGPU/smrd-err.s @@ -1,14 +1,14 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=NOVI --implicit-check-not=error: %s +// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti %s | FileCheck -check-prefix=SI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s s_load_dwordx4 s[100:103], s[2:3], s4 -// NOVI: error: not a valid operand +// NOVI: error: register not available on this GPU // SI: s_load_dwordx4 s[100:103], s[2:3], s4 s_load_dwordx8 s[96:103], s[2:3], s4 -// NOVI: error: not a valid operand +// NOVI: error: register not available on this GPU // SI: s_load_dwordx8 s[96:103], s[2:3], s4 s_load_dwordx16 s[88:103], s[2:3], s4 -// NOVI: error: not a valid operand +// NOVI: error: register not available on this GPU // SI: s_load_dwordx16 s[88:103], s[2:3], s4 diff --git a/llvm/test/MC/AMDGPU/smrd.s b/llvm/test/MC/AMDGPU/smrd.s index 30f01b2ced1c3..43819935afd02 100644 --- a/llvm/test/MC/AMDGPU/smrd.s +++ b/llvm/test/MC/AMDGPU/smrd.s @@ -105,7 +105,7 @@ s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 s_load_dwordx4 s[100:103], s[2:3], s4 // GCN: s_load_dwordx4 s[100:103], s[2:3], s4 ; encoding: [0x04,0x02,0xb2,0xc0] -// NOVI: error: not a valid operand +// NOVI: error: register not available on this GPU s_load_dwordx8 s[8:15], s[2:3], 1 // GCN: s_load_dwordx8 s[8:15], s[2:3], 0x1 ; encoding: [0x01,0x03,0xc4,0xc0] @@ -117,7 +117,7 @@ s_load_dwordx8 s[8:15], s[2:3], s4 s_load_dwordx8 s[96:103], s[2:3], s4 // GCN: s_load_dwordx8 s[96:103], s[2:3], s4 ; encoding: [0x04,0x02,0xf0,0xc0] -// NOVI: error: not a valid operand +// NOVI: error: register not available on this GPU s_load_dwordx16 s[16:31], s[2:3], 1 // GCN: s_load_dwordx16 s[16:31], s[2:3], 0x1 ; encoding: [0x01,0x03,0x08,0xc1] @@ -129,7 +129,7 @@ s_load_dwordx16 s[16:31], s[2:3], s4 s_load_dwordx16 s[88:103], s[2:3], s4 // GCN: s_load_dwordx16 s[88:103], s[2:3], s4 ; encoding: [0x04,0x02,0x2c,0xc1] -// NOVI: error: not a valid operand +// NOVI: error: register not available on this GPU s_buffer_load_dword s1, s[4:7], 1 // GCN: s_buffer_load_dword s1, s[4:7], 0x1 ; encoding: [0x01,0x85,0x00,0xc2] @@ -189,7 +189,7 @@ s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 s_buffer_load_dwordx4 s[100:103], s[4:7], s4 // GCN: s_buffer_load_dwordx4 s[100:103], s[4:7], s4 ; encoding: [0x04,0x04,0xb2,0xc2] -// NOVI: error: not a valid operand +// NOVI: error: register not available on this GPU s_buffer_load_dwordx8 s[8:15], s[4:7], 1 // GCN: s_buffer_load_dwordx8 s[8:15], s[4:7], 0x1 ; encoding: [0x01,0x05,0xc4,0xc2] @@ -201,7 +201,7 @@ s_buffer_load_dwordx8 s[8:15], s[4:7], s4 s_buffer_load_dwordx8 s[96:103], s[4:7], s4 // GCN: s_buffer_load_dwordx8 s[96:103], s[4:7], s4 ; encoding: [0x04,0x04,0xf0,0xc2] -// NOVI: error: not a valid operand +// NOVI: error: register not available on this GPU s_buffer_load_dwordx16 s[16:31], s[4:7], 1 // GCN: s_buffer_load_dwordx16 s[16:31], s[4:7], 0x1 ; encoding: [0x01,0x05,0x08,0xc3] @@ -213,7 +213,7 @@ s_buffer_load_dwordx16 s[16:31], 
s[4:7], s4 s_buffer_load_dwordx16 s[88:103], s[4:7], s4 // GCN: s_buffer_load_dwordx16 s[88:103], s[4:7], s4 ; encoding: [0x04,0x04,0x2c,0xc3] -// NOVI: error: not a valid operand +// NOVI: error: register not available on this GPU s_dcache_inv // GCN: s_dcache_inv ; encoding: [0x00,0x00,0xc0,0xc7] diff --git a/llvm/test/MC/AMDGPU/sop1-err.s b/llvm/test/MC/AMDGPU/sop1-err.s index 6322f5b098c35..fe2a02154106b 100644 --- a/llvm/test/MC/AMDGPU/sop1-err.s +++ b/llvm/test/MC/AMDGPU/sop1-err.s @@ -9,16 +9,16 @@ s_mov_b32 s1, v0 // GCN: error: invalid operand for instruction s_mov_b32 s[1:2], s0 -// GCN: error: not a valid operand +// GCN: error: invalid register alignment s_mov_b32 s0, s[1:2] -// GCN: error: not a valid operand +// GCN: error: invalid register alignment s_mov_b32 s220, s0 -// GCN: error: not a valid operand +// GCN: error: register index is out of range s_mov_b32 s0, s220 -// GCN: error: not a valid operand +// GCN: error: register index is out of range s_mov_b64 s1, s[0:1] // GCN: error: invalid operand for instruction @@ -32,13 +32,10 @@ s_mov_b32 s // Out of range register s_mov_b32 s102, 1 -// VI: error: not a valid operand -// SI: s_mov_b32 s102, 1 +// VI: error: register not available on this GPU s_mov_b32 s103, 1 -// VI: error: not a valid operand -// SI: s_mov_b32 s103, 1 +// VI: error: register not available on this GPU s_mov_b64 s[102:103], -1 -// VI: error: not a valid operand -// SI: s_mov_b64 s[102:103], -1 +// VI: error: register not available on this GPU diff --git a/llvm/test/MC/AMDGPU/sop1.s b/llvm/test/MC/AMDGPU/sop1.s index dafbf650b6715..3b0bafd4ae2c2 100644 --- a/llvm/test/MC/AMDGPU/sop1.s +++ b/llvm/test/MC/AMDGPU/sop1.s @@ -42,8 +42,8 @@ s_mov_b64 s[2:3], s[4:5] s_mov_b64 null, s[4:5] // GFX10: s_mov_b64 null, s[4:5] ; encoding: [0x04,0x04,0xfd,0xbe] -// NOSICIVI: error: not a valid operand. -// NOGFX9: error: not a valid operand. +// NOSICIVI: error: 'null' operand is not supported on this GPU +// NOGFX9: error: 'null' operand is not supported on this GPU s_mov_b64 s[2:3], 0xffffffffffffffff // SICI: s_mov_b64 s[2:3], -1 ; encoding: [0xc1,0x04,0x82,0xbe] @@ -62,7 +62,7 @@ s_mov_b64 s[0:1], 0x80000000 s_mov_b64 s[102:103], -1 // SICI: s_mov_b64 s[102:103], -1 ; encoding: [0xc1,0x04,0xe6,0xbe] -// NOGFX89: error: not a valid operand +// NOGFX89: error: register not available on this GPU // GFX10: s_mov_b64 s[102:103], -1 ; encoding: [0xc1,0x04,0xe6,0xbe] s_cmov_b32 s1, 200 diff --git a/llvm/test/MC/AMDGPU/sop2.s b/llvm/test/MC/AMDGPU/sop2.s index 89f41a7b3d512..94152bd98695d 100644 --- a/llvm/test/MC/AMDGPU/sop2.s +++ b/llvm/test/MC/AMDGPU/sop2.s @@ -65,8 +65,8 @@ s_and_b32 s2, 0xFFFF0000, -65536 s_and_b64 null, s[4:5], s[6:7] // GFX10: s_and_b64 null, s[4:5], s[6:7] ; encoding: [0x04,0x06,0xfd,0x87] -// NOSICIVI: error: not a valid operand. -// NOGFX9: error: not a valid operand. 
+// NOSICIVI: error: 'null' operand is not supported on this GPU +// NOGFX9: error: 'null' operand is not supported on this GPU s_and_b64 s[2:3], s[4:5], s[6:7] // SICI: s_and_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x87] @@ -235,7 +235,7 @@ s_absdiff_i32 s2, s4, s6 s_add_u32 s101, s102, s103 // SICI: s_add_u32 s101, s102, s103 ; encoding: [0x66,0x67,0x65,0x80] -// NOGFX89: error: not a valid operand +// NOGFX89: error: register not available on this GPU // GFX10: s_add_u32 s101, s102, s103 ; encoding: [0x66,0x67,0x65,0x80] s_lshl1_add_u32 s5, s1, s2 diff --git a/llvm/test/MC/AMDGPU/sopk.s b/llvm/test/MC/AMDGPU/sopk.s index e128df94c611f..14523dcec8567 100644 --- a/llvm/test/MC/AMDGPU/sopk.s +++ b/llvm/test/MC/AMDGPU/sopk.s @@ -19,74 +19,92 @@ s_movk_i32 s2, 0x6 s_cmovk_i32 s2, 0x6 // SICI: s_cmovk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb1] // VI9: s_cmovk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb0] +// GFX10: s_cmovk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb1] s_cmpk_eq_i32 s2, 0x6 // SICI: s_cmpk_eq_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb1] // VI9: s_cmpk_eq_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb1] +// GFX10: s_cmpk_eq_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb1] s_cmpk_lg_i32 s2, 0x6 // SICI: s_cmpk_lg_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb2] // VI9: s_cmpk_lg_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb1] +// GFX10: s_cmpk_lg_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb2] s_cmpk_gt_i32 s2, 0x6 // SICI: s_cmpk_gt_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb2] // VI9: s_cmpk_gt_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb2] +// GFX10: s_cmpk_gt_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb2] s_cmpk_ge_i32 s2, 0x6 // SICI: s_cmpk_ge_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb3] // VI9: s_cmpk_ge_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb2] +// GFX10: s_cmpk_ge_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb3] s_cmpk_lt_i32 s2, 0x6 // SICI: s_cmpk_lt_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb3] // VI9: s_cmpk_lt_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb3] +// GFX10: s_cmpk_lt_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb3] s_cmpk_le_i32 s2, 0x6 // SICI: s_cmpk_le_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb4] // VI9: s_cmpk_le_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb3] +// GFX10: s_cmpk_le_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb4] s_cmpk_eq_u32 s2, 0x6 // SICI: s_cmpk_eq_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb4] // VI9: s_cmpk_eq_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb4] +// GFX10: s_cmpk_eq_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb4] s_cmpk_lg_u32 s2, 0x6 // SICI: s_cmpk_lg_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb5] // VI9: s_cmpk_lg_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb4] +// GFX10: s_cmpk_lg_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb5] s_cmpk_gt_u32 s2, 0x6 // SICI: s_cmpk_gt_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb5] // VI9: s_cmpk_gt_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb5] +// GFX10: s_cmpk_gt_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb5] s_cmpk_ge_u32 s2, 0x6 // SICI: s_cmpk_ge_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb6] // VI9: s_cmpk_ge_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb5] +// GFX10: s_cmpk_ge_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb6] s_cmpk_lt_u32 s2, 0x6 // SICI: s_cmpk_lt_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb6] // VI9: s_cmpk_lt_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb6] +// GFX10: s_cmpk_lt_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb6] s_cmpk_le_u32 s2, 0x6 // SICI: s_cmpk_le_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb7] // VI9: s_cmpk_le_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb6] +// 
GFX10: s_cmpk_le_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb7] s_cmpk_le_u32 s2, 0xFFFF // SICI: s_cmpk_le_u32 s2, 0xffff ; encoding: [0xff,0xff,0x02,0xb7] // VI9: s_cmpk_le_u32 s2, 0xffff ; encoding: [0xff,0xff,0x82,0xb6] +// GFX10: s_cmpk_le_u32 s2, 0xffff ; encoding: [0xff,0xff,0x02,0xb7] s_addk_i32 s2, 0x6 // SICI: s_addk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb7] // VI9: s_addk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb7] +// GFX10: s_addk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb7] s_mulk_i32 s2, 0x6 // SICI: s_mulk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb8] // VI9: s_mulk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb7] +// GFX10: s_mulk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb8] s_mulk_i32 s2, -1 // SICI: s_mulk_i32 s2, 0xffff ; encoding: [0xff,0xff,0x02,0xb8] // VI9: s_mulk_i32 s2, 0xffff ; encoding: [0xff,0xff,0x82,0xb7] +// GFX10: s_mulk_i32 s2, 0xffff ; encoding: [0xff,0xff,0x02,0xb8] s_mulk_i32 s2, 0xFFFF // SICI: s_mulk_i32 s2, 0xffff ; encoding: [0xff,0xff,0x02,0xb8] // VI9: s_mulk_i32 s2, 0xffff ; encoding: [0xff,0xff,0x82,0xb7] +// GFX10: s_mulk_i32 s2, 0xffff ; encoding: [0xff,0xff,0x02,0xb8] s_cbranch_i_fork s[2:3], 0x6 // SICI: s_cbranch_i_fork s[2:3], 6 ; encoding: [0x06,0x00,0x82,0xb8] @@ -100,26 +118,31 @@ s_cbranch_i_fork s[2:3], 0x6 s_getreg_b32 s2, 0x6 // SICI: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9] // VI9: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x82,0xb8] +// GFX10: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9] // HW register identifier, non-default offset/width s_getreg_b32 s2, hwreg(HW_REG_GPR_ALLOC, 1, 31) // SICI: s_getreg_b32 s2, hwreg(HW_REG_GPR_ALLOC, 1, 31) ; encoding: [0x45,0xf0,0x02,0xb9] // VI9: s_getreg_b32 s2, hwreg(HW_REG_GPR_ALLOC, 1, 31) ; encoding: [0x45,0xf0,0x82,0xb8] +// GFX10: s_getreg_b32 s2, hwreg(HW_REG_GPR_ALLOC, 1, 31) ; encoding: [0x45,0xf0,0x02,0xb9] // HW register code of unknown HW register, non-default offset/width s_getreg_b32 s2, hwreg(51, 1, 31) // SICI: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x02,0xb9] // VI9: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x82,0xb8] +// GFX10: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x02,0xb9] // HW register code of unknown HW register, default offset/width s_getreg_b32 s2, hwreg(51) // SICI: s_getreg_b32 s2, hwreg(51) ; encoding: [0x33,0xf8,0x02,0xb9] // VI9: s_getreg_b32 s2, hwreg(51) ; encoding: [0x33,0xf8,0x82,0xb8] +// GFX10: s_getreg_b32 s2, hwreg(51) ; encoding: [0x33,0xf8,0x02,0xb9] // HW register code of unknown HW register, valid symbolic name range but no name available s_getreg_b32 s2, hwreg(10) // SICI: s_getreg_b32 s2, hwreg(10) ; encoding: [0x0a,0xf8,0x02,0xb9] // VI9: s_getreg_b32 s2, hwreg(10) ; encoding: [0x0a,0xf8,0x82,0xb8] +// GFX10: s_getreg_b32 s2, hwreg(10) ; encoding: [0x0a,0xf8,0x02,0xb9] // HW_REG_SH_MEM_BASES valid starting from GFX9 s_getreg_b32 s2, hwreg(15) @@ -183,31 +206,37 @@ s_getreg_b32 s2, hwreg(25) s_setreg_b32 0x6, s2 // SICI: s_setreg_b32 hwreg(HW_REG_LDS_ALLOC, 0, 1), s2 ; encoding: [0x06,0x00,0x82,0xb9] // VI9: s_setreg_b32 hwreg(HW_REG_LDS_ALLOC, 0, 1), s2 ; encoding: [0x06,0x00,0x02,0xb9] +// GFX10: s_setreg_b32 hwreg(HW_REG_LDS_ALLOC, 0, 1), s2 ; encoding: [0x06,0x00,0x82,0xb9] // raw number mapped to unknown HW register s_setreg_b32 0x33, s2 // SICI: s_setreg_b32 hwreg(51, 0, 1), s2 ; encoding: [0x33,0x00,0x82,0xb9] // VI9: s_setreg_b32 hwreg(51, 0, 1), s2 ; encoding: [0x33,0x00,0x02,0xb9] 
+// GFX10: s_setreg_b32 hwreg(51, 0, 1), s2 ; encoding: [0x33,0x00,0x82,0xb9] // raw number mapped to known HW register, default offset/width s_setreg_b32 0xf803, s2 // SICI: s_setreg_b32 hwreg(HW_REG_TRAPSTS), s2 ; encoding: [0x03,0xf8,0x82,0xb9] // VI9: s_setreg_b32 hwreg(HW_REG_TRAPSTS), s2 ; encoding: [0x03,0xf8,0x02,0xb9] +// GFX10: s_setreg_b32 hwreg(HW_REG_TRAPSTS), s2 ; encoding: [0x03,0xf8,0x82,0xb9] // HW register identifier, default offset/width implied s_setreg_b32 hwreg(HW_REG_HW_ID), s2 // SICI: s_setreg_b32 hwreg(HW_REG_HW_ID), s2 ; encoding: [0x04,0xf8,0x82,0xb9] // VI9: s_setreg_b32 hwreg(HW_REG_HW_ID), s2 ; encoding: [0x04,0xf8,0x02,0xb9] +// GFX10: s_setreg_b32 hwreg(HW_REG_HW_ID), s2 ; encoding: [0x04,0xf8,0x82,0xb9] // HW register identifier, non-default offset/width s_setreg_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), s2 // SICI: s_setreg_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), s2 ; encoding: [0x45,0xf0,0x82,0xb9] // VI9: s_setreg_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), s2 ; encoding: [0x45,0xf0,0x02,0xb9] +// GFX10: s_setreg_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), s2 ; encoding: [0x45,0xf0,0x82,0xb9] // HW register code of unknown HW register, valid symbolic name range but no name available s_setreg_b32 hwreg(10), s2 // SICI: s_setreg_b32 hwreg(10), s2 ; encoding: [0x0a,0xf8,0x82,0xb9] // VI9: s_setreg_b32 hwreg(10), s2 ; encoding: [0x0a,0xf8,0x02,0xb9] +// GFX10: s_setreg_b32 hwreg(10), s2 ; encoding: [0x0a,0xf8,0x82,0xb9] // HW_REG_SH_MEM_BASES valid starting from GFX9 s_setreg_b32 hwreg(15), s2 @@ -271,16 +300,19 @@ s_setreg_b32 hwreg(25), s2 s_setreg_b32 hwreg(5, 1, 31), s2 // SICI: s_setreg_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), s2 ; encoding: [0x45,0xf0,0x82,0xb9] // VI9: s_setreg_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), s2 ; encoding: [0x45,0xf0,0x02,0xb9] +// GFX10: s_setreg_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), s2 ; encoding: [0x45,0xf0,0x82,0xb9] // raw number mapped to known HW register s_setreg_imm32_b32 0x6, 0xff // SICI: s_setreg_imm32_b32 hwreg(HW_REG_LDS_ALLOC, 0, 1), 0xff ; encoding: [0x06,0x00,0x80,0xba,0xff,0x00,0x00,0x00] // VI9: s_setreg_imm32_b32 hwreg(HW_REG_LDS_ALLOC, 0, 1), 0xff ; encoding: [0x06,0x00,0x00,0xba,0xff,0x00,0x00,0x00] +// GFX10: s_setreg_imm32_b32 hwreg(HW_REG_LDS_ALLOC, 0, 1), 0xff ; encoding: [0x06,0x00,0x80,0xba,0xff,0x00,0x00,0x00] // HW register identifier, non-default offset/width s_setreg_imm32_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), 0xff // SICI: s_setreg_imm32_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), 0xff ; encoding: [0x45,0xf0,0x80,0xba,0xff,0x00,0x00,0x00] // VI9: s_setreg_imm32_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), 0xff ; encoding: [0x45,0xf0,0x00,0xba,0xff,0x00,0x00,0x00] +// GFX10: s_setreg_imm32_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), 0xff ; encoding: [0x45,0xf0,0x80,0xba,0xff,0x00,0x00,0x00] //===----------------------------------------------------------------------===// // expressions and hwreg macro @@ -290,16 +322,19 @@ hwreg=6 s_getreg_b32 s2, hwreg // SICI: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9] // VI9: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x82,0xb8] +// GFX10: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9] x=5 s_getreg_b32 s2, x+1 // SICI: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9] // VI9: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x82,0xb8] +// GFX10: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9] x=5 s_getreg_b32 s2, 1+x // SICI: s_getreg_b32 
s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9] // VI9: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x82,0xb8] +// GFX10: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9] reg=50 offset=2 @@ -307,10 +342,12 @@ width=30 s_getreg_b32 s2, hwreg(reg + 1, offset - 1, width + 1) // SICI: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x02,0xb9] // VI9: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x82,0xb8] +// GFX10: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x02,0xb9] s_getreg_b32 s2, hwreg(1 + reg, -1 + offset, 1 + width) // SICI: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x02,0xb9] // VI9: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x82,0xb8] +// GFX10: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x02,0xb9] //===----------------------------------------------------------------------===// // Instructions @@ -319,30 +356,36 @@ s_getreg_b32 s2, hwreg(1 + reg, -1 + offset, 1 + width) s_endpgm_ordered_ps_done // GFX9: s_endpgm_ordered_ps_done ; encoding: [0x00,0x00,0x9e,0xbf] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_endpgm_ordered_ps_done ; encoding: [0x00,0x00,0x9e,0xbf] s_call_b64 null, 12609 // GFX10: s_call_b64 null, 12609 ; encoding: [0x41,0x31,0x7d,0xbb] -// NOSICIVI: error: not a valid operand. -// NOGFX9: error: not a valid operand. +// NOSICIVI: error: 'null' operand is not supported on this GPU +// NOGFX9: error: 'null' operand is not supported on this GPU s_call_b64 s[12:13], 12609 // GFX9: s_call_b64 s[12:13], 12609 ; encoding: [0x41,0x31,0x8c,0xba] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_call_b64 s[12:13], 12609 ; encoding: [0x41,0x31,0x0c,0xbb] s_call_b64 s[100:101], 12609 // GFX9: s_call_b64 s[100:101], 12609 ; encoding: [0x41,0x31,0xe4,0xba] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_call_b64 s[100:101], 12609 ; encoding: [0x41,0x31,0x64,0xbb] s_call_b64 s[10:11], 49617 // GFX9: s_call_b64 s[10:11], 49617 ; encoding: [0xd1,0xc1,0x8a,0xba] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_call_b64 s[10:11], 49617 ; encoding: [0xd1,0xc1,0x0a,0xbb] offset = 4 s_call_b64 s[0:1], offset + 4 // GFX9: s_call_b64 s[0:1], 8 ; encoding: [0x08,0x00,0x80,0xba] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_call_b64 s[0:1], 8 ; encoding: [0x08,0x00,0x00,0xbb] offset = 4 s_call_b64 s[0:1], 4 + offset // GFX9: s_call_b64 s[0:1], 8 ; encoding: [0x08,0x00,0x80,0xba] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_call_b64 s[0:1], 8 ; encoding: [0x08,0x00,0x00,0xbb] diff --git a/llvm/test/MC/AMDGPU/trap.s b/llvm/test/MC/AMDGPU/trap.s index 5d23c1f30d6ed..18296c859642f 100644 --- a/llvm/test/MC/AMDGPU/trap.s +++ b/llvm/test/MC/AMDGPU/trap.s @@ -20,124 +20,124 @@ s_add_u32 ttmp0, ttmp0, 4 s_add_u32 ttmp4, 8, ttmp4 // SICI: s_add_u32 ttmp4, 8, ttmp4 ; encoding: [0x88,0x74,0x74,0x80] // VI: s_add_u32 ttmp4, 8, ttmp4 ; encoding: [0x88,0x74,0x74,0x80] -// GXF9: s_add_u32 ttmp4, 8, ttmp4 ; encoding: [0x88,0x70,0x70,0x80] +// GFX9: s_add_u32 ttmp4, 8, ttmp4 ; encoding: [0x88,0x70,0x70,0x80] s_add_u32 ttmp4, ttmp4, 0x00000100 // SICI: s_add_u32 ttmp4, ttmp4, 0x100 ; encoding: [0x74,0xff,0x74,0x80,0x00,0x01,0x00,0x00] // VI: s_add_u32 ttmp4, ttmp4, 0x100 ; encoding: [0x74,0xff,0x74,0x80,0x00,0x01,0x00,0x00] -// GXF9: s_add_u32 ttmp4, ttmp4, 0x100 ; encoding: [0x70,0xff,0x70,0x80,0x00,0x01,0x00,0x00] 
+// GFX9: s_add_u32 ttmp4, ttmp4, 0x100 ; encoding: [0x70,0xff,0x70,0x80,0x00,0x01,0x00,0x00] s_add_u32 ttmp4, ttmp4, 4 // SICI: s_add_u32 ttmp4, ttmp4, 4 ; encoding: [0x74,0x84,0x74,0x80] // VI: s_add_u32 ttmp4, ttmp4, 4 ; encoding: [0x74,0x84,0x74,0x80] -// GXF9: s_add_u32 ttmp4, ttmp4, 4 ; encoding: [0x70,0x84,0x70,0x80] +// GFX9: s_add_u32 ttmp4, ttmp4, 4 ; encoding: [0x70,0x84,0x70,0x80] s_add_u32 ttmp4, ttmp8, ttmp4 // SICI: s_add_u32 ttmp4, ttmp8, ttmp4 ; encoding: [0x78,0x74,0x74,0x80] // VI: s_add_u32 ttmp4, ttmp8, ttmp4 ; encoding: [0x78,0x74,0x74,0x80] -// GXF9: s_add_u32 ttmp4, ttmp8, ttmp4 ; encoding: [0x74,0x70,0x70,0x80] +// GFX9: s_add_u32 ttmp4, ttmp8, ttmp4 ; encoding: [0x74,0x70,0x70,0x80] s_and_b32 ttmp10, ttmp8, 0x00000080 // SICI: s_and_b32 ttmp10, ttmp8, 0x80 ; encoding: [0x78,0xff,0x7a,0x87,0x80,0x00,0x00,0x00] // VI: s_and_b32 ttmp10, ttmp8, 0x80 ; encoding: [0x78,0xff,0x7a,0x86,0x80,0x00,0x00,0x00] -// GXF9: s_and_b32 ttmp10, ttmp8, 0x80 ; encoding: [0x74,0xff,0x74,0x86,0x80,0x00,0x00,0x00] +// GFX9: s_and_b32 ttmp10, ttmp8, 0x80 ; encoding: [0x74,0xff,0x76,0x86,0x80,0x00,0x00,0x00] s_and_b32 ttmp9, tma_hi, 0x0000ffff // SICI: s_and_b32 ttmp9, tma_hi, 0xffff ; encoding: [0x6f,0xff,0x79,0x87,0xff,0xff,0x00,0x00] // VI: s_and_b32 ttmp9, tma_hi, 0xffff ; encoding: [0x6f,0xff,0x79,0x86,0xff,0xff,0x00,0x00] -// NOGFX9: error: not a valid operand +// NOGFX9: error: register not available on this GPU s_and_b32 ttmp9, ttmp9, 0x000001ff // SICI: s_and_b32 ttmp9, ttmp9, 0x1ff ; encoding: [0x79,0xff,0x79,0x87,0xff,0x01,0x00,0x00] // VI: s_and_b32 ttmp9, ttmp9, 0x1ff ; encoding: [0x79,0xff,0x79,0x86,0xff,0x01,0x00,0x00] -// GXF9: s_and_b32 ttmp9, ttmp9, 0x1ff ; encoding: [0x75,0xff,0x75,0x86,0xff,0x01,0x00,0x00] +// GFX9: s_and_b32 ttmp9, ttmp9, 0x1ff ; encoding: [0x75,0xff,0x75,0x86,0xff,0x01,0x00,0x00] s_and_b32 ttmp9, tma_lo, 0xffff0000 // SICI: s_and_b32 ttmp9, tma_lo, 0xffff0000 ; encoding: [0x6e,0xff,0x79,0x87,0x00,0x00,0xff,0xff] // VI: s_and_b32 ttmp9, tma_lo, 0xffff0000 ; encoding: [0x6e,0xff,0x79,0x86,0x00,0x00,0xff,0xff] -// NOGFX9: error: not a valid operand +// NOGFX9: error: register not available on this GPU s_and_b32 ttmp9, ttmp9, ttmp8 // SICI: s_and_b32 ttmp9, ttmp9, ttmp8 ; encoding: [0x79,0x78,0x79,0x87] // VI: s_and_b32 ttmp9, ttmp9, ttmp8 ; encoding: [0x79,0x78,0x79,0x86] -// GXF9: s_and_b32 ttmp9, ttmp9, ttmp8 ; encoding: [0x75,0x78,0x75,0x86] +// GFX9: s_and_b32 ttmp9, ttmp9, ttmp8 ; encoding: [0x75,0x74,0x75,0x86] s_and_b32 ttmp8, ttmp1, 0x01000000 // SICI: s_and_b32 ttmp8, ttmp1, 0x1000000 ; encoding: [0x71,0xff,0x78,0x87,0x00,0x00,0x00,0x01] // VI: s_and_b32 ttmp8, ttmp1, 0x1000000 ; encoding: [0x71,0xff,0x78,0x86,0x00,0x00,0x00,0x01] -// GXF9: s_and_b32 ttmp8, ttmp1, 0x1000000 ; encoding: [0x6d,0xff,0x74,0x86,0x00,0x00,0x00,0x01] +// GFX9: s_and_b32 ttmp8, ttmp1, 0x1000000 ; encoding: [0x6d,0xff,0x74,0x86,0x00,0x00,0x00,0x01] s_cmp_eq_i32 ttmp8, 0 // SICI: s_cmp_eq_i32 ttmp8, 0 ; encoding: [0x78,0x80,0x00,0xbf] // VI: s_cmp_eq_i32 ttmp8, 0 ; encoding: [0x78,0x80,0x00,0xbf] -// GXF9: s_cmp_eq_i32 ttmp8, 0 ; encoding: [0x74,0x80,0x00,0xbf] +// GFX9: s_cmp_eq_i32 ttmp8, 0 ; encoding: [0x74,0x80,0x00,0xbf] s_cmp_eq_i32 ttmp8, 0x000000fe // SICI: s_cmp_eq_i32 ttmp8, 0xfe ; encoding: [0x78,0xff,0x00,0xbf,0xfe,0x00,0x00,0x00] // VI: s_cmp_eq_i32 ttmp8, 0xfe ; encoding: [0x78,0xff,0x00,0xbf,0xfe,0x00,0x00,0x00] -// GXF9: s_cmp_eq_i32 ttmp8, 0xfe ; encoding: [0x74,0xff,0x00,0xbf,0xfe,0x00,0x00,0x00] +// GFX9: s_cmp_eq_i32 ttmp8, 0xfe ; encoding: 
[0x74,0xff,0x00,0xbf,0xfe,0x00,0x00,0x00] s_lshr_b32 ttmp8, ttmp8, 12 // SICI: s_lshr_b32 ttmp8, ttmp8, 12 ; encoding: [0x78,0x8c,0x78,0x90] // VI: s_lshr_b32 ttmp8, ttmp8, 12 ; encoding: [0x78,0x8c,0x78,0x8f] -// GXF9: s_lshr_b32 ttmp8, ttmp8, 12 ; encoding: [0x74,0x8c,0x74,0x8f] +// GFX9: s_lshr_b32 ttmp8, ttmp8, 12 ; encoding: [0x74,0x8c,0x74,0x8f] v_mov_b32_e32 v1, ttmp8 // SICI: v_mov_b32_e32 v1, ttmp8 ; encoding: [0x78,0x02,0x02,0x7e] // VI: v_mov_b32_e32 v1, ttmp8 ; encoding: [0x78,0x02,0x02,0x7e] -// GXF9: v_mov_b32_e32 v1, ttmp8 ; encoding: [0x74,0x02,0x02,0x7e] +// GFX9: v_mov_b32_e32 v1, ttmp8 ; encoding: [0x74,0x02,0x02,0x7e] s_mov_b32 m0, ttmp8 // SICI: s_mov_b32 m0, ttmp8 ; encoding: [0x78,0x03,0xfc,0xbe] // VI: s_mov_b32 m0, ttmp8 ; encoding: [0x78,0x00,0xfc,0xbe] -// GXF9: s_mov_b32 m0, ttmp8 ; encoding: [0x74,0x00,0xfc,0xbe] +// GFX9: s_mov_b32 m0, ttmp8 ; encoding: [0x74,0x00,0xfc,0xbe] s_mov_b32 ttmp10, 0 // SICI: s_mov_b32 ttmp10, 0 ; encoding: [0x80,0x03,0xfa,0xbe] // VI: s_mov_b32 ttmp10, 0 ; encoding: [0x80,0x00,0xfa,0xbe] -// GXF9: s_mov_b32 ttmp10, 0 ; encoding: [0x80,0x00,0xf6,0xbe] +// GFX9: s_mov_b32 ttmp10, 0 ; encoding: [0x80,0x00,0xf6,0xbe] s_mov_b32 ttmp11, 0x01024fac // SICI: s_mov_b32 ttmp11, 0x1024fac ; encoding: [0xff,0x03,0xfb,0xbe,0xac,0x4f,0x02,0x01] // VI: s_mov_b32 ttmp11, 0x1024fac ; encoding: [0xff,0x00,0xfb,0xbe,0xac,0x4f,0x02,0x01] -// GXF9: s_mov_b32 ttmp11, 0x1024fac ; encoding: [0xff,0x00,0xf7,0xbe,0xac,0x4f,0x02,0x01] +// GFX9: s_mov_b32 ttmp11, 0x1024fac ; encoding: [0xff,0x00,0xf7,0xbe,0xac,0x4f,0x02,0x01] s_mov_b32 ttmp8, m0 // SICI: s_mov_b32 ttmp8, m0 ; encoding: [0x7c,0x03,0xf8,0xbe] // VI: s_mov_b32 ttmp8, m0 ; encoding: [0x7c,0x00,0xf8,0xbe] -// GXF9: s_mov_b32 ttmp8, m0 ; encoding: [0x7c,0x00,0xf4,0xbe] +// GFX9: s_mov_b32 ttmp8, m0 ; encoding: [0x7c,0x00,0xf4,0xbe] s_mov_b32 ttmp8, tma_lo // SICI: s_mov_b32 ttmp8, tma_lo ; encoding: [0x6e,0x03,0xf8,0xbe] // VI: s_mov_b32 ttmp8, tma_lo ; encoding: [0x6e,0x00,0xf8,0xbe] -// NOGFX9: error: not a valid operand +// NOGFX9: error: register not available on this GPU s_mul_i32 ttmp8, 0x00000324, ttmp8 // SICI: s_mul_i32 ttmp8, 0x324, ttmp8 ; encoding: [0xff,0x78,0x78,0x93,0x24,0x03,0x00,0x00] // VI: s_mul_i32 ttmp8, 0x324, ttmp8 ; encoding: [0xff,0x78,0x78,0x92,0x24,0x03,0x00,0x00] -// GXF9: s_mul_i32 ttmp8, 0x324, ttmp8 ; encoding: [0xff,0x74,0x74,0x92,0x24,0x03,0x00,0x00] +// GFX9: s_mul_i32 ttmp8, 0x324, ttmp8 ; encoding: [0xff,0x74,0x74,0x92,0x24,0x03,0x00,0x00] s_or_b32 ttmp9, ttmp9, 0x00280000 // SICI: s_or_b32 ttmp9, ttmp9, 0x280000 ; encoding: [0x79,0xff,0x79,0x88,0x00,0x00,0x28,0x00] // VI: s_or_b32 ttmp9, ttmp9, 0x280000 ; encoding: [0x79,0xff,0x79,0x87,0x00,0x00,0x28,0x00] -// GXF9: s_or_b32 ttmp9, ttmp9, 0x280000 ; encoding: [0x75,0xff,0x75,0x87,0x00,0x00,0x28,0x00] +// GFX9: s_or_b32 ttmp9, ttmp9, 0x280000 ; encoding: [0x75,0xff,0x75,0x87,0x00,0x00,0x28,0x00] // ttmp12..ttmp15 (GFX9 only) s_add_u32 ttmp0, ttmp12, 4 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: s_add_u32 ttmp0, ttmp12, 4 ; encoding: [0x78,0x84,0x6c,0x80] s_add_u32 ttmp0, ttmp13, 4 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: s_add_u32 ttmp0, ttmp13, 4 ; encoding: [0x79,0x84,0x6c,0x80] s_add_u32 ttmp0, ttmp14, 4 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: s_add_u32 ttmp0, ttmp14, 4 ; encoding: [0x7a,0x84,0x6c,0x80] s_add_u32 ttmp0, 
ttmp15, 4 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: s_add_u32 ttmp0, ttmp15, 4 ; encoding: [0x7b,0x84,0x6c,0x80] //===----------------------------------------------------------------------===// @@ -162,31 +162,31 @@ s_mov_b64 exec, [ttmp4,ttmp5] s_mov_b64 tba, ttmp[4:5] // SICI: s_mov_b64 tba, ttmp[4:5] ; encoding: [0x74,0x04,0xec,0xbe] // VI: s_mov_b64 tba, ttmp[4:5] ; encoding: [0x74,0x01,0xec,0xbe] -// NOGFX9: error: not a valid operand +// NOGFX9: error: register not available on this GPU s_mov_b64 ttmp[4:5], tba // SICI: s_mov_b64 ttmp[4:5], tba ; encoding: [0x6c,0x04,0xf4,0xbe] // VI: s_mov_b64 ttmp[4:5], tba ; encoding: [0x6c,0x01,0xf4,0xbe] -// NOGFX9: error: not a valid operand +// NOGFX9: error: register not available on this GPU s_mov_b64 tma, ttmp[4:5] // SICI: s_mov_b64 tma, ttmp[4:5] ; encoding: [0x74,0x04,0xee,0xbe] // VI: s_mov_b64 tma, ttmp[4:5] ; encoding: [0x74,0x01,0xee,0xbe] -// NOGFX9: error: not a valid operand +// NOGFX9: error: register not available on this GPU s_mov_b64 ttmp[4:5], tma // SICI: s_mov_b64 ttmp[4:5], tma ; encoding: [0x6e,0x04,0xf4,0xbe] // VI: s_mov_b64 ttmp[4:5], tma ; encoding: [0x6e,0x01,0xf4,0xbe] -// NOGFX9: error: not a valid operand +// NOGFX9: error: register not available on this GPU // ttmp12..ttmp15 (GFX9 only) s_mov_b64 ttmp[12:13], exec -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: s_mov_b64 ttmp[12:13], exec ; encoding: [0x7e,0x01,0xf8,0xbe] s_mov_b64 ttmp[14:15], exec -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: s_mov_b64 ttmp[14:15], exec ; encoding: [0x7e,0x01,0xfa,0xbe] //===----------------------------------------------------------------------===// @@ -197,25 +197,29 @@ s_mov_b64 ttmp[14:15], exec s_buffer_load_dwordx8 ttmp[0:7], s[0:3], s0 // VI: [0x00,0x1c,0x2c,0xc0,0x00,0x00,0x00,0x00] // GFX9: [0x00,0x1b,0x2c,0xc0,0x00,0x00,0x00,0x00] +// SICI: s_buffer_load_dwordx8 ttmp[0:7], s[0:3], s0 ; encoding: [0x00,0x00,0xf8,0xc2] s_buffer_load_dwordx8 ttmp[4:11], s[0:3], s0 // VI: [0x00,0x1d,0x2c,0xc0,0x00,0x00,0x00,0x00] // GFX9: [0x00,0x1c,0x2c,0xc0,0x00,0x00,0x00,0x00] +// SICI: s_buffer_load_dwordx8 ttmp[4:11], s[0:3], s0 ; encoding: [0x00,0x00,0xfa,0xc2] s_buffer_load_dwordx8 ttmp[8:15], s[0:3], s0 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: [0x00,0x1d,0x2c,0xc0,0x00,0x00,0x00,0x00] s_load_dwordx8 ttmp[0:7], s[0:1], s0 // VI: [0x00,0x1c,0x0c,0xc0,0x00,0x00,0x00,0x00] // GFX9: [0x00,0x1b,0x0c,0xc0,0x00,0x00,0x00,0x00] +// SICI: s_load_dwordx8 ttmp[0:7], s[0:1], s0 ; encoding: [0x00,0x00,0xf8,0xc0] s_load_dwordx8 ttmp[4:11], s[0:1], s0 // VI: [0x00,0x1d,0x0c,0xc0,0x00,0x00,0x00,0x00] // GFX9: [0x00,0x1c,0x0c,0xc0,0x00,0x00,0x00,0x00] +// SICI: s_load_dwordx8 ttmp[4:11], s[0:1], s0 ; encoding: [0x00,0x00,0xfa,0xc0] s_load_dwordx8 ttmp[8:15], s[0:1], s0 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: [0x00,0x1d,0x0c,0xc0,0x00,0x00,0x00,0x00] //===----------------------------------------------------------------------===// @@ -224,11 +228,11 @@ s_load_dwordx8 ttmp[8:15], s[0:1], s0 //===----------------------------------------------------------------------===// s_buffer_load_dwordx16 ttmp[0:15], s[0:3], s0 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: 
[0x00,0x1b,0x30,0xc0,0x00,0x00,0x00,0x00] s_load_dwordx16 ttmp[0:15], s[0:1], s0 -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: [0x00,0x1b,0x10,0xc0,0x00,0x00,0x00,0x00] //===----------------------------------------------------------------------===// @@ -253,5 +257,5 @@ buffer_atomic_inc v1, off, ttmp[8:11], 56 glc // ttmp12..ttmp15 (GFX9 only) buffer_atomic_inc v1, off, ttmp[12:15], 56 glc -// NOSICIVI: error: not a valid operand +// NOSICIVI: error: register not available on this GPU // GFX9: buffer_atomic_inc v1, off, ttmp[12:15], 56 glc ; encoding: [0x00,0x40,0x2c,0xe1,0x00,0x01,0x1e,0xb8] diff --git a/llvm/test/MC/AMDGPU/vop3.s b/llvm/test/MC/AMDGPU/vop3.s index e5ff3f030a6fc..2c083e7024e3c 100644 --- a/llvm/test/MC/AMDGPU/vop3.s +++ b/llvm/test/MC/AMDGPU/vop3.s @@ -289,17 +289,17 @@ v_mac_f32_e64 v0, -v1, |v2| v_mac_f16_e64 v0, 0.5, flat_scratch_lo // VI: v_mac_f16_e64 v0, 0.5, flat_scratch_lo ; encoding: [0x00,0x00,0x23,0xd1,0xf0,0xcc,0x00,0x00] // NOCI: error: instruction not supported on this GPU -// NOSI: error: not a valid operand. +// NOSI: error: register not available on this GPU v_mac_f16_e64 v0, -4.0, flat_scratch_lo // VI: v_mac_f16_e64 v0, -4.0, flat_scratch_lo ; encoding: [0x00,0x00,0x23,0xd1,0xf7,0xcc,0x00,0x00] // NOCI: error: instruction not supported on this GPU -// NOSI: error: not a valid operand. +// NOSI: error: register not available on this GPU v_mac_f16_e64 v0, flat_scratch_lo, -4.0 // VI: v_mac_f16_e64 v0, flat_scratch_lo, -4.0 ; encoding: [0x00,0x00,0x23,0xd1,0x66,0xee,0x01,0x00] // NOCI: error: instruction not supported on this GPU -// NOSI: error: not a valid operand. +// NOSI: error: register not available on this GPU v_add_u32 v84, vcc, v13, s31 clamp // NOSICI: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/vop_sdwa.s b/llvm/test/MC/AMDGPU/vop_sdwa.s index 88386e046917f..9a4283e73e384 100644 --- a/llvm/test/MC/AMDGPU/vop_sdwa.s +++ b/llvm/test/MC/AMDGPU/vop_sdwa.s @@ -717,8 +717,8 @@ v_mov_b32 v1, s2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD // GFX9: v_mov_b32_sdwa v1, exec_lo dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x7e,0x10,0x86,0x00] v_mov_b32 v1, exec_lo dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// NOSICI: error: not a valid operand. -// NOVI: error: not a valid operand. +// NOSICI: error: register not available on this GPU +// NOVI: error: register not available on this GPU // GFX9: v_mov_b32_sdwa v1, ttmp12 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x78,0x10,0x86,0x00] v_mov_b32_sdwa v1, ttmp12 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD @@ -735,19 +735,16 @@ v_add_f32 v0, v0, s22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_s // NOSICI: error: invalid operand for instruction // NOVI: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction -// NO: invalid operand (violates constant bus restrictions) v_add_f32 v0, exec_lo, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 // NOSICI: error: invalid operand for instruction // NOVI: error: invalid operand for instruction -// NOGFX9: error: not a valid operand. 
-// NO: error: not a valid operand +// NOGFX9: error: register not available on this GPU v_add_f32 v0, v1, tba_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 // NOSICI: error: invalid operand for instruction // NOVI: error: invalid operand for instruction -// NOGFX9: error: not a valid operand. -// NO: error: not a valid operand +// NOGFX9: error: register not available on this GPU v_add_f32 v0, v1, tma_hi dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 // NOSICI: error: invalid operand for instruction @@ -760,25 +757,23 @@ v_cmp_eq_f32_sdwa vcc, s1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 // GFX9: v_cmp_eq_f32_sdwa vcc, v1, s22 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x2c,0x84,0x7c,0x01,0x00,0x05,0x82] v_cmp_eq_f32_sdwa vcc, v1, s22 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. -// NOVI: error: not a valid operand. +// NOSICI: error: register not available on this GPU +// NOVI: error: register not available on this GPU // GFX9: v_cmp_eq_f32_sdwa ttmp[12:13], v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0xf8,0x05,0x02] v_cmp_eq_f32_sdwa ttmp[12:13], v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 // NOSICI: error: invalid operand for instruction // NOVI: error: instruction not supported on this GPU -// NOGFX9: error: not a valid operand. -// NO: error: not a valid operand +// NOGFX9: error: register not available on this GPU v_cmp_eq_f32_sdwa tba, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 // NOSICI: error: invalid operand for instruction // NOVI: error: instruction not supported on this GPU -// NOGFX9: error: not a valid operand. -// NO: error: not a valid operand +// NOGFX9: error: register not available on this GPU v_cmp_eq_f32_sdwa tma, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: not a valid operand. -// NOVI: error: not a valid operand. +// NOSICI: error: register not available on this GPU +// NOVI: error: register not available on this GPU // GFX9: v_cmp_eq_f32_sdwa vcc, v1, ttmp15 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0xf6,0x84,0x7c,0x01,0x00,0x05,0x82] v_cmp_eq_f32_sdwa vcc, v1, ttmp15 src0_sel:WORD_1 src1_sel:BYTE_2 @@ -789,7 +784,7 @@ v_cmp_eq_f32_sdwa vcc, exec_lo, vcc_lo src0_sel:WORD_1 src1_sel:BYTE_2 // NOVI: error: invalid operand for instruction // GFX9: v_ceil_f16_sdwa v5, flat_scratch_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0x66,0x06,0x86,0x00] -// NOSI: error: not a valid operand. +// NOSI: error: register not available on this GPU // NOCI: error: not a valid operand. v_ceil_f16_sdwa v5, flat_scratch_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD diff --git a/llvm/test/MC/AMDGPU/xnack-mask.s b/llvm/test/MC/AMDGPU/xnack-mask.s index 0fa5242d37899..e6e310724d453 100644 --- a/llvm/test/MC/AMDGPU/xnack-mask.s +++ b/llvm/test/MC/AMDGPU/xnack-mask.s @@ -7,25 +7,25 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=stoney -show-encoding %s | FileCheck -check-prefix=XNACK %s s_mov_b64 xnack_mask, -1 -// NOSICIVI10: error: not a valid operand. +// NOSICIVI10: error: register not available on this GPU // XNACK: s_mov_b64 xnack_mask, -1 ; encoding: [0xc1,0x01,0xe8,0xbe] s_mov_b32 xnack_mask_lo, -1 -// NOSICIVI10: error: not a valid operand. +// NOSICIVI10: error: register not available on this GPU // XNACK: s_mov_b32 xnack_mask_lo, -1 ; encoding: [0xc1,0x00,0xe8,0xbe] s_mov_b32 xnack_mask_hi, -1 -// NOSICIVI10: error: not a valid operand. 
+// NOSICIVI10: error: register not available on this GPU
 // XNACK: s_mov_b32 xnack_mask_hi, -1 ; encoding: [0xc1,0x00,0xe9,0xbe]
 
 s_mov_b32 xnack_mask, -1
-// NOSICIVI10: error: not a valid operand.
+// NOSICIVI10: error: register not available on this GPU
 // XNACKERR: error: invalid operand for instruction
 
 s_mov_b64 xnack_mask_lo, -1
-// NOSICIVI10: error: not a valid operand.
+// NOSICIVI10: error: register not available on this GPU
 // XNACKERR: error: invalid operand for instruction
 
 s_mov_b64 xnack_mask_hi, -1
-// NOSICIVI10: error: not a valid operand.
+// NOSICIVI10: error: register not available on this GPU
 // XNACKERR: error: invalid operand for instruction

From 5ec043eae1877add1cde2a7bd6e01ef64549a41d Mon Sep 17 00:00:00 2001
From: compinder
Date: Wed, 9 Sep 2020 19:02:51 +0530
Subject: [PATCH 0166/1079] [FLANG] Generate error for invalid selector.

Fixes PR47339.

Differential Revision: https://reviews.llvm.org/D87073/new/
---
 flang/lib/Semantics/resolve-names.cpp |  6 ++++++
 flang/lib/Semantics/tools.cpp         |  1 -
 flang/test/Semantics/resolve95.f90    | 15 +++++++++++++++
 3 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 flang/test/Semantics/resolve95.f90

diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index a75c5b6a829e3..54686232dc0d0 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -5044,6 +5044,9 @@ void ConstructVisitor::Post(const parser::Association &x) {
   const auto &name{std::get<parser::Name>(x.t)};
   GetCurrentAssociation().name = &name;
   if (auto *symbol{MakeAssocEntity()}) {
+    if (ExtractCoarrayRef(GetCurrentAssociation().selector.expr)) { // C1103
+      Say("Selector must not be a coindexed object"_err_en_US);
+    }
     SetTypeFromAssociation(*symbol);
     SetAttrsFromAssociation(*symbol);
   }
@@ -5098,6 +5101,9 @@ void ConstructVisitor::Post(const parser::SelectTypeStmt &x) {
       MakePlaceholder(*name, MiscDetails::Kind::SelectTypeAssociateName);
       association.name = &*name;
       auto exprType{association.selector.expr->GetType()};
+      if (ExtractCoarrayRef(association.selector.expr)) { // C1103
+        Say("Selector must not be a coindexed object"_err_en_US);
+      }
       if (exprType && !exprType->IsPolymorphic()) { // C1159
         Say(association.selector.source,
             "Selector '%s' in SELECT TYPE statement must be "
diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp
index 3f93944cd3c33..7a79dedb00a33 100644
--- a/flang/lib/Semantics/tools.cpp
+++ b/flang/lib/Semantics/tools.cpp
@@ -739,7 +739,6 @@ bool InProtectedContext(const Symbol &symbol, const Scope &currentScope) {
 }
 
 // C1101 and C1158
-// TODO Need to check for a coindexed object (why? C1103?)
 std::optional<parser::MessageFixedText> WhyNotModifiable(
     const Symbol &symbol, const Scope &scope) {
   const Symbol *root{GetAssociationRoot(symbol)};
diff --git a/flang/test/Semantics/resolve95.f90 b/flang/test/Semantics/resolve95.f90
new file mode 100644
index 0000000000000..78ff09d88d324
--- /dev/null
+++ b/flang/test/Semantics/resolve95.f90
@@ -0,0 +1,15 @@
+! RUN: %S/test_errors.sh %s %t %f18
+! Test SELECT TYPE and ASSOCIATE errors: C1103
+
+subroutine s1()
+  class(*),allocatable :: calc[:]
+  integer,save :: icoa[*]
+  !ERROR: Selector must not be a coindexed object
+  associate(sel=>icoa[2])
+  end associate
+  icoa = 2
+  allocate(integer::calc[*])
+  !ERROR: Selector must not be a coindexed object
+  select type(sel=>calc[2])
+  end select
+end subroutine

From 649bde488ce9b5c1143e718247f0eda461300a77 Mon Sep 17 00:00:00 2001
From: Jay Foad
Date: Wed, 9 Sep 2020 14:55:48 +0100
Subject: [PATCH 0167/1079] [AMDGPU] Simplify S_SETREG_B32 case in EmitInstrWithCustomInserter

NFC.
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 26fbab63e1ca5..d88ad58d3ab49 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4263,21 +4263,16 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
 
   // The dedicated instructions can only set the whole denorm or round mode at
   // once, not a subset of bits in either.
-  if (Width == 8 && (SetMask & (AMDGPU::Hwreg::FP_ROUND_MASK |
-                                AMDGPU::Hwreg::FP_DENORM_MASK)) == SetMask) {
+  if (SetMask ==
+      (AMDGPU::Hwreg::FP_ROUND_MASK | AMDGPU::Hwreg::FP_DENORM_MASK)) {
     // If this fully sets both the round and denorm mode, emit the two
     // dedicated instructions for these.
-    assert(Offset == 0);
     SetRoundOp = AMDGPU::S_ROUND_MODE;
     SetDenormOp = AMDGPU::S_DENORM_MODE;
-  } else if (Width == 4) {
-    if ((SetMask & AMDGPU::Hwreg::FP_ROUND_MASK) == SetMask) {
-      SetRoundOp = AMDGPU::S_ROUND_MODE;
-      assert(Offset == 0);
-    } else if ((SetMask & AMDGPU::Hwreg::FP_DENORM_MASK) == SetMask) {
-      SetDenormOp = AMDGPU::S_DENORM_MODE;
-      assert(Offset == 4);
-    }
+  } else if (SetMask == AMDGPU::Hwreg::FP_ROUND_MASK) {
+    SetRoundOp = AMDGPU::S_ROUND_MODE;
+  } else if (SetMask == AMDGPU::Hwreg::FP_DENORM_MASK) {
+    SetDenormOp = AMDGPU::S_DENORM_MODE;
   }
 
   if (SetRoundOp || SetDenormOp) {

From 88ff4d2ca1a0aaed6888152042256a0ef3fe863d Mon Sep 17 00:00:00 2001
From: Qiu Chaofan
Date: Wed, 9 Sep 2020 22:38:58 +0800
Subject: [PATCH 0168/1079] [PowerPC] Fix STRICT_FRINT/STRICT_FNEARBYINT lowering

In the standard C library, both rint and nearbyint return the rounding
result in the current rounding mode, but nearbyint never raises the
inexact exception. On PowerPC, x(v|s)r(d|s)pic may modify FPSCR XX,
raising the inexact exception, so we can't select constrained
fnearbyint into xvrdpic. The one exception is xsrqpi, which does not
raise the inexact exception, so fnearbyint f128 is still okay here.

Reviewed By: uweigand

Differential Revision: https://reviews.llvm.org/D87220
---
 clang/lib/CodeGen/CGBuiltin.cpp               |   4 +-
 .../test/CodeGen/builtins-ppc-fpconstrained.c |   8 +-
 clang/test/CodeGen/builtins-ppc-vsx.c         |   8 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  10 +-
 llvm/lib/Target/PowerPC/PPCInstrVSX.td        |  14 +-
 llvm/test/CodeGen/PowerPC/fp-strict-round.ll  | 172 ++++++++++++++-
 .../vector-constrained-fp-intrinsics.ll       | 206 ++++++++++++++----
 7 files changed, 357 insertions(+), 65 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 0cb8f8f636f43..b2abc10544e12 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14273,8 +14273,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
   else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
           BuiltinID == PPC::BI__builtin_vsx_xvrspic)
     ID = Builder.getIsFPConstrained()
-             ? 
Intrinsic::experimental_constrained_nearbyint - : Intrinsic::nearbyint; + ? Intrinsic::experimental_constrained_rint + : Intrinsic::rint; else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || BuiltinID == PPC::BI__builtin_vsx_xvrspip) ID = Builder.getIsFPConstrained() diff --git a/clang/test/CodeGen/builtins-ppc-fpconstrained.c b/clang/test/CodeGen/builtins-ppc-fpconstrained.c index c8b08c3fb5d4a..7c770845090fc 100644 --- a/clang/test/CodeGen/builtins-ppc-fpconstrained.c +++ b/clang/test/CodeGen/builtins-ppc-fpconstrained.c @@ -59,14 +59,14 @@ void test_float(void) { vf = __builtin_vsx_xvrspic(vf); // CHECK-LABEL: try-xvrspic - // CHECK-UNCONSTRAINED: @llvm.nearbyint.v4f32(<4 x float> %{{.*}}) - // CHECK-CONSTRAINED: @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CHECK-UNCONSTRAINED: @llvm.rint.v4f32(<4 x float> %{{.*}}) + // CHECK-CONSTRAINED: @llvm.experimental.constrained.rint.v4f32(<4 x float> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: xvrspic vd = __builtin_vsx_xvrdpic(vd); // CHECK-LABEL: try-xvrdpic - // CHECK-UNCONSTRAINED: @llvm.nearbyint.v2f64(<2 x double> %{{.*}}) - // CHECK-CONSTRAINED: @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CHECK-UNCONSTRAINED: @llvm.rint.v2f64(<2 x double> %{{.*}}) + // CHECK-CONSTRAINED: @llvm.experimental.constrained.rint.v2f64(<2 x double> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: xvrdpic vf = __builtin_vsx_xvrspip(vf); diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/builtins-ppc-vsx.c index 0d07247262754..2542b30590bf8 100644 --- a/clang/test/CodeGen/builtins-ppc-vsx.c +++ b/clang/test/CodeGen/builtins-ppc-vsx.c @@ -863,12 +863,12 @@ void test1() { // CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvredp(<2 x double> res_vf = vec_rint(vf); -// CHECK: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %{{[0-9]+}}) -// CHECK-LE: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %{{[0-9]+}}) +// CHECK: call <4 x float> @llvm.rint.v4f32(<4 x float> %{{[0-9]+}}) +// CHECK-LE: call <4 x float> @llvm.rint.v4f32(<4 x float> %{{[0-9]+}}) res_vd = vec_rint(vd); -// CHECK: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %{{[0-9]+}}) -// CHECK-LE: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %{{[0-9]+}}) +// CHECK: call <2 x double> @llvm.rint.v2f64(<2 x double> %{{[0-9]+}}) +// CHECK-LE: call <2 x double> @llvm.rint.v2f64(<2 x double> %{{[0-9]+}}) res_vf = vec_rsqrte(vf); // CHECK: call <4 x float> @llvm.ppc.vsx.xvrsqrtesp(<4 x float> %{{[0-9]+}}) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f542a8018b4f0..fc9a80919fc1c 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -316,8 +316,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal); setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal); - if (Subtarget.hasVSX()) - setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f64, Legal); + if (Subtarget.hasVSX()) { + setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal); + } if (Subtarget.hasFSQRT()) { setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal); @@ -1059,7 +1061,7 @@ 
PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal); - setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal); @@ -1073,7 +1075,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal); - setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal); diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index c3ee1c7ea18a4..9003b1eb089b6 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -890,15 +890,15 @@ let hasSideEffects = 0 in { def XSRDPIC : XX2Form<60, 107, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrdpic $XT, $XB", IIC_VecFP, - [(set f64:$XT, (any_fnearbyint f64:$XB))]>; + [(set f64:$XT, (fnearbyint f64:$XB))]>; def XVRDPIC : XX2Form<60, 235, (outs vsrc:$XT), (ins vsrc:$XB), "xvrdpic $XT, $XB", IIC_VecFP, - [(set v2f64:$XT, (any_fnearbyint v2f64:$XB))]>; + [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>; def XVRSPIC : XX2Form<60, 171, (outs vsrc:$XT), (ins vsrc:$XB), "xvrspic $XT, $XB", IIC_VecFP, - [(set v4f32:$XT, (any_fnearbyint v4f32:$XB))]>; + [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>; // Max/Min Instructions let isCommutable = 1 in { def XSMAXDP : XX3Form<60, 160, @@ -2681,7 +2681,7 @@ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(f32 (any_fround f32:$S)), (f32 (COPY_TO_REGCLASS (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -def : Pat<(f32 (any_fnearbyint f32:$S)), +def : Pat<(f32 (fnearbyint f32:$S)), (f32 (COPY_TO_REGCLASS (XSRDPIC (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; def : Pat<(f32 (any_ffloor f32:$S)), @@ -2696,11 +2696,11 @@ def : Pat<(f32 (any_ftrunc f32:$S)), def : Pat<(f32 (any_frint f32:$S)), (f32 (COPY_TO_REGCLASS (XSRDPIC (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -def : Pat<(v4f32 (frint v4f32:$S)), (v4f32 (XVRSPIC $S))>; +def : Pat<(v4f32 (any_frint v4f32:$S)), (v4f32 (XVRSPIC $S))>; // Rounding for double precision. 
-def : Pat<(f64 (frint f64:$S)), (f64 (XSRDPIC $S))>; -def : Pat<(v2f64 (frint v2f64:$S)), (v2f64 (XVRDPIC $S))>; +def : Pat<(f64 (any_frint f64:$S)), (f64 (XSRDPIC $S))>; +def : Pat<(v2f64 (any_frint v2f64:$S)), (v2f64 (XVRDPIC $S))>; // Materialize a zero-vector of long long def : Pat<(v2i64 immAllZerosV), diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll index 3a43b3584caf8..fa36f244d6239 100644 --- a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll @@ -170,12 +170,30 @@ define <2 x double> @floor_v2f64(<2 x double> %vf1) { define double @nearbyint_f64(double %f1, double %f2) { ; P8-LABEL: nearbyint_f64: ; P8: # %bb.0: -; P8-NEXT: xsrdpic f1, f1 +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl nearbyint +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 ; P8-NEXT: blr ; ; P9-LABEL: nearbyint_f64: ; P9: # %bb.0: -; P9-NEXT: xsrdpic f1, f1 +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl nearbyint +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 ; P9-NEXT: blr %res = call double @llvm.experimental.constrained.nearbyint.f64( double %f1, @@ -187,12 +205,104 @@ define double @nearbyint_f64(double %f1, double %f2) { define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) { ; P8-LABEL: nearbyint_v4f32: ; P8: # %bb.0: -; P8-NEXT: xvrspic v2, v2 +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -176(r1) +; P8-NEXT: .cfi_def_cfa_offset 176 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: .cfi_offset v30, -32 +; P8-NEXT: .cfi_offset v31, -16 +; P8-NEXT: xxsldwi vs0, v2, v2, 3 +; P8-NEXT: li r3, 144 +; P8-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; P8-NEXT: li r3, 160 +; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; P8-NEXT: vmr v31, v2 +; P8-NEXT: xscvspdpn f1, vs0 +; P8-NEXT: bl nearbyintf +; P8-NEXT: nop +; P8-NEXT: xxsldwi vs0, v31, v31, 1 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: li r3, 128 +; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill +; P8-NEXT: xscvspdpn f1, vs0 +; P8-NEXT: bl nearbyintf +; P8-NEXT: nop +; P8-NEXT: li r3, 128 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload +; P8-NEXT: xxmrghd vs0, vs1, vs0 +; P8-NEXT: xscvspdpn f1, v31 +; P8-NEXT: xvcvdpsp v30, vs0 +; P8-NEXT: bl nearbyintf +; P8-NEXT: nop +; P8-NEXT: xxswapd vs0, v31 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: li r3, 128 +; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill +; P8-NEXT: xscvspdpn f1, vs0 +; P8-NEXT: bl nearbyintf +; P8-NEXT: nop +; P8-NEXT: li r3, 128 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 160 +; P8-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 144 +; P8-NEXT: xxmrghd vs0, vs0, vs1 +; P8-NEXT: xvcvdpsp v2, vs0 +; P8-NEXT: vmrgew v2, v2, v30 +; P8-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; P8-NEXT: addi r1, r1, 176 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 ; P8-NEXT: blr ; ; P9-LABEL: nearbyint_v4f32: ; P9: # %bb.0: -; P9-NEXT: xvrspic v2, v2 +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -80(r1) +; P9-NEXT: .cfi_def_cfa_offset 80 +; 
P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: .cfi_offset v30, -32 +; P9-NEXT: .cfi_offset v31, -16 +; P9-NEXT: xxsldwi vs0, v2, v2, 3 +; P9-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill +; P9-NEXT: xscvspdpn f1, vs0 +; P9-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill +; P9-NEXT: vmr v31, v2 +; P9-NEXT: bl nearbyintf +; P9-NEXT: nop +; P9-NEXT: xxsldwi vs0, v31, v31, 1 +; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9-NEXT: stxv vs1, 32(r1) # 16-byte Folded Spill +; P9-NEXT: xscvspdpn f1, vs0 +; P9-NEXT: bl nearbyintf +; P9-NEXT: nop +; P9-NEXT: lxv vs0, 32(r1) # 16-byte Folded Reload +; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9-NEXT: xxmrghd vs0, vs1, vs0 +; P9-NEXT: xscvspdpn f1, v31 +; P9-NEXT: xvcvdpsp v30, vs0 +; P9-NEXT: bl nearbyintf +; P9-NEXT: nop +; P9-NEXT: xxswapd vs0, v31 +; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9-NEXT: stxv vs1, 32(r1) # 16-byte Folded Spill +; P9-NEXT: xscvspdpn f1, vs0 +; P9-NEXT: bl nearbyintf +; P9-NEXT: nop +; P9-NEXT: lxv vs0, 32(r1) # 16-byte Folded Reload +; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload +; P9-NEXT: xxmrghd vs0, vs0, vs1 +; P9-NEXT: xvcvdpsp v2, vs0 +; P9-NEXT: vmrgew v2, v2, v30 +; P9-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload +; P9-NEXT: addi r1, r1, 80 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 ; P9-NEXT: blr %res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32( <4 x float> %vf1, @@ -204,12 +314,62 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) { define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) { ; P8-LABEL: nearbyint_v2f64: ; P8: # %bb.0: -; P8-NEXT: xvrdpic v2, v2 +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -160(r1) +; P8-NEXT: .cfi_def_cfa_offset 160 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: .cfi_offset v31, -16 +; P8-NEXT: li r3, 144 +; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; P8-NEXT: vmr v31, v2 +; P8-NEXT: xxlor f1, v31, v31 +; P8-NEXT: bl nearbyint +; P8-NEXT: nop +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: li r3, 128 +; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill +; P8-NEXT: xxswapd vs1, v31 +; P8-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; P8-NEXT: bl nearbyint +; P8-NEXT: nop +; P8-NEXT: li r3, 128 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 144 +; P8-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; P8-NEXT: xxmrghd v2, vs0, vs1 +; P8-NEXT: addi r1, r1, 160 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 ; P8-NEXT: blr ; ; P9-LABEL: nearbyint_v2f64: ; P9: # %bb.0: -; P9-NEXT: xvrdpic v2, v2 +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -64(r1) +; P9-NEXT: .cfi_def_cfa_offset 64 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: .cfi_offset v31, -16 +; P9-NEXT: stxv v31, 48(r1) # 16-byte Folded Spill +; P9-NEXT: vmr v31, v2 +; P9-NEXT: xscpsgndp f1, v31, v31 +; P9-NEXT: bl nearbyint +; P9-NEXT: nop +; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9-NEXT: stxv vs1, 32(r1) # 16-byte Folded Spill +; P9-NEXT: xxswapd vs1, v31 +; P9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; P9-NEXT: bl nearbyint +; P9-NEXT: nop +; P9-NEXT: lxv vs0, 32(r1) # 16-byte Folded Reload +; P9-NEXT: lxv v31, 48(r1) # 16-byte Folded Reload +; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9-NEXT: xxmrghd v2, vs0, vs1 +; P9-NEXT: addi r1, r1, 64 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 ; P9-NEXT: blr %res = call <2 x double> 
@llvm.experimental.constrained.nearbyint.v2f64( <2 x double> %vf1, diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll index 1acf71e8f1597..7345d65be14aa 100644 --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -4899,19 +4899,50 @@ entry: define <2 x double> @constrained_vector_nearbyint_v2f64() #0 { ; PC64LE-LABEL: constrained_vector_nearbyint_v2f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -64(1) ; PC64LE-NEXT: addis 3, 2, .LCPI81_0@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI81_0@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xvrdpic 34, 0 +; PC64LE-NEXT: lfd 1, .LCPI81_0@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI81_1@toc@ha +; PC64LE-NEXT: lfs 1, .LCPI81_1@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxmrghd 34, 1, 0 +; PC64LE-NEXT: addi 1, 1, 64 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v2f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI81_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI81_0@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpic 34, 0 +; PC64LE9-NEXT: lfd 1, .LCPI81_0@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addis 3, 2, .LCPI81_1@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfs 1, .LCPI81_1@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 34, 1, 0 +; PC64LE9-NEXT: addi 1, 1, 48 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( @@ -5010,31 +5041,72 @@ entry: define <3 x double> @constrained_vector_nearby_v3f64() #0 { ; PC64LE-LABEL: constrained_vector_nearby_v3f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: addis 3, 2, .LCPI83_1@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI83_1@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI83_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI83_0@toc@l(3) -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xsrdpic 3, 1 -; PC64LE-NEXT: xvrdpic 2, 0 -; PC64LE-NEXT: xxswapd 1, 2 -; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2 -; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI83_1@toc@ha +; PC64LE-NEXT: lfs 1, .LCPI83_1@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; 
PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addis 3, 2, .LCPI83_2@toc@ha +; PC64LE-NEXT: xxmrghd 63, 0, 1 +; PC64LE-NEXT: lfd 1, .LCPI83_2@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: xxlor 2, 63, 63 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 +; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearby_v3f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI83_0@toc@ha -; PC64LE9-NEXT: lfd 0, .LCPI83_0@toc@l(3) +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI83_0@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI83_1@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI83_1@toc@l -; PC64LE9-NEXT: xsrdpic 3, 0 -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpic 2, 0 -; PC64LE9-NEXT: xxswapd 1, 2 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfs 1, .LCPI83_1@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addis 3, 2, .LCPI83_2@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 0, 1 +; PC64LE9-NEXT: lfd 1, .LCPI83_2@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: fmr 3, 1 +; PC64LE9-NEXT: xxswapd 1, 63 +; PC64LE9-NEXT: xscpsgndp 2, 63, 63 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64( @@ -5047,28 +5119,86 @@ entry: define <4 x double> @constrained_vector_nearbyint_v4f64() #0 { ; PC64LE-LABEL: constrained_vector_nearbyint_v4f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI84_0@toc@ha -; PC64LE-NEXT: addis 4, 2, .LCPI84_1@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI84_0@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: addi 3, 4, .LCPI84_1@toc@l -; PC64LE-NEXT: lxvd2x 1, 0, 3 -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xxswapd 1, 1 -; PC64LE-NEXT: xvrdpic 35, 0 -; PC64LE-NEXT: xvrdpic 34, 1 +; PC64LE-NEXT: lfd 1, .LCPI84_0@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI84_1@toc@ha +; PC64LE-NEXT: lfd 1, .LCPI84_1@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addis 3, 2, .LCPI84_2@toc@ha +; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: lfd 1, .LCPI84_2@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; 
PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI84_3@toc@ha +; PC64LE-NEXT: lfd 1, .LCPI84_3@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: vmr 2, 31 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxmrghd 35, 1, 0 +; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v4f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI84_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI84_0@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI84_0@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI84_1@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI84_1@toc@l -; PC64LE9-NEXT: xvrdpic 35, 0 -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpic 34, 0 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI84_1@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addis 3, 2, .LCPI84_2@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 1, 0 +; PC64LE9-NEXT: lfd 1, .LCPI84_2@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addis 3, 2, .LCPI84_3@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI84_3@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: vmr 2, 31 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 35, 1, 0 +; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64( From e706116e1182f39c8de5d9c9981df08a9f614e7a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 9 Sep 2020 16:13:55 +0100 Subject: [PATCH 0169/1079] X86FrameLowering::adjustStackWithPops - cleanup auto usage. NFCI. Don't use auto for non-obvious types, and use const references. --- llvm/lib/Target/X86/X86FrameLowering.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 7437c2e978af2..90265ddf344a1 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -2919,7 +2919,6 @@ bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int Offset) const { - if (Offset <= 0) return false; @@ -2942,14 +2941,13 @@ bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB, unsigned Regs[2]; unsigned FoundRegs = 0; - auto &MRI = MBB.getParent()->getRegInfo(); - auto RegMask = Prev->getOperand(1); + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const MachineOperand &RegMask = Prev->getOperand(1); auto &RegClass = Is64Bit ? 
X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass; // Try to find up to NumPops free registers. for (auto Candidate : RegClass) { - // Poor man's liveness: // Since we're immediately after a call, any register that is clobbered // by the call and not defined by it can be considered dead. From 53ffeea6d59ae5ba78b8c85a31c06677c3ab7719 Mon Sep 17 00:00:00 2001 From: Jakub Lichman Date: Tue, 8 Sep 2020 15:04:35 +0000 Subject: [PATCH 0170/1079] [mlir][Linalg] Reduction dimensions specified in TC definition of ConvOps. This commit specifies reduction dimensions for ConvOps. This prevents running reduction loops in parallel and enables easier detection of kernel dimensions which we will need later on. Differential Revision: https://reviews.llvm.org/D87288 --- .../Linalg/IR/LinalgNamedStructuredOpsSpec.tc | 30 +++++----- mlir/test/Dialect/Linalg/loops.mlir | 60 ++++++++++--------- 2 files changed, 47 insertions(+), 43 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc index 27d4330a54d5f..9c54a5f0c3c70 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc @@ -20,52 +20,50 @@ def batch_matmul(A: f32(Batch, M, K), B: f32(Batch, K, N)) -> (C: f32(Batch, M, ods_def: def conv_1d(I: f32(W), K: f32(KW)) -> (O: f32(W)) { - O(w) = std_addf(O(w), std_mulf(I(w + kw), K(kw))); + O(w) = std_addf(std_mulf(I(w + kw), K(kw))); } ods_def: def conv_1d_nwc(I: f32(N, W, C), K: f32(F, KW, C)) -> (O: f32(N, W, F)) { - O(n, w, f) = std_addf(O(n, w, f), - std_mulf(I(n, w + kw, c), K(f, kw, c))); + O(n, w, f) = std_addf(std_mulf(I(n, w + kw, c), K(f, kw, c))); } ods_def: def conv_1d_ncw(I: f32(N, C, W), K: f32(F, C, KW)) -> (O: f32(N, F, W)) { - O(n, f, w) = std_addf(O(n, f, w), - std_mulf(I(n, c, w + kw), K(f, c, kw))); + O(n, f, w) = std_addf(std_mulf(I(n, c, w + kw), K(f, c, kw))); } ods_def: def conv_2d(I: f32(H, W), K: f32(KH, KW)) -> (O: f32(H, W)) { - O(h, w) = std_addf(O(h, w), std_mulf(I(h + kh, w + kw), K(kh, kw))); + O(h, w) = std_addf(std_mulf(I(h + kh, w + kw), K(kh, kw))); } ods_def: def conv_2d_nhwc(I: f32(N, H, W, C), K: f32(F, KH, KW, C)) -> (O: f32(N, H, W, F)) { - O(n, h, w, f) = std_addf(O(n, h, w, f), - std_mulf(I(n, h + kh, w + kw, c), K(f, kh, kw, c))); + O(n, h, w, f) = std_addf(std_mulf( + I(n, h + kh, w + kw, c), K(f, kh, kw, c))); } ods_def: def conv_2d_nchw(I: f32(N, C, H, W), K: f32(F, C, KH, KW)) -> (O: f32(N, F, H, W)) { - O(n, f, h, w) = std_addf(O(n, f, h, w), - std_mulf(I(n, c, h + kh, w + kw), K(f, c, kh, kw))); + O(n, f, h, w) = std_addf(std_mulf( + I(n, c, h + kh, w + kw), K(f, c, kh, kw))); } ods_def: def conv_3d(I: f32(D, H, W), K: f32(KD, KH, KW)) -> (O: f32(D, H, W)) { - O(d, h, w) = std_addf(O(d, h, w), - std_mulf(I(d + kd, h + kh, w + kw), K(kd, kh, kw))); + O(d, h, w) = std_addf(std_mulf( + I(d + kd, h + kh, w + kw), K(kd, kh, kw))); } ods_def: def conv_3d_ndhwc(I: f32(N, D, H, W, C), K: f32(F, KD, KH, KW, C)) -> (O: f32(N, D, H, W, F)) { - O(n, d, h, w, f) = std_addf(O(n, d, h, w, f), - std_mulf(I(n, d + kd, h + kh, w + kw, c), K(f, kd, kh, kw, c))); + O(n, d, h, w, f) = std_addf(std_mulf( + I(n, d + kd, h + kh, w + kw, c), K(f, kd, kh, kw, c))); } ods_def: def conv_3d_ncdhw(I: f32(N, C, D, H, W), K: f32(F, C, KD, KH, KW)) -> (O: f32(N, F, D, H, W)) { - O(n, f, d, h, w) = std_addf(O(n, f, d, h, w), - std_mulf(I(n, c, d + kd, h + kh, w + kw), K(f, c, kd, kh, 
kw))); + O(n, f, d, h, w) = std_addf(std_mulf( + I(n, c, d + kd, h + kh, w + kw), K(f, c, kd, kh, kw))); } \ No newline at end of file diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir index 6af53a2b8d222..1e10e036ee2d7 100644 --- a/mlir/test/Dialect/Linalg/loops.mlir +++ b/mlir/test/Dialect/Linalg/loops.mlir @@ -1318,14 +1318,15 @@ func @conv1d_no_symbols(%in : memref, %filter : memref, %out : mem // CHECKPARALLEL: %[[c1:.*]] = constant 1 : index // CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref // CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]]) = (%[[c0]], %[[c0]]) to (%[[dim1]], %[[dim0]]) step (%[[c1]], %[[c1]]) { -// CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[b]], %[[m]]) -// CHECKPARALLEL: %[[vb:.*]] = load %[[arg0]][%[[aff]]] : memref -// CHECKPARALLEL: %[[va:.*]] = load %[[arg1]][%[[m]]] : memref -// CHECKPARALLEL: %[[vc:.*]] = load %[[arg2]][%[[b]]] : memref -// CHECKPARALLEL: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 -// CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 -// CHECKPARALLEL: store %[[res]], %[[arg2]][%[[b]]] : memref +// CHECKPARALLEL: scf.parallel (%[[b:.*]]) = (%[[c0]]) to (%[[dim1]]) step (%[[c1]]) { +// CHECKPARALLEL: scf.for %[[m:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] { +// CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[b]], %[[m]]) +// CHECKPARALLEL: %[[vb:.*]] = load %[[arg0]][%[[aff]]] : memref +// CHECKPARALLEL: %[[va:.*]] = load %[[arg1]][%[[m]]] : memref +// CHECKPARALLEL: %[[vc:.*]] = load %[[arg2]][%[[b]]] : memref +// CHECKPARALLEL: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 +// CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 +// CHECKPARALLEL: store %[[res]], %[[arg2]][%[[b]]] : memref func @conv2d_no_symbols(%in : memref, %filter : memref, %out : memref) -> () { @@ -1367,15 +1368,17 @@ func @conv2d_no_symbols(%in : memref, %filter : memref, %out : // CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref // CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg2]], %[[c0]] : memref // CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKPARALLEL: scf.parallel (%[[arg3:.*]], %[[arg4:.*]], %[[arg5:.*]], %[[arg6:.*]]) = (%[[c0]], %[[c0]], %[[c0]], %[[c0]]) to (%[[dim2]], %[[dim3]], %[[dim0]], %[[dim1]]) step (%[[c1]], %[[c1]], %[[c1]], %[[c1]]) { -// CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg5]]) -// CHECKPARALLEL: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg6]]) -// CHECKPARALLEL: %[[vb:.*]] = load %[[arg0]][%[[aff]], %[[aff2]]] : memref -// CHECKPARALLEL: %[[va:.*]] = load %[[arg1]][%[[arg5]], %[[arg6]]] : memref -// CHECKPARALLEL: %[[vc:.*]] = load %[[arg2]][%[[arg3]], %[[arg4]]] : memref -// CHECKPARALLEL: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 -// CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 -// CHECKPARALLEL: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]]] : memref +// CHECKPARALLEL: scf.parallel (%[[arg3:.*]], %[[arg4:.*]]) = (%[[c0]], %[[c0]]) to (%[[dim2]], %[[dim3]]) step (%[[c1]], %[[c1]]) { +// CHECKPARALLEL: scf.for %[[arg5:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] { +// CHECKPARALLEL: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] { +// CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg5]]) +// CHECKPARALLEL: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg6]]) +// CHECKPARALLEL: %[[vb:.*]] = load 
%[[arg0]][%[[aff]], %[[aff2]]] : memref +// CHECKPARALLEL: %[[va:.*]] = load %[[arg1]][%[[arg5]], %[[arg6]]] : memref +// CHECKPARALLEL: %[[vc:.*]] = load %[[arg2]][%[[arg3]], %[[arg4]]] : memref +// CHECKPARALLEL: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 +// CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 +// CHECKPARALLEL: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]]] : memref func @conv3d_no_symbols(%in : memref, %filter : memref, %out : memref) -> () { @@ -1427,13 +1430,16 @@ func @conv3d_no_symbols(%in : memref, %filter : memref, %o // CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg2]], %[[c0]] : memref // CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg2]], %[[c1]] : memref // CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg2]], %[[c2]] : memref -// CHECKPARALLEL: scf.parallel (%[[arg3:.*]], %[[arg4:.*]], %[[arg5:.*]], %[[arg6:.*]], %[[arg7:.*]], %[[arg8:.*]]) = (%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]) to (%[[dim3]], %[[dim4]], %[[dim5]], %[[dim0]], %[[dim1]], %[[dim2]]) step (%[[c1]], %[[c1]], %[[c1]], %[[c1]], %[[c1]], %[[c1]]) { -// CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg6]]) -// CHECKPARALLEL: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg7]]) -// CHECKPARALLEL: %[[aff3:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg5]], %[[arg8]]) -// CHECKPARALLEL: %[[vb:.*]] = load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref -// CHECKPARALLEL: %[[va:.*]] = load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref -// CHECKPARALLEL: %[[vc:.*]] = load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref -// CHECKPARALLEL: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 -// CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 -// CHECKPARALLEL: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref +// CHECKPARALLEL: scf.parallel (%[[arg3:.*]], %[[arg4:.*]], %[[arg5:.*]]) = (%[[c0]], %[[c0]], %[[c0]]) to (%[[dim3]], %[[dim4]], %[[dim5]]) step (%[[c1]], %[[c1]], %[[c1]]) { +// CHECKPARALLEL: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] { +// CHECKPARALLEL: scf.for %[[arg7:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] { +// CHECKPARALLEL: scf.for %[[arg8:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] { +// CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg6]]) +// CHECKPARALLEL: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg7]]) +// CHECKPARALLEL: %[[aff3:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg5]], %[[arg8]]) +// CHECKPARALLEL: %[[vb:.*]] = load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref +// CHECKPARALLEL: %[[va:.*]] = load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref +// CHECKPARALLEL: %[[vc:.*]] = load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref +// CHECKPARALLEL: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 +// CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 +// CHECKPARALLEL: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref From 27cd187587eb6bb81f73533a1e05be24292a0d8b Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 9 Sep 2020 10:23:00 -0500 Subject: [PATCH 0171/1079] [DSE] Add testcase that uses masked loads and stores --- .../DeadStoreElimination/masked-dead-store.ll | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll diff --git a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll new file mode 100644 index 
0000000000000..03d88b1757dee --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -tbaa -dse -S < %s | FileCheck %s +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" + +define dllexport i32 @f0(i8** %a0, i8** %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) #0 { +; CHECK-LABEL: @f0( +; CHECK-NEXT: b0: +; CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds i8*, i8** [[A0:%.*]], i32 [[A2:%.*]] +; CHECK-NEXT: [[V1:%.*]] = load i8*, i8** [[V0]], align 4, [[TBAA0:!tbaa !.*]] +; CHECK-NEXT: [[V2:%.*]] = getelementptr i8, i8* [[V1]], i32 [[A3:%.*]] +; CHECK-NEXT: [[V3:%.*]] = bitcast i8* [[V2]] to <128 x i8>* +; CHECK-NEXT: tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> , <128 x i8>* [[V3]], i32 32, <128 x i1> ), [[TBAA3:!tbaa !.*]] +; CHECK-NEXT: [[V6:%.*]] = getelementptr inbounds i8*, i8** [[A1:%.*]], i32 [[A4:%.*]] +; CHECK-NEXT: [[V7:%.*]] = load i8*, i8** [[V6]], align 4, [[TBAA6:!tbaa !.*]] +; CHECK-NEXT: [[V8:%.*]] = getelementptr i8, i8* [[V7]], i32 [[A5:%.*]] +; CHECK-NEXT: [[V9:%.*]] = bitcast i8* [[V8]] to <128 x i8>* +; CHECK-NEXT: [[V10:%.*]] = tail call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[V9]], i32 32, <128 x i1> , <128 x i8> undef), [[TBAA8:!tbaa !.*]] +; CHECK-NEXT: [[V11:%.*]] = shufflevector <128 x i8> [[V10]], <128 x i8> undef, <32 x i32> +; CHECK-NEXT: [[V14:%.*]] = shufflevector <32 x i8> [[V11]], <32 x i8> undef, <128 x i32> +; CHECK-NEXT: tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[V14]], <128 x i8>* [[V3]], i32 32, <128 x i1> ), [[TBAA3]] +; CHECK-NEXT: [[V16:%.*]] = shufflevector <128 x i8> [[V14]], <128 x i8> undef, <32 x i32> +; CHECK-NEXT: [[V17:%.*]] = getelementptr inbounds i8*, i8** [[A1]], i32 [[A6:%.*]] +; CHECK-NEXT: [[V18:%.*]] = load i8*, i8** [[V17]], align 4, [[TBAA6]] +; CHECK-NEXT: [[V19:%.*]] = getelementptr i8, i8* [[V18]], i32 [[A7:%.*]] +; CHECK-NEXT: [[V20:%.*]] = bitcast i8* [[V19]] to <128 x i8>* +; CHECK-NEXT: [[V21:%.*]] = tail call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[V20]], i32 32, <128 x i1> , <128 x i8> undef), [[TBAA8]] +; CHECK-NEXT: [[V22:%.*]] = shufflevector <128 x i8> [[V21]], <128 x i8> undef, <32 x i32> +; CHECK-NEXT: [[V23:%.*]] = icmp ugt <32 x i8> [[V16]], [[V22]] +; CHECK-NEXT: [[V24:%.*]] = select <32 x i1> [[V23]], <32 x i8> [[V16]], <32 x i8> [[V22]] +; CHECK-NEXT: [[V25:%.*]] = shufflevector <32 x i8> [[V24]], <32 x i8> undef, <128 x i32> +; CHECK-NEXT: tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[V25]], <128 x i8>* [[V3]], i32 32, <128 x i1> ), [[TBAA3]] +; CHECK-NEXT: ret i32 0 +; +b0: + %v0 = getelementptr inbounds i8*, i8** %a0, i32 %a2 + %v1 = load i8*, i8** %v0, align 4, !tbaa !0 + %v2 = getelementptr i8, i8* %v1, i32 %a3 + %v3 = bitcast i8* %v2 to <128 x i8>* + tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> , <128 x i8>* %v3, i32 32, <128 x i1> ), !tbaa !3 + %v6 = getelementptr inbounds i8*, i8** %a1, i32 %a4 + %v7 = load i8*, i8** %v6, align 4, !tbaa !6 + %v8 = getelementptr i8, i8* %v7, i32 %a5 + %v9 = bitcast i8* %v8 to <128 x i8>* + %v10 = tail call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %v9, i32 32, <128 x i1> , <128 x i8> undef), !tbaa !8 + %v11 = shufflevector <128 x i8> %v10, <128 x i8> undef, <32 x i32> + %v14 = shufflevector 
<32 x i8> %v11, <32 x i8> undef, <128 x i32> + tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %v14, <128 x i8>* %v3, i32 32, <128 x i1> ), !tbaa !3 + %v16 = shufflevector <128 x i8> %v14, <128 x i8> undef, <32 x i32> + %v17 = getelementptr inbounds i8*, i8** %a1, i32 %a6 + %v18 = load i8*, i8** %v17, align 4, !tbaa !6 + %v19 = getelementptr i8, i8* %v18, i32 %a7 + %v20 = bitcast i8* %v19 to <128 x i8>* + %v21 = tail call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %v20, i32 32, <128 x i1> , <128 x i8> undef), !tbaa !8 + %v22 = shufflevector <128 x i8> %v21, <128 x i8> undef, <32 x i32> + %v23 = icmp ugt <32 x i8> %v16, %v22 + %v24 = select <32 x i1> %v23, <32 x i8> %v16, <32 x i8> %v22 + %v25 = shufflevector <32 x i8> %v24, <32 x i8> undef, <128 x i32> + tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %v25, <128 x i8>* %v3, i32 32, <128 x i1> ), !tbaa !3 + ret i32 0 +} + +declare void @llvm.masked.store.v128i8.p0v128i8(<128 x i8>, <128 x i8>*, i32 immarg, <128 x i1>) #1 +declare <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>*, i32 immarg, <128 x i1>, <128 x i8>) #2 + +attributes #0 = { nounwind willreturn } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind readonly willreturn } + +!0 = !{!1, !1, i64 0} +!1 = !{!"0x2cf74d0", !2, i64 0} +!2 = !{!"tvm-tbaa"} +!3 = !{!4, !4, i64 0} +!4 = !{!"i8", !5, i64 0} +!5 = !{!"0x2c6ebb0", !2, i64 0} +!6 = !{!7, !7, i64 0} +!7 = !{!"0x2cff870", !2, i64 0} +!8 = !{!9, !9, i64 0} +!9 = !{!"i8", !10, i64 0} +!10 = !{!"0x2c6c3c0", !2, i64 0} From 6e45b989340607682d5ac95285ea7faf3cb2a030 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 9 Sep 2020 16:33:19 +0100 Subject: [PATCH 0172/1079] X86CallFrameOptimization.cpp - use const references where possible. NFCI. --- llvm/lib/Target/X86/X86CallFrameOptimization.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86CallFrameOptimization.cpp b/llvm/lib/Target/X86/X86CallFrameOptimization.cpp index caa1f79524750..6125845a337f9 100644 --- a/llvm/lib/Target/X86/X86CallFrameOptimization.cpp +++ b/llvm/lib/Target/X86/X86CallFrameOptimization.cpp @@ -202,7 +202,7 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF, Align StackAlign = TFL->getStackAlign(); int64_t Advantage = 0; - for (auto CC : CallSeqVector) { + for (const auto &CC : CallSeqVector) { // Call sites where no parameters are passed on the stack // do not affect the cost, since there needs to be no // stack adjustment. @@ -265,7 +265,7 @@ bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) { if (!isProfitable(MF, CallSeqVector)) return false; - for (auto CC : CallSeqVector) { + for (const auto &CC : CallSeqVector) { if (CC.UsePush) { adjustCallSequence(MF, CC); Changed = true; @@ -288,13 +288,13 @@ X86CallFrameOptimization::classifyInstruction( case X86::AND16mi8: case X86::AND32mi8: case X86::AND64mi8: { - MachineOperand ImmOp = MI->getOperand(X86::AddrNumOperands); + const MachineOperand &ImmOp = MI->getOperand(X86::AddrNumOperands); return ImmOp.getImm() == 0 ? Convert : Exit; } case X86::OR16mi8: case X86::OR32mi8: case X86::OR64mi8: { - MachineOperand ImmOp = MI->getOperand(X86::AddrNumOperands); + const MachineOperand &ImmOp = MI->getOperand(X86::AddrNumOperands); return ImmOp.getImm() == -1 ? Convert : Exit; } case X86::MOV32mi: @@ -506,7 +506,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, // replace uses. 
for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) {
     MachineBasicBlock::iterator Store = *Context.ArgStoreVector[Idx];
-    MachineOperand PushOp = Store->getOperand(X86::AddrNumOperands);
+    const MachineOperand &PushOp = Store->getOperand(X86::AddrNumOperands);
     MachineBasicBlock::iterator Push = nullptr;
     unsigned PushOpcode;
     switch (Store->getOpcode()) {

From ae209397b1733f31e8fa260722aaee49cf3f0f4b Mon Sep 17 00:00:00 2001
From: Joseph Huber
Date: Fri, 4 Sep 2020 15:03:49 -0400
Subject: [PATCH 0173/1079] [OpenMP] Begin Printing Information Dumps In Libomptarget and Plugins

Summary:
This patch starts adding support for information dumps to libomptarget and the RTL plugins. The information printing is controlled by the LIBOMPTARGET_INFO environment variable introduced in D86483. The goal of this patch is to provide the user with additional information about the device during kernel execution and with information dumps in the case of failure. This patch adds the ability to dump the pointer mapping table as well as print the number of blocks and threads in the CUDA RTL.

Reviewers: jdoerfert, gkistanova, ye-luo

Subscribers: guansong, openmp-commits, sstefan1, yaxunl, ye-luo

Tags: #OpenMP

Differential Revision: https://reviews.llvm.org/D87165
---
 openmp/libomptarget/include/Debug.h | 25 +++++++++++++++-
 openmp/libomptarget/plugins/cuda/src/rtl.cpp | 30 +++++++++++++-------
 openmp/libomptarget/src/interface.cpp | 24 ++++++++++++++--
 openmp/libomptarget/test/offloading/info.c | 15 ++++++++++
 4 files changed, 79 insertions(+), 15 deletions(-)
 create mode 100644 openmp/libomptarget/test/offloading/info.c

diff --git a/openmp/libomptarget/include/Debug.h b/openmp/libomptarget/include/Debug.h
index b7092dd61a3d8..4f42794e1bcad 100644
--- a/openmp/libomptarget/include/Debug.h
+++ b/openmp/libomptarget/include/Debug.h
@@ -70,23 +70,26 @@ static inline int getDebugLevel() {
 #define GETNAME2(name) #name
 #define GETNAME(name) GETNAME2(name)

-// Messaging interface
+/// Print a generic message string from libomptarget or a plugin RTL
 #define MESSAGE0(_str) \
   do { \
     fprintf(stderr, GETNAME(TARGET_NAME) " message: %s\n", _str); \
   } while (0)

+/// Print a printf-style formatted message from libomptarget or a plugin RTL
 #define MESSAGE(_str, ...) \
   do { \
     fprintf(stderr, GETNAME(TARGET_NAME) " message: " _str "\n", __VA_ARGS__); \
   } while (0)

+/// Print a fatal error message with an error string and error identifier
 #define FATAL_MESSAGE0(_num, _str) \
   do { \
     fprintf(stderr, GETNAME(TARGET_NAME) " fatal error %d: %s\n", _num, _str); \
     abort(); \
   } while (0)

+/// Print a fatal error message with a printf string and error identifier
 #define FATAL_MESSAGE(_num, _str, ...) \
   do { \
     fprintf(stderr, GETNAME(TARGET_NAME) " fatal error %d:" _str "\n", _num, \
@@ -94,12 +97,20 @@ static inline int getDebugLevel() {
     abort(); \
   } while (0)

+/// Print a generic error string from libomptarget or a plugin RTL
 #define FAILURE_MESSAGE(...) \
   do { \
     fprintf(stderr, GETNAME(TARGET_NAME) " error: "); \
     fprintf(stderr, __VA_ARGS__); \
   } while (0)

+/// Print a generic information string used if LIBOMPTARGET_INFO=1
+#define INFO_MESSAGE(_num, ...) \
+  do { \
+    fprintf(stderr, GETNAME(TARGET_NAME) " device %d info: ", _num); \
+    fprintf(stderr, __VA_ARGS__); \
+  } while (0)
+
 // Debugging messages
 #ifdef OMPTARGET_DEBUG
 #include
@@ -110,6 +121,7 @@ static inline int getDebugLevel() {
     fprintf(stderr, __VA_ARGS__); \
   }

+/// Emit a message for debugging
 #define DP(...)
 \
   do { \
     if (getDebugLevel() > 0) { \
@@ -117,6 +129,7 @@ static inline int getDebugLevel() {
     } \
   } while (false)

+/// Emit a message for debugging or failure if debugging is disabled
 #define REPORT(...) \
   do { \
     if (getDebugLevel() > 0) { \
@@ -133,4 +146,14 @@ static inline int getDebugLevel() {
 #define REPORT(...) FAILURE_MESSAGE(__VA_ARGS__);
 #endif // OMPTARGET_DEBUG

+/// Emit a message giving the user extra information about the runtime if LIBOMPTARGET_INFO is set
+#define INFO(_id, ...) \
+  do { \
+    if (getDebugLevel() > 0) { \
+      DEBUGP(DEBUG_PREFIX, __VA_ARGS__); \
+    } else if (getInfoLevel() > 0) { \
+      INFO_MESSAGE(_id, __VA_ARGS__); \
+    } \
+  } while (false)
+
 #endif // _OMPTARGET_DEBUG_H

diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index 2675f83ae28f2..1a0bffb9557c3 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -29,7 +29,7 @@
 #ifdef OMPTARGET_DEBUG
 #define CUDA_ERR_STRING(err) \
   do { \
-    if (getDebugLevel() > 0) { \
+    if (getDebugLevel() > 0) { \
       const char *errStr; \
       cuGetErrorString(err, &errStr); \
       DP("CUDA error is: %s\n", errStr); \
@@ -277,14 +277,15 @@ class DeviceRTLTy {
     E.Entries.push_back(entry);
   }

-  // Return true if the entry is associated with device
-  bool findOffloadEntry(const int DeviceId, const void *Addr) const {
+  // Return a pointer to the entry associated with the given address
+  const __tgt_offload_entry *getOffloadEntry(const int DeviceId,
+                                             const void *Addr) const {
     for (const __tgt_offload_entry &Itr :
          DeviceData[DeviceId].FuncGblEntries.back().Entries)
       if (Itr.addr == Addr)
-        return true;
+        return &Itr;

-    return false;
+    return nullptr;
   }

   // Return the pointer to the target entries table
@@ -492,9 +493,11 @@ class DeviceRTLTy {
       DeviceData[DeviceId].BlocksPerGrid = EnvTeamLimit;
     }

-    DP("Max number of CUDA blocks %d, threads %d & warp size %d\n",
-       DeviceData[DeviceId].BlocksPerGrid, DeviceData[DeviceId].ThreadsPerBlock,
-       DeviceData[DeviceId].WarpSize);
+    INFO(DeviceId,
+         "Device supports up to %d CUDA blocks and %d threads with a "
+         "warp size of %d\n",
+         DeviceData[DeviceId].BlocksPerGrid,
+         DeviceData[DeviceId].ThreadsPerBlock, DeviceData[DeviceId].WarpSize);

     // Set default number of teams
     if (EnvNumTeams > 0) {
@@ -926,9 +929,14 @@ class DeviceRTLTy {
       CudaBlocksPerGrid = TeamNum;
     }

-    // Run on the device.
-    DP("Launch kernel with %d blocks and %d threads\n", CudaBlocksPerGrid,
-       CudaThreadsPerBlock);
+    INFO(DeviceId,
+         "Launching kernel %s with %d blocks and %d threads in %s "
+         "mode\n",
+         (getOffloadEntry(DeviceId, TgtEntryPtr))
+             ? getOffloadEntry(DeviceId, TgtEntryPtr)->name
+             : "(null)",
+         CudaBlocksPerGrid, CudaThreadsPerBlock,
+         (KernelInfo->ExecutionMode == SPMD) ?
"SPMD" : "Generic"); CUstream Stream = getStream(DeviceId, AsyncInfo); Err = cuLaunchKernel(KernelInfo->Func, CudaBlocksPerGrid, /* gridDimY */ 1, diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index d22e5978c20af..084f2ac5aee3c 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -16,6 +16,7 @@ #include "rtl.h" #include +#include #include #include @@ -24,8 +25,22 @@ kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default; std::mutex TargetOffloadMtx; //////////////////////////////////////////////////////////////////////////////// -/// manage the success or failure of a target construct +/// dump a table of all the host-target pointer pairs on failure +static void dumpTargetPointerMappings() { + for (const auto &Device : Devices) { + fprintf(stderr, "Device %d:\n", Device.DeviceID); + fprintf(stderr, "%-18s %-18s %s\n", "Host Ptr", "Target Ptr", "Size (B)"); + for (const auto &HostTargetMap : Device.HostDataToTargetMap) { + fprintf(stderr, DPxMOD " " DPxMOD " %lu\n", + DPxPTR(HostTargetMap.HstPtrBegin), + DPxPTR(HostTargetMap.TgtPtrBegin), + HostTargetMap.HstPtrEnd - HostTargetMap.HstPtrBegin); + } + } +} +//////////////////////////////////////////////////////////////////////////////// +/// manage the success or failure of a target construct static void HandleDefaultTargetOffload() { TargetOffloadMtx.lock(); if (TargetOffloadPolicy == tgt_default) { @@ -60,8 +75,11 @@ static void HandleTargetOutcome(bool success) { break; case tgt_mandatory: if (!success) { - if (getInfoLevel() > 0) - MESSAGE0("LIBOMPTARGET_INFO is not supported yet"); + if (getInfoLevel() > 1) + dumpTargetPointerMappings(); + else + FAILURE_MESSAGE("run with env LIBOMPTARGET_INFO>1 to dump tables\n"); + FATAL_MESSAGE0(1, "failure of target construct while offloading is mandatory"); } break; diff --git a/openmp/libomptarget/test/offloading/info.c b/openmp/libomptarget/test/offloading/info.c new file mode 100644 index 0000000000000..e0d3f1a0e94c1 --- /dev/null +++ b/openmp/libomptarget/test/offloading/info.c @@ -0,0 +1,15 @@ +// RUN: %libomptarget-compile-nvptx64-nvidia-cuda && env LIBOMPTARGET_INFO=1 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda -allow-empty -check-prefix=INFO + +#include +#include + +int main() { + int ptr = 1; + +// INFO: CUDA device {{[0-9]+}} info: Device supports up to {{[0-9]+}} CUDA blocks and {{[0-9]+}} threads with a warp size of {{[0-9]+}} +// INFO: CUDA device {{[0-9]+}} info: Launching kernel {{.*}} with {{[0-9]+}} blocks and {{[0-9]+}} threads in Generic mode +#pragma omp target map(tofrom:ptr) + {ptr = 1;} + + return 0; +} From e59d829971e7703042f414d226caba1affe2dfe4 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Wed, 9 Sep 2020 08:32:51 -0700 Subject: [PATCH 0174/1079] [libc][obvious] Fix strtok_r signature in the spec. --- libc/spec/posix.td | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libc/spec/posix.td b/libc/spec/posix.td index c20cbefe42ce0..1bf64f082c62b 100644 --- a/libc/spec/posix.td +++ b/libc/spec/posix.td @@ -228,7 +228,9 @@ def POSIX : StandardSpec<"POSIX"> { FunctionSpec< "strtok_r", RetValSpec, - [ArgSpec, ArgSpec] + [ArgSpec, + ArgSpec, + ArgSpec] >, ] >; From 4b15fc9ddb4d9702a1466e9c0db44d692d1531fb Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 9 Sep 2020 09:55:06 -0700 Subject: [PATCH 0175/1079] [NFC][MLInliner] Don't initialize in an assert. Since the build bots have assertions enabled, this flew under the radar. 
--- llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp index dc426aaccb22a..5c3a6c41ad432 100644 --- a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp +++ b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp @@ -171,7 +171,7 @@ unsigned getMaxDominatorTreeDepth(const Function &F, IRToNativeSizeLearning::FunctionFeatures IRToNativeSizeLearning::getFunctionFeatures(Function &F, FunctionAnalysisManager &FAM) { - assert(ensureSortedTuples() && "expected lazy initialization"); + ensureSortedTuples(); auto &DomTree = FAM.getResult(F); FunctionFeatures FF; From fc4bff0cd37fa84ee74e6dff7170b643df3ffa42 Mon Sep 17 00:00:00 2001 From: Olivier Giroux Date: Wed, 9 Sep 2020 10:00:09 -0700 Subject: [PATCH 0176/1079] Update atomic feature macros, synopsis, signatures to match C++20. Improve test coverage for non-lock-free atomics. --- libcxx/docs/FeatureTestMacroTable.rst | 12 ++ libcxx/include/atomic | 193 ++++++------------ libcxx/include/version | 24 +++ .../atomics.flag/atomic_flag_test.pass.cpp | 39 ++++ .../atomic_flag_test_explicit.pass.cpp | 111 ++++++++++ .../isalwayslockfree.pass.cpp | 5 + .../atomic_helpers.h | 42 ++++ libcxx/test/std/atomics/types.pass.cpp | 71 ++++++- .../atomic.version.pass.cpp | 164 ++++++++++++++- .../concepts.version.pass.cpp | 61 ++++-- .../execution.version.pass.cpp | 70 +++++-- .../memory.version.pass.cpp | 26 +++ .../version.version.pass.cpp | 156 ++++++++++++++ libcxx/test/support/cmpxchg_loop.h | 16 +- .../generate_feature_test_macro_components.py | 51 +++++ 15 files changed, 856 insertions(+), 185 deletions(-) create mode 100644 libcxx/test/std/atomics/atomics.flag/atomic_flag_test.pass.cpp create mode 100644 libcxx/test/std/atomics/atomics.flag/atomic_flag_test_explicit.pass.cpp diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index f5c6e5b8251aa..61773381c15f8 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -170,8 +170,20 @@ Status ------------------------------------------------------------------- ``__cpp_lib_array_constexpr`` ``201811L`` ------------------------------------------------- ----------------- + ``__cpp_lib_atomic_flag_test`` ``201907L`` + ------------------------------------------------- ----------------- + ``__cpp_lib_atomic_float`` *unimplemented* + ------------------------------------------------- ----------------- + ``__cpp_lib_atomic_lock_free_type_aliases`` ``201907L`` + ------------------------------------------------- ----------------- ``__cpp_lib_atomic_ref`` *unimplemented* ------------------------------------------------- ----------------- + ``__cpp_lib_atomic_shared_ptr`` *unimplemented* + ------------------------------------------------- ----------------- + ``__cpp_lib_atomic_value_initialization`` *unimplemented* + ------------------------------------------------- ----------------- + ``__cpp_lib_atomic_wait`` ``201907L`` + ------------------------------------------------- ----------------- ``__cpp_lib_bind_front`` *unimplemented* ------------------------------------------------- ----------------- ``__cpp_lib_bit_cast`` *unimplemented* diff --git a/libcxx/include/atomic b/libcxx/include/atomic index 9c28986537882..be81f6491edf6 100644 --- a/libcxx/include/atomic +++ b/libcxx/include/atomic @@ -16,9 +16,12 @@ namespace std { -// feature test macro +// feature test macro 
[version.syn] -#define __cpp_lib_atomic_is_always_lock_free // as specified by SG10 +#define __cpp_lib_atomic_is_always_lock_free +#define __cpp_lib_atomic_flag_test +#define __cpp_lib_atomic_lock_free_type_aliases +#define __cpp_lib_atomic_wait // order and consistency @@ -108,6 +111,7 @@ template <> struct atomic { using value_type = integral; + using difference_type = value_type; static constexpr bool is_always_lock_free; bool is_lock_free() const volatile noexcept; @@ -190,6 +194,7 @@ template struct atomic { using value_type = T*; + using difference_type = ptrdiff_t; static constexpr bool is_always_lock_free; bool is_lock_free() const volatile noexcept; @@ -1245,10 +1250,10 @@ template _LIBCPP_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_strong(volatile __cxx_atomic_lock_impl<_Tp>* __a, _Tp* __expected, _Tp __value, memory_order, memory_order) { - __a->__lock(); _Tp __temp; + __a->__lock(); __cxx_atomic_assign_volatile(__temp, __a->__a_value); - bool __ret = __temp == *__expected; + bool __ret = (memcmp(&__temp, __expected, sizeof(_Tp)) == 0); if(__ret) __cxx_atomic_assign_volatile(__a->__a_value, __value); else @@ -1261,11 +1266,11 @@ _LIBCPP_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_strong(__cxx_atomic_lock_impl<_Tp>* __a, _Tp* __expected, _Tp __value, memory_order, memory_order) { __a->__lock(); - bool __ret = __a->__a_value == *__expected; + bool __ret = (memcmp(&__a->__a_value, __expected, sizeof(_Tp)) == 0); if(__ret) - __a->__a_value = __value; + memcpy(&__a->__a_value, &__value, sizeof(_Tp)); else - *__expected = __a->__a_value; + memcpy(__expected, &__a->__a_value, sizeof(_Tp)); __a->__unlock(); return __ret; } @@ -1274,10 +1279,10 @@ template _LIBCPP_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_weak(volatile __cxx_atomic_lock_impl<_Tp>* __a, _Tp* __expected, _Tp __value, memory_order, memory_order) { - __a->__lock(); _Tp __temp; + __a->__lock(); __cxx_atomic_assign_volatile(__temp, __a->__a_value); - bool __ret = __temp == *__expected; + bool __ret = (memcmp(&__temp, __expected, sizeof(_Tp)) == 0); if(__ret) __cxx_atomic_assign_volatile(__a->__a_value, __value); else @@ -1290,11 +1295,11 @@ _LIBCPP_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_weak(__cxx_atomic_lock_impl<_Tp>* __a, _Tp* __expected, _Tp __value, memory_order, memory_order) { __a->__lock(); - bool __ret = __a->__a_value == *__expected; + bool __ret = (memcmp(&__a->__a_value, __expected, sizeof(_Tp)) == 0); if(__ret) - __a->__a_value = __value; + memcpy(&__a->__a_value, &__value, sizeof(_Tp)); else - *__expected = __a->__a_value; + memcpy(__expected, &__a->__a_value, sizeof(_Tp)); __a->__unlock(); return __ret; } @@ -1775,6 +1780,7 @@ struct atomic { typedef __atomic_base<_Tp> __base; typedef _Tp value_type; + typedef value_type difference_type; _LIBCPP_INLINE_VISIBILITY atomic() _NOEXCEPT _LIBCPP_DEFAULT _LIBCPP_INLINE_VISIBILITY @@ -1796,6 +1802,7 @@ struct atomic<_Tp*> { typedef __atomic_base<_Tp*> __base; typedef _Tp* value_type; + typedef ptrdiff_t difference_type; _LIBCPP_INLINE_VISIBILITY atomic() _NOEXCEPT _LIBCPP_DEFAULT _LIBCPP_INLINE_VISIBILITY @@ -1872,7 +1879,7 @@ atomic_is_lock_free(const atomic<_Tp>* __o) _NOEXCEPT template _LIBCPP_INLINE_VISIBILITY void -atomic_init(volatile atomic<_Tp>* __o, _Tp __d) _NOEXCEPT +atomic_init(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d) _NOEXCEPT { __cxx_atomic_init(&__o->__a_, __d); } @@ -1880,7 +1887,7 @@ atomic_init(volatile atomic<_Tp>* __o, _Tp __d) _NOEXCEPT template _LIBCPP_INLINE_VISIBILITY void 
-atomic_init(atomic<_Tp>* __o, _Tp __d) _NOEXCEPT +atomic_init(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d) _NOEXCEPT { __cxx_atomic_init(&__o->__a_, __d); } @@ -1890,7 +1897,7 @@ atomic_init(atomic<_Tp>* __o, _Tp __d) _NOEXCEPT template _LIBCPP_INLINE_VISIBILITY void -atomic_store(volatile atomic<_Tp>* __o, _Tp __d) _NOEXCEPT +atomic_store(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d) _NOEXCEPT { __o->store(__d); } @@ -1898,7 +1905,7 @@ atomic_store(volatile atomic<_Tp>* __o, _Tp __d) _NOEXCEPT template _LIBCPP_INLINE_VISIBILITY void -atomic_store(atomic<_Tp>* __o, _Tp __d) _NOEXCEPT +atomic_store(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d) _NOEXCEPT { __o->store(__d); } @@ -1908,7 +1915,7 @@ atomic_store(atomic<_Tp>* __o, _Tp __d) _NOEXCEPT template _LIBCPP_INLINE_VISIBILITY void -atomic_store_explicit(volatile atomic<_Tp>* __o, _Tp __d, memory_order __m) _NOEXCEPT +atomic_store_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d, memory_order __m) _NOEXCEPT _LIBCPP_CHECK_STORE_MEMORY_ORDER(__m) { __o->store(__d, __m); @@ -1917,7 +1924,7 @@ atomic_store_explicit(volatile atomic<_Tp>* __o, _Tp __d, memory_order __m) _NOE template _LIBCPP_INLINE_VISIBILITY void -atomic_store_explicit(atomic<_Tp>* __o, _Tp __d, memory_order __m) _NOEXCEPT +atomic_store_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d, memory_order __m) _NOEXCEPT _LIBCPP_CHECK_STORE_MEMORY_ORDER(__m) { __o->store(__d, __m); @@ -1966,7 +1973,7 @@ atomic_load_explicit(const atomic<_Tp>* __o, memory_order __m) _NOEXCEPT template _LIBCPP_INLINE_VISIBILITY _Tp -atomic_exchange(volatile atomic<_Tp>* __o, _Tp __d) _NOEXCEPT +atomic_exchange(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d) _NOEXCEPT { return __o->exchange(__d); } @@ -1974,7 +1981,7 @@ atomic_exchange(volatile atomic<_Tp>* __o, _Tp __d) _NOEXCEPT template _LIBCPP_INLINE_VISIBILITY _Tp -atomic_exchange(atomic<_Tp>* __o, _Tp __d) _NOEXCEPT +atomic_exchange(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d) _NOEXCEPT { return __o->exchange(__d); } @@ -1984,7 +1991,7 @@ atomic_exchange(atomic<_Tp>* __o, _Tp __d) _NOEXCEPT template _LIBCPP_INLINE_VISIBILITY _Tp -atomic_exchange_explicit(volatile atomic<_Tp>* __o, _Tp __d, memory_order __m) _NOEXCEPT +atomic_exchange_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d, memory_order __m) _NOEXCEPT { return __o->exchange(__d, __m); } @@ -1992,7 +1999,7 @@ atomic_exchange_explicit(volatile atomic<_Tp>* __o, _Tp __d, memory_order __m) _ template _LIBCPP_INLINE_VISIBILITY _Tp -atomic_exchange_explicit(atomic<_Tp>* __o, _Tp __d, memory_order __m) _NOEXCEPT +atomic_exchange_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d, memory_order __m) _NOEXCEPT { return __o->exchange(__d, __m); } @@ -2002,7 +2009,7 @@ atomic_exchange_explicit(atomic<_Tp>* __o, _Tp __d, memory_order __m) _NOEXCEPT template _LIBCPP_INLINE_VISIBILITY bool -atomic_compare_exchange_weak(volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d) _NOEXCEPT +atomic_compare_exchange_weak(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type* __e, typename atomic<_Tp>::value_type __d) _NOEXCEPT { return __o->compare_exchange_weak(*__e, __d); } @@ -2010,7 +2017,7 @@ atomic_compare_exchange_weak(volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d) _NOEX template _LIBCPP_INLINE_VISIBILITY bool -atomic_compare_exchange_weak(atomic<_Tp>* __o, _Tp* __e, _Tp __d) _NOEXCEPT +atomic_compare_exchange_weak(atomic<_Tp>* __o, typename atomic<_Tp>::value_type* 
__e, typename atomic<_Tp>::value_type __d) _NOEXCEPT { return __o->compare_exchange_weak(*__e, __d); } @@ -2020,7 +2027,7 @@ atomic_compare_exchange_weak(atomic<_Tp>* __o, _Tp* __e, _Tp __d) _NOEXCEPT template _LIBCPP_INLINE_VISIBILITY bool -atomic_compare_exchange_strong(volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d) _NOEXCEPT +atomic_compare_exchange_strong(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type* __e, typename atomic<_Tp>::value_type __d) _NOEXCEPT { return __o->compare_exchange_strong(*__e, __d); } @@ -2028,7 +2035,7 @@ atomic_compare_exchange_strong(volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d) _NO template _LIBCPP_INLINE_VISIBILITY bool -atomic_compare_exchange_strong(atomic<_Tp>* __o, _Tp* __e, _Tp __d) _NOEXCEPT +atomic_compare_exchange_strong(atomic<_Tp>* __o, typename atomic<_Tp>::value_type* __e, typename atomic<_Tp>::value_type __d) _NOEXCEPT { return __o->compare_exchange_strong(*__e, __d); } @@ -2038,8 +2045,8 @@ atomic_compare_exchange_strong(atomic<_Tp>* __o, _Tp* __e, _Tp __d) _NOEXCEPT template _LIBCPP_INLINE_VISIBILITY bool -atomic_compare_exchange_weak_explicit(volatile atomic<_Tp>* __o, _Tp* __e, - _Tp __d, +atomic_compare_exchange_weak_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type* __e, + typename atomic<_Tp>::value_type __d, memory_order __s, memory_order __f) _NOEXCEPT _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { @@ -2049,7 +2056,7 @@ atomic_compare_exchange_weak_explicit(volatile atomic<_Tp>* __o, _Tp* __e, template _LIBCPP_INLINE_VISIBILITY bool -atomic_compare_exchange_weak_explicit(atomic<_Tp>* __o, _Tp* __e, _Tp __d, +atomic_compare_exchange_weak_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::value_type* __e, typename atomic<_Tp>::value_type __d, memory_order __s, memory_order __f) _NOEXCEPT _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { @@ -2062,7 +2069,7 @@ template _LIBCPP_INLINE_VISIBILITY bool atomic_compare_exchange_strong_explicit(volatile atomic<_Tp>* __o, - _Tp* __e, _Tp __d, + typename atomic<_Tp>::value_type* __e, typename atomic<_Tp>::value_type __d, memory_order __s, memory_order __f) _NOEXCEPT _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { @@ -2072,8 +2079,8 @@ atomic_compare_exchange_strong_explicit(volatile atomic<_Tp>* __o, template _LIBCPP_INLINE_VISIBILITY bool -atomic_compare_exchange_strong_explicit(atomic<_Tp>* __o, _Tp* __e, - _Tp __d, +atomic_compare_exchange_strong_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::value_type* __e, + typename atomic<_Tp>::value_type __d, memory_order __s, memory_order __f) _NOEXCEPT _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { @@ -2156,10 +2163,10 @@ template _LIBCPP_INLINE_VISIBILITY typename enable_if < - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, + is_pointer<_Tp>::value || (is_integral<_Tp>::value && !is_same<_Tp, bool>::value), _Tp >::type -atomic_fetch_add(volatile atomic<_Tp>* __o, _Tp __op) _NOEXCEPT +atomic_fetch_add(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _NOEXCEPT { return __o->fetch_add(__op); } @@ -2168,26 +2175,10 @@ template _LIBCPP_INLINE_VISIBILITY typename enable_if < - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, + is_pointer<_Tp>::value || (is_integral<_Tp>::value && !is_same<_Tp, bool>::value), _Tp >::type -atomic_fetch_add(atomic<_Tp>* __o, _Tp __op) _NOEXCEPT -{ - return __o->fetch_add(__op); -} - -template -_LIBCPP_INLINE_VISIBILITY -_Tp* -atomic_fetch_add(volatile atomic<_Tp*>* __o, ptrdiff_t __op) _NOEXCEPT -{ - return __o->fetch_add(__op); -} - -template 
-_LIBCPP_INLINE_VISIBILITY -_Tp* -atomic_fetch_add(atomic<_Tp*>* __o, ptrdiff_t __op) _NOEXCEPT +atomic_fetch_add(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _NOEXCEPT { return __o->fetch_add(__op); } @@ -2198,10 +2189,10 @@ template _LIBCPP_INLINE_VISIBILITY typename enable_if < - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, + is_pointer<_Tp>::value || (is_integral<_Tp>::value && !is_same<_Tp, bool>::value), _Tp >::type -atomic_fetch_add_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) _NOEXCEPT +atomic_fetch_add_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT { return __o->fetch_add(__op, __m); } @@ -2210,27 +2201,10 @@ template _LIBCPP_INLINE_VISIBILITY typename enable_if < - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, + is_pointer<_Tp>::value || (is_integral<_Tp>::value && !is_same<_Tp, bool>::value), _Tp >::type -atomic_fetch_add_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) _NOEXCEPT -{ - return __o->fetch_add(__op, __m); -} - -template -_LIBCPP_INLINE_VISIBILITY -_Tp* -atomic_fetch_add_explicit(volatile atomic<_Tp*>* __o, ptrdiff_t __op, - memory_order __m) _NOEXCEPT -{ - return __o->fetch_add(__op, __m); -} - -template -_LIBCPP_INLINE_VISIBILITY -_Tp* -atomic_fetch_add_explicit(atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) _NOEXCEPT +atomic_fetch_add_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT { return __o->fetch_add(__op, __m); } @@ -2241,10 +2215,10 @@ template _LIBCPP_INLINE_VISIBILITY typename enable_if < - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, + is_pointer<_Tp>::value || (is_integral<_Tp>::value && !is_same<_Tp, bool>::value), _Tp >::type -atomic_fetch_sub(volatile atomic<_Tp>* __o, _Tp __op) _NOEXCEPT +atomic_fetch_sub(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _NOEXCEPT { return __o->fetch_sub(__op); } @@ -2253,26 +2227,10 @@ template _LIBCPP_INLINE_VISIBILITY typename enable_if < - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, + is_pointer<_Tp>::value || (is_integral<_Tp>::value && !is_same<_Tp, bool>::value), _Tp >::type -atomic_fetch_sub(atomic<_Tp>* __o, _Tp __op) _NOEXCEPT -{ - return __o->fetch_sub(__op); -} - -template -_LIBCPP_INLINE_VISIBILITY -_Tp* -atomic_fetch_sub(volatile atomic<_Tp*>* __o, ptrdiff_t __op) _NOEXCEPT -{ - return __o->fetch_sub(__op); -} - -template -_LIBCPP_INLINE_VISIBILITY -_Tp* -atomic_fetch_sub(atomic<_Tp*>* __o, ptrdiff_t __op) _NOEXCEPT +atomic_fetch_sub(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _NOEXCEPT { return __o->fetch_sub(__op); } @@ -2283,10 +2241,10 @@ template _LIBCPP_INLINE_VISIBILITY typename enable_if < - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, + is_pointer<_Tp>::value || (is_integral<_Tp>::value && !is_same<_Tp, bool>::value), _Tp >::type -atomic_fetch_sub_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) _NOEXCEPT +atomic_fetch_sub_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT { return __o->fetch_sub(__op, __m); } @@ -2295,27 +2253,10 @@ template _LIBCPP_INLINE_VISIBILITY typename enable_if < - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, + is_pointer<_Tp>::value || (is_integral<_Tp>::value && !is_same<_Tp, bool>::value), _Tp >::type -atomic_fetch_sub_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) _NOEXCEPT -{ - return __o->fetch_sub(__op, __m); -} - -template 
-_LIBCPP_INLINE_VISIBILITY -_Tp* -atomic_fetch_sub_explicit(volatile atomic<_Tp*>* __o, ptrdiff_t __op, - memory_order __m) _NOEXCEPT -{ - return __o->fetch_sub(__op, __m); -} - -template -_LIBCPP_INLINE_VISIBILITY -_Tp* -atomic_fetch_sub_explicit(atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) _NOEXCEPT +atomic_fetch_sub_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT { return __o->fetch_sub(__op, __m); } @@ -2329,7 +2270,7 @@ typename enable_if is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp >::type -atomic_fetch_and(volatile atomic<_Tp>* __o, _Tp __op) _NOEXCEPT +atomic_fetch_and(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op) _NOEXCEPT { return __o->fetch_and(__op); } @@ -2341,7 +2282,7 @@ typename enable_if is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp >::type -atomic_fetch_and(atomic<_Tp>* __o, _Tp __op) _NOEXCEPT +atomic_fetch_and(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op) _NOEXCEPT { return __o->fetch_and(__op); } @@ -2355,7 +2296,7 @@ typename enable_if is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp >::type -atomic_fetch_and_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) _NOEXCEPT +atomic_fetch_and_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op, memory_order __m) _NOEXCEPT { return __o->fetch_and(__op, __m); } @@ -2367,7 +2308,7 @@ typename enable_if is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp >::type -atomic_fetch_and_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) _NOEXCEPT +atomic_fetch_and_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op, memory_order __m) _NOEXCEPT { return __o->fetch_and(__op, __m); } @@ -2381,7 +2322,7 @@ typename enable_if is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp >::type -atomic_fetch_or(volatile atomic<_Tp>* __o, _Tp __op) _NOEXCEPT +atomic_fetch_or(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op) _NOEXCEPT { return __o->fetch_or(__op); } @@ -2393,7 +2334,7 @@ typename enable_if is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp >::type -atomic_fetch_or(atomic<_Tp>* __o, _Tp __op) _NOEXCEPT +atomic_fetch_or(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op) _NOEXCEPT { return __o->fetch_or(__op); } @@ -2407,7 +2348,7 @@ typename enable_if is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp >::type -atomic_fetch_or_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) _NOEXCEPT +atomic_fetch_or_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op, memory_order __m) _NOEXCEPT { return __o->fetch_or(__op, __m); } @@ -2419,7 +2360,7 @@ typename enable_if is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp >::type -atomic_fetch_or_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) _NOEXCEPT +atomic_fetch_or_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op, memory_order __m) _NOEXCEPT { return __o->fetch_or(__op, __m); } @@ -2433,7 +2374,7 @@ typename enable_if is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp >::type -atomic_fetch_xor(volatile atomic<_Tp>* __o, _Tp __op) _NOEXCEPT +atomic_fetch_xor(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op) _NOEXCEPT { return __o->fetch_xor(__op); } @@ -2445,7 +2386,7 @@ typename enable_if is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp >::type -atomic_fetch_xor(atomic<_Tp>* __o, _Tp __op) _NOEXCEPT +atomic_fetch_xor(atomic<_Tp>* __o, typename atomic<_Tp>::value_type 
__op) _NOEXCEPT { return __o->fetch_xor(__op); } @@ -2459,7 +2400,7 @@ typename enable_if is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp >::type -atomic_fetch_xor_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) _NOEXCEPT +atomic_fetch_xor_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op, memory_order __m) _NOEXCEPT { return __o->fetch_xor(__op, __m); } @@ -2471,7 +2412,7 @@ typename enable_if is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp >::type -atomic_fetch_xor_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) _NOEXCEPT +atomic_fetch_xor_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __op, memory_order __m) _NOEXCEPT { return __o->fetch_xor(__op, __m); } diff --git a/libcxx/include/version b/libcxx/include/version index dc53be3937c4c..d18da3d146909 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -24,8 +24,14 @@ __cpp_lib_apply 201603L __cpp_lib_array_constexpr 201811L 201603L // C++17 __cpp_lib_as_const 201510L +__cpp_lib_atomic_flag_test 201907L +__cpp_lib_atomic_float 201711L __cpp_lib_atomic_is_always_lock_free 201603L +__cpp_lib_atomic_lock_free_type_aliases 201907L __cpp_lib_atomic_ref 201806L +__cpp_lib_atomic_shared_ptr 201711L +__cpp_lib_atomic_value_initialization 201911L +__cpp_lib_atomic_wait 201907L __cpp_lib_bind_front 201811L __cpp_lib_bit_cast 201806L __cpp_lib_bool_constant 201505L @@ -218,8 +224,26 @@ __cpp_lib_void_t 201411L # undef __cpp_lib_array_constexpr # define __cpp_lib_array_constexpr 201811L # if !defined(_LIBCPP_HAS_NO_THREADS) +# define __cpp_lib_atomic_flag_test 201907L +# endif +# if !defined(_LIBCPP_HAS_NO_THREADS) +// # define __cpp_lib_atomic_float 201711L +# endif +# if !defined(_LIBCPP_HAS_NO_THREADS) +# define __cpp_lib_atomic_lock_free_type_aliases 201907L +# endif +# if !defined(_LIBCPP_HAS_NO_THREADS) // # define __cpp_lib_atomic_ref 201806L # endif +# if !defined(_LIBCPP_HAS_NO_THREADS) +// # define __cpp_lib_atomic_shared_ptr 201711L +# endif +# if !defined(_LIBCPP_HAS_NO_THREADS) +// # define __cpp_lib_atomic_value_initialization 201911L +# endif +# if !defined(_LIBCPP_HAS_NO_THREADS) +# define __cpp_lib_atomic_wait 201907L +# endif // # define __cpp_lib_bind_front 201811L // # define __cpp_lib_bit_cast 201806L # if !defined(_LIBCPP_NO_HAS_CHAR8_T) diff --git a/libcxx/test/std/atomics/atomics.flag/atomic_flag_test.pass.cpp b/libcxx/test/std/atomics/atomics.flag/atomic_flag_test.pass.cpp new file mode 100644 index 0000000000000..22e4b66d45c5a --- /dev/null +++ b/libcxx/test/std/atomics/atomics.flag/atomic_flag_test.pass.cpp @@ -0,0 +1,39 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// UNSUPPORTED: libcpp-has-no-threads
+
+// <atomic>
+
+// struct atomic_flag
+
+// bool atomic_flag_test(volatile atomic_flag*);
+// bool atomic_flag_test(atomic_flag*);
+
+#include <atomic>
+#include <cassert>
+
+#include "test_macros.h"
+
+int main(int, char**)
+{
+    {
+        std::atomic_flag f;
+        f.clear();
+        assert(atomic_flag_test(&f) == 0);
+        assert(f.test_and_set() == 0);
+        assert(atomic_flag_test(&f) == 1);
+    }
+    {
+        volatile std::atomic_flag f;
+        f.clear();
+        assert(atomic_flag_test(&f) == 0);
+        assert(f.test_and_set() == 0);
+        assert(atomic_flag_test(&f) == 1);
+    }
+
+    return 0;
+}
diff --git a/libcxx/test/std/atomics/atomics.flag/atomic_flag_test_explicit.pass.cpp b/libcxx/test/std/atomics/atomics.flag/atomic_flag_test_explicit.pass.cpp
new file mode 100644
index 0000000000000..45ac737b59846
--- /dev/null
+++ b/libcxx/test/std/atomics/atomics.flag/atomic_flag_test_explicit.pass.cpp
@@ -0,0 +1,111 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// UNSUPPORTED: libcpp-has-no-threads
+
+// <atomic>
+
+// struct atomic_flag
+
+// bool atomic_flag_test_explicit(volatile atomic_flag*, memory_order);
+// bool atomic_flag_test_explicit(atomic_flag*, memory_order);
+
+#include <atomic>
+#include <cassert>
+
+#include "test_macros.h"
+
+int main(int, char**)
+{
+    {
+        std::atomic_flag f;
+        f.clear();
+        assert(atomic_flag_test_explicit(&f, std::memory_order_relaxed) == 0);
+        assert(f.test_and_set() == 0);
+        assert(atomic_flag_test_explicit(&f, std::memory_order_relaxed) == 1);
+    }
+    {
+        std::atomic_flag f;
+        f.clear();
+        assert(atomic_flag_test_explicit(&f, std::memory_order_consume) == 0);
+        assert(f.test_and_set() == 0);
+        assert(atomic_flag_test_explicit(&f, std::memory_order_consume) == 1);
+    }
+    {
+        std::atomic_flag f;
+        f.clear();
+        assert(atomic_flag_test_explicit(&f, std::memory_order_acquire) == 0);
+        assert(f.test_and_set() == 0);
+        assert(atomic_flag_test_explicit(&f, std::memory_order_acquire) == 1);
+    }
+    {
+        std::atomic_flag f;
+        f.clear();
+        assert(atomic_flag_test_explicit(&f, std::memory_order_release) == 0);
+        assert(f.test_and_set() == 0);
+        assert(atomic_flag_test_explicit(&f, std::memory_order_release) == 1);
+    }
+    {
+        std::atomic_flag f;
+        f.clear();
+        assert(atomic_flag_test_explicit(&f, std::memory_order_acq_rel) == 0);
+        assert(f.test_and_set() == 0);
+        assert(atomic_flag_test_explicit(&f, std::memory_order_acq_rel) == 1);
+    }
+    {
+        std::atomic_flag f;
+        f.clear();
+        assert(atomic_flag_test_explicit(&f, std::memory_order_seq_cst) == 0);
+        assert(f.test_and_set() == 0);
+        assert(atomic_flag_test_explicit(&f, std::memory_order_seq_cst) == 1);
+    }
+    {
+        volatile std::atomic_flag f;
+        f.clear();
+        assert(atomic_flag_test_explicit(&f, std::memory_order_relaxed) == 0);
+        assert(f.test_and_set() == 0);
+        assert(atomic_flag_test_explicit(&f, std::memory_order_relaxed) == 1);
+    }
+    {
+        volatile std::atomic_flag f;
+        f.clear();
+        assert(atomic_flag_test_explicit(&f, std::memory_order_consume) == 0);
+        assert(f.test_and_set() == 0);
+        assert(atomic_flag_test_explicit(&f, std::memory_order_consume) == 1);
+    }
+    {
+        volatile std::atomic_flag f;
+        f.clear();
+        assert(atomic_flag_test_explicit(&f,
std::memory_order_acquire) == 0); + assert(f.test_and_set() == 0); + assert(atomic_flag_test_explicit(&f, std::memory_order_acquire) == 1); + } + { + volatile std::atomic_flag f; + f.clear(); + assert(atomic_flag_test_explicit(&f, std::memory_order_release) == 0); + assert(f.test_and_set() == 0); + assert(atomic_flag_test_explicit(&f, std::memory_order_release) == 1); + } + { + volatile std::atomic_flag f; + f.clear(); + assert(atomic_flag_test_explicit(&f, std::memory_order_acq_rel) == 0); + assert(f.test_and_set() == 0); + assert(atomic_flag_test_explicit(&f, std::memory_order_acq_rel) == 1); + } + { + volatile std::atomic_flag f; + f.clear(); + assert(atomic_flag_test_explicit(&f, std::memory_order_seq_cst) == 0); + assert(f.test_and_set() == 0); + assert(atomic_flag_test_explicit(&f, std::memory_order_seq_cst) == 1); + } + + return 0; +} diff --git a/libcxx/test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp b/libcxx/test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp index 34a0689182867..8dd8c345592bf 100644 --- a/libcxx/test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp +++ b/libcxx/test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp @@ -134,6 +134,11 @@ void run() checkLongLongTypes(); static_assert(std::atomic::is_always_lock_free == (2 == ATOMIC_POINTER_LOCK_FREE), ""); static_assert(std::atomic::is_always_lock_free == (2 == ATOMIC_POINTER_LOCK_FREE), ""); + +#if TEST_STD_VER >= 20 + static_assert(std::atomic::is_always_lock_free, ""); + static_assert(std::atomic::is_always_lock_free, ""); +#endif } int main(int, char**) { run(); return 0; } diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h index 65676339c7429..1cb3a3d111144 100644 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h @@ -23,6 +23,37 @@ struct UserAtomicType { return x.i == y.i; } }; +struct WeirdUserAtomicType +{ + char i, j, k; /* the 3 chars of doom */ + + explicit WeirdUserAtomicType(int d = 0) TEST_NOEXCEPT : i(d) {} + + friend bool operator==(const WeirdUserAtomicType& x, const WeirdUserAtomicType& y) + { return x.i == y.i; } +}; + +struct PaddedUserAtomicType +{ + char i; int j; /* probably lock-free? */ + + explicit PaddedUserAtomicType(int d = 0) TEST_NOEXCEPT : i(d) {} + + friend bool operator==(const PaddedUserAtomicType& x, const PaddedUserAtomicType& y) + { return x.i == y.i; } +}; + +struct LargeUserAtomicType +{ + int i, j[127]; /* decidedly not lock-free */ + + LargeUserAtomicType(int d = 0) TEST_NOEXCEPT : i(d) + {} + + friend bool operator==(const LargeUserAtomicType& x, const LargeUserAtomicType& y) + { return x.i == y.i; } +}; + template < template class TestFunctor > struct TestEachIntegralType { void operator()() const { @@ -58,8 +89,19 @@ struct TestEachAtomicType { void operator()() const { TestEachIntegralType()(); TestFunctor()(); + TestFunctor()(); +#ifndef __APPLE__ + /* + These aren't going to be lock-free, + so some libatomic.a is necessary. 
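+        (Neither a 3-byte struct nor a 512-byte one matches a native
+        compare-and-swap width, so std::atomic<T> for these lowers to
+        __atomic_* library calls supplied by libatomic.)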
+ */ + TestFunctor()(); + TestFunctor()(); +#endif TestFunctor()(); TestFunctor()(); + TestFunctor()(); + TestFunctor()(); } }; diff --git a/libcxx/test/std/atomics/types.pass.cpp b/libcxx/test/std/atomics/types.pass.cpp index f891f90e116bf..5740b758035ea 100644 --- a/libcxx/test/std/atomics/types.pass.cpp +++ b/libcxx/test/std/atomics/types.pass.cpp @@ -30,15 +30,43 @@ #include "test_macros.h" +template +struct test_atomic +{ + test_atomic() + { + A a; (void)a; +#if TEST_STD_VER >= 17 + static_assert((std::is_same_v), ""); +#endif + } +}; + template -void -test_atomic() +struct test_atomic { - A a; (void)a; + test_atomic() + { + A a; (void)a; #if TEST_STD_VER >= 17 - static_assert((std::is_same::value), ""); + static_assert((std::is_same_v), ""); + static_assert((std::is_same_v), ""); #endif -} + } +}; + +template +struct test_atomic +{ + test_atomic() + { + A a; (void)a; +#if TEST_STD_VER >= 17 + static_assert((std::is_same_v), ""); + static_assert((std::is_same_v), ""); +#endif + } +}; template void @@ -46,15 +74,30 @@ test() { using A = std::atomic; #if TEST_STD_VER >= 17 - static_assert((std::is_same::value), ""); + static_assert((std::is_same_v), ""); #endif - test_atomic(); + test_atomic::value && !std::is_same::value>(); } struct TriviallyCopyable { int i_; }; +struct WeirdTriviallyCopyable +{ + char i, j, k; /* the 3 chars of doom */ +}; + +struct PaddedTriviallyCopyable +{ + char i; int j; /* probably lock-free? */ +}; + +struct LargeTriviallyCopyable +{ + int i, j[127]; /* decidedly not lock-free */ +}; + int main(int, char**) { test (); @@ -111,13 +154,23 @@ int main(int, char**) test (); test(); + test(); +#ifndef __APPLE__ + /* + These aren't going to be lock-free, + so some libatomic.a is necessary. + */ + test(); + test(); +#endif + test(); test(); test(); #if TEST_STD_VER >= 20 - test_atomic(); - test_atomic(); + test(); + test(); /* test>(); */ diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.pass.cpp index d8f6f548cd23f..d4c63edb5b8a3 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.pass.cpp @@ -15,10 +15,16 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_atomic_is_always_lock_free 201603L [C++17] - __cpp_lib_atomic_ref 201806L [C++2a] - __cpp_lib_char8_t 201811L [C++2a] +/* Constant Value + __cpp_lib_atomic_flag_test 201907L [C++2a] + __cpp_lib_atomic_float 201711L [C++2a] + __cpp_lib_atomic_is_always_lock_free 201603L [C++17] + __cpp_lib_atomic_lock_free_type_aliases 201907L [C++2a] + __cpp_lib_atomic_ref 201806L [C++2a] + __cpp_lib_atomic_shared_ptr 201711L [C++2a] + __cpp_lib_atomic_value_initialization 201911L [C++2a] + __cpp_lib_atomic_wait 201907L [C++2a] + __cpp_lib_char8_t 201811L [C++2a] */ #include @@ -26,34 +32,90 @@ #if TEST_STD_VER < 14 +# ifdef __cpp_lib_atomic_flag_test +# error "__cpp_lib_atomic_flag_test should not be defined before c++2a" +# endif + +# ifdef __cpp_lib_atomic_float +# error "__cpp_lib_atomic_float should not be defined before c++2a" +# endif + # ifdef __cpp_lib_atomic_is_always_lock_free # error "__cpp_lib_atomic_is_always_lock_free should not be defined before c++17" # endif +# ifdef __cpp_lib_atomic_lock_free_type_aliases +# error "__cpp_lib_atomic_lock_free_type_aliases should not be defined before c++2a" +# 
endif + # ifdef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should not be defined before c++2a" # endif +# ifdef __cpp_lib_atomic_shared_ptr +# error "__cpp_lib_atomic_shared_ptr should not be defined before c++2a" +# endif + +# ifdef __cpp_lib_atomic_value_initialization +# error "__cpp_lib_atomic_value_initialization should not be defined before c++2a" +# endif + +# ifdef __cpp_lib_atomic_wait +# error "__cpp_lib_atomic_wait should not be defined before c++2a" +# endif + # ifdef __cpp_lib_char8_t # error "__cpp_lib_char8_t should not be defined before c++2a" # endif #elif TEST_STD_VER == 14 +# ifdef __cpp_lib_atomic_flag_test +# error "__cpp_lib_atomic_flag_test should not be defined before c++2a" +# endif + +# ifdef __cpp_lib_atomic_float +# error "__cpp_lib_atomic_float should not be defined before c++2a" +# endif + # ifdef __cpp_lib_atomic_is_always_lock_free # error "__cpp_lib_atomic_is_always_lock_free should not be defined before c++17" # endif +# ifdef __cpp_lib_atomic_lock_free_type_aliases +# error "__cpp_lib_atomic_lock_free_type_aliases should not be defined before c++2a" +# endif + # ifdef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should not be defined before c++2a" # endif +# ifdef __cpp_lib_atomic_shared_ptr +# error "__cpp_lib_atomic_shared_ptr should not be defined before c++2a" +# endif + +# ifdef __cpp_lib_atomic_value_initialization +# error "__cpp_lib_atomic_value_initialization should not be defined before c++2a" +# endif + +# ifdef __cpp_lib_atomic_wait +# error "__cpp_lib_atomic_wait should not be defined before c++2a" +# endif + # ifdef __cpp_lib_char8_t # error "__cpp_lib_char8_t should not be defined before c++2a" # endif #elif TEST_STD_VER == 17 +# ifdef __cpp_lib_atomic_flag_test +# error "__cpp_lib_atomic_flag_test should not be defined before c++2a" +# endif + +# ifdef __cpp_lib_atomic_float +# error "__cpp_lib_atomic_float should not be defined before c++2a" +# endif + # if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_atomic_is_always_lock_free # error "__cpp_lib_atomic_is_always_lock_free should be defined in c++17" @@ -67,16 +129,58 @@ # endif # endif +# ifdef __cpp_lib_atomic_lock_free_type_aliases +# error "__cpp_lib_atomic_lock_free_type_aliases should not be defined before c++2a" +# endif + # ifdef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should not be defined before c++2a" # endif +# ifdef __cpp_lib_atomic_shared_ptr +# error "__cpp_lib_atomic_shared_ptr should not be defined before c++2a" +# endif + +# ifdef __cpp_lib_atomic_value_initialization +# error "__cpp_lib_atomic_value_initialization should not be defined before c++2a" +# endif + +# ifdef __cpp_lib_atomic_wait +# error "__cpp_lib_atomic_wait should not be defined before c++2a" +# endif + # ifdef __cpp_lib_char8_t # error "__cpp_lib_char8_t should not be defined before c++2a" # endif #elif TEST_STD_VER > 17 +# if !defined(_LIBCPP_HAS_NO_THREADS) +# ifndef __cpp_lib_atomic_flag_test +# error "__cpp_lib_atomic_flag_test should be defined in c++2a" +# endif +# if __cpp_lib_atomic_flag_test != 201907L +# error "__cpp_lib_atomic_flag_test should have the value 201907L in c++2a" +# endif +# else +# ifdef __cpp_lib_atomic_flag_test +# error "__cpp_lib_atomic_flag_test should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" 
+# endif +# endif + +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_atomic_float +# error "__cpp_lib_atomic_float should be defined in c++2a" +# endif +# if __cpp_lib_atomic_float != 201711L +# error "__cpp_lib_atomic_float should have the value 201711L in c++2a" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_atomic_float +# error "__cpp_lib_atomic_float should not be defined because it is unimplemented in libc++!" +# endif +# endif + # if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_atomic_is_always_lock_free # error "__cpp_lib_atomic_is_always_lock_free should be defined in c++2a" @@ -90,6 +194,19 @@ # endif # endif +# if !defined(_LIBCPP_HAS_NO_THREADS) +# ifndef __cpp_lib_atomic_lock_free_type_aliases +# error "__cpp_lib_atomic_lock_free_type_aliases should be defined in c++2a" +# endif +# if __cpp_lib_atomic_lock_free_type_aliases != 201907L +# error "__cpp_lib_atomic_lock_free_type_aliases should have the value 201907L in c++2a" +# endif +# else +# ifdef __cpp_lib_atomic_lock_free_type_aliases +# error "__cpp_lib_atomic_lock_free_type_aliases should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# endif +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should be defined in c++2a" @@ -103,6 +220,45 @@ # endif # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_atomic_shared_ptr +# error "__cpp_lib_atomic_shared_ptr should be defined in c++2a" +# endif +# if __cpp_lib_atomic_shared_ptr != 201711L +# error "__cpp_lib_atomic_shared_ptr should have the value 201711L in c++2a" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_atomic_shared_ptr +# error "__cpp_lib_atomic_shared_ptr should not be defined because it is unimplemented in libc++!" +# endif +# endif + +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_atomic_value_initialization +# error "__cpp_lib_atomic_value_initialization should be defined in c++2a" +# endif +# if __cpp_lib_atomic_value_initialization != 201911L +# error "__cpp_lib_atomic_value_initialization should have the value 201911L in c++2a" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_atomic_value_initialization +# error "__cpp_lib_atomic_value_initialization should not be defined because it is unimplemented in libc++!" +# endif +# endif + +# if !defined(_LIBCPP_HAS_NO_THREADS) +# ifndef __cpp_lib_atomic_wait +# error "__cpp_lib_atomic_wait should be defined in c++2a" +# endif +# if __cpp_lib_atomic_wait != 201907L +# error "__cpp_lib_atomic_wait should have the value 201907L in c++2a" +# endif +# else +# ifdef __cpp_lib_atomic_wait +# error "__cpp_lib_atomic_wait should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# endif +# endif + # if defined(__cpp_char8_t) # ifndef __cpp_lib_char8_t # error "__cpp_lib_char8_t should be defined in c++2a" diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/concepts.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/concepts.version.pass.cpp index 16febf8d3e24a..9ec2157d974ce 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/concepts.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/concepts.version.pass.cpp @@ -1,4 +1,3 @@ - //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
@@ -7,29 +6,53 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// feature macros
+// WARNING: This test was generated by generate_feature_test_macro_components.py
+// and should not be edited manually.
+
+// <version>
-/*  Constant            Value
-    __cpp_lib_concepts  201806L
+// Test the feature test macros defined by <version>
+/*  Constant            Value
+    __cpp_lib_concepts  201806L [C++2a]
 */
-// XFAIL
-// #include <concepts>
-#include <cassert>
+#include <version>
 #include "test_macros.h"
-int main(int, char**)
-{
-//  ensure that the macros that are supposed to be defined in <concepts> are defined.
+#if TEST_STD_VER < 14
-/*
-#if !defined(__cpp_lib_fooby)
-# error "__cpp_lib_fooby is not defined"
-#elif __cpp_lib_fooby < 201606L
-# error "__cpp_lib_fooby has an invalid value"
-#endif
-*/
+# ifdef __cpp_lib_concepts
+# error "__cpp_lib_concepts should not be defined before c++2a"
+# endif
+
+#elif TEST_STD_VER == 14
+
+# ifdef __cpp_lib_concepts
+# error "__cpp_lib_concepts should not be defined before c++2a"
+# endif
+
+#elif TEST_STD_VER == 17
+
+# ifdef __cpp_lib_concepts
+# error "__cpp_lib_concepts should not be defined before c++2a"
+# endif
+
+#elif TEST_STD_VER > 17
+
+# if !defined(_LIBCPP_VERSION)
+# ifndef __cpp_lib_concepts
+# error "__cpp_lib_concepts should be defined in c++2a"
+# endif
+# if __cpp_lib_concepts != 201806L
+# error "__cpp_lib_concepts should have the value 201806L in c++2a"
+# endif
+# else // _LIBCPP_VERSION
+# ifdef __cpp_lib_concepts
+# error "__cpp_lib_concepts should not be defined because it is unimplemented in libc++!"
+# endif
+# endif
+
+#endif // TEST_STD_VER > 17
-  return 0;
-}
+int main(int, char**) { return 0; }
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/execution.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/execution.version.pass.cpp
index b05f41bb1731c..1244efa4aebaf 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/execution.version.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/execution.version.pass.cpp
@@ -1,4 +1,3 @@
-
 //===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -7,29 +6,62 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// feature macros
+// WARNING: This test was generated by generate_feature_test_macro_components.py
+// and should not be edited manually.
+
+// <version>
-/*  Constant             Value
-    __cpp_lib_execution  201603L
+// Test the feature test macros defined by <version>
+/*  Constant             Value
+    __cpp_lib_execution  201603L [C++17]
 */
-// XFAIL
-// #include <execution>
-#include <cassert>
+#include <version>
 #include "test_macros.h"
-int main(int, char**)
-{
-//  ensure that the macros that are supposed to be defined in <execution> are defined.
+#if TEST_STD_VER < 14 -/* -#if !defined(__cpp_lib_fooby) -# error "__cpp_lib_fooby is not defined" -#elif __cpp_lib_fooby < 201606L -# error "__cpp_lib_fooby has an invalid value" -#endif -*/ +# ifdef __cpp_lib_execution +# error "__cpp_lib_execution should not be defined before c++17" +# endif + +#elif TEST_STD_VER == 14 + +# ifdef __cpp_lib_execution +# error "__cpp_lib_execution should not be defined before c++17" +# endif + +#elif TEST_STD_VER == 17 + +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_execution +# error "__cpp_lib_execution should be defined in c++17" +# endif +# if __cpp_lib_execution != 201603L +# error "__cpp_lib_execution should have the value 201603L in c++17" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_execution +# error "__cpp_lib_execution should not be defined because it is unimplemented in libc++!" +# endif +# endif + +#elif TEST_STD_VER > 17 + +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_execution +# error "__cpp_lib_execution should be defined in c++2a" +# endif +# if __cpp_lib_execution != 201603L +# error "__cpp_lib_execution should have the value 201603L in c++2a" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_execution +# error "__cpp_lib_execution should not be defined because it is unimplemented in libc++!" +# endif +# endif + +#endif // TEST_STD_VER > 17 - return 0; -} +int main(int, char**) { return 0; } diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.pass.cpp index 6c845d71febd7..0117fd83a60c6 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.pass.cpp @@ -16,6 +16,7 @@ /* Constant Value __cpp_lib_addressof_constexpr 201603L [C++17] __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] + __cpp_lib_atomic_value_initialization 201911L [C++2a] __cpp_lib_enable_shared_from_this 201603L [C++17] __cpp_lib_make_unique 201304L [C++14] __cpp_lib_ranges 201811L [C++2a] @@ -37,6 +38,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif +# ifdef __cpp_lib_atomic_value_initialization +# error "__cpp_lib_atomic_value_initialization should not be defined before c++2a" +# endif + # ifdef __cpp_lib_enable_shared_from_this # error "__cpp_lib_enable_shared_from_this should not be defined before c++17" # endif @@ -71,6 +76,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif +# ifdef __cpp_lib_atomic_value_initialization +# error "__cpp_lib_atomic_value_initialization should not be defined before c++2a" +# endif + # ifdef __cpp_lib_enable_shared_from_this # error "__cpp_lib_enable_shared_from_this should not be defined before c++17" # endif @@ -120,6 +129,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++17" # endif +# ifdef __cpp_lib_atomic_value_initialization +# error "__cpp_lib_atomic_value_initialization should not be defined before c++2a" +# endif + # ifndef __cpp_lib_enable_shared_from_this # error "__cpp_lib_enable_shared_from_this should be defined in c++17" # endif @@ -187,6 +200,19 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++2a" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_atomic_value_initialization +# error 
"__cpp_lib_atomic_value_initialization should be defined in c++2a" +# endif +# if __cpp_lib_atomic_value_initialization != 201911L +# error "__cpp_lib_atomic_value_initialization should have the value 201911L in c++2a" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_atomic_value_initialization +# error "__cpp_lib_atomic_value_initialization should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_enable_shared_from_this # error "__cpp_lib_enable_shared_from_this should be defined in c++2a" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp index afbee586df3c6..46b2e1f21d183 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp @@ -21,8 +21,14 @@ __cpp_lib_array_constexpr 201603L [C++17] 201811L [C++2a] __cpp_lib_as_const 201510L [C++17] + __cpp_lib_atomic_flag_test 201907L [C++2a] + __cpp_lib_atomic_float 201711L [C++2a] __cpp_lib_atomic_is_always_lock_free 201603L [C++17] + __cpp_lib_atomic_lock_free_type_aliases 201907L [C++2a] __cpp_lib_atomic_ref 201806L [C++2a] + __cpp_lib_atomic_shared_ptr 201711L [C++2a] + __cpp_lib_atomic_value_initialization 201911L [C++2a] + __cpp_lib_atomic_wait 201907L [C++2a] __cpp_lib_bind_front 201811L [C++2a] __cpp_lib_bit_cast 201806L [C++2a] __cpp_lib_bool_constant 201505L [C++17] @@ -135,14 +141,38 @@ # error "__cpp_lib_as_const should not be defined before c++17" # endif +# ifdef __cpp_lib_atomic_flag_test +# error "__cpp_lib_atomic_flag_test should not be defined before c++2a" +# endif + +# ifdef __cpp_lib_atomic_float +# error "__cpp_lib_atomic_float should not be defined before c++2a" +# endif + # ifdef __cpp_lib_atomic_is_always_lock_free # error "__cpp_lib_atomic_is_always_lock_free should not be defined before c++17" # endif +# ifdef __cpp_lib_atomic_lock_free_type_aliases +# error "__cpp_lib_atomic_lock_free_type_aliases should not be defined before c++2a" +# endif + # ifdef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should not be defined before c++2a" # endif +# ifdef __cpp_lib_atomic_shared_ptr +# error "__cpp_lib_atomic_shared_ptr should not be defined before c++2a" +# endif + +# ifdef __cpp_lib_atomic_value_initialization +# error "__cpp_lib_atomic_value_initialization should not be defined before c++2a" +# endif + +# ifdef __cpp_lib_atomic_wait +# error "__cpp_lib_atomic_wait should not be defined before c++2a" +# endif + # ifdef __cpp_lib_bind_front # error "__cpp_lib_bind_front should not be defined before c++2a" # endif @@ -489,14 +519,38 @@ # error "__cpp_lib_as_const should not be defined before c++17" # endif +# ifdef __cpp_lib_atomic_flag_test +# error "__cpp_lib_atomic_flag_test should not be defined before c++2a" +# endif + +# ifdef __cpp_lib_atomic_float +# error "__cpp_lib_atomic_float should not be defined before c++2a" +# endif + # ifdef __cpp_lib_atomic_is_always_lock_free # error "__cpp_lib_atomic_is_always_lock_free should not be defined before c++17" # endif +# ifdef __cpp_lib_atomic_lock_free_type_aliases +# error "__cpp_lib_atomic_lock_free_type_aliases should not be defined before c++2a" +# endif + # ifdef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should not be defined before c++2a" # endif +# ifdef __cpp_lib_atomic_shared_ptr +# error 
"__cpp_lib_atomic_shared_ptr should not be defined before c++2a" +# endif + +# ifdef __cpp_lib_atomic_value_initialization +# error "__cpp_lib_atomic_value_initialization should not be defined before c++2a" +# endif + +# ifdef __cpp_lib_atomic_wait +# error "__cpp_lib_atomic_wait should not be defined before c++2a" +# endif + # ifdef __cpp_lib_bind_front # error "__cpp_lib_bind_front should not be defined before c++2a" # endif @@ -933,6 +987,14 @@ # error "__cpp_lib_as_const should have the value 201510L in c++17" # endif +# ifdef __cpp_lib_atomic_flag_test +# error "__cpp_lib_atomic_flag_test should not be defined before c++2a" +# endif + +# ifdef __cpp_lib_atomic_float +# error "__cpp_lib_atomic_float should not be defined before c++2a" +# endif + # if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_atomic_is_always_lock_free # error "__cpp_lib_atomic_is_always_lock_free should be defined in c++17" @@ -946,10 +1008,26 @@ # endif # endif +# ifdef __cpp_lib_atomic_lock_free_type_aliases +# error "__cpp_lib_atomic_lock_free_type_aliases should not be defined before c++2a" +# endif + # ifdef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should not be defined before c++2a" # endif +# ifdef __cpp_lib_atomic_shared_ptr +# error "__cpp_lib_atomic_shared_ptr should not be defined before c++2a" +# endif + +# ifdef __cpp_lib_atomic_value_initialization +# error "__cpp_lib_atomic_value_initialization should not be defined before c++2a" +# endif + +# ifdef __cpp_lib_atomic_wait +# error "__cpp_lib_atomic_wait should not be defined before c++2a" +# endif + # ifdef __cpp_lib_bind_front # error "__cpp_lib_bind_front should not be defined before c++2a" # endif @@ -1575,6 +1653,32 @@ # error "__cpp_lib_as_const should have the value 201510L in c++2a" # endif +# if !defined(_LIBCPP_HAS_NO_THREADS) +# ifndef __cpp_lib_atomic_flag_test +# error "__cpp_lib_atomic_flag_test should be defined in c++2a" +# endif +# if __cpp_lib_atomic_flag_test != 201907L +# error "__cpp_lib_atomic_flag_test should have the value 201907L in c++2a" +# endif +# else +# ifdef __cpp_lib_atomic_flag_test +# error "__cpp_lib_atomic_flag_test should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# endif +# endif + +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_atomic_float +# error "__cpp_lib_atomic_float should be defined in c++2a" +# endif +# if __cpp_lib_atomic_float != 201711L +# error "__cpp_lib_atomic_float should have the value 201711L in c++2a" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_atomic_float +# error "__cpp_lib_atomic_float should not be defined because it is unimplemented in libc++!" +# endif +# endif + # if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_atomic_is_always_lock_free # error "__cpp_lib_atomic_is_always_lock_free should be defined in c++2a" @@ -1588,6 +1692,19 @@ # endif # endif +# if !defined(_LIBCPP_HAS_NO_THREADS) +# ifndef __cpp_lib_atomic_lock_free_type_aliases +# error "__cpp_lib_atomic_lock_free_type_aliases should be defined in c++2a" +# endif +# if __cpp_lib_atomic_lock_free_type_aliases != 201907L +# error "__cpp_lib_atomic_lock_free_type_aliases should have the value 201907L in c++2a" +# endif +# else +# ifdef __cpp_lib_atomic_lock_free_type_aliases +# error "__cpp_lib_atomic_lock_free_type_aliases should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" 
+# endif +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should be defined in c++2a" @@ -1601,6 +1718,45 @@ # endif # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_atomic_shared_ptr +# error "__cpp_lib_atomic_shared_ptr should be defined in c++2a" +# endif +# if __cpp_lib_atomic_shared_ptr != 201711L +# error "__cpp_lib_atomic_shared_ptr should have the value 201711L in c++2a" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_atomic_shared_ptr +# error "__cpp_lib_atomic_shared_ptr should not be defined because it is unimplemented in libc++!" +# endif +# endif + +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_atomic_value_initialization +# error "__cpp_lib_atomic_value_initialization should be defined in c++2a" +# endif +# if __cpp_lib_atomic_value_initialization != 201911L +# error "__cpp_lib_atomic_value_initialization should have the value 201911L in c++2a" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_atomic_value_initialization +# error "__cpp_lib_atomic_value_initialization should not be defined because it is unimplemented in libc++!" +# endif +# endif + +# if !defined(_LIBCPP_HAS_NO_THREADS) +# ifndef __cpp_lib_atomic_wait +# error "__cpp_lib_atomic_wait should be defined in c++2a" +# endif +# if __cpp_lib_atomic_wait != 201907L +# error "__cpp_lib_atomic_wait should have the value 201907L in c++2a" +# endif +# else +# ifdef __cpp_lib_atomic_wait +# error "__cpp_lib_atomic_wait should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# endif +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_bind_front # error "__cpp_lib_bind_front should be defined in c++2a" diff --git a/libcxx/test/support/cmpxchg_loop.h b/libcxx/test/support/cmpxchg_loop.h index 50bd00a30bdba..e341606098131 100644 --- a/libcxx/test/support/cmpxchg_loop.h +++ b/libcxx/test/support/cmpxchg_loop.h @@ -8,8 +8,8 @@ #include -template -bool cmpxchg_weak_loop(A& atomic, T& expected, T desired) { +template +bool cmpxchg_weak_loop(A& atomic, typename A::value_type& expected, typename A::value_type desired) { for (int i = 0; i < 10; i++) { if (atomic.compare_exchange_weak(expected, desired) == true) { return true; @@ -19,8 +19,8 @@ bool cmpxchg_weak_loop(A& atomic, T& expected, T desired) { return false; } -template -bool cmpxchg_weak_loop(A& atomic, T& expected, T desired, +template +bool cmpxchg_weak_loop(A& atomic, typename A::value_type& expected, typename A::value_type desired, std::memory_order success, std::memory_order failure) { for (int i = 0; i < 10; i++) { @@ -33,8 +33,8 @@ bool cmpxchg_weak_loop(A& atomic, T& expected, T desired, return false; } -template -bool c_cmpxchg_weak_loop(A* atomic, T* expected, T desired) { +template +bool c_cmpxchg_weak_loop(A* atomic, typename A::value_type* expected, typename A::value_type desired) { for (int i = 0; i < 10; i++) { if (std::atomic_compare_exchange_weak(atomic, expected, desired) == true) { return true; @@ -44,8 +44,8 @@ bool c_cmpxchg_weak_loop(A* atomic, T* expected, T desired) { return false; } -template -bool c_cmpxchg_weak_loop(A* atomic, T* expected, T desired, +template +bool c_cmpxchg_weak_loop(A* atomic, typename A::value_type* expected, typename A::value_type desired, std::memory_order success, std::memory_order failure) { for (int i = 0; i < 10; i++) { diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 6ad1a18569893..211702e9982c9 100755 --- 
a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -613,6 +613,57 @@ def add_version_header(tc): }, "headers": ["utility"], }, + {"name": "__cpp_lib_atomic_flag_test", + "values": { + "c++2a": int(201907), + }, + "headers": ["atomic"], + "depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + }, + {"name": "__cpp_lib_atomic_lock_free_type_aliases", + "values": { + "c++2a": int(201907), + }, + "headers": ["atomic"], + "depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + }, + {"name": "__cpp_lib_atomic_wait", + "values": { + "c++2a": int(201907), + }, + "headers": ["atomic"], + "depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + }, + {"name": "__cpp_lib_atomic_float", + "values": { + "c++2a": int(201711), + }, + "headers": ["atomic"], + "unimplemented": True, + "depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + }, + {"name": "__cpp_lib_atomic_shared_ptr", + "values": { + "c++2a": int(201711), + }, + "headers": ["atomic"], + "unimplemented": True, + "depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + }, + {"name": "__cpp_lib_atomic_value_initialization", + "values": { + "c++2a": int(201911), + }, + "headers": ["atomic", "memory"], + "unimplemented": True, + "depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + }, ]], key=lambda tc: tc["name"]) def get_std_dialects(): From 1a25133bcdfeb525168ed4bd7e747463e635d0a4 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 9 Sep 2020 19:09:52 +0200 Subject: [PATCH 0177/1079] [DAGCombine] Skip re-visiting EntryToken to avoid compile time explosion During the main DAGCombine loop, whenever a node gets replaced, the new node and all its users are pushed onto the worklist. Omit this if the new node is the EntryToken (e.g. if a store managed to get optimized out), because re-visiting the EntryToken and its users will not uncover any additional opportunities, but there may be a large number of such users, potentially causing compile time explosion. This compile time explosion showed up in particular when building the SingleSource/UnitTests/matrix-types-spec.cpp test-suite case on any platform without SIMD vector support. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D86963 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e5c5e5341a680..c714358c01577 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1558,9 +1558,15 @@ void DAGCombiner::Run(CombineLevel AtLevel) { DAG.ReplaceAllUsesWith(N, &RV); } - // Push the new node and any users onto the worklist - AddToWorklist(RV.getNode()); - AddUsersToWorklist(RV.getNode()); + // Push the new node and any users onto the worklist. Omit this if the + // new node is the EntryToken (e.g. if a store managed to get optimized + // out), because re-visiting the EntryToken and its users will not uncover + // any additional opportunities, but there may be a large number of such + // users, potentially causing compile time explosion. 
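+  // (For instance, when a dead store is deleted it is replaced by its chain
+  // operand, which may be the EntryToken; every argument copy and live-in
+  // load in the function is a user of that token.)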
+ if (RV.getOpcode() != ISD::EntryToken) { + AddToWorklist(RV.getNode()); + AddUsersToWorklist(RV.getNode()); + } // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to From ba5b1371ecc575337a95e9a9fc2b8951dae73aab Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Wed, 9 Sep 2020 10:19:37 -0700 Subject: [PATCH 0178/1079] [libc][NFC] Add spec files as dependencies of integration test. --- libc/test/src/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt index e6390fc7a1d65..aa606ae630bc4 100644 --- a/libc/test/src/CMakeLists.txt +++ b/libc/test/src/CMakeLists.txt @@ -22,6 +22,8 @@ endforeach() list(REMOVE_ITEM entrypoints_name_list "__assert_fail" "__errno_location") list(TRANSFORM entrypoints_name_list PREPEND "-e=") +file(GLOB spec_files ${LIBC_SOURCE_DIR}/spec/*.td) + # Generate integration test souce code. add_custom_command( OUTPUT ${public_test} @@ -30,7 +32,7 @@ add_custom_command( -I ${LIBC_SOURCE_DIR} ${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/api.td - DEPENDS ${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/api.td + DEPENDS ${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/api.td ${spec_files} libc-prototype-testgen ${TARGET_PUBLIC_HEADERS} llvmlibc llvmlibm ) From 447ba60a224f63524a3bc40cdc1cfdbf1f8383db Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Tue, 8 Sep 2020 22:53:08 -0700 Subject: [PATCH 0179/1079] [lldb/Docs] Correct LLDB_ENABLE_TESTS to LLDB_INCLUDE_TESTS Fix references to LLDB_ENABLE_TESTS. Differential Revision: https://reviews.llvm.org/D87345 --- lldb/docs/resources/build.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/docs/resources/build.rst b/lldb/docs/resources/build.rst index b5c1fb8cb0012..579f7574dac53 100644 --- a/lldb/docs/resources/build.rst +++ b/lldb/docs/resources/build.rst @@ -244,7 +244,7 @@ Windows On Windows the LLDB test suite requires lld. Either add ``lld`` to ``LLVM_ENABLE_PROJECTS`` or disable the test suite with -``LLDB_ENABLE_TESTS=OFF``. +``LLDB_INCLUDE_TESTS=OFF``. Although the following CMake variables are by no means Windows specific, they are commonly used on Windows. @@ -300,7 +300,7 @@ macOS On macOS the LLDB test suite requires libc++. Either add ``libcxx`` to ``LLVM_ENABLE_PROJECTS`` or disable the test suite with -``LLDB_ENABLE_TESTS=OFF``. Further useful options: +``LLDB_INCLUDE_TESTS=OFF``. Further useful options: * ``LLDB_BUILD_FRAMEWORK:BOOL``: Builds the LLDB.framework. * ``LLDB_CODESIGN_IDENTITY:STRING``: Set the identity to use for code-signing From 1301febe71416b3d90175ea73ebafa254d89d07c Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Wed, 9 Sep 2020 14:25:17 -0400 Subject: [PATCH 0180/1079] [libc++] Fix variant benchmark build for some configurations. The benchmarks expect to be built in C++17 or newer, but this isn't always how CMake configures the C++ dialect. Instead we need to explicitly set the CXX_STANDARD target property. 
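For context, the benchmarks in question use post-C++14 library features, so
they fail to compile whenever CMake falls back to an older default dialect. A
rough illustration of the kind of code involved (a sketch, not taken from the
benchmark sources):

    #include <variant>   // C++17-only header
    #include <cstdio>

    int main() {
      std::variant<int, double> v = 3.14;                            // needs C++17
      std::visit([](auto x) { std::printf("%f\n", double(x)); }, v); // and <variant>
      return 0;
    }

Setting the CXX_STANDARD and CXX_STANDARD_REQUIRED target properties, as the
diff below does, guarantees the compiler is invoked with at least -std=c++17
regardless of the top-level CMAKE_CXX_STANDARD setting.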
--- libcxx/benchmarks/CMakeLists.txt | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt index 8480ede23a49f..42d25c20c8115 100644 --- a/libcxx/benchmarks/CMakeLists.txt +++ b/libcxx/benchmarks/CMakeLists.txt @@ -70,18 +70,9 @@ set(BENCHMARK_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}) set(BENCHMARK_LIBCXX_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/benchmark-libcxx) set(BENCHMARK_NATIVE_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/benchmark-native) -check_flag_supported("-std=c++17") -mangle_name("LIBCXX_SUPPORTS_STD_EQ_c++17_FLAG" BENCHMARK_SUPPORTS_STD_CXX17_FLAG) -if (${BENCHMARK_SUPPORTS_STD_CXX17_FLAG}) - set(BENCHMARK_DIALECT_FLAG "-std=c++17") -else() - # If the compiler doesn't support -std=c++17, attempt to fall back to -std=c++1z while still - # requiring C++17 language features. - set(BENCHMARK_DIALECT_FLAG "-std=c++1z") -endif() set(BENCHMARK_TEST_COMPILE_FLAGS - ${BENCHMARK_DIALECT_FLAG} -O2 + -O2 -fsized-deallocation -I${BENCHMARK_LIBCXX_INSTALL}/include -I${LIBCXX_SOURCE_DIR}/test/support @@ -90,6 +81,7 @@ set(BENCHMARK_TEST_LIBCXX_COMPILE_FLAGS ${BENCHMARK_TEST_COMPILE_FLAGS} ${SANITIZER_FLAGS} -Wno-user-defined-literals + -Wno-suggest-override ) set(BENCHMARK_TEST_LIBCXX_LINK_FLAGS @@ -147,7 +139,10 @@ function(add_benchmark_test name source_file) OUTPUT_NAME "${name}.libcxx.out" RUNTIME_OUTPUT_DIRECTORY "${BENCHMARK_OUTPUT_DIR}" COMPILE_FLAGS "${BENCHMARK_TEST_LIBCXX_COMPILE_FLAGS}" - LINK_FLAGS "${BENCHMARK_TEST_LIBCXX_LINK_FLAGS}") + LINK_FLAGS "${BENCHMARK_TEST_LIBCXX_LINK_FLAGS}" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED YES + CXX_EXTENSIONS NO) cxx_link_system_libraries(${libcxx_target}) if (LIBCXX_BENCHMARK_NATIVE_STDLIB) if (LIBCXX_BENCHMARK_NATIVE_STDLIB STREQUAL "libstdc++" AND NOT DEFINED LIBSTDCXX_FILESYSTEM_LIB @@ -174,7 +169,10 @@ function(add_benchmark_test name source_file) RUNTIME_OUTPUT_DIRECTORY "${BENCHMARK_OUTPUT_DIR}" INCLUDE_DIRECTORIES "" COMPILE_FLAGS "${BENCHMARK_TEST_NATIVE_COMPILE_FLAGS}" - LINK_FLAGS "${BENCHMARK_TEST_NATIVE_LINK_FLAGS}") + LINK_FLAGS "${BENCHMARK_TEST_NATIVE_LINK_FLAGS}" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED YES + CXX_EXTENSIONS NO) endif() endfunction() From a2cb5448014bbfbfd954cf371977db3c73c9319d Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 9 Sep 2020 10:09:30 -0500 Subject: [PATCH 0181/1079] Revert "[Attributor] Re-enable a run line in noalias.ll" The underlying issue is still there, just hides on most systems, even some Windows builds :( See: http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/25479/steps/test-check-all/logs/FAIL%3A%20LLVM%3A%3Anoalias.ll This reverts commit 2600c9e2efce1dc4c64870b00a45ae0082c685fc. 
--- llvm/test/Transforms/Attributor/noalias.ll | 238 +++++++++++---------- 1 file changed, 122 insertions(+), 116 deletions(-) diff --git a/llvm/test/Transforms/Attributor/noalias.ll b/llvm/test/Transforms/Attributor/noalias.ll index a4c05fb4ca29d..18bb8e9719d52 100644 --- a/llvm/test/Transforms/Attributor/noalias.ll +++ b/llvm/test/Transforms/Attributor/noalias.ll @@ -1,7 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes ; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM -; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM +; TODO: The old pass manager cgscc run is disabled as it causes a crash on windows which is under investigation: http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/25479/steps/test-check-all/logs/FAIL%3A%20LLVM%3A%3Anoalias.ll +; opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; TEST 1 - negative. @@ -41,10 +42,10 @@ define i8* @return_noalias(){ } define void @nocapture(i8* %a){ -; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn -; IS__TUNIT____-LABEL: define {{[^@]+}}@nocapture -; IS__TUNIT____-SAME: (i8* nocapture nofree readnone [[A:%.*]]) [[ATTR0:#.*]] { -; IS__TUNIT____-NEXT: ret void +; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readnone willreturn +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@nocapture +; NOT_CGSCC_NPM-SAME: (i8* nocapture nofree readnone [[A:%.*]]) [[ATTR0:#.*]] { +; NOT_CGSCC_NPM-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@nocapture @@ -144,10 +145,10 @@ declare i8* @baz(...) nounwind uwtable ; Returning global pointer. Should not be noalias. define i8** @getter() { -; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn -; IS__TUNIT____-LABEL: define {{[^@]+}}@getter -; IS__TUNIT____-SAME: () [[ATTR0]] { -; IS__TUNIT____-NEXT: ret i8** @G +; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readnone willreturn +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@getter +; NOT_CGSCC_NPM-SAME: () [[ATTR0]] { +; NOT_CGSCC_NPM-NEXT: ret i8** @G ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@getter @@ -159,10 +160,10 @@ define i8** @getter() { ; Returning global pointer. 
Should not be noalias. define i8** @calle1(){ -; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn -; IS__TUNIT____-LABEL: define {{[^@]+}}@calle1 -; IS__TUNIT____-SAME: () [[ATTR0]] { -; IS__TUNIT____-NEXT: ret i8** @G +; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readnone willreturn +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@calle1 +; NOT_CGSCC_NPM-SAME: () [[ATTR0]] { +; NOT_CGSCC_NPM-NEXT: ret i8** @G ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@calle1 @@ -409,7 +410,6 @@ define void @test12_3(){ } define void @test12_4(){ -; ; IS________OPM-LABEL: define {{[^@]+}}@test12_4() { ; IS________OPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) ; IS________OPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) @@ -422,17 +422,17 @@ define void @test12_4(){ ; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) ; IS________OPM-NEXT: ret void ; -; IS________NPM-LABEL: define {{[^@]+}}@test12_4() { -; IS________NPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) -; IS________NPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) -; IS________NPM-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 -; IS________NPM-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 -; IS________NPM-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 -; IS________NPM-NEXT: tail call void @two_args(i8* noalias nocapture [[A]], i8* noalias nocapture [[B]]) -; IS________NPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_0]]) -; IS________NPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_1]]) -; IS________NPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) -; IS________NPM-NEXT: ret void +; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@test12_4() { +; NOT_TUNIT_OPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) +; NOT_TUNIT_OPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) +; NOT_TUNIT_OPM-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 +; NOT_TUNIT_OPM-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 +; NOT_TUNIT_OPM-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 +; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* noalias nocapture [[A]], i8* noalias nocapture [[B]]) +; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_0]]) +; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_1]]) +; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) +; NOT_TUNIT_OPM-NEXT: ret void ; %A = tail call noalias i8* @malloc(i64 4) %B = tail call noalias i8* @malloc(i64 4) @@ -470,6 +470,12 @@ define void @test13_use_noalias(){ ; CHECK-NEXT: call void @use_i8_internal(i8* noalias nocapture [[C2]]) ; CHECK-NEXT: ret void ; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test13_use_noalias() +; IS__CGSCC_OPM-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* +; IS__CGSCC_OPM-NEXT: [[C2:%.*]] = bitcast i16* [[C1]] to i8* +; IS__CGSCC_OPM-NEXT: call void @use_i8_internal(i8* noalias [[C2]]) +; IS__CGSCC_OPM-NEXT: ret void %m1 = tail call noalias i8* @malloc(i64 4) %c1 = bitcast i8* %m1 to i16* %c2 = bitcast i16* %c1 to i8* @@ -498,11 +504,11 @@ define void @test13_use_alias(){ ; TEST 14 i2p casts define internal i32 @p2i(i32* %arg) { -; 
IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn -; IS__TUNIT____-LABEL: define {{[^@]+}}@p2i -; IS__TUNIT____-SAME: (i32* noalias nofree readnone [[ARG:%.*]]) [[ATTR0]] { -; IS__TUNIT____-NEXT: [[P2I:%.*]] = ptrtoint i32* [[ARG]] to i32 -; IS__TUNIT____-NEXT: ret i32 [[P2I]] +; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readnone willreturn +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@p2i +; NOT_CGSCC_NPM-SAME: (i32* noalias nofree readnone [[ARG:%.*]]) [[ATTR0]] { +; NOT_CGSCC_NPM-NEXT: [[P2I:%.*]] = ptrtoint i32* [[ARG]] to i32 +; NOT_CGSCC_NPM-NEXT: ret i32 [[P2I]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@p2i @@ -515,14 +521,14 @@ define internal i32 @p2i(i32* %arg) { } define i32 @i2p(i32* %arg) { -; IS__TUNIT____: Function Attrs: nofree nosync nounwind readonly willreturn -; IS__TUNIT____-LABEL: define {{[^@]+}}@i2p -; IS__TUNIT____-SAME: (i32* nofree readonly [[ARG:%.*]]) [[ATTR4:#.*]] { -; IS__TUNIT____-NEXT: [[C:%.*]] = call i32 @p2i(i32* noalias nofree readnone [[ARG]]) [[ATTR0]] -; IS__TUNIT____-NEXT: [[I2P:%.*]] = inttoptr i32 [[C]] to i8* -; IS__TUNIT____-NEXT: [[BC:%.*]] = bitcast i8* [[I2P]] to i32* -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nocapture nofree readonly align 4 [[BC]]) [[ATTR4]] -; IS__TUNIT____-NEXT: ret i32 [[CALL]] +; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readonly willreturn +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@i2p +; NOT_CGSCC_NPM-SAME: (i32* nofree readonly [[ARG:%.*]]) [[ATTR4:#.*]] { +; NOT_CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @p2i(i32* noalias nofree readnone [[ARG]]) [[ATTR0]] +; NOT_CGSCC_NPM-NEXT: [[I2P:%.*]] = inttoptr i32 [[C]] to i8* +; NOT_CGSCC_NPM-NEXT: [[BC:%.*]] = bitcast i8* [[I2P]] to i32* +; NOT_CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nocapture nofree readonly align 4 [[BC]]) [[ATTR4]] +; NOT_CGSCC_NPM-NEXT: ret i32 [[CALL]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@i2p @@ -540,11 +546,11 @@ define i32 @i2p(i32* %arg) { ret i32 %call } define internal i32 @ret(i32* %arg) { -; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn -; IS__TUNIT____-LABEL: define {{[^@]+}}@ret -; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) [[ATTR5:#.*]] { -; IS__TUNIT____-NEXT: [[L:%.*]] = load i32, i32* [[ARG]], align 4 -; IS__TUNIT____-NEXT: ret i32 [[L]] +; NOT_CGSCC_NPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@ret +; NOT_CGSCC_NPM-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) [[ATTR5:#.*]] { +; NOT_CGSCC_NPM-NEXT: [[L:%.*]] = load i32, i32* [[ARG]], align 4 +; NOT_CGSCC_NPM-NEXT: ret i32 [[L]] ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@ret @@ -624,11 +630,11 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) @alias_of_p = external global i32* define void @make_alias(i32* %p) { -; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly -; IS__TUNIT____-LABEL: define {{[^@]+}}@make_alias -; IS__TUNIT____-SAME: (i32* nofree writeonly [[P:%.*]]) [[ATTR7:#.*]] { -; IS__TUNIT____-NEXT: store i32* [[P]], i32** @alias_of_p, align 8 -; IS__TUNIT____-NEXT: ret void +; NOT_CGSCC_NPM: Function Attrs: 
nofree nosync nounwind willreturn writeonly +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@make_alias +; NOT_CGSCC_NPM-SAME: (i32* nofree writeonly [[P:%.*]]) [[ATTR7:#.*]] { +; NOT_CGSCC_NPM-NEXT: store i32* [[P]], i32** @alias_of_p, align 8 +; NOT_CGSCC_NPM-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@make_alias @@ -641,11 +647,11 @@ define void @make_alias(i32* %p) { } define void @only_store(i32* %p) { -; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly -; IS__TUNIT____-LABEL: define {{[^@]+}}@only_store -; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) [[ATTR8:#.*]] { -; IS__TUNIT____-NEXT: store i32 0, i32* [[P]], align 4 -; IS__TUNIT____-NEXT: ret void +; NOT_CGSCC_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@only_store +; NOT_CGSCC_NPM-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) [[ATTR8:#.*]] { +; NOT_CGSCC_NPM-NEXT: store i32 0, i32* [[P]], align 4 +; NOT_CGSCC_NPM-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@only_store @@ -658,17 +664,17 @@ define void @only_store(i32* %p) { } define void @test15_caller(i32* noalias %p, i32 %c) { -; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly -; IS__TUNIT____-LABEL: define {{[^@]+}}@test15_caller -; IS__TUNIT____-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { -; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 -; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; IS__TUNIT____: if.then: -; IS__TUNIT____-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] -; IS__TUNIT____-NEXT: br label [[IF_END]] -; IS__TUNIT____: if.end: -; IS__TUNIT____-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] -; IS__TUNIT____-NEXT: ret void +; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test15_caller +; NOT_CGSCC_NPM-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { +; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 +; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; NOT_CGSCC_NPM: if.then: +; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] +; NOT_CGSCC_NPM-NEXT: br label [[IF_END]] +; NOT_CGSCC_NPM: if.end: +; NOT_CGSCC_NPM-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] +; NOT_CGSCC_NPM-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test15_caller @@ -715,23 +721,23 @@ if.end: ; Therefore, only one of the two conditions of if statementes will be fulfilled. 
define internal void @test16_sub(i32* noalias %p, i32 %c1, i32 %c2) { -; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly -; IS__TUNIT____-LABEL: define {{[^@]+}}@test16_sub -; IS__TUNIT____-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C1:%.*]], i32 [[C2:%.*]]) [[ATTR7]] { -; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C1]], 0 -; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; IS__TUNIT____: if.then: -; IS__TUNIT____-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] -; IS__TUNIT____-NEXT: tail call void @make_alias(i32* nofree writeonly align 4 [[P]]) [[ATTR7]] -; IS__TUNIT____-NEXT: br label [[IF_END]] -; IS__TUNIT____: if.end: -; IS__TUNIT____-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C2]], 0 -; IS__TUNIT____-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_END3:%.*]] -; IS__TUNIT____: if.then2: -; IS__TUNIT____-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] -; IS__TUNIT____-NEXT: br label [[IF_END3]] -; IS__TUNIT____: if.end3: -; IS__TUNIT____-NEXT: ret void +; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test16_sub +; NOT_CGSCC_NPM-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C1:%.*]], i32 [[C2:%.*]]) [[ATTR7]] { +; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C1]], 0 +; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; NOT_CGSCC_NPM: if.then: +; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] +; NOT_CGSCC_NPM-NEXT: tail call void @make_alias(i32* nofree writeonly align 4 [[P]]) [[ATTR7]] +; NOT_CGSCC_NPM-NEXT: br label [[IF_END]] +; NOT_CGSCC_NPM: if.end: +; NOT_CGSCC_NPM-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C2]], 0 +; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_END3:%.*]] +; NOT_CGSCC_NPM: if.then2: +; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] +; NOT_CGSCC_NPM-NEXT: br label [[IF_END3]] +; NOT_CGSCC_NPM: if.end3: +; NOT_CGSCC_NPM-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test16_sub @@ -772,11 +778,11 @@ if.end3: } define void @test16_caller(i32* %p, i32 %c) { -; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly -; IS__TUNIT____-LABEL: define {{[^@]+}}@test16_caller -; IS__TUNIT____-SAME: (i32* nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { -; IS__TUNIT____-NEXT: tail call void @test16_sub(i32* noalias nofree writeonly [[P]], i32 [[C]], i32 [[C]]) [[ATTR7]] -; IS__TUNIT____-NEXT: ret void +; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test16_caller +; NOT_CGSCC_NPM-SAME: (i32* nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { +; NOT_CGSCC_NPM-NEXT: tail call void @test16_sub(i32* noalias nofree writeonly [[P]], i32 [[C]], i32 [[C]]) [[ATTR7]] +; NOT_CGSCC_NPM-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test16_caller @@ -808,20 +814,20 @@ define void @test16_caller(i32* %p, i32 %c) { ; } define void @test17_caller(i32* noalias %p, i32 %c) { -; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly -; 
IS__TUNIT____-LABEL: define {{[^@]+}}@test17_caller -; IS__TUNIT____-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { -; IS__TUNIT____-NEXT: entry: -; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 -; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] -; IS__TUNIT____: l1: -; IS__TUNIT____-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] -; IS__TUNIT____-NEXT: br label [[L3:%.*]] -; IS__TUNIT____: l2: -; IS__TUNIT____-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] -; IS__TUNIT____-NEXT: br label [[L3]] -; IS__TUNIT____: l3: -; IS__TUNIT____-NEXT: ret void +; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test17_caller +; NOT_CGSCC_NPM-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { +; NOT_CGSCC_NPM-NEXT: entry: +; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 +; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] +; NOT_CGSCC_NPM: l1: +; NOT_CGSCC_NPM-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] +; NOT_CGSCC_NPM-NEXT: br label [[L3:%.*]] +; NOT_CGSCC_NPM: l2: +; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] +; NOT_CGSCC_NPM-NEXT: br label [[L3]] +; NOT_CGSCC_NPM: l3: +; NOT_CGSCC_NPM-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test17_caller @@ -866,10 +872,10 @@ l3: ; } define void @noreturn() { -; IS__TUNIT____: Function Attrs: nofree noreturn nosync nounwind readnone willreturn -; IS__TUNIT____-LABEL: define {{[^@]+}}@noreturn -; IS__TUNIT____-SAME: () [[ATTR9:#.*]] { -; IS__TUNIT____-NEXT: unreachable +; NOT_CGSCC_NPM: Function Attrs: nofree noreturn nosync nounwind readnone willreturn +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@noreturn +; NOT_CGSCC_NPM-SAME: () [[ATTR9:#.*]] { +; NOT_CGSCC_NPM-NEXT: unreachable ; ; IS__CGSCC____: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@noreturn @@ -881,18 +887,18 @@ define void @noreturn() { } define void @test18_caller(i32* noalias %p, i32 %c) { -; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly -; IS__TUNIT____-LABEL: define {{[^@]+}}@test18_caller -; IS__TUNIT____-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { -; IS__TUNIT____-NEXT: entry: -; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 -; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] -; IS__TUNIT____: l1: -; IS__TUNIT____-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] -; IS__TUNIT____-NEXT: unreachable -; IS__TUNIT____: l2: -; IS__TUNIT____-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] -; IS__TUNIT____-NEXT: ret void +; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test18_caller +; NOT_CGSCC_NPM-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { +; NOT_CGSCC_NPM-NEXT: entry: +; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 +; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] +; NOT_CGSCC_NPM: l1: +; NOT_CGSCC_NPM-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] +; NOT_CGSCC_NPM-NEXT: 
unreachable +; NOT_CGSCC_NPM: l2: +; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] +; NOT_CGSCC_NPM-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test18_caller From 81ff2d30a900c202f8d58a0eebf116746b12df7f Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 2 Sep 2020 14:06:58 -0500 Subject: [PATCH 0182/1079] [DSE] Handle masked stores --- .../Scalar/DeadStoreElimination.cpp | 53 ++++++++++++++----- .../DeadStoreElimination/masked-dead-store.ll | 12 ++--- 2 files changed, 46 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 892ba559e7903..1427bd4ad4dfd 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -234,6 +234,7 @@ static bool hasAnalyzableMemoryWrite(Instruction *I, case Intrinsic::memset_element_unordered_atomic: case Intrinsic::init_trampoline: case Intrinsic::lifetime_end: + case Intrinsic::masked_store: return true; } } @@ -257,8 +258,8 @@ static bool hasAnalyzableMemoryWrite(Instruction *I, /// Return a Location stored to by the specified instruction. If isRemovable /// returns true, this function and getLocForRead completely describe the memory /// operations for this instruction. -static MemoryLocation getLocForWrite(Instruction *Inst) { - +static MemoryLocation getLocForWrite(Instruction *Inst, + const TargetLibraryInfo &TLI) { if (StoreInst *SI = dyn_cast(Inst)) return MemoryLocation::get(SI); @@ -274,6 +275,8 @@ static MemoryLocation getLocForWrite(Instruction *Inst) { return MemoryLocation(); // Unhandled intrinsic. case Intrinsic::init_trampoline: return MemoryLocation(II->getArgOperand(0)); + case Intrinsic::masked_store: + return MemoryLocation::getForArgument(II, 1, TLI); case Intrinsic::lifetime_end: { uint64_t Len = cast(II->getArgOperand(0))->getZExtValue(); return MemoryLocation(II->getArgOperand(1), Len); @@ -325,6 +328,7 @@ static bool isRemovable(Instruction *I) { case Intrinsic::memcpy_element_unordered_atomic: case Intrinsic::memmove_element_unordered_atomic: case Intrinsic::memset_element_unordered_atomic: + case Intrinsic::masked_store: return true; } } @@ -370,9 +374,10 @@ static bool isShortenableAtTheBeginning(Instruction *I) { } /// Return the pointer that is being written to. -static Value *getStoredPointerOperand(Instruction *I) { +static Value *getStoredPointerOperand(Instruction *I, + const TargetLibraryInfo &TLI) { //TODO: factor this to reuse getLocForWrite - MemoryLocation Loc = getLocForWrite(I); + MemoryLocation Loc = getLocForWrite(I, TLI); assert(Loc.Ptr && "unable to find pointer written for analyzable instruction?"); // TODO: most APIs don't expect const Value * @@ -487,6 +492,24 @@ isOverwrite(const MemoryLocation &Later, const MemoryLocation &Earlier, return OW_MaybePartial; } +static OverwriteResult isMaskedStoreOverwrite(Instruction *Later, + Instruction *Earlier) { + auto *IIL = dyn_cast(Later); + auto *IIE = dyn_cast(Earlier); + if (IIL == nullptr || IIE == nullptr) + return OW_Unknown; + if (IIL->getIntrinsicID() != Intrinsic::masked_store || + IIE->getIntrinsicID() != Intrinsic::masked_store) + return OW_Unknown; + // Pointers. + if (IIL->getArgOperand(1) != IIE->getArgOperand(1)) + return OW_Unknown; + // Masks. 
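+    // Operand 1 of llvm.masked.store is the pointer (compared above) and
+    // operand 3 is the mask. Requiring identical mask values is what makes
+    // it safe to report a complete overwrite: every lane the earlier store
+    // writes is rewritten by the later one.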
+ if (IIL->getArgOperand(3) != IIE->getArgOperand(3)) + return OW_Unknown; + return OW_Complete; +} + /// Return 'OW_Complete' if a store to the 'Later' location completely /// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the /// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the @@ -796,7 +819,7 @@ static bool handleFree(CallInst *F, AliasAnalysis *AA, break; Value *DepPointer = - getUnderlyingObject(getStoredPointerOperand(Dependency)); + getUnderlyingObject(getStoredPointerOperand(Dependency, *TLI)); // Check for aliasing. if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) @@ -902,7 +925,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, if (hasAnalyzableMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) { // See through pointer-to-pointer bitcasts SmallVector Pointers; - getUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers); + getUnderlyingObjects(getStoredPointerOperand(&*BBI, *TLI), Pointers); // Stores to stack values are valid candidates for removal. bool AllDead = true; @@ -1119,11 +1142,12 @@ static bool tryToShortenBegin(Instruction *EarlierWrite, } static bool removePartiallyOverlappedStores(const DataLayout &DL, - InstOverlapIntervalsTy &IOL) { + InstOverlapIntervalsTy &IOL, + const TargetLibraryInfo &TLI) { bool Changed = false; for (auto OI : IOL) { Instruction *EarlierWrite = OI.first; - MemoryLocation Loc = getLocForWrite(EarlierWrite); + MemoryLocation Loc = getLocForWrite(EarlierWrite, TLI); assert(isRemovable(EarlierWrite) && "Expect only removable instruction"); const Value *Ptr = Loc.Ptr->stripPointerCasts(); @@ -1284,7 +1308,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, continue; // Figure out what location is being stored to. - MemoryLocation Loc = getLocForWrite(Inst); + MemoryLocation Loc = getLocForWrite(Inst, *TLI); // If we didn't get a useful location, fail. if (!Loc.Ptr) @@ -1308,7 +1332,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, Instruction *DepWrite = InstDep.getInst(); if (!hasAnalyzableMemoryWrite(DepWrite, *TLI)) break; - MemoryLocation DepLoc = getLocForWrite(DepWrite); + MemoryLocation DepLoc = getLocForWrite(DepWrite, *TLI); // If we didn't get a useful location, or if it isn't a size, bail out. if (!DepLoc.Ptr) break; @@ -1352,6 +1376,11 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, int64_t InstWriteOffset, DepWriteOffset; OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset, *AA, BB.getParent()); + if (OR == OW_Unknown) { + // isOverwrite punts on MemoryLocations with an imprecise size, such + // as masked stores. Handle this here, somwewhat inelegantly. + OR = isMaskedStoreOverwrite(Inst, DepWrite); + } if (OR == OW_MaybePartial) OR = isPartialOverwrite(Loc, DepLoc, DepWriteOffset, InstWriteOffset, DepWrite, IOL); @@ -1433,7 +1462,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, } if (EnablePartialOverwriteTracking) - MadeChange |= removePartiallyOverlappedStores(DL, IOL); + MadeChange |= removePartiallyOverlappedStores(DL, IOL, *TLI); // If this block ends in a return, unwind, or unreachable, all allocas are // dead at its end, which means stores to them are also dead. 
@@ -2494,7 +2523,7 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, if (EnablePartialOverwriteTracking) for (auto &KV : State.IOLs) - MadeChange |= removePartiallyOverlappedStores(State.DL, KV.second); + MadeChange |= removePartiallyOverlappedStores(State.DL, KV.second, TLI); MadeChange |= State.eliminateDeadWritesAtEndOfFunction(); return MadeChange; diff --git a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll index 03d88b1757dee..4fea8db99949d 100644 --- a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll +++ b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll @@ -9,26 +9,24 @@ define dllexport i32 @f0(i8** %a0, i8** %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ; CHECK-NEXT: [[V1:%.*]] = load i8*, i8** [[V0]], align 4, [[TBAA0:!tbaa !.*]] ; CHECK-NEXT: [[V2:%.*]] = getelementptr i8, i8* [[V1]], i32 [[A3:%.*]] ; CHECK-NEXT: [[V3:%.*]] = bitcast i8* [[V2]] to <128 x i8>* -; CHECK-NEXT: tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> , <128 x i8>* [[V3]], i32 32, <128 x i1> ), [[TBAA3:!tbaa !.*]] ; CHECK-NEXT: [[V6:%.*]] = getelementptr inbounds i8*, i8** [[A1:%.*]], i32 [[A4:%.*]] -; CHECK-NEXT: [[V7:%.*]] = load i8*, i8** [[V6]], align 4, [[TBAA6:!tbaa !.*]] +; CHECK-NEXT: [[V7:%.*]] = load i8*, i8** [[V6]], align 4, [[TBAA3:!tbaa !.*]] ; CHECK-NEXT: [[V8:%.*]] = getelementptr i8, i8* [[V7]], i32 [[A5:%.*]] ; CHECK-NEXT: [[V9:%.*]] = bitcast i8* [[V8]] to <128 x i8>* -; CHECK-NEXT: [[V10:%.*]] = tail call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[V9]], i32 32, <128 x i1> , <128 x i8> undef), [[TBAA8:!tbaa !.*]] +; CHECK-NEXT: [[V10:%.*]] = tail call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[V9]], i32 32, <128 x i1> , <128 x i8> undef), [[TBAA5:!tbaa !.*]] ; CHECK-NEXT: [[V11:%.*]] = shufflevector <128 x i8> [[V10]], <128 x i8> undef, <32 x i32> ; CHECK-NEXT: [[V14:%.*]] = shufflevector <32 x i8> [[V11]], <32 x i8> undef, <128 x i32> -; CHECK-NEXT: tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[V14]], <128 x i8>* [[V3]], i32 32, <128 x i1> ), [[TBAA3]] ; CHECK-NEXT: [[V16:%.*]] = shufflevector <128 x i8> [[V14]], <128 x i8> undef, <32 x i32> ; CHECK-NEXT: [[V17:%.*]] = getelementptr inbounds i8*, i8** [[A1]], i32 [[A6:%.*]] -; CHECK-NEXT: [[V18:%.*]] = load i8*, i8** [[V17]], align 4, [[TBAA6]] +; CHECK-NEXT: [[V18:%.*]] = load i8*, i8** [[V17]], align 4, [[TBAA3]] ; CHECK-NEXT: [[V19:%.*]] = getelementptr i8, i8* [[V18]], i32 [[A7:%.*]] ; CHECK-NEXT: [[V20:%.*]] = bitcast i8* [[V19]] to <128 x i8>* -; CHECK-NEXT: [[V21:%.*]] = tail call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[V20]], i32 32, <128 x i1> , <128 x i8> undef), [[TBAA8]] +; CHECK-NEXT: [[V21:%.*]] = tail call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[V20]], i32 32, <128 x i1> , <128 x i8> undef), [[TBAA5]] ; CHECK-NEXT: [[V22:%.*]] = shufflevector <128 x i8> [[V21]], <128 x i8> undef, <32 x i32> ; CHECK-NEXT: [[V23:%.*]] = icmp ugt <32 x i8> [[V16]], [[V22]] ; CHECK-NEXT: [[V24:%.*]] = select <32 x i1> [[V23]], <32 x i8> [[V16]], <32 x i8> [[V22]] ; CHECK-NEXT: [[V25:%.*]] = shufflevector <32 x i8> [[V24]], <32 x i8> undef, <128 x i32> -; CHECK-NEXT: tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[V25]], <128 x i8>* [[V3]], i32 32, <128 x i1> ), [[TBAA3]] +; CHECK-NEXT: tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[V25]], <128 x i8>* [[V3]], i32 32, <128 x i1> ), [[TBAA8:!tbaa 
!.*]] ; CHECK-NEXT: ret i32 0 ; b0: From 55dd731b291c2d64f318f27c40a17d2255e16215 Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Tue, 8 Sep 2020 13:43:15 -0700 Subject: [PATCH 0183/1079] [debugserver] Extract function for default launch flavor Extract a function for turning `eLaunchFlavorDefault` into a concreate `eLaunchFlavor` value. This new function encapsulates the few compile time variables involved, and also prevents clang unused code diagnostics. Differential Revision: https://reviews.llvm.org/D87327 --- lldb/tools/debugserver/source/debugserver.cpp | 88 +++++++------------ 1 file changed, 33 insertions(+), 55 deletions(-) diff --git a/lldb/tools/debugserver/source/debugserver.cpp b/lldb/tools/debugserver/source/debugserver.cpp index 04cbd2c8b503e..feb65eb6d3fbe 100644 --- a/lldb/tools/debugserver/source/debugserver.cpp +++ b/lldb/tools/debugserver/source/debugserver.cpp @@ -156,18 +156,36 @@ RNBRunLoopMode RNBRunLoopGetStartModeFromRemote(RNBRemote *remote) { return eRNBRunLoopModeExit; } -// Check the name to see if it ends with .app -static bool is_dot_app (const char *app_name) { - size_t len = strlen(app_name); - if (len < 4) +static nub_launch_flavor_t default_launch_flavor(const char *app_name) { +#if defined(WITH_FBS) || defined(WITH_BKS) || defined(WITH_SPRINGBOARD) + // Check the name to see if it ends with .app + auto is_dot_app = [](const char *app_name) { + size_t len = strlen(app_name); + if (len < 4) + return false; + + if (app_name[len - 4] == '.' && app_name[len - 3] == 'a' && + app_name[len - 2] == 'p' && app_name[len - 1] == 'p') + return true; return false; - - if (app_name[len - 4] == '.' && - app_name[len - 3] == 'a' && - app_name[len - 2] == 'p' && - app_name[len - 1] == 'p') - return true; - return false; + }; + + if (is_dot_app(app_name)) { +#if defined WITH_FBS + // Check if we have an app bundle, if so launch using FrontBoard Services. + return eLaunchFlavorFBS; +#elif defined WITH_BKS + // Check if we have an app bundle, if so launch using BackBoard Services. + return eLaunchFlavorBKS; +#elif defined WITH_SPRINGBOARD + // Check if we have an app bundle, if so launch using SpringBoard. + return eLaunchFlavorSpringBoard; +#endif + } +#endif + + // Our default launch method is posix spawn + return eLaunchFlavorPosixSpawn; } // This run loop mode will wait for the process to launch and hit its @@ -208,29 +226,8 @@ RNBRunLoopMode RNBRunLoopLaunchInferior(RNBRemote *remote, // figure our how we are going to launch automatically. nub_launch_flavor_t launch_flavor = g_launch_flavor; - if (launch_flavor == eLaunchFlavorDefault) { - // Our default launch method is posix spawn - launch_flavor = eLaunchFlavorPosixSpawn; - - const bool dot_app = is_dot_app(inferior_argv[0]); - (void)dot_app; -#if defined WITH_FBS - // Check if we have an app bundle, if so launch using BackBoard Services. - if (dot_app) { - launch_flavor = eLaunchFlavorFBS; - } -#elif defined WITH_BKS - // Check if we have an app bundle, if so launch using BackBoard Services. - if (dot_app) { - launch_flavor = eLaunchFlavorBKS; - } -#elif defined WITH_SPRINGBOARD - // Check if we have an app bundle, if so launch using SpringBoard. 
- if (dot_app) { - launch_flavor = eLaunchFlavorSpringBoard; - } -#endif - } + if (launch_flavor == eLaunchFlavorDefault) + launch_flavor = default_launch_flavor(inferior_argv[0]); ctx.SetLaunchFlavor(launch_flavor); char resolved_path[PATH_MAX]; @@ -1509,27 +1506,8 @@ int main(int argc, char *argv[]) { timeout_ptr = &attach_timeout_abstime; } nub_launch_flavor_t launch_flavor = g_launch_flavor; - if (launch_flavor == eLaunchFlavorDefault) { - // Our default launch method is posix spawn - launch_flavor = eLaunchFlavorPosixSpawn; - -#if defined WITH_FBS - // Check if we have an app bundle, if so launch using SpringBoard. - if (is_dot_app(waitfor_pid_name.c_str())) { - launch_flavor = eLaunchFlavorFBS; - } -#elif defined WITH_BKS - // Check if we have an app bundle, if so launch using SpringBoard. - if (is_dot_app(waitfor_pid_name.c_str())) { - launch_flavor = eLaunchFlavorBKS; - } -#elif defined WITH_SPRINGBOARD - // Check if we have an app bundle, if so launch using SpringBoard. - if (is_dot_app(waitfor_pid_name.c_str())) { - launch_flavor = eLaunchFlavorSpringBoard; - } -#endif - } + if (launch_flavor == eLaunchFlavorDefault) + launch_flavor = default_launch_flavor(waitfor_pid_name.c_str()); ctx.SetLaunchFlavor(launch_flavor); bool ignore_existing = false; From db7defd9bab7527ec1d0ed3fc62b379a9adf0971 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 9 Sep 2020 13:44:29 -0500 Subject: [PATCH 0184/1079] [DSE] Explicitly not use MSSA in testcase for now It fails for some reason, but it shouldn't stop switching to MSSA in DSE. --- llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll index 4fea8db99949d..ef74d8eae63f9 100644 --- a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll +++ b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -tbaa -dse -S < %s | FileCheck %s +; RUN: opt -tbaa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" define dllexport i32 @f0(i8** %a0, i8** %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) #0 { From 08196e0b2e1f8aaa8a854585335c17ba479114df Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Wed, 9 Sep 2020 19:12:32 +0200 Subject: [PATCH 0185/1079] Implements [[likely]] and [[unlikely]] in IfStmt. This is the initial part of the implementation of the C++20 likelihood attributes. It handles the attributes in an if statement. 
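As a rough illustration (example only, not part of the patch), with this
change code like the following gets branch-weight metadata when compiled
with optimizations enabled:

  bool process(int err) {
    if (err != 0) [[unlikely]] {
      // Hint: callers almost never pass a nonzero error code.
      return false;
    }
    return true;
  }

At -O1 and above, and only when no profile data is available, Clang attaches
!prof branch weights to the conditional branch using the same defaults as
__builtin_expect (2000 for the likely successor, 1 for the unlikely one);
under PGO the real profile counts take precedence.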
Differential Revision: https://reviews.llvm.org/D85091 --- clang/include/clang/AST/Stmt.h | 22 +++ clang/include/clang/Basic/Attr.td | 12 ++ clang/include/clang/Basic/AttrDocs.td | 95 ++++++++++++ .../clang/Basic/DiagnosticSemaKinds.td | 3 + clang/lib/AST/Stmt.cpp | 50 +++++- clang/lib/CodeGen/CGStmt.cpp | 31 +++- clang/lib/CodeGen/CodeGenFunction.cpp | 42 ++--- clang/lib/CodeGen/CodeGenFunction.h | 3 +- clang/lib/Parse/ParseDeclCXX.cpp | 2 + clang/lib/Sema/SemaStmt.cpp | 12 ++ clang/lib/Sema/SemaStmtAttr.cpp | 48 ++++++ .../attr-likelihood-if-branch-weights.cpp | 146 ++++++++++++++++++ clang/test/Preprocessor/has_attribute.cpp | 4 +- clang/test/Sema/attr-likelihood.c | 51 ++++++ clang/test/SemaCXX/attr-likelihood.cpp | 132 ++++++++++++++++ clang/www/cxx_status.html | 2 +- .../Transforms/Scalar/LowerExpectIntrinsic.h | 3 + .../Scalar/LowerExpectIntrinsic.cpp | 5 +- 18 files changed, 633 insertions(+), 30 deletions(-) create mode 100644 clang/test/CodeGenCXX/attr-likelihood-if-branch-weights.cpp create mode 100644 clang/test/Sema/attr-likelihood.c create mode 100644 clang/test/SemaCXX/attr-likelihood.cpp diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h index 726c61cb0126b..1e04e64727a08 100644 --- a/clang/include/clang/AST/Stmt.h +++ b/clang/include/clang/AST/Stmt.h @@ -1098,6 +1098,14 @@ class alignas(void *) Stmt { /// de-serialization). struct EmptyShell {}; + /// The likelihood of a branch being taken. + enum Likelihood { + LH_Unlikely = -1, ///< Branch has the [[unlikely]] attribute. + LH_None, ///< No attribute set or branches of the IfStmt have + ///< the same attribute. + LH_Likely ///< Branch has the [[likely]] attribute. + }; + protected: /// Iterator for iterating over Stmt * arrays that contain only T *. /// @@ -1166,6 +1174,20 @@ class alignas(void *) Stmt { static void EnableStatistics(); static void PrintStats(); + /// \returns the likelihood of a statement. + static Likelihood getLikelihood(const Stmt *S); + + /// \returns the likelihood of the 'then' branch of an 'if' statement. The + /// 'else' branch is required to determine whether both branches specify the + /// same likelihood, which affects the result. + static Likelihood getLikelihood(const Stmt *Then, const Stmt *Else); + + /// \returns whether the likelihood of the branches of an if statement are + /// conflicting. When the first element is \c true there's a conflict and + /// the Attr's are the conflicting attributes of the Then and Else Stmt. + static std::tuple + determineLikelihoodConflict(const Stmt *Then, const Stmt *Else); + /// Dumps the specified AST fragment and all subtrees to /// \c llvm::errs(). void dump() const; diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 2801a4aa19368..5676e9aa16789 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1288,6 +1288,18 @@ def FallThrough : StmtAttr { let Documentation = [FallthroughDocs]; } +def Likely : StmtAttr { + // FIXME: Change the date to 201803 once the implementation is finished. + let Spellings = [CXX11<"", "likely", 2>, C2x<"clang", "likely">]; + let Documentation = [LikelihoodDocs]; +} + +def Unlikely : StmtAttr { + // FIXME: Change the date to 201803 once the implementation is finished. 
+ let Spellings = [CXX11<"", "unlikely", 2>, C2x<"clang", "unlikely">]; + let Documentation = [LikelihoodDocs]; +} + def NoMerge : StmtAttr { let Spellings = [Clang<"nomerge">]; let Documentation = [NoMergeDocs]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index d6d5567c7924e..6daf9ca678961 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -1684,6 +1684,101 @@ Here is an example: }]; } +def LikelihoodDocs : Documentation { + let Category = DocCatStmt; + let Heading = "likely and unlikely"; + let Content = [{ +The ``likely`` and ``unlikely`` attributes are used as compiler hints. +The attributes are used to aid the compiler to determine which branch is +likely or unlikely to be taken. This is done by marking the branch substatement +with one of the two attributes. + +It isn't allowed to annotate a single statement with both ``likely`` and +``unlikely``. Annotating the ``true`` and ``false`` branch of an ``if`` +statement with the same likelihood attribute will result in a diagnostic and +the attributes are ignored on both branches. + +These attributes have no effect on the generated code when using +PGO (Profile-Guided Optimization) or at optimization level 0. + +In Clang, the attributes will be ignored if they're not placed on the +substatement of an ``if`` or ``else`` statement. The C++ Standard recommends +to honor them on every statement in the path of execution, but that can be +confusing: + +.. code-block:: c++ + + if (b) { + [[unlikely]] --b; // In the path of execution, + // this branch is considered unlikely. + } + + if (b) { + --b; + if(b) + return; + [[unlikely]] --b; // Not in the path of execution, + } // the branch has no likelihood information. + + if (b) { + --b; + foo(b); + // Whether or not the next statement is in the path of execution depends + // on the declaration of foo(): + // In the path of execution: void foo(int); + // Not in the path of execution: [[noreturn]] void foo(int); + // This means the likelihood of the branch depends on the declaration + // of foo(). + [[unlikely]] --b; + } + + +At the moment the attribute only has effect when used in an ``if`` or ``else`` +statement. + +.. code-block:: c++ + + if (b) [[likely]] { // Placement on the first statement in the branch. + // The compiler will optimize to execute the code here. + } else { + } + + if (b) + [[unlikely]] b++; // Placement on the first statement in the branch. + else { + // The compiler will optimize to execute the code here. + } + + if (b) { + [[unlikely]] b++; // Placement on the second statement in the branch. + } // The attribute will be ignored. + + if (b) [[likely]] { + [[unlikely]] b++; // No contradiction since the second attribute + } // is ignored. + + if (b) + ; + else [[likely]] { + // The compiler will optimize to execute the code here. + } + + if (b) + ; + else + // The compiler will optimize to execute the next statement. + [[likely]] b = f(); + + if (b) [[likely]]; // Both branches are likely. A diagnostic is issued + else [[likely]]; // and the attributes are ignored. + + if (b) + [[likely]] int i = 5; // Issues a diagnostic since the attribute + // isn't allowed on a declaration. 
+ + }]; +} + def ARMInterruptDocs : Documentation { let Category = DocCatFunction; let Heading = "interrupt (ARM)"; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 46f7ffc97ce77..98dc6dfba4efa 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3141,6 +3141,9 @@ def warn_nocf_check_attribute_ignored : def warn_attribute_after_definition_ignored : Warning< "attribute %0 after definition is ignored">, InGroup; +def warn_attributes_likelihood_ifstmt_conflict + : Warning<"conflicting attributes %0 are ignored">, + InGroup; def warn_cxx11_gnu_attribute_on_type : Warning< "attribute %0 ignored, because it cannot be applied to a type">, InGroup; diff --git a/clang/lib/AST/Stmt.cpp b/clang/lib/AST/Stmt.cpp index 25078e7b00fae..bdfaf410131cc 100644 --- a/clang/lib/AST/Stmt.cpp +++ b/clang/lib/AST/Stmt.cpp @@ -13,11 +13,12 @@ #include "clang/AST/Stmt.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTDiagnostic.h" +#include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclGroup.h" #include "clang/AST/Expr.h" -#include "clang/AST/ExprConcepts.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/ExprConcepts.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" #include "clang/AST/StmtCXX.h" @@ -41,8 +42,8 @@ #include #include #include -#include #include +#include using namespace clang; @@ -129,6 +130,51 @@ void Stmt::EnableStatistics() { StatisticsEnabled = true; } +static std::pair getLikelihood(const Stmt *S) { + if (const auto *AS = dyn_cast_or_null(S)) + for (const auto *A : AS->getAttrs()) { + if (isa(A)) + return std::make_pair(Stmt::LH_Likely, A); + + if (isa(A)) + return std::make_pair(Stmt::LH_Unlikely, A); + } + + return std::make_pair(Stmt::LH_None, nullptr); +} + +Stmt::Likelihood Stmt::getLikelihood(const Stmt *S) { + return ::getLikelihood(S).first; +} + +Stmt::Likelihood Stmt::getLikelihood(const Stmt *Then, const Stmt *Else) { + Likelihood LHT = ::getLikelihood(Then).first; + Likelihood LHE = ::getLikelihood(Else).first; + if (LHE == LH_None) + return LHT; + + // If the same attribute is used on both branches there's a conflict. + if (LHT == LHE) + return LH_None; + + if (LHT != LH_None) + return LHT; + + // Invert the value of Else to get the value for Then. + return LHE == LH_Likely ? LH_Unlikely : LH_Likely; +} + +std::tuple +Stmt::determineLikelihoodConflict(const Stmt *Then, const Stmt *Else) { + std::pair LHT = ::getLikelihood(Then); + std::pair LHE = ::getLikelihood(Else); + // If the same attribute is used on both branches there's a conflict. + if (LHT.first != LH_None && LHT.first == LHE.first) + return std::make_tuple(true, LHT.second, LHE.second); + + return std::make_tuple(false, nullptr, nullptr); +} + /// Skip no-op (attributed, compound) container stmts and skip captured /// stmt at the top, if \a IgnoreCaptured is true. 
Stmt *Stmt::IgnoreContainers(bool IgnoreCaptured) { diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 9dd79469b5444..83dd1be31633d 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/Support/SaveAndRestore.h" +#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" using namespace clang; using namespace CodeGen; @@ -651,6 +652,20 @@ void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) { EmitBranch(IndGotoBB); } +static Optional> +getLikelihoodWeights(const IfStmt &If) { + switch (Stmt::getLikelihood(If.getThen(), If.getElse())) { + case Stmt::LH_Unlikely: + return std::pair(llvm::UnlikelyBranchWeight, + llvm::LikelyBranchWeight); + case Stmt::LH_None: + return None; + case Stmt::LH_Likely: + return std::pair(llvm::LikelyBranchWeight, + llvm::UnlikelyBranchWeight); + } + llvm_unreachable("Unknown Likelihood"); +} void CodeGenFunction::EmitIfStmt(const IfStmt &S) { // C99 6.8.4.1: The first substatement is executed if the expression compares @@ -695,8 +710,20 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) { if (S.getElse()) ElseBlock = createBasicBlock("if.else"); - EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, - getProfileCount(S.getThen())); + // Prefer the PGO based weights over the likelihood attribute. + // When the build isn't optimized the metadata isn't used, so don't generate + // it. + llvm::MDNode *Weights = nullptr; + uint64_t Count = getProfileCount(S.getThen()); + if (!Count && CGM.getCodeGenOpts().OptimizationLevel) { + Optional> LHW = getLikelihoodWeights(S); + if (LHW) { + llvm::MDBuilder MDHelper(CGM.getLLVMContext()); + Weights = MDHelper.createBranchWeights(LHW->first, LHW->second); + } + } + + EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, Count, Weights); // Emit the 'then' code. EmitBlock(ThenBlock); diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 8f79cc77f0e64..e7f81087f0d20 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -1462,16 +1462,15 @@ bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond, return true; } - - /// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an if /// statement) to the specified blocks. Based on the condition, this might try /// to simplify the codegen of the conditional based on the branch. -/// +/// \param Weights The weights determined by the likelihood attributes. void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, - uint64_t TrueCount) { + uint64_t TrueCount, + llvm::MDNode *Weights) { Cond = Cond->IgnoreParens(); if (const BinaryOperator *CondBOp = dyn_cast(Cond)) { @@ -1486,7 +1485,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, // br(1 && X) -> br(X). incrementProfileCounter(CondBOp); return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, - TrueCount); + TrueCount, Weights); } // If we have "X && 1", simplify the code to use an uncond branch. @@ -1495,7 +1494,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, ConstantBool) { // br(X && 1) -> br(X). return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock, - TrueCount); + TrueCount, Weights); } // Emit the LHS as a conditional. 
If the LHS conditional is false, we @@ -1508,7 +1507,8 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, ConditionalEvaluation eval(*this); { ApplyDebugLocation DL(*this, Cond); - EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock, RHSCount); + EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock, RHSCount, + Weights); EmitBlock(LHSTrue); } @@ -1517,7 +1517,8 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, // Any temporaries created here are conditional. eval.begin(*this); - EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, TrueCount); + EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, TrueCount, + Weights); eval.end(*this); return; @@ -1532,7 +1533,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, // br(0 || X) -> br(X). incrementProfileCounter(CondBOp); return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, - TrueCount); + TrueCount, Weights); } // If we have "X || 0", simplify the code to use an uncond branch. @@ -1541,7 +1542,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, !ConstantBool) { // br(X || 0) -> br(X). return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock, - TrueCount); + TrueCount, Weights); } // Emit the LHS as a conditional. If the LHS conditional is true, we @@ -1557,7 +1558,8 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, ConditionalEvaluation eval(*this); { ApplyDebugLocation DL(*this, Cond); - EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse, LHSCount); + EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse, LHSCount, + Weights); EmitBlock(LHSFalse); } @@ -1566,7 +1568,8 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, // Any temporaries created here are conditional. eval.begin(*this); - EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, RHSCount); + EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, RHSCount, + Weights); eval.end(*this); @@ -1581,7 +1584,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, uint64_t FalseCount = getCurrentProfileCount() - TrueCount; // Negate the condition and swap the destination blocks. return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock, - FalseCount); + FalseCount, Weights); } } @@ -1592,7 +1595,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, ConditionalEvaluation cond(*this); EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock, - getProfileCount(CondOp)); + getProfileCount(CondOp), Weights); // When computing PGO branch weights, we only know the overall count for // the true block. This code is essentially doing tail duplication of the @@ -1612,14 +1615,14 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, { ApplyDebugLocation DL(*this, Cond); EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock, - LHSScaledTrueCount); + LHSScaledTrueCount, Weights); } cond.end(*this); cond.begin(*this); EmitBlock(RHSBlock); EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock, - TrueCount - LHSScaledTrueCount); + TrueCount - LHSScaledTrueCount, Weights); cond.end(*this); return; @@ -1650,9 +1653,10 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, // Create branch weights based on the number of times we get here and the // number of times the condition should be true. 
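  // (With this patch, the profile-based weights below are computed only
  // when the caller did not already supply weights from a likelihood
  // attribute; EmitIfStmt passes attribute weights only when no profile
  // count exists, so real profile data always wins.)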
- uint64_t CurrentCount = std::max(getCurrentProfileCount(), TrueCount); - llvm::MDNode *Weights = - createProfileWeights(TrueCount, CurrentCount - TrueCount); + if (!Weights) { + uint64_t CurrentCount = std::max(getCurrentProfileCount(), TrueCount); + Weights = createProfileWeights(TrueCount, CurrentCount - TrueCount); + } // Emit the code with the fully general case. llvm::Value *CondV; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index b4f8b11c0cd36..eb8a1125c7b60 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4361,7 +4361,8 @@ class CodeGenFunction : public CodeGenTypeCache { /// TrueCount should be the number of times we expect the condition to /// evaluate to true based on PGO data. void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, - llvm::BasicBlock *FalseBlock, uint64_t TrueCount); + llvm::BasicBlock *FalseBlock, uint64_t TrueCount, + llvm::MDNode *Weights = nullptr); /// Given an assignment `*LHS = RHS`, emit a test that checks if \p RHS is /// nonnull, if \p LHS is marked _Nonnull. diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index 75bb78152e57b..290b3c5df9592 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -4018,6 +4018,8 @@ static bool IsBuiltInOrStandardCXX11Attribute(IdentifierInfo *AttrName, case ParsedAttr::AT_FallThrough: case ParsedAttr::AT_CXX11NoReturn: case ParsedAttr::AT_NoUniqueAddress: + case ParsedAttr::AT_Likely: + case ParsedAttr::AT_Unlikely: return true; case ParsedAttr::AT_WarnUnusedResult: return !ScopeName && AttrName->getName().equals("nodiscard"); diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index b4a6099d1d30b..c44636ad1b395 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -597,6 +597,18 @@ StmtResult Sema::ActOnIfStmt(SourceLocation IfLoc, bool IsConstexpr, DiagnoseEmptyStmtBody(CondExpr->getEndLoc(), thenStmt, diag::warn_empty_if_body); + std::tuple LHC = + Stmt::determineLikelihoodConflict(thenStmt, elseStmt); + if (std::get<0>(LHC)) { + const Attr *ThenAttr = std::get<1>(LHC); + const Attr *ElseAttr = std::get<2>(LHC); + Diags.Report(ThenAttr->getLocation(), + diag::warn_attributes_likelihood_ifstmt_conflict) + << ThenAttr << ThenAttr->getRange(); + Diags.Report(ElseAttr->getLocation(), diag::note_conflicting_attribute) + << ElseAttr << ElseAttr->getRange(); + } + return BuildIfStmt(IfLoc, IsConstexpr, LParenLoc, InitStmt, Cond, RParenLoc, thenStmt, ElseLoc, elseStmt); } diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index 0910ca88c6b77..214952e914ace 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -210,6 +210,24 @@ static Attr *handleNoMergeAttr(Sema &S, Stmt *St, const ParsedAttr &A, return ::new (S.Context) NoMergeAttr(S.Context, A); } +static Attr *handleLikely(Sema &S, Stmt *St, const ParsedAttr &A, + SourceRange Range) { + + if (!S.getLangOpts().CPlusPlus20 && A.isCXX11Attribute() && !A.getScopeName()) + S.Diag(A.getLoc(), diag::ext_cxx20_attr) << A << Range; + + return ::new (S.Context) LikelyAttr(S.Context, A); +} + +static Attr *handleUnlikely(Sema &S, Stmt *St, const ParsedAttr &A, + SourceRange Range) { + + if (!S.getLangOpts().CPlusPlus20 && A.isCXX11Attribute() && !A.getScopeName()) + S.Diag(A.getLoc(), diag::ext_cxx20_attr) << A << Range; + + return ::new (S.Context) UnlikelyAttr(S.Context, A); +} + static void 
CheckForIncompatibleAttributes(Sema &S, const SmallVectorImpl &Attrs) { @@ -315,6 +333,32 @@ CheckForIncompatibleAttributes(Sema &S, << CategoryState.NumericAttr->getDiagnosticName(Policy); } } + + // C++20 [dcl.attr.likelihood]p1 The attribute-token likely shall not appear + // in an attribute-specifier-seq that contains the attribute-token unlikely. + const LikelyAttr *Likely = nullptr; + const UnlikelyAttr *Unlikely = nullptr; + for (const auto *I : Attrs) { + if (const auto *Attr = dyn_cast(I)) { + if (Unlikely) { + S.Diag(Attr->getLocation(), diag::err_attributes_are_not_compatible) + << Attr << Unlikely << Attr->getRange(); + S.Diag(Unlikely->getLocation(), diag::note_conflicting_attribute) + << Unlikely->getRange(); + return; + } + Likely = Attr; + } else if (const auto *Attr = dyn_cast(I)) { + if (Likely) { + S.Diag(Attr->getLocation(), diag::err_attributes_are_not_compatible) + << Attr << Likely << Attr->getRange(); + S.Diag(Likely->getLocation(), diag::note_conflicting_attribute) + << Likely->getRange(); + return; + } + Unlikely = Attr; + } + } } static Attr *handleOpenCLUnrollHint(Sema &S, Stmt *St, const ParsedAttr &A, @@ -377,6 +421,10 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const ParsedAttr &A, return handleSuppressAttr(S, St, A, Range); case ParsedAttr::AT_NoMerge: return handleNoMergeAttr(S, St, A, Range); + case ParsedAttr::AT_Likely: + return handleLikely(S, St, A, Range); + case ParsedAttr::AT_Unlikely: + return handleUnlikely(S, St, A, Range); default: // if we're here, then we parsed a known attribute, but didn't recognize // it as a statement attribute => it is declaration attribute diff --git a/clang/test/CodeGenCXX/attr-likelihood-if-branch-weights.cpp b/clang/test/CodeGenCXX/attr-likelihood-if-branch-weights.cpp new file mode 100644 index 0000000000000..6327396a92852 --- /dev/null +++ b/clang/test/CodeGenCXX/attr-likelihood-if-branch-weights.cpp @@ -0,0 +1,146 @@ +// RUN: %clang_cc1 -O1 -emit-llvm %s -o - -triple=x86_64-linux-gnu | FileCheck -DLIKELY=2000 -DUNLIKELY=1 %s +// RUN: %clang_cc1 -O1 -emit-llvm %s -triple=x86_64-linux-gnu -mllvm -likely-branch-weight=99 -mllvm -unlikely-branch-weight=42 -o - | FileCheck -DLIKELY=99 -DUNLIKELY=42 %s + +extern volatile bool b; +extern volatile int i; +extern bool A(); +extern bool B(); + +bool f() { + // CHECK-LABEL: define zeroext i1 @_Z1fv + // CHECK: br {{.*}} !prof !7 + if (b) + [[likely]] { + return A(); + } + return B(); +} + +bool g() { + // CHECK-LABEL: define zeroext i1 @_Z1gv + // CHECK: br {{.*}} !prof !8 + if (b) + [[unlikely]] { + return A(); + } + + return B(); +} + +bool h() { + // CHECK-LABEL: define zeroext i1 @_Z1hv + // CHECK: br {{.*}} !prof !8 + if (b) + [[unlikely]] return A(); + + return B(); +} + +void NullStmt() { + // CHECK-LABEL: define{{.*}}NullStmt + // CHECK: br {{.*}} !prof !8 + if (b) + [[unlikely]]; + else { + // Make sure the branches aren't optimized away. 
+ b = true; + } +} + +void IfStmt() { + // CHECK-LABEL: define{{.*}}IfStmt + // CHECK: br {{.*}} !prof !8 + if (b) + [[unlikely]] if (B()) {} + + // CHECK-NOT: br {{.*}} !prof + // CHECK: br {{.*}} !prof + if (b) { + if (B()) + [[unlikely]] { b = false; } + } +} + +void WhileStmt() { + // CHECK-LABEL: define{{.*}}WhileStmt + // CHECK: br {{.*}} !prof !8 + if (b) + [[unlikely]] while (B()) {} + + // CHECK-NOT: br {{.*}} %if.end{{.*}} !prof + if (b) + while (B()) + [[unlikely]] { b = false; } +} + +void DoStmt() { + // CHECK-LABEL: define{{.*}}DoStmt + // CHECK: br {{.*}} !prof !8 + if (b) + [[unlikely]] do {} + while (B()) + ; + + // CHECK-NOT: br {{.*}} %if.end{{.*}} !prof + if (b) + do + [[unlikely]] {} + while (B()); +} + +void ForStmt() { + // CHECK-LABEL: define{{.*}}ForStmt + // CHECK: br {{.*}} !prof !8 + if (b) + [[unlikely]] for (; B();) {} + + // CHECK-NOT: br {{.*}} %if.end{{.*}} !prof + if (b) + for (; B();) + [[unlikely]] {} +} + +void GotoStmt() { + // CHECK-LABEL: define{{.*}}GotoStmt + // CHECK: br {{.*}} !prof !8 + if (b) + [[unlikely]] goto end; + else { + // Make sure the branches aren't optimized away. + b = true; + } +end:; +} + +void ReturnStmt() { + // CHECK-LABEL: define{{.*}}ReturnStmt + // CHECK: br {{.*}} !prof !8 + if (b) + [[unlikely]] return; + else { + // Make sure the branches aren't optimized away. + b = true; + } +} + +void SwitchStmt() { + // CHECK-LABEL: define{{.*}}SwitchStmt + // CHECK: br {{.*}} !prof !8 + if (b) + [[unlikely]] switch (i) {} + else { + // Make sure the branches aren't optimized away. + b = true; + } + // CHECK-NOT: br {{.*}} %if.end{{.*}} !prof + if (b) + switch (i) + [[unlikely]] {} + else { + // Make sure the branches aren't optimized away. + b = true; + } +} + +// CHECK: !7 = !{!"branch_weights", i32 [[UNLIKELY]], i32 [[LIKELY]]} +// CHECK: !8 = !{!"branch_weights", i32 [[LIKELY]], i32 [[UNLIKELY]]} diff --git a/clang/test/Preprocessor/has_attribute.cpp b/clang/test/Preprocessor/has_attribute.cpp index e7303c7c5b4dd..a66624ac4147a 100644 --- a/clang/test/Preprocessor/has_attribute.cpp +++ b/clang/test/Preprocessor/has_attribute.cpp @@ -62,13 +62,13 @@ CXX11(unlikely) // FIXME(201806L) CHECK: ensures: 0 // FIXME(201806L) CHECK: expects: 0 // CHECK: fallthrough: 201603L -// FIXME(201803L) CHECK: likely: 0 +// FIXME(201803L) CHECK: likely: 2L // CHECK: maybe_unused: 201603L // ITANIUM: no_unique_address: 201803L // WINDOWS: no_unique_address: 0 // CHECK: nodiscard: 201907L // CHECK: noreturn: 200809L -// FIXME(201803L) CHECK: unlikely: 0 +// FIXME(201803L) CHECK: unlikely: 2L // Test for Microsoft __declspec attributes diff --git a/clang/test/Sema/attr-likelihood.c b/clang/test/Sema/attr-likelihood.c new file mode 100644 index 0000000000000..66aabd6b64052 --- /dev/null +++ b/clang/test/Sema/attr-likelihood.c @@ -0,0 +1,51 @@ +// RUN: %clang_cc1 %s -fsyntax-only -fdouble-square-bracket-attributes -verify + +void g() { + if (1) + [[clang::likely]] {} +} +void m() { + [[clang::likely]] int x = 42; // expected-error {{'likely' attribute cannot be applied to a declaration}} + + if (x) + [[clang::unlikely]] {} + if (x) { + [[clang::unlikely]]; + } + switch (x) { + case 1: + [[clang::likely]] {} + break; + [[clang::likely]] case 2 : case 3 : {} + break; + } + + do { + [[clang::unlikely]]; + } while (x); + do + [[clang::unlikely]] {} + while (x); + do { // expected-note {{to match this 'do'}} + } + [[clang::unlikely]] while (x); // expected-error {{expected 'while' in do/while loop}} + for (;;) + [[clang::unlikely]] {} + for (;;) { + 
[[clang::unlikely]]; + } + while (x) + [[clang::unlikely]] {} + while (x) { + [[clang::unlikely]]; + } + + if (x) + goto lbl; + + // FIXME: allow the attribute on the label + [[clang::unlikely]] lbl : // expected-error {{'unlikely' attribute cannot be applied to a declaration}} + [[clang::likely]] x = x + 1; + + [[clang::likely]]++ x; +} diff --git a/clang/test/SemaCXX/attr-likelihood.cpp b/clang/test/SemaCXX/attr-likelihood.cpp new file mode 100644 index 0000000000000..c8be00bfcc32c --- /dev/null +++ b/clang/test/SemaCXX/attr-likelihood.cpp @@ -0,0 +1,132 @@ +// RUN: %clang_cc1 %s -fsyntax-only -verify +// RUN: %clang_cc1 %s -DPEDANTIC -pedantic -fsyntax-only -verify + +#if PEDANTIC +void g() { + if (true) + [[likely]] {} // expected-warning {{use of the 'likely' attribute is a C++20 extension}} + else + [[unlikely]] {} // expected-warning {{use of the 'unlikely' attribute is a C++20 extension}} +} +#else +void a() { + if (true) + [[likely]]; // expected-warning {{conflicting attributes 'likely' are ignored}} + else + [[likely]]; // expected-note {{conflicting attribute is here}} +} + +void b() { + if (true) + [[unlikely]]; // expected-warning {{conflicting attributes 'unlikely' are ignored}} + else + [[unlikely]]; // expected-note {{conflicting attribute is here}} +} + +void c() { + if (true) + [[likely]]; +} + +void d() { + if (true) + [[unlikely]]; +} + +void g() { + if (true) + [[likely]] {} + else + [[unlikely]] {} +} + +void h() { + if (true) + [[likely]] {} + else { + } +} + +void i() { + if (true) + [[unlikely]] {} + else { + } +} + +void j() { + if (true) { + } else + [[likely]] {} +} + +void k() { + if (true) { + } else + [[likely]] {} +} + +void l() { + if (true) + [[likely]] {} + else + [[unlikely]] if (false) [[likely]] {} +} + +void m() { + [[likely]] int x = 42; // expected-error {{'likely' attribute cannot be applied to a declaration}} + + if (x) + [[unlikely]] {} + if (x) { + [[unlikely]]; + } + switch (x) { + case 1: + [[likely]] {} + break; + [[likely]] case 2 : case 3 : {} + break; + } + + do { + [[unlikely]]; + } while (x); + do + [[unlikely]] {} + while (x); + do { // expected-note {{to match this 'do'}} + } + [[unlikely]] while (x); // expected-error {{expected 'while' in do/while loop}} + for (;;) + [[unlikely]] {} + for (;;) { + [[unlikely]]; + } + while (x) + [[unlikely]] {} + while (x) { + [[unlikely]]; + } + + switch (x) + [[unlikely]] {} + + if (x) + goto lbl; + + // FIXME: allow the attribute on the label + [[unlikely]] lbl : // expected-error {{'unlikely' attribute cannot be applied to a declaration}} + [[likely]] x = x + 1; + + [[likely]]++ x; +} + +void n() [[likely]] // expected-error {{'likely' attribute cannot be applied to types}} +{ + try + [[likely]] {} // expected-error {{expected '{'}} + catch (...) [[likely]] { // expected-error {{expected expression}} + } +} +#endif diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index e0c2cefcaa3fe..3c546eb409dee 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -987,7 +987,7 @@

C++20 implementation status

[[likely]] and [[unlikely]] attributes
P0479R5
- No
+ Clang 12 (partial)
typename optional in more contexts
diff --git a/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h b/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
index 4e47ff70d5574..22b2e649e4d48 100644
--- a/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
+++ b/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
@@ -17,6 +17,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Support/CommandLine.h"
namespace llvm {
@@ -31,6 +32,8 @@ struct LowerExpectIntrinsicPass : PassInfoMixin<LowerExpectIntrinsicPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
};
+extern cl::opt<uint32_t> LikelyBranchWeight;
+extern cl::opt<uint32_t> UnlikelyBranchWeight;
}
#endif
diff --git a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 0fe7dd9cfb39f..33f73f6e163af 100644
--- a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -24,7 +24,6 @@
#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/MisExpect.h"
@@ -48,10 +47,10 @@ STATISTIC(ExpectIntrinsicsHandled,
// 'select' instructions. It may be worthwhile to hoist these values to some
// shared space, so they can be used directly by other passes.
-static cl::opt<uint32_t> LikelyBranchWeight(
+cl::opt<uint32_t> llvm::LikelyBranchWeight(
    "likely-branch-weight", cl::Hidden, cl::init(2000),
    cl::desc("Weight of the branch likely to be taken (default = 2000)"));
-static cl::opt<uint32_t> UnlikelyBranchWeight(
+cl::opt<uint32_t> llvm::UnlikelyBranchWeight(
    "unlikely-branch-weight", cl::Hidden, cl::init(1),
    cl::desc("Weight of the branch unlikely to be taken (default = 1)"));

From 5a4a0cfcfb54be4a64129ff91d95229b4a7eec75 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet
Date: Wed, 9 Sep 2020 19:10:30 +0000
Subject: [PATCH 0186/1079] [NFC] Separate bitcode reading for
 FUNC_CODE_INST_CMPXCHG(_OLD)

This is preparatory work to enable storing alignment for AtomicCmpXchgInst.
See D83136 for context and bug: https://bugs.llvm.org/show_bug.cgi?id=27168

This is the fixed version of D83375, which was submitted and reverted.

Differential Revision: https://reviews.llvm.org/D87373
---
 llvm/include/llvm/Bitcode/LLVMBitCodes.h | 10 +-
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 106 ++++++++++++++++------
 2 files changed, 83 insertions(+), 33 deletions(-)

diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 613391ad05ede..d81f61c59c852 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -539,8 +539,9 @@ enum FunctionCodes {
FUNC_CODE_DEBUG_LOC = 35, // DEBUG_LOC: [Line,Col,ScopeVal, IAVal]
FUNC_CODE_INST_FENCE = 36, // FENCE: [ordering, synchscope]
- FUNC_CODE_INST_CMPXCHG_OLD = 37, // CMPXCHG: [ptrty,ptr,cmp,new, align, vol,
- // ordering, synchscope]
+ FUNC_CODE_INST_CMPXCHG_OLD = 37, // CMPXCHG: [ptrty, ptr, cmp, val, vol,
+ // ordering, synchscope,
+ // failure_ordering?, weak?]
FUNC_CODE_INST_ATOMICRMW = 38, // ATOMICRMW: [ptrty,ptr,val, operation, // align, vol, // ordering, synchscope] @@ -554,8 +555,9 @@ enum FunctionCodes { FUNC_CODE_INST_GEP = 43, // GEP: [inbounds, n x operands] FUNC_CODE_INST_STORE = 44, // STORE: [ptrty,ptr,valty,val, align, vol] FUNC_CODE_INST_STOREATOMIC = 45, // STORE: [ptrty,ptr,val, align, vol - FUNC_CODE_INST_CMPXCHG = 46, // CMPXCHG: [ptrty,ptr,valty,cmp,new, align, - // vol,ordering,synchscope] + FUNC_CODE_INST_CMPXCHG = 46, // CMPXCHG: [ptrty, ptr, cmp, val, vol, + // success_ordering, synchscope, + // failure_ordering, weak] FUNC_CODE_INST_LANDINGPAD = 47, // LANDINGPAD: [ty,val,num,id0,val0...] FUNC_CODE_INST_CLEANUPRET = 48, // CLEANUPRET: [val] or [val,bb#] FUNC_CODE_INST_CATCHRET = 49, // CATCHRET: [val,bb#] diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 0fa502f4569f4..4d69dd7dcc5d6 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -651,7 +651,7 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { /// Read a value/type pair out of the specified record from slot 'Slot'. /// Increment Slot past the number of slots used in the record. Return true on /// failure. - bool getValueTypePair(SmallVectorImpl &Record, unsigned &Slot, + bool getValueTypePair(const SmallVectorImpl &Record, unsigned &Slot, unsigned InstNum, Value *&ResVal, Type **FullTy = nullptr) { if (Slot == Record.size()) return true; @@ -688,7 +688,7 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { } /// Like popValue, but does not increment the Slot number. - bool getValue(SmallVectorImpl &Record, unsigned Slot, + bool getValue(const SmallVectorImpl &Record, unsigned Slot, unsigned InstNum, Type *Ty, Value *&ResVal) { ResVal = getValue(Record, Slot, InstNum, Ty); return ResVal == nullptr; @@ -696,7 +696,7 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { /// Version of getValue that returns ResVal directly, or 0 if there is an /// error. - Value *getValue(SmallVectorImpl &Record, unsigned Slot, + Value *getValue(const SmallVectorImpl &Record, unsigned Slot, unsigned InstNum, Type *Ty) { if (Slot == Record.size()) return nullptr; unsigned ValNo = (unsigned)Record[Slot]; @@ -707,7 +707,7 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { } /// Like getValue, but decodes signed VBRs. - Value *getValueSigned(SmallVectorImpl &Record, unsigned Slot, + Value *getValueSigned(const SmallVectorImpl &Record, unsigned Slot, unsigned InstNum, Type *Ty) { if (Slot == Record.size()) return nullptr; unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]); @@ -4989,54 +4989,55 @@ Error BitcodeReader::parseFunctionBody(Function *F) { InstructionList.push_back(I); break; } - case bitc::FUNC_CODE_INST_CMPXCHG_OLD: - case bitc::FUNC_CODE_INST_CMPXCHG: { - // CMPXCHG:[ptrty, ptr, cmp, new, vol, successordering, ssid, - // failureordering?, isweak?] + case bitc::FUNC_CODE_INST_CMPXCHG_OLD: { + // CMPXCHG_OLD: [ptrty, ptr, cmp, val, vol, ordering, synchscope, + // failure_ordering?, weak?] 
+ const size_t NumRecords = Record.size(); unsigned OpNum = 0; - Value *Ptr, *Cmp, *New; + Value *Ptr = nullptr; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, &FullTy)) return error("Invalid record"); if (!isa(Ptr->getType())) return error("Cmpxchg operand is not a pointer type"); - if (BitCode == bitc::FUNC_CODE_INST_CMPXCHG) { - if (getValueTypePair(Record, OpNum, NextValueNo, Cmp, &FullTy)) - return error("Invalid record"); - } else if (popValue(Record, OpNum, NextValueNo, - getPointerElementFlatType(FullTy), Cmp)) + Value *Cmp = nullptr; + if (popValue(Record, OpNum, NextValueNo, + getPointerElementFlatType(FullTy), Cmp)) return error("Invalid record"); - else - FullTy = cast(FullTy)->getElementType(); + FullTy = cast(FullTy)->getElementType(); + + Value *New = nullptr; if (popValue(Record, OpNum, NextValueNo, Cmp->getType(), New) || - Record.size() < OpNum + 3 || Record.size() > OpNum + 5) + NumRecords < OpNum + 3 || NumRecords > OpNum + 5) return error("Invalid record"); - AtomicOrdering SuccessOrdering = getDecodedOrdering(Record[OpNum + 1]); + const AtomicOrdering SuccessOrdering = + getDecodedOrdering(Record[OpNum + 1]); if (SuccessOrdering == AtomicOrdering::NotAtomic || SuccessOrdering == AtomicOrdering::Unordered) return error("Invalid record"); - SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 2]); + + const SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 2]); if (Error Err = typeCheckLoadStoreInst(Cmp->getType(), Ptr->getType())) return Err; - AtomicOrdering FailureOrdering; - if (Record.size() < 7) - FailureOrdering = - AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering); - else - FailureOrdering = getDecodedOrdering(Record[OpNum + 3]); - Align Alignment( + const AtomicOrdering FailureOrdering = + NumRecords < 7 + ? AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering) + : getDecodedOrdering(Record[OpNum + 3]); + + const Align Alignment( TheModule->getDataLayout().getTypeStoreSize(Cmp->getType())); + I = new AtomicCmpXchgInst(Ptr, Cmp, New, Alignment, SuccessOrdering, FailureOrdering, SSID); - FullTy = StructType::get(Context, {FullTy, Type::getInt1Ty(Context)}); cast(I)->setVolatile(Record[OpNum]); + FullTy = StructType::get(Context, {FullTy, Type::getInt1Ty(Context)}); - if (Record.size() < 8) { + if (NumRecords < 8) { // Before weak cmpxchgs existed, the instruction simply returned the // value loaded from memory, so bitcode files from that era will be // expecting the first component of a modern cmpxchg. 
@@ -5044,12 +5045,59 @@ Error BitcodeReader::parseFunctionBody(Function *F) { I = ExtractValueInst::Create(I, 0); FullTy = cast(FullTy)->getElementType(0); } else { - cast(I)->setWeak(Record[OpNum+4]); + cast(I)->setWeak(Record[OpNum + 4]); } InstructionList.push_back(I); break; } + case bitc::FUNC_CODE_INST_CMPXCHG: { + // CMPXCHG: [ptrty, ptr, cmp, val, vol, success_ordering, synchscope, + // failure_ordering, weak] + const size_t NumRecords = Record.size(); + unsigned OpNum = 0; + Value *Ptr = nullptr; + if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, &FullTy)) + return error("Invalid record"); + + if (!isa(Ptr->getType())) + return error("Cmpxchg operand is not a pointer type"); + + Value *Cmp = nullptr; + if (getValueTypePair(Record, OpNum, NextValueNo, Cmp, &FullTy)) + return error("Invalid record"); + + Value *Val = nullptr; + if (popValue(Record, OpNum, NextValueNo, Cmp->getType(), Val) || + NumRecords < OpNum + 3 || NumRecords > OpNum + 5) + return error("Invalid record"); + + const AtomicOrdering SuccessOrdering = + getDecodedOrdering(Record[OpNum + 1]); + if (SuccessOrdering == AtomicOrdering::NotAtomic || + SuccessOrdering == AtomicOrdering::Unordered) + return error("Invalid record"); + + const SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 2]); + + if (Error Err = typeCheckLoadStoreInst(Cmp->getType(), Ptr->getType())) + return Err; + + const AtomicOrdering FailureOrdering = + getDecodedOrdering(Record[OpNum + 3]); + + const Align Alignment( + TheModule->getDataLayout().getTypeStoreSize(Cmp->getType())); + + I = new AtomicCmpXchgInst(Ptr, Cmp, Val, Alignment, SuccessOrdering, + FailureOrdering, SSID); + FullTy = StructType::get(Context, {FullTy, Type::getInt1Ty(Context)}); + cast(I)->setVolatile(Record[OpNum]); + cast(I)->setWeak(Record[OpNum + 4]); + + InstructionList.push_back(I); + break; + } case bitc::FUNC_CODE_INST_ATOMICRMW: { // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, ssid] unsigned OpNum = 0; From 11352fa83bcb6dcff1f6704e6dcd1102bfc1aa53 Mon Sep 17 00:00:00 2001 From: Olivier Giroux Date: Wed, 9 Sep 2020 12:14:53 -0700 Subject: [PATCH 0187/1079] Revert a test using padding bits in atomics --- .../atomics.types.operations.req/atomic_helpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h index 1cb3a3d111144..d06cca9bbe5ce 100644 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h @@ -95,7 +95,7 @@ struct TestEachAtomicType { These aren't going to be lock-free, so some libatomic.a is necessary. */ - TestFunctor()(); + //TestFunctor()(); //< Actually, nobody is ready for this until P0528 TestFunctor()(); #endif TestFunctor()(); From dbac20bb6bfbf44dc25ce4c0e1a0ec422fa5cffb Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 9 Sep 2020 12:06:39 -0700 Subject: [PATCH 0188/1079] [gcov] Don't split entry block; add a synthetic entry block instead The entry block is split at the first instruction where `shouldKeepInEntry` returns false. The created basic block has a br jumping to the original entry block. 
The new basic block causes the function label line and the other entry block lines to be covered by different basic blocks, which can affect line counts with special control flows (fork/exec in the entry block requires heuristics in llvm-cov gcov to get consistent line counts). int main() { // BB0 return 0; // BB2 (due to entry block splitting) } // BB1 is the exit block (since gcov 4.8) This patch adds a synthetic entry block (like PGOInstrumentation and GCC) and inserts an edge from the synthetic entry block to the original entry block. We can thus remove the tricky `shouldKeepInEntry` and entry block splitting. The number of basic blocks does not change, but the emitted .gcno files will be smaller because we can save one GCOV_TAG_LINES tag. // BB0 is the synthetic entry block with a single edge to BB2 int main() { // BB2 return 0; // BB2 } // BB1 is the exit block (since gcov 4.8) --- .../Instrumentation/GCOVProfiling.cpp | 103 +++++++++--------- .../GCOVProfiling/atomic-counter.ll | 8 +- 2 files changed, 52 insertions(+), 59 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 3773c3e19ef69..736d12629017f 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -325,16 +325,12 @@ namespace { GCOVFunction(GCOVProfiler *P, Function *F, const DISubprogram *SP, unsigned EndLine, uint32_t Ident, int Version) : GCOVRecord(P), SP(SP), EndLine(EndLine), Ident(Ident), - Version(Version), ReturnBlock(P, 1) { + Version(Version), EntryBlock(P, 0), ReturnBlock(P, 1) { LLVM_DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n"); bool ExitBlockBeforeBody = Version >= 48; - uint32_t i = 0; - for (auto &BB : *F) { - // Skip index 1 if it's assigned to the ReturnBlock. - if (i == 1 && ExitBlockBeforeBody) - ++i; + uint32_t i = ExitBlockBeforeBody ? 2 : 1; + for (BasicBlock &BB : *F) Blocks.insert(std::make_pair(&BB, GCOVBlock(P, i++))); - } if (!ExitBlockBeforeBody) ReturnBlock.Number = i; @@ -349,6 +345,7 @@ namespace { return Blocks.find(BB)->second; } + GCOVBlock &getEntryBlock() { return EntryBlock; } GCOVBlock &getReturnBlock() { return ReturnBlock; } @@ -391,17 +388,22 @@ namespace { // Emit count of blocks. write(GCOV_TAG_BLOCKS); if (Version < 80) { - write(Blocks.size() + 1); - for (int i = Blocks.size() + 1; i; --i) + write(Blocks.size() + 2); + for (int i = Blocks.size() + 2; i; --i) write(0); } else { write(1); - write(Blocks.size() + 1); + write(Blocks.size() + 2); } LLVM_DEBUG(dbgs() << (Blocks.size() + 1) << " blocks\n"); // Emit edges between blocks. 
Function *F = Blocks.begin()->first->getParent(); + write(GCOV_TAG_ARCS); + write(3); + write(0); + write(getBlock(&F->getEntryBlock()).Number); + write(0); // no flags for (BasicBlock &I : *F) { GCOVBlock &Block = getBlock(&I); if (Block.OutEdges.empty()) continue; @@ -429,6 +431,7 @@ namespace { uint32_t FuncChecksum; int Version; DenseMap Blocks; + GCOVBlock EntryBlock; GCOVBlock ReturnBlock; }; } @@ -604,16 +607,6 @@ static bool isUsingScopeBasedEH(Function &F) { return isScopedEHPersonality(Personality); } -static bool shouldKeepInEntry(BasicBlock::iterator It) { - if (isa(*It)) return true; - if (isa(*It)) return true; - if (auto *II = dyn_cast(It)) { - if (II->getIntrinsicID() == llvm::Intrinsic::localescape) return true; - } - - return false; -} - bool GCOVProfiler::AddFlushBeforeForkAndExec() { SmallVector Forks; SmallVector Execs; @@ -740,10 +733,6 @@ void GCOVProfiler::emitProfileNotes() { // gcov expects every function to start with an entry block that has a // single successor, so split the entry block to make sure of that. BasicBlock &EntryBlock = F.getEntryBlock(); - BasicBlock::iterator It = EntryBlock.begin(); - while (shouldKeepInEntry(It)) - ++It; - EntryBlock.splitBasicBlock(It); Funcs.push_back(std::make_unique(this, &F, SP, EndLine, FunctionIdent++, Version)); @@ -758,6 +747,7 @@ void GCOVProfiler::emitProfileNotes() { if (!SP->isArtificial()) Func.getBlock(&EntryBlock).getFile(Filename).addLine(Line); + Func.getEntryBlock().addEdge(Func.getBlock(&EntryBlock)); for (auto &BB : F) { GCOVBlock &Block = Func.getBlock(&BB); Instruction *TI = BB.getTerminator(); @@ -846,6 +836,7 @@ bool GCOVProfiler::emitProfileArcs() { DenseMap, unsigned> EdgeToCounter; unsigned Edges = 0; + EdgeToCounter[{nullptr, &F.getEntryBlock()}] = Edges++; for (auto &BB : F) { Instruction *TI = BB.getTerminator(); if (isa(TI)) { @@ -869,12 +860,20 @@ bool GCOVProfiler::emitProfileArcs() { // If a BB has several predecessors, use a PHINode to select // the correct counter. for (auto &BB : F) { - const unsigned EdgeCount = - std::distance(pred_begin(&BB), pred_end(&BB)); - if (EdgeCount) { - // The phi node must be at the begin of the BB. - IRBuilder<> BuilderForPhi(&*BB.begin()); - Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx); + // The phi node must be at the begin of the BB. + IRBuilder<> BuilderForPhi(&*BB.begin()); + IRBuilder<> Builder(&*BB.getFirstInsertionPt()); + Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx); + Value *V; + if (&BB == &F.getEntryBlock()) { + auto It = EdgeToCounter.find({nullptr, &BB}); + V = Builder.CreateConstInBoundsGEP2_64(Counters->getValueType(), + Counters, 0, It->second); + } else { + const unsigned EdgeCount = + std::distance(pred_begin(&BB), pred_end(&BB)); + if (EdgeCount == 0) + continue; PHINode *Phi = BuilderForPhi.CreatePHI(Int64PtrTy, EdgeCount); for (BasicBlock *Pred : predecessors(&BB)) { auto It = EdgeToCounter.find({Pred, &BB}); @@ -883,36 +882,34 @@ bool GCOVProfiler::emitProfileArcs() { Value *EdgeCounter = BuilderForPhi.CreateConstInBoundsGEP2_64( Counters->getValueType(), Counters, 0, Edge); Phi->addIncoming(EdgeCounter, Pred); + V = Phi; } + } - // Skip phis, landingpads. 
- IRBuilder<> Builder(&*BB.getFirstInsertionPt()); + if (Options.Atomic) { + Builder.CreateAtomicRMW(AtomicRMWInst::Add, V, Builder.getInt64(1), + AtomicOrdering::Monotonic); + } else { + Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), V); + Count = Builder.CreateAdd(Count, Builder.getInt64(1)); + Builder.CreateStore(Count, V); + } + + Instruction *TI = BB.getTerminator(); + if (isa(TI)) { + auto It = EdgeToCounter.find({&BB, nullptr}); + assert(It != EdgeToCounter.end()); + const unsigned Edge = It->second; + Value *Counter = Builder.CreateConstInBoundsGEP2_64( + Counters->getValueType(), Counters, 0, Edge); if (Options.Atomic) { - Builder.CreateAtomicRMW(AtomicRMWInst::Add, Phi, + Builder.CreateAtomicRMW(AtomicRMWInst::Add, Counter, Builder.getInt64(1), AtomicOrdering::Monotonic); } else { - Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), Phi); + Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), Counter); Count = Builder.CreateAdd(Count, Builder.getInt64(1)); - Builder.CreateStore(Count, Phi); - } - - Instruction *TI = BB.getTerminator(); - if (isa(TI)) { - auto It = EdgeToCounter.find({&BB, nullptr}); - assert(It != EdgeToCounter.end()); - const unsigned Edge = It->second; - Value *Counter = Builder.CreateConstInBoundsGEP2_64( - Counters->getValueType(), Counters, 0, Edge); - if (Options.Atomic) { - Builder.CreateAtomicRMW(AtomicRMWInst::Add, Counter, - Builder.getInt64(1), - AtomicOrdering::Monotonic); - } else { - Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), Counter); - Count = Builder.CreateAdd(Count, Builder.getInt64(1)); - Builder.CreateStore(Count, Counter); - } + Builder.CreateStore(Count, Counter); } } } diff --git a/llvm/test/Transforms/GCOVProfiling/atomic-counter.ll b/llvm/test/Transforms/GCOVProfiling/atomic-counter.ll index 01843e26331fc..61ee30a4414bf 100644 --- a/llvm/test/Transforms/GCOVProfiling/atomic-counter.ll +++ b/llvm/test/Transforms/GCOVProfiling/atomic-counter.ll @@ -4,12 +4,8 @@ ; CHECK-LABEL: void @empty() ; CHECK-NEXT: entry: -; CHECK-NEXT: br label %0, !dbg [[DBG:![0-9]+]] -; CHECK: 0: -; CHECK-NEXT: %1 = phi i64* [ getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_gcov_ctr, i64 0, i64 0), %entry ], !dbg [[DBG]] -; CHECK-NEXT: %2 = atomicrmw add i64* %1, i64 1 monotonic, !dbg [[DBG]] -;; Counter for the exit. 
-; CHECK-NEXT: %3 = atomicrmw add i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_gcov_ctr, i64 0, i64 1), i64 1 monotonic, !dbg [[DBG]] +; CHECK-NEXT: %0 = atomicrmw add i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_gcov_ctr, i64 0, i64 0), i64 1 monotonic, !dbg [[DBG:![0-9]+]] +; CHECK-NEXT: %1 = atomicrmw add i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_gcov_ctr, i64 0, i64 1), i64 1 monotonic, !dbg [[DBG]] ; CHECK-NEXT: ret void, !dbg [[DBG]] define dso_local void @empty() !dbg !5 { From 1dd4c4e0a8e21ebb221a2b18f7cc774b2ac6259a Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 8 Sep 2020 17:00:40 -0400 Subject: [PATCH 0189/1079] [InstCombine] add tests for add/sub-of-shl; NFC --- .../test/Transforms/InstCombine/shl-factor.ll | 281 ++++++++++++++++++ 1 file changed, 281 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/shl-factor.ll diff --git a/llvm/test/Transforms/InstCombine/shl-factor.ll b/llvm/test/Transforms/InstCombine/shl-factor.ll new file mode 100644 index 0000000000000..274d6e3a5e6b2 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/shl-factor.ll @@ -0,0 +1,281 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +declare void @use8(i8) + +define i6 @add_shl_same_amount(i6 %x, i6 %y, i6 %z) { +; CHECK-LABEL: @add_shl_same_amount( +; CHECK-NEXT: [[XS:%.*]] = shl i6 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[YS:%.*]] = shl i6 [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[DIFF:%.*]] = add i6 [[XS]], [[YS]] +; CHECK-NEXT: ret i6 [[DIFF]] +; + %xs = shl i6 %x, %z + %ys = shl i6 %y, %z + %diff = add i6 %xs, %ys + ret i6 %diff +} + +define <2 x i4> @add_shl_same_amount_nsw(<2 x i4> %x, <2 x i4> %y, <2 x i4> %z) { +; CHECK-LABEL: @add_shl_same_amount_nsw( +; CHECK-NEXT: [[XS:%.*]] = shl nsw <2 x i4> [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[YS:%.*]] = shl nsw <2 x i4> [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[DIFF:%.*]] = add nsw <2 x i4> [[XS]], [[YS]] +; CHECK-NEXT: ret <2 x i4> [[DIFF]] +; + %xs = shl nsw <2 x i4> %x, %z + %ys = shl nsw <2 x i4> %y, %z + %diff = add nsw <2 x i4> %xs, %ys + ret <2 x i4> %diff +} + +define i64 @add_shl_same_amount_nuw(i64 %x, i64 %y, i64 %z) { +; CHECK-LABEL: @add_shl_same_amount_nuw( +; CHECK-NEXT: [[XS:%.*]] = shl nuw i64 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[YS:%.*]] = shl nuw i64 [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[DIFF:%.*]] = add nuw i64 [[XS]], [[YS]] +; CHECK-NEXT: ret i64 [[DIFF]] +; + %xs = shl nuw i64 %x, %z + %ys = shl nuw i64 %y, %z + %diff = add nuw i64 %xs, %ys + ret i64 %diff +} + +define i8 @add_shl_same_amount_nsw_extra_use1(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @add_shl_same_amount_nsw_extra_use1( +; CHECK-NEXT: [[XS:%.*]] = shl nuw nsw i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: call void @use8(i8 [[XS]]) +; CHECK-NEXT: [[YS:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[DIFF:%.*]] = add nsw i8 [[XS]], [[YS]] +; CHECK-NEXT: ret i8 [[DIFF]] +; + %xs = shl nsw nuw i8 %x, %z + call void @use8(i8 %xs) + %ys = shl nsw nuw i8 %y, %z + %diff = add nsw i8 %xs, %ys + ret i8 %diff +} + +define i8 @add_shl_same_amount_nuw_extra_use2(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @add_shl_same_amount_nuw_extra_use2( +; CHECK-NEXT: [[XS:%.*]] = shl nuw i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[YS:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[Z]] +; CHECK-NEXT: call void @use8(i8 [[YS]]) +; CHECK-NEXT: [[DIFF:%.*]] = add nuw nsw i8 [[XS]], [[YS]] +; CHECK-NEXT: ret i8 [[DIFF]] +; + %xs = shl nuw i8 %x, %z + %ys = shl nsw nuw i8 %y, %z + call void 
@use8(i8 %ys) + %diff = add nsw nuw i8 %xs, %ys + ret i8 %diff +} + +define i8 @add_shl_same_amount_nsw_nuw_extra_use3(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @add_shl_same_amount_nsw_nuw_extra_use3( +; CHECK-NEXT: [[XS:%.*]] = shl nuw nsw i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: call void @use8(i8 [[XS]]) +; CHECK-NEXT: [[YS:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[Z]] +; CHECK-NEXT: call void @use8(i8 [[YS]]) +; CHECK-NEXT: [[DIFF:%.*]] = add nuw nsw i8 [[XS]], [[YS]] +; CHECK-NEXT: ret i8 [[DIFF]] +; + %xs = shl nsw nuw i8 %x, %z + call void @use8(i8 %xs) + %ys = shl nsw nuw i8 %y, %z + call void @use8(i8 %ys) + %diff = add nsw nuw i8 %xs, %ys + ret i8 %diff +} + +define i6 @add_shl_same_amount_partial_nsw1(i6 %x, i6 %y, i6 %z) { +; CHECK-LABEL: @add_shl_same_amount_partial_nsw1( +; CHECK-NEXT: [[XS:%.*]] = shl nsw i6 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[YS:%.*]] = shl nsw i6 [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[DIFF:%.*]] = add i6 [[XS]], [[YS]] +; CHECK-NEXT: ret i6 [[DIFF]] +; + %xs = shl nsw i6 %x, %z + %ys = shl nsw i6 %y, %z + %diff = add i6 %xs, %ys + ret i6 %diff +} + +define i6 @add_shl_same_amount_partial_nsw2(i6 %x, i6 %y, i6 %z) { +; CHECK-LABEL: @add_shl_same_amount_partial_nsw2( +; CHECK-NEXT: [[XS:%.*]] = shl i6 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[YS:%.*]] = shl nsw i6 [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[DIFF:%.*]] = add nsw i6 [[XS]], [[YS]] +; CHECK-NEXT: ret i6 [[DIFF]] +; + %xs = shl i6 %x, %z + %ys = shl nsw i6 %y, %z + %diff = add nsw i6 %xs, %ys + ret i6 %diff +} + +define i6 @add_shl_same_amount_partial_nuw1(i6 %x, i6 %y, i6 %z) { +; CHECK-LABEL: @add_shl_same_amount_partial_nuw1( +; CHECK-NEXT: [[XS:%.*]] = shl nuw i6 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[YS:%.*]] = shl nuw i6 [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[DIFF:%.*]] = add i6 [[XS]], [[YS]] +; CHECK-NEXT: ret i6 [[DIFF]] +; + %xs = shl nuw i6 %x, %z + %ys = shl nuw i6 %y, %z + %diff = add i6 %xs, %ys + ret i6 %diff +} + +define i6 @add_shl_same_amount_partial_nuw2(i6 %x, i6 %y, i6 %z) { +; CHECK-LABEL: @add_shl_same_amount_partial_nuw2( +; CHECK-NEXT: [[XS:%.*]] = shl nuw i6 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[YS:%.*]] = shl i6 [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[DIFF:%.*]] = add nuw i6 [[XS]], [[YS]] +; CHECK-NEXT: ret i6 [[DIFF]] +; + %xs = shl nuw i6 %x, %z + %ys = shl i6 %y, %z + %diff = add nuw i6 %xs, %ys + ret i6 %diff +} + +define i6 @sub_shl_same_amount(i6 %x, i6 %y, i6 %z) { +; CHECK-LABEL: @sub_shl_same_amount( +; CHECK-NEXT: [[XS:%.*]] = shl i6 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[YS:%.*]] = shl i6 [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[DIFF:%.*]] = sub i6 [[XS]], [[YS]] +; CHECK-NEXT: ret i6 [[DIFF]] +; + %xs = shl i6 %x, %z + %ys = shl i6 %y, %z + %diff = sub i6 %xs, %ys + ret i6 %diff +} + +define <2 x i4> @sub_shl_same_amount_nsw(<2 x i4> %x, <2 x i4> %y, <2 x i4> %z) { +; CHECK-LABEL: @sub_shl_same_amount_nsw( +; CHECK-NEXT: [[XS:%.*]] = shl nsw <2 x i4> [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[YS:%.*]] = shl nsw <2 x i4> [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[DIFF:%.*]] = sub nsw <2 x i4> [[XS]], [[YS]] +; CHECK-NEXT: ret <2 x i4> [[DIFF]] +; + %xs = shl nsw <2 x i4> %x, %z + %ys = shl nsw <2 x i4> %y, %z + %diff = sub nsw <2 x i4> %xs, %ys + ret <2 x i4> %diff +} + +define i64 @sub_shl_same_amount_nuw(i64 %x, i64 %y, i64 %z) { +; CHECK-LABEL: @sub_shl_same_amount_nuw( +; CHECK-NEXT: [[XS:%.*]] = shl nuw i64 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[YS:%.*]] = shl nuw i64 [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[DIFF:%.*]] = sub nuw i64 [[XS]], [[YS]] +; CHECK-NEXT: ret i64 [[DIFF]] +; + %xs = shl nuw i64 %x, %z + %ys = shl 
nuw i64 %y, %z + %diff = sub nuw i64 %xs, %ys + ret i64 %diff +} + +define i8 @sub_shl_same_amount_nsw_extra_use1(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @sub_shl_same_amount_nsw_extra_use1( +; CHECK-NEXT: [[XS:%.*]] = shl nuw nsw i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: call void @use8(i8 [[XS]]) +; CHECK-NEXT: [[YS:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[DIFF:%.*]] = sub nsw i8 [[XS]], [[YS]] +; CHECK-NEXT: ret i8 [[DIFF]] +; + %xs = shl nsw nuw i8 %x, %z + call void @use8(i8 %xs) + %ys = shl nsw nuw i8 %y, %z + %diff = sub nsw i8 %xs, %ys + ret i8 %diff +} + +define i8 @sub_shl_same_amount_nuw_extra_use2(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @sub_shl_same_amount_nuw_extra_use2( +; CHECK-NEXT: [[XS:%.*]] = shl nuw i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[YS:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[Z]] +; CHECK-NEXT: call void @use8(i8 [[YS]]) +; CHECK-NEXT: [[DIFF:%.*]] = sub nuw nsw i8 [[XS]], [[YS]] +; CHECK-NEXT: ret i8 [[DIFF]] +; + %xs = shl nuw i8 %x, %z + %ys = shl nsw nuw i8 %y, %z + call void @use8(i8 %ys) + %diff = sub nsw nuw i8 %xs, %ys + ret i8 %diff +} + +define i8 @sub_shl_same_amount_nsw_nuw_extra_use3(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @sub_shl_same_amount_nsw_nuw_extra_use3( +; CHECK-NEXT: [[XS:%.*]] = shl nuw nsw i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: call void @use8(i8 [[XS]]) +; CHECK-NEXT: [[YS:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[Z]] +; CHECK-NEXT: call void @use8(i8 [[YS]]) +; CHECK-NEXT: [[DIFF:%.*]] = sub nuw nsw i8 [[XS]], [[YS]] +; CHECK-NEXT: ret i8 [[DIFF]] +; + %xs = shl nsw nuw i8 %x, %z + call void @use8(i8 %xs) + %ys = shl nsw nuw i8 %y, %z + call void @use8(i8 %ys) + %diff = sub nsw nuw i8 %xs, %ys + ret i8 %diff +} + +define i6 @sub_shl_same_amount_partial_nsw1(i6 %x, i6 %y, i6 %z) { +; CHECK-LABEL: @sub_shl_same_amount_partial_nsw1( +; CHECK-NEXT: [[XS:%.*]] = shl nsw i6 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[YS:%.*]] = shl nsw i6 [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[DIFF:%.*]] = sub i6 [[XS]], [[YS]] +; CHECK-NEXT: ret i6 [[DIFF]] +; + %xs = shl nsw i6 %x, %z + %ys = shl nsw i6 %y, %z + %diff = sub i6 %xs, %ys + ret i6 %diff +} + +define i6 @sub_shl_same_amount_partial_nsw2(i6 %x, i6 %y, i6 %z) { +; CHECK-LABEL: @sub_shl_same_amount_partial_nsw2( +; CHECK-NEXT: [[XS:%.*]] = shl i6 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[YS:%.*]] = shl nsw i6 [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[DIFF:%.*]] = sub nsw i6 [[XS]], [[YS]] +; CHECK-NEXT: ret i6 [[DIFF]] +; + %xs = shl i6 %x, %z + %ys = shl nsw i6 %y, %z + %diff = sub nsw i6 %xs, %ys + ret i6 %diff +} + +define i6 @sub_shl_same_amount_partial_nuw1(i6 %x, i6 %y, i6 %z) { +; CHECK-LABEL: @sub_shl_same_amount_partial_nuw1( +; CHECK-NEXT: [[XS:%.*]] = shl nuw i6 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[YS:%.*]] = shl nuw i6 [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[DIFF:%.*]] = sub i6 [[XS]], [[YS]] +; CHECK-NEXT: ret i6 [[DIFF]] +; + %xs = shl nuw i6 %x, %z + %ys = shl nuw i6 %y, %z + %diff = sub i6 %xs, %ys + ret i6 %diff +} + +define i6 @sub_shl_same_amount_partial_nuw2(i6 %x, i6 %y, i6 %z) { +; CHECK-LABEL: @sub_shl_same_amount_partial_nuw2( +; CHECK-NEXT: [[XS:%.*]] = shl nuw i6 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[YS:%.*]] = shl i6 [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[DIFF:%.*]] = sub nuw i6 [[XS]], [[YS]] +; CHECK-NEXT: ret i6 [[DIFF]] +; + %xs = shl nuw i6 %x, %z + %ys = shl i6 %y, %z + %diff = sub nuw i6 %xs, %ys + ret i6 %diff +} + From 0ee54cf88329c50f25872ac1c67d7ae60ee3154c Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 9 Sep 2020 14:28:00 -0500 Subject: [PATCH 0190/1079] [Hexagon] Account for 
truncating pairs to non-pairs when widening truncates

Added missing selection patterns for vpackl.
---
 llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 6 ++++++
 .../Hexagon/autohvx/isel-widen-truncate-pair.ll | 16 ++++++++++++++++
 2 files changed, 22 insertions(+)
 create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-pair.ll

diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index c9435cd21c2e0..630fd7a17040d 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -406,9 +406,15 @@ let Predicates = [UseHVX] in {
  def: Pat<(srl HVI16:$Vs, HVI16:$Vt), (V6_vlsrhv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(srl HVI32:$Vs, HVI32:$Vt), (V6_vlsrwv HvxVR:$Vs, HvxVR:$Vt)>;

+  // Vpackl is a pseudo-op that is used when legalizing widened truncates.
+  // It should never be produced with a register pair in the output, but
+  // it can happen to have a pair as an input.
  def: Pat<(VecI8 (vpackl HVI16:$Vs)), (V6_vdealb HvxVR:$Vs)>;
  def: Pat<(VecI8 (vpackl HVI32:$Vs)), (V6_vdealb4w (IMPLICIT_DEF), HvxVR:$Vs)>;
  def: Pat<(VecI16 (vpackl HVI32:$Vs)), (V6_vdealh HvxVR:$Vs)>;
+  def: Pat<(VecI8 (vpackl HWI16:$Vs)), (V6_vpackeb (HiVec $Vs), (LoVec $Vs))>;
+  def: Pat<(VecI8 (vpackl HWI32:$Vs)), (V6_vdealb4w (HiVec $Vs), (LoVec $Vs))>;
+  def: Pat<(VecI16 (vpackl HWI32:$Vs)), (V6_vpackeh (HiVec $Vs), (LoVec $Vs))>;

  def: Pat<(VecI16 (bswap HVI16:$Vs)),
           (V6_vdelta HvxVR:$Vs, (V6_lvsplatw (A2_tfrsi 0x01010101)))>;
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-pair.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-pair.ll
new file mode 100644
index 0000000000000..83d49fca03b88
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-pair.ll
@@ -0,0 +1,16 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; This has a v32i8 = truncate v16i32 (64b mode), which was legalized to
+; v64i8 = vpackl v32i32, for which there were no selection patterns provided.
+; Check that we generate vdeale for this.
+
+; CHECK-LABEL: fred:
+; CHECK: vdeale(v1.b,v0.b)
+define void @fred(<32 x i8>* %a0, <32 x i32> %a1) #0 {
+  %v0 = trunc <32 x i32> %a1 to <32 x i8>
+  store <32 x i8> %v0, <32 x i8>* %a0, align 32
+  ret void
+}
+
+attributes #0 = { "target-cpu"="hexagonv65" "target-features"="+hvx,+hvx-length64b" }
+

From ad61e346d302eccbc12fdfb81ea1b0cd28e80010 Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Wed, 9 Sep 2020 12:31:25 -0700
Subject: [PATCH 0191/1079] [gcov] Give the __llvm_gcov_ctr load instruction a
 name for more readable output

---
 llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 736d12629017f..cc8b92e21c7ce 100644
--- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -890,7 +890,8 @@ bool GCOVProfiler::emitProfileArcs() {
          Builder.CreateAtomicRMW(AtomicRMWInst::Add, V, Builder.getInt64(1),
                                  AtomicOrdering::Monotonic);
        } else {
-          Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), V);
+          Value *Count =
+              Builder.CreateLoad(Builder.getInt64Ty(), V, "gcov_ctr");
          Count = Builder.CreateAdd(Count, Builder.getInt64(1));
          Builder.CreateStore(Count, V);
        }

From 415a4fbea7c1a39c780caa3cb7287fe09c5267d2 Mon Sep 17 00:00:00 2001
From: Jian Cai
Date: Wed, 9 Sep 2020 11:58:22 -0700
Subject: [PATCH 0192/1079] [MC] Resolve the difference of symbols in
 consecutive MCDataFragments

Try to resolve the difference of two symbols in consecutive
MCDataFragments. This is important for an idiom like
"foo:instr; .if . - foo; instr; .endif"
(https://bugs.llvm.org/show_bug.cgi?id=43795).

Reviewed By: nickdesaulniers

Differential Revision: https://reviews.llvm.org/D69411
---
 llvm/include/llvm/MC/MCFragment.h | 7 ++
 llvm/lib/MC/MCExpr.cpp | 83 +++++++++++++--------
 llvm/lib/MC/MCSection.cpp | 1 +
 llvm/test/MC/ARM/directive-if-subtraction.s | 52 +++++++++++++
 llvm/test/MC/MachO/reloc-diff.s | 4 -
 5 files changed, 110 insertions(+), 37 deletions(-)
 create mode 100644 llvm/test/MC/ARM/directive-if-subtraction.s

diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h
index 87338ab46cc2a..0e5a5976cc8e4 100644
--- a/llvm/include/llvm/MC/MCFragment.h
+++ b/llvm/include/llvm/MC/MCFragment.h
@@ -64,6 +64,10 @@ class MCFragment : public ilist_node_with_parent<MCFragment, MCSection> {
  /// The layout order of this fragment.
  unsigned LayoutOrder;

+  /// The subsection this fragment belongs to. This is 0 if the fragment is not
+  // in any subsection.
+  unsigned SubsectionNumber = 0;
+
  FragmentType Kind;

  /// Whether fragment is being laid out.
@@ -102,6 +106,9 @@ class MCFragment : public ilist_node_with_parent { bool hasInstructions() const { return HasInstructions; } void dump() const; + + void setSubsectionNumber(unsigned Value) { SubsectionNumber = Value; } + unsigned getSubsectionNumber() const { return SubsectionNumber; } }; class MCDummyFragment : public MCFragment { diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp index 07680e95e8e1e..7f282a1ba4977 100644 --- a/llvm/lib/MC/MCExpr.cpp +++ b/llvm/lib/MC/MCExpr.cpp @@ -588,12 +588,7 @@ static void AttemptToFoldSymbolOffsetDifference( if (!Asm->getWriter().isSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet)) return; - MCFragment *FA = SA.getFragment(); - MCFragment *FB = SB.getFragment(); - if (FA == FB && !SA.isVariable() && !SA.isUnset() && !SB.isVariable() && - !SB.isUnset()) { - Addend += (SA.getOffset() - SB.getOffset()); - + auto FinalizeFolding = [&]() { // Pointers to Thumb symbols need to have their low-bit set to allow // for interworking. if (Asm->isThumbFunc(&SA)) @@ -607,11 +602,17 @@ static void AttemptToFoldSymbolOffsetDifference( // Clear the symbol expr pointers to indicate we have folded these // operands. A = B = nullptr; - return; - } + }; - if (!Layout) - return; + const MCFragment *FA = SA.getFragment(); + const MCFragment *FB = SB.getFragment(); + // If both symbols are in the same fragment, return the difference of their + // offsets + if (FA == FB && !SA.isVariable() && !SA.isUnset() && !SB.isVariable() && + !SB.isUnset()) { + Addend += SA.getOffset() - SB.getOffset(); + return FinalizeFolding(); + } const MCSection &SecA = *FA->getParent(); const MCSection &SecB = *FB->getParent(); @@ -619,30 +620,46 @@ static void AttemptToFoldSymbolOffsetDifference( if ((&SecA != &SecB) && !Addrs) return; - // One of the symbol involved is part of a fragment being laid out. Quit now - // to avoid a self loop. - if (!Layout->canGetFragmentOffset(FA) || !Layout->canGetFragmentOffset(FB)) - return; + if (Layout) { + // One of the symbol involved is part of a fragment being laid out. Quit now + // to avoid a self loop. + if (!Layout->canGetFragmentOffset(FA) || !Layout->canGetFragmentOffset(FB)) + return; + + // Eagerly evaluate when layout is finalized. + Addend += Layout->getSymbolOffset(A->getSymbol()) - + Layout->getSymbolOffset(B->getSymbol()); + if (Addrs && (&SecA != &SecB)) + Addend += (Addrs->lookup(&SecA) - Addrs->lookup(&SecB)); + + FinalizeFolding(); + } else { + // When layout is not finalized, our ability to resolve differences between + // symbols is limited to specific cases where the fragments between two + // symbols (including the fragments the symbols are defined in) are + // fixed-size fragments so the difference can be calculated. For example, + // this is important when the Subtarget is changed and a new MCDataFragment + // is created in the case of foo: instr; .arch_extension ext; instr .if . - + // foo. + if (SA.isVariable() || SA.isUnset() || SB.isVariable() || SB.isUnset() || + FA->getKind() != MCFragment::FT_Data || + FB->getKind() != MCFragment::FT_Data || + FA->getSubsectionNumber() != FB->getSubsectionNumber()) + return; + // Try to find a constant displacement from FA to FB, add the displacement + // between the offset in FA of SA and the offset in FB of SB. + int64_t Displacement = SA.getOffset() - SB.getOffset(); + for (auto FI = FB->getIterator(), FE = SecA.end(); FI != FE; ++FI) { + if (&*FI == FA) { + Addend += Displacement; + return FinalizeFolding(); + } - // Eagerly evaluate. 
- Addend += Layout->getSymbolOffset(A->getSymbol()) - - Layout->getSymbolOffset(B->getSymbol()); - if (Addrs && (&SecA != &SecB)) - Addend += (Addrs->lookup(&SecA) - Addrs->lookup(&SecB)); - - // Pointers to Thumb symbols need to have their low-bit set to allow - // for interworking. - if (Asm->isThumbFunc(&SA)) - Addend |= 1; - - // If symbol is labeled as micromips, we set low-bit to ensure - // correct offset in .gcc_except_table - if (Asm->getBackend().isMicroMips(&SA)) - Addend |= 1; - - // Clear the symbol expr pointers to indicate we have folded these - // operands. - A = B = nullptr; + if (FI->getKind() != MCFragment::FT_Data) + return; + Displacement += cast(FI)->getContents().size(); + } + } } static bool canFold(const MCAssembler *Asm, const MCSymbolRefExpr *A, diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp index ba256102080a7..7c5834895e523 100644 --- a/llvm/lib/MC/MCSection.cpp +++ b/llvm/lib/MC/MCSection.cpp @@ -82,6 +82,7 @@ MCSection::getSubsectionInsertionPoint(unsigned Subsection) { SubsectionFragmentMap.insert(MI, std::make_pair(Subsection, F)); getFragmentList().insert(IP, F); F->setParent(this); + F->setSubsectionNumber(Subsection); } return IP; diff --git a/llvm/test/MC/ARM/directive-if-subtraction.s b/llvm/test/MC/ARM/directive-if-subtraction.s new file mode 100644 index 0000000000000..edb386593ba63 --- /dev/null +++ b/llvm/test/MC/ARM/directive-if-subtraction.s @@ -0,0 +1,52 @@ +// RUN: llvm-mc -triple armv7a-linux-gnueabihf %s -filetype=obj -o /dev/null 2>&1 | FileCheck --check-prefix=OBJ --allow-empty %s +// RUN: not llvm-mc -triple armv7a-linux-gnueabihf %s -o /dev/null 2>&1 | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple armv7a-linux-gnueabihf %s -filetype=obj -o - | llvm-objdump -d - | FileCheck --check-prefix=DISASM %s + +nop +// Create a new MCDataFragment due to Subtarget change +.arch_extension sec +9997:nop +.if . - 9997b == 0 +// OBJ-NOT:[[@LINE-1]]:5: error: expected absolute expression +// ASM:[[@LINE-2]]:5: error: expected absolute expression +// DISASM: orr r1, r1, #2 +orr r1, r1, #1 +.else +orr r1, r1, #2 +.endif + + + +@ RUN: not llvm-mc -filetype=obj -triple arm-linux-gnueabihf --defsym=ERR=1 %s -o /dev/null 2>&1 | FileCheck --check-prefix=ARM-ERR %s +@ RUN: not llvm-mc -filetype=obj -triple thumbv7a-linux-gnueabihf --defsym=ERR=1 %s -o /dev/null 2>&1 | FileCheck --check-prefix=THUMB2-ERR %s + +.ifdef ERR +9997: nop + .align 4 + nop +.if . - 9997b == 4 +// ARM-ERR:[[@LINE-1]]:5: error: expected absolute expression +.endif + +9997: nop + .space 4 + nop +.if . - 9997b == 4 +// ARM-ERR:[[@LINE-1]]:5: error: expected absolute expression +.endif + +9997: + ldr r0,=0x12345678 + .ltorg + nop +.if . - 9997b == 4 +// ARM-ERR:[[@LINE-1]]:5: error: expected absolute expression +.endif + +9997: nop + b external + nop +.if . 
- 9997b == 4 +// THUMB2-ERR:[[@LINE-1]]:5: error: expected absolute expression +.endif +.endif diff --git a/llvm/test/MC/MachO/reloc-diff.s b/llvm/test/MC/MachO/reloc-diff.s index 8b2e7606b3542..ba00e7bb1c9ff 100644 --- a/llvm/test/MC/MachO/reloc-diff.s +++ b/llvm/test/MC/MachO/reloc-diff.s @@ -22,9 +22,5 @@ Ltemp: // CHECK-NEXT: 0x0 0 2 n/a GENERIC_RELOC_PAIR 1 0x0 // CHECK-NEXT: 0x8 0 2 n/a GENERIC_RELOC_LOCAL_SECTDIFF 1 0x0 // CHECK-NEXT: 0x0 0 2 n/a GENERIC_RELOC_PAIR 1 0x0 -// CHECK-NEXT: 0x4 0 2 n/a GENERIC_RELOC_LOCAL_SECTDIFF 1 0x0 -// CHECK-NEXT: 0x0 0 2 n/a GENERIC_RELOC_PAIR 1 0x0 -// CHECK-NEXT: 0x0 0 2 n/a GENERIC_RELOC_SECTDIFF 1 0x0 -// CHECK-NEXT: 0x0 0 2 n/a GENERIC_RELOC_PAIR 1 0x0 // CHECK-NEXT: } // CHECK-NEXT: ] From 72e2fbde5456cfaa03f60750f7f421b165824cc8 Mon Sep 17 00:00:00 2001 From: Tony Date: Sat, 5 Sep 2020 22:53:47 +0000 Subject: [PATCH 0193/1079] [AMDGPU] Correct gfx1031 XNACK setting documentation - gfx1031 does not support XNACK. Differential Revision: https://reviews.llvm.org/D87198 --- llvm/docs/AMDGPUUsage.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 967b667427e05..10f6a3e495092 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -266,9 +266,7 @@ names from both the *Processor* and *Alternative Processor* can be used. .. TODO Add product names. - ``gfx1031`` ``amdgcn`` dGPU - xnack *TBA* - [off] - - wavefrontsize64 + ``gfx1031`` ``amdgcn`` dGPU - wavefrontsize64 *TBA* [off] - cumode [off] From 0ab6a1569806783fcbf6303c462f051e9b5f764b Mon Sep 17 00:00:00 2001 From: Hiroshi Yamauchi Date: Fri, 21 Aug 2020 12:44:36 -0700 Subject: [PATCH 0194/1079] [X86] Add support for using fast short rep mov for memcpy lowering. Disabled by default behind an option. 
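
To make the effect concrete, here is an illustrative sketch (not part of the original commit message); the expected instruction sequence is taken from the test added below, while the C++ wrapper itself is only an assumed example:

```c++
#include <cstring>

// With -x86-use-fsrm-for-memcpy and a CPU that has the FSRM feature
// (e.g. Icelake), a variable-length copy like this is lowered to a bare
// `rep movsb` instead of a call to the memcpy library routine:
//   movq %rdx, %rcx   ; byte count into RCX
//   rep movsb         ; fast short rep mov performs the copy
void copy(char *dst, const char *src, unsigned long n) {
  std::memcpy(dst, src, n);
}
```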
Differential Revision: https://reviews.llvm.org/D86883
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +-
 llvm/lib/Target/X86/X86SelectionDAGInfo.cpp | 8 ++++++
 llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll | 31 +++++++++++++++++++++
 3 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1cd928c1de120..ce46dd9167f17 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3109,7 +3109,7 @@ argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
-  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
+  SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);

  return DAG.getMemcpy(
      Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index ce8d1d464da97..e76908ef4bc40 100644
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -24,6 +24,10 @@ using namespace llvm;

#define DEBUG_TYPE "x86-selectiondag-info"

+static cl::opt<bool>
+    UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false),
+                     cl::desc("Use fast short rep mov in memcpy lowering"));
+
bool X86SelectionDAGInfo::isBaseRegConflictPossible(
    SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
  // We cannot use TRI->hasBasePointer() until *after* we select all basic
@@ -306,6 +310,10 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
  const X86Subtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();

+  // If enabled and available, use fast short rep mov.
+  if (UseFSRMForMemcpy && Subtarget.hasFSRM())
+    return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);
+
  /// Handle constant sizes,
  if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
    return emitConstantSizeRepmov(
diff --git a/llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll b/llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll
new file mode 100644
index 0000000000000..54f7973dea39a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mattr=-fsrm < %s -o - | FileCheck %s --check-prefix=NOFSRM
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mattr=+fsrm < %s -o - | FileCheck %s --check-prefix=FSRM
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=haswell < %s | FileCheck %s --check-prefix=NOFSRM
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=icelake-client < %s | FileCheck %s --check-prefix=FSRM
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=icelake-server < %s | FileCheck %s --check-prefix=FSRM
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+define void @test1(i8* %a, i8* %b, i64 %s) nounwind {
+; NOFSRM-LABEL: test1
+; NOFSRM: # %bb.0:
+; NOFSRM: jmp memcpy
+;
+; FSRM-LABEL: test1
+; FSRM: # %bb.0:
+; FSRM-NEXT: movq %rdx, %rcx
+; FSRM-NEXT: rep;movsb (%rsi), %es:(%rdi)
+; FSRM-NEXT: retq
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 %s, i1 0)
+  ret void
+}
+
+; Check that we don't crash due to a memcpy size type mismatch error ("Cannot
+; emit physreg copy instruction") in X86InstrInfo::copyPhysReg.
+%struct = type { [4096 x i8] }
+declare void @foo(%struct* byval)
+define void @test2(%struct* %x) {
+  call void @foo(%struct* byval %x)
+  ret void
+}

From be35264ab5a38e8367dde49acfbfa1dd71230dfc Mon Sep 17 00:00:00 2001
From: Sean Silva
Date: Tue, 8 Sep 2020 15:49:50 -0700
Subject: [PATCH 0195/1079] Wordsmith RegionBranchOpInterface verification
 errors

I was having a lot of trouble parsing the messages. In particular, the
messages like:

```
<stdin>:3:8: error: 'scf.if' op along control flow edge from Region #0 to scf.if source #1 type '!npcomprt.tensor' should match input #1 type 'tensor<?xf32>'
```

Specifically, one thing that kept catching me was parsing the
"to scf.if source #1 type" as one thing, but really it is
"to parent results: source type #1".
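
For contrast, an illustrative example of the reworded output: after this change the corresponding diagnostics read roughly as follows (wording taken from the updated tests in this patch):

```
error: 'scf.if' op region control flow edge from Region #0 to parent results: source has 1 operands, but target successor needs 2
error: along control flow edge from Region #0 to Region #0: source type #1 'i32' should match input type #1 'f32'
```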
Differential Revision: https://reviews.llvm.org/D87334 --- mlir/lib/Interfaces/ControlFlowInterfaces.cpp | 15 +++++++-------- mlir/test/Dialect/SCF/invalid.mlir | 4 ++-- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/mlir/lib/Interfaces/ControlFlowInterfaces.cpp b/mlir/lib/Interfaces/ControlFlowInterfaces.cpp index fc79c820165d4..498486281c770 100644 --- a/mlir/lib/Interfaces/ControlFlowInterfaces.cpp +++ b/mlir/lib/Interfaces/ControlFlowInterfaces.cpp @@ -103,13 +103,13 @@ static LogicalResult verifyTypesAlongAllEdges( if (sourceNo) diag << "Region #" << sourceNo.getValue(); else - diag << op->getName(); + diag << "parent operands"; diag << " to "; if (succRegionNo) diag << "Region #" << succRegionNo.getValue(); else - diag << op->getName(); + diag << "parent results"; return diag; }; @@ -117,10 +117,9 @@ static LogicalResult verifyTypesAlongAllEdges( TypeRange succInputsTypes = succ.getSuccessorInputs().getTypes(); if (sourceTypes.size() != succInputsTypes.size()) { InFlightDiagnostic diag = op->emitOpError(" region control flow edge "); - return printEdgeName(diag) - << " has " << sourceTypes.size() - << " source operands, but target successor needs " - << succInputsTypes.size(); + return printEdgeName(diag) << ": source has " << sourceTypes.size() + << " operands, but target successor needs " + << succInputsTypes.size(); } for (auto typesIdx : @@ -130,8 +129,8 @@ static LogicalResult verifyTypesAlongAllEdges( if (sourceType != inputType) { InFlightDiagnostic diag = op->emitOpError(" along control flow edge "); return printEdgeName(diag) - << " source #" << typesIdx.index() << " type " << sourceType - << " should match input #" << typesIdx.index() << " type " + << ": source type #" << typesIdx.index() << " " << sourceType + << " should match input type #" << typesIdx.index() << " " << inputType; } } diff --git a/mlir/test/Dialect/SCF/invalid.mlir b/mlir/test/Dialect/SCF/invalid.mlir index 517e8855c97b8..06b902da781ca 100644 --- a/mlir/test/Dialect/SCF/invalid.mlir +++ b/mlir/test/Dialect/SCF/invalid.mlir @@ -325,7 +325,7 @@ func @reduceReturn_not_inside_reduce(%arg0 : f32) { func @std_if_incorrect_yield(%arg0: i1, %arg1: f32) { - // expected-error@+1 {{region control flow edge from Region #0 to scf.if has 1 source operands, but target successor needs 2}} + // expected-error@+1 {{region control flow edge from Region #0 to parent results: source has 1 operands, but target successor needs 2}} %x, %y = scf.if %arg0 -> (f32, f32) { %0 = addf %arg1, %arg1 : f32 scf.yield %0 : f32 @@ -401,7 +401,7 @@ func @std_for_operands_mismatch_3(%arg0 : index, %arg1 : index, %arg2 : index) { func @std_for_operands_mismatch_4(%arg0 : index, %arg1 : index, %arg2 : index) { %s0 = constant 0.0 : f32 %t0 = constant 1.0 : f32 - // expected-error @+1 {{along control flow edge from Region #0 to Region #0 source #1 type 'i32' should match input #1 type 'f32'}} + // expected-error @+1 {{along control flow edge from Region #0 to Region #0: source type #1 'i32' should match input type #1 'f32'}} %result1:2 = scf.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0, %ti = %t0) -> (f32, f32) { %sn = addf %si, %si : f32 From fb542b0b8c209b05ba3100baf01718961e30fc26 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Wed, 9 Sep 2020 11:28:14 -0700 Subject: [PATCH 0196/1079] [libc][MPFRWrapper] Provide a way to include MPFR header in downstream repos. 
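
A minimal sketch of what such a shim could look like (an illustration, not part of the commit; the include guard and the vendored MPFR path are assumptions, since every downstream repo lays this out differently):

```c++
// CustomMPFRIncluder.h -- hypothetical downstream shim. The build defines
// CUSTOM_MPFR_INCLUDER so that MPFRUtils.cpp includes this file instead of
// doing `#include <mpfr.h>` directly.
#ifndef CUSTOM_MPFR_INCLUDER_H
#define CUSTOM_MPFR_INCLUDER_H

// Assumed location of the in-tree MPFR sources; adjust per repo.
#include "third_party/mpfr/mpfr.h"

#endif // CUSTOM_MPFR_INCLUDER_H
```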
Reviewed By: asteinhauser

Differential Revision: https://reviews.llvm.org/D87412
---
 libc/utils/MPFRWrapper/MPFRUtils.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index a121234e62246..0520d8ae3ed91 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -15,10 +15,20 @@
#include "llvm/ADT/StringRef.h"

#include <memory>
-#include <mpfr.h>
#include <stdint.h>
#include <string>

+#ifdef CUSTOM_MPFR_INCLUDER
+// Some downstream repos are monoliths carrying MPFR sources in their third
+// party directory. In such repos, including the MPFR header as
+// `#include <mpfr.h>` is either disallowed or not possible. If that is the
+// case, a file named `CustomMPFRIncluder.h` should be added through which the
+// MPFR header can be included in a manner allowed in that repo.
+#include "CustomMPFRIncluder.h"
+#else
+#include <mpfr.h>
+#endif
+
template <typename T> using FPBits = __llvm_libc::fputil::FPBits<T>;

namespace __llvm_libc {

From cc76da7adab71f0b6559ea13069f899b2ecbf70c Mon Sep 17 00:00:00 2001
From: Amara Emerson
Date: Mon, 24 Aug 2020 10:46:50 -0700
Subject: [PATCH 0197/1079] [GlobalISel] Rewrite the
 elide-br-by-swapping-icmp-ops combine to do less.

This combine previously tried to take sequences like:
 %cond = G_ICMP pred, a, b
 G_BRCOND %cond, %truebb
 G_BR %falsebb
 %truebb:
 ...
 %falsebb:
 ...

and, by inverting the compare predicate and swapping branch targets, delete
the G_BR and instead have a single conditional branch to the falsebb. Since
in an earlier patch we have a combine to fold not(icmp) into just an
inverted icmp, we don't need this combine to do as much.

This patch instead generalizes the combine by just looking for:
 G_BRCOND %cond, %truebb
 G_BR %falsebb
 %truebb:
 ...
 %falsebb:
 ...

and then inverting the condition using a not (xor). The xor can be folded
away in a separate combine. This change also lets us avoid some optimization
code in the IRTranslator. I also think that deleting G_BRs in the combiner
is unnecessary. That's something that targets can decide to do at selection
time and could simplify generic code in the future.
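
As an illustrative sketch of the new behavior (register names are made up; the shape follows applyOptBrCondByInvertingCond below), the combine now rewrites

```
G_BRCOND %cond(s1), %bb.true   ; %bb.true is the layout successor
G_BR %bb.false
```

into

```
%one:_(s1) = G_CONSTANT i1 true
%inv:_(s1) = G_XOR %cond(s1), %one  ; invert the condition
G_BRCOND %inv(s1), %bb.false
G_BR %bb.true                       ; now targets the fallthrough block
```

where the G_XOR is expected to be folded away by the not(icmp) combine and the branch to the fallthrough block can be dropped at selection time.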
Differential Revision: https://reviews.llvm.org/D86664 --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 7 +-- llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 4 ++ .../include/llvm/Target/GlobalISel/Combine.td | 10 ++--- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 44 ++++++++----------- llvm/lib/CodeGen/GlobalISel/Utils.cpp | 12 +++++ llvm/lib/Target/AArch64/AArch64Combine.td | 1 - llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 3 +- .../CodeGen/AArch64/GlobalISel/const-0.ll | 25 ----------- .../GlobalISel/prelegalizercombiner-br.mir | 9 ++-- .../AArch64/GlobalISel/select-constant.mir | 34 ++++++++++++++ .../AMDGPU/GlobalISel/bool-legalization.ll | 6 ++- .../GlobalISel/llvm.amdgcn.is.private.ll | 8 ++-- .../GlobalISel/llvm.amdgcn.is.shared.ll | 8 ++-- .../CodeGen/AMDGPU/GlobalISel/localizer.ll | 6 ++- .../CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll | 5 ++- .../CodeGen/AMDGPU/GlobalISel/srem.i64.ll | 5 ++- .../CodeGen/AMDGPU/GlobalISel/udiv.i64.ll | 5 ++- .../CodeGen/AMDGPU/GlobalISel/urem.i64.ll | 5 ++- 18 files changed, 111 insertions(+), 86 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/const-0.ll diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index cff6b496cca27..745522d6b98e0 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -147,9 +147,10 @@ class CombinerHelper { bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple &MatchInfo); bool applySextInRegOfLoad(MachineInstr &MI, std::tuple &MatchInfo); - bool matchElideBrByInvertingCond(MachineInstr &MI); - void applyElideBrByInvertingCond(MachineInstr &MI); - bool tryElideBrByInvertingCond(MachineInstr &MI); + /// If a brcond's true block is not the fallthrough, make it so by inverting + /// the condition and swapping operands. + bool matchOptBrCondByInvertingCond(MachineInstr &MI); + void applyOptBrCondByInvertingCond(MachineInstr &MI); /// If \p MI is G_CONCAT_VECTORS, try to combine it. /// Returns true if MI changed. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 50534860bec16..a230f5adfe88f 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -245,5 +245,9 @@ bool isBuildVectorAllOnes(const MachineInstr &MI, /// the value \p Val contains a true value. bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP); + +/// Returns an integer representing true, as defined by the +/// TargetBooleanContents. +int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP); } // End namespace llvm. #endif diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 5b940551dad59..4d038ad7b240e 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -145,13 +145,11 @@ def combine_indexed_load_store : GICombineRule< [{ return Helper.matchCombineIndexedLoadStore(*${root}, ${matchinfo}); }]), (apply [{ Helper.applyCombineIndexedLoadStore(*${root}, ${matchinfo}); }])>; -// FIXME: Is there a reason this wasn't in tryCombine? I've left it out of -// all_combines because it wasn't there. 
-def elide_br_by_inverting_cond : GICombineRule< +def opt_brcond_by_inverting_cond : GICombineRule< (defs root:$root), (match (wip_match_opcode G_BR):$root, - [{ return Helper.matchElideBrByInvertingCond(*${root}); }]), - (apply [{ Helper.applyElideBrByInvertingCond(*${root}); }])>; + [{ return Helper.matchOptBrCondByInvertingCond(*${root}); }]), + (apply [{ Helper.applyOptBrCondByInvertingCond(*${root}); }])>; def ptr_add_immed_matchdata : GIDefMatchData<"PtrAddChain">; def ptr_add_immed_chain : GICombineRule< @@ -416,4 +414,4 @@ def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain, shl_ashr_to_sext_inreg, sext_inreg_of_load, width_reduction_combines, select_combines, known_bits_simplifications, ext_ext_fold, - not_cmp_fold]>; + not_cmp_fold, opt_brcond_by_inverting_cond]>; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index d58ba7cf5a8c6..356f084711095 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -881,14 +881,12 @@ void CombinerHelper::applyCombineIndexedLoadStore( LLVM_DEBUG(dbgs() << " Combinined to indexed operation"); } -bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) { +bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) { if (MI.getOpcode() != TargetOpcode::G_BR) return false; // Try to match the following: // bb1: - // %c(s32) = G_ICMP pred, %a, %b - // %c1(s1) = G_TRUNC %c(s32) // G_BRCOND %c1, %bb2 // G_BR %bb3 // bb2: @@ -898,7 +896,7 @@ bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) { // The above pattern does not have a fall through to the successor bb2, always // resulting in a branch no matter which path is taken. Here we try to find // and replace that pattern with conditional branch to bb3 and otherwise - // fallthrough to bb2. + // fallthrough to bb2. This is generally better for branch predictors. MachineBasicBlock *MBB = MI.getParent(); MachineBasicBlock::iterator BrIt(MI); @@ -913,40 +911,34 @@ bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) { // Check that the next block is the conditional branch target. if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB())) return false; - - MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg()); - if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP || - !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg())) - return false; return true; } -bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) { - if (!matchElideBrByInvertingCond(MI)) - return false; - applyElideBrByInvertingCond(MI); - return true; -} - -void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) { +void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) { MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB(); MachineBasicBlock::iterator BrIt(MI); MachineInstr *BrCond = &*std::prev(BrIt); - MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg()); - CmpInst::Predicate InversePred = CmpInst::getInversePredicate( - (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate()); + Builder.setInstrAndDebugLoc(*BrCond); + LLT Ty = MRI.getType(BrCond->getOperand(0).getReg()); + // FIXME: Does int/fp matter for this? If so, we might need to restrict + // this to i1 only since we might not know for sure what kind of + // compare generated the condition value. 
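+  // Note: getICmpTrueVal (added in Utils.cpp below) returns 1 under
+  // ZeroOrOneBooleanContent and -1 under ZeroOrNegativeOneBooleanContent,
+  // so the G_XOR built here computes (xor %cond, 1) or (xor %cond, -1)
+  // respectively, inverting the boolean in either convention.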
+ auto True = Builder.buildConstant( + Ty, getICmpTrueVal(getTargetLowering(), false, false)); + auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True); - // Invert the G_ICMP condition. - Observer.changingInstr(*CmpMI); - CmpMI->getOperand(1).setPredicate(InversePred); - Observer.changedInstr(*CmpMI); + auto *FallthroughBB = BrCond->getOperand(1).getMBB(); + Observer.changingInstr(MI); + MI.getOperand(0).setMBB(FallthroughBB); + Observer.changedInstr(MI); - // Change the conditional branch target. + // Change the conditional branch to use the inverted condition and + // new target block. Observer.changingInstr(*BrCond); + BrCond->getOperand(0).setReg(Xor.getReg(0)); BrCond->getOperand(1).setMBB(BrTarget); Observer.changedInstr(*BrCond); - MI.eraseFromParent(); } static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 6f8d233043e70..53e6eff2590e0 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -740,3 +740,15 @@ bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, } llvm_unreachable("Invalid boolean contents"); } + +int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector, + bool IsFP) { + switch (TLI.getBooleanContents(IsVector, IsFP)) { + case TargetLowering::UndefinedBooleanContent: + case TargetLowering::ZeroOrOneBooleanContent: + return 1; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + return -1; + } + llvm_unreachable("Invalid boolean contents"); +} diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index 5fa44606488be..2187b6121421a 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -19,7 +19,6 @@ def fconstant_to_constant : GICombineRule< def AArch64PreLegalizerCombinerHelper: GICombinerHelper< "AArch64GenPreLegalizerCombinerHelper", [all_combines, - elide_br_by_inverting_cond, fconstant_to_constant]> { let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule"; let StateClass = "AArch64PreLegalizerCombinerHelperState"; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td index d243074aa2fd1..d34345e79fa63 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td @@ -42,8 +42,7 @@ def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>; def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper< - "AMDGPUGenPreLegalizerCombinerHelper", [all_combines, - elide_br_by_inverting_cond]> { + "AMDGPUGenPreLegalizerCombinerHelper", [all_combines]> { let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule"; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/const-0.ll b/llvm/test/CodeGen/AArch64/GlobalISel/const-0.ll deleted file mode 100644 index 89d1ee29b959c..0000000000000 --- a/llvm/test/CodeGen/AArch64/GlobalISel/const-0.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -global-isel -O0 -o - %s | FileCheck %s - -%struct.comp = type { i8*, i32, i8*, [3 x i8], i32 } - -define void @regbranch() { -; CHECK-LABEL: regbranch: -; CHECK: mov {{w[0-9]+}}, #0 -cond_next240.i: - br i1 false, label %cond_true251.i, label %cond_next272.i - -cond_true251.i: - switch i8 0, label %cond_next272.i [ - i8 42, label %bb268.i - i8 43, label %bb268.i - i8 63, label %bb268.i - ] - -bb268.i: - br label %cond_next272.i - -cond_next272.i: - %len.2.i = phi i32 [ 0, 
%bb268.i ], [ 0, %cond_next240.i ], [ 0, %cond_true251.i ] - %tmp278.i = icmp eq i32 %len.2.i, 1 - ret void -} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir index 051f33dabf4c8..6ed879d82b9be 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -debugify-and-strip-all-safe -O0 -run-pass=aarch64-prelegalizer-combiner -global-isel -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -debugify-and-strip-all-safe -O0 -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombinerhelper-only-enable-rule="opt_brcond_by_inverting_cond" -global-isel -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios5.0.0" @@ -38,8 +38,11 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.2 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s32), [[C]] + ; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C2]] + ; CHECK: G_BRCOND [[XOR]](s1), %bb.2 + ; CHECK: G_BR %bb.1 ; CHECK: bb.1.if.then: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[COPY1]], [[COPY]] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir index e25c84958b9db..c280f000b174e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir @@ -8,6 +8,8 @@ define i16 @const_s16() { ret i16 42 } define i32 @const_s32() { ret i32 42 } define i64 @const_s64() { ret i64 1234567890123 } + define i32 @const_s32_zero() { ret i32 0 } + define i64 @const_s64_zero() { ret i64 0 } define i8* @const_p0_0() { ret i8* null } define i32 @fconst_s32() { ret i32 42 } @@ -81,6 +83,38 @@ body: | $x0 = COPY %0(s64) ... +--- +name: const_s32_zero +legalized: true +regBankSelected: true +registers: + - { id: 0, class: gpr } + +body: | + bb.0: + ; CHECK-LABEL: name: const_s32_zero + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK: $w0 = COPY [[COPY]] + %0(s32) = G_CONSTANT i32 0 + $w0 = COPY %0(s32) +... + +--- +name: const_s64_zero +legalized: true +regBankSelected: true +registers: + - { id: 0, class: gpr } + +body: | + bb.0: + ; CHECK-LABEL: name: const_s64_zero + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $xzr + ; CHECK: $x0 = COPY [[COPY]] + %0(s64) = G_CONSTANT i64 0 + $x0 = COPY %0(s64) +... 
+ --- name: const_p0_0 legalized: true diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll index eebfbee8a12e8..cb6822bcf1ba5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll @@ -52,9 +52,10 @@ define amdgpu_kernel void @sgpr_trunc_brcond(i32 %cond) { ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dword s0, s[0:1], 0x9 ; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_xor_b32 s0, s0, -1 ; GCN-NEXT: s_and_b32 s0, s0, 1 ; GCN-NEXT: s_cmp_lg_u32 s0, 0 -; GCN-NEXT: s_cbranch_scc0 BB3_2 +; GCN-NEXT: s_cbranch_scc1 BB3_2 ; GCN-NEXT: ; %bb.1: ; %bb0 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: flat_store_dword v[0:1], v0 @@ -80,9 +81,10 @@ define amdgpu_kernel void @brcond_sgpr_trunc_and(i32 %cond0, i32 %cond1) { ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_and_b32 s0, s0, s1 +; GCN-NEXT: s_xor_b32 s0, s0, -1 ; GCN-NEXT: s_and_b32 s0, s0, 1 ; GCN-NEXT: s_cmp_lg_u32 s0, 0 -; GCN-NEXT: s_cbranch_scc0 BB4_2 +; GCN-NEXT: s_cbranch_scc1 BB4_2 ; GCN-NEXT: ; %bb.1: ; %bb0 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: flat_store_dword v[0:1], v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll index 88c82b1c3f7cf..e25fd7fc43fc5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll @@ -51,11 +51,11 @@ define amdgpu_kernel void @is_private_sgpr(i8* %ptr) { ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_load_dword s0, s[4:5], 0x11 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_cmp_eq_u32 s1, s0 +; CI-NEXT: s_cmp_lg_u32 s1, s0 ; CI-NEXT: s_cselect_b32 s0, 1, 0 ; CI-NEXT: s_and_b32 s0, s0, 1 ; CI-NEXT: s_cmp_lg_u32 s0, 0 -; CI-NEXT: s_cbranch_scc0 BB1_2 +; CI-NEXT: s_cbranch_scc1 BB1_2 ; CI-NEXT: ; %bb.1: ; %bb0 ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: flat_store_dword v[0:1], v0 @@ -68,11 +68,11 @@ define amdgpu_kernel void @is_private_sgpr(i8* %ptr) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16) ; GFX9-NEXT: s_lshl_b32 s0, s0, 16 -; GFX9-NEXT: s_cmp_eq_u32 s1, s0 +; GFX9-NEXT: s_cmp_lg_u32 s1, s0 ; GFX9-NEXT: s_cselect_b32 s0, 1, 0 ; GFX9-NEXT: s_and_b32 s0, s0, 1 ; GFX9-NEXT: s_cmp_lg_u32 s0, 0 -; GFX9-NEXT: s_cbranch_scc0 BB1_2 +; GFX9-NEXT: s_cbranch_scc1 BB1_2 ; GFX9-NEXT: ; %bb.1: ; %bb0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: global_store_dword v[0:1], v0, off diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll index ec477c9925c9a..356f219ba0c28 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll @@ -51,11 +51,11 @@ define amdgpu_kernel void @is_local_sgpr(i8* %ptr) { ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_load_dword s0, s[4:5], 0x10 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_cmp_eq_u32 s1, s0 +; CI-NEXT: s_cmp_lg_u32 s1, s0 ; CI-NEXT: s_cselect_b32 s0, 1, 0 ; CI-NEXT: s_and_b32 s0, s0, 1 ; CI-NEXT: s_cmp_lg_u32 s0, 0 -; CI-NEXT: s_cbranch_scc0 BB1_2 +; CI-NEXT: s_cbranch_scc1 BB1_2 ; CI-NEXT: ; %bb.1: ; %bb0 ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: flat_store_dword v[0:1], v0 @@ -68,11 +68,11 @@ define amdgpu_kernel void @is_local_sgpr(i8* %ptr) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16) ; GFX9-NEXT: s_lshl_b32 s0, s0, 16 -; GFX9-NEXT: s_cmp_eq_u32 s1, s0 +; GFX9-NEXT: s_cmp_lg_u32 s1, s0 ; GFX9-NEXT: s_cselect_b32 s0, 1, 0 ; GFX9-NEXT: s_and_b32 s0, s0, 1 ; GFX9-NEXT: s_cmp_lg_u32 s0, 0 -; GFX9-NEXT: s_cbranch_scc0 BB1_2 +; GFX9-NEXT: s_cbranch_scc1 BB1_2 ; GFX9-NEXT: ; %bb.1: ; %bb0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: global_store_dword v[0:1], v0, off diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll index 3c550a1a08e1f..5f4d4097b23a2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll @@ -29,9 +29,10 @@ define amdgpu_kernel void @localize_constants(i1 %cond) { ; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: global_store_dword v[0:1], v0, off ; GFX9-NEXT: BB0_2: ; %Flow +; GFX9-NEXT: s_xor_b32 s0, s0, -1 ; GFX9-NEXT: s_and_b32 s0, s0, 1 ; GFX9-NEXT: s_cmp_lg_u32 s0, 0 -; GFX9-NEXT: s_cbranch_scc0 BB0_4 +; GFX9-NEXT: s_cbranch_scc1 BB0_4 ; GFX9-NEXT: ; %bb.3: ; %bb0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: global_store_dword v[0:1], v0, off @@ -109,9 +110,10 @@ define amdgpu_kernel void @localize_globals(i1 %cond) { ; GFX9-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: BB1_2: ; %Flow +; GFX9-NEXT: s_xor_b32 s0, s0, -1 ; GFX9-NEXT: s_and_b32 s0, s0, 1 ; GFX9-NEXT: s_cmp_lg_u32 s0, 0 -; GFX9-NEXT: s_cbranch_scc0 BB1_4 +; GFX9-NEXT: s_cbranch_scc1 BB1_4 ; GFX9-NEXT: ; %bb.3: ; %bb0 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, gv0@gotpcrel32@lo+4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll index d2e7328a384fe..9e2f881ee8df8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -357,9 +357,10 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: BB1_2: ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: BB1_3: ; %Flow -; CHECK-NEXT: s_and_b32 s0, s1, 1 +; CHECK-NEXT: s_xor_b32 s0, s1, -1 +; CHECK-NEXT: s_and_b32 s0, s0, 1 ; CHECK-NEXT: s_cmp_lg_u32 s0, 0 -; CHECK-NEXT: s_cbranch_scc0 BB1_5 +; CHECK-NEXT: s_cbranch_scc1 BB1_5 ; CHECK-NEXT: ; %bb.4: ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4 ; CHECK-NEXT: s_sub_i32 s0, 0, s4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll index cbb77b54aba55..2217e17358b33 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -351,9 +351,10 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: BB1_2: ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: BB1_3: ; %Flow -; CHECK-NEXT: s_and_b32 s0, s1, 1 +; CHECK-NEXT: s_xor_b32 s0, s1, -1 +; CHECK-NEXT: s_and_b32 s0, s0, 1 ; CHECK-NEXT: s_cmp_lg_u32 s0, 0 -; CHECK-NEXT: s_cbranch_scc0 BB1_5 +; CHECK-NEXT: s_cbranch_scc1 BB1_5 ; CHECK-NEXT: ; %bb.4: ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4 ; CHECK-NEXT: s_sub_i32 s0, 0, s4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll index 559d116602e50..402ae90219eb0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -323,9 +323,10 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: BB1_2: ; CHECK-NEXT: ; implicit-def: 
$vgpr0_vgpr1
; CHECK-NEXT: BB1_3: ; %Flow
-; CHECK-NEXT: s_and_b32 s1, s5, 1
+; CHECK-NEXT: s_xor_b32 s1, s5, -1
+; CHECK-NEXT: s_and_b32 s1, s1, 1
; CHECK-NEXT: s_cmp_lg_u32 s1, 0
-; CHECK-NEXT: s_cbranch_scc0 BB1_5
+; CHECK-NEXT: s_cbranch_scc1 BB1_5
; CHECK-NEXT: ; %bb.4:
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
; CHECK-NEXT: s_sub_i32 s1, 0, s2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index 92f93185530f2..348f38ef250e4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -319,9 +319,10 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: BB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: BB1_3: ; %Flow
-; CHECK-NEXT: s_and_b32 s1, s5, 1
+; CHECK-NEXT: s_xor_b32 s1, s5, -1
+; CHECK-NEXT: s_and_b32 s1, s1, 1
; CHECK-NEXT: s_cmp_lg_u32 s1, 0
-; CHECK-NEXT: s_cbranch_scc0 BB1_5
+; CHECK-NEXT: s_cbranch_scc1 BB1_5
; CHECK-NEXT: ; %bb.4:
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
; CHECK-NEXT: s_sub_i32 s1, 0, s2

From 467a07128533276e3457b72a775e43190bdc1071 Mon Sep 17 00:00:00 2001
From: Amara Emerson
Date: Mon, 24 Aug 2020 14:10:38 -0700
Subject: [PATCH 0198/1079] [GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from the
switch lowering utils, which causes us to generate some different code, the
result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665
---
 .../llvm/CodeGen/GlobalISel/IRTranslator.h    |  21 ++
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  | 319 ++++++++++++++++--
 .../GlobalISel/arm64-irtranslator-switch.ll   |   6 +-
 .../irtranslator-condbr-lower-tree.ll         | 234 +++++++++++++
 .../llvm-ir/long_ambiguous_chain_s32.ll       | 256 ++++++++------
 .../llvm-ir/long_ambiguous_chain_s64.ll       | 256 ++++++++------
 6 files changed, 851 insertions(+), 241 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-condbr-lower-tree.ll

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 38eb0e4bebe74..8360e81036cd5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -299,6 +299,27 @@ class IRTranslator : public MachineFunctionPass {
  bool translateBinaryOp(unsigned Opcode, const User &U,
                         MachineIRBuilder &MIRBuilder);

+  /// If the set of cases should be emitted as a series of branches, return
+  /// true. If we should emit this as a bunch of and/or'd together conditions,
+  /// return false.
+  bool shouldEmitAsBranches(const std::vector<SwitchCG::CaseBlock> &Cases);
+  /// Helper method for findMergedConditions.
+  /// This function emits a branch and is used at the leaves of an OR or an
+  /// AND operator tree.
+  void emitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
+                                    MachineBasicBlock *FBB,
+                                    MachineBasicBlock *CurBB,
+                                    MachineBasicBlock *SwitchBB,
+                                    BranchProbability TProb,
+                                    BranchProbability FProb, bool InvertCond);
+  /// Used during condbr translation to find trees of conditions that can be
+  /// optimized.
+  void findMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
+                            MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+                            MachineBasicBlock *SwitchBB,
+                            Instruction::BinaryOps Opc, BranchProbability TProb,
+                            BranchProbability FProb, bool InvertCond);
+
  /// Translate branch (br) instruction.
  /// \pre \p U is a branch instruction.
  bool translateBr(const User &U, MachineIRBuilder &MIRBuilder);
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index cce0ca938c9fe..34ba4731ca364 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -49,11 +50,13 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -360,28 +363,276 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
  return CLI->lowerReturn(MIRBuilder, Ret, VRegs, SwiftErrorVReg);
}

+void IRTranslator::emitBranchForMergedCondition(
+    const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+    MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
+    BranchProbability TProb, BranchProbability FProb, bool InvertCond) {
+  // If the leaf of the tree is a comparison, merge the condition into
+  // the caseblock.
+  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+    CmpInst::Predicate Condition;
+    if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+      Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate();
+    } else {
+      const FCmpInst *FC = cast<FCmpInst>(Cond);
+      Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate();
+    }
+
+    SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0),
+                           BOp->getOperand(1), nullptr, TBB, FBB, CurBB,
+                           CurBuilder->getDebugLoc(), TProb, FProb);
+    SL->SwitchCases.push_back(CB);
+    return;
+  }
+
+  // Create a CaseBlock record representing this branch.
+  CmpInst::Predicate Pred = InvertCond ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
+  SwitchCG::CaseBlock CB(
+      Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()),
+      nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb);
+  SL->SwitchCases.push_back(CB);
+}
+
+static bool isValInBlock(const Value *V, const BasicBlock *BB) {
+  if (const Instruction *I = dyn_cast<Instruction>(V))
+    return I->getParent() == BB;
+  return true;
+}
+
+void IRTranslator::findMergedConditions(
+    const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+    MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
+    Instruction::BinaryOps Opc, BranchProbability TProb,
+    BranchProbability FProb, bool InvertCond) {
+  using namespace PatternMatch;
+  assert((Opc == Instruction::And || Opc == Instruction::Or) &&
+         "Expected Opc to be AND/OR");
+  // Skip over a NOT that is part of the tree and remember to invert the op
+  // and operands at the next level.
+  Value *NotCond;
+  if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
+      isValInBlock(NotCond, CurBB->getBasicBlock())) {
+    findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
+                         !InvertCond);
+    return;
+  }
+
+  const Instruction *BOp = dyn_cast<Instruction>(Cond);
+  // Compute the effective opcode for Cond, taking into account whether it
+  // needs to be inverted, e.g.
+  //   and (not (or A, B)), C
+  // gets lowered as
+  //   and (and (not A, not B), C)
+  unsigned BOpc = 0;
+  if (BOp) {
+    BOpc = BOp->getOpcode();
+    if (InvertCond) {
+      if (BOpc == Instruction::And)
+        BOpc = Instruction::Or;
+      else if (BOpc == Instruction::Or)
+        BOpc = Instruction::And;
+    }
+  }
+
+  // If this node is not part of the or/and tree, emit it as a branch.
+  if (!BOp || !(isa<ICmpInst>(BOp) || isa<FCmpInst>(BOp)) ||
+      BOpc != static_cast<unsigned>(Opc) || !BOp->hasOneUse() ||
+      BOp->getParent() != CurBB->getBasicBlock() ||
+      !isValInBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+      !isValInBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+    emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb,
+                                 InvertCond);
+    return;
+  }
+
+  // Create TmpBB after CurBB.
+  MachineFunction::iterator BBI(CurBB);
+  MachineBasicBlock *TmpBB =
+      MF->CreateMachineBasicBlock(CurBB->getBasicBlock());
+  CurBB->getParent()->insert(++BBI, TmpBB);
+
+  if (Opc == Instruction::Or) {
+    // Codegen X | Y as:
+    // BB1:
+    //   jmp_if_X TBB
+    //   jmp TmpBB
+    // TmpBB:
+    //   jmp_if_Y TBB
+    //   jmp FBB
+    //
+
+    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+    // The requirement is that
+    //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+    //   = TrueProb for original BB.
+    // Assuming the original probabilities are A and B, one choice is to set
+    // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
+    // A/(1+B) and 2B/(1+B). This choice assumes that
+    //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+    // Another choice is to assume TrueProb for BB1 equals to TrueProb for
+    // TmpBB, but the math is more complicated.
+
+    auto NewTrueProb = TProb / 2;
+    auto NewFalseProb = TProb / 2 + FProb;
+    // Emit the LHS condition.
+    findMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
+                         NewTrueProb, NewFalseProb, InvertCond);
+
+    // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
+    SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
+    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+    // Emit the RHS condition into TmpBB.
+    findMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
+                         Probs[0], Probs[1], InvertCond);
+  } else {
+    assert(Opc == Instruction::And && "Unknown merge op!");
+    // Codegen X & Y as:
+    // BB1:
+    //   jmp_if_X TmpBB
+    //   jmp FBB
+    // TmpBB:
+    //   jmp_if_Y TBB
+    //   jmp FBB
+    //
+    // This requires creation of TmpBB after CurBB.
+
+    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+    // The requirement is that
+    //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+    //   = FalseProb for original BB.
+    // Assuming the original probabilities are A and B, one choice is to set
+    // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
+    // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
+    // TrueProb for BB1 * FalseProb for TmpBB.
+
+    auto NewTrueProb = TProb + FProb / 2;
+    auto NewFalseProb = FProb / 2;
+    // Emit the LHS condition.
+    findMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
+                         NewTrueProb, NewFalseProb, InvertCond);
+
+    // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
+    SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
+    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+    // Emit the RHS condition into TmpBB.
+    findMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
+                         Probs[0], Probs[1], InvertCond);
+  }
+}
+
+bool IRTranslator::shouldEmitAsBranches(
+    const std::vector<SwitchCG::CaseBlock> &Cases) {
+  // For multiple cases, it's better to emit as branches.
+  if (Cases.size() != 2)
+    return true;
+
+  // If this is two comparisons of the same values or'd or and'd together, they
+  // will get folded into a single comparison, so don't emit two blocks.
+  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+       Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+      (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+       Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+    return false;
+  }
+
+  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+      Cases[0].PredInfo.Pred == Cases[1].PredInfo.Pred &&
+      isa<ConstantInt>(Cases[0].CmpRHS) &&
+      cast<ConstantInt>(Cases[0].CmpRHS)->isNullValue()) {
+    if (Cases[0].PredInfo.Pred == CmpInst::ICMP_EQ &&
+        Cases[0].TrueBB == Cases[1].ThisBB)
+      return false;
+    if (Cases[0].PredInfo.Pred == CmpInst::ICMP_NE &&
+        Cases[0].FalseBB == Cases[1].ThisBB)
+      return false;
+  }
+
+  return true;
+}
+
bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
  const BranchInst &BrInst = cast<BranchInst>(U);
-  unsigned Succ = 0;
-  if (!BrInst.isUnconditional()) {
-    // We want a G_BRCOND to the true BB followed by an unconditional branch.
-    Register Tst = getOrCreateVReg(*BrInst.getCondition());
-    const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++));
-    MachineBasicBlock &TrueBB = getMBB(TrueTgt);
-    MIRBuilder.buildBrCond(Tst, TrueBB);
+  auto &CurMBB = MIRBuilder.getMBB();
+  auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0));
+
+  if (BrInst.isUnconditional()) {
+    // If the unconditional target is the layout successor, fallthrough.
+    if (!CurMBB.isLayoutSuccessor(Succ0MBB))
+      MIRBuilder.buildBr(*Succ0MBB);
+
+    // Link successors.
+    for (const BasicBlock *Succ : successors(&BrInst))
+      CurMBB.addSuccessor(&getMBB(*Succ));
+    return true;
  }

-  const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ));
-  MachineBasicBlock &TgtBB = getMBB(BrTgt);
-  MachineBasicBlock &CurBB = MIRBuilder.getMBB();
+  // If this condition is one of the special cases we handle, do special stuff
+  // now.
+  const Value *CondVal = BrInst.getCondition();
+  MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1));

-  // If the unconditional target is the layout successor, fallthrough.
-  if (!CurBB.isLayoutSuccessor(&TgtBB))
-    MIRBuilder.buildBr(TgtBB);
+  const auto &TLI = *MF->getSubtarget().getTargetLowering();

-  // Link successors.
-  for (const BasicBlock *Succ : successors(&BrInst))
-    CurBB.addSuccessor(&getMBB(*Succ));
+  // If this is a series of conditions that are or'd or and'd together, emit
+  // this as a sequence of branches instead of setcc's with and/or operations.
+  // As long as jumps are not expensive (exceptions for multi-use logic ops,
+  // unpredictable branches, and vector extracts because those jumps are likely
+  // expensive for any target), this should improve performance.
+  // For example, instead of something like:
+  //     cmp A, B
+  //     C = seteq
+  //     cmp D, E
+  //     F = setle
+  //     or C, F
+  //     jnz foo
+  // Emit:
+  //     cmp A, B
+  //     je foo
+  //     cmp D, E
+  //     jle foo
+  using namespace PatternMatch;
+  if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+    Instruction::BinaryOps Opcode = BOp->getOpcode();
+    Value *Vec, *BOp0 = BOp->getOperand(0), *BOp1 = BOp->getOperand(1);
+    if (!TLI.isJumpExpensive() && BOp->hasOneUse() &&
+        !BrInst.hasMetadata(LLVMContext::MD_unpredictable) &&
+        (Opcode == Instruction::And || Opcode == Instruction::Or) &&
+        !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
+          match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
+      findMergedConditions(BOp, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode,
+                           getEdgeProbability(&CurMBB, Succ0MBB),
+                           getEdgeProbability(&CurMBB, Succ1MBB),
+                           /*InvertCond=*/false);
+      assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!");
+
+      // Allow some cases to be rejected.
+      if (shouldEmitAsBranches(SL->SwitchCases)) {
+        // Emit the branch for this block.
+        emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder);
+        SL->SwitchCases.erase(SL->SwitchCases.begin());
+        return true;
+      }
+
+      // Okay, we decided not to do this, remove any inserted MBB's and clear
+      // SwitchCases.
+      for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I)
+        MF->erase(SL->SwitchCases[I].ThisBB);
+
+      SL->SwitchCases.clear();
+    }
+  }
+
+  // Create a CaseBlock record representing this branch.
+  SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal,
+                         ConstantInt::getTrue(MF->getFunction().getContext()),
+                         nullptr, Succ0MBB, Succ1MBB, &CurMBB,
+                         CurBuilder->getDebugLoc());
+
+  // Use emitSwitchCase to actually insert the fast branch sequence for this
+  // cond branch.
+  emitSwitchCase(CB, &CurMBB, *CurBuilder);
  return true;
}
@@ -567,8 +818,23 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
  const LLT i1Ty = LLT::scalar(1);
  // Build the compare.
  if (!CB.CmpMHS) {
-    Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
-    Cond = MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+    const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS);
+    // For conditional branch lowering, we might try to do something silly like
+    // emit a G_ICMP to compare an existing G_ICMP i1 result with true. If so,
+    // just re-use the existing condition vreg.
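+    // E.g. if CondLHS already holds %c:_(s1) = G_ICMP ..., then checking
+    // (icmp eq %c, i1 true) is just %c itself, so no new compare is built.
+    // (The register name %c is illustrative.)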
+ if (CI && CI->getZExtValue() == 1 && + MRI->getType(CondLHS).getSizeInBits() == 1 && + CB.PredInfo.Pred == CmpInst::ICMP_EQ) { + Cond = CondLHS; + } else { + Register CondRHS = getOrCreateVReg(*CB.CmpRHS); + if (CmpInst::isFPPredicate(CB.PredInfo.Pred)) + Cond = + MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0); + else + Cond = + MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0); + } } else { assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE && "Can only handle SLE ranges"); @@ -601,17 +867,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb); CB.ThisBB->normalizeSuccProbs(); - // if (SwitchBB->getBasicBlock() != CB.FalseBB->getBasicBlock()) - addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()}, - CB.ThisBB); - - // If the lhs block is the next block, invert the condition so that we can - // fall through to the lhs instead of the rhs block. - if (CB.TrueBB == CB.ThisBB->getNextNode()) { - std::swap(CB.TrueBB, CB.FalseBB); - auto True = MIB.buildConstant(i1Ty, 1); - Cond = MIB.buildXor(i1Ty, Cond, True).getReg(0); - } + addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()}, + CB.ThisBB); MIB.buildBrCond(Cond, *CB.TrueBB); MIB.buildBr(*CB.FalseBB); @@ -2590,6 +2847,10 @@ void IRTranslator::finalizeBasicBlock() { emitJumpTable(JTCase.second, JTCase.second.MBB); } SL->JTCases.clear(); + + for (auto &SwCase : SL->SwitchCases) + emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder); + SL->SwitchCases.clear(); } void IRTranslator::finalizeFunction() { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll index 485fa62904f0a..64d9e9588eeeb 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll @@ -1313,10 +1313,8 @@ define i32 @range_test(i32 %x) { ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[C1]] ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ule), [[SUB]](s32), [[C5]] - ; CHECK: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[C6]] - ; CHECK: G_BRCOND [[XOR]](s1), %bb.4 - ; CHECK: G_BR %bb.2 + ; CHECK: G_BRCOND [[ICMP1]](s1), %bb.2 + ; CHECK: G_BR %bb.4 ; CHECK: bb.2.sw.bb: ; CHECK: successors: %bb.4(0x80000000) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[COPY]], [[C3]] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-condbr-lower-tree.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-condbr-lower-tree.ll new file mode 100644 index 0000000000000..173bc85882d89 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-condbr-lower-tree.ll @@ -0,0 +1,234 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -mtriple aarch64 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s + +declare i32 @bar(...) 
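+
+; The tests below check that trees of or/and'd compares feeding a conditional
+; branch are lowered as a sequence of conditional branches, except where
+; shouldEmitAsBranches declines (noted per test).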
+define void @or_cond(i32 %X, i32 %Y, i32 %Z) nounwind { + ; CHECK-LABEL: name: or_cond + ; CHECK: bb.1.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] + ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK: G_BRCOND [[ICMP2]](s1), %bb.2 + ; CHECK: G_BR %bb.4 + ; CHECK: bb.4.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: G_BRCOND [[ICMP3]](s1), %bb.2 + ; CHECK: G_BR %bb.3 + ; CHECK: bb.2.cond_true: + ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK: bb.3.UnifiedReturnBlock: + ; CHECK: RET_ReallyLR +entry: + %tmp1 = icmp eq i32 %X, 0 + %tmp3 = icmp slt i32 %Y, 5 + %tmp4 = or i1 %tmp3, %tmp1 + br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock + +cond_true: + %tmp5 = tail call i32 (...) @bar( ) + ret void + +UnifiedReturnBlock: + ret void +} + +define void @and_cond(i32 %X, i32 %Y, i32 %Z) nounwind { + ; CHECK-LABEL: name: and_cond + ; CHECK: bb.1.entry: + ; CHECK: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP]] + ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK: G_BRCOND [[ICMP2]](s1), %bb.4 + ; CHECK: G_BR %bb.3 + ; CHECK: bb.4.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: G_BRCOND [[ICMP3]](s1), %bb.2 + ; CHECK: G_BR %bb.3 + ; CHECK: bb.2.cond_true: + ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK: bb.3.UnifiedReturnBlock: + ; CHECK: RET_ReallyLR +entry: + %tmp1 = icmp eq i32 %X, 0 + %tmp3 = icmp slt i32 %Y, 5 + %tmp4 = and i1 %tmp3, %tmp1 + br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock + +cond_true: + %tmp5 = tail call i32 (...) @bar( ) + ret void + +UnifiedReturnBlock: + ret void +} + +; Don't emit two branches for same operands. 
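+; (shouldEmitAsBranches returns false here: both CaseBlocks compare the same
+; operands, so the two compares would fold back into a single one anyway.)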
+define void @or_cond_same_values_cmp(i32 %X, i32 %Y, i32 %Z) nounwind { + ; CHECK-LABEL: name: or_cond_same_values_cmp + ; CHECK: bb.1.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C]] + ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] + ; CHECK: G_BRCOND [[OR]](s1), %bb.2 + ; CHECK: G_BR %bb.3 + ; CHECK: bb.2.cond_true: + ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK: bb.3.UnifiedReturnBlock: + ; CHECK: RET_ReallyLR +entry: + %tmp1 = icmp eq i32 %X, 5 + %tmp3 = icmp slt i32 %X, 5 + %tmp4 = or i1 %tmp3, %tmp1 + br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock + +cond_true: + %tmp5 = tail call i32 (...) @bar( ) + ret void + +UnifiedReturnBlock: + ret void +} + +; Emit multiple branches for more than 2 cases. +define void @or_cond_multiple_cases(i32 %X, i32 %Y, i32 %Z) nounwind { + ; CHECK-LABEL: name: or_cond_multiple_cases + ; CHECK: bb.1.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.5(0x40000000) + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C]] + ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] + ; CHECK: [[OR1:%[0-9]+]]:_(s1) = G_OR [[OR]], [[ICMP2]] + ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C]] + ; CHECK: G_BRCOND [[ICMP3]](s1), %bb.2 + ; CHECK: G_BR %bb.5 + ; CHECK: bb.5.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: G_BRCOND [[ICMP4]](s1), %bb.2 + ; CHECK: G_BR %bb.4 + ; CHECK: bb.4.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; CHECK: G_BRCOND [[ICMP5]](s1), %bb.2 + ; CHECK: G_BR %bb.3 + ; CHECK: bb.2.cond_true: + ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK: bb.3.UnifiedReturnBlock: + ; CHECK: RET_ReallyLR +entry: + %tmp1 = icmp eq i32 %X, 5 + %tmp3 = icmp slt i32 %X, 5 + %tmpZ = icmp eq i32 %Z, 5 + %tmp4 = or i1 %tmp3, %tmp1 + %final = or i1 %tmp4, %tmpZ + br i1 %final, label %cond_true, label %UnifiedReturnBlock + +cond_true: + %tmp5 = tail call i32 (...) @bar( ) + ret void + +UnifiedReturnBlock: + ret void +} + +; (X != null) | (Y != null) --> (X|Y) != 0 +; Don't emit two branches. 
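+; (This is the shouldEmitAsBranches null-compare special case, so a single
+; G_OR-combined branch is emitted instead.)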
+define void @or_cond_ne_null(i32 %X, i32 %Y, i32 %Z) nounwind { + ; CHECK-LABEL: name: or_cond_ne_null + ; CHECK: bb.1.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] + ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] + ; CHECK: G_BRCOND [[OR]](s1), %bb.2 + ; CHECK: G_BR %bb.3 + ; CHECK: bb.2.cond_true: + ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK: bb.3.UnifiedReturnBlock: + ; CHECK: RET_ReallyLR +entry: + %tmp1 = icmp ne i32 %X, 0 + %tmp3 = icmp ne i32 %Y, 0 + %tmp4 = or i1 %tmp3, %tmp1 + br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock + +cond_true: + %tmp5 = tail call i32 (...) @bar( ) + ret void + +UnifiedReturnBlock: + ret void +} + +; If the branch is unpredictable, don't add another branch +; regardless of whether they are expensive or not. + +define void @unpredictable(i32 %X, i32 %Y, i32 %Z) nounwind { + ; CHECK-LABEL: name: unpredictable + ; CHECK: bb.1.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] + ; CHECK: G_BRCOND [[OR]](s1), %bb.2 + ; CHECK: G_BR %bb.3 + ; CHECK: bb.2.cond_true: + ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK: bb.3.UnifiedReturnBlock: + ; CHECK: RET_ReallyLR +entry: + %tmp1 = icmp eq i32 %X, 0 + %tmp3 = icmp slt i32 %Y, 5 + %tmp4 = or i1 %tmp3, %tmp1 + br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock, !unpredictable !0 + +cond_true: + %tmp5 = tail call i32 (...) 
@bar( ) + ret void + +UnifiedReturnBlock: + ret void +} + +!0 = !{} diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll index 20e549b81a61a..2dcc174860c10 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll @@ -20,88 +20,100 @@ define void @long_chain_ambiguous_i32_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i32* ; MIPS32-NEXT: sw $7, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $2, 24($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB0_9 +; MIPS32-NEXT: bnez $8, $BB0_12 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.1: # %pre.PHI.1 +; MIPS32-NEXT: # %bb.1: # %entry +; MIPS32-NEXT: j $BB0_2 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB0_2: # %pre.PHI.1 ; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_4 +; MIPS32-NEXT: bnez $2, $BB0_7 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 +; MIPS32-NEXT: j $BB0_4 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0 +; MIPS32-NEXT: $BB0_4: # %pre.PHI.1.0 ; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_5 +; MIPS32-NEXT: bnez $2, $BB0_8 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0 +; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 +; MIPS32-NEXT: j $BB0_6 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB0_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB0_6 +; MIPS32-NEXT: j $BB0_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_4: # %b.PHI.1.1 +; MIPS32-NEXT: $BB0_7: # %b.PHI.1.1 ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB0_6 +; MIPS32-NEXT: j $BB0_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_5: # %b.PHI.1.2 +; MIPS32-NEXT: $BB0_8: # %b.PHI.1.2 ; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB0_6: # %b.PHI.1 +; MIPS32-NEXT: $BB0_9: # %b.PHI.1 ; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 32($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $3, $2, 1 ; MIPS32-NEXT: move $4, $1 ; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB0_8 +; MIPS32-NEXT: bnez $3, $BB0_11 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.7: # %b.PHI.1 -; MIPS32-NEXT: j $BB0_15 +; MIPS32-NEXT: # %bb.10: # %b.PHI.1 +; MIPS32-NEXT: j $BB0_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_8: # %b.PHI.1.end +; MIPS32-NEXT: $BB0_11: # %b.PHI.1.end ; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_9: # %pre.PHI.2 +; MIPS32-NEXT: $BB0_12: # %pre.PHI.2 ; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_11 +; MIPS32-NEXT: bnez $2, $BB0_14 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.10: # %pre.PHI.2 -; MIPS32-NEXT: j $BB0_12 +; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 +; MIPS32-NEXT: j $BB0_15 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_11: # %b.PHI.2.0 +; MIPS32-NEXT: 
$BB0_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB0_13 +; MIPS32-NEXT: j $BB0_16 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_12: # %b.PHI.2.1 +; MIPS32-NEXT: $BB0_15: # %b.PHI.2.1 ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB0_13: # %b.PHI.2 +; MIPS32-NEXT: $BB0_16: # %b.PHI.2 ; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 36($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $3, $2, 1 ; MIPS32-NEXT: move $4, $1 ; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB0_15 +; MIPS32-NEXT: bnez $3, $BB0_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end +; MIPS32-NEXT: # %bb.17: # %b.PHI.2 +; MIPS32-NEXT: j $BB0_18 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB0_18: # %b.PHI.2.end ; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_15: # %b.PHI.3 +; MIPS32-NEXT: $BB0_19: # %b.PHI.3 ; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $3, 32($sp) # 4-byte Folded Reload @@ -197,35 +209,44 @@ define void @long_chain_i32_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i32* %a, i32* % ; MIPS32-NEXT: sw $2, 32($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $8, 24($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $9, $BB1_9 +; MIPS32-NEXT: bnez $9, $BB1_12 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.1: # %entry +; MIPS32-NEXT: j $BB1_2 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.1: # %pre.PHI.1 +; MIPS32-NEXT: $BB1_2: # %pre.PHI.1 ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_4 +; MIPS32-NEXT: bnez $2, $BB1_7 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0 +; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 +; MIPS32-NEXT: j $BB1_4 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB1_4: # %pre.PHI.1.0 ; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_5 +; MIPS32-NEXT: bnez $2, $BB1_8 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 +; MIPS32-NEXT: j $BB1_6 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0 +; MIPS32-NEXT: $BB1_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB1_6 +; MIPS32-NEXT: j $BB1_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_4: # %b.PHI.1.1 +; MIPS32-NEXT: $BB1_7: # %b.PHI.1.1 ; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB1_6 +; MIPS32-NEXT: j $BB1_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_5: # %b.PHI.1.2 +; MIPS32-NEXT: $BB1_8: # %b.PHI.1.2 ; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB1_6: # %b.PHI.1 +; MIPS32-NEXT: $BB1_9: # %b.PHI.1 ; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 40($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $3, $2, 1 @@ -234,37 +255,37 @@ define void 
@long_chain_i32_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i32* %a, i32* % ; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $4, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB1_8 +; MIPS32-NEXT: bnez $3, $BB1_11 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.7: # %b.PHI.1 -; MIPS32-NEXT: j $BB1_15 +; MIPS32-NEXT: # %bb.10: # %b.PHI.1 +; MIPS32-NEXT: j $BB1_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_8: # %b.PHI.1.end +; MIPS32-NEXT: $BB1_11: # %b.PHI.1.end ; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_9: # %pre.PHI.2 +; MIPS32-NEXT: $BB1_12: # %pre.PHI.2 ; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_11 +; MIPS32-NEXT: bnez $2, $BB1_14 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.10: # %pre.PHI.2 -; MIPS32-NEXT: j $BB1_12 +; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 +; MIPS32-NEXT: j $BB1_15 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_11: # %b.PHI.2.0 +; MIPS32-NEXT: $BB1_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB1_13 +; MIPS32-NEXT: j $BB1_16 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_12: # %b.PHI.2.1 +; MIPS32-NEXT: $BB1_15: # %b.PHI.2.1 ; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB1_13: # %b.PHI.2 +; MIPS32-NEXT: $BB1_16: # %b.PHI.2 ; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $3, $2, 1 @@ -273,16 +294,19 @@ define void @long_chain_i32_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i32* %a, i32* % ; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $4, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB1_15 +; MIPS32-NEXT: bnez $3, $BB1_19 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.17: # %b.PHI.2 +; MIPS32-NEXT: j $BB1_18 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end +; MIPS32-NEXT: $BB1_18: # %b.PHI.2.end ; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_15: # %b.PHI.3 +; MIPS32-NEXT: $BB1_19: # %b.PHI.3 ; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $3, 40($sp) # 4-byte Folded Reload @@ -375,88 +399,100 @@ define void @long_chain_ambiguous_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, flo ; MIPS32-NEXT: sw $7, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $2, 24($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB2_9 +; MIPS32-NEXT: bnez $8, $BB2_12 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.1: # %pre.PHI.1 +; MIPS32-NEXT: # %bb.1: # %entry +; MIPS32-NEXT: j $BB2_2 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB2_2: # %pre.PHI.1 ; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_4 +; MIPS32-NEXT: bnez $2, $BB2_7 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 +; MIPS32-NEXT: j $BB2_4 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.2: # 
%pre.PHI.1.0 +; MIPS32-NEXT: $BB2_4: # %pre.PHI.1.0 ; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_5 +; MIPS32-NEXT: bnez $2, $BB2_8 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0 +; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 +; MIPS32-NEXT: j $BB2_6 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB2_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB2_6 +; MIPS32-NEXT: j $BB2_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_4: # %b.PHI.1.1 +; MIPS32-NEXT: $BB2_7: # %b.PHI.1.1 ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB2_6 +; MIPS32-NEXT: j $BB2_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_5: # %b.PHI.1.2 +; MIPS32-NEXT: $BB2_8: # %b.PHI.1.2 ; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB2_6: # %b.PHI.1 +; MIPS32-NEXT: $BB2_9: # %b.PHI.1 ; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 32($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $3, $2, 1 ; MIPS32-NEXT: move $4, $1 ; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB2_8 +; MIPS32-NEXT: bnez $3, $BB2_11 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.7: # %b.PHI.1 -; MIPS32-NEXT: j $BB2_15 +; MIPS32-NEXT: # %bb.10: # %b.PHI.1 +; MIPS32-NEXT: j $BB2_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_8: # %b.PHI.1.end +; MIPS32-NEXT: $BB2_11: # %b.PHI.1.end ; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_9: # %pre.PHI.2 +; MIPS32-NEXT: $BB2_12: # %pre.PHI.2 ; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_11 +; MIPS32-NEXT: bnez $2, $BB2_14 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.10: # %pre.PHI.2 -; MIPS32-NEXT: j $BB2_12 +; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 +; MIPS32-NEXT: j $BB2_15 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_11: # %b.PHI.2.0 +; MIPS32-NEXT: $BB2_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB2_13 +; MIPS32-NEXT: j $BB2_16 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_12: # %b.PHI.2.1 +; MIPS32-NEXT: $BB2_15: # %b.PHI.2.1 ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB2_13: # %b.PHI.2 +; MIPS32-NEXT: $BB2_16: # %b.PHI.2 ; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 36($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $3, $2, 1 ; MIPS32-NEXT: move $4, $1 ; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB2_15 +; MIPS32-NEXT: bnez $3, $BB2_19 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.17: # %b.PHI.2 +; MIPS32-NEXT: j $BB2_18 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end +; MIPS32-NEXT: $BB2_18: # %b.PHI.2.end ; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; 
MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_15: # %b.PHI.3 +; MIPS32-NEXT: $BB2_19: # %b.PHI.3 ; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $3, 32($sp) # 4-byte Folded Reload @@ -553,35 +589,44 @@ define void @long_chain_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, float* %a, fl ; MIPS32-NEXT: sw $2, 32($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: swc1 $f0, 24($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB3_9 +; MIPS32-NEXT: bnez $8, $BB3_12 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.1: # %entry +; MIPS32-NEXT: j $BB3_2 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.1: # %pre.PHI.1 +; MIPS32-NEXT: $BB3_2: # %pre.PHI.1 ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_4 +; MIPS32-NEXT: bnez $2, $BB3_7 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0 +; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 +; MIPS32-NEXT: j $BB3_4 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB3_4: # %pre.PHI.1.0 ; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_5 +; MIPS32-NEXT: bnez $2, $BB3_8 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0 +; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 +; MIPS32-NEXT: j $BB3_6 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB3_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f0, 0($1) ; MIPS32-NEXT: swc1 $f0, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB3_6 +; MIPS32-NEXT: j $BB3_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_4: # %b.PHI.1.1 +; MIPS32-NEXT: $BB3_7: # %b.PHI.1.1 ; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f0, 0($1) ; MIPS32-NEXT: swc1 $f0, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB3_6 +; MIPS32-NEXT: j $BB3_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_5: # %b.PHI.1.2 +; MIPS32-NEXT: $BB3_8: # %b.PHI.1.2 ; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f0, 0($1) ; MIPS32-NEXT: swc1 $f0, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB3_6: # %b.PHI.1 +; MIPS32-NEXT: $BB3_9: # %b.PHI.1 ; MIPS32-NEXT: lwc1 $f0, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 @@ -590,37 +635,37 @@ define void @long_chain_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, float* %a, fl ; MIPS32-NEXT: swc1 $f0, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: swc1 $f1, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: swc1 $f2, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB3_8 +; MIPS32-NEXT: bnez $2, $BB3_11 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.7: # %b.PHI.1 -; MIPS32-NEXT: j $BB3_15 +; MIPS32-NEXT: # %bb.10: # %b.PHI.1 +; MIPS32-NEXT: j $BB3_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_8: # %b.PHI.1.end +; MIPS32-NEXT: $BB3_11: # %b.PHI.1.end ; MIPS32-NEXT: lwc1 $f0, 16($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: swc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_9: # %pre.PHI.2 +; MIPS32-NEXT: $BB3_12: # %pre.PHI.2 ; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_11 +; MIPS32-NEXT: bnez $2, $BB3_14 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.10: # %pre.PHI.2 -; MIPS32-NEXT: j $BB3_12 +; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 +; MIPS32-NEXT: j $BB3_15 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_11: # %b.PHI.2.0 
+; MIPS32-NEXT: $BB3_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f0, 0($1) ; MIPS32-NEXT: swc1 $f0, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB3_13 +; MIPS32-NEXT: j $BB3_16 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_12: # %b.PHI.2.1 +; MIPS32-NEXT: $BB3_15: # %b.PHI.2.1 ; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f0, 0($1) ; MIPS32-NEXT: swc1 $f0, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB3_13: # %b.PHI.2 +; MIPS32-NEXT: $BB3_16: # %b.PHI.2 ; MIPS32-NEXT: lwc1 $f0, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 @@ -629,16 +674,19 @@ define void @long_chain_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, float* %a, fl ; MIPS32-NEXT: swc1 $f0, 0($sp) # 4-byte Folded Spill ; MIPS32-NEXT: swc1 $f1, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: swc1 $f2, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB3_15 +; MIPS32-NEXT: bnez $2, $BB3_19 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.17: # %b.PHI.2 +; MIPS32-NEXT: j $BB3_18 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end +; MIPS32-NEXT: $BB3_18: # %b.PHI.2.end ; MIPS32-NEXT: lwc1 $f0, 0($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: swc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_15: # %b.PHI.3 +; MIPS32-NEXT: $BB3_19: # %b.PHI.3 ; MIPS32-NEXT: lwc1 $f0, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll index a237099eb75ba..bafa309df76a1 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll @@ -20,88 +20,100 @@ define void @long_chain_ambiguous_i64_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64* ; MIPS32-NEXT: sw $7, 52($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $2, 48($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB0_9 +; MIPS32-NEXT: bnez $8, $BB0_12 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.1: # %pre.PHI.1 +; MIPS32-NEXT: # %bb.1: # %entry +; MIPS32-NEXT: j $BB0_2 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB0_2: # %pre.PHI.1 ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_4 +; MIPS32-NEXT: bnez $2, $BB0_7 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 +; MIPS32-NEXT: j $BB0_4 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0 +; MIPS32-NEXT: $BB0_4: # %pre.PHI.1.0 ; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_5 +; MIPS32-NEXT: bnez $2, $BB0_8 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0 +; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 +; MIPS32-NEXT: j $BB0_6 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB0_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill -; MIPS32-NEXT: j $BB0_6 +; MIPS32-NEXT: j $BB0_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_4: # %b.PHI.1.1 +; MIPS32-NEXT: $BB0_7: # %b.PHI.1.1 ; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill 
-; MIPS32-NEXT: j $BB0_6 +; MIPS32-NEXT: j $BB0_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_5: # %b.PHI.1.2 +; MIPS32-NEXT: $BB0_8: # %b.PHI.1.2 ; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill -; MIPS32-NEXT: $BB0_6: # %b.PHI.1 +; MIPS32-NEXT: $BB0_9: # %b.PHI.1 ; MIPS32-NEXT: ldc1 $f0, 32($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 ; MIPS32-NEXT: mov.d $f2, $f0 ; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill ; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB0_8 +; MIPS32-NEXT: bnez $2, $BB0_11 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.7: # %b.PHI.1 -; MIPS32-NEXT: j $BB0_15 +; MIPS32-NEXT: # %bb.10: # %b.PHI.1 +; MIPS32-NEXT: j $BB0_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_8: # %b.PHI.1.end +; MIPS32-NEXT: $BB0_11: # %b.PHI.1.end ; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_9: # %pre.PHI.2 +; MIPS32-NEXT: $BB0_12: # %pre.PHI.2 ; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_11 +; MIPS32-NEXT: bnez $2, $BB0_14 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.10: # %pre.PHI.2 -; MIPS32-NEXT: j $BB0_12 +; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 +; MIPS32-NEXT: j $BB0_15 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_11: # %b.PHI.2.0 +; MIPS32-NEXT: $BB0_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill -; MIPS32-NEXT: j $BB0_13 +; MIPS32-NEXT: j $BB0_16 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_12: # %b.PHI.2.1 +; MIPS32-NEXT: $BB0_15: # %b.PHI.2.1 ; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill -; MIPS32-NEXT: $BB0_13: # %b.PHI.2 +; MIPS32-NEXT: $BB0_16: # %b.PHI.2 ; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 ; MIPS32-NEXT: mov.d $f2, $f0 ; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill ; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB0_15 +; MIPS32-NEXT: bnez $2, $BB0_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end +; MIPS32-NEXT: # %bb.17: # %b.PHI.2 +; MIPS32-NEXT: j $BB0_18 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB0_18: # %b.PHI.2.end ; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_15: # %b.PHI.3 +; MIPS32-NEXT: $BB0_19: # %b.PHI.3 ; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload ; MIPS32-NEXT: ldc1 $f2, 16($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload @@ -197,41 +209,50 @@ define void @long_chain_i64_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64* %a, i64* % ; MIPS32-NEXT: sw $2, 56($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 52($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $8, 48($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $9, $BB1_9 +; MIPS32-NEXT: bnez $9, $BB1_12 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.1: # %entry +; MIPS32-NEXT: j $BB1_2 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.1: # %pre.PHI.1 
+; MIPS32-NEXT: $BB1_2: # %pre.PHI.1 ; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_4 +; MIPS32-NEXT: bnez $2, $BB1_7 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 +; MIPS32-NEXT: j $BB1_4 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0 +; MIPS32-NEXT: $BB1_4: # %pre.PHI.1.0 ; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_5 +; MIPS32-NEXT: bnez $2, $BB1_8 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0 +; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 +; MIPS32-NEXT: j $BB1_6 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB1_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: lw $3, 4($1) ; MIPS32-NEXT: sw $2, 44($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 40($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB1_6 +; MIPS32-NEXT: j $BB1_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_4: # %b.PHI.1.1 +; MIPS32-NEXT: $BB1_7: # %b.PHI.1.1 ; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: lw $3, 4($1) ; MIPS32-NEXT: sw $2, 44($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 40($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB1_6 +; MIPS32-NEXT: j $BB1_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_5: # %b.PHI.1.2 +; MIPS32-NEXT: $BB1_8: # %b.PHI.1.2 ; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: lw $3, 4($1) ; MIPS32-NEXT: sw $2, 44($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 40($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB1_6: # %b.PHI.1 +; MIPS32-NEXT: $BB1_9: # %b.PHI.1 ; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $3, 64($sp) # 4-byte Folded Reload @@ -246,12 +267,12 @@ define void @long_chain_i64_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64* %a, i64* % ; MIPS32-NEXT: sw $6, 24($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $7, 20($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $8, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $4, $BB1_8 +; MIPS32-NEXT: bnez $4, $BB1_11 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.7: # %b.PHI.1 -; MIPS32-NEXT: j $BB1_15 +; MIPS32-NEXT: # %bb.10: # %b.PHI.1 +; MIPS32-NEXT: j $BB1_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_8: # %b.PHI.1.end +; MIPS32-NEXT: $BB1_11: # %b.PHI.1.end ; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) @@ -260,29 +281,29 @@ define void @long_chain_i64_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64* %a, i64* % ; MIPS32-NEXT: addiu $sp, $sp, 80 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_9: # %pre.PHI.2 +; MIPS32-NEXT: $BB1_12: # %pre.PHI.2 ; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_11 +; MIPS32-NEXT: bnez $2, $BB1_14 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.10: # %pre.PHI.2 -; MIPS32-NEXT: j $BB1_12 +; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 +; MIPS32-NEXT: j $BB1_15 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_11: # %b.PHI.2.0 +; MIPS32-NEXT: $BB1_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: lw $3, 4($1) ; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB1_13 +; MIPS32-NEXT: j $BB1_16 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_12: # %b.PHI.2.1 +; MIPS32-NEXT: $BB1_15: # %b.PHI.2.1 ; 
MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: lw $3, 4($1) ; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB1_13: # %b.PHI.2 +; MIPS32-NEXT: $BB1_16: # %b.PHI.2 ; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $3, 68($sp) # 4-byte Folded Reload @@ -297,9 +318,12 @@ define void @long_chain_i64_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64* %a, i64* % ; MIPS32-NEXT: sw $6, 24($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $7, 20($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $8, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $4, $BB1_15 +; MIPS32-NEXT: bnez $4, $BB1_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end +; MIPS32-NEXT: # %bb.17: # %b.PHI.2 +; MIPS32-NEXT: j $BB1_18 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB1_18: # %b.PHI.2.end ; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) @@ -308,7 +332,7 @@ define void @long_chain_i64_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64* %a, i64* % ; MIPS32-NEXT: addiu $sp, $sp, 80 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_15: # %b.PHI.3 +; MIPS32-NEXT: $BB1_19: # %b.PHI.3 ; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $3, 24($sp) # 4-byte Folded Reload @@ -408,88 +432,100 @@ define void @long_chain_ambiguous_double_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, do ; MIPS32-NEXT: sw $7, 52($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $2, 48($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB2_9 +; MIPS32-NEXT: bnez $8, $BB2_12 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.1: # %entry +; MIPS32-NEXT: j $BB2_2 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.1: # %pre.PHI.1 +; MIPS32-NEXT: $BB2_2: # %pre.PHI.1 ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_4 +; MIPS32-NEXT: bnez $2, $BB2_7 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 +; MIPS32-NEXT: j $BB2_4 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0 +; MIPS32-NEXT: $BB2_4: # %pre.PHI.1.0 ; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_5 +; MIPS32-NEXT: bnez $2, $BB2_8 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 +; MIPS32-NEXT: j $BB2_6 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0 +; MIPS32-NEXT: $BB2_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill -; MIPS32-NEXT: j $BB2_6 +; MIPS32-NEXT: j $BB2_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_4: # %b.PHI.1.1 +; MIPS32-NEXT: $BB2_7: # %b.PHI.1.1 ; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill -; MIPS32-NEXT: j $BB2_6 +; MIPS32-NEXT: j $BB2_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_5: # %b.PHI.1.2 +; MIPS32-NEXT: $BB2_8: # %b.PHI.1.2 ; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill -; MIPS32-NEXT: $BB2_6: # %b.PHI.1 +; MIPS32-NEXT: $BB2_9: # %b.PHI.1 ; MIPS32-NEXT: ldc1 $f0, 32($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 ; 
MIPS32-NEXT: mov.d $f2, $f0 ; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill ; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB2_8 +; MIPS32-NEXT: bnez $2, $BB2_11 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.7: # %b.PHI.1 -; MIPS32-NEXT: j $BB2_15 +; MIPS32-NEXT: # %bb.10: # %b.PHI.1 +; MIPS32-NEXT: j $BB2_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_8: # %b.PHI.1.end +; MIPS32-NEXT: $BB2_11: # %b.PHI.1.end ; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_9: # %pre.PHI.2 +; MIPS32-NEXT: $BB2_12: # %pre.PHI.2 ; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_11 +; MIPS32-NEXT: bnez $2, $BB2_14 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.10: # %pre.PHI.2 -; MIPS32-NEXT: j $BB2_12 +; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 +; MIPS32-NEXT: j $BB2_15 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_11: # %b.PHI.2.0 +; MIPS32-NEXT: $BB2_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill -; MIPS32-NEXT: j $BB2_13 +; MIPS32-NEXT: j $BB2_16 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_12: # %b.PHI.2.1 +; MIPS32-NEXT: $BB2_15: # %b.PHI.2.1 ; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill -; MIPS32-NEXT: $BB2_13: # %b.PHI.2 +; MIPS32-NEXT: $BB2_16: # %b.PHI.2 ; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 ; MIPS32-NEXT: mov.d $f2, $f0 ; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill ; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB2_15 +; MIPS32-NEXT: bnez $2, $BB2_19 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.17: # %b.PHI.2 +; MIPS32-NEXT: j $BB2_18 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end +; MIPS32-NEXT: $BB2_18: # %b.PHI.2.end ; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_15: # %b.PHI.3 +; MIPS32-NEXT: $BB2_19: # %b.PHI.3 ; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload ; MIPS32-NEXT: ldc1 $f2, 16($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload @@ -588,35 +624,44 @@ define void @long_chain_double_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, double* %a, ; MIPS32-NEXT: sw $2, 64($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 60($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sdc1 $f0, 48($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB3_9 +; MIPS32-NEXT: bnez $8, $BB3_12 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.1: # %entry +; MIPS32-NEXT: j $BB3_2 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.1: # %pre.PHI.1 +; MIPS32-NEXT: $BB3_2: # %pre.PHI.1 ; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_4 +; MIPS32-NEXT: bnez $2, $BB3_7 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0 +; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 +; MIPS32-NEXT: j $BB3_4 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB3_4: # %pre.PHI.1.0 ; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_5 +; 
MIPS32-NEXT: bnez $2, $BB3_8 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 +; MIPS32-NEXT: j $BB3_6 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0 +; MIPS32-NEXT: $BB3_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 40($sp) # 8-byte Folded Spill -; MIPS32-NEXT: j $BB3_6 +; MIPS32-NEXT: j $BB3_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_4: # %b.PHI.1.1 +; MIPS32-NEXT: $BB3_7: # %b.PHI.1.1 ; MIPS32-NEXT: lw $1, 84($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 40($sp) # 8-byte Folded Spill -; MIPS32-NEXT: j $BB3_6 +; MIPS32-NEXT: j $BB3_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_5: # %b.PHI.1.2 +; MIPS32-NEXT: $BB3_8: # %b.PHI.1.2 ; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 40($sp) # 8-byte Folded Spill -; MIPS32-NEXT: $BB3_6: # %b.PHI.1 +; MIPS32-NEXT: $BB3_9: # %b.PHI.1 ; MIPS32-NEXT: ldc1 $f0, 40($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 @@ -625,37 +670,37 @@ define void @long_chain_double_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, double* %a, ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill ; MIPS32-NEXT: sdc1 $f2, 24($sp) # 8-byte Folded Spill ; MIPS32-NEXT: sdc1 $f4, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB3_8 +; MIPS32-NEXT: bnez $2, $BB3_11 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.7: # %b.PHI.1 -; MIPS32-NEXT: j $BB3_15 +; MIPS32-NEXT: # %bb.10: # %b.PHI.1 +; MIPS32-NEXT: j $BB3_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_8: # %b.PHI.1.end +; MIPS32-NEXT: $BB3_11: # %b.PHI.1.end ; MIPS32-NEXT: ldc1 $f0, 32($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 88 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_9: # %pre.PHI.2 +; MIPS32-NEXT: $BB3_12: # %pre.PHI.2 ; MIPS32-NEXT: lw $1, 80($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_11 +; MIPS32-NEXT: bnez $2, $BB3_14 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.10: # %pre.PHI.2 -; MIPS32-NEXT: j $BB3_12 +; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 +; MIPS32-NEXT: j $BB3_15 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_11: # %b.PHI.2.0 +; MIPS32-NEXT: $BB3_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill -; MIPS32-NEXT: j $BB3_13 +; MIPS32-NEXT: j $BB3_16 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_12: # %b.PHI.2.1 +; MIPS32-NEXT: $BB3_15: # %b.PHI.2.1 ; MIPS32-NEXT: lw $1, 84($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill -; MIPS32-NEXT: $BB3_13: # %b.PHI.2 +; MIPS32-NEXT: $BB3_16: # %b.PHI.2 ; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 @@ -664,16 +709,19 @@ define void @long_chain_double_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, double* %a, ; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill ; MIPS32-NEXT: sdc1 $f2, 24($sp) # 8-byte Folded Spill ; MIPS32-NEXT: sdc1 $f4, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB3_15 +; MIPS32-NEXT: bnez $2, $BB3_19 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.17: # %b.PHI.2 +; MIPS32-NEXT: j $BB3_18 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end +; MIPS32-NEXT: $BB3_18: # %b.PHI.2.end ; MIPS32-NEXT: ldc1 $f0, 
0($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 88 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_15: # %b.PHI.3 +; MIPS32-NEXT: $BB3_19: # %b.PHI.3 ; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload ; MIPS32-NEXT: ldc1 $f2, 24($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload From 91656fcb57ec6878833aba615e1142225514e13b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 9 Sep 2020 22:35:56 +0200 Subject: [PATCH 0199/1079] [X86] Add tests for minnum/maxnum with constant NaN (NFC) --- llvm/test/CodeGen/X86/fmaxnum.ll | 34 ++++++++++++++++++++++++++++++++ llvm/test/CodeGen/X86/fminnum.ll | 34 ++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/llvm/test/CodeGen/X86/fmaxnum.ll b/llvm/test/CodeGen/X86/fmaxnum.ll index 2a7bb25164d31..41256ba18dd63 100644 --- a/llvm/test/CodeGen/X86/fmaxnum.ll +++ b/llvm/test/CodeGen/X86/fmaxnum.ll @@ -609,5 +609,39 @@ define float @test_maxnum_const_op2(float %x) { ret float %r } +define float @test_maxnum_const_nan(float %x) { +; SSE-LABEL: test_maxnum_const_nan: +; SSE: # %bb.0: +; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: cmpunordss %xmm0, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm3 +; SSE-NEXT: andps %xmm2, %xmm3 +; SSE-NEXT: maxss %xmm0, %xmm2 +; SSE-NEXT: andnps %xmm2, %xmm1 +; SSE-NEXT: orps %xmm3, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: test_maxnum_const_nan: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX1-NEXT: vmaxss %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: test_maxnum_const_nan: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX512-NEXT: vmaxss %xmm0, %xmm2, %xmm1 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vmovaps %xmm1, %xmm0 +; AVX512-NEXT: retq + %r = call float @llvm.maxnum.f32(float %x, float 0x7fff000000000000) + ret float %r +} + attributes #0 = { "no-nans-fp-math"="true" } diff --git a/llvm/test/CodeGen/X86/fminnum.ll b/llvm/test/CodeGen/X86/fminnum.ll index fc4c48686a953..373920c185e3f 100644 --- a/llvm/test/CodeGen/X86/fminnum.ll +++ b/llvm/test/CodeGen/X86/fminnum.ll @@ -609,5 +609,39 @@ define float @test_minnum_const_op2(float %x) { ret float %r } +define float @test_minnum_const_nan(float %x) { +; SSE-LABEL: test_minnum_const_nan: +; SSE: # %bb.0: +; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: cmpunordss %xmm0, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm3 +; SSE-NEXT: andps %xmm2, %xmm3 +; SSE-NEXT: minss %xmm0, %xmm2 +; SSE-NEXT: andnps %xmm2, %xmm1 +; SSE-NEXT: orps %xmm3, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: test_minnum_const_nan: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX1-NEXT: vminss %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: test_minnum_const_nan: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX512-NEXT: vminss %xmm0, %xmm2, %xmm1 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1} +; 
AVX512-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512-NEXT:    retq
+  %r = call float @llvm.minnum.f32(float %x, float 0x7fff000000000000)
+  ret float %r
+}
+
 attributes #0 = { "no-nans-fp-math"="true" }

From e5784ef8f6c6a7779f5dfc8f989ea37d233be388 Mon Sep 17 00:00:00 2001
From: Amara Emerson
Date: Fri, 28 Aug 2020 16:21:34 -0700
Subject: [PATCH 0200/1079] [GlobalISel] Enable usage of BranchProbabilityInfo
 in IRTranslator.

We weren't using this before, so none of the MachineFunction CFG edges had
the branch probability information added. As a result, block placement later
in the pipeline was flying blind.

This is enabled only when optimizations are enabled, matching SelectionDAG.

Differential Revision: https://reviews.llvm.org/D86824
---
 .../llvm/CodeGen/GlobalISel/IRTranslator.h    |  6 ++--
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  | 18 +++++++++---
 .../Target/AArch64/AArch64TargetMachine.cpp   |  2 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  2 +-
 llvm/lib/Target/ARM/ARMTargetMachine.cpp      |  2 +-
 llvm/lib/Target/Mips/MipsTargetMachine.cpp    |  2 +-
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp  |  2 +-
 llvm/lib/Target/X86/X86TargetMachine.cpp      |  2 +-
 .../irtranslator-condbr-lower-tree.ll         | 14 +++++-----
 .../GlobalISel/irtranslator-switch-bittest.ll | 16 +++++------
 .../CodeGen/AArch64/GlobalISel/swifterror.ll  |  2 --
 .../GlobalISel/divergent-control-flow.ll      | 24 ++++++++--------
 llvm/test/CodeGen/X86/GlobalISel/phi.ll       | 28 +++++++++++--------
 13 files changed, 68 insertions(+), 52 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 8360e81036cd5..0674b53c604a7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -27,6 +27,7 @@
 #include "llvm/CodeGen/SwitchLoweringUtils.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/CodeGen.h"
 #include
 #include

@@ -556,6 +557,8 @@ class IRTranslator : public MachineFunctionPass {
   /// Current target configuration. Controls how the pass handles errors.
   const TargetPassConfig *TPC;

+  CodeGenOpt::Level OptLevel;
+
   /// Current optimization remark emitter. Used to report failures.
   std::unique_ptr<OptimizationRemarkEmitter> ORE;

@@ -659,8 +662,7 @@ class IRTranslator : public MachineFunctionPass {
                         BranchProbability Prob);

 public:
-  // Ctor, nothing fancy.
-  IRTranslator();
+  IRTranslator(CodeGenOpt::Level OptLevel = CodeGenOpt::None);

   StringRef getPassName() const override { return "IRTranslator"; }

diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 34ba4731ca364..8a39739242002 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -74,6 +74,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include
 #include
+#include
 #include
 #include
 #include
@@ -114,7 +115,8 @@ static void reportTranslationError(MachineFunction &MF,
   ORE.emit(R);
 }

-IRTranslator::IRTranslator() : MachineFunctionPass(ID) { }
+IRTranslator::IRTranslator(CodeGenOpt::Level optlevel)
+    : MachineFunctionPass(ID), OptLevel(optlevel) {}

 #ifndef NDEBUG
 namespace {
@@ -158,6 +160,8 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired();
   AU.addRequired();
   AU.addRequired();
+  if (OptLevel != CodeGenOpt::None)
+    AU.addRequired<BranchProbabilityInfoWrapperPass>();
   getSelectionDAGFallbackAnalysisUsage(AU);
   MachineFunctionPass::getAnalysisUsage(AU);
 }
@@ -2912,14 +2916,20 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
   MRI = &MF->getRegInfo();
   DL = &F.getParent()->getDataLayout();
   ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
+  const TargetMachine &TM = MF->getTarget();
+  EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F);
   FuncInfo.MF = MF;
-  FuncInfo.BPI = nullptr;
+  if (EnableOpts)
+    FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+  else
+    FuncInfo.BPI = nullptr;
+
   const auto &TLI = *MF->getSubtarget().getTargetLowering();
-  const TargetMachine &TM = MF->getTarget();
+
   SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo);
   SL->init(TLI, TM, *DL);

-  EnableOpts = TM.getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+

   assert(PendingPHIs.empty() && "stale PHIs");

diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index d7a14a3dc7728..6df717f030a72 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -544,7 +544,7 @@ bool AArch64PassConfig::addInstSelector() {
 }

 bool AArch64PassConfig::addIRTranslator() {
-  addPass(new IRTranslator());
+  addPass(new IRTranslator(getOptLevel()));
   return false;
 }

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 5946249e84b09..f46349cb87df5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -946,7 +946,7 @@ bool GCNPassConfig::addInstSelector() {
 }

 bool GCNPassConfig::addIRTranslator() {
-  addPass(new IRTranslator());
+  addPass(new IRTranslator(getOptLevel()));
   return false;
 }

diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 5068f9b5a0f46..cf4115f77fec5 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -470,7 +470,7 @@ bool ARMPassConfig::addInstSelector() {
 }

 bool ARMPassConfig::addIRTranslator() {
-  addPass(new IRTranslator());
+  addPass(new IRTranslator(getOptLevel()));
   return false;
 }

diff --git a/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index 5433b29f3f089..7e2c43164d52f 100644
--- a/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -316,7 +316,7 @@ void MipsPassConfig::addPreEmitPass() {
 }

 bool MipsPassConfig::addIRTranslator() {
-  addPass(new IRTranslator());
+  addPass(new IRTranslator(getOptLevel()));
return false; } diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index eeb0cabc2f8bd..1b305eac74876 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -147,7 +147,7 @@ bool RISCVPassConfig::addInstSelector() { } bool RISCVPassConfig::addIRTranslator() { - addPass(new IRTranslator()); + addPass(new IRTranslator(getOptLevel())); return false; } diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 7616b2ea7d998..34bc72a2e69f3 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -444,7 +444,7 @@ bool X86PassConfig::addInstSelector() { } bool X86PassConfig::addIRTranslator() { - addPass(new IRTranslator()); + addPass(new IRTranslator(getOptLevel())); return false; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-condbr-lower-tree.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-condbr-lower-tree.ll index 173bc85882d89..223fa28d49faa 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-condbr-lower-tree.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-condbr-lower-tree.ll @@ -5,7 +5,7 @@ declare i32 @bar(...) define void @or_cond(i32 %X, i32 %Y, i32 %Z) nounwind { ; CHECK-LABEL: name: or_cond ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; CHECK: successors: %bb.2(0x20000000), %bb.4(0x60000000) ; CHECK: liveins: $w0, $w1, $w2 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 @@ -19,7 +19,7 @@ define void @or_cond(i32 %X, i32 %Y, i32 %Z) nounwind { ; CHECK: G_BRCOND [[ICMP2]](s1), %bb.2 ; CHECK: G_BR %bb.4 ; CHECK: bb.4.entry: - ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK: successors: %bb.2(0x2aaaaaab), %bb.3(0x55555555) ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] ; CHECK: G_BRCOND [[ICMP3]](s1), %bb.2 ; CHECK: G_BR %bb.3 @@ -44,7 +44,7 @@ UnifiedReturnBlock: define void @and_cond(i32 %X, i32 %Y, i32 %Z) nounwind { ; CHECK-LABEL: name: and_cond ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK: successors: %bb.4(0x60000000), %bb.3(0x20000000) ; CHECK: liveins: $w0, $w1, $w2 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 @@ -58,7 +58,7 @@ define void @and_cond(i32 %X, i32 %Y, i32 %Z) nounwind { ; CHECK: G_BRCOND [[ICMP2]](s1), %bb.4 ; CHECK: G_BR %bb.3 ; CHECK: bb.4.entry: - ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK: successors: %bb.2(0x55555555), %bb.3(0x2aaaaaab) ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] ; CHECK: G_BRCOND [[ICMP3]](s1), %bb.2 ; CHECK: G_BR %bb.3 @@ -117,7 +117,7 @@ UnifiedReturnBlock: define void @or_cond_multiple_cases(i32 %X, i32 %Y, i32 %Z) nounwind { ; CHECK-LABEL: name: or_cond_multiple_cases ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.2(0x40000000), %bb.5(0x40000000) + ; CHECK: successors: %bb.2(0x10000000), %bb.5(0x70000000) ; CHECK: liveins: $w0, $w1, $w2 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 @@ -132,12 +132,12 @@ define void @or_cond_multiple_cases(i32 %X, i32 %Y, i32 %Z) nounwind { ; CHECK: G_BRCOND [[ICMP3]](s1), %bb.2 ; CHECK: G_BR %bb.5 ; CHECK: bb.5.entry: - ; CHECK: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; CHECK: successors: %bb.2(0x12492492), %bb.4(0x6db6db6e) ; CHECK: 
[[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] ; CHECK: G_BRCOND [[ICMP4]](s1), %bb.2 ; CHECK: G_BR %bb.4 ; CHECK: bb.4.entry: - ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK: successors: %bb.2(0x2aaaaaab), %bb.3(0x55555555) ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; CHECK: G_BRCOND [[ICMP5]](s1), %bb.2 ; CHECK: G_BR %bb.3 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-bittest.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-bittest.ll index 28756a4ae6175..8dfae82d02a62 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-bittest.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-bittest.ll @@ -4,7 +4,7 @@ define i32 @test_bittest(i16 %p) { ; CHECK-LABEL: name: test_bittest ; CHECK: bb.1 (%ir-block.0): - ; CHECK: successors: %bb.4(0x40000000), %bb.5(0x40000000) + ; CHECK: successors: %bb.4(0x1b6db6db), %bb.5(0x64924925) ; CHECK: liveins: $w0 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -25,7 +25,7 @@ define i32 @test_bittest(i16 %p) { ; CHECK: G_BRCOND [[ICMP1]](s1), %bb.3 ; CHECK: G_BR %bb.2 ; CHECK: bb.5 (%ir-block.0): - ; CHECK: successors: %bb.3(0x40000000), %bb.4(0x40000000) + ; CHECK: successors: %bb.3(0x745d1746), %bb.4(0x0ba2e8ba) ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[C5]], [[ZEXT1]](s64) ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 866239240827043840 @@ -61,7 +61,7 @@ declare void @callee() define void @test_bittest_2_bt(i32 %p) { ; CHECK-LABEL: name: test_bittest_2_bt ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.5(0x40000000), %bb.6(0x40000000) + ; CHECK: successors: %bb.5(0x345d1746), %bb.6(0x4ba2e8ba) ; CHECK: liveins: $w0 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 176 @@ -71,7 +71,7 @@ define void @test_bittest_2_bt(i32 %p) { ; CHECK: G_BRCOND [[ICMP]](s1), %bb.5 ; CHECK: G_BR %bb.6 ; CHECK: bb.5.entry: - ; CHECK: successors: %bb.4(0x40000000), %bb.7(0x40000000) + ; CHECK: successors: %bb.4(0x0ccccccd), %bb.7(0x73333333) ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[C2]] ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[SUB1]](s32) @@ -80,7 +80,7 @@ define void @test_bittest_2_bt(i32 %p) { ; CHECK: G_BRCOND [[ICMP1]](s1), %bb.4 ; CHECK: G_BR %bb.7 ; CHECK: bb.6.entry: - ; CHECK: successors: %bb.2(0x40000000), %bb.5(0x40000000) + ; CHECK: successors: %bb.2(0x76276276), %bb.5(0x09d89d8a) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[SUB]](s32) ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 57351 @@ -90,7 +90,7 @@ define void @test_bittest_2_bt(i32 %p) { ; CHECK: G_BRCOND [[ICMP2]](s1), %bb.2 ; CHECK: G_BR %bb.5 ; CHECK: bb.7.entry: - ; CHECK: successors: %bb.3(0x40000000), %bb.4(0x40000000) + ; CHECK: successors: %bb.3(0x71c71c72), %bb.4(0x0e38e38e) ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[C7]], [[ZEXT]](s64) ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 365072220160 @@ -134,7 +134,7 @@ sw.default: ; preds = %entry define i32 @test_bittest_single_bt_only_with_fallthrough(i16 %p) { ; CHECK-LABEL: name: test_bittest_single_bt_only_with_fallthrough ; CHECK: bb.1 (%ir-block.0): - ; CHECK: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; CHECK: successors: %bb.2(0x0aaaaaab), %bb.4(0x75555555) ; CHECK: liveins: $w0 
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -148,7 +148,7 @@ define i32 @test_bittest_single_bt_only_with_fallthrough(i16 %p) { ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[SUB]](s32), [[C3]] ; CHECK: G_BRCOND [[ICMP]](s1), %bb.2 ; CHECK: bb.4 (%ir-block.0): - ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: successors: %bb.3(0x745d1746), %bb.2(0x0ba2e8ba) ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[C4]], [[ZEXT1]](s64) ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 866239240827043840 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll b/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll index a4a1747b05af9..cbfadbdb5d720 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll @@ -131,8 +131,6 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float ; CHECK: malloc ; CHECK: mov x21, x0 ; CHECK: strb w{{.*}}, [x0, #8] -; CHECK: fcmp -; CHECK: b.le ; CHECK: ret entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll index 4b8554b781fd9..bf1f0ccbc2e24 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll @@ -205,24 +205,26 @@ define amdgpu_kernel void @break_loop(i32 %arg) { ; CHECK-NEXT: ; implicit-def: $vgpr1 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_subrev_u32_e32 v0, s2, v0 -; CHECK-NEXT: BB5_1: ; %bb1 +; CHECK-NEXT: s_branch BB5_2 +; CHECK-NEXT: BB5_1: ; %Flow +; CHECK-NEXT: ; in Loop: Header=BB5_2 Depth=1 +; CHECK-NEXT: s_and_b64 s[2:3], exec, s[2:3] +; CHECK-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; CHECK-NEXT: s_andn2_b64 exec, exec, s[0:1] +; CHECK-NEXT: s_cbranch_execz BB5_4 +; CHECK-NEXT: BB5_2: ; %bb1 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: v_add_u32_e32 v1, 1, v1 ; CHECK-NEXT: v_cmp_le_i32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, 1 -; CHECK-NEXT: s_cbranch_vccnz BB5_3 -; CHECK-NEXT: ; %bb.2: ; %bb4 -; CHECK-NEXT: ; in Loop: Header=BB5_1 Depth=1 +; CHECK-NEXT: s_cbranch_vccnz BB5_1 +; CHECK-NEXT: ; %bb.3: ; %bb4 +; CHECK-NEXT: ; in Loop: Header=BB5_2 Depth=1 ; CHECK-NEXT: global_load_dword v2, v[0:1], off ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_cmp_ge_i32_e64 s[2:3], v0, v2 -; CHECK-NEXT: BB5_3: ; %Flow -; CHECK-NEXT: ; in Loop: Header=BB5_1 Depth=1 -; CHECK-NEXT: s_and_b64 s[2:3], exec, s[2:3] -; CHECK-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] -; CHECK-NEXT: s_andn2_b64 exec, exec, s[0:1] -; CHECK-NEXT: s_cbranch_execnz BB5_1 -; CHECK-NEXT: ; %bb.4: ; %bb9 +; CHECK-NEXT: s_branch BB5_1 +; CHECK-NEXT: BB5_4: ; %bb9 ; CHECK-NEXT: s_endpgm bb: %id = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/X86/GlobalISel/phi.ll b/llvm/test/CodeGen/X86/GlobalISel/phi.ll index 28e65c73acae5..d2ce98d0fb41a 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/phi.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/phi.ll @@ -71,10 +71,11 @@ define i32 @test_i32(i32 %a, i32 %f, i32 %t) { ; ALL-NEXT: cmpl %ecx, %edi ; ALL-NEXT: setg %cl ; ALL-NEXT: testb $1, %cl -; ALL-NEXT: jne .LBB2_2 -; ALL-NEXT: # %bb.1: # %cond.false +; ALL-NEXT: je .LBB2_1 +; ALL-NEXT: # %bb.2: # %cond.end +; ALL-NEXT: retq +; ALL-NEXT: .LBB2_1: # %cond.false ; ALL-NEXT: movl %edx, %eax -; ALL-NEXT: .LBB2_2: # %cond.end ; ALL-NEXT: retq entry: %cmp = 
icmp sgt i32 %a, 0 @@ -99,10 +100,11 @@ define i64 @test_i64(i32 %a, i64 %f, i64 %t) { ; ALL-NEXT: cmpl %ecx, %edi ; ALL-NEXT: setg %cl ; ALL-NEXT: testb $1, %cl -; ALL-NEXT: jne .LBB3_2 -; ALL-NEXT: # %bb.1: # %cond.false +; ALL-NEXT: je .LBB3_1 +; ALL-NEXT: # %bb.2: # %cond.end +; ALL-NEXT: retq +; ALL-NEXT: .LBB3_1: # %cond.false ; ALL-NEXT: movq %rdx, %rax -; ALL-NEXT: .LBB3_2: # %cond.end ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 @@ -126,10 +128,11 @@ define float @test_float(i32 %a, float %f, float %t) { ; ALL-NEXT: cmpl %eax, %edi ; ALL-NEXT: setg %al ; ALL-NEXT: testb $1, %al -; ALL-NEXT: jne .LBB4_2 -; ALL-NEXT: # %bb.1: # %cond.false +; ALL-NEXT: je .LBB4_1 +; ALL-NEXT: # %bb.2: # %cond.end +; ALL-NEXT: retq +; ALL-NEXT: .LBB4_1: # %cond.false ; ALL-NEXT: movaps %xmm1, %xmm0 -; ALL-NEXT: .LBB4_2: # %cond.end ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 @@ -153,10 +156,11 @@ define double @test_double(i32 %a, double %f, double %t) { ; ALL-NEXT: cmpl %eax, %edi ; ALL-NEXT: setg %al ; ALL-NEXT: testb $1, %al -; ALL-NEXT: jne .LBB5_2 -; ALL-NEXT: # %bb.1: # %cond.false +; ALL-NEXT: je .LBB5_1 +; ALL-NEXT: # %bb.2: # %cond.end +; ALL-NEXT: retq +; ALL-NEXT: .LBB5_1: # %cond.false ; ALL-NEXT: movaps %xmm1, %xmm0 -; ALL-NEXT: .LBB5_2: # %cond.end ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 From a9f79707624fe20e7ac19c5063d77190baa8b281 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Wed, 9 Sep 2020 14:30:47 -0700 Subject: [PATCH 0201/1079] Add REQUIRES: asserts to a test that uses an asserts only flag. --- .../CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir index 6ed879d82b9be..0631ff89ade0d 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir @@ -1,5 +1,10 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -debugify-and-strip-all-safe -O0 -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombinerhelper-only-enable-rule="opt_brcond_by_inverting_cond" -global-isel -verify-machineinstrs %s -o - | FileCheck %s + +# Need asserts for the only-enable-rule to work. + +# REQUIRES: asserts + --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios5.0.0" From 2955a27abc25cd1b9d737c211c2cfe11e2a5de3e Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Wed, 9 Sep 2020 14:41:00 -0700 Subject: [PATCH 0202/1079] [lldb] Pass the arch as part of the triple in the ARCH_CFLAGS --- lldb/packages/Python/lldbsuite/test/builders/darwin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/builders/darwin.py b/lldb/packages/Python/lldbsuite/test/builders/darwin.py index 4548217c3fab8..236e4fac13682 100644 --- a/lldb/packages/Python/lldbsuite/test/builders/darwin.py +++ b/lldb/packages/Python/lldbsuite/test/builders/darwin.py @@ -78,7 +78,7 @@ def getExtraMakeArgs(self): {'{}="{}"'.format(key, value) for key, value in args.items()}) - def getArchCFlags(self, architecture): + def getArchCFlags(self, arch): """Returns the ARCH_CFLAGS for the make system.""" # Get the triple components. vendor, os, version, env = get_triple() @@ -86,7 +86,7 @@ def getArchCFlags(self, architecture): return "" # Construct the triple from its components. 
- triple = "{}-{}-{}-{}".format(vendor, os, version, env) + triple = '-'.join([arch, vendor, os, version, env]) # Construct min version argument version_min = "" From 5a4a05c8116ebdcb434cd15796a255cf024a6bf0 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 9 Sep 2020 23:48:44 +0200 Subject: [PATCH 0203/1079] [ARM] Add additional fmin/fmax with nan tests (NFC) Adding these to ARM which has both FMINNUM and FMINIMUM. --- llvm/test/CodeGen/ARM/fminmax-folds.ll | 71 ++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 llvm/test/CodeGen/ARM/fminmax-folds.ll diff --git a/llvm/test/CodeGen/ARM/fminmax-folds.ll b/llvm/test/CodeGen/ARM/fminmax-folds.ll new file mode 100644 index 0000000000000..807c0a8b8eb44 --- /dev/null +++ b/llvm/test/CodeGen/ARM/fminmax-folds.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=armv8-eabi | FileCheck %s + +declare float @llvm.minnum.f32(float, float) +declare float @llvm.maxnum.f32(float, float) +declare float @llvm.minimum.f32(float, float) +declare float @llvm.maximum.f32(float, float) + +define float @test_minnum_const_nan(float %x) { +; CHECK-LABEL: test_minnum_const_nan: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI0_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vminnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long 0x7ff80000 @ float NaN + %r = call float @llvm.minnum.f32(float %x, float 0x7fff000000000000) + ret float %r +} + +define float @test_maxnum_const_nan(float %x) { +; CHECK-LABEL: test_maxnum_const_nan: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI1_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI1_0: +; CHECK-NEXT: .long 0x7ff80000 @ float NaN + %r = call float @llvm.maxnum.f32(float %x, float 0x7fff000000000000) + ret float %r +} + +define float @test_maximum_const_nan(float %x) { +; CHECK-LABEL: test_maximum_const_nan: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI2_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmax.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI2_0: +; CHECK-NEXT: .long 0x7ff80000 @ float NaN + %r = call float @llvm.maximum.f32(float %x, float 0x7fff000000000000) + ret float %r +} + +define float @test_minimum_const_nan(float %x) { +; CHECK-LABEL: test_minimum_const_nan: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI3_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmin.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI3_0: +; CHECK-NEXT: .long 0x7ff80000 @ float NaN + %r = call float @llvm.minimum.f32(float %x, float 0x7fff000000000000) + ret float %r +} From 0a5dc7effb191eff740e0e7ae7bd8e1f6bdb3ad9 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 9 Sep 2020 22:35:02 +0200 Subject: [PATCH 0204/1079] [DAGCombiner] Fold fmin/fmax of NaN fminnum(X, NaN) is X, fminimum(X, NaN) is NaN. This mirrors the behavior of existing InstSimplify folds. This is expected to improve the reduction lowerings in D87391, which use NaN as a neutral element. 
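For illustration, a minimal IR sketch of the fold (the function name here is
hypothetical; the intrinsic and the NaN constant are the same ones the tests
below use):

  declare float @llvm.minnum.f32(float, float)

  define float @fold_minnum_nan(float %x) {
    ; The combine replaces the fminnum node with its non-NaN operand, so this
    ; function now compiles down to a plain return of %x.
    %r = call float @llvm.minnum.f32(float %x, float 0x7fff000000000000)
    ret float %r
  }

For the NaN-propagating variants (fminimum/fmaximum), the same call instead
folds to a NaN constant.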
Differential Revision: https://reviews.llvm.org/D87415 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 +++++++--- llvm/test/CodeGen/ARM/fminmax-folds.ll | 36 +++---------------- llvm/test/CodeGen/X86/fmaxnum.ll | 32 ++--------------- llvm/test/CodeGen/X86/fminnum.ll | 32 ++--------------- 4 files changed, 23 insertions(+), 95 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c714358c01577..eaa70444578a4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -14040,7 +14040,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { } static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N, - APFloat (*Op)(const APFloat &, const APFloat &)) { + APFloat (*Op)(const APFloat &, const APFloat &), + bool PropagatesNaN) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); @@ -14058,23 +14059,30 @@ static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N, !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); + // minnum(X, nan) -> X + // maxnum(X, nan) -> X + // minimum(X, nan) -> nan + // maximum(X, nan) -> nan + if (N1CFP && N1CFP->isNaN()) + return PropagatesNaN ? N->getOperand(1) : N->getOperand(0); + return SDValue(); } SDValue DAGCombiner::visitFMINNUM(SDNode *N) { - return visitFMinMax(DAG, N, minnum); + return visitFMinMax(DAG, N, minnum, /* PropagatesNaN */ false); } SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { - return visitFMinMax(DAG, N, maxnum); + return visitFMinMax(DAG, N, maxnum, /* PropagatesNaN */ false); } SDValue DAGCombiner::visitFMINIMUM(SDNode *N) { - return visitFMinMax(DAG, N, minimum); + return visitFMinMax(DAG, N, minimum, /* PropagatesNaN */ true); } SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) { - return visitFMinMax(DAG, N, maximum); + return visitFMinMax(DAG, N, maximum, /* PropagatesNaN */ true); } SDValue DAGCombiner::visitFABS(SDNode *N) { diff --git a/llvm/test/CodeGen/ARM/fminmax-folds.ll b/llvm/test/CodeGen/ARM/fminmax-folds.ll index 807c0a8b8eb44..35fdcd1d0d6fd 100644 --- a/llvm/test/CodeGen/ARM/fminmax-folds.ll +++ b/llvm/test/CodeGen/ARM/fminmax-folds.ll @@ -9,15 +9,7 @@ declare float @llvm.maximum.f32(float, float) define float @test_minnum_const_nan(float %x) { ; CHECK-LABEL: test_minnum_const_nan: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI0_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vminnm.f32 s0, s2, s0 -; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI0_0: -; CHECK-NEXT: .long 0x7ff80000 @ float NaN %r = call float @llvm.minnum.f32(float %x, float 0x7fff000000000000) ret float %r } @@ -25,15 +17,7 @@ define float @test_minnum_const_nan(float %x) { define float @test_maxnum_const_nan(float %x) { ; CHECK-LABEL: test_maxnum_const_nan: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI1_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 -; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI1_0: -; CHECK-NEXT: .long 0x7ff80000 @ float NaN %r = call float @llvm.maxnum.f32(float %x, float 0x7fff000000000000) ret float %r } @@ -41,15 +25,9 @@ define float @test_maxnum_const_nan(float %x) { define float @test_maximum_const_nan(float %x) { ; CHECK-LABEL: test_maximum_const_nan: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI2_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmax.f32 d0, d1, d0 -; 
CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: movw r0, #0 +; CHECK-NEXT: movt r0, #32760 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI2_0: -; CHECK-NEXT: .long 0x7ff80000 @ float NaN %r = call float @llvm.maximum.f32(float %x, float 0x7fff000000000000) ret float %r } @@ -57,15 +35,9 @@ define float @test_maximum_const_nan(float %x) { define float @test_minimum_const_nan(float %x) { ; CHECK-LABEL: test_minimum_const_nan: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI3_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmin.f32 d0, d1, d0 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: movw r0, #0 +; CHECK-NEXT: movt r0, #32760 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI3_0: -; CHECK-NEXT: .long 0x7ff80000 @ float NaN %r = call float @llvm.minimum.f32(float %x, float 0x7fff000000000000) ret float %r } diff --git a/llvm/test/CodeGen/X86/fmaxnum.ll b/llvm/test/CodeGen/X86/fmaxnum.ll index 41256ba18dd63..fd5b638a146da 100644 --- a/llvm/test/CodeGen/X86/fmaxnum.ll +++ b/llvm/test/CodeGen/X86/fmaxnum.ll @@ -610,35 +610,9 @@ define float @test_maxnum_const_op2(float %x) { } define float @test_maxnum_const_nan(float %x) { -; SSE-LABEL: test_maxnum_const_nan: -; SSE: # %bb.0: -; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: cmpunordss %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm3 -; SSE-NEXT: andps %xmm2, %xmm3 -; SSE-NEXT: maxss %xmm0, %xmm2 -; SSE-NEXT: andnps %xmm2, %xmm1 -; SSE-NEXT: orps %xmm3, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: retq -; -; AVX1-LABEL: test_maxnum_const_nan: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX1-NEXT: vmaxss %xmm0, %xmm1, %xmm2 -; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 -; AVX1-NEXT: retq -; -; AVX512-LABEL: test_maxnum_const_nan: -; AVX512: # %bb.0: -; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; AVX512-NEXT: vmaxss %xmm0, %xmm2, %xmm1 -; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 -; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1} -; AVX512-NEXT: vmovaps %xmm1, %xmm0 -; AVX512-NEXT: retq +; CHECK-LABEL: test_maxnum_const_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: retq %r = call float @llvm.maxnum.f32(float %x, float 0x7fff000000000000) ret float %r } diff --git a/llvm/test/CodeGen/X86/fminnum.ll b/llvm/test/CodeGen/X86/fminnum.ll index 373920c185e3f..dc1b8ca8eb4db 100644 --- a/llvm/test/CodeGen/X86/fminnum.ll +++ b/llvm/test/CodeGen/X86/fminnum.ll @@ -610,35 +610,9 @@ define float @test_minnum_const_op2(float %x) { } define float @test_minnum_const_nan(float %x) { -; SSE-LABEL: test_minnum_const_nan: -; SSE: # %bb.0: -; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: cmpunordss %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm3 -; SSE-NEXT: andps %xmm2, %xmm3 -; SSE-NEXT: minss %xmm0, %xmm2 -; SSE-NEXT: andnps %xmm2, %xmm1 -; SSE-NEXT: orps %xmm3, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: retq -; -; AVX1-LABEL: test_minnum_const_nan: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX1-NEXT: vminss %xmm0, %xmm1, %xmm2 -; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 -; AVX1-NEXT: retq -; -; AVX512-LABEL: test_minnum_const_nan: -; AVX512: # %bb.0: -; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; AVX512-NEXT: vminss %xmm0, %xmm2, %xmm1 -; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 
-; AVX512-NEXT:    vmovss %xmm2, %xmm1, %xmm1 {%k1}
-; AVX512-NEXT:    vmovaps %xmm1, %xmm0
-; AVX512-NEXT:    retq
+; CHECK-LABEL: test_minnum_const_nan:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    retq
   %r = call float @llvm.minnum.f32(float %x, float 0x7fff000000000000)
   ret float %r
 }

From 9969c317ff0877ed6155043422c70e1d4c028a35 Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Wed, 9 Sep 2020 19:36:41 +0100
Subject: [PATCH 0205/1079] [DSE,MemorySSA] Handle atomic stores explicitly in
 isReadClobber.

Atomic stores are modeled as MemoryDefs to capture the fact that they may
not be reordered, depending on the ordering constraints.

Atomic stores that are monotonic or weaker do not limit re-ordering, so
we do not have to treat them as potential read clobbers.

Note that llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
already contains a set of negative test cases.

Reviewed By: asbirlea

Differential Revision: https://reviews.llvm.org/D87386
---
 llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp |  5 +++++
 .../DeadStoreElimination/MSSA/atomic-todo.ll        | 11 -----------
 .../Transforms/DeadStoreElimination/MSSA/atomic.ll  | 11 +++++++++++
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 1427bd4ad4dfd..12514be0e631a 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1824,6 +1824,11 @@ struct DSEState {

   // Returns true if \p Use may read from \p DefLoc.
   bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) {
+    // Monotonic or weaker atomic stores can be re-ordered and do not need
+    // to be treated as read clobbers.
+    if (auto SI = dyn_cast<StoreInst>(UseInst))
+      return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
+
     if (!UseInst->mayReadFromMemory())
       return false;

diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-todo.ll
index 04361e63e6d08..8dfb85719c309 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-todo.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-todo.ll
@@ -21,14 +21,3 @@ define i32 @test9() {
   store i32 1, i32* @x
   ret i32 %x
 }
-
-; DSE across monotonic store (allowed as long as the eliminated store isUnordered)
-define void @test10() {
-; CHECK-LABEL: test10
-; CHECK-NOT: store i32 0
-; CHECK: store i32 1
-  store i32 0, i32* @x
-  store atomic i32 42, i32* @y monotonic, align 4
-  store i32 1, i32* @x
-  ret void
-}
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
index 5a3ea376415c3..51129fe2bcadb 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
@@ -88,6 +88,17 @@ define i32 @test8() {
   ret i32 %x
 }

+; DSE across monotonic store (allowed as long as the eliminated store isUnordered)
+define void @test10() {
+; CHECK-LABEL: test10
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+  store i32 0, i32* @x
+  store atomic i32 42, i32* @y monotonic, align 4
+  store i32 1, i32* @x
+  ret void
+}
+
 ; DSE across monotonic load (forbidden since the eliminated store is atomic)
 define i32 @test11() {
 ; CHECK-LABEL: @test11(

From 480e7f43a22578beaa2edc7a271e77793222a1c3 Mon Sep 17 00:00:00 2001
From: Jessica Paquette
Date: Wed, 9 Sep 2020 09:45:54 -0700
Subject: [PATCH 0206/1079] [AArch64][GlobalISel] Share address mode selection code for
memops We were missing support for the G_ADD_LOW + ADRP folding optimization in the manual selection code for G_LOAD, G_STORE, and G_ZEXTLOAD. As a result, we were missing cases like this: ``` @foo = external hidden global i32* define void @baz(i32* %0) { store i32* %0, i32** @foo ret void } ``` https://godbolt.org/z/16r7ad This functionality already existed in the addressing mode functions for the importer. So, this patch makes the manual selection code use `selectAddrModeIndexed` rather than duplicating work. This is a 0.2% geomean code size improvement for CTMark at -O3. There is one code size increase (0.1% on lencod) which is likely because `selectAddrModeIndexed` doesn't look through constants. Differential Revision: https://reviews.llvm.org/D87397 --- .../GISel/AArch64InstructionSelector.cpp | 87 ++++++++++--------- .../AArch64/GlobalISel/select-store.mir | 20 +++++ 2 files changed, 64 insertions(+), 43 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index a8d68180bb76a..228db83533cdf 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -2260,18 +2260,19 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { } auto &MemOp = **I.memoperands_begin(); + uint64_t MemSizeInBytes = MemOp.getSize(); if (MemOp.isAtomic()) { // For now we just support s8 acquire loads to be able to compile stack // protector code. if (MemOp.getOrdering() == AtomicOrdering::Acquire && - MemOp.getSize() == 1) { + MemSizeInBytes == 1) { I.setDesc(TII.get(AArch64::LDARB)); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n"); return false; } - unsigned MemSizeInBits = MemOp.getSize() * 8; + unsigned MemSizeInBits = MemSizeInBytes * 8; const Register PtrReg = I.getOperand(1).getReg(); #ifndef NDEBUG @@ -2286,78 +2287,78 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { const Register ValReg = I.getOperand(0).getReg(); const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); - const unsigned NewOpc = - selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); - if (NewOpc == I.getOpcode()) - return false; - - I.setDesc(TII.get(NewOpc)); - - uint64_t Offset = 0; - auto *PtrMI = MRI.getVRegDef(PtrReg); - - // Try to fold a GEP into our unsigned immediate addressing mode. - if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) { - if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) { - int64_t Imm = *COff; - const unsigned Size = MemSizeInBits / 8; - const unsigned Scale = Log2_32(Size); - if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) { - Register Ptr2Reg = PtrMI->getOperand(1).getReg(); - I.getOperand(1).setReg(Ptr2Reg); - PtrMI = MRI.getVRegDef(Ptr2Reg); - Offset = Imm / Size; - } + // Helper lambda for partially selecting I. Either returns the original + // instruction with an updated opcode, or a new instruction. + auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { + bool IsStore = I.getOpcode() == TargetOpcode::G_STORE; + const unsigned NewOpc = + selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); + if (NewOpc == I.getOpcode()) + return nullptr; + // Check if we can fold anything into the addressing mode. + auto AddrModeFns = + selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes); + if (!AddrModeFns) { + // Can't fold anything. Use the original instruction. 
+ I.setDesc(TII.get(NewOpc)); + I.addOperand(MachineOperand::CreateImm(0)); + return &I; } - } - // If we haven't folded anything into our addressing mode yet, try to fold - // a frame index into the base+offset. - if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX) - I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex()); + // Folded something. Create a new instruction and return it. + auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags()); + IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg); + NewInst.cloneMemRefs(I); + for (auto &Fn : *AddrModeFns) + Fn(NewInst); + I.eraseFromParent(); + return &*NewInst; + }; - I.addOperand(MachineOperand::CreateImm(Offset)); + MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); + if (!LoadStore) + return false; // If we're storing a 0, use WZR/XZR. if (Opcode == TargetOpcode::G_STORE) { auto CVal = getConstantVRegValWithLookThrough( - ValReg, MRI, /*LookThroughInstrs = */ true, + LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true, /*HandleFConstants = */ false); if (CVal && CVal->Value == 0) { - unsigned Opc = I.getOpcode(); - switch (Opc) { + switch (LoadStore->getOpcode()) { case AArch64::STRWui: case AArch64::STRHHui: case AArch64::STRBBui: - I.getOperand(0).setReg(AArch64::WZR); + LoadStore->getOperand(0).setReg(AArch64::WZR); break; case AArch64::STRXui: - I.getOperand(0).setReg(AArch64::XZR); + LoadStore->getOperand(0).setReg(AArch64::XZR); break; } } } if (IsZExtLoad) { - // The zextload from a smaller type to i32 should be handled by the importer. - if (MRI.getType(ValReg).getSizeInBits() != 64) + // The zextload from a smaller type to i32 should be handled by the + // importer. + if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64) return false; // If we have a ZEXTLOAD then change the load's type to be a narrower reg - //and zero_extend with SUBREG_TO_REG. + // and zero_extend with SUBREG_TO_REG. Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); - Register DstReg = I.getOperand(0).getReg(); - I.getOperand(0).setReg(LdReg); + Register DstReg = LoadStore->getOperand(0).getReg(); + LoadStore->getOperand(0).setReg(LdReg); - MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); + MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator())); MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {}) .addImm(0) .addUse(LdReg) .addImm(AArch64::sub_32); - constrainSelectedInstRegOperands(I, TII, TRI, RBI); + constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass, MRI); } - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); } case TargetOpcode::G_SMULH: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir index db355dfc151f5..05038b40ca365 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir @@ -39,6 +39,9 @@ define void @store_8xi16(<8 x i16> %v, <8 x i16>* %ptr) { ret void } define void @store_16xi8(<16 x i8> %v, <16 x i8>* %ptr) { ret void } + @x = external hidden local_unnamed_addr global i32*, align 8 + define void @store_adrp_add_low() { ret void } + ... --- @@ -600,3 +603,20 @@ body: | RET_ReallyLR ... 
+---
+name:            store_adrp_add_low
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: store_adrp_add_low
+    ; CHECK: liveins: $x0
+    ; CHECK: %copy:gpr64 = COPY $x0
+    ; CHECK: %adrp:gpr64common = ADRP target-flags(aarch64-page) @x
+    ; CHECK: STRXui %copy, %adrp, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store 8 into @x)
+    %copy:gpr(p0) = COPY $x0
+    %adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x
+    %add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x
+    G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x)

From 8b7c8f2c549d301fcea75d8e6e98a8ee160d5ff4 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek
Date: Wed, 9 Sep 2020 17:27:04 -0500
Subject: [PATCH 0207/1079] Mark masked.{store,scatter,compressstore}
 intrinsics as write-only

---
 llvm/include/llvm/IR/Intrinsics.td            | 72 +++++++++----------
 llvm/test/Analysis/BasicAA/intrinsics.ll      |  2 +-
 .../TypeBasedAliasAnalysis/intrinsics.ll      |  2 +-
 3 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index d42d576dc2030..20c6d3b8cb1c4 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1349,42 +1349,42 @@ def int_get_active_lane_mask:
 //===-------------------------- Masked Intrinsics -------------------------===//
 //
-def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,
-                                      LLVMAnyPointerType<LLVMMatchType<0>>,
-                                      llvm_i32_ty,
-                                      LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
-                                 [IntrArgMemOnly, IntrWillReturn, ImmArg<ArgIndex<2>>]>;
-
-def int_masked_load : Intrinsic<[llvm_anyvector_ty],
-                                [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty,
-                                 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
-                                [IntrReadMem, IntrArgMemOnly, IntrWillReturn,
-                                 ImmArg<ArgIndex<1>>]>;
-
-def int_masked_gather: Intrinsic<[llvm_anyvector_ty],
-                                 [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
-                                  LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
-                                  LLVMMatchType<0>],
-                                 [IntrReadMem, IntrWillReturn,
-                                  ImmArg<ArgIndex<1>>]>;
-
-def int_masked_scatter: Intrinsic<[],
-                                  [llvm_anyvector_ty,
-                                   LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
-                                   LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
-                                  [IntrWillReturn, ImmArg<ArgIndex<2>>]>;
-
-def int_masked_expandload: Intrinsic<[llvm_anyvector_ty],
-                                     [LLVMPointerToElt<0>,
-                                      LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
-                                      LLVMMatchType<0>],
-                                     [IntrReadMem, IntrWillReturn]>;
-
-def int_masked_compressstore: Intrinsic<[],
-                                        [llvm_anyvector_ty,
-                                         LLVMPointerToElt<0>,
-                                         LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
-                                        [IntrArgMemOnly, IntrWillReturn]>;
+def int_masked_load:
+    Intrinsic<[llvm_anyvector_ty],
+              [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty,
+               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
+              [IntrReadMem, IntrArgMemOnly, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
+
+def int_masked_store:
+    Intrinsic<[],
+              [llvm_anyvector_ty, LLVMAnyPointerType<LLVMMatchType<0>>,
+               llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+              [IntrWriteMem, IntrArgMemOnly, IntrWillReturn,
+               ImmArg<ArgIndex<2>>]>;
+
+def int_masked_gather:
+    Intrinsic<[llvm_anyvector_ty],
+              [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
+               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
+              [IntrReadMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
+
+def int_masked_scatter:
+    Intrinsic<[],
+              [llvm_anyvector_ty, LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
+               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+              [IntrWriteMem, IntrWillReturn, ImmArg<ArgIndex<2>>]>;
+
+def int_masked_expandload:
+    Intrinsic<[llvm_anyvector_ty],
+              [LLVMPointerToElt<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+               LLVMMatchType<0>],
+              [IntrReadMem, IntrWillReturn]>;
+
+def int_masked_compressstore:
+    Intrinsic<[],
+              [llvm_anyvector_ty, LLVMPointerToElt<0>,
+               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+              [IntrWriteMem, IntrArgMemOnly, IntrWillReturn]>;

 // Test whether a pointer is associated with a type metadata identifier.
 def int_type_test : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty],
diff --git a/llvm/test/Analysis/BasicAA/intrinsics.ll b/llvm/test/Analysis/BasicAA/intrinsics.ll
index 9cc55ca7a3dec..679beefac5284 100644
--- a/llvm/test/Analysis/BasicAA/intrinsics.ll
+++ b/llvm/test/Analysis/BasicAA/intrinsics.ll
@@ -23,5 +23,5 @@ declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8
 declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) nounwind

 ; CHECK: attributes #0 = { argmemonly nounwind readonly willreturn }
-; CHECK: attributes #1 = { argmemonly nounwind willreturn }
+; CHECK: attributes #1 = { argmemonly nounwind willreturn writeonly }
 ; CHECK: attributes [[ATTR]] = { nounwind }
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
index 648fcf707f9f6..116a0ce0f3afa 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
@@ -23,7 +23,7 @@ declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8
 declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) nounwind

 ; CHECK: attributes #0 = { argmemonly nounwind readonly willreturn }
-; CHECK: attributes #1 = { argmemonly nounwind willreturn }
+; CHECK: attributes #1 = { argmemonly nounwind willreturn writeonly }
 ; CHECK: attributes [[NUW]] = { nounwind }

 !0 = !{!"tbaa root"}

From c259d3a061c8fc0f9520208eb265d4352a0ad447 Mon Sep 17 00:00:00 2001
From: dfukalov
Date: Fri, 4 Sep 2020 22:44:01 +0300
Subject: [PATCH 0208/1079] [AMDGPU] Fix for folding v2.16 literals.

It was found that some packed immediate operands (e.g. ``) are incorrectly
processed, so one of the two packed values was lost.

Introduced a new function to check whether a 32-bit immediate operand can
be folded, and converted the condition on the current op_sel flags value
to a fall-through.

Fixes: SWDEV-247595

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D87158
---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp     | 44 +++++++++----------
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    | 13 ++++++
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |  3 ++
 .../CodeGen/AMDGPU/shrink-add-sub-constant.ll |  4 +-
 4 files changed, 40 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 9a30d4fd6bd4a..b5f6765e85abb 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -192,8 +192,8 @@ static bool updateOperand(FoldCandidate &Fold,
   if (Fold.isImm()) {
     if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked &&
         !(MI->getDesc().TSFlags & SIInstrFlags::IsMAI) &&
-        AMDGPU::isInlinableLiteralV216(static_cast<int32_t>(Fold.ImmToFold),
-                                       ST.hasInv2PiInlineImm())) {
+        AMDGPU::isFoldableLiteralV216(Fold.ImmToFold,
+                                      ST.hasInv2PiInlineImm())) {
       // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
       // already set.
       unsigned Opcode = MI->getOpcode();
@@ -209,30 +209,30 @@ static bool updateOperand(FoldCandidate &Fold,
       ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
       MachineOperand &Mod = MI->getOperand(ModIdx);
       unsigned Val = Mod.getImm();
-      if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
-        return false;
-      // Only apply the following transformation if that operand requries
-      // a packed immediate.
-      switch (TII.get(Opcode).OpInfo[OpNo].OperandType) {
-      case AMDGPU::OPERAND_REG_IMM_V2FP16:
-      case AMDGPU::OPERAND_REG_IMM_V2INT16:
-      case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
-      case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
-        // If upper part is all zero we do not need op_sel_hi.
-        if (!isUInt<16>(Fold.ImmToFold)) {
-          if (!(Fold.ImmToFold & 0xffff)) {
-            Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
+      if (!(Val & SISrcMods::OP_SEL_0) && (Val & SISrcMods::OP_SEL_1)) {
+        // Only apply the following transformation if that operand requires
+        // a packed immediate.
+        switch (TII.get(Opcode).OpInfo[OpNo].OperandType) {
+        case AMDGPU::OPERAND_REG_IMM_V2FP16:
+        case AMDGPU::OPERAND_REG_IMM_V2INT16:
+        case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+        case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+          // If upper part is all zero we do not need op_sel_hi.
+          if (!isUInt<16>(Fold.ImmToFold)) {
+            if (!(Fold.ImmToFold & 0xffff)) {
+              Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
+              Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
+              Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
+              return true;
+            }
             Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
-            Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
+            Old.ChangeToImmediate(Fold.ImmToFold & 0xffff);
             return true;
           }
-          Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
-          Old.ChangeToImmediate(Fold.ImmToFold & 0xffff);
-          return true;
+          break;
+        default:
+          break;
         }
-        break;
-      default:
-        break;
       }
     }
   }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index dd662d9d06f24..92cbbf336f937 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1380,6 +1380,19 @@ bool isInlinableIntLiteralV216(int32_t Literal) {
   return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
 }

+bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
+  assert(HasInv2Pi);
+
+  int16_t Lo16 = static_cast<int16_t>(Literal);
+  if (isInt<16>(Literal) || isUInt<16>(Literal))
+    return true;
+
+  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
+  if (!(Literal & 0xffff))
+    return true;
+  return Lo16 == Hi16;
+}
+
 bool isArgPassedInSGPR(const Argument *A) {
   const Function *F = A->getParent();

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 9c66b27733dbe..c5feadb98f13e 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -693,6 +693,9 @@ bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
 LLVM_READNONE
 bool isInlinableIntLiteralV216(int32_t Literal);

+LLVM_READNONE
+bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);
+
 bool isArgPassedInSGPR(const Argument *Arg);

 LLVM_READONLY
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
index 1bb5b9dd4bce4..3a9fe209a0ca6 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
@@ -1026,7 +1026,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_64(<2 x i16> addrspace(1)* %out,
; 
GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dword v1, v0, s[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_pk_sub_i16 v1, v1, 7 op_sel_hi:[1,0] +; GFX10-NEXT: v_pk_sub_i16 v1, v1, 0x400007 ; GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1100,7 +1100,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(<2 x i16> addrspace(1)* %ou ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dword v1, v0, s[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_pk_sub_i16 v1, v1, 64 op_sel_hi:[1,0] +; GFX10-NEXT: v_pk_sub_i16 v1, v1, 0x7b0040 ; GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() From 09d492902f178f60b3ab986360eadde9b5c8d359 Mon Sep 17 00:00:00 2001 From: Ryan Prichard Date: Wed, 9 Sep 2020 15:43:35 -0700 Subject: [PATCH 0209/1079] [libunwind] Bare-metal DWARF: set dso_base to 0 Previously, DwarfFDECache::findFDE used 0 as a special value meaning "search the entire cache, including dynamically-registered FDEs". Switch this special value to -1, which doesn't make sense as a DSO base. Fixes PR47335. Reviewed By: compnerd, #libunwind Differential Revision: https://reviews.llvm.org/D86748 --- libunwind/src/AddressSpace.hpp | 1 + libunwind/src/UnwindCursor.hpp | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/libunwind/src/AddressSpace.hpp b/libunwind/src/AddressSpace.hpp index cc298c9bbb838..eccc2153c6977 100644 --- a/libunwind/src/AddressSpace.hpp +++ b/libunwind/src/AddressSpace.hpp @@ -518,6 +518,7 @@ inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr, return true; } #elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL) + info.dso_base = 0; // Bare metal is statically linked, so no need to ask the dynamic loader info.dwarf_section_length = (uintptr_t)(&__eh_frame_end - &__eh_frame_start); info.dwarf_section = (uintptr_t)(&__eh_frame_start); diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp index e6a36764fc793..206b5e3983217 100644 --- a/libunwind/src/UnwindCursor.hpp +++ b/libunwind/src/UnwindCursor.hpp @@ -81,6 +81,7 @@ template class _LIBUNWIND_HIDDEN DwarfFDECache { typedef typename A::pint_t pint_t; public: + static constexpr pint_t kSearchAll = static_cast(-1); static pint_t findFDE(pint_t mh, pint_t pc); static void add(pint_t mh, pint_t ip_start, pint_t ip_end, pint_t fde); static void removeAllIn(pint_t mh); @@ -138,7 +139,7 @@ typename A::pint_t DwarfFDECache::findFDE(pint_t mh, pint_t pc) { pint_t result = 0; _LIBUNWIND_LOG_IF_FALSE(_lock.lock_shared()); for (entry *p = _buffer; p < _bufferUsed; ++p) { - if ((mh == p->mh) || (mh == 0)) { + if ((mh == p->mh) || (mh == kSearchAll)) { if ((p->ip_start <= pc) && (pc < p->ip_end)) { result = p->fde; break; @@ -1945,7 +1946,8 @@ void UnwindCursor::setInfoBasedOnIPRegister(bool isReturnAddress) { #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) // There is no static unwind info for this pc. Look to see if an FDE was // dynamically registered for it. 
- pint_t cachedFDE = DwarfFDECache::findFDE(0, pc); + pint_t cachedFDE = DwarfFDECache::findFDE(DwarfFDECache::kSearchAll, + pc); if (cachedFDE != 0) { typename CFI_Parser::FDE_Info fdeInfo; typename CFI_Parser::CIE_Info cieInfo; From a6183d0f028cb73eccc82a7cce9534708a149762 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Thu, 10 Sep 2020 02:55:06 +0900 Subject: [PATCH 0210/1079] [ValueTracking] isKnownNonZero, computeKnownBits for freeze This implements support for isKnownNonZero, computeKnownBits when freeze is involved. ``` br (x != 0), BB1, BB2 BB1: y = freeze x ``` In the above program, we can say that y is non-zero. The reason is as follows: (1) If x was poison, `br (x != 0)` raised UB (2) If x was fully undef, the branch again raised UB (3) If x was non-zero partially undef, say `undef | 1`, `freeze x` will return a nondeterministic value which is also non-zero. (4) If x was just a concrete value, it is trivial Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D75808 --- llvm/lib/Analysis/ValueTracking.cpp | 11 ++++++++++ .../Transforms/InstSimplify/known-non-zero.ll | 21 +++++++++++++++++++ llvm/unittests/Analysis/ValueTrackingTest.cpp | 18 ++++++++++++++++ 3 files changed, 50 insertions(+) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 469257d91071d..1a894959c5bd9 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1872,6 +1872,10 @@ static void computeKnownBitsFromOperator(const Operator *I, } } break; + case Instruction::Freeze: + if (isGuaranteedNotToBePoison(I->getOperand(0), Q.CxtI, Q.DT, Depth + 1)) + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + break; } } @@ -2577,6 +2581,13 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, return isKnownNonZero(Vec, DemandedVecElts, Depth, Q); } } + // Freeze + else if (const FreezeInst *FI = dyn_cast(V)) { + auto *Op = FI->getOperand(0); + if (isKnownNonZero(Op, Depth, Q) && + isGuaranteedNotToBePoison(Op, Q.CxtI, Q.DT, Depth)) + return true; + } KnownBits Known(BitWidth); computeKnownBits(V, DemandedElts, Known, Depth, Q); diff --git a/llvm/test/Transforms/InstSimplify/known-non-zero.ll b/llvm/test/Transforms/InstSimplify/known-non-zero.ll index 524e51be76f54..2af4f27162061 100644 --- a/llvm/test/Transforms/InstSimplify/known-non-zero.ll +++ b/llvm/test/Transforms/InstSimplify/known-non-zero.ll @@ -145,3 +145,24 @@ for.body: ; preds = %for.cond %inc = add nuw nsw i32 %shift.0, 1 br label %for.cond } + +define i1 @freeze_nonzero(i8 %x, i8 %mask) { +; CHECK-LABEL: @freeze_nonzero( +; CHECK-NEXT: [[Y:%.*]] = or i8 [[X:%.*]], [[MASK:%.*]] +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[Y]], 0 +; CHECK-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] +; CHECK: A: +; CHECK-NEXT: ret i1 false +; CHECK: B: +; CHECK-NEXT: ret i1 false +; + %y = or i8 %x, %mask + %c = icmp ne i8 %y, 0 + br i1 %c, label %A, label %B +A: + %fr = freeze i8 %y + %c2 = icmp eq i8 %fr, 0 + ret i1 %c2 +B: + ret i1 0 +} diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp index 09faad4484599..c45bca1c53bf7 100644 --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -1059,6 +1059,24 @@ TEST_F(ComputeKnownBitsTest, ComputeKnownBitsPtrToIntZext) { EXPECT_EQ(Known.One.getZExtValue(), 0u); } +TEST_F(ComputeKnownBitsTest, ComputeKnownBitsFreeze) { + parseAssembly("define void @test() {\n" + " %m = call i32 @any_num()\n" + " 
%A = freeze i32 %m\n"
+                "  %n = and i32 %m, 31\n"
+                "  %c = icmp eq i32 %n, 0\n"
+                "  call void @llvm.assume(i1 %c)\n"
+                "  ret void\n"
+                "}\n"
+                "declare void @llvm.assume(i1)\n"
+                "declare i32 @any_num()\n");
+  AssumptionCache AC(*F);
+  KnownBits Known = computeKnownBits(A, M->getDataLayout(), /* Depth */ 0, &AC,
+                                     F->front().getTerminator());
+  EXPECT_EQ(Known.Zero.getZExtValue(), 31u);
+  EXPECT_EQ(Known.One.getZExtValue(), 0u);
+}
+
 class IsBytewiseValueTest : public ValueTrackingTest,
                             public ::testing::WithParamInterface<
                                 std::pair<const char *, const char *>> {

From 91c28bbe74f24e0e84edf84daae7659c11e7afd6 Mon Sep 17 00:00:00 2001
From: Vitaly Buka
Date: Wed, 9 Sep 2020 16:17:37 -0700
Subject: [PATCH 0211/1079] [Asan] Return nullptr for invalid chunks

CHUNK_ALLOCATED and CHUNK_QUARANTINE are the only states which make an
AsanChunk useful to GetAsanChunk callers; in any other state the members
of AsanChunk are not meaningful. Fix a few cases which didn't expect
nullptr; most of the callers already expect nullptr.

Reviewed By: morehouse

Differential Revision: https://reviews.llvm.org/D87135
---
 compiler-rt/lib/asan/asan_allocator.cpp | 38 ++++++++++++++++---------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp
index 64796f7526714..f7e238d613e16 100644
--- a/compiler-rt/lib/asan/asan_allocator.cpp
+++ b/compiler-rt/lib/asan/asan_allocator.cpp
@@ -302,9 +302,9 @@ struct Allocator {
     // This could be a user-facing chunk (with redzones), or some internal
     // housekeeping chunk, like TransferBatch. Start by assuming the former.
     AsanChunk *ac = GetAsanChunk((void *)chunk);
-    uptr allocated_size = allocator.GetActuallyAllocatedSize((void *)ac);
-    if (atomic_load(&ac->chunk_state, memory_order_acquire) ==
-        CHUNK_ALLOCATED) {
+    uptr allocated_size = allocator.GetActuallyAllocatedSize((void *)chunk);
+    if (ac && atomic_load(&ac->chunk_state, memory_order_acquire) ==
+                  CHUNK_ALLOCATED) {
       uptr beg = ac->Beg();
       uptr end = ac->Beg() + ac->UsedSize(true);
       uptr chunk_end = chunk + allocated_size;
@@ -385,6 +385,10 @@ struct Allocator {
   // We have an address between two chunks, and we want to report just one.
   AsanChunk *ChooseChunk(uptr addr, AsanChunk *left_chunk,
                          AsanChunk *right_chunk) {
+    if (!left_chunk)
+      return right_chunk;
+    if (!right_chunk)
+      return left_chunk;
     // Prefer an allocated chunk over freed chunk and freed chunk
     // over available chunk.
     u8 left_state = atomic_load(&left_chunk->chunk_state, memory_order_relaxed);
@@ -737,18 +741,25 @@ struct Allocator {
   AsanChunk *GetAsanChunk(void *alloc_beg) {
     if (!alloc_beg)
       return nullptr;
+    AsanChunk *p = nullptr;
     if (!allocator.FromPrimary(alloc_beg)) {
       uptr *meta = reinterpret_cast<uptr *>(allocator.GetMetaData(alloc_beg));
-      AsanChunk *m = reinterpret_cast<AsanChunk *>(meta[1]);
-      return m;
+      p = reinterpret_cast<AsanChunk *>(meta[1]);
+    } else {
+      uptr *alloc_magic = reinterpret_cast<uptr *>(alloc_beg);
+      if (alloc_magic[0] == kAllocBegMagic)
+        p = reinterpret_cast<AsanChunk *>(alloc_magic[1]);
+      else
+        p = reinterpret_cast<AsanChunk *>(alloc_beg);
     }
-    uptr *alloc_magic = reinterpret_cast<uptr *>(alloc_beg);
-    if (alloc_magic[0] == kAllocBegMagic)
-      return reinterpret_cast<AsanChunk *>(alloc_magic[1]);
-    // FIXME: This is either valid small chunk with tiny redzone or invalid
-    // chunk which is beeing allocated/deallocated. The latter case should
-    // return nullptr like secondary allocator does.
-    return reinterpret_cast<AsanChunk *>(alloc_beg);
+    if (!p)
+      return nullptr;
+    u8 state = atomic_load(&p->chunk_state, memory_order_relaxed);
+    // This does not guarantee that the chunk is initialized, but the chunk
+    // is definitely invalid for any other value.
+    if (state == CHUNK_ALLOCATED || state == CHUNK_QUARANTINE)
+      return p;
+    return nullptr;
   }

   AsanChunk *GetAsanChunkByAddr(uptr p) {
@@ -774,9 +785,8 @@ struct Allocator {

   AsanChunkView FindHeapChunkByAddress(uptr addr) {
     AsanChunk *m1 = GetAsanChunkByAddr(addr);
-    if (!m1) return AsanChunkView(m1);
     sptr offset = 0;
-    if (AsanChunkView(m1).AddrIsAtLeft(addr, 1, &offset)) {
+    if (!m1 || AsanChunkView(m1).AddrIsAtLeft(addr, 1, &offset)) {
       // The address is in the chunk's left redzone, so maybe it is actually
       // a right buffer overflow from the other chunk to the left.
       // Search a bit to the left to see if there is another chunk.

From 82cbc9330a4dc61e867864d96b0dbec74abaca89 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Wed, 9 Sep 2020 10:24:35 -0400
Subject: [PATCH 0212/1079] AMDGPU: Fix inserting waitcnts before kill uses

---
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp   |  2 +-
 .../AMDGPU/waitcnt-meta-instructions.mir      | 66 +++++++++++++++++++
 2 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/waitcnt-meta-instructions.mir

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 87ef8bcaa92e4..5abe39241c707 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -855,7 +855,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
   setForceEmitWaitcnt();
   bool IsForceEmitWaitcnt = isForceEmitWaitcnt();

-  if (MI.isDebugInstr())
+  if (MI.isMetaInstruction())
     return false;

   AMDGPU::Waitcnt Wait;
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-meta-instructions.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-meta-instructions.mir
new file mode 100644
index 0000000000000..4905bcc06c622
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-meta-instructions.mir
@@ -0,0 +1,66 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-insert-waitcnts -o - %s | FileCheck -check-prefix=GCN %s
+
+# Make sure no waitcnt is inserted for meta instruction uses.
+
+---
+
+name: waitcnt_kill
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; GCN-LABEL: name: waitcnt_kill
+    ; GCN: S_WAITCNT 0
+    ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
+    ; GCN: KILL $vgpr0
+    $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
+    KILL $vgpr0
+...
+
+---
+
+name: waitcnt_implicit_def
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; GCN-LABEL: name: waitcnt_implicit_def
+    ; GCN: S_WAITCNT 0
+    ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
+    ; GCN: $vgpr0 = IMPLICIT_DEF
+    $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+...
+
+---
+
+name: waitcnt_eh_label
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+    ; GCN-LABEL: name: waitcnt_eh_label
+    ; GCN: S_WAITCNT 0
+    ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
+    ; GCN: EH_LABEL , implicit $vgpr0
+    $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
+    EH_LABEL , implicit $vgpr0
+
+...
+ +--- + +name: waitcnt_cfi + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; GCN-LABEL: name: waitcnt_cfi + ; GCN: S_WAITCNT 0 + ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec + ; GCN: CFI_INSTRUCTION offset $vgpr0_lo16, 16 + $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec + CFI_INSTRUCTION offset $vgpr0, 16 + +... From 85490874b23ba1337210dbcb700b258ffb751b78 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 9 Sep 2020 18:08:48 -0400 Subject: [PATCH 0213/1079] AMDGPU: Skip all meta instructions in hazard recognizer This was not adding a necessary nop due to thinking the kill counted. --- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 2 +- .../AMDGPU/hazard-recognizer-meta-insts.mir | 41 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/hazard-recognizer-meta-insts.mir diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index d897127812b9b..67db397b19f63 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -368,7 +368,7 @@ static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard, if (IsHazard(&*I)) return WaitStates; - if (I->isInlineAsm() || I->isImplicitDef() || I->isDebugInstr()) + if (I->isInlineAsm() || I->isMetaInstruction()) continue; WaitStates += SIInstrInfo::getNumWaitStates(*I); diff --git a/llvm/test/CodeGen/AMDGPU/hazard-recognizer-meta-insts.mir b/llvm/test/CodeGen/AMDGPU/hazard-recognizer-meta-insts.mir new file mode 100644 index 0000000000000..e59db4fead3d7 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/hazard-recognizer-meta-insts.mir @@ -0,0 +1,41 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx906 -run-pass=post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GFX9 %s + +# Make sure the kill is skipped for hazard purposes, so the nop is +# correctly inserted. + +--- + +name: global_store_dwordx4_data_hazard_kill + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-LABEL: name: global_store_dwordx4_data_hazard_kill + ; GFX9: GLOBAL_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec + ; GFX9: $vgpr2 = KILL + ; GFX9: S_NOP 0 + ; GFX9: $vgpr2 = V_MOV_B32_e32 0, implicit $exec + GLOBAL_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec + $vgpr2 = KILL + $vgpr2 = V_MOV_B32_e32 0, implicit $exec + +... + +--- + +name: global_store_dwordx3_data_hazard_kill + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX9-LABEL: name: global_store_dwordx3_data_hazard_kill + ; GFX9: GLOBAL_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec + ; GFX9: $vgpr2 = KILL + ; GFX9: S_NOP 0 + ; GFX9: $vgpr2 = V_MOV_B32_e32 0, implicit $exec + GLOBAL_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec + $vgpr2 = KILL + $vgpr2 = V_MOV_B32_e32 0, implicit $exec + +... 
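The two patches above make the same underlying change: code that counts hardware wait states while scanning the instruction stream must skip *every* meta instruction (KILL, IMPLICIT_DEF, CFI_INSTRUCTION, EH_LABEL, debug instructions), not just an ad-hoc subset, because meta instructions emit no machine code and therefore consume no cycles. The sketch below is a simplified, self-contained model of that pattern; it does not use the real `MachineInstr` API, and the `Inst` struct, its fields, and `waitStatesSince` are invented here for illustration only.

```cpp
#include <cstdio>
#include <cstring>
#include <functional>
#include <vector>

// Simplified stand-in for llvm::MachineInstr. In LLVM proper,
// isMetaInstruction() covers KILL, IMPLICIT_DEF, CFI_INSTRUCTION,
// EH_LABEL, DBG_VALUE and similar opcodes.
struct Inst {
  const char *Name;
  bool IsMeta;    // true if the instruction emits no machine code
  int WaitStates; // cycles contributed by a real instruction
};

// Walk backwards from the end of the stream, accumulating wait states
// until the hazard-producing instruction is found. Meta instructions are
// skipped so they can never (incorrectly) satisfy part of the delay.
int waitStatesSince(const std::vector<Inst> &Stream,
                    const std::function<bool(const Inst &)> &IsHazardDef) {
  int WaitStates = 0;
  for (auto I = Stream.rbegin(), E = Stream.rend(); I != E; ++I) {
    if (IsHazardDef(*I))
      return WaitStates;
    if (I->IsMeta) // no code emitted, no cycles elapsed
      continue;
    WaitStates += I->WaitStates;
  }
  return -1; // producer not found
}

int main() {
  std::vector<Inst> Stream = {
      {"GLOBAL_STORE_DWORDX4", /*IsMeta=*/false, /*WaitStates=*/1},
      {"KILL", /*IsMeta=*/true, /*WaitStates=*/0},
  };
  int WS = waitStatesSince(Stream, [](const Inst &I) {
    return std::strcmp(I.Name, "GLOBAL_STORE_DWORDX4") == 0;
  });
  // Prints 0: the KILL contributed no cycles, so a hazard that needs one
  // wait state after the store still requires an inserted nop.
  std::printf("wait states since store: %d\n", WS);
  return 0;
}
```

With the narrower predicates these patches replace (`isDebugInstr()` alone, or `isInlineAsm() || isImplicitDef() || isDebugInstr()`), a KILL would have been counted as a real instruction and the required nop would have been omitted, which is exactly the miscompile the hazard-recognizer test above guards against.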
From e15215e04154e1bc8ea57d46f36b054adf49a3ed Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 9 Sep 2020 16:58:52 -0400 Subject: [PATCH 0214/1079] AMDGPU: Hoist check for VGPRs --- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 5abe39241c707..ae1f6e212d98e 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1026,8 +1026,10 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore( continue; RegInterval Interval = ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I); + + const bool IsVGPR = TRI->isVGPR(*MRI, Op.getReg()); for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { - if (TRI->isVGPR(*MRI, Op.getReg())) { + if (IsVGPR) { // RAW always needs an s_waitcnt. WAW needs an s_waitcnt unless the // previous write and this write are the same type of VMEM // instruction, in which case they're guaranteed to write their From f559bf31adb21220bbb39e0524b4113f9611fff4 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 9 Sep 2020 16:57:33 -0700 Subject: [PATCH 0215/1079] [gcov] Delete unused llvm_gcda_increment_indirect_counter It has been unused since r157564 (2012). --- compiler-rt/lib/profile/GCDAProfiling.c | 26 ------------------------- 1 file changed, 26 deletions(-) diff --git a/compiler-rt/lib/profile/GCDAProfiling.c b/compiler-rt/lib/profile/GCDAProfiling.c index d57fdbae5371d..cf6c44bae6415 100644 --- a/compiler-rt/lib/profile/GCDAProfiling.c +++ b/compiler-rt/lib/profile/GCDAProfiling.c @@ -406,32 +406,6 @@ void llvm_gcda_start_file(const char *orig_filename, uint32_t version, #endif } -/* Given an array of pointers to counters (counters), increment the n-th one, - * where we're also given a pointer to n (predecessor). - */ -COMPILER_RT_VISIBILITY -void llvm_gcda_increment_indirect_counter(uint32_t *predecessor, - uint64_t **counters) { - uint64_t *counter; - uint32_t pred; - - pred = *predecessor; - if (pred == 0xffffffff) - return; - counter = counters[pred]; - - /* Don't crash if the pred# is out of sync. This can happen due to threads, - or because of a TODO in GCOVProfiling.cpp buildEdgeLookupTable(). */ - if (counter) - ++*counter; -#ifdef DEBUG_GCDAPROFILING - else - fprintf(stderr, - "llvmgcda: increment_indirect_counter counters=%08llx, pred=%u\n", - *counter, *predecessor); -#endif -} - COMPILER_RT_VISIBILITY void llvm_gcda_emit_function(uint32_t ident, uint32_t func_checksum, uint32_t cfg_checksum) { From b897729a39d35f95173852fe97da3602ec574c1d Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Wed, 9 Sep 2020 17:11:08 -0700 Subject: [PATCH 0216/1079] [llvm-install-name-tool] Add -V flag This diff adds -V alias for --version to make llvm-install-name-tool consistent with other tools (llvm-objcopy, llvm-strip, etc). 
Test plan: make check-all Differential revision: https://reviews.llvm.org/D87264 --- llvm/test/tools/llvm-objcopy/tool-version.test | 1 + llvm/tools/llvm-objcopy/InstallNameToolOpts.td | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/llvm/test/tools/llvm-objcopy/tool-version.test b/llvm/test/tools/llvm-objcopy/tool-version.test index 5fe33eb8e7173..a6cc8f96221d2 100644 --- a/llvm/test/tools/llvm-objcopy/tool-version.test +++ b/llvm/test/tools/llvm-objcopy/tool-version.test @@ -5,6 +5,7 @@ # RUN: llvm-strip -V | FileCheck --check-prefix=STRIP %s # RUN: llvm-install-name-tool --version | FileCheck %s +# RUN: llvm-install-name-tool -V | FileCheck %s # OBJCOPY-DAG: {{ version }} # OBJCOPY-DAG: GNU objcopy diff --git a/llvm/tools/llvm-objcopy/InstallNameToolOpts.td b/llvm/tools/llvm-objcopy/InstallNameToolOpts.td index 04ffe62c42fca..7998041513cb1 100644 --- a/llvm/tools/llvm-objcopy/InstallNameToolOpts.td +++ b/llvm/tools/llvm-objcopy/InstallNameToolOpts.td @@ -32,3 +32,7 @@ def change: MultiArg<["-", "--"], "change", 2>, def version : Flag<["--"], "version">, HelpText<"Print the version and exit.">; + +def V : Flag<["-"], "V">, + Alias, + HelpText<"Alias for --version">; From 01cdab0b335e21321987505e66f34c24dc55b0d7 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 9 Sep 2020 17:24:45 -0700 Subject: [PATCH 0217/1079] [gcov] Delete flush_fn_list (unused since D83149) --- compiler-rt/lib/profile/GCDAProfiling.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/compiler-rt/lib/profile/GCDAProfiling.c b/compiler-rt/lib/profile/GCDAProfiling.c index cf6c44bae6415..4055681872415 100644 --- a/compiler-rt/lib/profile/GCDAProfiling.c +++ b/compiler-rt/lib/profile/GCDAProfiling.c @@ -127,11 +127,6 @@ struct fn_list { */ struct fn_list writeout_fn_list; -/* - * A list of flush functions that our __gcov_flush() function should call, shared between all dynamic objects. - */ -struct fn_list flush_fn_list; - /* * A list of reset functions, shared between all dynamic objects. */ From 3e4e0fb2435544acadf3614d3cd7b5f0f8fdfda2 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 9 Sep 2020 18:17:44 -0700 Subject: [PATCH 0218/1079] mlir/Transforms/BufferPlacement.h: Add missing override --- mlir/include/mlir/Transforms/BufferPlacement.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Transforms/BufferPlacement.h b/mlir/include/mlir/Transforms/BufferPlacement.h index 6d88ac3599cf1..8d3e476928b75 100644 --- a/mlir/include/mlir/Transforms/BufferPlacement.h +++ b/mlir/include/mlir/Transforms/BufferPlacement.h @@ -158,7 +158,7 @@ class BufferAssignmentFuncOpConverter /// Performs the actual signature rewriting step. LogicalResult matchAndRewrite(mlir::FuncOp, ArrayRef, - ConversionPatternRewriter &) const; + ConversionPatternRewriter &) const override; }; /// Rewrites the `ReturnOp` to conform with the changed function signature. @@ -235,7 +235,7 @@ class BufferAssignmentCallOpConverter /// Performs the actual rewriting step. 
LogicalResult matchAndRewrite(CallOp, ArrayRef, - ConversionPatternRewriter &) const; + ConversionPatternRewriter &) const override; }; /// Populates `patterns` with the conversion patterns of buffer From 52f0837778b6f3b742b36c22b7c608535a52097b Mon Sep 17 00:00:00 2001 From: Jordan Rupprecht Date: Wed, 9 Sep 2020 20:23:59 -0700 Subject: [PATCH 0219/1079] [NFC] Move definition of variable now only used in debug builds --- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 228db83533cdf..33fb9b7287d5c 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -2274,8 +2274,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { } unsigned MemSizeInBits = MemSizeInBytes * 8; - const Register PtrReg = I.getOperand(1).getReg(); #ifndef NDEBUG + const Register PtrReg = I.getOperand(1).getReg(); const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); // Sanity-check the pointer register. assert(PtrRB.getID() == AArch64::GPRRegBankID && From c4d7536136b331bada079b2afbb2bd09ad8296bf Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Fri, 24 Jul 2020 15:47:38 -0700 Subject: [PATCH 0220/1079] [CMake] Simplify CMake handling for libxml2 This matches the changes made to handling of zlib done in 10b1b4a where we rely on find_package and the imported target rather than manually appending the library and include paths. The use of LLVM_LIBXML2_ENABLED has been replaced by LLVM_ENABLE_LIBXML2 thus reducing the number of variables. Differential Revision: https://reviews.llvm.org/D84563 --- lld/test/CMakeLists.txt | 2 +- lld/test/lit.cfg.py | 4 +- lld/test/lit.site.cfg.py.in | 2 +- llvm/cmake/config-ix.cmake | 40 ++++++++++--------- llvm/cmake/modules/GetLibraryName.cmake | 17 ++++++++ llvm/cmake/modules/LLVMConfig.cmake.in | 5 ++- llvm/include/llvm/Config/config.h.cmake | 2 +- llvm/lib/Support/CMakeLists.txt | 25 +++--------- llvm/lib/WindowsManifest/CMakeLists.txt | 35 +++++++++------- .../WindowsManifest/WindowsManifestMerger.cpp | 6 +-- llvm/test/CMakeLists.txt | 2 +- llvm/test/lit.cfg.py | 2 +- llvm/test/lit.site.cfg.py.in | 2 +- llvm/utils/gn/secondary/lld/test/BUILD.gn | 4 +- .../llvm/include/llvm/Config/BUILD.gn | 4 +- llvm/utils/gn/secondary/llvm/test/BUILD.gn | 4 +- 16 files changed, 85 insertions(+), 71 deletions(-) create mode 100644 llvm/cmake/modules/GetLibraryName.cmake diff --git a/lld/test/CMakeLists.txt b/lld/test/CMakeLists.txt index 52e6118ba876b..ff957e8912114 100644 --- a/lld/test/CMakeLists.txt +++ b/lld/test/CMakeLists.txt @@ -6,7 +6,7 @@ set(LLVM_LIBS_DIR "${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/%(build_config)s" llvm_canonicalize_cmake_booleans( LLVM_ENABLE_ZLIB - LLVM_LIBXML2_ENABLED + LLVM_ENABLE_LIBXML2 ) configure_lit_site_cfg( diff --git a/lld/test/lit.cfg.py b/lld/test/lit.cfg.py index 267f8c5178584..037b9ed2d1676 100644 --- a/lld/test/lit.cfg.py +++ b/lld/test/lit.cfg.py @@ -87,11 +87,11 @@ # Indirectly check if the mt.exe Microsoft utility exists by searching for # cvtres, which always accompanies it. Alternatively, check if we can use # libxml2 to merge manifests. 
-if (lit.util.which('cvtres', config.environment['PATH']) or +if (lit.util.which('cvtres', config.environment['PATH']) or config.llvm_libxml2_enabled): config.available_features.add('manifest_tool') -if config.llvm_libxml2_enabled: +if config.have_libxml2: config.available_features.add('libxml2') if config.have_dia_sdk: diff --git a/lld/test/lit.site.cfg.py.in b/lld/test/lit.site.cfg.py.in index 3d4c51f4ab647..bbc2c892eb715 100644 --- a/lld/test/lit.site.cfg.py.in +++ b/lld/test/lit.site.cfg.py.in @@ -7,7 +7,6 @@ config.llvm_src_root = "@LLVM_SOURCE_DIR@" config.llvm_obj_root = "@LLVM_BINARY_DIR@" config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" config.llvm_libs_dir = "@LLVM_LIBS_DIR@" -config.llvm_libxml2_enabled = @LLVM_LIBXML2_ENABLED@ config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@" config.lld_obj_root = "@LLD_BINARY_DIR@" config.lld_libs_dir = "@LLVM_LIBRARY_OUTPUT_INTDIR@" @@ -15,6 +14,7 @@ config.lld_tools_dir = "@LLVM_RUNTIME_OUTPUT_INTDIR@" config.target_triple = "@TARGET_TRIPLE@" config.python_executable = "@Python3_EXECUTABLE@" config.have_zlib = @LLVM_ENABLE_ZLIB@ +config.have_libxml2 = @LLVM_ENABLE_LIBXML2@ config.sizeof_void_p = @CMAKE_SIZEOF_VOID_P@ # Support substitution of the tools and libs dirs with user parameters. This is diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake index 6b92180b739e8..eeaebf31c926f 100644 --- a/llvm/cmake/config-ix.cmake +++ b/llvm/cmake/config-ix.cmake @@ -137,6 +137,27 @@ if(LLVM_ENABLE_ZLIB) set(LLVM_ENABLE_ZLIB "${HAVE_ZLIB}") endif() +if(LLVM_ENABLE_LIBXML2) + if(LLVM_ENABLE_LIBXML2 STREQUAL FORCE_ON) + find_package(LibXml2 REQUIRED) + elseif(NOT LLVM_USE_SANITIZER MATCHES "Memory.*") + find_package(LibXml2) + endif() + if(LibXml2_FOUND) + # Check if libxml2 we found is usable; for example, we may have found a 32-bit + # library on a 64-bit system which would result in a link-time failure. + cmake_push_check_state() + set(CMAKE_REQUIRED_INCLUDES ${LIBXML2_INCLUDE_DIRS}) + set(CMAKE_REQUIRED_LIBRARIES ${LIBXML2_LIBRARIES}) + check_symbol_exists(xmlReadMemory libxml/xmlreader.h HAVE_LIBXML2) + cmake_pop_check_state() + if(LLVM_ENABLE_LIBXML2 STREQUAL FORCE_ON AND NOT HAVE_LIBXML2) + message(FATAL_ERROR "Failed to configure libxml2") + endif() + endif() + set(LLVM_ENABLE_LIBXML2 "${HAVE_LIBXML2}") +endif() + # Don't look for these libraries if we're using MSan, since uninstrumented third # party code may call MSan interceptors like strlen, leading to false positives. 
if(NOT LLVM_USE_SANITIZER MATCHES "Memory.*") @@ -161,21 +182,6 @@ if(NOT LLVM_USE_SANITIZER MATCHES "Memory.*") else() set(LLVM_ENABLE_TERMINFO 0) endif() - - find_library(ICONV_LIBRARY_PATH NAMES iconv libiconv libiconv-2 c) - set(LLVM_LIBXML2_ENABLED 0) - set(LIBXML2_FOUND 0) - if((LLVM_ENABLE_LIBXML2) AND ((CMAKE_SYSTEM_NAME MATCHES "Linux") AND (ICONV_LIBRARY_PATH) OR APPLE)) - find_package(LibXml2) - if (LIBXML2_FOUND) - set(LLVM_LIBXML2_ENABLED 1) - if ((CMAKE_OSX_SYSROOT) AND (EXISTS ${CMAKE_OSX_SYSROOT}/${LIBXML2_INCLUDE_DIR})) - include_directories(${CMAKE_OSX_SYSROOT}/${LIBXML2_INCLUDE_DIR}) - else() - include_directories(${LIBXML2_INCLUDE_DIR}) - endif() - endif() - endif() else() set(LLVM_ENABLE_TERMINFO 0) endif() @@ -183,10 +189,6 @@ else() set(LLVM_ENABLE_TERMINFO 0) endif() -if (LLVM_ENABLE_LIBXML2 STREQUAL "FORCE_ON" AND NOT LLVM_LIBXML2_ENABLED) - message(FATAL_ERROR "Failed to congifure libxml2") -endif() - check_library_exists(xar xar_open "" HAVE_LIBXAR) if(HAVE_LIBXAR) set(XAR_LIB xar) diff --git a/llvm/cmake/modules/GetLibraryName.cmake b/llvm/cmake/modules/GetLibraryName.cmake new file mode 100644 index 0000000000000..13c0080671a3c --- /dev/null +++ b/llvm/cmake/modules/GetLibraryName.cmake @@ -0,0 +1,17 @@ +# Returns library name for a given path. +function(get_library_name path name) + get_filename_component(path ${path} NAME) + set(prefixes ${CMAKE_FIND_LIBRARY_PREFIXES}) + set(suffixes ${CMAKE_FIND_LIBRARY_SUFFIXES}) + list(FILTER prefixes EXCLUDE REGEX "^\\s*$") + list(FILTER suffixes EXCLUDE REGEX "^\\s*$") + if(prefixes) + string(REPLACE ";" "|" prefixes "${prefixes}") + string(REGEX REPLACE "^(${prefixes})" "" path ${path}) + endif() + if(suffixes) + string(REPLACE ";" "|" suffixes "${suffixes}") + string(REGEX REPLACE "(${suffixes})$" "" path ${path}) + endif() + set(${name} "${path}" PARENT_SCOPE) +endfunction() diff --git a/llvm/cmake/modules/LLVMConfig.cmake.in b/llvm/cmake/modules/LLVMConfig.cmake.in index a5c370bbc25e4..4453020cf4da4 100644 --- a/llvm/cmake/modules/LLVMConfig.cmake.in +++ b/llvm/cmake/modules/LLVMConfig.cmake.in @@ -55,7 +55,10 @@ if(LLVM_ENABLE_ZLIB) find_package(ZLIB) endif() -set(LLVM_LIBXML2_ENABLED @LLVM_LIBXML2_ENABLED@) +set(LLVM_ENABLE_LIBXML2 @LLVM_ENABLE_LIBXML2@) +if(LLVM_ENABLE_LIBXML2) + find_package(LibXml2) +endif() set(LLVM_WITH_Z3 @LLVM_WITH_Z3@) diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake index aec8d08f30e74..9ad0d827dfd8d 100644 --- a/llvm/include/llvm/Config/config.h.cmake +++ b/llvm/include/llvm/Config/config.h.cmake @@ -306,7 +306,7 @@ #cmakedefine01 LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO /* Define if libxml2 is supported on this platform. */ -#cmakedefine LLVM_LIBXML2_ENABLED ${LLVM_LIBXML2_ENABLED} +#cmakedefine LLVM_ENABLE_LIBXML2 ${LLVM_ENABLE_LIBXML2} /* Define to the extension used for shared libraries, say, ".so". 
*/ #cmakedefine LTDL_SHLIB_EXT "${LTDL_SHLIB_EXT}" diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 9eefea566feef..01bf8febb5407 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -1,24 +1,9 @@ +include(GetLibraryName) + if(LLVM_ENABLE_ZLIB) set(imported_libs ZLIB::ZLIB) endif() -function(get_system_libname libpath libname) - get_filename_component(libpath ${libpath} NAME) - set(prefixes ${CMAKE_FIND_LIBRARY_PREFIXES}) - set(suffixes ${CMAKE_FIND_LIBRARY_SUFFIXES}) - list(FILTER prefixes EXCLUDE REGEX "^\\s*$") - list(FILTER suffixes EXCLUDE REGEX "^\\s*$") - if( prefixes ) - string(REPLACE ";" "|" prefixes "${prefixes}") - string(REGEX REPLACE "^(${prefixes})" "" libpath ${libpath}) - endif() - if( suffixes ) - string(REPLACE ";" "|" suffixes "${suffixes}") - string(REGEX REPLACE "(${suffixes})$" "" libpath ${libpath}) - endif() - set(${libname} "${libpath}" PARENT_SCOPE) -endfunction() - if( MSVC OR MINGW ) # libuuid required for FOLDERID_Profile usage in lib/Support/Windows/Path.inc. # advapi32 required for CryptAcquireContextW in lib/Support/Windows/Path.inc. @@ -242,6 +227,8 @@ add_llvm_component_library(LLVMSupport set(llvm_system_libs ${system_libs}) +# This block is only needed for llvm-config. When we deprecate llvm-config and +# move to using CMake export, this block can be removed. if(LLVM_ENABLE_ZLIB) # CMAKE_BUILD_TYPE is only meaningful to single-configuration generators. if(CMAKE_BUILD_TYPE) @@ -251,12 +238,12 @@ if(LLVM_ENABLE_ZLIB) if(NOT zlib_library) get_property(zlib_library TARGET ZLIB::ZLIB PROPERTY LOCATION) endif() - get_system_libname(${zlib_library} zlib_library) + get_library_name(${zlib_library} zlib_library) set(llvm_system_libs ${llvm_system_libs} "${zlib_library}") endif() if(LLVM_ENABLE_TERMINFO) - get_system_libname(${TERMINFO_LIB} terminfo_library) + get_library_name(${TERMINFO_LIB} terminfo_library) set(llvm_system_libs ${llvm_system_libs} "${terminfo_library}") endif() diff --git a/llvm/lib/WindowsManifest/CMakeLists.txt b/llvm/lib/WindowsManifest/CMakeLists.txt index 7ccc17ad577d3..0f597af3c36f8 100644 --- a/llvm/lib/WindowsManifest/CMakeLists.txt +++ b/llvm/lib/WindowsManifest/CMakeLists.txt @@ -1,23 +1,28 @@ +include(GetLibraryName) + +if(LLVM_ENABLE_LIBXML2) + set(imported_libs LibXml2::LibXml2) +endif() + add_llvm_component_library(LLVMWindowsManifest WindowsManifestMerger.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/WindowsManifest - ${Backtrace_INCLUDE_DIRS}) + ${Backtrace_INCLUDE_DIRS} + LINK_LIBS ${imported_libs}) -if(LIBXML2_LIBRARIES) - target_link_libraries(LLVMWindowsManifest PUBLIC ${LIBXML2_LIBRARIES}) - - get_filename_component(xml2_library ${LIBXML2_LIBRARIES} NAME) - if (CMAKE_STATIC_LIBRARY_PREFIX AND - xml2_library MATCHES "^${CMAKE_STATIC_LIBRARY_PREFIX}.*${CMAKE_STATIC_LIBRARY_SUFFIX}$") - string(REGEX REPLACE "^${CMAKE_STATIC_LIBRARY_PREFIX}" "" xml2_library ${xml2_library}) - string(REGEX REPLACE "${CMAKE_STATIC_LIBRARY_SUFFIX}$" "" xml2_library ${xml2_library}) - elseif (CMAKE_SHARED_LIBRARY_PREFIX AND - xml2_library MATCHES "^${CMAKE_SHARED_LIBRARY_PREFIX}.*${CMAKE_SHARED_LIBRARY_SUFFIX}$") - string(REGEX REPLACE "^${CMAKE_SHARED_LIBRARY_PREFIX}" "" xml2_library ${xml2_library}) - string(REGEX REPLACE "${CMAKE_SHARED_LIBRARY_SUFFIX}$" "" xml2_library ${xml2_library}) +# This block is only needed for llvm-config. When we deprecate llvm-config and +# move to using CMake export, this block can be removed. 
+if(LLVM_ENABLE_LIBXML2) + # CMAKE_BUILD_TYPE is only meaningful to single-configuration generators. + if(CMAKE_BUILD_TYPE) + string(TOUPPER ${CMAKE_BUILD_TYPE} build_type) + get_property(libxml2_library TARGET LibXml2::LibXml2 PROPERTY LOCATION_${build_type}) + endif() + if(NOT zlib_library) + get_property(libxml2_library TARGET LibXml2::LibXml2 PROPERTY LOCATION) endif() - set_property(TARGET LLVMWindowsManifest PROPERTY - LLVM_SYSTEM_LIBS ${xml2_library}) + get_library_name(${libxml2_library} libxml2_library) + set_property(TARGET LLVMWindowsManifest PROPERTY LLVM_SYSTEM_LIBS ${libxml2_library}) endif() diff --git a/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp b/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp index 031a963cd3b0c..6af7bc699d056 100644 --- a/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp +++ b/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp @@ -16,7 +16,7 @@ #include -#if LLVM_LIBXML2_ENABLED +#if LLVM_ENABLE_LIBXML2 #include #endif @@ -41,7 +41,7 @@ class WindowsManifestMerger::WindowsManifestMergerImpl { private: static void errorCallback(void *Ctx, const char *Format, ...); Error getParseError(); -#if LLVM_LIBXML2_ENABLED +#if LLVM_ENABLE_LIBXML2 xmlDocPtr CombinedDoc = nullptr; std::vector MergedDocs; @@ -56,7 +56,7 @@ class WindowsManifestMerger::WindowsManifestMergerImpl { bool ParseErrorOccurred = false; }; -#if LLVM_LIBXML2_ENABLED +#if LLVM_ENABLE_LIBXML2 static constexpr std::pair MtNsHrefsPrefixes[] = { {"urn:schemas-microsoft-com:asm.v1", "ms_asmv1"}, diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt index 58aa680a54c22..772ff0fd5f780 100644 --- a/llvm/test/CMakeLists.txt +++ b/llvm/test/CMakeLists.txt @@ -7,8 +7,8 @@ llvm_canonicalize_cmake_booleans( LLVM_ENABLE_FFI LLVM_ENABLE_THREADS LLVM_ENABLE_ZLIB + LLVM_ENABLE_LIBXML2 LLVM_INCLUDE_GO_TESTS - LLVM_LIBXML2_ENABLED LLVM_LINK_LLVM_DYLIB LLVM_TOOL_LTO_BUILD LLVM_USE_INTEL_JITEVENTS diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index 4d7d3c861aba5..3c4cb9c32065b 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -356,7 +356,7 @@ def have_ld64_plugin_support(): if config.enable_threads: config.available_features.add('thread_support') -if config.llvm_libxml2_enabled: +if config.have_libxml2: config.available_features.add('libxml2') if config.have_opt_viewer_modules: diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in index 52f709f817ddd..0e77c1087ac13 100644 --- a/llvm/test/lit.site.cfg.py.in +++ b/llvm/test/lit.site.cfg.py.in @@ -35,13 +35,13 @@ config.llvm_use_intel_jitevents = @LLVM_USE_INTEL_JITEVENTS@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" config.have_zlib = @LLVM_ENABLE_ZLIB@ config.have_libxar = @HAVE_LIBXAR@ +config.have_libxml2 = @LLVM_ENABLE_LIBXML2@ config.have_dia_sdk = @LLVM_ENABLE_DIA_SDK@ config.enable_ffi = @LLVM_ENABLE_FFI@ config.build_examples = @LLVM_BUILD_EXAMPLES@ config.enable_threads = @LLVM_ENABLE_THREADS@ config.build_shared_libs = @BUILD_SHARED_LIBS@ config.link_llvm_dylib = @LLVM_LINK_LLVM_DYLIB@ -config.llvm_libxml2_enabled = @LLVM_LIBXML2_ENABLED@ config.llvm_host_triple = '@LLVM_HOST_TRIPLE@' config.host_arch = "@HOST_ARCH@" config.have_opt_viewer_modules = @LLVM_HAVE_OPT_VIEWER_MODULES@ diff --git a/llvm/utils/gn/secondary/lld/test/BUILD.gn b/llvm/utils/gn/secondary/lld/test/BUILD.gn index bfb63a39ba65a..00cb2f2c024c8 100644 --- a/llvm/utils/gn/secondary/lld/test/BUILD.gn +++ b/llvm/utils/gn/secondary/lld/test/BUILD.gn @@ -43,9 +43,9 @@ write_lit_cfg("lit_site_cfg") { } if 
(llvm_enable_libxml2) { - extra_values += [ "LLVM_LIBXML2_ENABLED=1" ] + extra_values += [ "LLVM_ENABLE_LIBXML2=1" ] } else { - extra_values += [ "LLVM_LIBXML2_ENABLED=0" ] # Must be 0. + extra_values += [ "LLVM_ENABLE_LIBXML2=0" ] # Must be 0. } if (llvm_enable_zlib) { diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn index d54242da38cca..acbd66aca4ded 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn @@ -304,9 +304,9 @@ write_cmake_config("config") { } if (llvm_enable_libxml2) { - values += [ "LLVM_LIBXML2_ENABLED=1" ] + values += [ "LLVM_ENABLE_LIBXML2=1" ] } else { - values += [ "LLVM_LIBXML2_ENABLED=" ] + values += [ "LLVM_ENABLE_LIBXML2=" ] } } diff --git a/llvm/utils/gn/secondary/llvm/test/BUILD.gn b/llvm/utils/gn/secondary/llvm/test/BUILD.gn index c714d9b5ba7b1..df4c763f64cd6 100644 --- a/llvm/utils/gn/secondary/llvm/test/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/test/BUILD.gn @@ -162,9 +162,9 @@ write_lit_config("lit_site_cfg") { } if (llvm_enable_libxml2) { - extra_values += [ "LLVM_LIBXML2_ENABLED=1" ] + extra_values += [ "LLVM_ENABLE_LIBXML2=1" ] } else { - extra_values += [ "LLVM_LIBXML2_ENABLED=0" ] # Must be 0. + extra_values += [ "LLVM_ENABLE_LIBXML2=0" ] # Must be 0. } if (llvm_enable_threads) { From f7941d98091827b8d0b6fdabb731e38c99f44b13 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Wed, 9 Sep 2020 22:03:13 -0700 Subject: [PATCH 0221/1079] [lit] Use correct variable name for libxml2 This addresses an issue introduced in c4d7536136b3. --- lld/test/lit.cfg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/test/lit.cfg.py b/lld/test/lit.cfg.py index 037b9ed2d1676..090a7c21fa782 100644 --- a/lld/test/lit.cfg.py +++ b/lld/test/lit.cfg.py @@ -88,7 +88,7 @@ # cvtres, which always accompanies it. Alternatively, check if we can use # libxml2 to merge manifests. if (lit.util.which('cvtres', config.environment['PATH']) or - config.llvm_libxml2_enabled): + config.have_libxml2): config.available_features.add('manifest_tool') if config.have_libxml2: From 6afb27910044cc0906b99b1284fbd29208816f82 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Thu, 10 Sep 2020 13:28:09 +0800 Subject: [PATCH 0222/1079] [PowerPC] [FPEnv] Disable strict FP mutation by default 22a0edd0 introduced a config IsStrictFPEnabled, which controls the strict floating point mutation (transforming some strict-fp operations into non-strict in ISel). This patch disables the mutation by default since we've finished PowerPC strict-fp enablement in backend. Reviewed By: uweigand Differential Revision: https://reviews.llvm.org/D87222 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 ++ llvm/lib/Target/PowerPC/PPCInstrInfo.td | 2 +- .../CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll | 8 ++++---- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index fc9a80919fc1c..469fe9701d065 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1320,6 +1320,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, MaxLoadsPerMemcmpOptSize = 4; } + IsStrictFPEnabled = true; + // Let the subtarget (CPU) decide if a predictable select is more expensive // than the corresponding branch. This information is used in CGP to decide // when to convert selects into branches. 
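For context, IsStrictFPEnabled is a protected member of TargetLoweringBase, and a backend opts out of the strict-FP mutation by setting it in its TargetLowering constructor, exactly as the hunk above does for PowerPC. A minimal sketch of the idiom (the "MyTargetLowering" class is a placeholder for illustration, not part of this patch):

// Hedged sketch: how a target signals that its strict-FP lowering is
// complete. "MyTargetLowering" is illustrative, not real LLVM code.
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
class MyTargetLowering : public TargetLowering {
public:
  explicit MyTargetLowering(const TargetMachine &TM) : TargetLowering(TM) {
    // With this set, ISel keeps llvm.experimental.constrained.* operations
    // in their strict form instead of mutating them into non-strict nodes.
    IsStrictFPEnabled = true;
  }
};
} // namespace llvm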
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index a6932005d5ad1..c865fa10956b2 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -3477,7 +3477,7 @@ def : Pat<(f64 (extloadf32 iaddr:$src)), def : Pat<(f64 (extloadf32 xaddr:$src)), (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>; -def : Pat<(f64 (fpextend f32:$src)), +def : Pat<(f64 (any_fpextend f32:$src)), (COPY_TO_REGCLASS $src, F8RC)>; } diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll index 7345d65be14aa..21fc855aa8547 100644 --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -7168,19 +7168,19 @@ define <3 x double> @constrained_vector_fpext_v3f32() #0 { ; PC64LE-NEXT: addis 3, 2, .LCPI133_0@toc@ha ; PC64LE-NEXT: addis 4, 2, .LCPI133_1@toc@ha ; PC64LE-NEXT: addis 5, 2, .LCPI133_2@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI133_0@toc@l(3) +; PC64LE-NEXT: lfs 3, .LCPI133_0@toc@l(3) ; PC64LE-NEXT: lfs 2, .LCPI133_1@toc@l(4) -; PC64LE-NEXT: lfs 3, .LCPI133_2@toc@l(5) +; PC64LE-NEXT: lfs 1, .LCPI133_2@toc@l(5) ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fpext_v3f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI133_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI133_0@toc@l(3) +; PC64LE9-NEXT: lfs 3, .LCPI133_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI133_1@toc@ha ; PC64LE9-NEXT: lfs 2, .LCPI133_1@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI133_2@toc@ha -; PC64LE9-NEXT: lfs 3, .LCPI133_2@toc@l(3) +; PC64LE9-NEXT: lfs 1, .LCPI133_2@toc@l(3) ; PC64LE9-NEXT: blr entry: %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32( From a7b2977aa613b5e9b9d9e6e8232f89012404c52c Mon Sep 17 00:00:00 2001 From: MaheshRavishankar Date: Wed, 9 Sep 2020 22:20:12 -0700 Subject: [PATCH 0223/1079] [mlir][Linalg] Add Utility method to get loop ranges for a LinalgOp. Also refactor the getViewSizes method to work on LinalgOp instead of being a templated version. Keeping the templated version for compatibility. Differential Revision: https://reviews.llvm.org/D87303 --- .../include/mlir/Dialect/Linalg/Utils/Utils.h | 44 +++++-------------- mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 44 +++++++++++++++++++ 2 files changed, 56 insertions(+), 32 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index beef1a70096e6..c0c59bda1894f 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -94,42 +94,22 @@ Operation *fuseTensorOps(PatternRewriter &rewriter, Operation *consumer, unsigned consumerIdx, OperationFolder *folder = nullptr); -/// Returns the linearized list of all view dimensions in a linalgOp. Applying +/// Returns the linearized list of all view dimensions in a `linalgOp`. Applying /// the inverse, concatenated loopToOperandRangeMaps to this list allows the /// derivation of loop ranges for any linalgOp. 
-template <typename ConcreteOp>
-SmallVector<Value, 8> getViewSizes(OpBuilder &builder, ConcreteOp linalgOp) {
-  auto loc = linalgOp.getLoc();
-  SmallVector<Value, 8> res;
-  SmallVector<unsigned, 4> ranks;
-  for (auto v : linalgOp.getInputsAndOutputBuffers()) {
-    MemRefType t = v.getType().template cast<MemRefType>();
-    ranks.push_back(t.getRank());
-    for (unsigned i = 0; i < t.getRank(); ++i)
-      res.push_back(builder.create<DimOp>(loc, v, i));
-  }
-
-  auto attr = linalgOp.template getAttrOfType<IntegerAttr>("symbol_source");
-  if (attr) {
-    // Find the correct position for inserting values for symbols.
-    unsigned numSymb = ranks[attr.getInt()], symbolsPos = 0;
-    for (unsigned idx = 0; idx < attr.getInt(); idx++)
-      symbolsPos += ranks[idx];
-
-    // Append the end of the value list that corresponds to the
-    // values mapping to symbols. Since inside concatenated maps symbols are
-    // repeated we have to repeat the sizes as well.
-
-    // Reserve is mandatory to avoid a potential undefined behavior with
-    // pushing back to smallvector from itself.
-    res.reserve(res.size() + ranks.size() * numSymb);
-    for (unsigned idx = 0, s = ranks.size(); idx < s; ++idx)
-      for (unsigned idx2 = 0; idx2 < numSymb; ++idx2)
-        res.push_back(res[symbolsPos + idx2]);
-  }
-  return res;
+SmallVector<Value, 8> getViewSizes(OpBuilder &builder, LinalgOp linalgOp);
+template <typename ConcreteOpTy>
+SmallVector<Value, 8> getViewSizes(OpBuilder &builder, ConcreteOpTy linalgOp) {
+  return getViewSizes(builder, cast<LinalgOp>(linalgOp.getOperation()));
 }
 
+/// Returns the loop ranges of the `linalgOp`. Applies the inverse of the
+/// concatenated indexing maps to the result of `getViewSizes`. Returns None if
+/// the bounds computation fails.
+Optional<SmallVector<Value, 4>>
+getLoopRanges(OpBuilder &builder, LinalgOp linalgOp,
+              OperationFolder *folder = nullptr);
+
 /// Returns the values obtained by applying `map` to the list of values.
 /// When non-null, the optional pointer `folder` is used to call into the
 /// `createAndFold` builder method. If `folder` is null, the regular `create`
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index cf14555aa63fc..585b00189964d 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -147,6 +147,50 @@ static void unpackRanges(ArrayRef ranges,
 namespace mlir {
 namespace linalg {
 
+/// Return the linearized list of all view dimensions in a linalgOp.
+SmallVector<Value, 8> getViewSizes(OpBuilder &builder, LinalgOp linalgOp) {
+  auto loc = linalgOp.getLoc();
+  SmallVector<Value, 8> res;
+  SmallVector<unsigned, 4> ranks;
+  for (auto v : linalgOp.getInputsAndOutputBuffers()) {
+    MemRefType t = v.getType().template cast<MemRefType>();
+    ranks.push_back(t.getRank());
+    for (unsigned i = 0; i < t.getRank(); ++i)
+      res.push_back(builder.create<DimOp>(loc, v, i));
+  }
+
+  auto attr = linalgOp.template getAttrOfType<IntegerAttr>("symbol_source");
+  if (attr) {
+    // Find the correct position for inserting values for symbols.
+    unsigned numSymb = ranks[attr.getInt()], symbolsPos = 0;
+    for (unsigned idx = 0; idx < attr.getInt(); idx++)
+      symbolsPos += ranks[idx];
+
+    // Append the end of the value list that corresponds to the
+    // values mapping to symbols. Since inside concatenated maps symbols are
+    // repeated we have to repeat the sizes as well.
+
+    // Reserve is mandatory to avoid a potential undefined behavior with
+    // pushing back to smallvector from itself.
+    res.reserve(res.size() + ranks.size() * numSymb);
+    for (unsigned idx = 0, s = ranks.size(); idx < s; ++idx)
+      for (unsigned idx2 = 0; idx2 < numSymb; ++idx2)
+        res.push_back(res[symbolsPos + idx2]);
+  }
+  return res;
+}
+
+Optional<SmallVector<Value, 4>>
+getLoopRanges(OpBuilder &builder, LinalgOp linalgOp, OperationFolder *folder) {
+  SmallVector<Value, 8> viewSizes = getViewSizes(builder, linalgOp);
+  AffineMap invertedMap =
+      inversePermutation(concatAffineMaps(linalgOp.getIndexingMaps()));
+  if (!invertedMap)
+    return {};
+  return applyMapToValues(builder, linalgOp.getLoc(), invertedMap, viewSizes,
+                          folder);
+}
+
 /// Specialization to build an scf "for" nest.
 template <>
 void GenerateLoopNest<scf::ForOp>::doit(
From 060c8e083dd637866854acb6a0823c45b2ef68ef Mon Sep 17 00:00:00 2001
From: Daniel Stone
Date: Wed, 9 Sep 2020 23:15:41 -0400
Subject: [PATCH 0224/1079] libclc/spirv: Add various functions

Adds fma, fmod, ldexp.

Reviewer: jenatali jvesely

Differential Revision: https://reviews.llvm.org/D85911
---
 libclc/spirv/lib/SOURCES | 6 ++++++
 libclc/spirv64/lib/SOURCES | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/libclc/spirv/lib/SOURCES b/libclc/spirv/lib/SOURCES
index f594fa7e85d49..854cba614c8bf 100644
--- a/libclc/spirv/lib/SOURCES
+++ b/libclc/spirv/lib/SOURCES
@@ -41,6 +41,10 @@ subnormal_config.cl
 ../../generic/lib/math/exp2.cl
 ../../generic/lib/math/clc_exp10.cl
 ../../generic/lib/math/exp10.cl
+../../generic/lib/math/clc_fma.cl
+math/fma.cl
+../../generic/lib/math/clc_fmod.cl
+../../generic/lib/math/fmod.cl
 ../../generic/lib/math/fract.cl
 ../../generic/lib/math/frexp.cl
 ../../generic/lib/math/half_rsqrt.cl
@@ -48,6 +52,8 @@ subnormal_config.cl
 ../../generic/lib/math/clc_hypot.cl
 ../../generic/lib/math/hypot.cl
 ../../generic/lib/math/ilogb.cl
+../../generic/lib/math/clc_ldexp.cl
+../../generic/lib/math/ldexp.cl
 ../../generic/lib/math/lgamma.cl
 ../../generic/lib/math/lgamma_r.cl
 ../../generic/lib/math/log.cl
diff --git a/libclc/spirv64/lib/SOURCES b/libclc/spirv64/lib/SOURCES
index f594fa7e85d49..854cba614c8bf 100644
--- a/libclc/spirv64/lib/SOURCES
+++ b/libclc/spirv64/lib/SOURCES
@@ -41,6 +41,10 @@ subnormal_config.cl
 ../../generic/lib/math/exp2.cl
 ../../generic/lib/math/clc_exp10.cl
 ../../generic/lib/math/exp10.cl
+../../generic/lib/math/clc_fma.cl
+math/fma.cl
+../../generic/lib/math/clc_fmod.cl
+../../generic/lib/math/fmod.cl
 ../../generic/lib/math/fract.cl
 ../../generic/lib/math/frexp.cl
 ../../generic/lib/math/half_rsqrt.cl
@@ -48,6 +52,8 @@ subnormal_config.cl
 ../../generic/lib/math/clc_hypot.cl
 ../../generic/lib/math/hypot.cl
 ../../generic/lib/math/ilogb.cl
+../../generic/lib/math/clc_ldexp.cl
+../../generic/lib/math/ldexp.cl
 ../../generic/lib/math/lgamma.cl
 ../../generic/lib/math/lgamma_r.cl
 ../../generic/lib/math/log.cl
From c413a8a8ecd3c0ef7bcb08525fd73eb1392a738c Mon Sep 17 00:00:00 2001
From: Max Kazantsev
Date: Thu, 10 Sep 2020 13:29:45 +0700
Subject: [PATCH 0225/1079] [LoopLoadElim] Filter away candidates that stop
 being AddRecs after loop versioning. PR47457

The test in PR47457 demonstrates a situation where a candidate load's
pointer's SCEV is no longer a SCEVAddRec after loop versioning. The code
there assumes that it is always a SCEVAddRec and crashes otherwise. This
patch makes sure that we do not consider candidates for which this
requirement is broken after the versioning.
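The fix below re-checks the SCEV shape after versioning. Distilled into a standalone predicate, the check looks roughly like this (a sketch assuming a PredicatedScalarEvolution is in scope; it is not the exact helper the patch adds):

// Sketch of the post-versioning validity check; not the exact patch code.
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// A store-to-load forwarding candidate stays usable only if both of its
// pointers are still affine recurrences (SCEVAddRecExpr) after versioning.
static bool isStillAddRecCandidate(PredicatedScalarEvolution &PSE,
                                   LoadInst *Load, StoreInst *Store) {
  return isa<SCEVAddRecExpr>(PSE.getSCEV(Load->getPointerOperand())) &&
         isa<SCEVAddRecExpr>(PSE.getSCEV(Store->getPointerOperand()));
}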
Differential Revision: https://reviews.llvm.org/D87355
Reviewed By: asbirlea
---
 .../Transforms/Scalar/LoopLoadElimination.cpp | 25 +++++++++++++++----
 llvm/test/Transforms/LoopLoadElim/pr47457.ll | 2 +-
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 3b70695640414..e8473d6520254 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -486,7 +486,6 @@ class LoadEliminationForLoop {
 
     // Filter the candidates further.
     SmallVector<StoreToLoadForwardingCandidate, 4> Candidates;
-    unsigned NumForwarding = 0;
     for (const StoreToLoadForwardingCandidate &Cand : StoreToLoadDependences) {
       LLVM_DEBUG(dbgs() << "Candidate " << Cand);
 
@@ -506,12 +505,17 @@ class LoadEliminationForLoop {
       if (!Cand.isDependenceDistanceOfOne(PSE, L))
         continue;
 
-      ++NumForwarding;
+      assert(isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Load->getPointerOperand())) &&
+             "Loading from something other than indvar?");
+      assert(
+          isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Store->getPointerOperand())) &&
+          "Storing to something other than indvar?");
+
+      Candidates.push_back(Cand);
       LLVM_DEBUG(
           dbgs()
-          << NumForwarding
+          << Candidates.size()
           << ". Valid store-to-load forwarding across the loop backedge\n");
-      Candidates.push_back(Cand);
     }
     if (Candidates.empty())
       return false;
@@ -563,6 +567,17 @@ class LoadEliminationForLoop {
       LV.setAliasChecks(std::move(Checks));
       LV.setSCEVChecks(LAI.getPSE().getUnionPredicate());
       LV.versionLoop();
+
+      // After versioning, some of the candidates' pointers could stop being
+      // SCEVAddRecs. We need to filter them out.
+      auto NoLongerGoodCandidate = [this](
+          const StoreToLoadForwardingCandidate &Cand) {
+        return !isa<SCEVAddRecExpr>(
+                   PSE.getSCEV(Cand.Load->getPointerOperand())) ||
+               !isa<SCEVAddRecExpr>(
+                   PSE.getSCEV(Cand.Store->getPointerOperand()));
+      };
+      llvm::erase_if(Candidates, NoLongerGoodCandidate);
     }
 
     // Next, propagate the value stored by the store to the users of the load.
@@ -571,7 +586,7 @@ class LoadEliminationForLoop {
                          "storeforward");
     for (const auto &Cand : Candidates)
       propagateStoredValueToLoadUsers(Cand, SEE);
-    NumLoopLoadEliminted += NumForwarding;
+    NumLoopLoadEliminted += Candidates.size();
 
     return true;
   }
diff --git a/llvm/test/Transforms/LoopLoadElim/pr47457.ll b/llvm/test/Transforms/LoopLoadElim/pr47457.ll
index 1b102944cd767..a58be5a8cf5e9 100644
--- a/llvm/test/Transforms/LoopLoadElim/pr47457.ll
+++ b/llvm/test/Transforms/LoopLoadElim/pr47457.ll
@@ -1,11 +1,11 @@
 ; RUN: opt -loop-load-elim -S %s | FileCheck %s
 ; RUN: opt -passes=loop-load-elim -S %s | FileCheck %s
 ; REQUIRES: asserts
-; XFAIL: *
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
 target triple = "x86_64-unknown-linux-gnu"
 
+; Make sure it does not crash with assert.
 define void @test() {
 ; CHECK-LABEL: test
From cde8fc65aeedda5e7cfc66d5c06a74399a80fffa Mon Sep 17 00:00:00 2001
From: Max Kazantsev
Date: Thu, 10 Sep 2020 13:38:49 +0700
Subject: [PATCH 0226/1079] [NFC] Rename variables to avoid name confusion

Name `LI` is used for loop info, loop, and load inst in the same function,
which causes a lot of confusion.
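The hazard being removed is ordinary name shadowing; a contrived sketch (illustrative only, not the actual ScalarEvolution code) shows why three meanings of "LI" in one scope invite misreads:

// Illustrative sketch of the naming hazard this NFC rename removes.
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

struct Example {
  LoopInfo &LI; // "LI" already names the LoopInfo analysis here...
  void visit(Instruction *I) {
    // ...so the rename gives the loop and the load distinct names:
    const Loop *CurrLoop = LI[I->getParent()]; // previously also "LI"
    if (auto *Load = dyn_cast<LoadInst>(I))    // previously also "LI"
      (void)Load;
    (void)CurrLoop;
  }
};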
---
 llvm/lib/Analysis/ScalarEvolution.cpp | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 649e8d3733a9b..795919458aaa3 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -8036,22 +8036,22 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
   if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
     if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
       if (PHINode *PN = dyn_cast<PHINode>(I)) {
-        const Loop *LI = this->LI[I->getParent()];
+        const Loop *CurrLoop = this->LI[I->getParent()];
         // Looking for loop exit value.
-        if (LI && LI->getParentLoop() == L &&
-            PN->getParent() == LI->getHeader()) {
+        if (CurrLoop && CurrLoop->getParentLoop() == L &&
+            PN->getParent() == CurrLoop->getHeader()) {
           // Okay, there is no closed form solution for the PHI node. Check
           // to see if the loop that contains it has a known backedge-taken
           // count. If so, we may be able to force computation of the exit
           // value.
-          const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
+          const SCEV *BackedgeTakenCount = getBackedgeTakenCount(CurrLoop);
           // This trivial case can show up in some degenerate cases where
           // the incoming IR has not yet been fully simplified.
           if (BackedgeTakenCount->isZero()) {
             Value *InitValue = nullptr;
             bool MultipleInitValues = false;
             for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
-              if (!LI->contains(PN->getIncomingBlock(i))) {
+              if (!CurrLoop->contains(PN->getIncomingBlock(i))) {
                 if (!InitValue)
                   InitValue = PN->getIncomingValue(i);
                 else if (InitValue != PN->getIncomingValue(i)) {
@@ -8069,17 +8069,18 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
               isKnownPositive(BackedgeTakenCount) &&
               PN->getNumIncomingValues() == 2) {
 
-            unsigned InLoopPred = LI->contains(PN->getIncomingBlock(0)) ? 0 : 1;
+            unsigned InLoopPred =
+                CurrLoop->contains(PN->getIncomingBlock(0)) ? 0 : 1;
             Value *BackedgeVal = PN->getIncomingValue(InLoopPred);
-            if (LI->isLoopInvariant(BackedgeVal))
+            if (CurrLoop->isLoopInvariant(BackedgeVal))
               return getSCEV(BackedgeVal);
           }
           if (auto *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
             // Okay, we know how many times the containing loop executes. If
             // this is a constant evolving PHI node, get the final value at
             // the specified iteration number.
-            Constant *RV =
-                getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI);
+            Constant *RV = getConstantEvolutionLoopExitValue(
+                PN, BTCC->getAPInt(), CurrLoop);
             if (RV) return getSCEV(RV);
           }
         }
@@ -8135,9 +8136,10 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
         if (const CmpInst *CI = dyn_cast<CmpInst>(I))
           C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
                                               Operands[1], DL, &TLI);
-        else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
-          if (!LI->isVolatile())
-            C = ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL);
+        else if (const LoadInst *Load = dyn_cast<LoadInst>(I)) {
+          if (!Load->isVolatile())
+            C = ConstantFoldLoadFromConstPtr(Operands[0], Load->getType(),
+                                             DL);
         } else
           C = ConstantFoldInstOperands(I, Operands, DL, &TLI);
         if (!C) return V;
From 39c1653b3dbb7d1c439a3e8cf31d1aa159a4afc5 Mon Sep 17 00:00:00 2001
From: Juneyoung Lee
Date: Thu, 10 Sep 2020 15:49:04 +0900
Subject: [PATCH 0227/1079] [JumpThreading] Conditionally freeze its condition
 when unfolding select

This patch fixes pr45956 (https://bugs.llvm.org/show_bug.cgi?id=45956).
To minimize its impact on the quality of generated code, I suggest
enabling this only for LTO as a start (it has two JumpThreading passes
registered). This patch contains a flag that makes JumpThreading enable
it.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D84940
---
 llvm/include/llvm/Transforms/Scalar.h | 8 +-
 .../llvm/Transforms/Scalar/JumpThreading.h | 3 +-
 llvm/lib/Transforms/Scalar/JumpThreading.cpp | 29 +-
 .../JumpThreading/select-unfold-freeze.ll | 248 ++++++++++++++++++
 4 files changed, 272 insertions(+), 16 deletions(-)
 create mode 100644 llvm/test/Transforms/JumpThreading/select-unfold-freeze.ll

diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index 242ffa0ede09d..5ab8a0584ad0c 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -240,10 +240,12 @@ FunctionPass *createReassociatePass();
 //===----------------------------------------------------------------------===//
 //
 // JumpThreading - Thread control through multi-pred/multi-succ blocks where some
-// preds always go to some succ. Thresholds other than minus one override the
-// internal BB duplication default threshold.
+// preds always go to some succ. If FreezeSelectCond is true, unfold the
+// condition of a select that unfolds to a branch. Thresholds other than minus
+// one override the internal BB duplication default threshold.
 //
-FunctionPass *createJumpThreadingPass(int Threshold = -1);
+FunctionPass *createJumpThreadingPass(bool FreezeSelectCond = false,
+                                      int Threshold = -1);
 
 //===----------------------------------------------------------------------===//
 //
diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
index 327bf6d00c479..b5b907471cd72 100644
--- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -91,9 +91,10 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
   unsigned BBDupThreshold;
   unsigned DefaultBBDupThreshold;
+  bool InsertFreezeWhenUnfoldingSelect;
 
 public:
-  JumpThreadingPass(int T = -1);
+  JumpThreadingPass(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1);
 
   // Glue for old PM.
bool runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_,
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 311ca11de84e7..354afc710f31c 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -104,6 +104,11 @@ static cl::opt<bool> PrintLVIAfterJumpThreading(
     cl::desc("Print the LazyValueInfo cache after JumpThreading"),
     cl::init(false), cl::Hidden);
 
+static cl::opt<bool> JumpThreadingFreezeSelectCond(
+    "jump-threading-freeze-select-cond",
+    cl::desc("Freeze the condition when unfolding select"), cl::init(false),
+    cl::Hidden);
+
 static cl::opt<bool> ThreadAcrossLoopHeaders(
     "jump-threading-across-loop-headers",
     cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
@@ -133,7 +138,8 @@ namespace {
   public:
     static char ID; // Pass identification
-    JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) {
+    JumpThreading(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1)
+        : FunctionPass(ID), Impl(InsertFreezeWhenUnfoldingSelect, T) {
       initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
     }
 
@@ -166,11 +172,12 @@ INITIALIZE_PASS_END(JumpThreading, "jump-threading", "Jump Threading",
                     false, false)
 
 // Public interface to the Jump Threading pass
-FunctionPass *llvm::createJumpThreadingPass(int Threshold) {
-  return new JumpThreading(Threshold);
+FunctionPass *llvm::createJumpThreadingPass(bool InsertFr, int Threshold) {
+  return new JumpThreading(InsertFr, Threshold);
 }
 
-JumpThreadingPass::JumpThreadingPass(int T) {
+JumpThreadingPass::JumpThreadingPass(bool InsertFr, int T) {
+  InsertFreezeWhenUnfoldingSelect = JumpThreadingFreezeSelectCond | InsertFr;
   DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
 }
 
@@ -2798,13 +2805,8 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
 /// select is not jump-threaded, it will be folded again in the later
 /// optimizations.
 bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
-  // This transform can introduce a UB (a conditional branch that depends on a
-  // poison value) that was not present in the original program. See
-  // @TryToUnfoldSelectInCurrBB test in test/Transforms/JumpThreading/select.ll.
+  // This transform would reduce the quality of msan diagnostics.
   // Disable this transform under MemorySanitizer.
-  // FIXME: either delete it or replace with a valid transform. This issue is
-  // not limited to MemorySanitizer (but has only been observed as an MSan false
-  // positive in practice so far).
  if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
    return false;
 
@@ -2852,8 +2854,11 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
    if (!SI)
      continue;
    // Expand the select.
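    // Note: unconditionally branching on the select's condition would be
    // immediate UB when that condition is poison or undef, while the original
    // select was not; freezing the condition first (unless it is provably
    // neither) keeps the unfolded branch well defined. See PR45956.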
- Instruction *Term = - SplitBlockAndInsertIfThen(SI->getCondition(), SI, false); + Value *Cond = SI->getCondition(); + if (InsertFreezeWhenUnfoldingSelect && + !isGuaranteedNotToBeUndefOrPoison(Cond, SI, &DTU->getDomTree())) + Cond = new FreezeInst(Cond, "cond.fr", SI); + Instruction *Term = SplitBlockAndInsertIfThen(Cond, SI, false); BasicBlock *SplitBB = SI->getParent(); BasicBlock *NewBB = Term->getParent(); PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI); diff --git a/llvm/test/Transforms/JumpThreading/select-unfold-freeze.ll b/llvm/test/Transforms/JumpThreading/select-unfold-freeze.ll new file mode 100644 index 0000000000000..12288fc272627 --- /dev/null +++ b/llvm/test/Transforms/JumpThreading/select-unfold-freeze.ll @@ -0,0 +1,248 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -jump-threading-freeze-select-cond -jump-threading < %s | FileCheck %s + +declare void @foo() +declare void @bar() +declare void @baz() +declare void @quux() + + +define void @test_switch_cmp(i1 %cond, i32 %val, i8 %value) nounwind { +; CHECK-LABEL: @test_switch_cmp( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[L0:%.*]], label [[L0_THREAD:%.*]] +; CHECK: L0: +; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ [[VAL:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[VAL_PHI]], 0 +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[CMP]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[L1:%.*]], label [[TMP0:%.*]] +; CHECK: 0: +; CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[VALUE:%.*]], [[L0]] ] +; CHECK-NEXT: switch i8 [[TMP1]], label [[L3:%.*]] [ +; CHECK-NEXT: i8 1, label [[L1]] +; CHECK-NEXT: i8 2, label [[L2:%.*]] +; CHECK-NEXT: ] +; CHECK: L1: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: ret void +; CHECK: L2: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: ret void +; CHECK: L3: +; CHECK-NEXT: call void @baz() +; CHECK-NEXT: ret void +; CHECK: L0.thread: +; CHECK-NEXT: call void @quux() +; CHECK-NEXT: br label [[L1]] +; +entry: + br i1 %cond, label %L0, label %L4 +L0: + %val.phi = phi i32 [%val, %entry], [-1, %L4] + %cmp = icmp slt i32 %val.phi, 0 + %expr = select i1 %cmp, i8 1, i8 %value + switch i8 %expr, label %L3 [i8 1, label %L1 i8 2, label %L2] + +L1: + call void @foo() + ret void +L2: + call void @bar() + ret void +L3: + call void @baz() + ret void +L4: + call void @quux() + br label %L0 +} + +define i32 @unfold3(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind { +; CHECK-LABEL: @unfold3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[J:%.*]], 2 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[U:%.*]], [[V:%.*]] +; CHECK-NEXT: br i1 [[CMP_I]], label [[DOTEXIT_THREAD4:%.*]], label [[COND_FALSE_I:%.*]] +; CHECK: cond.false.i: +; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]] +; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_6_I:%.*]] +; CHECK: cond.false.6.i: +; CHECK-NEXT: [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]] +; CHECK-NEXT: br i1 [[CMP8_I]], label [[DOTEXIT_THREAD4]], label [[COND_FALSE_10_I:%.*]] +; CHECK: cond.false.10.i: +; CHECK-NEXT: [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]] +; CHECK-NEXT: br i1 [[CMP13_I]], label [[DOTEXIT_THREAD]], label [[DOTEXIT:%.*]] +; CHECK: .exit: +; CHECK-NEXT: [[PHITMP:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[PHITMP]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[DOTEXIT_THREAD]], label [[DOTEXIT_THREAD4]] +; CHECK: .exit.thread: +; CHECK-NEXT: br label [[DOTEXIT_THREAD4]] +; 
CHECK: .exit.thread4: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[J]], [[DOTEXIT_THREAD]] ], [ [[ADD3]], [[DOTEXIT]] ], [ [[ADD3]], [[ENTRY:%.*]] ], [ [[ADD3]], [[COND_FALSE_6_I]] ] +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + %add3 = add nsw i32 %j, 2 + %cmp.i = icmp slt i32 %u, %v + br i1 %cmp.i, label %.exit, label %cond.false.i + +cond.false.i: ; preds = %entry + %cmp4.i = icmp sgt i32 %u, %v + br i1 %cmp4.i, label %.exit, label %cond.false.6.i + +cond.false.6.i: ; preds = %cond.false.i + %cmp8.i = icmp slt i32 %w, %x + br i1 %cmp8.i, label %.exit, label %cond.false.10.i + +cond.false.10.i: ; preds = %cond.false.6.i + %cmp13.i = icmp sgt i32 %w, %x + br i1 %cmp13.i, label %.exit, label %cond.false.15.i + +cond.false.15.i: ; preds = %cond.false.10.i + %phitmp = icmp sge i32 %y, %z + br label %.exit + +.exit: ; preds = %entry, %cond.false.i, %cond.false.6.i, %cond.false.10.i, %cond.false.15.i + %cond23.i = phi i1 [ false, %entry ], [ true, %cond.false.i ], [ false, %cond.false.6.i ], [ %phitmp, %cond.false.15.i ], [ true, %cond.false.10.i ] + %j.add3 = select i1 %cond23.i, i32 %j, i32 %add3 + ret i32 %j.add3 +} + +define i32 @unfold4(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind { +; CHECK-LABEL: @unfold4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[J:%.*]], 2 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[U:%.*]], [[V:%.*]] +; CHECK-NEXT: br i1 [[CMP_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_I:%.*]] +; CHECK: cond.false.i: +; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]] +; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT_THREAD5:%.*]], label [[COND_FALSE_6_I:%.*]] +; CHECK: cond.false.6.i: +; CHECK-NEXT: [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]] +; CHECK-NEXT: br i1 [[CMP8_I]], label [[DOTEXIT_THREAD]], label [[COND_FALSE_10_I:%.*]] +; CHECK: cond.false.10.i: +; CHECK-NEXT: [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]] +; CHECK-NEXT: br i1 [[CMP13_I]], label [[DOTEXIT_THREAD5]], label [[DOTEXIT:%.*]] +; CHECK: .exit: +; CHECK-NEXT: [[CMP19_I:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP19_I]] to i32 +; CHECK-NEXT: [[LNOT_I18:%.*]] = icmp eq i32 [[CONV]], 1 +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[LNOT_I18]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[DOTEXIT_THREAD]], label [[DOTEXIT_THREAD5]] +; CHECK: .exit.thread: +; CHECK-NEXT: br label [[DOTEXIT_THREAD5]] +; CHECK: .exit.thread5: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[J]], [[DOTEXIT_THREAD]] ], [ [[ADD3]], [[DOTEXIT]] ], [ [[ADD3]], [[COND_FALSE_I]] ], [ [[ADD3]], [[COND_FALSE_10_I]] ] +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + %add3 = add nsw i32 %j, 2 + %cmp.i = icmp slt i32 %u, %v + br i1 %cmp.i, label %.exit, label %cond.false.i + +cond.false.i: ; preds = %entry + %cmp4.i = icmp sgt i32 %u, %v + br i1 %cmp4.i, label %.exit, label %cond.false.6.i + +cond.false.6.i: ; preds = %cond.false.i + %cmp8.i = icmp slt i32 %w, %x + br i1 %cmp8.i, label %.exit, label %cond.false.10.i + +cond.false.10.i: ; preds = %cond.false.6.i + %cmp13.i = icmp sgt i32 %w, %x + br i1 %cmp13.i, label %.exit, label %cond.false.15.i + +cond.false.15.i: ; preds = %cond.false.10.i + %cmp19.i = icmp sge i32 %y, %z + %conv = zext i1 %cmp19.i to i32 + br label %.exit + +.exit: ; preds = %entry, %cond.false.i, %cond.false.6.i, %cond.false.10.i, %cond.false.15.i + %cond23.i = phi i32 [ 1, %entry ], [ 0, %cond.false.i ], [ 1, %cond.false.6.i ], [ %conv, %cond.false.15.i ], [ 0, %cond.false.10.i ] + %lnot.i18 = icmp eq i32 %cond23.i, 1 + %j.add3 = select i1 
%lnot.i18, i32 %j, i32 %add3 + ret i32 %j.add3 +} + +; TODO: cond23_i should be constant-folded. +define i32 @unfold5(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind { +; CHECK-LABEL: @unfold5( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[J:%.*]], 2 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[U:%.*]], [[V:%.*]] +; CHECK-NEXT: br i1 [[CMP_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_I:%.*]] +; CHECK: cond.false.i: +; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]] +; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT_THREAD]], label [[COND_FALSE_6_I:%.*]] +; CHECK: cond.false.6.i: +; CHECK-NEXT: [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]] +; CHECK-NEXT: br i1 [[CMP8_I]], label [[DOTEXIT_THREAD]], label [[COND_FALSE_10_I:%.*]] +; CHECK: cond.false.10.i: +; CHECK-NEXT: [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]] +; CHECK-NEXT: br i1 [[CMP13_I]], label [[TMP0:%.*]], label [[COND_FALSE_15_I:%.*]] +; CHECK: cond.false.15.i: +; CHECK-NEXT: [[CMP19_I:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP19_I]] to i32 +; CHECK-NEXT: br label [[DOTEXIT_THREAD]] +; CHECK: 0: +; CHECK-NEXT: [[COND23_I:%.*]] = phi i32 [ 7, [[COND_FALSE_10_I]] ] +; CHECK-NEXT: [[LNOT_I18:%.*]] = icmp sgt i32 [[COND23_I]], 5 +; CHECK-NEXT: br label [[DOTEXIT_THREAD]] +; CHECK: .exit.thread: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[J]], [[TMP0]] ], [ [[CONV]], [[COND_FALSE_15_I]] ], [ 1, [[COND_FALSE_6_I]] ], [ 3, [[COND_FALSE_I]] ], [ 2, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + %add3 = add nsw i32 %j, 2 + %cmp.i = icmp slt i32 %u, %v + br i1 %cmp.i, label %.exit, label %cond.false.i + +cond.false.i: ; preds = %entry + %cmp4.i = icmp sgt i32 %u, %v + br i1 %cmp4.i, label %.exit, label %cond.false.6.i + +cond.false.6.i: ; preds = %cond.false.i + %cmp8.i = icmp slt i32 %w, %x + br i1 %cmp8.i, label %.exit, label %cond.false.10.i + +cond.false.10.i: ; preds = %cond.false.6.i + %cmp13.i = icmp sgt i32 %w, %x + br i1 %cmp13.i, label %.exit, label %cond.false.15.i + +cond.false.15.i: ; preds = %cond.false.10.i + %cmp19.i = icmp sge i32 %y, %z + %conv = zext i1 %cmp19.i to i32 + br label %.exit + +.exit: ; preds = %entry, %cond.false.i, %cond.false.6.i, %cond.false.10.i, %cond.false.15.i + %cond23.i = phi i32 [ 2, %entry ], [ 3, %cond.false.i ], [ 1, %cond.false.6.i ], [ %conv, %cond.false.15.i ], [ 7, %cond.false.10.i ] + %lnot.i18 = icmp sgt i32 %cond23.i, 5 + %j.add3 = select i1 %lnot.i18, i32 %j, i32 %cond23.i + ret i32 %j.add3 +} + +define i32 @TryToUnfoldSelectInCurrBB(i1 %b, i1 %ui, i32 %s, i1 %x) { +; CHECK-LABEL: @TryToUnfoldSelectInCurrBB( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[B:%.*]], label [[IF_END_THREAD:%.*]], label [[IF_END:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[X:%.*]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[TMP0:%.*]], label [[IF_END_THREAD]] +; CHECK: 0: +; CHECK-NEXT: br label [[IF_END_THREAD]] +; CHECK: if.end.thread: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[S:%.*]], [[TMP0]] ], [ 42, [[IF_END]] ], [ 42, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + br i1 %b, label %if.end, label %if.else + +if.else: + br label %if.end + +if.end: + %v = phi i1 [ %x, %if.else ], [ false, %entry ] + %v1 = select i1 %v, i32 %s, i32 42 + ret i32 %v1 +} From fea175b59fbdf5d2b95e8fd81ac043479f20fe10 Mon Sep 17 00:00:00 2001 From: Jakub Lichman Date: Thu, 10 Sep 2020 07:03:43 +0000 Subject: [PATCH 0228/1079] [mlir][Linalg] Small refactoring of ConvOpVectorization This 
commit addresses review comments raised on D86619 after it landed.

Differential Revision: https://reviews.llvm.org/D87354
---
 mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h | 3 ++-
 mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index ce3b5fd2fd247..3049570bd47b6 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -568,10 +568,11 @@ struct AffineMinSCFCanonicalizationPattern
 /// Subsequently, they are contracted together and the result is written to
 /// the first entry of the output buffer.
 template <typename ConvOp, int N>
-struct ConvOpVectorization : public OpRewritePattern<ConvOp> {
+class ConvOpVectorization : public OpRewritePattern<ConvOp> {
   using OpRewritePattern<ConvOp>::OpRewritePattern;
   SmallVector<bool, 4> mask;
 
+public:
   ConvOpVectorization(MLIRContext *context, SmallVector<bool, 4> msk)
       : OpRewritePattern<ConvOp>(context) {
     assert(msk.size() == N && "Mask size does not match rank");
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 51781af9cb304..f4aabf8a8302f 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -371,7 +371,7 @@ LogicalResult LinalgCopyVTWForwardingPattern::matchAndRewrite(
 template <typename ConvOp, int N>
 LogicalResult ConvOpVectorization<ConvOp, N>::matchAndRewrite(
     ConvOp op, PatternRewriter &rewriter) const {
-  const unsigned dimSize = 3;
+  unsigned dimSize = 3;
   Location loc = op.getLoc();
   MLIRContext *context = op.getContext();
   edsc::ScopedContext scope(rewriter, loc);
From 157cd93b48a90f484e9eb2ed9997e0372b9c7ebb Mon Sep 17 00:00:00 2001
From: Snehasish Kumar
Date: Wed, 9 Sep 2020 17:57:03 -0700
Subject: [PATCH 0229/1079] [clang] Disallow fbasic-block-sections on non-ELF,
 non-x86 targets.

Basic block sections are untested on platforms and binary formats other
than x86 ELF. This patch emits an error and drops the flag if the target
platform and binary format are not compatible.

Add a test to ensure that specifying an incompatible target in the
driver does not enable the feature.
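The gating idiom, reduced to a self-contained sketch: llvm::Triple's isX86() and isOSBinFormatELF() are the real predicates the patch relies on, while the helper below is illustrative only:

// Sketch of the triple gating used by the patch below; the helper name is
// illustrative, but the Triple predicates are the ones the driver checks.
#include "llvm/ADT/Triple.h"
#include <cstdio>

static bool supportsBasicBlockSections(const llvm::Triple &T) {
  // -fbasic-block-sections is only known to work on x86 ELF targets.
  return T.isX86() && T.isOSBinFormatELF();
}

int main() {
  std::printf("%d\n", supportsBasicBlockSections(
                          llvm::Triple("x86_64-unknown-linux-gnu"))); // 1
  std::printf("%d\n", supportsBasicBlockSections(
                          llvm::Triple("x86_64-apple-darwin10"))); // 0
}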
Differential Revision: https://reviews.llvm.org/D87426 --- clang/lib/Driver/ToolChains/Clang.cpp | 19 ++++++++++++------- clang/test/Driver/fbasic-block-sections.c | 17 ++++++++++------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 1680f2ad91ea2..40659ebb1395e 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4880,13 +4880,18 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } if (Arg *A = Args.getLastArg(options::OPT_fbasic_block_sections_EQ)) { - StringRef Val = A->getValue(); - if (Val != "all" && Val != "labels" && Val != "none" && - !(Val.startswith("list=") && llvm::sys::fs::exists(Val.substr(5)))) - D.Diag(diag::err_drv_invalid_value) - << A->getAsString(Args) << A->getValue(); - else - A->render(Args, CmdArgs); + if (Triple.isX86() && Triple.isOSBinFormatELF()) { + StringRef Val = A->getValue(); + if (Val != "all" && Val != "labels" && Val != "none" && + !(Val.startswith("list=") && llvm::sys::fs::exists(Val.substr(5)))) + D.Diag(diag::err_drv_invalid_value) + << A->getAsString(Args) << A->getValue(); + else + A->render(Args, CmdArgs); + } else { + D.Diag(diag::err_drv_unsupported_opt_for_target) + << A->getAsString(Args) << TripleStr; + } } if (Args.hasFlag(options::OPT_fdata_sections, options::OPT_fno_data_sections, diff --git a/clang/test/Driver/fbasic-block-sections.c b/clang/test/Driver/fbasic-block-sections.c index 2ff98c94222b2..93c7fe9fc0699 100644 --- a/clang/test/Driver/fbasic-block-sections.c +++ b/clang/test/Driver/fbasic-block-sections.c @@ -1,9 +1,12 @@ -// RUN: %clang -### -fbasic-block-sections=none %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-NONE %s -// RUN: %clang -### -fbasic-block-sections=all %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-ALL %s -// RUN: %clang -### -fbasic-block-sections=list=%s %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-LIST %s -// RUN: %clang -### -fbasic-block-sections=labels %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-LABELS %s +// RUN: %clang -### -target x86_64 -fbasic-block-sections=none %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-NONE %s +// RUN: %clang -### -target x86_64 -fbasic-block-sections=all %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-ALL %s +// RUN: %clang -### -target x86_64 -fbasic-block-sections=list=%s %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-LIST %s +// RUN: %clang -### -target x86_64 -fbasic-block-sections=labels %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-LABELS %s +// RUN: not %clang -c -target arm-unknown-linux -fbasic-block-sections=all %s -S 2>&1 | FileCheck -check-prefix=CHECK-TRIPLE %s +// RUN: not %clang -c -target x86_64-apple-darwin10 -fbasic-block-sections=all %s -S 2>&1 | FileCheck -check-prefix=CHECK-TRIPLE %s // -// CHECK-OPT-NONE: "-fbasic-block-sections=none" -// CHECK-OPT-ALL: "-fbasic-block-sections=all" -// CHECK-OPT-LIST: "-fbasic-block-sections={{[^ ]*}}fbasic-block-sections.c" +// CHECK-OPT-NONE: "-fbasic-block-sections=none" +// CHECK-OPT-ALL: "-fbasic-block-sections=all" +// CHECK-OPT-LIST: "-fbasic-block-sections={{[^ ]*}}fbasic-block-sections.c" // CHECK-OPT-LABELS: "-fbasic-block-sections=labels" +// CHECK-TRIPLE: error: unsupported option '-fbasic-block-sections=all' for target From 1919b650523282c550536b6b72eb4713cd6712f4 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Wed, 9 Sep 2020 08:15:55 +0100 Subject: [PATCH 0230/1079] [ARM] Tail predicate VQDMULH and VQRDMULH Mark the family of instructions as 
valid for tail predication. Differential Revision: https://reviews.llvm.org/D87348 --- llvm/lib/Target/ARM/ARMInstrMVE.td | 2 ++ .../Thumb2/LowOverheadLoops/remat-vctp.ll | 18 +++--------------- llvm/unittests/Target/ARM/MachineInstrTest.cpp | 12 ++++++++++++ 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 2287edeef7662..1d562c5702c62 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1918,6 +1918,7 @@ class MVE_VQxDMULH_Base size, bit rounding, let Inst{12-8} = 0b01011; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } multiclass MVE_VQxDMULH_mThis Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r12 -; CHECK-NEXT: vpst -; CHECK-NEXT: vldrwt.u32 q4, [r1], #16 +; CHECK-NEXT: vldrw.u32 q4, [r1], #16 ; CHECK-NEXT: vabs.s32 q5, q4 ; CHECK-NEXT: vcls.s32 q3, q5 ; CHECK-NEXT: vshl.u32 q5, q5, q3 @@ -41,15 +31,13 @@ define void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg ; CHECK-NEXT: vqshl.s32 q5, q5, #1 ; CHECK-NEXT: vpt.s32 lt, q4, zr ; CHECK-NEXT: vnegt.s32 q5, q5 -; CHECK-NEXT: vctp.32 r12 -; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q4, [r0], #16 ; CHECK-NEXT: vqrdmulh.s32 q4, q4, q5 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vstrwt.32 q4, [r2], #16 ; CHECK-NEXT: vstrwt.32 q3, [r3], #16 -; CHECK-NEXT: le lr, .LBB0_1 +; CHECK-NEXT: letp lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %bb44 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: pop {r4, r5, r7, pc} diff --git a/llvm/unittests/Target/ARM/MachineInstrTest.cpp b/llvm/unittests/Target/ARM/MachineInstrTest.cpp index 876e011e1ce8a..bc37f991c3081 100644 --- a/llvm/unittests/Target/ARM/MachineInstrTest.cpp +++ b/llvm/unittests/Target/ARM/MachineInstrTest.cpp @@ -754,6 +754,12 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VQADDu16: case MVE_VQADDu32: case MVE_VQADDu8: + case MVE_VQDMULH_qr_s16: + case MVE_VQDMULH_qr_s32: + case MVE_VQDMULH_qr_s8: + case MVE_VQDMULHi16: + case MVE_VQDMULHi32: + case MVE_VQDMULHi8: case MVE_VQDMULL_qr_s16bh: case MVE_VQDMULL_qr_s16th: case MVE_VQDMULL_qr_s32bh: @@ -762,6 +768,12 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VQDMULLs16th: case MVE_VQDMULLs32bh: case MVE_VQDMULLs32th: + case MVE_VQRDMULH_qr_s16: + case MVE_VQRDMULH_qr_s32: + case MVE_VQRDMULH_qr_s8: + case MVE_VQRDMULHi16: + case MVE_VQRDMULHi32: + case MVE_VQRDMULHi8: case MVE_VQNEGs16: case MVE_VQNEGs32: case MVE_VQNEGs8: From 0bdf8c9127244127aef3620a8ef1eb4d2be57dad Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Mon, 7 Sep 2020 12:06:02 +0100 Subject: [PATCH 0231/1079] [SCEV] Constant expansion cost at minsize As code size is the only thing we care about at minsize, query the cost of materialising immediates when calculating the cost of a SCEV expansion. We also modify the CostKind to TCK_CodeSize for minsize, instead of RecipThroughput. 
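The central query is TargetTransformInfo::getIntImmCostInst, which prices materializing an immediate for a given user opcode and operand index. Roughly, the budget update for a SCEV constant looks like the sketch below (the helper and parameter names are illustrative, not the exact code in the diff):

// Hedged sketch of charging an immediate against the expansion budget at
// minsize; helper and parameter names are illustrative.
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

static bool chargeConstant(const SCEVConstant *C, unsigned UserOpcode,
                           unsigned OperandIdx,
                           const TargetTransformInfo &TTI,
                           int &BudgetRemaining) {
  // At minsize the cost model is code size, so ask what it costs to
  // materialize this immediate as the given operand of its user.
  BudgetRemaining -= TTI.getIntImmCostInst(UserOpcode, OperandIdx,
                                           C->getAPInt(), C->getType(),
                                           TargetTransformInfo::TCK_CodeSize);
  return BudgetRemaining >= 0;
}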
Differential Revision: https://reviews.llvm.org/D76434 --- .../Utils/ScalarEvolutionExpander.cpp | 76 ++- .../ARM/indvar-unroll-imm-cost.ll | 462 ++---------------- 2 files changed, 106 insertions(+), 432 deletions(-) diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 1bb827cd3057b..165030c6d2f1b 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -2184,26 +2184,37 @@ template static int costAndCollectOperands( const T *S = cast(WorkItem.S); int Cost = 0; - // Collect the opcodes of all the instructions that will be needed to expand - // the SCEVExpr. This is so that when we come to cost the operands, we know - // what the generated user(s) will be. - SmallVector Opcodes; + // Object to help map SCEV operands to expanded IR instructions. + struct OperationIndices { + OperationIndices(unsigned Opc, size_t min, size_t max) : + Opcode(Opc), MinIdx(min), MaxIdx(max) { } + unsigned Opcode; + size_t MinIdx; + size_t MaxIdx; + }; + + // Collect the operations of all the instructions that will be needed to + // expand the SCEVExpr. This is so that when we come to cost the operands, + // we know what the generated user(s) will be. + SmallVector Operations; auto CastCost = [&](unsigned Opcode) { - Opcodes.push_back(Opcode); + Operations.emplace_back(Opcode, 0, 0); return TTI.getCastInstrCost(Opcode, S->getType(), S->getOperand(0)->getType(), TTI::CastContextHint::None, CostKind); }; - auto ArithCost = [&](unsigned Opcode, unsigned NumRequired) { - Opcodes.push_back(Opcode); + auto ArithCost = [&](unsigned Opcode, unsigned NumRequired, + unsigned MinIdx = 0, unsigned MaxIdx = 1) { + Operations.emplace_back(Opcode, MinIdx, MaxIdx); return NumRequired * TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind); }; - auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired) { - Opcodes.push_back(Opcode); + auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, + unsigned MinIdx, unsigned MaxIdx) { + Operations.emplace_back(Opcode, MinIdx, MaxIdx); Type *OpType = S->getOperand(0)->getType(); return NumRequired * TTI.getCmpSelInstrCost(Opcode, OpType, @@ -2246,8 +2257,8 @@ template static int costAndCollectOperands( case scUMaxExpr: case scSMinExpr: case scUMinExpr: { - Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1); - Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1); + Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1); + Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2); break; } case scAddRecExpr: { @@ -2270,7 +2281,8 @@ template static int costAndCollectOperands( // Much like with normal add expr, the polynominal will require // one less addition than the number of it's terms. - int AddCost = ArithCost(Instruction::Add, NumTerms - 1); + int AddCost = ArithCost(Instruction::Add, NumTerms - 1, + /*MinIdx*/1, /*MaxIdx*/1); // Here, *each* one of those will require a multiplication. int MulCost = ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms); Cost = AddCost + MulCost; @@ -2286,12 +2298,18 @@ template static int costAndCollectOperands( // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free. // FIXME: this is conservatively correct, but might be overly pessimistic. 
Cost += MulCost * (PolyDegree - 1); + break; } } - for (unsigned Opc : Opcodes) - for (auto I : enumerate(S->operands())) - Worklist.emplace_back(Opc, I.index(), I.value()); + for (auto &CostOp : Operations) { + for (auto SCEVOp : enumerate(S->operands())) { + // Clamp the index to account for multiple IR operations being chained. + size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx); + size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx); + Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value()); + } + } return Cost; } @@ -2305,7 +2323,7 @@ bool SCEVExpander::isHighCostExpansionHelper( const SCEV *S = WorkItem.S; // Was the cost of expansion of this expression already accounted for? - if (!Processed.insert(S).second) + if (!isa(S) && !Processed.insert(S).second) return false; // We have already accounted for this expression. // If we can find an existing value for this scev available at the point "At" @@ -2313,16 +2331,26 @@ bool SCEVExpander::isHighCostExpansionHelper( if (getRelatedExistingExpansion(S, &At, L)) return false; // Consider the expression to be free. - switch (S->getSCEVType()) { - case scUnknown: - case scConstant: - return false; // Assume to be zero-cost. - } + // Assume to be zero-cost. + if (isa(S)) + return false; TargetTransformInfo::TargetCostKind CostKind = - TargetTransformInfo::TCK_RecipThroughput; - - if (isa(S)) { + L->getHeader()->getParent()->hasMinSize() + ? TargetTransformInfo::TCK_CodeSize + : TargetTransformInfo::TCK_RecipThroughput; + + if (auto *Constant = dyn_cast(S)) { + // Only evalulate the costs of constants when optimizing for size. + if (CostKind != TargetTransformInfo::TCK_CodeSize) + return 0; + const APInt &Imm = Constant->getAPInt(); + Type *Ty = S->getType(); + BudgetRemaining -= + TTI.getIntImmCostInst(WorkItem.ParentOpcode, WorkItem.OperandIdx, + Imm, Ty, CostKind); + return BudgetRemaining < 0; + } else if (isa(S)) { int Cost = costAndCollectOperands(WorkItem, TTI, CostKind, Worklist); BudgetRemaining -= Cost; diff --git a/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll b/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll index 36749a03553ea..16f967be12c21 100644 --- a/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll +++ b/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll @@ -18,344 +18,92 @@ define dso_local arm_aapcscc void @test(i32* nocapture %pDest, i16* nocapture re ; CHECK-NEXT: [[PSRCA_ADDR_090:%.*]] = phi i16* [ [[PSRCA_ADDR_2_LCSSA:%.*]], [[FOR_END40]] ], [ [[PSRCA:%.*]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[PSRCB_ADDR_089:%.*]] = phi i16* [ [[PSRCB_ADDR_2_LCSSA:%.*]], [[FOR_END40]] ], [ [[PSRCB:%.*]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[TMP0:%.*]] = lshr i32 [[I_092]], 2 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[I_092]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], 3 -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 2147483644 -; CHECK-NEXT: [[CMP272:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2147483644 +; CHECK-NEXT: [[CMP272:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[CMP272]], label [[FOR_END:%.*]], label [[FOR_BODY3_PREHEADER:%.*]] ; CHECK: for.body3.preheader: -; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[TMP3]], 3 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP2]], 3 -; 
CHECK-NEXT: br i1 [[TMP7]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY3_PREHEADER_NEW:%.*]] -; CHECK: for.body3.preheader.new: -; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[TMP3]], [[XTRAITER]] ; CHECK-NEXT: br label [[FOR_BODY3:%.*]] ; CHECK: for.body3: -; CHECK-NEXT: [[J_076:%.*]] = phi i32 [ 0, [[FOR_BODY3_PREHEADER_NEW]] ], [ [[ADD24_3:%.*]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[PDEST_ADDR_175:%.*]] = phi i32* [ [[PDEST_ADDR_091]], [[FOR_BODY3_PREHEADER_NEW]] ], [ [[INCDEC_PTR_3:%.*]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[PSRCA_ADDR_174:%.*]] = phi i16* [ [[PSRCA_ADDR_090]], [[FOR_BODY3_PREHEADER_NEW]] ], [ [[ADD_PTR_3:%.*]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[PSRCB_ADDR_173:%.*]] = phi i16* [ [[PSRCB_ADDR_089]], [[FOR_BODY3_PREHEADER_NEW]] ], [ [[ADD_PTR23_3:%.*]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_BODY3_PREHEADER_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[TMP8:%.*]] = load i16, i16* [[PSRCA_ADDR_174]], align 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 -; CHECK-NEXT: [[TMP9:%.*]] = load i16, i16* [[PSRCB_ADDR_173]], align 2 -; CHECK-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 +; CHECK-NEXT: [[J_076:%.*]] = phi i32 [ [[ADD24:%.*]], [[FOR_BODY3]] ], [ 0, [[FOR_BODY3_PREHEADER]] ] +; CHECK-NEXT: [[PDEST_ADDR_175:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY3]] ], [ [[PDEST_ADDR_091]], [[FOR_BODY3_PREHEADER]] ] +; CHECK-NEXT: [[PSRCA_ADDR_174:%.*]] = phi i16* [ [[ADD_PTR:%.*]], [[FOR_BODY3]] ], [ [[PSRCA_ADDR_090]], [[FOR_BODY3_PREHEADER]] ] +; CHECK-NEXT: [[PSRCB_ADDR_173:%.*]] = phi i16* [ [[ADD_PTR23:%.*]], [[FOR_BODY3]] ], [ [[PSRCB_ADDR_089]], [[FOR_BODY3_PREHEADER]] ] +; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[PSRCA_ADDR_174]], align 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = load i16, i16* [[PSRCB_ADDR_173]], align 2 +; CHECK-NEXT: [[CONV5:%.*]] = sext i16 [[TMP4]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV5]], [[CONV]] ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2 -; CHECK-NEXT: [[CONV7:%.*]] = sext i16 [[TMP10]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2 +; CHECK-NEXT: [[CONV7:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX8]], align 2 -; CHECK-NEXT: [[CONV9:%.*]] = sext i16 [[TMP11]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX8]], align 2 +; CHECK-NEXT: [[CONV9:%.*]] = sext i16 [[TMP6]] to i32 ; CHECK-NEXT: [[MUL10:%.*]] = mul nsw i32 [[CONV9]], [[CONV7]] ; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174]], i32 2 -; CHECK-NEXT: [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX11]], align 2 -; CHECK-NEXT: [[CONV12:%.*]] = sext i16 [[TMP12]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX11]], align 2 +; CHECK-NEXT: [[CONV12:%.*]] = sext i16 [[TMP7]] to i32 ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173]], i32 3 -; CHECK-NEXT: [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX13]], align 2 -; CHECK-NEXT: [[CONV14:%.*]] = sext i16 [[TMP13]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX13]], align 2 +; CHECK-NEXT: [[CONV14:%.*]] = sext i16 [[TMP8]] to i32 ; CHECK-NEXT: [[MUL15:%.*]] = mul nsw i32 [[CONV14]], [[CONV12]] ; CHECK-NEXT: 
[[ARRAYIDX17:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174]], i32 3 -; CHECK-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX17]], align 2 -; CHECK-NEXT: [[CONV18:%.*]] = sext i16 [[TMP14]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX17]], align 2 +; CHECK-NEXT: [[CONV18:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[ADD21:%.*]] = add i32 [[MUL10]], [[MUL]] ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[ADD21]], [[CONV14]] ; CHECK-NEXT: [[ADD16:%.*]] = add i32 [[ADD]], [[MUL15]] ; CHECK-NEXT: [[ADD22:%.*]] = add i32 [[ADD16]], [[CONV18]] ; CHECK-NEXT: store i32 [[ADD22]], i32* [[PDEST_ADDR_175]], align 4 -; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174]], i32 4 -; CHECK-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173]], i32 4 -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[PDEST_ADDR_175]], i32 1 -; CHECK-NEXT: [[ADD24:%.*]] = add nuw nsw i32 [[J_076]], 4 -; CHECK-NEXT: [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1 -; CHECK-NEXT: [[TMP15:%.*]] = load i16, i16* [[ADD_PTR]], align 2 -; CHECK-NEXT: [[CONV_1:%.*]] = sext i16 [[TMP15]] to i32 -; CHECK-NEXT: [[TMP16:%.*]] = load i16, i16* [[ADD_PTR23]], align 2 -; CHECK-NEXT: [[CONV5_1:%.*]] = sext i16 [[TMP16]] to i32 -; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[CONV5_1]], [[CONV_1]] -; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i32 1 -; CHECK-NEXT: [[TMP17:%.*]] = load i16, i16* [[ARRAYIDX6_1]], align 2 -; CHECK-NEXT: [[CONV7_1:%.*]] = sext i16 [[TMP17]] to i32 -; CHECK-NEXT: [[ARRAYIDX8_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23]], i32 1 -; CHECK-NEXT: [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX8_1]], align 2 -; CHECK-NEXT: [[CONV9_1:%.*]] = sext i16 [[TMP18]] to i32 -; CHECK-NEXT: [[MUL10_1:%.*]] = mul nsw i32 [[CONV9_1]], [[CONV7_1]] -; CHECK-NEXT: [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i32 2 -; CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* [[ARRAYIDX11_1]], align 2 -; CHECK-NEXT: [[CONV12_1:%.*]] = sext i16 [[TMP19]] to i32 -; CHECK-NEXT: [[ARRAYIDX13_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23]], i32 3 -; CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[ARRAYIDX13_1]], align 2 -; CHECK-NEXT: [[CONV14_1:%.*]] = sext i16 [[TMP20]] to i32 -; CHECK-NEXT: [[MUL15_1:%.*]] = mul nsw i32 [[CONV14_1]], [[CONV12_1]] -; CHECK-NEXT: [[ARRAYIDX17_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i32 3 -; CHECK-NEXT: [[TMP21:%.*]] = load i16, i16* [[ARRAYIDX17_1]], align 2 -; CHECK-NEXT: [[CONV18_1:%.*]] = sext i16 [[TMP21]] to i32 -; CHECK-NEXT: [[ADD21_1:%.*]] = add i32 [[MUL10_1]], [[MUL_1]] -; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[ADD21_1]], [[CONV14_1]] -; CHECK-NEXT: [[ADD16_1:%.*]] = add i32 [[ADD_1]], [[MUL15_1]] -; CHECK-NEXT: [[ADD22_1:%.*]] = add i32 [[ADD16_1]], [[CONV18_1]] -; CHECK-NEXT: store i32 [[ADD22_1]], i32* [[INCDEC_PTR]], align 4 -; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i32 4 -; CHECK-NEXT: [[ADD_PTR23_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23]], i32 4 -; CHECK-NEXT: [[INCDEC_PTR_1:%.*]] = getelementptr inbounds i32, i32* [[INCDEC_PTR]], i32 1 -; CHECK-NEXT: [[ADD24_1:%.*]] = add nuw nsw i32 [[ADD24]], 4 -; CHECK-NEXT: [[NITER_NSUB_1:%.*]] = sub i32 [[NITER_NSUB]], 1 -; CHECK-NEXT: [[TMP22:%.*]] = load i16, i16* [[ADD_PTR_1]], align 2 -; CHECK-NEXT: [[CONV_2:%.*]] = sext i16 [[TMP22]] to i32 -; CHECK-NEXT: [[TMP23:%.*]] = load i16, i16* [[ADD_PTR23_1]], align 2 -; CHECK-NEXT: [[CONV5_2:%.*]] 
= sext i16 [[TMP23]] to i32 -; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[CONV5_2]], [[CONV_2]] -; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i32 1 -; CHECK-NEXT: [[TMP24:%.*]] = load i16, i16* [[ARRAYIDX6_2]], align 2 -; CHECK-NEXT: [[CONV7_2:%.*]] = sext i16 [[TMP24]] to i32 -; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_1]], i32 1 -; CHECK-NEXT: [[TMP25:%.*]] = load i16, i16* [[ARRAYIDX8_2]], align 2 -; CHECK-NEXT: [[CONV9_2:%.*]] = sext i16 [[TMP25]] to i32 -; CHECK-NEXT: [[MUL10_2:%.*]] = mul nsw i32 [[CONV9_2]], [[CONV7_2]] -; CHECK-NEXT: [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i32 2 -; CHECK-NEXT: [[TMP26:%.*]] = load i16, i16* [[ARRAYIDX11_2]], align 2 -; CHECK-NEXT: [[CONV12_2:%.*]] = sext i16 [[TMP26]] to i32 -; CHECK-NEXT: [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_1]], i32 3 -; CHECK-NEXT: [[TMP27:%.*]] = load i16, i16* [[ARRAYIDX13_2]], align 2 -; CHECK-NEXT: [[CONV14_2:%.*]] = sext i16 [[TMP27]] to i32 -; CHECK-NEXT: [[MUL15_2:%.*]] = mul nsw i32 [[CONV14_2]], [[CONV12_2]] -; CHECK-NEXT: [[ARRAYIDX17_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i32 3 -; CHECK-NEXT: [[TMP28:%.*]] = load i16, i16* [[ARRAYIDX17_2]], align 2 -; CHECK-NEXT: [[CONV18_2:%.*]] = sext i16 [[TMP28]] to i32 -; CHECK-NEXT: [[ADD21_2:%.*]] = add i32 [[MUL10_2]], [[MUL_2]] -; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[ADD21_2]], [[CONV14_2]] -; CHECK-NEXT: [[ADD16_2:%.*]] = add i32 [[ADD_2]], [[MUL15_2]] -; CHECK-NEXT: [[ADD22_2:%.*]] = add i32 [[ADD16_2]], [[CONV18_2]] -; CHECK-NEXT: store i32 [[ADD22_2]], i32* [[INCDEC_PTR_1]], align 4 -; CHECK-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i32 4 -; CHECK-NEXT: [[ADD_PTR23_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_1]], i32 4 -; CHECK-NEXT: [[INCDEC_PTR_2:%.*]] = getelementptr inbounds i32, i32* [[INCDEC_PTR_1]], i32 1 -; CHECK-NEXT: [[ADD24_2:%.*]] = add nuw nsw i32 [[ADD24_1]], 4 -; CHECK-NEXT: [[NITER_NSUB_2:%.*]] = sub i32 [[NITER_NSUB_1]], 1 -; CHECK-NEXT: [[TMP29:%.*]] = load i16, i16* [[ADD_PTR_2]], align 2 -; CHECK-NEXT: [[CONV_3:%.*]] = sext i16 [[TMP29]] to i32 -; CHECK-NEXT: [[TMP30:%.*]] = load i16, i16* [[ADD_PTR23_2]], align 2 -; CHECK-NEXT: [[CONV5_3:%.*]] = sext i16 [[TMP30]] to i32 -; CHECK-NEXT: [[MUL_3:%.*]] = mul nsw i32 [[CONV5_3]], [[CONV_3]] -; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i32 1 -; CHECK-NEXT: [[TMP31:%.*]] = load i16, i16* [[ARRAYIDX6_3]], align 2 -; CHECK-NEXT: [[CONV7_3:%.*]] = sext i16 [[TMP31]] to i32 -; CHECK-NEXT: [[ARRAYIDX8_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_2]], i32 1 -; CHECK-NEXT: [[TMP32:%.*]] = load i16, i16* [[ARRAYIDX8_3]], align 2 -; CHECK-NEXT: [[CONV9_3:%.*]] = sext i16 [[TMP32]] to i32 -; CHECK-NEXT: [[MUL10_3:%.*]] = mul nsw i32 [[CONV9_3]], [[CONV7_3]] -; CHECK-NEXT: [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i32 2 -; CHECK-NEXT: [[TMP33:%.*]] = load i16, i16* [[ARRAYIDX11_3]], align 2 -; CHECK-NEXT: [[CONV12_3:%.*]] = sext i16 [[TMP33]] to i32 -; CHECK-NEXT: [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_2]], i32 3 -; CHECK-NEXT: [[TMP34:%.*]] = load i16, i16* [[ARRAYIDX13_3]], align 2 -; CHECK-NEXT: [[CONV14_3:%.*]] = sext i16 [[TMP34]] to i32 -; CHECK-NEXT: [[MUL15_3:%.*]] = mul nsw i32 [[CONV14_3]], [[CONV12_3]] -; CHECK-NEXT: [[ARRAYIDX17_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i32 3 -; 
CHECK-NEXT: [[TMP35:%.*]] = load i16, i16* [[ARRAYIDX17_3]], align 2 -; CHECK-NEXT: [[CONV18_3:%.*]] = sext i16 [[TMP35]] to i32 -; CHECK-NEXT: [[ADD21_3:%.*]] = add i32 [[MUL10_3]], [[MUL_3]] -; CHECK-NEXT: [[ADD_3:%.*]] = add i32 [[ADD21_3]], [[CONV14_3]] -; CHECK-NEXT: [[ADD16_3:%.*]] = add i32 [[ADD_3]], [[MUL15_3]] -; CHECK-NEXT: [[ADD22_3:%.*]] = add i32 [[ADD16_3]], [[CONV18_3]] -; CHECK-NEXT: store i32 [[ADD22_3]], i32* [[INCDEC_PTR_2]], align 4 -; CHECK-NEXT: [[ADD_PTR_3]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i32 4 -; CHECK-NEXT: [[ADD_PTR23_3]] = getelementptr inbounds i16, i16* [[ADD_PTR23_2]], i32 4 -; CHECK-NEXT: [[INCDEC_PTR_3]] = getelementptr inbounds i32, i32* [[INCDEC_PTR_2]], i32 1 -; CHECK-NEXT: [[ADD24_3]] = add nuw nsw i32 [[ADD24_2]], 4 -; CHECK-NEXT: [[NITER_NSUB_3]] = sub i32 [[NITER_NSUB_2]], 1 -; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp ne i32 [[NITER_NSUB_3]], 0 -; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label [[FOR_BODY3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]] -; CHECK: for.end.loopexit.unr-lcssa.loopexit: -; CHECK-NEXT: [[ADD_PTR_LCSSA_PH_PH:%.*]] = phi i16* [ [[ADD_PTR_3]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[ADD_PTR23_LCSSA_PH_PH:%.*]] = phi i16* [ [[ADD_PTR23_3]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[INCDEC_PTR_LCSSA_PH_PH:%.*]] = phi i32* [ [[INCDEC_PTR_3]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[J_076_UNR_PH:%.*]] = phi i32 [ [[ADD24_3]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[PDEST_ADDR_175_UNR_PH:%.*]] = phi i32* [ [[INCDEC_PTR_3]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[PSRCA_ADDR_174_UNR_PH:%.*]] = phi i16* [ [[ADD_PTR_3]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[PSRCB_ADDR_173_UNR_PH:%.*]] = phi i16* [ [[ADD_PTR23_3]], [[FOR_BODY3]] ] -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]] -; CHECK: for.end.loopexit.unr-lcssa: -; CHECK-NEXT: [[ADD_PTR_LCSSA_PH:%.*]] = phi i16* [ undef, [[FOR_BODY3_PREHEADER]] ], [ [[ADD_PTR_LCSSA_PH_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[ADD_PTR23_LCSSA_PH:%.*]] = phi i16* [ undef, [[FOR_BODY3_PREHEADER]] ], [ [[ADD_PTR23_LCSSA_PH_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[INCDEC_PTR_LCSSA_PH:%.*]] = phi i32* [ undef, [[FOR_BODY3_PREHEADER]] ], [ [[INCDEC_PTR_LCSSA_PH_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[J_076_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY3_PREHEADER]] ], [ [[J_076_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[PDEST_ADDR_175_UNR:%.*]] = phi i32* [ [[PDEST_ADDR_091]], [[FOR_BODY3_PREHEADER]] ], [ [[PDEST_ADDR_175_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[PSRCA_ADDR_174_UNR:%.*]] = phi i16* [ [[PSRCA_ADDR_090]], [[FOR_BODY3_PREHEADER]] ], [ [[PSRCA_ADDR_174_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[PSRCB_ADDR_173_UNR:%.*]] = phi i16* [ [[PSRCB_ADDR_089]], [[FOR_BODY3_PREHEADER]] ], [ [[PSRCB_ADDR_173_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_BODY3_EPIL_PREHEADER:%.*]], label [[FOR_END_LOOPEXIT:%.*]] -; CHECK: for.body3.epil.preheader: -; CHECK-NEXT: br label [[FOR_BODY3_EPIL:%.*]] -; CHECK: for.body3.epil: -; CHECK-NEXT: [[TMP36:%.*]] = load i16, i16* [[PSRCA_ADDR_174_UNR]], align 2 -; CHECK-NEXT: [[CONV_EPIL:%.*]] = sext i16 [[TMP36]] to i32 -; CHECK-NEXT: [[TMP37:%.*]] = load i16, i16* [[PSRCB_ADDR_173_UNR]], align 2 -; CHECK-NEXT: [[CONV5_EPIL:%.*]] = sext i16 [[TMP37]] to i32 -; CHECK-NEXT: [[MUL_EPIL:%.*]] = mul nsw i32 [[CONV5_EPIL]], [[CONV_EPIL]] -; 
CHECK-NEXT: [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174_UNR]], i32 1 -; CHECK-NEXT: [[TMP38:%.*]] = load i16, i16* [[ARRAYIDX6_EPIL]], align 2 -; CHECK-NEXT: [[CONV7_EPIL:%.*]] = sext i16 [[TMP38]] to i32 -; CHECK-NEXT: [[ARRAYIDX8_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173_UNR]], i32 1 -; CHECK-NEXT: [[TMP39:%.*]] = load i16, i16* [[ARRAYIDX8_EPIL]], align 2 -; CHECK-NEXT: [[CONV9_EPIL:%.*]] = sext i16 [[TMP39]] to i32 -; CHECK-NEXT: [[MUL10_EPIL:%.*]] = mul nsw i32 [[CONV9_EPIL]], [[CONV7_EPIL]] -; CHECK-NEXT: [[ARRAYIDX11_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174_UNR]], i32 2 -; CHECK-NEXT: [[TMP40:%.*]] = load i16, i16* [[ARRAYIDX11_EPIL]], align 2 -; CHECK-NEXT: [[CONV12_EPIL:%.*]] = sext i16 [[TMP40]] to i32 -; CHECK-NEXT: [[ARRAYIDX13_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173_UNR]], i32 3 -; CHECK-NEXT: [[TMP41:%.*]] = load i16, i16* [[ARRAYIDX13_EPIL]], align 2 -; CHECK-NEXT: [[CONV14_EPIL:%.*]] = sext i16 [[TMP41]] to i32 -; CHECK-NEXT: [[MUL15_EPIL:%.*]] = mul nsw i32 [[CONV14_EPIL]], [[CONV12_EPIL]] -; CHECK-NEXT: [[ARRAYIDX17_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174_UNR]], i32 3 -; CHECK-NEXT: [[TMP42:%.*]] = load i16, i16* [[ARRAYIDX17_EPIL]], align 2 -; CHECK-NEXT: [[CONV18_EPIL:%.*]] = sext i16 [[TMP42]] to i32 -; CHECK-NEXT: [[ADD21_EPIL:%.*]] = add i32 [[MUL10_EPIL]], [[MUL_EPIL]] -; CHECK-NEXT: [[ADD_EPIL:%.*]] = add i32 [[ADD21_EPIL]], [[CONV14_EPIL]] -; CHECK-NEXT: [[ADD16_EPIL:%.*]] = add i32 [[ADD_EPIL]], [[MUL15_EPIL]] -; CHECK-NEXT: [[ADD22_EPIL:%.*]] = add i32 [[ADD16_EPIL]], [[CONV18_EPIL]] -; CHECK-NEXT: store i32 [[ADD22_EPIL]], i32* [[PDEST_ADDR_175_UNR]], align 4 -; CHECK-NEXT: [[ADD_PTR_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174_UNR]], i32 4 -; CHECK-NEXT: [[ADD_PTR23_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173_UNR]], i32 4 -; CHECK-NEXT: [[INCDEC_PTR_EPIL:%.*]] = getelementptr inbounds i32, i32* [[PDEST_ADDR_175_UNR]], i32 1 -; CHECK-NEXT: [[ADD24_EPIL:%.*]] = add nuw nsw i32 [[J_076_UNR]], 4 -; CHECK-NEXT: [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1 -; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0 -; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_BODY3_EPIL_1:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]] -; CHECK: for.end.loopexit.epilog-lcssa: -; CHECK-NEXT: [[ADD_PTR_LCSSA_PH1:%.*]] = phi i16* [ [[ADD_PTR_EPIL]], [[FOR_BODY3_EPIL]] ], [ [[ADD_PTR_EPIL_1:%.*]], [[FOR_BODY3_EPIL_1]] ], [ [[ADD_PTR_EPIL_2:%.*]], [[FOR_BODY3_EPIL_2:%.*]] ] -; CHECK-NEXT: [[ADD_PTR23_LCSSA_PH2:%.*]] = phi i16* [ [[ADD_PTR23_EPIL]], [[FOR_BODY3_EPIL]] ], [ [[ADD_PTR23_EPIL_1:%.*]], [[FOR_BODY3_EPIL_1]] ], [ [[ADD_PTR23_EPIL_2:%.*]], [[FOR_BODY3_EPIL_2]] ] -; CHECK-NEXT: [[INCDEC_PTR_LCSSA_PH3:%.*]] = phi i32* [ [[INCDEC_PTR_EPIL]], [[FOR_BODY3_EPIL]] ], [ [[INCDEC_PTR_EPIL_1:%.*]], [[FOR_BODY3_EPIL_1]] ], [ [[INCDEC_PTR_EPIL_2:%.*]], [[FOR_BODY3_EPIL_2]] ] -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174]], i32 4 +; CHECK-NEXT: [[ADD_PTR23]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173]], i32 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[PDEST_ADDR_175]], i32 1 +; CHECK-NEXT: [[ADD24]] = add nuw nsw i32 [[J_076]], 4 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[ADD24]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: -; 
CHECK-NEXT: [[ADD_PTR_LCSSA:%.*]] = phi i16* [ [[ADD_PTR_LCSSA_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ], [ [[ADD_PTR_LCSSA_PH1]], [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] ] -; CHECK-NEXT: [[ADD_PTR23_LCSSA:%.*]] = phi i16* [ [[ADD_PTR23_LCSSA_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ], [ [[ADD_PTR23_LCSSA_PH2]], [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] ] -; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi i32* [ [[INCDEC_PTR_LCSSA_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ], [ [[INCDEC_PTR_LCSSA_PH3]], [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] ] +; CHECK-NEXT: [[ADD_PTR_LCSSA:%.*]] = phi i16* [ [[ADD_PTR]], [[FOR_BODY3]] ] +; CHECK-NEXT: [[ADD_PTR23_LCSSA:%.*]] = phi i16* [ [[ADD_PTR23]], [[FOR_BODY3]] ] +; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi i32* [ [[INCDEC_PTR]], [[FOR_BODY3]] ] ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: [[PSRCB_ADDR_1_LCSSA:%.*]] = phi i16* [ [[PSRCB_ADDR_089]], [[FOR_BODY]] ], [ [[ADD_PTR23_LCSSA]], [[FOR_END_LOOPEXIT]] ] ; CHECK-NEXT: [[PSRCA_ADDR_1_LCSSA:%.*]] = phi i16* [ [[PSRCA_ADDR_090]], [[FOR_BODY]] ], [ [[ADD_PTR_LCSSA]], [[FOR_END_LOOPEXIT]] ] ; CHECK-NEXT: [[PDEST_ADDR_1_LCSSA:%.*]] = phi i32* [ [[PDEST_ADDR_091]], [[FOR_BODY]] ], [ [[INCDEC_PTR_LCSSA]], [[FOR_END_LOOPEXIT]] ] -; CHECK-NEXT: [[J_0_LCSSA:%.*]] = phi i32 [ 0, [[FOR_BODY]] ], [ [[TMP6]], [[FOR_END_LOOPEXIT]] ] -; CHECK-NEXT: [[REM:%.*]] = and i32 [[TMP4]], 3 +; CHECK-NEXT: [[J_0_LCSSA:%.*]] = phi i32 [ 0, [[FOR_BODY]] ], [ [[TMP2]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[REM:%.*]] = and i32 [[TMP0]], 3 ; CHECK-NEXT: [[ADD25:%.*]] = or i32 [[J_0_LCSSA]], [[REM]] ; CHECK-NEXT: [[CMP2780:%.*]] = icmp ugt i32 [[ADD25]], [[J_0_LCSSA]] ; CHECK-NEXT: br i1 [[CMP2780]], label [[FOR_BODY29_PREHEADER:%.*]], label [[FOR_END40]] ; CHECK: for.body29.preheader: -; CHECK-NEXT: [[TMP43:%.*]] = sub nsw i32 [[ADD25]], [[J_0_LCSSA]] -; CHECK-NEXT: [[TMP44:%.*]] = sub i32 [[ADD25]], [[J_0_LCSSA]] -; CHECK-NEXT: [[TMP45:%.*]] = add i32 [[ADD25]], -1 -; CHECK-NEXT: [[TMP46:%.*]] = sub i32 [[TMP45]], [[J_0_LCSSA]] -; CHECK-NEXT: [[XTRAITER4:%.*]] = and i32 [[TMP44]], 3 -; CHECK-NEXT: [[LCMP_MOD5:%.*]] = icmp ne i32 [[XTRAITER4]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD5]], label [[FOR_BODY29_PROL_PREHEADER:%.*]], label [[FOR_BODY29_PROL_LOOPEXIT:%.*]] -; CHECK: for.body29.prol.preheader: -; CHECK-NEXT: br label [[FOR_BODY29_PROL:%.*]] -; CHECK: for.body29.prol: -; CHECK-NEXT: [[ARRAYIDX30_PROL:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_1_LCSSA]], i32 [[J_0_LCSSA]] -; CHECK-NEXT: [[TMP47:%.*]] = load i16, i16* [[ARRAYIDX30_PROL]], align 2 -; CHECK-NEXT: [[CONV31_PROL:%.*]] = sext i16 [[TMP47]] to i32 -; CHECK-NEXT: [[ARRAYIDX32_PROL:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_1_LCSSA]], i32 [[J_0_LCSSA]] -; CHECK-NEXT: [[TMP48:%.*]] = load i16, i16* [[ARRAYIDX32_PROL]], align 2 -; CHECK-NEXT: [[CONV33_PROL:%.*]] = sext i16 [[TMP48]] to i32 -; CHECK-NEXT: [[MUL34_PROL:%.*]] = mul nsw i32 [[CONV33_PROL]], [[CONV31_PROL]] -; CHECK-NEXT: [[TMP49:%.*]] = load i32, i32* [[PDEST_ADDR_1_LCSSA]], align 4 -; CHECK-NEXT: [[ADD35_PROL:%.*]] = add nsw i32 [[MUL34_PROL]], [[TMP49]] -; CHECK-NEXT: store i32 [[ADD35_PROL]], i32* [[PDEST_ADDR_1_LCSSA]], align 4 -; CHECK-NEXT: [[INCDEC_PTR36_PROL:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_1_LCSSA]], i32 1 -; CHECK-NEXT: [[INCDEC_PTR37_PROL:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_1_LCSSA]], i32 1 -; CHECK-NEXT: [[INCDEC_PTR38_PROL:%.*]] = getelementptr inbounds i32, i32* [[PDEST_ADDR_1_LCSSA]], i32 1 -; CHECK-NEXT: [[INC_PROL:%.*]] = add nuw i32 
[[J_0_LCSSA]], 1 -; CHECK-NEXT: [[PROL_ITER_SUB:%.*]] = sub i32 [[XTRAITER4]], 1 -; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_SUB]], 0 -; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[FOR_BODY29_PROL_1:%.*]], label [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA:%.*]] -; CHECK: for.body29.prol.loopexit.unr-lcssa: -; CHECK-NEXT: [[J_184_UNR_PH:%.*]] = phi i32 [ [[INC_PROL]], [[FOR_BODY29_PROL]] ], [ [[INC_PROL_1:%.*]], [[FOR_BODY29_PROL_1]] ], [ [[INC_PROL_2:%.*]], [[FOR_BODY29_PROL_2:%.*]] ] -; CHECK-NEXT: [[PDEST_ADDR_283_UNR_PH:%.*]] = phi i32* [ [[INCDEC_PTR38_PROL]], [[FOR_BODY29_PROL]] ], [ [[INCDEC_PTR38_PROL_1:%.*]], [[FOR_BODY29_PROL_1]] ], [ [[INCDEC_PTR38_PROL_2:%.*]], [[FOR_BODY29_PROL_2]] ] -; CHECK-NEXT: [[PSRCA_ADDR_282_UNR_PH:%.*]] = phi i16* [ [[INCDEC_PTR36_PROL]], [[FOR_BODY29_PROL]] ], [ [[INCDEC_PTR36_PROL_1:%.*]], [[FOR_BODY29_PROL_1]] ], [ [[INCDEC_PTR36_PROL_2:%.*]], [[FOR_BODY29_PROL_2]] ] -; CHECK-NEXT: [[PSRCB_ADDR_281_UNR_PH:%.*]] = phi i16* [ [[INCDEC_PTR37_PROL]], [[FOR_BODY29_PROL]] ], [ [[INCDEC_PTR37_PROL_1:%.*]], [[FOR_BODY29_PROL_1]] ], [ [[INCDEC_PTR37_PROL_2:%.*]], [[FOR_BODY29_PROL_2]] ] -; CHECK-NEXT: br label [[FOR_BODY29_PROL_LOOPEXIT]] -; CHECK: for.body29.prol.loopexit: -; CHECK-NEXT: [[J_184_UNR:%.*]] = phi i32 [ [[J_0_LCSSA]], [[FOR_BODY29_PREHEADER]] ], [ [[J_184_UNR_PH]], [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA]] ] -; CHECK-NEXT: [[PDEST_ADDR_283_UNR:%.*]] = phi i32* [ [[PDEST_ADDR_1_LCSSA]], [[FOR_BODY29_PREHEADER]] ], [ [[PDEST_ADDR_283_UNR_PH]], [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA]] ] -; CHECK-NEXT: [[PSRCA_ADDR_282_UNR:%.*]] = phi i16* [ [[PSRCA_ADDR_1_LCSSA]], [[FOR_BODY29_PREHEADER]] ], [ [[PSRCA_ADDR_282_UNR_PH]], [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA]] ] -; CHECK-NEXT: [[PSRCB_ADDR_281_UNR:%.*]] = phi i16* [ [[PSRCB_ADDR_1_LCSSA]], [[FOR_BODY29_PREHEADER]] ], [ [[PSRCB_ADDR_281_UNR_PH]], [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA]] ] -; CHECK-NEXT: [[TMP50:%.*]] = icmp ult i32 [[TMP46]], 3 -; CHECK-NEXT: br i1 [[TMP50]], label [[FOR_END40_LOOPEXIT:%.*]], label [[FOR_BODY29_PREHEADER_NEW:%.*]] -; CHECK: for.body29.preheader.new: +; CHECK-NEXT: [[TMP10:%.*]] = sub nsw i32 [[ADD25]], [[J_0_LCSSA]] ; CHECK-NEXT: br label [[FOR_BODY29:%.*]] ; CHECK: for.body29: -; CHECK-NEXT: [[J_184:%.*]] = phi i32 [ [[J_184_UNR]], [[FOR_BODY29_PREHEADER_NEW]] ], [ [[INC_3:%.*]], [[FOR_BODY29]] ] -; CHECK-NEXT: [[PDEST_ADDR_283:%.*]] = phi i32* [ [[PDEST_ADDR_283_UNR]], [[FOR_BODY29_PREHEADER_NEW]] ], [ [[INCDEC_PTR38_3:%.*]], [[FOR_BODY29]] ] -; CHECK-NEXT: [[PSRCA_ADDR_282:%.*]] = phi i16* [ [[PSRCA_ADDR_282_UNR]], [[FOR_BODY29_PREHEADER_NEW]] ], [ [[INCDEC_PTR36_3:%.*]], [[FOR_BODY29]] ] -; CHECK-NEXT: [[PSRCB_ADDR_281:%.*]] = phi i16* [ [[PSRCB_ADDR_281_UNR]], [[FOR_BODY29_PREHEADER_NEW]] ], [ [[INCDEC_PTR37_3:%.*]], [[FOR_BODY29]] ] +; CHECK-NEXT: [[J_184:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY29]] ], [ [[J_0_LCSSA]], [[FOR_BODY29_PREHEADER]] ] +; CHECK-NEXT: [[PDEST_ADDR_283:%.*]] = phi i32* [ [[INCDEC_PTR38:%.*]], [[FOR_BODY29]] ], [ [[PDEST_ADDR_1_LCSSA]], [[FOR_BODY29_PREHEADER]] ] +; CHECK-NEXT: [[PSRCA_ADDR_282:%.*]] = phi i16* [ [[INCDEC_PTR36:%.*]], [[FOR_BODY29]] ], [ [[PSRCA_ADDR_1_LCSSA]], [[FOR_BODY29_PREHEADER]] ] +; CHECK-NEXT: [[PSRCB_ADDR_281:%.*]] = phi i16* [ [[INCDEC_PTR37:%.*]], [[FOR_BODY29]] ], [ [[PSRCB_ADDR_1_LCSSA]], [[FOR_BODY29_PREHEADER]] ] ; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_282]], i32 [[J_184]] -; CHECK-NEXT: [[TMP51:%.*]] = load i16, i16* [[ARRAYIDX30]], align 2 -; CHECK-NEXT: 
[[CONV31:%.*]] = sext i16 [[TMP51]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX30]], align 2 +; CHECK-NEXT: [[CONV31:%.*]] = sext i16 [[TMP11]] to i32 ; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_281]], i32 [[J_184]] -; CHECK-NEXT: [[TMP52:%.*]] = load i16, i16* [[ARRAYIDX32]], align 2 -; CHECK-NEXT: [[CONV33:%.*]] = sext i16 [[TMP52]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX32]], align 2 +; CHECK-NEXT: [[CONV33:%.*]] = sext i16 [[TMP12]] to i32 ; CHECK-NEXT: [[MUL34:%.*]] = mul nsw i32 [[CONV33]], [[CONV31]] -; CHECK-NEXT: [[TMP53:%.*]] = load i32, i32* [[PDEST_ADDR_283]], align 4 -; CHECK-NEXT: [[ADD35:%.*]] = add nsw i32 [[MUL34]], [[TMP53]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[PDEST_ADDR_283]], align 4 +; CHECK-NEXT: [[ADD35:%.*]] = add nsw i32 [[MUL34]], [[TMP13]] ; CHECK-NEXT: store i32 [[ADD35]], i32* [[PDEST_ADDR_283]], align 4 -; CHECK-NEXT: [[INCDEC_PTR36:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_282]], i32 1 -; CHECK-NEXT: [[INCDEC_PTR37:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_281]], i32 1 -; CHECK-NEXT: [[INCDEC_PTR38:%.*]] = getelementptr inbounds i32, i32* [[PDEST_ADDR_283]], i32 1 -; CHECK-NEXT: [[INC:%.*]] = add nuw i32 [[J_184]], 1 -; CHECK-NEXT: [[ARRAYIDX30_1:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36]], i32 [[INC]] -; CHECK-NEXT: [[TMP54:%.*]] = load i16, i16* [[ARRAYIDX30_1]], align 2 -; CHECK-NEXT: [[CONV31_1:%.*]] = sext i16 [[TMP54]] to i32 -; CHECK-NEXT: [[ARRAYIDX32_1:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37]], i32 [[INC]] -; CHECK-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX32_1]], align 2 -; CHECK-NEXT: [[CONV33_1:%.*]] = sext i16 [[TMP55]] to i32 -; CHECK-NEXT: [[MUL34_1:%.*]] = mul nsw i32 [[CONV33_1]], [[CONV31_1]] -; CHECK-NEXT: [[TMP56:%.*]] = load i32, i32* [[INCDEC_PTR38]], align 4 -; CHECK-NEXT: [[ADD35_1:%.*]] = add nsw i32 [[MUL34_1]], [[TMP56]] -; CHECK-NEXT: store i32 [[ADD35_1]], i32* [[INCDEC_PTR38]], align 4 -; CHECK-NEXT: [[INCDEC_PTR36_1:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36]], i32 1 -; CHECK-NEXT: [[INCDEC_PTR37_1:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37]], i32 1 -; CHECK-NEXT: [[INCDEC_PTR38_1:%.*]] = getelementptr inbounds i32, i32* [[INCDEC_PTR38]], i32 1 -; CHECK-NEXT: [[INC_1:%.*]] = add nuw i32 [[INC]], 1 -; CHECK-NEXT: [[ARRAYIDX30_2:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_1]], i32 [[INC_1]] -; CHECK-NEXT: [[TMP57:%.*]] = load i16, i16* [[ARRAYIDX30_2]], align 2 -; CHECK-NEXT: [[CONV31_2:%.*]] = sext i16 [[TMP57]] to i32 -; CHECK-NEXT: [[ARRAYIDX32_2:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_1]], i32 [[INC_1]] -; CHECK-NEXT: [[TMP58:%.*]] = load i16, i16* [[ARRAYIDX32_2]], align 2 -; CHECK-NEXT: [[CONV33_2:%.*]] = sext i16 [[TMP58]] to i32 -; CHECK-NEXT: [[MUL34_2:%.*]] = mul nsw i32 [[CONV33_2]], [[CONV31_2]] -; CHECK-NEXT: [[TMP59:%.*]] = load i32, i32* [[INCDEC_PTR38_1]], align 4 -; CHECK-NEXT: [[ADD35_2:%.*]] = add nsw i32 [[MUL34_2]], [[TMP59]] -; CHECK-NEXT: store i32 [[ADD35_2]], i32* [[INCDEC_PTR38_1]], align 4 -; CHECK-NEXT: [[INCDEC_PTR36_2:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_1]], i32 1 -; CHECK-NEXT: [[INCDEC_PTR37_2:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_1]], i32 1 -; CHECK-NEXT: [[INCDEC_PTR38_2:%.*]] = getelementptr inbounds i32, i32* [[INCDEC_PTR38_1]], i32 1 -; CHECK-NEXT: [[INC_2:%.*]] = add nuw i32 [[INC_1]], 1 -; CHECK-NEXT: [[ARRAYIDX30_3:%.*]] = getelementptr inbounds i16, 
i16* [[INCDEC_PTR36_2]], i32 [[INC_2]] -; CHECK-NEXT: [[TMP60:%.*]] = load i16, i16* [[ARRAYIDX30_3]], align 2 -; CHECK-NEXT: [[CONV31_3:%.*]] = sext i16 [[TMP60]] to i32 -; CHECK-NEXT: [[ARRAYIDX32_3:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_2]], i32 [[INC_2]] -; CHECK-NEXT: [[TMP61:%.*]] = load i16, i16* [[ARRAYIDX32_3]], align 2 -; CHECK-NEXT: [[CONV33_3:%.*]] = sext i16 [[TMP61]] to i32 -; CHECK-NEXT: [[MUL34_3:%.*]] = mul nsw i32 [[CONV33_3]], [[CONV31_3]] -; CHECK-NEXT: [[TMP62:%.*]] = load i32, i32* [[INCDEC_PTR38_2]], align 4 -; CHECK-NEXT: [[ADD35_3:%.*]] = add nsw i32 [[MUL34_3]], [[TMP62]] -; CHECK-NEXT: store i32 [[ADD35_3]], i32* [[INCDEC_PTR38_2]], align 4 -; CHECK-NEXT: [[INCDEC_PTR36_3]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_2]], i32 1 -; CHECK-NEXT: [[INCDEC_PTR37_3]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_2]], i32 1 -; CHECK-NEXT: [[INCDEC_PTR38_3]] = getelementptr inbounds i32, i32* [[INCDEC_PTR38_2]], i32 1 -; CHECK-NEXT: [[INC_3]] = add nuw i32 [[INC_2]], 1 -; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp eq i32 [[INC_3]], [[ADD25]] -; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[FOR_END40_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY29]] -; CHECK: for.end40.loopexit.unr-lcssa: -; CHECK-NEXT: br label [[FOR_END40_LOOPEXIT]] +; CHECK-NEXT: [[INCDEC_PTR36]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_282]], i32 1 +; CHECK-NEXT: [[INCDEC_PTR37]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_281]], i32 1 +; CHECK-NEXT: [[INCDEC_PTR38]] = getelementptr inbounds i32, i32* [[PDEST_ADDR_283]], i32 1 +; CHECK-NEXT: [[INC]] = add nuw i32 [[J_184]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ADD25]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END40_LOOPEXIT:%.*]], label [[FOR_BODY29]] ; CHECK: for.end40.loopexit: -; CHECK-NEXT: [[SCEVGEP93:%.*]] = getelementptr i16, i16* [[PSRCB_ADDR_1_LCSSA]], i32 [[TMP43]] -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[PSRCA_ADDR_1_LCSSA]], i32 [[TMP43]] -; CHECK-NEXT: [[SCEVGEP94:%.*]] = getelementptr i32, i32* [[PDEST_ADDR_1_LCSSA]], i32 [[TMP43]] +; CHECK-NEXT: [[SCEVGEP93:%.*]] = getelementptr i16, i16* [[PSRCB_ADDR_1_LCSSA]], i32 [[TMP10]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[PSRCA_ADDR_1_LCSSA]], i32 [[TMP10]] +; CHECK-NEXT: [[SCEVGEP94:%.*]] = getelementptr i32, i32* [[PDEST_ADDR_1_LCSSA]], i32 [[TMP10]] ; CHECK-NEXT: br label [[FOR_END40]] ; CHECK: for.end40: ; CHECK-NEXT: [[PSRCB_ADDR_2_LCSSA]] = phi i16* [ [[PSRCB_ADDR_1_LCSSA]], [[FOR_END]] ], [ [[SCEVGEP93]], [[FOR_END40_LOOPEXIT]] ] @@ -364,110 +112,6 @@ define dso_local arm_aapcscc void @test(i32* nocapture %pDest, i16* nocapture re ; CHECK-NEXT: [[INC42]] = add nuw i32 [[I_092]], 1 ; CHECK-NEXT: [[EXITCOND95:%.*]] = icmp eq i32 [[INC42]], [[BLKCNT]] ; CHECK-NEXT: br i1 [[EXITCOND95]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] -; CHECK: for.body3.epil.1: -; CHECK-NEXT: [[TMP63:%.*]] = load i16, i16* [[ADD_PTR_EPIL]], align 2 -; CHECK-NEXT: [[CONV_EPIL_1:%.*]] = sext i16 [[TMP63]] to i32 -; CHECK-NEXT: [[TMP64:%.*]] = load i16, i16* [[ADD_PTR23_EPIL]], align 2 -; CHECK-NEXT: [[CONV5_EPIL_1:%.*]] = sext i16 [[TMP64]] to i32 -; CHECK-NEXT: [[MUL_EPIL_1:%.*]] = mul nsw i32 [[CONV5_EPIL_1]], [[CONV_EPIL_1]] -; CHECK-NEXT: [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL]], i32 1 -; CHECK-NEXT: [[TMP65:%.*]] = load i16, i16* [[ARRAYIDX6_EPIL_1]], align 2 -; CHECK-NEXT: [[CONV7_EPIL_1:%.*]] = sext i16 [[TMP65]] to i32 -; CHECK-NEXT: [[ARRAYIDX8_EPIL_1:%.*]] = 
getelementptr inbounds i16, i16* [[ADD_PTR23_EPIL]], i32 1 -; CHECK-NEXT: [[TMP66:%.*]] = load i16, i16* [[ARRAYIDX8_EPIL_1]], align 2 -; CHECK-NEXT: [[CONV9_EPIL_1:%.*]] = sext i16 [[TMP66]] to i32 -; CHECK-NEXT: [[MUL10_EPIL_1:%.*]] = mul nsw i32 [[CONV9_EPIL_1]], [[CONV7_EPIL_1]] -; CHECK-NEXT: [[ARRAYIDX11_EPIL_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL]], i32 2 -; CHECK-NEXT: [[TMP67:%.*]] = load i16, i16* [[ARRAYIDX11_EPIL_1]], align 2 -; CHECK-NEXT: [[CONV12_EPIL_1:%.*]] = sext i16 [[TMP67]] to i32 -; CHECK-NEXT: [[ARRAYIDX13_EPIL_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_EPIL]], i32 3 -; CHECK-NEXT: [[TMP68:%.*]] = load i16, i16* [[ARRAYIDX13_EPIL_1]], align 2 -; CHECK-NEXT: [[CONV14_EPIL_1:%.*]] = sext i16 [[TMP68]] to i32 -; CHECK-NEXT: [[MUL15_EPIL_1:%.*]] = mul nsw i32 [[CONV14_EPIL_1]], [[CONV12_EPIL_1]] -; CHECK-NEXT: [[ARRAYIDX17_EPIL_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL]], i32 3 -; CHECK-NEXT: [[TMP69:%.*]] = load i16, i16* [[ARRAYIDX17_EPIL_1]], align 2 -; CHECK-NEXT: [[CONV18_EPIL_1:%.*]] = sext i16 [[TMP69]] to i32 -; CHECK-NEXT: [[ADD21_EPIL_1:%.*]] = add i32 [[MUL10_EPIL_1]], [[MUL_EPIL_1]] -; CHECK-NEXT: [[ADD_EPIL_1:%.*]] = add i32 [[ADD21_EPIL_1]], [[CONV14_EPIL_1]] -; CHECK-NEXT: [[ADD16_EPIL_1:%.*]] = add i32 [[ADD_EPIL_1]], [[MUL15_EPIL_1]] -; CHECK-NEXT: [[ADD22_EPIL_1:%.*]] = add i32 [[ADD16_EPIL_1]], [[CONV18_EPIL_1]] -; CHECK-NEXT: store i32 [[ADD22_EPIL_1]], i32* [[INCDEC_PTR_EPIL]], align 4 -; CHECK-NEXT: [[ADD_PTR_EPIL_1]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL]], i32 4 -; CHECK-NEXT: [[ADD_PTR23_EPIL_1]] = getelementptr inbounds i16, i16* [[ADD_PTR23_EPIL]], i32 4 -; CHECK-NEXT: [[INCDEC_PTR_EPIL_1]] = getelementptr inbounds i32, i32* [[INCDEC_PTR_EPIL]], i32 1 -; CHECK-NEXT: [[ADD24_EPIL_1:%.*]] = add nuw nsw i32 [[ADD24_EPIL]], 4 -; CHECK-NEXT: [[EPIL_ITER_SUB_1:%.*]] = sub i32 [[EPIL_ITER_SUB]], 1 -; CHECK-NEXT: [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 [[EPIL_ITER_SUB_1]], 0 -; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label [[FOR_BODY3_EPIL_2]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] -; CHECK: for.body3.epil.2: -; CHECK-NEXT: [[TMP70:%.*]] = load i16, i16* [[ADD_PTR_EPIL_1]], align 2 -; CHECK-NEXT: [[CONV_EPIL_2:%.*]] = sext i16 [[TMP70]] to i32 -; CHECK-NEXT: [[TMP71:%.*]] = load i16, i16* [[ADD_PTR23_EPIL_1]], align 2 -; CHECK-NEXT: [[CONV5_EPIL_2:%.*]] = sext i16 [[TMP71]] to i32 -; CHECK-NEXT: [[MUL_EPIL_2:%.*]] = mul nsw i32 [[CONV5_EPIL_2]], [[CONV_EPIL_2]] -; CHECK-NEXT: [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL_1]], i32 1 -; CHECK-NEXT: [[TMP72:%.*]] = load i16, i16* [[ARRAYIDX6_EPIL_2]], align 2 -; CHECK-NEXT: [[CONV7_EPIL_2:%.*]] = sext i16 [[TMP72]] to i32 -; CHECK-NEXT: [[ARRAYIDX8_EPIL_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_EPIL_1]], i32 1 -; CHECK-NEXT: [[TMP73:%.*]] = load i16, i16* [[ARRAYIDX8_EPIL_2]], align 2 -; CHECK-NEXT: [[CONV9_EPIL_2:%.*]] = sext i16 [[TMP73]] to i32 -; CHECK-NEXT: [[MUL10_EPIL_2:%.*]] = mul nsw i32 [[CONV9_EPIL_2]], [[CONV7_EPIL_2]] -; CHECK-NEXT: [[ARRAYIDX11_EPIL_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL_1]], i32 2 -; CHECK-NEXT: [[TMP74:%.*]] = load i16, i16* [[ARRAYIDX11_EPIL_2]], align 2 -; CHECK-NEXT: [[CONV12_EPIL_2:%.*]] = sext i16 [[TMP74]] to i32 -; CHECK-NEXT: [[ARRAYIDX13_EPIL_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_EPIL_1]], i32 3 -; CHECK-NEXT: [[TMP75:%.*]] = load i16, i16* [[ARRAYIDX13_EPIL_2]], align 2 -; CHECK-NEXT: [[CONV14_EPIL_2:%.*]] = sext i16 [[TMP75]] to 
i32 -; CHECK-NEXT: [[MUL15_EPIL_2:%.*]] = mul nsw i32 [[CONV14_EPIL_2]], [[CONV12_EPIL_2]] -; CHECK-NEXT: [[ARRAYIDX17_EPIL_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL_1]], i32 3 -; CHECK-NEXT: [[TMP76:%.*]] = load i16, i16* [[ARRAYIDX17_EPIL_2]], align 2 -; CHECK-NEXT: [[CONV18_EPIL_2:%.*]] = sext i16 [[TMP76]] to i32 -; CHECK-NEXT: [[ADD21_EPIL_2:%.*]] = add i32 [[MUL10_EPIL_2]], [[MUL_EPIL_2]] -; CHECK-NEXT: [[ADD_EPIL_2:%.*]] = add i32 [[ADD21_EPIL_2]], [[CONV14_EPIL_2]] -; CHECK-NEXT: [[ADD16_EPIL_2:%.*]] = add i32 [[ADD_EPIL_2]], [[MUL15_EPIL_2]] -; CHECK-NEXT: [[ADD22_EPIL_2:%.*]] = add i32 [[ADD16_EPIL_2]], [[CONV18_EPIL_2]] -; CHECK-NEXT: store i32 [[ADD22_EPIL_2]], i32* [[INCDEC_PTR_EPIL_1]], align 4 -; CHECK-NEXT: [[ADD_PTR_EPIL_2]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL_1]], i32 4 -; CHECK-NEXT: [[ADD_PTR23_EPIL_2]] = getelementptr inbounds i16, i16* [[ADD_PTR23_EPIL_1]], i32 4 -; CHECK-NEXT: [[INCDEC_PTR_EPIL_2]] = getelementptr inbounds i32, i32* [[INCDEC_PTR_EPIL_1]], i32 1 -; CHECK-NEXT: [[ADD24_EPIL_2:%.*]] = add nuw nsw i32 [[ADD24_EPIL_1]], 4 -; CHECK-NEXT: [[EPIL_ITER_SUB_2:%.*]] = sub i32 [[EPIL_ITER_SUB_1]], 1 -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] -; CHECK: for.body29.prol.1: -; CHECK-NEXT: [[ARRAYIDX30_PROL_1:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_PROL]], i32 [[INC_PROL]] -; CHECK-NEXT: [[TMP77:%.*]] = load i16, i16* [[ARRAYIDX30_PROL_1]], align 2 -; CHECK-NEXT: [[CONV31_PROL_1:%.*]] = sext i16 [[TMP77]] to i32 -; CHECK-NEXT: [[ARRAYIDX32_PROL_1:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_PROL]], i32 [[INC_PROL]] -; CHECK-NEXT: [[TMP78:%.*]] = load i16, i16* [[ARRAYIDX32_PROL_1]], align 2 -; CHECK-NEXT: [[CONV33_PROL_1:%.*]] = sext i16 [[TMP78]] to i32 -; CHECK-NEXT: [[MUL34_PROL_1:%.*]] = mul nsw i32 [[CONV33_PROL_1]], [[CONV31_PROL_1]] -; CHECK-NEXT: [[TMP79:%.*]] = load i32, i32* [[INCDEC_PTR38_PROL]], align 4 -; CHECK-NEXT: [[ADD35_PROL_1:%.*]] = add nsw i32 [[MUL34_PROL_1]], [[TMP79]] -; CHECK-NEXT: store i32 [[ADD35_PROL_1]], i32* [[INCDEC_PTR38_PROL]], align 4 -; CHECK-NEXT: [[INCDEC_PTR36_PROL_1]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_PROL]], i32 1 -; CHECK-NEXT: [[INCDEC_PTR37_PROL_1]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_PROL]], i32 1 -; CHECK-NEXT: [[INCDEC_PTR38_PROL_1]] = getelementptr inbounds i32, i32* [[INCDEC_PTR38_PROL]], i32 1 -; CHECK-NEXT: [[INC_PROL_1]] = add nuw i32 [[INC_PROL]], 1 -; CHECK-NEXT: [[PROL_ITER_SUB_1:%.*]] = sub i32 [[PROL_ITER_SUB]], 1 -; CHECK-NEXT: [[PROL_ITER_CMP_1:%.*]] = icmp ne i32 [[PROL_ITER_SUB_1]], 0 -; CHECK-NEXT: br i1 [[PROL_ITER_CMP_1]], label [[FOR_BODY29_PROL_2]], label [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA]] -; CHECK: for.body29.prol.2: -; CHECK-NEXT: [[ARRAYIDX30_PROL_2:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_PROL_1]], i32 [[INC_PROL_1]] -; CHECK-NEXT: [[TMP80:%.*]] = load i16, i16* [[ARRAYIDX30_PROL_2]], align 2 -; CHECK-NEXT: [[CONV31_PROL_2:%.*]] = sext i16 [[TMP80]] to i32 -; CHECK-NEXT: [[ARRAYIDX32_PROL_2:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_PROL_1]], i32 [[INC_PROL_1]] -; CHECK-NEXT: [[TMP81:%.*]] = load i16, i16* [[ARRAYIDX32_PROL_2]], align 2 -; CHECK-NEXT: [[CONV33_PROL_2:%.*]] = sext i16 [[TMP81]] to i32 -; CHECK-NEXT: [[MUL34_PROL_2:%.*]] = mul nsw i32 [[CONV33_PROL_2]], [[CONV31_PROL_2]] -; CHECK-NEXT: [[TMP82:%.*]] = load i32, i32* [[INCDEC_PTR38_PROL_1]], align 4 -; CHECK-NEXT: [[ADD35_PROL_2:%.*]] = add nsw i32 [[MUL34_PROL_2]], [[TMP82]] -; CHECK-NEXT: store i32 
[[ADD35_PROL_2]], i32* [[INCDEC_PTR38_PROL_1]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR36_PROL_2]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_PROL_1]], i32 1
-; CHECK-NEXT:    [[INCDEC_PTR37_PROL_2]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_PROL_1]], i32 1
-; CHECK-NEXT:    [[INCDEC_PTR38_PROL_2]] = getelementptr inbounds i32, i32* [[INCDEC_PTR38_PROL_1]], i32 1
-; CHECK-NEXT:    [[INC_PROL_2]] = add nuw i32 [[INC_PROL_1]], 1
-; CHECK-NEXT:    [[PROL_ITER_SUB_2:%.*]] = sub i32 [[PROL_ITER_SUB_1]], 1
-; CHECK-NEXT:    br label [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA]]
 ;
 entry:
   %cmp88 = icmp eq i32 %blkCnt, 0
@@ -576,3 +220,5 @@ for.end40:                                        ; preds = %for.end40.loopexit,
   %exitcond95 = icmp eq i32 %inc42, %blkCnt
   br i1 %exitcond95, label %for.cond.cleanup, label %for.body
 }
+
+attributes #0 = { minsize optsize }

From 3c42c0dcf631ad6b90e718df895c05f79718659f Mon Sep 17 00:00:00 2001
From: aartbik
Date: Wed, 9 Sep 2020 11:11:52 -0700
Subject: [PATCH 0232/1079] [mlir] [VectorOps] Enable 32-bit index
 optimizations

Rationale:
After some discussion we decided that it is safe to assume 32-bit
indices for all subscripting in the vector dialect (it is unlikely the
dialect will be used, or even work, for such long vectors). So rather
than detecting the specific situations that can exploit 32-bit indices
for higher SIMD parallelism, we just optimize by default and let users
that don't want this opt out.

Reviewed By: nicolasvasilache, bkramer

Differential Revision: https://reviews.llvm.org/D87404
---
 mlir/include/mlir/Conversion/Passes.td        |  2 +-
 .../VectorToLLVM/ConvertVectorToLLVM.h        |  5 +-
 .../VectorToLLVM/vector-to-llvm.mlir          | 51 ++++++++++---------
 3 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index d4b478dbf4ed0..dae59c9e792e0 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -350,7 +350,7 @@ def ConvertVectorToLLVM : Pass<"convert-vector-to-llvm", "ModuleOp"> {
           "bool", /*default=*/"false",
           "Allows llvm to reassociate floating-point reductions for speed">,
     Option<"enableIndexOptimizations", "enable-index-optimizations",
-          "bool", /*default=*/"false",
+          "bool", /*default=*/"true",
           "Allows compiler to assume indices fit in 32-bit if that yields faster code">
   ];
 }
diff --git a/mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h b/mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h
index 81ffa63281357..1a6fe7d166d05 100644
--- a/mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h
+++ b/mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h
@@ -22,7 +22,7 @@ class OperationPass;
 /// ConvertVectorToLLVM pass in include/mlir/Conversion/Passes.td
 struct LowerVectorToLLVMOptions {
   bool reassociateFPReductions = false;
-  bool enableIndexOptimizations = false;
+  bool enableIndexOptimizations = true;
   LowerVectorToLLVMOptions &setReassociateFPReductions(bool b) {
     reassociateFPReductions = b;
     return *this;
@@ -42,8 +42,7 @@ void populateVectorToLLVMMatrixConversionPatterns(
 /// Collect a set of patterns to convert from the Vector dialect to LLVM.
 void populateVectorToLLVMConversionPatterns(
     LLVMTypeConverter &converter, OwningRewritePatternList &patterns,
-    bool reassociateFPReductions = false,
-    bool enableIndexOptimizations = false);
+    bool reassociateFPReductions = false, bool enableIndexOptimizations = true);
 
 /// Create a pass to convert vector operations to the LLVMIR dialect.
std::unique_ptr> createConvertVectorToLLVMPass( diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index e0800c2fd2272..42336b8e9b70e 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -755,34 +755,36 @@ func @transfer_read_1d(%A : memref, %base: index) -> vector<17xf32> { // 2. Create a vector with linear indices [ 0 .. vector_length - 1 ]. // CHECK: %[[linearIndex:.*]] = llvm.mlir.constant(dense // CHECK-SAME: <[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : -// CHECK-SAME: vector<17xi64>) : !llvm.vec<17 x i64> +// CHECK-SAME: vector<17xi32>) : !llvm.vec<17 x i32> // // 3. Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. -// CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i64> +// CHECK: %[[otrunc:.*]] = llvm.trunc %[[BASE]] : !llvm.i64 to !llvm.i32 +// CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i32> // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 -// CHECK: %[[offsetVec2:.*]] = llvm.insertelement %[[BASE]], %[[offsetVec]][%[[c0]] : -// CHECK-SAME: !llvm.i32] : !llvm.vec<17 x i64> +// CHECK: %[[offsetVec2:.*]] = llvm.insertelement %[[otrunc]], %[[offsetVec]][%[[c0]] : +// CHECK-SAME: !llvm.i32] : !llvm.vec<17 x i32> // CHECK: %[[offsetVec3:.*]] = llvm.shufflevector %[[offsetVec2]], %{{.*}} [ // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: !llvm.vec<17 x i64>, !llvm.vec<17 x i64> +// CHECK-SAME: !llvm.vec<17 x i32>, !llvm.vec<17 x i32> // CHECK: %[[offsetVec4:.*]] = llvm.add %[[offsetVec3]], %[[linearIndex]] : -// CHECK-SAME: !llvm.vec<17 x i64> +// CHECK-SAME: !llvm.vec<17 x i32> // // 4. Let dim the memref dimension, compute the vector comparison mask: // [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. dim ] -// CHECK: %[[dimVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i64> +// CHECK: %[[dtrunc:.*]] = llvm.trunc %[[DIM]] : !llvm.i64 to !llvm.i32 +// CHECK: %[[dimVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i32> // CHECK: %[[c01:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 -// CHECK: %[[dimVec2:.*]] = llvm.insertelement %[[DIM]], %[[dimVec]][%[[c01]] : -// CHECK-SAME: !llvm.i32] : !llvm.vec<17 x i64> +// CHECK: %[[dimVec2:.*]] = llvm.insertelement %[[dtrunc]], %[[dimVec]][%[[c01]] : +// CHECK-SAME: !llvm.i32] : !llvm.vec<17 x i32> // CHECK: %[[dimVec3:.*]] = llvm.shufflevector %[[dimVec2]], %{{.*}} [ // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: !llvm.vec<17 x i64>, !llvm.vec<17 x i64> +// CHECK-SAME: !llvm.vec<17 x i32>, !llvm.vec<17 x i32> // CHECK: %[[mask:.*]] = llvm.icmp "slt" %[[offsetVec4]], %[[dimVec3]] : -// CHECK-SAME: !llvm.vec<17 x i64> +// CHECK-SAME: !llvm.vec<17 x i32> // // 5. Rewrite as a masked read. // CHECK: %[[PASS_THROUGH:.*]] = llvm.mlir.constant(dense<7.000000e+00> : @@ -801,13 +803,13 @@ func @transfer_read_1d(%A : memref, %base: index) -> vector<17xf32> { // 2. Create a vector with linear indices [ 0 .. vector_length - 1 ]. 
// CHECK: %[[linearIndex_b:.*]] = llvm.mlir.constant(dense // CHECK-SAME: <[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : -// CHECK-SAME: vector<17xi64>) : !llvm.vec<17 x i64> +// CHECK-SAME: vector<17xi32>) : !llvm.vec<17 x i32> // // 3. Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. // CHECK: llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: !llvm.vec<17 x i64>, !llvm.vec<17 x i64> +// CHECK-SAME: !llvm.vec<17 x i32>, !llvm.vec<17 x i32> // CHECK: llvm.add // // 4. Let dim the memref dimension, compute the vector comparison mask: @@ -815,8 +817,8 @@ func @transfer_read_1d(%A : memref, %base: index) -> vector<17xf32> { // CHECK: llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: !llvm.vec<17 x i64>, !llvm.vec<17 x i64> -// CHECK: %[[mask_b:.*]] = llvm.icmp "slt" {{.*}} : !llvm.vec<17 x i64> +// CHECK-SAME: !llvm.vec<17 x i32>, !llvm.vec<17 x i32> +// CHECK: %[[mask_b:.*]] = llvm.icmp "slt" {{.*}} : !llvm.vec<17 x i32> // // 5. Rewrite as a masked write. // CHECK: llvm.intr.masked.store %[[loaded]], %[[vecPtr_b]], %[[mask_b]] @@ -836,28 +838,29 @@ func @transfer_read_2d_to_1d(%A : memref, %base0: index, %base1: index) // CHECK-SAME: !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // // Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. -// CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i64> +// CHECK: %[[trunc:.*]] = llvm.trunc %[[BASE_1]] : !llvm.i64 to !llvm.i32 +// CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i32> // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 -// Here we check we properly use %BASE_1 -// CHECK: %[[offsetVec2:.*]] = llvm.insertelement %[[BASE_1]], %[[offsetVec]][%[[c0]] : -// CHECK-SAME: !llvm.i32] : !llvm.vec<17 x i64> +// CHECK: %[[offsetVec2:.*]] = llvm.insertelement %[[trunc]], %[[offsetVec]][%[[c0]] : +// CHECK-SAME: !llvm.i32] : !llvm.vec<17 x i32> // CHECK: %[[offsetVec3:.*]] = llvm.shufflevector %[[offsetVec2]], %{{.*}} [ // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] : +// CHECK-SAME: !llvm.vec<17 x i32>, !llvm.vec<17 x i32> // // Let dim the memref dimension, compute the vector comparison mask: // [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. 
dim ] -// Here we check we properly use %DIM[1] -// CHECK: %[[dimVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i64> +// CHECK: %[[dimtrunc:.*]] = llvm.trunc %[[DIM]] : !llvm.i64 to !llvm.i32 +// CHECK: %[[dimVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i32> // CHECK: %[[c01:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 -// CHECK: %[[dimVec2:.*]] = llvm.insertelement %[[DIM]], %[[dimVec]][%[[c01]] : -// CHECK-SAME: !llvm.i32] : !llvm.vec<17 x i64> +// CHECK: %[[dimVec2:.*]] = llvm.insertelement %[[dimtrunc]], %[[dimVec]][%[[c01]] : +// CHECK-SAME: !llvm.i32] : !llvm.vec<17 x i32> // CHECK: %[[dimVec3:.*]] = llvm.shufflevector %[[dimVec2]], %{{.*}} [ // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: !llvm.vec<17 x i64>, !llvm.vec<17 x i64> +// CHECK-SAME: !llvm.vec<17 x i32>, !llvm.vec<17 x i32> func @transfer_read_1d_non_zero_addrspace(%A : memref, %base: index) -> vector<17xf32> { %f7 = constant 7.0: f32 From 8060283ff8b73195c400e18acf947e04bf5ec980 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 8 Sep 2020 09:56:45 +0300 Subject: [PATCH 0233/1079] [llvm-readobj] [ARMWinEH] Print set_fp/add_fp differently in epilogues This matches how e.g. stp/ldp and other opcodes are printed differently for epilogues. Also add a missing --strict-whitespace in an existing test that was added explicitly for testing vertical alignment, and change to using temp files for the generated object files. Differential Revision: https://reviews.llvm.org/D87363 --- llvm/test/CodeGen/AArch64/wineh6.mir | 2 +- llvm/test/CodeGen/AArch64/wineh7.mir | 2 +- .../llvm-readobj/COFF/arm64-unwind-opcodes.s | 30 ++++++++++++++++--- llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp | 11 +++++-- 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/wineh6.mir b/llvm/test/CodeGen/AArch64/wineh6.mir index 3ea7c0f20d45c..95a11aa3c4e82 100644 --- a/llvm/test/CodeGen/AArch64/wineh6.mir +++ b/llvm/test/CodeGen/AArch64/wineh6.mir @@ -20,7 +20,7 @@ # CHECK-NEXT: StartOffset: 20 # CHECK-NEXT: EpilogueStartIndex: 4 # CHECK-NEXT: Opcodes [ -# CHECK-NEXT: 0xe1 ; mov fp, sp +# CHECK-NEXT: 0xe1 ; mov sp, fp # CHECK-NEXT: 0x81 ; ldp x29, x30, [sp], #16 # CHECK-NEXT: 0xe4 ; end # CHECK-NEXT: ] diff --git a/llvm/test/CodeGen/AArch64/wineh7.mir b/llvm/test/CodeGen/AArch64/wineh7.mir index c445cbfd6b005..da64b3c002f3d 100644 --- a/llvm/test/CodeGen/AArch64/wineh7.mir +++ b/llvm/test/CodeGen/AArch64/wineh7.mir @@ -21,7 +21,7 @@ # CHECK-NEXT: StartOffset: 13 # CHECK-NEXT: EpilogueStartIndex: 8 # CHECK-NEXT: Opcodes [ -# CHECK-NEXT: 0xe204 ; add fp, sp, #32 +# CHECK-NEXT: 0xe204 ; sub sp, fp, #32 # CHECK-NEXT: 0x44 ; ldp x29, x30, [sp, #32] # CHECK-NEXT: 0xc802 ; ldp x19, x20, [sp, #16] # CHECK-NEXT: 0xcc85 ; ldp x21, x22, [sp], #48 diff --git a/llvm/test/tools/llvm-readobj/COFF/arm64-unwind-opcodes.s b/llvm/test/tools/llvm-readobj/COFF/arm64-unwind-opcodes.s index 98e2da8fb226b..8ac8f6c98e272 100644 --- a/llvm/test/tools/llvm-readobj/COFF/arm64-unwind-opcodes.s +++ b/llvm/test/tools/llvm-readobj/COFF/arm64-unwind-opcodes.s @@ -1,12 +1,25 @@ // REQUIRES: aarch64-registered-target -// RUN: llvm-mc -filetype=obj -triple aarch64-windows %s -o - \ -// RUN: | llvm-readobj --unwind - | FileCheck %s +// RUN: llvm-mc -filetype=obj -triple aarch64-windows %s -o %t.o +// RUN: llvm-readobj --unwind %t.o | FileCheck --strict-whitespace %s // 
CHECK: Prologue [
+// CHECK-NEXT:     0xe202              ; add fp, sp, #16
+// CHECK-NEXT:     0xe1                ; mov fp, sp
 // CHECK-NEXT:     0xdc01              ; str d8, [sp, #8]
 // CHECK-NEXT:     0xd400              ; str x19, [sp, #-8]!
 // CHECK-NEXT:     0xe4                ; end
 // CHECK-NEXT:   ]
+// CHECK-NEXT:   EpilogueScopes [
+// CHECK-NEXT:     EpilogueScope {
+// CHECK-NEXT:       StartOffset:
+// CHECK-NEXT:       EpilogueStartIndex:
+// CHECK-NEXT:       Opcodes [
+// CHECK-NEXT:         0xe202              ; sub sp, fp, #16
+// CHECK-NEXT:         0xe1                ; mov sp, fp
+// CHECK-NEXT:         0xe4                ; end
+// CHECK-NEXT:       ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:   ]
 
 .section .pdata,"dr"
         .long func@IMGREL
@@ -16,9 +29,18 @@
         .globl func
 func:
         str x19, [sp, #-8]!
-        str d8, [sp, #8]
+        str d8, [sp, #8]
+        mov x29, sp
+        add x29, sp, #16
+        nop
+        sub sp, x29, #16
+        mov sp, x29
         ret
 
 .section .xdata,"dr"
 "$unwind$func":
-.long 0x10000002, 0x00d401dc, 0xe3e3e3e4
+.byte 0x08, 0x00, 0x40, 0x18
+.byte 0x05, 0x00, 0x00, 0x02
+.byte 0xe2, 0x02, 0xe1, 0xdc
+.byte 0x01, 0xd4, 0x00, 0xe4
+.byte 0xe2, 0x02, 0xe1, 0xe4
diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
index d753185177050..c2a84e3ba4835 100644
--- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
+++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
@@ -746,7 +746,9 @@ bool Decoder::opcode_alloc_l(const uint8_t *OC, unsigned &Offset,
 
 bool Decoder::opcode_setfp(const uint8_t *OC, unsigned &Offset, unsigned Length,
                            bool Prologue) {
-  SW.startLine() << format("0x%02x                ; mov fp, sp\n", OC[Offset]);
+  SW.startLine() << format("0x%02x                ; mov %s, %s\n", OC[Offset],
+                           static_cast<const char *>(Prologue ? "fp" : "sp"),
+                           static_cast<const char *>(Prologue ? "sp" : "fp"));
   ++Offset;
   return false;
 }
@@ -754,8 +756,11 @@ bool Decoder::opcode_setfp(const uint8_t *OC, unsigned &Offset, unsigned Length,
 bool Decoder::opcode_addfp(const uint8_t *OC, unsigned &Offset, unsigned Length,
                            bool Prologue) {
   unsigned NumBytes = OC[Offset + 1] << 3;
-  SW.startLine() << format("0x%02x%02x            ; add fp, sp, #%u\n",
-                           OC[Offset], OC[Offset + 1], NumBytes);
+  SW.startLine() << format(
+      "0x%02x%02x            ; %s %s, %s, #%u\n", OC[Offset], OC[Offset + 1],
+      static_cast<const char *>(Prologue ? "add" : "sub"),
+      static_cast<const char *>(Prologue ? "fp" : "sp"),
+      static_cast<const char *>(Prologue ? "sp" : "fp"), NumBytes);
   Offset += 2;
   return false;
 }

From 6313f5561945930e9a5ec63cb187605ce741bb61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?=
Date: Tue, 8 Sep 2020 23:14:42 +0300
Subject: [PATCH 0234/1079] [llvm-readobj] [ARMWinEH] Fix printing of
 exception handlers with packed epilogues

If there's a packed epilogue (indicated by the flag E), the
EpilogueCount() field should actually be interpreted as EpilogueOffset.

Differential Revision: https://reviews.llvm.org/D87365
---
 llvm/include/llvm/Support/ARMWinEH.h          |  5 +--
 .../llvm-readobj/COFF/arm64-packed-epilog.s   | 34 +++++++++++++++++++
 2 files changed, 37 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/tools/llvm-readobj/COFF/arm64-packed-epilog.s

diff --git a/llvm/include/llvm/Support/ARMWinEH.h b/llvm/include/llvm/Support/ARMWinEH.h
index 857a0d3814a8f..83ba044ed446d 100644
--- a/llvm/include/llvm/Support/ARMWinEH.h
+++ b/llvm/include/llvm/Support/ARMWinEH.h
@@ -416,12 +416,13 @@ struct ExceptionDataRecord {
 
   uint32_t ExceptionHandlerRVA() const {
     assert(X() && "Exception Handler RVA is only valid if the X bit is set");
-    return Data[HeaderWords(*this) + EpilogueCount() + CodeWords()];
+    return Data[HeaderWords(*this) + (E() ? 0 : EpilogueCount()) + CodeWords()];
   }
 
   uint32_t ExceptionHandlerParameter() const {
     assert(X() && "Exception Handler RVA is only valid if the X bit is set");
-    return Data[HeaderWords(*this) + EpilogueCount() + CodeWords() + 1];
+    return Data[HeaderWords(*this) + (E() ? 0 : EpilogueCount()) + CodeWords() +
+                1];
   }
 };
 
diff --git a/llvm/test/tools/llvm-readobj/COFF/arm64-packed-epilog.s b/llvm/test/tools/llvm-readobj/COFF/arm64-packed-epilog.s
new file mode 100644
index 0000000000000..c3bfe5a9cf559
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/COFF/arm64-packed-epilog.s
@@ -0,0 +1,34 @@
+// REQUIRES: aarch64-registered-target
+// RUN: llvm-mc -filetype=obj -triple aarch64-windows %s -o %t.o
+// RUN: llvm-readobj --unwind %t.o | FileCheck %s
+
+// CHECK:      ExceptionData {
+// CHECK-NEXT:   FunctionLength: 4
+// CHECK-NEXT:   Version: 0
+// CHECK-NEXT:   ExceptionData: Yes
+// CHECK-NEXT:   EpiloguePacked: Yes
+// CHECK-NEXT:   EpilogueOffset: 0
+// CHECK-NEXT:   ByteCodeLength: 4
+// CHECK-NEXT:   Prologue [
+// CHECK-NEXT:     0xe4                ; end
+// CHECK-NEXT:   ]
+// CHECK-NEXT:   ExceptionHandler [
+// CHECK-NEXT:     Routine: 0x11223344
+// CHECK-NEXT:     Parameter: 0x55667788
+// CHECK-NEXT:   ]
+
+.section .pdata,"dr"
+        .long func@IMGREL
+        .long "$unwind$func"@IMGREL
+
+        .text
+        .globl func
+func:
+        ret
+
+.section .xdata,"dr"
+"$unwind$func":
+.byte 0x01, 0x00, 0x30, 0x08
+.byte 0xe4, 0xe3, 0xe3, 0xe3
+.byte 0x44, 0x33, 0x22, 0x11
+.byte 0x88, 0x77, 0x66, 0x55

From b81c57d646e49c15de1b6e2938b8689b7854a02b Mon Sep 17 00:00:00 2001
From: Sam Tebbs
Date: Wed, 9 Sep 2020 14:01:02 +0100
Subject: [PATCH 0235/1079] [ARM][LowOverheadLoops] Allow tail predication on
 predicated instructions with unknown lane values

The effects of an unpredicated vector instruction with unknown lanes
cannot be predicted, so such instructions cannot be tail predicated.
This does not apply to predicated vector instructions, so this patch
allows tail predication on them.
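
In terms of the per-instruction decision in ValidateLiveOuts, the change
amounts to the following shape (a condensed sketch, not the verbatim
function; only isPredicated, retainsOrReduces and FalseLanesUnknown are
taken from the real code, and the first guard is a paraphrase since it
lies outside the hunk below):

  if (/* instruction is known to zero its false lanes */)
    continue;
  else if (!isPredicated && retainsOrReduces)
    return false;                  // unpredicated and order-sensitive: bail
  else if (!isPredicated)          // previously a bare 'else'
    FalseLanesUnknown.insert(&MI); // predicated defs stay out of this set
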
Differential Revision: https://reviews.llvm.org/D87376 --- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 2 +- .../predicated-liveout-unknown-lanes.ll | 44 +++++++++++++++++++ .../test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll | 20 +++------ 3 files changed, 50 insertions(+), 16 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 69e188fe5f888..755c2e5eb6665 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -723,7 +723,7 @@ bool LowOverheadLoop::ValidateLiveOuts() { continue; else if (!isPredicated && retainsOrReduces) return false; - else + else if (!isPredicated) FalseLanesUnknown.insert(&MI); } diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll new file mode 100644 index 0000000000000..f6e175d792d14 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -O3 -tail-predication=force-enabled-no-reductions %s -o - | FileCheck %s + +define arm_aapcs_vfpcc <4 x float> @arm_max_no_idx_f32_mve(float* %pSrc, i32 %blockSize, float* nocapture %pResult) { +; CHECK-LABEL: arm_max_no_idx_f32_mve: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: subs r2, r1, #4 +; CHECK-NEXT: adr r3, .LCPI0_0 +; CHECK-NEXT: vldrw.u32 q0, [r3] +; CHECK-NEXT: dlstp.32 lr, r1 +; CHECK-NEXT: .LBB0_1: @ %do.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q1, [r0], #16 +; CHECK-NEXT: vmaxnm.f32 q0, q1, q0 +; CHECK-NEXT: letp lr, .LBB0_1 +; CHECK-NEXT: @ %bb.2: @ %do.end +; CHECK-NEXT: pop {r7, pc} +entry: + br label %do.body + +do.body: ; preds = %do.body, %entry + %blockSize.addr.0 = phi i32 [ %blockSize, %entry ], [ %sub, %do.body ] + %curExtremValVec.0 = phi <4 x float> [ , %entry ], [ %3, %do.body ] + %pSrc.addr.0 = phi float* [ %pSrc, %entry ], [ %add.ptr, %do.body ] + %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blockSize.addr.0) + %1 = bitcast float* %pSrc.addr.0 to <4 x float>* + %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) + %3 = tail call fast <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> %2, <4 x float> %curExtremValVec.0, i32 0, <4 x i1> %0, <4 x float> %curExtremValVec.0) + %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4 + %sub = add i32 %blockSize.addr.0, -4 + %cmp = icmp sgt i32 %sub, 0 + br i1 %cmp, label %do.body, label %do.end + +do.end: ; preds = %do.body + ret <4 x float> %3 +} + +declare <4 x i1> @llvm.arm.mve.vctp32(i32) + +declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) + +declare <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float>, <4 x float>, i32, <4 x i1>, <4 x float>) diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll index ed7e84a899d24..311a06a675771 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll @@ -9,32 +9,22 @@ define void @arm_min_helium_f32(float* %pSrc, i32 
%blockSize, float* nocapture % ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: movs r6, #0 -; CHECK-NEXT: mov r12, r1 ; CHECK-NEXT: vidup.u32 q2, r6, #1 -; CHECK-NEXT: cmp r1, #4 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge.w r12, #4 -; CHECK-NEXT: sub.w r6, r1, r12 -; CHECK-NEXT: adds r6, #3 -; CHECK-NEXT: mov.w lr, #1 ; CHECK-NEXT: adr r4, .LCPI0_0 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: add.w lr, lr, r6, lsr #2 ; CHECK-NEXT: vldrw.u32 q1, [r4] ; CHECK-NEXT: vmov.i32 q3, #0x4 ; CHECK-NEXT: mov r12, r1 -; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r12 -; CHECK-NEXT: sub.w r12, r12, #4 -; CHECK-NEXT: vpstttt -; CHECK-NEXT: vldrwt.u32 q4, [r0], #16 -; CHECK-NEXT: vcmpt.f32 ge, q1, q4 +; CHECK-NEXT: vldrw.u32 q4, [r0], #16 +; CHECK-NEXT: vcmp.f32 ge, q1, q4 +; CHECK-NEXT: vpstt ; CHECK-NEXT: vmovt q1, q4 ; CHECK-NEXT: vmovt q0, q2 ; CHECK-NEXT: vadd.i32 q2, q2, q3 -; CHECK-NEXT: le lr, .LBB0_1 +; CHECK-NEXT: letp lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %do.end ; CHECK-NEXT: vldr s8, .LCPI0_1 ; CHECK-NEXT: vdup.32 q3, r1 From f51e55e09eefbbc57fdd802f5f17e34749ba03ec Mon Sep 17 00:00:00 2001 From: Kamil Rytarowski Date: Thu, 10 Sep 2020 11:44:12 +0200 Subject: [PATCH 0236/1079] [compiler-rt] [netbsd] Reintroduce __sanitizer_protoent Partial revert of https://reviews.llvm.org/D82424 --- .../lib/sanitizer_common/sanitizer_platform_limits_netbsd.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h index ae54a8cf105ee..d80280d9bf8c8 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h @@ -129,6 +129,12 @@ struct __sanitizer_shmid_ds { void *_shm_internal; }; +struct __sanitizer_protoent { + char *p_name; + char **p_aliases; + int p_proto; +}; + struct __sanitizer_netent { char *n_name; char **n_aliases; From 1b9884df8d2d855879a8231c7a432ec8b291d8fa Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Thu, 10 Sep 2020 19:05:24 +0900 Subject: [PATCH 0237/1079] Enable InsertFreeze flag of JumpThreading when used in LTO This patch enables inserting freeze when JumpThreading converts a select to a conditional branch when it is run in LTO. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D85534 --- llvm/lib/Passes/PassBuilder.cpp | 4 ++-- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 9a2e895d7b717..bae84784628d6 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1508,7 +1508,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); - FPM.addPass(JumpThreadingPass()); + FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true)); // Do a post inline PGO instrumentation and use pass. This is a context // sensitive PGO pass. 
@@ -1575,7 +1575,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
   MainFPM.addPass(InstCombinePass());
   invokePeepholeEPCallbacks(MainFPM, Level);

-  MainFPM.addPass(JumpThreadingPass());
+  MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM)));

   // Create a function that performs CFI checks for cross-DSO calls with
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index caa9a98ecb074..4b72a95120b38 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -998,7 +998,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
   // The IPO passes may leave cruft around. Clean up after them.
   PM.add(createInstructionCombiningPass());
   addExtensionsToPM(EP_Peephole, PM);
-  PM.add(createJumpThreadingPass());
+  PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true));

   // Break up allocas
   PM.add(createSROAPass());
@@ -1061,7 +1061,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {

   PM.add(createInstructionCombiningPass());
   addExtensionsToPM(EP_Peephole, PM);
-  PM.add(createJumpThreadingPass());
+  PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true));
 }

 void PassManagerBuilder::addLateLTOOptimizationPasses(

From b7586afc4dcddd1abc70724585c3eb3857e27f43 Mon Sep 17 00:00:00 2001
From: Gabor Marton
Date: Mon, 7 Sep 2020 16:56:36 +0200
Subject: [PATCH 0238/1079] [analyzer][StdLibraryFunctionsChecker] Remove
 strcasecmp

There are two reasons to remove strcasecmp and strncasecmp:

1) They are also modeled in CStringChecker, and the related argument
constraints are checked there.
2) The argument constraints are checked in CStringChecker::evalCall.
This is fundamentally flawed; they should be checked in checkPreCall.
Even if we set up CStringChecker as a weak dependency of
StdLibraryFunctionsChecker, the latter still always reports the
warning. Besides, CStringChecker fails to discover the constraint
violation before the call, so its evalCall returns `true`;
StdCLibraryFunctions then also tries to evaluate the same call, which
causes an assertion in CheckerManager.

Either we fix CStringChecker to handle the call prerequisites in
checkPreCall, or we must not evaluate any pure functions in
StdCLibraryFunctions that are also handled in CStringChecker. We do the
latter in this patch.
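To make the conflict concrete, here is a minimal C sketch of a call that
both checkers model (the declaration and function name are illustrative;
the new test added below exercises the real strncasecmp case):

  int strcasecmp(const char *s1, const char *s2);

  int f(char *a) {
    /* Both CStringChecker and StdCLibraryFunctions would check the null
       argument here, and only one checker may evaluate a given call. */
    return strcasecmp(a, 0);
  }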
Differential Revision: https://reviews.llvm.org/D87239 --- .../Checkers/StdLibraryFunctionsChecker.cpp | 16 -------------- .../Analysis/std-c-library-functions-POSIX.c | 4 ---- ...library-functions-arg-cstring-dependency.c | 21 +++++++++++++++++++ 3 files changed, 21 insertions(+), 20 deletions(-) create mode 100644 clang/test/Analysis/std-c-library-functions-arg-cstring-dependency.c diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp index b71c19a80da90..c6c37a85306e7 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp @@ -1676,22 +1676,6 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( RetType{IntTy}, NoEvalCall) .ArgConstraint(NotNull(ArgNo(0)))); - // int strcasecmp(const char *s1, const char *s2); - addToFunctionSummaryMap("strcasecmp", - Summary(ArgTypes{ConstCharPtrTy, ConstCharPtrTy}, - RetType{IntTy}, EvalCallAsPure) - .ArgConstraint(NotNull(ArgNo(0))) - .ArgConstraint(NotNull(ArgNo(1)))); - - // int strncasecmp(const char *s1, const char *s2, size_t n); - addToFunctionSummaryMap( - "strncasecmp", Summary(ArgTypes{ConstCharPtrTy, ConstCharPtrTy, SizeTy}, - RetType{IntTy}, EvalCallAsPure) - .ArgConstraint(NotNull(ArgNo(0))) - .ArgConstraint(NotNull(ArgNo(1))) - .ArgConstraint(ArgumentCondition( - 2, WithinRange, Range(0, SizeMax)))); - // int fileno(FILE *stream); addToFunctionSummaryMap( "fileno", Summary(ArgTypes{FilePtrTy}, RetType{IntTy}, NoEvalCall) diff --git a/clang/test/Analysis/std-c-library-functions-POSIX.c b/clang/test/Analysis/std-c-library-functions-POSIX.c index c2c98df864899..9285aee6178bc 100644 --- a/clang/test/Analysis/std-c-library-functions-POSIX.c +++ b/clang/test/Analysis/std-c-library-functions-POSIX.c @@ -63,8 +63,6 @@ // CHECK: Loaded summary for: void rewinddir(DIR *dir) // CHECK: Loaded summary for: void seekdir(DIR *dirp, long loc) // CHECK: Loaded summary for: int rand_r(unsigned int *seedp) -// CHECK: Loaded summary for: int strcasecmp(const char *s1, const char *s2) -// CHECK: Loaded summary for: int strncasecmp(const char *s1, const char *s2, size_t n) // CHECK: Loaded summary for: int fileno(FILE *stream) // CHECK: Loaded summary for: int fseeko(FILE *stream, off_t offset, int whence) // CHECK: Loaded summary for: off_t ftello(FILE *stream) @@ -195,8 +193,6 @@ FILE *fdopen(int fd, const char *mode); void rewinddir(DIR *dir); void seekdir(DIR *dirp, long loc); int rand_r(unsigned int *seedp); -int strcasecmp(const char *s1, const char *s2); -int strncasecmp(const char *s1, const char *s2, size_t n); int fileno(FILE *stream); int fseeko(FILE *stream, off_t offset, int whence); off_t ftello(FILE *stream); diff --git a/clang/test/Analysis/std-c-library-functions-arg-cstring-dependency.c b/clang/test/Analysis/std-c-library-functions-arg-cstring-dependency.c new file mode 100644 index 0000000000000..37425e4e3e169 --- /dev/null +++ b/clang/test/Analysis/std-c-library-functions-arg-cstring-dependency.c @@ -0,0 +1,21 @@ +// This test case crashes if strncasecmp is modeled in StdCLibraryFunctions. +// Either we fix CStringChecker to handle the call prerequisites in +// checkPreCall, or we must not evaluate any pure functions in +// StdCLibraryFunctions that are also handled in CStringChecker. 
+
+// RUN: %clang_analyze_cc1 %s \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=apiModeling.StdCLibraryFunctions \
+// RUN:   -analyzer-checker=unix.cstring.NullArg \
+// RUN:   -analyzer-config apiModeling.StdCLibraryFunctions:ModelPOSIX=true \
+// RUN:   -analyzer-checker=alpha.unix.StdCLibraryFunctionArgs \
+// RUN:   -triple x86_64-unknown-linux-gnu \
+// RUN:   -verify
+
+typedef __typeof(sizeof(int)) size_t;
+int strncasecmp(const char *s1, const char *s2, size_t n);
+
+int strncasecmp_null_argument(char *a, size_t n) {
+  char *b = 0;
+  return strncasecmp(a, b, n); // expected-warning{{Null pointer passed as 2nd argument to string comparison function}}
+}

From cd89f5c91b4bad90278a59865fc06a75211589a1 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin
Date: Thu, 10 Sep 2020 09:55:54 +0100
Subject: [PATCH 0239/1079] [SVE][CodeGen] Legalisation of truncate for
 scalable vectors

Truncating from an illegal SVE type to a legal type, e.g.
`trunc <vscale x 4 x i64> %in to <vscale x 4 x i32>`
fails after PromoteIntOp_CONCAT_VECTORS attempts to
create a BUILD_VECTOR.

This patch changes the promote function to create a sequence of
INSERT_SUBVECTORs if the return type is scalable, and replaces these
with UNPK+UZP1 for AArch64.

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D86548
---
 .../SelectionDAG/LegalizeIntegerTypes.cpp     | 15 +++++
 .../Target/AArch64/AArch64ISelLowering.cpp    | 60 ++++++++++++++++-
 llvm/test/CodeGen/AArch64/sve-split-trunc.ll  | 66 +++++++++++++++++++
 3 files changed, 138 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-split-trunc.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index e1881c20e5b3b..bfe1b365efc4d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -4702,8 +4702,23 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) {

 SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
   SDLoc dl(N);
+
+  EVT ResVT = N->getValueType(0);
   unsigned NumElems = N->getNumOperands();
+
+  if (ResVT.isScalableVector()) {
+    SDValue ResVec = DAG.getUNDEF(ResVT);
+
+    for (unsigned OpIdx = 0; OpIdx < NumElems; ++OpIdx) {
+      SDValue Op = N->getOperand(OpIdx);
+      unsigned OpNumElts = Op.getValueType().getVectorMinNumElements();
+      ResVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ResVec, Op,
+                           DAG.getIntPtrConstant(OpIdx * OpNumElts, dl));
+    }
+
+    return ResVec;
+  }
+
   EVT RetSclrTy = N->getValueType(0).getVectorElementType();

   SmallVector<SDValue, 8> NewOps;

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 063644716a654..d4f324490430c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -964,8 +964,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     }
   }

-  for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32})
+  for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
     setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+    setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+  }

   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
@@ -9099,9 +9101,34 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
   EVT InVT = Op.getOperand(1).getValueType();
   unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();

-  // We don't have any patterns for scalable
vector yet. - if (InVT.isScalableVector()) + if (InVT.isScalableVector()) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + + if (!isTypeLegal(VT) || !VT.isInteger()) + return SDValue(); + + SDValue Vec0 = Op.getOperand(0); + SDValue Vec1 = Op.getOperand(1); + + // Ensure the subvector is half the size of the main vector. + if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2)) + return SDValue(); + + // Extend elements of smaller vector... + EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext())); + SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1); + + if (Idx == 0) { + SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0); + return DAG.getNode(AArch64ISD::UZP1, DL, VT, ExtVec, HiVec0); + } else if (Idx == InVT.getVectorMinNumElements()) { + SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0); + return DAG.getNode(AArch64ISD::UZP1, DL, VT, LoVec0, ExtVec); + } + return SDValue(); + } // This will be matched by custom code during ISelDAGToDAG. if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef()) @@ -13001,6 +13028,31 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, S->getMemOperand()->getFlags()); } +static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) { + SDLoc DL(N); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + EVT ResVT = N->getValueType(0); + + // uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z) + if (Op0.getOpcode() == AArch64ISD::UUNPKLO) { + if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) { + SDValue X = Op0.getOperand(0).getOperand(0); + return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1); + } + } + + // uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z) + if (Op1.getOpcode() == AArch64ISD::UUNPKHI) { + if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) { + SDValue Z = Op1.getOperand(0).getOperand(1); + return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z); + } + } + + return SDValue(); +} + /// Target-specific DAG combine function for post-increment LD1 (lane) and /// post-increment LD1R. 
static SDValue performPostLD1Combine(SDNode *N,
@@ -14342,6 +14394,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performPostLD1Combine(N, DCI, false);
   case AArch64ISD::NVCAST:
     return performNVCASTCombine(N);
+  case AArch64ISD::UZP1:
+    return performUzpCombine(N, DAG);
   case ISD::INSERT_VECTOR_ELT:
     return performPostLD1Combine(N, DCI, true);
   case ISD::INTRINSIC_VOID:
diff --git a/llvm/test/CodeGen/AArch64/sve-split-trunc.ll b/llvm/test/CodeGen/AArch64/sve-split-trunc.ll
new file mode 100644
index 0000000000000..6c81c49070fb0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-split-trunc.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 16 x i8> @trunc_i16toi8(<vscale x 16 x i16> %in) {
+; CHECK-LABEL: trunc_i16toi8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %out = trunc <vscale x 16 x i16> %in to <vscale x 16 x i8>
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 16 x i8> @trunc_i32toi8(<vscale x 16 x i32> %in) {
+; CHECK-LABEL: trunc_i32toi8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z3.h
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z2.b
+; CHECK-NEXT:    ret
+  %out = trunc <vscale x 16 x i32> %in to <vscale x 16 x i8>
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @trunc_i32toi16(<vscale x 8 x i32> %in) {
+; CHECK-LABEL: trunc_i32toi16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %out = trunc <vscale x 8 x i32> %in to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @trunc_i64toi32(<vscale x 4 x i64> %in) {
+; CHECK-LABEL: trunc_i64toi32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %out = trunc <vscale x 4 x i64> %in to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 8 x i16> @trunc_i64toi16(<vscale x 8 x i64> %in) {
+; CHECK-LABEL: trunc_i64toi16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT:    ret
+  %out = trunc <vscale x 8 x i64> %in to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 16 x i8> @trunc_i64toi8(<vscale x 16 x i64> %in) {
+; CHECK-LABEL: trunc_i64toi8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z6.s, z6.s, z7.s
+; CHECK-NEXT:    uzp1 z4.s, z4.s, z5.s
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z1.h, z4.h, z6.h
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %out = trunc <vscale x 16 x i64> %in to <vscale x 16 x i8>
+  ret <vscale x 16 x i8> %out
+}

From a97648b93846f163af262b9a0db684c7f5efc43f Mon Sep 17 00:00:00 2001
From: Gabor Marton
Date: Thu, 10 Sep 2020 12:41:29 +0200
Subject: [PATCH 0240/1079] [analyzer][StdLibraryFunctionsChecker] Add better
 diagnostics

Differential Revision: https://reviews.llvm.org/D79431
---
 .../Checkers/StdLibraryFunctionsChecker.cpp   | 25 +++++++++++++++----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
index c6c37a85306e7..f5ad80950ef11 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
@@ -126,6 +126,8 @@ class StdLibraryFunctionsChecker
   }
   ArgNo getArgNo() const { return ArgN; }

+  virtual StringRef getName() const = 0;
+
 protected:
   ArgNo ArgN; // Argument to which we apply the constraint.
@@ -152,6 +154,7 @@ class StdLibraryFunctionsChecker
   IntRangeVector Ranges;

 public:
+  StringRef getName() const override { return "Range"; }
   RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Ranges)
       : ValueConstraint(ArgN), Kind(Kind), Ranges(Ranges) {}

@@ -205,6 +208,7 @@ class StdLibraryFunctionsChecker
   ArgNo OtherArgN;

 public:
+  virtual StringRef getName() const override { return "Comparison"; };
   ComparisonConstraint(ArgNo ArgN, BinaryOperator::Opcode Opcode,
                        ArgNo OtherArgN)
       : ValueConstraint(ArgN), Opcode(Opcode), OtherArgN(OtherArgN) {}
@@ -221,6 +225,7 @@ class StdLibraryFunctionsChecker
   bool CannotBeNull = true;

 public:
+  StringRef getName() const override { return "NonNull"; }
   ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
                         const Summary &Summary,
                         CheckerContext &C) const override {
@@ -272,6 +277,7 @@ class StdLibraryFunctionsChecker
   BinaryOperator::Opcode Op = BO_LE;

 public:
+  StringRef getName() const override { return "BufferSize"; }
   BufferSizeConstraint(ArgNo Buffer, llvm::APSInt BufMinSize)
       : ValueConstraint(Buffer), ConcreteSize(BufMinSize) {}
   BufferSizeConstraint(ArgNo Buffer, ArgNo BufSize)
@@ -466,6 +472,8 @@ class StdLibraryFunctionsChecker
       return *this;
     }
     Summary &ArgConstraint(ValueConstraintPtr VC) {
+      assert(VC->getArgNo() != Ret &&
+             "Arg constraint should not refer to the return value");
       ArgConstraints.push_back(VC);
       return *this;
     }
@@ -549,17 +557,24 @@ class StdLibraryFunctionsChecker
   void initFunctionSummaries(CheckerContext &C) const;

   void reportBug(const CallEvent &Call, ExplodedNode *N,
-                 CheckerContext &C) const {
+                 const ValueConstraint *VC, CheckerContext &C) const {
     if (!ChecksEnabled[CK_StdCLibraryFunctionArgsChecker])
       return;
-    // TODO Add detailed diagnostic.
-    StringRef Msg = "Function argument constraint is not satisfied";
+    // TODO Add more detailed diagnostic.
+    std::string Msg =
+        (Twine("Function argument constraint is not satisfied, constraint: ") +
+         VC->getName().data() + ", ArgN: " + Twine(VC->getArgNo()))
+            .str();
     if (!BT_InvalidArg)
       BT_InvalidArg = std::make_unique<BugType>(
           CheckNames[CK_StdCLibraryFunctionArgsChecker],
           "Unsatisfied argument constraints", categories::LogicError);
     auto R = std::make_unique<PathSensitiveBugReport>(*BT_InvalidArg, Msg, N);
-    bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *R);
+    bugreporter::trackExpressionValue(N, Call.getArgExpr(VC->getArgNo()), *R);
+
+    // Highlight the range of the argument that was violated.
+    R->addRange(Call.getArgSourceRange(VC->getArgNo()));
+
     C.emitReport(std::move(R));
   }
 };
@@ -696,7 +711,7 @@ void StdLibraryFunctionsChecker::checkPreCall(const CallEvent &Call,
     // The argument constraint is not satisfied.
     if (FailureSt && !SuccessSt) {
       if (ExplodedNode *N = C.generateErrorNode(NewState))
-        reportBug(Call, N, C);
+        reportBug(Call, N, Constraint.get(), C);
       break;
     } else {
       // We will apply the constraint even if we cannot reason about the

From e80605e2421f1fe09eb6f64f46dc65766c2d5184 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 9 Sep 2020 17:48:22 +0100
Subject: [PATCH 0241/1079] [X86] Remove WaitInsert::TTI member. NFCI.

This is only ever set/used inside WaitInsert::runOnMachineFunction so
don't bother storing it in the class.
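A minimal C++ sketch of the pattern, with the pass reduced to a stub
(types and names simplified; not the actual implementation):

  struct WaitInsertSketch {
    // Before: `const TargetInstrInfo *TII;` lived here as a member even
    // though only runOnMachineFunction() ever used it.
    bool runOnMachineFunction(const void *InstrInfo) {
      // After: a local scoped to its only user, so no stale pointer
      // survives between per-function runs of the pass.
      const void *TII = InstrInfo;
      return TII != nullptr;
    }
  };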
---
 llvm/lib/Target/X86/X86InsertWait.cpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InsertWait.cpp b/llvm/lib/Target/X86/X86InsertWait.cpp
index a82d98d88b306..56d2709f59374 100644
--- a/llvm/lib/Target/X86/X86InsertWait.cpp
+++ b/llvm/lib/Target/X86/X86InsertWait.cpp
@@ -27,7 +27,6 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/Support/Debug.h"

@@ -48,9 +47,6 @@ class WaitInsert : public MachineFunctionPass {
   StringRef getPassName() const override {
     return "X86 insert wait instruction";
   }
-
-private:
-  const TargetInstrInfo *TII; // Machine instruction info.
 };
 } // namespace

@@ -119,7 +115,7 @@ bool WaitInsert::runOnMachineFunction(MachineFunction &MF) {
     return false;

   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
-  TII = ST.getInstrInfo();
+  const X86InstrInfo *TII = ST.getInstrInfo();
   bool Changed = false;

   for (MachineBasicBlock &MBB : MF) {

From fc49abee5674261289d7e66c3291c0f1c5199689 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 10 Sep 2020 11:29:06 +0100
Subject: [PATCH 0242/1079] [X86][SSE] lowerShuffleAsSplitOrBlend always
 returns a shuffle.

lowerShuffleAsSplitOrBlend always returns a target shuffle result (and
is the default operation for lowering some shuffle types), so we don't
need to check for null.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ce46dd9167f17..031234925de47 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -16788,9 +16788,8 @@ static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // since after split we get a more efficient code using vpunpcklwd and
   // vpunpckhwd instrs than vblend.
   if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32))
-    if (SDValue V = lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask,
-                                               Subtarget, DAG))
-      return V;
+    return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget,
+                                      DAG);

   // If we have AVX2 then we always want to lower with a blend because at v8 we
   // can fully permute the elements.
@@ -16828,9 +16827,8 @@ static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // vpunpcklwd and vpunpckhwd instrs.
if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() && !Subtarget.hasAVX512()) - if (SDValue V = lowerShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, - Subtarget, DAG)) - return V; + return lowerShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, Subtarget, + DAG); if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) From 0aea3a79adfdd6b83f53f6653c98c1bfd94ef878 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 10 Sep 2020 11:52:20 +0100 Subject: [PATCH 0243/1079] [SLP][X86] Add division by uniform constant tests (PR47476) --- .../Transforms/SLPVectorizer/X86/arith-div.ll | 903 ++++++++++++++++++ 1 file changed, 903 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll new file mode 100644 index 0000000000000..30930eacb5007 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll @@ -0,0 +1,903 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -mattr=-prefer-128-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -mattr=+prefer-128-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=-prefer-128-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=+prefer-128-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2 + +@a64 = common global [8 x i64] zeroinitializer, align 64 +@b64 = common global [8 x i64] zeroinitializer, align 64 +@c64 = common global [8 x i64] zeroinitializer, align 64 +@a32 = common global [16 x i32] zeroinitializer, align 64 +@b32 = common global [16 x i32] zeroinitializer, align 64 +@c32 = common global [16 x i32] zeroinitializer, align 64 +@a16 = common global [32 x i16] zeroinitializer, align 64 +@b16 = common global [32 x i16] zeroinitializer, align 64 +@c16 = common global [32 x i16] zeroinitializer, align 64 +@a8 = common global [64 x i8] zeroinitializer, align 64 +@b8 = common global [64 x i8] zeroinitializer, align 64 +@c8 = common global [64 x i8] zeroinitializer, align 64 + +define void @sdiv_v16i32_uniformconst() { +; SSE-LABEL: @sdiv_v16i32_uniformconst( +; SSE-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 +; SSE-NEXT: [[A1:%.*]] = load i32, i32* getelementptr 
inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 +; SSE-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 +; SSE-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 +; SSE-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 +; SSE-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 +; SSE-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 +; SSE-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 +; SSE-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4 +; SSE-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 +; SSE-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 +; SSE-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 +; SSE-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 +; SSE-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 +; SSE-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 +; SSE-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 +; SSE-NEXT: [[R0:%.*]] = sdiv i32 [[A0]], 5 +; SSE-NEXT: [[R1:%.*]] = sdiv i32 [[A1]], 5 +; SSE-NEXT: [[R2:%.*]] = sdiv i32 [[A2]], 5 +; SSE-NEXT: [[R3:%.*]] = sdiv i32 [[A3]], 5 +; SSE-NEXT: [[R4:%.*]] = sdiv i32 [[A4]], 5 +; SSE-NEXT: [[R5:%.*]] = sdiv i32 [[A5]], 5 +; SSE-NEXT: [[R6:%.*]] = sdiv i32 [[A6]], 5 +; SSE-NEXT: [[R7:%.*]] = sdiv i32 [[A7]], 5 +; SSE-NEXT: [[R8:%.*]] = sdiv i32 [[A8]], 5 +; SSE-NEXT: [[R9:%.*]] = sdiv i32 [[A9]], 5 +; SSE-NEXT: [[R10:%.*]] = sdiv i32 [[A10]], 5 +; SSE-NEXT: [[R11:%.*]] = sdiv i32 [[A11]], 5 +; SSE-NEXT: [[R12:%.*]] = sdiv i32 [[A12]], 5 +; SSE-NEXT: [[R13:%.*]] = sdiv i32 [[A13]], 5 +; SSE-NEXT: [[R14:%.*]] = sdiv i32 [[A14]], 5 +; SSE-NEXT: [[R15:%.*]] = sdiv i32 [[A15]], 5 +; SSE-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 +; SSE-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 +; SSE-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 +; SSE-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 +; SSE-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 +; SSE-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 +; SSE-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 +; SSE-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 +; SSE-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 +; SSE-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x 
i32]* @c32, i32 0, i64 9), align 4 +; SSE-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 +; SSE-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 +; SSE-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 +; SSE-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 +; SSE-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 +; SSE-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 +; SSE-NEXT: ret void +; +; SLM-LABEL: @sdiv_v16i32_uniformconst( +; SLM-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 +; SLM-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 +; SLM-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 +; SLM-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 +; SLM-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 +; SLM-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 +; SLM-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 +; SLM-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 +; SLM-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4 +; SLM-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 +; SLM-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 +; SLM-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 +; SLM-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 +; SLM-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 +; SLM-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 +; SLM-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 +; SLM-NEXT: [[R0:%.*]] = sdiv i32 [[A0]], 5 +; SLM-NEXT: [[R1:%.*]] = sdiv i32 [[A1]], 5 +; SLM-NEXT: [[R2:%.*]] = sdiv i32 [[A2]], 5 +; SLM-NEXT: [[R3:%.*]] = sdiv i32 [[A3]], 5 +; SLM-NEXT: [[R4:%.*]] = sdiv i32 [[A4]], 5 +; SLM-NEXT: [[R5:%.*]] = sdiv i32 [[A5]], 5 +; SLM-NEXT: [[R6:%.*]] = sdiv i32 [[A6]], 5 +; SLM-NEXT: [[R7:%.*]] = sdiv i32 [[A7]], 5 +; SLM-NEXT: [[R8:%.*]] = sdiv i32 [[A8]], 5 +; SLM-NEXT: [[R9:%.*]] = sdiv i32 [[A9]], 5 +; SLM-NEXT: [[R10:%.*]] = sdiv i32 [[A10]], 5 +; SLM-NEXT: [[R11:%.*]] = sdiv i32 [[A11]], 5 +; SLM-NEXT: [[R12:%.*]] = sdiv i32 [[A12]], 5 +; SLM-NEXT: [[R13:%.*]] = sdiv i32 [[A13]], 5 +; SLM-NEXT: [[R14:%.*]] = sdiv i32 [[A14]], 5 +; SLM-NEXT: [[R15:%.*]] = sdiv i32 [[A15]], 5 +; SLM-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 +; SLM-NEXT: store 
i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 +; SLM-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 +; SLM-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 +; SLM-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 +; SLM-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 +; SLM-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 +; SLM-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 +; SLM-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 +; SLM-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 +; SLM-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 +; SLM-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 +; SLM-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 +; SLM-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 +; SLM-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 +; SLM-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 +; SLM-NEXT: ret void +; +; AVX1-LABEL: @sdiv_v16i32_uniformconst( +; AVX1-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 +; AVX1-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 +; AVX1-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 +; AVX1-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 +; AVX1-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 +; AVX1-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 +; AVX1-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 +; AVX1-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 +; AVX1-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4 +; AVX1-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 +; AVX1-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 +; AVX1-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 +; AVX1-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 +; AVX1-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 +; AVX1-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 +; 
AVX1-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 +; AVX1-NEXT: [[R0:%.*]] = sdiv i32 [[A0]], 5 +; AVX1-NEXT: [[R1:%.*]] = sdiv i32 [[A1]], 5 +; AVX1-NEXT: [[R2:%.*]] = sdiv i32 [[A2]], 5 +; AVX1-NEXT: [[R3:%.*]] = sdiv i32 [[A3]], 5 +; AVX1-NEXT: [[R4:%.*]] = sdiv i32 [[A4]], 5 +; AVX1-NEXT: [[R5:%.*]] = sdiv i32 [[A5]], 5 +; AVX1-NEXT: [[R6:%.*]] = sdiv i32 [[A6]], 5 +; AVX1-NEXT: [[R7:%.*]] = sdiv i32 [[A7]], 5 +; AVX1-NEXT: [[R8:%.*]] = sdiv i32 [[A8]], 5 +; AVX1-NEXT: [[R9:%.*]] = sdiv i32 [[A9]], 5 +; AVX1-NEXT: [[R10:%.*]] = sdiv i32 [[A10]], 5 +; AVX1-NEXT: [[R11:%.*]] = sdiv i32 [[A11]], 5 +; AVX1-NEXT: [[R12:%.*]] = sdiv i32 [[A12]], 5 +; AVX1-NEXT: [[R13:%.*]] = sdiv i32 [[A13]], 5 +; AVX1-NEXT: [[R14:%.*]] = sdiv i32 [[A14]], 5 +; AVX1-NEXT: [[R15:%.*]] = sdiv i32 [[A15]], 5 +; AVX1-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 +; AVX1-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 +; AVX1-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 +; AVX1-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 +; AVX1-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 +; AVX1-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 +; AVX1-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 +; AVX1-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 +; AVX1-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 +; AVX1-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 +; AVX1-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 +; AVX1-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 +; AVX1-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 +; AVX1-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 +; AVX1-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 +; AVX1-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 +; AVX1-NEXT: ret void +; +; AVX2-LABEL: @sdiv_v16i32_uniformconst( +; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4 +; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX2-NEXT: [[TMP3:%.*]] = sdiv <8 x i32> [[TMP1]], +; AVX2-NEXT: [[TMP4:%.*]] = sdiv <8 x i32> [[TMP2]], +; AVX2-NEXT: store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4 +; AVX2-NEXT: store <8 x i32> [[TMP4]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX2-NEXT: ret void +; +; AVX512-LABEL: @sdiv_v16i32_uniformconst( +; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* 
@a32 to <16 x i32>*), align 4 +; AVX512-NEXT: [[TMP2:%.*]] = sdiv <16 x i32> [[TMP1]], +; AVX512-NEXT: store <16 x i32> [[TMP2]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4 +; AVX512-NEXT: ret void +; + %a0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4 + %a1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4 + %a2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4 + %a3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4 + %a4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4 + %a5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4 + %a6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4 + %a7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4 + %a8 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4 + %a9 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4 + %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 + %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 + %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 + %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 + %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 + %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 + %r0 = sdiv i32 %a0 , 5 + %r1 = sdiv i32 %a1 , 5 + %r2 = sdiv i32 %a2 , 5 + %r3 = sdiv i32 %a3 , 5 + %r4 = sdiv i32 %a4 , 5 + %r5 = sdiv i32 %a5 , 5 + %r6 = sdiv i32 %a6 , 5 + %r7 = sdiv i32 %a7 , 5 + %r8 = sdiv i32 %a8 , 5 + %r9 = sdiv i32 %a9 , 5 + %r10 = sdiv i32 %a10, 5 + %r11 = sdiv i32 %a11, 5 + %r12 = sdiv i32 %a12, 5 + %r13 = sdiv i32 %a13, 5 + %r14 = sdiv i32 %a14, 5 + %r15 = sdiv i32 %a15, 5 + store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4 + store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4 + store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4 + store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4 + store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4 + store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4 + store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4 + store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4 + store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4 + store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4 + store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 + store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 + store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), 
align 4 + store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 + store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 + store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 + ret void +} + +define void @srem_v16i32_uniformconst() { +; SSE-LABEL: @srem_v16i32_uniformconst( +; SSE-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 +; SSE-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 +; SSE-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 +; SSE-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 +; SSE-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 +; SSE-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 +; SSE-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 +; SSE-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 +; SSE-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4 +; SSE-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 +; SSE-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 +; SSE-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 +; SSE-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 +; SSE-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 +; SSE-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 +; SSE-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 +; SSE-NEXT: [[R0:%.*]] = srem i32 [[A0]], 5 +; SSE-NEXT: [[R1:%.*]] = srem i32 [[A1]], 5 +; SSE-NEXT: [[R2:%.*]] = srem i32 [[A2]], 5 +; SSE-NEXT: [[R3:%.*]] = srem i32 [[A3]], 5 +; SSE-NEXT: [[R4:%.*]] = srem i32 [[A4]], 5 +; SSE-NEXT: [[R5:%.*]] = srem i32 [[A5]], 5 +; SSE-NEXT: [[R6:%.*]] = srem i32 [[A6]], 5 +; SSE-NEXT: [[R7:%.*]] = srem i32 [[A7]], 5 +; SSE-NEXT: [[R8:%.*]] = srem i32 [[A8]], 5 +; SSE-NEXT: [[R9:%.*]] = srem i32 [[A9]], 5 +; SSE-NEXT: [[R10:%.*]] = srem i32 [[A10]], 5 +; SSE-NEXT: [[R11:%.*]] = srem i32 [[A11]], 5 +; SSE-NEXT: [[R12:%.*]] = srem i32 [[A12]], 5 +; SSE-NEXT: [[R13:%.*]] = srem i32 [[A13]], 5 +; SSE-NEXT: [[R14:%.*]] = srem i32 [[A14]], 5 +; SSE-NEXT: [[R15:%.*]] = srem i32 [[A15]], 5 +; SSE-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 +; SSE-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 +; SSE-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 +; SSE-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 +; SSE-NEXT: store i32 [[R4]], i32* getelementptr inbounds 
([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 +; SSE-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 +; SSE-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 +; SSE-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 +; SSE-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 +; SSE-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 +; SSE-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 +; SSE-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 +; SSE-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 +; SSE-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 +; SSE-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 +; SSE-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 +; SSE-NEXT: ret void +; +; SLM-LABEL: @srem_v16i32_uniformconst( +; SLM-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 +; SLM-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 +; SLM-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 +; SLM-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 +; SLM-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 +; SLM-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 +; SLM-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 +; SLM-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 +; SLM-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4 +; SLM-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 +; SLM-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 +; SLM-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 +; SLM-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 +; SLM-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 +; SLM-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 +; SLM-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 +; SLM-NEXT: [[R0:%.*]] = srem i32 [[A0]], 5 +; SLM-NEXT: [[R1:%.*]] = srem i32 [[A1]], 5 +; SLM-NEXT: [[R2:%.*]] = srem i32 [[A2]], 5 +; SLM-NEXT: [[R3:%.*]] = srem i32 [[A3]], 5 +; SLM-NEXT: [[R4:%.*]] = srem i32 [[A4]], 5 +; SLM-NEXT: [[R5:%.*]] = srem i32 [[A5]], 5 +; 
SLM-NEXT: [[R6:%.*]] = srem i32 [[A6]], 5 +; SLM-NEXT: [[R7:%.*]] = srem i32 [[A7]], 5 +; SLM-NEXT: [[R8:%.*]] = srem i32 [[A8]], 5 +; SLM-NEXT: [[R9:%.*]] = srem i32 [[A9]], 5 +; SLM-NEXT: [[R10:%.*]] = srem i32 [[A10]], 5 +; SLM-NEXT: [[R11:%.*]] = srem i32 [[A11]], 5 +; SLM-NEXT: [[R12:%.*]] = srem i32 [[A12]], 5 +; SLM-NEXT: [[R13:%.*]] = srem i32 [[A13]], 5 +; SLM-NEXT: [[R14:%.*]] = srem i32 [[A14]], 5 +; SLM-NEXT: [[R15:%.*]] = srem i32 [[A15]], 5 +; SLM-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 +; SLM-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 +; SLM-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 +; SLM-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 +; SLM-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 +; SLM-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 +; SLM-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 +; SLM-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 +; SLM-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 +; SLM-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 +; SLM-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 +; SLM-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 +; SLM-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 +; SLM-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 +; SLM-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 +; SLM-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 +; SLM-NEXT: ret void +; +; AVX1-LABEL: @srem_v16i32_uniformconst( +; AVX1-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 +; AVX1-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 +; AVX1-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 +; AVX1-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 +; AVX1-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 +; AVX1-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 +; AVX1-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 +; AVX1-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 +; AVX1-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4 +; AVX1-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 +; AVX1-NEXT: 
[[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 +; AVX1-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 +; AVX1-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 +; AVX1-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 +; AVX1-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 +; AVX1-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 +; AVX1-NEXT: [[R0:%.*]] = srem i32 [[A0]], 5 +; AVX1-NEXT: [[R1:%.*]] = srem i32 [[A1]], 5 +; AVX1-NEXT: [[R2:%.*]] = srem i32 [[A2]], 5 +; AVX1-NEXT: [[R3:%.*]] = srem i32 [[A3]], 5 +; AVX1-NEXT: [[R4:%.*]] = srem i32 [[A4]], 5 +; AVX1-NEXT: [[R5:%.*]] = srem i32 [[A5]], 5 +; AVX1-NEXT: [[R6:%.*]] = srem i32 [[A6]], 5 +; AVX1-NEXT: [[R7:%.*]] = srem i32 [[A7]], 5 +; AVX1-NEXT: [[R8:%.*]] = srem i32 [[A8]], 5 +; AVX1-NEXT: [[R9:%.*]] = srem i32 [[A9]], 5 +; AVX1-NEXT: [[R10:%.*]] = srem i32 [[A10]], 5 +; AVX1-NEXT: [[R11:%.*]] = srem i32 [[A11]], 5 +; AVX1-NEXT: [[R12:%.*]] = srem i32 [[A12]], 5 +; AVX1-NEXT: [[R13:%.*]] = srem i32 [[A13]], 5 +; AVX1-NEXT: [[R14:%.*]] = srem i32 [[A14]], 5 +; AVX1-NEXT: [[R15:%.*]] = srem i32 [[A15]], 5 +; AVX1-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 +; AVX1-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 +; AVX1-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 +; AVX1-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 +; AVX1-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 +; AVX1-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 +; AVX1-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 +; AVX1-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 +; AVX1-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 +; AVX1-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 +; AVX1-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 +; AVX1-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 +; AVX1-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 +; AVX1-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 +; AVX1-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 +; AVX1-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 +; AVX1-NEXT: ret void +; +; AVX2-LABEL: @srem_v16i32_uniformconst( +; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4 +; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr 
inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX2-NEXT: [[TMP3:%.*]] = srem <8 x i32> [[TMP1]], +; AVX2-NEXT: [[TMP4:%.*]] = srem <8 x i32> [[TMP2]], +; AVX2-NEXT: store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4 +; AVX2-NEXT: store <8 x i32> [[TMP4]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX2-NEXT: ret void +; +; AVX512-LABEL: @srem_v16i32_uniformconst( +; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4 +; AVX512-NEXT: [[TMP2:%.*]] = srem <16 x i32> [[TMP1]], +; AVX512-NEXT: store <16 x i32> [[TMP2]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4 +; AVX512-NEXT: ret void +; + %a0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4 + %a1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4 + %a2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4 + %a3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4 + %a4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4 + %a5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4 + %a6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4 + %a7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4 + %a8 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4 + %a9 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4 + %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 + %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 + %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 + %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 + %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 + %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 + %r0 = srem i32 %a0 , 5 + %r1 = srem i32 %a1 , 5 + %r2 = srem i32 %a2 , 5 + %r3 = srem i32 %a3 , 5 + %r4 = srem i32 %a4 , 5 + %r5 = srem i32 %a5 , 5 + %r6 = srem i32 %a6 , 5 + %r7 = srem i32 %a7 , 5 + %r8 = srem i32 %a8 , 5 + %r9 = srem i32 %a9 , 5 + %r10 = srem i32 %a10, 5 + %r11 = srem i32 %a11, 5 + %r12 = srem i32 %a12, 5 + %r13 = srem i32 %a13, 5 + %r14 = srem i32 %a14, 5 + %r15 = srem i32 %a15, 5 + store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4 + store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4 + store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4 + store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4 + store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4 + store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4 + store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4 + 
store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4 + store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4 + store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4 + store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 + store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 + store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 + store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 + store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 + store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 + ret void +} + +define void @udiv_v16i32_uniformconst() { +; SSE-LABEL: @udiv_v16i32_uniformconst( +; SSE-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 +; SSE-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 +; SSE-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 +; SSE-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 +; SSE-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 +; SSE-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 +; SSE-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 +; SSE-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 +; SSE-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4 +; SSE-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 +; SSE-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 +; SSE-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 +; SSE-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 +; SSE-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 +; SSE-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 +; SSE-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 +; SSE-NEXT: [[R0:%.*]] = udiv i32 [[A0]], 5 +; SSE-NEXT: [[R1:%.*]] = udiv i32 [[A1]], 5 +; SSE-NEXT: [[R2:%.*]] = udiv i32 [[A2]], 5 +; SSE-NEXT: [[R3:%.*]] = udiv i32 [[A3]], 5 +; SSE-NEXT: [[R4:%.*]] = udiv i32 [[A4]], 5 +; SSE-NEXT: [[R5:%.*]] = udiv i32 [[A5]], 5 +; SSE-NEXT: [[R6:%.*]] = udiv i32 [[A6]], 5 +; SSE-NEXT: [[R7:%.*]] = udiv i32 [[A7]], 5 +; SSE-NEXT: [[R8:%.*]] = udiv i32 [[A8]], 5 +; SSE-NEXT: [[R9:%.*]] = udiv i32 [[A9]], 5 +; SSE-NEXT: [[R10:%.*]] = udiv i32 [[A10]], 5 +; SSE-NEXT: [[R11:%.*]] = udiv i32 [[A11]], 5 +; SSE-NEXT: [[R12:%.*]] = udiv i32 [[A12]], 5 +; SSE-NEXT: [[R13:%.*]] = udiv i32 [[A13]], 5 +; 
SSE-NEXT: [[R14:%.*]] = udiv i32 [[A14]], 5 +; SSE-NEXT: [[R15:%.*]] = udiv i32 [[A15]], 5 +; SSE-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 +; SSE-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 +; SSE-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 +; SSE-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 +; SSE-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 +; SSE-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 +; SSE-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 +; SSE-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 +; SSE-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 +; SSE-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 +; SSE-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 +; SSE-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 +; SSE-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 +; SSE-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 +; SSE-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 +; SSE-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 +; SSE-NEXT: ret void +; +; SLM-LABEL: @udiv_v16i32_uniformconst( +; SLM-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 +; SLM-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 +; SLM-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 +; SLM-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 +; SLM-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 +; SLM-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 +; SLM-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 +; SLM-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 +; SLM-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4 +; SLM-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 +; SLM-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 +; SLM-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 +; SLM-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 +; SLM-NEXT: [[A13:%.*]] = load i32, 
i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 +; SLM-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 +; SLM-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 +; SLM-NEXT: [[R0:%.*]] = udiv i32 [[A0]], 5 +; SLM-NEXT: [[R1:%.*]] = udiv i32 [[A1]], 5 +; SLM-NEXT: [[R2:%.*]] = udiv i32 [[A2]], 5 +; SLM-NEXT: [[R3:%.*]] = udiv i32 [[A3]], 5 +; SLM-NEXT: [[R4:%.*]] = udiv i32 [[A4]], 5 +; SLM-NEXT: [[R5:%.*]] = udiv i32 [[A5]], 5 +; SLM-NEXT: [[R6:%.*]] = udiv i32 [[A6]], 5 +; SLM-NEXT: [[R7:%.*]] = udiv i32 [[A7]], 5 +; SLM-NEXT: [[R8:%.*]] = udiv i32 [[A8]], 5 +; SLM-NEXT: [[R9:%.*]] = udiv i32 [[A9]], 5 +; SLM-NEXT: [[R10:%.*]] = udiv i32 [[A10]], 5 +; SLM-NEXT: [[R11:%.*]] = udiv i32 [[A11]], 5 +; SLM-NEXT: [[R12:%.*]] = udiv i32 [[A12]], 5 +; SLM-NEXT: [[R13:%.*]] = udiv i32 [[A13]], 5 +; SLM-NEXT: [[R14:%.*]] = udiv i32 [[A14]], 5 +; SLM-NEXT: [[R15:%.*]] = udiv i32 [[A15]], 5 +; SLM-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 +; SLM-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 +; SLM-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 +; SLM-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 +; SLM-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 +; SLM-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 +; SLM-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 +; SLM-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 +; SLM-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 +; SLM-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 +; SLM-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 +; SLM-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 +; SLM-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 +; SLM-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 +; SLM-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 +; SLM-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 +; SLM-NEXT: ret void +; +; AVX1-LABEL: @udiv_v16i32_uniformconst( +; AVX1-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 +; AVX1-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 +; AVX1-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 +; AVX1-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 +; AVX1-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 +; AVX1-NEXT: 
[[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 +; AVX1-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 +; AVX1-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 +; AVX1-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4 +; AVX1-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 +; AVX1-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 +; AVX1-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 +; AVX1-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 +; AVX1-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 +; AVX1-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 +; AVX1-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 +; AVX1-NEXT: [[R0:%.*]] = udiv i32 [[A0]], 5 +; AVX1-NEXT: [[R1:%.*]] = udiv i32 [[A1]], 5 +; AVX1-NEXT: [[R2:%.*]] = udiv i32 [[A2]], 5 +; AVX1-NEXT: [[R3:%.*]] = udiv i32 [[A3]], 5 +; AVX1-NEXT: [[R4:%.*]] = udiv i32 [[A4]], 5 +; AVX1-NEXT: [[R5:%.*]] = udiv i32 [[A5]], 5 +; AVX1-NEXT: [[R6:%.*]] = udiv i32 [[A6]], 5 +; AVX1-NEXT: [[R7:%.*]] = udiv i32 [[A7]], 5 +; AVX1-NEXT: [[R8:%.*]] = udiv i32 [[A8]], 5 +; AVX1-NEXT: [[R9:%.*]] = udiv i32 [[A9]], 5 +; AVX1-NEXT: [[R10:%.*]] = udiv i32 [[A10]], 5 +; AVX1-NEXT: [[R11:%.*]] = udiv i32 [[A11]], 5 +; AVX1-NEXT: [[R12:%.*]] = udiv i32 [[A12]], 5 +; AVX1-NEXT: [[R13:%.*]] = udiv i32 [[A13]], 5 +; AVX1-NEXT: [[R14:%.*]] = udiv i32 [[A14]], 5 +; AVX1-NEXT: [[R15:%.*]] = udiv i32 [[A15]], 5 +; AVX1-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 +; AVX1-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 +; AVX1-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 +; AVX1-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 +; AVX1-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 +; AVX1-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 +; AVX1-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 +; AVX1-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 +; AVX1-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 +; AVX1-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 +; AVX1-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 +; AVX1-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 +; AVX1-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 +; 
AVX1-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 +; AVX1-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 +; AVX1-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 +; AVX1-NEXT: ret void +; +; AVX2-LABEL: @udiv_v16i32_uniformconst( +; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4 +; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX2-NEXT: [[TMP3:%.*]] = udiv <8 x i32> [[TMP1]], +; AVX2-NEXT: [[TMP4:%.*]] = udiv <8 x i32> [[TMP2]], +; AVX2-NEXT: store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4 +; AVX2-NEXT: store <8 x i32> [[TMP4]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX2-NEXT: ret void +; +; AVX512-LABEL: @udiv_v16i32_uniformconst( +; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4 +; AVX512-NEXT: [[TMP2:%.*]] = udiv <16 x i32> [[TMP1]], +; AVX512-NEXT: store <16 x i32> [[TMP2]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4 +; AVX512-NEXT: ret void +; + %a0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4 + %a1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4 + %a2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4 + %a3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4 + %a4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4 + %a5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4 + %a6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4 + %a7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4 + %a8 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4 + %a9 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4 + %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 + %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 + %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 + %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 + %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 + %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 + %r0 = udiv i32 %a0 , 5 + %r1 = udiv i32 %a1 , 5 + %r2 = udiv i32 %a2 , 5 + %r3 = udiv i32 %a3 , 5 + %r4 = udiv i32 %a4 , 5 + %r5 = udiv i32 %a5 , 5 + %r6 = udiv i32 %a6 , 5 + %r7 = udiv i32 %a7 , 5 + %r8 = udiv i32 %a8 , 5 + %r9 = udiv i32 %a9 , 5 + %r10 = udiv i32 %a10, 5 + %r11 = udiv i32 %a11, 5 + %r12 = udiv i32 %a12, 5 + %r13 = udiv i32 %a13, 5 + %r14 = udiv i32 %a14, 5 + %r15 = udiv i32 %a15, 5 + store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4 + 
store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4 + store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4 + store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4 + store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4 + store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4 + store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4 + store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4 + store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4 + store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4 + store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 + store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 + store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 + store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 + store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 + store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 + ret void +} + +define void @urem_v16i32_uniformconst() { +; SSE-LABEL: @urem_v16i32_uniformconst( +; SSE-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 +; SSE-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 +; SSE-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 +; SSE-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 +; SSE-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 +; SSE-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 +; SSE-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 +; SSE-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 +; SSE-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4 +; SSE-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 +; SSE-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 +; SSE-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 +; SSE-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 +; SSE-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 +; SSE-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 +; SSE-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 +; SSE-NEXT: [[R0:%.*]] = urem i32 
[[A0]], 5 +; SSE-NEXT: [[R1:%.*]] = urem i32 [[A1]], 5 +; SSE-NEXT: [[R2:%.*]] = urem i32 [[A2]], 5 +; SSE-NEXT: [[R3:%.*]] = urem i32 [[A3]], 5 +; SSE-NEXT: [[R4:%.*]] = urem i32 [[A4]], 5 +; SSE-NEXT: [[R5:%.*]] = urem i32 [[A5]], 5 +; SSE-NEXT: [[R6:%.*]] = urem i32 [[A6]], 5 +; SSE-NEXT: [[R7:%.*]] = urem i32 [[A7]], 5 +; SSE-NEXT: [[R8:%.*]] = urem i32 [[A8]], 5 +; SSE-NEXT: [[R9:%.*]] = urem i32 [[A9]], 5 +; SSE-NEXT: [[R10:%.*]] = urem i32 [[A10]], 5 +; SSE-NEXT: [[R11:%.*]] = urem i32 [[A11]], 5 +; SSE-NEXT: [[R12:%.*]] = urem i32 [[A12]], 5 +; SSE-NEXT: [[R13:%.*]] = urem i32 [[A13]], 5 +; SSE-NEXT: [[R14:%.*]] = urem i32 [[A14]], 5 +; SSE-NEXT: [[R15:%.*]] = urem i32 [[A15]], 5 +; SSE-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 +; SSE-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 +; SSE-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 +; SSE-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 +; SSE-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 +; SSE-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 +; SSE-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 +; SSE-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 +; SSE-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 +; SSE-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 +; SSE-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 +; SSE-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 +; SSE-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 +; SSE-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 +; SSE-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 +; SSE-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 +; SSE-NEXT: ret void +; +; SLM-LABEL: @urem_v16i32_uniformconst( +; SLM-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 +; SLM-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 +; SLM-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 +; SLM-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 +; SLM-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 +; SLM-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 +; SLM-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 +; SLM-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 +; SLM-NEXT: [[A8:%.*]] = 
load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4 +; SLM-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 +; SLM-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 +; SLM-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 +; SLM-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 +; SLM-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 +; SLM-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 +; SLM-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 +; SLM-NEXT: [[R0:%.*]] = urem i32 [[A0]], 5 +; SLM-NEXT: [[R1:%.*]] = urem i32 [[A1]], 5 +; SLM-NEXT: [[R2:%.*]] = urem i32 [[A2]], 5 +; SLM-NEXT: [[R3:%.*]] = urem i32 [[A3]], 5 +; SLM-NEXT: [[R4:%.*]] = urem i32 [[A4]], 5 +; SLM-NEXT: [[R5:%.*]] = urem i32 [[A5]], 5 +; SLM-NEXT: [[R6:%.*]] = urem i32 [[A6]], 5 +; SLM-NEXT: [[R7:%.*]] = urem i32 [[A7]], 5 +; SLM-NEXT: [[R8:%.*]] = urem i32 [[A8]], 5 +; SLM-NEXT: [[R9:%.*]] = urem i32 [[A9]], 5 +; SLM-NEXT: [[R10:%.*]] = urem i32 [[A10]], 5 +; SLM-NEXT: [[R11:%.*]] = urem i32 [[A11]], 5 +; SLM-NEXT: [[R12:%.*]] = urem i32 [[A12]], 5 +; SLM-NEXT: [[R13:%.*]] = urem i32 [[A13]], 5 +; SLM-NEXT: [[R14:%.*]] = urem i32 [[A14]], 5 +; SLM-NEXT: [[R15:%.*]] = urem i32 [[A15]], 5 +; SLM-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 +; SLM-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 +; SLM-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 +; SLM-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 +; SLM-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 +; SLM-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 +; SLM-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 +; SLM-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 +; SLM-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 +; SLM-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 +; SLM-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 +; SLM-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 +; SLM-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 +; SLM-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 +; SLM-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 +; SLM-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 +; SLM-NEXT: ret void +; +; AVX1-LABEL: @urem_v16i32_uniformconst( +; 
AVX1-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 +; AVX1-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 +; AVX1-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 +; AVX1-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 +; AVX1-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 +; AVX1-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 +; AVX1-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 +; AVX1-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 +; AVX1-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4 +; AVX1-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 +; AVX1-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 +; AVX1-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 +; AVX1-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 +; AVX1-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 +; AVX1-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 +; AVX1-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 +; AVX1-NEXT: [[R0:%.*]] = urem i32 [[A0]], 5 +; AVX1-NEXT: [[R1:%.*]] = urem i32 [[A1]], 5 +; AVX1-NEXT: [[R2:%.*]] = urem i32 [[A2]], 5 +; AVX1-NEXT: [[R3:%.*]] = urem i32 [[A3]], 5 +; AVX1-NEXT: [[R4:%.*]] = urem i32 [[A4]], 5 +; AVX1-NEXT: [[R5:%.*]] = urem i32 [[A5]], 5 +; AVX1-NEXT: [[R6:%.*]] = urem i32 [[A6]], 5 +; AVX1-NEXT: [[R7:%.*]] = urem i32 [[A7]], 5 +; AVX1-NEXT: [[R8:%.*]] = urem i32 [[A8]], 5 +; AVX1-NEXT: [[R9:%.*]] = urem i32 [[A9]], 5 +; AVX1-NEXT: [[R10:%.*]] = urem i32 [[A10]], 5 +; AVX1-NEXT: [[R11:%.*]] = urem i32 [[A11]], 5 +; AVX1-NEXT: [[R12:%.*]] = urem i32 [[A12]], 5 +; AVX1-NEXT: [[R13:%.*]] = urem i32 [[A13]], 5 +; AVX1-NEXT: [[R14:%.*]] = urem i32 [[A14]], 5 +; AVX1-NEXT: [[R15:%.*]] = urem i32 [[A15]], 5 +; AVX1-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 +; AVX1-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 +; AVX1-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 +; AVX1-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 +; AVX1-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 +; AVX1-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 +; AVX1-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 +; AVX1-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 
0, i64 7), align 4 +; AVX1-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 +; AVX1-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 +; AVX1-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 +; AVX1-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 +; AVX1-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 +; AVX1-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 +; AVX1-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 +; AVX1-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 +; AVX1-NEXT: ret void +; +; AVX2-LABEL: @urem_v16i32_uniformconst( +; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4 +; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX2-NEXT: [[TMP3:%.*]] = urem <8 x i32> [[TMP1]], +; AVX2-NEXT: [[TMP4:%.*]] = urem <8 x i32> [[TMP2]], +; AVX2-NEXT: store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4 +; AVX2-NEXT: store <8 x i32> [[TMP4]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX2-NEXT: ret void +; +; AVX512-LABEL: @urem_v16i32_uniformconst( +; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4 +; AVX512-NEXT: [[TMP2:%.*]] = urem <16 x i32> [[TMP1]], +; AVX512-NEXT: store <16 x i32> [[TMP2]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4 +; AVX512-NEXT: ret void +; + %a0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4 + %a1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4 + %a2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4 + %a3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4 + %a4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4 + %a5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4 + %a6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4 + %a7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4 + %a8 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4 + %a9 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4 + %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 + %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 + %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 + %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 + %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 + %a15 = load 
i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
+  %r0  = urem i32 %a0 , 5
+  %r1  = urem i32 %a1 , 5
+  %r2  = urem i32 %a2 , 5
+  %r3  = urem i32 %a3 , 5
+  %r4  = urem i32 %a4 , 5
+  %r5  = urem i32 %a5 , 5
+  %r6  = urem i32 %a6 , 5
+  %r7  = urem i32 %a7 , 5
+  %r8  = urem i32 %a8 , 5
+  %r9  = urem i32 %a9 , 5
+  %r10 = urem i32 %a10, 5
+  %r11 = urem i32 %a11, 5
+  %r12 = urem i32 %a12, 5
+  %r13 = urem i32 %a13, 5
+  %r14 = urem i32 %a14, 5
+  %r15 = urem i32 %a15, 5
+  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
+  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
+  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
+  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
+  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
+  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  ret void
+}

From de25ebaac6d2fed371fcd03d95b35eaa2207f395 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 10 Sep 2020 12:17:54 +0100
Subject: [PATCH 0244/1079] [CostModel][X86] Add vXi32 division by uniform
 constant costs (PR47476)

Other types can be handled in future patches, but their uniform/non-uniform
costs are more similar and don't appear to cause many vectorization issues.
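
As a rough illustration of why these sequences are cheap (an editorial
sketch, not taken from the patch itself; the IR and function names below
are illustrative): a vXi32 division by a uniform constant needs no real
division at all. The backend lowers it to a multiply-high by a "magic"
constant plus shifts -- the pmuludq-based sequences named in the cost
table comments below. For an unsigned divide by 5 the magic multiplier is
0xCCCCCCCD with a final logical shift right by 2, and a remainder costs
one extra multiply and subtract (x - (x / 5) * 5):

  ; Expected to lower to the pmuludq multiply-high sequence (cost ~5 below).
  define <4 x i32> @udiv5(<4 x i32> %x) {
    %d = udiv <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
    ret <4 x i32> %d
  }

  ; Same sequence plus a multiply and subtract to recover the remainder
  ; (cost ~7 below).
  define <4 x i32> @urem5(<4 x i32> %x) {
    %r = urem <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
    ret <4 x i32> %r
  }

A non-uniform constant divisor would need a different magic constant per
lane, which is why only the OK_UniformConstantValue path below gets the
new, cheaper entries.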
---
 .../lib/Target/X86/X86TargetTransformInfo.cpp |  19 +
 llvm/test/Analysis/CostModel/X86/div.ll       | 152 ++--
 llvm/test/Analysis/CostModel/X86/rem.ll       | 178 ++---
 llvm/test/Analysis/CostModel/X86/vdiv-cost.ll |  52 +-
 .../Transforms/SLPVectorizer/X86/arith-div.ll | 748 +++---------------
 5 files changed, 269 insertions(+), 880 deletions(-)

diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index c9179742bcb9c..03f8be094c252 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -321,6 +321,11 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
     { ISD::SHL, MVT::v64i8, 4 }, // psllw + pand.
     { ISD::SRL, MVT::v64i8, 4 }, // psrlw + pand.
     { ISD::SRA, MVT::v64i8, 8 }, // psrlw, pand, pxor, psubb.
+
+    { ISD::SDIV, MVT::v16i32, 6 }, // pmuludq sequence
+    { ISD::SREM, MVT::v16i32, 8 }, // pmuludq+mul+sub sequence
+    { ISD::UDIV, MVT::v16i32, 5 }, // pmuludq sequence
+    { ISD::UREM, MVT::v16i32, 7 }, // pmuludq+mul+sub sequence
   };

   if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -336,6 +341,11 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
     { ISD::SRA, MVT::v32i8, 4 }, // psrlw, pand, pxor, psubb.
     { ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
+
+    { ISD::SDIV, MVT::v8i32, 6 }, // pmuludq sequence
+    { ISD::SREM, MVT::v8i32, 8 }, // pmuludq+mul+sub sequence
+    { ISD::UDIV, MVT::v8i32, 5 }, // pmuludq sequence
+    { ISD::UREM, MVT::v8i32, 7 }, // pmuludq+mul+sub sequence
   };

   if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -353,6 +363,15 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
     { ISD::SHL, MVT::v32i8, 4+2 }, // 2*(psllw + pand) + split.
     { ISD::SRL, MVT::v32i8, 4+2 }, // 2*(psrlw + pand) + split.
     { ISD::SRA, MVT::v32i8, 8+2 }, // 2*(psrlw, pand, pxor, psubb) + split.
+
+    { ISD::SDIV, MVT::v8i32, 12+2 }, // 2*pmuludq sequence + split.
+    { ISD::SREM, MVT::v8i32, 16+2 }, // 2*pmuludq+mul+sub sequence + split.
+    { ISD::SDIV, MVT::v4i32, 6 }, // pmuludq sequence
+    { ISD::SREM, MVT::v4i32, 8 }, // pmuludq+mul+sub sequence
+    { ISD::UDIV, MVT::v8i32, 10+2 }, // 2*pmuludq sequence + split.
+    { ISD::UREM, MVT::v8i32, 14+2 }, // 2*pmuludq+mul+sub sequence + split.
+    { ISD::UDIV, MVT::v4i32, 5 }, // pmuludq sequence
+    { ISD::UREM, MVT::v4i32, 7 }, // pmuludq+mul+sub sequence
   };

   // XOP has faster vXi8 shifts.
diff --git a/llvm/test/Analysis/CostModel/X86/div.ll b/llvm/test/Analysis/CostModel/X86/div.ll
index fb3b705fd186d..4bead926bb90b 100644
--- a/llvm/test/Analysis/CostModel/X86/div.ll
+++ b/llvm/test/Analysis/CostModel/X86/div.ll
@@ -450,62 +450,24 @@ define i32 @udiv_const() {
 }

 define i32 @sdiv_uniformconst() {
-; SSE2-LABEL: 'sdiv_uniformconst'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4i32 = sdiv <4 x i32> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8i32 = sdiv <8 x i32> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16i32 = sdiv <16 x i32> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = sdiv <32 x i8> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = sdiv <64 x i8> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SSSE3-LABEL: 'sdiv_uniformconst'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4i32 = sdiv <4 x i32> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8i32 = sdiv <8 x i32> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16i32 = sdiv <16 x i32> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = sdiv <32 x i8> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = sdiv <64 x i8> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'sdiv_uniformconst'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7
-; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = sdiv <4 x i32> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = sdiv <8 x i32> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16i32 = sdiv <16 x i32> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = sdiv <32 x i8> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = sdiv <64 x i8> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE-LABEL: 'sdiv_uniformconst'
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7
+; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
+; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = sdiv <32 x i8> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = sdiv <64 x i8> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'sdiv_uniformconst'
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7
@@ -513,9 +475,9 @@ define i32 @sdiv_uniformconst() {
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef,
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef,
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = sdiv <4 x i32> undef,
-; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = sdiv <8 x i32> undef,
-; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i32 = sdiv <16 x i32> undef,
+; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef,
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = sdiv <8 x i32> undef,
+; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = sdiv <16 x i32> undef,
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef,
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef,
@@ -532,9 +494,9 @@ define i32 @sdiv_uniformconst() {
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef,
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef,
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = sdiv <4 x i32> undef,
-; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = sdiv <8 x i32> undef,
-; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16i32 = sdiv <16 x i32> undef,
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef,
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef,
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = sdiv <16 x i32> undef,
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef,
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef,
@@ -551,9 +513,9 @@ define i32 @sdiv_uniformconst() {
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef,
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef,
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = sdiv <4 x i32> undef,
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = sdiv <8 x i32> undef,
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = sdiv <16 x i32> undef,
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef,
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef,
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef,
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef,
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef,
@@ -570,9 +532,9 @@ define i32 @sdiv_uniformconst() {
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef,
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef,
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = sdiv <4 x i32> undef,
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = sdiv <8 x i32> undef,
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = sdiv <16 x i32> undef,
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef,
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef,
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef,
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef,
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef,
@@ -589,9 +551,9 @@ define i32 @sdiv_uniformconst() {
 ; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef,
 ; SLM-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef,
 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = sdiv <4 x i32> undef,
-; SLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = sdiv <8 x i32> undef,
-; SLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16i32 = sdiv <16 x i32> undef,
+; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef,
+; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef,
+; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef,
 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef,
 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef,
@@ -608,9 +570,9 @@ define i32 @sdiv_uniformconst() {
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef,
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef,
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = sdiv <4 x i32> undef,
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = sdiv <8 x i32> undef,
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i32 = sdiv <16 x i32> undef,
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef,
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = sdiv <8 x i32> undef,
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = sdiv <16 x i32> undef,
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef,
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef,
@@ -651,9 +613,9 @@ define i32 @udiv_uniformconst() {
 ; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef,
 ; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef,
 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = udiv <4 x i32> undef,
-; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = udiv <8 x i32> undef,
-; SSE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16i32 = udiv <16 x i32> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = udiv <8 x i32> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = udiv <16 x i32> undef,
 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
 ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef,
 ; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef,
@@ -670,9 +632,9 @@ define i32 @udiv_uniformconst() {
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef,
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef,
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = udiv <4 x i32> undef,
-; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = udiv <8 x i32> undef,
-; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i32 = udiv <16 x i32> undef,
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef,
+; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef,
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef,
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef,
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = udiv <16 x i16> undef,
@@ -689,9 +651,9 @@ define i32 @udiv_uniformconst() {
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef,
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef,
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = udiv <4 x i32> undef,
-; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = udiv <8 x i32> undef,
-; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16i32 = udiv <16 x i32> undef,
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef,
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef,
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef,
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef,
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef,
@@ -708,9 +670,9 @@ define i32 @udiv_uniformconst() {
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef,
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef,
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = udiv <4 x i32> undef,
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = udiv <8 x i32> undef,
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = udiv <16 x i32> undef,
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef,
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef,
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef,
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef,
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef,
@@ -727,9 +689,9 @@ define i32 @udiv_uniformconst() {
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef,
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef,
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = udiv <4 x i32> undef,
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = udiv <8 x i32> undef,
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = udiv <16 x i32> undef,
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef,
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef,
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef,
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef,
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef,
@@ -746,9 +708,9 @@ define i32 @udiv_uniformconst() {
 ; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef,
 ; SLM-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef,
 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = udiv <4 x i32> undef,
-; SLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = udiv <8 x i32> undef,
-; SLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16i32 = udiv <16 x i32> undef,
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef,
+; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = udiv <8 x i32> undef,
+; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = udiv <16 x i32> undef,
 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef,
 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef,
@@ -765,9 +727,9 @@ define i32 @udiv_uniformconst() {
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef,
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef,
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = udiv <4 x i32> undef,
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = udiv <8 x i32> undef,
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i32 = udiv <16 x i32> undef,
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef,
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef,
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef,
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef,
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = udiv <16 x i16> undef,
diff --git a/llvm/test/Analysis/CostModel/X86/rem.ll b/llvm/test/Analysis/CostModel/X86/rem.ll
index 7942cda3725f3..30dd9a7a4f13f 100644
--- a/llvm/test/Analysis/CostModel/X86/rem.ll
+++ b/llvm/test/Analysis/CostModel/X86/rem.ll
@@ -450,62 +450,24 @@ define i32 @urem_const() {
 }
 
 define i32 @srem_uniformconst() {
-; SSE2-LABEL: 'srem_uniformconst'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SSSE3-LABEL: 'srem_uniformconst'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'srem_uniformconst'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7
-; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = srem <8 x i32> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = srem <16 x i32> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE-LABEL: 'srem_uniformconst'
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7
+; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = srem <8 x i32> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = srem <16 x i32> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7
+; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'srem_uniformconst'
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7
@@ -513,9 +475,9 @@ define i32 @srem_uniformconst() {
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef,
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef,
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7
-; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef,
-; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8i32 = srem <8 x i32> undef,
-; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16i32 = srem <16 x i32> undef,
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef,
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef,
+; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef,
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef,
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef,
@@ -532,9 +494,9 @@ define i32 @srem_uniformconst() {
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef,
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef,
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7
-; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef,
-; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef,
-; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16i32 = srem <16 x i32> undef,
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef,
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef,
+; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef,
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef,
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef,
@@ -551,9 +513,9 @@ define i32 @srem_uniformconst() {
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef,
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef,
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef,
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef,
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i32 = srem <16 x i32> undef,
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef,
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef,
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef,
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef,
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef,
@@ -570,9 +532,9 @@ define i32 @srem_uniformconst() {
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef,
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef,
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef,
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef,
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i32 = srem <16 x i32> undef,
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef,
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef,
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef,
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef,
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef,
@@ -583,53 +545,15 @@ define i32 @srem_uniformconst() {
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef,
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-; SLM-LABEL: 'srem_uniformconst'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7
-; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef,
-; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef,
-; SLM-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef,
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7
-; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef,
-; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = srem <8 x i32> undef,
-; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = srem <16 x i32> undef,
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef,
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef,
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef,
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef,
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef,
-; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef,
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; GLM-LABEL: 'srem_uniformconst'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7
-; GLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef,
-; GLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef,
-; GLM-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef,
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7
-; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef,
-; GLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = srem <8 x i32> undef,
-; GLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = srem <16 x i32> undef,
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef,
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef,
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef,
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef,
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef,
-; GLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef,
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
 ; BTVER2-LABEL: 'srem_uniformconst'
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef,
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef,
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef,
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef,
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8i32 = srem <8 x i32> undef,
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16i32 = srem <16 x i32> undef,
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef,
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef,
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef,
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef,
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef,
@@ -670,9 +594,9 @@ define i32 @urem_uniformconst() {
 ; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef,
 ; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef,
 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 7
-; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef,
-; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = urem <8 x i32> undef,
-; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = urem <16 x i32> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = urem <8 x i32> undef,
+; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = urem <16 x i32> undef,
 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7
 ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef,
 ; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef,
@@ -689,9 +613,9 @@ define i32 @urem_uniformconst() {
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef,
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef,
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 7
-; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef,
-; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i32 = urem <8 x i32> undef,
-; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V16i32 = urem <16 x i32> undef,
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef,
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef,
+; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef,
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef,
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = urem <16 x i16> undef,
@@ -708,9 +632,9 @@ define i32 @urem_uniformconst() {
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef,
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef,
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 7
-; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef,
-; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef,
-; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16i32 = urem <16 x i32> undef,
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef,
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef,
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = urem <16 x i32> undef,
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef,
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef,
@@ -727,9 +651,9 @@ define i32 @urem_uniformconst() {
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef,
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef,
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 7
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef,
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef,
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i32 = urem <16 x i32> undef,
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef,
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef,
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef,
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef,
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef,
@@ -746,9 +670,9 @@ define i32 @urem_uniformconst() {
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef,
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef,
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 7
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef,
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef,
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i32 = urem <16 x i32> undef,
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef,
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef,
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef,
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef,
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef,
@@ -765,9 +689,9 @@ define i32 @urem_uniformconst() {
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef,
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef,
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 7
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef,
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i32 = urem <8 x i32> undef,
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V16i32 = urem <16 x i32> undef,
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef,
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef,
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef,
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef,
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = urem <16 x i16> undef,
diff --git a/llvm/test/Analysis/CostModel/X86/vdiv-cost.ll b/llvm/test/Analysis/CostModel/X86/vdiv-cost.ll
index d87d21c487d84..8552509daeced 100644
--- a/llvm/test/Analysis/CostModel/X86/vdiv-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vdiv-cost.ll
@@ -10,7 +10,7 @@
 define <4 x i32> @test1(<4 x i32> %a) {
 ; CHECK-LABEL: 'test1'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %div = udiv <4 x i32> %a,
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <4 x i32> %a,
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
 ;
 %div = udiv <4 x i32> %a,
@@ -19,19 +19,19 @@ define <4 x i32> @test1(<4 x i32> %a) {
 define <8 x i32> @test2(<8 x i32> %a) {
 ; SSE-LABEL: 'test2'
-; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %div = udiv <8 x i32> %a,
+; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %div = udiv <8 x i32> %a,
 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
 ;
 ; AVX1-LABEL: 'test2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %div = udiv <8 x i32> %a,
+; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %div = udiv <8 x i32> %a,
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
 ;
 ; AVX2-LABEL: 'test2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %div = udiv <8 x i32> %a,
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <8 x i32> %a,
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
 ;
 ; AVX512-LABEL: 'test2'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %div = udiv <8 x i32> %a,
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <8 x i32> %a,
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
 ;
 %div = udiv <8 x i32> %a,
@@ -108,53 +108,29 @@ define <16 x i8> @test7(<16 x i8> %a) {
 }
 
 define <4 x i32> @test8(<4 x i32> %a) {
-; SSE2-LABEL: 'test8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %div = sdiv <4 x i32> %a,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
-;
-; SSSE3-LABEL: 'test8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %div = sdiv <4 x i32> %a,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
-;
-; SSE42-LABEL: 'test8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %div = sdiv <4 x i32> %a,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
-;
-; AVX-LABEL: 'test8'
-; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %div = sdiv <4 x i32> %a,
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
-;
-; AVX512-LABEL: 'test8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %div = sdiv <4 x i32> %a,
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
+; CHECK-LABEL: 'test8'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <4 x i32> %a,
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
 ;
 %div = sdiv <4 x i32> %a,
 ret <4 x i32> %div
 }
 
 define <8 x i32> @test9(<8 x i32> %a) {
-; SSE2-LABEL: 'test9'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %div = sdiv <8 x i32> %a,
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
-;
-; SSSE3-LABEL: 'test9'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %div = sdiv <8 x i32> %a,
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
-;
-; SSE42-LABEL: 'test9'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %div = sdiv <8 x i32> %a,
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
+; SSE-LABEL: 'test9'
+; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %div = sdiv <8 x i32> %a,
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
 ;
 ; AVX1-LABEL: 'test9'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %div = sdiv <8 x i32> %a,
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <8 x i32> %a,
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
 ;
 ; AVX2-LABEL: 'test9'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %div = sdiv <8 x i32> %a,
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i32> %a,
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
 ;
 ; AVX512-LABEL: 'test9'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %div = sdiv <8 x i32> %a,
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i32> %a,
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
 ;
 %div = sdiv <8 x i32> %a,
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll
index 30930eacb5007..fb4ec00906adc 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll
@@ -24,166 +24,43 @@ define void @sdiv_v16i32_uniformconst() {
 ; SSE-LABEL: @sdiv_v16i32_uniformconst(
-; SSE-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4
-; SSE-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4
-; SSE-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4
-; SSE-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4
-; SSE-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4
-; SSE-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4
-; SSE-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4
-; SSE-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4
-; SSE-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4
-; SSE-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4
-; SSE-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-; SSE-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-; SSE-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-; SSE-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-; SSE-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-; SSE-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-; SSE-NEXT: [[R0:%.*]] = sdiv i32 [[A0]], 5
-; SSE-NEXT: [[R1:%.*]] = sdiv i32 [[A1]], 5
-; SSE-NEXT: [[R2:%.*]] = sdiv i32 [[A2]], 5
-; SSE-NEXT: [[R3:%.*]] = sdiv i32 [[A3]], 5
-; SSE-NEXT: [[R4:%.*]] = sdiv i32 [[A4]], 5
-; SSE-NEXT: [[R5:%.*]] = sdiv i32 [[A5]], 5
-; SSE-NEXT: [[R6:%.*]] = sdiv i32 [[A6]], 5
-; SSE-NEXT: [[R7:%.*]] = sdiv i32 [[A7]], 5
-; SSE-NEXT: [[R8:%.*]] = sdiv i32 [[A8]], 5
-; SSE-NEXT: [[R9:%.*]] = sdiv i32 [[A9]], 5
-; SSE-NEXT: [[R10:%.*]] = sdiv i32 [[A10]], 5
-; SSE-NEXT: [[R11:%.*]] = sdiv i32 [[A11]], 5
-; SSE-NEXT: [[R12:%.*]] = sdiv i32 [[A12]], 5
-; SSE-NEXT: [[R13:%.*]] = sdiv i32 [[A13]], 5
-; SSE-NEXT: [[R14:%.*]] = sdiv i32 [[A14]], 5
-; SSE-NEXT: [[R15:%.*]] = sdiv i32 [[A15]], 5
-; SSE-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4
-; SSE-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4
-; SSE-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4
-; SSE-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4
-; SSE-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4
-; SSE-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4
-; SSE-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4
-; SSE-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4
-; SSE-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4
-; SSE-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4
-; SSE-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-; SSE-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-; SSE-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-; SSE-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-; SSE-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-; SSE-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
+; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT: [[TMP5:%.*]] = sdiv <4 x i32> [[TMP1]],
+; SSE-NEXT: [[TMP6:%.*]] = sdiv <4 x i32> [[TMP2]],
+; SSE-NEXT: [[TMP7:%.*]] = sdiv <4 x i32> [[TMP3]],
+; SSE-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP4]],
+; SSE-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
+; SSE-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
 ; SSE-NEXT: ret void
 ;
 ; SLM-LABEL: @sdiv_v16i32_uniformconst(
-; SLM-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4
-; SLM-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4
-; SLM-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4
-; SLM-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4
-; SLM-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4
-; SLM-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4
-; SLM-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4
-; SLM-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4
-; SLM-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4
-; SLM-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4
-; SLM-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-; SLM-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-; SLM-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-; SLM-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-; SLM-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-; SLM-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-; SLM-NEXT: [[R0:%.*]] = sdiv i32 [[A0]], 5
-; SLM-NEXT: [[R1:%.*]] = sdiv i32 [[A1]], 5
-; SLM-NEXT: [[R2:%.*]] = sdiv i32 [[A2]], 5
-; SLM-NEXT: [[R3:%.*]] = sdiv i32 [[A3]], 5
-; SLM-NEXT: [[R4:%.*]] = sdiv i32 [[A4]], 5
-; SLM-NEXT: [[R5:%.*]] = sdiv i32 [[A5]], 5
-; SLM-NEXT: [[R6:%.*]] = sdiv i32 [[A6]], 5
-; SLM-NEXT: [[R7:%.*]] = sdiv i32 [[A7]], 5
-; SLM-NEXT: [[R8:%.*]] = sdiv i32 [[A8]], 5
-; SLM-NEXT: [[R9:%.*]] = sdiv i32 [[A9]], 5
-; SLM-NEXT: [[R10:%.*]] = sdiv i32 [[A10]], 5
-; SLM-NEXT: [[R11:%.*]] = sdiv i32 [[A11]], 5
-; SLM-NEXT: [[R12:%.*]] = sdiv i32 [[A12]], 5
-; SLM-NEXT: [[R13:%.*]] = sdiv i32 [[A13]], 5
-; SLM-NEXT: [[R14:%.*]] = sdiv i32 [[A14]], 5
-; SLM-NEXT: [[R15:%.*]] = sdiv i32 [[A15]], 5
-; SLM-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4
-; SLM-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4
-; SLM-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4
-; SLM-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4
-; SLM-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4
-; SLM-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4
-; SLM-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4
-; SLM-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4
-; SLM-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4
-; SLM-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4
-; SLM-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-; SLM-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-; SLM-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-; SLM-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-; SLM-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-; SLM-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP5:%.*]] = sdiv <4 x i32> [[TMP1]],
+; SLM-NEXT: [[TMP6:%.*]] = sdiv <4 x i32> [[TMP2]],
+; SLM-NEXT: [[TMP7:%.*]] = sdiv <4 x i32> [[TMP3]],
+; SLM-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP4]],
+; SLM-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
+; SLM-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
 ; SLM-NEXT: ret void
 ;
-; AVX1-LABEL: @sdiv_v16i32_uniformconst(
-; AVX1-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4
-; AVX1-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4
-; AVX1-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4
-; AVX1-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4
-; AVX1-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4
-; AVX1-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4
-; AVX1-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4
-; AVX1-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4
-; AVX1-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4
-; AVX1-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4
-; AVX1-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-; AVX1-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-; AVX1-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-; AVX1-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-; AVX1-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-; AVX1-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-; AVX1-NEXT: [[R0:%.*]] = sdiv i32 [[A0]], 5
-; AVX1-NEXT: [[R1:%.*]] = sdiv i32 [[A1]], 5
-; AVX1-NEXT: [[R2:%.*]] = sdiv i32 [[A2]], 5
-; AVX1-NEXT: [[R3:%.*]] = sdiv i32 [[A3]], 5
-; AVX1-NEXT: [[R4:%.*]] = sdiv i32 [[A4]], 5
-; AVX1-NEXT: [[R5:%.*]] = sdiv i32 [[A5]], 5
-; AVX1-NEXT: [[R6:%.*]] = sdiv i32 [[A6]], 5
-; AVX1-NEXT: [[R7:%.*]] = sdiv i32 [[A7]], 5
-; AVX1-NEXT: [[R8:%.*]] = sdiv i32 [[A8]], 5
-; AVX1-NEXT: [[R9:%.*]] = sdiv i32 [[A9]], 5
-; AVX1-NEXT: [[R10:%.*]] = sdiv i32 [[A10]], 5
-; AVX1-NEXT: [[R11:%.*]] = sdiv i32 [[A11]], 5
-; AVX1-NEXT: [[R12:%.*]] = sdiv i32 [[A12]], 5
-; AVX1-NEXT: [[R13:%.*]] = sdiv i32 [[A13]], 5
-; AVX1-NEXT: [[R14:%.*]] = sdiv i32 [[A14]], 5
-; AVX1-NEXT: [[R15:%.*]] = sdiv i32 [[A15]], 5
-; AVX1-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4
-; AVX1-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4
-; AVX1-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4
-; AVX1-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4
-; AVX1-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4
-; AVX1-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4
-; AVX1-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4
-; AVX1-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4
-; AVX1-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4
-; AVX1-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4
-; AVX1-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-; AVX1-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-; AVX1-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-; AVX1-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-; AVX1-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-; AVX1-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
-; AVX1-NEXT: ret void
-;
-; AVX2-LABEL: @sdiv_v16i32_uniformconst(
-; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; AVX2-NEXT: [[TMP3:%.*]] = sdiv <8 x i32> [[TMP1]],
-; AVX2-NEXT: [[TMP4:%.*]] = sdiv <8 x i32> [[TMP2]],
-; AVX2-NEXT: store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX2-NEXT: store <8 x i32> [[TMP4]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
-; AVX2-NEXT: ret void
+; AVX-LABEL: @sdiv_v16i32_uniformconst(
+; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
+; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT: [[TMP3:%.*]] = sdiv <8 x i32> [[TMP1]],
+; AVX-NEXT: [[TMP4:%.*]] = sdiv <8 x i32> [[TMP2]],
+; AVX-NEXT: store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
+; AVX-NEXT: store <8 x i32> [[TMP4]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT: ret void
 ;
 ; AVX512-LABEL: @sdiv_v16i32_uniformconst(
 ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
@@ -244,166 +121,43 @@ define void @sdiv_v16i32_uniformconst() {
 define void @srem_v16i32_uniformconst() {
 ; SSE-LABEL: @srem_v16i32_uniformconst(
-; SSE-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4
-; SSE-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4
-; SSE-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4
-; SSE-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4
-; SSE-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4
-; SSE-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4
-; SSE-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4
-; SSE-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4
-; SSE-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4
-; SSE-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4
-; SSE-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-; SSE-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-; SSE-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-; SSE-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-; SSE-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-; SSE-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-; SSE-NEXT: [[R0:%.*]] = srem i32 [[A0]], 5
-; SSE-NEXT: [[R1:%.*]] = srem i32 [[A1]], 5
-; SSE-NEXT: [[R2:%.*]] = srem i32 [[A2]], 5
-; SSE-NEXT: [[R3:%.*]] = srem i32 [[A3]], 5
-; SSE-NEXT: [[R4:%.*]] = srem i32 [[A4]], 5
-; SSE-NEXT: [[R5:%.*]] = srem i32 [[A5]], 5
-; SSE-NEXT: [[R6:%.*]] = srem i32 [[A6]], 5
-; SSE-NEXT: [[R7:%.*]] = srem i32 [[A7]], 5
-; SSE-NEXT: [[R8:%.*]] = srem i32 [[A8]], 5
-; SSE-NEXT: [[R9:%.*]] = srem i32 [[A9]], 5
-; SSE-NEXT: [[R10:%.*]] = srem i32 [[A10]], 5
-; SSE-NEXT: [[R11:%.*]] = srem i32 [[A11]], 5
-; SSE-NEXT: [[R12:%.*]] = srem i32 [[A12]], 5
-; SSE-NEXT: [[R13:%.*]] = srem i32 [[A13]], 5
-; SSE-NEXT: [[R14:%.*]] = srem i32 [[A14]], 5
-; SSE-NEXT: [[R15:%.*]] = srem i32 [[A15]], 5
-; SSE-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4
-; SSE-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4
-; SSE-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4
-; SSE-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4
-; SSE-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4
-; SSE-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4
-; SSE-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4
-; SSE-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4
-; SSE-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4
-; SSE-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4
-; SSE-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-; SSE-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-; SSE-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-; SSE-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-; SSE-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-; SSE-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
+; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT: [[TMP5:%.*]] = srem <4 x i32> [[TMP1]],
+; SSE-NEXT: [[TMP6:%.*]] = srem <4 x i32> [[TMP2]],
+; SSE-NEXT: [[TMP7:%.*]] = srem <4 x i32> [[TMP3]],
+; SSE-NEXT: [[TMP8:%.*]] = srem <4 x i32> [[TMP4]],
+; SSE-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
+; SSE-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
 ; SSE-NEXT: ret void
 ;
 ; SLM-LABEL: @srem_v16i32_uniformconst(
-; SLM-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4
-; SLM-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4
-; SLM-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4
-; SLM-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4
-; SLM-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4
-; SLM-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4
-; SLM-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4
-; SLM-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4
-; SLM-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4
-; SLM-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4
-; SLM-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-; SLM-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-; SLM-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-; SLM-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-; SLM-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-; SLM-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-; SLM-NEXT: [[R0:%.*]] = srem i32 [[A0]], 5
-; SLM-NEXT: [[R1:%.*]] = srem i32 [[A1]], 5
-; SLM-NEXT: [[R2:%.*]] = srem i32 [[A2]], 5
-; SLM-NEXT: [[R3:%.*]] = srem i32 [[A3]], 5
-; SLM-NEXT: [[R4:%.*]] = srem i32 [[A4]], 5
-; SLM-NEXT: [[R5:%.*]] = srem i32 [[A5]], 5
-; SLM-NEXT: [[R6:%.*]] = srem i32 [[A6]], 5
-; SLM-NEXT: [[R7:%.*]] = srem i32 [[A7]], 5
-; SLM-NEXT: [[R8:%.*]] = srem i32 [[A8]], 5
-; SLM-NEXT: [[R9:%.*]] = srem i32 [[A9]], 5
-; SLM-NEXT: [[R10:%.*]] = srem i32 [[A10]], 5
-; SLM-NEXT: [[R11:%.*]] = srem i32 [[A11]], 5
-; SLM-NEXT: [[R12:%.*]] = srem i32 [[A12]], 5
-; SLM-NEXT: [[R13:%.*]] = srem i32 [[A13]], 5
-; SLM-NEXT: [[R14:%.*]] = srem i32 [[A14]], 5
-; SLM-NEXT: [[R15:%.*]] = srem i32 [[A15]], 5
-; SLM-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4
-; SLM-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4
-; SLM-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4
-; SLM-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4
-; SLM-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4
-; SLM-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4
-; SLM-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4
-; SLM-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4
-; SLM-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4
-; SLM-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4
-; SLM-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-; SLM-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-; SLM-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-; SLM-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-; SLM-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-; SLM-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP5:%.*]] = srem <4 x i32> [[TMP1]],
+; SLM-NEXT: [[TMP6:%.*]] = srem <4 x i32> [[TMP2]],
+; SLM-NEXT: [[TMP7:%.*]] = srem <4 x i32> [[TMP3]],
+; SLM-NEXT: [[TMP8:%.*]] = srem <4 x i32> [[TMP4]],
+; SLM-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
+; SLM-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
 ; SLM-NEXT: ret void
 ;
-; AVX1-LABEL: @srem_v16i32_uniformconst(
-; AVX1-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4
-; AVX1-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4
-; AVX1-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4
-; AVX1-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4
-; AVX1-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4
-; AVX1-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4
-; AVX1-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4
-; AVX1-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4
-; AVX1-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4
-; AVX1-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4
-; AVX1-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-; AVX1-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-; AVX1-NEXT: [[A12:%.*]] = load i32, i32* getelementptr
inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 -; AVX1-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 -; AVX1-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 -; AVX1-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 -; AVX1-NEXT: [[R0:%.*]] = srem i32 [[A0]], 5 -; AVX1-NEXT: [[R1:%.*]] = srem i32 [[A1]], 5 -; AVX1-NEXT: [[R2:%.*]] = srem i32 [[A2]], 5 -; AVX1-NEXT: [[R3:%.*]] = srem i32 [[A3]], 5 -; AVX1-NEXT: [[R4:%.*]] = srem i32 [[A4]], 5 -; AVX1-NEXT: [[R5:%.*]] = srem i32 [[A5]], 5 -; AVX1-NEXT: [[R6:%.*]] = srem i32 [[A6]], 5 -; AVX1-NEXT: [[R7:%.*]] = srem i32 [[A7]], 5 -; AVX1-NEXT: [[R8:%.*]] = srem i32 [[A8]], 5 -; AVX1-NEXT: [[R9:%.*]] = srem i32 [[A9]], 5 -; AVX1-NEXT: [[R10:%.*]] = srem i32 [[A10]], 5 -; AVX1-NEXT: [[R11:%.*]] = srem i32 [[A11]], 5 -; AVX1-NEXT: [[R12:%.*]] = srem i32 [[A12]], 5 -; AVX1-NEXT: [[R13:%.*]] = srem i32 [[A13]], 5 -; AVX1-NEXT: [[R14:%.*]] = srem i32 [[A14]], 5 -; AVX1-NEXT: [[R15:%.*]] = srem i32 [[A15]], 5 -; AVX1-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 -; AVX1-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 -; AVX1-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 -; AVX1-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 -; AVX1-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 -; AVX1-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 -; AVX1-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 -; AVX1-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 -; AVX1-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 -; AVX1-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 -; AVX1-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 -; AVX1-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 -; AVX1-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 -; AVX1-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 -; AVX1-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 -; AVX1-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 -; AVX1-NEXT: ret void -; -; AVX2-LABEL: @srem_v16i32_uniformconst( -; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4 -; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4 -; AVX2-NEXT: [[TMP3:%.*]] = srem <8 x i32> [[TMP1]], -; AVX2-NEXT: [[TMP4:%.*]] = srem <8 x i32> [[TMP2]], -; AVX2-NEXT: store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 
4 -; AVX2-NEXT: store <8 x i32> [[TMP4]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4 -; AVX2-NEXT: ret void +; AVX-LABEL: @srem_v16i32_uniformconst( +; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4 +; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX-NEXT: [[TMP3:%.*]] = srem <8 x i32> [[TMP1]], +; AVX-NEXT: [[TMP4:%.*]] = srem <8 x i32> [[TMP2]], +; AVX-NEXT: store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4 +; AVX-NEXT: store <8 x i32> [[TMP4]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX-NEXT: ret void ; ; AVX512-LABEL: @srem_v16i32_uniformconst( ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4 @@ -464,166 +218,43 @@ define void @srem_v16i32_uniformconst() { define void @udiv_v16i32_uniformconst() { ; SSE-LABEL: @udiv_v16i32_uniformconst( -; SSE-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 -; SSE-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 -; SSE-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 -; SSE-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 -; SSE-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 -; SSE-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 -; SSE-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 -; SSE-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 -; SSE-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4 -; SSE-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 -; SSE-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 -; SSE-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 -; SSE-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 -; SSE-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 -; SSE-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 -; SSE-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 -; SSE-NEXT: [[R0:%.*]] = udiv i32 [[A0]], 5 -; SSE-NEXT: [[R1:%.*]] = udiv i32 [[A1]], 5 -; SSE-NEXT: [[R2:%.*]] = udiv i32 [[A2]], 5 -; SSE-NEXT: [[R3:%.*]] = udiv i32 [[A3]], 5 -; SSE-NEXT: [[R4:%.*]] = udiv i32 [[A4]], 5 -; SSE-NEXT: [[R5:%.*]] = udiv i32 [[A5]], 5 -; SSE-NEXT: [[R6:%.*]] = udiv i32 [[A6]], 5 -; SSE-NEXT: [[R7:%.*]] = udiv i32 [[A7]], 5 -; SSE-NEXT: [[R8:%.*]] = udiv i32 [[A8]], 5 -; SSE-NEXT: [[R9:%.*]] = udiv i32 [[A9]], 5 -; SSE-NEXT: [[R10:%.*]] = 
udiv i32 [[A10]], 5 -; SSE-NEXT: [[R11:%.*]] = udiv i32 [[A11]], 5 -; SSE-NEXT: [[R12:%.*]] = udiv i32 [[A12]], 5 -; SSE-NEXT: [[R13:%.*]] = udiv i32 [[A13]], 5 -; SSE-NEXT: [[R14:%.*]] = udiv i32 [[A14]], 5 -; SSE-NEXT: [[R15:%.*]] = udiv i32 [[A15]], 5 -; SSE-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 -; SSE-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 -; SSE-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 -; SSE-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 -; SSE-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 -; SSE-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 -; SSE-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 -; SSE-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 -; SSE-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 -; SSE-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 -; SSE-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 -; SSE-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 -; SSE-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 -; SSE-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 -; SSE-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 -; SSE-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 +; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4 +; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4 +; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4 +; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4 +; SSE-NEXT: [[TMP5:%.*]] = udiv <4 x i32> [[TMP1]], +; SSE-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP2]], +; SSE-NEXT: [[TMP7:%.*]] = udiv <4 x i32> [[TMP3]], +; SSE-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP4]], +; SSE-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4 +; SSE-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4 +; SSE-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4 +; SSE-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4 ; SSE-NEXT: ret void ; ; SLM-LABEL: @udiv_v16i32_uniformconst( -; SLM-NEXT: [[A0:%.*]] = load i32, i32* 
getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 -; SLM-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 -; SLM-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 -; SLM-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 -; SLM-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 -; SLM-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 -; SLM-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 -; SLM-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 -; SLM-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4 -; SLM-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 -; SLM-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 -; SLM-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 -; SLM-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 -; SLM-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 -; SLM-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 -; SLM-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 -; SLM-NEXT: [[R0:%.*]] = udiv i32 [[A0]], 5 -; SLM-NEXT: [[R1:%.*]] = udiv i32 [[A1]], 5 -; SLM-NEXT: [[R2:%.*]] = udiv i32 [[A2]], 5 -; SLM-NEXT: [[R3:%.*]] = udiv i32 [[A3]], 5 -; SLM-NEXT: [[R4:%.*]] = udiv i32 [[A4]], 5 -; SLM-NEXT: [[R5:%.*]] = udiv i32 [[A5]], 5 -; SLM-NEXT: [[R6:%.*]] = udiv i32 [[A6]], 5 -; SLM-NEXT: [[R7:%.*]] = udiv i32 [[A7]], 5 -; SLM-NEXT: [[R8:%.*]] = udiv i32 [[A8]], 5 -; SLM-NEXT: [[R9:%.*]] = udiv i32 [[A9]], 5 -; SLM-NEXT: [[R10:%.*]] = udiv i32 [[A10]], 5 -; SLM-NEXT: [[R11:%.*]] = udiv i32 [[A11]], 5 -; SLM-NEXT: [[R12:%.*]] = udiv i32 [[A12]], 5 -; SLM-NEXT: [[R13:%.*]] = udiv i32 [[A13]], 5 -; SLM-NEXT: [[R14:%.*]] = udiv i32 [[A14]], 5 -; SLM-NEXT: [[R15:%.*]] = udiv i32 [[A15]], 5 -; SLM-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 -; SLM-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 -; SLM-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 -; SLM-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 -; SLM-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 -; SLM-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 -; SLM-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 -; SLM-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 -; SLM-NEXT: store i32 [[R8]], i32* getelementptr inbounds 
([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 -; SLM-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 -; SLM-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 -; SLM-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 -; SLM-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 -; SLM-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 -; SLM-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 -; SLM-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 +; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP5:%.*]] = udiv <4 x i32> [[TMP1]], +; SLM-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP2]], +; SLM-NEXT: [[TMP7:%.*]] = udiv <4 x i32> [[TMP3]], +; SLM-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP4]], +; SLM-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4 +; SLM-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4 +; SLM-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4 +; SLM-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4 ; SLM-NEXT: ret void ; -; AVX1-LABEL: @udiv_v16i32_uniformconst( -; AVX1-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 -; AVX1-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 -; AVX1-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 -; AVX1-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 -; AVX1-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 -; AVX1-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 -; AVX1-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 -; AVX1-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 -; AVX1-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4 -; AVX1-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 -; AVX1-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x 
i32], [16 x i32]* @a32, i32 0, i64 10), align 4 -; AVX1-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 -; AVX1-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 -; AVX1-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 -; AVX1-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 -; AVX1-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 -; AVX1-NEXT: [[R0:%.*]] = udiv i32 [[A0]], 5 -; AVX1-NEXT: [[R1:%.*]] = udiv i32 [[A1]], 5 -; AVX1-NEXT: [[R2:%.*]] = udiv i32 [[A2]], 5 -; AVX1-NEXT: [[R3:%.*]] = udiv i32 [[A3]], 5 -; AVX1-NEXT: [[R4:%.*]] = udiv i32 [[A4]], 5 -; AVX1-NEXT: [[R5:%.*]] = udiv i32 [[A5]], 5 -; AVX1-NEXT: [[R6:%.*]] = udiv i32 [[A6]], 5 -; AVX1-NEXT: [[R7:%.*]] = udiv i32 [[A7]], 5 -; AVX1-NEXT: [[R8:%.*]] = udiv i32 [[A8]], 5 -; AVX1-NEXT: [[R9:%.*]] = udiv i32 [[A9]], 5 -; AVX1-NEXT: [[R10:%.*]] = udiv i32 [[A10]], 5 -; AVX1-NEXT: [[R11:%.*]] = udiv i32 [[A11]], 5 -; AVX1-NEXT: [[R12:%.*]] = udiv i32 [[A12]], 5 -; AVX1-NEXT: [[R13:%.*]] = udiv i32 [[A13]], 5 -; AVX1-NEXT: [[R14:%.*]] = udiv i32 [[A14]], 5 -; AVX1-NEXT: [[R15:%.*]] = udiv i32 [[A15]], 5 -; AVX1-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 -; AVX1-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 -; AVX1-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 -; AVX1-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 -; AVX1-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 -; AVX1-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 -; AVX1-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 -; AVX1-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 -; AVX1-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 -; AVX1-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 -; AVX1-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 -; AVX1-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 -; AVX1-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 -; AVX1-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 -; AVX1-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 -; AVX1-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 -; AVX1-NEXT: ret void -; -; AVX2-LABEL: @udiv_v16i32_uniformconst( -; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4 -; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 
x i32>*), align 4 -; AVX2-NEXT: [[TMP3:%.*]] = udiv <8 x i32> [[TMP1]], -; AVX2-NEXT: [[TMP4:%.*]] = udiv <8 x i32> [[TMP2]], -; AVX2-NEXT: store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4 -; AVX2-NEXT: store <8 x i32> [[TMP4]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4 -; AVX2-NEXT: ret void +; AVX-LABEL: @udiv_v16i32_uniformconst( +; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4 +; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX-NEXT: [[TMP3:%.*]] = udiv <8 x i32> [[TMP1]], +; AVX-NEXT: [[TMP4:%.*]] = udiv <8 x i32> [[TMP2]], +; AVX-NEXT: store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4 +; AVX-NEXT: store <8 x i32> [[TMP4]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX-NEXT: ret void ; ; AVX512-LABEL: @udiv_v16i32_uniformconst( ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4 @@ -684,166 +315,43 @@ define void @udiv_v16i32_uniformconst() { define void @urem_v16i32_uniformconst() { ; SSE-LABEL: @urem_v16i32_uniformconst( -; SSE-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 -; SSE-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 -; SSE-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 -; SSE-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 -; SSE-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 -; SSE-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 -; SSE-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 -; SSE-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 -; SSE-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4 -; SSE-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 -; SSE-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 -; SSE-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 -; SSE-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 -; SSE-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 -; SSE-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 -; SSE-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 -; SSE-NEXT: [[R0:%.*]] = urem i32 [[A0]], 5 -; SSE-NEXT: [[R1:%.*]] = urem i32 [[A1]], 5 -; SSE-NEXT: [[R2:%.*]] = urem i32 [[A2]], 5 -; SSE-NEXT: [[R3:%.*]] = urem i32 [[A3]], 5 -; SSE-NEXT: [[R4:%.*]] = urem i32 [[A4]], 5 -; SSE-NEXT: [[R5:%.*]] = 
urem i32 [[A5]], 5 -; SSE-NEXT: [[R6:%.*]] = urem i32 [[A6]], 5 -; SSE-NEXT: [[R7:%.*]] = urem i32 [[A7]], 5 -; SSE-NEXT: [[R8:%.*]] = urem i32 [[A8]], 5 -; SSE-NEXT: [[R9:%.*]] = urem i32 [[A9]], 5 -; SSE-NEXT: [[R10:%.*]] = urem i32 [[A10]], 5 -; SSE-NEXT: [[R11:%.*]] = urem i32 [[A11]], 5 -; SSE-NEXT: [[R12:%.*]] = urem i32 [[A12]], 5 -; SSE-NEXT: [[R13:%.*]] = urem i32 [[A13]], 5 -; SSE-NEXT: [[R14:%.*]] = urem i32 [[A14]], 5 -; SSE-NEXT: [[R15:%.*]] = urem i32 [[A15]], 5 -; SSE-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 -; SSE-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 -; SSE-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 -; SSE-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 -; SSE-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 -; SSE-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 -; SSE-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 -; SSE-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 -; SSE-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 -; SSE-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 -; SSE-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 -; SSE-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 -; SSE-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 -; SSE-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 -; SSE-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 -; SSE-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 +; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4 +; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4 +; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4 +; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4 +; SSE-NEXT: [[TMP5:%.*]] = urem <4 x i32> [[TMP1]], +; SSE-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP2]], +; SSE-NEXT: [[TMP7:%.*]] = urem <4 x i32> [[TMP3]], +; SSE-NEXT: [[TMP8:%.*]] = urem <4 x i32> [[TMP4]], +; SSE-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4 +; SSE-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4 +; SSE-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4 +; SSE-NEXT: store <4 x i32> [[TMP8]], 
<4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4 ; SSE-NEXT: ret void ; ; SLM-LABEL: @urem_v16i32_uniformconst( -; SLM-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 -; SLM-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 -; SLM-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 -; SLM-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 -; SLM-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 -; SLM-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 -; SLM-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 -; SLM-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 -; SLM-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4 -; SLM-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 -; SLM-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 -; SLM-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 -; SLM-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 -; SLM-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 -; SLM-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 -; SLM-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 -; SLM-NEXT: [[R0:%.*]] = urem i32 [[A0]], 5 -; SLM-NEXT: [[R1:%.*]] = urem i32 [[A1]], 5 -; SLM-NEXT: [[R2:%.*]] = urem i32 [[A2]], 5 -; SLM-NEXT: [[R3:%.*]] = urem i32 [[A3]], 5 -; SLM-NEXT: [[R4:%.*]] = urem i32 [[A4]], 5 -; SLM-NEXT: [[R5:%.*]] = urem i32 [[A5]], 5 -; SLM-NEXT: [[R6:%.*]] = urem i32 [[A6]], 5 -; SLM-NEXT: [[R7:%.*]] = urem i32 [[A7]], 5 -; SLM-NEXT: [[R8:%.*]] = urem i32 [[A8]], 5 -; SLM-NEXT: [[R9:%.*]] = urem i32 [[A9]], 5 -; SLM-NEXT: [[R10:%.*]] = urem i32 [[A10]], 5 -; SLM-NEXT: [[R11:%.*]] = urem i32 [[A11]], 5 -; SLM-NEXT: [[R12:%.*]] = urem i32 [[A12]], 5 -; SLM-NEXT: [[R13:%.*]] = urem i32 [[A13]], 5 -; SLM-NEXT: [[R14:%.*]] = urem i32 [[A14]], 5 -; SLM-NEXT: [[R15:%.*]] = urem i32 [[A15]], 5 -; SLM-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 -; SLM-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 -; SLM-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 -; SLM-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 -; SLM-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 -; SLM-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 -; SLM-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 
x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 -; SLM-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 -; SLM-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 -; SLM-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 -; SLM-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 -; SLM-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 -; SLM-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 -; SLM-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 -; SLM-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 -; SLM-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 +; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP5:%.*]] = urem <4 x i32> [[TMP1]], +; SLM-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP2]], +; SLM-NEXT: [[TMP7:%.*]] = urem <4 x i32> [[TMP3]], +; SLM-NEXT: [[TMP8:%.*]] = urem <4 x i32> [[TMP4]], +; SLM-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4 +; SLM-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4 +; SLM-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4 +; SLM-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4 ; SLM-NEXT: ret void ; -; AVX1-LABEL: @urem_v16i32_uniformconst( -; AVX1-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 -; AVX1-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 -; AVX1-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 -; AVX1-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 -; AVX1-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 -; AVX1-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 -; AVX1-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 -; AVX1-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 -; AVX1-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* 
@a32, i32 0, i64 8), align 4 -; AVX1-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 -; AVX1-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 -; AVX1-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 -; AVX1-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 -; AVX1-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 -; AVX1-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 -; AVX1-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 -; AVX1-NEXT: [[R0:%.*]] = urem i32 [[A0]], 5 -; AVX1-NEXT: [[R1:%.*]] = urem i32 [[A1]], 5 -; AVX1-NEXT: [[R2:%.*]] = urem i32 [[A2]], 5 -; AVX1-NEXT: [[R3:%.*]] = urem i32 [[A3]], 5 -; AVX1-NEXT: [[R4:%.*]] = urem i32 [[A4]], 5 -; AVX1-NEXT: [[R5:%.*]] = urem i32 [[A5]], 5 -; AVX1-NEXT: [[R6:%.*]] = urem i32 [[A6]], 5 -; AVX1-NEXT: [[R7:%.*]] = urem i32 [[A7]], 5 -; AVX1-NEXT: [[R8:%.*]] = urem i32 [[A8]], 5 -; AVX1-NEXT: [[R9:%.*]] = urem i32 [[A9]], 5 -; AVX1-NEXT: [[R10:%.*]] = urem i32 [[A10]], 5 -; AVX1-NEXT: [[R11:%.*]] = urem i32 [[A11]], 5 -; AVX1-NEXT: [[R12:%.*]] = urem i32 [[A12]], 5 -; AVX1-NEXT: [[R13:%.*]] = urem i32 [[A13]], 5 -; AVX1-NEXT: [[R14:%.*]] = urem i32 [[A14]], 5 -; AVX1-NEXT: [[R15:%.*]] = urem i32 [[A15]], 5 -; AVX1-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 -; AVX1-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 -; AVX1-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 -; AVX1-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 -; AVX1-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 -; AVX1-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 -; AVX1-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 -; AVX1-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 -; AVX1-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 -; AVX1-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 -; AVX1-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 -; AVX1-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 -; AVX1-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 -; AVX1-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 -; AVX1-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 -; AVX1-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 -; AVX1-NEXT: ret void -; -; AVX2-LABEL: @urem_v16i32_uniformconst( -; AVX2-NEXT: [[TMP1:%.*]] = 
load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4 -; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4 -; AVX2-NEXT: [[TMP3:%.*]] = urem <8 x i32> [[TMP1]], -; AVX2-NEXT: [[TMP4:%.*]] = urem <8 x i32> [[TMP2]], -; AVX2-NEXT: store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4 -; AVX2-NEXT: store <8 x i32> [[TMP4]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4 -; AVX2-NEXT: ret void +; AVX-LABEL: @urem_v16i32_uniformconst( +; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4 +; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX-NEXT: [[TMP3:%.*]] = urem <8 x i32> [[TMP1]], +; AVX-NEXT: [[TMP4:%.*]] = urem <8 x i32> [[TMP2]], +; AVX-NEXT: store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4 +; AVX-NEXT: store <8 x i32> [[TMP4]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX-NEXT: ret void ; ; AVX512-LABEL: @urem_v16i32_uniformconst( ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4 From 576bd52f778405de08f309678e4fe4f7523bf7c4 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 10 Sep 2020 12:38:23 +0100 Subject: [PATCH 0245/1079] [Codegen][X86] Move AMX specific codegen tests into X86 subfolder. --- clang/test/CodeGen/{AMX => X86}/amx.c | 0 clang/test/CodeGen/{AMX => X86}/amx_errors.c | 0 clang/test/CodeGen/{AMX => X86}/amx_inline_asm.c | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename clang/test/CodeGen/{AMX => X86}/amx.c (100%) rename clang/test/CodeGen/{AMX => X86}/amx_errors.c (100%) rename clang/test/CodeGen/{AMX => X86}/amx_inline_asm.c (100%) diff --git a/clang/test/CodeGen/AMX/amx.c b/clang/test/CodeGen/X86/amx.c similarity index 100% rename from clang/test/CodeGen/AMX/amx.c rename to clang/test/CodeGen/X86/amx.c diff --git a/clang/test/CodeGen/AMX/amx_errors.c b/clang/test/CodeGen/X86/amx_errors.c similarity index 100% rename from clang/test/CodeGen/AMX/amx_errors.c rename to clang/test/CodeGen/X86/amx_errors.c diff --git a/clang/test/CodeGen/AMX/amx_inline_asm.c b/clang/test/CodeGen/X86/amx_inline_asm.c similarity index 100% rename from clang/test/CodeGen/AMX/amx_inline_asm.c rename to clang/test/CodeGen/X86/amx_inline_asm.c From 875b8537eea0662ead820979f18c83d5e31b4b8b Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Tue, 8 Sep 2020 14:38:16 +0200 Subject: [PATCH 0246/1079] [clang-tidy] Fix reST syntax Authored by Eisuke Kawashima [https://github.com/llvm/llvm-project/pull/245] --- .../checks/bugprone-argument-comment.rst | 1 + .../checks/bugprone-exception-escape.rst | 1 + ...bugprone-forwarding-reference-overload.rst | 6 +- .../checks/bugprone-lambda-function-name.rst | 2 +- .../bugprone-not-null-terminated-result.rst | 28 +++--- .../checks/bugprone-suspicious-include.rst | 4 +- .../bugprone-suspicious-missing-comma.rst | 6 +- .../checks/bugprone-terminating-continue.rst | 6 +- .../docs/clang-tidy/checks/cert-con36-c.rst | 4 +- .../docs/clang-tidy/checks/cert-con54-cpp.rst | 4 +- ...lines-avoid-non-const-global-variables.rst | 4 +- ...oogle-objc-global-variable-declaration.rst | 4 +- 
.../checks/google-readability-casting.rst | 4 +- .../checks/misc-misplaced-const.rst | 2 +- .../clang-tidy/checks/misc-no-recursion.rst | 2 + .../checks/misc-unused-parameters.rst | 2 +- ...replace-disallow-copy-and-assign-macro.rst | 2 +- .../checks/modernize-use-noexcept.rst | 14 +-- .../modernize-use-uncaught-exceptions.rst | 90 +++++++++---------- .../checks/readability-const-return-type.rst | 2 +- .../checks/zircon-temporary-objects.rst | 22 ++--- 21 files changed, 107 insertions(+), 103 deletions(-) diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-argument-comment.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-argument-comment.rst index 8484c393a12bd..8c59541b8d42a 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-argument-comment.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-argument-comment.rst @@ -29,6 +29,7 @@ Options account. .. option:: IgnoreSingleArgument + When true, the check will ignore the single argument. .. option:: CommentBoolLiterals diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-exception-escape.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-exception-escape.rst index 9c7f113a1bf3c..52f3ceff28149 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-exception-escape.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-exception-escape.rst @@ -5,6 +5,7 @@ bugprone-exception-escape Finds functions which may throw an exception directly or indirectly, but they should not. The functions which should not throw exceptions are the following: + * Destructors * Move constructors * Move assignment operators diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-forwarding-reference-overload.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-forwarding-reference-overload.rst index 61255e7596b40..b2a9e0f3b3dfb 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-forwarding-reference-overload.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-forwarding-reference-overload.rst @@ -37,7 +37,7 @@ The check warns for constructors C1 and C2, because those can hide copy and move constructors. We suppress warnings if the copy and the move constructors are both disabled (deleted or private), because there is nothing the perfect forwarding constructor could hide in this case. We also suppress warnings for constructors -like C3 that are guarded with an enable_if, assuming the programmer was aware of +like C3 that are guarded with an ``enable_if``, assuming the programmer was aware of the possible hiding. Background @@ -45,5 +45,5 @@ Background For deciding whether a constructor is guarded with enable_if, we consider the default values of the type parameters and the types of the constructor -parameters. If any part of these types is std::enable_if or std::enable_if_t, we -assume the constructor is guarded. +parameters. If any part of these types is ``std::enable_if`` or ``std::enable_if_t``, +we assume the constructor is guarded. diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-lambda-function-name.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-lambda-function-name.rst index 683977a3d2c06..6f0ba836fdf5c 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-lambda-function-name.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-lambda-function-name.rst @@ -10,7 +10,7 @@ is almost never what was intended. Example: .. 
code-block:: c++
-
+
   void FancyFunction() {
     [] { printf("Called from %s\n", __func__); }();
     [] { printf("Now called from %s\n", __FUNCTION__); }();
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-not-null-terminated-result.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-not-null-terminated-result.rst
index 9e5a702630c88..54e48268181ca 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-not-null-terminated-result.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-not-null-terminated-result.rst
@@ -5,7 +5,7 @@ bugprone-not-null-terminated-result
 
 Finds function calls where it is possible to cause a not null-terminated result.
 Usually the proper length of a string is ``strlen(src) + 1`` or equal length of
-this expression, because the null terminator needs an extra space. Without the
+this expression, because the null terminator needs an extra space. Without the
 null terminator it can result in undefined behaviour when the string is read.
 
 The following and their respective ``wchar_t`` based functions are checked:
@@ -17,27 +17,27 @@ The following is a real-world example where the programmer forgot to increase
 the passed third argument, which is ``size_t length``. That is why the length
 of the allocated memory is not enough to hold the null terminator.
 
- .. code-block:: c
+.. code-block:: c
 
-   static char *stringCpy(const std::string &str) {
-     char *result = reinterpret_cast<char *>(malloc(str.size()));
-     memcpy(result, str.data(), str.size());
-     return result;
-   }
+  static char *stringCpy(const std::string &str) {
+    char *result = reinterpret_cast<char *>(malloc(str.size()));
+    memcpy(result, str.data(), str.size());
+    return result;
+  }
 
 In addition to issuing warnings, fix-it rewrites all the necessary code. It
 also tries to adjust the capacity of the destination array:
 
- .. code-block:: c
+.. code-block:: c
 
-   static char *stringCpy(const std::string &str) {
-     char *result = reinterpret_cast<char *>(malloc(str.size() + 1));
-     strcpy(result, str.data());
-     return result;
-   }
+  static char *stringCpy(const std::string &str) {
+    char *result = reinterpret_cast<char *>(malloc(str.size() + 1));
+    strcpy(result, str.data());
+    return result;
+  }
 
 Note: It cannot guarantee to rewrite every of the path-sensitive memory
- allocations.
+allocations.
 
 .. _MemcpyTransformation:
 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-suspicious-include.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-suspicious-include.rst
index 237823ce8558b..3c05f39db12d5 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-suspicious-include.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-suspicious-include.rst
@@ -19,7 +19,7 @@ Options
 -------
 
 .. option:: HeaderFileExtensions
 
-   Default value: `";h;hh;hpp;hxx"`
+   Default value: ``";h;hh;hpp;hxx"``
    A semicolon-separated list of filename extensions of header files (the
    filename extensions should not contain a "." prefix). For extension-less
    header files, use an empty string or leave an empty string between ";"
@@ -27,6 +27,6 @@ Options
 
 .. option:: ImplementationFileExtensions
 
-   Default value: `"c;cc;cpp;cxx"`
+   Default value: ``"c;cc;cpp;cxx"``
    Likewise, a semicolon-separated list of filename extensions of
    implementation files.
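To ground the ``bugprone-not-null-terminated-result`` example above, here is a
minimal, self-contained sketch of the off-by-one pattern the check diagnoses
and its fix; the function names are hypothetical, and the code is illustrative
rather than taken from the check's test suite:

.. code-block:: c++

  #include <cstdlib>
  #include <cstring>

  // BUG (hypothetical example): `len` excludes the terminator, so the
  // buffer is one byte short and strncpy leaves the result unterminated.
  char *badCopy(const char *src) {
    std::size_t len = std::strlen(src);
    char *dst = static_cast<char *>(std::malloc(len));
    std::strncpy(dst, src, len);
    return dst;
  }

  // FIX: allocate strlen(src) + 1 bytes and copy the terminator as well.
  char *goodCopy(const char *src) {
    std::size_t len = std::strlen(src) + 1;
    char *dst = static_cast<char *>(std::malloc(len));
    std::memcpy(dst, src, len);  // copies the trailing '\0' too
    return dst;
  }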
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-suspicious-missing-comma.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-suspicious-missing-comma.rst
index 9fe9153117c2c..7455a2ef13509 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-suspicious-missing-comma.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-suspicious-missing-comma.rst
@@ -46,14 +46,14 @@ Options
 
 .. option:: SizeThreshold
 
    An unsigned integer specifying the minimum size of a string literal to be
-   considered by the check. Default is `5U`.
+   considered by the check. Default is ``5U``.
 
 .. option:: RatioThreshold
 
    A string specifying the maximum threshold ratio [0, 1.0] of suspicious string
-   literals to be considered. Default is `".2"`.
+   literals to be considered. Default is ``".2"``.
 
 .. option:: MaxConcatenatedTokens
 
    An unsigned integer specifying the maximum number of concatenated tokens.
-   Default is `5U`.
+   Default is ``5U``.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-terminating-continue.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-terminating-continue.rst
index 1a6ae812f2aa1..222de90037336 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-terminating-continue.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-terminating-continue.rst
@@ -3,15 +3,15 @@
 bugprone-terminating-continue
 =============================
 
-Detects `do while` loops with a condition always evaluating to false that
-have a `continue` statement, as this `continue` terminates the loop
+Detects ``do while`` loops with a condition always evaluating to false that
+have a ``continue`` statement, as this ``continue`` terminates the loop
 effectively.
 
 .. code-block:: c++
 
   void f() {
     do {
-      // some code
+      // some code
       continue; // terminating continue
       // some other code
     } while(false);
diff --git a/clang-tools-extra/docs/clang-tidy/checks/cert-con36-c.rst b/clang-tools-extra/docs/clang-tidy/checks/cert-con36-c.rst
index 7d74e05cf64d3..6fabd146993bc 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/cert-con36-c.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/cert-con36-c.rst
@@ -1,10 +1,10 @@
 .. title:: clang-tidy - cert-con36-c
 .. meta::
    :http-equiv=refresh: 5;URL=bugprone-spuriously-wake-up-functions.html
-
+
 cert-con36-c
 ============
 
 The cert-con36-c check is an alias, please see
-`bugprone-spuriously-wake-up-functions <bugprone-spuriously-wake-up-functions.html>`_
+`bugprone-spuriously-wake-up-functions <bugprone-spuriously-wake-up-functions.html>`_
 for more information.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/cert-con54-cpp.rst b/clang-tools-extra/docs/clang-tidy/checks/cert-con54-cpp.rst
index f74bc44962199..ff9237ef53a55 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/cert-con54-cpp.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/cert-con54-cpp.rst
@@ -1,10 +1,10 @@
 .. title:: clang-tidy - cert-con54-cpp
 .. meta::
    :http-equiv=refresh: 5;URL=bugprone-spuriously-wake-up-functions.html
-
+
 cert-con54-cpp
 ==============
 
 The cert-con54-cpp check is an alias, please see
-`bugprone-spuriously-wake-up-functions <bugprone-spuriously-wake-up-functions.html>`_
+`bugprone-spuriously-wake-up-functions <bugprone-spuriously-wake-up-functions.html>`_
 for more information.
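Both CERT aliases above redirect to ``bugprone-spuriously-wake-up-functions``;
as a rough, illustrative sketch of the pattern that check targets (not taken
from its documentation, and assuming only the standard ``<condition_variable>``
API):

.. code-block:: c++

  #include <condition_variable>
  #include <mutex>

  std::mutex m;
  std::condition_variable cv;
  bool ready = false;

  void waitForReady() {
    std::unique_lock<std::mutex> lock(m);
    // Flagged pattern: an `if` does not re-check the predicate, so a
    // spurious wakeup can proceed with `ready` still false:
    //   if (!ready) cv.wait(lock);
    // Safe form: the predicate overload re-tests the condition in a loop.
    cv.wait(lock, [] { return ready; });
  }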
diff --git a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-avoid-non-const-global-variables.rst b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-avoid-non-const-global-variables.rst index 4d1ffde62dbb7..53dafc7f8b435 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-avoid-non-const-global-variables.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-avoid-non-const-global-variables.rst @@ -3,8 +3,8 @@ cppcoreguidelines-avoid-non-const-global-variables ================================================== -Finds non-const global variables as described in `I.2 of C++ Core Guidelines ` . -As `R.6 of C++ Core Guidelines ` is a duplicate of rule I.2 it also covers that rule. +Finds non-const global variables as described in `I.2 of C++ Core Guidelines `_ . +As `R.6 of C++ Core Guidelines `_ is a duplicate of rule I.2 it also covers that rule. .. code-block:: c++ diff --git a/clang-tools-extra/docs/clang-tidy/checks/google-objc-global-variable-declaration.rst b/clang-tools-extra/docs/clang-tidy/checks/google-objc-global-variable-declaration.rst index e4b41fbc723a2..15b59996e3d31 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/google-objc-global-variable-declaration.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/google-objc-global-variable-declaration.rst @@ -9,8 +9,8 @@ pattern of variable names in Google's Objective-C Style Guide. The corresponding style guide rule: https://google.github.io/styleguide/objcguide.html#variable-names -All the global variables should follow the pattern of `g[A-Z].*` (variables) or -`k[A-Z].*` (constants). The check will suggest a variable name that follows the +All the global variables should follow the pattern of ``g[A-Z].*`` (variables) or +``k[A-Z].*`` (constants). The check will suggest a variable name that follows the pattern if it can be inferred from the original name. For code: diff --git a/clang-tools-extra/docs/clang-tidy/checks/google-readability-casting.rst b/clang-tools-extra/docs/clang-tidy/checks/google-readability-casting.rst index 4c9d1bc4f99d6..d927e1ce29fce 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/google-readability-casting.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/google-readability-casting.rst @@ -9,6 +9,6 @@ https://google.github.io/styleguide/cppguide.html#Casting Corresponding cpplint.py check name: `readability/casting`. -This check is similar to `-Wold-style-cast`, but it suggests automated fixes +This check is similar to ``-Wold-style-cast``, but it suggests automated fixes in some cases. The reported locations should not be different from the -ones generated by `-Wold-style-cast`. +ones generated by ``-Wold-style-cast``. diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc-misplaced-const.rst b/clang-tools-extra/docs/clang-tidy/checks/misc-misplaced-const.rst index e583ecb54cac1..3b21a87069863 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/misc-misplaced-const.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/misc-misplaced-const.rst @@ -8,7 +8,7 @@ This check diagnoses when a ``const`` qualifier is applied to a ``typedef``/ are often misleading to developers because the ``const`` applies to the pointer rather than the pointee. -For instance, in the following code, the resulting type is ``int *`` ``const`` +For instance, in the following code, the resulting type is ``int * const`` rather than ``const int *``: .. 
code-block:: c++ diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc-no-recursion.rst b/clang-tools-extra/docs/clang-tidy/checks/misc-no-recursion.rst index dad6f74ef7f4d..c8281075ded8f 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/misc-no-recursion.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/misc-no-recursion.rst @@ -9,10 +9,12 @@ diagnoses each function in the cycle, and displays one example of a possible call graph loop (recursion). References: + * CERT C++ Coding Standard rule `DCL56-CPP. Avoid cycles during initialization of static objects `_. * JPL Institutional Coding Standard for the C Programming Language (JPL DOCID D-60411) rule `2.4 Do not use direct or indirect recursion`. * OpenCL Specification, Version 1.2 rule `6.9 Restrictions: i. Recursion is not supported. `_. Limitations: + * The check does not handle calls done through function pointers * The check does not handle C++ destructors diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc-unused-parameters.rst b/clang-tools-extra/docs/clang-tidy/checks/misc-unused-parameters.rst index 3dfeb299de06b..d954c1ddb1c54 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/misc-unused-parameters.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/misc-unused-parameters.rst @@ -8,7 +8,7 @@ code (e.g. when a different parameter is used instead). The suggested fixes either comment parameter name out or remove the parameter completely, if all callers of the function are in the same translation unit and can be updated. -The check is similar to the `-Wunused-parameter` compiler diagnostic and can be +The check is similar to the ``-Wunused-parameter`` compiler diagnostic and can be used to prepare a codebase to enabling of that diagnostic. By default the check is more permissive (see :option:`StrictMode`). diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize-replace-disallow-copy-and-assign-macro.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize-replace-disallow-copy-and-assign-macro.rst index 6717c928506a7..c1c8ace0c937d 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize-replace-disallow-copy-and-assign-macro.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize-replace-disallow-copy-and-assign-macro.rst @@ -37,7 +37,7 @@ Known Limitations ----------------- * Notice that the migration example above leaves the ``private`` access - specification untouched. You might want to run the check:doc:`modernize-use-equals-delete + specification untouched. You might want to run the check :doc:`modernize-use-equals-delete ` to get warnings for deleted functions in private sections. diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize-use-noexcept.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize-use-noexcept.rst index 084dad74f8d5a..8addc8b4b66dd 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize-use-noexcept.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize-use-noexcept.rst @@ -15,25 +15,25 @@ Example .. code-block:: c++ void foo() throw(); - void bar() throw(int) {} + void bar() throw(int) {} transforms to: .. code-block:: c++ void foo() noexcept; - void bar() noexcept(false) {} + void bar() noexcept(false) {} Options ------- .. option:: ReplacementString -Users can use :option:`ReplacementString` to specify a macro to use -instead of ``noexcept``. This is useful when maintaining source code -that uses custom exception specification marking other than -``noexcept``. Fix-it hints will only be generated for non-throwing -specifications. 
+ Users can use :option:`ReplacementString` to specify a macro to use + instead of ``noexcept``. This is useful when maintaining source code + that uses custom exception specification marking other than + ``noexcept``. Fix-it hints will only be generated for non-throwing + specifications. Example ^^^^^^^ diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize-use-uncaught-exceptions.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize-use-uncaught-exceptions.rst index 615f2e3f4a27f..d10556ff3b60e 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize-use-uncaught-exceptions.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize-use-uncaught-exceptions.rst @@ -12,53 +12,53 @@ they will be replaced with. .. code-block:: c++ - #define MACRO1 std::uncaught_exception - #define MACRO2 std::uncaught_exception - - int uncaught_exception() { - return 0; - } - - int main() { - int res; - - res = uncaught_exception(); - // No warning, since it is not the deprecated function from namespace std - - res = MACRO2(); - // Warning, but will not be replaced - - res = std::uncaught_exception(); - // Warning and replaced - - using std::uncaught_exception; - // Warning and replaced - - res = uncaught_exception(); - // Warning and replaced - } + #define MACRO1 std::uncaught_exception + #define MACRO2 std::uncaught_exception + + int uncaught_exception() { + return 0; + } + + int main() { + int res; + + res = uncaught_exception(); + // No warning, since it is not the deprecated function from namespace std + + res = MACRO2(); + // Warning, but will not be replaced + + res = std::uncaught_exception(); + // Warning and replaced + + using std::uncaught_exception; + // Warning and replaced + + res = uncaught_exception(); + // Warning and replaced + } After applying the fixes the code will look like the following: .. code-block:: c++ - #define MACRO1 std::uncaught_exception - #define MACRO2 std::uncaught_exception - - int uncaught_exception() { - return 0; - } - - int main() { - int res; - - res = uncaught_exception(); - - res = MACRO2(); - - res = std::uncaught_exceptions(); - - using std::uncaught_exceptions; - - res = uncaught_exceptions(); - } + #define MACRO1 std::uncaught_exception + #define MACRO2 std::uncaught_exception + + int uncaught_exception() { + return 0; + } + + int main() { + int res; + + res = uncaught_exception(); + + res = MACRO2(); + + res = std::uncaught_exceptions(); + + using std::uncaught_exceptions; + + res = uncaught_exceptions(); + } diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability-const-return-type.rst b/clang-tools-extra/docs/clang-tidy/checks/readability-const-return-type.rst index e236d8d00e627..6242e43818d48 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/readability-const-return-type.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/readability-const-return-type.rst @@ -11,7 +11,7 @@ return types. Examples: .. code-block:: c++ - + const int foo(); const Clazz foo(); Clazz *const foo(); diff --git a/clang-tools-extra/docs/clang-tidy/checks/zircon-temporary-objects.rst b/clang-tools-extra/docs/clang-tidy/checks/zircon-temporary-objects.rst index 7491f77e4b9f4..ab1225faa2139 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/zircon-temporary-objects.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/zircon-temporary-objects.rst @@ -3,12 +3,12 @@ zircon-temporary-objects ======================== -Warns on construction of specific temporary objects in the Zircon kernel. 
-If the object should be flagged, the fully +Warns on construction of specific temporary objects in the Zircon kernel. +If the object should be flagged, the fully qualified type name must be explicitly passed to the check. -For example, given the list of classes "Foo" and "NS::Bar", all of the -following will trigger the warning: +For example, given the list of classes "Foo" and "NS::Bar", all of the +following will trigger the warning: .. code-block:: c++ @@ -26,14 +26,14 @@ With the same list, the following will not trigger the warning: .. code-block:: c++ - Foo F; // Non-temporary construction okay - Foo F(param); // Non-temporary construction okay - Foo *F = new Foo(); // New construction okay + Foo F; // Non-temporary construction okay + Foo F(param); // Non-temporary construction okay + Foo *F = new Foo(); // New construction okay - Bar(); // Not NS::Bar, so okay - NS::Bar B; // Non-temporary construction okay + Bar(); // Not NS::Bar, so okay + NS::Bar B; // Non-temporary construction okay -Note that objects must be explicitly specified in order to be flagged, +Note that objects must be explicitly specified in order to be flagged, and so objects that inherit a specified object will not be flagged. This check matches temporary objects without regard for inheritance and so a @@ -49,5 +49,5 @@ Options .. option:: Names - A semi-colon-separated list of fully-qualified names of C++ classes that + A semi-colon-separated list of fully-qualified names of C++ classes that should not be constructed as temporaries. Default is empty. From 2239882f7d0e4e6d5702bc20ba071a92ec75d37c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 10 Sep 2020 12:58:03 +0100 Subject: [PATCH 0247/1079] [CodeGen][X86] Move x86 builtin intrinsic/codegen tests into X86 subfolder. There are still plenty of tests that specify x86 as a triple but most shouldn't be doing anything very target-specific - we can move any that I have missed on a case-by-case basis.
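For reference, the triples in question are pinned in each test's lit RUN line; a representative example follows (illustrative flags only — the exact target features and options vary from test to test):

.. code-block:: c++

  // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-unknown \
  // RUN:   -target-feature +avx -emit-llvm -o - | FileCheck %s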
--- clang/test/CodeGen/{ => X86}/3dnow-builtins.c | 0 clang/test/CodeGen/{ => X86}/adc-builtins.c | 0 clang/test/CodeGen/{ => X86}/adx-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx-builtins-constrained-cmp.c | 0 clang/test/CodeGen/{ => X86}/avx-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx-cmp-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx-shuffle-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx2-builtins.c | 0 .../test/CodeGen/{ => X86}/avx512-inline-asm-kregisters-basics.c | 0 clang/test/CodeGen/{ => X86}/avx512-kconstraints-att_inline_asm.c | 0 clang/test/CodeGen/{ => X86}/avx512-reduceIntrin.c | 0 clang/test/CodeGen/{ => X86}/avx512-reduceMinMaxIntrin.c | 0 clang/test/CodeGen/{ => X86}/avx512bf16-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512bitalg-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512bw-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512cdintrin.c | 0 clang/test/CodeGen/{ => X86}/avx512dq-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512er-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512f-builtins-constrained-cmp.c | 0 clang/test/CodeGen/{ => X86}/avx512f-builtins-constrained.c | 0 clang/test/CodeGen/{ => X86}/avx512f-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512ifma-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512ifmavl-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512pf-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512vbmi-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512vbmi2-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512vbmivl-builtin.c | 0 clang/test/CodeGen/{ => X86}/avx512vl-builtins-constrained-cmp.c | 0 clang/test/CodeGen/{ => X86}/avx512vl-builtins-constrained.c | 0 clang/test/CodeGen/{ => X86}/avx512vl-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512vlbf16-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512vlbitalg-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512vlbw-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512vlcd-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512vldq-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512vlvbmi2-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512vlvnni-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512vnni-builtins.c | 0 clang/test/CodeGen/{ => X86}/avx512vpopcntdqintrin.c | 0 clang/test/CodeGen/{ => X86}/avx512vpopcntdqvlintrin.c | 0 clang/test/CodeGen/{ => X86}/bitscan-builtins.c | 0 clang/test/CodeGen/{ => X86}/bmi-builtins.c | 0 clang/test/CodeGen/{ => X86}/bmi2-builtins.c | 0 clang/test/CodeGen/{ => X86}/builtin-clflushopt.c | 0 clang/test/CodeGen/{ => X86}/builtin-clwb.c | 0 clang/test/CodeGen/{ => X86}/builtin-clzero.c | 0 clang/test/CodeGen/{ => X86}/builtin-movdir.c | 0 clang/test/CodeGen/{ => X86}/builtin-wbinvd.c | 0 clang/test/CodeGen/{ => X86}/builtin-wbnoinvd.c | 0 clang/test/CodeGen/{ => X86}/cetintrin.c | 0 clang/test/CodeGen/{ => X86}/cldemote.c | 0 clang/test/CodeGen/{ => X86}/f16c-builtins-constrained.c | 0 clang/test/CodeGen/{ => X86}/f16c-builtins.c | 0 clang/test/CodeGen/{ => X86}/fma-builtins-constrained.c | 0 clang/test/CodeGen/{ => X86}/fma-builtins.c | 0 clang/test/CodeGen/{ => X86}/fma4-builtins.c | 0 clang/test/CodeGen/{ => X86}/fsgsbase-builtins.c | 0 clang/test/CodeGen/{ => X86}/gfni-builtins.c | 0 clang/test/CodeGen/{ => X86}/intel-avx512vlvp2intersect.c | 0 clang/test/CodeGen/{ => X86}/intel-avx512vp2intersect.c | 0 clang/test/CodeGen/{ => X86}/invpcid.c | 0 clang/test/CodeGen/{ => X86}/lwp-builtins.c | 0 clang/test/CodeGen/{ => X86}/lzcnt-builtins.c | 0 clang/test/CodeGen/{ => X86}/mmx-builtins.c | 0 clang/test/CodeGen/{ => X86}/mmx-inline-asm-error.c | 0 
clang/test/CodeGen/{ => X86}/mmx-inline-asm.c | 0 clang/test/CodeGen/{ => X86}/mmx-shift-with-immediate.c | 0 clang/test/CodeGen/{ => X86}/movbe-builtins.c | 0 clang/test/CodeGen/{ => X86}/pause.c | 0 clang/test/CodeGen/{ => X86}/pclmul-builtins.c | 0 clang/test/CodeGen/{ => X86}/pku.c | 0 clang/test/CodeGen/{ => X86}/popcnt-builtins.c | 0 clang/test/CodeGen/{ => X86}/prefetchw-builtins.c | 0 clang/test/CodeGen/{ => X86}/ptwrite.c | 0 clang/test/CodeGen/{ => X86}/rd-builtins.c | 0 clang/test/CodeGen/{ => X86}/rdpid-builtins.c | 0 clang/test/CodeGen/{ => X86}/rdrand-builtins.c | 0 clang/test/CodeGen/{ => X86}/rot-intrinsics.c | 0 clang/test/CodeGen/{ => X86}/rtm-builtins.c | 0 clang/test/CodeGen/{ => X86}/sha-builtins.c | 0 clang/test/CodeGen/{ => X86}/sse-builtins-constrained-cmp.c | 0 clang/test/CodeGen/{ => X86}/sse-builtins-constrained.c | 0 clang/test/CodeGen/{ => X86}/sse-builtins-dbg.c | 0 clang/test/CodeGen/{ => X86}/sse-builtins.c | 0 clang/test/CodeGen/{ => X86}/sse.c | 0 clang/test/CodeGen/{ => X86}/sse2-builtins-constrained-cmp.c | 0 clang/test/CodeGen/{ => X86}/sse2-builtins.c | 0 clang/test/CodeGen/{ => X86}/sse3-builtins.c | 0 clang/test/CodeGen/{ => X86}/sse41-builtins.c | 0 clang/test/CodeGen/{ => X86}/sse42-builtins.c | 0 clang/test/CodeGen/{ => X86}/sse4a-builtins.c | 0 clang/test/CodeGen/{ => X86}/ssse3-builtins.c | 0 clang/test/CodeGen/{ => X86}/tbm-builtins.c | 0 clang/test/CodeGen/{ => X86}/vaes-builtins.c | 0 clang/test/CodeGen/{ => X86}/vpclmulqdq-builtins.c | 0 clang/test/CodeGen/{ => X86}/waitpkg.c | 0 clang/test/CodeGen/{ => X86}/xop-builtins-cmp.c | 0 clang/test/CodeGen/{ => X86}/xop-builtins.c | 0 98 files changed, 0 insertions(+), 0 deletions(-) rename clang/test/CodeGen/{ => X86}/3dnow-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/adc-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/adx-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx-builtins-constrained-cmp.c (100%) rename clang/test/CodeGen/{ => X86}/avx-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx-cmp-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx-shuffle-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx2-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512-inline-asm-kregisters-basics.c (100%) rename clang/test/CodeGen/{ => X86}/avx512-kconstraints-att_inline_asm.c (100%) rename clang/test/CodeGen/{ => X86}/avx512-reduceIntrin.c (100%) rename clang/test/CodeGen/{ => X86}/avx512-reduceMinMaxIntrin.c (100%) rename clang/test/CodeGen/{ => X86}/avx512bf16-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512bitalg-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512bw-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512cdintrin.c (100%) rename clang/test/CodeGen/{ => X86}/avx512dq-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512er-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512f-builtins-constrained-cmp.c (100%) rename clang/test/CodeGen/{ => X86}/avx512f-builtins-constrained.c (100%) rename clang/test/CodeGen/{ => X86}/avx512f-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512ifma-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512ifmavl-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512pf-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512vbmi-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512vbmi2-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512vbmivl-builtin.c (100%) rename clang/test/CodeGen/{ => X86}/avx512vl-builtins-constrained-cmp.c 
(100%) rename clang/test/CodeGen/{ => X86}/avx512vl-builtins-constrained.c (100%) rename clang/test/CodeGen/{ => X86}/avx512vl-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512vlbf16-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512vlbitalg-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512vlbw-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512vlcd-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512vldq-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512vlvbmi2-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512vlvnni-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512vnni-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/avx512vpopcntdqintrin.c (100%) rename clang/test/CodeGen/{ => X86}/avx512vpopcntdqvlintrin.c (100%) rename clang/test/CodeGen/{ => X86}/bitscan-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/bmi-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/bmi2-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/builtin-clflushopt.c (100%) rename clang/test/CodeGen/{ => X86}/builtin-clwb.c (100%) rename clang/test/CodeGen/{ => X86}/builtin-clzero.c (100%) rename clang/test/CodeGen/{ => X86}/builtin-movdir.c (100%) rename clang/test/CodeGen/{ => X86}/builtin-wbinvd.c (100%) rename clang/test/CodeGen/{ => X86}/builtin-wbnoinvd.c (100%) rename clang/test/CodeGen/{ => X86}/cetintrin.c (100%) rename clang/test/CodeGen/{ => X86}/cldemote.c (100%) rename clang/test/CodeGen/{ => X86}/f16c-builtins-constrained.c (100%) rename clang/test/CodeGen/{ => X86}/f16c-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/fma-builtins-constrained.c (100%) rename clang/test/CodeGen/{ => X86}/fma-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/fma4-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/fsgsbase-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/gfni-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/intel-avx512vlvp2intersect.c (100%) rename clang/test/CodeGen/{ => X86}/intel-avx512vp2intersect.c (100%) rename clang/test/CodeGen/{ => X86}/invpcid.c (100%) rename clang/test/CodeGen/{ => X86}/lwp-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/lzcnt-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/mmx-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/mmx-inline-asm-error.c (100%) rename clang/test/CodeGen/{ => X86}/mmx-inline-asm.c (100%) rename clang/test/CodeGen/{ => X86}/mmx-shift-with-immediate.c (100%) rename clang/test/CodeGen/{ => X86}/movbe-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/pause.c (100%) rename clang/test/CodeGen/{ => X86}/pclmul-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/pku.c (100%) rename clang/test/CodeGen/{ => X86}/popcnt-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/prefetchw-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/ptwrite.c (100%) rename clang/test/CodeGen/{ => X86}/rd-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/rdpid-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/rdrand-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/rot-intrinsics.c (100%) rename clang/test/CodeGen/{ => X86}/rtm-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/sha-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/sse-builtins-constrained-cmp.c (100%) rename clang/test/CodeGen/{ => X86}/sse-builtins-constrained.c (100%) rename clang/test/CodeGen/{ => X86}/sse-builtins-dbg.c (100%) rename clang/test/CodeGen/{ => X86}/sse-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/sse.c (100%) rename 
clang/test/CodeGen/{ => X86}/sse2-builtins-constrained-cmp.c (100%) rename clang/test/CodeGen/{ => X86}/sse2-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/sse3-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/sse41-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/sse42-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/sse4a-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/ssse3-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/tbm-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/vaes-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/vpclmulqdq-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/waitpkg.c (100%) rename clang/test/CodeGen/{ => X86}/xop-builtins-cmp.c (100%) rename clang/test/CodeGen/{ => X86}/xop-builtins.c (100%) diff --git a/clang/test/CodeGen/3dnow-builtins.c b/clang/test/CodeGen/X86/3dnow-builtins.c similarity index 100% rename from clang/test/CodeGen/3dnow-builtins.c rename to clang/test/CodeGen/X86/3dnow-builtins.c diff --git a/clang/test/CodeGen/adc-builtins.c b/clang/test/CodeGen/X86/adc-builtins.c similarity index 100% rename from clang/test/CodeGen/adc-builtins.c rename to clang/test/CodeGen/X86/adc-builtins.c diff --git a/clang/test/CodeGen/adx-builtins.c b/clang/test/CodeGen/X86/adx-builtins.c similarity index 100% rename from clang/test/CodeGen/adx-builtins.c rename to clang/test/CodeGen/X86/adx-builtins.c diff --git a/clang/test/CodeGen/avx-builtins-constrained-cmp.c b/clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c similarity index 100% rename from clang/test/CodeGen/avx-builtins-constrained-cmp.c rename to clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c diff --git a/clang/test/CodeGen/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c similarity index 100% rename from clang/test/CodeGen/avx-builtins.c rename to clang/test/CodeGen/X86/avx-builtins.c diff --git a/clang/test/CodeGen/avx-cmp-builtins.c b/clang/test/CodeGen/X86/avx-cmp-builtins.c similarity index 100% rename from clang/test/CodeGen/avx-cmp-builtins.c rename to clang/test/CodeGen/X86/avx-cmp-builtins.c diff --git a/clang/test/CodeGen/avx-shuffle-builtins.c b/clang/test/CodeGen/X86/avx-shuffle-builtins.c similarity index 100% rename from clang/test/CodeGen/avx-shuffle-builtins.c rename to clang/test/CodeGen/X86/avx-shuffle-builtins.c diff --git a/clang/test/CodeGen/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c similarity index 100% rename from clang/test/CodeGen/avx2-builtins.c rename to clang/test/CodeGen/X86/avx2-builtins.c diff --git a/clang/test/CodeGen/avx512-inline-asm-kregisters-basics.c b/clang/test/CodeGen/X86/avx512-inline-asm-kregisters-basics.c similarity index 100% rename from clang/test/CodeGen/avx512-inline-asm-kregisters-basics.c rename to clang/test/CodeGen/X86/avx512-inline-asm-kregisters-basics.c diff --git a/clang/test/CodeGen/avx512-kconstraints-att_inline_asm.c b/clang/test/CodeGen/X86/avx512-kconstraints-att_inline_asm.c similarity index 100% rename from clang/test/CodeGen/avx512-kconstraints-att_inline_asm.c rename to clang/test/CodeGen/X86/avx512-kconstraints-att_inline_asm.c diff --git a/clang/test/CodeGen/avx512-reduceIntrin.c b/clang/test/CodeGen/X86/avx512-reduceIntrin.c similarity index 100% rename from clang/test/CodeGen/avx512-reduceIntrin.c rename to clang/test/CodeGen/X86/avx512-reduceIntrin.c diff --git a/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c b/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c similarity index 100% rename from clang/test/CodeGen/avx512-reduceMinMaxIntrin.c rename to 
clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c diff --git a/clang/test/CodeGen/avx512bf16-builtins.c b/clang/test/CodeGen/X86/avx512bf16-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512bf16-builtins.c rename to clang/test/CodeGen/X86/avx512bf16-builtins.c diff --git a/clang/test/CodeGen/avx512bitalg-builtins.c b/clang/test/CodeGen/X86/avx512bitalg-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512bitalg-builtins.c rename to clang/test/CodeGen/X86/avx512bitalg-builtins.c diff --git a/clang/test/CodeGen/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512bw-builtins.c rename to clang/test/CodeGen/X86/avx512bw-builtins.c diff --git a/clang/test/CodeGen/avx512cdintrin.c b/clang/test/CodeGen/X86/avx512cdintrin.c similarity index 100% rename from clang/test/CodeGen/avx512cdintrin.c rename to clang/test/CodeGen/X86/avx512cdintrin.c diff --git a/clang/test/CodeGen/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512dq-builtins.c rename to clang/test/CodeGen/X86/avx512dq-builtins.c diff --git a/clang/test/CodeGen/avx512er-builtins.c b/clang/test/CodeGen/X86/avx512er-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512er-builtins.c rename to clang/test/CodeGen/X86/avx512er-builtins.c diff --git a/clang/test/CodeGen/avx512f-builtins-constrained-cmp.c b/clang/test/CodeGen/X86/avx512f-builtins-constrained-cmp.c similarity index 100% rename from clang/test/CodeGen/avx512f-builtins-constrained-cmp.c rename to clang/test/CodeGen/X86/avx512f-builtins-constrained-cmp.c diff --git a/clang/test/CodeGen/avx512f-builtins-constrained.c b/clang/test/CodeGen/X86/avx512f-builtins-constrained.c similarity index 100% rename from clang/test/CodeGen/avx512f-builtins-constrained.c rename to clang/test/CodeGen/X86/avx512f-builtins-constrained.c diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512f-builtins.c rename to clang/test/CodeGen/X86/avx512f-builtins.c diff --git a/clang/test/CodeGen/avx512ifma-builtins.c b/clang/test/CodeGen/X86/avx512ifma-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512ifma-builtins.c rename to clang/test/CodeGen/X86/avx512ifma-builtins.c diff --git a/clang/test/CodeGen/avx512ifmavl-builtins.c b/clang/test/CodeGen/X86/avx512ifmavl-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512ifmavl-builtins.c rename to clang/test/CodeGen/X86/avx512ifmavl-builtins.c diff --git a/clang/test/CodeGen/avx512pf-builtins.c b/clang/test/CodeGen/X86/avx512pf-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512pf-builtins.c rename to clang/test/CodeGen/X86/avx512pf-builtins.c diff --git a/clang/test/CodeGen/avx512vbmi-builtins.c b/clang/test/CodeGen/X86/avx512vbmi-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512vbmi-builtins.c rename to clang/test/CodeGen/X86/avx512vbmi-builtins.c diff --git a/clang/test/CodeGen/avx512vbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512vbmi2-builtins.c rename to clang/test/CodeGen/X86/avx512vbmi2-builtins.c diff --git a/clang/test/CodeGen/avx512vbmivl-builtin.c b/clang/test/CodeGen/X86/avx512vbmivl-builtin.c similarity index 100% rename from clang/test/CodeGen/avx512vbmivl-builtin.c rename to 
clang/test/CodeGen/X86/avx512vbmivl-builtin.c diff --git a/clang/test/CodeGen/avx512vl-builtins-constrained-cmp.c b/clang/test/CodeGen/X86/avx512vl-builtins-constrained-cmp.c similarity index 100% rename from clang/test/CodeGen/avx512vl-builtins-constrained-cmp.c rename to clang/test/CodeGen/X86/avx512vl-builtins-constrained-cmp.c diff --git a/clang/test/CodeGen/avx512vl-builtins-constrained.c b/clang/test/CodeGen/X86/avx512vl-builtins-constrained.c similarity index 100% rename from clang/test/CodeGen/avx512vl-builtins-constrained.c rename to clang/test/CodeGen/X86/avx512vl-builtins-constrained.c diff --git a/clang/test/CodeGen/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512vl-builtins.c rename to clang/test/CodeGen/X86/avx512vl-builtins.c diff --git a/clang/test/CodeGen/avx512vlbf16-builtins.c b/clang/test/CodeGen/X86/avx512vlbf16-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512vlbf16-builtins.c rename to clang/test/CodeGen/X86/avx512vlbf16-builtins.c diff --git a/clang/test/CodeGen/avx512vlbitalg-builtins.c b/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512vlbitalg-builtins.c rename to clang/test/CodeGen/X86/avx512vlbitalg-builtins.c diff --git a/clang/test/CodeGen/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512vlbw-builtins.c rename to clang/test/CodeGen/X86/avx512vlbw-builtins.c diff --git a/clang/test/CodeGen/avx512vlcd-builtins.c b/clang/test/CodeGen/X86/avx512vlcd-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512vlcd-builtins.c rename to clang/test/CodeGen/X86/avx512vlcd-builtins.c diff --git a/clang/test/CodeGen/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512vldq-builtins.c rename to clang/test/CodeGen/X86/avx512vldq-builtins.c diff --git a/clang/test/CodeGen/avx512vlvbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512vlvbmi2-builtins.c rename to clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c diff --git a/clang/test/CodeGen/avx512vlvnni-builtins.c b/clang/test/CodeGen/X86/avx512vlvnni-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512vlvnni-builtins.c rename to clang/test/CodeGen/X86/avx512vlvnni-builtins.c diff --git a/clang/test/CodeGen/avx512vnni-builtins.c b/clang/test/CodeGen/X86/avx512vnni-builtins.c similarity index 100% rename from clang/test/CodeGen/avx512vnni-builtins.c rename to clang/test/CodeGen/X86/avx512vnni-builtins.c diff --git a/clang/test/CodeGen/avx512vpopcntdqintrin.c b/clang/test/CodeGen/X86/avx512vpopcntdqintrin.c similarity index 100% rename from clang/test/CodeGen/avx512vpopcntdqintrin.c rename to clang/test/CodeGen/X86/avx512vpopcntdqintrin.c diff --git a/clang/test/CodeGen/avx512vpopcntdqvlintrin.c b/clang/test/CodeGen/X86/avx512vpopcntdqvlintrin.c similarity index 100% rename from clang/test/CodeGen/avx512vpopcntdqvlintrin.c rename to clang/test/CodeGen/X86/avx512vpopcntdqvlintrin.c diff --git a/clang/test/CodeGen/bitscan-builtins.c b/clang/test/CodeGen/X86/bitscan-builtins.c similarity index 100% rename from clang/test/CodeGen/bitscan-builtins.c rename to clang/test/CodeGen/X86/bitscan-builtins.c diff --git a/clang/test/CodeGen/bmi-builtins.c b/clang/test/CodeGen/X86/bmi-builtins.c similarity index 100% rename from 
clang/test/CodeGen/bmi-builtins.c rename to clang/test/CodeGen/X86/bmi-builtins.c diff --git a/clang/test/CodeGen/bmi2-builtins.c b/clang/test/CodeGen/X86/bmi2-builtins.c similarity index 100% rename from clang/test/CodeGen/bmi2-builtins.c rename to clang/test/CodeGen/X86/bmi2-builtins.c diff --git a/clang/test/CodeGen/builtin-clflushopt.c b/clang/test/CodeGen/X86/builtin-clflushopt.c similarity index 100% rename from clang/test/CodeGen/builtin-clflushopt.c rename to clang/test/CodeGen/X86/builtin-clflushopt.c diff --git a/clang/test/CodeGen/builtin-clwb.c b/clang/test/CodeGen/X86/builtin-clwb.c similarity index 100% rename from clang/test/CodeGen/builtin-clwb.c rename to clang/test/CodeGen/X86/builtin-clwb.c diff --git a/clang/test/CodeGen/builtin-clzero.c b/clang/test/CodeGen/X86/builtin-clzero.c similarity index 100% rename from clang/test/CodeGen/builtin-clzero.c rename to clang/test/CodeGen/X86/builtin-clzero.c diff --git a/clang/test/CodeGen/builtin-movdir.c b/clang/test/CodeGen/X86/builtin-movdir.c similarity index 100% rename from clang/test/CodeGen/builtin-movdir.c rename to clang/test/CodeGen/X86/builtin-movdir.c diff --git a/clang/test/CodeGen/builtin-wbinvd.c b/clang/test/CodeGen/X86/builtin-wbinvd.c similarity index 100% rename from clang/test/CodeGen/builtin-wbinvd.c rename to clang/test/CodeGen/X86/builtin-wbinvd.c diff --git a/clang/test/CodeGen/builtin-wbnoinvd.c b/clang/test/CodeGen/X86/builtin-wbnoinvd.c similarity index 100% rename from clang/test/CodeGen/builtin-wbnoinvd.c rename to clang/test/CodeGen/X86/builtin-wbnoinvd.c diff --git a/clang/test/CodeGen/cetintrin.c b/clang/test/CodeGen/X86/cetintrin.c similarity index 100% rename from clang/test/CodeGen/cetintrin.c rename to clang/test/CodeGen/X86/cetintrin.c diff --git a/clang/test/CodeGen/cldemote.c b/clang/test/CodeGen/X86/cldemote.c similarity index 100% rename from clang/test/CodeGen/cldemote.c rename to clang/test/CodeGen/X86/cldemote.c diff --git a/clang/test/CodeGen/f16c-builtins-constrained.c b/clang/test/CodeGen/X86/f16c-builtins-constrained.c similarity index 100% rename from clang/test/CodeGen/f16c-builtins-constrained.c rename to clang/test/CodeGen/X86/f16c-builtins-constrained.c diff --git a/clang/test/CodeGen/f16c-builtins.c b/clang/test/CodeGen/X86/f16c-builtins.c similarity index 100% rename from clang/test/CodeGen/f16c-builtins.c rename to clang/test/CodeGen/X86/f16c-builtins.c diff --git a/clang/test/CodeGen/fma-builtins-constrained.c b/clang/test/CodeGen/X86/fma-builtins-constrained.c similarity index 100% rename from clang/test/CodeGen/fma-builtins-constrained.c rename to clang/test/CodeGen/X86/fma-builtins-constrained.c diff --git a/clang/test/CodeGen/fma-builtins.c b/clang/test/CodeGen/X86/fma-builtins.c similarity index 100% rename from clang/test/CodeGen/fma-builtins.c rename to clang/test/CodeGen/X86/fma-builtins.c diff --git a/clang/test/CodeGen/fma4-builtins.c b/clang/test/CodeGen/X86/fma4-builtins.c similarity index 100% rename from clang/test/CodeGen/fma4-builtins.c rename to clang/test/CodeGen/X86/fma4-builtins.c diff --git a/clang/test/CodeGen/fsgsbase-builtins.c b/clang/test/CodeGen/X86/fsgsbase-builtins.c similarity index 100% rename from clang/test/CodeGen/fsgsbase-builtins.c rename to clang/test/CodeGen/X86/fsgsbase-builtins.c diff --git a/clang/test/CodeGen/gfni-builtins.c b/clang/test/CodeGen/X86/gfni-builtins.c similarity index 100% rename from clang/test/CodeGen/gfni-builtins.c rename to clang/test/CodeGen/X86/gfni-builtins.c diff --git 
a/clang/test/CodeGen/intel-avx512vlvp2intersect.c b/clang/test/CodeGen/X86/intel-avx512vlvp2intersect.c similarity index 100% rename from clang/test/CodeGen/intel-avx512vlvp2intersect.c rename to clang/test/CodeGen/X86/intel-avx512vlvp2intersect.c diff --git a/clang/test/CodeGen/intel-avx512vp2intersect.c b/clang/test/CodeGen/X86/intel-avx512vp2intersect.c similarity index 100% rename from clang/test/CodeGen/intel-avx512vp2intersect.c rename to clang/test/CodeGen/X86/intel-avx512vp2intersect.c diff --git a/clang/test/CodeGen/invpcid.c b/clang/test/CodeGen/X86/invpcid.c similarity index 100% rename from clang/test/CodeGen/invpcid.c rename to clang/test/CodeGen/X86/invpcid.c diff --git a/clang/test/CodeGen/lwp-builtins.c b/clang/test/CodeGen/X86/lwp-builtins.c similarity index 100% rename from clang/test/CodeGen/lwp-builtins.c rename to clang/test/CodeGen/X86/lwp-builtins.c diff --git a/clang/test/CodeGen/lzcnt-builtins.c b/clang/test/CodeGen/X86/lzcnt-builtins.c similarity index 100% rename from clang/test/CodeGen/lzcnt-builtins.c rename to clang/test/CodeGen/X86/lzcnt-builtins.c diff --git a/clang/test/CodeGen/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c similarity index 100% rename from clang/test/CodeGen/mmx-builtins.c rename to clang/test/CodeGen/X86/mmx-builtins.c diff --git a/clang/test/CodeGen/mmx-inline-asm-error.c b/clang/test/CodeGen/X86/mmx-inline-asm-error.c similarity index 100% rename from clang/test/CodeGen/mmx-inline-asm-error.c rename to clang/test/CodeGen/X86/mmx-inline-asm-error.c diff --git a/clang/test/CodeGen/mmx-inline-asm.c b/clang/test/CodeGen/X86/mmx-inline-asm.c similarity index 100% rename from clang/test/CodeGen/mmx-inline-asm.c rename to clang/test/CodeGen/X86/mmx-inline-asm.c diff --git a/clang/test/CodeGen/mmx-shift-with-immediate.c b/clang/test/CodeGen/X86/mmx-shift-with-immediate.c similarity index 100% rename from clang/test/CodeGen/mmx-shift-with-immediate.c rename to clang/test/CodeGen/X86/mmx-shift-with-immediate.c diff --git a/clang/test/CodeGen/movbe-builtins.c b/clang/test/CodeGen/X86/movbe-builtins.c similarity index 100% rename from clang/test/CodeGen/movbe-builtins.c rename to clang/test/CodeGen/X86/movbe-builtins.c diff --git a/clang/test/CodeGen/pause.c b/clang/test/CodeGen/X86/pause.c similarity index 100% rename from clang/test/CodeGen/pause.c rename to clang/test/CodeGen/X86/pause.c diff --git a/clang/test/CodeGen/pclmul-builtins.c b/clang/test/CodeGen/X86/pclmul-builtins.c similarity index 100% rename from clang/test/CodeGen/pclmul-builtins.c rename to clang/test/CodeGen/X86/pclmul-builtins.c diff --git a/clang/test/CodeGen/pku.c b/clang/test/CodeGen/X86/pku.c similarity index 100% rename from clang/test/CodeGen/pku.c rename to clang/test/CodeGen/X86/pku.c diff --git a/clang/test/CodeGen/popcnt-builtins.c b/clang/test/CodeGen/X86/popcnt-builtins.c similarity index 100% rename from clang/test/CodeGen/popcnt-builtins.c rename to clang/test/CodeGen/X86/popcnt-builtins.c diff --git a/clang/test/CodeGen/prefetchw-builtins.c b/clang/test/CodeGen/X86/prefetchw-builtins.c similarity index 100% rename from clang/test/CodeGen/prefetchw-builtins.c rename to clang/test/CodeGen/X86/prefetchw-builtins.c diff --git a/clang/test/CodeGen/ptwrite.c b/clang/test/CodeGen/X86/ptwrite.c similarity index 100% rename from clang/test/CodeGen/ptwrite.c rename to clang/test/CodeGen/X86/ptwrite.c diff --git a/clang/test/CodeGen/rd-builtins.c b/clang/test/CodeGen/X86/rd-builtins.c similarity index 100% rename from clang/test/CodeGen/rd-builtins.c rename to 
clang/test/CodeGen/X86/rd-builtins.c diff --git a/clang/test/CodeGen/rdpid-builtins.c b/clang/test/CodeGen/X86/rdpid-builtins.c similarity index 100% rename from clang/test/CodeGen/rdpid-builtins.c rename to clang/test/CodeGen/X86/rdpid-builtins.c diff --git a/clang/test/CodeGen/rdrand-builtins.c b/clang/test/CodeGen/X86/rdrand-builtins.c similarity index 100% rename from clang/test/CodeGen/rdrand-builtins.c rename to clang/test/CodeGen/X86/rdrand-builtins.c diff --git a/clang/test/CodeGen/rot-intrinsics.c b/clang/test/CodeGen/X86/rot-intrinsics.c similarity index 100% rename from clang/test/CodeGen/rot-intrinsics.c rename to clang/test/CodeGen/X86/rot-intrinsics.c diff --git a/clang/test/CodeGen/rtm-builtins.c b/clang/test/CodeGen/X86/rtm-builtins.c similarity index 100% rename from clang/test/CodeGen/rtm-builtins.c rename to clang/test/CodeGen/X86/rtm-builtins.c diff --git a/clang/test/CodeGen/sha-builtins.c b/clang/test/CodeGen/X86/sha-builtins.c similarity index 100% rename from clang/test/CodeGen/sha-builtins.c rename to clang/test/CodeGen/X86/sha-builtins.c diff --git a/clang/test/CodeGen/sse-builtins-constrained-cmp.c b/clang/test/CodeGen/X86/sse-builtins-constrained-cmp.c similarity index 100% rename from clang/test/CodeGen/sse-builtins-constrained-cmp.c rename to clang/test/CodeGen/X86/sse-builtins-constrained-cmp.c diff --git a/clang/test/CodeGen/sse-builtins-constrained.c b/clang/test/CodeGen/X86/sse-builtins-constrained.c similarity index 100% rename from clang/test/CodeGen/sse-builtins-constrained.c rename to clang/test/CodeGen/X86/sse-builtins-constrained.c diff --git a/clang/test/CodeGen/sse-builtins-dbg.c b/clang/test/CodeGen/X86/sse-builtins-dbg.c similarity index 100% rename from clang/test/CodeGen/sse-builtins-dbg.c rename to clang/test/CodeGen/X86/sse-builtins-dbg.c diff --git a/clang/test/CodeGen/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c similarity index 100% rename from clang/test/CodeGen/sse-builtins.c rename to clang/test/CodeGen/X86/sse-builtins.c diff --git a/clang/test/CodeGen/sse.c b/clang/test/CodeGen/X86/sse.c similarity index 100% rename from clang/test/CodeGen/sse.c rename to clang/test/CodeGen/X86/sse.c diff --git a/clang/test/CodeGen/sse2-builtins-constrained-cmp.c b/clang/test/CodeGen/X86/sse2-builtins-constrained-cmp.c similarity index 100% rename from clang/test/CodeGen/sse2-builtins-constrained-cmp.c rename to clang/test/CodeGen/X86/sse2-builtins-constrained-cmp.c diff --git a/clang/test/CodeGen/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c similarity index 100% rename from clang/test/CodeGen/sse2-builtins.c rename to clang/test/CodeGen/X86/sse2-builtins.c diff --git a/clang/test/CodeGen/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c similarity index 100% rename from clang/test/CodeGen/sse3-builtins.c rename to clang/test/CodeGen/X86/sse3-builtins.c diff --git a/clang/test/CodeGen/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c similarity index 100% rename from clang/test/CodeGen/sse41-builtins.c rename to clang/test/CodeGen/X86/sse41-builtins.c diff --git a/clang/test/CodeGen/sse42-builtins.c b/clang/test/CodeGen/X86/sse42-builtins.c similarity index 100% rename from clang/test/CodeGen/sse42-builtins.c rename to clang/test/CodeGen/X86/sse42-builtins.c diff --git a/clang/test/CodeGen/sse4a-builtins.c b/clang/test/CodeGen/X86/sse4a-builtins.c similarity index 100% rename from clang/test/CodeGen/sse4a-builtins.c rename to clang/test/CodeGen/X86/sse4a-builtins.c diff --git a/clang/test/CodeGen/ssse3-builtins.c 
b/clang/test/CodeGen/X86/ssse3-builtins.c similarity index 100% rename from clang/test/CodeGen/ssse3-builtins.c rename to clang/test/CodeGen/X86/ssse3-builtins.c diff --git a/clang/test/CodeGen/tbm-builtins.c b/clang/test/CodeGen/X86/tbm-builtins.c similarity index 100% rename from clang/test/CodeGen/tbm-builtins.c rename to clang/test/CodeGen/X86/tbm-builtins.c diff --git a/clang/test/CodeGen/vaes-builtins.c b/clang/test/CodeGen/X86/vaes-builtins.c similarity index 100% rename from clang/test/CodeGen/vaes-builtins.c rename to clang/test/CodeGen/X86/vaes-builtins.c diff --git a/clang/test/CodeGen/vpclmulqdq-builtins.c b/clang/test/CodeGen/X86/vpclmulqdq-builtins.c similarity index 100% rename from clang/test/CodeGen/vpclmulqdq-builtins.c rename to clang/test/CodeGen/X86/vpclmulqdq-builtins.c diff --git a/clang/test/CodeGen/waitpkg.c b/clang/test/CodeGen/X86/waitpkg.c similarity index 100% rename from clang/test/CodeGen/waitpkg.c rename to clang/test/CodeGen/X86/waitpkg.c diff --git a/clang/test/CodeGen/xop-builtins-cmp.c b/clang/test/CodeGen/X86/xop-builtins-cmp.c similarity index 100% rename from clang/test/CodeGen/xop-builtins-cmp.c rename to clang/test/CodeGen/X86/xop-builtins-cmp.c diff --git a/clang/test/CodeGen/xop-builtins.c b/clang/test/CodeGen/X86/xop-builtins.c similarity index 100% rename from clang/test/CodeGen/xop-builtins.c rename to clang/test/CodeGen/X86/xop-builtins.c From 8c0bbbade169d9fda6cac8f181660009599a7656 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Thu, 10 Sep 2020 18:45:12 +0700 Subject: [PATCH 0248/1079] [NFC] Refactoring in SCEV: add missing `const` qualifiers --- llvm/include/llvm/Analysis/ScalarEvolution.h | 10 ++--- llvm/lib/Analysis/ScalarEvolution.cpp | 39 ++++++++++---------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index ea841440e1803..8a88645f7cfc5 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1186,7 +1186,7 @@ class ScalarEvolution { ValueExprMapType ValueExprMap; /// Mark predicate values currently being processed by isImpliedCond. - SmallPtrSet PendingLoopPredicates; + SmallPtrSet PendingLoopPredicates; /// Mark SCEVUnknown Phis currently being processed by getRangeRef. SmallPtrSet PendingPhiRanges; @@ -1660,13 +1660,13 @@ class ScalarEvolution { /// Return a predecessor of BB (which may not be an immediate predecessor) /// which has exactly one successor from which BB is reachable, or null if /// no such block is found. - std::pair - getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB); + std::pair + getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB) const; /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the given FoundCondValue value evaluates to true. bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - Value *FoundCondValue, bool Inverse); + const Value *FoundCondValue, bool Inverse); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by FoundPred, FoundLHS, FoundRHS is @@ -1713,7 +1713,7 @@ class ScalarEvolution { /// Return true if the condition denoted by \p LHS \p Pred \p RHS is implied /// by a call to @llvm.experimental.guard in \p BB. 
- bool isImpliedViaGuard(BasicBlock *BB, ICmpInst::Predicate Pred, + bool isImpliedViaGuard(const BasicBlock *BB, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); /// Test whether the condition described by Pred, LHS, and RHS is true diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 795919458aaa3..c5745c0eebadd 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -8735,18 +8735,19 @@ ScalarEvolution::howFarToNonZero(const SCEV *V, const Loop *L) { return getCouldNotCompute(); } -std::pair<BasicBlock *, BasicBlock *> -ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { +std::pair<const BasicBlock *, const BasicBlock *> +ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB) + const { // If the block has a unique predecessor, then there is no path from the // predecessor to the block that does not go through the direct edge // from the predecessor to the block. - if (BasicBlock *Pred = BB->getSinglePredecessor()) + if (const BasicBlock *Pred = BB->getSinglePredecessor()) return {Pred, BB}; // A loop's header is defined to be a block that dominates the loop. // If the header has a unique predecessor outside the loop, it must be // a block that has exactly one successor that can reach the loop. - if (Loop *L = LI.getLoopFor(BB)) + if (const Loop *L = LI.getLoopFor(BB)) return {L->getLoopPredecessor(), L->getHeader()}; return {nullptr, nullptr}; @@ -9319,14 +9320,14 @@ bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred, isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS); } -bool ScalarEvolution::isImpliedViaGuard(BasicBlock *BB, +bool ScalarEvolution::isImpliedViaGuard(const BasicBlock *BB, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { // No need to even try if we know the module has no guards. if (!HasGuards) return false; - return any_of(*BB, [&](Instruction &I) { + return any_of(*BB, [&](const Instruction &I) { using namespace llvm::PatternMatch; Value *Condition; @@ -9490,7 +9491,7 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, } // Try to prove (Pred, LHS, RHS) using isImpliedViaGuard. - auto ProveViaGuard = [&](BasicBlock *Block) { + auto ProveViaGuard = [&](const BasicBlock *Block) { if (isImpliedViaGuard(Block, Pred, LHS, RHS)) return true; if (ProvingStrictComparison) { @@ -9507,7 +9508,7 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, }; // Try to prove (Pred, LHS, RHS) using isImpliedCond. - auto ProveViaCond = [&](Value *Condition, bool Inverse) { + auto ProveViaCond = [&](const Value *Condition, bool Inverse) { if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse)) return true; if (ProvingStrictComparison) { @@ -9526,16 +9527,15 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, // Starting at the loop predecessor, climb up the predecessor chain, as long // as there are predecessors that can be found that have unique successors // leading to the original header.
- for (std::pair - Pair(L->getLoopPredecessor(), L->getHeader()); - Pair.first; - Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { + for (std::pair Pair( + L->getLoopPredecessor(), L->getHeader()); + Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { if (ProveViaGuard(Pair.first)) return true; - BranchInst *LoopEntryPredicate = - dyn_cast(Pair.first->getTerminator()); + const BranchInst *LoopEntryPredicate = + dyn_cast(Pair.first->getTerminator()); if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional()) continue; @@ -9560,10 +9560,9 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, return false; } -bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, - const SCEV *LHS, const SCEV *RHS, - Value *FoundCondValue, - bool Inverse) { +bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS, + const Value *FoundCondValue, bool Inverse) { if (!PendingLoopPredicates.insert(FoundCondValue).second) return false; @@ -9571,7 +9570,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, make_scope_exit([&]() { PendingLoopPredicates.erase(FoundCondValue); }); // Recursively handle And and Or conditions. - if (BinaryOperator *BO = dyn_cast(FoundCondValue)) { + if (const BinaryOperator *BO = dyn_cast(FoundCondValue)) { if (BO->getOpcode() == Instruction::And) { if (!Inverse) return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || @@ -9583,7 +9582,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, } } - ICmpInst *ICI = dyn_cast(FoundCondValue); + const ICmpInst *ICI = dyn_cast(FoundCondValue); if (!ICI) return false; // Now that we found a conditional branch that dominates the loop or controls From ec46cfefe80d58cdc7068ad4e4f8efde6d94d835 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 9 Sep 2020 16:14:56 -0400 Subject: [PATCH 0249/1079] [libcxx] Simplify back-deployment testing The needs of back-deployment testing currently require two different ways of running the test suite: one based on the deployment target, and one based on the target triple. Since the triple includes all the information we need, it's better to have just one way of doing things. Furthermore, `--param platform=XXX` is also supersedded by using the target triple. Previously, this parameter would serve the purpose of controling XFAILs for availability markup errors, however it is possible to achieve the same thing by using with_system_cxx_lib only and using .verify.cpp tests instead, as explained in the documentation changes. The motivation for this change is twofold: 1. This part of the Lit config has always been really confusing and complicated, and it has been a source of bugs in the past. I have simplified it iteratively in the past, but the complexity is still there. 2. The deployment-target detection started failing in weird ways in recent Clangs, breaking our CI. Instead of band-aid patching the issue, I decided to remove the complexity altogether by using target triples even on Apple platforms. A follow-up to this commit will bring the test suite in line with the recommended way of handling availability markup tests. 
--- libcxx/docs/DesignDocs/AvailabilityMarkup.rst | 48 ++++---- libcxx/test/configs/legacy.cfg.in | 1 - libcxx/utils/ci/macos-backdeployment.sh | 2 +- libcxx/utils/libcxx/test/config.py | 113 ++++-------------- libcxx/utils/libcxx/test/target_info.py | 28 +---- libcxxabi/test/lit.site.cfg.in | 1 - libunwind/test/lit.site.cfg.in | 1 - 7 files changed, 51 insertions(+), 143 deletions(-) diff --git a/libcxx/docs/DesignDocs/AvailabilityMarkup.rst b/libcxx/docs/DesignDocs/AvailabilityMarkup.rst index 87ad0abb62d79..2380385392876 100644 --- a/libcxx/docs/DesignDocs/AvailabilityMarkup.rst +++ b/libcxx/docs/DesignDocs/AvailabilityMarkup.rst @@ -64,31 +64,35 @@ Testing Some parameters can be passed to lit to run the test-suite and exercise the availability. -* The `platform` parameter controls the deployment target. For example lit can - be invoked with `--param=platform=macosx10.12`. Default is the current host. -* The `use_system_cxx_lib` parameter indicates that the test suite is being run - against a system library. +* The `target_triple` parameter controls the deployment target. For example lit + can be invoked with `--param=target_triple=x86_64-apple-macosx10.12`. + Default is the current host. +* The `use_system_cxx_lib` parameter indicates that the test suite is being + compiled with the intent of being run against the system library for the + given triple, AND that it is being run against it. -Tests can be marked as XFAIL based on multiple features made available by lit: - -* if `--param=platform=macosx10.12` is passed, the following features will be available: - - - availability=macosx - - availability=macosx10.12 - - This feature is used to XFAIL a test that *is* using a class or a method marked - as unavailable *and* that is expected to *fail* if deployed on an older system. - -* if `use_system_cxx_lib` and `--param=platform=macosx10.12` are passed to lit, - the following features will also be available: +Tests can be marked as XFAIL based on multiple features made available by lit. +If `use_system_cxx_lib` is true, then assuming `target_triple=x86_64-apple-macosx10.12`, +the following features will be made available: - with_system_cxx_lib=macosx - with_system_cxx_lib=macosx10.12 - with_system_cxx_lib=x86_64-apple-macosx10.12 + - availability=macosx + - availability=macosx10.12 - This feature is used to XFAIL a test that is *not* using a class or a method - marked as unavailable *but* that is expected to fail if deployed on an older - system. For example, if the test exhibits a bug in the libc on a particular - system version, or if the test uses a symbol that is not available on an - older version of the dylib (but for which there is no availability markup, - otherwise the XFAIL should use `availability` above). +These features are used to XFAIL a test that fails when deployed on (or is +compiled for) an older system. For example, if the test exhibits a bug in the +libc on a particular system version, or if the test uses a symbol that is not +available on an older version of the dylib, it can be marked as XFAIL with +one of the above features. + +It is sometimes useful to check that a test fails specifically when compiled +for a given deployment target. For example, this is the case when testing +availability markup, where we want to make sure that using the annotated +facility on a deployment target that doesn't support it will fail at compile +time, not at runtime. This can be achieved by creating a `.compile.pass.cpp` +and XFAILing it for the right deployment target. 
If the test doesn't fail at +compile-time like it's supposed to, the test will XPASS. Another option is to +create a `.verify.cpp` test that checks for the right errors, and mark that +test as requiring `with_system_cxx_lib=`. diff --git a/libcxx/test/configs/legacy.cfg.in b/libcxx/test/configs/legacy.cfg.in index 1f3370ccc9bc2..efb41a93e41b9 100644 --- a/libcxx/test/configs/legacy.cfg.in +++ b/libcxx/test/configs/legacy.cfg.in @@ -21,7 +21,6 @@ config.abi_library_path = "@LIBCXX_CXX_ABI_LIBRARY_PATH@" config.configuration_variant = "@LIBCXX_LIT_VARIANT@" config.host_triple = "@LLVM_HOST_TRIPLE@" config.target_triple = "@TARGET_TRIPLE@" -config.use_target = bool("@LIBCXX_TARGET_TRIPLE@") config.sysroot = "@LIBCXX_SYSROOT@" config.gcc_toolchain = "@LIBCXX_GCC_TOOLCHAIN@" config.generate_coverage = @LIBCXX_GENERATE_COVERAGE@ diff --git a/libcxx/utils/ci/macos-backdeployment.sh b/libcxx/utils/ci/macos-backdeployment.sh index 24b866cdc1aef..04549aa346456 100755 --- a/libcxx/utils/ci/macos-backdeployment.sh +++ b/libcxx/utils/ci/macos-backdeployment.sh @@ -134,7 +134,7 @@ echo "@@@ Running tests for libc++ @@@" ${ENABLE_FILESYSTEM} \ --param=cxx_headers="${LLVM_INSTALL_DIR}/include/c++/v1" \ --param=std="${STD}" \ - --param=platform="macosx${DEPLOYMENT_TARGET}" \ + --param=target_triple="x86_64-apple-macosx${DEPLOYMENT_TARGET}" \ --param=cxx_library_root="${LLVM_INSTALL_DIR}/lib" \ --param=cxx_runtime_root="${LIBCXX_ROOT_ON_DEPLOYMENT_TARGET}" \ --param=abi_library_path="${LIBCXXABI_ROOT_ON_DEPLOYMENT_TARGET}" \ diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index 82b696f76eec7..c8bfdda914631 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -8,7 +8,6 @@ import copy import os -import platform import pkgutil import pipes import re @@ -72,7 +71,6 @@ def __init__(self, lit_config, config): self.link_shared = self.get_lit_bool('enable_shared', default=True) self.debug_build = self.get_lit_bool('debug_build', default=False) self.exec_env = dict() - self.use_target = False self.use_system_cxx_lib = self.get_lit_bool('use_system_cxx_lib', False) self.use_clang_verify = False @@ -123,7 +121,6 @@ def configure(self): self.executor = self.get_lit_conf('executor') self.configure_cxx() self.configure_triple() - self.configure_deployment() self.configure_src_root() self.configure_obj_root() self.cxx_stdlib_under_test = self.get_lit_conf('cxx_stdlib_under_test', 'libc++') @@ -248,22 +245,15 @@ def configure_features(self): # XFAIL markers for tests that are known to fail with versions of # libc++ as were shipped with a particular triple. if self.use_system_cxx_lib: - self.config.available_features.add('with_system_cxx_lib=%s' % self.config.target_triple) - - # Add available features for more generic versions of the target - # triple attached to with_system_cxx_lib. - if self.use_deployment: - (_, name, version) = self.config.deployment - self.config.available_features.add('with_system_cxx_lib=%s' % name) - self.config.available_features.add('with_system_cxx_lib=%s%s' % (name, version)) - - # Configure the availability feature. Availability is only enabled - # with libc++, because other standard libraries do not provide - # availability markup. 
- if self.use_deployment and self.cxx_stdlib_under_test == 'libc++': - (_, name, version) = self.config.deployment - self.config.available_features.add('availability=%s' % name) - self.config.available_features.add('availability=%s%s' % (name, version)) + (arch, vendor, platform) = self.config.target_triple.split('-') + (sysname, version) = re.match(r'([^0-9]+)([0-9\.]*)', platform).groups() + + self.config.available_features.add('with_system_cxx_lib={}-{}-{}{}'.format(arch, vendor, sysname, version)) + self.config.available_features.add('with_system_cxx_lib={}{}'.format(sysname, version)) + self.config.available_features.add('with_system_cxx_lib={}'.format(sysname)) + + self.config.available_features.add('availability={}'.format(sysname)) + self.config.available_features.add('availability={}{}'.format(sysname, version)) if self.target_info.is_windows(): if self.cxx_stdlib_under_test == 'libc++': @@ -317,20 +307,19 @@ def configure_default_compile_flags(self): # being elided. if self.target_info.is_windows() and self.debug_build: self.cxx.compile_flags += ['-D_DEBUG'] - if self.use_target: - if not self.cxx.addFlagIfSupported( - ['--target=' + self.config.target_triple]): - self.lit_config.warning('use_target is true but --target is '\ - 'not supported by the compiler') - if self.use_deployment: - arch, name, version = self.config.deployment - self.cxx.flags += ['-arch', arch] - self.cxx.flags += ['-m' + name + '-version-min=' + version] + if not self.cxx.addFlagIfSupported(['--target=' + self.config.target_triple]): + self.lit_config.warning('Not adding any target triple -- the compiler does ' + 'not support --target=') # Add includes for support headers used in the tests. support_path = os.path.join(self.libcxx_src_root, 'test/support') self.cxx.compile_flags += ['-I' + support_path] + # If we're testing the upstream LLVM libc++, disable availability markup, + # which is not relevant for non-shipped flavors of libc++. + if not self.use_system_cxx_lib: + self.cxx.compile_flags += ['-D_LIBCPP_DISABLE_AVAILABILITY'] + # Add includes for the PSTL headers pstl_src_root = self.get_lit_conf('pstl_src_root') pstl_obj_root = self.get_lit_conf('pstl_obj_root') @@ -641,37 +630,15 @@ def configure_substitutions(self): if self.get_lit_conf('libcxx_gdb'): sub.append(('%{libcxx_gdb}', self.get_lit_conf('libcxx_gdb'))) - def can_use_deployment(self): - # Check if the host is on an Apple platform using clang. - if not self.target_info.is_darwin(): - return False - if not self.target_info.is_host_macosx(): - return False - if not self.cxx.type.endswith('clang'): - return False - return True - def configure_triple(self): # Get or infer the target triple. target_triple = self.get_lit_conf('target_triple') - self.use_target = self.get_lit_bool('use_target', False) - if self.use_target and target_triple: - self.lit_config.warning('use_target is true but no triple is specified') - - # Use deployment if possible. - self.use_deployment = not self.use_target and self.can_use_deployment() - if self.use_deployment: - return - - # Save the triple (and warn on Apple platforms). - self.config.target_triple = target_triple - if self.use_target and 'apple' in target_triple: - self.lit_config.warning('consider using arch and platform instead' - ' of target_triple on Apple platforms') # If no target triple was given, try to infer it from the compiler # under test. 
- if not self.config.target_triple: + if not target_triple: + self.lit_config.note('Trying to infer the target_triple because none was specified') + target_triple = self.cxx.getTriple() # Drop sub-major version components from the triple, because the # current XFAIL handling expects exact matches for feature checks. @@ -686,44 +653,10 @@ def configure_triple(self): if (target_triple.endswith('redhat-linux') or target_triple.endswith('suse-linux')): target_triple += '-gnu' - self.config.target_triple = target_triple - self.lit_config.note( - "inferred target_triple as: %r" % self.config.target_triple) - - def configure_deployment(self): - assert not self.use_deployment is None - assert not self.use_target is None - if not self.use_deployment: - # Warn about ignored parameters. - if self.get_lit_conf('arch'): - self.lit_config.warning('ignoring arch, using target_triple') - if self.get_lit_conf('platform'): - self.lit_config.warning('ignoring platform, using target_triple') - return - - assert not self.use_target - assert self.target_info.is_host_macosx() - - # Always specify deployment explicitly on Apple platforms, since - # otherwise a platform is picked up from the SDK. If the SDK version - # doesn't match the system version, tests that use the system library - # may fail spuriously. - arch = self.get_lit_conf('arch') - if not arch: - arch = self.cxx.getTriple().split('-', 1)[0] - - _, name, version = self.target_info.get_platform() - self.config.deployment = (arch, name, version) - - # Set the target triple for use by lit. - self.config.target_triple = arch + '-apple-' + name + version - self.lit_config.note( - "computed target_triple as: %r" % self.config.target_triple) - # If we're testing the upstream LLVM libc++, disable availability markup, - # which is not relevant for non-shipped flavors of libc++. - if not self.use_system_cxx_lib: - self.cxx.compile_flags += ['-D_LIBCPP_DISABLE_AVAILABILITY'] + # Save the triple + self.lit_config.note("Setting target_triple to {}".format(target_triple)) + self.config.target_triple = target_triple def configure_env(self): self.config.environment = dict(os.environ) diff --git a/libcxx/utils/libcxx/test/target_info.py b/libcxx/utils/libcxx/test/target_info.py index 3197276ffa5b5..4f19d60a1a875 100644 --- a/libcxx/utils/libcxx/test/target_info.py +++ b/libcxx/utils/libcxx/test/target_info.py @@ -73,34 +73,8 @@ def get_sdk_version(self, name): return re.sub(r'.*/[^0-9]+([0-9.]+)\.sdk', r'\1', out) - def get_platform(self): - platform = self.full_config.get_lit_conf('platform') - if platform: - platform = re.sub(r'([^0-9]+)([0-9\.]*)', r'\1-\2', platform) - name, version = tuple(platform.split('-', 1)) - else: - name = 'macosx' - version = None - - if version: - return (False, name, version) - - # Infer the version, either from the SDK or the system itself. For - # macosx, ignore the SDK version; what matters is what's at - # /usr/lib/libc++.dylib. - if name == 'macosx': - version = self.get_macosx_version() - else: - version = self.get_sdk_version(name) - return (True, name, version) - def add_cxx_compile_flags(self, flags): - if self.full_config.use_deployment: - _, name, _ = self.full_config.config.deployment - cmd = ['xcrun', '--sdk', name, '--show-sdk-path'] - else: - cmd = ['xcrun', '--show-sdk-path'] - out, err, exit_code = executeCommand(cmd) + out, err, exit_code = executeCommand(['xcrun', '--show-sdk-path']) if exit_code != 0: self.full_config.lit_config.warning("Could not determine macOS SDK path! 
stderr was " + err) if exit_code == 0 and out: diff --git a/libcxxabi/test/lit.site.cfg.in b/libcxxabi/test/lit.site.cfg.in index 06d5706da7d24..87f955e321610 100644 --- a/libcxxabi/test/lit.site.cfg.in +++ b/libcxxabi/test/lit.site.cfg.in @@ -25,7 +25,6 @@ config.enable_shared = @LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXX@ config.enable_exceptions = @LIBCXXABI_ENABLE_EXCEPTIONS@ config.host_triple = "@LLVM_HOST_TRIPLE@" config.target_triple = "@TARGET_TRIPLE@" -config.use_target = bool("@LIBCXXABI_TARGET_TRIPLE@") config.sysroot = "@LIBCXXABI_SYSROOT@" config.gcc_toolchain = "@LIBCXXABI_GCC_TOOLCHAIN@" config.cxx_ext_threads = @LIBCXXABI_BUILD_EXTERNAL_THREAD_LIBRARY@ diff --git a/libunwind/test/lit.site.cfg.in b/libunwind/test/lit.site.cfg.in index 30a996cf37837..8ff770fe29bc8 100644 --- a/libunwind/test/lit.site.cfg.in +++ b/libunwind/test/lit.site.cfg.in @@ -25,7 +25,6 @@ config.enable_shared = @LIBCXX_ENABLE_SHARED@ config.arm_ehabi = @LIBUNWIND_USES_ARM_EHABI@ config.host_triple = "@LLVM_HOST_TRIPLE@" config.target_triple = "@TARGET_TRIPLE@" -config.use_target = bool("@LIBUNWIND_TARGET_TRIPLE@") config.sysroot = "@LIBUNWIND_SYSROOT@" config.gcc_toolchain = "@LIBUNWIND_GCC_TOOLCHAIN@" config.cxx_ext_threads = @LIBUNWIND_BUILD_EXTERNAL_THREAD_LIBRARY@ From a5ec99da6ea75a013ed201eb9c80066bd6f4131d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 10 Sep 2020 13:09:25 +0100 Subject: [PATCH 0250/1079] [DSE] Support eliminating memcpy.inline. MemoryLocation has been taught about memcpy.inline, which means we can get the memory locations read and written by it. This means DSE can handle memcpy.inline --- .../Scalar/DeadStoreElimination.cpp | 2 ++ .../MSSA/memset-and-memcpy.ll | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 12514be0e631a..d703f1337a721 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -229,6 +229,7 @@ static bool hasAnalyzableMemoryWrite(Instruction *I, case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy: + case Intrinsic::memcpy_inline: case Intrinsic::memcpy_element_unordered_atomic: case Intrinsic::memmove_element_unordered_atomic: case Intrinsic::memset_element_unordered_atomic: @@ -323,6 +324,7 @@ static bool isRemovable(Instruction *I) { case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy: + case Intrinsic::memcpy_inline: // Don't remove volatile memory intrinsics. 
    return !cast<MemIntrinsic>(II)->isVolatile();
  case Intrinsic::memcpy_element_unordered_atomic:
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll
index 5aeda18309724..02fc8f22b6b40 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s
+; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s
 ; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s
 
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@@ -91,3 +92,21 @@ define void @test18_atomic(i8* %P, i8* %Q, i8* %R) nounwind ssp {
   tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1)
   ret void
 }
+
+define void @test_memset_memcpy_inline(i8* noalias %P, i8* noalias %Q) {
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false)
+  tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false)
+  ret void
+}
+
+define void @test_store_memcpy_inline(i8* noalias %P, i8* noalias %Q) {
+  store i8 0, i8* %P
+  %P.1 = getelementptr i8, i8* %P, i64 1
+  store i8 1, i8* %P.1
+  %P.4 = getelementptr i8, i8* %P, i64 4
+  store i8 4, i8* %P.4
+  tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 4, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64 immarg, i1 immarg)

From ed95f7c7ce183564ed2266903fe605106f069beb Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet
Date: Thu, 10 Sep 2020 12:01:18 +0000
Subject: [PATCH 0251/1079] Fix broken link for Sphinx installation

---
 llvm/docs/CMake.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst
index 2972f1dec0e70..96994dbd8fda9 100644
--- a/llvm/docs/CMake.rst
+++ b/llvm/docs/CMake.rst
@@ -552,7 +552,7 @@ LLVM-specific variables
 **SPHINX_EXECUTABLE**:STRING
   The path to the ``sphinx-build`` executable detected by CMake.
   For installation instructions, see
-  http://www.sphinx-doc.org/en/latest/usage/installation.html
+  https://www.sphinx-doc.org/en/master/usage/installation.html
 
 **SPHINX_OUTPUT_HTML**:BOOL
   If enabled (and ``LLVM_ENABLE_SPHINX`` is enabled) then the targets for

From 05d02e5a4e54a04f050b52ee30d1860073bd8b34 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet
Date: Thu, 10 Sep 2020 12:27:32 +0000
Subject: [PATCH 0252/1079] Fix invalid link format in Clang LanguageExtension

---
 clang/docs/LanguageExtensions.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index c89f924c58ba2..60b3f21b3e500 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -2416,9 +2416,9 @@ Memory builtins
 
 ``__builtin_memcpy_inline(dst, src, size)`` is identical to
 ``__builtin_memcpy(dst, src, size)`` except that the generated code is
-guaranteed not to call any external functions. See [LLVM IR ‘llvm.memcpy.inline’
-Intrinsic](https://llvm.org/docs/LangRef.html#llvm-memcpy-inline-intrinsic) for
-more information.
+guaranteed not to call any external functions. See LLVM IR `llvm.memcpy.inline +`_ Intrinsic +for more information. Note that the `size` argument must be a compile time constant. From 1ebb31b14cd175b3f272e232958d342221eb875c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 10 Sep 2020 08:45:36 -0400 Subject: [PATCH 0253/1079] [x86] add tests for fmax/fmin experimental intrinsics with 'fast' FMF; NFC D87391 proposes to change the lowerings for 'nnan'-only FMF. That's the minimal requirement to get good codegen for x86, but currently we have bugs hindering that output unless the full 'fast' FMF is applied. These tests provide coverage for the ideal lowerings. --- .../X86/vector-reduce-fmax-fmin-fast.ll | 328 ++++++++++++++++++ 1 file changed, 328 insertions(+) create mode 100644 llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll new file mode 100644 index 0000000000000..50b88c2c55f5c --- /dev/null +++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll @@ -0,0 +1,328 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL + +; These tests are identical to corresponding tests in the 'nnan' versions +; of the files except that they use 'fast' FMF. If things are working as +; expected, the 'nnan' codegen should be the same as 'fast'. 
+ +; +; vXf32 +; + +define float @test_v2f32(<2 x float> %a0) { +; SSE2-LABEL: test_v2f32: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] +; SSE2-NEXT: minss %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_v2f32: +; SSE41: # %bb.0: +; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; SSE41-NEXT: minss %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: test_v2f32: +; AVX: # %bb.0: +; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v2f32: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq + %1 = call fast float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %a0) + ret float %1 +} + +define float @test_v4f32(<4 x float> %a0) { +; SSE2-LABEL: test_v4f32: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE2-NEXT: maxps %xmm1, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] +; SSE2-NEXT: maxss %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_v4f32: +; SSE41: # %bb.0: +; SSE41-NEXT: movaps %xmm0, %xmm1 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE41-NEXT: maxps %xmm1, %xmm0 +; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; SSE41-NEXT: maxss %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: test_v4f32: +; AVX: # %bb.0: +; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v4f32: +; AVX512: # %bb.0: +; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq + %1 = call fast float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a0) + ret float %1 +} + +define float @test_v8f32(<8 x float> %a0) { +; SSE2-LABEL: test_v8f32: +; SSE2: # %bb.0: +; SSE2-NEXT: minps %xmm1, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE2-NEXT: minps %xmm1, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] +; SSE2-NEXT: minss %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_v8f32: +; SSE41: # %bb.0: +; SSE41-NEXT: minps %xmm1, %xmm0 +; SSE41-NEXT: movaps %xmm0, %xmm1 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE41-NEXT: minps %xmm1, %xmm0 +; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; SSE41-NEXT: minss %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: test_v8f32: +; AVX: # %bb.0: +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v8f32: +; AVX512: # %bb.0: +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %1 = call 
fast float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %a0) + ret float %1 +} + +define float @test_v16f32(<16 x float> %a0) { +; SSE2-LABEL: test_v16f32: +; SSE2: # %bb.0: +; SSE2-NEXT: maxps %xmm3, %xmm1 +; SSE2-NEXT: maxps %xmm2, %xmm0 +; SSE2-NEXT: maxps %xmm1, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE2-NEXT: maxps %xmm1, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] +; SSE2-NEXT: maxss %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_v16f32: +; SSE41: # %bb.0: +; SSE41-NEXT: maxps %xmm3, %xmm1 +; SSE41-NEXT: maxps %xmm2, %xmm0 +; SSE41-NEXT: maxps %xmm1, %xmm0 +; SSE41-NEXT: movaps %xmm0, %xmm1 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE41-NEXT: maxps %xmm1, %xmm0 +; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; SSE41-NEXT: maxss %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: test_v16f32: +; AVX: # %bb.0: +; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v16f32: +; AVX512: # %bb.0: +; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %1 = call fast float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a0) + ret float %1 +} + +; +; vXf64 +; + +define double @test_v2f64(<2 x double> %a0) { +; SSE-LABEL: test_v2f64: +; SSE: # %bb.0: +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE-NEXT: minsd %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_v2f64: +; AVX: # %bb.0: +; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v2f64: +; AVX512: # %bb.0: +; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq + %1 = call fast double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a0) + ret double %1 +} + +define double @test_v4f64(<4 x double> %a0) { +; SSE-LABEL: test_v4f64: +; SSE: # %bb.0: +; SSE-NEXT: maxpd %xmm1, %xmm0 +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE-NEXT: maxsd %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_v4f64: +; AVX: # %bb.0: +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v4f64: +; AVX512: # %bb.0: +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %1 = call fast double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %a0) + ret double %1 +} + +define double @test_v8f64(<8 x double> %a0) { +; SSE-LABEL: 
test_v8f64: +; SSE: # %bb.0: +; SSE-NEXT: minpd %xmm3, %xmm1 +; SSE-NEXT: minpd %xmm2, %xmm0 +; SSE-NEXT: minpd %xmm1, %xmm0 +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE-NEXT: minsd %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_v8f64: +; AVX: # %bb.0: +; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v8f64: +; AVX512: # %bb.0: +; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %1 = call fast double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> %a0) + ret double %1 +} + +define double @test_v16f64(<16 x double> %a0) { +; SSE-LABEL: test_v16f64: +; SSE: # %bb.0: +; SSE-NEXT: maxpd %xmm6, %xmm2 +; SSE-NEXT: maxpd %xmm4, %xmm0 +; SSE-NEXT: maxpd %xmm2, %xmm0 +; SSE-NEXT: maxpd %xmm7, %xmm3 +; SSE-NEXT: maxpd %xmm5, %xmm1 +; SSE-NEXT: maxpd %xmm3, %xmm1 +; SSE-NEXT: maxpd %xmm1, %xmm0 +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE-NEXT: maxsd %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_v16f64: +; AVX: # %bb.0: +; AVX-NEXT: vmaxpd %ymm3, %ymm1, %ymm1 +; AVX-NEXT: vmaxpd %ymm2, %ymm0, %ymm0 +; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v16f64: +; AVX512: # %bb.0: +; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %1 = call fast double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> %a0) + ret double %1 +} + +declare float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float>) +declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>) +declare float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float>) +declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float>) + +declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>) +declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>) +declare double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double>) +declare double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double>) From 517202c720ea527aab689590c81703a70793cb97 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 10 Sep 2020 13:49:33 +0100 Subject: [PATCH 0254/1079] [TargetLowering] Fix comments describing XOR -> OR/AND transformations --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index cbdd027f55fef..a80ca04921f45 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ 
b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1325,15 +1325,15 @@ bool TargetLowering::SimplifyDemandedBits( return true; // If all of the unknown bits are known to be zero on one side or the other - // (but not both) turn this into an *inclusive* or. + // turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero)) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1)); ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts); if (C) { - // If one side is a constant, and all of the known set bits on the other - // side are also set in the constant, turn this into an AND, as we know + // If one side is a constant, and all of the set bits in the constant are + // also known set on the other side, turn this into an AND, as we know // the bits will be cleared. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 // NB: it is okay if more bits are known than are requested From ebf496d805521b53022a351f35854de977fee844 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Thu, 10 Sep 2020 16:31:56 +0300 Subject: [PATCH 0255/1079] Revert "[clang-tidy] New check readability-prefer-member-initializer" Either contains unbounded loops, or has *very* high runtime, 100+x of all the current clang-tidy checks. This reverts commit f5fd7486d6c0debb465de3e927fcc31884874280. --- .../cppcoreguidelines/CMakeLists.txt | 1 - .../CppCoreGuidelinesTidyModule.cpp | 3 - .../PreferMemberInitializerCheck.cpp | 233 --------- .../PreferMemberInitializerCheck.h | 41 -- clang-tools-extra/docs/ReleaseNotes.rst | 6 - ...reguidelines-prefer-member-initializer.rst | 102 ---- .../docs/clang-tidy/checks/list.rst | 1 - ...ize-use-default-member-init-assignment.cpp | 31 -- ...izer-modernize-use-default-member-init.cpp | 30 -- ...reguidelines-prefer-member-initializer.cpp | 454 ------------------ 10 files changed, 902 deletions(-) delete mode 100644 clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp delete mode 100644 clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.h delete mode 100644 clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-prefer-member-initializer.rst delete mode 100644 clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init-assignment.cpp delete mode 100644 clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init.cpp delete mode 100644 clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer.cpp diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt b/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt index a9f5b3e0c15bc..39c2c552eb73e 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt @@ -13,7 +13,6 @@ add_clang_library(clangTidyCppCoreGuidelinesModule NarrowingConversionsCheck.cpp NoMallocCheck.cpp OwningMemoryCheck.cpp - PreferMemberInitializerCheck.cpp ProBoundsArrayToPointerDecayCheck.cpp ProBoundsConstantArrayIndexCheck.cpp ProBoundsPointerArithmeticCheck.cpp diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp index bf613109f0ebd..4cb5022888d3d 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp 
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp @@ -22,7 +22,6 @@ #include "NarrowingConversionsCheck.h" #include "NoMallocCheck.h" #include "OwningMemoryCheck.h" -#include "PreferMemberInitializerCheck.h" #include "ProBoundsArrayToPointerDecayCheck.h" #include "ProBoundsConstantArrayIndexCheck.h" #include "ProBoundsPointerArithmeticCheck.h" @@ -67,8 +66,6 @@ class CppCoreGuidelinesModule : public ClangTidyModule { "cppcoreguidelines-non-private-member-variables-in-classes"); CheckFactories.registerCheck( "cppcoreguidelines-owning-memory"); - CheckFactories.registerCheck( - "cppcoreguidelines-prefer-member-initializer"); CheckFactories.registerCheck( "cppcoreguidelines-pro-bounds-array-to-pointer-decay"); CheckFactories.registerCheck( diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp deleted file mode 100644 index 97ae586f9fdb6..0000000000000 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp +++ /dev/null @@ -1,233 +0,0 @@ -//===--- PreferMemberInitializerCheck.cpp - clang-tidy -------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "PreferMemberInitializerCheck.h" -#include "clang/AST/ASTContext.h" -#include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/Lex/Lexer.h" - -using namespace clang::ast_matchers; - -namespace clang { -namespace tidy { -namespace cppcoreguidelines { - -static bool isControlStatement(const Stmt *S) { - return isa(S) || isa(S) || isa(S) || - isa(S) || isa(S) || isa(S) || - isa(S) || isa(S) || isa(S); -} - -static bool isNoReturnCallStatement(const Stmt *S) { - const auto *Call = dyn_cast(S); - if (!Call) - return false; - - const FunctionDecl *Func = Call->getDirectCallee(); - if (!Func) - return false; - - return Func->isNoReturn(); -} - -static bool isLiteral(const Expr *E) { - return isa(E) || isa(E) || - isa(E) || isa(E) || - isa(E) || isa(E); -} - -static bool isUnaryExprOfLiteral(const Expr *E) { - if (const auto *UnOp = dyn_cast(E)) - return isLiteral(UnOp->getSubExpr()); - return false; -} - -static bool shouldBeDefaultMemberInitializer(const Expr *Value) { - if (isLiteral(Value) || isUnaryExprOfLiteral(Value)) - return true; - - if (const auto *DRE = dyn_cast(Value)) - return isa(DRE->getDecl()); - - return false; -} - -static const std::pair -isAssignmentToMemberOf(const RecordDecl *Rec, const Stmt *S) { - if (const auto *BO = dyn_cast(S)) { - if (BO->getOpcode() != BO_Assign) - return std::make_pair(nullptr, nullptr); - - const auto *ME = dyn_cast(BO->getLHS()->IgnoreParenImpCasts()); - if (!ME) - return std::make_pair(nullptr, nullptr); - - const auto *Field = dyn_cast(ME->getMemberDecl()); - if (!Field) - return std::make_pair(nullptr, nullptr); - - if (isa(ME->getBase())) - return std::make_pair(Field, BO->getRHS()->IgnoreParenImpCasts()); - } else if (const auto *COCE = dyn_cast(S)) { - if (COCE->getOperator() != OO_Equal) - return std::make_pair(nullptr, nullptr); - - const auto *ME = - dyn_cast(COCE->getArg(0)->IgnoreParenImpCasts()); - if (!ME) - return std::make_pair(nullptr, nullptr); - - const auto *Field = dyn_cast(ME->getMemberDecl()); - if (!Field) - return 
std::make_pair(nullptr, nullptr); - - if (isa(ME->getBase())) - return std::make_pair(Field, COCE->getArg(1)->IgnoreParenImpCasts()); - } - - return std::make_pair(nullptr, nullptr); -} - -PreferMemberInitializerCheck::PreferMemberInitializerCheck( - StringRef Name, ClangTidyContext *Context) - : ClangTidyCheck(Name, Context), - IsUseDefaultMemberInitEnabled( - Context->isCheckEnabled("modernize-use-default-member-init")), - UseAssignment(OptionsView("modernize-use-default-member-init", - Context->getOptions().CheckOptions) - .get("UseAssignment", false)) {} - -void PreferMemberInitializerCheck::storeOptions( - ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "UseAssignment", UseAssignment); -} - -void PreferMemberInitializerCheck::registerMatchers(MatchFinder *Finder) { - Finder->addMatcher( - cxxConstructorDecl(hasBody(compoundStmt()), unless(isInstantiated())) - .bind("ctor"), - this); -} - -void PreferMemberInitializerCheck::check( - const MatchFinder::MatchResult &Result) { - const auto *Ctor = Result.Nodes.getNodeAs("ctor"); - const auto *Body = cast(Ctor->getBody()); - - const CXXRecordDecl *Class = Ctor->getParent(); - SourceLocation InsertPos; - bool FirstToCtorInits = true; - - for (const auto *S : Body->body()) { - if (isControlStatement(S)) - return; - - if (isNoReturnCallStatement(S)) - return; - - const FieldDecl *Field; - const Expr *InitValue; - std::tie(Field, InitValue) = isAssignmentToMemberOf(Class, S); - if (Field) { - if (IsUseDefaultMemberInitEnabled && getLangOpts().CPlusPlus11 && - Ctor->isDefaultConstructor() && - (getLangOpts().CPlusPlus20 || !Field->isBitField()) && - (!isa(Class->getDeclContext()) || - !cast(Class->getDeclContext())->isUnion()) && - shouldBeDefaultMemberInitializer(InitValue)) { - auto Diag = - diag(S->getBeginLoc(), "%0 should be initialized in an in-class" - " default member initializer") - << Field; - - SourceLocation FieldEnd = - Lexer::getLocForEndOfToken(Field->getSourceRange().getEnd(), 0, - *Result.SourceManager, getLangOpts()); - Diag << FixItHint::CreateInsertion(FieldEnd, - UseAssignment ? " = " : "{") - << FixItHint::CreateInsertionFromRange( - FieldEnd, - CharSourceRange(InitValue->getSourceRange(), true)) - << FixItHint::CreateInsertion(FieldEnd, UseAssignment ? 
"" : "}"); - - SourceLocation SemiColonEnd = - Lexer::findNextToken(S->getEndLoc(), *Result.SourceManager, - getLangOpts()) - ->getEndLoc(); - CharSourceRange StmtRange = - CharSourceRange::getCharRange(S->getBeginLoc(), SemiColonEnd); - - Diag << FixItHint::CreateRemoval(StmtRange); - } else { - auto Diag = - diag(S->getBeginLoc(), "%0 should be initialized in a member" - " initializer of the constructor") - << Field; - - bool AddComma = false; - if (!Ctor->getNumCtorInitializers() && FirstToCtorInits) { - SourceLocation BodyPos = Ctor->getBody()->getBeginLoc(); - SourceLocation NextPos = Ctor->getBeginLoc(); - do { - InsertPos = NextPos; - NextPos = Lexer::findNextToken(NextPos, *Result.SourceManager, - getLangOpts()) - ->getLocation(); - } while (NextPos != BodyPos); - InsertPos = Lexer::getLocForEndOfToken( - InsertPos, 0, *Result.SourceManager, getLangOpts()); - - Diag << FixItHint::CreateInsertion(InsertPos, " : "); - } else { - bool Found = false; - for (const auto *Init : Ctor->inits()) { - if (Result.SourceManager->isBeforeInTranslationUnit( - Field->getLocation(), Init->getMember()->getLocation())) { - InsertPos = Init->getSourceLocation(); - Found = true; - break; - } - } - - if (!Found) { - if (Ctor->getNumCtorInitializers()) { - InsertPos = Lexer::getLocForEndOfToken( - (*Ctor->init_rbegin())->getSourceRange().getEnd(), 0, - *Result.SourceManager, getLangOpts()); - } - Diag << FixItHint::CreateInsertion(InsertPos, ", "); - } else { - AddComma = true; - } - } - Diag << FixItHint::CreateInsertion(InsertPos, Field->getName()) - << FixItHint::CreateInsertion(InsertPos, "(") - << FixItHint::CreateInsertionFromRange( - InsertPos, - CharSourceRange(InitValue->getSourceRange(), true)) - << FixItHint::CreateInsertion(InsertPos, ")"); - if (AddComma) - Diag << FixItHint::CreateInsertion(InsertPos, ", "); - - SourceLocation SemiColonEnd = - Lexer::findNextToken(S->getEndLoc(), *Result.SourceManager, - getLangOpts()) - ->getEndLoc(); - CharSourceRange StmtRange = - CharSourceRange::getCharRange(S->getBeginLoc(), SemiColonEnd); - - Diag << FixItHint::CreateRemoval(StmtRange); - FirstToCtorInits = false; - } - } - } -} - -} // namespace cppcoreguidelines -} // namespace tidy -} // namespace clang diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.h deleted file mode 100644 index dbef7c98d8e35..0000000000000 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.h +++ /dev/null @@ -1,41 +0,0 @@ -//===--- PreferMemberInitializerCheck.h - clang-tidy ------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PREFERMEMBERINITIALIZERCHECK_H -#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PREFERMEMBERINITIALIZERCHECK_H - -#include "../ClangTidyCheck.h" - -namespace clang { -namespace tidy { -namespace cppcoreguidelines { - -/// Finds member initializations in the constructor body which can be placed -/// into the initialization list instead. 
-/// -/// For the user-facing documentation see: -/// http://clang.llvm.org/extra/clang-tidy/checks/cppcoreguidelines-prefer-member-initializer.html -class PreferMemberInitializerCheck : public ClangTidyCheck { -public: - PreferMemberInitializerCheck(StringRef Name, ClangTidyContext *Context); - bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { - return LangOpts.CPlusPlus; - } - void storeOptions(ClangTidyOptions::OptionMap &Opts) override; - void registerMatchers(ast_matchers::MatchFinder *Finder) override; - void check(const ast_matchers::MatchFinder::MatchResult &Result) override; - - const bool IsUseDefaultMemberInitEnabled; - const bool UseAssignment; -}; - -} // namespace cppcoreguidelines -} // namespace tidy -} // namespace clang - -#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PREFERMEMBERINITIALIZERCHECK_H diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 53c3894914e52..192f200f34aca 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -94,12 +94,6 @@ New checks Finds condition variables in nested ``if`` statements that were also checked in the outer ``if`` statement and were not changed. -- New :doc:`cppcoreguidelines-prefer-member-initializer - ` check. - - Finds member initializations in the constructor body which can be placed into - the initialization list instead. - Changes in existing checks ^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-prefer-member-initializer.rst b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-prefer-member-initializer.rst deleted file mode 100644 index 749be14182153..0000000000000 --- a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-prefer-member-initializer.rst +++ /dev/null @@ -1,102 +0,0 @@ -.. title:: clang-tidy - cppcoreguidelines-prefer-member-initializer - -cppcoreguidelines-prefer-member-initializer -=========================================== - -Finds member initializations in the constructor body which can be converted -into member initializers of the constructor instead. This not only improves -the readability of the code but also positively affects its performance. -Class-member assignments inside a control statement or following the first -control statement are ignored. - -This check implements `C.49 `_ from the CppCoreGuidelines. - -If the language version is `C++ 11` or above, the constructor is the default -constructor of the class, the field is not a bitfield (only in case of earlier -language version than `C++ 20`), furthermore the assigned value is a literal, -negated literal or ``enum`` constant then the preferred place of the -initialization is at the class member declaration. - -This latter rule is `C.48 `_ from CppCoreGuidelines. - -Please note, that this check does not enforce this latter rule for -initializations already implemented as member initializers. For that purpose -see check `modernize-use-default-member-init `_. - -Example 1 ---------- - -.. code-block:: c++ - - class C { - int n; - int m; - public: - C() { - n = 1; // Literal in default constructor - if (dice()) - return; - m = 1; - } - }; - -Here ``n`` can be initialized using a default member initializer, unlike -``m``, as ``m``'s initialization follows a control statement (``if``): - -.. code-block:: c++ - - class C { - int n{1}; - int m; - public: - C() { - if (dice()) - return; - m = 1; - } - -Example 2 ---------- - -.. 
code-block:: c++ - - class C { - int n; - int m; - public: - C(int nn, int mm) { - n = nn; // Neither default constructor nor literal - if (dice()) - return; - m = mm; - } - }; - -Here ``n`` can be initialized in the constructor initialization list, unlike -``m``, as ``m``'s initialization follows a control statement (``if``): - -.. code-block:: c++ - - C(int nn, int mm) : n(nn) { - if (dice()) - return; - m = mm; - } - -.. option:: UseAssignment - - If this option is set to non-zero (default is `0`), the check will initialize - members with an assignment. In this case the fix of the first example looks - like this: - -.. code-block:: c++ - - class C { - int n = 1; - int m; - public: - C() { - if (dice()) - return; - m = 1; - } diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index c569ce704d979..378e92cb66ddc 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -143,7 +143,6 @@ Clang-Tidy Checks `cppcoreguidelines-narrowing-conversions `_, `cppcoreguidelines-no-malloc `_, `cppcoreguidelines-owning-memory `_, - `cppcoreguidelines-prefer-member-initializer `_, `cppcoreguidelines-pro-bounds-array-to-pointer-decay `_, `cppcoreguidelines-pro-bounds-constant-array-index `_, "Yes" `cppcoreguidelines-pro-bounds-pointer-arithmetic `_, diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init-assignment.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init-assignment.cpp deleted file mode 100644 index dc6cb7606a0de..0000000000000 --- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init-assignment.cpp +++ /dev/null @@ -1,31 +0,0 @@ -// RUN: %check_clang_tidy %s cppcoreguidelines-prefer-member-initializer,modernize-use-default-member-init %t -- \ -// RUN: -config="{CheckOptions: [{key: modernize-use-default-member-init.UseAssignment, value: 1}]}" - -class Simple1 { - int n; - // CHECK-FIXES: int n = 0; - double x; - // CHECK-FIXES: double x = 0.0; - -public: - Simple1() { - n = 0; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in an in-class default member initializer [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - x = 0.0; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in an in-class default member initializer [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - Simple1(int nn, double xx) { - // CHECK-FIXES: Simple1(int nn, double xx) : n(nn), x(xx) { - n = nn; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - x = xx; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - ~Simple1() = default; -}; diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init.cpp deleted file mode 100644 index fe5bb7c3bb989..0000000000000 --- 
a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// RUN: %check_clang_tidy %s cppcoreguidelines-prefer-member-initializer,modernize-use-default-member-init %t - -class Simple1 { - int n; - // CHECK-FIXES: int n{0}; - double x; - // CHECK-FIXES: double x{0.0}; - -public: - Simple1() { - n = 0; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in an in-class default member initializer [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - x = 0.0; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in an in-class default member initializer [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - Simple1(int nn, double xx) { - // CHECK-FIXES: Simple1(int nn, double xx) : n(nn), x(xx) { - n = nn; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - x = xx; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - ~Simple1() = default; -}; diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer.cpp deleted file mode 100644 index a55a7d8208a6a..0000000000000 --- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer.cpp +++ /dev/null @@ -1,454 +0,0 @@ -// RUN: %check_clang_tidy %s cppcoreguidelines-prefer-member-initializer %t -- -- -fcxx-exceptions - -class Simple1 { - int n; - double x; - -public: - Simple1() { - // CHECK-FIXES: Simple1() : n(0), x(0.0) { - n = 0; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - x = 0.0; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - Simple1(int nn, double xx) { - // CHECK-FIXES: Simple1(int nn, double xx) : n(nn), x(xx) { - n = nn; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - x = xx; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - ~Simple1() = default; -}; - -class Simple2 { - int n; - double x; - -public: - Simple2() : n(0) { - // CHECK-FIXES: Simple2() : n(0), x(0.0) { - x = 0.0; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - Simple2(int nn, double xx) : n(nn) { - // CHECK-FIXES: Simple2(int nn, double xx) : n(nn), x(xx) { - x = xx; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - ~Simple2() = default; -}; - -class Simple3 { - int n; - double x; - -public: 
- Simple3() : x(0.0) { - // CHECK-FIXES: Simple3() : n(0), x(0.0) { - n = 0; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - Simple3(int nn, double xx) : x(xx) { - // CHECK-FIXES: Simple3(int nn, double xx) : n(nn), x(xx) { - n = nn; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - ~Simple3() = default; -}; - -int something_int(); -double something_double(); - -class Simple4 { - int n; - -public: - Simple4() { - // CHECK-FIXES: Simple4() : n(something_int()) { - n = something_int(); - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - ~Simple4() = default; -}; - -static bool dice(); - -class Complex1 { - int n; - int m; - -public: - Complex1() : n(0) { - if (dice()) - m = 1; - // NO-MESSAGES: initialization of 'm' is nested in a conditional expression - } - - ~Complex1() = default; -}; - -class Complex2 { - int n; - int m; - -public: - Complex2() : n(0) { - if (!dice()) - return; - m = 1; - // NO-MESSAGES: initialization of 'm' follows a conditional expression - } - - ~Complex2() = default; -}; - -class Complex3 { - int n; - int m; - -public: - Complex3() : n(0) { - while (dice()) - m = 1; - // NO-MESSAGES: initialization of 'm' is nested in a conditional loop - } - - ~Complex3() = default; -}; - -class Complex4 { - int n; - int m; - -public: - Complex4() : n(0) { - while (!dice()) - return; - m = 1; - // NO-MESSAGES: initialization of 'm' follows a conditional loop - } - - ~Complex4() = default; -}; - -class Complex5 { - int n; - int m; - -public: - Complex5() : n(0) { - do { - m = 1; - // NO-MESSAGES: initialization of 'm' is nested in a conditional loop - } while (dice()); - } - - ~Complex5() = default; -}; - -class Complex6 { - int n; - int m; - -public: - Complex6() : n(0) { - do { - return; - } while (!dice()); - m = 1; - // NO-MESSAGES: initialization of 'm' follows a conditional loop - } - - ~Complex6() = default; -}; - -class Complex7 { - int n; - int m; - -public: - Complex7() : n(0) { - for (int i = 2; i < 1; ++i) { - m = 1; - } - // NO-MESSAGES: initialization of 'm' is nested into a conditional loop - } - - ~Complex7() = default; -}; - -class Complex8 { - int n; - int m; - -public: - Complex8() : n(0) { - for (int i = 0; i < 2; ++i) { - return; - } - m = 1; - // NO-MESSAGES: initialization of 'm' follows a conditional loop - } - - ~Complex8() = default; -}; - -class Complex9 { - int n; - int m; - -public: - Complex9() : n(0) { - switch (dice()) { - case 1: - m = 1; - // NO-MESSAGES: initialization of 'm' is nested in a conditional expression - break; - default: - break; - } - } - - ~Complex9() = default; -}; - -class Complex10 { - int n; - int m; - -public: - Complex10() : n(0) { - switch (dice()) { - case 1: - return; - break; - default: - break; - } - m = 1; - // NO-MESSAGES: initialization of 'm' follows a conditional expression - } - - ~Complex10() = default; -}; - -class E {}; -int risky(); // may throw - -class Complex11 { - int n; - int m; - -public: - Complex11() : n(0) { - try { - risky(); - m = 1; - // NO-MESSAGES: initialization of 'm' follows is nested in a try-block - } catch (const E& e) { - return; - } - } - - ~Complex11() = 
default; -}; - -class Complex12 { - int n; - int m; - -public: - Complex12() : n(0) { - try { - risky(); - } catch (const E& e) { - return; - } - m = 1; - // NO-MESSAGES: initialization of 'm' follows a try-block - } - - ~Complex12() = default; -}; - -class Complex13 { - int n; - int m; - -public: - Complex13() : n(0) { - return; - m = 1; - // NO-MESSAGES: initialization of 'm' follows a return statement - } - - ~Complex13() = default; -}; - -class Complex14 { - int n; - int m; - -public: - Complex14() : n(0) { - goto X; - m = 1; - // NO-MESSAGES: initialization of 'm' follows a goto statement - X: - ; - } - - ~Complex14() = default; -}; - -void returning(); - -class Complex15 { - int n; - int m; - -public: - Complex15() : n(0) { - // CHECK-FIXES: Complex15() : n(0), m(1) { - returning(); - m = 1; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'm' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - ~Complex15() = default; -}; - -[[noreturn]] void not_returning(); - -class Complex16 { - int n; - int m; - -public: - Complex16() : n(0) { - not_returning(); - m = 1; - // NO-MESSAGES: initialization of 'm' follows a non-returning function call - } - - ~Complex16() = default; -}; - -class Complex17 { - int n; - int m; - -public: - Complex17() : n(0) { - throw 1; - m = 1; - // NO-MESSAGES: initialization of 'm' follows a 'throw' statement; - } - - ~Complex17() = default; -}; - -class Complex18 { - int n; - -public: - Complex18() try { - n = risky(); - // NO-MESSAGES: initialization of 'n' in a 'try' body; - } catch (const E& e) { - n = 0; - } - - ~Complex18() = default; -}; - -class Complex19 { - int n; -public: - Complex19() { - // CHECK-FIXES: Complex19() : n(0) { - n = 0; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - explicit Complex19(int) { - // CHECK-FIXES: Complex19(int) : n(12) { - n = 12; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - ~Complex19() = default; -}; - -class VeryComplex1 { - int n1, n2, n3; - double x1, x2, x3; - int n4, n5, n6; - double x4, x5, x6; - - VeryComplex1() : n3(something_int()), x3(something_double()), - n5(something_int()), x4(something_double()), - x5(something_double()) { - // CHECK-FIXES: VeryComplex1() : n2(something_int()), n1(something_int()), n3(something_int()), x2(something_double()), x1(something_double()), x3(something_double()), - // CHECK-FIXES: n4(something_int()), n5(something_int()), n6(something_int()), x4(something_double()), - // CHECK-FIXES: x5(something_double()), x6(something_double()) { - -// FIXME: Order of elements on the constructor initializer list should match -// the order of the declaration of the fields. 
Thus the correct fixes -// should look like these: -// - // C ECK-FIXES: VeryComplex1() : n2(something_int()), n1(something_int()), n3(something_int()), x2(something_double()), x1(something_double()), x3(something_double()), - // C ECK-FIXES: n4(something_int()), n5(something_int()), n6(something_int()), x4(something_double()), - // C ECK-FIXES: x5(something_double()), x6(something_double()) { -// -// However, the Diagnostics Engine processes fixes in the order of the -// diagnostics and insertions to the same position are handled in left to -// right order thus in the case two adjacent fields are initialized -// inside the constructor in reverse order the provided fix is a -// constructor initializer list that does not match the order of the -// declaration of the fields. - - x2 = something_double(); - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x2' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - n2 = something_int(); - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n2' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - x6 = something_double(); - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x6' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - x1 = something_double(); - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x1' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - n6 = something_int(); - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n6' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - n1 = something_int(); - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n1' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - n4 = something_int(); - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n4' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } -}; From 64104db59d1386d7e6a2afcdb5d9e3cc5ff059b8 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 10 Sep 2020 13:33:11 +0000 Subject: [PATCH 0256/1079] [gn build] Port ebf496d8055 --- .../clang-tools-extra/clang-tidy/cppcoreguidelines/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cppcoreguidelines/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cppcoreguidelines/BUILD.gn index ff8b4e4c7d148..c31078df039d9 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cppcoreguidelines/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cppcoreguidelines/BUILD.gn @@ -25,7 +25,6 @@ static_library("cppcoreguidelines") { "NarrowingConversionsCheck.cpp", "NoMallocCheck.cpp", "OwningMemoryCheck.cpp", - "PreferMemberInitializerCheck.cpp", "ProBoundsArrayToPointerDecayCheck.cpp", "ProBoundsConstantArrayIndexCheck.cpp", "ProBoundsPointerArithmeticCheck.cpp", From 52f42720b26a32c9dffc9331841415442f784700 Mon Sep 17 00:00:00 2001 From: Kamil Rytarowski Date: Thu, 10 Sep 2020 12:45:24 +0200 Subject: [PATCH 0257/1079] [lldb] [netbsd] Avoid comparison of signed and unsigned integers Cast ProcessID to ::pid_t. 
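For illustration, the mismatch this patch silences is an ordinary -Wsign-compare case: lldb's process IDs are 64-bit unsigned values, while the NetBSD kernel's kinfo structures carry a signed ::pid_t. A minimal standalone sketch (not lldb code; the uint64_t parameter stands in for the value GetProcessID() returns):

    // sign_compare.cpp: build with "c++ -Wsign-compare -c sign_compare.cpp"
    #include <cstdint>
    #include <sys/types.h> // ::pid_t

    bool samePid(uint64_t process_id, ::pid_t kernel_pid) {
      // return process_id == kernel_pid;  // warns: comparison of integers of
      //                                   // different signedness
      return (::pid_t)process_id == kernel_pid; // explicit cast, as in the patch
    }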
--- lldb/source/Host/netbsd/HostNetBSD.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Host/netbsd/HostNetBSD.cpp b/lldb/source/Host/netbsd/HostNetBSD.cpp index 4708fb45deed0..38e2aa5c1e058 100644 --- a/lldb/source/Host/netbsd/HostNetBSD.cpp +++ b/lldb/source/Host/netbsd/HostNetBSD.cpp @@ -220,7 +220,7 @@ uint32_t Host::FindProcessesImpl(const ProcessInstanceInfoMatch &match_info, if (proc_kinfo[i].p_nlwps > 1) { bool already_registered = false; for (size_t pi = 0; pi < process_infos.size(); pi++) { - if (process_infos[pi].GetProcessID() == proc_kinfo[i].p_pid) { + if ((::pid_t)process_infos[pi].GetProcessID() == proc_kinfo[i].p_pid) { already_registered = true; break; } From 4e413e16216d0c94ada2171f3c59e0a85f4fa4b6 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 10 Sep 2020 16:16:44 +0200 Subject: [PATCH 0258/1079] [InstCombine] Temporarily do not drop volatile stores before unreachable See discussion in D87149. Dropping volatile stores here is legal per LLVM semantics, but causes issues for real code and may result in a change to LLVM volatile semantics. Temporarily treat volatile stores as "not guaranteed to transfer execution" in just this place, until this issue has been resolved. --- .../InstCombine/InstructionCombining.cpp | 8 +++++++ .../Transforms/InstCombine/volatile_store.ll | 23 +++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 0ca256860c596..63ba7eb85c663 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2805,6 +2805,14 @@ Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) { Instruction *Prev = I.getPrevNonDebugInstruction(); if (Prev && !Prev->isEHPad() && isGuaranteedToTransferExecutionToSuccessor(Prev)) { + // Temporarily disable removal of volatile stores preceding unreachable, + // pending a potential LangRef change permitting volatile stores to trap. + // TODO: Either remove this code, or properly integrate the check into + // isGuaranteedToTransferExecutionToSuccessor(). 
+ if (auto *SI = dyn_cast<StoreInst>(Prev)) + if (SI->isVolatile()) + return nullptr; + eraseInstFromFunction(*Prev); return &I; } diff --git a/llvm/test/Transforms/InstCombine/volatile_store.ll b/llvm/test/Transforms/InstCombine/volatile_store.ll index c2f63d6659f07..105ec83056d61 100644 --- a/llvm/test/Transforms/InstCombine/volatile_store.ll +++ b/llvm/test/Transforms/InstCombine/volatile_store.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s @x = weak global i32 0 @@ -8,7 +8,7 @@ define void @self_assign_1() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP:%.*]] = load volatile i32, i32* @x, align 4 ; CHECK-NEXT: store volatile i32 [[TMP]], i32* @x, align 4 -; CHECK-NEXT: br label %return +; CHECK-NEXT: br label [[RETURN:%.*]] ; CHECK: return: ; CHECK-NEXT: ret void ; @@ -20,3 +20,22 @@ entry: return: ret void } + +define void @volatile_store_before_unreachable(i1 %c, i8* %p) { +; CHECK-LABEL: @volatile_store_before_unreachable( +; CHECK-NEXT: br i1 [[C:%.*]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: store volatile i8 0, i8* [[P:%.*]], align 1 +; CHECK-NEXT: unreachable +; CHECK: false: +; CHECK-NEXT: ret void +; + br i1 %c, label %true, label %false + +true: + store volatile i8 0, i8* %p + unreachable + +false: + ret void +} From 82edd428f1856ff386716b4f836194252458d001 Mon Sep 17 00:00:00 2001 From: Tim Keith Date: Thu, 10 Sep 2020 07:22:52 -0700 Subject: [PATCH 0259/1079] [flang] Fix check for distinguishable operators/assignments Change how generic operators and assignments are checked for distinguishable procedures. Because of how they are invoked, available type-bound generics and normal generics all have to be considered together. This is different from how generic names are checked. Move the common part of the checking into DistinguishabilityHelper so that it can be used in both cases after the appropriate procedures have been added. Cache the result of Procedure::Characterize(Symbol) in a map in CheckHelper so that we don't have to worry about passing the characterized Procedures around or the cost of recomputing them. Add MakeOpName() to construct names for defined operators and assignment for use in error messages. This eliminates the need for different messages in those cases. When the procedures for a defined operator or assignment are indistinguishable, include the type name in the error message, otherwise it may be ambiguous. Add a missing check that procedures for defined operators are functions and that their dummy arguments are INTENT(IN) or VALUE.
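The Characterize() caching described above is plain memoization keyed by symbol. A minimal sketch of the pattern with stand-in types (simplified: keyed by pointer rather than flang's SymbolRef, and CharacterizeOnce is a hypothetical stand-in for Procedure::Characterize):

    #include <map>
    #include <optional>

    struct Symbol {};    // stand-in for semantics::Symbol
    struct Procedure {}; // stand-in for characteristics::Procedure

    std::optional<Procedure> CharacterizeOnce(const Symbol &) {
      return Procedure{}; // the real analysis would happen here
    }

    class CharacterizeCache {
    public:
      // Computes at most once per symbol; a cached empty optional also
      // records a failed characterization so it is never recomputed.
      const Procedure *Characterize(const Symbol &symbol) {
        auto it = cache_.find(&symbol);
        if (it == cache_.end()) {
          it = cache_.emplace(&symbol, CharacterizeOnce(symbol)).first;
        }
        return it->second ? &*it->second : nullptr;
      }

    private:
      std::map<const Symbol *, std::optional<Procedure>> cache_;
    };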
Differential Revision: https://reviews.llvm.org/D87341 --- flang/include/flang/Semantics/tools.h | 2 + flang/lib/Evaluate/tools.cpp | 4 +- flang/lib/Semantics/check-declarations.cpp | 271 +++++++++++++------- flang/lib/Semantics/resolve-names-utils.cpp | 6 - flang/lib/Semantics/resolve-names-utils.h | 2 - flang/lib/Semantics/resolve-names.cpp | 31 +-- flang/lib/Semantics/tools.cpp | 13 + flang/test/Semantics/resolve11.f90 | 8 +- flang/test/Semantics/resolve13.f90 | 10 +- flang/test/Semantics/resolve15.f90 | 4 +- flang/test/Semantics/resolve25.f90 | 22 +- flang/test/Semantics/resolve53.f90 | 17 +- flang/test/Semantics/resolve96.f90 | 62 +++++ flang/test/Semantics/test_errors.sh | 2 +- 14 files changed, 301 insertions(+), 153 deletions(-) create mode 100644 flang/test/Semantics/resolve96.f90 diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index adc722c3847f7..58ba7bf700175 100644 --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -70,6 +70,8 @@ bool IsIntrinsicConcat( const evaluate::DynamicType &, int, const evaluate::DynamicType &, int); bool IsGenericDefinedOp(const Symbol &); +bool IsDefinedOperator(SourceName); +std::string MakeOpName(SourceName); bool DoesScopeContain(const Scope *maybeAncestor, const Scope &maybeDescendent); bool DoesScopeContain(const Scope *, const Symbol &); bool IsUseAssociated(const Symbol &, const Scope &); diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp index 128a73ad4c78f..4edf90d37fa59 100644 --- a/flang/lib/Evaluate/tools.cpp +++ b/flang/lib/Evaluate/tools.cpp @@ -813,8 +813,8 @@ parser::Message *AttachDeclaration( unhosted->detailsIf<semantics::ProcBindingDetails>()}) { if (binding->symbol().name() != symbol.name()) { message.Attach(binding->symbol().name(), - "Procedure '%s' is bound to '%s'"_en_US, symbol.name(), - binding->symbol().name()); + "Procedure '%s' of type '%s' is bound to '%s'"_en_US, symbol.name(), + symbol.owner().GetName().value(), binding->symbol().name()); return &message; } unhosted = &binding->symbol(); diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index df7ae6e53b1f6..896af3cc83e08 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -21,17 +21,19 @@ namespace Fortran::semantics { -using evaluate::characteristics::DummyArgument; -using evaluate::characteristics::DummyDataObject; -using evaluate::characteristics::DummyProcedure; -using evaluate::characteristics::FunctionResult; -using evaluate::characteristics::Procedure; +namespace characteristics = evaluate::characteristics; +using characteristics::DummyArgument; +using characteristics::DummyDataObject; +using characteristics::DummyProcedure; +using characteristics::FunctionResult; +using characteristics::Procedure; class CheckHelper { public: explicit CheckHelper(SemanticsContext &c) : context_{c} {} CheckHelper(SemanticsContext &c, const Scope &s) : context_{c}, scope_{&s} {} + SemanticsContext &context() { return context_; } void Check() { Check(context_.globalScope()); } void Check(const ParamValue &, bool canBeAssumed); void Check(const Bound &bound) { CheckSpecExpr(bound.GetExplicit()); } @@ -44,6 +46,7 @@ class CheckHelper { void Check(const Symbol &); void Check(const Scope &); void CheckInitialization(const Symbol &); + const Procedure *Characterize(const Symbol &); private: template <typename A> void CheckSpecExpr(const A &x) { @@ -63,24 +66,20 @@ class CheckHelper { void CheckSubprogram(const Symbol &,
const SubprogramDetails &); void CheckAssumedTypeEntity(const Symbol &, const ObjectEntityDetails &); void CheckDerivedType(const Symbol &, const DerivedTypeDetails &); - void CheckHostAssoc(const Symbol &, const HostAssocDetails &); void CheckGeneric(const Symbol &, const GenericDetails &); - std::optional<std::vector<Procedure>> Characterize(const SymbolVector &); - bool CheckDefinedOperator(const SourceName &, const GenericKind &, - const Symbol &, const Procedure &); + void CheckHostAssoc(const Symbol &, const HostAssocDetails &); + bool CheckDefinedOperator( + SourceName, GenericKind, const Symbol &, const Procedure &); std::optional<parser::MessageFixedText> CheckNumberOfArgs( const GenericKind &, std::size_t); bool CheckDefinedOperatorArg( const SourceName &, const Symbol &, const Procedure &, std::size_t); bool CheckDefinedAssignment(const Symbol &, const Procedure &); bool CheckDefinedAssignmentArg(const Symbol &, const DummyArgument &, int); - void CheckSpecificsAreDistinguishable( - const Symbol &, const GenericDetails &, const std::vector<Procedure> &); + void CheckSpecificsAreDistinguishable(const Symbol &, const GenericDetails &); void CheckEquivalenceSet(const EquivalenceSet &); void CheckBlockData(const Scope &); - - void SayNotDistinguishable( - const SourceName &, GenericKind, const Symbol &, const Symbol &); + void CheckGenericOps(const Scope &); bool CheckConflicting(const Symbol &, Attr, Attr); bool InPure() const { return innermostSymbol_ && IsPureProcedure(*innermostSymbol_); } @@ -108,6 +107,27 @@ class CheckHelper { // This symbol is the one attached to the innermost enclosing scope // that has a symbol. const Symbol *innermostSymbol_{nullptr}; + // Cache of calls to Procedure::Characterize(Symbol) + std::map<SymbolRef, std::optional<Procedure>> characterizeCache_; +}; + +class DistinguishabilityHelper { +public: + DistinguishabilityHelper(SemanticsContext &context) : context_{context} {} + void Add(const Symbol &, GenericKind, const Symbol &, const Procedure &); + void Check(); + +private: + void SayNotDistinguishable( + const SourceName &, GenericKind, const Symbol &, const Symbol &); + + SemanticsContext &context_; + struct ProcedureInfo { + GenericKind kind; + const Symbol &symbol; + const Procedure &procedure; + }; + std::map<SourceName, std::vector<ProcedureInfo>> nameToInfo_; }; void CheckHelper::Check(const ParamValue &value, bool canBeAssumed) { @@ -664,12 +684,13 @@ void CheckHelper::CheckProcEntity( // - C1551: NON_RECURSIVE prefix class SubprogramMatchHelper { public: - explicit SubprogramMatchHelper(SemanticsContext &context) - : context{context} {} + explicit SubprogramMatchHelper(CheckHelper &checkHelper) + : checkHelper{checkHelper} {} void Check(const Symbol &, const Symbol &); private: + SemanticsContext &context() { return checkHelper.context(); } void CheckDummyArg(const Symbol &, const Symbol &, const DummyArgument &, const DummyArgument &); void CheckDummyDataObject(const Symbol &, const Symbol &, @@ -692,7 +713,7 @@ class SubprogramMatchHelper { return parser::ToUpperCaseLetters(DummyProcedure::EnumToString(attr)); } - SemanticsContext &context; + CheckHelper &checkHelper; }; // 15.6.2.6 para 3 - can the result of an ENTRY differ from its function?
@@ -719,7 +740,7 @@ bool CheckHelper::IsResultOkToDiffer(const FunctionResult &result) { void CheckHelper::CheckSubprogram( const Symbol &symbol, const SubprogramDetails &details) { if (const Symbol * iface{FindSeparateModuleSubprogramInterface(&symbol)}) { - SubprogramMatchHelper{context_}.Check(symbol, *iface); + SubprogramMatchHelper{*this}.Check(symbol, *iface); } if (const Scope * entryScope{details.entryScope()}) { // ENTRY 15.6.2.6, esp. C1571 @@ -834,66 +855,25 @@ void CheckHelper::CheckHostAssoc( void CheckHelper::CheckGeneric( const Symbol &symbol, const GenericDetails &details) { - const SymbolVector &specifics{details.specificProcs()}; - const auto &bindingNames{details.bindingNames()}; - std::optional<std::vector<Procedure>> procs{Characterize(specifics)}; - if (!procs) { - return; - } - bool ok{true}; - if (details.kind().IsIntrinsicOperator()) { - for (std::size_t i{0}; i < specifics.size(); ++i) { - auto restorer{messages_.SetLocation(bindingNames[i])}; - ok &= CheckDefinedOperator( - symbol.name(), details.kind(), specifics[i], (*procs)[i]); - } - } - if (details.kind().IsAssignment()) { - for (std::size_t i{0}; i < specifics.size(); ++i) { - auto restorer{messages_.SetLocation(bindingNames[i])}; - ok &= CheckDefinedAssignment(specifics[i], (*procs)[i]); - } - } - if (ok) { - CheckSpecificsAreDistinguishable(symbol, details, *procs); - } + CheckSpecificsAreDistinguishable(symbol, details); } // Check that the specifics of this generic are distinguishable from each other -void CheckHelper::CheckSpecificsAreDistinguishable(const Symbol &generic, - const GenericDetails &details, const std::vector<Procedure> &procs) { +void CheckHelper::CheckSpecificsAreDistinguishable( + const Symbol &generic, const GenericDetails &details) { + GenericKind kind{details.kind()}; const SymbolVector &specifics{details.specificProcs()}; std::size_t count{specifics.size()}; - if (count < 2) { + if (count < 2 || !kind.IsName()) { return; } - GenericKind kind{details.kind()}; - auto distinguishable{kind.IsAssignment() || kind.IsOperator() - ? evaluate::characteristics::DistinguishableOpOrAssign - : evaluate::characteristics::Distinguishable}; - for (std::size_t i1{0}; i1 < count - 1; ++i1) { - auto &proc1{procs[i1]}; - for (std::size_t i2{i1 + 1}; i2 < count; ++i2) { - auto &proc2{procs[i2]}; - if (!distinguishable(proc1, proc2)) { - SayNotDistinguishable( - generic.name(), kind, specifics[i1], specifics[i2]); - } + DistinguishabilityHelper helper{context_}; + for (const Symbol &specific : specifics) { + if (const Procedure * procedure{Characterize(specific)}) { + helper.Add(generic, kind, specific, *procedure); } } -} - -void CheckHelper::SayNotDistinguishable(const SourceName &name, - GenericKind kind, const Symbol &proc1, const Symbol &proc2) { - auto &&text{kind.IsDefinedOperator() - ?
"Generic operator '%s' may not have specific procedures '%s'" - " and '%s' as their interfaces are not distinguishable"_err_en_US - : "Generic '%s' may not have specific procedures '%s'" - " and '%s' as their interfaces are not distinguishable"_err_en_US}; - auto &msg{ - context_.Say(name, std::move(text), name, proc1.name(), proc2.name())}; - evaluate::AttachDeclaration(msg, proc1); - evaluate::AttachDeclaration(msg, proc2); + helper.Check(); } static bool ConflictsWithIntrinsicAssignment(const Procedure &proc) { @@ -905,6 +885,9 @@ static bool ConflictsWithIntrinsicAssignment(const Procedure &proc) { static bool ConflictsWithIntrinsicOperator( const GenericKind &kind, const Procedure &proc) { + if (!kind.IsIntrinsicOperator()) { + return false; + } auto arg0{std::get(proc.dummyArguments[0].u).type}; auto type0{arg0.type()}; if (proc.dummyArguments.size() == 1) { // unary @@ -942,8 +925,11 @@ static bool ConflictsWithIntrinsicOperator( } // Check if this procedure can be used for defined operators (see 15.4.3.4.2). -bool CheckHelper::CheckDefinedOperator(const SourceName &opName, - const GenericKind &kind, const Symbol &specific, const Procedure &proc) { +bool CheckHelper::CheckDefinedOperator(SourceName opName, GenericKind kind, + const Symbol &specific, const Procedure &proc) { + if (context_.HasError(specific)) { + return false; + } std::optional msg; if (specific.attrs().test(Attr::NOPASS)) { // C774 msg = "%s procedure '%s' may not have NOPASS attribute"_err_en_US; @@ -962,8 +948,9 @@ bool CheckHelper::CheckDefinedOperator(const SourceName &opName, } else { return true; // OK } - SayWithDeclaration(specific, std::move(msg.value()), - parser::ToUpperCaseLetters(opName.ToString()), specific.name()); + SayWithDeclaration( + specific, std::move(*msg), MakeOpName(opName), specific.name()); + context_.SetError(specific); return false; } @@ -971,6 +958,9 @@ bool CheckHelper::CheckDefinedOperator(const SourceName &opName, // false and return the error message in msg. std::optional CheckHelper::CheckNumberOfArgs( const GenericKind &kind, std::size_t nargs) { + if (!kind.IsIntrinsicOperator()) { + return std::nullopt; + } std::size_t min{2}, max{2}; // allowed number of args; default is binary std::visit(common::visitors{ [&](const common::NumericOperator &x) { @@ -1035,6 +1025,9 @@ bool CheckHelper::CheckDefinedOperatorArg(const SourceName &opName, // Check if this procedure can be used for defined assignment (see 15.4.3.4.3). 
bool CheckHelper::CheckDefinedAssignment( const Symbol &specific, const Procedure &proc) { + if (context_.HasError(specific)) { + return false; + } std::optional<parser::MessageFixedText> msg; if (specific.attrs().test(Attr::NOPASS)) { // C774 msg = "Defined assignment procedure '%s' may not have" @@ -1054,6 +1047,7 @@ bool CheckHelper::CheckDefinedAssignment( return true; // OK } SayWithDeclaration(specific, std::move(msg.value()), specific.name()); + context_.SetError(specific); return false; } @@ -1086,6 +1080,7 @@ bool CheckHelper::CheckDefinedAssignmentArg( } if (msg) { SayWithDeclaration(symbol, std::move(*msg), symbol.name(), arg.name); + context_.SetError(symbol); return false; } return true; @@ -1102,17 +1097,14 @@ bool CheckHelper::CheckConflicting(const Symbol &symbol, Attr a1, Attr a2) { } } -std::optional<std::vector<Procedure>> CheckHelper::Characterize( - const SymbolVector &specifics) { - std::vector<Procedure> result; - for (const Symbol &specific : specifics) { - auto proc{Procedure::Characterize(specific, context_.intrinsics())}; - if (!proc || context_.HasError(specific)) { - return std::nullopt; - } - result.emplace_back(*proc); - } - return result; +const Procedure *CheckHelper::Characterize(const Symbol &symbol) { + auto it{characterizeCache_.find(symbol)}; + if (it == characterizeCache_.end()) { + auto pair{characterizeCache_.emplace(SymbolRef{symbol}, + Procedure::Characterize(symbol, context_.intrinsics()))}; + it = pair.first; + } + return common::GetPtrFromOptional(it->second); } void CheckHelper::CheckVolatile(const Symbol &symbol, bool isAssociated, @@ -1298,10 +1290,8 @@ void CheckHelper::CheckProcBinding( ? "A NOPASS type-bound procedure may not override a passed-argument procedure"_err_en_US : "A passed-argument type-bound procedure may not override a NOPASS procedure"_err_en_US); } else { - auto bindingChars{evaluate::characteristics::Procedure::Characterize( - binding.symbol(), context_.intrinsics())}; - auto overriddenChars{evaluate::characteristics::Procedure::Characterize( - overriddenBinding->symbol(), context_.intrinsics())}; + const auto *bindingChars{Characterize(binding.symbol())}; + const auto *overriddenChars{Characterize(overriddenBinding->symbol())}; if (bindingChars && overriddenChars) { if (isNopass) { if (!bindingChars->CanOverride(*overriddenChars, std::nullopt)) { @@ -1357,6 +1347,7 @@ void CheckHelper::Check(const Scope &scope) { if (scope.kind() == Scope::Kind::BlockData) { CheckBlockData(scope); } + CheckGenericOps(scope); } void CheckHelper::CheckEquivalenceSet(const EquivalenceSet &set) { @@ -1417,6 +1408,53 @@ void CheckHelper::CheckBlockData(const Scope &scope) { } } +// Check distinguishability of generic assignment and operators. +// For these, generics and generic bindings must be considered together.
+void CheckHelper::CheckGenericOps(const Scope &scope) { + DistinguishabilityHelper helper{context_}; + auto addSpecifics{[&](const Symbol &generic) { + const auto *details{generic.GetUltimate().detailsIf<GenericDetails>()}; + if (!details) { + return; + } + GenericKind kind{details->kind()}; + if (!kind.IsAssignment() && !kind.IsOperator()) { + return; + } + const SymbolVector &specifics{details->specificProcs()}; + const std::vector<SourceName> &bindingNames{details->bindingNames()}; + for (std::size_t i{0}; i < specifics.size(); ++i) { + const Symbol &specific{*specifics[i]}; + if (const Procedure * proc{Characterize(specific)}) { + auto restorer{messages_.SetLocation(bindingNames[i])}; + if (kind.IsAssignment()) { + if (!CheckDefinedAssignment(specific, *proc)) { + continue; + } + } else { + if (!CheckDefinedOperator(generic.name(), kind, specific, *proc)) { + continue; + } + } + helper.Add(generic, kind, specific, *proc); + } + } + }}; + for (const auto &pair : scope) { + const Symbol &symbol{*pair.second}; + addSpecifics(symbol); + const Symbol &ultimate{symbol.GetUltimate()}; + if (ultimate.has<DerivedTypeDetails>()) { + if (const Scope * typeScope{ultimate.scope()}) { + for (const auto &pair2 : *typeScope) { + addSpecifics(*pair2.second); + } + } + } + } + helper.Check(); +} + void SubprogramMatchHelper::Check( const Symbol &symbol1, const Symbol &symbol2) { const auto details1{symbol1.get<SubprogramDetails>()}; @@ -1469,8 +1507,8 @@ void SubprogramMatchHelper::Check( string1, string2); } } - auto proc1{Procedure::Characterize(symbol1, context.intrinsics())}; - auto proc2{Procedure::Characterize(symbol2, context.intrinsics())}; + const Procedure *proc1{checkHelper.Characterize(symbol1)}; + const Procedure *proc2{checkHelper.Characterize(symbol2)}; if (!proc1 || !proc2) { return; } @@ -1583,7 +1621,7 @@ bool SubprogramMatchHelper::CheckSameIntent(const Symbol &symbol1, template <typename... A> void SubprogramMatchHelper::Say(const Symbol &symbol1, const Symbol &symbol2, parser::MessageFixedText &&text, A &&...args) { - auto &message{context.Say(symbol1.name(), std::move(text), symbol1.name(), + auto &message{context().Say(symbol1.name(), std::move(text), symbol1.name(), std::forward<A>(args)...)}; evaluate::AttachDeclaration(message, symbol2); } @@ -1615,7 +1653,7 @@ bool SubprogramMatchHelper::CheckSameAttrs( bool SubprogramMatchHelper::ShapesAreCompatible( const DummyDataObject &obj1, const DummyDataObject &obj2) { - return evaluate::characteristics::ShapesAreCompatible( + return characteristics::ShapesAreCompatible( FoldShape(obj1.type.shape()), FoldShape(obj2.type.shape())); } @@ -1623,11 +1661,58 @@ evaluate::Shape SubprogramMatchHelper::FoldShape(const evaluate::Shape &shape) { evaluate::Shape result; for (const auto &extent : shape) { result.emplace_back( - evaluate::Fold(context.foldingContext(), common::Clone(extent))); + evaluate::Fold(context().foldingContext(), common::Clone(extent))); } return result; } +void DistinguishabilityHelper::Add(const Symbol &generic, GenericKind kind, + const Symbol &specific, const Procedure &procedure) { + if (!context_.HasError(specific)) { + nameToInfo_[generic.name()].emplace_back( + ProcedureInfo{kind, specific, procedure}); + } +} + +void DistinguishabilityHelper::Check() { + for (const auto &[name, info] : nameToInfo_) { + auto count{info.size()}; + for (std::size_t i1{0}; i1 < count - 1; ++i1) { + const auto &[kind1, symbol1, proc1] = info[i1]; + for (std::size_t i2{i1 + 1}; i2 < count; ++i2) { + const auto &[kind2, symbol2, proc2] = info[i2]; + auto distinguishable{kind1.IsName() + ?
evaluate::characteristics::Distinguishable + : evaluate::characteristics::DistinguishableOpOrAssign}; + if (!distinguishable(proc1, proc2)) { + SayNotDistinguishable(name, kind1, symbol1, symbol2); + } + } + } + } +} + +void DistinguishabilityHelper::SayNotDistinguishable(const SourceName &name, + GenericKind kind, const Symbol &proc1, const Symbol &proc2) { + std::string name1{proc1.name().ToString()}; + std::string name2{proc2.name().ToString()}; + if (kind.IsOperator() || kind.IsAssignment()) { + // proc1 and proc2 may come from different scopes so qualify their names + if (proc1.owner().IsDerivedType()) { + name1 = proc1.owner().GetName()->ToString() + '%' + name1; + } + if (proc2.owner().IsDerivedType()) { + name2 = proc2.owner().GetName()->ToString() + '%' + name2; + } + } + auto &msg{context_.Say(name, + "Generic '%s' may not have specific procedures '%s' and '%s'" + " as their interfaces are not distinguishable"_err_en_US, + MakeOpName(name), name1, name2)}; + evaluate::AttachDeclaration(msg, proc1); + evaluate::AttachDeclaration(msg, proc2); +} + void CheckDeclarations(SemanticsContext &context) { CheckHelper{context}.Check(); } diff --git a/flang/lib/Semantics/resolve-names-utils.cpp b/flang/lib/Semantics/resolve-names-utils.cpp index d6f0302e98545..8dbd25e163acb 100644 --- a/flang/lib/Semantics/resolve-names-utils.cpp +++ b/flang/lib/Semantics/resolve-names-utils.cpp @@ -47,12 +47,6 @@ parser::MessageFixedText WithIsFatal( msg.text().begin(), msg.text().size(), isFatal}; } -bool IsDefinedOperator(const SourceName &name) { - const char *begin{name.begin()}; - const char *end{name.end()}; - return begin != end && begin[0] == '.' && end[-1] == '.'; -} - bool IsIntrinsicOperator( const SemanticsContext &context, const SourceName &name) { std::string str{name.ToString()}; diff --git a/flang/lib/Semantics/resolve-names-utils.h b/flang/lib/Semantics/resolve-names-utils.h index 08db70345f152..17462d111d970 100644 --- a/flang/lib/Semantics/resolve-names-utils.h +++ b/flang/lib/Semantics/resolve-names-utils.h @@ -47,8 +47,6 @@ Symbol *Resolve(const parser::Name &, Symbol *); parser::MessageFixedText WithIsFatal( const parser::MessageFixedText &msg, bool isFatal); -// Is this the name of a defined operator, e.g. ".foo." -bool IsDefinedOperator(const SourceName &); bool IsIntrinsicOperator(const SemanticsContext &, const SourceName &); bool IsLogicalConstant(const SemanticsContext &, const SourceName &); diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 54686232dc0d0..b501ac69098f9 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -2276,19 +2276,13 @@ ModuleVisitor::SymbolRename ModuleVisitor::AddUse( return {}; // error occurred finding module } if (!useSymbol) { - Say(useName, - IsDefinedOperator(useName) - ? "Operator '%s' not found in module '%s'"_err_en_US - : "'%s' not found in module '%s'"_err_en_US, - useName, useModuleScope_->GetName().value()); + Say(useName, "'%s' not found in module '%s'"_err_en_US, MakeOpName(useName), + useModuleScope_->GetName().value()); return {}; } if (useSymbol->attrs().test(Attr::PRIVATE)) { - Say(useName, - IsDefinedOperator(useName) - ? 
"Operator '%s' is PRIVATE in '%s'"_err_en_US - : "'%s' is PRIVATE in '%s'"_err_en_US, - useName, useModuleScope_->GetName().value()); + Say(useName, "'%s' is PRIVATE in '%s'"_err_en_US, MakeOpName(useName), + useModuleScope_->GetName().value()); return {}; } auto &localSymbol{MakeSymbol(localName)}; @@ -2550,11 +2544,9 @@ void InterfaceVisitor::ResolveSpecificsInGeneric(Symbol &generic) { } } if (!namesSeen.insert(name->source).second) { - Say(*name, - details.kind().IsDefinedOperator() - ? "Procedure '%s' is already specified in generic operator '%s'"_err_en_US - : "Procedure '%s' is already specified in generic '%s'"_err_en_US, - name->source, generic.name()); + Say(name->source, + "Procedure '%s' is already specified in generic '%s'"_err_en_US, + name->source, MakeOpName(generic.name())); continue; } details.AddSpecificProc(*symbol, name->source); @@ -5932,10 +5924,11 @@ Symbol &ModuleVisitor::SetAccess( if (attrs.HasAny({Attr::PUBLIC, Attr::PRIVATE})) { // PUBLIC/PRIVATE already set: make it a fatal error if it changed Attr prev = attrs.test(Attr::PUBLIC) ? Attr::PUBLIC : Attr::PRIVATE; - auto msg{IsDefinedOperator(name) - ? "The accessibility of operator '%s' has already been specified as %s"_en_US - : "The accessibility of '%s' has already been specified as %s"_en_US}; - Say(name, WithIsFatal(msg, attr != prev), name, EnumToString(prev)); + Say(name, + WithIsFatal( + "The accessibility of '%s' has already been specified as %s"_en_US, + attr != prev), + MakeOpName(name), EnumToString(prev)); } else { attrs.set(attr); } diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp index 7a79dedb00a33..848aef08e3a1f 100644 --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -156,6 +156,19 @@ bool IsGenericDefinedOp(const Symbol &symbol) { } } +bool IsDefinedOperator(SourceName name) { + const char *begin{name.begin()}; + const char *end{name.end()}; + return begin != end && begin[0] == '.' && end[-1] == '.'; +} + +std::string MakeOpName(SourceName name) { + std::string result{name.ToString()}; + return IsDefinedOperator(name) ? "OPERATOR(" + result + ")" + : result.find("operator(", 0) == 0 ? parser::ToUpperCaseLetters(result) + : result; +} + bool IsCommonBlockContaining(const Symbol &block, const Symbol &object) { const auto &objects{block.get().objects()}; auto found{std::find(objects.begin(), objects.end(), object)}; diff --git a/flang/test/Semantics/resolve11.f90 b/flang/test/Semantics/resolve11.f90 index 60dfcb8a10247..06c57b6e4cb89 100644 --- a/flang/test/Semantics/resolve11.f90 +++ b/flang/test/Semantics/resolve11.f90 @@ -13,13 +13,13 @@ module m2 module procedure ifoo end interface public :: operator(.foo.) - !ERROR: The accessibility of operator '.foo.' has already been specified as PUBLIC + !ERROR: The accessibility of 'OPERATOR(.foo.)' has already been specified as PUBLIC private :: operator(.foo.) interface operator(+) module procedure ifoo end interface public :: operator(+) - !ERROR: The accessibility of 'operator(+)' has already been specified as PUBLIC + !ERROR: The accessibility of 'OPERATOR(+)' has already been specified as PUBLIC private :: operator(+) , ifoo contains integer function ifoo(x, y) @@ -37,7 +37,7 @@ logical function lt(x, y) type(t), intent(in) :: x, y end function end interface - !ERROR: The accessibility of 'operator(<)' has already been specified as PRIVATE + !ERROR: The accessibility of 'OPERATOR(<)' has already been specified as PRIVATE public :: operator(<) interface operator(.gt.) 
logical function gt(x, y) @@ -46,6 +46,6 @@ logical function gt(x, y) end function end interface public :: operator(>) - !ERROR: The accessibility of 'operator(.gt.)' has already been specified as PUBLIC + !ERROR: The accessibility of 'OPERATOR(.GT.)' has already been specified as PUBLIC private :: operator(.gt.) end diff --git a/flang/test/Semantics/resolve13.f90 b/flang/test/Semantics/resolve13.f90 index a611aa09e5ccf..f6105b1ec8a87 100644 --- a/flang/test/Semantics/resolve13.f90 +++ b/flang/test/Semantics/resolve13.f90 @@ -27,24 +27,24 @@ integer function ifoo(x, y) !ERROR: 'z' not found in module 'm1' use m1, local_z => z use m1, operator(.localfoo.) => operator(.foo.) -!ERROR: Operator '.bar.' not found in module 'm1' +!ERROR: 'OPERATOR(.bar.)' not found in module 'm1' use m1, operator(.localbar.) => operator(.bar.) !ERROR: 'y' is PRIVATE in 'm1' use m1, only: y -!ERROR: Operator '.priv.' is PRIVATE in 'm1' +!ERROR: 'OPERATOR(.priv.)' is PRIVATE in 'm1' use m1, only: operator(.priv.) -!ERROR: 'operator(*)' is PRIVATE in 'm1' +!ERROR: 'OPERATOR(*)' is PRIVATE in 'm1' use m1, only: operator(*) !ERROR: 'z' not found in module 'm1' use m1, only: z !ERROR: 'z' not found in module 'm1' use m1, only: my_x => z use m1, only: operator(.foo.) -!ERROR: Operator '.bar.' not found in module 'm1' +!ERROR: 'OPERATOR(.bar.)' not found in module 'm1' use m1, only: operator(.bar.) use m1, only: operator(-) , ifoo -!ERROR: 'operator(+)' not found in module 'm1' +!ERROR: 'OPERATOR(+)' not found in module 'm1' use m1, only: operator(+) end diff --git a/flang/test/Semantics/resolve15.f90 b/flang/test/Semantics/resolve15.f90 index 3658a68e1e884..c520c5886599b 100644 --- a/flang/test/Semantics/resolve15.f90 +++ b/flang/test/Semantics/resolve15.f90 @@ -9,7 +9,9 @@ module m end interface interface operator(.foo.) !ERROR: 'var' is not a subprogram - procedure :: sub, var + procedure :: var + !ERROR: OPERATOR(.foo.) procedure 'sub' must be a function + procedure :: sub !ERROR: Procedure 'bad' not found procedure :: bad end interface diff --git a/flang/test/Semantics/resolve25.f90 b/flang/test/Semantics/resolve25.f90 index 3264194993ead..ec0a98ad6a59a 100644 --- a/flang/test/Semantics/resolve25.f90 +++ b/flang/test/Semantics/resolve25.f90 @@ -1,7 +1,7 @@ ! RUN: %S/test_errors.sh %s %t %f18 module m interface foo - subroutine s1(x) + real function s1(x) real x end !ERROR: 's2' is not a module procedure @@ -12,12 +12,12 @@ subroutine s1(x) procedure s1 end interface interface - subroutine s4(x,y) - real x,y - end subroutine - subroutine s2(x,y) - complex x,y - end subroutine + real function s4(x,y) + real, intent(in) :: x,y + end function + complex function s2(x,y) + complex, intent(in) :: x,y + end function end interface generic :: bar => s4 generic :: bar => s2 @@ -26,7 +26,7 @@ subroutine s2(x,y) generic :: operator(.foo.)=> s4 generic :: operator(.foo.)=> s2 - !ERROR: Procedure 's4' is already specified in generic operator '.foo.' 
+ !ERROR: Procedure 's4' is already specified in generic 'OPERATOR(.foo.)' generic :: operator(.foo.)=> s4 end module @@ -37,7 +37,7 @@ integer function f(x, y) end function end interface generic :: operator(+)=> f - !ERROR: Procedure 'f' is already specified in generic 'operator(+)' + !ERROR: Procedure 'f' is already specified in generic 'OPERATOR(+)' generic :: operator(+)=> f end @@ -46,11 +46,11 @@ module m3 procedure f end interface interface operator(>=) - !ERROR: Procedure 'f' is already specified in generic 'operator(.ge.)' + !ERROR: Procedure 'f' is already specified in generic 'OPERATOR(.GE.)' procedure f end interface generic :: operator(>) => f - !ERROR: Procedure 'f' is already specified in generic 'operator(>)' + !ERROR: Procedure 'f' is already specified in generic 'OPERATOR(>)' generic :: operator(.gt.) => f contains logical function f(x, y) result(result) diff --git a/flang/test/Semantics/resolve53.f90 b/flang/test/Semantics/resolve53.f90 index acb27c8575b7d..1487873bd86b3 100644 --- a/flang/test/Semantics/resolve53.f90 +++ b/flang/test/Semantics/resolve53.f90 @@ -210,7 +210,7 @@ module m14 module procedure f1 module procedure f2 end interface - !ERROR: Generic 'operator(+)' may not have specific procedures 'f1' and 'f3' as their interfaces are not distinguishable + !ERROR: Generic 'OPERATOR(+)' may not have specific procedures 'f1' and 'f3' as their interfaces are not distinguishable interface operator(+) module procedure f1 module procedure f3 @@ -219,7 +219,7 @@ module m14 module procedure f1 module procedure f2 end interface - !ERROR: Generic operator '.bar.' may not have specific procedures 'f1' and 'f3' as their interfaces are not distinguishable + !ERROR: Generic 'OPERATOR(.bar.)' may not have specific procedures 'f1' and 'f3' as their interfaces are not distinguishable interface operator(.bar.) module procedure f1 module procedure f3 @@ -332,7 +332,6 @@ subroutine s9(x) end subroutine end - ! Check that specifics for type-bound generics can be distinguished module m16 type :: t @@ -441,20 +440,20 @@ module m19 module procedure f1 module procedure f2 end interface - !ERROR: Generic operator '.bar.' may not have specific procedures 'f2' and 'f3' as their interfaces are not distinguishable + !ERROR: Generic 'OPERATOR(.bar.)' may not have specific procedures 'f2' and 'f3' as their interfaces are not distinguishable interface operator(.bar.) module procedure f2 module procedure f3 end interface contains integer function f1(i) - integer :: i + integer, intent(in) :: i end integer function f2(i, j) - integer :: i, j + integer, value :: i, j end integer function f3(i, j) - integer :: i, j + integer, intent(in) :: i, j end end @@ -472,11 +471,11 @@ real function f(x) subroutine s1() use m20 interface operator(.not.) - !ERROR: Procedure 'f' is already specified in generic 'operator(.not.)' + !ERROR: Procedure 'f' is already specified in generic 'OPERATOR(.NOT.)' procedure f end interface interface operator(+) - !ERROR: Procedure 'f' is already specified in generic 'operator(+)' + !ERROR: Procedure 'f' is already specified in generic 'OPERATOR(+)' procedure f end interface end subroutine s1 diff --git a/flang/test/Semantics/resolve96.f90 b/flang/test/Semantics/resolve96.f90 new file mode 100644 index 0000000000000..b026e042397ec --- /dev/null +++ b/flang/test/Semantics/resolve96.f90 @@ -0,0 +1,62 @@ +! RUN: %S/test_errors.sh %s %t %f18 + +! Check distinguishability for specific procedures of defined operators and +! assignment. 
These are different from names because there a normal generic +! is invoked the same way as a type-bound generic. +! E.g. for a generic name like 'foo', the generic name is invoked as 'foo(x, y)' +! while the type-bound generic is invoked as 'x%foo(y)'. +! But for 'operator(.foo.)', it is 'x .foo. y' in either case. +! So to check the specifics of 'operator(.foo.)' we have to consider all +! definitions of it visible in the current scope. + +! One operator(.foo.) comes from interface-stmt, the other is type-bound. +module m1 + type :: t1 + contains + procedure, pass :: p => s1 + generic :: operator(.foo.) => p + end type + type :: t2 + end type + !ERROR: Generic 'OPERATOR(.foo.)' may not have specific procedures 's2' and 't1%p' as their interfaces are not distinguishable + interface operator(.foo.) + procedure :: s2 + end interface +contains + integer function s1(x1, x2) + class(t1), intent(in) :: x1 + class(t2), intent(in) :: x2 + end + integer function s2(x1, x2) + class(t1), intent(in) :: x1 + class(t2), intent(in) :: x2 + end +end module + +! assignment(=) as type-bound generic in each type +module m2 + type :: t1 + integer :: n + contains + procedure, pass(x1) :: p1 => s1 + !ERROR: Generic 'assignment(=)' may not have specific procedures 't1%p1' and 't2%p2' as their interfaces are not distinguishable + generic :: assignment(=) => p1 + end type + type :: t2 + integer :: n + contains + procedure, pass(x2) :: p2 => s2 + generic :: assignment(=) => p2 + end type +contains + subroutine s1(x1, x2) + class(t1), intent(out) :: x1 + class(t2), intent(in) :: x2 + x1%n = x2%n + 1 + end subroutine + subroutine s2(x1, x2) + class(t1), intent(out) :: x1 + class(t2), intent(in) :: x2 + x1%n = x2%n + 2 + end subroutine +end module diff --git a/flang/test/Semantics/test_errors.sh b/flang/test/Semantics/test_errors.sh index 15383475c5051..5411482e4d3b6 100755 --- a/flang/test/Semantics/test_errors.sh +++ b/flang/test/Semantics/test_errors.sh @@ -2,7 +2,7 @@ # Compile a source file and check errors against those listed in the file. # Change the compiler by setting the F18 environment variable. -F18_OPTIONS="-fdebug-resolve-names -fparse-only" +F18_OPTIONS="-fparse-only" srcdir=$(dirname $0) source $srcdir/common.sh [[ ! -f $src ]] && die "File not found: $src" From 0841916e87a39e3c223c986e8da31e4a9a1432e3 Mon Sep 17 00:00:00 2001 From: Yuriy Chernyshov Date: Thu, 10 Sep 2020 16:40:40 +0200 Subject: [PATCH 0260/1079] [TableGen] Do not construct string from nullptr While I am trying to forbid such usages systematically in https://reviews.llvm.org/D79427 / P2166R0 (a proposal to the C++ standard), this PR fixes this (definitely incorrect) usage in LLVM. Differential Revision: https://reviews.llvm.org/D87185 --- llvm/utils/TableGen/DFAEmitter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/TableGen/DFAEmitter.cpp b/llvm/utils/TableGen/DFAEmitter.cpp index 7391f6845a4b2..e877650852898 100644 --- a/llvm/utils/TableGen/DFAEmitter.cpp +++ b/llvm/utils/TableGen/DFAEmitter.cpp @@ -174,7 +174,7 @@ namespace { struct Action { Record *R = nullptr; unsigned I = 0; - std::string S = nullptr; + std::string S; Action() = default; Action(Record *R, unsigned I, std::string S) : R(R), I(I), S(S) {} From 018f6936dbcee63e0a1ffd3777e854150b8cf957 Mon Sep 17 00:00:00 2001 From: Frederik Gossen Date: Thu, 10 Sep 2020 14:41:39 +0000 Subject: [PATCH 0261/1079] [MLIR][Standard] Simplify `tensor_from_elements` Define assembly format and add required traits.
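A note on the TableGen patch above: `std::string S = nullptr;` compiles because basic_string has a converting constructor from const char*, but passing that constructor a null pointer is undefined behavior rather than an empty string; P2166 (cited in that commit message) proposes rejecting such constructions at compile time. A minimal demonstration using only the standard library:

    #include <string>

    int main() {
      const char *p = nullptr;
      // std::string bad{p}; // compiles, but undefined behavior: the
      //                     // const char* constructor requires a valid
      //                     // NUL-terminated string
      std::string good;      // default construction yields ""
      return good.empty() ? 0 : 1;
    }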
Differential Revision: https://reviews.llvm.org/D87366 --- .../mlir/Dialect/StandardOps/IR/Ops.td | 18 ++++++-- mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 44 ++----------------- mlir/test/IR/invalid-ops.mlir | 4 +- 3 files changed, 19 insertions(+), 47 deletions(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 44bbb423b2d95..ec7ecf9b92d40 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -1611,8 +1611,14 @@ def ExtractElementOp : Std_Op<"extract_element", // TensorFromElementsOp //===----------------------------------------------------------------------===// -def TensorFromElementsOp : Std_Op<"tensor_from_elements", - [NoSideEffect, SameOperandsAndResultElementType]> { +def TensorFromElementsOp : Std_Op<"tensor_from_elements", [ + NoSideEffect, + SameOperandsAndResultElementType, + TypesMatchWith<"operand types match result element type", + "result", "elements", "SmallVector<Type, 2>(" + "$_self.cast<ShapedType>().getDimSize(0), " + "$_self.cast<ShapedType>().getElementType())"> + ]> { string summary = "tensor from elements operation."; string description = [{ Create a 1D tensor from a range of same-type arguments. }]; let arguments = (ins Variadic<AnyType>:$elements); - let results = (outs AnyTensor:$result); + let results = (outs 1DTensorOf<[AnyType]>:$result); + + let assemblyFormat = "$elements attr-dict `:` type($result)"; + + // This op is fully verified by its traits. + let verifier = ?; - let skipDefaultBuilders = 1; let builders = [ OpBuilder<"OpBuilder &b, OperationState &result, ValueRange elements"> ]; diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index a0ad05852e230..dc45d5175277c 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -1756,50 +1756,12 @@ OpFoldResult ExtractElementOp::fold(ArrayRef<Attribute> operands) { // TensorFromElementsOp //===----------------------------------------------------------------------===// -static ParseResult parseTensorFromElementsOp(OpAsmParser &parser, - OperationState &result) { - SmallVector<OpAsmParser::OperandType, 1> elementsOperands; - Type resultType; - if (parser.parseOperandList(elementsOperands) || - parser.parseOptionalAttrDict(result.attributes) || - parser.parseColonType(resultType)) - return failure(); - - if (parser.resolveOperands(elementsOperands, - resultType.cast<ShapedType>().getElementType(), - result.operands)) - return failure(); - - result.addTypes(resultType); - return success(); -} - -static void print(OpAsmPrinter &p, TensorFromElementsOp op) { - p << "tensor_from_elements " << op.elements(); - p.printOptionalAttrDict(op.getAttrs()); - p << " : " << op.getType(); -} - -static LogicalResult verify(TensorFromElementsOp op) { - auto resultTensorType = op.result().getType().dyn_cast<RankedTensorType>(); - if (!resultTensorType) - return op.emitOpError("expected result type to be a ranked tensor"); - - int64_t elementsCount = static_cast<int64_t>(op.elements().size()); - if (resultTensorType.getRank() != 1 || - resultTensorType.getShape().front() != elementsCount) - return op.emitOpError() - << "expected result type to be a 1D tensor with " << elementsCount - << (elementsCount == 1 ? "
" element" : " elements"); - return success(); -} - void TensorFromElementsOp::build(OpBuilder &builder, OperationState &result, ValueRange elements) { assert(!elements.empty() && "expected at least one element"); - result.addOperands(elements); - result.addTypes(RankedTensorType::get({static_cast(elements.size())}, - *elements.getTypes().begin())); + Type resultTy = RankedTensorType::get({static_cast(elements.size())}, + elements.front().getType()); + build(builder, result, resultTy, elements); } namespace { diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir index 71b007ef6e39f..e02dbca494df6 100644 --- a/mlir/test/IR/invalid-ops.mlir +++ b/mlir/test/IR/invalid-ops.mlir @@ -595,7 +595,7 @@ func @extract_element_tensor_too_few_indices(%t : tensor<2x3xf32>, %i : index) { // ----- func @tensor_from_elements_wrong_result_type() { - // expected-error@+2 {{expected result type to be a ranked tensor}} + // expected-error@+2 {{'result' must be 1D tensor of any type values, but got 'tensor<*xi32>'}} %c0 = constant 0 : i32 %0 = tensor_from_elements %c0 : tensor<*xi32> return @@ -604,7 +604,7 @@ func @tensor_from_elements_wrong_result_type() { // ----- func @tensor_from_elements_wrong_elements_count() { - // expected-error@+2 {{expected result type to be a 1D tensor with 1 element}} + // expected-error@+2 {{1 operands present, but expected 2}} %c0 = constant 0 : index %0 = tensor_from_elements %c0 : tensor<2xindex> return From 33c9dbbd380913e8ab7e5a8e82468f9f7d092187 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 3 Sep 2020 19:37:29 -0400 Subject: [PATCH 0262/1079] Add an explicit toggle for the static analyzer in clang-tidy Instead of using CLANG_ENABLE_STATIC_ANALYZER for use of the static analyzer in both clang and clang-tidy, add a second toggle CLANG_TIDY_ENABLE_STATIC_ANALYZER. This allows enabling the static analyzer in clang-tidy while disabling it in clang. 
Differential Revision: https://reviews.llvm.org/D87118 --- clang-tools-extra/CMakeLists.txt | 3 +++ clang-tools-extra/clang-tidy/CMakeLists.txt | 11 +++++--- clang-tools-extra/clang-tidy/ClangTidy.cpp | 22 +++++++-------- .../clang-tidy/ClangTidyForceLinker.h | 4 +-- .../clang-tidy/clang-tidy-config.h.cmake | 10 +++++++ .../docs/clang-tidy/Contributing.rst | 2 +- clang-tools-extra/test/CMakeLists.txt | 2 +- clang-tools-extra/test/lit.cfg.py | 2 +- clang-tools-extra/test/lit.site.cfg.py.in | 2 +- clang/CMakeLists.txt | 3 ++- clang/cmake/caches/Android.cmake | 1 + clang/lib/CMakeLists.txt | 4 +-- .../clang-tools-extra/clang-tidy/BUILD.gn | 27 +++++++++++++++++-- .../clang-tools-extra/clang-tidy/enable.gni | 4 +++ .../clang-tidy/tool/BUILD.gn | 1 + .../secondary/clang-tools-extra/test/BUILD.gn | 7 ++--- 16 files changed, 76 insertions(+), 29 deletions(-) create mode 100644 clang-tools-extra/clang-tidy/clang-tidy-config.h.cmake create mode 100644 llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/enable.gni diff --git a/clang-tools-extra/CMakeLists.txt b/clang-tools-extra/CMakeLists.txt index 57bb970575608..2e73b6ba81d2e 100644 --- a/clang-tools-extra/CMakeLists.txt +++ b/clang-tools-extra/CMakeLists.txt @@ -1,5 +1,8 @@ include(CMakeDependentOption) +option(CLANG_TIDY_ENABLE_STATIC_ANALYZER + "Include static analyzer checks in clang-tidy" ON) + add_subdirectory(clang-apply-replacements) add_subdirectory(clang-reorder-fields) add_subdirectory(modularize) diff --git a/clang-tools-extra/clang-tidy/CMakeLists.txt b/clang-tools-extra/clang-tidy/CMakeLists.txt index 923976197ebe8..ca7a5afed6b0b 100644 --- a/clang-tools-extra/clang-tidy/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/CMakeLists.txt @@ -3,6 +3,11 @@ set(LLVM_LINK_COMPONENTS Support ) +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/clang-tidy-config.h.cmake + ${CMAKE_CURRENT_BINARY_DIR}/clang-tidy-config.h) +include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR}) + add_clang_library(clangTidy ClangTidy.cpp ClangTidyCheck.cpp @@ -34,7 +39,7 @@ clang_target_link_libraries(clangTidy clangToolingCore ) -if(CLANG_ENABLE_STATIC_ANALYZER) +if(CLANG_TIDY_ENABLE_STATIC_ANALYZER) clang_target_link_libraries(clangTidy PRIVATE clangStaticAnalyzerCore @@ -60,7 +65,7 @@ add_subdirectory(llvm) add_subdirectory(llvmlibc) add_subdirectory(misc) add_subdirectory(modernize) -if(CLANG_ENABLE_STATIC_ANALYZER) +if(CLANG_TIDY_ENABLE_STATIC_ANALYZER) add_subdirectory(mpi) endif() add_subdirectory(objc) @@ -93,7 +98,7 @@ set(ALL_CLANG_TIDY_CHECKS clangTidyReadabilityModule clangTidyZirconModule ) -if(CLANG_ENABLE_STATIC_ANALYZER) +if(CLANG_TIDY_ENABLE_STATIC_ANALYZER) list(APPEND ALL_CLANG_TIDY_CHECKS clangTidyMPIModule) endif() set(ALL_CLANG_TIDY_CHECKS ${ALL_CLANG_TIDY_CHECKS} PARENT_SCOPE) diff --git a/clang-tools-extra/clang-tidy/ClangTidy.cpp b/clang-tools-extra/clang-tidy/ClangTidy.cpp index 90b39347bc9ac..1f94ab4977c23 100644 --- a/clang-tools-extra/clang-tidy/ClangTidy.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidy.cpp @@ -20,11 +20,11 @@ #include "ClangTidyModuleRegistry.h" #include "ClangTidyProfiling.h" #include "ExpandModularHeadersPPCallbacks.h" +#include "clang-tidy-config.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/Config/config.h" #include "clang/Format/Format.h" #include "clang/Frontend/ASTConsumers.h" #include "clang/Frontend/CompilerInstance.h" @@ -47,10 +47,10 @@ #include <algorithm> #include <utility> -#if CLANG_ENABLE_STATIC_ANALYZER +#if
CLANG_TIDY_ENABLE_STATIC_ANALYZER #include "clang/Analysis/PathDiagnostic.h" #include "clang/StaticAnalyzer/Frontend/AnalysisConsumer.h" -#endif // CLANG_ENABLE_STATIC_ANALYZER +#endif // CLANG_TIDY_ENABLE_STATIC_ANALYZER using namespace clang::ast_matchers; using namespace clang::driver; @@ -63,7 +63,7 @@ namespace clang { namespace tidy { namespace { -#if CLANG_ENABLE_STATIC_ANALYZER +#if CLANG_TIDY_ENABLE_STATIC_ANALYZER static const char *AnalyzerCheckNamePrefix = "clang-analyzer-"; class AnalyzerDiagnosticConsumer : public ento::PathDiagnosticConsumer { @@ -95,7 +95,7 @@ class AnalyzerDiagnosticConsumer : public ento::PathDiagnosticConsumer { private: ClangTidyContext &Context; }; -#endif // CLANG_ENABLE_STATIC_ANALYZER +#endif // CLANG_TIDY_ENABLE_STATIC_ANALYZER class ErrorReporter { public: @@ -324,7 +324,7 @@ ClangTidyASTConsumerFactory::ClangTidyASTConsumerFactory( } } -#if CLANG_ENABLE_STATIC_ANALYZER +#if CLANG_TIDY_ENABLE_STATIC_ANALYZER static void setStaticAnalyzerCheckerOpts(const ClangTidyOptions &Opts, AnalyzerOptionsRef AnalyzerOptions) { StringRef AnalyzerPrefix(AnalyzerCheckNamePrefix); @@ -369,7 +369,7 @@ static CheckersList getAnalyzerCheckersAndPackages(ClangTidyContext &Context, } return List; } -#endif // CLANG_ENABLE_STATIC_ANALYZER +#endif // CLANG_TIDY_ENABLE_STATIC_ANALYZER std::unique_ptr<clang::ASTConsumer> ClangTidyASTConsumerFactory::CreateASTConsumer( @@ -424,7 +424,7 @@ ClangTidyASTConsumerFactory::CreateASTConsumer( if (!Checks.empty()) Consumers.push_back(Finder->newASTConsumer()); -#if CLANG_ENABLE_STATIC_ANALYZER +#if CLANG_TIDY_ENABLE_STATIC_ANALYZER AnalyzerOptionsRef AnalyzerOptions = Compiler.getAnalyzerOpts(); AnalyzerOptions->CheckersAndPackages = getAnalyzerCheckersAndPackages( Context, Context.canEnableAnalyzerAlphaCheckers()); @@ -440,7 +440,7 @@ ClangTidyASTConsumerFactory::CreateASTConsumer( new AnalyzerDiagnosticConsumer(Context)); Consumers.push_back(std::move(AnalysisConsumer)); } -#endif // CLANG_ENABLE_STATIC_ANALYZER +#endif // CLANG_TIDY_ENABLE_STATIC_ANALYZER return std::make_unique<ClangTidyASTConsumer>( std::move(Consumers), std::move(Profiling), std::move(Finder), std::move(Checks)); @@ -453,11 +453,11 @@ std::vector<std::string> ClangTidyASTConsumerFactory::getCheckNames() { CheckNames.emplace_back(CheckFactory.getKey()); } -#if CLANG_ENABLE_STATIC_ANALYZER +#if CLANG_TIDY_ENABLE_STATIC_ANALYZER for (const auto &AnalyzerCheck : getAnalyzerCheckersAndPackages( Context, Context.canEnableAnalyzerAlphaCheckers())) CheckNames.push_back(AnalyzerCheckNamePrefix + AnalyzerCheck.first); -#endif // CLANG_ENABLE_STATIC_ANALYZER +#endif // CLANG_TIDY_ENABLE_STATIC_ANALYZER llvm::sort(CheckNames); return CheckNames; diff --git a/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h b/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h index 63e681f878db2..3a5330c85c3b0 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h +++ b/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h @@ -9,7 +9,7 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CLANGTIDYFORCELINKER_H #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CLANGTIDYFORCELINKER_H -#include "clang/Config/config.h" +#include "clang-tidy-config.h" #include "llvm/Support/Compiler.h" namespace clang { @@ -95,7 +95,7 @@ extern volatile int ModernizeModuleAnchorSource; static int LLVM_ATTRIBUTE_UNUSED ModernizeModuleAnchorDestination = ModernizeModuleAnchorSource; -#if CLANG_ENABLE_STATIC_ANALYZER && \ +#if CLANG_TIDY_ENABLE_STATIC_ANALYZER && \ !defined(CLANG_TIDY_DISABLE_STATIC_ANALYZER_CHECKS) // This anchor is used to force the linker to link the
MPIModule. extern volatile int MPIModuleAnchorSource; diff --git a/clang-tools-extra/clang-tidy/clang-tidy-config.h.cmake b/clang-tools-extra/clang-tidy/clang-tidy-config.h.cmake new file mode 100644 index 0000000000000..f4d1a4b38004b --- /dev/null +++ b/clang-tools-extra/clang-tidy/clang-tidy-config.h.cmake @@ -0,0 +1,10 @@ +/* This generated file is for internal use. Do not include it from headers. */ + +#ifdef CLANG_TIDY_CONFIG_H +#error clang-tidy-config.h can only be included once +#else +#define CLANG_TIDY_CONFIG_H + +#cmakedefine01 CLANG_TIDY_ENABLE_STATIC_ANALYZER + +#endif diff --git a/clang-tools-extra/docs/clang-tidy/Contributing.rst b/clang-tools-extra/docs/clang-tidy/Contributing.rst index 6b7af479804de..c7e7e804a0ff4 100644 --- a/clang-tools-extra/docs/clang-tidy/Contributing.rst +++ b/clang-tools-extra/docs/clang-tidy/Contributing.rst @@ -27,7 +27,7 @@ There are a few tools particularly useful when developing clang-tidy checks: * `clang-check`_ with the ``-ast-dump`` (and optionally ``-ast-dump-filter``) provides a convenient way to dump AST of a C++ program. -If CMake is configured with ``CLANG_ENABLE_STATIC_ANALYZER``, +If CMake is configured with ``CLANG_TIDY_ENABLE_STATIC_ANALYZER=NO``, :program:`clang-tidy` will not be built with support for the ``clang-analyzer-*`` checks or the ``mpi-*`` checks. diff --git a/clang-tools-extra/test/CMakeLists.txt b/clang-tools-extra/test/CMakeLists.txt index 60217b8c50cd4..15b756f0a3207 100644 --- a/clang-tools-extra/test/CMakeLists.txt +++ b/clang-tools-extra/test/CMakeLists.txt @@ -16,7 +16,7 @@ endif () string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} CLANG_TOOLS_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR}) llvm_canonicalize_cmake_booleans( - CLANG_ENABLE_STATIC_ANALYZER + CLANG_TIDY_ENABLE_STATIC_ANALYZER LIBCLANG_INCLUDE_CLANG_TOOLS_EXTRA ) diff --git a/clang-tools-extra/test/lit.cfg.py b/clang-tools-extra/test/lit.cfg.py index 2366f4613db23..24cabd823844e 100644 --- a/clang-tools-extra/test/lit.cfg.py +++ b/clang-tools-extra/test/lit.cfg.py @@ -115,7 +115,7 @@ if platform.system() not in ['Windows']: config.available_features.add('ansi-escape-sequences') -if config.clang_staticanalyzer: +if config.clang_tidy_staticanalyzer: config.available_features.add('static-analyzer') # Get shlex.quote if available (added in 3.3), and fall back to pipes.quote if diff --git a/clang-tools-extra/test/lit.site.cfg.py.in b/clang-tools-extra/test/lit.site.cfg.py.in index 31ce2eaa27d00..7eef661b85fd1 100644 --- a/clang-tools-extra/test/lit.site.cfg.py.in +++ b/clang-tools-extra/test/lit.site.cfg.py.in @@ -10,7 +10,7 @@ config.clang_tools_dir = "@CLANG_TOOLS_DIR@" config.clang_libs_dir = "@SHLIBDIR@" config.python_executable = "@Python3_EXECUTABLE@" config.target_triple = "@TARGET_TRIPLE@" -config.clang_staticanalyzer = @CLANG_ENABLE_STATIC_ANALYZER@ +config.clang_tidy_staticanalyzer = @CLANG_TIDY_ENABLE_STATIC_ANALYZER@ config.libclang_include_clang_tools_extra = @LIBCLANG_INCLUDE_CLANG_TOOLS_EXTRA@ # Support substitution of the tools and libs dirs with user parameters. This is diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index f015951c7ec72..3db476cffbf00 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -473,7 +473,8 @@ option(CLANG_BUILD_TOOLS "Build the Clang tools. If OFF, just generate build targets." ON) option(CLANG_ENABLE_ARCMT "Build ARCMT." ON) -option(CLANG_ENABLE_STATIC_ANALYZER "Build static analyzer." ON) +option(CLANG_ENABLE_STATIC_ANALYZER + "Include static analyzer in clang binary." 
ON) option(CLANG_ENABLE_PROTO_FUZZER "Build Clang protobuf fuzzer." OFF) diff --git a/clang/cmake/caches/Android.cmake b/clang/cmake/caches/Android.cmake index 6fbc4a53951e3..9e15fff033761 100644 --- a/clang/cmake/caches/Android.cmake +++ b/clang/cmake/caches/Android.cmake @@ -4,6 +4,7 @@ set(LLVM_TARGETS_TO_BUILD X86 CACHE STRING "") set(CLANG_ENABLE_ARCMT OFF CACHE BOOL "") set(CLANG_ENABLE_STATIC_ANALYZER OFF CACHE BOOL "") +set(CLANG_TIDY_ENABLE_STATIC_ANALYZER OFF CACHE BOOL "") set(CLANG_VENDOR Android CACHE STRING "") set(CMAKE_BUILD_TYPE RELEASE CACHE STRING "") diff --git a/clang/lib/CMakeLists.txt b/clang/lib/CMakeLists.txt index 23082789ff9a2..1068288100fd6 100644 --- a/clang/lib/CMakeLists.txt +++ b/clang/lib/CMakeLists.txt @@ -21,8 +21,6 @@ add_subdirectory(Tooling) add_subdirectory(DirectoryWatcher) add_subdirectory(Index) add_subdirectory(IndexSerialization) -if(CLANG_ENABLE_STATIC_ANALYZER) - add_subdirectory(StaticAnalyzer) -endif() +add_subdirectory(StaticAnalyzer) add_subdirectory(Format) add_subdirectory(Testing) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn index 18aa728b0db90..69217b702a601 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn @@ -1,9 +1,32 @@ import("//clang/lib/StaticAnalyzer/Frontend/enable.gni") +import("//llvm/utils/gn/build/write_cmake_config.gni") +import("enable.gni") + +config("clang-tidy-config_Config") { + visibility = [ ":clang-tidy-config" ] + include_dirs = [ "$target_gen_dir" ] +} + +write_cmake_config("clang-tidy-config") { + input = "clang-tidy-config.h.cmake" + output = "$target_gen_dir/clang-tidy-config.h" + values = [] + + if (clang_tidy_enable_static_analyzer) { + values += [ "CLANG_TIDY_ENABLE_STATIC_ANALYZER=1" ] + } else { + values += [ "CLANG_TIDY_ENABLE_STATIC_ANALYZER=" ] + } + + # Let targets depending on this find the generated file. + public_configs = [ ":clang-tidy-config_Config" ] +} static_library("clang-tidy") { output_name = "clangTidy" configs += [ "//llvm/utils/gn/build:clang_code" ] deps = [ + ":clang-tidy-config", "//clang/include/clang/StaticAnalyzer/Checkers", "//clang/lib/AST", "//clang/lib/ASTMatchers", @@ -19,7 +42,7 @@ static_library("clang-tidy") { "//llvm/lib/Support", ] - if (clang_enable_static_analyzer) { + if (clang_tidy_enable_static_analyzer) { deps += [ "//clang/lib/StaticAnalyzer/Core", "//clang/lib/StaticAnalyzer/Frontend", @@ -64,7 +87,7 @@ group("all-checks") { "//clang-tools-extra/clang-tidy/readability", "//clang-tools-extra/clang-tidy/zircon", ] - if (clang_enable_static_analyzer) { + if (clang_tidy_enable_static_analyzer) { deps += [ "//clang-tools-extra/clang-tidy/mpi" ] } } diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/enable.gni b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/enable.gni new file mode 100644 index 0000000000000..9fc3e6e4d64b2 --- /dev/null +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/enable.gni @@ -0,0 +1,4 @@ +declare_args() { + # Whether to include the static analyzer in the clang-tidy binary. 
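+  # (Editor's note, added for clarity: this GN arg mirrors the new CMake
+  # option CLANG_TIDY_ENABLE_STATIC_ANALYZER and is fed through
+  # write_cmake_config into the generated clang-tidy-config.h shown above.)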
+  clang_tidy_enable_static_analyzer = true
+}
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/tool/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/tool/BUILD.gn
index 3f06214498d60..7ee93b521c812 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/tool/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/tool/BUILD.gn
@@ -3,6 +3,7 @@ executable("clang-tidy") {
   deps = [
     "//clang-tools-extra/clang-tidy",
     "//clang-tools-extra/clang-tidy:all-checks",
+    "//clang-tools-extra/clang-tidy:clang-tidy-config",
     "//clang/lib/AST",
     "//clang/lib/ASTMatchers",
     "//clang/lib/Basic",
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/test/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/test/BUILD.gn
index 383cb2e1b15cd..e8b1f155a5205 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/test/BUILD.gn
@@ -1,3 +1,4 @@
+import("//clang-tools-extra/clang-tidy/enable.gni")
 import("//clang/lib/StaticAnalyzer/Frontend/enable.gni")
 import("//clang/tools/libclang/include_clang_tools_extra.gni")
 import("//llvm/triples.gni")
@@ -38,10 +39,10 @@ write_lit_config("lit_site_cfg") {
     "Python3_EXECUTABLE=$python_path",
   ]

-  if (clang_enable_static_analyzer) {
-    extra_values += [ "CLANG_ENABLE_STATIC_ANALYZER=1" ]
+  if (clang_tidy_enable_static_analyzer) {
+    extra_values += [ "CLANG_TIDY_ENABLE_STATIC_ANALYZER=1" ]
   } else {
-    extra_values += [ "CLANG_ENABLE_STATIC_ANALYZER=0" ]
+    extra_values += [ "CLANG_TIDY_ENABLE_STATIC_ANALYZER=0" ]
   }

   if (libclang_include_clang_tools_extra) {

From 9f830e0af7b05e6ec970f1e5f8815063a196fae8 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 10 Sep 2020 13:09:48 +0100
Subject: [PATCH 0263/1079] AArch64MachineFunctionInfo.h - remove unnecessary
 TargetFrameLowering.h include. NFCI.

---
 llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 9562269336d8d..12e938c0f66ce 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -20,7 +20,6 @@
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MIRYamlMapping.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/IR/Function.h"
 #include "llvm/MC/MCLinkerOptimizationHint.h"
 #include <cassert>

From b585fdae249e7b3524376222287e76e155ecd34b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 10 Sep 2020 15:12:05 +0100
Subject: [PATCH 0264/1079] [X86] Use Register instead of unsigned. NFCI.

Fixes llvm-prefer-register-over-unsigned clang-tidy warnings.
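[Editor's illustration, not part of the patch: a minimal sketch of why the
clang-tidy check prefers llvm::Register. The wrapper still holds the raw
unsigned id (and converts back implicitly), but makes virtual-vs-physical
queries explicit. The function and variable names below are invented for
the example.]

    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/CodeGen/Register.h"

    void sketch(llvm::MachineRegisterInfo &MRI,
                const llvm::TargetRegisterClass *RC) {
      llvm::Register VReg = MRI.createVirtualRegister(RC); // typed handle
      bool IsVirt = VReg.isVirtual(); // explicit query, no bit trickery
      unsigned Raw = VReg;            // implicit conversion keeps old code working
      (void)IsVirt;
      (void)Raw;
    }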
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 031234925de47..4449a00b95c46 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32178,7 +32178,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
   const TargetRegisterClass *AddrRegClass =
       getRegClassFor(getPointerTy(MF->getDataLayout()));

-  unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
+  Register mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
            bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
            tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
            SPLimitVReg = MRI.createVirtualRegister(AddrRegClass),

From 29cecbc5d6fe2ee36635d593171d59eab631639f Mon Sep 17 00:00:00 2001
From: Nico Weber
Date: Thu, 10 Sep 2020 11:05:28 -0400
Subject: [PATCH 0265/1079] Fix clangd build after 33c9dbbd380

---
 clang-tools-extra/clangd/CMakeLists.txt                   | 2 ++
 llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn | 1 +
 2 files changed, 3 insertions(+)

diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt
index 639441e8130ab..3a1a034ed17ba 100644
--- a/clang-tools-extra/clangd/CMakeLists.txt
+++ b/clang-tools-extra/clangd/CMakeLists.txt
@@ -33,6 +33,8 @@ if(MSVC AND NOT CLANG_CL)
   set_source_files_properties(CompileCommands.cpp PROPERTIES COMPILE_FLAGS -wd4130) # disables C4130: logical operation on address of string constant
 endif()

+include_directories(BEFORE "${CMAKE_CURRENT_BINARY_DIR}/../clang-tidy")
+
 add_clang_library(clangDaemon
   AST.cpp
   ClangdLSPServer.cpp
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
index 84d3f14bb2f27..7fa4cc8fd32c1 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
@@ -27,6 +27,7 @@ static_library("clangd") {
     ":features",
     "//clang-tools-extra/clang-tidy",
     "//clang-tools-extra/clang-tidy:all-checks",
+    "//clang-tools-extra/clang-tidy:clang-tidy-config",
     "//clang-tools-extra/clangd/support",
     "//clang/lib/AST",
     "//clang/lib/ASTMatchers",

From f5ad9c2e0ea60dc5426def7a54f04347a33a952e Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Thu, 10 Sep 2020 06:55:00 -0700
Subject: [PATCH 0266/1079] [builtins] Write __divmoddi4/__divmodsi4 in terms
 of __udivmod instead of __div and multiply.

Previously we calculated the remainder by multiplying the quotient and
divisor and subtracting from the dividend. __udivmod can calculate the
remainder while calculating the quotient. We just need to correct the
sign afterward.

Reviewed By: MaskRay

Differential Revision: https://reviews.llvm.org/D87433
---
 compiler-rt/lib/builtins/divmoddi4.c | 13 ++++++++++---
 compiler-rt/lib/builtins/divmodsi4.c | 13 ++++++++++---
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/compiler-rt/lib/builtins/divmoddi4.c b/compiler-rt/lib/builtins/divmoddi4.c
index 7f333510c0034..e7cbbb1aaa304 100644
--- a/compiler-rt/lib/builtins/divmoddi4.c
+++ b/compiler-rt/lib/builtins/divmoddi4.c
@@ -15,7 +15,14 @@
 // Returns: a / b, *rem = a % b

 COMPILER_RT_ABI di_int __divmoddi4(di_int a, di_int b, di_int *rem) {
-  di_int d = __divdi3(a, b);
-  *rem = a - (d * b);
-  return d;
+  const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
+  di_int s_a = a >> bits_in_dword_m1;              // s_a = a < 0 ? -1 : 0
+  di_int s_b = b >> bits_in_dword_m1;              // s_b = b < 0 ? -1 : 0
+  a = (a ^ s_a) - s_a;                             // negate if s_a == -1
+  b = (b ^ s_b) - s_b;                             // negate if s_b == -1
+  s_b ^= s_a;                                      // sign of quotient
+  du_int r;
+  di_int q = (__udivmoddi4(a, b, &r) ^ s_b) - s_b; // negate if s_b == -1
+  *rem = (r ^ s_a) - s_a;                          // negate if s_a == -1
+  return q;
 }
diff --git a/compiler-rt/lib/builtins/divmodsi4.c b/compiler-rt/lib/builtins/divmodsi4.c
index 402eed22fe7a0..a85e2993b4e9b 100644
--- a/compiler-rt/lib/builtins/divmodsi4.c
+++ b/compiler-rt/lib/builtins/divmodsi4.c
@@ -16,7 +16,14 @@
 // Returns: a / b, *rem = a % b

 COMPILER_RT_ABI si_int __divmodsi4(si_int a, si_int b, si_int *rem) {
-  si_int d = __divsi3(a, b);
-  *rem = a - (d * b);
-  return d;
+  const int bits_in_word_m1 = (int)(sizeof(si_int) * CHAR_BIT) - 1;
+  si_int s_a = a >> bits_in_word_m1;               // s_a = a < 0 ? -1 : 0
+  si_int s_b = b >> bits_in_word_m1;               // s_b = b < 0 ? -1 : 0
+  a = (a ^ s_a) - s_a;                             // negate if s_a == -1
+  b = (b ^ s_b) - s_b;                             // negate if s_b == -1
+  s_b ^= s_a;                                      // sign of quotient
+  su_int r;
+  si_int q = (__udivmodsi4(a, b, &r) ^ s_b) - s_b; // negate if s_b == -1
+  *rem = (r ^ s_a) - s_a;                          // negate if s_a == -1
+  return q;
 }

From 66ac212ea97a529e171a7b8aea10638d7b9b9907 Mon Sep 17 00:00:00 2001
From: Aaron Ballman
Date: Thu, 10 Sep 2020 11:35:10 -0400
Subject: [PATCH 0267/1079] Speculatively fix the Sphinx builder.

---
 clang-tools-extra/docs/ReleaseNotes.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 192f200f34aca..563c0eced92ef 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -70,7 +70,7 @@ Improvements to clang-tidy
 New modules
 ^^^^^^^^^^^

-- New :doc:`altera ` module.
+- New ``altera`` module.

   Includes checks related to OpenCL for FPGA coding guidelines, based on the
   `Altera SDK for OpenCL: Best Practices Guide

From 8a08740db6e13a3a36363c65b7e270cb7c66eb3c Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek
Date: Wed, 9 Sep 2020 18:05:00 -0500
Subject: [PATCH 0268/1079] [GVN] Account for masked loads/stores depending on
 load/store instructions

This is a case where an intrinsic depends on a non-call instruction.

Differential Revision: https://reviews.llvm.org/D87423
---
 llvm/lib/Transforms/Scalar/GVN.cpp            |  7 +++++--
 .../GVN/masked-load-store-vn-crash.ll         | 20 +++++++++++++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/GVN/masked-load-store-vn-crash.ll

diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 036ca1d1054fe..2523cb178ddb7 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -410,9 +410,12 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
   }

   if (local_dep.isDef()) {
-    CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
+    // For masked load/store intrinsics, the local_dep may actually be
+    // a normal load or store instruction.
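+    // (Editor's note: this is why the code below switches from cast<> to
+    // dyn_cast<> -- when the dependency really is a plain load or store,
+    // dyn_cast yields nullptr and the new !local_cdep check conservatively
+    // hands out a fresh value number instead of crashing.)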
+    CallInst *local_cdep = dyn_cast<CallInst>(local_dep.getInst());

-    if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
+    if (!local_cdep ||
+        local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
       valueNumbering[C] = nextValueNumber;
       return nextValueNumber++;
     }
diff --git a/llvm/test/Transforms/GVN/masked-load-store-vn-crash.ll b/llvm/test/Transforms/GVN/masked-load-store-vn-crash.ll
new file mode 100644
index 0000000000000..ae8369cd19452
--- /dev/null
+++ b/llvm/test/Transforms/GVN/masked-load-store-vn-crash.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -gvn -S < %s | FileCheck %s
+@file_mask = external global [8 x i64], align 32
+
+define fastcc void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD_1_I:%.*]] = tail call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* nonnull bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @file_mask, i64 0, i64 7) to <4 x i64>*), i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> undef)
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %wide.masked.load.1.i = tail call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* nonnull bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @file_mask, i64 0, i64 7) to <4 x i64>*), i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> undef) #2
+  %.pre392.i = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @file_mask, i64 0, i64 7), align 8
+  %or156.4.i = or i64 %.pre392.i, undef
+  %wide.masked.load614.1.i = tail call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* nonnull bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @file_mask, i64 0, i64 7) to <4 x i64>*), i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> undef) #2
+  unreachable
+}
+
+; Function Attrs: argmemonly nounwind readonly willreturn
+declare <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>*, i32 immarg, <4 x i1>, <4 x i64>)

From 601557e9f9e829e5a798a1dbb6b46a98c8fb7810 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 10 Sep 2020 16:52:05 +0100
Subject: [PATCH 0269/1079] Hexagon.h - remove unnecessary includes. NFCI.

Replace with forward declarations and move includes to implicit dependent
files.
---
 llvm/lib/Target/Hexagon/Hexagon.h                    | 5 +----
 llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp      | 1 +
 llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp     | 2 ++
 llvm/lib/Target/Hexagon/HexagonFrameLowering.h       | 1 +
 llvm/lib/Target/Hexagon/HexagonISelLowering.h        | 1 +
 llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp | 4 ++--
 llvm/lib/Target/Hexagon/HexagonSubtarget.cpp         | 4 ++--
 7 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/Hexagon/Hexagon.h b/llvm/lib/Target/Hexagon/Hexagon.h
index 58dadf012da56..98e5710d4fc1d 100644
--- a/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/llvm/lib/Target/Hexagon/Hexagon.h
@@ -14,12 +14,9 @@
 #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGON_H
 #define LLVM_LIB_TARGET_HEXAGON_HEXAGON_H

-#include "MCTargetDesc/HexagonMCTargetDesc.h"
-#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-
 namespace llvm {
   class HexagonTargetMachine;
+  class ImmutablePass;

   /// Creates a Hexagon-specific Target Transformation Info pass.
ImmutablePass *createHexagonTargetTransformInfoPass(const HexagonTargetMachine *TM); diff --git a/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp index 11a455ce43470..b456cf139c55c 100644 --- a/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "Hexagon.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp index 587527d8c32cb..23d0cc829e52a 100644 --- a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -10,6 +10,7 @@ // to move them together. If we can move them next to each other we do so and // replace them with a combine instruction. //===----------------------------------------------------------------------===// + #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" #include "llvm/ADT/DenseMap.h" @@ -26,6 +27,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h index 87d385e1ce3c4..c8871cc56c486 100644 --- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h @@ -11,6 +11,7 @@ #include "Hexagon.h" #include "HexagonBlockRanges.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 9e7176cd94218..a396ff8ef7ec2 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -15,6 +15,7 @@ #define LLVM_LIB_TARGET_HEXAGON_HEXAGONISELLOWERING_H #include "Hexagon.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/SelectionDAGNodes.h" diff --git a/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp b/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp index d818e0897f750..e026bb6d601d0 100644 --- a/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp +++ b/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp @@ -11,7 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "Hexagon.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -19,8 +21,6 @@ #include "llvm/Pass.h" #include "llvm/Transforms/Scalar.h" -#include "Hexagon.h" - using namespace llvm; namespace llvm { diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index b1d06b0c3937a..60792929be918 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -10,10 +10,10 @@ // //===----------------------------------------------------------------------===// +#include "HexagonSubtarget.h" #include "Hexagon.h" 
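 // (Editor's note: moving the file's own header to the very top follows the
 // LLVM convention for .cpp files -- it verifies that HexagonSubtarget.h is
 // self-contained rather than relying on headers included before it.)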
#include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" -#include "HexagonSubtarget.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" @@ -26,6 +26,7 @@ #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetMachine.h" #include #include #include @@ -38,7 +39,6 @@ using namespace llvm; #define GET_SUBTARGETINFO_TARGET_DESC #include "HexagonGenSubtargetInfo.inc" - static cl::opt EnableBSBSched("enable-bsb-sched", cl::Hidden, cl::ZeroOrMore, cl::init(true)); From b0ae5332dc2be682564d6fbcc9755c6ae5120086 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 10 Sep 2020 12:20:18 -0400 Subject: [PATCH 0270/1079] [libcxx] Make sure we pass -isysroot when linking AND when compiling --- libcxx/utils/libcxx/test/config.py | 1 + libcxx/utils/libcxx/test/target_info.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index c8bfdda914631..086db1d7f560d 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -289,6 +289,7 @@ def configure_default_compile_flags(self): # Configure include paths self.configure_compile_flags_header_includes() self.target_info.add_cxx_compile_flags(self.cxx.compile_flags) + self.target_info.add_cxx_flags(self.cxx.flags) # Configure feature flags. enable_32bit = self.get_lit_bool('enable_32bit', False) if enable_32bit: diff --git a/libcxx/utils/libcxx/test/target_info.py b/libcxx/utils/libcxx/test/target_info.py index 4f19d60a1a875..130d5600ed173 100644 --- a/libcxx/utils/libcxx/test/target_info.py +++ b/libcxx/utils/libcxx/test/target_info.py @@ -30,6 +30,7 @@ def is_windows(self): def is_darwin(self): return self.platform() == 'darwin' + def add_cxx_flags(self, flags): pass def add_cxx_compile_flags(self, flags): pass def add_cxx_link_flags(self, flags): pass def allow_cxxabi_link(self): return True @@ -73,7 +74,7 @@ def get_sdk_version(self, name): return re.sub(r'.*/[^0-9]+([0-9.]+)\.sdk', r'\1', out) - def add_cxx_compile_flags(self, flags): + def add_cxx_flags(self, flags): out, err, exit_code = executeCommand(['xcrun', '--show-sdk-path']) if exit_code != 0: self.full_config.lit_config.warning("Could not determine macOS SDK path! stderr was " + err) From 3d9c85e4d85bef3db495a37577f80b90ec9770b6 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Tue, 8 Sep 2020 18:41:56 +0000 Subject: [PATCH 0271/1079] Mark FMOV constant materialization as being as cheap as a move. This prevents us from doing things like LICM'ing it out of a loop, which is usually a net loss because we end up having to spill a callee-saved FPR to accomodate it. This does perturb instruction scheduling around this instruction, so a number of tests had to be updated to account for it. 
Reviewed By: t.p.northover Differential Revision: https://reviews.llvm.org/D87316 --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 +- llvm/test/CodeGen/AArch64/arm64-aapcs.ll | 2 +- llvm/test/CodeGen/AArch64/fmov-imm-licm.ll | 33 +++++++++++++++++++ llvm/test/CodeGen/AArch64/fp-cond-sel.ll | 4 +-- llvm/test/CodeGen/AArch64/func-calls.ll | 6 ++-- llvm/test/CodeGen/AArch64/pow.ll | 26 ++++++--------- llvm/test/CodeGen/AArch64/swifterror.ll | 6 ++-- .../AArch64/small-constant.ll | 12 +++---- 8 files changed, 57 insertions(+), 34 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/fmov-imm-licm.ll diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 85cb230517433..6a0bb14f55147 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -3802,7 +3802,7 @@ let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, // Floating point immediate move. //===----------------------------------------------------------------------===// -let isReMaterializable = 1 in { +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { defm FMOV : FPMoveImmediate<"fmov">; } diff --git a/llvm/test/CodeGen/AArch64/arm64-aapcs.ll b/llvm/test/CodeGen/AArch64/arm64-aapcs.ll index 7887facb9accc..ac1678569ecb4 100644 --- a/llvm/test/CodeGen/AArch64/arm64-aapcs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-aapcs.ll @@ -90,8 +90,8 @@ declare void @variadic(i32 %a, ...) ; others. The extra arguments should go in registers rather than on the stack. define void @test_variadic() { call void(i32, ...) @variadic(i32 0, i64 1, double 2.0) -; CHECK: fmov d0, #2.0 ; CHECK: mov w1, #1 +; CHECK: fmov d0, #2.0 ; CHECK: bl variadic ret void } diff --git a/llvm/test/CodeGen/AArch64/fmov-imm-licm.ll b/llvm/test/CodeGen/AArch64/fmov-imm-licm.ll new file mode 100644 index 0000000000000..29061840c96bf --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fmov-imm-licm.ll @@ -0,0 +1,33 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s + +; The purpose of this test is to check that an FMOV instruction that +; only materializes an immediate is not MachineLICM'd out of a loop. +; We check this in two ways: by looking for the FMOV inside the loop, +; and also by checking that we're not spilling any FP callee-saved +; registers. + +%struct.Node = type { %struct.Node*, i8* } + +define void @process_nodes(%struct.Node* %0) { +; CHECK-LABEL: process_nodes: +; CHECK-NOT: stp {{d[0-9]+}} +; CHECK-LABEL: .LBB0_2: +; CHECK: fmov s0, #1.00000000 +; CHECK: bl do_it +entry: + %1 = icmp eq %struct.Node* %0, null + br i1 %1, label %exit, label %loop + +loop: + %2 = phi %struct.Node* [ %4, %loop ], [ %0, %entry ] + tail call void @do_it(float 1.000000e+00, %struct.Node* nonnull %2) + %3 = getelementptr inbounds %struct.Node, %struct.Node* %2, i64 0, i32 0 + %4 = load %struct.Node*, %struct.Node** %3, align 8 + %5 = icmp eq %struct.Node* %4, null + br i1 %5, label %exit, label %loop + +exit: + ret void +} + +declare void @do_it(float, %struct.Node*) diff --git a/llvm/test/CodeGen/AArch64/fp-cond-sel.ll b/llvm/test/CodeGen/AArch64/fp-cond-sel.ll index f74e9c3509429..570088385d0d8 100644 --- a/llvm/test/CodeGen/AArch64/fp-cond-sel.ll +++ b/llvm/test/CodeGen/AArch64/fp-cond-sel.ll @@ -20,8 +20,8 @@ define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) { %tst2 = icmp sle i64 %lhs64, %rhs64 %val2 = select i1 %tst2, double 1.0, double 0.0 store double %val2, double* @vardouble -; FLT0 is reused from above on ARM64. 
-; CHECK: fmov d[[FLT1:[0-9]+]], #1.0 +; CHECK-DAG: fmov d[[FLT0:[0-9]+]], xzr +; CHECK-DAG: fmov d[[FLT1:[0-9]+]], #1.0 ; CHECK: fcsel {{d[0-9]+}}, d[[FLT1]], d[[FLT0]], le call void @use_float(float 0.0) diff --git a/llvm/test/CodeGen/AArch64/func-calls.ll b/llvm/test/CodeGen/AArch64/func-calls.ll index 54d38a91c3873..fe48fd308265a 100644 --- a/llvm/test/CodeGen/AArch64/func-calls.ll +++ b/llvm/test/CodeGen/AArch64/func-calls.ll @@ -90,12 +90,10 @@ define void @check_stack_args() { ; memcpy gets created, but the following works for now. ; CHECK-DAG: str {{q[0-9]+}}, [sp] -; CHECK-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 -; CHECK: mov v0.16b, v[[FINAL_DOUBLE]].16b +; CHECK-DAG: fmov d0, #1.0 ; CHECK-NONEON-DAG: str {{q[0-9]+}}, [sp] -; CHECK-NONEON-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 -; CHECK-NONEON: fmov d0, d[[FINAL_DOUBLE]] +; CHECK-NONEON-DAG: fmov d0, #1.0 ; CHECK: bl struct_on_stack ; CHECK-NOFP-NOT: fmov diff --git a/llvm/test/CodeGen/AArch64/pow.ll b/llvm/test/CodeGen/AArch64/pow.ll index 0f0e2597d25a8..c8e8ab9fc9f7d 100644 --- a/llvm/test/CodeGen/AArch64/pow.ll +++ b/llvm/test/CodeGen/AArch64/pow.ll @@ -69,16 +69,14 @@ define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind ; CHECK-LABEL: pow_v4f32_one_fourth_not_enough_fmf: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 // =48 -; CHECK-NEXT: str d8, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: fmov s8, #0.25000000 ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: mov v1.16b, v8.16b -; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: fmov s1, #0.25000000 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl powf ; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: fmov s1, #0.25000000 ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov v1.16b, v8.16b ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl powf ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload @@ -86,7 +84,7 @@ define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind ; CHECK-NEXT: mov v0.s[1], v1.s[0] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov v1.16b, v8.16b +; CHECK-NEXT: fmov s1, #0.25000000 ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl powf ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload @@ -94,12 +92,11 @@ define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v1.16b, v8.16b +; CHECK-NEXT: fmov s1, #0.25000000 ; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: bl powf ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v1.s[3], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b @@ -113,21 +110,18 @@ define <2 x double> @pow_v2f64_one_fourth_not_enough_fmf(<2 x double> %x) nounwi ; CHECK-LABEL: pow_v2f64_one_fourth_not_enough_fmf: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 // =48 -; CHECK-NEXT: str d8, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: fmov d8, #0.25000000 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: mov d0, v0.d[1] -; CHECK-NEXT: 
mov v1.16b, v8.16b -; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: fmov d1, #0.25000000 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl pow ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: fmov d1, #0.25000000 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov v1.16b, v8.16b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: bl pow ; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 // =48 diff --git a/llvm/test/CodeGen/AArch64/swifterror.ll b/llvm/test/CodeGen/AArch64/swifterror.ll index 1eedb76204317..a8635f682ff10 100644 --- a/llvm/test/CodeGen/AArch64/swifterror.ll +++ b/llvm/test/CodeGen/AArch64/swifterror.ll @@ -339,14 +339,14 @@ define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) { ; CHECK-APPLE: malloc ; First vararg -; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP:x[0-9]+]], #16] ; CHECK-APPLE-AARCH64: mov [[ID:w[0-9]+]], #1 +; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP:x[0-9]+]], #16] ; CHECK-APPLE-AARCH64: add [[ARGS:x[0-9]+]], [[TMP]], #16 +; Third vararg +; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #32] ; CHECK-APPLE-AARCH64: strb [[ID]], [x0, #8] ; Second vararg ; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #24] -; Third vararg -; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #32] ; CHECK-APPLE-ARM64_32: mov [[ID:w[0-9]+]], #1 ; CHECK-APPLE-ARM64_32: add [[ARGS:x[0-9]+]], [[TMP:x[0-9]+]], #16 diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/small-constant.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/small-constant.ll index 07ad549ebb9d8..af39bec33013e 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/small-constant.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/small-constant.ll @@ -18,7 +18,6 @@ define float @test1(float* nocapture readonly %arr, i64 %start, float %threshold) { ; CHECK-LABEL: test1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s2, #-7.00000000 ; CHECK-NEXT: cbz x1, .LBB0_4 ; CHECK-NEXT: // %bb.1: // %for.body.preheader ; CHECK-NEXT: add x8, x0, #28 // =28 @@ -32,7 +31,7 @@ define float @test1(float* nocapture readonly %arr, i64 %start, float %threshold ; CHECK-NEXT: add x1, x1, #1 // =1 ; CHECK-NEXT: cbnz x1, .LBB0_2 ; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: fmov s0, #-7.00000000 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_5: // %cleanup2 ; CHECK-NEXT: mov v0.16b, v1.16b @@ -64,23 +63,22 @@ cleanup2: ; preds = %for.cond, %for.body define float @test2(float* nocapture readonly %arr, i64 %start, float %threshold) { ; CHECK-LABEL: test2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s2, #-7.00000000 ; CHECK-NEXT: cbz x1, .LBB1_4 ; CHECK-NEXT: // %bb.1: // %for.body.preheader ; CHECK-NEXT: add x8, x0, #28 // =28 ; CHECK-NEXT: .LBB1_2: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr s1, [x8, x1, lsl #2] -; CHECK-NEXT: scvtf s3, x1 -; CHECK-NEXT: fadd s3, s3, s0 -; CHECK-NEXT: fcmp s1, s3 +; CHECK-NEXT: scvtf s2, x1 +; CHECK-NEXT: fadd s2, s2, s0 +; CHECK-NEXT: fcmp s1, s2 ; CHECK-NEXT: b.gt .LBB1_5 ; CHECK-NEXT: // %bb.3: // %for.cond ; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1 ; CHECK-NEXT: add x1, 
x1, #1 // =1
 ; CHECK-NEXT:    cbnz x1, .LBB1_2
 ; CHECK-NEXT:  .LBB1_4:
-; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    fmov s0, #-7.00000000
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB1_5: // %cleanup4
 ; CHECK-NEXT:    mov v0.16b, v1.16b

From f42f733af968e75948442c578e8ad0ae101cc8a3 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 10 Sep 2020 17:35:02 +0100
Subject: [PATCH 0272/1079] SwitchLoweringUtils.h - reduce TargetLowering.h
 include. NFCI.

Only include the headers we actually need, and move the remaining includes
down to implicit dependent files.
---
 llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h |  1 +
 llvm/include/llvm/CodeGen/SwitchLoweringUtils.h     | 11 ++++++++---
 llvm/lib/CodeGen/SwitchLoweringUtils.cpp            |  1 +
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 0674b53c604a7..37c94ccbbd20d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -38,6 +38,7 @@ class BasicBlock;
 class CallInst;
 class CallLowering;
 class Constant;
+class ConstrainedFPIntrinsic;
 class DataLayout;
 class Instruction;
 class MachineBasicBlock;
diff --git a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
index 4d6afa617d3a2..51f1d7d6fd218 100644
--- a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
+++ b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
@@ -10,16 +10,21 @@
 #define LLVM_CODEGEN_SWITCHLOWERINGUTILS_H

 #include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/Support/BranchProbability.h"
+#include <vector>

 namespace llvm {

+class BlockFrequencyInfo;
+class ConstantInt;
 class FunctionLoweringInfo;
 class MachineBasicBlock;
-class BlockFrequencyInfo;
+class ProfileSummaryInfo;
+class TargetLowering;
+class TargetMachine;

 namespace SwitchCG {
diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index 12745747f5f80..dfcec32d95376 100644
--- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -14,6 +14,7 @@
 #include "llvm/CodeGen/SwitchLoweringUtils.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"

 using namespace llvm;

From c01d28dc51bdd33404828a327320e3307a51bb22 Mon Sep 17 00:00:00 2001
From: Eduardo Caldas
Date: Wed, 9 Sep 2020 08:36:39 +0000
Subject: [PATCH 0273/1079] [SyntaxTree] Specialize `TreeTestBase` for
 `BuildTreeTest`, `MutationsTest` and `SynthesisTest`

Differential Revision: https://reviews.llvm.org/D87374
---
 .../Tooling/Syntax/BuildTreeTest.cpp          | 407 ++++++++++--------
 clang/unittests/Tooling/Syntax/CMakeLists.txt |   1 +
 .../Tooling/Syntax/MutationsTest.cpp          |  57 +--
 .../Tooling/Syntax/SynthesisTest.cpp          |  44 ++
 .../unittests/Tooling/Syntax/TreeTestBase.cpp |  63 +--
 clang/unittests/Tooling/Syntax/TreeTestBase.h |   7 +-
 6 files changed, 310 insertions(+), 269 deletions(-)
 create mode 100644 clang/unittests/Tooling/Syntax/SynthesisTest.cpp

diff --git a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp
index 225885437267b..6fcc74ba55d0c 100644
--- a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp
+++ b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp
@@ -17,7 +17,70 @@ using namespace clang::syntax;

 namespace {

-TEST_P(SyntaxTreeTest, Simple) {
+class BuildSyntaxTreeTest : public SyntaxTreeTest {
+protected:
+  ::testing::AssertionResult treeDumpEqual(StringRef Code, StringRef Tree) {
+    SCOPED_TRACE(llvm::join(GetParam().getCommandLineArgs(), " "));
+
+    auto *Root = buildTree(Code, GetParam());
+    if (Diags->getClient()->getNumErrors() != 0) {
+      return ::testing::AssertionFailure()
+             << "Source file has syntax errors, they were printed to the test "
+                "log";
+    }
+    auto Actual = StringRef(Root->dump(Arena->sourceManager())).trim().str();
+    // EXPECT_EQ shows the diff between the two strings if they are different.
+    EXPECT_EQ(Tree.trim().str(), Actual);
+    if (Actual != Tree.trim().str()) {
+      return ::testing::AssertionFailure();
+    }
+    return ::testing::AssertionSuccess();
+  }
+
+  ::testing::AssertionResult
+  treeDumpEqualOnAnnotations(StringRef CodeWithAnnotations,
+                             ArrayRef<StringRef> TreeDumps) {
+    SCOPED_TRACE(llvm::join(GetParam().getCommandLineArgs(), " "));
+
+    auto AnnotatedCode = llvm::Annotations(CodeWithAnnotations);
+    auto *Root = buildTree(AnnotatedCode.code(), GetParam());
+
+    if (Diags->getClient()->getNumErrors() != 0) {
+      return ::testing::AssertionFailure()
+             << "Source file has syntax errors, they were printed to the test "
+                "log";
+    }
+
+    auto AnnotatedRanges = AnnotatedCode.ranges();
+    if (AnnotatedRanges.size() != TreeDumps.size()) {
+      return ::testing::AssertionFailure()
+             << "The number of annotated ranges in the source code is "
+                "different "
+                "to the number of their corresponding tree dumps.";
+    }
+    bool Failed = false;
+    for (unsigned i = 0; i < AnnotatedRanges.size(); i++) {
+      auto *AnnotatedNode = nodeByRange(AnnotatedRanges[i], Root);
+      assert(AnnotatedNode);
+      auto AnnotatedNodeDump =
+          StringRef(AnnotatedNode->dump(Arena->sourceManager())).trim().str();
+      // EXPECT_EQ shows the diff between the two strings if they are different.
+      EXPECT_EQ(TreeDumps[i].trim().str(), AnnotatedNodeDump)
+          << "Dumps diverged for the code:\n"
+          << AnnotatedCode.code().slice(AnnotatedRanges[i].Begin,
+                                        AnnotatedRanges[i].End);
+      if (AnnotatedNodeDump != TreeDumps[i].trim().str())
+        Failed = true;
+    }
+    return Failed ?
::testing::AssertionFailure() + : ::testing::AssertionSuccess(); + } +}; + +INSTANTIATE_TEST_CASE_P(SyntaxTreeTests, BuildSyntaxTreeTest, + testing::ValuesIn(allTestClangConfigs()), ); + +TEST_P(BuildSyntaxTreeTest, Simple) { EXPECT_TRUE(treeDumpEqual( R"cpp( int main() {} @@ -48,7 +111,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, SimpleVariable) { +TEST_P(BuildSyntaxTreeTest, SimpleVariable) { EXPECT_TRUE(treeDumpEqual( R"cpp( int a; @@ -72,7 +135,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, SimpleFunction) { +TEST_P(BuildSyntaxTreeTest, SimpleFunction) { EXPECT_TRUE(treeDumpEqual( R"cpp( void foo(int a, int b) {} @@ -102,7 +165,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, If) { +TEST_P(BuildSyntaxTreeTest, If) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { @@ -144,7 +207,7 @@ IfStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, For) { +TEST_P(BuildSyntaxTreeTest, For) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { @@ -164,7 +227,7 @@ ForStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, RangeBasedFor) { +TEST_P(BuildSyntaxTreeTest, RangeBasedFor) { if (!GetParam().isCXX11OrLater()) { return; } @@ -194,7 +257,7 @@ RangeBasedForStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, DeclarationStatement) { +TEST_P(BuildSyntaxTreeTest, DeclarationStatement) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { @@ -214,7 +277,7 @@ DeclarationStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, Switch) { +TEST_P(BuildSyntaxTreeTest, Switch) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { @@ -247,7 +310,7 @@ SwitchStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, While) { +TEST_P(BuildSyntaxTreeTest, While) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { @@ -273,7 +336,7 @@ WhileStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, UnhandledStatement) { +TEST_P(BuildSyntaxTreeTest, UnhandledStatement) { // Unhandled statements should end up as 'unknown statement'. // This example uses a 'label statement', which does not yet have a syntax // counterpart. @@ -295,7 +358,7 @@ UnknownStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, Expressions) { +TEST_P(BuildSyntaxTreeTest, Expressions) { // expressions should be wrapped in 'ExpressionStatement' when they appear // in a statement position. 
EXPECT_TRUE(treeDumpEqual( @@ -351,7 +414,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, UnqualifiedId_Identifier) { +TEST_P(BuildSyntaxTreeTest, UnqualifiedId_Identifier) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test(int a) { @@ -365,7 +428,7 @@ IdExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, UnqualifiedId_OperatorFunctionId) { +TEST_P(BuildSyntaxTreeTest, UnqualifiedId_OperatorFunctionId) { if (!GetParam().isCXX()) { return; } @@ -397,7 +460,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, UnqualifiedId_ConversionFunctionId) { +TEST_P(BuildSyntaxTreeTest, UnqualifiedId_ConversionFunctionId) { if (!GetParam().isCXX()) { return; } @@ -426,7 +489,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, UnqualifiedId_LiteralOperatorId) { +TEST_P(BuildSyntaxTreeTest, UnqualifiedId_LiteralOperatorId) { if (!GetParam().isCXX11OrLater()) { return; } @@ -452,7 +515,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, UnqualifiedId_Destructor) { +TEST_P(BuildSyntaxTreeTest, UnqualifiedId_Destructor) { if (!GetParam().isCXX()) { return; } @@ -479,7 +542,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, UnqualifiedId_DecltypeDestructor) { +TEST_P(BuildSyntaxTreeTest, UnqualifiedId_DecltypeDestructor) { if (!GetParam().isCXX11OrLater()) { return; } @@ -513,7 +576,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, UnqualifiedId_TemplateId) { +TEST_P(BuildSyntaxTreeTest, UnqualifiedId_TemplateId) { if (!GetParam().isCXX()) { return; } @@ -538,7 +601,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, QualifiedId_NamespaceSpecifier) { +TEST_P(BuildSyntaxTreeTest, QualifiedId_NamespaceSpecifier) { if (!GetParam().isCXX()) { return; } @@ -575,7 +638,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, QualifiedId_TemplateSpecifier) { +TEST_P(BuildSyntaxTreeTest, QualifiedId_TemplateSpecifier) { if (!GetParam().isCXX()) { return; } @@ -621,7 +684,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, QualifiedId_DecltypeSpecifier) { +TEST_P(BuildSyntaxTreeTest, QualifiedId_DecltypeSpecifier) { if (!GetParam().isCXX11OrLater()) { return; } @@ -653,7 +716,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, QualifiedId_OptionalTemplateKw) { +TEST_P(BuildSyntaxTreeTest, QualifiedId_OptionalTemplateKw) { if (!GetParam().isCXX()) { return; } @@ -701,7 +764,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, QualifiedId_Complex) { +TEST_P(BuildSyntaxTreeTest, QualifiedId_Complex) { if (!GetParam().isCXX()) { return; } @@ -744,7 +807,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, QualifiedId_DependentType) { +TEST_P(BuildSyntaxTreeTest, QualifiedId_DependentType) { if (!GetParam().isCXX()) { return; } @@ -815,7 +878,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, This_Simple) { +TEST_P(BuildSyntaxTreeTest, This_Simple) { if (!GetParam().isCXX()) { return; } @@ -833,7 +896,7 @@ ThisExpression ReturnValue )txt"})); } -TEST_P(SyntaxTreeTest, This_ExplicitMemberAccess) { +TEST_P(BuildSyntaxTreeTest, This_ExplicitMemberAccess) { if (!GetParam().isCXX()) { return; } @@ -857,7 +920,7 @@ MemberExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, This_ImplicitMemberAccess) { +TEST_P(BuildSyntaxTreeTest, This_ImplicitMemberAccess) { if (!GetParam().isCXX()) { return; } @@ -877,7 +940,7 @@ IdExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, ParenExpr) { +TEST_P(BuildSyntaxTreeTest, ParenExpr) { 
EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { @@ -919,7 +982,7 @@ ParenExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, UserDefinedLiteral_Char) { +TEST_P(BuildSyntaxTreeTest, UserDefinedLiteral_Char) { if (!GetParam().isCXX11OrLater()) { return; } @@ -936,7 +999,7 @@ CharUserDefinedLiteralExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, UserDefinedLiteral_String) { +TEST_P(BuildSyntaxTreeTest, UserDefinedLiteral_String) { if (!GetParam().isCXX11OrLater()) { return; } @@ -956,7 +1019,7 @@ StringUserDefinedLiteralExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, UserDefinedLiteral_Integer) { +TEST_P(BuildSyntaxTreeTest, UserDefinedLiteral_Integer) { if (!GetParam().isCXX11OrLater()) { return; } @@ -987,7 +1050,7 @@ IntegerUserDefinedLiteralExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, UserDefinedLiteral_Float) { +TEST_P(BuildSyntaxTreeTest, UserDefinedLiteral_Float) { if (!GetParam().isCXX11OrLater()) { return; } @@ -1018,7 +1081,7 @@ FloatUserDefinedLiteralExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, IntegerLiteral_LongLong) { +TEST_P(BuildSyntaxTreeTest, IntegerLiteral_LongLong) { if (!GetParam().isCXX11OrLater()) { return; } @@ -1039,7 +1102,7 @@ IntegerLiteralExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, IntegerLiteral_Binary) { +TEST_P(BuildSyntaxTreeTest, IntegerLiteral_Binary) { if (!GetParam().isCXX14OrLater()) { return; } @@ -1055,7 +1118,7 @@ IntegerLiteralExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, IntegerLiteral_WithDigitSeparators) { +TEST_P(BuildSyntaxTreeTest, IntegerLiteral_WithDigitSeparators) { if (!GetParam().isCXX14OrLater()) { return; } @@ -1071,7 +1134,7 @@ IntegerLiteralExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, CharacterLiteral) { +TEST_P(BuildSyntaxTreeTest, CharacterLiteral) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { @@ -1109,7 +1172,7 @@ CharacterLiteralExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, CharacterLiteral_Utf) { +TEST_P(BuildSyntaxTreeTest, CharacterLiteral_Utf) { if (!GetParam().isCXX11OrLater()) { return; } @@ -1140,7 +1203,7 @@ CharacterLiteralExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, CharacterLiteral_Utf8) { +TEST_P(BuildSyntaxTreeTest, CharacterLiteral_Utf8) { if (!GetParam().isCXX17OrLater()) { return; } @@ -1161,7 +1224,7 @@ CharacterLiteralExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, FloatingLiteral) { +TEST_P(BuildSyntaxTreeTest, FloatingLiteral) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { @@ -1189,7 +1252,7 @@ FloatingLiteralExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, FloatingLiteral_Hexadecimal) { +TEST_P(BuildSyntaxTreeTest, FloatingLiteral_Hexadecimal) { if (!GetParam().isCXX17OrLater()) { return; } @@ -1220,7 +1283,7 @@ FloatingLiteralExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, StringLiteral) { +TEST_P(BuildSyntaxTreeTest, StringLiteral) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { @@ -1238,7 +1301,7 @@ StringLiteralExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, StringLiteral_Utf) { +TEST_P(BuildSyntaxTreeTest, StringLiteral_Utf) { if (!GetParam().isCXX11OrLater()) { return; } @@ -1264,7 +1327,7 @@ StringLiteralExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, StringLiteral_Raw) { +TEST_P(BuildSyntaxTreeTest, StringLiteral_Raw) { if (!GetParam().isCXX11OrLater()) { return; } @@ -1297,7 +1360,7 @@ TEST_P(SyntaxTreeTest, StringLiteral_Raw) { " `-'}' CloseParen\n")); } -TEST_P(SyntaxTreeTest, 
BoolLiteral) { +TEST_P(BuildSyntaxTreeTest, BoolLiteral) { if (GetParam().isC()) { return; } @@ -1318,7 +1381,7 @@ BoolLiteralExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, CxxNullPtrLiteral) { +TEST_P(BuildSyntaxTreeTest, CxxNullPtrLiteral) { if (!GetParam().isCXX11OrLater()) { return; } @@ -1334,7 +1397,7 @@ CxxNullPtrExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, PostfixUnaryOperator) { +TEST_P(BuildSyntaxTreeTest, PostfixUnaryOperator) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test(int a) { @@ -1358,7 +1421,7 @@ PostfixUnaryOperatorExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, PrefixUnaryOperator) { +TEST_P(BuildSyntaxTreeTest, PrefixUnaryOperator) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test(int a, int *ap) { @@ -1444,7 +1507,7 @@ PrefixUnaryOperatorExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, PrefixUnaryOperatorCxx) { +TEST_P(BuildSyntaxTreeTest, PrefixUnaryOperatorCxx) { if (!GetParam().isCXX()) { return; } @@ -1471,7 +1534,7 @@ PrefixUnaryOperatorExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, BinaryOperator) { +TEST_P(BuildSyntaxTreeTest, BinaryOperator) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test(int a) { @@ -1545,7 +1608,7 @@ BinaryOperatorExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, BinaryOperatorCxx) { +TEST_P(BuildSyntaxTreeTest, BinaryOperatorCxx) { if (!GetParam().isCXX()) { return; } @@ -1593,7 +1656,7 @@ BinaryOperatorExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, BinaryOperator_NestedWithParenthesis) { +TEST_P(BuildSyntaxTreeTest, BinaryOperator_NestedWithParenthesis) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { @@ -1624,7 +1687,7 @@ BinaryOperatorExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, BinaryOperator_Associativity) { +TEST_P(BuildSyntaxTreeTest, BinaryOperator_Associativity) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test(int a, int b) { @@ -1662,7 +1725,7 @@ BinaryOperatorExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, BinaryOperator_Precedence) { +TEST_P(BuildSyntaxTreeTest, BinaryOperator_Precedence) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { @@ -1704,7 +1767,7 @@ BinaryOperatorExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, OverloadedOperator_Assignment) { +TEST_P(BuildSyntaxTreeTest, OverloadedOperator_Assignment) { if (!GetParam().isCXX()) { return; } @@ -1729,7 +1792,7 @@ BinaryOperatorExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, OverloadedOperator_Plus) { +TEST_P(BuildSyntaxTreeTest, OverloadedOperator_Plus) { if (!GetParam().isCXX()) { return; } @@ -1754,7 +1817,7 @@ BinaryOperatorExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, OverloadedOperator_Less) { +TEST_P(BuildSyntaxTreeTest, OverloadedOperator_Less) { if (!GetParam().isCXX()) { return; } @@ -1779,7 +1842,7 @@ BinaryOperatorExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, OverloadedOperator_LeftShift) { +TEST_P(BuildSyntaxTreeTest, OverloadedOperator_LeftShift) { if (!GetParam().isCXX()) { return; } @@ -1804,7 +1867,7 @@ BinaryOperatorExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, OverloadedOperator_Comma) { +TEST_P(BuildSyntaxTreeTest, OverloadedOperator_Comma) { if (!GetParam().isCXX()) { return; } @@ -1829,7 +1892,7 @@ BinaryOperatorExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, OverloadedOperator_PointerToMember) { +TEST_P(BuildSyntaxTreeTest, OverloadedOperator_PointerToMember) { if (!GetParam().isCXX()) { return; } @@ -1854,7 +1917,7 @@ 
BinaryOperatorExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, OverloadedOperator_Negation) { +TEST_P(BuildSyntaxTreeTest, OverloadedOperator_Negation) { if (!GetParam().isCXX()) { return; } @@ -1876,7 +1939,7 @@ PrefixUnaryOperatorExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, OverloadedOperator_AddressOf) { +TEST_P(BuildSyntaxTreeTest, OverloadedOperator_AddressOf) { if (!GetParam().isCXX()) { return; } @@ -1898,7 +1961,7 @@ PrefixUnaryOperatorExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, OverloadedOperator_PrefixIncrement) { +TEST_P(BuildSyntaxTreeTest, OverloadedOperator_PrefixIncrement) { if (!GetParam().isCXX()) { return; } @@ -1920,7 +1983,7 @@ PrefixUnaryOperatorExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, OverloadedOperator_PostfixIncrement) { +TEST_P(BuildSyntaxTreeTest, OverloadedOperator_PostfixIncrement) { if (!GetParam().isCXX()) { return; } @@ -1942,7 +2005,7 @@ PostfixUnaryOperatorExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, MemberExpression_SimpleWithDot) { +TEST_P(BuildSyntaxTreeTest, MemberExpression_SimpleWithDot) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct S { @@ -1964,7 +2027,7 @@ MemberExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, MemberExpression_StaticDataMember) { +TEST_P(BuildSyntaxTreeTest, MemberExpression_StaticDataMember) { if (!GetParam().isCXX()) { return; } @@ -1989,7 +2052,7 @@ MemberExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, MemberExpression_SimpleWithArrow) { +TEST_P(BuildSyntaxTreeTest, MemberExpression_SimpleWithArrow) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct S { @@ -2011,7 +2074,7 @@ MemberExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, MemberExpression_Chaining) { +TEST_P(BuildSyntaxTreeTest, MemberExpression_Chaining) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct S { @@ -2038,7 +2101,7 @@ MemberExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, MemberExpression_OperatorFunction) { +TEST_P(BuildSyntaxTreeTest, MemberExpression_OperatorFunction) { if (!GetParam().isCXX()) { return; } @@ -2067,7 +2130,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, MemberExpression_VariableTemplate) { +TEST_P(BuildSyntaxTreeTest, MemberExpression_VariableTemplate) { if (!GetParam().isCXX14OrLater()) { return; } @@ -2103,7 +2166,7 @@ CompoundStatement )txt"})); } -TEST_P(SyntaxTreeTest, MemberExpression_FunctionTemplate) { +TEST_P(BuildSyntaxTreeTest, MemberExpression_FunctionTemplate) { if (!GetParam().isCXX()) { return; } @@ -2135,7 +2198,8 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, MemberExpression_FunctionTemplateWithTemplateKeyword) { +TEST_P(BuildSyntaxTreeTest, + MemberExpression_FunctionTemplateWithTemplateKeyword) { if (!GetParam().isCXX()) { return; } @@ -2168,7 +2232,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, MemberExpression_WithQualifier) { +TEST_P(BuildSyntaxTreeTest, MemberExpression_WithQualifier) { if (!GetParam().isCXX()) { return; } @@ -2221,7 +2285,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, MemberExpression_Complex) { +TEST_P(BuildSyntaxTreeTest, MemberExpression_Complex) { if (!GetParam().isCXX()) { return; } @@ -2279,7 +2343,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, CallExpression_Callee_Member) { +TEST_P(BuildSyntaxTreeTest, CallExpression_Callee_Member) { if (!GetParam().isCXX()) { return; } @@ -2307,7 +2371,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, 
CallExpression_Callee_OperatorParens) { +TEST_P(BuildSyntaxTreeTest, CallExpression_Callee_OperatorParens) { if (!GetParam().isCXX()) { return; } @@ -2330,7 +2394,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, CallExpression_Callee_OperatorParensChaining) { +TEST_P(BuildSyntaxTreeTest, CallExpression_Callee_OperatorParensChaining) { if (!GetParam().isCXX()) { return; } @@ -2356,7 +2420,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, CallExpression_Callee_MemberWithThis) { +TEST_P(BuildSyntaxTreeTest, CallExpression_Callee_MemberWithThis) { if (!GetParam().isCXX()) { return; } @@ -2412,7 +2476,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, CallExpression_Callee_FunctionPointer) { +TEST_P(BuildSyntaxTreeTest, CallExpression_Callee_FunctionPointer) { if (!GetParam().isCXX()) { return; } @@ -2447,7 +2511,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, CallExpression_Callee_MemberFunctionPointer) { +TEST_P(BuildSyntaxTreeTest, CallExpression_Callee_MemberFunctionPointer) { if (!GetParam().isCXX()) { return; } @@ -2480,7 +2544,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, CallExpression_Arguments_Zero) { +TEST_P(BuildSyntaxTreeTest, CallExpression_Arguments_Zero) { if (!GetParam().isCXX()) { return; } @@ -2503,7 +2567,7 @@ ExpressionStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, CallExpression_Arguments_One) { +TEST_P(BuildSyntaxTreeTest, CallExpression_Arguments_One) { if (!GetParam().isCXX()) { return; } @@ -2529,7 +2593,7 @@ ExpressionStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, CallExpression_Arguments_Multiple) { +TEST_P(BuildSyntaxTreeTest, CallExpression_Arguments_Multiple) { if (!GetParam().isCXX()) { return; } @@ -2561,7 +2625,7 @@ ExpressionStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, CallExpression_Arguments_Assignment) { +TEST_P(BuildSyntaxTreeTest, CallExpression_Arguments_Assignment) { if (!GetParam().isCXX()) { return; } @@ -2592,7 +2656,7 @@ ExpressionStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, CallExpression_Arguments_BracedInitList_Empty) { +TEST_P(BuildSyntaxTreeTest, CallExpression_Arguments_BracedInitList_Empty) { if (!GetParam().isCXX11OrLater()) { return; } @@ -2620,7 +2684,7 @@ ExpressionStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, CallExpression_Arguments_BracedInitList_Simple) { +TEST_P(BuildSyntaxTreeTest, CallExpression_Arguments_BracedInitList_Simple) { if (!GetParam().isCXX11OrLater()) { return; } @@ -2660,7 +2724,8 @@ ExpressionStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, CallExpression_Arguments_BracedInitList_Designated) { +TEST_P(BuildSyntaxTreeTest, + CallExpression_Arguments_BracedInitList_Designated) { if (!GetParam().isCXX11OrLater()) { return; } @@ -2707,7 +2772,7 @@ ExpressionStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, CallExpression_Arguments_ParameterPack) { +TEST_P(BuildSyntaxTreeTest, CallExpression_Arguments_ParameterPack) { if (!GetParam().isCXX11OrLater() || GetParam().hasDelayedTemplateParsing()) { return; } @@ -2733,7 +2798,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, CallExpression_DefaultArguments) { +TEST_P(BuildSyntaxTreeTest, CallExpression_DefaultArguments) { if (!GetParam().isCXX11OrLater()) { return; } @@ -2781,7 +2846,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, MultipleDeclaratorsGrouping) { +TEST_P(BuildSyntaxTreeTest, MultipleDeclaratorsGrouping) { EXPECT_TRUE(treeDumpEqual( R"cpp( int *a, b; @@ -2810,7 +2875,7 @@ 
TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, MultipleDeclaratorsGroupingTypedef) { +TEST_P(BuildSyntaxTreeTest, MultipleDeclaratorsGroupingTypedef) { EXPECT_TRUE(treeDumpEqual( R"cpp( typedef int *a, b; @@ -2830,7 +2895,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, MultipleDeclaratorsInsideStatement) { +TEST_P(BuildSyntaxTreeTest, MultipleDeclaratorsInsideStatement) { EXPECT_TRUE(treeDumpEqual( R"cpp( void foo() { @@ -2874,7 +2939,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, SizeTTypedef) { +TEST_P(BuildSyntaxTreeTest, SizeTTypedef) { if (!GetParam().isCXX11OrLater()) { return; } @@ -2901,7 +2966,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, Namespace_Nested) { +TEST_P(BuildSyntaxTreeTest, Namespace_Nested) { if (!GetParam().isCXX()) { return; } @@ -2924,7 +2989,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, Namespace_NestedDefinition) { +TEST_P(BuildSyntaxTreeTest, Namespace_NestedDefinition) { if (!GetParam().isCXX17OrLater()) { return; } @@ -2944,7 +3009,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, Namespace_Unnamed) { +TEST_P(BuildSyntaxTreeTest, Namespace_Unnamed) { if (!GetParam().isCXX()) { return; } @@ -2961,7 +3026,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, Namespace_Alias) { +TEST_P(BuildSyntaxTreeTest, Namespace_Alias) { if (!GetParam().isCXX()) { return; } @@ -2980,7 +3045,7 @@ NamespaceAliasDefinition )txt"})); } -TEST_P(SyntaxTreeTest, UsingDirective) { +TEST_P(BuildSyntaxTreeTest, UsingDirective) { if (!GetParam().isCXX()) { return; } @@ -3000,7 +3065,7 @@ UsingNamespaceDirective )txt"})); } -TEST_P(SyntaxTreeTest, UsingDeclaration_Namespace) { +TEST_P(BuildSyntaxTreeTest, UsingDeclaration_Namespace) { if (!GetParam().isCXX()) { return; } @@ -3021,7 +3086,7 @@ UsingDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, UsingDeclaration_ClassMember) { +TEST_P(BuildSyntaxTreeTest, UsingDeclaration_ClassMember) { if (!GetParam().isCXX()) { return; } @@ -3055,7 +3120,7 @@ UsingDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, UsingTypeAlias) { +TEST_P(BuildSyntaxTreeTest, UsingTypeAlias) { if (!GetParam().isCXX11OrLater()) { return; } @@ -3074,7 +3139,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, FreeStandingClass_ForwardDeclaration) { +TEST_P(BuildSyntaxTreeTest, FreeStandingClass_ForwardDeclaration) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( [[struct X;]] @@ -3097,7 +3162,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, FreeStandingClasses_Definition) { +TEST_P(BuildSyntaxTreeTest, FreeStandingClasses_Definition) { EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( [[struct X {};]] @@ -3135,7 +3200,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, StaticMemberFunction) { +TEST_P(BuildSyntaxTreeTest, StaticMemberFunction) { if (!GetParam().isCXX11OrLater()) { return; } @@ -3160,7 +3225,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, OutOfLineMemberFunctionDefinition) { +TEST_P(BuildSyntaxTreeTest, OutOfLineMemberFunctionDefinition) { if (!GetParam().isCXX11OrLater()) { return; } @@ -3189,7 +3254,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, ConversionMemberFunction) { +TEST_P(BuildSyntaxTreeTest, ConversionMemberFunction) { if (!GetParam().isCXX()) { return; } @@ -3211,7 +3276,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, LiteralOperatorDeclaration) { +TEST_P(BuildSyntaxTreeTest, LiteralOperatorDeclaration) { if (!GetParam().isCXX11OrLater()) { return; } @@ -3237,7 +3302,7 @@ 
TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, NumericLiteralOperatorTemplateDeclaration) { +TEST_P(BuildSyntaxTreeTest, NumericLiteralOperatorTemplateDeclaration) { if (!GetParam().isCXX11OrLater()) { return; } @@ -3268,7 +3333,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, OverloadedOperatorDeclaration) { +TEST_P(BuildSyntaxTreeTest, OverloadedOperatorDeclaration) { if (!GetParam().isCXX()) { return; } @@ -3298,7 +3363,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, OverloadedOperatorFriendDeclaration) { +TEST_P(BuildSyntaxTreeTest, OverloadedOperatorFriendDeclaration) { if (!GetParam().isCXX()) { return; } @@ -3332,7 +3397,7 @@ UnknownDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, ClassTemplateDeclaration) { +TEST_P(BuildSyntaxTreeTest, ClassTemplateDeclaration) { if (!GetParam().isCXX()) { return; } @@ -3359,7 +3424,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, FunctionTemplateDeclaration) { +TEST_P(BuildSyntaxTreeTest, FunctionTemplateDeclaration) { if (!GetParam().isCXX()) { return; } @@ -3388,7 +3453,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, VariableTemplateDeclaration) { +TEST_P(BuildSyntaxTreeTest, VariableTemplateDeclaration) { if (!GetParam().isCXX14OrLater()) { return; } @@ -3416,7 +3481,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, StaticMemberFunctionTemplate) { +TEST_P(BuildSyntaxTreeTest, StaticMemberFunctionTemplate) { if (!GetParam().isCXX()) { return; } @@ -3447,7 +3512,7 @@ TemplateDeclaration Declaration )txt"})); } -TEST_P(SyntaxTreeTest, NestedTemplates) { +TEST_P(BuildSyntaxTreeTest, NestedTemplates) { if (!GetParam().isCXX()) { return; } @@ -3492,7 +3557,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, NestedTemplatesInNamespace) { +TEST_P(BuildSyntaxTreeTest, NestedTemplatesInNamespace) { if (!GetParam().isCXX()) { return; } @@ -3545,7 +3610,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, ClassTemplate_MemberClassDefinition) { +TEST_P(BuildSyntaxTreeTest, ClassTemplate_MemberClassDefinition) { if (!GetParam().isCXX()) { return; } @@ -3578,7 +3643,7 @@ TemplateDeclaration Declaration )txt"})); } -TEST_P(SyntaxTreeTest, ExplicitClassTemplateInstantation_Definition) { +TEST_P(BuildSyntaxTreeTest, ExplicitClassTemplateInstantation_Definition) { if (!GetParam().isCXX()) { return; } @@ -3600,7 +3665,7 @@ ExplicitTemplateInstantiation )txt"})); } -TEST_P(SyntaxTreeTest, ExplicitClassTemplateInstantation_Declaration) { +TEST_P(BuildSyntaxTreeTest, ExplicitClassTemplateInstantation_Declaration) { if (!GetParam().isCXX()) { return; } @@ -3623,7 +3688,7 @@ ExplicitTemplateInstantiation )txt"})); } -TEST_P(SyntaxTreeTest, ClassTemplateSpecialization_Partial) { +TEST_P(BuildSyntaxTreeTest, ClassTemplateSpecialization_Partial) { if (!GetParam().isCXX()) { return; } @@ -3653,7 +3718,7 @@ TemplateDeclaration Declaration )txt"})); } -TEST_P(SyntaxTreeTest, ClassTemplateSpecialization_Full) { +TEST_P(BuildSyntaxTreeTest, ClassTemplateSpecialization_Full) { if (!GetParam().isCXX()) { return; } @@ -3679,7 +3744,7 @@ TemplateDeclaration Declaration )txt"})); } -TEST_P(SyntaxTreeTest, EmptyDeclaration) { +TEST_P(BuildSyntaxTreeTest, EmptyDeclaration) { EXPECT_TRUE(treeDumpEqual( R"cpp( ; @@ -3691,7 +3756,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, StaticAssert) { +TEST_P(BuildSyntaxTreeTest, StaticAssert) { if (!GetParam().isCXX11OrLater()) { return; } @@ -3714,7 +3779,7 @@ TranslationUnit Detached )txt")); } 
-TEST_P(SyntaxTreeTest, StaticAssert_WithoutMessage) { +TEST_P(BuildSyntaxTreeTest, StaticAssert_WithoutMessage) { if (!GetParam().isCXX17OrLater()) { return; } @@ -3734,7 +3799,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, ExternC) { +TEST_P(BuildSyntaxTreeTest, ExternC) { if (!GetParam().isCXX()) { return; } @@ -3771,7 +3836,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, NonModifiableNodes) { +TEST_P(BuildSyntaxTreeTest, NonModifiableNodes) { // Some nodes are non-modifiable, they are marked with 'I:'. EXPECT_TRUE(treeDumpEqual( R"cpp( @@ -3812,7 +3877,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, ModifiableNodes) { +TEST_P(BuildSyntaxTreeTest, ModifiableNodes) { // All nodes can be mutated. EXPECT_TRUE(treeDumpEqual( R"cpp( @@ -3858,7 +3923,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, InitDeclarator_Equal) { +TEST_P(BuildSyntaxTreeTest, InitDeclarator_Equal) { if (!GetParam().isCXX()) { return; } @@ -3880,7 +3945,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, InitDeclarator_Brace) { +TEST_P(BuildSyntaxTreeTest, InitDeclarator_Brace) { if (!GetParam().isCXX11OrLater()) { return; } @@ -3934,7 +3999,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, InitDeclarator_EqualBrace) { +TEST_P(BuildSyntaxTreeTest, InitDeclarator_EqualBrace) { if (!GetParam().isCXX11OrLater()) { return; } @@ -3991,7 +4056,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, InitDeclarator_Paren) { +TEST_P(BuildSyntaxTreeTest, InitDeclarator_Paren) { if (!GetParam().isCXX()) { return; } @@ -4034,7 +4099,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, InitDeclarator_Paren_DefaultArguments) { +TEST_P(BuildSyntaxTreeTest, InitDeclarator_Paren_DefaultArguments) { if (!GetParam().isCXX()) { return; } @@ -4084,7 +4149,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, ImplicitConversion_Argument) { +TEST_P(BuildSyntaxTreeTest, ImplicitConversion_Argument) { if (!GetParam().isCXX()) { return; } @@ -4111,7 +4176,7 @@ CallExpression Expression )txt"})); } -TEST_P(SyntaxTreeTest, ImplicitConversion_Return) { +TEST_P(BuildSyntaxTreeTest, ImplicitConversion_Return) { if (!GetParam().isCXX()) { return; } @@ -4133,7 +4198,7 @@ ReturnStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, ConstructorCall_ZeroArguments) { +TEST_P(BuildSyntaxTreeTest, ConstructorCall_ZeroArguments) { if (!GetParam().isCXX()) { return; } @@ -4157,7 +4222,7 @@ ReturnStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, ConstructorCall_OneArgument) { +TEST_P(BuildSyntaxTreeTest, ConstructorCall_OneArgument) { if (!GetParam().isCXX()) { return; } @@ -4183,7 +4248,7 @@ ReturnStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, ConstructorCall_MultipleArguments) { +TEST_P(BuildSyntaxTreeTest, ConstructorCall_MultipleArguments) { if (!GetParam().isCXX()) { return; } @@ -4212,7 +4277,7 @@ ReturnStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, ConstructorCall_DefaultArguments) { +TEST_P(BuildSyntaxTreeTest, ConstructorCall_DefaultArguments) { if (!GetParam().isCXX()) { return; } @@ -4254,7 +4319,7 @@ UnknownExpression )txt"})); } -TEST_P(SyntaxTreeTest, TypeConversion_FunctionalNotation) { +TEST_P(BuildSyntaxTreeTest, TypeConversion_FunctionalNotation) { if (!GetParam().isCXX()) { return; } @@ -4277,7 +4342,7 @@ ReturnStatement Statement )txt"})); } -TEST_P(SyntaxTreeTest, ArrayDeclarator_Simple) { +TEST_P(BuildSyntaxTreeTest, ArrayDeclarator_Simple) { EXPECT_TRUE(treeDumpEqual( R"cpp( int a[10]; @@ -4297,7 +4362,7 @@ 
TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, ArrayDeclarator_Multidimensional) { +TEST_P(BuildSyntaxTreeTest, ArrayDeclarator_Multidimensional) { EXPECT_TRUE(treeDumpEqual( R"cpp( int b[1][2][3]; @@ -4327,7 +4392,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, ArrayDeclarator_UnknownBound) { +TEST_P(BuildSyntaxTreeTest, ArrayDeclarator_UnknownBound) { EXPECT_TRUE(treeDumpEqual( R"cpp( int c[] = {1,2,3}; @@ -4358,7 +4423,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, ArrayDeclarator_Static) { +TEST_P(BuildSyntaxTreeTest, ArrayDeclarator_Static) { if (!GetParam().isC99OrLater()) { return; } @@ -4390,7 +4455,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, ParametersAndQualifiers_InFreeFunctions_Empty) { +TEST_P(BuildSyntaxTreeTest, ParametersAndQualifiers_InFreeFunctions_Empty) { EXPECT_TRUE(treeDumpEqual( R"cpp( int func(); @@ -4408,7 +4473,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, ParametersAndQualifiers_InFreeFunctions_Named) { +TEST_P(BuildSyntaxTreeTest, ParametersAndQualifiers_InFreeFunctions_Named) { EXPECT_TRUE(treeDumpEqual( R"cpp( int func1(int a); @@ -4465,7 +4530,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, ParametersAndQualifiers_InFreeFunctions_Unnamed) { +TEST_P(BuildSyntaxTreeTest, ParametersAndQualifiers_InFreeFunctions_Unnamed) { EXPECT_TRUE(treeDumpEqual( R"cpp( int func1(int); @@ -4515,7 +4580,8 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, ParametersAndQualifiers_InFreeFunctions_Default_One) { +TEST_P(BuildSyntaxTreeTest, + ParametersAndQualifiers_InFreeFunctions_Default_One) { if (!GetParam().isCXX()) { return; } @@ -4535,7 +4601,7 @@ ParameterDeclarationList Parameters )txt"})); } -TEST_P(SyntaxTreeTest, +TEST_P(BuildSyntaxTreeTest, ParametersAndQualifiers_InFreeFunctions_Default_Multiple) { if (!GetParam().isCXX()) { return; @@ -4570,7 +4636,7 @@ ParameterDeclarationList Parameters )txt"})); } -TEST_P(SyntaxTreeTest, +TEST_P(BuildSyntaxTreeTest, ParametersAndQualifiers_InVariadicFunctionTemplate_ParameterPack) { if (!GetParam().isCXX11OrLater() || GetParam().hasDelayedTemplateParsing()) { return; @@ -4599,7 +4665,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, +TEST_P(BuildSyntaxTreeTest, ParametersAndQualifiers_InVariadicFunctionTemplate_NamedParameterPack) { if (!GetParam().isCXX11OrLater() || GetParam().hasDelayedTemplateParsing()) { return; @@ -4632,7 +4698,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, +TEST_P(BuildSyntaxTreeTest, ParametersAndQualifiers_InFreeFunctions_VariadicArguments) { if (!GetParam().isCXX11OrLater()) { return; @@ -4661,7 +4727,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, +TEST_P(BuildSyntaxTreeTest, ParametersAndQualifiers_InFreeFunctions_Cxx_CvQualifiers) { if (!GetParam().isCXX()) { return; @@ -4702,7 +4768,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, ParametersAndQualifiers_InFreeFunctions_Cxx_Ref) { +TEST_P(BuildSyntaxTreeTest, ParametersAndQualifiers_InFreeFunctions_Cxx_Ref) { if (!GetParam().isCXX()) { return; } @@ -4729,7 +4795,8 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, ParametersAndQualifiers_InFreeFunctions_Cxx11_RefRef) { +TEST_P(BuildSyntaxTreeTest, + ParametersAndQualifiers_InFreeFunctions_Cxx11_RefRef) { if (!GetParam().isCXX11OrLater()) { return; } @@ -4756,7 +4823,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, ParametersAndQualifiers_InMemberFunctions_Simple) { +TEST_P(BuildSyntaxTreeTest, 
ParametersAndQualifiers_InMemberFunctions_Simple) { if (!GetParam().isCXX()) { return; } @@ -4785,7 +4852,8 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, ParametersAndQualifiers_InMemberFunctions_CvQualifiers) { +TEST_P(BuildSyntaxTreeTest, + ParametersAndQualifiers_InMemberFunctions_CvQualifiers) { if (!GetParam().isCXX()) { return; } @@ -4833,7 +4901,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, ParametersAndQualifiers_InMemberFunctions_Ref) { +TEST_P(BuildSyntaxTreeTest, ParametersAndQualifiers_InMemberFunctions_Ref) { if (!GetParam().isCXX11OrLater()) { return; } @@ -4856,7 +4924,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, ParametersAndQualifiers_InMemberFunctions_RefRef) { +TEST_P(BuildSyntaxTreeTest, ParametersAndQualifiers_InMemberFunctions_RefRef) { if (!GetParam().isCXX11OrLater()) { return; } @@ -4879,7 +4947,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, TrailingReturn) { +TEST_P(BuildSyntaxTreeTest, TrailingReturn) { if (!GetParam().isCXX11OrLater()) { return; } @@ -4903,7 +4971,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, DynamicExceptionSpecification) { +TEST_P(BuildSyntaxTreeTest, DynamicExceptionSpecification) { if (!GetParam().supportsCXXDynamicExceptionSpecification()) { return; } @@ -4975,7 +5043,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, NoexceptExceptionSpecification) { +TEST_P(BuildSyntaxTreeTest, NoexceptExceptionSpecification) { if (!GetParam().isCXX11OrLater()) { return; } @@ -5011,7 +5079,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, DeclaratorsInParentheses) { +TEST_P(BuildSyntaxTreeTest, DeclaratorsInParentheses) { EXPECT_TRUE(treeDumpEqual( R"cpp( int (a); @@ -5071,7 +5139,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, Declaration_ConstVolatileQualifiers_SimpleConst) { +TEST_P(BuildSyntaxTreeTest, Declaration_ConstVolatileQualifiers_SimpleConst) { EXPECT_TRUE(treeDumpEqual( R"cpp( const int west = -1; @@ -5102,7 +5170,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, Declaration_ConstVolatileQualifiers_MultipleConst) { +TEST_P(BuildSyntaxTreeTest, Declaration_ConstVolatileQualifiers_MultipleConst) { EXPECT_TRUE(treeDumpEqual( R"cpp( const int const universal = 0; @@ -5122,7 +5190,8 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, Declaration_ConstVolatileQualifiers_ConstAndVolatile) { +TEST_P(BuildSyntaxTreeTest, + Declaration_ConstVolatileQualifiers_ConstAndVolatile) { EXPECT_TRUE(treeDumpEqual( R"cpp( const int const *const *volatile b; @@ -5143,7 +5212,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, RangesOfDeclaratorsWithTrailingReturnTypes) { +TEST_P(BuildSyntaxTreeTest, RangesOfDeclaratorsWithTrailingReturnTypes) { if (!GetParam().isCXX11OrLater()) { return; } @@ -5183,7 +5252,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, MemberPointers) { +TEST_P(BuildSyntaxTreeTest, MemberPointers) { if (!GetParam().isCXX()) { return; } @@ -5218,7 +5287,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, MemberFunctionPointer) { +TEST_P(BuildSyntaxTreeTest, MemberFunctionPointer) { if (!GetParam().isCXX()) { return; } @@ -5304,7 +5373,7 @@ SimpleDeclaration )txt"})); } -TEST_P(SyntaxTreeTest, ComplexDeclarator) { +TEST_P(BuildSyntaxTreeTest, ComplexDeclarator) { EXPECT_TRUE(treeDumpEqual( R"cpp( void x(char a, short (*b)(int)); @@ -5342,7 +5411,7 @@ TranslationUnit Detached )txt")); } -TEST_P(SyntaxTreeTest, ComplexDeclarator2) { +TEST_P(BuildSyntaxTreeTest, 
ComplexDeclarator2) { EXPECT_TRUE(treeDumpEqual( R"cpp( void x(char a, short (*b)(int), long (**c)(long long)); diff --git a/clang/unittests/Tooling/Syntax/CMakeLists.txt b/clang/unittests/Tooling/Syntax/CMakeLists.txt index 46ff4c9c3e27a..34a480503def6 100644 --- a/clang/unittests/Tooling/Syntax/CMakeLists.txt +++ b/clang/unittests/Tooling/Syntax/CMakeLists.txt @@ -6,6 +6,7 @@ add_clang_unittest(SyntaxTests TreeTestBase.cpp BuildTreeTest.cpp MutationsTest.cpp + SynthesisTest.cpp TokensTest.cpp ) diff --git a/clang/unittests/Tooling/Syntax/MutationsTest.cpp b/clang/unittests/Tooling/Syntax/MutationsTest.cpp index 6ef71e3a80900..f63d3dffa4597 100644 --- a/clang/unittests/Tooling/Syntax/MutationsTest.cpp +++ b/clang/unittests/Tooling/Syntax/MutationsTest.cpp @@ -19,15 +19,12 @@ using namespace clang::syntax; namespace { -TEST_P(SyntaxTreeTest, Mutations) { - if (!GetParam().isCXX11OrLater()) { - return; - } - - using Transformation = std::function; - auto CheckTransformation = [this](std::string Input, std::string Expected, - Transformation Transform) -> void { +class MutationTest : public SyntaxTreeTest { +protected: + using Transformation = std::function; + void CheckTransformation(Transformation Transform, std::string Input, + std::string Expected) { llvm::Annotations Source(Input); auto *Root = buildTree(Source.code(), GetParam()); @@ -46,40 +43,32 @@ TEST_P(SyntaxTreeTest, Mutations) { // Removes the selected statement. Input should have exactly one selected // range and it should correspond to a single statement. - auto RemoveStatement = [this](const llvm::Annotations &Input, - syntax::TranslationUnit *TU) { - auto *S = cast(nodeByRange(Input.range(), TU)); + Transformation RemoveStatement = [this](const llvm::Annotations &Input, + TranslationUnit *Root) { + auto *S = cast(nodeByRange(Input.range(), Root)); ASSERT_TRUE(S->canModify()) << "cannot remove a statement"; syntax::removeStatement(*Arena, S); EXPECT_TRUE(S->isDetached()); EXPECT_FALSE(S->isOriginal()) << "node removed from tree cannot be marked as original"; }; +}; - std::vector> - Cases = { - {"void test() { [[100+100;]] test(); }", "void test() { test(); }"}, - {"void test() { if (true) [[{}]] else {} }", - "void test() { if (true) ; else {} }"}, - {"void test() { [[;]] }", "void test() { }"}}; - for (const auto &C : Cases) - CheckTransformation(C.first, C.second, RemoveStatement); -} +INSTANTIATE_TEST_CASE_P(SyntaxTreeTests, MutationTest, + ::testing::ValuesIn(allTestClangConfigs()), ); -TEST_P(SyntaxTreeTest, SynthesizedNodes) { - buildTree("", GetParam()); +TEST_P(MutationTest, RemoveStatement_InCompound) { + CheckTransformation(RemoveStatement, "void test() { [[100+100;]] test(); }", + "void test() { test(); }"); +} - auto *C = syntax::createPunctuation(*Arena, tok::comma); - ASSERT_NE(C, nullptr); - EXPECT_EQ(C->token()->kind(), tok::comma); - EXPECT_TRUE(C->canModify()); - EXPECT_FALSE(C->isOriginal()); - EXPECT_TRUE(C->isDetached()); +TEST_P(MutationTest, RemoveStatement_InCompound_Empty) { + CheckTransformation(RemoveStatement, "void test() { [[;]] }", + "void test() { }"); +} - auto *S = syntax::createEmptyStatement(*Arena); - ASSERT_NE(S, nullptr); - EXPECT_TRUE(S->canModify()); - EXPECT_FALSE(S->isOriginal()); - EXPECT_TRUE(S->isDetached()); +TEST_P(MutationTest, RemoveStatement_LeaveEmpty) { + CheckTransformation(RemoveStatement, "void test() { if (1) [[{}]] else {} }", + "void test() { if (1) ; else {} }"); } } // namespace diff --git a/clang/unittests/Tooling/Syntax/SynthesisTest.cpp 
b/clang/unittests/Tooling/Syntax/SynthesisTest.cpp new file mode 100644 index 0000000000000..db4ee6b585fb5 --- /dev/null +++ b/clang/unittests/Tooling/Syntax/SynthesisTest.cpp @@ -0,0 +1,44 @@ +//===- SynthesisTest.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file tests synthesis API for syntax trees. +// +//===----------------------------------------------------------------------===// + +#include "TreeTestBase.h" +#include "clang/Tooling/Syntax/BuildTree.h" + +using namespace clang; +using namespace clang::syntax; + +namespace { + +INSTANTIATE_TEST_CASE_P(SyntaxTreeTests, SyntaxTreeTest, + ::testing::ValuesIn(allTestClangConfigs()), ); + +TEST_P(SyntaxTreeTest, Leaf_Punctuation) { + buildTree("", GetParam()); + + auto *C = syntax::createPunctuation(*Arena, tok::comma); + ASSERT_NE(C, nullptr); + EXPECT_EQ(C->token()->kind(), tok::comma); + EXPECT_TRUE(C->canModify()); + EXPECT_FALSE(C->isOriginal()); + EXPECT_TRUE(C->isDetached()); +} + +TEST_P(SyntaxTreeTest, Statement_Empty) { + buildTree("", GetParam()); + + auto *S = syntax::createEmptyStatement(*Arena); + ASSERT_NE(S, nullptr); + EXPECT_TRUE(S->canModify()); + EXPECT_FALSE(S->isOriginal()); + EXPECT_TRUE(S->isDetached()); +} +} // namespace diff --git a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp index ebee0115cb727..3618949c36ae2 100644 --- a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp @@ -43,8 +43,9 @@ ArrayRef tokens(syntax::Node *N) { return llvm::makeArrayRef(T->firstLeaf()->token(), T->lastLeaf()->token() + 1); } +} // namespace -std::vector allTestClangConfigs() { +std::vector clang::syntax::allTestClangConfigs() { std::vector all_configs; for (TestLanguage lang : {Lang_C89, Lang_C99, Lang_CXX03, Lang_CXX11, Lang_CXX14, Lang_CXX17, Lang_CXX20}) { @@ -61,10 +62,6 @@ std::vector allTestClangConfigs() { return all_configs; } -INSTANTIATE_TEST_CASE_P(SyntaxTreeTests, SyntaxTreeTest, - testing::ValuesIn(allTestClangConfigs()), ); -} // namespace - syntax::TranslationUnit * SyntaxTreeTest::buildTree(StringRef Code, const TestClangConfig &ClangConfig) { // FIXME: this code is almost the identical to the one in TokensTest. Share @@ -161,62 +158,6 @@ SyntaxTreeTest::buildTree(StringRef Code, const TestClangConfig &ClangConfig) { return Root; } -::testing::AssertionResult SyntaxTreeTest::treeDumpEqual(StringRef Code, - StringRef Tree) { - SCOPED_TRACE(llvm::join(GetParam().getCommandLineArgs(), " ")); - - auto *Root = buildTree(Code, GetParam()); - if (Diags->getClient()->getNumErrors() != 0) { - return ::testing::AssertionFailure() - << "Source file has syntax errors, they were printed to the test " - "log"; - } - auto Actual = StringRef(Root->dump(Arena->sourceManager())).trim().str(); - // EXPECT_EQ shows the diff between the two strings if they are different. 
- EXPECT_EQ(Tree.trim().str(), Actual); - if (Actual != Tree.trim().str()) { - return ::testing::AssertionFailure(); - } - return ::testing::AssertionSuccess(); -} - -::testing::AssertionResult -SyntaxTreeTest::treeDumpEqualOnAnnotations(StringRef CodeWithAnnotations, - ArrayRef TreeDumps) { - SCOPED_TRACE(llvm::join(GetParam().getCommandLineArgs(), " ")); - - auto AnnotatedCode = llvm::Annotations(CodeWithAnnotations); - auto *Root = buildTree(AnnotatedCode.code(), GetParam()); - - if (Diags->getClient()->getNumErrors() != 0) { - return ::testing::AssertionFailure() - << "Source file has syntax errors, they were printed to the test " - "log"; - } - - auto AnnotatedRanges = AnnotatedCode.ranges(); - if (AnnotatedRanges.size() != TreeDumps.size()) { - return ::testing::AssertionFailure() - << "The number of annotated ranges in the source code is different " - "to the number of their corresponding tree dumps."; - } - bool Failed = false; - for (unsigned i = 0; i < AnnotatedRanges.size(); i++) { - auto *AnnotatedNode = nodeByRange(AnnotatedRanges[i], Root); - assert(AnnotatedNode); - auto AnnotatedNodeDump = - StringRef(AnnotatedNode->dump(Arena->sourceManager())).trim().str(); - // EXPECT_EQ shows the diff between the two strings if they are different. - EXPECT_EQ(TreeDumps[i].trim().str(), AnnotatedNodeDump) - << "Dumps diverged for the code:\n" - << AnnotatedCode.code().slice(AnnotatedRanges[i].Begin, - AnnotatedRanges[i].End); - if (AnnotatedNodeDump != TreeDumps[i].trim().str()) - Failed = true; - } - return Failed ? ::testing::AssertionFailure() : ::testing::AssertionSuccess(); -} - syntax::Node *SyntaxTreeTest::nodeByRange(llvm::Annotations::Range R, syntax::Node *Root) { ArrayRef Toks = tokens(Root); diff --git a/clang/unittests/Tooling/Syntax/TreeTestBase.h b/clang/unittests/Tooling/Syntax/TreeTestBase.h index c282bbf45fd39..8b0ca979dec3d 100644 --- a/clang/unittests/Tooling/Syntax/TreeTestBase.h +++ b/clang/unittests/Tooling/Syntax/TreeTestBase.h @@ -32,11 +32,6 @@ class SyntaxTreeTest : public ::testing::Test, TranslationUnit *buildTree(StringRef Code, const TestClangConfig &ClangConfig); - ::testing::AssertionResult treeDumpEqual(StringRef Code, StringRef Tree); - - ::testing::AssertionResult - treeDumpEqualOnAnnotations(StringRef CodeWithAnnotations, - ArrayRef TreeDumps); /// Finds the deepest node in the tree that covers exactly \p R. /// FIXME: implement this efficiently and move to public syntax tree API. syntax::Node *nodeByRange(llvm::Annotations::Range R, syntax::Node *Root); @@ -56,6 +51,8 @@ class SyntaxTreeTest : public ::testing::Test, std::unique_ptr TB; std::unique_ptr Arena; }; + +std::vector allTestClangConfigs(); } // namespace syntax } // namespace clang #endif // LLVM_CLANG_UNITTESTS_TOOLING_SYNTAX_TREETESTBASE_H From e5d92691bdf187c6815d33c32201fb8187010748 Mon Sep 17 00:00:00 2001 From: YangZhihui Date: Thu, 10 Sep 2020 09:45:13 -0700 Subject: [PATCH 0274/1079] Fix typo in dsymutil.rst Differential revision: https://reviews.llvm.org/D87438 --- llvm/docs/CommandGuide/dsymutil.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/CommandGuide/dsymutil.rst b/llvm/docs/CommandGuide/dsymutil.rst index 78954fcc8d876..ca489cdabf693 100644 --- a/llvm/docs/CommandGuide/dsymutil.rst +++ b/llvm/docs/CommandGuide/dsymutil.rst @@ -111,7 +111,7 @@ OPTIONS debug info. This prints a table after linking with the object file name, the size of the debug info in the object file (in bytes) and the size contributed (in bytes) to the linked dSYM. 
The table is sorted by the output size listing - the obj ect files with the largest contribution first. + the object files with the largest contribution first. .. option:: --symbol-map From 5638df195048eef74d4ec2633f8fb6f3dd935f1d Mon Sep 17 00:00:00 2001 From: Eugene Burmako Date: Thu, 10 Sep 2020 18:48:13 +0200 Subject: [PATCH 0275/1079] Introduce linalg.vecmat This patch adds a new named structured op to accompany linalg.matmul and linalg.matvec. We needed it for our codegen, so I figured it would be useful to add it to Linalg. Reviewed By: nicolasvasilache, mravishankar Differential Revision: https://reviews.llvm.org/D87292 --- mlir/include/mlir/Dialect/Linalg/EDSC/Intrinsics.h | 1 + .../mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc | 7 ++++++- mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp | 1 + mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 1 + mlir/lib/Dialect/Linalg/Transforms/Loops.cpp | 2 ++ mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp | 2 +- mlir/test/lib/Transforms/TestLinalgTransforms.cpp | 1 + 7 files changed, 13 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/Linalg/EDSC/Intrinsics.h index 399c49d1e5721..d842069f65705 100644 --- a/mlir/include/mlir/Dialect/Linalg/EDSC/Intrinsics.h +++ b/mlir/include/mlir/Dialect/Linalg/EDSC/Intrinsics.h @@ -20,6 +20,7 @@ using linalg_dot = OperationBuilder; using linalg_fill = OperationBuilder; using linalg_matmul = OperationBuilder; using linalg_matvec = OperationBuilder; +using linalg_vecmat = OperationBuilder; using linalg_range = ValueBuilder; using linalg_reshape = ValueBuilder; using linalg_slice = ValueBuilder; diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc index 9c54a5f0c3c70..765e045e9e77c 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc @@ -8,6 +8,11 @@ def matvec(A: f32(M, N), y: f32(N)) -> (x: f32(M)) { x(m) = std_addf(std_mulf(A(m, n), y(n))); } +ods_def: +def vecmat(y: f32(M), A: f32(M, N)) -> (x: f32(N)) { + x(n) = std_addf(std_mulf(y(m), A(m, n))); +} + ods_def: def dot(A: f32(M), B: f32(M)) -> (C: f32()) { C() = std_addf(std_mulf(A(m), B(m))); @@ -66,4 +71,4 @@ ods_def: def conv_3d_ncdhw(I: f32(N, C, D, H, W), K: f32(F, C, KD, KH, KW)) -> (O: f32(N, F, D, H, W)) { O(n, f, d, h, w) = std_addf(std_mulf( I(n, c, d + kd, h + kh, w + kw), K(f, c, kd, kh, kw))); -} \ No newline at end of file +} diff --git a/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp b/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp index d56dffdd0dc17..93b7764a6a773 100644 --- a/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp +++ b/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp @@ -244,6 +244,7 @@ void mlir::populateLinalgToStandardConversionPatterns( LinalgOpConversion, LinalgOpConversion, LinalgOpConversion, + LinalgOpConversion, LinalgOpConversion, LinalgOpConversion, LinalgOpConversion, diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index c9b05f89f30b1..fcead984dfe55 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -1350,6 +1350,7 @@ CANONICALIZERS_AND_FOLDERS(BatchMatmulOp) CANONICALIZERS_AND_FOLDERS(DotOp) CANONICALIZERS_AND_FOLDERS(MatmulOp) CANONICALIZERS_AND_FOLDERS(MatvecOp) 
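+// The vector-matrix counterpart of matvec: x(n) = sum over m of y(m) * A(m, n).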
+CANONICALIZERS_AND_FOLDERS(VecmatOp) CANONICALIZERS_AND_FOLDERS(ConvWOp) CANONICALIZERS_AND_FOLDERS(ConvNWCOp) CANONICALIZERS_AND_FOLDERS(ConvNCWOp) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp index d4d1d108be71a..d3c90ffab06fd 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -679,6 +679,8 @@ static Optional linalgOpToLoopsImplSwitch(Operation *op, return linalgOpToLoopsImpl(op, builder); if (isa(op)) return linalgOpToLoopsImpl(op, builder); + if (isa(op)) + return linalgOpToLoopsImpl(op, builder); if (isa(op)) return linalgOpToLoopsImpl(op, builder); if (isa(op)) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index f4aabf8a8302f..a8b11a48df174 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -69,7 +69,7 @@ static bool hasMultiplyAddBody(Region &r) { static LogicalResult isContraction(Operation *op) { // TODO: interface for named ops. if (isa(op)) + linalg::VecmatOp, linalg::DotOp>(op)) return success(); auto genericOp = dyn_cast(op); diff --git a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp index 4fc880a24277b..edcc66c9b6a61 100644 --- a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp @@ -449,6 +449,7 @@ static void applyContractionToVectorPatterns(FuncOp funcOp) { patterns.insert, LinalgVectorizationPattern, LinalgVectorizationPattern, + LinalgVectorizationPattern, LinalgVectorizationPattern, LinalgVectorizationPattern>(funcOp.getContext()); applyPatternsAndFoldGreedily(funcOp, patterns); From 626209cac0559ebe06a9bd4792fac5d31333c597 Mon Sep 17 00:00:00 2001 From: Azharuddin Mohammed Date: Thu, 10 Sep 2020 09:49:45 -0700 Subject: [PATCH 0276/1079] Revert "[gcov] Delete flush_fn_list (unused since D83149)" This reverts commit 01cdab0b335e21321987505e66f34c24dc55b0d7. It was causing the instrprof-darwin-exports.c test to fail. ``` Undefined symbols for architecture x86_64: "_flush_fn_list", referenced from: -exported_symbol[s_list] command line option ``` --- compiler-rt/lib/profile/GCDAProfiling.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compiler-rt/lib/profile/GCDAProfiling.c b/compiler-rt/lib/profile/GCDAProfiling.c index 4055681872415..cf6c44bae6415 100644 --- a/compiler-rt/lib/profile/GCDAProfiling.c +++ b/compiler-rt/lib/profile/GCDAProfiling.c @@ -127,6 +127,11 @@ struct fn_list { */ struct fn_list writeout_fn_list; +/* + * A list of flush functions that our __gcov_flush() function should call, shared between all dynamic objects. + */ +struct fn_list flush_fn_list; + /* * A list of reset functions, shared between all dynamic objects. 
*/ From be7cef789e75a354831d528ecc76b325f0f5da68 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 10 Sep 2020 16:54:11 +0000 Subject: [PATCH 0277/1079] [gn build] Port c01d28dc51b --- llvm/utils/gn/secondary/clang/unittests/Tooling/Syntax/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/unittests/Tooling/Syntax/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Tooling/Syntax/BUILD.gn index d6072517391ff..4716d42bfdc18 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Tooling/Syntax/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Tooling/Syntax/BUILD.gn @@ -18,6 +18,7 @@ unittest("SyntaxTests") { sources = [ "BuildTreeTest.cpp", "MutationsTest.cpp", + "SynthesisTest.cpp", "TokensTest.cpp", "TreeTestBase.cpp", ] From 009cd4e491033f57f547a7bda63e35b50a6e5cf7 Mon Sep 17 00:00:00 2001 From: Kit Barton Date: Mon, 17 Aug 2020 15:33:47 -0500 Subject: [PATCH 0278/1079] [PPC][GlobalISel] Add initial GlobalIsel infrastructure This adds the initial GlobalISel skeleton for PowerPC. It can only run ir-translator and legalizer for `ret void`. This is largely based on the initial GlobalISel patch for RISCV (https://reviews.llvm.org/D65219). Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D83100 --- llvm/lib/Target/PowerPC/CMakeLists.txt | 6 ++ .../Target/PowerPC/GISel/PPCCallLowering.cpp | 51 ++++++++++ .../Target/PowerPC/GISel/PPCCallLowering.h | 39 ++++++++ .../PowerPC/GISel/PPCInstructionSelector.cpp | 92 +++++++++++++++++++ .../Target/PowerPC/GISel/PPCLegalizerInfo.cpp | 20 ++++ .../Target/PowerPC/GISel/PPCLegalizerInfo.h | 28 ++++++ .../PowerPC/GISel/PPCRegisterBankInfo.cpp | 27 ++++++ .../PowerPC/GISel/PPCRegisterBankInfo.h | 39 ++++++++ .../Target/PowerPC/GISel/PPCRegisterBanks.td | 15 +++ llvm/lib/Target/PowerPC/LLVMBuild.txt | 2 +- llvm/lib/Target/PowerPC/PPC.h | 30 +++--- llvm/lib/Target/PowerPC/PPC.td | 1 + llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 31 ++++++- llvm/lib/Target/PowerPC/PPCSubtarget.h | 15 +++ llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 36 +++++++- .../PowerPC/GlobalISel/irtranslator-ret.ll | 7 ++ .../PowerPC/GlobalISel/legalize-ret.mir | 17 ++++ 17 files changed, 441 insertions(+), 15 deletions(-) create mode 100644 llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp create mode 100644 llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h create mode 100644 llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp create mode 100644 llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp create mode 100644 llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.h create mode 100644 llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp create mode 100644 llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h create mode 100644 llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td create mode 100644 llvm/test/CodeGen/PowerPC/GlobalISel/irtranslator-ret.ll create mode 100644 llvm/test/CodeGen/PowerPC/GlobalISel/legalize-ret.mir diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt index 5a06faa16be19..882fb0a5b7e2b 100644 --- a/llvm/lib/Target/PowerPC/CMakeLists.txt +++ b/llvm/lib/Target/PowerPC/CMakeLists.txt @@ -11,10 +11,13 @@ tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info) tablegen(LLVM PPCGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM PPCGenExegesis.inc -gen-exegesis) +tablegen(LLVM PPCGenRegisterBank.inc -gen-register-bank) +tablegen(LLVM PPCGenGlobalISel.inc -gen-global-isel) 
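+# The generated PPCGenRegisterBank.inc and PPCGenGlobalISel.inc are consumed
+# by the GISel/ sources added to this target below.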
add_public_tablegen_target(PowerPCCommonTableGen) add_llvm_target(PowerPCCodeGen + GISel/PPCInstructionSelector.cpp PPCBoolRetToInt.cpp PPCAsmPrinter.cpp PPCBranchSelector.cpp @@ -49,6 +52,9 @@ add_llvm_target(PowerPCCodeGen PPCExpandISEL.cpp PPCPreEmitPeephole.cpp PPCLowerMASSVEntries.cpp + GISel/PPCCallLowering.cpp + GISel/PPCRegisterBankInfo.cpp + GISel/PPCLegalizerInfo.cpp ) add_subdirectory(AsmParser) diff --git a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp new file mode 100644 index 0000000000000..dea28e971fedd --- /dev/null +++ b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp @@ -0,0 +1,51 @@ +//===-- PPCCallLowering.h - Call lowering for GlobalISel -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements the lowering of LLVM calls to machine code calls for +/// GlobalISel. +/// +//===----------------------------------------------------------------------===// + +#include "PPCCallLowering.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "ppc-call-lowering" + +using namespace llvm; + +PPCCallLowering::PPCCallLowering(const PPCTargetLowering &TLI) + : CallLowering(&TLI) {} + +bool PPCCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, + const Value *Val, ArrayRef VRegs, + Register SwiftErrorVReg) const { + assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) && + "Return value without a vreg"); + if (VRegs.size() > 0) + return false; + + MIRBuilder.buildInstr(PPC::BLR8); + return true; +} + +bool PPCCallLowering::lowerFormalArguments( + MachineIRBuilder &MIRBuilder, const Function &F, + ArrayRef> VRegs) const { + + // If VRegs is empty, then there are no formal arguments to lower and thus can + // always return true. If there are formal arguments, we currently do not + // handle them and thus return false. + return VRegs.empty(); +} + +bool PPCCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const { + return false; +} diff --git a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h new file mode 100644 index 0000000000000..ef078aa8ed838 --- /dev/null +++ b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h @@ -0,0 +1,39 @@ +//===-- PPCCallLowering.h - Call lowering for GlobalISel -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes how to lower LLVM calls to machine code calls. 
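+/// Support is currently minimal: only a void return (lowered to BLR8) is
+/// handled; formal-argument and call lowering simply report failure.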
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_POWERPC_GISEL_PPCCALLLOWERING_H +#define LLVM_LIB_TARGET_POWERPC_GISEL_PPCCALLLOWERING_H + +#include "PPCISelLowering.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/IR/CallingConv.h" + +namespace llvm { + +class PPCTargetLowering; + +class PPCCallLowering : public CallLowering { +public: + PPCCallLowering(const PPCTargetLowering &TLI); + + bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, + ArrayRef VRegs, + Register SwiftErrorVReg) const override; + bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, + ArrayRef> VRegs) const override; + bool lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const override; +}; +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp new file mode 100644 index 0000000000000..7d64816ed6c7f --- /dev/null +++ b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp @@ -0,0 +1,92 @@ +//===- PPCInstructionSelector.cpp --------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the targeting of the InstructionSelector class for +/// PowerPC. +//===----------------------------------------------------------------------===// + +#include "PPCInstrInfo.h" +#include "PPCRegisterBankInfo.h" +#include "PPCSubtarget.h" +#include "PPCTargetMachine.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/IntrinsicsPowerPC.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "ppc-gisel" + +using namespace llvm; + +namespace { + +#define GET_GLOBALISEL_PREDICATE_BITSET +#include "PPCGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATE_BITSET + +class PPCInstructionSelector : public InstructionSelector { +public: + PPCInstructionSelector(const PPCTargetMachine &TM, const PPCSubtarget &STI, + const PPCRegisterBankInfo &RBI); + + bool select(MachineInstr &I) override; + static const char *getName() { return DEBUG_TYPE; } + +private: + /// tblgen generated 'select' implementation that is used as the initial + /// selector for the patterns that do not require complex C++. 
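+  /// The body is provided by the GET_GLOBALISEL_IMPL section of the
+  /// TableGen-generated PPCGenGlobalISel.inc included below.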
+ bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; + + const PPCInstrInfo &TII; + const PPCRegisterInfo &TRI; + const PPCRegisterBankInfo &RBI; + +#define GET_GLOBALISEL_PREDICATES_DECL +#include "PPCGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_DECL + +#define GET_GLOBALISEL_TEMPORARIES_DECL +#include "PPCGenGlobalISel.inc" +#undef GET_GLOBALISEL_TEMPORARIES_DECL +}; + +} // end anonymous namespace + +#define GET_GLOBALISEL_IMPL +#include "PPCGenGlobalISel.inc" +#undef GET_GLOBALISEL_IMPL + +PPCInstructionSelector::PPCInstructionSelector(const PPCTargetMachine &TM, + const PPCSubtarget &STI, + const PPCRegisterBankInfo &RBI) + : InstructionSelector(), TII(*STI.getInstrInfo()), + TRI(*STI.getRegisterInfo()), RBI(RBI), +#define GET_GLOBALISEL_PREDICATES_INIT +#include "PPCGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_INIT +#define GET_GLOBALISEL_TEMPORARIES_INIT +#include "PPCGenGlobalISel.inc" +#undef GET_GLOBALISEL_TEMPORARIES_INIT +{ +} + +bool PPCInstructionSelector::select(MachineInstr &I) { + if (selectImpl(I, *CoverageInfo)) + return true; + return false; +} + +namespace llvm { +InstructionSelector * +createPPCInstructionSelector(const PPCTargetMachine &TM, + const PPCSubtarget &Subtarget, + const PPCRegisterBankInfo &RBI) { + return new PPCInstructionSelector(TM, Subtarget, RBI); +} +} // end namespace llvm diff --git a/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp new file mode 100644 index 0000000000000..c16bcaea592bf --- /dev/null +++ b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp @@ -0,0 +1,20 @@ +//===- PPCLegalizerInfo.h ----------------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the targeting of the Machinelegalizer class for PowerPC +//===----------------------------------------------------------------------===// + +#include "PPCLegalizerInfo.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "ppc-legalinfo" + +using namespace llvm; +using namespace LegalizeActions; + +PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) { computeTables(); } diff --git a/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.h b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.h new file mode 100644 index 0000000000000..c73186d3d0c11 --- /dev/null +++ b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.h @@ -0,0 +1,28 @@ +//===- PPCLegalizerInfo.h ----------------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares the targeting of the Machinelegalizer class for PowerPC +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_POWERPC_GISEL_PPCMACHINELEGALIZER_H +#define LLVM_LIB_TARGET_POWERPC_GISEL_PPCMACHINELEGALIZER_H + +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" + +namespace llvm { + +class PPCSubtarget; + +/// This class provides the information for the PowerPC target legalizer for +/// GlobalISel. 
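+/// No legalization rules are defined yet (the constructor only calls
+/// computeTables()), so only code with nothing to legalize, such as a bare
+/// return, makes it through.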
+class PPCLegalizerInfo : public LegalizerInfo { +public: + PPCLegalizerInfo(const PPCSubtarget &ST); +}; +} // namespace llvm +#endif diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp new file mode 100644 index 0000000000000..6af79324919cc --- /dev/null +++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp @@ -0,0 +1,27 @@ +//===- PPCRegisterBankInfo.cpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the targeting of the RegisterBankInfo class for +/// PowerPC. +//===----------------------------------------------------------------------===// + +#include "PPCRegisterBankInfo.h" +#include "PPCRegisterInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "ppc-reg-bank-info" + +#define GET_TARGET_REGBANK_IMPL +#include "PPCGenRegisterBank.inc" + +using namespace llvm; + +PPCRegisterBankInfo::PPCRegisterBankInfo(const TargetRegisterInfo &TRI) + : PPCGenRegisterBankInfo() {} diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h new file mode 100644 index 0000000000000..358d5ed3cf14e --- /dev/null +++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h @@ -0,0 +1,39 @@ +//===-- PPCRegisterBankInfo.h -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file declares the targeting of the RegisterBankInfo class for PowerPC. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_PPC_GISEL_PPCREGISTERBANKINFO_H +#define LLVM_LIB_TARGET_PPC_GISEL_PPCREGISTERBANKINFO_H + +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" + +#define GET_REGBANK_DECLARATIONS +#include "PPCGenRegisterBank.inc" + +namespace llvm { +class TargetRegisterInfo; + +class PPCGenRegisterBankInfo : public RegisterBankInfo { +protected: +#define GET_TARGET_REGBANK_CLASS +#include "PPCGenRegisterBank.inc" +}; + +class PPCRegisterBankInfo final : public PPCGenRegisterBankInfo { +public: + PPCRegisterBankInfo(const TargetRegisterInfo &TRI); +}; +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td new file mode 100644 index 0000000000000..0e8a4b7061c5a --- /dev/null +++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td @@ -0,0 +1,15 @@ +//===-- PPCRegisterBanks.td - Describe the PPC Banks -------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Define the PPC register banks used for GlobalISel. +/// +//===----------------------------------------------------------------------===// + +/// General Purpose Registers +def GPRRegBank : RegisterBank<"GPR", [G8RC]>; diff --git a/llvm/lib/Target/PowerPC/LLVMBuild.txt b/llvm/lib/Target/PowerPC/LLVMBuild.txt index 34c295731697c..ed38d2a402141 100644 --- a/llvm/lib/Target/PowerPC/LLVMBuild.txt +++ b/llvm/lib/Target/PowerPC/LLVMBuild.txt @@ -30,5 +30,5 @@ has_jit = 1 type = Library name = PowerPCCodeGen parent = PowerPC -required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCDesc PowerPCInfo Scalar SelectionDAG Support Target TransformUtils +required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCDesc PowerPCInfo Scalar SelectionDAG Support Target TransformUtils GlobalISel add_to_library_groups = PowerPC diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h index e8a9032bfbeec..e242d319470bc 100644 --- a/llvm/lib/Target/PowerPC/PPC.h +++ b/llvm/lib/Target/PowerPC/PPC.h @@ -20,17 +20,20 @@ #undef PPC namespace llvm { - class PPCTargetMachine; - class PassRegistry; - class FunctionPass; - class MachineInstr; - class MachineOperand; - class AsmPrinter; - class MCInst; - class MCOperand; - class ModulePass; - - FunctionPass *createPPCCTRLoops(); +class PPCRegisterBankInfo; +class PPCSubtarget; +class PPCTargetMachine; +class PassRegistry; +class FunctionPass; +class InstructionSelector; +class MachineInstr; +class MachineOperand; +class AsmPrinter; +class MCInst; +class MCOperand; +class ModulePass; + +FunctionPass *createPPCCTRLoops(); #ifndef NDEBUG FunctionPass *createPPCCTRLoopsVerify(); #endif @@ -78,7 +81,10 @@ namespace llvm { ModulePass *createPPCLowerMASSVEntriesPass(); void initializePPCLowerMASSVEntriesPass(PassRegistry &); extern char &PPCLowerMASSVEntriesID; - + + InstructionSelector * + createPPCInstructionSelector(const PPCTargetMachine &, const PPCSubtarget &, + const PPCRegisterBankInfo &); namespace PPCII { /// Target Operand Flag enum. diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index a617715d4bd86..c572e210093a3 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -433,6 +433,7 @@ def getAltVSXFMAOpcode : InstrMapping { include "PPCRegisterInfo.td" include "PPCSchedule.td" +include "GISel/PPCRegisterBanks.td" //===----------------------------------------------------------------------===// // PowerPC processors supported. 
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 8021cfa4a18c6..5546ba9de5d75 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -11,9 +11,13 @@ //===----------------------------------------------------------------------===// #include "PPCSubtarget.h" +#include "GISel/PPCCallLowering.h" +#include "GISel/PPCLegalizerInfo.h" +#include "GISel/PPCRegisterBankInfo.h" #include "PPC.h" #include "PPCRegisterInfo.h" #include "PPCTargetMachine.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/Attributes.h" @@ -53,7 +57,15 @@ PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU, IsPPC64(TargetTriple.getArch() == Triple::ppc64 || TargetTriple.getArch() == Triple::ppc64le), TM(TM), FrameLowering(initializeSubtargetDependencies(CPU, FS)), - InstrInfo(*this), TLInfo(TM, *this) {} + InstrInfo(*this), TLInfo(TM, *this) { + CallLoweringInfo.reset(new PPCCallLowering(*getTargetLowering())); + Legalizer.reset(new PPCLegalizerInfo(*this)); + auto *RBI = new PPCRegisterBankInfo(*getRegisterInfo()); + RegBankInfo.reset(RBI); + + InstSelector.reset(createPPCInstructionSelector( + *static_cast(&TM), *this, *RBI)); +} void PPCSubtarget::initializeEnvironment() { StackAlignment = Align(16); @@ -227,3 +239,20 @@ bool PPCSubtarget::isUsingPCRelativeCalls() const { return isPPC64() && hasPCRelativeMemops() && isELFv2ABI() && CodeModel::Medium == getTargetMachine().getCodeModel(); } + +// GlobalISEL +const CallLowering *PPCSubtarget::getCallLowering() const { + return CallLoweringInfo.get(); +} + +const RegisterBankInfo *PPCSubtarget::getRegBankInfo() const { + return RegBankInfo.get(); +} + +const LegalizerInfo *PPCSubtarget::getLegalizerInfo() const { + return Legalizer.get(); +} + +InstructionSelector *PPCSubtarget::getInstructionSelector() const { + return InstSelector.get(); +} diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index 76b43dfc7a723..ee430529ad564 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -17,6 +17,9 @@ #include "PPCISelLowering.h" #include "PPCInstrInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" @@ -157,6 +160,12 @@ class PPCSubtarget : public PPCGenSubtargetInfo { PPCTargetLowering TLInfo; SelectionDAGTargetInfo TSInfo; + /// GlobalISel related APIs. + std::unique_ptr CallLoweringInfo; + std::unique_ptr Legalizer; + std::unique_ptr RegBankInfo; + std::unique_ptr InstSelector; + public: /// This constructor initializes the data members to match that /// of the specified triple. 
@@ -394,6 +403,12 @@ class PPCSubtarget : public PPCGenSubtargetInfo { bool isPredictableSelectIsExpensive() const { return PredictableSelectIsExpensive; } + + // GlobalISEL + const CallLowering *getCallLowering() const override; + const RegisterBankInfo *getRegBankInfo() const override; + const LegalizerInfo *getLegalizerInfo() const override; + InstructionSelector *getInstructionSelector() const override; }; } // End llvm namespace diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index ea9b37de6ff39..7fd7b82fb4352 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -24,12 +24,18 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/Localizer.h" +#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" @@ -116,6 +122,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() { initializePPCTLSDynamicCallPass(PR); initializePPCMIPeepholePass(PR); initializePPCLowerMASSVEntriesPass(PR); + initializeGlobalISel(PR); } /// Return the datalayout string of a subtarget. @@ -381,6 +388,12 @@ class PPCPassConfig : public TargetPassConfig { void addPreRegAlloc() override; void addPreSched2() override; void addPreEmitPass() override; + // GlobalISEL + bool addIRTranslator() override; + bool addLegalizeMachineIR() override; + bool addRegBankSelect() override; + bool addGlobalInstructionSelect() override; + ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override { return createPPCMachineScheduler(C); @@ -531,3 +544,24 @@ static MachineSchedRegistry PPCPostRASchedRegistry("ppc-postra", "Run PowerPC PostRA specific scheduler", createPPCPostMachineScheduler); + +// Global ISEL +bool PPCPassConfig::addIRTranslator() { + addPass(new IRTranslator()); + return false; +} + +bool PPCPassConfig::addLegalizeMachineIR() { + addPass(new Legalizer()); + return false; +} + +bool PPCPassConfig::addRegBankSelect() { + addPass(new RegBankSelect()); + return false; +} + +bool PPCPassConfig::addGlobalInstructionSelect() { + addPass(new InstructionSelect()); + return false; +} diff --git a/llvm/test/CodeGen/PowerPC/GlobalISel/irtranslator-ret.ll b/llvm/test/CodeGen/PowerPC/GlobalISel/irtranslator-ret.ll new file mode 100644 index 0000000000000..86f27a126d5a3 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/GlobalISel/irtranslator-ret.ll @@ -0,0 +1,7 @@ +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -global-isel -verify-machineinstrs -stop-after=irtranslator < %s | FileCheck %s + +; CHECK: name: f +; CHECK: BLR8 +define void @f() { + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/GlobalISel/legalize-ret.mir b/llvm/test/CodeGen/PowerPC/GlobalISel/legalize-ret.mir new file mode 100644 index 0000000000000..7226511688105 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/GlobalISel/legalize-ret.mir @@ -0,0 +1,17 @@ +# RUN: llc 
-mtriple=powerpc64le-unknown-linux-gnu -global-isel -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s

--- +name: test_simple +body: | + ; CHECK-LABEL: name: test_simple + ; CHECK: [[IN:%[0-9]+]]:_(s64) = COPY $x3 + ; CHECK: $x3 = COPY [[IN]] + ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit $x3 + bb.1.entry: + liveins: $x3 + + %0:_(s64) = COPY $x3 + $x3 = COPY %0(s64) + BLR8 implicit $lr8, implicit $rm, implicit $x3 + +... From 6b13cfe7399b0aba726873f807ddfcdd9f967563 Mon Sep 17 00:00:00 2001 From: Ettore Tiotto Date: Thu, 10 Sep 2020 13:08:57 -0400 Subject: [PATCH 0279/1079] [ArgumentPromotion]: Copy function metadata after promoting arguments The argument promotion pass currently fails to copy function annotations over to the modified function after promoting arguments. This patch copies the original function annotation to the new function. Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D86630 --- llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 6 ++++-- llvm/test/Transforms/ArgumentPromotion/profile.ll | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index d511ad2729abc..348717ec5618a 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -215,9 +215,11 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(), F->getName()); NF->copyAttributesFrom(F); + NF->copyMetadata(F, 0); - // Patch the pointer to LLVM function in debug info descriptor. - NF->setSubprogram(F->getSubprogram()); + // The new function will have the !dbg metadata copied from the original + // function. The original function may not be deleted, and dbg metadata need + // to be unique so we need to drop it. F->setSubprogram(nullptr); LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" diff --git a/llvm/test/Transforms/ArgumentPromotion/profile.ll b/llvm/test/Transforms/ArgumentPromotion/profile.ll index f4bceb3eb913d..941eafad1af3e 100644 --- a/llvm/test/Transforms/ArgumentPromotion/profile.ll +++ b/llvm/test/Transforms/ArgumentPromotion/profile.ll @@ -15,9 +15,9 @@ define void @caller() #0 { ret void } -define internal void @promote_i32_ptr(i32* %xp) { +define internal void @promote_i32_ptr(i32* %xp) !prof !1 { ; CHECK-LABEL: define {{[^@]+}}@promote_i32_ptr -; CHECK-SAME: (i32 [[XP_VAL:%.*]]) +; CHECK-SAME: (i32 [[XP_VAL:%.*]]) !prof !1 ; CHECK-NEXT: call void @use_i32(i32 [[XP_VAL]]) ; CHECK-NEXT: ret void ; @@ -29,3 +29,4 @@ define internal void @promote_i32_ptr(i32* %xp) { declare void @use_i32(i32) !0 = !{!"branch_weights", i32 30} +!1 = !{!"function_entry_count", i64 100} From c464f1d8f9a04d7b4b6cc81eac0891c46aba5950 Mon Sep 17 00:00:00 2001 From: Stella Stamenova Date: Thu, 10 Sep 2020 10:09:35 -0700 Subject: [PATCH 0280/1079] [lldb, tests] Correctly configure the yaml2obj paths They are currently not being set correctly for the case of multi-config generators like Xcode and VS. There's also a typo in one of the cmake files.
Reviewed By: JDevlieghere Differential Revision: https://reviews.llvm.org/D87466 --- lldb/test/API/lit.site.cfg.py.in | 1 + lldb/utils/lldb-dotest/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in index 6554d05d7df97..f2e1f855fe390 100644 --- a/lldb/test/API/lit.site.cfg.py.in +++ b/lldb/test/API/lit.site.cfg.py.in @@ -58,6 +58,7 @@ try: config.test_compiler = config.test_compiler % lit_config.params config.dsymutil = config.dsymutil % lit_config.params config.filecheck = config.filecheck % lit_config.params + config.yaml2obj = config.yaml2obj % lit_config.params config.dotest_args_str = config.dotest_args_str % lit_config.params except KeyError as e: key, = e.args diff --git a/lldb/utils/lldb-dotest/CMakeLists.txt b/lldb/utils/lldb-dotest/CMakeLists.txt index 0ef60c1427610..e5a73c2b1dec3 100644 --- a/lldb/utils/lldb-dotest/CMakeLists.txt +++ b/lldb/utils/lldb-dotest/CMakeLists.txt @@ -49,7 +49,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_FILECHECK_CONFIGURED "${LLDB_TEST_FILECHECK}") - string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_YAML2OBJ_CONFIGURED "${LLDB_TEST_YAML2OBJ_CONFIGURED}") + string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_YAML2OBJ_CONFIGURED "${LLDB_TEST_YAML2OBJ}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_LIBS_DIR_CONFIGURED "${LLDB_LIBS_DIR}") endif() From ab1de1fcfb0c53bc768deb8f8bacefad7d378b7b Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 10 Sep 2020 10:15:27 -0700 Subject: [PATCH 0281/1079] [gcov] Delete flush_fn_list (unused since D83149) --- clang/lib/Driver/ToolChains/Darwin.cpp | 1 - compiler-rt/lib/profile/GCDAProfiling.c | 5 ----- 2 files changed, 6 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 9d22cda217116..8f2be2a343cc5 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -1197,7 +1197,6 @@ void Darwin::addProfileRTLibs(const ArgList &Args, if (ForGCOV) { addExportedSymbol(CmdArgs, "___gcov_dump"); addExportedSymbol(CmdArgs, "___gcov_reset"); - addExportedSymbol(CmdArgs, "_flush_fn_list"); addExportedSymbol(CmdArgs, "_writeout_fn_list"); addExportedSymbol(CmdArgs, "_reset_fn_list"); } else { diff --git a/compiler-rt/lib/profile/GCDAProfiling.c b/compiler-rt/lib/profile/GCDAProfiling.c index cf6c44bae6415..4055681872415 100644 --- a/compiler-rt/lib/profile/GCDAProfiling.c +++ b/compiler-rt/lib/profile/GCDAProfiling.c @@ -127,11 +127,6 @@ struct fn_list { */ struct fn_list writeout_fn_list; -/* - * A list of flush functions that our __gcov_flush() function should call, shared between all dynamic objects. - */ -struct fn_list flush_fn_list; - /* * A list of reset functions, shared between all dynamic objects. */ From e543708e5ea7af0ec3ef11d6fe932db507472aa1 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 8 Sep 2020 17:18:04 -0700 Subject: [PATCH 0282/1079] [NFC][ThinLTO] Let llvm::EmbedBitcodeInModule handle serialization. llvm::EmbedBitcodeInModule handles serializing the passed-in module, if the provided MemoryBufferRef is invalid. 
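To make the new contract concrete, here is a minimal sketch of a caller that relies on this behavior; it is illustrative only, and the helper name embedOwnBitcode is hypothetical, not part of the patch:

#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"

// Hypothetical helper: embed M's own bitcode into its __LLVM,__bitcode
// section. Passing a default-constructed (invalid) MemoryBufferRef makes
// EmbedBitcodeInModule serialize M itself, preserving use-list order.
static void embedOwnBitcode(llvm::Module &M) {
  llvm::EmbedBitcodeInModule(M, llvm::MemoryBufferRef(),
                             /*EmbedBitcode=*/true, /*EmbedMarker=*/false,
                             /*CmdArgs=*/nullptr);
}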
This is already the path taken in one of the uses of the API - clang::EmbedBitcode, when called from BackendConsumer::HandleTranslationUnit - so might as well do the same here and reduce (by very little) code duplication. The only difference this patch introduces is that the serialization happens with ShouldPreserveUseListOrder set to true. Differential Revision: https://reviews.llvm.org/D87339 --- llvm/include/llvm/Bitcode/BitcodeWriter.h | 4 ++++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 7 +++---- llvm/lib/LTO/LTOBackend.cpp | 8 +------- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/Bitcode/BitcodeWriter.h b/llvm/include/llvm/Bitcode/BitcodeWriter.h index 4beb89d30e008..5701c07a2c4ab 100644 --- a/llvm/include/llvm/Bitcode/BitcodeWriter.h +++ b/llvm/include/llvm/Bitcode/BitcodeWriter.h @@ -153,6 +153,10 @@ class raw_ostream; *ModuleToSummariesForIndex = nullptr); /// Save a copy of the llvm IR as data in the __LLVM,__bitcode section. + /// If available, pass the serialized module via the Buf parameter. If not, + /// pass an empty (default-initialized) MemoryBufferRef, and the serialization + /// will be handled by this API. The same behavior happens if the provided Buf + /// is not bitcode (i.e. if it's invalid data or even textual LLVM assembly). void EmbedBitcodeInModule(Module &M, MemoryBufferRef Buf, bool EmbedBitcode, bool EmbedMarker, const std::vector<uint8_t> *CmdArgs); diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index eaea026681b1d..28384bcb354fd 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -4829,11 +4829,10 @@ void llvm::EmbedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, std::string Data; ArrayRef<uint8_t> ModuleData; Triple T(M.getTargetTriple()); - // Create a constant that contains the bitcode. - // In case of embedding a marker, ignore the input Buf and use the empty - // ArrayRef. It is also legal to create a bitcode marker even Buf is empty. + if (EmbedBitcode) { - if (!isBitcode((const unsigned char *)Buf.getBufferStart(), + if (Buf.getBufferSize() == 0 || + !isBitcode((const unsigned char *)Buf.getBufferStart(), (const unsigned char *)Buf.getBufferEnd())) { // If the input is LLVM Assembly, bitcode is produced by serializing // the module. Use-lists order need to be preserved in this case. diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index 65d8669604950..966edcf693752 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -353,13 +353,7 @@ static cl::opt<bool> EmbedBitcode( static void EmitBitcodeSection(Module &M) { if (!EmbedBitcode) return; - SmallVector<char, 0> Buffer; - raw_svector_ostream OS(Buffer); - WriteBitcodeToFile(M, OS); - - std::unique_ptr<MemoryBuffer> Buf( - new SmallVectorMemoryBuffer(std::move(Buffer))); - llvm::EmbedBitcodeInModule(M, Buf->getMemBufferRef(), /*EmbedBitcode*/ true, + llvm::EmbedBitcodeInModule(M, llvm::MemoryBufferRef(), /*EmbedBitcode*/ true, /*EmbedMarker*/ false, /*CmdArgs*/ nullptr); } From 932aae77e92b08e63c0225b6eb37dfa80b310313 Mon Sep 17 00:00:00 2001 From: Sourabh Singh Tomar Date: Thu, 10 Sep 2020 23:04:37 +0530 Subject: [PATCH 0283/1079] Revert D86875 "[Flang][NFC] Remove license comments from files in docs/ folder." This reverts commit f787c9a90c69f, which was causing some build issues.
--- flang/docs/ArrayComposition.md | 8 ++++++++ flang/docs/C++17.md | 8 ++++++++ flang/docs/C++style.md | 8 ++++++++ flang/docs/Calls.md | 8 ++++++++ flang/docs/Character.md | 8 ++++++++ flang/docs/ControlFlowGraph.md | 8 ++++++++ flang/docs/Directives.md | 8 ++++++++ flang/docs/Extensions.md | 8 ++++++++ flang/docs/FortranForCProgrammers.md | 8 ++++++++ flang/docs/FortranIR.md | 8 ++++++++ flang/docs/IORuntimeInternals.md | 8 ++++++++ flang/docs/ImplementingASemanticCheck.md | 8 ++++++++ flang/docs/Intrinsics.md | 8 ++++++++ flang/docs/LabelResolution.md | 8 ++++++++ flang/docs/ModFiles.md | 8 ++++++++ flang/docs/OpenMP-semantics.md | 8 ++++++++ flang/docs/OptionComparison.md | 8 ++++++++ flang/docs/Overview.md | 8 ++++++++ flang/docs/ParserCombinators.md | 8 ++++++++ flang/docs/Parsing.md | 8 ++++++++ flang/docs/Preprocessing.md | 8 ++++++++ flang/docs/PullRequestChecklist.md | 8 ++++++++ flang/docs/RuntimeDescriptor.md | 8 ++++++++ flang/docs/Semantics.md | 8 ++++++++ 24 files changed, 192 insertions(+) diff --git a/flang/docs/ArrayComposition.md b/flang/docs/ArrayComposition.md index 18194caadf09c..0f30af39f9e4b 100644 --- a/flang/docs/ArrayComposition.md +++ b/flang/docs/ArrayComposition.md @@ -1,3 +1,11 @@ + + This note attempts to describe the motivation for and design of an implementation of Fortran 90 (and later) array expression evaluation that minimizes the use of dynamically allocated temporary storage for diff --git a/flang/docs/C++17.md b/flang/docs/C++17.md index ea8395cfdedc7..87d5fc01f0922 100644 --- a/flang/docs/C++17.md +++ b/flang/docs/C++17.md @@ -1,3 +1,11 @@ + + ## C++14/17 features used in f18 The C++ dialect used in this project constitutes a subset of the diff --git a/flang/docs/C++style.md b/flang/docs/C++style.md index 77e0a04638238..4ab95393d758a 100644 --- a/flang/docs/C++style.md +++ b/flang/docs/C++style.md @@ -1,3 +1,11 @@ + + ## In brief: * Use *clang-format* from llvm 7 diff --git a/flang/docs/Calls.md b/flang/docs/Calls.md index 8a4d65820d19f..d70bc910d73db 100644 --- a/flang/docs/Calls.md +++ b/flang/docs/Calls.md @@ -1,3 +1,11 @@ + + ## Procedure reference implementation protocol Fortran function and subroutine references are complicated. 
diff --git a/flang/docs/Character.md b/flang/docs/Character.md index f66b144389450..700db864f2dac 100644 --- a/flang/docs/Character.md +++ b/flang/docs/Character.md @@ -1,3 +1,11 @@ + + ## Implementation of `CHARACTER` types in f18 ### Kinds and Character Sets diff --git a/flang/docs/ControlFlowGraph.md b/flang/docs/ControlFlowGraph.md index 7d1e514a87adb..b2b549845ebb6 100644 --- a/flang/docs/ControlFlowGraph.md +++ b/flang/docs/ControlFlowGraph.md @@ -1,3 +1,11 @@ + + ## Concept After a Fortran subprogram has been parsed, its names resolved, and all its semantic constraints successfully checked, the parse tree of its diff --git a/flang/docs/Directives.md b/flang/docs/Directives.md index 554dc4608dd43..c2e93c5f3de2e 100644 --- a/flang/docs/Directives.md +++ b/flang/docs/Directives.md @@ -1,3 +1,11 @@ + + Compiler directives supported by F18 ==================================== diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md index 027927f67dfd4..7707309a88432 100644 --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -1,3 +1,11 @@ + + As a general principle, this compiler will accept by default and without complaint many legacy features, extensions to the standard language, and features that have been deleted from the standard, diff --git a/flang/docs/FortranForCProgrammers.md b/flang/docs/FortranForCProgrammers.md index 542034f3ea833..103def2a92ce6 100644 --- a/flang/docs/FortranForCProgrammers.md +++ b/flang/docs/FortranForCProgrammers.md @@ -1,3 +1,11 @@ + + Fortran For C Programmers ========================= diff --git a/flang/docs/FortranIR.md b/flang/docs/FortranIR.md index 83193ff27a359..5d83aaa8e34cf 100644 --- a/flang/docs/FortranIR.md +++ b/flang/docs/FortranIR.md @@ -1,3 +1,11 @@ + + # Design: Fortran IR ## Introduction diff --git a/flang/docs/IORuntimeInternals.md b/flang/docs/IORuntimeInternals.md index 8ff464ee9c8f7..b4f3092a014ec 100644 --- a/flang/docs/IORuntimeInternals.md +++ b/flang/docs/IORuntimeInternals.md @@ -1,3 +1,11 @@ + + Fortran I/O Runtime Library Internal Design =========================================== diff --git a/flang/docs/ImplementingASemanticCheck.md b/flang/docs/ImplementingASemanticCheck.md index 2406f5bc2a58c..3bb16915cb880 100644 --- a/flang/docs/ImplementingASemanticCheck.md +++ b/flang/docs/ImplementingASemanticCheck.md @@ -1,3 +1,11 @@ + +# Introduction I recently added a semantic check to the f18 compiler front end. This document describes my thought process and the resulting implementation. 
diff --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md index 6f4dec4678233..7be0bf3e4a9ca 100644 --- a/flang/docs/Intrinsics.md +++ b/flang/docs/Intrinsics.md @@ -1,3 +1,11 @@ + + # A categorization of standard (2018) and extended Fortran intrinsic procedures This note attempts to group the intrinsic procedures of Fortran into categories diff --git a/flang/docs/LabelResolution.md b/flang/docs/LabelResolution.md index 2dfa5a30bb3ca..e837b4fa6aece 100644 --- a/flang/docs/LabelResolution.md +++ b/flang/docs/LabelResolution.md @@ -1,3 +1,11 @@ + + # Semantics: Resolving Labels and Construct Names ## Overview diff --git a/flang/docs/ModFiles.md b/flang/docs/ModFiles.md index 367cd4cd54f7c..483341bdd0f47 100644 --- a/flang/docs/ModFiles.md +++ b/flang/docs/ModFiles.md @@ -1,3 +1,11 @@ + + # Module Files Module files hold information from a module that is necessary to compile diff --git a/flang/docs/OpenMP-semantics.md b/flang/docs/OpenMP-semantics.md index 22a3ca5614ebc..4e2a81739cf81 100644 --- a/flang/docs/OpenMP-semantics.md +++ b/flang/docs/OpenMP-semantics.md @@ -1,3 +1,11 @@ + + # OpenMP Semantic Analysis ## OpenMP for F18 diff --git a/flang/docs/OptionComparison.md b/flang/docs/OptionComparison.md index 5c04450a7bb34..db5932411cc1e 100644 --- a/flang/docs/OptionComparison.md +++ b/flang/docs/OptionComparison.md @@ -1,3 +1,11 @@ + + # Compiler options This document catalogs the options processed by F18's peers/competitors. Much of the document is taken up by a set of tables that list the options categorized into different topics. Some of the table headings link to more information about the contents of the tables. For example, the table on **Standards conformance** options links to [notes on Standards conformance](#standards). diff --git a/flang/docs/Overview.md b/flang/docs/Overview.md index 807efda2ed9a3..75a8cd1c4cab0 100644 --- a/flang/docs/Overview.md +++ b/flang/docs/Overview.md @@ -1,3 +1,11 @@ + + # Overview of Compiler Phases Each phase produces either correct output or fatal errors. diff --git a/flang/docs/ParserCombinators.md b/flang/docs/ParserCombinators.md index 757684dcfda60..4f3dc6fd07ae6 100644 --- a/flang/docs/ParserCombinators.md +++ b/flang/docs/ParserCombinators.md @@ -1,3 +1,11 @@ + + ## Concept The Fortran language recognizer here can be classified as an LL recursive descent parser. It is composed from a *parser combinator* library that diff --git a/flang/docs/Parsing.md b/flang/docs/Parsing.md index 54a4fd752f6c1..fad9a4d57278c 100644 --- a/flang/docs/Parsing.md +++ b/flang/docs/Parsing.md @@ -1,3 +1,11 @@ + + The F18 Parser ============== This program source code implements a parser for the Fortran programming diff --git a/flang/docs/Preprocessing.md b/flang/docs/Preprocessing.md index 9b4d905177b7f..7f6f3951cfd16 100644 --- a/flang/docs/Preprocessing.md +++ b/flang/docs/Preprocessing.md @@ -1,3 +1,11 @@ + + Fortran Preprocessing ===================== diff --git a/flang/docs/PullRequestChecklist.md b/flang/docs/PullRequestChecklist.md index 17b6d64923f58..12a67be374a20 100644 --- a/flang/docs/PullRequestChecklist.md +++ b/flang/docs/PullRequestChecklist.md @@ -1,3 +1,11 @@ + + # Pull request checklist Please review the following items before submitting a pull request. This list can also be used when reviewing pull requests. 
diff --git a/flang/docs/RuntimeDescriptor.md b/flang/docs/RuntimeDescriptor.md index a8eff33f65211..d819517fa9795 100644 --- a/flang/docs/RuntimeDescriptor.md +++ b/flang/docs/RuntimeDescriptor.md @@ -1,3 +1,11 @@ + + ## Concept The properties that characterize data values and objects in Fortran programs must sometimes be materialized when the program runs. diff --git a/flang/docs/Semantics.md b/flang/docs/Semantics.md index f879671b4f4ed..6ea0b292de69f 100644 --- a/flang/docs/Semantics.md +++ b/flang/docs/Semantics.md @@ -1,3 +1,11 @@ + + # Semantic Analysis The semantic analysis pass determines if a syntactically correct Fortran From cb8cb28ed90a10390bacb264d3b6cbb09c2ea94c Mon Sep 17 00:00:00 2001 From: Kamil Rytarowski Date: Thu, 10 Sep 2020 19:26:59 +0200 Subject: [PATCH 0284/1079] [compiler-rt] [netbsd] Add fallback definitions for MKISCSI=no Add dev/iscsi/iscsi_ioctl.h fallback ioctl(2) operations. --- .../sanitizer_platform_limits_netbsd.cpp | 152 ++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cpp index 25da334b63f09..be8b132cb81a0 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cpp @@ -83,6 +83,7 @@ #include #include +#include <sys/scsiio.h> #include #include #include @@ -139,7 +140,158 @@ #include #include #include +#if __has_include(<dev/iscsi/iscsi_ioctl.h>) #include <dev/iscsi/iscsi_ioctl.h> +#else +/* Fallback for MKISCSI=no */ + +typedef struct { + uint32_t status; + uint32_t session_id; + uint32_t connection_id; +} iscsi_conn_status_parameters_t; + +typedef struct { + uint32_t status; + uint16_t interface_version; + uint16_t major; + uint16_t minor; + uint8_t version_string[224]; +} iscsi_get_version_parameters_t; + +typedef struct { + uint32_t status; + uint32_t session_id; + uint32_t connection_id; + struct { + unsigned int immediate : 1; + } options; + uint64_t lun; + scsireq_t req; /* from <sys/scsiio.h> */ +} iscsi_iocommand_parameters_t; + +typedef enum { + ISCSI_AUTH_None = 0, + ISCSI_AUTH_CHAP = 1, + ISCSI_AUTH_KRB5 = 2, + ISCSI_AUTH_SRP = 3 +} iscsi_auth_types_t; + +typedef enum { + ISCSI_LOGINTYPE_DISCOVERY = 0, + ISCSI_LOGINTYPE_NOMAP = 1, + ISCSI_LOGINTYPE_MAP = 2 +} iscsi_login_session_type_t; + +typedef enum { ISCSI_DIGEST_None = 0, ISCSI_DIGEST_CRC32C = 1 } iscsi_digest_t; + +typedef enum { + ISCSI_SESSION_TERMINATED = 1, + ISCSI_CONNECTION_TERMINATED, + ISCSI_RECOVER_CONNECTION, + ISCSI_DRIVER_TERMINATING +} iscsi_event_t; + +typedef struct { + unsigned int mutual_auth : 1; + unsigned int is_secure : 1; + unsigned int auth_number : 4; + iscsi_auth_types_t auth_type[4]; +} iscsi_auth_info_t; + +typedef struct { + uint32_t status; + int socket; + struct { + unsigned int HeaderDigest : 1; + unsigned int DataDigest : 1; + unsigned int MaxConnections : 1; + unsigned int DefaultTime2Wait : 1; + unsigned int DefaultTime2Retain : 1; + unsigned int MaxRecvDataSegmentLength : 1; + unsigned int auth_info : 1; + unsigned int user_name : 1; + unsigned int password : 1; + unsigned int target_password : 1; + unsigned int TargetName : 1; + unsigned int TargetAlias : 1; + unsigned int ErrorRecoveryLevel : 1; + } is_present; + iscsi_auth_info_t auth_info; + iscsi_login_session_type_t login_type; + iscsi_digest_t HeaderDigest; + iscsi_digest_t DataDigest; + uint32_t session_id; + uint32_t connection_id; + uint32_t MaxRecvDataSegmentLength; + uint16_t MaxConnections; + uint16_t DefaultTime2Wait; +
uint16_t DefaultTime2Retain; + uint16_t ErrorRecoveryLevel; + void *user_name; + void *password; + void *target_password; + void *TargetName; + void *TargetAlias; +} iscsi_login_parameters_t; + +typedef struct { + uint32_t status; + uint32_t session_id; +} iscsi_logout_parameters_t; + +typedef struct { + uint32_t status; + uint32_t event_id; +} iscsi_register_event_parameters_t; + +typedef struct { + uint32_t status; + uint32_t session_id; + uint32_t connection_id; +} iscsi_remove_parameters_t; + +typedef struct { + uint32_t status; + uint32_t session_id; + void *response_buffer; + uint32_t response_size; + uint32_t response_used; + uint32_t response_total; + uint8_t key[224]; +} iscsi_send_targets_parameters_t; + +typedef struct { + uint32_t status; + uint8_t InitiatorName[224]; + uint8_t InitiatorAlias[224]; + uint8_t ISID[6]; +} iscsi_set_node_name_parameters_t; + +typedef struct { + uint32_t status; + uint32_t event_id; + iscsi_event_t event_kind; + uint32_t session_id; + uint32_t connection_id; + uint32_t reason; +} iscsi_wait_event_parameters_t; + +#define ISCSI_GET_VERSION _IOWR(0, 1, iscsi_get_version_parameters_t) +#define ISCSI_LOGIN _IOWR(0, 2, iscsi_login_parameters_t) +#define ISCSI_LOGOUT _IOWR(0, 3, iscsi_logout_parameters_t) +#define ISCSI_ADD_CONNECTION _IOWR(0, 4, iscsi_login_parameters_t) +#define ISCSI_RESTORE_CONNECTION _IOWR(0, 5, iscsi_login_parameters_t) +#define ISCSI_REMOVE_CONNECTION _IOWR(0, 6, iscsi_remove_parameters_t) +#define ISCSI_CONNECTION_STATUS _IOWR(0, 7, iscsi_conn_status_parameters_t) +#define ISCSI_SEND_TARGETS _IOWR(0, 8, iscsi_send_targets_parameters_t) +#define ISCSI_SET_NODE_NAME _IOWR(0, 9, iscsi_set_node_name_parameters_t) +#define ISCSI_IO_COMMAND _IOWR(0, 10, iscsi_iocommand_parameters_t) +#define ISCSI_REGISTER_EVENT _IOWR(0, 11, iscsi_register_event_parameters_t) +#define ISCSI_DEREGISTER_EVENT _IOWR(0, 12, iscsi_register_event_parameters_t) +#define ISCSI_WAIT_EVENT _IOWR(0, 13, iscsi_wait_event_parameters_t) +#define ISCSI_POLL_EVENT _IOWR(0, 14, iscsi_wait_event_parameters_t) +#endif #include #include #include From b85c085c846c2cb5d24812555847846877ca13cb Mon Sep 17 00:00:00 2001 From: Kamil Rytarowski Date: Thu, 10 Sep 2020 19:31:41 +0200 Subject: [PATCH 0285/1079] [compiler-rt] [netbsd] Improve code formatting No functional change. --- .../sanitizer_common/sanitizer_platform_limits_netbsd.cpp | 2 +- .../lib/sanitizer_common/sanitizer_platform_limits_netbsd.h | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cpp index be8b132cb81a0..dc1f5a6616f33 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cpp @@ -524,7 +524,7 @@ struct urio_command { #include "sanitizer_platform_limits_netbsd.h" namespace __sanitizer { -void *__sanitizer_get_link_map_by_dlopen_handle(void* handle) { +void *__sanitizer_get_link_map_by_dlopen_handle(void *handle) { void *p = nullptr; return internal_dlinfo(handle, RTLD_DI_LINKMAP, &p) == 0 ? 
p : nullptr; } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h index d80280d9bf8c8..9e28dcfef0415 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h @@ -21,8 +21,8 @@ namespace __sanitizer { void *__sanitizer_get_link_map_by_dlopen_handle(void *handle); -# define GET_LINK_MAP_BY_DLOPEN_HANDLE(handle) \ - (link_map *)__sanitizer_get_link_map_by_dlopen_handle(handle) +#define GET_LINK_MAP_BY_DLOPEN_HANDLE(handle) \ + (link_map *)__sanitizer_get_link_map_by_dlopen_handle(handle) extern unsigned struct_utsname_sz; extern unsigned struct_stat_sz; @@ -1024,12 +1024,10 @@ extern unsigned struct_RF_ProgressInfo_sz; extern unsigned struct_nvlist_ref_sz; extern unsigned struct_StringList_sz; - // A special value to mark ioctls that are not present on the target platform, // when it can not be determined without including any system headers. extern const unsigned IOCTL_NOT_PRESENT; - extern unsigned IOCTL_AFM_ADDFMAP; extern unsigned IOCTL_AFM_DELFMAP; extern unsigned IOCTL_AFM_CLEANFMAP; From 46329f6079da99133eab7942e79226b2afb40e75 Mon Sep 17 00:00:00 2001 From: Anna Thomas Date: Thu, 10 Sep 2020 13:14:44 -0400 Subject: [PATCH 0286/1079] [ImplicitNullCheck] Handle instructions that preserve zero value This is the first in a series of patches to make implicit null checks more general. This patch identifies instructions that preserve the zero value of a register and considers them valid instructions to hoist along with the faulting load. See added testcases. Reviewed-By: reames, dantrushin Differential Revision: https://reviews.llvm.org/D87108 --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 11 ++++ llvm/lib/CodeGen/ImplicitNullChecks.cpp | 14 +--- llvm/lib/Target/X86/X86InstrInfo.cpp | 28 ++++++++ llvm/lib/Target/X86/X86InstrInfo.h | 4 ++ .../X86/implicit-null-check-negative.ll | 20 ++++++ llvm/test/CodeGen/X86/implicit-null-check.ll | 64 +++++++++++++++++++ 6 files changed, 130 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index f9f9ce41e329b..0629c81d4f4f8 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1270,6 +1270,17 @@ class TargetInstrInfo : public MCInstrInfo { return false; } + /// Returns true if MI's Def is NullValueReg, and the MI + /// does not change the Zero value. i.e. cases such as rax = shr rax, X where + /// NullValueReg = rax. Note that if the NullValueReg is non-zero, this + /// function can return true even if becomes zero. Specifically cases such as + /// NullValueReg = shl NullValueReg, 63. + virtual bool preservesZeroValueInReg(const MachineInstr *MI, + const Register NullValueReg, + const TargetRegisterInfo *TRI) const { + return false; + } + /// If the instruction is an increment of a constant value, return the amount.
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const { return false; diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp index dc1b0a867b0d6..8e1f9c36c7fec 100644 --- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -435,12 +435,6 @@ bool ImplicitNullChecks::canDependenceHoistingClobberLiveIns( if (AnyAliasLiveIn(TRI, NullSucc, DependenceMO.getReg())) return true; - // The Dependency can't be re-defining the base register -- then we won't - // get the memory operation on the address we want. This is already - // checked in \c IsSuitableMemoryOp. - assert(!(DependenceMO.isDef() && - TRI->regsOverlap(DependenceMO.getReg(), PointerReg)) && - "Should have been checked before!"); } // The dependence does not clobber live-ins in NullSucc block. @@ -628,11 +622,9 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( return true; } - // If MI re-defines the PointerReg then we cannot move further. - if (llvm::any_of(MI.operands(), [&](MachineOperand &MO) { - return MO.isReg() && MO.getReg() && MO.isDef() && - TRI->regsOverlap(MO.getReg(), PointerReg); - })) + // If MI re-defines the PointerReg in a way that changes the value of + // PointerReg if it was null, then we cannot move further. + if (!TII->preservesZeroValueInReg(&MI, PointerReg, TRI)) return false; InstsSeenSoFar.push_back(&MI); } diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 5aac29e21d6f9..1f4bf30cc1d02 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3663,6 +3663,34 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, } } +bool X86InstrInfo::preservesZeroValueInReg( + const MachineInstr *MI, const Register NullValueReg, + const TargetRegisterInfo *TRI) const { + if (!MI->modifiesRegister(NullValueReg, TRI)) + return true; + switch (MI->getOpcode()) { + // Shift right/left of a null unto itself is still a null, i.e. rax = shl rax + // X. + case X86::SHR64ri: + case X86::SHR32ri: + case X86::SHL64ri: + case X86::SHL32ri: + assert(MI->getOperand(0).isDef() && MI->getOperand(1).isUse() && + "expected for shift opcode!"); + return MI->getOperand(0).getReg() == NullValueReg && + MI->getOperand(1).getReg() == NullValueReg; + // Zero extend of a sub-reg of NullValueReg into itself does not change the + // null value. 
+ case X86::MOV32rr: + return llvm::all_of(MI->operands(), [&](const MachineOperand &MO) { + return TRI->isSubRegisterEq(NullValueReg, MO.getReg()); + }); + default: + return false; + } + llvm_unreachable("Should be handled above!"); +} + bool X86InstrInfo::getMemOperandsWithOffsetWidth( const MachineInstr &MemOp, SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index cd91144c829af..215318105de45 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -317,6 +317,10 @@ class X86InstrInfo final : public X86GenInstrInfo { SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const override; + bool preservesZeroValueInReg(const MachineInstr *MI, + const Register NullValueReg, + const TargetRegisterInfo *TRI) const override; + bool getMemOperandsWithOffsetWidth( const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset, diff --git a/llvm/test/CodeGen/X86/implicit-null-check-negative.ll b/llvm/test/CodeGen/X86/implicit-null-check-negative.ll index c05b4a072adfd..d7eae8c98173a 100644 --- a/llvm/test/CodeGen/X86/implicit-null-check-negative.ll +++ b/llvm/test/CodeGen/X86/implicit-null-check-negative.ll @@ -109,4 +109,24 @@ define i32 @imp_null_check_add_result(i32* %x, i32* %y) { ret i32 %p } +; This redefines the null check reg by doing a zero-extend, a shift on +; itself and then an add. +; Cannot be converted to implicit check since the zero reg is no longer zero. +define i64 @imp_null_check_load_shift_add_addr(i64* %x, i64 %r) { + entry: + %c = icmp eq i64* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i64 42 + + not_null: + %y = ptrtoint i64* %x to i64 + %shry = shl i64 %y, 6 + %shry.add = add i64 %shry, %r + %y.ptr = inttoptr i64 %shry.add to i64* + %x.loc = getelementptr i64, i64* %y.ptr, i64 1 + %t = load i64, i64* %x.loc + ret i64 %t +} !0 = !{} diff --git a/llvm/test/CodeGen/X86/implicit-null-check.ll b/llvm/test/CodeGen/X86/implicit-null-check.ll index 6d6b31f86dbe9..c6241b18f785e 100644 --- a/llvm/test/CodeGen/X86/implicit-null-check.ll +++ b/llvm/test/CodeGen/X86/implicit-null-check.ll @@ -48,6 +48,8 @@ define i32 @imp_null_check_unordered_load(i32* %x) { ret i32 %t } + +; TODO: Can be converted into implicit check. ;; Probably could be implicit, but we're conservative for now define i32 @imp_null_check_seq_cst_load(i32* %x) { ; CHECK-LABEL: imp_null_check_seq_cst_load: @@ -557,4 +559,66 @@ define i32 @imp_null_check_neg_gep_load(i32* %x) { ret i32 %t } +; This redefines the null check reg by doing a zero-extend and a shift on +; itself. +; Converted into implicit null check since both of these operations do not +; change the nullness of %x (i.e. if it is null, it remains null).
+define i64 @imp_null_check_load_shift_addr(i64* %x) { +; CHECK-LABEL: imp_null_check_load_shift_addr: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: shlq $6, %rdi +; CHECK-NEXT: Ltmp17: +; CHECK-NEXT: movq 8(%rdi), %rax ## on-fault: LBB21_1 +; CHECK-NEXT: ## %bb.2: ## %not_null +; CHECK-NEXT: retq +; CHECK-NEXT: LBB21_1: ## %is_null +; CHECK-NEXT: movl $42, %eax +; CHECK-NEXT: retq + + entry: + %c = icmp eq i64* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i64 42 + + not_null: + %y = ptrtoint i64* %x to i64 + %shry = shl i64 %y, 6 + %y.ptr = inttoptr i64 %shry to i64* + %x.loc = getelementptr i64, i64* %y.ptr, i64 1 + %t = load i64, i64* %x.loc + ret i64 %t +} + +; Same as imp_null_check_load_shift_addr but shift is by 3 and this is now +; converted into complex addressing. +; TODO: Can be converted into implicit null check +define i64 @imp_null_check_load_shift_by_3_addr(i64* %x) { +; CHECK-LABEL: imp_null_check_load_shift_by_3_addr: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: je LBB22_1 +; CHECK-NEXT: ## %bb.2: ## %not_null +; CHECK-NEXT: movq 8(,%rdi,8), %rax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB22_1: ## %is_null +; CHECK-NEXT: movl $42, %eax +; CHECK-NEXT: retq + + entry: + %c = icmp eq i64* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i64 42 + + not_null: + %y = ptrtoint i64* %x to i64 + %shry = shl i64 %y, 3 + %y.ptr = inttoptr i64 %shry to i64* + %x.loc = getelementptr i64, i64* %y.ptr, i64 1 + %t = load i64, i64* %x.loc + ret i64 %t +} !0 = !{} From d9c8b0256cfc673c2413b13993c9440be598818f Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 10 Sep 2020 10:05:46 -0700 Subject: [PATCH 0287/1079] [Support] Use unique_function rather than std::function for ThreadPool TaskTy. This will allow non-copyable function objects (e.g. lambdas that capture unique_ptrs) to be used with ThreadPool. Differential Revision: https://reviews.llvm.org/D87467 --- llvm/include/llvm/Support/ThreadPool.h | 3 ++- llvm/unittests/Support/ThreadPool.cpp | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h index 528fb32525eb2..3d24fb0997393 100644 --- a/llvm/include/llvm/Support/ThreadPool.h +++ b/llvm/include/llvm/Support/ThreadPool.h @@ -13,6 +13,7 @@ #ifndef LLVM_SUPPORT_THREAD_POOL_H #define LLVM_SUPPORT_THREAD_POOL_H +#include "llvm/ADT/FunctionExtras.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/Threading.h" #include "llvm/Support/thread.h" @@ -36,7 +37,7 @@ namespace llvm { /// for some work to become available. 
class ThreadPool { public: - using TaskTy = std::function<void()>; + using TaskTy = unique_function<void()>; using PackagedTaskTy = std::packaged_task<void()>; /// Construct a pool using the hardware strategy \p S for mapping hardware diff --git a/llvm/unittests/Support/ThreadPool.cpp b/llvm/unittests/Support/ThreadPool.cpp index 43882d0f3ceea..b3747376689a8 100644 --- a/llvm/unittests/Support/ThreadPool.cpp +++ b/llvm/unittests/Support/ThreadPool.cpp @@ -133,6 +133,13 @@ TEST_F(ThreadPoolTest, Async) { ASSERT_EQ(2, i.load()); } +TEST_F(ThreadPoolTest, NonCopyableTask) { + CHECK_UNSUPPORTED(); + ThreadPool Pool; + Pool.async([P = std::make_unique<int>()] {}); + Pool.wait(); +}; + TEST_F(ThreadPoolTest, GetFuture) { CHECK_UNSUPPORTED(); ThreadPool Pool(hardware_concurrency(2)); From 6040d525507ba8a2593f0906259d012725b6aed2 Mon Sep 17 00:00:00 2001 From: Jordan Rupprecht Date: Thu, 10 Sep 2020 10:57:08 -0700 Subject: [PATCH 0288/1079] [NFC] Fix whitespace in lldb-vscode --help --- lldb/tools/lldb-vscode/lldb-vscode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index 54f2e653d0697..7d7d0f9ebe91c 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -2869,7 +2869,7 @@ const std::map<std::string, RequestCallback> &GetRequestHandlers() { } // anonymous namespace static void printHelp(LLDBVSCodeOptTable &table, llvm::StringRef tool_name) { - std::string usage_str = tool_name.str() + "options"; + std::string usage_str = tool_name.str() + " options"; table.PrintHelp(llvm::outs(), usage_str.c_str(), "LLDB VSCode", false); std::string examples = R"___( From a0ffe2b21a5159f3f8eed8e98e488e723aa7cab3 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 10 Sep 2020 11:03:48 -0700 Subject: [PATCH 0289/1079] [PGO] Skip if an IndirectBrInst critical edge cannot be split PGOInstrumentation runs `SplitIndirectBrCriticalEdges` but some IndirectBrInst critical edge cannot be split. `getInstrBB` will crash when calling `SplitCriticalEdge`, e.g. int foo(char *p) { void *targets[2]; targets[0] = &&indirect; targets[1] = &&end; for (;; p++) if (*p == 7) { indirect: goto *targets[p[1]]; // the self loop is critical in -O } end: return 0; } Skip such critical edges to prevent a crash. Reviewed By: davidxl, lebedev.ri Differential Revision: https://reviews.llvm.org/D87435 --- .../Instrumentation/PGOInstrumentation.cpp | 5 +++- .../split-indirectbr-critical-edges.ll | 24 +++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index be2e091e8c08f..dd70c1f77d9c1 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -807,8 +807,11 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { if (!E->IsCritical) return canInstrument(DestBB); + // Some IndirectBr critical edges cannot be split by the previous + // SplitIndirectBrCriticalEdges call. Bail out. unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); - BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum); + BasicBlock *InstrBB = + isa<IndirectBrInst>(TI) ?
nullptr : SplitCriticalEdge(TI, SuccNum); if (!InstrBB) { LLVM_DEBUG( dbgs() << "Fail to split critical edge: not instrument this edge.\n"); diff --git a/llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll b/llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll index dc834b7cd47cc..70daa54331a30 100644 --- a/llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll +++ b/llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll @@ -37,3 +37,27 @@ if.end: ; preds = %if.end.preheader, % indirectbr i8* %2, [label %for.cond2, label %if.end] ; CHECK: indirectbr i8* %2, [label %for.cond2, label %if.end] } + +;; If an indirectbr critical edge cannot be split, ignore it. +;; The edge will not be profiled. +; CHECK-LABEL: @cannot_split( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.instrprof.increment +; CHECK-NOT: call void @llvm.instrprof.increment +define i32 @cannot_split(i8* nocapture readonly %p) { +entry: + %targets = alloca <2 x i8*>, align 16 + store <2 x i8*> <i8* blockaddress(@cannot_split, %indirect), i8* blockaddress(@cannot_split, %end)>, <2 x i8*>* %targets, align 16 + %arrayidx2 = getelementptr inbounds i8, i8* %p, i64 1 + %0 = load i8, i8* %arrayidx2 + %idxprom = sext i8 %0 to i64 + %arrayidx3 = getelementptr inbounds <2 x i8*>, <2 x i8*>* %targets, i64 0, i64 %idxprom + %1 = load i8*, i8** %arrayidx3, align 8 + br label %indirect + +indirect: ; preds = %entry, %indirect + indirectbr i8* %1, [label %indirect, label %end] + +end: ; preds = %indirect + ret i32 0 +} From bba736e5036f3983ca22f08dec277fdf37926115 Mon Sep 17 00:00:00 2001 From: Kamil Rytarowski Date: Thu, 10 Sep 2020 20:09:53 +0200 Subject: [PATCH 0290/1079] [compiler-rt] [netbsd] Update generate_netbsd_syscalls.awk Sync with NetBSD 9.99.72. --- .../utils/generate_netbsd_syscalls.awk | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/compiler-rt/utils/generate_netbsd_syscalls.awk b/compiler-rt/utils/generate_netbsd_syscalls.awk index cc7ba314ea551..1bddc0f2f2bff 100755 --- a/compiler-rt/utils/generate_netbsd_syscalls.awk +++ b/compiler-rt/utils/generate_netbsd_syscalls.awk @@ -1167,6 +1167,8 @@ function syscall_body(syscall, mode) pcmd("/* TODO */") } else if (syscall == "dup2") { pcmd("/* Nothing to do */") + } else if (syscall == "getrandom") { + pcmd("/* TODO */") } else if (syscall == "fcntl") { pcmd("/* Nothing to do */") } else if (syscall == "compat_50_select") { @@ -1431,6 +1433,12 @@ function syscall_body(syscall, mode) pcmd("/* TODO */") } else if (syscall == "sysarch") { pcmd("/* TODO */") + } else if (syscall == "__futex") { + pcmd("/* TODO */") + } else if (syscall == "__futex_set_robust_list") { + pcmd("/* TODO */") + } else if (syscall == "__futex_get_robust_list") { + pcmd("/* TODO */") } else if (syscall == "compat_10_osemsys") { pcmd("/* TODO */") } else if (syscall == "compat_10_omsgsys") { pcmd("/* TODO */") @@ -3027,6 +3035,32 @@ function syscall_body(syscall, mode) pcmd(" PRE_READ(fhp_, fh_size_);") pcmd("}") } + } else if (syscall == "__acl_get_link") { + pcmd("/* TODO */") + } else if (syscall == "__acl_set_link") { + pcmd("/* TODO */") + } else if (syscall == "__acl_delete_link") { + pcmd("/* TODO */") + } else if (syscall == "__acl_aclcheck_link") { + pcmd("/* TODO */") + } else if (syscall == "__acl_get_file") { + pcmd("/* TODO */") + } else if (syscall == "__acl_set_file") { + pcmd("/* TODO */") + } else if (syscall == "__acl_get_fd") { + pcmd("/* TODO */") + } else if (syscall == "__acl_set_fd") { + pcmd("/* TODO */") + } else if (syscall == "__acl_delete_file") { + pcmd("/* TODO */") + } else if (syscall ==
"__acl_delete_fd") { + pcmd("/* TODO */") + } else if (syscall == "__acl_aclcheck_file") { + pcmd("/* TODO */") + } else if (syscall == "__acl_aclcheck_fd") { + pcmd("/* TODO */") + } else if (syscall == "lpathconf") { + pcmd("/* TODO */") } else { print "Unrecognized syscall: " syscall abnormal_exit = 1 From 00460ae520e284ae8c0cd400c1c75c0c7a0e8fa7 Mon Sep 17 00:00:00 2001 From: Kamil Rytarowski Date: Thu, 10 Sep 2020 20:11:35 +0200 Subject: [PATCH 0291/1079] [compiler-rt] [netbsd] Regenerate syscall hooks Sync with NetBSD 9.99.72. --- .../include/sanitizer/netbsd_syscall_hooks.h | 213 +++++++++++++++++- .../sanitizer_syscalls_netbsd.inc | 119 +++++++++- 2 files changed, 316 insertions(+), 16 deletions(-) diff --git a/compiler-rt/include/sanitizer/netbsd_syscall_hooks.h b/compiler-rt/include/sanitizer/netbsd_syscall_hooks.h index 370da0ea72ed8..f661152ccbac7 100644 --- a/compiler-rt/include/sanitizer/netbsd_syscall_hooks.h +++ b/compiler-rt/include/sanitizer/netbsd_syscall_hooks.h @@ -20,8 +20,8 @@ // DO NOT EDIT! THIS FILE HAS BEEN GENERATED! // // Generated with: generate_netbsd_syscalls.awk -// Generated date: 2019-12-24 -// Generated from: syscalls.master,v 1.296 2019/09/22 22:59:39 christos Exp +// Generated date: 2020-09-10 +// Generated from: syscalls.master,v 1.306 2020/08/14 00:53:16 riastradh Exp // //===----------------------------------------------------------------------===// #ifndef SANITIZER_NETBSD_SYSCALL_HOOKS_H @@ -474,7 +474,12 @@ __sanitizer_syscall_pre_impl_dup2((long long)(from), (long long)(to)) #define __sanitizer_syscall_post_dup2(res, from, to) \ __sanitizer_syscall_post_impl_dup2(res, (long long)(from), (long long)(to)) -/* syscall 91 has been skipped */ +#define __sanitizer_syscall_pre_getrandom(buf, buflen, flags) \ + __sanitizer_syscall_pre_impl_getrandom( \ + (long long)(buf), (long long)(buflen), (long long)(flags)) +#define __sanitizer_syscall_post_getrandom(res, buf, buflen, flags) \ + __sanitizer_syscall_post_impl_getrandom( \ + res, (long long)(buf), (long long)(buflen), (long long)(flags)) #define __sanitizer_syscall_pre_fcntl(fd, cmd, arg) \ __sanitizer_syscall_pre_impl_fcntl((long long)(fd), (long long)(cmd), \ (long long)(arg)) @@ -849,9 +854,31 @@ #define __sanitizer_syscall_post_sysarch(res, op, parms) \ __sanitizer_syscall_post_impl_sysarch(res, (long long)(op), \ (long long)(parms)) -/* syscall 166 has been skipped */ -/* syscall 167 has been skipped */ -/* syscall 168 has been skipped */ +#define __sanitizer_syscall_pre___futex(uaddr, op, val, timeout, uaddr2, val2, \ + val3) \ + __sanitizer_syscall_pre_impl___futex((long long)(uaddr), (long long)(op), \ + (long long)(val), (long long)(timeout), \ + (long long)(uaddr2), (long long)(val2), \ + (long long)(val3)) +#define __sanitizer_syscall_post___futex(res, uaddr, op, val, timeout, uaddr2, \ + val2, val3) \ + __sanitizer_syscall_post_impl___futex( \ + res, (long long)(uaddr), (long long)(op), (long long)(val), \ + (long long)(timeout), (long long)(uaddr2), (long long)(val2), \ + (long long)(val3)) +#define __sanitizer_syscall_pre___futex_set_robust_list(head, len) \ + __sanitizer_syscall_pre_impl___futex_set_robust_list((long long)(head), \ + (long long)(len)) +#define __sanitizer_syscall_post___futex_set_robust_list(res, head, len) \ + __sanitizer_syscall_post_impl___futex_set_robust_list( \ + res, (long long)(head), (long long)(len)) +#define __sanitizer_syscall_pre___futex_get_robust_list(lwpid, headp, lenp) \ + __sanitizer_syscall_pre_impl___futex_get_robust_list( \ + (long long)(lwpid), 
(long long)(headp), (long long)(lenp)) +#define __sanitizer_syscall_post___futex_get_robust_list(res, lwpid, headp, \ + lenp) \ + __sanitizer_syscall_post_impl___futex_get_robust_list( \ + res, (long long)(lwpid), (long long)(headp), (long long)(lenp)) #if !defined(_LP64) #define __sanitizer_syscall_pre_compat_10_osemsys(which, a2, a3, a4, a5) \ __sanitizer_syscall_pre_impl_compat_10_osemsys( \ @@ -2731,6 +2758,83 @@ __sanitizer_syscall_post_impl___fhstatvfs190( \ res, (long long)(fhp), (long long)(fh_size), (long long)(buf), \ (long long)(flags)) +#define __sanitizer_syscall_pre___acl_get_link(path, type, aclp) \ + __sanitizer_syscall_pre_impl___acl_get_link( \ + (long long)(path), (long long)(type), (long long)(aclp)) +#define __sanitizer_syscall_post___acl_get_link(res, path, type, aclp) \ + __sanitizer_syscall_post_impl___acl_get_link( \ + res, (long long)(path), (long long)(type), (long long)(aclp)) +#define __sanitizer_syscall_pre___acl_set_link(path, type, aclp) \ + __sanitizer_syscall_pre_impl___acl_set_link( \ + (long long)(path), (long long)(type), (long long)(aclp)) +#define __sanitizer_syscall_post___acl_set_link(res, path, type, aclp) \ + __sanitizer_syscall_post_impl___acl_set_link( \ + res, (long long)(path), (long long)(type), (long long)(aclp)) +#define __sanitizer_syscall_pre___acl_delete_link(path, type) \ + __sanitizer_syscall_pre_impl___acl_delete_link((long long)(path), \ + (long long)(type)) +#define __sanitizer_syscall_post___acl_delete_link(res, path, type) \ + __sanitizer_syscall_post_impl___acl_delete_link(res, (long long)(path), \ + (long long)(type)) +#define __sanitizer_syscall_pre___acl_aclcheck_link(path, type, aclp) \ + __sanitizer_syscall_pre_impl___acl_aclcheck_link( \ + (long long)(path), (long long)(type), (long long)(aclp)) +#define __sanitizer_syscall_post___acl_aclcheck_link(res, path, type, aclp) \ + __sanitizer_syscall_post_impl___acl_aclcheck_link( \ + res, (long long)(path), (long long)(type), (long long)(aclp)) +#define __sanitizer_syscall_pre___acl_get_file(path, type, aclp) \ + __sanitizer_syscall_pre_impl___acl_get_file( \ + (long long)(path), (long long)(type), (long long)(aclp)) +#define __sanitizer_syscall_post___acl_get_file(res, path, type, aclp) \ + __sanitizer_syscall_post_impl___acl_get_file( \ + res, (long long)(path), (long long)(type), (long long)(aclp)) +#define __sanitizer_syscall_pre___acl_set_file(path, type, aclp) \ + __sanitizer_syscall_pre_impl___acl_set_file( \ + (long long)(path), (long long)(type), (long long)(aclp)) +#define __sanitizer_syscall_post___acl_set_file(res, path, type, aclp) \ + __sanitizer_syscall_post_impl___acl_set_file( \ + res, (long long)(path), (long long)(type), (long long)(aclp)) +#define __sanitizer_syscall_pre___acl_get_fd(filedes, type, aclp) \ + __sanitizer_syscall_pre_impl___acl_get_fd( \ + (long long)(filedes), (long long)(type), (long long)(aclp)) +#define __sanitizer_syscall_post___acl_get_fd(res, filedes, type, aclp) \ + __sanitizer_syscall_post_impl___acl_get_fd( \ + res, (long long)(filedes), (long long)(type), (long long)(aclp)) +#define __sanitizer_syscall_pre___acl_set_fd(filedes, type, aclp) \ + __sanitizer_syscall_pre_impl___acl_set_fd( \ + (long long)(filedes), (long long)(type), (long long)(aclp)) +#define __sanitizer_syscall_post___acl_set_fd(res, filedes, type, aclp) \ + __sanitizer_syscall_post_impl___acl_set_fd( \ + res, (long long)(filedes), (long long)(type), (long long)(aclp)) +#define __sanitizer_syscall_pre___acl_delete_file(path, type) \ + 
__sanitizer_syscall_pre_impl___acl_delete_file((long long)(path), \ + (long long)(type)) +#define __sanitizer_syscall_post___acl_delete_file(res, path, type) \ + __sanitizer_syscall_post_impl___acl_delete_file(res, (long long)(path), \ + (long long)(type)) +#define __sanitizer_syscall_pre___acl_delete_fd(filedes, type) \ + __sanitizer_syscall_pre_impl___acl_delete_fd((long long)(filedes), \ + (long long)(type)) +#define __sanitizer_syscall_post___acl_delete_fd(res, filedes, type) \ + __sanitizer_syscall_post_impl___acl_delete_fd(res, (long long)(filedes), \ + (long long)(type)) +#define __sanitizer_syscall_pre___acl_aclcheck_file(path, type, aclp) \ + __sanitizer_syscall_pre_impl___acl_aclcheck_file( \ + (long long)(path), (long long)(type), (long long)(aclp)) +#define __sanitizer_syscall_post___acl_aclcheck_file(res, path, type, aclp) \ + __sanitizer_syscall_post_impl___acl_aclcheck_file( \ + res, (long long)(path), (long long)(type), (long long)(aclp)) +#define __sanitizer_syscall_pre___acl_aclcheck_fd(filedes, type, aclp) \ + __sanitizer_syscall_pre_impl___acl_aclcheck_fd( \ + (long long)(filedes), (long long)(type), (long long)(aclp)) +#define __sanitizer_syscall_post___acl_aclcheck_fd(res, filedes, type, aclp) \ + __sanitizer_syscall_post_impl___acl_aclcheck_fd( \ + res, (long long)(filedes), (long long)(type), (long long)(aclp)) +#define __sanitizer_syscall_pre_lpathconf(path, name) \ + __sanitizer_syscall_pre_impl_lpathconf((long long)(path), (long long)(name)) +#define __sanitizer_syscall_post_lpathconf(res, path, name) \ + __sanitizer_syscall_post_impl_lpathconf(res, (long long)(path), \ + (long long)(name)) /* Compat with older releases */ #define __sanitizer_syscall_pre_getvfsstat \ @@ -3088,7 +3192,10 @@ void __sanitizer_syscall_post_impl_compat_43_ogetdtablesize(long long res); void __sanitizer_syscall_pre_impl_dup2(long long from, long long to); void __sanitizer_syscall_post_impl_dup2(long long res, long long from, long long to); -/* syscall 91 has been skipped */ +void __sanitizer_syscall_pre_impl_getrandom(long long buf, long long buflen, + long long flags); +void __sanitizer_syscall_post_impl_getrandom(long long res, long long buf, + long long buflen, long long flags); void __sanitizer_syscall_pre_impl_fcntl(long long fd, long long cmd, long long arg); void __sanitizer_syscall_post_impl_fcntl(long long res, long long fd, @@ -3380,9 +3487,26 @@ void __sanitizer_syscall_post_impl_compat_09_ouname(long long res, void __sanitizer_syscall_pre_impl_sysarch(long long op, long long parms); void __sanitizer_syscall_post_impl_sysarch(long long res, long long op, long long parms); -/* syscall 166 has been skipped */ -/* syscall 167 has been skipped */ -/* syscall 168 has been skipped */ +void __sanitizer_syscall_pre_impl___futex(long long uaddr, long long op, + long long val, long long timeout, + long long uaddr2, long long val2, + long long val3); +void __sanitizer_syscall_post_impl___futex(long long res, long long uaddr, + long long op, long long val, + long long timeout, long long uaddr2, + long long val2, long long val3); +void __sanitizer_syscall_pre_impl___futex_set_robust_list(long long head, + long long len); +void __sanitizer_syscall_post_impl___futex_set_robust_list(long long res, + long long head, + long long len); +void __sanitizer_syscall_pre_impl___futex_get_robust_list(long long lwpid, + long long headp, + long long lenp); +void __sanitizer_syscall_post_impl___futex_get_robust_list(long long res, + long long lwpid, + long long headp, + long long lenp); #if 
!defined(_LP64) void __sanitizer_syscall_pre_impl_compat_10_osemsys(long long which, long long a2, long long a3, @@ -4802,6 +4926,75 @@ void __sanitizer_syscall_post_impl___fhstatvfs190(long long res, long long fhp, long long fh_size, long long buf, long long flags); +void __sanitizer_syscall_pre_impl___acl_get_link(long long path, long long type, + long long aclp); +void __sanitizer_syscall_post_impl___acl_get_link(long long res, long long path, + long long type, + long long aclp); +void __sanitizer_syscall_pre_impl___acl_set_link(long long path, long long type, + long long aclp); +void __sanitizer_syscall_post_impl___acl_set_link(long long res, long long path, + long long type, + long long aclp); +void __sanitizer_syscall_pre_impl___acl_delete_link(long long path, + long long type); +void __sanitizer_syscall_post_impl___acl_delete_link(long long res, + long long path, + long long type); +void __sanitizer_syscall_pre_impl___acl_aclcheck_link(long long path, + long long type, + long long aclp); +void __sanitizer_syscall_post_impl___acl_aclcheck_link(long long res, + long long path, + long long type, + long long aclp); +void __sanitizer_syscall_pre_impl___acl_get_file(long long path, long long type, + long long aclp); +void __sanitizer_syscall_post_impl___acl_get_file(long long res, long long path, + long long type, + long long aclp); +void __sanitizer_syscall_pre_impl___acl_set_file(long long path, long long type, + long long aclp); +void __sanitizer_syscall_post_impl___acl_set_file(long long res, long long path, + long long type, + long long aclp); +void __sanitizer_syscall_pre_impl___acl_get_fd(long long filedes, + long long type, long long aclp); +void __sanitizer_syscall_post_impl___acl_get_fd(long long res, + long long filedes, + long long type, long long aclp); +void __sanitizer_syscall_pre_impl___acl_set_fd(long long filedes, + long long type, long long aclp); +void __sanitizer_syscall_post_impl___acl_set_fd(long long res, + long long filedes, + long long type, long long aclp); +void __sanitizer_syscall_pre_impl___acl_delete_file(long long path, + long long type); +void __sanitizer_syscall_post_impl___acl_delete_file(long long res, + long long path, + long long type); +void __sanitizer_syscall_pre_impl___acl_delete_fd(long long filedes, + long long type); +void __sanitizer_syscall_post_impl___acl_delete_fd(long long res, + long long filedes, + long long type); +void __sanitizer_syscall_pre_impl___acl_aclcheck_file(long long path, + long long type, + long long aclp); +void __sanitizer_syscall_post_impl___acl_aclcheck_file(long long res, + long long path, + long long type, + long long aclp); +void __sanitizer_syscall_pre_impl___acl_aclcheck_fd(long long filedes, + long long type, + long long aclp); +void __sanitizer_syscall_post_impl___acl_aclcheck_fd(long long res, + long long filedes, + long long type, + long long aclp); +void __sanitizer_syscall_pre_impl_lpathconf(long long path, long long name); +void __sanitizer_syscall_post_impl_lpathconf(long long res, long long path, + long long name); #ifdef __cplusplus } // extern "C" diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_syscalls_netbsd.inc b/compiler-rt/lib/sanitizer_common/sanitizer_syscalls_netbsd.inc index 02b7e11b1677f..c4a9d99fe2f01 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_syscalls_netbsd.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_syscalls_netbsd.inc @@ -42,8 +42,8 @@ // DO NOT EDIT! THIS FILE HAS BEEN GENERATED! 
// // Generated with: generate_netbsd_syscalls.awk -// Generated date: 2019-12-24 -// Generated from: syscalls.master,v 1.296 2019/09/22 22:59:39 christos Exp +// Generated date: 2020-09-10 +// Generated from: syscalls.master,v 1.306 2020/08/14 00:53:16 riastradh Exp // //===----------------------------------------------------------------------===// @@ -872,7 +872,13 @@ PRE_SYSCALL(dup2)(long long from_, long long to_) { /* Nothing to do */ } POST_SYSCALL(dup2)(long long res, long long from_, long long to_) { /* Nothing to do */ } -/* syscall 91 has been skipped */ +PRE_SYSCALL(getrandom)(void *buf_, long long buflen_, long long flags_) { + /* TODO */ +} +POST_SYSCALL(getrandom) +(long long res, void *buf_, long long buflen_, long long flags_) { + /* TODO */ +} PRE_SYSCALL(fcntl)(long long fd_, long long cmd_, void *arg_) { /* Nothing to do */ } @@ -1332,9 +1338,29 @@ PRE_SYSCALL(compat_09_ouname)(void *name_) { /* TODO */ } POST_SYSCALL(compat_09_ouname)(long long res, void *name_) { /* TODO */ } PRE_SYSCALL(sysarch)(long long op_, void *parms_) { /* TODO */ } POST_SYSCALL(sysarch)(long long res, long long op_, void *parms_) { /* TODO */ } -/* syscall 166 has been skipped */ -/* syscall 167 has been skipped */ -/* syscall 168 has been skipped */ +PRE_SYSCALL(__futex) +(void *uaddr_, long long op_, long long val_, void *timeout_, void *uaddr2_, + long long val2_, long long val3_) { + /* TODO */ +} +POST_SYSCALL(__futex) +(long long res, void *uaddr_, long long op_, long long val_, void *timeout_, + void *uaddr2_, long long val2_, long long val3_) { + /* TODO */ +} +PRE_SYSCALL(__futex_set_robust_list)(void *head_, long long len_) { /* TODO */ } +POST_SYSCALL(__futex_set_robust_list) +(long long res, void *head_, long long len_) { + /* TODO */ +} +PRE_SYSCALL(__futex_get_robust_list) +(long long lwpid_, void **headp_, void *lenp_) { + /* TODO */ +} +POST_SYSCALL(__futex_get_robust_list) +(long long res, long long lwpid_, void **headp_, void *lenp_) { + /* TODO */ +} #if !defined(_LP64) PRE_SYSCALL(compat_10_osemsys) (long long which_, long long a2_, long long a3_, long long a4_, long long a5_) { @@ -3824,6 +3850,87 @@ PRE_SYSCALL(__fhstatvfs190) } POST_SYSCALL(__fhstatvfs190) (long long res, void *fhp_, long long fh_size_, void *buf_, long long flags_) {} +PRE_SYSCALL(__acl_get_link)(void *path_, long long type_, void *aclp_) { + /* TODO */ +} +POST_SYSCALL(__acl_get_link) +(long long res, void *path_, long long type_, void *aclp_) { + /* TODO */ +} +PRE_SYSCALL(__acl_set_link)(void *path_, long long type_, void *aclp_) { + /* TODO */ +} +POST_SYSCALL(__acl_set_link) +(long long res, void *path_, long long type_, void *aclp_) { + /* TODO */ +} +PRE_SYSCALL(__acl_delete_link)(void *path_, long long type_) { /* TODO */ } +POST_SYSCALL(__acl_delete_link)(long long res, void *path_, long long type_) { + /* TODO */ +} +PRE_SYSCALL(__acl_aclcheck_link)(void *path_, long long type_, void *aclp_) { + /* TODO */ +} +POST_SYSCALL(__acl_aclcheck_link) +(long long res, void *path_, long long type_, void *aclp_) { + /* TODO */ +} +PRE_SYSCALL(__acl_get_file)(void *path_, long long type_, void *aclp_) { + /* TODO */ +} +POST_SYSCALL(__acl_get_file) +(long long res, void *path_, long long type_, void *aclp_) { + /* TODO */ +} +PRE_SYSCALL(__acl_set_file)(void *path_, long long type_, void *aclp_) { + /* TODO */ +} +POST_SYSCALL(__acl_set_file) +(long long res, void *path_, long long type_, void *aclp_) { + /* TODO */ +} +PRE_SYSCALL(__acl_get_fd)(long long filedes_, long long type_, void *aclp_) { + /* 
TODO */
+}
+POST_SYSCALL(__acl_get_fd)
+(long long res, long long filedes_, long long type_, void *aclp_) {
+  /* TODO */
+}
+PRE_SYSCALL(__acl_set_fd)(long long filedes_, long long type_, void *aclp_) {
+  /* TODO */
+}
+POST_SYSCALL(__acl_set_fd)
+(long long res, long long filedes_, long long type_, void *aclp_) {
+  /* TODO */
+}
+PRE_SYSCALL(__acl_delete_file)(void *path_, long long type_) { /* TODO */ }
+POST_SYSCALL(__acl_delete_file)(long long res, void *path_, long long type_) {
+  /* TODO */
+}
+PRE_SYSCALL(__acl_delete_fd)(long long filedes_, long long type_) { /* TODO */ }
+POST_SYSCALL(__acl_delete_fd)
+(long long res, long long filedes_, long long type_) {
+  /* TODO */
+}
+PRE_SYSCALL(__acl_aclcheck_file)(void *path_, long long type_, void *aclp_) {
+  /* TODO */
+}
+POST_SYSCALL(__acl_aclcheck_file)
+(long long res, void *path_, long long type_, void *aclp_) {
+  /* TODO */
+}
+PRE_SYSCALL(__acl_aclcheck_fd)
+(long long filedes_, long long type_, void *aclp_) {
+  /* TODO */
+}
+POST_SYSCALL(__acl_aclcheck_fd)
+(long long res, long long filedes_, long long type_, void *aclp_) {
+  /* TODO */
+}
+PRE_SYSCALL(lpathconf)(void *path_, long long name_) { /* TODO */ }
+POST_SYSCALL(lpathconf)(long long res, void *path_, long long name_) {
+  /* TODO */
+}
 #undef SYS_MAXSYSARGS
 } // extern "C"

From c195ae2f003261f2c25f569b07ae556dee57f17d Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 9 Sep 2020 13:45:36 -0700
Subject: [PATCH 0292/1079] [SLPVectorizer][X86][AMDGPU] Remove fcmp+select to
 fmin/fmax reduction support.

Previously we could match fcmp+select to a reduction if the fcmp had
the nonans fast math flag. But if the select had the nonans fast math
flag, InstCombine would turn it into a fminnum/fmaxnum intrinsic before
SLP gets to it. It seems fairly likely that if one of the fcmp+select
pair has the fast math flag, both do.

My plan is to start vectorizing the fmaxnum/fminnum version soon, but I
wanted to get this code out as it had some of the strangest fast math
flag behaviors.
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    |  97 ++---
 .../SLPVectorizer/AMDGPU/horizontal-store.ll  |  52 +--
 .../SLPVectorizer/AMDGPU/reduction.ll         |  80 ++--
 .../SLPVectorizer/X86/horizontal-list.ll      |  52 ++-
 .../SLPVectorizer/X86/horizontal-minmax.ll    | 360 +++++++++++++++++-
 5 files changed, 481 insertions(+), 160 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ec138bf2b7c88..5ff2cd18c73c8 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6256,9 +6256,9 @@ class HorizontalReduction {
   enum ReductionKind {
     RK_None,       /// Not a reduction.
     RK_Arithmetic, /// Binary reduction data.
-    RK_Min,        /// Minimum reduction data.
+    RK_SMin,       /// Signed minimum reduction data.
     RK_UMin,       /// Unsigned minimum reduction data.
-    RK_Max,        /// Maximum reduction data.
+    RK_SMax,       /// Signed maximum reduction data.
     RK_UMax,       /// Unsigned maximum reduction data.
   };
 
@@ -6276,9 +6276,6 @@ class HorizontalReduction {
     /// Kind of the reduction operation.
     ReductionKind Kind = RK_None;
 
-    /// True if float point min/max reduction has no NaNs.
-    bool NoNaN = false;
-
     /// Checks if the reduction operation can be vectorized.
    bool isVectorizable() const {
      return LHS && RHS &&
@@ -6288,10 +6285,9 @@ class HorizontalReduction {
              Opcode == Instruction::Mul || Opcode == Instruction::FMul ||
              Opcode == Instruction::And || Opcode == Instruction::Or ||
              Opcode == Instruction::Xor)) ||
-            ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
-             (Kind == RK_Min || Kind == RK_Max)) ||
             (Opcode == Instruction::ICmp &&
-             (Kind == RK_UMin || Kind == RK_UMax)));
+             (Kind == RK_SMin || Kind == RK_SMax ||
+              Kind == RK_UMin || Kind == RK_UMax)));
    }

    /// Creates reduction operation with the current opcode.
@@ -6303,13 +6299,13 @@ class HorizontalReduction {
      case RK_Arithmetic:
        return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, LHS, RHS,
                                   Name);
-      case RK_Min:
-        Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSLT(LHS, RHS)
-                                          : Builder.CreateFCmpOLT(LHS, RHS);
+      case RK_SMin:
+        assert(Opcode == Instruction::ICmp && "Expected integer types.");
+        Cmp = Builder.CreateICmpSLT(LHS, RHS);
        return Builder.CreateSelect(Cmp, LHS, RHS, Name);
-      case RK_Max:
-        Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSGT(LHS, RHS)
-                                          : Builder.CreateFCmpOGT(LHS, RHS);
+      case RK_SMax:
+        assert(Opcode == Instruction::ICmp && "Expected integer types.");
+        Cmp = Builder.CreateICmpSGT(LHS, RHS);
        return Builder.CreateSelect(Cmp, LHS, RHS, Name);
      case RK_UMin:
        assert(Opcode == Instruction::ICmp && "Expected integer types.");
@@ -6337,9 +6333,8 @@ class HorizontalReduction {
    /// Constructor for reduction operations with opcode and its left and
    /// right operands.
-    OperationData(unsigned Opcode, Value *LHS, Value *RHS, ReductionKind Kind,
-                  bool NoNaN = false)
-        : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind), NoNaN(NoNaN) {
+    OperationData(unsigned Opcode, Value *LHS, Value *RHS, ReductionKind Kind)
+        : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
      assert(Kind != RK_None && "One of the reduction operations is expected.");
    }

@@ -6350,8 +6345,8 @@ class HorizontalReduction {
      switch (Kind) {
      case RK_Arithmetic:
        return false;
-      case RK_Min:
-      case RK_Max:
+      case RK_SMin:
+      case RK_SMax:
      case RK_UMin:
      case RK_UMax:
        return true;
@@ -6433,10 +6428,8 @@ class HorizontalReduction {
      switch (Kind) {
      case RK_Arithmetic:
        return I->isAssociative();
-      case RK_Min:
-      case RK_Max:
-        return Opcode == Instruction::ICmp ||
-               cast<Instruction>(I->getOperand(0))->isFast();
+      case RK_SMin:
+      case RK_SMax:
      case RK_UMin:
      case RK_UMax:
        assert(Opcode == Instruction::ICmp &&
@@ -6466,7 +6459,6 @@ class HorizontalReduction {
      LHS = nullptr;
      RHS = nullptr;
      Kind = RK_None;
-      NoNaN = false;
    }

    /// Get the opcode of the reduction operation.
@@ -6494,8 +6486,8 @@ class HorizontalReduction {
      case RK_Arithmetic:
        propagateIRFlags(Op, ReductionOps[0]);
        return Op;
-      case RK_Min:
-      case RK_Max:
+      case RK_SMin:
+      case RK_SMax:
      case RK_UMin:
      case RK_UMax:
        if (auto *SI = dyn_cast<SelectInst>(Op))
@@ -6518,8 +6510,8 @@ class HorizontalReduction {
      case RK_Arithmetic:
        propagateIRFlags(Op, I);
        return Op;
-      case RK_Min:
-      case RK_Max:
+      case RK_SMin:
+      case RK_SMax:
      case RK_UMin:
      case RK_UMax:
        if (auto *SI = dyn_cast<SelectInst>(Op)) {
@@ -6536,16 +6528,15 @@ class HorizontalReduction {
    TargetTransformInfo::ReductionFlags getFlags() const {
      TargetTransformInfo::ReductionFlags Flags;
-      Flags.NoNaN = NoNaN;
      switch (Kind) {
      case RK_Arithmetic:
        break;
-      case RK_Min:
-        Flags.IsSigned = Opcode == Instruction::ICmp;
+      case RK_SMin:
+        Flags.IsSigned = true;
        Flags.IsMaxOp = false;
        break;
-      case RK_Max:
-        Flags.IsSigned = Opcode == Instruction::ICmp;
+      case RK_SMax:
+        Flags.IsSigned = true;
        Flags.IsMaxOp = true;
        break;
      case RK_UMin:
@@ -6610,21 +6601,11 @@ class HorizontalReduction {
      if (m_UMin(m_Value(LHS), m_Value(RHS)).match(Select)) {
        return OperationData(Instruction::ICmp, LHS, RHS, RK_UMin);
      } else if (m_SMin(m_Value(LHS), m_Value(RHS)).match(Select)) {
-        return OperationData(Instruction::ICmp, LHS, RHS, RK_Min);
-      } else if (m_OrdFMin(m_Value(LHS), m_Value(RHS)).match(Select) ||
-                 m_UnordFMin(m_Value(LHS), m_Value(RHS)).match(Select)) {
-        return OperationData(
-            Instruction::FCmp, LHS, RHS, RK_Min,
-            cast<Instruction>(Select->getCondition())->hasNoNaNs());
+        return OperationData(Instruction::ICmp, LHS, RHS, RK_SMin);
      } else if (m_UMax(m_Value(LHS), m_Value(RHS)).match(Select)) {
        return OperationData(Instruction::ICmp, LHS, RHS, RK_UMax);
      } else if (m_SMax(m_Value(LHS), m_Value(RHS)).match(Select)) {
-        return OperationData(Instruction::ICmp, LHS, RHS, RK_Max);
-      } else if (m_OrdFMax(m_Value(LHS), m_Value(RHS)).match(Select) ||
-                 m_UnordFMax(m_Value(LHS), m_Value(RHS)).match(Select)) {
-        return OperationData(
-            Instruction::FCmp, LHS, RHS, RK_Max,
-            cast<Instruction>(Select->getCondition())->hasNoNaNs());
+        return OperationData(Instruction::ICmp, LHS, RHS, RK_SMax);
      } else {
        // Try harder: look for min/max pattern based on instructions producing
        // same values such as: select ((cmp Inst1, Inst2), Inst1, Inst2).
@@ -6672,14 +6653,7 @@ class HorizontalReduction {

        case CmpInst::ICMP_SLT:
        case CmpInst::ICMP_SLE:
-          return OperationData(Instruction::ICmp, LHS, RHS, RK_Min);
-
-        case CmpInst::FCMP_OLT:
-        case CmpInst::FCMP_OLE:
-        case CmpInst::FCMP_ULT:
-        case CmpInst::FCMP_ULE:
-          return OperationData(Instruction::FCmp, LHS, RHS, RK_Min,
-                               cast<Instruction>(Cond)->hasNoNaNs());
+          return OperationData(Instruction::ICmp, LHS, RHS, RK_SMin);

        case CmpInst::ICMP_UGT:
        case CmpInst::ICMP_UGE:
@@ -6687,14 +6661,7 @@ class HorizontalReduction {

        case CmpInst::ICMP_SGT:
        case CmpInst::ICMP_SGE:
-          return OperationData(Instruction::ICmp, LHS, RHS, RK_Max);
-
-        case CmpInst::FCMP_OGT:
-        case CmpInst::FCMP_OGE:
-        case CmpInst::FCMP_UGT:
-        case CmpInst::FCMP_UGE:
-          return OperationData(Instruction::FCmp, LHS, RHS, RK_Max,
-                               cast<Instruction>(Cond)->hasNoNaNs());
+          return OperationData(Instruction::ICmp, LHS, RHS, RK_SMax);
        }
      }
    }
@@ -7017,8 +6984,8 @@ class HorizontalReduction {
        TTI->getArithmeticReductionCost(ReductionData.getOpcode(), VecTy,
                                        /*IsPairwiseForm=*/false);
        break;
-      case RK_Min:
-      case RK_Max:
+      case RK_SMin:
+      case RK_SMax:
      case RK_UMin:
      case RK_UMax: {
        auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VecTy));
@@ -7045,8 +7012,8 @@ class HorizontalReduction {
        ScalarReduxCost =
            TTI->getArithmeticInstrCost(ReductionData.getOpcode(), ScalarTy);
        break;
-      case RK_Min:
-      case RK_Max:
+      case RK_SMin:
+      case RK_SMax:
      case RK_UMin:
      case RK_UMax:
        ScalarReduxCost =
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll
index 4007a0d30edc5..397e98eb881df 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll
@@ -107,6 +107,8 @@ define i64 @sminv6() {
   ret i64 %select5
 }
 
+; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select
+; with fastmath on the select.
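+;
+; As a sketch of that canonicalization (the value names here are invented for
+; illustration and do not come from the test below): given
+;   %cmp = fcmp fast ogt float %a, %b
+;   %max = select fast i1 %cmp, float %a, float %b
+; InstCombine is expected to produce
+;   %max = call fast float @llvm.maxnum.f32(float %a, float %b)
+; so by the time SLP runs it sees maxnum calls rather than fcmp+select.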
define float @fmaxv6() { ; GFX9-LABEL: @fmaxv6( ; GFX9-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([32 x float]* @farr to <2 x float>*), align 16 @@ -114,19 +116,21 @@ define float @fmaxv6() { ; GFX9-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 ; GFX9-NEXT: [[CMP1:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]] ; GFX9-NEXT: [[SELECT1:%.*]] = select i1 [[CMP1]], float [[TMP2]], float [[TMP3]] -; GFX9-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([32 x float], [32 x float]* @farr, i64 0, i64 2) to <4 x float>*), align 8 -; GFX9-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> undef, <4 x i32> -; GFX9-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP4]], [[RDX_SHUF]] -; GFX9-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP4]], <4 x float> [[RDX_SHUF]] -; GFX9-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> undef, <4 x i32> -; GFX9-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; GFX9-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]] -; GFX9-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0 -; GFX9-NEXT: [[TMP6:%.*]] = fcmp fast ogt float [[TMP5]], [[SELECT1]] -; GFX9-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP6]], float [[TMP5]], float [[SELECT1]] +; GFX9-NEXT: [[LOAD3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @farr, i64 0, i64 2), align 8 +; GFX9-NEXT: [[CMP2:%.*]] = fcmp fast ogt float [[SELECT1]], [[LOAD3]] +; GFX9-NEXT: [[SELECT2:%.*]] = select i1 [[CMP2]], float [[SELECT1]], float [[LOAD3]] +; GFX9-NEXT: [[LOAD4:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @farr, i64 0, i64 3), align 4 +; GFX9-NEXT: [[CMP3:%.*]] = fcmp fast ogt float [[SELECT2]], [[LOAD4]] +; GFX9-NEXT: [[SELECT3:%.*]] = select i1 [[CMP3]], float [[SELECT2]], float [[LOAD4]] +; GFX9-NEXT: [[LOAD5:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @farr, i64 0, i64 4), align 16 +; GFX9-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[SELECT3]], [[LOAD5]] +; GFX9-NEXT: [[SELECT4:%.*]] = select i1 [[CMP4]], float [[SELECT3]], float [[LOAD5]] +; GFX9-NEXT: [[LOAD6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @farr, i64 0, i64 5), align 4 +; GFX9-NEXT: [[CMP5:%.*]] = fcmp fast ogt float [[SELECT4]], [[LOAD6]] +; GFX9-NEXT: [[SELECT5:%.*]] = select i1 [[CMP5]], float [[SELECT4]], float [[LOAD6]] ; GFX9-NEXT: [[STORE_SELECT:%.*]] = select i1 [[CMP1]], float 3.000000e+00, float 4.000000e+00 ; GFX9-NEXT: store float [[STORE_SELECT]], float* @fvar, align 8 -; GFX9-NEXT: ret float [[OP_EXTRA]] +; GFX9-NEXT: ret float [[SELECT5]] ; %load1 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @farr, i64 0, i64 0), align 16 %load2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @farr, i64 0, i64 1), align 4 @@ -154,6 +158,8 @@ define float @fmaxv6() { ret float %select5 } +; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select +; with fastmath on the select. 
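+;
+; The double fmin form below canonicalizes the same way (sketch; %a and %b
+; are invented names): fcmp fast olt plus select fast becomes
+;   %min = call fast double @llvm.minnum.f64(double %a, double %b)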
define double @dminv6() { ; GFX9-LABEL: @dminv6( ; GFX9-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([32 x double]* @darr to <2 x double>*), align 16 @@ -161,19 +167,21 @@ define double @dminv6() { ; GFX9-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; GFX9-NEXT: [[CMP1:%.*]] = fcmp fast olt double [[TMP2]], [[TMP3]] ; GFX9-NEXT: [[SELECT1:%.*]] = select i1 [[CMP1]], double [[TMP2]], double [[TMP3]] -; GFX9-NEXT: [[TMP4:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([32 x double], [32 x double]* @darr, i64 0, i64 2) to <4 x double>*), align 8 -; GFX9-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> undef, <4 x i32> -; GFX9-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <4 x double> [[TMP4]], [[RDX_SHUF]] -; GFX9-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x double> [[TMP4]], <4 x double> [[RDX_SHUF]] -; GFX9-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x double> [[RDX_MINMAX_SELECT]], <4 x double> undef, <4 x i32> -; GFX9-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast olt <4 x double> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; GFX9-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x double> [[RDX_MINMAX_SELECT]], <4 x double> [[RDX_SHUF1]] -; GFX9-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[RDX_MINMAX_SELECT3]], i32 0 -; GFX9-NEXT: [[TMP6:%.*]] = fcmp fast olt double [[TMP5]], [[SELECT1]] -; GFX9-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP6]], double [[TMP5]], double [[SELECT1]] +; GFX9-NEXT: [[LOAD3:%.*]] = load double, double* getelementptr inbounds ([32 x double], [32 x double]* @darr, i64 0, i64 2), align 8 +; GFX9-NEXT: [[CMP2:%.*]] = fcmp fast olt double [[SELECT1]], [[LOAD3]] +; GFX9-NEXT: [[SELECT2:%.*]] = select i1 [[CMP2]], double [[SELECT1]], double [[LOAD3]] +; GFX9-NEXT: [[LOAD4:%.*]] = load double, double* getelementptr inbounds ([32 x double], [32 x double]* @darr, i64 0, i64 3), align 4 +; GFX9-NEXT: [[CMP3:%.*]] = fcmp fast olt double [[SELECT2]], [[LOAD4]] +; GFX9-NEXT: [[SELECT3:%.*]] = select i1 [[CMP3]], double [[SELECT2]], double [[LOAD4]] +; GFX9-NEXT: [[LOAD5:%.*]] = load double, double* getelementptr inbounds ([32 x double], [32 x double]* @darr, i64 0, i64 4), align 16 +; GFX9-NEXT: [[CMP4:%.*]] = fcmp fast olt double [[SELECT3]], [[LOAD5]] +; GFX9-NEXT: [[SELECT4:%.*]] = select i1 [[CMP4]], double [[SELECT3]], double [[LOAD5]] +; GFX9-NEXT: [[LOAD6:%.*]] = load double, double* getelementptr inbounds ([32 x double], [32 x double]* @darr, i64 0, i64 5), align 4 +; GFX9-NEXT: [[CMP5:%.*]] = fcmp fast olt double [[SELECT4]], [[LOAD6]] +; GFX9-NEXT: [[SELECT5:%.*]] = select i1 [[CMP5]], double [[SELECT4]], double [[LOAD6]] ; GFX9-NEXT: [[STORE_SELECT:%.*]] = select i1 [[CMP1]], double 3.000000e+00, double 4.000000e+00 ; GFX9-NEXT: store double [[STORE_SELECT]], double* @dvar, align 8 -; GFX9-NEXT: ret double [[OP_EXTRA]] +; GFX9-NEXT: ret double [[SELECT5]] ; %load1 = load double, double* getelementptr inbounds ([32 x double], [32 x double]* @darr, i64 0, i64 0), align 16 %load2 = load double, double* getelementptr inbounds ([32 x double], [32 x double]* @darr, i64 0, i64 1), align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/reduction.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/reduction.ll index d7434394dcc39..f97b1243f9548 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/reduction.ll @@ -611,31 +611,22 @@ entry: ret i16 %max3 } +; FIXME: Use 
fmaxnum intrinsics to match what InstCombine creates for fcmp+select +; with fastmath on the select. define half @reduction_fmax_v4half(<4 x half> %vec4) { -; GFX9-LABEL: @reduction_fmax_v4half( -; GFX9-NEXT: entry: -; GFX9-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x half> [[VEC4:%.*]], <4 x half> undef, <4 x i32> -; GFX9-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x half> [[VEC4]], [[RDX_SHUF]] -; GFX9-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x half> [[VEC4]], <4 x half> [[RDX_SHUF]] -; GFX9-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x half> [[RDX_MINMAX_SELECT]], <4 x half> undef, <4 x i32> -; GFX9-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x half> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; GFX9-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x half> [[RDX_MINMAX_SELECT]], <4 x half> [[RDX_SHUF1]] -; GFX9-NEXT: [[TMP0:%.*]] = extractelement <4 x half> [[RDX_MINMAX_SELECT3]], i32 0 -; GFX9-NEXT: ret half [[TMP0]] -; -; VI-LABEL: @reduction_fmax_v4half( -; VI-NEXT: entry: -; VI-NEXT: [[ELT0:%.*]] = extractelement <4 x half> [[VEC4:%.*]], i64 0 -; VI-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1 -; VI-NEXT: [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2 -; VI-NEXT: [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3 -; VI-NEXT: [[CMP1:%.*]] = fcmp fast ogt half [[ELT1]], [[ELT0]] -; VI-NEXT: [[MAX1:%.*]] = select i1 [[CMP1]], half [[ELT1]], half [[ELT0]] -; VI-NEXT: [[CMP2:%.*]] = fcmp fast ogt half [[ELT2]], [[MAX1]] -; VI-NEXT: [[MAX2:%.*]] = select i1 [[CMP2]], half [[ELT2]], half [[MAX1]] -; VI-NEXT: [[CMP3:%.*]] = fcmp fast ogt half [[ELT3]], [[MAX2]] -; VI-NEXT: [[MAX3:%.*]] = select i1 [[CMP3]], half [[ELT3]], half [[MAX2]] -; VI-NEXT: ret half [[MAX3]] +; GCN-LABEL: @reduction_fmax_v4half( +; GCN-NEXT: entry: +; GCN-NEXT: [[ELT0:%.*]] = extractelement <4 x half> [[VEC4:%.*]], i64 0 +; GCN-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1 +; GCN-NEXT: [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2 +; GCN-NEXT: [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3 +; GCN-NEXT: [[CMP1:%.*]] = fcmp fast ogt half [[ELT1]], [[ELT0]] +; GCN-NEXT: [[MAX1:%.*]] = select i1 [[CMP1]], half [[ELT1]], half [[ELT0]] +; GCN-NEXT: [[CMP2:%.*]] = fcmp fast ogt half [[ELT2]], [[MAX1]] +; GCN-NEXT: [[MAX2:%.*]] = select i1 [[CMP2]], half [[ELT2]], half [[MAX1]] +; GCN-NEXT: [[CMP3:%.*]] = fcmp fast ogt half [[ELT3]], [[MAX2]] +; GCN-NEXT: [[MAX3:%.*]] = select i1 [[CMP3]], half [[ELT3]], half [[MAX2]] +; GCN-NEXT: ret half [[MAX3]] ; entry: %elt0 = extractelement <4 x half> %vec4, i64 0 @@ -653,31 +644,22 @@ entry: ret half %max3 } +; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select +; with fastmath on the select. 
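+;
+; For the half tests the intrinsic form of the same rewrite would be
+; (sketch; %x and %y are invented names):
+;   %m = call fast half @llvm.minnum.f16(half %x, half %y)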
define half @reduction_fmin_v4half(<4 x half> %vec4) { -; GFX9-LABEL: @reduction_fmin_v4half( -; GFX9-NEXT: entry: -; GFX9-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x half> [[VEC4:%.*]], <4 x half> undef, <4 x i32> -; GFX9-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <4 x half> [[VEC4]], [[RDX_SHUF]] -; GFX9-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x half> [[VEC4]], <4 x half> [[RDX_SHUF]] -; GFX9-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x half> [[RDX_MINMAX_SELECT]], <4 x half> undef, <4 x i32> -; GFX9-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast olt <4 x half> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; GFX9-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x half> [[RDX_MINMAX_SELECT]], <4 x half> [[RDX_SHUF1]] -; GFX9-NEXT: [[TMP0:%.*]] = extractelement <4 x half> [[RDX_MINMAX_SELECT3]], i32 0 -; GFX9-NEXT: ret half [[TMP0]] -; -; VI-LABEL: @reduction_fmin_v4half( -; VI-NEXT: entry: -; VI-NEXT: [[ELT0:%.*]] = extractelement <4 x half> [[VEC4:%.*]], i64 0 -; VI-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1 -; VI-NEXT: [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2 -; VI-NEXT: [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3 -; VI-NEXT: [[CMP1:%.*]] = fcmp fast olt half [[ELT1]], [[ELT0]] -; VI-NEXT: [[MIN1:%.*]] = select i1 [[CMP1]], half [[ELT1]], half [[ELT0]] -; VI-NEXT: [[CMP2:%.*]] = fcmp fast olt half [[ELT2]], [[MIN1]] -; VI-NEXT: [[MIN2:%.*]] = select i1 [[CMP2]], half [[ELT2]], half [[MIN1]] -; VI-NEXT: [[CMP3:%.*]] = fcmp fast olt half [[ELT3]], [[MIN2]] -; VI-NEXT: [[MIN3:%.*]] = select i1 [[CMP3]], half [[ELT3]], half [[MIN2]] -; VI-NEXT: ret half [[MIN3]] +; GCN-LABEL: @reduction_fmin_v4half( +; GCN-NEXT: entry: +; GCN-NEXT: [[ELT0:%.*]] = extractelement <4 x half> [[VEC4:%.*]], i64 0 +; GCN-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1 +; GCN-NEXT: [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2 +; GCN-NEXT: [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3 +; GCN-NEXT: [[CMP1:%.*]] = fcmp fast olt half [[ELT1]], [[ELT0]] +; GCN-NEXT: [[MIN1:%.*]] = select i1 [[CMP1]], half [[ELT1]], half [[ELT0]] +; GCN-NEXT: [[CMP2:%.*]] = fcmp fast olt half [[ELT2]], [[MIN1]] +; GCN-NEXT: [[MIN2:%.*]] = select i1 [[CMP2]], half [[ELT2]], half [[MIN1]] +; GCN-NEXT: [[CMP3:%.*]] = fcmp fast olt half [[ELT3]], [[MIN2]] +; GCN-NEXT: [[MIN3:%.*]] = select i1 [[CMP3]], half [[ELT3]], half [[MIN2]] +; GCN-NEXT: ret half [[MIN3]] ; entry: %elt0 = extractelement <4 x half> %vec4, i64 0 @@ -719,4 +701,4 @@ entry: %add3 = fadd fast float %elt3, %add2 ret float %add3 -} \ No newline at end of file +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index 7b3acfb6c0c01..dd5d649c41bb4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -266,24 +266,52 @@ entry: ret i32 %conv4 } +; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select +; with fastmath on the select. 
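+;
+; After InstCombine, a fast fcmp+select max reduction like the one below is
+; expected to reach SLP as a chain of intrinsic calls instead (sketch only;
+; %m1, %m2 and the %mul* operands are invented names):
+;   %m1 = call fast float @llvm.maxnum.f32(float %mul0, float %mul1)
+;   %m2 = call fast float @llvm.maxnum.f32(float %m1, float %mul2)
+; Recognizing that chain as a reduction is what the FIXME above asks for.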
define float @bar() { ; CHECK-LABEL: @bar( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> [[TMP2]]) -; CHECK-NEXT: store float [[TMP3]], float* @res, align 4 -; CHECK-NEXT: ret float [[TMP3]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16 +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; CHECK-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float [[TMP3]], float [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8 +; CHECK-NEXT: [[MUL3_1:%.*]] = fmul fast float [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[MUL3_1]] +; CHECK-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float [[MUL3_1]] +; CHECK-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4 +; CHECK-NEXT: [[MUL3_2:%.*]] = fmul fast float [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[MUL3_2]] +; CHECK-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float [[MUL3_2]] +; CHECK-NEXT: store float [[MAX_0_MUL3_2]], float* @res, align 4 +; CHECK-NEXT: ret float [[MAX_0_MUL3_2]] ; ; THRESHOLD-LABEL: @bar( ; THRESHOLD-NEXT: entry: -; THRESHOLD-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 -; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 -; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]] -; THRESHOLD-NEXT: [[TMP3:%.*]] = call fast float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> [[TMP2]]) -; THRESHOLD-NEXT: store float [[TMP3]], float* @res, align 4 -; THRESHOLD-NEXT: ret float [[TMP3]] +; THRESHOLD-NEXT: [[TMP0:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16 +; THRESHOLD-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16 +; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]] +; THRESHOLD-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; THRESHOLD-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] +; THRESHOLD-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float [[TMP3]], float [[TMP4]] +; THRESHOLD-NEXT: [[TMP5:%.*]] = 
load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8 +; THRESHOLD-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8 +; THRESHOLD-NEXT: [[MUL3_1:%.*]] = fmul fast float [[TMP6]], [[TMP5]] +; THRESHOLD-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[MUL3_1]] +; THRESHOLD-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float [[MUL3_1]] +; THRESHOLD-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4 +; THRESHOLD-NEXT: [[TMP8:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4 +; THRESHOLD-NEXT: [[MUL3_2:%.*]] = fmul fast float [[TMP8]], [[TMP7]] +; THRESHOLD-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[MUL3_2]] +; THRESHOLD-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float [[MUL3_2]] +; THRESHOLD-NEXT: store float [[MAX_0_MUL3_2]], float* @res, align 4 +; THRESHOLD-NEXT: ret float [[MAX_0_MUL3_2]] ; entry: %0 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll index f06802eff9c7d..9663ede723cc6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -198,11 +198,59 @@ define i32 @maxi32(i32) { ret i32 %95 } +; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select +; with fastmath on the select. define float @maxf8(float) { -; CHECK-LABEL: @maxf8( -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> [[TMP2]]) -; CHECK-NEXT: ret float [[TMP3]] +; DEFAULT-LABEL: @maxf8( +; DEFAULT-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 +; DEFAULT-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 +; DEFAULT-NEXT: [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]] +; DEFAULT-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]] +; DEFAULT-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8 +; DEFAULT-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]] +; DEFAULT-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]] +; DEFAULT-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4 +; DEFAULT-NEXT: [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]] +; DEFAULT-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]] +; DEFAULT-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16 +; DEFAULT-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]] +; DEFAULT-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]] +; DEFAULT-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4 +; DEFAULT-NEXT: [[TMP16:%.*]] = fcmp fast 
ogt float [[TMP14]], [[TMP15]] +; DEFAULT-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]] +; DEFAULT-NEXT: [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8 +; DEFAULT-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]] +; DEFAULT-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]] +; DEFAULT-NEXT: [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4 +; DEFAULT-NEXT: [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]] +; DEFAULT-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]] +; DEFAULT-NEXT: ret float [[TMP23]] +; +; THRESH-LABEL: @maxf8( +; THRESH-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([32 x float]* @arr1 to <2 x float>*), align 16 +; THRESH-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; THRESH-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; THRESH-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] +; THRESH-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]] +; THRESH-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8 +; THRESH-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] +; THRESH-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]] +; THRESH-NEXT: [[TMP10:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4 +; THRESH-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] +; THRESH-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float [[TMP10]] +; THRESH-NEXT: [[TMP13:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16 +; THRESH-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] +; THRESH-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float [[TMP13]] +; THRESH-NEXT: [[TMP16:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4 +; THRESH-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] +; THRESH-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float [[TMP16]] +; THRESH-NEXT: [[TMP19:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8 +; THRESH-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] +; THRESH-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float [[TMP19]] +; THRESH-NEXT: [[TMP22:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4 +; THRESH-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] +; THRESH-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float [[TMP22]] +; THRESH-NEXT: ret float [[TMP24]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 @@ -229,11 +277,107 @@ define float @maxf8(float) { ret float %23 } +; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select +; with fastmath on the select. 
define float @maxf16(float) { -; CHECK-LABEL: @maxf16( -; CHECK-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> [[TMP2]]) -; CHECK-NEXT: ret float [[TMP3]] +; DEFAULT-LABEL: @maxf16( +; DEFAULT-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 +; DEFAULT-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 +; DEFAULT-NEXT: [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]] +; DEFAULT-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]] +; DEFAULT-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8 +; DEFAULT-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]] +; DEFAULT-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]] +; DEFAULT-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4 +; DEFAULT-NEXT: [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]] +; DEFAULT-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]] +; DEFAULT-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16 +; DEFAULT-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]] +; DEFAULT-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]] +; DEFAULT-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4 +; DEFAULT-NEXT: [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]] +; DEFAULT-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]] +; DEFAULT-NEXT: [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8 +; DEFAULT-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]] +; DEFAULT-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]] +; DEFAULT-NEXT: [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4 +; DEFAULT-NEXT: [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]] +; DEFAULT-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]] +; DEFAULT-NEXT: [[TMP24:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16 +; DEFAULT-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP23]], [[TMP24]] +; DEFAULT-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP23]], float [[TMP24]] +; DEFAULT-NEXT: [[TMP27:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4 +; DEFAULT-NEXT: [[TMP28:%.*]] = fcmp fast ogt float [[TMP26]], [[TMP27]] +; DEFAULT-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float [[TMP27]] +; DEFAULT-NEXT: [[TMP30:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8 +; DEFAULT-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP29]], [[TMP30]] +; DEFAULT-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP29]], float [[TMP30]] +; DEFAULT-NEXT: [[TMP33:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4 +; 
DEFAULT-NEXT: [[TMP34:%.*]] = fcmp fast ogt float [[TMP32]], [[TMP33]] +; DEFAULT-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], float [[TMP32]], float [[TMP33]] +; DEFAULT-NEXT: [[TMP36:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16 +; DEFAULT-NEXT: [[TMP37:%.*]] = fcmp fast ogt float [[TMP35]], [[TMP36]] +; DEFAULT-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP35]], float [[TMP36]] +; DEFAULT-NEXT: [[TMP39:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4 +; DEFAULT-NEXT: [[TMP40:%.*]] = fcmp fast ogt float [[TMP38]], [[TMP39]] +; DEFAULT-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], float [[TMP38]], float [[TMP39]] +; DEFAULT-NEXT: [[TMP42:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8 +; DEFAULT-NEXT: [[TMP43:%.*]] = fcmp fast ogt float [[TMP41]], [[TMP42]] +; DEFAULT-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP41]], float [[TMP42]] +; DEFAULT-NEXT: [[TMP45:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4 +; DEFAULT-NEXT: [[TMP46:%.*]] = fcmp fast ogt float [[TMP44]], [[TMP45]] +; DEFAULT-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], float [[TMP44]], float [[TMP45]] +; DEFAULT-NEXT: ret float [[TMP47]] +; +; THRESH-LABEL: @maxf16( +; THRESH-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([32 x float]* @arr1 to <2 x float>*), align 16 +; THRESH-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; THRESH-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; THRESH-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] +; THRESH-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]] +; THRESH-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8 +; THRESH-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] +; THRESH-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]] +; THRESH-NEXT: [[TMP10:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4 +; THRESH-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] +; THRESH-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float [[TMP10]] +; THRESH-NEXT: [[TMP13:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16 +; THRESH-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] +; THRESH-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float [[TMP13]] +; THRESH-NEXT: [[TMP16:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4 +; THRESH-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] +; THRESH-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float [[TMP16]] +; THRESH-NEXT: [[TMP19:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8 +; THRESH-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] +; THRESH-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float [[TMP19]] +; THRESH-NEXT: [[TMP22:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4 +; THRESH-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] +; THRESH-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], 
float [[TMP22]] +; THRESH-NEXT: [[TMP25:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16 +; THRESH-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] +; THRESH-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float [[TMP25]] +; THRESH-NEXT: [[TMP28:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4 +; THRESH-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] +; THRESH-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float [[TMP28]] +; THRESH-NEXT: [[TMP31:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8 +; THRESH-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] +; THRESH-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float [[TMP31]] +; THRESH-NEXT: [[TMP34:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4 +; THRESH-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] +; THRESH-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float [[TMP34]] +; THRESH-NEXT: [[TMP37:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16 +; THRESH-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] +; THRESH-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float [[TMP37]] +; THRESH-NEXT: [[TMP40:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4 +; THRESH-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] +; THRESH-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float [[TMP40]] +; THRESH-NEXT: [[TMP43:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8 +; THRESH-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]] +; THRESH-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float [[TMP43]] +; THRESH-NEXT: [[TMP46:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4 +; THRESH-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] +; THRESH-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float [[TMP46]] +; THRESH-NEXT: ret float [[TMP48]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 @@ -284,11 +428,203 @@ define float @maxf16(float) { ret float %47 } +; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select +; with fastmath on the select. 
define float @maxf32(float) { -; CHECK-LABEL: @maxf32( -; CHECK-NEXT: [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast ([32 x float]* @arr1 to <32 x float>*), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> [[TMP2]]) -; CHECK-NEXT: ret float [[TMP3]] +; DEFAULT-LABEL: @maxf32( +; DEFAULT-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 +; DEFAULT-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 +; DEFAULT-NEXT: [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]] +; DEFAULT-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]] +; DEFAULT-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8 +; DEFAULT-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]] +; DEFAULT-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]] +; DEFAULT-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4 +; DEFAULT-NEXT: [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]] +; DEFAULT-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]] +; DEFAULT-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16 +; DEFAULT-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]] +; DEFAULT-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]] +; DEFAULT-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4 +; DEFAULT-NEXT: [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]] +; DEFAULT-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]] +; DEFAULT-NEXT: [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8 +; DEFAULT-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]] +; DEFAULT-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]] +; DEFAULT-NEXT: [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4 +; DEFAULT-NEXT: [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]] +; DEFAULT-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]] +; DEFAULT-NEXT: [[TMP24:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16 +; DEFAULT-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP23]], [[TMP24]] +; DEFAULT-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP23]], float [[TMP24]] +; DEFAULT-NEXT: [[TMP27:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4 +; DEFAULT-NEXT: [[TMP28:%.*]] = fcmp fast ogt float [[TMP26]], [[TMP27]] +; DEFAULT-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float [[TMP27]] +; DEFAULT-NEXT: [[TMP30:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8 +; DEFAULT-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP29]], [[TMP30]] +; DEFAULT-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP29]], float [[TMP30]] +; DEFAULT-NEXT: [[TMP33:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4 +; 
DEFAULT-NEXT: [[TMP34:%.*]] = fcmp fast ogt float [[TMP32]], [[TMP33]] +; DEFAULT-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], float [[TMP32]], float [[TMP33]] +; DEFAULT-NEXT: [[TMP36:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16 +; DEFAULT-NEXT: [[TMP37:%.*]] = fcmp fast ogt float [[TMP35]], [[TMP36]] +; DEFAULT-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP35]], float [[TMP36]] +; DEFAULT-NEXT: [[TMP39:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4 +; DEFAULT-NEXT: [[TMP40:%.*]] = fcmp fast ogt float [[TMP38]], [[TMP39]] +; DEFAULT-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], float [[TMP38]], float [[TMP39]] +; DEFAULT-NEXT: [[TMP42:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8 +; DEFAULT-NEXT: [[TMP43:%.*]] = fcmp fast ogt float [[TMP41]], [[TMP42]] +; DEFAULT-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP41]], float [[TMP42]] +; DEFAULT-NEXT: [[TMP45:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4 +; DEFAULT-NEXT: [[TMP46:%.*]] = fcmp fast ogt float [[TMP44]], [[TMP45]] +; DEFAULT-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], float [[TMP44]], float [[TMP45]] +; DEFAULT-NEXT: [[TMP48:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 16), align 16 +; DEFAULT-NEXT: [[TMP49:%.*]] = fcmp fast ogt float [[TMP47]], [[TMP48]] +; DEFAULT-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], float [[TMP47]], float [[TMP48]] +; DEFAULT-NEXT: [[TMP51:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 17), align 4 +; DEFAULT-NEXT: [[TMP52:%.*]] = fcmp fast ogt float [[TMP50]], [[TMP51]] +; DEFAULT-NEXT: [[TMP53:%.*]] = select i1 [[TMP52]], float [[TMP50]], float [[TMP51]] +; DEFAULT-NEXT: [[TMP54:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 18), align 8 +; DEFAULT-NEXT: [[TMP55:%.*]] = fcmp fast ogt float [[TMP53]], [[TMP54]] +; DEFAULT-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], float [[TMP53]], float [[TMP54]] +; DEFAULT-NEXT: [[TMP57:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 19), align 4 +; DEFAULT-NEXT: [[TMP58:%.*]] = fcmp fast ogt float [[TMP56]], [[TMP57]] +; DEFAULT-NEXT: [[TMP59:%.*]] = select i1 [[TMP58]], float [[TMP56]], float [[TMP57]] +; DEFAULT-NEXT: [[TMP60:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 20), align 16 +; DEFAULT-NEXT: [[TMP61:%.*]] = fcmp fast ogt float [[TMP59]], [[TMP60]] +; DEFAULT-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], float [[TMP59]], float [[TMP60]] +; DEFAULT-NEXT: [[TMP63:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 21), align 4 +; DEFAULT-NEXT: [[TMP64:%.*]] = fcmp fast ogt float [[TMP62]], [[TMP63]] +; DEFAULT-NEXT: [[TMP65:%.*]] = select i1 [[TMP64]], float [[TMP62]], float [[TMP63]] +; DEFAULT-NEXT: [[TMP66:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 22), align 8 +; DEFAULT-NEXT: [[TMP67:%.*]] = fcmp fast ogt float [[TMP65]], [[TMP66]] +; DEFAULT-NEXT: [[TMP68:%.*]] = select i1 [[TMP67]], float [[TMP65]], float [[TMP66]] +; DEFAULT-NEXT: [[TMP69:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 23), align 4 +; 
DEFAULT-NEXT: [[TMP70:%.*]] = fcmp fast ogt float [[TMP68]], [[TMP69]] +; DEFAULT-NEXT: [[TMP71:%.*]] = select i1 [[TMP70]], float [[TMP68]], float [[TMP69]] +; DEFAULT-NEXT: [[TMP72:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 24), align 16 +; DEFAULT-NEXT: [[TMP73:%.*]] = fcmp fast ogt float [[TMP71]], [[TMP72]] +; DEFAULT-NEXT: [[TMP74:%.*]] = select i1 [[TMP73]], float [[TMP71]], float [[TMP72]] +; DEFAULT-NEXT: [[TMP75:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 25), align 4 +; DEFAULT-NEXT: [[TMP76:%.*]] = fcmp fast ogt float [[TMP74]], [[TMP75]] +; DEFAULT-NEXT: [[TMP77:%.*]] = select i1 [[TMP76]], float [[TMP74]], float [[TMP75]] +; DEFAULT-NEXT: [[TMP78:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 26), align 8 +; DEFAULT-NEXT: [[TMP79:%.*]] = fcmp fast ogt float [[TMP77]], [[TMP78]] +; DEFAULT-NEXT: [[TMP80:%.*]] = select i1 [[TMP79]], float [[TMP77]], float [[TMP78]] +; DEFAULT-NEXT: [[TMP81:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 27), align 4 +; DEFAULT-NEXT: [[TMP82:%.*]] = fcmp fast ogt float [[TMP80]], [[TMP81]] +; DEFAULT-NEXT: [[TMP83:%.*]] = select i1 [[TMP82]], float [[TMP80]], float [[TMP81]] +; DEFAULT-NEXT: [[TMP84:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 28), align 16 +; DEFAULT-NEXT: [[TMP85:%.*]] = fcmp fast ogt float [[TMP83]], [[TMP84]] +; DEFAULT-NEXT: [[TMP86:%.*]] = select i1 [[TMP85]], float [[TMP83]], float [[TMP84]] +; DEFAULT-NEXT: [[TMP87:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 29), align 4 +; DEFAULT-NEXT: [[TMP88:%.*]] = fcmp fast ogt float [[TMP86]], [[TMP87]] +; DEFAULT-NEXT: [[TMP89:%.*]] = select i1 [[TMP88]], float [[TMP86]], float [[TMP87]] +; DEFAULT-NEXT: [[TMP90:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 30), align 8 +; DEFAULT-NEXT: [[TMP91:%.*]] = fcmp fast ogt float [[TMP89]], [[TMP90]] +; DEFAULT-NEXT: [[TMP92:%.*]] = select i1 [[TMP91]], float [[TMP89]], float [[TMP90]] +; DEFAULT-NEXT: [[TMP93:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 31), align 4 +; DEFAULT-NEXT: [[TMP94:%.*]] = fcmp fast ogt float [[TMP92]], [[TMP93]] +; DEFAULT-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], float [[TMP92]], float [[TMP93]] +; DEFAULT-NEXT: ret float [[TMP95]] +; +; THRESH-LABEL: @maxf32( +; THRESH-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([32 x float]* @arr1 to <2 x float>*), align 16 +; THRESH-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; THRESH-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; THRESH-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] +; THRESH-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]] +; THRESH-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8 +; THRESH-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] +; THRESH-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]] +; THRESH-NEXT: [[TMP10:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4 +; THRESH-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] +; THRESH-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], 
float [[TMP9]], float [[TMP10]] +; THRESH-NEXT: [[TMP13:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16 +; THRESH-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] +; THRESH-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float [[TMP13]] +; THRESH-NEXT: [[TMP16:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4 +; THRESH-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] +; THRESH-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float [[TMP16]] +; THRESH-NEXT: [[TMP19:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8 +; THRESH-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] +; THRESH-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float [[TMP19]] +; THRESH-NEXT: [[TMP22:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4 +; THRESH-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] +; THRESH-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float [[TMP22]] +; THRESH-NEXT: [[TMP25:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16 +; THRESH-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] +; THRESH-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float [[TMP25]] +; THRESH-NEXT: [[TMP28:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4 +; THRESH-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] +; THRESH-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float [[TMP28]] +; THRESH-NEXT: [[TMP31:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8 +; THRESH-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] +; THRESH-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float [[TMP31]] +; THRESH-NEXT: [[TMP34:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4 +; THRESH-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] +; THRESH-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float [[TMP34]] +; THRESH-NEXT: [[TMP37:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16 +; THRESH-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] +; THRESH-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float [[TMP37]] +; THRESH-NEXT: [[TMP40:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4 +; THRESH-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] +; THRESH-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float [[TMP40]] +; THRESH-NEXT: [[TMP43:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8 +; THRESH-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]] +; THRESH-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float [[TMP43]] +; THRESH-NEXT: [[TMP46:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4 +; THRESH-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] +; THRESH-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float [[TMP46]] +; 
THRESH-NEXT: [[TMP49:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 16), align 16 +; THRESH-NEXT: [[TMP50:%.*]] = fcmp fast ogt float [[TMP48]], [[TMP49]] +; THRESH-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], float [[TMP48]], float [[TMP49]] +; THRESH-NEXT: [[TMP52:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 17), align 4 +; THRESH-NEXT: [[TMP53:%.*]] = fcmp fast ogt float [[TMP51]], [[TMP52]] +; THRESH-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP51]], float [[TMP52]] +; THRESH-NEXT: [[TMP55:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 18), align 8 +; THRESH-NEXT: [[TMP56:%.*]] = fcmp fast ogt float [[TMP54]], [[TMP55]] +; THRESH-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], float [[TMP54]], float [[TMP55]] +; THRESH-NEXT: [[TMP58:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 19), align 4 +; THRESH-NEXT: [[TMP59:%.*]] = fcmp fast ogt float [[TMP57]], [[TMP58]] +; THRESH-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP57]], float [[TMP58]] +; THRESH-NEXT: [[TMP61:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 20), align 16 +; THRESH-NEXT: [[TMP62:%.*]] = fcmp fast ogt float [[TMP60]], [[TMP61]] +; THRESH-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], float [[TMP60]], float [[TMP61]] +; THRESH-NEXT: [[TMP64:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 21), align 4 +; THRESH-NEXT: [[TMP65:%.*]] = fcmp fast ogt float [[TMP63]], [[TMP64]] +; THRESH-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], float [[TMP63]], float [[TMP64]] +; THRESH-NEXT: [[TMP67:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 22), align 8 +; THRESH-NEXT: [[TMP68:%.*]] = fcmp fast ogt float [[TMP66]], [[TMP67]] +; THRESH-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], float [[TMP66]], float [[TMP67]] +; THRESH-NEXT: [[TMP70:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 23), align 4 +; THRESH-NEXT: [[TMP71:%.*]] = fcmp fast ogt float [[TMP69]], [[TMP70]] +; THRESH-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], float [[TMP69]], float [[TMP70]] +; THRESH-NEXT: [[TMP73:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 24), align 16 +; THRESH-NEXT: [[TMP74:%.*]] = fcmp fast ogt float [[TMP72]], [[TMP73]] +; THRESH-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], float [[TMP72]], float [[TMP73]] +; THRESH-NEXT: [[TMP76:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 25), align 4 +; THRESH-NEXT: [[TMP77:%.*]] = fcmp fast ogt float [[TMP75]], [[TMP76]] +; THRESH-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], float [[TMP75]], float [[TMP76]] +; THRESH-NEXT: [[TMP79:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 26), align 8 +; THRESH-NEXT: [[TMP80:%.*]] = fcmp fast ogt float [[TMP78]], [[TMP79]] +; THRESH-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], float [[TMP78]], float [[TMP79]] +; THRESH-NEXT: [[TMP82:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 27), align 4 +; THRESH-NEXT: [[TMP83:%.*]] = fcmp fast ogt float [[TMP81]], [[TMP82]] +; THRESH-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], float [[TMP81]], float [[TMP82]] +; THRESH-NEXT: [[TMP85:%.*]] = load 
float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 28), align 16 +; THRESH-NEXT: [[TMP86:%.*]] = fcmp fast ogt float [[TMP84]], [[TMP85]] +; THRESH-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], float [[TMP84]], float [[TMP85]] +; THRESH-NEXT: [[TMP88:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 29), align 4 +; THRESH-NEXT: [[TMP89:%.*]] = fcmp fast ogt float [[TMP87]], [[TMP88]] +; THRESH-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], float [[TMP87]], float [[TMP88]] +; THRESH-NEXT: [[TMP91:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 30), align 8 +; THRESH-NEXT: [[TMP92:%.*]] = fcmp fast ogt float [[TMP90]], [[TMP91]] +; THRESH-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], float [[TMP90]], float [[TMP91]] +; THRESH-NEXT: [[TMP94:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 31), align 4 +; THRESH-NEXT: [[TMP95:%.*]] = fcmp fast ogt float [[TMP93]], [[TMP94]] +; THRESH-NEXT: [[TMP96:%.*]] = select i1 [[TMP95]], float [[TMP93]], float [[TMP94]] +; THRESH-NEXT: ret float [[TMP96]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 From 54fcea86b1658f5fc70f4f1e7a763f87742d79bc Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 10 Sep 2020 11:36:13 -0700 Subject: [PATCH 0293/1079] Revert "[Support] Use unique_function rather than std::function for ThreadPool TaskTy." This reverts commit d9c8b0256cfc673c2413b13993c9440be598818f. Some MSVC std::packaged_task implementations are not compatible with move-only types. This caused failures on some of the Windows builders (e.g. http://lab.llvm.org:8011/builders/sanitizer-windows/builds/69412). Reverting until I can come up with a workaround. --- llvm/include/llvm/Support/ThreadPool.h | 3 +-- llvm/unittests/Support/ThreadPool.cpp | 7 ------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h index 3d24fb0997393..528fb32525eb2 100644 --- a/llvm/include/llvm/Support/ThreadPool.h +++ b/llvm/include/llvm/Support/ThreadPool.h @@ -13,7 +13,6 @@ #ifndef LLVM_SUPPORT_THREAD_POOL_H #define LLVM_SUPPORT_THREAD_POOL_H -#include "llvm/ADT/FunctionExtras.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/Threading.h" #include "llvm/Support/thread.h" @@ -37,7 +36,7 @@ namespace llvm { /// for some work to become available. 
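// An aside on the failure this revert works around: a minimal repro sketch,
// assuming only standard <future> and <memory> (illustrative code, not taken
// from the patch itself).
//
#include <future>
#include <memory>
#include <utility>

int main() {
  auto MoveOnly = [P = std::make_unique<int>()] {}; // move-only callable
  // std::packaged_task<void()> T(std::move(MoveOnly)); // ill-formed on the
  // affected MSVC standard libraries, which require a copyable callable even
  // though the standard only asks for move-constructibility.
  (void)MoveOnly;
  std::packaged_task<void()> U([] {}); // copyable callables work everywhere
  U();
  return 0;
}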
class ThreadPool {
public:
-  using TaskTy = unique_function<void()>;
+  using TaskTy = std::function<void()>;
   using PackagedTaskTy = std::packaged_task<void()>;
 
   /// Construct a pool using the hardware strategy \p S for mapping hardware
diff --git a/llvm/unittests/Support/ThreadPool.cpp b/llvm/unittests/Support/ThreadPool.cpp
index b3747376689a8..43882d0f3ceea 100644
--- a/llvm/unittests/Support/ThreadPool.cpp
+++ b/llvm/unittests/Support/ThreadPool.cpp
@@ -133,13 +133,6 @@ TEST_F(ThreadPoolTest, Async) {
   ASSERT_EQ(2, i.load());
 }
 
-TEST_F(ThreadPoolTest, NonCopyableTask) {
-  CHECK_UNSUPPORTED();
-  ThreadPool Pool;
-  Pool.async([P = std::make_unique<int>()] {});
-  Pool.wait();
-};
-
 TEST_F(ThreadPoolTest, GetFuture) {
   CHECK_UNSUPPORTED();
   ThreadPool Pool(hardware_concurrency(2));

From 4252f3009b169db250559d6a197b399375f89b27 Mon Sep 17 00:00:00 2001
From: Dominic Chen
Date: Thu, 10 Sep 2020 01:02:13 -0400
Subject: [PATCH 0294/1079] [WebAssembly] Set unreachable as canonical to permit disassembly

Currently, using llvm-objdump to disassemble a function containing
unreachable will trigger an assertion while decoding the opcode, since both
unreachable and debug_unreachable have the same encoding. To avoid this,
set unreachable as the canonical decoding.

Differential Revision: https://reviews.llvm.org/D87431
---
 llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index 171dd9a67beb5..63aeb1b467379 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -103,7 +103,7 @@ defm FALLTHROUGH_RETURN : I<(outs), (ins variable_ops), (outs), (ins), []>;
 } // isReturn = 1
 
-let isTrap = 1 in
+let IsCanonical = 1, isTrap = 1 in
 defm UNREACHABLE : NRI<(outs), (ins), [(trap)], "unreachable", 0x00>;
 } // isTerminator = 1

From a39423084cbbeb59e81002e741190dccf08b5c82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Federico=20Lebr=C3=B3n?=
Date: Thu, 10 Sep 2020 19:00:49 +0000
Subject: [PATCH 0295/1079] Make struct dialects have the same field name as everything else, 'dialect'.

Also make the behavior of getting a dialect more forgiving, in the case
where there isn't a dialect associated with an attribute.

Depends On D86807

Reviewed By: mehdi_amini

Differential Revision: https://reviews.llvm.org/D86809
---
 mlir/include/mlir/IR/OpBase.td  | 4 ++--
 mlir/lib/TableGen/Attribute.cpp | 9 +++++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td
index b0f08e93666a3..29f139f25069b 100644
--- a/mlir/include/mlir/IR/OpBase.td
+++ b/mlir/include/mlir/IR/OpBase.td
@@ -1443,7 +1443,7 @@ class StructFieldAttr<string thisName, Attr thisType> {
 // Structured attribute that wraps a DictionaryAttr and provides both a
 // validation method and set of accessors for a fixed set of fields. This is
 // useful when representing data that would normally be in a structure.
-class StructAttr attributes> : DictionaryAttrBase()">, "DictionaryAttr with field(s): " # @@ -1459,7 +1459,7 @@ class StructAttr fields = attributes; diff --git a/mlir/lib/TableGen/Attribute.cpp b/mlir/lib/TableGen/Attribute.cpp index e489174a38d91..f34d9c00b4388 100644 --- a/mlir/lib/TableGen/Attribute.cpp +++ b/mlir/lib/TableGen/Attribute.cpp @@ -126,7 +126,12 @@ StringRef Attribute::getDerivedCodeBody() const { } Dialect Attribute::getDialect() const { - return Dialect(def->getValueAsDef("dialect")); + const llvm::RecordVal *record = def->getValue("dialect"); + if (record && record->getValue()) { + if (DefInit *init = dyn_cast(record->getValue())) + return Dialect(init->getDef()); + } + return Dialect(nullptr); } ConstantAttr::ConstantAttr(const DefInit *init) : def(init->getDef()) { @@ -255,7 +260,7 @@ StringRef StructAttr::getStructClassName() const { } StringRef StructAttr::getCppNamespace() const { - Dialect dialect(def->getValueAsDef("structDialect")); + Dialect dialect(def->getValueAsDef("dialect")); return dialect.getCppNamespace(); } From d867be5de389f18cf3c1a61c8b9cbf8bfda8fe28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Federico=20Lebr=C3=B3n?= Date: Thu, 10 Sep 2020 19:14:42 +0000 Subject: [PATCH 0296/1079] Allow Dialects to be initialized via nullptr. This allows Dialect to follow the MLIR style of nullable objects, and in fact is expected by `Dialect::operator bool() const` which already tests whether `def == nullptr`. This just wasn't a reachable situation, because the constructor was dereferencing the pointer unconditionally. Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D86807 --- mlir/lib/TableGen/Dialect.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mlir/lib/TableGen/Dialect.cpp b/mlir/lib/TableGen/Dialect.cpp index 2b5f7e534ecc7..c17180c204833 100644 --- a/mlir/lib/TableGen/Dialect.cpp +++ b/mlir/lib/TableGen/Dialect.cpp @@ -16,6 +16,8 @@ using namespace mlir; using namespace mlir::tblgen; Dialect::Dialect(const llvm::Record *def) : def(def) { + if (def == nullptr) + return; for (StringRef dialect : def->getValueAsListOfStrings("dependentDialects")) dependentDialects.push_back(dialect); } From 5692497aef08ab4810f125669bc2f6aa79d9ec7e Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 10 Sep 2020 15:10:12 -0400 Subject: [PATCH 0297/1079] [gn build] (semi-manually) port 009cd4e4910 --- .../llvm/lib/Target/PowerPC/BUILD.gn | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn index 3a452fc6e0601..9adb514705d44 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn @@ -18,17 +18,32 @@ tablegen("PPCGenFastISel") { td_file = "PPC.td" } +tablegen("PPCGenGlobalISel") { + visibility = [ ":LLVMPowerPCCodeGen" ] + args = [ "-gen-global-isel" ] + td_file = "PPC.td" +} + +tablegen("PPCGenRegisterBank") { + visibility = [ ":LLVMPowerPCCodeGen" ] + args = [ "-gen-register-bank" ] + td_file = "PPC.td" +} + static_library("LLVMPowerPCCodeGen") { deps = [ ":PPCGenCallingConv", ":PPCGenDAGISel", ":PPCGenFastISel", + ":PPCGenGlobalISel", + ":PPCGenRegisterBank", "MCTargetDesc", "TargetInfo", "//llvm/include/llvm/Config:llvm-config", "//llvm/lib/Analysis", "//llvm/lib/CodeGen", "//llvm/lib/CodeGen/AsmPrinter", + "//llvm/lib/CodeGen/GlobalISel", "//llvm/lib/CodeGen/SelectionDAG", "//llvm/lib/IR", "//llvm/lib/MC", @@ -38,6 +53,10 @@ 
static_library("LLVMPowerPCCodeGen") { ] include_dirs = [ "." ] sources = [ + "GISel/PPCCallLowering.cpp", + "GISel/PPCInstructionSelector.cpp", + "GISel/PPCLegalizerInfo.cpp", + "GISel/PPCRegisterBankInfo.cpp", "PPCAsmPrinter.cpp", "PPCBoolRetToInt.cpp", "PPCBranchCoalescing.cpp", From 2141705337989195b448e292955f08884babbcbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Federico=20Lebr=C3=B3n?= Date: Thu, 10 Sep 2020 19:18:07 +0000 Subject: [PATCH 0298/1079] Fix operator!= for Dialects. Currently the global operator!=(bool, bool) is selected due to the implicit bool conversion operator. Since this is never the desired semantics, we give it a standard operator!= and make the bool conversion explicit. Depends On D86809 Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D86810 --- mlir/include/mlir/TableGen/Dialect.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/TableGen/Dialect.h b/mlir/include/mlir/TableGen/Dialect.h index 623d614d26d38..ee86a2504b3c9 100644 --- a/mlir/include/mlir/TableGen/Dialect.h +++ b/mlir/include/mlir/TableGen/Dialect.h @@ -67,11 +67,13 @@ class Dialect { // underlying record. bool operator==(const Dialect &other) const; + bool operator!=(const Dialect &other) const { return !(*this == other); } + // Compares two dialects by comparing the names of the dialects. bool operator<(const Dialect &other) const; // Returns whether the dialect is defined. - operator bool() const { return def != nullptr; } + explicit operator bool() const { return def != nullptr; } private: const llvm::Record *def; From 783e28a50839e045b72ec11946295fba104642fc Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 10 Sep 2020 14:15:37 -0500 Subject: [PATCH 0299/1079] [Hexagon] Split pair-based masked memops --- .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 2 ++ .../Hexagon/autohvx/isel-split-masked.ll | 32 +++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/isel-split-masked.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 22561691f0e02..e63cb50a0fb84 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -1985,6 +1985,8 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { break; case ISD::LOAD: case ISD::STORE: + case ISD::MLOAD: + case ISD::MSTORE: return SplitHvxMemOp(Op, DAG); case ISD::CTPOP: case ISD::CTLZ: diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-split-masked.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-split-masked.ll new file mode 100644 index 0000000000000..61bcbce6e6422 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-split-masked.ll @@ -0,0 +1,32 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Check that this compiles successfully. 
+; CHECK: vmem + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define void @f0() #0 { +b0: + %v0 = call <64 x i32> @llvm.masked.load.v64i32.p0v64i32(<64 x i32>* nonnull undef, i32 4, <64 x i1> , <64 x i32> undef) + %v1 = icmp sgt <64 x i32> %v0, zeroinitializer + %v2 = sext <64 x i1> %v1 to <64 x i32> + %v3 = add nsw <64 x i32> zeroinitializer, %v2 + %v4 = add nsw <64 x i32> %v3, zeroinitializer + %v5 = icmp sgt <64 x i32> %v4, zeroinitializer + %v6 = select <64 x i1> %v5, <64 x i32> %v4, <64 x i32> zeroinitializer + %v7 = select <64 x i1> zeroinitializer, <64 x i32> undef, <64 x i32> %v6 + %v8 = trunc <64 x i32> %v7 to <64 x i16> + call void @llvm.masked.store.v64i16.p0v64i16(<64 x i16> %v8, <64 x i16>* undef, i32 2, <64 x i1> ) + ret void +} + +; Function Attrs: argmemonly nounwind readonly willreturn +declare <64 x i32> @llvm.masked.load.v64i32.p0v64i32(<64 x i32>*, i32 immarg, <64 x i1>, <64 x i32>) #1 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.masked.store.v64i16.p0v64i16(<64 x i16>, <64 x i16>*, i32 immarg, <64 x i1>) #2 + +attributes #0 = { "target-features"="+hvx-length128b,+hvxv67,+v67,-long-calls" } +attributes #1 = { argmemonly nounwind readonly willreturn } +attributes #2 = { argmemonly nounwind willreturn } From 7ddfd9b3ebfd3f3db7c6c2e8c72308ff3a3426f2 Mon Sep 17 00:00:00 2001 From: Christopher Tetreault Date: Thu, 10 Sep 2020 11:29:16 -0700 Subject: [PATCH 0300/1079] [SVE] Bail from VectorUtils heuristics for scalable vectors Bail from maskIsAllZeroOrUndef and maskIsAllOneOrUndef prior to iterating over the number of elements for scalable vectors. Assert that the mask type is not scalable in possiblyDemandedEltsInMask . Assert that the types are correct in all three functions. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D87424 --- llvm/include/llvm/Analysis/VectorUtils.h | 14 ++++++------- llvm/lib/Analysis/VectorUtils.cpp | 21 +++++++++++++++++++ .../InstCombine/InstCombineCalls.cpp | 18 ++++++++++------ .../AArch64/VectorUtils_heuristics.ll | 21 +++++++++++++++++++ 4 files changed, 61 insertions(+), 13 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/AArch64/VectorUtils_heuristics.ll diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 8498335bf78e6..c570bf25e92b5 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -544,20 +544,20 @@ createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs); /// elements, it will be padded with undefs. Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef Vecs); -/// Given a mask vector of the form , Return true if all of the -/// elements of this predicate mask are false or undef. That is, return true -/// if all lanes can be assumed inactive. +/// Given a mask vector of i1, Return true if all of the elements of this +/// predicate mask are known to be false or undef. That is, return true if all +/// lanes can be assumed inactive. bool maskIsAllZeroOrUndef(Value *Mask); -/// Given a mask vector of the form , Return true if all of the -/// elements of this predicate mask are true or undef. That is, return true -/// if all lanes can be assumed active. +/// Given a mask vector of i1, Return true if all of the elements of this +/// predicate mask are known to be true or undef. 
That is, return true if all
+/// lanes can be assumed active.
 bool maskIsAllOneOrUndef(Value *Mask);
 
 /// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
 /// containing a 1 for each lane which may be active.
 APInt possiblyDemandedEltsInMask(Value *Mask);
- 
+
 /// The group of interleaved loads/stores sharing the same stride and
 /// close to each other.
 ///
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index e241300dd2e7c..0b10983442e20 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -863,11 +863,19 @@ Value *llvm::concatenateVectors(IRBuilderBase &Builder,
 }
 
 bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
+  assert(isa<VectorType>(Mask->getType()) &&
+         isa<IntegerType>(Mask->getType()->getScalarType()) &&
+         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
+             1 &&
+         "Mask must be a vector of i1");
+
   auto *ConstMask = dyn_cast<Constant>(Mask);
   if (!ConstMask)
     return false;
   if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
     return true;
+  if (isa<ScalableVectorType>(ConstMask->getType()))
+    return false;
   for (unsigned
            I = 0,
            E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
@@ -882,11 +890,19 @@
 
 bool llvm::maskIsAllOneOrUndef(Value *Mask) {
+  assert(isa<VectorType>(Mask->getType()) &&
+         isa<IntegerType>(Mask->getType()->getScalarType()) &&
+         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
+             1 &&
+         "Mask must be a vector of i1");
+
   auto *ConstMask = dyn_cast<Constant>(Mask);
   if (!ConstMask)
     return false;
   if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
     return true;
+  if (isa<ScalableVectorType>(ConstMask->getType()))
+    return false;
   for (unsigned
            I = 0,
            E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
@@ -902,6 +918,11 @@ bool llvm::maskIsAllOneOrUndef(Value *Mask) {
 
 /// TODO: This is a lot like known bits, but for
 /// vectors.  Is there something we can common this with?
 APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
+  assert(isa<FixedVectorType>(Mask->getType()) &&
+         isa<IntegerType>(Mask->getType()->getScalarType()) &&
+         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
+             1 &&
+         "Mask must be a fixed width vector of i1");
   const unsigned VWidth =
       cast<FixedVectorType>(Mask->getType())->getNumElements();
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 11c2367d1608e..334e4e3e74abb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -319,11 +319,14 @@ Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
     return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
   }
 
+  if (isa<ScalableVectorType>(ConstMask->getType()))
+    return nullptr;
+
   // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
   APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
   APInt UndefElts(DemandedElts.getBitWidth(), 0);
-  if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0),
-                                            DemandedElts, UndefElts))
+  if (Value *V =
+          SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
     return replaceOperand(II, 0, V);
 
   return nullptr;
@@ -355,14 +358,17 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
   if (ConstMask->isNullValue())
     return eraseInstFromFunction(II);
 
+  if (isa<ScalableVectorType>(ConstMask->getType()))
+    return nullptr;
+
   // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
   APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
   APInt UndefElts(DemandedElts.getBitWidth(), 0);
-  if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0),
-                                            DemandedElts, UndefElts))
+  if (Value *V =
+          SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
     return replaceOperand(II, 0, V);
-  if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1),
-                                            DemandedElts, UndefElts))
+  if (Value *V =
+          SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts, UndefElts))
     return replaceOperand(II, 1, V);
 
   return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/VectorUtils_heuristics.ll b/llvm/test/Transforms/InstCombine/AArch64/VectorUtils_heuristics.ll
new file mode 100644
index 0000000000000..b3a166d10b696
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/VectorUtils_heuristics.ll
@@ -0,0 +1,21 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; This test checks that instcombine does not crash while invoking
+; maskIsAllOneOrUndef, maskIsAllZeroOrUndef, or possiblyDemandedEltsInMask.
+
+; CHECK-LABEL: novel_algorithm
+; CHECK: unreachable
+define void @novel_algorithm() {
+entry:
+  %a = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i8> undef)
+  %b = add <vscale x 16 x i8> undef, %a
+  call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> %b, <vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer))
+  unreachable
+}
+
+declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
+
+declare void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>)

From b1b9806370196234a62304d308a9f8873759ec28 Mon Sep 17 00:00:00 2001
From: Anna Thomas
Date: Thu, 10 Sep 2020 15:30:42 -0400
Subject: [PATCH 0301/1079] [ImplicitNullChecks] NFC: Remove unused PointerReg arg in dep analysis

The PointerReg arg was passed into the dependence function for an
assertion which no longer exists. So, this patch updates the dependence
functions to avoid the PointerReg in the signature.

Tests-Run: make check
---
 llvm/lib/CodeGen/ImplicitNullChecks.cpp | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index 8e1f9c36c7fec..9030f32268377 100644
--- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -204,13 +204,12 @@ class ImplicitNullChecks : public MachineFunctionPass {
   /// if it was hoisted to the NullCheck block. This is used by caller
   /// canHoistInst to decide if DependenceMI can be hoisted safely.
   bool canDependenceHoistingClobberLiveIns(MachineInstr *DependenceMI,
-                                           MachineBasicBlock *NullSucc,
-                                           unsigned PointerReg);
+                                           MachineBasicBlock *NullSucc);
 
   /// Return true if \p FaultingMI can be hoisted from after the
   /// instructions in \p InstsSeenSoFar to before them. Set \p Dependence to a
   /// non-null value if we also need to (and legally can) hoist a dependency.
-  bool canHoistInst(MachineInstr *FaultingMI, unsigned PointerReg,
+  bool canHoistInst(MachineInstr *FaultingMI,
                     ArrayRef<MachineInstr *> InstsSeenSoFar,
                     MachineBasicBlock *NullSucc, MachineInstr *&Dependence);
 
@@ -409,8 +408,7 @@ ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI,
 }
 
 bool ImplicitNullChecks::canDependenceHoistingClobberLiveIns(
-    MachineInstr *DependenceMI, MachineBasicBlock *NullSucc,
-    unsigned PointerReg) {
+    MachineInstr *DependenceMI, MachineBasicBlock *NullSucc) {
   for (auto &DependenceMO : DependenceMI->operands()) {
     if (!(DependenceMO.isReg() && DependenceMO.getReg()))
       continue;
@@ -442,7 +440,6 @@ bool ImplicitNullChecks::canDependenceHoistingClobberLiveIns(
 }
 
 bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
-                                      unsigned PointerReg,
                                       ArrayRef<MachineInstr *> InstsSeenSoFar,
                                       MachineBasicBlock *NullSucc,
                                       MachineInstr *&Dependence) {
@@ -467,7 +464,7 @@ bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
   if (DependenceMI->mayLoadOrStore())
     return false;
 
-  if (canDependenceHoistingClobberLiveIns(DependenceMI, NullSucc, PointerReg))
+  if (canDependenceHoistingClobberLiveIns(DependenceMI, NullSucc))
     return false;
 
   auto DepDepResult =
@@ -616,7 +613,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
     if (SR == SR_Impossible)
       return false;
     if (SR == SR_Suitable &&
-        canHoistInst(&MI, PointerReg, InstsSeenSoFar, NullSucc, Dependence)) {
+        canHoistInst(&MI, InstsSeenSoFar, NullSucc, Dependence)) {
       NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc,
                                  NullSucc, Dependence);
       return true;

From 878cb5170de9bf03798a40185952bdf50fe4a15e Mon Sep 17 00:00:00 2001
From: Siva Chandra Reddy
Date: Thu, 10 Sep 2020 11:45:21 -0700
Subject: [PATCH 0302/1079] [libc][NFC][obvious] Remove a redundant dep of strcmp implementation.
---
 libc/src/string/CMakeLists.txt | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index 8efe8c89e9e7f..a347f2bf52675 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -48,8 +48,6 @@ add_entrypoint_object(
     strcmp.cpp
   HDRS
     strcmp.h
-  DEPENDS
-    libc.include.string
 )
 
 add_entrypoint_object(

From 4934127e627d7c58342be15bc9230a7cbdf5273f Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu"
Date: Thu, 10 Sep 2020 11:51:31 -0400
Subject: [PATCH 0303/1079] Disable sanitizer options for amdgpu

Currently AMDGPU does not support sanitizers. Disable sanitizer options
for now until they are supported.

Differential Revision: https://reviews.llvm.org/D87461
---
 clang/lib/Driver/SanitizerArgs.cpp         | 8 ++++----
 clang/test/Driver/hip-sanitize-options.hip | 9 +++++++++
 2 files changed, 13 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/Driver/hip-sanitize-options.hip

diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp
index 0f51443010ca4..0cb1e7b5282b6 100644
--- a/clang/lib/Driver/SanitizerArgs.cpp
+++ b/clang/lib/Driver/SanitizerArgs.cpp
@@ -929,10 +929,10 @@ static bool hasTargetFeatureMTE(const llvm::opt::ArgStringList &CmdArgs) {
 void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
                             llvm::opt::ArgStringList &CmdArgs,
                             types::ID InputType) const {
-  // NVPTX doesn't currently support sanitizers. Bailing out here means that
-  // e.g. -fsanitize=address applies only to host code, which is what we want
-  // for now.
-  if (TC.getTriple().isNVPTX())
+  // NVPTX/AMDGPU doesn't currently support sanitizers. Bailing out here means
+  // that e.g.
-fsanitize=address applies only to host code, which is what we + // want for now. + if (TC.getTriple().isNVPTX() || TC.getTriple().isAMDGPU()) return; // Translate available CoverageFeatures to corresponding clang-cc1 flags. diff --git a/clang/test/Driver/hip-sanitize-options.hip b/clang/test/Driver/hip-sanitize-options.hip new file mode 100644 index 0000000000000..908e02136cada --- /dev/null +++ b/clang/test/Driver/hip-sanitize-options.hip @@ -0,0 +1,9 @@ +// REQUIRES: clang-driver, x86-registered-target, amdgpu-registered-target + +// RUN: %clang -### -target x86_64-unknown-linux-gnu --offload-arch=gfx906 \ +// RUN: -fsanitize=address \ +// RUN: -nogpuinc -nogpulib \ +// RUN: %s 2>&1 | FileCheck %s + +// CHECK-NOT: {{"[^"]*clang[^"]*".* "-fcuda-is-device".* "-fsanitize=address"}} +// CHECK: {{"[^"]*clang[^"]*".* "-triple" "x86_64-unknown-linux-gnu".* "-fsanitize=address"}} From d4bf90271fa988101bdad4f2e78b8c3a0b85fc2d Mon Sep 17 00:00:00 2001 From: Volkan Keles Date: Thu, 10 Sep 2020 12:57:38 -0700 Subject: [PATCH 0304/1079] GlobalISel: Combine fneg(fneg x) to x https://reviews.llvm.org/D87473 --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 3 ++ .../include/llvm/Target/GlobalISel/Combine.td | 12 +++++++- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 6 ++++ .../AArch64/GlobalISel/combine-fneg.mir | 28 +++++++++++++++++++ 4 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-fneg.mir diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 745522d6b98e0..a403f870ee5eb 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -269,6 +269,9 @@ class CombinerHelper { bool applyCombineExtOfExt(MachineInstr &MI, std::tuple &MatchInfo); + /// Transform fneg(fneg(x)) to x. + bool matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg); + /// Return true if any explicit use operand on \p MI is defined by a /// G_IMPLICIT_DEF. bool matchAnyExplicitUseIsUndef(MachineInstr &MI); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 4d038ad7b240e..5c7e395d54976 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -385,6 +385,15 @@ def not_cmp_fold : GICombineRule< (apply [{ return Helper.applyNotCmp(*${d}, ${info}); }]) >; +// Fold (fneg (fneg x)) -> x. +def fneg_fneg_fold_matchinfo : GIDefMatchData<"Register">; +def fneg_fneg_fold: GICombineRule < + (defs root:$root, fneg_fneg_fold_matchinfo:$matchinfo), + (match (wip_match_opcode G_FNEG):$root, + [{ return Helper.matchCombineFNegOfFNeg(*${root}, ${matchinfo}); }]), + (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }]) +>; + // FIXME: These should use the custom predicate feature once it lands. 
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -397,7 +406,8 @@ def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, def identity_combines : GICombineGroup<[select_same_val, right_identity_zero, binop_same_val, binop_left_to_zero, binop_right_to_zero, p2i_to_i2p, - i2p_to_p2i, anyext_trunc_fold]>; + i2p_to_p2i, anyext_trunc_fold, + fneg_fneg_fold]>; def known_bits_simplifications : GICombineGroup<[ and_trivial_mask, redundant_sext_inreg]>; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 356f084711095..377bbd6526597 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1813,6 +1813,12 @@ bool CombinerHelper::applyCombineExtOfExt( return false; } +bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG"); + Register SrcReg = MI.getOperand(1).getReg(); + return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg))); +} + bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) { return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) { return MO.isReg() && diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fneg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fneg.mir new file mode 100644 index 0000000000000..2d0d23088770f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fneg.mir @@ -0,0 +1,28 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s +--- +name: test_combine_fneg_fneg +body: | + bb.1: + liveins: $w0 + ; CHECK-LABEL: name: test_combine_fneg_fneg + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: $w0 = COPY [[COPY]](s32) + %0:_(s32) = COPY $w0 + %1:_(s32) = G_FNEG %0(s32) + %2:_(s32) = G_FNEG %1(s32) + $w0 = COPY %2(s32) +... +--- +name: test_combine_fneg_fneg_vec +body: | + bb.1: + liveins: $x0 + ; CHECK-LABEL: name: test_combine_fneg_fneg_vec + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0 + ; CHECK: $x0 = COPY [[COPY]](<2 x s32>) + %0:_(<2 x s32>) = COPY $x0 + %1:_(<2 x s32>) = G_FNEG %0(<2 x s32>) + %2:_(<2 x s32>) = G_FNEG %1(<2 x s32>) + $x0 = COPY %2(<2 x s32>) +... 
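Why the fneg(fneg x) -> x fold in the patch above needs no fast-math flags:
IEEE-754 negation only flips the sign bit, so applying it twice restores the
exact original bit pattern, including NaN payloads and signed zeros. A minimal
standalone sketch of that identity (assuming IEEE-754 floats; illustrative,
not part of the patch):

#include <cassert>
#include <cstdint>
#include <cstring>
#include <limits>

int main() {
  const float Vals[] = {0.0f, -0.0f, 1.5f,
                        std::numeric_limits<float>::quiet_NaN()};
  for (float V : Vals) {
    float NN = -(-V); // fneg(fneg v)
    uint32_t A, B;
    std::memcpy(&A, &V, sizeof(A)); // inspect the raw bit patterns
    std::memcpy(&B, &NN, sizeof(B));
    assert(A == B); // double negation is a bit-exact identity
  }
  return 0;
}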
From adb738899e6378ae0023acb19cde57a585dce502 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 10 Sep 2020 18:51:34 +0200 Subject: [PATCH 0305/1079] [InstCombine] Regenerate test checks (NFC) --- llvm/test/Transforms/InstCombine/rem.ll | 26 +++++++++---------- .../InstCombine/select-binop-cmp.ll | 12 ++++----- llvm/test/Transforms/InstCombine/select.ll | 4 +-- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/rem.ll b/llvm/test/Transforms/InstCombine/rem.ll index c833acc16853f..2b9f5326dd152 100644 --- a/llvm/test/Transforms/InstCombine/rem.ll +++ b/llvm/test/Transforms/InstCombine/rem.ll @@ -49,9 +49,9 @@ define i8 @big_divisor(i8 %x) { define i5 @biggest_divisor(i5 %x) { ; CHECK-LABEL: @biggest_divisor( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i5 [[X:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i5 -; CHECK-NEXT: [[REM:%.*]] = add i5 [[TMP2]], [[X]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i5 [[X:%.*]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[DOTNOT]] to i5 +; CHECK-NEXT: [[REM:%.*]] = add i5 [[TMP1]], [[X]] ; CHECK-NEXT: ret i5 [[REM]] ; %rem = urem i5 %x, -1 @@ -128,8 +128,8 @@ define i8 @urem2(i8 %x, i8 %y) { define i8 @urem3(i8 %x) { ; CHECK-LABEL: @urem3( ; CHECK-NEXT: [[TMP1:%.*]] = urem i8 [[X:%.*]], 3 -; CHECK-NEXT: [[B1:%.*]] = sub i8 [[X]], [[TMP1]] -; CHECK-NEXT: [[C:%.*]] = add i8 [[B1]], [[X]] +; CHECK-NEXT: [[B_NEG:%.*]] = sub i8 [[X]], [[TMP1]] +; CHECK-NEXT: [[C:%.*]] = add i8 [[B_NEG]], [[X]] ; CHECK-NEXT: ret i8 [[C]] ; %A = udiv i8 %x, 3 @@ -377,10 +377,10 @@ define i32 @test17(i32 %X) { define i32 @test18(i16 %x, i32 %y) { ; CHECK-LABEL: @test18( ; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[X:%.*]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 63, i32 31 -; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], [[Y:%.*]] -; CHECK-NEXT: ret i32 [[TMP4]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i16 [[TMP1]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[DOTNOT]], i32 63, i32 31 +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[TMP3]] ; %1 = and i16 %x, 4 %2 = icmp ne i16 %1, 0 @@ -477,10 +477,10 @@ define i32 @test21(i1 %c0, i32* %p) { ; CHECK-NEXT: br i1 [[C0:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[V:%.*]] = load volatile i32, i32* [[P:%.*]], align 4 -; CHECK-NEXT: [[PHITMP:%.*]] = srem i32 [[V]], 5 +; CHECK-NEXT: [[PHI_BO:%.*]] = srem i32 [[V]], 5 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[LHS:%.*]] = phi i32 [ [[PHITMP]], [[IF_THEN]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LHS:%.*]] = phi i32 [ [[PHI_BO]], [[IF_THEN]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i32 [[LHS]] ; entry: @@ -606,10 +606,10 @@ define i32 @pr27968_3(i1 %c0, i1 %always_false, i32* %p) { ; CHECK-NEXT: br i1 [[C0:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[V:%.*]] = load volatile i32, i32* [[P:%.*]], align 4 -; CHECK-NEXT: [[PHITMP:%.*]] = and i32 [[V]], 2147483647 +; CHECK-NEXT: [[PHI_BO:%.*]] = and i32 [[V]], 2147483647 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[LHS:%.*]] = phi i32 [ [[PHITMP]], [[IF_THEN]] ], [ 5, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LHS:%.*]] = phi i32 [ [[PHI_BO]], [[IF_THEN]] ], [ 5, [[ENTRY:%.*]] ] ; CHECK-NEXT: br i1 [[ALWAYS_FALSE:%.*]], label [[REM_IS_SAFE:%.*]], label [[REM_IS_UNSAFE:%.*]] ; CHECK: rem.is.safe: ; CHECK-NEXT: ret i32 [[LHS]] diff --git 
a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll index a473acd730493..4173c31b2acb1 100644 --- a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll +++ b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll @@ -18,8 +18,8 @@ define i32 @select_xor_icmp(i32 %x, i32 %y, i32 %z) { define i32 @select_xor_icmp2(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_xor_icmp2( -; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]] +; CHECK-NEXT: [[A_NOT:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[C:%.*]] = select i1 [[A_NOT]], i32 [[Z:%.*]], i32 [[Y:%.*]] ; CHECK-NEXT: ret i32 [[C]] ; %A = icmp ne i32 %x, 0 @@ -527,9 +527,9 @@ define i32 @select_xor_fcmp_bad_4(i32 %x, i32 %y, i32 %z, float %k) { define i32 @select_xor_icmp_bad_5(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_xor_icmp_bad_5( -; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[A_NOT:%.*]] = icmp eq i32 [[X:%.*]], 0 ; CHECK-NEXT: [[B:%.*]] = xor i32 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Y:%.*]], i32 [[B]] +; CHECK-NEXT: [[C:%.*]] = select i1 [[A_NOT]], i32 [[Y:%.*]], i32 [[B]] ; CHECK-NEXT: ret i32 [[C]] ; %A = icmp ne i32 %x, 0 @@ -540,9 +540,9 @@ define i32 @select_xor_icmp_bad_5(i32 %x, i32 %y, i32 %z) { define i32 @select_xor_icmp_bad_6(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_xor_icmp_bad_6( -; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 1 +; CHECK-NEXT: [[A_NOT:%.*]] = icmp eq i32 [[X:%.*]], 1 ; CHECK-NEXT: [[B:%.*]] = xor i32 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = select i1 [[A_NOT]], i32 [[B]], i32 [[Y:%.*]] ; CHECK-NEXT: ret i32 [[C]] ; %A = icmp ne i32 %x, 1 diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 0ac9c699b1ddb..8c9a2b5a5eee9 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -1924,8 +1924,8 @@ define i32 @select_dominance_chain(i1 %cond, i32 %x, i32 %y) { ; CHECK: if.false.3: ; CHECK-NEXT: br label [[MERGE_3]] ; CHECK: merge.3: -; CHECK-NEXT: [[S_3:%.*]] = phi i32 [ [[Y:%.*]], [[IF_FALSE_3]] ], [ [[X:%.*]], [[IF_TRUE_3]] ] -; CHECK-NEXT: [[SUM_2:%.*]] = mul i32 [[S_3]], 3 +; CHECK-NEXT: [[S_1:%.*]] = phi i32 [ [[Y:%.*]], [[IF_FALSE_3]] ], [ [[X:%.*]], [[IF_TRUE_3]] ] +; CHECK-NEXT: [[SUM_2:%.*]] = mul i32 [[S_1]], 3 ; CHECK-NEXT: ret i32 [[SUM_2]] ; entry: From 476836331f7d31ca46779742dccf2e26698b94ed Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 10 Sep 2020 18:53:08 +0200 Subject: [PATCH 0306/1079] [InstCombine] Add more tests for select op replacement (NFC) --- llvm/test/Transforms/InstCombine/select.ll | 97 ++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 8c9a2b5a5eee9..570f92866d89b 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -2587,3 +2587,100 @@ define void @select_freeze_icmp_multuses(i32 %x, i32 %y) { call void @use_i1_i32(i1 %c.fr, i32 %v) ret void } + +; FIXME: This is a miscompile! 
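+; (Why: when %arg is 0, @llvm.cttz.i32 with the zero-is-undef flag set
+; returns an undefined/poison result, which makes %shifted undefined/poison
+; as well; the select is what guards that case by returning 0. Replacing the
+; select's result with %shifted drops the guard, so the folded form below is
+; more poisonous than the original code.)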
+define i32 @pr47322_more_poisonous_replacement(i32 %arg) { +; CHECK-LABEL: @pr47322_more_poisonous_replacement( +; CHECK-NEXT: [[TRAILING:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG:%.*]], i1 immarg true), [[RNG0:!range !.*]] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[ARG]], [[TRAILING]] +; CHECK-NEXT: ret i32 [[SHIFTED]] +; + %cmp = icmp eq i32 %arg, 0 + %trailing = call i32 @llvm.cttz.i32(i32 %arg, i1 immarg true) + %shifted = lshr i32 %arg, %trailing + %r1.sroa.0.1 = select i1 %cmp, i32 0, i32 %shifted + ret i32 %r1.sroa.0.1 +} + +define i8 @select_replacement_add_eq(i8 %x, i8 %y) { +; CHECK-LABEL: @select_replacement_add_eq( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 1 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[ADD]], i8 [[Y:%.*]] +; CHECK-NEXT: ret i8 [[SEL]] +; + %cmp = icmp eq i8 %x, 1 + %add = add i8 %x, 1 + %sel = select i1 %cmp, i8 %add, i8 %y + ret i8 %sel +} + +define i8 @select_replacement_add_ne(i8 %x, i8 %y) { +; CHECK-LABEL: @select_replacement_add_ne( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[X:%.*]], 1 +; CHECK-NEXT: call void @use(i1 [[CMP]]) +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[Y:%.*]], i8 [[ADD]] +; CHECK-NEXT: ret i8 [[SEL]] +; + %cmp = icmp ne i8 %x, 1 + call void @use(i1 %cmp) + %add = add i8 %x, 1 + %sel = select i1 %cmp, i8 %y, i8 %add + ret i8 %sel +} + +define i8 @select_replacement_add_nuw(i8 %x, i8 %y) { +; CHECK-LABEL: @select_replacement_add_nuw( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 1 +; CHECK-NEXT: [[ADD:%.*]] = add nuw i8 [[X]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[ADD]], i8 [[Y:%.*]] +; CHECK-NEXT: ret i8 [[SEL]] +; + %cmp = icmp eq i8 %x, 1 + %add = add nuw i8 %x, 1 + %sel = select i1 %cmp, i8 %add, i8 %y + ret i8 %sel +} + +define i8 @select_replacement_sub(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @select_replacement_sub( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[Y]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[SUB]], i8 [[Z:%.*]] +; CHECK-NEXT: ret i8 [[SEL]] +; + %cmp = icmp eq i8 %x, %y + %sub = sub i8 %x, %y + %sel = select i1 %cmp, i8 %sub, i8 %z + ret i8 %sel +} + +define i8 @select_replacement_shift(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @select_replacement_shift( +; CHECK-NEXT: [[SHR:%.*]] = lshr exact i8 [[X:%.*]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[SHR]], [[Y:%.*]] +; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[Y]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[SHL]], i8 [[Z:%.*]] +; CHECK-NEXT: ret i8 [[SEL]] +; + %shr = lshr exact i8 %x, 1 + %cmp = icmp eq i8 %shr, %y + %shl = shl i8 %y, 1 + %sel = select i1 %cmp, i8 %shl, i8 %z + ret i8 %sel +} + +define i8 @select_replacement_loop(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @select_replacement_loop( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[X]], i8 [[Z:%.*]] +; CHECK-NEXT: ret i8 [[SEL]] +; + %cmp = icmp eq i8 %x, %y + %sel = select i1 %cmp, i8 %x, i8 %z + ret i8 %sel +} + +declare void @use(i1) +declare i32 @llvm.cttz.i32(i32, i1 immarg) From 99e78cb7185db1a15afd33020a1e026dc7ac5e1b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 10 Sep 2020 22:11:04 +0200 Subject: [PATCH 0307/1079] [DemandedBits] Add braces to large if (NFC) While the if only contains a single statement, it happens to be a huge switch. Add braces to make this code easier to read. 
---
 llvm/lib/Analysis/DemandedBits.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp
index 62e08f3f8a8ba..1575d15550728 100644
--- a/llvm/lib/Analysis/DemandedBits.cpp
+++ b/llvm/lib/Analysis/DemandedBits.cpp
@@ -115,7 +115,7 @@ void DemandedBits::determineLiveOperandBits(
     default: break;
     case Instruction::Call:
     case Instruction::Invoke:
-      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI))
+      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI)) {
        switch (II->getIntrinsicID()) {
        default: break;
        case Intrinsic::bswap:
@@ -171,6 +171,7 @@ void DemandedBits::determineLiveOperandBits(
           break;
         }
       }
+      }
       break;
     case Instruction::Add:
       if (AOut.isMask()) {

From a5168bdb4a25485ac62e18bdc538b4842bc9fbd9 Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Fri, 4 Sep 2020 22:40:46 +0200
Subject: [PATCH 0308/1079] [DemandedBits][BDCE] Add support for min/max intrinsics

Add DemandedBits / BDCE support for min/max intrinsics: If the low bits
are not demanded in the result, they also aren't demanded in the
operands.

Differential Revision: https://reviews.llvm.org/D87161
---
 llvm/lib/Analysis/DemandedBits.cpp      |  8 ++++++++
 llvm/test/Transforms/BDCE/intrinsics.ll | 16 ++++++++--------
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp
index 1575d15550728..461fd7239905b 100644
--- a/llvm/lib/Analysis/DemandedBits.cpp
+++ b/llvm/lib/Analysis/DemandedBits.cpp
@@ -170,6 +170,14 @@ void DemandedBits::determineLiveOperandBits(
         }
         break;
       }
+      case Intrinsic::umax:
+      case Intrinsic::umin:
+      case Intrinsic::smax:
+      case Intrinsic::smin:
+        // If low bits of result are not demanded, they are also not demanded
+        // for the min/max operands.
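+        // A sketch of why this is sound: right-shifting is monotone with
+        // respect to the matching (un)signed order, so for any k,
+        // (min(a, b) >> k) == min(a >> k, b >> k), and likewise for max.
+        // The high bits of a min/max therefore depend only on the high bits
+        // of its operands, making the low operand bits dead here.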
+ AB = APInt::getBitsSetFrom(BitWidth, AOut.countTrailingZeros()); + break; } } break; diff --git a/llvm/test/Transforms/BDCE/intrinsics.ll b/llvm/test/Transforms/BDCE/intrinsics.ll index 5a186f01fd298..ea0a2289feb2d 100644 --- a/llvm/test/Transforms/BDCE/intrinsics.ll +++ b/llvm/test/Transforms/BDCE/intrinsics.ll @@ -8,8 +8,8 @@ declare i8 @llvm.smin.i8(i8, i8) define i8 @umax(i8 %x, i8 %y, i1 %a, i1 %b) { ; CHECK-LABEL: @umax( -; CHECK-NEXT: [[A2:%.*]] = zext i1 [[A:%.*]] to i8 -; CHECK-NEXT: [[B2:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: [[A2:%.*]] = zext i1 false to i8 +; CHECK-NEXT: [[B2:%.*]] = zext i1 false to i8 ; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], [[A2]] ; CHECK-NEXT: [[Y2:%.*]] = or i8 [[Y:%.*]], [[B2]] ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X2]], i8 [[Y2]]) @@ -27,8 +27,8 @@ define i8 @umax(i8 %x, i8 %y, i1 %a, i1 %b) { define i8 @umin(i8 %x, i8 %y, i1 %a, i1 %b) { ; CHECK-LABEL: @umin( -; CHECK-NEXT: [[A2:%.*]] = zext i1 [[A:%.*]] to i8 -; CHECK-NEXT: [[B2:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: [[A2:%.*]] = zext i1 false to i8 +; CHECK-NEXT: [[B2:%.*]] = zext i1 false to i8 ; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], [[A2]] ; CHECK-NEXT: [[Y2:%.*]] = or i8 [[Y:%.*]], [[B2]] ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X2]], i8 [[Y2]]) @@ -46,8 +46,8 @@ define i8 @umin(i8 %x, i8 %y, i1 %a, i1 %b) { define i8 @smax(i8 %x, i8 %y, i1 %a, i1 %b) { ; CHECK-LABEL: @smax( -; CHECK-NEXT: [[A2:%.*]] = zext i1 [[A:%.*]] to i8 -; CHECK-NEXT: [[B2:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: [[A2:%.*]] = zext i1 false to i8 +; CHECK-NEXT: [[B2:%.*]] = zext i1 false to i8 ; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], [[A2]] ; CHECK-NEXT: [[Y2:%.*]] = or i8 [[Y:%.*]], [[B2]] ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X2]], i8 [[Y2]]) @@ -65,8 +65,8 @@ define i8 @smax(i8 %x, i8 %y, i1 %a, i1 %b) { define i8 @smin(i8 %x, i8 %y, i1 %a, i1 %b) { ; CHECK-LABEL: @smin( -; CHECK-NEXT: [[A2:%.*]] = zext i1 [[A:%.*]] to i8 -; CHECK-NEXT: [[B2:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: [[A2:%.*]] = zext i1 false to i8 +; CHECK-NEXT: [[B2:%.*]] = zext i1 false to i8 ; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], [[A2]] ; CHECK-NEXT: [[Y2:%.*]] = or i8 [[Y:%.*]], [[B2]] ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[X2]], i8 [[Y2]]) From c74900ca67241bf963b7a4cfa1fae8eadf6bb8cd Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 10 Sep 2020 13:10:27 -0700 Subject: [PATCH 0309/1079] [ORC] Make MaterializationResponsibility immovable, pass by unique_ptr. Making MaterializationResponsibility instances immovable allows their associated VModuleKeys to be updated by the ExecutionSession while the responsibility is still in-flight. This will be used in the upcoming removable code feature to enable safe merging of resource keys even if there are active compiles using the keys being merged. 
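To make that ownership pattern concrete, here is a self-contained sketch of an
immovable type handed around through std::unique_ptr (the names are
illustrative stand-ins, not the actual ORC classes): because the object itself
never moves, a central session holding a raw pointer can update its key in
place while the owning unique_ptr travels between threads.

#include <memory>
#include <utility>

// Illustrative stand-in for an immovable, non-copyable responsibility object.
class Responsibility {
public:
  explicit Responsibility(int Key) : Key(Key) {}
  Responsibility(const Responsibility &) = delete;
  Responsibility &operator=(const Responsibility &) = delete;
  Responsibility(Responsibility &&) = delete;
  Responsibility &operator=(Responsibility &&) = delete;

  // The object has a stable address, so a session tracking in-flight work
  // can update the key in place and all outstanding pointers observe it.
  void setKey(int NewKey) { Key = NewKey; }
  int getKey() const { return Key; }

private:
  int Key;
};

// Ownership is transferred by moving the unique_ptr; the pointee stays put.
void materialize(std::unique_ptr<Responsibility> R) { (void)R->getKey(); }

int main() {
  auto R = std::make_unique<Responsibility>(42);
  Responsibility *Observer = R.get(); // session-side handle
  Observer->setKey(43);               // update seen through all pointers
  materialize(std::move(R));
  return 0;
}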
--- .../SpeculativeJIT/SpeculativeJIT.cpp | 15 +- .../Orc/CompileOnDemandLayer.h | 6 +- llvm/include/llvm/ExecutionEngine/Orc/Core.h | 37 +-- .../llvm/ExecutionEngine/Orc/IRCompileLayer.h | 3 +- .../ExecutionEngine/Orc/IRTransformLayer.h | 3 +- llvm/include/llvm/ExecutionEngine/Orc/Layer.h | 11 +- .../llvm/ExecutionEngine/Orc/LazyReexports.h | 2 +- .../ExecutionEngine/Orc/ObjectLinkingLayer.h | 2 +- .../Orc/ObjectTransformLayer.h | 2 +- .../Orc/RTDyldObjectLinkingLayer.h | 2 +- .../llvm/ExecutionEngine/Orc/Speculation.h | 3 +- .../Orc/CompileOnDemandLayer.cpp | 42 +-- llvm/lib/ExecutionEngine/Orc/Core.cpp | 50 ++-- .../ExecutionEngine/Orc/IRCompileLayer.cpp | 6 +- .../ExecutionEngine/Orc/IRTransformLayer.cpp | 6 +- .../ExecutionEngine/Orc/IndirectionUtils.cpp | 6 +- llvm/lib/ExecutionEngine/Orc/LLJIT.cpp | 20 +- llvm/lib/ExecutionEngine/Orc/Layer.cpp | 8 +- .../lib/ExecutionEngine/Orc/LazyReexports.cpp | 16 +- .../Orc/ObjectLinkingLayer.cpp | 59 ++--- .../Orc/ObjectTransformLayer.cpp | 7 +- .../Orc/RTDyldObjectLinkingLayer.cpp | 25 +- llvm/lib/ExecutionEngine/Orc/Speculation.cpp | 4 +- .../ExecutionEngine/Orc/CoreAPIsTest.cpp | 242 ++++++++++-------- .../Orc/LazyCallThroughAndReexportsTest.cpp | 6 +- .../ExecutionEngine/Orc/OrcTestCommon.h | 5 +- 26 files changed, 314 insertions(+), 274 deletions(-) diff --git a/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp b/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp index 4de4897053c1b..24cf0847558f9 100644 --- a/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp +++ b/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp @@ -113,14 +113,13 @@ class SpeculativeJIT { this->CODLayer.setImplMap(&Imps); this->ES->setDispatchMaterialization( [this](std::unique_ptr MU, - MaterializationResponsibility MR) { - // FIXME: Switch to move capture once we have C++14. - auto SharedMU = std::shared_ptr(std::move(MU)); - auto SharedMR = - std::make_shared(std::move(MR)); - CompileThreads.async([SharedMU, SharedMR]() { - SharedMU->materialize(std::move(*SharedMR)); - }); + std::unique_ptr MR) { + CompileThreads.async( + [UnownedMU = MU.release(), UnownedMR = MR.release()]() { + std::unique_ptr MU(UnownedMU); + std::unique_ptr MR(UnownedMR); + MU->materialize(std::move(MR)); + }); }); ExitOnErr(S.addSpeculationRuntime(MainJD, Mangle)); LocalCXXRuntimeOverrides CXXRuntimeoverrides; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index 9ecc0464dec1b..3a2f8b54ad22b 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -96,7 +96,8 @@ class CompileOnDemandLayer : public IRLayer { /// Emits the given module. This should not be called by clients: it will be /// called by the JIT when a definition added via the add method is requested. 
- void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; + void emit(std::unique_ptr R, + ThreadSafeModule TSM) override; private: struct PerDylibResources { @@ -120,7 +121,8 @@ class CompileOnDemandLayer : public IRLayer { void expandPartition(GlobalValueSet &Partition); - void emitPartition(MaterializationResponsibility R, ThreadSafeModule TSM, + void emitPartition(std::unique_ptr R, + ThreadSafeModule TSM, IRMaterializationUnit::SymbolNameToDefinitionMap Defs); mutable std::mutex CODLayerMutex; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index 6951df3f2d3f2..70bd983c40ce0 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -410,7 +410,7 @@ class UnexpectedSymbolDefinitions : public ErrorInfo + delegate(const SymbolNameSet &Symbols, VModuleKey NewKey = VModuleKey()); void addDependencies(const SymbolStringPtr &Name, const SymbolDependenceMap &Dependencies); @@ -577,7 +577,8 @@ class MaterializationUnit { /// Implementations of this method should materialize all symbols /// in the materialzation unit, except for those that have been /// previously discarded. - virtual void materialize(MaterializationResponsibility R) = 0; + virtual void + materialize(std::unique_ptr R) = 0; /// Called by JITDylibs to notify MaterializationUnits that the given symbol /// has been overridden. @@ -594,10 +595,11 @@ class MaterializationUnit { private: virtual void anchor(); - MaterializationResponsibility + std::unique_ptr createMaterializationResponsibility(std::shared_ptr JD) { - return MaterializationResponsibility(std::move(JD), std::move(SymbolFlags), - std::move(InitSymbol), K); + return std::unique_ptr( + new MaterializationResponsibility(std::move(JD), std::move(SymbolFlags), + std::move(InitSymbol), K)); } /// Implementations of this method should discard the given symbol @@ -621,7 +623,7 @@ class AbsoluteSymbolsMaterializationUnit : public MaterializationUnit { StringRef getName() const override; private: - void materialize(MaterializationResponsibility R) override; + void materialize(std::unique_ptr R) override; void discard(const JITDylib &JD, const SymbolStringPtr &Name) override; static SymbolFlagsMap extractFlags(const SymbolMap &Symbols); @@ -663,7 +665,7 @@ class ReExportsMaterializationUnit : public MaterializationUnit { StringRef getName() const override; private: - void materialize(MaterializationResponsibility R) override; + void materialize(std::unique_ptr R) override; void discard(const JITDylib &JD, const SymbolStringPtr &Name) override; static SymbolFlagsMap extractFlags(const SymbolAliasMap &Aliases); @@ -1116,7 +1118,7 @@ class ExecutionSession { /// For dispatching MaterializationUnit::materialize calls. using DispatchMaterializationFunction = std::function MU, - MaterializationResponsibility MR)>; + std::unique_ptr MR)>; /// Construct an ExecutionSession. /// @@ -1268,10 +1270,11 @@ class ExecutionSession { SymbolState RequiredState = SymbolState::Ready); /// Materialize the given unit. 
- void dispatchMaterialization(std::unique_ptr MU, - MaterializationResponsibility MR) { + void + dispatchMaterialization(std::unique_ptr MU, + std::unique_ptr MR) { assert(MU && "MU must be non-null"); - DEBUG_WITH_TYPE("orc", dumpDispatchInfo(MR.getTargetJITDylib(), *MU)); + DEBUG_WITH_TYPE("orc", dumpDispatchInfo(MR->getTargetJITDylib(), *MU)); DispatchMaterialization(std::move(MU), std::move(MR)); } @@ -1283,9 +1286,9 @@ class ExecutionSession { logAllUnhandledErrors(std::move(Err), errs(), "JIT session error: "); } - static void - materializeOnCurrentThread(std::unique_ptr MU, - MaterializationResponsibility MR) { + static void materializeOnCurrentThread( + std::unique_ptr MU, + std::unique_ptr MR) { MU->materialize(std::move(MR)); } @@ -1309,7 +1312,7 @@ class ExecutionSession { // with callbacks from asynchronous queries. mutable std::recursive_mutex OutstandingMUsMutex; std::vector, - MaterializationResponsibility>> + std::unique_ptr>> OutstandingMUs; }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h index eb74d283f0435..2c53e2f66e851 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h @@ -55,7 +55,8 @@ class IRCompileLayer : public IRLayer { void setNotifyCompiled(NotifyCompiledFunction NotifyCompiled); - void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; + void emit(std::unique_ptr R, + ThreadSafeModule TSM) override; private: mutable std::mutex IRLayerMutex; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h index 296d74ae6b865..ee4ee3437fa6d 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h @@ -37,7 +37,8 @@ class IRTransformLayer : public IRLayer { this->Transform = std::move(Transform); } - void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; + void emit(std::unique_ptr R, + ThreadSafeModule TSM) override; static ThreadSafeModule identityTransform(ThreadSafeModule TSM, MaterializationResponsibility &R) { diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Layer.h b/llvm/include/llvm/ExecutionEngine/Orc/Layer.h index e843d0f562455..c8a41199760da 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Layer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Layer.h @@ -100,7 +100,8 @@ class IRLayer { VModuleKey K = VModuleKey()); /// Emit should materialize the given IR. - virtual void emit(MaterializationResponsibility R, ThreadSafeModule TSM) = 0; + virtual void emit(std::unique_ptr R, + ThreadSafeModule TSM) = 0; private: bool CloneToNewContextOnEmit = false; @@ -117,8 +118,7 @@ class BasicIRLayerMaterializationUnit : public IRMaterializationUnit { ThreadSafeModule TSM, VModuleKey K); private: - - void materialize(MaterializationResponsibility R) override; + void materialize(std::unique_ptr R) override; IRLayer &L; VModuleKey K; @@ -139,7 +139,7 @@ class ObjectLayer { VModuleKey K = VModuleKey()); /// Emit should materialize the given IR. 
-  virtual void emit(MaterializationResponsibility R,
+  virtual void emit(std::unique_ptr<MaterializationResponsibility> R,
                     std::unique_ptr<MemoryBuffer> O) = 0;

 private:

@@ -162,8 +162,7 @@ class BasicObjectLayerMaterializationUnit : public MaterializationUnit {
   StringRef getName() const override;

 private:
-
-  void materialize(MaterializationResponsibility R) override;
+  void materialize(std::unique_ptr<MaterializationResponsibility> R) override;
   void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;

   ObjectLayer &L;
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h b/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h
index 9206e40fffb1c..63e3a80d87d86 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h
@@ -149,7 +149,7 @@ class LazyReexportsMaterializationUnit : public MaterializationUnit {
   StringRef getName() const override;

 private:
-  void materialize(MaterializationResponsibility R) override;
+  void materialize(std::unique_ptr<MaterializationResponsibility> R) override;
   void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
   static SymbolFlagsMap extractFlags(const SymbolAliasMap &Aliases);

diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
index cb8ee130ab614..cbcf3928be3df 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
@@ -119,7 +119,7 @@ class ObjectLinkingLayer : public ObjectLayer {
   }

   /// Emit the object.
-  void emit(MaterializationResponsibility R,
+  void emit(std::unique_ptr<MaterializationResponsibility> R,
             std::unique_ptr<MemoryBuffer> O) override;

   /// Instructs this ObjectLinkingLayer instance to override the symbol flags
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
index bf989cc8677cf..c77649f19fc74 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
@@ -31,7 +31,7 @@ class ObjectTransformLayer : public ObjectLayer {
   ObjectTransformLayer(ExecutionSession &ES, ObjectLayer &BaseLayer,
                        TransformFunction Transform = TransformFunction());

-  void emit(MaterializationResponsibility R,
+  void emit(std::unique_ptr<MaterializationResponsibility> R,
             std::unique_ptr<MemoryBuffer> O) override;

   void setTransform(TransformFunction Transform) {
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
index 9ada0871cf0cb..9cd3c57a19c6a 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
@@ -58,7 +58,7 @@ class RTDyldObjectLinkingLayer : public ObjectLayer {
   ~RTDyldObjectLinkingLayer();

   /// Emit the object.
-  void emit(MaterializationResponsibility R,
+  void emit(std::unique_ptr<MaterializationResponsibility> R,
             std::unique_ptr<MemoryBuffer> O) override;

   /// Set the NotifyLoaded callback.
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
index 10f78c8bc6beb..a138f60a77564 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
@@ -181,7 +181,8 @@ class IRSpeculationLayer : public IRLayer {
       : IRLayer(ES, BaseLayer.getManglingOptions()), NextLayer(BaseLayer),
         S(Spec), Mangle(Mangle), QueryAnalysis(Interpreter) {}

-  void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override;
+  void emit(std::unique_ptr<MaterializationResponsibility> R,
+            ThreadSafeModule TSM) override;

 private:
   TargetAndLikelies
diff --git a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
index 9e38dc36faae7..dfb0d06bdba3d 100644
--- a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
@@ -88,7 +88,7 @@ class PartitioningIRMaterializationUnit : public IRMaterializationUnit {
         Parent(Parent) {}

 private:
-  void materialize(MaterializationResponsibility R) override {
+  void materialize(std::unique_ptr<MaterializationResponsibility> R) override {
     Parent.emitPartition(std::move(R), std::move(TSM),
                          std::move(SymbolToDefinition));
   }

@@ -128,15 +128,15 @@ void CompileOnDemandLayer::setPartitionFunction(PartitionFunction Partition) {

 void CompileOnDemandLayer::setImplMap(ImplSymbolMap *Imp) {
   this->AliaseeImpls = Imp;
 }
-void CompileOnDemandLayer::emit(MaterializationResponsibility R,
-                                ThreadSafeModule TSM) {
+void CompileOnDemandLayer::emit(
+    std::unique_ptr<MaterializationResponsibility> R, ThreadSafeModule TSM) {
   assert(TSM && "Null module");

   auto &ES = getExecutionSession();

   // Sort the callables and non-callables, build re-exports and lodge the
   // actual module with the implementation dylib.
-  auto &PDR = getPerDylibResources(R.getTargetJITDylib());
+  auto &PDR = getPerDylibResources(R->getTargetJITDylib());

   SymbolAliasMap NonCallables;
   SymbolAliasMap Callables;

@@ -145,7 +145,7 @@ void CompileOnDemandLayer::emit(MaterializationResponsibility R,
     cleanUpModule(M);
   });

-  for (auto &KV : R.getSymbols()) {
+  for (auto &KV : R->getSymbols()) {
     auto &Name = KV.first;
     auto &Flags = KV.second;
     if (Flags.isCallable())

@@ -158,19 +158,19 @@ void CompileOnDemandLayer::emit(MaterializationResponsibility R,
   // implementation dylib.
   if (auto Err = PDR.getImplDylib().define(
           std::make_unique<PartitioningIRMaterializationUnit>(
-              ES, *getManglingOptions(), std::move(TSM), R.getVModuleKey(),
+              ES, *getManglingOptions(), std::move(TSM), R->getVModuleKey(),
               *this))) {
     ES.reportError(std::move(Err));
-    R.failMaterialization();
+    R->failMaterialization();
     return;
   }

   if (!NonCallables.empty())
-    R.replace(reexports(PDR.getImplDylib(), std::move(NonCallables),
-                        JITDylibLookupFlags::MatchAllSymbols));
+    R->replace(reexports(PDR.getImplDylib(), std::move(NonCallables),
+                         JITDylibLookupFlags::MatchAllSymbols));
   if (!Callables.empty())
-    R.replace(lazyReexports(LCTMgr, PDR.getISManager(), PDR.getImplDylib(),
-                            std::move(Callables), AliaseeImpls));
+    R->replace(lazyReexports(LCTMgr, PDR.getISManager(), PDR.getImplDylib(),
+                             std::move(Callables), AliaseeImpls));
 }

 CompileOnDemandLayer::PerDylibResources &
@@ -247,7 +247,7 @@ void CompileOnDemandLayer::expandPartition(GlobalValueSet &Partition) {
 }

 void CompileOnDemandLayer::emitPartition(
-    MaterializationResponsibility R, ThreadSafeModule TSM,
+    std::unique_ptr<MaterializationResponsibility> R, ThreadSafeModule TSM,
     IRMaterializationUnit::SymbolNameToDefinitionMap Defs) {

   // FIXME: Need a 'notify lazy-extracting/emitting' callback to tie the
@@ -257,8 +257,8 @@ void CompileOnDemandLayer::emitPartition(
   auto &ES = getExecutionSession();
   GlobalValueSet RequestedGVs;
-  for (auto &Name : R.getRequestedSymbols()) {
-    if (Name == R.getInitializerSymbol())
+  for (auto &Name : R->getRequestedSymbols()) {
+    if (Name == R->getInitializerSymbol())
       TSM.withModuleDo([&](Module &M) {
         for (auto &GV : getStaticInitGVs(M))
           RequestedGVs.insert(&GV);

@@ -285,9 +285,9 @@ void CompileOnDemandLayer::emitPartition(

   // If the partition is empty, return the whole module to the symbol table.
   if (GVsToExtract->empty()) {
-    R.replace(std::make_unique<PartitioningIRMaterializationUnit>(
-        std::move(TSM), R.getVModuleKey(), R.getSymbols(),
-        R.getInitializerSymbol(), std::move(Defs), *this));
+    R->replace(std::make_unique<PartitioningIRMaterializationUnit>(
+        std::move(TSM), R->getVModuleKey(), R->getSymbols(),
+        R->getInitializerSymbol(), std::move(Defs), *this));
     return;
   }

@@ -308,7 +308,7 @@ void CompileOnDemandLayer::emitPartition(
     IRSymbolMapper::add(ES, *getManglingOptions(),
                         PromotedGlobals, SymbolFlags);

-    if (auto Err = R.defineMaterializing(SymbolFlags))
+    if (auto Err = R->defineMaterializing(SymbolFlags))
       return std::move(Err);
   }

@@ -348,12 +348,12 @@ void CompileOnDemandLayer::emitPartition(

   if (!ExtractedTSM) {
     ES.reportError(ExtractedTSM.takeError());
-    R.failMaterialization();
+    R->failMaterialization();
     return;
   }

-  R.replace(std::make_unique<PartitioningIRMaterializationUnit>(
-      ES, *getManglingOptions(), std::move(TSM), R.getVModuleKey(), *this));
+  R->replace(std::make_unique<PartitioningIRMaterializationUnit>(
+      ES, *getManglingOptions(), std::move(TSM), R->getVModuleKey(), *this));
   BaseLayer.emit(std::move(R), std::move(*ExtractedTSM));
 }

diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp
index 18eced68f07bc..243bac79c012f 100644
--- a/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -279,7 +279,7 @@ void MaterializationResponsibility::replace(
   JD->replace(std::move(MU));
 }

-MaterializationResponsibility
+std::unique_ptr<MaterializationResponsibility>
 MaterializationResponsibility::delegate(const SymbolNameSet &Symbols,
                                         VModuleKey NewKey) {

@@ -302,9 +302,10 @@ MaterializationResponsibility::delegate(const SymbolNameSet &Symbols,
     SymbolFlags.erase(I);
   }

-  return MaterializationResponsibility(JD, std::move(DelegatedFlags),
-                                       std::move(DelegatedInitSymbol),
-                                       std::move(NewKey));
+  return std::unique_ptr<MaterializationResponsibility>(
+      new MaterializationResponsibility(JD, std::move(DelegatedFlags),
+                                        std::move(DelegatedInitSymbol),
+                                        std::move(NewKey)));
 }

 void MaterializationResponsibility::addDependencies(
@@ -338,10 +339,10 @@ StringRef AbsoluteSymbolsMaterializationUnit::getName() const {
 }

 void AbsoluteSymbolsMaterializationUnit::materialize(
-    MaterializationResponsibility R) {
+    std::unique_ptr<MaterializationResponsibility> R) {
   // No dependencies, so these calls can't fail.
-  cantFail(R.notifyResolved(Symbols));
-  cantFail(R.notifyEmitted());
+  cantFail(R->notifyResolved(Symbols));
+  cantFail(R->notifyEmitted());
 }

 void AbsoluteSymbolsMaterializationUnit::discard(const JITDylib &JD,
@@ -370,16 +371,16 @@ StringRef ReExportsMaterializationUnit::getName() const {
 }

 void ReExportsMaterializationUnit::materialize(
-    MaterializationResponsibility R) {
-  auto &ES = R.getTargetJITDylib().getExecutionSession();
-  JITDylib &TgtJD = R.getTargetJITDylib();
+    std::unique_ptr<MaterializationResponsibility> R) {
+  auto &ES = R->getTargetJITDylib().getExecutionSession();
+  JITDylib &TgtJD = R->getTargetJITDylib();
   JITDylib &SrcJD = SourceJD ? *SourceJD : TgtJD;

   // Find the set of requested aliases and aliasees. Return any unrequested
   // aliases back to the JITDylib so as to not prematurely materialize any
   // aliasees.
-  auto RequestedSymbols = R.getRequestedSymbols();
+  auto RequestedSymbols = R->getRequestedSymbols();
   SymbolAliasMap RequestedAliases;

   for (auto &Name : RequestedSymbols) {
@@ -399,18 +400,19 @@ void ReExportsMaterializationUnit::materialize(

   if (!Aliases.empty()) {
     if (SourceJD)
-      R.replace(reexports(*SourceJD, std::move(Aliases), SourceJDLookupFlags));
+      R->replace(reexports(*SourceJD, std::move(Aliases), SourceJDLookupFlags));
     else
-      R.replace(symbolAliases(std::move(Aliases)));
+      R->replace(symbolAliases(std::move(Aliases)));
   }

   // The OnResolveInfo struct will hold the aliases and responsibilty for each
   // query in the list.
   struct OnResolveInfo {
-    OnResolveInfo(MaterializationResponsibility R, SymbolAliasMap Aliases)
+    OnResolveInfo(std::unique_ptr<MaterializationResponsibility> R,
+                  SymbolAliasMap Aliases)
         : R(std::move(R)), Aliases(std::move(Aliases)) {}

-    MaterializationResponsibility R;
+    std::unique_ptr<MaterializationResponsibility> R;
     SymbolAliasMap Aliases;
   };

@@ -451,7 +453,7 @@ void ReExportsMaterializationUnit::materialize(
     assert(!QuerySymbols.empty() && "Alias cycle detected!");

     auto QueryInfo = std::make_shared<OnResolveInfo>(
-        R.delegate(ResponsibilitySymbols), std::move(QueryAliases));
+        R->delegate(ResponsibilitySymbols), std::move(QueryAliases));
     QueryInfos.push_back(
         make_pair(std::move(QuerySymbols), std::move(QueryInfo)));
   }

@@ -480,12 +482,12 @@ void ReExportsMaterializationUnit::materialize(
       for (auto &KV : QueryInfo->Aliases)
         if (SrcJDDeps.count(KV.second.Aliasee)) {
           PerAliasDeps = {KV.second.Aliasee};
-          QueryInfo->R.addDependencies(KV.first, PerAliasDepsMap);
+          QueryInfo->R->addDependencies(KV.first, PerAliasDepsMap);
         }
     };

     auto OnComplete = [QueryInfo](Expected<SymbolMap> Result) {
-      auto &ES = QueryInfo->R.getTargetJITDylib().getExecutionSession();
+      auto &ES = QueryInfo->R->getTargetJITDylib().getExecutionSession();
       if (Result) {
         SymbolMap ResolutionMap;
         for (auto &KV : QueryInfo->Aliases) {
@@ -499,19 +501,19 @@ void ReExportsMaterializationUnit::materialize(
           ResolutionMap[KV.first] = JITEvaluatedSymbol(
               (*Result)[KV.second.Aliasee].getAddress(), KV.second.AliasFlags);
         }
-        if (auto Err = QueryInfo->R.notifyResolved(ResolutionMap)) {
+        if (auto Err = QueryInfo->R->notifyResolved(ResolutionMap)) {
           ES.reportError(std::move(Err));
-          QueryInfo->R.failMaterialization();
+          QueryInfo->R->failMaterialization();
           return;
         }
-        if (auto Err = QueryInfo->R.notifyEmitted()) {
+        if (auto Err = QueryInfo->R->notifyEmitted()) {
           ES.reportError(std::move(Err));
-          QueryInfo->R.failMaterialization();
+          QueryInfo->R->failMaterialization();
           return;
         }
       } else {
         ES.reportError(Result.takeError());
-        QueryInfo->R.failMaterialization();
+        QueryInfo->R->failMaterialization();
       }
     };

@@ -2131,7 +2133,7 @@ void ExecutionSession::dump(raw_ostream &OS) {
 void ExecutionSession::runOutstandingMUs() {
   while (1) {
     Optional<std::pair<std::unique_ptr<MaterializationUnit>,
-                       MaterializationResponsibility>>
+                       std::unique_ptr<MaterializationResponsibility>>>
         JMU;

     {
diff --git a/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp b/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
index 023940dc82982..c6f6870279728 100644
--- a/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
@@ -25,7 +25,7 @@ void IRCompileLayer::setNotifyCompiled(NotifyCompiledFunction NotifyCompiled) {
   this->NotifyCompiled = std::move(NotifyCompiled);
 }

-void IRCompileLayer::emit(MaterializationResponsibility R,
+void IRCompileLayer::emit(std::unique_ptr<MaterializationResponsibility> R,
                           ThreadSafeModule TSM) {
   assert(TSM && "Module must not be null");

@@ -33,13 +33,13 @@ void IRCompileLayer::emit(MaterializationResponsibility R,
     {
       std::lock_guard<std::mutex> Lock(IRLayerMutex);
       if (NotifyCompiled)
-        NotifyCompiled(R.getVModuleKey(), std::move(TSM));
+        NotifyCompiled(R->getVModuleKey(), std::move(TSM));
       else
         TSM = ThreadSafeModule();
     }
     BaseLayer.emit(std::move(R), std::move(*Obj));
   } else {
-    R.failMaterialization();
+    R->failMaterialization();
     getExecutionSession().reportError(Obj.takeError());
   }
 }
diff --git a/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp b/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
index 511248f83b259..d5b11349277c1 100644
--- a/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
@@ -17,14 +17,14 @@
 IRTransformLayer::IRTransformLayer(ExecutionSession &ES, IRLayer &BaseLayer,
                                    TransformFunction Transform)
     : IRLayer(ES, BaseLayer.getManglingOptions()), BaseLayer(BaseLayer),
       Transform(std::move(Transform)) {}

-void IRTransformLayer::emit(MaterializationResponsibility R,
+void IRTransformLayer::emit(std::unique_ptr<MaterializationResponsibility> R,
                             ThreadSafeModule TSM) {
   assert(TSM && "Module must not be null");

-  if (auto TransformedTSM = Transform(std::move(TSM), R))
+  if (auto TransformedTSM = Transform(std::move(TSM), *R))
     BaseLayer.emit(std::move(R), std::move(*TransformedTSM));
   else {
-    R.failMaterialization();
+    R->failMaterialization();
     getExecutionSession().reportError(TransformedTSM.takeError());
   }
 }
diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index 4f7f6089e68db..7d57ed5a3a04c 100644
--- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -33,12 +33,12 @@ class CompileCallbackMaterializationUnit : public orc::MaterializationUnit {

   StringRef getName() const override { return "<Compile Callbacks>"; }

 private:
-  void materialize(MaterializationResponsibility R) override {
+  void materialize(std::unique_ptr<MaterializationResponsibility> R) override {
     SymbolMap Result;
     Result[Name] = JITEvaluatedSymbol(Compile(), JITSymbolFlags::Exported);
     // No dependencies, so these calls cannot fail.
-    cantFail(R.notifyResolved(Result));
-    cantFail(R.notifyEmitted());
+    cantFail(R->notifyResolved(Result));
+    cantFail(R->notifyEmitted());
   }

   void discard(const JITDylib &JD, const SymbolStringPtr &Name) override {
diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index 373d86d92f8d7..81f500d66bc29 100644
--- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -1085,15 +1085,17 @@ LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
         std::make_unique<ThreadPool>(hardware_concurrency(S.NumCompileThreads));
     ES->setDispatchMaterialization(
         [this](std::unique_ptr<MaterializationUnit> MU,
-               MaterializationResponsibility MR) {
-          // FIXME: Switch to move capture once ThreadPool uses unique_function.
-          auto SharedMU = std::shared_ptr<MaterializationUnit>(std::move(MU));
-          auto SharedMR =
-              std::make_shared<MaterializationResponsibility>(std::move(MR));
-          auto Work = [SharedMU, SharedMR]() mutable {
-            SharedMU->materialize(std::move(*SharedMR));
-          };
-          CompileThreads->async(std::move(Work));
+               std::unique_ptr<MaterializationResponsibility> MR) {
+          // FIXME: We should be able to use move-capture here, but ThreadPool's
+          // AsyncTaskTys are std::functions rather than unique_functions
+          // (because MSVC's std::packaged_tasks don't support move-only types).
+          // Fix this when all the above gets sorted out.
+          CompileThreads->async(
+              [UnownedMU = MU.release(), UnownedMR = MR.release()]() mutable {
+                std::unique_ptr<MaterializationUnit> MU(UnownedMU);
+                std::unique_ptr<MaterializationResponsibility> MR(UnownedMR);
+                MU->materialize(std::move(MR));
+              });
         });
   }

diff --git a/llvm/lib/ExecutionEngine/Orc/Layer.cpp b/llvm/lib/ExecutionEngine/Orc/Layer.cpp
index 0a5d5577e99e8..8052e7b08a5a6 100644
--- a/llvm/lib/ExecutionEngine/Orc/Layer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Layer.cpp
@@ -133,7 +133,7 @@ BasicIRLayerMaterializationUnit::BasicIRLayerMaterializationUnit(
       L(L), K(std::move(K)) {}

 void BasicIRLayerMaterializationUnit::materialize(
-    MaterializationResponsibility R) {
+    std::unique_ptr<MaterializationResponsibility> R) {

   // Throw away the SymbolToDefinition map: it's not usable after we hand
   // off the module.
@@ -144,8 +144,8 @@ void BasicIRLayerMaterializationUnit::materialize( TSM = cloneToNewContext(TSM); #ifndef NDEBUG - auto &ES = R.getTargetJITDylib().getExecutionSession(); - auto &N = R.getTargetJITDylib().getName(); + auto &ES = R->getTargetJITDylib().getExecutionSession(); + auto &N = R->getTargetJITDylib().getName(); #endif // NDEBUG LLVM_DEBUG(ES.runSessionLocked( @@ -200,7 +200,7 @@ StringRef BasicObjectLayerMaterializationUnit::getName() const { } void BasicObjectLayerMaterializationUnit::materialize( - MaterializationResponsibility R) { + std::unique_ptr R) { L.emit(std::move(R), std::move(O)); } diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp index 5e604130d6eab..695f6cc9c1cb4 100644 --- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp @@ -154,8 +154,8 @@ StringRef LazyReexportsMaterializationUnit::getName() const { } void LazyReexportsMaterializationUnit::materialize( - MaterializationResponsibility R) { - auto RequestedSymbols = R.getRequestedSymbols(); + std::unique_ptr R) { + auto RequestedSymbols = R->getRequestedSymbols(); SymbolAliasMap RequestedAliases; for (auto &RequestedSymbol : RequestedSymbols) { @@ -166,8 +166,8 @@ void LazyReexportsMaterializationUnit::materialize( } if (!CallableAliases.empty()) - R.replace(lazyReexports(LCTManager, ISManager, SourceJD, - std::move(CallableAliases), AliaseeTable)); + R->replace(lazyReexports(LCTManager, ISManager, SourceJD, + std::move(CallableAliases), AliaseeTable)); IndirectStubsManager::StubInitsMap StubInits; for (auto &Alias : RequestedAliases) { @@ -182,7 +182,7 @@ void LazyReexportsMaterializationUnit::materialize( if (!CallThroughTrampoline) { SourceJD.getExecutionSession().reportError( CallThroughTrampoline.takeError()); - R.failMaterialization(); + R->failMaterialization(); return; } @@ -195,7 +195,7 @@ void LazyReexportsMaterializationUnit::materialize( if (auto Err = ISManager.createStubs(StubInits)) { SourceJD.getExecutionSession().reportError(std::move(Err)); - R.failMaterialization(); + R->failMaterialization(); return; } @@ -204,8 +204,8 @@ void LazyReexportsMaterializationUnit::materialize( Stubs[Alias.first] = ISManager.findStub(*Alias.first, false); // No registered dependencies, so these calls cannot fail. 
- cantFail(R.notifyResolved(Stubs)); - cantFail(R.notifyEmitted()); + cantFail(R->notifyResolved(Stubs)); + cantFail(R->notifyEmitted()); } void LazyReexportsMaterializationUnit::discard(const JITDylib &JD, diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index d8283fa7e3461..9e3245d9cc991 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -24,9 +24,10 @@ namespace orc { class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { public: - ObjectLinkingLayerJITLinkContext(ObjectLinkingLayer &Layer, - MaterializationResponsibility MR, - std::unique_ptr ObjBuffer) + ObjectLinkingLayerJITLinkContext( + ObjectLinkingLayer &Layer, + std::unique_ptr MR, + std::unique_ptr ObjBuffer) : Layer(Layer), MR(std::move(MR)), ObjBuffer(std::move(ObjBuffer)) {} ~ObjectLinkingLayerJITLinkContext() { @@ -44,14 +45,14 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { void notifyFailed(Error Err) override { Layer.getExecutionSession().reportError(std::move(Err)); - MR.failMaterialization(); + MR->failMaterialization(); } void lookup(const LookupMap &Symbols, std::unique_ptr LC) override { JITDylibSearchOrder LinkOrder; - MR.getTargetJITDylib().withLinkOrderDo( + MR->getTargetJITDylib().withLinkOrderDo( [&](const JITDylibSearchOrder &LO) { LinkOrder = LO; }); auto &ES = Layer.getExecutionSession(); @@ -85,8 +86,8 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { for (auto &KV : InternalNamedSymbolDeps) { SymbolDependenceMap InternalDeps; - InternalDeps[&MR.getTargetJITDylib()] = std::move(KV.second); - MR.addDependencies(KV.first, InternalDeps); + InternalDeps[&MR->getTargetJITDylib()] = std::move(KV.second); + MR->addDependencies(KV.first, InternalDeps); } ES.lookup(LookupKind::Static, LinkOrder, std::move(LookupSet), @@ -115,7 +116,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { InternedResult[InternedName] = JITEvaluatedSymbol(Sym->getAddress(), Flags); - if (AutoClaim && !MR.getSymbols().count(InternedName)) { + if (AutoClaim && !MR->getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); ExtraSymbolsToClaim[InternedName] = Flags; @@ -133,7 +134,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { Flags |= JITSymbolFlags::Weak; InternedResult[InternedName] = JITEvaluatedSymbol(Sym->getAddress(), Flags); - if (AutoClaim && !MR.getSymbols().count(InternedName)) { + if (AutoClaim && !MR->getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); ExtraSymbolsToClaim[InternedName] = Flags; @@ -141,19 +142,19 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { } if (!ExtraSymbolsToClaim.empty()) - if (auto Err = MR.defineMaterializing(ExtraSymbolsToClaim)) + if (auto Err = MR->defineMaterializing(ExtraSymbolsToClaim)) return Err; { - // Check that InternedResult matches up with MR.getSymbols(). + // Check that InternedResult matches up with MR->getSymbols(). // This guards against faulty transformations / compilers / object caches. // First check that there aren't any missing symbols. 
size_t NumMaterializationSideEffectsOnlySymbols = 0; SymbolNameVector ExtraSymbols; SymbolNameVector MissingSymbols; - for (auto &KV : MR.getSymbols()) { + for (auto &KV : MR->getSymbols()) { // If this is a materialization-side-effects only symbol then bump // the counter and make sure it's *not* defined, otherwise make @@ -175,9 +176,9 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { // If there are more definitions than expected, add them to the // ExtraSymbols vector. if (InternedResult.size() > - MR.getSymbols().size() - NumMaterializationSideEffectsOnlySymbols) { + MR->getSymbols().size() - NumMaterializationSideEffectsOnlySymbols) { for (auto &KV : InternedResult) - if (!MR.getSymbols().count(KV.first)) + if (!MR->getSymbols().count(KV.first)) ExtraSymbols.push_back(KV.first); } @@ -187,23 +188,23 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { std::move(ExtraSymbols)); } - if (auto Err = MR.notifyResolved(InternedResult)) + if (auto Err = MR->notifyResolved(InternedResult)) return Err; - Layer.notifyLoaded(MR); + Layer.notifyLoaded(*MR); return Error::success(); } void notifyFinalized( std::unique_ptr A) override { - if (auto Err = Layer.notifyEmitted(MR, std::move(A))) { + if (auto Err = Layer.notifyEmitted(*MR, std::move(A))) { Layer.getExecutionSession().reportError(std::move(Err)); - MR.failMaterialization(); + MR->failMaterialization(); return; } - if (auto Err = MR.notifyEmitted()) { + if (auto Err = MR->notifyEmitted()) { Layer.getExecutionSession().reportError(std::move(Err)); - MR.failMaterialization(); + MR->failMaterialization(); } } @@ -217,7 +218,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { Config.PrePrunePasses.push_back( [this](LinkGraph &G) { return externalizeWeakAndCommonSymbols(G); }); - Layer.modifyPassConfig(MR, TT, Config); + Layer.modifyPassConfig(*MR, TT, Config); Config.PostPrunePasses.push_back( [this](LinkGraph &G) { return computeNamedSymbolDependencies(G); }); @@ -237,13 +238,13 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { auto &ES = Layer.getExecutionSession(); for (auto *Sym : G.defined_symbols()) if (Sym->hasName() && Sym->getLinkage() == Linkage::Weak) { - if (!MR.getSymbols().count(ES.intern(Sym->getName()))) + if (!MR->getSymbols().count(ES.intern(Sym->getName()))) G.makeExternal(*Sym); } for (auto *Sym : G.absolute_symbols()) if (Sym->hasName() && Sym->getLinkage() == Linkage::Weak) { - if (!MR.getSymbols().count(ES.intern(Sym->getName()))) + if (!MR->getSymbols().count(ES.intern(Sym->getName()))) G.makeExternal(*Sym); } @@ -253,13 +254,13 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { Error markResponsibilitySymbolsLive(LinkGraph &G) const { auto &ES = Layer.getExecutionSession(); for (auto *Sym : G.defined_symbols()) - if (Sym->hasName() && MR.getSymbols().count(ES.intern(Sym->getName()))) + if (Sym->hasName() && MR->getSymbols().count(ES.intern(Sym->getName()))) Sym->setLive(true); return Error::success(); } Error computeNamedSymbolDependencies(LinkGraph &G) { - auto &ES = MR.getTargetJITDylib().getExecutionSession(); + auto &ES = MR->getTargetJITDylib().getExecutionSession(); auto LocalDeps = computeLocalDeps(G); // Compute dependencies for symbols defined in the JITLink graph. 
@@ -306,7 +307,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { } for (auto &P : Layer.Plugins) { - auto SyntheticLocalDeps = P->getSyntheticSymbolLocalDependencies(MR); + auto SyntheticLocalDeps = P->getSyntheticSymbolLocalDependencies(*MR); if (SyntheticLocalDeps.empty()) continue; @@ -426,12 +427,12 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { SymbolDeps.erase(&SourceJD); } - MR.addDependencies(Name, SymbolDeps); + MR->addDependencies(Name, SymbolDeps); } } ObjectLinkingLayer &Layer; - MaterializationResponsibility MR; + std::unique_ptr MR; std::unique_ptr ObjBuffer; DenseMap ExternalNamedSymbolDeps; DenseMap InternalNamedSymbolDeps; @@ -452,7 +453,7 @@ ObjectLinkingLayer::~ObjectLinkingLayer() { getExecutionSession().reportError(std::move(Err)); } -void ObjectLinkingLayer::emit(MaterializationResponsibility R, +void ObjectLinkingLayer::emit(std::unique_ptr R, std::unique_ptr O) { assert(O && "Object must not be null"); jitLink(std::make_unique( diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp index d18eb38a41423..a57662e10a794 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp @@ -17,8 +17,9 @@ ObjectTransformLayer::ObjectTransformLayer(ExecutionSession &ES, TransformFunction Transform) : ObjectLayer(ES), BaseLayer(BaseLayer), Transform(std::move(Transform)) {} -void ObjectTransformLayer::emit(MaterializationResponsibility R, - std::unique_ptr O) { +void ObjectTransformLayer::emit( + std::unique_ptr R, + std::unique_ptr O) { assert(O && "Module must not be null"); // If there is a transform set then apply it. @@ -26,7 +27,7 @@ void ObjectTransformLayer::emit(MaterializationResponsibility R, if (auto TransformedObj = Transform(std::move(O))) O = std::move(*TransformedObj); else { - R.failMaterialization(); + R->failMaterialization(); getExecutionSession().reportError(TransformedObj.takeError()); return; } diff --git a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp index 7888c2fcbdbd9..1981039eb9f12 100644 --- a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp @@ -89,23 +89,18 @@ RTDyldObjectLinkingLayer::~RTDyldObjectLinkingLayer() { } } -void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R, - std::unique_ptr O) { +void RTDyldObjectLinkingLayer::emit( + std::unique_ptr R, + std::unique_ptr O) { assert(O && "Object must not be null"); - // This method launches an asynchronous link step that will fulfill our - // materialization responsibility. We need to switch R to be heap - // allocated before that happens so it can live as long as the asynchronous - // link needs it to (i.e. it must be able to outlive this method). - auto SharedR = std::make_shared(std::move(R)); - auto &ES = getExecutionSession(); auto Obj = object::ObjectFile::createObjectFile(*O); if (!Obj) { getExecutionSession().reportError(Obj.takeError()); - SharedR->failMaterialization(); + R->failMaterialization(); return; } @@ -121,7 +116,7 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R, continue; } else { ES.reportError(SymType.takeError()); - R.failMaterialization(); + R->failMaterialization(); return; } @@ -129,7 +124,7 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R, if (!SymFlagsOrErr) { // TODO: Test this error. 
ES.reportError(SymFlagsOrErr.takeError()); - R.failMaterialization(); + R->failMaterialization(); return; } @@ -139,14 +134,14 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R, InternalSymbols->insert(*SymName); else { ES.reportError(SymName.takeError()); - R.failMaterialization(); + R->failMaterialization(); return; } } } } - auto K = R.getVModuleKey(); + auto K = R->getVModuleKey(); RuntimeDyld::MemoryManager *MemMgr = nullptr; // Create a record a memory manager for this object. @@ -157,6 +152,10 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R, MemMgr = MemMgrs.back().get(); } + // Switch to shared ownership of MR so that it can be captured by both + // lambdas below. + std::shared_ptr SharedR(std::move(R)); + JITDylibSearchOrderResolver Resolver(*SharedR); jitLinkForORC( diff --git a/llvm/lib/ExecutionEngine/Orc/Speculation.cpp b/llvm/lib/ExecutionEngine/Orc/Speculation.cpp index 3dd536d8253e3..0b4755fe23cfc 100644 --- a/llvm/lib/ExecutionEngine/Orc/Speculation.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Speculation.cpp @@ -55,7 +55,7 @@ Error Speculator::addSpeculationRuntime(JITDylib &JD, // If two modules, share the same LLVMContext, different threads must // not access them concurrently without locking the associated LLVMContext // this implementation follows this contract. -void IRSpeculationLayer::emit(MaterializationResponsibility R, +void IRSpeculationLayer::emit(std::unique_ptr R, ThreadSafeModule TSM) { assert(TSM && "Speculation Layer received Null Module ?"); @@ -127,7 +127,7 @@ void IRSpeculationLayer::emit(MaterializationResponsibility R, assert(Mutator.GetInsertBlock()->getParent() == &Fn && "IR builder association mismatch?"); S.registerSymbols(internToJITSymbols(IRNames.getValue()), - &R.getTargetJITDylib()); + &R->getTargetJITDylib()); } } } diff --git a/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp b/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp index 2c008dfdbd33e..9a1dbbb172517 100644 --- a/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp @@ -35,12 +35,12 @@ TEST_F(CoreAPIsStandardTest, BasicSuccessfulLookup) { OnCompletionRun = true; }; - std::shared_ptr FooMR; + std::unique_ptr FooMR; cantFail(JD.define(std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](MaterializationResponsibility R) { - FooMR = std::make_shared(std::move(R)); + [&](std::unique_ptr R) { + FooMR = std::move(R); }))); ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), @@ -99,9 +99,9 @@ TEST_F(CoreAPIsStandardTest, ResolveUnrequestedSymbol) { cantFail(JD.define(std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}, {Bar, BarSym.getFlags()}}), - [this](MaterializationResponsibility R) { - cantFail(R.notifyResolved({{Foo, FooSym}, {Bar, BarSym}})); - cantFail(R.notifyEmitted()); + [this](std::unique_ptr R) { + cantFail(R->notifyResolved({{Foo, FooSym}, {Bar, BarSym}})); + cantFail(R->notifyEmitted()); }))); auto Result = @@ -116,14 +116,16 @@ TEST_F(CoreAPIsStandardTest, MaterializationSideEffctsOnlyBasic) { // don't return until they're emitted, and that they don't appear in query // results. 
- Optional FooR; + std::unique_ptr FooR; Optional Result; cantFail(JD.define(std::make_unique( SymbolFlagsMap( {{Foo, JITSymbolFlags::Exported | JITSymbolFlags::MaterializationSideEffectsOnly}}), - [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }))); + [&](std::unique_ptr R) { + FooR = std::move(R); + }))); ES.lookup( LookupKind::Static, makeJITDylibSearchOrder(&JD), @@ -155,7 +157,9 @@ TEST_F(CoreAPIsStandardTest, MaterializationSideEffectsOnlyFailuresPersist) { SymbolFlagsMap( {{Foo, JITSymbolFlags::Exported | JITSymbolFlags::MaterializationSideEffectsOnly}}), - [&](MaterializationResponsibility R) { R.failMaterialization(); }))); + [&](std::unique_ptr R) { + R->failMaterialization(); + }))); EXPECT_THAT_EXPECTED( ES.lookup(makeJITDylibSearchOrder(&JD), SymbolLookupSet({Foo})), @@ -182,10 +186,10 @@ TEST_F(CoreAPIsStandardTest, RemoveSymbolsTest) { bool BarMaterializerDestructed = false; cantFail(JD.define(std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [this](MaterializationResponsibility R) { + [this](std::unique_ptr R) { ADD_FAILURE() << "Unexpected materialization of \"Bar\""; - cantFail(R.notifyResolved({{Bar, BarSym}})); - cantFail(R.notifyEmitted()); + cantFail(R->notifyResolved({{Bar, BarSym}})); + cantFail(R->notifyEmitted()); }, nullptr, [&](const JITDylib &JD, const SymbolStringPtr &Name) { @@ -197,10 +201,12 @@ TEST_F(CoreAPIsStandardTest, RemoveSymbolsTest) { // Baz will be in the materializing state initially, then // materialized for the final removal attempt. - Optional BazR; + std::unique_ptr BazR; cantFail(JD.define(std::make_unique( SymbolFlagsMap({{Baz, BazSym.getFlags()}}), - [&](MaterializationResponsibility R) { BazR.emplace(std::move(R)); }, + [&](std::unique_ptr R) { + BazR = std::move(R); + }, nullptr, [](const JITDylib &JD, const SymbolStringPtr &Name) { ADD_FAILURE() << "\"Baz\" discarded unexpectedly"; @@ -297,7 +303,7 @@ TEST_F(CoreAPIsStandardTest, LookupFlagsTest) { JITSymbolFlags::Exported | JITSymbolFlags::Weak)); auto MU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [](MaterializationResponsibility R) { + [](std::unique_ptr R) { llvm_unreachable("Symbol materialized on flags lookup"); }); @@ -400,10 +406,10 @@ TEST_F(CoreAPIsStandardTest, TestThatReExportsDontUnnecessarilyMaterialize) { bool BarMaterialized = false; auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { + [&](std::unique_ptr R) { BarMaterialized = true; - cantFail(R.notifyResolved({{Bar, BarSym}})); - cantFail(R.notifyEmitted()); + cantFail(R->notifyResolved({{Bar, BarSym}})); + cantFail(R->notifyEmitted()); }); cantFail(JD.define(BarMU)); @@ -444,10 +450,12 @@ TEST_F(CoreAPIsStandardTest, TestReexportsGenerator) { } TEST_F(CoreAPIsStandardTest, TestTrivialCircularDependency) { - Optional FooR; + std::unique_ptr FooR; auto FooMU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + FooR = std::move(R); + }); cantFail(JD.define(FooMU)); @@ -476,26 +484,29 @@ TEST_F(CoreAPIsStandardTest, TestCircularDependenceInOneJITDylib) { // does not prevent any symbol from becoming 'ready' once all symbols are // emitted. - // Create three MaterializationResponsibility objects: one for each of Foo, - // Bar and Baz. These are optional because MaterializationResponsibility - // does not have a default constructor). 
- Optional FooR; - Optional BarR; - Optional BazR; + std::unique_ptr FooR; + std::unique_ptr BarR; + std::unique_ptr BazR; // Create a MaterializationUnit for each symbol that moves the // MaterializationResponsibility into one of the locals above. auto FooMU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + FooR = std::move(R); + }); auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { BarR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + BarR = std::move(R); + }); auto BazMU = std::make_unique( SymbolFlagsMap({{Baz, BazSym.getFlags()}}), - [&](MaterializationResponsibility R) { BazR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + BazR = std::move(R); + }); // Define the symbols. cantFail(JD.define(FooMU)); @@ -622,18 +633,22 @@ TEST_F(CoreAPIsStandardTest, TestCircularDependenceInOneJITDylib) { } TEST_F(CoreAPIsStandardTest, FailureInDependency) { - Optional FooR; - Optional BarR; + std::unique_ptr FooR; + std::unique_ptr BarR; // Create a MaterializationUnit for each symbol that moves the // MaterializationResponsibility into one of the locals above. auto FooMU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + FooR = std::move(R); + }); auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { BarR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + BarR = std::move(R); + }); // Define the symbols. cantFail(JD.define(FooMU)); @@ -687,18 +702,22 @@ TEST_F(CoreAPIsStandardTest, FailureInDependency) { } TEST_F(CoreAPIsStandardTest, FailureInCircularDependency) { - Optional FooR; - Optional BarR; + std::unique_ptr FooR; + std::unique_ptr BarR; // Create a MaterializationUnit for each symbol that moves the // MaterializationResponsibility into one of the locals above. auto FooMU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + FooR = std::move(R); + }); auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { BarR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + BarR = std::move(R); + }); // Define the symbols. cantFail(JD.define(FooMU)); @@ -753,18 +772,22 @@ TEST_F(CoreAPIsStandardTest, FailureInCircularDependency) { } TEST_F(CoreAPIsStandardTest, AddDependencyOnFailedSymbol) { - Optional FooR; - Optional BarR; + std::unique_ptr FooR; + std::unique_ptr BarR; // Create a MaterializationUnit for each symbol that moves the // MaterializationResponsibility into one of the locals above. auto FooMU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + FooR = std::move(R); + }); auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { BarR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + BarR = std::move(R); + }); // Define the symbols. 
cantFail(JD.define(FooMU)); @@ -819,18 +842,22 @@ TEST_F(CoreAPIsStandardTest, AddDependencyOnFailedSymbol) { } TEST_F(CoreAPIsStandardTest, FailAfterMaterialization) { - Optional FooR; - Optional BarR; + std::unique_ptr FooR; + std::unique_ptr BarR; // Create a MaterializationUnit for each symbol that moves the // MaterializationResponsibility into one of the locals above. auto FooMU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + FooR = std::move(R); + }); auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { BarR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + BarR = std::move(R); + }); // Define the symbols. cantFail(JD.define(FooMU)); @@ -882,9 +909,9 @@ TEST_F(CoreAPIsStandardTest, FailMaterializerWithUnqueriedSymbols) { auto MU = std::make_unique( SymbolFlagsMap( {{Foo, JITSymbolFlags::Exported}, {Bar, JITSymbolFlags::Exported}}), - [&](MaterializationResponsibility R) { + [&](std::unique_ptr R) { MaterializerRun = true; - R.failMaterialization(); + R->failMaterialization(); }); cantFail(JD.define(std::move(MU))); @@ -911,7 +938,7 @@ TEST_F(CoreAPIsStandardTest, DropMaterializerWhenEmpty) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, WeakExported}, {Bar, WeakExported}}), - [](MaterializationResponsibility R) { + [](std::unique_ptr R) { llvm_unreachable("Unexpected call to materialize"); }, nullptr, @@ -943,10 +970,10 @@ TEST_F(CoreAPIsStandardTest, AddAndMaterializeLazySymbol) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, JITSymbolFlags::Exported}, {Bar, WeakExported}}), - [&](MaterializationResponsibility R) { + [&](std::unique_ptr R) { assert(BarDiscarded && "Bar should have been discarded by this point"); - cantFail(R.notifyResolved(SymbolMap({{Foo, FooSym}}))); - cantFail(R.notifyEmitted()); + cantFail(R->notifyResolved(SymbolMap({{Foo, FooSym}}))); + cantFail(R->notifyEmitted()); FooMaterialized = true; }, nullptr, @@ -985,18 +1012,18 @@ TEST_F(CoreAPIsStandardTest, TestBasicWeakSymbolMaterialization) { bool BarMaterialized = false; auto MU1 = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}, {Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { - cantFail(R.notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); - cantFail(R.notifyEmitted()); + [&](std::unique_ptr R) { + cantFail(R->notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); + cantFail(R->notifyEmitted()); BarMaterialized = true; }); bool DuplicateBarDiscarded = false; auto MU2 = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { + [&](std::unique_ptr R) { ADD_FAILURE() << "Attempt to materialize Bar from the wrong unit"; - R.failMaterialization(); + R->failMaterialization(); }, nullptr, [&](const JITDylib &JD, SymbolStringPtr Name) { @@ -1026,20 +1053,21 @@ TEST_F(CoreAPIsStandardTest, TestBasicWeakSymbolMaterialization) { TEST_F(CoreAPIsStandardTest, DefineMaterializingSymbol) { bool ExpectNoMoreMaterialization = false; - ES.setDispatchMaterialization([&](std::unique_ptr MU, - MaterializationResponsibility MR) { - if (ExpectNoMoreMaterialization) - ADD_FAILURE() << "Unexpected materialization"; - MU->materialize(std::move(MR)); - }); + ES.setDispatchMaterialization( + [&](std::unique_ptr MU, + std::unique_ptr MR) { + if (ExpectNoMoreMaterialization) + ADD_FAILURE() << "Unexpected materialization"; + MU->materialize(std::move(MR)); + 
}); auto MU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](MaterializationResponsibility R) { + [&](std::unique_ptr R) { cantFail( - R.defineMaterializing(SymbolFlagsMap({{Bar, BarSym.getFlags()}}))); - cantFail(R.notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); - cantFail(R.notifyEmitted()); + R->defineMaterializing(SymbolFlagsMap({{Bar, BarSym.getFlags()}}))); + cantFail(R->notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); + cantFail(R->notifyEmitted()); }); cantFail(JD.define(MU)); @@ -1093,8 +1121,8 @@ TEST_F(CoreAPIsStandardTest, FailResolution) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, JITSymbolFlags::Exported | JITSymbolFlags::Weak}, {Bar, JITSymbolFlags::Exported | JITSymbolFlags::Weak}}), - [&](MaterializationResponsibility R) { - R.failMaterialization(); + [&](std::unique_ptr R) { + R->failMaterialization(); }); cantFail(JD.define(MU)); @@ -1129,23 +1157,23 @@ TEST_F(CoreAPIsStandardTest, FailEmissionAfterResolution) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}, {Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { - cantFail(R.notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); + [&](std::unique_ptr R) { + cantFail(R->notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); ES.lookup( LookupKind::Static, makeJITDylibSearchOrder(&JD), SymbolLookupSet({Baz}), SymbolState::Resolved, - [&R](Expected Result) { + [&](Expected Result) { // Called when "baz" is resolved. We don't actually depend // on or care about baz, but use it to trigger failure of // this materialization before Baz has been finalized in // order to test that error propagation is correct in this // scenario. cantFail(std::move(Result)); - R.failMaterialization(); + R->failMaterialization(); }, [&](const SymbolDependenceMap &Deps) { - R.addDependenciesForAll(Deps); + R->addDependenciesForAll(Deps); }); }); @@ -1165,7 +1193,9 @@ TEST_F(CoreAPIsStandardTest, FailAfterPartialResolution) { // Fail materialization of bar. 
auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { R.failMaterialization(); }); + [&](std::unique_ptr R) { + R->failMaterialization(); + }); cantFail(JD.define(std::move(BarMU))); @@ -1185,9 +1215,9 @@ TEST_F(CoreAPIsStandardTest, FailAfterPartialResolution) { TEST_F(CoreAPIsStandardTest, TestLookupWithUnthreadedMaterialization) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, JITSymbolFlags::Exported}}), - [&](MaterializationResponsibility R) { - cantFail(R.notifyResolved({{Foo, FooSym}})); - cantFail(R.notifyEmitted()); + [&](std::unique_ptr R) { + cantFail(R->notifyResolved({{Foo, FooSym}})); + cantFail(R->notifyEmitted()); }); cantFail(JD.define(MU)); @@ -1204,15 +1234,14 @@ TEST_F(CoreAPIsStandardTest, TestLookupWithThreadedMaterialization) { #if LLVM_ENABLE_THREADS std::thread MaterializationThread; - ES.setDispatchMaterialization([&](std::unique_ptr MU, - MaterializationResponsibility MR) { - auto SharedMR = - std::make_shared(std::move(MR)); - MaterializationThread = - std::thread([MU = std::move(MU), MR = std::move(SharedMR)] { - MU->materialize(std::move(*MR)); - }); - }); + ES.setDispatchMaterialization( + [&](std::unique_ptr MU, + std::unique_ptr MR) { + MaterializationThread = + std::thread([MU = std::move(MU), MR = std::move(MR)]() mutable { + MU->materialize(std::move(MR)); + }); + }); cantFail(JD.define(absoluteSymbols({{Foo, FooSym}}))); @@ -1238,23 +1267,23 @@ TEST_F(CoreAPIsStandardTest, TestGetRequestedSymbolsAndReplace) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}, {Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { - auto Requested = R.getRequestedSymbols(); + [&](std::unique_ptr R) { + auto Requested = R->getRequestedSymbols(); EXPECT_EQ(Requested.size(), 1U) << "Expected one symbol requested"; EXPECT_EQ(*Requested.begin(), Foo) << "Expected \"Foo\" requested"; auto NewMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R2) { - cantFail(R2.notifyResolved(SymbolMap({{Bar, BarSym}}))); - cantFail(R2.notifyEmitted()); + [&](std::unique_ptr R2) { + cantFail(R2->notifyResolved(SymbolMap({{Bar, BarSym}}))); + cantFail(R2->notifyEmitted()); BarMaterialized = true; }); - R.replace(std::move(NewMU)); + R->replace(std::move(NewMU)); - cantFail(R.notifyResolved(SymbolMap({{Foo, FooSym}}))); - cantFail(R.notifyEmitted()); + cantFail(R->notifyResolved(SymbolMap({{Foo, FooSym}}))); + cantFail(R->notifyEmitted()); FooMaterialized = true; }); @@ -1280,13 +1309,13 @@ TEST_F(CoreAPIsStandardTest, TestGetRequestedSymbolsAndReplace) { TEST_F(CoreAPIsStandardTest, TestMaterializationResponsibilityDelegation) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}, {Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { - auto R2 = R.delegate({Bar}); + [&](std::unique_ptr R) { + auto R2 = R->delegate({Bar}); - cantFail(R.notifyResolved({{Foo, FooSym}})); - cantFail(R.notifyEmitted()); - cantFail(R2.notifyResolved({{Bar, BarSym}})); - cantFail(R2.notifyEmitted()); + cantFail(R->notifyResolved({{Foo, FooSym}})); + cantFail(R->notifyEmitted()); + cantFail(R2->notifyResolved({{Bar, BarSym}})); + cantFail(R2->notifyEmitted()); }); cantFail(JD.define(MU)); @@ -1309,12 +1338,11 @@ TEST_F(CoreAPIsStandardTest, TestMaterializeWeakSymbol) { JITSymbolFlags WeakExported = JITSymbolFlags::Exported; WeakExported &= JITSymbolFlags::Weak; - std::unique_ptr FooResponsibility; + std::unique_ptr FooR; auto MU = 
      std::make_unique<SimpleMaterializationUnit>(
          SymbolFlagsMap({{Foo, FooSym.getFlags()}}),
-         [&](MaterializationResponsibility R) {
-           FooResponsibility =
-               std::make_unique<MaterializationResponsibility>(std::move(R));
+         [&](std::unique_ptr<MaterializationResponsibility> R) {
+           FooR = std::move(R);
          });

   cantFail(JD.define(MU));
@@ -1328,7 +1356,7 @@ TEST_F(CoreAPIsStandardTest, TestMaterializeWeakSymbol) {

   auto MU2 = std::make_unique<SimpleMaterializationUnit>(
       SymbolFlagsMap({{Foo, JITSymbolFlags::Exported}}),
-      [](MaterializationResponsibility R) {
+      [](std::unique_ptr<MaterializationResponsibility> R) {
         llvm_unreachable("This unit should never be materialized");
       });

@@ -1339,8 +1367,8 @@ TEST_F(CoreAPIsStandardTest, TestMaterializeWeakSymbol) {
   consumeError(std::move(Err));

   // No dependencies registered, can't fail:
-  cantFail(FooResponsibility->notifyResolved(SymbolMap({{Foo, FooSym}})));
-  cantFail(FooResponsibility->notifyEmitted());
+  cantFail(FooR->notifyResolved(SymbolMap({{Foo, FooSym}})));
+  cantFail(FooR->notifyEmitted());
 }

 static bool linkOrdersEqual(const std::vector<std::shared_ptr<JITDylib>> &LHS,
diff --git a/llvm/unittests/ExecutionEngine/Orc/LazyCallThroughAndReexportsTest.cpp b/llvm/unittests/ExecutionEngine/Orc/LazyCallThroughAndReexportsTest.cpp
index 50e7b60a2df4e..81ff3e7a87b30 100644
--- a/llvm/unittests/ExecutionEngine/Orc/LazyCallThroughAndReexportsTest.cpp
+++ b/llvm/unittests/ExecutionEngine/Orc/LazyCallThroughAndReexportsTest.cpp
@@ -39,15 +39,15 @@ TEST_F(LazyReexportsTest, BasicLocalCallThroughManagerOperation) {

   cantFail(JD.define(std::make_unique<SimpleMaterializationUnit>(
       SymbolFlagsMap({{DummyTarget, JITSymbolFlags::Exported}}),
-      [&](MaterializationResponsibility R) {
+      [&](std::unique_ptr<MaterializationResponsibility> R) {
         DummyTargetMaterialized = true;
         // No dependencies registered, can't fail.
-        cantFail(R.notifyResolved(
+        cantFail(R->notifyResolved(
             {{DummyTarget,
               JITEvaluatedSymbol(static_cast<JITTargetAddress>(
                                      reinterpret_cast<uintptr_t>(&dummyTarget)),
                                  JITSymbolFlags::Exported)}}));
-        cantFail(R.notifyEmitted());
+        cantFail(R->notifyEmitted());
       })));

   unsigned NotifyResolvedCount = 0;
diff --git a/llvm/unittests/ExecutionEngine/Orc/OrcTestCommon.h b/llvm/unittests/ExecutionEngine/Orc/OrcTestCommon.h
index b25851d8f796c..afbc4a9ffaa5c 100644
--- a/llvm/unittests/ExecutionEngine/Orc/OrcTestCommon.h
+++ b/llvm/unittests/ExecutionEngine/Orc/OrcTestCommon.h
@@ -86,7 +86,7 @@ class OrcNativeTarget {
 class SimpleMaterializationUnit : public orc::MaterializationUnit {
 public:
   using MaterializeFunction =
-      std::function<void(orc::MaterializationResponsibility)>;
+      std::function<void(std::unique_ptr<orc::MaterializationResponsibility>)>;
   using DiscardFunction =
       std::function<void(const orc::JITDylib &, orc::SymbolStringPtr)>;
   using DestructorFunction = std::function<void()>;

@@ -108,7 +108,8 @@ class SimpleMaterializationUnit : public orc::MaterializationUnit {

   StringRef getName() const override { return "<Simple>"; }

-  void materialize(orc::MaterializationResponsibility R) override {
+  void
+  materialize(std::unique_ptr<orc::MaterializationResponsibility> R) override {
     Materialize(std::move(R));
   }

From cb19e8c6d192a108b72ab07362921864a9e244f9 Mon Sep 17 00:00:00 2001
From: Siva Chandra Reddy
Date: Thu, 10 Sep 2020 12:39:50 -0700
Subject: [PATCH 0310/1079] [libc][obvious] Include Sqrt.h in
 SqrtLongDoubleX86.h.

This makes SqrtLongDoubleX86.h includable by itself.
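(Aside, not part of the patch: the fix is an instance of the rule that every
header should compile when included first. A minimal sketch of what it
enables is below; it assumes, as the commit message implies, that Sqrt.h
declares the generic sqrt implementation that SqrtLongDoubleX86.h builds on.
The user code and call below are hypothetical, for illustration only.)

```cpp
// Hypothetical translation unit that includes the x86 long double sqrt
// header first, with nothing included before it.
#include "utils/FPUtil/SqrtLongDoubleX86.h"

long double sqrt_ld(long double X) {
  // Compiles only because SqrtLongDoubleX86.h now pulls in Sqrt.h itself
  // instead of relying on its includers to have done so already.
  return __llvm_libc::fputil::sqrt(X);
}
```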
--- libc/utils/FPUtil/SqrtLongDoubleX86.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libc/utils/FPUtil/SqrtLongDoubleX86.h b/libc/utils/FPUtil/SqrtLongDoubleX86.h index 2ac73044cf92f..df80d7d932bac 100644 --- a/libc/utils/FPUtil/SqrtLongDoubleX86.h +++ b/libc/utils/FPUtil/SqrtLongDoubleX86.h @@ -10,6 +10,8 @@ #define LLVM_LIBC_UTILS_FPUTIL_SQRT_LONG_DOUBLE_X86_H #include "FPBits.h" +#include "Sqrt.h" + #include "utils/CPP/TypeTraits.h" namespace __llvm_libc { From c9826829d74e637163fdb0351870b8204e62d6e6 Mon Sep 17 00:00:00 2001 From: Bryan Chan Date: Sat, 29 Aug 2020 17:25:16 -0400 Subject: [PATCH 0311/1079] [EarlyCSE] Equivalent SELECTs should hash equally DenseMap assumes that, if its isEqual method returns true for two elements, then its getHashValue method must return the same value for them. This invariant is broken when one SELECT node is a min/max operation, and the other can be transformed into an equivalent min/max by inverting its predicate and swapping its operands. This patch fixes an assertion failure that would occur intermittently while compiling the following IR: define i32 @t(i32 %i) { %cmp = icmp sle i32 0, %i %twin1 = select i1 %cmp, i32 %i, i32 0 %cmpinv = icmp sgt i32 0, %i %twin2 = select i1 %cmpinv, i32 0, i32 %i %sink = add i32 %twin1, %twin2 ret i32 %sink } Differential Revision: https://reviews.llvm.org/D86843 --- llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 13 +++++++++++++ llvm/test/Transforms/EarlyCSE/commute.ll | 19 +++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index b655204d26dd2..f0d3f90995d7b 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -191,6 +191,19 @@ static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A, Pred = ICmpInst::getSwappedPredicate(Pred); } + // Check for inverted variants of min/max by swapping operands. + switch (Pred) { + case CmpInst::ICMP_ULE: + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_SLE: + case CmpInst::ICMP_SGE: + Pred = CmpInst::getInversePredicate(Pred); + std::swap(A, B); + break; + default: + break; + } + switch (Pred) { case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break; case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break; diff --git a/llvm/test/Transforms/EarlyCSE/commute.ll b/llvm/test/Transforms/EarlyCSE/commute.ll index 57c5a853a12ff..f5868a5fdfb2f 100644 --- a/llvm/test/Transforms/EarlyCSE/commute.ll +++ b/llvm/test/Transforms/EarlyCSE/commute.ll @@ -684,6 +684,25 @@ define i32 @select_not_invert_pred_cond_wrong_select_op(i8 %x, i8 %y, i32 %t, i3 ret i32 %r } +; This test is a reproducer for a bug involving inverted min/max selects +; hashing differently but comparing as equal. It exhibits such a pair of +; values, and we run this test with -earlycse-debug-hash which would catch +; the disagreement and fail if it regressed. 
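+; (Illustrative note, not from the original patch: %twin1 below computes
+; smax(%i, 0) as "select (0 <=s %i), %i, 0", while %twin2 computes the same
+; smax with the inverted predicate and swapped select operands,
+; "select (0 >s %i), 0, %i". Both forms must hash identically for DenseMap's
+; isEqual/getHashValue invariant to hold.)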
+define i32 @inverted_max(i32 %i) {
+; CHECK-LABEL: @inverted_max(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i32 0, [[I:%.*]]
+; CHECK-NEXT:    [[M1:%.*]] = select i1 [[CMP]], i32 [[I]], i32 0
+; CHECK-NEXT:    [[CMPINV:%.*]] = icmp sgt i32 0, [[I:%.*]]
+; CHECK-NEXT:    [[M2:%.*]] = select i1 [[CMPINV]], i32 0, i32 [[I]]
+; CHECK-NEXT:    [[R:%.*]] = add i32 [[M1]], [[M2]]
+; CHECK-NEXT:    ret i32 [[R]]
+  %cmp = icmp sle i32 0, %i
+  %m1 = select i1 %cmp, i32 %i, i32 0
+  %cmpinv = icmp sgt i32 0, %i
+  %m2 = select i1 %cmpinv, i32 0, i32 %i
+  %r = add i32 %m1, %m2
+  ret i32 %r
+}

 ; This test is a reproducer for a bug involving inverted min/max selects
 ; hashing differently but comparing as equal. It exhibits such a pair of

From fb109c42d91c30c8c7497ef1fd7aff6f2969c6e7 Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Thu, 10 Sep 2020 22:00:10 +0100
Subject: [PATCH 0312/1079] [DSE] Switch to MemorySSA-backed DSE by default.

The tests have been updated and I plan to move them from the MSSA
directory up.

Some end-to-end tests needed small adjustments. One difference to the
legacy DSE is that legacy DSE also deletes trivially dead instructions
that are unrelated to memory operations. Because MemorySSA-backed DSE
just walks the MemorySSA, we only visit/check memory instructions. But
removing unrelated dead instructions is not really DSE's job and other
passes will clean up.

One noteworthy change is in llvm/test/Transforms/Coroutines/ArgAddr.ll,
but I think this comes down to legacy DSE not handling instructions that
may throw correctly in that case. To cover this with MemorySSA-backed
DSE, we need an update to llvm.coro.begin to treat its return value as
belonging to the same underlying object as the passed pointer.

There are some minor cases MemorySSA-backed DSE currently misses, e.g.
related to atomic operations, but I think those can be implemented after
the switch.

This has been discussed on llvm-dev:
http://lists.llvm.org/pipermail/llvm-dev/2020-August/144417.html

For the MultiSource/SPEC2000/SPEC2006 the number of eliminated stores
goes from ~17500 (legacy DSE) to ~26300 (MemorySSA-backed). More numbers
and details in the thread on llvm-dev.

Impact on CTMark:
```
Legacy Pass Manager
                        exec instrs    size-text
O3                      + 0.60%        - 0.27%
ReleaseThinLTO          + 1.00%        - 0.42%
ReleaseLTO-g.           + 0.77%        - 0.33%
RelThinLTO (link only)  + 0.87%        - 0.42%
RelLO-g (link only)     + 0.78%        - 0.33%
```
http://llvm-compile-time-tracker.com/compare.php?from=3f22e96d95c71ded906c67067d75278efb0a2525&to=ae8be4642533ff03803967ee9d7017c0d73b0ee0&stat=instructions
```
New Pass Manager
                        exec instrs    size-text
O3                      + 0.95%        - 0.25%
ReleaseThinLTO          + 1.34%        - 0.41%
ReleaseLTO-g.
+ 1.71% - 0.35% RelThinLTO (link only) + 0.96% - 0.41% RelLO-g (link only) + 2.21% - 0.35% ``` http://195.201.131.214:8000/compare.php?from=3f22e96d95c71ded906c67067d75278efb0a2525&to=ae8be4642533ff03803967ee9d7017c0d73b0ee0&stat=instructions Reviewed By: asbirlea, xbolva00, nikic Differential Revision: https://reviews.llvm.org/D87163 --- clang/test/CodeGen/thinlto-distributed-newpm.ll | 2 +- clang/test/CodeGenObjC/exceptions.m | 3 --- .../lib/Transforms/Scalar/DeadStoreElimination.cpp | 2 +- llvm/test/Analysis/BasicAA/modref.ll | 1 + llvm/test/CodeGen/AMDGPU/opt-pipeline.ll | 14 ++++++-------- llvm/test/Other/new-pm-defaults.ll | 3 ++- llvm/test/Other/new-pm-lto-defaults.ll | 2 ++ llvm/test/Other/new-pm-thinlto-defaults.ll | 3 ++- llvm/test/Other/opt-O2-pipeline.ll | 7 +++---- llvm/test/Other/opt-O3-pipeline-enable-matrix.ll | 7 +++---- llvm/test/Other/opt-O3-pipeline.ll | 7 +++---- llvm/test/Other/opt-Os-pipeline.ll | 7 +++---- llvm/test/Transforms/Coroutines/ArgAddr.ll | 10 ++++++++++ llvm/test/Transforms/Coroutines/coro-retcon.ll | 1 - .../MSSA/2011-03-25-DSEMiscompile.ll | 2 +- .../MSSA/2011-09-06-EndOfFunction.ll | 2 +- .../DeadStoreElimination/MSSA/2011-09-06-MemCpy.ll | 2 +- .../MSSA/2016-07-17-UseAfterFree.ll | 2 +- .../MSSA/OverwriteStoreBegin.ll | 2 +- .../DeadStoreElimination/MSSA/OverwriteStoreEnd.ll | 2 +- .../DeadStoreElimination/MSSA/PartialStore.ll | 2 +- .../DeadStoreElimination/MSSA/PartialStore2.ll | 4 ++-- .../MSSA/X86/gather-null-pointer.ll | 2 +- .../MSSA/atomic-overlapping.ll | 2 +- .../DeadStoreElimination/MSSA/atomic-todo.ll | 2 +- .../Transforms/DeadStoreElimination/MSSA/atomic.ll | 2 +- .../DeadStoreElimination/MSSA/calloc-store.ll | 2 +- .../MSSA/combined-partial-overwrites.ll | 4 ++-- .../DeadStoreElimination/MSSA/const-pointers.ll | 2 +- .../Transforms/DeadStoreElimination/MSSA/crash.ll | 2 +- .../DeadStoreElimination/MSSA/cs-cs-aliasing.ll | 2 +- .../DeadStoreElimination/MSSA/debug-counter.ll | 8 ++++---- .../DeadStoreElimination/MSSA/debuginfo.ll | 2 +- .../DeadStoreElimination/MSSA/dominate.ll | 2 +- .../DeadStoreElimination/MSSA/fence-todo.ll | 2 +- .../Transforms/DeadStoreElimination/MSSA/fence.ll | 2 +- .../Transforms/DeadStoreElimination/MSSA/free.ll | 2 +- .../DeadStoreElimination/MSSA/inst-limits.ll | 2 +- .../DeadStoreElimination/MSSA/int_sideeffect.ll | 2 +- .../DeadStoreElimination/MSSA/invariant.start.ll | 2 +- .../MSSA/launder.invariant.group.ll | 2 +- .../DeadStoreElimination/MSSA/libcalls.ll | 2 +- .../DeadStoreElimination/MSSA/lifetime.ll | 2 +- .../MSSA/mda-with-dbg-values.ll | 4 ++-- .../MSSA/memcpy-complete-overwrite.ll | 4 ++-- .../DeadStoreElimination/MSSA/memintrinsics.ll | 2 +- .../MSSA/memoryssa-scan-limit.ll | 8 ++++---- .../DeadStoreElimination/MSSA/memset-and-memcpy.ll | 4 ++-- .../MSSA/memset-missing-debugloc.ll | 2 +- .../MSSA/memset-unknown-sizes.ll | 2 +- .../MSSA/merge-stores-big-endian.ll | 2 +- .../DeadStoreElimination/MSSA/merge-stores.ll | 2 +- .../MSSA/multiblock-captures.ll | 2 +- .../MSSA/multiblock-exceptions.ll | 2 +- .../DeadStoreElimination/MSSA/multiblock-loops.ll | 2 +- .../MSSA/multiblock-malloc-free.ll | 2 +- .../MSSA/multiblock-memintrinsics.ll | 2 +- .../MSSA/multiblock-memoryphis.ll | 2 +- .../MSSA/multiblock-multipath-throwing.ll | 2 +- .../MSSA/multiblock-multipath.ll | 2 +- .../MSSA/multiblock-overlap.ll | 4 ++-- .../MSSA/multiblock-partial.ll | 2 +- .../DeadStoreElimination/MSSA/multiblock-simple.ll | 2 +- .../MSSA/multiblock-throwing.ll | 2 +- .../MSSA/multiblock-unreachable.ll | 2 +- 
.../DeadStoreElimination/MSSA/no-targetdata.ll | 2 +- .../DeadStoreElimination/MSSA/noop-stores.ll | 4 ++-- .../DeadStoreElimination/MSSA/operand-bundles.ll | 2 +- .../DeadStoreElimination/MSSA/overlap.ll | 4 ++-- .../DeadStoreElimination/MSSA/pr11390.ll | 2 +- .../pr47285-not-overwritten-on-all-exit-paths.ll | 2 +- .../MSSA/simple-preservation.ll | 2 +- .../DeadStoreElimination/MSSA/simple-todo.ll | 4 ++-- .../Transforms/DeadStoreElimination/MSSA/simple.ll | 4 ++-- .../Transforms/DeadStoreElimination/MSSA/stats.ll | 2 +- .../DeadStoreElimination/MSSA/tail-byval.ll | 2 +- llvm/test/Transforms/MemCpyOpt/memcpy.ll | 3 +++ 77 files changed, 118 insertions(+), 110 deletions(-) diff --git a/clang/test/CodeGen/thinlto-distributed-newpm.ll b/clang/test/CodeGen/thinlto-distributed-newpm.ll index 9f9a8bec4ef5d..315d668aec0ac 100644 --- a/clang/test/CodeGen/thinlto-distributed-newpm.ll +++ b/clang/test/CodeGen/thinlto-distributed-newpm.ll @@ -131,12 +131,12 @@ ; CHECK-O: Running pass: JumpThreadingPass on main ; CHECK-O: Running pass: CorrelatedValuePropagationPass on main ; CHECK-O: Running pass: DSEPass on main +; CHECK-O: Running analysis: PostDominatorTreeAnalysis on main ; CHECK-O: Starting {{.*}}Function pass manager run. ; CHECK-O: Running pass: LoopSimplifyPass on main ; CHECK-O: Running pass: LCSSAPass on main ; CHECK-O: Finished {{.*}}Function pass manager run. ; CHECK-O: Running pass: ADCEPass on main -; CHECK-O: Running analysis: PostDominatorTreeAnalysis on main ; CHECK-O: Running pass: SimplifyCFGPass on main ; CHECK-O: Running pass: InstCombinePass on main ; CHECK-O: Finished {{.*}}Function pass manager run. diff --git a/clang/test/CodeGenObjC/exceptions.m b/clang/test/CodeGenObjC/exceptions.m index 55a117bcc3dd5..d95398e710147 100644 --- a/clang/test/CodeGenObjC/exceptions.m +++ b/clang/test/CodeGenObjC/exceptions.m @@ -59,9 +59,6 @@ int f2() { // CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[X]] // CHECK-NEXT: [[T2:%.*]] = add nsw i32 [[T1]], -1 - // This store is dead. 
- // CHECK-NEXT: store i32 [[T2]], i32* [[X]] - // CHECK: store i32 6, i32* [[X]] x++; // CHECK-NEXT: call void asm sideeffect "", "*m,*m"(i32* nonnull [[X]] diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index d703f1337a721..a9700bf47a9e4 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -106,7 +106,7 @@ EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::desc("Enable partial store merging in DSE")); static cl::opt<bool> - EnableMemorySSA("enable-dse-memoryssa", cl::init(false), cl::Hidden, + EnableMemorySSA("enable-dse-memoryssa", cl::init(true), cl::Hidden, cl::desc("Use the new MemorySSA-backed DSE.")); static cl::opt<unsigned> diff --git a/llvm/test/Analysis/BasicAA/modref.ll b/llvm/test/Analysis/BasicAA/modref.ll index 9904d13296e89..3ac94ad54f466 100644 --- a/llvm/test/Analysis/BasicAA/modref.ll +++ b/llvm/test/Analysis/BasicAA/modref.ll @@ -82,6 +82,7 @@ define void @test3a(i8* %P, i8 %X) { store i8 %Y, i8* %P2 call void @llvm.lifetime.end.p0i8(i64 10, i8* %P) ret void +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 10, i8* %P) ; CHECK-NEXT: ret void } diff --git a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll index 31531a43fc3f2..b0c0460165e13 100644 --- a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll @@ -511,15 +511,14 @@ ; GCN-O2-NEXT: Value Propagation ; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O2-NEXT: Function Alias Analysis Results -; GCN-O2-NEXT: Phi Values Analysis -; GCN-O2-NEXT: Memory Dependence Analysis -; GCN-O2-NEXT: Dead Store Elimination -; GCN-O2-NEXT: Function Alias Analysis Results +; GCN-O2-NEXT: Post-Dominator Tree Construction ; GCN-O2-NEXT: Memory SSA +; GCN-O2-NEXT: Dead Store Elimination ; GCN-O2-NEXT: Natural Loop Information ; GCN-O2-NEXT: Canonicalize natural loops ; GCN-O2-NEXT: LCSSA Verifier ; GCN-O2-NEXT: Loop-Closed SSA Form Pass +; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Scalar Evolution Analysis ; GCN-O2-NEXT: Loop Pass Manager ; GCN-O2-NEXT: Loop Invariant Code Motion @@ -871,15 +870,14 @@ ; GCN-O3-NEXT: Value Propagation ; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O3-NEXT: Function Alias Analysis Results -; GCN-O3-NEXT: Phi Values Analysis -; GCN-O3-NEXT: Memory Dependence Analysis -; GCN-O3-NEXT: Dead Store Elimination -; GCN-O3-NEXT: Function Alias Analysis Results +; GCN-O3-NEXT: Post-Dominator Tree Construction ; GCN-O3-NEXT: Memory SSA +; GCN-O3-NEXT: Dead Store Elimination ; GCN-O3-NEXT: Natural Loop Information ; GCN-O3-NEXT: Canonicalize natural loops ; GCN-O3-NEXT: LCSSA Verifier ; GCN-O3-NEXT: Loop-Closed SSA Form Pass +; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: Scalar Evolution Analysis ; GCN-O3-NEXT: Loop Pass Manager ; GCN-O3-NEXT: Loop Invariant Code Motion diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index 59c24acb17f04..02394ee0f6527 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -205,6 +205,7 @@ ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: DSEPass +; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run.
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass @@ -212,7 +213,7 @@ ; CHECK-O23SZ-NEXT: Running pass: LICMPass ; CHECK-EP-SCALAR-LATE-NEXT: Running pass: NoOpFunctionPass ; CHECK-O-NEXT: Running pass: ADCEPass -; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis +; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll index a3be19ca29f1f..21e43abd5f7fb 100644 --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -87,6 +87,8 @@ ; CHECK-O2-NEXT: Running analysis: PhiValuesAnalysis ; CHECK-O2-NEXT: Running pass: MemCpyOptPass on foo ; CHECK-O2-NEXT: Running pass: DSEPass on foo +; CHECK-O2-NEXT: Running analysis: MemorySSAAnalysis on foo +; CHECK-O2-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O2-NEXT: Running pass: InstCombinePass on foo ; CHECK-O2-NEXT: Running pass: SimplifyCFGPass on foo ; CHECK-O2-NEXT: Running pass: SCCPPass on foo diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll index 0b9b52a57e2a5..9e5ff8d37f806 100644 --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -178,13 +178,14 @@ ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: DSEPass +; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass ; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LICMPass on Loop at depth 1 containing: %loop ; CHECK-O-NEXT: Running pass: ADCEPass -; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis +; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. 
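The RUN-line updates in the surrounding test diffs all follow one pattern: with the default flipped, a bare -dse already selects the MemorySSA-backed implementation, so the explicit -enable-dse-memoryssa flag is redundant, while the legacy path stays reachable by negating the flag. A minimal sketch of the two invocations (hypothetical RUN lines, not copied from any one test):
```
; RUN: opt < %s -dse -S | FileCheck %s                              ; MemorySSA-backed DSE (new default)
; RUN: opt < %s -dse -enable-dse-memoryssa=false -S | FileCheck %s  ; legacy MemoryDependence-backed DSE
```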
diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll index e606e7cfac171..42aa8b0089a54 100644 --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -158,15 +158,14 @@ ; CHECK-NEXT: Value Propagation ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Phi Values Analysis -; CHECK-NEXT: Memory Dependence Analysis -; CHECK-NEXT: Dead Store Elimination -; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Memory SSA +; CHECK-NEXT: Dead Store Elimination ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Loop Invariant Code Motion diff --git a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll index aaee6f786bac9..5f78c2f36d509 100644 --- a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll +++ b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll @@ -163,15 +163,14 @@ ; CHECK-NEXT: Value Propagation ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Phi Values Analysis -; CHECK-NEXT: Memory Dependence Analysis -; CHECK-NEXT: Dead Store Elimination -; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Memory SSA +; CHECK-NEXT: Dead Store Elimination ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Loop Invariant Code Motion diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll index b2d2f85ae21be..069ef2dbba7e5 100644 --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -163,15 +163,14 @@ ; CHECK-NEXT: Value Propagation ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Phi Values Analysis -; CHECK-NEXT: Memory Dependence Analysis -; CHECK-NEXT: Dead Store Elimination -; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Memory SSA +; CHECK-NEXT: Dead Store Elimination ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Loop Invariant Code Motion diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll index cc91707c4b009..b7855e6b3856f 100644 --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -144,15 +144,14 @@ ; CHECK-NEXT: Value Propagation ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Phi Values Analysis -; CHECK-NEXT: Memory Dependence Analysis -; CHECK-NEXT: Dead Store Elimination -; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Memory SSA +; CHECK-NEXT: Dead Store Elimination ; CHECK-NEXT: Natural 
Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Loop Invariant Code Motion diff --git a/llvm/test/Transforms/Coroutines/ArgAddr.ll b/llvm/test/Transforms/Coroutines/ArgAddr.ll index a1cac168ac402..b711f1f12c9fa 100644 --- a/llvm/test/Transforms/Coroutines/ArgAddr.ll +++ b/llvm/test/Transforms/Coroutines/ArgAddr.ll @@ -46,8 +46,18 @@ entry: call void @llvm.coro.destroy(i8* %hdl) ret i32 0 ; CHECK: call void @ctor +; CHECK-NEXT: %dec1.spill.addr.i = getelementptr inbounds i8, i8* %call.i, i64 16 +; CHECK-NEXT: bitcast i8* %dec1.spill.addr.i to i32* +; CHECK-NEXT: store i32 4 ; CHECK-NEXT: call void @print(i32 4) +; CHECK-NEXT: %index.addr5.i = getelementptr inbounds i8, i8* %call.i, i64 20 +; CHECK-NEXT: bitcast i8* %index.addr5.i to i1* +; CHECK-NEXT: store i1 false +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8( +; CHECK-NEXT: store i32 3 ; CHECK-NEXT: call void @print(i32 3) +; CHECK-NEXT: store i1 false +; CHECK-NEXT: store i32 2 ; CHECK-NEXT: call void @print(i32 2) ; CHECK: ret i32 0 } diff --git a/llvm/test/Transforms/Coroutines/coro-retcon.ll b/llvm/test/Transforms/Coroutines/coro-retcon.ll index 13283f05b2661..0021bb497aad9 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon.ll @@ -74,7 +74,6 @@ entry: ; CHECK-NEXT: call void @print(i32 [[INC]]) ; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add i32 [[LOAD]], 1 -; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], align 4 ; CHECK-NEXT: call void @print(i32 [[INC]]) ; CHECK-NEXT: ret i32 0 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-03-25-DSEMiscompile.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-03-25-DSEMiscompile.ll index c90da22026727..25c2d5ffe7f56 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-03-25-DSEMiscompile.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-03-25-DSEMiscompile.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s ; PR9561 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" target triple = "i386-apple-darwin9.8" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll index b9a0ea76d7fbb..7e46d28a9c47f 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll @@ -1,4 +1,4 @@ -; RUN: opt -dse -enable-dse-memoryssa -S < %s | FileCheck %s +; RUN: opt -dse -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-MemCpy.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-MemCpy.ll index 30c95961d2b67..665d772d03b91 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-MemCpy.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-MemCpy.ll @@ -1,4 +1,4 @@ -; RUN: opt -dse 
-enable-dse-memoryssa -S < %s | FileCheck %s +; RUN: opt -dse -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/2016-07-17-UseAfterFree.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/2016-07-17-UseAfterFree.ll index 85a749f81d50b..3501b43600168 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/2016-07-17-UseAfterFree.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/2016-07-17-UseAfterFree.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S -enable-dse-partial-overwrite-tracking | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S -enable-dse-partial-overwrite-tracking | FileCheck %s ; PR28588 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll index 93e8860bdaf31..b5d9c40cbdbc3 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s define void @write4to7(i32* nocapture %p) { ; CHECK-LABEL: @write4to7( diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll index 1cdeade120a69..b6ae657d17e5e 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" %struct.vec2 = type { <4 x i32>, <4 x i32> } diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore.ll index 4f99ec09d2a03..1dd894e6658cc 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -enable-dse-partial-store-merging=false -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-partial-store-merging=false -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" ; Ensure that the dead store is deleted in this case. 
It is wholely diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore2.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore2.ll index 3802d1c22cbec..ebcb0c3808a15 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore2.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore2.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s --data-layout "e" -dse -enable-dse-memoryssa -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-LE %s -; RUN: opt < %s --data-layout "E" -dse -enable-dse-memoryssa -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-BE %s +; RUN: opt < %s --data-layout "e" -dse -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-LE %s +; RUN: opt < %s --data-layout "E" -dse -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-BE %s ; This test used to hit an assertion (see PR41949). ; diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/X86/gather-null-pointer.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/X86/gather-null-pointer.ll index 0997ce725b21a..6a5f4bb9eb25c 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/X86/gather-null-pointer.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/X86/gather-null-pointer.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -dse -S | FileCheck %s ; Both stores should be emitted because we can't tell if the gather aliases. diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-overlapping.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-overlapping.ll index 5a7bbdd0a6077..d23208166136a 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-overlapping.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-overlapping.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -enable-dse-memoryssa %s -S | FileCheck %s +; RUN: opt -dse %s -S | FileCheck %s target datalayout = "e-m:o-p:32:32-Fi8-i64:64-a:0:32-n32-S128" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-todo.ll index 8dfb85719c309..b11000570ecc4 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-todo.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-todo.ll @@ -1,5 +1,5 @@ ; XFAIL: * -; RUN: opt -basic-aa -dse -enable-dse-memoryssa -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-macosx10.7.0" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll index 51129fe2bcadb..30f799d59ef7f 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -basic-aa -dse -enable-dse-memoryssa -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -S < %s | FileCheck %s target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-macosx10.7.0" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll index d8fc8136f0d7e..ddb10d7ccc80f 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s declare noalias i8* @calloc(i64, i64) diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/combined-partial-overwrites.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/combined-partial-overwrites.ll index a3bd300c8b782..ec1b9a5ee5140 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/combined-partial-overwrites.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/combined-partial-overwrites.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -dse -enable-dse-memoryssa -enable-dse-partial-store-merging=false < %s | FileCheck --check-prefixes=CHECK,DEFAULT-LIMIT %s -; RUN: opt -S -dse -enable-dse-memoryssa -enable-dse-partial-store-merging=false -dse-memoryssa-partial-store-limit=10 < %s | FileCheck --check-prefixes=CHECK,LARGER-LIMIT %s +; RUN: opt -S -dse -enable-dse-partial-store-merging=false < %s | FileCheck --check-prefixes=CHECK,DEFAULT-LIMIT %s +; RUN: opt -S -dse -enable-dse-partial-store-merging=false -dse-memoryssa-partial-store-limit=10 < %s | FileCheck --check-prefixes=CHECK,LARGER-LIMIT %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/const-pointers.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/const-pointers.ll index 839fdfcf2d2cd..a2218b725cd3b 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/const-pointers.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/const-pointers.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -dse -enable-dse-memoryssa -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" %t = type { i32 } diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/crash.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/crash.ll index c3860f1fe6421..ccee7fb8ba58b 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/crash.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/crash.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S +; RUN: opt < %s -basic-aa -dse -S target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin10.0" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/cs-cs-aliasing.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/cs-cs-aliasing.ll index 7ae6c450bb560..b403e3382234d 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/cs-cs-aliasing.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/cs-cs-aliasing.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -dse -enable-dse-memoryssa -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git 
a/llvm/test/Transforms/DeadStoreElimination/MSSA/debug-counter.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/debug-counter.ll index 9def782900899..b881e38e92f30 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/debug-counter.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/debug-counter.ll @@ -3,16 +3,16 @@ ; REQUIRES: asserts ; Eliminates store to %R in the entry block. -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -debug-counter=dse-memoryssa-skip=0,dse-memoryssa-count=1 -S | FileCheck --check-prefix=SKIP0-COUNT1 %s +; RUN: opt < %s -basic-aa -dse -debug-counter=dse-memoryssa-skip=0,dse-memoryssa-count=1 -S | FileCheck --check-prefix=SKIP0-COUNT1 %s ; Eliminates store to %P in the entry block. -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -debug-counter=dse-memoryssa-skip=1,dse-memoryssa-count=1 -S | FileCheck --check-prefix=SKIP1-COUNT1 %s +; RUN: opt < %s -basic-aa -dse -debug-counter=dse-memoryssa-skip=1,dse-memoryssa-count=1 -S | FileCheck --check-prefix=SKIP1-COUNT1 %s ; Eliminates both stores in the entry block. -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -debug-counter=dse-memoryssa-skip=0,dse-memoryssa-count=2 -S | FileCheck --check-prefix=SKIP0-COUNT2 %s +; RUN: opt < %s -basic-aa -dse -debug-counter=dse-memoryssa-skip=0,dse-memoryssa-count=2 -S | FileCheck --check-prefix=SKIP0-COUNT2 %s ; Eliminates no stores. -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -debug-counter=dse-memoryssa-skip=2,dse-memoryssa-count=1 -S | FileCheck --check-prefix=SKIP2-COUNT1 %s +; RUN: opt < %s -basic-aa -dse -debug-counter=dse-memoryssa-skip=2,dse-memoryssa-count=1 -S | FileCheck --check-prefix=SKIP2-COUNT1 %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/debuginfo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/debuginfo.ll index f4e7e1fd148c5..b927965dc4054 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/debuginfo.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/debuginfo.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -debugify -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -debugify -basic-aa -dse -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/dominate.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/dominate.ll index 32f8699dc61e6..24dd65e07bbc2 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/dominate.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/dominate.ll @@ -1,4 +1,4 @@ -; RUN: opt -dse -enable-dse-memoryssa -disable-output < %s +; RUN: opt -dse -disable-output < %s ; test that we don't crash declare void @bar() diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll index cdd12ef302736..ab4e65edaab9e 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll @@ -1,6 +1,6 @@ ; XFAIL: * -; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa < %s | FileCheck %s +; RUN: opt -S -basic-aa -dse < %s | FileCheck %s ; We DSE stack alloc'ed and byval locations, in the presence of fences. ; Fence does not make an otherwise thread local store visible. 
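The comment closing the fence-todo.ll hunk above states the property those XFAIL'ed tests want: a store to a non-escaping stack slot can be dead even across a fence, because a fence cannot publish a thread-local location to other threads. A minimal hand-written illustration, assumed rather than copied from the test file:
```
define void @dead_store_across_fence() {
  %a = alloca i32
  store i32 1, i32* %a   ; dead in principle: %a never escapes this thread
  fence seq_cst          ; the fence cannot make the thread-local slot visible
  store i32 2, i32* %a
  ret void
}
```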
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll index fc72f1d96ddaf..5f2398812e93d 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa < %s | FileCheck %s +; RUN: opt -S -basic-aa -dse < %s | FileCheck %s ; We conservative choose to prevent dead store elimination ; across release or stronger fences. It's not required diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll index 13cfb7002cf1e..66ccc7b4f47b5 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s target datalayout = "e-p:64:64:64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll index 638571f6f4172..6357477ae43be 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dse -enable-dse-memoryssa < %s | FileCheck %s +; RUN: opt -S -dse < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; This test is not relevant for DSE with MemorySSA. Non-memory instructions diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/int_sideeffect.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/int_sideeffect.ll index 6ea0b190f21fb..035e787f6bd7a 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/int_sideeffect.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/int_sideeffect.ll @@ -1,4 +1,4 @@ -; RUN: opt -S < %s -dse -enable-dse-memoryssa | FileCheck %s +; RUN: opt -S < %s -dse | FileCheck %s declare void @llvm.sideeffect() diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/invariant.start.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/invariant.start.ll index 82e168b45f754..27400cd4ed16c 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/invariant.start.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/invariant.start.ll @@ -1,5 +1,5 @@ ; Test to make sure llvm.invariant.start calls are not treated as clobbers. 
-; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/launder.invariant.group.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/launder.invariant.group.ll index 46f3c261f7bc0..28abe2eb5feea 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/launder.invariant.group.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/launder.invariant.group.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s ; CHECK-LABEL: void @skipBarrier(i8* %ptr) define void @skipBarrier(i8* %ptr) { diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/libcalls.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/libcalls.ll index ceffa47ca8fa9..ac6efd54ddba6 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/libcalls.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/libcalls.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -inferattrs -basic-aa -dse -enable-dse-memoryssa < %s | FileCheck %s +; RUN: opt -S -inferattrs -basic-aa -dse < %s | FileCheck %s target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll index 29ff7726c4eee..9aa3c9c1fd420 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa < %s | FileCheck %s +; RUN: opt -S -basic-aa -dse < %s | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll index 937f10d3502c7..79211609a5400 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=2 < %s | FileCheck %s -; RUN: opt -S -strip-debug -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=2 < %s | FileCheck %s +; RUN: opt -S -dse -dse-memoryssa-scanlimit=2 < %s | FileCheck %s +; RUN: opt -S -strip-debug -dse -dse-memoryssa-scanlimit=2 < %s | FileCheck %s ; Test case to check that DSE gets the same result even if we have a dbg value ; between the memcpy. 
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll index 70c0265813634..9b1624a931bc3 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; XFAIL: * -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll index 81ba0a6764a66..088752c4ebae7 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -dse -enable-dse-memoryssa < %s | FileCheck %s +; RUN: opt -S -dse < %s | FileCheck %s declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind declare void @llvm.memmove.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll index 0e722c56f5f9f..3a8b772b062e0 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck --check-prefix=NO-LIMIT %s -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=0 -S | FileCheck --check-prefix=LIMIT-0 %s -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=2 -S | FileCheck --check-prefix=LIMIT-2 %s -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=3 -S | FileCheck --check-prefix=LIMIT-3 %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck --check-prefix=NO-LIMIT %s +; RUN: opt < %s -basic-aa -dse -dse-memoryssa-scanlimit=0 -S | FileCheck --check-prefix=LIMIT-0 %s +; RUN: opt < %s -basic-aa -dse -dse-memoryssa-scanlimit=2 -S | FileCheck --check-prefix=LIMIT-2 %s +; RUN: opt < %s -basic-aa -dse -dse-memoryssa-scanlimit=3 -S | FileCheck --check-prefix=LIMIT-3 %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll index 02fc8f22b6b40..ad888159ffa67 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s ; 
RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll index c28f0cc901247..9229157a9b6ed 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll @@ -2,7 +2,7 @@ ; Test that the getelementptr generated when the dse pass determines that ; a memset can be shortened has the debugloc carried over from the memset. -; RUN: opt -S -march=native -dse -enable-dse-memoryssa < %s| FileCheck %s +; RUN: opt -S -march=native -dse < %s| FileCheck %s ; CHECK: bitcast [5 x i64]* %{{[a-zA-Z_][a-zA-Z0-9_]*}} to i8*, !dbg ; CHECK-NEXT: %{{[0-9]+}} = getelementptr inbounds i8, i8* %0, i64 32, !dbg ![[DBG:[0-9]+]] ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 16 %1, i8 0, i64 8, i1 false), !dbg ![[DBG:[0-9]+]] diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll index 115540e54a26b..bbd0d01ee475f 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -enable-dse-memoryssa -S %s | FileCheck %s +; RUN: opt -dse -S %s | FileCheck %s declare i8* @_Znwm() local_unnamed_addr #0 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll index 8acc29f3f62e4..77784ac0c4047 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -enable-dse-memoryssa -enable-dse-partial-store-merging -S < %s | FileCheck %s +; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128" define void @byte_by_byte_replacement(i32 *%ptr) { diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll index 7643c3ba5b9e7..8cd593bb00e77 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -enable-dse-memoryssa -enable-dse-partial-store-merging -S < %s | FileCheck %s +; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" define void @byte_by_byte_replacement(i32 *%ptr) { diff --git 
a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll index fc3e99723d6e6..45f3e2c429754 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll index 8357ef9302006..08a15565e18ff 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" declare void @f() diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll index b213edbaf09e6..c898cf9bee8ac 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll index 763362dd3d479..56f8ee6487d9d 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" declare void @unknown_func() diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll index d7945e888f4d0..58ef70c1b541b 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" declare void @unknown_func() diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll 
b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll index 0ace57e690fe1..1ad2e71f2d59a 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath-throwing.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath-throwing.ll index 944586253bedb..4fe04e5467d3d 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath-throwing.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath-throwing.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll index 8413251036676..ab7a056f7018d 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-overlap.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-overlap.ll index e6e206ef5abc7..8a71c73979170 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-overlap.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-overlap.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -enable-dse-memoryssa %s -S | FileCheck --check-prefixes=CHECK,DEFAULT-LIMIT %s -; RUN: opt -dse -enable-dse-memoryssa -dse-memoryssa-partial-store-limit=10 %s -S | FileCheck --check-prefixes=CHECK,LARGER-LIMIT %s +; RUN: opt -dse %s -S | FileCheck --check-prefixes=CHECK,DEFAULT-LIMIT %s +; RUN: opt -dse -dse-memoryssa-partial-store-limit=10 %s -S | FileCheck --check-prefixes=CHECK,LARGER-LIMIT %s %struct.ham = type { [3 x double], [3 x double]} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll index b2a5c04f31fd4..f998bb44a4716 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll index 
aa09235e76986..334e080bf8dbb 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll index f6031e86bef07..c067a907892d9 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" declare void @unknown_func() diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-unreachable.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-unreachable.ll index df08d619f9dcd..6548ec34ae0ac 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-unreachable.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-unreachable.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -enable-dse-memoryssa -S %s | FileCheck %s +; RUN: opt -dse -S %s | FileCheck %s target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/no-targetdata.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/no-targetdata.ll index 7e6a4cdf3a7ce..aec3076678787 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/no-targetdata.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/no-targetdata.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -dse -enable-dse-memoryssa -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -S < %s | FileCheck %s declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll index 6a9c4b80b3ddf..ad93cfc72a7ec 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll index 5940f2bf052bf..f3df74be031b7 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck 
%s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s declare noalias i8* @malloc(i64) "malloc-like" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll index e3e6b8f583a92..31bb3234dc421 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s declare void @use(i64*) diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/pr11390.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/pr11390.ll index c58fc18d2a9d6..56ca604eff98b 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/pr11390.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/pr11390.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -dse -enable-dse-memoryssa -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -S < %s | FileCheck %s ; PR11390 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/pr47285-not-overwritten-on-all-exit-paths.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/pr47285-not-overwritten-on-all-exit-paths.ll index aaff809d38d0b..7c3bb913f5f70 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/pr47285-not-overwritten-on-all-exit-paths.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/pr47285-not-overwritten-on-all-exit-paths.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -enable-dse-memoryssa -S %s | FileCheck %s +; RUN: opt -dse -S %s | FileCheck %s @b = local_unnamed_addr global i32 0, align 4 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-preservation.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-preservation.ll index 3562c611e76b2..6aedc1ca01f83 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-preservation.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-preservation.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -enable-knowledge-retention -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-knowledge-retention -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll index a4d3127d25f3d..444e139a4cf62 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; XFAIL: * -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt 
< %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" ; Remove redundant store if loaded value is in another block inside a loop. diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll index 9f719746f9f17..5ee1a55a7369f 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/stats.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/stats.ll index bd4f6f0e58668..990f098533bfa 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/stats.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/stats.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -stats -S 2>&1 | FileCheck %s +; RUN: opt < %s -basic-aa -dse -stats -S 2>&1 | FileCheck %s ; REQUIRES: asserts diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/tail-byval.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/tail-byval.ll index ec3bb495182f0..ed2fbd434a75d 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/tail-byval.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/tail-byval.ll @@ -1,4 +1,4 @@ -; RUN: opt -dse -enable-dse-memoryssa -S < %s | FileCheck %s +; RUN: opt -dse -S < %s | FileCheck %s ; Don't eliminate stores to allocas before tail calls to functions that use ; byval. It's correct to mark calls like these as 'tail'. To implement this tail diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll index 1741da030c2ed..065230d4be139 100644 --- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll +++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll @@ -73,8 +73,11 @@ define void @test3(%0* noalias sret %agg.result) nounwind { call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %agg.result2, i8* align 16 %x.01, i32 32, i1 false) ret void ; CHECK-LABEL: @test3( +; CHECK-NEXT: %x.0 = alloca +; CHECK-NEXT: %x.01 = bitcast ; CHECK-NEXT: %agg.result1 = bitcast ; CHECK-NEXT: call void @llvm.memcpy +; CHECK-NEXT: %agg.result2 = bitcast ; CHECK-NEXT: ret void } From 485f3f35cc511637661619967319eafb932df5d5 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 10 Sep 2020 14:30:00 -0700 Subject: [PATCH 0313/1079] [ELF] Make two PPC64.cpp variables constexpr. NFC Why are they mutable? 
:)
---
 lld/ELF/Arch/PPC64.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
index cfb3ca9df4066..f5c91c1ff3b56 100644
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -22,8 +22,8 @@ using namespace llvm::ELF;
 using namespace lld;
 using namespace lld::elf;

-static uint64_t ppc64TocOffset = 0x8000;
-static uint64_t dynamicThreadPointerOffset = 0x8000;
+constexpr uint64_t ppc64TocOffset = 0x8000;
+constexpr uint64_t dynamicThreadPointerOffset = 0x8000;

 // The instruction encoding of bits 21-30 from the ISA for the Xform and Dform
 // instructions that can be used as part of the initial exec TLS sequence.

From b34f116856306d97aa9244a46eb1643a8ddd49a8 Mon Sep 17 00:00:00 2001
From: Peter Steinfeld
Date: Fri, 4 Sep 2020 08:44:52 -0700
Subject: [PATCH 0314/1079] [flang] Fix assert on constant folding of extended
 types

When we define a derived type that extends another derived type, we can
then create a structure constructor that contains values for the fields
of both the child type and its parent. The compiler's internal
representation of that value contains the name of the parent type where
a component name would normally appear. This caused an assert during
constant folding.

There are three cases for components that appear in structure
constructors. The first is the normal case of a component appearing in
a structure constructor for its type. The second is a component of the
parent (or grandparent) type appearing in a structure constructor for
the child type. The third is the parent type component, which can
appear in the structure constructor of its child. There are also cases
where the components can be arrays.

I created the test case folding12.f90 that covers all of these cases
and modified the code to handle them.

Most of my changes were to the "Find()" method of the type
"StructureConstructor" where I added code to cover the second and third
cases described above. To handle these cases, I needed to create a
"StructureConstructor" for the parent type component and return it. To
handle returning a newly created "StructureConstructor", I changed the
return type of "Find()" to be "std::optional" rather than an ordinary
pointer.

This change supersedes D86172.
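To make the three cases concrete, here is a distilled sketch modeled on
the folding12.f90 test this patch adds (trimmed for illustration; not
the full test):

  module example
    type parent_type
      integer :: parent_field
    end type parent_type
    type, extends(parent_type) :: child_type
      integer :: child_field
    end type child_type
    type(child_type), parameter :: c = child_type(10, 11)
    ! Case 1: a component folded in a constructor of its own type
    logical, parameter :: t1 = c%child_field == 11
    ! Case 2: a parent component reached through the child
    logical, parameter :: t2 = c%parent_field == 10
    ! Case 3: the parent type component itself
    type(parent_type), parameter :: p = c%parent_type
    logical, parameter :: t3 = p%parent_field == 10
  end module example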
Differential Revision: https://reviews.llvm.org/D87151
---
 flang/include/flang/Evaluate/expression.h |   4 +-
 flang/include/flang/Evaluate/type.h       |   2 +
 flang/lib/Evaluate/expression.cpp         |  75 +++++++++-
 flang/lib/Evaluate/fold-implementation.h  |  10 +-
 flang/lib/Evaluate/type.cpp               |   2 +-
 flang/test/Evaluate/folding12.f90         | 163 ++++++++++++++++++++++
 6 files changed, 245 insertions(+), 11 deletions(-)
 create mode 100644 flang/test/Evaluate/folding12.f90

diff --git a/flang/include/flang/Evaluate/expression.h b/flang/include/flang/Evaluate/expression.h
index 09847ec954072..f0ce375da0153 100644
--- a/flang/include/flang/Evaluate/expression.h
+++ b/flang/include/flang/Evaluate/expression.h
@@ -717,7 +717,8 @@ class StructureConstructor {
     return values_.end();
   }

-  const Expr<SomeType> *Find(const Symbol &) const; // can return null
+  // can return nullopt
+  std::optional<Expr<SomeType>> Find(const Symbol &) const;

   StructureConstructor &Add(const semantics::Symbol &, Expr<SomeType> &&);
   int Rank() const { return 0; }
@@ -725,6 +726,7 @@
   llvm::raw_ostream &AsFortran(llvm::raw_ostream &) const;

 private:
+  std::optional<Expr<SomeType>> CreateParentComponent(const Symbol &) const;
   Result result_;
   StructureConstructorValues values_;
 };
diff --git a/flang/include/flang/Evaluate/type.h b/flang/include/flang/Evaluate/type.h
index cf13ba6e27d96..663ece6eb4a09 100644
--- a/flang/include/flang/Evaluate/type.h
+++ b/flang/include/flang/Evaluate/type.h
@@ -217,6 +217,8 @@ class DynamicType {
 const semantics::DerivedTypeSpec *GetDerivedTypeSpec(const DynamicType &);
 const semantics::DerivedTypeSpec *GetDerivedTypeSpec(
     const std::optional<DynamicType> &);
+const semantics::DerivedTypeSpec *GetParentTypeSpec(
+    const semantics::DerivedTypeSpec &);

 std::string DerivedTypeSpecAsFortran(const semantics::DerivedTypeSpec &);

diff --git a/flang/lib/Evaluate/expression.cpp b/flang/lib/Evaluate/expression.cpp
index 5a456648b8254..7f8c9eb32f3f2 100644
--- a/flang/lib/Evaluate/expression.cpp
+++ b/flang/lib/Evaluate/expression.cpp
@@ -12,7 +12,12 @@
 #include "flang/Evaluate/common.h"
 #include "flang/Evaluate/tools.h"
 #include "flang/Evaluate/variable.h"
+#include "flang/Parser/char-block.h"
 #include "flang/Parser/message.h"
+#include "flang/Semantics/scope.h"
+#include "flang/Semantics/symbol.h"
+#include "flang/Semantics/tools.h"
+#include "flang/Semantics/type.h"
 #include "llvm/Support/raw_ostream.h"
 #include <string>
 #include <type_traits>
@@ -206,13 +211,75 @@ bool Expr<SomeDerived>::operator==(const Expr<SomeDerived> &that) const {

 DynamicType StructureConstructor::GetType() const { return result_.GetType(); }

-const Expr<SomeType> *StructureConstructor::Find(
+std::optional<Expr<SomeType>> StructureConstructor::CreateParentComponent(
+    const Symbol &component) const {
+  if (const semantics::DerivedTypeSpec *
+          parentSpec{GetParentTypeSpec(derivedTypeSpec())}) {
+    StructureConstructor structureConstructor{*parentSpec};
+    if (const auto *parentDetails{
+            component.detailsIf<semantics::DerivedTypeDetails>()}) {
+      auto parentIter{parentDetails->componentNames().begin()};
+      for (const auto &childIter : values_) {
+        if (parentIter == parentDetails->componentNames().end()) {
+          break; // There are more components in the child
+        }
+        SymbolRef componentSymbol{childIter.first};
+        structureConstructor.Add(
+            *componentSymbol, common::Clone(childIter.second.value()));
+        ++parentIter;
+      }
+      Constant<SomeDerived> constResult{std::move(structureConstructor)};
+      Expr<SomeType> result{std::move(constResult)};
+      return std::optional<Expr<SomeType>>{result};
+    }
+  }
+  return std::nullopt;
+}
+
+static const Symbol *GetParentComponentSymbol(const Symbol &symbol) {
+  if (symbol.test(Symbol::Flag::ParentComp)) {
+    // we have a created parent component
+    const auto &compObject{symbol.get<semantics::ObjectEntityDetails>()};
+    if (const semantics::DeclTypeSpec * compType{compObject.type()}) {
+      const semantics::DerivedTypeSpec &dtSpec{compType->derivedTypeSpec()};
+      const semantics::Symbol &compTypeSymbol{dtSpec.typeSymbol()};
+      return &compTypeSymbol;
+    }
+  }
+  if (symbol.detailsIf<semantics::DerivedTypeDetails>()) {
+    // we have an implicit parent type component
+    return &symbol;
+  }
+  return nullptr;
+}
+
+std::optional<Expr<SomeType>> StructureConstructor::Find(
     const Symbol &component) const {
   if (auto iter{values_.find(component)}; iter != values_.end()) {
-    return &iter->second.value();
-  } else {
-    return nullptr;
+    return iter->second.value();
+  }
+  // The component wasn't there directly, see if we're looking for the parent
+  // component of an extended type
+  if (const Symbol * typeSymbol{GetParentComponentSymbol(component)}) {
+    return CreateParentComponent(*typeSymbol);
+  }
+  // Look for the component in the parent type component. The parent type
+  // component is always the first one
+  if (!values_.empty()) {
+    const Expr<SomeType> *parentExpr{&values_.begin()->second.value()};
+    if (const Expr<SomeDerived> *derivedExpr{
+            std::get_if<Expr<SomeDerived>>(&parentExpr->u)}) {
+      if (const Constant<SomeDerived> *constExpr{
+              std::get_if<Constant<SomeDerived>>(&derivedExpr->u)}) {
+        if (std::optional<StructureConstructor> parentComponentValue{
+                constExpr->GetScalarValue()}) {
+          // Try to find the component in the parent structure constructor
+          return parentComponentValue->Find(component);
+        }
+      }
+    }
   }
+  return std::nullopt;
 }

 StructureConstructor &StructureConstructor::Add(
diff --git a/flang/lib/Evaluate/fold-implementation.h b/flang/lib/Evaluate/fold-implementation.h
index e01c7de72f8d9..bb5463e697fe1 100644
--- a/flang/lib/Evaluate/fold-implementation.h
+++ b/flang/lib/Evaluate/fold-implementation.h
@@ -296,8 +296,8 @@ std::optional<Constant<T>> Folder<T>::ApplyComponent(
     Constant<SomeDerived> &&structures, const Symbol &component,
     const std::vector<Constant<SubscriptInteger>> *subscripts) {
   if (auto scalar{structures.GetScalarValue()}) {
-    if (auto *expr{scalar->Find(component)}) {
-      if (const Constant<T> *value{UnwrapConstantValue<T>(*expr)}) {
+    if (std::optional<Expr<SomeType>> expr{scalar->Find(component)}) {
+      if (const Constant<T> *value{UnwrapConstantValue<T>(expr.value())}) {
         if (!subscripts) {
           return std::move(*value);
         } else {
@@ -314,12 +314,12 @@ std::optional<Constant<T>> Folder<T>::ApplyComponent(
   ConstantSubscripts at{structures.lbounds()};
   do {
     StructureConstructor scalar{structures.At(at)};
-    if (auto *expr{scalar.Find(component)}) {
-      if (const Constant<T> *value{UnwrapConstantValue<T>(*expr)}) {
+    if (std::optional<Expr<SomeType>> expr{scalar.Find(component)}) {
+      if (const Constant<T> *value{UnwrapConstantValue<T>(expr.value())}) {
         if (!array.get()) {
           // This technique ensures that character length or derived type
           // information is propagated to the array constructor.
-          auto *typedExpr{UnwrapExpr<Expr<T>>(*expr)};
+          auto *typedExpr{UnwrapExpr<Expr<T>>(expr.value())};
           CHECK(typedExpr);
           array = std::make_unique<ArrayConstructor<T>>(*typedExpr);
         }
diff --git a/flang/lib/Evaluate/type.cpp b/flang/lib/Evaluate/type.cpp
index e1eec19e896b9..e96e19150f4ee 100644
--- a/flang/lib/Evaluate/type.cpp
+++ b/flang/lib/Evaluate/type.cpp
@@ -207,7 +207,7 @@ static const semantics::Symbol *FindParentComponent(
   return nullptr;
 }

-static const semantics::DerivedTypeSpec *GetParentTypeSpec(
+const semantics::DerivedTypeSpec *GetParentTypeSpec(
     const semantics::DerivedTypeSpec &derived) {
   if (const semantics::Symbol * parent{FindParentComponent(derived)}) {
     return &parent->get<semantics::ObjectEntityDetails>()
diff --git a/flang/test/Evaluate/folding12.f90 b/flang/test/Evaluate/folding12.f90
new file mode 100644
index 0000000000000..657ddc6a34ae5
--- /dev/null
+++ b/flang/test/Evaluate/folding12.f90
@@ -0,0 +1,163 @@
+! RUN: %S/test_folding.sh %s %t %f18
+! Test folding of structure constructors
+module m1
+  type parent_type
+    integer :: parent_field
+  end type parent_type
+  type, extends(parent_type) :: child_type
+    integer :: child_field
+  end type child_type
+  type parent_array_type
+    integer, dimension(2) :: parent_field
+  end type parent_array_type
+  type, extends(parent_array_type) :: child_array_type
+    integer :: child_field
+  end type child_array_type
+
+  type(child_type), parameter :: child_const1 = child_type(10, 11)
+  logical, parameter :: test_child1 = child_const1%child_field == 11
+  logical, parameter :: test_parent = child_const1%parent_field == 10
+
+  type(child_type), parameter :: child_const2 = child_type(12, 13)
+  type(child_type), parameter :: array_var(2) = &
+    [child_type(14, 15), child_type(16, 17)]
+  logical, parameter :: test_array_child = array_var(2)%child_field == 17
+  logical, parameter :: test_array_parent = array_var(2)%parent_field == 16
+
+  type array_type
+    real, dimension(3) :: real_field
+  end type array_type
+  type(array_type), parameter :: array_var2 = &
+    array_type([(real(i*i), i = 1,3)])
+  logical, parameter :: test_array_var = array_var2%real_field(2) == 4.0
+
+  type(child_type), parameter, dimension(2) :: child_const3 = &
+    [child_type(18, 19), child_type(20, 21)]
+  integer, dimension(2), parameter :: int_const4 = &
+    child_const3(:)%parent_field
+  logical, parameter :: test_child2 = int_const4(1) == 18
+
+  type(child_array_type), parameter, dimension(2) :: child_const5 = &
+    [child_array_type([22, 23], 24), child_array_type([25, 26], 27)]
+  integer, dimension(2), parameter :: int_const6 = child_const5(:)%parent_field(2)
+  logical, parameter :: test_child3 = int_const6(1) == 23
+
+  type(child_type), parameter :: child_const7 = child_type(28, 29)
+  type(parent_type), parameter :: parent_const8 = child_const7%parent_type
+  logical, parameter :: test_child4 = parent_const8%parent_field == 28
+
+  type(child_type), parameter :: child_const9 = &
+    child_type(parent_type(30), 31)
+  integer, parameter :: int_const10 = child_const9%parent_field
+  logical, parameter :: test_child5 = int_const10 == 30
+
+end module m1
+
+module m2
+  type grandparent_type
+    real :: grandparent_field
+  end type grandparent_type
+  type, extends(grandparent_type) :: parent_type
+    integer :: parent_field
+  end type parent_type
+  type, extends(parent_type) :: child_type
+    real :: child_field
+  end type child_type
+
+  type(child_type), parameter :: child_const1 = child_type(10.0, 11, 12.0)
+  integer, parameter :: int_const2 = &
+    child_const1%grandparent_type%grandparent_field
+  logical, parameter :: test_child1 = int_const2 == 10.0
+ integer, parameter :: int_const3 = & + child_const1%grandparent_field + logical, parameter :: test_child2 = int_const3 == 10.0 + + type(child_type), parameter :: child_const4 = & + child_type(parent_type(13.0, 14), 15.0) + integer, parameter :: int_const5 = & + child_const4%grandparent_type%grandparent_field + logical, parameter :: test_child3 = int_const5 == 13.0 + + type(child_type), parameter :: child_const6 = & + child_type(parent_type(grandparent_type(16.0), 17), 18.0) + integer, parameter :: int_const7 = & + child_const6%grandparent_type%grandparent_field + logical, parameter :: test_child4 = int_const7 == 16.0 + integer, parameter :: int_const8 = & + child_const6%grandparent_field + logical, parameter :: test_child5 = int_const8 == 16.0 +end module m2 + +module m3 + ! tests that use components with default initializations and with the + ! components in the structure constructors in a different order from the + ! declared order + type parent_type + integer :: parent_field1 + real :: parent_field2 = 20.0 + logical :: parent_field3 + end type parent_type + type, extends(parent_type) :: child_type + real :: child_field1 + logical :: child_field2 = .false. + integer :: child_field3 + end type child_type + + type(child_type), parameter :: child_const1 = & + child_type( & + parent_field2 = 10.0, child_field3 = 11, & + child_field2 = .true., parent_field3 = .false., & + parent_field1 = 12, child_field1 = 13.3) + logical, parameter :: test_child1 = child_const1%child_field1 == 13.3 + logical, parameter :: test_child2 = child_const1%child_field2 .eqv. .true. + logical, parameter :: test_child3 = child_const1%child_field3 == 11 + logical, parameter :: test_parent1 = child_const1%parent_field1 == 12 + logical, parameter :: test_parent2 = child_const1%parent_field2 == 10.0 + logical, parameter :: test_parent3 = child_const1%parent_field3 .eqv. .false. + logical, parameter :: test_parent4 = & + child_const1%parent_type%parent_field1 == 12 + logical, parameter :: test_parent5 = & + child_const1%parent_type%parent_field2 == 10.0 + logical, parameter :: test_parent6 = & + child_const1%parent_type%parent_field3 .eqv. .false. + + type(parent_type), parameter ::parent_const1 = child_const1%parent_type + logical, parameter :: test_parent7 = parent_const1%parent_field1 == 12 + logical, parameter :: test_parent8 = parent_const1%parent_field2 == 10.0 + logical, parameter :: test_parent9 = & + parent_const1%parent_field3 .eqv. .false. + + type(child_type), parameter :: child_const2 = & + child_type( & + child_field3 = 14, parent_field3 = .true., & + parent_field1 = 15, child_field1 = 16.6) + logical, parameter :: test_child4 = child_const2%child_field1 == 16.6 + logical, parameter :: test_child5 = child_const2%child_field2 .eqv. .false. + logical, parameter :: test_child6 = child_const2%child_field3 == 14 + logical, parameter :: test_parent10 = child_const2%parent_field1 == 15 + logical, parameter :: test_parent11 = child_const2%parent_field2 == 20.0 + logical, parameter :: test_parent12 = child_const2%parent_field3 .eqv. .true. + + type(child_type), parameter :: child_const3 = & + child_type(parent_type( & + parent_field2 = 17.7, parent_field3 = .false., parent_field1 = 18), & + child_field2 = .false., child_field1 = 19.9, child_field3 = 21) + logical, parameter :: test_child7 = child_const3%parent_field1 == 18 + logical, parameter :: test_child8 = child_const3%parent_field2 == 17.7 + logical, parameter :: test_child9 = child_const3%parent_field3 .eqv. .false. 
+ logical, parameter :: test_child10 = child_const3%child_field1 == 19.9 + logical, parameter :: test_child11 = child_const3%child_field2 .eqv. .false. + logical, parameter :: test_child12 = child_const3%child_field3 == 21 + + type(child_type), parameter :: child_const4 = & + child_type(parent_type( & + parent_field3 = .true., parent_field1 = 22), & + child_field1 = 23.4, child_field3 = 24) + logical, parameter :: test_child13 = child_const4%parent_field1 == 22 + logical, parameter :: test_child14 = child_const4%parent_field2 == 20.0 + logical, parameter :: test_child15 = child_const4%parent_field3 .eqv. .true. + logical, parameter :: test_child16 = child_const4%child_field1 == 23.4 + logical, parameter :: test_child17 = child_const4%child_field2 .eqv. .false. + logical, parameter :: test_child18 = child_const4%child_field3 == 24 + +end module m3 From 4e3edef4b8b637c0c76897497eb7c66f00157210 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Thu, 10 Sep 2020 11:23:42 -0700 Subject: [PATCH 0315/1079] Use pragmas to work around MSVC x86_32 debug miscompile bug Halide users reported this here: https://llvm.org/pr46176 I reported the issue to MSVC here: https://developercommunity.visualstudio.com/content/problem/1179643/msvc-copies-overaligned-non-trivially-copyable-par.html This codepath is apparently not covered by LLVM's unit tests, so I added coverage in a unit test. If we want to support this configuration going forward, it means that is in general not safe to pass a SmallVector by value if alignof(T) is greater than 4. This doesn't appear to come up often because passing a SmallVector by value is inefficient and not idiomatic: it copies the inline storage. In this case, the SmallVector is captured by value by a lambda, and the lambda is passed by value into std::function, and that's how we hit the bug. Differential Revision: https://reviews.llvm.org/D87475 --- llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp | 11 +++++++++++ .../CodeGen/GlobalISel/LegalizerInfoTest.cpp | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp index 17bce517814de..e25705e0e1012 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -10,6 +10,17 @@ // //===----------------------------------------------------------------------===// +// Disable optimizations to work around MSVC debug mode bug in 32-bit: +// https://developercommunity.visualstudio.com/content/problem/1179643/msvc-copies-overaligned-non-trivially-copyable-par.html +// FIXME: Remove this when the issue is closed. +#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86) +// We have to disable runtime checks in order to enable optimizations. This is +// done for the entire file because the problem is actually observed in STL +// template functions. 
+#pragma runtime_checks("", off)
+#pragma optimize("gs", on)
+#endif
+
 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
 using namespace llvm;

diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp
index 7fd2ea453a2ac..ac9112fe5aa49 100644
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp
@@ -406,3 +406,13 @@ TEST(LegalizerInfoTest, MMOAlignment) {
                                         32, 8, AtomicOrdering::NotAtomic }));
   }
 }
+
+// This code sequence doesn't do anything, but it covers a previously uncovered
+// codepath that used to crash in MSVC x86_32 debug mode.
+TEST(LegalizerInfoTest, MSVCDebugMiscompile) {
+  const LLT S1 = LLT::scalar(1);
+  const LLT P0 = LLT::pointer(0, 32);
+  LegalizerInfo LI;
+  auto Builder = LI.getActionDefinitionsBuilder(TargetOpcode::G_PTRTOINT);
+  (void)Builder.legalForCartesianProduct({S1}, {P0});
+}

From 0448d11a06b451a63a8f60408fec613ad24801ba Mon Sep 17 00:00:00 2001
From: Amara Emerson
Date: Thu, 10 Sep 2020 14:57:16 -0700
Subject: [PATCH 0316/1079] [AArch64][GlobalISel] Don't emit a branch for a
 fallthrough G_BR at -O0.

With optimizations we leave the decision to eliminate fallthrough
branches to block placement, but at -O0 we should do it in the selector
to save code size. This regressed -O0 with a recent change to a
combiner.
---
 .../GISel/AArch64InstructionSelector.cpp      |  13 ++
 .../AArch64/GlobalISel/select-binop.mir       |   1 -
 .../select-jump-table-brjt-constrain.mir      |   1 -
 .../select-returnaddress-liveins.mir          |   3 -
 .../CodeGen/AArch64/GlobalISel/select-xor.mir |   1 -
 llvm/test/CodeGen/AArch64/unwind-preserved.ll | 190 +++++++++++++++++-
 6 files changed, 202 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 33fb9b7287d5c..aa155e18e1105 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -35,6 +35,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"

@@ -1755,6 +1756,18 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
   MachineRegisterInfo &MRI = MF.getRegInfo();

   switch (I.getOpcode()) {
+  case TargetOpcode::G_BR: {
+    // If the branch jumps to the fallthrough block, don't bother emitting it.
+    // Only do this for -O0 for a good code size improvement, because when
+    // optimizations are enabled we want to leave this choice to
+    // MachineBlockPlacement.
+ Function &F = MF.getFunction(); + bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None; + if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB())) + return false; + I.eraseFromParent(); + return true; + } case TargetOpcode::G_SHL: return earlySelectSHL(I, MRI); case TargetOpcode::G_CONSTANT: { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-binop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-binop.mir index 2c53f6df4d4fa..f6aa16784b25e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-binop.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-binop.mir @@ -330,7 +330,6 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 - ; CHECK: B %bb.1 ; CHECK: bb.1: ; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY]], 1, 0 ; CHECK: $w0 = COPY [[ADDWri]] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir index 082bf43061da4..6df6573b35337 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir @@ -35,7 +35,6 @@ body: | ; CHECK: BR %6 ; CHECK: bb.2: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: B %bb.3 ; CHECK: bb.3: ; CHECK: RET_ReallyLR bb.1: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-returnaddress-liveins.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-returnaddress-liveins.mir index a309daab0b4ce..f0ae4f17b2ee3 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-returnaddress-liveins.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-returnaddress-liveins.mir @@ -19,7 +19,6 @@ body: | ; CHECK: successors: %bb.1(0x80000000) ; CHECK: liveins: $w0, $x0, $lr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $lr - ; CHECK: B %bb.1 ; CHECK: bb.1: ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]] ; CHECK: $x0 = COPY [[COPY1]] @@ -47,7 +46,6 @@ body: | ; CHECK: successors: %bb.1(0x80000000) ; CHECK: liveins: $w0, $x0, $lr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $lr - ; CHECK: B %bb.1 ; CHECK: bb.1: ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]] ; CHECK: $x0 = COPY [[COPY1]] @@ -78,7 +76,6 @@ body: | ; CHECK: liveins: $w0, $x0, $lr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $lr ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]] - ; CHECK: B %bb.1 ; CHECK: bb.1: ; CHECK: $x0 = COPY [[COPY1]] ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY]] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-xor.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-xor.mir index cc75386271c86..5b39ade02774b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-xor.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-xor.mir @@ -132,7 +132,6 @@ body: | ; CHECK-LABEL: name: xor_constant_n1_s32_gpr_2bb ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: B %bb.1 ; CHECK: bb.1: ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr $wzr, [[COPY]] diff --git a/llvm/test/CodeGen/AArch64/unwind-preserved.ll b/llvm/test/CodeGen/AArch64/unwind-preserved.ll index cf2a8e9b4a36a..68fec08255428 100644 --- a/llvm/test/CodeGen/AArch64/unwind-preserved.ll +++ b/llvm/test/CodeGen/AArch64/unwind-preserved.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -O0 -global-isel=0 -global-isel-abort=0 < %s | FileCheck %s -; RUN: llc 
-mtriple=aarch64-linux-gnu -mattr=+sve -O0 -global-isel=1 -global-isel-abort=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -O0 -global-isel=1 -global-isel-abort=0 < %s | FileCheck %s --check-prefix=GISEL ; Test that z0 is saved/restored, as the unwinder may only retain the low 64bits (d0). define @invoke_callee_may_throw_sve( %v) personality i8 0 { @@ -125,6 +125,128 @@ define @invoke_callee_may_throw_sve( %v) pe ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret +; +; GISEL-LABEL: invoke_callee_may_throw_sve: +; GISEL: .Lfunc_begin0: +; GISEL-NEXT: .cfi_startproc +; GISEL-NEXT: // %bb.0: +; GISEL-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; GISEL-NEXT: addvl sp, sp, #-18 +; GISEL-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; GISEL-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; GISEL-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; GISEL-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; GISEL-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; GISEL-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; GISEL-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; GISEL-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; GISEL-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; GISEL-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; GISEL-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; GISEL-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; GISEL-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; GISEL-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; GISEL-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; GISEL-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; GISEL-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; GISEL-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; GISEL-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; GISEL-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; GISEL-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; GISEL-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; GISEL-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; GISEL-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; GISEL-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; GISEL-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; GISEL-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; GISEL-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; GISEL-NEXT: addvl sp, sp, #-2 +; GISEL-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 160 * VG +; GISEL-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +; GISEL-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG +; GISEL-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG +; GISEL-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG +; GISEL-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 16 - 40 * VG +; GISEL-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 
- 48 * VG +; GISEL-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG +; GISEL-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG +; GISEL-NEXT: .cfi_offset w30, -8 +; GISEL-NEXT: .cfi_offset w29, -16 +; GISEL-NEXT: .Ltmp0: +; GISEL-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill +; GISEL-NEXT: bl may_throw_sve +; GISEL-NEXT: .Ltmp1: +; GISEL-NEXT: str z0, [sp] // 16-byte Folded Spill +; GISEL-NEXT: b .LBB0_1 +; GISEL-NEXT: .LBB0_1: // %.Lcontinue +; GISEL-NEXT: ldr z0, [sp] // 16-byte Folded Reload +; GISEL-NEXT: addvl sp, sp, #2 +; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: addvl sp, sp, #18 +; GISEL-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; GISEL-NEXT: ret +; GISEL-NEXT: .LBB0_2: // %.Lunwind +; GISEL-NEXT: .Ltmp2: +; GISEL-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: addvl sp, sp, #2 +; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p4, [sp, #15, 
mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: addvl sp, sp, #18 +; GISEL-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; GISEL-NEXT: ret %result = invoke @may_throw_sve( %v) to label %.Lcontinue unwind label %.Lunwind .Lcontinue: ret %result @@ -204,6 +326,72 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v) ; CHECK-NEXT: ldp q23, q22, [sp, #32] // 32-byte Folded Reload ; CHECK-NEXT: add sp, sp, #304 // =304 ; CHECK-NEXT: ret +; +; GISEL-LABEL: invoke_callee_may_throw_neon: +; GISEL: .Lfunc_begin1: +; GISEL-NEXT: .cfi_startproc +; GISEL-NEXT: // %bb.0: +; GISEL-NEXT: sub sp, sp, #304 // =304 +; GISEL-NEXT: stp q23, q22, [sp, #32] // 32-byte Folded Spill +; GISEL-NEXT: stp q21, q20, [sp, #64] // 32-byte Folded Spill +; GISEL-NEXT: stp q19, q18, [sp, #96] // 32-byte Folded Spill +; GISEL-NEXT: stp q17, q16, [sp, #128] // 32-byte Folded Spill +; GISEL-NEXT: stp q15, q14, [sp, #160] // 32-byte Folded Spill +; GISEL-NEXT: stp q13, q12, [sp, #192] // 32-byte Folded Spill +; GISEL-NEXT: stp q11, q10, [sp, #224] // 32-byte Folded Spill +; GISEL-NEXT: stp q9, q8, [sp, #256] // 32-byte Folded Spill +; GISEL-NEXT: stp x29, x30, [sp, #288] // 16-byte Folded Spill +; GISEL-NEXT: .cfi_def_cfa_offset 304 +; GISEL-NEXT: .cfi_offset w30, -8 +; GISEL-NEXT: .cfi_offset w29, -16 +; GISEL-NEXT: .cfi_offset b8, -32 +; GISEL-NEXT: .cfi_offset b9, -48 +; GISEL-NEXT: .cfi_offset b10, -64 +; GISEL-NEXT: .cfi_offset b11, -80 +; GISEL-NEXT: .cfi_offset b12, -96 +; GISEL-NEXT: .cfi_offset b13, -112 +; GISEL-NEXT: .cfi_offset b14, -128 +; GISEL-NEXT: .cfi_offset b15, -144 +; GISEL-NEXT: .cfi_offset b16, -160 +; GISEL-NEXT: .cfi_offset b17, -176 +; GISEL-NEXT: .cfi_offset b18, -192 +; GISEL-NEXT: .cfi_offset b19, -208 +; GISEL-NEXT: .cfi_offset b20, -224 +; GISEL-NEXT: .cfi_offset b21, -240 +; GISEL-NEXT: .cfi_offset b22, -256 +; GISEL-NEXT: .cfi_offset b23, -272 +; GISEL-NEXT: .Ltmp3: +; GISEL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; GISEL-NEXT: bl may_throw_neon +; GISEL-NEXT: .Ltmp4: +; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill +; GISEL-NEXT: // %bb.1: // %.Lcontinue +; GISEL-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; GISEL-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload +; GISEL-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload +; GISEL-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload +; GISEL-NEXT: ldp q13, q12, [sp, #192] // 32-byte Folded Reload +; GISEL-NEXT: ldp q15, q14, [sp, #160] // 
32-byte Folded Reload
+; GISEL-NEXT:    ldp q17, q16, [sp, #128] // 32-byte Folded Reload
+; GISEL-NEXT:    ldp q19, q18, [sp, #96] // 32-byte Folded Reload
+; GISEL-NEXT:    ldp q21, q20, [sp, #64] // 32-byte Folded Reload
+; GISEL-NEXT:    ldp q23, q22, [sp, #32] // 32-byte Folded Reload
+; GISEL-NEXT:    add sp, sp, #304 // =304
+; GISEL-NEXT:    ret
+; GISEL-NEXT:  .LBB1_2: // %.Lunwind
+; GISEL-NEXT:  .Ltmp5:
+; GISEL-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; GISEL-NEXT:    ldp x29, x30, [sp, #288] // 16-byte Folded Reload
+; GISEL-NEXT:    ldp q9, q8, [sp, #256] // 32-byte Folded Reload
+; GISEL-NEXT:    ldp q11, q10, [sp, #224] // 32-byte Folded Reload
+; GISEL-NEXT:    ldp q13, q12, [sp, #192] // 32-byte Folded Reload
+; GISEL-NEXT:    ldp q15, q14, [sp, #160] // 32-byte Folded Reload
+; GISEL-NEXT:    ldp q17, q16, [sp, #128] // 32-byte Folded Reload
+; GISEL-NEXT:    ldp q19, q18, [sp, #96] // 32-byte Folded Reload
+; GISEL-NEXT:    ldp q21, q20, [sp, #64] // 32-byte Folded Reload
+; GISEL-NEXT:    ldp q23, q22, [sp, #32] // 32-byte Folded Reload
+; GISEL-NEXT:    add sp, sp, #304 // =304
+; GISEL-NEXT:    ret
   %result = invoke aarch64_vector_pcs <4 x i32> @may_throw_neon(<4 x i32> %v)
           to label %.Lcontinue unwind label %.Lunwind
 .Lcontinue:
   ret <4 x i32> %result

From 2c73bef7fad4bb92213c9e8ace7d98a231efe027 Mon Sep 17 00:00:00 2001
From: Reid Kleckner
Date: Thu, 10 Sep 2020 16:45:20 -0700
Subject: [PATCH 0317/1079] Fix wrong comment about enabling optimizations to
 work around a bug

---
 llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index e25705e0e1012..9ca6d9a9a5517 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -10,7 +10,7 @@
 //
 //===----------------------------------------------------------------------===//

-// Disable optimizations to work around MSVC debug mode bug in 32-bit:
+// Enable optimizations to work around MSVC debug mode bug in 32-bit:
 // https://developercommunity.visualstudio.com/content/problem/1179643/msvc-copies-overaligned-non-trivially-copyable-par.html
 // FIXME: Remove this when the issue is closed.
 #if defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86)

From 035396197a5f129c5ec42e9e46a85c32fa1c1b84 Mon Sep 17 00:00:00 2001
From: Zarko Todorovski
Date: Thu, 10 Sep 2020 20:07:11 -0400
Subject: [PATCH 0318/1079] Remove unused variable introduced in
 0448d11a06b451a causing build failures with -Werror on.

---
 llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index aa155e18e1105..ed31b336aa3e9 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1761,7 +1761,6 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
     // Only do this for -O0 for a good code size improvement, because when
     // optimizations are enabled we want to leave this choice to
     // MachineBlockPlacement.
- Function &F = MF.getFunction(); bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None; if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB())) return false; From 0e47a8d17fe85b4ab810a17cde4178b2729f2363 Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Fri, 11 Sep 2020 08:42:16 +0800 Subject: [PATCH 0319/1079] [obj2yaml] Add support for dumping the .debug_ranges section. This patch adds support for dumping the .debug_ranges section to elf2yaml. Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D87429 --- .../obj2yaml/ELF/DWARF/debug-ranges.yaml | 233 ++++++++++++++++++ llvm/tools/obj2yaml/elf2yaml.cpp | 2 + 2 files changed, 235 insertions(+) create mode 100644 llvm/test/tools/obj2yaml/ELF/DWARF/debug-ranges.yaml diff --git a/llvm/test/tools/obj2yaml/ELF/DWARF/debug-ranges.yaml b/llvm/test/tools/obj2yaml/ELF/DWARF/debug-ranges.yaml new file mode 100644 index 0000000000000..0e3fbae130711 --- /dev/null +++ b/llvm/test/tools/obj2yaml/ELF/DWARF/debug-ranges.yaml @@ -0,0 +1,233 @@ +## Test how we dump the .debug_ranges section. + +## a) Test dumping the .debug_ranges section from various object files with +## different endian and bits. + +## Dump the .debug_ranges section from a 32-bit little endian object file where +## the address_size of debug_info is 4. +# RUN: yaml2obj --docnum=1 %s -DBITS=32 -DLOWOFFSET=0xFFFFFFFF \ +# RUN: -DHIGHOFFSET=0x10 | obj2yaml | \ +# RUN: FileCheck %s --check-prefix=BASIC -DADDRSIZE=0x04 \ +# RUN: -DOFFSET=0x0000000000000018 -DLOWOFFSET=0x00000000FFFFFFFF \ +# RUN: -DHIGHOFFSET=0x0000000000000010 + +## Dump the .debug_ranges section from a 32-bit big endian object file where the +## address_size of debug_info is 4. +# RUN: yaml2obj --docnum=1 %s -DBITS=32 -DENDIAN=MSB -DLOWOFFSET=0xFFFFFFFF \ +# RUN: -DHIGHOFFSET=0x10 | obj2yaml | \ +# RUN: FileCheck %s --check-prefix=BASIC -DADDRSIZE=0x04 \ +# RUN: -DOFFSET=0x0000000000000018 -DLOWOFFSET=0x00000000FFFFFFFF \ +# RUN: -DHIGHOFFSET=0x0000000000000010 + +## Dump the .debug_ranges section from a 32-bit little endian object file where +## the address_size of debug_info is 8. +# RUN: yaml2obj --docnum=1 %s -DBITS=32 -DADDRSIZE1=8 \ +# RUN: -DADDRSIZE2=8 -DADDRSIZE3=8 -DADDRSIZE4=8 \ +# RUN: -DLOWOFFSET=0xFFFFFFFFFFFFFFFF -DHIGHOFFSET=0x10 | obj2yaml | \ +# RUN: FileCheck %s --check-prefix=BASIC -DADDRSIZE=0x08 \ +# RUN: -DOFFSET=0x0000000000000030 -DLOWOFFSET=0xFFFFFFFFFFFFFFFF \ +# RUN: -DHIGHOFFSET=0x0000000000000010 + +## Dump the .debug_ranges section from a 32-bit big endian object file where the +## address_size of debug_info is 8. +# RUN: yaml2obj --docnum=1 %s -DBITS=32 -DENDIAN=MSB -DADDRSIZE1=8 \ +# RUN: -DADDRSIZE2=8 -DADDRSIZE3=8 -DADDRSIZE4=8 \ +# RUN: -DLOWOFFSET=0xFFFFFFFFFFFFFFFF -DHIGHOFFSET=0x10 | obj2yaml | \ +# RUN: FileCheck %s --check-prefix=BASIC -DADDRSIZE=0x08 \ +# RUN: -DOFFSET=0x0000000000000030 -DLOWOFFSET=0xFFFFFFFFFFFFFFFF \ +# RUN: -DHIGHOFFSET=0x0000000000000010 + +## Dump the .debug_ranges section from a 64-bit little endian object file where +## the address_size of debug_info is 8. +# RUN: yaml2obj --docnum=1 %s -DLOWOFFSET=0xFFFFFFFFFFFFFFFF \ +# RUN: -DHIGHOFFSET=0x10 | obj2yaml | \ +# RUN: FileCheck %s --check-prefix=BASIC -DADDRSIZE=0x08 \ +# RUN: -DOFFSET=0x0000000000000030 -DLOWOFFSET=0xFFFFFFFFFFFFFFFF \ +# RUN: -DHIGHOFFSET=0x0000000000000010 + +## Dump the .debug_ranges section from a 64-bit big endian object file where the +## address_size of debug_info is 8. 
+# RUN: yaml2obj --docnum=1 %s -DENDIAN=MSB -DLOWOFFSET=0xFFFFFFFFFFFFFFFF \ +# RUN: -DHIGHOFFSET=0x10 | obj2yaml | \ +# RUN: FileCheck %s --check-prefix=BASIC -DADDRSIZE=0x08 \ +# RUN: -DOFFSET=0x0000000000000030 -DLOWOFFSET=0xFFFFFFFFFFFFFFFF \ +# RUN: -DHIGHOFFSET=0x0000000000000010 + +## Dump the .debug_ranges section from a 64-bit little endian object file where +## the address_size of debug_info is 4. +# RUN: yaml2obj --docnum=1 %s -DADDRSIZE1=4 -DADDRSIZE2=4 -DADDRSIZE3=4 \ +# RUN: -DADDRSIZE4=4 -DLOWOFFSET=0xFFFFFFFFFFFFFFFF \ +# RUN: -DHIGHOFFSET=0x10 | obj2yaml | \ +# RUN: FileCheck %s --check-prefix=BASIC -DADDRSIZE=0x04 \ +# RUN: -DOFFSET=0x0000000000000018 -DLOWOFFSET=0x00000000FFFFFFFF \ +# RUN: -DHIGHOFFSET=0x0000000000000010 + +## Dump the .debug_ranges section from a 64-bit big endian object file where the +## address_size of debug_info is 4. +# RUN: yaml2obj --docnum=1 %s -DADDRSIZE1=4 -DADDRSIZE2=4 -DADDRSIZE3=4 \ +# RUN: -DADDRSIZE4=4 -DLOWOFFSET=0xFFFFFFFFFFFFFFFF \ +# RUN: -DHIGHOFFSET=0x10 | obj2yaml | \ +# RUN: FileCheck %s --check-prefix=BASIC -DADDRSIZE=0x04 \ +# RUN: -DOFFSET=0x0000000000000018 -DLOWOFFSET=0x00000000FFFFFFFF \ +# RUN: -DHIGHOFFSET=0x0000000000000010 + + +# BASIC-NOT: debug_ranges +# BASIC: debug_ranges: +# BASIC-NEXT: - Offset: 0x0000000000000000 +# BASIC-NEXT: AddrSize: [[ADDRSIZE]] +# BASIC-NEXT: Entries: +# BASIC-NEXT: - LowOffset: 0x0000000000000010 +# BASIC-NEXT: HighOffset: 0x0000000000000020 +# BASIC-NEXT: - LowOffset: 0x0000000000000030 +# BASIC-NEXT: HighOffset: 0x0000000000000040 +# BASIC-NEXT: - Offset: [[OFFSET]] +# BASIC-NEXT: AddrSize: [[ADDRSIZE]] +# BASIC-NEXT: Entries: +# BASIC-NEXT: - LowOffset: [[LOWOFFSET]] +# BASIC-NEXT: HighOffset: [[HIGHOFFSET]] + +--- !ELF +FileHeader: + Class: ELFCLASS[[BITS=64]] + Data: ELFDATA2[[ENDIAN=LSB]] + Type: ET_EXEC +DWARF: + ## The debug_ranges parser depends on the address_size field + ## of compilation units. We add the .debug_info section to + ## assist the parser. + debug_info: + - Version: 4 + AddrSize: [[ADDRSIZE1=]] + - Version: 4 + AddrSize: [[ADDRSIZE2=]] + debug_ranges: + - AddrSize: [[ADDRSIZE3=]] + Entries: + - LowOffset: 0x10 + HighOffset: 0x20 + - LowOffset: 0x30 + HighOffset: 0x40 + - AddrSize: [[ADDRSIZE4=]] + Entries: + - LowOffset: [[LOWOFFSET=0x10]] + HighOffset: [[HIGHOFFSET=0x20]] + +## b) Test that obj2yaml dumps the .debug_ranges as a raw content section when +## the parser fails. In this case, the address_size of the two compilation units +## doesn't match. + +# RUN: yaml2obj --docnum=1 -DADDRSIZE1=4 -DADDRSIZE2=8 %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefix=RAW --implicit-check-not=debug_ranges + +# RAW: - Name: .debug_ranges +# RAW-NEXT: Type: SHT_PROGBITS +# RAW-NEXT: AddressAlign: 0x0000000000000001 +# RAW-NEXT: Content: '1000000000000000 +## ^--------------- LowOffset +# RAW-SAME: {{^}}2000000000000000 +## ^--------------- HighOffset +# RAW-SAME: {{^}}3000000000000000 +## ^--------------- LowOffset +# RAW-SAME: {{^}}4000000000000000 +## ^--------------- HighOffset +# RAW-SAME: {{^}}0000000000000000 +## ^--------------- +# RAW-SAME: {{^}}0000000000000000 +## ---------------- terminator +# RAW-SAME: {{^}}1000000000000000 +## ^--------------- LowOffset +# RAW-SAME: {{^}}2000000000000000 +## ^--------------- HighOffset +# RAW-SAME: {{^}}0000000000000000 +## ^--------------- +# RAW-SAME: {{^}}0000000000000000' +## ---------------- terminator + +## c) Test dumping an empty .debug_ranges section. 
+ +# RUN: yaml2obj --docnum=2 %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefix=EMPTY --implicit-check-not=Sections: + +# EMPTY: DWARF: +# EMPTY-NEXT: debug_ranges: [] + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +DWARF: + debug_ranges: [] + +## d) Test dumping a .debug_ranges section whose section header properties are +## overridden. + +## Override the sh_type field. +# RUN: yaml2obj --docnum=3 -DTYPE=SHT_STRTAB %s | obj2yaml | \ +# RUN: FileCheck %s -DTYPE=STRTAB --check-prefixes=COMMON + +## Override the sh_flags field. +# RUN: yaml2obj --docnum=3 -DFLAGS=[SHF_ALLOC] %s | obj2yaml | \ +# RUN: FileCheck %s -DTYPE=PROGBITS --check-prefixes=COMMON,FLAGS + +## Override the sh_link field. +# RUN: yaml2obj --docnum=3 -DLINK='.sec' %s | obj2yaml | \ +# RUN: FileCheck %s -DTYPE=PROGBITS --check-prefixes=COMMON,LINK + +## Override the sh_entsize field. +# RUN: yaml2obj --docnum=3 -DENTSIZE=3 %s | obj2yaml | \ +# RUN: FileCheck %s -DTYPE=PROGBITS --check-prefixes=COMMON,ENTSIZE + +## Override the sh_info field. +# RUN: yaml2obj --docnum=3 -DINFO=3 %s | obj2yaml | \ +# RUN: FileCheck %s -DTYPE=PROGBITS --check-prefixes=COMMON,INFO + +## Override the sh_addralign field. +# RUN: yaml2obj --docnum=3 -DADDRALIGN=3 %s | obj2yaml | \ +# RUN: FileCheck %s -DTYPE=PROGBITS --check-prefixes=COMMON,ADDRALIGN + +## Override the sh_address field. +# RUN: yaml2obj --docnum=3 -DADDRESS=0x2020 %s | obj2yaml | \ +# RUN: FileCheck %s -DTYPE=PROGBITS --check-prefixes=COMMON,ADDRESS + +# COMMON: - Name: .debug_ranges +# COMMON-NEXT: Type: SHT_[[TYPE]] +# FLAGS-NEXT: Flags: [ SHF_ALLOC ] +# LINK-NEXT: Link: .sec +# ENTSIZE-NEXT: EntSize: 0x0000000000000003 +# INFO-NEXT: Info: 0x0000000000000003 +# ADDRALIGN-NEXT: AddressAlign: 0x0000000000000003 +# ADDRESS-NEXT: Address: 0x0000000000002020 + +# COMMON: debug_ranges: +# COMMON-NEXT: - Offset: 0x0000000000000000 +# COMMON-NEXT: AddrSize: 0x08 +# COMMON-NEXT: Entries: +# COMMON-NEXT: - LowOffset: 0x0000000000000010 +# COMMON-NEXT: HighOffset: 0x0000000000000020 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .debug_ranges + Type: [[TYPE=SHT_PROGBITS]] + Flags: [[FLAGS=]] + Link: [[LINK='']] + EntSize: [[ENTSIZE=]] + Info: [[INFO=]] + AddressAlign: [[ADDRALIGN=0]] + Address: [[ADDRESS=]] + - Name: .sec + Type: SHT_PROGBITS +DWARF: + debug_info: + - Version: 4 + AddrSize: 8 + debug_ranges: + - Entries: + - LowOffset: 0x10 + HighOffset: 0x20 diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index 94819cb8d87d3..22fbdd2ed72e7 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -416,6 +416,8 @@ Optional ELFDumper::dumpDWARFSections( Err = dumpDebugARanges(*DWARFCtx.get(), DWARF); else if (RawSec->Name == ".debug_str") Err = dumpDebugStrings(*DWARFCtx.get(), DWARF); + else if (RawSec->Name == ".debug_ranges") + Err = dumpDebugRanges(*DWARFCtx.get(), DWARF); else continue; From bc0a35f3b7dd45077d16b064c8d5c37e6a907d58 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Thu, 10 Sep 2020 18:48:24 -0700 Subject: [PATCH 0320/1079] [lldb] Add missing LLDB_REGISTER_CONSTRUCTOR in SBPlatform This fixes the following assertion in TestPlatformPython.py. 
Assertion failed: (id != 0 && "Forgot to add function to registry?") --- lldb/source/API/SBPlatform.cpp | 69 ++++++++++++++++------------------ 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/lldb/source/API/SBPlatform.cpp b/lldb/source/API/SBPlatform.cpp index 3c6422e211fca..f118048156b96 100644 --- a/lldb/source/API/SBPlatform.cpp +++ b/lldb/source/API/SBPlatform.cpp @@ -93,8 +93,8 @@ SBPlatformConnectOptions::SBPlatformConnectOptions( SBPlatformConnectOptions::~SBPlatformConnectOptions() { delete m_opaque_ptr; } -SBPlatformConnectOptions &SBPlatformConnectOptions:: -operator=(const SBPlatformConnectOptions &rhs) { +SBPlatformConnectOptions & +SBPlatformConnectOptions::operator=(const SBPlatformConnectOptions &rhs) { LLDB_RECORD_METHOD( SBPlatformConnectOptions &, SBPlatformConnectOptions, operator=,( @@ -196,8 +196,8 @@ SBPlatformShellCommand::SBPlatformShellCommand( *m_opaque_ptr = *rhs.m_opaque_ptr; } -SBPlatformShellCommand &SBPlatformShellCommand:: -operator=(const SBPlatformShellCommand &rhs) { +SBPlatformShellCommand & +SBPlatformShellCommand::operator=(const SBPlatformShellCommand &rhs) { LLDB_RECORD_METHOD( SBPlatformShellCommand &, @@ -581,25 +581,25 @@ SBError SBPlatform::Install(SBFileSpec &src, SBFileSpec &dst) { SBError SBPlatform::Run(SBPlatformShellCommand &shell_command) { LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, Run, (lldb::SBPlatformShellCommand &), shell_command); - return LLDB_RECORD_RESULT(ExecuteConnected([&](const lldb::PlatformSP - &platform_sp) { - const char *command = shell_command.GetCommand(); - if (!command) - return Status("invalid shell command (empty)"); - - const char *working_dir = shell_command.GetWorkingDirectory(); - if (working_dir == nullptr) { - working_dir = platform_sp->GetWorkingDirectory().GetCString(); - if (working_dir) - shell_command.SetWorkingDirectory(working_dir); - } - return platform_sp->RunShellCommand(shell_command.m_opaque_ptr->m_shell, - command, FileSpec(working_dir), - &shell_command.m_opaque_ptr->m_status, - &shell_command.m_opaque_ptr->m_signo, - &shell_command.m_opaque_ptr->m_output, - shell_command.m_opaque_ptr->m_timeout); - })); + return LLDB_RECORD_RESULT( + ExecuteConnected([&](const lldb::PlatformSP &platform_sp) { + const char *command = shell_command.GetCommand(); + if (!command) + return Status("invalid shell command (empty)"); + + const char *working_dir = shell_command.GetWorkingDirectory(); + if (working_dir == nullptr) { + working_dir = platform_sp->GetWorkingDirectory().GetCString(); + if (working_dir) + shell_command.SetWorkingDirectory(working_dir); + } + return platform_sp->RunShellCommand( + shell_command.m_opaque_ptr->m_shell, command, FileSpec(working_dir), + &shell_command.m_opaque_ptr->m_status, + &shell_command.m_opaque_ptr->m_signo, + &shell_command.m_opaque_ptr->m_output, + shell_command.m_opaque_ptr->m_timeout); + })); } SBError SBPlatform::Launch(SBLaunchInfo &launch_info) { @@ -705,8 +705,7 @@ SBEnvironment SBPlatform::GetEnvironment() { namespace lldb_private { namespace repro { -template <> -void RegisterMethods(Registry &R) { +template <> void RegisterMethods(Registry &R) { LLDB_REGISTER_CONSTRUCTOR(SBPlatformConnectOptions, (const char *)); LLDB_REGISTER_CONSTRUCTOR(SBPlatformConnectOptions, (const lldb::SBPlatformConnectOptions &)); @@ -715,8 +714,7 @@ void RegisterMethods(Registry &R) { SBPlatformConnectOptions, operator=,( const lldb::SBPlatformConnectOptions &)); LLDB_REGISTER_METHOD(const char *, SBPlatformConnectOptions, GetURL, ()); - LLDB_REGISTER_METHOD(void, 
SBPlatformConnectOptions, SetURL, - (const char *)); + LLDB_REGISTER_METHOD(void, SBPlatformConnectOptions, SetURL, (const char *)); LLDB_REGISTER_METHOD(bool, SBPlatformConnectOptions, GetRsyncEnabled, ()); LLDB_REGISTER_METHOD(void, SBPlatformConnectOptions, EnableRsync, (const char *, const char *, bool)); @@ -727,8 +725,7 @@ void RegisterMethods(Registry &R) { (const char *)); } -template <> -void RegisterMethods(Registry &R) { +template <> void RegisterMethods(Registry &R) { LLDB_REGISTER_CONSTRUCTOR(SBPlatformShellCommand, (const char *)); LLDB_REGISTER_CONSTRUCTOR(SBPlatformShellCommand, (const lldb::SBPlatformShellCommand &)); @@ -745,8 +742,7 @@ void RegisterMethods(Registry &R) { GetWorkingDirectory, ()); LLDB_REGISTER_METHOD(void, SBPlatformShellCommand, SetWorkingDirectory, (const char *)); - LLDB_REGISTER_METHOD(uint32_t, SBPlatformShellCommand, GetTimeoutSeconds, - ()); + LLDB_REGISTER_METHOD(uint32_t, SBPlatformShellCommand, GetTimeoutSeconds, ()); LLDB_REGISTER_METHOD(void, SBPlatformShellCommand, SetTimeoutSeconds, (uint32_t)); LLDB_REGISTER_METHOD(int, SBPlatformShellCommand, GetSignal, ()); @@ -754,15 +750,16 @@ void RegisterMethods(Registry &R) { LLDB_REGISTER_METHOD(const char *, SBPlatformShellCommand, GetOutput, ()); } -template <> -void RegisterMethods(Registry &R) { +template <> void RegisterMethods(Registry &R) { LLDB_REGISTER_CONSTRUCTOR(SBPlatform, ()); LLDB_REGISTER_CONSTRUCTOR(SBPlatform, (const char *)); LLDB_REGISTER_CONSTRUCTOR(SBPlatform, (const lldb::SBPlatform &)); + LLDB_REGISTER_CONSTRUCTOR(SBPlatformShellCommand, + (const char *, const char *)); LLDB_REGISTER_METHOD(SBPlatform &, SBPlatform, operator=,(const lldb::SBPlatform &)); LLDB_REGISTER_METHOD_CONST(bool, SBPlatform, IsValid, ()); - LLDB_REGISTER_METHOD_CONST(bool, SBPlatform, operator bool, ()); + LLDB_REGISTER_METHOD_CONST(bool, SBPlatform, operator bool,()); LLDB_REGISTER_METHOD(void, SBPlatform, Clear, ()); LLDB_REGISTER_METHOD(const char *, SBPlatform, GetName, ()); LLDB_REGISTER_METHOD(const char *, SBPlatform, GetWorkingDirectory, ()); @@ -802,5 +799,5 @@ void RegisterMethods(Registry &R) { ()); } -} -} +} // namespace repro +} // namespace lldb_private From 0a391c60793bae25804d2a82e5a26e2b9c7a69a1 Mon Sep 17 00:00:00 2001 From: MaheshRavishankar Date: Thu, 10 Sep 2020 16:47:29 -0700 Subject: [PATCH 0321/1079] [mlir][Analysis] Allow Slice Analysis to work with linalg::LinalgOp Differential Revision: https://reviews.llvm.org/D87307 --- mlir/lib/Analysis/SliceAnalysis.cpp | 4 +- mlir/test/IR/slice.mlir | 33 ++++++++++++ mlir/test/lib/IR/CMakeLists.txt | 1 + mlir/test/lib/IR/TestSlicing.cpp | 81 +++++++++++++++++++++++++++++ mlir/tools/mlir-opt/mlir-opt.cpp | 2 + 5 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 mlir/test/IR/slice.mlir create mode 100644 mlir/test/lib/IR/TestSlicing.cpp diff --git a/mlir/lib/Analysis/SliceAnalysis.cpp b/mlir/lib/Analysis/SliceAnalysis.cpp index 8f5f87ba620ee..120d4e4a91372 100644 --- a/mlir/lib/Analysis/SliceAnalysis.cpp +++ b/mlir/lib/Analysis/SliceAnalysis.cpp @@ -12,6 +12,7 @@ #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/IR/Function.h" #include "mlir/IR/Operation.h" @@ -84,7 +85,8 @@ static void getBackwardSliceImpl(Operation *op, if (!op) return; - assert((op->getNumRegions() == 0 || isa(op)) && + assert((op->getNumRegions() == 0 || + isa(op)) && "unexpected generic op with regions"); // Evaluate 
whether we should keep this def. diff --git a/mlir/test/IR/slice.mlir b/mlir/test/IR/slice.mlir new file mode 100644 index 0000000000000..731f3872f67dd --- /dev/null +++ b/mlir/test/IR/slice.mlir @@ -0,0 +1,33 @@ +// RUN: mlir-opt -slice-analysis-test %s | FileCheck %s + +func @slicing_linalg_op(%arg0 : index, %arg1 : index, %arg2 : index) { + %a = alloc(%arg0, %arg2) : memref + %b = alloc(%arg2, %arg1) : memref + %c = alloc(%arg0, %arg1) : memref + %d = alloc(%arg0, %arg1) : memref + linalg.matmul %a, %b, %c : (memref, memref, memref) + linalg.matmul %a, %b, %d : (memref, memref, memref) + dealloc %c : memref + dealloc %b : memref + dealloc %a : memref + dealloc %d : memref + return +} + +// CHECK-LABEL: func @slicing_linalg_op__backward_slice__0 +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index +// CHECK-DAG: %[[A:.+]] = alloc(%[[ARG0]], %[[ARG2]]) : memref +// CHECK-DAG: %[[B:.+]] = alloc(%[[ARG2]], %[[ARG1]]) : memref +// CHECK-DAG: %[[C:.+]] = alloc(%[[ARG0]], %[[ARG1]]) : memref +// CHECK: return + +// CHECK-LABEL: func @slicing_linalg_op__backward_slice__1 +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index +// CHECK-DAG: %[[A:.+]] = alloc(%[[ARG0]], %[[ARG2]]) : memref +// CHECK-DAG: %[[B:.+]] = alloc(%[[ARG2]], %[[ARG1]]) : memref +// CHECK-DAG: %[[C:.+]] = alloc(%[[ARG0]], %[[ARG1]]) : memref +// CHECK: return diff --git a/mlir/test/lib/IR/CMakeLists.txt b/mlir/test/lib/IR/CMakeLists.txt index cf4ecada0f3cb..a42f90bb92689 100644 --- a/mlir/test/lib/IR/CMakeLists.txt +++ b/mlir/test/lib/IR/CMakeLists.txt @@ -6,6 +6,7 @@ add_mlir_library(MLIRTestIR TestPrintDefUse.cpp TestPrintNesting.cpp TestSideEffects.cpp + TestSlicing.cpp TestSymbolUses.cpp TestTypes.cpp diff --git a/mlir/test/lib/IR/TestSlicing.cpp b/mlir/test/lib/IR/TestSlicing.cpp new file mode 100644 index 0000000000000..a95b2f84cfcf5 --- /dev/null +++ b/mlir/test/lib/IR/TestSlicing.cpp @@ -0,0 +1,81 @@ +//===- TestSlicing.cpp - Testing slice functionality ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple testing pass for slicing. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis/SliceAnalysis.h" +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/BlockAndValueMapping.h" +#include "mlir/IR/Function.h" +#include "mlir/IR/Module.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" + +using namespace mlir; + +/// Create a function with the same signature as the parent function of `op` +/// with name being the function name and a `suffix`. 
+static LogicalResult createBackwardSliceFunction(Operation *op, + StringRef suffix) { + FuncOp parentFuncOp = op->getParentOfType<FuncOp>(); + OpBuilder builder(parentFuncOp); + Location loc = op->getLoc(); + std::string clonedFuncOpName = parentFuncOp.getName().str() + suffix.str(); + FuncOp clonedFuncOp = + builder.create<FuncOp>(loc, clonedFuncOpName, parentFuncOp.getType()); + BlockAndValueMapping mapper; + builder.setInsertionPointToEnd(clonedFuncOp.addEntryBlock()); + for (auto arg : enumerate(parentFuncOp.getArguments())) + mapper.map(arg.value(), clonedFuncOp.getArgument(arg.index())); + llvm::SetVector<Operation *> slice; + getBackwardSlice(op, &slice); + for (Operation *slicedOp : slice) + builder.clone(*slicedOp, mapper); + builder.create<ReturnOp>(loc); + return success(); +} + +namespace { +/// Pass to test slice generated from slice analysis. +struct SliceAnalysisTestPass + : public PassWrapper<SliceAnalysisTestPass, OperationPass<ModuleOp>> { + void runOnOperation() override; + SliceAnalysisTestPass() = default; + SliceAnalysisTestPass(const SliceAnalysisTestPass &) {} +}; +} // namespace + +void SliceAnalysisTestPass::runOnOperation() { + ModuleOp module = getOperation(); + auto funcOps = module.getOps<FuncOp>(); + unsigned opNum = 0; + for (auto funcOp : funcOps) { + // TODO: For now this is just looking for Linalg ops. It can be generalized + // to look for other ops using flags. + funcOp.walk([&](Operation *op) { + if (!isa<linalg::LinalgOp>(op)) + return WalkResult::advance(); + std::string append = + std::string("__backward_slice__") + std::to_string(opNum); + createBackwardSliceFunction(op, append); + opNum++; + return WalkResult::advance(); + }); + } +} + +namespace mlir { +void registerSliceAnalysisTestPass() { + PassRegistration<SliceAnalysisTestPass> pass( + "slice-analysis-test", "Test Slice analysis functionality."); +} +} // namespace mlir diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 437b5f4b6f1a6..e46327aa63992 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -38,6 +38,7 @@ void registerPatternsTestPass(); void registerPrintOpAvailabilityPass(); void registerSideEffectTestPasses(); void registerSimpleParametricTilingPass(); +void registerSliceAnalysisTestPass(); void registerSymbolTestPasses(); void registerTestAffineDataCopyPass(); void registerTestAffineLoopUnswitchingPass(); @@ -88,6 +89,7 @@ void registerTestPasses() { registerPrintOpAvailabilityPass(); registerSideEffectTestPasses(); registerSimpleParametricTilingPass(); + registerSliceAnalysisTestPass(); registerSymbolTestPasses(); registerTestAffineDataCopyPass(); registerTestAllReduceLoweringPass(); From 84c2c4977dfe89112fd564a69c693d271663229c Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 9 Sep 2020 15:15:46 -0700 Subject: [PATCH 0322/1079] scudo: Introduce a new mechanism to let Scudo access a platform-specific TLS slot An upcoming change to Scudo will change how we use the TLS slot in tsd_shared.h, which will be a little easier to deal with if we can remove the code path that calls pthread_getspecific and pthread_setspecific. The only known user of this code path is Fuchsia. We can't eliminate this code path by making Fuchsia use ELF TLS because although Fuchsia supports ELF TLS, it is not supported within libc itself. To address this, Roland McGrath on the Fuchsia team has proposed that Scudo will optionally call a platform-provided function to access a TLS slot reserved for Scudo. Android also has a reserved TLS slot, but the code that accesses the TLS slot lives in Scudo. 
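As an illustration of the contract (not part of this patch): the platform is expected to provide a header named scudo_platform_tls_slot.h declaring uintptr_t *getPlatformAllocatorTlsSlot(). The sketch below is a hypothetical stand-in that satisfies that prototype with a plain C++ thread_local; it is not Fuchsia's or Android's actual implementation, which would typically hand out a slot reserved in the platform's thread control block instead.

#ifndef SCUDO_PLATFORM_TLS_SLOT_H_
#define SCUDO_PLATFORM_TLS_SLOT_H_

#include <stdint.h>

// Hypothetical implementation of the hook this change introduces: return
// the address of a per-thread word reserved for the allocator. The word
// must be zero-initialized in newly created threads; the zero-initialized
// thread_local below gives us that behavior in this sketch.
inline uintptr_t *getPlatformAllocatorTlsSlot() {
  static thread_local uintptr_t AllocatorSlot = 0;
  return &AllocatorSlot;
}

#endif // SCUDO_PLATFORM_TLS_SLOT_H_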
We can eliminate some complexity and duplicated code by having Android use the same mechanism that was proposed for Fuchsia, which is what this change does. A separate change to Android implements it. Differential Revision: https://reviews.llvm.org/D87420 --- compiler-rt/lib/scudo/standalone/linux.h | 45 ------------------- compiler-rt/lib/scudo/standalone/tsd_shared.h | 44 ++++++++---------- 2 files changed, 19 insertions(+), 70 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/linux.h b/compiler-rt/lib/scudo/standalone/linux.h index c8e41484c8515..72acb6da83a76 100644 --- a/compiler-rt/lib/scudo/standalone/linux.h +++ b/compiler-rt/lib/scudo/standalone/linux.h @@ -18,51 +18,6 @@ namespace scudo { // MapPlatformData is unused on Linux, define it as a minimally sized structure. struct MapPlatformData {}; -#if SCUDO_ANDROID - -#if defined(__aarch64__) -#define __get_tls() \ - ({ \ - void **__v; \ - __asm__("mrs %0, tpidr_el0" : "=r"(__v)); \ - __v; \ - }) -#elif defined(__arm__) -#define __get_tls() \ - ({ \ - void **__v; \ - __asm__("mrc p15, 0, %0, c13, c0, 3" : "=r"(__v)); \ - __v; \ - }) -#elif defined(__i386__) -#define __get_tls() \ - ({ \ - void **__v; \ - __asm__("movl %%gs:0, %0" : "=r"(__v)); \ - __v; \ - }) -#elif defined(__x86_64__) -#define __get_tls() \ - ({ \ - void **__v; \ - __asm__("mov %%fs:0, %0" : "=r"(__v)); \ - __v; \ - }) -#else -#error "Unsupported architecture." -#endif - -// The Android Bionic team has allocated a TLS slot for sanitizers starting -// with Q, given that Android currently doesn't support ELF TLS. It is used to -// store sanitizer thread specific data. -static const int TLS_SLOT_SANITIZER = 6; - -ALWAYS_INLINE uptr *getAndroidTlsPtr() { - return reinterpret_cast(&__get_tls()[TLS_SLOT_SANITIZER]); -} - -#endif // SCUDO_ANDROID - } // namespace scudo #endif // SCUDO_LINUX diff --git a/compiler-rt/lib/scudo/standalone/tsd_shared.h b/compiler-rt/lib/scudo/standalone/tsd_shared.h index 25ba191826c3f..041b834c74852 100644 --- a/compiler-rt/lib/scudo/standalone/tsd_shared.h +++ b/compiler-rt/lib/scudo/standalone/tsd_shared.h @@ -9,9 +9,17 @@ #ifndef SCUDO_TSD_SHARED_H_ #define SCUDO_TSD_SHARED_H_ -#include "linux.h" // for getAndroidTlsPtr() #include "tsd.h" +#if SCUDO_HAS_PLATFORM_TLS_SLOT +// This is a platform-provided header that needs to be on the include path when +// Scudo is compiled. It must declare a function with the prototype: +// uintptr_t *getPlatformAllocatorTlsSlot() +// that returns the address of a thread-local word of storage reserved for +// Scudo, that must be zero-initialized in newly created threads. 
+#include "scudo_platform_tls_slot.h" +#endif + namespace scudo { template @@ -80,26 +88,21 @@ struct TSDRegistrySharedT { } private: - ALWAYS_INLINE void setCurrentTSD(TSD *CurrentTSD) { -#if _BIONIC - *getAndroidTlsPtr() = reinterpret_cast(CurrentTSD); -#elif SCUDO_LINUX - ThreadTSD = CurrentTSD; + ALWAYS_INLINE uptr *getTlsPtr() const { +#if SCUDO_HAS_PLATFORM_TLS_SLOT + return reinterpret_cast(getPlatformAllocatorTlsSlot()); #else - CHECK_EQ( - pthread_setspecific(PThreadKey, reinterpret_cast(CurrentTSD)), - 0); + static thread_local uptr ThreadTSD; + return &ThreadTSD; #endif } + ALWAYS_INLINE void setCurrentTSD(TSD *CurrentTSD) { + *getTlsPtr() = reinterpret_cast(CurrentTSD); + } + ALWAYS_INLINE TSD *getCurrentTSD() { -#if _BIONIC - return reinterpret_cast *>(*getAndroidTlsPtr()); -#elif SCUDO_LINUX - return ThreadTSD; -#else - return reinterpret_cast *>(pthread_getspecific(PThreadKey)); -#endif + return reinterpret_cast *>(*getTlsPtr()); } bool setNumberOfTSDs(u32 N) { @@ -195,17 +198,8 @@ struct TSDRegistrySharedT { HybridMutex Mutex; HybridMutex MutexTSDs; TSD TSDs[TSDsArraySize]; -#if SCUDO_LINUX && !_BIONIC - static THREADLOCAL TSD *ThreadTSD; -#endif }; -#if SCUDO_LINUX && !_BIONIC -template -THREADLOCAL TSD - *TSDRegistrySharedT::ThreadTSD; -#endif - } // namespace scudo #endif // SCUDO_TSD_SHARED_H_ From d876c7c8ec5387aac14041cace1833b243e5b335 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 10 Sep 2020 12:38:42 -0700 Subject: [PATCH 0323/1079] scudo: Remove the THREADLOCAL macro. Replace all remaining uses with thread_local, which is a C++11 standard feature. Differential Revision: https://reviews.llvm.org/D87478 --- compiler-rt/lib/scudo/standalone/internal_defs.h | 1 - compiler-rt/lib/scudo/standalone/tests/primary_test.cpp | 2 +- compiler-rt/lib/scudo/standalone/tsd_exclusive.h | 8 ++++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/internal_defs.h b/compiler-rt/lib/scudo/standalone/internal_defs.h index a884f1f3a40ed..0babbbe3c11b5 100644 --- a/compiler-rt/lib/scudo/standalone/internal_defs.h +++ b/compiler-rt/lib/scudo/standalone/internal_defs.h @@ -36,7 +36,6 @@ #define FORMAT(F, A) __attribute__((format(printf, F, A))) #define NOINLINE __attribute__((noinline)) #define NORETURN __attribute__((noreturn)) -#define THREADLOCAL __thread #define LIKELY(X) __builtin_expect(!!(X), 1) #define UNLIKELY(X) __builtin_expect(!!(X), 0) #if defined(__i386__) || defined(__x86_64__) diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp index a7a2b3160611e..605ce44d49739 100644 --- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp @@ -152,7 +152,7 @@ static std::condition_variable Cv; static bool Ready; template static void performAllocations(Primary *Allocator) { - static THREADLOCAL typename Primary::CacheT Cache; + static thread_local typename Primary::CacheT Cache; Cache.init(nullptr, Allocator); std::vector> V; { diff --git a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h index ac5a22c970701..9437167d84821 100644 --- a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h +++ b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h @@ -99,16 +99,16 @@ template struct TSDRegistryExT { atomic_u8 Disabled; TSD FallbackTSD; HybridMutex Mutex; - static THREADLOCAL ThreadState State; - static THREADLOCAL TSD ThreadTSD; + static 
thread_local ThreadState State; + static thread_local TSD ThreadTSD; friend void teardownThread(void *Ptr); }; template -THREADLOCAL TSD TSDRegistryExT::ThreadTSD; +thread_local TSD TSDRegistryExT::ThreadTSD; template -THREADLOCAL ThreadState TSDRegistryExT::State; +thread_local ThreadState TSDRegistryExT::State; template void teardownThread(void *Ptr) { typedef TSDRegistryExT TSDRegistryT; From b22d45049682d1461b6b786f159681e2e5c2ce24 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Thu, 10 Sep 2020 22:16:42 -0400 Subject: [PATCH 0324/1079] Remove dependency on clangASTMatchers. - It seems no long required for shared library builds. --- clang/lib/CodeGen/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt index f47ecd9bf8465..4039277707c5f 100644 --- a/clang/lib/CodeGen/CMakeLists.txt +++ b/clang/lib/CodeGen/CMakeLists.txt @@ -92,7 +92,6 @@ add_clang_library(clangCodeGen LINK_LIBS clangAnalysis clangAST - clangASTMatchers clangBasic clangFrontend clangLex From 39dc75f66c60025539940ff47b105418645c025f Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Thu, 10 Sep 2020 22:37:35 -0400 Subject: [PATCH 0325/1079] Revert "[EarlyCSE] Equivalent SELECTs should hash equally" This reverts commit c9826829d74e637163fdb0351870b8204e62d6e6 as it breaks regression tests. --- llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 13 ------------- llvm/test/Transforms/EarlyCSE/commute.ll | 19 ------------------- 2 files changed, 32 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index f0d3f90995d7b..b655204d26dd2 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -191,19 +191,6 @@ static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A, Pred = ICmpInst::getSwappedPredicate(Pred); } - // Check for inverted variants of min/max by swapping operands. - switch (Pred) { - case CmpInst::ICMP_ULE: - case CmpInst::ICMP_UGE: - case CmpInst::ICMP_SLE: - case CmpInst::ICMP_SGE: - Pred = CmpInst::getInversePredicate(Pred); - std::swap(A, B); - break; - default: - break; - } - switch (Pred) { case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break; case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break; diff --git a/llvm/test/Transforms/EarlyCSE/commute.ll b/llvm/test/Transforms/EarlyCSE/commute.ll index f5868a5fdfb2f..57c5a853a12ff 100644 --- a/llvm/test/Transforms/EarlyCSE/commute.ll +++ b/llvm/test/Transforms/EarlyCSE/commute.ll @@ -684,25 +684,6 @@ define i32 @select_not_invert_pred_cond_wrong_select_op(i8 %x, i8 %y, i32 %t, i3 ret i32 %r } -; This test is a reproducer for a bug involving inverted min/max selects -; hashing differently but comparing as equal. It exhibits such a pair of -; values, and we run this test with -earlycse-debug-hash which would catch -; the disagreement and fail if it regressed. 
-define i32 @inverted_max(i32 %i) { -; CHECK-LABEL: @inverted_max( -; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 0, [[I:%.*]] -; CHECK-NEXT: [[M1:%.*]] = select i1 [[CMP]], i32 [[I]], i32 0 -; CHECK-NEXT: [[CMPINV:%.*]] = icmp sgt i32 0, [[I:%.*]] -; CHECK-NEXT: [[M2:%.*]] = select i1 [[CMPINV]], i32 0, i32 [[I]] -; CHECK-NEXT: [[R:%.*]] = add i32 [[M1]], [[M2]] -; CHECK-NEXT: ret i32 [[R]] - %cmp = icmp sle i32 0, %i - %m1 = select i1 %cmp, i32 %i, i32 0 - %cmpinv = icmp sgt i32 0, %i - %m2 = select i1 %cmpinv, i32 0, i32 %i - %r = add i32 %m1, %m2 - ret i32 %r -} ; This test is a reproducer for a bug involving inverted min/max selects ; hashing differently but comparing as equal. It exhibits such a pair of From 3f7c3e84ad69f1ffa767b1b7ce3aa36de6c30f87 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 10 Sep 2020 19:59:31 -0700 Subject: [PATCH 0326/1079] [Asan] Fix __asan_update_allocation_context Update both thread and stack. Update thread and stack as atomic operation. Keep all 32bit of TID as now we have enough bits. Depends on D87135. Reviewed By: morehouse Differential Revision: https://reviews.llvm.org/D87217 --- compiler-rt/lib/asan/asan_allocator.cpp | 105 +++++++++++++----- compiler-rt/lib/asan/asan_allocator.h | 2 +- .../asan/TestCases/asan_update_allocation.cpp | 25 ++++- 3 files changed, 99 insertions(+), 33 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index f7e238d613e16..8cc7de3a9862b 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -51,6 +51,22 @@ static u32 RZSize2Log(u32 rz_size) { static AsanAllocator &get_allocator(); +static void AtomicContextStore(volatile atomic_uint64_t *atomic_context, + u32 tid, u32 stack) { + u64 context = tid; + context <<= 32; + context += stack; + atomic_store(atomic_context, context, memory_order_relaxed); +} + +static void AtomicContextLoad(const volatile atomic_uint64_t *atomic_context, + u32 &tid, u32 &stack) { + u64 context = atomic_load(atomic_context, memory_order_relaxed); + stack = context; + context >>= 32; + tid = context; +} + // The memory chunk allocated from the underlying allocator looks like this: // L L L L L L H H U U U U U U R R // L -- left redzone words (0 or more bytes) @@ -70,12 +86,14 @@ static AsanAllocator &get_allocator(); // B -- address of ChunkHeader pointing to the first 'H' static const uptr kAllocBegMagic = 0xCC6E96B9; -struct ChunkHeader { +class ChunkHeader { + public: atomic_uint8_t chunk_state; u8 from_memalign : 1; u8 alloc_type : 2; u8 rz_log : 3; u8 lsan_tag : 2; + // This field is used for small sizes. For large sizes it is equal to // SizeClassMap::kMaxSize and the actual size is stored in the // SecondaryAllocator's metadata. @@ -83,14 +101,31 @@ struct ChunkHeader { // align < 8 -> 0 // else -> log2(min(align, 512)) - 2 u32 user_requested_alignment_log : 3; - u32 alloc_tid; - atomic_uint32_t alloc_context_id; + + private: + atomic_uint64_t alloc_context_id; + + public: + void SetAllocContext(u32 tid, u32 stack) { + AtomicContextStore(&alloc_context_id, tid, stack); + } + + void GetAllocContext(u32 &tid, u32 &stack) const { + AtomicContextLoad(&alloc_context_id, tid, stack); + } }; -struct ChunkBase : ChunkHeader { - // Header2, intersects with user memory. 
- u32 free_context_id; - u32 free_tid; +class ChunkBase : public ChunkHeader { + atomic_uint64_t free_context_id; + + public: + void SetFreeContext(u32 tid, u32 stack) { + AtomicContextStore(&free_context_id, tid, stack); + } + + void GetFreeContext(u32 &tid, u32 &stack) const { + AtomicContextLoad(&free_context_id, tid, stack); + } }; static const uptr kChunkHeaderSize = sizeof(ChunkHeader); @@ -109,7 +144,8 @@ enum { CHUNK_QUARANTINE = 3, }; -struct AsanChunk: ChunkBase { +class AsanChunk : public ChunkBase { + public: uptr Beg() { return reinterpret_cast(this) + kChunkHeaderSize; } uptr UsedSize(bool locked_version = false) { if (user_requested_size != SizeClassMap::kMaxSize) @@ -144,8 +180,6 @@ struct QuarantineCallback { CHECK_EQ(old_chunk_state, CHUNK_QUARANTINE); } - CHECK_NE(m->alloc_tid, kInvalidTid); - CHECK_NE(m->free_tid, kInvalidTid); PoisonShadow(m->Beg(), RoundUpTo(m->UsedSize(), SHADOW_GRANULARITY), kAsanHeapLeftRedzoneMagic); @@ -419,8 +453,8 @@ struct Allocator { if (atomic_load(&m->chunk_state, memory_order_acquire) != CHUNK_ALLOCATED) return false; if (m->Beg() != addr) return false; - atomic_store(&m->alloc_context_id, StackDepotPut(*stack), - memory_order_relaxed); + AsanThread *t = GetCurrentThread(); + m->SetAllocContext(t ? t->tid() : 0, StackDepotPut(*stack)); return true; } @@ -515,9 +549,6 @@ struct Allocator { AsanChunk *m = reinterpret_cast(chunk_beg); m->alloc_type = alloc_type; m->rz_log = rz_log; - u32 alloc_tid = t ? t->tid() : 0; - m->alloc_tid = alloc_tid; - CHECK_EQ(alloc_tid, m->alloc_tid); // Does alloc_tid fit into the bitfield? m->from_memalign = user_beg != beg_plus_redzone; if (alloc_beg != chunk_beg) { CHECK_LE(alloc_beg + 2 * sizeof(uptr), chunk_beg); @@ -537,8 +568,7 @@ struct Allocator { } m->user_requested_alignment_log = user_requested_alignment_log; - atomic_store(&m->alloc_context_id, StackDepotPut(*stack), - memory_order_relaxed); + m->SetAllocContext(t ? t->tid() : 0, StackDepotPut(*stack)); uptr size_rounded_down_to_granularity = RoundDownTo(size, SHADOW_GRANULARITY); @@ -591,8 +621,7 @@ struct Allocator { } CHECK_EQ(CHUNK_ALLOCATED, old_chunk_state); // It was a user data. - m->free_tid = kInvalidTid; - m->free_context_id = 0; + m->SetFreeContext(kInvalidTid, 0); return true; } @@ -602,8 +631,7 @@ struct Allocator { CHECK_EQ(atomic_load(&m->chunk_state, memory_order_relaxed), CHUNK_QUARANTINE); AsanThread *t = GetCurrentThread(); - m->free_tid = t ? t->tid() : 0; - m->free_context_id = StackDepotPut(*stack); + m->SetFreeContext(t ? t->tid() : 0, StackDepotPut(*stack)); Flags &fl = *flags(); if (fl.max_free_fill_size > 0) { @@ -860,10 +888,23 @@ uptr AsanChunkView::UsedSize() const { return chunk_->UsedSize(); } u32 AsanChunkView::UserRequestedAlignment() const { return Allocator::ComputeUserAlignment(chunk_->user_requested_alignment_log); } -uptr AsanChunkView::AllocTid() const { return chunk_->alloc_tid; } + +uptr AsanChunkView::AllocTid() const { + u32 tid = 0; + u32 stack = 0; + chunk_->GetAllocContext(tid, stack); + return tid; +} + uptr AsanChunkView::FreeTid() const { - return IsQuarantined() ? 
chunk_->free_tid : kInvalidTid; + if (!IsQuarantined()) + return kInvalidTid; + u32 tid = 0; + u32 stack = 0; + chunk_->GetFreeContext(tid, stack); + return tid; } + AllocType AsanChunkView::GetAllocType() const { return (AllocType)chunk_->alloc_type; } @@ -876,10 +917,19 @@ static StackTrace GetStackTraceFromId(u32 id) { } u32 AsanChunkView::GetAllocStackId() const { - return atomic_load(&chunk_->alloc_context_id, memory_order_relaxed); + u32 tid = 0; + u32 stack = 0; + chunk_->GetAllocContext(tid, stack); + return stack; } + u32 AsanChunkView::GetFreeStackId() const { - return IsQuarantined() ? chunk_->free_context_id : 0; + if (!IsQuarantined()) + return 0; + u32 tid = 0; + u32 stack = 0; + chunk_->GetFreeContext(tid, stack); + return stack; } StackTrace AsanChunkView::GetAllocStack() const { @@ -1111,7 +1161,10 @@ uptr LsanMetadata::requested_size() const { u32 LsanMetadata::stack_trace_id() const { __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); - return atomic_load(&m->alloc_context_id, memory_order_relaxed); + u32 tid = 0; + u32 stack = 0; + m->GetAllocContext(tid, stack); + return stack; } void ForEachChunk(ForEachChunkCallback callback, void *arg) { diff --git a/compiler-rt/lib/asan/asan_allocator.h b/compiler-rt/lib/asan/asan_allocator.h index d60b97500a3c3..612799f90964a 100644 --- a/compiler-rt/lib/asan/asan_allocator.h +++ b/compiler-rt/lib/asan/asan_allocator.h @@ -28,7 +28,7 @@ enum AllocType { FROM_NEW_BR = 3 // Memory block came from operator new [ ] }; -struct AsanChunk; +class AsanChunk; struct AllocatorOptions { u32 quarantine_size_mb; diff --git a/compiler-rt/test/asan/TestCases/asan_update_allocation.cpp b/compiler-rt/test/asan/TestCases/asan_update_allocation.cpp index d703fe024aa05..065f793092f05 100644 --- a/compiler-rt/test/asan/TestCases/asan_update_allocation.cpp +++ b/compiler-rt/test/asan/TestCases/asan_update_allocation.cpp @@ -1,19 +1,32 @@ -// RUN: %clangxx_asan -O0 -DSIZE=10 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK -// RUN: %clangxx_asan -O0 -DSIZE=10000000 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK +// RUN: %clangxx_asan -O0 %s -o %t + +// RUN: not %run %t 10 0 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefixes=CHECK,T0 +// RUN: not %run %t 10000000 0 2>&1 | FileCheck %s --check-prefixes=CHECK,T0 + +// RUN: not %run %t 10 1 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefixes=CHECK,T1 +// RUN: not %run %t 10000000 1 2>&1 | FileCheck %s --check-prefixes=CHECK,T1 + // REQUIRES: stable-runtime -#include #include +#include +#include void UPDATE(void *p) { __asan_update_allocation_context(p); } -int main() { - char *x = (char*)malloc(SIZE * sizeof(char)); - UPDATE(x); +int main(int argc, char *argv[]) { + char *x = (char *)malloc(atoi(argv[1]) * sizeof(char)); + if (atoi(argv[2])) + std::thread([&]() { UPDATE(x); }).join(); + else + UPDATE(x); free(x); return x[5]; // CHECK: {{.*ERROR: AddressSanitizer: heap-use-after-free on address}} + // CHECK: READ of size 1 at {{.*}} thread T0 + // T0: allocated by thread T0 here + // T1: allocated by thread T1 here // CHECK: UPDATE } From 41e68f7ee7b3bb33e9acb0502339a858806e8523 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Thu, 10 Sep 2020 23:11:22 -0400 Subject: [PATCH 0327/1079] [EarlyCSE] Fix and recommit the revised c9826829d74e637163fdb0351870b8204e62d6e6 In addition to calculate hash consistently by swapping SELECT's operands, we also need to inverse the select pattern 
flavor to match the original logic. [EarlyCSE] Equivalent SELECTs should hash equally DenseMap assumes that, if its isEqual method returns true for two elements, then its getHashValue method must return the same value for them. This invariant is broken when one SELECT node is a min/max operation, and the other can be transformed into an equivalent min/max by inverting its predicate and swapping its operands. This patch fixes an assertion failure that would occur intermittently while compiling the following IR: define i32 @t(i32 %i) { %cmp = icmp sle i32 0, %i %twin1 = select i1 %cmp, i32 %i, i32 0 %cmpinv = icmp sgt i32 0, %i %twin2 = select i1 %cmpinv, i32 0, i32 %i %sink = add i32 %twin1, %twin2 ret i32 %sink } Differential Revision: https://reviews.llvm.org/D86843 --- llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 23 +++++++++++++++++++---- llvm/test/Transforms/EarlyCSE/commute.ll | 20 ++++++++++++++++++++ 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index b655204d26dd2..f71a2b9e003a9 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -191,11 +191,26 @@ static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A, Pred = ICmpInst::getSwappedPredicate(Pred); } + // Check for inverted variants of min/max by swapping operands. + bool Inversed = false; switch (Pred) { - case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break; - case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break; - case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break; - case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break; + case CmpInst::ICMP_ULE: + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_SLE: + case CmpInst::ICMP_SGE: + Pred = CmpInst::getInversePredicate(Pred); + std::swap(A, B); + Inversed = true; + break; + default: + break; + } + + switch (Pred) { + case CmpInst::ICMP_UGT: Flavor = Inversed ? SPF_UMIN : SPF_UMAX; break; + case CmpInst::ICMP_ULT: Flavor = Inversed ? SPF_UMAX : SPF_UMIN; break; + case CmpInst::ICMP_SGT: Flavor = Inversed ? SPF_SMIN : SPF_SMAX; break; + case CmpInst::ICMP_SLT: Flavor = Inversed ? SPF_SMAX : SPF_SMIN; break; default: break; } diff --git a/llvm/test/Transforms/EarlyCSE/commute.ll b/llvm/test/Transforms/EarlyCSE/commute.ll index 57c5a853a12ff..a172ba81c6527 100644 --- a/llvm/test/Transforms/EarlyCSE/commute.ll +++ b/llvm/test/Transforms/EarlyCSE/commute.ll @@ -684,6 +684,26 @@ define i32 @select_not_invert_pred_cond_wrong_select_op(i8 %x, i8 %y, i32 %t, i3 ret i32 %r } +; This test is a reproducer for a bug involving inverted min/max selects +; hashing differently but comparing as equal. It exhibits such a pair of +; values, and we run this test with -earlycse-debug-hash which would catch +; the disagreement and fail if it regressed. +; EarlyCSE should be able to detect the 2nd redundant `select` and eliminate +; it. +define i32 @inverted_max(i32 %i) { +; CHECK-LABEL: @inverted_max( +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 0, [[I:%.*]] +; CHECK-NEXT: [[M1:%.*]] = select i1 [[CMP]], i32 [[I]], i32 0 +; CHECK-NEXT: [[CMPINV:%.*]] = icmp sgt i32 0, [[I:%.*]] +; CHECK-NEXT: [[R:%.*]] = add i32 [[M1]], [[M1]] +; CHECK-NEXT: ret i32 [[R]] + %cmp = icmp sle i32 0, %i + %m1 = select i1 %cmp, i32 %i, i32 0 + %cmpinv = icmp sgt i32 0, %i + %m2 = select i1 %cmpinv, i32 0, i32 %i + %r = add i32 %m1, %m2 + ret i32 %r +} ; This test is a reproducer for a bug involving inverted min/max selects ; hashing differently but comparing as equal. 
It exhibits such a pair of From 16ba78ee627c3fe66906349e8c90ee8cc1224298 Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Thu, 10 Sep 2020 15:43:28 -0400 Subject: [PATCH 0328/1079] libclc/spirv: Add missing files from D85911 Fixes: 060c8e083dd637866854acb6a0823c45b2ef68ef Signed-off-by: Jan Vesely --- libclc/spirv/lib/math/fma.cl | 6 ++++++ libclc/spirv/lib/math/fma.inc | 3 +++ libclc/spirv64/lib/math/fma.cl | 6 ++++++ libclc/spirv64/lib/math/fma.inc | 3 +++ 4 files changed, 18 insertions(+) create mode 100644 libclc/spirv/lib/math/fma.cl create mode 100644 libclc/spirv/lib/math/fma.inc create mode 100644 libclc/spirv64/lib/math/fma.cl create mode 100644 libclc/spirv64/lib/math/fma.inc diff --git a/libclc/spirv/lib/math/fma.cl b/libclc/spirv/lib/math/fma.cl new file mode 100644 index 0000000000000..982ddc4374f35 --- /dev/null +++ b/libclc/spirv/lib/math/fma.cl @@ -0,0 +1,6 @@ +#include +#include + +#define __CLC_BODY +#define __FLOAT_ONLY +#include diff --git a/libclc/spirv/lib/math/fma.inc b/libclc/spirv/lib/math/fma.inc new file mode 100644 index 0000000000000..0f12c565758ff --- /dev/null +++ b/libclc/spirv/lib/math/fma.inc @@ -0,0 +1,3 @@ +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE fma(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c) { + return __clc_sw_fma(a, b, c); +} diff --git a/libclc/spirv64/lib/math/fma.cl b/libclc/spirv64/lib/math/fma.cl new file mode 100644 index 0000000000000..982ddc4374f35 --- /dev/null +++ b/libclc/spirv64/lib/math/fma.cl @@ -0,0 +1,6 @@ +#include +#include + +#define __CLC_BODY +#define __FLOAT_ONLY +#include diff --git a/libclc/spirv64/lib/math/fma.inc b/libclc/spirv64/lib/math/fma.inc new file mode 100644 index 0000000000000..0f12c565758ff --- /dev/null +++ b/libclc/spirv64/lib/math/fma.inc @@ -0,0 +1,3 @@ +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE fma(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c) { + return __clc_sw_fma(a, b, c); +} From da9244882804ec6479aac70334fd7f7b4baf855e Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Thu, 10 Sep 2020 20:25:42 -0700 Subject: [PATCH 0329/1079] [NFC][MLInliner] Presort instruction successions. Differential Revision: https://reviews.llvm.org/D87489 --- .../Analysis/InlineSizeEstimatorAnalysis.cpp | 113 +++++++----------- 1 file changed, 45 insertions(+), 68 deletions(-) diff --git a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp index 5c3a6c41ad432..2213cd8598b0a 100644 --- a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp +++ b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp @@ -67,8 +67,6 @@ class IRToNativeSizeLearning { static const size_t NumNamedFeatures = static_cast(NamedFeatureIndex::NumNamedFeatures); struct FunctionFeatures { - static std::vector> - ImportantInstructionSuccessions; static const size_t FeatureCount; std::array NamedFeatures = {0}; @@ -84,53 +82,38 @@ class IRToNativeSizeLearning { static FunctionFeatures getFunctionFeatures(Function &F, FunctionAnalysisManager &FAM); - -private: - /// Sort once the feature tuples. 
- struct SortFeatureTuples { - bool IsSorted = false; - SortFeatureTuples() { - std::sort(FunctionFeatures::ImportantInstructionSuccessions.begin(), - FunctionFeatures::ImportantInstructionSuccessions.end()); - IsSorted = true; - } - }; - - static llvm::ManagedStatic TupleSorter; - - static bool ensureSortedTuples() { return TupleSorter->IsSorted; } }; -llvm::ManagedStatic - IRToNativeSizeLearning::TupleSorter; // This is a point in time - we determined including these pairs of // consecutive instructions (in the IR layout available at inline time) as // features improves the model performance. We want to move away from manual // feature selection. -// The vector is given in opcode pairs rather than labels because 1) labels -// weren't readily available, and 2) the successions were hand - extracted -std::vector> - IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions = - {{1, 34}, {15, 27}, {53, 53}, {53, 34}, {1, 11}, {32, 2}, {2, 48}, - {28, 48}, {1, 45}, {49, 32}, {57, 56}, {55, 53}, {1, 28}, {57, 34}, - {1, 1}, {32, 28}, {32, 15}, {49, 28}, {53, 1}, {2, 53}, {48, 34}, - {28, 53}, {2, 32}, {1, 40}, {32, 48}, {29, 56}, {56, 32}, {55, 56}, - {48, 56}, {1, 31}, {33, 34}, {2, 28}, {1, 12}, {55, 1}, {31, 31}, - {65, 1}, {33, 56}, {32, 32}, {13, 13}, {1, 26}, {13, 26}, {2, 1}, - {1, 33}, {47, 49}, {64, 1}, {2, 38}, {34, 53}, {48, 2}, {55, 34}, - {34, 32}, {1, 5}, {56, 13}, {2, 2}, {2, 49}, {33, 2}, {49, 39}, - {56, 49}, {33, 49}, {32, 39}, {39, 57}, {29, 33}, {31, 34}, {32, 29}, - {47, 15}, {13, 34}, {2, 33}, {32, 49}, {49, 34}, {56, 33}, {1, 30}, - {33, 33}, {31, 33}, {2, 29}, {56, 7}, {32, 13}, {2, 55}, {56, 56}, - {2, 34}, {1, 42}, {34, 49}, {1, 20}, {32, 33}, {1, 25}, {53, 28}, - {1, 14}, {31, 49}, {28, 2}, {2, 13}, {2, 56}, {1, 32}, {56, 53}, - {65, 65}, {33, 53}, {64, 64}, {13, 2}, {34, 33}, {1, 4}, {49, 2}, - {1, 9}, {56, 1}, {33, 1}, {53, 57}, {32, 53}, {13, 56}, {32, 56}, - {55, 55}, {1, 18}, {49, 56}, {34, 34}, {1, 7}, {56, 64}, {32, 1}, - {13, 33}, {55, 28}, {49, 33}, {57, 57}, {56, 34}, {34, 56}, {33, 32}, - {32, 40}, {1, 29}, {53, 2}, {34, 1}, {32, 34}, {49, 49}, {1, 24}, - {40, 34}, {1, 13}, {38, 34}, {29, 2}, {34, 2}, {1, 39}, {1, 22}, - {1, 27}, {49, 1}, {1, 8}, {56, 2}}; +// The array is given in opcode pairs rather than labels because 1) labels +// weren't readily available, and 2) the successions were hand - extracted. +// +// This array must be sorted. 
+static const std::array, 137> + ImportantInstructionSuccessions{ + {{1, 1}, {1, 4}, {1, 5}, {1, 7}, {1, 8}, {1, 9}, {1, 11}, + {1, 12}, {1, 13}, {1, 14}, {1, 18}, {1, 20}, {1, 22}, {1, 24}, + {1, 25}, {1, 26}, {1, 27}, {1, 28}, {1, 29}, {1, 30}, {1, 31}, + {1, 32}, {1, 33}, {1, 34}, {1, 39}, {1, 40}, {1, 42}, {1, 45}, + {2, 1}, {2, 2}, {2, 13}, {2, 28}, {2, 29}, {2, 32}, {2, 33}, + {2, 34}, {2, 38}, {2, 48}, {2, 49}, {2, 53}, {2, 55}, {2, 56}, + {13, 2}, {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27}, + {28, 2}, {28, 48}, {28, 53}, {29, 2}, {29, 33}, {29, 56}, {31, 31}, + {31, 33}, {31, 34}, {31, 49}, {32, 1}, {32, 2}, {32, 13}, {32, 15}, + {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40}, + {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1}, {33, 2}, {33, 32}, + {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1}, {34, 2}, + {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34}, + {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2}, {48, 34}, {48, 56}, + {49, 1}, {49, 2}, {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39}, + {49, 49}, {49, 56}, {53, 1}, {53, 2}, {53, 28}, {53, 34}, {53, 53}, + {53, 57}, {55, 1}, {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56}, + {56, 1}, {56, 2}, {56, 7}, {56, 13}, {56, 32}, {56, 33}, {56, 34}, + {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57}, + {64, 1}, {64, 64}, {65, 1}, {65, 65}}}; // We have: 9 calculated features (the features here); 1 feature for each // instruction opcode; and 1 feature for each manually-identified sequence. @@ -140,14 +123,13 @@ std::vector> // Note that instruction opcodes start from 1. For convenience, we also have an // always 0 feature for the '0' opcode, hence the extra 1. const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount = - IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions - .size() + - getMaxInstructionID() + 1 + IRToNativeSizeLearning::NumNamedFeatures; + ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 + + IRToNativeSizeLearning::NumNamedFeatures; size_t getSize(Function &F, TargetTransformInfo &TTI) { size_t Ret = 0; - for (auto &BB : F) - for (auto &I : BB) + for (const auto &BB : F) + for (const auto &I : BB) Ret += TTI.getInstructionCost( &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize); return Ret; @@ -161,8 +143,8 @@ size_t getSize(Function &F, FunctionAnalysisManager &FAM) { unsigned getMaxDominatorTreeDepth(const Function &F, const DominatorTree &Tree) { unsigned Ret = 0; - for (auto &BB : F) - if (auto *TN = Tree.getNode(&BB)) + for (const auto &BB : F) + if (const auto *TN = Tree.getNode(&BB)) Ret = std::max(Ret, TN->getLevel()); return Ret; } @@ -171,42 +153,37 @@ unsigned getMaxDominatorTreeDepth(const Function &F, IRToNativeSizeLearning::FunctionFeatures IRToNativeSizeLearning::getFunctionFeatures(Function &F, FunctionAnalysisManager &FAM) { - ensureSortedTuples(); + assert(llvm::is_sorted(ImportantInstructionSuccessions) && + "expected function features are sorted"); auto &DomTree = FAM.getResult(F); FunctionFeatures FF; size_t InstrCount = getMaxInstructionID() + 1; FF.InstructionHistogram.resize(InstrCount); - FF.InstructionPairHistogram.resize( - FunctionFeatures::ImportantInstructionSuccessions.size()); + FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size()); - auto StartID = 0; - auto LastID = StartID; + int StartID = 0; + int LastID = StartID; auto getPairIndex = [](size_t a, size_t b) { - auto I = - 
std::find(FunctionFeatures::ImportantInstructionSuccessions.begin(), - FunctionFeatures::ImportantInstructionSuccessions.end(), - std::make_pair(a, b)); - if (I == FunctionFeatures::ImportantInstructionSuccessions.end()) + auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b)); + if (I == ImportantInstructionSuccessions.end()) return -1; - return static_cast<size_t>(std::distance( - FunctionFeatures::ImportantInstructionSuccessions.begin(), I)); + return static_cast<size_t>( + std::distance(ImportantInstructionSuccessions.begin(), I)); }; // We don't want debug calls, because they'd just add noise. - for (auto &BB : F) { - for (auto I = BB.instructionsWithoutDebug().begin(), - E = BB.instructionsWithoutDebug().end(); - I != E; ++I) { - auto ID = I->getOpcode(); + for (const auto &BB : F) { + for (const auto &I : BB.instructionsWithoutDebug()) { + auto ID = I.getOpcode(); ++FF.InstructionHistogram[ID]; int PairIndex = getPairIndex(LastID, ID); if (PairIndex >= 0) ++FF.InstructionPairHistogram[PairIndex]; LastID = ID; - if (isa<CallBase>(*I)) + if (isa<CallBase>(I)) ++FF[NamedFeatureIndex::Calls]; } } From e45b0708ae81ace27de53f12b32a80601cb12bf3 Mon Sep 17 00:00:00 2001 From: Alok Kumar Sharma Date: Fri, 11 Sep 2020 11:11:39 +0530 Subject: [PATCH 0330/1079] [DebugInfo] Fixing CodeView assert related to lowerBound field of DISubrange. This is to fix the CodeView build failure https://bugs.llvm.org/show_bug.cgi?id=47287 after the DISubrange upgrade in D80197. The assert condition is now removed, and Count is calculated when LowerBound is absent or zero and either Count or UpperBound is constant. If Count is unknown, it is later handled as a VLA (currently Count is set to zero). Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D87406 --- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index b388e43447835..bcace6264cd04 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -1578,11 +1578,16 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { assert(Element->getTag() == dwarf::DW_TAG_subrange_type); const DISubrange *Subrange = cast<DISubrange>(Element); - assert(!Subrange->getRawLowerBound() && - "codeview doesn't support subranges with lower bounds"); int64_t Count = -1; - if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt *>()) - Count = CI->getSExtValue(); + // Calculate the count if either LowerBound is absent or is zero and + // either Count or UpperBound is constant. + auto *LI = Subrange->getLowerBound().dyn_cast<ConstantInt *>(); + if (!Subrange->getRawLowerBound() || (LI && (LI->getSExtValue() == 0))) { + if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt *>()) + Count = CI->getSExtValue(); + else if (auto *UI = Subrange->getUpperBound().dyn_cast<ConstantInt *>()) + Count = UI->getSExtValue() + 1; // LowerBound is zero + } // Forward declarations of arrays without a size and VLAs use a count of -1. // Emit a count of zero in these cases to match what MSVC does for arrays From f787fe15d8e1cb63b40235e781cd7c2e130bbcd6 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Fri, 11 Sep 2020 01:58:11 -0400 Subject: [PATCH 0331/1079] [EarlyCSE] Remove unnecessary operand swap. - As min/max are commutative operators, there is no need to swap the operands; doing so breaks the convention used for calculating the hash value. 
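For context, the invariant both this change and D86843 preserve is DenseMap's requirement that isEqual and getHashValue agree. The stand-alone sketch below is illustrative only (it is not EarlyCSE's actual SimpleValue info); it shows the shape of a DenseMapInfo where a commutative key must be canonicalized the same way in both methods:

#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include <algorithm>
#include <climits>

// Hypothetical key type: a pair whose operand order is insignificant,
// much like the operands of a commutative min/max select.
struct UnorderedPair {
  int A, B;
};

namespace llvm {
template <> struct DenseMapInfo<UnorderedPair> {
  static UnorderedPair getEmptyKey() { return {INT_MIN, INT_MIN}; }
  static UnorderedPair getTombstoneKey() { return {INT_MAX, INT_MAX}; }
  static unsigned getHashValue(const UnorderedPair &P) {
    // Canonicalize (sort) the operands before hashing, mirroring what
    // isEqual does below. Hashing the raw (A, B) order while isEqual
    // ignores order would break the invariant that equal keys hash equally.
    return hash_combine(std::min(P.A, P.B), std::max(P.A, P.B));
  }
  static bool isEqual(const UnorderedPair &L, const UnorderedPair &R) {
    return std::min(L.A, L.B) == std::min(R.A, R.B) &&
           std::max(L.A, L.B) == std::max(R.A, R.B);
  }
};
} // namespace llvm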
--- llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 1 - llvm/test/CodeGen/AMDGPU/sad.ll | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index f71a2b9e003a9..e47ecb4fbb44a 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -199,7 +199,6 @@ static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A, case CmpInst::ICMP_SLE: case CmpInst::ICMP_SGE: Pred = CmpInst::getInversePredicate(Pred); - std::swap(A, B); Inversed = true; break; default: diff --git a/llvm/test/CodeGen/AMDGPU/sad.ll b/llvm/test/CodeGen/AMDGPU/sad.ll index 3a4a2d07772c1..464b413e65588 100644 --- a/llvm/test/CodeGen/AMDGPU/sad.ll +++ b/llvm/test/CodeGen/AMDGPU/sad.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -earlycse-debug-hash -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}v_sad_u32_pat1: ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} From 525c83cee00a3a92d9b1a9d6f39ee4fd6c0c798d Mon Sep 17 00:00:00 2001 From: Esme-Yi Date: Fri, 11 Sep 2020 07:16:58 +0000 Subject: [PATCH 0332/1079] [NFC][PowerPC] Add tests of constants-i64. --- llvm/test/CodeGen/PowerPC/constants-i64.ll | 70 ++++++++++++++++++---- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/constants-i64.ll b/llvm/test/CodeGen/PowerPC/constants-i64.ll index 956845f5a5b35..38a765343fc74 100644 --- a/llvm/test/CodeGen/PowerPC/constants-i64.ll +++ b/llvm/test/CodeGen/PowerPC/constants-i64.ll @@ -80,47 +80,93 @@ entry: ; CHECK: blr } -define i64 @cn32_1() #0 { +define i64 @uint32_1() #0 { entry: ret i64 3900000000 -; CHECK-LABEL: @cn32_1 +; CHECK-LABEL: @uint32_1 ; CHECK: lis [[REG1:[0-9]+]], 232 ; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 30023 -; CHECK: sldi 3, [[REG1]], 8 +; CHECK: sldi 3, [[REG2]], 8 ; CHECK: blr } -define i32 @cn32_1_i32() #0 { +define i32 @uint32_1_i32() #0 { entry: ret i32 -394967296 -; CHECK-LABEL: @cn32_1_i32 +; CHECK-LABEL: @uint32_1_i32 ; CHECK: lis [[REG1:[0-9]+]], 232 ; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 30023 -; CHECK: sldi 3, [[REG1]], 8 +; CHECK: sldi 3, [[REG2]], 8 ; CHECK: blr } -define i64 @cn32_2() #0 { +define i64 @uint32_2() #0 { entry: ret i64 4294967295 -; CHECK-LABEL: @cn32_2 +; CHECK-LABEL: @uint32_2 ; CHECK: li [[REG1:[0-9]+]], 0 ; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65535 -; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 65535 +; CHECK: ori 3, [[REG2]], 65535 ; CHECK: blr } -define i32 @cn32_2_i32() #0 { +define i32 @uint32_2_i32() #0 { entry: ret i32 -1 -; CHECK-LABEL: @cn32_2_i32 +; CHECK-LABEL: @uint32_2_i32 ; CHECK: li [[REG1:[0-9]+]], 0 ; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65535 -; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 65535 +; CHECK: ori 3, [[REG2]], 65535 +; CHECK: blr +} + +define i64 @uint32_3() #0 { +entry: + ret i64 2147483648 + +; CHECK-LABEL: @uint32_3 +; CHECK: li [[REG1:[0-9]+]], 1 +; CHECK: sldi 3, [[REG1]], 31 +; CHECK: blr +} + +define i64 @uint32_4() #0 { +entry: + ret i64 124800000032 + +; CHECK-LABEL: @uint32_4 +; CHECK: li [[REG1:[0-9]+]], 29 +; CHECK: sldi [[REG2:[0-9]+]], [[REG1]], 32 +; CHECK: oris [[REG3:[0-9]+]], [[REG2]], 3752 +; CHECK: ori 3, [[REG3]], 57376 +; CHECK: blr +} + +define i64 @cn_ones_1() #0 { +entry: + ret i64 10460594175 + +; CHECK-LABEL: @cn_ones_1 +; CHECK: li [[REG1:[0-9]+]], 2 +; CHECK: sldi 
[[REG2:[0-9]+]], [[REG1]], 32 +; CHECK: oris [[REG3:[0-9]+]], [[REG2]], 28543 +; CHECK: ori 3, [[REG3]], 65535 +; CHECK: blr +} + +define i64 @cn_ones_2() #0 { +entry: + ret i64 10459119615 + +; CHECK-LABEL: @cn_ones_2 +; CHECK: li [[REG1:[0-9]+]], 2 +; CHECK: sldi [[REG2:[0-9]+]], [[REG1]], 32 +; CHECK: oris [[REG3:[0-9]+]], [[REG2]], 28521 +; CHECK: ori 3, [[REG3]], 32767 ; CHECK: blr } From e38be7091ee3d00430652aaa7b66ba3fc8394916 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Thu, 10 Sep 2020 14:27:27 +0000 Subject: [PATCH 0333/1079] [Clang] Clarify __builtin_memcpy_inline documentation This patch updates the documentation about `__builtin_memcpy_inline` and reorders the sections so it is more consistent and understandable. Differential Revision: https://reviews.llvm.org/D87458 --- clang/docs/LanguageExtensions.rst | 36 ++++++++++++++++++------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 60b3f21b3e500..073d9c86e22ff 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -2408,20 +2408,6 @@ with ``__has_feature(cxx_constexpr_string_builtins)``. Memory builtins --------------- - * ``__builtin_memcpy_inline`` - -.. code-block:: c - - void __builtin_memcpy_inline(void *dst, const void *src, size_t size); - -``__builtin_memcpy_inline(dst, src, size)`` is identical to -``__builtin_memcpy(dst, src, size)`` except that the generated code is -guaranteed not to call any external functions. See LLVM IR `llvm.memcpy.inline -<https://llvm.org/docs/LangRef.html#llvm-memcpy-inline-intrinsic>`_ Intrinsic -for more information. - -Note that the `size` argument must be a compile time constant. - Clang provides constant expression evaluation support for builtin forms of the following functions from the C standard library headers ``<cstring>`` and ``<cwchar>``: @@ -2439,7 +2425,27 @@ are pointers to arrays with the same trivially copyable element type, and the given size is an exact multiple of the element size that is no greater than the number of elements accessible through the source and destination operands. -Constant evaluation support is not yet provided for ``__builtin_memcpy_inline``. +Guaranteed inlined copy +^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c + + void __builtin_memcpy_inline(void *dst, const void *src, size_t size); + + +``__builtin_memcpy_inline`` has been designed as a building block for efficient +``memcpy`` implementations. It is identical to ``__builtin_memcpy`` but also
+guarantees not to call any external functions. See LLVM IR `llvm.memcpy.inline +<https://llvm.org/docs/LangRef.html#llvm-memcpy-inline-intrinsic>`_ Intrinsic +for more information. + +This is useful to implement a custom version of ``memcpy``, implement a +``libc`` memcpy, or work around the absence of a ``libc``. + +Note that the `size` argument must be a compile time constant. + +Note that this intrinsic cannot yet be called in a ``constexpr`` context. + Atomic Min/Max builtins with memory ordering -------------------------------------------- From 46416f08031f6fcaccd9f51430f7a71c5f510495 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 10 Sep 2020 12:37:34 +0300 Subject: =?UTF-8?q?[CodeGen]=20[WinException]=C2=A0Remov?= =?UTF-8?q?e=20a=20redundant=20explicit=20section=20switch=20for=20aarch64?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following EmitWinEHHandlerData() implicitly switches to .xdata, just like on x86_64. 
This became orphaned from the original code requiring it in 0b61d220c9b1f0 / https://reviews.llvm.org/D61095. Differential Revision: https://reviews.llvm.org/D87447 --- llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 9 --------- llvm/test/CodeGen/AArch64/win64-jumptable.ll | 1 - llvm/test/CodeGen/AArch64/wineh-mingw.ll | 3 +-- llvm/test/CodeGen/AArch64/wineh1.mir | 1 - 4 files changed, 1 insertion(+), 13 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index cd8077e7d5486..c47ac7e17b6a1 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -258,15 +258,6 @@ void WinException::endFuncletImpl() { if (F.hasPersonalityFn()) Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts()); - // On funclet exit, we emit a fake "function" end marker, so that the call - // to EmitWinEHHandlerData below can calculate the size of the funclet or - // function. - if (isAArch64) { - MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection( - Asm->OutStreamer->getCurrentSectionOnly()); - Asm->OutStreamer->SwitchSection(XData); - } - // Emit an UNWIND_INFO struct describing the prologue. Asm->OutStreamer->EmitWinEHHandlerData(); diff --git a/llvm/test/CodeGen/AArch64/win64-jumptable.ll b/llvm/test/CodeGen/AArch64/win64-jumptable.ll index 0c61bcd52366a..1983b2568cdee 100644 --- a/llvm/test/CodeGen/AArch64/win64-jumptable.ll +++ b/llvm/test/CodeGen/AArch64/win64-jumptable.ll @@ -44,7 +44,6 @@ declare void @g(i32, i32) ; CHECK: .word .LBB0_3-.LJTI0_0 ; CHECK: .word .LBB0_4-.LJTI0_0 ; CHECK: .word .LBB0_5-.LJTI0_0 -; CHECK: .section .xdata,"dr" ; CHECK: .seh_handlerdata ; CHECK: .text ; CHECK: .seh_endproc diff --git a/llvm/test/CodeGen/AArch64/wineh-mingw.ll b/llvm/test/CodeGen/AArch64/wineh-mingw.ll index ff1a55711b9ea..d22c61fca7575 100644 --- a/llvm/test/CodeGen/AArch64/wineh-mingw.ll +++ b/llvm/test/CodeGen/AArch64/wineh-mingw.ll @@ -36,8 +36,7 @@ endtryfinally: ; WINEH: .seh_proc foo4 ; WINEH: .seh_handler _d_eh_personality, @unwind, @except ; WINEH: ret -; WINEH: .section .xdata,"dr" -; WINEH-NEXT: .seh_handlerdata +; WINEH: .seh_handlerdata ; WINEH-NEXT: .text ; WINEH-NEXT: .seh_endproc ; WINEH: .section .xdata,"dr" diff --git a/llvm/test/CodeGen/AArch64/wineh1.mir b/llvm/test/CodeGen/AArch64/wineh1.mir index aed1550c54f73..2f73a5291ddd0 100644 --- a/llvm/test/CodeGen/AArch64/wineh1.mir +++ b/llvm/test/CodeGen/AArch64/wineh1.mir @@ -73,7 +73,6 @@ # ASM: .seh_endepilogue # ASM: .seh_endfunclet -# ASM: .section .xdata,"dr" # ASM: .seh_handlerdata # ASM: .text # ASM: .seh_endproc From 700fbe591ac0f29c76e9f2bd77d752d4bd56d274 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 7 Sep 2020 14:45:37 +0300 Subject: [PATCH 0335/1079] [MC] [Win64EH] Canonicalize ARM64 unwind opcodes Convert 2-byte opcodes to equivalent 1-byte ones. Adjust the existing exhaustive testcase to avoid being altered by the simplification rules (to keep that test exercising all individual opcodes). Fix the assembler parser limits for register pairs; for .seh_save_regp and .seh_save_regp_x, we can allow up to x29, for a x29+x30 pair (which gets remapped to the UOP_SaveFPLR(X) opcodes), for .seh_save_fregp and .seh_save_fregpx, allow up to d14+d15. Not creating .seh_save_next for float register pairs, as the actual unwinder implementation in current versions of Windows is buggy for that case. This gives a minimal but measurable size reduction. 
(For a 6.5 MB DLL with 300 KB .xdata, the .xdata shrinks by 48 bytes. The opcode sequences are padded to a 4 byte boundary, so very small improvements might not end up mattering directly.) Differential Revision: https://reviews.llvm.org/D87367 --- llvm/lib/MC/MCWin64EH.cpp | 61 ++++++++++ .../AArch64/AsmParser/AArch64AsmParser.cpp | 8 +- llvm/test/MC/AArch64/seh-optimize.s | 106 ++++++++++++++++++ llvm/test/MC/AArch64/seh.s | 18 +-- 4 files changed, 180 insertions(+), 13 deletions(-) create mode 100644 llvm/test/MC/AArch64/seh-optimize.s diff --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp index fb0de40fc6d5f..e9ab88234ad37 100644 --- a/llvm/lib/MC/MCWin64EH.cpp +++ b/llvm/lib/MC/MCWin64EH.cpp @@ -544,6 +544,63 @@ FindMatchingEpilog(const std::vector& EpilogInstrs, return nullptr; } +static void simplifyOpcodes(std::vector &Instructions, + bool Reverse) { + unsigned PrevOffset = -1; + unsigned PrevRegister = -1; + + auto VisitInstruction = [&](WinEH::Instruction &Inst) { + // Convert 2-byte opcodes into equivalent 1-byte ones. + if (Inst.Operation == Win64EH::UOP_SaveRegP && Inst.Register == 29) { + Inst.Operation = Win64EH::UOP_SaveFPLR; + } else if (Inst.Operation == Win64EH::UOP_SaveRegPX && + Inst.Register == 29) { + Inst.Operation = Win64EH::UOP_SaveFPLRX; + } else if (Inst.Operation == Win64EH::UOP_SaveRegPX && + Inst.Register == 19 && Inst.Offset <= 248) { + Inst.Operation = Win64EH::UOP_SaveR19R20X; + } else if (Inst.Operation == Win64EH::UOP_AddFP && Inst.Offset == 0) { + Inst.Operation = Win64EH::UOP_SetFP; + } else if (Inst.Operation == Win64EH::UOP_SaveRegP && + Inst.Register == PrevRegister + 2 && + Inst.Offset == PrevOffset + 16) { + Inst.Operation = Win64EH::UOP_SaveNext; + // Intentionally not creating UOP_SaveNext for float register pairs, + // as current versions of Windows (up to at least 20.04) is buggy + // regarding SaveNext for float pairs. + } + // Update info about the previous instruction, for detecting if + // the next one can be made a UOP_SaveNext + if (Inst.Operation == Win64EH::UOP_SaveR19R20X) { + PrevOffset = 0; + PrevRegister = 19; + } else if (Inst.Operation == Win64EH::UOP_SaveRegPX) { + PrevOffset = 0; + PrevRegister = Inst.Register; + } else if (Inst.Operation == Win64EH::UOP_SaveRegP) { + PrevOffset = Inst.Offset; + PrevRegister = Inst.Register; + } else if (Inst.Operation == Win64EH::UOP_SaveNext) { + PrevRegister += 2; + PrevOffset += 16; + } else { + PrevRegister = -1; + PrevOffset = -1; + } + }; + + // Iterate over instructions in a forward order (for prologues), + // backwards for epilogues (i.e. always reverse compared to how the + // opcodes are stored). + if (Reverse) { + for (auto It = Instructions.rbegin(); It != Instructions.rend(); It++) + VisitInstruction(*It); + } else { + for (WinEH::Instruction &Inst : Instructions) + VisitInstruction(Inst); + } +} + // Populate the .xdata section. 
The format of .xdata on ARM64 is documented at // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) { @@ -572,6 +629,10 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) { return; } + simplifyOpcodes(info->Instructions, false); + for (auto &I : info->EpilogMap) + simplifyOpcodes(I.second, true); + MCContext &context = streamer.getContext(); MCSymbol *Label = context.createTempSymbol(); diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 08a29bbb3e87a..502966c633676 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -5725,7 +5725,7 @@ bool AArch64AsmParser::parseDirectiveSEHSaveRegX(SMLoc L) { bool AArch64AsmParser::parseDirectiveSEHSaveRegP(SMLoc L) { unsigned Reg; int64_t Offset; - if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) || + if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::FP) || parseComma() || parseImmExpr(Offset)) return true; getTargetStreamer().EmitARM64WinCFISaveRegP(Reg, Offset); @@ -5737,7 +5737,7 @@ bool AArch64AsmParser::parseDirectiveSEHSaveRegP(SMLoc L) { bool AArch64AsmParser::parseDirectiveSEHSaveRegPX(SMLoc L) { unsigned Reg; int64_t Offset; - if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::X28) || + if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::FP) || parseComma() || parseImmExpr(Offset)) return true; getTargetStreamer().EmitARM64WinCFISaveRegPX(Reg, Offset); @@ -5789,7 +5789,7 @@ bool AArch64AsmParser::parseDirectiveSEHSaveFRegX(SMLoc L) { bool AArch64AsmParser::parseDirectiveSEHSaveFRegP(SMLoc L) { unsigned Reg; int64_t Offset; - if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D15) || + if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D14) || parseComma() || parseImmExpr(Offset)) return true; getTargetStreamer().EmitARM64WinCFISaveFRegP(Reg, Offset); @@ -5801,7 +5801,7 @@ bool AArch64AsmParser::parseDirectiveSEHSaveFRegP(SMLoc L) { bool AArch64AsmParser::parseDirectiveSEHSaveFRegPX(SMLoc L) { unsigned Reg; int64_t Offset; - if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D15) || + if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D14) || parseComma() || parseImmExpr(Offset)) return true; getTargetStreamer().EmitARM64WinCFISaveFRegPX(Reg, Offset); diff --git a/llvm/test/MC/AArch64/seh-optimize.s b/llvm/test/MC/AArch64/seh-optimize.s new file mode 100644 index 0000000000000..0bf33af9cc75f --- /dev/null +++ b/llvm/test/MC/AArch64/seh-optimize.s @@ -0,0 +1,106 @@ +// This test checks that the unwinding opcodes are remapped to more +// efficient ones where possible. + +// RUN: llvm-mc -triple aarch64-pc-win32 -filetype=obj %s -o %t.o +// RUN: llvm-readobj -u %t.o | FileCheck %s + +// CHECK: UnwindInformation [ +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func +// CHECK-NEXT: ExceptionRecord: .xdata +// CHECK-NEXT: ExceptionData { +// CHECK: Prologue [ +// CHECK-NEXT: 0xd882 ; stp d10, d11, [sp, #16] +// CHECK-NEXT: 0xda07 ; stp d8, d9, [sp, #-64]! +// CHECK-NEXT: 0xe6 ; save next +// CHECK-NEXT: 0x28 ; stp x19, x20, [sp, #-64]! +// CHECK-NEXT: 0xca49 ; stp x28, x29, [sp, #72] +// CHECK-NEXT: 0xe6 ; save next +// CHECK-NEXT: 0xe6 ; save next +// CHECK-NEXT: 0xe6 ; save next +// CHECK-NEXT: 0xcc47 ; stp x20, x21, [sp, #-64]! 
+// CHECK-NEXT: 0x42 ; stp x29, x30, [sp, #16] +// CHECK-NEXT: 0xca02 ; stp x27, x28, [sp, #16] +// CHECK-NEXT: 0x83 ; stp x29, x30, [sp, #-32]! +// CHECK-NEXT: 0xce03 ; stp x27, x28, [sp, #-32]! +// CHECK-NEXT: 0xe1 ; mov fp, sp +// CHECK-NEXT: 0xe201 ; add fp, sp, #8 +// CHECK-NEXT: 0xe4 ; end +// CHECK-NEXT: ] +// CHECK-NEXT: EpilogueScopes [ +// CHECK-NEXT: EpilogueScope { +// CHECK: Opcodes [ +// CHECK-NEXT: 0xc904 ; ldp x23, x24, [sp, #32] +// CHECK-NEXT: 0xe6 ; restore next +// CHECK-NEXT: 0xcc83 ; ldp x21, x22, [sp], #32 +// CHECK-NEXT: 0x24 ; ldp x19, x20, [sp], #32 +// CHECK-NEXT: 0xcc1f ; ldp x19, x20, [sp], #256 +// CHECK-NEXT: 0xe4 ; end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: ] + + + .text + .globl func + .seh_proc func +func: + add x29, sp, #8 + .seh_add_fp 8 + add x29, sp, #0 + .seh_add_fp 0 + + stp x27, x28, [sp, #-32]! + .seh_save_regp_x x27, 32 + stp x29, x30, [sp, #-32]! + .seh_save_regp_x x29, 32 + + stp x27, x28, [sp, #16] + .seh_save_regp x27, 16 + stp x29, x30, [sp, #16] + .seh_save_regp x29, 16 + + stp x20, x21, [sp, #-64]! + .seh_save_regp_x x20, 64 + stp x22, x23, [sp, #16] + .seh_save_regp x22, 16 + stp x24, x25, [sp, #32] + .seh_save_next + stp x26, x27, [sp, #48] + .seh_save_regp x26, 48 + stp x28, x29, [sp, #72] + .seh_save_regp x28, 72 + + stp x19, x20, [sp, #-64]! + .seh_save_r19r20_x 64 + stp x21, x22, [sp, #16] + .seh_save_regp x21, 16 + + stp d8, d9, [sp, #-64]! + .seh_save_fregp_x d8, 64 + stp d10, d11, [sp, #16] + // This is intentionally not converted into a save_next, to avoid + // bugs in the windows unwinder. + .seh_save_fregp d10, 16 + + .seh_endprologue + + nop + + .seh_startepilogue + ldp x27, x28, [sp, #32] + .seh_save_regp x23, 32 + ldp x23, x24, [sp, #16] + .seh_save_regp x23, 16 + ldp x21, x22, [sp], #32 + .seh_save_regp_x x21, 32 + ldp x19, x20, [sp], #32 + .seh_save_regp_x x19, 32 + ldp x19, x20, [sp], #256 + .seh_save_regp_x x19, 256 + .seh_endepilogue + ret + .seh_endproc diff --git a/llvm/test/MC/AArch64/seh.s b/llvm/test/MC/AArch64/seh.s index f7faa64b9309a..4e235d032d68e 100644 --- a/llvm/test/MC/AArch64/seh.s +++ b/llvm/test/MC/AArch64/seh.s @@ -64,8 +64,8 @@ // CHECK-NEXT: 0xe202 ; add fp, sp, #16 // CHECK-NEXT: 0xdd41 ; str d13, [sp, #8] // CHECK-NEXT: 0xde83 ; str d12, [sp, #-32]! -// CHECK-NEXT: 0xd882 ; stp d10, d11, [sp, #16] -// CHECK-NEXT: 0xda03 ; stp d8, d9, [sp, #-32]! +// CHECK-NEXT: 0xd884 ; stp d10, d11, [sp, #32] +// CHECK-NEXT: 0xda05 ; stp d8, d9, [sp, #-48]! // CHECK-NEXT: 0x83 ; stp x29, x30, [sp, #-32]! // CHECK-NEXT: 0x46 ; stp x29, x30, [sp, #48] // CHECK-NEXT: 0xd141 ; str x24, [sp, #8] @@ -74,7 +74,7 @@ // CHECK-NEXT: 0xc882 ; stp x21, x22, [sp, #16] // CHECK-NEXT: 0xd6c2 ; stp x25, lr, [sp, #16] // CHECK-NEXT: 0x24 ; stp x19, x20, [sp, #-32]! -// CHECK-NEXT: 0xcc03 ; stp x19, x20, [sp, #-32]! +// CHECK-NEXT: 0xcc83 ; stp x21, x22, [sp, #-32]! // CHECK-NEXT: 0x83 ; stp x29, x30, [sp, #-32]! // CHECK-NEXT: 0xe1 ; mov fp, sp // CHECK-NEXT: 0x01 ; sub sp, #16 @@ -113,8 +113,8 @@ func: .seh_set_fp stp x29, x30, [sp, #-32]! .seh_save_fplr_x 32 - stp x19, x20, [sp, #-32]! - .seh_save_regp_x x19, 32 + stp x21, x22, [sp, #-32]! + .seh_save_regp_x x21, 32 stp x19, x20, [sp, #-32]! .seh_save_r19r20_x 32 stp x25, x30, [sp, #16] @@ -131,10 +131,10 @@ func: .seh_save_fplr 48 stp x29, x30, [sp, #-32]! .seh_save_fplr_x 32 - stp d8, d9, [sp, #-32]! - .seh_save_fregp_x d8, 32 - stp d10, d11, [sp, #16] - .seh_save_fregp d10, 16 + stp d8, d9, [sp, #-48]! 
+ .seh_save_fregp_x d8, 48 + stp d10, d11, [sp, #32] + .seh_save_fregp d10, 32 str d12, [sp, #-32]! .seh_save_freg_x d12, 32 str d13, [sp, #8] From 1308bb99e06752ab0b5175c92da31083f91af921 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 8 Sep 2020 00:00:07 +0300 Subject: [PATCH 0336/1079] [MC] [Win64EH] Write packed ARM64 epilogues if possible This gives a pretty substantial size reduction; for a 6.5 MB DLL with 300 KB .xdata, the .xdata shrinks by 66 KB. Differential Revision: https://reviews.llvm.org/D87369 --- llvm/include/llvm/MC/MCWinEH.h | 8 + llvm/lib/MC/MCWin64EH.cpp | 57 ++++++- llvm/test/CodeGen/AArch64/wineh3.mir | 22 +-- llvm/test/CodeGen/AArch64/wineh6.mir | 20 +-- llvm/test/CodeGen/AArch64/wineh7.mir | 19 +-- llvm/test/MC/AArch64/seh-packed-epilog.s | 187 +++++++++++++++++++++++ llvm/test/MC/AArch64/seh.s | 16 +- 7 files changed, 266 insertions(+), 63 deletions(-) create mode 100644 llvm/test/MC/AArch64/seh-packed-epilog.s diff --git a/llvm/include/llvm/MC/MCWinEH.h b/llvm/include/llvm/MC/MCWinEH.h index 53cffccce8c1a..f05f5f1641cd0 100644 --- a/llvm/include/llvm/MC/MCWinEH.h +++ b/llvm/include/llvm/MC/MCWinEH.h @@ -26,6 +26,14 @@ struct Instruction { Instruction(unsigned Op, MCSymbol *L, unsigned Reg, unsigned Off) : Label(L), Offset(Off), Register(Reg), Operation(Op) {} + + bool operator==(const Instruction &I) const { + // Check whether two instructions refer to the same operation + // applied at a different spot (i.e. pointing at a different label). + return Offset == I.Offset && Register == I.Register && + Operation == I.Operation; + } + bool operator!=(const Instruction &I) const { return !(*this == I); } }; struct FrameInfo { diff --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp index e9ab88234ad37..a585b50828379 100644 --- a/llvm/lib/MC/MCWin64EH.cpp +++ b/llvm/lib/MC/MCWin64EH.cpp @@ -264,8 +264,7 @@ static int64_t GetAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS, return value; } -static uint32_t -ARM64CountOfUnwindCodes(const std::vector &Insns) { +static uint32_t ARM64CountOfUnwindCodes(ArrayRef Insns) { uint32_t Count = 0; for (const auto &I : Insns) { switch (static_cast(I.Operation)) { @@ -553,18 +552,23 @@ static void simplifyOpcodes(std::vector &Instructions, // Convert 2-byte opcodes into equivalent 1-byte ones. if (Inst.Operation == Win64EH::UOP_SaveRegP && Inst.Register == 29) { Inst.Operation = Win64EH::UOP_SaveFPLR; + Inst.Register = -1; } else if (Inst.Operation == Win64EH::UOP_SaveRegPX && Inst.Register == 29) { Inst.Operation = Win64EH::UOP_SaveFPLRX; + Inst.Register = -1; } else if (Inst.Operation == Win64EH::UOP_SaveRegPX && Inst.Register == 19 && Inst.Offset <= 248) { Inst.Operation = Win64EH::UOP_SaveR19R20X; + Inst.Register = -1; } else if (Inst.Operation == Win64EH::UOP_AddFP && Inst.Offset == 0) { Inst.Operation = Win64EH::UOP_SetFP; } else if (Inst.Operation == Win64EH::UOP_SaveRegP && Inst.Register == PrevRegister + 2 && Inst.Offset == PrevOffset + 16) { Inst.Operation = Win64EH::UOP_SaveNext; + Inst.Register = -1; + Inst.Offset = 0; // Intentionally not creating UOP_SaveNext for float register pairs, // as current versions of Windows (up to at least 20.04) is buggy // regarding SaveNext for float pairs. 
@@ -601,6 +605,47 @@ static void simplifyOpcodes(std::vector<WinEH::Instruction> &Instructions,
   }
 }

+static int checkPackedEpilog(MCStreamer &streamer, WinEH::FrameInfo *info,
+                             int PrologCodeBytes) {
+  // Can only pack if there's one single epilog
+  if (info->EpilogMap.size() != 1)
+    return -1;
+
+  const std::vector<WinEH::Instruction> &Epilog =
+      info->EpilogMap.begin()->second;
+
+  // Can pack if the epilog is a subset of the prolog but not vice versa
+  if (Epilog.size() > info->Instructions.size())
+    return -1;
+
+  // Check that the epilog actually is a perfect match for the end (backwards)
+  // of the prolog.
+  for (int I = Epilog.size() - 1; I >= 0; I--) {
+    if (info->Instructions[I] != Epilog[Epilog.size() - 1 - I])
+      return -1;
+  }
+
+  // Check that the epilog actually is at the very end of the function,
+  // otherwise it can't be packed.
+  uint32_t DistanceFromEnd = (uint32_t)GetAbsDifference(
+      streamer, info->FuncletOrFuncEnd, info->EpilogMap.begin()->first);
+  if (DistanceFromEnd / 4 != Epilog.size())
+    return -1;
+
+  int Offset = ARM64CountOfUnwindCodes(
+      ArrayRef<WinEH::Instruction>(&info->Instructions[Epilog.size()],
+                                   info->Instructions.size() - Epilog.size()));
+
+  // Check that the offset and prolog size fit in the first word; it's
+  // unclear whether the epilog count in the extension word can be taken
+  // as packed epilog offset.
+  if (Offset > 31 || PrologCodeBytes > 124)
+    return -1;
+
+  info->EpilogMap.clear();
+  return Offset;
+}
+
 // Populate the .xdata section. The format of .xdata on ARM64 is documented at
 // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
 static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
@@ -679,6 +724,8 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
   uint32_t PrologCodeBytes = ARM64CountOfUnwindCodes(info->Instructions);
   uint32_t TotalCodeBytes = PrologCodeBytes;

+  int PackedEpilogOffset = checkPackedEpilog(streamer, info, PrologCodeBytes);
+
   // Process epilogs.
   MapVector<MCSymbol *, uint32_t> EpilogInfo;
   // Epilogs processed so far.
@@ -711,15 +758,17 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
   uint32_t CodeWordsMod = TotalCodeBytes % 4;
   if (CodeWordsMod)
     CodeWords++;
-  uint32_t EpilogCount = info->EpilogMap.size();
+  uint32_t EpilogCount =
+      PackedEpilogOffset >= 0 ? PackedEpilogOffset : info->EpilogMap.size();
   bool ExtensionWord = EpilogCount > 31 || TotalCodeBytes > 124;
   if (!ExtensionWord) {
     row1 |= (EpilogCount & 0x1F) << 22;
     row1 |= (CodeWords & 0x1F) << 27;
   }
-  // E is always 0 right now, TODO: packed epilog setup
   if (info->HandlesExceptions) // X
     row1 |= 1 << 20;
+  if (PackedEpilogOffset >= 0) // E
+    row1 |= 1 << 21;
   row1 |= FuncLength & 0x3FFFF;
   streamer.emitInt32(row1);

diff --git a/llvm/test/CodeGen/AArch64/wineh3.mir b/llvm/test/CodeGen/AArch64/wineh3.mir
index 6cbe7f42dc5ec..d1ffa4aedc085 100644
--- a/llvm/test/CodeGen/AArch64/wineh3.mir
+++ b/llvm/test/CodeGen/AArch64/wineh3.mir
@@ -8,9 +8,9 @@
 # CHECK-NEXT: FunctionLength: 124
 # CHECK-NEXT: Version: 0
 # CHECK-NEXT: ExceptionData: No
-# CHECK-NEXT: EpiloguePacked: No
-# CHECK-NEXT: EpilogueScopes: 1
-# CHECK-NEXT: ByteCodeLength: 32
+# CHECK-NEXT: EpiloguePacked: Yes
+# CHECK-NEXT: EpilogueOffset: 0
+# CHECK-NEXT: ByteCodeLength: 16
 # CHECK-NEXT: Prologue [
 # CHECK-NEXT: 0xc80c ; stp x19, x20, [sp, #96]
 # CHECK-NEXT: 0xc88a ; stp x21, x22, [sp, #80]
@@ -21,22 +21,6 @@
 # CHECK-NEXT: 0xda8d ; stp d10, d11, [sp, #-112]!
# CHECK-NEXT: 0xe4 ; end # CHECK-NEXT: ] -# CHECK-NEXT: EpilogueScopes [ -# CHECK-NEXT: EpilogueScope { -# CHECK-NEXT: StartOffset: 23 -# CHECK-NEXT: EpilogueStartIndex: 15 -# CHECK-NEXT: Opcodes [ -# CHECK-NEXT: 0xc80c ; ldp x19, x20, [sp, #96] -# CHECK-NEXT: 0xc88a ; ldp x21, x22, [sp, #80] -# CHECK-NEXT: 0xc908 ; ldp x23, x24, [sp, #64] -# CHECK-NEXT: 0xc986 ; ldp x25, x26, [sp, #48] -# CHECK-NEXT: 0xca04 ; ldp x27, x28, [sp, #32] -# CHECK-NEXT: 0xd802 ; ldp d8, d9, [sp, #16] -# CHECK-NEXT: 0xda8d ; ldp d10, d11, [sp], #112 -# CHECK-NEXT: 0xe4 ; end -# CHECK-NEXT: ] -# CHECK-NEXT: } -# CHECK-NEXT: ] # CHECK-NEXT: } ... --- diff --git a/llvm/test/CodeGen/AArch64/wineh6.mir b/llvm/test/CodeGen/AArch64/wineh6.mir index 95a11aa3c4e82..e7592bd711460 100644 --- a/llvm/test/CodeGen/AArch64/wineh6.mir +++ b/llvm/test/CodeGen/AArch64/wineh6.mir @@ -6,25 +6,19 @@ # CHECK-NEXT: FunctionLength: 92 # CHECK-NEXT: Version: 0 # CHECK-NEXT: ExceptionData: No -# CHECK-NEXT: EpiloguePacked: No -# CHECK-NEXT: EpilogueScopes: 1 -# CHECK-NEXT: ByteCodeLength: 8 +# CHECK-NEXT: EpiloguePacked: Yes +# CHECK-NEXT: EpilogueOffset: 1 +# CHECK-NEXT: ByteCodeLength: 4 # CHECK-NEXT: Prologue [ # CHECK-NEXT: 0x02 ; sub sp, #32 # CHECK-NEXT: 0xe1 ; mov fp, sp # CHECK-NEXT: 0x81 ; stp x29, x30, [sp, #-16]! # CHECK-NEXT: 0xe4 ; end # CHECK-NEXT: ] -# CHECK-NEXT: EpilogueScopes [ -# CHECK-NEXT: EpilogueScope { -# CHECK-NEXT: StartOffset: 20 -# CHECK-NEXT: EpilogueStartIndex: 4 -# CHECK-NEXT: Opcodes [ -# CHECK-NEXT: 0xe1 ; mov sp, fp -# CHECK-NEXT: 0x81 ; ldp x29, x30, [sp], #16 -# CHECK-NEXT: 0xe4 ; end -# CHECK-NEXT: ] -# CHECK-NEXT: } +# CHECK-NEXT: Epilogue [ +# CHECK-NEXT: 0xe1 ; mov sp, fp +# CHECK-NEXT: 0x81 ; ldp x29, x30, [sp], #16 +# CHECK-NEXT: 0xe4 ; end # CHECK-NEXT: ] # CHECK-NEXT: } ... diff --git a/llvm/test/CodeGen/AArch64/wineh7.mir b/llvm/test/CodeGen/AArch64/wineh7.mir index da64b3c002f3d..6bf06d80861a4 100644 --- a/llvm/test/CodeGen/AArch64/wineh7.mir +++ b/llvm/test/CodeGen/AArch64/wineh7.mir @@ -6,9 +6,9 @@ # CHECK-NEXT: FunctionLength: 72 # CHECK-NEXT: Version: 0 # CHECK-NEXT: ExceptionData: No -# CHECK-NEXT: EpiloguePacked: No -# CHECK-NEXT: EpilogueScopes: 1 -# CHECK-NEXT: ByteCodeLength: 16 +# CHECK-NEXT: EpiloguePacked: Yes +# CHECK-NEXT: EpilogueOffset: 0 +# CHECK-NEXT: ByteCodeLength: 8 # CHECK-NEXT: Prologue [ # CHECK-NEXT: 0xe204 ; add fp, sp, #32 # CHECK-NEXT: 0x44 ; stp x29, x30, [sp, #32] @@ -16,19 +16,6 @@ # CHECK-NEXT: 0xcc85 ; stp x21, x22, [sp, #-48]! # CHECK-NEXT: 0xe4 ; end # CHECK-NEXT: ] -# CHECK-NEXT: EpilogueScopes [ -# CHECK-NEXT: EpilogueScope { -# CHECK-NEXT: StartOffset: 13 -# CHECK-NEXT: EpilogueStartIndex: 8 -# CHECK-NEXT: Opcodes [ -# CHECK-NEXT: 0xe204 ; sub sp, fp, #32 -# CHECK-NEXT: 0x44 ; ldp x29, x30, [sp, #32] -# CHECK-NEXT: 0xc802 ; ldp x19, x20, [sp, #16] -# CHECK-NEXT: 0xcc85 ; ldp x21, x22, [sp], #48 -# CHECK-NEXT: 0xe4 ; end -# CHECK-NEXT: ] -# CHECK-NEXT: } -# CHECK-NEXT: ] # CHECK-NEXT: } # CHECK-NEXT: } diff --git a/llvm/test/MC/AArch64/seh-packed-epilog.s b/llvm/test/MC/AArch64/seh-packed-epilog.s new file mode 100644 index 0000000000000..f9978ea7a1139 --- /dev/null +++ b/llvm/test/MC/AArch64/seh-packed-epilog.s @@ -0,0 +1,187 @@ +// This test checks that the epilogue is packed where possible. 
+ +// RUN: llvm-mc -triple aarch64-pc-win32 -filetype=obj %s -o %t.o +// RUN: llvm-readobj -u %t.o | FileCheck %s + +// CHECK: UnwindInformation [ +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func +// CHECK-NEXT: ExceptionRecord: .xdata +// CHECK-NEXT: ExceptionData { +// CHECK-NEXT: FunctionLength: +// CHECK-NEXT: Version: +// CHECK-NEXT: ExceptionData: +// CHECK-NEXT: EpiloguePacked: Yes +// CHECK-NEXT: EpilogueOffset: 2 +// CHECK-NEXT: ByteCodeLength: +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: 0xdc04 ; str d8, [sp, #32] +// CHECK-NEXT: 0xe1 ; mov fp, sp +// CHECK-NEXT: 0x42 ; stp x29, x30, [sp, #16] +// CHECK-NEXT: 0x85 ; stp x29, x30, [sp, #-48]! +// CHECK-NEXT: 0xe6 ; save next +// CHECK-NEXT: 0x24 ; stp x19, x20, [sp, #-32]! +// CHECK-NEXT: 0xc842 ; stp x20, x21, [sp, #16] +// CHECK-NEXT: 0x03 ; sub sp, #48 +// CHECK-NEXT: 0xe4 ; end +// CHECK-NEXT: ] +// CHECK-NEXT: Epilogue [ +// CHECK-NEXT: 0xe1 ; mov sp, fp +// CHECK-NEXT: 0x42 ; ldp x29, x30, [sp, #16] +// CHECK-NEXT: 0x85 ; ldp x29, x30, [sp], #48 +// CHECK-NEXT: 0xe6 ; restore next +// CHECK-NEXT: 0x24 ; ldp x19, x20, [sp], #32 +// CHECK-NEXT: 0xc842 ; ldp x20, x21, [sp, #16] +// CHECK-NEXT: 0x03 ; add sp, #48 +// CHECK-NEXT: 0xe4 ; end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK: RuntimeFunction { +// CHECK-NEXT: Function: packed2 +// CHECK-NEXT: ExceptionRecord: +// CHECK-NEXT: ExceptionData { +// CHECK: ExceptionData: +// CHECK-NEXT: EpiloguePacked: Yes +// CHECK: RuntimeFunction { +// CHECK-NEXT: Function: nonpacked1 +// CHECK-NEXT: ExceptionRecord: +// CHECK-NEXT: ExceptionData { +// CHECK: ExceptionData: +// CHECK-NEXT: EpiloguePacked: No +// CHECK: RuntimeFunction { +// CHECK-NEXT: Function: nonpacked2 +// CHECK-NEXT: ExceptionRecord: +// CHECK-NEXT: ExceptionData { +// CHECK: ExceptionData: +// CHECK-NEXT: EpiloguePacked: No +// CHECK: RuntimeFunction { +// CHECK-NEXT: Function: nonpacked3 +// CHECK-NEXT: ExceptionRecord: +// CHECK-NEXT: ExceptionData { +// CHECK: ExceptionData: +// CHECK-NEXT: EpiloguePacked: No + + .text + .globl func + .seh_proc func +func: + sub sp, sp, #48 + .seh_stackalloc 48 + // Check that canonical opcode forms (r19r20_x, fplr, fplr_x, save_next, + // set_fp) are treated as a match even if one (in prologue or epilogue) + // was simplified from the more generic opcodes. + stp x20, x21, [sp, #16] + .seh_save_regp x20, 16 + stp x19, x20, [sp, #-32]! + .seh_save_r19r20_x 32 + stp x21, x22, [sp, #16] + .seh_save_regp x21, 16 + stp x29, x30, [sp, #-48]! + .seh_save_regp_x x29, 48 + stp x29, x30, [sp, #16] + .seh_save_regp x29, 16 + add x29, sp, #0 + .seh_add_fp 0 + str d8, [sp, #32] + .seh_save_freg d8, 32 + .seh_endprologue + + nop + + .seh_startepilogue + mov sp, x29 + .seh_set_fp + ldp x29, x30, [sp, #16] + .seh_save_fplr 16 + ldp x29, x30, [sp, #-48]! + .seh_save_fplr_x 48 + ldp x21, x22, [sp, #16] + .seh_save_next + ldp x19, x20, [sp], #32 + .seh_save_regp_x x19, 32 + ldp x20, x21, [sp, #16] + .seh_save_regp x20, 16 + add sp, sp, #48 + .seh_stackalloc 48 + .seh_endepilogue + ret + .seh_endproc + + + // Test a perfectly matching epilog with no offset. + .seh_proc packed2 +packed2: + sub sp, sp, #48 + .seh_stackalloc 48 + stp x29, lr, [sp, #-32]! 
+ .seh_save_fplr_x 32 + .seh_endprologue + nop + .seh_startepilogue + ldp x29, lr, [sp], #32 + .seh_save_fplr_x 32 + add sp, sp, #48 + .seh_stackalloc 48 + .seh_endepilogue + ret + .seh_endproc + + + .seh_proc nonpacked1 +nonpacked1: + sub sp, sp, #48 + .seh_stackalloc 48 + .seh_endprologue + + nop + .seh_startepilogue + add sp, sp, #48 + .seh_stackalloc 48 + .seh_endepilogue + // This epilogue isn't packed with the prologue, as it doesn't align with + // the end of the function (one extra nop before the ret). + nop + ret + .seh_endproc + + + .seh_proc nonpacked2 +nonpacked2: + sub sp, sp, #48 + .seh_stackalloc 48 + sub sp, sp, #32 + .seh_stackalloc 32 + .seh_endprologue + + nop + .seh_startepilogue + // Not packed; the epilogue mismatches at the second opcode. + add sp, sp, #16 + .seh_stackalloc 16 + add sp, sp, #48 + .seh_stackalloc 48 + .seh_endepilogue + ret + .seh_endproc + + .seh_proc nonpacked3 +nonpacked3: + sub sp, sp, #48 + .seh_stackalloc 48 + sub sp, sp, #32 + .seh_stackalloc 32 + .seh_endprologue + + nop + .seh_startepilogue + // Not packed; the epilogue is longer than the prologue. + mov sp, x29 + .seh_set_fp + add sp, sp, #32 + .seh_stackalloc 32 + add sp, sp, #48 + .seh_stackalloc 48 + .seh_endepilogue + ret + .seh_endproc diff --git a/llvm/test/MC/AArch64/seh.s b/llvm/test/MC/AArch64/seh.s index 4e235d032d68e..0da956cbf2f5d 100644 --- a/llvm/test/MC/AArch64/seh.s +++ b/llvm/test/MC/AArch64/seh.s @@ -20,7 +20,7 @@ // CHECK-NEXT: } // CHECK: Section { // CHECK: Name: .xdata -// CHECK: RawDataSize: 56 +// CHECK: RawDataSize: 52 // CHECK: RelocationCount: 1 // CHECK: Characteristics [ // CHECK-NEXT: ALIGN_4BYTES @@ -41,7 +41,7 @@ // CHECK-NEXT: Relocations [ // CHECK-NEXT: Section (4) .xdata { -// CHECK-NEXT: 0x2C IMAGE_REL_ARM64_ADDR32NB __C_specific_handler +// CHECK-NEXT: 0x28 IMAGE_REL_ARM64_ADDR32NB __C_specific_handler // CHECK-NEXT: } // CHECK-NEXT: Section (5) .pdata { // CHECK-NEXT: 0x0 IMAGE_REL_ARM64_ADDR32NB func @@ -80,15 +80,9 @@ // CHECK-NEXT: 0x01 ; sub sp, #16 // CHECK-NEXT: 0xe4 ; end // CHECK-NEXT: ] -// CHECK-NEXT: EpilogueScopes [ -// CHECK-NEXT: EpilogueScope { -// CHECK-NEXT: StartOffset: 23 -// CHECK-NEXT: EpilogueStartIndex: 33 -// CHECK-NEXT: Opcodes [ -// CHECK-NEXT: 0x01 ; add sp, #16 -// CHECK-NEXT: 0xe4 ; end -// CHECK-NEXT: ] -// CHECK-NEXT: } +// CHECK-NEXT: Epilogue [ +// CHECK-NEXT: 0x01 ; add sp, #16 +// CHECK-NEXT: 0xe4 ; end // CHECK-NEXT: ] // CHECK-NEXT: ExceptionHandler [ // CHECK-NEXT: Routine: __C_specific_handler (0x0) From 28012e00d80b994ef0709377da15e2b25e6c0b72 Mon Sep 17 00:00:00 2001 From: Yevgeny Rouban Date: Fri, 11 Sep 2020 12:55:24 +0700 Subject: [PATCH 0337/1079] [NewPM] Introduce PreserveCFG check Check that all passes, which report they preserve CFG, are really preserving CFG. A new standard instrumentation is introduced. It can be switched on/off by the flag verify-cfg-preserved, which is on by default for debug builds. 
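In essence, the checker snapshots each function's successor map before
a pass runs and, if the pass later claims the CFG analyses were
preserved, compares the snapshot against the rebuilt map. A minimal
standalone sketch of that idea follows; the EdgeMap, beforePass and
afterPass names are illustrative only, not the names used in this
patch:

  #include <cassert>
  #include <map>
  #include <string>
  #include <utility>
  #include <vector>

  // Snapshot: non-leaf block -> (successor -> edge multiplicity).
  using EdgeMap = std::map<std::string, std::map<std::string, unsigned>>;

  // Stack of (pass name, snapshot), so nested pass runs pair up correctly.
  static std::vector<std::pair<std::string, EdgeMap>> Stack;

  void beforePass(const std::string &Pass, const EdgeMap &G) {
    Stack.emplace_back(Pass, G);
  }

  // Returns false when a pass claimed to preserve the CFG but changed it.
  bool afterPass(const std::string &Pass, const EdgeMap &G,
                 bool ClaimsCFGPreserved) {
    auto Before = Stack.back();
    Stack.pop_back();
    assert(Before.first == Pass && "before/after callbacks must pair up");
    // Maps compare by contents, so successor order does not matter,
    // but edge multiplicities do.
    return !ClaimsCFGPreserved || Before.second == G;
  }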
Reviewers: kuhar, fedor.sergeev

Differential Revision: https://reviews.llvm.org/D81558
---
 .../llvm/Passes/StandardInstrumentations.h    |  52 ++++++
 llvm/lib/Passes/StandardInstrumentations.cpp  | 164 ++++++++++++++++++
 2 files changed, 216 insertions(+)

diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h
index 795e2770bbe18..76e217c899745 100644
--- a/llvm/include/llvm/Passes/StandardInstrumentations.h
+++ b/llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -17,8 +17,11 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/PassInstrumentation.h"
 #include "llvm/IR/PassTimingInfo.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/CommandLine.h"
 #include <string>
 #include <utility>

@@ -26,6 +29,7 @@
 namespace llvm {

 class Module;
+class Function;

 /// Instrumentation to print IR before/after passes.
 ///
@@ -73,6 +77,53 @@ class PrintPassInstrumentation {
   bool DebugLogging;
 };

+class PreservedCFGCheckerInstrumentation {
+private:
+  // CFG is a map BB -> {(Succ, Multiplicity)}, where BB is a non-leaf basic
+  // block and {(Succ, Multiplicity)} is the set of all pairs of the block's
+  // successors and the multiplicity of the edge (BB->Succ). As the mapped
+  // sets are unordered, the order of successors is not tracked by the CFG;
+  // in other words, this allows basic block successors to be swapped by a
+  // pass without reporting a CFG change. The CFG can be guarded by basic
+  // block tracking pointers in the Graph (BBGuard). That is, if any of the
+  // blocks is deleted or RAUWed, then the CFG is treated as poisoned and no
+  // block pointer of the Graph is used.
+  struct CFG {
+    struct BBGuard final : public CallbackVH {
+      BBGuard(const BasicBlock *BB) : CallbackVH(BB) {}
+      void deleted() override { CallbackVH::deleted(); }
+      void allUsesReplacedWith(Value *) override { CallbackVH::deleted(); }
+      bool isPoisoned() const { return !getValPtr(); }
+    };
+
+    Optional<DenseMap<intptr_t, BBGuard>> BBGuards;
+    DenseMap<const BasicBlock *, DenseMap<const BasicBlock *, unsigned>> Graph;
+
+    CFG(const Function *F, bool TrackBBLifetime = false);
+
+    bool operator==(const CFG &G) const {
+      return !isPoisoned() && !G.isPoisoned() && Graph == G.Graph;
+    }
+
+    bool isPoisoned() const {
+      if (BBGuards)
+        for (auto &BB : *BBGuards) {
+          if (BB.second.isPoisoned())
+            return true;
+        }
+      return false;
+    }
+
+    static void printDiff(raw_ostream &out, const CFG &Before,
+                          const CFG &After);
+  };
+
+  SmallVector<std::pair<StringRef, Optional<CFG>>, 8> GraphStackBefore;
+
+public:
+  static cl::opt<bool> VerifyPreservedCFG;
+  void registerCallbacks(PassInstrumentationCallbacks &PIC);
+};
+
 /// This class provides an interface to register all the standard pass
 /// instrumentations and manages their state (if any).
 class StandardInstrumentations {
@@ -80,6 +131,7 @@ class StandardInstrumentations {
   PrintPassInstrumentation PrintPass;
   TimePassesHandler TimePasses;
   OptNoneInstrumentation OptNone;
+  PreservedCFGCheckerInstrumentation PreservedCFGChecker;

 public:
   StandardInstrumentations(bool DebugLogging) : PrintPass(DebugLogging) {}
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index da58fa57bdae7..2ee373b912be0 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -36,6 +36,14 @@ static cl::opt<bool>
              cl::desc("Enable skipping optional passes optnone functions "
                       "under new pass manager"));

+cl::opt<bool> PreservedCFGCheckerInstrumentation::VerifyPreservedCFG(
+    "verify-cfg-preserved", cl::Hidden,
+#ifdef NDEBUG
+    cl::init(false));
+#else
+    cl::init(true));
+#endif
+
 // FIXME: Change `-debug-pass-manager` from boolean to enum type. Similar to
 // `-debug-pass` in legacy PM.
 static cl::opt<bool>
@@ -338,10 +346,166 @@ void PrintPassInstrumentation::registerCallbacks(
   });
 }

+PreservedCFGCheckerInstrumentation::CFG::CFG(const Function *F,
+                                             bool TrackBBLifetime) {
+  if (TrackBBLifetime)
+    BBGuards = DenseMap<intptr_t, BBGuard>(F->size());
+  for (const auto &BB : *F) {
+    if (BBGuards)
+      BBGuards->try_emplace(intptr_t(&BB), &BB);
+    for (auto *Succ : successors(&BB)) {
+      Graph[&BB][Succ]++;
+      if (BBGuards)
+        BBGuards->try_emplace(intptr_t(Succ), Succ);
+    }
+  }
+}
+
+static void printBBName(raw_ostream &out, const BasicBlock *BB) {
+  if (BB->hasName()) {
+    out << BB->getName() << "<" << BB << ">";
+    return;
+  }
+
+  if (!BB->getParent()) {
+    out << "unnamed_removed<" << BB << ">";
+    return;
+  }
+
+  if (BB == &BB->getParent()->getEntryBlock()) {
+    out << "entry"
+        << "<" << BB << ">";
+    return;
+  }
+
+  unsigned FuncOrderBlockNum = 0;
+  for (auto &FuncBB : *BB->getParent()) {
+    if (&FuncBB == BB)
+      break;
+    FuncOrderBlockNum++;
+  }
+  out << "unnamed_" << FuncOrderBlockNum << "<" << BB << ">";
+}
+
+void PreservedCFGCheckerInstrumentation::CFG::printDiff(raw_ostream &out,
+                                                        const CFG &Before,
+                                                        const CFG &After) {
+  assert(!After.isPoisoned());
+
+  // Print function name.
+  const CFG *FuncGraph = nullptr;
+  if (!After.Graph.empty())
+    FuncGraph = &After;
+  else if (!Before.isPoisoned() && !Before.Graph.empty())
+    FuncGraph = &Before;
+
+  if (FuncGraph)
+    out << "In function @"
+        << FuncGraph->Graph.begin()->first->getParent()->getName() << "\n";
+
+  if (Before.isPoisoned()) {
+    out << "Some blocks were deleted\n";
+    return;
+  }
+
+  // Find and print graph differences.
+  if (Before.Graph.size() != After.Graph.size())
+    out << "Different number of non-leaf basic blocks: before="
+        << Before.Graph.size() << ", after=" << After.Graph.size() << "\n";
+
+  for (auto &BB : Before.Graph) {
+    auto BA = After.Graph.find(BB.first);
+    if (BA == After.Graph.end()) {
+      out << "Non-leaf block ";
+      printBBName(out, BB.first);
+      out << " is removed (" << BB.second.size() << " successors)\n";
+    }
+  }
+
+  for (auto &BA : After.Graph) {
+    auto BB = Before.Graph.find(BA.first);
+    if (BB == Before.Graph.end()) {
+      out << "Non-leaf block ";
+      printBBName(out, BA.first);
+      out << " is added (" << BA.second.size() << " successors)\n";
+      continue;
+    }
+
+    if (BB->second == BA.second)
+      continue;
+
+    out << "Different successors of block ";
+    printBBName(out, BA.first);
+    out << " (unordered):\n";
+    out << "- before (" << BB->second.size() << "): ";
+    for (auto &SuccB : BB->second) {
+      printBBName(out, SuccB.first);
+      if (SuccB.second != 1)
+        out << "(" << SuccB.second << "), ";
+      else
+        out << ", ";
+    }
+    out << "\n";
+    out << "- after (" << BA.second.size() << "): ";
+    for (auto &SuccA : BA.second) {
+      printBBName(out, SuccA.first);
+      if (SuccA.second != 1)
+        out << "(" << SuccA.second << "), ";
+      else
+        out << ", ";
+    }
+    out << "\n";
+  }
+}
+
+void PreservedCFGCheckerInstrumentation::registerCallbacks(
+    PassInstrumentationCallbacks &PIC) {
+  if (!VerifyPreservedCFG)
+    return;
+
+  PIC.registerBeforeNonSkippedPassCallback([this](StringRef P, Any IR) {
+    if (any_isa<const Function *>(IR))
+      GraphStackBefore.emplace_back(P, CFG(any_cast<const Function *>(IR)));
+    else
+      GraphStackBefore.emplace_back(P, None);
+  });
+
+  PIC.registerAfterPassInvalidatedCallback(
+      [this](StringRef P, const PreservedAnalyses &PassPA) {
+        auto Before = GraphStackBefore.pop_back_val();
+        assert(Before.first == P &&
+               "Before and After callbacks must correspond");
+        (void)Before;
+      });
+
+  PIC.registerAfterPassCallback([this](StringRef P, Any IR,
+                                       const PreservedAnalyses &PassPA) {
+    auto Before = GraphStackBefore.pop_back_val();
+    assert(Before.first == P && "Before and After callbacks must correspond");
+    auto &GraphBefore = Before.second;
+
+    if (!PassPA.allAnalysesInSetPreserved<CFGAnalyses>())
+      return;
+
+    if (any_isa<const Function *>(IR)) {
+      assert(GraphBefore && "Must be built in BeforePassCallback");
+      CFG GraphAfter(any_cast<const Function *>(IR), false /* NeedsGuard */);
+      if (GraphAfter == *GraphBefore)
+        return;
+
+      dbgs() << "Error: " << P
+             << " reported it preserved CFG, but changes detected:\n";
+      CFG::printDiff(dbgs(), *GraphBefore, GraphAfter);
+      report_fatal_error(Twine("Preserved CFG changed by ", P));
+    }
+  });
+}
+
 void StandardInstrumentations::registerCallbacks(
     PassInstrumentationCallbacks &PIC) {
   PrintIR.registerCallbacks(PIC);
   PrintPass.registerCallbacks(PIC);
   TimePasses.registerCallbacks(PIC);
   OptNone.registerCallbacks(PIC);
+  PreservedCFGChecker.registerCallbacks(PIC);
 }

From 1e1770a07ec0f6a3576362ea5eb97aedd33f4b26 Mon Sep 17 00:00:00 2001
From: David Sherwood
Date: Thu, 3 Sep 2020 11:57:55 +0100
Subject: [PATCH 0338/1079] [SVE][CodeGen] Fix InlineFunction for scalable
 vectors

When inlining functions containing allocas of scalable vectors, we
cannot specify the size in the lifetime markers, since we don't know
this at compile time.
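For reference, the size computation guarded by this patch reduces to
the following pattern (a simplified sketch; DL, AI, ArraySizeCI and
Int64Ty stand for the surrounding inliner state and are not spelled
this way in the patch):

  // Only fold the alloca size to a constant when the element size is
  // fixed; a scalable size is unknown at compile time, so the lifetime
  // marker falls back to -1, meaning "the whole object".
  TypeSize ElemSize = DL.getTypeAllocSize(AI->getAllocatedType());
  uint64_t NumElts = ArraySizeCI->getLimitedValue();
  ConstantInt *SizeArg = nullptr;
  if (!ElemSize.isScalable() &&
      NumElts != std::numeric_limits<uint64_t>::max() &&
      std::numeric_limits<uint64_t>::max() / NumElts >=
          ElemSize.getFixedSize())
    SizeArg = ConstantInt::get(Int64Ty, NumElts * ElemSize.getFixedSize());
  // A null size makes IRBuilder emit the lifetime markers with i64 -1.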
Added new test here:

  test/Transforms/Inline/AArch64/sve-alloca-merge.ll

Differential Revision: https://reviews.llvm.org/D87139
---
 llvm/lib/Transforms/Utils/InlineFunction.cpp  |  7 +++--
 .../Inline/AArch64/sve-alloca-merge.ll        | 29 +++++++++++++++++++
 2 files changed, 33 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/Inline/AArch64/sve-alloca-merge.ll

diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 30726627bc829..7ff21d7ee9ef6 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -2061,7 +2061,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
             dyn_cast<ConstantInt>(AI->getArraySize())) {
       auto &DL = Caller->getParent()->getDataLayout();
       Type *AllocaType = AI->getAllocatedType();
-      uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
+      TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
       uint64_t AllocaArraySize = AIArraySize->getLimitedValue();

       // Don't add markers for zero-sized allocas.
@@ -2070,9 +2070,10 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,

       // Check that array size doesn't saturate uint64_t and doesn't
       // overflow when it's multiplied by type size.
-      if (AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
+      if (!AllocaTypeSize.isScalable() &&
+          AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
           std::numeric_limits<uint64_t>::max() / AllocaArraySize >=
-              AllocaTypeSize) {
+              AllocaTypeSize.getFixedSize()) {
         AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
                                       AllocaArraySize * AllocaTypeSize);
       }
diff --git a/llvm/test/Transforms/Inline/AArch64/sve-alloca-merge.ll b/llvm/test/Transforms/Inline/AArch64/sve-alloca-merge.ll
new file mode 100644
index 0000000000000..c355388ed836f
--- /dev/null
+++ b/llvm/test/Transforms/Inline/AArch64/sve-alloca-merge.ll
@@ -0,0 +1,29 @@
+; RUN: opt -mtriple=aarch64--linux-gnu -mattr=+sve < %s -inline -S | FileCheck %s
+
+define void @bar(<vscale x 2 x i64>* %a) {
+entry:
+  %b = alloca <vscale x 2 x i64>, align 16
+  store <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64>* %b, align 16
+  %c = load <vscale x 2 x i64>, <vscale x 2 x i64>* %a, align 16
+  %d = load <vscale x 2 x i64>, <vscale x 2 x i64>* %b, align 16
+  %e = add <vscale x 2 x i64> %c, %d
+  %f = add <vscale x 2 x i64> %e, %c
+  store <vscale x 2 x i64> %f, <vscale x 2 x i64>* %a, align 16
+  ret void
+}
+
+define i64 @foo() {
+; CHECK-LABEL: @foo(
+; CHECK: %0 = bitcast <vscale x 2 x i64>* %{{.*}} to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %0)
+; CHECK: %1 = bitcast <vscale x 2 x i64>* %{{.*}} to i8*
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %1)
+entry:
+  %a = alloca <vscale x 2 x i64>, align 16
+  store <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64>* %a, align 16
+  %a1 = bitcast <vscale x 2 x i64>* %a to i64*
+  store i64 1, i64* %a1, align 8
+  call void @bar(<vscale x 2 x i64>* %a)
+  %el = load i64, i64* %a1
+  ret i64 %el
+}

From d380b582f7f04f7635b1fbdb8347a6095660a1b6 Mon Sep 17 00:00:00 2001
From: MaheshRavishankar
Date: Thu, 10 Sep 2020 23:56:34 -0700
Subject: [PATCH 0339/1079] [mlir][Linalg] Make LinalgBaseTilingPattern not
 delete the original operation.

The LinalgTilingPattern class derived from the base deletes the
original operation. This allows for the use case where more
transformations are necessary on the original operation after tiling.
In such cases the pattern can derive from LinalgBaseTilingPattern
instead of LinalgTilingPattern.
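With this change, a client that still needs the untiled op can write
its pattern roughly as follows (a sketch; KeepOriginalOpPattern is a
made-up name for illustration, while LinalgTilingPattern in this patch
performs exactly this erase step to keep the old behavior):

  // Tiles the op via the base pattern, transforms the original
  // further, and only then erases it.
  struct KeepOriginalOpPattern : public linalg::LinalgBaseTilingPattern {
    using LinalgBaseTilingPattern::LinalgBaseTilingPattern;

    LogicalResult matchAndRewrite(Operation *op,
                                  PatternRewriter &rewriter) const override {
      // The base pattern now leaves `op` in place after emitting the
      // tiled version.
      if (failed(LinalgBaseTilingPattern::matchAndRewrite(op, rewriter)))
        return failure();
      // ... additional transformations on the original `op` go here ...
      rewriter.eraseOp(op);
      return success();
    }
  };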
Differential Revision: https://reviews.llvm.org/D87308 --- .../mlir/Dialect/Linalg/Transforms/Transforms.h | 10 +++++++++- mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp | 2 -- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 3049570bd47b6..b55c429a9d02d 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -313,6 +313,13 @@ struct LinalgTilingPattern : public LinalgBaseTilingPattern { PatternBenefit benefit = 1) : LinalgBaseTilingPattern(OpTy::getOperationName(), context, options, marker, benefit) {} + LogicalResult matchAndRewrite(Operation *op, + PatternRewriter &rewriter) const override { + if (failed(LinalgBaseTilingPattern::matchAndRewrite(op, rewriter))) + return failure(); + rewriter.eraseOp(op); + return success(); + } }; /// @@ -415,7 +422,8 @@ enum class LinalgLoweringType { AffineLoops = 2, ParallelLoops = 3 }; -template struct LinalgLoweringPattern : public RewritePattern { +template +struct LinalgLoweringPattern : public RewritePattern { LinalgLoweringPattern(MLIRContext *context, LinalgLoweringType loweringType, LinalgMarker marker = LinalgMarker(), PatternBenefit benefit = 1) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index afac3d5f5f9a4..c1aad620fe08a 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -126,8 +126,6 @@ LogicalResult mlir::linalg::LinalgBaseTilingPattern::matchAndRewrite( // New marker if specified. marker.replaceLinalgMarker(rewriter, res->op.getOperation()); - - rewriter.eraseOp(op); return success(); } From 76e85ae268f8e64540703b0d1710d27ef0d36040 Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Fri, 11 Sep 2020 09:53:19 +0200 Subject: [PATCH 0340/1079] [clang][Sparc] Default to -mcpu=v9 for Sparc V8 on Solaris As reported in Bug 42535, `clang` doesn't inline atomic ops on 32-bit Sparc, unlike `gcc` on Solaris. In a 1-stage build with `gcc`, only two testcases are affected (currently `XFAIL`ed), while in a 2-stage build more than 100 tests `FAIL` due to this issue. The reason for this `gcc`/`clang` difference is that `gcc` on 32-bit Solaris/SPARC defaults to `-mpcu=v9` where atomic ops are supported, unlike with `clang`'s default of `-mcpu=v8`. This patch changes `clang` to use `-mcpu=v9` on 32-bit Solaris/SPARC, too. Doing so uncovered two bugs: `clang -m32 -mcpu=v9` chokes with any Solaris system headers included: /usr/include/sys/isa_defs.h:461:2: error: "Both _ILP32 and _LP64 are defined" #error "Both _ILP32 and _LP64 are defined" While `clang` currently defines `__sparcv9` in a 32-bit `-mcpu=v9` compilation, neither `gcc` nor Studio `cc` do. In fact, the Studio 12.6 `cc(1)` man page clearly states: These predefinitions are valid in all modes: [...] __sparcv8 (SPARC) __sparcv9 (SPARC -m64) At the same time, the patch defines `__GCC_HAVE_SYNC_COMPARE_AND_SWAP_[1248]` for a 32-bit Sparc compilation with any V9 cpu. I've also changed `MaxAtomicInlineWidth` for V9, matching what `gcc` does and the Oracle Developer Studio 12.6: C User's Guide documents (Ch. 3, Support for Atomic Types, 3.1 Size and Alignment of Atomic C Types). The two testcases that had been `XFAIL`ed for Bug 42535 are un-`XFAIL`ed again. Tested on `sparcv9-sun-solaris2.11` and `amd64-pc-solaris2.11`. 
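As a concrete example, a plain 32-bit atomic operation like the one
below should now be expanded inline (using the V9 cas instruction)
rather than through a library call when targeting
sparc-sun-solaris2.11; the snippet is illustrative and not part of the
patch:

  /* Compiled as 32-bit code for a Solaris/SPARC target. With the new
     -mcpu=v9 default, this compare-and-swap is lowered inline. */
  int cas_int(int *p, int expected, int desired) {
    return __sync_val_compare_and_swap(p, expected, desired);
  }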
Differential Revision: https://reviews.llvm.org/D86621 --- clang/lib/Basic/Targets/Sparc.cpp | 23 ++++++++++++------- clang/lib/Basic/Targets/Sparc.h | 11 ++++++--- clang/lib/Driver/ToolChains/CommonArgs.cpp | 2 ++ .../Preprocessor/predefined-arch-macros.c | 19 ++++++++++++++- .../Posix/instrprof-gcov-parallel.test | 3 --- .../ubsan/TestCases/Float/cast-overflow.cpp | 3 --- 6 files changed, 43 insertions(+), 18 deletions(-) diff --git a/clang/lib/Basic/Targets/Sparc.cpp b/clang/lib/Basic/Targets/Sparc.cpp index 48f36c5ba1c63..5eeb77406c342 100644 --- a/clang/lib/Basic/Targets/Sparc.cpp +++ b/clang/lib/Basic/Targets/Sparc.cpp @@ -147,19 +147,20 @@ void SparcTargetInfo::getTargetDefines(const LangOptions &Opts, void SparcV8TargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { SparcTargetInfo::getTargetDefines(Opts, Builder); - switch (getCPUGeneration(CPU)) { - case CG_V8: + if (getTriple().getOS() == llvm::Triple::Solaris) Builder.defineMacro("__sparcv8"); - if (getTriple().getOS() != llvm::Triple::Solaris) + else { + switch (getCPUGeneration(CPU)) { + case CG_V8: + Builder.defineMacro("__sparcv8"); Builder.defineMacro("__sparcv8__"); - break; - case CG_V9: - Builder.defineMacro("__sparcv9"); - if (getTriple().getOS() != llvm::Triple::Solaris) { + break; + case CG_V9: + Builder.defineMacro("__sparcv9"); Builder.defineMacro("__sparcv9__"); Builder.defineMacro("__sparc_v9__"); + break; } - break; } if (getTriple().getVendor() == llvm::Triple::Myriad) { std::string MyriadArchValue, Myriad2Value; @@ -227,6 +228,12 @@ void SparcV8TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__myriad2__", Myriad2Value); Builder.defineMacro("__myriad2", Myriad2Value); } + if (getCPUGeneration(CPU) == CG_V9) { + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); + } } void SparcV9TargetInfo::getTargetDefines(const LangOptions &Opts, diff --git a/clang/lib/Basic/Targets/Sparc.h b/clang/lib/Basic/Targets/Sparc.h index d24cf15d7cd65..07844abafe11b 100644 --- a/clang/lib/Basic/Targets/Sparc.h +++ b/clang/lib/Basic/Targets/Sparc.h @@ -166,10 +166,15 @@ class LLVM_LIBRARY_VISIBILITY SparcV8TargetInfo : public SparcTargetInfo { PtrDiffType = SignedLong; break; } - // Up to 32 bits are lock-free atomic, but we're willing to do atomic ops - // on up to 64 bits. + // Up to 32 bits (V8) or 64 bits (V9) are lock-free atomic, but we're + // willing to do atomic ops on up to 64 bits. MaxAtomicPromoteWidth = 64; - MaxAtomicInlineWidth = 32; + if (getCPUGeneration(CPU) == CG_V9) + MaxAtomicInlineWidth = 64; + else + // FIXME: This isn't correct for plain V8 which lacks CAS, + // only for LEON 3+ and Myriad. 
+ MaxAtomicInlineWidth = 32; } void getTargetDefines(const LangOptions &Opts, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 8bbb642c2917c..0507794ee34ff 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -347,6 +347,8 @@ std::string tools::getCPUName(const ArgList &Args, const llvm::Triple &T, case llvm::Triple::sparcv9: if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) return A->getValue(); + if (T.getArch() == llvm::Triple::sparc && T.isOSSolaris()) + return "v9"; return ""; case llvm::Triple::x86: diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index 3c369ace32d51..287a7c58cddab 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -3235,9 +3235,26 @@ // RUN: -target sparc-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SPARC-V9 // CHECK_SPARC-V9-NOT: #define __sparcv8 1 +// CHECK_SPARC-V9-NOT: #define __sparcv8__ 1 // CHECK_SPARC-V9: #define __sparc_v9__ 1 // CHECK_SPARC-V9: #define __sparcv9 1 -// CHECK_SPARC-V9-NOT: #define __sparcv8 1 +// CHECK_SPARC-V9: #define __sparcv9__ 1 + +// RUN: %clang -E -dM %s -o - 2>&1 \ +// RUN: -target sparc-sun-solaris \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SPARC_SOLARIS_GCC_ATOMICS +// CHECK_SPARC_SOLARIS_GCC_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 +// CHECK_SPARC_SOLARIS_GCC_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 +// CHECK_SPARC_SOLARIS_GCC_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 +// CHECK_SPARC_SOLARIS_GCC_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 + +// RUN: %clang -mcpu=v8 -E -dM %s -o - 2>&1 \ +// RUN: -target sparc-sun-solaris \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SPARC_SOLARIS_GCC_ATOMICS-V8 +// CHECK_SPARC_SOLARIS_GCC_ATOMICS-V8-NOT: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 +// CHECK_SPARC_SOLARIS_GCC_ATOMICS-V8-NOT: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 +// CHECK_SPARC_SOLARIS_GCC_ATOMICS-V8-NOT: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 +// CHECK_SPARC_SOLARIS_GCC_ATOMICS-V8-NOT: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 // RUN: %clang -E -dM %s -o - 2>&1 \ // RUN: -target sparcel-unknown-linux \ diff --git a/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test b/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test index 52b51e6269f53..0c7198e3c4e9e 100644 --- a/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test +++ b/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test @@ -10,9 +10,6 @@ RUN: %run %t.driver %t.target RUN: llvm-cov gcov instrprof-gcov-parallel.target.gcda RUN: FileCheck --input-file instrprof-gcov-parallel.target.c.gcov %s -# Bug 42535 -# XFAIL: sparc-target-arch - # Test if the .gcda file is correctly created from one of child processes # and counters of all processes are recorded correctly. 
# 707 = CHILDREN * COUNT diff --git a/compiler-rt/test/ubsan/TestCases/Float/cast-overflow.cpp b/compiler-rt/test/ubsan/TestCases/Float/cast-overflow.cpp index 1c680259a2471..479c39f28428a 100644 --- a/compiler-rt/test/ubsan/TestCases/Float/cast-overflow.cpp +++ b/compiler-rt/test/ubsan/TestCases/Float/cast-overflow.cpp @@ -11,9 +11,6 @@ // FIXME: not %run %t 8 2>&1 | FileCheck %s --check-prefix=CHECK-8 // RUN: not %run %t 9 2>&1 | FileCheck %s --check-prefix=CHECK-9 -// Bug 42535 -// XFAIL: sparc-target-arch - // This test assumes float and double are IEEE-754 single- and double-precision. #if defined(__APPLE__) From b8ea47a38039c57e863e3047c33d8584e21360f0 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Fri, 11 Sep 2020 10:08:02 +0200 Subject: [PATCH 0341/1079] Uncapitalize word in LanguageExtensions.rst --- clang/docs/LanguageExtensions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 073d9c86e22ff..256f7e12364f8 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -2436,7 +2436,7 @@ Guaranteed inlined copy ``__builtin_memcpy_inline`` has been designed as a building block for efficient ``memcpy`` implementations. It is identical to ``__builtin_memcpy`` but also guarantees not to call any external functions. See LLVM IR `llvm.memcpy.inline -`_ Intrinsic +`_ intrinsic for more information. This is useful to implement a custom version of ``memcpy``, implemement a From a68673cc067a190f5a9d0f0e3e4837601caf4504 Mon Sep 17 00:00:00 2001 From: Marius Brehler Date: Thu, 10 Sep 2020 17:37:56 +0200 Subject: [PATCH 0342/1079] [mlir] Fix generation of AVX512 dialect documentation This changes adjusts the documentation generation for the AVX512 dialect. The machanism to generate documentation was changed with https://github.com/llvm/llvm-project/commit/1a083f027f33f4014247df4c0e757e23d5cdab64. Reviewed By: nicolasvasilache Differential Revision: https://reviews.llvm.org/D87460 --- mlir/include/mlir/Dialect/AVX512/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/AVX512/CMakeLists.txt b/mlir/include/mlir/Dialect/AVX512/CMakeLists.txt index bc57372689b28..3c14238be1bbe 100644 --- a/mlir/include/mlir/Dialect/AVX512/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/AVX512/CMakeLists.txt @@ -1 +1,2 @@ -add_mlir_dialect(AVX512 avx512 AVX512) +add_mlir_dialect(AVX512 avx512) +add_mlir_doc(AVX512 -gen-op-doc AVX512 Dialects/) From e6419d320d501077d1c5e1e7e1291a1ec6573877 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 11 Sep 2020 11:14:45 +0300 Subject: [PATCH 0343/1079] [MC] [Win64EH] Fix builds with expensive checks enabled This fixes a failed assert if expensive checks are enabled, since 1308bb99e06752ab0b5175c92da31083f91af921. --- llvm/lib/MC/MCWin64EH.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp index a585b50828379..8e8dba760853e 100644 --- a/llvm/lib/MC/MCWin64EH.cpp +++ b/llvm/lib/MC/MCWin64EH.cpp @@ -632,9 +632,11 @@ static int checkPackedEpilog(MCStreamer &streamer, WinEH::FrameInfo *info, if (DistanceFromEnd / 4 != Epilog.size()) return -1; - int Offset = ARM64CountOfUnwindCodes( - ArrayRef(&info->Instructions[Epilog.size()], - info->Instructions.size() - Epilog.size())); + int Offset = Epilog.size() == info->Instructions.size() + ? 
0 + : ARM64CountOfUnwindCodes(ArrayRef( + &info->Instructions[Epilog.size()], + info->Instructions.size() - Epilog.size())); // Check that the offset and prolog size fits in the first word; it's // unclear whether the epilog count in the extension word can be taken From c0825fa5fc367bb7dc04a4b9dd4cc62abde04521 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 11 Sep 2020 09:35:20 +0100 Subject: [PATCH 0344/1079] Revert "[ORC] Make MaterializationResponsibility immovable, pass by unique_ptr." This reverts commit c74900ca67241bf963b7a4cfa1fae8eadf6bb8cd. This appears to be breaking some builds on macOS and has been causing build failures on Green Dragon (see below). I am reverting this for now, to unblock testing on Green Dragon. http://green.lab.llvm.org/green/job/clang-stage1-cmake-RA-incremental/18144/console [65/187] /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/c++ -DBUILD_EXAMPLES -DGTEST_HAS_RTTI=0 -D_DEBUG -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -Iexamples/ThinLtoJIT -I/Users/buildslave/jenkins/workspace/clang-stage1-cmake-RA-incremental/llvm-project/llvm/examples/ThinLtoJIT -Iinclude -I/Users/buildslave/jenkins/workspace/clang-stage1-cmake-RA-incremental/llvm-project/llvm/include -fPIC -fvisibility-inlines-hidden -Werror=date-time -Werror=unguarded-availability-new -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wstring-conversion -fdiagnostics-color -O3 -isysroot /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.15.sdk -mmacosx-version-min=10.9 -fno-exceptions -fno-rtti -UNDEBUG -std=c++14 -MD -MT examples/ThinLtoJIT/CMakeFiles/ThinLtoJIT.dir/ThinLtoDiscoveryThread.cpp.o -MF examples/ThinLtoJIT/CMakeFiles/ThinLtoJIT.dir/ThinLtoDiscoveryThread.cpp.o.d -o examples/ThinLtoJIT/CMakeFiles/ThinLtoJIT.dir/ThinLtoDiscoveryThread.cpp.o -c /Users/buildslave/jenkins/workspace/clang-stage1-cmake-RA-incremental/llvm-project/llvm/examples/ThinLtoJIT/ThinLtoDiscoveryThread.cpp FAILED: examples/ThinLtoJIT/CMakeFiles/ThinLtoJIT.dir/ThinLtoDiscoveryThread.cpp.o /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/c++ -DBUILD_EXAMPLES -DGTEST_HAS_RTTI=0 -D_DEBUG -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -Iexamples/ThinLtoJIT -I/Users/buildslave/jenkins/workspace/clang-stage1-cmake-RA-incremental/llvm-project/llvm/examples/ThinLtoJIT -Iinclude -I/Users/buildslave/jenkins/workspace/clang-stage1-cmake-RA-incremental/llvm-project/llvm/include -fPIC -fvisibility-inlines-hidden -Werror=date-time -Werror=unguarded-availability-new -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wstring-conversion -fdiagnostics-color -O3 -isysroot /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.15.sdk -mmacosx-version-min=10.9 -fno-exceptions -fno-rtti -UNDEBUG -std=c++14 -MD -MT examples/ThinLtoJIT/CMakeFiles/ThinLtoJIT.dir/ThinLtoDiscoveryThread.cpp.o -MF examples/ThinLtoJIT/CMakeFiles/ThinLtoJIT.dir/ThinLtoDiscoveryThread.cpp.o.d -o examples/ThinLtoJIT/CMakeFiles/ThinLtoJIT.dir/ThinLtoDiscoveryThread.cpp.o -c 
/Users/buildslave/jenkins/workspace/clang-stage1-cmake-RA-incremental/llvm-project/llvm/examples/ThinLtoJIT/ThinLtoDiscoveryThread.cpp In file included from /Users/buildslave/jenkins/workspace/clang-stage1-cmake-RA-incremental/llvm-project/llvm/examples/ThinLtoJIT/ThinLtoDiscoveryThread.cpp:7: /Users/buildslave/jenkins/workspace/clang-stage1-cmake-RA-incremental/llvm-project/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h:37:68: error: non-virtual member function marked 'override' hides virtual member function void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; ^ /Users/buildslave/jenkins/workspace/clang-stage1-cmake-RA-incremental/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Layer.h:103:16: note: hidden overloaded virtual function 'llvm::orc::IRLayer::emit' declared here: type mismatch at 1st parameter ('std::unique_ptr' vs 'llvm::orc::MaterializationResponsibility') virtual void emit(std::unique_ptr R, ^ 1 error generated. --- .../SpeculativeJIT/SpeculativeJIT.cpp | 15 +- .../Orc/CompileOnDemandLayer.h | 6 +- llvm/include/llvm/ExecutionEngine/Orc/Core.h | 37 ++- .../llvm/ExecutionEngine/Orc/IRCompileLayer.h | 3 +- .../ExecutionEngine/Orc/IRTransformLayer.h | 3 +- llvm/include/llvm/ExecutionEngine/Orc/Layer.h | 11 +- .../llvm/ExecutionEngine/Orc/LazyReexports.h | 2 +- .../ExecutionEngine/Orc/ObjectLinkingLayer.h | 2 +- .../Orc/ObjectTransformLayer.h | 2 +- .../Orc/RTDyldObjectLinkingLayer.h | 2 +- .../llvm/ExecutionEngine/Orc/Speculation.h | 3 +- .../Orc/CompileOnDemandLayer.cpp | 42 +-- llvm/lib/ExecutionEngine/Orc/Core.cpp | 50 ++-- .../ExecutionEngine/Orc/IRCompileLayer.cpp | 6 +- .../ExecutionEngine/Orc/IRTransformLayer.cpp | 6 +- .../ExecutionEngine/Orc/IndirectionUtils.cpp | 6 +- llvm/lib/ExecutionEngine/Orc/LLJIT.cpp | 20 +- llvm/lib/ExecutionEngine/Orc/Layer.cpp | 8 +- .../lib/ExecutionEngine/Orc/LazyReexports.cpp | 16 +- .../Orc/ObjectLinkingLayer.cpp | 59 +++-- .../Orc/ObjectTransformLayer.cpp | 7 +- .../Orc/RTDyldObjectLinkingLayer.cpp | 25 +- llvm/lib/ExecutionEngine/Orc/Speculation.cpp | 4 +- .../ExecutionEngine/Orc/CoreAPIsTest.cpp | 242 ++++++++---------- .../Orc/LazyCallThroughAndReexportsTest.cpp | 6 +- .../ExecutionEngine/Orc/OrcTestCommon.h | 5 +- 26 files changed, 274 insertions(+), 314 deletions(-) diff --git a/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp b/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp index 24cf0847558f9..4de4897053c1b 100644 --- a/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp +++ b/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp @@ -113,13 +113,14 @@ class SpeculativeJIT { this->CODLayer.setImplMap(&Imps); this->ES->setDispatchMaterialization( [this](std::unique_ptr MU, - std::unique_ptr MR) { - CompileThreads.async( - [UnownedMU = MU.release(), UnownedMR = MR.release()]() { - std::unique_ptr MU(UnownedMU); - std::unique_ptr MR(UnownedMR); - MU->materialize(std::move(MR)); - }); + MaterializationResponsibility MR) { + // FIXME: Switch to move capture once we have C++14. 
+ auto SharedMU = std::shared_ptr(std::move(MU)); + auto SharedMR = + std::make_shared(std::move(MR)); + CompileThreads.async([SharedMU, SharedMR]() { + SharedMU->materialize(std::move(*SharedMR)); + }); }); ExitOnErr(S.addSpeculationRuntime(MainJD, Mangle)); LocalCXXRuntimeOverrides CXXRuntimeoverrides; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index 3a2f8b54ad22b..9ecc0464dec1b 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -96,8 +96,7 @@ class CompileOnDemandLayer : public IRLayer { /// Emits the given module. This should not be called by clients: it will be /// called by the JIT when a definition added via the add method is requested. - void emit(std::unique_ptr R, - ThreadSafeModule TSM) override; + void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; private: struct PerDylibResources { @@ -121,8 +120,7 @@ class CompileOnDemandLayer : public IRLayer { void expandPartition(GlobalValueSet &Partition); - void emitPartition(std::unique_ptr R, - ThreadSafeModule TSM, + void emitPartition(MaterializationResponsibility R, ThreadSafeModule TSM, IRMaterializationUnit::SymbolNameToDefinitionMap Defs); mutable std::mutex CODLayerMutex; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index 70bd983c40ce0..6951df3f2d3f2 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -410,7 +410,7 @@ class UnexpectedSymbolDefinitions : public ErrorInfo - delegate(const SymbolNameSet &Symbols, VModuleKey NewKey = VModuleKey()); + MaterializationResponsibility delegate(const SymbolNameSet &Symbols, + VModuleKey NewKey = VModuleKey()); void addDependencies(const SymbolStringPtr &Name, const SymbolDependenceMap &Dependencies); @@ -577,8 +577,7 @@ class MaterializationUnit { /// Implementations of this method should materialize all symbols /// in the materialzation unit, except for those that have been /// previously discarded. - virtual void - materialize(std::unique_ptr R) = 0; + virtual void materialize(MaterializationResponsibility R) = 0; /// Called by JITDylibs to notify MaterializationUnits that the given symbol /// has been overridden. 
@@ -595,11 +594,10 @@ class MaterializationUnit { private: virtual void anchor(); - std::unique_ptr + MaterializationResponsibility createMaterializationResponsibility(std::shared_ptr JD) { - return std::unique_ptr( - new MaterializationResponsibility(std::move(JD), std::move(SymbolFlags), - std::move(InitSymbol), K)); + return MaterializationResponsibility(std::move(JD), std::move(SymbolFlags), + std::move(InitSymbol), K); } /// Implementations of this method should discard the given symbol @@ -623,7 +621,7 @@ class AbsoluteSymbolsMaterializationUnit : public MaterializationUnit { StringRef getName() const override; private: - void materialize(std::unique_ptr R) override; + void materialize(MaterializationResponsibility R) override; void discard(const JITDylib &JD, const SymbolStringPtr &Name) override; static SymbolFlagsMap extractFlags(const SymbolMap &Symbols); @@ -665,7 +663,7 @@ class ReExportsMaterializationUnit : public MaterializationUnit { StringRef getName() const override; private: - void materialize(std::unique_ptr R) override; + void materialize(MaterializationResponsibility R) override; void discard(const JITDylib &JD, const SymbolStringPtr &Name) override; static SymbolFlagsMap extractFlags(const SymbolAliasMap &Aliases); @@ -1118,7 +1116,7 @@ class ExecutionSession { /// For dispatching MaterializationUnit::materialize calls. using DispatchMaterializationFunction = std::function MU, - std::unique_ptr MR)>; + MaterializationResponsibility MR)>; /// Construct an ExecutionSession. /// @@ -1270,11 +1268,10 @@ class ExecutionSession { SymbolState RequiredState = SymbolState::Ready); /// Materialize the given unit. - void - dispatchMaterialization(std::unique_ptr MU, - std::unique_ptr MR) { + void dispatchMaterialization(std::unique_ptr MU, + MaterializationResponsibility MR) { assert(MU && "MU must be non-null"); - DEBUG_WITH_TYPE("orc", dumpDispatchInfo(MR->getTargetJITDylib(), *MU)); + DEBUG_WITH_TYPE("orc", dumpDispatchInfo(MR.getTargetJITDylib(), *MU)); DispatchMaterialization(std::move(MU), std::move(MR)); } @@ -1286,9 +1283,9 @@ class ExecutionSession { logAllUnhandledErrors(std::move(Err), errs(), "JIT session error: "); } - static void materializeOnCurrentThread( - std::unique_ptr MU, - std::unique_ptr MR) { + static void + materializeOnCurrentThread(std::unique_ptr MU, + MaterializationResponsibility MR) { MU->materialize(std::move(MR)); } @@ -1312,7 +1309,7 @@ class ExecutionSession { // with callbacks from asynchronous queries. 
mutable std::recursive_mutex OutstandingMUsMutex; std::vector, - std::unique_ptr>> + MaterializationResponsibility>> OutstandingMUs; }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h index 2c53e2f66e851..eb74d283f0435 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h @@ -55,8 +55,7 @@ class IRCompileLayer : public IRLayer { void setNotifyCompiled(NotifyCompiledFunction NotifyCompiled); - void emit(std::unique_ptr R, - ThreadSafeModule TSM) override; + void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; private: mutable std::mutex IRLayerMutex; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h index ee4ee3437fa6d..296d74ae6b865 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h @@ -37,8 +37,7 @@ class IRTransformLayer : public IRLayer { this->Transform = std::move(Transform); } - void emit(std::unique_ptr R, - ThreadSafeModule TSM) override; + void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; static ThreadSafeModule identityTransform(ThreadSafeModule TSM, MaterializationResponsibility &R) { diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Layer.h b/llvm/include/llvm/ExecutionEngine/Orc/Layer.h index c8a41199760da..e843d0f562455 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Layer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Layer.h @@ -100,8 +100,7 @@ class IRLayer { VModuleKey K = VModuleKey()); /// Emit should materialize the given IR. - virtual void emit(std::unique_ptr R, - ThreadSafeModule TSM) = 0; + virtual void emit(MaterializationResponsibility R, ThreadSafeModule TSM) = 0; private: bool CloneToNewContextOnEmit = false; @@ -118,7 +117,8 @@ class BasicIRLayerMaterializationUnit : public IRMaterializationUnit { ThreadSafeModule TSM, VModuleKey K); private: - void materialize(std::unique_ptr R) override; + + void materialize(MaterializationResponsibility R) override; IRLayer &L; VModuleKey K; @@ -139,7 +139,7 @@ class ObjectLayer { VModuleKey K = VModuleKey()); /// Emit should materialize the given IR. 
- virtual void emit(std::unique_ptr R, + virtual void emit(MaterializationResponsibility R, std::unique_ptr O) = 0; private: @@ -162,7 +162,8 @@ class BasicObjectLayerMaterializationUnit : public MaterializationUnit { StringRef getName() const override; private: - void materialize(std::unique_ptr R) override; + + void materialize(MaterializationResponsibility R) override; void discard(const JITDylib &JD, const SymbolStringPtr &Name) override; ObjectLayer &L; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h b/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h index 63e3a80d87d86..9206e40fffb1c 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h @@ -149,7 +149,7 @@ class LazyReexportsMaterializationUnit : public MaterializationUnit { StringRef getName() const override; private: - void materialize(std::unique_ptr R) override; + void materialize(MaterializationResponsibility R) override; void discard(const JITDylib &JD, const SymbolStringPtr &Name) override; static SymbolFlagsMap extractFlags(const SymbolAliasMap &Aliases); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index cbcf3928be3df..cb8ee130ab614 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -119,7 +119,7 @@ class ObjectLinkingLayer : public ObjectLayer { } /// Emit the object. - void emit(std::unique_ptr R, + void emit(MaterializationResponsibility R, std::unique_ptr O) override; /// Instructs this ObjectLinkingLayer instance to override the symbol flags diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h index c77649f19fc74..bf989cc8677cf 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h @@ -31,7 +31,7 @@ class ObjectTransformLayer : public ObjectLayer { ObjectTransformLayer(ExecutionSession &ES, ObjectLayer &BaseLayer, TransformFunction Transform = TransformFunction()); - void emit(std::unique_ptr R, + void emit(MaterializationResponsibility R, std::unique_ptr O) override; void setTransform(TransformFunction Transform) { diff --git a/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h index 9cd3c57a19c6a..9ada0871cf0cb 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h @@ -58,7 +58,7 @@ class RTDyldObjectLinkingLayer : public ObjectLayer { ~RTDyldObjectLinkingLayer(); /// Emit the object. - void emit(std::unique_ptr R, + void emit(MaterializationResponsibility R, std::unique_ptr O) override; /// Set the NotifyLoaded callback. 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h index a138f60a77564..10f78c8bc6beb 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h @@ -181,8 +181,7 @@ class IRSpeculationLayer : public IRLayer { : IRLayer(ES, BaseLayer.getManglingOptions()), NextLayer(BaseLayer), S(Spec), Mangle(Mangle), QueryAnalysis(Interpreter) {} - void emit(std::unique_ptr R, - ThreadSafeModule TSM) override; + void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; private: TargetAndLikelies diff --git a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp index dfb0d06bdba3d..9e38dc36faae7 100644 --- a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp @@ -88,7 +88,7 @@ class PartitioningIRMaterializationUnit : public IRMaterializationUnit { Parent(Parent) {} private: - void materialize(std::unique_ptr R) override { + void materialize(MaterializationResponsibility R) override { Parent.emitPartition(std::move(R), std::move(TSM), std::move(SymbolToDefinition)); } @@ -128,15 +128,15 @@ void CompileOnDemandLayer::setPartitionFunction(PartitionFunction Partition) { void CompileOnDemandLayer::setImplMap(ImplSymbolMap *Imp) { this->AliaseeImpls = Imp; } -void CompileOnDemandLayer::emit( - std::unique_ptr R, ThreadSafeModule TSM) { +void CompileOnDemandLayer::emit(MaterializationResponsibility R, + ThreadSafeModule TSM) { assert(TSM && "Null module"); auto &ES = getExecutionSession(); // Sort the callables and non-callables, build re-exports and lodge the // actual module with the implementation dylib. - auto &PDR = getPerDylibResources(R->getTargetJITDylib()); + auto &PDR = getPerDylibResources(R.getTargetJITDylib()); SymbolAliasMap NonCallables; SymbolAliasMap Callables; @@ -145,7 +145,7 @@ void CompileOnDemandLayer::emit( cleanUpModule(M); }); - for (auto &KV : R->getSymbols()) { + for (auto &KV : R.getSymbols()) { auto &Name = KV.first; auto &Flags = KV.second; if (Flags.isCallable()) @@ -158,19 +158,19 @@ void CompileOnDemandLayer::emit( // implementation dylib. 
if (auto Err = PDR.getImplDylib().define( std::make_unique( - ES, *getManglingOptions(), std::move(TSM), R->getVModuleKey(), + ES, *getManglingOptions(), std::move(TSM), R.getVModuleKey(), *this))) { ES.reportError(std::move(Err)); - R->failMaterialization(); + R.failMaterialization(); return; } if (!NonCallables.empty()) - R->replace(reexports(PDR.getImplDylib(), std::move(NonCallables), - JITDylibLookupFlags::MatchAllSymbols)); + R.replace(reexports(PDR.getImplDylib(), std::move(NonCallables), + JITDylibLookupFlags::MatchAllSymbols)); if (!Callables.empty()) - R->replace(lazyReexports(LCTMgr, PDR.getISManager(), PDR.getImplDylib(), - std::move(Callables), AliaseeImpls)); + R.replace(lazyReexports(LCTMgr, PDR.getISManager(), PDR.getImplDylib(), + std::move(Callables), AliaseeImpls)); } CompileOnDemandLayer::PerDylibResources & @@ -247,7 +247,7 @@ void CompileOnDemandLayer::expandPartition(GlobalValueSet &Partition) { } void CompileOnDemandLayer::emitPartition( - std::unique_ptr R, ThreadSafeModule TSM, + MaterializationResponsibility R, ThreadSafeModule TSM, IRMaterializationUnit::SymbolNameToDefinitionMap Defs) { // FIXME: Need a 'notify lazy-extracting/emitting' callback to tie the @@ -257,8 +257,8 @@ void CompileOnDemandLayer::emitPartition( auto &ES = getExecutionSession(); GlobalValueSet RequestedGVs; - for (auto &Name : R->getRequestedSymbols()) { - if (Name == R->getInitializerSymbol()) + for (auto &Name : R.getRequestedSymbols()) { + if (Name == R.getInitializerSymbol()) TSM.withModuleDo([&](Module &M) { for (auto &GV : getStaticInitGVs(M)) RequestedGVs.insert(&GV); @@ -285,9 +285,9 @@ void CompileOnDemandLayer::emitPartition( // If the partition is empty, return the whole module to the symbol table. if (GVsToExtract->empty()) { - R->replace(std::make_unique( - std::move(TSM), R->getVModuleKey(), R->getSymbols(), - R->getInitializerSymbol(), std::move(Defs), *this)); + R.replace(std::make_unique( + std::move(TSM), R.getVModuleKey(), R.getSymbols(), + R.getInitializerSymbol(), std::move(Defs), *this)); return; } @@ -308,7 +308,7 @@ void CompileOnDemandLayer::emitPartition( IRSymbolMapper::add(ES, *getManglingOptions(), PromotedGlobals, SymbolFlags); - if (auto Err = R->defineMaterializing(SymbolFlags)) + if (auto Err = R.defineMaterializing(SymbolFlags)) return std::move(Err); } @@ -348,12 +348,12 @@ void CompileOnDemandLayer::emitPartition( if (!ExtractedTSM) { ES.reportError(ExtractedTSM.takeError()); - R->failMaterialization(); + R.failMaterialization(); return; } - R->replace(std::make_unique( - ES, *getManglingOptions(), std::move(TSM), R->getVModuleKey(), *this)); + R.replace(std::make_unique( + ES, *getManglingOptions(), std::move(TSM), R.getVModuleKey(), *this)); BaseLayer.emit(std::move(R), std::move(*ExtractedTSM)); } diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 243bac79c012f..18eced68f07bc 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -279,7 +279,7 @@ void MaterializationResponsibility::replace( JD->replace(std::move(MU)); } -std::unique_ptr +MaterializationResponsibility MaterializationResponsibility::delegate(const SymbolNameSet &Symbols, VModuleKey NewKey) { @@ -302,10 +302,9 @@ MaterializationResponsibility::delegate(const SymbolNameSet &Symbols, SymbolFlags.erase(I); } - return std::unique_ptr( - new MaterializationResponsibility(JD, std::move(DelegatedFlags), - std::move(DelegatedInitSymbol), - std::move(NewKey))); + return MaterializationResponsibility(JD, 
std::move(DelegatedFlags),
+                                       std::move(DelegatedInitSymbol),
+                                       std::move(NewKey));
 }
 
 void MaterializationResponsibility::addDependencies(
@@ -339,10 +338,10 @@ StringRef AbsoluteSymbolsMaterializationUnit::getName() const {
 }
 
 void AbsoluteSymbolsMaterializationUnit::materialize(
-    std::unique_ptr<MaterializationResponsibility> R) {
+    MaterializationResponsibility R) {
   // No dependencies, so these calls can't fail.
-  cantFail(R->notifyResolved(Symbols));
-  cantFail(R->notifyEmitted());
+  cantFail(R.notifyResolved(Symbols));
+  cantFail(R.notifyEmitted());
 }
 
 void AbsoluteSymbolsMaterializationUnit::discard(const JITDylib &JD,
@@ -371,16 +370,16 @@ StringRef ReExportsMaterializationUnit::getName() const {
 }
 
 void ReExportsMaterializationUnit::materialize(
-    std::unique_ptr<MaterializationResponsibility> R) {
+    MaterializationResponsibility R) {
 
-  auto &ES = R->getTargetJITDylib().getExecutionSession();
-  JITDylib &TgtJD = R->getTargetJITDylib();
+  auto &ES = R.getTargetJITDylib().getExecutionSession();
+  JITDylib &TgtJD = R.getTargetJITDylib();
   JITDylib &SrcJD = SourceJD ? *SourceJD : TgtJD;
 
   // Find the set of requested aliases and aliasees. Return any unrequested
   // aliases back to the JITDylib so as to not prematurely materialize any
   // aliasees.
-  auto RequestedSymbols = R->getRequestedSymbols();
+  auto RequestedSymbols = R.getRequestedSymbols();
   SymbolAliasMap RequestedAliases;
 
   for (auto &Name : RequestedSymbols) {
@@ -400,19 +399,18 @@ void ReExportsMaterializationUnit::materialize(
 
   if (!Aliases.empty()) {
     if (SourceJD)
-      R->replace(reexports(*SourceJD, std::move(Aliases), SourceJDLookupFlags));
+      R.replace(reexports(*SourceJD, std::move(Aliases), SourceJDLookupFlags));
     else
-      R->replace(symbolAliases(std::move(Aliases)));
+      R.replace(symbolAliases(std::move(Aliases)));
   }
 
   // The OnResolveInfo struct will hold the aliases and responsibility for each
   // query in the list.
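// Illustrative note, not part of the patch: delegate() (see the Core.cpp hunk
// above) now returns a MaterializationResponsibility by value, so code that
// splits responsibility across queries holds the result directly rather than
// through a unique_ptr:
//
//   // 'SomeSymbol' is a placeholder name for this sketch.
//   MaterializationResponsibility Rest = R.delegate({SomeSymbol});
//   // R retains the remaining symbols; Rest alone must now resolve and
//   // emit SomeSymbol, or call failMaterialization().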
struct OnResolveInfo { - OnResolveInfo(std::unique_ptr R, - SymbolAliasMap Aliases) + OnResolveInfo(MaterializationResponsibility R, SymbolAliasMap Aliases) : R(std::move(R)), Aliases(std::move(Aliases)) {} - std::unique_ptr R; + MaterializationResponsibility R; SymbolAliasMap Aliases; }; @@ -453,7 +451,7 @@ void ReExportsMaterializationUnit::materialize( assert(!QuerySymbols.empty() && "Alias cycle detected!"); auto QueryInfo = std::make_shared( - R->delegate(ResponsibilitySymbols), std::move(QueryAliases)); + R.delegate(ResponsibilitySymbols), std::move(QueryAliases)); QueryInfos.push_back( make_pair(std::move(QuerySymbols), std::move(QueryInfo))); } @@ -482,12 +480,12 @@ void ReExportsMaterializationUnit::materialize( for (auto &KV : QueryInfo->Aliases) if (SrcJDDeps.count(KV.second.Aliasee)) { PerAliasDeps = {KV.second.Aliasee}; - QueryInfo->R->addDependencies(KV.first, PerAliasDepsMap); + QueryInfo->R.addDependencies(KV.first, PerAliasDepsMap); } }; auto OnComplete = [QueryInfo](Expected Result) { - auto &ES = QueryInfo->R->getTargetJITDylib().getExecutionSession(); + auto &ES = QueryInfo->R.getTargetJITDylib().getExecutionSession(); if (Result) { SymbolMap ResolutionMap; for (auto &KV : QueryInfo->Aliases) { @@ -501,19 +499,19 @@ void ReExportsMaterializationUnit::materialize( ResolutionMap[KV.first] = JITEvaluatedSymbol( (*Result)[KV.second.Aliasee].getAddress(), KV.second.AliasFlags); } - if (auto Err = QueryInfo->R->notifyResolved(ResolutionMap)) { + if (auto Err = QueryInfo->R.notifyResolved(ResolutionMap)) { ES.reportError(std::move(Err)); - QueryInfo->R->failMaterialization(); + QueryInfo->R.failMaterialization(); return; } - if (auto Err = QueryInfo->R->notifyEmitted()) { + if (auto Err = QueryInfo->R.notifyEmitted()) { ES.reportError(std::move(Err)); - QueryInfo->R->failMaterialization(); + QueryInfo->R.failMaterialization(); return; } } else { ES.reportError(Result.takeError()); - QueryInfo->R->failMaterialization(); + QueryInfo->R.failMaterialization(); } }; @@ -2133,7 +2131,7 @@ void ExecutionSession::dump(raw_ostream &OS) { void ExecutionSession::runOutstandingMUs() { while (1) { Optional, - std::unique_ptr>> + MaterializationResponsibility>> JMU; { diff --git a/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp b/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp index c6f6870279728..023940dc82982 100644 --- a/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp @@ -25,7 +25,7 @@ void IRCompileLayer::setNotifyCompiled(NotifyCompiledFunction NotifyCompiled) { this->NotifyCompiled = std::move(NotifyCompiled); } -void IRCompileLayer::emit(std::unique_ptr R, +void IRCompileLayer::emit(MaterializationResponsibility R, ThreadSafeModule TSM) { assert(TSM && "Module must not be null"); @@ -33,13 +33,13 @@ void IRCompileLayer::emit(std::unique_ptr R, { std::lock_guard Lock(IRLayerMutex); if (NotifyCompiled) - NotifyCompiled(R->getVModuleKey(), std::move(TSM)); + NotifyCompiled(R.getVModuleKey(), std::move(TSM)); else TSM = ThreadSafeModule(); } BaseLayer.emit(std::move(R), std::move(*Obj)); } else { - R->failMaterialization(); + R.failMaterialization(); getExecutionSession().reportError(Obj.takeError()); } } diff --git a/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp b/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp index d5b11349277c1..511248f83b259 100644 --- a/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp @@ -17,14 +17,14 @@ 
IRTransformLayer::IRTransformLayer(ExecutionSession &ES, IRLayer &BaseLayer,
                                    TransformFunction Transform)
     : IRLayer(ES, BaseLayer.getManglingOptions()), BaseLayer(BaseLayer),
       Transform(std::move(Transform)) {}
 
-void IRTransformLayer::emit(std::unique_ptr<MaterializationResponsibility> R,
+void IRTransformLayer::emit(MaterializationResponsibility R,
                             ThreadSafeModule TSM) {
   assert(TSM && "Module must not be null");
 
-  if (auto TransformedTSM = Transform(std::move(TSM), *R))
+  if (auto TransformedTSM = Transform(std::move(TSM), R))
     BaseLayer.emit(std::move(R), std::move(*TransformedTSM));
   else {
-    R->failMaterialization();
+    R.failMaterialization();
     getExecutionSession().reportError(TransformedTSM.takeError());
   }
 }
diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index 7d57ed5a3a04c..4f7f6089e68db 100644
--- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -33,12 +33,12 @@ class CompileCallbackMaterializationUnit : public orc::MaterializationUnit {
   StringRef getName() const override { return "<Compile Callbacks>"; }
 
 private:
-  void materialize(std::unique_ptr<MaterializationResponsibility> R) override {
+  void materialize(MaterializationResponsibility R) override {
     SymbolMap Result;
     Result[Name] = JITEvaluatedSymbol(Compile(), JITSymbolFlags::Exported);
     // No dependencies, so these calls cannot fail.
-    cantFail(R->notifyResolved(Result));
-    cantFail(R->notifyEmitted());
+    cantFail(R.notifyResolved(Result));
+    cantFail(R.notifyEmitted());
   }
 
   void discard(const JITDylib &JD, const SymbolStringPtr &Name) override {
diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index 81f500d66bc29..373d86d92f8d7 100644
--- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -1085,17 +1085,15 @@ LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
         std::make_unique<ThreadPool>(hardware_concurrency(S.NumCompileThreads));
     ES->setDispatchMaterialization(
         [this](std::unique_ptr<MaterializationUnit> MU,
-               std::unique_ptr<MaterializationResponsibility> MR) {
-          // FIXME: We should be able to use move-capture here, but ThreadPool's
-          // AsyncTaskTys are std::functions rather than unique_functions
-          // (because MSVC's std::packaged_tasks don't support move-only types).
-          // Fix this when all the above gets sorted out.
-          CompileThreads->async(
-              [UnownedMU = MU.release(), UnownedMR = MR.release()]() mutable {
-                std::unique_ptr<MaterializationUnit> MU(UnownedMU);
-                std::unique_ptr<MaterializationResponsibility> MR(UnownedMR);
-                MU->materialize(std::move(MR));
-              });
+               MaterializationResponsibility MR) {
+          // FIXME: Switch to move capture once ThreadPool uses unique_function.
+          auto SharedMU = std::shared_ptr<MaterializationUnit>(std::move(MU));
+          auto SharedMR =
+              std::make_shared<MaterializationResponsibility>(std::move(MR));
+          auto Work = [SharedMU, SharedMR]() mutable {
+            SharedMU->materialize(std::move(*SharedMR));
+          };
+          CompileThreads->async(std::move(Work));
         });
   }
diff --git a/llvm/lib/ExecutionEngine/Orc/Layer.cpp b/llvm/lib/ExecutionEngine/Orc/Layer.cpp
index 8052e7b08a5a6..0a5d5577e99e8 100644
--- a/llvm/lib/ExecutionEngine/Orc/Layer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Layer.cpp
@@ -133,7 +133,7 @@ BasicIRLayerMaterializationUnit::BasicIRLayerMaterializationUnit(
       L(L), K(std::move(K)) {}
 
 void BasicIRLayerMaterializationUnit::materialize(
-    std::unique_ptr<MaterializationResponsibility> R) {
+    MaterializationResponsibility R) {
 
   // Throw away the SymbolToDefinition map: it's not usable after we hand
   // off the module.
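// Illustrative sketch, not part of the patch: the LLJIT hunk above replaces a
// release()/re-own dance with shared_ptr wrappers. The underlying idiom is
// general: std::function requires copyable callables, so a move-only value can
// be routed through it via shared ownership, at the cost of the run-at-most-once
// discipline the FIXME alludes to. A stand-alone version of the idiom
// (makeCopyableTask is a hypothetical helper, not an LLVM API):
//
//   template <typename MoveOnly, typename Fn>
//   std::function<void()> makeCopyableTask(MoveOnly Obj, Fn Consume) {
//     auto Shared = std::make_shared<MoveOnly>(std::move(Obj));
//     // The lambda is copyable because it captures only the shared_ptr;
//     // Consume must still run at most once, since it moves from *Shared.
//     return [Shared, Consume] { Consume(std::move(*Shared)); };
//   }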
@@ -144,8 +144,8 @@ void BasicIRLayerMaterializationUnit::materialize( TSM = cloneToNewContext(TSM); #ifndef NDEBUG - auto &ES = R->getTargetJITDylib().getExecutionSession(); - auto &N = R->getTargetJITDylib().getName(); + auto &ES = R.getTargetJITDylib().getExecutionSession(); + auto &N = R.getTargetJITDylib().getName(); #endif // NDEBUG LLVM_DEBUG(ES.runSessionLocked( @@ -200,7 +200,7 @@ StringRef BasicObjectLayerMaterializationUnit::getName() const { } void BasicObjectLayerMaterializationUnit::materialize( - std::unique_ptr R) { + MaterializationResponsibility R) { L.emit(std::move(R), std::move(O)); } diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp index 695f6cc9c1cb4..5e604130d6eab 100644 --- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp @@ -154,8 +154,8 @@ StringRef LazyReexportsMaterializationUnit::getName() const { } void LazyReexportsMaterializationUnit::materialize( - std::unique_ptr R) { - auto RequestedSymbols = R->getRequestedSymbols(); + MaterializationResponsibility R) { + auto RequestedSymbols = R.getRequestedSymbols(); SymbolAliasMap RequestedAliases; for (auto &RequestedSymbol : RequestedSymbols) { @@ -166,8 +166,8 @@ void LazyReexportsMaterializationUnit::materialize( } if (!CallableAliases.empty()) - R->replace(lazyReexports(LCTManager, ISManager, SourceJD, - std::move(CallableAliases), AliaseeTable)); + R.replace(lazyReexports(LCTManager, ISManager, SourceJD, + std::move(CallableAliases), AliaseeTable)); IndirectStubsManager::StubInitsMap StubInits; for (auto &Alias : RequestedAliases) { @@ -182,7 +182,7 @@ void LazyReexportsMaterializationUnit::materialize( if (!CallThroughTrampoline) { SourceJD.getExecutionSession().reportError( CallThroughTrampoline.takeError()); - R->failMaterialization(); + R.failMaterialization(); return; } @@ -195,7 +195,7 @@ void LazyReexportsMaterializationUnit::materialize( if (auto Err = ISManager.createStubs(StubInits)) { SourceJD.getExecutionSession().reportError(std::move(Err)); - R->failMaterialization(); + R.failMaterialization(); return; } @@ -204,8 +204,8 @@ void LazyReexportsMaterializationUnit::materialize( Stubs[Alias.first] = ISManager.findStub(*Alias.first, false); // No registered dependencies, so these calls cannot fail. 
- cantFail(R->notifyResolved(Stubs)); - cantFail(R->notifyEmitted()); + cantFail(R.notifyResolved(Stubs)); + cantFail(R.notifyEmitted()); } void LazyReexportsMaterializationUnit::discard(const JITDylib &JD, diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index 9e3245d9cc991..d8283fa7e3461 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -24,10 +24,9 @@ namespace orc { class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { public: - ObjectLinkingLayerJITLinkContext( - ObjectLinkingLayer &Layer, - std::unique_ptr MR, - std::unique_ptr ObjBuffer) + ObjectLinkingLayerJITLinkContext(ObjectLinkingLayer &Layer, + MaterializationResponsibility MR, + std::unique_ptr ObjBuffer) : Layer(Layer), MR(std::move(MR)), ObjBuffer(std::move(ObjBuffer)) {} ~ObjectLinkingLayerJITLinkContext() { @@ -45,14 +44,14 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { void notifyFailed(Error Err) override { Layer.getExecutionSession().reportError(std::move(Err)); - MR->failMaterialization(); + MR.failMaterialization(); } void lookup(const LookupMap &Symbols, std::unique_ptr LC) override { JITDylibSearchOrder LinkOrder; - MR->getTargetJITDylib().withLinkOrderDo( + MR.getTargetJITDylib().withLinkOrderDo( [&](const JITDylibSearchOrder &LO) { LinkOrder = LO; }); auto &ES = Layer.getExecutionSession(); @@ -86,8 +85,8 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { for (auto &KV : InternalNamedSymbolDeps) { SymbolDependenceMap InternalDeps; - InternalDeps[&MR->getTargetJITDylib()] = std::move(KV.second); - MR->addDependencies(KV.first, InternalDeps); + InternalDeps[&MR.getTargetJITDylib()] = std::move(KV.second); + MR.addDependencies(KV.first, InternalDeps); } ES.lookup(LookupKind::Static, LinkOrder, std::move(LookupSet), @@ -116,7 +115,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { InternedResult[InternedName] = JITEvaluatedSymbol(Sym->getAddress(), Flags); - if (AutoClaim && !MR->getSymbols().count(InternedName)) { + if (AutoClaim && !MR.getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); ExtraSymbolsToClaim[InternedName] = Flags; @@ -134,7 +133,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { Flags |= JITSymbolFlags::Weak; InternedResult[InternedName] = JITEvaluatedSymbol(Sym->getAddress(), Flags); - if (AutoClaim && !MR->getSymbols().count(InternedName)) { + if (AutoClaim && !MR.getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); ExtraSymbolsToClaim[InternedName] = Flags; @@ -142,19 +141,19 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { } if (!ExtraSymbolsToClaim.empty()) - if (auto Err = MR->defineMaterializing(ExtraSymbolsToClaim)) + if (auto Err = MR.defineMaterializing(ExtraSymbolsToClaim)) return Err; { - // Check that InternedResult matches up with MR->getSymbols(). + // Check that InternedResult matches up with MR.getSymbols(). // This guards against faulty transformations / compilers / object caches. // First check that there aren't any missing symbols. 
size_t NumMaterializationSideEffectsOnlySymbols = 0; SymbolNameVector ExtraSymbols; SymbolNameVector MissingSymbols; - for (auto &KV : MR->getSymbols()) { + for (auto &KV : MR.getSymbols()) { // If this is a materialization-side-effects only symbol then bump // the counter and make sure it's *not* defined, otherwise make @@ -176,9 +175,9 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { // If there are more definitions than expected, add them to the // ExtraSymbols vector. if (InternedResult.size() > - MR->getSymbols().size() - NumMaterializationSideEffectsOnlySymbols) { + MR.getSymbols().size() - NumMaterializationSideEffectsOnlySymbols) { for (auto &KV : InternedResult) - if (!MR->getSymbols().count(KV.first)) + if (!MR.getSymbols().count(KV.first)) ExtraSymbols.push_back(KV.first); } @@ -188,23 +187,23 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { std::move(ExtraSymbols)); } - if (auto Err = MR->notifyResolved(InternedResult)) + if (auto Err = MR.notifyResolved(InternedResult)) return Err; - Layer.notifyLoaded(*MR); + Layer.notifyLoaded(MR); return Error::success(); } void notifyFinalized( std::unique_ptr A) override { - if (auto Err = Layer.notifyEmitted(*MR, std::move(A))) { + if (auto Err = Layer.notifyEmitted(MR, std::move(A))) { Layer.getExecutionSession().reportError(std::move(Err)); - MR->failMaterialization(); + MR.failMaterialization(); return; } - if (auto Err = MR->notifyEmitted()) { + if (auto Err = MR.notifyEmitted()) { Layer.getExecutionSession().reportError(std::move(Err)); - MR->failMaterialization(); + MR.failMaterialization(); } } @@ -218,7 +217,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { Config.PrePrunePasses.push_back( [this](LinkGraph &G) { return externalizeWeakAndCommonSymbols(G); }); - Layer.modifyPassConfig(*MR, TT, Config); + Layer.modifyPassConfig(MR, TT, Config); Config.PostPrunePasses.push_back( [this](LinkGraph &G) { return computeNamedSymbolDependencies(G); }); @@ -238,13 +237,13 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { auto &ES = Layer.getExecutionSession(); for (auto *Sym : G.defined_symbols()) if (Sym->hasName() && Sym->getLinkage() == Linkage::Weak) { - if (!MR->getSymbols().count(ES.intern(Sym->getName()))) + if (!MR.getSymbols().count(ES.intern(Sym->getName()))) G.makeExternal(*Sym); } for (auto *Sym : G.absolute_symbols()) if (Sym->hasName() && Sym->getLinkage() == Linkage::Weak) { - if (!MR->getSymbols().count(ES.intern(Sym->getName()))) + if (!MR.getSymbols().count(ES.intern(Sym->getName()))) G.makeExternal(*Sym); } @@ -254,13 +253,13 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { Error markResponsibilitySymbolsLive(LinkGraph &G) const { auto &ES = Layer.getExecutionSession(); for (auto *Sym : G.defined_symbols()) - if (Sym->hasName() && MR->getSymbols().count(ES.intern(Sym->getName()))) + if (Sym->hasName() && MR.getSymbols().count(ES.intern(Sym->getName()))) Sym->setLive(true); return Error::success(); } Error computeNamedSymbolDependencies(LinkGraph &G) { - auto &ES = MR->getTargetJITDylib().getExecutionSession(); + auto &ES = MR.getTargetJITDylib().getExecutionSession(); auto LocalDeps = computeLocalDeps(G); // Compute dependencies for symbols defined in the JITLink graph. 
@@ -307,7 +306,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { } for (auto &P : Layer.Plugins) { - auto SyntheticLocalDeps = P->getSyntheticSymbolLocalDependencies(*MR); + auto SyntheticLocalDeps = P->getSyntheticSymbolLocalDependencies(MR); if (SyntheticLocalDeps.empty()) continue; @@ -427,12 +426,12 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { SymbolDeps.erase(&SourceJD); } - MR->addDependencies(Name, SymbolDeps); + MR.addDependencies(Name, SymbolDeps); } } ObjectLinkingLayer &Layer; - std::unique_ptr MR; + MaterializationResponsibility MR; std::unique_ptr ObjBuffer; DenseMap ExternalNamedSymbolDeps; DenseMap InternalNamedSymbolDeps; @@ -453,7 +452,7 @@ ObjectLinkingLayer::~ObjectLinkingLayer() { getExecutionSession().reportError(std::move(Err)); } -void ObjectLinkingLayer::emit(std::unique_ptr R, +void ObjectLinkingLayer::emit(MaterializationResponsibility R, std::unique_ptr O) { assert(O && "Object must not be null"); jitLink(std::make_unique( diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp index a57662e10a794..d18eb38a41423 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp @@ -17,9 +17,8 @@ ObjectTransformLayer::ObjectTransformLayer(ExecutionSession &ES, TransformFunction Transform) : ObjectLayer(ES), BaseLayer(BaseLayer), Transform(std::move(Transform)) {} -void ObjectTransformLayer::emit( - std::unique_ptr R, - std::unique_ptr O) { +void ObjectTransformLayer::emit(MaterializationResponsibility R, + std::unique_ptr O) { assert(O && "Module must not be null"); // If there is a transform set then apply it. @@ -27,7 +26,7 @@ void ObjectTransformLayer::emit( if (auto TransformedObj = Transform(std::move(O))) O = std::move(*TransformedObj); else { - R->failMaterialization(); + R.failMaterialization(); getExecutionSession().reportError(TransformedObj.takeError()); return; } diff --git a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp index 1981039eb9f12..7888c2fcbdbd9 100644 --- a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp @@ -89,18 +89,23 @@ RTDyldObjectLinkingLayer::~RTDyldObjectLinkingLayer() { } } -void RTDyldObjectLinkingLayer::emit( - std::unique_ptr R, - std::unique_ptr O) { +void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R, + std::unique_ptr O) { assert(O && "Object must not be null"); + // This method launches an asynchronous link step that will fulfill our + // materialization responsibility. We need to switch R to be heap + // allocated before that happens so it can live as long as the asynchronous + // link needs it to (i.e. it must be able to outlive this method). + auto SharedR = std::make_shared(std::move(R)); + auto &ES = getExecutionSession(); auto Obj = object::ObjectFile::createObjectFile(*O); if (!Obj) { getExecutionSession().reportError(Obj.takeError()); - R->failMaterialization(); + SharedR->failMaterialization(); return; } @@ -116,7 +121,7 @@ void RTDyldObjectLinkingLayer::emit( continue; } else { ES.reportError(SymType.takeError()); - R->failMaterialization(); + R.failMaterialization(); return; } @@ -124,7 +129,7 @@ void RTDyldObjectLinkingLayer::emit( if (!SymFlagsOrErr) { // TODO: Test this error. 
ES.reportError(SymFlagsOrErr.takeError()); - R->failMaterialization(); + R.failMaterialization(); return; } @@ -134,14 +139,14 @@ void RTDyldObjectLinkingLayer::emit( InternalSymbols->insert(*SymName); else { ES.reportError(SymName.takeError()); - R->failMaterialization(); + R.failMaterialization(); return; } } } } - auto K = R->getVModuleKey(); + auto K = R.getVModuleKey(); RuntimeDyld::MemoryManager *MemMgr = nullptr; // Create a record a memory manager for this object. @@ -152,10 +157,6 @@ void RTDyldObjectLinkingLayer::emit( MemMgr = MemMgrs.back().get(); } - // Switch to shared ownership of MR so that it can be captured by both - // lambdas below. - std::shared_ptr SharedR(std::move(R)); - JITDylibSearchOrderResolver Resolver(*SharedR); jitLinkForORC( diff --git a/llvm/lib/ExecutionEngine/Orc/Speculation.cpp b/llvm/lib/ExecutionEngine/Orc/Speculation.cpp index 0b4755fe23cfc..3dd536d8253e3 100644 --- a/llvm/lib/ExecutionEngine/Orc/Speculation.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Speculation.cpp @@ -55,7 +55,7 @@ Error Speculator::addSpeculationRuntime(JITDylib &JD, // If two modules, share the same LLVMContext, different threads must // not access them concurrently without locking the associated LLVMContext // this implementation follows this contract. -void IRSpeculationLayer::emit(std::unique_ptr R, +void IRSpeculationLayer::emit(MaterializationResponsibility R, ThreadSafeModule TSM) { assert(TSM && "Speculation Layer received Null Module ?"); @@ -127,7 +127,7 @@ void IRSpeculationLayer::emit(std::unique_ptr R, assert(Mutator.GetInsertBlock()->getParent() == &Fn && "IR builder association mismatch?"); S.registerSymbols(internToJITSymbols(IRNames.getValue()), - &R->getTargetJITDylib()); + &R.getTargetJITDylib()); } } } diff --git a/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp b/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp index 9a1dbbb172517..2c008dfdbd33e 100644 --- a/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp @@ -35,12 +35,12 @@ TEST_F(CoreAPIsStandardTest, BasicSuccessfulLookup) { OnCompletionRun = true; }; - std::unique_ptr FooMR; + std::shared_ptr FooMR; cantFail(JD.define(std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](std::unique_ptr R) { - FooMR = std::move(R); + [&](MaterializationResponsibility R) { + FooMR = std::make_shared(std::move(R)); }))); ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), @@ -99,9 +99,9 @@ TEST_F(CoreAPIsStandardTest, ResolveUnrequestedSymbol) { cantFail(JD.define(std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}, {Bar, BarSym.getFlags()}}), - [this](std::unique_ptr R) { - cantFail(R->notifyResolved({{Foo, FooSym}, {Bar, BarSym}})); - cantFail(R->notifyEmitted()); + [this](MaterializationResponsibility R) { + cantFail(R.notifyResolved({{Foo, FooSym}, {Bar, BarSym}})); + cantFail(R.notifyEmitted()); }))); auto Result = @@ -116,16 +116,14 @@ TEST_F(CoreAPIsStandardTest, MaterializationSideEffctsOnlyBasic) { // don't return until they're emitted, and that they don't appear in query // results. 
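// Illustrative note, not part of the patch: the unit-test hunks below all
// follow one pattern. MaterializationResponsibility is move-only and has no
// default constructor, so a test that wants to stash one from inside a
// materializer lambda uses llvm::Optional and in-place construction:
//
//   Optional<MaterializationResponsibility> FooR;
//   ... [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); } ...
//   cantFail(FooR->notifyResolved({{Foo, FooSym}})); // later, via -> and *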
- std::unique_ptr FooR; + Optional FooR; Optional Result; cantFail(JD.define(std::make_unique( SymbolFlagsMap( {{Foo, JITSymbolFlags::Exported | JITSymbolFlags::MaterializationSideEffectsOnly}}), - [&](std::unique_ptr R) { - FooR = std::move(R); - }))); + [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }))); ES.lookup( LookupKind::Static, makeJITDylibSearchOrder(&JD), @@ -157,9 +155,7 @@ TEST_F(CoreAPIsStandardTest, MaterializationSideEffectsOnlyFailuresPersist) { SymbolFlagsMap( {{Foo, JITSymbolFlags::Exported | JITSymbolFlags::MaterializationSideEffectsOnly}}), - [&](std::unique_ptr R) { - R->failMaterialization(); - }))); + [&](MaterializationResponsibility R) { R.failMaterialization(); }))); EXPECT_THAT_EXPECTED( ES.lookup(makeJITDylibSearchOrder(&JD), SymbolLookupSet({Foo})), @@ -186,10 +182,10 @@ TEST_F(CoreAPIsStandardTest, RemoveSymbolsTest) { bool BarMaterializerDestructed = false; cantFail(JD.define(std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [this](std::unique_ptr R) { + [this](MaterializationResponsibility R) { ADD_FAILURE() << "Unexpected materialization of \"Bar\""; - cantFail(R->notifyResolved({{Bar, BarSym}})); - cantFail(R->notifyEmitted()); + cantFail(R.notifyResolved({{Bar, BarSym}})); + cantFail(R.notifyEmitted()); }, nullptr, [&](const JITDylib &JD, const SymbolStringPtr &Name) { @@ -201,12 +197,10 @@ TEST_F(CoreAPIsStandardTest, RemoveSymbolsTest) { // Baz will be in the materializing state initially, then // materialized for the final removal attempt. - std::unique_ptr BazR; + Optional BazR; cantFail(JD.define(std::make_unique( SymbolFlagsMap({{Baz, BazSym.getFlags()}}), - [&](std::unique_ptr R) { - BazR = std::move(R); - }, + [&](MaterializationResponsibility R) { BazR.emplace(std::move(R)); }, nullptr, [](const JITDylib &JD, const SymbolStringPtr &Name) { ADD_FAILURE() << "\"Baz\" discarded unexpectedly"; @@ -303,7 +297,7 @@ TEST_F(CoreAPIsStandardTest, LookupFlagsTest) { JITSymbolFlags::Exported | JITSymbolFlags::Weak)); auto MU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [](std::unique_ptr R) { + [](MaterializationResponsibility R) { llvm_unreachable("Symbol materialized on flags lookup"); }); @@ -406,10 +400,10 @@ TEST_F(CoreAPIsStandardTest, TestThatReExportsDontUnnecessarilyMaterialize) { bool BarMaterialized = false; auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](std::unique_ptr R) { + [&](MaterializationResponsibility R) { BarMaterialized = true; - cantFail(R->notifyResolved({{Bar, BarSym}})); - cantFail(R->notifyEmitted()); + cantFail(R.notifyResolved({{Bar, BarSym}})); + cantFail(R.notifyEmitted()); }); cantFail(JD.define(BarMU)); @@ -450,12 +444,10 @@ TEST_F(CoreAPIsStandardTest, TestReexportsGenerator) { } TEST_F(CoreAPIsStandardTest, TestTrivialCircularDependency) { - std::unique_ptr FooR; + Optional FooR; auto FooMU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](std::unique_ptr R) { - FooR = std::move(R); - }); + [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }); cantFail(JD.define(FooMU)); @@ -484,29 +476,26 @@ TEST_F(CoreAPIsStandardTest, TestCircularDependenceInOneJITDylib) { // does not prevent any symbol from becoming 'ready' once all symbols are // emitted. - std::unique_ptr FooR; - std::unique_ptr BarR; - std::unique_ptr BazR; + // Create three MaterializationResponsibility objects: one for each of Foo, + // Bar and Baz. 
These are optional because MaterializationResponsibility + // does not have a default constructor). + Optional FooR; + Optional BarR; + Optional BazR; // Create a MaterializationUnit for each symbol that moves the // MaterializationResponsibility into one of the locals above. auto FooMU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](std::unique_ptr R) { - FooR = std::move(R); - }); + [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }); auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](std::unique_ptr R) { - BarR = std::move(R); - }); + [&](MaterializationResponsibility R) { BarR.emplace(std::move(R)); }); auto BazMU = std::make_unique( SymbolFlagsMap({{Baz, BazSym.getFlags()}}), - [&](std::unique_ptr R) { - BazR = std::move(R); - }); + [&](MaterializationResponsibility R) { BazR.emplace(std::move(R)); }); // Define the symbols. cantFail(JD.define(FooMU)); @@ -633,22 +622,18 @@ TEST_F(CoreAPIsStandardTest, TestCircularDependenceInOneJITDylib) { } TEST_F(CoreAPIsStandardTest, FailureInDependency) { - std::unique_ptr FooR; - std::unique_ptr BarR; + Optional FooR; + Optional BarR; // Create a MaterializationUnit for each symbol that moves the // MaterializationResponsibility into one of the locals above. auto FooMU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](std::unique_ptr R) { - FooR = std::move(R); - }); + [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }); auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](std::unique_ptr R) { - BarR = std::move(R); - }); + [&](MaterializationResponsibility R) { BarR.emplace(std::move(R)); }); // Define the symbols. cantFail(JD.define(FooMU)); @@ -702,22 +687,18 @@ TEST_F(CoreAPIsStandardTest, FailureInDependency) { } TEST_F(CoreAPIsStandardTest, FailureInCircularDependency) { - std::unique_ptr FooR; - std::unique_ptr BarR; + Optional FooR; + Optional BarR; // Create a MaterializationUnit for each symbol that moves the // MaterializationResponsibility into one of the locals above. auto FooMU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](std::unique_ptr R) { - FooR = std::move(R); - }); + [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }); auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](std::unique_ptr R) { - BarR = std::move(R); - }); + [&](MaterializationResponsibility R) { BarR.emplace(std::move(R)); }); // Define the symbols. cantFail(JD.define(FooMU)); @@ -772,22 +753,18 @@ TEST_F(CoreAPIsStandardTest, FailureInCircularDependency) { } TEST_F(CoreAPIsStandardTest, AddDependencyOnFailedSymbol) { - std::unique_ptr FooR; - std::unique_ptr BarR; + Optional FooR; + Optional BarR; // Create a MaterializationUnit for each symbol that moves the // MaterializationResponsibility into one of the locals above. auto FooMU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](std::unique_ptr R) { - FooR = std::move(R); - }); + [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }); auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](std::unique_ptr R) { - BarR = std::move(R); - }); + [&](MaterializationResponsibility R) { BarR.emplace(std::move(R)); }); // Define the symbols. 
cantFail(JD.define(FooMU)); @@ -842,22 +819,18 @@ TEST_F(CoreAPIsStandardTest, AddDependencyOnFailedSymbol) { } TEST_F(CoreAPIsStandardTest, FailAfterMaterialization) { - std::unique_ptr FooR; - std::unique_ptr BarR; + Optional FooR; + Optional BarR; // Create a MaterializationUnit for each symbol that moves the // MaterializationResponsibility into one of the locals above. auto FooMU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](std::unique_ptr R) { - FooR = std::move(R); - }); + [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }); auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](std::unique_ptr R) { - BarR = std::move(R); - }); + [&](MaterializationResponsibility R) { BarR.emplace(std::move(R)); }); // Define the symbols. cantFail(JD.define(FooMU)); @@ -909,9 +882,9 @@ TEST_F(CoreAPIsStandardTest, FailMaterializerWithUnqueriedSymbols) { auto MU = std::make_unique( SymbolFlagsMap( {{Foo, JITSymbolFlags::Exported}, {Bar, JITSymbolFlags::Exported}}), - [&](std::unique_ptr R) { + [&](MaterializationResponsibility R) { MaterializerRun = true; - R->failMaterialization(); + R.failMaterialization(); }); cantFail(JD.define(std::move(MU))); @@ -938,7 +911,7 @@ TEST_F(CoreAPIsStandardTest, DropMaterializerWhenEmpty) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, WeakExported}, {Bar, WeakExported}}), - [](std::unique_ptr R) { + [](MaterializationResponsibility R) { llvm_unreachable("Unexpected call to materialize"); }, nullptr, @@ -970,10 +943,10 @@ TEST_F(CoreAPIsStandardTest, AddAndMaterializeLazySymbol) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, JITSymbolFlags::Exported}, {Bar, WeakExported}}), - [&](std::unique_ptr R) { + [&](MaterializationResponsibility R) { assert(BarDiscarded && "Bar should have been discarded by this point"); - cantFail(R->notifyResolved(SymbolMap({{Foo, FooSym}}))); - cantFail(R->notifyEmitted()); + cantFail(R.notifyResolved(SymbolMap({{Foo, FooSym}}))); + cantFail(R.notifyEmitted()); FooMaterialized = true; }, nullptr, @@ -1012,18 +985,18 @@ TEST_F(CoreAPIsStandardTest, TestBasicWeakSymbolMaterialization) { bool BarMaterialized = false; auto MU1 = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}, {Bar, BarSym.getFlags()}}), - [&](std::unique_ptr R) { - cantFail(R->notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); - cantFail(R->notifyEmitted()); + [&](MaterializationResponsibility R) { + cantFail(R.notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); + cantFail(R.notifyEmitted()); BarMaterialized = true; }); bool DuplicateBarDiscarded = false; auto MU2 = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](std::unique_ptr R) { + [&](MaterializationResponsibility R) { ADD_FAILURE() << "Attempt to materialize Bar from the wrong unit"; - R->failMaterialization(); + R.failMaterialization(); }, nullptr, [&](const JITDylib &JD, SymbolStringPtr Name) { @@ -1053,21 +1026,20 @@ TEST_F(CoreAPIsStandardTest, TestBasicWeakSymbolMaterialization) { TEST_F(CoreAPIsStandardTest, DefineMaterializingSymbol) { bool ExpectNoMoreMaterialization = false; - ES.setDispatchMaterialization( - [&](std::unique_ptr MU, - std::unique_ptr MR) { - if (ExpectNoMoreMaterialization) - ADD_FAILURE() << "Unexpected materialization"; - MU->materialize(std::move(MR)); - }); + ES.setDispatchMaterialization([&](std::unique_ptr MU, + MaterializationResponsibility MR) { + if (ExpectNoMoreMaterialization) + ADD_FAILURE() << "Unexpected materialization"; + MU->materialize(std::move(MR)); + 
}); auto MU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](std::unique_ptr R) { + [&](MaterializationResponsibility R) { cantFail( - R->defineMaterializing(SymbolFlagsMap({{Bar, BarSym.getFlags()}}))); - cantFail(R->notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); - cantFail(R->notifyEmitted()); + R.defineMaterializing(SymbolFlagsMap({{Bar, BarSym.getFlags()}}))); + cantFail(R.notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); + cantFail(R.notifyEmitted()); }); cantFail(JD.define(MU)); @@ -1121,8 +1093,8 @@ TEST_F(CoreAPIsStandardTest, FailResolution) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, JITSymbolFlags::Exported | JITSymbolFlags::Weak}, {Bar, JITSymbolFlags::Exported | JITSymbolFlags::Weak}}), - [&](std::unique_ptr R) { - R->failMaterialization(); + [&](MaterializationResponsibility R) { + R.failMaterialization(); }); cantFail(JD.define(MU)); @@ -1157,23 +1129,23 @@ TEST_F(CoreAPIsStandardTest, FailEmissionAfterResolution) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}, {Bar, BarSym.getFlags()}}), - [&](std::unique_ptr R) { - cantFail(R->notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); + [&](MaterializationResponsibility R) { + cantFail(R.notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); ES.lookup( LookupKind::Static, makeJITDylibSearchOrder(&JD), SymbolLookupSet({Baz}), SymbolState::Resolved, - [&](Expected Result) { + [&R](Expected Result) { // Called when "baz" is resolved. We don't actually depend // on or care about baz, but use it to trigger failure of // this materialization before Baz has been finalized in // order to test that error propagation is correct in this // scenario. cantFail(std::move(Result)); - R->failMaterialization(); + R.failMaterialization(); }, [&](const SymbolDependenceMap &Deps) { - R->addDependenciesForAll(Deps); + R.addDependenciesForAll(Deps); }); }); @@ -1193,9 +1165,7 @@ TEST_F(CoreAPIsStandardTest, FailAfterPartialResolution) { // Fail materialization of bar. 
auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](std::unique_ptr R) { - R->failMaterialization(); - }); + [&](MaterializationResponsibility R) { R.failMaterialization(); }); cantFail(JD.define(std::move(BarMU))); @@ -1215,9 +1185,9 @@ TEST_F(CoreAPIsStandardTest, FailAfterPartialResolution) { TEST_F(CoreAPIsStandardTest, TestLookupWithUnthreadedMaterialization) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, JITSymbolFlags::Exported}}), - [&](std::unique_ptr R) { - cantFail(R->notifyResolved({{Foo, FooSym}})); - cantFail(R->notifyEmitted()); + [&](MaterializationResponsibility R) { + cantFail(R.notifyResolved({{Foo, FooSym}})); + cantFail(R.notifyEmitted()); }); cantFail(JD.define(MU)); @@ -1234,14 +1204,15 @@ TEST_F(CoreAPIsStandardTest, TestLookupWithThreadedMaterialization) { #if LLVM_ENABLE_THREADS std::thread MaterializationThread; - ES.setDispatchMaterialization( - [&](std::unique_ptr MU, - std::unique_ptr MR) { - MaterializationThread = - std::thread([MU = std::move(MU), MR = std::move(MR)]() mutable { - MU->materialize(std::move(MR)); - }); - }); + ES.setDispatchMaterialization([&](std::unique_ptr MU, + MaterializationResponsibility MR) { + auto SharedMR = + std::make_shared(std::move(MR)); + MaterializationThread = + std::thread([MU = std::move(MU), MR = std::move(SharedMR)] { + MU->materialize(std::move(*MR)); + }); + }); cantFail(JD.define(absoluteSymbols({{Foo, FooSym}}))); @@ -1267,23 +1238,23 @@ TEST_F(CoreAPIsStandardTest, TestGetRequestedSymbolsAndReplace) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}, {Bar, BarSym.getFlags()}}), - [&](std::unique_ptr R) { - auto Requested = R->getRequestedSymbols(); + [&](MaterializationResponsibility R) { + auto Requested = R.getRequestedSymbols(); EXPECT_EQ(Requested.size(), 1U) << "Expected one symbol requested"; EXPECT_EQ(*Requested.begin(), Foo) << "Expected \"Foo\" requested"; auto NewMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](std::unique_ptr R2) { - cantFail(R2->notifyResolved(SymbolMap({{Bar, BarSym}}))); - cantFail(R2->notifyEmitted()); + [&](MaterializationResponsibility R2) { + cantFail(R2.notifyResolved(SymbolMap({{Bar, BarSym}}))); + cantFail(R2.notifyEmitted()); BarMaterialized = true; }); - R->replace(std::move(NewMU)); + R.replace(std::move(NewMU)); - cantFail(R->notifyResolved(SymbolMap({{Foo, FooSym}}))); - cantFail(R->notifyEmitted()); + cantFail(R.notifyResolved(SymbolMap({{Foo, FooSym}}))); + cantFail(R.notifyEmitted()); FooMaterialized = true; }); @@ -1309,13 +1280,13 @@ TEST_F(CoreAPIsStandardTest, TestGetRequestedSymbolsAndReplace) { TEST_F(CoreAPIsStandardTest, TestMaterializationResponsibilityDelegation) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}, {Bar, BarSym.getFlags()}}), - [&](std::unique_ptr R) { - auto R2 = R->delegate({Bar}); + [&](MaterializationResponsibility R) { + auto R2 = R.delegate({Bar}); - cantFail(R->notifyResolved({{Foo, FooSym}})); - cantFail(R->notifyEmitted()); - cantFail(R2->notifyResolved({{Bar, BarSym}})); - cantFail(R2->notifyEmitted()); + cantFail(R.notifyResolved({{Foo, FooSym}})); + cantFail(R.notifyEmitted()); + cantFail(R2.notifyResolved({{Bar, BarSym}})); + cantFail(R2.notifyEmitted()); }); cantFail(JD.define(MU)); @@ -1338,11 +1309,12 @@ TEST_F(CoreAPIsStandardTest, TestMaterializeWeakSymbol) { JITSymbolFlags WeakExported = JITSymbolFlags::Exported; WeakExported &= JITSymbolFlags::Weak; - std::unique_ptr FooR; + std::unique_ptr FooResponsibility; auto MU = 
std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](std::unique_ptr R) { - FooR = std::move(R); + [&](MaterializationResponsibility R) { + FooResponsibility = + std::make_unique(std::move(R)); }); cantFail(JD.define(MU)); @@ -1356,7 +1328,7 @@ TEST_F(CoreAPIsStandardTest, TestMaterializeWeakSymbol) { auto MU2 = std::make_unique( SymbolFlagsMap({{Foo, JITSymbolFlags::Exported}}), - [](std::unique_ptr R) { + [](MaterializationResponsibility R) { llvm_unreachable("This unit should never be materialized"); }); @@ -1367,8 +1339,8 @@ TEST_F(CoreAPIsStandardTest, TestMaterializeWeakSymbol) { consumeError(std::move(Err)); // No dependencies registered, can't fail: - cantFail(FooR->notifyResolved(SymbolMap({{Foo, FooSym}}))); - cantFail(FooR->notifyEmitted()); + cantFail(FooResponsibility->notifyResolved(SymbolMap({{Foo, FooSym}}))); + cantFail(FooResponsibility->notifyEmitted()); } static bool linkOrdersEqual(const std::vector> &LHS, diff --git a/llvm/unittests/ExecutionEngine/Orc/LazyCallThroughAndReexportsTest.cpp b/llvm/unittests/ExecutionEngine/Orc/LazyCallThroughAndReexportsTest.cpp index 81ff3e7a87b30..50e7b60a2df4e 100644 --- a/llvm/unittests/ExecutionEngine/Orc/LazyCallThroughAndReexportsTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/LazyCallThroughAndReexportsTest.cpp @@ -39,15 +39,15 @@ TEST_F(LazyReexportsTest, BasicLocalCallThroughManagerOperation) { cantFail(JD.define(std::make_unique( SymbolFlagsMap({{DummyTarget, JITSymbolFlags::Exported}}), - [&](std::unique_ptr R) { + [&](MaterializationResponsibility R) { DummyTargetMaterialized = true; // No dependencies registered, can't fail. - cantFail(R->notifyResolved( + cantFail(R.notifyResolved( {{DummyTarget, JITEvaluatedSymbol(static_cast( reinterpret_cast(&dummyTarget)), JITSymbolFlags::Exported)}})); - cantFail(R->notifyEmitted()); + cantFail(R.notifyEmitted()); }))); unsigned NotifyResolvedCount = 0; diff --git a/llvm/unittests/ExecutionEngine/Orc/OrcTestCommon.h b/llvm/unittests/ExecutionEngine/Orc/OrcTestCommon.h index afbc4a9ffaa5c..b25851d8f796c 100644 --- a/llvm/unittests/ExecutionEngine/Orc/OrcTestCommon.h +++ b/llvm/unittests/ExecutionEngine/Orc/OrcTestCommon.h @@ -86,7 +86,7 @@ class OrcNativeTarget { class SimpleMaterializationUnit : public orc::MaterializationUnit { public: using MaterializeFunction = - std::function)>; + std::function; using DiscardFunction = std::function; using DestructorFunction = std::function; @@ -108,8 +108,7 @@ class SimpleMaterializationUnit : public orc::MaterializationUnit { StringRef getName() const override { return ""; } - void - materialize(std::unique_ptr R) override { + void materialize(orc::MaterializationResponsibility R) override { Materialize(std::move(R)); } From a0e0d30a29841fe6cc854f3949f12bb523814d7a Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 10 Sep 2020 17:56:15 +0200 Subject: [PATCH 0345/1079] [mlir][Linalg] Print both types for linalg.transpose Previously only the input type was printed, and the parser applied it to both input and output, creating an invalid transpose. Print and parse both types, and verify that they match. 
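For example, where the printer previously emitted only the source type,

  %1 = linalg.transpose %0 (i, j) -> (j, i) : memref<?x?xf32, #map0>

it now emits both, and the verifier checks that the result type equals the
type inferred by composing the permutation with the source layout
(#map0/#map1 here are placeholder layout names, not taken from the patch):

  %1 = linalg.transpose %0 (i, j) -> (j, i)
         : memref<?x?xf32, #map0> to memref<?x?xf32, #map1>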
Differential Revision: https://reviews.llvm.org/D87462 --- .../mlir/Dialect/Linalg/IR/LinalgOps.td | 10 +--- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 50 +++++++++++++------ mlir/test/Dialect/Linalg/invalid.mlir | 11 +++- mlir/test/Dialect/Linalg/llvm.mlir | 2 +- mlir/test/Dialect/Linalg/roundtrip.mlir | 5 +- 5 files changed, 51 insertions(+), 27 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td index 1366e920039bf..a7855e6327b20 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td @@ -300,7 +300,7 @@ def Linalg_TransposeOp : Linalg_Op<"transpose", [NoSideEffect]>, Example: ```mlir - %1 = linalg.transpose %0 (i, j) -> (j, i) : memref + %1 = linalg.transpose %0 (i, j) -> (j, i) : memref to memref ``` }]; @@ -308,13 +308,7 @@ def Linalg_TransposeOp : Linalg_Op<"transpose", [NoSideEffect]>, "OpBuilder &b, OperationState &result, Value view, " "AffineMapAttr permutation, ArrayRef attrs = {}">]; - let verifier = [{ - if (!permutation().isPermutation()) - return emitOpError("expected a permutation map"); - if (permutation().getNumDims() != getShapedType().getRank()) - return emitOpError("expected a permutation map of same rank as the view"); - return success(); - }]; + let verifier = [{ return ::verify(*this); }]; let extraClassDeclaration = [{ static StringRef getPermutationAttrName() { return "permutation"; } diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index fcead984dfe55..77eb644894779 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -846,13 +846,9 @@ Value SliceOp::getViewSource() { return view(); } //===----------------------------------------------------------------------===// // TransposeOp //===----------------------------------------------------------------------===// -void mlir::linalg::TransposeOp::build(OpBuilder &b, OperationState &result, - Value view, AffineMapAttr permutation, - ArrayRef attrs) { - auto permutationMap = permutation.getValue(); - assert(permutationMap); - auto memRefType = view.getType().cast(); +static MemRefType inferTransposeResultType(MemRefType memRefType, + AffineMap permutationMap) { auto rank = memRefType.getRank(); auto originalSizes = memRefType.getShape(); // Compute permuted sizes. @@ -867,11 +863,21 @@ void mlir::linalg::TransposeOp::build(OpBuilder &b, OperationState &result, auto res = getStridesAndOffset(memRefType, strides, offset); assert(succeeded(res) && strides.size() == static_cast(rank)); (void)res; - auto map = makeStridedLinearLayoutMap(strides, offset, b.getContext()); + auto map = + makeStridedLinearLayoutMap(strides, offset, memRefType.getContext()); map = permutationMap ? map.compose(permutationMap) : map; + return MemRefType::Builder(memRefType).setShape(sizes).setAffineMaps(map); +} + +void mlir::linalg::TransposeOp::build(OpBuilder &b, OperationState &result, + Value view, AffineMapAttr permutation, + ArrayRef attrs) { + auto permutationMap = permutation.getValue(); + assert(permutationMap); + + auto memRefType = view.getType().cast(); // Compute result type. 
- MemRefType resultType = - MemRefType::Builder(memRefType).setShape(sizes).setAffineMaps(map); + MemRefType resultType = inferTransposeResultType(memRefType, permutationMap); build(b, result, resultType, view, attrs); result.addAttribute(TransposeOp::getPermutationAttrName(), permutation); @@ -881,19 +887,20 @@ static void print(OpAsmPrinter &p, TransposeOp op) { p << op.getOperationName() << " " << op.view() << " " << op.permutation(); p.printOptionalAttrDict(op.getAttrs(), {TransposeOp::getPermutationAttrName()}); - p << " : " << op.view().getType(); + p << " : " << op.view().getType() << " to " << op.getType(); } static ParseResult parseTransposeOp(OpAsmParser &parser, OperationState &result) { OpAsmParser::OperandType view; AffineMap permutation; - MemRefType type; + MemRefType srcType, dstType; if (parser.parseOperand(view) || parser.parseAffineMap(permutation) || parser.parseOptionalAttrDict(result.attributes) || - parser.parseColonType(type) || - parser.resolveOperand(view, type, result.operands) || - parser.addTypeToList(type, result.types)) + parser.parseColonType(srcType) || + parser.resolveOperand(view, srcType, result.operands) || + parser.parseKeywordType("to", dstType) || + parser.addTypeToList(dstType, result.types)) return failure(); result.addAttribute(TransposeOp::getPermutationAttrName(), @@ -901,6 +908,21 @@ static ParseResult parseTransposeOp(OpAsmParser &parser, return success(); } +static LogicalResult verify(TransposeOp op) { + if (!op.permutation().isPermutation()) + return op.emitOpError("expected a permutation map"); + if (op.permutation().getNumDims() != op.getShapedType().getRank()) + return op.emitOpError( + "expected a permutation map of same rank as the view"); + + auto srcType = op.view().getType().cast(); + auto dstType = op.getType().cast(); + if (dstType != inferTransposeResultType(srcType, op.permutation())) + return op.emitOpError("output type ") + << dstType << " does not match transposed input type " << srcType; + return success(); +} + //===----------------------------------------------------------------------===// // YieldOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir index ca59ecd387ec3..c631c47099b08 100644 --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -35,14 +35,21 @@ func @store_number_of_indices(%v : memref) { func @transpose_not_permutation(%v : memref(off + M * i + j)>>) { // expected-error @+1 {{expected a permutation map}} - linalg.transpose %v (i, j) -> (i, i) : memref(off + M * i + j)>> + linalg.transpose %v (i, j) -> (i, i) : memref(off + M * i + j)>> to memref(off + M * i + j)>> } // ----- func @transpose_bad_rank(%v : memref(off + M * i + j)>>) { // expected-error @+1 {{expected a permutation map of same rank as the view}} - linalg.transpose %v (i) -> (i) : memref(off + M * i + j)>> + linalg.transpose %v (i) -> (i) : memref(off + M * i + j)>> to memref(off + M * i + j)>> +} + +// ----- + +func @transpose_wrong_type(%v : memref(off + M * i + j)>>) { + // expected-error @+1 {{output type 'memref (d0 * s1 + s0 + d1)>>' does not match transposed input type 'memref (d0 * s1 + s0 + d1)>>'}} + linalg.transpose %v (i, j) -> (j, i) : memref(off + M * i + j)>> to memref(off + M * i + j)>> } // ----- diff --git a/mlir/test/Dialect/Linalg/llvm.mlir b/mlir/test/Dialect/Linalg/llvm.mlir index 02693e5d1be46..c8031824d6307 100644 --- a/mlir/test/Dialect/Linalg/llvm.mlir +++ 
b/mlir/test/Dialect/Linalg/llvm.mlir @@ -70,7 +70,7 @@ func @slice_with_range_and_index(%arg0: memref, ptr, i64, array<1 x i64>, array<1 x i64>)> func @transpose(%arg0: memref) { - %0 = linalg.transpose %arg0 (i, j, k) -> (k, i, j) : memref + %0 = linalg.transpose %arg0 (i, j, k) -> (k, i, j) : memref to memref (d2 * s1 + s0 + d0 * s2 + d1)>> return } // CHECK-LABEL: func @transpose diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir index 2696643246972..404c978fa61bb 100644 --- a/mlir/test/Dialect/Linalg/roundtrip.mlir +++ b/mlir/test/Dialect/Linalg/roundtrip.mlir @@ -123,14 +123,15 @@ func @fill_view(%arg0: memref, %arg1: f32) { // ----- // CHECK-DAG: #[[$strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> +// CHECK-DAG: #[[$strided3DT:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2 * s1 + s0 + d1 * s2 + d0)> func @transpose(%arg0: memref) { - %0 = linalg.transpose %arg0 (i, j, k) -> (k, j, i) : memref + %0 = linalg.transpose %arg0 (i, j, k) -> (k, j, i) : memref to memref (d2 * s1 + s0 + d1 * s2 + d0)>> return } // CHECK-LABEL: func @transpose // CHECK: linalg.transpose %{{.*}} ([[i:.*]], [[j:.*]], [[k:.*]]) -> ([[k]], [[j]], [[i]]) : -// CHECK-SAME: memref +// CHECK-SAME: memref to memref // ----- From 5405ee553a631dd8cd18eed8ed9e76ec318febcb Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 11 Sep 2020 11:24:08 +0200 Subject: [PATCH 0346/1079] [CodeGenPrepare] Simplify code. NFCI. --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 9a4ed2fab608b..3e5dceccf49b0 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -5274,22 +5274,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // If we have no uses, recursively delete the value and all dead instructions // using it. if (Repl->use_empty()) { - // This can cause recursive deletion, which can invalidate our iterator. - // Use a WeakTrackingVH to hold onto it in case this happens. - Value *CurValue = &*CurInstIterator; - WeakTrackingVH IterHandle(CurValue); - BasicBlock *BB = CurInstIterator->getParent(); - - RecursivelyDeleteTriviallyDeadInstructions( - Repl, TLInfo, nullptr, - [&](Value *V) { removeAllAssertingVHReferences(V); }); - - if (IterHandle != CurValue) { - // If the iterator instruction was recursively deleted, start over at the - // start of the block. - CurInstIterator = BB->begin(); - SunkAddrs.clear(); - } + resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() { + RecursivelyDeleteTriviallyDeadInstructions( + Repl, TLInfo, nullptr, + [&](Value *V) { removeAllAssertingVHReferences(V); }); + }); } ++NumMemoryInsts; return true; From 06e356c81e0fce90c9a21f9f5fb7567efa51ee0f Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 11 Sep 2020 10:23:04 +0100 Subject: [PATCH 0347/1079] [AMDGPU] Make movreld-bug test case more robust Without this, future optimizer improvements can optimize the entire function to "return 0". 
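The reasoning deserves one spelled-out step: in the old test the base vector is undef and the inserted value is 0.0, so whatever the dynamic index is, lane 1 of the result is either 0.0 or undef, and a folder may refine undef to 0.0. The whole function can therefore legally collapse to returning 0. Below is a minimal plain-C++ model of that argument (written for this note, not LLVM code; the names are made up), with std::optional standing in for undef:

```cpp
#include <optional>

// std::nullopt models an undef lane; a folder may refine undef to any value.
using Lane = std::optional<float>;

// Lane `lane` of insertelement(base, val, idx).
Lane laneAfterInsert(Lane base, float val, unsigned idx, unsigned lane) {
  return lane == idx ? Lane(val) : base;
}

float oldTest(unsigned idx) {
  // Old test: undef base, inserted value 0.0, extract lane 1. For every idx
  // the lane is 0.0 or undef, so the function may fold to the constant 0.0.
  return laneAfterInsert(std::nullopt, 0.0f, idx, /*lane=*/1).value_or(0.0f);
}

float newTest(unsigned idx) {
  // New test: zeroinitializer base, inserted value 1.0. Lane 1 is 1.0 when
  // idx == 1 and 0.0 otherwise, so the result depends on idx and the
  // indexed-insert lowering (v_movreld / s_set_gpr_idx) stays exercised.
  return *laneAfterInsert(Lane(0.0f), 1.0f, idx, /*lane=*/1);
}
```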
--- llvm/test/CodeGen/AMDGPU/movreld-bug.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/movreld-bug.ll b/llvm/test/CodeGen/AMDGPU/movreld-bug.ll index 3071f18c449fc..4bf15054aee00 100644 --- a/llvm/test/CodeGen/AMDGPU/movreld-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/movreld-bug.ll @@ -8,14 +8,14 @@ ; MOVREL-NEXT: v_movreld_b32_e32 v0, ; GPRIDX: s_set_gpr_idx_on s0, gpr_idx(DST) -; GPRIDX-NEXT: v_mov_b32_e32 v0, 0 +; GPRIDX-NEXT: v_mov_b32_e32 v0, 1.0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GCN-NEXT: v_mov_b32_e32 v0, v1 ; GCN-NEXT: ; return define amdgpu_ps float @main(i32 inreg %arg) #0 { main_body: - %tmp24 = insertelement <16 x float> undef, float 0.000000e+00, i32 %arg + %tmp24 = insertelement <16 x float> zeroinitializer, float 1.000000e+00, i32 %arg %tmp25 = extractelement <16 x float> %tmp24, i32 1 ret float %tmp25 } From bceca7a996248aba44c3e4b4752634114650e6ac Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Fri, 11 Sep 2020 11:30:06 +0200 Subject: [PATCH 0348/1079] [clangd][NFC] Get rid of an `else after return` --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 15ef89cb34faa..6ebb71c3b4d13 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -57,7 +57,7 @@ llvm::Optional decodeVersion(llvm::StringRef Encoded) { int64_t Result; if (llvm::to_integer(Encoded, Result, 10)) return Result; - else if (!Encoded.empty()) // Empty can be e.g. diagnostics on close. + if (!Encoded.empty()) // Empty can be e.g. diagnostics on close. elog("unexpected non-numeric version {0}", Encoded); return llvm::None; } From ff77d165a8161705c8ec3bb3ced2711dce297699 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 10 Sep 2020 18:03:41 +0100 Subject: [PATCH 0349/1079] BasicTTIImpl.h - remove unused MCSchedule.h include. NFCI. --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 9e5c45084c599..2b72dc3490d75 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -40,7 +40,6 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" -#include "llvm/MC/MCSchedule.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" From 70a05ee2880e0ad88416ae4b4bed3cadc53e5cd1 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 11 Sep 2020 10:09:10 +0100 Subject: [PATCH 0350/1079] [X86] Keep variables from getDataLayout/getDebugLoc calls as const reference. NFCI. These are only ever used as references in the called functions, so just pass the original reference instead of copying it. 
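The underlying C++ point: writing `DebugLoc DL = MI.getDebugLoc();` copy-initializes a new object from the getter's const-reference return, whereas `const DebugLoc &DL = MI.getDebugLoc();` only binds a reference. A minimal sketch of the two forms (stand-in types, not the actual LLVM classes):

```cpp
#include <string>

struct Instr {
  std::string Loc; // stands in for the DebugLoc member
  const std::string &getLoc() const { return Loc; }
};

void emit(const std::string &loc) { (void)loc; /* callee only reads loc */ }

void before(const Instr &MI) {
  std::string DL = MI.getLoc(); // copy-initializes a fresh object
  emit(DL);
}

void after(const Instr &MI) {
  const std::string &DL = MI.getLoc(); // binds a reference; no copy is made
  emit(DL);
}
```

The reference form is safe here because every callee takes the location by const reference and only reads it while MI is still alive; a reference bound this way must not outlive the instruction it came from.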
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 38 ++++++++++++------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4449a00b95c46..d0115a58ba4e7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19228,7 +19228,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { else IDX = DAG.getLoad(PtrVT, dl, Chain, IDX, MachinePointerInfo()); - auto &DL = DAG.getDataLayout(); + const DataLayout &DL = DAG.getDataLayout(); SDValue Scale = DAG.getConstant(Log2_64_Ceil(DL.getPointerSize()), dl, MVT::i8); IDX = DAG.getNode(ISD::SHL, dl, PtrVT, IDX, Scale); @@ -26320,7 +26320,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, for (FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); I != E; ++I, ++Idx) if (Attrs.hasAttribute(Idx, Attribute::InReg)) { - auto &DL = DAG.getDataLayout(); + const DataLayout &DL = DAG.getDataLayout(); // FIXME: should only count parameters that are lowered to integers. InRegCount += (DL.getTypeSizeInBits(*I) + 31) / 32; } @@ -31210,7 +31210,7 @@ static bool isEFLAGSLiveAfter(MachineBasicBlock::iterator Itr, /// Utility function to emit xbegin specifying the start of an RTM region. static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB, const TargetInstrInfo *TII) { - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); const BasicBlock *BB = MBB->getBasicBlock(); MachineFunction::iterator I = ++MBB->getIterator(); @@ -31336,7 +31336,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64); const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32); - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); // struct va_list { // i32 gp_offset @@ -31583,7 +31583,7 @@ MachineBasicBlock *X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( // Now add the instructions. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); Register CountReg = MI.getOperand(0).getReg(); int64_t RegSaveFrameIndex = MI.getOperand(1).getImm(); @@ -31895,7 +31895,7 @@ MachineBasicBlock * X86TargetLowering::EmitLoweredSelect(MachineInstr &MI, MachineBasicBlock *ThisMBB) const { const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); // To "insert" a SELECT_CC instruction, we actually have to insert the // diamond control-flow pattern. 
The incoming instruction knows the @@ -32050,7 +32050,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI, MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); const X86FrameLowering &TFI = *Subtarget.getFrameLowering(); - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); const BasicBlock *LLVM_BB = MBB->getBasicBlock(); const unsigned ProbeSize = getStackProbeSize(*MF); @@ -32143,7 +32143,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); assert(MF->shouldSplitStack()); @@ -32278,7 +32278,7 @@ X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI, MachineFunction *MF = BB->getParent(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB(); - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); assert(!isAsynchronousEHPersonality( classifyEHPersonality(MF->getFunction().getPersonalityFn())) && @@ -32316,7 +32316,7 @@ X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI, // inside MC, therefore without the two markers shrink-wrapping // may push the prologue/epilogue pass them. const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); MachineFunction &MF = *BB->getParent(); // Emit CALLSEQ_START right before the instruction. @@ -32345,7 +32345,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI, // be in the normal return register. MachineFunction *F = BB->getParent(); const X86InstrInfo *TII = Subtarget.getInstrInfo(); - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); assert(Subtarget.isTargetDarwin() && "Darwin only instr emitted?"); assert(MI.getOperand(3).isGlobal() && "This should be a global"); @@ -32484,7 +32484,7 @@ X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI, MachineBasicBlock *BB) const { // Copy the virtual register into the R11 physical register and // call the retpoline thunk. - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); const X86InstrInfo *TII = Subtarget.getInstrInfo(); Register CalleeVReg = MI.getOperand(0).getReg(); unsigned Opc = getOpcodeForIndirectThunk(MI.getOpcode()); @@ -32546,7 +32546,7 @@ X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI, /// \param [in] MBB The Machine Basic Block that will be modified. 
void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI, MachineBasicBlock *MBB) const { - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF->getRegInfo(); @@ -32589,7 +32589,7 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI, MachineBasicBlock * X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); @@ -32749,7 +32749,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock * X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, MachineBasicBlock *MBB) const { - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF->getRegInfo(); @@ -32930,7 +32930,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, MachineBasicBlock * X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF->getRegInfo(); @@ -33014,7 +33014,7 @@ void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI) const { - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); const X86InstrInfo *TII = Subtarget.getInstrInfo(); @@ -33063,7 +33063,7 @@ void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock * X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, MachineBasicBlock *BB) const { - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); MachineFunction *MF = BB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); const X86InstrInfo *TII = Subtarget.getInstrInfo(); @@ -33293,7 +33293,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); auto TMMImmToTMMReg = [](unsigned Imm) { assert (Imm < 8 && "Illegal tmm index"); From 002f5ab3b171c7d9c9ea192b04a5303be78f6e52 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Tue, 8 Sep 2020 17:14:17 +0000 Subject: [PATCH 0351/1079] [clang][aarch64] Fix ILP32 ABI for arm_sve_vector_bits The element types of scalable vectors are defined in terms of stdint types in the ACLE. This patch fixes the mapping to builtin types for the ILP32 ABI when creating VLS types with the arm_sve_vector_bits, where the mapping is as follows: int32_t -> LongTy int64_t -> LongLongTy uint32_t -> UnsignedLongTy uint64_t -> UnsignedLongLongTy This is implemented by leveraging getBuiltinVectorTypeInfo which is target agnostic since it calls ASTContext::getIntTypeForBitwidth for integer types. The element type for svfloat16_t is changed from Float16Ty to HalfTy when creating VLS types since this is what is used elsewhere. 
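As a worked restatement of the table above, here is a minimal plain-C++ sketch of the mapping (illustrative only; clang derives it through getBuiltinVectorTypeInfo and ASTContext::getIntTypeForBitwidth as described, not through an explicit switch like this):

```cpp
#include <cassert>

enum class Builtin { Int, UInt, Long, ULong, LongLong, ULongLong };

// Builtin element type for a fixed-width SVE integer element, by data model.
Builtin sveEltType(unsigned bits, bool isSigned, bool isILP32) {
  if (bits == 32) // int32_t / uint32_t
    return isILP32 ? (isSigned ? Builtin::Long : Builtin::ULong)
                   : (isSigned ? Builtin::Int : Builtin::UInt);
  assert(bits == 64 && "only the 32- and 64-bit cases differ between models");
  return isILP32 ? (isSigned ? Builtin::LongLong : Builtin::ULongLong)
                 : (isSigned ? Builtin::Long : Builtin::ULong);
}
```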
For more information, see: https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst#types-varying-by-data-model https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst#appendix-support-for-scalable-vectors Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D87358 --- clang/lib/AST/ItaniumMangle.cpp | 2 +- clang/lib/AST/Type.cpp | 31 ++----------------- clang/lib/CodeGen/TargetInfo.cpp | 2 +- .../CodeGen/attr-arm-sve-vector-bits-types.c | 9 ++++++ 4 files changed, 14 insertions(+), 30 deletions(-) diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index d8ccbdaba9c60..877050c160955 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -3388,7 +3388,7 @@ void CXXNameMangler::mangleAArch64FixedSveVectorType(const VectorType *T) { case BuiltinType::ULong: TypeName = "__SVUint64_t"; break; - case BuiltinType::Float16: + case BuiltinType::Half: TypeName = "__SVFloat16_t"; break; case BuiltinType::Float: diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 801f89a8f1874..ff73a7340091e 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -2317,38 +2317,13 @@ QualType Type::getSveEltType(const ASTContext &Ctx) const { assert(isVLSTBuiltinType() && "unsupported type!"); const BuiltinType *BTy = getAs(); - switch (BTy->getKind()) { - default: - llvm_unreachable("Unknown builtin SVE type!"); - case BuiltinType::SveInt8: - return Ctx.SignedCharTy; - case BuiltinType::SveUint8: - case BuiltinType::SveBool: + if (BTy->getKind() == BuiltinType::SveBool) // Represent predicates as i8 rather than i1 to avoid any layout issues. // The type is bitcasted to a scalable predicate type when casting between // scalable and fixed-length vectors. 
return Ctx.UnsignedCharTy; - case BuiltinType::SveInt16: - return Ctx.ShortTy; - case BuiltinType::SveUint16: - return Ctx.UnsignedShortTy; - case BuiltinType::SveInt32: - return Ctx.IntTy; - case BuiltinType::SveUint32: - return Ctx.UnsignedIntTy; - case BuiltinType::SveInt64: - return Ctx.LongTy; - case BuiltinType::SveUint64: - return Ctx.UnsignedLongTy; - case BuiltinType::SveFloat16: - return Ctx.Float16Ty; - case BuiltinType::SveBFloat16: - return Ctx.BFloat16Ty; - case BuiltinType::SveFloat32: - return Ctx.FloatTy; - case BuiltinType::SveFloat64: - return Ctx.DoubleTy; - } + else + return Ctx.getBuiltinVectorTypeInfo(BTy).ElementType; } bool QualType::isPODType(const ASTContext &Context) const { diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index e1ab61f10585d..5ebf432a4cd36 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -5627,7 +5627,7 @@ ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty) const { ResType = llvm::ScalableVectorType::get( llvm::Type::getInt64Ty(getVMContext()), 2); break; - case BuiltinType::Float16: + case BuiltinType::Half: ResType = llvm::ScalableVectorType::get( llvm::Type::getHalfTy(getVMContext()), 8); break; diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c index a1cfc514081ea..27366dea3d34d 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c @@ -4,6 +4,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -msve-vector-bits=512 -fallow-half-arguments-and-returns -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -msve-vector-bits=1024 -fallow-half-arguments-and-returns -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-1024 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -msve-vector-bits=2048 -fallow-half-arguments-and-returns -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-2048 +// RUN: %clang_cc1 -triple aarch64_32-unknown-darwin -target-feature +sve -target-feature +bf16 -msve-vector-bits=512 -fallow-half-arguments-and-returns -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ILP32 #include @@ -579,3 +580,11 @@ void f() { // CHECK-2048-NEXT: %local_arr_f64 = alloca [3 x <32 x double>], align 16 // CHECK-2048-NEXT: %local_arr_bf16 = alloca [3 x <128 x bfloat>], align 16 // CHECK-2048-NEXT: %local_arr_bool = alloca [3 x <32 x i8>], align 2 + +//===----------------------------------------------------------------------===// +// ILP32 ABI +//===----------------------------------------------------------------------===// +// CHECK-ILP32: @global_i32 = global <16 x i32> zeroinitializer, align 16 +// CHECK-ILP32: @global_i64 = global <8 x i64> zeroinitializer, align 16 +// CHECK-ILP32: @global_u32 = global <16 x i32> zeroinitializer, align 16 +// CHECK-ILP32: @global_u64 = global <8 x i64> zeroinitializer, align 16 From 257b29715bb27b7d9f6c3c40c481b6a4af0b37e5 Mon Sep 17 00:00:00 2001 From: Caroline Concatto Date: Fri, 11 Sep 2020 10:17:31 +0100 Subject: [PATCH 0352/1079] [flang][driver] Add the new flang compiler and frontend drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: This is the first patch implementing the new Flang driver as outlined in [1], [2] & [3]. 
It creates Flang driver (`flang-new`) and Flang frontend driver (`flang-new -fc1`). These will be renamed as `flang` and `flang -fc1` once the current Flang throwaway driver, `flang`, can be replaced with `flang-new`. Currently only two options are supported: `-help` and `--version`. `flang-new` is implemented in terms of libclangDriver, defaulting the driver mode to `FlangMode` (added to libclangDriver in [4]). This ensures that the driver runs in Flang mode regardless of the name of the binary inferred from argv[0]. The design of the new Flang compiler and frontend drivers is inspired by its counterparts in Clang [3]. Currently, the new Flang compiler and frontend drivers re-use Clang libraries: clangBasic, clangDriver and clangFrontend. To identify Flang options, this patch adds FlangOption/FC1Option enums. Driver::PrintHelp is updated so that `flang-new` prints only Flang options. The new Flang driver is disabled by default. To enable it, set `-DBUILD_FLANG_NEW_DRIVER=ON` when configuring CMake and add clang to `LLVM_ENABLE_PROJECTS` (e.g. -DLLVM_ENABLE_PROJECTS=“clang;flang;mlir”). [1] “RFC: new Flang driver - next steps” http://lists.llvm.org/pipermail/flang-dev/2020-July/000470.html [2] “RFC: Adding a fortran mode to the clang driver for flang” http://lists.llvm.org/pipermail/cfe-dev/2019-June/062669.html [3] “RFC: refactoring libclangDriver/libclangFrontend to share with Flang” http://lists.llvm.org/pipermail/cfe-dev/2020-July/066393.html [4] https://reviews.llvm.org/rG6bf55804924d5a1d902925ad080b1a2b57c5c75c co-authored-by: Andrzej Warzynski Reviewed By: richard.barton.arm, sameeranjoshi Differential Revision: https://reviews.llvm.org/D86089 --- clang/include/clang/Driver/Driver.h | 2 +- clang/include/clang/Driver/Options.h | 4 +- clang/include/clang/Driver/Options.td | 12 +- clang/lib/Driver/Driver.cpp | 19 ++- clang/lib/Driver/ToolChains/Flang.cpp | 6 +- .../CreateInvocationFromCommandLine.cpp | 4 +- clang/lib/Tooling/Tooling.cpp | 2 +- clang/test/Driver/flang/flang.f90 | 2 +- clang/test/Driver/flang/flang_ucase.F90 | 2 +- .../Driver/flang/multiple-inputs-mixed.f90 | 2 +- clang/test/Driver/flang/multiple-inputs.f90 | 4 +- clang/unittests/Driver/SanitizerArgsTest.cpp | 2 +- clang/unittests/Driver/ToolChainTest.cpp | 10 +- flang/CMakeLists.txt | 22 +++ flang/README.md | 15 ++ .../include/flang/Frontend/CompilerInstance.h | 105 ++++++++++++++ .../flang/Frontend/CompilerInvocation.h | 53 +++++++ .../include/flang/Frontend/FrontendOptions.h | 58 ++++++++ flang/include/flang/FrontendTool/Utils.h | 29 ++++ flang/lib/CMakeLists.txt | 5 + flang/lib/Frontend/CMakeLists.txt | 16 +++ flang/lib/Frontend/CompilerInstance.cpp | 42 ++++++ flang/lib/Frontend/CompilerInvocation.cpp | 115 ++++++++++++++++ flang/lib/Frontend/FrontendOptions.cpp | 9 ++ flang/lib/FrontendTool/CMakeLists.txt | 11 ++ .../ExecuteCompilerInvocation.cpp | 39 ++++++ flang/test/CMakeLists.txt | 4 + flang/test/Flang-Driver/driver-error-cc1.c | 7 + flang/test/Flang-Driver/driver-error-cc1.cpp | 7 + flang/test/Flang-Driver/driver-help.f90 | 13 ++ flang/test/Flang-Driver/driver-version.f90 | 11 ++ flang/test/Flang-Driver/emit-obj.f90 | 17 +++ flang/test/Flang-Driver/missing-input.f90 | 5 + flang/test/lit.cfg.py | 12 +- flang/test/lit.site.cfg.py.in | 5 + flang/tools/CMakeLists.txt | 3 + flang/tools/flang-driver/CMakeLists.txt | 25 ++++ flang/tools/flang-driver/driver.cpp | 129 ++++++++++++++++++ flang/tools/flang-driver/fc1_main.cpp | 56 ++++++++ flang/unittests/CMakeLists.txt | 4 + flang/unittests/Frontend/CMakeLists.txt | 10 ++
.../Frontend/CompilerInstanceTest.cpp | 52 +++++++ llvm/include/llvm/Option/OptTable.h | 2 +- 43 files changed, 924 insertions(+), 28 deletions(-) create mode 100644 flang/include/flang/Frontend/CompilerInstance.h create mode 100644 flang/include/flang/Frontend/CompilerInvocation.h create mode 100644 flang/include/flang/Frontend/FrontendOptions.h create mode 100644 flang/include/flang/FrontendTool/Utils.h create mode 100644 flang/lib/Frontend/CMakeLists.txt create mode 100644 flang/lib/Frontend/CompilerInstance.cpp create mode 100644 flang/lib/Frontend/CompilerInvocation.cpp create mode 100644 flang/lib/Frontend/FrontendOptions.cpp create mode 100644 flang/lib/FrontendTool/CMakeLists.txt create mode 100644 flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp create mode 100644 flang/test/Flang-Driver/driver-error-cc1.c create mode 100644 flang/test/Flang-Driver/driver-error-cc1.cpp create mode 100644 flang/test/Flang-Driver/driver-help.f90 create mode 100644 flang/test/Flang-Driver/driver-version.f90 create mode 100644 flang/test/Flang-Driver/emit-obj.f90 create mode 100644 flang/test/Flang-Driver/missing-input.f90 create mode 100644 flang/tools/flang-driver/CMakeLists.txt create mode 100644 flang/tools/flang-driver/driver.cpp create mode 100644 flang/tools/flang-driver/fc1_main.cpp create mode 100644 flang/unittests/Frontend/CMakeLists.txt create mode 100644 flang/unittests/Frontend/CompilerInstanceTest.cpp diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index dc18f1314f81e..7a476199ff7f9 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -301,7 +301,7 @@ class Driver { StringRef CustomResourceDir = ""); Driver(StringRef ClangExecutable, StringRef TargetTriple, - DiagnosticsEngine &Diags, + DiagnosticsEngine &Diags, std::string Title = "clang LLVM compiler", IntrusiveRefCntPtr VFS = nullptr); /// @name Accessors diff --git a/clang/include/clang/Driver/Options.h b/clang/include/clang/Driver/Options.h index 9831efda4e580..06dd3652be940 100644 --- a/clang/include/clang/Driver/Options.h +++ b/clang/include/clang/Driver/Options.h @@ -34,7 +34,9 @@ enum ClangFlags { CC1AsOption = (1 << 11), NoDriverOption = (1 << 12), LinkOption = (1 << 13), - Ignored = (1 << 14), + FlangOption = (1 << 14), + FC1Option = (1 << 15), + Ignored = (1 << 16), }; enum ID { diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 4ba5d40117e77..922ad580a53e7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -56,6 +56,13 @@ def NoDriverOption : OptionFlag; // be used), add this flag. def LinkOption : OptionFlag; +// FlangOption - This is considered a "core" Flang option, available in +// flang mode. +def FlangOption : OptionFlag; + +// FC1Option - This option should be accepted by flang -fc1. +def FC1Option : OptionFlag; + // A short name to show in documentation. The name will be interpreted as rST. 
class DocName { string DocName = name; } @@ -2100,7 +2107,7 @@ def gno_embed_source : Flag<["-"], "gno-embed-source">, Group, Flags<[DriverOption]>, HelpText<"Restore the default behavior of not embedding source text in DWARF debug sections">; def headerpad__max__install__names : Joined<["-"], "headerpad_max_install_names">; -def help : Flag<["-", "--"], "help">, Flags<[CC1Option,CC1AsOption]>, +def help : Flag<["-", "--"], "help">, Flags<[CC1Option,CC1AsOption, FC1Option, FlangOption]>, HelpText<"Display available options">; def ibuiltininc : Flag<["-"], "ibuiltininc">, HelpText<"Enable builtin #include directories even when -nostdinc is used " @@ -3049,7 +3056,8 @@ def _rtlib : Separate<["--"], "rtlib">, Alias; def _serialize_diags : Separate<["-", "--"], "serialize-diagnostics">, Flags<[DriverOption]>, HelpText<"Serialize compiler diagnostics to a file">; // We give --version different semantics from -version. -def _version : Flag<["--"], "version">, Flags<[CoreOption, CC1Option]>, +def _version : Flag<["--"], "version">, + Flags<[CoreOption, CC1Option, FC1Option, FlangOption]>, HelpText<"Print version information">; def _signed_char : Flag<["--"], "signed-char">, Alias; def _std : Separate<["--"], "std">, Alias; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 4ac813718eace..65b44597bc16f 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -128,12 +128,12 @@ std::string Driver::GetResourcesPath(StringRef BinaryPath, } Driver::Driver(StringRef ClangExecutable, StringRef TargetTriple, - DiagnosticsEngine &Diags, + DiagnosticsEngine &Diags, std::string Title, IntrusiveRefCntPtr VFS) : Diags(Diags), VFS(std::move(VFS)), Mode(GCCMode), SaveTemps(SaveTempsNone), BitcodeEmbed(EmbedNone), LTOMode(LTOK_None), ClangExecutable(ClangExecutable), SysRoot(DEFAULT_SYSROOT), - DriverTitle("clang LLVM compiler"), CCPrintOptionsFilename(nullptr), + DriverTitle(Title), CCPrintOptionsFilename(nullptr), CCPrintHeadersFilename(nullptr), CCLogDiagnosticsFilename(nullptr), CCCPrintBindings(false), CCPrintOptions(false), CCPrintHeaders(false), CCLogDiagnostics(false), CCGenDiagnostics(false), @@ -1571,6 +1571,9 @@ void Driver::PrintHelp(bool ShowHidden) const { if (!ShowHidden) ExcludedFlagsBitmask |= HelpHidden; + if (IsFlangMode()) + IncludedFlagsBitmask |= options::FlangOption; + std::string Usage = llvm::formatv("{0} [options] file...", Name).str(); getOpts().PrintHelp(llvm::outs(), Usage.c_str(), DriverTitle.c_str(), IncludedFlagsBitmask, ExcludedFlagsBitmask, @@ -1578,9 +1581,13 @@ void Driver::PrintHelp(bool ShowHidden) const { } void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const { - // FIXME: The following handlers should use a callback mechanism, we don't - // know what the client would like to do. - OS << getClangFullVersion() << '\n'; + if (IsFlangMode()) { + OS << getClangToolFullVersion("flang-new") << '\n'; + } else { + // FIXME: The following handlers should use a callback mechanism, we don't + // know what the client would like to do. 
+ OS << getClangFullVersion() << '\n'; + } const ToolChain &TC = C.getDefaultToolChain(); OS << "Target: " << TC.getTripleString() << '\n'; @@ -1618,7 +1625,7 @@ void Driver::HandleAutocompletions(StringRef PassedFlags) const { std::vector SuggestedCompletions; std::vector Flags; - unsigned short DisableFlags = + unsigned int DisableFlags = options::NoDriverOption | options::Unsupported | options::Ignored; // Distinguish "--autocomplete=-someflag" and "--autocomplete=-someflag," diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 80f6db7ea6427..93401c6626630 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -69,11 +69,13 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Input.getFilename()); const auto& D = C.getDriver(); - const char* Exec = Args.MakeArgString(D.GetProgramPath("flang", TC)); + // TODO: Replace flang-new with flang once the new driver replaces the + // throwaway driver + const char *Exec = Args.MakeArgString(D.GetProgramPath("flang-new", TC)); C.addCommand(std::make_unique( JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); } -Flang::Flang(const ToolChain &TC) : Tool("flang", "flang frontend", TC) {} +Flang::Flang(const ToolChain &TC) : Tool("flang-new", "flang frontend", TC) {} Flang::~Flang() {} diff --git a/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp b/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp index 1d5a6c06b34fe..ff0aa6faf33f6 100644 --- a/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp +++ b/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp @@ -40,8 +40,8 @@ std::unique_ptr clang::createInvocationFromCommandLine( Args.push_back("-fsyntax-only"); // FIXME: We shouldn't have to pass in the path info. - driver::Driver TheDriver(Args[0], llvm::sys::getDefaultTargetTriple(), - *Diags, VFS); + driver::Driver TheDriver(Args[0], llvm::sys::getDefaultTargetTriple(), *Diags, + "clang LLVM compiler", VFS); // Don't check that inputs exist, they may have been remapped. TheDriver.setCheckInputsExist(false); diff --git a/clang/lib/Tooling/Tooling.cpp b/clang/lib/Tooling/Tooling.cpp index 1ee8ce28c2efa..b0d3f5caf67a3 100644 --- a/clang/lib/Tooling/Tooling.cpp +++ b/clang/lib/Tooling/Tooling.cpp @@ -78,7 +78,7 @@ newDriver(DiagnosticsEngine *Diagnostics, const char *BinaryName, IntrusiveRefCntPtr VFS) { driver::Driver *CompilerDriver = new driver::Driver(BinaryName, llvm::sys::getDefaultTargetTriple(), - *Diagnostics, std::move(VFS)); + *Diagnostics, "clang LLVM compiler", std::move(VFS)); CompilerDriver->setTitle("clang_based_tool"); return CompilerDriver; } diff --git a/clang/test/Driver/flang/flang.f90 b/clang/test/Driver/flang/flang.f90 index a68be31343f9c..e4629d527d183 100644 --- a/clang/test/Driver/flang/flang.f90 +++ b/clang/test/Driver/flang/flang.f90 @@ -13,7 +13,7 @@ ! * (no type specified, resulting in an object file) ! All invocations should begin with flang -fc1, consume up to here. -! ALL-LABEL: "{{[^"]*}}flang" "-fc1" +! ALL-LABEL: "{{[^"]*}}flang-new" "-fc1" ! Check that f90 files are not treated as "previously preprocessed" ! ... in --driver-mode=flang. diff --git a/clang/test/Driver/flang/flang_ucase.F90 b/clang/test/Driver/flang/flang_ucase.F90 index dd1e20088191f..4da09e138b59d 100644 --- a/clang/test/Driver/flang/flang_ucase.F90 +++ b/clang/test/Driver/flang/flang_ucase.F90 @@ -13,7 +13,7 @@ ! * (no type specified, resulting in an object file) ! 
All invocations should begin with flang -fc1, consume up to here. -! ALL-LABEL: "{{[^"]*}}flang" "-fc1" +! ALL-LABEL: "{{[^"]*}}flang-new" "-fc1" ! Check that f90 files are not treated as "previously preprocessed" ! ... in --driver-mode=flang. diff --git a/clang/test/Driver/flang/multiple-inputs-mixed.f90 b/clang/test/Driver/flang/multiple-inputs-mixed.f90 index 98d8cab00bdfd..2395dbecf1fe9 100644 --- a/clang/test/Driver/flang/multiple-inputs-mixed.f90 +++ b/clang/test/Driver/flang/multiple-inputs-mixed.f90 @@ -1,7 +1,7 @@ ! Check that flang can handle mixed C and fortran inputs. ! RUN: %clang --driver-mode=flang -### -fsyntax-only %S/Inputs/one.f90 %S/Inputs/other.c 2>&1 | FileCheck --check-prefixes=CHECK-SYNTAX-ONLY %s -! CHECK-SYNTAX-ONLY-LABEL: "{{[^"]*}}flang{{[^"/]*}}" "-fc1" +! CHECK-SYNTAX-ONLY-LABEL: "{{[^"]*}}flang-new{{[^"/]*}}" "-fc1" ! CHECK-SYNTAX-ONLY: "{{[^"]*}}/Inputs/one.f90" ! CHECK-SYNTAX-ONLY-LABEL: "{{[^"]*}}clang{{[^"/]*}}" "-cc1" ! CHECK-SYNTAX-ONLY: "{{[^"]*}}/Inputs/other.c" diff --git a/clang/test/Driver/flang/multiple-inputs.f90 b/clang/test/Driver/flang/multiple-inputs.f90 index 34592a3dc3a39..f6ee60e48fef3 100644 --- a/clang/test/Driver/flang/multiple-inputs.f90 +++ b/clang/test/Driver/flang/multiple-inputs.f90 @@ -1,7 +1,7 @@ ! Check that flang driver can handle multiple inputs at once. ! RUN: %clang --driver-mode=flang -### -fsyntax-only %S/Inputs/one.f90 %S/Inputs/two.f90 2>&1 | FileCheck --check-prefixes=CHECK-SYNTAX-ONLY %s -! CHECK-SYNTAX-ONLY-LABEL: "{{[^"]*}}flang" "-fc1" +! CHECK-SYNTAX-ONLY-LABEL: "{{[^"]*}}flang-new" "-fc1" ! CHECK-SYNTAX-ONLY: "{{[^"]*}}/Inputs/one.f90" -! CHECK-SYNTAX-ONLY-LABEL: "{{[^"]*}}flang" "-fc1" +! CHECK-SYNTAX-ONLY-LABEL: "{{[^"]*}}flang-new" "-fc1" ! CHECK-SYNTAX-ONLY: "{{[^"]*}}/Inputs/two.f90" diff --git a/clang/unittests/Driver/SanitizerArgsTest.cpp b/clang/unittests/Driver/SanitizerArgsTest.cpp index dac1caddc055e..84bd568523459 100644 --- a/clang/unittests/Driver/SanitizerArgsTest.cpp +++ b/clang/unittests/Driver/SanitizerArgsTest.cpp @@ -57,7 +57,7 @@ class SanitizerArgsTest : public ::testing::Test { new DiagnosticIDs, Opts, new TextDiagnosticPrinter(llvm::errs(), Opts.get())); DriverInstance.emplace(ClangBinary, "x86_64-unknown-linux-gnu", Diags, - prepareFS(ExtraFiles)); + "clang LLVM compiler", prepareFS(ExtraFiles)); std::vector Args = {ClangBinary}; for (const auto &A : ExtraArgs) diff --git a/clang/unittests/Driver/ToolChainTest.cpp b/clang/unittests/Driver/ToolChainTest.cpp index f84e508b6cbdb..67bf545b14e4b 100644 --- a/clang/unittests/Driver/ToolChainTest.cpp +++ b/clang/unittests/Driver/ToolChainTest.cpp @@ -35,7 +35,7 @@ TEST(ToolChainTest, VFSGCCInstallation) { IntrusiveRefCntPtr InMemoryFileSystem( new llvm::vfs::InMemoryFileSystem); Driver TheDriver("/bin/clang", "arm-linux-gnueabihf", Diags, - InMemoryFileSystem); + "clang LLVM compiler", InMemoryFileSystem); const char *EmptyFiles[] = { "foo.cpp", @@ -89,7 +89,7 @@ TEST(ToolChainTest, VFSGCCInstallationRelativeDir) { IntrusiveRefCntPtr InMemoryFileSystem( new llvm::vfs::InMemoryFileSystem); Driver TheDriver("/home/test/bin/clang", "arm-linux-gnueabi", Diags, - InMemoryFileSystem); + "clang LLVM compiler", InMemoryFileSystem); const char *EmptyFiles[] = { "foo.cpp", "/home/test/lib/gcc/arm-linux-gnueabi/4.6.1/crtbegin.o", @@ -130,13 +130,13 @@ TEST(ToolChainTest, DefaultDriverMode) { new llvm::vfs::InMemoryFileSystem); Driver CCDriver("/home/test/bin/clang", "arm-linux-gnueabi", Diags, - InMemoryFileSystem); + "clang LLVM compiler", 
InMemoryFileSystem); CCDriver.setCheckInputsExist(false); Driver CXXDriver("/home/test/bin/clang++", "arm-linux-gnueabi", Diags, - InMemoryFileSystem); + "clang LLVM compiler", InMemoryFileSystem); CXXDriver.setCheckInputsExist(false); Driver CLDriver("/home/test/bin/clang-cl", "arm-linux-gnueabi", Diags, - InMemoryFileSystem); + "clang LLVM compiler", InMemoryFileSystem); CLDriver.setCheckInputsExist(false); std::unique_ptr CC(CCDriver.BuildCompilation( diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 707c7235a272a..daae9e9b1246e 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -17,6 +17,7 @@ if (POLICY CMP0077) endif() option(LINK_WITH_FIR "Link driver with FIR and LLVM" ON) +option(FLANG_BUILD_NEW_DRIVER "Build the flang compiler driver" OFF) # Flang requires C++17. set(CMAKE_CXX_STANDARD 17) @@ -61,6 +62,12 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) get_filename_component(LLVM_DIR_ABSOLUTE ${LLVM_DIR} REALPATH) list(APPEND CMAKE_MODULE_PATH ${LLVM_DIR_ABSOLUTE}) + if(FLANG_BUILD_NEW_DRIVER) + # TODO: Remove when libclangDriver is lifted out of Clang + list(APPEND CMAKE_MODULE_PATH ${CLANG_DIR}) + find_package(Clang REQUIRED HINTS "${CLANG_DIR}") + endif() + # If LLVM links to zlib we need the imported targets so we can too. if(LLVM_ENABLE_ZLIB) find_package(ZLIB REQUIRED) @@ -200,6 +207,21 @@ else() endif() endif() +if(FLANG_BUILD_NEW_DRIVER) + # TODO: Remove when libclangDriver is lifted out of Clang + if(FLANG_STANDALONE_BUILD) + set(CLANG_INCLUDE_DIR ${CLANG_INCLUDE_DIRS} ) + # No need to specify TableGen output dir as that's embedded in CLANG_DIR + else() + set(CLANG_INCLUDE_DIR ${LLVM_MAIN_SRC_DIR}/../clang/include ) + # Specify TableGen output dir for things like DiagnosticCommonKinds.inc, + # DiagnosticDriverKinds.inc (required for reporting diagnostics) + set(CLANG_TABLEGEN_OUTPUT_DIR ${CMAKE_BINARY_DIR}/tools/clang/include) + include_directories(SYSTEM ${CLANG_TABLEGEN_OUTPUT_DIR}) + endif() + include_directories(SYSTEM ${CLANG_INCLUDE_DIR}) +endif() + if(LINK_WITH_FIR) # tco tool and FIR lib output directories if(FLANG_STANDALONE_BUILD) diff --git a/flang/README.md b/flang/README.md index 3a58c277bacf3..934169b9ae6ac 100644 --- a/flang/README.md +++ b/flang/README.md @@ -143,6 +143,21 @@ cd ~/flang/build cmake -DLLVM_DIR=$LLVM -DMLIR_DIR=$MLIR ~/flang/src make ``` + +### Build The New Flang Driver +The new Flang driver, `flang-new`, is currently under active development and +should be considered as an experimental feature. For this reason it is disabled +by default. This will change once the new driver replaces the _throwaway_ +driver, `flang`. + +In order to build the new driver, add `-DBUILD_FLANG_NEW_DRIVER=ON` to your +CMake invocation line. Additionally, when building out-of-tree, use `CLANG_DIR` +(similarly to `LLVM_DIR` and `MLIR_DIR`) to find the installed Clang +components. + +**Note:** `CLANG_DIR` is only required when building the new Flang driver, +which currently depends on Clang. + # How to Run Tests Flang supports 2 different categories of tests diff --git a/flang/include/flang/Frontend/CompilerInstance.h b/flang/include/flang/Frontend/CompilerInstance.h new file mode 100644 index 0000000000000..298be676ea4a5 --- /dev/null +++ b/flang/include/flang/Frontend/CompilerInstance.h @@ -0,0 +1,105 @@ +//===-- CompilerInstance.h - Flang Compiler Instance ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_FLANG_FRONTEND_COMPILERINSTANCE_H +#define LLVM_FLANG_FRONTEND_COMPILERINSTANCE_H + +#include "flang/Frontend/CompilerInvocation.h" + +#include +#include + +namespace Fortran::frontend { + +class CompilerInstance { + + /// The options used in this compiler instance. + std::shared_ptr invocation_; + + /// The diagnostics engine instance. + llvm::IntrusiveRefCntPtr diagnostics_; + +public: + explicit CompilerInstance(); + + ~CompilerInstance(); + CompilerInvocation &GetInvocation() { + assert(invocation_ && "Compiler instance has no invocation!"); + return *invocation_; + }; + + /// } + /// @name Forwarding Methods + /// { + + clang::DiagnosticOptions &GetDiagnosticOpts() { + return invocation_->GetDiagnosticOpts(); + } + const clang::DiagnosticOptions &GetDiagnosticOpts() const { + return invocation_->GetDiagnosticOpts(); + } + + FrontendOptions &GetFrontendOpts() { return invocation_->GetFrontendOpts(); } + const FrontendOptions &GetFrontendOpts() const { + return invocation_->GetFrontendOpts(); + } + + /// } + /// @name Diagnostics Engine + /// { + + bool HasDiagnostics() const { return diagnostics_ != nullptr; } + + /// Get the current diagnostics engine. + clang::DiagnosticsEngine &GetDiagnostics() const { + assert(diagnostics_ && "Compiler instance has no diagnostics!"); + return *diagnostics_; + } + + /// SetDiagnostics - Replace the current diagnostics engine. + void SetDiagnostics(clang::DiagnosticsEngine *value); + + clang::DiagnosticConsumer &GetDiagnosticClient() const { + assert(diagnostics_ && diagnostics_->getClient() && + "Compiler instance has no diagnostic client!"); + return *diagnostics_->getClient(); + } + + /// Get the current diagnostics engine. + clang::DiagnosticsEngine &getDiagnostics() const { + assert(diagnostics_ && "Compiler instance has no diagnostics!"); + return *diagnostics_; + } + + /// } + /// @name Construction Utility Methods + /// { + + /// Create a DiagnosticsEngine object with a the TextDiagnosticPrinter. + /// + /// If no diagnostic client is provided, this creates a + /// DiagnosticConsumer that is owned by the returned diagnostic + /// object, if using directly the caller is responsible for + /// releasing the returned DiagnosticsEngine's client eventually. + /// + /// \param opts - The diagnostic options; note that the created text + /// diagnostic object contains a reference to these options. + /// + /// \param client If non-NULL, a diagnostic client that will be + /// attached to (and, then, owned by) the returned DiagnosticsEngine + /// object. + /// + /// \return The new object on success, or null on failure. 
+ static clang::IntrusiveRefCntPtr CreateDiagnostics( + clang::DiagnosticOptions *opts, + clang::DiagnosticConsumer *client = nullptr, bool shouldOwnClient = true); + void CreateDiagnostics( + clang::DiagnosticConsumer *client = nullptr, bool shouldOwnClient = true); +}; + +} // end namespace Fortran::frontend +#endif // LLVM_FLANG_FRONTEND_COMPILERINSTANCE_H diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h new file mode 100644 index 0000000000000..0fa169fd16200 --- /dev/null +++ b/flang/include/flang/Frontend/CompilerInvocation.h @@ -0,0 +1,53 @@ +//===- CompilerInvocation.h - Compiler Invocation Helper Data ---*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_FLANG_FRONTEND_COMPILERINVOCATION_H +#define LLVM_FLANG_FRONTEND_COMPILERINVOCATION_H + +#include "flang/Frontend/FrontendOptions.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/DiagnosticOptions.h" + +namespace Fortran::frontend { +class CompilerInvocationBase { +public: + /// Options controlling the diagnostic engine.$ + llvm::IntrusiveRefCntPtr diagnosticOpts_; + + CompilerInvocationBase(); + CompilerInvocationBase(const CompilerInvocationBase &x); + ~CompilerInvocationBase(); + + clang::DiagnosticOptions &GetDiagnosticOpts() { + return *diagnosticOpts_.get(); + } + const clang::DiagnosticOptions &GetDiagnosticOpts() const { + return *diagnosticOpts_.get(); + } +}; + +class CompilerInvocation : public CompilerInvocationBase { + /// Options controlling the frontend itself. + FrontendOptions frontendOpts_; + +public: + CompilerInvocation() = default; + + FrontendOptions &GetFrontendOpts() { return frontendOpts_; } + const FrontendOptions &GetFrontendOpts() const { return frontendOpts_; } + + /// Create a compiler invocation from a list of input options. + /// \returns true on success. + /// \returns false if an error was encountered while parsing the arguments + /// \param [out] res - The resulting invocation. + static bool CreateFromArgs(CompilerInvocation &res, + llvm::ArrayRef commandLineArgs, + clang::DiagnosticsEngine &diags); +}; + +} // end namespace Fortran::frontend +#endif // LLVM_FLANG_FRONTEND_COMPILERINVOCATION_H diff --git a/flang/include/flang/Frontend/FrontendOptions.h b/flang/include/flang/Frontend/FrontendOptions.h new file mode 100644 index 0000000000000..474086f44e3b1 --- /dev/null +++ b/flang/include/flang/Frontend/FrontendOptions.h @@ -0,0 +1,58 @@ +//===- FrontendOptions.h ----------------------------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_FLANG_FRONTEND_FRONTENDOPTIONS_H +#define LLVM_FLANG_FRONTEND_FRONTENDOPTIONS_H + +#include +#include +namespace Fortran::frontend { + +enum class Language : uint8_t { + Unknown, + + /// LLVM IR: we accept this so that we can run the optimizer on it, + /// and compile it to assembly or object code. + LLVM_IR, + + ///@{ Languages that the frontend can parse and compile. 
+ Fortran, + ///@} +}; + +/// The kind of a file that we've been handed as an input. +class InputKind { +private: + Language lang_; + +public: + /// The input file format. + enum Format { Source, ModuleMap, Precompiled }; + + constexpr InputKind(Language l = Language::Unknown) : lang_(l) {} + + Language GetLanguage() const { return static_cast(lang_); } + + /// Is the input kind fully-unknown? + bool IsUnknown() const { return lang_ == Language::Unknown; } +}; + +/// FrontendOptions - Options for controlling the behavior of the frontend. +class FrontendOptions { +public: + /// Show the -help text. + unsigned showHelp_ : 1; + + /// Show the -version text. + unsigned showVersion_ : 1; + +public: + FrontendOptions() : showHelp_(false), showVersion_(false) {} +}; +} // namespace Fortran::frontend + +#endif // LLVM_FLANG_FRONTEND_FRONTENDOPTIONS_H diff --git a/flang/include/flang/FrontendTool/Utils.h b/flang/include/flang/FrontendTool/Utils.h new file mode 100644 index 0000000000000..f49c4e6dae62d --- /dev/null +++ b/flang/include/flang/FrontendTool/Utils.h @@ -0,0 +1,29 @@ +//===--- Utils.h - Misc utilities for the flang front-end --------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header contains miscellaneous utilities for various front-end actions +// which were split from Frontend to minimise Frontend's dependencies. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FLANG_FRONTENDTOOL_UTILS_H +#define LLVM_FLANG_FRONTENDTOOL_UTILS_H + +namespace Fortran::frontend { + +class CompilerInstance; + +/// ExecuteCompilerInvocation - Execute the given actions described by the +/// compiler invocation object in the given compiler instance. +/// +/// \return - True on success. +bool ExecuteCompilerInvocation(CompilerInstance *flang); + +} // end namespace Fortran::frontend + +#endif // LLVM_FLANG_FRONTENDTOOL_UTILS_H diff --git a/flang/lib/CMakeLists.txt b/flang/lib/CMakeLists.txt index ae321b872a762..d9848bce0fa57 100644 --- a/flang/lib/CMakeLists.txt +++ b/flang/lib/CMakeLists.txt @@ -5,6 +5,11 @@ add_subdirectory(Lower) add_subdirectory(Parser) add_subdirectory(Semantics) +if(FLANG_BUILD_NEW_DRIVER) + add_subdirectory(Frontend) + add_subdirectory(FrontendTool) +endif() + if(LINK_WITH_FIR) add_subdirectory(Optimizer) endif() diff --git a/flang/lib/Frontend/CMakeLists.txt b/flang/lib/Frontend/CMakeLists.txt new file mode 100644 index 0000000000000..fac3f955987f1 --- /dev/null +++ b/flang/lib/Frontend/CMakeLists.txt @@ -0,0 +1,16 @@ +add_flang_library(flangFrontend + CompilerInstance.cpp + CompilerInvocation.cpp + FrontendOptions.cpp + + LINK_LIBS + clangBasic + clangDriver + # TODO: Added to re-use clang's TextDiagnosticBuffer & TextDiagnosticPrinter. + # Add a custom implementation for Flang and remove this dependency. + clangFrontend + + LINK_COMPONENTS + Option + Support +) diff --git a/flang/lib/Frontend/CompilerInstance.cpp b/flang/lib/Frontend/CompilerInstance.cpp new file mode 100644 index 0000000000000..bf1461dd16ad6 --- /dev/null +++ b/flang/lib/Frontend/CompilerInstance.cpp @@ -0,0 +1,42 @@ +//===--- CompilerInstance.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Frontend/CompilerInstance.h" +#include "flang/Frontend/CompilerInvocation.h" +#include "clang/Frontend/TextDiagnosticPrinter.h" +#include "llvm/Support/raw_ostream.h" + +using namespace Fortran::frontend; + +CompilerInstance::CompilerInstance() : invocation_(new CompilerInvocation()) {} + +CompilerInstance::~CompilerInstance() = default; + +void CompilerInstance::CreateDiagnostics( + clang::DiagnosticConsumer *client, bool shouldOwnClient) { + diagnostics_ = + CreateDiagnostics(&GetDiagnosticOpts(), client, shouldOwnClient); +} + +clang::IntrusiveRefCntPtr +CompilerInstance::CreateDiagnostics(clang::DiagnosticOptions *opts, + clang::DiagnosticConsumer *client, bool shouldOwnClient) { + clang::IntrusiveRefCntPtr diagID( + new clang::DiagnosticIDs()); + clang::IntrusiveRefCntPtr diags( + new clang::DiagnosticsEngine(diagID, opts)); + + // Create the diagnostic client for reporting errors or for + // implementing -verify. + if (client) { + diags->setClient(client, shouldOwnClient); + } else { + diags->setClient(new clang::TextDiagnosticPrinter(llvm::errs(), opts)); + } + return diags; +} diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp new file mode 100644 index 0000000000000..c68ad5c11d65a --- /dev/null +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -0,0 +1,115 @@ +//===- CompilerInvocation.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Frontend/CompilerInvocation.h" +#include "clang/Basic/AllDiagnostics.h" +#include "clang/Basic/DiagnosticDriver.h" +#include "clang/Basic/DiagnosticOptions.h" +#include "clang/Driver/DriverDiagnostic.h" +#include "clang/Driver/Options.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Support/raw_ostream.h" + +using namespace Fortran::frontend; + +//===----------------------------------------------------------------------===// +// Initialization. +//===----------------------------------------------------------------------===// +CompilerInvocationBase::CompilerInvocationBase() + : diagnosticOpts_(new clang::DiagnosticOptions()) {} + +CompilerInvocationBase::CompilerInvocationBase(const CompilerInvocationBase &x) + : diagnosticOpts_(new clang::DiagnosticOptions(x.GetDiagnosticOpts())) {} + +CompilerInvocationBase::~CompilerInvocationBase() = default; + +//===----------------------------------------------------------------------===// +// Deserialization (from args) +//===----------------------------------------------------------------------===// +static InputKind ParseFrontendArgs(FrontendOptions &opts, + llvm::opt::ArgList &args, clang::DiagnosticsEngine &diags) { + // Identify the action (i.e. 
opts.ProgramAction) + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_Action_Group)) { + switch (a->getOption().getID()) { + default: { + llvm_unreachable("Invalid option in group!"); + } + // TODO: + // case clang::driver::options::OPT_E: + // case clang::driver::options::OPT_emit_obj: + // case calng::driver::options::OPT_emit_llvm: + // case clang::driver::options::OPT_emit_llvm_only: + // case clang::driver::options::OPT_emit_codegen_only: + // case clang::driver::options::OPT_emit_module: + // (...) + } + } + + opts.showHelp_ = args.hasArg(clang::driver::options::OPT_help); + opts.showVersion_ = args.hasArg(clang::driver::options::OPT_version); + + // Get the input kind (from the value passed via `-x`) + InputKind dashX(Language::Unknown); + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_x)) { + llvm::StringRef XValue = a->getValue(); + // Principal languages. + dashX = llvm::StringSwitch(XValue) + .Case("f90", Language::Fortran) + .Default(Language::Unknown); + + // Some special cases cannot be combined with suffixes. + if (dashX.IsUnknown()) + dashX = llvm::StringSwitch(XValue) + .Case("ir", Language::LLVM_IR) + .Default(Language::Unknown); + + if (dashX.IsUnknown()) + diags.Report(clang::diag::err_drv_invalid_value) + << a->getAsString(args) << a->getValue(); + } + + return dashX; +} + +bool CompilerInvocation::CreateFromArgs(CompilerInvocation &res, + llvm::ArrayRef commandLineArgs, + clang::DiagnosticsEngine &diags) { + + bool success = true; + + // Parse the arguments + const llvm::opt::OptTable &opts = clang::driver::getDriverOptTable(); + const unsigned includedFlagsBitmask = + clang::driver::options::FC1Option; + unsigned missingArgIndex, missingArgCount; + llvm::opt::InputArgList args = opts.ParseArgs( + commandLineArgs, missingArgIndex, missingArgCount, includedFlagsBitmask); + + // Issue errors on unknown arguments + for (const auto *a : args.filtered(clang::driver::options::OPT_UNKNOWN)) { + auto argString = a->getAsString(args); + std::string nearest; + if (opts.findNearest(argString, nearest, includedFlagsBitmask) > 1) + diags.Report(clang::diag::err_drv_unknown_argument) << argString; + else + diags.Report(clang::diag::err_drv_unknown_argument_with_suggestion) + << argString << nearest; + success = false; + } + + // Parse the frontend args + ParseFrontendArgs(res.GetFrontendOpts(), args, diags); + + return success; +} diff --git a/flang/lib/Frontend/FrontendOptions.cpp b/flang/lib/Frontend/FrontendOptions.cpp new file mode 100644 index 0000000000000..ea5d54aa7ff06 --- /dev/null +++ b/flang/lib/Frontend/FrontendOptions.cpp @@ -0,0 +1,9 @@ +//===- FrontendOptions.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Frontend/FrontendOptions.h" diff --git a/flang/lib/FrontendTool/CMakeLists.txt b/flang/lib/FrontendTool/CMakeLists.txt new file mode 100644 index 0000000000000..eda040f7c7161 --- /dev/null +++ b/flang/lib/FrontendTool/CMakeLists.txt @@ -0,0 +1,11 @@ +add_flang_library(flangFrontendTool + ExecuteCompilerInvocation.cpp + + LINK_LIBS + clangBasic + clangDriver + + LINK_COMPONENTS + Option + Support +) diff --git a/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp new file mode 100644 index 0000000000000..ab773c95c85dd --- /dev/null +++ b/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -0,0 +1,39 @@ +//===--- ExecuteCompilerInvocation.cpp ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file holds ExecuteCompilerInvocation(). It is split into its own file to +// minimize the impact of pulling in essentially everything else in Flang. +// +//===----------------------------------------------------------------------===// + +#include "flang/Frontend/CompilerInstance.h" +#include "clang/Driver/Options.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Support/CommandLine.h" + +namespace Fortran::frontend { +bool ExecuteCompilerInvocation(CompilerInstance *flang) { + // Honor -help. + if (flang->GetFrontendOpts().showHelp_) { + clang::driver::getDriverOptTable().PrintHelp(llvm::outs(), + "flang-new -fc1 [options] file...", "LLVM 'Flang' Compiler", + /*Include=*/clang::driver::options::FlangOption, + /*Exclude=*/0, /*ShowAllAliases=*/false); + return true; + } + + // Honor -version. + if (flang->GetFrontendOpts().showVersion_) { + llvm::cl::PrintVersionMessage(); + return true; + } + + return true; +} + +} // namespace Fortran::frontend diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index a1532dc7141ff..635d3d88b61c6 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -41,6 +41,10 @@ if (LINK_WITH_FIR) list(APPEND FLANG_TEST_DEPENDS tco) endif() +if (FLANG_BUILD_NEW_DRIVER) + list(APPEND FLANG_TEST_DEPENDS flang-new) +endif() + if (FLANG_INCLUDE_TESTS) if (FLANG_GTEST_AVAIL) list(APPEND FLANG_TEST_DEPENDS FlangUnitTests) diff --git a/flang/test/Flang-Driver/driver-error-cc1.c b/flang/test/Flang-Driver/driver-error-cc1.c new file mode 100644 index 0000000000000..1563ee431579f --- /dev/null +++ b/flang/test/Flang-Driver/driver-error-cc1.c @@ -0,0 +1,7 @@ +// RUN: not %flang-new %s 2>&1 | FileCheck %s + +// REQUIRES: new-flang-driver + +// C files are currently not supported (i.e. `flang -cc1`) + +// CHECK:error: unknown integrated tool '-cc1'. Valid tools include '-fc1'. diff --git a/flang/test/Flang-Driver/driver-error-cc1.cpp b/flang/test/Flang-Driver/driver-error-cc1.cpp new file mode 100644 index 0000000000000..20e469733bc9a --- /dev/null +++ b/flang/test/Flang-Driver/driver-error-cc1.cpp @@ -0,0 +1,7 @@ +// RUN: not %flang-new %s 2>&1 | FileCheck %s + +// REQUIRES: new-flang-driver + +// C++ files are currently not supported (i.e. `flang -cc1`) + +// CHECK:error: unknown integrated tool '-cc1'. Valid tools include '-fc1'. 
diff --git a/flang/test/Flang-Driver/driver-help.f90 b/flang/test/Flang-Driver/driver-help.f90 new file mode 100644 index 0000000000000..6ecd076efee4e --- /dev/null +++ b/flang/test/Flang-Driver/driver-help.f90 @@ -0,0 +1,13 @@ +! RUN: %flang-new -help 2>&1 | FileCheck %s +! RUN: %flang-new -fc1 -help 2>&1 | FileCheck %s +! RUN: not %flang-new -helps 2>&1 | FileCheck %s --check-prefix=ERROR + +! REQUIRES: new-flang-driver + +! CHECK:USAGE: flang-new +! CHECK-EMPTY: +! CHECK-NEXT:OPTIONS: +! CHECK-NEXT: -help Display available options +! CHECK-NEXT: --version Print version information + +! ERROR: error: unknown argument '-helps'; did you mean '-help' diff --git a/flang/test/Flang-Driver/driver-version.f90 b/flang/test/Flang-Driver/driver-version.f90 new file mode 100644 index 0000000000000..8552d0b2f28b4 --- /dev/null +++ b/flang/test/Flang-Driver/driver-version.f90 @@ -0,0 +1,11 @@ +! RUN: %flang-new --version 2>&1 | FileCheck %s +! RUN: not %flang-new --versions 2>&1 | FileCheck %s --check-prefix=ERROR + +! REQUIRES: new-flang-driver + +! CHECK:flang-new version +! CHECK-NEXT:Target: +! CHECK-NEXT:Thread model: +! CHECK-NEXT:InstalledDir: + +! ERROR: error: unsupported option '--versions'; did you mean '--version'? diff --git a/flang/test/Flang-Driver/emit-obj.f90 b/flang/test/Flang-Driver/emit-obj.f90 new file mode 100644 index 0000000000000..4ddd483828626 --- /dev/null +++ b/flang/test/Flang-Driver/emit-obj.f90 @@ -0,0 +1,17 @@ +! RUN: not %flang-new %s 2>&1 | FileCheck %s --check-prefix=ERROR-IMPLICIT +! RUN: not %flang-new -emit-obj %s 2>&1 | FileCheck %s --check-prefix=ERROR-EXPLICIT +! RUN: not %flang-new -fc1 -emit-obj %s 2>&1 | FileCheck %s --check-prefix=ERROR-FC1 + +! REQUIRES: new-flang-driver + +! By default (e.g. when no options like `-E` are passed) flang-new +! creates a job that corresponds to `-emit-obj`. This option/action is +! not yet supported. Verify that this is correctly reported as error. + +! ERROR-IMPLICIT: error: unknown argument: '-triple' +! ERROR-IMPLICIT: error: unknown argument: '-emit-obj' +! ERROR-IMPLICIT: error: unknown argument: '-o' + +! ERROR-EXPLICIT: error: unknown argument: '-o' + +! ERROR-FC1: error: unknown argument: '-emit-obj' diff --git a/flang/test/Flang-Driver/missing-input.f90 b/flang/test/Flang-Driver/missing-input.f90 new file mode 100644 index 0000000000000..96818bc4bd385 --- /dev/null +++ b/flang/test/Flang-Driver/missing-input.f90 @@ -0,0 +1,5 @@ +! RUN: not %flang-new 2>&1 | FileCheck %s + +! REQUIRES: new-flang-driver + +! CHECK: error: no input files diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 25c63890832fe..21d8530434312 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -25,7 +25,7 @@ config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell) # suffixes: A list of file extensions to treat as test files. -config.suffixes = ['.f', '.F', '.ff', '.FOR', '.for', '.f77', '.f90', '.F90', +config.suffixes = ['.c', '.cpp', '.f', '.F', '.ff', '.FOR', '.for', '.f77', '.f90', '.F90', '.ff90', '.f95', '.F95', '.ff95', '.fpp', '.FPP', '.cuf', '.CUF', '.f18', '.F18', '.fir'] @@ -38,6 +38,13 @@ # directories. config.excludes = ['Inputs', 'CMakeLists.txt', 'README.txt', 'LICENSE.txt'] +# If the new Flang driver is enabled, add the corresponding feature to +# config. Otherwise, exclude the corresponding test directory. 
+if config.include_flang_new_driver_test: + config.available_features.add('new-flang-driver') +else: + config.excludes.append('Flang-Driver') + # test_source_root: The root path where tests are located. config.test_source_root = os.path.dirname(__file__) @@ -63,6 +70,9 @@ unresolved='fatal') ] +if config.include_flang_new_driver_test: + tools.append(ToolSubst('%flang-new', command=FindTool('flang-new'), unresolved='fatal')) + if config.flang_standalone_build: llvm_config.add_tool_substitutions(tools, [config.flang_llvm_tools_dir]) else: diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index 10ec132081544..7a59280283813 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -11,6 +11,11 @@ config.flang_llvm_tools_dir = "@CMAKE_BINARY_DIR@/bin" config.python_executable = "@PYTHON_EXECUTABLE@" config.flang_standalone_build = @FLANG_STANDALONE_BUILD@ +# Control the regression test for flang-new driver +import lit.util +config.include_flang_new_driver_test = \ + lit.util.pythonize_bool("@FLANG_BUILD_NEW_DRIVER@") + # Support substitution of the tools_dir with user parameters. This is # used when we can't determine the tool dir at configuration time. try: diff --git a/flang/tools/CMakeLists.txt b/flang/tools/CMakeLists.txt index b973127d34435..0fbf828253ef7 100644 --- a/flang/tools/CMakeLists.txt +++ b/flang/tools/CMakeLists.txt @@ -7,6 +7,9 @@ #===------------------------------------------------------------------------===# add_subdirectory(f18) +if(FLANG_BUILD_NEW_DRIVER) + add_subdirectory(flang-driver) +endif() if(LINK_WITH_FIR) add_subdirectory(tco) endif() diff --git a/flang/tools/flang-driver/CMakeLists.txt b/flang/tools/flang-driver/CMakeLists.txt new file mode 100644 index 0000000000000..d7bab277287f5 --- /dev/null +++ b/flang/tools/flang-driver/CMakeLists.txt @@ -0,0 +1,25 @@ +# Infrastructure to build flang driver entry point. Flang driver depends on +# LLVM libraries. + +# Set your project compile flags. +link_directories(${LLVM_LIBRARY_DIR}) + +add_flang_tool(flang-new + driver.cpp + fc1_main.cpp +) + +# Link against LLVM and Clang libraries +target_link_libraries(flang-new + PRIVATE + ${LLVM_COMMON_LIBS} + flangFrontend + flangFrontendTool + clangDriver + clangBasic + LLVMSupport + LLVMTarget + LLVMOption +) + +install(TARGETS flang-new DESTINATION bin) diff --git a/flang/tools/flang-driver/driver.cpp b/flang/tools/flang-driver/driver.cpp new file mode 100644 index 0000000000000..9d04994d98435 --- /dev/null +++ b/flang/tools/flang-driver/driver.cpp @@ -0,0 +1,129 @@ +//===-- driver.cpp - Flang Driver -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the entry point to the flang driver; it is a thin wrapper +// for functionality in the Driver flang library. 
+//
+//===----------------------------------------------------------------------===//
+#include "clang/Driver/Driver.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticIDs.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Driver/Compilation.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/VirtualFileSystem.h"
+
+// main frontend method. Lives inside fc1_main.cpp
+extern int fc1_main(llvm::ArrayRef<const char *> argv, const char *argv0);
+
+std::string GetExecutablePath(const char *argv0) {
+  // This just needs to be some symbol in the binary
+  void *p = (void *)(intptr_t)GetExecutablePath;
+  return llvm::sys::fs::getMainExecutable(argv0, p);
+}
+
+// This lets us create the DiagnosticsEngine with a properly-filled-out
+// DiagnosticOptions instance
+static clang::DiagnosticOptions *CreateAndPopulateDiagOpts(
+    llvm::ArrayRef<const char *> argv) {
+  auto *diagOpts = new clang::DiagnosticOptions;
+  return diagOpts;
+}
+
+static int ExecuteFC1Tool(llvm::SmallVectorImpl<const char *> &argV) {
+  llvm::StringRef tool = argV[1];
+  if (tool == "-fc1")
+    return fc1_main(makeArrayRef(argV).slice(2), argV[0]);
+
+  // Reject unknown tools.
+  // ATM it only supports fc1. Any fc1[*] is rejected.
+  llvm::errs() << "error: unknown integrated tool '" << tool << "'. "
+               << "Valid tools include '-fc1'.\n";
+  return 1;
+}
+
+int main(int argc_, const char **argv_) {
+
+  // Initialize variables to call the driver
+  llvm::InitLLVM x(argc_, argv_);
+  llvm::SmallVector<const char *, 256> argv(argv_, argv_ + argc_);
+
+  clang::driver::ParsedClangName targetandMode("flang", "--driver-mode=flang");
+  std::string driverPath = GetExecutablePath(argv[0]);
+
+  // Check if flang-new is in the frontend mode
+  auto firstArg = std::find_if(
+      argv.begin() + 1, argv.end(), [](const char *a) { return a != nullptr; });
+  if (firstArg != argv.end()) {
+    if (llvm::StringRef(argv[1]).startswith("-cc1")) {
+      llvm::errs() << "error: unknown integrated tool '" << argv[1] << "'. "
+                   << "Valid tools include '-fc1'.\n";
+      return 1;
+    }
+    // Call flang-new frontend
+    if (llvm::StringRef(argv[1]).startswith("-fc1")) {
+      return ExecuteFC1Tool(argv);
+    }
+  }
+
+  // Not in the frontend mode - continue in the compiler driver mode.
+
+  // Create DiagnosticsEngine for the compiler driver
+  llvm::IntrusiveRefCntPtr<clang::DiagnosticOptions> diagOpts =
+      CreateAndPopulateDiagOpts(argv);
+  llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> diagID(
+      new clang::DiagnosticIDs());
+  clang::TextDiagnosticPrinter *diagClient =
+      new clang::TextDiagnosticPrinter(llvm::errs(), &*diagOpts);
+  clang::DiagnosticsEngine diags(diagID, &*diagOpts, diagClient);
+
+  // Prepare the driver
+  clang::driver::Driver theDriver(driverPath,
+      llvm::sys::getDefaultTargetTriple(), diags, "flang LLVM compiler");
+  theDriver.setTargetAndMode(targetandMode);
+  std::unique_ptr<clang::driver::Compilation> c(
+      theDriver.BuildCompilation(argv));
+  llvm::SmallVector<std::pair<int, const clang::driver::Command *>, 4>
+      failingCommands;
+
+  // Run the driver
+  int res = 1;
+  bool isCrash = false;
+  res = theDriver.ExecuteCompilation(*c, failingCommands);
+
+  for (const auto &p : failingCommands) {
+    int CommandRes = p.first;
+    const clang::driver::Command *failingCommand = p.second;
+    if (!res)
+      res = CommandRes;
+
+    // If result status is < 0 (e.g. when sys::ExecuteAndWait returns -1),
+    // then the driver command signalled an error. On Windows, abort will
+    // return an exit code of 3. In these cases, generate additional diagnostic
+    // information if possible.
+    isCrash = CommandRes < 0;
+#ifdef _WIN32
+    isCrash |= CommandRes == 3;
+#endif
+    if (isCrash) {
+      theDriver.generateCompilationDiagnostics(*c, *failingCommand);
+      break;
+    }
+  }
+
+  diags.getClient()->finish();
+
+  // If we have multiple failing commands, we return the result of the first
+  // failing command.
+  return res;
+}
diff --git a/flang/tools/flang-driver/fc1_main.cpp b/flang/tools/flang-driver/fc1_main.cpp
new file mode 100644
index 0000000000000..bb69517edde28
--- /dev/null
+++ b/flang/tools/flang-driver/fc1_main.cpp
@@ -0,0 +1,56 @@
+//===-- fc1_main.cpp - Flang FC1 Compiler Frontend ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the entry point to the flang -fc1 functionality, which implements the
+// core compiler functionality along with a number of additional tools for
+// demonstration and testing purposes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Frontend/CompilerInstance.h"
+#include "flang/Frontend/CompilerInvocation.h"
+#include "flang/FrontendTool/Utils.h"
+#include "clang/Driver/DriverDiagnostic.h"
+#include "clang/Frontend/TextDiagnosticBuffer.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/OptTable.h"
+
+#include <memory>
+
+using namespace Fortran::frontend;
+
+int fc1_main(llvm::ArrayRef<const char *> argv, const char *argv0) {
+  // Create CompilerInstance
+  std::unique_ptr<CompilerInstance> flang(new CompilerInstance());
+
+  // Create DiagnosticsEngine for the frontend driver
+  flang->CreateDiagnostics();
+  if (!flang->HasDiagnostics())
+    return 1;
+
+  // Create CompilerInvocation - use a dedicated instance of DiagnosticsEngine
+  // for parsing the arguments
+  llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> diagID(
+      new clang::DiagnosticIDs());
+  llvm::IntrusiveRefCntPtr<clang::DiagnosticOptions> diagOpts =
+      new clang::DiagnosticOptions();
+  clang::TextDiagnosticBuffer *diagsBuffer = new clang::TextDiagnosticBuffer;
+  clang::DiagnosticsEngine diags(diagID, &*diagOpts, diagsBuffer);
+  bool success =
+      CompilerInvocation::CreateFromArgs(flang->GetInvocation(), argv, diags);
+
+  diagsBuffer->FlushDiagnostics(flang->getDiagnostics());
+  if (!success)
+    return 1;
+
+  // Execute the frontend actions.
+  success = ExecuteCompilerInvocation(flang.get());
+
+  return !success;
+}
diff --git a/flang/unittests/CMakeLists.txt b/flang/unittests/CMakeLists.txt
index a30f0edaec615..c88e9fc660f16 100644
--- a/flang/unittests/CMakeLists.txt
+++ b/flang/unittests/CMakeLists.txt
@@ -22,3 +22,7 @@ add_subdirectory(Decimal)
 add_subdirectory(Evaluate)
 add_subdirectory(Runtime)
 add_subdirectory(Lower)
+
+if (FLANG_BUILD_NEW_DRIVER)
+  add_subdirectory(Frontend)
+endif()
diff --git a/flang/unittests/Frontend/CMakeLists.txt b/flang/unittests/Frontend/CMakeLists.txt
new file mode 100644
index 0000000000000..dd5cbedb0f91d
--- /dev/null
+++ b/flang/unittests/Frontend/CMakeLists.txt
@@ -0,0 +1,10 @@
+add_flang_unittest(FlangFrontendTests
+  CompilerInstanceTest.cpp
+)
+
+target_link_libraries(FlangFrontendTests
+  PRIVATE
+  LLVMSupport
+  clangBasic
+  flangFrontend
+  flangFrontendTool)
diff --git a/flang/unittests/Frontend/CompilerInstanceTest.cpp b/flang/unittests/Frontend/CompilerInstanceTest.cpp
new file mode 100644
index 0000000000000..a971c4c2b6c97
--- /dev/null
+++ b/flang/unittests/Frontend/CompilerInstanceTest.cpp
@@ -0,0 +1,52 @@
+//===- unittests/Frontend/CompilerInstanceTest.cpp - CI tests -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Frontend/CompilerInstance.h"
+#include "gtest/gtest.h"
+#include "flang/Frontend/CompilerInvocation.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Driver/Options.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <memory>
+using namespace llvm;
+using namespace Fortran::frontend;
+
+namespace {
+
+TEST(CompilerInstance, AllowDiagnosticLogWithUnownedDiagnosticConsumer) {
+  // 1. Set-up a basic DiagnosticConsumer
+  std::string diagnosticOutput;
+  llvm::raw_string_ostream diagnosticsOS(diagnosticOutput);
+  auto diagPrinter = std::make_unique<clang::TextDiagnosticPrinter>(
+      diagnosticsOS, new clang::DiagnosticOptions());
+
+  // 2. Create a CompilerInstance (to manage a DiagnosticEngine)
+  CompilerInstance compInst;
+
+  // 3. Set-up DiagnosticOptions
+  auto diagOpts = new clang::DiagnosticOptions();
+  // Tell the diagnostics engine to emit the diagnostic log to STDERR. This
+  // ensures that a chained diagnostic consumer is created so that the test can
+  // exercise the unowned diagnostic consumer in a chained consumer.
+  diagOpts->DiagnosticLogFile = "-";
+
+  // 4. Create a DiagnosticEngine with an unowned consumer
+  IntrusiveRefCntPtr<clang::DiagnosticsEngine> diags =
+      compInst.CreateDiagnostics(diagOpts, diagPrinter.get(),
+          /*ShouldOwnClient=*/false);
+
+  // 5. Report a diagnostic
+  diags->Report(clang::diag::err_expected) << "no crash";
+
+  // 6.
Verify that the reported diagnostic wasn't lost and did end up in the + // output stream + ASSERT_EQ(diagnosticsOS.str(), "error: expected no crash\n"); +} +} // namespace diff --git a/llvm/include/llvm/Option/OptTable.h b/llvm/include/llvm/Option/OptTable.h index 1aabff0fd6591..c0742ebc70acc 100644 --- a/llvm/include/llvm/Option/OptTable.h +++ b/llvm/include/llvm/Option/OptTable.h @@ -50,7 +50,7 @@ class OptTable { unsigned ID; unsigned char Kind; unsigned char Param; - unsigned short Flags; + unsigned int Flags; unsigned short GroupID; unsigned short AliasID; const char *AliasArgs; From cabd60c26b5df34f096cccca5a915bde3b1d8ee1 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Thu, 10 Sep 2020 15:41:36 +0000 Subject: [PATCH 0353/1079] [clang][aarch64] Fix mangling of bfloat16 neon vectors The AAPCS64 specifies the internal type is used for c++ mangling. For bfloat16 it was defined as `BFloat16` when it should be `Bfloat16`, i.e. lowercase 'f'. For more information, see: https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst#appendix-support-for-advanced-simd-extensions Reviewed By: stuij Differential Revision: https://reviews.llvm.org/D87463 --- clang/lib/AST/ItaniumMangle.cpp | 2 +- clang/test/CodeGenCXX/mangle-neon-vectors.cpp | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 877050c160955..eb3aa807f63a5 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -3275,7 +3275,7 @@ static StringRef mangleAArch64VectorBase(const BuiltinType *EltType) { case BuiltinType::Double: return "Float64"; case BuiltinType::BFloat16: - return "BFloat16"; + return "Bfloat16"; default: llvm_unreachable("Unexpected vector element base type"); } diff --git a/clang/test/CodeGenCXX/mangle-neon-vectors.cpp b/clang/test/CodeGenCXX/mangle-neon-vectors.cpp index 6faf6226efd2e..cb5e40be6a6df 100644 --- a/clang/test/CodeGenCXX/mangle-neon-vectors.cpp +++ b/clang/test/CodeGenCXX/mangle-neon-vectors.cpp @@ -1,6 +1,7 @@ // RUN: %clang_cc1 -triple armv7-apple-ios -target-feature +neon %s -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -triple arm64-apple-ios -target-feature +neon %s -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-AARCH64 +// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon -target-feature +bf16 %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-AARCH64-BF16 typedef float float32_t; typedef double float64_t; @@ -14,6 +15,10 @@ typedef short poly16_t; #endif typedef unsigned __INT64_TYPE__ uint64_t; +#if defined(__ARM_FEATURE_BF16) +typedef __bf16 bfloat16_t; +#endif + typedef __attribute__((neon_vector_type(2))) int int32x2_t; typedef __attribute__((neon_vector_type(4))) int int32x4_t; typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t; @@ -28,6 +33,10 @@ typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t; typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t; typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t; +#if defined(__ARM_FEATURE_BF16) +typedef __attribute__((neon_vector_type(4))) __bf16 bfloat16x4_t; +#endif + // CHECK: 16__simd64_int32_t // CHECK-AARCH64: 11__Int32x2_t void f1(int32x2_t v) { } @@ -72,3 +81,8 @@ void f10(poly16x8_t v) {} // CHECK-AARCH64: 13__Float64x2_t void f11(float64x2_t v) { } #endif + +#if defined(__ARM_FEATURE_BF16) +// 
CHECK-AARCH64-BF16: 14__Bfloat16x4_t
+void f12(bfloat16x4_t v) {}
+#endif

From 82390454f0c4dfc57dbb82a2cad77de1260868a4 Mon Sep 17 00:00:00 2001
From: Jeremy Morse
Date: Fri, 11 Sep 2020 11:22:27 +0100
Subject: [PATCH 0354/1079] [DFSan] XFail a test that's suffering too much
 optimization

See https://bugs.llvm.org/show_bug.cgi?id=47488 , rGfb109c42d9 is
optimizing out part of this test.

---
 compiler-rt/test/dfsan/event_callbacks.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/compiler-rt/test/dfsan/event_callbacks.c b/compiler-rt/test/dfsan/event_callbacks.c
index c0f4fff372822..6f9fd289c226a 100644
--- a/compiler-rt/test/dfsan/event_callbacks.c
+++ b/compiler-rt/test/dfsan/event_callbacks.c
@@ -2,6 +2,10 @@
 // RUN: %clang_dfsan -O2 -mllvm -dfsan-event-callbacks %s %t-callbacks.o -o %t
 // RUN: %run %t FooBarBaz 2>&1 | FileCheck %s
 
+// See PR47488, parts of this test get optimized out by a more aggressive
+// dead store eliminator.
+// XFAIL: *
+
 // Tests that callbacks are inserted for store events when
 // -dfsan-event-callbacks is specified.
 
From 95c7b66abe594116789dd21b32c8ef4c677d18c8 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Fri, 11 Sep 2020 11:24:59 +0100
Subject: [PATCH 0355/1079] PluginLoader.h - only include CommandLine.h if
 required. NFCI.

We only need this if DONT_GET_PLUGIN_LOADER_OPTION isn't defined.

---
 llvm/include/llvm/Support/PluginLoader.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/include/llvm/Support/PluginLoader.h b/llvm/include/llvm/Support/PluginLoader.h
index c0c516bdae03e..95c087f03d9bf 100644
--- a/llvm/include/llvm/Support/PluginLoader.h
+++ b/llvm/include/llvm/Support/PluginLoader.h
@@ -16,7 +16,11 @@
 #ifndef LLVM_SUPPORT_PLUGINLOADER_H
 #define LLVM_SUPPORT_PLUGINLOADER_H
 
+#ifndef DONT_GET_PLUGIN_LOADER_OPTION
 #include "llvm/Support/CommandLine.h"
+#endif
+
+#include <string>
 
 namespace llvm {
   struct PluginLoader {
From e9a777c4ec7c86043cf82b29cc78da52585bec25 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Fri, 11 Sep 2020 11:44:03 +0100
Subject: [PATCH 0356/1079] Attributor.h - remove unused includes. NFCI.

---
 llvm/include/llvm/Transforms/IPO/Attributor.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 5c0a90339150f..e73dc637117b1 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -116,9 +116,6 @@
 #include "llvm/IR/PassManager.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/DOTGraphTraits.h"
-#include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
 
From e17219b15f7528c8240a93fd9385b3a9f3290aa5 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Fri, 11 Sep 2020 12:12:18 +0100
Subject: [PATCH 0357/1079] [IPO] Remove unnecessary Module.h includes. NFCI.

Uses of Module are all implicit to PassInfoMixin<> so we can guarantee
PassManager.h to handle it for us.
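
As a rough illustration (with a hypothetical pass name), a new-PM pass
declared in one of these headers only names Module in its run() signature,
and, as noted above, PassManager.h already provides everything that
signature needs, so the extra Module.h include buys nothing:

  #include "llvm/IR/PassManager.h"

  namespace llvm {
  // Sketch only: Module and ModuleAnalysisManager are available through
  // PassManager.h, so no separate Module.h include is required here.
  class ExamplePass : public PassInfoMixin<ExamplePass> {
  public:
    PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
  };
  } // namespace llvm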
--- llvm/include/llvm/Transforms/IPO/CalledValuePropagation.h | 1 - llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h | 1 - llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h | 1 - 3 files changed, 3 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/CalledValuePropagation.h b/llvm/include/llvm/Transforms/IPO/CalledValuePropagation.h index c2626d0867b4d..782633799ede6 100644 --- a/llvm/include/llvm/Transforms/IPO/CalledValuePropagation.h +++ b/llvm/include/llvm/Transforms/IPO/CalledValuePropagation.h @@ -19,7 +19,6 @@ #ifndef LLVM_TRANSFORMS_IPO_CALLEDVALUEPROPAGATION_H #define LLVM_TRANSFORMS_IPO_CALLEDVALUEPROPAGATION_H -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" namespace llvm { diff --git a/llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h b/llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h index 8440df6397299..d34a510811018 100644 --- a/llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h +++ b/llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h @@ -14,7 +14,6 @@ #ifndef LLVM_TRANSFORMS_IPO_CROSSDSOCFI_H #define LLVM_TRANSFORMS_IPO_CROSSDSOCFI_H -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" namespace llvm { diff --git a/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h index 7379009b2592c..fd99843d0449b 100644 --- a/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h +++ b/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h @@ -13,7 +13,6 @@ #ifndef LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H #define LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" namespace llvm { From 0caeaff123768020c7b0e1a648d6b6ba67ad6d87 Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Fri, 11 Sep 2020 12:10:55 +0100 Subject: [PATCH 0358/1079] [LiveDebugValues][NFC] Re-land 60db26a66d, add instr-ref tests This was landed but reverted in 5b9c2b1bea7 due to asan picking up a memory leak. This is fixed in the change to InstrRefBasedImpl.cpp. Original commit message follows: [LiveDebugValues][NFC] Add instr-ref tests, adapt old tests This patch adds a few tests in DebugInfo/MIR/InstrRef/ of interesting behaviour that the instruction referencing implementation of LiveDebugValues has. Mostly, these tests exist to ensure that if you give the "-experimental-debug-variable-locations" command line switch, the right implementation runs; and to ensure it behaves the same way as the VarLoc LiveDebugValues implementation. I've also touched roughly 30 other tests, purely to make the tests less rigid about what output to accept. DBG_VALUE instructions are usually printed with a trailing !debug-location indicating its scope: !debug-location !1234 However InstrRefBasedLDV produces new DebugLoc instances on the fly, meaning there sometimes isn't a numbered node when they're printed, making the output: !debug-location !DILocation(line: 0, blah blah) Which causes a ton of these tests to fail. This patch removes checks for that final part of each DBG_VALUE instruction. None of them appear to be actually checking the scope is correct, just that it's present, so I don't believe there's any loss in coverage here. 
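
For instance, a typical assertion in the updated tests goes from checking
the full instruction:

  ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17

to checking everything up to the expression, leaving the trailing scope
metadata unchecked:

  ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression()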
Differential Revision: https://reviews.llvm.org/D83054 --- .../LiveDebugValues/InstrRefBasedImpl.cpp | 2 ++ .../DebugInfo/MIR/Mips/last-inst-bundled.mir | 2 +- .../DebugInfo/MIR/X86/kill-after-spill.mir | 24 +++++++------- .../MIR/X86/live-debug-values-3preds.mir | 6 ++-- .../X86/live-debug-values-bad-transfer.mir | 32 ++++++++++++------- .../DebugInfo/MIR/X86/live-debug-values.mir | 2 +- ...vedebugvalues-ignores-metaInstructions.mir | 6 ++-- .../MIR/X86/livedebugvalues_basic_diamond.mir | 8 ++--- ...ebugvalues_basic_diamond_match_clobber.mir | 6 ++-- ...vedebugvalues_basic_diamond_match_move.mir | 12 +++---- ...edebugvalues_basic_diamond_one_clobber.mir | 6 ++-- ...livedebugvalues_basic_diamond_one_move.mir | 8 ++--- .../MIR/X86/livedebugvalues_basic_loop.mir | 8 ++--- .../MIR/X86/livedebugvalues_bb_to_bb.mir | 8 ++--- .../livedebugvalues_bb_to_bb_clobbered.mir | 4 +-- ...vedebugvalues_bb_to_bb_move_to_clobber.mir | 8 ++--- .../MIR/X86/livedebugvalues_loop_break.mir | 10 +++--- .../MIR/X86/livedebugvalues_loop_diamond.mir | 12 +++---- .../X86/livedebugvalues_loop_diamond_move.mir | 12 +++---- .../X86/livedebugvalues_loop_two_backedge.mir | 10 +++--- .../X86/livedebugvalues_loop_within_loop.mir | 12 +++---- ...livedebugvalues_loop_within_loop_moved.mir | 4 +-- ...bugvalues_loop_within_loop_outer_moved.mir | 6 ++-- 23 files changed, 109 insertions(+), 99 deletions(-) diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index cfaec85d3f3dd..e39811e33e8c6 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -3114,6 +3114,8 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, bool Changed = TTracker->Transfers.size() != 0; delete MTracker; + delete TTracker; + MTracker = nullptr; VTracker = nullptr; TTracker = nullptr; diff --git a/llvm/test/DebugInfo/MIR/Mips/last-inst-bundled.mir b/llvm/test/DebugInfo/MIR/Mips/last-inst-bundled.mir index 1187dd4331408..ed7360a68da49 100644 --- a/llvm/test/DebugInfo/MIR/Mips/last-inst-bundled.mir +++ b/llvm/test/DebugInfo/MIR/Mips/last-inst-bundled.mir @@ -21,7 +21,7 @@ # # Check that last bundled instruction of block gets recognized as end of basic block. # CHECK: bb.2.if.end -# CHECK-NEXT: DBG_VALUE $s0, $noreg, !12, !DIExpression(), debug-location !17 +# CHECK-NEXT: DBG_VALUE $s0, $noreg, !12, !DIExpression() --- | ; ModuleID = '' diff --git a/llvm/test/DebugInfo/MIR/X86/kill-after-spill.mir b/llvm/test/DebugInfo/MIR/X86/kill-after-spill.mir index d85be7f6d8048..fb5503d7e086e 100644 --- a/llvm/test/DebugInfo/MIR/X86/kill-after-spill.mir +++ b/llvm/test/DebugInfo/MIR/X86/kill-after-spill.mir @@ -14,8 +14,8 @@ # ... 
# # CHECK: bb.1.if.end: -# CHECK: DBG_VALUE $rbp, 0, !37, !DIExpression(DW_OP_constu, 44, DW_OP_minus), debug-location !58 -# CHECK-NOT: DBG_VALUE $rbp, 0, !36, !DIExpression(DW_OP_constu, 48, DW_OP_minus), debug-location !57 +# CHECK: DBG_VALUE $rbp, 0, !37, !DIExpression(DW_OP_constu, 44, DW_OP_minus) +# CHECK-NOT: DBG_VALUE $rbp, 0, !36, !DIExpression(DW_OP_constu, 48, DW_OP_minus) --- | ; ModuleID = '' @@ -283,7 +283,7 @@ body: | $r13 = MOV64rr $rax renamable $ecx = XOR32rr undef $ecx, undef $ecx, implicit-def dead $eflags renamable $r13 = AND64rr killed renamable $r13, renamable $r14, implicit-def $eflags - JCC_1 %bb.9, 4, implicit $eflags + JCC_1 %bb.9, 4, implicit $eflags, debug-location !57 bb.1.if.end: successors: %bb.2(0x30000000), %bb.3(0x50000000) @@ -301,7 +301,7 @@ body: | $r12 = MOV64rr $rax $r15 = MOV64rr $r12 renamable $r15 = AND64ri8 killed renamable $r15, -123, implicit-def $eflags - JCC_1 %bb.2, 4, implicit $eflags + JCC_1 %bb.2, 4, implicit $eflags, debug-location !57 bb.3.private.exit: successors: %bb.9(0x30000000), %bb.4(0x50000000) @@ -316,7 +316,7 @@ body: | CALL64pcrel32 @func4, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax renamable $ecx = MOV32ri 1 TEST32rr killed renamable $eax, renamable $eax, implicit-def $eflags - JCC_1 %bb.9, 4, implicit $eflags + JCC_1 %bb.9, 4, implicit $eflags, debug-location !57 bb.4.if.then8: successors: %bb.8(0x30000000), %bb.5(0x50000000) @@ -327,21 +327,21 @@ body: | CALL64pcrel32 @func5, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $esi, implicit-def $rsp, implicit-def $ssp renamable $rax = MOV64rm killed renamable $r13, 1, $noreg, 8, $noreg :: (load 8 from %ir.13) TEST64rr renamable $rax, renamable $rax, implicit-def $eflags - JCC_1 %bb.8, 4, implicit $eflags + JCC_1 %bb.8, 4, implicit $eflags, debug-location !57 bb.5.land.lhs.true: successors: %bb.6(0x30000000), %bb.7(0x50000000) liveins: $rax, $r12, $r15 CMP32mi8 renamable $r15, 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load 4 from %ir.tot_perf2, align 8) - JCC_1 %bb.7, 5, implicit $eflags + JCC_1 %bb.7, 5, implicit $eflags, debug-location !57 bb.6.lor.lhs.false: successors: %bb.8(0x30000000), %bb.7(0x50000000) liveins: $rax, $r12, $r15 CMP32mi8 killed renamable $r15, 1, $noreg, 4, $noreg, 0, implicit-def $eflags :: (load 4 from %ir.tot_bw) - JCC_1 %bb.8, 4, implicit $eflags + JCC_1 %bb.8, 4, implicit $eflags, debug-location !57 bb.7.if.then14: successors: %bb.8(0x80000000) @@ -350,13 +350,13 @@ body: | renamable $rdx = MOV64rm killed renamable $rax, 1, $noreg, 8, $noreg :: (load 8 from %ir.20) $rdi = MOV64rr killed $r12 $esi = MOV32rm $rbp, 1, $noreg, -44, $noreg :: (load 4 from %stack.1) - CALL64pcrel32 @func6, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $esi, implicit $rdx, implicit-def $rsp, implicit-def $ssp + CALL64pcrel32 @func6, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $esi, implicit $rdx, implicit-def $rsp, implicit-def $ssp, debug-location !57 bb.8.cleanup: successors: %bb.9(0x80000000) renamable $ecx = MOV32ri 1 - JMP_1 %bb.9 + JMP_1 %bb.9, debug-location !57 bb.2.if.then3: successors: %bb.9(0x80000000) @@ -369,7 +369,7 @@ body: | $edx = MOV32ri 5 $r8d = MOV32rm $rbp, 1, $noreg, -48, $noreg :: (load 4 from %stack.0) CALL64pcrel32 @func3, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit $edx, implicit $rcx, implicit $r8d, implicit-def $rsp, implicit-def $ssp - renamable $ecx = XOR32rr undef $ecx, undef $ecx, 
implicit-def dead $eflags + renamable $ecx = XOR32rr undef $ecx, undef $ecx, implicit-def dead $eflags, debug-location !57 bb.9.cleanup: liveins: $ecx @@ -382,6 +382,6 @@ body: | $r14 = POP64r implicit-def $rsp, implicit $rsp $r15 = POP64r implicit-def $rsp, implicit $rsp $rbp = POP64r implicit-def $rsp, implicit $rsp - RETQ $eax + RETQ $eax, debug-location !57 ... diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-values-3preds.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-values-3preds.mir index c55269951aa50..bef0f4e4aa5ab 100644 --- a/llvm/test/DebugInfo/MIR/X86/live-debug-values-3preds.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-values-3preds.mir @@ -31,9 +31,9 @@ # DBG_VALUE for variables "x", "y" and "z" are extended into %bb.9 from its # predecessors %bb.0, %bb.2 and %bb.8. # CHECK: bb.9.for.end: -# CHECK-DAG: DBG_VALUE $edi, $noreg, ![[X_VAR]], !DIExpression(), debug-location !{{[0-9]+}} -# CHECK-DAG: DBG_VALUE $esi, $noreg, ![[Y_VAR]], !DIExpression(), debug-location !{{[0-9]+}} -# CHECK-DAG: DBG_VALUE $edx, $noreg, ![[Z_VAR]], !DIExpression(), debug-location !{{[0-9]+}} +# CHECK-DAG: DBG_VALUE $edi, $noreg, ![[X_VAR]], !DIExpression() +# CHECK-DAG: DBG_VALUE $esi, $noreg, ![[Y_VAR]], !DIExpression() +# CHECK-DAG: DBG_VALUE $edx, $noreg, ![[Z_VAR]], !DIExpression() # CHECK: RET --- | diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-values-bad-transfer.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-values-bad-transfer.mir index 1d978b9c45532..97fad0755b80e 100644 --- a/llvm/test/DebugInfo/MIR/X86/live-debug-values-bad-transfer.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-values-bad-transfer.mir @@ -1,4 +1,5 @@ # RUN: llc %s -mtriple=x86_64-unknown-unknown -o - -run-pass=livedebugvalues | FileCheck %s --implicit-check-not=DBG_VALUE +# RUN: llc %s -mtriple=x86_64-unknown-unknown -o - -run-pass=livedebugvalues -experimental-debug-variable-locations | FileCheck %s -check-prefix=NEWLDV --implicit-check-not=DBG_VALUE # # Test that the DBG_VALUE of ecx below does not get propagated. It is considered # live-in on LiveDebugValues' first pass through the loop, but on the second it @@ -17,6 +18,13 @@ # CHECK-LABEL: bb.1.loop: # CHECK: $ebx = COPY killed $ecx # CHECK-NEXT: DBG_VALUE +# +# This doesn't occur under value-tracking LiveDebugValues though. 
+# +# NEWLDV-LABEL: name: foo +# NEWLDV-LABEL: bb.0.entry: +# NEWLDV: $ecx = MOV32ri 0 +# NEWLDV-NEXT: DBG_VALUE --- | source_filename = "live-debug-values-remove-range.ll" @@ -74,30 +82,30 @@ body: | CFI_INSTRUCTION def_cfa_offset 16 CFI_INSTRUCTION offset $rbx, -16 $ebx = MOV32rr $edi - $eax = MOV32ri 0 - $ecx = MOV32ri 0 + $eax = MOV32ri 0, debug-location !10 + $ecx = MOV32ri 0, debug-location !10 DBG_VALUE $ecx, $noreg, !9, !DIExpression(), debug-location !10 - $edi = MOV32ri 0 - $esi = MOV32ri 0 + $edi = MOV32ri 0, debug-location !10 + $esi = MOV32ri 0, debug-location !10 bb.1.loop: successors: %bb.1, %bb.2 liveins: $ebx, $eax, $ecx, $edi, $esi - $eax = COPY $ecx - $ebx = COPY killed $ecx - $ecx = COPY killed $edi - $edi = COPY killed $esi - $esi = MOV32ri 1 + $eax = COPY $ecx, debug-location !10 + $ebx = COPY killed $ecx, debug-location !10 + $ecx = COPY killed $edi, debug-location !10 + $edi = COPY killed $esi, debug-location !10 + $esi = MOV32ri 1, debug-location !10 TEST8ri killed renamable $al, 1, implicit-def $eflags - JCC_1 %bb.1, 5, implicit killed $eflags + JCC_1 %bb.1, 5, implicit killed $eflags, debug-location !10 bb.2.exit: liveins: $ebx - $eax = MOV32rr killed $ebx + $eax = MOV32rr killed $ebx, debug-location !10 $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp CFI_INSTRUCTION def_cfa_offset 8 - RETQ $eax + RETQ $eax, debug-location !10 ... diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-values.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-values.mir index 2cf52611bafd1..2731eac26ecdd 100644 --- a/llvm/test/DebugInfo/MIR/X86/live-debug-values.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-values.mir @@ -35,7 +35,7 @@ # CHECK: ![[N_VAR:[0-9]+]] = !DILocalVariable(name: "n",{{.*}}) # # CHECK: bb.5.if.end.7: -# CHECK: DBG_VALUE $ebx, $noreg, ![[N_VAR]], !DIExpression(), debug-location !{{[0-9]+}} +# CHECK: DBG_VALUE $ebx, $noreg, ![[N_VAR]], !DIExpression() --- | diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues-ignores-metaInstructions.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues-ignores-metaInstructions.mir index e8c3a994e59d0..89c7d55d95c6e 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues-ignores-metaInstructions.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues-ignores-metaInstructions.mir @@ -6,11 +6,11 @@ ; CHECK-LABEL: bb.0.entry: ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 ; CHECK-LABEL: bb.1.bb1: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.2.bb2: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.3.bb3: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { entry: diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond.mir index 4004199ad0482..89b4ac63e08a1 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond.mir @@ -5,13 +5,13 @@ ; a diamond that doesn't move or clobber their locations. 
; CHECK-LABEL: bb.0.entry: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.1.bb1: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.2.bb2: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.3.bb3: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { entry: diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_match_clobber.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_match_clobber.mir index 063b7f450e08e..bd6dacc2fed1a 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_match_clobber.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_match_clobber.mir @@ -5,12 +5,12 @@ ; a diamond when the location is clobbered and not into the successor block. ; CHECK-LABEL: bb.0.entry: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.1.bb1: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-NEXT: $ebx = MOV32ri 0, debug-location !17 ; CHECK-LABEL: bb.2.bb2: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-NEXT: $ebx = MOV32ri 0, debug-location !17 define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_match_move.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_match_move.mir index 8e530c89db621..05a1955532aaa 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_match_move.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_match_move.mir @@ -5,17 +5,17 @@ ; diamond CFG when the location is moved by another instruction. 
; CHECK-LABEL: bb.0.entry: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.1.bb1: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-NEXT: $eax = MOV32ri 0, debug-location !17 - ; CHECK-NEXT: DBG_VALUE $eax, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK-NEXT: DBG_VALUE $eax, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.2.bb2: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-NEXT: $eax = MOV32ri 0, debug-location !17 - ; CHECK-NEXT: DBG_VALUE $eax, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK-NEXT: DBG_VALUE $eax, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.3.bb3: - ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression() define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { entry: diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_one_clobber.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_one_clobber.mir index a89546800a217..ee843492c7b95 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_one_clobber.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_one_clobber.mir @@ -5,11 +5,11 @@ ; of a diamond CFG that clobbers its location. ; CHECK-LABEL: bb.0.entry: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.1.bb1: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.2.bb2: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { entry: diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_one_move.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_one_move.mir index 4b9b70455407b..fe3924bf846ae 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_one_move.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_one_move.mir @@ -5,13 +5,13 @@ ; of a diamond CFG that moves its location. ; CHECK-LABEL: bb.0.entry: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.1.bb1: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-NEXT: $eax = MOV32ri 0, debug-location !17 - ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.2.bb2: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { entry: diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_loop.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_loop.mir index ba2d31ea0b462..d7eb4bd48ab3a 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_loop.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_loop.mir @@ -5,13 +5,13 @@ ; loop that doesn't move or clobber its location. 
; CHECK-LABEL: bb.0.entry: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.1.bb1: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.2.bb2: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.3.bb3: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { entry: diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb.mir index 2801df4832e33..f48940a24861b 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb.mir @@ -5,13 +5,13 @@ ; sequential CFG. ; CHECK-LABEL: bb.0.entry: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.1.bb1: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.2.bb2: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.3.bb3: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { entry: diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb_clobbered.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb_clobbered.mir index d1cacff032e13..f969179b76a7d 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb_clobbered.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb_clobbered.mir @@ -5,9 +5,9 @@ ; control flow when it's location is clobbered. ; CHECK-LABEL: bb.0.entry: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.1.bb1: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { entry: diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb_move_to_clobber.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb_move_to_clobber.mir index c1cb8d5daa958..339d21380fa64 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb_move_to_clobber.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb_move_to_clobber.mir @@ -5,13 +5,13 @@ ; no control flow when a location is moved and then clobbered. 
; CHECK-LABEL: bb.0.entry: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.1.bb1: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-NEXT: $eax = MOV32ri 0, debug-location !17 - ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.2.bb2: - ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.3.bb3: define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_break.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_break.mir index 7860517adaf08..0d9cc1905134a 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_break.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_break.mir @@ -5,15 +5,15 @@ ; break. ; CHECK-LABEL: bb.0.entry: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.1.bb1: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.2.bb2: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.3.bb3: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.4.bb4: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { entry: diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond.mir index 9854e05e20dca..1e410054dc1cb 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond.mir @@ -5,17 +5,17 @@ ; diamond pattern and beyond. 
; CHECK-LABEL: bb.0.entry: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.1.bb1: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.2.bb2: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.3.bb3: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.4.bb4: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.5.bb5: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { entry: diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond_move.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond_move.mir index ed7bdcffd881b..7861e7dfa9c62 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond_move.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond_move.mir @@ -5,17 +5,17 @@ ; diamond pattern but not beyond. ; CHECK-LABEL: bb.0.entry: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.1.bb1: - ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.2.bb2: - ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.3.bb3: - ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.4.bb4: - ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.5.bb5: - ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression() define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { entry: diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_two_backedge.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_two_backedge.mir index 0989ee335b083..83f7235558947 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_two_backedge.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_two_backedge.mir @@ -5,15 +5,15 @@ ; backedges and beyond. 
; CHECK-LABEL: bb.0.entry: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.1.bb1: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.2.bb2: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.3.bb3: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.4.bb4: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { entry: diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop.mir index f15275ed60a90..7ff781a07fce6 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop.mir @@ -4,17 +4,17 @@ ; Check that DBG_VALUE instructions are propagated into loops within loops. ; CHECK-LABEL: bb.0.entry: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.1.bb1: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.2.bb2: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.3.bb3: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.4.bb4: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.5.bb5: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { entry: diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_moved.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_moved.mir index da624928c3aa8..fca7f83a14be4 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_moved.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_moved.mir @@ -5,9 +5,9 @@ ; loops that move their locations. ; CHECK-LABEL: bb.0.entry: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.3.bb3: - ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression() define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { entry: diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_outer_moved.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_outer_moved.mir index 12f22df63b141..baade395c6ede 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_outer_moved.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_outer_moved.mir @@ -5,11 +5,11 @@ ; loops that move their locations. 
; CHECK-LABEL: bb.0.entry: - ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $ebx, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.4.bb4: - ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.5.bb5: - ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression(), debug-location !17 + ; CHECK: DBG_VALUE $eax, $noreg, !16, !DIExpression() define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { entry: From 1c08da38676d15600b5c707cf7522eb4273a5347 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirst=C3=B3f=20Umann?= Date: Wed, 12 Aug 2020 16:33:22 +0200 Subject: [PATCH 0359/1079] [analyzer][MacroExpansion] Add a few dumps functions --- .../StaticAnalyzer/Core/PlistDiagnostics.cpp | 43 +++++++++++++++++-- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp index ed62778623a80..c4b66da676aad 100644 --- a/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp @@ -825,13 +825,31 @@ void PlistDiagnostics::FlushDiagnosticsImpl( namespace { -using ExpArgTokens = llvm::SmallVector; +using ExpArgTokensTy = llvm::SmallVector; +} // end of anonymous namespace + +LLVM_DUMP_METHOD static void +dumpExpArgTokensToStream(llvm::raw_ostream &Out, const Preprocessor &PP, + const ExpArgTokensTy &Toks); + +LLVM_DUMP_METHOD static void dumpExpArgTokens(const Preprocessor &PP, + const ExpArgTokensTy &Toks) { + dumpExpArgTokensToStream(llvm::errs(), PP, Toks); +} + +namespace { /// Maps unexpanded macro arguments to expanded arguments. A macro argument may /// need to expanded further when it is nested inside another macro. -class MacroArgMap : public std::map { +class MacroArgMap : public std::map { public: void expandFromPrevMacro(const MacroArgMap &Super); + LLVM_DUMP_METHOD void dump(const Preprocessor &PP) const { + dumpToStream(llvm::errs(), PP); + } + + LLVM_DUMP_METHOD void dumpToStream(llvm::raw_ostream &Out, + const Preprocessor &PP) const; }; struct MacroNameAndArgs { @@ -1225,7 +1243,7 @@ static const MacroInfo *getMacroInfoForLocation(const Preprocessor &PP, void MacroArgMap::expandFromPrevMacro(const MacroArgMap &Super) { for (value_type &Pair : *this) { - ExpArgTokens &CurrExpArgTokens = Pair.second; + ExpArgTokensTy &CurrExpArgTokens = Pair.second; // For each token in the expanded macro argument. auto It = CurrExpArgTokens.begin(); @@ -1244,7 +1262,7 @@ void MacroArgMap::expandFromPrevMacro(const MacroArgMap &Super) { continue; } - const ExpArgTokens &SuperExpArgTokens = Super.at(II); + const ExpArgTokensTy &SuperExpArgTokens = Super.at(II); It = CurrExpArgTokens.insert( It, SuperExpArgTokens.begin(), SuperExpArgTokens.end()); @@ -1254,6 +1272,23 @@ void MacroArgMap::expandFromPrevMacro(const MacroArgMap &Super) { } } +void MacroArgMap::dumpToStream(llvm::raw_ostream &Out, + const Preprocessor &PP) const { + for (const std::pair Pair : *this) { + Out << Pair.first->getName() << " -> "; + dumpExpArgTokensToStream(Out, PP, Pair.second); + Out << '\n'; + } +} + +static void dumpExpArgTokensToStream(llvm::raw_ostream &Out, + const Preprocessor &PP, + const ExpArgTokensTy &Toks) { + TokenPrinter Printer(Out, PP); + for (Token Tok : Toks) + Printer.printToken(Tok); +} + void TokenPrinter::printToken(const Token &Tok) { // If this is the first token to be printed, don't print space. 
  if (PrevTok.isNot(tok::unknown)) {

From 26d9a94681056f88bd3e892f8113093268fa0907 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kirst=C3=B3f=20Umann?=
Date: Wed, 12 Aug 2020 17:54:49 +0200
Subject: [PATCH 0360/1079] [analyzer][MacroExpansion][NFC] Fix incorrectly calling parameters "arguments"

---
 .../StaticAnalyzer/Core/PlistDiagnostics.cpp  | 165 +++++++++---------
 1 file changed, 85 insertions(+), 80 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
index c4b66da676aad..87c9b84794637 100644
--- a/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
+++ b/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
@@ -825,25 +825,26 @@ void PlistDiagnostics::FlushDiagnosticsImpl(
 
 namespace {
 
-using ExpArgTokensTy = llvm::SmallVector<Token, 2>;
+using ArgTokensTy = llvm::SmallVector<Token, 2>;
 } // end of anonymous namespace
 
-LLVM_DUMP_METHOD static void
-dumpExpArgTokensToStream(llvm::raw_ostream &Out, const Preprocessor &PP,
-                         const ExpArgTokensTy &Toks);
+LLVM_DUMP_METHOD static void dumpArgTokensToStream(llvm::raw_ostream &Out,
+                                                   const Preprocessor &PP,
+                                                   const ArgTokensTy &Toks);
 
-LLVM_DUMP_METHOD static void dumpExpArgTokens(const Preprocessor &PP,
-                                              const ExpArgTokensTy &Toks) {
-  dumpExpArgTokensToStream(llvm::errs(), PP, Toks);
+LLVM_DUMP_METHOD static void dumpArgTokens(const Preprocessor &PP,
+                                           const ArgTokensTy &Toks) {
+  dumpArgTokensToStream(llvm::errs(), PP, Toks);
 }
 
 namespace {
 
-/// Maps unexpanded macro arguments to expanded arguments. A macro argument may
+/// Maps unexpanded macro parameters to expanded arguments. A macro argument may
 /// need to be expanded further when it is nested inside another macro.
-class MacroArgMap : public std::map<const IdentifierInfo *, ExpArgTokensTy> {
+class MacroParamMap : public std::map<const IdentifierInfo *, ArgTokensTy> {
 public:
-  void expandFromPrevMacro(const MacroArgMap &Super);
+  void expandFromPrevMacro(const MacroParamMap &Super);
 
   LLVM_DUMP_METHOD void dump(const Preprocessor &PP) const {
     dumpToStream(llvm::errs(), PP);
   }
@@ -852,13 +853,13 @@ class MacroArgMap : public std::map<const IdentifierInfo *, ExpArgTokensTy> {
                                      const Preprocessor &PP) const;
 };
 
-struct MacroNameAndArgs {
+struct MacroExpansionInfo {
   std::string Name;
   const MacroInfo *MI = nullptr;
-  MacroArgMap Args;
+  MacroParamMap ParamMap;
 
-  MacroNameAndArgs(std::string N, const MacroInfo *MI, MacroArgMap M)
-    : Name(std::move(N)), MI(MI), Args(std::move(M)) {}
+  MacroExpansionInfo(std::string N, const MacroInfo *MI, MacroParamMap M)
+      : Name(std::move(N)), MI(MI), ParamMap(std::move(M)) {}
 };
 
 class TokenPrinter {
@@ -896,7 +897,7 @@ class TokenPrinter {
 ///
 /// As we expand the last line, we'll immediately replace PRINT(str) with
 /// print(x). The information that both 'str' and 'x' refer to the same string
-/// is information we have to forward, hence the argument \p PrevArgs.
+/// is information we have to forward, hence the argument \p PrevParamMap.
 ///
 /// To avoid infinite recursion we maintain the already processed tokens in
 /// a set. This is carried as a parameter through the recursive calls.
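The guard described in that comment is a classic visited-set pattern. A minimal
standalone sketch of the same idea, using plain strings and an invented
expand() helper rather than the analyzer's actual MacroInfo machinery (so this
is only an illustration of the technique, not the patched code):

#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>

// Hypothetical macro table: macro name -> whitespace-separated replacement.
using MacroTable = std::map<std::string, std::string>;

// Expand Name recursively, but refuse to re-enter a macro that is already
// being expanded -- the role AlreadyProcessedTokens plays in the patch.
void expand(const MacroTable &Table, const std::string &Name,
            std::set<std::string> &InProgress, std::string &Out) {
  auto It = Table.find(Name);
  // Not a macro, or already on the expansion stack: emit the name verbatim.
  if (It == Table.end() || !InProgress.insert(Name).second) {
    Out += Name + " ";
    return;
  }
  std::istringstream Body(It->second);
  for (std::string Tok; Body >> Tok;)
    expand(Table, Tok, InProgress, Out);
  InProgress.erase(Name); // Done: the macro may legally appear again later.
}

int main() {
  // A mutually recursive pair, reduced to object-like macros:
  // x expands to f, f expands back to x.
  MacroTable Table = {{"f", "x"}, {"x", "f"}};
  std::set<std::string> InProgress;
  std::string Out;
  expand(Table, "x", InProgress, Out);
  std::cout << Out << "\n"; // Prints "x": the cycle is cut, no infinite loop.
}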
 /// The set
@@ -906,13 +907,11 @@ class TokenPrinter {
 /// #define f(y) x
 /// #define x f(x)
 static std::string getMacroNameAndPrintExpansion(
-    TokenPrinter &Printer,
-    SourceLocation MacroLoc,
-    const Preprocessor &PP,
-    const MacroArgMap &PrevArgs,
+    TokenPrinter &Printer, SourceLocation MacroLoc, const Preprocessor &PP,
+    const MacroParamMap &PrevParamMap,
     llvm::SmallPtrSet<IdentifierInfo *, 8> &AlreadyProcessedTokens);
 
-/// Retrieves the name of the macro and what it's arguments expand into
+/// Retrieves the name of the macro and what its parameters expand into
 /// at \p ExpanLoc.
 ///
 /// For example, for the following macro expansion:
@@ -934,8 +933,8 @@ static std::string getMacroNameAndPrintExpansion(
 /// When \p ExpanLoc references "SET_TO_NULL(a)" within the definition of
 /// "NOT_SUSPICIOUS", the macro name "SET_TO_NULL" and the MacroArgMap map
 /// { (x, a) } will be returned.
-static MacroNameAndArgs getMacroNameAndArgs(SourceLocation ExpanLoc,
-                                            const Preprocessor &PP);
+static MacroExpansionInfo getMacroExpansionInfo(SourceLocation ExpanLoc,
+                                                const Preprocessor &PP);
 
 /// Retrieves the ')' token that matches '(' \p It points to.
 static MacroInfo::tokens_iterator getMatchingRParen(
@@ -969,21 +968,20 @@ getExpandedMacro(SourceLocation MacroLoc, const Preprocessor &PP,
 
   llvm::SmallPtrSet<IdentifierInfo *, 8> AlreadyProcessedTokens;
   std::string MacroName = getMacroNameAndPrintExpansion(
-      Printer, MacroLoc, *PPToUse, MacroArgMap{}, AlreadyProcessedTokens);
+      Printer, MacroLoc, *PPToUse, MacroParamMap{}, AlreadyProcessedTokens);
   return {MacroName, std::string(OS.str())};
 }
 
 static std::string getMacroNameAndPrintExpansion(
-    TokenPrinter &Printer,
-    SourceLocation MacroLoc,
-    const Preprocessor &PP,
-    const MacroArgMap &PrevArgs,
+    TokenPrinter &Printer, SourceLocation MacroLoc, const Preprocessor &PP,
+    const MacroParamMap &PrevParamMap,
     llvm::SmallPtrSet<IdentifierInfo *, 8> &AlreadyProcessedTokens) {
 
   const SourceManager &SM = PP.getSourceManager();
 
-  MacroNameAndArgs Info = getMacroNameAndArgs(SM.getExpansionLoc(MacroLoc), PP);
-  IdentifierInfo* IDInfo = PP.getIdentifierInfo(Info.Name);
+  MacroExpansionInfo MExpInfo =
+      getMacroExpansionInfo(SM.getExpansionLoc(MacroLoc), PP);
+  IdentifierInfo *MacroNameII = PP.getIdentifierInfo(MExpInfo.Name);
 
   // TODO: If the macro definition contains another symbol then this function is
   // called recursively. In case this symbol is the one being defined, it will
   // be an infinite recursion which is stopped when this symbol is found again,
   // in this case we don't get the full expansion text in the Plist file. See
   // the test file where "value" is expanded to "garbage_" instead of
   // "garbage_value".
-  if (!AlreadyProcessedTokens.insert(IDInfo).second)
-    return Info.Name;
+  if (!AlreadyProcessedTokens.insert(MacroNameII).second)
+    return MExpInfo.Name;
 
-  if (!Info.MI)
-    return Info.Name;
+  if (!MExpInfo.MI)
+    return MExpInfo.Name;
 
   // Manually expand its arguments from the previous macro.
-  Info.Args.expandFromPrevMacro(PrevArgs);
+  MExpInfo.ParamMap.expandFromPrevMacro(PrevParamMap);
 
   // Iterate over the macro's tokens and stringify them.
-  for (auto It = Info.MI->tokens_begin(), E = Info.MI->tokens_end(); It != E;
-       ++It) {
+  for (auto It = MExpInfo.MI->tokens_begin(), E = MExpInfo.MI->tokens_end();
+       It != E; ++It) {
     Token T = *It;
 
     // If this token is not an identifier, we only need to print it.
@@ -1018,8 +1016,8 @@ static std::string getMacroNameAndPrintExpansion(
 
     // If this token is a macro that should be expanded inside the current
     // macro.
     if (getMacroInfoForLocation(PP, SM, II, T.getLocation())) {
-      getMacroNameAndPrintExpansion(Printer, T.getLocation(), PP, Info.Args,
-                                    AlreadyProcessedTokens);
+      getMacroNameAndPrintExpansion(Printer, T.getLocation(), PP,
+                                    MExpInfo.ParamMap, AlreadyProcessedTokens);
 
       // If this is a function-like macro, skip its arguments, as
       // getExpandedMacro() already printed them. If this is the case, let's
@@ -1031,10 +1029,10 @@ static std::string getMacroNameAndPrintExpansion(
     }
 
     // If this token is the current macro's argument, we should expand it.
-    auto ArgMapIt = Info.Args.find(II);
-    if (ArgMapIt != Info.Args.end()) {
-      for (MacroInfo::tokens_iterator ArgIt = ArgMapIt->second.begin(),
-                                      ArgEnd = ArgMapIt->second.end();
+    auto ParamToArgIt = MExpInfo.ParamMap.find(II);
+    if (ParamToArgIt != MExpInfo.ParamMap.end()) {
+      for (MacroInfo::tokens_iterator ArgIt = ParamToArgIt->second.begin(),
+                                      ArgEnd = ParamToArgIt->second.end();
           ArgIt != ArgEnd; ++ArgIt) {
 
         // These tokens may still be macros, if that is the case, handle it the
@@ -1052,7 +1050,8 @@ static std::string getMacroNameAndPrintExpansion(
         }
 
         getMacroNameAndPrintExpansion(Printer, ArgIt->getLocation(), PP,
-                                      Info.Args, AlreadyProcessedTokens);
+                                      MExpInfo.ParamMap,
+                                      AlreadyProcessedTokens);
 
         // Peek the next token if it is a tok::l_paren. This way we can decide
         // if this is the application or just a reference to a function macro
         // symbol:
@@ -1073,13 +1072,13 @@ static std::string getMacroNameAndPrintExpansion(
     Printer.printToken(T);
   }
 
-  AlreadyProcessedTokens.erase(IDInfo);
+  AlreadyProcessedTokens.erase(MacroNameII);
 
-  return Info.Name;
+  return MExpInfo.Name;
 }
 
-static MacroNameAndArgs getMacroNameAndArgs(SourceLocation ExpanLoc,
-                                            const Preprocessor &PP) {
+static MacroExpansionInfo getMacroExpansionInfo(SourceLocation ExpanLoc,
+                                                const Preprocessor &PP) {
 
   const SourceManager &SM = PP.getSourceManager();
   const LangOptions &LangOpts = PP.getLangOpts();
@@ -1112,15 +1111,15 @@ static MacroNameAndArgs getMacroNameAndArgs(SourceLocation ExpanLoc,
   if (!MI)
     return { MacroName, MI, {} };
 
-  // Acquire the macro's arguments.
+  // Acquire the macro's arguments at the expansion point.
   //
   // The rough idea here is to lex from the first left parentheses to the last
-  // right parentheses, and map the macro's unexpanded arguments to what they
-  // will be expanded to. An expanded macro argument may contain several tokens
-  // (like '3 + 4'), so we'll lex until we find a tok::comma or tok::r_paren, at
-  // which point we start lexing the next argument or finish.
-  ArrayRef<const IdentifierInfo *> MacroArgs = MI->params();
-  if (MacroArgs.empty())
+  // right parentheses, and map the macro's parameters to what they will be
+  // expanded to. A macro argument may contain several tokens (like '3 + 4'), so
+  // we'll lex until we find a tok::comma or tok::r_paren, at which point we
+  // start lexing the next argument or finish.
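A quick terminology reminder for the rename this patch performs: in
#define ADD(x, y), x and y are the macro's parameters; in ADD(1, f(2, 3)),
the token sequences 1 and f(2, 3) are the arguments. The comma/parenthesis-depth
scan described in the comment above can be sketched on plain characters; the
real code walks Tokens from a raw lexer, so this toy splitter only shows the
idea, not the patched implementation:

#include <iostream>
#include <string>
#include <vector>

// Split the argument list of a macro call, honoring nested parentheses so
// the comma in "f(2, 3)" does not end an argument. Call starts at the '('.
std::vector<std::string> splitMacroArgs(const std::string &Call) {
  std::vector<std::string> Args;
  std::string Cur;
  unsigned ParenthesesDepth = 0;
  for (char C : Call) {
    if (C == '(' && ++ParenthesesDepth == 1)
      continue;                      // Opening '(' of the call itself.
    if (C == ')' && --ParenthesesDepth == 0)
      break;                         // Matching ')' ends the argument list.
    if (C == ',' && ParenthesesDepth == 1) {
      Args.push_back(Cur);           // Top-level comma: argument boundary.
      Cur.clear();
      continue;
    }
    Cur += C;                        // Nested '(', ')' and ',' fall through.
  }
  Args.push_back(Cur);
  return Args;
}

int main() {
  for (const std::string &A : splitMacroArgs("(1, f(2, 3))"))
    std::cout << "arg:" << A << "\n"; // Prints "arg:1" and "arg: f(2, 3)".
}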
+  ArrayRef<const IdentifierInfo *> MacroParams = MI->params();
+  if (MacroParams.empty())
     return { MacroName, MI, {} };
 
   RawLexer.LexFromRawLexer(TheTok);
@@ -1135,9 +1134,9 @@ static MacroNameAndArgs getMacroNameAndArgs(SourceLocation ExpanLoc,
   if (TheTok.isNot(tok::l_paren))
     return { MacroName, MI, {} };
 
-  MacroArgMap Args;
+  MacroParamMap ParamMap;
 
-  // When the macro's argument is a function call, like
+  // When the argument is a function call, like
   //   CALL_FN(someFunctionName(param1, param2))
   // we will find tok::l_paren, tok::r_paren, and tok::comma that do not divide
   // actual macro arguments, or do not represent the macro argument's closing
@@ -1152,8 +1151,8 @@ static MacroNameAndArgs getMacroNameAndArgs(SourceLocation ExpanLoc,
   // even if we lex a tok::comma and ParenthesesDepth == 1.
   const IdentifierInfo *__VA_ARGS__II = PP.getIdentifierInfo("__VA_ARGS__");
 
-  for (const IdentifierInfo *UnexpArgII : MacroArgs) {
-    MacroArgMap::mapped_type ExpandedArgTokens;
+  for (const IdentifierInfo *CurrParamII : MacroParams) {
+    MacroParamMap::mapped_type ArgTokens;
 
    // One could also simply not supply a single argument to __VA_ARGS__ -- this
    // results in a preprocessor warning, but is not an error:
@@ -1169,8 +1168,9 @@ static MacroNameAndArgs getMacroNameAndArgs(SourceLocation ExpanLoc,
     // Lex the first token of the next macro parameter.
     RawLexer.LexFromRawLexer(TheTok);
 
-    while (!(ParenthesesDepth == 1 &&
-             (UnexpArgII == __VA_ARGS__II ? false : TheTok.is(tok::comma)))) {
+    while (
+        !(ParenthesesDepth == 1 &&
+          (CurrParamII == __VA_ARGS__II ? false : TheTok.is(tok::comma)))) {
       assert(TheTok.isNot(tok::eof) &&
             "EOF encountered while looking for expanded macro args!");
 
@@ -1186,21 +1186,26 @@ static MacroNameAndArgs getMacroNameAndArgs(SourceLocation ExpanLoc,
       if (TheTok.is(tok::raw_identifier))
         PP.LookUpIdentifierInfo(TheTok);
 
-      ExpandedArgTokens.push_back(TheTok);
+      ArgTokens.push_back(TheTok);
       RawLexer.LexFromRawLexer(TheTok);
     }
   } else {
-    assert(UnexpArgII == __VA_ARGS__II);
+    // FIXME: Handle when multiple parameters map to a single argument.
+    // Currently, we only handle when multiple arguments map to the same
+    // parameter.
+    assert(CurrParamII == __VA_ARGS__II &&
+           "No more macro arguments are found, but the current parameter "
+           "isn't __VA_ARGS__!");
   }
 
-  Args.emplace(UnexpArgII, std::move(ExpandedArgTokens));
+  ParamMap.emplace(CurrParamII, std::move(ArgTokens));
  }
 
  assert(TheTok.is(tok::r_paren) &&
        "Expanded macro argument acquisition failed! After the end of the loop"
        " this token should be ')'!");
 
-  return { MacroName, MI, Args };
+  return {MacroName, MI, ParamMap};
 }
 
 static MacroInfo::tokens_iterator getMatchingRParen(
@@ -1240,14 +1245,14 @@ static const MacroInfo *getMacroInfoForLocation(const Preprocessor &PP,
   return MD->findDirectiveAtLoc(Loc, SM).getMacroInfo();
 }
 
-void MacroArgMap::expandFromPrevMacro(const MacroArgMap &Super) {
+void MacroParamMap::expandFromPrevMacro(const MacroParamMap &Super) {
 
   for (value_type &Pair : *this) {
-    ExpArgTokensTy &CurrExpArgTokens = Pair.second;
+    ArgTokensTy &CurrArgTokens = Pair.second;
 
     // For each token in the expanded macro argument.
-    auto It = CurrExpArgTokens.begin();
-    while (It != CurrExpArgTokens.end()) {
+    auto It = CurrArgTokens.begin();
+    while (It != CurrArgTokens.end()) {
       if (It->isNot(tok::identifier)) {
         ++It;
         continue;
       }
@@ -1262,28 +1267,28 @@ void MacroArgMap::expandFromPrevMacro(const MacroArgMap &Super) {
         continue;
       }
 
-      const ExpArgTokensTy &SuperExpArgTokens = Super.at(II);
+      const ArgTokensTy &SuperArgTokens = Super.at(II);
 
-      It = CurrExpArgTokens.insert(
-          It, SuperExpArgTokens.begin(), SuperExpArgTokens.end());
-      std::advance(It, SuperExpArgTokens.size());
-      It = CurrExpArgTokens.erase(It);
+      It = CurrArgTokens.insert(It, SuperArgTokens.begin(),
+                                SuperArgTokens.end());
+      std::advance(It, SuperArgTokens.size());
+      It = CurrArgTokens.erase(It);
     }
   }
 }
 
-void MacroArgMap::dumpToStream(llvm::raw_ostream &Out,
-                               const Preprocessor &PP) const {
-  for (const std::pair<const IdentifierInfo *, ExpArgTokensTy> Pair : *this) {
+void MacroParamMap::dumpToStream(llvm::raw_ostream &Out,
+                                 const Preprocessor &PP) const {
+  for (const std::pair<const IdentifierInfo *, ArgTokensTy> Pair : *this) {
     Out << Pair.first->getName() << " -> ";
-    dumpExpArgTokensToStream(Out, PP, Pair.second);
+    dumpArgTokensToStream(Out, PP, Pair.second);
     Out << '\n';
   }
 }
 
-static void dumpExpArgTokensToStream(llvm::raw_ostream &Out,
-                                     const Preprocessor &PP,
-                                     const ExpArgTokensTy &Toks) {
+static void dumpArgTokensToStream(llvm::raw_ostream &Out,
+                                  const Preprocessor &PP,
+                                  const ArgTokensTy &Toks) {
   TokenPrinter Printer(Out, PP);
   for (Token Tok : Toks)
     Printer.printToken(Tok);

From 4eed800b18abaeba3082bf950fbe5c3020c4b592 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski
Date: Fri, 11 Sep 2020 12:17:51 +0100
Subject: [PATCH 0361/1079] [NFC] Fix the signature and definition of
 findByPrefix

In https://reviews.llvm.org/rG257b29715bb27b7d9f6c3c40c481b6a4af0b37e5,
the definition of OptTable::Info::Flags was changed from `unsigned short`
to `unsigned int`, but the definition/declaration of OptTable::findByPrefix
wasn't updated to reflect that. This patch updates findByPrefix accordingly.

---
 llvm/include/llvm/Option/OptTable.h | 2 +-
 llvm/lib/Option/OptTable.cpp        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/Option/OptTable.h b/llvm/include/llvm/Option/OptTable.h
index c0742ebc70acc..58c09b23d237c 100644
--- a/llvm/include/llvm/Option/OptTable.h
+++ b/llvm/include/llvm/Option/OptTable.h
@@ -152,7 +152,7 @@ class OptTable {
   ///
   /// \return The vector of flags which start with Cur.
   std::vector<std::string> findByPrefix(StringRef Cur,
-                                        unsigned short DisableFlags) const;
+                                        unsigned int DisableFlags) const;
 
   /// Find the OptTable option that most closely matches the given string.
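The failure mode this commit message describes is silent integer narrowing: a
32-bit flag mask passed through an unsigned short parameter loses every bit
above bit 15. A standalone illustration of that bug class (the flag value is
invented for the demo, not one of OptTable's real constants):

#include <cstdio>

// Pretend option-table entry flags are a 32-bit mask, as OptTable::Info::Flags
// became after the change referenced above.
constexpr unsigned int HelpHidden = 1u << 20; // Hypothetical flag above bit 15.

// Old-style signature: the mask is narrowed on the way in.
bool disabledNarrowed(unsigned int Flags, unsigned short DisableFlags) {
  return (Flags & DisableFlags) != 0;
}

// Fixed signature: the full mask survives.
bool disabledFull(unsigned int Flags, unsigned int DisableFlags) {
  return (Flags & DisableFlags) != 0;
}

int main() {
  unsigned int Mask = HelpHidden;
  // Narrowing 1 << 20 to unsigned short yields 0, so the option is never
  // filtered out; with the widened parameter the check works as intended.
  std::printf("narrowed: %d, full: %d\n",
              disabledNarrowed(HelpHidden, Mask), // implicit narrowing here
              disabledFull(HelpHidden, Mask));    // prints "narrowed: 0, full: 1"
  return 0;
}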
/// diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp index 740e02a9d2f0e..304c09fff9d28 100644 --- a/llvm/lib/Option/OptTable.cpp +++ b/llvm/lib/Option/OptTable.cpp @@ -228,7 +228,7 @@ OptTable::suggestValueCompletions(StringRef Option, StringRef Arg) const { } std::vector -OptTable::findByPrefix(StringRef Cur, unsigned short DisableFlags) const { +OptTable::findByPrefix(StringRef Cur, unsigned int DisableFlags) const { std::vector Ret; for (size_t I = FirstSearchableIndex, E = OptionInfos.size(); I < E; I++) { const Info &In = OptionInfos[I]; From 7527898fef47da929e70c81100a0248c2f445762 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirst=C3=B3f=20Umann?= Date: Wed, 12 Aug 2020 19:00:24 +0200 Subject: [PATCH 0362/1079] [analyzer][MacroExpansion][NFC] Fix a missing test output check --- .../plist-macros-with-expansion.cpp.plist | 100 +++++++++--------- .../Analysis/plist-macros-with-expansion.cpp | 3 + 2 files changed, 53 insertions(+), 50 deletions(-) diff --git a/clang/test/Analysis/Inputs/expected-plists/plist-macros-with-expansion.cpp.plist b/clang/test/Analysis/Inputs/expected-plists/plist-macros-with-expansion.cpp.plist index 2988f8504fcf7..499119c81d259 100644 --- a/clang/test/Analysis/Inputs/expected-plists/plist-macros-with-expansion.cpp.plist +++ b/clang/test/Analysis/Inputs/expected-plists/plist-macros-with-expansion.cpp.plist @@ -5645,12 +5645,12 @@ start - line459 + line462 col33 file0 - line459 + line462 col33 file0 @@ -5658,12 +5658,12 @@ end - line459 + line462 col37 file0 - line459 + line462 col39 file0 @@ -5675,7 +5675,7 @@ kindevent location - line459 + line462 col37 file0 @@ -5683,12 +5683,12 @@ - line459 + line462 col37 file0 - line459 + line462 col41 file0 @@ -5704,7 +5704,7 @@ kindevent location - line458 + line461 col1 file0 @@ -5718,7 +5718,7 @@ kindevent location - line458 + line461 col1 file0 @@ -5726,12 +5726,12 @@ - line458 + line461 col1 file0 - line458 + line461 col16 file0 @@ -5747,7 +5747,7 @@ kindevent location - line459 + line462 col37 file0 @@ -5755,12 +5755,12 @@ - line459 + line462 col37 file0 - line459 + line462 col41 file0 @@ -5780,12 +5780,12 @@ start - line459 + line462 col37 file0 - line459 + line462 col39 file0 @@ -5793,12 +5793,12 @@ end - line459 + line462 col35 file0 - line459 + line462 col35 file0 @@ -5810,7 +5810,7 @@ kindevent location - line459 + line462 col35 file0 @@ -5818,12 +5818,12 @@ - line459 + line462 col33 file0 - line459 + line462 col41 file0 @@ -5841,7 +5841,7 @@ location - line458 + line461 col1 file0 @@ -5860,7 +5860,7 @@ issue_hash_function_offset0 location - line459 + line462 col35 file0 @@ -5868,8 +5868,8 @@ 0 - 458 - 459 + 461 + 462 @@ -5884,12 +5884,12 @@ start - line468 + line471 col33 file0 - line468 + line471 col33 file0 @@ -5897,12 +5897,12 @@ end - line468 + line471 col37 file0 - line468 + line471 col39 file0 @@ -5914,7 +5914,7 @@ kindevent location - line468 + line471 col37 file0 @@ -5922,12 +5922,12 @@ - line468 + line471 col37 file0 - line468 + line471 col41 file0 @@ -5943,7 +5943,7 @@ kindevent location - line467 + line470 col1 file0 @@ -5957,7 +5957,7 @@ kindevent location - line467 + line470 col1 file0 @@ -5965,12 +5965,12 @@ - line467 + line470 col1 file0 - line467 + line470 col11 file0 @@ -5986,7 +5986,7 @@ kindevent location - line468 + line471 col37 file0 @@ -5994,12 +5994,12 @@ - line468 + line471 col37 file0 - line468 + line471 col41 file0 @@ -6019,12 +6019,12 @@ start - line468 + line471 col37 file0 - line468 + line471 col39 file0 @@ -6032,12 +6032,12 @@ end - line468 + line471 col35 
file0 - line468 + line471 col35 file0 @@ -6049,7 +6049,7 @@ kindevent location - line468 + line471 col35 file0 @@ -6057,12 +6057,12 @@ - line468 + line471 col33 file0 - line468 + line471 col41 file0 @@ -6080,7 +6080,7 @@ location - line467 + line470 col1 file0 @@ -6099,7 +6099,7 @@ issue_hash_function_offset0 location - line468 + line471 col35 file0 @@ -6107,8 +6107,8 @@ 0 - 467 - 468 + 470 + 471 diff --git a/clang/test/Analysis/plist-macros-with-expansion.cpp b/clang/test/Analysis/plist-macros-with-expansion.cpp index e07747eaec74d..a81ba0846905f 100644 --- a/clang/test/Analysis/plist-macros-with-expansion.cpp +++ b/clang/test/Analysis/plist-macros-with-expansion.cpp @@ -452,6 +452,9 @@ void recursiveMacroUser() { // expected-warning@-1{{expression result unused}} } +// CHECK: namevalue +// CHECK-NEXT: expansiongarbage_ + #define FOO(x) int foo() { return x; } #define APPLY_ZERO1(function) function(0) From e6f2f17f05a1248b069ba830c4afffd61ee2f297 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Fri, 11 Sep 2020 06:19:07 -0400 Subject: [PATCH 0363/1079] [mlir][Linalg] Refactor StructuredOpInterface - NFC This revision refactors and cleans up a bunch of things to simplify StructuredOpInterface before work can proceed on Linalg on tensors: - break out pieces of the StructuredOps trait that are part of the StructuredOpInterface, - drop referenceIterators and referenceIndexingMaps that end up being more confusing than useful, - drop NamedStructuredOpTrait --- .../Dialect/Linalg/IR/LinalgStructuredOps.td | 61 +-- .../Linalg/IR/LinalgStructuredOpsInterface.td | 500 ++++++++++++++---- .../mlir/Dialect/Linalg/IR/LinalgTraits.h | 316 +---------- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 25 +- mlir/test/Dialect/Linalg/invalid.mlir | 19 +- .../test-linalg-ods-gen.tc | 21 +- .../mlir-linalg-ods-gen.cpp | 43 +- 7 files changed, 489 insertions(+), 496 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td index e003fd15d0b1e..ac6e9317fa32c 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -130,21 +130,22 @@ def CopyOp : LinalgStructured_Op<"copy", [ let extraClassDeclaration = libraryCallName # [{ // Rank-polymorphic. // filling_value -> O(ivs) with parallel iterators. - llvm::Optional> referenceIterators() { - unsigned nPar = input().getType().cast().getRank(); - return SmallVector(nPar, getParallelIteratorTypeName()); + ArrayAttr iterator_types() { + unsigned nPar = getInputShapedType(0).getRank(); + return Builder(getContext()).getStrArrayAttr( + SmallVector(nPar, getParallelIteratorTypeName())); } // I(input_perm(ivs)) -> O(output_perm(ivs)) - llvm::Optional> referenceIndexingMaps() { + ArrayAttr indexing_maps() { MLIRContext *context = getContext(); auto maybeInputMap = inputPermutation(); auto maybeOutputMap = outputPermutation(); unsigned inputRank = getInputShapedType(0).getRank(); unsigned outputRank = getOutputShapedType(0).getRank(); - return SmallVector{ + return Builder(getContext()).getAffineMapArrayAttr({ extractOrIdentityMap(maybeInputMap, inputRank, context), - extractOrIdentityMap(maybeOutputMap, outputRank, context)}; + extractOrIdentityMap(maybeOutputMap, outputRank, context)}); } Value getSource() { return input();} @@ -163,16 +164,17 @@ def FillOp : LinalgStructured_Op<"fill", [NInputs<0>, NOutputs<1>]> { let extraClassDeclaration = libraryCallName # [{ // Rank-polymorphic. 
// filling_value -> O(ivs) with parallel iterators. - llvm::Optional> referenceIterators() { - unsigned nPar = output().getType().cast().getRank(); - return SmallVector(nPar, getParallelIteratorTypeName()); + ArrayAttr iterator_types() { + unsigned nPar = getOutputShapedType(0).getRank(); + return Builder(getContext()).getStrArrayAttr( + SmallVector(nPar, getParallelIteratorTypeName())); } - llvm::Optional> referenceIndexingMaps() { + ArrayAttr indexing_maps() { MLIRContext *context = getContext(); // filling_value -> O(ivs) - return SmallVector{ - extractOrIdentityMap(llvm::None, getNumParallelLoops(), context)}; + return Builder(getContext()).getAffineMapArrayAttr({ + extractOrIdentityMap(llvm::None, getNumParallelLoops(), context)}); } }]; @@ -295,7 +297,7 @@ def ConvOp : PoolingBase_Op<"conv", [NInputs<2>, NOutputs<1>]> { getNumOutputFeatureDimensions(); } - llvm::Optional> referenceIterators() { + ArrayAttr iterator_types() { // Outer parallel loops are always the number of output dimensions; i.e. // [b, xs, q] in the TF notation above. unsigned nPar = getOutputShapedType(0).getRank(); @@ -310,7 +312,7 @@ def ConvOp : PoolingBase_Op<"conv", [NInputs<2>, NOutputs<1>]> { iters.reserve(nPar + nRed + nWin); iters.append(nRed, getReductionIteratorTypeName()); iters.append(nWin, getWindowIteratorTypeName()); - return iters; + return Builder(getContext()).getStrArrayAttr(iters); } // F(z0, ..., zN-1, q, k) * @@ -318,7 +320,7 @@ def ConvOp : PoolingBase_Op<"conv", [NInputs<2>, NOutputs<1>]> { // -> O(b, x0, ..., xN-1, k) // for N equal to `nWindow`. If there is no padding attribute, it will be // ignored. - llvm::Optional> referenceIndexingMaps() { + ArrayAttr indexing_maps() { MLIRContext *context = getContext(); auto nWin = getNumWindowLoops(); assert(nWin > 0 && "expected at least one window dimension"); @@ -343,7 +345,7 @@ def ConvOp : PoolingBase_Op<"conv", [NInputs<2>, NOutputs<1>]> { auto zs = makeAffineDimExprs(nWin, idx, context); // Construct the weighedSum expression. auto ws = weightedPoolingInputIndex(*this, xs, zs); - return SmallVector{ + return Builder(getContext()).getAffineMapArrayAttr({ // filter[z[0], ..., z[N-1], q, k] AffineMap::get(idx, 0, concat(concat(zs, qs), ks), context), // input[b, @@ -353,7 +355,7 @@ def ConvOp : PoolingBase_Op<"conv", [NInputs<2>, NOutputs<1>]> { // q] AffineMap::get(idx, 0, concat(concat(bs, ws), qs), context), // output[b, x[0], ..., x[N-1], k] - AffineMap::get(idx, 0, concat(concat(bs, xs), ks), context)}; + AffineMap::get(idx, 0, concat(concat(bs, xs), ks), context)}); } }]; @@ -384,7 +386,7 @@ class SingleInputPoolingBase_Op OptionalAttr:$padding); let extraClassDeclaration = commonUtils# [{ - llvm::Optional> referenceIterators() { + ArrayAttr iterator_types() { // Outer parallel loops are always the number of output dimensions. unsigned nPar = getOutputShapedType(0).getRank(); // The window loops has the same number loops with output dimensions. @@ -392,10 +394,10 @@ class SingleInputPoolingBase_Op SmallVector iters(nPar, getParallelIteratorTypeName()); iters.reserve(nPar + nWin); iters.append(nWin, getWindowIteratorTypeName()); - return iters; + return Builder(getContext()).getStrArrayAttr(iters); } - llvm::Optional> referenceIndexingMaps() { + ArrayAttr indexing_maps() { MLIRContext *context = getContext(); auto nPar = getNumParallelLoops(); auto nWin = getNumWindowLoops(); @@ -406,14 +408,13 @@ class SingleInputPoolingBase_Op // Construct the weighedSum expression. 
auto inputDims = weightedPoolingInputIndex(*this, outputDims, windowDims); - return SmallVector{ + return Builder(getContext()).getAffineMapArrayAttr({ // input AffineMap::get(idx, 0, inputDims, context), // windowDims AffineMap::get(idx, 0, windowDims, context), // output - AffineMap::get(idx, 0, outputDims, context) - }; + AffineMap::get(idx, 0, outputDims, context)}); } }]; @@ -466,7 +467,7 @@ class GenericOpBase : LinalgStructuredBase_Op:$library_call, Confined, [IntMinValue<0>]>:$symbol_source); - let results = (outs Variadic:$output_tensors); + let results = (outs Variadic:$output_lis); let regions = (region AnyRegion:$region); let extraClassDeclaration = [{ SmallVector linalgTraitAttrNames() { @@ -485,16 +486,6 @@ class GenericOpBase : LinalgStructuredBase_Op> referenceIterators() { - llvm_unreachable( - "No such thing as reference iterator types for a generic op."); - } - - llvm::Optional> referenceIndexingMaps() { - llvm_unreachable( - "No such thing as reference indexing maps for a generic op."); - } - llvm::Optional getSymbolSource() { auto ss = symbol_source(); return ss.hasValue() ? @@ -807,8 +798,6 @@ def IndexedGenericOp : GenericOpBase<"indexed_generic"> { // Named Linalg ops, implemented as a declarative configurations of generic ops. //===----------------------------------------------------------------------===// -def NamedStructuredOpTraits : NativeOpTrait<"linalg::NamedStructuredOpTraits">; - class LinalgNamedStructured_Op props> : LinalgStructuredBase_Op { string spec = ?; diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td index 82882b083b2d8..f32b70efd87e1 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td @@ -23,168 +23,486 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { // Loop types handling. //===------------------------------------------------------------------===// InterfaceMethod< - "Return the number of parallel loops within the current operation.", - "unsigned", "getNumParallelLoops" + /*desc=*/[{ + Return the number of parallel loops within the current operation. + }], + /*retTy=*/"unsigned", + /*methodName=*/"getNumParallelLoops", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return getNumIterators(getParallelIteratorTypeName(), + $_op.iterator_types()); + }] >, InterfaceMethod< - "Return the number of reduction loops within the current operation.", - "unsigned", "getNumReductionLoops" + /*desc=*/[{ + Return the number of reduction loops within the current operation. + }], + /*retTy=*/"unsigned", + /*methodName=*/"getNumReductionLoops", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return getNumIterators(getReductionIteratorTypeName(), + $_op.iterator_types()); + }] >, InterfaceMethod< - "Return the number of window loops within the current operation.", - "unsigned", "getNumWindowLoops" + /*desc=*/[{ + Return the number of window loops within the current operation. + }], + /*retTy=*/"unsigned", + /*methodName=*/"getNumWindowLoops", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return getNumIterators(getWindowIteratorTypeName(), + $_op.iterator_types()); + }] >, InterfaceMethod< - "Return the number of loops within the current operation.", - "unsigned", "getNumLoops">, - + /*desc=*/[{ + Return the total number of loops within the current operation. 
+ }], + /*retTy=*/"unsigned", + /*methodName=*/"getNumLoops", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return getNumIterators($_op.iterator_types()); + }] + >, InterfaceMethod< - [{Returns true if the current operation has only one loop and it's a - reduction loop}], - "bool", "hasSingleReductionLoop">, - + /*desc=*/[{ + Returns true if the current operation has only one loop and it's a + reduction loop. + }], + /*retTy=*/"bool", + /*methodName=*/"hasSingleReductionLoop", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + auto iters = $_op.iterator_types(); + return iters.size() == 1 && + getNumIterators(getReductionIteratorTypeName(), iters) == 1; + }]>, //===------------------------------------------------------------------===// - // Input arguments handling. + // Num input/output arguments handling. //===------------------------------------------------------------------===// + // These special methods must be defined by each op that wants to implement + // the LinalgStructuredInterface. For now, this is either: + // - inherited statically by using the NInputs or + // NOutputs traits. + // - derived from args_in/args_out attributes (for linalg.generic and + // linalg.indexed_generic ops). + InterfaceMethod< + /*desc=*/[{ + Return the number of inputs from the current operation. + }], + /*retTy=*/"unsigned", + /*methodName=*/"getNumInputs" + >, InterfaceMethod< - "Return the number of inputs from the current operation.", - "unsigned", "getNumInputs" + /*desc=*/[{ + Return the number of outputs from the current operation. + }], + /*retTy=*/"unsigned", + /*methodName=*/"getNumOutputs" >, - InterfaceMethod<"Return the input view at the given index.", - "Value", "getInput", (ins "unsigned":$i) + //===------------------------------------------------------------------===// + // Input arguments handling. + //===------------------------------------------------------------------===// + InterfaceMethod< + /*desc=*/[{ + Return the `i`-th input value. + The `i^th` input argument is always the `i^th` operand regardless of + whether we have tensors or buffers. + }], + /*retTy=*/"Value", + /*methodName=*/"getInput", + /*args=*/(ins "unsigned":$i), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + assert(i < $_op.getNumInputs()); + return this->getOperation()->getOperand(i); + }] >, - InterfaceMethod<[{ + InterfaceMethod< + /*desc=*/[{ Return the index of the given input value `v`, or `None` if the value is not an input. }], - "llvm::Optional", "getIndexOfInput", (ins "Value":$v) + /*retTy=*/"llvm::Optional", + /*methodName=*/"getIndexOfInput", + /*args=*/(ins "Value":$value), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + auto it = llvm::find(getInputs(), value); + if (it != getInputs().end()) + return it - getInputs().begin(); + return llvm::None; + }] >, InterfaceMethod< - "Return the input operands from the current operation.", - "Operation::operand_range", "getInputs" - >, - InterfaceMethod<[{ + /*desc=*/[{ Return the `i`-th input shaped type, irrespective of buffer or tensor type. - }], "ShapedType", "getInputShapedType", (ins "unsigned":$i)>, - InterfaceMethod<[{ + }], + /*retTy=*/"ShapedType", + /*methodName=*/"getInputShapedType", + /*args=*/(ins "unsigned":$i), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return getInput(i).getType().template cast(); + }] + >, + InterfaceMethod< + /*desc=*/[{ + Return the input operands from the current operation. 
+ }], + /*retTy=*/"Operation::operand_range", + /*methodName=*/"getInputs", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + auto range = this->getOperation()->getOperands(); + return {range.begin(), range.begin() + $_op.getNumInputs()}; + }] + >, + InterfaceMethod< + /*desc=*/[{ Return the subset of input operands that are of ranked tensor type. - }], "SmallVector", "getInputTensorTypes">, + }], + /*retTy=*/"SmallVector", + /*methodName=*/"getInputTensorTypes" , + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + SmallVector res; + for (Type type : getInputs().getTypes()) + if (auto t = type.template dyn_cast()) + res.push_back(t); + return res; + }] + >, //===------------------------------------------------------------------===// // Output arguments handling. //===------------------------------------------------------------------===// InterfaceMethod< - "Return the number of outputs from the current operation.", - "unsigned", "getNumOutputs" - >, - InterfaceMethod<"Return the output buffer at the given index.", - "Value", "getOutputBuffer", (ins "unsigned":$i) + /*desc=*/[{ + Return the output buffer at the given index, asserts that this is a + buffer operand and not a tensor result. + The `i^th` output argument is an operand (resp. a return value) iff it + is a value of buffer type (resp. a return value of tensor type). + }], + /*retTy=*/"Value", + /*methodName=*/"getOutputBuffer", + /*args=*/(ins "unsigned":$i), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + // Output buffers are passed as output buffer operands (side-effecting). + // Output tensors are results. + // The union of the 2 are all the outputs and we want to ensure i does + // not overflow the buffer operands. + assert(i + this->getOperation()->getNumResults() < $_op.getNumOutputs() + && "overflowing output buffer index"); + return this->getOperation()->getOperand($_op.getNumInputs() + i); + }] >, - InterfaceMethod<[{ + InterfaceMethod< + /*desc=*/[{ Return the index of the given buffer value, or `None` if the value is not part of the output buffers. }], - "llvm::Optional", "getIndexOfOutputBuffer", (ins "Value":$view) + /*retTy=*/"llvm::Optional", + /*methodName=*/"getIndexOfOutputBuffer", + /*args=*/(ins "Value":$value), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + auto it = llvm::find(getOutputBuffers(), value); + if (it != getOutputBuffers().end()) + return it - getOutputBuffers().begin(); + return llvm::None; + }] >, - InterfaceMethod<[{ + InterfaceMethod< + /*desc=*/[{ Return the type of the output buffer at the given index. - }], "MemRefType", "getOutputBufferType", (ins "unsigned":$i)>, - InterfaceMethod<[{ + }], + /*retTy=*/"MemRefType", + /*methodName=*/"getOutputBufferType", + /*args=*/(ins "unsigned":$i), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return getOutputBuffer(i).getType().template cast(); + }]>, + InterfaceMethod< + /*desc=*/[{ Return the `i`-th output shaped type, irrespective of buffer or tensor type. - }], "ShapedType", "getOutputShapedType", (ins "unsigned":$i)>, - InterfaceMethod<[{ + }], + /*retTy=*/"ShapedType", + /*methodName=*/"getOutputShapedType", + /*args=*/(ins "unsigned":$i), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return getShapedType(i + $_op.getNumInputs()); + }]>, + InterfaceMethod< + /*desc=*/[{ Return the results that are of ranked tensor type. 
- }], "SmallVector", "getOutputTensorTypes">, + }], + /*retTy=*/"SmallVector", + /*methodName=*/"getOutputTensorTypes", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + SmallVector res; + for (Type type : this->getOperation()->getResults().getTypes()) + res.push_back(type.template cast()); + return res; + }]>, InterfaceMethod< - "Return the output buffers (operands) from the current operation.", - "Operation::operand_range", "getOutputBuffers" + /*desc=*/[{ + Return the output buffers (operands) from the current operation. + }], + /*retTy=*/"Operation::operand_range", + /*methodName=*/"getOutputBuffers", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + auto range = this->getOperation()->getOperands(); + return {range.begin() + $_op.getNumInputs(), + range.begin() + getNumInputsAndOutputBuffers()}; + }] >, //===------------------------------------------------------------------===// // Input and Output arguments handling. //===------------------------------------------------------------------===// InterfaceMethod< - "Return one single buffer at position `$i`.", - "Value", "getBuffer", (ins "unsigned":$i) + /*desc=*/[{ + Return one single buffer at position `$i`. + }], + /*retTy=*/"Value", + /*methodName=*/"getBuffer", + /*args=*/(ins "unsigned":$i), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + assert(i < getNumInputsAndOutputBuffers() && "overflowing buffers index"); + return this->getOperation()->getOperand(i); + }] >, InterfaceMethod< - "Return the number of inputs and outputs, irrespective of their buffer " - "or tensor type.", - "unsigned", "getNumInputsAndOutputs" + /*desc=*/[{ + Return the number of inputs and outputs, irrespective of their buffer or + tensor type. + }], + /*retTy=*/"unsigned", + /*methodName=*/"getNumInputsAndOutputs", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return $_op.getNumInputs() + $_op.getNumOutputs(); + }] >, InterfaceMethod< - "Return the number of inputs, irrespective of their buffer or tensor " - "type, and output buffers", - "unsigned", "getNumInputsAndOutputBuffers" + /*desc=*/[{ + Return the number of inputs, irrespective of their buffer or tensor type + and output buffers + }], + /*retTy=*/"unsigned", + /*methodName=*/"getNumInputsAndOutputBuffers", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return $_op.getNumInputs() + $_op.getNumOutputs() - + this->getOperation()->getNumResults(); + }] >, InterfaceMethod< - "Return the range over inputs (irrespective of type) and output buffers.", - "Operation::operand_range", "getInputsAndOutputBuffers" + /*desc=*/[{ + Return the range over inputs (irrespective of type) and output buffers. + }], + /*retTy=*/"Operation::operand_range", + /*methodName=*/"getInputsAndOutputBuffers", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + auto range = this->getOperation()->getOperands(); + return {range.begin(), range.begin() + getNumInputsAndOutputBuffers()}; + }] >, InterfaceMethod< - "Return the shaped types for all the inputs and outputs", - "SmallVector", "getInputOutputShapedTypes" + /*desc=*/[{ + Return the `i`-th shaped type, there are 3 cases: + 1. if `i < $_op.getNumInputs()` then return `getInputShapedType(i)`; + otherwise + 2. if `i < getNumInputsAndOutputBuffers()` then return the + `getOutputBufferType(i - $_op.getNumInputs())`; otherwise + 3. return the `i - getNumInputsAndOutputBuffers()` result type. 
+ }], + /*retTy=*/"ShapedType", + /*methodName=*/"getShapedType", + /*args=*/(ins "unsigned":$i), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + if (i < $_op.getNumInputs()) + return getInputShapedType(i); + if (i < getNumInputsAndOutputBuffers()) + return getOutputBufferType(i - $_op.getNumInputs()); + return getOutputTensorTypes()[i - getNumInputsAndOutputBuffers()]; + }]>, + InterfaceMethod< + /*desc=*/[{ + Return the shaped types for all the inputs and outputs + }], + /*retTy=*/"SmallVector", + /*methodName=*/"getInputOutputShapedTypes", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + SmallVector inputOutputTypes( + this->getOperation()->operand_type_begin(), + this->getOperation()->operand_type_end()); + inputOutputTypes.append(this->getOperation()->result_type_begin(), + this->getOperation()->result_type_end()); + return llvm::to_vector<4>( + llvm::map_range(inputOutputTypes, [](Type type) -> ShapedType { + return type.cast(); + })); + }] >, //===------------------------------------------------------------------===// // Other interface methods. //===------------------------------------------------------------------===// InterfaceMethod< - "Return the reference iterators for this named op (if any are " - "specified). These reference iterators are used to specify the default " - "behavior of the op. Typically this would be a static method but in " - "order to allow rank-polymorphic ops, this needs to be per object " - "instance. Named ops must define referenceIterators, even if empty for " - "the 0-D case. Generic ops on the other hand have a None " - "`referenceIterators`", - "llvm::Optional>", "referenceIterators" + /*desc=*/[{ + Return the iterator types attribute within the current operation. + }], + /*retTy=*/"ArrayAttr", + /*methodName=*/"iterator_types", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return $_op.iterator_types(); + }] >, InterfaceMethod< - "Return the reference indexing maps for this named op (if any are " - "specified). Typically this would be a static method but in order to " - "allow rank-polymorphic ops, this needs to be per object instance. Named " - "ops must define referenceIterators, even if empty for the 0-D case. " - "Generic ops on the other hand have a None `referenceIndexingMaps`", - "llvm::Optional>", "referenceIndexingMaps" + /*desc=*/[{ + Return the indexing maps attribute within the current operation. + }], + /*retTy=*/"ArrayAttr", + /*methodName=*/"indexing_maps" >, InterfaceMethod< - "Return the iterator types attribute within the current operation.", - "ArrayAttr", "iterator_types" + /*desc=*/[{ + Return the indexing maps within the current operation. + }], + /*retTy=*/"SmallVector", + /*methodName=*/"getIndexingMaps", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return llvm::to_vector<4>( + llvm::map_range($_op.indexing_maps(), + [](Attribute attr) -> AffineMap { + return attr.cast().getValue(); + })); + }] >, InterfaceMethod< - "Return the indexing maps attribute within the current operation.", - "ArrayAttr", "indexing_maps" + /*desc=*/[{ + Return the input or output indexing map at index `i`. 
+ }], + /*retTy=*/"AffineMap", + /*methodName=*/"getIndexingMap", + /*args=*/(ins "unsigned":$i), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + assert(i < getNumInputsAndOutputs()); + return $_op.indexing_maps() + .getValue()[i] + .template cast() + .getValue(); + }] >, InterfaceMethod< - "Return the indexing maps within the current operation.", - "SmallVector", "getIndexingMaps" - >, - InterfaceMethod<"Return the input or output indexing map at index `i`.", - "AffineMap", "getIndexingMap", (ins "unsigned":$i) - >, - InterfaceMethod<"Return the input indexing map at index `i`.", - "AffineMap", "getInputIndexingMap", (ins "unsigned":$i) + /*desc=*/[{ + Return the input indexing map at index `i`. + }], + /*retTy=*/"AffineMap", + /*methodName=*/"getInputIndexingMap", + /*args=*/(ins "unsigned":$i), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + assert(i < $_op.getNumInputs()); + return $_op.indexing_maps() + .getValue()[i] + .template cast() + .getValue(); + }] >, - InterfaceMethod<"Return the output indexing map at index `i`.", - "AffineMap", "getOutputIndexingMap", (ins "unsigned":$i) + InterfaceMethod< + /*desc=*/[{ + Return the output indexing map at index `i`. + }], + /*retTy=*/"AffineMap", + /*methodName=*/"getOutputIndexingMap", + /*args=*/(ins "unsigned":$i), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + assert(i < $_op.getNumOutputs()); + return $_op.indexing_maps() + .getValue()[i + $_op.getNumInputs()] + .template cast() + .getValue(); + }] >, - InterfaceMethod<[{ + InterfaceMethod< + /*desc=*/[{ Return whether the op has only MemRef input and outputs. - }], "bool", "hasBufferSemantics">, - InterfaceMethod<[{ + }], + /*retTy=*/"bool", + /*methodName=*/"hasBufferSemantics", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return this->getOperation()->getNumResults() == 0 && + llvm::all_of(getInputs(), + [](Value v) { return v.getType().isa(); }); + }] + >, + InterfaceMethod< + /*desc=*/[{ Return whether the op has only RankedTensor input and outputs. - }], "bool", "hasTensorSemantics">, + }], + /*retTy=*/"bool", + /*methodName=*/"hasTensorSemantics", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + auto isTensorType = [](Value v) { + return v.getType().isa(); + }; + return llvm::all_of(getInputs(), isTensorType) && + llvm::all_of(this->getOperation()->getResults(), isTensorType); + }] + >, //===------------------------------------------------------------------===// // Other static interface methods. //===------------------------------------------------------------------===// - StaticInterfaceMethod<[{ + StaticInterfaceMethod< + /*desc=*/[{ Create an operation of the current type with the given location, operands, and attributes. }], - "Operation *", "create", + /*retTy=*/"Operation *", + /*methodName=*/"create", (ins "OpBuilder &":$builder, "Location":$loc, "ValueRange":$operands, "ArrayRef":$attributes), [{ @@ -192,11 +510,13 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { attributes); }] >, - InterfaceMethod<[{ + InterfaceMethod< + /*desc=*/[{ Clone the current operation with the given location and operands. This is used to abstract away the optional underlying region creation. 
}], - "Operation *", "clone", + /*retTy=*/"Operation *", + /*methodName=*/"clone", (ins "OpBuilder &":$b, "Location":$loc, "ValueRange":$operands), [{ BlockAndValueMapping map; unsigned numRegions = $_op.getOperation()->getNumRegions(); diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h index 8dda7d0a1445f..c4790ca617f11 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h @@ -49,8 +49,8 @@ template class NOutputs { }; }; -/// This class provides the API for structured ops that are known to operate on -/// buffers or tensors. This trait must be used in conjunction with an op +/// This class provides a verifier for structured ops that are known to operate +/// on buffers or tensors. This trait must be used in conjunction with an op /// definition or a trait that provides the methods `getNumInputs` and /// `getNumOutputs`. Use as a trait as follows: /// @@ -59,324 +59,18 @@ template class NOutputs { template class StructuredOpTraits : public OpTrait::TraitBase { -private: - /// Return the number of inputs, irrespective of their buffer or tensor type. - /// For internal use only. - unsigned nInputs() { - return cast(this->getOperation()).getNumInputs(); - } - /// Return the number of outputs, irrespective of their buffer or tensor type. - /// For internal use only. - unsigned nOutputs() { - return cast(this->getOperation()).getNumOutputs(); - } - public: - //==========================================================================// - // Loop types handling. - //==========================================================================// - unsigned getNumParallelLoops() { - return getNumIterators( - getParallelIteratorTypeName(), - cast(this->getOperation()).iterator_types()); - } - unsigned getNumReductionLoops() { - return getNumIterators( - getReductionIteratorTypeName(), - cast(this->getOperation()).iterator_types()); - } - unsigned getNumWindowLoops() { - return getNumIterators( - getWindowIteratorTypeName(), - cast(this->getOperation()).iterator_types()); - } - unsigned getNumLoops() { - return getNumIterators( - cast(this->getOperation()).iterator_types()); - } - - bool hasSingleReductionLoop() { - auto iterators = cast(this->getOperation()).iterator_types(); - return iterators.size() == 1 && - getNumIterators(getReductionIteratorTypeName(), iterators); - } - - //==========================================================================// - // Input arguments handling. - //==========================================================================// - // The `i^th` input argument is always the `i^th` operand regardless of - // whether we have tensors or buffers. - // - /// Return the `i`-th input value. - Value getInput(unsigned i) { - assert(i < nInputs()); - return this->getOperation()->getOperand(i); - } - /// Return the index of `value` in the list of inputs if found, llvm::None - /// otherwise. - Optional getIndexOfInput(Value value) { - auto it = llvm::find(getInputs(), value); - if (it != getInputs().end()) - return it - getInputs().begin(); - return llvm::None; - } - /// Return the `i`-th input shaped type, irrespective of buffer or tensor - /// type. - ShapedType getInputShapedType(unsigned i) { - return getInput(i).getType().template cast(); - } - /// Return the range over inputs. 
- Operation::operand_range getInputs() { - auto range = this->getOperation()->getOperands(); - return {range.begin(), range.begin() + nInputs()}; - } - /// Query the subset of input operands that are of ranked tensor type. - SmallVector getInputTensorTypes() { - SmallVector res; - for (Type type : getInputs().getTypes()) - if (auto t = type.template dyn_cast()) - res.push_back(t); - return res; - } - - //==========================================================================// - // Output arguments handling. - //==========================================================================// - // The `i^th` output argument is an operand (resp. a return value) iff it is - // a value of buffer type (resp. a return value of tensor type). - - /// Return the `i`-th output, asserts that this is a buffer operand and not - /// a tensor result. - Value getOutputBuffer(unsigned i) { - assert(i + this->getOperation()->getNumResults() < nOutputs() && - "overflowing output buffer index"); - return this->getOperation()->getOperand(nInputs() + i); - } - /// Return the index of `value` in the list of output buffers if found, - /// llvm::None otherwise. - Optional getIndexOfOutputBuffer(Value value) { - auto it = llvm::find(getOutputBuffers(), value); - if (it != getOutputBuffers().end()) - return it - getOutputBuffers().begin(); - return llvm::None; - } - /// Return the `i`-th output buffer type. - MemRefType getOutputBufferType(unsigned i) { - return getOutputBuffer(i).getType().template cast(); - } - /// Return the `i`-th output shaped type, irrespective of buffer of tensor - /// type. - ShapedType getOutputShapedType(unsigned i) { - return getShapedType(i + nInputs()); - } - /// Query the subset of results that are of ranked tensor type. - SmallVector getOutputTensorTypes() { - SmallVector res; - for (Type type : this->getOperation()->getResults().getTypes()) - res.push_back(type.template cast()); - return res; - } - /// Return the range over outputs. - Operation::operand_range getOutputBuffers() { - auto range = this->getOperation()->getOperands(); - return {range.begin() + nInputs(), - range.begin() + getNumInputsAndOutputBuffers()}; - } - - //==========================================================================// - // Input and Output arguments handling. - //==========================================================================// - Value getBuffer(unsigned i) { - assert(i < getNumInputsAndOutputBuffers() && "overflowing buffers index"); - return this->getOperation()->getOperand(i); - } - /// Return the number of inputs and outputs, irrespective of their buffer or - /// tensor type. - unsigned getNumInputsAndOutputs() { return nInputs() + nOutputs(); } - /// Return the number of inputs, irrespective of their buffer or tensor type, - /// and output buffers. - unsigned getNumInputsAndOutputBuffers() { - assert(this->getOperation()->getNumResults() <= nOutputs()); - return nInputs() + nOutputs() - this->getOperation()->getNumResults(); - } - /// Return the range over inputs (irrespective of type) and output buffers. - Operation::operand_range getInputsAndOutputBuffers() { - auto range = this->getOperation()->getOperands(); - return {range.begin(), range.begin() + getNumInputsAndOutputBuffers()}; - } - /// Return the `i`-th shaped type, there are 3 cases: - /// 1. if `i < nInputs()` then return `getInputShapedType(i)`; otherwise - /// 2. if `i < getNumInputsAndOutputBuffers()` then return the - /// `getOutputBufferType(i - nInputs())`; otherwise - /// 3. 
return the `i - getNumInputsAndOutputBuffers()` result type. - ShapedType getShapedType(unsigned i) { - if (i < nInputs()) - return getInputShapedType(i); - if (i < getNumInputsAndOutputBuffers()) - return getOutputBufferType(i - nInputs()).template cast(); - return getOutputTensorTypes()[i - getNumInputsAndOutputBuffers()] - .template cast(); - } - /// Return the shaped types for all the inputs and outputs - SmallVector getInputOutputShapedTypes() { - SmallVector inputOutputTypes( - this->getOperation()->operand_type_begin(), - this->getOperation()->operand_type_end()); - inputOutputTypes.append(this->getOperation()->result_type_begin(), - this->getOperation()->result_type_end()); - return llvm::to_vector<4>( - llvm::map_range(inputOutputTypes, [](Type type) -> ShapedType { - return type.cast(); - })); - } - - //==========================================================================// - // Other interface methods. - //==========================================================================// - - // Get or build the indexing_maps ArrayAttr. - ArrayAttr iterator_types() { - // Return the attribute if it is present. - if (auto attr = this->getOperation()->getAttr("iterator_types")) - return attr.template cast(); - - // If not, form the attribute using the reference iterator types for the - // ConcreteType. - auto maybeReferenceIteratorTypes = - cast(this->getOperation()).referenceIterators(); - - // If there is no reference, this must be a generic op. - // TODO: Traits are used to define ops. Split into cpp to avoid cyclic - // dependency. - auto name = this->getOperation()->getName().getStringRef(); - if (!maybeReferenceIteratorTypes && name != "generic" && - name != "indexed_generic") { - this->getOperation()->dump(); - llvm_unreachable("Op missing referenceIterators"); - } - - // If we have a reference, build the reference attribute and set it in the - // op before returning. - auto *ctx = this->getOperation()->getContext(); - auto attrRange = llvm::map_range(*maybeReferenceIteratorTypes, - [ctx](StringRef str) -> Attribute { - return StringAttr::get(str, ctx); - }); - auto attr = ArrayAttr::get(llvm::to_vector<4>(attrRange), ctx); - // TODO: Need to memoize this. Can't just store as an attribute atm as it - // will impact parser, printer and tests. - // this->getOperation()->setAttr("iterator_types", attr); - return attr; - } - - // Get or build the indexing_maps ArrayAttr. - ArrayAttr indexing_maps() { - // Return the attribute if it is present. - if (auto attr = this->getOperation()->getAttr("indexing_maps")) - return attr.template cast(); - - // If not, form the attribute using the reference indexing map for the - // ConcreteType. - auto maybeReferenceIndexingMaps = - cast(this->getOperation()).referenceIndexingMaps(); - - // If there is no reference, this must be a generic op. - auto name = this->getOperation()->getName().getStringRef(); - if (!maybeReferenceIndexingMaps && name != "generic" && - name != "indexed_generic") { - this->getOperation()->dump(); - llvm_unreachable("Op missing referenceIndexingMaps"); - } - - // If we have a reference, build the reference attribute and set it in the - // op before returning. - auto *ctx = this->getOperation()->getContext(); - auto attrRange = - llvm::map_range(*maybeReferenceIndexingMaps, [ctx](AffineMap map) { - // 0-D corner case because there is no such thing as a concrete empty - // map type. 
- if (!map) - map = AffineMap::get(0, 0, getAffineConstantExpr(0, ctx)); - return AffineMapAttr::get(map); - }); - SmallVector attrs{attrRange.begin(), attrRange.end()}; - auto attr = ArrayAttr::get(attrs, ctx); - // TODO: Need to memoize this. Can't just store as an attribute atm as it - // will impact parser, printer and tests. - // this->getOperation()->setAttr("indexing_maps", attr); - return attr; - } - - SmallVector getIndexingMaps() { - return llvm::to_vector<4>( - llvm::map_range(indexing_maps(), [](Attribute attr) -> AffineMap { - return attr.cast().getValue(); - })); - } - - AffineMap getIndexingMap(unsigned i) { - assert(i < getNumInputsAndOutputs()); - return indexing_maps() - .getValue()[i] - .template cast() - .getValue(); - } - - AffineMap getInputIndexingMap(unsigned i) { - assert(i < nInputs()); - return indexing_maps() - .getValue()[i] - .template cast() - .getValue(); - } - - AffineMap getOutputIndexingMap(unsigned i) { - assert(i < nOutputs()); - return indexing_maps() - .getValue()[i + nInputs()] - .template cast() - .getValue(); - } - - /// Query whether the op has only buffer inputs and no returns. - bool hasBufferSemantics() { - return this->getOperation()->getNumResults() == 0 && - llvm::all_of(getInputs(), - [](Value v) { return v.getType().isa(); }); - } - - /// Query whether the op has only tensor inputs and outputs. - bool hasTensorSemantics() { - auto isTensorType = [](Value v) { - return v.getType().isa(); - }; - return llvm::all_of(getInputs(), isTensorType) && - llvm::all_of(this->getOperation()->getResults(), isTensorType); - } - - //==========================================================================// - // Other static interface methods. - //==========================================================================// static LogicalResult verifyTrait(Operation *op) { + ConcreteType concreteOp = cast(op); auto nOperands = cast(op).getNumInputsAndOutputBuffers(); if (failed(OpTrait::impl::verifyAtLeastNOperands(op, nOperands))) return failure(); + if (op->getNumResults() > concreteOp.getNumOutputs()) + return op->emitError("unexpected #results > #outputs"); return success(); } }; -/// This class provides the API for named Linalg StructuredOps. 
-template <typename ConcreteType>
-class NamedStructuredOpTraits
-    : public OpTrait::TraitBase<ConcreteType, NamedStructuredOpTraits> {
-public:
-  static SmallVector<StringRef, 8> referenceIterators(TypeRange inputTypes,
-                                                      TypeRange outputTypes);
-
-  static SmallVector<AffineMap, 8> referenceIndexingMaps(TypeRange inputTypes,
-                                                         TypeRange outputTypes);
-};
-
 } // namespace linalg
 } // namespace OpTrait
 } // namespace mlir

diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 77eb644894779..7071cd385f770 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -260,13 +260,14 @@ static LogicalResult verifyGenericOp(GenericOpType op) {
     if (failed(BlockArgsVerifier<GenericOpType>::verify(op, region.front())))
       return failure();

-  auto attr = op.template getAttrOfType<IntegerAttr>("symbol_source");
-  int64_t targetRank = 0;
-  if (attr) {
-    unsigned index = attr.getInt();
+  auto symbolSourceAttr =
+      op.template getAttrOfType<IntegerAttr>("symbol_source");
+  int64_t expectedNumSymbols = 0;
+  if (symbolSourceAttr) {
+    unsigned index = symbolSourceAttr.getInt();
     if (index >= op.getNumOperands())
       return op.emitOpError("symbol_source index out of range");
-    targetRank = op.getShapedType(index).getRank();
+    expectedNumSymbols = op.getShapedType(index).getRank();
   }

   SmallVector<AffineMap, 4> indexingMaps;
@@ -278,9 +279,9 @@ static LogicalResult verifyGenericOp(GenericOpType op) {
     auto view = (idx < nInputViews) ? op.getInputShapedType(idx)
                                     : op.getOutputShapedType(idx - nInputViews);

-    if (m.getNumSymbols() != targetRank)
+    if (m.getNumSymbols() != expectedNumSymbols)
       return op.emitOpError("expected the number of symbols in indexing_map #")
-             << idx << " to match target rank";
+             << idx << " to match rank of operand `symbol_source`";

     if (m.getNumDims() != nLoops)
       return op.emitOpError("expected indexing_map #")
@@ -1246,15 +1247,9 @@ void buildNamedStructuredOpRegionAndAttributes(Builder &builder,
   mlir::edsc::ScopedContext scope(opBuilder, builder.getUnknownLoc());
   NamedStructuredOpType::regionBuilder(*body);

-  auto indexingMaps = builder.getAffineMapArrayAttr(
-      NamedStructuredOpType::referenceIndexingMaps(operandTypes,
-                                                   tensorResultTypes));
-  result.addAttribute(getIndexingMapsAttrName(), indexingMaps);
+  // indexing_maps is an auto-generated method.

-  auto iterators =
-      builder.getStrArrayAttr(NamedStructuredOpType::referenceIterators(
-          operandTypes, tensorResultTypes));
-  result.addAttribute(getIteratorTypesAttrName(), iterators);
+  // iterator_types is an auto-generated method.
 }

 template <typename NamedStructuredOpType>
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
index c631c47099b08..3774aed7ad1f0 100644
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -113,7 +113,7 @@ func @generic_mismatched_num_returns(%arg0: memref) {
 // -----

 func @generic_symbol_in_map(%arg0: memref) {
-  // expected-error @+1 {{expected the number of symbols in indexing_map #0 to match target rank}}
+  // expected-error @+1 {{expected the number of symbols in indexing_map #0 to match rank of operand `symbol_source`}}
   linalg.generic {
     args_in = 0,
     args_out = 1,
@@ -514,3 +514,20 @@ func @named_ops(%a3: memref, %b3: memref, %c3: memref,
                 memref, memref) -> ()
   return
 }
+
+// -----
+
+func @generic(%arg0: tensor) {
+  // expected-error @+1 {{unexpected #results > #outputs}}
+  linalg.generic {
+    args_in = 1,
+    args_out = 1,
+    indexing_maps = [ affine_map<(i) -> (i)> ],
+    iterator_types = ["parallel"]
+  } %arg0 {
+  ^bb(%0: i4) :
+    %1 = std.addi %0, %0: i4
+    linalg.yield %1, %1: i4, i4
+  } : tensor -> (tensor, tensor)
+  return
+}
diff --git a/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc b/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc
index d796d1917c035..aad983eb85d28 100644
--- a/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc
+++ b/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc
@@ -4,16 +4,15 @@
 // ODS-LABEL: def Test1Op : LinalgNamedStructured_Op<"test1", [
 //  ODS-NEXT:   NInputs<2>
 //  ODS-NEXT:   NOutputs<1>
-//  ODS-NEXT:   NamedStructuredOpTraits
 //  ODS-NEXT:   SingleBlockImplicitTerminator<"YieldOp">
 //
-// IMPL-LABEL: SmallVector<StringRef, 8> Test1Op::referenceIterators
+// IMPL-LABEL: ArrayAttr Test1Op::iterator_types() {
 //       IMPL: { {{.*}}Parallel{{.*}}, {{.*}}Reduction{{.*}} }
 //
-//       IMPL: SmallVector<AffineMap, 8> Test1Op::referenceIndexingMaps
+//       IMPL: ArrayAttr Test1Op::indexing_maps() {
 //       IMPL: AffineMap::get(2, 0, {d0, d1}, context),
 //  IMPL-NEXT: AffineMap::get(2, 0, {d1}, context),
-//  IMPL-NEXT: AffineMap::get(2, 0, {d0}, context) };
+//  IMPL-NEXT: AffineMap::get(2, 0, {d0}, context) });
 //
 //       IMPL: void Test1Op::regionBuilder(Block &block) {
 //       IMPL: Value [[a:.*]](args[0]), [[b:.*]](args[1]), [[c:.*]](args[2]);
@@ -29,16 +28,15 @@ def test1(A: f32(M, K), B: f32(K)) -> (C: f32(M)) {
 // ODS-LABEL: def Test2Op : LinalgNamedStructured_Op<"test2", [
 //  ODS-NEXT:   NInputs<2>
 //  ODS-NEXT:   NOutputs<1>
-//  ODS-NEXT:   NamedStructuredOpTraits
 //  ODS-NEXT:   SingleBlockImplicitTerminator<"YieldOp">
 //
-// IMPL-LABEL: SmallVector<StringRef, 8> Test2Op::referenceIterators
+// IMPL-LABEL: ArrayAttr Test2Op::iterator_types() {
 //       IMPL: { {{.*}}Parallel{{.*}}, {{.*}}Parallel{{.*}}, {{.*}}Reduction{{.*}} }
 //
-//       IMPL: SmallVector<AffineMap, 8> Test2Op::referenceIndexingMaps
+//       IMPL: ArrayAttr Test2Op::indexing_maps() {
 //       IMPL: AffineMap::get(3, 0, {d0, d2}, context),
 //  IMPL-NEXT: AffineMap::get(3, 0, {d2, d1}, context),
-//  IMPL-NEXT: AffineMap::get(3, 0, {d0, d1}, context) };
+//  IMPL-NEXT: AffineMap::get(3, 0, {d0, d1}, context) });
 //
 //       IMPL: Test2Op::regionBuilder(Block &block) {
 //       IMPL: Value [[a:.*]](args[0]), [[b:.*]](args[1]), [[c:.*]](args[2]);
@@ -54,16 +52,15 @@ def test2(A: f32(M, K), B: f32(K, N)) -> (C: f32(M, N)) {
 // ODS-LABEL: def Test3Op : LinalgNamedStructured_Op<"test3", [
 //  ODS-NEXT:   NInputs<2>
 //  ODS-NEXT:   NOutputs<1>
-//  ODS-NEXT:   NamedStructuredOpTraits
 //  ODS-NEXT:   SingleBlockImplicitTerminator<"YieldOp">
 //
-// IMPL-LABEL: SmallVector<StringRef, 8> Test3Op::referenceIterators
+// IMPL-LABEL: ArrayAttr Test3Op::iterator_types() {
 //       IMPL: { {{.*}}Parallel{{.*}}, {{.*}}Parallel{{.*}}, {{.*}}Reduction{{.*}} }
 //
-//       IMPL: SmallVector<AffineMap, 8> Test3Op::referenceIndexingMaps
+//       IMPL: ArrayAttr Test3Op::indexing_maps() {
 //       IMPL: AffineMap::get(4, 0, {d0, d1, d3}, context),
 //  IMPL-NEXT: AffineMap::get(4, 0, {d3, d2}, context),
-//  IMPL-NEXT: AffineMap::get(4, 0, {d0, d1, d2}, context) };
+//  IMPL-NEXT: AffineMap::get(4, 0, {d0, d1, d2}, context) });
 //
 //       IMPL: Test3Op::regionBuilder(Block &block) {
 //       IMPL: Value [[a:.*]](args[0]), [[b:.*]](args[1]), [[c:.*]](args[2]);
diff --git a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp
index 92efef67e8f4a..59d655684f48c 100644
--- a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp
+++ b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp
@@ -974,19 +974,19 @@ class TCParser {
   /// Parse and print the information for a TC def.
   /// When `gen-ods-decl` is used, this prints the ODS declaration for the TC.
   /// When `gen-impl` is used, this prints the C++ implementation for the extra
-  /// methods defined in ODS (referenceIterators, referenceIndexingMaps and
-  /// regionBuilder).
+  /// methods defined in ODS (`iterator_types`, `indexing_maps` and
+  /// `regionBuilder`).
   LogicalResult parseAndEmitODSDef(llvm::raw_ostream &os);

   /// Print the ODS class that defines a new `cppOpName` for a `linalgOpName`.
   void printODS(llvm::raw_ostream &os, StringRef cppOpName,
                 StringRef linalgOpName);

-  /// Print the C++ StructuredOpsInterface impl of `referenceIterators`.
+  /// Print the C++ StructuredOpsInterface impl of `iterator_types`.
   void printReferenceIterators(llvm::raw_ostream &os, StringRef cppOpName,
                                ComprehensionParsingState &state);

-  /// Print the C++ StructuredOpsInterface impl of `referenceIndexingMaps`.
+  /// Print the C++ StructuredOpsInterface impl of `indexing_maps`.
   void printReferenceIndexingMaps(llvm::raw_ostream &os, StringRef cppOpName,
                                   ComprehensionParsingState &state);

@@ -1446,7 +1446,6 @@ void TCParser::printODS(llvm::raw_ostream &os, StringRef cppOpName,
   const char *header = R"FMT(  def {0} : LinalgNamedStructured_Op<"{1}", [
     NInputs<{2}>,
     NOutputs<{3}>,
-    NamedStructuredOpTraits,
     SingleBlockImplicitTerminator<"YieldOp">]> {
       let arguments = (ins Variadic<LinalgOperand>:$views);
       let results = (outs Variadic<AnyRankedTensor>:$output_tensors);
@@ -1465,16 +1464,9 @@ void TCParser::printODS(llvm::raw_ostream &os, StringRef cppOpName,
         return ::parseNamedStructuredOp<{0}>(parser, result);
       }];
       let extraClassDeclaration = [{{
-        llvm::Optional<SmallVector<StringRef, 8>> referenceIterators();
-        static SmallVector<StringRef, 8> referenceIterators(
-          TypeRange inputTypes, TypeRange outputTypes);
-
-        llvm::Optional<SmallVector<AffineMap, 8>> referenceIndexingMaps();
-        static SmallVector<AffineMap, 8> referenceIndexingMaps(
-          TypeRange inputTypes, TypeRange outputTypes);
-
+        ArrayAttr iterator_types();
+        ArrayAttr indexing_maps();
         static void regionBuilder(Block &block);
-
         std::string getLibraryCallName() {{
           return generateLibraryCallName(getOperation());
         }
@@ -1492,20 +1484,14 @@ void TCParser::printODS(llvm::raw_ostream &os, StringRef cppOpName,
   os << llvm::formatv(header, cppOpName, linalgOpName, nInputs, nOutputs);
 }

-/// Print the C++ StructuredOpsInterface impl of `referenceIterators`.
+/// Print the C++ StructuredOpsInterface impl of `iterator_types`.
 void TCParser::printReferenceIterators(llvm::raw_ostream &os,
                                        StringRef cppOpName,
                                        ComprehensionParsingState &state) {
   const char *referenceReferenceIteratorsFmt =
       R"FMT(
-    // This is temporary until we transition out of manually specified ops
-    // that should be auto-generated with linalg-ods-gen.
-    llvm::Optional<SmallVector<StringRef, 8>> {0}::referenceIterators() {{
-      llvm_unreachable("Unexpected missing `iterator_types` attribute.");
-    }
-    SmallVector<StringRef, 8> {0}::referenceIterators(
-      TypeRange inputTypes, TypeRange outputTypes) {
-      return SmallVector<StringRef, 8>{{ {1} };
+    ArrayAttr {0}::iterator_types() {
+      return Builder(getContext()).getStrArrayAttr(SmallVector<StringRef, 8>{{ {1} });
    })FMT";

   std::string iteratorsStr;
@@ -1542,16 +1528,11 @@ void TCParser::printReferenceIndexingMaps(llvm::raw_ostream &os,
       R"FMT(
   // This is temporary until we transition out of manually specified ops that
   // should be auto-generated with linalg-ods-gen.
-  llvm::Optional<SmallVector<AffineMap, 8>> {0}::referenceIndexingMaps() {{
-    llvm_unreachable("Unexpected missing `indexing_maps` attribute.");
-  }
-  SmallVector<AffineMap, 8> {0}::referenceIndexingMaps(
-    TypeRange inputTypes, TypeRange outputTypes) {
-    assert(!inputTypes.empty() && "At least one input expected");
-    MLIRContext *context = (*inputTypes.begin()).getContext();
+  ArrayAttr {0}::indexing_maps() {
+    MLIRContext *context = getContext();
     AffineExpr {1};
     bindDims(context, {1});
-    return SmallVector<AffineMap, 8>{{ {2} };
+    return Builder(context).getAffineMapArrayAttr({ {2} });
  })FMT";

   // 2. Print a comma-separated list of identifiers for the AffineExpr in

From be0d79f32930fe780dc89ba96dac0ba163f7ec50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krist=C3=B3f=20Umann?=
Date: Fri, 11 Sep 2020 13:51:54 +0200
Subject: [PATCH 0364/1079] [analyzer][MacroExpansion] Fix a crash where
 multiple parameters resolved to __VA_ARGS__

In short, macro expansion handled the case where a variadic parameter
mapped to multiple arguments, but not the other way around. An internal
ticket demonstrated that we fail an assertion in that case.

Macro expansion so far worked by lexing the source code token by token and
using the Preprocessor to turn these tokens into identifiers or to get
their proper spelling. Counterintuitively, this does not actually expand
the macros, so we have to do the heavy lifting ourselves -- in this case,
figure out what __VA_ARGS__ expands into. Since this case can only occur
in a nested macro, the information gathered from the containing macro
already records what __VA_ARGS__ maps to. If a parameter resolves to
__VA_ARGS__, we need to temporarily stop getting our tokens from the lexer
and take them from what __VA_ARGS__ maps to instead.

Differential Revision: https://reviews.llvm.org/D86135
---
 .../StaticAnalyzer/Core/PlistDiagnostics.cpp  |  107 +-
 .../plist-macros-with-expansion.cpp.plist     | 2013 +++++++++++------
 .../Analysis/plist-macros-with-expansion.cpp  |   61 +-
 3 files changed, 1491 insertions(+), 690 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
index 87c9b84794637..441dcad424442 100644
--- a/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
+++ b/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
@@ -27,6 +27,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Casting.h"
+#include <memory>

 using namespace clang;
 using namespace ento;
@@ -879,6 +880,46 @@ class TokenPrinter {
   void printToken(const Token &Tok);
 };

+/// Wrapper around a Lexer object that can lex tokens one-by-one. It's possible
+/// to "inject" a range of tokens into the stream, in which case the next token
+/// is retrieved from the next element of the range, until the end of the range
+/// is reached.
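+///
+/// (Editorial note, not part of the original patch comment.) Concretely, for
+/// the DISPATCH macro in the tests added below: while expanding
+/// PARAMS_RESOLVE_TO_VA_ARGS, injectRange() is handed the tokens that
+/// __VA_ARGS__ maps to (x, "LF1M healer"), so next() yields those tokens
+/// first and only then falls back to lexing with RawLexer.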
+class TokenStream {
+public:
+  TokenStream(SourceLocation ExpanLoc, const SourceManager &SM,
+              const LangOptions &LangOpts)
+      : ExpanLoc(ExpanLoc) {
+    FileID File;
+    unsigned Offset;
+    std::tie(File, Offset) = SM.getDecomposedLoc(ExpanLoc);
+    const llvm::MemoryBuffer *MB = SM.getBuffer(File);
+    const char *MacroNameTokenPos = MB->getBufferStart() + Offset;
+
+    RawLexer = std::make_unique<Lexer>(SM.getLocForStartOfFile(File), LangOpts,
+                                       MB->getBufferStart(), MacroNameTokenPos,
+                                       MB->getBufferEnd());
+  }
+
+  void next(Token &Result) {
+    if (CurrTokenIt == TokenRange.end()) {
+      RawLexer->LexFromRawLexer(Result);
+      return;
+    }
+    Result = *CurrTokenIt;
+    CurrTokenIt++;
+  }
+
+  void injectRange(const ArgTokensTy &Range) {
+    TokenRange = Range;
+    CurrTokenIt = TokenRange.begin();
+  }
+
+  std::unique_ptr<Lexer> RawLexer;
+  ArgTokensTy TokenRange;
+  ArgTokensTy::iterator CurrTokenIt = TokenRange.begin();
+  SourceLocation ExpanLoc;
+};
+
 } // end of anonymous namespace

 /// The implementation method of getMacroExpansion: It prints the expansion of
@@ -933,8 +974,9 @@ static std::string getMacroNameAndPrintExpansion(
 /// When \p ExpanLoc references "SET_TO_NULL(a)" within the definition of
 /// "NOT_SUSPICIOUS", the macro name "SET_TO_NULL" and the MacroArgMap map
 /// { (x, a) } will be returned.
-static MacroExpansionInfo getMacroExpansionInfo(SourceLocation ExpanLoc,
-                                                const Preprocessor &PP);
+static MacroExpansionInfo
+getMacroExpansionInfo(const MacroParamMap &PrevParamMap,
+                      SourceLocation ExpanLoc, const Preprocessor &PP);

 /// Retrieves the ')' token that matches '(' \p It points to.
 static MacroInfo::tokens_iterator getMatchingRParen(
@@ -980,7 +1022,7 @@ static std::string getMacroNameAndPrintExpansion(
   const SourceManager &SM = PP.getSourceManager();

   MacroExpansionInfo MExpInfo =
-      getMacroExpansionInfo(SM.getExpansionLoc(MacroLoc), PP);
+      getMacroExpansionInfo(PrevParamMap, SM.getExpansionLoc(MacroLoc), PP);
   IdentifierInfo *MacroNameII = PP.getIdentifierInfo(MExpInfo.Name);

   // TODO: If the macro definition contains another symbol then this function is
@@ -1077,24 +1119,20 @@ static std::string getMacroNameAndPrintExpansion(
   return MExpInfo.Name;
 }

-static MacroExpansionInfo getMacroExpansionInfo(SourceLocation ExpanLoc,
-                                                const Preprocessor &PP) {
+static MacroExpansionInfo
+getMacroExpansionInfo(const MacroParamMap &PrevParamMap,
+                      SourceLocation ExpanLoc, const Preprocessor &PP) {
   const SourceManager &SM = PP.getSourceManager();
   const LangOptions &LangOpts = PP.getLangOpts();

   // First, we create a Lexer to lex *at the expansion location* the tokens
   // referring to the macro's name and its arguments.
-  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(ExpanLoc);
-  const llvm::MemoryBuffer *MB = SM.getBuffer(LocInfo.first);
-  const char *MacroNameTokenPos = MB->getBufferStart() + LocInfo.second;
-
-  Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts,
-                 MB->getBufferStart(), MacroNameTokenPos, MB->getBufferEnd());
+  TokenStream TStream(ExpanLoc, SM, LangOpts);

   // Acquire the macro's name.
   Token TheTok;
-  RawLexer.LexFromRawLexer(TheTok);
+  TStream.next(TheTok);

   std::string MacroName = PP.getSpelling(TheTok);

@@ -1122,7 +1160,7 @@ static MacroExpansionInfo getMacroExpansionInfo(SourceLocation ExpanLoc,
   if (MacroParams.empty())
     return { MacroName, MI, {} };

-  RawLexer.LexFromRawLexer(TheTok);
+  TStream.next(TheTok);

   // When this is a token which expands to another macro function then its
   // parentheses are not at its expansion location. For example:
   //
@@ -1166,7 +1204,7 @@
   if (ParenthesesDepth != 0) {

     // Lex the first token of the next macro parameter.
-    RawLexer.LexFromRawLexer(TheTok);
+    TStream.next(TheTok);

     while (
         !(ParenthesesDepth == 1 &&
@@ -1183,16 +1221,38 @@
       if (ParenthesesDepth == 0)
         break;

-      if (TheTok.is(tok::raw_identifier))
+      if (TheTok.is(tok::raw_identifier)) {
         PP.LookUpIdentifierInfo(TheTok);
+        // This token is a variadic parameter:
+        //
+        //   #define PARAMS_RESOLVE_TO_VA_ARGS(i, fmt) foo(i, fmt); \
+        //     i = 0;
+        //   #define DISPATCH(...) \
+        //     PARAMS_RESOLVE_TO_VA_ARGS(__VA_ARGS__);
+        //                            // ^~~~~~~~~~~ Variadic parameter here
+        //
+        //   void mulitpleParamsResolveToVA_ARGS(void) {
+        //     int x = 1;
+        //     DISPATCH(x, "LF1M healer"); // Multiple arguments are mapped to
+        //                                 // a single __VA_ARGS__ parameter.
+        //     (void)(10 / x);
+        //   }
+        //
+        // We will stumble across this while trying to expand
+        // PARAMS_RESOLVE_TO_VA_ARGS. By this point, we already noted during
+        // the processing of DISPATCH what __VA_ARGS__ maps to, so we'll
+        // retrieve the next series of tokens from that.
+        if (TheTok.getIdentifierInfo() == __VA_ARGS__II) {
+          TStream.injectRange(PrevParamMap.at(__VA_ARGS__II));
+          TStream.next(TheTok);
+          continue;
+        }
+      }

       ArgTokens.push_back(TheTok);
-      RawLexer.LexFromRawLexer(TheTok);
+      TStream.next(TheTok);
     }
   } else {
-    // FIXME: Handle when multiple parameters map to a single argument.
-    // Currently, we only handle when multiple arguments map to the same
-    // parameter.
     assert(CurrParamII == __VA_ARGS__II &&
            "No more macro arguments are found, but the current parameter "
            "isn't __VA_ARGS__!");
@@ -1295,6 +1355,15 @@ static void dumpArgTokensToStream(llvm::raw_ostream &Out,
 }

 void TokenPrinter::printToken(const Token &Tok) {
+  // TODO: Handle GNU extensions where hash and hashhash occur right before
+  // __VA_ARGS__.
+  // cppreference.com: "some compilers offer an extension that allows ## to
+  // appear after a comma and before __VA_ARGS__, in which case the ## does
+  // nothing when the variable arguments are present, but removes the comma
+  // when the variable arguments are not present: this makes it possible to
+  // define macros such as fprintf (stderr, format, ##__VA_ARGS__)"
+  // FIXME: Handle named variadic macro parameters (also a GNU extension).
+
   // If this is the first token to be printed, don't print space.
if (PrevTok.isNot(tok::unknown)) { // If the tokens were already space separated, or if they must be to avoid diff --git a/clang/test/Analysis/Inputs/expected-plists/plist-macros-with-expansion.cpp.plist b/clang/test/Analysis/Inputs/expected-plists/plist-macros-with-expansion.cpp.plist index 499119c81d259..4a2741f0d4937 100644 --- a/clang/test/Analysis/Inputs/expected-plists/plist-macros-with-expansion.cpp.plist +++ b/clang/test/Analysis/Inputs/expected-plists/plist-macros-with-expansion.cpp.plist @@ -16,12 +16,12 @@ start - line25 + line23 col3 file0 - line25 + line23 col5 file0 @@ -29,12 +29,12 @@ end - line26 + line24 col3 file0 - line26 + line24 col21 file0 @@ -46,7 +46,7 @@ kindevent location - line26 + line24 col3 file0 @@ -54,12 +54,12 @@ - line26 + line24 col3 file0 - line26 + line24 col21 file0 @@ -79,12 +79,12 @@ start - line27 + line25 col3 file0 - line27 + line25 col3 file0 @@ -92,12 +92,12 @@ end - line27 + line25 col8 file0 - line27 + line25 col8 file0 @@ -109,7 +109,7 @@ kindevent location - line27 + line25 col8 file0 @@ -117,12 +117,12 @@ - line27 + line25 col4 file0 - line27 + line25 col6 file0 @@ -140,7 +140,7 @@ location - line26 + line24 col3 file0 @@ -159,7 +159,7 @@ issue_hash_function_offset3 location - line27 + line25 col8 file0 @@ -167,10 +167,10 @@ 0 + 22 + 23 24 25 - 26 - 27 @@ -185,12 +185,12 @@ start - line38 + line36 col3 file0 - line38 + line36 col5 file0 @@ -198,12 +198,12 @@ end - line39 + line37 col3 file0 - line39 + line37 col39 file0 @@ -215,7 +215,7 @@ kindevent location - line39 + line37 col3 file0 @@ -223,12 +223,12 @@ - line39 + line37 col3 file0 - line39 + line37 col39 file0 @@ -248,12 +248,12 @@ start - line40 + line38 col3 file0 - line40 + line38 col3 file0 @@ -261,12 +261,12 @@ end - line40 + line38 col8 file0 - line40 + line38 col8 file0 @@ -278,7 +278,7 @@ kindevent location - line40 + line38 col8 file0 @@ -286,12 +286,12 @@ - line40 + line38 col4 file0 - line40 + line38 col6 file0 @@ -309,7 +309,7 @@ location - line39 + line37 col3 file0 @@ -328,7 +328,7 @@ issue_hash_function_offset3 location - line40 + line38 col8 file0 @@ -336,10 +336,10 @@ 0 + 35 + 36 37 38 - 39 - 40 @@ -354,12 +354,12 @@ start - line58 + line56 col3 file0 - line58 + line56 col5 file0 @@ -367,12 +367,12 @@ end - line59 + line57 col3 file0 - line59 + line57 col9 file0 @@ -384,7 +384,7 @@ kindevent location - line59 + line57 col3 file0 @@ -392,12 +392,12 @@ - line59 + line57 col3 file0 - line59 + line57 col15 file0 @@ -413,7 +413,7 @@ kindevent location - line50 + line48 col1 file0 @@ -431,12 +431,12 @@ start - line50 + line48 col1 file0 - line50 + line48 col4 file0 @@ -444,12 +444,12 @@ end - line51 + line49 col3 file0 - line51 + line49 col3 file0 @@ -461,7 +461,7 @@ kindevent location - line51 + line49 col3 file0 @@ -469,12 +469,12 @@ - line51 + line49 col3 file0 - line51 + line49 col17 file0 @@ -490,7 +490,7 @@ kindevent location - line59 + line57 col3 file0 @@ -498,12 +498,12 @@ - line59 + line57 col3 file0 - line59 + line57 col15 file0 @@ -523,12 +523,12 @@ start - line60 + line58 col3 file0 - line60 + line58 col3 file0 @@ -536,12 +536,12 @@ end - line60 + line58 col8 file0 - line60 + line58 col8 file0 @@ -553,7 +553,7 @@ kindevent location - line60 + line58 col8 file0 @@ -561,12 +561,12 @@ - line60 + line58 col4 file0 - line60 + line58 col6 file0 @@ -584,7 +584,7 @@ location - line59 + line57 col3 file0 @@ -603,7 +603,7 @@ issue_hash_function_offset3 location - line60 + line58 col8 file0 @@ -611,12 +611,12 @@ 0 - 50 - 51 + 48 + 49 + 55 + 56 57 58 - 59 - 60 @@ -631,12 
+631,12 @@ start - line78 + line76 col3 file0 - line78 + line76 col5 file0 @@ -644,12 +644,12 @@ end - line79 + line77 col3 file0 - line79 + line77 col9 file0 @@ -661,7 +661,7 @@ kindevent location - line79 + line77 col3 file0 @@ -669,12 +669,12 @@ - line79 + line77 col3 file0 - line79 + line77 col13 file0 @@ -690,7 +690,7 @@ kindevent location - line50 + line48 col1 file0 @@ -708,12 +708,12 @@ start - line50 + line48 col1 file0 - line50 + line48 col4 file0 @@ -721,12 +721,12 @@ end - line51 + line49 col3 file0 - line51 + line49 col3 file0 @@ -738,7 +738,7 @@ kindevent location - line51 + line49 col3 file0 @@ -746,12 +746,12 @@ - line51 + line49 col3 file0 - line51 + line49 col17 file0 @@ -767,7 +767,7 @@ kindevent location - line79 + line77 col3 file0 @@ -775,12 +775,12 @@ - line79 + line77 col3 file0 - line79 + line77 col13 file0 @@ -796,7 +796,7 @@ kindevent location - line80 + line78 col12 file0 @@ -804,12 +804,12 @@ - line80 + line78 col3 file0 - line80 + line78 col10 file0 @@ -827,7 +827,7 @@ location - line79 + line77 col3 file0 @@ -837,7 +837,7 @@ location - line80 + line78 col3 file0 @@ -856,7 +856,7 @@ issue_hash_function_offset3 location - line80 + line78 col12 file0 @@ -864,12 +864,12 @@ 0 - 50 - 51 + 48 + 49 + 75 + 76 77 78 - 79 - 80 @@ -884,12 +884,12 @@ start - line97 + line95 col3 file0 - line97 + line95 col5 file0 @@ -897,12 +897,12 @@ end - line98 + line96 col3 file0 - line98 + line96 col28 file0 @@ -914,7 +914,7 @@ kindevent location - line98 + line96 col3 file0 @@ -922,12 +922,12 @@ - line98 + line96 col3 file0 - line98 + line96 col33 file0 @@ -947,12 +947,12 @@ start - line99 + line97 col3 file0 - line99 + line97 col3 file0 @@ -960,12 +960,12 @@ end - line99 + line97 col8 file0 - line99 + line97 col8 file0 @@ -977,7 +977,7 @@ kindevent location - line99 + line97 col8 file0 @@ -985,12 +985,12 @@ - line99 + line97 col4 file0 - line99 + line97 col6 file0 @@ -1008,7 +1008,7 @@ location - line98 + line96 col3 file0 @@ -1027,7 +1027,7 @@ issue_hash_function_offset3 location - line99 + line97 col8 file0 @@ -1035,10 +1035,10 @@ 0 + 94 + 95 96 97 - 98 - 99 @@ -1053,12 +1053,12 @@ start - line114 + line112 col3 file0 - line114 + line112 col5 file0 @@ -1066,12 +1066,12 @@ end - line115 + line113 col3 file0 - line115 + line113 col42 file0 @@ -1083,7 +1083,7 @@ kindevent location - line115 + line113 col3 file0 @@ -1091,12 +1091,12 @@ - line115 + line113 col3 file0 - line115 + line113 col47 file0 @@ -1116,12 +1116,12 @@ start - line116 + line114 col3 file0 - line116 + line114 col3 file0 @@ -1129,12 +1129,12 @@ end - line116 + line114 col8 file0 - line116 + line114 col8 file0 @@ -1146,7 +1146,7 @@ kindevent location - line116 + line114 col8 file0 @@ -1154,12 +1154,12 @@ - line116 + line114 col4 file0 - line116 + line114 col6 file0 @@ -1177,7 +1177,7 @@ location - line115 + line113 col3 file0 @@ -1196,7 +1196,7 @@ issue_hash_function_offset3 location - line116 + line114 col8 file0 @@ -1204,10 +1204,10 @@ 0 + 111 + 112 113 114 - 115 - 116 @@ -1222,12 +1222,12 @@ start - line134 + line132 col3 file0 - line134 + line132 col5 file0 @@ -1235,12 +1235,12 @@ end - line135 + line133 col3 file0 - line135 + line133 col39 file0 @@ -1252,7 +1252,7 @@ kindevent location - line135 + line133 col3 file0 @@ -1260,12 +1260,12 @@ - line135 + line133 col3 file0 - line135 + line133 col44 file0 @@ -1285,12 +1285,12 @@ start - line136 + line134 col3 file0 - line136 + line134 col3 file0 @@ -1298,12 +1298,12 @@ end - line136 + line134 col8 file0 - line136 + line134 col8 file0 @@ -1315,7 +1315,7 @@ kindevent 
location - line136 + line134 col8 file0 @@ -1323,12 +1323,12 @@ - line136 + line134 col4 file0 - line136 + line134 col6 file0 @@ -1346,7 +1346,7 @@ location - line135 + line133 col3 file0 @@ -1365,7 +1365,7 @@ issue_hash_function_offset3 location - line136 + line134 col8 file0 @@ -1373,10 +1373,10 @@ 0 + 131 + 132 133 134 - 135 - 136 @@ -1391,12 +1391,12 @@ start - line161 + line159 col3 file0 - line161 + line159 col5 file0 @@ -1404,12 +1404,12 @@ end - line162 + line160 col3 file0 - line162 + line160 col19 file0 @@ -1421,7 +1421,7 @@ kindevent location - line162 + line160 col3 file0 @@ -1429,12 +1429,12 @@ - line162 + line160 col3 file0 - line162 + line160 col52 file0 @@ -1454,12 +1454,12 @@ start - line163 + line161 col3 file0 - line163 + line161 col3 file0 @@ -1467,12 +1467,12 @@ end - line163 + line161 col6 file0 - line163 + line161 col6 file0 @@ -1484,7 +1484,7 @@ kindevent location - line163 + line161 col6 file0 @@ -1492,12 +1492,12 @@ - line163 + line161 col4 file0 - line163 + line161 col4 file0 @@ -1515,7 +1515,7 @@ location - line162 + line160 col3 file0 @@ -1534,7 +1534,7 @@ issue_hash_function_offset3 location - line163 + line161 col6 file0 @@ -1542,10 +1542,10 @@ 0 + 158 + 159 160 161 - 162 - 163 @@ -1560,12 +1560,12 @@ start - line170 + line168 col3 file0 - line170 + line168 col5 file0 @@ -1573,12 +1573,12 @@ end - line171 + line169 col3 file0 - line171 + line169 col19 file0 @@ -1590,7 +1590,7 @@ kindevent location - line171 + line169 col3 file0 @@ -1598,12 +1598,12 @@ - line171 + line169 col3 file0 - line171 + line169 col52 file0 @@ -1623,12 +1623,12 @@ start - line172 + line170 col3 file0 - line172 + line170 col3 file0 @@ -1636,12 +1636,12 @@ end - line172 + line170 col6 file0 - line172 + line170 col6 file0 @@ -1653,7 +1653,7 @@ kindevent location - line172 + line170 col6 file0 @@ -1661,12 +1661,12 @@ - line172 + line170 col4 file0 - line172 + line170 col4 file0 @@ -1684,7 +1684,7 @@ location - line171 + line169 col3 file0 @@ -1703,7 +1703,7 @@ issue_hash_function_offset3 location - line172 + line170 col6 file0 @@ -1711,10 +1711,10 @@ 0 + 167 + 168 169 170 - 171 - 172 @@ -1729,12 +1729,12 @@ start - line179 + line177 col3 file0 - line179 + line177 col5 file0 @@ -1742,12 +1742,12 @@ end - line180 + line178 col3 file0 - line180 + line178 col19 file0 @@ -1759,7 +1759,7 @@ kindevent location - line180 + line178 col3 file0 @@ -1767,12 +1767,12 @@ - line180 + line178 col3 file0 - line180 + line178 col52 file0 @@ -1792,12 +1792,12 @@ start - line181 + line179 col3 file0 - line181 + line179 col3 file0 @@ -1805,12 +1805,12 @@ end - line181 + line179 col6 file0 - line181 + line179 col6 file0 @@ -1822,7 +1822,7 @@ kindevent location - line181 + line179 col6 file0 @@ -1830,12 +1830,12 @@ - line181 + line179 col4 file0 - line181 + line179 col4 file0 @@ -1853,7 +1853,7 @@ location - line180 + line178 col3 file0 @@ -1872,7 +1872,7 @@ issue_hash_function_offset3 location - line181 + line179 col6 file0 @@ -1880,10 +1880,10 @@ 0 + 176 + 177 178 179 - 180 - 181 @@ -1898,12 +1898,12 @@ start - line193 + line191 col3 file0 - line193 + line191 col5 file0 @@ -1911,12 +1911,12 @@ end - line194 + line192 col3 file0 - line194 + line192 col15 file0 @@ -1928,7 +1928,7 @@ kindevent location - line194 + line192 col3 file0 @@ -1936,12 +1936,12 @@ - line194 + line192 col3 file0 - line194 + line192 col30 file0 @@ -1957,7 +1957,7 @@ kindevent location - line50 + line48 col1 file0 @@ -1975,12 +1975,12 @@ start - line50 + line48 col1 file0 - line50 + line48 col4 file0 @@ -1988,12 +1988,12 @@ end - line51 + 
line49 col3 file0 - line51 + line49 col3 file0 @@ -2005,7 +2005,7 @@ kindevent location - line51 + line49 col3 file0 @@ -2013,12 +2013,12 @@ - line51 + line49 col3 file0 - line51 + line49 col17 file0 @@ -2034,7 +2034,7 @@ kindevent location - line194 + line192 col3 file0 @@ -2042,12 +2042,12 @@ - line194 + line192 col3 file0 - line194 + line192 col30 file0 @@ -2067,12 +2067,12 @@ start - line195 + line193 col3 file0 - line195 + line193 col3 file0 @@ -2080,12 +2080,12 @@ end - line195 + line193 col6 file0 - line195 + line193 col6 file0 @@ -2097,7 +2097,7 @@ kindevent location - line195 + line193 col6 file0 @@ -2105,12 +2105,12 @@ - line195 + line193 col4 file0 - line195 + line193 col4 file0 @@ -2128,7 +2128,7 @@ location - line194 + line192 col3 file0 @@ -2147,7 +2147,7 @@ issue_hash_function_offset3 location - line195 + line193 col6 file0 @@ -2155,12 +2155,12 @@ 0 - 50 - 51 + 48 + 49 + 190 + 191 192 193 - 194 - 195 @@ -2175,12 +2175,12 @@ start - line207 + line205 col3 file0 - line207 + line205 col5 file0 @@ -2188,12 +2188,12 @@ end - line208 + line206 col3 file0 - line208 + line206 col15 file0 @@ -2205,7 +2205,7 @@ kindevent location - line208 + line206 col3 file0 @@ -2213,12 +2213,12 @@ - line208 + line206 col3 file0 - line208 + line206 col48 file0 @@ -2234,7 +2234,7 @@ kindevent location - line201 + line199 col1 file0 @@ -2252,12 +2252,12 @@ start - line201 + line199 col1 file0 - line201 + line199 col4 file0 @@ -2265,12 +2265,12 @@ end - line202 + line200 col3 file0 - line202 + line200 col11 file0 @@ -2282,7 +2282,7 @@ kindevent location - line202 + line200 col3 file0 @@ -2290,12 +2290,12 @@ - line202 + line200 col3 file0 - line202 + line200 col17 file0 @@ -2311,7 +2311,7 @@ kindevent location - line50 + line48 col1 file0 @@ -2329,12 +2329,12 @@ start - line50 + line48 col1 file0 - line50 + line48 col4 file0 @@ -2342,12 +2342,12 @@ end - line51 + line49 col3 file0 - line51 + line49 col3 file0 @@ -2359,7 +2359,7 @@ kindevent location - line51 + line49 col3 file0 @@ -2367,12 +2367,12 @@ - line51 + line49 col3 file0 - line51 + line49 col17 file0 @@ -2388,7 +2388,7 @@ kindevent location - line202 + line200 col3 file0 @@ -2396,12 +2396,12 @@ - line202 + line200 col3 file0 - line202 + line200 col17 file0 @@ -2421,12 +2421,12 @@ start - line202 + line200 col3 file0 - line202 + line200 col11 file0 @@ -2434,12 +2434,12 @@ end - line203 + line201 col3 file0 - line203 + line201 col7 file0 @@ -2451,7 +2451,7 @@ kindevent location - line208 + line206 col3 file0 @@ -2459,12 +2459,12 @@ - line208 + line206 col3 file0 - line208 + line206 col48 file0 @@ -2484,12 +2484,12 @@ start - line209 + line207 col3 file0 - line209 + line207 col3 file0 @@ -2497,12 +2497,12 @@ end - line209 + line207 col6 file0 - line209 + line207 col6 file0 @@ -2514,7 +2514,7 @@ kindevent location - line209 + line207 col6 file0 @@ -2522,12 +2522,12 @@ - line209 + line207 col4 file0 - line209 + line207 col4 file0 @@ -2545,7 +2545,7 @@ location - line208 + line206 col3 file0 @@ -2564,7 +2564,7 @@ issue_hash_function_offset3 location - line209 + line207 col6 file0 @@ -2572,15 +2572,15 @@ 0 - 50 - 51 + 48 + 49 + 199 + 200 201 - 202 - 203 + 204 + 205 206 207 - 208 - 209 @@ -2595,12 +2595,12 @@ start - line219 + line217 col3 file0 - line219 + line217 col5 file0 @@ -2608,12 +2608,12 @@ end - line220 + line218 col3 file0 - line220 + line218 col31 file0 @@ -2625,7 +2625,7 @@ kindevent location - line220 + line218 col3 file0 @@ -2633,12 +2633,12 @@ - line220 + line218 col3 file0 - line220 + line218 col64 file0 @@ -2654,7 +2654,7 @@ kindevent 
location - line201 + line199 col1 file0 @@ -2672,12 +2672,12 @@ start - line201 + line199 col1 file0 - line201 + line199 col4 file0 @@ -2685,12 +2685,12 @@ end - line202 + line200 col3 file0 - line202 + line200 col11 file0 @@ -2702,7 +2702,7 @@ kindevent location - line202 + line200 col3 file0 @@ -2710,12 +2710,12 @@ - line202 + line200 col3 file0 - line202 + line200 col17 file0 @@ -2731,7 +2731,7 @@ kindevent location - line50 + line48 col1 file0 @@ -2749,12 +2749,12 @@ start - line50 + line48 col1 file0 - line50 + line48 col4 file0 @@ -2762,12 +2762,12 @@ end - line51 + line49 col3 file0 - line51 + line49 col3 file0 @@ -2779,7 +2779,7 @@ kindevent location - line51 + line49 col3 file0 @@ -2787,12 +2787,12 @@ - line51 + line49 col3 file0 - line51 + line49 col17 file0 @@ -2808,7 +2808,7 @@ kindevent location - line202 + line200 col3 file0 @@ -2816,12 +2816,12 @@ - line202 + line200 col3 file0 - line202 + line200 col17 file0 @@ -2841,12 +2841,12 @@ start - line202 + line200 col3 file0 - line202 + line200 col11 file0 @@ -2854,12 +2854,12 @@ end - line203 + line201 col3 file0 - line203 + line201 col7 file0 @@ -2871,7 +2871,7 @@ kindevent location - line220 + line218 col3 file0 @@ -2879,12 +2879,12 @@ - line220 + line218 col3 file0 - line220 + line218 col64 file0 @@ -2904,12 +2904,12 @@ start - line221 + line219 col3 file0 - line221 + line219 col3 file0 @@ -2917,12 +2917,12 @@ end - line221 + line219 col6 file0 - line221 + line219 col6 file0 @@ -2934,7 +2934,7 @@ kindevent location - line221 + line219 col6 file0 @@ -2942,12 +2942,12 @@ - line221 + line219 col4 file0 - line221 + line219 col4 file0 @@ -2965,7 +2965,7 @@ location - line220 + line218 col3 file0 @@ -2984,7 +2984,7 @@ issue_hash_function_offset3 location - line221 + line219 col6 file0 @@ -2992,15 +2992,15 @@ 0 - 50 - 51 + 48 + 49 + 199 + 200 201 - 202 - 203 + 216 + 217 218 219 - 220 - 221 @@ -3015,12 +3015,12 @@ start - line231 + line229 col3 file0 - line231 + line229 col5 file0 @@ -3028,12 +3028,12 @@ end - line235 + line233 col3 file0 - line235 + line233 col13 file0 @@ -3045,7 +3045,7 @@ kindevent location - line235 + line233 col3 file0 @@ -3053,12 +3053,12 @@ - line235 + line233 col3 file0 - line235 + line233 col58 file0 @@ -3074,7 +3074,7 @@ kindevent location - line235 + line233 col3 file0 @@ -3088,7 +3088,7 @@ kindevent location - line235 + line233 col3 file0 @@ -3096,12 +3096,12 @@ - line235 + line233 col3 file0 - line235 + line233 col58 file0 @@ -3117,7 +3117,7 @@ kindevent location - line50 + line48 col1 file0 @@ -3135,12 +3135,12 @@ start - line50 + line48 col1 file0 - line50 + line48 col4 file0 @@ -3148,12 +3148,12 @@ end - line51 + line49 col3 file0 - line51 + line49 col3 file0 @@ -3165,7 +3165,7 @@ kindevent location - line51 + line49 col3 file0 @@ -3173,12 +3173,12 @@ - line51 + line49 col3 file0 - line51 + line49 col17 file0 @@ -3194,7 +3194,7 @@ kindevent location - line235 + line233 col3 file0 @@ -3202,12 +3202,12 @@ - line235 + line233 col3 file0 - line235 + line233 col58 file0 @@ -3223,7 +3223,7 @@ kindevent location - line235 + line233 col3 file0 @@ -3231,12 +3231,12 @@ - line235 + line233 col3 file0 - line235 + line233 col58 file0 @@ -3256,12 +3256,12 @@ start - line236 + line234 col3 file0 - line236 + line234 col3 file0 @@ -3269,12 +3269,12 @@ end - line236 + line234 col8 file0 - line236 + line234 col8 file0 @@ -3286,7 +3286,7 @@ kindevent location - line236 + line234 col8 file0 @@ -3294,12 +3294,12 @@ - line236 + line234 col4 file0 - line236 + line234 col6 file0 @@ -3317,7 +3317,7 @@ location - line235 + 
line233 col3 file0 @@ -3327,7 +3327,7 @@ location - line235 + line233 col3 file0 @@ -3346,7 +3346,7 @@ issue_hash_function_offset6 location - line236 + line234 col8 file0 @@ -3354,13 +3354,13 @@ 0 - 50 - 51 + 48 + 49 + 228 + 229 230 - 231 - 232 - 235 - 236 + 233 + 234 @@ -3371,7 +3371,7 @@ kindevent location - line246 + line244 col3 file0 @@ -3379,12 +3379,12 @@ - line246 + line244 col3 file0 - line254 + line252 col4 file0 @@ -3400,7 +3400,7 @@ kindevent location - line246 + line244 col3 file0 @@ -3408,12 +3408,12 @@ - line246 + line244 col3 file0 - line254 + line252 col4 file0 @@ -3431,7 +3431,7 @@ location - line246 + line244 col3 file0 @@ -3450,7 +3450,7 @@ issue_hash_function_offset1 location - line246 + line244 col3 file0 @@ -3458,8 +3458,8 @@ 0 - 245 - 246 + 243 + 244 @@ -3474,12 +3474,12 @@ start - line268 + line266 col3 file0 - line268 + line266 col5 file0 @@ -3487,12 +3487,12 @@ end - line270 + line268 col3 file0 - line270 + line268 col25 file0 @@ -3504,7 +3504,7 @@ kindevent location - line270 + line268 col3 file0 @@ -3512,12 +3512,12 @@ - line270 + line268 col3 file0 - line270 + line268 col31 file0 @@ -3537,12 +3537,12 @@ start - line271 + line269 col3 file0 - line271 + line269 col3 file0 @@ -3550,12 +3550,12 @@ end - line271 + line269 col8 file0 - line271 + line269 col8 file0 @@ -3567,7 +3567,7 @@ kindevent location - line271 + line269 col8 file0 @@ -3575,12 +3575,12 @@ - line271 + line269 col4 file0 - line271 + line269 col6 file0 @@ -3598,7 +3598,7 @@ location - line270 + line268 col3 file0 @@ -3617,7 +3617,7 @@ issue_hash_function_offset4 location - line271 + line269 col8 file0 @@ -3625,10 +3625,10 @@ 0 - 267 + 265 + 266 268 - 270 - 271 + 269 @@ -3643,12 +3643,12 @@ start - line282 + line280 col3 file0 - line282 + line280 col5 file0 @@ -3656,12 +3656,12 @@ end - line284 + line282 col3 file0 - line284 + line282 col20 file0 @@ -3673,7 +3673,7 @@ kindevent location - line284 + line282 col3 file0 @@ -3681,12 +3681,12 @@ - line284 + line282 col3 file0 - line284 + line282 col27 file0 @@ -3706,12 +3706,12 @@ start - line285 + line283 col3 file0 - line285 + line283 col3 file0 @@ -3719,12 +3719,12 @@ end - line285 + line283 col8 file0 - line285 + line283 col8 file0 @@ -3736,7 +3736,7 @@ kindevent location - line285 + line283 col8 file0 @@ -3744,12 +3744,12 @@ - line285 + line283 col4 file0 - line285 + line283 col6 file0 @@ -3767,7 +3767,7 @@ location - line284 + line282 col3 file0 @@ -3786,7 +3786,7 @@ issue_hash_function_offset4 location - line285 + line283 col8 file0 @@ -3794,10 +3794,10 @@ 0 - 281 + 279 + 280 282 - 284 - 285 + 283 @@ -3812,12 +3812,12 @@ start - line295 + line293 col3 file0 - line295 + line293 col5 file0 @@ -3825,12 +3825,12 @@ end - line296 + line294 col3 file0 - line296 + line294 col44 file0 @@ -3842,7 +3842,7 @@ kindevent location - line296 + line294 col3 file0 @@ -3850,12 +3850,12 @@ - line296 + line294 col3 file0 - line296 + line294 col61 file0 @@ -3871,7 +3871,7 @@ kindevent location - line50 + line48 col1 file0 @@ -3889,12 +3889,12 @@ start - line50 + line48 col1 file0 - line50 + line48 col4 file0 @@ -3902,12 +3902,12 @@ end - line51 + line49 col3 file0 - line51 + line49 col3 file0 @@ -3919,7 +3919,7 @@ kindevent location - line51 + line49 col3 file0 @@ -3927,12 +3927,12 @@ - line51 + line49 col3 file0 - line51 + line49 col17 file0 @@ -3948,7 +3948,7 @@ kindevent location - line296 + line294 col3 file0 @@ -3956,12 +3956,12 @@ - line296 + line294 col3 file0 - line296 + line294 col61 file0 @@ -3981,12 +3981,12 @@ start - line297 + line295 col3 file0 - line297 
+ line295 col3 file0 @@ -3994,12 +3994,12 @@ end - line297 + line295 col8 file0 - line297 + line295 col8 file0 @@ -4011,7 +4011,7 @@ kindevent location - line297 + line295 col8 file0 @@ -4019,12 +4019,12 @@ - line297 + line295 col4 file0 - line297 + line295 col6 file0 @@ -4042,7 +4042,7 @@ location - line296 + line294 col3 file0 @@ -4061,7 +4061,7 @@ issue_hash_function_offset3 location - line297 + line295 col8 file0 @@ -4069,12 +4069,12 @@ 0 - 50 - 51 + 48 + 49 + 292 + 293 294 295 - 296 - 297 @@ -4089,12 +4089,12 @@ start - line315 + line313 col3 file0 - line315 + line313 col5 file0 @@ -4102,12 +4102,12 @@ end - line316 + line314 col3 file0 - line316 + line314 col22 file0 @@ -4119,7 +4119,7 @@ kindevent location - line316 + line314 col3 file0 @@ -4127,12 +4127,12 @@ - line316 + line314 col3 file0 - line316 + line314 col42 file0 @@ -4152,12 +4152,12 @@ start - line317 + line315 col3 file0 - line317 + line315 col3 file0 @@ -4165,12 +4165,12 @@ end - line317 + line315 col8 file0 - line317 + line315 col8 file0 @@ -4182,7 +4182,7 @@ kindevent location - line317 + line315 col8 file0 @@ -4190,12 +4190,12 @@ - line317 + line315 col4 file0 - line317 + line315 col6 file0 @@ -4213,7 +4213,7 @@ location - line316 + line314 col3 file0 @@ -4232,7 +4232,7 @@ issue_hash_function_offset3 location - line317 + line315 col8 file0 @@ -4240,10 +4240,10 @@ 0 + 312 + 313 314 315 - 316 - 317 @@ -4258,12 +4258,12 @@ start - line324 + line322 col3 file0 - line324 + line322 col5 file0 @@ -4271,12 +4271,12 @@ end - line327 + line325 col3 file0 - line327 + line325 col22 file0 @@ -4288,7 +4288,7 @@ kindevent location - line327 + line325 col3 file0 @@ -4296,12 +4296,12 @@ - line327 + line325 col3 file0 - line327 + line325 col27 file0 @@ -4321,12 +4321,12 @@ start - line328 + line326 col3 file0 - line328 + line326 col3 file0 @@ -4334,12 +4334,12 @@ end - line328 + line326 col8 file0 - line328 + line326 col8 file0 @@ -4351,7 +4351,7 @@ kindevent location - line328 + line326 col8 file0 @@ -4359,12 +4359,12 @@ - line328 + line326 col4 file0 - line328 + line326 col6 file0 @@ -4382,7 +4382,7 @@ location - line327 + line325 col3 file0 @@ -4401,7 +4401,7 @@ issue_hash_function_offset5 location - line328 + line326 col8 file0 @@ -4409,10 +4409,10 @@ 0 - 323 - 324 - 327 - 328 + 321 + 322 + 325 + 326 @@ -4427,12 +4427,12 @@ start - line343 + line341 col3 file0 - line343 + line341 col5 file0 @@ -4440,12 +4440,12 @@ end - line344 + line342 col3 file0 - line344 + line342 col30 file0 @@ -4457,7 +4457,7 @@ kindevent location - line344 + line342 col3 file0 @@ -4465,12 +4465,12 @@ - line344 + line342 col3 file0 - line344 + line342 col45 file0 @@ -4490,12 +4490,12 @@ start - line345 + line343 col3 file0 - line345 + line343 col3 file0 @@ -4503,12 +4503,12 @@ end - line345 + line343 col8 file0 - line345 + line343 col8 file0 @@ -4520,7 +4520,7 @@ kindevent location - line345 + line343 col8 file0 @@ -4528,12 +4528,12 @@ - line345 + line343 col4 file0 - line345 + line343 col6 file0 @@ -4551,7 +4551,7 @@ location - line344 + line342 col3 file0 @@ -4570,7 +4570,7 @@ issue_hash_function_offset3 location - line345 + line343 col8 file0 @@ -4578,10 +4578,10 @@ 0 + 340 + 341 342 343 - 344 - 345 @@ -4596,12 +4596,12 @@ start - line352 + line350 col3 file0 - line352 + line350 col5 file0 @@ -4609,12 +4609,12 @@ end - line353 + line351 col3 file0 - line353 + line351 col19 file0 @@ -4626,7 +4626,7 @@ kindevent location - line353 + line351 col3 file0 @@ -4634,12 +4634,12 @@ - line353 + line351 col3 file0 - line353 + line351 col53 file0 @@ -4659,12 +4659,12 
@@ start - line354 + line352 col3 file0 - line354 + line352 col3 file0 @@ -4672,12 +4672,12 @@ end - line354 + line352 col6 file0 - line354 + line352 col6 file0 @@ -4689,7 +4689,7 @@ kindevent location - line354 + line352 col6 file0 @@ -4697,12 +4697,12 @@ - line354 + line352 col4 file0 - line354 + line352 col4 file0 @@ -4720,7 +4720,7 @@ location - line353 + line351 col3 file0 @@ -4739,7 +4739,7 @@ issue_hash_function_offset3 location - line354 + line352 col6 file0 @@ -4747,10 +4747,10 @@ 0 + 349 + 350 351 352 - 353 - 354 @@ -4765,12 +4765,12 @@ start - line365 + line363 col3 file0 - line365 + line363 col5 file0 @@ -4778,12 +4778,12 @@ end - line366 + line364 col3 file0 - line366 + line364 col11 file0 @@ -4795,7 +4795,7 @@ kindevent location - line366 + line364 col3 file0 @@ -4803,12 +4803,12 @@ - line366 + line364 col3 file0 - line366 + line364 col23 file0 @@ -4828,12 +4828,12 @@ start - line367 + line365 col3 file0 - line367 + line365 col3 file0 @@ -4841,12 +4841,12 @@ end - line367 + line365 col8 file0 - line367 + line365 col8 file0 @@ -4858,7 +4858,7 @@ kindevent location - line367 + line365 col8 file0 @@ -4866,12 +4866,12 @@ - line367 + line365 col4 file0 - line367 + line365 col6 file0 @@ -4889,7 +4889,7 @@ location - line366 + line364 col3 file0 @@ -4908,7 +4908,7 @@ issue_hash_function_offset3 location - line367 + line365 col8 file0 @@ -4916,10 +4916,10 @@ 0 + 362 + 363 364 365 - 366 - 367 @@ -4934,12 +4934,12 @@ start - line374 + line372 col3 file0 - line374 + line372 col5 file0 @@ -4947,12 +4947,12 @@ end - line375 + line373 col3 file0 - line375 + line373 col19 file0 @@ -4964,7 +4964,7 @@ kindevent location - line375 + line373 col3 file0 @@ -4972,12 +4972,12 @@ - line375 + line373 col3 file0 - line375 + line373 col52 file0 @@ -4997,12 +4997,12 @@ start - line376 + line374 col3 file0 - line376 + line374 col3 file0 @@ -5010,12 +5010,12 @@ end - line376 + line374 col6 file0 - line376 + line374 col6 file0 @@ -5027,7 +5027,7 @@ kindevent location - line376 + line374 col6 file0 @@ -5035,12 +5035,12 @@ - line376 + line374 col4 file0 - line376 + line374 col4 file0 @@ -5058,7 +5058,7 @@ location - line375 + line373 col3 file0 @@ -5077,7 +5077,7 @@ issue_hash_function_offset3 location - line376 + line374 col6 file0 @@ -5085,10 +5085,10 @@ 0 + 371 + 372 373 374 - 375 - 376 @@ -5103,12 +5103,12 @@ start - line422 + line420 col3 file0 - line422 + line420 col5 file0 @@ -5116,12 +5116,12 @@ end - line422 + line420 col18 file0 - line422 + line420 col43 file0 @@ -5133,7 +5133,7 @@ kindevent location - line422 + line420 col18 file0 @@ -5141,12 +5141,12 @@ - line422 + line420 col18 file0 - line422 + line420 col49 file0 @@ -5162,7 +5162,7 @@ kindevent location - line417 + line415 col1 file0 @@ -5180,12 +5180,12 @@ start - line417 + line415 col1 file0 - line417 + line415 col3 file0 @@ -5193,12 +5193,12 @@ end - line418 + line416 col3 file0 - line418 + line416 col21 file0 @@ -5210,7 +5210,7 @@ kindpop-up location - line418 + line416 col3 file0 @@ -5218,12 +5218,12 @@ - line418 + line416 col3 file0 - line418 + line416 col27 file0 @@ -5238,7 +5238,7 @@ kindpop-up location - line418 + line416 col3 file0 @@ -5246,12 +5246,12 @@ - line418 + line416 col3 file0 - line418 + line416 col27 file0 @@ -5266,7 +5266,7 @@ kindevent location - line418 + line416 col3 file0 @@ -5274,12 +5274,12 @@ - line418 + line416 col3 file0 - line418 + line416 col27 file0 @@ -5297,7 +5297,7 @@ location - line418 + line416 col3 file0 @@ -5316,7 +5316,7 @@ issue_hash_function_offset1 location - line418 + line416 col3 file0 @@ 
-5324,10 +5324,10 @@ 0 - 417 - 418 - 421 - 422 + 415 + 416 + 419 + 420 @@ -5342,12 +5342,12 @@ start - line437 + line435 col3 file0 - line437 + line435 col5 file0 @@ -5355,12 +5355,12 @@ end - line438 + line436 col3 file0 - line438 + line436 col25 file0 @@ -5372,7 +5372,7 @@ kindevent location - line438 + line436 col3 file0 @@ -5380,12 +5380,12 @@ - line438 + line436 col3 file0 - line438 + line436 col67 file0 @@ -5405,12 +5405,12 @@ start - line439 + line437 col3 file0 - line439 + line437 col3 file0 @@ -5418,12 +5418,12 @@ end - line439 + line437 col8 file0 - line439 + line437 col8 file0 @@ -5435,7 +5435,7 @@ kindevent location - line439 + line437 col8 file0 @@ -5443,12 +5443,12 @@ - line439 + line437 col4 file0 - line439 + line437 col6 file0 @@ -5466,7 +5466,7 @@ location - line438 + line436 col3 file0 @@ -5485,7 +5485,7 @@ issue_hash_function_offset3 location - line439 + line437 col8 file0 @@ -5493,10 +5493,10 @@ 0 + 434 + 435 436 437 - 438 - 439 @@ -5511,12 +5511,12 @@ start - line450 + line448 col3 file0 - line450 + line448 col4 file0 @@ -5524,12 +5524,12 @@ end - line450 + line448 col7 file0 - line450 + line448 col11 file0 @@ -5541,7 +5541,7 @@ kindevent location - line450 + line448 col7 file0 @@ -5549,12 +5549,12 @@ - line450 + line448 col7 file0 - line450 + line448 col16 file0 @@ -5570,7 +5570,7 @@ kindevent location - line451 + line449 col7 file0 @@ -5578,12 +5578,12 @@ - line451 + line449 col5 file0 - line451 + line449 col13 file0 @@ -5601,7 +5601,7 @@ location - line450 + line448 col7 file0 @@ -5620,7 +5620,7 @@ issue_hash_function_offset2 location - line451 + line449 col7 file0 @@ -5628,9 +5628,9 @@ 0 + 447 + 448 449 - 450 - 451 @@ -5645,12 +5645,12 @@ start - line462 + line460 col33 file0 - line462 + line460 col33 file0 @@ -5658,12 +5658,12 @@ end - line462 + line460 col37 file0 - line462 + line460 col39 file0 @@ -5675,7 +5675,7 @@ kindevent location - line462 + line460 col37 file0 @@ -5683,12 +5683,12 @@ - line462 + line460 col37 file0 - line462 + line460 col41 file0 @@ -5704,7 +5704,7 @@ kindevent location - line461 + line459 col1 file0 @@ -5718,7 +5718,7 @@ kindevent location - line461 + line459 col1 file0 @@ -5726,12 +5726,12 @@ - line461 + line459 col1 file0 - line461 + line459 col16 file0 @@ -5747,7 +5747,7 @@ kindevent location - line462 + line460 col37 file0 @@ -5755,12 +5755,12 @@ - line462 + line460 col37 file0 - line462 + line460 col41 file0 @@ -5780,12 +5780,12 @@ start - line462 + line460 col37 file0 - line462 + line460 col39 file0 @@ -5793,12 +5793,12 @@ end - line462 + line460 col35 file0 - line462 + line460 col35 file0 @@ -5810,7 +5810,7 @@ kindevent location - line462 + line460 col35 file0 @@ -5818,12 +5818,12 @@ - line462 + line460 col33 file0 - line462 + line460 col41 file0 @@ -5841,7 +5841,7 @@ location - line461 + line459 col1 file0 @@ -5860,7 +5860,7 @@ issue_hash_function_offset0 location - line462 + line460 col35 file0 @@ -5868,8 +5868,8 @@ 0 - 461 - 462 + 459 + 460 @@ -5884,12 +5884,12 @@ start - line471 + line469 col33 file0 - line471 + line469 col33 file0 @@ -5897,12 +5897,12 @@ end - line471 + line469 col37 file0 - line471 + line469 col39 file0 @@ -5914,7 +5914,7 @@ kindevent location - line471 + line469 col37 file0 @@ -5922,12 +5922,12 @@ - line471 + line469 col37 file0 - line471 + line469 col41 file0 @@ -5943,7 +5943,7 @@ kindevent location - line470 + line468 col1 file0 @@ -5957,7 +5957,7 @@ kindevent location - line470 + line468 col1 file0 @@ -5965,12 +5965,12 @@ - line470 + line468 col1 file0 - line470 + line468 col11 file0 @@ -5986,7 +5986,7 @@ 
kindevent location - line471 + line469 col37 file0 @@ -5994,12 +5994,12 @@ - line471 + line469 col37 file0 - line471 + line469 col41 file0 @@ -6019,12 +6019,12 @@ start - line471 + line469 col37 file0 - line471 + line469 col39 file0 @@ -6032,12 +6032,12 @@ end - line471 + line469 col35 file0 - line471 + line469 col35 file0 @@ -6049,7 +6049,7 @@ kindevent location - line471 + line469 col35 file0 @@ -6057,12 +6057,12 @@ - line471 + line469 col33 file0 - line471 + line469 col41 file0 @@ -6080,7 +6080,7 @@ location - line470 + line468 col1 file0 @@ -6099,7 +6099,7 @@ issue_hash_function_offset0 location - line471 + line469 col35 file0 @@ -6107,8 +6107,683 @@ 0 - 470 - 471 + 468 + 469 + + + + + path + + + kindcontrol + edges + + + start + + + line481 + col3 + file0 + + + line481 + col5 + file0 + + + end + + + line482 + col3 + file0 + + + line482 + col10 + file0 + + + + + + + kindevent + location + + line482 + col3 + file0 + + ranges + + + + line482 + col3 + file0 + + + line482 + col28 + file0 + + + + depth0 + extended_message + The value 0 is assigned to 'x' + message + The value 0 is assigned to 'x' + + + kindevent + location + + line483 + col13 + file0 + + ranges + + + + line483 + col10 + file0 + + + line483 + col15 + file0 + + + + depth0 + extended_message + Division by zero + message + Division by zero + + + macro_expansions + + + location + + line482 + col3 + file0 + + nameDISPATCH + expansionfoo(x, "LF1M healer");x = 0;; + + + descriptionDivision by zero + categoryLogic error + typeDivision by zero + check_namecore.DivideZero + + issue_hash_content_of_line_in_context0911a97774745d4fa0ac03cd9680dfe1 + issue_context_kindfunction + issue_contextmulitpleParamsResolveToVA_ARGS + issue_hash_function_offset3 + location + + line483 + col13 + file0 + + ExecutedLines + + 0 + + 480 + 481 + 482 + 483 + + + + + path + + + kindcontrol + edges + + + start + + + line494 + col3 + file0 + + + line494 + col5 + file0 + + + end + + + line495 + col3 + file0 + + + line495 + col16 + file0 + + + + + + + kindevent + location + + line495 + col3 + file0 + + ranges + + + + line495 + col3 + file0 + + + line495 + col71 + file0 + + + + depth0 + extended_message + The value 0 is assigned to 'x' + message + The value 0 is assigned to 'x' + + + kindevent + location + + line496 + col13 + file0 + + ranges + + + + line496 + col10 + file0 + + + line496 + col15 + file0 + + + + depth0 + extended_message + Division by zero + message + Division by zero + + + macro_expansions + + + location + + line495 + col3 + file0 + + nameCONCAT_VA_ARGS + expansionvariadicCFunction(x, "You need to construct additional pylons.",'c', 9);x = 0; + + + descriptionDivision by zero + categoryLogic error + typeDivision by zero + check_namecore.DivideZero + + issue_hash_content_of_line_in_contexted592fb952ed786e7efdc81bbc538e94 + issue_context_kindfunction + issue_contextconcatVA_ARGS + issue_hash_function_offset3 + location + + line496 + col13 + file0 + + ExecutedLines + + 0 + + 493 + 494 + 495 + 496 + + + + + path + + + kindcontrol + edges + + + start + + + line502 + col3 + file0 + + + line502 + col5 + file0 + + + end + + + line503 + col3 + file0 + + + line503 + col16 + file0 + + + + + + + kindevent + location + + line503 + col3 + file0 + + ranges + + + + line503 + col3 + file0 + + + line503 + col44 + file0 + + + + depth0 + extended_message + The value 0 is assigned to 'x' + message + The value 0 is assigned to 'x' + + + kindevent + location + + line504 + col13 + file0 + + ranges + + + + line504 + col10 + file0 + + + line504 + col15 + file0 + + + + 
depth0 + extended_message + Division by zero + message + Division by zero + + + macro_expansions + + + location + + line503 + col3 + file0 + + nameCONCAT_VA_ARGS + expansionvariadicCFunction(x, "You need to construct",);x = 0; + + + descriptionDivision by zero + categoryLogic error + typeDivision by zero + check_namecore.DivideZero + + issue_hash_content_of_line_in_context4b0ab46d7a972d0a388b4bb59351480a + issue_context_kindfunction + issue_contextconcatVA_ARGSEmpty + issue_hash_function_offset3 + location + + line504 + col13 + file0 + + ExecutedLines + + 0 + + 501 + 502 + 503 + 504 + + + + + path + + + kindcontrol + edges + + + start + + + line514 + col3 + file0 + + + line514 + col5 + file0 + + + end + + + line515 + col3 + file0 + + + line515 + col21 + file0 + + + + + + + kindevent + location + + line515 + col3 + file0 + + ranges + + + + line515 + col3 + file0 + + + line515 + col71 + file0 + + + + depth0 + extended_message + The value 0 is assigned to 'x' + message + The value 0 is assigned to 'x' + + + kindevent + location + + line516 + col13 + file0 + + ranges + + + + line516 + col10 + file0 + + + line516 + col15 + file0 + + + + depth0 + extended_message + Division by zero + message + Division by zero + + + macro_expansions + + + location + + line515 + col3 + file0 + + nameSTRINGIFIED_VA_ARGS + expansionvariadicCFunction(x, "Additional supply depots required.", "'a'", 10);x = 0; + + + descriptionDivision by zero + categoryLogic error + typeDivision by zero + check_namecore.DivideZero + + issue_hash_content_of_line_in_context6622e3f0651f97e6cbf4e075e6b07707 + issue_context_kindfunction + issue_contextstringifyVA_ARGS + issue_hash_function_offset3 + location + + line516 + col13 + file0 + + ExecutedLines + + 0 + + 513 + 514 + 515 + 516 + + + + + path + + + kindcontrol + edges + + + start + + + line524 + col3 + file0 + + + line524 + col5 + file0 + + + end + + + line525 + col3 + file0 + + + line525 + col21 + file0 + + + + + + + kindevent + location + + line525 + col3 + file0 + + ranges + + + + line525 + col3 + file0 + + + line525 + col62 + file0 + + + + depth0 + extended_message + The value 0 is assigned to 'x' + message + The value 0 is assigned to 'x' + + + kindevent + location + + line526 + col13 + file0 + + ranges + + + + line526 + col10 + file0 + + + line526 + col15 + file0 + + + + depth0 + extended_message + Division by zero + message + Division by zero + + + macro_expansions + + + location + + line525 + col3 + file0 + + nameSTRINGIFIED_VA_ARGS + expansionvariadicCFunction(x, "Additional supply depots required.", ")";x = 0; + + + descriptionDivision by zero + categoryLogic error + typeDivision by zero + check_namecore.DivideZero + + issue_hash_content_of_line_in_context86c6e52c81f1129e6c9f51e6938d9ee7 + issue_context_kindfunction + issue_contextstringifyVA_ARGSEmpty + issue_hash_function_offset3 + location + + line526 + col13 + file0 + + ExecutedLines + + 0 + + 523 + 524 + 525 + 526 diff --git a/clang/test/Analysis/plist-macros-with-expansion.cpp b/clang/test/Analysis/plist-macros-with-expansion.cpp index a81ba0846905f..f79070095385d 100644 --- a/clang/test/Analysis/plist-macros-with-expansion.cpp +++ b/clang/test/Analysis/plist-macros-with-expansion.cpp @@ -1,5 +1,3 @@ -// RUN: %clang_analyze_cc1 -std=c++14 -analyzer-checker=core -verify %s -// // RUN: %clang_analyze_cc1 -std=c++14 -analyzer-checker=core %s \ // RUN: -analyzer-output=plist -o %t.plist \ // RUN: -analyzer-config expand-macros=true @@ -472,3 +470,62 @@ void useZeroApplier2() { (void)(1 / bar()); } // 
expected-warning{{Division by z // CHECK: nameAPPLY_ZERO2 // CHECK-NEXT: expansionint bar() { return 0; } + +void foo(int &x, const char *str); + +#define PARAMS_RESOLVE_TO_VA_ARGS(i, fmt) foo(i, fmt); \ + i = 0; +#define DISPATCH(...) PARAMS_RESOLVE_TO_VA_ARGS(__VA_ARGS__); + +void mulitpleParamsResolveToVA_ARGS(void) { + int x = 1; + DISPATCH(x, "LF1M healer"); + (void)(10 / x); // expected-warning{{Division by zero}} +} +// CHECK: nameDISPATCH +// CHECK-NEXT: expansionfoo(x, "LF1M healer");x = 0;; + +void variadicCFunction(int &x, const char *str, ...); + +#define CONCAT_VA_ARGS(i, fmt, ...) variadicCFunction(i, fmt, ##__VA_ARGS__); \ + i = 0; + +void concatVA_ARGS(void) { + int x = 1; + CONCAT_VA_ARGS(x, "You need to construct additional pylons.", 'c', 9); + (void)(10 / x); // expected-warning{{Division by zero}} +} +// CHECK: nameCONCAT_VA_ARGS +// CHECK-NEXT: expansionvariadicCFunction(x, "You need to construct additional pylons.",'c', 9);x = 0; + +void concatVA_ARGSEmpty(void) { + int x = 1; + CONCAT_VA_ARGS(x, "You need to construct"); + (void)(10 / x); // expected-warning{{Division by zero}} +} +// FIXME: The comma shouldn't be present after the last argument. +// CHECK: nameCONCAT_VA_ARGS +// CHECK-NEXT: expansionvariadicCFunction(x, "You need to construct",);x = 0; + +#define STRINGIFIED_VA_ARGS(i, fmt, ...) variadicCFunction(i, fmt, #__VA_ARGS__); \ + i = 0; + +void stringifyVA_ARGS(void) { + int x = 1; + STRINGIFIED_VA_ARGS(x, "Additional supply depots required.", 'a', 10); + (void)(10 / x); // expected-warning{{Division by zero}} +} + +// FIXME: Stringify and escape __VA_ARGS__ correctly. +// CHECK: nameSTRINGIFIED_VA_ARGS +// CHECK-NEXT: expansionvariadicCFunction(x, "Additional supply depots required.", "'a'", 10);x = 0; + +void stringifyVA_ARGSEmpty(void) { + int x = 1; + STRINGIFIED_VA_ARGS(x, "Additional supply depots required."); + (void)(10 / x); // expected-warning{{Division by zero}} +} + +// FIXME: Stringify and escape __VA_ARGS__ correctly. +// CHECK: nameSTRINGIFIED_VA_ARGS +// CHECK-NEXT: expansionvariadicCFunction(x, "Additional supply depots required.", ")";x = 0; From 1851bab176bba70fb6c6452b7ae55c2dc97f7bb9 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Fri, 11 Sep 2020 08:19:00 -0400 Subject: [PATCH 0365/1079] [MLIR][Linalg] Undo spurious parameter name change --- mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td index ac6e9317fa32c..41beab0590085 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -467,7 +467,7 @@ class GenericOpBase : LinalgStructuredBase_Op:$library_call, Confined, [IntMinValue<0>]>:$symbol_source); - let results = (outs Variadic:$output_lis); + let results = (outs Variadic:$output_tensors); let regions = (region AnyRegion:$region); let extraClassDeclaration = [{ SmallVector linalgTraitAttrNames() { From a5cefd95cc60318fbf8610ee782bd22b492692a2 Mon Sep 17 00:00:00 2001 From: Yitzhak Mandelbaum Date: Wed, 9 Sep 2020 19:11:47 +0000 Subject: [PATCH 0366/1079] [libTooling] Fix use of `char` in comparison. Fixes Transformer's `Range` parser to handle `char` in a platform-independent way. 
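For illustration (not part of this change): whether `char` is signed is implementation-defined, so a guard written as `c >= 0` accepts or rejects non-ASCII bytes depending on the target. The `isAsciiByte` helper below is hypothetical; the patch itself uses LLVM's `isASCII`.

```cpp
#include <iostream>

// On a signed-char target the byte 0xE9 is negative, so `c >= 0` rejects it;
// on an unsigned-char target the same byte is 233 and passes. Casting through
// `unsigned char` gives the same answer everywhere.
static bool isAsciiByte(char c) { return static_cast<unsigned char>(c) <= 127; }

int main() {
  char c = '\xE9';
  std::cout << "c >= 0:      " << (c >= 0) << '\n';        // target-dependent
  std::cout << "isAsciiByte: " << isAsciiByte(c) << '\n';  // always 0
}
```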
Differential Revision: https://reviews.llvm.org/D87409 --- clang/lib/Tooling/Transformer/Parsing.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Tooling/Transformer/Parsing.cpp b/clang/lib/Tooling/Transformer/Parsing.cpp index fb5fd4a800bbb..66fa04a15594a 100644 --- a/clang/lib/Tooling/Transformer/Parsing.cpp +++ b/clang/lib/Tooling/Transformer/Parsing.cpp @@ -148,7 +148,7 @@ static ParseState advance(ParseState S, size_t N) { } static StringRef consumeWhitespace(StringRef S) { - return S.drop_while([](char c) { return c >= 0 && isWhitespace(c); }); + return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); }); } // Parses a single expected character \c c from \c State, skipping preceding @@ -165,7 +165,7 @@ static ExpectedProgress parseChar(char c, ParseState State) { static ExpectedProgress parseId(ParseState State) { State.Input = consumeWhitespace(State.Input); auto Id = State.Input.take_while( - [](char c) { return c >= 0 && isIdentifierBody(c); }); + [](char c) { return isASCII(c) && isIdentifierBody(c); }); if (Id.empty()) return makeParseError(State, "failed to parse name"); return makeParseProgress(advance(State, Id.size()), Id.str()); From 9fda213ac0e2af05fdae69c60d2cdde316c31cd6 Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 11 Sep 2020 13:56:57 +0100 Subject: [PATCH 0367/1079] [ARM] Update arm-storebytesmerge.ll test. NFC This test used a very odd combination of cortex-m7 and Neon. I have changed it to thumbv7em only. --- llvm/test/CodeGen/ARM/arm-storebytesmerge.ll | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/ARM/arm-storebytesmerge.ll b/llvm/test/CodeGen/ARM/arm-storebytesmerge.ll index fec6ea7ae8382..c7bd79e7ca1d2 100644 --- a/llvm/test/CodeGen/ARM/arm-storebytesmerge.ll +++ b/llvm/test/CodeGen/ARM/arm-storebytesmerge.ll @@ -1,11 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv7em-arm-none-eabi %s -o - | FileCheck %s -target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" -target triple = "thumbv7em-arm-none-eabi" - -; Function Attrs: nounwind -define arm_aapcs_vfpcc void @test(i8* %v50) #0 { +define arm_aapcs_vfpcc void @test(i8* %v50) { ; CHECK-LABEL: test: ; CHECK: @ %bb.0: ; CHECK-NEXT: movw r1, #65534 @@ -337,5 +333,3 @@ define arm_aapcs_vfpcc void @test(i8* %v50) #0 { ret void } -attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m7" "target-features"="-d32,+dsp,+fp-armv8,+hwdiv,+thumb-mode,-crc,-crypto,-dotprod,-fullfp16,-hwdiv-arm,-neon,-ras" "unsafe-fp-math"="false" "use-soft-float"="false" } - From 271a7bb144d3f51d29a465329c3614eaa15a6a3c Mon Sep 17 00:00:00 2001 From: Richard Barton Date: Fri, 11 Sep 2020 14:17:19 +0100 Subject: [PATCH 0368/1079] [flang] Add new documentation main page Add a new index page to be the Flang documentation main page instead of Overview.md, which jumps straight into the compiler design. The index file needs to be in .rst format to use the toctree directive to create a table of contents. 
Also use the sphinx_markdown_tables extension to generate HTML tables from markdown. A number of additional style changes to the existing docs were needed to make this work well: * Convert all headings to the # style, which works better with toctree's titlesonly option. Ensure that there is only one top-level heading per document. * Add a title to documents that don't have one for rendering on the index. * Convert the grammar docs from .txt to .md for better rendering. * Fix a broken link to a section in another document - sphinx does not seem to support anchor links in markdown files. Depends on D87226 Reviewed By: sameeranjoshi Differential Revision: https://reviews.llvm.org/D87242 --- flang/docs/ArrayComposition.md | 31 +++++---- flang/docs/BijectiveInternalNameUniquing.md | 21 +++--- flang/docs/C++17.md | 13 ++-- flang/docs/C++style.md | 9 +++ flang/docs/Calls.md | 7 ++ flang/docs/Character.md | 17 +++-- flang/docs/ControlFlowGraph.md | 7 ++ flang/docs/Directives.md | 5 +- flang/docs/Extensions.md | 27 +++++--- flang/docs/FortranForCProgrammers.md | 68 ++++++++++--------- flang/docs/FortranIR.md | 5 ++ flang/docs/IORuntimeInternals.md | 63 +++++++++-------- flang/docs/ImplementingASemanticCheck.md | 42 +++++++----- flang/docs/Intrinsics.md | 57 +++++++++------- flang/docs/LabelResolution.md | 5 ++ flang/docs/ModFiles.md | 5 ++ ...-4.5-grammar.txt => OpenMP-4.5-grammar.md} | 17 +++-- flang/docs/OpenMP-semantics.md | 5 ++ flang/docs/OptionComparison.md | 15 ++-- flang/docs/Overview.md | 5 ++ flang/docs/ParserCombinators.md | 9 +++ flang/docs/Parsing.md | 33 +++++---- flang/docs/Preprocessing.md | 32 +++++---- flang/docs/PullRequestChecklist.md | 2 +- flang/docs/RuntimeDescriptor.md | 7 ++ flang/docs/Semantics.md | 5 ++ flang/docs/conf.py | 13 +++- .../{f2018-grammar.txt => f2018-grammar.md} | 12 ++-- flang/docs/index.md | 61 +++++++++++++++++ 29 files changed, 399 insertions(+), 199 deletions(-) diff --git a/flang/docs/ArrayComposition.md b/flang/docs/ArrayComposition.md index 0f30af39f9e4b..9e61abe5670f3 100644 --- a/flang/docs/ArrayComposition.md +++ b/flang/docs/ArrayComposition.md @@ -6,6 +6,13 @@ --> +# Array Composition + +```eval_rst +.. contents:: + :local: +``` + This note attempts to describe the motivation for and design of an implementation of Fortran 90 (and later) array expression evaluation that minimizes the use of dynamically allocated temporary storage for @@ -34,8 +41,8 @@ Other Fortran intrinsic functions are technically transformational (e.g., `COMMAND_ARGUMENT_COUNT`) but not of interest for this note. The generic `REDUCE` is also not considered here. -Arrays as functions -=================== +## Arrays as functions + A whole array can be viewed as a function that maps its indices to the values of its elements. Specifically, it is a map from a tuple of integers to its element type. @@ -45,8 +52,8 @@ and the shape of the array delimits the domain of the map. `REAL :: A(N,M)` can be seen as a function mapping ordered pairs of integers `(J,K)` with `1<=J<=N` and `1<=K<=M` to real values. -Array expressions as functions -============================== +## Array expressions as functions + The same perspective can be taken of an array expression comprising intrinsic operators and elemental functions. 
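An illustrative aside, not in the original note: for a C++ reader, this "array as function" view can be made concrete with lambdas over 1-based index tuples; the names below are invented for exposition.

```cpp
#include <cassert>
#include <vector>

int main() {
  // REAL :: A(N,M) viewed as a map (j,k) -> value, with 1<=j<=N and 1<=k<=M.
  const int N = 2, M = 3;
  std::vector<double> storage(N * M, 1.5);  // column-major, as in Fortran
  auto A = [&](int j, int k) { return storage[(j - 1) + (k - 1) * N]; };

  // An expression such as A+1.0 is itself a function over the same index
  // domain; composing lambdas evaluates elements on demand, and no
  // temporary array is materialized for the subexpression.
  auto APlusOne = [&](int j, int k) { return A(j, k) + 1.0; };
  assert(APlusOne(2, 3) == 2.5);
}
```

Composing such functions instead of materializing temporaries is the strategy this note goes on to develop.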
Fortran doesn't allow one to apply subscripts directly to an expression, @@ -83,8 +90,8 @@ side variable as an operand of the right-hand side expression, and any function calls on the right-hand side are elemental or scalar-valued, we can avoid the use of a temporary. -Transformational intrinsic functions as function composition -============================================================ +## Transformational intrinsic functions as function composition + Many of the transformational intrinsic functions listed above can, when their array arguments are viewed as functions over their index tuples, be seen as compositions of those functions with @@ -127,8 +134,8 @@ More completely: * `SPREAD(A,DIM=d,NCOPIES=n)` for compile-time `d` simply applies `A` to a reduced index tuple. -Determination of rank and shape -=============================== +## Determination of rank and shape + An important part of evaluating array expressions without the use of temporary storage is determining the shape of the result prior to, or without, evaluating the elements of the result. @@ -173,8 +180,8 @@ In cases where the analyzed shape is known at compile time, we should be able to have the opportunity to avoid heap allocation in favor of stack storage, if the scope of the variable is local. -Automatic reallocation of allocatables -====================================== +## Automatic reallocation of allocatables + Fortran 2003 introduced the ability to assign non-conforming array expressions to ALLOCATABLE arrays with the implied semantics of reallocation to the new shape. @@ -182,8 +189,8 @@ The implementation of this feature also becomes more straightforward if our implementation of array expressions has decoupled calculation of shapes from the evaluation of the elements of the result. -Rewriting rules -=============== +## Rewriting rules + Let `{...}` denote an ordered tuple of 1-based indices, e.g. `{j,k}`, into the result of an array expression or subexpression. diff --git a/flang/docs/BijectiveInternalNameUniquing.md b/flang/docs/BijectiveInternalNameUniquing.md index b302d389c664f..7a6e8a4f4e644 100644 --- a/flang/docs/BijectiveInternalNameUniquing.md +++ b/flang/docs/BijectiveInternalNameUniquing.md @@ -1,4 +1,9 @@ -## Bijective Internal Name Uniquing +# Bijective Internal Name Uniquing + +```eval_rst +.. contents:: + :local: +``` FIR has a flat namespace. No two objects may have the same name at the module level. (These would be functions, globals, etc.) @@ -13,14 +18,14 @@ Fortran is case insensitive, which allows the compiler to convert the user's identifiers to all lower case. Such a universal conversion implies that all upper case letters are available for use in uniquing. -### Prefix `_Q` +## Prefix `_Q` All uniqued names have the prefix sequence `_Q` to indicate the name has been uniqued. (Q is chosen because it is a [low frequency letter](http://pi.math.cornell.edu/~mec/2003-2004/cryptography/subs/frequencies.html) in English.) -### Scope Building +## Scope Building Symbols can be scoped by the module, submodule, or procedure that contains that symbol. 
After the `_Q` sigil, names are constructed from outermost to @@ -45,7 +50,7 @@ The uniqued name of `fun` becomes: _QMmodSs1modSs2modFsubPfun ``` -### Common blocks +## Common blocks * A common block name will be prefixed with `B` @@ -69,7 +74,7 @@ The uniqued name in case of `blank common block` becomes: _QB ``` -### Module scope global data +## Module scope global data * A global data entity is prefixed with `E` * A global entity that is constant (parameter) will be prefixed with `EC` @@ -92,7 +97,7 @@ The uniqued name of `pi` becomes: _QMmodECpi ``` -### Procedures/Subprograms +## Procedures/Subprograms * A procedure/subprogram is prefixed with `P` @@ -105,7 +110,7 @@ The uniqued name of `sub` becomes: _QPsub ``` -### Derived types and related +## Derived types and related * A derived type is prefixed with `T` * If a derived type has KIND parameters, they are listed in a consistent @@ -148,7 +153,7 @@ The uniqued name of `yourtype` where `k1=4` and `k2=-6` (at compile-time): type `yourtype` above would be `_QCTyourtypeK4KN6`. The type descriptor for `REAL(4)` would be `_QCrealK4`. -### Compiler generated names +## Compiler generated names Compiler generated names do not have to be mapped back to Fortran. These names will be prefixed with `_QQ` and followed by a unique compiler diff --git a/flang/docs/C++17.md b/flang/docs/C++17.md index 87d5fc01f0922..9e0120d2e4c5e 100644 --- a/flang/docs/C++17.md +++ b/flang/docs/C++17.md @@ -6,7 +6,12 @@ --> -## C++14/17 features used in f18 +# C++14/17 features used in f18 + +```eval_rst +.. contents:: + :local: +``` The C++ dialect used in this project constitutes a subset of the standard C++ programming language and library features. @@ -32,7 +37,7 @@ The most important of these are: (`std::tuple` is actually a C++11 feature, but I include it in this list because it's not particularly well known.) -### Sum types +## Sum types First, some background information to explain the need for sum types in f18. @@ -111,7 +116,7 @@ would be to: functions (or the forbidden `dynamic_cast`) to identify alternatives during analysis -### Product types +## Product types Many productions in the Fortran grammar describe a sequence of various sub-parses. @@ -133,7 +138,7 @@ So we use `std::tuple` for such things. It has also been handy for template metaprogramming that needs to work with lists of types. -### `std::optional` +## `std::optional` This simple little type is used wherever a value might or might not be present. diff --git a/flang/docs/C++style.md b/flang/docs/C++style.md index 4ab95393d758a..fb11e64116141 100644 --- a/flang/docs/C++style.md +++ b/flang/docs/C++style.md @@ -6,6 +6,15 @@ --> +# Flang C++ Style Guide + +```eval_rst +.. contents:: + :local: +``` + +This document captures the style guide rules that are followed in the Flang codebase. + ## In brief: * Use *clang-format* from llvm 7 diff --git a/flang/docs/Calls.md b/flang/docs/Calls.md index d70bc910d73db..440d0bd147c2d 100644 --- a/flang/docs/Calls.md +++ b/flang/docs/Calls.md @@ -6,6 +6,13 @@ --> +# Representation of Fortran function calls + +```eval_rst +.. contents:: + :local: +``` + ## Procedure reference implementation protocol Fortran function and subroutine references are complicated. 
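Before moving on from the name-uniquing scheme described in `BijectiveInternalNameUniquing.md` above, a sketch of how mechanical the mangling is; this is emphatically not flang's actual code, and the helper name and scope encoding are assumptions for illustration only.

```cpp
#include <cassert>
#include <string>
#include <utility>
#include <vector>

// Assemble a unique name as described above: the `_Q` prefix, then each
// containing scope tagged M (module), S (submodule), or F (procedure),
// outermost to innermost, then P plus the procedure's lower-cased name.
std::string uniqueProcName(
    const std::vector<std::pair<char, std::string>> &scopes,
    const std::string &proc) {
  std::string result = "_Q";
  for (const auto &[tag, name] : scopes) {
    result += tag;
    result += name;
  }
  return result + "P" + proc;
}

int main() {
  // function fun inside subroutine sub, inside submodules s1mod/s2mod of mod:
  assert(uniqueProcName({{'M', "mod"}, {'S', "s1mod"}, {'S', "s2mod"}, {'F', "sub"}},
                        "fun") == "_QMmodSs1modSs2modFsubPfun");
}
```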
diff --git a/flang/docs/Character.md b/flang/docs/Character.md index 700db864f2dac..603dd8848ba1b 100644 --- a/flang/docs/Character.md +++ b/flang/docs/Character.md @@ -6,9 +6,14 @@ --> -## Implementation of `CHARACTER` types in f18 +# Implementation of `CHARACTER` types in f18 -### Kinds and Character Sets +```eval_rst +.. contents:: + :local: +``` + +## Kinds and Character Sets The f18 compiler and runtime support three kinds of the intrinsic `CHARACTER` type of Fortran 2018. @@ -48,7 +53,7 @@ We might want to support one or more environment variables to change these assumptions, especially for `KIND=1` users of ISO-8859 character sets besides Latin-1. -### Lengths +## Lengths Allocatable `CHARACTER` objects in Fortran may defer the specification of their lengths until the time of their allocation or whole (non-substring) @@ -76,7 +81,7 @@ Fortran substrings are rather like subscript triplets into a hidden "zero" dimension of a scalar `CHARACTER` value, but they cannot have strides. -### Concatenation +## Concatenation Fortran has one `CHARACTER`-valued intrinsic operator, `//`, which concatenates its operands (10.1.5.3). @@ -105,7 +110,7 @@ The result of `//` may be used The f18 compiler has a general (but slow) means of implementing concatenation and a specialized (fast) option to optimize the most common case. -#### General concatenation +### General concatenation In the most general case, the f18 compiler's generated code and runtime support library represent the result as a deferred-length allocatable @@ -130,7 +135,7 @@ When the left-hand side of a `CHARACTER` assignment is a deferred-length allocatable and the right-hand side is a temporary, use of the runtime's `MoveAlloc()` subroutine instead can save an allocation and a copy. -#### Optimized concatenation +### Optimized concatenation Scalar `CHARACTER(KIND=1)` expressions evaluated as the right-hand sides of assignments to independent substrings or whole variables that are not diff --git a/flang/docs/ControlFlowGraph.md b/flang/docs/ControlFlowGraph.md index b2b549845ebb6..dcdecf1b77f65 100644 --- a/flang/docs/ControlFlowGraph.md +++ b/flang/docs/ControlFlowGraph.md @@ -6,6 +6,13 @@ --> +# Control Flow Graph + +```eval_rst +.. contents:: + :local: +``` + ## Concept After a Fortran subprogram has been parsed, its names resolved, and all its semantic constraints successfully checked, the parse tree of its diff --git a/flang/docs/Directives.md b/flang/docs/Directives.md index c2e93c5f3de2e..a1a99b674cef2 100644 --- a/flang/docs/Directives.md +++ b/flang/docs/Directives.md @@ -6,8 +6,9 @@ --> -Compiler directives supported by F18 -==================================== +# Compiler directives supported by Flang + +A list of non-standard directives supported by Flang * `!dir$ fixed` and `!dir$ free` select Fortran source forms. Their effect persists to the end of the current source file. diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md index 7707309a88432..1c85c3f42d1b1 100644 --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -6,6 +6,13 @@ --> +# Fortran Extensions supported by Flang + +```eval_rst +.. 
contents:: + :local: +``` + As a general principle, this compiler will accept by default and without complaint many legacy features, extensions to the standard language, and features that have been deleted from the standard, @@ -16,8 +23,8 @@ Other non-standard features, which do conflict with the current standard specification of the Fortran programming language, are accepted if enabled by command-line options. -Intentional violations of the standard -====================================== +## Intentional violations of the standard + * Scalar `INTEGER` actual argument expressions (not variables!) are converted to the kinds of scalar `INTEGER` dummy arguments when the interface is explicit and the kinds differ. @@ -29,8 +36,8 @@ Intentional violations of the standard so long as they contain no executable code, no internal subprograms, and allocate no storage outside a named `COMMON` block. (C1415) -Extensions, deletions, and legacy features supported by default -=============================================================== +## Extensions, deletions, and legacy features supported by default + * Tabs in source * `<>` as synonym for `.NE.` and `/=` * `$` and `@` as legal characters in names @@ -123,8 +130,8 @@ Extensions, deletions, and legacy features supported by default * DATA statement initialization is allowed for procedure pointers outside structure constructors. -Extensions supported when enabled by options --------------------------------------------- +### Extensions supported when enabled by options + * C-style backslash escape sequences in quoted CHARACTER literals (but not Hollerith) [-fbackslash] * Logical abbreviations `.T.`, `.F.`, `.N.`, `.A.`, `.O.`, and `.X.` @@ -145,8 +152,8 @@ Extensions supported when enabled by options * Ignore occurrences of `IMPLICIT NONE` and `IMPLICIT NONE(TYPE)` [-fimplicit-none-type-never] -Extensions and legacy features deliberately not supported ---------------------------------------------------------- +### Extensions and legacy features deliberately not supported + * `.LG.` as synonym for `.NE.` * `REDIMENSION` * Allocatable `COMMON` @@ -189,8 +196,8 @@ Extensions and legacy features deliberately not supported PGI, Intel, and XLF support this in ways that are not numerically equivalent. PGI converts the arguments while Intel and XLF replace the specific by the related generic. -Preprocessing behavior -====================== +## Preprocessing behavior + * The preprocessor is always run, whatever the filename extension may be. * We respect Fortran comments in macro actual arguments (like GNU, Intel, NAG; unlike PGI and XLF) on the principle that macro calls should be treated diff --git a/flang/docs/FortranForCProgrammers.md b/flang/docs/FortranForCProgrammers.md index 103def2a92ce6..572433ab7c154 100644 --- a/flang/docs/FortranForCProgrammers.md +++ b/flang/docs/FortranForCProgrammers.md @@ -6,8 +6,12 @@ --> -Fortran For C Programmers -========================= +# Fortran For C Programmers + +```eval_rst +.. contents:: + :local: +``` This note is limited to essential information about Fortran so that a C or C++ programmer can get started more quickly with the language, @@ -16,8 +20,8 @@ to write or modify Fortran code. Please see other sources to learn about Fortran's rich history, current applications, and modern best practices in new code. -Know This At Least ------------------- +## Know This At Least + * There have been many implementations of Fortran, often from competing vendors, and the standard language has been defined by U.S. 
and international standards organizations. The various editions of @@ -53,8 +57,8 @@ Know This At Least interfaces in compiled "modules", as well as legacy mechanisms for sharing data and interconnecting subprograms. -A Rosetta Stone ---------------- +## A Rosetta Stone + Fortran's language standard and other documentation uses some terminology in particular ways that might be unfamiliar. @@ -81,8 +85,8 @@ in particular ways that might be unfamiliar. | Type-bound procedure | Kind of a C++ member function but not really | | Unformatted | Raw binary | -Data Types ----------- +## Data Types + There are five built-in ("intrinsic") types: `INTEGER`, `REAL`, `COMPLEX`, `LOGICAL`, and `CHARACTER`. They are parameterized with "kind" values, which should be treated as @@ -117,8 +121,8 @@ Last, there are "typeless" binary constants that can be used in a few situations, like static data initialization or immediate conversion, where type is not necessary. -Arrays ------- +## Arrays + Arrays are not types in Fortran. Being an array is a property of an object or function, not of a type. Unlike C, one cannot have an array of arrays or an array of pointers, @@ -133,8 +137,8 @@ And yes, the default lower bound on each dimension is 1, not 0. Expressions can manipulate arrays as multidimensional values, and the compiler will create the necessary loops. -Allocatables ------------- +## Allocatables + Modern Fortran programs use `ALLOCATABLE` data extensively. Such variables and derived type components are allocated dynamically. They are automatically deallocated when they go out of scope, much @@ -147,8 +151,8 @@ and follow up all the references that are made in the documentation from the description of `ALLOCATABLE` to other topics; it's a feature that interacts with much of the rest of the language.) -I/O ---- +## I/O + Fortran's input/output features are built into the syntax of the language, rather than being defined by library interfaces as in C and C++. There are means for raw binary I/O and for "formatted" transfers to @@ -173,8 +177,8 @@ One can also use compiler-generated formatting in "list-directed" I/O, in which the compiler derives reasonable default formats based on data types. -Subprograms ------------ +## Subprograms + Fortran has both `FUNCTION` and `SUBROUTINE` subprograms. They share the same name space, but functions cannot be called as subroutines or vice versa. @@ -188,8 +192,8 @@ their own internal procedures. As is the case with C++ lambda expressions, internal procedures can reference names from their host subprograms. -Modules -------- +## Modules + Modern Fortran has good support for separate compilation and namespace management. The *module* is the basic unit of compilation, although independent @@ -204,8 +208,8 @@ All references to objects in modules are done with direct names or aliases that have been added to the local scope, as Fortran has no means of qualifying references with module names. -Arguments ---------- +## Arguments + Functions and subroutines have "dummy" arguments that are dynamically associated with actual arguments during calls. Essentially, all argument passing in Fortran is by reference, not value. @@ -236,8 +240,8 @@ scope. This is the opposite of the assumptions under which a C or C++ compiler must labor when trying to optimize code with pointers. -Overloading ------------ +## Overloading + Fortran supports a form of overloading via its interface feature. By default, an interface is a means for specifying prototypes for a set of subroutines and functions. 
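A loose C++ analogy, added for illustration and not part of the original note: a generic interface behaves much like C++ overload resolution, with one user-facing name standing for a set of type-specific specifics.

```cpp
#include <iostream>

// Two "specifics" gathered under one "generic" name, as a Fortran generic
// interface would gather specific procedures.
void swapValues(int &a, int &b) { int t = a; a = b; b = t; }
void swapValues(double &a, double &b) { double t = a; a = b; b = t; }

int main() {
  int i = 1, j = 2;
  double x = 1.5, y = 2.5;
  swapValues(i, j); // resolves to the INTEGER-like specific
  swapValues(x, y); // resolves to the REAL-like specific
  std::cout << i << ' ' << x << '\n'; // prints: 2 1.5
}
```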
@@ -250,8 +254,8 @@ A similar feature can be used for generic type-bound procedures. This feature can be used to overload the built-in operators and some I/O statements, too. -Polymorphism ------------- +## Polymorphism + Fortran code can be written to accept data of some derived type or any extension thereof using `CLASS`, deferring the actual type to execution, rather than the usual `TYPE` syntax. @@ -261,8 +265,8 @@ Fortran's `SELECT TYPE` construct is used to distinguish between possible specific types dynamically, when necessary. It's a little like C++17's `std::visit()` on a discriminated union. -Pointers --------- +## Pointers + Pointers are objects in Fortran, not data types. Pointers can point to data, arrays, and subprograms. A pointer can only point to data that has the `TARGET` attribute. @@ -287,8 +291,8 @@ out of scope. A legacy feature, "Cray pointers", implements dynamic base addressing of one variable using an address stored in another. -Preprocessing -------------- +## Preprocessing + There is no standard preprocessing feature, but every real Fortran implementation has some support for passing Fortran source code through a variant of the standard C source preprocessor. @@ -302,8 +306,8 @@ suffix (e.g., "foo.F90") or a compiler command line option. (Since the F18 compiler always runs its built-in preprocessing stage, no special option or filename suffix is required.) -"Object Oriented" Programming ------------------------------ +## "Object Oriented" Programming + Fortran doesn't have member functions (or subroutines) in the sense that C++ does, in which a function has immediate access to the members of a specific instance of a derived type. @@ -325,8 +329,8 @@ There's a lot more that can be said about type-bound procedures (e.g., how they support overloading) but this should be enough to get you started with the most common usage. -Pitfalls --------- +## Pitfalls + Variable initializers, e.g. `INTEGER :: J=123`, are _static_ initializers! They imply that the variable is stored in static storage, not on the stack, and the initialized value lasts only until the variable is assigned. diff --git a/flang/docs/FortranIR.md b/flang/docs/FortranIR.md index 5d83aaa8e34cf..f1f643a1d17da 100644 --- a/flang/docs/FortranIR.md +++ b/flang/docs/FortranIR.md @@ -8,6 +8,11 @@ # Design: Fortran IR +```eval_rst +.. contents:: + :local: +``` + ## Introduction After semantic analysis is complete and it has been determined that the compiler has a legal Fortran program as input, the parse tree will be lowered to an intermediate representation for the purposes of high-level analysis and optimization. In this document, that intermediate representation will be called Fortran IR or FIR. The pass that converts from the parse tree and other data structures of the front-end to FIR will be called the "Burnside bridge". diff --git a/flang/docs/IORuntimeInternals.md b/flang/docs/IORuntimeInternals.md index b4f3092a014ec..2748fcf16fa3c 100644 --- a/flang/docs/IORuntimeInternals.md +++ b/flang/docs/IORuntimeInternals.md @@ -6,8 +6,12 @@ --> -Fortran I/O Runtime Library Internal Design -=========================================== +# Fortran I/O Runtime Library Internal Design + +```eval_rst +.. contents:: + :local: +``` This note is meant to be an overview of the design of the *implementation* of the f18 Fortran compiler's runtime support library for I/O statements. 
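A toy model before the class tour, assuming only what the `IoStatementState` section below states, namely that each Fortran I/O statement is implemented as a sequence of runtime API calls; every name here is invented, not a real flang entry point.

```cpp
#include <cstdio>
#include <string>

// Hypothetical shape of the begin/transfer/end sequence the compiled code
// would call for: PRINT *, 'HELLO, WORLD'
struct IoCookie { std::string record; };

IoCookie *BeginListOutput(int unit) { (void)unit; return new IoCookie; }
void OutputAscii(IoCookie *io, const char *s) {
  io->record += ' '; // list-directed output begins with a blank column
  io->record += s;
}
int EndIoStatement(IoCookie *io) { // flush the record, free the state
  std::puts(io->record.c_str());
  delete io;
  return 0; // stands in for the statement's IOSTAT value
}

int main() {
  IoCookie *io = BeginListOutput(6);
  OutputAscii(io, "HELLO, WORLD");
  return EndIoStatement(io);
}
```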
@@ -66,8 +70,7 @@ template library of fast conversion algorithms used to interpret floating-point values in Fortran source programs and to emit them to module files. -Overview of Classes -=================== +## Overview of Classes A suite of C++ classes and class templates are composed to construct the Fortran I/O runtime support library. @@ -79,16 +82,16 @@ classes are in the process of being vigorously rearranged and modified; use `grep` or an IDE to discover these classes in the source for now. (Sorry!) -`Terminator` ----------- +### `Terminator` + A general facility for the entire library, `Terminator` latches a source program statement location in terms of an unowned pointer to its source file path name and line number and uses them to construct a fatal error message if needed. It is used for both user program errors and internal runtime library crashes. -`IoErrorHandler` --------------- +### `IoErrorHandler` + When I/O error conditions arise at runtime that the Fortran program might have the privilege to handle itself via `ERR=`, `END=`, or `EOR=` labels and/or by an `IOSTAT=` variable, this subclass of @@ -96,8 +99,8 @@ might have the privilege to handle itself via `ERR=`, `END=`, or It sorts out priorities in the case of multiple errors and determines the final `IOSTAT=` value at the end of an I/O statement. -`MutableModes` ------------- +### `MutableModes` + Fortran's formatted I/O statements are affected by a suite of modes that can be configured by `OPEN` statements, overridden by data transfer I/O statement control lists, and further overridden @@ -108,8 +111,8 @@ order to properly isolate their modifications. The modes in force at the time each data item is processed constitute a member of each `DataEdit`. -`DataEdit` --------- +### `DataEdit` + Represents a single data edit descriptor from a `FORMAT` statement or `FMT=` character value, with some hidden extensions to also support formatting of list-directed transfers. @@ -119,8 +122,8 @@ For simplicity and efficiency, each data edit descriptor is encoded in the `DataEdit` as a simple capitalized character (or two) and some optional field widths. -`FormatControl<>` ---------------- +### `FormatControl<>` + This class template traverses a `FORMAT` statement's contents (or `FMT=` character value) to extract data edit descriptors like `E20.14` to serve each item in an I/O data transfer statement's *io-list*, @@ -142,32 +145,32 @@ output strings or record positionings at the end of the *io-list*. The `DefaultFormatControlCallbacks` structure summarizes the API expected by `FormatControl` from its class template actual arguments. -`OpenFile` --------- +### `OpenFile` + This class encapsulates all (I hope) the operating system interfaces used to interact with the host's filesystems for operations on external units. Asynchronous I/O interfaces are faked for now with synchronous operations and deferred results. -`ConnectionState` ---------------- +### `ConnectionState` + An active connection to an external or internal unit maintains the common parts of its state in this subclass of `ConnectionAttributes`. The base class holds state that should not change during the lifetime of the connection, while the subclass maintains state that may change during I/O statement execution. 
-`InternalDescriptorUnit` ---------------------- +### `InternalDescriptorUnit` + When I/O is being performed from/to a Fortran `CHARACTER` array rather than an external file, this class manages the standard interoperable descriptor used to access its elements as records. It has the necessary interfaces to serve as an actual argument to the `FormatControl` class template. -`FileFrame<>` ----------- +### `FileFrame<>` + This CRTP class template isolates all of the complexity involved between an external unit's `OpenFile` and the buffering requirements imposed by the capabilities of Fortran `FORMAT` control edit @@ -192,8 +195,8 @@ a frame may come up short. As a CRTP class template, `FileFrame` accesses the raw filesystem facilities it needs from `*this`. -`ExternalFileUnit` ---------------- +### `ExternalFileUnit` + This class mixes in `ConnectionState`, `OpenFile`, and `FileFrame` to represent the state of an open (or soon to be opened) external file descriptor as a Fortran I/O unit. @@ -210,8 +213,8 @@ Static member functions `LookUp()`, `LookUpOrCrash()`, and `LookUpOrCreate()` probe the map to convert Fortran `UNIT=` numbers from I/O statements into references to active units. -`IoStatementBase` --------------- +### `IoStatementBase` + The subclasses of `IoStatementBase` each encapsulate and maintain the state of one active Fortran I/O statement across the several I/O runtime library API function calls it may comprise. @@ -239,8 +242,8 @@ the I/O API supports a means whereby the code generated for the Fortran program may supply stack space to the I/O runtime support library for this purpose. -`IoStatementState` ---------------- +### `IoStatementState` + F18's Fortran I/O runtime support library defines and implements an API that uses a sequence of function calls to implement each Fortran I/O statement. @@ -269,8 +272,8 @@ unit, the library has to treat that (expected to be rare) situation as a weird variation of internal I/O since there's no `ExternalFileUnit` available to hold its `IoStatementBase` subclass or `IoStatementState`. -A Narrative Overview Of `PRINT *, 'HELLO, WORLD'` ================================================= +## A Narrative Overview Of `PRINT *, 'HELLO, WORLD'` + 1. When the compiled Fortran program begins execution at the `main()` entry point exported from its main program, it calls `ProgramStart()` with its arguments and environment. diff --git a/flang/docs/ImplementingASemanticCheck.md b/flang/docs/ImplementingASemanticCheck.md index 3bb16915cb880..35b107e4988eb 100644 --- a/flang/docs/ImplementingASemanticCheck.md +++ b/flang/docs/ImplementingASemanticCheck.md @@ -5,14 +5,20 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception --> -# Introduction +# How to implement a Semantic Check in Flang + +```eval_rst +.. contents:: + :local: +``` + I recently added a semantic check to the f18 compiler front end. This document describes my thought process and the resulting implementation. For more information about the compiler, start with the [compiler overview](Overview.md). -# Problem definition +## Problem definition In the 2018 Fortran standard, section 11.1.7.4.3, paragraph 2, states that: @@ -29,7 +35,7 @@ emit a warning if an active DO variable was passed to a dummy argument with INTENT(INOUT). Previously, I had implemented similar checks for SUBROUTINE calls. -# Creating a test +## Creating a test My first step was to create a test case to cause the problem. I called it testfun.f90 and used it to check the behavior of other Fortran compilers. 
Here's the initial version: @@ -94,14 +100,14 @@ constant 216 in the statement: ```fortran dummyArg = 216 ``` -# Analysis and implementation planning +## Analysis and implementation planning I then considered what I needed to do. I needed to detect situations where an active DO variable was passed to a dummy argument with `INTENT(OUT)` or `INTENT(INOUT)`. Once I detected such a situation, I needed to produce a message that highlighted the erroneous source code. -## Deciding where to add the code to the compiler +### Deciding where to add the code to the compiler This new semantic check would depend on several types of information -- the parse tree, source code location information, symbols, and expressions. Thus I needed to put my new code in a place in the compiler after the parse tree had @@ -151,7 +157,7 @@ Since my semantic check was focused on DO CONCURRENT statements, I added it to the file `lib/Semantics/check-do.cpp` where most of the semantic checking for DO statements already lived. -## Taking advantage of prior work +### Taking advantage of prior work When implementing a similar check for SUBROUTINE calls, I created a utility functions in `lib/Semantics/semantics.cpp` to emit messages if a symbol corresponding to an active DO variable was being potentially modified: @@ -173,7 +179,7 @@ information -- The first and third are needed since they're required to call the utility functions. The second is needed to determine whether to call them. -## Finding the source location +### Finding the source location The source code location information that I'd need for the error message must come from the parse tree. I looked in the file `include/flang/Parser/parse-tree.h` and determined that a `struct Expr` @@ -181,7 +187,7 @@ contained source location information since it had the field `CharBlock source`. Thus, if I visited a `parser::Expr` node, I could get the source location information for the associated expression. -## Determining the `INTENT` +### Determining the `INTENT` I knew that I could find the `INTENT` of the dummy argument associated with the actual argument from the function called `dummyIntent()` in the class `evaluate::ActualArgument` in the file `include/flang/Evaluate/call.h`. So @@ -248,7 +254,7 @@ This combination of the traversal framework and `dummyIntent()` would give me the `INTENT` of all of the dummy arguments in a FUNCTION call. Thus, I would have the second piece of information I needed. -## Determining if the actual argument is a variable +### Determining if the actual argument is a variable I also guessed that I could determine if the `evaluate::ActualArgument` consisted of a variable. @@ -264,9 +270,9 @@ needed -- the source location of the erroneous text, the `INTENT` of the dummy argument, and a symbol that I could use to determine whether the actual argument was an active DO variable. -# Implementation +## Implementation -## Adding a parse tree visitor +### Adding a parse tree visitor I started my implementation by adding a visitor for `parser::Expr` nodes. Since this analysis is part of DO construct checking, I did this in `lib/Semantics/check-do.cpp`. I added a print statement to the visitor to @@ -308,7 +314,7 @@ source position of the associated expression (`CharBlock source`). So I now had one of the three pieces of information needed to detect and report errors. 
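To keep the goal in sight before the next steps, here is a toy sketch, not flang code, of the shape the finished check takes once all three pieces of information are in hand; every type below is a stand-in.

```cpp
#include <iostream>
#include <set>
#include <string>

enum class Intent { In, Out, InOut };

// Stand-in for the real check: an active DO variable passed to a dummy
// argument that may be modified gets an error (OUT) or a warning (INOUT).
void checkActualArg(const std::set<std::string> &activeDoVars,
                    const std::string &actualArg, Intent dummyIntent,
                    const std::string &sourceText) {
  if (!activeDoVars.count(actualArg))
    return;
  if (dummyIntent == Intent::Out)
    std::cout << "error: active DO variable '" << actualArg
              << "' passed to INTENT(OUT) dummy in: " << sourceText << '\n';
  else if (dummyIntent == Intent::InOut)
    std::cout << "warning: active DO variable '" << actualArg
              << "' passed to INTENT(INOUT) dummy in: " << sourceText << '\n';
}

int main() {
  std::set<std::string> active{"ivar"};
  checkActualArg(active, "ivar", Intent::Out, "intentOutFunc(ivar)"); // error
  checkActualArg(active, "jvar", Intent::Out, "intentOutFunc(jvar)"); // quiet
}
```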
-## Collecting the actual arguments +### Collecting the actual arguments To get the `INTENT` of the dummy arguments and the `semantics::Symbol` associated with the actual argument, I needed to find all of the actual arguments embedded in an expression that contained a FUNCTION call. So my next step was to write the @@ -474,7 +480,7 @@ node. So far, so good. -## Finding the `INTENT` of the dummy argument +### Finding the `INTENT` of the dummy argument I now wanted to find the `INTENT` of the dummy argument associated with the arguments in the set. As mentioned earlier, the type `evaluate::ActualArgument` has a member function called `dummyIntent()` @@ -518,7 +524,7 @@ I then modified my test case to convince myself that I was getting the correct So far, so good. -## Finding the symbols for arguments that are variables +### Finding the symbols for arguments that are variables The third and last piece of information I needed was to determine if a variable was being passed as an actual argument. In such cases, I wanted to get the symbol table node (`semantics::Symbol`) for the variable. My starting point was the @@ -638,7 +644,7 @@ Here's the result of running the modified compiler on my Fortran test case: Sweet. -## Emitting the messages +### Emitting the messages At this point, using the source location information from the original `parser::Expr`, I had enough information to plug into the exiting interfaces for emitting messages for active DO variables. I modified the @@ -701,7 +707,7 @@ output: Even sweeter. -# Improving the test case +## Improving the test case At this point, my implementation seemed to be working. But I was concerned about the limitations of my test case. So I augmented it to include arguments other than `INTENT(OUT)` and more complex expressions. Luckily, my @@ -762,7 +768,7 @@ Here's the test I ended up with: end subroutine s ``` -# Submitting the pull request +## Submitting the pull request At this point, my implementation seemed functionally complete, so I stripped out all of the debug statements, ran `clang-format` on it and reviewed it to make sure that the names were clear. Here's what I ended up with: @@ -790,7 +796,7 @@ to make sure that the names were clear. Here's what I ended up with: I then created a pull request to get review comments. -# Responding to pull request comments +## Responding to pull request comments I got feedback suggesting that I use an `if` statement rather than a `case` statement. Another comment reminded me that I should look at the code I'd previously writted to do a similar check for SUBROUTINE calls to see diff --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md index 7be0bf3e4a9ca..f9e47e5893bff 100644 --- a/flang/docs/Intrinsics.md +++ b/flang/docs/Intrinsics.md @@ -8,6 +8,11 @@ # A categorization of standard (2018) and extended Fortran intrinsic procedures +```eval_rst +.. contents:: + :local: +``` + This note attempts to group the intrinsic procedures of Fortran into categories of functions or subroutines with similar interfaces as an aid to comprehension beyond that which might be gained from the standard's @@ -53,14 +58,14 @@ Intrinsic modules are not covered here. may appear within the brackets to preserve the order of arguments (e.g., `COUNT`). -# Elemental intrinsic functions +## Elemental intrinsic functions Pure elemental semantics apply to these functions, to wit: when one or more of the actual arguments are arrays, the arguments must be conformable, and the result is also an array. 
Scalar arguments are expanded when the arguments are not all scalars. -## Elemental intrinsic functions that may have unrestricted specific procedures +### Elemental intrinsic functions that may have unrestricted specific procedures When an elemental intrinsic function is documented here as having an _unrestricted specific name_, that name may be passed as an actual @@ -349,7 +354,7 @@ that is present in `SET`, or zero if none is. `VERIFY` is essentially the opposite: it returns the index of the first (or last) character in `STRING` that is *not* present in `SET`, or zero if all are. -# Transformational intrinsic functions +## Transformational intrinsic functions This category comprises a large collection of intrinsic functions that are collected together because they somehow transform their arguments @@ -372,7 +377,7 @@ Some general rules apply to the transformational intrinsic functions: 1. The type `any` here denotes any intrinsic or derived type. 1. The notation `(..)` denotes an array of any rank (but not an assumed-rank array). -## Logical reduction transformational intrinsic functions +### Logical reduction transformational intrinsic functions ``` ALL(LOGICAL(k) MASK(..) [, DIM ]) -> LOGICAL(k) ANY(LOGICAL(k) MASK(..) [, DIM ]) -> LOGICAL(k) @@ -380,7 +385,7 @@ COUNT(LOGICAL(any) MASK(..) [, DIM, KIND=KIND(0) ]) -> INTEGER(KIND) PARITY(LOGICAL(k) MASK(..) [, DIM ]) -> LOGICAL(k) ``` -## Numeric reduction transformational intrinsic functions +### Numeric reduction transformational intrinsic functions ``` IALL(INTEGER(k) ARRAY(..) [, DIM, MASK ]) -> INTEGER(k) IANY(INTEGER(k) ARRAY(..) [, DIM, MASK ]) -> INTEGER(k) @@ -392,7 +397,7 @@ SUM(numeric ARRAY(..) [, DIM, MASK ]) -> numeric `NORM2` generalizes `HYPOT` by computing `SQRT(SUM(X*X))` while avoiding spurious overflows. -## Extrema reduction transformational intrinsic functions +### Extrema reduction transformational intrinsic functions ``` MAXVAL(relational(k) ARRAY(..) [, DIM, MASK ]) -> relational(k) MINVAL(relational(k) ARRAY(..) [, DIM, MASK ]) -> relational(k) @@ -419,7 +424,7 @@ MAXLOC(relational ARRAY(..) [, DIM, MASK, KIND=KIND(0), BACK=.FALSE. ]) MINLOC(relational ARRAY(..) [, DIM, MASK, KIND=KIND(0), BACK=.FALSE. ]) ``` -## Data rearrangement transformational intrinsic functions +### Data rearrangement transformational intrinsic functions The optional `DIM` argument to these functions must be a scalar integer of any kind, and it takes a default value of 1 when absent. @@ -475,7 +480,7 @@ UNPACK(any VECTOR(n), LOGICAL(any) MASK(..), FIELD) -> type and kind of VECTOR, ``` `FIELD` has same type and kind as `VECTOR` and is conformable with `MASK`. -## Other transformational intrinsic functions +### Other transformational intrinsic functions ``` BESSEL_JN(INTEGER(n1) N1, INTEGER(n2) N2, REAL(k) X) -> REAL(k) vector (MAX(N2-N1+1,0)) BESSEL_YN(INTEGER(n1) N1, INTEGER(n2) N2, REAL(k) X) -> REAL(k) vector (MAX(N2-N1+1,0)) @@ -517,7 +522,7 @@ At least one argument must be present in a call to `SELECTED_REAL_KIND`. An assumed-rank array may be passed to `SHAPE`, and if it is associated with an assumed-size array, the last element of the result will be -1. -## Coarray transformational intrinsic functions +### Coarray transformational intrinsic functions ``` FAILED_IMAGES([scalar TEAM_TYPE TEAM, KIND=KIND(0)]) -> INTEGER(KIND) vector GET_TEAM([scalar INTEGER(?) 
LEVEL]) -> scalar TEAM_TYPE @@ -532,10 +537,10 @@ THIS_IMAGE([COARRAY, DIM, scalar TEAM_TYPE TEAM]) -> default INTEGER The result of `THIS_IMAGE` is a scalar if `DIM` is present or if `COARRAY` is absent, and a vector whose length is the corank of `COARRAY` otherwise. -# Inquiry intrinsic functions +## Inquiry intrinsic functions These are neither elemental nor transformational; all are pure. -## Type inquiry intrinsic functions +### Type inquiry intrinsic functions All of these functions return constants. The value of the argument is not used, and may well be undefined. ``` @@ -554,7 +559,7 @@ RANGE(INTEGER(k) or REAL(k) or COMPLEX(k) X(..)) -> scalar default INTEGER TINY(REAL(k) X(..)) -> scalar REAL(k) ``` -## Bound and size inquiry intrinsic functions +### Bound and size inquiry intrinsic functions The results are scalar when `DIM` is present, and a vector of length=(co)rank(`(CO)ARRAY`) when `DIM` is absent. ``` @@ -567,7 +572,7 @@ UCOBOUND(any COARRAY [, DIM, KIND=KIND(0) ]) -> INTEGER(KIND) Assumed-rank arrays may be used with `LBOUND`, `SIZE`, and `UBOUND`. -## Object characteristic inquiry intrinsic functions +### Object characteristic inquiry intrinsic functions ``` ALLOCATED(any type ALLOCATABLE ARRAY) -> scalar default LOGICAL ALLOCATED(any type ALLOCATABLE SCALAR) -> scalar default LOGICAL @@ -584,11 +589,11 @@ The arguments to `EXTENDS_TYPE_OF` must be of extensible derived types or be unl An assumed-rank array may be used with `IS_CONTIGUOUS` and `RANK`. -# Intrinsic subroutines +## Intrinsic subroutines (*TODO*: complete these descriptions) -## One elemental intrinsic subroutine +### One elemental intrinsic subroutine ``` INTERFACE SUBROUTINE MVBITS(FROM, FROMPOS, LEN, TO, TOPOS) @@ -602,7 +607,7 @@ INTERFACE END INTERFACE ``` -## Non-elemental intrinsic subroutines +### Non-elemental intrinsic subroutines ``` CALL CPU_TIME(REAL INTENT(OUT) TIME) ``` @@ -627,7 +632,7 @@ CALL RANDOM_SEED([SIZE, PUT, GET]) CALL SYSTEM_CLOCK([COUNT, COUNT_RATE, COUNT_MAX]) ``` -## Atomic intrinsic subroutines +### Atomic intrinsic subroutines ``` CALL ATOMIC_ADD(ATOM, VALUE [, STAT=]) CALL ATOMIC_AND(ATOM, VALUE [, STAT=]) @@ -642,7 +647,7 @@ CALL ATOMIC_REF(VALUE, ATOM [, STAT=]) CALL ATOMIC_XOR(ATOM, VALUE [, STAT=]) ``` -## Collective intrinsic subroutines +### Collective intrinsic subroutines ``` CALL CO_BROADCAST CALL CO_MAX @@ -651,8 +656,8 @@ CALL CO_REDUCE CALL CO_SUM ``` -# Non-standard intrinsics -## PGI +## Non-standard intrinsics +### PGI ``` AND, OR, XOR LSHIFT, RSHIFT, SHIFT @@ -666,7 +671,7 @@ JINT, JNINT, KNINT LOC ``` -## Intel +### Intel ``` DCMPLX(X,Y), QCMPLX(X,Y) DREAL(DOUBLE COMPLEX A) -> DOUBLE PRECISION @@ -689,12 +694,12 @@ CACHESIZE, EOF, FP_CLASS, INT_PTR_KIND, ISNAN, LOC MALLOC ``` -# Intrinsic Procedure Support in f18 +## Intrinsic Procedure Support in f18 This section gives an overview of the support inside f18 libraries for the intrinsic procedures listed above. It may be outdated, refer to f18 code base for the actual support status. -## Semantic Analysis +### Semantic Analysis F18 semantic expression analysis phase detects intrinsic procedure references, validates the argument types and deduces the return types. This phase currently supports all the intrinsic procedures listed above but the ones in the table below. @@ -710,7 +715,7 @@ This phase currently supports all the intrinsic procedures listed above but the | Collective intrinsic subroutines | CO_BROADCAST &al. 
| -## Intrinsic Function Folding +### Intrinsic Function Folding Fortran Constant Expressions can contain references to a certain number of intrinsic functions (see Fortran 2018 standard section 10.1.12 for more details). Constant Expressions may be used to define kind arguments. Therefore, the semantic @@ -724,7 +729,7 @@ arrays when an implementation is provided for the scalars (regardless of whether it is using host hardware types or not). The status of intrinsic function folding support is given in the sub-sections below. -### Intrinsic Functions with Host Independent Folding Support +#### Intrinsic Functions with Host Independent Folding Support Implementations using f18 scalar types enables folding intrinsic functions on any host and with any possible type kind supported by f18. The intrinsic functions listed below are folded using host independent implementations. @@ -736,7 +741,7 @@ listed below are folded using host independent implementations. | COMPLEX | CMPLX, CONJG | | LOGICAL | BGE, BGT, BLE, BLT | -### Intrinsic Functions with Host Dependent Folding Support +#### Intrinsic Functions with Host Dependent Folding Support Implementations using the host runtime may not be available for all supported f18 types depending on the host hardware types and the libraries available on the host. The actual support on a host depends on what the host hardware types are. diff --git a/flang/docs/LabelResolution.md b/flang/docs/LabelResolution.md index e837b4fa6aece..c1227a8bc35a1 100644 --- a/flang/docs/LabelResolution.md +++ b/flang/docs/LabelResolution.md @@ -8,6 +8,11 @@ # Semantics: Resolving Labels and Construct Names +```eval_rst +.. contents:: + :local: +``` + ## Overview After the Fortran input file(s) has been parsed into a syntax tree, the compiler must check that the program checks semantically. Target labels must be checked and violations of legal semantics should be reported to the user. diff --git a/flang/docs/ModFiles.md b/flang/docs/ModFiles.md index 483341bdd0f47..ccb849ab0decd 100644 --- a/flang/docs/ModFiles.md +++ b/flang/docs/ModFiles.md @@ -8,6 +8,11 @@ # Module Files +```eval_rst +.. contents:: + :local: +``` + Module files hold information from a module that is necessary to compile program units that depend on the module. diff --git a/flang/docs/OpenMP-4.5-grammar.txt b/flang/docs/OpenMP-4.5-grammar.md similarity index 97% rename from flang/docs/OpenMP-4.5-grammar.txt rename to flang/docs/OpenMP-4.5-grammar.md index 180494bbf509e..bc8a18a84e500 100644 --- a/flang/docs/OpenMP-4.5-grammar.txt +++ b/flang/docs/OpenMP-4.5-grammar.md @@ -1,18 +1,16 @@ -#===-- docs/OpenMP-4.5-grammar.txt --------------------------------===# -# -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -#===------------------------------------------------------------------------===# +# OpenMP 4.5 Grammar -# OpenMP 4.5 Specifications +Grammar used by Flang to parse OpenMP 4.5. +## OpenMP 4.5 Specifications +``` 2 omp-directive -> sentinel directive-name [clause[ [,] clause]...] 2.1.1 sentinel -> !$omp | c$omp | *$omp 2.1.2 sentinel -> !$omp +``` -# directive-name +## directive-name +``` 2.5 parallel -> PARALLEL [parallel-clause[ [,] parallel-clause]...] 
parallel-clause -> if-clause | num-threads-clause | @@ -464,3 +462,4 @@ ALLOC | RELEASE | DELETE 2.15.5.2 defaultmap -> DEFAULTMAP (TOFROM:SCALAR) +``` diff --git a/flang/docs/OpenMP-semantics.md b/flang/docs/OpenMP-semantics.md index 4e2a81739cf81..1511bc9e7b3b5 100644 --- a/flang/docs/OpenMP-semantics.md +++ b/flang/docs/OpenMP-semantics.md @@ -8,6 +8,11 @@ # OpenMP Semantic Analysis +```eval_rst +.. contents:: + :local: +``` + ## OpenMP for F18 1. Define and document the parse tree representation for diff --git a/flang/docs/OptionComparison.md b/flang/docs/OptionComparison.md index db5932411cc1e..347a1d6000ee2 100644 --- a/flang/docs/OptionComparison.md +++ b/flang/docs/OptionComparison.md @@ -6,14 +6,21 @@ --> -# Compiler options +# Compiler options comparison + +```eval_rst +.. contents:: + :local: +``` This document catalogs the options processed by F18's peers/competitors. Much of the document is taken up by a set of tables that list the options categorized into different topics. Some of the table headings link to more information about the contents of the tables. For example, the table on **Standards conformance** options links to [notes on Standards conformance](#standards). -**There's also important information in the ___[Notes section](#notes)___ near the end of the document on how this data was gathered and what ___is___ and ___is not___ included in this document.** +**There's also important information in the ___[Appendix section](#appendix)___ near the end of the document on how this data was gathered and what ___is___ and ___is not___ included in this document.** Note that compilers may support language features without having an option for them. Such cases are frequently, but not always noted in this document. +## Categorisation of Options + + + + + + + + + @@ -5322,6 +5346,60 @@

AST Traversal Matchers

+ + + + + + + + + + + + + + + +
Standards conformance @@ -1183,7 +1190,7 @@ Mcuda -## Notes +## Notes **Standards conformance:** @@ -1290,7 +1297,7 @@ GNU is the only compiler with options governing the use of non-standard intrinsi **Warn for bad call checking**: This Cray option ("-eb") issues a warning message rather than an error message when the compiler detects a call to a procedure with one or more dummy arguments having the TARGET, VOLATILE or ASYNCHRONOUS attribute and there is not an explicit interface definition. -## Notes +## Appendix ### What is and is not included diff --git a/flang/docs/Overview.md b/flang/docs/Overview.md index 75a8cd1c4cab0..9878589438450 100644 --- a/flang/docs/Overview.md +++ b/flang/docs/Overview.md @@ -8,6 +8,11 @@ # Overview of Compiler Phases +```eval_rst +.. contents:: + :local: +``` + Each phase produces either correct output or fatal errors. ## Prescan and Preprocess diff --git a/flang/docs/ParserCombinators.md b/flang/docs/ParserCombinators.md index 4f3dc6fd07ae6..ff94d341c1501 100644 --- a/flang/docs/ParserCombinators.md +++ b/flang/docs/ParserCombinators.md @@ -6,6 +6,15 @@ --> +# Parser Combinators + +```eval_rst +.. contents:: + :local: +``` + +This document is a primer on Parser Combinators and their use in Flang. + ## Concept The Fortran language recognizer here can be classified as an LL recursive descent parser. It is composed from a *parser combinator* library that diff --git a/flang/docs/Parsing.md b/flang/docs/Parsing.md index fad9a4d57278c..dec63e6fbdab4 100644 --- a/flang/docs/Parsing.md +++ b/flang/docs/Parsing.md @@ -6,8 +6,13 @@ --> -The F18 Parser -============== +# The F18 Parser + +```eval_rst +.. contents:: + :local: +``` + This program source code implements a parser for the Fortran programming language. @@ -42,8 +47,8 @@ source file and receive its parse tree and error messages. The interfaces of the Parsing class correspond to the two major passes of the parser, which are described below. -Prescanning and Preprocessing ------------------------------ +## Prescanning and Preprocessing + The first pass is performed by an instance of the Prescanner class, with help from an instance of Preprocessor. @@ -100,8 +105,8 @@ The content of the cooked character stream is available and useful for debugging, being as it is a simple value forwarded from the first major pass of the compiler to the second. -Source Provenance ------------------ +## Source Provenance + The prescanner constructs a chronicle of every file that is read by the parser, viz. the original source file and all others that it directly or indirectly includes. One copy of the content of each of these files @@ -124,8 +129,8 @@ Simple `const char *` pointers to characters in the cooked character stream, or to contiguous ranges thereof, are used as source position indicators within the parser and in the parse tree. -Messages --------- +## Messages + Message texts, and snprintf-like formatting strings for constructing messages, are instantiated in the various components of the parser with C++ user defined character literals tagged with `_err_en_US` and `_en_US` @@ -134,8 +139,8 @@ English used in the United States) so that they may be easily identified for localization. As described above, messages are associated with source code positions by means of provenance values. -The Parse Tree --------------- +## The Parse Tree + Each of the ca. 
450 numbered requirement productions in the standard Fortran language grammar, as well as the productions implied by legacy extensions and preserved obsolescent features, maps to a distinct class @@ -174,8 +179,8 @@ stability of pointers into these lists. There is a general purpose library by means of which parse trees may be traversed. -Parsing -------- +## Parsing + This compiler attempts to recognize the entire cooked character stream (see above) as a Fortran program. It records the reductions made during a successful recognition as a parse tree value. The recognized grammar @@ -203,8 +208,8 @@ of "parser combinator" template functions that compose them to form more complicated recognizers and their correspondences to the construction of parse tree values. -Unparsing ---------- +## Unparsing + Parse trees can be converted back into free form Fortran source code. This formatter is not really a classical "pretty printer", but is more of a data structure dump whose output is suitable for compilation diff --git a/flang/docs/Preprocessing.md b/flang/docs/Preprocessing.md index 7f6f3951cfd16..3c6984cfa2fd0 100644 --- a/flang/docs/Preprocessing.md +++ b/flang/docs/Preprocessing.md @@ -6,11 +6,15 @@ --> -Fortran Preprocessing -===================== +# Fortran Preprocessing + +```eval_rst +.. contents:: + :local: +``` + +## Behavior common to (nearly) all compilers: -Behavior common to (nearly) all compilers: ------------------------------------------- * Macro and argument names are sensitive to case. * Fixed form right margin clipping after column 72 (or 132) has precedence over macro name recognition, and also over @@ -39,9 +43,8 @@ Behavior common to (nearly) all compilers: * A `#define` directive intermixed with continuation lines can't define a macro that's invoked earlier in the same continued statement. -Behavior that is not consistent over all extant compilers but which -probably should be uncontroversial: ------------------------------------ +## Behavior that is not consistent over all extant compilers but which probably should be uncontroversial: + * Invoked macro names can straddle a Fortran line continuation. * ... unless implicit fixed form card padding intervenes; i.e., in fixed form, a continued macro name has to be split at column @@ -65,8 +68,8 @@ probably should be uncontroversial: directive indicator. * `#define KWM !` allows KWM to signal a comment. -Judgement calls, where precedents are unclear: ----------------------------------------------- +## Judgement calls, where precedents are unclear: + * Expressions in `#if` and `#elif` should support both Fortran and C operators; e.g., `#if 2 .LT. 3` should work. * If a function-like macro does not close its parentheses, line @@ -84,16 +87,16 @@ Judgement calls, where precedents are unclear: lines, it may or may not affect text in the continued statement that appeared before the directive. -Behavior that few compilers properly support (or none), but should: -------------------------------------------------------------------- +## Behavior that few compilers properly support (or none), but should: + * A macro invocation can straddle free form continuation lines in all of their forms, with continuation allowed in the name, before the arguments, and within the arguments. * Directives can be capitalized in free form, too. * `__VA_ARGS__` and `__VA_OPT__` work in variadic function-like macros. 
-In short, a Fortran preprocessor should work as if: ---------------------------------------------------- +## In short, a Fortran preprocessor should work as if: + 1. Fixed form lines are padded up to column 72 (or 132) and clipped thereafter. 2. Fortran comments are removed. 3. C-style line continuations are processed in preprocessing directives. @@ -125,8 +128,7 @@ text. OpenMP-style directives that look like comments are not addressed by this scheme but are obvious extensions. -Appendix -======== +## Appendix `N` in the table below means "not supported"; this doesn't mean a bug, it just means that a particular behavior was not observed. diff --git a/flang/docs/PullRequestChecklist.md b/flang/docs/PullRequestChecklist.md index 12a67be374a20..b253c153f61ec 100644 --- a/flang/docs/PullRequestChecklist.md +++ b/flang/docs/PullRequestChecklist.md @@ -36,7 +36,7 @@ even though I've read the style guide, they regularly trip me up. clang-format will do this for most code. But you may need to break up long strings. * Review declarations for proper use of `constexpr` and `const`. -* Follow the C++ [naming guidelines](C++style.md#naming). +* Follow the C++ [naming guidelines](C++style.html#naming) * Ensure that the names evoke their purpose and are consistent with existing code. * Used braced initializers. * Review pointer and reference types to make sure that you're using them diff --git a/flang/docs/RuntimeDescriptor.md b/flang/docs/RuntimeDescriptor.md index d819517fa9795..f0bbd2e3fedaf 100644 --- a/flang/docs/RuntimeDescriptor.md +++ b/flang/docs/RuntimeDescriptor.md @@ -6,6 +6,13 @@ --> +# Runtime Descriptors + +```eval_rst +.. contents:: + :local: +``` + ## Concept The properties that characterize data values and objects in Fortran programs must sometimes be materialized when the program runs. diff --git a/flang/docs/Semantics.md b/flang/docs/Semantics.md index 6ea0b292de69f..361426c936c24 100644 --- a/flang/docs/Semantics.md +++ b/flang/docs/Semantics.md @@ -8,6 +8,11 @@ # Semantic Analysis +```eval_rst +.. contents:: + :local: +``` + The semantic analysis pass determines if a syntactically correct Fortran program is is legal by enforcing the constraints of the language. diff --git a/flang/docs/conf.py b/flang/docs/conf.py index 045d0a2c41678..21362fc3449e9 100644 --- a/flang/docs/conf.py +++ b/flang/docs/conf.py @@ -46,12 +46,23 @@ else: source_parsers = {'.md': 'recommonmark.parser.CommonMarkParser'} source_suffix['.md'] = 'markdown' + extensions.append('sphinx_markdown_tables') + + # Setup AutoStructify for inline .rst toctrees in index.md + from recommonmark.transform import AutoStructify + def setup(app): + # Disable inline math to avoid + # https://github.com/readthedocs/recommonmark/issues/120 in Extensions.md + app.add_config_value('recommonmark_config', { + 'enable_inline_math': False + }, True) + app.add_transform(AutoStructify) # The encoding of source files. #source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'Overview' +master_doc = 'index' # General information about the project. project = u'Flang' diff --git a/flang/docs/f2018-grammar.txt b/flang/docs/f2018-grammar.md similarity index 99% rename from flang/docs/f2018-grammar.txt rename to flang/docs/f2018-grammar.md index 9b2819d69c724..70f9ebc7f7641 100644 --- a/flang/docs/f2018-grammar.txt +++ b/flang/docs/f2018-grammar.md @@ -1,11 +1,8 @@ -#===-- docs/f2018-grammar.txt -------------------------------------===# -# -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -#===------------------------------------------------------------------------===# +# Fortran 2018 Grammar +Grammar used by Flang to parse Fortran 2018. + +``` R0001 digit -> 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 R0002 letter -> A | B | C | D | E | F | G | H | I | J | K | L | M | @@ -801,3 +798,4 @@ R1542 return-stmt -> RETURN [scalar-int-expr] R1543 contains-stmt -> CONTAINS R1544 stmt-function-stmt -> function-name ( [dummy-arg-name-list] ) = scalar-expr +``` diff --git a/flang/docs/index.md b/flang/docs/index.md new file mode 100644 index 0000000000000..4c07170565227 --- /dev/null +++ b/flang/docs/index.md @@ -0,0 +1,61 @@ +# Welcome to Flang's documentation + +Flang is LLVM's Fortran frontend + +```eval_rst +.. toctree:: + :titlesonly: + + ReleaseNotes +``` + +# Contributing to Flang + +```eval_rst +.. toctree:: + :titlesonly: + + FortranForCProgrammers + C++style + C++17 + PullRequestChecklist + ImplementingASemanticCheck +``` + +# Design Documents + +```eval_rst +.. toctree:: + :titlesonly: + + Overview + Preprocessing + Parsing + LabelResolution + ModFiles + Semantics + OpenMP-semantics + ControlFlowGraph + FortranIR + IORuntimeInternals + f2018-grammar.md + OpenMP-4.5-grammar.md + Directives + Extensions + Intrinsics + OptionComparison + ParserCombinators + RuntimeDescriptor + Calls + Character + ArrayComposition + BijectiveInternalNameUniquing +``` + +# Indices and tables + +```eval_rst +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` +``` From 6af8758ba4d7c42298a14fcc2433f9ab49215ac1 Mon Sep 17 00:00:00 2001 From: Mikhail Maltsev Date: Fri, 11 Sep 2020 14:41:36 +0100 Subject: [PATCH 0369/1079] [libcxx] Handle target triples with dashes in platform name Target triples may contain a dash in the platform name (e.g. "aarch64-arm-none-eabi"). Account for it when splitting the triple into components. Reviewed By: ldionne, #libc Differential Revision: https://reviews.llvm.org/D87508 --- libcxx/utils/libcxx/test/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index 086db1d7f560d..42438b3ccf2e7 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -245,7 +245,7 @@ def configure_features(self): # XFAIL markers for tests that are known to fail with versions of # libc++ as were shipped with a particular triple. if self.use_system_cxx_lib: - (arch, vendor, platform) = self.config.target_triple.split('-') + (arch, vendor, platform) = self.config.target_triple.split('-', 2) (sysname, version) = re.match(r'([^0-9]+)([0-9\.]*)', platform).groups() self.config.available_features.add('with_system_cxx_lib={}-{}-{}{}'.format(arch, vendor, sysname, version)) From 3eb141e5078a0ce9d92eadc721bc49d214d23056 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 11 Sep 2020 14:33:06 +0100 Subject: [PATCH 0370/1079] [ConstraintSystem] Add helpers to deal with linear constraints. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces a new ConstraintSystem class, that maintains a set of linear constraints and uses Fourier–Motzkin elimination to eliminate constraints to check if there are solutions for the system. 
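
As a small usage sketch (the wrapper function below is illustrative, not part
of the patch; the rows are adapted from the unit tests added below): a row
{c0, c1, ..., cn} passed to addVariableRow() encodes the constraint
c1*x1 + ... + cn*xn <= c0.

    #include "llvm/Analysis/ConstraintSystem.h"
    using namespace llvm;

    bool hasSolution() {
      ConstraintSystem CS;
      CS.addVariableRow({10, 1, 1});  // x + y <= 10
      CS.addVariableRow({-5, -1, 0}); // x >= 5, written as -x <= -5
      CS.addVariableRow({-6, 0, -1}); // y >= 6, written as -y <= -6
      // Eliminating x and then y leaves the contradictory row {-1},
      // i.e. 0 <= -1, so this returns false.
      return CS.mayHaveSolution();
    }

The answer is conservative in one direction: whenever elimination has to give
up (for example because a coefficient computation would overflow),
mayHaveSolution() returns true, so only a "false" result is a proof.
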
It also adds a convert-constraint-log-to-z3.py script, which can parse the debug output of the constraint system and convert it to a python script that feeds the constraints into Z3 and checks if it produces the same result as the LLVM implementation. This is for verification purposes. Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D84544 --- llvm/include/llvm/Analysis/ConstraintSystem.h | 57 +++++++ llvm/lib/Analysis/CMakeLists.txt | 1 + llvm/lib/Analysis/ConstraintSystem.cpp | 141 ++++++++++++++++++ llvm/unittests/Analysis/CMakeLists.txt | 1 + .../Analysis/ConstraintSystemTest.cpp | 82 ++++++++++ llvm/utils/convert-constraint-log-to-z3.py | 69 +++++++++ 6 files changed, 351 insertions(+) create mode 100644 llvm/include/llvm/Analysis/ConstraintSystem.h create mode 100644 llvm/lib/Analysis/ConstraintSystem.cpp create mode 100644 llvm/unittests/Analysis/ConstraintSystemTest.cpp create mode 100755 llvm/utils/convert-constraint-log-to-z3.py diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h new file mode 100644 index 0000000000000..7de787c1fc390 --- /dev/null +++ b/llvm/include/llvm/Analysis/ConstraintSystem.h @@ -0,0 +1,57 @@ +//===- ConstraintSystem.h - A system of linear constraints. --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CONSTRAINTSYSTEM_H +#define LLVM_ANALYSIS_CONSTRAINTSYSTEM_H + +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" + +#include + +namespace llvm { + +class ConstraintSystem { + /// Current linear constraints in the system. + /// An entry of the form c0, c1, ... cn represents the following constraint: + /// c0 >= v0 * c1 + .... + v{n-1} * cn + SmallVector, 4> Constraints; + + /// Current greatest common divisor for all coefficients in the system. + uint32_t GCD = 1; + + // Eliminate constraints from the system using Fourier–Motzkin elimination. + bool eliminateUsingFM(); + + /// Print the constraints in the system, using \p Names as variable names. + void dump(ArrayRef Names) const; + + /// Print the constraints in the system, using x0...xn as variable names. + void dump() const; + + /// Returns true if there may be a solution for the constraints in the system. + bool mayHaveSolutionImpl(); + +public: + void addVariableRow(const SmallVector &R) { + assert(Constraints.empty() || R.size() == Constraints.back().size()); + for (const auto &C : R) { + auto A = std::abs(C); + GCD = APIntOps::GreatestCommonDivisor({32, (uint32_t)A}, {32, GCD}) + .getZExtValue(); + } + Constraints.push_back(R); + } + + /// Returns true if there may be a solution for the constraints in the system. 
+ bool mayHaveSolution(); +}; +} // namespace llvm + +#endif // LLVM_ANALYSIS_CONSTRAINTSYSTEM_H diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index f50439bc87627..78cc764379e17 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -39,6 +39,7 @@ add_llvm_component_library(LLVMAnalysis CodeMetrics.cpp ConstantFolding.cpp DDG.cpp + ConstraintSystem.cpp Delinearization.cpp DemandedBits.cpp DependenceAnalysis.cpp diff --git a/llvm/lib/Analysis/ConstraintSystem.cpp b/llvm/lib/Analysis/ConstraintSystem.cpp new file mode 100644 index 0000000000000..95fe6c9f1f9b7 --- /dev/null +++ b/llvm/lib/Analysis/ConstraintSystem.cpp @@ -0,0 +1,141 @@ +//===- ConstraintSytem.cpp - A system of linear constraints. ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ConstraintSystem.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Debug.h" + +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "constraint-system" + +bool ConstraintSystem::eliminateUsingFM() { + // Implementation of Fourier–Motzkin elimination, with some tricks from the + // paper Pugh, William. "The Omega test: a fast and practical integer + // programming algorithm for dependence + // analysis." + // Supercomputing'91: Proceedings of the 1991 ACM/ + // IEEE conference on Supercomputing. IEEE, 1991. + assert(!Constraints.empty() && + "should only be called for non-empty constraint systems"); + unsigned NumVariables = Constraints[0].size(); + SmallVector, 4> NewSystem; + + unsigned NumConstraints = Constraints.size(); + uint32_t NewGCD = 1; + // FIXME do not use copy + for (unsigned R1 = 0; R1 < NumConstraints; R1++) { + if (Constraints[R1][1] == 0) { + SmallVector NR; + NR.push_back(Constraints[R1][0]); + for (unsigned i = 2; i < NumVariables; i++) { + NR.push_back(Constraints[R1][i]); + } + NewSystem.push_back(std::move(NR)); + continue; + } + + // FIXME do not use copy + bool EliminatedInRow = false; + for (unsigned R2 = R1 + 1; R2 < NumConstraints; R2++) { + if (R1 == R2) + continue; + + // FIXME: can we do better than just dropping things here? 
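+      // A row whose coefficient for the variable being eliminated is zero
+      // places no bound on that variable, so there is nothing to pair with
+      // R1 here; such rows are carried over (minus the eliminated column)
+      // when they are visited as R1 above.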
+ if (Constraints[R2][1] == 0) + continue; + + if ((Constraints[R1][1] < 0 && Constraints[R2][1] < 0) || + (Constraints[R1][1] > 0 && Constraints[R2][1] > 0)) + continue; + + unsigned LowerR = R1; + unsigned UpperR = R2; + if (Constraints[UpperR][1] < 0) + std::swap(LowerR, UpperR); + + SmallVector NR; + for (unsigned I = 0; I < NumVariables; I++) { + if (I == 1) + continue; + + int64_t M1, M2, N; + if (__builtin_mul_overflow(Constraints[UpperR][I], + ((-1) * Constraints[LowerR][1] / GCD), &M1)) + return false; + if (__builtin_mul_overflow(Constraints[LowerR][I], + (Constraints[UpperR][1] / GCD), &M2)) + return false; + if (__builtin_add_overflow(M1, M2, &N)) + return false; + NR.push_back(N); + + NewGCD = APIntOps::GreatestCommonDivisor({32, (uint32_t)NR.back()}, + {32, NewGCD}) + .getZExtValue(); + } + NewSystem.push_back(std::move(NR)); + EliminatedInRow = true; + } + } + Constraints = std::move(NewSystem); + GCD = NewGCD; + + return true; +} + +bool ConstraintSystem::mayHaveSolutionImpl() { + while (!Constraints.empty() && Constraints[0].size() > 1) { + if (!eliminateUsingFM()) + return true; + } + + if (Constraints.empty() || Constraints[0].size() > 1) + return true; + + return all_of(Constraints, [](auto &R) { return R[0] >= 0; }); +} + +void ConstraintSystem::dump(ArrayRef Names) const { + if (Constraints.empty()) + return; + + for (auto &Row : Constraints) { + SmallVector Parts; + for (unsigned I = 1, S = Row.size(); I < S; ++I) { + if (Row[I] == 0) + continue; + std::string Coefficient = ""; + if (Row[I] != 1) + Coefficient = std::to_string(Row[I]) + " * "; + Parts.push_back(Coefficient + Names[I - 1]); + } + assert(!Parts.empty() && "need to have at least some parts"); + LLVM_DEBUG(dbgs() << join(Parts, std::string(" + ")) + << " <= " << std::to_string(Row[0]) << "\n"); + } +} + +void ConstraintSystem::dump() const { + SmallVector Names; + for (unsigned i = 1; i < Constraints.back().size(); ++i) + Names.push_back("x" + std::to_string(i)); + LLVM_DEBUG(dbgs() << "---\n"); + dump(Names); +} + +bool ConstraintSystem::mayHaveSolution() { + dump(); + bool HasSolution = mayHaveSolutionImpl(); + LLVM_DEBUG(dbgs() << (HasSolution ? "sat" : "unsat") << "\n"); + return HasSolution; +} diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt index eb97f6289b67a..dfe570fd15749 100644 --- a/llvm/unittests/Analysis/CMakeLists.txt +++ b/llvm/unittests/Analysis/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_unittest_with_input_files(AnalysisTests CaptureTrackingTest.cpp CFGTest.cpp CGSCCPassManagerTest.cpp + ConstraintSystemTest.cpp DDGTest.cpp DivergenceAnalysisTest.cpp DomTreeUpdaterTest.cpp diff --git a/llvm/unittests/Analysis/ConstraintSystemTest.cpp b/llvm/unittests/Analysis/ConstraintSystemTest.cpp new file mode 100644 index 0000000000000..2301da7ec296f --- /dev/null +++ b/llvm/unittests/Analysis/ConstraintSystemTest.cpp @@ -0,0 +1,82 @@ +//===--- ConstraintSystemTests.cpp ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ConstraintSystem.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +TEST(ConstraintSloverTest, TestSolutionChecks) { + { + ConstraintSystem CS; + // x + y <= 10, x >= 5, y >= 6, x <= 10, y <= 10 + CS.addVariableRow({10, 1, 1}); + CS.addVariableRow({-5, -1, 0}); + CS.addVariableRow({-6, 0, -1}); + CS.addVariableRow({10, 1, 0}); + CS.addVariableRow({10, 0, 1}); + + EXPECT_FALSE(CS.mayHaveSolution()); + } + + { + ConstraintSystem CS; + // x + y <= 10, x >= 2, y >= 3, x <= 10, y <= 10 + CS.addVariableRow({10, 1, 1}); + CS.addVariableRow({-2, -1, 0}); + CS.addVariableRow({-3, 0, -1}); + CS.addVariableRow({10, 1, 0}); + CS.addVariableRow({10, 0, 1}); + + EXPECT_TRUE(CS.mayHaveSolution()); + } + + { + ConstraintSystem CS; + // x + y <= 10, 10 >= x, 10 >= y; does not have a solution. + CS.addVariableRow({10, 1, 1}); + CS.addVariableRow({-10, -1, 0}); + CS.addVariableRow({-10, 0, -1}); + + EXPECT_FALSE(CS.mayHaveSolution()); + } + + { + ConstraintSystem CS; + // x + y >= 20, 10 >= x, 10 >= y; does HAVE a solution. + CS.addVariableRow({-20, -1, -1}); + CS.addVariableRow({-10, -1, 0}); + CS.addVariableRow({-10, 0, -1}); + + EXPECT_TRUE(CS.mayHaveSolution()); + } + + { + ConstraintSystem CS; + + // 2x + y + 3z <= 10, 2x + y >= 10, y >= 1 + CS.addVariableRow({10, 2, 1, 3}); + CS.addVariableRow({-10, -2, -1, 0}); + CS.addVariableRow({-1, 0, 0, -1}); + + EXPECT_FALSE(CS.mayHaveSolution()); + } + + { + ConstraintSystem CS; + + // 2x + y + 3z <= 10, 2x + y >= 10 + CS.addVariableRow({10, 2, 1, 3}); + CS.addVariableRow({-10, -2, -1, 0}); + + EXPECT_TRUE(CS.mayHaveSolution()); + } +} +} // namespace diff --git a/llvm/utils/convert-constraint-log-to-z3.py b/llvm/utils/convert-constraint-log-to-z3.py new file mode 100755 index 0000000000000..77b0a3d95b6d4 --- /dev/null +++ b/llvm/utils/convert-constraint-log-to-z3.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python + +""" +Helper script to convert the log generated by '-debug-only=constraint-system' +to a Python script that uses Z3 to verify the decisions using Z3's Python API. 
+ +Example usage: + +> cat path/to/file.log +--- +x6 + -1 * x7 <= -1 +x6 + -1 * x7 <= -2 +sat + +> ./convert-constraint-log-to-z3.py path/to/file.log > check.py && python ./check.py + +> cat check.py + from z3 import * +x3 = Int("x3") +x1 = Int("x1") +x2 = Int("x2") +s = Solver() +s.add(x1 + -1 * x2 <= 0) +s.add(x2 + -1 * x3 <= 0) +s.add(-1 * x1 + x3 <= -1) +assert(s.check() == unsat) +print('all checks passed') +""" + + +import argparse +import re + + +def main(): + parser = argparse.ArgumentParser( + description='Convert constraint log to script to verify using Z3.') + parser.add_argument('log_file', metavar='log', type=str, + help='constraint-system log file') + args = parser.parse_args() + + content = '' + with open(args.log_file, 'rt') as f: + content = f.read() + + groups = content.split('---') + var_re = re.compile('x\d+') + + print('from z3 import *') + for group in groups: + constraints = [g.strip() for g in group.split('\n') if g.strip() != ''] + variables = set() + for c in constraints[:-1]: + for m in var_re.finditer(c): + variables.add(m.group()) + if len(variables) == 0: + continue + for v in variables: + print('{} = Int("{}")'.format(v, v)) + print('s = Solver()') + for c in constraints[:-1]: + print('s.add({})'.format(c)) + expected = constraints[-1].strip() + print('assert(s.check() == {})'.format(expected)) + print('print("all checks passed")') + + +if __name__ == '__main__': + main() From bbb6392c1471aa4c7b7433be6dc572444005f617 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 11 Sep 2020 13:43:45 +0000 Subject: [PATCH 0371/1079] [gn build] Port 3eb141e5078 --- llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn | 1 + llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn index 1c6d22dd672af..335e54b4f68c5 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn @@ -35,6 +35,7 @@ static_library("Analysis") { "CmpInstAnalysis.cpp", "CodeMetrics.cpp", "ConstantFolding.cpp", + "ConstraintSystem.cpp", "CostModel.cpp", "DDG.cpp", "Delinearization.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn index c4bed481e051b..6adc9866e883f 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn @@ -19,6 +19,7 @@ unittest("AnalysisTests") { "CGSCCPassManagerTest.cpp", "CallGraphTest.cpp", "CaptureTrackingTest.cpp", + "ConstraintSystemTest.cpp", "DDGTest.cpp", "DivergenceAnalysisTest.cpp", "DomTreeUpdaterTest.cpp", From 8da6ae4ce1b686c5c13698e4c5ee937811fda6f7 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 11 Sep 2020 14:48:26 +0100 Subject: [PATCH 0372/1079] Revert "[ConstraintSystem] Add helpers to deal with linear constraints." This reverts commit 3eb141e5078a0ce9d92eadc721bc49d214d23056. This uses __builtin_mul_overflow which is not available everywhere. 
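
For a future recommit, one portable route (a sketch only; the helper below is
illustrative and assumes the MulOverflow/AddOverflow helpers from
llvm/Support/MathExtras.h, which wrap the builtins where available and fall
back to manual checks elsewhere) would be:

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>

    // Combine one pair of coefficients the way eliminateUsingFM() does,
    // returning false on signed overflow, without calling
    // __builtin_mul_overflow/__builtin_add_overflow directly.
    static bool combineCoefficients(int64_t UpperC, int64_t LowerC,
                                    int64_t UpperPivot, int64_t LowerPivot,
                                    int64_t GCD, int64_t &Result) {
      int64_t M1, M2;
      if (llvm::MulOverflow(UpperC, -LowerPivot / GCD, M1))
        return false;
      if (llvm::MulOverflow(LowerC, UpperPivot / GCD, M2))
        return false;
      return !llvm::AddOverflow(M1, M2, Result);
    }
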
--- llvm/include/llvm/Analysis/ConstraintSystem.h | 57 ------- llvm/lib/Analysis/CMakeLists.txt | 1 - llvm/lib/Analysis/ConstraintSystem.cpp | 141 ------------------ llvm/unittests/Analysis/CMakeLists.txt | 1 - .../Analysis/ConstraintSystemTest.cpp | 82 ---------- llvm/utils/convert-constraint-log-to-z3.py | 69 --------- 6 files changed, 351 deletions(-) delete mode 100644 llvm/include/llvm/Analysis/ConstraintSystem.h delete mode 100644 llvm/lib/Analysis/ConstraintSystem.cpp delete mode 100644 llvm/unittests/Analysis/ConstraintSystemTest.cpp delete mode 100755 llvm/utils/convert-constraint-log-to-z3.py diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h deleted file mode 100644 index 7de787c1fc390..0000000000000 --- a/llvm/include/llvm/Analysis/ConstraintSystem.h +++ /dev/null @@ -1,57 +0,0 @@ -//===- ConstraintSystem.h - A system of linear constraints. --------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ANALYSIS_CONSTRAINTSYSTEM_H -#define LLVM_ANALYSIS_CONSTRAINTSYSTEM_H - -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" - -#include - -namespace llvm { - -class ConstraintSystem { - /// Current linear constraints in the system. - /// An entry of the form c0, c1, ... cn represents the following constraint: - /// c0 >= v0 * c1 + .... + v{n-1} * cn - SmallVector, 4> Constraints; - - /// Current greatest common divisor for all coefficients in the system. - uint32_t GCD = 1; - - // Eliminate constraints from the system using Fourier–Motzkin elimination. - bool eliminateUsingFM(); - - /// Print the constraints in the system, using \p Names as variable names. - void dump(ArrayRef Names) const; - - /// Print the constraints in the system, using x0...xn as variable names. - void dump() const; - - /// Returns true if there may be a solution for the constraints in the system. - bool mayHaveSolutionImpl(); - -public: - void addVariableRow(const SmallVector &R) { - assert(Constraints.empty() || R.size() == Constraints.back().size()); - for (const auto &C : R) { - auto A = std::abs(C); - GCD = APIntOps::GreatestCommonDivisor({32, (uint32_t)A}, {32, GCD}) - .getZExtValue(); - } - Constraints.push_back(R); - } - - /// Returns true if there may be a solution for the constraints in the system. - bool mayHaveSolution(); -}; -} // namespace llvm - -#endif // LLVM_ANALYSIS_CONSTRAINTSYSTEM_H diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 78cc764379e17..f50439bc87627 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -39,7 +39,6 @@ add_llvm_component_library(LLVMAnalysis CodeMetrics.cpp ConstantFolding.cpp DDG.cpp - ConstraintSystem.cpp Delinearization.cpp DemandedBits.cpp DependenceAnalysis.cpp diff --git a/llvm/lib/Analysis/ConstraintSystem.cpp b/llvm/lib/Analysis/ConstraintSystem.cpp deleted file mode 100644 index 95fe6c9f1f9b7..0000000000000 --- a/llvm/lib/Analysis/ConstraintSystem.cpp +++ /dev/null @@ -1,141 +0,0 @@ -//===- ConstraintSytem.cpp - A system of linear constraints. ----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/ConstraintSystem.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Debug.h" - -#include -#include - -using namespace llvm; - -#define DEBUG_TYPE "constraint-system" - -bool ConstraintSystem::eliminateUsingFM() { - // Implementation of Fourier–Motzkin elimination, with some tricks from the - // paper Pugh, William. "The Omega test: a fast and practical integer - // programming algorithm for dependence - // analysis." - // Supercomputing'91: Proceedings of the 1991 ACM/ - // IEEE conference on Supercomputing. IEEE, 1991. - assert(!Constraints.empty() && - "should only be called for non-empty constraint systems"); - unsigned NumVariables = Constraints[0].size(); - SmallVector, 4> NewSystem; - - unsigned NumConstraints = Constraints.size(); - uint32_t NewGCD = 1; - // FIXME do not use copy - for (unsigned R1 = 0; R1 < NumConstraints; R1++) { - if (Constraints[R1][1] == 0) { - SmallVector NR; - NR.push_back(Constraints[R1][0]); - for (unsigned i = 2; i < NumVariables; i++) { - NR.push_back(Constraints[R1][i]); - } - NewSystem.push_back(std::move(NR)); - continue; - } - - // FIXME do not use copy - bool EliminatedInRow = false; - for (unsigned R2 = R1 + 1; R2 < NumConstraints; R2++) { - if (R1 == R2) - continue; - - // FIXME: can we do better than just dropping things here? - if (Constraints[R2][1] == 0) - continue; - - if ((Constraints[R1][1] < 0 && Constraints[R2][1] < 0) || - (Constraints[R1][1] > 0 && Constraints[R2][1] > 0)) - continue; - - unsigned LowerR = R1; - unsigned UpperR = R2; - if (Constraints[UpperR][1] < 0) - std::swap(LowerR, UpperR); - - SmallVector NR; - for (unsigned I = 0; I < NumVariables; I++) { - if (I == 1) - continue; - - int64_t M1, M2, N; - if (__builtin_mul_overflow(Constraints[UpperR][I], - ((-1) * Constraints[LowerR][1] / GCD), &M1)) - return false; - if (__builtin_mul_overflow(Constraints[LowerR][I], - (Constraints[UpperR][1] / GCD), &M2)) - return false; - if (__builtin_add_overflow(M1, M2, &N)) - return false; - NR.push_back(N); - - NewGCD = APIntOps::GreatestCommonDivisor({32, (uint32_t)NR.back()}, - {32, NewGCD}) - .getZExtValue(); - } - NewSystem.push_back(std::move(NR)); - EliminatedInRow = true; - } - } - Constraints = std::move(NewSystem); - GCD = NewGCD; - - return true; -} - -bool ConstraintSystem::mayHaveSolutionImpl() { - while (!Constraints.empty() && Constraints[0].size() > 1) { - if (!eliminateUsingFM()) - return true; - } - - if (Constraints.empty() || Constraints[0].size() > 1) - return true; - - return all_of(Constraints, [](auto &R) { return R[0] >= 0; }); -} - -void ConstraintSystem::dump(ArrayRef Names) const { - if (Constraints.empty()) - return; - - for (auto &Row : Constraints) { - SmallVector Parts; - for (unsigned I = 1, S = Row.size(); I < S; ++I) { - if (Row[I] == 0) - continue; - std::string Coefficient = ""; - if (Row[I] != 1) - Coefficient = std::to_string(Row[I]) + " * "; - Parts.push_back(Coefficient + Names[I - 1]); - } - assert(!Parts.empty() && "need to have at least some parts"); - LLVM_DEBUG(dbgs() << join(Parts, std::string(" + ")) - << " <= " << std::to_string(Row[0]) << "\n"); - } -} - -void ConstraintSystem::dump() const { - SmallVector Names; - for (unsigned i = 1; i < Constraints.back().size(); ++i) - Names.push_back("x" + std::to_string(i)); - LLVM_DEBUG(dbgs() << "---\n"); - dump(Names); -} - -bool 
ConstraintSystem::mayHaveSolution() { - dump(); - bool HasSolution = mayHaveSolutionImpl(); - LLVM_DEBUG(dbgs() << (HasSolution ? "sat" : "unsat") << "\n"); - return HasSolution; -} diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt index dfe570fd15749..eb97f6289b67a 100644 --- a/llvm/unittests/Analysis/CMakeLists.txt +++ b/llvm/unittests/Analysis/CMakeLists.txt @@ -23,7 +23,6 @@ add_llvm_unittest_with_input_files(AnalysisTests CaptureTrackingTest.cpp CFGTest.cpp CGSCCPassManagerTest.cpp - ConstraintSystemTest.cpp DDGTest.cpp DivergenceAnalysisTest.cpp DomTreeUpdaterTest.cpp diff --git a/llvm/unittests/Analysis/ConstraintSystemTest.cpp b/llvm/unittests/Analysis/ConstraintSystemTest.cpp deleted file mode 100644 index 2301da7ec296f..0000000000000 --- a/llvm/unittests/Analysis/ConstraintSystemTest.cpp +++ /dev/null @@ -1,82 +0,0 @@ -//===--- ConstraintSystemTests.cpp ----------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/ConstraintSystem.h" -#include "gtest/gtest.h" - -using namespace llvm; - -namespace { - -TEST(ConstraintSloverTest, TestSolutionChecks) { - { - ConstraintSystem CS; - // x + y <= 10, x >= 5, y >= 6, x <= 10, y <= 10 - CS.addVariableRow({10, 1, 1}); - CS.addVariableRow({-5, -1, 0}); - CS.addVariableRow({-6, 0, -1}); - CS.addVariableRow({10, 1, 0}); - CS.addVariableRow({10, 0, 1}); - - EXPECT_FALSE(CS.mayHaveSolution()); - } - - { - ConstraintSystem CS; - // x + y <= 10, x >= 2, y >= 3, x <= 10, y <= 10 - CS.addVariableRow({10, 1, 1}); - CS.addVariableRow({-2, -1, 0}); - CS.addVariableRow({-3, 0, -1}); - CS.addVariableRow({10, 1, 0}); - CS.addVariableRow({10, 0, 1}); - - EXPECT_TRUE(CS.mayHaveSolution()); - } - - { - ConstraintSystem CS; - // x + y <= 10, 10 >= x, 10 >= y; does not have a solution. - CS.addVariableRow({10, 1, 1}); - CS.addVariableRow({-10, -1, 0}); - CS.addVariableRow({-10, 0, -1}); - - EXPECT_FALSE(CS.mayHaveSolution()); - } - - { - ConstraintSystem CS; - // x + y >= 20, 10 >= x, 10 >= y; does HAVE a solution. - CS.addVariableRow({-20, -1, -1}); - CS.addVariableRow({-10, -1, 0}); - CS.addVariableRow({-10, 0, -1}); - - EXPECT_TRUE(CS.mayHaveSolution()); - } - - { - ConstraintSystem CS; - - // 2x + y + 3z <= 10, 2x + y >= 10, y >= 1 - CS.addVariableRow({10, 2, 1, 3}); - CS.addVariableRow({-10, -2, -1, 0}); - CS.addVariableRow({-1, 0, 0, -1}); - - EXPECT_FALSE(CS.mayHaveSolution()); - } - - { - ConstraintSystem CS; - - // 2x + y + 3z <= 10, 2x + y >= 10 - CS.addVariableRow({10, 2, 1, 3}); - CS.addVariableRow({-10, -2, -1, 0}); - - EXPECT_TRUE(CS.mayHaveSolution()); - } -} -} // namespace diff --git a/llvm/utils/convert-constraint-log-to-z3.py b/llvm/utils/convert-constraint-log-to-z3.py deleted file mode 100755 index 77b0a3d95b6d4..0000000000000 --- a/llvm/utils/convert-constraint-log-to-z3.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python - -""" -Helper script to convert the log generated by '-debug-only=constraint-system' -to a Python script that uses Z3 to verify the decisions using Z3's Python API. 
- -Example usage: - -> cat path/to/file.log ---- -x6 + -1 * x7 <= -1 -x6 + -1 * x7 <= -2 -sat - -> ./convert-constraint-log-to-z3.py path/to/file.log > check.py && python ./check.py - -> cat check.py - from z3 import * -x3 = Int("x3") -x1 = Int("x1") -x2 = Int("x2") -s = Solver() -s.add(x1 + -1 * x2 <= 0) -s.add(x2 + -1 * x3 <= 0) -s.add(-1 * x1 + x3 <= -1) -assert(s.check() == unsat) -print('all checks passed') -""" - - -import argparse -import re - - -def main(): - parser = argparse.ArgumentParser( - description='Convert constraint log to script to verify using Z3.') - parser.add_argument('log_file', metavar='log', type=str, - help='constraint-system log file') - args = parser.parse_args() - - content = '' - with open(args.log_file, 'rt') as f: - content = f.read() - - groups = content.split('---') - var_re = re.compile('x\d+') - - print('from z3 import *') - for group in groups: - constraints = [g.strip() for g in group.split('\n') if g.strip() != ''] - variables = set() - for c in constraints[:-1]: - for m in var_re.finditer(c): - variables.add(m.group()) - if len(variables) == 0: - continue - for v in variables: - print('{} = Int("{}")'.format(v, v)) - print('s = Solver()') - for c in constraints[:-1]: - print('s.add({})'.format(c)) - expected = constraints[-1].strip() - print('assert(s.check() == {})'.format(expected)) - print('print("all checks passed")') - - -if __name__ == '__main__': - main() From de2adfaf2575b3193bdef5bde7dd19ac338e1f2e Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 11 Sep 2020 13:49:35 +0000 Subject: [PATCH 0373/1079] [gn build] Port 8da6ae4ce1b --- llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn | 1 - llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn | 1 - 2 files changed, 2 deletions(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn index 335e54b4f68c5..1c6d22dd672af 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn @@ -35,7 +35,6 @@ static_library("Analysis") { "CmpInstAnalysis.cpp", "CodeMetrics.cpp", "ConstantFolding.cpp", - "ConstraintSystem.cpp", "CostModel.cpp", "DDG.cpp", "Delinearization.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn index 6adc9866e883f..c4bed481e051b 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn @@ -19,7 +19,6 @@ unittest("AnalysisTests") { "CGSCCPassManagerTest.cpp", "CallGraphTest.cpp", "CaptureTrackingTest.cpp", - "ConstraintSystemTest.cpp", "DDGTest.cpp", "DivergenceAnalysisTest.cpp", "DomTreeUpdaterTest.cpp", From b9bca883c970d36f408db80df21838c713c326db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krist=C3=B3f=20Umann?= Date: Fri, 11 Sep 2020 15:51:25 +0200 Subject: [PATCH 0374/1079] [analyzer][NFC] Don't bind values to ObjCForCollectionStmt, replace it with a GDM trait Based on the discussion in D82598#2171312. Thanks @NoQ! D82598 is titled "Get rid of statement liveness, because such a thing doesn't exist", and indeed, expressions express a value, non-expression statements don't. 
if (a && get() || []{ return true; }()) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ has a value ~ has a value ~~~~~~~~~~ has a value ~~~~~~~~~~~~~~~~~~~~ has a value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't have a value That is simple enough, so it would only make sense if we only assigned symbolic values to expressions in the static analyzer. Yet the interface checkers can access presents, among other strange things, the following two methods: ProgramState::BindExpr(const Stmt *S, const LocationContext *LCtx, SVal V, bool Invalidate=true) ProgramState::getSVal(const Stmt *S, const LocationContext *LCtx) So, what gives? Turns out, we make an exception for ReturnStmt (which we'll leave for another time) and ObjCForCollectionStmt. For any other loops, in order to know whether we should analyze another iteration, among other things, we evaluate it's condition. Which is a problem for ObjCForCollectionStmt, because it simply doesn't have one (CXXForRangeStmt has an implicit one!). In its absence, we assigned the actual statement with a concrete 1 or 0 to indicate whether there are any more iterations left. However, this is wildly incorrect, its just simply not true that the for statement has a value of 1 or 0, we can't calculate its liveness because that doesn't make any sense either, so this patch turns it into a GDM trait. Fixing this allows us to reinstate the assert removed in https://reviews.llvm.org/rG032b78a0762bee129f33e4255ada6d374aa70c71. Differential Revision: https://reviews.llvm.org/D86736 --- .../Core/PathSensitive/ExprEngine.h | 17 +++ .../Checkers/BasicObjCFoundationChecks.cpp | 3 +- .../Checkers/UndefBranchChecker.cpp | 7 +- clang/lib/StaticAnalyzer/Core/Environment.cpp | 16 ++- clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 125 +++++++++++++----- .../StaticAnalyzer/Core/ExprEngineObjC.cpp | 13 +- .../lib/StaticAnalyzer/Core/SymbolManager.cpp | 4 +- clang/test/Analysis/objc-live-crash.mm | 30 +++++ 8 files changed, 168 insertions(+), 47 deletions(-) create mode 100644 clang/test/Analysis/objc-live-crash.mm diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h index cdfe986355c56..582a56cbee1ee 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h @@ -869,6 +869,23 @@ class ExprEngine { void handleConstructor(const Expr *E, ExplodedNode *Pred, ExplodedNodeSet &Dst); +public: + /// Note whether this loop has any more iteratios to model. These methods are + /// essentially an interface for a GDM trait. Further reading in + /// ExprEngine::VisitObjCForCollectionStmt(). + LLVM_NODISCARD static ProgramStateRef + setWhetherHasMoreIteration(ProgramStateRef State, + const ObjCForCollectionStmt *O, + const LocationContext *LC, bool HasMoreIteraton); + + LLVM_NODISCARD static ProgramStateRef + removeIterationState(ProgramStateRef State, const ObjCForCollectionStmt *O, + const LocationContext *LC); + + LLVM_NODISCARD static bool hasMoreIteration(ProgramStateRef State, + const ObjCForCollectionStmt *O, + const LocationContext *LC); +private: /// Store the location of a C++ object corresponding to a statement /// until the statement is actually encountered. 
For example, if a DeclStmt /// has CXXConstructExpr as its initializer, the object would be considered diff --git a/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp b/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp index 918c6e361381e..a86a410ebcbc1 100644 --- a/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp @@ -978,8 +978,7 @@ void ObjCLoopChecker::checkPostStmt(const ObjCForCollectionStmt *FCS, ProgramStateRef State = C.getState(); // Check if this is the branch for the end of the loop. - SVal CollectionSentinel = C.getSVal(FCS); - if (CollectionSentinel.isZeroConstant()) { + if (!ExprEngine::hasMoreIteration(State, FCS, C.getLocationContext())) { if (!alreadyExecutedAtLeastOneLoopIteration(C.getPredecessor(), FCS)) State = assumeCollectionNonEmpty(C, State, FCS, /*Assumption*/false); diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp index 3e0caaf79ca09..ebe5ad53cc303 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp @@ -11,6 +11,8 @@ // //===----------------------------------------------------------------------===// +#include "clang/AST/StmtObjC.h" +#include "clang/AST/Type.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" @@ -54,10 +56,13 @@ class UndefBranchChecker : public Checker { void checkBranchCondition(const Stmt *Condition, CheckerContext &Ctx) const; }; -} +} // namespace void UndefBranchChecker::checkBranchCondition(const Stmt *Condition, CheckerContext &Ctx) const { + // ObjCForCollection is a loop, but has no actual condition. + if (isa(Condition)) + return; SVal X = Ctx.getSVal(Condition); if (X.isUndef()) { // Generate a sink node, which implicitly marks both outgoing branches as diff --git a/clang/lib/StaticAnalyzer/Core/Environment.cpp b/clang/lib/StaticAnalyzer/Core/Environment.cpp index 1ccf4c6104a65..556ff6af15de2 100644 --- a/clang/lib/StaticAnalyzer/Core/Environment.cpp +++ b/clang/lib/StaticAnalyzer/Core/Environment.cpp @@ -15,6 +15,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/PrettyPrinter.h" #include "clang/AST/Stmt.h" +#include "clang/AST/StmtObjC.h" #include "clang/Analysis/AnalysisDeclContext.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/LangOptions.h" @@ -85,6 +86,12 @@ SVal Environment::lookupExpr(const EnvironmentEntry &E) const { SVal Environment::getSVal(const EnvironmentEntry &Entry, SValBuilder& svalBuilder) const { const Stmt *S = Entry.getStmt(); + assert(!isa(S) && + "Use ExprEngine::hasMoreIteration()!"); + assert((isa(S) || isa(S)) && + "Environment can only argue about Exprs, since only they express " + "a value! 
Any non-expression statement stored in Environment is a " + "result of a hack!"); const LocationContext *LCtx = Entry.getLocationContext(); switch (S->getStmtClass()) { @@ -188,7 +195,14 @@ EnvironmentManager::removeDeadBindings(Environment Env, const EnvironmentEntry &BlkExpr = I.getKey(); const SVal &X = I.getData(); - if (SymReaper.isLive(BlkExpr.getStmt(), BlkExpr.getLocationContext())) { + const bool IsBlkExprLive = + SymReaper.isLive(BlkExpr.getStmt(), BlkExpr.getLocationContext()); + + assert((isa(BlkExpr.getStmt()) || !IsBlkExprLive) && + "Only Exprs can be live, LivenessAnalysis argues about the liveness " + "of *values*!"); + + if (IsBlkExprLive) { // Copy the binding to the new map. EBMapRef = EBMapRef.add(BlkExpr, X); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index a4b11b5e8a961..409741cdb6e41 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -2129,6 +2129,83 @@ static const Stmt *ResolveCondition(const Stmt *Condition, llvm_unreachable("could not resolve condition"); } +using ObjCForLctxPair = + std::pair; + +REGISTER_MAP_WITH_PROGRAMSTATE(ObjCForHasMoreIterations, ObjCForLctxPair, bool) + +ProgramStateRef ExprEngine::setWhetherHasMoreIteration( + ProgramStateRef State, const ObjCForCollectionStmt *O, + const LocationContext *LC, bool HasMoreIteraton) { + assert(!State->contains({O, LC})); + return State->set({O, LC}, HasMoreIteraton); +} + +ProgramStateRef +ExprEngine::removeIterationState(ProgramStateRef State, + const ObjCForCollectionStmt *O, + const LocationContext *LC) { + assert(State->contains({O, LC})); + return State->remove({O, LC}); +} + +bool ExprEngine::hasMoreIteration(ProgramStateRef State, + const ObjCForCollectionStmt *O, + const LocationContext *LC) { + assert(State->contains({O, LC})); + return *State->get({O, LC}); +} + +/// Split the state on whether there are any more iterations left for this loop. +/// Returns a (HasMoreIteration, HasNoMoreIteration) pair, or None when the +/// acquisition of the loop condition value failed. +static Optional> +assumeCondition(const Stmt *Condition, ExplodedNode *N) { + ProgramStateRef State = N->getState(); + if (const auto *ObjCFor = dyn_cast(Condition)) { + bool HasMoreIteraton = + ExprEngine::hasMoreIteration(State, ObjCFor, N->getLocationContext()); + // Checkers have already ran on branch conditions, so the current + // information as to whether the loop has more iteration becomes outdated + // after this point. + State = ExprEngine::removeIterationState(State, ObjCFor, + N->getLocationContext()); + if (HasMoreIteraton) + return std::pair{State, nullptr}; + else + return std::pair{nullptr, State}; + } + SVal X = State->getSVal(Condition, N->getLocationContext()); + + if (X.isUnknownOrUndef()) { + // Give it a chance to recover from unknown. + if (const auto *Ex = dyn_cast(Condition)) { + if (Ex->getType()->isIntegralOrEnumerationType()) { + // Try to recover some path-sensitivity. Right now casts of symbolic + // integers that promote their values are currently not tracked well. + // If 'Condition' is such an expression, try and recover the + // underlying value and use that instead. + SVal recovered = + RecoverCastedSymbol(State, Condition, N->getLocationContext(), + N->getState()->getStateManager().getContext()); + + if (!recovered.isUnknown()) { + X = recovered; + } + } + } + } + + // If the condition is still unknown, give up. 
+ if (X.isUnknownOrUndef()) + return None; + + DefinedSVal V = X.castAs(); + + ProgramStateRef StTrue, StFalse; + return State->assume(V); +} + void ExprEngine::processBranch(const Stmt *Condition, NodeBuilderContext& BldCtx, ExplodedNode *Pred, @@ -2165,48 +2242,28 @@ void ExprEngine::processBranch(const Stmt *Condition, return; BranchNodeBuilder builder(CheckersOutSet, Dst, BldCtx, DstT, DstF); - for (const auto PredI : CheckersOutSet) { - if (PredI->isSink()) + for (ExplodedNode *PredN : CheckersOutSet) { + if (PredN->isSink()) continue; - ProgramStateRef PrevState = PredI->getState(); - SVal X = PrevState->getSVal(Condition, PredI->getLocationContext()); - - if (X.isUnknownOrUndef()) { - // Give it a chance to recover from unknown. - if (const auto *Ex = dyn_cast(Condition)) { - if (Ex->getType()->isIntegralOrEnumerationType()) { - // Try to recover some path-sensitivity. Right now casts of symbolic - // integers that promote their values are currently not tracked well. - // If 'Condition' is such an expression, try and recover the - // underlying value and use that instead. - SVal recovered = RecoverCastedSymbol(PrevState, Condition, - PredI->getLocationContext(), - getContext()); - - if (!recovered.isUnknown()) { - X = recovered; - } - } - } - } + ProgramStateRef PrevState = PredN->getState(); - // If the condition is still unknown, give up. - if (X.isUnknownOrUndef()) { - builder.generateNode(PrevState, true, PredI); - builder.generateNode(PrevState, false, PredI); + ProgramStateRef StTrue, StFalse; + if (const auto KnownCondValueAssumption = assumeCondition(Condition, PredN)) + std::tie(StTrue, StFalse) = *KnownCondValueAssumption; + else { + assert(!isa(Condition)); + builder.generateNode(PrevState, true, PredN); + builder.generateNode(PrevState, false, PredN); continue; } - - DefinedSVal V = X.castAs(); - - ProgramStateRef StTrue, StFalse; - std::tie(StTrue, StFalse) = PrevState->assume(V); + if (StTrue && StFalse) + assert(!isa(Condition));; // Process the true branch. if (builder.isFeasible(true)) { if (StTrue) - builder.generateNode(StTrue, true, PredI); + builder.generateNode(StTrue, true, PredN); else builder.markInfeasible(true); } @@ -2214,7 +2271,7 @@ void ExprEngine::processBranch(const Stmt *Condition, // Process the false branch. if (builder.isFeasible(false)) { if (StFalse) - builder.generateNode(StFalse, false, PredI); + builder.generateNode(StFalse, false, PredN); else builder.markInfeasible(false); } diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineObjC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineObjC.cpp index eb9a0be2e5d6e..5a55e81497b03 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineObjC.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineObjC.cpp @@ -53,10 +53,8 @@ static void populateObjCForDestinationSet( ProgramStateRef state = Pred->getState(); const LocationContext *LCtx = Pred->getLocationContext(); - SVal hasElementsV = svalBuilder.makeTruthVal(hasElements); - - // FIXME: S is not an expression. We should not be binding values to it. - ProgramStateRef nextState = state->BindExpr(S, LCtx, hasElementsV); + ProgramStateRef nextState = + ExprEngine::setWhetherHasMoreIteration(state, S, LCtx, hasElements); if (auto MV = elementV.getAs()) if (const auto *R = dyn_cast(MV->getRegion())) { @@ -93,10 +91,9 @@ void ExprEngine::VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S, // (1) binds the next container value to 'element'. This creates a new // node in the ExplodedGraph. 
// - // (2) binds the value 0/1 to the ObjCForCollectionStmt* itself, indicating - // whether or not the container has any more elements. This value - // will be tested in ProcessBranch. We need to explicitly bind - // this value because a container can contain nil elements. + // (2) note whether the collection has any more elements (or in other words, + // whether the loop has more iterations). This will be tested in + // processBranch. // // FIXME: Eventually this logic should actually do dispatches to // 'countByEnumeratingWithState:objects:count:' (NSFastEnumeration). diff --git a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp index 6ca7aec9caeca..ae40ad910d843 100644 --- a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp @@ -14,6 +14,7 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Expr.h" +#include "clang/AST/StmtObjC.h" #include "clang/Analysis/Analyses/LiveVariables.h" #include "clang/Analysis/AnalysisDeclContext.h" #include "clang/Basic/LLVM.h" @@ -494,7 +495,8 @@ SymbolReaper::isLive(const Stmt *ExprVal, const LocationContext *ELCtx) const { return true; } - // If no statement is provided, everything is this and parent contexts is live. + // If no statement is provided, everything in this and parent contexts is + // live. if (!Loc) return true; diff --git a/clang/test/Analysis/objc-live-crash.mm b/clang/test/Analysis/objc-live-crash.mm new file mode 100644 index 0000000000000..b3b4f19bfc0dd --- /dev/null +++ b/clang/test/Analysis/objc-live-crash.mm @@ -0,0 +1,30 @@ +// RUN: %clang --analyze %s -fblocks + +// https://reviews.llvm.org/D82598#2171312 + +@interface Item +// ... +@end + +@interface Collection +// ... +@end + +typedef void (^Blk)(); + +struct RAII { + Blk blk; + +public: + RAII(Blk blk): blk(blk) {} + ~RAII() { blk(); } +}; + +void foo(Collection *coll) { + RAII raii(^{}); + for (Item *item in coll) {} + int i; + { + int j; + } +} From 4d12d6149ced575be5386889b27f3bb1891052ab Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 26 Aug 2020 10:43:05 -0400 Subject: [PATCH 0375/1079] [libc++] NFC: Add missing license to test --- .../function_type_default_deleter.fail.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libcxx/test/libcxx/utilities/memory/util.smartptr/util.smartptr.shared/function_type_default_deleter.fail.cpp b/libcxx/test/libcxx/utilities/memory/util.smartptr/util.smartptr.shared/function_type_default_deleter.fail.cpp index 5dea3cb7cc175..0bba136ade6dc 100644 --- a/libcxx/test/libcxx/utilities/memory/util.smartptr/util.smartptr.shared/function_type_default_deleter.fail.cpp +++ b/libcxx/test/libcxx/utilities/memory/util.smartptr/util.smartptr.shared/function_type_default_deleter.fail.cpp @@ -1,3 +1,11 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + // UNSUPPORTED: c++03 #include From 48b510c4bc0fe090e635ee0440e46fc176527d7e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 11 Sep 2020 15:32:03 +0100 Subject: [PATCH 0376/1079] [NFC] Fix compiler warnings due to integer comparison of different signedness Fix by directly using INT_MAX and INT32_MAX. Patch by: @nullptr.cpp (Yang Fan) Differential Revision: https://reviews.llvm.org/D87347 --- clang/lib/Lex/Pragma.cpp | 2 +- llvm/lib/Analysis/VectorUtils.cpp | 3 +-- llvm/lib/MC/WasmObjectWriter.cpp | 5 ++--- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 3 +-- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp index b512a547de7df..a05df060813e7 100644 --- a/clang/lib/Lex/Pragma.cpp +++ b/clang/lib/Lex/Pragma.cpp @@ -1356,7 +1356,7 @@ struct PragmaWarningHandler : public PragmaHandler { while (Tok.is(tok::numeric_constant)) { uint64_t Value; if (!PP.parseSimpleIntegerLiteral(Tok, Value) || Value == 0 || - Value > std::numeric_limits::max()) { + Value > INT_MAX) { PP.Diag(Tok, diag::warn_pragma_warning_expected_number); return; } diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 0b10983442e20..34fa0f283b03c 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -416,8 +416,7 @@ void llvm::narrowShuffleMaskElts(int Scale, ArrayRef Mask, ScaledMask.clear(); for (int MaskElt : Mask) { if (MaskElt >= 0) { - assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= - std::numeric_limits::max() && + assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX && "Overflowed 32-bits"); } for (int SliceElt = 0; SliceElt != Scale; ++SliceElt) diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp index af4620361c34d..6075423fa0f26 100644 --- a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -939,9 +939,8 @@ uint32_t WasmObjectWriter::writeDataSection(const MCAsmLayout &Layout) { if (Segment.InitFlags & wasm::WASM_SEGMENT_HAS_MEMINDEX) encodeULEB128(0, W.OS); // memory index if ((Segment.InitFlags & wasm::WASM_SEGMENT_IS_PASSIVE) == 0) { - W.OS << char(Segment.Offset > std::numeric_limits().max() - ? wasm::WASM_OPCODE_I64_CONST - : wasm::WASM_OPCODE_I32_CONST); + W.OS << char(Segment.Offset > INT32_MAX ? wasm::WASM_OPCODE_I64_CONST + : wasm::WASM_OPCODE_I32_CONST); encodeSLEB128(Segment.Offset, W.OS); // offset W.OS << char(wasm::WASM_OPCODE_END); } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index ef56cb77447aa..55c6ce6eb7832 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -2037,8 +2037,7 @@ static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf, if (Mask[i] == UndefMaskElem) continue; uint64_t LSBIndex = IsBigEndian ? 
(i + 1) * TruncRatio - 1 : i * TruncRatio;
-    assert(LSBIndex <= std::numeric_limits<int32_t>::max() &&
-           "Overflowed 32-bits");
+    assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
     if (Mask[i] != (int)LSBIndex)
       return nullptr;
   }

From 0825fa9526818d7d9c94fa47e1fbe19de91003d1 Mon Sep 17 00:00:00 2001
From: Jeremy Morse
Date: Fri, 11 Sep 2020 15:30:52 +0100
Subject: [PATCH 0377/1079] [LiveDebugValues][NFC] Add additional tests

These were supposed to be in 0caeaff1237 and D83054, but a fat-fingered
error when git-adding missed them. Oops.

---
 .../MIR/X86/livedebugvalues_load_in_loop.mir  | 113 ++++++++++
 .../X86/livedebugvalues_many_loop_heads.mir   | 196 ++++++++++++++++++
 2 files changed, 309 insertions(+)
 create mode 100644 llvm/test/DebugInfo/MIR/X86/livedebugvalues_load_in_loop.mir
 create mode 100644 llvm/test/DebugInfo/MIR/X86/livedebugvalues_many_loop_heads.mir

diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_load_in_loop.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_load_in_loop.mir
new file mode 100644
index 0000000000000..97af3bf502196
--- /dev/null
+++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_load_in_loop.mir
@@ -0,0 +1,113 @@
+--- |
+  ; RUN: llc %s -march=x86-64 -run-pass=livedebugvalues -o - -experimental-debug-variable-locations -emulate-old-livedebugvalues=0 | FileCheck %s -implicit-check-not=DBG_VALUE
+
+  ; Sometimes, variables can have multiple locations, and when control flow
+  ; merges, LiveDebugValues has a hard time picking which one the variable
+  ; lives in. Test two of these scenarios that old LiveDebugValues can't
+  ; handle: when a value is in two registers, and when a value is both in a
+  ; register and on the stack.
+
+  ; In a register:
+
+  ; CHECK-LABEL: bb.0.entry:
+  ; CHECK:       DBG_VALUE $rdi, $noreg, !16, !DIExpression()
+  ; CHECK-LABEL: bb.1.bb1:
+  ; CHECK:       DBG_VALUE $rbp, $noreg, !16, !DIExpression()
+  ; CHECK:       DBG_VALUE $rbp, $noreg, !16, !DIExpression()
+  ; CHECK-LABEL: bb.2.bb2:
+  ; CHECK:       DBG_VALUE $rbp, $noreg, !16, !DIExpression()
+  ; CHECK-LABEL: bb.3.bb3:
+  ; CHECK:       DBG_VALUE $rbp, $noreg, !16, !DIExpression()
+
+  ; On the stack: we move from $rbp to a stack slot in bb4, but join back on
+  ; $rbp in bb6.
+ + ; CHECK-LABEL: bb.4: + ; CHECK: DBG_VALUE $rbp, $noreg, !16, !DIExpression() + ; CHECK: DBG_VALUE $rsp, 0, !16, !DIExpression() + ; CHECK-LABEL: bb.5: + ; CHECK: DBG_VALUE $rbp, $noreg, !16, !DIExpression() + ; CHECK-LABEL: bb.6: + ; CHECK: DBG_VALUE $rbp, $noreg, !16, !DIExpression() + + declare i64 @bees(i64 %arg); + + define i32 @_Z8bb_to_bb(i64 %arg) local_unnamed_addr !dbg !12 { + entry: + br label %bb1, !dbg !17 + bb1: + br label %bb2, !dbg !17 + bb2: + br label %bb3, !dbg !17 + bb3: + ret i32 0, !dbg !17 + } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!7, !8, !9, !10} + !llvm.ident = !{!11} + !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 10.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !3, debugInfoForProfiling: true, nameTableKind: None) + !1 = !DIFile(filename: "main.cpp", directory: "F:\") + !2 = !{} + !3 = !{!4} + !4 = !DIGlobalVariableExpression(var: !5, expr: !DIExpression()) + !5 = distinct !DIGlobalVariable(name: "start", scope: !0, file: !1, line: 4, type: !6, isLocal: false, isDefinition: true) + !6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !7 = !{i32 2, !"Dwarf Version", i32 4} + !8 = !{i32 2, !"Debug Info Version", i32 3} + !9 = !{i32 1, !"wchar_size", i32 2} + !10 = !{i32 7, !"PIC Level", i32 2} + !11 = !{!"clang version 10.0.0"} + !12 = distinct !DISubprogram(name: "bb_to_bb", linkageName: "bb_to_bb", scope: !1, file: !1, line: 6, type: !13, scopeLine: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !15) + !13 = !DISubroutineType(types: !14) + !14 = !{!6, !6} + !15 = !{!16} + !16 = !DILocalVariable(name: "myVar", scope: !12, file: !1, line: 7, type: !6) + !17 = !DILocation(line: 10, scope: !12) + +... +--- +name: _Z8bb_to_bb +tracksRegLiveness: true +liveins: + - { reg: '$rdi', virtual-reg: '' } +stack: + - { id: 0, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + bb.0.entry: + liveins: $rdi + successors: %bb.1, %bb.2 + DBG_VALUE $rdi, $noreg, !16, !DIExpression(), debug-location !17 + $rbp = MOV64rr $rdi, debug-location !17 + dead $rcx = MOV64ri 0, debug-location !17 + CALL64pcrel32 @bees, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $rax, debug-location !17 + CMP64ri8 renamable $rax, 1, implicit-def $eflags, debug-location !17 + JCC_1 %bb.2, 4, implicit killed $eflags, debug-location !17 + bb.1.bb1: + liveins: $rax, $rbp + successors: %bb.3 + $rbp = MOV64ri 0, debug-location !17 + DBG_VALUE $rbp, $noreg, !16, !DIExpression(), debug-location !17 + JMP_1 %bb.3 + bb.2.bb2: + liveins: $rax, $rbp + successors: %bb.3 + $rax = MOV64ri 0, debug-location !17 + bb.3.bb3: + liveins: $rax, $rbp + $rdi = MOV64rr $rbp, debug-location !17 + CALL64pcrel32 @bees, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $rax, debug-location !17 + CMP64ri8 renamable $rax, 1, implicit-def $eflags, debug-location !17 + JCC_1 %bb.5, 4, implicit killed $eflags, debug-location !17 + bb.4: + liveins: $rax, $rbp + MOV64mr $rsp, 1, $noreg, 8, $noreg, killed renamable $rbp :: (store 8 into %stack.0) + JMP_1 %bb.6 + bb.5: + liveins: $rax, $rbp + bb.6: + liveins: $rax, $rbp + RETQ $rax, debug-location !17 +... 
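The join rule that both scenarios above exercise can be stated compactly
outside of MIR. What follows is a minimal, hypothetical C++ sketch of the
idea; it is not the LiveDebugValues implementation, and the type and
function names are invented for illustration. At a control-flow merge, a
variable's location survives only if every incoming edge carries the same
underlying value; the concrete machine location (register or spill slot)
is allowed to differ between the paths.

#include <optional>

// Hypothetical model of a variable location: the identity of the value
// (which def produced it) plus the machine location currently holding it.
struct VarLoc {
  unsigned ValueNum; // e.g. "the def of $rdi in bb.0"
  int MachineLoc;    // register number or spill-slot id
};

// Join at a CFG merge: keep the location only if both predecessors agree
// on the value. If they hold the same value in different places, one place
// is picked as canonical; if the values differ, the variable location is
// dropped, since only a real PHI could describe it.
std::optional<VarLoc> join(std::optional<VarLoc> A, std::optional<VarLoc> B) {
  if (!A || !B)
    return std::nullopt; // unknown on one path
  if (A->ValueNum != B->ValueNum)
    return std::nullopt; // different values meet here
  return A;              // same value; its location survives the merge
}

In the test above, bb.6 is reached with myVar's value in $rbp on one path
and in a spill slot on the other; because both locations hold the same
value, a value-tracking join can still emit a DBG_VALUE, which is what the
CHECK lines demand.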
diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_many_loop_heads.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_many_loop_heads.mir
new file mode 100644
index 0000000000000..f5332c29c837f
--- /dev/null
+++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_many_loop_heads.mir
@@ -0,0 +1,196 @@
+--- |
+  ; RUN: llc %s -march=x86-64 -run-pass=livedebugvalues -o - -experimental-debug-variable-locations | FileCheck %s -implicit-check-not=DBG_VALUE
+
+  ; The MIR below represents a pathological case for value-tracking
+  ; LiveDebugValues. The code structure is eight nested loops, with loop heads
+  ; from bb.1 to bb.8, a central block bb.9 that does nothing, and loop ends
+  ; from bb.10 to bb.17. The CMPs and jumps might be broken; the only
+  ; important part is that it looks like nested loops to LiveDebugValues.
+  ;
+  ; The variable location is always $rsi, which enters the function live.
+  ; There's also a def of $rsi in bb.14, in a loop tail, halfway into the
+  ; loop nest.
+  ;
+  ; This presents a serious problem: the outer four loops each implicitly have
+  ; a PHI value for $rsi, because the block could be entered on a path straight
+  ; from entry, or from bb.14 where $rsi is def'd, while the innermost four
+  ; loops have a value of $rsi that is live-through each loop from bb.5
+  ; onwards.
+  ;
+  ; Value-tracking LiveDebugValues _must_ correctly identify each PHI value.
+  ; Observe the DBG_VALUE in bb.2: this variable location mustn't be propagated
+  ; any further, because there's a path to either successor that goes through
+  ; bb.14 where the value is overwritten. Value tracking needs to identify the
+  ; PHI value on entry to the block, and that each successor has a different
+  ; PHI value in that register.
+  ;
+  ; Likewise, we mustn't identify values as PHIs which aren't. Entering bb.5
+  ; has a PHI value (from bb.4) in $rsi. There are no paths to bb.5 that pass
+  ; through the clobbering bb.14, which don't also pass through bb.4: thus
+  ; that value is live-through the innermost four loops. If we
+  ; over-approximated where PHIs happened, we would lose variable location
+  ; coverage here, by not propagating the variable location through the inner
+  ; loops.
+  ;
+  ; Getting this right requires the lattice descent (described in the
+  ; implementation) to search loop head PHI values, until one is found that is
+  ; live-through a loop.
+ + ; This location in bb.2 should not be propagated further, + ; CHECK-LABEL: bb.2: + ; CHECK: DBG_VALUE $rsi, $noreg + + ; This location should be live through the inner loops, til bb.14 + ; CHECK-LABEL: bb.5: + ; CHECK: DBG_VALUE $rsi, $noreg + ; CHECK-LABEL: bb.6: + ; CHECK: DBG_VALUE $rsi, $noreg + ; CHECK-LABEL: bb.7: + ; CHECK: DBG_VALUE $rsi, $noreg + ; CHECK-LABEL: bb.8: + ; CHECK: DBG_VALUE $rsi, $noreg + ; CHECK-LABEL: bb.9: + ; CHECK: DBG_VALUE $rsi, $noreg + ; CHECK-LABEL: bb.10: + ; CHECK: DBG_VALUE $rsi, $noreg + ; CHECK-LABEL: bb.11: + ; CHECK: DBG_VALUE $rsi, $noreg + ; CHECK-LABEL: bb.12: + ; CHECK: DBG_VALUE $rsi, $noreg + ; CHECK-LABEL: bb.13: + ; CHECK: DBG_VALUE $rsi, $noreg + + declare i64 @bees(i64 %arg); + + define i32 @chiasm(i64 %arg) local_unnamed_addr !dbg !12 { + entry: + br label %bb1, !dbg !17 + bb1: + br label %bb2, !dbg !17 + bb2: + br label %bb3, !dbg !17 + bb3: + ret i32 0, !dbg !17 + } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!7, !8, !9, !10} + !llvm.ident = !{!11} + !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 10.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !3, debugInfoForProfiling: true, nameTableKind: None) + !1 = !DIFile(filename: "main.cpp", directory: "F:\") + !2 = !{} + !3 = !{!4} + !4 = !DIGlobalVariableExpression(var: !5, expr: !DIExpression()) + !5 = distinct !DIGlobalVariable(name: "start", scope: !0, file: !1, line: 4, type: !6, isLocal: false, isDefinition: true) + !6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !7 = !{i32 2, !"Dwarf Version", i32 4} + !8 = !{i32 2, !"Debug Info Version", i32 3} + !9 = !{i32 1, !"wchar_size", i32 2} + !10 = !{i32 7, !"PIC Level", i32 2} + !11 = !{!"clang version 10.0.0"} + !12 = distinct !DISubprogram(name: "bb_to_bb", linkageName: "bb_to_bb", scope: !1, file: !1, line: 6, type: !13, scopeLine: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !15) + !13 = !DISubroutineType(types: !14) + !14 = !{!6, !6} + !15 = !{!16} + !16 = !DILocalVariable(name: "myVar", scope: !12, file: !1, line: 7, type: !6) + !17 = !DILocation(line: 10, scope: !12) + +... 
+--- +name: chiasm +tracksRegLiveness: true +liveins: + - { reg: '$rdi', virtual-reg: '' } +stack: + - { id: 0, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + bb.0.entry: + liveins: $rdi, $rsi + + bb.1: + liveins: $rsi, $rdi + CMP64ri8 renamable $rdi, 1, implicit-def $eflags, debug-location !17 + JCC_1 %bb.17, 4, implicit $eflags, debug-location !17 + + bb.2: + liveins: $rsi, $rdi + DBG_VALUE $rsi, $noreg, !16, !DIExpression(), debug-location !17 + CMP64ri8 renamable $rdi, 2, implicit-def $eflags, debug-location !17 + JCC_1 %bb.16, 4, implicit $eflags, debug-location !17 + + bb.3: + liveins: $rsi, $rdi + CMP64ri8 renamable $rdi, 3, implicit-def $eflags, debug-location !17 + JCC_1 %bb.15, 4, implicit $eflags, debug-location !17 + + bb.4: + liveins: $rsi, $rdi + CMP64ri8 renamable $rdi, 4, implicit-def $eflags, debug-location !17 + JCC_1 %bb.14, 4, implicit $eflags, debug-location !17 + + bb.5: + liveins: $rsi, $rdi + DBG_VALUE $rsi, $noreg, !16, !DIExpression(), debug-location !17 + CMP64ri8 renamable $rdi, 4, implicit-def $eflags, debug-location !17 + JCC_1 %bb.13, 4, implicit $eflags, debug-location !17 + + bb.6: + liveins: $rsi, $rdi + CMP64ri8 renamable $rdi, 4, implicit-def $eflags, debug-location !17 + JCC_1 %bb.12, 4, implicit $eflags, debug-location !17 + + bb.7: + liveins: $rsi, $rdi + CMP64ri8 renamable $rdi, 4, implicit-def $eflags, debug-location !17 + JCC_1 %bb.11, 4, implicit $eflags, debug-location !17 + + bb.8: + liveins: $rsi, $rdi + CMP64ri8 renamable $rdi, 4, implicit-def $eflags, debug-location !17 + JCC_1 %bb.10, 4, implicit $eflags, debug-location !17 + + bb.9: + liveins: $rsi, $rdi, $eflags + ;$rsi = MOV64ri 0, debug-location !17 + ;JMP_1 %bb.1, debug-location !17 + + bb.10: + liveins: $rsi, $rdi, $eflags + JCC_1 %bb.8, 4, implicit $eflags, debug-location !17 + + bb.11: + liveins: $rsi, $rdi, $eflags + JCC_1 %bb.7, 4, implicit $eflags, debug-location !17 + + bb.12: + liveins: $rsi, $rdi, $eflags + JCC_1 %bb.6, 4, implicit $eflags, debug-location !17 + + bb.13: + liveins: $rsi, $rdi, $eflags + JCC_1 %bb.5, 4, implicit $eflags, debug-location !17 + + bb.14: + liveins: $rsi, $rdi, $eflags + $rsi = MOV64ri 0, debug-location !17 + JCC_1 %bb.4, 4, implicit $eflags, debug-location !17 + + bb.15: + liveins: $rsi, $rdi, $eflags + JCC_1 %bb.3, 4, implicit $eflags, debug-location !17 + + bb.16: + liveins: $rsi, $rdi, $eflags + JCC_1 %bb.2, 4, implicit $eflags, debug-location !17 + + bb.17: + liveins: $rsi, $rdi, $eflags + JCC_1 %bb.1, 4, implicit $eflags, debug-location !17 + + bb.18: + liveins: $rsi, $rdi, $eflags + RETQ + +... 
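The lattice descent that the comments in livedebugvalues_many_loop_heads.mir
describe can be sketched in a few lines as well. This is an illustrative
model under invented names, not the actual implementation: loop heads are
considered from outermost to innermost, and a head whose loop body
(including everything nested inside it) may clobber the register keeps a
genuine PHI value, while the first clobber-free head, and by containment
every head nested inside it, sees the incoming value as live-through.

#include <vector>

// Hypothetical description of one loop in a nest, listed outermost first.
// BodyClobbersReg is true if any block inside the loop, including nested
// loops, may redefine the register. Because an inner loop's body is
// contained in its parent's body, the flags are monotone: once a head is
// clobber-free, so is every head nested inside it.
struct Loop {
  bool BodyClobbersReg;
};

// Return the depth of the outermost loop head at which the register's
// value is live-through (no PHI required from there inward), or -1 if
// every level of the nest needs a PHI.
int firstLiveThroughDepth(const std::vector<Loop> &Nest) {
  for (int Depth = 0; Depth < static_cast<int>(Nest.size()); ++Depth)
    if (!Nest[Depth].BodyClobbersReg)
      return Depth;
  return -1;
}

For the nest in the test, the def of $rsi in bb.14 makes the outer four
loops clobbering and the inner four clean, so the sketch returns depth 4:
PHI values at bb.1 through bb.4, live-through from bb.5 inward, matching
the CHECK lines.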
From 6b5b6511a52276820d4a2e8529370a67cf0bd746 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 10 Sep 2020 16:38:12 -0400 Subject: [PATCH 0378/1079] [InstCombine] add/move tests for ptr diff; NFC --- llvm/test/Transforms/InstCombine/sub-gep.ll | 186 ++++++++++++++++++++ llvm/test/Transforms/InstCombine/sub.ll | 159 ----------------- 2 files changed, 186 insertions(+), 159 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index f31eeb46d8823..ce9657433bb78 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -instcombine < %s | FileCheck %s +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + define i64 @test_inbounds([0 x i32]* %base, i64 %idx) { ; CHECK-LABEL: @test_inbounds( ; CHECK-NEXT: [[P2_IDX:%.*]] = shl nsw i64 [[IDX:%.*]], 2 @@ -151,3 +153,187 @@ define i64 @test_inbounds_nuw_multi_index([0 x [2 x i32]]* %base, i64 %idx, i64 %d = sub nuw i64 %i2, %i1 ret i64 %d } + +; rdar://7362831 +define i32 @test23(i8* %P, i64 %A){ +; CHECK-LABEL: @test23( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i32 +; CHECK-NEXT: ret i32 [[TMP1]] +; + %B = getelementptr inbounds i8, i8* %P, i64 %A + %C = ptrtoint i8* %B to i64 + %D = trunc i64 %C to i32 + %E = ptrtoint i8* %P to i64 + %F = trunc i64 %E to i32 + %G = sub i32 %D, %F + ret i32 %G +} + +define i8 @test23_as1(i8 addrspace(1)* %P, i16 %A) { +; CHECK-LABEL: @test23_as1( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i16 [[A:%.*]] to i8 +; CHECK-NEXT: ret i8 [[TMP1]] +; + %B = getelementptr inbounds i8, i8 addrspace(1)* %P, i16 %A + %C = ptrtoint i8 addrspace(1)* %B to i16 + %D = trunc i16 %C to i8 + %E = ptrtoint i8 addrspace(1)* %P to i16 + %F = trunc i16 %E to i8 + %G = sub i8 %D, %F + ret i8 %G +} + +define i64 @test24(i8* %P, i64 %A){ +; CHECK-LABEL: @test24( +; CHECK-NEXT: ret i64 [[A:%.*]] +; + %B = getelementptr inbounds i8, i8* %P, i64 %A + %C = ptrtoint i8* %B to i64 + %E = ptrtoint i8* %P to i64 + %G = sub i64 %C, %E + ret i64 %G +} + +define i16 @test24_as1(i8 addrspace(1)* %P, i16 %A) { +; CHECK-LABEL: @test24_as1( +; CHECK-NEXT: ret i16 [[A:%.*]] +; + %B = getelementptr inbounds i8, i8 addrspace(1)* %P, i16 %A + %C = ptrtoint i8 addrspace(1)* %B to i16 + %E = ptrtoint i8 addrspace(1)* %P to i16 + %G = sub i16 %C, %E + ret i16 %G +} + +define i64 @test24a(i8* %P, i64 %A){ +; CHECK-LABEL: @test24a( +; CHECK-NEXT: [[DIFF_NEG:%.*]] = sub i64 0, [[A:%.*]] +; CHECK-NEXT: ret i64 [[DIFF_NEG]] +; + %B = getelementptr inbounds i8, i8* %P, i64 %A + %C = ptrtoint i8* %B to i64 + %E = ptrtoint i8* %P to i64 + %G = sub i64 %E, %C + ret i64 %G +} + +define i16 @test24a_as1(i8 addrspace(1)* %P, i16 %A) { +; CHECK-LABEL: @test24a_as1( +; CHECK-NEXT: [[DIFF_NEG:%.*]] = sub i16 0, [[A:%.*]] +; CHECK-NEXT: ret i16 [[DIFF_NEG]] +; + %B = getelementptr inbounds i8, i8 addrspace(1)* %P, i16 %A + %C = ptrtoint i8 addrspace(1)* %B to i16 + %E = ptrtoint i8 addrspace(1)* %P to i16 + %G = sub i16 %E, %C + ret i16 %G +} + +@Arr = external global [42 x i16] + +define i64 @test24b(i8* %P, i64 %A){ +; CHECK-LABEL: @test24b( +; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i64 [[A:%.*]], 1 +; CHECK-NEXT: ret i64 [[B_IDX]] +; + %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A + %C = ptrtoint i16* %B to i64 + %G = sub 
i64 %C, ptrtoint ([42 x i16]* @Arr to i64) + ret i64 %G +} + +define i64 @test25(i8* %P, i64 %A){ +; CHECK-LABEL: @test25( +; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i64 [[A:%.*]], 1 +; CHECK-NEXT: [[GEPDIFF:%.*]] = add i64 [[B_IDX]], -84 +; CHECK-NEXT: ret i64 [[GEPDIFF]] +; + %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A + %C = ptrtoint i16* %B to i64 + %G = sub i64 %C, ptrtoint (i16* getelementptr ([42 x i16], [42 x i16]* @Arr, i64 1, i64 0) to i64) + ret i64 %G +} + +@Arr_as1 = external addrspace(1) global [42 x i16] + +define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) { +; CHECK-LABEL: @test25_as1( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i16 +; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i16 [[TMP1]], 1 +; CHECK-NEXT: [[GEPDIFF:%.*]] = add i16 [[B_IDX]], -84 +; CHECK-NEXT: ret i16 [[GEPDIFF]] +; + %B = getelementptr inbounds [42 x i16], [42 x i16] addrspace(1)* @Arr_as1, i64 0, i64 %A + %C = ptrtoint i16 addrspace(1)* %B to i16 + %G = sub i16 %C, ptrtoint (i16 addrspace(1)* getelementptr ([42 x i16], [42 x i16] addrspace(1)* @Arr_as1, i64 1, i64 0) to i16) + ret i16 %G +} + +define i64 @test30(i8* %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @test30( +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i64 [[I:%.*]], 2 +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] +; + %bit = bitcast i8* %foo to i32* + %gep1 = getelementptr inbounds i32, i32* %bit, i64 %i + %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j + %cast1 = ptrtoint i32* %gep1 to i64 + %cast2 = ptrtoint i8* %gep2 to i64 + %sub = sub i64 %cast1, %cast2 + ret i64 %sub +} + +define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { +; CHECK-LABEL: @test30_as1( +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i16 [[I:%.*]], 2 +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i16 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: ret i16 [[GEPDIFF]] +; + %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* + %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i16 %i + %gep2 = getelementptr inbounds i8, i8 addrspace(1)* %foo, i16 %j + %cast1 = ptrtoint i32 addrspace(1)* %gep1 to i16 + %cast2 = ptrtoint i8 addrspace(1)* %gep2 to i16 + %sub = sub i16 %cast1, %cast2 + ret i16 %sub +} + +define i64 @gep_diff_both_inbounds(i8* %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @gep_diff_both_inbounds( +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] +; + %gep1 = getelementptr inbounds i8, i8* %foo, i64 %i + %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j + %cast1 = ptrtoint i8* %gep1 to i64 + %cast2 = ptrtoint i8* %gep2 to i64 + %sub = sub i64 %cast1, %cast2 + ret i64 %sub +} + +define i64 @gep_diff_first_inbounds(i8* %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @gep_diff_first_inbounds( +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] +; + %gep1 = getelementptr inbounds i8, i8* %foo, i64 %i + %gep2 = getelementptr i8, i8* %foo, i64 %j + %cast1 = ptrtoint i8* %gep1 to i64 + %cast2 = ptrtoint i8* %gep2 to i64 + %sub = sub i64 %cast1, %cast2 + ret i64 %sub +} + +define i64 @gep_diff_second_inbounds(i8* %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @gep_diff_second_inbounds( +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] +; + %gep1 = getelementptr i8, i8* %foo, i64 %i + %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j + %cast1 = ptrtoint i8* %gep1 to i64 + %cast2 = ptrtoint i8* %gep2 to i64 + %sub = sub i64 %cast1, %cast2 + ret i64 %sub +} diff --git 
a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index 437d8f8c5c023..98d8a9e6b5ca6 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -414,122 +414,6 @@ define zeroext i1 @test22(i32 %a, i32 %b) nounwind { ret i1 %i5 } -; rdar://7362831 -define i32 @test23(i8* %P, i64 %A){ -; CHECK-LABEL: @test23( -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i32 -; CHECK-NEXT: ret i32 [[TMP1]] -; - %B = getelementptr inbounds i8, i8* %P, i64 %A - %C = ptrtoint i8* %B to i64 - %D = trunc i64 %C to i32 - %E = ptrtoint i8* %P to i64 - %F = trunc i64 %E to i32 - %G = sub i32 %D, %F - ret i32 %G -} - -define i8 @test23_as1(i8 addrspace(1)* %P, i16 %A) { -; CHECK-LABEL: @test23_as1( -; CHECK-NEXT: [[TMP1:%.*]] = trunc i16 [[A:%.*]] to i8 -; CHECK-NEXT: ret i8 [[TMP1]] -; - %B = getelementptr inbounds i8, i8 addrspace(1)* %P, i16 %A - %C = ptrtoint i8 addrspace(1)* %B to i16 - %D = trunc i16 %C to i8 - %E = ptrtoint i8 addrspace(1)* %P to i16 - %F = trunc i16 %E to i8 - %G = sub i8 %D, %F - ret i8 %G -} - -define i64 @test24(i8* %P, i64 %A){ -; CHECK-LABEL: @test24( -; CHECK-NEXT: ret i64 [[A:%.*]] -; - %B = getelementptr inbounds i8, i8* %P, i64 %A - %C = ptrtoint i8* %B to i64 - %E = ptrtoint i8* %P to i64 - %G = sub i64 %C, %E - ret i64 %G -} - -define i16 @test24_as1(i8 addrspace(1)* %P, i16 %A) { -; CHECK-LABEL: @test24_as1( -; CHECK-NEXT: ret i16 [[A:%.*]] -; - %B = getelementptr inbounds i8, i8 addrspace(1)* %P, i16 %A - %C = ptrtoint i8 addrspace(1)* %B to i16 - %E = ptrtoint i8 addrspace(1)* %P to i16 - %G = sub i16 %C, %E - ret i16 %G -} - -define i64 @test24a(i8* %P, i64 %A){ -; CHECK-LABEL: @test24a( -; CHECK-NEXT: [[DIFF_NEG:%.*]] = sub i64 0, [[A:%.*]] -; CHECK-NEXT: ret i64 [[DIFF_NEG]] -; - %B = getelementptr inbounds i8, i8* %P, i64 %A - %C = ptrtoint i8* %B to i64 - %E = ptrtoint i8* %P to i64 - %G = sub i64 %E, %C - ret i64 %G -} - -define i16 @test24a_as1(i8 addrspace(1)* %P, i16 %A) { -; CHECK-LABEL: @test24a_as1( -; CHECK-NEXT: [[DIFF_NEG:%.*]] = sub i16 0, [[A:%.*]] -; CHECK-NEXT: ret i16 [[DIFF_NEG]] -; - %B = getelementptr inbounds i8, i8 addrspace(1)* %P, i16 %A - %C = ptrtoint i8 addrspace(1)* %B to i16 - %E = ptrtoint i8 addrspace(1)* %P to i16 - %G = sub i16 %E, %C - ret i16 %G -} - - -@Arr = external global [42 x i16] - -define i64 @test24b(i8* %P, i64 %A){ -; CHECK-LABEL: @test24b( -; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i64 [[A:%.*]], 1 -; CHECK-NEXT: ret i64 [[B_IDX]] -; - %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A - %C = ptrtoint i16* %B to i64 - %G = sub i64 %C, ptrtoint ([42 x i16]* @Arr to i64) - ret i64 %G -} - -define i64 @test25(i8* %P, i64 %A){ -; CHECK-LABEL: @test25( -; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i64 [[A:%.*]], 1 -; CHECK-NEXT: [[GEPDIFF:%.*]] = add i64 [[B_IDX]], -84 -; CHECK-NEXT: ret i64 [[GEPDIFF]] -; - %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A - %C = ptrtoint i16* %B to i64 - %G = sub i64 %C, ptrtoint (i16* getelementptr ([42 x i16], [42 x i16]* @Arr, i64 1, i64 0) to i64) - ret i64 %G -} - -@Arr_as1 = external addrspace(1) global [42 x i16] - -define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) { -; CHECK-LABEL: @test25_as1( -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i16 -; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i16 [[TMP1]], 1 -; CHECK-NEXT: [[GEPDIFF:%.*]] = add i16 [[B_IDX]], -84 -; CHECK-NEXT: ret i16 [[GEPDIFF]] -; - %B = getelementptr inbounds [42 x i16], [42 x i16] addrspace(1)* @Arr_as1, i64 
0, i64 %A - %C = ptrtoint i16 addrspace(1)* %B to i16 - %G = sub i16 %C, ptrtoint (i16 addrspace(1)* getelementptr ([42 x i16], [42 x i16] addrspace(1)* @Arr_as1, i64 1, i64 0) to i16) - ret i16 %G -} - define i32 @test26(i32 %x) { ; CHECK-LABEL: @test26( ; CHECK-NEXT: [[SHL_NEG:%.*]] = shl i32 -3, [[X:%.*]] @@ -823,49 +707,6 @@ define i32 @test28commuted(i32 %x, i32 %y, i32 %z) { ret i32 %sub } -define i64 @test29(i8* %foo, i64 %i, i64 %j) { -; CHECK-LABEL: @test29( -; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] -; CHECK-NEXT: ret i64 [[GEPDIFF]] -; - %gep1 = getelementptr inbounds i8, i8* %foo, i64 %i - %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j - %cast1 = ptrtoint i8* %gep1 to i64 - %cast2 = ptrtoint i8* %gep2 to i64 - %sub = sub i64 %cast1, %cast2 - ret i64 %sub -} - -define i64 @test30(i8* %foo, i64 %i, i64 %j) { -; CHECK-LABEL: @test30( -; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i64 [[I:%.*]], 2 -; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[GEP1_IDX]], [[J:%.*]] -; CHECK-NEXT: ret i64 [[GEPDIFF]] -; - %bit = bitcast i8* %foo to i32* - %gep1 = getelementptr inbounds i32, i32* %bit, i64 %i - %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j - %cast1 = ptrtoint i32* %gep1 to i64 - %cast2 = ptrtoint i8* %gep2 to i64 - %sub = sub i64 %cast1, %cast2 - ret i64 %sub -} - -define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { -; CHECK-LABEL: @test30_as1( -; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i16 [[I:%.*]], 2 -; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i16 [[GEP1_IDX]], [[J:%.*]] -; CHECK-NEXT: ret i16 [[GEPDIFF]] -; - %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* - %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i16 %i - %gep2 = getelementptr inbounds i8, i8 addrspace(1)* %foo, i16 %j - %cast1 = ptrtoint i32 addrspace(1)* %gep1 to i16 - %cast2 = ptrtoint i8 addrspace(1)* %gep2 to i16 - %sub = sub i16 %cast1, %cast2 - ret i16 %sub -} - define <2 x i64> @test31(<2 x i64> %A) { ; CHECK-LABEL: @test31( ; CHECK-NEXT: [[SUB:%.*]] = add <2 x i64> [[A:%.*]], From 324a53205a3af979e3de109fdd52f91781816cba Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 10 Sep 2020 17:09:36 -0400 Subject: [PATCH 0379/1079] [InstCombine] propagate 'nsw' on pointer difference of 'inbounds' geps (PR47430) There's no signed wrap if both geps have 'inbounds': https://alive2.llvm.org/ce/z/nZkQTg https://alive2.llvm.org/ce/z/7qFauh --- .../Transforms/InstCombine/InstCombineAddSub.cpp | 7 ++++--- llvm/test/Transforms/InstCombine/sub-gep.ll | 16 +++++++++++----- llvm/test/Transforms/InstCombine/sub.ll | 2 +- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 5ce32bc592d05..a5dd8f6d7c9d0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1671,11 +1671,12 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS, I->getOpcode() == Instruction::Mul) I->setHasNoUnsignedWrap(); - // If we had a constant expression GEP on the other side offsetting the - // pointer, subtract it from the offset we have. + // If we have a 2nd GEP of the same base pointer, subtract the offsets. + // If both GEPs are inbounds, then the subtract does not have signed overflow. 
if (GEP2) { Value *Offset = EmitGEPOffset(GEP2); - Result = Builder.CreateSub(Result, Offset, "gepdiff"); + Result = Builder.CreateSub(Result, Offset, "gepdiff", /* NUW */ false, + GEP1->isInBounds() && GEP2->isInBounds()); } // If we have p - gep(p, ...) then we have to negate the result. diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index ce9657433bb78..ee0c9ffaa0ef2 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -245,7 +245,7 @@ define i64 @test24b(i8* %P, i64 %A){ define i64 @test25(i8* %P, i64 %A){ ; CHECK-LABEL: @test25( ; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i64 [[A:%.*]], 1 -; CHECK-NEXT: [[GEPDIFF:%.*]] = add i64 [[B_IDX]], -84 +; CHECK-NEXT: [[GEPDIFF:%.*]] = add nsw i64 [[B_IDX]], -84 ; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A @@ -260,7 +260,7 @@ define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) { ; CHECK-LABEL: @test25_as1( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i16 ; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i16 [[TMP1]], 1 -; CHECK-NEXT: [[GEPDIFF:%.*]] = add i16 [[B_IDX]], -84 +; CHECK-NEXT: [[GEPDIFF:%.*]] = add nsw i16 [[B_IDX]], -84 ; CHECK-NEXT: ret i16 [[GEPDIFF]] ; %B = getelementptr inbounds [42 x i16], [42 x i16] addrspace(1)* @Arr_as1, i64 0, i64 %A @@ -272,7 +272,7 @@ define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) { define i64 @test30(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test30( ; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i64 [[I:%.*]], 2 -; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[GEP1_IDX]], [[J:%.*]] ; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %bit = bitcast i8* %foo to i32* @@ -287,7 +287,7 @@ define i64 @test30(i8* %foo, i64 %i, i64 %j) { define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { ; CHECK-LABEL: @test30_as1( ; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i16 [[I:%.*]], 2 -; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i16 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i16 [[GEP1_IDX]], [[J:%.*]] ; CHECK-NEXT: ret i16 [[GEPDIFF]] ; %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* @@ -299,9 +299,11 @@ define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { ret i16 %sub } +; Inbounds translates to 'nsw' on sub + define i64 @gep_diff_both_inbounds(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @gep_diff_both_inbounds( -; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[I:%.*]], [[J:%.*]] ; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %gep1 = getelementptr inbounds i8, i8* %foo, i64 %i @@ -312,6 +314,8 @@ define i64 @gep_diff_both_inbounds(i8* %foo, i64 %i, i64 %j) { ret i64 %sub } +; Negative test for 'nsw' - both geps must be inbounds + define i64 @gep_diff_first_inbounds(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @gep_diff_first_inbounds( ; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] @@ -325,6 +329,8 @@ define i64 @gep_diff_first_inbounds(i8* %foo, i64 %i, i64 %j) { ret i64 %sub } +; Negative test for 'nsw' - both geps must be inbounds + define i64 @gep_diff_second_inbounds(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @gep_diff_second_inbounds( ; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index 98d8a9e6b5ca6..0940a08bbb443 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ 
b/llvm/test/Transforms/InstCombine/sub.ll @@ -1077,7 +1077,7 @@ define i64 @test58([100 x [100 x i8]]* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test58( ; CHECK-NEXT: [[GEP1_OFFS:%.*]] = add i64 [[I:%.*]], 4200 ; CHECK-NEXT: [[GEP2_OFFS:%.*]] = add i64 [[J:%.*]], 4200 -; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[GEP1_OFFS]], [[GEP2_OFFS]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[GEP1_OFFS]], [[GEP2_OFFS]] ; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %gep1 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %i From 4c14ee61b73746b314d83e7c52e03d6527b78105 Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Fri, 11 Sep 2020 08:56:10 +0000 Subject: [PATCH 0380/1079] [SyntaxTree] Rename functions to start with verb According to LLVM coding standards: https://llvm.org/docs/CodingStandards.html#name-types-functions-variables-and-enumerators-properly Differential Revision: https://reviews.llvm.org/D87498 --- clang/include/clang/Tooling/Syntax/Nodes.h | 178 +++++++++--------- clang/include/clang/Tooling/Syntax/Tree.h | 38 ++-- clang/lib/Tooling/Syntax/BuildTree.cpp | 43 +++-- .../Tooling/Syntax/ComputeReplacements.cpp | 15 +- clang/lib/Tooling/Syntax/Mutations.cpp | 20 +- clang/lib/Tooling/Syntax/Nodes.cpp | 8 +- clang/lib/Tooling/Syntax/Synthesis.cpp | 4 +- clang/lib/Tooling/Syntax/Tree.cpp | 80 ++++---- .../Tooling/Syntax/BuildTreeTest.cpp | 6 +- .../Tooling/Syntax/SynthesisTest.cpp | 2 +- .../unittests/Tooling/Syntax/TreeTestBase.cpp | 8 +- 11 files changed, 208 insertions(+), 194 deletions(-) diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h index a6505c8167eed..8b393c5423b4d 100644 --- a/clang/include/clang/Tooling/Syntax/Nodes.h +++ b/clang/include/clang/Tooling/Syntax/Nodes.h @@ -190,7 +190,7 @@ class TranslationUnit final : public Tree { public: TranslationUnit() : Tree(NodeKind::TranslationUnit) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::TranslationUnit; + return N->getKind() == NodeKind::TranslationUnit; } }; @@ -200,8 +200,8 @@ class Expression : public Tree { public: Expression(NodeKind K) : Tree(K) {} static bool classof(const Node *N) { - return NodeKind::UnknownExpression <= N->kind() && - N->kind() <= NodeKind::UnknownExpression; + return NodeKind::UnknownExpression <= N->getKind() && + N->getKind() <= NodeKind::UnknownExpression; } }; @@ -211,10 +211,10 @@ class NameSpecifier : public Tree { public: NameSpecifier(NodeKind K) : Tree(K) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::GlobalNameSpecifier || - N->kind() == NodeKind::DecltypeNameSpecifier || - N->kind() == NodeKind::IdentifierNameSpecifier || - N->kind() == NodeKind::SimpleTemplateNameSpecifier; + return N->getKind() == NodeKind::GlobalNameSpecifier || + N->getKind() == NodeKind::DecltypeNameSpecifier || + N->getKind() == NodeKind::IdentifierNameSpecifier || + N->getKind() == NodeKind::SimpleTemplateNameSpecifier; } }; @@ -226,7 +226,7 @@ class GlobalNameSpecifier final : public NameSpecifier { public: GlobalNameSpecifier() : NameSpecifier(NodeKind::GlobalNameSpecifier) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::GlobalNameSpecifier; + return N->getKind() == NodeKind::GlobalNameSpecifier; } }; @@ -236,7 +236,7 @@ class DecltypeNameSpecifier final : public NameSpecifier { public: DecltypeNameSpecifier() : NameSpecifier(NodeKind::DecltypeNameSpecifier) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::DecltypeNameSpecifier; + return 
N->getKind() == NodeKind::DecltypeNameSpecifier;
   }
 };
@@ -247,7 +247,7 @@ class IdentifierNameSpecifier final : public NameSpecifier {
   IdentifierNameSpecifier()
       : NameSpecifier(NodeKind::IdentifierNameSpecifier) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::IdentifierNameSpecifier;
+    return N->getKind() == NodeKind::IdentifierNameSpecifier;
   }
 };
@@ -259,7 +259,7 @@ class SimpleTemplateNameSpecifier final : public NameSpecifier {
   SimpleTemplateNameSpecifier()
       : NameSpecifier(NodeKind::SimpleTemplateNameSpecifier) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::SimpleTemplateNameSpecifier;
+    return N->getKind() == NodeKind::SimpleTemplateNameSpecifier;
   }
 };
@@ -269,7 +269,7 @@ class NestedNameSpecifier final : public List {
 public:
   NestedNameSpecifier() : List(NodeKind::NestedNameSpecifier) {}
   static bool classof(const Node *N) {
-    return N->kind() <= NodeKind::NestedNameSpecifier;
+    return N->getKind() <= NodeKind::NestedNameSpecifier;
   }
   std::vector<NameSpecifier *> getSpecifiers();
   std::vector<std::pair<NameSpecifier *, Leaf *>>
@@ -282,7 +282,7 @@ class UnqualifiedId final : public Tree {
 public:
   UnqualifiedId() : Tree(NodeKind::UnqualifiedId) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::UnqualifiedId;
+    return N->getKind() == NodeKind::UnqualifiedId;
   }
 };
@@ -297,7 +297,7 @@ class IdExpression final : public Expression {
 public:
   IdExpression() : Expression(NodeKind::IdExpression) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::IdExpression;
+    return N->getKind() == NodeKind::IdExpression;
   }
   NestedNameSpecifier *getQualifier();
   Leaf *getTemplateKeyword();
@@ -310,7 +310,7 @@ class UnknownExpression final : public Expression {
 public:
   UnknownExpression() : Expression(NodeKind::UnknownExpression) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::UnknownExpression;
+    return N->getKind() == NodeKind::UnknownExpression;
   }
 };
@@ -319,7 +319,7 @@ class ThisExpression final : public Expression {
 public:
   ThisExpression() : Expression(NodeKind::ThisExpression) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::ThisExpression;
+    return N->getKind() == NodeKind::ThisExpression;
   }
   Leaf *getThisKeyword();
 };
@@ -333,7 +333,7 @@ class CallArguments final : public List {
 public:
   CallArguments() : List(NodeKind::CallArguments) {}
   static bool classof(const Node *N) {
-    return N->kind() <= NodeKind::CallArguments;
+    return N->getKind() <= NodeKind::CallArguments;
   }
   std::vector<Expression *> getArguments();
   std::vector<std::pair<Expression *, Leaf *>> getArgumentsAndCommas();
@@ -347,7 +347,7 @@ class CallExpression final : public Expression {
 public:
   CallExpression() : Expression(NodeKind::CallExpression) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::CallExpression;
+    return N->getKind() == NodeKind::CallExpression;
   }
   Expression *getCallee();
   Leaf *getOpenParen();
@@ -361,7 +361,7 @@ class ParenExpression final : public Expression {
 public:
   ParenExpression() : Expression(NodeKind::ParenExpression) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::ParenExpression;
+    return N->getKind() == NodeKind::ParenExpression;
   }
   Leaf *getOpenParen();
   Expression *getSubExpression();
@@ -380,7 +380,7 @@ class MemberExpression final : public Expression {
 public:
   MemberExpression() : Expression(NodeKind::MemberExpression) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::MemberExpression;
+    return N->getKind() == NodeKind::MemberExpression;
   }
   Expression *getObject();
   Leaf *getAccessToken();
@@ -393,16 +393,16
@@ class LiteralExpression : public Expression { public: LiteralExpression(NodeKind K) : Expression(K) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::IntegerLiteralExpression || - N->kind() == NodeKind::CharacterLiteralExpression || - N->kind() == NodeKind::FloatingLiteralExpression || - N->kind() == NodeKind::StringLiteralExpression || - N->kind() == NodeKind::BoolLiteralExpression || - N->kind() == NodeKind::CxxNullPtrExpression || - N->kind() == NodeKind::IntegerUserDefinedLiteralExpression || - N->kind() == NodeKind::FloatUserDefinedLiteralExpression || - N->kind() == NodeKind::CharUserDefinedLiteralExpression || - N->kind() == NodeKind::StringUserDefinedLiteralExpression; + return N->getKind() == NodeKind::IntegerLiteralExpression || + N->getKind() == NodeKind::CharacterLiteralExpression || + N->getKind() == NodeKind::FloatingLiteralExpression || + N->getKind() == NodeKind::StringLiteralExpression || + N->getKind() == NodeKind::BoolLiteralExpression || + N->getKind() == NodeKind::CxxNullPtrExpression || + N->getKind() == NodeKind::IntegerUserDefinedLiteralExpression || + N->getKind() == NodeKind::FloatUserDefinedLiteralExpression || + N->getKind() == NodeKind::CharUserDefinedLiteralExpression || + N->getKind() == NodeKind::StringUserDefinedLiteralExpression; } Leaf *getLiteralToken(); }; @@ -413,7 +413,7 @@ class IntegerLiteralExpression final : public LiteralExpression { IntegerLiteralExpression() : LiteralExpression(NodeKind::IntegerLiteralExpression) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::IntegerLiteralExpression; + return N->getKind() == NodeKind::IntegerLiteralExpression; } }; @@ -423,7 +423,7 @@ class CharacterLiteralExpression final : public LiteralExpression { CharacterLiteralExpression() : LiteralExpression(NodeKind::CharacterLiteralExpression) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::CharacterLiteralExpression; + return N->getKind() == NodeKind::CharacterLiteralExpression; } }; @@ -433,7 +433,7 @@ class FloatingLiteralExpression final : public LiteralExpression { FloatingLiteralExpression() : LiteralExpression(NodeKind::FloatingLiteralExpression) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::FloatingLiteralExpression; + return N->getKind() == NodeKind::FloatingLiteralExpression; } }; @@ -443,7 +443,7 @@ class StringLiteralExpression final : public LiteralExpression { StringLiteralExpression() : LiteralExpression(NodeKind::StringLiteralExpression) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::StringLiteralExpression; + return N->getKind() == NodeKind::StringLiteralExpression; } }; @@ -453,7 +453,7 @@ class BoolLiteralExpression final : public LiteralExpression { BoolLiteralExpression() : LiteralExpression(NodeKind::BoolLiteralExpression) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::BoolLiteralExpression; + return N->getKind() == NodeKind::BoolLiteralExpression; } }; @@ -462,7 +462,7 @@ class CxxNullPtrExpression final : public LiteralExpression { public: CxxNullPtrExpression() : LiteralExpression(NodeKind::CxxNullPtrExpression) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::CxxNullPtrExpression; + return N->getKind() == NodeKind::CxxNullPtrExpression; } }; @@ -476,10 +476,10 @@ class UserDefinedLiteralExpression : public LiteralExpression { public: UserDefinedLiteralExpression(NodeKind K) : LiteralExpression(K) {} static bool classof(const Node *N) { - return N->kind() == 
NodeKind::IntegerUserDefinedLiteralExpression || - N->kind() == NodeKind::FloatUserDefinedLiteralExpression || - N->kind() == NodeKind::CharUserDefinedLiteralExpression || - N->kind() == NodeKind::StringUserDefinedLiteralExpression; + return N->getKind() == NodeKind::IntegerUserDefinedLiteralExpression || + N->getKind() == NodeKind::FloatUserDefinedLiteralExpression || + N->getKind() == NodeKind::CharUserDefinedLiteralExpression || + N->getKind() == NodeKind::StringUserDefinedLiteralExpression; } }; @@ -491,7 +491,7 @@ class IntegerUserDefinedLiteralExpression final : UserDefinedLiteralExpression( NodeKind::IntegerUserDefinedLiteralExpression) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::IntegerUserDefinedLiteralExpression; + return N->getKind() == NodeKind::IntegerUserDefinedLiteralExpression; } }; @@ -503,7 +503,7 @@ class FloatUserDefinedLiteralExpression final : UserDefinedLiteralExpression( NodeKind::FloatUserDefinedLiteralExpression) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::FloatUserDefinedLiteralExpression; + return N->getKind() == NodeKind::FloatUserDefinedLiteralExpression; } }; @@ -515,7 +515,7 @@ class CharUserDefinedLiteralExpression final : UserDefinedLiteralExpression( NodeKind::CharUserDefinedLiteralExpression) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::CharUserDefinedLiteralExpression; + return N->getKind() == NodeKind::CharUserDefinedLiteralExpression; } }; @@ -527,7 +527,7 @@ class StringUserDefinedLiteralExpression final : UserDefinedLiteralExpression( NodeKind::StringUserDefinedLiteralExpression) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::StringUserDefinedLiteralExpression; + return N->getKind() == NodeKind::StringUserDefinedLiteralExpression; } }; @@ -536,8 +536,8 @@ class UnaryOperatorExpression : public Expression { public: UnaryOperatorExpression(NodeKind K) : Expression(K) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::PrefixUnaryOperatorExpression || - N->kind() == NodeKind::PostfixUnaryOperatorExpression; + return N->getKind() == NodeKind::PrefixUnaryOperatorExpression || + N->getKind() == NodeKind::PostfixUnaryOperatorExpression; } Leaf *getOperatorToken(); Expression *getOperand(); @@ -557,7 +557,7 @@ class PrefixUnaryOperatorExpression final : public UnaryOperatorExpression { PrefixUnaryOperatorExpression() : UnaryOperatorExpression(NodeKind::PrefixUnaryOperatorExpression) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::PrefixUnaryOperatorExpression; + return N->getKind() == NodeKind::PrefixUnaryOperatorExpression; } }; @@ -571,7 +571,7 @@ class PostfixUnaryOperatorExpression final : public UnaryOperatorExpression { PostfixUnaryOperatorExpression() : UnaryOperatorExpression(NodeKind::PostfixUnaryOperatorExpression) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::PostfixUnaryOperatorExpression; + return N->getKind() == NodeKind::PostfixUnaryOperatorExpression; } }; @@ -586,7 +586,7 @@ class BinaryOperatorExpression final : public Expression { public: BinaryOperatorExpression() : Expression(NodeKind::BinaryOperatorExpression) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::BinaryOperatorExpression; + return N->getKind() == NodeKind::BinaryOperatorExpression; } Expression *getLhs(); Leaf *getOperatorToken(); @@ -599,8 +599,8 @@ class Statement : public Tree { public: Statement(NodeKind K) : Tree(K) {} static bool classof(const Node *N) { - return 
NodeKind::UnknownStatement <= N->kind() && - N->kind() <= NodeKind::CompoundStatement; + return NodeKind::UnknownStatement <= N->getKind() && + N->getKind() <= NodeKind::CompoundStatement; } }; @@ -610,7 +610,7 @@ class UnknownStatement final : public Statement { public: UnknownStatement() : Statement(NodeKind::UnknownStatement) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::UnknownStatement; + return N->getKind() == NodeKind::UnknownStatement; } }; @@ -619,7 +619,7 @@ class DeclarationStatement final : public Statement { public: DeclarationStatement() : Statement(NodeKind::DeclarationStatement) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::DeclarationStatement; + return N->getKind() == NodeKind::DeclarationStatement; } }; @@ -628,7 +628,7 @@ class EmptyStatement final : public Statement { public: EmptyStatement() : Statement(NodeKind::EmptyStatement) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::EmptyStatement; + return N->getKind() == NodeKind::EmptyStatement; } }; @@ -637,7 +637,7 @@ class SwitchStatement final : public Statement { public: SwitchStatement() : Statement(NodeKind::SwitchStatement) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::SwitchStatement; + return N->getKind() == NodeKind::SwitchStatement; } Leaf *getSwitchKeyword(); Statement *getBody(); @@ -648,7 +648,7 @@ class CaseStatement final : public Statement { public: CaseStatement() : Statement(NodeKind::CaseStatement) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::CaseStatement; + return N->getKind() == NodeKind::CaseStatement; } Leaf *getCaseKeyword(); Expression *getCaseValue(); @@ -660,7 +660,7 @@ class DefaultStatement final : public Statement { public: DefaultStatement() : Statement(NodeKind::DefaultStatement) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::DefaultStatement; + return N->getKind() == NodeKind::DefaultStatement; } Leaf *getDefaultKeyword(); Statement *getBody(); @@ -672,7 +672,7 @@ class IfStatement final : public Statement { public: IfStatement() : Statement(NodeKind::IfStatement) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::IfStatement; + return N->getKind() == NodeKind::IfStatement; } Leaf *getIfKeyword(); Statement *getThenStatement(); @@ -685,7 +685,7 @@ class ForStatement final : public Statement { public: ForStatement() : Statement(NodeKind::ForStatement) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::ForStatement; + return N->getKind() == NodeKind::ForStatement; } Leaf *getForKeyword(); Statement *getBody(); @@ -696,7 +696,7 @@ class WhileStatement final : public Statement { public: WhileStatement() : Statement(NodeKind::WhileStatement) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::WhileStatement; + return N->getKind() == NodeKind::WhileStatement; } Leaf *getWhileKeyword(); Statement *getBody(); @@ -707,7 +707,7 @@ class ContinueStatement final : public Statement { public: ContinueStatement() : Statement(NodeKind::ContinueStatement) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::ContinueStatement; + return N->getKind() == NodeKind::ContinueStatement; } Leaf *getContinueKeyword(); }; @@ -717,7 +717,7 @@ class BreakStatement final : public Statement { public: BreakStatement() : Statement(NodeKind::BreakStatement) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::BreakStatement; + return N->getKind() == 
NodeKind::BreakStatement; } Leaf *getBreakKeyword(); }; @@ -728,7 +728,7 @@ class ReturnStatement final : public Statement { public: ReturnStatement() : Statement(NodeKind::ReturnStatement) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::ReturnStatement; + return N->getKind() == NodeKind::ReturnStatement; } Leaf *getReturnKeyword(); Expression *getReturnValue(); @@ -739,7 +739,7 @@ class RangeBasedForStatement final : public Statement { public: RangeBasedForStatement() : Statement(NodeKind::RangeBasedForStatement) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::RangeBasedForStatement; + return N->getKind() == NodeKind::RangeBasedForStatement; } Leaf *getForKeyword(); Statement *getBody(); @@ -751,7 +751,7 @@ class ExpressionStatement final : public Statement { public: ExpressionStatement() : Statement(NodeKind::ExpressionStatement) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::ExpressionStatement; + return N->getKind() == NodeKind::ExpressionStatement; } Expression *getExpression(); }; @@ -761,7 +761,7 @@ class CompoundStatement final : public Statement { public: CompoundStatement() : Statement(NodeKind::CompoundStatement) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::CompoundStatement; + return N->getKind() == NodeKind::CompoundStatement; } Leaf *getLbrace(); /// FIXME: use custom iterator instead of 'vector'. @@ -777,8 +777,8 @@ class Declaration : public Tree { public: Declaration(NodeKind K) : Tree(K) {} static bool classof(const Node *N) { - return NodeKind::UnknownDeclaration <= N->kind() && - N->kind() <= NodeKind::TypeAliasDeclaration; + return NodeKind::UnknownDeclaration <= N->getKind() && + N->getKind() <= NodeKind::TypeAliasDeclaration; } }; @@ -787,7 +787,7 @@ class UnknownDeclaration final : public Declaration { public: UnknownDeclaration() : Declaration(NodeKind::UnknownDeclaration) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::UnknownDeclaration; + return N->getKind() == NodeKind::UnknownDeclaration; } }; @@ -796,7 +796,7 @@ class EmptyDeclaration final : public Declaration { public: EmptyDeclaration() : Declaration(NodeKind::EmptyDeclaration) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::EmptyDeclaration; + return N->getKind() == NodeKind::EmptyDeclaration; } }; @@ -806,7 +806,7 @@ class StaticAssertDeclaration final : public Declaration { public: StaticAssertDeclaration() : Declaration(NodeKind::StaticAssertDeclaration) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::StaticAssertDeclaration; + return N->getKind() == NodeKind::StaticAssertDeclaration; } Expression *getCondition(); Expression *getMessage(); @@ -819,7 +819,7 @@ class LinkageSpecificationDeclaration final : public Declaration { LinkageSpecificationDeclaration() : Declaration(NodeKind::LinkageSpecificationDeclaration) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::LinkageSpecificationDeclaration; + return N->getKind() == NodeKind::LinkageSpecificationDeclaration; } }; @@ -830,7 +830,7 @@ class SimpleDeclaration final : public Declaration { public: SimpleDeclaration() : Declaration(NodeKind::SimpleDeclaration) {} static bool classof(const Node *N) { - return N->kind() == NodeKind::SimpleDeclaration; + return N->getKind() == NodeKind::SimpleDeclaration; } /// FIXME: use custom iterator instead of 'vector'. 
std::vector<SimpleDeclarator *> getDeclarators();
@@ -841,7 +841,7 @@ class TemplateDeclaration final : public Declaration {
 public:
   TemplateDeclaration() : Declaration(NodeKind::TemplateDeclaration) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::TemplateDeclaration;
+    return N->getKind() == NodeKind::TemplateDeclaration;
   }
   Leaf *getTemplateKeyword();
   Declaration *getDeclaration();
@@ -857,7 +857,7 @@ class ExplicitTemplateInstantiation final : public Declaration {
   ExplicitTemplateInstantiation()
       : Declaration(NodeKind::ExplicitTemplateInstantiation) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::ExplicitTemplateInstantiation;
+    return N->getKind() == NodeKind::ExplicitTemplateInstantiation;
   }
   Leaf *getTemplateKeyword();
   Leaf *getExternKeyword();
@@ -869,7 +869,7 @@ class NamespaceDefinition final : public Declaration {
 public:
   NamespaceDefinition() : Declaration(NodeKind::NamespaceDefinition) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::NamespaceDefinition;
+    return N->getKind() == NodeKind::NamespaceDefinition;
   }
 };
@@ -879,7 +879,7 @@ class NamespaceAliasDefinition final : public Declaration {
   NamespaceAliasDefinition()
       : Declaration(NodeKind::NamespaceAliasDefinition) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::NamespaceAliasDefinition;
+    return N->getKind() == NodeKind::NamespaceAliasDefinition;
   }
 };
@@ -888,7 +888,7 @@ class UsingNamespaceDirective final : public Declaration {
 public:
   UsingNamespaceDirective() : Declaration(NodeKind::UsingNamespaceDirective) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::UsingNamespaceDirective;
+    return N->getKind() == NodeKind::UsingNamespaceDirective;
   }
 };
@@ -898,7 +898,7 @@ class UsingDeclaration final : public Declaration {
 public:
   UsingDeclaration() : Declaration(NodeKind::UsingDeclaration) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::UsingDeclaration;
+    return N->getKind() == NodeKind::UsingDeclaration;
   }
 };
@@ -907,7 +907,7 @@ class TypeAliasDeclaration final : public Declaration {
 public:
   TypeAliasDeclaration() : Declaration(NodeKind::TypeAliasDeclaration) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::TypeAliasDeclaration;
+    return N->getKind() == NodeKind::TypeAliasDeclaration;
   }
 };
@@ -927,8 +927,8 @@ class Declarator : public Tree {
 public:
   Declarator(NodeKind K) : Tree(K) {}
   static bool classof(const Node *N) {
-    return NodeKind::SimpleDeclarator <= N->kind() &&
-           N->kind() <= NodeKind::ParenDeclarator;
+    return NodeKind::SimpleDeclarator <= N->getKind() &&
+           N->getKind() <= NodeKind::ParenDeclarator;
   }
 };
@@ -938,7 +938,7 @@ class SimpleDeclarator final : public Declarator {
 public:
   SimpleDeclarator() : Declarator(NodeKind::SimpleDeclarator) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::SimpleDeclarator;
+    return N->getKind() == NodeKind::SimpleDeclarator;
   }
 };
@@ -949,7 +949,7 @@ class ParenDeclarator final : public Declarator {
 public:
   ParenDeclarator() : Declarator(NodeKind::ParenDeclarator) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::ParenDeclarator;
+    return N->getKind() == NodeKind::ParenDeclarator;
   }
   Leaf *getLparen();
   Leaf *getRparen();
@@ -963,7 +963,7 @@ class ArraySubscript final : public Tree {
 public:
   ArraySubscript() : Tree(NodeKind::ArraySubscript) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::ArraySubscript;
+    return N->getKind() == NodeKind::ArraySubscript;
   }
   // TODO: add an
accessor for the "static" keyword.
   Leaf *getLbracket();
@@ -977,7 +977,7 @@ class TrailingReturnType final : public Tree {
 public:
   TrailingReturnType() : Tree(NodeKind::TrailingReturnType) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::TrailingReturnType;
+    return N->getKind() == NodeKind::TrailingReturnType;
   }
   // TODO: add accessors for specifiers.
   Leaf *getArrowToken();
@@ -992,7 +992,7 @@ class ParameterDeclarationList final : public List {
 public:
   ParameterDeclarationList() : List(NodeKind::ParameterDeclarationList) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::ParameterDeclarationList;
+    return N->getKind() == NodeKind::ParameterDeclarationList;
   }
   std::vector<SimpleDeclaration *> getParameterDeclarations();
   std::vector<std::pair<SimpleDeclaration *, Leaf *>>
@@ -1014,7 +1014,7 @@ class ParametersAndQualifiers final : public Tree {
 public:
   ParametersAndQualifiers() : Tree(NodeKind::ParametersAndQualifiers) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::ParametersAndQualifiers;
+    return N->getKind() == NodeKind::ParametersAndQualifiers;
  }
   Leaf *getLparen();
   ParameterDeclarationList *getParameters();
@@ -1028,7 +1028,7 @@ class MemberPointer final : public Tree {
 public:
   MemberPointer() : Tree(NodeKind::MemberPointer) {}
   static bool classof(const Node *N) {
-    return N->kind() == NodeKind::MemberPointer;
+    return N->getKind() == NodeKind::MemberPointer;
   }
 };
diff --git a/clang/include/clang/Tooling/Syntax/Tree.h b/clang/include/clang/Tooling/Syntax/Tree.h
index f7f9e6bdc5a09..aab904ab65d32 100644
--- a/clang/include/clang/Tooling/Syntax/Tree.h
+++ b/clang/include/clang/Tooling/Syntax/Tree.h
@@ -41,11 +41,11 @@ class Arena {
   Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
         const TokenBuffer &Tokens);

-  const SourceManager &sourceManager() const { return SourceMgr; }
-  const LangOptions &langOptions() const { return LangOpts; }
+  const SourceManager &getSourceManager() const { return SourceMgr; }
+  const LangOptions &getLangOptions() const { return LangOpts; }

-  const TokenBuffer &tokenBuffer() const;
-  llvm::BumpPtrAllocator &allocator() { return Allocator; }
+  const TokenBuffer &getTokenBuffer() const;
+  llvm::BumpPtrAllocator &getAllocator() { return Allocator; }

   /// Add \p Buffer to the underlying source manager, tokenize it and store the
   /// resulting tokens. Useful when there is a need to materialize tokens that
@@ -79,8 +79,8 @@ class Node {
   /// set when the node is added as a child to another one.
   Node(NodeKind Kind);

-  NodeKind kind() const { return static_cast<NodeKind>(Kind); }
-  NodeRole role() const { return static_cast<NodeRole>(Role); }
+  NodeKind getKind() const { return static_cast<NodeKind>(Kind); }
+  NodeRole getRole() const { return static_cast<NodeRole>(Role); }

   /// Whether the node is detached from a tree, i.e. does not have a parent.
   bool isDetached() const;
@@ -99,11 +99,11 @@ class Node {
   /// modifiable.
   bool canModify() const { return CanModify; }

-  const Tree *parent() const { return Parent; }
-  Tree *parent() { return Parent; }
+  const Tree *getParent() const { return Parent; }
+  Tree *getParent() { return Parent; }

-  const Node *nextSibling() const { return NextSibling; }
-  Node *nextSibling() { return NextSibling; }
+  const Node *getNextSibling() const { return NextSibling; }
+  Node *getNextSibling() { return NextSibling; }

   /// Dumps the structure of a subtree. For debugging and testing purposes.
std::string dump(const SourceManager &SM) const; @@ -142,7 +142,7 @@ class Leaf final : public Node { Leaf(const Token *T); static bool classof(const Node *N); - const Token *token() const { return Tok; } + const Token *getToken() const { return Tok; } private: const Token *Tok; @@ -154,16 +154,18 @@ class Tree : public Node { using Node::Node; static bool classof(const Node *N); - Node *firstChild() { return FirstChild; } - const Node *firstChild() const { return FirstChild; } + Node *getFirstChild() { return FirstChild; } + const Node *getFirstChild() const { return FirstChild; } - Leaf *firstLeaf(); - const Leaf *firstLeaf() const { - return const_cast(this)->firstLeaf(); + Leaf *findFirstLeaf(); + const Leaf *findFirstLeaf() const { + return const_cast(this)->findFirstLeaf(); } - Leaf *lastLeaf(); - const Leaf *lastLeaf() const { return const_cast(this)->lastLeaf(); } + Leaf *findLastLeaf(); + const Leaf *findLastLeaf() const { + return const_cast(this)->findLastLeaf(); + } protected: /// Find the first node with a corresponding role. diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index 1942290b5abc5..8de50dd02162a 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -366,12 +366,14 @@ class ASTToSyntaxMapping { class syntax::TreeBuilder { public: TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) { - for (const auto &T : Arena.tokenBuffer().expandedTokens()) + for (const auto &T : Arena.getTokenBuffer().expandedTokens()) LocationToToken.insert({T.location().getRawEncoding(), &T}); } - llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } - const SourceManager &sourceManager() const { return Arena.sourceManager(); } + llvm::BumpPtrAllocator &allocator() { return Arena.getAllocator(); } + const SourceManager &sourceManager() const { + return Arena.getSourceManager(); + } /// Populate children for \p New node, assuming it covers tokens from \p /// Range. @@ -421,13 +423,13 @@ class syntax::TreeBuilder { /// Finish building the tree and consume the root node. syntax::TranslationUnit *finalize() && { - auto Tokens = Arena.tokenBuffer().expandedTokens(); + auto Tokens = Arena.getTokenBuffer().expandedTokens(); assert(!Tokens.empty()); assert(Tokens.back().kind() == tok::eof); // Build the root of the tree, consuming all the children. Pending.foldChildren(Arena, Tokens.drop_back(), - new (Arena.allocator()) syntax::TranslationUnit); + new (Arena.getAllocator()) syntax::TranslationUnit); auto *TU = cast(std::move(Pending).finalize()); TU->assertInvariantsRecursive(); @@ -451,7 +453,7 @@ class syntax::TreeBuilder { assert(First.isValid()); assert(Last.isValid()); assert(First == Last || - Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); + Arena.getSourceManager().isBeforeInTranslationUnit(First, Last)); return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); } @@ -540,7 +542,7 @@ class syntax::TreeBuilder { } void setRole(syntax::Node *N, NodeRole R) { - assert(N->role() == NodeRole::Detached); + assert(N->getRole() == NodeRole::Detached); N->setRole(R); } @@ -552,14 +554,14 @@ class syntax::TreeBuilder { /// Ensures that added nodes properly nest and cover the whole token stream. 
struct Forest { Forest(syntax::Arena &A) { - assert(!A.tokenBuffer().expandedTokens().empty()); - assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); + assert(!A.getTokenBuffer().expandedTokens().empty()); + assert(A.getTokenBuffer().expandedTokens().back().kind() == tok::eof); // Create all leaf nodes. // Note that we do not have 'eof' in the tree. - for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) { - auto *L = new (A.allocator()) syntax::Leaf(&T); + for (auto &T : A.getTokenBuffer().expandedTokens().drop_back()) { + auto *L = new (A.getAllocator()) syntax::Leaf(&T); L->Original = true; - L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); + L->CanModify = A.getTokenBuffer().spelledForExpanded(T).hasValue(); Trees.insert(Trees.end(), {&T, L}); } } @@ -572,7 +574,7 @@ class syntax::TreeBuilder { assert((std::next(It) == Trees.end() || std::next(It)->first == Range.end()) && "no child with the specified range"); - assert(It->second->role() == NodeRole::Detached && + assert(It->second->getRole() == NodeRole::Detached && "re-assigning role for a child"); It->second->setRole(Role); } @@ -581,7 +583,7 @@ class syntax::TreeBuilder { void foldChildren(const syntax::Arena &A, ArrayRef Tokens, syntax::Tree *Node) { // Attach children to `Node`. - assert(Node->firstChild() == nullptr && "node already has children"); + assert(Node->getFirstChild() == nullptr && "node already has children"); auto *FirstToken = Tokens.begin(); auto BeginChildren = Trees.lower_bound(FirstToken); @@ -597,14 +599,15 @@ class syntax::TreeBuilder { // We need to go in reverse order, because we can only prepend. for (auto It = EndChildren; It != BeginChildren; --It) { auto *C = std::prev(It)->second; - if (C->role() == NodeRole::Detached) + if (C->getRole() == NodeRole::Detached) C->setRole(NodeRole::Unknown); Node->prependChildLowLevel(C); } // Mark that this node came from the AST and is backed by the source code. Node->Original = true; - Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); + Node->CanModify = + A.getTokenBuffer().spelledForExpanded(Tokens).hasValue(); Trees.erase(BeginChildren, EndChildren); Trees.insert({FirstToken, Node}); @@ -624,12 +627,12 @@ class syntax::TreeBuilder { unsigned CoveredTokens = It != Trees.end() ? 
(std::next(It)->first - It->first) - : A.tokenBuffer().expandedTokens().end() - It->first; + : A.getTokenBuffer().expandedTokens().end() - It->first; R += std::string( - formatv("- '{0}' covers '{1}'+{2} tokens\n", It->second->kind(), - It->first->text(A.sourceManager()), CoveredTokens)); - R += It->second->dump(A.sourceManager()); + formatv("- '{0}' covers '{1}'+{2} tokens\n", It->second->getKind(), + It->first->text(A.getSourceManager()), CoveredTokens)); + R += It->second->dump(A.getSourceManager()); } return R; } diff --git a/clang/lib/Tooling/Syntax/ComputeReplacements.cpp b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp index 30b3ee17d0926..93b1c4416bf45 100644 --- a/clang/lib/Tooling/Syntax/ComputeReplacements.cpp +++ b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp @@ -32,13 +32,14 @@ void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) { private: void process(const syntax::Node *N) { if (auto *T = dyn_cast(N)) { - for (auto *C = T->firstChild(); C != nullptr; C = C->nextSibling()) + for (auto *C = T->getFirstChild(); C != nullptr; + C = C->getNextSibling()) process(C); return; } auto *L = cast(N); - if (SpanEnd == L->token() && SpanIsOriginal == L->isOriginal()) { + if (SpanEnd == L->getToken() && SpanIsOriginal == L->isOriginal()) { // Extend the current span. ++SpanEnd; return; @@ -47,7 +48,7 @@ void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) { if (SpanBegin) Callback(llvm::makeArrayRef(SpanBegin, SpanEnd), SpanIsOriginal); // Start recording a new span. - SpanBegin = L->token(); + SpanBegin = L->getToken(); SpanEnd = SpanBegin + 1; SpanIsOriginal = L->isOriginal(); } @@ -63,8 +64,8 @@ void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) { syntax::FileRange rangeOfExpanded(const syntax::Arena &A, llvm::ArrayRef Expanded) { - auto &Buffer = A.tokenBuffer(); - auto &SM = A.sourceManager(); + auto &Buffer = A.getTokenBuffer(); + auto &SM = A.getSourceManager(); // Check that \p Expanded actually points into expanded tokens. assert(Buffer.expandedTokens().begin() <= Expanded.begin()); @@ -84,8 +85,8 @@ syntax::FileRange rangeOfExpanded(const syntax::Arena &A, tooling::Replacements syntax::computeReplacements(const syntax::Arena &A, const syntax::TranslationUnit &TU) { - auto &Buffer = A.tokenBuffer(); - auto &SM = A.sourceManager(); + auto &Buffer = A.getTokenBuffer(); + auto &SM = A.getSourceManager(); tooling::Replacements Replacements; // Text inserted by the replacement we are building now. diff --git a/clang/lib/Tooling/Syntax/Mutations.cpp b/clang/lib/Tooling/Syntax/Mutations.cpp index 24048b297a112..bf1bcda26455b 100644 --- a/clang/lib/Tooling/Syntax/Mutations.cpp +++ b/clang/lib/Tooling/Syntax/Mutations.cpp @@ -36,7 +36,7 @@ class syntax::MutationsImpl { assert(Role != NodeRole::Detached); New->setRole(Role); - auto *P = Anchor->parent(); + auto *P = Anchor->getParent(); P->replaceChildRangeLowLevel(Anchor, Anchor, New); P->assertInvariants(); @@ -52,16 +52,16 @@ class syntax::MutationsImpl { assert(New->isDetached()); New->Role = Old->Role; - auto *P = Old->parent(); - P->replaceChildRangeLowLevel(findPrevious(Old), Old->nextSibling(), New); + auto *P = Old->getParent(); + P->replaceChildRangeLowLevel(findPrevious(Old), Old->getNextSibling(), New); P->assertInvariants(); } /// Completely remove the node from its parent. 
static void remove(syntax::Node *N) { - auto *P = N->parent(); - P->replaceChildRangeLowLevel(findPrevious(N), N->nextSibling(), + auto *P = N->getParent(); + P->replaceChildRangeLowLevel(findPrevious(N), N->getNextSibling(), /*New=*/nullptr); P->assertInvariants(); @@ -70,11 +70,11 @@ class syntax::MutationsImpl { private: static syntax::Node *findPrevious(syntax::Node *N) { - if (N->parent()->firstChild() == N) + if (N->getParent()->getFirstChild() == N) return nullptr; - for (syntax::Node *C = N->parent()->firstChild(); C != nullptr; - C = C->nextSibling()) { - if (C->nextSibling() == N) + for (syntax::Node *C = N->getParent()->getFirstChild(); C != nullptr; + C = C->getNextSibling()) { + if (C->getNextSibling() == N) return C; } llvm_unreachable("could not find a child node"); @@ -85,7 +85,7 @@ void syntax::removeStatement(syntax::Arena &A, syntax::Statement *S) { assert(S); assert(S->canModify()); - if (isa(S->parent())) { + if (isa(S->getParent())) { // A child of CompoundStatement can just be safely removed. MutationsImpl::remove(S); return; diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp index 6102c45a08e4d..bb63585cbd7c4 100644 --- a/clang/lib/Tooling/Syntax/Nodes.cpp +++ b/clang/lib/Tooling/Syntax/Nodes.cpp @@ -501,8 +501,8 @@ syntax::Leaf *syntax::CompoundStatement::getLbrace() { std::vector syntax::CompoundStatement::getStatements() { std::vector Children; - for (auto *C = firstChild(); C; C = C->nextSibling()) { - assert(C->role() == syntax::NodeRole::Statement); + for (auto *C = getFirstChild(); C; C = C->getNextSibling()) { + assert(C->getRole() == syntax::NodeRole::Statement); Children.push_back(cast(C)); } return Children; @@ -524,8 +524,8 @@ syntax::Expression *syntax::StaticAssertDeclaration::getMessage() { std::vector syntax::SimpleDeclaration::getDeclarators() { std::vector Children; - for (auto *C = firstChild(); C; C = C->nextSibling()) { - if (C->role() == syntax::NodeRole::Declarator) + for (auto *C = getFirstChild(); C; C = C->getNextSibling()) { + if (C->getRole() == syntax::NodeRole::Declarator) Children.push_back(cast(C)); } return Children; diff --git a/clang/lib/Tooling/Syntax/Synthesis.cpp b/clang/lib/Tooling/Syntax/Synthesis.cpp index aa01a34c761fd..701a1e60a4f38 100644 --- a/clang/lib/Tooling/Syntax/Synthesis.cpp +++ b/clang/lib/Tooling/Syntax/Synthesis.cpp @@ -28,7 +28,7 @@ clang::syntax::Leaf *syntax::createPunctuation(clang::syntax::Arena &A, .second; assert(Tokens.size() == 1); assert(Tokens.front().kind() == K); - auto *L = new (A.allocator()) clang::syntax::Leaf(Tokens.begin()); + auto *L = new (A.getAllocator()) clang::syntax::Leaf(Tokens.begin()); FactoryImpl::setCanModify(L); L->assertInvariants(); return L; @@ -36,7 +36,7 @@ clang::syntax::Leaf *syntax::createPunctuation(clang::syntax::Arena &A, clang::syntax::EmptyStatement * syntax::createEmptyStatement(clang::syntax::Arena &A) { - auto *S = new (A.allocator()) clang::syntax::EmptyStatement; + auto *S = new (A.getAllocator()) clang::syntax::EmptyStatement; FactoryImpl::setCanModify(S); FactoryImpl::prependChildLowLevel(S, createPunctuation(A, clang::tok::semi), NodeRole::Unknown); diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp index 2cef806937bfc..f9d1fa6110ffc 100644 --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ b/clang/lib/Tooling/Syntax/Tree.cpp @@ -19,7 +19,7 @@ namespace { static void traverse(const syntax::Node *N, llvm::function_ref Visit) { if (auto *T = dyn_cast(N)) { - for (auto *C = T->firstChild(); C; C = 
C->nextSibling()) + for (auto *C = T->getFirstChild(); C; C = C->getNextSibling()) traverse(C, Visit); } Visit(N); @@ -36,7 +36,9 @@ syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, const TokenBuffer &Tokens) : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {} -const syntax::TokenBuffer &syntax::Arena::tokenBuffer() const { return Tokens; } +const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const { + return Tokens; +} std::pair> syntax::Arena::lexBuffer(std::unique_ptr Input) { @@ -51,7 +53,7 @@ syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) { } bool syntax::Leaf::classof(const Node *N) { - return N->kind() == NodeKind::Leaf; + return N->getKind() == NodeKind::Leaf; } syntax::Node::Node(NodeKind Kind) @@ -60,16 +62,20 @@ syntax::Node::Node(NodeKind Kind) this->setRole(NodeRole::Detached); } -bool syntax::Node::isDetached() const { return role() == NodeRole::Detached; } +bool syntax::Node::isDetached() const { + return getRole() == NodeRole::Detached; +} void syntax::Node::setRole(NodeRole NR) { this->Role = static_cast(NR); } -bool syntax::Tree::classof(const Node *N) { return N->kind() > NodeKind::Leaf; } +bool syntax::Tree::classof(const Node *N) { + return N->getKind() > NodeKind::Leaf; +} void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) { - assert(Child->role() == NodeRole::Detached); + assert(Child->getRole() == NodeRole::Detached); assert(Role != NodeRole::Detached); Child->setRole(Role); @@ -79,7 +85,7 @@ void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) { void syntax::Tree::prependChildLowLevel(Node *Child) { assert(Child->Parent == nullptr); assert(Child->NextSibling == nullptr); - assert(Child->role() != NodeRole::Detached); + assert(Child->getRole() != NodeRole::Detached); Child->Parent = this; Child->NextSibling = this->FirstChild; @@ -91,15 +97,15 @@ void syntax::Tree::replaceChildRangeLowLevel(Node *BeforeBegin, Node *End, assert(!BeforeBegin || BeforeBegin->Parent == this); #ifndef NDEBUG - for (auto *N = New; N; N = N->nextSibling()) { + for (auto *N = New; N; N = N->getNextSibling()) { assert(N->Parent == nullptr); - assert(N->role() != NodeRole::Detached && "Roles must be set"); + assert(N->getRole() != NodeRole::Detached && "Roles must be set"); // FIXME: sanity-check the role. } #endif // Detach old nodes. - for (auto *N = !BeforeBegin ? FirstChild : BeforeBegin->nextSibling(); + for (auto *N = !BeforeBegin ? FirstChild : BeforeBegin->getNextSibling(); N != End;) { auto *Next = N->NextSibling; @@ -120,7 +126,7 @@ void syntax::Tree::replaceChildRangeLowLevel(Node *BeforeBegin, Node *End, if (New) { auto *Last = New; - for (auto *N = New; N != nullptr; N = N->nextSibling()) { + for (auto *N = New; N != nullptr; N = N->getNextSibling()) { Last = N; N->Parent = this; } @@ -136,7 +142,7 @@ namespace { static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L, const SourceManager &SM) { assert(L); - const auto *Token = L->token(); + const auto *Token = L->getToken(); assert(Token); // Handle 'eof' separately, calling text() on it produces an empty string. 
if (Token->kind() == tok::eof) @@ -148,8 +154,8 @@ static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L, static void dumpNode(raw_ostream &OS, const syntax::Node *N, const SourceManager &SM, std::vector IndentMask) { auto dumpExtraInfo = [&OS](const syntax::Node *N) { - if (N->role() != syntax::NodeRole::Unknown) - OS << " " << N->role(); + if (N->getRole() != syntax::NodeRole::Unknown) + OS << " " << N->getRole(); if (!N->isOriginal()) OS << " synthesized"; if (!N->canModify()) @@ -167,18 +173,18 @@ static void dumpNode(raw_ostream &OS, const syntax::Node *N, } const auto *T = cast(N); - OS << T->kind(); + OS << T->getKind(); dumpExtraInfo(N); OS << "\n"; - for (const auto *It = T->firstChild(); It; It = It->nextSibling()) { + for (const auto *It = T->getFirstChild(); It; It = It->getNextSibling()) { for (bool Filled : IndentMask) { if (Filled) OS << "| "; else OS << " "; } - if (!It->nextSibling()) { + if (!It->getNextSibling()) { OS << "`-"; IndentMask.push_back(false); } else { @@ -213,18 +219,18 @@ std::string syntax::Node::dumpTokens(const SourceManager &SM) const { void syntax::Node::assertInvariants() const { #ifndef NDEBUG if (isDetached()) - assert(parent() == nullptr); + assert(getParent() == nullptr); else - assert(parent() != nullptr); + assert(getParent() != nullptr); auto *T = dyn_cast(this); if (!T) return; - for (auto *C = T->firstChild(); C; C = C->nextSibling()) { + for (auto *C = T->getFirstChild(); C; C = C->getNextSibling()) { if (T->isOriginal()) assert(C->isOriginal()); assert(!C->isDetached()); - assert(C->parent() == T); + assert(C->getParent() == T); } #endif } @@ -235,9 +241,9 @@ void syntax::Node::assertInvariantsRecursive() const { #endif } -syntax::Leaf *syntax::Tree::firstLeaf() { +syntax::Leaf *syntax::Tree::findFirstLeaf() { auto *T = this; - while (auto *C = T->firstChild()) { + while (auto *C = T->getFirstChild()) { if (auto *L = dyn_cast(C)) return L; T = cast(C); @@ -245,11 +251,11 @@ syntax::Leaf *syntax::Tree::firstLeaf() { return nullptr; } -syntax::Leaf *syntax::Tree::lastLeaf() { +syntax::Leaf *syntax::Tree::findLastLeaf() { auto *T = this; - while (auto *C = T->firstChild()) { + while (auto *C = T->getFirstChild()) { // Find the last child. 
- while (auto *Next = C->nextSibling()) + while (auto *Next = C->getNextSibling()) C = Next; if (auto *L = dyn_cast(C)) @@ -260,8 +266,8 @@ syntax::Leaf *syntax::Tree::lastLeaf() { } syntax::Node *syntax::Tree::findChild(NodeRole R) { - for (auto *C = FirstChild; C; C = C->nextSibling()) { - if (C->role() == R) + for (auto *C = FirstChild; C; C = C->getNextSibling()) { + if (C->getRole() == R) return C; } return nullptr; @@ -269,13 +275,13 @@ syntax::Node *syntax::Tree::findChild(NodeRole R) { std::vector> syntax::List::getElementsAsNodesAndDelimiters() { - if (!firstChild()) + if (!getFirstChild()) return {}; auto children = std::vector>(); syntax::Node *elementWithoutDelimiter = nullptr; - for (auto *C = firstChild(); C; C = C->nextSibling()) { - switch (C->role()) { + for (auto *C = getFirstChild(); C; C = C->getNextSibling()) { + switch (C->getRole()) { case syntax::NodeRole::ListElement: { if (elementWithoutDelimiter) { children.push_back({elementWithoutDelimiter, nullptr}); @@ -314,13 +320,13 @@ syntax::List::getElementsAsNodesAndDelimiters() { // Almost the same implementation of `getElementsAsNodesAndDelimiters` but // ignoring delimiters std::vector syntax::List::getElementsAsNodes() { - if (!firstChild()) + if (!getFirstChild()) return {}; auto children = std::vector(); syntax::Node *elementWithoutDelimiter = nullptr; - for (auto *C = firstChild(); C; C = C->nextSibling()) { - switch (C->role()) { + for (auto *C = getFirstChild(); C; C = C->getNextSibling()) { + switch (C->getRole()) { case syntax::NodeRole::ListElement: { if (elementWithoutDelimiter) { children.push_back(elementWithoutDelimiter); @@ -356,7 +362,7 @@ std::vector syntax::List::getElementsAsNodes() { } clang::tok::TokenKind syntax::List::getDelimiterTokenKind() { - switch (this->kind()) { + switch (this->getKind()) { case NodeKind::NestedNameSpecifier: return clang::tok::coloncolon; case NodeKind::CallArguments: @@ -369,7 +375,7 @@ clang::tok::TokenKind syntax::List::getDelimiterTokenKind() { } syntax::List::TerminationKind syntax::List::getTerminationKind() { - switch (this->kind()) { + switch (this->getKind()) { case NodeKind::NestedNameSpecifier: return TerminationKind::Terminated; case NodeKind::CallArguments: @@ -382,7 +388,7 @@ syntax::List::TerminationKind syntax::List::getTerminationKind() { } bool syntax::List::canBeEmpty() { - switch (this->kind()) { + switch (this->getKind()) { case NodeKind::NestedNameSpecifier: return false; case NodeKind::CallArguments: diff --git a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp index 6fcc74ba55d0c..95ebeb2c59403 100644 --- a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp @@ -28,7 +28,7 @@ class BuildSyntaxTreeTest : public SyntaxTreeTest { << "Source file has syntax errors, they were printed to the test " "log"; } - auto Actual = StringRef(Root->dump(Arena->sourceManager())).trim().str(); + auto Actual = StringRef(Root->dump(Arena->getSourceManager())).trim().str(); // EXPECT_EQ shows the diff between the two strings if they are different. 
EXPECT_EQ(Tree.trim().str(), Actual); if (Actual != Tree.trim().str()) { @@ -63,7 +63,9 @@ class BuildSyntaxTreeTest : public SyntaxTreeTest { auto *AnnotatedNode = nodeByRange(AnnotatedRanges[i], Root); assert(AnnotatedNode); auto AnnotatedNodeDump = - StringRef(AnnotatedNode->dump(Arena->sourceManager())).trim().str(); + StringRef(AnnotatedNode->dump(Arena->getSourceManager())) + .trim() + .str(); // EXPECT_EQ shows the diff between the two strings if they are different. EXPECT_EQ(TreeDumps[i].trim().str(), AnnotatedNodeDump) << "Dumps diverged for the code:\n" diff --git a/clang/unittests/Tooling/Syntax/SynthesisTest.cpp b/clang/unittests/Tooling/Syntax/SynthesisTest.cpp index db4ee6b585fb5..884f3797edef2 100644 --- a/clang/unittests/Tooling/Syntax/SynthesisTest.cpp +++ b/clang/unittests/Tooling/Syntax/SynthesisTest.cpp @@ -26,7 +26,7 @@ TEST_P(SyntaxTreeTest, Leaf_Punctuation) { auto *C = syntax::createPunctuation(*Arena, tok::comma); ASSERT_NE(C, nullptr); - EXPECT_EQ(C->token()->kind(), tok::comma); + EXPECT_EQ(C->getToken()->kind(), tok::comma); EXPECT_TRUE(C->canModify()); EXPECT_FALSE(C->isOriginal()); EXPECT_TRUE(C->isDetached()); diff --git a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp index 3618949c36ae2..2305b78006b1e 100644 --- a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp @@ -38,10 +38,10 @@ namespace { ArrayRef tokens(syntax::Node *N) { assert(N->isOriginal() && "tokens of modified nodes are not well-defined"); if (auto *L = dyn_cast(N)) - return llvm::makeArrayRef(L->token(), 1); + return llvm::makeArrayRef(L->getToken(), 1); auto *T = cast(N); - return llvm::makeArrayRef(T->firstLeaf()->token(), - T->lastLeaf()->token() + 1); + return llvm::makeArrayRef(T->findFirstLeaf()->getToken(), + T->findLastLeaf()->getToken() + 1); } } // namespace @@ -170,7 +170,7 @@ syntax::Node *SyntaxTreeTest::nodeByRange(llvm::Annotations::Range R, auto *T = dyn_cast(Root); if (!T) return nullptr; - for (auto *C = T->firstChild(); C != nullptr; C = C->nextSibling()) { + for (auto *C = T->getFirstChild(); C != nullptr; C = C->getNextSibling()) { if (auto *Result = nodeByRange(R, C)) return Result; } From 6aa3fc4a5b88bd0175212e06b183c87cf87c306c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 11 Sep 2020 10:51:14 -0400 Subject: [PATCH 0381/1079] Revert "[InstCombine] propagate 'nsw' on pointer difference of 'inbounds' geps (PR47430)" This reverts commit 324a53205a3af979e3de109fdd52f91781816cba. On closer examination of at least one of the test diffs, this does not appear to be correct in all cases. Even the existing 'nsw' creation may be wrong based on this example: https://alive2.llvm.org/ce/z/uL4Hw9 https://alive2.llvm.org/ce/z/fJMKQS --- .../Transforms/InstCombine/InstCombineAddSub.cpp | 7 +++---- llvm/test/Transforms/InstCombine/sub-gep.ll | 16 +++++----------- llvm/test/Transforms/InstCombine/sub.ll | 2 +- 3 files changed, 9 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index a5dd8f6d7c9d0..5ce32bc592d05 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1671,12 +1671,11 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS, I->getOpcode() == Instruction::Mul) I->setHasNoUnsignedWrap(); - // If we have a 2nd GEP of the same base pointer, subtract the offsets. 
- // If both GEPs are inbounds, then the subtract does not have signed overflow. + // If we had a constant expression GEP on the other side offsetting the + // pointer, subtract it from the offset we have. if (GEP2) { Value *Offset = EmitGEPOffset(GEP2); - Result = Builder.CreateSub(Result, Offset, "gepdiff", /* NUW */ false, - GEP1->isInBounds() && GEP2->isInBounds()); + Result = Builder.CreateSub(Result, Offset, "gepdiff"); } // If we have p - gep(p, ...) then we have to negate the result. diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index ee0c9ffaa0ef2..ce9657433bb78 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -245,7 +245,7 @@ define i64 @test24b(i8* %P, i64 %A){ define i64 @test25(i8* %P, i64 %A){ ; CHECK-LABEL: @test25( ; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i64 [[A:%.*]], 1 -; CHECK-NEXT: [[GEPDIFF:%.*]] = add nsw i64 [[B_IDX]], -84 +; CHECK-NEXT: [[GEPDIFF:%.*]] = add i64 [[B_IDX]], -84 ; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A @@ -260,7 +260,7 @@ define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) { ; CHECK-LABEL: @test25_as1( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i16 ; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i16 [[TMP1]], 1 -; CHECK-NEXT: [[GEPDIFF:%.*]] = add nsw i16 [[B_IDX]], -84 +; CHECK-NEXT: [[GEPDIFF:%.*]] = add i16 [[B_IDX]], -84 ; CHECK-NEXT: ret i16 [[GEPDIFF]] ; %B = getelementptr inbounds [42 x i16], [42 x i16] addrspace(1)* @Arr_as1, i64 0, i64 %A @@ -272,7 +272,7 @@ define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) { define i64 @test30(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test30( ; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i64 [[I:%.*]], 2 -; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[GEP1_IDX]], [[J:%.*]] ; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %bit = bitcast i8* %foo to i32* @@ -287,7 +287,7 @@ define i64 @test30(i8* %foo, i64 %i, i64 %j) { define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { ; CHECK-LABEL: @test30_as1( ; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i16 [[I:%.*]], 2 -; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i16 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i16 [[GEP1_IDX]], [[J:%.*]] ; CHECK-NEXT: ret i16 [[GEPDIFF]] ; %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* @@ -299,11 +299,9 @@ define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { ret i16 %sub } -; Inbounds translates to 'nsw' on sub - define i64 @gep_diff_both_inbounds(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @gep_diff_both_inbounds( -; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[I:%.*]], [[J:%.*]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] ; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %gep1 = getelementptr inbounds i8, i8* %foo, i64 %i @@ -314,8 +312,6 @@ define i64 @gep_diff_both_inbounds(i8* %foo, i64 %i, i64 %j) { ret i64 %sub } -; Negative test for 'nsw' - both geps must be inbounds - define i64 @gep_diff_first_inbounds(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @gep_diff_first_inbounds( ; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] @@ -329,8 +325,6 @@ define i64 @gep_diff_first_inbounds(i8* %foo, i64 %i, i64 %j) { ret i64 %sub } -; Negative test for 'nsw' - both geps must be inbounds - define i64 @gep_diff_second_inbounds(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @gep_diff_second_inbounds( ; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] diff 
--git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index 0940a08bbb443..98d8a9e6b5ca6 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -1077,7 +1077,7 @@ define i64 @test58([100 x [100 x i8]]* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test58( ; CHECK-NEXT: [[GEP1_OFFS:%.*]] = add i64 [[I:%.*]], 4200 ; CHECK-NEXT: [[GEP2_OFFS:%.*]] = add i64 [[J:%.*]], 4200 -; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[GEP1_OFFS]], [[GEP2_OFFS]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[GEP1_OFFS]], [[GEP2_OFFS]] ; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %gep1 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %i From f92908cc749ead7a14960343636549409380d12b Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 9 Sep 2020 15:23:34 -0500 Subject: [PATCH 0382/1079] [DSE] Make sure that DSE+MSSA can handle masked stores Differential Revision: https://reviews.llvm.org/D87414 --- .../Scalar/DeadStoreElimination.cpp | 100 ++++++++++-------- .../DeadStoreElimination/masked-dead-store.ll | 1 + 2 files changed, 58 insertions(+), 43 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index a9700bf47a9e4..10b00287552ab 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -411,22 +411,53 @@ enum OverwriteResult { } // end anonymous namespace -/// Return 'OW_Complete' if a store to the 'Later' location completely -/// overwrites a store to the 'Earlier' location. Return OW_MaybePartial -/// if \p Later does not completely overwrite \p Earlier, but they both -/// write to the same underlying object. In that case, use isPartialOverwrite to -/// check if \p Later partially overwrites \p Earlier. Returns 'OW_Unknown' if -/// nothing can be determined. +/// Check if two instruction are masked stores that completely +/// overwrite one another. More specifically, \p Later has to +/// overwrite \p Earlier. +template +static OverwriteResult isMaskedStoreOverwrite(const Instruction *Later, + const Instruction *Earlier, + AATy &AA) { + const auto *IIL = dyn_cast(Later); + const auto *IIE = dyn_cast(Earlier); + if (IIL == nullptr || IIE == nullptr) + return OW_Unknown; + if (IIL->getIntrinsicID() != Intrinsic::masked_store || + IIE->getIntrinsicID() != Intrinsic::masked_store) + return OW_Unknown; + // Pointers. + Value *LP = IIL->getArgOperand(1)->stripPointerCasts(); + Value *EP = IIE->getArgOperand(1)->stripPointerCasts(); + if (LP != EP && !AA.isMustAlias(LP, EP)) + return OW_Unknown; + // Masks. + // TODO: check that Later's mask is a superset of the Earlier's mask. + if (IIL->getArgOperand(3) != IIE->getArgOperand(3)) + return OW_Unknown; + return OW_Complete; +} + +/// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI +/// instruction) completely overwrites a store to the 'Earlier' location. +/// (by \p EarlierI instruction). +/// Return OW_MaybePartial if \p Later does not completely overwrite +/// \p Earlier, but they both write to the same underlying object. In that +/// case, use isPartialOverwrite to check if \p Later partially overwrites +/// \p Earlier. Returns 'OW_Unknown' if nothing can be determined. 
template static OverwriteResult -isOverwrite(const MemoryLocation &Later, const MemoryLocation &Earlier, +isOverwrite(const Instruction *LaterI, const Instruction *EarlierI, + const MemoryLocation &Later, const MemoryLocation &Earlier, const DataLayout &DL, const TargetLibraryInfo &TLI, int64_t &EarlierOff, int64_t &LaterOff, AATy &AA, const Function *F) { // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll // get imprecise values here, though (except for unknown sizes). - if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) - return OW_Unknown; + if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) { + // Masked stores have imprecise locations, but we can reason about them + // to some extent. + return isMaskedStoreOverwrite(LaterI, EarlierI, AA); + } const uint64_t LaterSize = Later.Size.getValue(); const uint64_t EarlierSize = Earlier.Size.getValue(); @@ -494,24 +525,6 @@ isOverwrite(const MemoryLocation &Later, const MemoryLocation &Earlier, return OW_MaybePartial; } -static OverwriteResult isMaskedStoreOverwrite(Instruction *Later, - Instruction *Earlier) { - auto *IIL = dyn_cast(Later); - auto *IIE = dyn_cast(Earlier); - if (IIL == nullptr || IIE == nullptr) - return OW_Unknown; - if (IIL->getIntrinsicID() != Intrinsic::masked_store || - IIE->getIntrinsicID() != Intrinsic::masked_store) - return OW_Unknown; - // Pointers. - if (IIL->getArgOperand(1) != IIE->getArgOperand(1)) - return OW_Unknown; - // Masks. - if (IIL->getArgOperand(3) != IIE->getArgOperand(3)) - return OW_Unknown; - return OW_Complete; -} - /// Return 'OW_Complete' if a store to the 'Later' location completely /// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the /// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the @@ -1376,13 +1389,9 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, if (isRemovable(DepWrite) && !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { int64_t InstWriteOffset, DepWriteOffset; - OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, - InstWriteOffset, *AA, BB.getParent()); - if (OR == OW_Unknown) { - // isOverwrite punts on MemoryLocations with an imprecise size, such - // as masked stores. Handle this here, somwewhat inelegantly. - OR = isMaskedStoreOverwrite(Inst, DepWrite); - } + OverwriteResult OR = isOverwrite(Inst, DepWrite, Loc, DepLoc, DL, *TLI, + DepWriteOffset, InstWriteOffset, *AA, + BB.getParent()); if (OR == OW_MaybePartial) OR = isPartialOverwrite(Loc, DepLoc, DepWriteOffset, InstWriteOffset, DepWrite, IOL); @@ -1707,6 +1716,8 @@ struct DSEState { switch (CB->getIntrinsicID()) { case Intrinsic::init_trampoline: return {MemoryLocation(CB->getArgOperand(0))}; + case Intrinsic::masked_store: + return {MemoryLocation::getForArgument(CB, 1, TLI)}; default: break; } @@ -1716,8 +1727,10 @@ struct DSEState { return MemoryLocation::getOrNone(I); } - /// Returns true if \p Use completely overwrites \p DefLoc. - bool isCompleteOverwrite(MemoryLocation DefLoc, Instruction *UseInst) { + /// Returns true if \p UseInst completely overwrites \p DefLoc + /// (stored by \p DefInst). + bool isCompleteOverwrite(MemoryLocation DefLoc, Instruction *DefInst, + Instruction *UseInst) { // UseInst has a MemoryDef associated in MemorySSA. It's possible for a // MemoryDef to not write to memory, e.g. a volatile load is modeled as a // MemoryDef. 
@@ -1729,9 +1742,10 @@ struct DSEState { return false; int64_t InstWriteOffset, DepWriteOffset; - auto CC = getLocForWriteEx(UseInst); - return CC && isOverwrite(*CC, DefLoc, DL, TLI, DepWriteOffset, - InstWriteOffset, BatchAA, &F) == OW_Complete; + if (auto CC = getLocForWriteEx(UseInst)) + return isOverwrite(UseInst, DefInst, *CC, DefLoc, DL, TLI, DepWriteOffset, + InstWriteOffset, BatchAA, &F) == OW_Complete; + return false; } /// Returns true if \p Def is not read before returning from the function. @@ -1977,8 +1991,8 @@ struct DSEState { continue; } else { int64_t InstWriteOffset, DepWriteOffset; - auto OR = isOverwrite(DefLoc, *CurrentLoc, DL, TLI, DepWriteOffset, - InstWriteOffset, BatchAA, &F); + auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI, + DepWriteOffset, InstWriteOffset, BatchAA, &F); // If Current does not write to the same object as KillingDef, check // the next candidate. if (OR == OW_Unknown) { @@ -2122,7 +2136,7 @@ struct DSEState { // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, // stores [0,1] if (MemoryDef *UseDef = dyn_cast(UseAccess)) { - if (isCompleteOverwrite(DefLoc, UseInst)) { + if (isCompleteOverwrite(DefLoc, KillingI, UseInst)) { if (!isInvisibleToCallerAfterRet(DefUO) && UseAccess != EarlierAccess) { BasicBlock *MaybeKillingBlock = UseInst->getParent(); @@ -2479,7 +2493,7 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, // Check if NI overwrites SI. int64_t InstWriteOffset, DepWriteOffset; OverwriteResult OR = - isOverwrite(SILoc, NILoc, State.DL, TLI, DepWriteOffset, + isOverwrite(SI, NI, SILoc, NILoc, State.DL, TLI, DepWriteOffset, InstWriteOffset, State.BatchAA, &F); if (OR == OW_MaybePartial) { auto Iter = State.IOLs.insert( diff --git a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll index ef74d8eae63f9..85673e9fe5431 100644 --- a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll +++ b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -tbaa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s +; RUN: opt -tbaa -dse -enable-dse-memoryssa=true -S < %s | FileCheck %s target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" define dllexport i32 @f0(i8** %a0, i8** %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) #0 { From 320624784c49ccaa0fb6dc5147a9c94d9170afb7 Mon Sep 17 00:00:00 2001 From: Lubomir Litchev Date: Tue, 8 Sep 2020 11:50:08 -0700 Subject: [PATCH 0383/1079] [NFC] Follow up on D87111 - Add an option for unrolling loops up to a factor - CR issues addressed. Addressed some CR issues pointed out in D87111. Formatting and other nits. The original Diff D87111 - Add an option for unrolling loops up to a factor. 
Reviewed By: bondhugula

Differential Revision: https://reviews.llvm.org/D87313
---
 mlir/include/mlir/Dialect/Affine/Passes.td    |  4 ++--
 .../Dialect/Affine/Transforms/LoopUnroll.cpp  |  3 +--
 mlir/test/Dialect/SCF/loop-unroll.mlir        | 18 +++++++++---------
 3 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
index 7515dbaa33d86..4359ea0fa0a2c 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -71,8 +71,8 @@ def AffineLoopUnroll : FunctionPass<"affine-loop-unroll"> {
   let options = [
     Option<"unrollFactor", "unroll-factor", "unsigned", /*default=*/"4",
            "Use this unroll factor for all loops being unrolled">,
-    Option<"unrollUpToFactor", "unroll-up-to-factor", "bool", /*default=*/"false",
-           "Allow unroling up to the factor specicied">,
+    Option<"unrollUpToFactor", "unroll-up-to-factor", "bool",
+           /*default=*/"false", "Allow unrolling up to the factor specified">,
     Option<"unrollFull", "unroll-full", "bool", /*default=*/"false",
            "Fully unroll loops">,
     Option<"numRepetitions", "unroll-num-reps", "unsigned", /*default=*/"1",
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
index 3dc236f3c0686..26669967ff329 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
@@ -127,9 +127,8 @@ LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
   if (unrollFull)
     return loopUnrollFull(forOp);
   // Otherwise, unroll by the given unroll factor.
-  if (unrollUpToFactor) {
+  if (unrollUpToFactor)
     return loopUnrollUpToFactor(forOp, unrollFactor);
-  }
   return loopUnrollByFactor(forOp, unrollFactor);
 }
diff --git a/mlir/test/Dialect/SCF/loop-unroll.mlir b/mlir/test/Dialect/SCF/loop-unroll.mlir
index 134daa303ed86..0b6e178ed0aab 100644
--- a/mlir/test/Dialect/SCF/loop-unroll.mlir
+++ b/mlir/test/Dialect/SCF/loop-unroll.mlir
@@ -250,23 +250,23 @@ func @static_loop_unroll_by_3_promote_epilogue(%arg0 : memref<?xf32>) {
 // UNROLL-BY-3-NEXT:  store %{{.*}}, %[[MEM]][%[[C9]]] : memref<?xf32>
 // UNROLL-BY-3-NEXT:  return
-
 // Test unroll-up-to functionality.
 func @static_loop_unroll_up_to_factor(%arg0 : memref<?xf32>) {
   %0 = constant 7.0 : f32
   %lb = constant 0 : index
   %ub = constant 2 : index
   affine.for %i0 = %lb to %ub {
-    store %0, %arg0[%i0] : memref<?xf32>
+    affine.store %0, %arg0[%i0] : memref<?xf32>
   }
   return
 }
 // UNROLL-UP-TO-LABEL: func @static_loop_unroll_up_to_factor
 //  UNROLL-UP-TO-SAME: %[[MEM:.*0]]: memref<?xf32>
-// UNROLL-UP-TO-DAG:  %[[C0:.*]] = constant 0 : index
-// UNROLL-UP-TO-DAG:  %[[C2:.*]] = constant 2 : index
-// UNROLL-UP-TO-NEXT: %[[V0:.*]] = affine.apply {{.*}}
-// UNROLL-UP-TO-NEXT: store %{{.*}}, %[[MEM]][%[[V0]]] : memref<?xf32>
-// UNROLL-UP-TO-NEXT: %[[V1:.*]] = affine.apply {{.*}}
-// UNROLL-UP-TO-NEXT: tore %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
-// UNROLL-UP-TO-NEXT: return
+//
+// UNROLL-UP-TO-DAG:   %[[C0:.*]] = constant 0 : index
+// UNROLL-UP-TO-DAG:   %[[C2:.*]] = constant 2 : index
+// UNROLL-UP-TO-NEXT:  %[[V0:.*]] = affine.apply {{.*}}
+// UNROLL-UP-TO-NEXT:  store %{{.*}}, %[[MEM]][%[[V0]]] : memref<?xf32>
+// UNROLL-UP-TO-NEXT:  %[[V1:.*]] = affine.apply {{.*}}
+// UNROLL-UP-TO-NEXT:  affine.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
+// UNROLL-UP-TO-NEXT:  return
\ No newline at end of file

From d2c69c2f4947b38832a34cab14fe32c6b94dd4d2 Mon Sep 17 00:00:00 2001
From: Richard Barton
Date: Fri, 11 Sep 2020 15:46:39 +0100
Subject: [PATCH 0384/1079] [flang] Fix build issue with BUILD_SHARED_LIBS=ON

Define Fortran::Semantics::Scope::GetName in the header so it is available
to Fortran::Evaluate::Tool::AttachDeclaration without a circular dependency
introduced in 82edd42.

Reviewed By: tskeith

Differential Revision: https://reviews.llvm.org/D87505
---
 flang/include/flang/Semantics/scope.h | 10 +++++++++-
 flang/lib/Semantics/scope.cpp         |  8 --------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/flang/include/flang/Semantics/scope.h b/flang/include/flang/Semantics/scope.h
index 853d7044f7fd5..fd2198b2ae617 100644
--- a/flang/include/flang/Semantics/scope.h
+++ b/flang/include/flang/Semantics/scope.h
@@ -95,7 +95,7 @@ class Scope {
   inline const Symbol *GetSymbol() const;
   const Scope *GetDerivedTypeParent() const;
   const Scope &GetDerivedTypeBase() const;
-  std::optional<SourceName> GetName() const;
+  inline std::optional<SourceName> GetName() const;
   bool Contains(const Scope &) const;
   /// Make a scope nested in this one
   Scope &MakeScope(Kind kind, Symbol *symbol = nullptr);
@@ -266,5 +266,13 @@ inline const Symbol *Scope::GetSymbol() const {
                       : nullptr;
 }
 
+inline std::optional<SourceName> Scope::GetName() const {
+  if (const auto *sym{GetSymbol()}) {
+    return sym->name();
+  } else {
+    return std::nullopt;
+  }
+}
+
 } // namespace Fortran::semantics
 #endif // FORTRAN_SEMANTICS_SCOPE_H_
diff --git a/flang/lib/Semantics/scope.cpp b/flang/lib/Semantics/scope.cpp
index c7635c0b1a3bb..768f9f5aab1b8 100644
--- a/flang/lib/Semantics/scope.cpp
+++ b/flang/lib/Semantics/scope.cpp
@@ -114,14 +114,6 @@ Symbol *Scope::FindComponent(SourceName name) const {
   }
 }
 
-std::optional<SourceName> Scope::GetName() const {
-  if (const auto *sym{GetSymbol()}) {
-    return sym->name();
-  } else {
-    return std::nullopt;
-  }
-}
-
 bool Scope::Contains(const Scope &that) const {
   for (const Scope *scope{&that};; scope = &scope->parent()) {
     if (*scope == *this) {

From 87494def4830f0b20af6cb8a4d8b3b668c8d3ec5 Mon Sep 17 00:00:00 2001
From: Nico Weber
Date: Fri, 11 Sep 2020 11:32:17 -0400
Subject: [PATCH 0385/1079] [gn build] slightly improve libcxx_needs_site_config

The write_cmake_config() here still looks busted, but at least the
value that's explicitly set is now set correctly.
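For context on what that value feeds (an illustrative sketch, not part of the patch; the exact empty-value handling is an assumption about write_cmake_config.py's CMake emulation): libc++'s __config_site.in declares the key with a plain cmakedefine, so only a truthy value such as "1" reliably turns into a definition in the generated header:

    // __config_site.in input (paraphrased):
    //   #cmakedefine _LIBCPP_ABI_UNSTABLE
    //
    // with "_LIBCPP_ABI_UNSTABLE="  -> treated as unset, likely emitted as:
    //   /* #undef _LIBCPP_ABI_UNSTABLE */
    //
    // with "_LIBCPP_ABI_UNSTABLE=1" -> emitted as:
    #define _LIBCPP_ABI_UNSTABLE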
---
 llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
index 904ace07585f0..e30622f52195f 100644
--- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
+++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
@@ -23,7 +23,7 @@ if (libcxx_needs_site_config) {
     values += [ "_LIBCPP_ABI_NAMESPACE=$libcxx_abi_namespace" ]
   }
   if (libcxx_abi_unstable) {
-    values += [ "_LIBCPP_ABI_UNSTABLE=" ]
+    values += [ "_LIBCPP_ABI_UNSTABLE=1" ]
   }
 }

From bfbaf172ce9978d8367ff08fdf90eb05fff5759d Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere
Date: Fri, 11 Sep 2020 08:32:55 -0700
Subject: [PATCH 0386/1079] [examples] Adjust ThinLtoInstrumentationLayer for
 emit signature change

Emit now takes a std::unique_ptr instead of a MaterializationResponsibility
directly. This should fix:
http://green.lab.llvm.org/green/view/LLDB/job/lldb-cmake-standalone/
---
 llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp | 4 ++--
 llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h   | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp b/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp
index 345bfd8dd8705..df844bf19b9cc 100644
--- a/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp
+++ b/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp
@@ -120,8 +120,8 @@ void ThinLtoInstrumentationLayer::nudgeIntoDiscovery(
   LLVM_DEBUG(dbgs() << "Nudged " << Count << " new functions into discovery\n");
 }
 
-void ThinLtoInstrumentationLayer::emit(MaterializationResponsibility R,
-                                       ThreadSafeModule TSM) {
+void ThinLtoInstrumentationLayer::emit(
+    std::unique_ptr<MaterializationResponsibility> R, ThreadSafeModule TSM) {
   TSM.withModuleDo([this](Module &M) {
     std::vector<Function *> FunctionsToInstrument;
 
diff --git a/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h b/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h
index cd87207894745..25006b40607fe 100644
--- a/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h
+++ b/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h
@@ -34,7 +34,8 @@ class ThinLtoInstrumentationLayer : public IRLayer {
 
   ~ThinLtoInstrumentationLayer() override;
 
-  void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override;
+  void emit(std::unique_ptr<MaterializationResponsibility> R,
+            ThreadSafeModule TSM) override;
 
   unsigned reserveDiscoveryFlags(unsigned Count);
   void registerDiscoveryFlagOwners(std::vector<GlobalValue::GUID> Guids,

From f980ed4184f9d9139961e21739d7692ea86b0ccf Mon Sep 17 00:00:00 2001
From: Louis Dionne
Date: Fri, 11 Sep 2020 11:05:22 -0400
Subject: [PATCH 0387/1079] [libcxx] Remove the 'availability' Lit feature

Instead, use with_system_cxx_lib with various compile-only tests to
ensure that we're getting compile-time errors, as expected.

This follows the lead of ec46cfefe80d5.
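To make the mechanism behind these compile-only tests concrete, here is a hedged sketch (the function name and diagnostic wording are illustrative, not quoted from libc++; the library applies equivalent markup to the aligned operator new/delete overloads and the C++20 synchronization primitives): a declaration carries a Clang availability attribute, and merely referencing it under an older deployment target produces a compile-time diagnostic that a .verify.cpp test can match with expected-error.

    // Compile with: clang++ -std=c++17 -fsyntax-only -mmacosx-version-min=10.12 sketch.cpp
    __attribute__((availability(macosx, strict, introduced = 10.14)))
    void needs_macos_10_14();  // hypothetical, for illustration only

    void caller() {
      needs_macos_10_14(); // error: unavailable / only available on macOS 10.14
                           // (exact wording depends on the Clang version)
    }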
---
 libcxx/docs/DesignDocs/AvailabilityMarkup.rst |  2 -
 .../aligned_alloc_availability.verify.cpp     | 61 +++++++++++++++++++
 .../support.dynamic/libcpp_deallocate.sh.cpp  |  5 --
 ...aligned_allocation_macro.compile.pass.cpp} | 14 ++---
 .../thread/atomic.availability.verify.cpp     |  9 ++-
 .../thread/barrier.availability.verify.cpp    |  9 ++-
 .../thread/latch.availability.verify.cpp      |  9 ++-
 .../thread/semaphore.availability.verify.cpp  |  9 ++-
 .../charconv.to.chars/availability.fail.cpp   |  8 ++-
 .../delete_align_val_t_replace.pass.cpp       | 23 +++----
 .../new.delete.array/new_align_val_t.pass.cpp | 23 +++----
 .../new_align_val_t_nothrow.pass.cpp          | 23 +++----
 .../new_align_val_t_nothrow_replace.pass.cpp  | 23 +++----
 ...d_delete_array_fsizeddeallocation.pass.cpp |  8 +--
 .../delete_align_val_t_replace.pass.cpp       | 23 +++----
 .../new_align_val_t.pass.cpp                  | 23 +++----
 .../new_align_val_t_nothrow.pass.cpp          | 23 +++----
 .../new_align_val_t_nothrow_replace.pass.cpp  | 23 +++----
 .../sized_delete_fsizeddeallocation.pass.cpp  |  6 +-
 libcxx/utils/libcxx/test/config.py            |  3 -
 20 files changed, 170 insertions(+), 157 deletions(-)
 create mode 100644 libcxx/test/libcxx/language.support/support.dynamic/aligned_alloc_availability.verify.cpp
 rename libcxx/test/libcxx/memory/{aligned_allocation_macro.pass.cpp => aligned_allocation_macro.compile.pass.cpp} (79%)

diff --git a/libcxx/docs/DesignDocs/AvailabilityMarkup.rst b/libcxx/docs/DesignDocs/AvailabilityMarkup.rst
index 2380385392876..26975a7370683 100644
--- a/libcxx/docs/DesignDocs/AvailabilityMarkup.rst
+++ b/libcxx/docs/DesignDocs/AvailabilityMarkup.rst
@@ -78,8 +78,6 @@ the following features will be made available:
 - with_system_cxx_lib=macosx
 - with_system_cxx_lib=macosx10.12
 - with_system_cxx_lib=x86_64-apple-macosx10.12
-- availability=macosx
-- availability=macosx10.12
 
 These features are used to XFAIL a test that fails when deployed on (or is
 compiled for) an older system. For example, if the test exhibits a bug in the
diff --git a/libcxx/test/libcxx/language.support/support.dynamic/aligned_alloc_availability.verify.cpp b/libcxx/test/libcxx/language.support/support.dynamic/aligned_alloc_availability.verify.cpp
new file mode 100644
index 0000000000000..aa75b70adee6b
--- /dev/null
+++ b/libcxx/test/libcxx/language.support/support.dynamic/aligned_alloc_availability.verify.cpp
@@ -0,0 +1,61 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Make sure we get compile-time availability errors when trying to use aligned
+// allocation/deallocation on deployment targets that don't support it.
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// Aligned allocation was not provided before macosx10.14.
+// Support for that is broken prior to Clang 8 and Apple Clang 11.
+// UNSUPPORTED: apple-clang-9, apple-clang-10
+// UNSUPPORTED: clang-5, clang-6, clang-7
+
+// REQUIRES: with_system_cxx_lib=macosx10.13 || \
+// REQUIRES: with_system_cxx_lib=macosx10.12 || \
+// REQUIRES: with_system_cxx_lib=macosx10.11 || \
+// REQUIRES: with_system_cxx_lib=macosx10.10 || \
+// REQUIRES: with_system_cxx_lib=macosx10.9
+
+#include <new>
+#include <cstddef>
+
+#include "test_macros.h"
+
+constexpr auto OverAligned = __STDCPP_DEFAULT_NEW_ALIGNMENT__ * 2;
+
+struct alignas(OverAligned) A { };
+
+int main(int, char**)
+{
+  // Normal versions
+  {
+    A *a1 = new A; // expected-error-re {{aligned allocation function of type {{.+}} is only available on}}
+    // `delete` is also required by the line above if construction fails
+    // expected-error-re@-2 {{aligned deallocation function of type {{.+}} is only available on}}
+
+    delete a1; // expected-error-re {{aligned deallocation function of type {{.+}} is only available on}}
+
+    A* a2 = new(std::nothrow) A; // expected-error-re {{aligned allocation function of type {{.+}} is only available on}}
+    // `delete` is also required above for the same reason
+    // expected-error-re@-2 {{aligned deallocation function of type {{.+}} is only available on}}
+  }
+
+  // Array versions
+  {
+    A *a1 = new A[2]; // expected-error-re {{aligned allocation function of type {{.+}} is only available on}}
+    // `delete` is also required by the line above if construction fails
+    // expected-error-re@-2 {{aligned deallocation function of type {{.+}} is only available on}}
+
+    delete[] a1; // expected-error-re {{aligned deallocation function of type {{.+}} is only available on}}
+
+    A* a2 = new(std::nothrow) A[2]; // expected-error-re {{aligned allocation function of type {{.+}} is only available on}}
+    // `delete` is also required above for the same reason
+    // expected-error-re@-2 {{aligned deallocation function of type {{.+}} is only available on}}
+  }
+}
diff --git a/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp b/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp
index 6ed7e7536bb7d..0d67cdafadd8e 100644
--- a/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp
+++ b/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp
@@ -21,11 +21,6 @@
 // XFAIL: with_system_cxx_lib=macosx10.10
 // XFAIL: with_system_cxx_lib=macosx10.9
 
-// The test will fail on deployment targets that do not support sized deallocation.
-// XFAIL: availability=macosx10.11
-// XFAIL: availability=macosx10.10
-// XFAIL: availability=macosx10.9
-
 // AppleClang < 10 incorrectly warns that aligned allocation is not supported
 // even when it is supported.
 // UNSUPPORTED: apple-clang-9
diff --git a/libcxx/test/libcxx/memory/aligned_allocation_macro.pass.cpp b/libcxx/test/libcxx/memory/aligned_allocation_macro.compile.pass.cpp
similarity index 79%
rename from libcxx/test/libcxx/memory/aligned_allocation_macro.pass.cpp
rename to libcxx/test/libcxx/memory/aligned_allocation_macro.compile.pass.cpp
index 749c9470c3063..4b5a47ee0e4bd 100644
--- a/libcxx/test/libcxx/memory/aligned_allocation_macro.pass.cpp
+++ b/libcxx/test/libcxx/memory/aligned_allocation_macro.compile.pass.cpp
@@ -15,11 +15,11 @@
 // GCC 5 doesn't support aligned allocation
 // UNSUPPORTED: gcc-5
 
-// XFAIL: availability=macosx10.13
-// XFAIL: availability=macosx10.12
-// XFAIL: availability=macosx10.11
-// XFAIL: availability=macosx10.10
-// XFAIL: availability=macosx10.9
+// XFAIL: with_system_cxx_lib=macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12
+// XFAIL: with_system_cxx_lib=macosx10.11
+// XFAIL: with_system_cxx_lib=macosx10.10
+// XFAIL: with_system_cxx_lib=macosx10.9
 
 #include <new>
 
@@ -29,7 +29,3 @@
 #ifdef _LIBCPP_HAS_NO_ALIGNED_ALLOCATION
 # error "libc++ should have aligned allocation in C++17 and up when targeting a platform that supports it"
 #endif
-
-int main(int, char**) {
-  return 0;
-}
diff --git a/libcxx/test/libcxx/thread/atomic.availability.verify.cpp b/libcxx/test/libcxx/thread/atomic.availability.verify.cpp
index 45028da5281a8..643e5910cc52c 100644
--- a/libcxx/test/libcxx/thread/atomic.availability.verify.cpp
+++ b/libcxx/test/libcxx/thread/atomic.availability.verify.cpp
@@ -7,8 +7,13 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++03, c++11
-// REQUIRES: with_system_cxx_lib=macosx
-// REQUIRES: availability=macosx10.9 || availability=macosx10.10 || availability=macosx10.11 || availability=macosx10.12 || availability=macosx10.13 || availability=macosx10.14 || availability=macosx10.15
+// REQUIRES: with_system_cxx_lib=macosx10.9 || \
+// REQUIRES: with_system_cxx_lib=macosx10.10 || \
+// REQUIRES: with_system_cxx_lib=macosx10.11 || \
+// REQUIRES: with_system_cxx_lib=macosx10.12 || \
+// REQUIRES: with_system_cxx_lib=macosx10.13 || \
+// REQUIRES: with_system_cxx_lib=macosx10.14 || \
+// REQUIRES: with_system_cxx_lib=macosx10.15
 
 // Test the availability markup on the C++20 Synchronization Library
 // additions to <atomic>.
diff --git a/libcxx/test/libcxx/thread/barrier.availability.verify.cpp b/libcxx/test/libcxx/thread/barrier.availability.verify.cpp
index 16d67fbce7b7b..f8537f5e86b43 100644
--- a/libcxx/test/libcxx/thread/barrier.availability.verify.cpp
+++ b/libcxx/test/libcxx/thread/barrier.availability.verify.cpp
@@ -7,8 +7,13 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++03, c++11
-// REQUIRES: with_system_cxx_lib=macosx
-// REQUIRES: availability=macosx10.9 || availability=macosx10.10 || availability=macosx10.11 || availability=macosx10.12 || availability=macosx10.13 || availability=macosx10.14 || availability=macosx10.15
+// REQUIRES: with_system_cxx_lib=macosx10.9 || \
+// REQUIRES: with_system_cxx_lib=macosx10.10 || \
+// REQUIRES: with_system_cxx_lib=macosx10.11 || \
+// REQUIRES: with_system_cxx_lib=macosx10.12 || \
+// REQUIRES: with_system_cxx_lib=macosx10.13 || \
+// REQUIRES: with_system_cxx_lib=macosx10.14 || \
+// REQUIRES: with_system_cxx_lib=macosx10.15
 
 // Test the availability markup on std::barrier.
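For a concrete picture of what the marked-up API looks like at a use site (a minimal sketch, not part of the patch), any use of std::barrier like the one below is what the expected-error annotations in the verify test latch onto when the deployment target's dylib lacks the symbols:

    #include <barrier>
    #include <thread>

    int main() {
      std::barrier<> sync(2);   // flagged on too-old deployment targets
      std::thread t([&] { sync.arrive_and_wait(); });
      sync.arrive_and_wait();   // both threads meet here, then proceed
      t.join();
    }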
diff --git a/libcxx/test/libcxx/thread/latch.availability.verify.cpp b/libcxx/test/libcxx/thread/latch.availability.verify.cpp
index f468ebfe9f4ab..25a1610541d43 100644
--- a/libcxx/test/libcxx/thread/latch.availability.verify.cpp
+++ b/libcxx/test/libcxx/thread/latch.availability.verify.cpp
@@ -7,8 +7,13 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++03, c++11
-// REQUIRES: with_system_cxx_lib=macosx
-// REQUIRES: availability=macosx10.9 || availability=macosx10.10 || availability=macosx10.11 || availability=macosx10.12 || availability=macosx10.13 || availability=macosx10.14 || availability=macosx10.15
+// REQUIRES: with_system_cxx_lib=macosx10.9 || \
+// REQUIRES: with_system_cxx_lib=macosx10.10 || \
+// REQUIRES: with_system_cxx_lib=macosx10.11 || \
+// REQUIRES: with_system_cxx_lib=macosx10.12 || \
+// REQUIRES: with_system_cxx_lib=macosx10.13 || \
+// REQUIRES: with_system_cxx_lib=macosx10.14 || \
+// REQUIRES: with_system_cxx_lib=macosx10.15
 
 // Test the availability markup on std::latch.
diff --git a/libcxx/test/libcxx/thread/semaphore.availability.verify.cpp b/libcxx/test/libcxx/thread/semaphore.availability.verify.cpp
index 5d92461c0a000..284ee96f567f1 100644
--- a/libcxx/test/libcxx/thread/semaphore.availability.verify.cpp
+++ b/libcxx/test/libcxx/thread/semaphore.availability.verify.cpp
@@ -7,8 +7,13 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++03, c++11
-// REQUIRES: with_system_cxx_lib=macosx
-// REQUIRES: availability=macosx10.9 || availability=macosx10.10 || availability=macosx10.11 || availability=macosx10.12 || availability=macosx10.13 || availability=macosx10.14 || availability=macosx10.15
+// REQUIRES: with_system_cxx_lib=macosx10.9 || \
+// REQUIRES: with_system_cxx_lib=macosx10.10 || \
+// REQUIRES: with_system_cxx_lib=macosx10.11 || \
+// REQUIRES: with_system_cxx_lib=macosx10.12 || \
+// REQUIRES: with_system_cxx_lib=macosx10.13 || \
+// REQUIRES: with_system_cxx_lib=macosx10.14 || \
+// REQUIRES: with_system_cxx_lib=macosx10.15
 
 // Test the availability markup on std::counting_semaphore and std::binary_semaphore.
diff --git a/libcxx/test/libcxx/utilities/charconv/charconv.to.chars/availability.fail.cpp b/libcxx/test/libcxx/utilities/charconv/charconv.to.chars/availability.fail.cpp
index cd099420d1829..70f5d3c1808d7 100644
--- a/libcxx/test/libcxx/utilities/charconv/charconv.to.chars/availability.fail.cpp
+++ b/libcxx/test/libcxx/utilities/charconv/charconv.to.chars/availability.fail.cpp
@@ -7,8 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++03
-// REQUIRES: with_system_cxx_lib=macosx
-// REQUIRES: availability=macosx10.9 || availability=macosx10.10 || availability=macosx10.11 || availability=macosx10.12 || availability=macosx10.13 || availability=macosx10.14
+// REQUIRES: with_system_cxx_lib=macosx10.9 || \
+// REQUIRES: with_system_cxx_lib=macosx10.10 || \
+// REQUIRES: with_system_cxx_lib=macosx10.11 || \
+// REQUIRES: with_system_cxx_lib=macosx10.12 || \
+// REQUIRES: with_system_cxx_lib=macosx10.13 || \
+// REQUIRES: with_system_cxx_lib=macosx10.14
 
 // Test the availability markup on std::to_chars.
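And the std::to_chars call gated by that last test, as a self-contained usage sketch (plain C++17; nothing assumed beyond the standard API):

    #include <charconv>
    #include <cstdio>

    int main() {
      char buf[16];
      std::to_chars_result r = std::to_chars(buf, buf + sizeof buf, 42);
      if (r.ec == std::errc())  // success: [buf, r.ptr) holds "42"
        std::printf("%.*s\n", static_cast<int>(r.ptr - buf), buf);
    }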
diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp index b092fa141e611..eb7f5ad4aafd1 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp @@ -15,21 +15,14 @@ // Aligned allocation was not provided before macosx10.14 and as a result we // get availability errors when the deployment target is older than macosx10.14. -// However, AppleClang 10 (and older) don't trigger availability errors, and -// Clang < 8.0 doesn't warn for 10.13. -// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.9 - -// On AppleClang 10 (and older), instead of getting an availability failure -// like above, we get a link error when we link against a dylib that does -// not export the aligned allocation functions. -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.12 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.11 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.10 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.9 +// However, support for that was broken prior to Clang 8 and AppleClang 11. +// UNSUPPORTED: apple-clang-9, apple-clang-10 +// UNSUPPORTED: clang-5, clang-6, clang-7 +// XFAIL: with_system_cxx_lib=macosx10.13 +// XFAIL: with_system_cxx_lib=macosx10.12 +// XFAIL: with_system_cxx_lib=macosx10.11 +// XFAIL: with_system_cxx_lib=macosx10.10 +// XFAIL: with_system_cxx_lib=macosx10.9 // On Windows libc++ doesn't provide its own definitions for new/delete // but instead depends on the ones in VCRuntime. However VCRuntime does not diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp index bfa5f155a9c56..6b372e076915a 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp @@ -13,21 +13,14 @@ // Aligned allocation was not provided before macosx10.14 and as a result we // get availability errors when the deployment target is older than macosx10.14. -// However, AppleClang 10 (and older) don't trigger availability errors, and -// Clang < 8.0 doesn't warn for 10.13. 
-// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.9 - -// On AppleClang 10 (and older), instead of getting an availability failure -// like above, we get a link error when we link against a dylib that does -// not export the aligned allocation functions. -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.12 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.11 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.10 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.9 +// However, support for that was broken prior to Clang 8 and AppleClang 11. +// UNSUPPORTED: apple-clang-9, apple-clang-10 +// UNSUPPORTED: clang-5, clang-6, clang-7 +// XFAIL: with_system_cxx_lib=macosx10.13 +// XFAIL: with_system_cxx_lib=macosx10.12 +// XFAIL: with_system_cxx_lib=macosx10.11 +// XFAIL: with_system_cxx_lib=macosx10.10 +// XFAIL: with_system_cxx_lib=macosx10.9 // On Windows libc++ doesn't provide its own definitions for new/delete // but instead depends on the ones in VCRuntime. However VCRuntime does not diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp index 869e29a8e87be..e9e9d95e83a3c 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp @@ -13,21 +13,14 @@ // Aligned allocation was not provided before macosx10.14 and as a result we // get availability errors when the deployment target is older than macosx10.14. -// However, AppleClang 10 (and older) don't trigger availability errors, and -// Clang < 8.0 doesn't warn for 10.13. -// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.9 - -// On AppleClang 10 (and older), instead of getting an availability failure -// like above, we get a link error when we link against a dylib that does -// not export the aligned allocation functions. -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.12 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.11 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.10 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.9 +// However, support for that was broken prior to Clang 8 and AppleClang 11. 
+// UNSUPPORTED: apple-clang-9, apple-clang-10 +// UNSUPPORTED: clang-5, clang-6, clang-7 +// XFAIL: with_system_cxx_lib=macosx10.13 +// XFAIL: with_system_cxx_lib=macosx10.12 +// XFAIL: with_system_cxx_lib=macosx10.11 +// XFAIL: with_system_cxx_lib=macosx10.10 +// XFAIL: with_system_cxx_lib=macosx10.9 // On Windows libc++ doesn't provide its own definitions for new/delete // but instead depends on the ones in VCRuntime. However VCRuntime does not diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp index 6f346a72a0ae6..e7a1e403d73dd 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp @@ -11,21 +11,14 @@ // Aligned allocation was not provided before macosx10.14 and as a result we // get availability errors when the deployment target is older than macosx10.14. -// However, AppleClang 10 (and older) don't trigger availability errors, and -// Clang < 8.0 doesn't warn for 10.13. -// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.9 - -// On AppleClang 10 (and older), instead of getting an availability failure -// like above, we get a link error when we link against a dylib that does -// not export the aligned allocation functions. -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.12 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.11 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.10 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.9 +// However, support for that was broken prior to Clang 8 and AppleClang 11. +// UNSUPPORTED: apple-clang-9, apple-clang-10 +// UNSUPPORTED: clang-5, clang-6, clang-7 +// XFAIL: with_system_cxx_lib=macosx10.13 +// XFAIL: with_system_cxx_lib=macosx10.12 +// XFAIL: with_system_cxx_lib=macosx10.11 +// XFAIL: with_system_cxx_lib=macosx10.10 +// XFAIL: with_system_cxx_lib=macosx10.9 // On Windows libc++ doesn't provide its own definitions for new/delete // but instead depends on the ones in VCRuntime. However VCRuntime does not diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/sized_delete_array_fsizeddeallocation.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/sized_delete_array_fsizeddeallocation.pass.cpp index cdebcda46a0b7..1274ddff54236 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/sized_delete_array_fsizeddeallocation.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/sized_delete_array_fsizeddeallocation.pass.cpp @@ -12,12 +12,10 @@ // when sized deallocation is not supported, e.g., prior to C++14. 
// UNSUPPORTED: sanitizer-new-delete -// XFAIL: availability=macosx10.11 -// XFAIL: availability=macosx10.10 -// XFAIL: availability=macosx10.9 +// XFAIL: with_system_cxx_lib=macosx10.11 +// XFAIL: with_system_cxx_lib=macosx10.10 +// XFAIL: with_system_cxx_lib=macosx10.9 - -// NOTE: Only clang-3.7 and GCC 5.1 and greater support -fsized-deallocation. // REQUIRES: -fsized-deallocation // ADDITIONAL_COMPILE_FLAGS: -fsized-deallocation diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp index f50507a815d43..4d0100d04597d 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp @@ -15,21 +15,14 @@ // Aligned allocation was not provided before macosx10.14 and as a result we // get availability errors when the deployment target is older than macosx10.14. -// However, AppleClang 10 (and older) don't trigger availability errors, and -// Clang < 8.0 doesn't warn for 10.13 -// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.9 - -// On AppleClang 10 (and older), instead of getting an availability failure -// like above, we get a link error when we link against a dylib that does -// not export the aligned allocation functions. -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.12 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.11 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.10 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.9 +// However, support for that was broken prior to Clang 8 and AppleClang 11. +// UNSUPPORTED: apple-clang-9, apple-clang-10 +// UNSUPPORTED: clang-5, clang-6, clang-7 +// XFAIL: with_system_cxx_lib=macosx10.13 +// XFAIL: with_system_cxx_lib=macosx10.12 +// XFAIL: with_system_cxx_lib=macosx10.11 +// XFAIL: with_system_cxx_lib=macosx10.10 +// XFAIL: with_system_cxx_lib=macosx10.9 // On Windows libc++ doesn't provide its own definitions for new/delete // but instead depends on the ones in VCRuntime. However VCRuntime does not diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp index 80ec88e437fe0..01cb88658954e 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp @@ -10,21 +10,14 @@ // Aligned allocation was not provided before macosx10.14 and as a result we // get availability errors when the deployment target is older than macosx10.14. -// However, AppleClang 10 (and older) don't trigger availability errors, and -// Clang < 8.0 doesn't warn for 10.13. 
-// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.9 - -// On AppleClang 10 (and older), instead of getting an availability failure -// like above, we get a link error when we link against a dylib that does -// not export the aligned allocation functions. -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.12 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.11 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.10 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.9 +// However, support for that was broken prior to Clang 8 and AppleClang 11. +// UNSUPPORTED: apple-clang-9, apple-clang-10 +// UNSUPPORTED: clang-5, clang-6, clang-7 +// XFAIL: with_system_cxx_lib=macosx10.13 +// XFAIL: with_system_cxx_lib=macosx10.12 +// XFAIL: with_system_cxx_lib=macosx10.11 +// XFAIL: with_system_cxx_lib=macosx10.10 +// XFAIL: with_system_cxx_lib=macosx10.9 // asan and msan will not call the new handler. // UNSUPPORTED: sanitizer-new-delete diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp index 0a42fbac6fd4c..930eff95bb999 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp @@ -10,21 +10,14 @@ // Aligned allocation was not provided before macosx10.14 and as a result we // get availability errors when the deployment target is older than macosx10.14. -// However, AppleClang 10 (and older) don't trigger availability errors, and -// Clang < 8.0 doesn't warn for 10.13 -// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.9 - -// On AppleClang 10 (and older), instead of getting an availability failure -// like above, we get a link error when we link against a dylib that does -// not export the aligned allocation functions. -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.12 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.11 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.10 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.9 +// However, support for that was broken prior to Clang 8 and AppleClang 11. +// UNSUPPORTED: apple-clang-9, apple-clang-10 +// UNSUPPORTED: clang-5, clang-6, clang-7 +// XFAIL: with_system_cxx_lib=macosx10.13 +// XFAIL: with_system_cxx_lib=macosx10.12 +// XFAIL: with_system_cxx_lib=macosx10.11 +// XFAIL: with_system_cxx_lib=macosx10.10 +// XFAIL: with_system_cxx_lib=macosx10.9 // asan and msan will not call the new handler. 
// UNSUPPORTED: sanitizer-new-delete diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp index 655ec9352d682..62ceafb7644af 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp @@ -11,21 +11,14 @@ // Aligned allocation was not provided before macosx10.14 and as a result we // get availability errors when the deployment target is older than macosx10.14. -// However, AppleClang 10 (and older) don't trigger availability errors, and -// Clang < 8.0 doesn't warn for 10.13 -// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10 -// XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.9 - -// On AppleClang 10 (and older), instead of getting an availability failure -// like above, we get a link error when we link against a dylib that does -// not export the aligned allocation functions. -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.12 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.11 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.10 -// XFAIL: (apple-clang-9 || apple-clang-10) && with_system_cxx_lib=macosx10.9 +// However, support for that was broken prior to Clang 8 and AppleClang 11. +// UNSUPPORTED: apple-clang-9, apple-clang-10 +// UNSUPPORTED: clang-5, clang-6, clang-7 +// XFAIL: with_system_cxx_lib=macosx10.13 +// XFAIL: with_system_cxx_lib=macosx10.12 +// XFAIL: with_system_cxx_lib=macosx10.11 +// XFAIL: with_system_cxx_lib=macosx10.10 +// XFAIL: with_system_cxx_lib=macosx10.9 // On Windows libc++ doesn't provide its own definitions for new/delete // but instead depends on the ones in VCRuntime. However VCRuntime does not diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/sized_delete_fsizeddeallocation.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/sized_delete_fsizeddeallocation.pass.cpp index e827ff618ec5a..22ea35ebced97 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/sized_delete_fsizeddeallocation.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/sized_delete_fsizeddeallocation.pass.cpp @@ -12,9 +12,9 @@ // when sized deallocation is not supported, e.g., prior to C++14. // UNSUPPORTED: sanitizer-new-delete -// XFAIL: availability=macosx10.11 -// XFAIL: availability=macosx10.10 -// XFAIL: availability=macosx10.9 +// XFAIL: with_system_cxx_lib=macosx10.11 +// XFAIL: with_system_cxx_lib=macosx10.10 +// XFAIL: with_system_cxx_lib=macosx10.9 // NOTE: Only clang-3.7 and GCC 5.1 and greater support -fsized-deallocation. 
// REQUIRES: -fsized-deallocation diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index 42438b3ccf2e7..fdc8bbce1cf18 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -252,9 +252,6 @@ def configure_features(self): self.config.available_features.add('with_system_cxx_lib={}{}'.format(sysname, version)) self.config.available_features.add('with_system_cxx_lib={}'.format(sysname)) - self.config.available_features.add('availability={}'.format(sysname)) - self.config.available_features.add('availability={}{}'.format(sysname, version)) - if self.target_info.is_windows(): if self.cxx_stdlib_under_test == 'libc++': # LIBCXX-WINDOWS-FIXME is the feature name used to XFAIL the From 54680591e8bf13322d265478d10f043a503fb4f2 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 11 Sep 2020 11:33:41 -0400 Subject: [PATCH 0388/1079] [SLP] add test for missed store vectorization; NFC --- .../SLPVectorizer/X86/bad-reduction.ll | 55 +++++++++++++------ 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll index 3094f9bc2549a..c78bec1b6a20b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll @@ -15,14 +15,14 @@ define i64 @load_bswap(%v8i8* %p) { ; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 5 ; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 6 ; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 7 -; CHECK-NEXT: [[T0:%.*]] = load i8, i8* [[G0]] -; CHECK-NEXT: [[T1:%.*]] = load i8, i8* [[G1]] -; CHECK-NEXT: [[T2:%.*]] = load i8, i8* [[G2]] -; CHECK-NEXT: [[T3:%.*]] = load i8, i8* [[G3]] -; CHECK-NEXT: [[T4:%.*]] = load i8, i8* [[G4]] -; CHECK-NEXT: [[T5:%.*]] = load i8, i8* [[G5]] -; CHECK-NEXT: [[T6:%.*]] = load i8, i8* [[G6]] -; CHECK-NEXT: [[T7:%.*]] = load i8, i8* [[G7]] +; CHECK-NEXT: [[T0:%.*]] = load i8, i8* [[G0]], align 1 +; CHECK-NEXT: [[T1:%.*]] = load i8, i8* [[G1]], align 1 +; CHECK-NEXT: [[T2:%.*]] = load i8, i8* [[G2]], align 1 +; CHECK-NEXT: [[T3:%.*]] = load i8, i8* [[G3]], align 1 +; CHECK-NEXT: [[T4:%.*]] = load i8, i8* [[G4]], align 1 +; CHECK-NEXT: [[T5:%.*]] = load i8, i8* [[G5]], align 1 +; CHECK-NEXT: [[T6:%.*]] = load i8, i8* [[G6]], align 1 +; CHECK-NEXT: [[T7:%.*]] = load i8, i8* [[G7]], align 1 ; CHECK-NEXT: [[Z0:%.*]] = zext i8 [[T0]] to i64 ; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[T1]] to i64 ; CHECK-NEXT: [[Z2:%.*]] = zext i8 [[T2]] to i64 @@ -103,14 +103,14 @@ define i64 @load_bswap_nop_shift(%v8i8* %p) { ; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 5 ; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 6 ; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 7 -; CHECK-NEXT: [[T0:%.*]] = load i8, i8* [[G0]] -; CHECK-NEXT: [[T1:%.*]] = load i8, i8* [[G1]] -; CHECK-NEXT: [[T2:%.*]] = load i8, i8* [[G2]] -; CHECK-NEXT: [[T3:%.*]] = load i8, i8* [[G3]] -; CHECK-NEXT: [[T4:%.*]] = load i8, i8* [[G4]] -; CHECK-NEXT: [[T5:%.*]] = load i8, i8* [[G5]] -; CHECK-NEXT: [[T6:%.*]] = load i8, i8* [[G6]] -; CHECK-NEXT: [[T7:%.*]] = load i8, i8* [[G7]] +; CHECK-NEXT: [[T0:%.*]] = load i8, i8* [[G0]], align 1 +; CHECK-NEXT: [[T1:%.*]] = load i8, i8* [[G1]], align 1 +; CHECK-NEXT: [[T2:%.*]] = load i8, i8* [[G2]], 
align 1 +; CHECK-NEXT: [[T3:%.*]] = load i8, i8* [[G3]], align 1 +; CHECK-NEXT: [[T4:%.*]] = load i8, i8* [[G4]], align 1 +; CHECK-NEXT: [[T5:%.*]] = load i8, i8* [[G5]], align 1 +; CHECK-NEXT: [[T6:%.*]] = load i8, i8* [[G6]], align 1 +; CHECK-NEXT: [[T7:%.*]] = load i8, i8* [[G7]], align 1 ; CHECK-NEXT: [[Z0:%.*]] = zext i8 [[T0]] to i64 ; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[T1]] to i64 ; CHECK-NEXT: [[Z2:%.*]] = zext i8 [[T2]] to i64 @@ -537,3 +537,26 @@ define void @load_combine_constant_expression(i64* %t1) { store i64 or (i64 shl (i64 zext (i32 ptrtoint ([8 x i8]* @g1 to i32) to i64), i64 32), i64 zext (i32 ptrtoint ([5 x i8]* @g2 to i32) to i64)), i64* %t3, align 4 ret void } + +@output = dso_local local_unnamed_addr global [8 x i32] zeroinitializer, align 16 + +define void @PR47450(i16* nocapture readonly %p) { +; CHECK-LABEL: @PR47450( +; CHECK-NEXT: [[X:%.*]] = load i16, i16* [[P:%.*]], align 2 +; CHECK-NEXT: [[Z:%.*]] = zext i16 [[X]] to i32 +; CHECK-NEXT: [[S:%.*]] = shl nuw nsw i32 [[Z]], 1 +; CHECK-NEXT: store i32 [[S]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @output, i64 0, i64 0), align 16 +; CHECK-NEXT: store i32 [[S]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @output, i64 0, i64 1), align 4 +; CHECK-NEXT: store i32 [[S]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @output, i64 0, i64 2), align 8 +; CHECK-NEXT: store i32 [[S]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @output, i64 0, i64 3), align 4 +; CHECK-NEXT: ret void +; + %x = load i16, i16* %p, align 2 + %z = zext i16 %x to i32 + %s = shl nuw nsw i32 %z, 1 + store i32 %s, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @output, i64 0, i64 0), align 16 + store i32 %s, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @output, i64 0, i64 1), align 4 + store i32 %s, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @output, i64 0, i64 2), align 8 + store i32 %s, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @output, i64 0, i64 3), align 4 + ret void +} From 40f12ef621d9fd2fb2dfe24f82b3f4f8c091f4ba Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 11 Sep 2020 11:47:23 -0400 Subject: [PATCH 0389/1079] [SLP] further limit bailout for load combine candidate (PR47450) The test example based on PR47450 shows that we can match non-byte-sized shifts, but those won't ever be bswap opportunities. This isn't a full fix (we'd still match if the shifts were by 8-bits for example), but this should be enough until there's evidence that we need to do more (this is a borderline case for vectorization in the first place). --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 ++++-- llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll | 9 +++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 5ff2cd18c73c8..000bd863a7c54 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3694,11 +3694,13 @@ static bool isLoadCombineCandidateImpl(Value *Root, unsigned NumElts, TargetTransformInfo *TTI) { // Look past the root to find a source value. Arbitrarily follow the // path through operand 0 of any 'or'. Also, peek through optional - // shift-left-by-constant. + // shift-left-by-multiple-of-8-bits. 
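+  // For example, (or (shl (zext (load i8)), 8), (zext (load i8))) is a
+  // load-combine/bswap-style pattern, but a shift amount that is not a
+  // multiple of 8 -- such as the 'shl nuw nsw i32 %z, 1' from PR47450 --
+  // can never be part of one, so it should not make us bail out of
+  // vectorization here.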
Value *ZextLoad = Root; + const APInt *ShAmtC; while (!isa(ZextLoad) && (match(ZextLoad, m_Or(m_Value(), m_Value())) || - match(ZextLoad, m_Shl(m_Value(), m_Constant())))) + (match(ZextLoad, m_Shl(m_Value(), m_APInt(ShAmtC))) && + ShAmtC->urem(8) == 0))) ZextLoad = cast(ZextLoad)->getOperand(0); // Check if the input is an extended load of the required or/shift expression. diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll index c78bec1b6a20b..e1028cf552762 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll @@ -545,10 +545,11 @@ define void @PR47450(i16* nocapture readonly %p) { ; CHECK-NEXT: [[X:%.*]] = load i16, i16* [[P:%.*]], align 2 ; CHECK-NEXT: [[Z:%.*]] = zext i16 [[X]] to i32 ; CHECK-NEXT: [[S:%.*]] = shl nuw nsw i32 [[Z]], 1 -; CHECK-NEXT: store i32 [[S]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @output, i64 0, i64 0), align 16 -; CHECK-NEXT: store i32 [[S]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @output, i64 0, i64 1), align 4 -; CHECK-NEXT: store i32 [[S]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @output, i64 0, i64 2), align 8 -; CHECK-NEXT: store i32 [[S]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @output, i64 0, i64 3), align 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[S]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[S]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[S]], i32 3 +; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast ([8 x i32]* @output to <4 x i32>*), align 16 ; CHECK-NEXT: ret void ; %x = load i16, i16* %p, align 2 From f2bb4b88550a04be977d85e2efe0bef1664c9b31 Mon Sep 17 00:00:00 2001 From: YangZhihui Date: Fri, 11 Sep 2020 17:51:36 +0200 Subject: [PATCH 0390/1079] [docs] Fix typos Differential Revision: https://reviews.llvm.org/D87356 --- llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst b/llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst index 8cc29803f2182..777e271423abe 100644 --- a/llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst +++ b/llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst @@ -2678,7 +2678,7 @@ architectures. DWARF address space identifiers are used by: -* The DWARF expession operations: ``DW_OP_LLVM_aspace_bregx``, +* The DWARF expression operations: ``DW_OP_LLVM_aspace_bregx``, ``DW_OP_LLVM_form_aspace_address``, ``DW_OP_LLVM_implicit_aspace_pointer``, and ``DW_OP_xderef*``. @@ -3387,7 +3387,7 @@ Standard Content Descriptions provided by the* ``DW_LNCT_path`` *field. When the source field is absent, consumers can access the file to get the source text.* - *This is particularly useful for programing languages that support runtime + *This is particularly useful for programming languages that support runtime compilation and runtime generation of source text. In these cases, the source text does not reside in any permanent file. For example, the OpenCL language [:ref:`OpenCL `] supports online compilation.* From 2df6efedef5c7647f966ba238a2901eb4b98204d Mon Sep 17 00:00:00 2001 From: Matt Morehouse Date: Fri, 11 Sep 2020 09:13:34 -0700 Subject: [PATCH 0391/1079] [DFSan] Re-enable event_callbacks test. 
Mark the dest pointers for memcpy and memmove as volatile, to avoid dead store elimination. Fixes https://bugs.llvm.org/show_bug.cgi?id=47488. --- compiler-rt/test/dfsan/event_callbacks.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/compiler-rt/test/dfsan/event_callbacks.c b/compiler-rt/test/dfsan/event_callbacks.c index 6f9fd289c226a..b154c9679d45f 100644 --- a/compiler-rt/test/dfsan/event_callbacks.c +++ b/compiler-rt/test/dfsan/event_callbacks.c @@ -2,10 +2,6 @@ // RUN: %clang_dfsan -O2 -mllvm -dfsan-event-callbacks %s %t-callbacks.o -o %t // RUN: %run %t FooBarBaz 2>&1 | FileCheck %s -// See PR47488, parts of this test get optimized out by a more aggressive -// dead store eliminator. -// XFAIL: * - // Tests that callbacks are inserted for store events when // -dfsan-event-callbacks is specified. @@ -118,14 +114,16 @@ int main(int Argc, char *Argv[]) { LabelArgv = dfsan_create_label("Argv", 0); dfsan_set_label(LabelArgv, Argv[1], LenArgv); - char SinkBuf[64]; - assert(LenArgv < sizeof(SinkBuf) - 1); + char Buf[64]; + assert(LenArgv < sizeof(Buf) - 1); // CHECK: Label 4 copied to memory - memcpy(SinkBuf, Argv[1], LenArgv); + void *volatile SinkPtr = Buf; + memcpy(SinkPtr, Argv[1], LenArgv); // CHECK: Label 4 copied to memory - memmove(&SinkBuf[1], SinkBuf, LenArgv); + SinkPtr = &Buf[1]; + memmove(SinkPtr, Buf, LenArgv); return 0; } From 560188ddcccb4e5ca2261c1990f085101238c8df Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 10 Sep 2020 14:37:11 -0700 Subject: [PATCH 0392/1079] [ELF][PowerPC] Define NOP as 0x60000000 to tidy up code. NFC Reviewed By: nemanjai Differential Revision: https://reviews.llvm.org/D87483 --- lld/ELF/Arch/PPC64.cpp | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index f5c91c1ff3b56..de4321d903994 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -62,6 +62,8 @@ enum DFormOpcd { ADDI = 14 }; +constexpr uint32_t NOP = 0x60000000; + enum class PPCLegacyInsn : uint32_t { NOINSN = 0, // Loads. @@ -691,7 +693,7 @@ void PPC64::relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const { writePrefixedInstruction(loc, pcRelInsn | ((totalDisp & 0x3ffff0000) << 16) | (totalDisp & 0xffff)); - write32(loc + rel.addend, 0x60000000); // nop accessInsn. + write32(loc + rel.addend, NOP); // nop accessInsn. 
break; } default: @@ -718,7 +720,7 @@ void PPC64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, switch (rel.type) { case R_PPC64_GOT_TLSGD16_HA: - writeFromHalf16(loc, 0x60000000); // nop + writeFromHalf16(loc, NOP); break; case R_PPC64_GOT_TLSGD16: case R_PPC64_GOT_TLSGD16_LO: @@ -726,7 +728,7 @@ void PPC64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, relocateNoSym(loc, R_PPC64_TPREL16_HA, val); break; case R_PPC64_TLSGD: - write32(loc, 0x60000000); // nop + write32(loc, NOP); write32(loc + 4, 0x38630000); // addi r3, r3 // Since we are relocating a half16 type relocation and Loc + 4 points to // the start of an instruction we need to advance the buffer by an extra @@ -758,13 +760,13 @@ void PPC64::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, switch (rel.type) { case R_PPC64_GOT_TLSLD16_HA: - writeFromHalf16(loc, 0x60000000); // nop + writeFromHalf16(loc, NOP); break; case R_PPC64_GOT_TLSLD16_LO: writeFromHalf16(loc, 0x3c6d0000); // addis r3, r13, 0 break; case R_PPC64_TLSLD: - write32(loc, 0x60000000); // nop + write32(loc, NOP); write32(loc + 4, 0x38631000); // addi r3, r3, 4096 break; case R_PPC64_DTPREL16: @@ -829,7 +831,7 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, unsigned offset = (config->ekind == ELF64BEKind) ? 2 : 0; switch (rel.type) { case R_PPC64_GOT_TPREL16_HA: - write32(loc - offset, 0x60000000); // nop + write32(loc - offset, NOP); break; case R_PPC64_GOT_TPREL16_LO_DS: case R_PPC64_GOT_TPREL16_DS: { @@ -1128,7 +1130,7 @@ void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { case R_PPC64_REL16_HA: case R_PPC64_TPREL16_HA: if (config->tocOptimize && shouldTocOptimize && ha(val) == 0) - writeFromHalf16(loc, 0x60000000); + writeFromHalf16(loc, NOP); else write16(loc, ha(val)); break; @@ -1353,7 +1355,7 @@ void PPC64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, return; } case R_PPC64_TLSGD: - write32(loc, 0x60000000); // bl __tls_get_addr(sym@tlsgd) --> nop + write32(loc, NOP); // bl __tls_get_addr(sym@tlsgd) --> nop write32(loc + 4, 0x7c636A14); // nop --> add r3, r3, r13 return; default: @@ -1424,7 +1426,7 @@ bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, uint32_t secondInstr = read32(loc + 8); if (!loImm && getPrimaryOpCode(secondInstr) == 14) { loImm = secondInstr & 0xFFFF; - } else if (secondInstr != 0x60000000) { + } else if (secondInstr != NOP) { return false; } @@ -1438,7 +1440,7 @@ bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, }; if (!checkRegOperands(firstInstr, 12, 1)) return false; - if (secondInstr != 0x60000000 && !checkRegOperands(secondInstr, 12, 12)) + if (secondInstr != NOP && !checkRegOperands(secondInstr, 12, 12)) return false; int32_t stackFrameSize = (hiImm * 65536) + loImm; @@ -1457,12 +1459,12 @@ bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, if (hiImm) { write32(loc + 4, 0x3D810000 | (uint16_t)hiImm); // If the low immediate is zero the second instruction will be a nop. - secondInstr = loImm ? 0x398C0000 | (uint16_t)loImm : 0x60000000; + secondInstr = loImm ? 
0x398C0000 | (uint16_t)loImm : NOP; write32(loc + 8, secondInstr); } else { // addi r12, r1, imm write32(loc + 4, (0x39810000) | (uint16_t)loImm); - write32(loc + 8, 0x60000000); + write32(loc + 8, NOP); } return true; From bd2f7ad6036caf214c4e3f46bcea9d4aa70bb810 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Fri, 11 Sep 2020 09:22:42 -0700 Subject: [PATCH 0393/1079] Revert "[examples] Adjust ThinLtoInstrumentationLayer for emit signature change" I raced with Florian and he had already reverted the original patch. --- llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp | 4 ++-- llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp b/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp index df844bf19b9cc..345bfd8dd8705 100644 --- a/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp +++ b/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp @@ -120,8 +120,8 @@ void ThinLtoInstrumentationLayer::nudgeIntoDiscovery( LLVM_DEBUG(dbgs() << "Nudged " << Count << " new functions into discovery\n"); } -void ThinLtoInstrumentationLayer::emit( - std::unique_ptr R, ThreadSafeModule TSM) { +void ThinLtoInstrumentationLayer::emit(MaterializationResponsibility R, + ThreadSafeModule TSM) { TSM.withModuleDo([this](Module &M) { std::vector FunctionsToInstrument; diff --git a/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h b/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h index 25006b40607fe..cd87207894745 100644 --- a/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h +++ b/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h @@ -34,8 +34,7 @@ class ThinLtoInstrumentationLayer : public IRLayer { ~ThinLtoInstrumentationLayer() override; - void emit(std::unique_ptr R, - ThreadSafeModule TSM) override; + void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; unsigned reserveDiscoveryFlags(unsigned Count); void registerDiscoveryFlagOwners(std::vector Guids, From 8ecc8520bc5bc20ae00c13e5ae13f8edbb80642e Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Sat, 12 Sep 2020 00:37:36 +0800 Subject: [PATCH 0394/1079] [FPEnv] [Clang] Enable constrained FP support for PowerPC d4ce862f introduced HasStrictFP to disable generating constrained FP operations for platforms lacking support. Since work for enabling constrained FP on PowerPC is almost done, we'd like to enable it. Reviewed By: kpn, steven.zhang Differential Revision: https://reviews.llvm.org/D87223 --- clang/lib/Basic/Targets/PPC.h | 1 + clang/test/CodeGen/builtins-ppc-fpconstrained.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index bca06a7a802dd..ec067d8811fc6 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -82,6 +82,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { SimdDefaultAlign = 128; LongDoubleWidth = LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::PPCDoubleDouble(); + HasStrictFP = true; } // Set the language option for altivec based on our value. 
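For illustration (not taken from this patch): with HasStrictFP set, PowerPC no longer needs the -fexperimental-strict-floating-point escape hatch, and -ffp-exception-behavior=strict by itself selects the constrained intrinsics. A plain C expression like

  double f(double a, double b) { return a + b; }

is then lowered to roughly

  %add = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.strict")

instead of a bare fadd, which is why the RUN-line changes below can drop the experimental flag.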
diff --git a/clang/test/CodeGen/builtins-ppc-fpconstrained.c b/clang/test/CodeGen/builtins-ppc-fpconstrained.c index 7c770845090fc..880c0c339ef33 100644 --- a/clang/test/CodeGen/builtins-ppc-fpconstrained.c +++ b/clang/test/CodeGen/builtins-ppc-fpconstrained.c @@ -2,14 +2,12 @@ // RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \ // RUN: -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-UNCONSTRAINED %s // RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \ -// RUN: -fexperimental-strict-floating-point \ // RUN: -ffp-exception-behavior=strict -emit-llvm %s -o - | FileCheck \ // RUN: --check-prefix=CHECK-CONSTRAINED -vv %s // RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \ // RUN: -fallow-half-arguments-and-returns -S -o - %s | \ // RUN: FileCheck --check-prefix=CHECK-ASM --check-prefix=NOT-FIXME-CHECK %s // RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \ -// RUN: -fexperimental-strict-floating-point \ // RUN: -fallow-half-arguments-and-returns -S -ffp-exception-behavior=strict \ // RUN: -o - %s | FileCheck --check-prefix=CHECK-ASM \ // RUN: --check-prefix=FIXME-CHECK %s From 40b72c9c792057f71319cfde3d7c7904dd8df6bc Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 11 Sep 2020 17:51:15 +0100 Subject: [PATCH 0395/1079] [ARM] Extra MLA reductions tests. NFC --- llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll | 1238 +++++++++ .../CodeGen/Thumb2/mve-vecreduce-mlapred.ll | 2250 ++++++++++++++++- 2 files changed, 3463 insertions(+), 25 deletions(-) diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll index 93e3b16590b32..4010e3c911126 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll @@ -170,6 +170,279 @@ entry: ret i64 %z } +define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_zext(<8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: add_v8i16_v8i32_v8i64_zext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov.u16 r0, q1[0] +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.u16 r0, q1[1] +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.u16 r0, q1[2] +; CHECK-NEXT: vmov.32 q2[2], r0 +; CHECK-NEXT: vmov.u16 r0, q1[3] +; CHECK-NEXT: vmov.32 q2[3], r0 +; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[2] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vmullb.u16 q3, q3, q2 +; CHECK-NEXT: vmov.i64 q2, #0xffffffff +; CHECK-NEXT: vmov.f32 s16, s12 +; CHECK-NEXT: vmov.f32 s18, s13 +; CHECK-NEXT: vand q4, q4, q2 +; CHECK-NEXT: vmov r2, s18 +; CHECK-NEXT: vmov r3, s16 +; CHECK-NEXT: vmov r0, s19 +; CHECK-NEXT: vmov r1, s17 +; CHECK-NEXT: vmov.f32 s16, s14 +; CHECK-NEXT: vmov.f32 s18, s15 +; CHECK-NEXT: vand q3, q4, q2 +; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: vmov r3, s12 +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: vmov r1, s13 +; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: vmov r3, s15 +; CHECK-NEXT: adcs r1, r0 +; CHECK-NEXT: vmov r0, s14 +; CHECK-NEXT: adds r0, r0, r2 +; CHECK-NEXT: vmov.u16 r2, q1[4] +; CHECK-NEXT: vmov.32 q3[0], r2 +; CHECK-NEXT: vmov.u16 r2, q1[5] +; CHECK-NEXT: vmov.32 q3[1], r2 +; CHECK-NEXT: vmov.u16 r2, q1[6] +; CHECK-NEXT: vmov.32 q3[2], r2 +; CHECK-NEXT: vmov.u16 r2, q1[7] +; CHECK-NEXT: vmov.32 q3[3], r2 +; CHECK-NEXT: vmov.u16 r2, q0[4] +; 
CHECK-NEXT: vmov.32 q1[0], r2 +; CHECK-NEXT: vmov.u16 r2, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r2 +; CHECK-NEXT: vmov.u16 r2, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r2 +; CHECK-NEXT: vmov.u16 r2, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r2 +; CHECK-NEXT: adcs r1, r3 +; CHECK-NEXT: vmullb.u16 q0, q1, q3 +; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: vmov.f32 s6, s1 +; CHECK-NEXT: vand q1, q1, q2 +; CHECK-NEXT: vmov r3, s4 +; CHECK-NEXT: vmov r2, s5 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: vmov r3, s6 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: vmov r2, s7 +; CHECK-NEXT: vmov.f32 s4, s2 +; CHECK-NEXT: vmov.f32 s6, s3 +; CHECK-NEXT: vand q0, q1, q2 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: vmov r2, s1 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: vmov r2, s3 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: bx lr +entry: + %xx = zext <8 x i16> %x to <8 x i32> + %yy = zext <8 x i16> %y to <8 x i32> + %m = mul <8 x i32> %xx, %yy + %ma = zext <8 x i32> %m to <8 x i64> + %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma) + ret i64 %z +} + +define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_sext(<8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: add_v8i16_v8i32_v8i64_sext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov.u16 r0, q1[0] +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.u16 r0, q1[1] +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.u16 r0, q1[2] +; CHECK-NEXT: vmov.32 q2[2], r0 +; CHECK-NEXT: vmov.u16 r0, q1[3] +; CHECK-NEXT: vmov.32 q2[3], r0 +; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[2] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vmullb.s16 q2, q3, q2 +; CHECK-NEXT: vmov.f32 s12, s8 +; CHECK-NEXT: vmov.f32 s14, s9 +; CHECK-NEXT: vmov r0, s12 +; CHECK-NEXT: vmov.32 q4[0], r0 +; CHECK-NEXT: asrs r0, r0, #31 +; CHECK-NEXT: vmov.32 q4[1], r0 +; CHECK-NEXT: vmov r0, s14 +; CHECK-NEXT: vmov.32 q4[2], r0 +; CHECK-NEXT: vmov.f32 s12, s10 +; CHECK-NEXT: vmov.f32 s14, s11 +; CHECK-NEXT: asrs r1, r0, #31 +; CHECK-NEXT: vmov.32 q4[3], r1 +; CHECK-NEXT: vmov r2, s18 +; CHECK-NEXT: vmov r3, s16 +; CHECK-NEXT: vmov r1, s17 +; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: adc.w r12, r1, r0, asr #31 +; CHECK-NEXT: vmov r1, s12 +; CHECK-NEXT: vmov.32 q2[0], r1 +; CHECK-NEXT: asrs r1, r1, #31 +; CHECK-NEXT: vmov.32 q2[1], r1 +; CHECK-NEXT: vmov r1, s14 +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: asrs r3, r1, #31 +; CHECK-NEXT: vmov.32 q2[3], r3 +; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vmov r3, s9 +; CHECK-NEXT: adds r0, r0, r2 +; CHECK-NEXT: adc.w r2, r12, r3 +; CHECK-NEXT: vmov r3, s10 +; CHECK-NEXT: adds.w r12, r0, r3 +; CHECK-NEXT: adc.w r1, r2, r1, asr #31 +; CHECK-NEXT: vmov.u16 r2, q1[4] +; CHECK-NEXT: vmov.32 q2[0], r2 +; CHECK-NEXT: vmov.u16 r2, q1[5] +; CHECK-NEXT: vmov.32 q2[1], r2 +; CHECK-NEXT: vmov.u16 r2, q1[6] +; CHECK-NEXT: vmov.32 q2[2], r2 +; CHECK-NEXT: vmov.u16 r2, q1[7] +; CHECK-NEXT: vmov.32 q2[3], r2 +; CHECK-NEXT: vmov.u16 r2, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r2 +; CHECK-NEXT: vmov.u16 r2, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r2 +; CHECK-NEXT: vmov.u16 r2, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r2 +; CHECK-NEXT: vmov.u16 r2, q0[7] +; CHECK-NEXT: 
vmov.32 q1[3], r2 +; CHECK-NEXT: vmullb.s16 q0, q1, q2 +; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: vmov.f32 s6, s1 +; CHECK-NEXT: vmov r2, s4 +; CHECK-NEXT: vmov.32 q2[0], r2 +; CHECK-NEXT: asrs r2, r2, #31 +; CHECK-NEXT: vmov.32 q2[1], r2 +; CHECK-NEXT: vmov r2, s6 +; CHECK-NEXT: vmov.32 q2[2], r2 +; CHECK-NEXT: vmov.f32 s4, s2 +; CHECK-NEXT: vmov.f32 s6, s3 +; CHECK-NEXT: asrs r3, r2, #31 +; CHECK-NEXT: vmov.32 q2[3], r3 +; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vmov r3, s9 +; CHECK-NEXT: adds.w r0, r0, r12 +; CHECK-NEXT: adcs r1, r3 +; CHECK-NEXT: vmov r3, s10 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: adc.w r1, r1, r2, asr #31 +; CHECK-NEXT: vmov r2, s4 +; CHECK-NEXT: adds r0, r0, r2 +; CHECK-NEXT: adc.w r1, r1, r2, asr #31 +; CHECK-NEXT: vmov r2, s6 +; CHECK-NEXT: adds r0, r0, r2 +; CHECK-NEXT: adc.w r1, r1, r2, asr #31 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: bx lr +entry: + %xx = sext <8 x i16> %x to <8 x i32> + %yy = sext <8 x i16> %y to <8 x i32> + %m = mul <8 x i32> %xx, %yy + %ma = sext <8 x i32> %m to <8 x i64> + %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma) + ret i64 %z +} + +define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_sextzext(<8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: add_v8i16_v8i32_v8i64_sextzext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vmov.32 q1[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vmov.32 q1[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[2] +; CHECK-NEXT: vmov.32 q1[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vmov.32 q1[3], r0 +; CHECK-NEXT: vmullb.s16 q2, q1, q1 +; CHECK-NEXT: vmov.i64 q1, #0xffffffff +; CHECK-NEXT: vmov.f32 s12, s8 +; CHECK-NEXT: vmov.f32 s14, s9 +; CHECK-NEXT: vand q3, q3, q1 +; CHECK-NEXT: vmov r2, s14 +; CHECK-NEXT: vmov r3, s12 +; CHECK-NEXT: vmov r0, s15 +; CHECK-NEXT: vmov r1, s13 +; CHECK-NEXT: vmov.f32 s12, s10 +; CHECK-NEXT: vmov.f32 s14, s11 +; CHECK-NEXT: vand q2, q3, q1 +; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: vmov r3, s8 +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: vmov r1, s9 +; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: vmov r3, s10 +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: vmov r1, s11 +; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: vmov.u16 r1, q0[4] +; CHECK-NEXT: vmov.32 q2[0], r1 +; CHECK-NEXT: vmov.u16 r1, q0[5] +; CHECK-NEXT: vmov.32 q2[1], r1 +; CHECK-NEXT: vmov.u16 r1, q0[6] +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: vmov.u16 r1, q0[7] +; CHECK-NEXT: vmov.32 q2[3], r1 +; CHECK-NEXT: vmullb.s16 q0, q2, q2 +; CHECK-NEXT: vmov.f32 s8, s0 +; CHECK-NEXT: vmov.f32 s10, s1 +; CHECK-NEXT: vand q2, q2, q1 +; CHECK-NEXT: vmov r3, s8 +; CHECK-NEXT: vmov r1, s9 +; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: vmov r3, s10 +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: vmov r1, s11 +; CHECK-NEXT: vmov.f32 s8, s2 +; CHECK-NEXT: vmov.f32 s10, s3 +; CHECK-NEXT: vand q0, q2, q1 +; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: vmov r1, s1 +; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: vmov r3, s3 +; CHECK-NEXT: adcs r1, r0 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: adds r0, r0, r2 +; CHECK-NEXT: adcs r1, r3 +; CHECK-NEXT: bx lr +entry: + %xx = sext <8 x i16> %x to <8 x i32> + %m = mul <8 x i32> %xx, %xx + %ma = zext <8 x i32> %m to <8 x i64> + %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma) + ret i64 %z +} + define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y) { ; CHECK-LABEL: add_v2i16_v2i64_zext: ; CHECK: @ %bb.0: @ %entry 
@@ -239,6 +512,336 @@ entry: ret i32 %z } +define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_zext(<16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: add_v16i8_v16i16_v16i32_zext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov.u8 r0, q1[8] +; CHECK-NEXT: vmov.16 q2[0], r0 +; CHECK-NEXT: vmov.u8 r0, q1[9] +; CHECK-NEXT: vmov.16 q2[1], r0 +; CHECK-NEXT: vmov.u8 r0, q1[10] +; CHECK-NEXT: vmov.16 q2[2], r0 +; CHECK-NEXT: vmov.u8 r0, q1[11] +; CHECK-NEXT: vmov.16 q2[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[12] +; CHECK-NEXT: vmov.16 q2[4], r0 +; CHECK-NEXT: vmov.u8 r0, q1[13] +; CHECK-NEXT: vmov.16 q2[5], r0 +; CHECK-NEXT: vmov.u8 r0, q1[14] +; CHECK-NEXT: vmov.16 q2[6], r0 +; CHECK-NEXT: vmov.u8 r0, q1[15] +; CHECK-NEXT: vmov.16 q2[7], r0 +; CHECK-NEXT: vmov.u8 r0, q0[8] +; CHECK-NEXT: vmov.16 q3[0], r0 +; CHECK-NEXT: vmov.u8 r0, q0[9] +; CHECK-NEXT: vmov.16 q3[1], r0 +; CHECK-NEXT: vmov.u8 r0, q0[10] +; CHECK-NEXT: vmov.16 q3[2], r0 +; CHECK-NEXT: vmov.u8 r0, q0[11] +; CHECK-NEXT: vmov.16 q3[3], r0 +; CHECK-NEXT: vmov.u8 r0, q0[12] +; CHECK-NEXT: vmov.16 q3[4], r0 +; CHECK-NEXT: vmov.u8 r0, q0[13] +; CHECK-NEXT: vmov.16 q3[5], r0 +; CHECK-NEXT: vmov.u8 r0, q0[14] +; CHECK-NEXT: vmov.16 q3[6], r0 +; CHECK-NEXT: vmov.u8 r0, q0[15] +; CHECK-NEXT: vmov.16 q3[7], r0 +; CHECK-NEXT: vmullb.u8 q2, q3, q2 +; CHECK-NEXT: vmov.u16 r0, q2[4] +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.u16 r0, q2[5] +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov.u16 r0, q2[6] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.u16 r0, q2[7] +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[0] +; CHECK-NEXT: vmov.16 q4[0], r0 +; CHECK-NEXT: vmov.u8 r0, q1[1] +; CHECK-NEXT: vmov.16 q4[1], r0 +; CHECK-NEXT: vmov.u8 r0, q1[2] +; CHECK-NEXT: vmov.16 q4[2], r0 +; CHECK-NEXT: vmov.u8 r0, q1[3] +; CHECK-NEXT: vmov.16 q4[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[4] +; CHECK-NEXT: vmov.16 q4[4], r0 +; CHECK-NEXT: vmov.u8 r0, q1[5] +; CHECK-NEXT: vmov.16 q4[5], r0 +; CHECK-NEXT: vmov.u8 r0, q1[6] +; CHECK-NEXT: vmov.16 q4[6], r0 +; CHECK-NEXT: vmov.u8 r0, q1[7] +; CHECK-NEXT: vmov.16 q4[7], r0 +; CHECK-NEXT: vmov.u8 r0, q0[0] +; CHECK-NEXT: vmov.16 q1[0], r0 +; CHECK-NEXT: vmov.u8 r0, q0[1] +; CHECK-NEXT: vmov.16 q1[1], r0 +; CHECK-NEXT: vmov.u8 r0, q0[2] +; CHECK-NEXT: vmov.16 q1[2], r0 +; CHECK-NEXT: vmov.u8 r0, q0[3] +; CHECK-NEXT: vmov.16 q1[3], r0 +; CHECK-NEXT: vmov.u8 r0, q0[4] +; CHECK-NEXT: vmov.16 q1[4], r0 +; CHECK-NEXT: vmov.u8 r0, q0[5] +; CHECK-NEXT: vmov.16 q1[5], r0 +; CHECK-NEXT: vmov.u8 r0, q0[6] +; CHECK-NEXT: vmov.16 q1[6], r0 +; CHECK-NEXT: vmov.u8 r0, q0[7] +; CHECK-NEXT: vmov.16 q1[7], r0 +; CHECK-NEXT: vmovlb.u16 q3, q3 +; CHECK-NEXT: vmullb.u8 q0, q1, q4 +; CHECK-NEXT: vmov.u16 r0, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r0 +; CHECK-NEXT: vmov.u16 r0, q2[0] +; CHECK-NEXT: vmovlb.u16 q1, q1 +; CHECK-NEXT: vadd.i32 q1, q1, q3 +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.u16 r0, q2[1] +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov.u16 r0, q2[2] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.u16 r0, q2[3] +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vmovlb.u16 q2, q3 +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: 
vmov.u16 r0, q0[2] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vmovlb.u16 q0, q3 +; CHECK-NEXT: vadd.i32 q0, q0, q2 +; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vaddv.u32 r0, q0 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: bx lr +entry: + %xx = zext <16 x i8> %x to <16 x i16> + %yy = zext <16 x i8> %y to <16 x i16> + %m = mul <16 x i16> %xx, %yy + %ma = zext <16 x i16> %m to <16 x i32> + %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %ma) + ret i32 %z +} + +define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_sext(<16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: add_v16i8_v16i16_v16i32_sext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov.u8 r0, q1[8] +; CHECK-NEXT: vmov.16 q2[0], r0 +; CHECK-NEXT: vmov.u8 r0, q1[9] +; CHECK-NEXT: vmov.16 q2[1], r0 +; CHECK-NEXT: vmov.u8 r0, q1[10] +; CHECK-NEXT: vmov.16 q2[2], r0 +; CHECK-NEXT: vmov.u8 r0, q1[11] +; CHECK-NEXT: vmov.16 q2[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[12] +; CHECK-NEXT: vmov.16 q2[4], r0 +; CHECK-NEXT: vmov.u8 r0, q1[13] +; CHECK-NEXT: vmov.16 q2[5], r0 +; CHECK-NEXT: vmov.u8 r0, q1[14] +; CHECK-NEXT: vmov.16 q2[6], r0 +; CHECK-NEXT: vmov.u8 r0, q1[15] +; CHECK-NEXT: vmov.16 q2[7], r0 +; CHECK-NEXT: vmov.u8 r0, q0[8] +; CHECK-NEXT: vmov.16 q3[0], r0 +; CHECK-NEXT: vmov.u8 r0, q0[9] +; CHECK-NEXT: vmov.16 q3[1], r0 +; CHECK-NEXT: vmov.u8 r0, q0[10] +; CHECK-NEXT: vmov.16 q3[2], r0 +; CHECK-NEXT: vmov.u8 r0, q0[11] +; CHECK-NEXT: vmov.16 q3[3], r0 +; CHECK-NEXT: vmov.u8 r0, q0[12] +; CHECK-NEXT: vmov.16 q3[4], r0 +; CHECK-NEXT: vmov.u8 r0, q0[13] +; CHECK-NEXT: vmov.16 q3[5], r0 +; CHECK-NEXT: vmov.u8 r0, q0[14] +; CHECK-NEXT: vmov.16 q3[6], r0 +; CHECK-NEXT: vmov.u8 r0, q0[15] +; CHECK-NEXT: vmov.16 q3[7], r0 +; CHECK-NEXT: vmullb.s8 q2, q3, q2 +; CHECK-NEXT: vmov.u16 r0, q2[4] +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.u16 r0, q2[5] +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov.u16 r0, q2[6] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.u16 r0, q2[7] +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[0] +; CHECK-NEXT: vmov.16 q4[0], r0 +; CHECK-NEXT: vmov.u8 r0, q1[1] +; CHECK-NEXT: vmov.16 q4[1], r0 +; CHECK-NEXT: vmov.u8 r0, q1[2] +; CHECK-NEXT: vmov.16 q4[2], r0 +; CHECK-NEXT: vmov.u8 r0, q1[3] +; CHECK-NEXT: vmov.16 q4[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[4] +; CHECK-NEXT: vmov.16 q4[4], r0 +; CHECK-NEXT: vmov.u8 r0, q1[5] +; CHECK-NEXT: vmov.16 q4[5], r0 +; CHECK-NEXT: vmov.u8 r0, q1[6] +; CHECK-NEXT: vmov.16 q4[6], r0 +; CHECK-NEXT: vmov.u8 r0, q1[7] +; CHECK-NEXT: vmov.16 q4[7], r0 +; CHECK-NEXT: vmov.u8 r0, q0[0] +; CHECK-NEXT: vmov.16 q1[0], r0 +; CHECK-NEXT: vmov.u8 r0, q0[1] +; CHECK-NEXT: vmov.16 q1[1], r0 +; CHECK-NEXT: vmov.u8 r0, q0[2] +; CHECK-NEXT: vmov.16 q1[2], r0 +; CHECK-NEXT: vmov.u8 r0, q0[3] +; CHECK-NEXT: vmov.16 q1[3], r0 +; CHECK-NEXT: vmov.u8 r0, q0[4] +; CHECK-NEXT: vmov.16 q1[4], r0 +; CHECK-NEXT: vmov.u8 r0, q0[5] +; CHECK-NEXT: vmov.16 q1[5], r0 +; CHECK-NEXT: vmov.u8 r0, q0[6] +; CHECK-NEXT: vmov.16 q1[6], r0 +; CHECK-NEXT: vmov.u8 r0, q0[7] +; CHECK-NEXT: vmov.16 q1[7], r0 +; CHECK-NEXT: vmovlb.s16 q3, q3 +; CHECK-NEXT: vmullb.s8 q0, q1, q4 +; CHECK-NEXT: vmov.u16 r0, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r0 +; 
CHECK-NEXT: vmov.u16 r0, q2[0] +; CHECK-NEXT: vmovlb.s16 q1, q1 +; CHECK-NEXT: vadd.i32 q1, q1, q3 +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.u16 r0, q2[1] +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov.u16 r0, q2[2] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.u16 r0, q2[3] +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vmovlb.s16 q2, q3 +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[2] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vmovlb.s16 q0, q3 +; CHECK-NEXT: vadd.i32 q0, q0, q2 +; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vaddv.u32 r0, q0 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: bx lr +entry: + %xx = sext <16 x i8> %x to <16 x i16> + %yy = sext <16 x i8> %y to <16 x i16> + %m = mul <16 x i16> %xx, %yy + %ma = sext <16 x i16> %m to <16 x i32> + %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %ma) + ret i32 %z +} + +define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_sextzext(<16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: add_v16i8_v16i16_v16i32_sextzext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.u8 r0, q0[8] +; CHECK-NEXT: vmov.16 q1[0], r0 +; CHECK-NEXT: vmov.u8 r0, q0[9] +; CHECK-NEXT: vmov.16 q1[1], r0 +; CHECK-NEXT: vmov.u8 r0, q0[10] +; CHECK-NEXT: vmov.16 q1[2], r0 +; CHECK-NEXT: vmov.u8 r0, q0[11] +; CHECK-NEXT: vmov.16 q1[3], r0 +; CHECK-NEXT: vmov.u8 r0, q0[12] +; CHECK-NEXT: vmov.16 q1[4], r0 +; CHECK-NEXT: vmov.u8 r0, q0[13] +; CHECK-NEXT: vmov.16 q1[5], r0 +; CHECK-NEXT: vmov.u8 r0, q0[14] +; CHECK-NEXT: vmov.16 q1[6], r0 +; CHECK-NEXT: vmov.u8 r0, q0[15] +; CHECK-NEXT: vmov.16 q1[7], r0 +; CHECK-NEXT: vmullb.s8 q1, q1, q1 +; CHECK-NEXT: vmov.u16 r0, q1[4] +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.u16 r0, q1[5] +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.u16 r0, q1[6] +; CHECK-NEXT: vmov.32 q2[2], r0 +; CHECK-NEXT: vmov.u16 r0, q1[7] +; CHECK-NEXT: vmov.32 q2[3], r0 +; CHECK-NEXT: vmov.u8 r0, q0[0] +; CHECK-NEXT: vmov.16 q3[0], r0 +; CHECK-NEXT: vmov.u8 r0, q0[1] +; CHECK-NEXT: vmov.16 q3[1], r0 +; CHECK-NEXT: vmov.u8 r0, q0[2] +; CHECK-NEXT: vmov.16 q3[2], r0 +; CHECK-NEXT: vmov.u8 r0, q0[3] +; CHECK-NEXT: vmov.16 q3[3], r0 +; CHECK-NEXT: vmov.u8 r0, q0[4] +; CHECK-NEXT: vmov.16 q3[4], r0 +; CHECK-NEXT: vmov.u8 r0, q0[5] +; CHECK-NEXT: vmov.16 q3[5], r0 +; CHECK-NEXT: vmov.u8 r0, q0[6] +; CHECK-NEXT: vmov.16 q3[6], r0 +; CHECK-NEXT: vmov.u8 r0, q0[7] +; CHECK-NEXT: vmov.16 q3[7], r0 +; CHECK-NEXT: vmovlb.u16 q2, q2 +; CHECK-NEXT: vmullb.s8 q0, q3, q3 +; CHECK-NEXT: vmov.u16 r0, q0[4] +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[6] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[7] +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vmov.u16 r0, q1[0] +; CHECK-NEXT: vmovlb.u16 q3, q3 +; CHECK-NEXT: vadd.i32 q2, q3, q2 +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.u16 r0, q1[1] +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov.u16 r0, q1[2] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.u16 r0, q1[3] +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vmovlb.u16 q1, q3 +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[2] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[3] +; 
CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vmovlb.u16 q0, q3 +; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vadd.i32 q0, q0, q2 +; CHECK-NEXT: vaddv.u32 r0, q0 +; CHECK-NEXT: bx lr +entry: + %xx = sext <16 x i8> %x to <16 x i16> + %m = mul <16 x i16> %xx, %xx + %ma = zext <16 x i16> %m to <16 x i32> + %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %ma) + ret i32 %z +} + define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_zext(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: add_v4i8_v4i32_zext: ; CHECK: @ %bb.0: @ %entry @@ -990,6 +1593,308 @@ entry: ret i64 %r } +define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_acc_zext(<8 x i16> %x, <8 x i16> %y, i64 %a) { +; CHECK-LABEL: add_v8i16_v8i32_v8i64_acc_zext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov.u16 r2, q1[0] +; CHECK-NEXT: vmov.32 q2[0], r2 +; CHECK-NEXT: vmov.u16 r2, q1[1] +; CHECK-NEXT: vmov.32 q2[1], r2 +; CHECK-NEXT: vmov.u16 r2, q1[2] +; CHECK-NEXT: vmov.32 q2[2], r2 +; CHECK-NEXT: vmov.u16 r2, q1[3] +; CHECK-NEXT: vmov.32 q2[3], r2 +; CHECK-NEXT: vmov.u16 r2, q0[0] +; CHECK-NEXT: vmov.32 q3[0], r2 +; CHECK-NEXT: vmov.u16 r2, q0[1] +; CHECK-NEXT: vmov.32 q3[1], r2 +; CHECK-NEXT: vmov.u16 r2, q0[2] +; CHECK-NEXT: vmov.32 q3[2], r2 +; CHECK-NEXT: vmov.u16 r2, q0[3] +; CHECK-NEXT: vmov.32 q3[3], r2 +; CHECK-NEXT: vmullb.u16 q3, q3, q2 +; CHECK-NEXT: vmov.i64 q2, #0xffffffff +; CHECK-NEXT: vmov.f32 s16, s12 +; CHECK-NEXT: vmov.f32 s18, s13 +; CHECK-NEXT: vand q4, q4, q2 +; CHECK-NEXT: vmov r2, s18 +; CHECK-NEXT: vmov r3, s16 +; CHECK-NEXT: vmov r12, s19 +; CHECK-NEXT: vmov lr, s17 +; CHECK-NEXT: vmov.f32 s16, s14 +; CHECK-NEXT: vmov.f32 s18, s15 +; CHECK-NEXT: vand q3, q4, q2 +; CHECK-NEXT: adds r4, r3, r2 +; CHECK-NEXT: vmov r3, s12 +; CHECK-NEXT: vmov r2, s13 +; CHECK-NEXT: adc.w r12, r12, lr +; CHECK-NEXT: adds.w lr, r4, r3 +; CHECK-NEXT: vmov r3, s14 +; CHECK-NEXT: adc.w r4, r12, r2 +; CHECK-NEXT: vmov r2, s15 +; CHECK-NEXT: adds.w r12, lr, r3 +; CHECK-NEXT: adc.w r3, r4, r2 +; CHECK-NEXT: vmov.u16 r2, q1[4] +; CHECK-NEXT: vmov.32 q3[0], r2 +; CHECK-NEXT: vmov.u16 r2, q1[5] +; CHECK-NEXT: vmov.32 q3[1], r2 +; CHECK-NEXT: vmov.u16 r2, q1[6] +; CHECK-NEXT: vmov.32 q3[2], r2 +; CHECK-NEXT: vmov.u16 r2, q1[7] +; CHECK-NEXT: vmov.32 q3[3], r2 +; CHECK-NEXT: vmov.u16 r2, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r2 +; CHECK-NEXT: vmov.u16 r2, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r2 +; CHECK-NEXT: vmov.u16 r2, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r2 +; CHECK-NEXT: vmov.u16 r2, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r2 +; CHECK-NEXT: vmullb.u16 q0, q1, q3 +; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: vmov.f32 s6, s1 +; CHECK-NEXT: vand q1, q1, q2 +; CHECK-NEXT: vmov r4, s4 +; CHECK-NEXT: vmov r2, s5 +; CHECK-NEXT: adds.w r4, r4, r12 +; CHECK-NEXT: adc.w r12, r3, r2 +; CHECK-NEXT: vmov r2, s6 +; CHECK-NEXT: vmov r3, s7 +; CHECK-NEXT: vmov.f32 s4, s2 +; CHECK-NEXT: vmov.f32 s6, s3 +; CHECK-NEXT: vand q0, q1, q2 +; CHECK-NEXT: adds.w lr, r4, r2 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vmov r4, s1 +; CHECK-NEXT: adc.w r3, r3, r12 +; CHECK-NEXT: adds.w r12, lr, r2 +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: adcs r3, r4 +; CHECK-NEXT: vmov r4, s3 +; CHECK-NEXT: adds.w r2, r2, r12 +; CHECK-NEXT: adcs r3, r4 +; CHECK-NEXT: adds r0, r0, r2 +; CHECK-NEXT: adcs r1, r3 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r4, pc} +entry: + %xx = zext <8 x i16> %x to <8 x i32> + %yy = zext <8 x i16> %y to <8 x i32> + %m = mul <8 x 
i32> %xx, %yy + %ma = zext <8 x i32> %m to <8 x i64> + %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma) + %r = add i64 %z, %a + ret i64 %r +} + +define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %y, i64 %a) { +; CHECK-LABEL: add_v8i16_v8i32_v8i64_acc_sext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov.u16 r2, q1[0] +; CHECK-NEXT: vmov.32 q2[0], r2 +; CHECK-NEXT: vmov.u16 r2, q1[1] +; CHECK-NEXT: vmov.32 q2[1], r2 +; CHECK-NEXT: vmov.u16 r2, q1[2] +; CHECK-NEXT: vmov.32 q2[2], r2 +; CHECK-NEXT: vmov.u16 r2, q1[3] +; CHECK-NEXT: vmov.32 q2[3], r2 +; CHECK-NEXT: vmov.u16 r2, q0[0] +; CHECK-NEXT: vmov.32 q3[0], r2 +; CHECK-NEXT: vmov.u16 r2, q0[1] +; CHECK-NEXT: vmov.32 q3[1], r2 +; CHECK-NEXT: vmov.u16 r2, q0[2] +; CHECK-NEXT: vmov.32 q3[2], r2 +; CHECK-NEXT: vmov.u16 r2, q0[3] +; CHECK-NEXT: vmov.32 q3[3], r2 +; CHECK-NEXT: vmullb.s16 q2, q3, q2 +; CHECK-NEXT: vmov.f32 s12, s8 +; CHECK-NEXT: vmov.f32 s14, s9 +; CHECK-NEXT: vmov r2, s12 +; CHECK-NEXT: vmov.32 q4[0], r2 +; CHECK-NEXT: asrs r2, r2, #31 +; CHECK-NEXT: vmov.32 q4[1], r2 +; CHECK-NEXT: vmov r2, s14 +; CHECK-NEXT: vmov.32 q4[2], r2 +; CHECK-NEXT: vmov.f32 s12, s10 +; CHECK-NEXT: vmov.f32 s14, s11 +; CHECK-NEXT: asrs r3, r2, #31 +; CHECK-NEXT: vmov.32 q4[3], r3 +; CHECK-NEXT: vmov lr, s18 +; CHECK-NEXT: vmov r3, s16 +; CHECK-NEXT: vmov r12, s17 +; CHECK-NEXT: adds.w lr, lr, r3 +; CHECK-NEXT: adc.w r12, r12, r2, asr #31 +; CHECK-NEXT: vmov r2, s12 +; CHECK-NEXT: vmov.32 q2[0], r2 +; CHECK-NEXT: asrs r2, r2, #31 +; CHECK-NEXT: vmov.32 q2[1], r2 +; CHECK-NEXT: vmov r2, s14 +; CHECK-NEXT: vmov.32 q2[2], r2 +; CHECK-NEXT: asrs r3, r2, #31 +; CHECK-NEXT: vmov.32 q2[3], r3 +; CHECK-NEXT: vmov r4, s8 +; CHECK-NEXT: vmov r3, s9 +; CHECK-NEXT: adds.w r4, r4, lr +; CHECK-NEXT: adc.w lr, r12, r3 +; CHECK-NEXT: vmov r3, s10 +; CHECK-NEXT: adds.w r12, r4, r3 +; CHECK-NEXT: adc.w lr, lr, r2, asr #31 +; CHECK-NEXT: vmov.u16 r2, q1[4] +; CHECK-NEXT: vmov.32 q2[0], r2 +; CHECK-NEXT: vmov.u16 r2, q1[5] +; CHECK-NEXT: vmov.32 q2[1], r2 +; CHECK-NEXT: vmov.u16 r2, q1[6] +; CHECK-NEXT: vmov.32 q2[2], r2 +; CHECK-NEXT: vmov.u16 r2, q1[7] +; CHECK-NEXT: vmov.32 q2[3], r2 +; CHECK-NEXT: vmov.u16 r2, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r2 +; CHECK-NEXT: vmov.u16 r2, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r2 +; CHECK-NEXT: vmov.u16 r2, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r2 +; CHECK-NEXT: vmov.u16 r2, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r2 +; CHECK-NEXT: vmullb.s16 q0, q1, q2 +; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: vmov.f32 s6, s1 +; CHECK-NEXT: vmov r2, s4 +; CHECK-NEXT: vmov.32 q2[0], r2 +; CHECK-NEXT: asrs r2, r2, #31 +; CHECK-NEXT: vmov.32 q2[1], r2 +; CHECK-NEXT: vmov r2, s6 +; CHECK-NEXT: vmov.32 q2[2], r2 +; CHECK-NEXT: vmov.f32 s4, s2 +; CHECK-NEXT: vmov.f32 s6, s3 +; CHECK-NEXT: asrs r4, r2, #31 +; CHECK-NEXT: vmov.32 q2[3], r4 +; CHECK-NEXT: vmov r3, s8 +; CHECK-NEXT: vmov r4, s9 +; CHECK-NEXT: adds.w r12, r12, r3 +; CHECK-NEXT: vmov r3, s10 +; CHECK-NEXT: adc.w r4, r4, lr +; CHECK-NEXT: adds.w r3, r3, r12 +; CHECK-NEXT: adc.w r2, r4, r2, asr #31 +; CHECK-NEXT: vmov r4, s4 +; CHECK-NEXT: adds r3, r3, r4 +; CHECK-NEXT: adc.w r2, r2, r4, asr #31 +; CHECK-NEXT: vmov r4, s6 +; CHECK-NEXT: adds r3, r3, r4 +; CHECK-NEXT: adc.w r2, r2, r4, asr #31 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r4, pc} +entry: + %xx = 
sext <8 x i16> %x to <8 x i32> + %yy = sext <8 x i16> %y to <8 x i32> + %m = mul <8 x i32> %xx, %yy + %ma = sext <8 x i32> %m to <8 x i64> + %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma) + %r = add i64 %z, %a + ret i64 %r +} + +define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_acc_sextzext(<8 x i16> %x, <8 x i16> %y, i64 %a) { +; CHECK-LABEL: add_v8i16_v8i32_v8i64_acc_sextzext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: vmov.u16 r2, q0[0] +; CHECK-NEXT: vmov.32 q1[0], r2 +; CHECK-NEXT: vmov.u16 r2, q0[1] +; CHECK-NEXT: vmov.32 q1[1], r2 +; CHECK-NEXT: vmov.u16 r2, q0[2] +; CHECK-NEXT: vmov.32 q1[2], r2 +; CHECK-NEXT: vmov.u16 r2, q0[3] +; CHECK-NEXT: vmov.32 q1[3], r2 +; CHECK-NEXT: vmullb.s16 q1, q1, q1 +; CHECK-NEXT: vmov.f32 s8, s4 +; CHECK-NEXT: vmov.f32 s10, s5 +; CHECK-NEXT: vmov r2, s8 +; CHECK-NEXT: vmov.32 q3[0], r2 +; CHECK-NEXT: asrs r2, r2, #31 +; CHECK-NEXT: vmov.32 q3[1], r2 +; CHECK-NEXT: vmov r2, s10 +; CHECK-NEXT: vmov.32 q3[2], r2 +; CHECK-NEXT: vmov.f32 s8, s6 +; CHECK-NEXT: vmov.f32 s10, s7 +; CHECK-NEXT: asrs r3, r2, #31 +; CHECK-NEXT: vmov.32 q3[3], r3 +; CHECK-NEXT: vmov lr, s14 +; CHECK-NEXT: vmov r3, s12 +; CHECK-NEXT: vmov r12, s13 +; CHECK-NEXT: adds.w lr, lr, r3 +; CHECK-NEXT: adc.w r12, r12, r2, asr #31 +; CHECK-NEXT: vmov r2, s8 +; CHECK-NEXT: vmov.32 q1[0], r2 +; CHECK-NEXT: asrs r2, r2, #31 +; CHECK-NEXT: vmov.32 q1[1], r2 +; CHECK-NEXT: vmov r2, s10 +; CHECK-NEXT: vmov.32 q1[2], r2 +; CHECK-NEXT: asrs r3, r2, #31 +; CHECK-NEXT: vmov.32 q1[3], r3 +; CHECK-NEXT: vmov r4, s4 +; CHECK-NEXT: vmov r3, s5 +; CHECK-NEXT: adds.w r4, r4, lr +; CHECK-NEXT: adc.w lr, r12, r3 +; CHECK-NEXT: vmov r3, s6 +; CHECK-NEXT: adds.w r12, r4, r3 +; CHECK-NEXT: adc.w lr, lr, r2, asr #31 +; CHECK-NEXT: vmov.u16 r2, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r2 +; CHECK-NEXT: vmov.u16 r2, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r2 +; CHECK-NEXT: vmov.u16 r2, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r2 +; CHECK-NEXT: vmov.u16 r2, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r2 +; CHECK-NEXT: vmullb.s16 q0, q1, q1 +; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: vmov.f32 s6, s1 +; CHECK-NEXT: vmov r2, s4 +; CHECK-NEXT: vmov.32 q2[0], r2 +; CHECK-NEXT: asrs r2, r2, #31 +; CHECK-NEXT: vmov.32 q2[1], r2 +; CHECK-NEXT: vmov r2, s6 +; CHECK-NEXT: vmov.32 q2[2], r2 +; CHECK-NEXT: vmov.f32 s4, s2 +; CHECK-NEXT: vmov.f32 s6, s3 +; CHECK-NEXT: asrs r4, r2, #31 +; CHECK-NEXT: vmov.32 q2[3], r4 +; CHECK-NEXT: vmov r3, s8 +; CHECK-NEXT: vmov r4, s9 +; CHECK-NEXT: adds.w r12, r12, r3 +; CHECK-NEXT: vmov r3, s10 +; CHECK-NEXT: adc.w r4, r4, lr +; CHECK-NEXT: adds.w r3, r3, r12 +; CHECK-NEXT: adc.w r2, r4, r2, asr #31 +; CHECK-NEXT: vmov r4, s4 +; CHECK-NEXT: adds r3, r3, r4 +; CHECK-NEXT: adc.w r2, r2, r4, asr #31 +; CHECK-NEXT: vmov r4, s6 +; CHECK-NEXT: adds r3, r3, r4 +; CHECK-NEXT: adc.w r2, r2, r4, asr #31 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: pop {r4, pc} +entry: + %xx = sext <8 x i16> %x to <8 x i32> + %m = mul <8 x i32> %xx, %xx + %ma = sext <8 x i32> %m to <8 x i64> + %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma) + %r = add i64 %z, %a + ret i64 %r +} + define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, <2 x i16> %y, i64 %a) { ; CHECK-LABEL: add_v2i16_v2i64_acc_zext: ; CHECK: @ %bb.0: @ %entry @@ -1071,6 +1976,339 @@ entry: ret i32 %r } +define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_acc_zext(<16 x i8> %x, <16 x i8> %y, i32 %a) { +; CHECK-LABEL: 
add_v16i8_v16i16_v16i32_acc_zext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov.u8 r1, q1[8] +; CHECK-NEXT: vmov.16 q2[0], r1 +; CHECK-NEXT: vmov.u8 r1, q1[9] +; CHECK-NEXT: vmov.16 q2[1], r1 +; CHECK-NEXT: vmov.u8 r1, q1[10] +; CHECK-NEXT: vmov.16 q2[2], r1 +; CHECK-NEXT: vmov.u8 r1, q1[11] +; CHECK-NEXT: vmov.16 q2[3], r1 +; CHECK-NEXT: vmov.u8 r1, q1[12] +; CHECK-NEXT: vmov.16 q2[4], r1 +; CHECK-NEXT: vmov.u8 r1, q1[13] +; CHECK-NEXT: vmov.16 q2[5], r1 +; CHECK-NEXT: vmov.u8 r1, q1[14] +; CHECK-NEXT: vmov.16 q2[6], r1 +; CHECK-NEXT: vmov.u8 r1, q1[15] +; CHECK-NEXT: vmov.16 q2[7], r1 +; CHECK-NEXT: vmov.u8 r1, q0[8] +; CHECK-NEXT: vmov.16 q3[0], r1 +; CHECK-NEXT: vmov.u8 r1, q0[9] +; CHECK-NEXT: vmov.16 q3[1], r1 +; CHECK-NEXT: vmov.u8 r1, q0[10] +; CHECK-NEXT: vmov.16 q3[2], r1 +; CHECK-NEXT: vmov.u8 r1, q0[11] +; CHECK-NEXT: vmov.16 q3[3], r1 +; CHECK-NEXT: vmov.u8 r1, q0[12] +; CHECK-NEXT: vmov.16 q3[4], r1 +; CHECK-NEXT: vmov.u8 r1, q0[13] +; CHECK-NEXT: vmov.16 q3[5], r1 +; CHECK-NEXT: vmov.u8 r1, q0[14] +; CHECK-NEXT: vmov.16 q3[6], r1 +; CHECK-NEXT: vmov.u8 r1, q0[15] +; CHECK-NEXT: vmov.16 q3[7], r1 +; CHECK-NEXT: vmullb.u8 q2, q3, q2 +; CHECK-NEXT: vmov.u16 r1, q2[4] +; CHECK-NEXT: vmov.32 q3[0], r1 +; CHECK-NEXT: vmov.u16 r1, q2[5] +; CHECK-NEXT: vmov.32 q3[1], r1 +; CHECK-NEXT: vmov.u16 r1, q2[6] +; CHECK-NEXT: vmov.32 q3[2], r1 +; CHECK-NEXT: vmov.u16 r1, q2[7] +; CHECK-NEXT: vmov.32 q3[3], r1 +; CHECK-NEXT: vmov.u8 r1, q1[0] +; CHECK-NEXT: vmov.16 q4[0], r1 +; CHECK-NEXT: vmov.u8 r1, q1[1] +; CHECK-NEXT: vmov.16 q4[1], r1 +; CHECK-NEXT: vmov.u8 r1, q1[2] +; CHECK-NEXT: vmov.16 q4[2], r1 +; CHECK-NEXT: vmov.u8 r1, q1[3] +; CHECK-NEXT: vmov.16 q4[3], r1 +; CHECK-NEXT: vmov.u8 r1, q1[4] +; CHECK-NEXT: vmov.16 q4[4], r1 +; CHECK-NEXT: vmov.u8 r1, q1[5] +; CHECK-NEXT: vmov.16 q4[5], r1 +; CHECK-NEXT: vmov.u8 r1, q1[6] +; CHECK-NEXT: vmov.16 q4[6], r1 +; CHECK-NEXT: vmov.u8 r1, q1[7] +; CHECK-NEXT: vmov.16 q4[7], r1 +; CHECK-NEXT: vmov.u8 r1, q0[0] +; CHECK-NEXT: vmov.16 q1[0], r1 +; CHECK-NEXT: vmov.u8 r1, q0[1] +; CHECK-NEXT: vmov.16 q1[1], r1 +; CHECK-NEXT: vmov.u8 r1, q0[2] +; CHECK-NEXT: vmov.16 q1[2], r1 +; CHECK-NEXT: vmov.u8 r1, q0[3] +; CHECK-NEXT: vmov.16 q1[3], r1 +; CHECK-NEXT: vmov.u8 r1, q0[4] +; CHECK-NEXT: vmov.16 q1[4], r1 +; CHECK-NEXT: vmov.u8 r1, q0[5] +; CHECK-NEXT: vmov.16 q1[5], r1 +; CHECK-NEXT: vmov.u8 r1, q0[6] +; CHECK-NEXT: vmov.16 q1[6], r1 +; CHECK-NEXT: vmov.u8 r1, q0[7] +; CHECK-NEXT: vmov.16 q1[7], r1 +; CHECK-NEXT: vmovlb.u16 q3, q3 +; CHECK-NEXT: vmullb.u8 q0, q1, q4 +; CHECK-NEXT: vmov.u16 r1, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r1 +; CHECK-NEXT: vmov.u16 r1, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r1 +; CHECK-NEXT: vmov.u16 r1, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r1 +; CHECK-NEXT: vmov.u16 r1, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r1 +; CHECK-NEXT: vmov.u16 r1, q2[0] +; CHECK-NEXT: vmovlb.u16 q1, q1 +; CHECK-NEXT: vadd.i32 q1, q1, q3 +; CHECK-NEXT: vmov.32 q3[0], r1 +; CHECK-NEXT: vmov.u16 r1, q2[1] +; CHECK-NEXT: vmov.32 q3[1], r1 +; CHECK-NEXT: vmov.u16 r1, q2[2] +; CHECK-NEXT: vmov.32 q3[2], r1 +; CHECK-NEXT: vmov.u16 r1, q2[3] +; CHECK-NEXT: vmov.32 q3[3], r1 +; CHECK-NEXT: vmov.u16 r1, q0[0] +; CHECK-NEXT: vmovlb.u16 q2, q3 +; CHECK-NEXT: vmov.32 q3[0], r1 +; CHECK-NEXT: vmov.u16 r1, q0[1] +; CHECK-NEXT: vmov.32 q3[1], r1 +; CHECK-NEXT: vmov.u16 r1, q0[2] +; CHECK-NEXT: vmov.32 q3[2], r1 +; CHECK-NEXT: vmov.u16 r1, q0[3] +; CHECK-NEXT: vmov.32 q3[3], r1 +; CHECK-NEXT: vmovlb.u16 q0, 
q3 +; CHECK-NEXT: vadd.i32 q0, q0, q2 +; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vaddva.u32 r0, q0 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: bx lr +entry: + %xx = zext <16 x i8> %x to <16 x i16> + %yy = zext <16 x i8> %y to <16 x i16> + %m = mul <16 x i16> %xx, %yy + %ma = zext <16 x i16> %m to <16 x i32> + %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %ma) + %r = add i32 %z, %a + ret i32 %r +} + +define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_acc_sext(<16 x i8> %x, <16 x i8> %y, i32 %a) { +; CHECK-LABEL: add_v16i8_v16i16_v16i32_acc_sext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov.u8 r1, q1[8] +; CHECK-NEXT: vmov.16 q2[0], r1 +; CHECK-NEXT: vmov.u8 r1, q1[9] +; CHECK-NEXT: vmov.16 q2[1], r1 +; CHECK-NEXT: vmov.u8 r1, q1[10] +; CHECK-NEXT: vmov.16 q2[2], r1 +; CHECK-NEXT: vmov.u8 r1, q1[11] +; CHECK-NEXT: vmov.16 q2[3], r1 +; CHECK-NEXT: vmov.u8 r1, q1[12] +; CHECK-NEXT: vmov.16 q2[4], r1 +; CHECK-NEXT: vmov.u8 r1, q1[13] +; CHECK-NEXT: vmov.16 q2[5], r1 +; CHECK-NEXT: vmov.u8 r1, q1[14] +; CHECK-NEXT: vmov.16 q2[6], r1 +; CHECK-NEXT: vmov.u8 r1, q1[15] +; CHECK-NEXT: vmov.16 q2[7], r1 +; CHECK-NEXT: vmov.u8 r1, q0[8] +; CHECK-NEXT: vmov.16 q3[0], r1 +; CHECK-NEXT: vmov.u8 r1, q0[9] +; CHECK-NEXT: vmov.16 q3[1], r1 +; CHECK-NEXT: vmov.u8 r1, q0[10] +; CHECK-NEXT: vmov.16 q3[2], r1 +; CHECK-NEXT: vmov.u8 r1, q0[11] +; CHECK-NEXT: vmov.16 q3[3], r1 +; CHECK-NEXT: vmov.u8 r1, q0[12] +; CHECK-NEXT: vmov.16 q3[4], r1 +; CHECK-NEXT: vmov.u8 r1, q0[13] +; CHECK-NEXT: vmov.16 q3[5], r1 +; CHECK-NEXT: vmov.u8 r1, q0[14] +; CHECK-NEXT: vmov.16 q3[6], r1 +; CHECK-NEXT: vmov.u8 r1, q0[15] +; CHECK-NEXT: vmov.16 q3[7], r1 +; CHECK-NEXT: vmullb.s8 q2, q3, q2 +; CHECK-NEXT: vmov.u16 r1, q2[4] +; CHECK-NEXT: vmov.32 q3[0], r1 +; CHECK-NEXT: vmov.u16 r1, q2[5] +; CHECK-NEXT: vmov.32 q3[1], r1 +; CHECK-NEXT: vmov.u16 r1, q2[6] +; CHECK-NEXT: vmov.32 q3[2], r1 +; CHECK-NEXT: vmov.u16 r1, q2[7] +; CHECK-NEXT: vmov.32 q3[3], r1 +; CHECK-NEXT: vmov.u8 r1, q1[0] +; CHECK-NEXT: vmov.16 q4[0], r1 +; CHECK-NEXT: vmov.u8 r1, q1[1] +; CHECK-NEXT: vmov.16 q4[1], r1 +; CHECK-NEXT: vmov.u8 r1, q1[2] +; CHECK-NEXT: vmov.16 q4[2], r1 +; CHECK-NEXT: vmov.u8 r1, q1[3] +; CHECK-NEXT: vmov.16 q4[3], r1 +; CHECK-NEXT: vmov.u8 r1, q1[4] +; CHECK-NEXT: vmov.16 q4[4], r1 +; CHECK-NEXT: vmov.u8 r1, q1[5] +; CHECK-NEXT: vmov.16 q4[5], r1 +; CHECK-NEXT: vmov.u8 r1, q1[6] +; CHECK-NEXT: vmov.16 q4[6], r1 +; CHECK-NEXT: vmov.u8 r1, q1[7] +; CHECK-NEXT: vmov.16 q4[7], r1 +; CHECK-NEXT: vmov.u8 r1, q0[0] +; CHECK-NEXT: vmov.16 q1[0], r1 +; CHECK-NEXT: vmov.u8 r1, q0[1] +; CHECK-NEXT: vmov.16 q1[1], r1 +; CHECK-NEXT: vmov.u8 r1, q0[2] +; CHECK-NEXT: vmov.16 q1[2], r1 +; CHECK-NEXT: vmov.u8 r1, q0[3] +; CHECK-NEXT: vmov.16 q1[3], r1 +; CHECK-NEXT: vmov.u8 r1, q0[4] +; CHECK-NEXT: vmov.16 q1[4], r1 +; CHECK-NEXT: vmov.u8 r1, q0[5] +; CHECK-NEXT: vmov.16 q1[5], r1 +; CHECK-NEXT: vmov.u8 r1, q0[6] +; CHECK-NEXT: vmov.16 q1[6], r1 +; CHECK-NEXT: vmov.u8 r1, q0[7] +; CHECK-NEXT: vmov.16 q1[7], r1 +; CHECK-NEXT: vmovlb.s16 q3, q3 +; CHECK-NEXT: vmullb.s8 q0, q1, q4 +; CHECK-NEXT: vmov.u16 r1, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r1 +; CHECK-NEXT: vmov.u16 r1, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r1 +; CHECK-NEXT: vmov.u16 r1, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r1 +; CHECK-NEXT: vmov.u16 r1, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r1 +; CHECK-NEXT: vmov.u16 r1, q2[0] +; CHECK-NEXT: vmovlb.s16 q1, q1 +; CHECK-NEXT: vadd.i32 q1, q1, q3 +; CHECK-NEXT: 
vmov.32 q3[0], r1 +; CHECK-NEXT: vmov.u16 r1, q2[1] +; CHECK-NEXT: vmov.32 q3[1], r1 +; CHECK-NEXT: vmov.u16 r1, q2[2] +; CHECK-NEXT: vmov.32 q3[2], r1 +; CHECK-NEXT: vmov.u16 r1, q2[3] +; CHECK-NEXT: vmov.32 q3[3], r1 +; CHECK-NEXT: vmov.u16 r1, q0[0] +; CHECK-NEXT: vmovlb.s16 q2, q3 +; CHECK-NEXT: vmov.32 q3[0], r1 +; CHECK-NEXT: vmov.u16 r1, q0[1] +; CHECK-NEXT: vmov.32 q3[1], r1 +; CHECK-NEXT: vmov.u16 r1, q0[2] +; CHECK-NEXT: vmov.32 q3[2], r1 +; CHECK-NEXT: vmov.u16 r1, q0[3] +; CHECK-NEXT: vmov.32 q3[3], r1 +; CHECK-NEXT: vmovlb.s16 q0, q3 +; CHECK-NEXT: vadd.i32 q0, q0, q2 +; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vaddva.u32 r0, q0 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: bx lr +entry: + %xx = sext <16 x i8> %x to <16 x i16> + %yy = sext <16 x i8> %y to <16 x i16> + %m = mul <16 x i16> %xx, %yy + %ma = sext <16 x i16> %m to <16 x i32> + %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %ma) + %r = add i32 %z, %a + ret i32 %r +} + +define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_acc_sextzext(<16 x i8> %x, i32 %a) { +; CHECK-LABEL: add_v16i8_v16i16_v16i32_acc_sextzext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.u8 r1, q0[8] +; CHECK-NEXT: vmov.16 q1[0], r1 +; CHECK-NEXT: vmov.u8 r1, q0[9] +; CHECK-NEXT: vmov.16 q1[1], r1 +; CHECK-NEXT: vmov.u8 r1, q0[10] +; CHECK-NEXT: vmov.16 q1[2], r1 +; CHECK-NEXT: vmov.u8 r1, q0[11] +; CHECK-NEXT: vmov.16 q1[3], r1 +; CHECK-NEXT: vmov.u8 r1, q0[12] +; CHECK-NEXT: vmov.16 q1[4], r1 +; CHECK-NEXT: vmov.u8 r1, q0[13] +; CHECK-NEXT: vmov.16 q1[5], r1 +; CHECK-NEXT: vmov.u8 r1, q0[14] +; CHECK-NEXT: vmov.16 q1[6], r1 +; CHECK-NEXT: vmov.u8 r1, q0[15] +; CHECK-NEXT: vmov.16 q1[7], r1 +; CHECK-NEXT: vmullb.s8 q1, q1, q1 +; CHECK-NEXT: vmov.u16 r1, q1[4] +; CHECK-NEXT: vmov.32 q2[0], r1 +; CHECK-NEXT: vmov.u16 r1, q1[5] +; CHECK-NEXT: vmov.32 q2[1], r1 +; CHECK-NEXT: vmov.u16 r1, q1[6] +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: vmov.u16 r1, q1[7] +; CHECK-NEXT: vmov.32 q2[3], r1 +; CHECK-NEXT: vmov.u8 r1, q0[0] +; CHECK-NEXT: vmov.16 q3[0], r1 +; CHECK-NEXT: vmov.u8 r1, q0[1] +; CHECK-NEXT: vmov.16 q3[1], r1 +; CHECK-NEXT: vmov.u8 r1, q0[2] +; CHECK-NEXT: vmov.16 q3[2], r1 +; CHECK-NEXT: vmov.u8 r1, q0[3] +; CHECK-NEXT: vmov.16 q3[3], r1 +; CHECK-NEXT: vmov.u8 r1, q0[4] +; CHECK-NEXT: vmov.16 q3[4], r1 +; CHECK-NEXT: vmov.u8 r1, q0[5] +; CHECK-NEXT: vmov.16 q3[5], r1 +; CHECK-NEXT: vmov.u8 r1, q0[6] +; CHECK-NEXT: vmov.16 q3[6], r1 +; CHECK-NEXT: vmov.u8 r1, q0[7] +; CHECK-NEXT: vmov.16 q3[7], r1 +; CHECK-NEXT: vmovlb.u16 q2, q2 +; CHECK-NEXT: vmullb.s8 q0, q3, q3 +; CHECK-NEXT: vmov.u16 r1, q0[4] +; CHECK-NEXT: vmov.32 q3[0], r1 +; CHECK-NEXT: vmov.u16 r1, q0[5] +; CHECK-NEXT: vmov.32 q3[1], r1 +; CHECK-NEXT: vmov.u16 r1, q0[6] +; CHECK-NEXT: vmov.32 q3[2], r1 +; CHECK-NEXT: vmov.u16 r1, q0[7] +; CHECK-NEXT: vmov.32 q3[3], r1 +; CHECK-NEXT: vmov.u16 r1, q1[0] +; CHECK-NEXT: vmovlb.u16 q3, q3 +; CHECK-NEXT: vadd.i32 q2, q3, q2 +; CHECK-NEXT: vmov.32 q3[0], r1 +; CHECK-NEXT: vmov.u16 r1, q1[1] +; CHECK-NEXT: vmov.32 q3[1], r1 +; CHECK-NEXT: vmov.u16 r1, q1[2] +; CHECK-NEXT: vmov.32 q3[2], r1 +; CHECK-NEXT: vmov.u16 r1, q1[3] +; CHECK-NEXT: vmov.32 q3[3], r1 +; CHECK-NEXT: vmov.u16 r1, q0[0] +; CHECK-NEXT: vmovlb.u16 q1, q3 +; CHECK-NEXT: vmov.32 q3[0], r1 +; CHECK-NEXT: vmov.u16 r1, q0[1] +; CHECK-NEXT: vmov.32 q3[1], r1 +; CHECK-NEXT: vmov.u16 r1, q0[2] +; CHECK-NEXT: vmov.32 q3[2], r1 +; CHECK-NEXT: vmov.u16 r1, q0[3] +; CHECK-NEXT: vmov.32 q3[3], r1 +; CHECK-NEXT: vmovlb.u16 q0, q3 +; CHECK-NEXT: vadd.i32 q0, 
q0, q1 +; CHECK-NEXT: vadd.i32 q0, q0, q2 +; CHECK-NEXT: vaddva.u32 r0, q0 +; CHECK-NEXT: bx lr +entry: + %xx = sext <16 x i8> %x to <16 x i16> + %m = mul <16 x i16> %xx, %xx + %ma = zext <16 x i16> %m to <16 x i32> + %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %ma) + %r = add i32 %z, %a + ret i32 %r +} + define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_zext(<4 x i8> %x, <4 x i8> %y, i32 %a) { ; CHECK-LABEL: add_v4i8_v4i32_acc_zext: ; CHECK: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll index f30856d32b113..bc316c3c2478a 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll @@ -236,6 +236,483 @@ entry: ret i64 %z } +define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_zext(<8 x i16> %x, <8 x i16> %y, <8 x i16> %b) { +; CHECK-LABEL: add_v8i16_v8i32_v8i64_zext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vmov.i8 q3, #0x0 +; CHECK-NEXT: vmov.i8 q4, #0xff +; CHECK-NEXT: vcmp.i16 eq, q2, zr +; CHECK-NEXT: vpsel q3, q4, q3 +; CHECK-NEXT: vmov.u16 r0, q3[0] +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.u16 r0, q3[1] +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.u16 r0, q3[2] +; CHECK-NEXT: vmov.32 q2[2], r0 +; CHECK-NEXT: vmov.u16 r0, q3[3] +; CHECK-NEXT: vmov.32 q2[3], r0 +; CHECK-NEXT: vcmp.i32 ne, q2, zr +; CHECK-NEXT: vmrs r12, p0 +; CHECK-NEXT: and r1, r12, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: vmov.32 q4[0], r1 +; CHECK-NEXT: vmov.32 q4[1], r1 +; CHECK-NEXT: ubfx r1, r12, #4, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: vmov.32 q4[2], r1 +; CHECK-NEXT: vmov.32 q4[3], r1 +; CHECK-NEXT: vmov.u16 r1, q1[0] +; CHECK-NEXT: vmov.32 q2[0], r1 +; CHECK-NEXT: vmov.u16 r1, q1[1] +; CHECK-NEXT: vmov.32 q2[1], r1 +; CHECK-NEXT: vmov.u16 r1, q1[2] +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: vmov.u16 r1, q1[3] +; CHECK-NEXT: vmov.32 q2[3], r1 +; CHECK-NEXT: vmov.u16 r1, q0[0] +; CHECK-NEXT: vmov.32 q5[0], r1 +; CHECK-NEXT: vmov.u16 r1, q0[1] +; CHECK-NEXT: vmov.32 q5[1], r1 +; CHECK-NEXT: vmov.u16 r1, q0[2] +; CHECK-NEXT: vmov.32 q5[2], r1 +; CHECK-NEXT: vmov.u16 r1, q0[3] +; CHECK-NEXT: vmov.32 q5[3], r1 +; CHECK-NEXT: vmullb.u16 q5, q5, q2 +; CHECK-NEXT: vmov.i64 q2, #0xffffffff +; CHECK-NEXT: vmov.f32 s24, s20 +; CHECK-NEXT: vmov.f32 s26, s21 +; CHECK-NEXT: vand q6, q6, q2 +; CHECK-NEXT: vand q4, q6, q4 +; CHECK-NEXT: vmov.f32 s24, s22 +; CHECK-NEXT: vmov r3, s18 +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: vmov r1, s19 +; CHECK-NEXT: vmov r2, s17 +; CHECK-NEXT: vmov.f32 s26, s23 +; CHECK-NEXT: vand q5, q6, q2 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: ubfx r2, r12, #8, #1 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: vmov.32 q4[0], r2 +; CHECK-NEXT: vmov.32 q4[1], r2 +; CHECK-NEXT: ubfx r2, r12, #12, #1 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: vmov.32 q4[2], r2 +; CHECK-NEXT: vmov.32 q4[3], r2 +; CHECK-NEXT: vand q4, q5, q4 +; CHECK-NEXT: vmov r3, s16 +; CHECK-NEXT: vmov r2, s17 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: vmov r3, s18 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: vmov r2, s19 +; CHECK-NEXT: adds.w r12, r0, r3 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: vmov.u16 r2, q3[4] +; CHECK-NEXT: vmov.32 q4[0], r2 +; CHECK-NEXT: vmov.u16 r2, q3[5] +; CHECK-NEXT: vmov.32 q4[1], r2 +; CHECK-NEXT: vmov.u16 r2, 
q3[6] +; CHECK-NEXT: vmov.32 q4[2], r2 +; CHECK-NEXT: vmov.u16 r2, q3[7] +; CHECK-NEXT: vmov.32 q4[3], r2 +; CHECK-NEXT: vcmp.i32 ne, q4, zr +; CHECK-NEXT: vmrs lr, p0 +; CHECK-NEXT: and r3, lr, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: vmov.32 q3[0], r3 +; CHECK-NEXT: vmov.32 q3[1], r3 +; CHECK-NEXT: ubfx r3, lr, #4, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: vmov.32 q3[2], r3 +; CHECK-NEXT: vmov.32 q3[3], r3 +; CHECK-NEXT: vmov.u16 r3, q1[4] +; CHECK-NEXT: vmov.32 q4[0], r3 +; CHECK-NEXT: vmov.u16 r3, q1[5] +; CHECK-NEXT: vmov.32 q4[1], r3 +; CHECK-NEXT: vmov.u16 r3, q1[6] +; CHECK-NEXT: vmov.32 q4[2], r3 +; CHECK-NEXT: vmov.u16 r3, q1[7] +; CHECK-NEXT: vmov.32 q4[3], r3 +; CHECK-NEXT: vmov.u16 r3, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r3 +; CHECK-NEXT: vmov.u16 r3, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r3 +; CHECK-NEXT: vmov.u16 r3, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r3 +; CHECK-NEXT: vmov.u16 r3, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r3 +; CHECK-NEXT: vmullb.u16 q0, q1, q4 +; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: vmov.f32 s6, s1 +; CHECK-NEXT: vand q1, q1, q2 +; CHECK-NEXT: vand q1, q1, q3 +; CHECK-NEXT: vmov.f32 s12, s2 +; CHECK-NEXT: vmov r0, s4 +; CHECK-NEXT: vmov r3, s5 +; CHECK-NEXT: vmov r2, s6 +; CHECK-NEXT: vmov.f32 s14, s3 +; CHECK-NEXT: vand q0, q3, q2 +; CHECK-NEXT: adds.w r0, r0, r12 +; CHECK-NEXT: adcs r1, r3 +; CHECK-NEXT: vmov r3, s7 +; CHECK-NEXT: adds r0, r0, r2 +; CHECK-NEXT: ubfx r2, lr, #8, #1 +; CHECK-NEXT: rsb.w r2, r2, #0 +; CHECK-NEXT: vmov.32 q1[0], r2 +; CHECK-NEXT: vmov.32 q1[1], r2 +; CHECK-NEXT: ubfx r2, lr, #12, #1 +; CHECK-NEXT: rsb.w r2, r2, #0 +; CHECK-NEXT: vmov.32 q1[2], r2 +; CHECK-NEXT: vmov.32 q1[3], r2 +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: vmov r2, s1 +; CHECK-NEXT: adcs r1, r3 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: vmov r2, s3 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: pop {r7, pc} +entry: + %c = icmp eq <8 x i16> %b, zeroinitializer + %xx = zext <8 x i16> %x to <8 x i32> + %yy = zext <8 x i16> %y to <8 x i32> + %m = mul <8 x i32> %xx, %yy + %ma = zext <8 x i32> %m to <8 x i64> + %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer + %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + ret i64 %z +} + +define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_sext(<8 x i16> %x, <8 x i16> %y, <8 x i16> %b) { +; CHECK-LABEL: add_v8i16_v8i32_v8i64_sext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vmov.u16 r0, q1[0] +; CHECK-NEXT: vmov.i8 q6, #0xff +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.u16 r0, q1[1] +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov.u16 r0, q1[2] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.u16 r0, q1[3] +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vmov.32 q4[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vmov.32 q4[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[2] +; CHECK-NEXT: vmov.32 q4[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vmov.32 q4[3], r0 +; CHECK-NEXT: vcmp.i16 eq, q2, zr +; CHECK-NEXT: vmullb.s16 q3, q4, q3 +; CHECK-NEXT: vmov.f32 s20, s12 +; CHECK-NEXT: vmov.f32 s22, s13 +; CHECK-NEXT: vmov r0, s20 +; CHECK-NEXT: vmov.32 q4[0], r0 +; CHECK-NEXT: asrs r0, r0, #31 +; CHECK-NEXT: vmov.32 q4[1], r0 +; CHECK-NEXT: vmov r0, s22 +; CHECK-NEXT: 
vmov.i8 q5, #0x0 +; CHECK-NEXT: vmov.32 q4[2], r0 +; CHECK-NEXT: vpsel q2, q6, q5 +; CHECK-NEXT: asrs r0, r0, #31 +; CHECK-NEXT: vmov.32 q4[3], r0 +; CHECK-NEXT: vmov.u16 r0, q2[0] +; CHECK-NEXT: vmov.32 q5[0], r0 +; CHECK-NEXT: vmov.u16 r0, q2[1] +; CHECK-NEXT: vmov.32 q5[1], r0 +; CHECK-NEXT: vmov.u16 r0, q2[2] +; CHECK-NEXT: vmov.32 q5[2], r0 +; CHECK-NEXT: vmov.u16 r0, q2[3] +; CHECK-NEXT: vmov.32 q5[3], r0 +; CHECK-NEXT: vcmp.i32 ne, q5, zr +; CHECK-NEXT: vmrs r0, p0 +; CHECK-NEXT: and r1, r0, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: vmov.32 q5[0], r1 +; CHECK-NEXT: vmov.32 q5[1], r1 +; CHECK-NEXT: ubfx r1, r0, #4, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: vmov.32 q5[2], r1 +; CHECK-NEXT: vmov.32 q5[3], r1 +; CHECK-NEXT: vand q4, q4, q5 +; CHECK-NEXT: vmov r1, s18 +; CHECK-NEXT: vmov r2, s16 +; CHECK-NEXT: vmov r12, s19 +; CHECK-NEXT: vmov r3, s17 +; CHECK-NEXT: vmov.f32 s16, s14 +; CHECK-NEXT: vmov.f32 s18, s15 +; CHECK-NEXT: adds r1, r1, r2 +; CHECK-NEXT: adc.w r2, r3, r12 +; CHECK-NEXT: vmov r3, s16 +; CHECK-NEXT: vmov.32 q3[0], r3 +; CHECK-NEXT: asrs r3, r3, #31 +; CHECK-NEXT: vmov.32 q3[1], r3 +; CHECK-NEXT: vmov r3, s18 +; CHECK-NEXT: vmov.32 q3[2], r3 +; CHECK-NEXT: asrs r3, r3, #31 +; CHECK-NEXT: vmov.32 q3[3], r3 +; CHECK-NEXT: ubfx r3, r0, #8, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: ubfx r0, r0, #12, #1 +; CHECK-NEXT: vmov.32 q4[0], r3 +; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: vmov.32 q4[1], r3 +; CHECK-NEXT: vmov.32 q4[2], r0 +; CHECK-NEXT: vmov.32 q4[3], r0 +; CHECK-NEXT: vand q3, q3, q4 +; CHECK-NEXT: vmov r3, s12 +; CHECK-NEXT: vmov r0, s13 +; CHECK-NEXT: adds r1, r1, r3 +; CHECK-NEXT: vmov r3, s15 +; CHECK-NEXT: adcs r2, r0 +; CHECK-NEXT: vmov r0, s14 +; CHECK-NEXT: adds.w r12, r1, r0 +; CHECK-NEXT: adc.w r1, r2, r3 +; CHECK-NEXT: vmov.u16 r2, q1[4] +; CHECK-NEXT: vmov.32 q3[0], r2 +; CHECK-NEXT: vmov.u16 r2, q1[5] +; CHECK-NEXT: vmov.32 q3[1], r2 +; CHECK-NEXT: vmov.u16 r2, q1[6] +; CHECK-NEXT: vmov.32 q3[2], r2 +; CHECK-NEXT: vmov.u16 r2, q1[7] +; CHECK-NEXT: vmov.32 q3[3], r2 +; CHECK-NEXT: vmov.u16 r2, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r2 +; CHECK-NEXT: vmov.u16 r2, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r2 +; CHECK-NEXT: vmov.u16 r2, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r2 +; CHECK-NEXT: vmov.u16 r2, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r2 +; CHECK-NEXT: vmullb.s16 q0, q1, q3 +; CHECK-NEXT: vmov.f32 s12, s0 +; CHECK-NEXT: vmov.f32 s14, s1 +; CHECK-NEXT: vmov r2, s12 +; CHECK-NEXT: vmov.32 q1[0], r2 +; CHECK-NEXT: asrs r2, r2, #31 +; CHECK-NEXT: vmov.32 q1[1], r2 +; CHECK-NEXT: vmov r2, s14 +; CHECK-NEXT: vmov.32 q1[2], r2 +; CHECK-NEXT: asrs r2, r2, #31 +; CHECK-NEXT: vmov.32 q1[3], r2 +; CHECK-NEXT: vmov.u16 r2, q2[4] +; CHECK-NEXT: vmov.32 q3[0], r2 +; CHECK-NEXT: vmov.u16 r2, q2[5] +; CHECK-NEXT: vmov.32 q3[1], r2 +; CHECK-NEXT: vmov.u16 r2, q2[6] +; CHECK-NEXT: vmov.32 q3[2], r2 +; CHECK-NEXT: vmov.u16 r2, q2[7] +; CHECK-NEXT: vmov.32 q3[3], r2 +; CHECK-NEXT: vcmp.i32 ne, q3, zr +; CHECK-NEXT: vmrs r2, p0 +; CHECK-NEXT: and r3, r2, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: vmov.32 q2[0], r3 +; CHECK-NEXT: vmov.32 q2[1], r3 +; CHECK-NEXT: ubfx r3, r2, #4, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: vmov.32 q2[2], r3 +; CHECK-NEXT: vmov.32 q2[3], r3 +; CHECK-NEXT: vand q1, q1, q2 +; CHECK-NEXT: vmov r0, s4 +; CHECK-NEXT: vmov r3, s5 +; CHECK-NEXT: adds.w r12, r12, r0 +; CHECK-NEXT: vmov r0, s6 +; CHECK-NEXT: adcs r1, r3 +; CHECK-NEXT: vmov r3, s7 +; CHECK-NEXT: vmov.f32 s4, s2 +; CHECK-NEXT: vmov.f32 s6, s3 +; CHECK-NEXT: 
adds.w r0, r0, r12 +; CHECK-NEXT: adcs r1, r3 +; CHECK-NEXT: vmov r3, s4 +; CHECK-NEXT: vmov.32 q0[0], r3 +; CHECK-NEXT: asrs r3, r3, #31 +; CHECK-NEXT: vmov.32 q0[1], r3 +; CHECK-NEXT: vmov r3, s6 +; CHECK-NEXT: vmov.32 q0[2], r3 +; CHECK-NEXT: asrs r3, r3, #31 +; CHECK-NEXT: vmov.32 q0[3], r3 +; CHECK-NEXT: ubfx r3, r2, #8, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: ubfx r2, r2, #12, #1 +; CHECK-NEXT: vmov.32 q1[0], r3 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: vmov.32 q1[1], r3 +; CHECK-NEXT: vmov.32 q1[2], r2 +; CHECK-NEXT: vmov.32 q1[3], r2 +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov r2, s1 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: vmov r2, s3 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <8 x i16> %b, zeroinitializer + %xx = sext <8 x i16> %x to <8 x i32> + %yy = sext <8 x i16> %y to <8 x i32> + %m = mul <8 x i32> %xx, %yy + %ma = sext <8 x i32> %m to <8 x i64> + %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer + %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + ret i64 %z +} + +define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_sextzext(<8 x i16> %x, <8 x i16> %y, <8 x i16> %b) { +; CHECK-LABEL: add_v8i16_v8i32_v8i64_sextzext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov.i8 q1, #0x0 +; CHECK-NEXT: vmov.i8 q3, #0xff +; CHECK-NEXT: vcmp.i16 eq, q2, zr +; CHECK-NEXT: vpsel q2, q3, q1 +; CHECK-NEXT: vmov.u16 r0, q2[0] +; CHECK-NEXT: vmov.32 q1[0], r0 +; CHECK-NEXT: vmov.u16 r0, q2[1] +; CHECK-NEXT: vmov.32 q1[1], r0 +; CHECK-NEXT: vmov.u16 r0, q2[2] +; CHECK-NEXT: vmov.32 q1[2], r0 +; CHECK-NEXT: vmov.u16 r0, q2[3] +; CHECK-NEXT: vmov.32 q1[3], r0 +; CHECK-NEXT: vcmp.i32 ne, q1, zr +; CHECK-NEXT: vmrs r0, p0 +; CHECK-NEXT: and r1, r0, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: vmov.32 q4[0], r1 +; CHECK-NEXT: vmov.32 q4[1], r1 +; CHECK-NEXT: ubfx r1, r0, #4, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: vmov.32 q4[2], r1 +; CHECK-NEXT: vmov.32 q4[3], r1 +; CHECK-NEXT: vmov.u16 r1, q0[0] +; CHECK-NEXT: vmov.32 q1[0], r1 +; CHECK-NEXT: vmov.u16 r1, q0[1] +; CHECK-NEXT: vmov.32 q1[1], r1 +; CHECK-NEXT: vmov.u16 r1, q0[2] +; CHECK-NEXT: vmov.32 q1[2], r1 +; CHECK-NEXT: vmov.u16 r1, q0[3] +; CHECK-NEXT: vmov.32 q1[3], r1 +; CHECK-NEXT: vmullb.s16 q3, q1, q1 +; CHECK-NEXT: vmov.i64 q1, #0xffffffff +; CHECK-NEXT: vmov.f32 s20, s12 +; CHECK-NEXT: vmov.f32 s22, s13 +; CHECK-NEXT: vand q5, q5, q1 +; CHECK-NEXT: vand q4, q5, q4 +; CHECK-NEXT: vmov.f32 s20, s14 +; CHECK-NEXT: vmov r3, s18 +; CHECK-NEXT: vmov r1, s16 +; CHECK-NEXT: vmov r12, s19 +; CHECK-NEXT: vmov r2, s17 +; CHECK-NEXT: vmov.f32 s22, s15 +; CHECK-NEXT: vand q3, q5, q1 +; CHECK-NEXT: adds r1, r1, r3 +; CHECK-NEXT: ubfx r3, r0, #8, #1 +; CHECK-NEXT: rsb.w r3, r3, #0 +; CHECK-NEXT: ubfx r0, r0, #12, #1 +; CHECK-NEXT: vmov.32 q4[0], r3 +; CHECK-NEXT: rsb.w r0, r0, #0 +; CHECK-NEXT: vmov.32 q4[1], r3 +; CHECK-NEXT: adc.w r2, r2, r12 +; CHECK-NEXT: vmov.32 q4[2], r0 +; CHECK-NEXT: vmov.32 q4[3], r0 +; CHECK-NEXT: vand q3, q3, q4 +; CHECK-NEXT: vmov r3, s12 +; CHECK-NEXT: vmov r0, s13 +; CHECK-NEXT: adds r1, r1, r3 +; CHECK-NEXT: vmov r3, s15 +; CHECK-NEXT: adcs r2, r0 +; CHECK-NEXT: vmov r0, s14 +; CHECK-NEXT: adds.w r12, r1, r0 +; CHECK-NEXT: adc.w r1, r2, r3 +; CHECK-NEXT: vmov.u16 r2, q2[4] +; 
CHECK-NEXT: vmov.32 q3[0], r2 +; CHECK-NEXT: vmov.u16 r2, q2[5] +; CHECK-NEXT: vmov.32 q3[1], r2 +; CHECK-NEXT: vmov.u16 r2, q2[6] +; CHECK-NEXT: vmov.32 q3[2], r2 +; CHECK-NEXT: vmov.u16 r2, q2[7] +; CHECK-NEXT: vmov.32 q3[3], r2 +; CHECK-NEXT: vcmp.i32 ne, q3, zr +; CHECK-NEXT: vmrs r2, p0 +; CHECK-NEXT: and r3, r2, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: vmov.32 q2[0], r3 +; CHECK-NEXT: vmov.32 q2[1], r3 +; CHECK-NEXT: ubfx r3, r2, #4, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: vmov.32 q2[2], r3 +; CHECK-NEXT: vmov.32 q2[3], r3 +; CHECK-NEXT: vmov.u16 r3, q0[4] +; CHECK-NEXT: vmov.32 q3[0], r3 +; CHECK-NEXT: vmov.u16 r3, q0[5] +; CHECK-NEXT: vmov.32 q3[1], r3 +; CHECK-NEXT: vmov.u16 r3, q0[6] +; CHECK-NEXT: vmov.32 q3[2], r3 +; CHECK-NEXT: vmov.u16 r3, q0[7] +; CHECK-NEXT: vmov.32 q3[3], r3 +; CHECK-NEXT: vmullb.s16 q0, q3, q3 +; CHECK-NEXT: vmov.f32 s12, s0 +; CHECK-NEXT: vmov.f32 s14, s1 +; CHECK-NEXT: vand q3, q3, q1 +; CHECK-NEXT: vand q2, q3, q2 +; CHECK-NEXT: vmov.f32 s12, s2 +; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vmov r3, s9 +; CHECK-NEXT: vmov.f32 s14, s3 +; CHECK-NEXT: vand q0, q3, q1 +; CHECK-NEXT: adds.w r12, r12, r0 +; CHECK-NEXT: vmov r0, s10 +; CHECK-NEXT: adcs r1, r3 +; CHECK-NEXT: vmov r3, s11 +; CHECK-NEXT: adds.w r0, r0, r12 +; CHECK-NEXT: adcs r1, r3 +; CHECK-NEXT: ubfx r3, r2, #8, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: ubfx r2, r2, #12, #1 +; CHECK-NEXT: vmov.32 q2[0], r3 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: vmov.32 q2[1], r3 +; CHECK-NEXT: vmov.32 q2[2], r2 +; CHECK-NEXT: vmov.32 q2[3], r2 +; CHECK-NEXT: vand q0, q0, q2 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov r2, s1 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: vmov r2, s3 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <8 x i16> %b, zeroinitializer + %xx = sext <8 x i16> %x to <8 x i32> + %m = mul <8 x i32> %xx, %xx + %ma = zext <8 x i32> %m to <8 x i64> + %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer + %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + ret i64 %z +} + define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y, <2 x i16> %b) { ; CHECK-LABEL: add_v2i16_v2i64_zext: ; CHECK: @ %bb.0: @ %entry @@ -347,26 +824,641 @@ define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_zext(<16 x i8> %x, <16 x i8> %y, <1 ; CHECK-NEXT: bx lr entry: %c = icmp eq <16 x i8> %b, zeroinitializer - %xx = zext <16 x i8> %x to <16 x i32> - %yy = zext <16 x i8> %y to <16 x i32> - %m = mul <16 x i32> %xx, %yy - %s = select <16 x i1> %c, <16 x i32> %m, <16 x i32> zeroinitializer + %xx = zext <16 x i8> %x to <16 x i32> + %yy = zext <16 x i8> %y to <16 x i32> + %m = mul <16 x i32> %xx, %yy + %s = select <16 x i1> %c, <16 x i32> %m, <16 x i32> zeroinitializer + %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + ret i32 %z +} + +define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_sext(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b) { +; CHECK-LABEL: add_v16i8_v16i32_sext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpt.i8 eq, q2, zr +; CHECK-NEXT: vmlavt.s8 r0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <16 x i8> %b, zeroinitializer + %xx = sext <16 x i8> %x to <16 x i32> + %yy = sext <16 x i8> %y to <16 x i32> + %m = mul <16 x i32> %xx, %yy + %s = select <16 x i1> %c, <16 x i32> %m, <16 x i32> zeroinitializer + %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> 
%s) + ret i32 %z +} + +define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_zext(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b) { +; CHECK-LABEL: add_v16i8_v16i16_v16i32_zext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: .pad #64 +; CHECK-NEXT: sub sp, #64 +; CHECK-NEXT: vcmp.i8 eq, q2, zr +; CHECK-NEXT: vmov.i8 q2, #0x0 +; CHECK-NEXT: vmov.i8 q7, #0xff +; CHECK-NEXT: vmov q6, q1 +; CHECK-NEXT: vpsel q1, q7, q2 +; CHECK-NEXT: vstrw.32 q2, [sp] @ 16-byte Spill +; CHECK-NEXT: vmov.u8 r0, q1[0] +; CHECK-NEXT: vmov.16 q3[0], r0 +; CHECK-NEXT: vmov.u8 r0, q1[1] +; CHECK-NEXT: vmov.16 q3[1], r0 +; CHECK-NEXT: vmov.u8 r0, q1[2] +; CHECK-NEXT: vmov.16 q3[2], r0 +; CHECK-NEXT: vmov.u8 r0, q1[3] +; CHECK-NEXT: vmov.16 q3[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[4] +; CHECK-NEXT: vmov.16 q3[4], r0 +; CHECK-NEXT: vmov.u8 r0, q1[5] +; CHECK-NEXT: vmov.16 q3[5], r0 +; CHECK-NEXT: vmov.u8 r0, q1[6] +; CHECK-NEXT: vmov.16 q3[6], r0 +; CHECK-NEXT: vmov.u8 r0, q1[7] +; CHECK-NEXT: vmov.16 q3[7], r0 +; CHECK-NEXT: vcmp.i16 ne, q3, zr +; CHECK-NEXT: vpsel q3, q7, q2 +; CHECK-NEXT: vmov.u16 r0, q3[4] +; CHECK-NEXT: vstrw.32 q3, [sp, #48] @ 16-byte Spill +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.u16 r0, q3[5] +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.u16 r0, q3[6] +; CHECK-NEXT: vmov.32 q2[2], r0 +; CHECK-NEXT: vmov.u16 r0, q3[7] +; CHECK-NEXT: vmov.32 q2[3], r0 +; CHECK-NEXT: vmov.u8 r0, q6[0] +; CHECK-NEXT: vmov.16 q4[0], r0 +; CHECK-NEXT: vmov.u8 r0, q6[1] +; CHECK-NEXT: vmov.16 q4[1], r0 +; CHECK-NEXT: vmov.u8 r0, q6[2] +; CHECK-NEXT: vmov.16 q4[2], r0 +; CHECK-NEXT: vmov.u8 r0, q6[3] +; CHECK-NEXT: vmov.16 q4[3], r0 +; CHECK-NEXT: vmov.u8 r0, q6[4] +; CHECK-NEXT: vmov.16 q4[4], r0 +; CHECK-NEXT: vmov.u8 r0, q6[5] +; CHECK-NEXT: vmov.16 q4[5], r0 +; CHECK-NEXT: vmov.u8 r0, q6[6] +; CHECK-NEXT: vmov.16 q4[6], r0 +; CHECK-NEXT: vmov.u8 r0, q6[7] +; CHECK-NEXT: vmov.16 q4[7], r0 +; CHECK-NEXT: vmov.u8 r0, q0[0] +; CHECK-NEXT: vmov.16 q5[0], r0 +; CHECK-NEXT: vmov.u8 r0, q0[1] +; CHECK-NEXT: vmov.16 q5[1], r0 +; CHECK-NEXT: vmov.u8 r0, q0[2] +; CHECK-NEXT: vmov.16 q5[2], r0 +; CHECK-NEXT: vmov.u8 r0, q0[3] +; CHECK-NEXT: vmov.16 q5[3], r0 +; CHECK-NEXT: vmov.u8 r0, q0[4] +; CHECK-NEXT: vmov.16 q5[4], r0 +; CHECK-NEXT: vmov.u8 r0, q0[5] +; CHECK-NEXT: vmov.16 q5[5], r0 +; CHECK-NEXT: vmov.u8 r0, q0[6] +; CHECK-NEXT: vmov.16 q5[6], r0 +; CHECK-NEXT: vmov.u8 r0, q0[7] +; CHECK-NEXT: vmov.16 q5[7], r0 +; CHECK-NEXT: vmov q3, q0 +; CHECK-NEXT: vmullb.u8 q5, q5, q4 +; CHECK-NEXT: vcmp.i32 ne, q2, zr +; CHECK-NEXT: vmov.u16 r0, q5[4] +; CHECK-NEXT: vmov.i32 q4, #0x0 +; CHECK-NEXT: vmov.32 q0[0], r0 +; CHECK-NEXT: vmov.u16 r0, q5[5] +; CHECK-NEXT: vmov.32 q0[1], r0 +; CHECK-NEXT: vmov.u16 r0, q5[6] +; CHECK-NEXT: vmov.32 q0[2], r0 +; CHECK-NEXT: vmov.u16 r0, q5[7] +; CHECK-NEXT: vmov.32 q0[3], r0 +; CHECK-NEXT: vmov.i32 q2, #0xffff +; CHECK-NEXT: vmov.u8 r0, q1[8] +; CHECK-NEXT: vstrw.32 q4, [sp, #32] @ 16-byte Spill +; CHECK-NEXT: vstrw.32 q2, [sp, #16] @ 16-byte Spill +; CHECK-NEXT: vpst +; CHECK-NEXT: vandt q4, q0, q2 +; CHECK-NEXT: vmov.16 q0[0], r0 +; CHECK-NEXT: vmov.u8 r0, q1[9] +; CHECK-NEXT: vmov.16 q0[1], r0 +; CHECK-NEXT: vmov.u8 r0, q1[10] +; CHECK-NEXT: vmov.16 q0[2], r0 +; CHECK-NEXT: vmov.u8 r0, q1[11] +; CHECK-NEXT: vmov.16 q0[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[12] +; CHECK-NEXT: vmov.16 q0[4], r0 +; CHECK-NEXT: vmov.u8 r0, q1[13] +; CHECK-NEXT: vmov.16 q0[5], r0 +; 
CHECK-NEXT: vmov.u8 r0, q1[14] +; CHECK-NEXT: vmov.16 q0[6], r0 +; CHECK-NEXT: vmov.u8 r0, q1[15] +; CHECK-NEXT: vmov.16 q0[7], r0 +; CHECK-NEXT: vcmp.i16 ne, q0, zr +; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload +; CHECK-NEXT: vpsel q0, q7, q0 +; CHECK-NEXT: vmov.u16 r0, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r0 +; CHECK-NEXT: vmov.u8 r0, q6[8] +; CHECK-NEXT: vmov.16 q7[0], r0 +; CHECK-NEXT: vmov.u8 r0, q6[9] +; CHECK-NEXT: vmov.16 q7[1], r0 +; CHECK-NEXT: vmov.u8 r0, q6[10] +; CHECK-NEXT: vmov.16 q7[2], r0 +; CHECK-NEXT: vmov.u8 r0, q6[11] +; CHECK-NEXT: vmov.16 q7[3], r0 +; CHECK-NEXT: vmov.u8 r0, q6[12] +; CHECK-NEXT: vmov.16 q7[4], r0 +; CHECK-NEXT: vmov.u8 r0, q6[13] +; CHECK-NEXT: vmov.16 q7[5], r0 +; CHECK-NEXT: vmov.u8 r0, q6[14] +; CHECK-NEXT: vmov.16 q7[6], r0 +; CHECK-NEXT: vmov.u8 r0, q6[15] +; CHECK-NEXT: vmov.16 q7[7], r0 +; CHECK-NEXT: vmov.u8 r0, q3[8] +; CHECK-NEXT: vcmp.i32 ne, q1, zr +; CHECK-NEXT: vmov.16 q1[0], r0 +; CHECK-NEXT: vmov.u8 r0, q3[9] +; CHECK-NEXT: vmov.16 q1[1], r0 +; CHECK-NEXT: vmov.u8 r0, q3[10] +; CHECK-NEXT: vmov.16 q1[2], r0 +; CHECK-NEXT: vmov.u8 r0, q3[11] +; CHECK-NEXT: vmov.16 q1[3], r0 +; CHECK-NEXT: vmov.u8 r0, q3[12] +; CHECK-NEXT: vmov.16 q1[4], r0 +; CHECK-NEXT: vmov.u8 r0, q3[13] +; CHECK-NEXT: vmov.16 q1[5], r0 +; CHECK-NEXT: vmov.u8 r0, q3[14] +; CHECK-NEXT: vmov.16 q1[6], r0 +; CHECK-NEXT: vmov.u8 r0, q3[15] +; CHECK-NEXT: vmov.16 q1[7], r0 +; CHECK-NEXT: vmullb.u8 q1, q1, q7 +; CHECK-NEXT: vmov.u16 r0, q1[4] +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.u16 r0, q1[5] +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.u16 r0, q1[6] +; CHECK-NEXT: vmov.32 q2[2], r0 +; CHECK-NEXT: vmov.u16 r0, q1[7] +; CHECK-NEXT: vmov.32 q2[3], r0 +; CHECK-NEXT: vmovlb.u16 q2, q2 +; CHECK-NEXT: vpst +; CHECK-NEXT: vaddt.i32 q4, q4, q2 +; CHECK-NEXT: vldrw.u32 q3, [sp, #48] @ 16-byte Reload +; CHECK-NEXT: vmov.u16 r0, q3[0] +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.u16 r0, q3[1] +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.u16 r0, q3[2] +; CHECK-NEXT: vmov.32 q2[2], r0 +; CHECK-NEXT: vmov.u16 r0, q3[3] +; CHECK-NEXT: vmov.32 q2[3], r0 +; CHECK-NEXT: vmov.u16 r0, q5[0] +; CHECK-NEXT: vcmp.i32 ne, q2, zr +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.u16 r0, q5[1] +; CHECK-NEXT: vldrw.u32 q3, [sp, #32] @ 16-byte Reload +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.u16 r0, q5[2] +; CHECK-NEXT: vmov.32 q2[2], r0 +; CHECK-NEXT: vmov.u16 r0, q5[3] +; CHECK-NEXT: vmov.32 q2[3], r0 +; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vldrw.u32 q5, [sp, #16] @ 16-byte Reload +; CHECK-NEXT: vpst +; CHECK-NEXT: vandt q3, q2, q5 +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[2] +; CHECK-NEXT: vmov.32 q2[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vmov.32 q2[3], r0 +; CHECK-NEXT: vmov.u16 r0, q1[0] +; CHECK-NEXT: vmov.32 q0[0], r0 +; CHECK-NEXT: vmov.u16 r0, q1[1] +; CHECK-NEXT: vmov.32 q0[1], r0 +; CHECK-NEXT: vmov.u16 r0, q1[2] +; CHECK-NEXT: vmov.32 q0[2], r0 +; CHECK-NEXT: vmov.u16 r0, q1[3] +; CHECK-NEXT: vmov.32 q0[3], r0 +; CHECK-NEXT: vmov q1, q3 +; CHECK-NEXT: vmovlb.u16 q0, q0 +; CHECK-NEXT: vpt.i32 ne, q2, zr +; CHECK-NEXT: vaddt.i32 q1, q3, q0 +; CHECK-NEXT: vadd.i32 q0, q1, q4 +; CHECK-NEXT: vaddv.u32 r0, q0 +; CHECK-NEXT: 
add sp, #64 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <16 x i8> %b, zeroinitializer + %xx = zext <16 x i8> %x to <16 x i16> + %yy = zext <16 x i8> %y to <16 x i16> + %m = mul <16 x i16> %xx, %yy + %ma = zext <16 x i16> %m to <16 x i32> + %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer + %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + ret i32 %z +} + +define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_sext(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b) { +; CHECK-LABEL: add_v16i8_v16i16_v16i32_sext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: .pad #32 +; CHECK-NEXT: sub sp, #32 +; CHECK-NEXT: vmov q3, q2 +; CHECK-NEXT: vmov q6, q0 +; CHECK-NEXT: vmov.i8 q0, #0x0 +; CHECK-NEXT: vcmp.i8 eq, q3, zr +; CHECK-NEXT: vmov.i8 q5, #0xff +; CHECK-NEXT: vmov q2, q1 +; CHECK-NEXT: vpsel q1, q5, q0 +; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill +; CHECK-NEXT: vmov.u8 r0, q1[0] +; CHECK-NEXT: vmov.16 q3[0], r0 +; CHECK-NEXT: vmov.u8 r0, q1[1] +; CHECK-NEXT: vmov.16 q3[1], r0 +; CHECK-NEXT: vmov.u8 r0, q1[2] +; CHECK-NEXT: vmov.16 q3[2], r0 +; CHECK-NEXT: vmov.u8 r0, q1[3] +; CHECK-NEXT: vmov.16 q3[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[4] +; CHECK-NEXT: vmov.16 q3[4], r0 +; CHECK-NEXT: vmov.u8 r0, q1[5] +; CHECK-NEXT: vmov.16 q3[5], r0 +; CHECK-NEXT: vmov.u8 r0, q1[6] +; CHECK-NEXT: vmov.16 q3[6], r0 +; CHECK-NEXT: vmov.u8 r0, q1[7] +; CHECK-NEXT: vmov.16 q3[7], r0 +; CHECK-NEXT: vcmp.i16 ne, q3, zr +; CHECK-NEXT: vpsel q3, q5, q0 +; CHECK-NEXT: vmov.u16 r0, q3[4] +; CHECK-NEXT: vmov.32 q0[0], r0 +; CHECK-NEXT: vmov.u16 r0, q3[5] +; CHECK-NEXT: vmov.32 q0[1], r0 +; CHECK-NEXT: vmov.u16 r0, q3[6] +; CHECK-NEXT: vmov.32 q0[2], r0 +; CHECK-NEXT: vmov.u16 r0, q3[7] +; CHECK-NEXT: vmov.32 q0[3], r0 +; CHECK-NEXT: vmov.u8 r0, q2[0] +; CHECK-NEXT: vmov.16 q4[0], r0 +; CHECK-NEXT: vmov.u8 r0, q2[1] +; CHECK-NEXT: vmov.16 q4[1], r0 +; CHECK-NEXT: vmov.u8 r0, q2[2] +; CHECK-NEXT: vmov.16 q4[2], r0 +; CHECK-NEXT: vmov.u8 r0, q2[3] +; CHECK-NEXT: vmov.16 q4[3], r0 +; CHECK-NEXT: vmov.u8 r0, q2[4] +; CHECK-NEXT: vmov.16 q4[4], r0 +; CHECK-NEXT: vmov.u8 r0, q2[5] +; CHECK-NEXT: vmov.16 q4[5], r0 +; CHECK-NEXT: vmov.u8 r0, q2[6] +; CHECK-NEXT: vmov.16 q4[6], r0 +; CHECK-NEXT: vmov.u8 r0, q2[7] +; CHECK-NEXT: vmov.16 q4[7], r0 +; CHECK-NEXT: vmov.u8 r0, q6[0] +; CHECK-NEXT: vmov.16 q7[0], r0 +; CHECK-NEXT: vmov.u8 r0, q6[1] +; CHECK-NEXT: vmov.16 q7[1], r0 +; CHECK-NEXT: vmov.u8 r0, q6[2] +; CHECK-NEXT: vmov.16 q7[2], r0 +; CHECK-NEXT: vmov.u8 r0, q6[3] +; CHECK-NEXT: vmov.16 q7[3], r0 +; CHECK-NEXT: vmov.u8 r0, q6[4] +; CHECK-NEXT: vmov.16 q7[4], r0 +; CHECK-NEXT: vmov.u8 r0, q6[5] +; CHECK-NEXT: vmov.16 q7[5], r0 +; CHECK-NEXT: vmov.u8 r0, q6[6] +; CHECK-NEXT: vmov.16 q7[6], r0 +; CHECK-NEXT: vmov.u8 r0, q6[7] +; CHECK-NEXT: vmov.16 q7[7], r0 +; CHECK-NEXT: vcmp.i32 ne, q0, zr +; CHECK-NEXT: vmullb.s8 q4, q7, q4 +; CHECK-NEXT: vmov.u16 r0, q4[4] +; CHECK-NEXT: vmov.32 q0[0], r0 +; CHECK-NEXT: vmov.u16 r0, q4[5] +; CHECK-NEXT: vmov.32 q0[1], r0 +; CHECK-NEXT: vmov.u16 r0, q4[6] +; CHECK-NEXT: vmov.32 q0[2], r0 +; CHECK-NEXT: vmov.u16 r0, q4[7] +; CHECK-NEXT: vmov.32 q0[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[8] +; CHECK-NEXT: vmovlb.s16 q7, q0 +; CHECK-NEXT: vmov.i32 q0, #0x0 +; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill +; CHECK-NEXT: vpsel q7, q7, q0 +; CHECK-NEXT: vmov.16 q0[0], r0 +; 
CHECK-NEXT: vmov.u8 r0, q1[9] +; CHECK-NEXT: vmov.16 q0[1], r0 +; CHECK-NEXT: vmov.u8 r0, q1[10] +; CHECK-NEXT: vmov.16 q0[2], r0 +; CHECK-NEXT: vmov.u8 r0, q1[11] +; CHECK-NEXT: vmov.16 q0[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[12] +; CHECK-NEXT: vmov.16 q0[4], r0 +; CHECK-NEXT: vmov.u8 r0, q1[13] +; CHECK-NEXT: vmov.16 q0[5], r0 +; CHECK-NEXT: vmov.u8 r0, q1[14] +; CHECK-NEXT: vmov.16 q0[6], r0 +; CHECK-NEXT: vmov.u8 r0, q1[15] +; CHECK-NEXT: vmov.16 q0[7], r0 +; CHECK-NEXT: vcmp.i16 ne, q0, zr +; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload +; CHECK-NEXT: vpsel q0, q5, q0 +; CHECK-NEXT: vmov.u16 r0, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r0 +; CHECK-NEXT: vmov.u8 r0, q2[8] +; CHECK-NEXT: vmov.16 q5[0], r0 +; CHECK-NEXT: vmov.u8 r0, q2[9] +; CHECK-NEXT: vmov.16 q5[1], r0 +; CHECK-NEXT: vmov.u8 r0, q2[10] +; CHECK-NEXT: vmov.16 q5[2], r0 +; CHECK-NEXT: vmov.u8 r0, q2[11] +; CHECK-NEXT: vmov.16 q5[3], r0 +; CHECK-NEXT: vmov.u8 r0, q2[12] +; CHECK-NEXT: vmov.16 q5[4], r0 +; CHECK-NEXT: vmov.u8 r0, q2[13] +; CHECK-NEXT: vmov.16 q5[5], r0 +; CHECK-NEXT: vmov.u8 r0, q2[14] +; CHECK-NEXT: vmov.16 q5[6], r0 +; CHECK-NEXT: vmov.u8 r0, q2[15] +; CHECK-NEXT: vmov.16 q5[7], r0 +; CHECK-NEXT: vmov.u8 r0, q6[8] +; CHECK-NEXT: vcmp.i32 ne, q1, zr +; CHECK-NEXT: vmov.16 q1[0], r0 +; CHECK-NEXT: vmov.u8 r0, q6[9] +; CHECK-NEXT: vmov.16 q1[1], r0 +; CHECK-NEXT: vmov.u8 r0, q6[10] +; CHECK-NEXT: vmov.16 q1[2], r0 +; CHECK-NEXT: vmov.u8 r0, q6[11] +; CHECK-NEXT: vmov.16 q1[3], r0 +; CHECK-NEXT: vmov.u8 r0, q6[12] +; CHECK-NEXT: vmov.16 q1[4], r0 +; CHECK-NEXT: vmov.u8 r0, q6[13] +; CHECK-NEXT: vmov.16 q1[5], r0 +; CHECK-NEXT: vmov.u8 r0, q6[14] +; CHECK-NEXT: vmov.16 q1[6], r0 +; CHECK-NEXT: vmov.u8 r0, q6[15] +; CHECK-NEXT: vmov.16 q1[7], r0 +; CHECK-NEXT: vmullb.s8 q1, q1, q5 +; CHECK-NEXT: vmov.u16 r0, q1[4] +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.u16 r0, q1[5] +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.u16 r0, q1[6] +; CHECK-NEXT: vmov.32 q2[2], r0 +; CHECK-NEXT: vmov.u16 r0, q1[7] +; CHECK-NEXT: vmov.32 q2[3], r0 +; CHECK-NEXT: vmov.u16 r0, q3[0] +; CHECK-NEXT: vmovlb.s16 q2, q2 +; CHECK-NEXT: vpst +; CHECK-NEXT: vaddt.i32 q7, q7, q2 +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.u16 r0, q3[1] +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.u16 r0, q3[2] +; CHECK-NEXT: vmov.32 q2[2], r0 +; CHECK-NEXT: vmov.u16 r0, q3[3] +; CHECK-NEXT: vmov.32 q2[3], r0 +; CHECK-NEXT: vmov.u16 r0, q4[0] +; CHECK-NEXT: vcmp.i32 ne, q2, zr +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.u16 r0, q4[1] +; CHECK-NEXT: vldrw.u32 q3, [sp, #16] @ 16-byte Reload +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.u16 r0, q4[2] +; CHECK-NEXT: vmov.32 q2[2], r0 +; CHECK-NEXT: vmov.u16 r0, q4[3] +; CHECK-NEXT: vmov.32 q2[3], r0 +; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vmovlb.s16 q2, q2 +; CHECK-NEXT: vpsel q2, q2, q3 +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[2] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vmov.u16 r0, q1[0] +; CHECK-NEXT: vmov.32 q0[0], r0 +; CHECK-NEXT: vmov.u16 r0, q1[1] +; CHECK-NEXT: vmov.32 q0[1], r0 +; CHECK-NEXT: vmov.u16 r0, q1[2] +; CHECK-NEXT: vmov.32 q0[2], r0 +; CHECK-NEXT: vmov.u16 r0, q1[3] +; 
CHECK-NEXT: vmov.32 q0[3], r0 +; CHECK-NEXT: vmovlb.s16 q0, q0 +; CHECK-NEXT: vpt.i32 ne, q3, zr +; CHECK-NEXT: vaddt.i32 q2, q2, q0 +; CHECK-NEXT: vadd.i32 q0, q2, q7 +; CHECK-NEXT: vaddv.u32 r0, q0 +; CHECK-NEXT: add sp, #32 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <16 x i8> %b, zeroinitializer + %xx = sext <16 x i8> %x to <16 x i16> + %yy = sext <16 x i8> %y to <16 x i16> + %m = mul <16 x i16> %xx, %yy + %ma = sext <16 x i16> %m to <16 x i32> + %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) ret i32 %z } -define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_sext(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b) { -; CHECK-LABEL: add_v16i8_v16i32_sext: +define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_sextzext(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b) { +; CHECK-LABEL: add_v16i8_v16i16_v16i32_sextzext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vpt.i8 eq, q2, zr -; CHECK-NEXT: vmlavt.s8 r0, q0, q1 +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: .pad #32 +; CHECK-NEXT: sub sp, #32 +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vcmp.i8 eq, q2, zr +; CHECK-NEXT: vmov.i8 q2, #0xff +; CHECK-NEXT: vmov.i8 q0, #0x0 +; CHECK-NEXT: vpsel q1, q2, q0 +; CHECK-NEXT: vmov q3, q2 +; CHECK-NEXT: vmov.u8 r0, q1[0] +; CHECK-NEXT: vstrw.32 q2, [sp] @ 16-byte Spill +; CHECK-NEXT: vmov.16 q2[0], r0 +; CHECK-NEXT: vmov.u8 r0, q1[1] +; CHECK-NEXT: vmov.16 q2[1], r0 +; CHECK-NEXT: vmov.u8 r0, q1[2] +; CHECK-NEXT: vmov.16 q2[2], r0 +; CHECK-NEXT: vmov.u8 r0, q1[3] +; CHECK-NEXT: vmov.16 q2[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[4] +; CHECK-NEXT: vmov.16 q2[4], r0 +; CHECK-NEXT: vmov.u8 r0, q1[5] +; CHECK-NEXT: vmov.16 q2[5], r0 +; CHECK-NEXT: vmov.u8 r0, q1[6] +; CHECK-NEXT: vmov.16 q2[6], r0 +; CHECK-NEXT: vmov.u8 r0, q1[7] +; CHECK-NEXT: vmov.16 q2[7], r0 +; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill +; CHECK-NEXT: vcmp.i16 ne, q2, zr +; CHECK-NEXT: vmov.i32 q6, #0x0 +; CHECK-NEXT: vpsel q5, q3, q0 +; CHECK-NEXT: vmov q7, q6 +; CHECK-NEXT: vmov.u16 r0, q5[4] +; CHECK-NEXT: vmov.i32 q2, #0xffff +; CHECK-NEXT: vmov.32 q0[0], r0 +; CHECK-NEXT: vmov.u16 r0, q5[5] +; CHECK-NEXT: vmov.32 q0[1], r0 +; CHECK-NEXT: vmov.u16 r0, q5[6] +; CHECK-NEXT: vmov.32 q0[2], r0 +; CHECK-NEXT: vmov.u16 r0, q5[7] +; CHECK-NEXT: vmov.32 q0[3], r0 +; CHECK-NEXT: vmov.u8 r0, q4[0] +; CHECK-NEXT: vmov.16 q3[0], r0 +; CHECK-NEXT: vmov.u8 r0, q4[1] +; CHECK-NEXT: vmov.16 q3[1], r0 +; CHECK-NEXT: vmov.u8 r0, q4[2] +; CHECK-NEXT: vmov.16 q3[2], r0 +; CHECK-NEXT: vmov.u8 r0, q4[3] +; CHECK-NEXT: vmov.16 q3[3], r0 +; CHECK-NEXT: vmov.u8 r0, q4[4] +; CHECK-NEXT: vmov.16 q3[4], r0 +; CHECK-NEXT: vmov.u8 r0, q4[5] +; CHECK-NEXT: vmov.16 q3[5], r0 +; CHECK-NEXT: vmov.u8 r0, q4[6] +; CHECK-NEXT: vmov.16 q3[6], r0 +; CHECK-NEXT: vmov.u8 r0, q4[7] +; CHECK-NEXT: vmov.16 q3[7], r0 +; CHECK-NEXT: vcmp.i32 ne, q0, zr +; CHECK-NEXT: vmullb.s8 q3, q3, q3 +; CHECK-NEXT: vmov.u16 r0, q3[4] +; CHECK-NEXT: vmov.32 q0[0], r0 +; CHECK-NEXT: vmov.u16 r0, q3[5] +; CHECK-NEXT: vmov.32 q0[1], r0 +; CHECK-NEXT: vmov.u16 r0, q3[6] +; CHECK-NEXT: vmov.32 q0[2], r0 +; CHECK-NEXT: vmov.u16 r0, q3[7] +; CHECK-NEXT: vmov.32 q0[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[8] +; CHECK-NEXT: vpst +; CHECK-NEXT: vandt q7, q0, q2 +; CHECK-NEXT: vmov.16 q0[0], r0 +; CHECK-NEXT: vmov.u8 r0, q1[9] +; CHECK-NEXT: vmov.16 q0[1], r0 +; CHECK-NEXT: vmov.u8 r0, 
q1[10] +; CHECK-NEXT: vmov.16 q0[2], r0 +; CHECK-NEXT: vmov.u8 r0, q1[11] +; CHECK-NEXT: vmov.16 q0[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[12] +; CHECK-NEXT: vmov.16 q0[4], r0 +; CHECK-NEXT: vmov.u8 r0, q1[13] +; CHECK-NEXT: vmov.16 q0[5], r0 +; CHECK-NEXT: vmov.u8 r0, q1[14] +; CHECK-NEXT: vmov.16 q0[6], r0 +; CHECK-NEXT: vmov.u8 r0, q1[15] +; CHECK-NEXT: vmov.16 q0[7], r0 +; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload +; CHECK-NEXT: vcmp.i16 ne, q0, zr +; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload +; CHECK-NEXT: vpsel q0, q1, q0 +; CHECK-NEXT: vmov.u16 r0, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r0 +; CHECK-NEXT: vmov.u8 r0, q4[8] +; CHECK-NEXT: vcmp.i32 ne, q1, zr +; CHECK-NEXT: vmov.16 q1[0], r0 +; CHECK-NEXT: vmov.u8 r0, q4[9] +; CHECK-NEXT: vmov.16 q1[1], r0 +; CHECK-NEXT: vmov.u8 r0, q4[10] +; CHECK-NEXT: vmov.16 q1[2], r0 +; CHECK-NEXT: vmov.u8 r0, q4[11] +; CHECK-NEXT: vmov.16 q1[3], r0 +; CHECK-NEXT: vmov.u8 r0, q4[12] +; CHECK-NEXT: vmov.16 q1[4], r0 +; CHECK-NEXT: vmov.u8 r0, q4[13] +; CHECK-NEXT: vmov.16 q1[5], r0 +; CHECK-NEXT: vmov.u8 r0, q4[14] +; CHECK-NEXT: vmov.16 q1[6], r0 +; CHECK-NEXT: vmov.u8 r0, q4[15] +; CHECK-NEXT: vmov.16 q1[7], r0 +; CHECK-NEXT: vmullb.s8 q1, q1, q1 +; CHECK-NEXT: vmov.u16 r0, q1[4] +; CHECK-NEXT: vmov.32 q4[0], r0 +; CHECK-NEXT: vmov.u16 r0, q1[5] +; CHECK-NEXT: vmov.32 q4[1], r0 +; CHECK-NEXT: vmov.u16 r0, q1[6] +; CHECK-NEXT: vmov.32 q4[2], r0 +; CHECK-NEXT: vmov.u16 r0, q1[7] +; CHECK-NEXT: vmov.32 q4[3], r0 +; CHECK-NEXT: vmov.u16 r0, q5[0] +; CHECK-NEXT: vmovlb.u16 q4, q4 +; CHECK-NEXT: vpst +; CHECK-NEXT: vaddt.i32 q7, q7, q4 +; CHECK-NEXT: vmov.32 q4[0], r0 +; CHECK-NEXT: vmov.u16 r0, q5[1] +; CHECK-NEXT: vmov.32 q4[1], r0 +; CHECK-NEXT: vmov.u16 r0, q5[2] +; CHECK-NEXT: vmov.32 q4[2], r0 +; CHECK-NEXT: vmov.u16 r0, q5[3] +; CHECK-NEXT: vmov.32 q4[3], r0 +; CHECK-NEXT: vmov.u16 r0, q3[0] +; CHECK-NEXT: vcmp.i32 ne, q4, zr +; CHECK-NEXT: vmov.32 q4[0], r0 +; CHECK-NEXT: vmov.u16 r0, q3[1] +; CHECK-NEXT: vmov.32 q4[1], r0 +; CHECK-NEXT: vmov.u16 r0, q3[2] +; CHECK-NEXT: vmov.32 q4[2], r0 +; CHECK-NEXT: vmov.u16 r0, q3[3] +; CHECK-NEXT: vmov.32 q4[3], r0 +; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vpst +; CHECK-NEXT: vandt q6, q4, q2 +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[2] +; CHECK-NEXT: vmov.32 q2[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vmov.32 q2[3], r0 +; CHECK-NEXT: vmov.u16 r0, q1[0] +; CHECK-NEXT: vmov.32 q0[0], r0 +; CHECK-NEXT: vmov.u16 r0, q1[1] +; CHECK-NEXT: vmov.32 q0[1], r0 +; CHECK-NEXT: vmov.u16 r0, q1[2] +; CHECK-NEXT: vmov.32 q0[2], r0 +; CHECK-NEXT: vmov.u16 r0, q1[3] +; CHECK-NEXT: vmov.32 q0[3], r0 +; CHECK-NEXT: vmovlb.u16 q0, q0 +; CHECK-NEXT: vpt.i32 ne, q2, zr +; CHECK-NEXT: vaddt.i32 q6, q6, q0 +; CHECK-NEXT: vadd.i32 q0, q6, q7 +; CHECK-NEXT: vaddv.u32 r0, q0 +; CHECK-NEXT: add sp, #32 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: bx lr entry: %c = icmp eq <16 x i8> %b, zeroinitializer - %xx = sext <16 x i8> %x to <16 x i32> - %yy = sext <16 x i8> %y to <16 x i32> - %m = mul <16 x i32> %xx, %yy - %s = select <16 x i1> %c, <16 x i32> %m, <16 x i32> zeroinitializer + %xx = sext <16 x i8> %x to <16 x i16> + %m = mul <16 x i16> %xx, %xx + %ma = zext <16 x i16> %m to <16 x 
i32> + %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) ret i32 %z } @@ -1642,27 +2734,517 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, <8 x i16> %y, ; CHECK-NEXT: bx lr entry: %c = icmp eq <8 x i16> %b, zeroinitializer - %xx = zext <8 x i16> %x to <8 x i64> - %yy = zext <8 x i16> %y to <8 x i64> - %m = mul <8 x i64> %xx, %yy - %s = select <8 x i1> %c, <8 x i64> %m, <8 x i64> zeroinitializer + %xx = zext <8 x i16> %x to <8 x i64> + %yy = zext <8 x i16> %y to <8 x i64> + %m = mul <8 x i64> %xx, %yy + %s = select <8 x i1> %c, <8 x i64> %m, <8 x i64> zeroinitializer + %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + %r = add i64 %z, %a + ret i64 %r +} + +define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %y, <8 x i16> %b, i64 %a) { +; CHECK-LABEL: add_v8i16_v8i64_acc_sext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpt.i16 eq, q2, zr +; CHECK-NEXT: vmlalvat.s16 r0, r1, q0, q1 +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <8 x i16> %b, zeroinitializer + %xx = sext <8 x i16> %x to <8 x i64> + %yy = sext <8 x i16> %y to <8 x i64> + %m = mul <8 x i64> %xx, %yy + %s = select <8 x i1> %c, <8 x i64> %m, <8 x i64> zeroinitializer + %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + %r = add i64 %z, %a + ret i64 %r +} + +define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_acc_zext(<8 x i16> %x, <8 x i16> %y, <8 x i16> %b, i64 %a) { +; CHECK-LABEL: add_v8i16_v8i32_v8i64_acc_zext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vmov.i8 q3, #0x0 +; CHECK-NEXT: vmov.i8 q4, #0xff +; CHECK-NEXT: vcmp.i16 eq, q2, zr +; CHECK-NEXT: vpsel q3, q4, q3 +; CHECK-NEXT: vmov.u16 r2, q3[0] +; CHECK-NEXT: vmov.32 q2[0], r2 +; CHECK-NEXT: vmov.u16 r2, q3[1] +; CHECK-NEXT: vmov.32 q2[1], r2 +; CHECK-NEXT: vmov.u16 r2, q3[2] +; CHECK-NEXT: vmov.32 q2[2], r2 +; CHECK-NEXT: vmov.u16 r2, q3[3] +; CHECK-NEXT: vmov.32 q2[3], r2 +; CHECK-NEXT: vcmp.i32 ne, q2, zr +; CHECK-NEXT: vmrs r12, p0 +; CHECK-NEXT: and r3, r12, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: vmov.32 q4[0], r3 +; CHECK-NEXT: vmov.32 q4[1], r3 +; CHECK-NEXT: ubfx r3, r12, #4, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: vmov.32 q4[2], r3 +; CHECK-NEXT: vmov.32 q4[3], r3 +; CHECK-NEXT: vmov.u16 r3, q1[0] +; CHECK-NEXT: vmov.32 q2[0], r3 +; CHECK-NEXT: vmov.u16 r3, q1[1] +; CHECK-NEXT: vmov.32 q2[1], r3 +; CHECK-NEXT: vmov.u16 r3, q1[2] +; CHECK-NEXT: vmov.32 q2[2], r3 +; CHECK-NEXT: vmov.u16 r3, q1[3] +; CHECK-NEXT: vmov.32 q2[3], r3 +; CHECK-NEXT: vmov.u16 r3, q0[0] +; CHECK-NEXT: vmov.32 q5[0], r3 +; CHECK-NEXT: vmov.u16 r3, q0[1] +; CHECK-NEXT: vmov.32 q5[1], r3 +; CHECK-NEXT: vmov.u16 r3, q0[2] +; CHECK-NEXT: vmov.32 q5[2], r3 +; CHECK-NEXT: vmov.u16 r3, q0[3] +; CHECK-NEXT: vmov.32 q5[3], r3 +; CHECK-NEXT: vmullb.u16 q5, q5, q2 +; CHECK-NEXT: vmov.i64 q2, #0xffffffff +; CHECK-NEXT: vmov.f32 s24, s20 +; CHECK-NEXT: vmov.f32 s26, s21 +; CHECK-NEXT: vand q6, q6, q2 +; CHECK-NEXT: vand q4, q6, q4 +; CHECK-NEXT: vmov.f32 s24, s22 +; CHECK-NEXT: vmov r3, s18 +; CHECK-NEXT: vmov r4, s16 +; CHECK-NEXT: vmov lr, s19 +; CHECK-NEXT: vmov r2, s17 +; CHECK-NEXT: vmov.f32 s26, s23 +; CHECK-NEXT: vand q5, q6, q2 +; CHECK-NEXT: adds r3, r3, r4 +; CHECK-NEXT: ubfx r4, r12, #8, #1 +; CHECK-NEXT: rsb.w r4, r4, #0 
+; CHECK-NEXT: vmov.32 q4[0], r4 +; CHECK-NEXT: adc.w lr, lr, r2 +; CHECK-NEXT: vmov.32 q4[1], r4 +; CHECK-NEXT: ubfx r4, r12, #12, #1 +; CHECK-NEXT: rsbs r4, r4, #0 +; CHECK-NEXT: vmov.32 q4[2], r4 +; CHECK-NEXT: vmov.32 q4[3], r4 +; CHECK-NEXT: vand q4, q5, q4 +; CHECK-NEXT: vmov r2, s16 +; CHECK-NEXT: vmov r4, s17 +; CHECK-NEXT: adds.w r12, r3, r2 +; CHECK-NEXT: vmov r2, s18 +; CHECK-NEXT: adc.w r3, lr, r4 +; CHECK-NEXT: vmov r4, s19 +; CHECK-NEXT: adds.w r12, r12, r2 +; CHECK-NEXT: vmov.u16 r2, q3[4] +; CHECK-NEXT: vmov.32 q4[0], r2 +; CHECK-NEXT: vmov.u16 r2, q3[5] +; CHECK-NEXT: vmov.32 q4[1], r2 +; CHECK-NEXT: vmov.u16 r2, q3[6] +; CHECK-NEXT: vmov.32 q4[2], r2 +; CHECK-NEXT: vmov.u16 r2, q3[7] +; CHECK-NEXT: vmov.32 q4[3], r2 +; CHECK-NEXT: adc.w lr, r3, r4 +; CHECK-NEXT: vcmp.i32 ne, q4, zr +; CHECK-NEXT: vmrs r6, p0 +; CHECK-NEXT: and r4, r6, #1 +; CHECK-NEXT: rsbs r4, r4, #0 +; CHECK-NEXT: vmov.32 q3[0], r4 +; CHECK-NEXT: vmov.32 q3[1], r4 +; CHECK-NEXT: ubfx r4, r6, #4, #1 +; CHECK-NEXT: rsbs r4, r4, #0 +; CHECK-NEXT: vmov.32 q3[2], r4 +; CHECK-NEXT: vmov.32 q3[3], r4 +; CHECK-NEXT: vmov.u16 r4, q1[4] +; CHECK-NEXT: vmov.32 q4[0], r4 +; CHECK-NEXT: vmov.u16 r4, q1[5] +; CHECK-NEXT: vmov.32 q4[1], r4 +; CHECK-NEXT: vmov.u16 r4, q1[6] +; CHECK-NEXT: vmov.32 q4[2], r4 +; CHECK-NEXT: vmov.u16 r4, q1[7] +; CHECK-NEXT: vmov.32 q4[3], r4 +; CHECK-NEXT: vmov.u16 r4, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r4 +; CHECK-NEXT: vmov.u16 r4, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r4 +; CHECK-NEXT: vmov.u16 r4, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r4 +; CHECK-NEXT: vmov.u16 r4, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r4 +; CHECK-NEXT: vmullb.u16 q0, q1, q4 +; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: vmov.f32 s6, s1 +; CHECK-NEXT: vand q1, q1, q2 +; CHECK-NEXT: vand q1, q1, q3 +; CHECK-NEXT: vmov.f32 s12, s2 +; CHECK-NEXT: vmov r3, s4 +; CHECK-NEXT: vmov r4, s5 +; CHECK-NEXT: vmov r5, s6 +; CHECK-NEXT: vmov r2, s7 +; CHECK-NEXT: vmov.f32 s14, s3 +; CHECK-NEXT: vand q0, q3, q2 +; CHECK-NEXT: adds.w r3, r3, r12 +; CHECK-NEXT: adc.w r4, r4, lr +; CHECK-NEXT: adds r3, r3, r5 +; CHECK-NEXT: ubfx r5, r6, #8, #1 +; CHECK-NEXT: rsb.w r5, r5, #0 +; CHECK-NEXT: ubfx r6, r6, #12, #1 +; CHECK-NEXT: vmov.32 q1[0], r5 +; CHECK-NEXT: rsb.w r6, r6, #0 +; CHECK-NEXT: vmov.32 q1[1], r5 +; CHECK-NEXT: adcs r2, r4 +; CHECK-NEXT: vmov.32 q1[2], r6 +; CHECK-NEXT: vmov.32 q1[3], r6 +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: vmov r5, s0 +; CHECK-NEXT: vmov r6, s1 +; CHECK-NEXT: adds r3, r3, r5 +; CHECK-NEXT: vmov r5, s2 +; CHECK-NEXT: adcs r2, r6 +; CHECK-NEXT: vmov r6, s3 +; CHECK-NEXT: adds r3, r3, r5 +; CHECK-NEXT: adcs r2, r6 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: pop {r4, r5, r6, pc} +entry: + %c = icmp eq <8 x i16> %b, zeroinitializer + %xx = zext <8 x i16> %x to <8 x i32> + %yy = zext <8 x i16> %y to <8 x i32> + %m = mul <8 x i32> %xx, %yy + %ma = zext <8 x i32> %m to <8 x i64> + %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer + %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + %r = add i64 %z, %a + ret i64 %r +} + +define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %y, <8 x i16> %b, i64 %a) { +; CHECK-LABEL: add_v8i16_v8i32_v8i64_acc_sext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vmov.u16 
r2, q1[0] +; CHECK-NEXT: vmov.i8 q6, #0xff +; CHECK-NEXT: vmov.32 q3[0], r2 +; CHECK-NEXT: vmov.u16 r2, q1[1] +; CHECK-NEXT: vmov.32 q3[1], r2 +; CHECK-NEXT: vmov.u16 r2, q1[2] +; CHECK-NEXT: vmov.32 q3[2], r2 +; CHECK-NEXT: vmov.u16 r2, q1[3] +; CHECK-NEXT: vmov.32 q3[3], r2 +; CHECK-NEXT: vmov.u16 r2, q0[0] +; CHECK-NEXT: vmov.32 q4[0], r2 +; CHECK-NEXT: vmov.u16 r2, q0[1] +; CHECK-NEXT: vmov.32 q4[1], r2 +; CHECK-NEXT: vmov.u16 r2, q0[2] +; CHECK-NEXT: vmov.32 q4[2], r2 +; CHECK-NEXT: vmov.u16 r2, q0[3] +; CHECK-NEXT: vmov.32 q4[3], r2 +; CHECK-NEXT: vcmp.i16 eq, q2, zr +; CHECK-NEXT: vmullb.s16 q3, q4, q3 +; CHECK-NEXT: vmov.f32 s20, s12 +; CHECK-NEXT: vmov.f32 s22, s13 +; CHECK-NEXT: vmov r2, s20 +; CHECK-NEXT: vmov.32 q4[0], r2 +; CHECK-NEXT: asrs r2, r2, #31 +; CHECK-NEXT: vmov.32 q4[1], r2 +; CHECK-NEXT: vmov r2, s22 +; CHECK-NEXT: vmov.i8 q5, #0x0 +; CHECK-NEXT: vmov.32 q4[2], r2 +; CHECK-NEXT: vpsel q2, q6, q5 +; CHECK-NEXT: asrs r2, r2, #31 +; CHECK-NEXT: vmov.32 q4[3], r2 +; CHECK-NEXT: vmov.u16 r2, q2[0] +; CHECK-NEXT: vmov.32 q5[0], r2 +; CHECK-NEXT: vmov.u16 r2, q2[1] +; CHECK-NEXT: vmov.32 q5[1], r2 +; CHECK-NEXT: vmov.u16 r2, q2[2] +; CHECK-NEXT: vmov.32 q5[2], r2 +; CHECK-NEXT: vmov.u16 r2, q2[3] +; CHECK-NEXT: vmov.32 q5[3], r2 +; CHECK-NEXT: vcmp.i32 ne, q5, zr +; CHECK-NEXT: vmrs r2, p0 +; CHECK-NEXT: and r3, r2, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: vmov.32 q5[0], r3 +; CHECK-NEXT: vmov.32 q5[1], r3 +; CHECK-NEXT: ubfx r3, r2, #4, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: vmov.32 q5[2], r3 +; CHECK-NEXT: vmov.32 q5[3], r3 +; CHECK-NEXT: vand q4, q4, q5 +; CHECK-NEXT: vmov r3, s18 +; CHECK-NEXT: vmov r4, s16 +; CHECK-NEXT: vmov r12, s19 +; CHECK-NEXT: vmov r5, s17 +; CHECK-NEXT: vmov.f32 s16, s14 +; CHECK-NEXT: vmov.f32 s18, s15 +; CHECK-NEXT: adds.w lr, r4, r3 +; CHECK-NEXT: vmov r3, s16 +; CHECK-NEXT: vmov.32 q3[0], r3 +; CHECK-NEXT: adc.w r12, r12, r5 +; CHECK-NEXT: asrs r3, r3, #31 +; CHECK-NEXT: vmov.32 q3[1], r3 +; CHECK-NEXT: vmov r3, s18 +; CHECK-NEXT: vmov.32 q3[2], r3 +; CHECK-NEXT: asrs r3, r3, #31 +; CHECK-NEXT: vmov.32 q3[3], r3 +; CHECK-NEXT: ubfx r3, r2, #8, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: ubfx r2, r2, #12, #1 +; CHECK-NEXT: vmov.32 q4[0], r3 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: vmov.32 q4[1], r3 +; CHECK-NEXT: vmov.32 q4[2], r2 +; CHECK-NEXT: vmov.32 q4[3], r2 +; CHECK-NEXT: vand q3, q3, q4 +; CHECK-NEXT: vmov r3, s12 +; CHECK-NEXT: vmov r2, s13 +; CHECK-NEXT: vmov r4, s14 +; CHECK-NEXT: vmov r5, s15 +; CHECK-NEXT: adds.w r3, r3, lr +; CHECK-NEXT: adc.w r2, r2, r12 +; CHECK-NEXT: adds.w r12, r3, r4 +; CHECK-NEXT: adc.w r3, r2, r5 +; CHECK-NEXT: vmov.u16 r2, q1[4] +; CHECK-NEXT: vmov.32 q3[0], r2 +; CHECK-NEXT: vmov.u16 r2, q1[5] +; CHECK-NEXT: vmov.32 q3[1], r2 +; CHECK-NEXT: vmov.u16 r2, q1[6] +; CHECK-NEXT: vmov.32 q3[2], r2 +; CHECK-NEXT: vmov.u16 r2, q1[7] +; CHECK-NEXT: vmov.32 q3[3], r2 +; CHECK-NEXT: vmov.u16 r2, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r2 +; CHECK-NEXT: vmov.u16 r2, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r2 +; CHECK-NEXT: vmov.u16 r2, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r2 +; CHECK-NEXT: vmov.u16 r2, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r2 +; CHECK-NEXT: vmullb.s16 q0, q1, q3 +; CHECK-NEXT: vmov.f32 s12, s0 +; CHECK-NEXT: vmov.f32 s14, s1 +; CHECK-NEXT: vmov r2, s12 +; CHECK-NEXT: vmov.32 q1[0], r2 +; CHECK-NEXT: asrs r2, r2, #31 +; CHECK-NEXT: vmov.32 q1[1], r2 +; CHECK-NEXT: vmov r2, s14 +; CHECK-NEXT: vmov.32 q1[2], r2 +; CHECK-NEXT: asrs r2, r2, #31 +; CHECK-NEXT: vmov.32 q1[3], r2 +; 
CHECK-NEXT: vmov.u16 r2, q2[4] +; CHECK-NEXT: vmov.32 q3[0], r2 +; CHECK-NEXT: vmov.u16 r2, q2[5] +; CHECK-NEXT: vmov.32 q3[1], r2 +; CHECK-NEXT: vmov.u16 r2, q2[6] +; CHECK-NEXT: vmov.32 q3[2], r2 +; CHECK-NEXT: vmov.u16 r2, q2[7] +; CHECK-NEXT: vmov.32 q3[3], r2 +; CHECK-NEXT: vcmp.i32 ne, q3, zr +; CHECK-NEXT: vmrs r2, p0 +; CHECK-NEXT: and r5, r2, #1 +; CHECK-NEXT: rsbs r5, r5, #0 +; CHECK-NEXT: vmov.32 q2[0], r5 +; CHECK-NEXT: vmov.32 q2[1], r5 +; CHECK-NEXT: ubfx r5, r2, #4, #1 +; CHECK-NEXT: rsbs r5, r5, #0 +; CHECK-NEXT: vmov.32 q2[2], r5 +; CHECK-NEXT: vmov.32 q2[3], r5 +; CHECK-NEXT: vand q1, q1, q2 +; CHECK-NEXT: vmov r4, s4 +; CHECK-NEXT: vmov r5, s5 +; CHECK-NEXT: adds.w r12, r12, r4 +; CHECK-NEXT: vmov r4, s6 +; CHECK-NEXT: adcs r5, r3 +; CHECK-NEXT: vmov r3, s7 +; CHECK-NEXT: vmov.f32 s4, s2 +; CHECK-NEXT: vmov.f32 s6, s3 +; CHECK-NEXT: adds.w r4, r4, r12 +; CHECK-NEXT: adc.w r12, r5, r3 +; CHECK-NEXT: vmov r3, s4 +; CHECK-NEXT: vmov.32 q0[0], r3 +; CHECK-NEXT: asrs r3, r3, #31 +; CHECK-NEXT: vmov.32 q0[1], r3 +; CHECK-NEXT: vmov r3, s6 +; CHECK-NEXT: vmov.32 q0[2], r3 +; CHECK-NEXT: asrs r3, r3, #31 +; CHECK-NEXT: vmov.32 q0[3], r3 +; CHECK-NEXT: ubfx r3, r2, #8, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: ubfx r2, r2, #12, #1 +; CHECK-NEXT: vmov.32 q1[0], r3 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: vmov.32 q1[1], r3 +; CHECK-NEXT: vmov.32 q1[2], r2 +; CHECK-NEXT: vmov.32 q1[3], r2 +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov r2, s1 +; CHECK-NEXT: vmov r5, s3 +; CHECK-NEXT: adds r3, r3, r4 +; CHECK-NEXT: vmov r4, s2 +; CHECK-NEXT: adc.w r2, r2, r12 +; CHECK-NEXT: adds r3, r3, r4 +; CHECK-NEXT: adcs r2, r5 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: pop {r4, r5, r7, pc} +entry: + %c = icmp eq <8 x i16> %b, zeroinitializer + %xx = sext <8 x i16> %x to <8 x i32> + %yy = sext <8 x i16> %y to <8 x i32> + %m = mul <8 x i32> %xx, %yy + %ma = sext <8 x i32> %m to <8 x i64> + %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) %r = add i64 %z, %a ret i64 %r } -define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %y, <8 x i16> %b, i64 %a) { -; CHECK-LABEL: add_v8i16_v8i64_acc_sext: +define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_acc_sextzext(<8 x i16> %x, <8 x i16> %y, <8 x i16> %b, i64 %a) { +; CHECK-LABEL: add_v8i16_v8i32_v8i64_acc_sextzext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vpt.i16 eq, q2, zr -; CHECK-NEXT: vmlalvat.s16 r0, r1, q0, q1 -; CHECK-NEXT: bx lr +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov.i8 q1, #0x0 +; CHECK-NEXT: vmov.i8 q3, #0xff +; CHECK-NEXT: vcmp.i16 eq, q2, zr +; CHECK-NEXT: vpsel q2, q3, q1 +; CHECK-NEXT: vmov.u16 r2, q2[0] +; CHECK-NEXT: vmov.32 q1[0], r2 +; CHECK-NEXT: vmov.u16 r2, q2[1] +; CHECK-NEXT: vmov.32 q1[1], r2 +; CHECK-NEXT: vmov.u16 r2, q2[2] +; CHECK-NEXT: vmov.32 q1[2], r2 +; CHECK-NEXT: vmov.u16 r2, q2[3] +; CHECK-NEXT: vmov.32 q1[3], r2 +; CHECK-NEXT: vcmp.i32 ne, q1, zr +; CHECK-NEXT: vmrs r2, p0 +; CHECK-NEXT: and r3, r2, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: vmov.32 q4[0], r3 +; CHECK-NEXT: vmov.32 q4[1], r3 +; CHECK-NEXT: ubfx r3, r2, #4, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: vmov.32 q4[2], r3 +; CHECK-NEXT: vmov.32 q4[3], r3 +; CHECK-NEXT: vmov.u16 r3, 
q0[0] +; CHECK-NEXT: vmov.32 q1[0], r3 +; CHECK-NEXT: vmov.u16 r3, q0[1] +; CHECK-NEXT: vmov.32 q1[1], r3 +; CHECK-NEXT: vmov.u16 r3, q0[2] +; CHECK-NEXT: vmov.32 q1[2], r3 +; CHECK-NEXT: vmov.u16 r3, q0[3] +; CHECK-NEXT: vmov.32 q1[3], r3 +; CHECK-NEXT: vmullb.s16 q3, q1, q1 +; CHECK-NEXT: vmov.i64 q1, #0xffffffff +; CHECK-NEXT: vmov.f32 s20, s12 +; CHECK-NEXT: vmov.f32 s22, s13 +; CHECK-NEXT: vand q5, q5, q1 +; CHECK-NEXT: vand q4, q5, q4 +; CHECK-NEXT: vmov.f32 s20, s14 +; CHECK-NEXT: vmov r3, s18 +; CHECK-NEXT: vmov r4, s16 +; CHECK-NEXT: vmov r12, s19 +; CHECK-NEXT: vmov lr, s17 +; CHECK-NEXT: vmov.f32 s22, s15 +; CHECK-NEXT: vand q3, q5, q1 +; CHECK-NEXT: adds r5, r4, r3 +; CHECK-NEXT: ubfx r3, r2, #8, #1 +; CHECK-NEXT: rsb.w r3, r3, #0 +; CHECK-NEXT: ubfx r2, r2, #12, #1 +; CHECK-NEXT: vmov.32 q4[0], r3 +; CHECK-NEXT: rsb.w r2, r2, #0 +; CHECK-NEXT: vmov.32 q4[1], r3 +; CHECK-NEXT: adc.w r4, lr, r12 +; CHECK-NEXT: vmov.32 q4[2], r2 +; CHECK-NEXT: vmov.32 q4[3], r2 +; CHECK-NEXT: vand q3, q3, q4 +; CHECK-NEXT: vmov r3, s12 +; CHECK-NEXT: vmov r2, s13 +; CHECK-NEXT: adds r3, r3, r5 +; CHECK-NEXT: vmov r5, s15 +; CHECK-NEXT: adcs r2, r4 +; CHECK-NEXT: vmov r4, s14 +; CHECK-NEXT: adds.w r12, r3, r4 +; CHECK-NEXT: adc.w r3, r2, r5 +; CHECK-NEXT: vmov.u16 r2, q2[4] +; CHECK-NEXT: vmov.32 q3[0], r2 +; CHECK-NEXT: vmov.u16 r2, q2[5] +; CHECK-NEXT: vmov.32 q3[1], r2 +; CHECK-NEXT: vmov.u16 r2, q2[6] +; CHECK-NEXT: vmov.32 q3[2], r2 +; CHECK-NEXT: vmov.u16 r2, q2[7] +; CHECK-NEXT: vmov.32 q3[3], r2 +; CHECK-NEXT: vcmp.i32 ne, q3, zr +; CHECK-NEXT: vmrs r2, p0 +; CHECK-NEXT: and r5, r2, #1 +; CHECK-NEXT: rsbs r5, r5, #0 +; CHECK-NEXT: vmov.32 q2[0], r5 +; CHECK-NEXT: vmov.32 q2[1], r5 +; CHECK-NEXT: ubfx r5, r2, #4, #1 +; CHECK-NEXT: rsbs r5, r5, #0 +; CHECK-NEXT: vmov.32 q2[2], r5 +; CHECK-NEXT: vmov.32 q2[3], r5 +; CHECK-NEXT: vmov.u16 r5, q0[4] +; CHECK-NEXT: vmov.32 q3[0], r5 +; CHECK-NEXT: vmov.u16 r5, q0[5] +; CHECK-NEXT: vmov.32 q3[1], r5 +; CHECK-NEXT: vmov.u16 r5, q0[6] +; CHECK-NEXT: vmov.32 q3[2], r5 +; CHECK-NEXT: vmov.u16 r5, q0[7] +; CHECK-NEXT: vmov.32 q3[3], r5 +; CHECK-NEXT: vmullb.s16 q0, q3, q3 +; CHECK-NEXT: vmov.f32 s12, s0 +; CHECK-NEXT: vmov.f32 s14, s1 +; CHECK-NEXT: vand q3, q3, q1 +; CHECK-NEXT: vand q2, q3, q2 +; CHECK-NEXT: vmov.f32 s12, s2 +; CHECK-NEXT: vmov r4, s8 +; CHECK-NEXT: vmov r5, s9 +; CHECK-NEXT: vmov.f32 s14, s3 +; CHECK-NEXT: vand q0, q3, q1 +; CHECK-NEXT: adds.w r4, r4, r12 +; CHECK-NEXT: adc.w r12, r3, r5 +; CHECK-NEXT: vmov r3, s10 +; CHECK-NEXT: vmov r5, s11 +; CHECK-NEXT: adds r3, r3, r4 +; CHECK-NEXT: ubfx r4, r2, #8, #1 +; CHECK-NEXT: rsb.w r4, r4, #0 +; CHECK-NEXT: ubfx r2, r2, #12, #1 +; CHECK-NEXT: vmov.32 q2[0], r4 +; CHECK-NEXT: rsb.w r2, r2, #0 +; CHECK-NEXT: vmov.32 q2[1], r4 +; CHECK-NEXT: adc.w r5, r5, r12 +; CHECK-NEXT: vmov.32 q2[2], r2 +; CHECK-NEXT: vmov.32 q2[3], r2 +; CHECK-NEXT: vand q0, q0, q2 +; CHECK-NEXT: vmov r4, s0 +; CHECK-NEXT: vmov r2, s1 +; CHECK-NEXT: adds r3, r3, r4 +; CHECK-NEXT: vmov r4, s2 +; CHECK-NEXT: adcs r2, r5 +; CHECK-NEXT: vmov r5, s3 +; CHECK-NEXT: adds r3, r3, r4 +; CHECK-NEXT: adcs r2, r5 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %c = icmp eq <8 x i16> %b, zeroinitializer - %xx = sext <8 x i16> %x to <8 x i64> - %yy = sext <8 x i16> %y to <8 x i64> - %m = mul <8 x i64> %xx, %yy - %s = select <8 x i1> %c, <8 x i64> %m, <8 x i64> zeroinitializer + %xx = sext <8 x i16> %x to <8 x i32> + %m = mul <8 x i32> %xx, 
%xx + %ma = zext <8 x i32> %m to <8 x i64> + %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) %r = add i64 %z, %a ret i64 %r @@ -1815,6 +3397,624 @@ entry: ret i32 %r } +define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_acc_zext(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b, i32 %a) { +; CHECK-LABEL: add_v16i8_v16i16_v16i32_acc_zext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: .pad #64 +; CHECK-NEXT: sub sp, #64 +; CHECK-NEXT: vcmp.i8 eq, q2, zr +; CHECK-NEXT: vmov.i8 q2, #0x0 +; CHECK-NEXT: vmov.i8 q7, #0xff +; CHECK-NEXT: vmov q6, q1 +; CHECK-NEXT: vpsel q1, q7, q2 +; CHECK-NEXT: vstrw.32 q2, [sp] @ 16-byte Spill +; CHECK-NEXT: vmov.u8 r1, q1[0] +; CHECK-NEXT: vmov.16 q3[0], r1 +; CHECK-NEXT: vmov.u8 r1, q1[1] +; CHECK-NEXT: vmov.16 q3[1], r1 +; CHECK-NEXT: vmov.u8 r1, q1[2] +; CHECK-NEXT: vmov.16 q3[2], r1 +; CHECK-NEXT: vmov.u8 r1, q1[3] +; CHECK-NEXT: vmov.16 q3[3], r1 +; CHECK-NEXT: vmov.u8 r1, q1[4] +; CHECK-NEXT: vmov.16 q3[4], r1 +; CHECK-NEXT: vmov.u8 r1, q1[5] +; CHECK-NEXT: vmov.16 q3[5], r1 +; CHECK-NEXT: vmov.u8 r1, q1[6] +; CHECK-NEXT: vmov.16 q3[6], r1 +; CHECK-NEXT: vmov.u8 r1, q1[7] +; CHECK-NEXT: vmov.16 q3[7], r1 +; CHECK-NEXT: vcmp.i16 ne, q3, zr +; CHECK-NEXT: vpsel q3, q7, q2 +; CHECK-NEXT: vmov.u16 r1, q3[4] +; CHECK-NEXT: vstrw.32 q3, [sp, #48] @ 16-byte Spill +; CHECK-NEXT: vmov.32 q2[0], r1 +; CHECK-NEXT: vmov.u16 r1, q3[5] +; CHECK-NEXT: vmov.32 q2[1], r1 +; CHECK-NEXT: vmov.u16 r1, q3[6] +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: vmov.u16 r1, q3[7] +; CHECK-NEXT: vmov.32 q2[3], r1 +; CHECK-NEXT: vmov.u8 r1, q6[0] +; CHECK-NEXT: vmov.16 q4[0], r1 +; CHECK-NEXT: vmov.u8 r1, q6[1] +; CHECK-NEXT: vmov.16 q4[1], r1 +; CHECK-NEXT: vmov.u8 r1, q6[2] +; CHECK-NEXT: vmov.16 q4[2], r1 +; CHECK-NEXT: vmov.u8 r1, q6[3] +; CHECK-NEXT: vmov.16 q4[3], r1 +; CHECK-NEXT: vmov.u8 r1, q6[4] +; CHECK-NEXT: vmov.16 q4[4], r1 +; CHECK-NEXT: vmov.u8 r1, q6[5] +; CHECK-NEXT: vmov.16 q4[5], r1 +; CHECK-NEXT: vmov.u8 r1, q6[6] +; CHECK-NEXT: vmov.16 q4[6], r1 +; CHECK-NEXT: vmov.u8 r1, q6[7] +; CHECK-NEXT: vmov.16 q4[7], r1 +; CHECK-NEXT: vmov.u8 r1, q0[0] +; CHECK-NEXT: vmov.16 q5[0], r1 +; CHECK-NEXT: vmov.u8 r1, q0[1] +; CHECK-NEXT: vmov.16 q5[1], r1 +; CHECK-NEXT: vmov.u8 r1, q0[2] +; CHECK-NEXT: vmov.16 q5[2], r1 +; CHECK-NEXT: vmov.u8 r1, q0[3] +; CHECK-NEXT: vmov.16 q5[3], r1 +; CHECK-NEXT: vmov.u8 r1, q0[4] +; CHECK-NEXT: vmov.16 q5[4], r1 +; CHECK-NEXT: vmov.u8 r1, q0[5] +; CHECK-NEXT: vmov.16 q5[5], r1 +; CHECK-NEXT: vmov.u8 r1, q0[6] +; CHECK-NEXT: vmov.16 q5[6], r1 +; CHECK-NEXT: vmov.u8 r1, q0[7] +; CHECK-NEXT: vmov.16 q5[7], r1 +; CHECK-NEXT: vmov q3, q0 +; CHECK-NEXT: vmullb.u8 q5, q5, q4 +; CHECK-NEXT: vcmp.i32 ne, q2, zr +; CHECK-NEXT: vmov.u16 r1, q5[4] +; CHECK-NEXT: vmov.i32 q4, #0x0 +; CHECK-NEXT: vmov.32 q0[0], r1 +; CHECK-NEXT: vmov.u16 r1, q5[5] +; CHECK-NEXT: vmov.32 q0[1], r1 +; CHECK-NEXT: vmov.u16 r1, q5[6] +; CHECK-NEXT: vmov.32 q0[2], r1 +; CHECK-NEXT: vmov.u16 r1, q5[7] +; CHECK-NEXT: vmov.32 q0[3], r1 +; CHECK-NEXT: vmov.i32 q2, #0xffff +; CHECK-NEXT: vmov.u8 r1, q1[8] +; CHECK-NEXT: vstrw.32 q4, [sp, #32] @ 16-byte Spill +; CHECK-NEXT: vstrw.32 q2, [sp, #16] @ 16-byte Spill +; CHECK-NEXT: vpst +; CHECK-NEXT: vandt q4, q0, q2 +; CHECK-NEXT: vmov.16 q0[0], r1 +; CHECK-NEXT: vmov.u8 r1, q1[9] +; CHECK-NEXT: vmov.16 q0[1], r1 +; CHECK-NEXT: vmov.u8 r1, 
q1[10] +; CHECK-NEXT: vmov.16 q0[2], r1 +; CHECK-NEXT: vmov.u8 r1, q1[11] +; CHECK-NEXT: vmov.16 q0[3], r1 +; CHECK-NEXT: vmov.u8 r1, q1[12] +; CHECK-NEXT: vmov.16 q0[4], r1 +; CHECK-NEXT: vmov.u8 r1, q1[13] +; CHECK-NEXT: vmov.16 q0[5], r1 +; CHECK-NEXT: vmov.u8 r1, q1[14] +; CHECK-NEXT: vmov.16 q0[6], r1 +; CHECK-NEXT: vmov.u8 r1, q1[15] +; CHECK-NEXT: vmov.16 q0[7], r1 +; CHECK-NEXT: vcmp.i16 ne, q0, zr +; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload +; CHECK-NEXT: vpsel q0, q7, q0 +; CHECK-NEXT: vmov.u16 r1, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r1 +; CHECK-NEXT: vmov.u16 r1, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r1 +; CHECK-NEXT: vmov.u16 r1, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r1 +; CHECK-NEXT: vmov.u16 r1, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r1 +; CHECK-NEXT: vmov.u8 r1, q6[8] +; CHECK-NEXT: vmov.16 q7[0], r1 +; CHECK-NEXT: vmov.u8 r1, q6[9] +; CHECK-NEXT: vmov.16 q7[1], r1 +; CHECK-NEXT: vmov.u8 r1, q6[10] +; CHECK-NEXT: vmov.16 q7[2], r1 +; CHECK-NEXT: vmov.u8 r1, q6[11] +; CHECK-NEXT: vmov.16 q7[3], r1 +; CHECK-NEXT: vmov.u8 r1, q6[12] +; CHECK-NEXT: vmov.16 q7[4], r1 +; CHECK-NEXT: vmov.u8 r1, q6[13] +; CHECK-NEXT: vmov.16 q7[5], r1 +; CHECK-NEXT: vmov.u8 r1, q6[14] +; CHECK-NEXT: vmov.16 q7[6], r1 +; CHECK-NEXT: vmov.u8 r1, q6[15] +; CHECK-NEXT: vmov.16 q7[7], r1 +; CHECK-NEXT: vmov.u8 r1, q3[8] +; CHECK-NEXT: vcmp.i32 ne, q1, zr +; CHECK-NEXT: vmov.16 q1[0], r1 +; CHECK-NEXT: vmov.u8 r1, q3[9] +; CHECK-NEXT: vmov.16 q1[1], r1 +; CHECK-NEXT: vmov.u8 r1, q3[10] +; CHECK-NEXT: vmov.16 q1[2], r1 +; CHECK-NEXT: vmov.u8 r1, q3[11] +; CHECK-NEXT: vmov.16 q1[3], r1 +; CHECK-NEXT: vmov.u8 r1, q3[12] +; CHECK-NEXT: vmov.16 q1[4], r1 +; CHECK-NEXT: vmov.u8 r1, q3[13] +; CHECK-NEXT: vmov.16 q1[5], r1 +; CHECK-NEXT: vmov.u8 r1, q3[14] +; CHECK-NEXT: vmov.16 q1[6], r1 +; CHECK-NEXT: vmov.u8 r1, q3[15] +; CHECK-NEXT: vmov.16 q1[7], r1 +; CHECK-NEXT: vmullb.u8 q1, q1, q7 +; CHECK-NEXT: vmov.u16 r1, q1[4] +; CHECK-NEXT: vmov.32 q2[0], r1 +; CHECK-NEXT: vmov.u16 r1, q1[5] +; CHECK-NEXT: vmov.32 q2[1], r1 +; CHECK-NEXT: vmov.u16 r1, q1[6] +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: vmov.u16 r1, q1[7] +; CHECK-NEXT: vmov.32 q2[3], r1 +; CHECK-NEXT: vmovlb.u16 q2, q2 +; CHECK-NEXT: vpst +; CHECK-NEXT: vaddt.i32 q4, q4, q2 +; CHECK-NEXT: vldrw.u32 q3, [sp, #48] @ 16-byte Reload +; CHECK-NEXT: vmov.u16 r1, q3[0] +; CHECK-NEXT: vmov.32 q2[0], r1 +; CHECK-NEXT: vmov.u16 r1, q3[1] +; CHECK-NEXT: vmov.32 q2[1], r1 +; CHECK-NEXT: vmov.u16 r1, q3[2] +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: vmov.u16 r1, q3[3] +; CHECK-NEXT: vmov.32 q2[3], r1 +; CHECK-NEXT: vmov.u16 r1, q5[0] +; CHECK-NEXT: vcmp.i32 ne, q2, zr +; CHECK-NEXT: vmov.32 q2[0], r1 +; CHECK-NEXT: vmov.u16 r1, q5[1] +; CHECK-NEXT: vldrw.u32 q3, [sp, #32] @ 16-byte Reload +; CHECK-NEXT: vmov.32 q2[1], r1 +; CHECK-NEXT: vmov.u16 r1, q5[2] +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: vmov.u16 r1, q5[3] +; CHECK-NEXT: vmov.32 q2[3], r1 +; CHECK-NEXT: vmov.u16 r1, q0[0] +; CHECK-NEXT: vldrw.u32 q5, [sp, #16] @ 16-byte Reload +; CHECK-NEXT: vpst +; CHECK-NEXT: vandt q3, q2, q5 +; CHECK-NEXT: vmov.32 q2[0], r1 +; CHECK-NEXT: vmov.u16 r1, q0[1] +; CHECK-NEXT: vmov.32 q2[1], r1 +; CHECK-NEXT: vmov.u16 r1, q0[2] +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: vmov.u16 r1, q0[3] +; CHECK-NEXT: vmov.32 q2[3], r1 +; CHECK-NEXT: vmov.u16 r1, q1[0] +; CHECK-NEXT: vmov.32 q0[0], r1 +; CHECK-NEXT: vmov.u16 r1, q1[1] +; CHECK-NEXT: vmov.32 q0[1], r1 +; CHECK-NEXT: vmov.u16 r1, q1[2] +; CHECK-NEXT: vmov.32 q0[2], r1 +; CHECK-NEXT: vmov.u16 r1, q1[3] +; 
CHECK-NEXT: vmov.32 q0[3], r1 +; CHECK-NEXT: vmov q1, q3 +; CHECK-NEXT: vmovlb.u16 q0, q0 +; CHECK-NEXT: vpt.i32 ne, q2, zr +; CHECK-NEXT: vaddt.i32 q1, q3, q0 +; CHECK-NEXT: vadd.i32 q0, q1, q4 +; CHECK-NEXT: vaddva.u32 r0, q0 +; CHECK-NEXT: add sp, #64 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <16 x i8> %b, zeroinitializer + %xx = zext <16 x i8> %x to <16 x i16> + %yy = zext <16 x i8> %y to <16 x i16> + %m = mul <16 x i16> %xx, %yy + %ma = zext <16 x i16> %m to <16 x i32> + %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer + %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + %r = add i32 %z, %a + ret i32 %r +} + +define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_acc_sext(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b, i32 %a) { +; CHECK-LABEL: add_v16i8_v16i16_v16i32_acc_sext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: .pad #32 +; CHECK-NEXT: sub sp, #32 +; CHECK-NEXT: vmov q3, q2 +; CHECK-NEXT: vmov q6, q0 +; CHECK-NEXT: vmov.i8 q0, #0x0 +; CHECK-NEXT: vcmp.i8 eq, q3, zr +; CHECK-NEXT: vmov.i8 q5, #0xff +; CHECK-NEXT: vmov q2, q1 +; CHECK-NEXT: vpsel q1, q5, q0 +; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill +; CHECK-NEXT: vmov.u8 r1, q1[0] +; CHECK-NEXT: vmov.16 q3[0], r1 +; CHECK-NEXT: vmov.u8 r1, q1[1] +; CHECK-NEXT: vmov.16 q3[1], r1 +; CHECK-NEXT: vmov.u8 r1, q1[2] +; CHECK-NEXT: vmov.16 q3[2], r1 +; CHECK-NEXT: vmov.u8 r1, q1[3] +; CHECK-NEXT: vmov.16 q3[3], r1 +; CHECK-NEXT: vmov.u8 r1, q1[4] +; CHECK-NEXT: vmov.16 q3[4], r1 +; CHECK-NEXT: vmov.u8 r1, q1[5] +; CHECK-NEXT: vmov.16 q3[5], r1 +; CHECK-NEXT: vmov.u8 r1, q1[6] +; CHECK-NEXT: vmov.16 q3[6], r1 +; CHECK-NEXT: vmov.u8 r1, q1[7] +; CHECK-NEXT: vmov.16 q3[7], r1 +; CHECK-NEXT: vcmp.i16 ne, q3, zr +; CHECK-NEXT: vpsel q3, q5, q0 +; CHECK-NEXT: vmov.u16 r1, q3[4] +; CHECK-NEXT: vmov.32 q0[0], r1 +; CHECK-NEXT: vmov.u16 r1, q3[5] +; CHECK-NEXT: vmov.32 q0[1], r1 +; CHECK-NEXT: vmov.u16 r1, q3[6] +; CHECK-NEXT: vmov.32 q0[2], r1 +; CHECK-NEXT: vmov.u16 r1, q3[7] +; CHECK-NEXT: vmov.32 q0[3], r1 +; CHECK-NEXT: vmov.u8 r1, q2[0] +; CHECK-NEXT: vmov.16 q4[0], r1 +; CHECK-NEXT: vmov.u8 r1, q2[1] +; CHECK-NEXT: vmov.16 q4[1], r1 +; CHECK-NEXT: vmov.u8 r1, q2[2] +; CHECK-NEXT: vmov.16 q4[2], r1 +; CHECK-NEXT: vmov.u8 r1, q2[3] +; CHECK-NEXT: vmov.16 q4[3], r1 +; CHECK-NEXT: vmov.u8 r1, q2[4] +; CHECK-NEXT: vmov.16 q4[4], r1 +; CHECK-NEXT: vmov.u8 r1, q2[5] +; CHECK-NEXT: vmov.16 q4[5], r1 +; CHECK-NEXT: vmov.u8 r1, q2[6] +; CHECK-NEXT: vmov.16 q4[6], r1 +; CHECK-NEXT: vmov.u8 r1, q2[7] +; CHECK-NEXT: vmov.16 q4[7], r1 +; CHECK-NEXT: vmov.u8 r1, q6[0] +; CHECK-NEXT: vmov.16 q7[0], r1 +; CHECK-NEXT: vmov.u8 r1, q6[1] +; CHECK-NEXT: vmov.16 q7[1], r1 +; CHECK-NEXT: vmov.u8 r1, q6[2] +; CHECK-NEXT: vmov.16 q7[2], r1 +; CHECK-NEXT: vmov.u8 r1, q6[3] +; CHECK-NEXT: vmov.16 q7[3], r1 +; CHECK-NEXT: vmov.u8 r1, q6[4] +; CHECK-NEXT: vmov.16 q7[4], r1 +; CHECK-NEXT: vmov.u8 r1, q6[5] +; CHECK-NEXT: vmov.16 q7[5], r1 +; CHECK-NEXT: vmov.u8 r1, q6[6] +; CHECK-NEXT: vmov.16 q7[6], r1 +; CHECK-NEXT: vmov.u8 r1, q6[7] +; CHECK-NEXT: vmov.16 q7[7], r1 +; CHECK-NEXT: vcmp.i32 ne, q0, zr +; CHECK-NEXT: vmullb.s8 q4, q7, q4 +; CHECK-NEXT: vmov.u16 r1, q4[4] +; CHECK-NEXT: vmov.32 q0[0], r1 +; CHECK-NEXT: vmov.u16 r1, q4[5] +; CHECK-NEXT: vmov.32 q0[1], r1 +; CHECK-NEXT: vmov.u16 r1, q4[6] +; CHECK-NEXT: vmov.32 q0[2], r1 +; 
CHECK-NEXT: vmov.u16 r1, q4[7] +; CHECK-NEXT: vmov.32 q0[3], r1 +; CHECK-NEXT: vmov.u8 r1, q1[8] +; CHECK-NEXT: vmovlb.s16 q7, q0 +; CHECK-NEXT: vmov.i32 q0, #0x0 +; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill +; CHECK-NEXT: vpsel q7, q7, q0 +; CHECK-NEXT: vmov.16 q0[0], r1 +; CHECK-NEXT: vmov.u8 r1, q1[9] +; CHECK-NEXT: vmov.16 q0[1], r1 +; CHECK-NEXT: vmov.u8 r1, q1[10] +; CHECK-NEXT: vmov.16 q0[2], r1 +; CHECK-NEXT: vmov.u8 r1, q1[11] +; CHECK-NEXT: vmov.16 q0[3], r1 +; CHECK-NEXT: vmov.u8 r1, q1[12] +; CHECK-NEXT: vmov.16 q0[4], r1 +; CHECK-NEXT: vmov.u8 r1, q1[13] +; CHECK-NEXT: vmov.16 q0[5], r1 +; CHECK-NEXT: vmov.u8 r1, q1[14] +; CHECK-NEXT: vmov.16 q0[6], r1 +; CHECK-NEXT: vmov.u8 r1, q1[15] +; CHECK-NEXT: vmov.16 q0[7], r1 +; CHECK-NEXT: vcmp.i16 ne, q0, zr +; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload +; CHECK-NEXT: vpsel q0, q5, q0 +; CHECK-NEXT: vmov.u16 r1, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r1 +; CHECK-NEXT: vmov.u16 r1, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r1 +; CHECK-NEXT: vmov.u16 r1, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r1 +; CHECK-NEXT: vmov.u16 r1, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r1 +; CHECK-NEXT: vmov.u8 r1, q2[8] +; CHECK-NEXT: vmov.16 q5[0], r1 +; CHECK-NEXT: vmov.u8 r1, q2[9] +; CHECK-NEXT: vmov.16 q5[1], r1 +; CHECK-NEXT: vmov.u8 r1, q2[10] +; CHECK-NEXT: vmov.16 q5[2], r1 +; CHECK-NEXT: vmov.u8 r1, q2[11] +; CHECK-NEXT: vmov.16 q5[3], r1 +; CHECK-NEXT: vmov.u8 r1, q2[12] +; CHECK-NEXT: vmov.16 q5[4], r1 +; CHECK-NEXT: vmov.u8 r1, q2[13] +; CHECK-NEXT: vmov.16 q5[5], r1 +; CHECK-NEXT: vmov.u8 r1, q2[14] +; CHECK-NEXT: vmov.16 q5[6], r1 +; CHECK-NEXT: vmov.u8 r1, q2[15] +; CHECK-NEXT: vmov.16 q5[7], r1 +; CHECK-NEXT: vmov.u8 r1, q6[8] +; CHECK-NEXT: vcmp.i32 ne, q1, zr +; CHECK-NEXT: vmov.16 q1[0], r1 +; CHECK-NEXT: vmov.u8 r1, q6[9] +; CHECK-NEXT: vmov.16 q1[1], r1 +; CHECK-NEXT: vmov.u8 r1, q6[10] +; CHECK-NEXT: vmov.16 q1[2], r1 +; CHECK-NEXT: vmov.u8 r1, q6[11] +; CHECK-NEXT: vmov.16 q1[3], r1 +; CHECK-NEXT: vmov.u8 r1, q6[12] +; CHECK-NEXT: vmov.16 q1[4], r1 +; CHECK-NEXT: vmov.u8 r1, q6[13] +; CHECK-NEXT: vmov.16 q1[5], r1 +; CHECK-NEXT: vmov.u8 r1, q6[14] +; CHECK-NEXT: vmov.16 q1[6], r1 +; CHECK-NEXT: vmov.u8 r1, q6[15] +; CHECK-NEXT: vmov.16 q1[7], r1 +; CHECK-NEXT: vmullb.s8 q1, q1, q5 +; CHECK-NEXT: vmov.u16 r1, q1[4] +; CHECK-NEXT: vmov.32 q2[0], r1 +; CHECK-NEXT: vmov.u16 r1, q1[5] +; CHECK-NEXT: vmov.32 q2[1], r1 +; CHECK-NEXT: vmov.u16 r1, q1[6] +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: vmov.u16 r1, q1[7] +; CHECK-NEXT: vmov.32 q2[3], r1 +; CHECK-NEXT: vmov.u16 r1, q3[0] +; CHECK-NEXT: vmovlb.s16 q2, q2 +; CHECK-NEXT: vpst +; CHECK-NEXT: vaddt.i32 q7, q7, q2 +; CHECK-NEXT: vmov.32 q2[0], r1 +; CHECK-NEXT: vmov.u16 r1, q3[1] +; CHECK-NEXT: vmov.32 q2[1], r1 +; CHECK-NEXT: vmov.u16 r1, q3[2] +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: vmov.u16 r1, q3[3] +; CHECK-NEXT: vmov.32 q2[3], r1 +; CHECK-NEXT: vmov.u16 r1, q4[0] +; CHECK-NEXT: vcmp.i32 ne, q2, zr +; CHECK-NEXT: vmov.32 q2[0], r1 +; CHECK-NEXT: vmov.u16 r1, q4[1] +; CHECK-NEXT: vldrw.u32 q3, [sp, #16] @ 16-byte Reload +; CHECK-NEXT: vmov.32 q2[1], r1 +; CHECK-NEXT: vmov.u16 r1, q4[2] +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: vmov.u16 r1, q4[3] +; CHECK-NEXT: vmov.32 q2[3], r1 +; CHECK-NEXT: vmov.u16 r1, q0[0] +; CHECK-NEXT: vmovlb.s16 q2, q2 +; CHECK-NEXT: vpsel q2, q2, q3 +; CHECK-NEXT: vmov.32 q3[0], r1 +; CHECK-NEXT: vmov.u16 r1, q0[1] +; CHECK-NEXT: vmov.32 q3[1], r1 +; CHECK-NEXT: vmov.u16 r1, q0[2] +; CHECK-NEXT: vmov.32 q3[2], r1 +; CHECK-NEXT: vmov.u16 r1, 
q0[3] +; CHECK-NEXT: vmov.32 q3[3], r1 +; CHECK-NEXT: vmov.u16 r1, q1[0] +; CHECK-NEXT: vmov.32 q0[0], r1 +; CHECK-NEXT: vmov.u16 r1, q1[1] +; CHECK-NEXT: vmov.32 q0[1], r1 +; CHECK-NEXT: vmov.u16 r1, q1[2] +; CHECK-NEXT: vmov.32 q0[2], r1 +; CHECK-NEXT: vmov.u16 r1, q1[3] +; CHECK-NEXT: vmov.32 q0[3], r1 +; CHECK-NEXT: vmovlb.s16 q0, q0 +; CHECK-NEXT: vpt.i32 ne, q3, zr +; CHECK-NEXT: vaddt.i32 q2, q2, q0 +; CHECK-NEXT: vadd.i32 q0, q2, q7 +; CHECK-NEXT: vaddva.u32 r0, q0 +; CHECK-NEXT: add sp, #32 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <16 x i8> %b, zeroinitializer + %xx = sext <16 x i8> %x to <16 x i16> + %yy = sext <16 x i8> %y to <16 x i16> + %m = mul <16 x i16> %xx, %yy + %ma = sext <16 x i16> %m to <16 x i32> + %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer + %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + %r = add i32 %z, %a + ret i32 %r +} + +define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_acc_sextzext(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b, i32 %a) { +; CHECK-LABEL: add_v16i8_v16i16_v16i32_acc_sextzext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: .pad #32 +; CHECK-NEXT: sub sp, #32 +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vcmp.i8 eq, q2, zr +; CHECK-NEXT: vmov.i8 q2, #0xff +; CHECK-NEXT: vmov.i8 q0, #0x0 +; CHECK-NEXT: vpsel q1, q2, q0 +; CHECK-NEXT: vmov q3, q2 +; CHECK-NEXT: vmov.u8 r1, q1[0] +; CHECK-NEXT: vstrw.32 q2, [sp] @ 16-byte Spill +; CHECK-NEXT: vmov.16 q2[0], r1 +; CHECK-NEXT: vmov.u8 r1, q1[1] +; CHECK-NEXT: vmov.16 q2[1], r1 +; CHECK-NEXT: vmov.u8 r1, q1[2] +; CHECK-NEXT: vmov.16 q2[2], r1 +; CHECK-NEXT: vmov.u8 r1, q1[3] +; CHECK-NEXT: vmov.16 q2[3], r1 +; CHECK-NEXT: vmov.u8 r1, q1[4] +; CHECK-NEXT: vmov.16 q2[4], r1 +; CHECK-NEXT: vmov.u8 r1, q1[5] +; CHECK-NEXT: vmov.16 q2[5], r1 +; CHECK-NEXT: vmov.u8 r1, q1[6] +; CHECK-NEXT: vmov.16 q2[6], r1 +; CHECK-NEXT: vmov.u8 r1, q1[7] +; CHECK-NEXT: vmov.16 q2[7], r1 +; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill +; CHECK-NEXT: vcmp.i16 ne, q2, zr +; CHECK-NEXT: vmov.i32 q6, #0x0 +; CHECK-NEXT: vpsel q5, q3, q0 +; CHECK-NEXT: vmov q7, q6 +; CHECK-NEXT: vmov.u16 r1, q5[4] +; CHECK-NEXT: vmov.i32 q2, #0xffff +; CHECK-NEXT: vmov.32 q0[0], r1 +; CHECK-NEXT: vmov.u16 r1, q5[5] +; CHECK-NEXT: vmov.32 q0[1], r1 +; CHECK-NEXT: vmov.u16 r1, q5[6] +; CHECK-NEXT: vmov.32 q0[2], r1 +; CHECK-NEXT: vmov.u16 r1, q5[7] +; CHECK-NEXT: vmov.32 q0[3], r1 +; CHECK-NEXT: vmov.u8 r1, q4[0] +; CHECK-NEXT: vmov.16 q3[0], r1 +; CHECK-NEXT: vmov.u8 r1, q4[1] +; CHECK-NEXT: vmov.16 q3[1], r1 +; CHECK-NEXT: vmov.u8 r1, q4[2] +; CHECK-NEXT: vmov.16 q3[2], r1 +; CHECK-NEXT: vmov.u8 r1, q4[3] +; CHECK-NEXT: vmov.16 q3[3], r1 +; CHECK-NEXT: vmov.u8 r1, q4[4] +; CHECK-NEXT: vmov.16 q3[4], r1 +; CHECK-NEXT: vmov.u8 r1, q4[5] +; CHECK-NEXT: vmov.16 q3[5], r1 +; CHECK-NEXT: vmov.u8 r1, q4[6] +; CHECK-NEXT: vmov.16 q3[6], r1 +; CHECK-NEXT: vmov.u8 r1, q4[7] +; CHECK-NEXT: vmov.16 q3[7], r1 +; CHECK-NEXT: vcmp.i32 ne, q0, zr +; CHECK-NEXT: vmullb.s8 q3, q3, q3 +; CHECK-NEXT: vmov.u16 r1, q3[4] +; CHECK-NEXT: vmov.32 q0[0], r1 +; CHECK-NEXT: vmov.u16 r1, q3[5] +; CHECK-NEXT: vmov.32 q0[1], r1 +; CHECK-NEXT: vmov.u16 r1, q3[6] +; CHECK-NEXT: vmov.32 q0[2], r1 +; CHECK-NEXT: vmov.u16 r1, q3[7] +; CHECK-NEXT: vmov.32 q0[3], r1 +; CHECK-NEXT: vmov.u8 r1, q1[8] +; CHECK-NEXT: vpst +; CHECK-NEXT: vandt q7, q0, q2 +; 
CHECK-NEXT: vmov.16 q0[0], r1 +; CHECK-NEXT: vmov.u8 r1, q1[9] +; CHECK-NEXT: vmov.16 q0[1], r1 +; CHECK-NEXT: vmov.u8 r1, q1[10] +; CHECK-NEXT: vmov.16 q0[2], r1 +; CHECK-NEXT: vmov.u8 r1, q1[11] +; CHECK-NEXT: vmov.16 q0[3], r1 +; CHECK-NEXT: vmov.u8 r1, q1[12] +; CHECK-NEXT: vmov.16 q0[4], r1 +; CHECK-NEXT: vmov.u8 r1, q1[13] +; CHECK-NEXT: vmov.16 q0[5], r1 +; CHECK-NEXT: vmov.u8 r1, q1[14] +; CHECK-NEXT: vmov.16 q0[6], r1 +; CHECK-NEXT: vmov.u8 r1, q1[15] +; CHECK-NEXT: vmov.16 q0[7], r1 +; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload +; CHECK-NEXT: vcmp.i16 ne, q0, zr +; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload +; CHECK-NEXT: vpsel q0, q1, q0 +; CHECK-NEXT: vmov.u16 r1, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r1 +; CHECK-NEXT: vmov.u16 r1, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r1 +; CHECK-NEXT: vmov.u16 r1, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r1 +; CHECK-NEXT: vmov.u16 r1, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r1 +; CHECK-NEXT: vmov.u8 r1, q4[8] +; CHECK-NEXT: vcmp.i32 ne, q1, zr +; CHECK-NEXT: vmov.16 q1[0], r1 +; CHECK-NEXT: vmov.u8 r1, q4[9] +; CHECK-NEXT: vmov.16 q1[1], r1 +; CHECK-NEXT: vmov.u8 r1, q4[10] +; CHECK-NEXT: vmov.16 q1[2], r1 +; CHECK-NEXT: vmov.u8 r1, q4[11] +; CHECK-NEXT: vmov.16 q1[3], r1 +; CHECK-NEXT: vmov.u8 r1, q4[12] +; CHECK-NEXT: vmov.16 q1[4], r1 +; CHECK-NEXT: vmov.u8 r1, q4[13] +; CHECK-NEXT: vmov.16 q1[5], r1 +; CHECK-NEXT: vmov.u8 r1, q4[14] +; CHECK-NEXT: vmov.16 q1[6], r1 +; CHECK-NEXT: vmov.u8 r1, q4[15] +; CHECK-NEXT: vmov.16 q1[7], r1 +; CHECK-NEXT: vmullb.s8 q1, q1, q1 +; CHECK-NEXT: vmov.u16 r1, q1[4] +; CHECK-NEXT: vmov.32 q4[0], r1 +; CHECK-NEXT: vmov.u16 r1, q1[5] +; CHECK-NEXT: vmov.32 q4[1], r1 +; CHECK-NEXT: vmov.u16 r1, q1[6] +; CHECK-NEXT: vmov.32 q4[2], r1 +; CHECK-NEXT: vmov.u16 r1, q1[7] +; CHECK-NEXT: vmov.32 q4[3], r1 +; CHECK-NEXT: vmov.u16 r1, q5[0] +; CHECK-NEXT: vmovlb.u16 q4, q4 +; CHECK-NEXT: vpst +; CHECK-NEXT: vaddt.i32 q7, q7, q4 +; CHECK-NEXT: vmov.32 q4[0], r1 +; CHECK-NEXT: vmov.u16 r1, q5[1] +; CHECK-NEXT: vmov.32 q4[1], r1 +; CHECK-NEXT: vmov.u16 r1, q5[2] +; CHECK-NEXT: vmov.32 q4[2], r1 +; CHECK-NEXT: vmov.u16 r1, q5[3] +; CHECK-NEXT: vmov.32 q4[3], r1 +; CHECK-NEXT: vmov.u16 r1, q3[0] +; CHECK-NEXT: vcmp.i32 ne, q4, zr +; CHECK-NEXT: vmov.32 q4[0], r1 +; CHECK-NEXT: vmov.u16 r1, q3[1] +; CHECK-NEXT: vmov.32 q4[1], r1 +; CHECK-NEXT: vmov.u16 r1, q3[2] +; CHECK-NEXT: vmov.32 q4[2], r1 +; CHECK-NEXT: vmov.u16 r1, q3[3] +; CHECK-NEXT: vmov.32 q4[3], r1 +; CHECK-NEXT: vmov.u16 r1, q0[0] +; CHECK-NEXT: vpst +; CHECK-NEXT: vandt q6, q4, q2 +; CHECK-NEXT: vmov.32 q2[0], r1 +; CHECK-NEXT: vmov.u16 r1, q0[1] +; CHECK-NEXT: vmov.32 q2[1], r1 +; CHECK-NEXT: vmov.u16 r1, q0[2] +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: vmov.u16 r1, q0[3] +; CHECK-NEXT: vmov.32 q2[3], r1 +; CHECK-NEXT: vmov.u16 r1, q1[0] +; CHECK-NEXT: vmov.32 q0[0], r1 +; CHECK-NEXT: vmov.u16 r1, q1[1] +; CHECK-NEXT: vmov.32 q0[1], r1 +; CHECK-NEXT: vmov.u16 r1, q1[2] +; CHECK-NEXT: vmov.32 q0[2], r1 +; CHECK-NEXT: vmov.u16 r1, q1[3] +; CHECK-NEXT: vmov.32 q0[3], r1 +; CHECK-NEXT: vmovlb.u16 q0, q0 +; CHECK-NEXT: vpt.i32 ne, q2, zr +; CHECK-NEXT: vaddt.i32 q6, q6, q0 +; CHECK-NEXT: vadd.i32 q0, q6, q7 +; CHECK-NEXT: vaddva.u32 r0, q0 +; CHECK-NEXT: add sp, #32 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <16 x i8> %b, zeroinitializer + %xx = sext <16 x i8> %x to <16 x i16> + %m = mul <16 x i16> %xx, %xx + %ma = zext <16 x i16> %m to <16 x i32> + %s = select <16 x i1> %c, <16 x i32> %ma, <16 x 
i32> zeroinitializer + %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + %r = add i32 %z, %a + ret i32 %r +} + define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_zext(<4 x i8> %x, <4 x i8> %y, <4 x i8> %b, i32 %a) { ; CHECK-LABEL: add_v4i8_v4i32_acc_zext: ; CHECK: @ %bb.0: @ %entry From ab2ed8bce9e924a2fc734ca4369419c18d124043 Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 11 Sep 2020 18:51:57 +0100 Subject: [PATCH 0396/1079] [SVE] Regenerate sve vector bits tests. NFC --- .../attr-arm-sve-vector-bits-bitcast.c | 96 +++++++-------- .../CodeGen/attr-arm-sve-vector-bits-call.c | 112 +++++++++--------- .../CodeGen/attr-arm-sve-vector-bits-cast.c | 30 ++--- .../attr-arm-sve-vector-bits-globals.c | 48 ++++---- 4 files changed, 143 insertions(+), 143 deletions(-) diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c index cab424c3dbe17..84559e9edb9a3 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c @@ -31,21 +31,21 @@ DEFINE_STRUCT(bool) // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <2 x i64>* [[ARRAYIDX]] to * -// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2 +// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2:!tbaa !.*]] // CHECK-128-NEXT: ret [[TMP1]] // // CHECK-256-LABEL: @read_int64( // CHECK-256-NEXT: entry: // CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <4 x i64>* [[ARRAYIDX]] to * -// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2 +// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2:!tbaa !.*]] // CHECK-256-NEXT: ret [[TMP1]] // // CHECK-512-LABEL: @read_int64( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <8 x i64>* [[ARRAYIDX]] to * -// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2 +// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2:!tbaa !.*]] // CHECK-512-NEXT: ret [[TMP1]] // svint64_t read_int64(struct struct_int64 *s) { @@ -55,31 +55,31 @@ svint64_t read_int64(struct struct_int64 *s) { // CHECK-128-LABEL: @write_int64( // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !5 +// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA5:!tbaa !.*]] // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <2 x i64>* -// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 16, !tbaa !2 +// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 16, [[TBAA2]] // CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-128-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[ARRAYIDX]], align 16, !tbaa !2 +// CHECK-128-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[ARRAYIDX]], align 16, [[TBAA2]] // CHECK-128-NEXT: ret void // // CHECK-256-LABEL: @write_int64( // CHECK-256-NEXT: entry: // CHECK-256-NEXT: 
[[X_ADDR:%.*]] = alloca , align 16 -// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !5 +// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA5:!tbaa !.*]] // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <4 x i64>* -// CHECK-256-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 16, !tbaa !2 +// CHECK-256-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 16, [[TBAA2]] // CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-256-NEXT: store <4 x i64> [[TMP1]], <4 x i64>* [[ARRAYIDX]], align 16, !tbaa !2 +// CHECK-256-NEXT: store <4 x i64> [[TMP1]], <4 x i64>* [[ARRAYIDX]], align 16, [[TBAA2]] // CHECK-256-NEXT: ret void // // CHECK-512-LABEL: @write_int64( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !5 +// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA5:!tbaa !.*]] // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <8 x i64>* -// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[TMP0]], align 16, !tbaa !2 +// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[TMP0]], align 16, [[TBAA2]] // CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-512-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[ARRAYIDX]], align 16, !tbaa !2 +// CHECK-512-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[ARRAYIDX]], align 16, [[TBAA2]] // CHECK-512-NEXT: ret void // void write_int64(struct struct_int64 *s, svint64_t x) { @@ -94,21 +94,21 @@ void write_int64(struct struct_int64 *s, svint64_t x) { // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <2 x double>* [[ARRAYIDX]] to * -// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2 +// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2]] // CHECK-128-NEXT: ret [[TMP1]] // // CHECK-256-LABEL: @read_float64( // CHECK-256-NEXT: entry: // CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <4 x double>* [[ARRAYIDX]] to * -// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2 +// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2]] // CHECK-256-NEXT: ret [[TMP1]] // // CHECK-512-LABEL: @read_float64( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <8 x double>* [[ARRAYIDX]] to * -// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2 +// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2]] // CHECK-512-NEXT: ret [[TMP1]] // svfloat64_t read_float64(struct struct_float64 *s) { @@ -118,31 +118,31 @@ svfloat64_t read_float64(struct struct_float64 *s) { // CHECK-128-LABEL: @write_float64( // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !7 +// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA7:!tbaa !.*]] // 
CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <2 x double>* -// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 16, !tbaa !2 +// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 16, [[TBAA2]] // CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-128-NEXT: store <2 x double> [[TMP1]], <2 x double>* [[ARRAYIDX]], align 16, !tbaa !2 +// CHECK-128-NEXT: store <2 x double> [[TMP1]], <2 x double>* [[ARRAYIDX]], align 16, [[TBAA2]] // CHECK-128-NEXT: ret void // // CHECK-256-LABEL: @write_float64( // CHECK-256-NEXT: entry: // CHECK-256-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !7 +// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA7:!tbaa !.*]] // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <4 x double>* -// CHECK-256-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 16, !tbaa !2 +// CHECK-256-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 16, [[TBAA2]] // CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-256-NEXT: store <4 x double> [[TMP1]], <4 x double>* [[ARRAYIDX]], align 16, !tbaa !2 +// CHECK-256-NEXT: store <4 x double> [[TMP1]], <4 x double>* [[ARRAYIDX]], align 16, [[TBAA2]] // CHECK-256-NEXT: ret void // // CHECK-512-LABEL: @write_float64( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !7 +// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA7:!tbaa !.*]] // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <8 x double>* -// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[TMP0]], align 16, !tbaa !2 +// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[TMP0]], align 16, [[TBAA2]] // CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-512-NEXT: store <8 x double> [[TMP1]], <8 x double>* [[ARRAYIDX]], align 16, !tbaa !2 +// CHECK-512-NEXT: store <8 x double> [[TMP1]], <8 x double>* [[ARRAYIDX]], align 16, [[TBAA2]] // CHECK-512-NEXT: ret void // void write_float64(struct struct_float64 *s, svfloat64_t x) { @@ -157,21 +157,21 @@ void write_float64(struct struct_float64 *s, svfloat64_t x) { // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat>* [[ARRAYIDX]] to * -// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2 +// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2]] // CHECK-128-NEXT: ret [[TMP1]] // // CHECK-256-LABEL: @read_bfloat16( // CHECK-256-NEXT: entry: // CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <16 x bfloat>* [[ARRAYIDX]] to * -// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2 +// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2]] // CHECK-256-NEXT: ret [[TMP1]] // // CHECK-512-LABEL: @read_bfloat16( // CHECK-512-NEXT: entry: // 
CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <32 x bfloat>* [[ARRAYIDX]] to * -// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2 +// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2]] // CHECK-512-NEXT: ret [[TMP1]] // svbfloat16_t read_bfloat16(struct struct_bfloat16 *s) { @@ -181,31 +181,31 @@ svbfloat16_t read_bfloat16(struct struct_bfloat16 *s) { // CHECK-128-LABEL: @write_bfloat16( // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !9 +// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA9:!tbaa !.*]] // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <8 x bfloat>* -// CHECK-128-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[TMP0]], align 16, !tbaa !2 +// CHECK-128-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[TMP0]], align 16, [[TBAA2]] // CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-128-NEXT: store <8 x bfloat> [[TMP1]], <8 x bfloat>* [[ARRAYIDX]], align 16, !tbaa !2 +// CHECK-128-NEXT: store <8 x bfloat> [[TMP1]], <8 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA2]] // CHECK-128-NEXT: ret void // // CHECK-256-LABEL: @write_bfloat16( // CHECK-256-NEXT: entry: // CHECK-256-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !9 +// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA9:!tbaa !.*]] // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <16 x bfloat>* -// CHECK-256-NEXT: [[TMP1:%.*]] = load <16 x bfloat>, <16 x bfloat>* [[TMP0]], align 16, !tbaa !2 +// CHECK-256-NEXT: [[TMP1:%.*]] = load <16 x bfloat>, <16 x bfloat>* [[TMP0]], align 16, [[TBAA2]] // CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-256-NEXT: store <16 x bfloat> [[TMP1]], <16 x bfloat>* [[ARRAYIDX]], align 16, !tbaa !2 +// CHECK-256-NEXT: store <16 x bfloat> [[TMP1]], <16 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA2]] // CHECK-256-NEXT: ret void // // CHECK-512-LABEL: @write_bfloat16( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !9 +// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA9:!tbaa !.*]] // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <32 x bfloat>* -// CHECK-512-NEXT: [[TMP1:%.*]] = load <32 x bfloat>, <32 x bfloat>* [[TMP0]], align 16, !tbaa !2 +// CHECK-512-NEXT: [[TMP1:%.*]] = load <32 x bfloat>, <32 x bfloat>* [[TMP0]], align 16, [[TBAA2]] // CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-512-NEXT: store <32 x bfloat> [[TMP1]], <32 x bfloat>* [[ARRAYIDX]], align 16, !tbaa !2 +// CHECK-512-NEXT: store <32 x bfloat> [[TMP1]], <32 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA2]] // CHECK-512-NEXT: ret void // void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) { @@ -220,21 +220,21 @@ void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) { // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], 
%struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <2 x i8>* [[ARRAYIDX]] to * -// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 2, !tbaa !2 +// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 2, [[TBAA2]] // CHECK-128-NEXT: ret [[TMP1]] // // CHECK-256-LABEL: @read_bool( // CHECK-256-NEXT: entry: // CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <4 x i8>* [[ARRAYIDX]] to * -// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 2, !tbaa !2 +// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 2, [[TBAA2]] // CHECK-256-NEXT: ret [[TMP1]] // // CHECK-512-LABEL: @read_bool( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[ARRAYIDX]] to * -// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 2, !tbaa !2 +// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 2, [[TBAA2]] // CHECK-512-NEXT: ret [[TMP1]] // svbool_t read_bool(struct struct_bool *s) { @@ -244,33 +244,33 @@ svbool_t read_bool(struct struct_bool *s) { // CHECK-128-LABEL: @write_bool( // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !11 +// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA11:!tbaa !.*]] // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <2 x i8>* -// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 16, !tbaa !2 +// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 16, [[TBAA2]] // CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-128-NEXT: store <2 x i8> [[TMP1]], <2 x i8>* [[ARRAYIDX]], align 2, !tbaa !2 +// CHECK-128-NEXT: store <2 x i8> [[TMP1]], <2 x i8>* [[ARRAYIDX]], align 2, [[TBAA2]] // CHECK-128-NEXT: ret void // // CHECK-256-LABEL: @write_bool( // CHECK-256-NEXT: entry: // CHECK-256-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !11 +// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA11:!tbaa !.*]] // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to i32* -// CHECK-256-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 16, !tbaa !2 +// CHECK-256-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 16, [[TBAA2]] // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1 // CHECK-256-NEXT: [[TMP2:%.*]] = bitcast [3 x <4 x i8>]* [[Y]] to i32* -// CHECK-256-NEXT: store i32 [[TMP1]], i32* [[TMP2]], align 2, !tbaa !2 +// CHECK-256-NEXT: store i32 [[TMP1]], i32* [[TMP2]], align 2, [[TBAA2]] // CHECK-256-NEXT: ret void // // CHECK-512-LABEL: @write_bool( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !11 +// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA11:!tbaa !.*]] // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to i64* -// CHECK-512-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, !tbaa !2 +// CHECK-512-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA2]] // CHECK-512-NEXT: [[Y:%.*]] = 
getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1 // CHECK-512-NEXT: [[TMP2:%.*]] = bitcast [3 x <8 x i8>]* [[Y]] to i64* -// CHECK-512-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 2, !tbaa !2 +// CHECK-512-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 2, [[TBAA2]] // CHECK-512-NEXT: ret void // void write_bool(struct struct_bool *s, svbool_t x) { diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c index 490ec92dfdeb5..1c08e46681fbc 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c @@ -30,13 +30,13 @@ svint32_t sizeless_callee(svint32_t x) { // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32>* [[X]] to * // CHECK-NEXT: store [[X_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[X1:%.*]] = load <16 x i32>, <16 x i32>* [[X]], align 16, !tbaa !2 -// CHECK-NEXT: store <16 x i32> [[X1]], <16 x i32>* [[X_ADDR]], align 16, !tbaa !2 +// CHECK-NEXT: [[X1:%.*]] = load <16 x i32>, <16 x i32>* [[X]], align 16, [[TBAA2:!tbaa !.*]] +// CHECK-NEXT: store <16 x i32> [[X1]], <16 x i32>* [[X_ADDR]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[X_ADDR]] to * -// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, !tbaa !2 -// CHECK-NEXT: store [[TMP2]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !5 +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA2]] +// CHECK-NEXT: store [[TMP2]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA5:!tbaa !.*]] // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <16 x i32>* -// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, [[TBAA2]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* // CHECK-NEXT: store <16 x i32> [[TMP3]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP4:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -52,7 +52,7 @@ fixed_int32_t fixed_caller(fixed_int32_t x) { // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32>* [[X]] to * // CHECK-NEXT: store [[X_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[X1:%.*]] = load <16 x i32>, <16 x i32>* [[X]], align 16, !tbaa !2 +// CHECK-NEXT: [[X1:%.*]] = load <16 x i32>, <16 x i32>* [[X]], align 16, [[TBAA2]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* // CHECK-NEXT: store <16 x i32> [[X1]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -68,19 +68,19 @@ fixed_int32_t fixed_callee(fixed_int32_t x) { // CHECK-NEXT: [[COERCE_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[COERCE1:%.*]] = alloca <16 x i32>, align 16 // CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca <16 x i32>, align 64 -// CHECK-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !5 +// CHECK-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA5]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <16 x i32>* -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0]], align 16, [[TBAA2]] // CHECK-NEXT: [[COERCE_0__SROA_CAST:%.*]] = bitcast * [[COERCE_COERCE]] to <16 x i32>* // CHECK-NEXT: store <16 x i32> 
[[TMP1]], <16 x i32>* [[COERCE_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP2:%.*]] = load , * [[COERCE_COERCE]], align 16 // CHECK-NEXT: [[CALL:%.*]] = call @fixed_callee( [[TMP2]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32>* [[COERCE1]] to * // CHECK-NEXT: store [[CALL]], * [[TMP3]], align 16 -// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, <16 x i32>* [[COERCE1]], align 16, !tbaa !2 -// CHECK-NEXT: store <16 x i32> [[TMP4]], <16 x i32>* [[SAVED_CALL_RVALUE]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, <16 x i32>* [[COERCE1]], align 16, [[TBAA2]] +// CHECK-NEXT: store <16 x i32> [[TMP4]], <16 x i32>* [[SAVED_CALL_RVALUE]], align 64, [[TBAA2]] // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <16 x i32>* [[SAVED_CALL_RVALUE]] to * -// CHECK-NEXT: [[TMP5:%.*]] = load , * [[CASTFIXEDSVE]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP5:%.*]] = load , * [[CASTFIXEDSVE]], align 64, [[TBAA2]] // CHECK-NEXT: ret [[TMP5]] // svint32_t sizeless_caller(svint32_t x) { @@ -101,21 +101,21 @@ svint32_t sizeless_caller(svint32_t x) { // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32>* [[OP1]] to * // CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[OP11:%.*]] = load <16 x i32>, <16 x i32>* [[OP1]], align 16, !tbaa !2 +// CHECK-NEXT: [[OP11:%.*]] = load <16 x i32>, <16 x i32>* [[OP1]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[OP2]] to * // CHECK-NEXT: store [[OP2_COERCE:%.*]], * [[TMP1]], align 16 -// CHECK-NEXT: [[OP22:%.*]] = load <16 x i32>, <16 x i32>* [[OP2]], align 16, !tbaa !2 -// CHECK-NEXT: store <16 x i32> [[OP11]], <16 x i32>* [[OP1_ADDR]], align 16, !tbaa !2 -// CHECK-NEXT: store <16 x i32> [[OP22]], <16 x i32>* [[OP2_ADDR]], align 16, !tbaa !2 +// CHECK-NEXT: [[OP22:%.*]] = load <16 x i32>, <16 x i32>* [[OP2]], align 16, [[TBAA2]] +// CHECK-NEXT: store <16 x i32> [[OP11]], <16 x i32>* [[OP1_ADDR]], align 16, [[TBAA2]] +// CHECK-NEXT: store <16 x i32> [[OP22]], <16 x i32>* [[OP2_ADDR]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32>* [[OP1_ADDR]] to * -// CHECK-NEXT: [[TMP3:%.*]] = load , * [[TMP2]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP3:%.*]] = load , * [[TMP2]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32>* [[OP2_ADDR]] to * -// CHECK-NEXT: [[TMP5:%.*]] = load , * [[TMP4]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP5:%.*]] = load , * [[TMP4]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP6:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP7:%.*]] = call @llvm.aarch64.sve.sel.nxv4i32( [[TMP6]], [[TMP3]], [[TMP5]]) -// CHECK-NEXT: store [[TMP7]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !5 +// CHECK-NEXT: store [[TMP7]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA5]] // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <16 x i32>* -// CHECK-NEXT: [[TMP8:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP8:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, [[TBAA2]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* // CHECK-NEXT: store <16 x i32> [[TMP8]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP9:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -135,21 +135,21 @@ fixed_int32_t call_int32_ff(svbool_t pg, fixed_int32_t op1, fixed_int32_t op2) { // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x 
double>* [[OP1]] to * // CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[OP11:%.*]] = load <8 x double>, <8 x double>* [[OP1]], align 16, !tbaa !2 +// CHECK-NEXT: [[OP11:%.*]] = load <8 x double>, <8 x double>* [[OP1]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x double>* [[OP2]] to * // CHECK-NEXT: store [[OP2_COERCE:%.*]], * [[TMP1]], align 16 -// CHECK-NEXT: [[OP22:%.*]] = load <8 x double>, <8 x double>* [[OP2]], align 16, !tbaa !2 -// CHECK-NEXT: store <8 x double> [[OP11]], <8 x double>* [[OP1_ADDR]], align 16, !tbaa !2 -// CHECK-NEXT: store <8 x double> [[OP22]], <8 x double>* [[OP2_ADDR]], align 16, !tbaa !2 +// CHECK-NEXT: [[OP22:%.*]] = load <8 x double>, <8 x double>* [[OP2]], align 16, [[TBAA2]] +// CHECK-NEXT: store <8 x double> [[OP11]], <8 x double>* [[OP1_ADDR]], align 16, [[TBAA2]] +// CHECK-NEXT: store <8 x double> [[OP22]], <8 x double>* [[OP2_ADDR]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x double>* [[OP1_ADDR]] to * -// CHECK-NEXT: [[TMP3:%.*]] = load , * [[TMP2]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP3:%.*]] = load , * [[TMP2]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x double>* [[OP2_ADDR]] to * -// CHECK-NEXT: [[TMP5:%.*]] = load , * [[TMP4]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP5:%.*]] = load , * [[TMP4]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP6:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP7:%.*]] = call @llvm.aarch64.sve.sel.nxv2f64( [[TMP6]], [[TMP3]], [[TMP5]]) -// CHECK-NEXT: store [[TMP7]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !7 +// CHECK-NEXT: store [[TMP7]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA7:!tbaa !.*]] // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <8 x double>* -// CHECK-NEXT: [[TMP8:%.*]] = load <8 x double>, <8 x double>* [[CASTFIXEDSVE]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP8:%.*]] = load <8 x double>, <8 x double>* [[CASTFIXEDSVE]], align 16, [[TBAA2]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <8 x double>* // CHECK-NEXT: store <8 x double> [[TMP8]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP9:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -170,23 +170,23 @@ fixed_float64_t call_float64_ff(svbool_t pg, fixed_float64_t op1, fixed_float64_ // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to * // CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1]] to i64* -// CHECK-NEXT: [[OP113:%.*]] = load i64, i64* [[TMP1]], align 16, !tbaa !2 +// CHECK-NEXT: [[OP113:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[OP2]] to * // CHECK-NEXT: store [[OP2_COERCE:%.*]], * [[TMP2]], align 16 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[OP2]] to i64* -// CHECK-NEXT: [[OP224:%.*]] = load i64, i64* [[TMP3]], align 16, !tbaa !2 +// CHECK-NEXT: [[OP224:%.*]] = load i64, i64* [[TMP3]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to i64* -// CHECK-NEXT: store i64 [[OP113]], i64* [[TMP4]], align 16, !tbaa !2 +// CHECK-NEXT: store i64 [[OP113]], i64* [[TMP4]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to i64* -// CHECK-NEXT: store i64 [[OP224]], i64* [[TMP5]], align 16, !tbaa !2 +// CHECK-NEXT: store i64 [[OP224]], i64* [[TMP5]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to * -// CHECK-NEXT: 
[[TMP7:%.*]] = load , * [[TMP6]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP7:%.*]] = load , * [[TMP6]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to * -// CHECK-NEXT: [[TMP9:%.*]] = load , * [[TMP8]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP9:%.*]] = load , * [[TMP8]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP10:%.*]] = call @llvm.aarch64.sve.sel.nxv16i1( [[PG:%.*]], [[TMP7]], [[TMP9]]) -// CHECK-NEXT: store [[TMP10]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !9 +// CHECK-NEXT: store [[TMP10]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA9:!tbaa !.*]] // CHECK-NEXT: [[TMP11:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to i64* -// CHECK-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP13:%.*]] = bitcast * [[RETVAL_COERCE]] to i64* // CHECK-NEXT: store i64 [[TMP12]], i64* [[TMP13]], align 16 // CHECK-NEXT: [[TMP14:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -208,15 +208,15 @@ fixed_bool_t call_bool_ff(svbool_t pg, fixed_bool_t op1, fixed_bool_t op2) { // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32>* [[OP1]] to * // CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[OP11:%.*]] = load <16 x i32>, <16 x i32>* [[OP1]], align 16, !tbaa !2 -// CHECK-NEXT: store <16 x i32> [[OP11]], <16 x i32>* [[OP1_ADDR]], align 16, !tbaa !2 +// CHECK-NEXT: [[OP11:%.*]] = load <16 x i32>, <16 x i32>* [[OP1]], align 16, [[TBAA2]] +// CHECK-NEXT: store <16 x i32> [[OP11]], <16 x i32>* [[OP1_ADDR]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[OP1_ADDR]] to * -// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP4:%.*]] = call @llvm.aarch64.sve.sel.nxv4i32( [[TMP3]], [[TMP2]], [[OP2:%.*]]) -// CHECK-NEXT: store [[TMP4]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !5 +// CHECK-NEXT: store [[TMP4]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA5]] // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <16 x i32>* -// CHECK-NEXT: [[TMP5:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP5:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, [[TBAA2]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* // CHECK-NEXT: store <16 x i32> [[TMP5]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP6:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -234,15 +234,15 @@ fixed_int32_t call_int32_fs(svbool_t pg, fixed_int32_t op1, svint32_t op2) { // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x double>* [[OP1]] to * // CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[OP11:%.*]] = load <8 x double>, <8 x double>* [[OP1]], align 16, !tbaa !2 -// CHECK-NEXT: store <8 x double> [[OP11]], <8 x double>* [[OP1_ADDR]], align 16, !tbaa !2 +// CHECK-NEXT: [[OP11:%.*]] = load <8 x double>, <8 x double>* [[OP1]], align 16, [[TBAA2]] +// CHECK-NEXT: store <8 x double> [[OP11]], <8 x double>* [[OP1_ADDR]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x double>* [[OP1_ADDR]] to * -// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP2:%.*]] = load , 
* [[TMP1]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP4:%.*]] = call @llvm.aarch64.sve.sel.nxv2f64( [[TMP3]], [[TMP2]], [[OP2:%.*]]) -// CHECK-NEXT: store [[TMP4]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !7 +// CHECK-NEXT: store [[TMP4]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA7]] // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <8 x double>* -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x double>, <8 x double>* [[CASTFIXEDSVE]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x double>, <8 x double>* [[CASTFIXEDSVE]], align 16, [[TBAA2]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <8 x double>* // CHECK-NEXT: store <8 x double> [[TMP5]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP6:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -261,15 +261,15 @@ fixed_float64_t call_float64_fs(svbool_t pg, fixed_float64_t op1, svfloat64_t op // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to * // CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1]] to i64* -// CHECK-NEXT: [[OP112:%.*]] = load i64, i64* [[TMP1]], align 16, !tbaa !2 +// CHECK-NEXT: [[OP112:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to i64* -// CHECK-NEXT: store i64 [[OP112]], i64* [[TMP2]], align 16, !tbaa !2 +// CHECK-NEXT: store i64 [[OP112]], i64* [[TMP2]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to * -// CHECK-NEXT: [[TMP4:%.*]] = load , * [[TMP3]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP4:%.*]] = load , * [[TMP3]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.sel.nxv16i1( [[PG:%.*]], [[TMP4]], [[OP2:%.*]]) -// CHECK-NEXT: store [[TMP5]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !9 +// CHECK-NEXT: store [[TMP5]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA9]] // CHECK-NEXT: [[TMP6:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to i64* -// CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP8:%.*]] = bitcast * [[RETVAL_COERCE]] to i64* // CHECK-NEXT: store i64 [[TMP7]], i64* [[TMP8]], align 16 // CHECK-NEXT: [[TMP9:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -289,9 +289,9 @@ fixed_bool_t call_bool_fs(svbool_t pg, fixed_bool_t op1, svbool_t op2) { // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.sel.nxv4i32( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: store [[TMP1]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !5 +// CHECK-NEXT: store [[TMP1]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA5]] // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <16 x i32>* -// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, [[TBAA2]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* // CHECK-NEXT: store <16 x i32> [[TMP2]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP3:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -307,9 +307,9 @@ fixed_int32_t call_int32_ss(svbool_t pg, svint32_t op1, svint32_t op2) 
{ // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.sel.nxv2f64( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: store [[TMP1]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !7 +// CHECK-NEXT: store [[TMP1]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA7]] // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <8 x double>* -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x double>, <8 x double>* [[CASTFIXEDSVE]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x double>, <8 x double>* [[CASTFIXEDSVE]], align 16, [[TBAA2]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <8 x double>* // CHECK-NEXT: store <8 x double> [[TMP2]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP3:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -324,9 +324,9 @@ fixed_float64_t call_float64_ss(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { // CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca , align 16 // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.sel.nxv16i1( [[PG:%.*]], [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: store [[TMP0]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !9 +// CHECK-NEXT: store [[TMP0]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA9]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to i64* -// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = bitcast * [[RETVAL_COERCE]] to i64* // CHECK-NEXT: store i64 [[TMP2]], i64* [[TMP3]], align 16 // CHECK-NEXT: [[TMP4:%.*]] = load , * [[RETVAL_COERCE]], align 16 diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c index 13d8f14f991a8..18a7e1f1496cf 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c @@ -16,10 +16,10 @@ typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N))); // CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca <16 x i32>, align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32>* [[TYPE]] to * // CHECK-NEXT: store [[TYPE_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[TYPE1:%.*]] = load <16 x i32>, <16 x i32>* [[TYPE]], align 16, !tbaa !2 -// CHECK-NEXT: store <16 x i32> [[TYPE1]], <16 x i32>* [[TYPE_ADDR]], align 16, !tbaa !2 +// CHECK-NEXT: [[TYPE1:%.*]] = load <16 x i32>, <16 x i32>* [[TYPE]], align 16, [[TBAA2:!tbaa !.*]] +// CHECK-NEXT: store <16 x i32> [[TYPE1]], <16 x i32>* [[TYPE_ADDR]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[TYPE_ADDR]] to * -// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA2]] // CHECK-NEXT: ret [[TMP2]] // svint32_t to_svint32_t(fixed_int32_t type) { @@ -30,9 +30,9 @@ svint32_t to_svint32_t(fixed_int32_t type) { // CHECK-NEXT: entry: // CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca , align 16 // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, !tbaa !5 +// CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, [[TBAA5:!tbaa !.*]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast * [[TYPE_ADDR]] to <16 x i32>* -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0]], align 16, !tbaa !2 
+// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0]], align 16, [[TBAA2]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* // CHECK-NEXT: store <16 x i32> [[TMP1]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP2:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -48,10 +48,10 @@ fixed_int32_t from_svint32_t(svint32_t type) { // CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca <8 x double>, align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x double>* [[TYPE]] to * // CHECK-NEXT: store [[TYPE_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[TYPE1:%.*]] = load <8 x double>, <8 x double>* [[TYPE]], align 16, !tbaa !2 -// CHECK-NEXT: store <8 x double> [[TYPE1]], <8 x double>* [[TYPE_ADDR]], align 16, !tbaa !2 +// CHECK-NEXT: [[TYPE1:%.*]] = load <8 x double>, <8 x double>* [[TYPE]], align 16, [[TBAA2]] +// CHECK-NEXT: store <8 x double> [[TYPE1]], <8 x double>* [[TYPE_ADDR]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x double>* [[TYPE_ADDR]] to * -// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA2]] // CHECK-NEXT: ret [[TMP2]] // svfloat64_t to_svfloat64_t(fixed_float64_t type) { @@ -62,9 +62,9 @@ svfloat64_t to_svfloat64_t(fixed_float64_t type) { // CHECK-NEXT: entry: // CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca , align 16 // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, !tbaa !7 +// CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, [[TBAA7:!tbaa !.*]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast * [[TYPE_ADDR]] to <8 x double>* -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[TMP0]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[TMP0]], align 16, [[TBAA2]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <8 x double>* // CHECK-NEXT: store <8 x double> [[TMP1]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP2:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -81,11 +81,11 @@ fixed_float64_t from_svfloat64_t(svfloat64_t type) { // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[TYPE]] to * // CHECK-NEXT: store [[TYPE_COERCE:%.*]], * [[TMP0]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[TYPE]] to i64* -// CHECK-NEXT: [[TYPE12:%.*]] = load i64, i64* [[TMP1]], align 16, !tbaa !2 +// CHECK-NEXT: [[TYPE12:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[TYPE_ADDR]] to i64* -// CHECK-NEXT: store i64 [[TYPE12]], i64* [[TMP2]], align 16, !tbaa !2 +// CHECK-NEXT: store i64 [[TYPE12]], i64* [[TMP2]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[TYPE_ADDR]] to * -// CHECK-NEXT: [[TMP4:%.*]] = load , * [[TMP3]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP4:%.*]] = load , * [[TMP3]], align 16, [[TBAA2]] // CHECK-NEXT: ret [[TMP4]] // svbool_t to_svbool_t(fixed_bool_t type) { @@ -96,9 +96,9 @@ svbool_t to_svbool_t(fixed_bool_t type) { // CHECK-NEXT: entry: // CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca , align 16 // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, !tbaa !9 +// CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, [[TBAA9:!tbaa !.*]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast * [[TYPE_ADDR]] to i64* -// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = 
load i64, i64* [[TMP0]], align 16, [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast * [[RETVAL_COERCE]] to i64* // CHECK-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 16 // CHECK-NEXT: [[TMP3:%.*]] = load , * [[RETVAL_COERCE]], align 16 diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c index d567c718000c8..28464ed4af2b7 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c @@ -22,19 +22,19 @@ fixed_bool_t global_bool; // CHECK-128-LABEL: @write_global_i64( // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[V_ADDR:%.*]] = alloca , align 16 -// CHECK-128-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, !tbaa !2 +// CHECK-128-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA2:!tbaa !.*]] // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to <2 x i64>* -// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 16, !tbaa !6 -// CHECK-128-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* @global_i64, align 16, !tbaa !6 +// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 16, [[TBAA6:!tbaa !.*]] +// CHECK-128-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* @global_i64, align 16, [[TBAA6]] // CHECK-128-NEXT: ret void // // CHECK-512-LABEL: @write_global_i64( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[V_ADDR:%.*]] = alloca , align 16 -// CHECK-512-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, !tbaa !2 +// CHECK-512-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA2:!tbaa !.*]] // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to <8 x i64>* -// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[TMP0]], align 16, !tbaa !6 -// CHECK-512-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* @global_i64, align 16, !tbaa !6 +// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[TMP0]], align 16, [[TBAA6:!tbaa !.*]] +// CHECK-512-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* @global_i64, align 16, [[TBAA6]] // CHECK-512-NEXT: ret void // void write_global_i64(svint64_t v) { global_i64 = v; } @@ -42,19 +42,19 @@ void write_global_i64(svint64_t v) { global_i64 = v; } // CHECK-128-LABEL: @write_global_bf16( // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[V_ADDR:%.*]] = alloca , align 16 -// CHECK-128-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, !tbaa !7 +// CHECK-128-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA7:!tbaa !.*]] // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to <8 x bfloat>* -// CHECK-128-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[TMP0]], align 16, !tbaa !6 -// CHECK-128-NEXT: store <8 x bfloat> [[TMP1]], <8 x bfloat>* @global_bf16, align 16, !tbaa !6 +// CHECK-128-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[TMP0]], align 16, [[TBAA6]] +// CHECK-128-NEXT: store <8 x bfloat> [[TMP1]], <8 x bfloat>* @global_bf16, align 16, [[TBAA6]] // CHECK-128-NEXT: ret void // // CHECK-512-LABEL: @write_global_bf16( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[V_ADDR:%.*]] = alloca , align 16 -// CHECK-512-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, !tbaa !7 +// CHECK-512-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA7:!tbaa !.*]] // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to <32 x bfloat>* -// CHECK-512-NEXT: [[TMP1:%.*]] = load <32 x bfloat>, <32 x bfloat>* [[TMP0]], align 16, !tbaa !6 -// CHECK-512-NEXT: store <32 x bfloat> [[TMP1]], <32 x bfloat>* @global_bf16, align 16, !tbaa !6 +// CHECK-512-NEXT: [[TMP1:%.*]] = load <32 x bfloat>, <32 x bfloat>* 
[[TMP0]], align 16, [[TBAA6]] +// CHECK-512-NEXT: store <32 x bfloat> [[TMP1]], <32 x bfloat>* @global_bf16, align 16, [[TBAA6]] // CHECK-512-NEXT: ret void // void write_global_bf16(svbfloat16_t v) { global_bf16 = v; } @@ -62,19 +62,19 @@ void write_global_bf16(svbfloat16_t v) { global_bf16 = v; } // CHECK-128-LABEL: @write_global_bool( // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[V_ADDR:%.*]] = alloca , align 16 -// CHECK-128-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, !tbaa !9 +// CHECK-128-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA9:!tbaa !.*]] // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to <2 x i8>* -// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 16, !tbaa !6 -// CHECK-128-NEXT: store <2 x i8> [[TMP1]], <2 x i8>* @global_bool, align 2, !tbaa !6 +// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 16, [[TBAA6]] +// CHECK-128-NEXT: store <2 x i8> [[TMP1]], <2 x i8>* @global_bool, align 2, [[TBAA6]] // CHECK-128-NEXT: ret void // // CHECK-512-LABEL: @write_global_bool( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[V_ADDR:%.*]] = alloca , align 16 -// CHECK-512-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, !tbaa !9 +// CHECK-512-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA9:!tbaa !.*]] // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to i64* -// CHECK-512-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, !tbaa !6 -// CHECK-512-NEXT: store i64 [[TMP1]], i64* bitcast (<8 x i8>* @global_bool to i64*), align 2, !tbaa !6 +// CHECK-512-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA6]] +// CHECK-512-NEXT: store i64 [[TMP1]], i64* bitcast (<8 x i8>* @global_bool to i64*), align 2, [[TBAA6]] // CHECK-512-NEXT: ret void // void write_global_bool(svbool_t v) { global_bool = v; } @@ -85,36 +85,36 @@ void write_global_bool(svbool_t v) { global_bool = v; } // CHECK-128-LABEL: @read_global_i64( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP0:%.*]] = load , * bitcast (<2 x i64>* @global_i64 to *), align 16, !tbaa !6 +// CHECK-128-NEXT: [[TMP0:%.*]] = load , * bitcast (<2 x i64>* @global_i64 to *), align 16, [[TBAA6]] // CHECK-128-NEXT: ret [[TMP0]] // // CHECK-512-LABEL: @read_global_i64( // CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[TMP0:%.*]] = load , * bitcast (<8 x i64>* @global_i64 to *), align 16, !tbaa !6 +// CHECK-512-NEXT: [[TMP0:%.*]] = load , * bitcast (<8 x i64>* @global_i64 to *), align 16, [[TBAA6]] // CHECK-512-NEXT: ret [[TMP0]] // svint64_t read_global_i64() { return global_i64; } // CHECK-128-LABEL: @read_global_bf16( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP0:%.*]] = load , * bitcast (<8 x bfloat>* @global_bf16 to *), align 16, !tbaa !6 +// CHECK-128-NEXT: [[TMP0:%.*]] = load , * bitcast (<8 x bfloat>* @global_bf16 to *), align 16, [[TBAA6]] // CHECK-128-NEXT: ret [[TMP0]] // // CHECK-512-LABEL: @read_global_bf16( // CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[TMP0:%.*]] = load , * bitcast (<32 x bfloat>* @global_bf16 to *), align 16, !tbaa !6 +// CHECK-512-NEXT: [[TMP0:%.*]] = load , * bitcast (<32 x bfloat>* @global_bf16 to *), align 16, [[TBAA6]] // CHECK-512-NEXT: ret [[TMP0]] // svbfloat16_t read_global_bf16() { return global_bf16; } // CHECK-128-LABEL: @read_global_bool( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP0:%.*]] = load , * bitcast (<2 x i8>* @global_bool to *), align 2, !tbaa !6 +// CHECK-128-NEXT: [[TMP0:%.*]] = load , * bitcast (<2 x i8>* @global_bool to *), align 2, [[TBAA6]] // CHECK-128-NEXT: ret [[TMP0]] // // 
CHECK-512-LABEL: @read_global_bool(
// CHECK-512-NEXT:  entry:
-// CHECK-512-NEXT:    [[TMP0:%.*]] = load , * bitcast (<8 x i8>* @global_bool to *), align 2, !tbaa !6
+// CHECK-512-NEXT:    [[TMP0:%.*]] = load , * bitcast (<8 x i8>* @global_bool to *), align 2, [[TBAA6]]
// CHECK-512-NEXT:    ret  [[TMP0]]
//
svbool_t read_global_bool() { return global_bool; }

From aeb4314391f2afa865fc6650666ea29d9b6afc8a Mon Sep 17 00:00:00 2001
From: Xin Wang
Date: Fri, 11 Sep 2020 10:39:00 -0700
Subject: [PATCH 0397/1079] [mlir][spirv] OpConvertSToF: support operands with
 different bitwidths.

Disable the SameBitWidth check in the verifier for these conversion ops.

Differential Revision: https://reviews.llvm.org/D87265
---
 .../mlir/Dialect/SPIRV/SPIRVCastOps.td        |  8 +++
 mlir/lib/Dialect/SPIRV/SPIRVOps.cpp           |  7 +-
 .../Dialect/SPIRV/Serialization/cast-ops.mlir | 20 ++++++
 mlir/test/Dialect/SPIRV/ops.mlir              | 64 ++++++++++++++++---
 4 files changed, 90 insertions(+), 9 deletions(-)

diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVCastOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVCastOps.td
index c67c8d5e45423..0e595984dde4d 100644
--- a/mlir/include/mlir/Dialect/SPIRV/SPIRVCastOps.td
+++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVCastOps.td
@@ -122,6 +122,8 @@ def SPV_ConvertFToSOp : SPV_CastOp<"ConvertFToS", SPV_Integer, SPV_Float, []> {
     %3 = spv.ConvertFToS %2 : vector<3xf32> to vector<3xi32>
     ```
   }];
+
+  let verifier = [{ return verifyCastOp(this->getOperation(), false, true); }];
 }

// -----

@@ -155,6 +157,8 @@ def SPV_ConvertFToUOp : SPV_CastOp<"ConvertFToU", SPV_Integer, SPV_Float, []> {
     %3 = spv.ConvertFToU %2 : vector<3xf32> to vector<3xi32>
     ```
   }];
+
+  let verifier = [{ return verifyCastOp(this->getOperation(), false, true); }];
 }

// -----

@@ -186,6 +190,8 @@ def SPV_ConvertSToFOp : SPV_CastOp<"ConvertSToF", SPV_Float, SPV_Integer, []> {
     %3 = spv.ConvertSToF %2 : vector<3xi32> to vector<3xf32>
     ```
   }];
+
+  let verifier = [{ return verifyCastOp(this->getOperation(), false, true); }];
 }

// -----

@@ -217,6 +223,8 @@ def SPV_ConvertUToFOp : SPV_CastOp<"ConvertUToF", SPV_Float, SPV_Integer, []> {
     %3 = spv.ConvertUToF %2 : vector<3xi32> to vector<3xf32>
     ```
   }];
+
+  let verifier = [{ return verifyCastOp(this->getOperation(), false, true); }];
 }

// -----

diff --git a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp
index 339f588541f6e..c171a755891bb 100644
--- a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp
+++ b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp
@@ -305,7 +305,12 @@ static void printSourceMemoryAccessAttribute(
 }

 static LogicalResult verifyCastOp(Operation *op,
-                                  bool requireSameBitWidth = true) {
+                                  bool requireSameBitWidth = true,
+                                  bool skipBitWidthCheck = false) {
+  // Some CastOps have no limit on bit widths for result and operand type.
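+  // For example, an f64 operand may now be converted to an i32 result, or an
+  // i64 operand to an f32 result.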
+ if (skipBitWidthCheck) + return success(); + Type operandType = op->getOperand(0).getType(); Type resultType = op->getResult(0).getType(); diff --git a/mlir/test/Dialect/SPIRV/Serialization/cast-ops.mlir b/mlir/test/Dialect/SPIRV/Serialization/cast-ops.mlir index 76bac23e6f8ff..e04ac316f8736 100644 --- a/mlir/test/Dialect/SPIRV/Serialization/cast-ops.mlir +++ b/mlir/test/Dialect/SPIRV/Serialization/cast-ops.mlir @@ -20,21 +20,41 @@ spv.module Logical GLSL450 requires #spv.vce { %0 = spv.ConvertFToS %arg0 : f32 to i32 spv.ReturnValue %0 : i32 } + spv.func @convert_f64_to_s32(%arg0 : f64) -> i32 "None" { + // CHECK: {{%.*}} = spv.ConvertFToS {{%.*}} : f64 to i32 + %0 = spv.ConvertFToS %arg0 : f64 to i32 + spv.ReturnValue %0 : i32 + } spv.func @convert_f_to_u(%arg0 : f32) -> i32 "None" { // CHECK: {{%.*}} = spv.ConvertFToU {{%.*}} : f32 to i32 %0 = spv.ConvertFToU %arg0 : f32 to i32 spv.ReturnValue %0 : i32 } + spv.func @convert_f64_to_u32(%arg0 : f64) -> i32 "None" { + // CHECK: {{%.*}} = spv.ConvertFToU {{%.*}} : f64 to i32 + %0 = spv.ConvertFToU %arg0 : f64 to i32 + spv.ReturnValue %0 : i32 + } spv.func @convert_s_to_f(%arg0 : i32) -> f32 "None" { // CHECK: {{%.*}} = spv.ConvertSToF {{%.*}} : i32 to f32 %0 = spv.ConvertSToF %arg0 : i32 to f32 spv.ReturnValue %0 : f32 } + spv.func @convert_s64_to_f32(%arg0 : i64) -> f32 "None" { + // CHECK: {{%.*}} = spv.ConvertSToF {{%.*}} : i64 to f32 + %0 = spv.ConvertSToF %arg0 : i64 to f32 + spv.ReturnValue %0 : f32 + } spv.func @convert_u_to_f(%arg0 : i32) -> f32 "None" { // CHECK: {{%.*}} = spv.ConvertUToF {{%.*}} : i32 to f32 %0 = spv.ConvertUToF %arg0 : i32 to f32 spv.ReturnValue %0 : f32 } + spv.func @convert_u64_to_f32(%arg0 : i64) -> f32 "None" { + // CHECK: {{%.*}} = spv.ConvertUToF {{%.*}} : i64 to f32 + %0 = spv.ConvertUToF %arg0 : i64 to f32 + spv.ReturnValue %0 : f32 + } spv.func @f_convert(%arg0 : f32) -> f64 "None" { // CHECK: {{%.*}} = spv.FConvert {{%.*}} : f32 to f64 %0 = spv.FConvert %arg0 : f32 to f64 diff --git a/mlir/test/Dialect/SPIRV/ops.mlir b/mlir/test/Dialect/SPIRV/ops.mlir index c91a81fe239c4..fe845ae572fa3 100644 --- a/mlir/test/Dialect/SPIRV/ops.mlir +++ b/mlir/test/Dialect/SPIRV/ops.mlir @@ -335,6 +335,22 @@ func @convert_f_to_s_scalar(%arg0 : f32) -> i32 { // ----- +func @convert_f64_to_s32_scalar(%arg0 : f64) -> i32 { + // CHECK: {{%.*}} = spv.ConvertFToS {{%.*}} : f64 to i32 + %0 = spv.ConvertFToS %arg0 : f64 to i32 + spv.ReturnValue %0 : i32 +} + +// ----- + +func @convert_f_to_s_vector(%arg0 : vector<3xf32>) -> vector<3xi32> { + // CHECK: {{%.*}} = spv.ConvertFToS {{%.*}} : vector<3xf32> to vector<3xi32> + %0 = spv.ConvertFToS %arg0 : vector<3xf32> to vector<3xi32> + spv.ReturnValue %0 : vector<3xi32> +} + +// ----- + //===----------------------------------------------------------------------===// // spv.ConvertFToU //===----------------------------------------------------------------------===// @@ -347,6 +363,14 @@ func @convert_f_to_u_scalar(%arg0 : f32) -> i32 { // ----- +func @convert_f64_to_u32_scalar(%arg0 : f64) -> i32 { + // CHECK: {{%.*}} = spv.ConvertFToU {{%.*}} : f64 to i32 + %0 = spv.ConvertFToU %arg0 : f64 to i32 + spv.ReturnValue %0 : i32 +} + +// ----- + func @convert_f_to_u_vector(%arg0 : vector<3xf32>) -> vector<3xi32> { // CHECK: {{%.*}} = spv.ConvertFToU {{%.*}} : vector<3xf32> to vector<3xi32> %0 = spv.ConvertFToU %arg0 : vector<3xf32> to vector<3xi32> @@ -363,14 +387,6 @@ func @convert_f_to_u_coopmatrix(%arg0 : !spv.coopmatrix<8x16xf32, Subgroup>) { // ----- -func 
@convert_f_to_u_scalar_invalid(%arg0 : f16) -> i32 { - // expected-error @+1 {{expected the same bit widths for operand type and result type, but provided 'f16' and 'i32'}} - %0 = spv.ConvertFToU %arg0 : f16 to i32 - spv.ReturnValue %0 : i32 -} - -// ----- - //===----------------------------------------------------------------------===// // spv.ConvertSToF //===----------------------------------------------------------------------===// @@ -383,6 +399,22 @@ func @convert_s_to_f_scalar(%arg0 : i32) -> f32 { // ----- +func @convert_s64_to_f32_scalar(%arg0 : i64) -> f32 { + // CHECK: {{%.*}} = spv.ConvertSToF {{%.*}} : i64 to f32 + %0 = spv.ConvertSToF %arg0 : i64 to f32 + spv.ReturnValue %0 : f32 +} + +// ----- + +func @convert_s_to_f_vector(%arg0 : vector<3xi32>) -> vector<3xf32> { + // CHECK: {{%.*}} = spv.ConvertSToF {{%.*}} : vector<3xi32> to vector<3xf32> + %0 = spv.ConvertSToF %arg0 : vector<3xi32> to vector<3xf32> + spv.ReturnValue %0 : vector<3xf32> +} + +// ----- + //===----------------------------------------------------------------------===// // spv.ConvertUToF //===----------------------------------------------------------------------===// @@ -395,6 +427,22 @@ func @convert_u_to_f_scalar(%arg0 : i32) -> f32 { // ----- +func @convert_u64_to_f32_scalar(%arg0 : i64) -> f32 { + // CHECK: {{%.*}} = spv.ConvertUToF {{%.*}} : i64 to f32 + %0 = spv.ConvertUToF %arg0 : i64 to f32 + spv.ReturnValue %0 : f32 +} + +// ----- + +func @convert_u_to_f_vector(%arg0 : vector<3xi32>) -> vector<3xf32> { + // CHECK: {{%.*}} = spv.ConvertUToF {{%.*}} : vector<3xi32> to vector<3xf32> + %0 = spv.ConvertUToF %arg0 : vector<3xi32> to vector<3xf32> + spv.ReturnValue %0 : vector<3xf32> +} + +// ----- + //===----------------------------------------------------------------------===// // spv.FConvert //===----------------------------------------------------------------------===// From 84a6da67e6b2a76b15ad1862f4cbb7625fe318df Mon Sep 17 00:00:00 2001 From: Sean Silva Date: Thu, 10 Sep 2020 22:04:58 -0700 Subject: [PATCH 0398/1079] [mlir] Fix some edge cases around 0-element TensorFromElementsOp This introduces a builder for the more general case that supports zero elements (where the element type can't be inferred from the ValueRange, since it might be empty). Also, fix up some cases in ShapeToStandard lowering that hit this. It happens very easily when dealing with shapes of 0-D tensors. The SameOperandsAndResultElementType is redundant with the new TypesMatchWith and prevented having zero elements. 
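As a rough sketch (illustrative only, not part of the diff below), a lowering
that can produce an empty extent tensor now passes the element type
explicitly, since it cannot be inferred from an empty ValueRange:

  // `rewriter`, `loc` and `extentValues` stand for whatever the surrounding
  // pattern provides; with zero elements this builds
  // `tensor_from_elements : tensor<0xindex>`.
  Value tensor = rewriter.create<TensorFromElementsOp>(
      loc, rewriter.getIndexType(), extentValues);
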
Differential Revision: https://reviews.llvm.org/D87492 --- .../mlir/Dialect/StandardOps/IR/Ops.td | 5 +++- .../ShapeToStandard/ShapeToStandard.cpp | 7 +++--- mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 12 +++++++--- .../ShapeToStandard/shape-to-standard.mlir | 24 +++++++++++++++++++ mlir/test/IR/core-ops.mlir | 3 +++ 5 files changed, 44 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index ec7ecf9b92d40..afdc3edae86c3 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -1613,7 +1613,6 @@ def ExtractElementOp : Std_Op<"extract_element", def TensorFromElementsOp : Std_Op<"tensor_from_elements", [ NoSideEffect, - SameOperandsAndResultElementType, TypesMatchWith<"operand types match result element type", "result", "elements", "SmallVector(" "$_self.cast().getDimSize(0), " @@ -1638,7 +1637,11 @@ def TensorFromElementsOp : Std_Op<"tensor_from_elements", [ // This op is fully verified by its traits. let verifier = ?; + let skipDefaultBuilders = 1; let builders = [ + OpBuilder<"OpBuilder &b, OperationState &result, Type elementType," + "ValueRange elements">, + // Special case builder for when `elements` has size >=1. OpBuilder<"OpBuilder &b, OperationState &result, ValueRange elements"> ]; diff --git a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp index f3f11e89af02f..0a6953842a149 100644 --- a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp +++ b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp @@ -182,8 +182,9 @@ LogicalResult ConstShapeOpConverter::matchAndRewrite( extentOperands.push_back( rewriter.create(loc, extent.getLimitedValue())); } - Value tensor = rewriter.create(loc, extentOperands); Type indexTy = rewriter.getIndexType(); + Value tensor = + rewriter.create(loc, indexTy, extentOperands); Type resultTy = RankedTensorType::get({ShapedType::kDynamicSize}, indexTy); rewriter.replaceOpWithNewOp(op, tensor, resultTy); return success(); @@ -444,8 +445,8 @@ LogicalResult ShapeOfOpConversion::matchAndRewrite( } // Materialize extent tensor. 
- Value staticExtentTensor = - rewriter.create(loc, extentValues); + Value staticExtentTensor = rewriter.create( + loc, rewriter.getIndexType(), extentValues); rewriter.replaceOpWithNewOp(op, staticExtentTensor, op.getType()); return success(); diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index dc45d5175277c..cf085a604b46b 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -1756,12 +1756,18 @@ OpFoldResult ExtractElementOp::fold(ArrayRef operands) { // TensorFromElementsOp //===----------------------------------------------------------------------===// +void TensorFromElementsOp::build(OpBuilder &builder, OperationState &result, + Type elementType, ValueRange elements) { + Type resultTy = RankedTensorType::get({static_cast(elements.size())}, + elementType); + result.addOperands(elements); + result.addTypes(resultTy); +} + void TensorFromElementsOp::build(OpBuilder &builder, OperationState &result, ValueRange elements) { assert(!elements.empty() && "expected at least one element"); - Type resultTy = RankedTensorType::get({static_cast(elements.size())}, - elements.front().getType()); - build(builder, result, resultTy, elements); + build(builder, result, elements.front().getType(), elements); } namespace { diff --git a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir index 4168634f1240d..01ba6abcc6c4e 100644 --- a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir +++ b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir @@ -103,6 +103,19 @@ func @const_shape() -> tensor { // ----- +// Lower `const_shape` in the case of rank 0. +// CHECK-LABEL: func @const_shape_zero_elements +// CHECK-SAME: () -> tensor +func @const_shape_zero_elements() -> tensor { + // CHECK: %[[TENSOR:.*]] = tensor_from_elements : tensor<0xindex> + // CHECK: %[[RESULT:.*]] = tensor_cast %[[TENSOR]] : tensor<0xindex> to tensor + // CHECK: return %[[RESULT]] : tensor + %shape = shape.const_shape [] : tensor + return %shape : tensor +} + +// ----- + // Lower `any` to its first operand. // CHECK-LABEL: @any_of_three // CHECK-SAME: (%[[A:.*]]: tensor, %[[B:.*]]: tensor, %[[C:.*]]: tensor) -> tensor @@ -227,6 +240,17 @@ func @shape_of_stat(%arg : tensor<1x2x3xf32>) { // ----- +// Lower `shape_of` for 0-D tensor. +// CHECK-LABEL: @shape_of_zero_d +// CHECK-SAME: (%[[ARG:.*]]: tensor) +func @shape_of_zero_d(%arg : tensor) { + // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements : tensor<0xindex> + %shape = shape.shape_of %arg : tensor -> tensor + return +} + +// ----- + // Lower `shape_of` for dynamically shaped tensor. 
// CHECK-LABEL: @shape_of_dyn
// CHECK-SAME: (%[[ARG:.*]]: tensor<1x5x?xf32>)
diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir
index e4472b444f034..f182936c87032 100644
--- a/mlir/test/IR/core-ops.mlir
+++ b/mlir/test/IR/core-ops.mlir
@@ -673,6 +673,9 @@ func @tensor_from_elements() {
   // CHECK: %2 = tensor_from_elements [[C0_F32]] : tensor<1xf32>
   %2 = tensor_from_elements %c0_f32 : tensor<1xf32>

+  // CHECK: tensor_from_elements : tensor<0xindex>
+  %3 = tensor_from_elements : tensor<0xindex>
+
   return
 }

From 4da8fa45a0968a1f98010777d3731a921431ee55 Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani
Date: Fri, 11 Sep 2020 20:09:44 +0200
Subject: [PATCH 0399/1079] [lldb/API] Add Breakpoint::SerializeToStructuredData
 to SBAPI

This patch adds a way to fetch breakpoint metadata in a serialized
`StructuredData` format (JSON). This can be used by IDEs to update their UI
when a breakpoint is set or modified from the console.

rdar://11013798

Differential Revision: https://reviews.llvm.org/D87491

Signed-off-by: Med Ismail Bennani
---
 lldb/bindings/interface/SBBreakpoint.i        |  2 ++
 lldb/include/lldb/API/SBBreakpoint.h          |  4 ++-
 lldb/source/API/SBBreakpoint.cpp              | 19 +++++++++-
 .../serialize/TestBreakpointSerialization.py | 36 +++++++++++++++++++
 4 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/lldb/bindings/interface/SBBreakpoint.i b/lldb/bindings/interface/SBBreakpoint.i
index a2d747db0bf6d..e386ace9dee8a 100644
--- a/lldb/bindings/interface/SBBreakpoint.i
+++ b/lldb/bindings/interface/SBBreakpoint.i
@@ -234,6 +234,8 @@ public:
     SBError AddLocation(SBAddress &address);

+    SBStructuredData SBBreakpoint::SerializeToStructuredData();
+
     static bool
     EventIsBreakpointEvent (const lldb::SBEvent &event);

diff --git a/lldb/include/lldb/API/SBBreakpoint.h b/lldb/include/lldb/API/SBBreakpoint.h
index c9a52fcacf1a4..39a021145fb7b 100644
--- a/lldb/include/lldb/API/SBBreakpoint.h
+++ b/lldb/include/lldb/API/SBBreakpoint.h
@@ -140,7 +140,9 @@ class LLDB_API SBBreakpoint {
   // Can only be called from a ScriptedBreakpointResolver...
  SBError AddLocation(SBAddress &address);
-
+
+  SBStructuredData SerializeToStructuredData();
+
 private:
  friend class SBBreakpointList;
  friend class SBBreakpointLocation;

diff --git a/lldb/source/API/SBBreakpoint.cpp b/lldb/source/API/SBBreakpoint.cpp
index eb75bf8b33f43..96b77bd8539e8 100644
--- a/lldb/source/API/SBBreakpoint.cpp
+++ b/lldb/source/API/SBBreakpoint.cpp
@@ -575,7 +575,22 @@ SBError SBBreakpoint::AddLocation(SBAddress &address) {
   return LLDB_RECORD_RESULT(error);
 }

-void SBBreakpoint ::SetCallback(SBBreakpointHitCallback callback, void *baton) {
+SBStructuredData SBBreakpoint::SerializeToStructuredData() {
+  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBStructuredData, SBBreakpoint,
+                             SerializeToStructuredData);
+
+  SBStructuredData data;
+  BreakpointSP bkpt_sp = GetSP();
+
+  if (!bkpt_sp)
+    return LLDB_RECORD_RESULT(data);
+
+  StructuredData::ObjectSP bkpt_dict = bkpt_sp->SerializeToStructuredData();
+  data.m_impl_up->SetObjectSP(bkpt_dict);
+  return LLDB_RECORD_RESULT(data);
+}
+
+void SBBreakpoint::SetCallback(SBBreakpointHitCallback callback, void *baton) {
   LLDB_RECORD_DUMMY(void, SBBreakpoint, SetCallback,
                     (lldb::SBBreakpointHitCallback, void *), callback, baton);

@@ -1017,6 +1032,8 @@ void RegisterMethods(Registry &R) {
                        (lldb::SBStream &, bool));
   LLDB_REGISTER_METHOD(lldb::SBError, SBBreakpoint, AddLocation,
                        (lldb::SBAddress &));
+  LLDB_REGISTER_METHOD(lldb::SBStructuredData, SBBreakpoint,
+                       SerializeToStructuredData, ());
   LLDB_REGISTER_METHOD(void, SBBreakpoint, SetScriptCallbackFunction,
                        (const char *));
   LLDB_REGISTER_METHOD(lldb::SBError, SBBreakpoint, SetScriptCallbackFunction,

diff --git a/lldb/test/API/functionalities/breakpoint/serialize/TestBreakpointSerialization.py b/lldb/test/API/functionalities/breakpoint/serialize/TestBreakpointSerialization.py
index 6a3f40ff3a35b..b26af93525dc9 100644
--- a/lldb/test/API/functionalities/breakpoint/serialize/TestBreakpointSerialization.py
+++ b/lldb/test/API/functionalities/breakpoint/serialize/TestBreakpointSerialization.py
@@ -3,6 +3,7 @@
 """

 import os
+import json
 import lldb
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
@@ -56,6 +57,41 @@ def test_scripted_extra_args(self):
         self.setup_targets_and_cleanup()
         self.do_check_extra_args()

+    def test_structured_data_serialization(self):
+        target = self.dbg.GetDummyTarget()
+        self.assertTrue(target.IsValid(), VALID_TARGET)
+
+        interpreter = self.dbg.GetCommandInterpreter()
+        result = lldb.SBCommandReturnObject()
+        interpreter.HandleCommand("br set -f foo -l 42", result)
+        result = lldb.SBCommandReturnObject()
+        interpreter.HandleCommand("br set -c 'argc == 1' -n main", result)
+
+        bkp1 = target.GetBreakpointAtIndex(0)
+        self.assertTrue(bkp1.IsValid(), VALID_BREAKPOINT)
+        stream = lldb.SBStream()
+        sd = bkp1.SerializeToStructuredData()
+        sd.GetAsJSON(stream)
+        serialized_data = json.loads(stream.GetData())
+        self.assertEqual(serialized_data["Breakpoint"]["BKPTResolver"]["Options"]["FileName"], "foo")
+        self.assertEqual(serialized_data["Breakpoint"]["BKPTResolver"]["Options"]["LineNumber"], 42)
+
+        bkp2 = target.GetBreakpointAtIndex(1)
+        self.assertTrue(bkp2.IsValid(), VALID_BREAKPOINT)
+        stream = lldb.SBStream()
+        sd = bkp2.SerializeToStructuredData()
+        sd.GetAsJSON(stream)
+        serialized_data = json.loads(stream.GetData())
+        self.assertIn("main", serialized_data["Breakpoint"]["BKPTResolver"]["Options"]["SymbolNames"])
+        self.assertEqual(serialized_data["Breakpoint"]["BKPTOptions"]["ConditionText"],"argc == 1")
+
+        invalid_bkp = lldb.SBBreakpoint()
+        self.assertFalse(invalid_bkp.IsValid(), "Breakpoint should not be valid.")
+        stream = lldb.SBStream()
+        sd = invalid_bkp.SerializeToStructuredData()
+        sd.GetAsJSON(stream)
+        self.assertFalse(stream.GetData(), "Invalid breakpoint should have an empty structured data")
+
     def setup_targets_and_cleanup(self):
         def cleanup ():
             self.RemoveTempFile(self.bkpts_file_path)

From fa2a8acc71ffc3632b7c5ed584af8709639443f2 Mon Sep 17 00:00:00 2001
From: Sam Clegg
Date: Fri, 11 Sep 2020 07:20:40 -0700
Subject: [PATCH 0400/1079] [WebAssembly] Add assembly syntax for mutable
 globals

This adds an optional ", immutable" to the end of a `.globaltype`
declaration. I would have preferred to match the `.wat` syntax, where
immutable is the default and `mut` is the signifier for mutable globals.
Sadly, changing the default would break backwards compat with existing
assembly in the wild, so I think it's best to stick with this approach.

Differential Revision: https://reviews.llvm.org/D87515
---
 lld/test/wasm/globals.s                        | 16 +++++++++++++---
 .../AsmParser/WebAssemblyAsmParser.cpp         | 15 ++++++++++++++-
 .../MCTargetDesc/WebAssemblyTargetStreamer.cpp |  6 ++++--
 llvm/test/MC/WebAssembly/globals.s             |  8 +++++++-
 4 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/lld/test/wasm/globals.s b/lld/test/wasm/globals.s
index ec8d247779de1..6e049e1e73f91 100644
--- a/lld/test/wasm/globals.s
+++ b/lld/test/wasm/globals.s
@@ -8,10 +8,11 @@

 .globaltype foo_global, i32
 .globaltype bar_global, f32
+.globaltype immutable_global, i32, immutable

 read_global:
   .functype read_global () -> (i32)
-  global.get foo_global
+  global.get immutable_global
   end_function

 write_global:
@@ -26,10 +27,13 @@ _start:
   .functype _start () -> ()
   i32.const 1
   call write_global
+  call read_global
+  drop
   end_function

 foo_global:
 bar_global:
+immutable_global:

# CHECK:       - Type:            GLOBAL
# CHECK-NEXT:    Globals:
# CHECK-NEXT:      - Index:       0
# CHECK-NEXT:        Type:        I32
# CHECK-NEXT:        Mutable:     true
# CHECK-NEXT:        InitExpr:
# CHECK-NEXT:          Opcode:    I32_CONST
# CHECK-NEXT:          Value:     66560
-# CHECK-NEXT:     - Index:        1
+# CHECK-NEXT:     - Index:        1
+# CHECK-NEXT:       Type:         I32
+# CHECK-NEXT:       Mutable:      false
+# CHECK-NEXT:       InitExpr:
+# CHECK-NEXT:         Opcode:     I32_CONST
+# CHECK-NEXT:         Value:      0
+# CHECK-NEXT:     - Index:        2
# CHECK-NEXT:        Type:        I32
# CHECK-NEXT:        Mutable:     true
# CHECK-NEXT:        InitExpr:
# CHECK-NEXT:          Opcode:    I32_CONST
# CHECK-NEXT:          Value:     0
-# CHECK-NEXT:     - Index:        2
+# CHECK-NEXT:     - Index:        3
# CHECK-NEXT:        Type:        F32
# CHECK-NEXT:        Mutable:     true
# CHECK-NEXT:        InitExpr:
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index b0137384971cb..0e6c95d5dd3b1 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -689,11 +689,24 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
     auto Type = parseType(TypeName);
     if (!Type)
       return error("Unknown type in .globaltype directive: ", TypeTok);
+    // Optional mutable modifier. Default to mutable for historical reasons.
+    // Ideally we would have gone with immutable as the default and used `mut`
+    // as the modifier to match the `.wat` format.
+    bool Mutable = true;
+    if (isNext(AsmToken::Comma)) {
+      TypeTok = Lexer.getTok();
+      auto Id = expectIdent();
+      if (Id == "immutable")
+        Mutable = false;
+      else
+        // Should we also allow `mutable` and `mut` here for clarity?
+        return error("Unknown type in .globaltype modifier: ", TypeTok);
+    }
     // Now set this symbol with the correct type.
auto WasmSym = cast(Ctx.getOrCreateSymbol(SymName)); WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); WasmSym->setGlobalType( - wasm::WasmGlobalType{uint8_t(Type.getValue()), true}); + wasm::WasmGlobalType{uint8_t(Type.getValue()), Mutable}); // And emit the directive again. TOut.emitGlobalType(WasmSym); return expect(AsmToken::EndOfStatement, "EOL"); diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp index e954eeaebb141..d2b2de0dca1f4 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp @@ -71,8 +71,10 @@ void WebAssemblyTargetAsmStreamer::emitGlobalType(const MCSymbolWasm *Sym) { assert(Sym->isGlobal()); OS << "\t.globaltype\t" << Sym->getName() << ", " << WebAssembly::typeToString( - static_cast(Sym->getGlobalType().Type)) - << '\n'; + static_cast(Sym->getGlobalType().Type)); + if (!Sym->getGlobalType().Mutable) + OS << ", immutable"; + OS << '\n'; } void WebAssemblyTargetAsmStreamer::emitEventType(const MCSymbolWasm *Sym) { diff --git a/llvm/test/MC/WebAssembly/globals.s b/llvm/test/MC/WebAssembly/globals.s index 10d696b7090a7..717d28b2945c5 100644 --- a/llvm/test/MC/WebAssembly/globals.s +++ b/llvm/test/MC/WebAssembly/globals.s @@ -6,7 +6,7 @@ .globl read_global .globl write_global .globaltype foo_global, i32 -.globaltype global2, i64 +.globaltype global2, i64, immutable .globaltype global3, f32 .globaltype global4, f64 @@ -42,6 +42,12 @@ global4: # BIN-NEXT: InitExpr: # BIN-NEXT: Opcode: I32_CONST # BIN-NEXT: Value: 0 +# BIN-NEXT: - Index: 1 +# BIN-NEXT: Type: I64 +# BIN-NEXT: Mutable: false +# BIN-NEXT: InitExpr: +# BIN-NEXT: Opcode: I64_CONST +# BIN-NEXT: Value: 0 # BIN: - Type: CUSTOM # BIN-NEXT: Name: linking From c42f96cb23bedb0e4bc31d2e88b60275083a420d Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Sat, 5 Sep 2020 18:27:04 +0300 Subject: [PATCH 0401/1079] [CMake][OpenMP] Simplify getting CUDA library directory LLVM now requires CMake 3.13.4 so we can simplify this. Reviewed By: phosek Differential Revision: https://reviews.llvm.org/D87195 --- .../cmake/Modules/LibomptargetGetDependencies.cmake | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake index 95254e7a9e128..05742bd4fbf7a 100644 --- a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake +++ b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake @@ -137,17 +137,8 @@ find_library ( # There is a libcuda.so in lib64/stubs that can be used for linking. if (NOT LIBOMPTARGET_DEP_CUDA_DRIVER_LIBRARIES AND CUDA_FOUND) - # Since CMake 3.3 FindCUDA.cmake defaults to using static libraries. In this - # case CUDA_LIBRARIES contains additional linker arguments which breaks - # get_filename_component below. Fortunately, since that change the module - # exports CUDA_cudart_static_LIBRARY which points to a single file in the - # right directory. 
- set(cuda_library ${CUDA_LIBRARIES}) - if (DEFINED CUDA_cudart_static_LIBRARY) - set(cuda_library ${CUDA_cudart_static_LIBRARY}) - endif() - get_filename_component(CUDA_LIBDIR ${cuda_library} DIRECTORY) - find_library ( + get_filename_component(CUDA_LIBDIR "${CUDA_cudart_static_LIBRARY}" DIRECTORY) + find_library( LIBOMPTARGET_DEP_CUDA_DRIVER_LIBRARIES NAMES cuda From 5d152127d48fbcf47a8d059aa68a84c365ae3cb9 Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Thu, 10 Sep 2020 17:54:54 +0000 Subject: [PATCH 0402/1079] [SyntaxTree][Synthesis] Add support for simple Leafs and test based on tree dump Differential Revision: https://reviews.llvm.org/D87495 --- .../include/clang/Tooling/Syntax/BuildTree.h | 13 +++- clang/lib/Tooling/Syntax/Synthesis.cpp | 39 ++++++---- .../Tooling/Syntax/SynthesisTest.cpp | 76 +++++++++++++++---- 3 files changed, 97 insertions(+), 31 deletions(-) diff --git a/clang/include/clang/Tooling/Syntax/BuildTree.h b/clang/include/clang/Tooling/Syntax/BuildTree.h index b7ad50c941d18..c2ae4348bc166 100644 --- a/clang/include/clang/Tooling/Syntax/BuildTree.h +++ b/clang/include/clang/Tooling/Syntax/BuildTree.h @@ -24,8 +24,17 @@ syntax::TranslationUnit *buildSyntaxTree(Arena &A, // Create syntax trees from subtrees not backed by the source code. -clang::syntax::Leaf *createPunctuation(clang::syntax::Arena &A, - clang::tok::TokenKind K); +// Synthesis of Leafs +/// Create `Leaf` from token with `Spelling` and assert it has the desired +/// `TokenKind`. +syntax::Leaf *createLeaf(syntax::Arena &A, tok::TokenKind K, + StringRef Spelling); + +/// Infer the token spelling from its `TokenKind`, then create `Leaf` from +/// this token +syntax::Leaf *createLeaf(syntax::Arena &A, tok::TokenKind K); + +// Synthesis of Syntax Nodes clang::syntax::EmptyStatement *createEmptyStatement(clang::syntax::Arena &A); } // namespace syntax diff --git a/clang/lib/Tooling/Syntax/Synthesis.cpp b/clang/lib/Tooling/Syntax/Synthesis.cpp index 701a1e60a4f38..8d51325706fa0 100644 --- a/clang/lib/Tooling/Syntax/Synthesis.cpp +++ b/clang/lib/Tooling/Syntax/Synthesis.cpp @@ -5,13 +5,14 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#include "clang/Basic/TokenKinds.h" #include "clang/Tooling/Syntax/BuildTree.h" using namespace clang; /// Exposes private syntax tree APIs required to implement node synthesis. /// Should not be used for anything else. 
-class syntax::FactoryImpl { +class clang::syntax::FactoryImpl { public: static void setCanModify(syntax::Node *N) { N->CanModify = true; } @@ -21,24 +22,32 @@ class syntax::FactoryImpl { } }; -clang::syntax::Leaf *syntax::createPunctuation(clang::syntax::Arena &A, - clang::tok::TokenKind K) { - auto Tokens = A.lexBuffer(llvm::MemoryBuffer::getMemBuffer( - clang::tok::getPunctuatorSpelling(K))) - .second; +syntax::Leaf *clang::syntax::createLeaf(syntax::Arena &A, tok::TokenKind K, + StringRef Spelling) { + auto Tokens = A.lexBuffer(llvm::MemoryBuffer::getMemBuffer(Spelling)).second; assert(Tokens.size() == 1); - assert(Tokens.front().kind() == K); - auto *L = new (A.getAllocator()) clang::syntax::Leaf(Tokens.begin()); - FactoryImpl::setCanModify(L); - L->assertInvariants(); - return L; + assert(Tokens.front().kind() == K && + "spelling is not lexed into the expected kind of token"); + + auto *Leaf = new (A.getAllocator()) syntax::Leaf(Tokens.begin()); + syntax::FactoryImpl::setCanModify(Leaf); + Leaf->assertInvariants(); + return Leaf; +} + +syntax::Leaf *clang::syntax::createLeaf(syntax::Arena &A, tok::TokenKind K) { + const auto *Spelling = tok::getPunctuatorSpelling(K); + if (!Spelling) + Spelling = tok::getKeywordSpelling(K); + assert(Spelling && + "Cannot infer the spelling of the token from its token kind."); + return createLeaf(A, K, Spelling); } -clang::syntax::EmptyStatement * -syntax::createEmptyStatement(clang::syntax::Arena &A) { - auto *S = new (A.getAllocator()) clang::syntax::EmptyStatement; +syntax::EmptyStatement *clang::syntax::createEmptyStatement(syntax::Arena &A) { + auto *S = new (A.getAllocator()) syntax::EmptyStatement; FactoryImpl::setCanModify(S); - FactoryImpl::prependChildLowLevel(S, createPunctuation(A, clang::tok::semi), + FactoryImpl::prependChildLowLevel(S, createLeaf(A, tok::semi), NodeRole::Unknown); S->assertInvariants(); return S; diff --git a/clang/unittests/Tooling/Syntax/SynthesisTest.cpp b/clang/unittests/Tooling/Syntax/SynthesisTest.cpp index 884f3797edef2..1c1aef8bd8c8c 100644 --- a/clang/unittests/Tooling/Syntax/SynthesisTest.cpp +++ b/clang/unittests/Tooling/Syntax/SynthesisTest.cpp @@ -12,33 +12,81 @@ #include "TreeTestBase.h" #include "clang/Tooling/Syntax/BuildTree.h" +#include "gtest/gtest.h" using namespace clang; using namespace clang::syntax; namespace { -INSTANTIATE_TEST_CASE_P(SyntaxTreeTests, SyntaxTreeTest, +class SynthesisTest : public SyntaxTreeTest { +protected: + ::testing::AssertionResult treeDumpEqual(syntax::Node *Root, StringRef Dump) { + if (!Root) + return ::testing::AssertionFailure() + << "Root was not built successfully."; + + auto Actual = StringRef(Root->dump(Arena->getSourceManager())).trim().str(); + auto Expected = Dump.trim().str(); + // EXPECT_EQ shows the diff between the two strings if they are different. 
+ EXPECT_EQ(Expected, Actual); + if (Actual != Expected) { + return ::testing::AssertionFailure(); + } + return ::testing::AssertionSuccess(); + } +}; + +INSTANTIATE_TEST_CASE_P(SynthesisTests, SynthesisTest, ::testing::ValuesIn(allTestClangConfigs()), ); -TEST_P(SyntaxTreeTest, Leaf_Punctuation) { +TEST_P(SynthesisTest, Leaf_Punctuation) { + buildTree("", GetParam()); + + auto *Leaf = createLeaf(*Arena, tok::comma); + + EXPECT_TRUE(treeDumpEqual(Leaf, R"txt( +',' Detached synthesized + )txt")); +} + +TEST_P(SynthesisTest, Leaf_Keyword) { + buildTree("", GetParam()); + + auto *Leaf = createLeaf(*Arena, tok::kw_if); + + EXPECT_TRUE(treeDumpEqual(Leaf, R"txt( +'if' Detached synthesized + )txt")); +} + +TEST_P(SynthesisTest, Leaf_Identifier) { buildTree("", GetParam()); - auto *C = syntax::createPunctuation(*Arena, tok::comma); - ASSERT_NE(C, nullptr); - EXPECT_EQ(C->getToken()->kind(), tok::comma); - EXPECT_TRUE(C->canModify()); - EXPECT_FALSE(C->isOriginal()); - EXPECT_TRUE(C->isDetached()); + auto *Leaf = createLeaf(*Arena, tok::identifier, "a"); + + EXPECT_TRUE(treeDumpEqual(Leaf, R"txt( +'a' Detached synthesized + )txt")); +} + +TEST_P(SynthesisTest, Leaf_Number) { + buildTree("", GetParam()); + + auto *Leaf = createLeaf(*Arena, tok::numeric_constant, "1"); + + EXPECT_TRUE(treeDumpEqual(Leaf, R"txt( +'1' Detached synthesized + )txt")); } -TEST_P(SyntaxTreeTest, Statement_Empty) { +TEST_P(SynthesisTest, Statement_EmptyStatement) { buildTree("", GetParam()); - auto *S = syntax::createEmptyStatement(*Arena); - ASSERT_NE(S, nullptr); - EXPECT_TRUE(S->canModify()); - EXPECT_FALSE(S->isOriginal()); - EXPECT_TRUE(S->isDetached()); + auto *S = createEmptyStatement(*Arena); + EXPECT_TRUE(treeDumpEqual(S, R"txt( +EmptyStatement Detached synthesized +`-';' synthesized + )txt")); } } // namespace From 515238d5b1133f87f85445b9f35783ca2d3a2e7b Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Fri, 11 Sep 2020 13:13:19 +0000 Subject: [PATCH 0403/1079] [SyntaxTree] Reduce visibility of `Arena::lexBuffer`. Differential Revision: https://reviews.llvm.org/D87523 --- clang/include/clang/Tooling/Syntax/Tree.h | 6 ++++-- clang/lib/Tooling/Syntax/Synthesis.cpp | 10 +++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Tooling/Syntax/Tree.h b/clang/include/clang/Tooling/Syntax/Tree.h index aab904ab65d32..b49a09344c0fb 100644 --- a/clang/include/clang/Tooling/Syntax/Tree.h +++ b/clang/include/clang/Tooling/Syntax/Tree.h @@ -47,11 +47,13 @@ class Arena { const TokenBuffer &getTokenBuffer() const; llvm::BumpPtrAllocator &getAllocator() { return Allocator; } +private: /// Add \p Buffer to the underlying source manager, tokenize it and store the - /// resulting tokens. Useful when there is a need to materialize tokens that - /// were not written in user code. + /// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens + /// that were not written in user code. 
std::pair> lexBuffer(std::unique_ptr Buffer); + friend class FactoryImpl; private: SourceManager &SourceMgr; diff --git a/clang/lib/Tooling/Syntax/Synthesis.cpp b/clang/lib/Tooling/Syntax/Synthesis.cpp index 8d51325706fa0..772429ff4c466 100644 --- a/clang/lib/Tooling/Syntax/Synthesis.cpp +++ b/clang/lib/Tooling/Syntax/Synthesis.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/Basic/TokenKinds.h" #include "clang/Tooling/Syntax/BuildTree.h" +#include "clang/Tooling/Syntax/Tree.h" using namespace clang; @@ -20,11 +21,18 @@ class clang::syntax::FactoryImpl { syntax::NodeRole R) { T->prependChildLowLevel(Child, R); } + + static std::pair> + lexBuffer(syntax::Arena &A, std::unique_ptr Buffer) { + return A.lexBuffer(std::move(Buffer)); + } }; syntax::Leaf *clang::syntax::createLeaf(syntax::Arena &A, tok::TokenKind K, StringRef Spelling) { - auto Tokens = A.lexBuffer(llvm::MemoryBuffer::getMemBuffer(Spelling)).second; + auto Tokens = + FactoryImpl::lexBuffer(A, llvm::MemoryBuffer::getMemBuffer(Spelling)) + .second; assert(Tokens.size() == 1); assert(Tokens.front().kind() == K && "spelling is not lexed into the expected kind of token"); From 238ae4eee05187758e42c00af237592612d585c2 Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Fri, 11 Sep 2020 16:33:18 +0000 Subject: [PATCH 0404/1079] [SyntaxTree] Add const qualifiers, from [llvm-qualified-auto] Differential Revision: https://reviews.llvm.org/D87522 --- clang/lib/Tooling/Syntax/BuildTree.cpp | 2 +- clang/lib/Tooling/Syntax/ComputeReplacements.cpp | 10 +++++----- clang/lib/Tooling/Syntax/Tree.cpp | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index 8de50dd02162a..dab1457fbdba6 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -558,7 +558,7 @@ class syntax::TreeBuilder { assert(A.getTokenBuffer().expandedTokens().back().kind() == tok::eof); // Create all leaf nodes. // Note that we do not have 'eof' in the tree. - for (auto &T : A.getTokenBuffer().expandedTokens().drop_back()) { + for (const auto &T : A.getTokenBuffer().expandedTokens().drop_back()) { auto *L = new (A.getAllocator()) syntax::Leaf(&T); L->Original = true; L->CanModify = A.getTokenBuffer().spelledForExpanded(T).hasValue(); diff --git a/clang/lib/Tooling/Syntax/ComputeReplacements.cpp b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp index 93b1c4416bf45..31e1a40c74b61 100644 --- a/clang/lib/Tooling/Syntax/ComputeReplacements.cpp +++ b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp @@ -32,7 +32,7 @@ void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) { private: void process(const syntax::Node *N) { if (auto *T = dyn_cast(N)) { - for (auto *C = T->getFirstChild(); C != nullptr; + for (const auto *C = T->getFirstChild(); C != nullptr; C = C->getNextSibling()) process(C); return; @@ -64,8 +64,8 @@ void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) { syntax::FileRange rangeOfExpanded(const syntax::Arena &A, llvm::ArrayRef Expanded) { - auto &Buffer = A.getTokenBuffer(); - auto &SM = A.getSourceManager(); + const auto &Buffer = A.getTokenBuffer(); + const auto &SM = A.getSourceManager(); // Check that \p Expanded actually points into expanded tokens. 
assert(Buffer.expandedTokens().begin() <= Expanded.begin()); @@ -85,8 +85,8 @@ syntax::FileRange rangeOfExpanded(const syntax::Arena &A, tooling::Replacements syntax::computeReplacements(const syntax::Arena &A, const syntax::TranslationUnit &TU) { - auto &Buffer = A.getTokenBuffer(); - auto &SM = A.getSourceManager(); + const auto &Buffer = A.getTokenBuffer(); + const auto &SM = A.getSourceManager(); tooling::Replacements Replacements; // Text inserted by the replacement we are building now. diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp index f9d1fa6110ffc..ca1e2880af9f2 100644 --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ b/clang/lib/Tooling/Syntax/Tree.cpp @@ -19,7 +19,7 @@ namespace { static void traverse(const syntax::Node *N, llvm::function_ref Visit) { if (auto *T = dyn_cast(N)) { - for (auto *C = T->getFirstChild(); C; C = C->getNextSibling()) + for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling()) traverse(C, Visit); } Visit(N); @@ -226,7 +226,7 @@ void syntax::Node::assertInvariants() const { auto *T = dyn_cast(this); if (!T) return; - for (auto *C = T->getFirstChild(); C; C = C->getNextSibling()) { + for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling()) { if (T->isOriginal()) assert(C->isOriginal()); assert(!C->isDetached()); From 398fcf224b8dd0968f27cdcc7e75bb0bc8ed6d09 Mon Sep 17 00:00:00 2001 From: Peter Steinfeld Date: Fri, 11 Sep 2020 11:02:04 -0700 Subject: [PATCH 0405/1079] [flang] Fix bug for forward referenced type A type name in an IMPLICIT declaration that was later used in a PARAMETER statement caused problems because the default symbol scope had not yet been initialized. I avoided dereferencing in the situation where the default scope was uninitialized and added a test that triggers the problem. Differential Revision: https://reviews.llvm.org/D87535 --- flang/lib/Semantics/symbol.cpp | 8 +++----- flang/test/Semantics/bad-forward-type.f90 | 10 ++++++++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/flang/lib/Semantics/symbol.cpp b/flang/lib/Semantics/symbol.cpp index e0d80ec6d1c8b..c15c60406c36c 100644 --- a/flang/lib/Semantics/symbol.cpp +++ b/flang/lib/Semantics/symbol.cpp @@ -541,13 +541,11 @@ const DerivedTypeSpec *Symbol::GetParentTypeSpec(const Scope *scope) const { const Symbol *Symbol::GetParentComponent(const Scope *scope) const { if (const auto *dtDetails{detailsIf()}) { - if (!scope) { - scope = scope_; + if (const Scope * localScope{scope ? scope : scope_}) { + return dtDetails->GetParentComponent(DEREF(localScope)); } - return dtDetails->GetParentComponent(DEREF(scope)); - } else { - return nullptr; } + return nullptr; } void DerivedTypeDetails::add_component(const Symbol &symbol) { diff --git a/flang/test/Semantics/bad-forward-type.f90 b/flang/test/Semantics/bad-forward-type.f90 index 5fe17ad833ad4..2a8cbc0c9b1af 100644 --- a/flang/test/Semantics/bad-forward-type.f90 +++ b/flang/test/Semantics/bad-forward-type.f90 @@ -70,3 +70,13 @@ subroutine s7(x) type, extends(undef) :: t end type end subroutine + +subroutine s8 + !ERROR: Derived type 't2' was used but never defined + !ERROR: The derived type 't2' was forward-referenced but not defined + implicit type(t2)(x) + parameter(y=t2(12.3)) + type t2 + real :: c + end type +end subroutine From 59fc86779038b19cf85f87b51052d468286788f2 Mon Sep 17 00:00:00 2001 From: Olivier Giroux Date: Fri, 11 Sep 2020 12:13:35 -0700 Subject: [PATCH 0406/1079] Re-split integral & pointer overloads. Add tests. 
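The one-line subject is terse; concretely, the patch replaces the single SFINAE-constrained template (whose enable_if mixed is_pointer and is_integral) with two families: the enable_if now guards only the integral overloads, additionally rejecting bool and const-qualified types, while pointer atomics get unconstrained overloads whose _Tp deduces directly from atomic<_Tp*>. A condensed sketch of the resulting declaration shapes, with _LIBCPP_INLINE_VISIBILITY and _NOEXCEPT omitted for brevity:

    template <class _Tp>
    typename enable_if<is_integral<_Tp>::value && !is_same<_Tp, bool>::value &&
                           !is_const<_Tp>::value,
                       _Tp>::type
    atomic_fetch_add(atomic<_Tp>* __o,
                     typename atomic<_Tp>::difference_type __op);

    template <class _Tp>
    _Tp* atomic_fetch_add(atomic<_Tp*>* __o,
                          typename atomic<_Tp*>::difference_type __op);

The same split is applied to the volatile overloads and to atomic_fetch_sub and the _explicit variants in the hunks below.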
--- libcxx/include/atomic | 80 +++++++++++++++++-- .../atomic_fetch_add.pass.cpp | 2 + .../atomic_fetch_add_explicit.pass.cpp | 2 + .../atomic_fetch_sub.pass.cpp | 2 + .../atomic_fetch_sub_explicit.pass.cpp | 2 + 5 files changed, 80 insertions(+), 8 deletions(-) diff --git a/libcxx/include/atomic b/libcxx/include/atomic index be81f6491edf6..56bd03584c9b4 100644 --- a/libcxx/include/atomic +++ b/libcxx/include/atomic @@ -2163,7 +2163,7 @@ template _LIBCPP_INLINE_VISIBILITY typename enable_if < - is_pointer<_Tp>::value || (is_integral<_Tp>::value && !is_same<_Tp, bool>::value), + is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value, _Tp >::type atomic_fetch_add(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _NOEXCEPT @@ -2175,7 +2175,7 @@ template _LIBCPP_INLINE_VISIBILITY typename enable_if < - is_pointer<_Tp>::value || (is_integral<_Tp>::value && !is_same<_Tp, bool>::value), + is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value, _Tp >::type atomic_fetch_add(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _NOEXCEPT @@ -2183,13 +2183,29 @@ atomic_fetch_add(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _ return __o->fetch_add(__op); } +template +_LIBCPP_INLINE_VISIBILITY +_Tp* +atomic_fetch_add(volatile atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op) _NOEXCEPT +{ + return __o->fetch_add(__op); +} + +template +_LIBCPP_INLINE_VISIBILITY +_Tp* +atomic_fetch_add(atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op) _NOEXCEPT +{ + return __o->fetch_add(__op); +} + // atomic_fetch_add_explicit template _LIBCPP_INLINE_VISIBILITY typename enable_if < - is_pointer<_Tp>::value || (is_integral<_Tp>::value && !is_same<_Tp, bool>::value), + is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value, _Tp >::type atomic_fetch_add_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT @@ -2201,7 +2217,7 @@ template _LIBCPP_INLINE_VISIBILITY typename enable_if < - is_pointer<_Tp>::value || (is_integral<_Tp>::value && !is_same<_Tp, bool>::value), + is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value, _Tp >::type atomic_fetch_add_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT @@ -2209,13 +2225,29 @@ atomic_fetch_add_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::difference_typ return __o->fetch_add(__op, __m); } +template +_LIBCPP_INLINE_VISIBILITY +_Tp* +atomic_fetch_add_explicit(volatile atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op, memory_order __m) _NOEXCEPT +{ + return __o->fetch_add(__op, __m); +} + +template +_LIBCPP_INLINE_VISIBILITY +_Tp* +atomic_fetch_add_explicit(atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op, memory_order __m) _NOEXCEPT +{ + return __o->fetch_add(__op, __m); +} + // atomic_fetch_sub template _LIBCPP_INLINE_VISIBILITY typename enable_if < - is_pointer<_Tp>::value || (is_integral<_Tp>::value && !is_same<_Tp, bool>::value), + is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value, _Tp >::type atomic_fetch_sub(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _NOEXCEPT @@ -2227,7 +2259,7 @@ template _LIBCPP_INLINE_VISIBILITY typename enable_if < - is_pointer<_Tp>::value || (is_integral<_Tp>::value && !is_same<_Tp, bool>::value), + is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value, _Tp >::type 
atomic_fetch_sub(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _NOEXCEPT @@ -2235,13 +2267,29 @@ atomic_fetch_sub(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _ return __o->fetch_sub(__op); } +template +_LIBCPP_INLINE_VISIBILITY +_Tp* +atomic_fetch_sub(volatile atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op) _NOEXCEPT +{ + return __o->fetch_sub(__op); +} + +template +_LIBCPP_INLINE_VISIBILITY +_Tp* +atomic_fetch_sub(atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op) _NOEXCEPT +{ + return __o->fetch_sub(__op); +} + // atomic_fetch_sub_explicit template _LIBCPP_INLINE_VISIBILITY typename enable_if < - is_pointer<_Tp>::value || (is_integral<_Tp>::value && !is_same<_Tp, bool>::value), + is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value, _Tp >::type atomic_fetch_sub_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT @@ -2253,7 +2301,7 @@ template _LIBCPP_INLINE_VISIBILITY typename enable_if < - is_pointer<_Tp>::value || (is_integral<_Tp>::value && !is_same<_Tp, bool>::value), + is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value, _Tp >::type atomic_fetch_sub_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT @@ -2261,6 +2309,22 @@ atomic_fetch_sub_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::difference_typ return __o->fetch_sub(__op, __m); } +template +_LIBCPP_INLINE_VISIBILITY +_Tp* +atomic_fetch_sub_explicit(volatile atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op, memory_order __m) _NOEXCEPT +{ + return __o->fetch_sub(__op, __m); +} + +template +_LIBCPP_INLINE_VISIBILITY +_Tp* +atomic_fetch_sub_explicit(atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op, memory_order __m) _NOEXCEPT +{ + return __o->fetch_sub(__op, __m); +} + // atomic_fetch_and template diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add.pass.cpp index e584ea955d754..38ce06e2817b5 100644 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add.pass.cpp @@ -63,6 +63,7 @@ void testp() A t; std::atomic_init(&t, T(1*sizeof(X))); assert(std::atomic_fetch_add(&t, 2) == T(1*sizeof(X))); + std::atomic_fetch_add(&t, 0); assert(t == T(3*sizeof(X))); } { @@ -71,6 +72,7 @@ void testp() volatile A t; std::atomic_init(&t, T(1*sizeof(X))); assert(std::atomic_fetch_add(&t, 2) == T(1*sizeof(X))); + std::atomic_fetch_add(&t, 0); assert(t == T(3*sizeof(X))); } } diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add_explicit.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add_explicit.pass.cpp index 548101a409e9e..f39adb14effac 100644 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add_explicit.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add_explicit.pass.cpp @@ -67,6 +67,7 @@ testp() std::atomic_init(&t, T(1*sizeof(X))); assert(std::atomic_fetch_add_explicit(&t, 2, std::memory_order_seq_cst) == T(1*sizeof(X))); + std::atomic_fetch_add_explicit(&t, 0, 
                                        std::memory_order_relaxed);
         assert(t == T(3*sizeof(X)));
     }
     {
@@ -76,6 +77,7 @@ testp()
         std::atomic_init(&t, T(1*sizeof(X)));
         assert(std::atomic_fetch_add_explicit(&t, 2, std::memory_order_seq_cst)
                == T(1*sizeof(X)));
+        std::atomic_fetch_add_explicit(&t, 0, std::memory_order_relaxed);
         assert(t == T(3*sizeof(X)));
     }
 }
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub.pass.cpp
index 20ec7688bb2ba..3568d2fa60ff6 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub.pass.cpp
@@ -63,6 +63,7 @@ void testp()
         A t;
         std::atomic_init(&t, T(3*sizeof(X)));
         assert(std::atomic_fetch_sub(&t, 2) == T(3*sizeof(X)));
+        std::atomic_fetch_sub(&t, 0);
         assert(t == T(1*sizeof(X)));
     }
     {
@@ -71,6 +72,7 @@
         volatile A t;
         std::atomic_init(&t, T(3*sizeof(X)));
         assert(std::atomic_fetch_sub(&t, 2) == T(3*sizeof(X)));
+        std::atomic_fetch_sub(&t, 0);
         assert(t == T(1*sizeof(X)));
     }
 }
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub_explicit.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub_explicit.pass.cpp
index f26cefcbdb074..261917f8087e0 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub_explicit.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub_explicit.pass.cpp
@@ -67,6 +67,7 @@ void testp()
         std::atomic_init(&t, T(3*sizeof(X)));
         assert(std::atomic_fetch_sub_explicit(&t, 2, std::memory_order_seq_cst)
                == T(3*sizeof(X)));
+        std::atomic_fetch_sub_explicit(&t, 0, std::memory_order_relaxed);
         assert(t == T(1*sizeof(X)));
     }
     {
@@ -76,6 +77,7 @@ void testp()
         std::atomic_init(&t, T(3*sizeof(X)));
         assert(std::atomic_fetch_sub_explicit(&t, 2, std::memory_order_seq_cst)
                == T(3*sizeof(X)));
+        std::atomic_fetch_sub_explicit(&t, 0, std::memory_order_relaxed);
         assert(t == T(1*sizeof(X)));
     }
 }

From 9a2bab5ea2f4aacbb267e634ff1189fa64143b76 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Thu, 10 Sep 2020 12:16:26 -0700
Subject: [PATCH 0407/1079] [ThinLTO] Make -lto-embed-bitcode an enum

The current behavior of -lto-embed-bitcode is not quite the same as that
of -fembed-bitcode. While both populate .llvmbc with bitcode, the latter
populates it with pre-optimized bitcode(*), while the former populates it
with post-optimized bitcode. The scenarios driving them are different -
the latter's goal is to allow re-compilation, while the former's, IIUC,
is execution.

I plan to add a third mode for ThinLTO cases, closely related to
-fembed-bitcode's scenario: adding the bitcode pre-optimization, but
post-merging. This would allow re-compilation without requiring the
other .bc files that were merged (akin to how -fembed-bitcode allows
recompilation without all the .h files).

The third mode can't co-exist with the current -lto-embed-bitcode mode,
because the latter would overwrite it. For clarity, we change
-lto-embed-bitcode to be an enum.

(*) That's the compiler semantics. The driver splits compilation in 2
phases, so if -fembed-bitcode is given to the driver, the .llvmbc is
optimized bitcode; if the option is passed to the compiler (after -cc1),
the section is pre-optimized.
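To make the planned third mode concrete, the enum introduced in the diff below leaves room for it; a hypothetical sketch, with the added enumerator name invented here purely for illustration:

    enum class LTOBitcodeEmbedding {
      DoNotEmbed = 0,
      EmbedOptimized = 1,
      // Hypothetical third mode from the description above: embed the
      // post-merge, pre-optimization module so an object file can be
      // recompiled without the other merged .bc inputs.
      EmbedPostMergePreOptimized = 2,
    };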
Differential Revision: https://reviews.llvm.org/D87477
---
 llvm/lib/LTO/LTOBackend.cpp        | 29 +++++++++++++++++------------
 llvm/test/LTO/X86/embed-bitcode.ll |  4 ++--
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 966edcf693752..00309b6d712f8 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -50,6 +50,19 @@ using namespace llvm;
 using namespace lto;

+enum class LTOBitcodeEmbedding {
+  DoNotEmbed = 0,
+  EmbedOptimized = 1,
+};
+
+static cl::opt<LTOBitcodeEmbedding> EmbedBitcode(
+    "lto-embed-bitcode", cl::init(LTOBitcodeEmbedding::DoNotEmbed),
+    cl::values(clEnumValN(LTOBitcodeEmbedding::DoNotEmbed, "none",
+                          "Do not embed"),
+               clEnumValN(LTOBitcodeEmbedding::EmbedOptimized, "optimized",
+                          "Embed after all optimization passes")),
+    cl::desc("Embed LLVM bitcode in object files produced by LTO"));
+
 LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) {
   errs() << "failed to open " << Path << ": " << Msg << '\n';
   errs().flush();
@@ -346,24 +359,16 @@ bool opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
   return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod);
 }

-static cl::opt<bool> EmbedBitcode(
-    "lto-embed-bitcode", cl::init(false),
-    cl::desc("Embed LLVM bitcode in object files produced by LTO"));
-
-static void EmitBitcodeSection(Module &M) {
-  if (!EmbedBitcode)
-    return;
-  llvm::EmbedBitcodeInModule(M, llvm::MemoryBufferRef(), /*EmbedBitcode*/ true,
-                             /*EmbedMarker*/ false, /*CmdArgs*/ nullptr);
-}
-
 void codegen(const Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
              unsigned Task, Module &Mod,
              const ModuleSummaryIndex &CombinedIndex) {
   if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod))
     return;

-  EmitBitcodeSection(Mod);
+  if (EmbedBitcode == LTOBitcodeEmbedding::EmbedOptimized)
+    llvm::EmbedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
+                               /*EmbedBitcode*/ true,
+                               /*EmbedMarker*/ false, /*CmdArgs*/ nullptr);

   std::unique_ptr<ToolOutputFile> DwoOut;
   SmallString<1024> DwoFile(Conf.SplitDwarfOutput);
diff --git a/llvm/test/LTO/X86/embed-bitcode.ll b/llvm/test/LTO/X86/embed-bitcode.ll
index 151f27f55eefb..c8b4d0faa7479 100644
--- a/llvm/test/LTO/X86/embed-bitcode.ll
+++ b/llvm/test/LTO/X86/embed-bitcode.ll
@@ -5,10 +5,10 @@
 ; RUN: llvm-lto2 run -r %t1.o,_start,px -r %t2.o,foo,px -r %t3.o,bar,px -r %t2.o,bar,lx -o %t3 %t1.o %t2.o %t3.o
 ; RUN: llvm-readelf -S %t3.0 | FileCheck %s --implicit-check-not=.llvmbc

-; RUN: llvm-lto2 run -r %t1.o,_start,px -r %t2.o,foo,px -r %t3.o,bar,px -r %t2.o,bar,lx -lto-embed-bitcode=false -o %t3 %t1.o %t2.o %t3.o
+; RUN: llvm-lto2 run -r %t1.o,_start,px -r %t2.o,foo,px -r %t3.o,bar,px -r %t2.o,bar,lx -lto-embed-bitcode=none -o %t3 %t1.o %t2.o %t3.o
 ; RUN: llvm-readelf -S %t3.0 | FileCheck %s --implicit-check-not=.llvmbc

-; RUN: llvm-lto2 run -r %t1.o,_start,px -r %t2.o,foo,px -r %t3.o,bar,px -r %t2.o,bar,lx -lto-embed-bitcode -o %t3 %t1.o %t2.o %t3.o
+; RUN: llvm-lto2 run -r %t1.o,_start,px -r %t2.o,foo,px -r %t3.o,bar,px -r %t2.o,bar,lx -lto-embed-bitcode=optimized -o %t3 %t1.o %t2.o %t3.o
 ; RUN: llvm-readelf -S %t3.0 | FileCheck %s --check-prefix=CHECK-ELF
 ; RUN: llvm-objcopy --dump-section=.llvmbc=%t-embedded.bc %t3.0 /dev/null
 ; RUN: llvm-dis %t-embedded.bc -o - | FileCheck %s --check-prefix=CHECK-LL

From df477db5f9e0ea2a4890040b65002d93e33209b0 Mon Sep 17 00:00:00 2001
From: Xun Li
Date: Fri, 11 Sep 2020 13:34:03 -0700
Subject: [PATCH 0408/1079] [Coroutine][Sema] Tighten the lifetime of symmetric transfer
 returned handle

In generating the code for symmetric transfer, a temporary object is
created to store the handle returned from the awaiter's await_suspend()
call. Previously this temporary was not cleaned up until much later,
which caused it to be spilled to the heap. However, it is no longer
needed after the coro_resume call, so we can clean it up right after.

Differential Revision: https://reviews.llvm.org/D87470
---
 clang/lib/Sema/SemaCoroutine.cpp              |  4 ++
 .../test/CodeGenCoroutines/Inputs/coroutine.h |  2 +-
 .../coro-semmetric-transfer.cpp               | 53 +++++++++++++++++++
 3 files changed, 58 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CodeGenCoroutines/coro-semmetric-transfer.cpp

diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp
index 990ab26335209..565f907e05b28 100644
--- a/clang/lib/Sema/SemaCoroutine.cpp
+++ b/clang/lib/Sema/SemaCoroutine.cpp
@@ -398,6 +398,10 @@ static Expr *maybeTailCall(Sema &S, QualType RetType, Expr *E,
         diag::warn_coroutine_handle_address_invalid_return_type)
         << JustAddress->getType();

+  // The coroutine handle used to obtain the address is no longer needed
+  // at this point, clean it up to avoid unnecessarily long lifetime which
+  // could lead to unnecessary spilling.
+  JustAddress = S.MaybeCreateExprWithCleanups(JustAddress);
   return buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_resume,
                           JustAddress);
 }
diff --git a/clang/test/CodeGenCoroutines/Inputs/coroutine.h b/clang/test/CodeGenCoroutines/Inputs/coroutine.h
index 5cc78a4904aad..2dd1ce7e97351 100644
--- a/clang/test/CodeGenCoroutines/Inputs/coroutine.h
+++ b/clang/test/CodeGenCoroutines/Inputs/coroutine.h
@@ -15,7 +15,7 @@ template <> struct coroutine_handle<void> {
     return me;
   }
   void operator()() { resume(); }
-  void *address() const { return ptr; }
+  void *address() const noexcept { return ptr; }
   void resume() const { __builtin_coro_resume(ptr); }
   void destroy() const { __builtin_coro_destroy(ptr); }
   bool done() const { return __builtin_coro_done(ptr); }
diff --git a/clang/test/CodeGenCoroutines/coro-semmetric-transfer.cpp b/clang/test/CodeGenCoroutines/coro-semmetric-transfer.cpp
new file mode 100644
index 0000000000000..09205799c3f7f
--- /dev/null
+++ b/clang/test/CodeGenCoroutines/coro-semmetric-transfer.cpp
@@ -0,0 +1,53 @@
+// RUN: %clang -std=c++14 -fcoroutines-ts -emit-llvm -S -O1 %s -o -
+
+#include "Inputs/coroutine.h"
+
+namespace coro = std::experimental::coroutines_v1;
+
+struct detached_task {
+  struct promise_type {
+    detached_task get_return_object() noexcept {
+      return detached_task{coro::coroutine_handle<promise_type>::from_promise(*this)};
+    }
+
+    void return_void() noexcept {}
+
+    struct final_awaiter {
+      bool await_ready() noexcept { return false; }
+      coro::coroutine_handle<> await_suspend(coro::coroutine_handle<promise_type> h) noexcept {
+        h.destroy();
+        return {};
+      }
+      void await_resume() noexcept {}
+    };
+
+    void unhandled_exception() noexcept {}
+
+    final_awaiter final_suspend() noexcept { return {}; }
+
+    coro::suspend_always initial_suspend() noexcept { return {}; }
+  };
+
+  ~detached_task() {
+    if (coro_) {
+      coro_.destroy();
+      coro_ = {};
+    }
+  }
+
+  void start() && {
+    auto tmp = coro_;
+    coro_ = {};
+    tmp.resume();
+  }
+
+  coro::coroutine_handle<promise_type> coro_;
+};
+
+detached_task foo() {
+  co_return;
+}
+
+// check that the lifetime of the coroutine handle used to obtain the address ended right away.
+// CHECK: %{{.*}} = call i8* @{{.*address.*}}(%"struct.std::experimental::coroutines_v1::coroutine_handle.0"* nonnull %{{.*}}) +// CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %{{.*}}) From 7c37b82f5ba5883b331608b0077c0b30bf301874 Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Fri, 11 Sep 2020 15:59:22 +0000 Subject: [PATCH 0409/1079] [SyntaxTree][Synthesis] Add support for Tree. In a future patch * Implement helper function to generate Trees for tests * and test Tree methods, namely `findFirstLeaf` and `findLastLeaf` Differential Revision: https://reviews.llvm.org/D87533 --- .../include/clang/Tooling/Syntax/BuildTree.h | 6 ++ clang/lib/Tooling/Syntax/Synthesis.cpp | 14 +++++ .../Tooling/Syntax/SynthesisTest.cpp | 57 +++++++++++++++++++ 3 files changed, 77 insertions(+) diff --git a/clang/include/clang/Tooling/Syntax/BuildTree.h b/clang/include/clang/Tooling/Syntax/BuildTree.h index c2ae4348bc166..b9405167bf99b 100644 --- a/clang/include/clang/Tooling/Syntax/BuildTree.h +++ b/clang/include/clang/Tooling/Syntax/BuildTree.h @@ -34,6 +34,12 @@ syntax::Leaf *createLeaf(syntax::Arena &A, tok::TokenKind K, /// this token syntax::Leaf *createLeaf(syntax::Arena &A, tok::TokenKind K); +// Synthesis of Trees +syntax::Tree * +createTree(Arena &A, + std::vector> Children, + syntax::NodeKind K); + // Synthesis of Syntax Nodes clang::syntax::EmptyStatement *createEmptyStatement(clang::syntax::Arena &A); diff --git a/clang/lib/Tooling/Syntax/Synthesis.cpp b/clang/lib/Tooling/Syntax/Synthesis.cpp index 772429ff4c466..6de3d5b5752da 100644 --- a/clang/lib/Tooling/Syntax/Synthesis.cpp +++ b/clang/lib/Tooling/Syntax/Synthesis.cpp @@ -52,6 +52,20 @@ syntax::Leaf *clang::syntax::createLeaf(syntax::Arena &A, tok::TokenKind K) { return createLeaf(A, K, Spelling); } +syntax::Tree *clang::syntax::createTree( + syntax::Arena &A, + std::vector> Children, + syntax::NodeKind K) { + auto *T = new (A.getAllocator()) syntax::Tree(K); + FactoryImpl::setCanModify(T); + for (auto ChildIt = Children.rbegin(); ChildIt != Children.rend(); + std::advance(ChildIt, 1)) + FactoryImpl::prependChildLowLevel(T, ChildIt->first, ChildIt->second); + + T->assertInvariants(); + return T; +} + syntax::EmptyStatement *clang::syntax::createEmptyStatement(syntax::Arena &A) { auto *S = new (A.getAllocator()) syntax::EmptyStatement; FactoryImpl::setCanModify(S); diff --git a/clang/unittests/Tooling/Syntax/SynthesisTest.cpp b/clang/unittests/Tooling/Syntax/SynthesisTest.cpp index 1c1aef8bd8c8c..a882714ccf33f 100644 --- a/clang/unittests/Tooling/Syntax/SynthesisTest.cpp +++ b/clang/unittests/Tooling/Syntax/SynthesisTest.cpp @@ -12,6 +12,7 @@ #include "TreeTestBase.h" #include "clang/Tooling/Syntax/BuildTree.h" +#include "clang/Tooling/Syntax/Nodes.h" #include "gtest/gtest.h" using namespace clang; @@ -80,6 +81,62 @@ TEST_P(SynthesisTest, Leaf_Number) { )txt")); } +TEST_P(SynthesisTest, Tree_Empty) { + buildTree("", GetParam()); + + auto *Tree = createTree(*Arena, {}, NodeKind::UnknownExpression); + + EXPECT_TRUE(treeDumpEqual(Tree, R"txt( +UnknownExpression Detached synthesized + )txt")); +} + +TEST_P(SynthesisTest, Tree_Flat) { + buildTree("", GetParam()); + + auto *LeafLParen = createLeaf(*Arena, tok::l_paren); + auto *LeafRParen = createLeaf(*Arena, tok::r_paren); + auto *TreeParen = createTree(*Arena, + {{LeafLParen, NodeRole::LeftHandSide}, + {LeafRParen, NodeRole::RightHandSide}}, + NodeKind::ParenExpression); + + EXPECT_TRUE(treeDumpEqual(TreeParen, R"txt( +ParenExpression Detached synthesized +|-'(' LeftHandSide 
synthesized +`-')' RightHandSide synthesized + )txt")); +} + +TEST_P(SynthesisTest, Tree_OfTree) { + buildTree("", GetParam()); + + auto *Leaf1 = createLeaf(*Arena, tok::numeric_constant, "1"); + auto *Int1 = createTree(*Arena, {{Leaf1, NodeRole::LiteralToken}}, + NodeKind::IntegerLiteralExpression); + + auto *LeafPlus = createLeaf(*Arena, tok::plus); + + auto *Leaf2 = createLeaf(*Arena, tok::numeric_constant, "2"); + auto *Int2 = createTree(*Arena, {{Leaf2, NodeRole::LiteralToken}}, + NodeKind::IntegerLiteralExpression); + + auto *TreeBinaryOperator = createTree(*Arena, + {{Int1, NodeRole::LeftHandSide}, + {LeafPlus, NodeRole::OperatorToken}, + {Int2, NodeRole::RightHandSide}}, + NodeKind::BinaryOperatorExpression); + + EXPECT_TRUE(treeDumpEqual(TreeBinaryOperator, R"txt( +BinaryOperatorExpression Detached synthesized +|-IntegerLiteralExpression LeftHandSide synthesized +| `-'1' LiteralToken synthesized +|-'+' OperatorToken synthesized +`-IntegerLiteralExpression RightHandSide synthesized + `-'2' LiteralToken synthesized + )txt")); +} + TEST_P(SynthesisTest, Statement_EmptyStatement) { buildTree("", GetParam()); From 7dcd0042e8b8581751bd9b915207058d2ab88e1d Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Fri, 11 Sep 2020 09:23:14 -0700 Subject: [PATCH 0410/1079] Re-apply "[ORC] Make MaterializationResponsibility immovable..." with fixes. Re-applies c74900ca672 with fixes for the ThinLtoJIT example. --- .../SpeculativeJIT/SpeculativeJIT.cpp | 15 +- .../ThinLtoInstrumentationLayer.cpp | 4 +- .../ThinLtoJIT/ThinLtoInstrumentationLayer.h | 3 +- llvm/examples/ThinLtoJIT/ThinLtoJIT.cpp | 11 +- .../Orc/CompileOnDemandLayer.h | 6 +- llvm/include/llvm/ExecutionEngine/Orc/Core.h | 37 +-- .../llvm/ExecutionEngine/Orc/IRCompileLayer.h | 3 +- .../ExecutionEngine/Orc/IRTransformLayer.h | 3 +- llvm/include/llvm/ExecutionEngine/Orc/Layer.h | 11 +- .../llvm/ExecutionEngine/Orc/LazyReexports.h | 2 +- .../ExecutionEngine/Orc/ObjectLinkingLayer.h | 2 +- .../Orc/ObjectTransformLayer.h | 2 +- .../Orc/RTDyldObjectLinkingLayer.h | 2 +- .../llvm/ExecutionEngine/Orc/Speculation.h | 3 +- .../Orc/CompileOnDemandLayer.cpp | 42 +-- llvm/lib/ExecutionEngine/Orc/Core.cpp | 50 ++-- .../ExecutionEngine/Orc/IRCompileLayer.cpp | 6 +- .../ExecutionEngine/Orc/IRTransformLayer.cpp | 6 +- .../ExecutionEngine/Orc/IndirectionUtils.cpp | 6 +- llvm/lib/ExecutionEngine/Orc/LLJIT.cpp | 20 +- llvm/lib/ExecutionEngine/Orc/Layer.cpp | 8 +- .../lib/ExecutionEngine/Orc/LazyReexports.cpp | 16 +- .../Orc/ObjectLinkingLayer.cpp | 59 ++--- .../Orc/ObjectTransformLayer.cpp | 7 +- .../Orc/RTDyldObjectLinkingLayer.cpp | 25 +- llvm/lib/ExecutionEngine/Orc/Speculation.cpp | 4 +- .../ExecutionEngine/Orc/CoreAPIsTest.cpp | 242 ++++++++++-------- .../Orc/LazyCallThroughAndReexportsTest.cpp | 6 +- .../ExecutionEngine/Orc/OrcTestCommon.h | 5 +- 29 files changed, 323 insertions(+), 283 deletions(-) diff --git a/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp b/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp index 4de4897053c1b..24cf0847558f9 100644 --- a/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp +++ b/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp @@ -113,14 +113,13 @@ class SpeculativeJIT { this->CODLayer.setImplMap(&Imps); this->ES->setDispatchMaterialization( [this](std::unique_ptr MU, - MaterializationResponsibility MR) { - // FIXME: Switch to move capture once we have C++14. 
- auto SharedMU = std::shared_ptr(std::move(MU)); - auto SharedMR = - std::make_shared(std::move(MR)); - CompileThreads.async([SharedMU, SharedMR]() { - SharedMU->materialize(std::move(*SharedMR)); - }); + std::unique_ptr MR) { + CompileThreads.async( + [UnownedMU = MU.release(), UnownedMR = MR.release()]() { + std::unique_ptr MU(UnownedMU); + std::unique_ptr MR(UnownedMR); + MU->materialize(std::move(MR)); + }); }); ExitOnErr(S.addSpeculationRuntime(MainJD, Mangle)); LocalCXXRuntimeOverrides CXXRuntimeoverrides; diff --git a/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp b/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp index 345bfd8dd8705..df844bf19b9cc 100644 --- a/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp +++ b/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp @@ -120,8 +120,8 @@ void ThinLtoInstrumentationLayer::nudgeIntoDiscovery( LLVM_DEBUG(dbgs() << "Nudged " << Count << " new functions into discovery\n"); } -void ThinLtoInstrumentationLayer::emit(MaterializationResponsibility R, - ThreadSafeModule TSM) { +void ThinLtoInstrumentationLayer::emit( + std::unique_ptr R, ThreadSafeModule TSM) { TSM.withModuleDo([this](Module &M) { std::vector FunctionsToInstrument; diff --git a/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h b/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h index cd87207894745..25006b40607fe 100644 --- a/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h +++ b/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h @@ -34,7 +34,8 @@ class ThinLtoInstrumentationLayer : public IRLayer { ~ThinLtoInstrumentationLayer() override; - void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; + void emit(std::unique_ptr R, + ThreadSafeModule TSM) override; unsigned reserveDiscoveryFlags(unsigned Count); void registerDiscoveryFlagOwners(std::vector Guids, diff --git a/llvm/examples/ThinLtoJIT/ThinLtoJIT.cpp b/llvm/examples/ThinLtoJIT/ThinLtoJIT.cpp index f5c2b0696f55c..e668be7d11b7e 100644 --- a/llvm/examples/ThinLtoJIT/ThinLtoJIT.cpp +++ b/llvm/examples/ThinLtoJIT/ThinLtoJIT.cpp @@ -267,19 +267,18 @@ void ThinLtoJIT::setupLayers(JITTargetMachineBuilder JTMB, llvm::hardware_concurrency(NumCompileThreads)); ES.setDispatchMaterialization( [this](std::unique_ptr MU, - MaterializationResponsibility MR) { + std::unique_ptr MR) { if (IsTrivialModule(MU.get())) { // This should be quick and we may save a few session locks. MU->materialize(std::move(MR)); } else { // FIXME: Drop the std::shared_ptr workaround once ThreadPool::async() // accepts llvm::unique_function to define jobs. - auto SharedMU = std::shared_ptr(std::move(MU)); - auto SharedMR = - std::make_shared(std::move(MR)); CompileThreads->async( - [MU = std::move(SharedMU), MR = std::move(SharedMR)]() { - MU->materialize(std::move(*MR)); + [UnownedMU = MU.release(), UnownedMR = MR.release()]() { + std::unique_ptr MU(UnownedMU); + std::unique_ptr MR(UnownedMR); + MU->materialize(std::move(MR)); }); } }); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index 9ecc0464dec1b..3a2f8b54ad22b 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -96,7 +96,8 @@ class CompileOnDemandLayer : public IRLayer { /// Emits the given module. This should not be called by clients: it will be /// called by the JIT when a definition added via the add method is requested. 
- void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; + void emit(std::unique_ptr R, + ThreadSafeModule TSM) override; private: struct PerDylibResources { @@ -120,7 +121,8 @@ class CompileOnDemandLayer : public IRLayer { void expandPartition(GlobalValueSet &Partition); - void emitPartition(MaterializationResponsibility R, ThreadSafeModule TSM, + void emitPartition(std::unique_ptr R, + ThreadSafeModule TSM, IRMaterializationUnit::SymbolNameToDefinitionMap Defs); mutable std::mutex CODLayerMutex; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index 6951df3f2d3f2..70bd983c40ce0 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -410,7 +410,7 @@ class UnexpectedSymbolDefinitions : public ErrorInfo + delegate(const SymbolNameSet &Symbols, VModuleKey NewKey = VModuleKey()); void addDependencies(const SymbolStringPtr &Name, const SymbolDependenceMap &Dependencies); @@ -577,7 +577,8 @@ class MaterializationUnit { /// Implementations of this method should materialize all symbols /// in the materialzation unit, except for those that have been /// previously discarded. - virtual void materialize(MaterializationResponsibility R) = 0; + virtual void + materialize(std::unique_ptr R) = 0; /// Called by JITDylibs to notify MaterializationUnits that the given symbol /// has been overridden. @@ -594,10 +595,11 @@ class MaterializationUnit { private: virtual void anchor(); - MaterializationResponsibility + std::unique_ptr createMaterializationResponsibility(std::shared_ptr JD) { - return MaterializationResponsibility(std::move(JD), std::move(SymbolFlags), - std::move(InitSymbol), K); + return std::unique_ptr( + new MaterializationResponsibility(std::move(JD), std::move(SymbolFlags), + std::move(InitSymbol), K)); } /// Implementations of this method should discard the given symbol @@ -621,7 +623,7 @@ class AbsoluteSymbolsMaterializationUnit : public MaterializationUnit { StringRef getName() const override; private: - void materialize(MaterializationResponsibility R) override; + void materialize(std::unique_ptr R) override; void discard(const JITDylib &JD, const SymbolStringPtr &Name) override; static SymbolFlagsMap extractFlags(const SymbolMap &Symbols); @@ -663,7 +665,7 @@ class ReExportsMaterializationUnit : public MaterializationUnit { StringRef getName() const override; private: - void materialize(MaterializationResponsibility R) override; + void materialize(std::unique_ptr R) override; void discard(const JITDylib &JD, const SymbolStringPtr &Name) override; static SymbolFlagsMap extractFlags(const SymbolAliasMap &Aliases); @@ -1116,7 +1118,7 @@ class ExecutionSession { /// For dispatching MaterializationUnit::materialize calls. using DispatchMaterializationFunction = std::function MU, - MaterializationResponsibility MR)>; + std::unique_ptr MR)>; /// Construct an ExecutionSession. /// @@ -1268,10 +1270,11 @@ class ExecutionSession { SymbolState RequiredState = SymbolState::Ready); /// Materialize the given unit. 
- void dispatchMaterialization(std::unique_ptr MU, - MaterializationResponsibility MR) { + void + dispatchMaterialization(std::unique_ptr MU, + std::unique_ptr MR) { assert(MU && "MU must be non-null"); - DEBUG_WITH_TYPE("orc", dumpDispatchInfo(MR.getTargetJITDylib(), *MU)); + DEBUG_WITH_TYPE("orc", dumpDispatchInfo(MR->getTargetJITDylib(), *MU)); DispatchMaterialization(std::move(MU), std::move(MR)); } @@ -1283,9 +1286,9 @@ class ExecutionSession { logAllUnhandledErrors(std::move(Err), errs(), "JIT session error: "); } - static void - materializeOnCurrentThread(std::unique_ptr MU, - MaterializationResponsibility MR) { + static void materializeOnCurrentThread( + std::unique_ptr MU, + std::unique_ptr MR) { MU->materialize(std::move(MR)); } @@ -1309,7 +1312,7 @@ class ExecutionSession { // with callbacks from asynchronous queries. mutable std::recursive_mutex OutstandingMUsMutex; std::vector, - MaterializationResponsibility>> + std::unique_ptr>> OutstandingMUs; }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h index eb74d283f0435..2c53e2f66e851 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h @@ -55,7 +55,8 @@ class IRCompileLayer : public IRLayer { void setNotifyCompiled(NotifyCompiledFunction NotifyCompiled); - void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; + void emit(std::unique_ptr R, + ThreadSafeModule TSM) override; private: mutable std::mutex IRLayerMutex; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h index 296d74ae6b865..ee4ee3437fa6d 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h @@ -37,7 +37,8 @@ class IRTransformLayer : public IRLayer { this->Transform = std::move(Transform); } - void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; + void emit(std::unique_ptr R, + ThreadSafeModule TSM) override; static ThreadSafeModule identityTransform(ThreadSafeModule TSM, MaterializationResponsibility &R) { diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Layer.h b/llvm/include/llvm/ExecutionEngine/Orc/Layer.h index e843d0f562455..c8a41199760da 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Layer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Layer.h @@ -100,7 +100,8 @@ class IRLayer { VModuleKey K = VModuleKey()); /// Emit should materialize the given IR. - virtual void emit(MaterializationResponsibility R, ThreadSafeModule TSM) = 0; + virtual void emit(std::unique_ptr R, + ThreadSafeModule TSM) = 0; private: bool CloneToNewContextOnEmit = false; @@ -117,8 +118,7 @@ class BasicIRLayerMaterializationUnit : public IRMaterializationUnit { ThreadSafeModule TSM, VModuleKey K); private: - - void materialize(MaterializationResponsibility R) override; + void materialize(std::unique_ptr R) override; IRLayer &L; VModuleKey K; @@ -139,7 +139,7 @@ class ObjectLayer { VModuleKey K = VModuleKey()); /// Emit should materialize the given IR. 
- virtual void emit(MaterializationResponsibility R, + virtual void emit(std::unique_ptr R, std::unique_ptr O) = 0; private: @@ -162,8 +162,7 @@ class BasicObjectLayerMaterializationUnit : public MaterializationUnit { StringRef getName() const override; private: - - void materialize(MaterializationResponsibility R) override; + void materialize(std::unique_ptr R) override; void discard(const JITDylib &JD, const SymbolStringPtr &Name) override; ObjectLayer &L; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h b/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h index 9206e40fffb1c..63e3a80d87d86 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h @@ -149,7 +149,7 @@ class LazyReexportsMaterializationUnit : public MaterializationUnit { StringRef getName() const override; private: - void materialize(MaterializationResponsibility R) override; + void materialize(std::unique_ptr R) override; void discard(const JITDylib &JD, const SymbolStringPtr &Name) override; static SymbolFlagsMap extractFlags(const SymbolAliasMap &Aliases); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index cb8ee130ab614..cbcf3928be3df 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -119,7 +119,7 @@ class ObjectLinkingLayer : public ObjectLayer { } /// Emit the object. - void emit(MaterializationResponsibility R, + void emit(std::unique_ptr R, std::unique_ptr O) override; /// Instructs this ObjectLinkingLayer instance to override the symbol flags diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h index bf989cc8677cf..c77649f19fc74 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h @@ -31,7 +31,7 @@ class ObjectTransformLayer : public ObjectLayer { ObjectTransformLayer(ExecutionSession &ES, ObjectLayer &BaseLayer, TransformFunction Transform = TransformFunction()); - void emit(MaterializationResponsibility R, + void emit(std::unique_ptr R, std::unique_ptr O) override; void setTransform(TransformFunction Transform) { diff --git a/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h index 9ada0871cf0cb..9cd3c57a19c6a 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h @@ -58,7 +58,7 @@ class RTDyldObjectLinkingLayer : public ObjectLayer { ~RTDyldObjectLinkingLayer(); /// Emit the object. - void emit(MaterializationResponsibility R, + void emit(std::unique_ptr R, std::unique_ptr O) override; /// Set the NotifyLoaded callback. 
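// Aside: the recurring change in these hunks, threading a move-only
// std::unique_ptr through copyable std::function jobs, reduces to the
// release-and-reown idiom below. A minimal standalone sketch, with all
// names illustrative rather than taken from the patch:

#include <functional>
#include <memory>
#include <queue>

struct Work {
  void run() {}
};

void dispatch(std::queue<std::function<void()>> &Q, std::unique_ptr<Work> W) {
  // std::function requires copyable callables, so move-capturing W would
  // not compile; release to a raw pointer and re-own inside the job.
  Q.push([UnownedW = W.release()] {
    std::unique_ptr<Work> Owned(UnownedW); // re-owned; freed when the job ends
    Owned->run();
  });
}

// Note: as at the patch's dispatch sites, the raw pointer leaks if a queued
// job is dropped without running, so each job must execute exactly once.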
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h index 10f78c8bc6beb..a138f60a77564 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h @@ -181,7 +181,8 @@ class IRSpeculationLayer : public IRLayer { : IRLayer(ES, BaseLayer.getManglingOptions()), NextLayer(BaseLayer), S(Spec), Mangle(Mangle), QueryAnalysis(Interpreter) {} - void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; + void emit(std::unique_ptr R, + ThreadSafeModule TSM) override; private: TargetAndLikelies diff --git a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp index 9e38dc36faae7..dfb0d06bdba3d 100644 --- a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp @@ -88,7 +88,7 @@ class PartitioningIRMaterializationUnit : public IRMaterializationUnit { Parent(Parent) {} private: - void materialize(MaterializationResponsibility R) override { + void materialize(std::unique_ptr R) override { Parent.emitPartition(std::move(R), std::move(TSM), std::move(SymbolToDefinition)); } @@ -128,15 +128,15 @@ void CompileOnDemandLayer::setPartitionFunction(PartitionFunction Partition) { void CompileOnDemandLayer::setImplMap(ImplSymbolMap *Imp) { this->AliaseeImpls = Imp; } -void CompileOnDemandLayer::emit(MaterializationResponsibility R, - ThreadSafeModule TSM) { +void CompileOnDemandLayer::emit( + std::unique_ptr R, ThreadSafeModule TSM) { assert(TSM && "Null module"); auto &ES = getExecutionSession(); // Sort the callables and non-callables, build re-exports and lodge the // actual module with the implementation dylib. - auto &PDR = getPerDylibResources(R.getTargetJITDylib()); + auto &PDR = getPerDylibResources(R->getTargetJITDylib()); SymbolAliasMap NonCallables; SymbolAliasMap Callables; @@ -145,7 +145,7 @@ void CompileOnDemandLayer::emit(MaterializationResponsibility R, cleanUpModule(M); }); - for (auto &KV : R.getSymbols()) { + for (auto &KV : R->getSymbols()) { auto &Name = KV.first; auto &Flags = KV.second; if (Flags.isCallable()) @@ -158,19 +158,19 @@ void CompileOnDemandLayer::emit(MaterializationResponsibility R, // implementation dylib. 
if (auto Err = PDR.getImplDylib().define( std::make_unique( - ES, *getManglingOptions(), std::move(TSM), R.getVModuleKey(), + ES, *getManglingOptions(), std::move(TSM), R->getVModuleKey(), *this))) { ES.reportError(std::move(Err)); - R.failMaterialization(); + R->failMaterialization(); return; } if (!NonCallables.empty()) - R.replace(reexports(PDR.getImplDylib(), std::move(NonCallables), - JITDylibLookupFlags::MatchAllSymbols)); + R->replace(reexports(PDR.getImplDylib(), std::move(NonCallables), + JITDylibLookupFlags::MatchAllSymbols)); if (!Callables.empty()) - R.replace(lazyReexports(LCTMgr, PDR.getISManager(), PDR.getImplDylib(), - std::move(Callables), AliaseeImpls)); + R->replace(lazyReexports(LCTMgr, PDR.getISManager(), PDR.getImplDylib(), + std::move(Callables), AliaseeImpls)); } CompileOnDemandLayer::PerDylibResources & @@ -247,7 +247,7 @@ void CompileOnDemandLayer::expandPartition(GlobalValueSet &Partition) { } void CompileOnDemandLayer::emitPartition( - MaterializationResponsibility R, ThreadSafeModule TSM, + std::unique_ptr R, ThreadSafeModule TSM, IRMaterializationUnit::SymbolNameToDefinitionMap Defs) { // FIXME: Need a 'notify lazy-extracting/emitting' callback to tie the @@ -257,8 +257,8 @@ void CompileOnDemandLayer::emitPartition( auto &ES = getExecutionSession(); GlobalValueSet RequestedGVs; - for (auto &Name : R.getRequestedSymbols()) { - if (Name == R.getInitializerSymbol()) + for (auto &Name : R->getRequestedSymbols()) { + if (Name == R->getInitializerSymbol()) TSM.withModuleDo([&](Module &M) { for (auto &GV : getStaticInitGVs(M)) RequestedGVs.insert(&GV); @@ -285,9 +285,9 @@ void CompileOnDemandLayer::emitPartition( // If the partition is empty, return the whole module to the symbol table. if (GVsToExtract->empty()) { - R.replace(std::make_unique( - std::move(TSM), R.getVModuleKey(), R.getSymbols(), - R.getInitializerSymbol(), std::move(Defs), *this)); + R->replace(std::make_unique( + std::move(TSM), R->getVModuleKey(), R->getSymbols(), + R->getInitializerSymbol(), std::move(Defs), *this)); return; } @@ -308,7 +308,7 @@ void CompileOnDemandLayer::emitPartition( IRSymbolMapper::add(ES, *getManglingOptions(), PromotedGlobals, SymbolFlags); - if (auto Err = R.defineMaterializing(SymbolFlags)) + if (auto Err = R->defineMaterializing(SymbolFlags)) return std::move(Err); } @@ -348,12 +348,12 @@ void CompileOnDemandLayer::emitPartition( if (!ExtractedTSM) { ES.reportError(ExtractedTSM.takeError()); - R.failMaterialization(); + R->failMaterialization(); return; } - R.replace(std::make_unique( - ES, *getManglingOptions(), std::move(TSM), R.getVModuleKey(), *this)); + R->replace(std::make_unique( + ES, *getManglingOptions(), std::move(TSM), R->getVModuleKey(), *this)); BaseLayer.emit(std::move(R), std::move(*ExtractedTSM)); } diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 18eced68f07bc..243bac79c012f 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -279,7 +279,7 @@ void MaterializationResponsibility::replace( JD->replace(std::move(MU)); } -MaterializationResponsibility +std::unique_ptr MaterializationResponsibility::delegate(const SymbolNameSet &Symbols, VModuleKey NewKey) { @@ -302,9 +302,10 @@ MaterializationResponsibility::delegate(const SymbolNameSet &Symbols, SymbolFlags.erase(I); } - return MaterializationResponsibility(JD, std::move(DelegatedFlags), - std::move(DelegatedInitSymbol), - std::move(NewKey)); + return std::unique_ptr( + new MaterializationResponsibility(JD, 
std::move(DelegatedFlags), + std::move(DelegatedInitSymbol), + std::move(NewKey))); } void MaterializationResponsibility::addDependencies( @@ -338,10 +339,10 @@ StringRef AbsoluteSymbolsMaterializationUnit::getName() const { } void AbsoluteSymbolsMaterializationUnit::materialize( - MaterializationResponsibility R) { + std::unique_ptr R) { // No dependencies, so these calls can't fail. - cantFail(R.notifyResolved(Symbols)); - cantFail(R.notifyEmitted()); + cantFail(R->notifyResolved(Symbols)); + cantFail(R->notifyEmitted()); } void AbsoluteSymbolsMaterializationUnit::discard(const JITDylib &JD, @@ -370,16 +371,16 @@ StringRef ReExportsMaterializationUnit::getName() const { } void ReExportsMaterializationUnit::materialize( - MaterializationResponsibility R) { + std::unique_ptr R) { - auto &ES = R.getTargetJITDylib().getExecutionSession(); - JITDylib &TgtJD = R.getTargetJITDylib(); + auto &ES = R->getTargetJITDylib().getExecutionSession(); + JITDylib &TgtJD = R->getTargetJITDylib(); JITDylib &SrcJD = SourceJD ? *SourceJD : TgtJD; // Find the set of requested aliases and aliasees. Return any unrequested // aliases back to the JITDylib so as to not prematurely materialize any // aliasees. - auto RequestedSymbols = R.getRequestedSymbols(); + auto RequestedSymbols = R->getRequestedSymbols(); SymbolAliasMap RequestedAliases; for (auto &Name : RequestedSymbols) { @@ -399,18 +400,19 @@ void ReExportsMaterializationUnit::materialize( if (!Aliases.empty()) { if (SourceJD) - R.replace(reexports(*SourceJD, std::move(Aliases), SourceJDLookupFlags)); + R->replace(reexports(*SourceJD, std::move(Aliases), SourceJDLookupFlags)); else - R.replace(symbolAliases(std::move(Aliases))); + R->replace(symbolAliases(std::move(Aliases))); } // The OnResolveInfo struct will hold the aliases and responsibilty for each // query in the list. 
struct OnResolveInfo { - OnResolveInfo(MaterializationResponsibility R, SymbolAliasMap Aliases) + OnResolveInfo(std::unique_ptr R, + SymbolAliasMap Aliases) : R(std::move(R)), Aliases(std::move(Aliases)) {} - MaterializationResponsibility R; + std::unique_ptr R; SymbolAliasMap Aliases; }; @@ -451,7 +453,7 @@ void ReExportsMaterializationUnit::materialize( assert(!QuerySymbols.empty() && "Alias cycle detected!"); auto QueryInfo = std::make_shared( - R.delegate(ResponsibilitySymbols), std::move(QueryAliases)); + R->delegate(ResponsibilitySymbols), std::move(QueryAliases)); QueryInfos.push_back( make_pair(std::move(QuerySymbols), std::move(QueryInfo))); } @@ -480,12 +482,12 @@ void ReExportsMaterializationUnit::materialize( for (auto &KV : QueryInfo->Aliases) if (SrcJDDeps.count(KV.second.Aliasee)) { PerAliasDeps = {KV.second.Aliasee}; - QueryInfo->R.addDependencies(KV.first, PerAliasDepsMap); + QueryInfo->R->addDependencies(KV.first, PerAliasDepsMap); } }; auto OnComplete = [QueryInfo](Expected Result) { - auto &ES = QueryInfo->R.getTargetJITDylib().getExecutionSession(); + auto &ES = QueryInfo->R->getTargetJITDylib().getExecutionSession(); if (Result) { SymbolMap ResolutionMap; for (auto &KV : QueryInfo->Aliases) { @@ -499,19 +501,19 @@ void ReExportsMaterializationUnit::materialize( ResolutionMap[KV.first] = JITEvaluatedSymbol( (*Result)[KV.second.Aliasee].getAddress(), KV.second.AliasFlags); } - if (auto Err = QueryInfo->R.notifyResolved(ResolutionMap)) { + if (auto Err = QueryInfo->R->notifyResolved(ResolutionMap)) { ES.reportError(std::move(Err)); - QueryInfo->R.failMaterialization(); + QueryInfo->R->failMaterialization(); return; } - if (auto Err = QueryInfo->R.notifyEmitted()) { + if (auto Err = QueryInfo->R->notifyEmitted()) { ES.reportError(std::move(Err)); - QueryInfo->R.failMaterialization(); + QueryInfo->R->failMaterialization(); return; } } else { ES.reportError(Result.takeError()); - QueryInfo->R.failMaterialization(); + QueryInfo->R->failMaterialization(); } }; @@ -2131,7 +2133,7 @@ void ExecutionSession::dump(raw_ostream &OS) { void ExecutionSession::runOutstandingMUs() { while (1) { Optional, - MaterializationResponsibility>> + std::unique_ptr>> JMU; { diff --git a/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp b/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp index 023940dc82982..c6f6870279728 100644 --- a/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp @@ -25,7 +25,7 @@ void IRCompileLayer::setNotifyCompiled(NotifyCompiledFunction NotifyCompiled) { this->NotifyCompiled = std::move(NotifyCompiled); } -void IRCompileLayer::emit(MaterializationResponsibility R, +void IRCompileLayer::emit(std::unique_ptr R, ThreadSafeModule TSM) { assert(TSM && "Module must not be null"); @@ -33,13 +33,13 @@ void IRCompileLayer::emit(MaterializationResponsibility R, { std::lock_guard Lock(IRLayerMutex); if (NotifyCompiled) - NotifyCompiled(R.getVModuleKey(), std::move(TSM)); + NotifyCompiled(R->getVModuleKey(), std::move(TSM)); else TSM = ThreadSafeModule(); } BaseLayer.emit(std::move(R), std::move(*Obj)); } else { - R.failMaterialization(); + R->failMaterialization(); getExecutionSession().reportError(Obj.takeError()); } } diff --git a/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp b/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp index 511248f83b259..d5b11349277c1 100644 --- a/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp @@ -17,14 +17,14 @@ 
IRTransformLayer::IRTransformLayer(ExecutionSession &ES, IRLayer &BaseLayer, : IRLayer(ES, BaseLayer.getManglingOptions()), BaseLayer(BaseLayer), Transform(std::move(Transform)) {} -void IRTransformLayer::emit(MaterializationResponsibility R, +void IRTransformLayer::emit(std::unique_ptr R, ThreadSafeModule TSM) { assert(TSM && "Module must not be null"); - if (auto TransformedTSM = Transform(std::move(TSM), R)) + if (auto TransformedTSM = Transform(std::move(TSM), *R)) BaseLayer.emit(std::move(R), std::move(*TransformedTSM)); else { - R.failMaterialization(); + R->failMaterialization(); getExecutionSession().reportError(TransformedTSM.takeError()); } } diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index 4f7f6089e68db..7d57ed5a3a04c 100644 --- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -33,12 +33,12 @@ class CompileCallbackMaterializationUnit : public orc::MaterializationUnit { StringRef getName() const override { return ""; } private: - void materialize(MaterializationResponsibility R) override { + void materialize(std::unique_ptr R) override { SymbolMap Result; Result[Name] = JITEvaluatedSymbol(Compile(), JITSymbolFlags::Exported); // No dependencies, so these calls cannot fail. - cantFail(R.notifyResolved(Result)); - cantFail(R.notifyEmitted()); + cantFail(R->notifyResolved(Result)); + cantFail(R->notifyEmitted()); } void discard(const JITDylib &JD, const SymbolStringPtr &Name) override { diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp index 373d86d92f8d7..81f500d66bc29 100644 --- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp @@ -1085,15 +1085,17 @@ LLJIT::LLJIT(LLJITBuilderState &S, Error &Err) std::make_unique(hardware_concurrency(S.NumCompileThreads)); ES->setDispatchMaterialization( [this](std::unique_ptr MU, - MaterializationResponsibility MR) { - // FIXME: Switch to move capture once ThreadPool uses unique_function. - auto SharedMU = std::shared_ptr(std::move(MU)); - auto SharedMR = - std::make_shared(std::move(MR)); - auto Work = [SharedMU, SharedMR]() mutable { - SharedMU->materialize(std::move(*SharedMR)); - }; - CompileThreads->async(std::move(Work)); + std::unique_ptr MR) { + // FIXME: We should be able to use move-capture here, but ThreadPool's + // AsyncTaskTys are std::functions rather than unique_functions + // (because MSVC's std::packaged_tasks don't support move-only types). + // Fix this when all the above gets sorted out. + CompileThreads->async( + [UnownedMU = MU.release(), UnownedMR = MR.release()]() mutable { + std::unique_ptr MU(UnownedMU); + std::unique_ptr MR(UnownedMR); + MU->materialize(std::move(MR)); + }); }); } diff --git a/llvm/lib/ExecutionEngine/Orc/Layer.cpp b/llvm/lib/ExecutionEngine/Orc/Layer.cpp index 0a5d5577e99e8..8052e7b08a5a6 100644 --- a/llvm/lib/ExecutionEngine/Orc/Layer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Layer.cpp @@ -133,7 +133,7 @@ BasicIRLayerMaterializationUnit::BasicIRLayerMaterializationUnit( L(L), K(std::move(K)) {} void BasicIRLayerMaterializationUnit::materialize( - MaterializationResponsibility R) { + std::unique_ptr R) { // Throw away the SymbolToDefinition map: it's not usable after we hand // off the module. 
@@ -144,8 +144,8 @@ void BasicIRLayerMaterializationUnit::materialize( TSM = cloneToNewContext(TSM); #ifndef NDEBUG - auto &ES = R.getTargetJITDylib().getExecutionSession(); - auto &N = R.getTargetJITDylib().getName(); + auto &ES = R->getTargetJITDylib().getExecutionSession(); + auto &N = R->getTargetJITDylib().getName(); #endif // NDEBUG LLVM_DEBUG(ES.runSessionLocked( @@ -200,7 +200,7 @@ StringRef BasicObjectLayerMaterializationUnit::getName() const { } void BasicObjectLayerMaterializationUnit::materialize( - MaterializationResponsibility R) { + std::unique_ptr R) { L.emit(std::move(R), std::move(O)); } diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp index 5e604130d6eab..695f6cc9c1cb4 100644 --- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp @@ -154,8 +154,8 @@ StringRef LazyReexportsMaterializationUnit::getName() const { } void LazyReexportsMaterializationUnit::materialize( - MaterializationResponsibility R) { - auto RequestedSymbols = R.getRequestedSymbols(); + std::unique_ptr R) { + auto RequestedSymbols = R->getRequestedSymbols(); SymbolAliasMap RequestedAliases; for (auto &RequestedSymbol : RequestedSymbols) { @@ -166,8 +166,8 @@ void LazyReexportsMaterializationUnit::materialize( } if (!CallableAliases.empty()) - R.replace(lazyReexports(LCTManager, ISManager, SourceJD, - std::move(CallableAliases), AliaseeTable)); + R->replace(lazyReexports(LCTManager, ISManager, SourceJD, + std::move(CallableAliases), AliaseeTable)); IndirectStubsManager::StubInitsMap StubInits; for (auto &Alias : RequestedAliases) { @@ -182,7 +182,7 @@ void LazyReexportsMaterializationUnit::materialize( if (!CallThroughTrampoline) { SourceJD.getExecutionSession().reportError( CallThroughTrampoline.takeError()); - R.failMaterialization(); + R->failMaterialization(); return; } @@ -195,7 +195,7 @@ void LazyReexportsMaterializationUnit::materialize( if (auto Err = ISManager.createStubs(StubInits)) { SourceJD.getExecutionSession().reportError(std::move(Err)); - R.failMaterialization(); + R->failMaterialization(); return; } @@ -204,8 +204,8 @@ void LazyReexportsMaterializationUnit::materialize( Stubs[Alias.first] = ISManager.findStub(*Alias.first, false); // No registered dependencies, so these calls cannot fail. 
- cantFail(R.notifyResolved(Stubs)); - cantFail(R.notifyEmitted()); + cantFail(R->notifyResolved(Stubs)); + cantFail(R->notifyEmitted()); } void LazyReexportsMaterializationUnit::discard(const JITDylib &JD, diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index d8283fa7e3461..9e3245d9cc991 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -24,9 +24,10 @@ namespace orc { class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { public: - ObjectLinkingLayerJITLinkContext(ObjectLinkingLayer &Layer, - MaterializationResponsibility MR, - std::unique_ptr ObjBuffer) + ObjectLinkingLayerJITLinkContext( + ObjectLinkingLayer &Layer, + std::unique_ptr MR, + std::unique_ptr ObjBuffer) : Layer(Layer), MR(std::move(MR)), ObjBuffer(std::move(ObjBuffer)) {} ~ObjectLinkingLayerJITLinkContext() { @@ -44,14 +45,14 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { void notifyFailed(Error Err) override { Layer.getExecutionSession().reportError(std::move(Err)); - MR.failMaterialization(); + MR->failMaterialization(); } void lookup(const LookupMap &Symbols, std::unique_ptr LC) override { JITDylibSearchOrder LinkOrder; - MR.getTargetJITDylib().withLinkOrderDo( + MR->getTargetJITDylib().withLinkOrderDo( [&](const JITDylibSearchOrder &LO) { LinkOrder = LO; }); auto &ES = Layer.getExecutionSession(); @@ -85,8 +86,8 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { for (auto &KV : InternalNamedSymbolDeps) { SymbolDependenceMap InternalDeps; - InternalDeps[&MR.getTargetJITDylib()] = std::move(KV.second); - MR.addDependencies(KV.first, InternalDeps); + InternalDeps[&MR->getTargetJITDylib()] = std::move(KV.second); + MR->addDependencies(KV.first, InternalDeps); } ES.lookup(LookupKind::Static, LinkOrder, std::move(LookupSet), @@ -115,7 +116,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { InternedResult[InternedName] = JITEvaluatedSymbol(Sym->getAddress(), Flags); - if (AutoClaim && !MR.getSymbols().count(InternedName)) { + if (AutoClaim && !MR->getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); ExtraSymbolsToClaim[InternedName] = Flags; @@ -133,7 +134,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { Flags |= JITSymbolFlags::Weak; InternedResult[InternedName] = JITEvaluatedSymbol(Sym->getAddress(), Flags); - if (AutoClaim && !MR.getSymbols().count(InternedName)) { + if (AutoClaim && !MR->getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); ExtraSymbolsToClaim[InternedName] = Flags; @@ -141,19 +142,19 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { } if (!ExtraSymbolsToClaim.empty()) - if (auto Err = MR.defineMaterializing(ExtraSymbolsToClaim)) + if (auto Err = MR->defineMaterializing(ExtraSymbolsToClaim)) return Err; { - // Check that InternedResult matches up with MR.getSymbols(). + // Check that InternedResult matches up with MR->getSymbols(). // This guards against faulty transformations / compilers / object caches. // First check that there aren't any missing symbols. 
size_t NumMaterializationSideEffectsOnlySymbols = 0; SymbolNameVector ExtraSymbols; SymbolNameVector MissingSymbols; - for (auto &KV : MR.getSymbols()) { + for (auto &KV : MR->getSymbols()) { // If this is a materialization-side-effects only symbol then bump // the counter and make sure it's *not* defined, otherwise make @@ -175,9 +176,9 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { // If there are more definitions than expected, add them to the // ExtraSymbols vector. if (InternedResult.size() > - MR.getSymbols().size() - NumMaterializationSideEffectsOnlySymbols) { + MR->getSymbols().size() - NumMaterializationSideEffectsOnlySymbols) { for (auto &KV : InternedResult) - if (!MR.getSymbols().count(KV.first)) + if (!MR->getSymbols().count(KV.first)) ExtraSymbols.push_back(KV.first); } @@ -187,23 +188,23 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { std::move(ExtraSymbols)); } - if (auto Err = MR.notifyResolved(InternedResult)) + if (auto Err = MR->notifyResolved(InternedResult)) return Err; - Layer.notifyLoaded(MR); + Layer.notifyLoaded(*MR); return Error::success(); } void notifyFinalized( std::unique_ptr A) override { - if (auto Err = Layer.notifyEmitted(MR, std::move(A))) { + if (auto Err = Layer.notifyEmitted(*MR, std::move(A))) { Layer.getExecutionSession().reportError(std::move(Err)); - MR.failMaterialization(); + MR->failMaterialization(); return; } - if (auto Err = MR.notifyEmitted()) { + if (auto Err = MR->notifyEmitted()) { Layer.getExecutionSession().reportError(std::move(Err)); - MR.failMaterialization(); + MR->failMaterialization(); } } @@ -217,7 +218,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { Config.PrePrunePasses.push_back( [this](LinkGraph &G) { return externalizeWeakAndCommonSymbols(G); }); - Layer.modifyPassConfig(MR, TT, Config); + Layer.modifyPassConfig(*MR, TT, Config); Config.PostPrunePasses.push_back( [this](LinkGraph &G) { return computeNamedSymbolDependencies(G); }); @@ -237,13 +238,13 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { auto &ES = Layer.getExecutionSession(); for (auto *Sym : G.defined_symbols()) if (Sym->hasName() && Sym->getLinkage() == Linkage::Weak) { - if (!MR.getSymbols().count(ES.intern(Sym->getName()))) + if (!MR->getSymbols().count(ES.intern(Sym->getName()))) G.makeExternal(*Sym); } for (auto *Sym : G.absolute_symbols()) if (Sym->hasName() && Sym->getLinkage() == Linkage::Weak) { - if (!MR.getSymbols().count(ES.intern(Sym->getName()))) + if (!MR->getSymbols().count(ES.intern(Sym->getName()))) G.makeExternal(*Sym); } @@ -253,13 +254,13 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { Error markResponsibilitySymbolsLive(LinkGraph &G) const { auto &ES = Layer.getExecutionSession(); for (auto *Sym : G.defined_symbols()) - if (Sym->hasName() && MR.getSymbols().count(ES.intern(Sym->getName()))) + if (Sym->hasName() && MR->getSymbols().count(ES.intern(Sym->getName()))) Sym->setLive(true); return Error::success(); } Error computeNamedSymbolDependencies(LinkGraph &G) { - auto &ES = MR.getTargetJITDylib().getExecutionSession(); + auto &ES = MR->getTargetJITDylib().getExecutionSession(); auto LocalDeps = computeLocalDeps(G); // Compute dependencies for symbols defined in the JITLink graph. 
@@ -306,7 +307,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { } for (auto &P : Layer.Plugins) { - auto SyntheticLocalDeps = P->getSyntheticSymbolLocalDependencies(MR); + auto SyntheticLocalDeps = P->getSyntheticSymbolLocalDependencies(*MR); if (SyntheticLocalDeps.empty()) continue; @@ -426,12 +427,12 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { SymbolDeps.erase(&SourceJD); } - MR.addDependencies(Name, SymbolDeps); + MR->addDependencies(Name, SymbolDeps); } } ObjectLinkingLayer &Layer; - MaterializationResponsibility MR; + std::unique_ptr MR; std::unique_ptr ObjBuffer; DenseMap ExternalNamedSymbolDeps; DenseMap InternalNamedSymbolDeps; @@ -452,7 +453,7 @@ ObjectLinkingLayer::~ObjectLinkingLayer() { getExecutionSession().reportError(std::move(Err)); } -void ObjectLinkingLayer::emit(MaterializationResponsibility R, +void ObjectLinkingLayer::emit(std::unique_ptr R, std::unique_ptr O) { assert(O && "Object must not be null"); jitLink(std::make_unique( diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp index d18eb38a41423..a57662e10a794 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp @@ -17,8 +17,9 @@ ObjectTransformLayer::ObjectTransformLayer(ExecutionSession &ES, TransformFunction Transform) : ObjectLayer(ES), BaseLayer(BaseLayer), Transform(std::move(Transform)) {} -void ObjectTransformLayer::emit(MaterializationResponsibility R, - std::unique_ptr O) { +void ObjectTransformLayer::emit( + std::unique_ptr R, + std::unique_ptr O) { assert(O && "Module must not be null"); // If there is a transform set then apply it. @@ -26,7 +27,7 @@ void ObjectTransformLayer::emit(MaterializationResponsibility R, if (auto TransformedObj = Transform(std::move(O))) O = std::move(*TransformedObj); else { - R.failMaterialization(); + R->failMaterialization(); getExecutionSession().reportError(TransformedObj.takeError()); return; } diff --git a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp index 7888c2fcbdbd9..1981039eb9f12 100644 --- a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp @@ -89,23 +89,18 @@ RTDyldObjectLinkingLayer::~RTDyldObjectLinkingLayer() { } } -void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R, - std::unique_ptr O) { +void RTDyldObjectLinkingLayer::emit( + std::unique_ptr R, + std::unique_ptr O) { assert(O && "Object must not be null"); - // This method launches an asynchronous link step that will fulfill our - // materialization responsibility. We need to switch R to be heap - // allocated before that happens so it can live as long as the asynchronous - // link needs it to (i.e. it must be able to outlive this method). - auto SharedR = std::make_shared(std::move(R)); - auto &ES = getExecutionSession(); auto Obj = object::ObjectFile::createObjectFile(*O); if (!Obj) { getExecutionSession().reportError(Obj.takeError()); - SharedR->failMaterialization(); + R->failMaterialization(); return; } @@ -121,7 +116,7 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R, continue; } else { ES.reportError(SymType.takeError()); - R.failMaterialization(); + R->failMaterialization(); return; } @@ -129,7 +124,7 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R, if (!SymFlagsOrErr) { // TODO: Test this error. 
ES.reportError(SymFlagsOrErr.takeError()); - R.failMaterialization(); + R->failMaterialization(); return; } @@ -139,14 +134,14 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R, InternalSymbols->insert(*SymName); else { ES.reportError(SymName.takeError()); - R.failMaterialization(); + R->failMaterialization(); return; } } } } - auto K = R.getVModuleKey(); + auto K = R->getVModuleKey(); RuntimeDyld::MemoryManager *MemMgr = nullptr; // Create a record a memory manager for this object. @@ -157,6 +152,10 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R, MemMgr = MemMgrs.back().get(); } + // Switch to shared ownership of MR so that it can be captured by both + // lambdas below. + std::shared_ptr SharedR(std::move(R)); + JITDylibSearchOrderResolver Resolver(*SharedR); jitLinkForORC( diff --git a/llvm/lib/ExecutionEngine/Orc/Speculation.cpp b/llvm/lib/ExecutionEngine/Orc/Speculation.cpp index 3dd536d8253e3..0b4755fe23cfc 100644 --- a/llvm/lib/ExecutionEngine/Orc/Speculation.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Speculation.cpp @@ -55,7 +55,7 @@ Error Speculator::addSpeculationRuntime(JITDylib &JD, // If two modules, share the same LLVMContext, different threads must // not access them concurrently without locking the associated LLVMContext // this implementation follows this contract. -void IRSpeculationLayer::emit(MaterializationResponsibility R, +void IRSpeculationLayer::emit(std::unique_ptr R, ThreadSafeModule TSM) { assert(TSM && "Speculation Layer received Null Module ?"); @@ -127,7 +127,7 @@ void IRSpeculationLayer::emit(MaterializationResponsibility R, assert(Mutator.GetInsertBlock()->getParent() == &Fn && "IR builder association mismatch?"); S.registerSymbols(internToJITSymbols(IRNames.getValue()), - &R.getTargetJITDylib()); + &R->getTargetJITDylib()); } } } diff --git a/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp b/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp index 2c008dfdbd33e..9a1dbbb172517 100644 --- a/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp @@ -35,12 +35,12 @@ TEST_F(CoreAPIsStandardTest, BasicSuccessfulLookup) { OnCompletionRun = true; }; - std::shared_ptr FooMR; + std::unique_ptr FooMR; cantFail(JD.define(std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](MaterializationResponsibility R) { - FooMR = std::make_shared(std::move(R)); + [&](std::unique_ptr R) { + FooMR = std::move(R); }))); ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), @@ -99,9 +99,9 @@ TEST_F(CoreAPIsStandardTest, ResolveUnrequestedSymbol) { cantFail(JD.define(std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}, {Bar, BarSym.getFlags()}}), - [this](MaterializationResponsibility R) { - cantFail(R.notifyResolved({{Foo, FooSym}, {Bar, BarSym}})); - cantFail(R.notifyEmitted()); + [this](std::unique_ptr R) { + cantFail(R->notifyResolved({{Foo, FooSym}, {Bar, BarSym}})); + cantFail(R->notifyEmitted()); }))); auto Result = @@ -116,14 +116,16 @@ TEST_F(CoreAPIsStandardTest, MaterializationSideEffctsOnlyBasic) { // don't return until they're emitted, and that they don't appear in query // results. 
- Optional FooR; + std::unique_ptr FooR; Optional Result; cantFail(JD.define(std::make_unique( SymbolFlagsMap( {{Foo, JITSymbolFlags::Exported | JITSymbolFlags::MaterializationSideEffectsOnly}}), - [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }))); + [&](std::unique_ptr R) { + FooR = std::move(R); + }))); ES.lookup( LookupKind::Static, makeJITDylibSearchOrder(&JD), @@ -155,7 +157,9 @@ TEST_F(CoreAPIsStandardTest, MaterializationSideEffectsOnlyFailuresPersist) { SymbolFlagsMap( {{Foo, JITSymbolFlags::Exported | JITSymbolFlags::MaterializationSideEffectsOnly}}), - [&](MaterializationResponsibility R) { R.failMaterialization(); }))); + [&](std::unique_ptr R) { + R->failMaterialization(); + }))); EXPECT_THAT_EXPECTED( ES.lookup(makeJITDylibSearchOrder(&JD), SymbolLookupSet({Foo})), @@ -182,10 +186,10 @@ TEST_F(CoreAPIsStandardTest, RemoveSymbolsTest) { bool BarMaterializerDestructed = false; cantFail(JD.define(std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [this](MaterializationResponsibility R) { + [this](std::unique_ptr R) { ADD_FAILURE() << "Unexpected materialization of \"Bar\""; - cantFail(R.notifyResolved({{Bar, BarSym}})); - cantFail(R.notifyEmitted()); + cantFail(R->notifyResolved({{Bar, BarSym}})); + cantFail(R->notifyEmitted()); }, nullptr, [&](const JITDylib &JD, const SymbolStringPtr &Name) { @@ -197,10 +201,12 @@ TEST_F(CoreAPIsStandardTest, RemoveSymbolsTest) { // Baz will be in the materializing state initially, then // materialized for the final removal attempt. - Optional BazR; + std::unique_ptr BazR; cantFail(JD.define(std::make_unique( SymbolFlagsMap({{Baz, BazSym.getFlags()}}), - [&](MaterializationResponsibility R) { BazR.emplace(std::move(R)); }, + [&](std::unique_ptr R) { + BazR = std::move(R); + }, nullptr, [](const JITDylib &JD, const SymbolStringPtr &Name) { ADD_FAILURE() << "\"Baz\" discarded unexpectedly"; @@ -297,7 +303,7 @@ TEST_F(CoreAPIsStandardTest, LookupFlagsTest) { JITSymbolFlags::Exported | JITSymbolFlags::Weak)); auto MU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [](MaterializationResponsibility R) { + [](std::unique_ptr R) { llvm_unreachable("Symbol materialized on flags lookup"); }); @@ -400,10 +406,10 @@ TEST_F(CoreAPIsStandardTest, TestThatReExportsDontUnnecessarilyMaterialize) { bool BarMaterialized = false; auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { + [&](std::unique_ptr R) { BarMaterialized = true; - cantFail(R.notifyResolved({{Bar, BarSym}})); - cantFail(R.notifyEmitted()); + cantFail(R->notifyResolved({{Bar, BarSym}})); + cantFail(R->notifyEmitted()); }); cantFail(JD.define(BarMU)); @@ -444,10 +450,12 @@ TEST_F(CoreAPIsStandardTest, TestReexportsGenerator) { } TEST_F(CoreAPIsStandardTest, TestTrivialCircularDependency) { - Optional FooR; + std::unique_ptr FooR; auto FooMU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + FooR = std::move(R); + }); cantFail(JD.define(FooMU)); @@ -476,26 +484,29 @@ TEST_F(CoreAPIsStandardTest, TestCircularDependenceInOneJITDylib) { // does not prevent any symbol from becoming 'ready' once all symbols are // emitted. - // Create three MaterializationResponsibility objects: one for each of Foo, - // Bar and Baz. These are optional because MaterializationResponsibility - // does not have a default constructor). 
- Optional FooR; - Optional BarR; - Optional BazR; + std::unique_ptr FooR; + std::unique_ptr BarR; + std::unique_ptr BazR; // Create a MaterializationUnit for each symbol that moves the // MaterializationResponsibility into one of the locals above. auto FooMU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + FooR = std::move(R); + }); auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { BarR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + BarR = std::move(R); + }); auto BazMU = std::make_unique( SymbolFlagsMap({{Baz, BazSym.getFlags()}}), - [&](MaterializationResponsibility R) { BazR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + BazR = std::move(R); + }); // Define the symbols. cantFail(JD.define(FooMU)); @@ -622,18 +633,22 @@ TEST_F(CoreAPIsStandardTest, TestCircularDependenceInOneJITDylib) { } TEST_F(CoreAPIsStandardTest, FailureInDependency) { - Optional FooR; - Optional BarR; + std::unique_ptr FooR; + std::unique_ptr BarR; // Create a MaterializationUnit for each symbol that moves the // MaterializationResponsibility into one of the locals above. auto FooMU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + FooR = std::move(R); + }); auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { BarR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + BarR = std::move(R); + }); // Define the symbols. cantFail(JD.define(FooMU)); @@ -687,18 +702,22 @@ TEST_F(CoreAPIsStandardTest, FailureInDependency) { } TEST_F(CoreAPIsStandardTest, FailureInCircularDependency) { - Optional FooR; - Optional BarR; + std::unique_ptr FooR; + std::unique_ptr BarR; // Create a MaterializationUnit for each symbol that moves the // MaterializationResponsibility into one of the locals above. auto FooMU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + FooR = std::move(R); + }); auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { BarR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + BarR = std::move(R); + }); // Define the symbols. cantFail(JD.define(FooMU)); @@ -753,18 +772,22 @@ TEST_F(CoreAPIsStandardTest, FailureInCircularDependency) { } TEST_F(CoreAPIsStandardTest, AddDependencyOnFailedSymbol) { - Optional FooR; - Optional BarR; + std::unique_ptr FooR; + std::unique_ptr BarR; // Create a MaterializationUnit for each symbol that moves the // MaterializationResponsibility into one of the locals above. auto FooMU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + FooR = std::move(R); + }); auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { BarR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + BarR = std::move(R); + }); // Define the symbols. 
cantFail(JD.define(FooMU)); @@ -819,18 +842,22 @@ TEST_F(CoreAPIsStandardTest, AddDependencyOnFailedSymbol) { } TEST_F(CoreAPIsStandardTest, FailAfterMaterialization) { - Optional FooR; - Optional BarR; + std::unique_ptr FooR; + std::unique_ptr BarR; // Create a MaterializationUnit for each symbol that moves the // MaterializationResponsibility into one of the locals above. auto FooMU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](MaterializationResponsibility R) { FooR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + FooR = std::move(R); + }); auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { BarR.emplace(std::move(R)); }); + [&](std::unique_ptr R) { + BarR = std::move(R); + }); // Define the symbols. cantFail(JD.define(FooMU)); @@ -882,9 +909,9 @@ TEST_F(CoreAPIsStandardTest, FailMaterializerWithUnqueriedSymbols) { auto MU = std::make_unique( SymbolFlagsMap( {{Foo, JITSymbolFlags::Exported}, {Bar, JITSymbolFlags::Exported}}), - [&](MaterializationResponsibility R) { + [&](std::unique_ptr R) { MaterializerRun = true; - R.failMaterialization(); + R->failMaterialization(); }); cantFail(JD.define(std::move(MU))); @@ -911,7 +938,7 @@ TEST_F(CoreAPIsStandardTest, DropMaterializerWhenEmpty) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, WeakExported}, {Bar, WeakExported}}), - [](MaterializationResponsibility R) { + [](std::unique_ptr R) { llvm_unreachable("Unexpected call to materialize"); }, nullptr, @@ -943,10 +970,10 @@ TEST_F(CoreAPIsStandardTest, AddAndMaterializeLazySymbol) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, JITSymbolFlags::Exported}, {Bar, WeakExported}}), - [&](MaterializationResponsibility R) { + [&](std::unique_ptr R) { assert(BarDiscarded && "Bar should have been discarded by this point"); - cantFail(R.notifyResolved(SymbolMap({{Foo, FooSym}}))); - cantFail(R.notifyEmitted()); + cantFail(R->notifyResolved(SymbolMap({{Foo, FooSym}}))); + cantFail(R->notifyEmitted()); FooMaterialized = true; }, nullptr, @@ -985,18 +1012,18 @@ TEST_F(CoreAPIsStandardTest, TestBasicWeakSymbolMaterialization) { bool BarMaterialized = false; auto MU1 = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}, {Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { - cantFail(R.notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); - cantFail(R.notifyEmitted()); + [&](std::unique_ptr R) { + cantFail(R->notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); + cantFail(R->notifyEmitted()); BarMaterialized = true; }); bool DuplicateBarDiscarded = false; auto MU2 = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { + [&](std::unique_ptr R) { ADD_FAILURE() << "Attempt to materialize Bar from the wrong unit"; - R.failMaterialization(); + R->failMaterialization(); }, nullptr, [&](const JITDylib &JD, SymbolStringPtr Name) { @@ -1026,20 +1053,21 @@ TEST_F(CoreAPIsStandardTest, TestBasicWeakSymbolMaterialization) { TEST_F(CoreAPIsStandardTest, DefineMaterializingSymbol) { bool ExpectNoMoreMaterialization = false; - ES.setDispatchMaterialization([&](std::unique_ptr MU, - MaterializationResponsibility MR) { - if (ExpectNoMoreMaterialization) - ADD_FAILURE() << "Unexpected materialization"; - MU->materialize(std::move(MR)); - }); + ES.setDispatchMaterialization( + [&](std::unique_ptr MU, + std::unique_ptr MR) { + if (ExpectNoMoreMaterialization) + ADD_FAILURE() << "Unexpected materialization"; + MU->materialize(std::move(MR)); + 
}); auto MU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](MaterializationResponsibility R) { + [&](std::unique_ptr R) { cantFail( - R.defineMaterializing(SymbolFlagsMap({{Bar, BarSym.getFlags()}}))); - cantFail(R.notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); - cantFail(R.notifyEmitted()); + R->defineMaterializing(SymbolFlagsMap({{Bar, BarSym.getFlags()}}))); + cantFail(R->notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); + cantFail(R->notifyEmitted()); }); cantFail(JD.define(MU)); @@ -1093,8 +1121,8 @@ TEST_F(CoreAPIsStandardTest, FailResolution) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, JITSymbolFlags::Exported | JITSymbolFlags::Weak}, {Bar, JITSymbolFlags::Exported | JITSymbolFlags::Weak}}), - [&](MaterializationResponsibility R) { - R.failMaterialization(); + [&](std::unique_ptr R) { + R->failMaterialization(); }); cantFail(JD.define(MU)); @@ -1129,23 +1157,23 @@ TEST_F(CoreAPIsStandardTest, FailEmissionAfterResolution) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}, {Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { - cantFail(R.notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); + [&](std::unique_ptr R) { + cantFail(R->notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); ES.lookup( LookupKind::Static, makeJITDylibSearchOrder(&JD), SymbolLookupSet({Baz}), SymbolState::Resolved, - [&R](Expected Result) { + [&](Expected Result) { // Called when "baz" is resolved. We don't actually depend // on or care about baz, but use it to trigger failure of // this materialization before Baz has been finalized in // order to test that error propagation is correct in this // scenario. cantFail(std::move(Result)); - R.failMaterialization(); + R->failMaterialization(); }, [&](const SymbolDependenceMap &Deps) { - R.addDependenciesForAll(Deps); + R->addDependenciesForAll(Deps); }); }); @@ -1165,7 +1193,9 @@ TEST_F(CoreAPIsStandardTest, FailAfterPartialResolution) { // Fail materialization of bar. 
auto BarMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { R.failMaterialization(); }); + [&](std::unique_ptr R) { + R->failMaterialization(); + }); cantFail(JD.define(std::move(BarMU))); @@ -1185,9 +1215,9 @@ TEST_F(CoreAPIsStandardTest, FailAfterPartialResolution) { TEST_F(CoreAPIsStandardTest, TestLookupWithUnthreadedMaterialization) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, JITSymbolFlags::Exported}}), - [&](MaterializationResponsibility R) { - cantFail(R.notifyResolved({{Foo, FooSym}})); - cantFail(R.notifyEmitted()); + [&](std::unique_ptr R) { + cantFail(R->notifyResolved({{Foo, FooSym}})); + cantFail(R->notifyEmitted()); }); cantFail(JD.define(MU)); @@ -1204,15 +1234,14 @@ TEST_F(CoreAPIsStandardTest, TestLookupWithThreadedMaterialization) { #if LLVM_ENABLE_THREADS std::thread MaterializationThread; - ES.setDispatchMaterialization([&](std::unique_ptr MU, - MaterializationResponsibility MR) { - auto SharedMR = - std::make_shared(std::move(MR)); - MaterializationThread = - std::thread([MU = std::move(MU), MR = std::move(SharedMR)] { - MU->materialize(std::move(*MR)); - }); - }); + ES.setDispatchMaterialization( + [&](std::unique_ptr MU, + std::unique_ptr MR) { + MaterializationThread = + std::thread([MU = std::move(MU), MR = std::move(MR)]() mutable { + MU->materialize(std::move(MR)); + }); + }); cantFail(JD.define(absoluteSymbols({{Foo, FooSym}}))); @@ -1238,23 +1267,23 @@ TEST_F(CoreAPIsStandardTest, TestGetRequestedSymbolsAndReplace) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}, {Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { - auto Requested = R.getRequestedSymbols(); + [&](std::unique_ptr R) { + auto Requested = R->getRequestedSymbols(); EXPECT_EQ(Requested.size(), 1U) << "Expected one symbol requested"; EXPECT_EQ(*Requested.begin(), Foo) << "Expected \"Foo\" requested"; auto NewMU = std::make_unique( SymbolFlagsMap({{Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R2) { - cantFail(R2.notifyResolved(SymbolMap({{Bar, BarSym}}))); - cantFail(R2.notifyEmitted()); + [&](std::unique_ptr R2) { + cantFail(R2->notifyResolved(SymbolMap({{Bar, BarSym}}))); + cantFail(R2->notifyEmitted()); BarMaterialized = true; }); - R.replace(std::move(NewMU)); + R->replace(std::move(NewMU)); - cantFail(R.notifyResolved(SymbolMap({{Foo, FooSym}}))); - cantFail(R.notifyEmitted()); + cantFail(R->notifyResolved(SymbolMap({{Foo, FooSym}}))); + cantFail(R->notifyEmitted()); FooMaterialized = true; }); @@ -1280,13 +1309,13 @@ TEST_F(CoreAPIsStandardTest, TestGetRequestedSymbolsAndReplace) { TEST_F(CoreAPIsStandardTest, TestMaterializationResponsibilityDelegation) { auto MU = std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}, {Bar, BarSym.getFlags()}}), - [&](MaterializationResponsibility R) { - auto R2 = R.delegate({Bar}); + [&](std::unique_ptr R) { + auto R2 = R->delegate({Bar}); - cantFail(R.notifyResolved({{Foo, FooSym}})); - cantFail(R.notifyEmitted()); - cantFail(R2.notifyResolved({{Bar, BarSym}})); - cantFail(R2.notifyEmitted()); + cantFail(R->notifyResolved({{Foo, FooSym}})); + cantFail(R->notifyEmitted()); + cantFail(R2->notifyResolved({{Bar, BarSym}})); + cantFail(R2->notifyEmitted()); }); cantFail(JD.define(MU)); @@ -1309,12 +1338,11 @@ TEST_F(CoreAPIsStandardTest, TestMaterializeWeakSymbol) { JITSymbolFlags WeakExported = JITSymbolFlags::Exported; WeakExported &= JITSymbolFlags::Weak; - std::unique_ptr FooResponsibility; + std::unique_ptr FooR; auto MU = 
std::make_unique( SymbolFlagsMap({{Foo, FooSym.getFlags()}}), - [&](MaterializationResponsibility R) { - FooResponsibility = - std::make_unique(std::move(R)); + [&](std::unique_ptr R) { + FooR = std::move(R); }); cantFail(JD.define(MU)); @@ -1328,7 +1356,7 @@ TEST_F(CoreAPIsStandardTest, TestMaterializeWeakSymbol) { auto MU2 = std::make_unique( SymbolFlagsMap({{Foo, JITSymbolFlags::Exported}}), - [](MaterializationResponsibility R) { + [](std::unique_ptr R) { llvm_unreachable("This unit should never be materialized"); }); @@ -1339,8 +1367,8 @@ TEST_F(CoreAPIsStandardTest, TestMaterializeWeakSymbol) { consumeError(std::move(Err)); // No dependencies registered, can't fail: - cantFail(FooResponsibility->notifyResolved(SymbolMap({{Foo, FooSym}}))); - cantFail(FooResponsibility->notifyEmitted()); + cantFail(FooR->notifyResolved(SymbolMap({{Foo, FooSym}}))); + cantFail(FooR->notifyEmitted()); } static bool linkOrdersEqual(const std::vector> &LHS, diff --git a/llvm/unittests/ExecutionEngine/Orc/LazyCallThroughAndReexportsTest.cpp b/llvm/unittests/ExecutionEngine/Orc/LazyCallThroughAndReexportsTest.cpp index 50e7b60a2df4e..81ff3e7a87b30 100644 --- a/llvm/unittests/ExecutionEngine/Orc/LazyCallThroughAndReexportsTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/LazyCallThroughAndReexportsTest.cpp @@ -39,15 +39,15 @@ TEST_F(LazyReexportsTest, BasicLocalCallThroughManagerOperation) { cantFail(JD.define(std::make_unique( SymbolFlagsMap({{DummyTarget, JITSymbolFlags::Exported}}), - [&](MaterializationResponsibility R) { + [&](std::unique_ptr R) { DummyTargetMaterialized = true; // No dependencies registered, can't fail. - cantFail(R.notifyResolved( + cantFail(R->notifyResolved( {{DummyTarget, JITEvaluatedSymbol(static_cast( reinterpret_cast(&dummyTarget)), JITSymbolFlags::Exported)}})); - cantFail(R.notifyEmitted()); + cantFail(R->notifyEmitted()); }))); unsigned NotifyResolvedCount = 0; diff --git a/llvm/unittests/ExecutionEngine/Orc/OrcTestCommon.h b/llvm/unittests/ExecutionEngine/Orc/OrcTestCommon.h index b25851d8f796c..afbc4a9ffaa5c 100644 --- a/llvm/unittests/ExecutionEngine/Orc/OrcTestCommon.h +++ b/llvm/unittests/ExecutionEngine/Orc/OrcTestCommon.h @@ -86,7 +86,7 @@ class OrcNativeTarget { class SimpleMaterializationUnit : public orc::MaterializationUnit { public: using MaterializeFunction = - std::function; + std::function)>; using DiscardFunction = std::function; using DestructorFunction = std::function; @@ -108,7 +108,8 @@ class SimpleMaterializationUnit : public orc::MaterializationUnit { StringRef getName() const override { return ""; } - void materialize(orc::MaterializationResponsibility R) override { + void + materialize(std::unique_ptr R) override { Materialize(std::move(R)); } From ccb4124a4172bf2cb2e1cd7c253f0f1654fce294 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 8 Sep 2020 13:45:45 -0400 Subject: [PATCH 0411/1079] Fix -gz=zlib options for linker gcc translates -gz=zlib to --compress-debug-sections=zlib for both the assembler and the linker, but clang only does this for the assembler. The linker needs the --compress-debug-sections=zlib option to compress the debug sections in the generated executable or shared library. Due to this bug, -gz=zlib has no effect on the generated executable or shared library. This patch fixes that.
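As an illustrative sketch of the intended behavior (a hypothetical -### dry run; the input name hello.c, the tool paths, and the exact job lines are placeholders and vary by toolchain), both the assembler/cc1 job and the linker job should now carry the option, matching what the updated driver tests below check for:

  $ clang -### -target x86_64-unknown-linux-gnu -gz=zlib hello.c
    ".../clang" "-cc1" ... "--compress-debug-sections=zlib" ...
    ".../ld" ... "--compress-debug-sections=zlib" ...

Before this patch only the first (assembler) job received the option.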
Differential Revision: https://reviews.llvm.org/D87321 --- clang/lib/Driver/ToolChains/AMDGPU.cpp | 1 + clang/lib/Driver/ToolChains/CommonArgs.cpp | 18 ++++++++++++++++++ clang/lib/Driver/ToolChains/CommonArgs.h | 4 ++++ clang/lib/Driver/ToolChains/Gnu.cpp | 1 + clang/lib/Driver/ToolChains/HIP.cpp | 2 ++ clang/test/Driver/amdgcn-gz-options.cl | 16 ++++++++++++++++ clang/test/Driver/compress.c | 16 +++++++++------- clang/test/Driver/hip-gz-options.hip | 14 ++++++++++++++ 8 files changed, 65 insertions(+), 7 deletions(-) create mode 100644 clang/test/Driver/amdgcn-gz-options.cl create mode 100644 clang/test/Driver/hip-gz-options.hip diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 71acf3ed32816..3616310c37bf7 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -351,6 +351,7 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, std::string Linker = getToolChain().GetProgramPath(getShortName()); ArgStringList CmdArgs; + addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs); AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); CmdArgs.push_back("-shared"); CmdArgs.push_back("-o"); diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 0507794ee34ff..4a946721a551e 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -214,6 +214,24 @@ void tools::AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, } } +void tools::addLinkerCompressDebugSectionsOption( + const ToolChain &TC, const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) { + // GNU ld supports --compress-debug-sections=none|zlib|zlib-gnu|zlib-gabi + // whereas zlib is an alias to zlib-gabi. Therefore -gz=none|zlib|zlib-gnu + // are translated to --compress-debug-sections=none|zlib|zlib-gnu. + // -gz is not translated since ld --compress-debug-sections option requires an + // argument. 
+ if (const Arg *A = Args.getLastArg(options::OPT_gz_EQ)) { + StringRef V = A->getValue(); + if (V == "none" || V == "zlib" || V == "zlib-gnu") + CmdArgs.push_back(Args.MakeArgString("--compress-debug-sections=" + V)); + else + TC.getDriver().Diag(diag::err_drv_unsupported_option_argument) + << A->getOption().getName() << V; + } +} + void tools::AddTargetFeature(const ArgList &Args, std::vector &Features, OptSpecifier OnOpt, OptSpecifier OffOpt, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h index 29dedec9b09cd..0028ea0ca3373 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.h +++ b/clang/lib/Driver/ToolChains/CommonArgs.h @@ -27,6 +27,10 @@ void AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, const JobAction &JA); +void addLinkerCompressDebugSectionsOption(const ToolChain &TC, + const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs); + void claimNoWarnArgs(const llvm::opt::ArgList &Args); bool addSanitizerRuntimes(const ToolChain &TC, const llvm::opt::ArgList &Args, diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index d423a71b5cca6..7f7a3956781ac 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -556,6 +556,7 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs); bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs); + addLinkerCompressDebugSectionsOption(ToolChain, Args, CmdArgs); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); // The profile runtime also needs access to system libraries. getToolChain().addProfileRTLibs(Args, CmdArgs); diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index f3e3976d715b7..43e557c980507 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -89,6 +89,8 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, if (C.getDriver().isSaveTempsEnabled()) LldArgs.push_back("-save-temps"); + addLinkerCompressDebugSectionsOption(TC, Args, LldArgs); + LldArgs.append({"-o", Output.getFilename()}); for (auto Input : Inputs) LldArgs.push_back(Input.getFilename()); diff --git a/clang/test/Driver/amdgcn-gz-options.cl b/clang/test/Driver/amdgcn-gz-options.cl new file mode 100644 index 0000000000000..1074653984e7f --- /dev/null +++ b/clang/test/Driver/amdgcn-gz-options.cl @@ -0,0 +1,16 @@ +// REQUIRES: zlib, amdgpu-registered-target + +// RUN: %clang -### -target amdgcn-amd-amdhsa -gz=none -x assembler %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_NONE %s +// RUN: %clang -### -target amdgcn-amd-amdhsa -gz=none %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_NONE %s +// CHECK-OPT_GZ_EQ_NONE: {{".*clang.*".* "--compress-debug-sections=none"}} +// CHECK-OPT_GZ_EQ_NONE: "--compress-debug-sections=none" + +// RUN: %clang -### -target amdgcn-amd-amdhsa -gz=zlib -x assembler %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB %s +// RUN: %clang -### -target amdgcn-amd-amdhsa -gz=zlib %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB %s +// CHECK-OPT_GZ_EQ_ZLIB: {{".*clang.*".* "--compress-debug-sections=zlib"}} +// CHECK-OPT_GZ_EQ_ZLIB: "--compress-debug-sections=zlib" + +// RUN: %clang -### -target amdgcn-amd-amdhsa -gz=zlib-gnu -x assembler %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s +// RUN: %clang 
-### -target amdgcn-amd-amdhsa -gz=zlib-gnu %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s +// CHECK-OPT_GZ_EQ_ZLIB_GNU: {{".*clang.*".* "--compress-debug-sections=zlib-gnu"}} +// CHECK-OPT_GZ_EQ_ZLIB_GNU: "--compress-debug-sections=zlib-gnu" diff --git a/clang/test/Driver/compress.c b/clang/test/Driver/compress.c index 1a16c6385c66e..67c9fdcb0fc99 100644 --- a/clang/test/Driver/compress.c +++ b/clang/test/Driver/compress.c @@ -18,19 +18,21 @@ // RUN: %clang -### -fintegrated-as -gz -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ %s // CHECK-OPT_GZ: "--compress-debug-sections" -// RUN: %clang -### -fintegrated-as -gz=none -x assembler -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_NONE %s -// RUN: %clang -### -fintegrated-as -gz=none -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_NONE %s +// RUN: %clang -### -target x86_64-unknown-linux-gnu -gz=none -x assembler %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_NONE %s +// RUN: %clang -### -target x86_64-unknown-linux-gnu -gz=none %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_NONE %s +// CHECK-OPT_GZ_EQ_NONE: {{".*clang.*".* "--compress-debug-sections=none"}} // CHECK-OPT_GZ_EQ_NONE: "--compress-debug-sections=none" -// RUN: %clang -### -fintegrated-as -gz=zlib -x assembler -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB %s -// RUN: %clang -### -fintegrated-as -gz=zlib -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB %s +// RUN: %clang -### -target x86_64-unknown-linux-gnu -gz=zlib -x assembler %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB %s +// RUN: %clang -### -target x86_64-unknown-linux-gnu -gz=zlib %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB %s +// CHECK-OPT_GZ_EQ_ZLIB: {{".*clang.*".* "--compress-debug-sections=zlib"}} // CHECK-OPT_GZ_EQ_ZLIB: "--compress-debug-sections=zlib" -// RUN: %clang -### -fintegrated-as -gz=zlib-gnu -x assembler -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s -// RUN: %clang -### -fintegrated-as -gz=zlib-gnu -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s +// RUN: %clang -### -target x86_64-unknown-linux-gnu -gz=zlib-gnu -x assembler %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s +// RUN: %clang -### -target x86_64-unknown-linux-gnu -gz=zlib-gnu %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s +// CHECK-OPT_GZ_EQ_ZLIB_GNU: {{".*clang.*".* "--compress-debug-sections=zlib-gnu"}} // CHECK-OPT_GZ_EQ_ZLIB_GNU: "--compress-debug-sections=zlib-gnu" // RUN: %clang -### -fintegrated-as -gz=invalid -x assembler -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_INVALID %s // RUN: %clang -### -fintegrated-as -gz=invalid -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_INVALID %s // CHECK-OPT_GZ_EQ_INVALID: error: unsupported argument 'invalid' to option 'gz=' - diff --git a/clang/test/Driver/hip-gz-options.hip b/clang/test/Driver/hip-gz-options.hip new file mode 100644 index 0000000000000..063aedf8a0ac9 --- /dev/null +++ b/clang/test/Driver/hip-gz-options.hip @@ -0,0 +1,14 @@ +// REQUIRES: zlib, clang-driver, amdgpu-registered-target + +// RUN: %clang -### -target x86_64-unknown-linux-gnu \ +// RUN: --offload-arch=gfx906 %s -nogpulib -nogpuinc \ +// RUN: -ggdb -gz=zlib 2>&1 | FileCheck %s + +// RUN: %clang -### -target x86_64-unknown-linux-gnu \ +// RUN: -fgpu-rdc --offload-arch=gfx906 %s -nogpulib -nogpuinc \ +// RUN: -ggdb -gz=zlib 2>&1 | FileCheck %s + +// CHECK: {{".*clang.*" .* "--compress-debug-sections=zlib"}} +// CHECK-DAG: {{".*lld.*" .* "--compress-debug-sections=zlib"}} 
+// CHECK-DAG: {{".*clang.*" .* "--compress-debug-sections=zlib"}} +// CHECK: "--compress-debug-sections=zlib" From f5ab5b20fb2aae5567e6c50cc642ff63eb2146d4 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Tue, 8 Sep 2020 21:19:43 +0000 Subject: [PATCH 0412/1079] Sema: add support for `__attribute__((__swift_error__))` Introduce a new attribute that is used to indicate the error handling convention used by a function. This is used to translate the error semantics from the decorated interface to a compatible Swift interface. The supported error convention is one of: - none: no error handling - nonnull_error: a non-null error parameter indicates an error signifier - null_result: a return value of NULL is an error signifier - zero_result: a return value of 0 is an error signifier - nonzero_result: a non-zero return value is an error signifier Since this is the first of the attributes needed to support the semantic annotation for Swift, this change also includes the necessary supporting infrastructure for a new category of attributes (Swift). This is based on the work of the original changes in https://github.com/llvm/llvm-project-staging/commit/8afaf3aad2af43cfedca7a24cd817848c4e95c0c Differential Revision: https://reviews.llvm.org/D87331 Reviewed By: John McCall, Aaron Ballman, Dmitri Gribenko --- clang/include/clang/Basic/Attr.td | 11 ++ clang/include/clang/Basic/AttrDocs.td | 47 ++++++++ .../clang/Basic/DiagnosticSemaKinds.td | 7 ++ clang/lib/Sema/SemaDeclAttr.cpp | 101 ++++++++++++++++++ ...a-attribute-supported-attributes-list.test | 1 + clang/test/SemaObjC/attr-swift-error.m | 93 ++++++++++++++++ 6 files changed, 260 insertions(+) create mode 100644 clang/test/SemaObjC/attr-swift-error.m diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 5676e9aa16789..1790ae01497fb 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2130,6 +2130,17 @@ def Regparm : TypeAttr { let ASTNode = 0; } +def SwiftError : InheritableAttr { + let Spellings = [GNU<"swift_error">]; + let Args = [ + EnumArgument<"Convention", "ConventionKind", + ["none", "nonnull_error", "null_result", "zero_result", "nonzero_result"], + ["None", "NonNullError", "NullResult", "ZeroResult", "NonZeroResult"]> + ]; + let Subjects = SubjectList<[Function, ObjCMethod], ErrorDiag>; + let Documentation = [SwiftErrorDocs]; +} + def NoDeref : TypeAttr { let Spellings = [Clang<"noderef">]; let Documentation = [NoDerefDocs]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 6daf9ca678961..842ffe050adcd 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -3469,6 +3469,53 @@ For example: }]; } +def SwiftDocs : DocumentationCategory<"Customizing Swift Import"> { + let Content = [{ +Clang supports additional attributes for customizing how APIs are imported into +Swift. + }]; +} + +def SwiftErrorDocs : Documentation { + let Category = SwiftDocs; + let Heading = "swift_error"; + let Content = [{ +The ``swift_error`` attribute controls whether a particular function (or +Objective-C method) is imported into Swift as a throwing function, and if so, +which dynamic convention it uses. + +All of these conventions except ``none`` require the function to have an error +parameter. Currently, the error parameter is always the last parameter of type +``NSError**`` or ``CFErrorRef*``. Swift will remove the error parameter from +the imported API. 
When calling the API, Swift will always pass a valid address +initialized to a null pointer. + +* ``swift_error(none)`` means that the function should not be imported as +throwing. The error parameter and result type will be imported normally. + +* ``swift_error(null_result)`` means that calls to the function should be +considered to have thrown if they return a null value. The return type must be +a pointer type, and it will be imported into Swift with a non-optional type. +This is the default error convention for Objective-C methods that return +pointers. + +* ``swift_error(zero_result)`` means that calls to the function should be +considered to have thrown if they return a zero result. The return type must be +an integral type. If the return type would have been imported as ``Bool``, it +is instead imported as ``Void``. This is the default error convention for +Objective-C methods that return a type that would be imported as ``Bool``. + +* ``swift_error(nonzero_result)`` means that calls to the function should be +considered to have thrown if they return a non-zero result. The return type must +be an integral type. If the return type would have been imported as ``Bool``, +it is instead imported as ``Void``. + +* ``swift_error(nonnull_error)`` means that calls to the function should be +considered to have thrown if they leave a non-null error in the error parameter. +The return type is left unmodified. + }]; +} + def OMPDeclareSimdDocs : Documentation { let Category = DocCatFunction; let Heading = "#pragma omp declare simd"; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 98dc6dfba4efa..e0d700c66724a 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3974,6 +3974,13 @@ def err_objc_bridged_related_known_method : Error< def err_objc_attr_protocol_requires_definition : Error< "attribute %0 can only be applied to @protocol definitions, not forward declarations">; +def err_attr_swift_error_no_error_parameter : Error< + "%0 attribute can only be applied to a %select{function|method}1 with an " + "error parameter">; +def err_attr_swift_error_return_type : Error< + "%0 attribute with '%1' convention can only be applied to a " + "%select{function|method}2 returning %select{an integral type|a pointer}3">; + def warn_ignored_objc_externally_retained : Warning< "'objc_externally_retained' can only be applied to local variables " "%select{of retainable type|with strong ownership}0">, diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 49fd22fb21987..e317211d8bee8 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5524,6 +5524,102 @@ static void handleObjCPreciseLifetimeAttr(Sema &S, Decl *D, D->addAttr(::new (S.Context) ObjCPreciseLifetimeAttr(S.Context, AL)); } +static bool isErrorParameter(Sema &S, QualType QT) { + const auto *PT = QT->getAs(); + if (!PT) + return false; + + QualType Pointee = PT->getPointeeType(); + + // Check for NSError**. + if (const auto *OPT = Pointee->getAs()) + if (const auto *ID = OPT->getInterfaceDecl()) + if (ID->getIdentifier() == S.getNSErrorIdent()) + return true; + + // Check for CFError**. 
+  if (const auto *PT = Pointee->getAs<PointerType>())
+    if (const auto *RT = PT->getPointeeType()->getAs<RecordType>())
+      if (S.isCFError(RT->getDecl()))
+        return true;
+
+  return false;
+}
+
+static void handleSwiftError(Sema &S, Decl *D, const ParsedAttr &AL) {
+  auto hasErrorParameter = [](Sema &S, Decl *D, const ParsedAttr &AL) -> bool {
+    for (unsigned I = 0, E = getFunctionOrMethodNumParams(D); I != E; ++I) {
+      if (isErrorParameter(S, getFunctionOrMethodParamType(D, I)))
+        return true;
+    }
+
+    S.Diag(AL.getLoc(), diag::err_attr_swift_error_no_error_parameter)
+        << AL << isa<ObjCMethodDecl>(D);
+    return false;
+  };
+
+  auto hasPointerResult = [](Sema &S, Decl *D, const ParsedAttr &AL) -> bool {
+    // - C, ObjC, and block pointers are definitely okay.
+    // - References are definitely not okay.
+    // - nullptr_t is weird, but acceptable.
+    QualType RT = getFunctionOrMethodResultType(D);
+    if (RT->hasPointerRepresentation() && !RT->isReferenceType())
+      return true;
+
+    S.Diag(AL.getLoc(), diag::err_attr_swift_error_return_type)
+        << AL << AL.getArgAsIdent(0)->Ident->getName() << isa<ObjCMethodDecl>(D)
+        << /*pointer*/ 1;
+    return false;
+  };
+
+  auto hasIntegerResult = [](Sema &S, Decl *D, const ParsedAttr &AL) -> bool {
+    QualType RT = getFunctionOrMethodResultType(D);
+    if (RT->isIntegralType(S.Context))
+      return true;
+
+    S.Diag(AL.getLoc(), diag::err_attr_swift_error_return_type)
+        << AL << AL.getArgAsIdent(0)->Ident->getName() << isa<ObjCMethodDecl>(D)
+        << /*integral*/ 0;
+    return false;
+  };
+
+  if (D->isInvalidDecl())
+    return;
+
+  IdentifierLoc *Loc = AL.getArgAsIdent(0);
+  SwiftErrorAttr::ConventionKind Convention;
+  if (!SwiftErrorAttr::ConvertStrToConventionKind(Loc->Ident->getName(),
+                                                  Convention)) {
+    S.Diag(AL.getLoc(), diag::warn_attribute_type_not_supported)
+        << AL << Loc->Ident;
+    return;
+  }
+
+  switch (Convention) {
+  case SwiftErrorAttr::None:
+    // No additional validation required.
+    break;
+
+  case SwiftErrorAttr::NonNullError:
+    if (!hasErrorParameter(S, D, AL))
+      return;
+    break;
+
+  case SwiftErrorAttr::NullResult:
+    if (!hasErrorParameter(S, D, AL) || !hasPointerResult(S, D, AL))
+      return;
+    break;
+
+  case SwiftErrorAttr::NonZeroResult:
+  case SwiftErrorAttr::ZeroResult:
+    if (!hasErrorParameter(S, D, AL) || !hasIntegerResult(S, D, AL))
+      return;
+    break;
+  }
+
+  D->addAttr(::new (S.Context) SwiftErrorAttr(S.Context, AL, Convention));
+}
+
 //===----------------------------------------------------------------------===//
 // Microsoft specific attribute handlers.
 //===----------------------------------------------------------------------===//
@@ -7436,6 +7532,11 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
     handleTypeTagForDatatypeAttr(S, D, AL);
     break;
 
+  // Swift attributes.
+  case ParsedAttr::AT_SwiftError:
+    handleSwiftError(S, D, AL);
+    break;
+
   // XRay attributes.
case ParsedAttr::AT_XRayLogArgs: handleXRayLogArgsAttr(S, D, AL); diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index 194c92e40eec3..12800b9d54eaa 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -147,6 +147,7 @@ // CHECK-NEXT: SetTypestate (SubjectMatchRule_function_is_member) // CHECK-NEXT: SpeculativeLoadHardening (SubjectMatchRule_function, SubjectMatchRule_objc_method) // CHECK-NEXT: SwiftContext (SubjectMatchRule_variable_is_parameter) +// CHECK-NEXT: SwiftError (SubjectMatchRule_function, SubjectMatchRule_objc_method) // CHECK-NEXT: SwiftErrorResult (SubjectMatchRule_variable_is_parameter) // CHECK-NEXT: SwiftIndirectResult (SubjectMatchRule_variable_is_parameter) // CHECK-NEXT: TLSModel (SubjectMatchRule_variable_is_thread_local) diff --git a/clang/test/SemaObjC/attr-swift-error.m b/clang/test/SemaObjC/attr-swift-error.m new file mode 100644 index 0000000000000..0132a8b200f5f --- /dev/null +++ b/clang/test/SemaObjC/attr-swift-error.m @@ -0,0 +1,93 @@ +// RUN: %clang_cc1 -verify -fsyntax-only -fobjc-arc -fblocks %s + +@class NSError; + +#if __SIZEOF_POINTER__ == 4 +typedef unsigned char BOOL; +#else +typedef _Bool BOOL; +#endif + +typedef struct __attribute__((__objc_bridge__(NSError))) __CFError *CFErrorRef; + +extern int f0(void) __attribute__((__swift_error__)); +// expected-error@-1 {{'__swift_error__' attribute takes one argument}} +extern int f1(void) __attribute__((__swift_error__(invalid))); +// expected-warning@-1 {{'__swift_error__' attribute argument not supported: 'invalid'}} +extern int f2(void) __attribute__((__swift_error__(none,zero_result))); +// expected-error@-1 {{use of undeclared identifier 'zero_result'}} + +@interface Erroneous +- (BOOL)m0:(NSError **)error __attribute__((__swift_error__(none))); +- (BOOL)m1:(NSError **)error __attribute__((__swift_error__(nonnull_error))); +- (BOOL)m2:(NSError **)error __attribute__((__swift_error__(null_result))); +// expected-error@-1 {{'__swift_error__' attribute with 'null_result' convention can only be applied to a method returning a pointer}} +- (BOOL)m3:(NSError **)error __attribute__((__swift_error__(nonzero_result))); +- (BOOL)m4:(NSError **)error __attribute__((__swift_error__(zero_result))); + +- (Undeclared)n0:(NSError **)error __attribute__((__swift_error__(none))); +// expected-error@-1 {{expected a type}} +- (Undeclared)n1:(NSError **)error __attribute__((__swift_error__(nonnull_error))); +// expected-error@-1 {{expected a type}} +- (Undeclared)n2:(NSError **)error __attribute__((__swift_error__(null_result))); +// expected-error@-1 {{expected a type}} +- (Undeclared)n3:(NSError **)error __attribute__((__swift_error__(nonzero_result))); +// expected-error@-1 {{expected a type}} +// FIXME: the follow-on warning should really be suppressed, but apparently +// having an ill-formed return type doesn't mark anything as invalid. +// expected-error@-4 {{can only be applied}} +- (Undeclared)n4:(NSError **)error __attribute__((__swift_error__(zero_result))); +// expected-error@-1 {{expected a type}} +// FIXME: the follow-on warning should really be suppressed, but apparently +// having an ill-formed return type doesn't mark anything as invalid. 
+// expected-error@-4 {{can only be applied}} + +- (instancetype)o0 __attribute__((__swift_error__(none))); +- (instancetype)o1 __attribute__((__swift_error__(nonnull_error))); +// expected-error@-1 {{'__swift_error__' attribute can only be applied to a method with an error parameter}} +- (instancetype)o2 __attribute__((__swift_error__(null_result))); +// expected-error@-1 {{'__swift_error__' attribute can only be applied to a method with an error parameter}} +- (instancetype)o3 __attribute__((__swift_error__(nonzero_result))); +// expected-error@-1 {{'__swift_error__' attribute can only be applied to a method with an error parameter}} +- (instancetype)o4 __attribute__((__swift_error__(zero_result))); +// expected-error@-1 {{'__swift_error__' attribute can only be applied to a method with an error parameter}} +@end + +extern BOOL m0(CFErrorRef *) __attribute__((__swift_error__(none))); +extern BOOL m1(CFErrorRef *) __attribute__((__swift_error__(nonnull_error))); +extern BOOL m2(CFErrorRef *) __attribute__((__swift_error__(null_result))); +// expected-error@-1 {{'__swift_error__' attribute with 'null_result' convention can only be applied to a function returning a pointer}} +extern BOOL m3(CFErrorRef *) __attribute__((__swift_error__(nonzero_result))); +extern BOOL m4(CFErrorRef *) __attribute__((__swift_error__(zero_result))); + +extern Undeclared n0(CFErrorRef *) __attribute__((__swift_error__(none))); +// expected-error@-1 {{unknown type name 'Undeclared'}} +extern Undeclared n1(CFErrorRef *) __attribute__((__swift_error__(nonnull_error))); +// expected-error@-1 {{unknown type name 'Undeclared'}} +extern Undeclared n2(CFErrorRef *) __attribute__((__swift_error__(null_result))); +// expected-error@-1 {{unknown type name 'Undeclared'}} +extern Undeclared n3(CFErrorRef *) __attribute__((__swift_error__(nonzero_result))); +// expected-error@-1 {{unknown type name 'Undeclared'}} +extern Undeclared n4(CFErrorRef *) __attribute__((__swift_error__(zero_result))); +// expected-error@-1 {{unknown type name 'Undeclared'}} + +extern void *o0(CFErrorRef *) __attribute__((__swift_error__(none))); +extern void *o1(CFErrorRef *) __attribute__((__swift_error__(nonnull_error))); +extern void *o2(CFErrorRef *) __attribute__((__swift_error__(null_result))); +extern void *o3(CFErrorRef *) __attribute__((__swift_error__(nonzero_result))); +// expected-error@-1 {{'__swift_error__' attribute with 'nonzero_result' convention can only be applied to a function returning an integral type}} +extern void *o4(CFErrorRef *) __attribute__((__swift_error__(zero_result))); +// expected-error@-1 {{'__swift_error__' attribute with 'zero_result' convention can only be applied to a function returning an integral type}} + +extern void *p0(void) __attribute__((__swift_error__(none))); +extern void *p1(void) __attribute__((__swift_error__(nonnull_error))); +// expected-error@-1 {{'__swift_error__' attribute can only be applied to a function with an error parameter}} +extern void *p2(void) __attribute__((__swift_error__(null_result))); +// expected-error@-1 {{'__swift_error__' attribute can only be applied to a function with an error parameter}} +extern void *p3(void) __attribute__((__swift_error__(nonzero_result))); +// expected-error@-1 {{'__swift_error__' attribute can only be applied to a function with an error parameter}} +extern void *p4(void) __attribute__((__swift_error__(zero_result))); +// expected-error@-1 {{'__swift_error__' attribute can only be applied to a function with an error parameter}} + +extern BOOL b 
__attribute__((__swift_error__(none))); +// expected-error@-1 {{attribute only applies to functions and Objective-C methods}} From e3e3d6eecfa5003bf431d8223bcc968e2ce291c8 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Fri, 11 Sep 2020 11:22:31 -0700 Subject: [PATCH 0413/1079] [lld][WebAssembly] Convert a objyaml-using test to assembly Differential Revision: https://reviews.llvm.org/D87536 --- lld/test/wasm/Inputs/undefined-globals.s | 11 +++ lld/test/wasm/Inputs/undefined-globals.yaml | 53 ------------ lld/test/wasm/gc-imports.ll | 91 --------------------- lld/test/wasm/gc-imports.s | 87 ++++++++++++++++++++ 4 files changed, 98 insertions(+), 144 deletions(-) create mode 100644 lld/test/wasm/Inputs/undefined-globals.s delete mode 100644 lld/test/wasm/Inputs/undefined-globals.yaml delete mode 100644 lld/test/wasm/gc-imports.ll create mode 100644 lld/test/wasm/gc-imports.s diff --git a/lld/test/wasm/Inputs/undefined-globals.s b/lld/test/wasm/Inputs/undefined-globals.s new file mode 100644 index 0000000000000..607d7942d0037 --- /dev/null +++ b/lld/test/wasm/Inputs/undefined-globals.s @@ -0,0 +1,11 @@ +.globl use_undef_global +.globl unused_undef_global +.globl used_undef_global + +use_undef_global: + .functype use_undef_global () -> (i64) + global.get used_undef_global + end_function + +.globaltype unused_undef_global, i64 +.globaltype used_undef_global, i64 diff --git a/lld/test/wasm/Inputs/undefined-globals.yaml b/lld/test/wasm/Inputs/undefined-globals.yaml deleted file mode 100644 index 41bc64356400b..0000000000000 --- a/lld/test/wasm/Inputs/undefined-globals.yaml +++ /dev/null @@ -1,53 +0,0 @@ ---- !WASM -FileHeader: - Version: 0x00000001 -Sections: - - Type: TYPE - Signatures: - - Index: 0 - ParamTypes: - ReturnTypes: - - I64 - - Type: IMPORT - Imports: - - Module: env - Field: unused_undef_global - Kind: GLOBAL - GlobalType: I64 - GlobalMutable: true - - Module: env - Field: used_undef_global - Kind: GLOBAL - GlobalType: I64 - GlobalMutable: true - - Type: FUNCTION - FunctionTypes: [ 0 ] - - Type: CODE - Functions: - - Index: 0 - Locals: - Body: 2381808080000B - Relocations: - - Type: R_WASM_GLOBAL_INDEX_LEB - Index: 1 - Offset: 0x00000004 - - Type: CUSTOM - Name: linking - Version: 2 - SymbolTable: - - Index: 0 - Kind: GLOBAL - Name: unused_undef_global - Flags: [ VISIBILITY_HIDDEN, UNDEFINED ] - Global: 0 - - Index: 1 - Kind: GLOBAL - Name: used_undef_global - Flags: [ VISIBILITY_HIDDEN, UNDEFINED ] - Global: 1 - - Index: 2 - Kind: FUNCTION - Name: use_undef_global - Flags: [ VISIBILITY_HIDDEN ] - Function: 0 -... 
diff --git a/lld/test/wasm/gc-imports.ll b/lld/test/wasm/gc-imports.ll deleted file mode 100644 index 68d403765916b..0000000000000 --- a/lld/test/wasm/gc-imports.ll +++ /dev/null @@ -1,91 +0,0 @@ -; RUN: llc -filetype=obj %s -o %t.o -; RUN: yaml2obj %S/Inputs/undefined-globals.yaml -o %t_globals.o -; RUN: wasm-ld --allow-undefined -o %t1.wasm %t.o %t_globals.o - -target triple = "wasm32-unknown-unknown" - -declare i64 @unused_undef_function(i64 %arg) - -declare i32 @used_undef_function() - -declare i64 @use_undef_global() - -define hidden void @foo() { -entry: - call i64 @unused_undef_function(i64 0) - ret void -} - -define hidden void @_start() { -entry: - call i32 @used_undef_function() - call i64 @use_undef_global() - ret void -} - -; RUN: obj2yaml %t1.wasm | FileCheck %s - -; CHECK: - Type: IMPORT -; CHECK-NEXT: Imports: -; CHECK-NEXT: - Module: env -; CHECK-NEXT: Field: used_undef_function -; CHECK-NEXT: Kind: FUNCTION -; CHECK-NEXT: SigIndex: 0 -; CHECK-NEXT: - Module: env -; CHECK-NEXT: Field: used_undef_global -; CHECK-NEXT: Kind: GLOBAL -; CHECK-NEXT: GlobalType: I64 -; CHECK-NEXT: GlobalMutable: true -; CHECK-NEXT: - Type: -; CHECK: - Type: CUSTOM -; CHECK-NEXT: Name: name -; CHECK-NEXT: FunctionNames: -; CHECK-NEXT: - Index: 0 -; CHECK-NEXT: Name: used_undef_function -; CHECK-NEXT: - Index: 1 -; CHECK-NEXT: Name: _start -; CHECK-NEXT: - Index: 2 -; CHECK-NEXT: Name: use_undef_global -; CHECK-NEXT: ... - -; RUN: wasm-ld --no-gc-sections --allow-undefined \ -; RUN: -o %t1.no-gc.wasm %t.o %t_globals.o -; RUN: obj2yaml %t1.no-gc.wasm | FileCheck %s -check-prefix=NO-GC - -; NO-GC: - Type: IMPORT -; NO-GC-NEXT: Imports: -; NO-GC-NEXT: - Module: env -; NO-GC-NEXT: Field: unused_undef_function -; NO-GC-NEXT: Kind: FUNCTION -; NO-GC-NEXT: SigIndex: 0 -; NO-GC-NEXT: - Module: env -; NO-GC-NEXT: Field: used_undef_function -; NO-GC-NEXT: Kind: FUNCTION -; NO-GC-NEXT: SigIndex: 1 -; NO-GC-NEXT: - Module: env -; NO-GC-NEXT: Field: unused_undef_global -; NO-GC-NEXT: Kind: GLOBAL -; NO-GC-NEXT: GlobalType: I64 -; NO-GC-NEXT: GlobalMutable: true -; NO-GC-NEXT: - Module: env -; NO-GC-NEXT: Field: used_undef_global -; NO-GC-NEXT: Kind: GLOBAL -; NO-GC-NEXT: GlobalType: I64 -; NO-GC-NEXT: GlobalMutable: true -; NO-GC-NEXT: - Type: -; NO-GC: - Type: CUSTOM -; NO-GC-NEXT: Name: name -; NO-GC-NEXT: FunctionNames: -; NO-GC-NEXT: - Index: 0 -; NO-GC-NEXT: Name: unused_undef_function -; NO-GC-NEXT: - Index: 1 -; NO-GC-NEXT: Name: used_undef_function -; NO-GC-NEXT: - Index: 2 -; NO-GC-NEXT: Name: __wasm_call_ctors -; NO-GC-NEXT: - Index: 3 -; NO-GC-NEXT: Name: foo -; NO-GC-NEXT: - Index: 4 -; NO-GC-NEXT: Name: _start -; NO-GC-NEXT: - Index: 5 -; NO-GC-NEXT: Name: use_undef_global -; NO-GC-NEXT: ... 
diff --git a/lld/test/wasm/gc-imports.s b/lld/test/wasm/gc-imports.s new file mode 100644 index 0000000000000..6564b5c1a7d87 --- /dev/null +++ b/lld/test/wasm/gc-imports.s @@ -0,0 +1,87 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %S/Inputs/undefined-globals.s -o %t_globals.o +# RUN: wasm-ld --allow-undefined -o %t1.wasm %t.o %t_globals.o + +.functype unused_undef_function (i64) -> (i64) +.functype used_undef_function () -> (i32) +.functype use_undef_global () -> (i64) + +foo: + .functype foo () -> () + call unused_undef_function + end_function + +.globl _start + +_start: + .functype _start () -> () + call used_undef_function + call use_undef_global + end_function + +# RUN: obj2yaml %t1.wasm | FileCheck %s + +# CHECK: - Type: IMPORT +# CHECK-NEXT: Imports: +# CHECK-NEXT: - Module: env +# CHECK-NEXT: Field: used_undef_function +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: SigIndex: 0 +# CHECK-NEXT: - Module: env +# CHECK-NEXT: Field: used_undef_global +# CHECK-NEXT: Kind: GLOBAL +# CHECK-NEXT: GlobalType: I64 +# CHECK-NEXT: GlobalMutable: true +# CHECK-NEXT: - Type: +# CHECK: - Type: CUSTOM +# CHECK-NEXT: Name: name +# CHECK-NEXT: FunctionNames: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Name: used_undef_function +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Name: _start +# CHECK-NEXT: - Index: 2 +# CHECK-NEXT: Name: use_undef_global +# CHECK-NEXT: ... + +# RUN: wasm-ld --no-gc-sections --allow-undefined \ +# RUN: -o %t1.no-gc.wasm %t.o %t_globals.o +# RUN: obj2yaml %t1.no-gc.wasm | FileCheck %s -check-prefix=NO-GC + +# NO-GC: - Type: IMPORT +# NO-GC-NEXT: Imports: +# NO-GC-NEXT: - Module: env +# NO-GC-NEXT: Field: unused_undef_function +# NO-GC-NEXT: Kind: FUNCTION +# NO-GC-NEXT: SigIndex: 0 +# NO-GC-NEXT: - Module: env +# NO-GC-NEXT: Field: used_undef_function +# NO-GC-NEXT: Kind: FUNCTION +# NO-GC-NEXT: SigIndex: 1 +# NO-GC-NEXT: - Module: env +# NO-GC-NEXT: Field: unused_undef_global +# NO-GC-NEXT: Kind: GLOBAL +# NO-GC-NEXT: GlobalType: I64 +# NO-GC-NEXT: GlobalMutable: true +# NO-GC-NEXT: - Module: env +# NO-GC-NEXT: Field: used_undef_global +# NO-GC-NEXT: Kind: GLOBAL +# NO-GC-NEXT: GlobalType: I64 +# NO-GC-NEXT: GlobalMutable: true +# NO-GC-NEXT: - Type: +# NO-GC: - Type: CUSTOM +# NO-GC-NEXT: Name: name +# NO-GC-NEXT: FunctionNames: +# NO-GC-NEXT: - Index: 0 +# NO-GC-NEXT: Name: unused_undef_function +# NO-GC-NEXT: - Index: 1 +# NO-GC-NEXT: Name: used_undef_function +# NO-GC-NEXT: - Index: 2 +# NO-GC-NEXT: Name: __wasm_call_ctors +# NO-GC-NEXT: - Index: 3 +# NO-GC-NEXT: Name: foo +# NO-GC-NEXT: - Index: 4 +# NO-GC-NEXT: Name: _start +# NO-GC-NEXT: - Index: 5 +# NO-GC-NEXT: Name: use_undef_global +# NO-GC-NEXT: ... 
From ee13ae030e21d584c72d384ea463896400ccee1c Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu"
Date: Fri, 11 Sep 2020 17:56:28 -0400
Subject: [PATCH 0414/1079] Fix test hip-gz-options.hip

---
 clang/test/Driver/hip-gz-options.hip | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/Driver/hip-gz-options.hip b/clang/test/Driver/hip-gz-options.hip
index 063aedf8a0ac9..b2544a42ebedc 100644
--- a/clang/test/Driver/hip-gz-options.hip
+++ b/clang/test/Driver/hip-gz-options.hip
@@ -8,7 +8,7 @@
 // RUN:   -fgpu-rdc --offload-arch=gfx906 %s -nogpulib -nogpuinc \
 // RUN:   -ggdb -gz=zlib 2>&1 | FileCheck %s
 
-// CHECK: {{".*clang.*" .* "--compress-debug-sections=zlib"}}
+// CHECK-DAG: {{".*clang.*" .* "--compress-debug-sections=zlib"}}
 // CHECK-DAG: {{".*lld.*" .* "--compress-debug-sections=zlib"}}
 // CHECK-DAG: {{".*clang.*" .* "--compress-debug-sections=zlib"}}
 // CHECK: "--compress-debug-sections=zlib"

From e21bb31eb6c6fcff652ecfb338e8558362473150 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 28 Aug 2020 19:51:33 -0400
Subject: [PATCH 0415/1079] CodeGen: Require SSA to run PeepholeOptimizer

---
 llvm/lib/CodeGen/PeepholeOptimizer.cpp      |   5 +
 llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir | 180 --------------------
 2 files changed, 5 insertions(+), 180 deletions(-)

diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 05c843078fb1a..ed2a50e90ffe7 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -178,6 +178,11 @@ namespace {
       }
     }
 
+    MachineFunctionProperties getRequiredProperties() const override {
+      return MachineFunctionProperties()
+        .set(MachineFunctionProperties::Property::IsSSA);
+    }
+
     /// Track Def -> Use info used for rewriting copies.
     using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>;

diff --git a/llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir b/llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir
index 458bdcef1a584..eae7e4807f765 100644
--- a/llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir
@@ -16,21 +16,6 @@ body: |
 
 ...
 
----
-name: fold_simm_16_sub_to_sub
-body: |
-  bb.0:
-
-    ; GCN-LABEL: name: fold_simm_16_sub_to_sub
-    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
-    ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
-    ; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]]
-    %0:sreg_32 = S_MOV_B32 2048
-    %1.lo16:sreg_32 = COPY killed %0.lo16
-    SI_RETURN_TO_EPILOG %1
-
-...
-
 ---
 name: fold_simm_16_sub_to_phys
 body: |
@@ -46,36 +31,6 @@ body: |
 
 ...
 
----
-name: fold_aimm_16_sub_to_sub_2048
-body: |
-  bb.0:
-
-    ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_2048
-    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
-    ; GCN: %1.lo16:agpr_32 = COPY killed [[S_MOV_B32_]].lo16
-    ; GCN: SI_RETURN_TO_EPILOG %1
-    %0:sreg_32 = S_MOV_B32 2048
-    %1.lo16:agpr_32 = COPY killed %0.lo16
-    SI_RETURN_TO_EPILOG %1
-
-...
-
----
-name: fold_aimm_16_sub_to_sub_0
-body: |
-  bb.0:
-
-    ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_0
-    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-    ; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec
-    ; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]]
-    %0:sreg_32 = S_MOV_B32 0
-    %1.lo16:agpr_32 = COPY killed %0.lo16
-    SI_RETURN_TO_EPILOG %1
-
-...
-
 ---
 name: fold_aimm_16_sub_to_phys
 body: |
@@ -106,21 +61,6 @@ body: |
 
 ...
 
---- -name: fold_vimm_16_sub_to_sub -body: | - bb.0: - - ; GCN-LABEL: name: fold_vimm_16_sub_to_sub - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 - ; GCN: %1.lo16:vgpr_32 = COPY killed [[S_MOV_B32_]].lo16 - ; GCN: SI_RETURN_TO_EPILOG %1 - %0:sreg_32 = S_MOV_B32 2048 - %1.lo16:vgpr_32 = COPY killed %0.lo16 - SI_RETURN_TO_EPILOG %1 - -... - --- name: fold_vimm_16_sub_to_phys body: | @@ -135,123 +75,3 @@ body: | SI_RETURN_TO_EPILOG $vgpr0_lo16 ... - ---- -name: fold_vimm_16_lo_to_hi -body: | - bb.0: - - ; GCN-LABEL: name: fold_vimm_16_lo_to_hi - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 - ; GCN: %1.hi16:vgpr_32 = COPY killed [[S_MOV_B32_]].lo16 - ; GCN: SI_RETURN_TO_EPILOG %1 - %0:sreg_32 = S_MOV_B32 2048 - %1.hi16:vgpr_32 = COPY killed %0.lo16 - SI_RETURN_TO_EPILOG %1 - -... - ---- -name: fold_vimm_16_hi_to_lo -body: | - bb.0: - - ; GCN-LABEL: name: fold_vimm_16_hi_to_lo - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 - ; GCN: %1.lo16:vgpr_32 = COPY killed [[S_MOV_B32_]].hi16 - ; GCN: SI_RETURN_TO_EPILOG %1 - %0:sreg_32 = S_MOV_B32 2048 - %1.lo16:vgpr_32 = COPY killed %0.hi16 - SI_RETURN_TO_EPILOG %1 - -... - ---- -name: fold_simm_16_sub_to_sub_lo_to_hi -body: | - bb.0: - - ; GCN-LABEL: name: fold_simm_16_sub_to_sub_lo_to_hi - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 - ; GCN: %1.hi16:sreg_32 = COPY killed [[S_MOV_B32_]].lo16 - ; GCN: SI_RETURN_TO_EPILOG %1 - %0:sreg_32 = S_MOV_B32 2048 - %1.hi16:sreg_32 = COPY killed %0.lo16 - SI_RETURN_TO_EPILOG %1 - -... - ---- -name: fold_simm_16_sub_to_sub_hi_to_lo_2048 -body: | - bb.0: - - ; GCN-LABEL: name: fold_simm_16_sub_to_sub_hi_to_lo_2048 - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 - ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]] - %0:sreg_32 = S_MOV_B32 2048 - %1.lo16:sreg_32 = COPY killed %0.hi16 - SI_RETURN_TO_EPILOG %1 - -... - ---- -name: fold_simm_16_sub_to_sub_hi_to_lo_shifted_2048 -body: | - bb.0: - - ; GCN-LABEL: name: fold_simm_16_sub_to_sub_hi_to_lo_shifted_2048 - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 134217728 - ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 - ; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]] - %0:sreg_32 = S_MOV_B32 134217728 - %1.lo16:sreg_32 = COPY killed %0.hi16 - SI_RETURN_TO_EPILOG %1 - -... - ---- -name: fold_aimm_16_sub_to_sub_hi_to_lo_2048 -body: | - bb.0: - - ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_2048 - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 - ; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec - ; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]] - %0:sreg_32 = S_MOV_B32 2048 - %1.lo16:agpr_32 = COPY killed %0.hi16 - SI_RETURN_TO_EPILOG %1 - -... - ---- -name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_1 -body: | - bb.0: - - ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_1 - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65536 - ; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 1, implicit $exec - ; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]] - %0:sreg_32 = S_MOV_B32 65536 - %1.lo16:agpr_32 = COPY killed %0.hi16 - SI_RETURN_TO_EPILOG %1 - -... 
- ---- -name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_2048 -body: | - bb.0: - - ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_2048 - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 134217728 - ; GCN: %1.lo16:agpr_32 = COPY killed [[S_MOV_B32_]].hi16 - ; GCN: SI_RETURN_TO_EPILOG %1 - %0:sreg_32 = S_MOV_B32 134217728 - %1.lo16:agpr_32 = COPY killed %0.hi16 - SI_RETURN_TO_EPILOG %1 - -... From 382b2b1b5183cdcc4c57b0650e25f4f107619099 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 25 Aug 2020 16:07:35 -0400 Subject: [PATCH 0416/1079] RegAllocFast: Fix typo in comment --- llvm/lib/CodeGen/RegAllocFast.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 5396f9f3a1432..e0742c4508ea0 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -1142,8 +1142,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { // Kill dead defs after the scan to ensure that multiple defs of the same // register are allocated identically. We didn't need to do this for uses - // because we are crerating our own kill flags, and they are always at the - // last use. + // because we are creating our own kill flags, and they are always at the last + // use. for (Register VirtReg : VirtDead) killVirtReg(VirtReg); VirtDead.clear(); From 43e6c59f1c1fc3c1b9cdcddfe9826b9abf2cfb73 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Fri, 11 Sep 2020 22:08:38 +0000 Subject: [PATCH 0417/1079] docs: add a newline to appease Sphinx Sphinx expects an empty newline after the bulleted list. --- clang/include/clang/Basic/AttrDocs.td | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 842ffe050adcd..2fffc0daabee3 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -3513,6 +3513,7 @@ it is instead imported as ``Void``. * ``swift_error(nonnull_error)`` means that calls to the function should be considered to have thrown if they leave a non-null error in the error parameter. The return type is left unmodified. + }]; } From 45d0343900d3005d1d00cbb1a87c419c085dec71 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 11 Sep 2020 15:12:15 -0700 Subject: [PATCH 0418/1079] [MC] Allow .org directives in SHT_NOBITS sections This is used by kvm-unit-tests and can be trivially supported. 
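For illustration, a minimal use of .org in a virtual section looks like this
(a sketch in GAS syntax; the label name is made up and not taken from
kvm-unit-tests):

    .bss
  buffer:
    .zero 1
    .org buffer+64    # pad the zero-initialized section out to 64 bytes

Because an SHT_NOBITS section has no contents in the object file, the .org
fragment emits no bytes and only grows the recorded section size.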
---
 llvm/lib/MC/MCAssembler.cpp |  2 ++
 llvm/test/MC/ELF/org.s      | 24 +++++++++++++++---------
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index 9515b7e2642bc..1b2eb2412a161 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -754,6 +754,8 @@ void MCAssembler::writeSectionData(raw_ostream &OS, const MCSection *Sec,
       assert((cast<MCFillFragment>(F).getValue() == 0) &&
              "Invalid fill in virtual section!");
       break;
+    case MCFragment::FT_Org:
+      break;
     }
   }

diff --git a/llvm/test/MC/ELF/org.s b/llvm/test/MC/ELF/org.s
index ec6264f823c27..d8f52311420ee 100644
--- a/llvm/test/MC/ELF/org.s
+++ b/llvm/test/MC/ELF/org.s
@@ -1,15 +1,21 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -S - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple x86_64 %s -o - | llvm-readobj -S - | FileCheck %s --strict-whitespace
 
 .zero 4
 foo:
 .zero 4
 .org foo+16
 
-// CHECK: Section {
-// CHECK: Name: .text
-// CHECK-NEXT: Type:
-// CHECK-NEXT: Flags [
-// CHECK: ]
-// CHECK-NEXT: Address:
-// CHECK-NEXT: Offset:
-// CHECK-NEXT: Size: 20
+.bss
+ .zero 1
+# .org is a zero initializer and can appear in a SHT_NOBITS section.
+ .org .bss+5
+
+# CHECK: Section {
+# CHECK: Name: .text
+# CHECK: Size:
+# CHECK-SAME: {{ 20$}}
+
+# CHECK: Section {
+# CHECK: Name: .bss
+# CHECK: Size:
+# CHECK-SAME: {{ 5$}}

From 658475897b14781070549f72483fd283e3fe50aa Mon Sep 17 00:00:00 2001
From: Vitaly Buka
Date: Fri, 11 Sep 2020 13:45:07 -0700
Subject: [PATCH 0419/1079] [NFC][Asan] Early return from GetBlockBegin

---
 .../lib/sanitizer_common/sanitizer_allocator_primary64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h
index 774c09e424952..0a18b0c58ef79 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h
@@ -186,13 +186,13 @@ class SizeClassAllocator64 {
 
   void *GetBlockBegin(const void *p) {
     uptr class_id = GetSizeClass(p);
+    if (class_id >= kNumClasses) return nullptr;
     uptr size = ClassIdToSize(class_id);
     if (!size) return nullptr;
     uptr chunk_idx = GetChunkIdx((uptr)p, size);
     uptr reg_beg = GetRegionBegin(p);
     uptr beg = chunk_idx * size;
     uptr next_beg = beg + size;
-    if (class_id >= kNumClasses) return nullptr;
     const RegionInfo *region = AddressSpaceView::Load(GetRegionInfo(class_id));
     if (region->mapped_user >= next_beg)
       return reinterpret_cast<void *>(reg_beg + beg);

From e10df779f097e3a1fb02d901117ce71a5dd9dda2 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko
Date: Sat, 12 Sep 2020 01:07:54 +0200
Subject: [PATCH 0420/1079] Fix clang Wrange-loop-analysis in BuildTree.cpp

Building on Mac OS with clang 12:

```
jhemphill@jhemphill-mbp build % clang --version
Apple clang version 12.0.0 (clang-1200.0.26.2)
Target: x86_64-apple-darwin19.6.0
Thread model: posix
InstalledDir: /Library/Developer/CommandLineTools/usr/bin
```

yields one warning:

```
/Users/jhemphill/oss/llvm-project/clang/lib/Tooling/Syntax/BuildTree.cpp:1126:22: warning: loop variable 'Arg' is always a copy because the range of type 'llvm::iterator_range<clang::Stmt::CastIterator<clang::Expr, clang::Expr *const, clang::Stmt *const> >' does not return a reference [-Wrange-loop-analysis]
  for (const auto &Arg : Args) {
                   ^
/Users/jhemphill/oss/llvm-project/clang/lib/Tooling/Syntax/BuildTree.cpp:1126:10: note: use non-reference type 'clang::Expr *'
  for (const auto &Arg : Args) {
```

It appears that `Arg` is an `Expr*`,
passed by value rather than by const reference.

Reviewed By: eduucaldas, gribozavr2

Differential Revision: https://reviews.llvm.org/D87482
---
 clang/lib/Tooling/Syntax/BuildTree.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp
index dab1457fbdba6..3e0573ac4ffcf 100644
--- a/clang/lib/Tooling/Syntax/BuildTree.cpp
+++ b/clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -1126,7 +1126,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
   syntax::CallArguments *
   buildCallArguments(CallExpr::arg_range ArgsAndDefaultArgs) {
     auto Args = dropDefaultArgs(ArgsAndDefaultArgs);
-    for (const auto &Arg : Args) {
+    for (auto *Arg : Args) {
       Builder.markExprChild(Arg, syntax::NodeRole::ListElement);
       const auto *DelimiterToken =
           std::next(Builder.findToken(Arg->getEndLoc()));

From 76e3a27c16d2a8171454cf12a33e35e3ae6f9dc2 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere
Date: Fri, 11 Sep 2020 14:33:55 -0700
Subject: [PATCH 0421/1079] [lldb] Add test for CFMutableDictionaryRef

While writing a test for a change in Foundation I noticed we didn't yet
test CFMutableDictionaryRef.
---
 .../data-formatter-objc/TestDataFormatterObjCNSContainer.py | 4 +++-
 .../functionalities/data-formatter/data-formatter-objc/main.m | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSContainer.py b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSContainer.py
index d13d5d5df1d5b..05367c144b302 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSContainer.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSContainer.py
@@ -21,7 +21,7 @@ def test_nscontainers_with_run_command(self):
 
     def nscontainers_data_formatter_commands(self):
         self.expect(
-            'frame variable newArray nsDictionary newDictionary nscfDictionary cfDictionaryRef newMutableDictionary cfarray_ref mutable_array_ref',
+            'frame variable newArray nsDictionary newDictionary nscfDictionary cfDictionaryRef newMutableDictionary newMutableDictionaryRef cfarray_ref mutable_array_ref',
             substrs=[
                 '(NSArray *) newArray = ',
                 ' @"50 elements"',
@@ -35,6 +35,8 @@ def nscontainers_data_formatter_commands(self):
                 ' 2 key/value pairs',
                 '(NSDictionary *) newMutableDictionary = ',
                 ' 21 key/value pairs',
+                '(CFMutableDictionaryRef) newMutableDictionaryRef = ',
+                ' 21 key/value pairs',
                 '(CFArrayRef) cfarray_ref = ',
                 ' @"3 elements"',
                 '(CFMutableArrayRef) mutable_array_ref = ',
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/main.m b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/main.m
index 169b3aed4f222..409cb0a993f9d 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/main.m
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/main.m
@@ -476,6 +476,8 @@ int main(int argc, const char *argv[]) {
   [newMutableDictionary setObject:@"foo" forKey:@"bar19"];
   [newMutableDictionary setObject:@"foo" forKey:@"bar20"];
 
+  CFMutableDictionaryRef newMutableDictionaryRef = CFDictionaryCreateMutableCopy(kCFAllocatorDefault, 0, newMutableDictionary);
+
   id cfKeys[4] = {@"foo", @"bar", @"baz", @"quux"};
   id cfValues[4] = {@"foo", @"bar", @"baz", @"quux"};
   NSDictionary *nsDictionary = CFBridgingRelease(

From 83286a1a8f059d1664b64341854676a36a85cecd Mon Sep 17 00:00:00 2001
From: Zequan Wu
Date: Thu,
10 Sep 2020 17:45:16 -0700
Subject: [PATCH 0422/1079] [MS ABI] Add mangled type for auto template
 parameter whose argument kind is Integral

---
 clang/include/clang/Basic/LangOptions.h        |   1 +
 clang/lib/AST/MicrosoftMangle.cpp              |  61 ++++++++------
 .../CodeGenCXX/mangle-ms-auto-templates.cpp    |  47 ++++++++++++
 3 files changed, 86 insertions(+), 23 deletions(-)
 create mode 100644 clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp

diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 4e277435bf8fc..2c8bb55cb5d93 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -119,6 +119,7 @@ class LangOptions : public LangOptionsBase {
     MSVC2017 = 1910,
     MSVC2017_5 = 1912,
     MSVC2017_7 = 1914,
+    MSVC2019 = 1920,
   };
 
   /// Clang versions with different platform ABI conformance.
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index 55ac7629a54c3..376b17dc7995f 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -378,8 +378,10 @@ class MicrosoftCXXNameMangler {
   void mangleFunctionClass(const FunctionDecl *FD);
   void mangleCallingConvention(CallingConv CC);
   void mangleCallingConvention(const FunctionType *T);
-  void mangleIntegerLiteral(const llvm::APSInt &Number, bool IsBoolean);
-  void mangleExpression(const Expr *E);
+  void mangleIntegerLiteral(const llvm::APSInt &Number,
+                            const NonTypeTemplateParmDecl *PD = nullptr,
+                            QualType TemplateArgType = QualType());
+  void mangleExpression(const Expr *E, const NonTypeTemplateParmDecl *PD);
   void mangleThrowSpecification(const FunctionProtoType *T);
 
   void mangleTemplateArgs(const TemplateDecl *TD,
@@ -1357,24 +1359,36 @@ MicrosoftCXXNameMangler::mangleUnscopedTemplateName(const TemplateDecl *TD) {
   mangleUnqualifiedName(TD);
 }
 
-void MicrosoftCXXNameMangler::mangleIntegerLiteral(const llvm::APSInt &Value,
-                                                   bool IsBoolean) {
+void MicrosoftCXXNameMangler::mangleIntegerLiteral(
+    const llvm::APSInt &Value, const NonTypeTemplateParmDecl *PD,
+    QualType TemplateArgType) {
   // <integer-literal> ::= $0 <number>
-  Out << "$0";
-  // Make sure booleans are encoded as 0/1.
-  if (IsBoolean && Value.getBoolValue())
-    mangleNumber(1);
-  else if (Value.isSigned())
+  Out << "$";
+
+  // Since MSVC 2019, add 'M[<type>]' after '$' for auto template parameter
+  // when argument is integer.
+  if (getASTContext().getLangOpts().isCompatibleWithMSVC(
+          LangOptions::MSVC2019) &&
+      PD && PD->getType()->getTypeClass() == Type::Auto &&
+      !TemplateArgType.isNull()) {
+    Out << "M";
+    mangleType(TemplateArgType, SourceRange(), QMM_Drop);
+  }
+
+  Out << "0";
+
+  if (Value.isSigned())
     mangleNumber(Value.getSExtValue());
   else
     mangleNumber(Value.getZExtValue());
 }
 
-void MicrosoftCXXNameMangler::mangleExpression(const Expr *E) {
+void MicrosoftCXXNameMangler::mangleExpression(
+    const Expr *E, const NonTypeTemplateParmDecl *PD) {
   // See if this is a constant expression.
if (Optional<llvm::APSInt> Value =
          E->getIntegerConstantExpr(Context.getASTContext())) {
-    mangleIntegerLiteral(*Value, E->getType()->isBooleanType());
+    mangleIntegerLiteral(*Value, PD, E->getType());
     return;
   }
 
@@ -1448,10 +1462,12 @@ void MicrosoftCXXNameMangler::mangleTemplateArg(const TemplateDecl *TD,
     }
     break;
   }
-  case TemplateArgument::Integral:
+  case TemplateArgument::Integral: {
+    QualType T = TA.getIntegralType();
     mangleIntegerLiteral(TA.getAsIntegral(),
-                         TA.getIntegralType()->isBooleanType());
+                         cast<NonTypeTemplateParmDecl>(Parm), T);
     break;
+  }
   case TemplateArgument::NullPtr: {
     QualType T = TA.getNullPtrType();
     if (const MemberPointerType *MPT = T->getAs<MemberPointerType>()) {
@@ -1473,16 +1489,18 @@ void MicrosoftCXXNameMangler::mangleTemplateArg(const TemplateDecl *TD,
       // However, we are free to use 0 *if* we would use multiple fields for
       // non-nullptr member pointers.
       if (!RD->nullFieldOffsetIsZero()) {
-        mangleIntegerLiteral(llvm::APSInt::get(-1), /*IsBoolean=*/false);
+        mangleIntegerLiteral(llvm::APSInt::get(-1),
+                             cast<NonTypeTemplateParmDecl>(Parm), T);
         return;
       }
     }
   }
-    mangleIntegerLiteral(llvm::APSInt::getUnsigned(0), /*IsBoolean=*/false);
+    mangleIntegerLiteral(llvm::APSInt::getUnsigned(0),
+                         cast<NonTypeTemplateParmDecl>(Parm), T);
     break;
   }
   case TemplateArgument::Expression:
-    mangleExpression(TA.getAsExpr());
+    mangleExpression(TA.getAsExpr(), cast<NonTypeTemplateParmDecl>(Parm));
    break;
   case TemplateArgument::Pack: {
     ArrayRef<TemplateArgument> TemplateArgs = TA.getPackAsArray();
@@ -1814,8 +1832,7 @@ void MicrosoftCXXNameMangler::mangleAddressSpaceType(QualType T,
   if (Context.getASTContext().addressSpaceMapManglingFor(AS)) {
     unsigned TargetAS = Context.getASTContext().getTargetAddressSpace(AS);
     Extra.mangleSourceName("_AS");
-    Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(TargetAS),
-                               /*IsBoolean*/ false);
+    Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(TargetAS));
   } else {
     switch (AS) {
     default:
@@ -2707,8 +2724,7 @@ void MicrosoftCXXNameMangler::mangleType(const VectorType *T, Qualifiers Quals,
     Stream << "?$";
     Extra.mangleSourceName("__vector");
     Extra.mangleType(QualType(ET, 0), Range, QMM_Escape);
-    Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumElements()),
-                               /*IsBoolean=*/false);
+    Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumElements()));
 
     mangleArtificialTagType(TTK_Union, TemplateMangling, {"__clang"});
   }
@@ -2947,7 +2963,7 @@ void MicrosoftCXXNameMangler::mangleType(const PipeType *T, Qualifiers,
   Stream << "?$";
   Extra.mangleSourceName("ocl_pipe");
   Extra.mangleType(ElementType, Range, QMM_Escape);
-  Extra.mangleIntegerLiteral(llvm::APSInt::get(T->isReadOnly()), true);
+  Extra.mangleIntegerLiteral(llvm::APSInt::get(T->isReadOnly()));
 
   mangleArtificialTagType(TTK_Struct, TemplateMangling, {"__clang"});
 }
@@ -2987,8 +3003,7 @@ void MicrosoftCXXNameMangler::mangleType(const ExtIntType *T, Qualifiers,
     Extra.mangleSourceName("_UExtInt");
   else
     Extra.mangleSourceName("_ExtInt");
-  Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumBits()),
-                             /*IsBoolean=*/false);
+  Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumBits()));
 
   mangleArtificialTagType(TTK_Struct, TemplateMangling, {"__clang"});
 }
diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp
new file mode 100644
index 0000000000000..c17f5f5e4477f
--- /dev/null
+++ b/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -std=c++17 -fms-compatibility-version=19.20 -emit-llvm %s -o - -fms-extensions -fdelayed-template-parsing -triple=x86_64-pc-windows-msvc | FileCheck --check-prefix=AFTER %s
+//
RUN: %clang_cc1 -std=c++17 -fms-compatibility-version=19.14 -emit-llvm %s -o - -fms-extensions -fdelayed-template-parsing -triple=x86_64-pc-windows-msvc | FileCheck --check-prefix=BEFORE %s
+
+template <auto a>
+class AutoParmTemplate {
+public:
+  AutoParmTemplate() {}
+};
+
+template <auto...>
+class AutoParmsTemplate {
+public:
+  AutoParmsTemplate() {}
+};
+
+template <auto a>
+auto AutoFunc() {
+  return a;
+}
+
+void template_mangling() {
+  AutoFunc<1>();
+  // AFTER: call {{.*}} @"??$AutoFunc@$MH00@@YA?A?@@XZ"
+  // BEFORE: call {{.*}} @"??$AutoFunc@$00@@YA?A?@@XZ"
+  AutoParmTemplate<0> auto_int;
+  // AFTER: call {{.*}} @"??0?$AutoParmTemplate@$MH0A@@@QEAA@XZ"
+  // BEFORE: call {{.*}} @"??0?$AutoParmTemplate@$0A@@@QEAA@XZ"
+  AutoParmTemplate<'a'> auto_char;
+  // AFTER: call {{.*}} @"??0?$AutoParmTemplate@$MD0GB@@@QEAA@XZ"
+  // BEFORE: call {{.*}} @"??0?$AutoParmTemplate@$0GB@@@QEAA@XZ"
+  AutoParmTemplate<9223372036854775807LL> int64_max;
+  // AFTER: call {{.*}} @"??0?$AutoParmTemplate@$M_J0HPPPPPPPPPPPPPPP@@@QEAA@XZ"
+  // BEFORE: call {{.*}} @"??0?$AutoParmTemplate@$0HPPPPPPPPPPPPPPP@@@QEAA@XZ"
+  AutoParmTemplate<-9223372036854775807LL - 1LL> int64_min;
+  // AFTER: call {{.*}} @"??0?$AutoParmTemplate@$M_J0?IAAAAAAAAAAAAAAA@@@QEAA@XZ"
+  // BEFORE: call {{.*}} @"??0?$AutoParmTemplate@$0?IAAAAAAAAAAAAAAA@@@QEAA@XZ"
+  AutoParmTemplate<(unsigned long long)-1> uint64_neg_1;
+  // AFTER: call {{.*}} @"??0?$AutoParmTemplate@$M_K0?0@@QEAA@XZ"
+  // BEFORE: call {{.*}} @"??0?$AutoParmTemplate@$0?0@@QEAA@XZ"
+
+  AutoParmsTemplate<0, false, 'a'> c1;
+  // AFTER: call {{.*}} @"??0?$AutoParmsTemplate@$MH0A@$M_N0A@$MD0GB@@@QEAA@XZ"
+  // BEFORE: call {{.*}} @"??0?$AutoParmsTemplate@$0A@$0A@$0GB@@@QEAA@XZ"
+  AutoParmsTemplate<(unsigned long)1, 9223372036854775807LL> c2;
+  // AFTER: call {{.*}} @"??0?$AutoParmsTemplate@$MK00$M_J0HPPPPPPPPPPPPPPP@@@QEAA@XZ"
+  // BEFORE: call {{.*}} @"??0?$AutoParmsTemplate@$00$0HPPPPPPPPPPPPPPP@@@QEAA@XZ"
+}

From 12292c8b27aca8d173a3a2825f2e8aeb383cc695 Mon Sep 17 00:00:00 2001
From: Vitaly Buka
Date: Fri, 11 Sep 2020 14:22:54 -0700
Subject: [PATCH 0423/1079] [NFC][Asan] Add another lsan test

---
 compiler-rt/test/asan/TestCases/leaks.cpp | 29 +++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 compiler-rt/test/asan/TestCases/leaks.cpp

diff --git a/compiler-rt/test/asan/TestCases/leaks.cpp b/compiler-rt/test/asan/TestCases/leaks.cpp
new file mode 100644
index 0000000000000..9c076dd894ebf
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/leaks.cpp
@@ -0,0 +1,29 @@
+// Test for LeakSanitizer+AddressSanitizer of different sizes.
+// REQUIRES: leak-detection
+//
+// RUN: %clangxx_asan -O0 %s -o %t
+// RUN: not %run %t 0 2>&1 | FileCheck %s
+// RUN: not %run %t 1 2>&1 | FileCheck %s
+// RUN: not %run %t 1000 2>&1 | FileCheck %s
+// RUN: not %run %t 1000000 2>&1 | FileCheck %s
+// RUN: not %run %t 10000000 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+int *t;
+
+__attribute__((noopt)) void leak(int n) {
+  // Repeat few times to make sure that at least one pointer is
+  // not somewhere on the stack.
+  for (int i = 0; i < 10; ++i) {
+    t = new int[n];
+    printf("t: %p\n", t);
+    t = 0;
+  }
+}
+
+int main(int argc, char **argv) {
+  leak(atoi(argv[1]));
+}
+// CHECK: LeakSanitizer: detected memory leaks

From 31ecf8d29d81d196374a562c6d2bd2c25a62861e Mon Sep 17 00:00:00 2001
From: Yuanfang Chen
Date: Fri, 11 Sep 2020 15:56:27 -0700
Subject: [PATCH 0424/1079] [NewPM][CodeGen] Introduce CodeGenPassBuilder to
 help build codegen pipeline

Following up on D67687.
Please refer to the RFC here http://lists.llvm.org/pipermail/llvm-dev/2020-July/143309.html `CodeGenPassBuilder` is the NPM counterpart of `TargetPassConfig` with below differences. - Debugging features (MIR print/verify, disable pass, start/stop-before/after, etc.) living in `TargetPassConfig` are moved to use PassInstrument as much as possible. (Implementation also lives in `TargetPassConfig.cpp`) - `TargetPassConfig` is a polymorphic base (virtual inheritance) to build the target-dependent pipeline whereas `CodeGenPassBuilder` is the CRTP base/helper to implement the target-dependent pipeline. The motivation is flexibility for targets to customize the pipeline, inlining opportunity, and fits the overall NPM value semantics design. - `TargetPassConfig` is a legacy immutable pass to declare hooks for targets to customize some target-independent codegen layer behavior. This is partially ported to TargetMachine::options. The rest, such as `createMachineScheduler/createPostMachineScheduler`, are left out for now. They should be implemented in LLVMTargetMachine in the future. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D83608 --- .../llvm/CodeGen/CGPassBuilderOption.h | 110 ++ .../include/llvm/CodeGen/CodeGenPassBuilder.h | 1171 +++++++++++++++++ .../llvm/CodeGen/MachinePassRegistry.def | 195 +++ .../llvm/Passes/StandardInstrumentations.h | 5 + llvm/include/llvm/Target/TargetMachine.h | 21 + llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/CodeGenPassBuilder.cpp | 25 + llvm/lib/CodeGen/LLVMTargetMachine.cpp | 35 +- llvm/lib/CodeGen/TargetPassConfig.cpp | 161 ++- 9 files changed, 1704 insertions(+), 20 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/CGPassBuilderOption.h create mode 100644 llvm/include/llvm/CodeGen/CodeGenPassBuilder.h create mode 100644 llvm/include/llvm/CodeGen/MachinePassRegistry.def create mode 100644 llvm/lib/CodeGen/CodeGenPassBuilder.cpp diff --git a/llvm/include/llvm/CodeGen/CGPassBuilderOption.h b/llvm/include/llvm/CodeGen/CGPassBuilderOption.h new file mode 100644 index 0000000000000..4553060e687bf --- /dev/null +++ b/llvm/include/llvm/CodeGen/CGPassBuilderOption.h @@ -0,0 +1,110 @@ +//===- CGPassBuilderOption.h - Options for pass builder ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the options influencing building of codegen pipeline. 
+//
+//===----------------------------------------------------------------------===//

+#ifndef LLVM_CODEGEN_CGPASSBUILDEROPTION_H
+#define LLVM_CODEGEN_CGPASSBUILDEROPTION_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Target/TargetOptions.h"
+#include <vector>
+
+namespace llvm {
+class TargetMachine;
+
+enum class RunOutliner { TargetDefault, AlwaysOutline, NeverOutline };
+enum class RegAllocType { Default, Basic, Fast, Greedy, PBQP };
+enum class CFLAAType { None, Steensgaard, Andersen, Both };
+
+// Not one-on-one but mostly corresponding to commandline options in
+// TargetPassConfig.cpp
+struct CGPassBuilderOption {
+  // Enable optimized register allocation compilation path
+  Optional<bool> OptimizeRegAlloc;
+
+  // Enable interprocedural register allocation to reduce load/store at
+  // procedure calls
+  Optional<bool> EnableIPRA;
+
+  // Enable debug logging of pass pipeline
+  bool DebugPM = false;
+
+  // Disable machine function verification
+  bool DisableVerify = false;
+
+  // Fold null checks into faulting memory operations
+  bool EnableImplicitNullChecksPass = false;
+
+  // Collect probability-driven block placement stats
+  bool EnableMachineBlockPlacementStatsPass = false;
+
+  // Run MachineScheduler post regalloc (independent of preRA sched)
+  bool EnablePostMachineSchedulerPass = false;
+
+  // Run live interval analysis earlier in the pipeline
+  bool EnableLiveIntervalsPass = false;
+
+  // Disable Loop Strength Reduction Pass
+  bool DisableLoopStrengthReducePass = false;
+
+  // Disable Codegen Prepare
+  bool DisableCodeGenPreparePass = false;
+
+  // Disable MergeICmps Pass
+  bool DisableMergeICmpsPass = false;
+
+  // Disable Partial Libcall Inlining Pass
+  bool DisablePartiallyInlineLibCallsPass = false;
+
+  // Disable ConstantHoisting Pass
+  bool DisableConstantHoistingPass = false;
+
+  // Print LLVM IR produced by the loop-reduce pass
+  bool PrintAfterLSR = false;
+
+  // Print LLVM IR input to isel pass
+  bool PrintISelInput = false;
+
+  // Dump garbage collector data
+  bool PrintGCInfo = false;
+
+  // Enable codegen in SCC order.
+  bool RequiresCodeGenSCCOrder = false;
+
+  // Enable the machine outliner
+  RunOutliner EnableMachineOutliner = RunOutliner::TargetDefault;
+
+  // Register allocator to use
+  RegAllocType RegAlloc = RegAllocType::Default;
+
+  // Experimental option to use CFL-AA in codegen
+  CFLAAType UseCFLAA = CFLAAType::None;
+
+  // Enable abort calls when "global" instruction selection fails to
+  // lower/select an instruction
+  Optional<GlobalISelAbortMode> EnableGlobalISelAbort;
+
+  // Verify generated machine code
+  Optional<bool> VerifyMachineCode;
+
+  // Enable the "fast" instruction selector
+  Optional<bool> EnableFastISelOption;
+
+  // Enable the "global" instruction selector
+  Optional<bool> EnableGlobalISelOption;
+};
+
+CGPassBuilderOption getCGPassBuilderOption();
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_CGPASSBUILDEROPTION_H
diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
new file mode 100644
index 0000000000000..0c679eb174b76
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
@@ -0,0 +1,1171 @@
+//===- Construction of codegen pass pipelines ------------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Interfaces for registering analysis passes, producing common pass manager
+/// configurations, and parsing of pass pipelines.
+///
+/// TODO: handle addRequiredID where, in legacy PM, one pass requires another
+/// pass to run as prerequisite.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CODEGENPASSBUILDER_H
+#define LLVM_CODEGEN_CODEGENPASSBUILDER_H
+
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
+#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/CodeGen/CGPassBuilderOption.h"
+#include "llvm/CodeGen/ExpandReductions.h"
+#include "llvm/CodeGen/MIRPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachinePassManager.h"
+#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
+#include "llvm/CodeGen/UnreachableBlockElim.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/ConstantHoisting.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
+#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
+#include "llvm/Transforms/Scalar/MergeICmps.h"
+#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
+#include "llvm/Transforms/Utils/LowerInvoke.h"
+#include <cassert>
+#include <map>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+namespace llvm {
+
+// FIXME: Dummy target independent passes definitions that have not yet been
+// ported to new pass manager. Once they do, remove these.
+#define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+  struct PASS_NAME : public PassInfoMixin<PASS_NAME> { \
+    template <typename... Ts> PASS_NAME(Ts &&...) {} \
+    PreservedAnalyses run(Function &, FunctionAnalysisManager &) { \
+      return PreservedAnalyses::all(); \
+    } \
+  };
+#define DUMMY_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+  struct PASS_NAME : public PassInfoMixin<PASS_NAME> { \
+    template <typename... Ts> PASS_NAME(Ts &&...) {} \
+    PreservedAnalyses run(Module &, ModuleAnalysisManager &) { \
+      return PreservedAnalyses::all(); \
+    } \
+  };
+#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+  struct PASS_NAME : public PassInfoMixin<PASS_NAME> { \
+    template <typename... Ts> PASS_NAME(Ts &&...) {} \
+    Error run(Module &, MachineFunctionAnalysisManager &) { \
+      return Error::success(); \
+    } \
+    PreservedAnalyses run(MachineFunction &, \
+                          MachineFunctionAnalysisManager &) { \
+      llvm_unreachable("this api is to make new PM api happy"); \
+    } \
+    static AnalysisKey Key; \
+  };
+#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+  struct PASS_NAME : public PassInfoMixin<PASS_NAME> { \
+    template <typename... Ts> PASS_NAME(Ts &&...)
{} \
+    PreservedAnalyses run(MachineFunction &, \
+                          MachineFunctionAnalysisManager &) { \
+      return PreservedAnalyses::all(); \
+    } \
+    static AnalysisKey Key; \
+  };
+#include "MachinePassRegistry.def"
+
+/// This class provides access to building LLVM's passes.
+///
+/// Its members provide the baseline state available to passes during their
+/// construction. The \c MachinePassRegistry.def file specifies how to construct
+/// all of the built-in passes, and those may reference these members during
+/// construction.
+template <typename Derived> class CodeGenPassBuilder {
+public:
+  explicit CodeGenPassBuilder(LLVMTargetMachine &TM, CGPassBuilderOption Opts,
+                              PassInstrumentationCallbacks *PIC)
+      : TM(TM), Opt(Opts), PIC(PIC) {
+    // Target could set CGPassBuilderOption::MISchedPostRA to true to achieve
+    //     substitutePass(&PostRASchedulerID, &PostMachineSchedulerID)
+
+    // Target should override TM.Options.EnableIPRA in their target-specific
+    // LLVMTM ctor. See TargetMachine::setGlobalISel for example.
+    if (Opt.EnableIPRA)
+      TM.Options.EnableIPRA = *Opt.EnableIPRA;
+
+    if (Opt.EnableGlobalISelAbort)
+      TM.Options.GlobalISelAbort = *Opt.EnableGlobalISelAbort;
+
+    if (!Opt.OptimizeRegAlloc)
+      Opt.OptimizeRegAlloc = getOptLevel() != CodeGenOpt::None;
+
+    if (!Opt.VerifyMachineCode) {
+#ifdef EXPENSIVE_CHECKS
+      Opt.VerifyMachineCode = TM.isMachineVerifierClean();
+#else
+      Opt.VerifyMachineCode = false;
+#endif
+    }
+  }
+
+  Expected<std::pair<ModulePassManager, MachineFunctionPassManager>>
+  buildPipeline(raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
+                CodeGenFileType FileType) const;
+
+  void registerModuleAnalyses(ModuleAnalysisManager &) const;
+  void registerFunctionAnalyses(FunctionAnalysisManager &) const;
+  void registerMachineFunctionAnalyses(MachineFunctionAnalysisManager &) const;
+  std::pair<StringRef, bool> getPassNameFromLegacyName(StringRef) const;
+
+  void registerAnalyses(MachineFunctionAnalysisManager &MFAM) const {
+    registerModuleAnalyses(*MFAM.MAM);
+    registerFunctionAnalyses(*MFAM.FAM);
+    registerMachineFunctionAnalyses(MFAM);
+  }
+
+  PassInstrumentationCallbacks *getPassInstrumentationCallbacks() const {
+    return PIC;
+  }
+
+protected:
+  template <typename PassT> using has_key_t = decltype(PassT::Key);
+
+  template <typename PassT>
+  using is_module_pass_t = decltype(std::declval<PassT &>().run(
+      std::declval<Module &>(), std::declval<ModuleAnalysisManager &>()));
+
+  template <typename PassT>
+  using is_function_pass_t = decltype(std::declval<PassT &>().run(
+      std::declval<Function &>(), std::declval<FunctionAnalysisManager &>()));
+
+  // Function object to maintain state while adding codegen IR passes.
+  class AddIRPass {
+  public:
+    AddIRPass(bool DebugPM) : MPM(DebugPM), FPM(DebugPM) {
+      AddingFunctionPasses = false;
+    }
+
+    // Add Function Pass
+    template <typename PassT>
+    std::enable_if_t<is_detected<is_function_pass_t, PassT>::value>
+    operator()(PassT &&Pass) {
+      if (!AddingFunctionPasses)
+        AddingFunctionPasses = true;
+      FPM.addPass(std::forward<PassT>(Pass));
+    }
+
+    // Add Module Pass
+    template <typename PassT>
+    std::enable_if_t<is_detected<is_module_pass_t, PassT>::value &&
+                     !is_detected<is_function_pass_t, PassT>::value>
+    operator()(PassT &&Pass) {
+      assert((!AddingFunctionPasses) &&
+             "could not add module pass after adding function pass");
+      MPM.addPass(std::forward<PassT>(Pass));
+    }
+
+    ModulePassManager releasePM() {
+      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+      return std::move(MPM);
+    }
+
+  private:
+    ModulePassManager MPM;
+    FunctionPassManager FPM;
+    // The codegen IR pipeline is mostly function passes, with the exception of
+    // a few loop and module passes. `AddingFunctionPasses` ensures that we can
+    // only add module passes at the beginning of the pipeline. Once
+    // we begin adding function passes, we can no longer add module passes.
+    // This special-casing introduces fewer adaptor passes. If we have the
+    // need to add module passes after function passes, we can change the
+    // implementation to accommodate that.
+    bool AddingFunctionPasses;
+  };
+
+  // Function object to maintain state while adding codegen machine passes.
+  class AddMachinePass {
+  public:
+    AddMachinePass(bool DebugPM, bool RequiresCodeGenSCCOrder,
+                   bool VerifyMachineCode)
+        : PM(DebugPM, RequiresCodeGenSCCOrder, VerifyMachineCode) {}
+
+    template <typename PassT> void operator()(PassT &&Pass) {
+      static_assert(
+          is_detected<has_key_t, PassT>::value,
+          "Machine function pass must define a static member variable `Key`.");
+      for (auto &C : BeforeCallbacks) {
+        if (!C(&PassT::Key))
+          return;
+      }
+      PM.addPass(std::forward<PassT>(Pass));
+      for (auto &C : AfterCallbacks)
+        C(&PassT::Key);
+    }
+
+    template <typename PassT> void insertPass(AnalysisKey *ID, PassT Pass) {
+      AfterCallbacks.emplace_back(
+          [this, ID, Pass = std::move(Pass)](AnalysisKey *PassID) {
+            if (PassID == ID)
+              this->PM.addPass(std::move(Pass));
+          });
+    }
+
+    void disablePass(AnalysisKey *ID) {
+      BeforeCallbacks.emplace_back(
+          [ID](AnalysisKey *PassID) { return PassID != ID; });
+    }
+
+    MachineFunctionPassManager releasePM() { return std::move(PM); }
+
+  private:
+    MachineFunctionPassManager PM;
+    SmallVector<llvm::unique_function<bool(AnalysisKey *)>, 4> BeforeCallbacks;
+    SmallVector<llvm::unique_function<void(AnalysisKey *)>, 4> AfterCallbacks;
+  };
+
+  LLVMTargetMachine &TM;
+  CGPassBuilderOption Opt;
+  PassInstrumentationCallbacks *PIC;
+
+  /// Targets override these hooks to register target-specific analyses and to
+  /// map legacy pass names.
+  void registerTargetAnalysis(ModuleAnalysisManager &) const {}
+  void registerTargetAnalysis(FunctionAnalysisManager &) const {}
+  void registerTargetAnalysis(MachineFunctionAnalysisManager &) const {}
+  std::pair<StringRef, bool> getTargetPassNameFromLegacyName(StringRef) const {
+    return {"", false};
+  }
+
+  template <typename TMC> TMC &getTM() const { return static_cast<TMC &>(TM); }
+  CodeGenOpt::Level getOptLevel() const { return TM.getOptLevel(); }
+
+  /// Check whether or not GlobalISel should abort on error.
+  /// When this is disabled, GlobalISel will fall back on SDISel instead of
+  /// erroring out.
+  bool isGlobalISelAbortEnabled() const {
+    return TM.Options.GlobalISelAbort == GlobalISelAbortMode::Enable;
+  }
+
+  /// Check whether or not a diagnostic should be emitted when GlobalISel
+  /// uses the fallback path. In other words, a diagnostic is emitted when
+  /// GlobalISel failed and isGlobalISelAbortEnabled() is false.
+  bool reportDiagnosticWhenGlobalISelFallback() const {
+    return TM.Options.GlobalISelAbort == GlobalISelAbortMode::DisableWithDiag;
+  }
+
+  /// addInstSelector - This method should install an instruction selector
+  /// pass, which converts from LLVM code to machine instructions.
+  Error addInstSelector(AddMachinePass &) const {
+    return make_error<StringError>("addInstSelector is not overridden",
+                                   inconvertibleErrorCode());
+  }
+
+  /// Add passes that optimize instruction level parallelism for out-of-order
+  /// targets. These passes are run while the machine code is still in SSA
+  /// form, so they can use MachineTraceMetrics to control their heuristics.
+  ///
+  /// All passes added here should preserve the MachineDominatorTree,
+  /// MachineLoopInfo, and MachineTraceMetrics analyses.
+  void addILPOpts(AddMachinePass &) const {}
+
+  /// This method may be implemented by targets that want to run passes
+  /// immediately before register allocation.
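+  /// A minimal sketch of an override (illustrative only; the pass named here
+  /// is hypothetical, not an existing LLVM pass):
+  ///
+  ///   void addPreRegAlloc(AddMachinePass &addPass) const {
+  ///     addPass(MyTargetPreRAFixupPass());
+  ///   }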
+  void addPreRegAlloc(AddMachinePass &) const {}
+
+  /// addPreRewrite - Add passes to the optimized register allocation pipeline
+  /// after register allocation is complete, but before virtual registers are
+  /// rewritten to physical registers.
+  ///
+  /// These passes must preserve VirtRegMap and LiveIntervals, and when running
+  /// after RABasic or RAGreedy, they should take advantage of LiveRegMatrix.
+  /// When these passes run, VirtRegMap contains legal physreg assignments for
+  /// all virtual registers.
+  ///
+  /// Note that if the target overrides addRegAssignmentOptimized, this may not
+  /// be honored. This is also not generally used for the fast variant, where
+  /// the allocation and rewriting are done in one pass.
+  void addPreRewrite(AddMachinePass &) const {}
+
+  /// Add passes to be run immediately after virtual registers are rewritten
+  /// to physical registers.
+  void addPostRewrite(AddMachinePass &) const {}
+
+  /// This method may be implemented by targets that want to run passes after
+  /// the register allocation pass pipeline but before prolog-epilog insertion.
+  void addPostRegAlloc(AddMachinePass &) const {}
+
+  /// This method may be implemented by targets that want to run passes after
+  /// prolog-epilog insertion and before the second instruction scheduling
+  /// pass.
+  void addPreSched2(AddMachinePass &) const {}
+
+  /// This method may be implemented by targets that want to run passes
+  /// immediately before machine code is emitted.
+  void addPreEmitPass(AddMachinePass &) const {}
+
+  /// Targets may add passes immediately before machine code is emitted in
+  /// this callback. This is called even later than `addPreEmitPass`.
+  // FIXME: Rename `addPreEmitPass` to something more sensible given its
+  // actual position and remove the `2` suffix here as this callback is what
+  // `addPreEmitPass` *should* be but in reality isn't.
+  void addPreEmitPass2(AddMachinePass &) const {}
+
+  /// {{@ For GlobalISel
+  ///
+
+  /// addPreISel - This method should add any "last minute" LLVM->LLVM
+  /// passes (which are run just before the instruction selector).
+  void addPreISel(AddIRPass &) const {
+    llvm_unreachable("addPreISel is not overridden");
+  }
+
+  /// This method should install an IR translator pass, which converts from
+  /// LLVM code to machine instructions with possibly generic opcodes.
+  Error addIRTranslator(AddMachinePass &) const {
+    return make_error<StringError>("addIRTranslator is not overridden",
+                                   inconvertibleErrorCode());
+  }
+
+  /// This method may be implemented by targets that want to run passes
+  /// immediately before legalization.
+  void addPreLegalizeMachineIR(AddMachinePass &) const {}
+
+  /// This method should install a legalize pass, which converts the
+  /// instruction sequence into one that can be selected by the target.
+  Error addLegalizeMachineIR(AddMachinePass &) const {
+    return make_error<StringError>("addLegalizeMachineIR is not overridden",
+                                   inconvertibleErrorCode());
+  }
+
+  /// This method may be implemented by targets that want to run passes
+  /// immediately before register bank selection.
+  void addPreRegBankSelect(AddMachinePass &) const {}
+
+  /// This method should install a register bank selector pass, which
+  /// assigns register banks to virtual registers without a register
+  /// class or register banks.
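+  /// An illustrative override (RegBankSelectPass is the not-yet-ported pass
+  /// stubbed out in MachinePassRegistry.def; a real target may install its
+  /// own pass instead):
+  ///
+  ///   Error addRegBankSelect(AddMachinePass &addPass) const {
+  ///     addPass(RegBankSelectPass());
+  ///     return Error::success();
+  ///   }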
+  Error addRegBankSelect(AddMachinePass &) const {
+    return make_error<StringError>("addRegBankSelect is not overridden",
+                                   inconvertibleErrorCode());
+  }
+
+  /// This method may be implemented by targets that want to run passes
+  /// immediately before (global) instruction selection.
+  void addPreGlobalInstructionSelect(AddMachinePass &) const {}
+
+  /// This method should install a (global) instruction selector pass, which
+  /// converts possibly generic instructions to fully target-specific
+  /// instructions, thereby constraining all generic virtual registers to
+  /// register classes.
+  Error addGlobalInstructionSelect(AddMachinePass &) const {
+    return make_error<StringError>(
+        "addGlobalInstructionSelect is not overridden",
+        inconvertibleErrorCode());
+  }
+  /// @}}
+
+  /// High level function that adds all passes necessary to go from the LLVM
+  /// IR representation to the MI representation.
+  /// Adds IR-based lowering and target-specific optimization passes, and
+  /// finally the core instruction selection passes.
+  /// \returns the module pass manager holding these passes.
+  ModulePassManager addISelPasses() const;
+
+  /// Add the actual instruction selection passes. This does not include
+  /// preparation passes on IR.
+  Expected<AddMachinePass> addCoreISelPasses() const;
+
+  /// Add the complete, standard set of LLVM CodeGen passes.
+  /// Fully developed targets will not generally override this.
+  Error addMachinePasses(AddMachinePass &) const;
+
+  /// Add passes to lower exception handling for the code generator.
+  void addPassesToHandleExceptions(AddIRPass &) const;
+
+  /// Add common target configurable passes that perform LLVM IR to IR
+  /// transforms following machine independent optimization.
+  void addIRPasses(AddIRPass &) const;
+
+  /// Add pass to prepare the LLVM IR for code generation. This should be done
+  /// before exception handling preparation passes.
+  void addCodeGenPrepare(AddIRPass &) const;
+
+  /// Add common passes that perform LLVM IR to IR transforms in preparation
+  /// for instruction selection.
+  void addISelPrepare(AddIRPass &) const;
+
+  /// Methods with trivial inline returns are convenient points in the common
+  /// codegen pass pipeline where targets may insert passes. Methods with
+  /// out-of-line standard implementations are major CodeGen stages called by
+  /// addMachinePasses. Some targets may override major stages when inserting
+  /// passes is insufficient, but maintaining overridden stages is more work.
+  ///
+
+  /// addMachineSSAOptimization - Add standard passes that optimize machine
+  /// instructions in SSA form.
+  void addMachineSSAOptimization(AddMachinePass &) const;
+
+  /// addFastRegAlloc - Add the minimum set of target-independent passes that
+  /// are required for fast register allocation.
+  Error addFastRegAlloc(AddMachinePass &) const;
+
+  /// addOptimizedRegAlloc - Add passes related to register allocation.
+  /// LLVMTargetMachine provides standard regalloc passes for most targets.
+  void addOptimizedRegAlloc(AddMachinePass &) const;
+
+  /// Add passes that optimize machine instructions after register allocation.
+  void addMachineLateOptimization(AddMachinePass &) const;
+
+  /// addGCPasses - Add late codegen passes that analyze code for garbage
+  /// collection.
+  void addGCPasses(AddMachinePass &) const {}
+
+  /// Add standard basic block placement passes.
+  void addBlockPlacement(AddMachinePass &) const;
+
+  using CreateMCStreamer =
+      std::function<Expected<std::unique_ptr<MCStreamer>>(MCContext &)>;
+  void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const {
+    llvm_unreachable("addAsmPrinter is not overridden");
+  }
+
+  /// Utilities for targets to add passes to the pass manager.
+  ///
+
+  /// addTargetRegisterAllocator - Create the register allocator pass for
+  /// this target at the current optimization level.
+  void addTargetRegisterAllocator(AddMachinePass &, bool Optimized) const;
+
+  /// addMachinePasses helper to create the target-selected or overridden
+  /// regalloc pass.
+  void addRegAllocPass(AddMachinePass &, bool Optimized) const;
+
+  /// Add core register allocator passes which do the actual register
+  /// assignment and rewriting. \returns true if any passes were added.
+  Error addRegAssignmentFast(AddMachinePass &) const;
+  Error addRegAssignmentOptimized(AddMachinePass &) const;
+
+private:
+  DerivedT &derived() { return static_cast<DerivedT &>(*this); }
+  const DerivedT &derived() const {
+    return static_cast<const DerivedT &>(*this);
+  }
+};
+
+template <typename Derived>
+Expected<std::pair<ModulePassManager, MachineFunctionPassManager>>
+CodeGenPassBuilder<Derived>::buildPipeline(raw_pwrite_stream &Out,
+                                           raw_pwrite_stream *DwoOut,
+                                           CodeGenFileType FileType) const {
+  Expected<AddMachinePass> AddPassOrErr = addCoreISelPasses();
+  if (!AddPassOrErr)
+    return AddPassOrErr.takeError();
+
+  AddMachinePass &addPass = *AddPassOrErr;
+
+  if (auto Err = derived().addMachinePasses(addPass))
+    return std::move(Err);
+
+  derived().addAsmPrinter(
+      addPass, [this, &Out, DwoOut, FileType](MCContext &Ctx) {
+        return this->TM.createMCStreamer(Out, DwoOut, FileType, Ctx);
+      });
+
+  addPass(FreeMachineFunctionPass());
+
+  return std::pair<ModulePassManager, MachineFunctionPassManager>{
+      addISelPasses(), addPass.releasePM()};
+}
+
+static inline AAManager registerAAAnalyses(CFLAAType UseCFLAA) {
+  AAManager AA;
+
+  // The order in which these are registered determines their priority when
+  // being queried.
+
+  switch (UseCFLAA) {
+  case CFLAAType::Steensgaard:
+    AA.registerFunctionAnalysis<CFLSteensAA>();
+    break;
+  case CFLAAType::Andersen:
+    AA.registerFunctionAnalysis<CFLAndersAA>();
+    break;
+  case CFLAAType::Both:
+    AA.registerFunctionAnalysis<CFLAndersAA>();
+    AA.registerFunctionAnalysis<CFLSteensAA>();
+    break;
+  default:
+    break;
+  }
+
+  // Basic AliasAnalysis support.
+  // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+  // BasicAliasAnalysis wins if they disagree. This is intended to help
+  // support "obvious" type-punning idioms.
+  AA.registerFunctionAnalysis<TypeBasedAA>();
+  AA.registerFunctionAnalysis<ScopedNoAliasAA>();
+  AA.registerFunctionAnalysis<BasicAA>();
+
+  return AA;
+}
+
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::registerModuleAnalyses(
+    ModuleAnalysisManager &MAM) const {
+#define MODULE_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR)                         \
+  MAM.registerPass([&] { return PASS_NAME CONSTRUCTOR; });
+#include "MachinePassRegistry.def"
+  derived().registerTargetAnalysis(MAM);
+}
+
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::registerFunctionAnalyses(
+    FunctionAnalysisManager &FAM) const {
+  FAM.registerPass([this] { return registerAAAnalyses(this->Opt.UseCFLAA); });
+
+#define FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR)                       \
+  FAM.registerPass([&] { return PASS_NAME CONSTRUCTOR; });
+#include "MachinePassRegistry.def"
+  derived().registerTargetAnalysis(FAM);
+}
+
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::registerMachineFunctionAnalyses(
+    MachineFunctionAnalysisManager &MFAM) const {
+#define MACHINE_FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR)               \
+  MFAM.registerPass([&] { return PASS_NAME CONSTRUCTOR; });
+#include "MachinePassRegistry.def"
+  derived().registerTargetAnalysis(MFAM);
+}
+
+// FIXME: For the new PM, it seems better to use the new pass names directly
+// on the command line.
+// Translate the name of a legacy command-line pass to the stringified name of
+// the corresponding new-PM pass. Returns the matching name and a boolean value
+// indicating whether the pass is a machine pass.
+template <typename Derived>
+std::pair<StringRef, bool>
+CodeGenPassBuilder<Derived>::getPassNameFromLegacyName(StringRef Name) const {
+  std::pair<StringRef, bool> Ret;
+  if (Name.empty())
+    return Ret;
+
+#define FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)                           \
+  if (Name == NAME)                                                           \
+    Ret = {#PASS_NAME, false};
+#define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)                     \
+  if (Name == NAME)                                                           \
+    Ret = {#PASS_NAME, false};
+#define MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)                             \
+  if (Name == NAME)                                                           \
+    Ret = {#PASS_NAME, false};
+#define DUMMY_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)                       \
+  if (Name == NAME)                                                           \
+    Ret = {#PASS_NAME, false};
+#define MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)                     \
+  if (Name == NAME)                                                           \
+    Ret = {#PASS_NAME, true};
+#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)               \
+  if (Name == NAME)                                                           \
+    Ret = {#PASS_NAME, true};
+#define MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)                   \
+  if (Name == NAME)                                                           \
+    Ret = {#PASS_NAME, true};
+#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)             \
+  if (Name == NAME)                                                           \
+    Ret = {#PASS_NAME, true};
+#include "llvm/CodeGen/MachinePassRegistry.def"
+
+  if (Ret.first.empty())
+    Ret = derived().getTargetPassNameFromLegacyName(Name);
+
+  if (Ret.first.empty())
+    report_fatal_error(Twine('\"') + Twine(Name) +
+                       Twine("\" pass could not be found."));
+
+  return Ret;
+}
+
+template <typename Derived>
+ModulePassManager CodeGenPassBuilder<Derived>::addISelPasses() const {
+  AddIRPass addPass(Opt.DebugPM);
+
+  if (TM.useEmulatedTLS())
+    addPass(LowerEmuTLSPass());
+
+  addPass(PreISelIntrinsicLoweringPass());
+
+  derived().addIRPasses(addPass);
+  derived().addCodeGenPrepare(addPass);
+  addPassesToHandleExceptions(addPass);
+  derived().addISelPrepare(addPass);
+  return addPass.releasePM();
+}
+
+/// Add common target configurable passes that perform LLVM IR to IR
+/// transforms following machine independent optimization.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addIRPasses(AddIRPass &addPass) const {
+  // Before running any passes, run the verifier to determine if the input
+  // coming from the front-end and/or optimizer is valid.
+  if (!Opt.DisableVerify)
+    addPass(VerifierPass());
+
+  // Run loop strength reduction before anything else.
+  if (getOptLevel() != CodeGenOpt::None &&
+      !Opt.DisableLoopStrengthReducePass) {
+    addPass(createFunctionToLoopPassAdaptor(
+        LoopStrengthReducePass(), /*UseMemorySSA*/ true, Opt.DebugPM));
+    // FIXME: use -stop-after so we could remove PrintAfterLSR
+    if (Opt.PrintAfterLSR)
+      addPass(PrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n"));
+  }
+
+  if (getOptLevel() != CodeGenOpt::None) {
+    // The MergeICmpsPass tries to create memcmp calls by grouping sequences
+    // of loads and compares. ExpandMemCmpPass then tries to expand those
+    // calls into optimally-sized loads and compares. The transforms are
+    // enabled by a target lowering hook.
+    if (!Opt.DisableMergeICmpsPass)
+      addPass(MergeICmpsPass());
+    addPass(ExpandMemCmpPass());
+  }
+
+  // Run GC lowering passes for builtin collectors.
+  // TODO: add a pass insertion point here
+  addPass(GCLoweringPass());
+  addPass(ShadowStackGCLoweringPass());
+  addPass(LowerConstantIntrinsicsPass());
+
+  // Make sure that no unreachable blocks are instruction selected.
+  addPass(UnreachableBlockElimPass());
+
+  // Prepare expensive constants for SelectionDAG.
+  if (getOptLevel() != CodeGenOpt::None && !Opt.DisableConstantHoistingPass)
+    addPass(ConstantHoistingPass());
+
+  if (getOptLevel() != CodeGenOpt::None &&
+      !Opt.DisablePartiallyInlineLibCallsPass)
+    addPass(PartiallyInlineLibCallsPass());
+
+  // Instrument function entry and exit, e.g. with calls to mcount().
+  addPass(EntryExitInstrumenterPass(/*PostInlining=*/true));
+
+  // Add scalarization of target's unsupported masked memory intrinsics pass.
+  // Any unsupported intrinsic will be replaced with a chain of basic blocks
+  // that store/load the elements one-by-one if the appropriate mask bit is
+  // set.
+  addPass(ScalarizeMaskedMemIntrinPass());
+
+  // Expand reduction intrinsics into shuffle sequences if the target wants to.
+  addPass(ExpandReductionsPass());
+}
+
+/// Turn exception handling constructs into something the code generators can
+/// handle.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addPassesToHandleExceptions(
+    AddIRPass &addPass) const {
+  const MCAsmInfo *MCAI = TM.getMCAsmInfo();
+  assert(MCAI && "No MCAsmInfo");
+  switch (MCAI->getExceptionHandlingType()) {
+  case ExceptionHandling::SjLj:
+    // SjLj piggy-backs on dwarf for this bit. Dwarf EH prepare needs to be
+    // run after SjLj prepare. Otherwise, catch info can get misplaced when a
+    // selector ends up more than one block removed from the parent invoke(s).
+    // This could happen when a landing pad is shared by multiple invokes and
+    // is also a target of a normal edge from elsewhere.
+    addPass(SjLjEHPreparePass());
+    LLVM_FALLTHROUGH;
+  case ExceptionHandling::DwarfCFI:
+  case ExceptionHandling::ARM:
+    addPass(DwarfEHPass());
+    break;
+  case ExceptionHandling::WinEH:
+    // We support using both GCC-style and MSVC-style exceptions on Windows,
+    // so add both preparation passes. Each pass will only actually run if it
+    // recognizes the personality function.
+    addPass(WinEHPass());
+    addPass(DwarfEHPass());
+    break;
+  case ExceptionHandling::Wasm:
+    // Wasm EH uses Windows EH instructions, but it does not need to demote
+    // PHIs on catchpads and cleanuppads because it does not outline them into
+    // funclets. Catchswitch blocks are not lowered in SelectionDAG, so we
+    // should remove PHIs there.
+    addPass(WinEHPass(/*DemoteCatchSwitchPHIOnly=*/false));
+    addPass(WasmEHPass());
+    break;
+  case ExceptionHandling::None:
+    addPass(LowerInvokePass());
+
+    // The lower invoke pass may create unreachable code. Remove it.
+    addPass(UnreachableBlockElimPass());
+    break;
+  }
+}
+
+/// Add pass to prepare the LLVM IR for code generation. This should be done
+/// before exception handling preparation passes.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addCodeGenPrepare(AddIRPass &addPass) const {
+  if (getOptLevel() != CodeGenOpt::None && !Opt.DisableCodeGenPreparePass)
+    addPass(CodeGenPreparePass());
+  // TODO: Default ctor'd RewriteSymbolPass is no-op.
+  // addPass(RewriteSymbolPass());
+}
+
+/// Add common passes that perform LLVM IR to IR transforms in preparation for
+/// instruction selection.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addISelPrepare(AddIRPass &addPass) const {
+  derived().addPreISel(addPass);
+
+  // Add both the safe stack and the stack protection passes: each of them
+  // will only protect functions that have corresponding attributes.
+  addPass(SafeStackPass());
+  addPass(StackProtectorPass());
+
+  if (Opt.PrintISelInput)
+    addPass(PrintFunctionPass(dbgs(),
+                              "\n\n*** Final LLVM Code input to ISel ***\n"));
+
+  // All passes which modify the LLVM IR are now complete; run the verifier
+  // to ensure that the IR is valid.
+  if (!Opt.DisableVerify)
+    addPass(VerifierPass());
+}
+
+template <typename Derived>
+Expected<typename CodeGenPassBuilder<Derived>::AddMachinePass>
+CodeGenPassBuilder<Derived>::addCoreISelPasses() const {
+  // Enable FastISel with -fast-isel, but allow that to be overridden.
+  TM.setO0WantsFastISel(Opt.EnableFastISelOption.getValueOr(true));
+
+  // Determine an instruction selector.
+  enum class SelectorType { SelectionDAG, FastISel, GlobalISel };
+  SelectorType Selector;
+
+  if (Opt.EnableFastISelOption && *Opt.EnableFastISelOption == true)
+    Selector = SelectorType::FastISel;
+  else if ((Opt.EnableGlobalISelOption &&
+            *Opt.EnableGlobalISelOption == true) ||
+           (TM.Options.EnableGlobalISel &&
+            (!Opt.EnableGlobalISelOption ||
+             *Opt.EnableGlobalISelOption == false)))
+    Selector = SelectorType::GlobalISel;
+  else if (TM.getOptLevel() == CodeGenOpt::None && TM.getO0WantsFastISel())
+    Selector = SelectorType::FastISel;
+  else
+    Selector = SelectorType::SelectionDAG;
+
+  // Consistently set TM.Options.EnableFastISel and EnableGlobalISel.
+  if (Selector == SelectorType::FastISel) {
+    TM.setFastISel(true);
+    TM.setGlobalISel(false);
+  } else if (Selector == SelectorType::GlobalISel) {
+    TM.setFastISel(false);
+    TM.setGlobalISel(true);
+  }
+
+  AddMachinePass addPass(Opt.DebugPM, Opt.RequiresCodeGenSCCOrder,
+                         *Opt.VerifyMachineCode);
+
+  // Add instruction selector passes.
+  if (Selector == SelectorType::GlobalISel) {
+    if (auto Err = derived().addIRTranslator(addPass))
+      return std::move(Err);
+
+    derived().addPreLegalizeMachineIR(addPass);
+
+    if (auto Err = derived().addLegalizeMachineIR(addPass))
+      return std::move(Err);
+
+    // Before running the register bank selector, ask the target if it
+    // wants to run some passes.
+    derived().addPreRegBankSelect(addPass);
+
+    if (auto Err = derived().addRegBankSelect(addPass))
+      return std::move(Err);
+
+    derived().addPreGlobalInstructionSelect(addPass);
+
+    if (auto Err = derived().addGlobalInstructionSelect(addPass))
+      return std::move(Err);
+
+    // Pass to reset the MachineFunction if the ISel failed.
+    addPass(ResetMachineFunctionPass(reportDiagnosticWhenGlobalISelFallback(),
+                                     isGlobalISelAbortEnabled()));
+
+    // Provide a fallback path when we do not want to abort on
+    // not-yet-supported input.
+    if (!isGlobalISelAbortEnabled()) {
+      if (auto Err = derived().addInstSelector(addPass))
+        return std::move(Err);
+    }
+
+  } else if (auto Err = derived().addInstSelector(addPass))
+    return std::move(Err);
+
+  // Expand pseudo-instructions emitted by ISel. Don't run the verifier before
+  // FinalizeISel.
+  addPass(FinalizeISelPass());
+
+  return addPass;
+}
+
+/// Add the complete set of target-independent postISel code generator passes.
+///
+/// This can be read as the standard order of major LLVM CodeGen stages. Stages
+/// with nontrivial configuration or multiple passes are broken out below in
+/// add%Stage routines.
+///
+/// Any CodeGenPassBuilder::addXX routine may be overridden by the
+/// Target. The addPre/Post methods with empty header implementations allow
+/// injecting target-specific fixups just before or after major stages.
+/// Additionally, targets have the flexibility to change pass order within a
+/// stage by overriding the default implementation of the add%Stage routines
+/// below. Each technique has maintainability tradeoffs because alternate pass
+/// orders are not well supported. addPre/Post works better if the target pass
+/// is easily tied to a common pass. But if it has subtle dependencies on
+/// multiple passes, the target should override the stage instead.
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::addMachinePasses(
+    AddMachinePass &addPass) const {
+  // Add passes that optimize machine instructions in SSA form.
+  if (getOptLevel() != CodeGenOpt::None) {
+    derived().addMachineSSAOptimization(addPass);
+  } else {
+    // If the target requests it, assign local variables to stack slots
+    // relative to one another and simplify frame index references where
+    // possible.
+    addPass(LocalStackSlotPass());
+  }
+
+  if (TM.Options.EnableIPRA)
+    addPass(RegUsageInfoPropagationPass());
+
+  // Run pre-ra passes.
+  derived().addPreRegAlloc(addPass);
+
+  // Run register allocation and passes that are tightly coupled with it,
+  // including phi elimination and scheduling.
+  if (*Opt.OptimizeRegAlloc) {
+    derived().addOptimizedRegAlloc(addPass);
+  } else {
+    if (auto Err = derived().addFastRegAlloc(addPass))
+      return Err;
+  }
+
+  // Run post-ra passes.
+  derived().addPostRegAlloc(addPass);
+
+  // Insert prolog/epilog code. Eliminate abstract frame index references...
+  if (getOptLevel() != CodeGenOpt::None) {
+    addPass(PostRAMachineSinkingPass());
+    addPass(ShrinkWrapPass());
+  }
+
+  addPass(PrologEpilogInserterPass());
+
+  // Add passes that optimize machine instructions after register allocation.
+  if (getOptLevel() != CodeGenOpt::None)
+    derived().addMachineLateOptimization(addPass);
+
+  // Expand pseudo instructions before the second scheduling pass.
+  addPass(ExpandPostRAPseudosPass());
+
+  // Run pre-sched2 passes.
+  derived().addPreSched2(addPass);
+
+  if (Opt.EnableImplicitNullChecksPass)
+    addPass(ImplicitNullChecksPass());
+
+  // Second pass scheduler.
+  // Let the target optionally insert this pass by itself at some other point.
+  if (getOptLevel() != CodeGenOpt::None &&
+      !TM.targetSchedulesPostRAScheduling()) {
+    if (Opt.EnablePostMachineSchedulerPass)
+      addPass(PostMachineSchedulerPass());
+    else
+      addPass(PostRASchedulerPass());
+  }
+
+  // GC
+  derived().addGCPasses(addPass);
+
+  // Basic block placement.
+  if (getOptLevel() != CodeGenOpt::None)
+    derived().addBlockPlacement(addPass);
+
+  // Insert before XRay Instrumentation.
+  addPass(FEntryInserterPass());
+
+  addPass(XRayInstrumentationPass());
+  addPass(PatchableFunctionPass());
+
+  derived().addPreEmitPass(addPass);
+
+  if (TM.Options.EnableIPRA) {
+    // Collect register usage information and produce a register mask of
+    // clobbered registers, to be used to optimize call sites.
+    addPass(RegUsageInfoCollectorPass());
+  }
+
+  addPass(FuncletLayoutPass());
+
+  addPass(StackMapLivenessPass());
+  addPass(LiveDebugValuesPass());
+
+  if (TM.Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None &&
+      Opt.EnableMachineOutliner != RunOutliner::NeverOutline) {
+    bool RunOnAllFunctions =
+        (Opt.EnableMachineOutliner == RunOutliner::AlwaysOutline);
+    bool AddOutliner =
+        RunOnAllFunctions || TM.Options.SupportsDefaultOutlining;
+    if (AddOutliner)
+      addPass(MachineOutlinerPass(RunOnAllFunctions));
+  }
+
+  // Add passes that directly emit MI after all other MI passes.
+  derived().addPreEmitPass2(addPass);
+
+  return Error::success();
+}
+
+/// Add passes that optimize machine instructions in SSA form.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addMachineSSAOptimization(
+    AddMachinePass &addPass) const {
+  // Pre-ra tail duplication.
+  addPass(EarlyTailDuplicatePass());
+
+  // Optimize PHIs before DCE: removing dead PHI cycles may make more
+  // instructions dead.
+  addPass(OptimizePHIsPass());
+
+  // This pass merges large allocas. StackSlotColoring is a different pass
+  // which merges spill slots.
+  addPass(StackColoringPass());
+
+  // If the target requests it, assign local variables to stack slots relative
+  // to one another and simplify frame index references where possible.
+  addPass(LocalStackSlotPass());
+
+  // With optimization, dead code should already be eliminated. However
+  // there is one known exception: lowered code for arguments that are only
+  // used by tail calls, where the tail calls reuse the incoming stack
+  // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+  addPass(DeadMachineInstructionElimPass());
+
+  // Allow targets to insert passes that improve instruction level
+  // parallelism, like if-conversion. Such passes will typically need
+  // dominator trees and loop info, just like LICM and CSE below.
+  derived().addILPOpts(addPass);
+
+  addPass(EarlyMachineLICMPass());
+  addPass(MachineCSEPass());
+
+  addPass(MachineSinkingPass());
+
+  addPass(PeepholeOptimizerPass());
+  // Clean-up the dead code that may have been generated by peephole
+  // rewriting.
+  addPass(DeadMachineInstructionElimPass());
+}
+
+//===---------------------------------------------------------------------===//
+/// Register Allocation Pass Configuration
+//===---------------------------------------------------------------------===//
+
+/// Instantiate the default register allocator pass for this target for either
+/// the optimized or unoptimized allocation path. This will be added to the
+/// pass manager by addFastRegAlloc in the unoptimized case or
+/// addOptimizedRegAlloc in the optimized case.
+///
+/// A target that uses the standard regalloc pass order for fast or optimized
+/// allocation may still override this for per-target regalloc
+/// selection. But -regalloc=... always takes precedence.
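+///
+/// A hypothetical per-target override (MyTargetGreedyRAPass is illustrative,
+/// not a real pass) could look like:
+///
+///   void addTargetRegisterAllocator(AddMachinePass &addPass,
+///                                   bool Optimized) const {
+///     if (Optimized)
+///       addPass(MyTargetGreedyRAPass());
+///     else
+///       addPass(RAFastPass());
+///   }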
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addTargetRegisterAllocator(
+    AddMachinePass &addPass, bool Optimized) const {
+  if (Optimized)
+    addPass(RAGreedyPass());
+  else
+    addPass(RAFastPass());
+}
+
+/// Find and instantiate the register allocation pass requested by this target
+/// at the current optimization level. Different register allocators are
+/// defined as separate passes because they may require different analysis.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addRegAllocPass(AddMachinePass &addPass,
+                                                  bool Optimized) const {
+  switch (Opt.RegAlloc) {
+  case RegAllocType::Default:
+    // With no -regalloc= override, ask the target for a regalloc pass.
+    derived().addTargetRegisterAllocator(addPass, Optimized);
+    break;
+  case RegAllocType::Basic:
+    addPass(RABasicPass());
+    break;
+  case RegAllocType::Fast:
+    addPass(RAFastPass());
+    break;
+  case RegAllocType::Greedy:
+    addPass(RAGreedyPass());
+    break;
+  case RegAllocType::PBQP:
+    addPass(RAPBQPPass());
+    break;
+  default:
+    llvm_unreachable("unknown register allocator type");
+  }
+}
+
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::addRegAssignmentFast(
+    AddMachinePass &addPass) const {
+  if (Opt.RegAlloc != RegAllocType::Default &&
+      Opt.RegAlloc != RegAllocType::Fast)
+    return make_error<StringError>(
+        "Must use fast (default) register allocator for unoptimized regalloc.",
+        inconvertibleErrorCode());
+
+  addRegAllocPass(addPass, false);
+  return Error::success();
+}
+
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::addRegAssignmentOptimized(
+    AddMachinePass &addPass) const {
+  // Add the selected register allocation pass.
+  addRegAllocPass(addPass, true);
+
+  // Allow targets to change the register assignments before rewriting.
+  derived().addPreRewrite(addPass);
+
+  // Finally rewrite virtual registers.
+  addPass(VirtRegRewriterPass());
+
+  // Perform stack slot coloring and post-ra machine LICM.
+  //
+  // FIXME: Re-enable coloring with register when it's capable of adding
+  // kill markers.
+  addPass(StackSlotColoringPass());
+
+  return Error::success();
+}
+
+/// Add the minimum set of target-independent passes that are required for
+/// register allocation. No coalescing or scheduling.
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::addFastRegAlloc(
+    AddMachinePass &addPass) const {
+  addPass(PHIEliminationPass());
+  addPass(TwoAddressInstructionPass());
+  return derived().addRegAssignmentFast(addPass);
+}
+
+/// Add standard target-independent passes that are tightly coupled with
+/// optimized register allocation, including coalescing, machine instruction
+/// scheduling, and register allocation itself.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addOptimizedRegAlloc(
+    AddMachinePass &addPass) const {
+  addPass(DetectDeadLanesPass());
+
+  addPass(ProcessImplicitDefsPass());
+
+  // Edge splitting is smarter with machine loop info.
+  addPass(PHIEliminationPass());
+
+  // Eventually, we want to run LiveIntervals before PHI elimination.
+  if (Opt.EnableLiveIntervalsPass)
+    addPass(LiveIntervalsPass());
+
+  addPass(TwoAddressInstructionPass());
+  addPass(RegisterCoalescerPass());
+
+  // The machine scheduler may accidentally create disconnected components
+  // when moving subregister definitions around; avoid this by splitting them
+  // to separate vregs before. Splitting can also improve reg. allocation
+  // quality.
+  addPass(RenameIndependentSubregsPass());
+
+  // PreRA instruction scheduling.
+  addPass(MachineSchedulerPass());
+
+  if (derived().addRegAssignmentOptimized(addPass)) {
+    // Allow targets to expand pseudo instructions depending on the choice of
+    // registers before MachineCopyPropagation.
+    derived().addPostRewrite(addPass);
+
+    // Copy propagate to forward register uses and try to eliminate COPYs that
+    // were not coalesced.
+    addPass(MachineCopyPropagationPass());
+
+    // Run post-ra machine LICM to hoist reloads / remats.
+    //
+    // FIXME: can this move into MachineLateOptimization?
+    addPass(MachineLICMPass());
+  }
+}
+
+//===---------------------------------------------------------------------===//
+/// Post RegAlloc Pass Configuration
+//===---------------------------------------------------------------------===//
+
+/// Add passes that optimize machine instructions after register allocation.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addMachineLateOptimization(
+    AddMachinePass &addPass) const {
+  // Branch folding must be run after regalloc and prolog/epilog insertion.
+  addPass(BranchFolderPass());
+
+  // Tail duplication.
+  // Note that duplicating tails just increases code size and degrades
+  // performance for targets that require Structured Control Flow.
+  // In addition it can also make the CFG irreducible. Thus we disable it.
+  if (!TM.requiresStructuredCFG())
+    addPass(TailDuplicatePass());
+
+  // Copy propagation.
+  addPass(MachineCopyPropagationPass());
+}
+
+/// Add standard basic block placement passes.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addBlockPlacement(
+    AddMachinePass &addPass) const {
+  addPass(MachineBlockPlacementPass());
+  // Run a separate pass to collect block placement statistics.
+  if (Opt.EnableMachineBlockPlacementStatsPass)
+    addPass(MachineBlockPlacementStatsPass());
+}
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_CODEGENPASSBUILDER_H
diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
new file mode 100644
index 0000000000000..734bbebc76dee
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
@@ -0,0 +1,195 @@
+//===- MachinePassRegistry.def - Registry of passes -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is used as the registry of passes that are part of the
+// target-independent code generator.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
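+//
+// Illustrative usage (this mirrors the pattern CodeGenPassBuilder uses; FAM
+// is assumed to be an in-scope FunctionAnalysisManager):
+//
+//   #define FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR)                  \
+//     FAM.registerPass([&] { return PASS_NAME CONSTRUCTOR; });
+//   #include "llvm/CodeGen/MachinePassRegistry.def"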
+
+#ifndef MODULE_ANALYSIS
+#define MODULE_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC))
+#undef MODULE_ANALYSIS
+
+#ifndef MODULE_PASS
+#define MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+MODULE_PASS("pre-isel-intrinsic-lowering", PreISelIntrinsicLoweringPass, ())
+#undef MODULE_PASS
+
+#ifndef FUNCTION_ANALYSIS
+#define FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC))
+FUNCTION_ANALYSIS("targetir", TargetIRAnalysis, (std::move(TM.getTargetIRAnalysis())))
+#undef FUNCTION_ANALYSIS
+
+#ifndef FUNCTION_PASS
+#define FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+FUNCTION_PASS("mergeicmps", MergeICmpsPass, ())
+FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass, ())
+FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass, ())
+FUNCTION_PASS("consthoist", ConstantHoistingPass, ())
+FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ())
+FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false))
+FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true))
+FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ())
+FUNCTION_PASS("lowerinvoke", LowerInvokePass, ())
+FUNCTION_PASS("verify", VerifierPass, ())
+#undef FUNCTION_PASS
+
+#ifndef LOOP_PASS
+#define LOOP_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+LOOP_PASS("loop-reduce", LoopStrengthReducePass, ())
+#undef LOOP_PASS
+
+#ifndef MACHINE_MODULE_PASS
+#define MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+#undef MACHINE_MODULE_PASS
+
+#ifndef MACHINE_FUNCTION_ANALYSIS
+#define MACHINE_FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+MACHINE_FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC))
+// LiveVariables currently requires pure SSA form.
+// FIXME: Once TwoAddressInstruction pass no longer uses kill flags,
+// LiveVariables can be removed completely, and LiveIntervals can be directly
+// computed. (We still either need to regenerate kill flags after regalloc, or
+// preferably fix the scavenger to not depend on them).
+// MACHINE_FUNCTION_ANALYSIS("live-vars", LiveVariablesAnalysis())
+
+// MACHINE_FUNCTION_ANALYSIS("live-stacks", LiveStacksPass())
+// MACHINE_FUNCTION_ANALYSIS("slot-indexes", SlotIndexesAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("edge-bundles", EdgeBundlesAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("lazy-machine-bfi", LazyMachineBlockFrequencyInfoAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-bfi", MachineBlockFrequencyInfoAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-loops", MachineLoopInfoAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-dom-frontier", MachineDominanceFrontierAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-dom-tree", MachineDominatorTreeAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-ore", MachineOptimizationRemarkEmitterPassAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-post-dom-tree", MachinePostDominatorTreeAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-region-info", MachineRegionInfoPassAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-trace-metrics", MachineTraceMetricsAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("reaching-def", ReachingDefAnalysisAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("live-reg-matrix", LiveRegMatrixAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("gc-analysis", GCMachineCodeAnalysisPass())
+#undef MACHINE_FUNCTION_ANALYSIS
+
+#ifndef MACHINE_FUNCTION_PASS
+#define MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+MACHINE_FUNCTION_PASS("mir-printer", PrintMIRPass, ())
+#undef MACHINE_FUNCTION_PASS
+
+// After a pass is converted to the new pass manager, its entry should be
+// moved from the dummy table to the normal one. For example, for a machine
+// function pass, move the entry from DUMMY_MACHINE_FUNCTION_PASS to
+// MACHINE_FUNCTION_PASS.
+
+#ifndef DUMMY_FUNCTION_PASS
+#define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+DUMMY_FUNCTION_PASS("expandmemcmp", ExpandMemCmpPass, ())
+DUMMY_FUNCTION_PASS("gc-lowering", GCLoweringPass, ())
+DUMMY_FUNCTION_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass, ())
+DUMMY_FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass, ())
+DUMMY_FUNCTION_PASS("sjljehprepare", SjLjEHPreparePass, ())
+DUMMY_FUNCTION_PASS("dwarfehprepare", DwarfEHPass, ())
+DUMMY_FUNCTION_PASS("winehprepare", WinEHPass, ())
+DUMMY_FUNCTION_PASS("wasmehprepare", WasmEHPass, ())
+DUMMY_FUNCTION_PASS("codegenprepare", CodeGenPreparePass, ())
+DUMMY_FUNCTION_PASS("safe-stack", SafeStackPass, ())
+DUMMY_FUNCTION_PASS("stack-protector", StackProtectorPass, ())
+DUMMY_FUNCTION_PASS("atomic-expand", AtomicExpandPass, ())
+DUMMY_FUNCTION_PASS("interleaved-access", InterleavedAccessPass, ())
+DUMMY_FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass, ())
+DUMMY_FUNCTION_PASS("cfguard-dispatch", CFGuardDispatchPass, ())
+DUMMY_FUNCTION_PASS("cfguard-check", CFGuardCheckPass, ())
+DUMMY_FUNCTION_PASS("gc-info-printer", GCInfoPrinterPass, ())
+#undef DUMMY_FUNCTION_PASS
+
+#ifndef DUMMY_MODULE_PASS
+#define DUMMY_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+DUMMY_MODULE_PASS("lower-emutls", LowerEmuTLSPass, ())
+#undef DUMMY_MODULE_PASS
+
+#ifndef DUMMY_MACHINE_MODULE_PASS
+#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+DUMMY_MACHINE_MODULE_PASS("machine-outliner", MachineOutlinerPass, ())
+#undef DUMMY_MACHINE_MODULE_PASS
+
+#ifndef DUMMY_MACHINE_FUNCTION_PASS
+#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+DUMMY_MACHINE_FUNCTION_PASS("free-machine-function", FreeMachineFunctionPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("shrink-wrap", ShrinkWrapPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("prologepilog", PrologEpilogInserterPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("postrapseudos", ExpandPostRAPseudosPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machine-scheduler", MachineSchedulerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("patchable-function", PatchableFunctionPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("livedebugvalues", LiveDebugValuesPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass, ())
+DUMMY_MACHINE_FUNCTION_PASS("opt-phis", OptimizePHIsPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machinelicm", MachineLICMPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machine-sink", MachineSinkingPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("peephole-opt", PeepholeOptimizerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("regalloc", RegAllocPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("virtregrewriter", VirtRegRewriterPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("stack-slot-coloring", StackSlotColoringPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("twoaddressinstruction", TwoAddressInstructionPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("detect-dead-lanes", DetectDeadLanesPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("liveintervals", LiveIntervalsPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("simple-register-coalescing", RegisterCoalescerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("rename-independent-subregs", RenameIndependentSubregsPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("branch-folder", BranchFolderPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("tailduplication", TailDuplicatePass, ())
+DUMMY_MACHINE_FUNCTION_PASS("block-placement", MachineBlockPlacementPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("block-placement-stats", MachineBlockPlacementStatsPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machine-combiner", MachineCombinerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("lrshrink", LiveRangeShrinkPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("break-false-deps", BreakFalseDepsPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("cfi-instr-inserter", CFIInstrInserterPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("cfguard-longjmp", CFGuardLongjmpPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("ra-fast", RAFastPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("ra-greedy", RAGreedyPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("legalizer", LegalizerPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("irtranslator", IRTranslatorPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("regbankselect", RegBankSelectPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("reset-machine-function", ResetMachineFunctionPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("machineverifier", MachineVerifierPass, ()) +#undef DUMMY_MACHINE_FUNCTION_PASS diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 76e217c899745..457eae26fd474 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -28,6 +28,7 @@ namespace llvm { +class LLVMTargetMachine; class Module; class Function; @@ -140,6 +141,10 @@ class StandardInstrumentations { TimePassesHandler &getTimePasses() { return TimePasses; } }; + +void registerCodeGenCallback(PassInstrumentationCallbacks &PIC, + LLVMTargetMachine &); + } // namespace llvm #endif diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h index 2a422341fdc84..c7673d3e74e40 100644 --- a/llvm/include/llvm/Target/TargetMachine.h +++ b/llvm/include/llvm/Target/TargetMachine.h @@ -15,9 +15,12 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/CGPassBuilderOption.h" +#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/IR/DataLayout.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/Error.h" #include "llvm/Target/TargetOptions.h" #include @@ -367,6 +370,20 @@ class LLVMTargetMachine : public TargetMachine { bool DisableVerify = true, MachineModuleInfoWrapperPass *MMIWP = nullptr) override; + virtual Expected> + buildCodeGenPipeline(raw_pwrite_stream &, raw_pwrite_stream *, + CodeGenFileType, CGPassBuilderOption, + MachineFunctionAnalysisManager &, + PassInstrumentationCallbacks *) { + return make_error("buildCodeGenPipeline is not overriden", + inconvertibleErrorCode()); + } + + virtual std::pair getPassNameFromLegacyName(StringRef) { + llvm_unreachable( + "getPassNameFromLegacyName parseMIRPipeline is not overriden"); + } + /// Add passes to the specified pass manager to get machine code emitted with /// the MCJIT. This method returns true if machine code is not supported. It /// fills the MCContext Ctx pointer which can be used to build custom @@ -387,6 +404,10 @@ class LLVMTargetMachine : public TargetMachine { raw_pwrite_stream *DwoOut, CodeGenFileType FileType, MCContext &Context); + Expected> + createMCStreamer(raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, + CodeGenFileType FileType, MCContext &Ctx); + /// True if the target uses physical regs (as nearly all targets do). False /// for stack machines such as WebAssembly and other virtual-register /// machines. If true, all vregs must be allocated before PEI. 
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 617692a347922..83b3655441fe4 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_component_library(LLVMCodeGen
   CFGuardLongjmp.cpp
   CFIInstrInserter.cpp
   CodeGen.cpp
+  CodeGenPassBuilder.cpp
   CodeGenPrepare.cpp
   CommandFlags.cpp
   CriticalAntiDepBreaker.cpp
diff --git a/llvm/lib/CodeGen/CodeGenPassBuilder.cpp b/llvm/lib/CodeGen/CodeGenPassBuilder.cpp
new file mode 100644
index 0000000000000..7f37f2069a3ba
--- /dev/null
+++ b/llvm/lib/CodeGen/CodeGenPassBuilder.cpp
@@ -0,0 +1,25 @@
+//===--- CodeGenPassBuilder.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CodeGenPassBuilder.h"
+
+using namespace llvm;
+
+namespace llvm {
+#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)               \
+  AnalysisKey PASS_NAME::Key;
+#include "llvm/CodeGen/MachinePassRegistry.def"
+#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)             \
+  AnalysisKey PASS_NAME::Key;
+#include "llvm/CodeGen/MachinePassRegistry.def"
+} // namespace llvm
diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index e94b7ed4de039..e86f255129990 100644
--- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -118,6 +118,24 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
                                       raw_pwrite_stream *DwoOut,
                                       CodeGenFileType FileType,
                                       MCContext &Context) {
+  Expected<std::unique_ptr<MCStreamer>> MCStreamerOrErr =
+      createMCStreamer(Out, DwoOut, FileType, Context);
+  if (auto Err = MCStreamerOrErr.takeError())
+    return true;
+
+  // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+  FunctionPass *Printer =
+      getTarget().createAsmPrinter(*this, std::move(*MCStreamerOrErr));
+  if (!Printer)
+    return true;
+
+  PM.add(Printer);
+  return false;
+}
+
+Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer(
+    raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, CodeGenFileType FileType,
+    MCContext &Context) {
   if (Options.MCOptions.MCSaveTempLabels)
     Context.setAllowTemporaryLabels(false);
 
@@ -152,10 +170,14 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
   // Create the code emitter for the target if it exists. If not, .o file
   // emission fails.
   MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, Context);
+  if (!MCE)
+    return make_error<StringError>("createMCCodeEmitter failed",
+                                   inconvertibleErrorCode());
   MCAsmBackend *MAB =
       getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
-  if (!MCE || !MAB)
-    return true;
+  if (!MAB)
+    return make_error<StringError>("createMCAsmBackend failed",
+                                   inconvertibleErrorCode());
 
   Triple T(getTargetTriple().str());
   AsmStreamer.reset(getTarget().createMCObjectStreamer(
@@ -174,14 +196,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
     break;
   }
 
-  // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
-  FunctionPass *Printer =
-      getTarget().createAsmPrinter(*this, std::move(AsmStreamer));
-  if (!Printer)
-    return true;
-
-  PM.add(Printer);
-  return false;
+  return std::move(AsmStreamer);
 }
 
 bool LLVMTargetMachine::addPassesToEmitFile(
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 19db8eb480ca4..03a567e3d443a 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Analysis/ScopedNoAliasAA.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/CodeGen/CGPassBuilderOption.h"
 #include "llvm/CodeGen/CSEConfigBase.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachinePassRegistry.h"
@@ -29,11 +30,13 @@
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/PassInstrumentation.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCTargetOptions.h"
 #include "llvm/Pass.h"
+#include "llvm/Passes/StandardInstrumentations.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
@@ -120,16 +123,17 @@ static cl::opt<bool> DebugifyAndStripAll(
     "Debugify MIR before and Strip debug after "
     "each pass except those known to be unsafe when debug info is present"),
     cl::ZeroOrMore);
-enum RunOutliner { AlwaysOutline, NeverOutline, TargetDefault };
+
 // Enable or disable the MachineOutliner.
 static cl::opt<RunOutliner> EnableMachineOutliner(
     "enable-machine-outliner", cl::desc("Enable the machine outliner"),
-    cl::Hidden, cl::ValueOptional, cl::init(TargetDefault),
-    cl::values(clEnumValN(AlwaysOutline, "always",
+    cl::Hidden, cl::ValueOptional, cl::init(RunOutliner::TargetDefault),
+    cl::values(clEnumValN(RunOutliner::AlwaysOutline, "always",
                           "Run on all functions guaranteed to be beneficial"),
-               clEnumValN(NeverOutline, "never", "Disable all outlining"),
+               clEnumValN(RunOutliner::NeverOutline, "never",
+                          "Disable all outlining"),
                // Sentinel value for unspecified option.
-               clEnumValN(AlwaysOutline, "", "")));
+               clEnumValN(RunOutliner::AlwaysOutline, "", "")));
 // Enable or disable FastISel. Both options are needed, because
 // FastISel is enabled by default with -fast, and we wish to be
 // able to enable or disable fast-isel independently from -O0.
@@ -172,7 +176,6 @@ static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
     cl::desc("Run live interval analysis earlier in the pipeline"));
 
 // Experimental option to use CFL-AA in codegen
-enum class CFLAAType { None, Steensgaard, Andersen, Both };
 static cl::opt<CFLAAType> UseCFLAA(
     "use-cfl-aa-in-codegen", cl::init(CFLAAType::None), cl::Hidden,
     cl::desc("Enable the new, experimental CFL alias analysis in CodeGen"),
@@ -404,6 +407,143 @@ void TargetPassConfig::setStartStopPasses() {
   Started = (StartAfter == nullptr) && (StartBefore == nullptr);
 }
 
+CGPassBuilderOption llvm::getCGPassBuilderOption() {
+  CGPassBuilderOption Opt;
+
+#define SET_OPTION(Option)                                                    \
+  if (Option.getNumOccurrences())                                             \
+    Opt.Option = Option;
+
+  SET_OPTION(EnableFastISelOption)
+  SET_OPTION(EnableGlobalISelAbort)
+  SET_OPTION(EnableGlobalISelOption)
+  SET_OPTION(EnableIPRA)
+  SET_OPTION(OptimizeRegAlloc)
+  SET_OPTION(VerifyMachineCode)
+
+  Opt.EnableMachineOutliner = EnableMachineOutliner;
+  Opt.UseCFLAA = UseCFLAA;
+  Opt.PrintISelInput = PrintISelInput;
+  Opt.PrintGCInfo = PrintGCInfo;
+  Opt.EnablePostMachineSchedulerPass = MISchedPostRA;
+  Opt.EnableLiveIntervalsPass = EarlyLiveIntervals;
+  Opt.EnableMachineBlockPlacementStatsPass = EnableBlockPlacementStats;
+  Opt.EnableImplicitNullChecksPass = EnableImplicitNullChecks;
+  Opt.DisableLoopStrengthReducePass = DisableLSR;
+  Opt.DisableCodeGenPreparePass = DisableCGP;
+  Opt.DisableMergeICmpsPass = DisableMergeICmps;
+  Opt.DisablePartiallyInlineLibCallsPass = DisablePartialLibcallInlining;
+  Opt.DisableConstantHoistingPass = DisableConstantHoisting;
+  Opt.PrintAfterLSR = PrintLSR;
+
+  return Opt;
+}
+
+static void registerPartialPipelineCallback(PassInstrumentationCallbacks &PIC,
+                                            LLVMTargetMachine &LLVMTM) {
+  StringRef StartBefore;
+  StringRef StartAfter;
+  StringRef StopBefore;
+  StringRef StopAfter;
+
+  unsigned StartBeforeInstanceNum = 0;
+  unsigned StartAfterInstanceNum = 0;
+  unsigned StopBeforeInstanceNum = 0;
+  unsigned StopAfterInstanceNum = 0;
+
+  std::tie(StartBefore, StartBeforeInstanceNum) =
+      getPassNameAndInstanceNum(StartBeforeOpt);
+  std::tie(StartAfter, StartAfterInstanceNum) =
+      getPassNameAndInstanceNum(StartAfterOpt);
+  std::tie(StopBefore, StopBeforeInstanceNum) =
+      getPassNameAndInstanceNum(StopBeforeOpt);
+  std::tie(StopAfter, StopAfterInstanceNum) =
+      getPassNameAndInstanceNum(StopAfterOpt);
+
+  if (StartBefore.empty() && StartAfter.empty() && StopBefore.empty() &&
+      StopAfter.empty())
+    return;
+
+  std::tie(StartBefore, std::ignore) =
+      LLVMTM.getPassNameFromLegacyName(StartBefore);
+  std::tie(StartAfter, std::ignore) =
+      LLVMTM.getPassNameFromLegacyName(StartAfter);
+  std::tie(StopBefore, std::ignore) =
+      LLVMTM.getPassNameFromLegacyName(StopBefore);
+  std::tie(StopAfter, std::ignore) =
+      LLVMTM.getPassNameFromLegacyName(StopAfter);
+  if (!StartBefore.empty() && !StartAfter.empty())
+    report_fatal_error(Twine(StartBeforeOptName) + Twine(" and ") +
+                       Twine(StartAfterOptName) + Twine(" specified!"));
+  if (!StopBefore.empty() && !StopAfter.empty())
+    report_fatal_error(Twine(StopBeforeOptName) + Twine(" and ") +
+                       Twine(StopAfterOptName) + Twine(" specified!"));
+
+  PIC.registerBeforePassCallback(
+      [=, EnableCurrent = StartBefore.empty() && StartAfter.empty(),
+       EnableNext = Optional<bool>(), StartBeforeCount = 0u,
+       StartAfterCount = 0u, StopBeforeCount = 0u,
+       StopAfterCount = 0u](StringRef P, Any) mutable {
+        bool StartBeforePass = !StartBefore.empty() && P.contains(StartBefore);
+        bool StartAfterPass = !StartAfter.empty() && P.contains(StartAfter);
+        bool StopBeforePass = !StopBefore.empty() && P.contains(StopBefore);
+        bool StopAfterPass = !StopAfter.empty() && P.contains(StopAfter);
+
+        // Implement -start-after/-stop-after
+        if (EnableNext) {
+          EnableCurrent = *EnableNext;
+          EnableNext.reset();
+        }
+
+        // Using PIC.registerAfterPassCallback won't work because if this
+        // callback returns false, AfterPassCallback is also skipped.
+        if (StartAfterPass && StartAfterCount++ == StartAfterInstanceNum) {
+          assert(!EnableNext && "Error: assign to EnableNext more than once");
+          EnableNext = true;
+        }
+        if (StopAfterPass && StopAfterCount++ == StopAfterInstanceNum) {
+          assert(!EnableNext && "Error: assign to EnableNext more than once");
+          EnableNext = false;
+        }
+
+        if (StartBeforePass && StartBeforeCount++ == StartBeforeInstanceNum)
+          EnableCurrent = true;
+        if (StopBeforePass && StopBeforeCount++ == StopBeforeInstanceNum)
+          EnableCurrent = false;
+        return EnableCurrent;
+      });
+}
+
+void llvm::registerCodeGenCallback(PassInstrumentationCallbacks &PIC,
+                                   LLVMTargetMachine &LLVMTM) {
+
+  // Register a callback for disabling passes.
+  PIC.registerBeforePassCallback([](StringRef P, Any) {
+
+#define DISABLE_PASS(Option, Name)                                             \
+    if (Option && P.contains(#Name))                                           \
+      return false;
+    DISABLE_PASS(DisableBlockPlacement, MachineBlockPlacementPass)
+    DISABLE_PASS(DisableBranchFold, BranchFolderPass)
+    DISABLE_PASS(DisableCopyProp, MachineCopyPropagationPass)
+    DISABLE_PASS(DisableEarlyIfConversion, EarlyIfConverterPass)
+    DISABLE_PASS(DisableEarlyTailDup, EarlyTailDuplicatePass)
+    DISABLE_PASS(DisableMachineCSE, MachineCSEPass)
+    DISABLE_PASS(DisableMachineDCE, DeadMachineInstructionElimPass)
+    DISABLE_PASS(DisableMachineLICM, EarlyMachineLICMPass)
+    DISABLE_PASS(DisableMachineSink, MachineSinkingPass)
+    DISABLE_PASS(DisablePostRAMachineLICM, MachineLICMPass)
+    DISABLE_PASS(DisablePostRAMachineSink, PostRAMachineSinkingPass)
+    DISABLE_PASS(DisablePostRASched, PostRASchedulerPass)
+    DISABLE_PASS(DisableSSC, StackSlotColoringPass)
+    DISABLE_PASS(DisableTailDuplicate, TailDuplicatePass)
+
+    return true;
+  });
+
+  registerPartialPipelineCallback(PIC, LLVMTM);
+}
+
 // Out of line constructor provides default values for pass options and
 // registers all common codegen passes.
 TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
@@ -1012,10 +1152,11 @@ void TargetPassConfig::addMachinePasses() {
   addPass(&LiveDebugValuesID, false);
 
   if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None &&
-      EnableMachineOutliner != NeverOutline) {
-    bool RunOnAllFunctions = (EnableMachineOutliner == AlwaysOutline);
-    bool AddOutliner = RunOnAllFunctions ||
-                       TM->Options.SupportsDefaultOutlining;
+      EnableMachineOutliner != RunOutliner::NeverOutline) {
+    bool RunOnAllFunctions =
+        (EnableMachineOutliner == RunOutliner::AlwaysOutline);
+    bool AddOutliner =
+        RunOnAllFunctions || TM->Options.SupportsDefaultOutlining;
     if (AddOutliner)
       addPass(createMachineOutlinerPass(RunOnAllFunctions));
   }

From 37f2776d1af27a38ba4fabf3b356d71590f70d90 Mon Sep 17 00:00:00 2001
From: Eli Friedman
Date: Wed, 9 Sep 2020 15:22:38 -0700
Subject: [PATCH 0425/1079] [ConstantFold] Fold binary arithmetic on scalable
 vector splats.

It's a nice simplification, and it confuses instcombine if we don't do
it.
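
For example, this input (taken from the vscale.ll test added below, where
the splat is spelled as a shufflevector-of-insertelement constant
expression):

  %r = sub <vscale x 4 x i32> zeroinitializer, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)

now constant-folds through the splat fast path to the equivalent splat of
i32 -16, instead of surviving into the IR for instcombine to trip over.
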
Differential Revision: https://reviews.llvm.org/D87422
---
 llvm/lib/IR/ConstantFold.cpp                      | 35 +++++++++----------
 .../InstSimplify/ConstProp/vscale.ll              | 16 +++++++++
 2 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 468dce95a29ad..a827d9144c07c 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -1408,12 +1408,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
         return ConstantFP::get(C1->getContext(), C3V);
       }
     }
-  } else if (IsScalableVector) {
-    // Do not iterate on scalable vector. The number of elements is unknown at
-    // compile-time.
-    // FIXME: this branch can potentially be removed
-    return nullptr;
-  } else if (auto *VTy = dyn_cast<FixedVectorType>(C1->getType())) {
+  } else if (auto *VTy = dyn_cast<VectorType>(C1->getType())) {
     // Fast path for splatted constants.
     if (Constant *C2Splat = C2->getSplatValue()) {
       if (Instruction::isIntDivRem(Opcode) && C2Splat->isNullValue())
@@ -1425,22 +1420,24 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
       }
     }
 
-    // Fold each element and create a vector constant from those constants.
-    SmallVector<Constant *, 16> Result;
-    Type *Ty = IntegerType::get(VTy->getContext(), 32);
-    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
-      Constant *ExtractIdx = ConstantInt::get(Ty, i);
-      Constant *LHS = ConstantExpr::getExtractElement(C1, ExtractIdx);
-      Constant *RHS = ConstantExpr::getExtractElement(C2, ExtractIdx);
+    if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
+      // Fold each element and create a vector constant from those constants.
+      SmallVector<Constant *, 16> Result;
+      Type *Ty = IntegerType::get(FVTy->getContext(), 32);
+      for (unsigned i = 0, e = FVTy->getNumElements(); i != e; ++i) {
+        Constant *ExtractIdx = ConstantInt::get(Ty, i);
+        Constant *LHS = ConstantExpr::getExtractElement(C1, ExtractIdx);
+        Constant *RHS = ConstantExpr::getExtractElement(C2, ExtractIdx);
 
-      // If any element of a divisor vector is zero, the whole op is undef.
-      if (Instruction::isIntDivRem(Opcode) && RHS->isNullValue())
-        return UndefValue::get(VTy);
+        // If any element of a divisor vector is zero, the whole op is undef.
+        if (Instruction::isIntDivRem(Opcode) && RHS->isNullValue())
+          return UndefValue::get(VTy);
 
-      Result.push_back(ConstantExpr::get(Opcode, LHS, RHS));
-    }
+        Result.push_back(ConstantExpr::get(Opcode, LHS, RHS));
+      }
 
-    return ConstantVector::get(Result);
+      return ConstantVector::get(Result);
+    }
   }
 
   if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll
index d590c565316e7..1da77358ede7e 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll
@@ -41,6 +41,14 @@ define <vscale x 4 x i32> @sub() {
   ret <vscale x 4 x i32> %r
 }
 
+define <vscale x 4 x i32> @sub_splat() {
+; CHECK-LABEL: @sub_splat(
+; CHECK-NEXT:    ret <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 -16, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
+;
+  %r = sub <vscale x 4 x i32> zeroinitializer, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
+  ret <vscale x 4 x i32> %r
+}
+
 define <vscale x 4 x float> @fsub() {
 ; CHECK-LABEL: @fsub(
 ; CHECK-NEXT:    ret <vscale x 4 x float> undef
@@ -73,6 +81,14 @@ define <vscale x 4 x i32> @udiv() {
   ret <vscale x 4 x i32> %r
 }
 
+define <vscale x 4 x i32> @udiv_splat_zero() {
+; CHECK-LABEL: @udiv_splat_zero(
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %r = udiv <vscale x 4 x i32> zeroinitializer, zeroinitializer
+  ret <vscale x 4 x i32> %r
+}
+
 define <vscale x 4 x i32> @sdiv() {
 ; CHECK-LABEL: @sdiv(
 ; CHECK-NEXT:    ret <vscale x 4 x i32> undef

From a8503b87f739776cc9d5738f69aa0990db952340 Mon Sep 17 00:00:00 2001
From: Vitaly Buka
Date: Fri, 11 Sep 2020 16:49:20 -0700
Subject: [PATCH 0426/1079] [NFC] Remove unused static function

---
 clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
index 441dcad424442..ce4addd2f9451 100644
--- a/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
+++ b/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
@@ -834,11 +834,6 @@ LLVM_DUMP_METHOD static void dumpArgTokensToStream(llvm::raw_ostream &Out,
                                                    const Preprocessor &PP,
                                                    const ArgTokensTy &Toks);
 
-LLVM_DUMP_METHOD static void dumpArgTokens(const Preprocessor &PP,
-                                           const ArgTokensTy &Toks) {
-  dumpArgTokensToStream(llvm::errs(), PP, Toks);
-}
-
 namespace {
 /// Maps unexpanded macro parameters to expanded arguments. A macro argument may
 /// need to expanded further when it is nested inside another macro.

From 3fdaa8602a086a3fca5f0fc8527536ac659079d0 Mon Sep 17 00:00:00 2001
From: Yuanfang Chen
Date: Fri, 11 Sep 2020 16:50:36 -0700
Subject: [PATCH 0427/1079] Fix a typo in 31ecf8d29d81d196374a562c6d2bd2c25a62861e

---
 llvm/include/llvm/CodeGen/CodeGenPassBuilder.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
index 0c679eb174b76..aad7629bb176a 100644
--- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
@@ -130,7 +130,7 @@ template <typename DerivedT> class CodeGenPassBuilder {
 
     if (!Opt.VerifyMachineCode) {
 #ifdef EXPENSIVE_CHECKS
-      Opt.VerifyMachineCode = TM->isMachineVerifierClean();
+      Opt.VerifyMachineCode = TM.isMachineVerifierClean();
 #else
       Opt.VerifyMachineCode = false;
 #endif

From c931dc0bf596ed0a6c4531b0e1f05bd8bda566a6 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot
Date: Fri, 11 Sep 2020 23:54:25 +0000
Subject: [PATCH 0428/1079] [gn build] Port 31ecf8d29d8

---
 llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
index e2f6c710496ec..a6ca6b974930a 100644
--- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
@@ -32,6 +32,7 @@ static_library("CodeGen") {
     "CalcSpillWeights.cpp",
     "CallingConvLower.cpp",
     "CodeGen.cpp",
+    "CodeGenPassBuilder.cpp",
    "CodeGenPrepare.cpp",
    "CommandFlags.cpp",
    "CriticalAntiDepBreaker.cpp",

From d751f86189a7f7ef2a6fe06974a5da3349b02f20 Mon Sep 17 00:00:00 2001
From: Eli Friedman
Date: Thu, 3 Sep 2020 20:58:56 -0700
Subject: [PATCH 0429/1079] [ConstantFold] Make areGlobalsPotentiallyEqual less
 aggressive.

In particular, we shouldn't make assumptions about globals which are
unnamed_addr: we can fold them together with other globals.

Also while I'm here, use isInterposable() instead of trying to
explicitly name all the different kinds of weak linkage.
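
For example, in the ConstantExprNoFold.ll case added below, the two globals
are identical unnamed_addr constants, so the linker may legitimately place
them at the same address:

  @unnamed.1 = unnamed_addr constant [5 x i8] c"asdf\00"
  @unnamed.2 = unnamed_addr constant [5 x i8] c"asdf\00"
  @unnamed.cmp = global i1 icmp eq ([5 x i8]* @unnamed.1, [5 x i8]* @unnamed.2)

The icmp must therefore be kept as a relocatable constant expression rather
than folded to false.
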
Fixes https://bugs.llvm.org/show_bug.cgi?id=47090

Differential Revision: https://reviews.llvm.org/D87123
---
 llvm/lib/IR/ConstantFold.cpp                           | 2 +-
 llvm/test/Assembler/ConstantExprNoFold.ll              | 6 ++++++
 llvm/test/Transforms/InstCombine/2010-03-03-ExtElim.ll | 4 ++--
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index a827d9144c07c..3f00dd0575369 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -1616,7 +1616,7 @@ static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) {
 static ICmpInst::Predicate areGlobalsPotentiallyEqual(const GlobalValue *GV1,
                                                       const GlobalValue *GV2) {
   auto isGlobalUnsafeForEquality = [](const GlobalValue *GV) {
-    if (GV->hasExternalWeakLinkage() || GV->hasWeakAnyLinkage())
+    if (GV->isInterposable() || GV->hasGlobalUnnamedAddr())
       return true;
     if (const auto *GVar = dyn_cast<GlobalVariable>(GV)) {
       Type *Ty = GVar->getValueType();
diff --git a/llvm/test/Assembler/ConstantExprNoFold.ll b/llvm/test/Assembler/ConstantExprNoFold.ll
index 42e558eb38657..d91855925c897 100644
--- a/llvm/test/Assembler/ConstantExprNoFold.ll
+++ b/llvm/test/Assembler/ConstantExprNoFold.ll
@@ -42,6 +42,12 @@ target datalayout = "p:32:32"
 @empty.2 = external global [0 x i8], align 1
 @empty.cmp = global i1 icmp eq ([0 x i8]* @empty.1, [0 x i8]* @empty.2)
 
+; Two unnamed_addr globals can share an address
+; CHECK: @unnamed.cmp = global i1 icmp eq ([5 x i8]* @unnamed.1, [5 x i8]* @unnamed.2)
+@unnamed.1 = unnamed_addr constant [5 x i8] c"asdf\00"
+@unnamed.2 = unnamed_addr constant [5 x i8] c"asdf\00"
+@unnamed.cmp = global i1 icmp eq ([5 x i8]* @unnamed.1, [5 x i8]* @unnamed.2)
+
 @addrspace3 = internal addrspace(3) global i32 undef
 
 ; CHECK: @no.fold.addrspace.icmp.eq.gv.null = global i1 icmp eq (i32 addrspace(3)* @addrspace3, i32 addrspace(3)* null)
diff --git a/llvm/test/Transforms/InstCombine/2010-03-03-ExtElim.ll b/llvm/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
index ad0fe5a21783d..da9d0469e5e2c 100644
--- a/llvm/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
+++ b/llvm/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
@@ -16,8 +16,8 @@ define i1 @PR6486() nounwind {
 ; CHECK: ret i1 true
 }
 
-@d = common global i32 0, align 4
-@a = common global [1 x i32] zeroinitializer, align 4
+@d = global i32 0, align 4
+@a = global [1 x i32] zeroinitializer, align 4
 
 define i1 @PR16462_1() nounwind {
 ; CHECK-LABEL: @PR16462_1(

From 33eb64704292dc2fc8585b8aa7459f96482c6cf9 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere
Date: Fri, 11 Sep 2020 13:25:40 -0700
Subject: [PATCH 0430/1079] [lldb] Use GetNonKVOClassDescriptor to get the
 NSDictionary class descriptor

On macOS Big Sur the class descriptor contains the NSKVONotifying_
prefix. This is covered by TestDataFormatterObjCKVO.
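
(Background: when an object is observed through key-value observing, the
Objective-C runtime swaps in a dynamically created subclass, so an observed
dictionary reports a class name of the form NSKVONotifying_NSDictionary.
GetNonKVOClassDescriptor resolves back to the underlying class so the
summary provider still recognizes the value as a dictionary.)
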
Differential revision: https://reviews.llvm.org/D87545
---
 lldb/source/Plugins/Language/ObjC/NSDictionary.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp
index 3dc07678f92f5..b3209160cecf0 100644
--- a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp
+++ b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp
@@ -388,7 +388,7 @@ bool lldb_private::formatters::NSDictionarySummaryProvider(
     return false;
 
   ObjCLanguageRuntime::ClassDescriptorSP descriptor(
-      runtime->GetClassDescriptor(valobj));
+      runtime->GetNonKVOClassDescriptor(valobj));
 
   if (!descriptor || !descriptor->IsValid())
     return false;

From 928d419797ea173090e26f624f08801c7d6661e3 Mon Sep 17 00:00:00 2001
From: David Blaikie
Date: Fri, 11 Sep 2020 17:44:49 -0700
Subject: [PATCH 0431/1079] Fix a couple of tests that relied on the clang
 binary having 'clang' somewhere in the name

Because why would that be necessary? (I joke - I hadn't actually
expected this to be an issue but a content-hash-named filesystem means
the clang binary's just a bunch of numbers, and doesn't have 'clang'
anywhere in the name)
---
 clang/test/Driver/amdgcn-gz-options.cl | 6 +++---
 clang/test/Driver/compress.c           | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang/test/Driver/amdgcn-gz-options.cl b/clang/test/Driver/amdgcn-gz-options.cl
index 1074653984e7f..40fe9cfcc50df 100644
--- a/clang/test/Driver/amdgcn-gz-options.cl
+++ b/clang/test/Driver/amdgcn-gz-options.cl
@@ -2,15 +2,15 @@
 
 // RUN: %clang -### -target amdgcn-amd-amdhsa -gz=none -x assembler %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_NONE %s
 // RUN: %clang -### -target amdgcn-amd-amdhsa -gz=none %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_NONE %s
-// CHECK-OPT_GZ_EQ_NONE: {{".*clang.*".* "--compress-debug-sections=none"}}
+// CHECK-OPT_GZ_EQ_NONE: {{.* "-cc1(as)?".* "--compress-debug-sections=none"}}
 // CHECK-OPT_GZ_EQ_NONE: "--compress-debug-sections=none"
 
 // RUN: %clang -### -target amdgcn-amd-amdhsa -gz=zlib -x assembler %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB %s
 // RUN: %clang -### -target amdgcn-amd-amdhsa -gz=zlib %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB %s
-// CHECK-OPT_GZ_EQ_ZLIB: {{".*clang.*".* "--compress-debug-sections=zlib"}}
+// CHECK-OPT_GZ_EQ_ZLIB: {{.* "-cc1(as)?".* "--compress-debug-sections=zlib"}}
 // CHECK-OPT_GZ_EQ_ZLIB: "--compress-debug-sections=zlib"
 
 // RUN: %clang -### -target amdgcn-amd-amdhsa -gz=zlib-gnu -x assembler %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s
 // RUN: %clang -### -target amdgcn-amd-amdhsa -gz=zlib-gnu %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s
-// CHECK-OPT_GZ_EQ_ZLIB_GNU: {{".*clang.*".* "--compress-debug-sections=zlib-gnu"}}
+// CHECK-OPT_GZ_EQ_ZLIB_GNU: {{.* "-cc1(as)?".* "--compress-debug-sections=zlib-gnu"}}
 // CHECK-OPT_GZ_EQ_ZLIB_GNU: "--compress-debug-sections=zlib-gnu"
diff --git a/clang/test/Driver/compress.c b/clang/test/Driver/compress.c
index 67c9fdcb0fc99..f2cc187278f41 100644
--- a/clang/test/Driver/compress.c
+++ b/clang/test/Driver/compress.c
@@ -20,17 +20,17 @@
 
 // RUN: %clang -### -target x86_64-unknown-linux-gnu -gz=none -x assembler %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_NONE %s
 // RUN: %clang -### -target x86_64-unknown-linux-gnu -gz=none %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_NONE %s
-// CHECK-OPT_GZ_EQ_NONE: {{".*clang.*".* "--compress-debug-sections=none"}}
+// CHECK-OPT_GZ_EQ_NONE: {{.* "-cc1(as)?".* "--compress-debug-sections=none"}}
"-cc1(as)?".* "--compress-debug-sections=none"}} // CHECK-OPT_GZ_EQ_NONE: "--compress-debug-sections=none" // RUN: %clang -### -target x86_64-unknown-linux-gnu -gz=zlib -x assembler %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB %s // RUN: %clang -### -target x86_64-unknown-linux-gnu -gz=zlib %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB %s -// CHECK-OPT_GZ_EQ_ZLIB: {{".*clang.*".* "--compress-debug-sections=zlib"}} +// CHECK-OPT_GZ_EQ_ZLIB: {{.* "-cc1(as)?".* "--compress-debug-sections=zlib"}} // CHECK-OPT_GZ_EQ_ZLIB: "--compress-debug-sections=zlib" // RUN: %clang -### -target x86_64-unknown-linux-gnu -gz=zlib-gnu -x assembler %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s // RUN: %clang -### -target x86_64-unknown-linux-gnu -gz=zlib-gnu %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s -// CHECK-OPT_GZ_EQ_ZLIB_GNU: {{".*clang.*".* "--compress-debug-sections=zlib-gnu"}} +// CHECK-OPT_GZ_EQ_ZLIB_GNU: {{.* "-cc1(as)?".* "--compress-debug-sections=zlib-gnu"}} // CHECK-OPT_GZ_EQ_ZLIB_GNU: "--compress-debug-sections=zlib-gnu" // RUN: %clang -### -fintegrated-as -gz=invalid -x assembler -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_INVALID %s From 12a281d368e3ae115b2340c45f93b62e20759811 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Fri, 11 Sep 2020 17:43:49 -0700 Subject: [PATCH 0432/1079] [gn] Remove unneeded MC dep from llvm-tblgen Tablegen does not have link time dependencies on MC. Having llvm-tblgen depend on it causes it to be rebuilt in the gn build every time somebody touches any cpp file in llvm/lib/MC* or llvm/lib/DebugInfo/Codeview*. Touching tablegen invalidates most of the rest of the build, and re-running it takes a while. This is is annoying for me when swapping between branches that touch CodeView logic. This dep was added to LLVMBuild.txt back in 2018, and presumably it was carried over into the gn build. Differential Revision: https://reviews.llvm.org/D87553 --- llvm/utils/TableGen/LLVMBuild.txt | 2 +- llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/utils/TableGen/LLVMBuild.txt b/llvm/utils/TableGen/LLVMBuild.txt index 5eec4e060be58..6293aa0e40248 100644 --- a/llvm/utils/TableGen/LLVMBuild.txt +++ b/llvm/utils/TableGen/LLVMBuild.txt @@ -18,4 +18,4 @@ type = BuildTool name = tblgen parent = BuildTools -required_libraries = Support TableGen MC +required_libraries = Support TableGen diff --git a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn index 4559926899c9f..bd1382d4def7d 100644 --- a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn @@ -1,7 +1,6 @@ executable("llvm-tblgen") { deps = [ "//llvm/include/llvm/Config:llvm-config", - "//llvm/lib/MC", "//llvm/lib/Support", "//llvm/lib/TableGen", "//llvm/utils/TableGen/GlobalISel", From ad99e34c59b80fd094a6acdbcde4869ff37dac87 Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Fri, 11 Sep 2020 18:51:54 -0700 Subject: [PATCH 0433/1079] Revert "[NewPM][CodeGen] Introduce CodeGenPassBuilder to help build codegen pipeline" This reverts commit 31ecf8d29d81d196374a562c6d2bd2c25a62861e. This reverts commit 3fdaa8602a086a3fca5f0fc8527536ac659079d0. There is laying violation for Target->CodeGen. 
--- .../llvm/CodeGen/CGPassBuilderOption.h | 110 -- .../include/llvm/CodeGen/CodeGenPassBuilder.h | 1171 ----------------- .../llvm/CodeGen/MachinePassRegistry.def | 195 --- .../llvm/Passes/StandardInstrumentations.h | 5 - llvm/include/llvm/Target/TargetMachine.h | 21 - llvm/lib/CodeGen/CMakeLists.txt | 1 - llvm/lib/CodeGen/CodeGenPassBuilder.cpp | 25 - llvm/lib/CodeGen/LLVMTargetMachine.cpp | 35 +- llvm/lib/CodeGen/TargetPassConfig.cpp | 161 +-- 9 files changed, 20 insertions(+), 1704 deletions(-) delete mode 100644 llvm/include/llvm/CodeGen/CGPassBuilderOption.h delete mode 100644 llvm/include/llvm/CodeGen/CodeGenPassBuilder.h delete mode 100644 llvm/include/llvm/CodeGen/MachinePassRegistry.def delete mode 100644 llvm/lib/CodeGen/CodeGenPassBuilder.cpp diff --git a/llvm/include/llvm/CodeGen/CGPassBuilderOption.h b/llvm/include/llvm/CodeGen/CGPassBuilderOption.h deleted file mode 100644 index 4553060e687bf..0000000000000 --- a/llvm/include/llvm/CodeGen/CGPassBuilderOption.h +++ /dev/null @@ -1,110 +0,0 @@ -//===- CGPassBuilderOption.h - Options for pass builder ---------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file declares the options influencing building of codegen pipeline. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_CGPASSBUILDEROPTION_H -#define LLVM_CODEGEN_CGPASSBUILDEROPTION_H - -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Target/TargetOptions.h" -#include - -namespace llvm { -class TargetMachine; - -enum class RunOutliner { TargetDefault, AlwaysOutline, NeverOutline }; -enum class RegAllocType { Default, Basic, Fast, Greedy, PBQP }; -enum class CFLAAType { None, Steensgaard, Andersen, Both }; - -// Not one-on-one but mostly corresponding to commandline options in -// TargetPassConfig.cpp -struct CGPassBuilderOption { - // Enable optimized register allocation compilation path - Optional OptimizeRegAlloc; - - // Enable interprocedural register allocation to reduce load/store at - // procedure calls - Optional EnableIPRA; - - // Enable debug logging of pass pipeline - bool DebugPM = false; - - // Disable machine function verification - bool DisableVerify = false; - - // Fold null checks into faulting memory operations - bool EnableImplicitNullChecksPass = false; - - // Collect probability-driven block placement stats - bool EnableMachineBlockPlacementStatsPass = false; - - // Run MachineScheduler post regalloc (independent of preRA sched) - bool EnablePostMachineSchedulerPass = false; - - // Run live interval analysis earlier in the pipeline - bool EnableLiveIntervalsPass = false; - - // Disable Loop Strength Reduction Pass - bool DisableLoopStrengthReducePass = false; - - // Disable Codegen Prepare - bool DisableCodeGenPreparePass = false; - - // Disable MergeICmps Pass - bool DisableMergeICmpsPass = false; - - // Disable Partial Libcall Inlining Pass - bool DisablePartiallyInlineLibCallsPass = false; - - // Disable ConstantHoisting Pass - bool DisableConstantHoistingPass = false; - - // Print LLVM IR produced by the loop-reduce pass - bool PrintAfterLSR = false; - - // Print LLVM IR input to isel pass - bool PrintISelInput = false; - - // Dump garbage collector data - bool PrintGCInfo = false; - - // 
Enable codegen in SCC order. - bool RequiresCodeGenSCCOrder = false; - - // Enable the machine outliner - RunOutliner EnableMachineOutliner = RunOutliner::TargetDefault; - - // Register allocator to use - RegAllocType RegAlloc = RegAllocType::Default; - - // Experimental option to use CFL-AA in codegen - CFLAAType UseCFLAA = CFLAAType::None; - - // Enable abort calls when "global" instruction selection fails to - // lower/select an instruction - Optional EnableGlobalISelAbort; - - // Verify generated machine code" - Optional VerifyMachineCode; - - // Enable the "fast" instruction selector - Optional EnableFastISelOption; - - // Enable the "global" instruction selector - Optional EnableGlobalISelOption; -}; - -CGPassBuilderOption getCGPassBuilderOption(); - -} // namespace llvm - -#endif // LLVM_CODEGEN_CGPASSBUILDEROPTION_H diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h deleted file mode 100644 index aad7629bb176a..0000000000000 --- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h +++ /dev/null @@ -1,1171 +0,0 @@ -//===- Construction of codegen pass pipelines ------------------*- C++ -*--===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// Interfaces for registering analysis passes, producing common pass manager -/// configurations, and parsing of pass pipelines. -/// -/// TODO: handle addRequiredID where, in legacy PM, one pass require other pass -/// to run as prerequisite. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_CODEGENPASSBUILDER_H -#define LLVM_CODEGEN_CODEGENPASSBUILDER_H - -#include "llvm/ADT/FunctionExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/CFLAndersAliasAnalysis.h" -#include "llvm/Analysis/CFLSteensAliasAnalysis.h" -#include "llvm/Analysis/ScopedNoAliasAA.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/TypeBasedAliasAnalysis.h" -#include "llvm/CodeGen/CGPassBuilderOption.h" -#include "llvm/CodeGen/ExpandReductions.h" -#include "llvm/CodeGen/MIRPrinter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachinePassManager.h" -#include "llvm/CodeGen/PreISelIntrinsicLowering.h" -#include "llvm/CodeGen/UnreachableBlockElim.h" -#include "llvm/IR/IRPrintingPasses.h" -#include "llvm/IR/PassManager.h" -#include "llvm/IR/Verifier.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCTargetOptions.h" -#include "llvm/Support/CodeGen.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/ConstantHoisting.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" -#include "llvm/Transforms/Scalar/LoopStrengthReduce.h" -#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" -#include "llvm/Transforms/Scalar/MergeICmps.h" -#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" -#include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/EntryExitInstrumenter.h" -#include 
"llvm/Transforms/Utils/LowerInvoke.h" -#include -#include -#include -#include -#include - -namespace llvm { - -// FIXME: Dummy target independent passes definitions that have not yet been -// ported to new pass manager. Once they do, remove these. -#define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ - struct PASS_NAME : public PassInfoMixin { \ - template PASS_NAME(Ts &&...) {} \ - PreservedAnalyses run(Function &, FunctionAnalysisManager &) { \ - return PreservedAnalyses::all(); \ - } \ - }; -#define DUMMY_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ - struct PASS_NAME : public PassInfoMixin { \ - template PASS_NAME(Ts &&...) {} \ - PreservedAnalyses run(Module &, ModuleAnalysisManager &) { \ - return PreservedAnalyses::all(); \ - } \ - }; -#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ - struct PASS_NAME : public PassInfoMixin { \ - template PASS_NAME(Ts &&...) {} \ - Error run(Module &, MachineFunctionAnalysisManager &) { \ - return Error::success(); \ - } \ - PreservedAnalyses run(MachineFunction &, \ - MachineFunctionAnalysisManager &) { \ - llvm_unreachable("this api is to make new PM api happy"); \ - } \ - static AnalysisKey Key; \ - }; -#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ - struct PASS_NAME : public PassInfoMixin { \ - template PASS_NAME(Ts &&...) {} \ - PreservedAnalyses run(MachineFunction &, \ - MachineFunctionAnalysisManager &) { \ - return PreservedAnalyses::all(); \ - } \ - static AnalysisKey Key; \ - }; -#include "MachinePassRegistry.def" - -/// This class provides access to building LLVM's passes. -/// -/// Its members provide the baseline state available to passes during their -/// construction. The \c MachinePassRegistry.def file specifies how to construct -/// all of the built-in passes, and those may reference these members during -/// construction. -template class CodeGenPassBuilder { -public: - explicit CodeGenPassBuilder(LLVMTargetMachine &TM, CGPassBuilderOption Opts, - PassInstrumentationCallbacks *PIC) - : TM(TM), Opt(Opts), PIC(PIC) { - // Target could set CGPassBuilderOption::MISchedPostRA to true to achieve - // substitutePass(&PostRASchedulerID, &PostMachineSchedulerID) - - // Target should override TM.Options.EnableIPRA in their target-specific - // LLVMTM ctor. See TargetMachine::setGlobalISel for example. 
- if (Opt.EnableIPRA) - TM.Options.EnableIPRA = *Opt.EnableIPRA; - - if (Opt.EnableGlobalISelAbort) - TM.Options.GlobalISelAbort = *Opt.EnableGlobalISelAbort; - - if (!Opt.OptimizeRegAlloc) - Opt.OptimizeRegAlloc = getOptLevel() != CodeGenOpt::None; - - if (!Opt.VerifyMachineCode) { -#ifdef EXPENSIVE_CHECKS - Opt.VerifyMachineCode = TM.isMachineVerifierClean(); -#else - Opt.VerifyMachineCode = false; -#endif - } - } - - Expected> - buildPipeline(raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, - CodeGenFileType FileType) const; - - void registerModuleAnalyses(ModuleAnalysisManager &) const; - void registerFunctionAnalyses(FunctionAnalysisManager &) const; - void registerMachineFunctionAnalyses(MachineFunctionAnalysisManager &) const; - std::pair getPassNameFromLegacyName(StringRef) const; - - void registerAnalyses(MachineFunctionAnalysisManager &MFAM) const { - registerModuleAnalyses(*MFAM.MAM); - registerFunctionAnalyses(*MFAM.FAM); - registerMachineFunctionAnalyses(MFAM); - } - - PassInstrumentationCallbacks *getPassInstrumentationCallbacks() const { - return PIC; - } - -protected: - template using has_key_t = decltype(PassT::Key); - - template - using is_module_pass_t = decltype(std::declval().run( - std::declval(), std::declval())); - - template - using is_function_pass_t = decltype(std::declval().run( - std::declval(), std::declval())); - - // Function object to maintain state while adding codegen IR passes. - class AddIRPass { - public: - AddIRPass(bool DebugPM) : MPM(DebugPM), FPM(DebugPM) { - AddingFunctionPasses = false; - } - - // Add Function Pass - template - std::enable_if_t::value> - operator()(PassT &&Pass) { - if (!AddingFunctionPasses) - AddingFunctionPasses = true; - FPM.addPass(std::forward(Pass)); - } - - // Add Module Pass - template - std::enable_if_t::value && - !is_detected::value> - operator()(PassT &&Pass) { - assert((!AddingFunctionPasses) && - "could not add module pass after adding function pass"); - MPM.addPass(std::forward(Pass)); - } - - ModulePassManager releasePM() { - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - return std::move(MPM); - } - - private: - ModulePassManager MPM; - FunctionPassManager FPM; - // The codegen IR pipeline are mostly function passes with the exceptions of - // a few loop and module passes. `AddingFunctionPasses` makes sure that - // we could only add module passes at the beginning of the pipeline. Once - // we begin adding function passes, we could no longer add module passes. - // This special-casing introduces less adaptor passes. If we have the need - // of adding module passes after function passes, we could change the - // implementation to accommodate that. - bool AddingFunctionPasses; - }; - - // Function object to maintain state while adding codegen machine passes. 
- class AddMachinePass { - public: - AddMachinePass(bool DebugPM, bool RequiresCodeGenSCCOrder, - bool VerifyMachineCode) - : PM(DebugPM, RequiresCodeGenSCCOrder, VerifyMachineCode) {} - - template void operator()(PassT &&Pass) { - static_assert( - is_detected::value, - "Machine function pass must define a static member variable `Key`."); - for (auto &C : BeforeCallbacks) { - if (!C(&PassT::Key)) - return; - } - PM.addPass(std::forward(Pass)); - for (auto &C : AfterCallbacks) - C(&PassT::Key); - } - - template void insertPass(AnalysisKey *ID, PassT Pass) { - AfterCallbacks.emplace_back( - [this, ID, Pass = std::move(Pass)](AnalysisKey *PassID) { - if (PassID == ID) - this->PM.addPass(std::move(Pass)); - }); - } - - void disablePass(AnalysisKey *ID) { - BeforeCallbacks.emplace_back( - [ID](AnalysisKey *PassID) { return PassID != ID; }); - } - - MachineFunctionPassManager releasePM() { return std::move(PM); } - - private: - MachineFunctionPassManager PM; - SmallVector, 4> BeforeCallbacks; - SmallVector, 4> AfterCallbacks; - }; - - LLVMTargetMachine &TM; - CGPassBuilderOption Opt; - PassInstrumentationCallbacks *PIC; - - /// Target override these hooks to parse target-specific analyses. - void registerTargetAnalysis(ModuleAnalysisManager &) const {} - void registerTargetAnalysis(FunctionAnalysisManager &) const {} - void registerTargetAnalysis(MachineFunctionAnalysisManager &) const {} - std::pair getTargetPassNameFromLegacyName(StringRef) const { - return {"", false}; - } - - template TMC &getTM() const { return static_cast(TM); } - CodeGenOpt::Level getOptLevel() const { return TM.getOptLevel(); } - - /// Check whether or not GlobalISel should abort on error. - /// When this is disabled, GlobalISel will fall back on SDISel instead of - /// erroring out. - bool isGlobalISelAbortEnabled() const { - return TM.Options.GlobalISelAbort == GlobalISelAbortMode::Enable; - } - - /// Check whether or not a diagnostic should be emitted when GlobalISel - /// uses the fallback path. In other words, it will emit a diagnostic - /// when GlobalISel failed and isGlobalISelAbortEnabled is false. - bool reportDiagnosticWhenGlobalISelFallback() const { - return TM.Options.GlobalISelAbort == GlobalISelAbortMode::DisableWithDiag; - } - - /// addInstSelector - This method should install an instruction selector pass, - /// which converts from LLVM code to machine instructions. - Error addInstSelector(AddMachinePass &) const { - return make_error("addInstSelector is not overridden", - inconvertibleErrorCode()); - } - - /// Add passes that optimize instruction level parallelism for out-of-order - /// targets. These passes are run while the machine code is still in SSA - /// form, so they can use MachineTraceMetrics to control their heuristics. - /// - /// All passes added here should preserve the MachineDominatorTree, - /// MachineLoopInfo, and MachineTraceMetrics analyses. - void addILPOpts(AddMachinePass &) const {} - - /// This method may be implemented by targets that want to run passes - /// immediately before register allocation. - void addPreRegAlloc(AddMachinePass &) const {} - - /// addPreRewrite - Add passes to the optimized register allocation pipeline - /// after register allocation is complete, but before virtual registers are - /// rewritten to physical registers. - /// - /// These passes must preserve VirtRegMap and LiveIntervals, and when running - /// after RABasic or RAGreedy, they should take advantage of LiveRegMatrix. 
- /// When these passes run, VirtRegMap contains legal physreg assignments for - /// all virtual registers. - /// - /// Note if the target overloads addRegAssignAndRewriteOptimized, this may not - /// be honored. This is also not generally used for the the fast variant, - /// where the allocation and rewriting are done in one pass. - void addPreRewrite(AddMachinePass &) const {} - - /// Add passes to be run immediately after virtual registers are rewritten - /// to physical registers. - void addPostRewrite(AddMachinePass &) const {} - - /// This method may be implemented by targets that want to run passes after - /// register allocation pass pipeline but before prolog-epilog insertion. - void addPostRegAlloc(AddMachinePass &) const {} - - /// This method may be implemented by targets that want to run passes after - /// prolog-epilog insertion and before the second instruction scheduling pass. - void addPreSched2(AddMachinePass &) const {} - - /// This pass may be implemented by targets that want to run passes - /// immediately before machine code is emitted. - void addPreEmitPass(AddMachinePass &) const {} - - /// Targets may add passes immediately before machine code is emitted in this - /// callback. This is called even later than `addPreEmitPass`. - // FIXME: Rename `addPreEmitPass` to something more sensible given its actual - // position and remove the `2` suffix here as this callback is what - // `addPreEmitPass` *should* be but in reality isn't. - void addPreEmitPass2(AddMachinePass &) const {} - - /// {{@ For GlobalISel - /// - - /// addPreISel - This method should add any "last minute" LLVM->LLVM - /// passes (which are run just before instruction selector). - void addPreISel(AddIRPass &) const { - llvm_unreachable("addPreISel is not overridden"); - } - - /// This method should install an IR translator pass, which converts from - /// LLVM code to machine instructions with possibly generic opcodes. - Error addIRTranslator(AddMachinePass &) const { - return make_error("addIRTranslator is not overridden", - inconvertibleErrorCode()); - } - - /// This method may be implemented by targets that want to run passes - /// immediately before legalization. - void addPreLegalizeMachineIR(AddMachinePass &) const {} - - /// This method should install a legalize pass, which converts the instruction - /// sequence into one that can be selected by the target. - Error addLegalizeMachineIR(AddMachinePass &) const { - return make_error("addLegalizeMachineIR is not overridden", - inconvertibleErrorCode()); - } - - /// This method may be implemented by targets that want to run passes - /// immediately before the register bank selection. - void addPreRegBankSelect(AddMachinePass &) const {} - - /// This method should install a register bank selector pass, which - /// assigns register banks to virtual registers without a register - /// class or register banks. - Error addRegBankSelect(AddMachinePass &) const { - return make_error("addRegBankSelect is not overridden", - inconvertibleErrorCode()); - } - - /// This method may be implemented by targets that want to run passes - /// immediately before the (global) instruction selection. - void addPreGlobalInstructionSelect(AddMachinePass &) const {} - - /// This method should install a (global) instruction selector pass, which - /// converts possibly generic instructions to fully target-specific - /// instructions, thereby constraining all generic virtual registers to - /// register classes. 
- Error addGlobalInstructionSelect(AddMachinePass &) const { - return make_error( - "addGlobalInstructionSelect is not overridden", - inconvertibleErrorCode()); - } - /// @}} - - /// High level function that adds all passes necessary to go from llvm IR - /// representation to the MI representation. - /// Adds IR based lowering and target specific optimization passes and finally - /// the core instruction selection passes. - /// \returns true if an error occurred, false otherwise. - ModulePassManager addISelPasses() const; - - /// Add the actual instruction selection passes. This does not include - /// preparation passes on IR. - Expected addCoreISelPasses() const; - - /// Add the complete, standard set of LLVM CodeGen passes. - /// Fully developed targets will not generally override this. - Error addMachinePasses(AddMachinePass &) const; - - /// Add passes to lower exception handling for the code generator. - void addPassesToHandleExceptions(AddIRPass &) const; - - /// Add common target configurable passes that perform LLVM IR to IR - /// transforms following machine independent optimization. - void addIRPasses(AddIRPass &) const; - - /// Add pass to prepare the LLVM IR for code generation. This should be done - /// before exception handling preparation passes. - void addCodeGenPrepare(AddIRPass &) const; - - /// Add common passes that perform LLVM IR to IR transforms in preparation for - /// instruction selection. - void addISelPrepare(AddIRPass &) const; - - /// Methods with trivial inline returns are convenient points in the common - /// codegen pass pipeline where targets may insert passes. Methods with - /// out-of-line standard implementations are major CodeGen stages called by - /// addMachinePasses. Some targets may override major stages when inserting - /// passes is insufficient, but maintaining overriden stages is more work. - /// - - /// addMachineSSAOptimization - Add standard passes that optimize machine - /// instructions in SSA form. - void addMachineSSAOptimization(AddMachinePass &) const; - - /// addFastRegAlloc - Add the minimum set of target-independent passes that - /// are required for fast register allocation. - Error addFastRegAlloc(AddMachinePass &) const; - - /// addOptimizedRegAlloc - Add passes related to register allocation. - /// LLVMTargetMachine provides standard regalloc passes for most targets. - void addOptimizedRegAlloc(AddMachinePass &) const; - - /// Add passes that optimize machine instructions after register allocation. - void addMachineLateOptimization(AddMachinePass &) const; - - /// addGCPasses - Add late codegen passes that analyze code for garbage - /// collection. This should return true if GC info should be printed after - /// these passes. - void addGCPasses(AddMachinePass &) const {} - - /// Add standard basic block placement passes. - void addBlockPlacement(AddMachinePass &) const; - - using CreateMCStreamer = - std::function>(MCContext &)>; - void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const { - llvm_unreachable("addAsmPrinter is not overridden"); - } - - /// Utilities for targets to add passes to the pass manager. - /// - - /// createTargetRegisterAllocator - Create the register allocator pass for - /// this target at the current optimization level. - void addTargetRegisterAllocator(AddMachinePass &, bool Optimized) const; - - /// addMachinePasses helper to create the target-selected or overriden - /// regalloc pass. 
- void addRegAllocPass(AddMachinePass &, bool Optimized) const; - - /// Add core register alloator passes which do the actual register assignment - /// and rewriting. \returns true if any passes were added. - Error addRegAssignmentFast(AddMachinePass &) const; - Error addRegAssignmentOptimized(AddMachinePass &) const; - -private: - DerivedT &derived() { return static_cast(*this); } - const DerivedT &derived() const { - return static_cast(*this); - } -}; - -template -Expected> -CodeGenPassBuilder::buildPipeline(raw_pwrite_stream &Out, - raw_pwrite_stream *DwoOut, - CodeGenFileType FileType) const { - Expected AddPassOrErr = addCoreISelPasses(); - if (!AddPassOrErr) - return AddPassOrErr.takeError(); - - AddMachinePass &addPass = *AddPassOrErr; - - if (auto Err = derived().addMachinePasses(addPass)) - return std::move(Err); - - derived().addAsmPrinter( - addPass, [this, &Out, DwoOut, FileType](MCContext &Ctx) { - return this->TM.createMCStreamer(Out, DwoOut, FileType, Ctx); - }); - - addPass(FreeMachineFunctionPass()); - - return std::pair{ - addISelPasses(), addPass.releasePM()}; -} - -static inline AAManager registerAAAnalyses(CFLAAType UseCFLAA) { - AAManager AA; - - // The order in which these are registered determines their priority when - // being queried. - - switch (UseCFLAA) { - case CFLAAType::Steensgaard: - AA.registerFunctionAnalysis(); - break; - case CFLAAType::Andersen: - AA.registerFunctionAnalysis(); - break; - case CFLAAType::Both: - AA.registerFunctionAnalysis(); - AA.registerFunctionAnalysis(); - break; - default: - break; - } - - // Basic AliasAnalysis support. - // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that - // BasicAliasAnalysis wins if they disagree. This is intended to help - // support "obvious" type-punning idioms. - AA.registerFunctionAnalysis(); - AA.registerFunctionAnalysis(); - AA.registerFunctionAnalysis(); - - return AA; -} - -template -void CodeGenPassBuilder::registerModuleAnalyses( - ModuleAnalysisManager &MAM) const { -#define MODULE_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) \ - MAM.registerPass([&] { return PASS_NAME CONSTRUCTOR; }); -#include "MachinePassRegistry.def" - derived().registerTargetAnalysis(MAM); -} - -template -void CodeGenPassBuilder::registerFunctionAnalyses( - FunctionAnalysisManager &FAM) const { - FAM.registerPass([this] { return registerAAAnalyses(this->Opt.UseCFLAA); }); - -#define FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) \ - FAM.registerPass([&] { return PASS_NAME CONSTRUCTOR; }); -#include "MachinePassRegistry.def" - derived().registerTargetAnalysis(FAM); -} - -template -void CodeGenPassBuilder::registerMachineFunctionAnalyses( - MachineFunctionAnalysisManager &MFAM) const { -#define MACHINE_FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) \ - MFAM.registerPass([&] { return PASS_NAME CONSTRUCTOR; }); -#include "MachinePassRegistry.def" - derived().registerTargetAnalysis(MFAM); -} - -// FIXME: For new PM, use pass name directly in commandline seems good. -// Translate stringfied pass name to its old commandline name. Returns the -// matching legacy name and a boolean value indicating if the pass is a machine -// pass. 
-template -std::pair -CodeGenPassBuilder::getPassNameFromLegacyName(StringRef Name) const { - std::pair Ret; - if (Name.empty()) - return Ret; - -#define FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ - if (Name == NAME) \ - Ret = {#PASS_NAME, false}; -#define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ - if (Name == NAME) \ - Ret = {#PASS_NAME, false}; -#define MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ - if (Name == NAME) \ - Ret = {#PASS_NAME, false}; -#define DUMMY_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ - if (Name == NAME) \ - Ret = {#PASS_NAME, false}; -#define MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ - if (Name == NAME) \ - Ret = {#PASS_NAME, true}; -#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ - if (Name == NAME) \ - Ret = {#PASS_NAME, true}; -#define MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ - if (Name == NAME) \ - Ret = {#PASS_NAME, true}; -#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ - if (Name == NAME) \ - Ret = {#PASS_NAME, true}; -#include "llvm/CodeGen/MachinePassRegistry.def" - - if (Ret.first.empty()) - Ret = derived().getTargetPassNameFromLegacyName(Name); - - if (Ret.first.empty()) - report_fatal_error(Twine('\"') + Twine(Name) + - Twine("\" pass could not be found.")); - - return Ret; -} - -template -ModulePassManager CodeGenPassBuilder::addISelPasses() const { - AddIRPass addPass(Opt.DebugPM); - - if (TM.useEmulatedTLS()) - addPass(LowerEmuTLSPass()); - - addPass(PreISelIntrinsicLoweringPass()); - - derived().addIRPasses(addPass); - derived().addCodeGenPrepare(addPass); - addPassesToHandleExceptions(addPass); - derived().addISelPrepare(addPass); - return addPass.releasePM(); -} - -/// Add common target configurable passes that perform LLVM IR to IR transforms -/// following machine independent optimization. -template -void CodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const { - // Before running any passes, run the verifier to determine if the input - // coming from the front-end and/or optimizer is valid. - if (!Opt.DisableVerify) - addPass(VerifierPass()); - - // Run loop strength reduction before anything else. - if (getOptLevel() != CodeGenOpt::None && !Opt.DisableLoopStrengthReducePass) { - addPass(createFunctionToLoopPassAdaptor( - LoopStrengthReducePass(), /*UseMemorySSA*/ true, Opt.DebugPM)); - // FIXME: use -stop-after so we could remove PrintAfterLSR - if (Opt.PrintAfterLSR) - addPass(PrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n")); - } - - if (getOptLevel() != CodeGenOpt::None) { - // The MergeICmpsPass tries to create memcmp calls by grouping sequences of - // loads and compares. ExpandMemCmpPass then tries to expand those calls - // into optimally-sized loads and compares. The transforms are enabled by a - // target lowering hook. - if (!Opt.DisableMergeICmpsPass) - addPass(MergeICmpsPass()); - addPass(ExpandMemCmpPass()); - } - - // Run GC lowering passes for builtin collectors - // TODO: add a pass insertion point here - addPass(GCLoweringPass()); - addPass(ShadowStackGCLoweringPass()); - addPass(LowerConstantIntrinsicsPass()); - - // Make sure that no unreachable blocks are instruction selected. - addPass(UnreachableBlockElimPass()); - - // Prepare expensive constants for SelectionDAG. 
- if (getOptLevel() != CodeGenOpt::None && !Opt.DisableConstantHoistingPass) - addPass(ConstantHoistingPass()); - - if (getOptLevel() != CodeGenOpt::None && - !Opt.DisablePartiallyInlineLibCallsPass) - addPass(PartiallyInlineLibCallsPass()); - - // Instrument function entry and exit, e.g. with calls to mcount(). - addPass(EntryExitInstrumenterPass(/*PostInlining=*/true)); - - // Add scalarization of target's unsupported masked memory intrinsics pass. - // the unsupported intrinsic will be replaced with a chain of basic blocks, - // that stores/loads element one-by-one if the appropriate mask bit is set. - addPass(ScalarizeMaskedMemIntrinPass()); - - // Expand reduction intrinsics into shuffle sequences if the target wants to. - addPass(ExpandReductionsPass()); -} - -/// Turn exception handling constructs into something the code generators can -/// handle. -template -void CodeGenPassBuilder::addPassesToHandleExceptions( - AddIRPass &addPass) const { - const MCAsmInfo *MCAI = TM.getMCAsmInfo(); - assert(MCAI && "No MCAsmInfo"); - switch (MCAI->getExceptionHandlingType()) { - case ExceptionHandling::SjLj: - // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both - // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise, - // catch info can get misplaced when a selector ends up more than one block - // removed from the parent invoke(s). This could happen when a landing - // pad is shared by multiple invokes and is also a target of a normal - // edge from elsewhere. - addPass(SjLjEHPreparePass()); - LLVM_FALLTHROUGH; - case ExceptionHandling::DwarfCFI: - case ExceptionHandling::ARM: - addPass(DwarfEHPass()); - break; - case ExceptionHandling::WinEH: - // We support using both GCC-style and MSVC-style exceptions on Windows, so - // add both preparation passes. Each pass will only actually run if it - // recognizes the personality function. - addPass(WinEHPass()); - addPass(DwarfEHPass()); - break; - case ExceptionHandling::Wasm: - // Wasm EH uses Windows EH instructions, but it does not need to demote PHIs - // on catchpads and cleanuppads because it does not outline them into - // funclets. Catchswitch blocks are not lowered in SelectionDAG, so we - // should remove PHIs there. - addPass(WinEHPass(/*DemoteCatchSwitchPHIOnly=*/false)); - addPass(WasmEHPass()); - break; - case ExceptionHandling::None: - addPass(LowerInvokePass()); - - // The lower invoke pass may create unreachable code. Remove it. - addPass(UnreachableBlockElimPass()); - break; - } -} - -/// Add pass to prepare the LLVM IR for code generation. This should be done -/// before exception handling preparation passes. -template -void CodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const { - if (getOptLevel() != CodeGenOpt::None && !Opt.DisableCodeGenPreparePass) - addPass(CodeGenPreparePass()); - // TODO: Default ctor'd RewriteSymbolPass is no-op. - // addPass(RewriteSymbolPass()); -} - -/// Add common passes that perform LLVM IR to IR transforms in preparation for -/// instruction selection. -template -void CodeGenPassBuilder::addISelPrepare(AddIRPass &addPass) const { - derived().addPreISel(addPass); - - // Add both the safe stack and the stack protection passes: each of them will - // only protect functions that have corresponding attributes. 
- addPass(SafeStackPass()); - addPass(StackProtectorPass()); - - if (Opt.PrintISelInput) - addPass(PrintFunctionPass(dbgs(), - "\n\n*** Final LLVM Code input to ISel ***\n")); - - // All passes which modify the LLVM IR are now complete; run the verifier - // to ensure that the IR is valid. - if (!Opt.DisableVerify) - addPass(VerifierPass()); -} - -template -Expected::AddMachinePass> -CodeGenPassBuilder::addCoreISelPasses() const { - // Enable FastISel with -fast-isel, but allow that to be overridden. - TM.setO0WantsFastISel(Opt.EnableFastISelOption.getValueOr(true)); - - // Determine an instruction selector. - enum class SelectorType { SelectionDAG, FastISel, GlobalISel }; - SelectorType Selector; - - if (Opt.EnableFastISelOption && *Opt.EnableFastISelOption == true) - Selector = SelectorType::FastISel; - else if ((Opt.EnableGlobalISelOption && - *Opt.EnableGlobalISelOption == true) || - (TM.Options.EnableGlobalISel && - (!Opt.EnableGlobalISelOption || - *Opt.EnableGlobalISelOption == false))) - Selector = SelectorType::GlobalISel; - else if (TM.getOptLevel() == CodeGenOpt::None && TM.getO0WantsFastISel()) - Selector = SelectorType::FastISel; - else - Selector = SelectorType::SelectionDAG; - - // Set consistently TM.Options.EnableFastISel and EnableGlobalISel. - if (Selector == SelectorType::FastISel) { - TM.setFastISel(true); - TM.setGlobalISel(false); - } else if (Selector == SelectorType::GlobalISel) { - TM.setFastISel(false); - TM.setGlobalISel(true); - } - - AddMachinePass addPass(Opt.DebugPM, Opt.RequiresCodeGenSCCOrder, - *Opt.VerifyMachineCode); - - // Add instruction selector passes. - if (Selector == SelectorType::GlobalISel) { - if (auto Err = derived().addIRTranslator(addPass)) - return std::move(Err); - - derived().addPreLegalizeMachineIR(addPass); - - if (auto Err = derived().addLegalizeMachineIR(addPass)) - return std::move(Err); - - // Before running the register bank selector, ask the target if it - // wants to run some passes. - derived().addPreRegBankSelect(addPass); - - if (auto Err = derived().addRegBankSelect(addPass)) - return std::move(Err); - - derived().addPreGlobalInstructionSelect(addPass); - - if (auto Err = derived().addGlobalInstructionSelect(addPass)) - return std::move(Err); - - // Pass to reset the MachineFunction if the ISel failed. - addPass(ResetMachineFunctionPass(reportDiagnosticWhenGlobalISelFallback(), - isGlobalISelAbortEnabled())); - - // Provide a fallback path when we do not want to abort on - // not-yet-supported input. - if (!isGlobalISelAbortEnabled()) { - if (auto Err = derived().addInstSelector(addPass)) - return std::move(Err); - } - - } else if (auto Err = derived().addInstSelector(addPass)) - return std::move(Err); - - // Expand pseudo-instructions emitted by ISel. Don't run the verifier before - // FinalizeISel. - addPass(FinalizeISelPass()); - - return addPass; -} - -/// Add the complete set of target-independent postISel code generator passes. -/// -/// This can be read as the standard order of major LLVM CodeGen stages. Stages -/// with nontrivial configuration or multiple passes are broken out below in -/// add%Stage routines. -/// -/// Any CodeGenPassBuilder::addXX routine may be overriden by the -/// Target. The addPre/Post methods with empty header implementations allow -/// injecting target-specific fixups just before or after major stages. -/// Additionally, targets have the flexibility to change pass order within a -/// stage by overriding default implementation of add%Stage routines below. 
Each -/// technique has maintainability tradeoffs because alternate pass orders are -/// not well supported. addPre/Post works better if the target pass is easily -/// tied to a common pass. But if it has subtle dependencies on multiple passes, -/// the target should override the stage instead. -template -Error CodeGenPassBuilder::addMachinePasses( - AddMachinePass &addPass) const { - // Add passes that optimize machine instructions in SSA form. - if (getOptLevel() != CodeGenOpt::None) { - derived().addMachineSSAOptimization(addPass); - } else { - // If the target requests it, assign local variables to stack slots relative - // to one another and simplify frame index references where possible. - addPass(LocalStackSlotPass()); - } - - if (TM.Options.EnableIPRA) - addPass(RegUsageInfoPropagationPass()); - - // Run pre-ra passes. - derived().addPreRegAlloc(addPass); - - // Run register allocation and passes that are tightly coupled with it, - // including phi elimination and scheduling. - if (*Opt.OptimizeRegAlloc) { - derived().addOptimizedRegAlloc(addPass); - } else { - if (auto Err = derived().addFastRegAlloc(addPass)) - return Err; - } - - // Run post-ra passes. - derived().addPostRegAlloc(addPass); - - // Insert prolog/epilog code. Eliminate abstract frame index references... - if (getOptLevel() != CodeGenOpt::None) { - addPass(PostRAMachineSinkingPass()); - addPass(ShrinkWrapPass()); - } - - addPass(PrologEpilogInserterPass()); - - /// Add passes that optimize machine instructions after register allocation. - if (getOptLevel() != CodeGenOpt::None) - derived().addMachineLateOptimization(addPass); - - // Expand pseudo instructions before second scheduling pass. - addPass(ExpandPostRAPseudosPass()); - - // Run pre-sched2 passes. - derived().addPreSched2(addPass); - - if (Opt.EnableImplicitNullChecksPass) - addPass(ImplicitNullChecksPass()); - - // Second pass scheduler. - // Let Target optionally insert this pass by itself at some other point. - if (getOptLevel() != CodeGenOpt::None && - !TM.targetSchedulesPostRAScheduling()) { - if (Opt.EnablePostMachineSchedulerPass) - addPass(PostMachineSchedulerPass()); - else - addPass(PostRASchedulerPass()); - } - - // GC - derived().addGCPasses(addPass); - - // Basic block placement. - if (getOptLevel() != CodeGenOpt::None) - derived().addBlockPlacement(addPass); - - // Insert before XRay Instrumentation. - addPass(FEntryInserterPass()); - - addPass(XRayInstrumentationPass()); - addPass(PatchableFunctionPass()); - - derived().addPreEmitPass(addPass); - - if (TM.Options.EnableIPRA) { - // Collect register usage information and produce a register mask of - // clobbered registers, to be used to optimize call sites. - addPass(RegUsageInfoCollectorPass()); - } - - addPass(FuncletLayoutPass()); - - addPass(StackMapLivenessPass()); - addPass(LiveDebugValuesPass()); - - if (TM.Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None && - Opt.EnableMachineOutliner != RunOutliner::NeverOutline) { - bool RunOnAllFunctions = - (Opt.EnableMachineOutliner == RunOutliner::AlwaysOutline); - bool AddOutliner = RunOnAllFunctions || TM.Options.SupportsDefaultOutlining; - if (AddOutliner) - addPass(MachineOutlinerPass(RunOnAllFunctions)); - } - - // Add passes that directly emit MI after all other MI passes. - derived().addPreEmitPass2(addPass); - - return Error::success(); -} - -/// Add passes that optimize machine instructions in SSA form. 
-template <typename Derived>
-void CodeGenPassBuilder<Derived>::addMachineSSAOptimization(
-    AddMachinePass &addPass) const {
-  // Pre-ra tail duplication.
-  addPass(EarlyTailDuplicatePass());
-
-  // Optimize PHIs before DCE: removing dead PHI cycles may make more
-  // instructions dead.
-  addPass(OptimizePHIsPass());
-
-  // This pass merges large allocas. StackSlotColoring is a different pass
-  // which merges spill slots.
-  addPass(StackColoringPass());
-
-  // If the target requests it, assign local variables to stack slots relative
-  // to one another and simplify frame index references where possible.
-  addPass(LocalStackSlotPass());
-
-  // With optimization, dead code should already be eliminated. However
-  // there is one known exception: lowered code for arguments that are only
-  // used by tail calls, where the tail calls reuse the incoming stack
-  // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
-  addPass(DeadMachineInstructionElimPass());
-
-  // Allow targets to insert passes that improve instruction level parallelism,
-  // like if-conversion. Such passes will typically need dominator trees and
-  // loop info, just like LICM and CSE below.
-  derived().addILPOpts(addPass);
-
-  addPass(EarlyMachineLICMPass());
-  addPass(MachineCSEPass());
-
-  addPass(MachineSinkingPass());
-
-  addPass(PeepholeOptimizerPass());
-  // Clean-up the dead code that may have been generated by peephole
-  // rewriting.
-  addPass(DeadMachineInstructionElimPass());
-}
-
-//===---------------------------------------------------------------------===//
-/// Register Allocation Pass Configuration
-//===---------------------------------------------------------------------===//
-
-/// Instantiate the default register allocator pass for this target for either
-/// the optimized or unoptimized allocation path. This will be added to the pass
-/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc
-/// in the optimized case.
-///
-/// A target that uses the standard regalloc pass order for fast or optimized
-/// allocation may still override this for per-target regalloc
-/// selection. But -regalloc=... always takes precedence.
-template <typename Derived>
-void CodeGenPassBuilder<Derived>::addTargetRegisterAllocator(
-    AddMachinePass &addPass, bool Optimized) const {
-  if (Optimized)
-    addPass(RAGreedyPass());
-  else
-    addPass(RAFastPass());
-}
-
-/// Find and instantiate the register allocation pass requested by this target
-/// at the current optimization level. Different register allocators are
-/// defined as separate passes because they may require different analysis.
-template <typename Derived>
-void CodeGenPassBuilder<Derived>::addRegAllocPass(AddMachinePass &addPass,
-                                                  bool Optimized) const {
-  switch (Opt.RegAlloc) {
-  case RegAllocType::Default:
-    // With no -regalloc= override, ask the target for a regalloc pass.
-    derived().addTargetRegisterAllocator(addPass, Optimized);
-    break;
-  case RegAllocType::Basic:
-    addPass(RABasicPass());
-    break;
-  case RegAllocType::Fast:
-    addPass(RAFastPass());
-    break;
-  case RegAllocType::Greedy:
-    addPass(RAGreedyPass());
-    break;
-  case RegAllocType::PBQP:
-    addPass(RAPBQPPass());
-    break;
-  default:
-    llvm_unreachable("unknown register allocator type");
-  }
-}
-
-template <typename Derived>
-Error CodeGenPassBuilder<Derived>::addRegAssignmentFast(
-    AddMachinePass &addPass) const {
-  if (Opt.RegAlloc != RegAllocType::Default &&
-      Opt.RegAlloc != RegAllocType::Fast)
-    return make_error<StringError>(
-        "Must use fast (default) register allocator for unoptimized regalloc.",
-        inconvertibleErrorCode());
-
-  addRegAllocPass(addPass, false);
-  return Error::success();
-}
-
-template <typename Derived>
-Error CodeGenPassBuilder<Derived>::addRegAssignmentOptimized(
-    AddMachinePass &addPass) const {
-  // Add the selected register allocation pass.
-  addRegAllocPass(addPass, true);
-
-  // Allow targets to change the register assignments before rewriting.
-  derived().addPreRewrite(addPass);
-
-  // Finally rewrite virtual registers.
-  addPass(VirtRegRewriterPass());
-  // Perform stack slot coloring and post-ra machine LICM.
-  //
-  // FIXME: Re-enable coloring with register when it's capable of adding
-  // kill markers.
-  addPass(StackSlotColoringPass());
-
-  return Error::success();
-}
-
-/// Add the minimum set of target-independent passes that are required for
-/// register allocation. No coalescing or scheduling.
-template <typename Derived>
-Error CodeGenPassBuilder<Derived>::addFastRegAlloc(
-    AddMachinePass &addPass) const {
-  addPass(PHIEliminationPass());
-  addPass(TwoAddressInstructionPass());
-  return derived().addRegAssignmentFast(addPass);
-}
-
-/// Add standard target-independent passes that are tightly coupled with
-/// optimized register allocation, including coalescing, machine instruction
-/// scheduling, and register allocation itself.
-template <typename Derived>
-void CodeGenPassBuilder<Derived>::addOptimizedRegAlloc(
-    AddMachinePass &addPass) const {
-  addPass(DetectDeadLanesPass());
-
-  addPass(ProcessImplicitDefsPass());
-
-  // Edge splitting is smarter with machine loop info.
-  addPass(PHIEliminationPass());
-
-  // Eventually, we want to run LiveIntervals before PHI elimination.
-  if (Opt.EnableLiveIntervalsPass)
-    addPass(LiveIntervalsPass());
-
-  addPass(TwoAddressInstructionPass());
-  addPass(RegisterCoalescerPass());
-
-  // The machine scheduler may accidentally create disconnected components
-  // when moving subregister definitions around; avoid this by splitting them
-  // into separate vregs beforehand. Splitting can also improve reg. allocation
-  // quality.
-  addPass(RenameIndependentSubregsPass());
-
-  // PreRA instruction scheduling.
-  addPass(MachineSchedulerPass());
-
-  if (derived().addRegAssignmentOptimized(addPass)) {
-    // Allow targets to expand pseudo instructions depending on the choice of
-    // registers before MachineCopyPropagation.
-    derived().addPostRewrite(addPass);
-
-    // Copy propagate to forward register uses and try to eliminate COPYs that
-    // were not coalesced.
-    addPass(MachineCopyPropagationPass());
-
-    // Run post-ra machine LICM to hoist reloads / remats.
-    //
-    // FIXME: can this move into MachineLateOptimization?
-    addPass(MachineLICMPass());
-  }
-}
-
-//===---------------------------------------------------------------------===//
-/// Post RegAlloc Pass Configuration
-//===---------------------------------------------------------------------===//
-
-/// Add passes that optimize machine instructions after register allocation.
-template <typename Derived>
-void CodeGenPassBuilder<Derived>::addMachineLateOptimization(
-    AddMachinePass &addPass) const {
-  // Branch folding must be run after regalloc and prolog/epilog insertion.
-  addPass(BranchFolderPass());
-
-  // Tail duplication.
-  // Note that duplicating tail just increases code size and degrades
-  // performance for targets that require Structured Control Flow.
-  // In addition it can also make CFG irreducible. Thus we disable it.
-  if (!TM.requiresStructuredCFG())
-    addPass(TailDuplicatePass());
-
-  // Copy propagation.
-  addPass(MachineCopyPropagationPass());
-}
-
-/// Add standard basic block placement passes.
-template <typename Derived>
-void CodeGenPassBuilder<Derived>::addBlockPlacement(
-    AddMachinePass &addPass) const {
-  addPass(MachineBlockPlacementPass());
-  // Run a separate pass to collect block placement statistics.
-  if (Opt.EnableMachineBlockPlacementStatsPass)
-    addPass(MachineBlockPlacementStatsPass());
-}
-
-} // namespace llvm
-
-#endif // LLVM_CODEGEN_CODEGENPASSBUILDER_H
diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
deleted file mode 100644
index 734bbebc76dee..0000000000000
--- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def
+++ /dev/null
@@ -1,195 +0,0 @@
-//===- MachinePassRegistry.def - Registry of passes -------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is used as the registry of passes for the target-independent
-// code generator.
-//
-//===----------------------------------------------------------------------===//
-
-// NOTE: NO INCLUDE GUARD DESIRED!
- -#ifndef MODULE_ANALYSIS -#define MODULE_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) -#endif -MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC)) -#undef MODULE_ANALYSIS - -#ifndef MODULE_PASS -#define MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) -#endif -MODULE_PASS("pre-isel-intrinsic-lowering", PreISelIntrinsicLoweringPass, ()) -#undef MODULE_PASS - -#ifndef FUNCTION_ANALYSIS -#define FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) -#endif -FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC)) -FUNCTION_ANALYSIS("targetir", TargetIRAnalysis, (std::move(TM.getTargetIRAnalysis()))) -#undef FUNCTION_ANALYSIS - -#ifndef FUNCTION_PASS -#define FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) -#endif -FUNCTION_PASS("mergeicmps", MergeICmpsPass, ()) -FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass, ()) -FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass, ()) -FUNCTION_PASS("consthoist", ConstantHoistingPass, ()) -FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ()) -FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false)) -FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true)) -FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ()) -FUNCTION_PASS("lowerinvoke", LowerInvokePass, ()) -FUNCTION_PASS("verify", VerifierPass, ()) -#undef FUNCTION_PASS - -#ifndef LOOP_PASS -#define LOOP_PASS(NAME, PASS_NAME, CONSTRUCTOR) -#endif -LOOP_PASS("loop-reduce", LoopStrengthReducePass, ()) -#undef LOOP_PASS - -#ifndef MACHINE_MODULE_PASS -#define MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) -#endif -#undef MACHINE_MODULE_PASS - -#ifndef MACHINE_FUNCTION_ANALYSIS -#define MACHINE_FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) -#endif -MACHINE_FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC)) -// LiveVariables currently requires pure SSA form. -// FIXME: Once TwoAddressInstruction pass no longer uses kill flags, -// LiveVariables can be removed completely, and LiveIntervals can be directly -// computed. (We still either need to regenerate kill flags after regalloc, or -// preferably fix the scavenger to not depend on them). 
-// MACHINE_FUNCTION_ANALYSIS("live-vars", LiveVariablesAnalysis()) - -// MACHINE_FUNCTION_ANALYSIS("live-stacks", LiveStacksPass()) -// MACHINE_FUNCTION_ANALYSIS("slot-indexes", SlotIndexesAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("edge-bundles", EdgeBundlesAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("lazy-machine-bfi", LazyMachineBlockFrequencyInfoAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("machine-bfi", MachineBlockFrequencyInfoAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("machine-loops", MachineLoopInfoAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("machine-dom-frontier", MachineDominanceFrontierAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("machine-dom-tree", MachineDominatorTreeAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("machine-ore", MachineOptimizationRemarkEmitterPassAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("machine-post-dom-tree", MachinePostDominatorTreeAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("machine-region-info", MachineRegionInfoPassAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("machine-trace-metrics", MachineTraceMetricsAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("reaching-def", ReachingDefAnalysisAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("live-reg-matrix", LiveRegMatrixAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("gc-analysis", GCMachineCodeAnalysisPass()) -#undef MACHINE_FUNCTION_ANALYSIS - -#ifndef MACHINE_FUNCTION_PASS -#define MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) -#endif -MACHINE_FUNCTION_PASS("mir-printer", PrintMIRPass, ()) -#undef MACHINE_FUNCTION_PASS - -// After a pass is converted to new pass manager, its entry should be moved from -// dummy table to the normal one. For example, for a machine function pass, -// DUMMY_MACHINE_FUNCTION_PASS to MACHINE_FUNCTION_PASS. - -#ifndef DUMMY_FUNCTION_PASS -#define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) -#endif -DUMMY_FUNCTION_PASS("expandmemcmp", ExpandMemCmpPass, ()) -DUMMY_FUNCTION_PASS("gc-lowering", GCLoweringPass, ()) -DUMMY_FUNCTION_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass, ()) -DUMMY_FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass, ()) -DUMMY_FUNCTION_PASS("sjljehprepare", SjLjEHPreparePass, ()) -DUMMY_FUNCTION_PASS("dwarfehprepare", DwarfEHPass, ()) -DUMMY_FUNCTION_PASS("winehprepare", WinEHPass, ()) -DUMMY_FUNCTION_PASS("wasmehprepare", WasmEHPass, ()) -DUMMY_FUNCTION_PASS("codegenprepare", CodeGenPreparePass, ()) -DUMMY_FUNCTION_PASS("safe-stack", SafeStackPass, ()) -DUMMY_FUNCTION_PASS("stack-protector", StackProtectorPass, ()) -DUMMY_FUNCTION_PASS("atomic-expand", AtomicExpandPass, ()) -DUMMY_FUNCTION_PASS("interleaved-access", InterleavedAccessPass, ()) -DUMMY_FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass, ()) -DUMMY_FUNCTION_PASS("cfguard-dispatch", CFGuardDispatchPass, ()) -DUMMY_FUNCTION_PASS("cfguard-check", CFGuardCheckPass, ()) -DUMMY_FUNCTION_PASS("gc-info-printer", GCInfoPrinterPass, ()) -#undef DUMMY_FUNCTION_PASS - -#ifndef DUMMY_MODULE_PASS -#define DUMMY_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) -#endif -DUMMY_MODULE_PASS("lower-emutls", LowerEmuTLSPass, ()) -#undef DUMMY_MODULE_PASS - -#ifndef DUMMY_MACHINE_MODULE_PASS -#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) -#endif -DUMMY_MACHINE_MODULE_PASS("machine-outliner", MachineOutlinerPass, ()) -#undef DUMMY_MACHINE_MODULE_PASS - -#ifndef DUMMY_MACHINE_FUNCTION_PASS -#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) -#endif -DUMMY_MACHINE_FUNCTION_PASS("free-machine-function", FreeMachineFunctionPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("finalize-isel", 
FinalizeISelPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("shrink-wrap", ShrinkWrapPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("prologepilog", PrologEpilogInserterPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("postrapseudos", ExpandPostRAPseudosPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("machine-scheduler", MachineSchedulerPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("patchable-function", PatchableFunctionPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("livedebugvalues", LiveDebugValuesPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass, ()) -DUMMY_MACHINE_FUNCTION_PASS("opt-phis", OptimizePHIsPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("machinelicm", MachineLICMPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("machine-sink", MachineSinkingPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("peephole-opt", PeepholeOptimizerPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("regalloc", RegAllocPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("virtregrewriter", VirtRegRewriterPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("stack-slot-coloring", StackSlotColoringPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("twoaddressinstruction", TwoAddressInstructionPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("detect-dead-lanes", DetectDeadLanesPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("liveintervals", LiveIntervalsPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("simple-register-coalescing", RegisterCoalescerPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("rename-independent-subregs", RenameIndependentSubregsPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("branch-folder", BranchFolderPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("tailduplication", TailDuplicatePass, ()) -DUMMY_MACHINE_FUNCTION_PASS("block-placement", MachineBlockPlacementPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("block-placement-stats", MachineBlockPlacementStatsPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("machine-combiner", MachineCombinerPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("lrshrink", LiveRangeShrinkPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("break-false-deps", BreakFalseDepsPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("cfi-instr-inserter", CFIInstrInserterPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("cfguard-longjmp", CFGuardLongjmpPass, ()) 
-DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("ra-fast", RAFastPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("ra-greedy", RAGreedyPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("legalizer", LegalizerPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("irtranslator", IRTranslatorPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("regbankselect", RegBankSelectPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("reset-machine-function", ResetMachineFunctionPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("machineverifier", MachineVerifierPass, ()) -#undef DUMMY_MACHINE_FUNCTION_PASS diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 457eae26fd474..76e217c899745 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -28,7 +28,6 @@ namespace llvm { -class LLVMTargetMachine; class Module; class Function; @@ -141,10 +140,6 @@ class StandardInstrumentations { TimePassesHandler &getTimePasses() { return TimePasses; } }; - -void registerCodeGenCallback(PassInstrumentationCallbacks &PIC, - LLVMTargetMachine &); - } // namespace llvm #endif diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h index c7673d3e74e40..2a422341fdc84 100644 --- a/llvm/include/llvm/Target/TargetMachine.h +++ b/llvm/include/llvm/Target/TargetMachine.h @@ -15,12 +15,9 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" -#include "llvm/CodeGen/CGPassBuilderOption.h" -#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/IR/DataLayout.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" -#include "llvm/Support/Error.h" #include "llvm/Target/TargetOptions.h" #include @@ -370,20 +367,6 @@ class LLVMTargetMachine : public TargetMachine { bool DisableVerify = true, MachineModuleInfoWrapperPass *MMIWP = nullptr) override; - virtual Expected> - buildCodeGenPipeline(raw_pwrite_stream &, raw_pwrite_stream *, - CodeGenFileType, CGPassBuilderOption, - MachineFunctionAnalysisManager &, - PassInstrumentationCallbacks *) { - return make_error("buildCodeGenPipeline is not overriden", - inconvertibleErrorCode()); - } - - virtual std::pair getPassNameFromLegacyName(StringRef) { - llvm_unreachable( - "getPassNameFromLegacyName parseMIRPipeline is not overriden"); - } - /// Add passes to the specified pass manager to get machine code emitted with /// the MCJIT. This method returns true if machine code is not supported. It /// fills the MCContext Ctx pointer which can be used to build custom @@ -404,10 +387,6 @@ class LLVMTargetMachine : public TargetMachine { raw_pwrite_stream *DwoOut, CodeGenFileType FileType, MCContext &Context); - Expected> - createMCStreamer(raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, - CodeGenFileType FileType, MCContext &Ctx); - /// True if the target uses physical regs (as nearly all targets do). False /// for stack machines such as WebAssembly and other virtual-register /// machines. If true, all vregs must be allocated before PEI. 
If false, then
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 83b3655441fe4..617692a347922 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -14,7 +14,6 @@ add_llvm_component_library(LLVMCodeGen
   CFGuardLongjmp.cpp
   CFIInstrInserter.cpp
   CodeGen.cpp
-  CodeGenPassBuilder.cpp
   CodeGenPrepare.cpp
   CommandFlags.cpp
   CriticalAntiDepBreaker.cpp
diff --git a/llvm/lib/CodeGen/CodeGenPassBuilder.cpp b/llvm/lib/CodeGen/CodeGenPassBuilder.cpp
deleted file mode 100644
index 7f37f2069a3ba..0000000000000
--- a/llvm/lib/CodeGen/CodeGenPassBuilder.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-//===--- CodeGenPassBuilder.cpp -------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines interfaces to access the target independent code
-// generation passes provided by the LLVM backend.
-//
-//===---------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/CodeGenPassBuilder.h"
-
-using namespace llvm;
-
-namespace llvm {
-#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)                \
-  AnalysisKey PASS_NAME::Key;
-#include "llvm/CodeGen/MachinePassRegistry.def"
-#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)              \
-  AnalysisKey PASS_NAME::Key;
-#include "llvm/CodeGen/MachinePassRegistry.def"
-} // namespace llvm
diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index e86f255129990..e94b7ed4de039 100644
--- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -118,24 +118,6 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
                                       raw_pwrite_stream *DwoOut,
                                       CodeGenFileType FileType,
                                       MCContext &Context) {
-  Expected<std::unique_ptr<MCStreamer>> MCStreamerOrErr =
-      createMCStreamer(Out, DwoOut, FileType, Context);
-  if (auto Err = MCStreamerOrErr.takeError())
-    return true;
-
-  // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
-  FunctionPass *Printer =
-      getTarget().createAsmPrinter(*this, std::move(*MCStreamerOrErr));
-  if (!Printer)
-    return true;
-
-  PM.add(Printer);
-  return false;
-}
-
-Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer(
-    raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, CodeGenFileType FileType,
-    MCContext &Context) {
   if (Options.MCOptions.MCSaveTempLabels)
     Context.setAllowTemporaryLabels(false);
@@ -170,14 +152,10 @@ Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer(
   // Create the code emitter for the target if it exists. If not, .o file
   // emission fails.
   MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, Context);
-  if (!MCE)
-    return make_error<StringError>("createMCCodeEmitter failed",
-                                   inconvertibleErrorCode());
   MCAsmBackend *MAB =
       getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
-  if (!MAB)
-    return make_error<StringError>("createMCAsmBackend failed",
-                                   inconvertibleErrorCode());
+  if (!MCE || !MAB)
+    return true;
   Triple T(getTargetTriple().str());
   AsmStreamer.reset(getTarget().createMCObjectStreamer(
@@ -196,7 +174,14 @@ Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer(
     break;
   }
-  return std::move(AsmStreamer);
+  // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+  FunctionPass *Printer =
+      getTarget().createAsmPrinter(*this, std::move(AsmStreamer));
+  if (!Printer)
+    return true;
+
+  PM.add(Printer);
+  return false;
 }
 
 bool LLVMTargetMachine::addPassesToEmitFile(
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 03a567e3d443a..19db8eb480ca4 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -22,7 +22,6 @@
 #include "llvm/Analysis/ScopedNoAliasAA.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
-#include "llvm/CodeGen/CGPassBuilderOption.h"
 #include "llvm/CodeGen/CSEConfigBase.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachinePassRegistry.h"
@@ -30,13 +29,11 @@
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/LegacyPassManager.h"
-#include "llvm/IR/PassInstrumentation.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCTargetOptions.h"
 #include "llvm/Pass.h"
-#include "llvm/Passes/StandardInstrumentations.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
@@ -123,17 +120,16 @@ static cl::opt<bool> DebugifyAndStripAll(
     "Debugify MIR before and Strip debug after "
     "each pass except those known to be unsafe when debug info is present"),
     cl::ZeroOrMore);
-
+enum RunOutliner { AlwaysOutline, NeverOutline, TargetDefault };
 // Enable or disable the MachineOutliner.
 static cl::opt<RunOutliner> EnableMachineOutliner(
     "enable-machine-outliner", cl::desc("Enable the machine outliner"),
-    cl::Hidden, cl::ValueOptional, cl::init(RunOutliner::TargetDefault),
-    cl::values(clEnumValN(RunOutliner::AlwaysOutline, "always",
+    cl::Hidden, cl::ValueOptional, cl::init(TargetDefault),
+    cl::values(clEnumValN(AlwaysOutline, "always",
                           "Run on all functions guaranteed to be beneficial"),
-               clEnumValN(RunOutliner::NeverOutline, "never",
-                          "Disable all outlining"),
+               clEnumValN(NeverOutline, "never", "Disable all outlining"),
                // Sentinel value for unspecified option.
-               clEnumValN(RunOutliner::AlwaysOutline, "", "")));
+               clEnumValN(AlwaysOutline, "", "")));
 // Enable or disable FastISel. Both options are needed, because
 // FastISel is enabled by default with -fast, and we wish to be
 // able to enable or disable fast-isel independently from -O0.
@@ -176,6 +172,7 @@ static cl::opt<bool>
     EarlyLiveIntervals("early-live-intervals", cl::Hidden,
                        cl::desc("Run live interval analysis earlier in the pipeline"));
 
 // Experimental option to use CFL-AA in codegen
+enum class CFLAAType { None, Steensgaard, Andersen, Both };
 static cl::opt<CFLAAType> UseCFLAA(
     "use-cfl-aa-in-codegen", cl::init(CFLAAType::None), cl::Hidden,
     cl::desc("Enable the new, experimental CFL alias analysis in CodeGen"),
@@ -407,143 +404,6 @@ void TargetPassConfig::setStartStopPasses() {
   Started = (StartAfter == nullptr) && (StartBefore == nullptr);
 }
-CGPassBuilderOption llvm::getCGPassBuilderOption() {
-  CGPassBuilderOption Opt;
-
-#define SET_OPTION(Option)                                                     \
-  if (Option.getNumOccurrences())                                              \
-    Opt.Option = Option;
-
-  SET_OPTION(EnableFastISelOption)
-  SET_OPTION(EnableGlobalISelAbort)
-  SET_OPTION(EnableGlobalISelOption)
-  SET_OPTION(EnableIPRA)
-  SET_OPTION(OptimizeRegAlloc)
-  SET_OPTION(VerifyMachineCode)
-
-  Opt.EnableMachineOutliner = EnableMachineOutliner;
-  Opt.UseCFLAA = UseCFLAA;
-  Opt.PrintISelInput = PrintISelInput;
-  Opt.PrintGCInfo = PrintGCInfo;
-  Opt.EnablePostMachineSchedulerPass = MISchedPostRA;
-  Opt.EnableLiveIntervalsPass = EarlyLiveIntervals;
-  Opt.EnableMachineBlockPlacementStatsPass = EnableBlockPlacementStats;
-  Opt.EnableImplicitNullChecksPass = EnableImplicitNullChecks;
-  Opt.DisableLoopStrengthReducePass = DisableLSR;
-  Opt.DisableCodeGenPreparePass = DisableCGP;
-  Opt.DisableMergeICmpsPass = DisableMergeICmps;
-  Opt.DisablePartiallyInlineLibCallsPass = DisablePartialLibcallInlining;
-  Opt.DisableConstantHoistingPass = DisableConstantHoisting;
-  Opt.PrintAfterLSR = PrintLSR;
-
-  return Opt;
-}
-
-static void registerPartialPipelineCallback(PassInstrumentationCallbacks &PIC,
-                                            LLVMTargetMachine &LLVMTM) {
-  StringRef StartBefore;
-  StringRef StartAfter;
-  StringRef StopBefore;
-  StringRef StopAfter;
-
-  unsigned StartBeforeInstanceNum = 0;
-  unsigned StartAfterInstanceNum = 0;
-  unsigned StopBeforeInstanceNum = 0;
-  unsigned StopAfterInstanceNum = 0;
-
-  std::tie(StartBefore, StartBeforeInstanceNum) =
-      getPassNameAndInstanceNum(StartBeforeOpt);
-  std::tie(StartAfter, StartAfterInstanceNum) =
-      getPassNameAndInstanceNum(StartAfterOpt);
-  std::tie(StopBefore, StopBeforeInstanceNum) =
-      getPassNameAndInstanceNum(StopBeforeOpt);
-  std::tie(StopAfter, StopAfterInstanceNum) =
-      getPassNameAndInstanceNum(StopAfterOpt);
-
-  if (StartBefore.empty() && StartAfter.empty() && StopBefore.empty() &&
-      StopAfter.empty())
-    return;
-
-  std::tie(StartBefore, std::ignore) =
-      LLVMTM.getPassNameFromLegacyName(StartBefore);
-  std::tie(StartAfter, std::ignore) =
-      LLVMTM.getPassNameFromLegacyName(StartAfter);
-  std::tie(StopBefore, std::ignore) =
-      LLVMTM.getPassNameFromLegacyName(StopBefore);
-  std::tie(StopAfter, std::ignore) =
-      LLVMTM.getPassNameFromLegacyName(StopAfter);
-  if (!StartBefore.empty() && !StartAfter.empty())
-    report_fatal_error(Twine(StartBeforeOptName) + Twine(" and ") +
-                       Twine(StartAfterOptName) + Twine(" specified!"));
-  if (!StopBefore.empty() && !StopAfter.empty())
-    report_fatal_error(Twine(StopBeforeOptName) + Twine(" and ") +
-                       Twine(StopAfterOptName) + Twine(" specified!"));
-
-  PIC.registerBeforePassCallback(
-      [=, EnableCurrent = StartBefore.empty() && StartAfter.empty(),
-       EnableNext = Optional<bool>(), StartBeforeCount = 0u,
-       StartAfterCount = 0u, StopBeforeCount = 0u,
-       StopAfterCount = 0u](StringRef P, Any) mutable {
-        bool StartBeforePass = !StartBefore.empty() && P.contains(StartBefore);
-        bool StartAfterPass = !StartAfter.empty() &&
P.contains(StartAfter); - bool StopBeforePass = !StopBefore.empty() && P.contains(StopBefore); - bool StopAfterPass = !StopAfter.empty() && P.contains(StopAfter); - - // Implement -start-after/-stop-after - if (EnableNext) { - EnableCurrent = *EnableNext; - EnableNext.reset(); - } - - // Using PIC.registerAfterPassCallback won't work because if this - // callback returns false, AfterPassCallback is also skipped. - if (StartAfterPass && StartAfterCount++ == StartAfterInstanceNum) { - assert(!EnableNext && "Error: assign to EnableNext more than once"); - EnableNext = true; - } - if (StopAfterPass && StopAfterCount++ == StopAfterInstanceNum) { - assert(!EnableNext && "Error: assign to EnableNext more than once"); - EnableNext = false; - } - - if (StartBeforePass && StartBeforeCount++ == StartBeforeInstanceNum) - EnableCurrent = true; - if (StopBeforePass && StopBeforeCount++ == StopBeforeInstanceNum) - EnableCurrent = false; - return EnableCurrent; - }); -} - -void llvm::registerCodeGenCallback(PassInstrumentationCallbacks &PIC, - LLVMTargetMachine &LLVMTM) { - - // Register a callback for disabling passes. - PIC.registerBeforePassCallback([](StringRef P, Any) { - -#define DISABLE_PASS(Option, Name) \ - if (Option && P.contains(#Name)) \ - return false; - DISABLE_PASS(DisableBlockPlacement, MachineBlockPlacementPass) - DISABLE_PASS(DisableBranchFold, BranchFolderPass) - DISABLE_PASS(DisableCopyProp, MachineCopyPropagationPass) - DISABLE_PASS(DisableEarlyIfConversion, EarlyIfConverterPass) - DISABLE_PASS(DisableEarlyTailDup, EarlyTailDuplicatePass) - DISABLE_PASS(DisableMachineCSE, MachineCSEPass) - DISABLE_PASS(DisableMachineDCE, DeadMachineInstructionElimPass) - DISABLE_PASS(DisableMachineLICM, EarlyMachineLICMPass) - DISABLE_PASS(DisableMachineSink, MachineSinkingPass) - DISABLE_PASS(DisablePostRAMachineLICM, MachineLICMPass) - DISABLE_PASS(DisablePostRAMachineSink, PostRAMachineSinkingPass) - DISABLE_PASS(DisablePostRASched, PostRASchedulerPass) - DISABLE_PASS(DisableSSC, StackSlotColoringPass) - DISABLE_PASS(DisableTailDuplicate, TailDuplicatePass) - - return true; - }); - - registerPartialPipelineCallback(PIC, LLVMTM); -} - // Out of line constructor provides default values for pass options and // registers all common codegen passes. 
 TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
@@ -1152,11 +1012,10 @@ void TargetPassConfig::addMachinePasses() {
     addPass(&LiveDebugValuesID, false);
   if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None &&
-      EnableMachineOutliner != RunOutliner::NeverOutline) {
-    bool RunOnAllFunctions =
-        (EnableMachineOutliner == RunOutliner::AlwaysOutline);
-    bool AddOutliner =
-        RunOnAllFunctions || TM->Options.SupportsDefaultOutlining;
+      EnableMachineOutliner != NeverOutline) {
+    bool RunOnAllFunctions = (EnableMachineOutliner == AlwaysOutline);
+    bool AddOutliner = RunOnAllFunctions ||
+                       TM->Options.SupportsDefaultOutlining;
     if (AddOutliner)
       addPass(createMachineOutlinerPass(RunOnAllFunctions));
   }

From 0e0d93e2f09a3e84cee0e77f0f2510001c2f064a Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot
Date: Sat, 12 Sep 2020 01:54:23 +0000
Subject: [PATCH 0434/1079] [gn build] Port ad99e34c59b

---
 llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
index a6ca6b974930a..e2f6c710496ec 100644
--- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
@@ -32,7 +32,6 @@ static_library("CodeGen") {
     "CalcSpillWeights.cpp",
     "CallingConvLower.cpp",
    "CodeGen.cpp",
-    "CodeGenPassBuilder.cpp",
     "CodeGenPrepare.cpp",
     "CommandFlags.cpp",
     "CriticalAntiDepBreaker.cpp",

From 528554c39b098e2d9a9c7ec51c77717aa07db2a2 Mon Sep 17 00:00:00 2001
From: QingShan Zhang
Date: Sat, 12 Sep 2020 02:42:22 +0000
Subject: [PATCH 0435/1079] [PowerPC] Set the mayRaiseFPException for
 FCMPUS/FCMPUD

According to the ISA, fcmpu raises the Floating-Point Invalid Operation
Exception (SNaN), by setting the VXSNAN bit, if either of the operands is a
signaling NaN. However, the instruction description did not set
mayRaiseFPException, which can affect scheduling and some backend
optimizations.

Reviewed By: qiucf

Differential Revision: https://reviews.llvm.org/D83937
---
 llvm/lib/Target/PowerPC/PPCInstrInfo.td |  2 +-
 llvm/test/CodeGen/PowerPC/nofpexcept.ll | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/nofpexcept.ll

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index c865fa10956b2..bf7ad639ab6e4 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2624,7 +2624,7 @@ let isCompare = 1, hasSideEffects = 0 in {
   }
 }
 let PPC970_Unit = 3, Predicates = [HasFPU] in {  // FPU Operations.
-let isCompare = 1, hasSideEffects = 0 in {
+let isCompare = 1, mayRaiseFPException = 1, hasSideEffects = 0 in {
   def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB),
                         "fcmpu $crD, $fA, $fB", IIC_FPCompare>;
   def FCMPOS : XForm_17<63, 32, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB),
diff --git a/llvm/test/CodeGen/PowerPC/nofpexcept.ll b/llvm/test/CodeGen/PowerPC/nofpexcept.ll
new file mode 100644
index 0000000000000..e15b06e0babea
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/nofpexcept.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s \
+; RUN:   -stop-after=finalize-isel -verify-machineinstrs | FileCheck %s
+
+; Verify that mayRaiseFPException is set for FCMPUD/FCMPUS
+define i32 @fcmpu(double %a, double %b) {
+  ; CHECK-LABEL: name: fcmpu
+  ; CHECK: bb.0.entry:
+  ; CHECK:   liveins: $f1, $f2
+  ; CHECK:   [[COPY:%[0-9]+]]:f8rc = COPY $f2
+  ; CHECK:   [[COPY1:%[0-9]+]]:f8rc = COPY $f1
+  ; CHECK:   %2:crrc = nofpexcept FCMPUD [[COPY1]], [[COPY]]
+  ; CHECK:   [[COPY2:%[0-9]+]]:crbitrc = COPY %2.sub_gt
+  ; CHECK:   [[LI8_:%[0-9]+]]:g8rc_and_g8rc_nox0 = LI8 0
+  ; CHECK:   [[LI8_1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LI8 1
+  ; CHECK:   [[ISEL8_:%[0-9]+]]:g8rc = ISEL8 [[LI8_1]], [[LI8_]], [[COPY2]]
+  ; CHECK:   $x3 = COPY [[ISEL8_]]
+  ; CHECK:   BLR8 implicit $lr8, implicit $rm, implicit $x3
+entry:
+  %r = fcmp ogt double %a, %b
+  %g = zext i1 %r to i32
+  ret i32 %g
+}

From 0680a3d56d8b5bcb6647a1149f0de156f72edf91 Mon Sep 17 00:00:00 2001
From: QingShan Zhang
Date: Sat, 12 Sep 2020 02:49:47 +0000
Subject: [PATCH 0436/1079] [Power10] Enable the heuristic for Power10 and
 switch the sched model with P9 Model

Enable the pre-RA and post-RA scheduler strategies for Power10, since we
want to customize their heuristics later, and switch the scheduling model
to the P9 model until a P10 model is available. NoSchedModel is modelled
as an in-order CPU, and with it the pre-RA scheduler is not
bi-directional, which has a big impact on scheduling.

Reviewed By: jji

Differential Revision: https://reviews.llvm.org/D86865
---
 llvm/lib/Target/PowerPC/PPC.td                |  8 +++----
 .../PowerPC/pcrel-call-linkage-leaf.ll        | 24 +++++++++----------
 .../PowerPC/pcrel-call-linkage-with-calls.ll  |  4 ++--
 llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll |  4 ++--
 4 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index c572e210093a3..d94ecc6e84381 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -325,6 +325,8 @@ def ProcessorFeatures {
       [DirectivePwr9,
        FeatureP9Altivec,
        FeatureP9Vector,
+       FeaturePPCPreRASched,
+       FeaturePPCPostRASched,
        FeatureISA3_0,
        FeaturePredictableSelectIsExpensive
       ];
@@ -334,9 +336,7 @@ def ProcessorFeatures {
   // dispatch for vector operations than scalar ones. For the time being,
   // this list also includes scheduling-related features since we do not have
   // enough info to create custom scheduling strategies for future CPUs.
-  list<SubtargetFeature> P9SpecificFeatures = [FeatureVectorsUseTwoUnits,
-                                               FeaturePPCPreRASched,
-                                               FeaturePPCPostRASched];
+  list<SubtargetFeature> P9SpecificFeatures = [FeatureVectorsUseTwoUnits];
   list<SubtargetFeature> P9InheritableFeatures =
     !listconcat(P8InheritableFeatures, P9AdditionalFeatures);
   list<SubtargetFeature> P9Features =
@@ -559,7 +559,7 @@ def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>;
 def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>;
 def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>;
 // No scheduler model yet.
-def : ProcessorModel<"pwr10", NoSchedModel, ProcessorFeatures.P10Features>;
+def : ProcessorModel<"pwr10", P9Model, ProcessorFeatures.P10Features>;
 // No scheduler model for future CPU.
 def : ProcessorModel<"future", NoSchedModel, ProcessorFeatures.FutureFeatures>;
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
index 9141fdc735a0e..00cc472092d47 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
@@ -45,12 +45,12 @@ define dso_local signext i32 @AsmClobberX2WithTOC(i32 signext %a, i32 signext %b
 ; CHECK-LARGE:    ld r2, .Lfunc_toc2-.Lfunc_gep2(r12)
 ; CHECK-LARGE:    add r2, r2, r12
 ; CHECK-S:        .localentry AsmClobberX2WithTOC
-; CHECK-S:       #APP
+; CHECK-S:       add r3, r4, r3
+; CHECK-S-NEXT:  #APP
 ; CHECK-S-NEXT:  li r2, 0
 ; CHECK-S-NEXT:  #NO_APP
-; CHECK-S-NEXT:  plwz r5, global_int@PCREL(0), 1
-; CHECK-S-NEXT:  add r3, r4, r3
-; CHECK-S-NEXT:  add r3, r3, r5
+; CHECK-S-NEXT:  plwz r4, global_int@PCREL(0), 1
+; CHECK-S-NEXT:  add r3, r3, r4
 ; CHECK-S-NEXT:  extsw r3, r3
 ; CHECK-S-NEXT:  blr
 entry:
@@ -67,10 +67,10 @@ define dso_local signext i32 @AsmClobberX5(i32 signext %a, i32 signext %b) local
 ; CHECK-P9-NOT:  .localentry
 ; CHECK-ALL:     # %bb.0: # %entry
 ; CHECK-S-NEXT:  add r3, r4, r3
-; CHECK-S-NEXT:  extsw r3, r3
 ; CHECK-S-NEXT:  #APP
 ; CHECK-S-NEXT:  nop
 ; CHECK-S-NEXT:  #NO_APP
+; CHECK-S-NEXT:  extsw r3, r3
 ; CHECK-S-NEXT:  blr
 entry:
   %add = add nsw i32 %b, %a
@@ -109,24 +109,24 @@ define dso_local signext i32 @X2IsCallerSaved(i32 signext %a, i32 signext %b, i3
 ; CHECK-S-NEXT:    add r9, r10, r9
 ; CHECK-S-NEXT:    sub r10, r10, r3
 ; CHECK-S-NEXT:    mullw r3, r4, r3
+; CHECK-S-NEXT:    sub r12, r4, r5
+; CHECK-S-NEXT:    add r0, r6, r5
+; CHECK-S-NEXT:    sub r2, r6, r7
+; CHECK-S-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-S-NEXT:    add r30, r8, r7
 ; CHECK-S-NEXT:    mullw r3, r3, r11
 ; CHECK-S-NEXT:    mullw r3, r3, r5
-; CHECK-S-NEXT:    sub r12, r4, r5
 ; CHECK-S-NEXT:    mullw r3, r3, r6
-; CHECK-S-NEXT:    add r0, r6, r5
 ; CHECK-S-NEXT:    mullw r3, r3, r12
 ; CHECK-S-NEXT:    mullw r3, r3, r0
 ; CHECK-S-NEXT:    mullw r3, r3, r7
-; CHECK-S-NEXT:    sub r2, r6, r7
 ; CHECK-S-NEXT:    mullw r3, r3, r8
-; CHECK-S-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-S-NEXT:    add r30, r8, r7
 ; CHECK-S-NEXT:    mullw r3, r3, r2
 ; CHECK-S-NEXT:    mullw r3, r3, r30
-; CHECK-S-NEXT:    mullw r3, r3, r29
-; CHECK-S-NEXT:    mullw r3, r3, r9
 ; CHECK-S-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-S-NEXT:    mullw r3, r3, r29
 ; CHECK-S-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-S-NEXT:    mullw r3, r3, r9
 ; CHECK-S-NEXT:    mullw r3, r3, r10
 ; CHECK-S-NEXT:    extsw r3, r3
 ; CHECK-S-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
index 0a4f2f38c816b..8fa86ef50ea57 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
@@ -353,10 +353,10 @@ define dso_local signext i32 @IndirectCall3(i32 signext %a, i32 signext %b, i32
 ; CHECK-S-NEXT:    stdu r1, -32(r1)
 ; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-S-NEXT:    .cfi_offset lr, 16
-; CHECK-S-NEXT:    add r3, r4, r3
-; CHECK-S-NEXT:    extsw r3, r3
 ; CHECK-S-NEXT:    mtctr r5
+; CHECK-S-NEXT:    add r3, r4, r3
 ; CHECK-S-NEXT:    mr r12, r5
+; CHECK-S-NEXT:    extsw r3, r3
 ; CHECK-S-NEXT:    bctrl
 ; CHECK-S-NEXT:    plwz r4, globalVar@PCREL(0), 1
 ; CHECK-S-NEXT:    mullw r3, r4, r3
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
index 56e49780c5f0f..1340197b3ccba 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
@@ -185,8 +185,8 @@ define dso_local signext i32 @TailCallAbs() local_unnamed_addr {
 ; CHECK:         .localentry TailCallAbs, 1
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    li r3, 400
-; CHECK-NEXT:    mtctr r3
 ; CHECK-NEXT:    li r12, 400
+; CHECK-NEXT:    mtctr r3
 ; CHECK-NEXT:    bctr
 ; CHECK-NEXT:    #TC_RETURNr8 ctr 0
 entry:
@@ -207,8 +207,8 @@ define dso_local signext i32 @NoTailCallAbs(i32 signext %a) local_unnamed_addr {
 ; CHECK-NEXT:    stdu r1, -48(r1)
 ; CHECK-NEXT:    mr r30, r3
 ; CHECK-NEXT:    li r3, 400
-; CHECK-NEXT:    mtctr r3
 ; CHECK-NEXT:    li r12, 400
+; CHECK-NEXT:    mtctr r3
 ; CHECK-NEXT:    bctrl
 ; CHECK-NEXT:    add r3, r3, r30
 ; CHECK-NEXT:    extsw r3, r3

From 6c8041aa0ffed827636935e59c489b1e390c8542 Mon Sep 17 00:00:00 2001
From: Serge Pavlov
Date: Sat, 12 Sep 2020 14:30:44 +0700
Subject: [PATCH 0437/1079] [AST][FPEnv] Keep FP options in trailing storage of
 CastExpr

This change allows a CastExpr to carry an optional FPOptionsOverride
object in its trailing storage. Of all the cast nodes, only
ImplicitCastExpr, CStyleCastExpr, CXXFunctionalCastExpr and
CXXStaticCastExpr are allowed to have FPOptions.
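To make the mechanism concrete, here is a minimal, self-contained sketch of the
trailing-storage idiom this patch applies (plain standard C++; `Node` and
`FPState` are invented stand-ins for illustration, not the actual clang
classes, which build on llvm::TrailingObjects and also keep the CXXBaseSpecifier
path in the same trailing area; deallocation is omitted for brevity):

    #include <cassert>
    #include <cstddef>
    #include <new>

    // Invented stand-in for FPOptionsOverride: a trivially copyable payload.
    struct FPState { int Flags; };

    // One bit in the node records whether an FPState was constructed
    // immediately after the object, so nodes without FP overrides pay
    // no extra memory at all.
    class alignas(FPState) Node {
      bool HasFPState; // plays the role of CastExprBits.HasFPFeatures

      explicit Node(const FPState *FP) : HasFPState(FP != nullptr) {
        if (FP)
          new (this + 1) FPState(*FP); // construct payload in trailing bytes
      }

    public:
      // Over-allocate only when a trailing payload is requested.
      static Node *create(const FPState *FP) {
        std::size_t Size = sizeof(Node) + (FP ? sizeof(FPState) : 0);
        return new (::operator new(Size)) Node(FP);
      }

      bool hasStoredFPState() const { return HasFPState; }

      // Precondition: hasStoredFPState(); payload lives right past the node.
      const FPState *getTrailingFPState() const {
        assert(HasFPState && "no trailing payload allocated");
        return reinterpret_cast<const FPState *>(this + 1);
      }
    };

    int main() {
      FPState FP{42};
      Node *With = Node::create(&FP);
      Node *Without = Node::create(nullptr);
      assert(With->hasStoredFPState() &&
             With->getTrailingFPState()->Flags == 42);
      assert(!Without->hasStoredFPState());
      return 0;
    }

Because the payload has to be sized at allocation time, the flag (here
`HasFPState`, in the patch `CastExprBits.HasFPFeatures`) must be threaded
through every constructor and Create/CreateEmpty function, which is what most
of the mechanical changes in the diff below do.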
Differential Revision: https://reviews.llvm.org/D85960
---
 clang/include/clang/AST/Expr.h                | 117 +++++++++++----
 clang/include/clang/AST/ExprCXX.h             | 139 +++++++++++-------
 clang/include/clang/AST/ExprObjC.h            |   4 +-
 clang/include/clang/AST/Stmt.h                |   3 +
 clang/include/clang/AST/TextNodeDumper.h      |   1 +
 clang/include/clang/Basic/LangOptions.h       |   2 +
 clang/lib/AST/ASTImporter.cpp                 |  15 +-
 clang/lib/AST/Expr.cpp                        |  55 +++++--
 clang/lib/AST/ExprCXX.cpp                     |  61 ++++----
 clang/lib/AST/TextNodeDumper.cpp              |  10 ++
 clang/lib/Analysis/BodyFarm.cpp               |  16 +-
 clang/lib/CodeGen/CGBlocks.cpp                |   2 +-
 clang/lib/CodeGen/CGObjC.cpp                  |  13 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |   2 +-
 .../Frontend/Rewrite/RewriteModernObjC.cpp    |   7 +-
 clang/lib/Frontend/Rewrite/RewriteObjC.cpp    |   7 +-
 clang/lib/Sema/Sema.cpp                       |   3 +-
 clang/lib/Sema/SemaCast.cpp                   |  28 ++--
 clang/lib/Sema/SemaDecl.cpp                   |   8 +-
 clang/lib/Sema/SemaDeclCXX.cpp                |   9 +-
 clang/lib/Sema/SemaExpr.cpp                   |  11 +-
 clang/lib/Sema/SemaExprCXX.cpp                |  13 +-
 clang/lib/Sema/SemaExprObjC.cpp               |  15 +-
 clang/lib/Sema/SemaInit.cpp                   |  30 ++--
 clang/lib/Sema/SemaLambda.cpp                 |   5 +-
 clang/lib/Sema/SemaObjCProperty.cpp           |  14 +-
 clang/lib/Sema/SemaOpenMP.cpp                 |  12 +-
 clang/lib/Sema/SemaOverload.cpp               |  23 +--
 clang/lib/Sema/SemaStmt.cpp                   |   8 +-
 clang/lib/Sema/SemaTemplate.cpp               |   2 +-
 clang/lib/Serialization/ASTReaderStmt.cpp     |  28 +++-
 clang/lib/Serialization/ASTWriterDecl.cpp     |   1 +
 clang/lib/Serialization/ASTWriterStmt.cpp     |   6 +-
 clang/test/AST/ast-dump-fpfeatures.cpp        |  45 ++++++
 34 files changed, 462 insertions(+), 253 deletions(-)

diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 26e52ad367f81..1672fd707c6d2 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -3440,9 +3440,11 @@ class CastExpr : public Expr {
   }
   CXXBaseSpecifier **path_buffer();
+  friend class ASTStmtReader;
+
 protected:
   CastExpr(StmtClass SC, QualType ty, ExprValueKind VK, const CastKind kind,
-           Expr *op, unsigned BasePathSize)
+           Expr *op, unsigned BasePathSize, bool HasFPFeatures)
       : Expr(SC, ty, VK, OK_Ordinary), Op(op) {
     CastExprBits.Kind = kind;
     CastExprBits.PartOfExplicitCast = false;
@@ -3451,17 +3453,27 @@ class CastExpr : public Expr {
            "BasePathSize overflow!");
     setDependence(computeDependence(this));
     assert(CastConsistency());
+    CastExprBits.HasFPFeatures = HasFPFeatures;
   }
   /// Construct an empty cast.
-  CastExpr(StmtClass SC, EmptyShell Empty, unsigned BasePathSize)
-      : Expr(SC, Empty) {
+  CastExpr(StmtClass SC, EmptyShell Empty, unsigned BasePathSize,
+           bool HasFPFeatures)
+      : Expr(SC, Empty) {
     CastExprBits.PartOfExplicitCast = false;
     CastExprBits.BasePathSize = BasePathSize;
+    CastExprBits.HasFPFeatures = HasFPFeatures;
     assert((CastExprBits.BasePathSize == BasePathSize) &&
            "BasePathSize overflow!");
   }
+  /// Return a pointer to the trailing FPOptions.
+  /// \pre hasStoredFPFeatures() == true
+  FPOptionsOverride *getTrailingFPFeatures();
+  const FPOptionsOverride *getTrailingFPFeatures() const {
+    return const_cast<CastExpr *>(this)->getTrailingFPFeatures();
+  }
+
 public:
   CastKind getCastKind() const { return (CastKind) CastExprBits.Kind; }
   void setCastKind(CastKind K) { CastExprBits.Kind = K; }
@@ -3506,6 +3518,28 @@ class CastExpr : public Expr {
     return getTargetFieldForToUnionCast(getType(), getSubExpr()->getType());
   }
+  bool hasStoredFPFeatures() const { return CastExprBits.HasFPFeatures; }
+
+  /// Get FPOptionsOverride from trailing storage.
+  FPOptionsOverride getStoredFPFeatures() const {
+    assert(hasStoredFPFeatures());
+    return *getTrailingFPFeatures();
+  }
+
+  // Get the FP features status of this operation. Only meaningful for
+  // operations on floating point types.
+  FPOptions getFPFeaturesInEffect(const LangOptions &LO) const {
+    if (hasStoredFPFeatures())
+      return getStoredFPFeatures().applyOverrides(LO);
+    return FPOptions::defaultWithoutTrailingStorage(LO);
+  }
+
+  FPOptionsOverride getFPFeatures() const {
+    if (hasStoredFPFeatures())
+      return getStoredFPFeatures();
+    return FPOptionsOverride();
+  }
+
   static const FieldDecl *getTargetFieldForToUnionCast(QualType unionType,
                                                        QualType opType);
   static const FieldDecl *getTargetFieldForToUnionCast(const RecordDecl *RD,
@@ -3543,21 +3577,35 @@ class CastExpr : public Expr {
 /// @endcode
 class ImplicitCastExpr final
     : public CastExpr,
-      private llvm::TrailingObjects<ImplicitCastExpr, CXXBaseSpecifier *> {
+      private llvm::TrailingObjects<ImplicitCastExpr, CXXBaseSpecifier *,
+                                    FPOptionsOverride> {
   ImplicitCastExpr(QualType ty, CastKind kind, Expr *op,
-                   unsigned BasePathLength, ExprValueKind VK)
-      : CastExpr(ImplicitCastExprClass, ty, VK, kind, op, BasePathLength) { }
+                   unsigned BasePathLength, FPOptionsOverride FPO,
+                   ExprValueKind VK)
+      : CastExpr(ImplicitCastExprClass, ty, VK, kind, op, BasePathLength,
+                 FPO.requiresTrailingStorage()) {
+    if (hasStoredFPFeatures())
+      *getTrailingFPFeatures() = FPO;
+  }
   /// Construct an empty implicit cast.
-  explicit ImplicitCastExpr(EmptyShell Shell, unsigned PathSize)
-      : CastExpr(ImplicitCastExprClass, Shell, PathSize) { }
+  explicit ImplicitCastExpr(EmptyShell Shell, unsigned PathSize,
+                            bool HasFPFeatures)
+      : CastExpr(ImplicitCastExprClass, Shell, PathSize, HasFPFeatures) {}
+
+  unsigned numTrailingObjects(OverloadToken<CXXBaseSpecifier *>) const {
+    return path_size();
+  }
 public:
   enum OnStack_t { OnStack };
   ImplicitCastExpr(OnStack_t _, QualType ty, CastKind kind, Expr *op,
-                   ExprValueKind VK)
-      : CastExpr(ImplicitCastExprClass, ty, VK, kind, op, 0) {
+                   ExprValueKind VK, FPOptionsOverride FPO)
+      : CastExpr(ImplicitCastExprClass, ty, VK, kind, op, 0,
+                 FPO.requiresTrailingStorage()) {
+    if (hasStoredFPFeatures())
+      *getTrailingFPFeatures() = FPO;
   }
   bool isPartOfExplicitCast() const { return CastExprBits.PartOfExplicitCast; }
@@ -3568,10 +3616,10 @@ class ImplicitCastExpr final
   static ImplicitCastExpr *Create(const ASTContext &Context, QualType T,
                                   CastKind Kind, Expr *Operand,
                                   const CXXCastPath *BasePath,
-                                  ExprValueKind Cat);
+                                  ExprValueKind Cat, FPOptionsOverride FPO);
   static ImplicitCastExpr *CreateEmpty(const ASTContext &Context,
-                                       unsigned PathSize);
+                                       unsigned PathSize, bool HasFPFeatures);
   SourceLocation getBeginLoc() const LLVM_READONLY {
     return getSubExpr()->getBeginLoc();
@@ -3612,12 +3660,14 @@ class ExplicitCastExpr : public CastExpr {
 protected:
   ExplicitCastExpr(StmtClass SC, QualType exprTy, ExprValueKind VK,
                    CastKind kind, Expr *op, unsigned PathSize,
-                   TypeSourceInfo *writtenTy)
-      : CastExpr(SC, exprTy, VK, kind, op, PathSize), TInfo(writtenTy) {}
+                   bool HasFPFeatures, TypeSourceInfo *writtenTy)
+      : CastExpr(SC, exprTy, VK, kind, op, PathSize, HasFPFeatures),
+        TInfo(writtenTy) {}
   /// Construct an empty explicit cast.
-  ExplicitCastExpr(StmtClass SC, EmptyShell Shell, unsigned PathSize)
-      : CastExpr(SC, Shell, PathSize) { }
+  ExplicitCastExpr(StmtClass SC, EmptyShell Shell, unsigned PathSize,
+                   bool HasFPFeatures)
+      : CastExpr(SC, Shell, PathSize, HasFPFeatures) {}
 public:
   /// getTypeInfoAsWritten - Returns the type source info for the type
@@ -3640,29 +3690,38 @@ class ExplicitCastExpr : public CastExpr {
 /// (Type)expr. For example: @c (int)f.
 class CStyleCastExpr final
     : public ExplicitCastExpr,
-      private llvm::TrailingObjects<CStyleCastExpr, CXXBaseSpecifier *> {
+      private llvm::TrailingObjects<CStyleCastExpr, CXXBaseSpecifier *,
+                                    FPOptionsOverride> {
   SourceLocation LPLoc; // the location of the left paren
   SourceLocation RPLoc; // the location of the right paren
   CStyleCastExpr(QualType exprTy, ExprValueKind vk, CastKind kind, Expr *op,
-                 unsigned PathSize, TypeSourceInfo *writtenTy,
-                 SourceLocation l, SourceLocation r)
-      : ExplicitCastExpr(CStyleCastExprClass, exprTy, vk, kind, op, PathSize,
-                         writtenTy), LPLoc(l), RPLoc(r) {}
+                 unsigned PathSize, FPOptionsOverride FPO,
+                 TypeSourceInfo *writtenTy, SourceLocation l, SourceLocation r)
+      : ExplicitCastExpr(CStyleCastExprClass, exprTy, vk, kind, op, PathSize,
+                         FPO.requiresTrailingStorage(), writtenTy),
+        LPLoc(l), RPLoc(r) {
+    if (hasStoredFPFeatures())
+      *getTrailingFPFeatures() = FPO;
+  }
   /// Construct an empty C-style explicit cast.
-  explicit CStyleCastExpr(EmptyShell Shell, unsigned PathSize)
-      : ExplicitCastExpr(CStyleCastExprClass, Shell, PathSize) { }
+  explicit CStyleCastExpr(EmptyShell Shell, unsigned PathSize,
+                          bool HasFPFeatures)
+      : ExplicitCastExpr(CStyleCastExprClass, Shell, PathSize, HasFPFeatures) {}
+
+  unsigned numTrailingObjects(OverloadToken<CXXBaseSpecifier *>) const {
+    return path_size();
+  }
 public:
-  static CStyleCastExpr *Create(const ASTContext &Context, QualType T,
-                                ExprValueKind VK, CastKind K,
-                                Expr *Op, const CXXCastPath *BasePath,
-                                TypeSourceInfo *WrittenTy, SourceLocation L,
-                                SourceLocation R);
+  static CStyleCastExpr *
+  Create(const ASTContext &Context, QualType T, ExprValueKind VK, CastKind K,
+         Expr *Op, const CXXCastPath *BasePath, FPOptionsOverride FPO,
+         TypeSourceInfo *WrittenTy, SourceLocation L, SourceLocation R);
   static CStyleCastExpr *CreateEmpty(const ASTContext &Context,
-                                     unsigned PathSize);
+                                     unsigned PathSize, bool HasFPFeatures);
   SourceLocation getLParenLoc() const { return LPLoc; }
   void setLParenLoc(SourceLocation L) { LPLoc = L; }
diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h
index 6b4b57eca9bea..0ba5e417fd58e 100644
--- a/clang/include/clang/AST/ExprCXX.h
+++ b/clang/include/clang/AST/ExprCXX.h
@@ -374,16 +374,17 @@ class CXXNamedCastExpr : public ExplicitCastExpr {
 protected:
   friend class ASTStmtReader;
-  CXXNamedCastExpr(StmtClass SC, QualType ty, ExprValueKind VK,
-                   CastKind kind, Expr *op, unsigned PathSize,
+  CXXNamedCastExpr(StmtClass SC, QualType ty, ExprValueKind VK, CastKind kind,
+                   Expr *op, unsigned PathSize, bool HasFPFeatures,
                    TypeSourceInfo *writtenTy, SourceLocation l,
-                   SourceLocation RParenLoc,
-                   SourceRange AngleBrackets)
-      : ExplicitCastExpr(SC, ty, VK, kind, op, PathSize, writtenTy), Loc(l),
-        RParenLoc(RParenLoc), AngleBrackets(AngleBrackets) {}
+                   SourceLocation RParenLoc, SourceRange AngleBrackets)
+      : ExplicitCastExpr(SC, ty, VK, kind, op, PathSize, HasFPFeatures,
+                         writtenTy),
+        Loc(l), RParenLoc(RParenLoc), AngleBrackets(AngleBrackets) {}
-  explicit CXXNamedCastExpr(StmtClass SC, EmptyShell Shell, unsigned PathSize)
-      : ExplicitCastExpr(SC, Shell, PathSize) {}
+  explicit CXXNamedCastExpr(StmtClass SC, EmptyShell Shell, unsigned PathSize,
+                            bool HasFPFeatures)
+      : ExplicitCastExpr(SC, Shell, PathSize, HasFPFeatures) {}
 public:
   const char *getCastName() const;
@@ -419,29 +420,39 @@ class CXXNamedCastExpr : public ExplicitCastExpr {
 /// \c static_cast<int>(1.0).
 class CXXStaticCastExpr final
     : public CXXNamedCastExpr,
-      private llvm::TrailingObjects<CXXStaticCastExpr, CXXBaseSpecifier *> {
+      private llvm::TrailingObjects<CXXStaticCastExpr, CXXBaseSpecifier *,
+                                    FPOptionsOverride> {
   CXXStaticCastExpr(QualType ty, ExprValueKind vk, CastKind kind, Expr *op,
                     unsigned pathSize, TypeSourceInfo *writtenTy,
-                    SourceLocation l, SourceLocation RParenLoc,
-                    SourceRange AngleBrackets)
+                    FPOptionsOverride FPO, SourceLocation l,
+                    SourceLocation RParenLoc, SourceRange AngleBrackets)
       : CXXNamedCastExpr(CXXStaticCastExprClass, ty, vk, kind, op, pathSize,
-                         writtenTy, l, RParenLoc, AngleBrackets) {}
+                         FPO.requiresTrailingStorage(), writtenTy, l, RParenLoc,
+                         AngleBrackets) {
+    if (hasStoredFPFeatures())
+      *getTrailingFPFeatures() = FPO;
+  }
-  explicit CXXStaticCastExpr(EmptyShell Empty, unsigned PathSize)
-      : CXXNamedCastExpr(CXXStaticCastExprClass, Empty, PathSize) {}
+  explicit CXXStaticCastExpr(EmptyShell Empty, unsigned PathSize,
+                             bool HasFPFeatures)
+      : CXXNamedCastExpr(CXXStaticCastExprClass, Empty, PathSize,
+                         HasFPFeatures) {}
+
+  unsigned numTrailingObjects(OverloadToken<CXXBaseSpecifier *>) const {
+    return path_size();
+  }
 public:
   friend class CastExpr;
   friend TrailingObjects;
-  static CXXStaticCastExpr *Create(const ASTContext &Context, QualType T,
-                                   ExprValueKind VK, CastKind K, Expr *Op,
-                                   const CXXCastPath *Path,
-                                   TypeSourceInfo *Written, SourceLocation L,
-                                   SourceLocation RParenLoc,
-                                   SourceRange AngleBrackets);
+  static CXXStaticCastExpr *
+  Create(const ASTContext &Context, QualType T, ExprValueKind VK, CastKind K,
+         Expr *Op, const CXXCastPath *Path, TypeSourceInfo *Written,
+         FPOptionsOverride FPO, SourceLocation L, SourceLocation RParenLoc,
+         SourceRange AngleBrackets);
   static CXXStaticCastExpr *CreateEmpty(const ASTContext &Context,
-                                        unsigned PathSize);
+                                        unsigned PathSize, bool hasFPFeatures);
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == CXXStaticCastExprClass;
@@ -456,15 +467,17 @@ class CXXStaticCastExpr final
 class CXXDynamicCastExpr final
     : public CXXNamedCastExpr,
       private llvm::TrailingObjects<CXXDynamicCastExpr, CXXBaseSpecifier *> {
-  CXXDynamicCastExpr(QualType ty, ExprValueKind VK, CastKind kind,
-                     Expr *op, unsigned pathSize, TypeSourceInfo *writtenTy,
+  CXXDynamicCastExpr(QualType ty, ExprValueKind VK, CastKind kind, Expr *op,
+                     unsigned pathSize, TypeSourceInfo *writtenTy,
                      SourceLocation l, SourceLocation RParenLoc,
                      SourceRange AngleBrackets)
       : CXXNamedCastExpr(CXXDynamicCastExprClass, ty, VK, kind, op, pathSize,
-                         writtenTy, l, RParenLoc, AngleBrackets) {}
+                         /*HasFPFeatures*/ false, writtenTy, l, RParenLoc,
+                         AngleBrackets) {}
   explicit CXXDynamicCastExpr(EmptyShell Empty, unsigned pathSize)
-      : CXXNamedCastExpr(CXXDynamicCastExprClass, Empty, pathSize) {}
+      : CXXNamedCastExpr(CXXDynamicCastExprClass, Empty, pathSize,
+                         /*HasFPFeatures*/ false) {}
 public:
   friend class CastExpr;
@@ -499,16 +512,17 @@ class CXXDynamicCastExpr final
 class CXXReinterpretCastExpr final
     : public CXXNamedCastExpr,
      private llvm::TrailingObjects<CXXReinterpretCastExpr, CXXBaseSpecifier *> {
-  CXXReinterpretCastExpr(QualType ty, ExprValueKind vk, CastKind kind,
-                         Expr *op, unsigned pathSize,
-                         TypeSourceInfo *writtenTy, SourceLocation l,
-                         SourceLocation RParenLoc,
+  CXXReinterpretCastExpr(QualType ty, ExprValueKind vk, CastKind kind, Expr *op,
+                         unsigned pathSize, TypeSourceInfo *writtenTy,
+                         SourceLocation l, SourceLocation RParenLoc,
                          SourceRange AngleBrackets)
       : CXXNamedCastExpr(CXXReinterpretCastExprClass, ty, vk, kind, op,
-                         pathSize, writtenTy, l, RParenLoc, AngleBrackets) {}
+                         pathSize, /*HasFPFeatures*/ false, writtenTy, l,
+                         RParenLoc, AngleBrackets) {}
   CXXReinterpretCastExpr(EmptyShell Empty, unsigned pathSize)
-      : CXXNamedCastExpr(CXXReinterpretCastExprClass, Empty, pathSize) {}
CXXNamedCastExpr(CXXReinterpretCastExprClass, Empty, pathSize) {} + : CXXNamedCastExpr(CXXReinterpretCastExprClass, Empty, pathSize, + /*HasFPFeatures*/ false) {} public: friend class CastExpr; @@ -541,11 +555,13 @@ class CXXConstCastExpr final CXXConstCastExpr(QualType ty, ExprValueKind VK, Expr *op, TypeSourceInfo *writtenTy, SourceLocation l, SourceLocation RParenLoc, SourceRange AngleBrackets) - : CXXNamedCastExpr(CXXConstCastExprClass, ty, VK, CK_NoOp, op, - 0, writtenTy, l, RParenLoc, AngleBrackets) {} + : CXXNamedCastExpr(CXXConstCastExprClass, ty, VK, CK_NoOp, op, 0, + /*HasFPFeatures*/ false, writtenTy, l, RParenLoc, + AngleBrackets) {} explicit CXXConstCastExpr(EmptyShell Empty) - : CXXNamedCastExpr(CXXConstCastExprClass, Empty, 0) {} + : CXXNamedCastExpr(CXXConstCastExprClass, Empty, 0, + /*HasFPFeatures*/ false) {} public: friend class CastExpr; @@ -578,10 +594,12 @@ class CXXAddrspaceCastExpr final TypeSourceInfo *writtenTy, SourceLocation l, SourceLocation RParenLoc, SourceRange AngleBrackets) : CXXNamedCastExpr(CXXAddrspaceCastExprClass, ty, VK, Kind, op, 0, - writtenTy, l, RParenLoc, AngleBrackets) {} + /*HasFPFeatures*/ false, writtenTy, l, RParenLoc, + AngleBrackets) {} explicit CXXAddrspaceCastExpr(EmptyShell Empty) - : CXXNamedCastExpr(CXXAddrspaceCastExprClass, Empty, 0) {} + : CXXNamedCastExpr(CXXAddrspaceCastExprClass, Empty, 0, + /*HasFPFeatures*/ false) {} public: friend class CastExpr; @@ -1693,34 +1711,43 @@ class CXXInheritedCtorInitExpr : public Expr { /// \endcode class CXXFunctionalCastExpr final : public ExplicitCastExpr, - private llvm::TrailingObjects { + private llvm::TrailingObjects { SourceLocation LParenLoc; SourceLocation RParenLoc; CXXFunctionalCastExpr(QualType ty, ExprValueKind VK, - TypeSourceInfo *writtenTy, - CastKind kind, Expr *castExpr, unsigned pathSize, - SourceLocation lParenLoc, SourceLocation rParenLoc) - : ExplicitCastExpr(CXXFunctionalCastExprClass, ty, VK, kind, - castExpr, pathSize, writtenTy), - LParenLoc(lParenLoc), RParenLoc(rParenLoc) {} + TypeSourceInfo *writtenTy, CastKind kind, + Expr *castExpr, unsigned pathSize, + FPOptionsOverride FPO, SourceLocation lParenLoc, + SourceLocation rParenLoc) + : ExplicitCastExpr(CXXFunctionalCastExprClass, ty, VK, kind, castExpr, + pathSize, FPO.requiresTrailingStorage(), writtenTy), + LParenLoc(lParenLoc), RParenLoc(rParenLoc) { + if (hasStoredFPFeatures()) + *getTrailingFPFeatures() = FPO; + } + + explicit CXXFunctionalCastExpr(EmptyShell Shell, unsigned PathSize, + bool HasFPFeatures) + : ExplicitCastExpr(CXXFunctionalCastExprClass, Shell, PathSize, + HasFPFeatures) {} - explicit CXXFunctionalCastExpr(EmptyShell Shell, unsigned PathSize) - : ExplicitCastExpr(CXXFunctionalCastExprClass, Shell, PathSize) {} + unsigned numTrailingObjects(OverloadToken) const { + return path_size(); + } public: friend class CastExpr; friend TrailingObjects; - static CXXFunctionalCastExpr *Create(const ASTContext &Context, QualType T, - ExprValueKind VK, - TypeSourceInfo *Written, - CastKind Kind, Expr *Op, - const CXXCastPath *Path, - SourceLocation LPLoc, - SourceLocation RPLoc); - static CXXFunctionalCastExpr *CreateEmpty(const ASTContext &Context, - unsigned PathSize); + static CXXFunctionalCastExpr * + Create(const ASTContext &Context, QualType T, ExprValueKind VK, + TypeSourceInfo *Written, CastKind Kind, Expr *Op, + const CXXCastPath *Path, FPOptionsOverride FPO, SourceLocation LPLoc, + SourceLocation RPLoc); + static CXXFunctionalCastExpr * + CreateEmpty(const ASTContext &Context, unsigned PathSize, bool 
HasFPFeatures); SourceLocation getLParenLoc() const { return LParenLoc; } void setLParenLoc(SourceLocation L) { LParenLoc = L; } @@ -4828,11 +4855,11 @@ class BuiltinBitCastExpr final BuiltinBitCastExpr(QualType T, ExprValueKind VK, CastKind CK, Expr *SrcExpr, TypeSourceInfo *DstType, SourceLocation KWLoc, SourceLocation RParenLoc) - : ExplicitCastExpr(BuiltinBitCastExprClass, T, VK, CK, SrcExpr, 0, + : ExplicitCastExpr(BuiltinBitCastExprClass, T, VK, CK, SrcExpr, 0, false, DstType), KWLoc(KWLoc), RParenLoc(RParenLoc) {} BuiltinBitCastExpr(EmptyShell Empty) - : ExplicitCastExpr(BuiltinBitCastExprClass, Empty, 0) {} + : ExplicitCastExpr(BuiltinBitCastExprClass, Empty, 0, false) {} SourceLocation getBeginLoc() const LLVM_READONLY { return KWLoc; } SourceLocation getEndLoc() const LLVM_READONLY { return RParenLoc; } diff --git a/clang/include/clang/AST/ExprObjC.h b/clang/include/clang/AST/ExprObjC.h index 4b39d9ab96a6a..17eec51726978 100644 --- a/clang/include/clang/AST/ExprObjC.h +++ b/clang/include/clang/AST/ExprObjC.h @@ -1639,12 +1639,12 @@ class ObjCBridgedCastExpr final CastKind CK, SourceLocation BridgeKeywordLoc, TypeSourceInfo *TSInfo, Expr *Operand) : ExplicitCastExpr(ObjCBridgedCastExprClass, TSInfo->getType(), VK_RValue, - CK, Operand, 0, TSInfo), + CK, Operand, 0, false, TSInfo), LParenLoc(LParenLoc), BridgeKeywordLoc(BridgeKeywordLoc), Kind(Kind) {} /// Construct an empty Objective-C bridged cast. explicit ObjCBridgedCastExpr(EmptyShell Shell) - : ExplicitCastExpr(ObjCBridgedCastExprClass, Shell, 0) {} + : ExplicitCastExpr(ObjCBridgedCastExprClass, Shell, 0, false) {} SourceLocation getLParenLoc() const { return LParenLoc; } diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h index 1e04e64727a08..4a6e8182e5a06 100644 --- a/clang/include/clang/AST/Stmt.h +++ b/clang/include/clang/AST/Stmt.h @@ -521,6 +521,9 @@ class alignas(void *) Stmt { unsigned Kind : 6; unsigned PartOfExplicitCast : 1; // Only set for ImplicitCastExpr. + /// True if the call expression has some floating-point features. + unsigned HasFPFeatures : 1; + /// The number of CXXBaseSpecifiers in the cast. 14 bits would be enough /// here. ([implimits] Direct and indirect base classes [16384]). 
unsigned BasePathSize; diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h index f68a5dbfc2a0d..15ca348f47667 100644 --- a/clang/include/clang/AST/TextNodeDumper.h +++ b/clang/include/clang/AST/TextNodeDumper.h @@ -270,6 +270,7 @@ class TextNodeDumper void VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *Node); void VisitCXXThisExpr(const CXXThisExpr *Node); void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *Node); + void VisitCXXStaticCastExpr(const CXXStaticCastExpr *Node); void VisitCXXUnresolvedConstructExpr(const CXXUnresolvedConstructExpr *Node); void VisitCXXConstructExpr(const CXXConstructExpr *Node); void VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *Node); diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 2c8bb55cb5d93..3614496ded967 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -497,6 +497,8 @@ class FPOptionsOverride { FPOptionsOverride() {} FPOptionsOverride(const LangOptions &LO) : Options(LO), OverrideMask(OverrideMaskBits) {} + FPOptionsOverride(FPOptions FPO) + : Options(FPO), OverrideMask(OverrideMaskBits) {} bool requiresTrailingStorage() const { return OverrideMask != 0; } diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 7334d5b659e20..dd3c8518c2a3e 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -6930,7 +6930,7 @@ ExpectedStmt ASTNodeImporter::VisitImplicitCastExpr(ImplicitCastExpr *E) { return ImplicitCastExpr::Create( Importer.getToContext(), *ToTypeOrErr, E->getCastKind(), *ToSubExprOrErr, - &(*ToBasePathOrErr), E->getValueKind()); + &(*ToBasePathOrErr), E->getValueKind(), E->getFPFeatures()); } ExpectedStmt ASTNodeImporter::VisitExplicitCastExpr(ExplicitCastExpr *E) { @@ -6957,8 +6957,8 @@ ExpectedStmt ASTNodeImporter::VisitExplicitCastExpr(ExplicitCastExpr *E) { return ToRParenLocOrErr.takeError(); return CStyleCastExpr::Create( Importer.getToContext(), ToType, E->getValueKind(), E->getCastKind(), - ToSubExpr, ToBasePath, ToTypeInfoAsWritten, *ToLParenLocOrErr, - *ToRParenLocOrErr); + ToSubExpr, ToBasePath, CCE->getFPFeatures(), ToTypeInfoAsWritten, + *ToLParenLocOrErr, *ToRParenLocOrErr); } case Stmt::CXXFunctionalCastExprClass: { @@ -6971,8 +6971,8 @@ ExpectedStmt ASTNodeImporter::VisitExplicitCastExpr(ExplicitCastExpr *E) { return ToRParenLocOrErr.takeError(); return CXXFunctionalCastExpr::Create( Importer.getToContext(), ToType, E->getValueKind(), ToTypeInfoAsWritten, - E->getCastKind(), ToSubExpr, ToBasePath, *ToLParenLocOrErr, - *ToRParenLocOrErr); + E->getCastKind(), ToSubExpr, ToBasePath, FCE->getFPFeatures(), + *ToLParenLocOrErr, *ToRParenLocOrErr); } case Stmt::ObjCBridgedCastExprClass: { @@ -7815,10 +7815,11 @@ ExpectedStmt ASTNodeImporter::VisitCXXNamedCastExpr(CXXNamedCastExpr *E) { if (!ToBasePathOrErr) return ToBasePathOrErr.takeError(); - if (isa(E)) { + if (auto CCE = dyn_cast(E)) { return CXXStaticCastExpr::Create( Importer.getToContext(), ToType, VK, CK, ToSubExpr, &(*ToBasePathOrErr), - ToTypeInfoAsWritten, ToOperatorLoc, ToRParenLoc, ToAngleBrackets); + ToTypeInfoAsWritten, CCE->getFPFeatures(), ToOperatorLoc, ToRParenLoc, + ToAngleBrackets); } else if (isa(E)) { return CXXDynamicCastExpr::Create( Importer.getToContext(), ToType, VK, CK, ToSubExpr, &(*ToBasePathOrErr), diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 15f3df0fd2168..b664224aa7323 100644 --- a/clang/lib/AST/Expr.cpp +++ 
b/clang/lib/AST/Expr.cpp
@@ -1892,19 +1892,42 @@ const FieldDecl *CastExpr::getTargetFieldForToUnionCast(const RecordDecl *RD,
   return nullptr;
 }
 
+FPOptionsOverride *CastExpr::getTrailingFPFeatures() {
+  assert(hasStoredFPFeatures());
+  switch (getStmtClass()) {
+  case ImplicitCastExprClass:
+    return static_cast<ImplicitCastExpr *>(this)
+        ->getTrailingObjects<FPOptionsOverride>();
+  case CStyleCastExprClass:
+    return static_cast<CStyleCastExpr *>(this)
+        ->getTrailingObjects<FPOptionsOverride>();
+  case CXXFunctionalCastExprClass:
+    return static_cast<CXXFunctionalCastExpr *>(this)
+        ->getTrailingObjects<FPOptionsOverride>();
+  case CXXStaticCastExprClass:
+    return static_cast<CXXStaticCastExpr *>(this)
+        ->getTrailingObjects<FPOptionsOverride>();
+  default:
+    llvm_unreachable("Cast does not have FPFeatures");
+  }
+}
+
 ImplicitCastExpr *ImplicitCastExpr::Create(const ASTContext &C, QualType T,
                                            CastKind Kind, Expr *Operand,
                                            const CXXCastPath *BasePath,
-                                           ExprValueKind VK) {
+                                           ExprValueKind VK,
+                                           FPOptionsOverride FPO) {
   unsigned PathSize = (BasePath ? BasePath->size() : 0);
-  void *Buffer = C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *>(PathSize));
+  void *Buffer =
+      C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *, FPOptionsOverride>(
+          PathSize, FPO.requiresTrailingStorage()));
   // Per C++ [conv.lval]p3, lvalue-to-rvalue conversions on class and
   // std::nullptr_t have special semantics not captured by CK_LValueToRValue.
   assert((Kind != CK_LValueToRValue ||
           !(T->isNullPtrType() || T->getAsCXXRecordDecl())) &&
          "invalid type for lvalue-to-rvalue conversion");
   ImplicitCastExpr *E =
-      new (Buffer) ImplicitCastExpr(T, Kind, Operand, PathSize, VK);
+      new (Buffer) ImplicitCastExpr(T, Kind, Operand, PathSize, FPO, VK);
   if (PathSize)
     std::uninitialized_copy_n(BasePath->data(), BasePath->size(),
                               E->getTrailingObjects<CXXBaseSpecifier *>());
@@ -1912,21 +1935,26 @@ ImplicitCastExpr *ImplicitCastExpr::Create(const ASTContext &C, QualType T,
 }
 
 ImplicitCastExpr *ImplicitCastExpr::CreateEmpty(const ASTContext &C,
-                                                unsigned PathSize) {
-  void *Buffer = C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *>(PathSize));
-  return new (Buffer) ImplicitCastExpr(EmptyShell(), PathSize);
+                                                unsigned PathSize,
+                                                bool HasFPFeatures) {
+  void *Buffer =
+      C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *, FPOptionsOverride>(
+          PathSize, HasFPFeatures));
+  return new (Buffer) ImplicitCastExpr(EmptyShell(), PathSize, HasFPFeatures);
 }
 
-
 CStyleCastExpr *CStyleCastExpr::Create(const ASTContext &C, QualType T,
                                        ExprValueKind VK, CastKind K, Expr *Op,
                                        const CXXCastPath *BasePath,
+                                       FPOptionsOverride FPO,
                                        TypeSourceInfo *WrittenTy,
                                        SourceLocation L, SourceLocation R) {
   unsigned PathSize = (BasePath ?
BasePath->size() : 0); - void *Buffer = C.Allocate(totalSizeToAlloc(PathSize)); + void *Buffer = + C.Allocate(totalSizeToAlloc( + PathSize, FPO.requiresTrailingStorage())); CStyleCastExpr *E = - new (Buffer) CStyleCastExpr(T, VK, K, Op, PathSize, WrittenTy, L, R); + new (Buffer) CStyleCastExpr(T, VK, K, Op, PathSize, FPO, WrittenTy, L, R); if (PathSize) std::uninitialized_copy_n(BasePath->data(), BasePath->size(), E->getTrailingObjects()); @@ -1934,9 +1962,12 @@ CStyleCastExpr *CStyleCastExpr::Create(const ASTContext &C, QualType T, } CStyleCastExpr *CStyleCastExpr::CreateEmpty(const ASTContext &C, - unsigned PathSize) { - void *Buffer = C.Allocate(totalSizeToAlloc(PathSize)); - return new (Buffer) CStyleCastExpr(EmptyShell(), PathSize); + unsigned PathSize, + bool HasFPFeatures) { + void *Buffer = + C.Allocate(totalSizeToAlloc( + PathSize, HasFPFeatures)); + return new (Buffer) CStyleCastExpr(EmptyShell(), PathSize, HasFPFeatures); } /// getOpcodeStr - Turn an Opcode enum value into the punctuation char it diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp index 3d61496f30e2a..3f3f2303587dd 100644 --- a/clang/lib/AST/ExprCXX.cpp +++ b/clang/lib/AST/ExprCXX.cpp @@ -690,19 +690,18 @@ const char *CXXNamedCastExpr::getCastName() const { } } -CXXStaticCastExpr *CXXStaticCastExpr::Create(const ASTContext &C, QualType T, - ExprValueKind VK, - CastKind K, Expr *Op, - const CXXCastPath *BasePath, - TypeSourceInfo *WrittenTy, - SourceLocation L, - SourceLocation RParenLoc, - SourceRange AngleBrackets) { +CXXStaticCastExpr * +CXXStaticCastExpr::Create(const ASTContext &C, QualType T, ExprValueKind VK, + CastKind K, Expr *Op, const CXXCastPath *BasePath, + TypeSourceInfo *WrittenTy, FPOptionsOverride FPO, + SourceLocation L, SourceLocation RParenLoc, + SourceRange AngleBrackets) { unsigned PathSize = (BasePath ? 
BasePath->size() : 0); - void *Buffer = C.Allocate(totalSizeToAlloc(PathSize)); - auto *E = - new (Buffer) CXXStaticCastExpr(T, VK, K, Op, PathSize, WrittenTy, L, - RParenLoc, AngleBrackets); + void *Buffer = + C.Allocate(totalSizeToAlloc( + PathSize, FPO.requiresTrailingStorage())); + auto *E = new (Buffer) CXXStaticCastExpr(T, VK, K, Op, PathSize, WrittenTy, + FPO, L, RParenLoc, AngleBrackets); if (PathSize) std::uninitialized_copy_n(BasePath->data(), BasePath->size(), E->getTrailingObjects()); @@ -710,9 +709,12 @@ CXXStaticCastExpr *CXXStaticCastExpr::Create(const ASTContext &C, QualType T, } CXXStaticCastExpr *CXXStaticCastExpr::CreateEmpty(const ASTContext &C, - unsigned PathSize) { - void *Buffer = C.Allocate(totalSizeToAlloc(PathSize)); - return new (Buffer) CXXStaticCastExpr(EmptyShell(), PathSize); + unsigned PathSize, + bool HasFPFeatures) { + void *Buffer = + C.Allocate(totalSizeToAlloc( + PathSize, HasFPFeatures)); + return new (Buffer) CXXStaticCastExpr(EmptyShell(), PathSize, HasFPFeatures); } CXXDynamicCastExpr *CXXDynamicCastExpr::Create(const ASTContext &C, QualType T, @@ -823,25 +825,30 @@ CXXAddrspaceCastExpr *CXXAddrspaceCastExpr::CreateEmpty(const ASTContext &C) { return new (C) CXXAddrspaceCastExpr(EmptyShell()); } -CXXFunctionalCastExpr * -CXXFunctionalCastExpr::Create(const ASTContext &C, QualType T, ExprValueKind VK, - TypeSourceInfo *Written, CastKind K, Expr *Op, - const CXXCastPath *BasePath, - SourceLocation L, SourceLocation R) { +CXXFunctionalCastExpr *CXXFunctionalCastExpr::Create( + const ASTContext &C, QualType T, ExprValueKind VK, TypeSourceInfo *Written, + CastKind K, Expr *Op, const CXXCastPath *BasePath, FPOptionsOverride FPO, + SourceLocation L, SourceLocation R) { unsigned PathSize = (BasePath ? BasePath->size() : 0); - void *Buffer = C.Allocate(totalSizeToAlloc(PathSize)); - auto *E = - new (Buffer) CXXFunctionalCastExpr(T, VK, Written, K, Op, PathSize, L, R); + void *Buffer = + C.Allocate(totalSizeToAlloc( + PathSize, FPO.requiresTrailingStorage())); + auto *E = new (Buffer) + CXXFunctionalCastExpr(T, VK, Written, K, Op, PathSize, FPO, L, R); if (PathSize) std::uninitialized_copy_n(BasePath->data(), BasePath->size(), E->getTrailingObjects()); return E; } -CXXFunctionalCastExpr * -CXXFunctionalCastExpr::CreateEmpty(const ASTContext &C, unsigned PathSize) { - void *Buffer = C.Allocate(totalSizeToAlloc(PathSize)); - return new (Buffer) CXXFunctionalCastExpr(EmptyShell(), PathSize); +CXXFunctionalCastExpr *CXXFunctionalCastExpr::CreateEmpty(const ASTContext &C, + unsigned PathSize, + bool HasFPFeatures) { + void *Buffer = + C.Allocate(totalSizeToAlloc( + PathSize, HasFPFeatures)); + return new (Buffer) + CXXFunctionalCastExpr(EmptyShell(), PathSize, HasFPFeatures); } SourceLocation CXXFunctionalCastExpr::getBeginLoc() const { diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 16c4c3736a4a3..acbc0434931dc 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -964,6 +964,8 @@ void TextNodeDumper::VisitCastExpr(const CastExpr *Node) { } dumpBasePath(OS, Node); OS << ">"; + if (Node->hasStoredFPFeatures()) + printFPOptions(Node->getFPFeatures()); } void TextNodeDumper::VisitImplicitCastExpr(const ImplicitCastExpr *Node) { @@ -1132,6 +1134,14 @@ void TextNodeDumper::VisitCXXFunctionalCastExpr( const CXXFunctionalCastExpr *Node) { OS << " functional cast to " << Node->getTypeAsWritten().getAsString() << " <" << Node->getCastKindName() << ">"; + if (Node->hasStoredFPFeatures()) + 
printFPOptions(Node->getFPFeatures()); +} + +void TextNodeDumper::VisitCXXStaticCastExpr(const CXXStaticCastExpr *Node) { + VisitCXXNamedCastExpr(Node); + if (Node->hasStoredFPFeatures()) + printFPOptions(Node->getFPFeatures()); } void TextNodeDumper::VisitCXXUnresolvedConstructExpr( diff --git a/clang/lib/Analysis/BodyFarm.cpp b/clang/lib/Analysis/BodyFarm.cpp index f68b06487f98e..603da67156254 100644 --- a/clang/lib/Analysis/BodyFarm.cpp +++ b/clang/lib/Analysis/BodyFarm.cpp @@ -166,23 +166,21 @@ ASTMaker::makeLvalueToRvalue(const VarDecl *Arg, ImplicitCastExpr *ASTMaker::makeImplicitCast(const Expr *Arg, QualType Ty, CastKind CK) { return ImplicitCastExpr::Create(C, Ty, - /* CastKind=*/ CK, - /* Expr=*/ const_cast(Arg), - /* CXXCastPath=*/ nullptr, - /* ExprValueKind=*/ VK_RValue); + /* CastKind=*/CK, + /* Expr=*/const_cast(Arg), + /* CXXCastPath=*/nullptr, + /* ExprValueKind=*/VK_RValue, + /* FPFeatures */ FPOptionsOverride()); } Expr *ASTMaker::makeIntegralCast(const Expr *Arg, QualType Ty) { if (Arg->getType() == Ty) return const_cast(Arg); - - return ImplicitCastExpr::Create(C, Ty, CK_IntegralCast, - const_cast(Arg), nullptr, VK_RValue); + return makeImplicitCast(Arg, Ty, CK_IntegralCast); } ImplicitCastExpr *ASTMaker::makeIntegralCastToBoolean(const Expr *Arg) { - return ImplicitCastExpr::Create(C, C.BoolTy, CK_IntegralToBoolean, - const_cast(Arg), nullptr, VK_RValue); + return makeImplicitCast(Arg, C.BoolTy, CK_IntegralToBoolean); } ObjCBoolLiteralExpr *ASTMaker::makeObjCBool(bool Val) { diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index 615b782350414..74de3df9d9005 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -1024,7 +1024,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { type, VK_LValue, SourceLocation()); ImplicitCastExpr l2r(ImplicitCastExpr::OnStack, type, CK_LValueToRValue, - &declRef, VK_RValue); + &declRef, VK_RValue, CurFPFeatures); // FIXME: Pass a specific location for the expr init so that the store is // attributed to a reasonable location - otherwise it may be attributed to // locations of subexpressions in the initialization. diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index 26dfb6259a290..f2807eefd7f34 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -1449,9 +1449,9 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, ValueDecl *selfDecl = setterMethod->getSelfDecl(); DeclRefExpr self(getContext(), selfDecl, false, selfDecl->getType(), VK_LValue, SourceLocation()); - ImplicitCastExpr selfLoad(ImplicitCastExpr::OnStack, - selfDecl->getType(), CK_LValueToRValue, &self, - VK_RValue); + ImplicitCastExpr selfLoad(ImplicitCastExpr::OnStack, selfDecl->getType(), + CK_LValueToRValue, &self, VK_RValue, + FPOptionsOverride(CurFPFeatures)); ObjCIvarRefExpr ivarRef(ivar, ivar->getType().getNonReferenceType(), SourceLocation(), SourceLocation(), &selfLoad, true, true); @@ -1462,7 +1462,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, SourceLocation()); ImplicitCastExpr argLoad(ImplicitCastExpr::OnStack, argType.getUnqualifiedType(), CK_LValueToRValue, - &arg, VK_RValue); + &arg, VK_RValue, CurFPFeatures); // The property type can differ from the ivar type in some situations with // Objective-C pointer types, we can always bit cast the RHS in these cases. 
@@ -1483,9 +1483,8 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, } else if (ivarRef.getType()->isPointerType()) { argCK = CK_BitCast; } - ImplicitCastExpr argCast(ImplicitCastExpr::OnStack, - ivarRef.getType(), argCK, &argLoad, - VK_RValue); + ImplicitCastExpr argCast(ImplicitCastExpr::OnStack, ivarRef.getType(), argCK, + &argLoad, VK_RValue, CurFPFeatures); Expr *finalArg = &argLoad; if (!getContext().hasSameUnqualifiedType(ivarRef.getType(), argLoad.getType())) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index b9260892bd215..19dc9a87f239c 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -4137,7 +4137,7 @@ createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, PrivateVD->setInitStyle(VarDecl::CInit); PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, InitRef, /*BasePath=*/nullptr, - VK_RValue)); + VK_RValue, FPOptionsOverride())); Data.FirstprivateVars.emplace_back(OrigRef); Data.FirstprivateCopies.emplace_back(PrivateRef); Data.FirstprivateInits.emplace_back(InitRef); diff --git a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp index 8c41e71ef0187..c0c81221b2344 100644 --- a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp +++ b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp @@ -586,7 +586,8 @@ namespace { CastKind Kind, Expr *E) { TypeSourceInfo *TInfo = Ctx->getTrivialTypeSourceInfo(Ty, SourceLocation()); return CStyleCastExpr::Create(*Ctx, Ty, VK_RValue, Kind, E, nullptr, - TInfo, SourceLocation(), SourceLocation()); + FPOptionsOverride(), TInfo, + SourceLocation(), SourceLocation()); } bool ImplementationIsNonLazy(const ObjCImplDecl *OD) const { @@ -2105,8 +2106,8 @@ RewriteModernObjC::SynthesizeCallToFunctionDecl(FunctionDecl *FD, // Now, we cast the reference to a pointer to the objc_msgSend type. QualType pToFunc = Context->getPointerType(msgSendType); ImplicitCastExpr *ICE = - ImplicitCastExpr::Create(*Context, pToFunc, CK_FunctionToPointerDecay, - DRE, nullptr, VK_RValue); + ImplicitCastExpr::Create(*Context, pToFunc, CK_FunctionToPointerDecay, + DRE, nullptr, VK_RValue, FPOptionsOverride()); const auto *FT = msgSendType->castAs(); CallExpr *Exp = diff --git a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp index 4ecd6e95de10e..990509a84b06c 100644 --- a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp +++ b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp @@ -492,7 +492,8 @@ namespace { CastKind Kind, Expr *E) { TypeSourceInfo *TInfo = Ctx->getTrivialTypeSourceInfo(Ty, SourceLocation()); return CStyleCastExpr::Create(*Ctx, Ty, VK_RValue, Kind, E, nullptr, - TInfo, SourceLocation(), SourceLocation()); + FPOptionsOverride(), TInfo, + SourceLocation(), SourceLocation()); } StringLiteral *getStringLiteral(StringRef Str) { @@ -2022,8 +2023,8 @@ RewriteObjC::SynthesizeCallToFunctionDecl(FunctionDecl *FD, // Now, we cast the reference to a pointer to the objc_msgSend type. 
QualType pToFunc = Context->getPointerType(msgSendType); ImplicitCastExpr *ICE = - ImplicitCastExpr::Create(*Context, pToFunc, CK_FunctionToPointerDecay, - DRE, nullptr, VK_RValue); + ImplicitCastExpr::Create(*Context, pToFunc, CK_FunctionToPointerDecay, + DRE, nullptr, VK_RValue, FPOptionsOverride()); const auto *FT = msgSendType->castAs(); diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 47484c5be9c9b..375fe3b28dec3 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -586,7 +586,8 @@ ExprResult Sema::ImpCastExprToType(Expr *E, QualType Ty, } } - return ImplicitCastExpr::Create(Context, Ty, Kind, E, BasePath, VK); + return ImplicitCastExpr::Create(Context, Ty, Kind, E, BasePath, VK, + CurFPFeatureOverrides()); } /// ScalarTypeToBooleanCastKind - Returns the cast kind corresponding diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index 726900c59f20e..5222722e71810 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -105,10 +105,9 @@ namespace { // If this is an unbridged cast, wrap the result in an implicit // cast that yields the unbridged-cast placeholder type. if (IsARCUnbridgedCast) { - castExpr = ImplicitCastExpr::Create(Self.Context, - Self.Context.ARCUnbridgedCastTy, - CK_Dependent, castExpr, nullptr, - castExpr->getValueKind()); + castExpr = ImplicitCastExpr::Create( + Self.Context, Self.Context.ARCUnbridgedCastTy, CK_Dependent, + castExpr, nullptr, castExpr->getValueKind(), FPOptionsOverride()); } updatePartOfExplicitCastFlags(castExpr); return castExpr; @@ -361,11 +360,10 @@ Sema::BuildCXXNamedCast(SourceLocation OpLoc, tok::TokenKind Kind, DiscardMisalignedMemberAddress(DestType.getTypePtr(), E); } - return Op.complete(CXXStaticCastExpr::Create(Context, Op.ResultType, - Op.ValueKind, Op.Kind, Op.SrcExpr.get(), - &Op.BasePath, DestTInfo, - OpLoc, Parens.getEnd(), - AngleBrackets)); + return Op.complete(CXXStaticCastExpr::Create( + Context, Op.ResultType, Op.ValueKind, Op.Kind, Op.SrcExpr.get(), + &Op.BasePath, DestTInfo, CurFPFeatureOverrides(), OpLoc, + Parens.getEnd(), AngleBrackets)); } } } @@ -3033,9 +3031,9 @@ ExprResult Sema::BuildCStyleCastExpr(SourceLocation LPLoc, // -Wcast-qual DiagnoseCastQual(Op.Self, Op.SrcExpr, Op.DestType); - return Op.complete(CStyleCastExpr::Create(Context, Op.ResultType, - Op.ValueKind, Op.Kind, Op.SrcExpr.get(), - &Op.BasePath, CastTypeInfo, LPLoc, RPLoc)); + return Op.complete(CStyleCastExpr::Create( + Context, Op.ResultType, Op.ValueKind, Op.Kind, Op.SrcExpr.get(), + &Op.BasePath, CurFPFeatureOverrides(), CastTypeInfo, LPLoc, RPLoc)); } ExprResult Sema::BuildCXXFunctionalCastExpr(TypeSourceInfo *CastTypeInfo, @@ -3058,7 +3056,7 @@ ExprResult Sema::BuildCXXFunctionalCastExpr(TypeSourceInfo *CastTypeInfo, if (auto *ConstructExpr = dyn_cast(SubExpr)) ConstructExpr->setParenOrBraceRange(SourceRange(LPLoc, RPLoc)); - return Op.complete(CXXFunctionalCastExpr::Create(Context, Op.ResultType, - Op.ValueKind, CastTypeInfo, Op.Kind, - Op.SrcExpr.get(), &Op.BasePath, LPLoc, RPLoc)); + return Op.complete(CXXFunctionalCastExpr::Create( + Context, Op.ResultType, Op.ValueKind, CastTypeInfo, Op.Kind, + Op.SrcExpr.get(), &Op.BasePath, CurFPFeatureOverrides(), LPLoc, RPLoc)); } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index a9e6113dc7bb5..99e6678be51c9 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -18172,11 +18172,9 @@ void Sema::ActOnEnumBody(SourceLocation EnumLoc, SourceRange BraceRange, // Adjust the Expr 
initializer and type. if (ECD->getInitExpr() && !Context.hasSameType(NewTy, ECD->getInitExpr()->getType())) - ECD->setInitExpr(ImplicitCastExpr::Create(Context, NewTy, - CK_IntegralCast, - ECD->getInitExpr(), - /*base paths*/ nullptr, - VK_RValue)); + ECD->setInitExpr(ImplicitCastExpr::Create( + Context, NewTy, CK_IntegralCast, ECD->getInitExpr(), + /*base paths*/ nullptr, VK_RValue, CurFPFeatureOverrides())); if (getLangOpts().CPlusPlus) // C++ [dcl.enum]p4: Following the closing brace of an // enum-specifier, each enumerator has the type of its diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 0a4f75ad341b1..3a8a7708949e1 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -1185,7 +1185,8 @@ static bool checkTupleLikeDecomposition(Sema &S, // an xvalue otherwise if (!Src->getType()->isLValueReferenceType()) E = ImplicitCastExpr::Create(S.Context, E.get()->getType(), CK_NoOp, - E.get(), nullptr, VK_XValue); + E.get(), nullptr, VK_XValue, + S.CurFPFeatureOverrides()); TemplateArgumentListInfo Args(Loc, Loc); Args.addArgument( @@ -14869,9 +14870,9 @@ void Sema::DefineImplicitLambdaToBlockPointerConversion( // (since it's unusable otherwise); in the case where we inline the // block literal, it has block literal lifetime semantics. if (!BuildBlock.isInvalid() && !getLangOpts().ObjCAutoRefCount) - BuildBlock = ImplicitCastExpr::Create(Context, BuildBlock.get()->getType(), - CK_CopyAndAutoreleaseBlockObject, - BuildBlock.get(), nullptr, VK_RValue); + BuildBlock = ImplicitCastExpr::Create( + Context, BuildBlock.get()->getType(), CK_CopyAndAutoreleaseBlockObject, + BuildBlock.get(), nullptr, VK_RValue, CurFPFeatureOverrides()); if (BuildBlock.isInvalid()) { Diag(CurrentLocation, diag::note_lambda_to_block_conv); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index d6f0a12106fe0..a33d6e2a83a16 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -695,7 +695,8 @@ ExprResult Sema::DefaultLvalueConversion(Expr *E) { // C++ [conv.lval]p3: // If T is cv std::nullptr_t, the result is a null pointer constant. CastKind CK = T->isNullPtrType() ? CK_NullToPointer : CK_LValueToRValue; - Res = ImplicitCastExpr::Create(Context, T, CK, E, nullptr, VK_RValue); + Res = ImplicitCastExpr::Create(Context, T, CK, E, nullptr, VK_RValue, + CurFPFeatureOverrides()); // C11 6.3.2.1p2: // ... if the lvalue has atomic type, the value has the non-atomic version @@ -703,7 +704,7 @@ ExprResult Sema::DefaultLvalueConversion(Expr *E) { if (const AtomicType *Atomic = T->getAs()) { T = Atomic->getValueType().getUnqualifiedType(); Res = ImplicitCastExpr::Create(Context, T, CK_AtomicToNonAtomic, Res.get(), - nullptr, VK_RValue); + nullptr, VK_RValue, CurFPFeatureOverrides()); } return Res; @@ -6960,9 +6961,9 @@ void Sema::maybeExtendBlockObject(ExprResult &E) { // Only do this in an r-value context. 
if (!getLangOpts().ObjCAutoRefCount) return; - E = ImplicitCastExpr::Create(Context, E.get()->getType(), - CK_ARCExtendBlockObject, E.get(), - /*base path*/ nullptr, VK_RValue); + E = ImplicitCastExpr::Create( + Context, E.get()->getType(), CK_ARCExtendBlockObject, E.get(), + /*base path*/ nullptr, VK_RValue, CurFPFeatureOverrides()); Cleanup.setExprNeedsCleanups(true); } diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index d1fcdf3545278..09976197194ab 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1503,7 +1503,8 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo, : SourceRange(LParenOrBraceLoc, RParenOrBraceLoc); Result = CXXFunctionalCastExpr::Create( Context, ResultType, Expr::getValueKindForType(Ty), TInfo, CK_NoOp, - Result.get(), /*Path=*/nullptr, Locs.getBegin(), Locs.getEnd()); + Result.get(), /*Path=*/nullptr, CurFPFeatureOverrides(), + Locs.getBegin(), Locs.getEnd()); } return Result; @@ -2204,7 +2205,7 @@ Sema::BuildCXXNew(SourceRange Range, bool UseGlobal, SizeTy, SourceLocation()); ImplicitCastExpr DesiredAlignment(ImplicitCastExpr::OnStack, AlignValT, CK_IntegralCast, &AlignmentLiteral, - VK_RValue); + VK_RValue, CurFPFeatureOverrides()); // Adjust placement args by prepending conjured size and alignment exprs. llvm::SmallVector CallArgs; @@ -3915,7 +3916,8 @@ static ExprResult BuildCXXCastArgument(Sema &S, // Record usage of conversion in an implicit cast. Result = ImplicitCastExpr::Create(S.Context, Result.get()->getType(), CK_UserDefinedConversion, Result.get(), - nullptr, Result.get()->getValueKind()); + nullptr, Result.get()->getValueKind(), + S.CurFPFeatureOverrides()); return S.MaybeBindToTemporary(Result.get()); } @@ -4096,7 +4098,8 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType, if (const AtomicType *FromAtomic = FromType->getAs()) { FromType = FromAtomic->getValueType().getUnqualifiedType(); From = ImplicitCastExpr::Create(Context, FromType, CK_AtomicToNonAtomic, - From, /*BasePath=*/nullptr, VK_RValue); + From, /*BasePath=*/nullptr, VK_RValue, + CurFPFeatureOverrides()); } break; @@ -6840,7 +6843,7 @@ ExprResult Sema::MaybeBindToTemporary(Expr *E) { CastKind ck = (ReturnsRetained ? CK_ARCConsumeObject : CK_ARCReclaimReturnedObject); return ImplicitCastExpr::Create(Context, E->getType(), ck, E, nullptr, - VK_RValue); + VK_RValue, CurFPFeatureOverrides()); } if (E->getType().isDestructedType() == QualType::DK_nontrivial_c_struct) diff --git a/clang/lib/Sema/SemaExprObjC.cpp b/clang/lib/Sema/SemaExprObjC.cpp index 228a1ec3ba1f9..9a0c4e2d4320d 100644 --- a/clang/lib/Sema/SemaExprObjC.cpp +++ b/clang/lib/Sema/SemaExprObjC.cpp @@ -4462,8 +4462,8 @@ Sema::CheckObjCConversion(SourceRange castRange, QualType castType, // If the result is +1, consume it here. case ACC_plusOne: castExpr = ImplicitCastExpr::Create(Context, castExpr->getType(), - CK_ARCConsumeObject, castExpr, - nullptr, VK_RValue); + CK_ARCConsumeObject, castExpr, nullptr, + VK_RValue, CurFPFeatureOverrides()); Cleanup.setExprNeedsCleanups(true); return ACR_okay; } @@ -4689,9 +4689,9 @@ ExprResult Sema::BuildObjCBridgedCast(SourceLocation LParenLoc, case OBC_BridgeRetained: // Produce the object before casting it. 
- SubExpr = ImplicitCastExpr::Create(Context, FromType, - CK_ARCProduceObject, - SubExpr, nullptr, VK_RValue); + SubExpr = ImplicitCastExpr::Create(Context, FromType, CK_ARCProduceObject, + SubExpr, nullptr, VK_RValue, + CurFPFeatureOverrides()); break; case OBC_BridgeTransfer: { @@ -4729,8 +4729,9 @@ ExprResult Sema::BuildObjCBridgedCast(SourceLocation LParenLoc, if (MustConsume) { Cleanup.setExprNeedsCleanups(true); - Result = ImplicitCastExpr::Create(Context, T, CK_ARCConsumeObject, Result, - nullptr, VK_RValue); + Result = + ImplicitCastExpr::Create(Context, T, CK_ARCConsumeObject, Result, + nullptr, VK_RValue, CurFPFeatureOverrides()); } return Result; diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index f63d600032ce4..b6bd6cff4d77d 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -2891,7 +2891,8 @@ InitListChecker::CheckDesignatedInitializer(const InitializedEntity &Entity, Context, CodeUnit, PromotedCharTy, SubExpr->getExprLoc()); if (CharTy != PromotedCharTy) Init = ImplicitCastExpr::Create(Context, CharTy, CK_IntegralCast, - Init, nullptr, VK_RValue); + Init, nullptr, VK_RValue, + SemaRef.CurFPFeatureOverrides()); StructuredList->updateInit(Context, i, Init); } } else { @@ -2913,7 +2914,8 @@ InitListChecker::CheckDesignatedInitializer(const InitializedEntity &Entity, Context, CodeUnit, PromotedCharTy, SubExpr->getExprLoc()); if (CharTy != PromotedCharTy) Init = ImplicitCastExpr::Create(Context, CharTy, CK_IntegralCast, - Init, nullptr, VK_RValue); + Init, nullptr, VK_RValue, + SemaRef.CurFPFeatureOverrides()); StructuredList->updateInit(Context, i, Init); } } @@ -8019,9 +8021,9 @@ ExprResult InitializationSequence::Perform(Sema &S, (Step->Kind == SK_CastDerivedToBaseXValue ? VK_XValue : VK_RValue); - CurInit = - ImplicitCastExpr::Create(S.Context, Step->Type, CK_DerivedToBase, - CurInit.get(), &BasePath, VK); + CurInit = ImplicitCastExpr::Create( + S.Context, Step->Type, CK_DerivedToBase, CurInit.get(), &BasePath, VK, + S.CurFPFeatureOverrides()); break; } @@ -8150,9 +8152,9 @@ ExprResult InitializationSequence::Perform(Sema &S, if (CreatedObject && checkAbstractType(CurInit.get()->getType())) return ExprError(); - CurInit = ImplicitCastExpr::Create(S.Context, CurInit.get()->getType(), - CastKind, CurInit.get(), nullptr, - CurInit.get()->getValueKind()); + CurInit = ImplicitCastExpr::Create( + S.Context, CurInit.get()->getType(), CastKind, CurInit.get(), nullptr, + CurInit.get()->getValueKind(), S.CurFPFeatureOverrides()); if (shouldBindAsTemporary(Entity)) // The overall entity is temporary, so this expression should be @@ -8493,9 +8495,9 @@ ExprResult InitializationSequence::Perform(Sema &S, break; case SK_ProduceObjCObject: - CurInit = - ImplicitCastExpr::Create(S.Context, Step->Type, CK_ARCProduceObject, - CurInit.get(), nullptr, VK_RValue); + CurInit = ImplicitCastExpr::Create( + S.Context, Step->Type, CK_ARCProduceObject, CurInit.get(), nullptr, + VK_RValue, S.CurFPFeatureOverrides()); break; case SK_StdInitializerList: { @@ -8549,9 +8551,9 @@ ExprResult InitializationSequence::Perform(Sema &S, // Case 1b and 1c // No cast from integer to sampler is needed. 
if (!Var->hasGlobalStorage()) { - CurInit = ImplicitCastExpr::Create(S.Context, Step->Type, - CK_LValueToRValue, Init, - /*BasePath=*/nullptr, VK_RValue); + CurInit = ImplicitCastExpr::Create( + S.Context, Step->Type, CK_LValueToRValue, Init, + /*BasePath=*/nullptr, VK_RValue, S.CurFPFeatureOverrides()); break; } // Case 1a diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp index c9f2854f7accf..a870d822b42f5 100644 --- a/clang/lib/Sema/SemaLambda.cpp +++ b/clang/lib/Sema/SemaLambda.cpp @@ -680,8 +680,9 @@ static void adjustBlockReturnsToEnum(Sema &S, ArrayRef returns, ExprWithCleanups *cleanups = dyn_cast(retValue); Expr *E = (cleanups ? cleanups->getSubExpr() : retValue); - E = ImplicitCastExpr::Create(S.Context, returnType, CK_IntegralCast, - E, /*base path*/ nullptr, VK_RValue); + E = ImplicitCastExpr::Create(S.Context, returnType, CK_IntegralCast, E, + /*base path*/ nullptr, VK_RValue, + S.CurFPFeatureOverrides()); if (cleanups) { cleanups->setSubExpr(E); } else { diff --git a/clang/lib/Sema/SemaObjCProperty.cpp b/clang/lib/Sema/SemaObjCProperty.cpp index e301c62dd2c0b..f6ed3e65f94c1 100644 --- a/clang/lib/Sema/SemaObjCProperty.cpp +++ b/clang/lib/Sema/SemaObjCProperty.cpp @@ -1464,10 +1464,9 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S, DeclRefExpr(Context, SelfDecl, false, SelfDecl->getType(), VK_LValue, PropertyDiagLoc); MarkDeclRefReferenced(SelfExpr); - Expr *LoadSelfExpr = - ImplicitCastExpr::Create(Context, SelfDecl->getType(), - CK_LValueToRValue, SelfExpr, nullptr, - VK_RValue); + Expr *LoadSelfExpr = ImplicitCastExpr::Create( + Context, SelfDecl->getType(), CK_LValueToRValue, SelfExpr, nullptr, + VK_RValue, CurFPFeatureOverrides()); Expr *IvarRefExpr = new (Context) ObjCIvarRefExpr(Ivar, Ivar->getUsageType(SelfDecl->getType()), @@ -1528,10 +1527,9 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S, DeclRefExpr(Context, SelfDecl, false, SelfDecl->getType(), VK_LValue, PropertyDiagLoc); MarkDeclRefReferenced(SelfExpr); - Expr *LoadSelfExpr = - ImplicitCastExpr::Create(Context, SelfDecl->getType(), - CK_LValueToRValue, SelfExpr, nullptr, - VK_RValue); + Expr *LoadSelfExpr = ImplicitCastExpr::Create( + Context, SelfDecl->getType(), CK_LValueToRValue, SelfExpr, nullptr, + VK_RValue, CurFPFeatureOverrides()); Expr *lhs = new (Context) ObjCIvarRefExpr(Ivar, Ivar->getUsageType(SelfDecl->getType()), diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 352f52d2f6260..4a444b38a0aac 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -15388,12 +15388,12 @@ static bool actOnOMPReductionKindClause( if (!BasePath.empty()) { LHS = S.DefaultLvalueConversion(LHS.get()); RHS = S.DefaultLvalueConversion(RHS.get()); - LHS = ImplicitCastExpr::Create(Context, PtrRedTy, - CK_UncheckedDerivedToBase, LHS.get(), - &BasePath, LHS.get()->getValueKind()); - RHS = ImplicitCastExpr::Create(Context, PtrRedTy, - CK_UncheckedDerivedToBase, RHS.get(), - &BasePath, RHS.get()->getValueKind()); + LHS = ImplicitCastExpr::Create( + Context, PtrRedTy, CK_UncheckedDerivedToBase, LHS.get(), &BasePath, + LHS.get()->getValueKind(), S.CurFPFeatureOverrides()); + RHS = ImplicitCastExpr::Create( + Context, PtrRedTy, CK_UncheckedDerivedToBase, RHS.get(), &BasePath, + RHS.get()->getValueKind(), S.CurFPFeatureOverrides()); } FunctionProtoType::ExtProtoInfo EPI; QualType Params[] = {PtrRedTy, PtrRedTy}; diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 71341e5688fe0..fa68f3a4deaba 100644 --- 
a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -5862,7 +5862,8 @@ diagnoseNoViableConversion(Sema &SemaRef, SourceLocation Loc, Expr *&From, // Record usage of conversion in an implicit cast. From = ImplicitCastExpr::Create(SemaRef.Context, Result.get()->getType(), CK_UserDefinedConversion, Result.get(), - nullptr, Result.get()->getValueKind()); + nullptr, Result.get()->getValueKind(), + SemaRef.CurFPFeatureOverrides()); } return false; } @@ -5891,7 +5892,8 @@ static bool recordConversion(Sema &SemaRef, SourceLocation Loc, Expr *&From, // Record usage of conversion in an implicit cast. From = ImplicitCastExpr::Create(SemaRef.Context, Result.get()->getType(), CK_UserDefinedConversion, Result.get(), - nullptr, Result.get()->getValueKind()); + nullptr, Result.get()->getValueKind(), + SemaRef.CurFPFeatureOverrides()); return false; } @@ -7296,8 +7298,8 @@ void Sema::AddConversionCandidate( VK_LValue, From->getBeginLoc()); ImplicitCastExpr ConversionFn(ImplicitCastExpr::OnStack, Context.getPointerType(Conversion->getType()), - CK_FunctionToPointerDecay, - &ConversionRef, VK_RValue); + CK_FunctionToPointerDecay, &ConversionRef, + VK_RValue, CurFPFeatureOverrides()); QualType ConversionType = Conversion->getConversionType(); if (!isCompleteType(From->getBeginLoc(), ConversionType)) { @@ -14422,9 +14424,9 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj, if (Call.isInvalid()) return ExprError(); // Record usage of conversion in an implicit cast. - Call = ImplicitCastExpr::Create(Context, Call.get()->getType(), - CK_UserDefinedConversion, Call.get(), - nullptr, VK_RValue); + Call = ImplicitCastExpr::Create( + Context, Call.get()->getType(), CK_UserDefinedConversion, Call.get(), + nullptr, VK_RValue, CurFPFeatureOverrides()); return BuildCallExpr(S, Call.get(), LParenLoc, Args, RParenLoc); } @@ -14829,10 +14831,9 @@ Expr *Sema::FixOverloadedFunctionReference(Expr *E, DeclAccessPair Found, if (SubExpr == ICE->getSubExpr()) return ICE; - return ImplicitCastExpr::Create(Context, ICE->getType(), - ICE->getCastKind(), - SubExpr, nullptr, - ICE->getValueKind()); + return ImplicitCastExpr::Create(Context, ICE->getType(), ICE->getCastKind(), + SubExpr, nullptr, ICE->getValueKind(), + CurFPFeatureOverrides()); } if (auto *GSE = dyn_cast(E)) { diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index c44636ad1b395..e461ad4484813 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -3095,7 +3095,8 @@ static void TryMoveInitialization(Sema& S, bool ConvertingConstructorsOnly, ExprResult &Res) { ImplicitCastExpr AsRvalue(ImplicitCastExpr::OnStack, Value->getType(), - CK_NoOp, Value, VK_XValue); + CK_NoOp, Value, VK_XValue, + S.CurFPFeatureOverrides()); Expr *InitExpr = &AsRvalue; @@ -3150,8 +3151,9 @@ static void TryMoveInitialization(Sema& S, // Promote "AsRvalue" to the heap, since we now need this // expression node to persist. - Value = ImplicitCastExpr::Create(S.Context, Value->getType(), CK_NoOp, - Value, nullptr, VK_XValue); + Value = + ImplicitCastExpr::Create(S.Context, Value->getType(), CK_NoOp, Value, + nullptr, VK_XValue, S.CurFPFeatureOverrides()); // Complete type-checking the initialization of the return type // using the constructor we found. 
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 6721b07253292..e1a563850970a 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -7478,7 +7478,7 @@ Sema::BuildExpressionFromIntegralTemplateArgument(const TemplateArgument &Arg, // FIXME: This is a hack. We need a better way to handle substituted // non-type template parameters. E = CStyleCastExpr::Create(Context, OrigT, VK_RValue, CK_IntegralCast, E, - nullptr, + nullptr, CurFPFeatureOverrides(), Context.getTrivialTypeSourceInfo(OrigT, Loc), Loc, Loc); } diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index e261044f7cb14..48897cd2d822b 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -1082,6 +1082,8 @@ void ASTStmtReader::VisitCastExpr(CastExpr *E) { VisitExpr(E); unsigned NumBaseSpecs = Record.readInt(); assert(NumBaseSpecs == E->path_size()); + unsigned HasFPFeatures = Record.readInt(); + assert(E->hasStoredFPFeatures() == HasFPFeatures); E->setSubExpr(Record.readSubExpr()); E->setCastKind((CastKind)Record.readInt()); CastExpr::path_iterator BaseI = E->path_begin(); @@ -1090,6 +1092,8 @@ void ASTStmtReader::VisitCastExpr(CastExpr *E) { *BaseSpec = Record.readCXXBaseSpecifier(); *BaseI++ = BaseSpec; } + if (HasFPFeatures) + *E->getTrailingFPFeatures() = FPOptionsOverride::getFromOpaqueInt(Record.readInt()); } void ASTStmtReader::VisitBinaryOperator(BinaryOperator *E) { @@ -2893,13 +2897,17 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; case EXPR_IMPLICIT_CAST: - S = ImplicitCastExpr::CreateEmpty(Context, - /*PathSize*/ Record[ASTStmtReader::NumExprFields]); + S = ImplicitCastExpr::CreateEmpty( + Context, + /*PathSize*/ Record[ASTStmtReader::NumExprFields], + /*HasFPFeatures*/ Record[ASTStmtReader::NumExprFields + 1]); break; case EXPR_CSTYLE_CAST: - S = CStyleCastExpr::CreateEmpty(Context, - /*PathSize*/ Record[ASTStmtReader::NumExprFields]); + S = CStyleCastExpr::CreateEmpty( + Context, + /*PathSize*/ Record[ASTStmtReader::NumExprFields], + /*HasFPFeatures*/ Record[ASTStmtReader::NumExprFields + 1]); break; case EXPR_COMPOUND_LITERAL: @@ -3501,8 +3509,10 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; case EXPR_CXX_STATIC_CAST: - S = CXXStaticCastExpr::CreateEmpty(Context, - /*PathSize*/ Record[ASTStmtReader::NumExprFields]); + S = CXXStaticCastExpr::CreateEmpty( + Context, + /*PathSize*/ Record[ASTStmtReader::NumExprFields], + /*HasFPFeatures*/ Record[ASTStmtReader::NumExprFields + 1]); break; case EXPR_CXX_DYNAMIC_CAST: @@ -3524,8 +3534,10 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; case EXPR_CXX_FUNCTIONAL_CAST: - S = CXXFunctionalCastExpr::CreateEmpty(Context, - /*PathSize*/ Record[ASTStmtReader::NumExprFields]); + S = CXXFunctionalCastExpr::CreateEmpty( + Context, + /*PathSize*/ Record[ASTStmtReader::NumExprFields], + /*HasFPFeatures*/ Record[ASTStmtReader::NumExprFields + 1]); break; case EXPR_BUILTIN_BIT_CAST: diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 2d250674057c3..911fcb4095474 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -2346,6 +2346,7 @@ void ASTWriter::WriteDeclAbbrevs() { Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); //GetObjectKind // CastExpr Abv->Add(BitCodeAbbrevOp(0)); // PathSize + Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // HasFPFeatures 
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 6)); // CastKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // PartOfExplicitCast // ImplicitCastExpr diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index 4e3e1fdc346fc..0121f25832073 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -946,12 +946,16 @@ void ASTStmtWriter::VisitObjCBridgedCastExpr(ObjCBridgedCastExpr *E) { void ASTStmtWriter::VisitCastExpr(CastExpr *E) { VisitExpr(E); Record.push_back(E->path_size()); + Record.push_back(E->hasStoredFPFeatures()); Record.AddStmt(E->getSubExpr()); Record.push_back(E->getCastKind()); // FIXME: stable encoding for (CastExpr::path_iterator PI = E->path_begin(), PE = E->path_end(); PI != PE; ++PI) Record.AddCXXBaseSpecifier(**PI); + + if (E->hasStoredFPFeatures()) + Record.push_back(E->getFPFeatures().getAsOpaqueInt()); } void ASTStmtWriter::VisitBinaryOperator(BinaryOperator *E) { @@ -1003,7 +1007,7 @@ void ASTStmtWriter::VisitImplicitCastExpr(ImplicitCastExpr *E) { VisitCastExpr(E); Record.push_back(E->isPartOfExplicitCast()); - if (E->path_size() == 0) + if (E->path_size() == 0 && !E->hasStoredFPFeatures()) AbbrevToUse = Writer.getExprImplicitCastAbbrev(); Code = serialization::EXPR_IMPLICIT_CAST; diff --git a/clang/test/AST/ast-dump-fpfeatures.cpp b/clang/test/AST/ast-dump-fpfeatures.cpp index f3925aebbe752..830623ff48520 100644 --- a/clang/test/AST/ast-dump-fpfeatures.cpp +++ b/clang/test/AST/ast-dump-fpfeatures.cpp @@ -36,6 +36,51 @@ float func_03(float x) { // CHECK-NEXT: ReturnStmt // CHECK-NEXT: CallExpr {{.*}} FPContractMode=0 +int func_04(float x) { +#pragma STDC FP_CONTRACT ON + return x; +} + +// CHECK: FunctionDecl {{.*}} func_04 'int (float)' +// CHECK-NEXT: ParmVarDecl {{.*}} x 'float' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' FPContractMode=1 + +float func_05(double x) { +#pragma STDC FP_CONTRACT ON + return (float)x; +} + +// CHECK: FunctionDecl {{.*}} func_05 'float (double)' +// CHECK-NEXT: ParmVarDecl {{.*}} x 'double' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} FPContractMode=1 + +float func_06(double x) { +#pragma STDC FP_CONTRACT ON + return float(x); +} + +// CHECK: FunctionDecl {{.*}} func_06 'float (double)' +// CHECK-NEXT: ParmVarDecl {{.*}} x 'double' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CXXFunctionalCastExpr {{.*}} FPContractMode=1 + +float func_07(double x) { +#pragma STDC FP_CONTRACT ON + return static_cast(x); +} + +// CHECK: FunctionDecl {{.*}} func_07 'float (double)' +// CHECK-NEXT: ParmVarDecl {{.*}} x 'double' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CXXStaticCastExpr {{.*}} FPContractMode=1 +// CHECK-NEXT: CallExpr {{.*}} FPContractMode=0 + From 0ece51c60c51f0d4c285dbda3b6cff794041bdd7 Mon Sep 17 00:00:00 2001 From: Jianzhou Zhao Date: Tue, 1 Sep 2020 07:16:07 +0000 Subject: [PATCH 0438/1079] Add raw_fd_stream that supports reading/seeking/writing This is used by https://reviews.llvm.org/D86905 to support bitcode writer's incremental flush. 
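For reference, a minimal usage sketch of the new class (not part of this patch; the
file name and byte values are illustrative only, and error handling is abbreviated).
It shows the read-after-write pattern the incremental flush needs: write through the
buffered stream, seek back, and read the bytes through the same object, which a
plain raw_fd_ostream cannot do:

  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  int main() {
    std::error_code EC;
    raw_fd_stream OS("flush.bc", EC); // opened read+write; must be seekable
    if (EC)
      return 1;

    OS.write("01234567", 8); // buffered write, inherited from raw_ostream
    OS.seek(3);              // flushes pending output, then repositions
    char Bytes[2];
    if (OS.read(Bytes, 2) != 2) // fills Bytes with '3' and '4'; -1 on error
      return 1;
    return 0;
  }

Because the stream identifies itself through OStreamKind, code that is handed a
generic raw_ostream* can recover the extra read/seek capability with
dyn_cast<raw_fd_stream>.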
--- llvm/include/llvm/Support/raw_ostream.h | 63 +++++++++++++++-- llvm/lib/Support/raw_ostream.cpp | 36 +++++++++- llvm/unittests/Support/raw_fd_stream_test.cpp | 67 +++++++++++++++++++ 3 files changed, 157 insertions(+), 9 deletions(-) create mode 100644 llvm/unittests/Support/raw_fd_stream_test.cpp diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h index cae57430baffb..5e68390bdc8f6 100644 --- a/llvm/include/llvm/Support/raw_ostream.h +++ b/llvm/include/llvm/Support/raw_ostream.h @@ -47,7 +47,16 @@ class FileLocker; /// buffered disciplines etc. It is a simple buffer that outputs /// a chunk at a time. class raw_ostream { +public: + // Class kinds to support LLVM-style RTTI. + enum class OStreamKind { + OK_OStream, + OK_FDStream, + }; + private: + OStreamKind Kind; + /// The buffer is handled in such a way that the buffer is /// uninitialized, unbuffered, or out of space when OutBufCur >= /// OutBufEnd. Thus a single comparison suffices to determine if we @@ -105,9 +114,10 @@ class raw_ostream { static constexpr Colors SAVEDCOLOR = Colors::SAVEDCOLOR; static constexpr Colors RESET = Colors::RESET; - explicit raw_ostream(bool unbuffered = false) - : BufferMode(unbuffered ? BufferKind::Unbuffered - : BufferKind::InternalBuffer) { + explicit raw_ostream(bool unbuffered = false, + OStreamKind K = OStreamKind::OK_OStream) + : Kind(K), BufferMode(unbuffered ? BufferKind::Unbuffered + : BufferKind::InternalBuffer) { // Start out ready to flush. OutBufStart = OutBufEnd = OutBufCur = nullptr; } @@ -120,6 +130,8 @@ class raw_ostream { /// tell - Return the current offset with the file. uint64_t tell() const { return current_pos() + GetNumBytesInBuffer(); } + OStreamKind get_kind() const { return Kind; } + //===--------------------------------------------------------------------===// // Configuration Interface //===--------------------------------------------------------------------===// @@ -388,8 +400,9 @@ class raw_pwrite_stream : public raw_ostream { void anchor() override; public: - explicit raw_pwrite_stream(bool Unbuffered = false) - : raw_ostream(Unbuffered) {} + explicit raw_pwrite_stream(bool Unbuffered = false, + OStreamKind K = OStreamKind::OK_OStream) + : raw_ostream(Unbuffered, K) {} void pwrite(const char *Ptr, size_t Size, uint64_t Offset) { #ifndef NDEBUG uint64_t Pos = tell(); @@ -436,10 +449,17 @@ class raw_fd_ostream : public raw_pwrite_stream { /// Determine an efficient buffer size. size_t preferred_buffer_size() const override; + void anchor() override; + +protected: /// Set the flag indicating that an output error has been encountered. void error_detected(std::error_code EC) { this->EC = EC; } - void anchor() override; + /// Return the file descriptor. + int get_fd() const { return FD; } + + // Update the file position by increasing \p Delta. + void inc_pos(uint64_t Delta) { pos += Delta; } public: /// Open the specified file for writing. If an error occurs, information @@ -464,7 +484,8 @@ class raw_fd_ostream : public raw_pwrite_stream { /// FD is the file descriptor that this writes to. If ShouldClose is true, /// this closes the file when the stream is destroyed. If FD is for stdout or /// stderr, it will not be closed. 
-  raw_fd_ostream(int fd, bool shouldClose, bool unbuffered=false);
+  raw_fd_ostream(int fd, bool shouldClose, bool unbuffered = false,
+                 OStreamKind K = OStreamKind::OK_OStream);
 
   ~raw_fd_ostream() override;
 
@@ -548,6 +569,34 @@ raw_fd_ostream &errs();
 /// This returns a reference to a raw_ostream which simply discards output.
 raw_ostream &nulls();
 
+//===----------------------------------------------------------------------===//
+// File Streams
+//===----------------------------------------------------------------------===//
+
+/// A raw_ostream of a file for reading/writing/seeking.
+///
+class raw_fd_stream : public raw_fd_ostream {
+public:
+  /// Open the specified file for reading/writing/seeking. If an error occurs,
+  /// information about the error is put into EC, and the stream should be
+  /// immediately destroyed.
+  raw_fd_stream(StringRef Filename, std::error_code &EC);
+
+  /// This reads \p Size bytes into the buffer pointed to by \p Ptr.
+  ///
+  /// \param Ptr The start of the buffer to hold data to be read.
+  ///
+  /// \param Size The number of bytes to be read.
+  ///
+  /// On success, the number of bytes read is returned, and the file position
+  /// is advanced by this number. On error, -1 is returned; use error() to get
+  /// the error code.
+  ssize_t read(char *Ptr, size_t Size);
+
+  /// Check if \p OS is a pointer of type raw_fd_stream*.
+  static bool classof(const raw_ostream *OS);
+};
+
 //===----------------------------------------------------------------------===//
 // Output Stream Adaptors
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
index 83050c8574d9d..c803724eb1cfa 100644
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -620,8 +620,9 @@ raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC,
 
 /// FD is the file descriptor that this writes to. If ShouldClose is true, this
 /// closes the file when the stream is destroyed.
-raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
-    : raw_pwrite_stream(unbuffered), FD(fd), ShouldClose(shouldClose) {
+raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered,
+                               OStreamKind K)
+    : raw_pwrite_stream(unbuffered, K), FD(fd), ShouldClose(shouldClose) {
   if (FD < 0 ) {
     ShouldClose = false;
     return;
@@ -904,6 +905,37 @@ raw_ostream &llvm::nulls() {
   return S;
 }
 
+//===----------------------------------------------------------------------===//
+// File Streams
+//===----------------------------------------------------------------------===//
+
+raw_fd_stream::raw_fd_stream(StringRef Filename, std::error_code &EC)
+    : raw_fd_ostream(getFD(Filename, EC, sys::fs::CD_CreateAlways,
+                           sys::fs::FA_Write | sys::fs::FA_Read,
+                           sys::fs::OF_None),
+                     true, false, OStreamKind::OK_FDStream) {
+  if (EC)
+    return;
+
+  // Do not support non-seekable files.
+ if (!supportsSeeking()) + EC = std::make_error_code(std::errc::invalid_argument); +} + +ssize_t raw_fd_stream::read(char *Ptr, size_t Size) { + assert(get_fd() >= 0 && "File already closed."); + ssize_t Ret = ::read(get_fd(), (void *)Ptr, Size); + if (Ret >= 0) + inc_pos(Ret); + else + error_detected(std::error_code(errno, std::generic_category())); + return Ret; +} + +bool raw_fd_stream::classof(const raw_ostream *OS) { + return OS->get_kind() == OStreamKind::OK_FDStream; +} + //===----------------------------------------------------------------------===// // raw_string_ostream //===----------------------------------------------------------------------===// diff --git a/llvm/unittests/Support/raw_fd_stream_test.cpp b/llvm/unittests/Support/raw_fd_stream_test.cpp new file mode 100644 index 0000000000000..00d834da32101 --- /dev/null +++ b/llvm/unittests/Support/raw_fd_stream_test.cpp @@ -0,0 +1,67 @@ +//===- llvm/unittest/Support/raw_fd_stream_test.cpp - raw_fd_stream tests -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/raw_ostream.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +TEST(raw_fd_streamTest, ReadAfterWrite) { + SmallString<64> Path; + int FD; + ASSERT_FALSE(sys::fs::createTemporaryFile("foo", "bar", FD, Path)); + FileRemover Cleanup(Path); + std::error_code EC; + raw_fd_stream OS(Path, EC); + EXPECT_TRUE(!EC); + + char Bytes[8]; + + OS.write("01234567", 8); + + OS.seek(3); + EXPECT_EQ(OS.read(Bytes, 2), 2); + EXPECT_EQ(Bytes[0], '3'); + EXPECT_EQ(Bytes[1], '4'); + + OS.seek(4); + OS.write("xyz", 3); + + OS.seek(0); + EXPECT_EQ(OS.read(Bytes, 8), 8); + EXPECT_EQ(Bytes[0], '0'); + EXPECT_EQ(Bytes[1], '1'); + EXPECT_EQ(Bytes[2], '2'); + EXPECT_EQ(Bytes[3], '3'); + EXPECT_EQ(Bytes[4], 'x'); + EXPECT_EQ(Bytes[5], 'y'); + EXPECT_EQ(Bytes[6], 'z'); + EXPECT_EQ(Bytes[7], '7'); +} + +TEST(raw_fd_streamTest, DynCast) { + { + std::error_code EC; + raw_fd_stream OS("-", EC); + EXPECT_TRUE(dyn_cast(&OS)); + } + { + std::error_code EC; + raw_fd_ostream OS("-", EC); + EXPECT_FALSE(dyn_cast(&OS)); + } +} + +} // namespace From 19531a81f1de8ef8ee219765c74c32c6fcd3323f Mon Sep 17 00:00:00 2001 From: Jianzhou Zhao Date: Sat, 12 Sep 2020 07:48:12 +0000 Subject: [PATCH 0439/1079] Add raw_fd_stream_test.cpp into CMakeLists.txt Fixing https://github.com/llvm/llvm-project/commit/0ece51c60c51f0d4c285dbda3b6cff794041bdd7 --- llvm/unittests/Support/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt index 30de294f499e6..90545bf056a30 100644 --- a/llvm/unittests/Support/CMakeLists.txt +++ b/llvm/unittests/Support/CMakeLists.txt @@ -87,6 +87,7 @@ add_llvm_unittest(SupportTests YAMLIOTest.cpp YAMLParserTest.cpp formatted_raw_ostream_test.cpp + raw_fd_stream_test.cpp raw_ostream_test.cpp raw_pwrite_stream_test.cpp raw_sha1_ostream_test.cpp From 9c651c231f3144f53e13cd0a1747589e1b2edccd Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Sat, 12 Sep 2020 15:10:09 +0700 Subject: [PATCH 0440/1079] Missing change from previous commit --- 
clang/test/AST/ast-dump-fpfeatures.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/test/AST/ast-dump-fpfeatures.cpp b/clang/test/AST/ast-dump-fpfeatures.cpp index 830623ff48520..e143009806b56 100644 --- a/clang/test/AST/ast-dump-fpfeatures.cpp +++ b/clang/test/AST/ast-dump-fpfeatures.cpp @@ -79,7 +79,6 @@ float func_07(double x) { // CHECK-NEXT: CompoundStmt // CHECK-NEXT: ReturnStmt // CHECK-NEXT: CXXStaticCastExpr {{.*}} FPContractMode=1 -// CHECK-NEXT: CallExpr {{.*}} FPContractMode=0 From b3f364e8561caeb704f48e962df9c4c0bdad4aa2 Mon Sep 17 00:00:00 2001 From: Jianzhou Zhao Date: Sat, 12 Sep 2020 08:49:22 +0000 Subject: [PATCH 0441/1079] Add a header file to support ssize_t for windows fixing https://github.com/llvm/llvm-project/commit/0ece51c60c51f0d4c285dbda3b6cff794041bdd7 --- llvm/include/llvm/Support/raw_ostream.h | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h index 5e68390bdc8f6..bd15f97a13a1b 100644 --- a/llvm/include/llvm/Support/raw_ostream.h +++ b/llvm/include/llvm/Support/raw_ostream.h @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataTypes.h" #include #include #include From de044f756286edebf86044d5172016d87f49fda0 Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Sat, 12 Sep 2020 17:05:26 +0700 Subject: [PATCH 0442/1079] Revert "[AST][FPEnv] Keep FP options in trailing storage of CastExpr" This reverts commit 6c8041aa0ffed827636935e59c489b1e390c8542. It caused some fails on buildbots. --- clang/include/clang/AST/Expr.h | 117 ++++----------- clang/include/clang/AST/ExprCXX.h | 139 +++++++----------- clang/include/clang/AST/ExprObjC.h | 4 +- clang/include/clang/AST/Stmt.h | 3 - clang/include/clang/AST/TextNodeDumper.h | 1 - clang/include/clang/Basic/LangOptions.h | 2 - clang/lib/AST/ASTImporter.cpp | 15 +- clang/lib/AST/Expr.cpp | 55 ++----- clang/lib/AST/ExprCXX.cpp | 61 ++++---- clang/lib/AST/TextNodeDumper.cpp | 10 -- clang/lib/Analysis/BodyFarm.cpp | 16 +- clang/lib/CodeGen/CGBlocks.cpp | 2 +- clang/lib/CodeGen/CGObjC.cpp | 13 +- clang/lib/CodeGen/CGStmtOpenMP.cpp | 2 +- .../Frontend/Rewrite/RewriteModernObjC.cpp | 7 +- clang/lib/Frontend/Rewrite/RewriteObjC.cpp | 7 +- clang/lib/Sema/Sema.cpp | 3 +- clang/lib/Sema/SemaCast.cpp | 28 ++-- clang/lib/Sema/SemaDecl.cpp | 8 +- clang/lib/Sema/SemaDeclCXX.cpp | 9 +- clang/lib/Sema/SemaExpr.cpp | 11 +- clang/lib/Sema/SemaExprCXX.cpp | 13 +- clang/lib/Sema/SemaExprObjC.cpp | 15 +- clang/lib/Sema/SemaInit.cpp | 30 ++-- clang/lib/Sema/SemaLambda.cpp | 5 +- clang/lib/Sema/SemaObjCProperty.cpp | 14 +- clang/lib/Sema/SemaOpenMP.cpp | 12 +- clang/lib/Sema/SemaOverload.cpp | 23 ++- clang/lib/Sema/SemaStmt.cpp | 8 +- clang/lib/Sema/SemaTemplate.cpp | 2 +- clang/lib/Serialization/ASTReaderStmt.cpp | 28 +--- clang/lib/Serialization/ASTWriterDecl.cpp | 1 - clang/lib/Serialization/ASTWriterStmt.cpp | 6 +- clang/test/AST/ast-dump-fpfeatures.cpp | 44 ------ 34 files changed, 253 insertions(+), 461 deletions(-) diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 1672fd707c6d2..26e52ad367f81 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -3440,11 +3440,9 @@ class CastExpr : public Expr { } CXXBaseSpecifier **path_buffer(); - friend class ASTStmtReader; - protected: CastExpr(StmtClass SC, QualType ty, ExprValueKind VK, const CastKind kind, - Expr *op, unsigned BasePathSize, bool HasFPFeatures) + Expr *op, unsigned 
BasePathSize) : Expr(SC, ty, VK, OK_Ordinary), Op(op) { CastExprBits.Kind = kind; CastExprBits.PartOfExplicitCast = false; @@ -3453,27 +3451,17 @@ class CastExpr : public Expr { "BasePathSize overflow!"); setDependence(computeDependence(this)); assert(CastConsistency()); - CastExprBits.HasFPFeatures = HasFPFeatures; } /// Construct an empty cast. - CastExpr(StmtClass SC, EmptyShell Empty, unsigned BasePathSize, - bool HasFPFeatures) - : Expr(SC, Empty) { + CastExpr(StmtClass SC, EmptyShell Empty, unsigned BasePathSize) + : Expr(SC, Empty) { CastExprBits.PartOfExplicitCast = false; CastExprBits.BasePathSize = BasePathSize; - CastExprBits.HasFPFeatures = HasFPFeatures; assert((CastExprBits.BasePathSize == BasePathSize) && "BasePathSize overflow!"); } - /// Return a pointer to the trailing FPOptions. - /// \pre hasStoredFPFeatures() == true - FPOptionsOverride *getTrailingFPFeatures(); - const FPOptionsOverride *getTrailingFPFeatures() const { - return const_cast(this)->getTrailingFPFeatures(); - } - public: CastKind getCastKind() const { return (CastKind) CastExprBits.Kind; } void setCastKind(CastKind K) { CastExprBits.Kind = K; } @@ -3518,28 +3506,6 @@ class CastExpr : public Expr { return getTargetFieldForToUnionCast(getType(), getSubExpr()->getType()); } - bool hasStoredFPFeatures() const { return CastExprBits.HasFPFeatures; } - - /// Get FPOptionsOverride from trailing storage. - FPOptionsOverride getStoredFPFeatures() const { - assert(hasStoredFPFeatures()); - return *getTrailingFPFeatures(); - } - - // Get the FP features status of this operation. Only meaningful for - // operations on floating point types. - FPOptions getFPFeaturesInEffect(const LangOptions &LO) const { - if (hasStoredFPFeatures()) - return getStoredFPFeatures().applyOverrides(LO); - return FPOptions::defaultWithoutTrailingStorage(LO); - } - - FPOptionsOverride getFPFeatures() const { - if (hasStoredFPFeatures()) - return getStoredFPFeatures(); - return FPOptionsOverride(); - } - static const FieldDecl *getTargetFieldForToUnionCast(QualType unionType, QualType opType); static const FieldDecl *getTargetFieldForToUnionCast(const RecordDecl *RD, @@ -3577,35 +3543,21 @@ class CastExpr : public Expr { /// @endcode class ImplicitCastExpr final : public CastExpr, - private llvm::TrailingObjects { + private llvm::TrailingObjects { ImplicitCastExpr(QualType ty, CastKind kind, Expr *op, - unsigned BasePathLength, FPOptionsOverride FPO, - ExprValueKind VK) - : CastExpr(ImplicitCastExprClass, ty, VK, kind, op, BasePathLength, - FPO.requiresTrailingStorage()) { - if (hasStoredFPFeatures()) - *getTrailingFPFeatures() = FPO; - } + unsigned BasePathLength, ExprValueKind VK) + : CastExpr(ImplicitCastExprClass, ty, VK, kind, op, BasePathLength) { } /// Construct an empty implicit cast. 
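// Aside — the machinery being removed in this revert is the
// llvm::TrailingObjects idiom: a variable-length payload is co-allocated
// directly after the object instead of behind a separate pointer. A
// standalone sketch under stated assumptions (hypothetical Node class; only
// llvm/Support/TrailingObjects.h and <new> are required):
#include "llvm/Support/TrailingObjects.h"
#include <new>

class Node final : private llvm::TrailingObjects<Node, unsigned> {
  friend TrailingObjects;
  unsigned NumExtras;
  explicit Node(unsigned N) : NumExtras(N) {}

public:
  // The creator must reserve space for the trailing array up front,
  // exactly as the Create/CreateEmpty factories in this hunk do.
  static Node *create(unsigned N) {
    void *Mem = ::operator new(totalSizeToAlloc<unsigned>(N));
    return new (Mem) Node(N);
  }
  unsigned *extras() { return getTrailingObjects<unsigned>(); }
  unsigned size() const { return NumExtras; }
};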
- explicit ImplicitCastExpr(EmptyShell Shell, unsigned PathSize, - bool HasFPFeatures) - : CastExpr(ImplicitCastExprClass, Shell, PathSize, HasFPFeatures) {} - - unsigned numTrailingObjects(OverloadToken) const { - return path_size(); - } + explicit ImplicitCastExpr(EmptyShell Shell, unsigned PathSize) + : CastExpr(ImplicitCastExprClass, Shell, PathSize) { } public: enum OnStack_t { OnStack }; ImplicitCastExpr(OnStack_t _, QualType ty, CastKind kind, Expr *op, - ExprValueKind VK, FPOptionsOverride FPO) - : CastExpr(ImplicitCastExprClass, ty, VK, kind, op, 0, - FPO.requiresTrailingStorage()) { - if (hasStoredFPFeatures()) - *getTrailingFPFeatures() = FPO; + ExprValueKind VK) + : CastExpr(ImplicitCastExprClass, ty, VK, kind, op, 0) { } bool isPartOfExplicitCast() const { return CastExprBits.PartOfExplicitCast; } @@ -3616,10 +3568,10 @@ class ImplicitCastExpr final static ImplicitCastExpr *Create(const ASTContext &Context, QualType T, CastKind Kind, Expr *Operand, const CXXCastPath *BasePath, - ExprValueKind Cat, FPOptionsOverride FPO); + ExprValueKind Cat); static ImplicitCastExpr *CreateEmpty(const ASTContext &Context, - unsigned PathSize, bool HasFPFeatures); + unsigned PathSize); SourceLocation getBeginLoc() const LLVM_READONLY { return getSubExpr()->getBeginLoc(); @@ -3660,14 +3612,12 @@ class ExplicitCastExpr : public CastExpr { protected: ExplicitCastExpr(StmtClass SC, QualType exprTy, ExprValueKind VK, CastKind kind, Expr *op, unsigned PathSize, - bool HasFPFeatures, TypeSourceInfo *writtenTy) - : CastExpr(SC, exprTy, VK, kind, op, PathSize, HasFPFeatures), - TInfo(writtenTy) {} + TypeSourceInfo *writtenTy) + : CastExpr(SC, exprTy, VK, kind, op, PathSize), TInfo(writtenTy) {} /// Construct an empty explicit cast. - ExplicitCastExpr(StmtClass SC, EmptyShell Shell, unsigned PathSize, - bool HasFPFeatures) - : CastExpr(SC, Shell, PathSize, HasFPFeatures) {} + ExplicitCastExpr(StmtClass SC, EmptyShell Shell, unsigned PathSize) + : CastExpr(SC, Shell, PathSize) { } public: /// getTypeInfoAsWritten - Returns the type source info for the type @@ -3690,38 +3640,29 @@ class ExplicitCastExpr : public CastExpr { /// (Type)expr. For example: @c (int)f. class CStyleCastExpr final : public ExplicitCastExpr, - private llvm::TrailingObjects { + private llvm::TrailingObjects { SourceLocation LPLoc; // the location of the left paren SourceLocation RPLoc; // the location of the right paren CStyleCastExpr(QualType exprTy, ExprValueKind vk, CastKind kind, Expr *op, - unsigned PathSize, FPOptionsOverride FPO, - TypeSourceInfo *writtenTy, SourceLocation l, SourceLocation r) - : ExplicitCastExpr(CStyleCastExprClass, exprTy, vk, kind, op, PathSize, - FPO.requiresTrailingStorage(), writtenTy), - LPLoc(l), RPLoc(r) { - if (hasStoredFPFeatures()) - *getTrailingFPFeatures() = FPO; - } + unsigned PathSize, TypeSourceInfo *writtenTy, + SourceLocation l, SourceLocation r) + : ExplicitCastExpr(CStyleCastExprClass, exprTy, vk, kind, op, PathSize, + writtenTy), LPLoc(l), RPLoc(r) {} /// Construct an empty C-style explicit cast. 
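// Aside — getFPFeaturesInEffect() above composes a base FPOptions with an
// override whose mask records which fields are pinned. A reduced model of
// that composition (field and type names are invented for the sketch, not
// Clang's):
struct FPOpts { bool AllowContract = false; bool AllowReassoc = false; };

struct FPOverrideModel {
  FPOpts Values;
  unsigned Mask = 0; // bit 0: AllowContract, bit 1: AllowReassoc
  bool requiresTrailingStorage() const { return Mask != 0; }
  FPOpts applyOverrides(FPOpts Base) const {
    if (Mask & 1) Base.AllowContract = Values.AllowContract;
    if (Mask & 2) Base.AllowReassoc = Values.AllowReassoc;
    return Base; // untouched fields keep the surrounding defaults
  }
};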
- explicit CStyleCastExpr(EmptyShell Shell, unsigned PathSize, - bool HasFPFeatures) - : ExplicitCastExpr(CStyleCastExprClass, Shell, PathSize, HasFPFeatures) {} - - unsigned numTrailingObjects(OverloadToken) const { - return path_size(); - } + explicit CStyleCastExpr(EmptyShell Shell, unsigned PathSize) + : ExplicitCastExpr(CStyleCastExprClass, Shell, PathSize) { } public: - static CStyleCastExpr * - Create(const ASTContext &Context, QualType T, ExprValueKind VK, CastKind K, - Expr *Op, const CXXCastPath *BasePath, FPOptionsOverride FPO, - TypeSourceInfo *WrittenTy, SourceLocation L, SourceLocation R); + static CStyleCastExpr *Create(const ASTContext &Context, QualType T, + ExprValueKind VK, CastKind K, + Expr *Op, const CXXCastPath *BasePath, + TypeSourceInfo *WrittenTy, SourceLocation L, + SourceLocation R); static CStyleCastExpr *CreateEmpty(const ASTContext &Context, - unsigned PathSize, bool HasFPFeatures); + unsigned PathSize); SourceLocation getLParenLoc() const { return LPLoc; } void setLParenLoc(SourceLocation L) { LPLoc = L; } diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h index 0ba5e417fd58e..6b4b57eca9bea 100644 --- a/clang/include/clang/AST/ExprCXX.h +++ b/clang/include/clang/AST/ExprCXX.h @@ -374,17 +374,16 @@ class CXXNamedCastExpr : public ExplicitCastExpr { protected: friend class ASTStmtReader; - CXXNamedCastExpr(StmtClass SC, QualType ty, ExprValueKind VK, CastKind kind, - Expr *op, unsigned PathSize, bool HasFPFeatures, + CXXNamedCastExpr(StmtClass SC, QualType ty, ExprValueKind VK, + CastKind kind, Expr *op, unsigned PathSize, TypeSourceInfo *writtenTy, SourceLocation l, - SourceLocation RParenLoc, SourceRange AngleBrackets) - : ExplicitCastExpr(SC, ty, VK, kind, op, PathSize, HasFPFeatures, - writtenTy), - Loc(l), RParenLoc(RParenLoc), AngleBrackets(AngleBrackets) {} + SourceLocation RParenLoc, + SourceRange AngleBrackets) + : ExplicitCastExpr(SC, ty, VK, kind, op, PathSize, writtenTy), Loc(l), + RParenLoc(RParenLoc), AngleBrackets(AngleBrackets) {} - explicit CXXNamedCastExpr(StmtClass SC, EmptyShell Shell, unsigned PathSize, - bool HasFPFeatures) - : ExplicitCastExpr(SC, Shell, PathSize, HasFPFeatures) {} + explicit CXXNamedCastExpr(StmtClass SC, EmptyShell Shell, unsigned PathSize) + : ExplicitCastExpr(SC, Shell, PathSize) {} public: const char *getCastName() const; @@ -420,39 +419,29 @@ class CXXNamedCastExpr : public ExplicitCastExpr { /// \c static_cast(1.0). 
class CXXStaticCastExpr final : public CXXNamedCastExpr, - private llvm::TrailingObjects { + private llvm::TrailingObjects { CXXStaticCastExpr(QualType ty, ExprValueKind vk, CastKind kind, Expr *op, unsigned pathSize, TypeSourceInfo *writtenTy, - FPOptionsOverride FPO, SourceLocation l, - SourceLocation RParenLoc, SourceRange AngleBrackets) + SourceLocation l, SourceLocation RParenLoc, + SourceRange AngleBrackets) : CXXNamedCastExpr(CXXStaticCastExprClass, ty, vk, kind, op, pathSize, - FPO.requiresTrailingStorage(), writtenTy, l, RParenLoc, - AngleBrackets) { - if (hasStoredFPFeatures()) - *getTrailingFPFeatures() = FPO; - } + writtenTy, l, RParenLoc, AngleBrackets) {} - explicit CXXStaticCastExpr(EmptyShell Empty, unsigned PathSize, - bool HasFPFeatures) - : CXXNamedCastExpr(CXXStaticCastExprClass, Empty, PathSize, - HasFPFeatures) {} - - unsigned numTrailingObjects(OverloadToken) const { - return path_size(); - } + explicit CXXStaticCastExpr(EmptyShell Empty, unsigned PathSize) + : CXXNamedCastExpr(CXXStaticCastExprClass, Empty, PathSize) {} public: friend class CastExpr; friend TrailingObjects; - static CXXStaticCastExpr * - Create(const ASTContext &Context, QualType T, ExprValueKind VK, CastKind K, - Expr *Op, const CXXCastPath *Path, TypeSourceInfo *Written, - FPOptionsOverride FPO, SourceLocation L, SourceLocation RParenLoc, - SourceRange AngleBrackets); + static CXXStaticCastExpr *Create(const ASTContext &Context, QualType T, + ExprValueKind VK, CastKind K, Expr *Op, + const CXXCastPath *Path, + TypeSourceInfo *Written, SourceLocation L, + SourceLocation RParenLoc, + SourceRange AngleBrackets); static CXXStaticCastExpr *CreateEmpty(const ASTContext &Context, - unsigned PathSize, bool hasFPFeatures); + unsigned PathSize); static bool classof(const Stmt *T) { return T->getStmtClass() == CXXStaticCastExprClass; @@ -467,17 +456,15 @@ class CXXStaticCastExpr final class CXXDynamicCastExpr final : public CXXNamedCastExpr, private llvm::TrailingObjects { - CXXDynamicCastExpr(QualType ty, ExprValueKind VK, CastKind kind, Expr *op, - unsigned pathSize, TypeSourceInfo *writtenTy, + CXXDynamicCastExpr(QualType ty, ExprValueKind VK, CastKind kind, + Expr *op, unsigned pathSize, TypeSourceInfo *writtenTy, SourceLocation l, SourceLocation RParenLoc, SourceRange AngleBrackets) : CXXNamedCastExpr(CXXDynamicCastExprClass, ty, VK, kind, op, pathSize, - /*HasFPFeatures*/ false, writtenTy, l, RParenLoc, - AngleBrackets) {} + writtenTy, l, RParenLoc, AngleBrackets) {} explicit CXXDynamicCastExpr(EmptyShell Empty, unsigned pathSize) - : CXXNamedCastExpr(CXXDynamicCastExprClass, Empty, pathSize, - /*HasFPFeatures*/ false) {} + : CXXNamedCastExpr(CXXDynamicCastExprClass, Empty, pathSize) {} public: friend class CastExpr; @@ -512,17 +499,16 @@ class CXXReinterpretCastExpr final : public CXXNamedCastExpr, private llvm::TrailingObjects { - CXXReinterpretCastExpr(QualType ty, ExprValueKind vk, CastKind kind, Expr *op, - unsigned pathSize, TypeSourceInfo *writtenTy, - SourceLocation l, SourceLocation RParenLoc, + CXXReinterpretCastExpr(QualType ty, ExprValueKind vk, CastKind kind, + Expr *op, unsigned pathSize, + TypeSourceInfo *writtenTy, SourceLocation l, + SourceLocation RParenLoc, SourceRange AngleBrackets) : CXXNamedCastExpr(CXXReinterpretCastExprClass, ty, vk, kind, op, - pathSize, /*HasFPFeatures*/ false, writtenTy, l, - RParenLoc, AngleBrackets) {} + pathSize, writtenTy, l, RParenLoc, AngleBrackets) {} CXXReinterpretCastExpr(EmptyShell Empty, unsigned pathSize) - : 
CXXNamedCastExpr(CXXReinterpretCastExprClass, Empty, pathSize, - /*HasFPFeatures*/ false) {} + : CXXNamedCastExpr(CXXReinterpretCastExprClass, Empty, pathSize) {} public: friend class CastExpr; @@ -555,13 +541,11 @@ class CXXConstCastExpr final CXXConstCastExpr(QualType ty, ExprValueKind VK, Expr *op, TypeSourceInfo *writtenTy, SourceLocation l, SourceLocation RParenLoc, SourceRange AngleBrackets) - : CXXNamedCastExpr(CXXConstCastExprClass, ty, VK, CK_NoOp, op, 0, - /*HasFPFeatures*/ false, writtenTy, l, RParenLoc, - AngleBrackets) {} + : CXXNamedCastExpr(CXXConstCastExprClass, ty, VK, CK_NoOp, op, + 0, writtenTy, l, RParenLoc, AngleBrackets) {} explicit CXXConstCastExpr(EmptyShell Empty) - : CXXNamedCastExpr(CXXConstCastExprClass, Empty, 0, - /*HasFPFeatures*/ false) {} + : CXXNamedCastExpr(CXXConstCastExprClass, Empty, 0) {} public: friend class CastExpr; @@ -594,12 +578,10 @@ class CXXAddrspaceCastExpr final TypeSourceInfo *writtenTy, SourceLocation l, SourceLocation RParenLoc, SourceRange AngleBrackets) : CXXNamedCastExpr(CXXAddrspaceCastExprClass, ty, VK, Kind, op, 0, - /*HasFPFeatures*/ false, writtenTy, l, RParenLoc, - AngleBrackets) {} + writtenTy, l, RParenLoc, AngleBrackets) {} explicit CXXAddrspaceCastExpr(EmptyShell Empty) - : CXXNamedCastExpr(CXXAddrspaceCastExprClass, Empty, 0, - /*HasFPFeatures*/ false) {} + : CXXNamedCastExpr(CXXAddrspaceCastExprClass, Empty, 0) {} public: friend class CastExpr; @@ -1711,43 +1693,34 @@ class CXXInheritedCtorInitExpr : public Expr { /// \endcode class CXXFunctionalCastExpr final : public ExplicitCastExpr, - private llvm::TrailingObjects { + private llvm::TrailingObjects { SourceLocation LParenLoc; SourceLocation RParenLoc; CXXFunctionalCastExpr(QualType ty, ExprValueKind VK, - TypeSourceInfo *writtenTy, CastKind kind, - Expr *castExpr, unsigned pathSize, - FPOptionsOverride FPO, SourceLocation lParenLoc, - SourceLocation rParenLoc) - : ExplicitCastExpr(CXXFunctionalCastExprClass, ty, VK, kind, castExpr, - pathSize, FPO.requiresTrailingStorage(), writtenTy), - LParenLoc(lParenLoc), RParenLoc(rParenLoc) { - if (hasStoredFPFeatures()) - *getTrailingFPFeatures() = FPO; - } - - explicit CXXFunctionalCastExpr(EmptyShell Shell, unsigned PathSize, - bool HasFPFeatures) - : ExplicitCastExpr(CXXFunctionalCastExprClass, Shell, PathSize, - HasFPFeatures) {} + TypeSourceInfo *writtenTy, + CastKind kind, Expr *castExpr, unsigned pathSize, + SourceLocation lParenLoc, SourceLocation rParenLoc) + : ExplicitCastExpr(CXXFunctionalCastExprClass, ty, VK, kind, + castExpr, pathSize, writtenTy), + LParenLoc(lParenLoc), RParenLoc(rParenLoc) {} - unsigned numTrailingObjects(OverloadToken) const { - return path_size(); - } + explicit CXXFunctionalCastExpr(EmptyShell Shell, unsigned PathSize) + : ExplicitCastExpr(CXXFunctionalCastExprClass, Shell, PathSize) {} public: friend class CastExpr; friend TrailingObjects; - static CXXFunctionalCastExpr * - Create(const ASTContext &Context, QualType T, ExprValueKind VK, - TypeSourceInfo *Written, CastKind Kind, Expr *Op, - const CXXCastPath *Path, FPOptionsOverride FPO, SourceLocation LPLoc, - SourceLocation RPLoc); - static CXXFunctionalCastExpr * - CreateEmpty(const ASTContext &Context, unsigned PathSize, bool HasFPFeatures); + static CXXFunctionalCastExpr *Create(const ASTContext &Context, QualType T, + ExprValueKind VK, + TypeSourceInfo *Written, + CastKind Kind, Expr *Op, + const CXXCastPath *Path, + SourceLocation LPLoc, + SourceLocation RPLoc); + static CXXFunctionalCastExpr *CreateEmpty(const ASTContext &Context, + 
unsigned PathSize); SourceLocation getLParenLoc() const { return LParenLoc; } void setLParenLoc(SourceLocation L) { LParenLoc = L; } @@ -4855,11 +4828,11 @@ class BuiltinBitCastExpr final BuiltinBitCastExpr(QualType T, ExprValueKind VK, CastKind CK, Expr *SrcExpr, TypeSourceInfo *DstType, SourceLocation KWLoc, SourceLocation RParenLoc) - : ExplicitCastExpr(BuiltinBitCastExprClass, T, VK, CK, SrcExpr, 0, false, + : ExplicitCastExpr(BuiltinBitCastExprClass, T, VK, CK, SrcExpr, 0, DstType), KWLoc(KWLoc), RParenLoc(RParenLoc) {} BuiltinBitCastExpr(EmptyShell Empty) - : ExplicitCastExpr(BuiltinBitCastExprClass, Empty, 0, false) {} + : ExplicitCastExpr(BuiltinBitCastExprClass, Empty, 0) {} SourceLocation getBeginLoc() const LLVM_READONLY { return KWLoc; } SourceLocation getEndLoc() const LLVM_READONLY { return RParenLoc; } diff --git a/clang/include/clang/AST/ExprObjC.h b/clang/include/clang/AST/ExprObjC.h index 17eec51726978..4b39d9ab96a6a 100644 --- a/clang/include/clang/AST/ExprObjC.h +++ b/clang/include/clang/AST/ExprObjC.h @@ -1639,12 +1639,12 @@ class ObjCBridgedCastExpr final CastKind CK, SourceLocation BridgeKeywordLoc, TypeSourceInfo *TSInfo, Expr *Operand) : ExplicitCastExpr(ObjCBridgedCastExprClass, TSInfo->getType(), VK_RValue, - CK, Operand, 0, false, TSInfo), + CK, Operand, 0, TSInfo), LParenLoc(LParenLoc), BridgeKeywordLoc(BridgeKeywordLoc), Kind(Kind) {} /// Construct an empty Objective-C bridged cast. explicit ObjCBridgedCastExpr(EmptyShell Shell) - : ExplicitCastExpr(ObjCBridgedCastExprClass, Shell, 0, false) {} + : ExplicitCastExpr(ObjCBridgedCastExprClass, Shell, 0) {} SourceLocation getLParenLoc() const { return LParenLoc; } diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h index 4a6e8182e5a06..1e04e64727a08 100644 --- a/clang/include/clang/AST/Stmt.h +++ b/clang/include/clang/AST/Stmt.h @@ -521,9 +521,6 @@ class alignas(void *) Stmt { unsigned Kind : 6; unsigned PartOfExplicitCast : 1; // Only set for ImplicitCastExpr. - /// True if the call expression has some floating-point features. - unsigned HasFPFeatures : 1; - /// The number of CXXBaseSpecifiers in the cast. 14 bits would be enough /// here. ([implimits] Direct and indirect base classes [16384]). 
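// Aside — the bit-budget comment above can be made checkable. A sketch with
// hypothetical widths (Kind : 6 and PartOfExplicitCast : 1 match this hunk;
// the 14-bit BasePathSize is the width the comment suggests would suffice):
struct CastExprBitsSketch {
  unsigned Kind : 6;               // 64 cast kinds fit
  unsigned PartOfExplicitCast : 1;
  unsigned BasePathSize : 14;      // [implimits] allows 16384 base classes
};
static_assert(sizeof(CastExprBitsSketch) <= sizeof(unsigned),
              "all three fields pack into a single word");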
unsigned BasePathSize; diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h index 15ca348f47667..f68a5dbfc2a0d 100644 --- a/clang/include/clang/AST/TextNodeDumper.h +++ b/clang/include/clang/AST/TextNodeDumper.h @@ -270,7 +270,6 @@ class TextNodeDumper void VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *Node); void VisitCXXThisExpr(const CXXThisExpr *Node); void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *Node); - void VisitCXXStaticCastExpr(const CXXStaticCastExpr *Node); void VisitCXXUnresolvedConstructExpr(const CXXUnresolvedConstructExpr *Node); void VisitCXXConstructExpr(const CXXConstructExpr *Node); void VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *Node); diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 3614496ded967..2c8bb55cb5d93 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -497,8 +497,6 @@ class FPOptionsOverride { FPOptionsOverride() {} FPOptionsOverride(const LangOptions &LO) : Options(LO), OverrideMask(OverrideMaskBits) {} - FPOptionsOverride(FPOptions FPO) - : Options(FPO), OverrideMask(OverrideMaskBits) {} bool requiresTrailingStorage() const { return OverrideMask != 0; } diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index dd3c8518c2a3e..7334d5b659e20 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -6930,7 +6930,7 @@ ExpectedStmt ASTNodeImporter::VisitImplicitCastExpr(ImplicitCastExpr *E) { return ImplicitCastExpr::Create( Importer.getToContext(), *ToTypeOrErr, E->getCastKind(), *ToSubExprOrErr, - &(*ToBasePathOrErr), E->getValueKind(), E->getFPFeatures()); + &(*ToBasePathOrErr), E->getValueKind()); } ExpectedStmt ASTNodeImporter::VisitExplicitCastExpr(ExplicitCastExpr *E) { @@ -6957,8 +6957,8 @@ ExpectedStmt ASTNodeImporter::VisitExplicitCastExpr(ExplicitCastExpr *E) { return ToRParenLocOrErr.takeError(); return CStyleCastExpr::Create( Importer.getToContext(), ToType, E->getValueKind(), E->getCastKind(), - ToSubExpr, ToBasePath, CCE->getFPFeatures(), ToTypeInfoAsWritten, - *ToLParenLocOrErr, *ToRParenLocOrErr); + ToSubExpr, ToBasePath, ToTypeInfoAsWritten, *ToLParenLocOrErr, + *ToRParenLocOrErr); } case Stmt::CXXFunctionalCastExprClass: { @@ -6971,8 +6971,8 @@ ExpectedStmt ASTNodeImporter::VisitExplicitCastExpr(ExplicitCastExpr *E) { return ToRParenLocOrErr.takeError(); return CXXFunctionalCastExpr::Create( Importer.getToContext(), ToType, E->getValueKind(), ToTypeInfoAsWritten, - E->getCastKind(), ToSubExpr, ToBasePath, FCE->getFPFeatures(), - *ToLParenLocOrErr, *ToRParenLocOrErr); + E->getCastKind(), ToSubExpr, ToBasePath, *ToLParenLocOrErr, + *ToRParenLocOrErr); } case Stmt::ObjCBridgedCastExprClass: { @@ -7815,11 +7815,10 @@ ExpectedStmt ASTNodeImporter::VisitCXXNamedCastExpr(CXXNamedCastExpr *E) { if (!ToBasePathOrErr) return ToBasePathOrErr.takeError(); - if (auto CCE = dyn_cast(E)) { + if (isa(E)) { return CXXStaticCastExpr::Create( Importer.getToContext(), ToType, VK, CK, ToSubExpr, &(*ToBasePathOrErr), - ToTypeInfoAsWritten, CCE->getFPFeatures(), ToOperatorLoc, ToRParenLoc, - ToAngleBrackets); + ToTypeInfoAsWritten, ToOperatorLoc, ToRParenLoc, ToAngleBrackets); } else if (isa(E)) { return CXXDynamicCastExpr::Create( Importer.getToContext(), ToType, VK, CK, ToSubExpr, &(*ToBasePathOrErr), diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index b664224aa7323..15f3df0fd2168 100644 --- a/clang/lib/AST/Expr.cpp +++ 
b/clang/lib/AST/Expr.cpp @@ -1892,42 +1892,19 @@ const FieldDecl *CastExpr::getTargetFieldForToUnionCast(const RecordDecl *RD, return nullptr; } -FPOptionsOverride *CastExpr::getTrailingFPFeatures() { - assert(hasStoredFPFeatures()); - switch (getStmtClass()) { - case ImplicitCastExprClass: - return static_cast(this) - ->getTrailingObjects(); - case CStyleCastExprClass: - return static_cast(this) - ->getTrailingObjects(); - case CXXFunctionalCastExprClass: - return static_cast(this) - ->getTrailingObjects(); - case CXXStaticCastExprClass: - return static_cast(this) - ->getTrailingObjects(); - default: - llvm_unreachable("Cast does not have FPFeatures"); - } -} - ImplicitCastExpr *ImplicitCastExpr::Create(const ASTContext &C, QualType T, CastKind Kind, Expr *Operand, const CXXCastPath *BasePath, - ExprValueKind VK, - FPOptionsOverride FPO) { + ExprValueKind VK) { unsigned PathSize = (BasePath ? BasePath->size() : 0); - void *Buffer = - C.Allocate(totalSizeToAlloc( - PathSize, FPO.requiresTrailingStorage())); + void *Buffer = C.Allocate(totalSizeToAlloc(PathSize)); // Per C++ [conv.lval]p3, lvalue-to-rvalue conversions on class and // std::nullptr_t have special semantics not captured by CK_LValueToRValue. assert((Kind != CK_LValueToRValue || !(T->isNullPtrType() || T->getAsCXXRecordDecl())) && "invalid type for lvalue-to-rvalue conversion"); ImplicitCastExpr *E = - new (Buffer) ImplicitCastExpr(T, Kind, Operand, PathSize, FPO, VK); + new (Buffer) ImplicitCastExpr(T, Kind, Operand, PathSize, VK); if (PathSize) std::uninitialized_copy_n(BasePath->data(), BasePath->size(), E->getTrailingObjects()); @@ -1935,26 +1912,21 @@ ImplicitCastExpr *ImplicitCastExpr::Create(const ASTContext &C, QualType T, } ImplicitCastExpr *ImplicitCastExpr::CreateEmpty(const ASTContext &C, - unsigned PathSize, - bool HasFPFeatures) { - void *Buffer = - C.Allocate(totalSizeToAlloc( - PathSize, HasFPFeatures)); - return new (Buffer) ImplicitCastExpr(EmptyShell(), PathSize, HasFPFeatures); + unsigned PathSize) { + void *Buffer = C.Allocate(totalSizeToAlloc(PathSize)); + return new (Buffer) ImplicitCastExpr(EmptyShell(), PathSize); } + CStyleCastExpr *CStyleCastExpr::Create(const ASTContext &C, QualType T, ExprValueKind VK, CastKind K, Expr *Op, const CXXCastPath *BasePath, - FPOptionsOverride FPO, TypeSourceInfo *WrittenTy, SourceLocation L, SourceLocation R) { unsigned PathSize = (BasePath ? 
BasePath->size() : 0); - void *Buffer = - C.Allocate(totalSizeToAlloc( - PathSize, FPO.requiresTrailingStorage())); + void *Buffer = C.Allocate(totalSizeToAlloc(PathSize)); CStyleCastExpr *E = - new (Buffer) CStyleCastExpr(T, VK, K, Op, PathSize, FPO, WrittenTy, L, R); + new (Buffer) CStyleCastExpr(T, VK, K, Op, PathSize, WrittenTy, L, R); if (PathSize) std::uninitialized_copy_n(BasePath->data(), BasePath->size(), E->getTrailingObjects()); @@ -1962,12 +1934,9 @@ CStyleCastExpr *CStyleCastExpr::Create(const ASTContext &C, QualType T, } CStyleCastExpr *CStyleCastExpr::CreateEmpty(const ASTContext &C, - unsigned PathSize, - bool HasFPFeatures) { - void *Buffer = - C.Allocate(totalSizeToAlloc( - PathSize, HasFPFeatures)); - return new (Buffer) CStyleCastExpr(EmptyShell(), PathSize, HasFPFeatures); + unsigned PathSize) { + void *Buffer = C.Allocate(totalSizeToAlloc(PathSize)); + return new (Buffer) CStyleCastExpr(EmptyShell(), PathSize); } /// getOpcodeStr - Turn an Opcode enum value into the punctuation char it diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp index 3f3f2303587dd..3d61496f30e2a 100644 --- a/clang/lib/AST/ExprCXX.cpp +++ b/clang/lib/AST/ExprCXX.cpp @@ -690,18 +690,19 @@ const char *CXXNamedCastExpr::getCastName() const { } } -CXXStaticCastExpr * -CXXStaticCastExpr::Create(const ASTContext &C, QualType T, ExprValueKind VK, - CastKind K, Expr *Op, const CXXCastPath *BasePath, - TypeSourceInfo *WrittenTy, FPOptionsOverride FPO, - SourceLocation L, SourceLocation RParenLoc, - SourceRange AngleBrackets) { +CXXStaticCastExpr *CXXStaticCastExpr::Create(const ASTContext &C, QualType T, + ExprValueKind VK, + CastKind K, Expr *Op, + const CXXCastPath *BasePath, + TypeSourceInfo *WrittenTy, + SourceLocation L, + SourceLocation RParenLoc, + SourceRange AngleBrackets) { unsigned PathSize = (BasePath ? 
BasePath->size() : 0); - void *Buffer = - C.Allocate(totalSizeToAlloc( - PathSize, FPO.requiresTrailingStorage())); - auto *E = new (Buffer) CXXStaticCastExpr(T, VK, K, Op, PathSize, WrittenTy, - FPO, L, RParenLoc, AngleBrackets); + void *Buffer = C.Allocate(totalSizeToAlloc(PathSize)); + auto *E = + new (Buffer) CXXStaticCastExpr(T, VK, K, Op, PathSize, WrittenTy, L, + RParenLoc, AngleBrackets); if (PathSize) std::uninitialized_copy_n(BasePath->data(), BasePath->size(), E->getTrailingObjects()); @@ -709,12 +710,9 @@ CXXStaticCastExpr::Create(const ASTContext &C, QualType T, ExprValueKind VK, } CXXStaticCastExpr *CXXStaticCastExpr::CreateEmpty(const ASTContext &C, - unsigned PathSize, - bool HasFPFeatures) { - void *Buffer = - C.Allocate(totalSizeToAlloc( - PathSize, HasFPFeatures)); - return new (Buffer) CXXStaticCastExpr(EmptyShell(), PathSize, HasFPFeatures); + unsigned PathSize) { + void *Buffer = C.Allocate(totalSizeToAlloc(PathSize)); + return new (Buffer) CXXStaticCastExpr(EmptyShell(), PathSize); } CXXDynamicCastExpr *CXXDynamicCastExpr::Create(const ASTContext &C, QualType T, @@ -825,30 +823,25 @@ CXXAddrspaceCastExpr *CXXAddrspaceCastExpr::CreateEmpty(const ASTContext &C) { return new (C) CXXAddrspaceCastExpr(EmptyShell()); } -CXXFunctionalCastExpr *CXXFunctionalCastExpr::Create( - const ASTContext &C, QualType T, ExprValueKind VK, TypeSourceInfo *Written, - CastKind K, Expr *Op, const CXXCastPath *BasePath, FPOptionsOverride FPO, - SourceLocation L, SourceLocation R) { +CXXFunctionalCastExpr * +CXXFunctionalCastExpr::Create(const ASTContext &C, QualType T, ExprValueKind VK, + TypeSourceInfo *Written, CastKind K, Expr *Op, + const CXXCastPath *BasePath, + SourceLocation L, SourceLocation R) { unsigned PathSize = (BasePath ? BasePath->size() : 0); - void *Buffer = - C.Allocate(totalSizeToAlloc( - PathSize, FPO.requiresTrailingStorage())); - auto *E = new (Buffer) - CXXFunctionalCastExpr(T, VK, Written, K, Op, PathSize, FPO, L, R); + void *Buffer = C.Allocate(totalSizeToAlloc(PathSize)); + auto *E = + new (Buffer) CXXFunctionalCastExpr(T, VK, Written, K, Op, PathSize, L, R); if (PathSize) std::uninitialized_copy_n(BasePath->data(), BasePath->size(), E->getTrailingObjects()); return E; } -CXXFunctionalCastExpr *CXXFunctionalCastExpr::CreateEmpty(const ASTContext &C, - unsigned PathSize, - bool HasFPFeatures) { - void *Buffer = - C.Allocate(totalSizeToAlloc( - PathSize, HasFPFeatures)); - return new (Buffer) - CXXFunctionalCastExpr(EmptyShell(), PathSize, HasFPFeatures); +CXXFunctionalCastExpr * +CXXFunctionalCastExpr::CreateEmpty(const ASTContext &C, unsigned PathSize) { + void *Buffer = C.Allocate(totalSizeToAlloc(PathSize)); + return new (Buffer) CXXFunctionalCastExpr(EmptyShell(), PathSize); } SourceLocation CXXFunctionalCastExpr::getBeginLoc() const { diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index acbc0434931dc..16c4c3736a4a3 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -964,8 +964,6 @@ void TextNodeDumper::VisitCastExpr(const CastExpr *Node) { } dumpBasePath(OS, Node); OS << ">"; - if (Node->hasStoredFPFeatures()) - printFPOptions(Node->getFPFeatures()); } void TextNodeDumper::VisitImplicitCastExpr(const ImplicitCastExpr *Node) { @@ -1134,14 +1132,6 @@ void TextNodeDumper::VisitCXXFunctionalCastExpr( const CXXFunctionalCastExpr *Node) { OS << " functional cast to " << Node->getTypeAsWritten().getAsString() << " <" << Node->getCastKindName() << ">"; - if (Node->hasStoredFPFeatures()) - 
printFPOptions(Node->getFPFeatures()); -} - -void TextNodeDumper::VisitCXXStaticCastExpr(const CXXStaticCastExpr *Node) { - VisitCXXNamedCastExpr(Node); - if (Node->hasStoredFPFeatures()) - printFPOptions(Node->getFPFeatures()); } void TextNodeDumper::VisitCXXUnresolvedConstructExpr( diff --git a/clang/lib/Analysis/BodyFarm.cpp b/clang/lib/Analysis/BodyFarm.cpp index 603da67156254..f68b06487f98e 100644 --- a/clang/lib/Analysis/BodyFarm.cpp +++ b/clang/lib/Analysis/BodyFarm.cpp @@ -166,21 +166,23 @@ ASTMaker::makeLvalueToRvalue(const VarDecl *Arg, ImplicitCastExpr *ASTMaker::makeImplicitCast(const Expr *Arg, QualType Ty, CastKind CK) { return ImplicitCastExpr::Create(C, Ty, - /* CastKind=*/CK, - /* Expr=*/const_cast(Arg), - /* CXXCastPath=*/nullptr, - /* ExprValueKind=*/VK_RValue, - /* FPFeatures */ FPOptionsOverride()); + /* CastKind=*/ CK, + /* Expr=*/ const_cast(Arg), + /* CXXCastPath=*/ nullptr, + /* ExprValueKind=*/ VK_RValue); } Expr *ASTMaker::makeIntegralCast(const Expr *Arg, QualType Ty) { if (Arg->getType() == Ty) return const_cast(Arg); - return makeImplicitCast(Arg, Ty, CK_IntegralCast); + + return ImplicitCastExpr::Create(C, Ty, CK_IntegralCast, + const_cast(Arg), nullptr, VK_RValue); } ImplicitCastExpr *ASTMaker::makeIntegralCastToBoolean(const Expr *Arg) { - return makeImplicitCast(Arg, C.BoolTy, CK_IntegralToBoolean); + return ImplicitCastExpr::Create(C, C.BoolTy, CK_IntegralToBoolean, + const_cast(Arg), nullptr, VK_RValue); } ObjCBoolLiteralExpr *ASTMaker::makeObjCBool(bool Val) { diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index 74de3df9d9005..615b782350414 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -1024,7 +1024,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { type, VK_LValue, SourceLocation()); ImplicitCastExpr l2r(ImplicitCastExpr::OnStack, type, CK_LValueToRValue, - &declRef, VK_RValue, CurFPFeatures); + &declRef, VK_RValue); // FIXME: Pass a specific location for the expr init so that the store is // attributed to a reasonable location - otherwise it may be attributed to // locations of subexpressions in the initialization. diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index f2807eefd7f34..26dfb6259a290 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -1449,9 +1449,9 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, ValueDecl *selfDecl = setterMethod->getSelfDecl(); DeclRefExpr self(getContext(), selfDecl, false, selfDecl->getType(), VK_LValue, SourceLocation()); - ImplicitCastExpr selfLoad(ImplicitCastExpr::OnStack, selfDecl->getType(), - CK_LValueToRValue, &self, VK_RValue, - FPOptionsOverride(CurFPFeatures)); + ImplicitCastExpr selfLoad(ImplicitCastExpr::OnStack, + selfDecl->getType(), CK_LValueToRValue, &self, + VK_RValue); ObjCIvarRefExpr ivarRef(ivar, ivar->getType().getNonReferenceType(), SourceLocation(), SourceLocation(), &selfLoad, true, true); @@ -1462,7 +1462,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, SourceLocation()); ImplicitCastExpr argLoad(ImplicitCastExpr::OnStack, argType.getUnqualifiedType(), CK_LValueToRValue, - &arg, VK_RValue, CurFPFeatures); + &arg, VK_RValue); // The property type can differ from the ivar type in some situations with // Objective-C pointer types, we can always bit cast the RHS in these cases. 
@@ -1483,8 +1483,9 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, } else if (ivarRef.getType()->isPointerType()) { argCK = CK_BitCast; } - ImplicitCastExpr argCast(ImplicitCastExpr::OnStack, ivarRef.getType(), argCK, - &argLoad, VK_RValue, CurFPFeatures); + ImplicitCastExpr argCast(ImplicitCastExpr::OnStack, + ivarRef.getType(), argCK, &argLoad, + VK_RValue); Expr *finalArg = &argLoad; if (!getContext().hasSameUnqualifiedType(ivarRef.getType(), argLoad.getType())) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 19dc9a87f239c..b9260892bd215 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -4137,7 +4137,7 @@ createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, PrivateVD->setInitStyle(VarDecl::CInit); PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, InitRef, /*BasePath=*/nullptr, - VK_RValue, FPOptionsOverride())); + VK_RValue)); Data.FirstprivateVars.emplace_back(OrigRef); Data.FirstprivateCopies.emplace_back(PrivateRef); Data.FirstprivateInits.emplace_back(InitRef); diff --git a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp index c0c81221b2344..8c41e71ef0187 100644 --- a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp +++ b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp @@ -586,8 +586,7 @@ namespace { CastKind Kind, Expr *E) { TypeSourceInfo *TInfo = Ctx->getTrivialTypeSourceInfo(Ty, SourceLocation()); return CStyleCastExpr::Create(*Ctx, Ty, VK_RValue, Kind, E, nullptr, - FPOptionsOverride(), TInfo, - SourceLocation(), SourceLocation()); + TInfo, SourceLocation(), SourceLocation()); } bool ImplementationIsNonLazy(const ObjCImplDecl *OD) const { @@ -2106,8 +2105,8 @@ RewriteModernObjC::SynthesizeCallToFunctionDecl(FunctionDecl *FD, // Now, we cast the reference to a pointer to the objc_msgSend type. QualType pToFunc = Context->getPointerType(msgSendType); ImplicitCastExpr *ICE = - ImplicitCastExpr::Create(*Context, pToFunc, CK_FunctionToPointerDecay, - DRE, nullptr, VK_RValue, FPOptionsOverride()); + ImplicitCastExpr::Create(*Context, pToFunc, CK_FunctionToPointerDecay, + DRE, nullptr, VK_RValue); const auto *FT = msgSendType->castAs(); CallExpr *Exp = diff --git a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp index 990509a84b06c..4ecd6e95de10e 100644 --- a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp +++ b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp @@ -492,8 +492,7 @@ namespace { CastKind Kind, Expr *E) { TypeSourceInfo *TInfo = Ctx->getTrivialTypeSourceInfo(Ty, SourceLocation()); return CStyleCastExpr::Create(*Ctx, Ty, VK_RValue, Kind, E, nullptr, - FPOptionsOverride(), TInfo, - SourceLocation(), SourceLocation()); + TInfo, SourceLocation(), SourceLocation()); } StringLiteral *getStringLiteral(StringRef Str) { @@ -2023,8 +2022,8 @@ RewriteObjC::SynthesizeCallToFunctionDecl(FunctionDecl *FD, // Now, we cast the reference to a pointer to the objc_msgSend type. 
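// Aside — what the rewriter emits here, written out by hand: the variadic
// objc_msgSend is cast to a precise function-pointer type so the call site
// uses the message's real signature. Declarations below are sketched and the
// selector/argument types are hypothetical:
typedef struct objc_object *id;
typedef struct objc_selector *SEL;
extern "C" id objc_msgSend(id, SEL, ...);

static id sendSetValue(id Obj, SEL Sel, int V) {
  // The cast mirrors pToFunc above: pointer-to-function of the exact type.
  id (*Send)(id, SEL, int) = (id (*)(id, SEL, int))objc_msgSend;
  return Send(Obj, Sel, V);
}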
QualType pToFunc = Context->getPointerType(msgSendType); ImplicitCastExpr *ICE = - ImplicitCastExpr::Create(*Context, pToFunc, CK_FunctionToPointerDecay, - DRE, nullptr, VK_RValue, FPOptionsOverride()); + ImplicitCastExpr::Create(*Context, pToFunc, CK_FunctionToPointerDecay, + DRE, nullptr, VK_RValue); const auto *FT = msgSendType->castAs(); diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 375fe3b28dec3..47484c5be9c9b 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -586,8 +586,7 @@ ExprResult Sema::ImpCastExprToType(Expr *E, QualType Ty, } } - return ImplicitCastExpr::Create(Context, Ty, Kind, E, BasePath, VK, - CurFPFeatureOverrides()); + return ImplicitCastExpr::Create(Context, Ty, Kind, E, BasePath, VK); } /// ScalarTypeToBooleanCastKind - Returns the cast kind corresponding diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index 5222722e71810..726900c59f20e 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -105,9 +105,10 @@ namespace { // If this is an unbridged cast, wrap the result in an implicit // cast that yields the unbridged-cast placeholder type. if (IsARCUnbridgedCast) { - castExpr = ImplicitCastExpr::Create( - Self.Context, Self.Context.ARCUnbridgedCastTy, CK_Dependent, - castExpr, nullptr, castExpr->getValueKind(), FPOptionsOverride()); + castExpr = ImplicitCastExpr::Create(Self.Context, + Self.Context.ARCUnbridgedCastTy, + CK_Dependent, castExpr, nullptr, + castExpr->getValueKind()); } updatePartOfExplicitCastFlags(castExpr); return castExpr; @@ -360,10 +361,11 @@ Sema::BuildCXXNamedCast(SourceLocation OpLoc, tok::TokenKind Kind, DiscardMisalignedMemberAddress(DestType.getTypePtr(), E); } - return Op.complete(CXXStaticCastExpr::Create( - Context, Op.ResultType, Op.ValueKind, Op.Kind, Op.SrcExpr.get(), - &Op.BasePath, DestTInfo, CurFPFeatureOverrides(), OpLoc, - Parens.getEnd(), AngleBrackets)); + return Op.complete(CXXStaticCastExpr::Create(Context, Op.ResultType, + Op.ValueKind, Op.Kind, Op.SrcExpr.get(), + &Op.BasePath, DestTInfo, + OpLoc, Parens.getEnd(), + AngleBrackets)); } } } @@ -3031,9 +3033,9 @@ ExprResult Sema::BuildCStyleCastExpr(SourceLocation LPLoc, // -Wcast-qual DiagnoseCastQual(Op.Self, Op.SrcExpr, Op.DestType); - return Op.complete(CStyleCastExpr::Create( - Context, Op.ResultType, Op.ValueKind, Op.Kind, Op.SrcExpr.get(), - &Op.BasePath, CurFPFeatureOverrides(), CastTypeInfo, LPLoc, RPLoc)); + return Op.complete(CStyleCastExpr::Create(Context, Op.ResultType, + Op.ValueKind, Op.Kind, Op.SrcExpr.get(), + &Op.BasePath, CastTypeInfo, LPLoc, RPLoc)); } ExprResult Sema::BuildCXXFunctionalCastExpr(TypeSourceInfo *CastTypeInfo, @@ -3056,7 +3058,7 @@ ExprResult Sema::BuildCXXFunctionalCastExpr(TypeSourceInfo *CastTypeInfo, if (auto *ConstructExpr = dyn_cast(SubExpr)) ConstructExpr->setParenOrBraceRange(SourceRange(LPLoc, RPLoc)); - return Op.complete(CXXFunctionalCastExpr::Create( - Context, Op.ResultType, Op.ValueKind, CastTypeInfo, Op.Kind, - Op.SrcExpr.get(), &Op.BasePath, CurFPFeatureOverrides(), LPLoc, RPLoc)); + return Op.complete(CXXFunctionalCastExpr::Create(Context, Op.ResultType, + Op.ValueKind, CastTypeInfo, Op.Kind, + Op.SrcExpr.get(), &Op.BasePath, LPLoc, RPLoc)); } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 99e6678be51c9..a9e6113dc7bb5 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -18172,9 +18172,11 @@ void Sema::ActOnEnumBody(SourceLocation EnumLoc, SourceRange BraceRange, // Adjust the Expr 
initializer and type. if (ECD->getInitExpr() && !Context.hasSameType(NewTy, ECD->getInitExpr()->getType())) - ECD->setInitExpr(ImplicitCastExpr::Create( - Context, NewTy, CK_IntegralCast, ECD->getInitExpr(), - /*base paths*/ nullptr, VK_RValue, CurFPFeatureOverrides())); + ECD->setInitExpr(ImplicitCastExpr::Create(Context, NewTy, + CK_IntegralCast, + ECD->getInitExpr(), + /*base paths*/ nullptr, + VK_RValue)); if (getLangOpts().CPlusPlus) // C++ [dcl.enum]p4: Following the closing brace of an // enum-specifier, each enumerator has the type of its diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 3a8a7708949e1..0a4f75ad341b1 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -1185,8 +1185,7 @@ static bool checkTupleLikeDecomposition(Sema &S, // an xvalue otherwise if (!Src->getType()->isLValueReferenceType()) E = ImplicitCastExpr::Create(S.Context, E.get()->getType(), CK_NoOp, - E.get(), nullptr, VK_XValue, - S.CurFPFeatureOverrides()); + E.get(), nullptr, VK_XValue); TemplateArgumentListInfo Args(Loc, Loc); Args.addArgument( @@ -14870,9 +14869,9 @@ void Sema::DefineImplicitLambdaToBlockPointerConversion( // (since it's unusable otherwise); in the case where we inline the // block literal, it has block literal lifetime semantics. if (!BuildBlock.isInvalid() && !getLangOpts().ObjCAutoRefCount) - BuildBlock = ImplicitCastExpr::Create( - Context, BuildBlock.get()->getType(), CK_CopyAndAutoreleaseBlockObject, - BuildBlock.get(), nullptr, VK_RValue, CurFPFeatureOverrides()); + BuildBlock = ImplicitCastExpr::Create(Context, BuildBlock.get()->getType(), + CK_CopyAndAutoreleaseBlockObject, + BuildBlock.get(), nullptr, VK_RValue); if (BuildBlock.isInvalid()) { Diag(CurrentLocation, diag::note_lambda_to_block_conv); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index a33d6e2a83a16..d6f0a12106fe0 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -695,8 +695,7 @@ ExprResult Sema::DefaultLvalueConversion(Expr *E) { // C++ [conv.lval]p3: // If T is cv std::nullptr_t, the result is a null pointer constant. CastKind CK = T->isNullPtrType() ? CK_NullToPointer : CK_LValueToRValue; - Res = ImplicitCastExpr::Create(Context, T, CK, E, nullptr, VK_RValue, - CurFPFeatureOverrides()); + Res = ImplicitCastExpr::Create(Context, T, CK, E, nullptr, VK_RValue); // C11 6.3.2.1p2: // ... if the lvalue has atomic type, the value has the non-atomic version @@ -704,7 +703,7 @@ ExprResult Sema::DefaultLvalueConversion(Expr *E) { if (const AtomicType *Atomic = T->getAs()) { T = Atomic->getValueType().getUnqualifiedType(); Res = ImplicitCastExpr::Create(Context, T, CK_AtomicToNonAtomic, Res.get(), - nullptr, VK_RValue, CurFPFeatureOverrides()); + nullptr, VK_RValue); } return Res; @@ -6961,9 +6960,9 @@ void Sema::maybeExtendBlockObject(ExprResult &E) { // Only do this in an r-value context. 
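// Aside — the C11 6.3.2.1p2 note earlier in this SemaExpr.cpp diff has a
// direct C++ analogue: reading through an atomic glvalue produces the plain,
// non-atomic value type. A compilable illustration (std::atomic standing in
// for C's _Atomic):
#include <atomic>

std::atomic<int> Counter{0};
int snapshot() {
  return Counter; // the load yields a plain int, not an atomic<int>
}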
if (!getLangOpts().ObjCAutoRefCount) return; - E = ImplicitCastExpr::Create( - Context, E.get()->getType(), CK_ARCExtendBlockObject, E.get(), - /*base path*/ nullptr, VK_RValue, CurFPFeatureOverrides()); + E = ImplicitCastExpr::Create(Context, E.get()->getType(), + CK_ARCExtendBlockObject, E.get(), + /*base path*/ nullptr, VK_RValue); Cleanup.setExprNeedsCleanups(true); } diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 09976197194ab..d1fcdf3545278 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1503,8 +1503,7 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo, : SourceRange(LParenOrBraceLoc, RParenOrBraceLoc); Result = CXXFunctionalCastExpr::Create( Context, ResultType, Expr::getValueKindForType(Ty), TInfo, CK_NoOp, - Result.get(), /*Path=*/nullptr, CurFPFeatureOverrides(), - Locs.getBegin(), Locs.getEnd()); + Result.get(), /*Path=*/nullptr, Locs.getBegin(), Locs.getEnd()); } return Result; @@ -2205,7 +2204,7 @@ Sema::BuildCXXNew(SourceRange Range, bool UseGlobal, SizeTy, SourceLocation()); ImplicitCastExpr DesiredAlignment(ImplicitCastExpr::OnStack, AlignValT, CK_IntegralCast, &AlignmentLiteral, - VK_RValue, CurFPFeatureOverrides()); + VK_RValue); // Adjust placement args by prepending conjured size and alignment exprs. llvm::SmallVector CallArgs; @@ -3916,8 +3915,7 @@ static ExprResult BuildCXXCastArgument(Sema &S, // Record usage of conversion in an implicit cast. Result = ImplicitCastExpr::Create(S.Context, Result.get()->getType(), CK_UserDefinedConversion, Result.get(), - nullptr, Result.get()->getValueKind(), - S.CurFPFeatureOverrides()); + nullptr, Result.get()->getValueKind()); return S.MaybeBindToTemporary(Result.get()); } @@ -4098,8 +4096,7 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType, if (const AtomicType *FromAtomic = FromType->getAs()) { FromType = FromAtomic->getValueType().getUnqualifiedType(); From = ImplicitCastExpr::Create(Context, FromType, CK_AtomicToNonAtomic, - From, /*BasePath=*/nullptr, VK_RValue, - CurFPFeatureOverrides()); + From, /*BasePath=*/nullptr, VK_RValue); } break; @@ -6843,7 +6840,7 @@ ExprResult Sema::MaybeBindToTemporary(Expr *E) { CastKind ck = (ReturnsRetained ? CK_ARCConsumeObject : CK_ARCReclaimReturnedObject); return ImplicitCastExpr::Create(Context, E->getType(), ck, E, nullptr, - VK_RValue, CurFPFeatureOverrides()); + VK_RValue); } if (E->getType().isDestructedType() == QualType::DK_nontrivial_c_struct) diff --git a/clang/lib/Sema/SemaExprObjC.cpp b/clang/lib/Sema/SemaExprObjC.cpp index 9a0c4e2d4320d..228a1ec3ba1f9 100644 --- a/clang/lib/Sema/SemaExprObjC.cpp +++ b/clang/lib/Sema/SemaExprObjC.cpp @@ -4462,8 +4462,8 @@ Sema::CheckObjCConversion(SourceRange castRange, QualType castType, // If the result is +1, consume it here. case ACC_plusOne: castExpr = ImplicitCastExpr::Create(Context, castExpr->getType(), - CK_ARCConsumeObject, castExpr, nullptr, - VK_RValue, CurFPFeatureOverrides()); + CK_ARCConsumeObject, castExpr, + nullptr, VK_RValue); Cleanup.setExprNeedsCleanups(true); return ACR_okay; } @@ -4689,9 +4689,9 @@ ExprResult Sema::BuildObjCBridgedCast(SourceLocation LParenLoc, case OBC_BridgeRetained: // Produce the object before casting it. 
- SubExpr = ImplicitCastExpr::Create(Context, FromType, CK_ARCProduceObject, - SubExpr, nullptr, VK_RValue, - CurFPFeatureOverrides()); + SubExpr = ImplicitCastExpr::Create(Context, FromType, + CK_ARCProduceObject, + SubExpr, nullptr, VK_RValue); break; case OBC_BridgeTransfer: { @@ -4729,9 +4729,8 @@ ExprResult Sema::BuildObjCBridgedCast(SourceLocation LParenLoc, if (MustConsume) { Cleanup.setExprNeedsCleanups(true); - Result = - ImplicitCastExpr::Create(Context, T, CK_ARCConsumeObject, Result, - nullptr, VK_RValue, CurFPFeatureOverrides()); + Result = ImplicitCastExpr::Create(Context, T, CK_ARCConsumeObject, Result, + nullptr, VK_RValue); } return Result; diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index b6bd6cff4d77d..f63d600032ce4 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -2891,8 +2891,7 @@ InitListChecker::CheckDesignatedInitializer(const InitializedEntity &Entity, Context, CodeUnit, PromotedCharTy, SubExpr->getExprLoc()); if (CharTy != PromotedCharTy) Init = ImplicitCastExpr::Create(Context, CharTy, CK_IntegralCast, - Init, nullptr, VK_RValue, - SemaRef.CurFPFeatureOverrides()); + Init, nullptr, VK_RValue); StructuredList->updateInit(Context, i, Init); } } else { @@ -2914,8 +2913,7 @@ InitListChecker::CheckDesignatedInitializer(const InitializedEntity &Entity, Context, CodeUnit, PromotedCharTy, SubExpr->getExprLoc()); if (CharTy != PromotedCharTy) Init = ImplicitCastExpr::Create(Context, CharTy, CK_IntegralCast, - Init, nullptr, VK_RValue, - SemaRef.CurFPFeatureOverrides()); + Init, nullptr, VK_RValue); StructuredList->updateInit(Context, i, Init); } } @@ -8021,9 +8019,9 @@ ExprResult InitializationSequence::Perform(Sema &S, (Step->Kind == SK_CastDerivedToBaseXValue ? VK_XValue : VK_RValue); - CurInit = ImplicitCastExpr::Create( - S.Context, Step->Type, CK_DerivedToBase, CurInit.get(), &BasePath, VK, - S.CurFPFeatureOverrides()); + CurInit = + ImplicitCastExpr::Create(S.Context, Step->Type, CK_DerivedToBase, + CurInit.get(), &BasePath, VK); break; } @@ -8152,9 +8150,9 @@ ExprResult InitializationSequence::Perform(Sema &S, if (CreatedObject && checkAbstractType(CurInit.get()->getType())) return ExprError(); - CurInit = ImplicitCastExpr::Create( - S.Context, CurInit.get()->getType(), CastKind, CurInit.get(), nullptr, - CurInit.get()->getValueKind(), S.CurFPFeatureOverrides()); + CurInit = ImplicitCastExpr::Create(S.Context, CurInit.get()->getType(), + CastKind, CurInit.get(), nullptr, + CurInit.get()->getValueKind()); if (shouldBindAsTemporary(Entity)) // The overall entity is temporary, so this expression should be @@ -8495,9 +8493,9 @@ ExprResult InitializationSequence::Perform(Sema &S, break; case SK_ProduceObjCObject: - CurInit = ImplicitCastExpr::Create( - S.Context, Step->Type, CK_ARCProduceObject, CurInit.get(), nullptr, - VK_RValue, S.CurFPFeatureOverrides()); + CurInit = + ImplicitCastExpr::Create(S.Context, Step->Type, CK_ARCProduceObject, + CurInit.get(), nullptr, VK_RValue); break; case SK_StdInitializerList: { @@ -8551,9 +8549,9 @@ ExprResult InitializationSequence::Perform(Sema &S, // Case 1b and 1c // No cast from integer to sampler is needed. 
if (!Var->hasGlobalStorage()) { - CurInit = ImplicitCastExpr::Create( - S.Context, Step->Type, CK_LValueToRValue, Init, - /*BasePath=*/nullptr, VK_RValue, S.CurFPFeatureOverrides()); + CurInit = ImplicitCastExpr::Create(S.Context, Step->Type, + CK_LValueToRValue, Init, + /*BasePath=*/nullptr, VK_RValue); break; } // Case 1a diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp index a870d822b42f5..c9f2854f7accf 100644 --- a/clang/lib/Sema/SemaLambda.cpp +++ b/clang/lib/Sema/SemaLambda.cpp @@ -680,9 +680,8 @@ static void adjustBlockReturnsToEnum(Sema &S, ArrayRef returns, ExprWithCleanups *cleanups = dyn_cast(retValue); Expr *E = (cleanups ? cleanups->getSubExpr() : retValue); - E = ImplicitCastExpr::Create(S.Context, returnType, CK_IntegralCast, E, - /*base path*/ nullptr, VK_RValue, - S.CurFPFeatureOverrides()); + E = ImplicitCastExpr::Create(S.Context, returnType, CK_IntegralCast, + E, /*base path*/ nullptr, VK_RValue); if (cleanups) { cleanups->setSubExpr(E); } else { diff --git a/clang/lib/Sema/SemaObjCProperty.cpp b/clang/lib/Sema/SemaObjCProperty.cpp index f6ed3e65f94c1..e301c62dd2c0b 100644 --- a/clang/lib/Sema/SemaObjCProperty.cpp +++ b/clang/lib/Sema/SemaObjCProperty.cpp @@ -1464,9 +1464,10 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S, DeclRefExpr(Context, SelfDecl, false, SelfDecl->getType(), VK_LValue, PropertyDiagLoc); MarkDeclRefReferenced(SelfExpr); - Expr *LoadSelfExpr = ImplicitCastExpr::Create( - Context, SelfDecl->getType(), CK_LValueToRValue, SelfExpr, nullptr, - VK_RValue, CurFPFeatureOverrides()); + Expr *LoadSelfExpr = + ImplicitCastExpr::Create(Context, SelfDecl->getType(), + CK_LValueToRValue, SelfExpr, nullptr, + VK_RValue); Expr *IvarRefExpr = new (Context) ObjCIvarRefExpr(Ivar, Ivar->getUsageType(SelfDecl->getType()), @@ -1527,9 +1528,10 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S, DeclRefExpr(Context, SelfDecl, false, SelfDecl->getType(), VK_LValue, PropertyDiagLoc); MarkDeclRefReferenced(SelfExpr); - Expr *LoadSelfExpr = ImplicitCastExpr::Create( - Context, SelfDecl->getType(), CK_LValueToRValue, SelfExpr, nullptr, - VK_RValue, CurFPFeatureOverrides()); + Expr *LoadSelfExpr = + ImplicitCastExpr::Create(Context, SelfDecl->getType(), + CK_LValueToRValue, SelfExpr, nullptr, + VK_RValue); Expr *lhs = new (Context) ObjCIvarRefExpr(Ivar, Ivar->getUsageType(SelfDecl->getType()), diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 4a444b38a0aac..352f52d2f6260 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -15388,12 +15388,12 @@ static bool actOnOMPReductionKindClause( if (!BasePath.empty()) { LHS = S.DefaultLvalueConversion(LHS.get()); RHS = S.DefaultLvalueConversion(RHS.get()); - LHS = ImplicitCastExpr::Create( - Context, PtrRedTy, CK_UncheckedDerivedToBase, LHS.get(), &BasePath, - LHS.get()->getValueKind(), S.CurFPFeatureOverrides()); - RHS = ImplicitCastExpr::Create( - Context, PtrRedTy, CK_UncheckedDerivedToBase, RHS.get(), &BasePath, - RHS.get()->getValueKind(), S.CurFPFeatureOverrides()); + LHS = ImplicitCastExpr::Create(Context, PtrRedTy, + CK_UncheckedDerivedToBase, LHS.get(), + &BasePath, LHS.get()->getValueKind()); + RHS = ImplicitCastExpr::Create(Context, PtrRedTy, + CK_UncheckedDerivedToBase, RHS.get(), + &BasePath, RHS.get()->getValueKind()); } FunctionProtoType::ExtProtoInfo EPI; QualType Params[] = {PtrRedTy, PtrRedTy}; diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index fa68f3a4deaba..71341e5688fe0 100644 --- 
a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -5862,8 +5862,7 @@ diagnoseNoViableConversion(Sema &SemaRef, SourceLocation Loc, Expr *&From, // Record usage of conversion in an implicit cast. From = ImplicitCastExpr::Create(SemaRef.Context, Result.get()->getType(), CK_UserDefinedConversion, Result.get(), - nullptr, Result.get()->getValueKind(), - SemaRef.CurFPFeatureOverrides()); + nullptr, Result.get()->getValueKind()); } return false; } @@ -5892,8 +5891,7 @@ static bool recordConversion(Sema &SemaRef, SourceLocation Loc, Expr *&From, // Record usage of conversion in an implicit cast. From = ImplicitCastExpr::Create(SemaRef.Context, Result.get()->getType(), CK_UserDefinedConversion, Result.get(), - nullptr, Result.get()->getValueKind(), - SemaRef.CurFPFeatureOverrides()); + nullptr, Result.get()->getValueKind()); return false; } @@ -7298,8 +7296,8 @@ void Sema::AddConversionCandidate( VK_LValue, From->getBeginLoc()); ImplicitCastExpr ConversionFn(ImplicitCastExpr::OnStack, Context.getPointerType(Conversion->getType()), - CK_FunctionToPointerDecay, &ConversionRef, - VK_RValue, CurFPFeatureOverrides()); + CK_FunctionToPointerDecay, + &ConversionRef, VK_RValue); QualType ConversionType = Conversion->getConversionType(); if (!isCompleteType(From->getBeginLoc(), ConversionType)) { @@ -14424,9 +14422,9 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj, if (Call.isInvalid()) return ExprError(); // Record usage of conversion in an implicit cast. - Call = ImplicitCastExpr::Create( - Context, Call.get()->getType(), CK_UserDefinedConversion, Call.get(), - nullptr, VK_RValue, CurFPFeatureOverrides()); + Call = ImplicitCastExpr::Create(Context, Call.get()->getType(), + CK_UserDefinedConversion, Call.get(), + nullptr, VK_RValue); return BuildCallExpr(S, Call.get(), LParenLoc, Args, RParenLoc); } @@ -14831,9 +14829,10 @@ Expr *Sema::FixOverloadedFunctionReference(Expr *E, DeclAccessPair Found, if (SubExpr == ICE->getSubExpr()) return ICE; - return ImplicitCastExpr::Create(Context, ICE->getType(), ICE->getCastKind(), - SubExpr, nullptr, ICE->getValueKind(), - CurFPFeatureOverrides()); + return ImplicitCastExpr::Create(Context, ICE->getType(), + ICE->getCastKind(), + SubExpr, nullptr, + ICE->getValueKind()); } if (auto *GSE = dyn_cast(E)) { diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index e461ad4484813..c44636ad1b395 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -3095,8 +3095,7 @@ static void TryMoveInitialization(Sema& S, bool ConvertingConstructorsOnly, ExprResult &Res) { ImplicitCastExpr AsRvalue(ImplicitCastExpr::OnStack, Value->getType(), - CK_NoOp, Value, VK_XValue, - S.CurFPFeatureOverrides()); + CK_NoOp, Value, VK_XValue); Expr *InitExpr = &AsRvalue; @@ -3151,9 +3150,8 @@ static void TryMoveInitialization(Sema& S, // Promote "AsRvalue" to the heap, since we now need this // expression node to persist. - Value = - ImplicitCastExpr::Create(S.Context, Value->getType(), CK_NoOp, Value, - nullptr, VK_XValue, S.CurFPFeatureOverrides()); + Value = ImplicitCastExpr::Create(S.Context, Value->getType(), CK_NoOp, + Value, nullptr, VK_XValue); // Complete type-checking the initialization of the return type // using the constructor we found. 
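The TryMoveInitialization logic above implements C++'s "implicit move on
return": when a function returns a local variable, overload resolution first
treats the returned name as an rvalue (the on-stack NoOp cast to VK_XValue),
and falls back to a copy only if that fails. A self-contained illustration
(the Widget type is invented):

struct Widget {
  Widget() = default;
  Widget(Widget &&) = default;      // selected by the rvalue-first pass
  Widget(const Widget &) = delete;  // not needed, since the move succeeds
};

Widget makeWidget() {
  Widget W;
  return W; // well-formed: W is treated as an rvalue first, so the move wins
}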
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index e1a563850970a..6721b07253292 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -7478,7 +7478,7 @@ Sema::BuildExpressionFromIntegralTemplateArgument(const TemplateArgument &Arg, // FIXME: This is a hack. We need a better way to handle substituted // non-type template parameters. E = CStyleCastExpr::Create(Context, OrigT, VK_RValue, CK_IntegralCast, E, - nullptr, CurFPFeatureOverrides(), + nullptr, Context.getTrivialTypeSourceInfo(OrigT, Loc), Loc, Loc); } diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 48897cd2d822b..e261044f7cb14 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -1082,8 +1082,6 @@ void ASTStmtReader::VisitCastExpr(CastExpr *E) { VisitExpr(E); unsigned NumBaseSpecs = Record.readInt(); assert(NumBaseSpecs == E->path_size()); - unsigned HasFPFeatures = Record.readInt(); - assert(E->hasStoredFPFeatures() == HasFPFeatures); E->setSubExpr(Record.readSubExpr()); E->setCastKind((CastKind)Record.readInt()); CastExpr::path_iterator BaseI = E->path_begin(); @@ -1092,8 +1090,6 @@ void ASTStmtReader::VisitCastExpr(CastExpr *E) { *BaseSpec = Record.readCXXBaseSpecifier(); *BaseI++ = BaseSpec; } - if (HasFPFeatures) - *E->getTrailingFPFeatures() = FPOptionsOverride::getFromOpaqueInt(Record.readInt()); } void ASTStmtReader::VisitBinaryOperator(BinaryOperator *E) { @@ -2897,17 +2893,13 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; case EXPR_IMPLICIT_CAST: - S = ImplicitCastExpr::CreateEmpty( - Context, - /*PathSize*/ Record[ASTStmtReader::NumExprFields], - /*HasFPFeatures*/ Record[ASTStmtReader::NumExprFields + 1]); + S = ImplicitCastExpr::CreateEmpty(Context, + /*PathSize*/ Record[ASTStmtReader::NumExprFields]); break; case EXPR_CSTYLE_CAST: - S = CStyleCastExpr::CreateEmpty( - Context, - /*PathSize*/ Record[ASTStmtReader::NumExprFields], - /*HasFPFeatures*/ Record[ASTStmtReader::NumExprFields + 1]); + S = CStyleCastExpr::CreateEmpty(Context, + /*PathSize*/ Record[ASTStmtReader::NumExprFields]); break; case EXPR_COMPOUND_LITERAL: @@ -3509,10 +3501,8 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; case EXPR_CXX_STATIC_CAST: - S = CXXStaticCastExpr::CreateEmpty( - Context, - /*PathSize*/ Record[ASTStmtReader::NumExprFields], - /*HasFPFeatures*/ Record[ASTStmtReader::NumExprFields + 1]); + S = CXXStaticCastExpr::CreateEmpty(Context, + /*PathSize*/ Record[ASTStmtReader::NumExprFields]); break; case EXPR_CXX_DYNAMIC_CAST: @@ -3534,10 +3524,8 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; case EXPR_CXX_FUNCTIONAL_CAST: - S = CXXFunctionalCastExpr::CreateEmpty( - Context, - /*PathSize*/ Record[ASTStmtReader::NumExprFields], - /*HasFPFeatures*/ Record[ASTStmtReader::NumExprFields + 1]); + S = CXXFunctionalCastExpr::CreateEmpty(Context, + /*PathSize*/ Record[ASTStmtReader::NumExprFields]); break; case EXPR_BUILTIN_BIT_CAST: diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 911fcb4095474..2d250674057c3 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -2346,7 +2346,6 @@ void ASTWriter::WriteDeclAbbrevs() { Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); //GetObjectKind // CastExpr Abv->Add(BitCodeAbbrevOp(0)); // PathSize - Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // HasFPFeatures 
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 6)); // CastKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // PartOfExplicitCast // ImplicitCastExpr diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index 0121f25832073..4e3e1fdc346fc 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -946,16 +946,12 @@ void ASTStmtWriter::VisitObjCBridgedCastExpr(ObjCBridgedCastExpr *E) { void ASTStmtWriter::VisitCastExpr(CastExpr *E) { VisitExpr(E); Record.push_back(E->path_size()); - Record.push_back(E->hasStoredFPFeatures()); Record.AddStmt(E->getSubExpr()); Record.push_back(E->getCastKind()); // FIXME: stable encoding for (CastExpr::path_iterator PI = E->path_begin(), PE = E->path_end(); PI != PE; ++PI) Record.AddCXXBaseSpecifier(**PI); - - if (E->hasStoredFPFeatures()) - Record.push_back(E->getFPFeatures().getAsOpaqueInt()); } void ASTStmtWriter::VisitBinaryOperator(BinaryOperator *E) { @@ -1007,7 +1003,7 @@ void ASTStmtWriter::VisitImplicitCastExpr(ImplicitCastExpr *E) { VisitCastExpr(E); Record.push_back(E->isPartOfExplicitCast()); - if (E->path_size() == 0 && !E->hasStoredFPFeatures()) + if (E->path_size() == 0) AbbrevToUse = Writer.getExprImplicitCastAbbrev(); Code = serialization::EXPR_IMPLICIT_CAST; diff --git a/clang/test/AST/ast-dump-fpfeatures.cpp b/clang/test/AST/ast-dump-fpfeatures.cpp index e143009806b56..f3925aebbe752 100644 --- a/clang/test/AST/ast-dump-fpfeatures.cpp +++ b/clang/test/AST/ast-dump-fpfeatures.cpp @@ -36,50 +36,6 @@ float func_03(float x) { // CHECK-NEXT: ReturnStmt // CHECK-NEXT: CallExpr {{.*}} FPContractMode=0 -int func_04(float x) { -#pragma STDC FP_CONTRACT ON - return x; -} - -// CHECK: FunctionDecl {{.*}} func_04 'int (float)' -// CHECK-NEXT: ParmVarDecl {{.*}} x 'float' -// CHECK-NEXT: CompoundStmt -// CHECK-NEXT: ReturnStmt -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' FPContractMode=1 - -float func_05(double x) { -#pragma STDC FP_CONTRACT ON - return (float)x; -} - -// CHECK: FunctionDecl {{.*}} func_05 'float (double)' -// CHECK-NEXT: ParmVarDecl {{.*}} x 'double' -// CHECK-NEXT: CompoundStmt -// CHECK-NEXT: ReturnStmt -// CHECK-NEXT: CStyleCastExpr {{.*}} FPContractMode=1 - -float func_06(double x) { -#pragma STDC FP_CONTRACT ON - return float(x); -} - -// CHECK: FunctionDecl {{.*}} func_06 'float (double)' -// CHECK-NEXT: ParmVarDecl {{.*}} x 'double' -// CHECK-NEXT: CompoundStmt -// CHECK-NEXT: ReturnStmt -// CHECK-NEXT: CXXFunctionalCastExpr {{.*}} FPContractMode=1 - -float func_07(double x) { -#pragma STDC FP_CONTRACT ON - return static_cast(x); -} - -// CHECK: FunctionDecl {{.*}} func_07 'float (double)' -// CHECK-NEXT: ParmVarDecl {{.*}} x 'double' -// CHECK-NEXT: CompoundStmt -// CHECK-NEXT: ReturnStmt -// CHECK-NEXT: CXXStaticCastExpr {{.*}} FPContractMode=1 - From 4ede83c06831adf5bf5e4a2abffd752615f643d0 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Sat, 12 Sep 2020 10:08:18 +0000 Subject: [PATCH 0443/1079] [gn build] Port 19531a81f1d --- llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn index f47e5a996b336..2aee1db5086ec 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn @@ -90,6 +90,7 @@ unittest("SupportTests") { "YAMLIOTest.cpp", "YAMLParserTest.cpp", 
"formatted_raw_ostream_test.cpp", + "raw_fd_stream_test.cpp", "raw_ostream_test.cpp", "raw_pwrite_stream_test.cpp", "raw_sha1_ostream_test.cpp", From 35dc91aee2013ce1a57dfee965fa5fdee1987ee0 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 12 Sep 2020 13:39:33 +0100 Subject: [PATCH 0444/1079] [X86][SSE] lowerShuffleAsDecomposedShuffleBlend - support decomposed unpacks for some vXi8/vXi16 cases Follow up to D86429 to handle the remaining regressions. This patch generalizes lowerShuffleAsDecomposedShuffleBlend to lowerShuffleAsDecomposedShuffleMerge, and attempts to use an UNPCKL shuffle mask instead of a blend for the cases where the inputs are coming from alternating vXi8/vXi16 sources. Technically they don't have to be alternating (just as long as they can fit into a lower lane half for the unpack) but I didn't find as many general cases and it needed a lot more of the function to be altered. For vXi32/vXi64 cases this could still be beneficial but in most cases the existing permute+blend approach was better. Differential Revision: https://reviews.llvm.org/D87405 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 94 ++++++++++----- .../CodeGen/X86/vector-shuffle-128-v16.ll | 32 ++--- .../CodeGen/X86/vector-shuffle-256-v16.ll | 114 ++++++++---------- .../CodeGen/X86/vector-shuffle-256-v32.ll | 37 +++--- .../CodeGen/X86/vector-shuffle-512-v32.ll | 10 +- 5 files changed, 144 insertions(+), 143 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d0115a58ba4e7..8913dff47df42 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -12120,23 +12120,32 @@ static SDValue lowerShuffleAsByteRotateAndPermute( /// This matches the extremely common pattern for handling combined /// shuffle+blend operations on newer X86 ISAs where we have very fast blend /// operations. It will try to pick the best arrangement of shuffles and -/// blends. -static SDValue lowerShuffleAsDecomposedShuffleBlend( +/// blends. For vXi8/vXi16 shuffles we may use unpack instead of blend. +static SDValue lowerShuffleAsDecomposedShuffleMerge( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) { + int NumElts = Mask.size(); + int NumLanes = VT.getSizeInBits() / 128; + int NumEltsPerLane = NumElts / NumLanes; + // Shuffle the input elements into the desired positions in V1 and V2 and - // blend them together. - SmallVector V1Mask(Mask.size(), -1); - SmallVector V2Mask(Mask.size(), -1); - SmallVector BlendMask(Mask.size(), -1); - for (int i = 0, Size = Mask.size(); i < Size; ++i) - if (Mask[i] >= 0 && Mask[i] < Size) { - V1Mask[i] = Mask[i]; - BlendMask[i] = i; - } else if (Mask[i] >= Size) { - V2Mask[i] = Mask[i] - Size; - BlendMask[i] = i + Size; + // unpack/blend them together. + bool IsAlternating = true; + SmallVector V1Mask(NumElts, -1); + SmallVector V2Mask(NumElts, -1); + SmallVector FinalMask(NumElts, -1); + for (int i = 0; i < NumElts; ++i) { + int M = Mask[i]; + if (M >= 0 && M < NumElts) { + V1Mask[i] = M; + FinalMask[i] = i; + IsAlternating &= (i & 1) == 0; + } else if (M >= NumElts) { + V2Mask[i] = M - NumElts; + FinalMask[i] = i + NumElts; + IsAlternating &= (i & 1) == 1; } + } // Try to lower with the simpler initial blend/unpack/rotate strategies unless // one of the input shuffles would be a no-op. 
We prefer to shuffle inputs as @@ -12160,9 +12169,30 @@ static SDValue lowerShuffleAsDecomposedShuffleBlend( return BlendPerm; } + // If the final mask is an alternating blend of vXi8/vXi16, convert to an + // UNPCKL(SHUFFLE, SHUFFLE) pattern. + // TODO: It doesn't have to be alternating - but each lane mustn't have more + // than half the elements coming from each source. + if (IsAlternating && VT.getScalarSizeInBits() < 32) { + V1Mask.assign(NumElts, -1); + V2Mask.assign(NumElts, -1); + FinalMask.assign(NumElts, -1); + for (int i = 0; i != NumElts; i += NumEltsPerLane) + for (int j = 0; j != NumEltsPerLane; ++j) { + int M = Mask[i + j]; + if (M >= 0 && M < NumElts) { + V1Mask[i + (j / 2)] = M; + FinalMask[i + j] = i + (j / 2); + } else if (M >= NumElts) { + V2Mask[i + (j / 2)] = M - NumElts; + FinalMask[i + j] = i + (j / 2) + NumElts; + } + } + } + V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask); V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask); - return DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask); + return DAG.getVectorShuffle(VT, DL, V1, V2, FinalMask); } /// Try to lower a vector shuffle as a bit rotation. @@ -13901,7 +13931,7 @@ static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef Mask, // If we have direct support for blends, we should lower by decomposing into // a permute. That will be faster than the domain cross. if (IsBlendSupported) - return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v2i64, V1, V2, Mask, + return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG); // We implement this with SHUFPD which is pretty lame because it will likely @@ -14193,7 +14223,7 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef Mask, // If we have direct support for blends, we should lower by decomposing into // a permute. That will be faster than the domain cross. if (IsBlendSupported) - return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2, Mask, + return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG); // Try to lower by permuting the inputs into an unpack instruction. @@ -14943,8 +14973,8 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef Mask, } // We can always bit-blend if we have to so the fallback strategy is to - // decompose into single-input permutes and blends. - return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2, + // decompose into single-input permutes and blends/unpacks. + return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG); } @@ -15281,9 +15311,9 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef Mask, return Result; } - // Handle multi-input cases by blending single-input shuffles. + // Handle multi-input cases by blending/unpacking single-input shuffles. if (NumV2Elements > 0) - return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v16i8, V1, V2, Mask, + return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG); // The fallback path for single-input shuffles widens this into two v8i16 @@ -15463,7 +15493,7 @@ static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1, } /// Either split a vector in halves or decompose the shuffles and the -/// blend. +/// blend/unpack. /// /// This is provided as a good fallback for many lowerings of non-single-input /// shuffles with more than one 128-bit lane. 
In those cases, we want to select @@ -15498,8 +15528,8 @@ static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1, return true; }; if (DoBothBroadcast()) - return lowerShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, - Subtarget, DAG); + return lowerShuffleAsDecomposedShuffleMerge(DL, VT, V1, V2, Mask, Subtarget, + DAG); // If the inputs all stem from a single 128-bit lane of each input, then we // split them rather than blending because the split will decompose to @@ -15515,9 +15545,9 @@ static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1, if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1) return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG); - // Otherwise, just fall back to decomposed shuffles and a blend. This requires - // that the decomposed single-input shuffles don't end up here. - return lowerShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, Subtarget, + // Otherwise, just fall back to decomposed shuffles and a blend/unpack. This + // requires that the decomposed single-input shuffles don't end up here. + return lowerShuffleAsDecomposedShuffleMerge(DL, VT, V1, V2, Mask, Subtarget, DAG); } @@ -16569,7 +16599,7 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef Mask, // If we have one input in place, then we can permute the other input and // blend the result. if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask)) - return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2, Mask, + return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG); // Try to create an in-lane repeating shuffle mask and then shuffle the @@ -16597,7 +16627,7 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef Mask, // If we have AVX2 then we always want to lower with a blend because an v4 we // can fully permute the elements. if (Subtarget.hasAVX2()) - return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2, Mask, + return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG); // Otherwise fall back on generic lowering. @@ -16679,7 +16709,7 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef Mask, // If we have one input in place, then we can permute the other input and // blend the result. if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask)) - return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2, Mask, + return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG); // Try to create an in-lane repeating shuffle mask and then shuffle the @@ -16699,7 +16729,7 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef Mask, return Result; // Otherwise fall back on generic blend lowering. - return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2, Mask, + return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG); } @@ -16794,7 +16824,7 @@ static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef Mask, // If we have AVX2 then we always want to lower with a blend because at v8 we // can fully permute the elements. if (Subtarget.hasAVX2()) - return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2, Mask, + return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG); // Otherwise fall back on generic lowering. @@ -16913,7 +16943,7 @@ static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef Mask, return Result; // Otherwise fall back on generic blend lowering. 
- return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2, Mask, + return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG); } diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll index 19d9b159fd830..fb300a88b4120 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -289,31 +289,13 @@ define <16 x i8> @shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31( } define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) { -; SSE2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: -; SSE2: # %bb.0: -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,0,0,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] -; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 -; SSE2-NEXT: por %xmm2, %xmm0 -; SSE2-NEXT: retq -; -; SSSE3-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: -; SSSE3: # %bb.0: -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSSE3-NEXT: movdqa %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: -; SSE41: # %bb.0: -; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] -; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: retq +; SSE-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: +; SSE: # %bb.0: +; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] +; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq ; ; AVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: ; AVX1: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll index ec775e9155721..5eb4b1039bf9f 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll @@ -2139,9 +2139,9 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_3 ; ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31: ; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31,u,u] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,3,3] +; AVX2-NEXT: vpermq 
{{.*#+}} ymm0 = ymm0[0,1,3,3] +; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31: @@ -2161,9 +2161,9 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_3 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31,u,u] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,3,3] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,3,3] +; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -2181,9 +2181,9 @@ define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_2 ; ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27: ; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23,u,u] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,1,2,3] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,2,3] +; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27: @@ -2203,9 +2203,9 @@ define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_2 ; ; XOPAVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23,u,u] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,1,2,3] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,2,3] +; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -5086,10 +5086,9 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_2 ; ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27: ; AVX2: # %bb.0: -; 
AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,14,15,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27: @@ -5110,10 +5109,9 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_2 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,14,15,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u] +; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -5181,10 +5179,10 @@ define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_3 ; ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31: ; AVX2: # %bb.0: -; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] +; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7] ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,3,3] -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,14,15,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31: @@ -5205,10 +5203,10 @@ define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_3 ; ; XOPAVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] +; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7] ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,3,3] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = 
ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,14,15,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u] +; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -5283,21 +5281,19 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_3 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31: ; AVX2-SLOW: # %bb.0: ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,2,1,4,5,6,7,8,8,10,9,12,13,14,15] -; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm1 = ymm1[0,1,2,3,6,6,6,6,8,9,10,11,14,14,14,14] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5,6],ymm2[7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15] -; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15] -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,6],ymm2[7],ymm1[8,9,10,11,12,13,14],ymm2[15] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,3,2,3,4,7,6,7] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7] +; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] ; AVX2-SLOW-NEXT: retq ; ; AVX2-FAST-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31: ; AVX2-FAST: # %bb.0: ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <0,3,7,u,4,7,u,u> ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,10,11,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,28,29,u,u,30,31,u,u] -; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,10,11,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u] +; AVX2-FAST-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7] +; AVX2-FAST-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] ; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31: @@ -5320,12 +5316,10 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_3 ; XOPAVX2-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31: ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3] -; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,2,1,4,5,6,7,8,8,10,9,12,13,14,15] -; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm1 = ymm1[0,1,2,3,6,6,6,6,8,9,10,11,14,14,14,14] -; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5,6],ymm2[7] -; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = 
ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15] -; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,6],ymm2[7],ymm1[8,9,10,11,12,13,14],ymm2[15] +; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,3,2,3,4,7,6,7] +; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7] +; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -5350,19 +5344,18 @@ define <16 x i16> @shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_2 ; AVX2-SLOW: # %bb.0: ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3] ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3,4,5,6,7,8],ymm2[9],ymm1[10,11,12,13,14,15] -; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,8,9,u,u,10,11,u,u,0,1,u,u,2,3,u,u,24,25,u,u,26,27,u,u,16,17,u,u,18,19] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15] -; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15] -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,0,2,3,6,4,6,7] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7] +; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] ; AVX2-SLOW-NEXT: retq ; ; AVX2-FAST-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25: ; AVX2-FAST: # %bb.0: ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <2,0,4,u,6,4,u,u> ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,10,11,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,28,29,u,u,30,31,u,u] -; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,10,11,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u] +; AVX2-FAST-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7] +; AVX2-FAST-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] ; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25: @@ -5386,10 +5379,9 @@ define <16 x i16> @shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_2 ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3] ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3,4,5,6,7,8],ymm2[9],ymm1[10,11,12,13,14,15] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,8,9,u,u,10,11,u,u,0,1,u,u,2,3,u,u,24,25,u,u,26,27,u,u,16,17,u,u,18,19] -; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15] -; XOPAVX2-NEXT: vpshufhw 
{{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,0,2,3,6,4,6,7] +; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7] +; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -5469,10 +5461,9 @@ define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_1 ; ; AVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11: ; AVX2: # %bb.0: -; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11: @@ -5494,10 +5485,9 @@ define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_1 ; ; XOPAVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u] +; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -5516,10 +5506,10 @@ define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_1 ; ; AVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15: ; AVX2: # %bb.0: -; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] +; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7] ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpunpcklwd 
{{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15: @@ -5541,10 +5531,10 @@ define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_1 ; ; XOPAVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] +; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7] ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,u,u,u,u,u,u,u,u,16,17,18,19,20,21,22,23,u,u,u,u,u,u,u,u] +; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll index a7e65f10a3604..23bf91de6e7e8 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -2793,16 +2793,16 @@ define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_ ; ; AVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: ; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,3,3] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,3,3] +; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31] -; AVX512VLBW-NEXT: 
vpor %ymm0, %ymm1, %ymm0 +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,3,3] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,3,3] +; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: @@ -2822,9 +2822,9 @@ define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,3,3] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,3,3] +; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -2842,16 +2842,16 @@ define <32 x i8> @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_ ; ; AVX2-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: ; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,1,2,3] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,2,3] +; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23] -; AVX512VLBW-NEXT: vpor %ymm0, %ymm1, %ymm0 +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,1,2,3] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,2,3] +; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: @@ -2871,9 +2871,9 @@ define <32 x i8> @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_ ; ; XOPAVX2-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero -; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,1,2,3] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,2,3] +; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -3316,7 +3316,6 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_ ; AVX512VLBW-FAST-NEXT: kmovd %eax, %k1 ; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[u,u,0,1,u,u,u,u,5,10,13,u,u,0,u,u,16,23,u,23,u,u,u,u,u,u,u,27,u,u,u,u] ; AVX512VLBW-FAST-NEXT: retq - ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: ; AVX512VLVBMI: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll index ac6701b383f25..2b76d668f5fe2 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll @@ -67,16 +67,16 @@ define <32 x i16> @shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_1 ; KNL: ## %bb.0: ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 ; KNL-NEXT: vpermq {{.*#+}} ymm2 = ymm2[3,1,2,3] -; KNL-NEXT: vpshufb {{.*#+}} ymm3 = ymm2[u,u,6,7,u,u,12,13,u,u,2,3,u,u,0,1,u,u,22,23,u,u,20,21,u,u,18,19,u,u,u,u] +; KNL-NEXT: vpshufb 
{{.*#+}} ymm3 = ymm2[6,7,12,13,2,3,0,1,u,u,u,u,u,u,u,u,22,23,20,21,18,19,u,u,u,u,u,u,u,u,u,u] ; KNL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,2,3] -; KNL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,u,u,4,5,u,u,2,3,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,16,17,u,u] -; KNL-NEXT: vpblendw {{.*#+}} ymm3 = ymm0[0],ymm3[1],ymm0[2],ymm3[3],ymm0[4],ymm3[5],ymm0[6],ymm3[7],ymm0[8],ymm3[9],ymm0[10],ymm3[11],ymm0[12],ymm3[13],ymm0[14],ymm3[15] +; KNL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,8,9,u,u,u,u,u,u,u,u,22,23,20,21,18,19,16,17,u,u,u,u,u,u,u,u] +; KNL-NEXT: vpunpcklwd {{.*#+}} ymm3 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[8],ymm3[8],ymm0[9],ymm3[9],ymm0[10],ymm3[10],ymm0[11],ymm3[11] ; KNL-NEXT: vextracti32x4 $3, %zmm1, %xmm1 ; KNL-NEXT: vpbroadcastw %xmm1, %ymm1 ; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm3[0,1,2,3,4,5,6],ymm1[7],ymm3[8,9,10,11,12,13,14],ymm1[15] ; KNL-NEXT: vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7] -; KNL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,6,7,u,u,12,13,u,u,2,3,u,u,0,1,u,u,22,23,u,u,20,21,u,u,18,19,u,u,16,17] -; KNL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] +; KNL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[6,7,12,13,2,3,0,1,u,u,u,u,u,u,u,u,22,23,20,21,18,19,16,17,u,u,u,u,u,u,u,u] +; KNL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11] ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 ; KNL-NEXT: retq ; From 36e2e2e12efb6b02ad07f502d61b9a95937edb08 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 10 Sep 2020 12:19:16 +0200 Subject: [PATCH 0445/1079] [InstCombine] Fix incorrect SimplifyWithOpReplaced transform (PR47322) This is a followup to D86834, which partially fixed this issue in InstSimplify. However, InstCombine repeats the same transform while dropping poison flags -- which does not cover cases where poison is introduced in some other way. The fix here is a bit more comprehensive, because things are quite entangled, and it's hard to only partially address it without regressing optimization. There are really two changes here: * Export the SimplifyWithOpReplaced API from InstSimplify, with an added AllowRefinement flag. For replacements inside the TrueVal we don't actually care whether refinement occurs or not, the replacement is always legal. This part of the transform is now done in InstSimplify only. (It should be noted that the current AllowRefinement check is not sufficient -- that's an issue we need to address separately.) * Change the InstCombine fold to work by temporarily dropping poison generating flags, running the fold and then restoring the flags if it didn't work out. This will ensure that the InstCombine fold is correct as long as the InstSimplify fold is correct. 
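For reference, the motivating case from PR47322 (it is the pr47322_more_poisonous_replacement test in select.ll, updated below) looks roughly like this; the false arm is more poisonous than the select itself, because cttz with its second argument set to true has an undefined result when %arg is 0 and only the select guards that case, so collapsing the select into %shifted was a miscompile:

%cmp = icmp eq i32 %arg, 0
%trailing = call i32 @llvm.cttz.i32(i32 %arg, i1 true)
%shifted = lshr i32 %arg, %trailing
%r = select i1 %cmp, i32 0, i32 %shifted ; must not fold to %shifted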
Differential Revision: https://reviews.llvm.org/D87445 --- .../llvm/Analysis/InstructionSimplify.h | 6 ++ llvm/lib/Analysis/InstructionSimplify.cpp | 50 ++++++++++------- .../InstCombine/InstCombineSelect.cpp | 55 +++++++++++-------- llvm/test/Transforms/InstCombine/select.ll | 7 ++- 4 files changed, 72 insertions(+), 46 deletions(-) diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h index 6f3d168466217..e0251e7c8bbfd 100644 --- a/llvm/include/llvm/Analysis/InstructionSimplify.h +++ b/llvm/include/llvm/Analysis/InstructionSimplify.h @@ -292,6 +292,12 @@ Value *SimplifyFreezeInst(Value *Op, const SimplifyQuery &Q); Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE = nullptr); +/// See if V simplifies when its operand Op is replaced with RepOp. +/// AllowRefinement specifies whether the simplification can be a refinement, +/// or whether it needs to be strictly identical. +Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, + const SimplifyQuery &Q, bool AllowRefinement); + /// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively. /// /// This first performs a normal RAUW of I with SimpleV. It then recursively diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index e59c0a84044aa..f7f5105f9383c 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -3769,10 +3769,10 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit); } -/// See if V simplifies when its operand Op is replaced with RepOp. -static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, - const SimplifyQuery &Q, - unsigned MaxRecurse) { +static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, + const SimplifyQuery &Q, + bool AllowRefinement, + unsigned MaxRecurse) { // Trivial replacement. if (V == Op) return RepOp; @@ -3785,20 +3785,19 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, if (!I) return nullptr; + // Consider: + // %cmp = icmp eq i32 %x, 2147483647 + // %add = add nsw i32 %x, 1 + // %sel = select i1 %cmp, i32 -2147483648, i32 %add + // + // We can't replace %sel with %add unless we strip away the flags (which will + // be done in InstCombine). + // TODO: This is unsound, because it only catches some forms of refinement. + if (!AllowRefinement && canCreatePoison(cast(I))) + return nullptr; + // If this is a binary operator, try to simplify it with the replaced op. if (auto *B = dyn_cast(I)) { - // Consider: - // %cmp = icmp eq i32 %x, 2147483647 - // %add = add nsw i32 %x, 1 - // %sel = select i1 %cmp, i32 -2147483648, i32 %add - // - // We can't replace %sel with %add unless we strip away the flags. - // TODO: This is an unusual limitation because better analysis results in - // worse simplification. InstCombine can do this fold more generally - // by dropping the flags. Remove this fold to save compile-time? 
- if (canCreatePoison(cast(I))) - return nullptr; - if (MaxRecurse) { if (B->getOperand(0) == Op) return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), Q, @@ -3865,6 +3864,13 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, return nullptr; } +Value *llvm::SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, + const SimplifyQuery &Q, + bool AllowRefinement) { + return ::SimplifyWithOpReplaced(V, Op, RepOp, Q, AllowRefinement, + RecursionLimit); +} + /// Try to simplify a select instruction when its condition operand is an /// integer comparison where one operand of the compare is a constant. static Value *simplifySelectBitTest(Value *TrueVal, Value *FalseVal, Value *X, @@ -3985,14 +3991,18 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, // arms of the select. See if substituting this value into the arm and // simplifying the result yields the same value as the other arm. if (Pred == ICmpInst::ICMP_EQ) { - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, + /* AllowRefinement */ false, MaxRecurse) == TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, + /* AllowRefinement */ false, MaxRecurse) == TrueVal) return FalseVal; - if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, + /* AllowRefinement */ true, MaxRecurse) == FalseVal || - SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, + /* AllowRefinement */ true, MaxRecurse) == FalseVal) return FalseVal; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index c05c16b4bdb16..378132011aba2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1149,22 +1149,6 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp, return &Sel; } -static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *ReplaceOp, - const SimplifyQuery &Q) { - // If this is a binary operator, try to simplify it with the replaced op - // because we know Op and ReplaceOp are equivalant. - // For example: V = X + 1, Op = X, ReplaceOp = 42 - // Simplifies as: add(42, 1) --> 43 - if (auto *BO = dyn_cast(V)) { - if (BO->getOperand(0) == Op) - return SimplifyBinOp(BO->getOpcode(), ReplaceOp, BO->getOperand(1), Q); - if (BO->getOperand(1) == Op) - return SimplifyBinOp(BO->getOpcode(), BO->getOperand(0), ReplaceOp, Q); - } - - return nullptr; -} - /// If we have a select with an equality comparison, then we know the value in /// one of the arms of the select. See if substituting this value into an arm /// and simplifying the result yields the same value as the other arm. @@ -1191,20 +1175,45 @@ static Value *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp, if (Cmp.getPredicate() == ICmpInst::ICMP_NE) std::swap(TrueVal, FalseVal); + auto *FalseInst = dyn_cast(FalseVal); + if (!FalseInst) + return nullptr; + + // InstSimplify already performed this fold if it was possible subject to + // current poison-generating flags. Try the transform again with + // poison-generating flags temporarily dropped. 
+  bool WasNUW = false, WasNSW = false, WasExact = false;
+  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(FalseVal)) {
+    WasNUW = OBO->hasNoUnsignedWrap();
+    WasNSW = OBO->hasNoSignedWrap();
+    FalseInst->setHasNoUnsignedWrap(false);
+    FalseInst->setHasNoSignedWrap(false);
+  }
+  if (auto *PEO = dyn_cast<PossiblyExactOperator>(FalseVal)) {
+    WasExact = PEO->isExact();
+    FalseInst->setIsExact(false);
+  }
+
   // Try each equivalence substitution possibility.
   // We have an 'EQ' comparison, so the select's false value will propagate.
   // Example:
   // (X == 42) ? 43 : (X + 1) --> (X == 42) ? (X + 1) : (X + 1) --> X + 1
-  // (X == 42) ? (X + 1) : 43 --> (X == 42) ? (42 + 1) : 43 --> 43
   Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1);
-  if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q) == TrueVal ||
-      simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q) == TrueVal ||
-      simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q) == FalseVal ||
-      simplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q) == FalseVal) {
-    if (auto *FalseInst = dyn_cast<Instruction>(FalseVal))
-      FalseInst->dropPoisonGeneratingFlags();
+  if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q,
+                             /* AllowRefinement */ false) == TrueVal ||
+      SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q,
+                             /* AllowRefinement */ false) == TrueVal) {
     return FalseVal;
   }
+
+  // Restore poison-generating flags if the transform did not apply.
+  if (WasNUW)
+    FalseInst->setHasNoUnsignedWrap();
+  if (WasNSW)
+    FalseInst->setHasNoSignedWrap();
+  if (WasExact)
+    FalseInst->setIsExact();
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index 570f92866d89b..d9a4f4bdbd473 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -2588,12 +2588,13 @@ define void @select_freeze_icmp_multuses(i32 %x, i32 %y) {
   ret void
 }
 
-; FIXME: This is a miscompile!
 define i32 @pr47322_more_poisonous_replacement(i32 %arg) {
 ; CHECK-LABEL: @pr47322_more_poisonous_replacement(
-; CHECK-NEXT:    [[TRAILING:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG:%.*]], i1 immarg true), [[RNG0:!range !.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[ARG:%.*]], 0
+; CHECK-NEXT:    [[TRAILING:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 immarg true), [[RNG0:!range !.*]]
 ; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[ARG]], [[TRAILING]]
-; CHECK-NEXT:    ret i32 [[SHIFTED]]
+; CHECK-NEXT:    [[R1_SROA_0_1:%.*]] = select i1 [[CMP]], i32 0, i32 [[SHIFTED]]
+; CHECK-NEXT:    ret i32 [[R1_SROA_0_1]]
 ;
   %cmp = icmp eq i32 %arg, 0
   %trailing = call i32 @llvm.cttz.i32(i32 %arg, i1 immarg true)

From c437446d90be17c3fe8a216a90ee442222f2fe9d Mon Sep 17 00:00:00 2001
From: David Green
Date: Sat, 12 Sep 2020 13:51:42 +0100
Subject: [PATCH 0446/1079] [ARM] Recognize "double extend" reduction patterns

We can sometimes get code that does:

  xe = zext i16 x to i32
  ye = zext i16 y to i32
  m = mul i32 xe, ye
  me = zext i32 m to i64
  r = vecreduce.add(me)

This "double extend" can trip up the reduction identification, but
should give identical results. This extends the pattern matching to
handle such cases.
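
For example (condensed from the add_v8i16_v8i32_v8i64_zext test in the
mve-vecreduce-mla.ll changes below), a reduction like

  %xx = zext <8 x i16> %x to <8 x i32>
  %yy = zext <8 x i16> %y to <8 x i32>
  %m = mul <8 x i32> %xx, %yy
  %ma = zext <8 x i32> %m to <8 x i64>
  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma)

is now recognized and selects to a single "vmlalv.u16 r0, r1, q0, q1"
rather than the long scalarized sequence the tests previously checked for.
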
Differential Revision: https://reviews.llvm.org/D87276
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |   31 +-
 llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll |  922 +---------
 .../CodeGen/Thumb2/mve-vecreduce-mlapred.ll   | 1536 +----------------
 3 files changed, 151 insertions(+), 2338 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 1239e6bbf6843..83d89de7b4772 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14765,10 +14765,25 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
   };
   auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
                      SDValue &A, SDValue &B) {
-    if (ResVT != RetTy || N0->getOpcode() != ISD::MUL)
+    // For a vmla we are trying to match a larger pattern:
+    // ExtA = sext/zext A
+    // ExtB = sext/zext B
+    // Mul = mul ExtA, ExtB
+    // vecreduce.add Mul
+    // There might also be an extra extend between the mul and the addreduce,
+    // so long as the bitwidth is high enough to make them equivalent (for
+    // example the original v8i16 might be mul at v8i32 and the reduce happens
+    // at v8i64).
+    if (ResVT != RetTy)
       return false;
-    SDValue ExtA = N0->getOperand(0);
-    SDValue ExtB = N0->getOperand(1);
+    SDValue Mul = N0;
+    if (Mul->getOpcode() == ExtendCode &&
+        Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
+            ResVT.getScalarSizeInBits())
+      Mul = Mul->getOperand(0);
+    if (Mul->getOpcode() != ISD::MUL)
+      return false;
+    SDValue ExtA = Mul->getOperand(0);
+    SDValue ExtB = Mul->getOperand(1);
     if (ExtA->getOpcode() != ExtendCode && ExtB->getOpcode() != ExtendCode)
       return false;
     A = ExtA->getOperand(0);
@@ -14780,11 +14795,21 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
   };
   auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
                          SDValue &A, SDValue &B, SDValue &Mask) {
+    // Same as the pattern above with a select for the zero predicated lanes
+    // ExtA = sext/zext A
+    // ExtB = sext/zext B
+    // Mul = mul ExtA, ExtB
+    // N0 = select Mask, Mul, 0
+    // vecreduce.add N0
     if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
         !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
       return false;
     Mask = N0->getOperand(0);
     SDValue Mul = N0->getOperand(1);
+    if (Mul->getOpcode() == ExtendCode &&
+        Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
+            ResVT.getScalarSizeInBits())
+      Mul = Mul->getOperand(0);
     if (Mul->getOpcode() != ISD::MUL)
       return false;
     SDValue ExtA = Mul->getOperand(0);
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
index 4010e3c911126..8cef85de3d956 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
@@ -173,86 +173,7 @@ entry:
 define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_zext(<8 x i16> %x, <8 x i16> %y) {
 ; CHECK-LABEL: add_v8i16_v8i32_v8i64_zext:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .vsave {d8, d9}
-; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    vmov.u16 r0, q1[0]
-; CHECK-NEXT:    vmov.32 q2[0], r0
-; CHECK-NEXT:    vmov.u16 r0, q1[1]
-; CHECK-NEXT:    vmov.32 q2[1], r0
-; CHECK-NEXT:    vmov.u16 r0, q1[2]
-; CHECK-NEXT:    vmov.32 q2[2], r0
-; CHECK-NEXT:    vmov.u16 r0, q1[3]
-; CHECK-NEXT:    vmov.32 q2[3], r0
-; CHECK-NEXT:    vmov.u16 r0, q0[0]
-; CHECK-NEXT:    vmov.32 q3[0], r0
-; CHECK-NEXT:    vmov.u16 r0, q0[1]
-; CHECK-NEXT:    vmov.32 q3[1], r0
-; CHECK-NEXT:    vmov.u16 r0, q0[2]
-; CHECK-NEXT:    vmov.32 q3[2], r0
-; CHECK-NEXT:    vmov.u16 r0, q0[3]
-; CHECK-NEXT:    vmov.32 q3[3], r0
-; CHECK-NEXT: vmullb.u16 q3, q3, q2 -; CHECK-NEXT: vmov.i64 q2, #0xffffffff -; CHECK-NEXT: vmov.f32 s16, s12 -; CHECK-NEXT: vmov.f32 s18, s13 -; CHECK-NEXT: vand q4, q4, q2 -; CHECK-NEXT: vmov r2, s18 -; CHECK-NEXT: vmov r3, s16 -; CHECK-NEXT: vmov r0, s19 -; CHECK-NEXT: vmov r1, s17 -; CHECK-NEXT: vmov.f32 s16, s14 -; CHECK-NEXT: vmov.f32 s18, s15 -; CHECK-NEXT: vand q3, q4, q2 -; CHECK-NEXT: adds r2, r2, r3 -; CHECK-NEXT: vmov r3, s12 -; CHECK-NEXT: adcs r0, r1 -; CHECK-NEXT: vmov r1, s13 -; CHECK-NEXT: adds r2, r2, r3 -; CHECK-NEXT: vmov r3, s15 -; CHECK-NEXT: adcs r1, r0 -; CHECK-NEXT: vmov r0, s14 -; CHECK-NEXT: adds r0, r0, r2 -; CHECK-NEXT: vmov.u16 r2, q1[4] -; CHECK-NEXT: vmov.32 q3[0], r2 -; CHECK-NEXT: vmov.u16 r2, q1[5] -; CHECK-NEXT: vmov.32 q3[1], r2 -; CHECK-NEXT: vmov.u16 r2, q1[6] -; CHECK-NEXT: vmov.32 q3[2], r2 -; CHECK-NEXT: vmov.u16 r2, q1[7] -; CHECK-NEXT: vmov.32 q3[3], r2 -; CHECK-NEXT: vmov.u16 r2, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r2 -; CHECK-NEXT: vmov.u16 r2, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r2 -; CHECK-NEXT: vmov.u16 r2, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r2 -; CHECK-NEXT: vmov.u16 r2, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r2 -; CHECK-NEXT: adcs r1, r3 -; CHECK-NEXT: vmullb.u16 q0, q1, q3 -; CHECK-NEXT: vmov.f32 s4, s0 -; CHECK-NEXT: vmov.f32 s6, s1 -; CHECK-NEXT: vand q1, q1, q2 -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmov r2, s5 -; CHECK-NEXT: adds r0, r0, r3 -; CHECK-NEXT: vmov r3, s6 -; CHECK-NEXT: adcs r1, r2 -; CHECK-NEXT: vmov r2, s7 -; CHECK-NEXT: vmov.f32 s4, s2 -; CHECK-NEXT: vmov.f32 s6, s3 -; CHECK-NEXT: vand q0, q1, q2 -; CHECK-NEXT: adds r0, r0, r3 -; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: adcs r1, r2 -; CHECK-NEXT: vmov r2, s1 -; CHECK-NEXT: adds r0, r0, r3 -; CHECK-NEXT: vmov r3, s2 -; CHECK-NEXT: adcs r1, r2 -; CHECK-NEXT: vmov r2, s3 -; CHECK-NEXT: adds r0, r0, r3 -; CHECK-NEXT: adcs r1, r2 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vmlalv.u16 r0, r1, q0, q1 ; CHECK-NEXT: bx lr entry: %xx = zext <8 x i16> %x to <8 x i32> @@ -266,100 +187,7 @@ entry: define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_sext(<8 x i16> %x, <8 x i16> %y) { ; CHECK-LABEL: add_v8i16_v8i32_v8i64_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov.u16 r0, q1[0] -; CHECK-NEXT: vmov.32 q2[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[1] -; CHECK-NEXT: vmov.32 q2[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[3] -; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: vmov.u16 r0, q0[0] -; CHECK-NEXT: vmov.32 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[2] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q0[3] -; CHECK-NEXT: vmov.32 q3[3], r0 -; CHECK-NEXT: vmullb.s16 q2, q3, q2 -; CHECK-NEXT: vmov.f32 s12, s8 -; CHECK-NEXT: vmov.f32 s14, s9 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmov.32 q4[0], r0 -; CHECK-NEXT: asrs r0, r0, #31 -; CHECK-NEXT: vmov.32 q4[1], r0 -; CHECK-NEXT: vmov r0, s14 -; CHECK-NEXT: vmov.32 q4[2], r0 -; CHECK-NEXT: vmov.f32 s12, s10 -; CHECK-NEXT: vmov.f32 s14, s11 -; CHECK-NEXT: asrs r1, r0, #31 -; CHECK-NEXT: vmov.32 q4[3], r1 -; CHECK-NEXT: vmov r2, s18 -; CHECK-NEXT: vmov r3, s16 -; CHECK-NEXT: vmov r1, s17 -; CHECK-NEXT: adds r2, r2, r3 -; CHECK-NEXT: adc.w r12, r1, r0, asr #31 -; CHECK-NEXT: vmov r1, s12 -; CHECK-NEXT: vmov.32 q2[0], r1 -; CHECK-NEXT: asrs r1, r1, #31 -; CHECK-NEXT: vmov.32 q2[1], r1 -; CHECK-NEXT: vmov r1, s14 -; CHECK-NEXT: vmov.32 q2[2], r1 
-; CHECK-NEXT: asrs r3, r1, #31 -; CHECK-NEXT: vmov.32 q2[3], r3 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov r3, s9 -; CHECK-NEXT: adds r0, r0, r2 -; CHECK-NEXT: adc.w r2, r12, r3 -; CHECK-NEXT: vmov r3, s10 -; CHECK-NEXT: adds.w r12, r0, r3 -; CHECK-NEXT: adc.w r1, r2, r1, asr #31 -; CHECK-NEXT: vmov.u16 r2, q1[4] -; CHECK-NEXT: vmov.32 q2[0], r2 -; CHECK-NEXT: vmov.u16 r2, q1[5] -; CHECK-NEXT: vmov.32 q2[1], r2 -; CHECK-NEXT: vmov.u16 r2, q1[6] -; CHECK-NEXT: vmov.32 q2[2], r2 -; CHECK-NEXT: vmov.u16 r2, q1[7] -; CHECK-NEXT: vmov.32 q2[3], r2 -; CHECK-NEXT: vmov.u16 r2, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r2 -; CHECK-NEXT: vmov.u16 r2, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r2 -; CHECK-NEXT: vmov.u16 r2, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r2 -; CHECK-NEXT: vmov.u16 r2, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r2 -; CHECK-NEXT: vmullb.s16 q0, q1, q2 -; CHECK-NEXT: vmov.f32 s4, s0 -; CHECK-NEXT: vmov.f32 s6, s1 -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: vmov.32 q2[0], r2 -; CHECK-NEXT: asrs r2, r2, #31 -; CHECK-NEXT: vmov.32 q2[1], r2 -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: vmov.32 q2[2], r2 -; CHECK-NEXT: vmov.f32 s4, s2 -; CHECK-NEXT: vmov.f32 s6, s3 -; CHECK-NEXT: asrs r3, r2, #31 -; CHECK-NEXT: vmov.32 q2[3], r3 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov r3, s9 -; CHECK-NEXT: adds.w r0, r0, r12 -; CHECK-NEXT: adcs r1, r3 -; CHECK-NEXT: vmov r3, s10 -; CHECK-NEXT: adds r0, r0, r3 -; CHECK-NEXT: adc.w r1, r1, r2, asr #31 -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: adds r0, r0, r2 -; CHECK-NEXT: adc.w r1, r1, r2, asr #31 -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: adds r0, r0, r2 -; CHECK-NEXT: adc.w r1, r1, r2, asr #31 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vmlalv.s16 r0, r1, q0, q1 ; CHECK-NEXT: bx lr entry: %xx = sext <8 x i16> %x to <8 x i32> @@ -515,115 +343,7 @@ entry: define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_zext(<16 x i8> %x, <16 x i8> %y) { ; CHECK-LABEL: add_v16i8_v16i16_v16i32_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov.u8 r0, q1[8] -; CHECK-NEXT: vmov.16 q2[0], r0 -; CHECK-NEXT: vmov.u8 r0, q1[9] -; CHECK-NEXT: vmov.16 q2[1], r0 -; CHECK-NEXT: vmov.u8 r0, q1[10] -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmov.u8 r0, q1[11] -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov.u8 r0, q1[12] -; CHECK-NEXT: vmov.16 q2[4], r0 -; CHECK-NEXT: vmov.u8 r0, q1[13] -; CHECK-NEXT: vmov.16 q2[5], r0 -; CHECK-NEXT: vmov.u8 r0, q1[14] -; CHECK-NEXT: vmov.16 q2[6], r0 -; CHECK-NEXT: vmov.u8 r0, q1[15] -; CHECK-NEXT: vmov.16 q2[7], r0 -; CHECK-NEXT: vmov.u8 r0, q0[8] -; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov.u8 r0, q0[9] -; CHECK-NEXT: vmov.16 q3[1], r0 -; CHECK-NEXT: vmov.u8 r0, q0[10] -; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov.u8 r0, q0[11] -; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov.u8 r0, q0[12] -; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov.u8 r0, q0[13] -; CHECK-NEXT: vmov.16 q3[5], r0 -; CHECK-NEXT: vmov.u8 r0, q0[14] -; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov.u8 r0, q0[15] -; CHECK-NEXT: vmov.16 q3[7], r0 -; CHECK-NEXT: vmullb.u8 q2, q3, q2 -; CHECK-NEXT: vmov.u16 r0, q2[4] -; CHECK-NEXT: vmov.32 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q2[5] -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q2[6] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q2[7] -; CHECK-NEXT: vmov.32 q3[3], r0 -; CHECK-NEXT: vmov.u8 r0, q1[0] -; CHECK-NEXT: vmov.16 q4[0], r0 -; CHECK-NEXT: vmov.u8 r0, q1[1] -; CHECK-NEXT: vmov.16 q4[1], r0 -; CHECK-NEXT: 
vmov.u8 r0, q1[2] -; CHECK-NEXT: vmov.16 q4[2], r0 -; CHECK-NEXT: vmov.u8 r0, q1[3] -; CHECK-NEXT: vmov.16 q4[3], r0 -; CHECK-NEXT: vmov.u8 r0, q1[4] -; CHECK-NEXT: vmov.16 q4[4], r0 -; CHECK-NEXT: vmov.u8 r0, q1[5] -; CHECK-NEXT: vmov.16 q4[5], r0 -; CHECK-NEXT: vmov.u8 r0, q1[6] -; CHECK-NEXT: vmov.16 q4[6], r0 -; CHECK-NEXT: vmov.u8 r0, q1[7] -; CHECK-NEXT: vmov.16 q4[7], r0 -; CHECK-NEXT: vmov.u8 r0, q0[0] -; CHECK-NEXT: vmov.16 q1[0], r0 -; CHECK-NEXT: vmov.u8 r0, q0[1] -; CHECK-NEXT: vmov.16 q1[1], r0 -; CHECK-NEXT: vmov.u8 r0, q0[2] -; CHECK-NEXT: vmov.16 q1[2], r0 -; CHECK-NEXT: vmov.u8 r0, q0[3] -; CHECK-NEXT: vmov.16 q1[3], r0 -; CHECK-NEXT: vmov.u8 r0, q0[4] -; CHECK-NEXT: vmov.16 q1[4], r0 -; CHECK-NEXT: vmov.u8 r0, q0[5] -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmov.u8 r0, q0[6] -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov.u8 r0, q0[7] -; CHECK-NEXT: vmov.16 q1[7], r0 -; CHECK-NEXT: vmovlb.u16 q3, q3 -; CHECK-NEXT: vmullb.u8 q0, q1, q4 -; CHECK-NEXT: vmov.u16 r0, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r0 -; CHECK-NEXT: vmov.u16 r0, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r0 -; CHECK-NEXT: vmov.u16 r0, q2[0] -; CHECK-NEXT: vmovlb.u16 q1, q1 -; CHECK-NEXT: vadd.i32 q1, q1, q3 -; CHECK-NEXT: vmov.32 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q2[1] -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q2[2] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q2[3] -; CHECK-NEXT: vmov.32 q3[3], r0 -; CHECK-NEXT: vmov.u16 r0, q0[0] -; CHECK-NEXT: vmovlb.u16 q2, q3 -; CHECK-NEXT: vmov.32 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[2] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q0[3] -; CHECK-NEXT: vmov.32 q3[3], r0 -; CHECK-NEXT: vmovlb.u16 q0, q3 -; CHECK-NEXT: vadd.i32 q0, q0, q2 -; CHECK-NEXT: vadd.i32 q0, q0, q1 -; CHECK-NEXT: vaddv.u32 r0, q0 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vmlav.u8 r0, q0, q1 ; CHECK-NEXT: bx lr entry: %xx = zext <16 x i8> %x to <16 x i16> @@ -637,115 +357,7 @@ entry: define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_sext(<16 x i8> %x, <16 x i8> %y) { ; CHECK-LABEL: add_v16i8_v16i16_v16i32_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov.u8 r0, q1[8] -; CHECK-NEXT: vmov.16 q2[0], r0 -; CHECK-NEXT: vmov.u8 r0, q1[9] -; CHECK-NEXT: vmov.16 q2[1], r0 -; CHECK-NEXT: vmov.u8 r0, q1[10] -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmov.u8 r0, q1[11] -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov.u8 r0, q1[12] -; CHECK-NEXT: vmov.16 q2[4], r0 -; CHECK-NEXT: vmov.u8 r0, q1[13] -; CHECK-NEXT: vmov.16 q2[5], r0 -; CHECK-NEXT: vmov.u8 r0, q1[14] -; CHECK-NEXT: vmov.16 q2[6], r0 -; CHECK-NEXT: vmov.u8 r0, q1[15] -; CHECK-NEXT: vmov.16 q2[7], r0 -; CHECK-NEXT: vmov.u8 r0, q0[8] -; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov.u8 r0, q0[9] -; CHECK-NEXT: vmov.16 q3[1], r0 -; CHECK-NEXT: vmov.u8 r0, q0[10] -; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov.u8 r0, q0[11] -; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov.u8 r0, q0[12] -; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov.u8 r0, q0[13] -; CHECK-NEXT: vmov.16 q3[5], r0 -; CHECK-NEXT: vmov.u8 r0, q0[14] -; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov.u8 r0, q0[15] -; CHECK-NEXT: vmov.16 q3[7], r0 -; CHECK-NEXT: vmullb.s8 q2, q3, q2 -; CHECK-NEXT: vmov.u16 r0, q2[4] -; CHECK-NEXT: 
vmov.32 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q2[5] -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q2[6] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q2[7] -; CHECK-NEXT: vmov.32 q3[3], r0 -; CHECK-NEXT: vmov.u8 r0, q1[0] -; CHECK-NEXT: vmov.16 q4[0], r0 -; CHECK-NEXT: vmov.u8 r0, q1[1] -; CHECK-NEXT: vmov.16 q4[1], r0 -; CHECK-NEXT: vmov.u8 r0, q1[2] -; CHECK-NEXT: vmov.16 q4[2], r0 -; CHECK-NEXT: vmov.u8 r0, q1[3] -; CHECK-NEXT: vmov.16 q4[3], r0 -; CHECK-NEXT: vmov.u8 r0, q1[4] -; CHECK-NEXT: vmov.16 q4[4], r0 -; CHECK-NEXT: vmov.u8 r0, q1[5] -; CHECK-NEXT: vmov.16 q4[5], r0 -; CHECK-NEXT: vmov.u8 r0, q1[6] -; CHECK-NEXT: vmov.16 q4[6], r0 -; CHECK-NEXT: vmov.u8 r0, q1[7] -; CHECK-NEXT: vmov.16 q4[7], r0 -; CHECK-NEXT: vmov.u8 r0, q0[0] -; CHECK-NEXT: vmov.16 q1[0], r0 -; CHECK-NEXT: vmov.u8 r0, q0[1] -; CHECK-NEXT: vmov.16 q1[1], r0 -; CHECK-NEXT: vmov.u8 r0, q0[2] -; CHECK-NEXT: vmov.16 q1[2], r0 -; CHECK-NEXT: vmov.u8 r0, q0[3] -; CHECK-NEXT: vmov.16 q1[3], r0 -; CHECK-NEXT: vmov.u8 r0, q0[4] -; CHECK-NEXT: vmov.16 q1[4], r0 -; CHECK-NEXT: vmov.u8 r0, q0[5] -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmov.u8 r0, q0[6] -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov.u8 r0, q0[7] -; CHECK-NEXT: vmov.16 q1[7], r0 -; CHECK-NEXT: vmovlb.s16 q3, q3 -; CHECK-NEXT: vmullb.s8 q0, q1, q4 -; CHECK-NEXT: vmov.u16 r0, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r0 -; CHECK-NEXT: vmov.u16 r0, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r0 -; CHECK-NEXT: vmov.u16 r0, q2[0] -; CHECK-NEXT: vmovlb.s16 q1, q1 -; CHECK-NEXT: vadd.i32 q1, q1, q3 -; CHECK-NEXT: vmov.32 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q2[1] -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q2[2] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q2[3] -; CHECK-NEXT: vmov.32 q3[3], r0 -; CHECK-NEXT: vmov.u16 r0, q0[0] -; CHECK-NEXT: vmovlb.s16 q2, q3 -; CHECK-NEXT: vmov.32 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[2] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q0[3] -; CHECK-NEXT: vmov.32 q3[3], r0 -; CHECK-NEXT: vmovlb.s16 q0, q3 -; CHECK-NEXT: vadd.i32 q0, q0, q2 -; CHECK-NEXT: vadd.i32 q0, q0, q1 -; CHECK-NEXT: vaddv.u32 r0, q0 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vmlav.s8 r0, q0, q1 ; CHECK-NEXT: bx lr entry: %xx = sext <16 x i8> %x to <16 x i16> @@ -1596,91 +1208,8 @@ entry: define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_acc_zext(<8 x i16> %x, <8 x i16> %y, i64 %a) { ; CHECK-LABEL: add_v8i16_v8i32_v8i64_acc_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov.u16 r2, q1[0] -; CHECK-NEXT: vmov.32 q2[0], r2 -; CHECK-NEXT: vmov.u16 r2, q1[1] -; CHECK-NEXT: vmov.32 q2[1], r2 -; CHECK-NEXT: vmov.u16 r2, q1[2] -; CHECK-NEXT: vmov.32 q2[2], r2 -; CHECK-NEXT: vmov.u16 r2, q1[3] -; CHECK-NEXT: vmov.32 q2[3], r2 -; CHECK-NEXT: vmov.u16 r2, q0[0] -; CHECK-NEXT: vmov.32 q3[0], r2 -; CHECK-NEXT: vmov.u16 r2, q0[1] -; CHECK-NEXT: vmov.32 q3[1], r2 -; CHECK-NEXT: vmov.u16 r2, q0[2] -; CHECK-NEXT: vmov.32 q3[2], r2 -; CHECK-NEXT: vmov.u16 r2, q0[3] -; CHECK-NEXT: vmov.32 q3[3], r2 -; CHECK-NEXT: vmullb.u16 q3, q3, q2 -; CHECK-NEXT: vmov.i64 q2, #0xffffffff -; CHECK-NEXT: vmov.f32 s16, s12 -; CHECK-NEXT: vmov.f32 s18, s13 -; CHECK-NEXT: vand q4, q4, q2 -; 
CHECK-NEXT: vmov r2, s18 -; CHECK-NEXT: vmov r3, s16 -; CHECK-NEXT: vmov r12, s19 -; CHECK-NEXT: vmov lr, s17 -; CHECK-NEXT: vmov.f32 s16, s14 -; CHECK-NEXT: vmov.f32 s18, s15 -; CHECK-NEXT: vand q3, q4, q2 -; CHECK-NEXT: adds r4, r3, r2 -; CHECK-NEXT: vmov r3, s12 -; CHECK-NEXT: vmov r2, s13 -; CHECK-NEXT: adc.w r12, r12, lr -; CHECK-NEXT: adds.w lr, r4, r3 -; CHECK-NEXT: vmov r3, s14 -; CHECK-NEXT: adc.w r4, r12, r2 -; CHECK-NEXT: vmov r2, s15 -; CHECK-NEXT: adds.w r12, lr, r3 -; CHECK-NEXT: adc.w r3, r4, r2 -; CHECK-NEXT: vmov.u16 r2, q1[4] -; CHECK-NEXT: vmov.32 q3[0], r2 -; CHECK-NEXT: vmov.u16 r2, q1[5] -; CHECK-NEXT: vmov.32 q3[1], r2 -; CHECK-NEXT: vmov.u16 r2, q1[6] -; CHECK-NEXT: vmov.32 q3[2], r2 -; CHECK-NEXT: vmov.u16 r2, q1[7] -; CHECK-NEXT: vmov.32 q3[3], r2 -; CHECK-NEXT: vmov.u16 r2, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r2 -; CHECK-NEXT: vmov.u16 r2, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r2 -; CHECK-NEXT: vmov.u16 r2, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r2 -; CHECK-NEXT: vmov.u16 r2, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r2 -; CHECK-NEXT: vmullb.u16 q0, q1, q3 -; CHECK-NEXT: vmov.f32 s4, s0 -; CHECK-NEXT: vmov.f32 s6, s1 -; CHECK-NEXT: vand q1, q1, q2 -; CHECK-NEXT: vmov r4, s4 -; CHECK-NEXT: vmov r2, s5 -; CHECK-NEXT: adds.w r4, r4, r12 -; CHECK-NEXT: adc.w r12, r3, r2 -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: vmov r3, s7 -; CHECK-NEXT: vmov.f32 s4, s2 -; CHECK-NEXT: vmov.f32 s6, s3 -; CHECK-NEXT: vand q0, q1, q2 -; CHECK-NEXT: adds.w lr, r4, r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov r4, s1 -; CHECK-NEXT: adc.w r3, r3, r12 -; CHECK-NEXT: adds.w r12, lr, r2 -; CHECK-NEXT: vmov r2, s2 -; CHECK-NEXT: adcs r3, r4 -; CHECK-NEXT: vmov r4, s3 -; CHECK-NEXT: adds.w r2, r2, r12 -; CHECK-NEXT: adcs r3, r4 -; CHECK-NEXT: adds r0, r0, r2 -; CHECK-NEXT: adcs r1, r3 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: vmlalva.u16 r0, r1, q0, q1 +; CHECK-NEXT: bx lr entry: %xx = zext <8 x i16> %x to <8 x i32> %yy = zext <8 x i16> %y to <8 x i32> @@ -1694,105 +1223,8 @@ entry: define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %y, i64 %a) { ; CHECK-LABEL: add_v8i16_v8i32_v8i64_acc_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov.u16 r2, q1[0] -; CHECK-NEXT: vmov.32 q2[0], r2 -; CHECK-NEXT: vmov.u16 r2, q1[1] -; CHECK-NEXT: vmov.32 q2[1], r2 -; CHECK-NEXT: vmov.u16 r2, q1[2] -; CHECK-NEXT: vmov.32 q2[2], r2 -; CHECK-NEXT: vmov.u16 r2, q1[3] -; CHECK-NEXT: vmov.32 q2[3], r2 -; CHECK-NEXT: vmov.u16 r2, q0[0] -; CHECK-NEXT: vmov.32 q3[0], r2 -; CHECK-NEXT: vmov.u16 r2, q0[1] -; CHECK-NEXT: vmov.32 q3[1], r2 -; CHECK-NEXT: vmov.u16 r2, q0[2] -; CHECK-NEXT: vmov.32 q3[2], r2 -; CHECK-NEXT: vmov.u16 r2, q0[3] -; CHECK-NEXT: vmov.32 q3[3], r2 -; CHECK-NEXT: vmullb.s16 q2, q3, q2 -; CHECK-NEXT: vmov.f32 s12, s8 -; CHECK-NEXT: vmov.f32 s14, s9 -; CHECK-NEXT: vmov r2, s12 -; CHECK-NEXT: vmov.32 q4[0], r2 -; CHECK-NEXT: asrs r2, r2, #31 -; CHECK-NEXT: vmov.32 q4[1], r2 -; CHECK-NEXT: vmov r2, s14 -; CHECK-NEXT: vmov.32 q4[2], r2 -; CHECK-NEXT: vmov.f32 s12, s10 -; CHECK-NEXT: vmov.f32 s14, s11 -; CHECK-NEXT: asrs r3, r2, #31 -; CHECK-NEXT: vmov.32 q4[3], r3 -; CHECK-NEXT: vmov lr, s18 -; CHECK-NEXT: vmov r3, s16 -; CHECK-NEXT: vmov r12, s17 -; CHECK-NEXT: adds.w lr, lr, r3 -; CHECK-NEXT: adc.w r12, r12, r2, asr #31 -; CHECK-NEXT: vmov r2, s12 -; CHECK-NEXT: vmov.32 q2[0], r2 -; CHECK-NEXT: asrs r2, r2, #31 -; CHECK-NEXT: 
vmov.32 q2[1], r2 -; CHECK-NEXT: vmov r2, s14 -; CHECK-NEXT: vmov.32 q2[2], r2 -; CHECK-NEXT: asrs r3, r2, #31 -; CHECK-NEXT: vmov.32 q2[3], r3 -; CHECK-NEXT: vmov r4, s8 -; CHECK-NEXT: vmov r3, s9 -; CHECK-NEXT: adds.w r4, r4, lr -; CHECK-NEXT: adc.w lr, r12, r3 -; CHECK-NEXT: vmov r3, s10 -; CHECK-NEXT: adds.w r12, r4, r3 -; CHECK-NEXT: adc.w lr, lr, r2, asr #31 -; CHECK-NEXT: vmov.u16 r2, q1[4] -; CHECK-NEXT: vmov.32 q2[0], r2 -; CHECK-NEXT: vmov.u16 r2, q1[5] -; CHECK-NEXT: vmov.32 q2[1], r2 -; CHECK-NEXT: vmov.u16 r2, q1[6] -; CHECK-NEXT: vmov.32 q2[2], r2 -; CHECK-NEXT: vmov.u16 r2, q1[7] -; CHECK-NEXT: vmov.32 q2[3], r2 -; CHECK-NEXT: vmov.u16 r2, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r2 -; CHECK-NEXT: vmov.u16 r2, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r2 -; CHECK-NEXT: vmov.u16 r2, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r2 -; CHECK-NEXT: vmov.u16 r2, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r2 -; CHECK-NEXT: vmullb.s16 q0, q1, q2 -; CHECK-NEXT: vmov.f32 s4, s0 -; CHECK-NEXT: vmov.f32 s6, s1 -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: vmov.32 q2[0], r2 -; CHECK-NEXT: asrs r2, r2, #31 -; CHECK-NEXT: vmov.32 q2[1], r2 -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: vmov.32 q2[2], r2 -; CHECK-NEXT: vmov.f32 s4, s2 -; CHECK-NEXT: vmov.f32 s6, s3 -; CHECK-NEXT: asrs r4, r2, #31 -; CHECK-NEXT: vmov.32 q2[3], r4 -; CHECK-NEXT: vmov r3, s8 -; CHECK-NEXT: vmov r4, s9 -; CHECK-NEXT: adds.w r12, r12, r3 -; CHECK-NEXT: vmov r3, s10 -; CHECK-NEXT: adc.w r4, r4, lr -; CHECK-NEXT: adds.w r3, r3, r12 -; CHECK-NEXT: adc.w r2, r4, r2, asr #31 -; CHECK-NEXT: vmov r4, s4 -; CHECK-NEXT: adds r3, r3, r4 -; CHECK-NEXT: adc.w r2, r2, r4, asr #31 -; CHECK-NEXT: vmov r4, s6 -; CHECK-NEXT: adds r3, r3, r4 -; CHECK-NEXT: adc.w r2, r2, r4, asr #31 -; CHECK-NEXT: adds r0, r0, r3 -; CHECK-NEXT: adcs r1, r2 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: vmlalva.s16 r0, r1, q0, q1 +; CHECK-NEXT: bx lr entry: %xx = sext <8 x i16> %x to <8 x i32> %yy = sext <8 x i16> %y to <8 x i32> @@ -1816,80 +1248,66 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_acc_sextzext(<8 x i16> %x, <8 ; CHECK-NEXT: vmov.32 q1[2], r2 ; CHECK-NEXT: vmov.u16 r2, q0[3] ; CHECK-NEXT: vmov.32 q1[3], r2 -; CHECK-NEXT: vmullb.s16 q1, q1, q1 -; CHECK-NEXT: vmov.f32 s8, s4 -; CHECK-NEXT: vmov.f32 s10, s5 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vmov.32 q3[0], r2 -; CHECK-NEXT: asrs r2, r2, #31 -; CHECK-NEXT: vmov.32 q3[1], r2 -; CHECK-NEXT: vmov r2, s10 -; CHECK-NEXT: vmov.32 q3[2], r2 -; CHECK-NEXT: vmov.f32 s8, s6 -; CHECK-NEXT: vmov.f32 s10, s7 -; CHECK-NEXT: asrs r3, r2, #31 -; CHECK-NEXT: vmov.32 q3[3], r3 -; CHECK-NEXT: vmov lr, s14 +; CHECK-NEXT: vmullb.s16 q2, q1, q1 +; CHECK-NEXT: vmov.i64 q1, #0xffffffff +; CHECK-NEXT: vmov.f32 s12, s8 +; CHECK-NEXT: vmov.f32 s14, s9 +; CHECK-NEXT: vand q3, q3, q1 +; CHECK-NEXT: vmov r2, s14 ; CHECK-NEXT: vmov r3, s12 -; CHECK-NEXT: vmov r12, s13 -; CHECK-NEXT: adds.w lr, lr, r3 -; CHECK-NEXT: adc.w r12, r12, r2, asr #31 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vmov.32 q1[0], r2 -; CHECK-NEXT: asrs r2, r2, #31 -; CHECK-NEXT: vmov.32 q1[1], r2 +; CHECK-NEXT: vmov r12, s15 +; CHECK-NEXT: vmov lr, s13 +; CHECK-NEXT: vmov.f32 s12, s10 +; CHECK-NEXT: vmov.f32 s14, s11 +; CHECK-NEXT: vand q2, q3, q1 +; CHECK-NEXT: adds r4, r3, r2 +; CHECK-NEXT: vmov r3, s8 +; CHECK-NEXT: vmov r2, s9 +; CHECK-NEXT: adc.w r12, r12, lr +; CHECK-NEXT: adds r3, r3, r4 +; CHECK-NEXT: vmov r4, s11 +; CHECK-NEXT: adc.w r12, r12, r2 ; CHECK-NEXT: vmov r2, s10 -; CHECK-NEXT: vmov.32 q1[2], r2 -; CHECK-NEXT: asrs r3, r2, #31 
-; CHECK-NEXT: vmov.32 q1[3], r3 -; CHECK-NEXT: vmov r4, s4 -; CHECK-NEXT: vmov r3, s5 -; CHECK-NEXT: adds.w r4, r4, lr -; CHECK-NEXT: adc.w lr, r12, r3 -; CHECK-NEXT: vmov r3, s6 -; CHECK-NEXT: adds.w r12, r4, r3 -; CHECK-NEXT: adc.w lr, lr, r2, asr #31 -; CHECK-NEXT: vmov.u16 r2, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r2 -; CHECK-NEXT: vmov.u16 r2, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r2 -; CHECK-NEXT: vmov.u16 r2, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r2 -; CHECK-NEXT: vmov.u16 r2, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r2 -; CHECK-NEXT: vmullb.s16 q0, q1, q1 -; CHECK-NEXT: vmov.f32 s4, s0 -; CHECK-NEXT: vmov.f32 s6, s1 -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: vmov.32 q2[0], r2 -; CHECK-NEXT: asrs r2, r2, #31 -; CHECK-NEXT: vmov.32 q2[1], r2 -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: vmov.32 q2[2], r2 -; CHECK-NEXT: vmov.f32 s4, s2 -; CHECK-NEXT: vmov.f32 s6, s3 -; CHECK-NEXT: asrs r4, r2, #31 +; CHECK-NEXT: adds.w lr, r3, r2 +; CHECK-NEXT: adc.w r3, r12, r4 +; CHECK-NEXT: vmov.u16 r4, q0[4] +; CHECK-NEXT: vmov.32 q2[0], r4 +; CHECK-NEXT: vmov.u16 r4, q0[5] +; CHECK-NEXT: vmov.32 q2[1], r4 +; CHECK-NEXT: vmov.u16 r4, q0[6] +; CHECK-NEXT: vmov.32 q2[2], r4 +; CHECK-NEXT: vmov.u16 r4, q0[7] ; CHECK-NEXT: vmov.32 q2[3], r4 -; CHECK-NEXT: vmov r3, s8 +; CHECK-NEXT: vmullb.s16 q0, q2, q2 +; CHECK-NEXT: vmov.f32 s8, s0 +; CHECK-NEXT: vmov.f32 s10, s1 +; CHECK-NEXT: vand q2, q2, q1 +; CHECK-NEXT: vmov r2, s8 ; CHECK-NEXT: vmov r4, s9 -; CHECK-NEXT: adds.w r12, r12, r3 -; CHECK-NEXT: vmov r3, s10 -; CHECK-NEXT: adc.w r4, r4, lr -; CHECK-NEXT: adds.w r3, r3, r12 -; CHECK-NEXT: adc.w r2, r4, r2, asr #31 -; CHECK-NEXT: vmov r4, s4 -; CHECK-NEXT: adds r3, r3, r4 -; CHECK-NEXT: adc.w r2, r2, r4, asr #31 -; CHECK-NEXT: vmov r4, s6 -; CHECK-NEXT: adds r3, r3, r4 -; CHECK-NEXT: adc.w r2, r2, r4, asr #31 -; CHECK-NEXT: adds r0, r0, r3 -; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: adds.w r12, lr, r2 +; CHECK-NEXT: vmov r2, s10 +; CHECK-NEXT: adcs r3, r4 +; CHECK-NEXT: vmov r4, s11 +; CHECK-NEXT: vmov.f32 s8, s2 +; CHECK-NEXT: vmov.f32 s10, s3 +; CHECK-NEXT: vand q0, q2, q1 +; CHECK-NEXT: adds.w r12, r12, r2 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: adcs r3, r4 +; CHECK-NEXT: vmov r4, s1 +; CHECK-NEXT: adds.w r12, r12, r2 +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: adcs r3, r4 +; CHECK-NEXT: vmov r4, s3 +; CHECK-NEXT: adds.w r2, r2, r12 +; CHECK-NEXT: adcs r3, r4 +; CHECK-NEXT: adds r0, r0, r2 +; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: pop {r4, pc} entry: %xx = sext <8 x i16> %x to <8 x i32> %m = mul <8 x i32> %xx, %xx - %ma = sext <8 x i32> %m to <8 x i64> + %ma = zext <8 x i32> %m to <8 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma) %r = add i64 %z, %a ret i64 %r @@ -1979,115 +1397,7 @@ entry: define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_acc_zext(<16 x i8> %x, <16 x i8> %y, i32 %a) { ; CHECK-LABEL: add_v16i8_v16i16_v16i32_acc_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov.u8 r1, q1[8] -; CHECK-NEXT: vmov.16 q2[0], r1 -; CHECK-NEXT: vmov.u8 r1, q1[9] -; CHECK-NEXT: vmov.16 q2[1], r1 -; CHECK-NEXT: vmov.u8 r1, q1[10] -; CHECK-NEXT: vmov.16 q2[2], r1 -; CHECK-NEXT: vmov.u8 r1, q1[11] -; CHECK-NEXT: vmov.16 q2[3], r1 -; CHECK-NEXT: vmov.u8 r1, q1[12] -; CHECK-NEXT: vmov.16 q2[4], r1 -; CHECK-NEXT: vmov.u8 r1, q1[13] -; CHECK-NEXT: vmov.16 q2[5], r1 -; CHECK-NEXT: vmov.u8 r1, q1[14] -; CHECK-NEXT: vmov.16 q2[6], r1 -; CHECK-NEXT: vmov.u8 r1, q1[15] -; CHECK-NEXT: vmov.16 q2[7], r1 -; CHECK-NEXT: vmov.u8 r1, q0[8] -; 
CHECK-NEXT: vmov.16 q3[0], r1 -; CHECK-NEXT: vmov.u8 r1, q0[9] -; CHECK-NEXT: vmov.16 q3[1], r1 -; CHECK-NEXT: vmov.u8 r1, q0[10] -; CHECK-NEXT: vmov.16 q3[2], r1 -; CHECK-NEXT: vmov.u8 r1, q0[11] -; CHECK-NEXT: vmov.16 q3[3], r1 -; CHECK-NEXT: vmov.u8 r1, q0[12] -; CHECK-NEXT: vmov.16 q3[4], r1 -; CHECK-NEXT: vmov.u8 r1, q0[13] -; CHECK-NEXT: vmov.16 q3[5], r1 -; CHECK-NEXT: vmov.u8 r1, q0[14] -; CHECK-NEXT: vmov.16 q3[6], r1 -; CHECK-NEXT: vmov.u8 r1, q0[15] -; CHECK-NEXT: vmov.16 q3[7], r1 -; CHECK-NEXT: vmullb.u8 q2, q3, q2 -; CHECK-NEXT: vmov.u16 r1, q2[4] -; CHECK-NEXT: vmov.32 q3[0], r1 -; CHECK-NEXT: vmov.u16 r1, q2[5] -; CHECK-NEXT: vmov.32 q3[1], r1 -; CHECK-NEXT: vmov.u16 r1, q2[6] -; CHECK-NEXT: vmov.32 q3[2], r1 -; CHECK-NEXT: vmov.u16 r1, q2[7] -; CHECK-NEXT: vmov.32 q3[3], r1 -; CHECK-NEXT: vmov.u8 r1, q1[0] -; CHECK-NEXT: vmov.16 q4[0], r1 -; CHECK-NEXT: vmov.u8 r1, q1[1] -; CHECK-NEXT: vmov.16 q4[1], r1 -; CHECK-NEXT: vmov.u8 r1, q1[2] -; CHECK-NEXT: vmov.16 q4[2], r1 -; CHECK-NEXT: vmov.u8 r1, q1[3] -; CHECK-NEXT: vmov.16 q4[3], r1 -; CHECK-NEXT: vmov.u8 r1, q1[4] -; CHECK-NEXT: vmov.16 q4[4], r1 -; CHECK-NEXT: vmov.u8 r1, q1[5] -; CHECK-NEXT: vmov.16 q4[5], r1 -; CHECK-NEXT: vmov.u8 r1, q1[6] -; CHECK-NEXT: vmov.16 q4[6], r1 -; CHECK-NEXT: vmov.u8 r1, q1[7] -; CHECK-NEXT: vmov.16 q4[7], r1 -; CHECK-NEXT: vmov.u8 r1, q0[0] -; CHECK-NEXT: vmov.16 q1[0], r1 -; CHECK-NEXT: vmov.u8 r1, q0[1] -; CHECK-NEXT: vmov.16 q1[1], r1 -; CHECK-NEXT: vmov.u8 r1, q0[2] -; CHECK-NEXT: vmov.16 q1[2], r1 -; CHECK-NEXT: vmov.u8 r1, q0[3] -; CHECK-NEXT: vmov.16 q1[3], r1 -; CHECK-NEXT: vmov.u8 r1, q0[4] -; CHECK-NEXT: vmov.16 q1[4], r1 -; CHECK-NEXT: vmov.u8 r1, q0[5] -; CHECK-NEXT: vmov.16 q1[5], r1 -; CHECK-NEXT: vmov.u8 r1, q0[6] -; CHECK-NEXT: vmov.16 q1[6], r1 -; CHECK-NEXT: vmov.u8 r1, q0[7] -; CHECK-NEXT: vmov.16 q1[7], r1 -; CHECK-NEXT: vmovlb.u16 q3, q3 -; CHECK-NEXT: vmullb.u8 q0, q1, q4 -; CHECK-NEXT: vmov.u16 r1, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r1 -; CHECK-NEXT: vmov.u16 r1, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r1 -; CHECK-NEXT: vmov.u16 r1, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r1 -; CHECK-NEXT: vmov.u16 r1, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r1 -; CHECK-NEXT: vmov.u16 r1, q2[0] -; CHECK-NEXT: vmovlb.u16 q1, q1 -; CHECK-NEXT: vadd.i32 q1, q1, q3 -; CHECK-NEXT: vmov.32 q3[0], r1 -; CHECK-NEXT: vmov.u16 r1, q2[1] -; CHECK-NEXT: vmov.32 q3[1], r1 -; CHECK-NEXT: vmov.u16 r1, q2[2] -; CHECK-NEXT: vmov.32 q3[2], r1 -; CHECK-NEXT: vmov.u16 r1, q2[3] -; CHECK-NEXT: vmov.32 q3[3], r1 -; CHECK-NEXT: vmov.u16 r1, q0[0] -; CHECK-NEXT: vmovlb.u16 q2, q3 -; CHECK-NEXT: vmov.32 q3[0], r1 -; CHECK-NEXT: vmov.u16 r1, q0[1] -; CHECK-NEXT: vmov.32 q3[1], r1 -; CHECK-NEXT: vmov.u16 r1, q0[2] -; CHECK-NEXT: vmov.32 q3[2], r1 -; CHECK-NEXT: vmov.u16 r1, q0[3] -; CHECK-NEXT: vmov.32 q3[3], r1 -; CHECK-NEXT: vmovlb.u16 q0, q3 -; CHECK-NEXT: vadd.i32 q0, q0, q2 -; CHECK-NEXT: vadd.i32 q0, q0, q1 -; CHECK-NEXT: vaddva.u32 r0, q0 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vmlava.u8 r0, q0, q1 ; CHECK-NEXT: bx lr entry: %xx = zext <16 x i8> %x to <16 x i16> @@ -2102,115 +1412,7 @@ entry: define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_acc_sext(<16 x i8> %x, <16 x i8> %y, i32 %a) { ; CHECK-LABEL: add_v16i8_v16i16_v16i32_acc_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov.u8 r1, q1[8] -; CHECK-NEXT: vmov.16 q2[0], r1 -; CHECK-NEXT: vmov.u8 r1, q1[9] -; CHECK-NEXT: vmov.16 q2[1], r1 -; CHECK-NEXT: vmov.u8 r1, q1[10] -; CHECK-NEXT: vmov.16 
q2[2], r1 -; CHECK-NEXT: vmov.u8 r1, q1[11] -; CHECK-NEXT: vmov.16 q2[3], r1 -; CHECK-NEXT: vmov.u8 r1, q1[12] -; CHECK-NEXT: vmov.16 q2[4], r1 -; CHECK-NEXT: vmov.u8 r1, q1[13] -; CHECK-NEXT: vmov.16 q2[5], r1 -; CHECK-NEXT: vmov.u8 r1, q1[14] -; CHECK-NEXT: vmov.16 q2[6], r1 -; CHECK-NEXT: vmov.u8 r1, q1[15] -; CHECK-NEXT: vmov.16 q2[7], r1 -; CHECK-NEXT: vmov.u8 r1, q0[8] -; CHECK-NEXT: vmov.16 q3[0], r1 -; CHECK-NEXT: vmov.u8 r1, q0[9] -; CHECK-NEXT: vmov.16 q3[1], r1 -; CHECK-NEXT: vmov.u8 r1, q0[10] -; CHECK-NEXT: vmov.16 q3[2], r1 -; CHECK-NEXT: vmov.u8 r1, q0[11] -; CHECK-NEXT: vmov.16 q3[3], r1 -; CHECK-NEXT: vmov.u8 r1, q0[12] -; CHECK-NEXT: vmov.16 q3[4], r1 -; CHECK-NEXT: vmov.u8 r1, q0[13] -; CHECK-NEXT: vmov.16 q3[5], r1 -; CHECK-NEXT: vmov.u8 r1, q0[14] -; CHECK-NEXT: vmov.16 q3[6], r1 -; CHECK-NEXT: vmov.u8 r1, q0[15] -; CHECK-NEXT: vmov.16 q3[7], r1 -; CHECK-NEXT: vmullb.s8 q2, q3, q2 -; CHECK-NEXT: vmov.u16 r1, q2[4] -; CHECK-NEXT: vmov.32 q3[0], r1 -; CHECK-NEXT: vmov.u16 r1, q2[5] -; CHECK-NEXT: vmov.32 q3[1], r1 -; CHECK-NEXT: vmov.u16 r1, q2[6] -; CHECK-NEXT: vmov.32 q3[2], r1 -; CHECK-NEXT: vmov.u16 r1, q2[7] -; CHECK-NEXT: vmov.32 q3[3], r1 -; CHECK-NEXT: vmov.u8 r1, q1[0] -; CHECK-NEXT: vmov.16 q4[0], r1 -; CHECK-NEXT: vmov.u8 r1, q1[1] -; CHECK-NEXT: vmov.16 q4[1], r1 -; CHECK-NEXT: vmov.u8 r1, q1[2] -; CHECK-NEXT: vmov.16 q4[2], r1 -; CHECK-NEXT: vmov.u8 r1, q1[3] -; CHECK-NEXT: vmov.16 q4[3], r1 -; CHECK-NEXT: vmov.u8 r1, q1[4] -; CHECK-NEXT: vmov.16 q4[4], r1 -; CHECK-NEXT: vmov.u8 r1, q1[5] -; CHECK-NEXT: vmov.16 q4[5], r1 -; CHECK-NEXT: vmov.u8 r1, q1[6] -; CHECK-NEXT: vmov.16 q4[6], r1 -; CHECK-NEXT: vmov.u8 r1, q1[7] -; CHECK-NEXT: vmov.16 q4[7], r1 -; CHECK-NEXT: vmov.u8 r1, q0[0] -; CHECK-NEXT: vmov.16 q1[0], r1 -; CHECK-NEXT: vmov.u8 r1, q0[1] -; CHECK-NEXT: vmov.16 q1[1], r1 -; CHECK-NEXT: vmov.u8 r1, q0[2] -; CHECK-NEXT: vmov.16 q1[2], r1 -; CHECK-NEXT: vmov.u8 r1, q0[3] -; CHECK-NEXT: vmov.16 q1[3], r1 -; CHECK-NEXT: vmov.u8 r1, q0[4] -; CHECK-NEXT: vmov.16 q1[4], r1 -; CHECK-NEXT: vmov.u8 r1, q0[5] -; CHECK-NEXT: vmov.16 q1[5], r1 -; CHECK-NEXT: vmov.u8 r1, q0[6] -; CHECK-NEXT: vmov.16 q1[6], r1 -; CHECK-NEXT: vmov.u8 r1, q0[7] -; CHECK-NEXT: vmov.16 q1[7], r1 -; CHECK-NEXT: vmovlb.s16 q3, q3 -; CHECK-NEXT: vmullb.s8 q0, q1, q4 -; CHECK-NEXT: vmov.u16 r1, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r1 -; CHECK-NEXT: vmov.u16 r1, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r1 -; CHECK-NEXT: vmov.u16 r1, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r1 -; CHECK-NEXT: vmov.u16 r1, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r1 -; CHECK-NEXT: vmov.u16 r1, q2[0] -; CHECK-NEXT: vmovlb.s16 q1, q1 -; CHECK-NEXT: vadd.i32 q1, q1, q3 -; CHECK-NEXT: vmov.32 q3[0], r1 -; CHECK-NEXT: vmov.u16 r1, q2[1] -; CHECK-NEXT: vmov.32 q3[1], r1 -; CHECK-NEXT: vmov.u16 r1, q2[2] -; CHECK-NEXT: vmov.32 q3[2], r1 -; CHECK-NEXT: vmov.u16 r1, q2[3] -; CHECK-NEXT: vmov.32 q3[3], r1 -; CHECK-NEXT: vmov.u16 r1, q0[0] -; CHECK-NEXT: vmovlb.s16 q2, q3 -; CHECK-NEXT: vmov.32 q3[0], r1 -; CHECK-NEXT: vmov.u16 r1, q0[1] -; CHECK-NEXT: vmov.32 q3[1], r1 -; CHECK-NEXT: vmov.u16 r1, q0[2] -; CHECK-NEXT: vmov.32 q3[2], r1 -; CHECK-NEXT: vmov.u16 r1, q0[3] -; CHECK-NEXT: vmov.32 q3[3], r1 -; CHECK-NEXT: vmovlb.s16 q0, q3 -; CHECK-NEXT: vadd.i32 q0, q0, q2 -; CHECK-NEXT: vadd.i32 q0, q0, q1 -; CHECK-NEXT: vaddva.u32 r0, q0 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vmlava.s8 r0, q0, q1 ; CHECK-NEXT: bx lr entry: %xx = sext <16 x i8> %x to <16 x i16> diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll 
b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll index bc316c3c2478a..fd268fd4c5a9a 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll @@ -239,149 +239,9 @@ entry: define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_zext(<8 x i16> %x, <8 x i16> %y, <8 x i16> %b) { ; CHECK-LABEL: add_v8i16_v8i32_v8i64_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vmov.i8 q3, #0x0 -; CHECK-NEXT: vmov.i8 q4, #0xff -; CHECK-NEXT: vcmp.i16 eq, q2, zr -; CHECK-NEXT: vpsel q3, q4, q3 -; CHECK-NEXT: vmov.u16 r0, q3[0] -; CHECK-NEXT: vmov.32 q2[0], r0 -; CHECK-NEXT: vmov.u16 r0, q3[1] -; CHECK-NEXT: vmov.32 q2[1], r0 -; CHECK-NEXT: vmov.u16 r0, q3[2] -; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: vmov.u16 r0, q3[3] -; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: vcmp.i32 ne, q2, zr -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: vmov.32 q4[0], r1 -; CHECK-NEXT: vmov.32 q4[1], r1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: vmov.32 q4[2], r1 -; CHECK-NEXT: vmov.32 q4[3], r1 -; CHECK-NEXT: vmov.u16 r1, q1[0] -; CHECK-NEXT: vmov.32 q2[0], r1 -; CHECK-NEXT: vmov.u16 r1, q1[1] -; CHECK-NEXT: vmov.32 q2[1], r1 -; CHECK-NEXT: vmov.u16 r1, q1[2] -; CHECK-NEXT: vmov.32 q2[2], r1 -; CHECK-NEXT: vmov.u16 r1, q1[3] -; CHECK-NEXT: vmov.32 q2[3], r1 -; CHECK-NEXT: vmov.u16 r1, q0[0] -; CHECK-NEXT: vmov.32 q5[0], r1 -; CHECK-NEXT: vmov.u16 r1, q0[1] -; CHECK-NEXT: vmov.32 q5[1], r1 -; CHECK-NEXT: vmov.u16 r1, q0[2] -; CHECK-NEXT: vmov.32 q5[2], r1 -; CHECK-NEXT: vmov.u16 r1, q0[3] -; CHECK-NEXT: vmov.32 q5[3], r1 -; CHECK-NEXT: vmullb.u16 q5, q5, q2 -; CHECK-NEXT: vmov.i64 q2, #0xffffffff -; CHECK-NEXT: vmov.f32 s24, s20 -; CHECK-NEXT: vmov.f32 s26, s21 -; CHECK-NEXT: vand q6, q6, q2 -; CHECK-NEXT: vand q4, q6, q4 -; CHECK-NEXT: vmov.f32 s24, s22 -; CHECK-NEXT: vmov r3, s18 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: vmov r1, s19 -; CHECK-NEXT: vmov r2, s17 -; CHECK-NEXT: vmov.f32 s26, s23 -; CHECK-NEXT: vand q5, q6, q2 -; CHECK-NEXT: adds r0, r0, r3 -; CHECK-NEXT: adcs r1, r2 -; CHECK-NEXT: ubfx r2, r12, #8, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vmov.32 q4[0], r2 -; CHECK-NEXT: vmov.32 q4[1], r2 -; CHECK-NEXT: ubfx r2, r12, #12, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vmov.32 q4[2], r2 -; CHECK-NEXT: vmov.32 q4[3], r2 -; CHECK-NEXT: vand q4, q5, q4 -; CHECK-NEXT: vmov r3, s16 -; CHECK-NEXT: vmov r2, s17 -; CHECK-NEXT: adds r0, r0, r3 -; CHECK-NEXT: vmov r3, s18 -; CHECK-NEXT: adcs r1, r2 -; CHECK-NEXT: vmov r2, s19 -; CHECK-NEXT: adds.w r12, r0, r3 -; CHECK-NEXT: adcs r1, r2 -; CHECK-NEXT: vmov.u16 r2, q3[4] -; CHECK-NEXT: vmov.32 q4[0], r2 -; CHECK-NEXT: vmov.u16 r2, q3[5] -; CHECK-NEXT: vmov.32 q4[1], r2 -; CHECK-NEXT: vmov.u16 r2, q3[6] -; CHECK-NEXT: vmov.32 q4[2], r2 -; CHECK-NEXT: vmov.u16 r2, q3[7] -; CHECK-NEXT: vmov.32 q4[3], r2 -; CHECK-NEXT: vcmp.i32 ne, q4, zr -; CHECK-NEXT: vmrs lr, p0 -; CHECK-NEXT: and r3, lr, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: vmov.32 q3[0], r3 -; CHECK-NEXT: vmov.32 q3[1], r3 -; CHECK-NEXT: ubfx r3, lr, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: vmov.32 q3[2], r3 -; CHECK-NEXT: vmov.32 q3[3], r3 -; CHECK-NEXT: vmov.u16 r3, q1[4] -; CHECK-NEXT: vmov.32 q4[0], r3 -; CHECK-NEXT: vmov.u16 r3, q1[5] -; CHECK-NEXT: vmov.32 q4[1], r3 -; 
CHECK-NEXT: vmov.u16 r3, q1[6] -; CHECK-NEXT: vmov.32 q4[2], r3 -; CHECK-NEXT: vmov.u16 r3, q1[7] -; CHECK-NEXT: vmov.32 q4[3], r3 -; CHECK-NEXT: vmov.u16 r3, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r3 -; CHECK-NEXT: vmov.u16 r3, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r3 -; CHECK-NEXT: vmov.u16 r3, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r3 -; CHECK-NEXT: vmov.u16 r3, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r3 -; CHECK-NEXT: vmullb.u16 q0, q1, q4 -; CHECK-NEXT: vmov.f32 s4, s0 -; CHECK-NEXT: vmov.f32 s6, s1 -; CHECK-NEXT: vand q1, q1, q2 -; CHECK-NEXT: vand q1, q1, q3 -; CHECK-NEXT: vmov.f32 s12, s2 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov r3, s5 -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: vmov.f32 s14, s3 -; CHECK-NEXT: vand q0, q3, q2 -; CHECK-NEXT: adds.w r0, r0, r12 -; CHECK-NEXT: adcs r1, r3 -; CHECK-NEXT: vmov r3, s7 -; CHECK-NEXT: adds r0, r0, r2 -; CHECK-NEXT: ubfx r2, lr, #8, #1 -; CHECK-NEXT: rsb.w r2, r2, #0 -; CHECK-NEXT: vmov.32 q1[0], r2 -; CHECK-NEXT: vmov.32 q1[1], r2 -; CHECK-NEXT: ubfx r2, lr, #12, #1 -; CHECK-NEXT: rsb.w r2, r2, #0 -; CHECK-NEXT: vmov.32 q1[2], r2 -; CHECK-NEXT: vmov.32 q1[3], r2 -; CHECK-NEXT: vand q0, q0, q1 -; CHECK-NEXT: vmov r2, s1 -; CHECK-NEXT: adcs r1, r3 -; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: adds r0, r0, r3 -; CHECK-NEXT: vmov r3, s2 -; CHECK-NEXT: adcs r1, r2 -; CHECK-NEXT: vmov r2, s3 -; CHECK-NEXT: adds r0, r0, r3 -; CHECK-NEXT: adcs r1, r2 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: vpt.i16 eq, q2, zr +; CHECK-NEXT: vmlalvt.u16 r0, r1, q0, q1 +; CHECK-NEXT: bx lr entry: %c = icmp eq <8 x i16> %b, zeroinitializer %xx = zext <8 x i16> %x to <8 x i32> @@ -396,173 +256,8 @@ entry: define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_sext(<8 x i16> %x, <8 x i16> %y, <8 x i16> %b) { ; CHECK-LABEL: add_v8i16_v8i32_v8i64_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vmov.u16 r0, q1[0] -; CHECK-NEXT: vmov.i8 q6, #0xff -; CHECK-NEXT: vmov.32 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[1] -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[3] -; CHECK-NEXT: vmov.32 q3[3], r0 -; CHECK-NEXT: vmov.u16 r0, q0[0] -; CHECK-NEXT: vmov.32 q4[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov.32 q4[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[2] -; CHECK-NEXT: vmov.32 q4[2], r0 -; CHECK-NEXT: vmov.u16 r0, q0[3] -; CHECK-NEXT: vmov.32 q4[3], r0 -; CHECK-NEXT: vcmp.i16 eq, q2, zr -; CHECK-NEXT: vmullb.s16 q3, q4, q3 -; CHECK-NEXT: vmov.f32 s20, s12 -; CHECK-NEXT: vmov.f32 s22, s13 -; CHECK-NEXT: vmov r0, s20 -; CHECK-NEXT: vmov.32 q4[0], r0 -; CHECK-NEXT: asrs r0, r0, #31 -; CHECK-NEXT: vmov.32 q4[1], r0 -; CHECK-NEXT: vmov r0, s22 -; CHECK-NEXT: vmov.i8 q5, #0x0 -; CHECK-NEXT: vmov.32 q4[2], r0 -; CHECK-NEXT: vpsel q2, q6, q5 -; CHECK-NEXT: asrs r0, r0, #31 -; CHECK-NEXT: vmov.32 q4[3], r0 -; CHECK-NEXT: vmov.u16 r0, q2[0] -; CHECK-NEXT: vmov.32 q5[0], r0 -; CHECK-NEXT: vmov.u16 r0, q2[1] -; CHECK-NEXT: vmov.32 q5[1], r0 -; CHECK-NEXT: vmov.u16 r0, q2[2] -; CHECK-NEXT: vmov.32 q5[2], r0 -; CHECK-NEXT: vmov.u16 r0, q2[3] -; CHECK-NEXT: vmov.32 q5[3], r0 -; CHECK-NEXT: vcmp.i32 ne, q5, zr -; CHECK-NEXT: vmrs r0, p0 -; CHECK-NEXT: and r1, r0, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: vmov.32 q5[0], r1 -; CHECK-NEXT: vmov.32 q5[1], r1 -; CHECK-NEXT: ubfx r1, r0, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: vmov.32 q5[2], r1 -; 
CHECK-NEXT: vmov.32 q5[3], r1 -; CHECK-NEXT: vand q4, q4, q5 -; CHECK-NEXT: vmov r1, s18 -; CHECK-NEXT: vmov r2, s16 -; CHECK-NEXT: vmov r12, s19 -; CHECK-NEXT: vmov r3, s17 -; CHECK-NEXT: vmov.f32 s16, s14 -; CHECK-NEXT: vmov.f32 s18, s15 -; CHECK-NEXT: adds r1, r1, r2 -; CHECK-NEXT: adc.w r2, r3, r12 -; CHECK-NEXT: vmov r3, s16 -; CHECK-NEXT: vmov.32 q3[0], r3 -; CHECK-NEXT: asrs r3, r3, #31 -; CHECK-NEXT: vmov.32 q3[1], r3 -; CHECK-NEXT: vmov r3, s18 -; CHECK-NEXT: vmov.32 q3[2], r3 -; CHECK-NEXT: asrs r3, r3, #31 -; CHECK-NEXT: vmov.32 q3[3], r3 -; CHECK-NEXT: ubfx r3, r0, #8, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: ubfx r0, r0, #12, #1 -; CHECK-NEXT: vmov.32 q4[0], r3 -; CHECK-NEXT: rsbs r0, r0, #0 -; CHECK-NEXT: vmov.32 q4[1], r3 -; CHECK-NEXT: vmov.32 q4[2], r0 -; CHECK-NEXT: vmov.32 q4[3], r0 -; CHECK-NEXT: vand q3, q3, q4 -; CHECK-NEXT: vmov r3, s12 -; CHECK-NEXT: vmov r0, s13 -; CHECK-NEXT: adds r1, r1, r3 -; CHECK-NEXT: vmov r3, s15 -; CHECK-NEXT: adcs r2, r0 -; CHECK-NEXT: vmov r0, s14 -; CHECK-NEXT: adds.w r12, r1, r0 -; CHECK-NEXT: adc.w r1, r2, r3 -; CHECK-NEXT: vmov.u16 r2, q1[4] -; CHECK-NEXT: vmov.32 q3[0], r2 -; CHECK-NEXT: vmov.u16 r2, q1[5] -; CHECK-NEXT: vmov.32 q3[1], r2 -; CHECK-NEXT: vmov.u16 r2, q1[6] -; CHECK-NEXT: vmov.32 q3[2], r2 -; CHECK-NEXT: vmov.u16 r2, q1[7] -; CHECK-NEXT: vmov.32 q3[3], r2 -; CHECK-NEXT: vmov.u16 r2, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r2 -; CHECK-NEXT: vmov.u16 r2, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r2 -; CHECK-NEXT: vmov.u16 r2, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r2 -; CHECK-NEXT: vmov.u16 r2, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r2 -; CHECK-NEXT: vmullb.s16 q0, q1, q3 -; CHECK-NEXT: vmov.f32 s12, s0 -; CHECK-NEXT: vmov.f32 s14, s1 -; CHECK-NEXT: vmov r2, s12 -; CHECK-NEXT: vmov.32 q1[0], r2 -; CHECK-NEXT: asrs r2, r2, #31 -; CHECK-NEXT: vmov.32 q1[1], r2 -; CHECK-NEXT: vmov r2, s14 -; CHECK-NEXT: vmov.32 q1[2], r2 -; CHECK-NEXT: asrs r2, r2, #31 -; CHECK-NEXT: vmov.32 q1[3], r2 -; CHECK-NEXT: vmov.u16 r2, q2[4] -; CHECK-NEXT: vmov.32 q3[0], r2 -; CHECK-NEXT: vmov.u16 r2, q2[5] -; CHECK-NEXT: vmov.32 q3[1], r2 -; CHECK-NEXT: vmov.u16 r2, q2[6] -; CHECK-NEXT: vmov.32 q3[2], r2 -; CHECK-NEXT: vmov.u16 r2, q2[7] -; CHECK-NEXT: vmov.32 q3[3], r2 -; CHECK-NEXT: vcmp.i32 ne, q3, zr -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: and r3, r2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: vmov.32 q2[0], r3 -; CHECK-NEXT: vmov.32 q2[1], r3 -; CHECK-NEXT: ubfx r3, r2, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: vmov.32 q2[2], r3 -; CHECK-NEXT: vmov.32 q2[3], r3 -; CHECK-NEXT: vand q1, q1, q2 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov r3, s5 -; CHECK-NEXT: adds.w r12, r12, r0 -; CHECK-NEXT: vmov r0, s6 -; CHECK-NEXT: adcs r1, r3 -; CHECK-NEXT: vmov r3, s7 -; CHECK-NEXT: vmov.f32 s4, s2 -; CHECK-NEXT: vmov.f32 s6, s3 -; CHECK-NEXT: adds.w r0, r0, r12 -; CHECK-NEXT: adcs r1, r3 -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmov.32 q0[0], r3 -; CHECK-NEXT: asrs r3, r3, #31 -; CHECK-NEXT: vmov.32 q0[1], r3 -; CHECK-NEXT: vmov r3, s6 -; CHECK-NEXT: vmov.32 q0[2], r3 -; CHECK-NEXT: asrs r3, r3, #31 -; CHECK-NEXT: vmov.32 q0[3], r3 -; CHECK-NEXT: ubfx r3, r2, #8, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: ubfx r2, r2, #12, #1 -; CHECK-NEXT: vmov.32 q1[0], r3 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vmov.32 q1[1], r3 -; CHECK-NEXT: vmov.32 q1[2], r2 -; CHECK-NEXT: vmov.32 q1[3], r2 -; CHECK-NEXT: vand q0, q0, q1 -; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: vmov r2, s1 -; CHECK-NEXT: adds r0, r0, r3 -; CHECK-NEXT: vmov r3, s2 -; 
CHECK-NEXT: adcs r1, r2 -; CHECK-NEXT: vmov r2, s3 -; CHECK-NEXT: adds r0, r0, r3 -; CHECK-NEXT: adcs r1, r2 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpt.i16 eq, q2, zr +; CHECK-NEXT: vmlalvt.s16 r0, r1, q0, q1 ; CHECK-NEXT: bx lr entry: %c = icmp eq <8 x i16> %b, zeroinitializer @@ -839,436 +534,37 @@ define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_sext(<16 x i8> %x, <16 x i8> %y, <1 ; CHECK-NEXT: vmlavt.s8 r0, q0, q1 ; CHECK-NEXT: bx lr entry: - %c = icmp eq <16 x i8> %b, zeroinitializer - %xx = sext <16 x i8> %x to <16 x i32> - %yy = sext <16 x i8> %y to <16 x i32> - %m = mul <16 x i32> %xx, %yy - %s = select <16 x i1> %c, <16 x i32> %m, <16 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) - ret i32 %z -} - -define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_zext(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b) { -; CHECK-LABEL: add_v16i8_v16i16_v16i32_zext: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: .pad #64 -; CHECK-NEXT: sub sp, #64 -; CHECK-NEXT: vcmp.i8 eq, q2, zr -; CHECK-NEXT: vmov.i8 q2, #0x0 -; CHECK-NEXT: vmov.i8 q7, #0xff -; CHECK-NEXT: vmov q6, q1 -; CHECK-NEXT: vpsel q1, q7, q2 -; CHECK-NEXT: vstrw.32 q2, [sp] @ 16-byte Spill -; CHECK-NEXT: vmov.u8 r0, q1[0] -; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov.u8 r0, q1[1] -; CHECK-NEXT: vmov.16 q3[1], r0 -; CHECK-NEXT: vmov.u8 r0, q1[2] -; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov.u8 r0, q1[3] -; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov.u8 r0, q1[4] -; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov.u8 r0, q1[5] -; CHECK-NEXT: vmov.16 q3[5], r0 -; CHECK-NEXT: vmov.u8 r0, q1[6] -; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov.u8 r0, q1[7] -; CHECK-NEXT: vmov.16 q3[7], r0 -; CHECK-NEXT: vcmp.i16 ne, q3, zr -; CHECK-NEXT: vpsel q3, q7, q2 -; CHECK-NEXT: vmov.u16 r0, q3[4] -; CHECK-NEXT: vstrw.32 q3, [sp, #48] @ 16-byte Spill -; CHECK-NEXT: vmov.32 q2[0], r0 -; CHECK-NEXT: vmov.u16 r0, q3[5] -; CHECK-NEXT: vmov.32 q2[1], r0 -; CHECK-NEXT: vmov.u16 r0, q3[6] -; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: vmov.u16 r0, q3[7] -; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: vmov.u8 r0, q6[0] -; CHECK-NEXT: vmov.16 q4[0], r0 -; CHECK-NEXT: vmov.u8 r0, q6[1] -; CHECK-NEXT: vmov.16 q4[1], r0 -; CHECK-NEXT: vmov.u8 r0, q6[2] -; CHECK-NEXT: vmov.16 q4[2], r0 -; CHECK-NEXT: vmov.u8 r0, q6[3] -; CHECK-NEXT: vmov.16 q4[3], r0 -; CHECK-NEXT: vmov.u8 r0, q6[4] -; CHECK-NEXT: vmov.16 q4[4], r0 -; CHECK-NEXT: vmov.u8 r0, q6[5] -; CHECK-NEXT: vmov.16 q4[5], r0 -; CHECK-NEXT: vmov.u8 r0, q6[6] -; CHECK-NEXT: vmov.16 q4[6], r0 -; CHECK-NEXT: vmov.u8 r0, q6[7] -; CHECK-NEXT: vmov.16 q4[7], r0 -; CHECK-NEXT: vmov.u8 r0, q0[0] -; CHECK-NEXT: vmov.16 q5[0], r0 -; CHECK-NEXT: vmov.u8 r0, q0[1] -; CHECK-NEXT: vmov.16 q5[1], r0 -; CHECK-NEXT: vmov.u8 r0, q0[2] -; CHECK-NEXT: vmov.16 q5[2], r0 -; CHECK-NEXT: vmov.u8 r0, q0[3] -; CHECK-NEXT: vmov.16 q5[3], r0 -; CHECK-NEXT: vmov.u8 r0, q0[4] -; CHECK-NEXT: vmov.16 q5[4], r0 -; CHECK-NEXT: vmov.u8 r0, q0[5] -; CHECK-NEXT: vmov.16 q5[5], r0 -; CHECK-NEXT: vmov.u8 r0, q0[6] -; CHECK-NEXT: vmov.16 q5[6], r0 -; CHECK-NEXT: vmov.u8 r0, q0[7] -; CHECK-NEXT: vmov.16 q5[7], r0 -; CHECK-NEXT: vmov q3, q0 -; CHECK-NEXT: vmullb.u8 q5, q5, q4 -; CHECK-NEXT: vcmp.i32 ne, q2, zr -; CHECK-NEXT: vmov.u16 r0, q5[4] -; CHECK-NEXT: vmov.i32 q4, #0x0 -; CHECK-NEXT: vmov.32 q0[0], r0 -; CHECK-NEXT: vmov.u16 r0, q5[5] -; 
CHECK-NEXT: vmov.32 q0[1], r0 -; CHECK-NEXT: vmov.u16 r0, q5[6] -; CHECK-NEXT: vmov.32 q0[2], r0 -; CHECK-NEXT: vmov.u16 r0, q5[7] -; CHECK-NEXT: vmov.32 q0[3], r0 -; CHECK-NEXT: vmov.i32 q2, #0xffff -; CHECK-NEXT: vmov.u8 r0, q1[8] -; CHECK-NEXT: vstrw.32 q4, [sp, #32] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q2, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vpst -; CHECK-NEXT: vandt q4, q0, q2 -; CHECK-NEXT: vmov.16 q0[0], r0 -; CHECK-NEXT: vmov.u8 r0, q1[9] -; CHECK-NEXT: vmov.16 q0[1], r0 -; CHECK-NEXT: vmov.u8 r0, q1[10] -; CHECK-NEXT: vmov.16 q0[2], r0 -; CHECK-NEXT: vmov.u8 r0, q1[11] -; CHECK-NEXT: vmov.16 q0[3], r0 -; CHECK-NEXT: vmov.u8 r0, q1[12] -; CHECK-NEXT: vmov.16 q0[4], r0 -; CHECK-NEXT: vmov.u8 r0, q1[13] -; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vmov.u8 r0, q1[14] -; CHECK-NEXT: vmov.16 q0[6], r0 -; CHECK-NEXT: vmov.u8 r0, q1[15] -; CHECK-NEXT: vmov.16 q0[7], r0 -; CHECK-NEXT: vcmp.i16 ne, q0, zr -; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload -; CHECK-NEXT: vpsel q0, q7, q0 -; CHECK-NEXT: vmov.u16 r0, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r0 -; CHECK-NEXT: vmov.u16 r0, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r0 -; CHECK-NEXT: vmov.u8 r0, q6[8] -; CHECK-NEXT: vmov.16 q7[0], r0 -; CHECK-NEXT: vmov.u8 r0, q6[9] -; CHECK-NEXT: vmov.16 q7[1], r0 -; CHECK-NEXT: vmov.u8 r0, q6[10] -; CHECK-NEXT: vmov.16 q7[2], r0 -; CHECK-NEXT: vmov.u8 r0, q6[11] -; CHECK-NEXT: vmov.16 q7[3], r0 -; CHECK-NEXT: vmov.u8 r0, q6[12] -; CHECK-NEXT: vmov.16 q7[4], r0 -; CHECK-NEXT: vmov.u8 r0, q6[13] -; CHECK-NEXT: vmov.16 q7[5], r0 -; CHECK-NEXT: vmov.u8 r0, q6[14] -; CHECK-NEXT: vmov.16 q7[6], r0 -; CHECK-NEXT: vmov.u8 r0, q6[15] -; CHECK-NEXT: vmov.16 q7[7], r0 -; CHECK-NEXT: vmov.u8 r0, q3[8] -; CHECK-NEXT: vcmp.i32 ne, q1, zr -; CHECK-NEXT: vmov.16 q1[0], r0 -; CHECK-NEXT: vmov.u8 r0, q3[9] -; CHECK-NEXT: vmov.16 q1[1], r0 -; CHECK-NEXT: vmov.u8 r0, q3[10] -; CHECK-NEXT: vmov.16 q1[2], r0 -; CHECK-NEXT: vmov.u8 r0, q3[11] -; CHECK-NEXT: vmov.16 q1[3], r0 -; CHECK-NEXT: vmov.u8 r0, q3[12] -; CHECK-NEXT: vmov.16 q1[4], r0 -; CHECK-NEXT: vmov.u8 r0, q3[13] -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmov.u8 r0, q3[14] -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov.u8 r0, q3[15] -; CHECK-NEXT: vmov.16 q1[7], r0 -; CHECK-NEXT: vmullb.u8 q1, q1, q7 -; CHECK-NEXT: vmov.u16 r0, q1[4] -; CHECK-NEXT: vmov.32 q2[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[5] -; CHECK-NEXT: vmov.32 q2[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[6] -; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[7] -; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: vmovlb.u16 q2, q2 -; CHECK-NEXT: vpst -; CHECK-NEXT: vaddt.i32 q4, q4, q2 -; CHECK-NEXT: vldrw.u32 q3, [sp, #48] @ 16-byte Reload -; CHECK-NEXT: vmov.u16 r0, q3[0] -; CHECK-NEXT: vmov.32 q2[0], r0 -; CHECK-NEXT: vmov.u16 r0, q3[1] -; CHECK-NEXT: vmov.32 q2[1], r0 -; CHECK-NEXT: vmov.u16 r0, q3[2] -; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: vmov.u16 r0, q3[3] -; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: vmov.u16 r0, q5[0] -; CHECK-NEXT: vcmp.i32 ne, q2, zr -; CHECK-NEXT: vmov.32 q2[0], r0 -; CHECK-NEXT: vmov.u16 r0, q5[1] -; CHECK-NEXT: vldrw.u32 q3, [sp, #32] @ 16-byte Reload -; CHECK-NEXT: vmov.32 q2[1], r0 -; CHECK-NEXT: vmov.u16 r0, q5[2] -; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: vmov.u16 r0, q5[3] -; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: vmov.u16 r0, q0[0] -; CHECK-NEXT: vldrw.u32 q5, [sp, #16] @ 16-byte Reload -; 
CHECK-NEXT: vpst -; CHECK-NEXT: vandt q3, q2, q5 -; CHECK-NEXT: vmov.32 q2[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov.32 q2[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[2] -; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: vmov.u16 r0, q0[3] -; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: vmov.u16 r0, q1[0] -; CHECK-NEXT: vmov.32 q0[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[1] -; CHECK-NEXT: vmov.32 q0[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: vmov.32 q0[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[3] -; CHECK-NEXT: vmov.32 q0[3], r0 -; CHECK-NEXT: vmov q1, q3 -; CHECK-NEXT: vmovlb.u16 q0, q0 -; CHECK-NEXT: vpt.i32 ne, q2, zr -; CHECK-NEXT: vaddt.i32 q1, q3, q0 -; CHECK-NEXT: vadd.i32 q0, q1, q4 -; CHECK-NEXT: vaddv.u32 r0, q0 -; CHECK-NEXT: add sp, #64 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: bx lr -entry: - %c = icmp eq <16 x i8> %b, zeroinitializer - %xx = zext <16 x i8> %x to <16 x i16> - %yy = zext <16 x i8> %y to <16 x i16> - %m = mul <16 x i16> %xx, %yy - %ma = zext <16 x i16> %m to <16 x i32> - %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer - %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) - ret i32 %z -} - -define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_sext(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b) { -; CHECK-LABEL: add_v16i8_v16i16_v16i32_sext: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: .pad #32 -; CHECK-NEXT: sub sp, #32 -; CHECK-NEXT: vmov q3, q2 -; CHECK-NEXT: vmov q6, q0 -; CHECK-NEXT: vmov.i8 q0, #0x0 -; CHECK-NEXT: vcmp.i8 eq, q3, zr -; CHECK-NEXT: vmov.i8 q5, #0xff -; CHECK-NEXT: vmov q2, q1 -; CHECK-NEXT: vpsel q1, q5, q0 -; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill -; CHECK-NEXT: vmov.u8 r0, q1[0] -; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov.u8 r0, q1[1] -; CHECK-NEXT: vmov.16 q3[1], r0 -; CHECK-NEXT: vmov.u8 r0, q1[2] -; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov.u8 r0, q1[3] -; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov.u8 r0, q1[4] -; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov.u8 r0, q1[5] -; CHECK-NEXT: vmov.16 q3[5], r0 -; CHECK-NEXT: vmov.u8 r0, q1[6] -; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov.u8 r0, q1[7] -; CHECK-NEXT: vmov.16 q3[7], r0 -; CHECK-NEXT: vcmp.i16 ne, q3, zr -; CHECK-NEXT: vpsel q3, q5, q0 -; CHECK-NEXT: vmov.u16 r0, q3[4] -; CHECK-NEXT: vmov.32 q0[0], r0 -; CHECK-NEXT: vmov.u16 r0, q3[5] -; CHECK-NEXT: vmov.32 q0[1], r0 -; CHECK-NEXT: vmov.u16 r0, q3[6] -; CHECK-NEXT: vmov.32 q0[2], r0 -; CHECK-NEXT: vmov.u16 r0, q3[7] -; CHECK-NEXT: vmov.32 q0[3], r0 -; CHECK-NEXT: vmov.u8 r0, q2[0] -; CHECK-NEXT: vmov.16 q4[0], r0 -; CHECK-NEXT: vmov.u8 r0, q2[1] -; CHECK-NEXT: vmov.16 q4[1], r0 -; CHECK-NEXT: vmov.u8 r0, q2[2] -; CHECK-NEXT: vmov.16 q4[2], r0 -; CHECK-NEXT: vmov.u8 r0, q2[3] -; CHECK-NEXT: vmov.16 q4[3], r0 -; CHECK-NEXT: vmov.u8 r0, q2[4] -; CHECK-NEXT: vmov.16 q4[4], r0 -; CHECK-NEXT: vmov.u8 r0, q2[5] -; CHECK-NEXT: vmov.16 q4[5], r0 -; CHECK-NEXT: vmov.u8 r0, q2[6] -; CHECK-NEXT: vmov.16 q4[6], r0 -; CHECK-NEXT: vmov.u8 r0, q2[7] -; CHECK-NEXT: vmov.16 q4[7], r0 -; CHECK-NEXT: vmov.u8 r0, q6[0] -; CHECK-NEXT: vmov.16 q7[0], r0 -; CHECK-NEXT: vmov.u8 r0, q6[1] -; CHECK-NEXT: vmov.16 q7[1], r0 -; CHECK-NEXT: vmov.u8 r0, q6[2] -; CHECK-NEXT: vmov.16 q7[2], r0 -; CHECK-NEXT: vmov.u8 r0, q6[3] -; CHECK-NEXT: vmov.16 q7[3], r0 -; CHECK-NEXT: vmov.u8 r0, q6[4] -; CHECK-NEXT: vmov.16 q7[4], r0 
-; CHECK-NEXT: vmov.u8 r0, q6[5] -; CHECK-NEXT: vmov.16 q7[5], r0 -; CHECK-NEXT: vmov.u8 r0, q6[6] -; CHECK-NEXT: vmov.16 q7[6], r0 -; CHECK-NEXT: vmov.u8 r0, q6[7] -; CHECK-NEXT: vmov.16 q7[7], r0 -; CHECK-NEXT: vcmp.i32 ne, q0, zr -; CHECK-NEXT: vmullb.s8 q4, q7, q4 -; CHECK-NEXT: vmov.u16 r0, q4[4] -; CHECK-NEXT: vmov.32 q0[0], r0 -; CHECK-NEXT: vmov.u16 r0, q4[5] -; CHECK-NEXT: vmov.32 q0[1], r0 -; CHECK-NEXT: vmov.u16 r0, q4[6] -; CHECK-NEXT: vmov.32 q0[2], r0 -; CHECK-NEXT: vmov.u16 r0, q4[7] -; CHECK-NEXT: vmov.32 q0[3], r0 -; CHECK-NEXT: vmov.u8 r0, q1[8] -; CHECK-NEXT: vmovlb.s16 q7, q0 -; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vpsel q7, q7, q0 -; CHECK-NEXT: vmov.16 q0[0], r0 -; CHECK-NEXT: vmov.u8 r0, q1[9] -; CHECK-NEXT: vmov.16 q0[1], r0 -; CHECK-NEXT: vmov.u8 r0, q1[10] -; CHECK-NEXT: vmov.16 q0[2], r0 -; CHECK-NEXT: vmov.u8 r0, q1[11] -; CHECK-NEXT: vmov.16 q0[3], r0 -; CHECK-NEXT: vmov.u8 r0, q1[12] -; CHECK-NEXT: vmov.16 q0[4], r0 -; CHECK-NEXT: vmov.u8 r0, q1[13] -; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vmov.u8 r0, q1[14] -; CHECK-NEXT: vmov.16 q0[6], r0 -; CHECK-NEXT: vmov.u8 r0, q1[15] -; CHECK-NEXT: vmov.16 q0[7], r0 -; CHECK-NEXT: vcmp.i16 ne, q0, zr -; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload -; CHECK-NEXT: vpsel q0, q5, q0 -; CHECK-NEXT: vmov.u16 r0, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r0 -; CHECK-NEXT: vmov.u16 r0, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r0 -; CHECK-NEXT: vmov.u8 r0, q2[8] -; CHECK-NEXT: vmov.16 q5[0], r0 -; CHECK-NEXT: vmov.u8 r0, q2[9] -; CHECK-NEXT: vmov.16 q5[1], r0 -; CHECK-NEXT: vmov.u8 r0, q2[10] -; CHECK-NEXT: vmov.16 q5[2], r0 -; CHECK-NEXT: vmov.u8 r0, q2[11] -; CHECK-NEXT: vmov.16 q5[3], r0 -; CHECK-NEXT: vmov.u8 r0, q2[12] -; CHECK-NEXT: vmov.16 q5[4], r0 -; CHECK-NEXT: vmov.u8 r0, q2[13] -; CHECK-NEXT: vmov.16 q5[5], r0 -; CHECK-NEXT: vmov.u8 r0, q2[14] -; CHECK-NEXT: vmov.16 q5[6], r0 -; CHECK-NEXT: vmov.u8 r0, q2[15] -; CHECK-NEXT: vmov.16 q5[7], r0 -; CHECK-NEXT: vmov.u8 r0, q6[8] -; CHECK-NEXT: vcmp.i32 ne, q1, zr -; CHECK-NEXT: vmov.16 q1[0], r0 -; CHECK-NEXT: vmov.u8 r0, q6[9] -; CHECK-NEXT: vmov.16 q1[1], r0 -; CHECK-NEXT: vmov.u8 r0, q6[10] -; CHECK-NEXT: vmov.16 q1[2], r0 -; CHECK-NEXT: vmov.u8 r0, q6[11] -; CHECK-NEXT: vmov.16 q1[3], r0 -; CHECK-NEXT: vmov.u8 r0, q6[12] -; CHECK-NEXT: vmov.16 q1[4], r0 -; CHECK-NEXT: vmov.u8 r0, q6[13] -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmov.u8 r0, q6[14] -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov.u8 r0, q6[15] -; CHECK-NEXT: vmov.16 q1[7], r0 -; CHECK-NEXT: vmullb.s8 q1, q1, q5 -; CHECK-NEXT: vmov.u16 r0, q1[4] -; CHECK-NEXT: vmov.32 q2[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[5] -; CHECK-NEXT: vmov.32 q2[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[6] -; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[7] -; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: vmov.u16 r0, q3[0] -; CHECK-NEXT: vmovlb.s16 q2, q2 -; CHECK-NEXT: vpst -; CHECK-NEXT: vaddt.i32 q7, q7, q2 -; CHECK-NEXT: vmov.32 q2[0], r0 -; CHECK-NEXT: vmov.u16 r0, q3[1] -; CHECK-NEXT: vmov.32 q2[1], r0 -; CHECK-NEXT: vmov.u16 r0, q3[2] -; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: vmov.u16 r0, q3[3] -; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: vmov.u16 r0, q4[0] -; CHECK-NEXT: vcmp.i32 ne, q2, zr -; CHECK-NEXT: vmov.32 q2[0], r0 -; CHECK-NEXT: vmov.u16 r0, q4[1] -; CHECK-NEXT: vldrw.u32 q3, [sp, #16] @ 
16-byte Reload -; CHECK-NEXT: vmov.32 q2[1], r0 -; CHECK-NEXT: vmov.u16 r0, q4[2] -; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: vmov.u16 r0, q4[3] -; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: vmov.u16 r0, q0[0] -; CHECK-NEXT: vmovlb.s16 q2, q2 -; CHECK-NEXT: vpsel q2, q2, q3 -; CHECK-NEXT: vmov.32 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[2] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q0[3] -; CHECK-NEXT: vmov.32 q3[3], r0 -; CHECK-NEXT: vmov.u16 r0, q1[0] -; CHECK-NEXT: vmov.32 q0[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[1] -; CHECK-NEXT: vmov.32 q0[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: vmov.32 q0[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[3] -; CHECK-NEXT: vmov.32 q0[3], r0 -; CHECK-NEXT: vmovlb.s16 q0, q0 -; CHECK-NEXT: vpt.i32 ne, q3, zr -; CHECK-NEXT: vaddt.i32 q2, q2, q0 -; CHECK-NEXT: vadd.i32 q0, q2, q7 -; CHECK-NEXT: vaddv.u32 r0, q0 -; CHECK-NEXT: add sp, #32 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} + %c = icmp eq <16 x i8> %b, zeroinitializer + %xx = sext <16 x i8> %x to <16 x i32> + %yy = sext <16 x i8> %y to <16 x i32> + %m = mul <16 x i32> %xx, %yy + %s = select <16 x i1> %c, <16 x i32> %m, <16 x i32> zeroinitializer + %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + ret i32 %z +} + +define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_zext(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b) { +; CHECK-LABEL: add_v16i8_v16i16_v16i32_zext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpt.i8 eq, q2, zr +; CHECK-NEXT: vmlavt.u8 r0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <16 x i8> %b, zeroinitializer + %xx = zext <16 x i8> %x to <16 x i16> + %yy = zext <16 x i8> %y to <16 x i16> + %m = mul <16 x i16> %xx, %yy + %ma = zext <16 x i16> %m to <16 x i32> + %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer + %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s) + ret i32 %z +} + +define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_sext(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b) { +; CHECK-LABEL: add_v16i8_v16i16_v16i32_sext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpt.i8 eq, q2, zr +; CHECK-NEXT: vmlavt.s8 r0, q0, q1 ; CHECK-NEXT: bx lr entry: %c = icmp eq <16 x i8> %b, zeroinitializer @@ -2763,338 +2059,27 @@ entry: define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_acc_zext(<8 x i16> %x, <8 x i16> %y, <8 x i16> %b, i64 %a) { ; CHECK-LABEL: add_v8i16_v8i32_v8i64_acc_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vmov.i8 q3, #0x0 -; CHECK-NEXT: vmov.i8 q4, #0xff -; CHECK-NEXT: vcmp.i16 eq, q2, zr -; CHECK-NEXT: vpsel q3, q4, q3 -; CHECK-NEXT: vmov.u16 r2, q3[0] -; CHECK-NEXT: vmov.32 q2[0], r2 -; CHECK-NEXT: vmov.u16 r2, q3[1] -; CHECK-NEXT: vmov.32 q2[1], r2 -; CHECK-NEXT: vmov.u16 r2, q3[2] -; CHECK-NEXT: vmov.32 q2[2], r2 -; CHECK-NEXT: vmov.u16 r2, q3[3] -; CHECK-NEXT: vmov.32 q2[3], r2 -; CHECK-NEXT: vcmp.i32 ne, q2, zr -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r3, r12, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: vmov.32 q4[0], r3 -; CHECK-NEXT: vmov.32 q4[1], r3 -; CHECK-NEXT: ubfx r3, r12, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: vmov.32 q4[2], r3 -; CHECK-NEXT: vmov.32 q4[3], r3 -; CHECK-NEXT: vmov.u16 r3, q1[0] -; CHECK-NEXT: vmov.32 q2[0], r3 -; CHECK-NEXT: vmov.u16 r3, q1[1] -; CHECK-NEXT: vmov.32 
q2[1], r3 -; CHECK-NEXT: vmov.u16 r3, q1[2] -; CHECK-NEXT: vmov.32 q2[2], r3 -; CHECK-NEXT: vmov.u16 r3, q1[3] -; CHECK-NEXT: vmov.32 q2[3], r3 -; CHECK-NEXT: vmov.u16 r3, q0[0] -; CHECK-NEXT: vmov.32 q5[0], r3 -; CHECK-NEXT: vmov.u16 r3, q0[1] -; CHECK-NEXT: vmov.32 q5[1], r3 -; CHECK-NEXT: vmov.u16 r3, q0[2] -; CHECK-NEXT: vmov.32 q5[2], r3 -; CHECK-NEXT: vmov.u16 r3, q0[3] -; CHECK-NEXT: vmov.32 q5[3], r3 -; CHECK-NEXT: vmullb.u16 q5, q5, q2 -; CHECK-NEXT: vmov.i64 q2, #0xffffffff -; CHECK-NEXT: vmov.f32 s24, s20 -; CHECK-NEXT: vmov.f32 s26, s21 -; CHECK-NEXT: vand q6, q6, q2 -; CHECK-NEXT: vand q4, q6, q4 -; CHECK-NEXT: vmov.f32 s24, s22 -; CHECK-NEXT: vmov r3, s18 -; CHECK-NEXT: vmov r4, s16 -; CHECK-NEXT: vmov lr, s19 -; CHECK-NEXT: vmov r2, s17 -; CHECK-NEXT: vmov.f32 s26, s23 -; CHECK-NEXT: vand q5, q6, q2 -; CHECK-NEXT: adds r3, r3, r4 -; CHECK-NEXT: ubfx r4, r12, #8, #1 -; CHECK-NEXT: rsb.w r4, r4, #0 -; CHECK-NEXT: vmov.32 q4[0], r4 -; CHECK-NEXT: adc.w lr, lr, r2 -; CHECK-NEXT: vmov.32 q4[1], r4 -; CHECK-NEXT: ubfx r4, r12, #12, #1 -; CHECK-NEXT: rsbs r4, r4, #0 -; CHECK-NEXT: vmov.32 q4[2], r4 -; CHECK-NEXT: vmov.32 q4[3], r4 -; CHECK-NEXT: vand q4, q5, q4 -; CHECK-NEXT: vmov r2, s16 -; CHECK-NEXT: vmov r4, s17 -; CHECK-NEXT: adds.w r12, r3, r2 -; CHECK-NEXT: vmov r2, s18 -; CHECK-NEXT: adc.w r3, lr, r4 -; CHECK-NEXT: vmov r4, s19 -; CHECK-NEXT: adds.w r12, r12, r2 -; CHECK-NEXT: vmov.u16 r2, q3[4] -; CHECK-NEXT: vmov.32 q4[0], r2 -; CHECK-NEXT: vmov.u16 r2, q3[5] -; CHECK-NEXT: vmov.32 q4[1], r2 -; CHECK-NEXT: vmov.u16 r2, q3[6] -; CHECK-NEXT: vmov.32 q4[2], r2 -; CHECK-NEXT: vmov.u16 r2, q3[7] -; CHECK-NEXT: vmov.32 q4[3], r2 -; CHECK-NEXT: adc.w lr, r3, r4 -; CHECK-NEXT: vcmp.i32 ne, q4, zr -; CHECK-NEXT: vmrs r6, p0 -; CHECK-NEXT: and r4, r6, #1 -; CHECK-NEXT: rsbs r4, r4, #0 -; CHECK-NEXT: vmov.32 q3[0], r4 -; CHECK-NEXT: vmov.32 q3[1], r4 -; CHECK-NEXT: ubfx r4, r6, #4, #1 -; CHECK-NEXT: rsbs r4, r4, #0 -; CHECK-NEXT: vmov.32 q3[2], r4 -; CHECK-NEXT: vmov.32 q3[3], r4 -; CHECK-NEXT: vmov.u16 r4, q1[4] -; CHECK-NEXT: vmov.32 q4[0], r4 -; CHECK-NEXT: vmov.u16 r4, q1[5] -; CHECK-NEXT: vmov.32 q4[1], r4 -; CHECK-NEXT: vmov.u16 r4, q1[6] -; CHECK-NEXT: vmov.32 q4[2], r4 -; CHECK-NEXT: vmov.u16 r4, q1[7] -; CHECK-NEXT: vmov.32 q4[3], r4 -; CHECK-NEXT: vmov.u16 r4, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r4 -; CHECK-NEXT: vmov.u16 r4, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r4 -; CHECK-NEXT: vmov.u16 r4, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r4 -; CHECK-NEXT: vmov.u16 r4, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r4 -; CHECK-NEXT: vmullb.u16 q0, q1, q4 -; CHECK-NEXT: vmov.f32 s4, s0 -; CHECK-NEXT: vmov.f32 s6, s1 -; CHECK-NEXT: vand q1, q1, q2 -; CHECK-NEXT: vand q1, q1, q3 -; CHECK-NEXT: vmov.f32 s12, s2 -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmov r4, s5 -; CHECK-NEXT: vmov r5, s6 -; CHECK-NEXT: vmov r2, s7 -; CHECK-NEXT: vmov.f32 s14, s3 -; CHECK-NEXT: vand q0, q3, q2 -; CHECK-NEXT: adds.w r3, r3, r12 -; CHECK-NEXT: adc.w r4, r4, lr -; CHECK-NEXT: adds r3, r3, r5 -; CHECK-NEXT: ubfx r5, r6, #8, #1 -; CHECK-NEXT: rsb.w r5, r5, #0 -; CHECK-NEXT: ubfx r6, r6, #12, #1 -; CHECK-NEXT: vmov.32 q1[0], r5 -; CHECK-NEXT: rsb.w r6, r6, #0 -; CHECK-NEXT: vmov.32 q1[1], r5 -; CHECK-NEXT: adcs r2, r4 -; CHECK-NEXT: vmov.32 q1[2], r6 -; CHECK-NEXT: vmov.32 q1[3], r6 -; CHECK-NEXT: vand q0, q0, q1 -; CHECK-NEXT: vmov r5, s0 -; CHECK-NEXT: vmov r6, s1 -; CHECK-NEXT: adds r3, r3, r5 -; CHECK-NEXT: vmov r5, s2 -; CHECK-NEXT: adcs r2, r6 -; CHECK-NEXT: vmov r6, s3 -; CHECK-NEXT: adds r3, r3, r5 -; 
CHECK-NEXT: adcs r2, r6 -; CHECK-NEXT: adds r0, r0, r3 -; CHECK-NEXT: adcs r1, r2 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: pop {r4, r5, r6, pc} -entry: - %c = icmp eq <8 x i16> %b, zeroinitializer - %xx = zext <8 x i16> %x to <8 x i32> - %yy = zext <8 x i16> %y to <8 x i32> - %m = mul <8 x i32> %xx, %yy - %ma = zext <8 x i32> %m to <8 x i64> - %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer - %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) - %r = add i64 %z, %a - ret i64 %r -} - -define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %y, <8 x i16> %b, i64 %a) { -; CHECK-LABEL: add_v8i16_v8i32_v8i64_acc_sext: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vmov.u16 r2, q1[0] -; CHECK-NEXT: vmov.i8 q6, #0xff -; CHECK-NEXT: vmov.32 q3[0], r2 -; CHECK-NEXT: vmov.u16 r2, q1[1] -; CHECK-NEXT: vmov.32 q3[1], r2 -; CHECK-NEXT: vmov.u16 r2, q1[2] -; CHECK-NEXT: vmov.32 q3[2], r2 -; CHECK-NEXT: vmov.u16 r2, q1[3] -; CHECK-NEXT: vmov.32 q3[3], r2 -; CHECK-NEXT: vmov.u16 r2, q0[0] -; CHECK-NEXT: vmov.32 q4[0], r2 -; CHECK-NEXT: vmov.u16 r2, q0[1] -; CHECK-NEXT: vmov.32 q4[1], r2 -; CHECK-NEXT: vmov.u16 r2, q0[2] -; CHECK-NEXT: vmov.32 q4[2], r2 -; CHECK-NEXT: vmov.u16 r2, q0[3] -; CHECK-NEXT: vmov.32 q4[3], r2 -; CHECK-NEXT: vcmp.i16 eq, q2, zr -; CHECK-NEXT: vmullb.s16 q3, q4, q3 -; CHECK-NEXT: vmov.f32 s20, s12 -; CHECK-NEXT: vmov.f32 s22, s13 -; CHECK-NEXT: vmov r2, s20 -; CHECK-NEXT: vmov.32 q4[0], r2 -; CHECK-NEXT: asrs r2, r2, #31 -; CHECK-NEXT: vmov.32 q4[1], r2 -; CHECK-NEXT: vmov r2, s22 -; CHECK-NEXT: vmov.i8 q5, #0x0 -; CHECK-NEXT: vmov.32 q4[2], r2 -; CHECK-NEXT: vpsel q2, q6, q5 -; CHECK-NEXT: asrs r2, r2, #31 -; CHECK-NEXT: vmov.32 q4[3], r2 -; CHECK-NEXT: vmov.u16 r2, q2[0] -; CHECK-NEXT: vmov.32 q5[0], r2 -; CHECK-NEXT: vmov.u16 r2, q2[1] -; CHECK-NEXT: vmov.32 q5[1], r2 -; CHECK-NEXT: vmov.u16 r2, q2[2] -; CHECK-NEXT: vmov.32 q5[2], r2 -; CHECK-NEXT: vmov.u16 r2, q2[3] -; CHECK-NEXT: vmov.32 q5[3], r2 -; CHECK-NEXT: vcmp.i32 ne, q5, zr -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: and r3, r2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: vmov.32 q5[0], r3 -; CHECK-NEXT: vmov.32 q5[1], r3 -; CHECK-NEXT: ubfx r3, r2, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: vmov.32 q5[2], r3 -; CHECK-NEXT: vmov.32 q5[3], r3 -; CHECK-NEXT: vand q4, q4, q5 -; CHECK-NEXT: vmov r3, s18 -; CHECK-NEXT: vmov r4, s16 -; CHECK-NEXT: vmov r12, s19 -; CHECK-NEXT: vmov r5, s17 -; CHECK-NEXT: vmov.f32 s16, s14 -; CHECK-NEXT: vmov.f32 s18, s15 -; CHECK-NEXT: adds.w lr, r4, r3 -; CHECK-NEXT: vmov r3, s16 -; CHECK-NEXT: vmov.32 q3[0], r3 -; CHECK-NEXT: adc.w r12, r12, r5 -; CHECK-NEXT: asrs r3, r3, #31 -; CHECK-NEXT: vmov.32 q3[1], r3 -; CHECK-NEXT: vmov r3, s18 -; CHECK-NEXT: vmov.32 q3[2], r3 -; CHECK-NEXT: asrs r3, r3, #31 -; CHECK-NEXT: vmov.32 q3[3], r3 -; CHECK-NEXT: ubfx r3, r2, #8, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: ubfx r2, r2, #12, #1 -; CHECK-NEXT: vmov.32 q4[0], r3 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vmov.32 q4[1], r3 -; CHECK-NEXT: vmov.32 q4[2], r2 -; CHECK-NEXT: vmov.32 q4[3], r2 -; CHECK-NEXT: vand q3, q3, q4 -; CHECK-NEXT: vmov r3, s12 -; CHECK-NEXT: vmov r2, s13 -; CHECK-NEXT: vmov r4, s14 -; CHECK-NEXT: vmov r5, s15 -; CHECK-NEXT: adds.w r3, r3, lr -; CHECK-NEXT: adc.w r2, r2, r12 -; CHECK-NEXT: adds.w r12, 
r3, r4 -; CHECK-NEXT: adc.w r3, r2, r5 -; CHECK-NEXT: vmov.u16 r2, q1[4] -; CHECK-NEXT: vmov.32 q3[0], r2 -; CHECK-NEXT: vmov.u16 r2, q1[5] -; CHECK-NEXT: vmov.32 q3[1], r2 -; CHECK-NEXT: vmov.u16 r2, q1[6] -; CHECK-NEXT: vmov.32 q3[2], r2 -; CHECK-NEXT: vmov.u16 r2, q1[7] -; CHECK-NEXT: vmov.32 q3[3], r2 -; CHECK-NEXT: vmov.u16 r2, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r2 -; CHECK-NEXT: vmov.u16 r2, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r2 -; CHECK-NEXT: vmov.u16 r2, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r2 -; CHECK-NEXT: vmov.u16 r2, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r2 -; CHECK-NEXT: vmullb.s16 q0, q1, q3 -; CHECK-NEXT: vmov.f32 s12, s0 -; CHECK-NEXT: vmov.f32 s14, s1 -; CHECK-NEXT: vmov r2, s12 -; CHECK-NEXT: vmov.32 q1[0], r2 -; CHECK-NEXT: asrs r2, r2, #31 -; CHECK-NEXT: vmov.32 q1[1], r2 -; CHECK-NEXT: vmov r2, s14 -; CHECK-NEXT: vmov.32 q1[2], r2 -; CHECK-NEXT: asrs r2, r2, #31 -; CHECK-NEXT: vmov.32 q1[3], r2 -; CHECK-NEXT: vmov.u16 r2, q2[4] -; CHECK-NEXT: vmov.32 q3[0], r2 -; CHECK-NEXT: vmov.u16 r2, q2[5] -; CHECK-NEXT: vmov.32 q3[1], r2 -; CHECK-NEXT: vmov.u16 r2, q2[6] -; CHECK-NEXT: vmov.32 q3[2], r2 -; CHECK-NEXT: vmov.u16 r2, q2[7] -; CHECK-NEXT: vmov.32 q3[3], r2 -; CHECK-NEXT: vcmp.i32 ne, q3, zr -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: and r5, r2, #1 -; CHECK-NEXT: rsbs r5, r5, #0 -; CHECK-NEXT: vmov.32 q2[0], r5 -; CHECK-NEXT: vmov.32 q2[1], r5 -; CHECK-NEXT: ubfx r5, r2, #4, #1 -; CHECK-NEXT: rsbs r5, r5, #0 -; CHECK-NEXT: vmov.32 q2[2], r5 -; CHECK-NEXT: vmov.32 q2[3], r5 -; CHECK-NEXT: vand q1, q1, q2 -; CHECK-NEXT: vmov r4, s4 -; CHECK-NEXT: vmov r5, s5 -; CHECK-NEXT: adds.w r12, r12, r4 -; CHECK-NEXT: vmov r4, s6 -; CHECK-NEXT: adcs r5, r3 -; CHECK-NEXT: vmov r3, s7 -; CHECK-NEXT: vmov.f32 s4, s2 -; CHECK-NEXT: vmov.f32 s6, s3 -; CHECK-NEXT: adds.w r4, r4, r12 -; CHECK-NEXT: adc.w r12, r5, r3 -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmov.32 q0[0], r3 -; CHECK-NEXT: asrs r3, r3, #31 -; CHECK-NEXT: vmov.32 q0[1], r3 -; CHECK-NEXT: vmov r3, s6 -; CHECK-NEXT: vmov.32 q0[2], r3 -; CHECK-NEXT: asrs r3, r3, #31 -; CHECK-NEXT: vmov.32 q0[3], r3 -; CHECK-NEXT: ubfx r3, r2, #8, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: ubfx r2, r2, #12, #1 -; CHECK-NEXT: vmov.32 q1[0], r3 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vmov.32 q1[1], r3 -; CHECK-NEXT: vmov.32 q1[2], r2 -; CHECK-NEXT: vmov.32 q1[3], r2 -; CHECK-NEXT: vand q0, q0, q1 -; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: vmov r2, s1 -; CHECK-NEXT: vmov r5, s3 -; CHECK-NEXT: adds r3, r3, r4 -; CHECK-NEXT: vmov r4, s2 -; CHECK-NEXT: adc.w r2, r2, r12 -; CHECK-NEXT: adds r3, r3, r4 -; CHECK-NEXT: adcs r2, r5 -; CHECK-NEXT: adds r0, r0, r3 -; CHECK-NEXT: adcs r1, r2 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: vpt.i16 eq, q2, zr +; CHECK-NEXT: vmlalvat.u16 r0, r1, q0, q1 +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <8 x i16> %b, zeroinitializer + %xx = zext <8 x i16> %x to <8 x i32> + %yy = zext <8 x i16> %y to <8 x i32> + %m = mul <8 x i32> %xx, %yy + %ma = zext <8 x i32> %m to <8 x i64> + %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer + %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s) + %r = add i64 %z, %a + ret i64 %r +} + +define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %y, <8 x i16> %b, i64 %a) { +; CHECK-LABEL: add_v8i16_v8i32_v8i64_acc_sext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpt.i16 eq, q2, zr +; CHECK-NEXT: vmlalvat.s16 r0, r1, q0, q1 +; CHECK-NEXT: bx lr entry: %c = icmp eq <8 
x i16> %b, zeroinitializer %xx = sext <8 x i16> %x to <8 x i32> @@ -3400,210 +2385,8 @@ entry: define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_acc_zext(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b, i32 %a) { ; CHECK-LABEL: add_v16i8_v16i16_v16i32_acc_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: .pad #64 -; CHECK-NEXT: sub sp, #64 -; CHECK-NEXT: vcmp.i8 eq, q2, zr -; CHECK-NEXT: vmov.i8 q2, #0x0 -; CHECK-NEXT: vmov.i8 q7, #0xff -; CHECK-NEXT: vmov q6, q1 -; CHECK-NEXT: vpsel q1, q7, q2 -; CHECK-NEXT: vstrw.32 q2, [sp] @ 16-byte Spill -; CHECK-NEXT: vmov.u8 r1, q1[0] -; CHECK-NEXT: vmov.16 q3[0], r1 -; CHECK-NEXT: vmov.u8 r1, q1[1] -; CHECK-NEXT: vmov.16 q3[1], r1 -; CHECK-NEXT: vmov.u8 r1, q1[2] -; CHECK-NEXT: vmov.16 q3[2], r1 -; CHECK-NEXT: vmov.u8 r1, q1[3] -; CHECK-NEXT: vmov.16 q3[3], r1 -; CHECK-NEXT: vmov.u8 r1, q1[4] -; CHECK-NEXT: vmov.16 q3[4], r1 -; CHECK-NEXT: vmov.u8 r1, q1[5] -; CHECK-NEXT: vmov.16 q3[5], r1 -; CHECK-NEXT: vmov.u8 r1, q1[6] -; CHECK-NEXT: vmov.16 q3[6], r1 -; CHECK-NEXT: vmov.u8 r1, q1[7] -; CHECK-NEXT: vmov.16 q3[7], r1 -; CHECK-NEXT: vcmp.i16 ne, q3, zr -; CHECK-NEXT: vpsel q3, q7, q2 -; CHECK-NEXT: vmov.u16 r1, q3[4] -; CHECK-NEXT: vstrw.32 q3, [sp, #48] @ 16-byte Spill -; CHECK-NEXT: vmov.32 q2[0], r1 -; CHECK-NEXT: vmov.u16 r1, q3[5] -; CHECK-NEXT: vmov.32 q2[1], r1 -; CHECK-NEXT: vmov.u16 r1, q3[6] -; CHECK-NEXT: vmov.32 q2[2], r1 -; CHECK-NEXT: vmov.u16 r1, q3[7] -; CHECK-NEXT: vmov.32 q2[3], r1 -; CHECK-NEXT: vmov.u8 r1, q6[0] -; CHECK-NEXT: vmov.16 q4[0], r1 -; CHECK-NEXT: vmov.u8 r1, q6[1] -; CHECK-NEXT: vmov.16 q4[1], r1 -; CHECK-NEXT: vmov.u8 r1, q6[2] -; CHECK-NEXT: vmov.16 q4[2], r1 -; CHECK-NEXT: vmov.u8 r1, q6[3] -; CHECK-NEXT: vmov.16 q4[3], r1 -; CHECK-NEXT: vmov.u8 r1, q6[4] -; CHECK-NEXT: vmov.16 q4[4], r1 -; CHECK-NEXT: vmov.u8 r1, q6[5] -; CHECK-NEXT: vmov.16 q4[5], r1 -; CHECK-NEXT: vmov.u8 r1, q6[6] -; CHECK-NEXT: vmov.16 q4[6], r1 -; CHECK-NEXT: vmov.u8 r1, q6[7] -; CHECK-NEXT: vmov.16 q4[7], r1 -; CHECK-NEXT: vmov.u8 r1, q0[0] -; CHECK-NEXT: vmov.16 q5[0], r1 -; CHECK-NEXT: vmov.u8 r1, q0[1] -; CHECK-NEXT: vmov.16 q5[1], r1 -; CHECK-NEXT: vmov.u8 r1, q0[2] -; CHECK-NEXT: vmov.16 q5[2], r1 -; CHECK-NEXT: vmov.u8 r1, q0[3] -; CHECK-NEXT: vmov.16 q5[3], r1 -; CHECK-NEXT: vmov.u8 r1, q0[4] -; CHECK-NEXT: vmov.16 q5[4], r1 -; CHECK-NEXT: vmov.u8 r1, q0[5] -; CHECK-NEXT: vmov.16 q5[5], r1 -; CHECK-NEXT: vmov.u8 r1, q0[6] -; CHECK-NEXT: vmov.16 q5[6], r1 -; CHECK-NEXT: vmov.u8 r1, q0[7] -; CHECK-NEXT: vmov.16 q5[7], r1 -; CHECK-NEXT: vmov q3, q0 -; CHECK-NEXT: vmullb.u8 q5, q5, q4 -; CHECK-NEXT: vcmp.i32 ne, q2, zr -; CHECK-NEXT: vmov.u16 r1, q5[4] -; CHECK-NEXT: vmov.i32 q4, #0x0 -; CHECK-NEXT: vmov.32 q0[0], r1 -; CHECK-NEXT: vmov.u16 r1, q5[5] -; CHECK-NEXT: vmov.32 q0[1], r1 -; CHECK-NEXT: vmov.u16 r1, q5[6] -; CHECK-NEXT: vmov.32 q0[2], r1 -; CHECK-NEXT: vmov.u16 r1, q5[7] -; CHECK-NEXT: vmov.32 q0[3], r1 -; CHECK-NEXT: vmov.i32 q2, #0xffff -; CHECK-NEXT: vmov.u8 r1, q1[8] -; CHECK-NEXT: vstrw.32 q4, [sp, #32] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q2, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vpst -; CHECK-NEXT: vandt q4, q0, q2 -; CHECK-NEXT: vmov.16 q0[0], r1 -; CHECK-NEXT: vmov.u8 r1, q1[9] -; CHECK-NEXT: vmov.16 q0[1], r1 -; CHECK-NEXT: vmov.u8 r1, q1[10] -; CHECK-NEXT: vmov.16 q0[2], r1 -; CHECK-NEXT: vmov.u8 r1, q1[11] -; CHECK-NEXT: vmov.16 q0[3], r1 -; CHECK-NEXT: vmov.u8 r1, q1[12] -; CHECK-NEXT: vmov.16 
q0[4], r1 -; CHECK-NEXT: vmov.u8 r1, q1[13] -; CHECK-NEXT: vmov.16 q0[5], r1 -; CHECK-NEXT: vmov.u8 r1, q1[14] -; CHECK-NEXT: vmov.16 q0[6], r1 -; CHECK-NEXT: vmov.u8 r1, q1[15] -; CHECK-NEXT: vmov.16 q0[7], r1 -; CHECK-NEXT: vcmp.i16 ne, q0, zr -; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload -; CHECK-NEXT: vpsel q0, q7, q0 -; CHECK-NEXT: vmov.u16 r1, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r1 -; CHECK-NEXT: vmov.u16 r1, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r1 -; CHECK-NEXT: vmov.u16 r1, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r1 -; CHECK-NEXT: vmov.u16 r1, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r1 -; CHECK-NEXT: vmov.u8 r1, q6[8] -; CHECK-NEXT: vmov.16 q7[0], r1 -; CHECK-NEXT: vmov.u8 r1, q6[9] -; CHECK-NEXT: vmov.16 q7[1], r1 -; CHECK-NEXT: vmov.u8 r1, q6[10] -; CHECK-NEXT: vmov.16 q7[2], r1 -; CHECK-NEXT: vmov.u8 r1, q6[11] -; CHECK-NEXT: vmov.16 q7[3], r1 -; CHECK-NEXT: vmov.u8 r1, q6[12] -; CHECK-NEXT: vmov.16 q7[4], r1 -; CHECK-NEXT: vmov.u8 r1, q6[13] -; CHECK-NEXT: vmov.16 q7[5], r1 -; CHECK-NEXT: vmov.u8 r1, q6[14] -; CHECK-NEXT: vmov.16 q7[6], r1 -; CHECK-NEXT: vmov.u8 r1, q6[15] -; CHECK-NEXT: vmov.16 q7[7], r1 -; CHECK-NEXT: vmov.u8 r1, q3[8] -; CHECK-NEXT: vcmp.i32 ne, q1, zr -; CHECK-NEXT: vmov.16 q1[0], r1 -; CHECK-NEXT: vmov.u8 r1, q3[9] -; CHECK-NEXT: vmov.16 q1[1], r1 -; CHECK-NEXT: vmov.u8 r1, q3[10] -; CHECK-NEXT: vmov.16 q1[2], r1 -; CHECK-NEXT: vmov.u8 r1, q3[11] -; CHECK-NEXT: vmov.16 q1[3], r1 -; CHECK-NEXT: vmov.u8 r1, q3[12] -; CHECK-NEXT: vmov.16 q1[4], r1 -; CHECK-NEXT: vmov.u8 r1, q3[13] -; CHECK-NEXT: vmov.16 q1[5], r1 -; CHECK-NEXT: vmov.u8 r1, q3[14] -; CHECK-NEXT: vmov.16 q1[6], r1 -; CHECK-NEXT: vmov.u8 r1, q3[15] -; CHECK-NEXT: vmov.16 q1[7], r1 -; CHECK-NEXT: vmullb.u8 q1, q1, q7 -; CHECK-NEXT: vmov.u16 r1, q1[4] -; CHECK-NEXT: vmov.32 q2[0], r1 -; CHECK-NEXT: vmov.u16 r1, q1[5] -; CHECK-NEXT: vmov.32 q2[1], r1 -; CHECK-NEXT: vmov.u16 r1, q1[6] -; CHECK-NEXT: vmov.32 q2[2], r1 -; CHECK-NEXT: vmov.u16 r1, q1[7] -; CHECK-NEXT: vmov.32 q2[3], r1 -; CHECK-NEXT: vmovlb.u16 q2, q2 -; CHECK-NEXT: vpst -; CHECK-NEXT: vaddt.i32 q4, q4, q2 -; CHECK-NEXT: vldrw.u32 q3, [sp, #48] @ 16-byte Reload -; CHECK-NEXT: vmov.u16 r1, q3[0] -; CHECK-NEXT: vmov.32 q2[0], r1 -; CHECK-NEXT: vmov.u16 r1, q3[1] -; CHECK-NEXT: vmov.32 q2[1], r1 -; CHECK-NEXT: vmov.u16 r1, q3[2] -; CHECK-NEXT: vmov.32 q2[2], r1 -; CHECK-NEXT: vmov.u16 r1, q3[3] -; CHECK-NEXT: vmov.32 q2[3], r1 -; CHECK-NEXT: vmov.u16 r1, q5[0] -; CHECK-NEXT: vcmp.i32 ne, q2, zr -; CHECK-NEXT: vmov.32 q2[0], r1 -; CHECK-NEXT: vmov.u16 r1, q5[1] -; CHECK-NEXT: vldrw.u32 q3, [sp, #32] @ 16-byte Reload -; CHECK-NEXT: vmov.32 q2[1], r1 -; CHECK-NEXT: vmov.u16 r1, q5[2] -; CHECK-NEXT: vmov.32 q2[2], r1 -; CHECK-NEXT: vmov.u16 r1, q5[3] -; CHECK-NEXT: vmov.32 q2[3], r1 -; CHECK-NEXT: vmov.u16 r1, q0[0] -; CHECK-NEXT: vldrw.u32 q5, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: vpst -; CHECK-NEXT: vandt q3, q2, q5 -; CHECK-NEXT: vmov.32 q2[0], r1 -; CHECK-NEXT: vmov.u16 r1, q0[1] -; CHECK-NEXT: vmov.32 q2[1], r1 -; CHECK-NEXT: vmov.u16 r1, q0[2] -; CHECK-NEXT: vmov.32 q2[2], r1 -; CHECK-NEXT: vmov.u16 r1, q0[3] -; CHECK-NEXT: vmov.32 q2[3], r1 -; CHECK-NEXT: vmov.u16 r1, q1[0] -; CHECK-NEXT: vmov.32 q0[0], r1 -; CHECK-NEXT: vmov.u16 r1, q1[1] -; CHECK-NEXT: vmov.32 q0[1], r1 -; CHECK-NEXT: vmov.u16 r1, q1[2] -; CHECK-NEXT: vmov.32 q0[2], r1 -; CHECK-NEXT: vmov.u16 r1, q1[3] -; CHECK-NEXT: vmov.32 q0[3], r1 -; CHECK-NEXT: vmov q1, q3 -; CHECK-NEXT: vmovlb.u16 q0, q0 -; CHECK-NEXT: vpt.i32 ne, q2, zr -; CHECK-NEXT: vaddt.i32 q1, q3, q0 -; 
CHECK-NEXT: vadd.i32 q0, q1, q4 -; CHECK-NEXT: vaddva.u32 r0, q0 -; CHECK-NEXT: add sp, #64 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpt.i8 eq, q2, zr +; CHECK-NEXT: vmlavat.u8 r0, q0, q1 ; CHECK-NEXT: bx lr entry: %c = icmp eq <16 x i8> %b, zeroinitializer @@ -3620,205 +2403,8 @@ entry: define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_acc_sext(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b, i32 %a) { ; CHECK-LABEL: add_v16i8_v16i16_v16i32_acc_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: .pad #32 -; CHECK-NEXT: sub sp, #32 -; CHECK-NEXT: vmov q3, q2 -; CHECK-NEXT: vmov q6, q0 -; CHECK-NEXT: vmov.i8 q0, #0x0 -; CHECK-NEXT: vcmp.i8 eq, q3, zr -; CHECK-NEXT: vmov.i8 q5, #0xff -; CHECK-NEXT: vmov q2, q1 -; CHECK-NEXT: vpsel q1, q5, q0 -; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill -; CHECK-NEXT: vmov.u8 r1, q1[0] -; CHECK-NEXT: vmov.16 q3[0], r1 -; CHECK-NEXT: vmov.u8 r1, q1[1] -; CHECK-NEXT: vmov.16 q3[1], r1 -; CHECK-NEXT: vmov.u8 r1, q1[2] -; CHECK-NEXT: vmov.16 q3[2], r1 -; CHECK-NEXT: vmov.u8 r1, q1[3] -; CHECK-NEXT: vmov.16 q3[3], r1 -; CHECK-NEXT: vmov.u8 r1, q1[4] -; CHECK-NEXT: vmov.16 q3[4], r1 -; CHECK-NEXT: vmov.u8 r1, q1[5] -; CHECK-NEXT: vmov.16 q3[5], r1 -; CHECK-NEXT: vmov.u8 r1, q1[6] -; CHECK-NEXT: vmov.16 q3[6], r1 -; CHECK-NEXT: vmov.u8 r1, q1[7] -; CHECK-NEXT: vmov.16 q3[7], r1 -; CHECK-NEXT: vcmp.i16 ne, q3, zr -; CHECK-NEXT: vpsel q3, q5, q0 -; CHECK-NEXT: vmov.u16 r1, q3[4] -; CHECK-NEXT: vmov.32 q0[0], r1 -; CHECK-NEXT: vmov.u16 r1, q3[5] -; CHECK-NEXT: vmov.32 q0[1], r1 -; CHECK-NEXT: vmov.u16 r1, q3[6] -; CHECK-NEXT: vmov.32 q0[2], r1 -; CHECK-NEXT: vmov.u16 r1, q3[7] -; CHECK-NEXT: vmov.32 q0[3], r1 -; CHECK-NEXT: vmov.u8 r1, q2[0] -; CHECK-NEXT: vmov.16 q4[0], r1 -; CHECK-NEXT: vmov.u8 r1, q2[1] -; CHECK-NEXT: vmov.16 q4[1], r1 -; CHECK-NEXT: vmov.u8 r1, q2[2] -; CHECK-NEXT: vmov.16 q4[2], r1 -; CHECK-NEXT: vmov.u8 r1, q2[3] -; CHECK-NEXT: vmov.16 q4[3], r1 -; CHECK-NEXT: vmov.u8 r1, q2[4] -; CHECK-NEXT: vmov.16 q4[4], r1 -; CHECK-NEXT: vmov.u8 r1, q2[5] -; CHECK-NEXT: vmov.16 q4[5], r1 -; CHECK-NEXT: vmov.u8 r1, q2[6] -; CHECK-NEXT: vmov.16 q4[6], r1 -; CHECK-NEXT: vmov.u8 r1, q2[7] -; CHECK-NEXT: vmov.16 q4[7], r1 -; CHECK-NEXT: vmov.u8 r1, q6[0] -; CHECK-NEXT: vmov.16 q7[0], r1 -; CHECK-NEXT: vmov.u8 r1, q6[1] -; CHECK-NEXT: vmov.16 q7[1], r1 -; CHECK-NEXT: vmov.u8 r1, q6[2] -; CHECK-NEXT: vmov.16 q7[2], r1 -; CHECK-NEXT: vmov.u8 r1, q6[3] -; CHECK-NEXT: vmov.16 q7[3], r1 -; CHECK-NEXT: vmov.u8 r1, q6[4] -; CHECK-NEXT: vmov.16 q7[4], r1 -; CHECK-NEXT: vmov.u8 r1, q6[5] -; CHECK-NEXT: vmov.16 q7[5], r1 -; CHECK-NEXT: vmov.u8 r1, q6[6] -; CHECK-NEXT: vmov.16 q7[6], r1 -; CHECK-NEXT: vmov.u8 r1, q6[7] -; CHECK-NEXT: vmov.16 q7[7], r1 -; CHECK-NEXT: vcmp.i32 ne, q0, zr -; CHECK-NEXT: vmullb.s8 q4, q7, q4 -; CHECK-NEXT: vmov.u16 r1, q4[4] -; CHECK-NEXT: vmov.32 q0[0], r1 -; CHECK-NEXT: vmov.u16 r1, q4[5] -; CHECK-NEXT: vmov.32 q0[1], r1 -; CHECK-NEXT: vmov.u16 r1, q4[6] -; CHECK-NEXT: vmov.32 q0[2], r1 -; CHECK-NEXT: vmov.u16 r1, q4[7] -; CHECK-NEXT: vmov.32 q0[3], r1 -; CHECK-NEXT: vmov.u8 r1, q1[8] -; CHECK-NEXT: vmovlb.s16 q7, q0 -; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vpsel q7, q7, q0 -; CHECK-NEXT: vmov.16 q0[0], r1 -; CHECK-NEXT: vmov.u8 r1, q1[9] -; CHECK-NEXT: vmov.16 q0[1], r1 -; CHECK-NEXT: vmov.u8 r1, q1[10] -; CHECK-NEXT: vmov.16 q0[2], r1 
-; CHECK-NEXT: vmov.u8 r1, q1[11] -; CHECK-NEXT: vmov.16 q0[3], r1 -; CHECK-NEXT: vmov.u8 r1, q1[12] -; CHECK-NEXT: vmov.16 q0[4], r1 -; CHECK-NEXT: vmov.u8 r1, q1[13] -; CHECK-NEXT: vmov.16 q0[5], r1 -; CHECK-NEXT: vmov.u8 r1, q1[14] -; CHECK-NEXT: vmov.16 q0[6], r1 -; CHECK-NEXT: vmov.u8 r1, q1[15] -; CHECK-NEXT: vmov.16 q0[7], r1 -; CHECK-NEXT: vcmp.i16 ne, q0, zr -; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload -; CHECK-NEXT: vpsel q0, q5, q0 -; CHECK-NEXT: vmov.u16 r1, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r1 -; CHECK-NEXT: vmov.u16 r1, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r1 -; CHECK-NEXT: vmov.u16 r1, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r1 -; CHECK-NEXT: vmov.u16 r1, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r1 -; CHECK-NEXT: vmov.u8 r1, q2[8] -; CHECK-NEXT: vmov.16 q5[0], r1 -; CHECK-NEXT: vmov.u8 r1, q2[9] -; CHECK-NEXT: vmov.16 q5[1], r1 -; CHECK-NEXT: vmov.u8 r1, q2[10] -; CHECK-NEXT: vmov.16 q5[2], r1 -; CHECK-NEXT: vmov.u8 r1, q2[11] -; CHECK-NEXT: vmov.16 q5[3], r1 -; CHECK-NEXT: vmov.u8 r1, q2[12] -; CHECK-NEXT: vmov.16 q5[4], r1 -; CHECK-NEXT: vmov.u8 r1, q2[13] -; CHECK-NEXT: vmov.16 q5[5], r1 -; CHECK-NEXT: vmov.u8 r1, q2[14] -; CHECK-NEXT: vmov.16 q5[6], r1 -; CHECK-NEXT: vmov.u8 r1, q2[15] -; CHECK-NEXT: vmov.16 q5[7], r1 -; CHECK-NEXT: vmov.u8 r1, q6[8] -; CHECK-NEXT: vcmp.i32 ne, q1, zr -; CHECK-NEXT: vmov.16 q1[0], r1 -; CHECK-NEXT: vmov.u8 r1, q6[9] -; CHECK-NEXT: vmov.16 q1[1], r1 -; CHECK-NEXT: vmov.u8 r1, q6[10] -; CHECK-NEXT: vmov.16 q1[2], r1 -; CHECK-NEXT: vmov.u8 r1, q6[11] -; CHECK-NEXT: vmov.16 q1[3], r1 -; CHECK-NEXT: vmov.u8 r1, q6[12] -; CHECK-NEXT: vmov.16 q1[4], r1 -; CHECK-NEXT: vmov.u8 r1, q6[13] -; CHECK-NEXT: vmov.16 q1[5], r1 -; CHECK-NEXT: vmov.u8 r1, q6[14] -; CHECK-NEXT: vmov.16 q1[6], r1 -; CHECK-NEXT: vmov.u8 r1, q6[15] -; CHECK-NEXT: vmov.16 q1[7], r1 -; CHECK-NEXT: vmullb.s8 q1, q1, q5 -; CHECK-NEXT: vmov.u16 r1, q1[4] -; CHECK-NEXT: vmov.32 q2[0], r1 -; CHECK-NEXT: vmov.u16 r1, q1[5] -; CHECK-NEXT: vmov.32 q2[1], r1 -; CHECK-NEXT: vmov.u16 r1, q1[6] -; CHECK-NEXT: vmov.32 q2[2], r1 -; CHECK-NEXT: vmov.u16 r1, q1[7] -; CHECK-NEXT: vmov.32 q2[3], r1 -; CHECK-NEXT: vmov.u16 r1, q3[0] -; CHECK-NEXT: vmovlb.s16 q2, q2 -; CHECK-NEXT: vpst -; CHECK-NEXT: vaddt.i32 q7, q7, q2 -; CHECK-NEXT: vmov.32 q2[0], r1 -; CHECK-NEXT: vmov.u16 r1, q3[1] -; CHECK-NEXT: vmov.32 q2[1], r1 -; CHECK-NEXT: vmov.u16 r1, q3[2] -; CHECK-NEXT: vmov.32 q2[2], r1 -; CHECK-NEXT: vmov.u16 r1, q3[3] -; CHECK-NEXT: vmov.32 q2[3], r1 -; CHECK-NEXT: vmov.u16 r1, q4[0] -; CHECK-NEXT: vcmp.i32 ne, q2, zr -; CHECK-NEXT: vmov.32 q2[0], r1 -; CHECK-NEXT: vmov.u16 r1, q4[1] -; CHECK-NEXT: vldrw.u32 q3, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: vmov.32 q2[1], r1 -; CHECK-NEXT: vmov.u16 r1, q4[2] -; CHECK-NEXT: vmov.32 q2[2], r1 -; CHECK-NEXT: vmov.u16 r1, q4[3] -; CHECK-NEXT: vmov.32 q2[3], r1 -; CHECK-NEXT: vmov.u16 r1, q0[0] -; CHECK-NEXT: vmovlb.s16 q2, q2 -; CHECK-NEXT: vpsel q2, q2, q3 -; CHECK-NEXT: vmov.32 q3[0], r1 -; CHECK-NEXT: vmov.u16 r1, q0[1] -; CHECK-NEXT: vmov.32 q3[1], r1 -; CHECK-NEXT: vmov.u16 r1, q0[2] -; CHECK-NEXT: vmov.32 q3[2], r1 -; CHECK-NEXT: vmov.u16 r1, q0[3] -; CHECK-NEXT: vmov.32 q3[3], r1 -; CHECK-NEXT: vmov.u16 r1, q1[0] -; CHECK-NEXT: vmov.32 q0[0], r1 -; CHECK-NEXT: vmov.u16 r1, q1[1] -; CHECK-NEXT: vmov.32 q0[1], r1 -; CHECK-NEXT: vmov.u16 r1, q1[2] -; CHECK-NEXT: vmov.32 q0[2], r1 -; CHECK-NEXT: vmov.u16 r1, q1[3] -; CHECK-NEXT: vmov.32 q0[3], r1 -; CHECK-NEXT: vmovlb.s16 q0, q0 -; CHECK-NEXT: vpt.i32 ne, q3, zr -; CHECK-NEXT: vaddt.i32 q2, q2, q0 -; 
CHECK-NEXT: vadd.i32 q0, q2, q7
-; CHECK-NEXT: vaddva.u32 r0, q0
-; CHECK-NEXT: add sp, #32
-; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT: vpt.i8 eq, q2, zr
+; CHECK-NEXT: vmlavat.s8 r0, q0, q1
 ; CHECK-NEXT: bx lr
 entry:
   %c = icmp eq <16 x i8> %b, zeroinitializer

From 50ee0b99ec2902f5cf7a62a5e9b4a4f882b17031 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 12 Sep 2020 13:51:25 +0100
Subject: [PATCH 0447/1079] [InstCombine][X86] getNegativeIsTrueBoolVec - use ConstantExpr evaluators. NFCI.

Don't do this manually; we can just use the ConstantExpr evaluators to do it
more tidily for us.
---
 .../Target/X86/X86InstCombineIntrinsic.cpp    | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index e2582bae3010c..d93f22d0365c0 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -24,19 +24,12 @@ using namespace llvm;

 /// Return a constant boolean vector that has true elements in all positions
 /// where the input constant data vector has an element with the sign bit set.
-static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
-  SmallVector<Constant *, 32> BoolVec;
-  IntegerType *BoolTy = Type::getInt1Ty(V->getContext());
-  for (unsigned I = 0, E = V->getNumElements(); I != E; ++I) {
-    Constant *Elt = V->getElementAsConstant(I);
-    assert((isa<ConstantInt>(Elt) || isa<ConstantFP>(Elt)) &&
-           "Unexpected constant data vector element type");
-    bool Sign = V->getElementType()->isIntegerTy()
-                    ? cast<ConstantInt>(Elt)->isNegative()
-                    : cast<ConstantFP>(Elt)->isNegative();
-    BoolVec.push_back(ConstantInt::get(BoolTy, Sign));
-  }
-  return ConstantVector::get(BoolVec);
+static Constant *getNegativeIsTrueBoolVec(Constant *V) {
+  VectorType *IntTy = VectorType::getInteger(cast<VectorType>(V->getType()));
+  V = ConstantExpr::getBitCast(V, IntTy);
+  V = ConstantExpr::getICmp(CmpInst::ICMP_SGT, Constant::getNullValue(IntTy),
+                            V);
+  return V;
 }

 // TODO: If the x86 backend knew how to convert a bool vector mask back to an

From 3a8ea8609b82b7e5401698b7c63df6680e1257a8 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Sat, 12 Sep 2020 09:08:07 -0400
Subject: [PATCH 0448/1079] [Intrinsics] define semantics for experimental fmax/fmin vector reductions

As discussed on llvm-dev:
http://lists.llvm.org/pipermail/llvm-dev/2020-April/140729.html

This is hopefully the final remaining showstopper before we can remove
the 'experimental' from the reduction intrinsics.

No behavior was specified for the FP min/max reductions, so we have a
mess of different interpretations.

There are a few potential options for the semantics of these max/min ops.
I think this is the simplest based on current behavior/implementation:
make the reductions inherit from the existing llvm.maxnum/minnum intrinsics.
These correspond to libm fmax/fmin, and those are similar to the (now
deprecated?) IEEE-754 maxNum/minNum functions (NaNs are treated as missing
data). So the default expansion creates calls to libm functions.

Another option would be to inherit from llvm.maximum/minimum (NaNs
propagate), but most targets just crash in codegen when given those nodes
because no default expansion was ever implemented AFAICT.

We could also just assume 'nnan' semantics by default (we are already
assuming 'nsz' semantics in the maxnum/minnum intrinsics), but some targets
(AArch64, PowerPC) support the more defined behavior, so it doesn't make
much sense to not allow a tighter spec.
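To make the "NaNs are treated as missing data" point concrete, here is a
minimal standalone C++ sketch of the chosen maxnum-style behavior
(illustrative only, not code from this patch; reduce_fmax is an invented
name):

    #include <cmath>
    #include <limits>
    #include <vector>

    // libm semantics: fmax(NaN, x) == x and fmax(x, NaN) == x, so NaN acts
    // as "missing data" and quiet NaN is a valid identity element for the
    // accumulator. The reduction yields NaN only if every element is NaN.
    static double reduce_fmax(const std::vector<double> &Vec) {
      double Acc = std::numeric_limits<double>::quiet_NaN();
      for (double Elt : Vec)
        Acc = std::fmax(Acc, Elt);
      return Acc;
    }

This is also why the widening legalization below pads with quiet NaN rather
than -infinity: under maxnum semantics the padded lanes simply drop out of
the result.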
Fast-math-flags (nnan) can be used to loosen the semantics. (Note that D67507 was proposed to update the LangRef to acknowledge the more recent IEEE-754 2019 standard, but that patch seems to have stalled. If we do update based on the new standard, the reduction instructions can seamlessly inherit from whatever updates are made to the max/min intrinsics.) x86 sees a regression here on 'nnan' tests because we have underlying, longstanding bugs in FMF creation/propagation. Those need to be fixed apart from this change (for example: https://llvm.org/PR35538). The expansion sequence before this patch may not have been correct. Differential Revision: https://reviews.llvm.org/D87391 --- llvm/docs/LangRef.rst | 14 +- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 4 - llvm/lib/CodeGen/ExpandReductions.cpp | 16 +- .../SelectionDAG/LegalizeVectorTypes.cpp | 22 +- .../CodeGen/SelectionDAG/TargetLowering.cpp | 9 +- .../Target/AArch64/AArch64ISelLowering.cpp | 2 - .../AArch64/AArch64TargetTransformInfo.h | 5 - llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 6 +- .../vecreduce-fmax-legalization-nan.ll | 20 +- .../AArch64/vecreduce-fmax-legalization.ll | 2 +- .../Generic/expand-experimental-reductions.ll | 40 +- .../CodeGen/Thumb2/mve-vecreduce-fminmax.ll | 1307 +++++------------ .../CodeGen/Thumb2/mve-vecreduce-loops.ll | 30 +- .../CodeGen/X86/vector-reduce-fmax-nnan.ll | 348 ++++- llvm/test/CodeGen/X86/vector-reduce-fmax.ll | 1088 ++++++++++++-- .../CodeGen/X86/vector-reduce-fmin-nnan.ll | 358 ++++- llvm/test/CodeGen/X86/vector-reduce-fmin.ll | 1078 ++++++++++++-- 17 files changed, 2835 insertions(+), 1514 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 781b2385de500..5e35b913bef4a 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15824,7 +15824,12 @@ The '``llvm.experimental.vector.reduce.fmax.*``' intrinsics do a floating-point ``MAX`` reduction of a vector, returning the result as a scalar. The return type matches the element-type of the vector input. -If the intrinsic call has the ``nnan`` fast-math flag then the operation can +This instruction has the same comparison semantics as the '``llvm.maxnum.*``' +intrinsic. That is, the result will always be a number unless all elements of +the vector are NaN. For a vector with maximum element magnitude 0.0 and +containing both +0.0 and -0.0 elements, the sign of the result is unspecified. + +If the intrinsic call has the ``nnan`` fast-math flag, then the operation can assume that NaNs are not present in the input vector. Arguments: @@ -15850,7 +15855,12 @@ The '``llvm.experimental.vector.reduce.fmin.*``' intrinsics do a floating-point ``MIN`` reduction of a vector, returning the result as a scalar. The return type matches the element-type of the vector input. -If the intrinsic call has the ``nnan`` fast-math flag then the operation can +This instruction has the same comparison semantics as the '``llvm.minnum.*``' +intrinsic. That is, the result will always be a number unless all elements of +the vector are NaN. For a vector with minimum element magnitude 0.0 and +containing both +0.0 and -0.0 elements, the sign of the result is unspecified. + +If the intrinsic call has the ``nnan`` fast-math flag, then the operation can assume that NaNs are not present in the input vector. 
 Arguments:

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 2b72dc3490d75..d5c0b83ea6f7b 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1349,13 +1349,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       break;
     case Intrinsic::minnum:
       ISDs.push_back(ISD::FMINNUM);
-      if (FMF.noNaNs())
-        ISDs.push_back(ISD::FMINIMUM);
       break;
     case Intrinsic::maxnum:
       ISDs.push_back(ISD::FMAXNUM);
-      if (FMF.noNaNs())
-        ISDs.push_back(ISD::FMAXIMUM);
       break;
     case Intrinsic::copysign:
       ISDs.push_back(ISD::FCOPYSIGN);
diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp
index 45f21c1085dda..dfaaafaf811f1 100644
--- a/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -143,12 +143,24 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
     case Intrinsic::experimental_vector_reduce_smax:
     case Intrinsic::experimental_vector_reduce_smin:
     case Intrinsic::experimental_vector_reduce_umax:
-    case Intrinsic::experimental_vector_reduce_umin:
+    case Intrinsic::experimental_vector_reduce_umin: {
+      Value *Vec = II->getArgOperand(0);
+      if (!isPowerOf2_32(
+              cast<FixedVectorType>(Vec->getType())->getNumElements()))
+        continue;
+
+      Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+      break;
+    }
     case Intrinsic::experimental_vector_reduce_fmax:
     case Intrinsic::experimental_vector_reduce_fmin: {
+      // FIXME: We only expand 'fast' reductions here because the underlying
+      // code in createMinMaxOp() assumes that comparisons use 'fast'
+      // semantics.
       Value *Vec = II->getArgOperand(0);
       if (!isPowerOf2_32(
-              cast<FixedVectorType>(Vec->getType())->getNumElements()))
+              cast<FixedVectorType>(Vec->getType())->getNumElements()) ||
+          !FMF.isFast())
         continue;

       Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 764472e570c04..509ae2c6bdcb6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2146,7 +2146,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {
   EVT LoOpVT, HiOpVT;
   std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT);

-  bool NoNaN = N->getFlags().hasNoNaNs();
   unsigned CombineOpc = 0;
   switch (N->getOpcode()) {
   case ISD::VECREDUCE_FADD: CombineOpc = ISD::FADD; break;
@@ -2160,12 +2159,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {
   case ISD::VECREDUCE_SMIN: CombineOpc = ISD::SMIN; break;
   case ISD::VECREDUCE_UMAX: CombineOpc = ISD::UMAX; break;
   case ISD::VECREDUCE_UMIN: CombineOpc = ISD::UMIN; break;
-  case ISD::VECREDUCE_FMAX:
-    CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
-    break;
-  case ISD::VECREDUCE_FMIN:
-    CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
-    break;
+  case ISD::VECREDUCE_FMAX: CombineOpc = ISD::FMAXNUM; break;
+  case ISD::VECREDUCE_FMIN: CombineOpc = ISD::FMINNUM; break;
   default:
     llvm_unreachable("Unexpected reduce ISD node");
   }
@@ -4771,6 +4766,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
   EVT OrigVT = N->getOperand(0).getValueType();
   EVT WideVT = Op.getValueType();
   EVT ElemVT = OrigVT.getVectorElementType();
+  SDNodeFlags Flags = N->getFlags();

   SDValue NeutralElem;
   switch (N->getOpcode()) {
@@ -4802,12 +4798,18 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
     NeutralElem = DAG.getConstantFP(1.0, dl, ElemVT);
     break;
   case ISD::VECREDUCE_FMAX:
+    // This has maxnum semantics, so NaN represents missing data. We must clear
+    // 'nnan' if it was set because the NaN would be a poison value.
     NeutralElem = DAG.getConstantFP(
-        -std::numeric_limits<double>::infinity(), dl, ElemVT);
+        std::numeric_limits<double>::quiet_NaN(), dl, ElemVT);
+    Flags.setNoNaNs(false);
     break;
   case ISD::VECREDUCE_FMIN:
+    // This has minnum semantics, so NaN represents missing data. We must clear
+    // 'nnan' if it was set because the NaN would be a poison value.
     NeutralElem = DAG.getConstantFP(
-        std::numeric_limits<double>::infinity(), dl, ElemVT);
+        std::numeric_limits<double>::quiet_NaN(), dl, ElemVT);
+    Flags.setNoNaNs(false);
     break;
   }
@@ -4818,7 +4820,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
     Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
                      DAG.getVectorIdxConstant(Idx, dl));

-  return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Op, N->getFlags());
+  return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Op, Flags);
 }

 SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a80ca04921f45..ea2344e4f5515 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7934,7 +7934,6 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
   SDLoc dl(Node);
-  bool NoNaN = Node->getFlags().hasNoNaNs();
   unsigned BaseOpcode = 0;
   switch (Node->getOpcode()) {
   default: llvm_unreachable("Expected VECREDUCE opcode");
@@ -7949,12 +7948,8 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
   case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break;
   case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break;
   case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
-  case ISD::VECREDUCE_FMAX:
-    BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
-    break;
-  case ISD::VECREDUCE_FMIN:
-    BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
-    break;
+  case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;
+  case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;
   }

   SDValue Op = Node->getOperand(0);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d4f324490430c..6745b848f0eda 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9529,14 +9529,12 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
   case ISD::VECREDUCE_UMIN:
     return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
   case ISD::VECREDUCE_FMAX: {
-    assert(Op->getFlags().hasNoNaNs() && "fmax vector reduction needs NoNaN flag");
     return DAG.getNode(
         ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
         DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
         Op.getOperand(0));
   }
   case ISD::VECREDUCE_FMIN: {
-    assert(Op->getFlags().hasNoNaNs() && "fmin vector reduction needs NoNaN flag");
     return DAG.getNode(
         ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
         DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 05b7f70f2335c..3c3a246b90a12 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -223,11 +223,6 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
       // We don't have legalization support for ordered FP reductions.
       return !II->getFastMathFlags().allowReassoc();

-    case Intrinsic::experimental_vector_reduce_fmax:
-    case Intrinsic::experimental_vector_reduce_fmin:
-      // Lowering asserts that there are no NaNs.
-      return !II->getFastMathFlags().noNaNs();
-
     default:
       // Don't expand anything else, let legalization deal with it.
       return false;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index cc2019b47a076..508bb9e21d3af 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -201,10 +201,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {

     case Intrinsic::experimental_vector_reduce_fmin:
     case Intrinsic::experimental_vector_reduce_fmax:
-      // Can't legalize reductions with soft floats, and NoNan will create
-      // fminimum which we do not know how to lower.
-      return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs() ||
-             !II->getFastMathFlags().noNaNs();
+      // Can't legalize reductions with soft floats.
+      return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs();

     default:
       // Don't expand anything else, let legalization deal with it.
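As a scalar illustration of the WidenVecOp_VECREDUCE change above (an
editorial sketch under the patch's stated semantics, not code from the
patch; widened_fmax4 is an invented name): quiet NaN is a true neutral
element for the padded lanes, but those pad NaNs would contradict an 'nnan'
flag on the node, which is why the flag is cleared.

    #include <cmath>
    #include <limits>

    // Widen a 3-element FMAX reduction to 4 lanes: the pad lane is quiet
    // NaN, which drops out under fmax/maxnum semantics, leaving the result
    // equal to the original 3-element reduction.
    static float widened_fmax4(float A, float B, float C) {
      const float Pad = std::numeric_limits<float>::quiet_NaN();
      return std::fmax(std::fmax(A, B), std::fmax(C, Pad));
    }

Note that -infinity is no longer a safe identity: with maxnum semantics an
all-NaN input must reduce to NaN, but a -infinity pad lane would turn that
result into -infinity. The quiet-NaN pad preserves the all-NaN case.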
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
index 4d888317b343e..514a43a5e171f 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
@@ -54,19 +54,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
 define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
 ; CHECK-LABEL: test_v2f128:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #48 // =48
-; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
-; CHECK-NEXT: bl __gttf2
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w0, #0 // =0
-; CHECK-NEXT: b.le .LBB4_2
-; CHECK-NEXT: // %bb.1:
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: .LBB4_2:
-; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #48 // =48
-; CHECK-NEXT: ret
+; CHECK-NEXT: b fmaxl
   %b = call fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a)
   ret fp128 %b
 }
@@ -77,11 +65,7 @@ define float @test_v16f32(<16 x float> %a) nounwind {
 ; CHECK-NEXT: fmaxnm v1.4s, v1.4s, v3.4s
 ; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: dup v1.2d, v0.d[1]
-; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: dup v1.4s, v0.s[1]
-; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: fmaxnmv s0, v0.4s
 ; CHECK-NEXT: ret
   %b = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a)
   ret float %b
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index 975ba2687792f..7d6d424d64a94 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -47,7 +47,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
 define float @test_v3f32(<3 x float> %a) nounwind {
 ; CHECK-LABEL: test_v3f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-8388608
+; CHECK-NEXT: mov w8, #2143289344
 ; CHECK-NEXT: fmov s1, w8
 ; CHECK-NEXT: mov v0.s[3], v1.s[0]
 ; CHECK-NEXT: fmaxnmv s0, v0.4s
diff --git a/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll b/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll
index 11abf902eeb3a..e0e3149e35119 100644
--- a/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll
+++ b/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll
@@ -93,8 +93,8 @@ define float @fadd_f32(<4 x float> %vec) {
 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float 0.000000e+00, [[TMP0]]
-; CHECK-NEXT: ret float [[TMP1]]
+; CHECK-NEXT: [[BIN_RDX3:%.*]] = fadd fast float 0.000000e+00, [[TMP0]]
+; CHECK-NEXT: ret float [[BIN_RDX3]]
 ;
 entry:
   %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %vec)
@@ -109,8 +109,8 @@ define float @fadd_f32_accum(float %accum, <4 x float> %vec) {
 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float %accum, [[TMP0]]
-; CHECK-NEXT: ret float [[TMP1]]
+; CHECK-NEXT: [[BIN_RDX3:%.*]] = fadd fast float [[ACCUM:%.*]], [[TMP0]]
+; CHECK-NEXT: ret float [[BIN_RDX3]]
 ;
 entry:
   %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %accum, <4 x float> %vec)
@@ -161,8 +161,8 @@ define float @fmul_f32(<4 x float> %vec) {
 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float 1.000000e+00, [[TMP0]]
-; CHECK-NEXT: ret float [[TMP1]]
+; CHECK-NEXT: [[BIN_RDX3:%.*]] = fmul fast float 1.000000e+00, [[TMP0]]
+; CHECK-NEXT: ret float [[BIN_RDX3]]
 ;
 entry:
   %r = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.0, <4 x float> %vec)
@@ -177,8 +177,8 @@ define float @fmul_f32_accum(float %accum, <4 x float> %vec) {
 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float %accum, [[TMP0]]
-; CHECK-NEXT: ret float [[TMP1]]
+; CHECK-NEXT: [[BIN_RDX3:%.*]] = fmul fast float [[ACCUM:%.*]], [[TMP0]]
+; CHECK-NEXT: ret float [[BIN_RDX3]]
 ;
 entry:
   %r = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %accum, <4 x float> %vec)
@@ -277,40 +277,40 @@ entry:
   ret i64 %r
 }

+; FIXME: Expand using maxnum intrinsic?
+
 define double @fmax_f64(<2 x double> %vec) {
 ; CHECK-LABEL: @fmax_f64(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
-; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <2 x double> [[VEC]], [[RDX_SHUF]]
-; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select fast <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
-; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX_SELECT]], i32 0
-; CHECK-NEXT: ret double [[TMP0]]
+; CHECK-NEXT: [[R:%.*]] = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> [[VEC:%.*]])
+; CHECK-NEXT: ret double [[R]]
 ;
 entry:
   %r = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %vec)
   ret double %r
 }

+; FIXME: Expand using minnum intrinsic?
+
 define double @fmin_f64(<2 x double> %vec) {
 ; CHECK-LABEL: @fmin_f64(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
-; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <2 x double> [[VEC]], [[RDX_SHUF]]
-; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select fast <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
-; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX_SELECT]], i32 0
-; CHECK-NEXT: ret double [[TMP0]]
+; CHECK-NEXT: [[R:%.*]] = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> [[VEC:%.*]])
+; CHECK-NEXT: ret double [[R]]
 ;
 entry:
   %r = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %vec)
   ret double %r
 }

+; FIXME: Why is this not expanded?
+
 ; Test when the vector size is not power of two.
define i8 @test_v3i8(<3 x i8> %a) nounwind { ; CHECK-LABEL: @test_v3i8( ; CHECK-NEXT: entry: -; CHECK-NEXT: %b = call i8 @llvm.experimental.vector.reduce.and.v3i8(<3 x i8> %a) -; CHECK-NEXT: ret i8 %b +; CHECK-NEXT: [[B:%.*]] = call i8 @llvm.experimental.vector.reduce.and.v3i8(<3 x i8> [[A:%.*]]) +; CHECK-NEXT: ret i8 [[B]] ; entry: %b = call i8 @llvm.experimental.vector.reduce.and.i8.v3i8(<3 x i8> %a) diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll index 6936b7ea3ad1f..a83fa6882cb90 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll @@ -2,30 +2,11 @@ ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp,+fp64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16,+fp64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP -; FIXME minnum nonan X, +Inf -> X ? define arm_aapcs_vfpcc float @fmin_v2f32(<2 x float> %x) { -; CHECK-FP-LABEL: fmin_v2f32: -; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vldr s4, .LCPI0_0 -; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1 -; CHECK-FP-NEXT: vminnm.f32 s0, s0, s4 -; CHECK-FP-NEXT: bx lr -; CHECK-FP-NEXT: .p2align 2 -; CHECK-FP-NEXT: @ %bb.1: -; CHECK-FP-NEXT: .LCPI0_0: -; CHECK-FP-NEXT: .long 0x7f800000 @ float +Inf -; -; CHECK-NOFP-LABEL: fmin_v2f32: -; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vldr s4, .LCPI0_0 -; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s1 -; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s4 -; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s4 -; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 2 -; CHECK-NOFP-NEXT: @ %bb.1: -; CHECK-NOFP-NEXT: .LCPI0_0: -; CHECK-NOFP-NEXT: .long 0x7f800000 @ float +Inf +; CHECK-LABEL: fmin_v2f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vminnm.f32 s0, s0, s1 +; CHECK-NEXT: bx lr entry: %z = call fast float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %x) ret float %z @@ -99,17 +80,8 @@ define arm_aapcs_vfpcc half @fmin_v4f16(<4 x half> %x) { ; CHECK-NOFP-NEXT: vminnm.f16 s4, s0, s4 ; CHECK-NOFP-NEXT: vmovx.f16 s0, s1 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s4, s1 -; CHECK-NOFP-NEXT: vldr.16 s2, .LCPI3_0 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s4, s0 -; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2 ; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 1 -; CHECK-NOFP-NEXT: @ %bb.1: -; CHECK-NOFP-NEXT: .LCPI3_0: -; CHECK-NOFP-NEXT: .short 0x7c00 @ half +Inf entry: %z = call fast half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %x) ret half %z @@ -237,23 +209,11 @@ entry: ret double %z } -; FIXME should not be vminnm -; FIXME better reductions (no vmovs/vdups) define arm_aapcs_vfpcc float @fmin_v2f32_nofast(<2 x float> %x) { -; CHECK-FP-LABEL: fmin_v2f32_nofast: -; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov r0, s1 -; CHECK-FP-NEXT: vdup.32 q1, r0 -; CHECK-FP-NEXT: vminnm.f32 q0, q0, q1 -; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0 -; CHECK-FP-NEXT: bx lr -; -; CHECK-NOFP-LABEL: fmin_v2f32_nofast: -; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vcmp.f32 s1, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s1 -; CHECK-NOFP-NEXT: bx lr +; CHECK-LABEL: fmin_v2f32_nofast: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vminnm.f32 s0, s0, s1 +; CHECK-NEXT: bx lr 
entry: %z = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %x) ret float %z @@ -262,28 +222,16 @@ entry: define arm_aapcs_vfpcc float @fmin_v4f32_nofast(<4 x float> %x) { ; CHECK-FP-LABEL: fmin_v4f32_nofast: ; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov.f64 d2, d1 -; CHECK-FP-NEXT: vmov.f32 s5, s3 -; CHECK-FP-NEXT: vminnm.f32 q0, q0, q1 -; CHECK-FP-NEXT: vmov r0, s1 -; CHECK-FP-NEXT: vdup.32 q1, r0 -; CHECK-FP-NEXT: vminnm.f32 q0, q0, q1 -; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0 +; CHECK-FP-NEXT: vminnm.f32 s4, s2, s3 +; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1 +; CHECK-FP-NEXT: vminnm.f32 s0, s0, s4 ; CHECK-FP-NEXT: bx lr ; ; CHECK-NOFP-LABEL: fmin_v4f32_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vcmp.f32 s3, s1 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vmov.f64 d2, d1 -; CHECK-NOFP-NEXT: vmov.f32 s5, s3 -; CHECK-NOFP-NEXT: vcmp.f32 s4, s0 -; CHECK-NOFP-NEXT: vselgt.f32 s8, s1, s3 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s4 -; CHECK-NOFP-NEXT: vcmp.f32 s8, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s8 +; CHECK-NOFP-NEXT: vminnm.f32 s4, s0, s1 +; CHECK-NOFP-NEXT: vminnm.f32 s4, s4, s2 +; CHECK-NOFP-NEXT: vminnm.f32 s0, s4, s3 ; CHECK-NOFP-NEXT: bx lr entry: %z = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %x) @@ -294,38 +242,20 @@ define arm_aapcs_vfpcc float @fmin_v8f32_nofast(<8 x float> %x) { ; CHECK-FP-LABEL: fmin_v8f32_nofast: ; CHECK-FP: @ %bb.0: @ %entry ; CHECK-FP-NEXT: vminnm.f32 q0, q0, q1 -; CHECK-FP-NEXT: vmov.f64 d2, d1 -; CHECK-FP-NEXT: vmov.f32 s5, s3 -; CHECK-FP-NEXT: vminnm.f32 q0, q0, q1 -; CHECK-FP-NEXT: vmov r0, s1 -; CHECK-FP-NEXT: vdup.32 q1, r0 -; CHECK-FP-NEXT: vminnm.f32 q0, q0, q1 -; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0 +; CHECK-FP-NEXT: vminnm.f32 s4, s2, s3 +; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1 +; CHECK-FP-NEXT: vminnm.f32 s0, s0, s4 ; CHECK-FP-NEXT: bx lr ; ; CHECK-NOFP-LABEL: fmin_v8f32_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vcmp.f32 s7, s3 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s5, s1 -; CHECK-NOFP-NEXT: vselgt.f32 s8, s3, s7 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s6, s2 -; CHECK-NOFP-NEXT: vselgt.f32 s10, s1, s5 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s4, s0 -; CHECK-NOFP-NEXT: vselgt.f32 s12, s2, s6 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s8, s10 -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s4 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s12, s0 -; CHECK-NOFP-NEXT: vselgt.f32 s2, s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s12 -; CHECK-NOFP-NEXT: vcmp.f32 s2, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s2 +; CHECK-NOFP-NEXT: vminnm.f32 s10, s0, s4 +; CHECK-NOFP-NEXT: vminnm.f32 s8, s1, s5 +; CHECK-NOFP-NEXT: vminnm.f32 s8, s10, s8 +; CHECK-NOFP-NEXT: vminnm.f32 s10, s2, s6 +; CHECK-NOFP-NEXT: vminnm.f32 s8, s8, s10 +; CHECK-NOFP-NEXT: vminnm.f32 s0, s3, s7 +; CHECK-NOFP-NEXT: vminnm.f32 s0, s8, s0 ; CHECK-NOFP-NEXT: bx lr entry: %z = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %x) @@ -335,30 +265,20 @@ entry: define arm_aapcs_vfpcc half @fmin_v4f16_nofast(<4 x half> %x) { ; CHECK-FP-LABEL: fmin_v4f16_nofast: ; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov r0, s1 -; CHECK-FP-NEXT: vdup.32 
q1, r0 -; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov.u16 r0, q0[1] -; CHECK-FP-NEXT: vdup.16 q1, r0 -; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0 +; CHECK-FP-NEXT: vmovx.f16 s4, s1 +; CHECK-FP-NEXT: vmovx.f16 s6, s0 +; CHECK-FP-NEXT: vminnm.f16 s4, s1, s4 +; CHECK-FP-NEXT: vminnm.f16 s0, s0, s6 +; CHECK-FP-NEXT: vminnm.f16 s0, s0, s4 ; CHECK-FP-NEXT: bx lr ; ; CHECK-NOFP-LABEL: fmin_v4f16_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vmov r0, s1 -; CHECK-NOFP-NEXT: vmovx.f16 s10, s0 -; CHECK-NOFP-NEXT: vdup.32 q1, r0 -; CHECK-NOFP-NEXT: vmovx.f16 s8, s4 -; CHECK-NOFP-NEXT: vcmp.f16 s8, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s4, s0 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4 -; CHECK-NOFP-NEXT: vcmp.f16 s8, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8 +; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 +; CHECK-NOFP-NEXT: vminnm.f16 s4, s0, s4 +; CHECK-NOFP-NEXT: vmovx.f16 s0, s1 +; CHECK-NOFP-NEXT: vminnm.f16 s4, s4, s1 +; CHECK-NOFP-NEXT: vminnm.f16 s0, s4, s0 ; CHECK-NOFP-NEXT: bx lr entry: %z = call half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %x) @@ -368,47 +288,26 @@ entry: define arm_aapcs_vfpcc half @fmin_v8f16_nofast(<8 x half> %x) { ; CHECK-FP-LABEL: fmin_v8f16_nofast: ; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov.f64 d2, d1 -; CHECK-FP-NEXT: vmov.f32 s5, s3 -; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov r0, s1 -; CHECK-FP-NEXT: vdup.32 q1, r0 -; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov.u16 r0, q0[1] -; CHECK-FP-NEXT: vdup.16 q1, r0 +; CHECK-FP-NEXT: vrev32.16 q1, q0 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0 +; CHECK-FP-NEXT: vminnm.f16 s4, s2, s3 +; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1 +; CHECK-FP-NEXT: vminnm.f16 s0, s0, s4 ; CHECK-FP-NEXT: bx lr ; ; CHECK-NOFP-LABEL: fmin_v8f16_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vmovx.f16 s8, s3 -; CHECK-NOFP-NEXT: vmovx.f16 s10, s1 -; CHECK-NOFP-NEXT: vcmp.f16 s8, s10 -; CHECK-NOFP-NEXT: vmov.f64 d2, d1 -; CHECK-NOFP-NEXT: vmovx.f16 s12, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vmov.f32 s5, s3 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 -; CHECK-NOFP-NEXT: vmovx.f16 s10, s4 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s12 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 -; CHECK-NOFP-NEXT: vcmp.f16 s8, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s3, s1 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s4, s0 -; CHECK-NOFP-NEXT: vselgt.f16 s10, s1, s3 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s10 -; CHECK-NOFP-NEXT: vcmp.f16 s8, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8 +; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 +; CHECK-NOFP-NEXT: vmovx.f16 s6, s1 +; CHECK-NOFP-NEXT: vminnm.f16 s4, s0, s4 +; CHECK-NOFP-NEXT: vmovx.f16 s0, s3 +; CHECK-NOFP-NEXT: vminnm.f16 s4, s4, s1 +; CHECK-NOFP-NEXT: vminnm.f16 s4, s4, s6 +; CHECK-NOFP-NEXT: vmovx.f16 s6, s2 +; CHECK-NOFP-NEXT: vminnm.f16 s4, s4, s2 +; CHECK-NOFP-NEXT: vminnm.f16 s4, s4, s6 +; CHECK-NOFP-NEXT: 
vminnm.f16 s4, s4, s3 +; CHECK-NOFP-NEXT: vminnm.f16 s0, s4, s0 ; CHECK-NOFP-NEXT: bx lr entry: %z = call half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half> %x) @@ -419,73 +318,38 @@ define arm_aapcs_vfpcc half @fmin_v16f16_nofast(<16 x half> %x) { ; CHECK-FP-LABEL: fmin_v16f16_nofast: ; CHECK-FP: @ %bb.0: @ %entry ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov.f64 d2, d1 -; CHECK-FP-NEXT: vmov.f32 s5, s3 -; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov r0, s1 -; CHECK-FP-NEXT: vdup.32 q1, r0 -; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov.u16 r0, q0[1] -; CHECK-FP-NEXT: vdup.16 q1, r0 +; CHECK-FP-NEXT: vrev32.16 q1, q0 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0 +; CHECK-FP-NEXT: vminnm.f16 s4, s2, s3 +; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1 +; CHECK-FP-NEXT: vminnm.f16 s0, s0, s4 ; CHECK-FP-NEXT: bx lr ; ; CHECK-NOFP-LABEL: fmin_v16f16_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vmovx.f16 s8, s7 -; CHECK-NOFP-NEXT: vmovx.f16 s10, s3 -; CHECK-NOFP-NEXT: vcmp.f16 s8, s10 -; CHECK-NOFP-NEXT: vmovx.f16 s12, s1 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vmovx.f16 s14, s0 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 +; CHECK-NOFP-NEXT: vmovx.f16 s8, s4 +; CHECK-NOFP-NEXT: vmovx.f16 s10, s0 +; CHECK-NOFP-NEXT: vminnm.f16 s8, s10, s8 +; CHECK-NOFP-NEXT: vminnm.f16 s10, s0, s4 +; CHECK-NOFP-NEXT: vminnm.f16 s8, s10, s8 +; CHECK-NOFP-NEXT: vminnm.f16 s10, s1, s5 +; CHECK-NOFP-NEXT: vminnm.f16 s8, s8, s10 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s5 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s12 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 +; CHECK-NOFP-NEXT: vmovx.f16 s12, s1 +; CHECK-NOFP-NEXT: vmovx.f16 s4, s7 +; CHECK-NOFP-NEXT: vminnm.f16 s10, s12, s10 ; CHECK-NOFP-NEXT: vmovx.f16 s12, s2 -; CHECK-NOFP-NEXT: vcmp.f16 s8, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 +; CHECK-NOFP-NEXT: vminnm.f16 s8, s8, s10 +; CHECK-NOFP-NEXT: vminnm.f16 s10, s2, s6 +; CHECK-NOFP-NEXT: vminnm.f16 s8, s8, s10 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s6 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s12 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 -; CHECK-NOFP-NEXT: vmovx.f16 s12, s4 -; CHECK-NOFP-NEXT: vcmp.f16 s12, s14 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s12, s14, s12 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s12 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 -; CHECK-NOFP-NEXT: vcmp.f16 s8, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s7, s3 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s5, s1 -; CHECK-NOFP-NEXT: vselgt.f16 s10, s3, s7 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s12, s1, s5 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s12 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s6, s2 -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s4, s0 -; CHECK-NOFP-NEXT: vselgt.f16 s12, s2, s6 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4 -; CHECK-NOFP-NEXT: vcmp.f16 s12, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s12 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s10 -; 
CHECK-NOFP-NEXT: vcmp.f16 s8, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8 +; CHECK-NOFP-NEXT: vminnm.f16 s10, s12, s10 +; CHECK-NOFP-NEXT: vmovx.f16 s0, s3 +; CHECK-NOFP-NEXT: vminnm.f16 s8, s8, s10 +; CHECK-NOFP-NEXT: vminnm.f16 s10, s3, s7 +; CHECK-NOFP-NEXT: vminnm.f16 s8, s8, s10 +; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4 +; CHECK-NOFP-NEXT: vminnm.f16 s0, s8, s0 ; CHECK-NOFP-NEXT: bx lr entry: %z = call half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half> %x) @@ -504,9 +368,7 @@ entry: define arm_aapcs_vfpcc double @fmin_v2f64_nofast(<2 x double> %x) { ; CHECK-LABEL: fmin_v2f64_nofast: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.f64 d1, d0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f64 d0, d0, d1 +; CHECK-NEXT: vminnm.f64 d0, d0, d1 ; CHECK-NEXT: bx lr entry: %z = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %x) @@ -516,15 +378,9 @@ entry: define arm_aapcs_vfpcc double @fmin_v4f64_nofast(<4 x double> %x) { ; CHECK-LABEL: fmin_v4f64_nofast: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.f64 d3, d1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f64 d2, d0 -; CHECK-NEXT: vselgt.f64 d4, d1, d3 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f64 d0, d0, d2 -; CHECK-NEXT: vcmp.f64 d4, d0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f64 d0, d0, d4 +; CHECK-NEXT: vminnm.f64 d4, d1, d3 +; CHECK-NEXT: vminnm.f64 d0, d0, d2 +; CHECK-NEXT: vminnm.f64 d0, d0, d4 ; CHECK-NEXT: bx lr entry: %z = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %x) @@ -532,30 +388,11 @@ entry: } define arm_aapcs_vfpcc float @fmin_v2f32_acc(<2 x float> %x, float %y) { -; CHECK-FP-LABEL: fmin_v2f32_acc: -; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vldr s6, .LCPI18_0 -; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1 -; CHECK-FP-NEXT: vminnm.f32 s0, s0, s6 -; CHECK-FP-NEXT: vminnm.f32 s0, s4, s0 -; CHECK-FP-NEXT: bx lr -; CHECK-FP-NEXT: .p2align 2 -; CHECK-FP-NEXT: @ %bb.1: -; CHECK-FP-NEXT: .LCPI18_0: -; CHECK-FP-NEXT: .long 0x7f800000 @ float +Inf -; -; CHECK-NOFP-LABEL: fmin_v2f32_acc: -; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vldr s6, .LCPI18_0 -; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s1 -; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s6 -; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s6 -; CHECK-NOFP-NEXT: vminnm.f32 s0, s4, s0 -; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 2 -; CHECK-NOFP-NEXT: @ %bb.1: -; CHECK-NOFP-NEXT: .LCPI18_0: -; CHECK-NOFP-NEXT: .long 0x7f800000 @ float +Inf +; CHECK-LABEL: fmin_v2f32_acc: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vminnm.f32 s0, s0, s1 +; CHECK-NEXT: vminnm.f32 s0, s4, s0 +; CHECK-NEXT: bx lr entry: %z = call fast float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %x) %c = fcmp fast olt float %y, %z @@ -641,20 +478,11 @@ define arm_aapcs_vfpcc void @fmin_v4f16_acc(<4 x half> %x, half* %yy) { ; CHECK-NOFP-NEXT: vminnm.f16 s4, s0, s4 ; CHECK-NOFP-NEXT: vmovx.f16 s0, s1 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s4, s1 -; CHECK-NOFP-NEXT: vldr.16 s2, .LCPI21_0 -; CHECK-NOFP-NEXT: vminnm.f16 s0, s4, s0 -; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2 ; CHECK-NOFP-NEXT: vldr.16 s2, [r0] +; CHECK-NOFP-NEXT: vminnm.f16 s0, s4, s0 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s2, s0 ; CHECK-NOFP-NEXT: vstr.16 s0, [r0] ; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 1 -; CHECK-NOFP-NEXT: @ %bb.1: -; CHECK-NOFP-NEXT: .LCPI21_0: -; 
CHECK-NOFP-NEXT: .short 0x7c00 @ half +Inf entry: %y = load half, half* %yy %z = call fast half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %x) @@ -665,34 +493,14 @@ entry: } define arm_aapcs_vfpcc void @fmin_v2f16_acc(<2 x half> %x, half* %yy) { -; CHECK-FP-LABEL: fmin_v2f16_acc: -; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmovx.f16 s4, s0 -; CHECK-FP-NEXT: vminnm.f16 s0, s0, s4 -; CHECK-FP-NEXT: vldr.16 s2, [r0] -; CHECK-FP-NEXT: vminnm.f16 s0, s2, s0 -; CHECK-FP-NEXT: vstr.16 s0, [r0] -; CHECK-FP-NEXT: bx lr -; -; CHECK-NOFP-LABEL: fmin_v2f16_acc: -; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 -; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4 -; CHECK-NOFP-NEXT: vldr.16 s2, .LCPI22_0 -; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vldr.16 s2, [r0] -; CHECK-NOFP-NEXT: vminnm.f16 s0, s2, s0 -; CHECK-NOFP-NEXT: vstr.16 s0, [r0] -; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 1 -; CHECK-NOFP-NEXT: @ %bb.1: -; CHECK-NOFP-NEXT: .LCPI22_0: -; CHECK-NOFP-NEXT: .short 0x7c00 @ half +Inf +; CHECK-LABEL: fmin_v2f16_acc: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovx.f16 s4, s0 +; CHECK-NEXT: vminnm.f16 s0, s0, s4 +; CHECK-NEXT: vldr.16 s2, [r0] +; CHECK-NEXT: vminnm.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr entry: %y = load half, half* %yy %z = call fast half @llvm.experimental.vector.reduce.fmin.v2f16(<2 x half> %x) @@ -854,25 +662,13 @@ entry: } define arm_aapcs_vfpcc float @fmin_v2f32_acc_nofast(<2 x float> %x, float %y) { -; CHECK-FP-LABEL: fmin_v2f32_acc_nofast: -; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov r0, s1 -; CHECK-FP-NEXT: vdup.32 q2, r0 -; CHECK-FP-NEXT: vminnm.f32 q0, q0, q2 -; CHECK-FP-NEXT: vcmp.f32 s0, s4 -; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-FP-NEXT: vselgt.f32 s0, s4, s0 -; CHECK-FP-NEXT: bx lr -; -; CHECK-NOFP-LABEL: fmin_v2f32_acc_nofast: -; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vcmp.f32 s1, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s1 -; CHECK-NOFP-NEXT: vcmp.f32 s0, s4 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s4, s0 -; CHECK-NOFP-NEXT: bx lr +; CHECK-LABEL: fmin_v2f32_acc_nofast: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vminnm.f32 s0, s0, s1 +; CHECK-NEXT: vcmp.f32 s0, s4 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselgt.f32 s0, s4, s0 +; CHECK-NEXT: bx lr entry: %z = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %x) %c = fcmp olt float %y, %z @@ -883,12 +679,9 @@ entry: define arm_aapcs_vfpcc float @fmin_v4f32_acc_nofast(<4 x float> %x, float %y) { ; CHECK-FP-LABEL: fmin_v4f32_acc_nofast: ; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov.f64 d4, d1 -; CHECK-FP-NEXT: vmov.f32 s9, s3 -; CHECK-FP-NEXT: vminnm.f32 q0, q0, q2 -; CHECK-FP-NEXT: vmov r0, s1 -; CHECK-FP-NEXT: vdup.32 q2, r0 -; CHECK-FP-NEXT: vminnm.f32 q0, q0, q2 +; CHECK-FP-NEXT: vminnm.f32 s6, s2, s3 +; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1 +; CHECK-FP-NEXT: vminnm.f32 s0, s0, s6 ; CHECK-FP-NEXT: vcmp.f32 s0, s4 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-FP-NEXT: vselgt.f32 s0, s4, s0 @@ -896,17 +689,9 @@ define arm_aapcs_vfpcc float @fmin_v4f32_acc_nofast(<4 x float> %x, float %y) { ; ; CHECK-NOFP-LABEL: fmin_v4f32_acc_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: 
vcmp.f32 s3, s1 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vmov.f64 d4, d1 -; CHECK-NOFP-NEXT: vmov.f32 s9, s3 -; CHECK-NOFP-NEXT: vcmp.f32 s8, s0 -; CHECK-NOFP-NEXT: vselgt.f32 s6, s1, s3 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s8 -; CHECK-NOFP-NEXT: vcmp.f32 s6, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s6 +; CHECK-NOFP-NEXT: vminnm.f32 s6, s0, s1 +; CHECK-NOFP-NEXT: vminnm.f32 s6, s6, s2 +; CHECK-NOFP-NEXT: vminnm.f32 s0, s6, s3 ; CHECK-NOFP-NEXT: vcmp.f32 s0, s4 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NOFP-NEXT: vselgt.f32 s0, s4, s0 @@ -922,12 +707,9 @@ define arm_aapcs_vfpcc float @fmin_v8f32_acc_nofast(<8 x float> %x, float %y) { ; CHECK-FP-LABEL: fmin_v8f32_acc_nofast: ; CHECK-FP: @ %bb.0: @ %entry ; CHECK-FP-NEXT: vminnm.f32 q0, q0, q1 -; CHECK-FP-NEXT: vmov.f64 d2, d1 -; CHECK-FP-NEXT: vmov.f32 s5, s3 -; CHECK-FP-NEXT: vminnm.f32 q0, q0, q1 -; CHECK-FP-NEXT: vmov r0, s1 -; CHECK-FP-NEXT: vdup.32 q1, r0 -; CHECK-FP-NEXT: vminnm.f32 q0, q0, q1 +; CHECK-FP-NEXT: vminnm.f32 s4, s2, s3 +; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1 +; CHECK-FP-NEXT: vminnm.f32 s0, s0, s4 ; CHECK-FP-NEXT: vcmp.f32 s0, s8 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-FP-NEXT: vselgt.f32 s0, s8, s0 @@ -935,27 +717,13 @@ define arm_aapcs_vfpcc float @fmin_v8f32_acc_nofast(<8 x float> %x, float %y) { ; ; CHECK-NOFP-LABEL: fmin_v8f32_acc_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vcmp.f32 s7, s3 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s5, s1 -; CHECK-NOFP-NEXT: vselgt.f32 s10, s3, s7 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s6, s2 -; CHECK-NOFP-NEXT: vselgt.f32 s12, s1, s5 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s4, s0 -; CHECK-NOFP-NEXT: vselgt.f32 s14, s2, s6 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s10, s12 -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s4 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s14, s0 -; CHECK-NOFP-NEXT: vselgt.f32 s2, s12, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s14 -; CHECK-NOFP-NEXT: vcmp.f32 s2, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s2 +; CHECK-NOFP-NEXT: vminnm.f32 s12, s0, s4 +; CHECK-NOFP-NEXT: vminnm.f32 s10, s1, s5 +; CHECK-NOFP-NEXT: vminnm.f32 s10, s12, s10 +; CHECK-NOFP-NEXT: vminnm.f32 s12, s2, s6 +; CHECK-NOFP-NEXT: vminnm.f32 s10, s10, s12 +; CHECK-NOFP-NEXT: vminnm.f32 s0, s3, s7 +; CHECK-NOFP-NEXT: vminnm.f32 s0, s10, s0 ; CHECK-NOFP-NEXT: vcmp.f32 s0, s8 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NOFP-NEXT: vselgt.f32 s0, s8, s0 @@ -970,35 +738,26 @@ entry: define arm_aapcs_vfpcc void @fmin_v4f16_acc_nofast(<4 x half> %x, half* %yy) { ; CHECK-FP-LABEL: fmin_v4f16_acc_nofast: ; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov r1, s1 -; CHECK-FP-NEXT: vdup.32 q1, r1 -; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov.u16 r1, q0[1] -; CHECK-FP-NEXT: vdup.16 q1, r1 -; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vldr.16 s4, [r0] -; CHECK-FP-NEXT: vcmp.f16 s0, s4 +; CHECK-FP-NEXT: vmovx.f16 s4, s1 +; CHECK-FP-NEXT: vmovx.f16 s6, s0 +; CHECK-FP-NEXT: vminnm.f16 s0, s0, s6 +; CHECK-FP-NEXT: vminnm.f16 s4, s1, s4 +; CHECK-FP-NEXT: vldr.16 s2, [r0] +; CHECK-FP-NEXT: vminnm.f16 s0, s0, s4 +; CHECK-FP-NEXT: vcmp.f16 s0, s2 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-FP-NEXT: vselgt.f16 s0, s4, 
s0 +; CHECK-FP-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-FP-NEXT: vstr.16 s0, [r0] ; CHECK-FP-NEXT: bx lr ; ; CHECK-NOFP-LABEL: fmin_v4f16_acc_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vmov r1, s1 -; CHECK-NOFP-NEXT: vmovx.f16 s10, s0 -; CHECK-NOFP-NEXT: vdup.32 q1, r1 -; CHECK-NOFP-NEXT: vmovx.f16 s8, s4 -; CHECK-NOFP-NEXT: vcmp.f16 s8, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s4, s0 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4 +; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 +; CHECK-NOFP-NEXT: vminnm.f16 s4, s0, s4 +; CHECK-NOFP-NEXT: vmovx.f16 s0, s1 +; CHECK-NOFP-NEXT: vminnm.f16 s4, s4, s1 ; CHECK-NOFP-NEXT: vldr.16 s2, [r0] -; CHECK-NOFP-NEXT: vcmp.f16 s8, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8 +; CHECK-NOFP-NEXT: vminnm.f16 s0, s4, s0 ; CHECK-NOFP-NEXT: vcmp.f16 s0, s2 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NOFP-NEXT: vselgt.f16 s0, s2, s0 @@ -1016,52 +775,32 @@ entry: define arm_aapcs_vfpcc void @fmin_v8f16_acc_nofast(<8 x half> %x, half* %yy) { ; CHECK-FP-LABEL: fmin_v8f16_acc_nofast: ; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov.f64 d2, d1 -; CHECK-FP-NEXT: vmov.f32 s5, s3 -; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov r1, s1 -; CHECK-FP-NEXT: vdup.32 q1, r1 -; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov.u16 r1, q0[1] -; CHECK-FP-NEXT: vdup.16 q1, r1 +; CHECK-FP-NEXT: vrev32.16 q1, q0 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vldr.16 s4, [r0] -; CHECK-FP-NEXT: vcmp.f16 s0, s4 +; CHECK-FP-NEXT: vminnm.f16 s4, s2, s3 +; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1 +; CHECK-FP-NEXT: vldr.16 s2, [r0] +; CHECK-FP-NEXT: vminnm.f16 s0, s0, s4 +; CHECK-FP-NEXT: vcmp.f16 s0, s2 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-FP-NEXT: vselgt.f16 s0, s4, s0 +; CHECK-FP-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-FP-NEXT: vstr.16 s0, [r0] ; CHECK-FP-NEXT: bx lr ; ; CHECK-NOFP-LABEL: fmin_v8f16_acc_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vmovx.f16 s8, s3 -; CHECK-NOFP-NEXT: vmovx.f16 s10, s1 -; CHECK-NOFP-NEXT: vcmp.f16 s8, s10 -; CHECK-NOFP-NEXT: vmov.f64 d2, d1 -; CHECK-NOFP-NEXT: vmovx.f16 s12, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vmov.f32 s5, s3 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 -; CHECK-NOFP-NEXT: vmovx.f16 s10, s4 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s12 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 -; CHECK-NOFP-NEXT: vcmp.f16 s8, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s3, s1 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s4, s0 -; CHECK-NOFP-NEXT: vselgt.f16 s10, s1, s3 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4 +; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 +; CHECK-NOFP-NEXT: vmovx.f16 s6, s1 +; CHECK-NOFP-NEXT: vminnm.f16 s4, s0, s4 +; CHECK-NOFP-NEXT: vmovx.f16 s0, s3 +; CHECK-NOFP-NEXT: vminnm.f16 s4, s4, s1 +; CHECK-NOFP-NEXT: vminnm.f16 s4, s4, s6 +; CHECK-NOFP-NEXT: vmovx.f16 s6, s2 +; CHECK-NOFP-NEXT: vminnm.f16 s4, s4, s2 ; CHECK-NOFP-NEXT: vldr.16 s2, [r0] -; CHECK-NOFP-NEXT: vcmp.f16 s10, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s10 -; CHECK-NOFP-NEXT: vcmp.f16 s8, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8 +; CHECK-NOFP-NEXT: vminnm.f16 s4, s4, s6 +; 
CHECK-NOFP-NEXT: vminnm.f16 s4, s4, s3 +; CHECK-NOFP-NEXT: vminnm.f16 s0, s4, s0 ; CHECK-NOFP-NEXT: vcmp.f16 s0, s2 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NOFP-NEXT: vselgt.f16 s0, s2, s0 @@ -1080,78 +819,44 @@ define arm_aapcs_vfpcc void @fmin_v16f16_acc_nofast(<16 x half> %x, half* %yy) { ; CHECK-FP-LABEL: fmin_v16f16_acc_nofast: ; CHECK-FP: @ %bb.0: @ %entry ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov.f64 d2, d1 -; CHECK-FP-NEXT: vmov.f32 s5, s3 -; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov r1, s1 -; CHECK-FP-NEXT: vdup.32 q1, r1 -; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov.u16 r1, q0[1] -; CHECK-FP-NEXT: vdup.16 q1, r1 +; CHECK-FP-NEXT: vrev32.16 q1, q0 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vldr.16 s4, [r0] -; CHECK-FP-NEXT: vcmp.f16 s0, s4 +; CHECK-FP-NEXT: vminnm.f16 s4, s2, s3 +; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1 +; CHECK-FP-NEXT: vldr.16 s2, [r0] +; CHECK-FP-NEXT: vminnm.f16 s0, s0, s4 +; CHECK-FP-NEXT: vcmp.f16 s0, s2 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-FP-NEXT: vselgt.f16 s0, s4, s0 +; CHECK-FP-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-FP-NEXT: vstr.16 s0, [r0] ; CHECK-FP-NEXT: bx lr ; ; CHECK-NOFP-LABEL: fmin_v16f16_acc_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vmovx.f16 s8, s7 -; CHECK-NOFP-NEXT: vmovx.f16 s10, s3 -; CHECK-NOFP-NEXT: vcmp.f16 s8, s10 -; CHECK-NOFP-NEXT: vmovx.f16 s12, s1 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vmovx.f16 s14, s0 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 +; CHECK-NOFP-NEXT: vmovx.f16 s8, s4 +; CHECK-NOFP-NEXT: vmovx.f16 s10, s0 +; CHECK-NOFP-NEXT: vminnm.f16 s8, s10, s8 +; CHECK-NOFP-NEXT: vminnm.f16 s10, s0, s4 +; CHECK-NOFP-NEXT: vminnm.f16 s8, s10, s8 +; CHECK-NOFP-NEXT: vminnm.f16 s10, s1, s5 +; CHECK-NOFP-NEXT: vminnm.f16 s8, s8, s10 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s5 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s12 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 +; CHECK-NOFP-NEXT: vmovx.f16 s12, s1 +; CHECK-NOFP-NEXT: vmovx.f16 s4, s7 +; CHECK-NOFP-NEXT: vminnm.f16 s10, s12, s10 ; CHECK-NOFP-NEXT: vmovx.f16 s12, s2 -; CHECK-NOFP-NEXT: vcmp.f16 s8, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 +; CHECK-NOFP-NEXT: vminnm.f16 s8, s8, s10 +; CHECK-NOFP-NEXT: vminnm.f16 s10, s2, s6 +; CHECK-NOFP-NEXT: vminnm.f16 s8, s8, s10 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s6 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s12 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 -; CHECK-NOFP-NEXT: vmovx.f16 s12, s4 -; CHECK-NOFP-NEXT: vcmp.f16 s12, s14 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s12, s14, s12 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s12 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 -; CHECK-NOFP-NEXT: vcmp.f16 s8, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s7, s3 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s5, s1 -; CHECK-NOFP-NEXT: vselgt.f16 s10, s3, s7 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s12, s1, s5 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s12 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s6, s2 -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s4, s0 -; CHECK-NOFP-NEXT: vselgt.f16 s12, s2, s6 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; 
CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4 +; CHECK-NOFP-NEXT: vminnm.f16 s10, s12, s10 +; CHECK-NOFP-NEXT: vmovx.f16 s0, s3 +; CHECK-NOFP-NEXT: vminnm.f16 s8, s8, s10 +; CHECK-NOFP-NEXT: vminnm.f16 s10, s3, s7 +; CHECK-NOFP-NEXT: vminnm.f16 s8, s8, s10 +; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4 ; CHECK-NOFP-NEXT: vldr.16 s2, [r0] -; CHECK-NOFP-NEXT: vcmp.f16 s12, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s12 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s10 -; CHECK-NOFP-NEXT: vcmp.f16 s8, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8 +; CHECK-NOFP-NEXT: vminnm.f16 s0, s8, s0 ; CHECK-NOFP-NEXT: vcmp.f16 s0, s2 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NOFP-NEXT: vselgt.f16 s0, s2, s0 @@ -1183,9 +888,7 @@ entry: define arm_aapcs_vfpcc double @fmin_v2f64_acc_nofast(<2 x double> %x, double %y) { ; CHECK-LABEL: fmin_v2f64_acc_nofast: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.f64 d1, d0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f64 d0, d0, d1 +; CHECK-NEXT: vminnm.f64 d0, d0, d1 ; CHECK-NEXT: vcmp.f64 d0, d2 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselgt.f64 d0, d2, d0 @@ -1200,15 +903,9 @@ entry: define arm_aapcs_vfpcc double @fmin_v4f64_acc_nofast(<4 x double> %x, double %y) { ; CHECK-LABEL: fmin_v4f64_acc_nofast: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.f64 d3, d1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f64 d2, d0 -; CHECK-NEXT: vselgt.f64 d5, d1, d3 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f64 d0, d0, d2 -; CHECK-NEXT: vcmp.f64 d5, d0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f64 d0, d0, d5 +; CHECK-NEXT: vminnm.f64 d5, d1, d3 +; CHECK-NEXT: vminnm.f64 d0, d0, d2 +; CHECK-NEXT: vminnm.f64 d0, d0, d5 ; CHECK-NEXT: vcmp.f64 d0, d4 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselgt.f64 d0, d4, d0 @@ -1221,28 +918,10 @@ entry: } define arm_aapcs_vfpcc float @fmax_v2f32(<2 x float> %x) { -; CHECK-FP-LABEL: fmax_v2f32: -; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vldr s4, .LCPI37_0 -; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1 -; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s4 -; CHECK-FP-NEXT: bx lr -; CHECK-FP-NEXT: .p2align 2 -; CHECK-FP-NEXT: @ %bb.1: -; CHECK-FP-NEXT: .LCPI37_0: -; CHECK-FP-NEXT: .long 0xff800000 @ float -Inf -; -; CHECK-NOFP-LABEL: fmax_v2f32: -; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vldr s4, .LCPI37_0 -; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s1 -; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s4 -; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s4 -; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 2 -; CHECK-NOFP-NEXT: @ %bb.1: -; CHECK-NOFP-NEXT: .LCPI37_0: -; CHECK-NOFP-NEXT: .long 0xff800000 @ float -Inf +; CHECK-LABEL: fmax_v2f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f32 s0, s0, s1 +; CHECK-NEXT: bx lr entry: %z = call fast float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %x) ret float %z @@ -1315,17 +994,8 @@ define arm_aapcs_vfpcc half @fmax_v4f16(<4 x half> %x) { ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s0, s4 ; CHECK-NOFP-NEXT: vmovx.f16 s0, s1 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s1 -; CHECK-NOFP-NEXT: vldr.16 s2, .LCPI40_0 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0 -; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2 ; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 1 -; CHECK-NOFP-NEXT: @ %bb.1: -; 
CHECK-NOFP-NEXT: .LCPI40_0: -; CHECK-NOFP-NEXT: .short 0xfc00 @ half -Inf entry: %z = call fast half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %x) ret half %z @@ -1454,20 +1124,10 @@ entry: } define arm_aapcs_vfpcc float @fmax_v2f32_nofast(<2 x float> %x) { -; CHECK-FP-LABEL: fmax_v2f32_nofast: -; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov r0, s1 -; CHECK-FP-NEXT: vdup.32 q1, r0 -; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1 -; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0 -; CHECK-FP-NEXT: bx lr -; -; CHECK-NOFP-LABEL: fmax_v2f32_nofast: -; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vcmp.f32 s0, s1 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s1 -; CHECK-NOFP-NEXT: bx lr +; CHECK-LABEL: fmax_v2f32_nofast: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f32 s0, s0, s1 +; CHECK-NEXT: bx lr entry: %z = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %x) ret float %z @@ -1476,28 +1136,16 @@ entry: define arm_aapcs_vfpcc float @fmax_v4f32_nofast(<4 x float> %x) { ; CHECK-FP-LABEL: fmax_v4f32_nofast: ; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov.f64 d2, d1 -; CHECK-FP-NEXT: vmov.f32 s5, s3 -; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1 -; CHECK-FP-NEXT: vmov r0, s1 -; CHECK-FP-NEXT: vdup.32 q1, r0 -; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1 -; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0 +; CHECK-FP-NEXT: vmaxnm.f32 s4, s2, s3 +; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1 +; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s4 ; CHECK-FP-NEXT: bx lr ; ; CHECK-NOFP-LABEL: fmax_v4f32_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vcmp.f32 s1, s3 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vmov.f64 d2, d1 -; CHECK-NOFP-NEXT: vmov.f32 s5, s3 -; CHECK-NOFP-NEXT: vcmp.f32 s0, s4 -; CHECK-NOFP-NEXT: vselgt.f32 s8, s1, s3 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s4 -; CHECK-NOFP-NEXT: vcmp.f32 s0, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s8 +; CHECK-NOFP-NEXT: vmaxnm.f32 s4, s0, s1 +; CHECK-NOFP-NEXT: vmaxnm.f32 s4, s4, s2 +; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s4, s3 ; CHECK-NOFP-NEXT: bx lr entry: %z = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %x) @@ -1508,38 +1156,20 @@ define arm_aapcs_vfpcc float @fmax_v8f32_nofast(<8 x float> %x) { ; CHECK-FP-LABEL: fmax_v8f32_nofast: ; CHECK-FP: @ %bb.0: @ %entry ; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1 -; CHECK-FP-NEXT: vmov.f64 d2, d1 -; CHECK-FP-NEXT: vmov.f32 s5, s3 -; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1 -; CHECK-FP-NEXT: vmov r0, s1 -; CHECK-FP-NEXT: vdup.32 q1, r0 -; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1 -; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0 +; CHECK-FP-NEXT: vmaxnm.f32 s4, s2, s3 +; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1 +; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s4 ; CHECK-FP-NEXT: bx lr ; ; CHECK-NOFP-LABEL: fmax_v8f32_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vcmp.f32 s3, s7 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s1, s5 -; CHECK-NOFP-NEXT: vselgt.f32 s8, s3, s7 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s2, s6 -; CHECK-NOFP-NEXT: vselgt.f32 s10, s1, s5 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s0, s4 -; CHECK-NOFP-NEXT: vselgt.f32 s12, s2, s6 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s10, s8 -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s4 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s0, s12 -; 
CHECK-NOFP-NEXT: vselgt.f32 s2, s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s12 -; CHECK-NOFP-NEXT: vcmp.f32 s0, s2 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s2 +; CHECK-NOFP-NEXT: vmaxnm.f32 s10, s0, s4 +; CHECK-NOFP-NEXT: vmaxnm.f32 s8, s1, s5 +; CHECK-NOFP-NEXT: vmaxnm.f32 s8, s10, s8 +; CHECK-NOFP-NEXT: vmaxnm.f32 s10, s2, s6 +; CHECK-NOFP-NEXT: vmaxnm.f32 s8, s8, s10 +; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s3, s7 +; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s8, s0 ; CHECK-NOFP-NEXT: bx lr entry: %z = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %x) @@ -1549,30 +1179,20 @@ entry: define arm_aapcs_vfpcc half @fmax_v4f16_nofast(<4 x half> %x) { ; CHECK-FP-LABEL: fmax_v4f16_nofast: ; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov r0, s1 -; CHECK-FP-NEXT: vdup.32 q1, r0 -; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov.u16 r0, q0[1] -; CHECK-FP-NEXT: vdup.16 q1, r0 -; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0 +; CHECK-FP-NEXT: vmovx.f16 s4, s1 +; CHECK-FP-NEXT: vmovx.f16 s6, s0 +; CHECK-FP-NEXT: vmaxnm.f16 s4, s1, s4 +; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s6 +; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4 ; CHECK-FP-NEXT: bx lr ; ; CHECK-NOFP-LABEL: fmax_v4f16_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vmov r0, s1 -; CHECK-NOFP-NEXT: vmovx.f16 s10, s0 -; CHECK-NOFP-NEXT: vdup.32 q1, r0 -; CHECK-NOFP-NEXT: vmovx.f16 s8, s4 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s0, s4 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4 -; CHECK-NOFP-NEXT: vcmp.f16 s0, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8 +; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 +; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s0, s4 +; CHECK-NOFP-NEXT: vmovx.f16 s0, s1 +; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s1 +; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0 ; CHECK-NOFP-NEXT: bx lr entry: %z = call half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %x) @@ -1582,47 +1202,26 @@ entry: define arm_aapcs_vfpcc half @fmax_v8f16_nofast(<8 x half> %x) { ; CHECK-FP-LABEL: fmax_v8f16_nofast: ; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov.f64 d2, d1 -; CHECK-FP-NEXT: vmov.f32 s5, s3 -; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov r0, s1 -; CHECK-FP-NEXT: vdup.32 q1, r0 -; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov.u16 r0, q0[1] -; CHECK-FP-NEXT: vdup.16 q1, r0 +; CHECK-FP-NEXT: vrev32.16 q1, q0 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0 +; CHECK-FP-NEXT: vmaxnm.f16 s4, s2, s3 +; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1 +; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4 ; CHECK-FP-NEXT: bx lr ; ; CHECK-NOFP-LABEL: fmax_v8f16_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vmovx.f16 s8, s3 -; CHECK-NOFP-NEXT: vmovx.f16 s10, s1 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s8 -; CHECK-NOFP-NEXT: vmov.f64 d2, d1 -; CHECK-NOFP-NEXT: vmovx.f16 s12, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vmov.f32 s5, s3 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 -; CHECK-NOFP-NEXT: vmovx.f16 s10, s4 -; CHECK-NOFP-NEXT: vcmp.f16 s12, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s1, s3 -; 
CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s0, s4 -; CHECK-NOFP-NEXT: vselgt.f16 s10, s1, s3 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4 -; CHECK-NOFP-NEXT: vcmp.f16 s0, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s10 -; CHECK-NOFP-NEXT: vcmp.f16 s0, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8 +; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 +; CHECK-NOFP-NEXT: vmovx.f16 s6, s1 +; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s0, s4 +; CHECK-NOFP-NEXT: vmovx.f16 s0, s3 +; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s1 +; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s6 +; CHECK-NOFP-NEXT: vmovx.f16 s6, s2 +; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s2 +; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s6 +; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s3 +; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0 ; CHECK-NOFP-NEXT: bx lr entry: %z = call half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half> %x) @@ -1633,73 +1232,38 @@ define arm_aapcs_vfpcc half @fmax_v16f16_nofast(<16 x half> %x) { ; CHECK-FP-LABEL: fmax_v16f16_nofast: ; CHECK-FP: @ %bb.0: @ %entry ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov.f64 d2, d1 -; CHECK-FP-NEXT: vmov.f32 s5, s3 -; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov r0, s1 -; CHECK-FP-NEXT: vdup.32 q1, r0 -; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov.u16 r0, q0[1] -; CHECK-FP-NEXT: vdup.16 q1, r0 +; CHECK-FP-NEXT: vrev32.16 q1, q0 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0 +; CHECK-FP-NEXT: vmaxnm.f16 s4, s2, s3 +; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1 +; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4 ; CHECK-FP-NEXT: bx lr ; ; CHECK-NOFP-LABEL: fmax_v16f16_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vmovx.f16 s8, s7 -; CHECK-NOFP-NEXT: vmovx.f16 s10, s3 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s8 -; CHECK-NOFP-NEXT: vmovx.f16 s12, s1 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vmovx.f16 s14, s0 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 +; CHECK-NOFP-NEXT: vmovx.f16 s8, s4 +; CHECK-NOFP-NEXT: vmovx.f16 s10, s0 +; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s10, s8 +; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s0, s4 +; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s10, s8 +; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s1, s5 +; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s5 -; CHECK-NOFP-NEXT: vcmp.f16 s12, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 +; CHECK-NOFP-NEXT: vmovx.f16 s12, s1 +; CHECK-NOFP-NEXT: vmovx.f16 s4, s7 +; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s12, s10 ; CHECK-NOFP-NEXT: vmovx.f16 s12, s2 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 +; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 +; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s2, s6 +; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s6 -; CHECK-NOFP-NEXT: vcmp.f16 s12, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 -; CHECK-NOFP-NEXT: vmovx.f16 s12, s4 -; CHECK-NOFP-NEXT: vcmp.f16 s14, s12 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s12, s14, s12 -; CHECK-NOFP-NEXT: vcmp.f16 s12, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s3, 
s7 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s1, s5 -; CHECK-NOFP-NEXT: vselgt.f16 s10, s3, s7 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s12, s1, s5 -; CHECK-NOFP-NEXT: vcmp.f16 s12, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s2, s6 -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s0, s4 -; CHECK-NOFP-NEXT: vselgt.f16 s12, s2, s6 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4 -; CHECK-NOFP-NEXT: vcmp.f16 s0, s12 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s12 -; CHECK-NOFP-NEXT: vcmp.f16 s0, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s10 -; CHECK-NOFP-NEXT: vcmp.f16 s0, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8 +; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s12, s10 +; CHECK-NOFP-NEXT: vmovx.f16 s0, s3 +; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 +; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s3, s7 +; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 +; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4 +; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s8, s0 ; CHECK-NOFP-NEXT: bx lr entry: %z = call half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half> %x) @@ -1718,9 +1282,7 @@ entry: define arm_aapcs_vfpcc double @fmax_v2f64_nofast(<2 x double> %x) { ; CHECK-LABEL: fmax_v2f64_nofast: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.f64 d0, d1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f64 d0, d0, d1 +; CHECK-NEXT: vmaxnm.f64 d0, d0, d1 ; CHECK-NEXT: bx lr entry: %z = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %x) @@ -1730,15 +1292,9 @@ entry: define arm_aapcs_vfpcc double @fmax_v4f64_nofast(<4 x double> %x) { ; CHECK-LABEL: fmax_v4f64_nofast: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.f64 d1, d3 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f64 d0, d2 -; CHECK-NEXT: vselgt.f64 d4, d1, d3 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f64 d0, d0, d2 -; CHECK-NEXT: vcmp.f64 d0, d4 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f64 d0, d0, d4 +; CHECK-NEXT: vmaxnm.f64 d4, d1, d3 +; CHECK-NEXT: vmaxnm.f64 d0, d0, d2 +; CHECK-NEXT: vmaxnm.f64 d0, d0, d4 ; CHECK-NEXT: bx lr entry: %z = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %x) @@ -1746,30 +1302,11 @@ entry: } define arm_aapcs_vfpcc float @fmax_v2f32_acc(<2 x float> %x, float %y) { -; CHECK-FP-LABEL: fmax_v2f32_acc: -; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vldr s6, .LCPI55_0 -; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1 -; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s6 -; CHECK-FP-NEXT: vmaxnm.f32 s0, s4, s0 -; CHECK-FP-NEXT: bx lr -; CHECK-FP-NEXT: .p2align 2 -; CHECK-FP-NEXT: @ %bb.1: -; CHECK-FP-NEXT: .LCPI55_0: -; CHECK-FP-NEXT: .long 0xff800000 @ float -Inf -; -; CHECK-NOFP-LABEL: fmax_v2f32_acc: -; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vldr s6, .LCPI55_0 -; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s1 -; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s6 -; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s6 -; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s4, s0 -; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 2 -; CHECK-NOFP-NEXT: @ %bb.1: -; CHECK-NOFP-NEXT: .LCPI55_0: -; CHECK-NOFP-NEXT: .long 0xff800000 @ float -Inf +; CHECK-LABEL: fmax_v2f32_acc: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f32 s0, s0, s1 +; CHECK-NEXT: vmaxnm.f32 s0, s4, s0 +; 
CHECK-NEXT: bx lr entry: %z = call fast float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %x) %c = fcmp fast ogt float %y, %z @@ -1837,34 +1374,14 @@ entry: } define arm_aapcs_vfpcc void @fmax_v2f16_acc(<2 x half> %x, half* %yy) { -; CHECK-FP-LABEL: fmax_v2f16_acc: -; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmovx.f16 s4, s0 -; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4 -; CHECK-FP-NEXT: vldr.16 s2, [r0] -; CHECK-FP-NEXT: vmaxnm.f16 s0, s2, s0 -; CHECK-FP-NEXT: vstr.16 s0, [r0] -; CHECK-FP-NEXT: bx lr -; -; CHECK-NOFP-LABEL: fmax_v2f16_acc: -; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 -; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4 -; CHECK-NOFP-NEXT: vldr.16 s2, .LCPI58_0 -; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vldr.16 s2, [r0] -; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s2, s0 -; CHECK-NOFP-NEXT: vstr.16 s0, [r0] -; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 1 -; CHECK-NOFP-NEXT: @ %bb.1: -; CHECK-NOFP-NEXT: .LCPI58_0: -; CHECK-NOFP-NEXT: .short 0xfc00 @ half -Inf +; CHECK-LABEL: fmax_v2f16_acc: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovx.f16 s4, s0 +; CHECK-NEXT: vmaxnm.f16 s0, s0, s4 +; CHECK-NEXT: vldr.16 s2, [r0] +; CHECK-NEXT: vmaxnm.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr entry: %y = load half, half* %yy %z = call fast half @llvm.experimental.vector.reduce.fmax.v2f16(<2 x half> %x) @@ -1893,20 +1410,11 @@ define arm_aapcs_vfpcc void @fmax_v4f16_acc(<4 x half> %x, half* %yy) { ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s0, s4 ; CHECK-NOFP-NEXT: vmovx.f16 s0, s1 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s1 -; CHECK-NOFP-NEXT: vldr.16 s2, .LCPI59_0 -; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0 -; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2 ; CHECK-NOFP-NEXT: vldr.16 s2, [r0] +; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s2, s0 ; CHECK-NOFP-NEXT: vstr.16 s0, [r0] ; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 1 -; CHECK-NOFP-NEXT: @ %bb.1: -; CHECK-NOFP-NEXT: .LCPI59_0: -; CHECK-NOFP-NEXT: .short 0xfc00 @ half -Inf entry: %y = load half, half* %yy %z = call fast half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %x) @@ -2068,25 +1576,13 @@ entry: } define arm_aapcs_vfpcc float @fmax_v2f32_acc_nofast(<2 x float> %x, float %y) { -; CHECK-FP-LABEL: fmax_v2f32_acc_nofast: -; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov r0, s1 -; CHECK-FP-NEXT: vdup.32 q2, r0 -; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q2 -; CHECK-FP-NEXT: vcmp.f32 s4, s0 -; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-FP-NEXT: vselgt.f32 s0, s4, s0 -; CHECK-FP-NEXT: bx lr -; -; CHECK-NOFP-LABEL: fmax_v2f32_acc_nofast: -; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vcmp.f32 s0, s1 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s1 -; CHECK-NOFP-NEXT: vcmp.f32 s4, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s4, s0 -; CHECK-NOFP-NEXT: bx lr +; CHECK-LABEL: fmax_v2f32_acc_nofast: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f32 s0, s0, s1 +; CHECK-NEXT: vcmp.f32 s4, s0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselgt.f32 s0, s4, s0 +; CHECK-NEXT: bx lr entry: %z = call float 
@llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %x) %c = fcmp ogt float %y, %z @@ -2097,12 +1593,9 @@ entry: define arm_aapcs_vfpcc float @fmax_v4f32_acc_nofast(<4 x float> %x, float %y) { ; CHECK-FP-LABEL: fmax_v4f32_acc_nofast: ; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov.f64 d4, d1 -; CHECK-FP-NEXT: vmov.f32 s9, s3 -; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q2 -; CHECK-FP-NEXT: vmov r0, s1 -; CHECK-FP-NEXT: vdup.32 q2, r0 -; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q2 +; CHECK-FP-NEXT: vmaxnm.f32 s6, s2, s3 +; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1 +; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s6 ; CHECK-FP-NEXT: vcmp.f32 s4, s0 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-FP-NEXT: vselgt.f32 s0, s4, s0 @@ -2110,17 +1603,9 @@ define arm_aapcs_vfpcc float @fmax_v4f32_acc_nofast(<4 x float> %x, float %y) { ; ; CHECK-NOFP-LABEL: fmax_v4f32_acc_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vcmp.f32 s1, s3 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vmov.f64 d4, d1 -; CHECK-NOFP-NEXT: vmov.f32 s9, s3 -; CHECK-NOFP-NEXT: vcmp.f32 s0, s8 -; CHECK-NOFP-NEXT: vselgt.f32 s6, s1, s3 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s8 -; CHECK-NOFP-NEXT: vcmp.f32 s0, s6 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s6 +; CHECK-NOFP-NEXT: vmaxnm.f32 s6, s0, s1 +; CHECK-NOFP-NEXT: vmaxnm.f32 s6, s6, s2 +; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s6, s3 ; CHECK-NOFP-NEXT: vcmp.f32 s4, s0 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NOFP-NEXT: vselgt.f32 s0, s4, s0 @@ -2136,12 +1621,9 @@ define arm_aapcs_vfpcc float @fmax_v8f32_acc_nofast(<8 x float> %x, float %y) { ; CHECK-FP-LABEL: fmax_v8f32_acc_nofast: ; CHECK-FP: @ %bb.0: @ %entry ; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1 -; CHECK-FP-NEXT: vmov.f64 d2, d1 -; CHECK-FP-NEXT: vmov.f32 s5, s3 -; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1 -; CHECK-FP-NEXT: vmov r0, s1 -; CHECK-FP-NEXT: vdup.32 q1, r0 -; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1 +; CHECK-FP-NEXT: vmaxnm.f32 s4, s2, s3 +; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1 +; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s4 ; CHECK-FP-NEXT: vcmp.f32 s8, s0 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-FP-NEXT: vselgt.f32 s0, s8, s0 @@ -2149,27 +1631,13 @@ define arm_aapcs_vfpcc float @fmax_v8f32_acc_nofast(<8 x float> %x, float %y) { ; ; CHECK-NOFP-LABEL: fmax_v8f32_acc_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vcmp.f32 s3, s7 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s1, s5 -; CHECK-NOFP-NEXT: vselgt.f32 s10, s3, s7 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s2, s6 -; CHECK-NOFP-NEXT: vselgt.f32 s12, s1, s5 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s0, s4 -; CHECK-NOFP-NEXT: vselgt.f32 s14, s2, s6 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s12, s10 -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s4 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f32 s0, s14 -; CHECK-NOFP-NEXT: vselgt.f32 s2, s12, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s14 -; CHECK-NOFP-NEXT: vcmp.f32 s0, s2 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s2 +; CHECK-NOFP-NEXT: vmaxnm.f32 s12, s0, s4 +; CHECK-NOFP-NEXT: vmaxnm.f32 s10, s1, s5 +; CHECK-NOFP-NEXT: vmaxnm.f32 s10, s12, s10 +; CHECK-NOFP-NEXT: vmaxnm.f32 s12, s2, s6 +; CHECK-NOFP-NEXT: vmaxnm.f32 s10, s10, s12 +; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s3, s7 +; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s10, s0 
; CHECK-NOFP-NEXT: vcmp.f32 s8, s0 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NOFP-NEXT: vselgt.f32 s0, s8, s0 @@ -2184,35 +1652,26 @@ entry: define arm_aapcs_vfpcc void @fmax_v4f16_acc_nofast(<4 x half> %x, half* %yy) { ; CHECK-FP-LABEL: fmax_v4f16_acc_nofast: ; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov r1, s1 -; CHECK-FP-NEXT: vdup.32 q1, r1 -; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov.u16 r1, q0[1] -; CHECK-FP-NEXT: vdup.16 q1, r1 -; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vldr.16 s4, [r0] -; CHECK-FP-NEXT: vcmp.f16 s4, s0 +; CHECK-FP-NEXT: vmovx.f16 s4, s1 +; CHECK-FP-NEXT: vmovx.f16 s6, s0 +; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s6 +; CHECK-FP-NEXT: vmaxnm.f16 s4, s1, s4 +; CHECK-FP-NEXT: vldr.16 s2, [r0] +; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4 +; CHECK-FP-NEXT: vcmp.f16 s2, s0 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-FP-NEXT: vselgt.f16 s0, s4, s0 +; CHECK-FP-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-FP-NEXT: vstr.16 s0, [r0] ; CHECK-FP-NEXT: bx lr ; ; CHECK-NOFP-LABEL: fmax_v4f16_acc_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vmov r1, s1 -; CHECK-NOFP-NEXT: vmovx.f16 s10, s0 -; CHECK-NOFP-NEXT: vdup.32 q1, r1 -; CHECK-NOFP-NEXT: vmovx.f16 s8, s4 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s0, s4 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4 +; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 +; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s0, s4 +; CHECK-NOFP-NEXT: vmovx.f16 s0, s1 +; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s1 ; CHECK-NOFP-NEXT: vldr.16 s2, [r0] -; CHECK-NOFP-NEXT: vcmp.f16 s0, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8 +; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0 ; CHECK-NOFP-NEXT: vcmp.f16 s2, s0 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NOFP-NEXT: vselgt.f16 s0, s2, s0 @@ -2230,52 +1689,32 @@ entry: define arm_aapcs_vfpcc void @fmax_v8f16_acc_nofast(<8 x half> %x, half* %yy) { ; CHECK-FP-LABEL: fmax_v8f16_acc_nofast: ; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov.f64 d2, d1 -; CHECK-FP-NEXT: vmov.f32 s5, s3 -; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov r1, s1 -; CHECK-FP-NEXT: vdup.32 q1, r1 -; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov.u16 r1, q0[1] -; CHECK-FP-NEXT: vdup.16 q1, r1 +; CHECK-FP-NEXT: vrev32.16 q1, q0 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vldr.16 s4, [r0] -; CHECK-FP-NEXT: vcmp.f16 s4, s0 +; CHECK-FP-NEXT: vmaxnm.f16 s4, s2, s3 +; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1 +; CHECK-FP-NEXT: vldr.16 s2, [r0] +; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4 +; CHECK-FP-NEXT: vcmp.f16 s2, s0 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-FP-NEXT: vselgt.f16 s0, s4, s0 +; CHECK-FP-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-FP-NEXT: vstr.16 s0, [r0] ; CHECK-FP-NEXT: bx lr ; ; CHECK-NOFP-LABEL: fmax_v8f16_acc_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vmovx.f16 s8, s3 -; CHECK-NOFP-NEXT: vmovx.f16 s10, s1 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s8 -; CHECK-NOFP-NEXT: vmov.f64 d2, d1 -; CHECK-NOFP-NEXT: vmovx.f16 s12, s0 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vmov.f32 s5, s3 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 -; CHECK-NOFP-NEXT: vmovx.f16 s10, s4 -; CHECK-NOFP-NEXT: vcmp.f16 s12, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; 
CHECK-NOFP-NEXT: vcmp.f16 s1, s3 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s0, s4 -; CHECK-NOFP-NEXT: vselgt.f16 s10, s1, s3 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4 +; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 +; CHECK-NOFP-NEXT: vmovx.f16 s6, s1 +; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s0, s4 +; CHECK-NOFP-NEXT: vmovx.f16 s0, s3 +; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s1 +; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s6 +; CHECK-NOFP-NEXT: vmovx.f16 s6, s2 +; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s2 ; CHECK-NOFP-NEXT: vldr.16 s2, [r0] -; CHECK-NOFP-NEXT: vcmp.f16 s0, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s10 -; CHECK-NOFP-NEXT: vcmp.f16 s0, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8 +; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s6 +; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s3 +; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0 ; CHECK-NOFP-NEXT: vcmp.f16 s2, s0 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NOFP-NEXT: vselgt.f16 s0, s2, s0 @@ -2294,78 +1733,44 @@ define arm_aapcs_vfpcc void @fmax_v16f16_acc_nofast(<16 x half> %x, half* %yy) { ; CHECK-FP-LABEL: fmax_v16f16_acc_nofast: ; CHECK-FP: @ %bb.0: @ %entry ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov.f64 d2, d1 -; CHECK-FP-NEXT: vmov.f32 s5, s3 -; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov r1, s1 -; CHECK-FP-NEXT: vdup.32 q1, r1 -; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vmov.u16 r1, q0[1] -; CHECK-FP-NEXT: vdup.16 q1, r1 +; CHECK-FP-NEXT: vrev32.16 q1, q0 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-FP-NEXT: vldr.16 s4, [r0] -; CHECK-FP-NEXT: vcmp.f16 s4, s0 +; CHECK-FP-NEXT: vmaxnm.f16 s4, s2, s3 +; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1 +; CHECK-FP-NEXT: vldr.16 s2, [r0] +; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4 +; CHECK-FP-NEXT: vcmp.f16 s2, s0 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-FP-NEXT: vselgt.f16 s0, s4, s0 +; CHECK-FP-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-FP-NEXT: vstr.16 s0, [r0] ; CHECK-FP-NEXT: bx lr ; ; CHECK-NOFP-LABEL: fmax_v16f16_acc_nofast: ; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vmovx.f16 s8, s7 -; CHECK-NOFP-NEXT: vmovx.f16 s10, s3 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s8 -; CHECK-NOFP-NEXT: vmovx.f16 s12, s1 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vmovx.f16 s14, s0 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 +; CHECK-NOFP-NEXT: vmovx.f16 s8, s4 +; CHECK-NOFP-NEXT: vmovx.f16 s10, s0 +; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s10, s8 +; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s0, s4 +; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s10, s8 +; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s1, s5 +; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s5 -; CHECK-NOFP-NEXT: vcmp.f16 s12, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 +; CHECK-NOFP-NEXT: vmovx.f16 s12, s1 +; CHECK-NOFP-NEXT: vmovx.f16 s4, s7 +; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s12, s10 ; CHECK-NOFP-NEXT: vmovx.f16 s12, s2 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 +; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 +; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s2, s6 +; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s6 -; CHECK-NOFP-NEXT: vcmp.f16 s12, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 -; CHECK-NOFP-NEXT: vmovx.f16 s12, s4 -; CHECK-NOFP-NEXT: 
vcmp.f16 s14, s12 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s12, s14, s12 -; CHECK-NOFP-NEXT: vcmp.f16 s12, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 -; CHECK-NOFP-NEXT: vcmp.f16 s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s3, s7 -; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s1, s5 -; CHECK-NOFP-NEXT: vselgt.f16 s10, s3, s7 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s12, s1, s5 -; CHECK-NOFP-NEXT: vcmp.f16 s12, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s2, s6 -; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vcmp.f16 s0, s4 -; CHECK-NOFP-NEXT: vselgt.f16 s12, s2, s6 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4 +; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s12, s10 +; CHECK-NOFP-NEXT: vmovx.f16 s0, s3 +; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 +; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s3, s7 +; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 +; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4 ; CHECK-NOFP-NEXT: vldr.16 s2, [r0] -; CHECK-NOFP-NEXT: vcmp.f16 s0, s12 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s12 -; CHECK-NOFP-NEXT: vcmp.f16 s0, s10 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s10 -; CHECK-NOFP-NEXT: vcmp.f16 s0, s8 -; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8 +; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s8, s0 ; CHECK-NOFP-NEXT: vcmp.f16 s2, s0 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NOFP-NEXT: vselgt.f16 s0, s2, s0 @@ -2397,9 +1802,7 @@ entry: define arm_aapcs_vfpcc double @fmax_v2f64_acc_nofast(<2 x double> %x, double %y) { ; CHECK-LABEL: fmax_v2f64_acc_nofast: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.f64 d0, d1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f64 d0, d0, d1 +; CHECK-NEXT: vmaxnm.f64 d0, d0, d1 ; CHECK-NEXT: vcmp.f64 d2, d0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselgt.f64 d0, d2, d0 @@ -2414,15 +1817,9 @@ entry: define arm_aapcs_vfpcc double @fmax_v4f64_acc_nofast(<4 x double> %x, double %y) { ; CHECK-LABEL: fmax_v4f64_acc_nofast: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.f64 d1, d3 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f64 d0, d2 -; CHECK-NEXT: vselgt.f64 d5, d1, d3 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f64 d0, d0, d2 -; CHECK-NEXT: vcmp.f64 d0, d5 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f64 d0, d0, d5 +; CHECK-NEXT: vmaxnm.f64 d5, d1, d3 +; CHECK-NEXT: vmaxnm.f64 d0, d0, d2 +; CHECK-NEXT: vmaxnm.f64 d0, d0, d5 ; CHECK-NEXT: vcmp.f64 d4, d0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselgt.f64 d0, d4, d0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll index 64a76f38920a7..382c32dbe2bf5 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll @@ -1512,13 +1512,10 @@ define float @fmin_f32(float* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: le lr, .LBB15_5 ; CHECK-NEXT: @ %bb.6: @ %middle.block -; CHECK-NEXT: vmov.f32 s4, s2 +; CHECK-NEXT: vminnm.f32 s4, s2, s3 +; CHECK-NEXT: vminnm.f32 s0, s0, s1 +; CHECK-NEXT: vminnm.f32 s0, s0, s4 ; CHECK-NEXT: cmp r2, r1 -; CHECK-NEXT: vmov.f32 s5, s3 -; 
CHECK-NEXT: vminnm.f32 q0, q0, q1 -; CHECK-NEXT: vmov r3, s1 -; CHECK-NEXT: vdup.32 q1, r3 -; CHECK-NEXT: vminnm.f32 q0, q0, q1 ; CHECK-NEXT: beq .LBB15_9 ; CHECK-NEXT: .LBB15_7: @ %for.body.preheader1 ; CHECK-NEXT: sub.w lr, r1, r2 @@ -1526,10 +1523,10 @@ define float @fmin_f32(float* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB15_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldmia r0!, {s4} -; CHECK-NEXT: vcmp.f32 s0, s4 +; CHECK-NEXT: vldmia r0!, {s2} +; CHECK-NEXT: vcmp.f32 s0, s2 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselge.f32 s0, s4, s0 +; CHECK-NEXT: vselge.f32 s0, s2, s0 ; CHECK-NEXT: le lr, .LBB15_8 ; CHECK-NEXT: .LBB15_9: @ %for.cond.cleanup ; CHECK-NEXT: vmov r0, s0 @@ -1620,13 +1617,10 @@ define float @fmax_f32(float* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: le lr, .LBB16_5 ; CHECK-NEXT: @ %bb.6: @ %middle.block -; CHECK-NEXT: vmov.f32 s4, s2 +; CHECK-NEXT: vmaxnm.f32 s4, s2, s3 +; CHECK-NEXT: vmaxnm.f32 s0, s0, s1 +; CHECK-NEXT: vmaxnm.f32 s0, s0, s4 ; CHECK-NEXT: cmp r2, r1 -; CHECK-NEXT: vmov.f32 s5, s3 -; CHECK-NEXT: vmaxnm.f32 q0, q0, q1 -; CHECK-NEXT: vmov r3, s1 -; CHECK-NEXT: vdup.32 q1, r3 -; CHECK-NEXT: vmaxnm.f32 q0, q0, q1 ; CHECK-NEXT: beq .LBB16_9 ; CHECK-NEXT: .LBB16_7: @ %for.body.preheader1 ; CHECK-NEXT: sub.w lr, r1, r2 @@ -1634,10 +1628,10 @@ define float @fmax_f32(float* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB16_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldmia r0!, {s4} -; CHECK-NEXT: vcmp.f32 s4, s0 +; CHECK-NEXT: vldmia r0!, {s2} +; CHECK-NEXT: vcmp.f32 s2, s0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselge.f32 s0, s4, s0 +; CHECK-NEXT: vselge.f32 s0, s2, s0 ; CHECK-NEXT: le lr, .LBB16_8 ; CHECK-NEXT: .LBB16_9: @ %for.cond.cleanup ; CHECK-NEXT: vmov r0, s0 diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll index e2025be011343..d304a925d24a0 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll @@ -13,27 +13,46 @@ define float @test_v2f32(<2 x float> %a0) { ; SSE2-LABEL: test_v2f32: ; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1] ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] -; SSE2-NEXT: maxss %xmm1, %xmm0 +; SSE2-NEXT: cmpunordss %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: andps %xmm2, %xmm3 +; SSE2-NEXT: maxss %xmm0, %xmm2 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v2f32: ; SSE41: # %bb.0: -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: maxss %xmm1, %xmm0 +; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; SSE41-NEXT: movaps %xmm0, %xmm1 +; SSE41-NEXT: cmpunordss %xmm0, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm3 +; SSE41-NEXT: andps %xmm2, %xmm3 +; SSE41-NEXT: maxss %xmm0, %xmm2 +; SSE41-NEXT: andnps %xmm2, %xmm1 +; SSE41-NEXT: orps %xmm3, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v2f32: ; AVX: # %bb.0: ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm2 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 ; AVX-NEXT: retq ; ; 
AVX512-LABEL: test_v2f32: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; AVX512-NEXT: vmaxss %xmm0, %xmm2, %xmm1 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq %1 = call nnan float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %a0) ret float %1 @@ -43,35 +62,45 @@ define float @test_v4f32(<4 x float> %a0) { ; SSE2-LABEL: test_v4f32: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE2-NEXT: maxps %xmm1, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3] +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] +; SSE2-NEXT: movaps %xmm0, %xmm3 +; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1] +; SSE2-NEXT: maxss %xmm3, %xmm0 +; SSE2-NEXT: maxss %xmm2, %xmm0 ; SSE2-NEXT: maxss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v4f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE41-NEXT: maxps %xmm1, %xmm0 -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3] +; SSE41-NEXT: movaps %xmm0, %xmm2 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] +; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] +; SSE41-NEXT: maxss %xmm3, %xmm0 +; SSE41-NEXT: maxss %xmm2, %xmm0 ; SSE41-NEXT: maxss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v4f32: ; AVX: # %bb.0: -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] +; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] +; AVX-NEXT: vmaxss %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vmaxss %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4f32: ; AVX512: # %bb.0: -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] +; AVX512-NEXT: vmaxss %xmm3, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm2, %xmm0, %xmm0 ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call nnan float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a0) @@ -82,43 +111,67 @@ define float @test_v8f32(<8 x float> %a0) { ; SSE2-LABEL: test_v8f32: ; SSE2: # %bb.0: ; SSE2-NEXT: maxps %xmm1, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1] ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE2-NEXT: maxps %xmm1, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] -; SSE2-NEXT: maxss %xmm1, %xmm0 +; SSE2-NEXT: maxss %xmm2, %xmm1 +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] +; SSE2-NEXT: maxss %xmm2, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE2-NEXT: maxss %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; 
SSE41-LABEL: test_v8f32: ; SSE41: # %bb.0: ; SSE41-NEXT: maxps %xmm1, %xmm0 +; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE41-NEXT: maxps %xmm1, %xmm0 -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: maxss %xmm1, %xmm0 +; SSE41-NEXT: maxss %xmm2, %xmm1 +; SSE41-NEXT: movaps %xmm0, %xmm2 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] +; SSE41-NEXT: maxss %xmm2, %xmm1 +; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE41-NEXT: maxss %xmm0, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v8f32: ; AVX: # %bb.0: ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,3,3,3] +; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] +; AVX-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] +; AVX-NEXT: vpermilps {{.*#+}} xmm5 = xmm0[3,3,3,3] +; AVX-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0] +; AVX-NEXT: vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3] +; AVX-NEXT: vmaxss %xmm7, %xmm0, %xmm0 +; AVX-NEXT: vmaxss %xmm6, %xmm0, %xmm0 +; AVX-NEXT: vmaxss %xmm5, %xmm0, %xmm0 ; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxss %xmm4, %xmm0, %xmm0 +; AVX-NEXT: vmaxss %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vmaxss %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v8f32: ; AVX512: # %bb.0: ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,3,3,3] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] +; AVX512-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] +; AVX512-NEXT: vpermilps {{.*#+}} xmm5 = xmm0[3,3,3,3] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0] +; AVX512-NEXT: vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3] +; AVX512-NEXT: vmaxss %xmm7, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm6, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm5, %xmm0, %xmm0 ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm4, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm3, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm2, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %a0) @@ -131,12 +184,16 @@ define float @test_v16f32(<16 x float> %a0) { ; SSE2-NEXT: maxps %xmm3, %xmm1 ; SSE2-NEXT: maxps %xmm2, %xmm0 ; SSE2-NEXT: maxps %xmm1, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1] ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE2-NEXT: maxps %xmm1, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] -; SSE2-NEXT: maxss %xmm1, %xmm0 +; SSE2-NEXT: maxss %xmm2, %xmm1 +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] +; SSE2-NEXT: maxss %xmm2, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE2-NEXT: maxss %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v16f32: @@ -144,35 +201,69 @@ define float @test_v16f32(<16 x float> %a0) { ; SSE41-NEXT: maxps %xmm3, %xmm1 ; SSE41-NEXT: maxps %xmm2, %xmm0 ; SSE41-NEXT: 
maxps %xmm1, %xmm0 +; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE41-NEXT: maxps %xmm1, %xmm0 -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: maxss %xmm1, %xmm0 +; SSE41-NEXT: maxss %xmm2, %xmm1 +; SSE41-NEXT: movaps %xmm0, %xmm2 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] +; SSE41-NEXT: maxss %xmm2, %xmm1 +; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE41-NEXT: maxss %xmm0, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v16f32: ; AVX: # %bb.0: ; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX-NEXT: vmaxss %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3] +; AVX-NEXT: vmaxss %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; AVX-NEXT: vmaxss %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX-NEXT: vmaxss %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v16f32: ; AVX512: # %bb.0: -; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm1 +; AVX512-NEXT: vpermilps {{.*#+}} xmm8 = xmm1[3,3,3,3] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm9 = xmm1[1,0] +; AVX512-NEXT: vmovshdup {{.*#+}} xmm10 = xmm1[1,1,3,3] +; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm5 +; AVX512-NEXT: vpermilps {{.*#+}} xmm11 = xmm5[3,3,3,3] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm12 = xmm5[1,0] +; AVX512-NEXT: vmovshdup {{.*#+}} xmm13 = xmm5[1,1,3,3] +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX512-NEXT: vpermilps {{.*#+}} xmm14 = xmm3[3,3,3,3] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm15 = xmm3[1,0] +; AVX512-NEXT: vmovshdup {{.*#+}} xmm7 = xmm3[1,1,3,3] +; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0] +; AVX512-NEXT: vmovshdup {{.*#+}} xmm6 = xmm0[1,1,3,3] +; AVX512-NEXT: vmaxss %xmm6, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm4, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm3, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm7, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm15, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm14, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm5, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm13, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm12, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm11, %xmm0, %xmm0 ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm10, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm9, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm8, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a0) @@ -206,6 +297,76 @@ define double 
@test_v2f64(<2 x double> %a0) { ret double %1 } +define double @test_v3f64(<3 x double> %a0) { +; SSE2-LABEL: test_v3f64: +; SSE2: # %bb.0: +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],mem[1] +; SSE2-NEXT: movapd %xmm2, %xmm1 +; SSE2-NEXT: maxpd %xmm0, %xmm1 +; SSE2-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE2-NEXT: andpd %xmm0, %xmm2 +; SSE2-NEXT: andnpd %xmm1, %xmm0 +; SSE2-NEXT: orpd %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; SSE2-NEXT: movapd %xmm0, %xmm1 +; SSE2-NEXT: cmpunordsd %xmm0, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm3 +; SSE2-NEXT: andpd %xmm2, %xmm3 +; SSE2-NEXT: maxsd %xmm0, %xmm2 +; SSE2-NEXT: andnpd %xmm2, %xmm1 +; SSE2-NEXT: orpd %xmm3, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_v3f64: +; SSE41: # %bb.0: +; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],mem[1] +; SSE41-NEXT: movapd %xmm2, %xmm1 +; SSE41-NEXT: maxpd %xmm0, %xmm1 +; SSE41-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movapd %xmm1, %xmm2 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] +; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: cmpunordsd %xmm1, %xmm0 +; SSE41-NEXT: movapd %xmm0, %xmm3 +; SSE41-NEXT: andpd %xmm2, %xmm3 +; SSE41-NEXT: maxsd %xmm1, %xmm2 +; SSE41-NEXT: andnpd %xmm2, %xmm0 +; SSE41-NEXT: orpd %xmm3, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: test_v3f64: +; AVX: # %bb.0: +; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 +; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm3 +; AVX-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 +; AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v3f64: +; AVX512: # %bb.0: +; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 +; AVX512-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1} +; AVX512-NEXT: vcmpunordsd %xmm2, %xmm2, %k1 +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vmaxsd %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %1 = call nnan double @llvm.experimental.vector.reduce.fmax.v3f64(<3 x double> %a0) + ret double %1 +} + define double @test_v4f64(<4 x double> %a0) { ; SSE-LABEL: test_v4f64: ; SSE: # %bb.0: @@ -218,18 +379,22 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX-LABEL: test_v4f64: ; AVX: # %bb.0: ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] +; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] +; AVX-NEXT: vmaxsd %xmm3, %xmm0, %xmm0 ; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxsd %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4f64: ; AVX512: # %bb.0: ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] +; AVX512-NEXT: vmaxsd %xmm3, %xmm0, %xmm0 ; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmaxsd %xmm2, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call 
nnan double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %a0) @@ -250,21 +415,31 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX-LABEL: test_v8f64: ; AVX: # %bb.0: ; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v8f64: ; AVX512: # %bb.0: -; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] +; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm3 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0] +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm5 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm7 = xmm0[1,0] +; AVX512-NEXT: vmaxsd %xmm7, %xmm0, %xmm0 +; AVX512-NEXT: vmaxsd %xmm5, %xmm0, %xmm0 +; AVX512-NEXT: vmaxsd %xmm6, %xmm0, %xmm0 +; AVX512-NEXT: vmaxsd %xmm3, %xmm0, %xmm0 +; AVX512-NEXT: vmaxsd %xmm4, %xmm0, %xmm0 ; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmaxsd %xmm2, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> %a0) @@ -274,12 +449,12 @@ define double @test_v8f64(<8 x double> %a0) { define double @test_v16f64(<16 x double> %a0) { ; SSE-LABEL: test_v16f64: ; SSE: # %bb.0: -; SSE-NEXT: maxpd %xmm6, %xmm2 -; SSE-NEXT: maxpd %xmm4, %xmm0 -; SSE-NEXT: maxpd %xmm2, %xmm0 ; SSE-NEXT: maxpd %xmm7, %xmm3 ; SSE-NEXT: maxpd %xmm5, %xmm1 ; SSE-NEXT: maxpd %xmm3, %xmm1 +; SSE-NEXT: maxpd %xmm6, %xmm2 +; SSE-NEXT: maxpd %xmm4, %xmm0 +; SSE-NEXT: maxpd %xmm2, %xmm0 ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] @@ -291,22 +466,32 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX-NEXT: vmaxpd %ymm3, %ymm1, %ymm1 ; AVX-NEXT: vmaxpd %ymm2, %ymm0, %ymm0 ; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v16f64: ; AVX512: # %bb.0: ; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX512-NEXT: vmaxsd %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512-NEXT: vmaxsd %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2 +; AVX512-NEXT: vmaxsd %xmm2, %xmm1, %xmm1 
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512-NEXT: vmaxsd %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0 +; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> %a0) @@ -319,6 +504,7 @@ declare float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float>) declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float>) declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>) +declare double @llvm.experimental.vector.reduce.fmax.v3f64(<3 x double>) declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>) declare double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double>) declare double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double>) diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax.ll index d3b17d25ef096..c5e025be5423a 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmax.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmax.ll @@ -10,69 +10,225 @@ ; vXf32 ; +define float @test_v1f32(<1 x float> %a0) { +; ALL-LABEL: test_v1f32: +; ALL: # %bb.0: +; ALL-NEXT: retq + %1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> %a0) + ret float %1 +} + define float @test_v2f32(<2 x float> %a0) { ; SSE2-LABEL: test_v2f32: ; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1] ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] -; SSE2-NEXT: maxss %xmm1, %xmm0 +; SSE2-NEXT: cmpunordss %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: andps %xmm2, %xmm3 +; SSE2-NEXT: maxss %xmm0, %xmm2 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v2f32: ; SSE41: # %bb.0: -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: maxss %xmm1, %xmm0 +; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; SSE41-NEXT: movaps %xmm0, %xmm1 +; SSE41-NEXT: cmpunordss %xmm0, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm3 +; SSE41-NEXT: andps %xmm2, %xmm3 +; SSE41-NEXT: maxss %xmm0, %xmm2 +; SSE41-NEXT: andnps %xmm2, %xmm1 +; SSE41-NEXT: orps %xmm3, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v2f32: ; AVX: # %bb.0: ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm2 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f32: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; AVX512-NEXT: vmaxss %xmm0, %xmm2, %xmm1 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %a0) ret float %1 } +define float @test_v3f32(<3 x float> %a0) { +; SSE2-LABEL: test_v3f32: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1] +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: cmpunordss %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: andps 
%xmm2, %xmm3 +; SSE2-NEXT: maxss %xmm0, %xmm2 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: maxss %xmm1, %xmm2 +; SSE2-NEXT: cmpunordss %xmm1, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: andnps %xmm2, %xmm3 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_v3f32: +; SSE41: # %bb.0: +; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; SSE41-NEXT: movaps %xmm0, %xmm1 +; SSE41-NEXT: cmpunordss %xmm0, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm3 +; SSE41-NEXT: andps %xmm2, %xmm3 +; SSE41-NEXT: maxss %xmm0, %xmm2 +; SSE41-NEXT: andnps %xmm2, %xmm1 +; SSE41-NEXT: orps %xmm3, %xmm1 +; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE41-NEXT: movaps %xmm0, %xmm2 +; SSE41-NEXT: maxss %xmm1, %xmm2 +; SSE41-NEXT: cmpunordss %xmm1, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm3 +; SSE41-NEXT: andnps %xmm2, %xmm3 +; SSE41-NEXT: andps %xmm0, %xmm1 +; SSE41-NEXT: orps %xmm3, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: test_v3f32: +; AVX: # %bb.0: +; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm2 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm3 +; AVX-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm1 +; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v3f32: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vmaxss %xmm0, %xmm1, %xmm2 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} +; AVX512-NEXT: vcmpunordss %xmm2, %xmm2, %k1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vmaxss %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; AVX512-NEXT: retq + %1 = call float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a0) + ret float %1 +} + define float @test_v4f32(<4 x float> %a0) { ; SSE2-LABEL: test_v4f32: ; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: movaps %xmm0, %xmm3 +; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1] ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE2-NEXT: maxps %xmm1, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] -; SSE2-NEXT: maxss %xmm1, %xmm0 +; SSE2-NEXT: cmpunordss %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm4 +; SSE2-NEXT: andps %xmm3, %xmm4 +; SSE2-NEXT: maxss %xmm0, %xmm3 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] +; SSE2-NEXT: andnps %xmm3, %xmm1 +; SSE2-NEXT: orps %xmm4, %xmm1 +; SSE2-NEXT: movaps %xmm2, %xmm3 +; SSE2-NEXT: maxss %xmm1, %xmm3 +; SSE2-NEXT: cmpunordss %xmm1, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm4 +; SSE2-NEXT: andnps %xmm3, %xmm4 +; SSE2-NEXT: andps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm4, %xmm1 +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: maxss %xmm1, %xmm2 +; SSE2-NEXT: cmpunordss %xmm1, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: andnps %xmm2, %xmm3 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v4f32: ; SSE41: # %bb.0: +; SSE41-NEXT: movaps %xmm0, %xmm2 +; SSE41-NEXT: movshdup {{.*#+}} xmm3 = 
xmm0[1,1,3,3] ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE41-NEXT: maxps %xmm1, %xmm0 -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: maxss %xmm1, %xmm0 +; SSE41-NEXT: cmpunordss %xmm0, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm4 +; SSE41-NEXT: andps %xmm3, %xmm4 +; SSE41-NEXT: maxss %xmm0, %xmm3 +; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] +; SSE41-NEXT: andnps %xmm3, %xmm1 +; SSE41-NEXT: orps %xmm4, %xmm1 +; SSE41-NEXT: movaps %xmm2, %xmm3 +; SSE41-NEXT: maxss %xmm1, %xmm3 +; SSE41-NEXT: cmpunordss %xmm1, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm4 +; SSE41-NEXT: andnps %xmm3, %xmm4 +; SSE41-NEXT: andps %xmm2, %xmm1 +; SSE41-NEXT: orps %xmm4, %xmm1 +; SSE41-NEXT: movaps %xmm0, %xmm2 +; SSE41-NEXT: maxss %xmm1, %xmm2 +; SSE41-NEXT: cmpunordss %xmm1, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm3 +; SSE41-NEXT: andnps %xmm2, %xmm3 +; SSE41-NEXT: andps %xmm0, %xmm1 +; SSE41-NEXT: orps %xmm3, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v4f32: ; AVX: # %bb.0: -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] +; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] +; AVX-NEXT: vmaxss %xmm0, %xmm3, %xmm4 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendvps %xmm0, %xmm3, %xmm4, %xmm0 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm3 +; AVX-NEXT: vmaxss %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vblendvps %xmm3, %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4f32: ; AVX512: # %bb.0: -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] +; AVX512-NEXT: vmaxss %xmm0, %xmm3, %xmm4 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1} +; AVX512-NEXT: vcmpunordss %xmm4, %xmm4, %k1 +; AVX512-NEXT: vmaxss %xmm4, %xmm2, %xmm0 +; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm0 {%k1} +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmaxss %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a0) ret float %1 @@ -81,46 +237,170 @@ define float @test_v4f32(<4 x float> %a0) { define float @test_v8f32(<8 x float> %a0) { ; SSE2-LABEL: test_v8f32: ; SSE2: # %bb.0: -; SSE2-NEXT: maxps %xmm1, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE2-NEXT: maxps %xmm1, %xmm0 +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: maxps %xmm0, %xmm2 +; SSE2-NEXT: cmpunordps %xmm0, %xmm0 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: andnps %xmm2, %xmm0 +; SSE2-NEXT: orps %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] -; SSE2-NEXT: maxss %xmm1, %xmm0 +; SSE2-NEXT: cmpunordss %xmm0, %xmm1 +; SSE2-NEXT: 
movaps %xmm1, %xmm3 +; SSE2-NEXT: andps %xmm2, %xmm3 +; SSE2-NEXT: maxss %xmm0, %xmm2 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: maxss %xmm1, %xmm3 +; SSE2-NEXT: cmpunordss %xmm1, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm4 +; SSE2-NEXT: andnps %xmm3, %xmm4 +; SSE2-NEXT: andps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm4, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: maxss %xmm1, %xmm2 +; SSE2-NEXT: cmpunordss %xmm1, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: andnps %xmm2, %xmm3 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v8f32: ; SSE41: # %bb.0: -; SSE41-NEXT: maxps %xmm1, %xmm0 -; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE41-NEXT: maxps %xmm1, %xmm0 -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: maxss %xmm1, %xmm0 +; SSE41-NEXT: movaps %xmm1, %xmm2 +; SSE41-NEXT: maxps %xmm0, %xmm2 +; SSE41-NEXT: cmpunordps %xmm0, %xmm0 +; SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm2 +; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] +; SSE41-NEXT: movaps %xmm2, %xmm0 +; SSE41-NEXT: cmpunordss %xmm2, %xmm0 +; SSE41-NEXT: movaps %xmm0, %xmm3 +; SSE41-NEXT: andps %xmm1, %xmm3 +; SSE41-NEXT: maxss %xmm2, %xmm1 +; SSE41-NEXT: andnps %xmm1, %xmm0 +; SSE41-NEXT: orps %xmm3, %xmm0 +; SSE41-NEXT: movaps %xmm2, %xmm1 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] +; SSE41-NEXT: movaps %xmm1, %xmm3 +; SSE41-NEXT: maxss %xmm0, %xmm3 +; SSE41-NEXT: cmpunordss %xmm0, %xmm0 +; SSE41-NEXT: movaps %xmm0, %xmm4 +; SSE41-NEXT: andnps %xmm3, %xmm4 +; SSE41-NEXT: andps %xmm1, %xmm0 +; SSE41-NEXT: orps %xmm4, %xmm0 +; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,3,3,3] +; SSE41-NEXT: movaps %xmm2, %xmm1 +; SSE41-NEXT: maxss %xmm0, %xmm1 +; SSE41-NEXT: cmpunordss %xmm0, %xmm0 +; SSE41-NEXT: movaps %xmm0, %xmm3 +; SSE41-NEXT: andnps %xmm1, %xmm3 +; SSE41-NEXT: andps %xmm2, %xmm0 +; SSE41-NEXT: orps %xmm3, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v8f32: ; AVX: # %bb.0: ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpermilps {{.*#+}} xmm8 = xmm1[3,3,3,3] +; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] +; AVX-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] +; AVX-NEXT: vpermilps {{.*#+}} xmm5 = xmm0[3,3,3,3] +; AVX-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0] +; AVX-NEXT: vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3] +; AVX-NEXT: vmaxss %xmm0, %xmm7, %xmm2 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendvps %xmm0, %xmm7, %xmm2, %xmm0 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vmaxss %xmm0, %xmm6, %xmm0 +; AVX-NEXT: vblendvps %xmm2, %xmm6, %xmm0, %xmm0 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vmaxss %xmm0, %xmm5, %xmm0 +; AVX-NEXT: vblendvps %xmm2, %xmm5, %xmm0, %xmm0 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vmaxss %xmm0, %xmm4, %xmm0 +; AVX-NEXT: vblendvps %xmm1, %xmm4, %xmm0, %xmm0 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vmaxss %xmm0, %xmm3, 
%xmm0 +; AVX-NEXT: vblendvps %xmm1, %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vmaxss %xmm0, %xmm8, %xmm0 +; AVX-NEXT: vblendvps %xmm1, %xmm8, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; -; AVX512-LABEL: test_v8f32: -; AVX512: # %bb.0: -; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512BW-LABEL: test_v8f32: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX512BW-NEXT: vpermilps {{.*#+}} xmm8 = xmm3[3,3,3,3] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm3[1,0] +; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3] +; AVX512BW-NEXT: vpermilps {{.*#+}} xmm5 = xmm0[3,3,3,3] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0] +; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3] +; AVX512BW-NEXT: vmaxss %xmm0, %xmm7, %xmm1 +; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vmovss %xmm7, %xmm1, %xmm1 {%k1} +; AVX512BW-NEXT: vcmpunordss %xmm1, %xmm1, %k1 +; AVX512BW-NEXT: vmaxss %xmm1, %xmm6, %xmm0 +; AVX512BW-NEXT: vmovss %xmm6, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vmaxss %xmm0, %xmm5, %xmm0 +; AVX512BW-NEXT: vmovss %xmm5, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vmaxss %xmm0, %xmm3, %xmm0 +; AVX512BW-NEXT: vmovss %xmm3, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vmaxss %xmm0, %xmm4, %xmm0 +; AVX512BW-NEXT: vmovss %xmm4, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vmaxss %xmm0, %xmm2, %xmm0 +; AVX512BW-NEXT: vmovss %xmm2, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vmaxss %xmm0, %xmm8, %xmm0 +; AVX512BW-NEXT: vmovss %xmm8, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: test_v8f32: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX512VL-NEXT: vpermilps {{.*#+}} xmm8 = xmm1[3,3,3,3] +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] +; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] +; AVX512VL-NEXT: vpermilps {{.*#+}} xmm5 = xmm0[3,3,3,3] +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0] +; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3] +; AVX512VL-NEXT: vmaxss %xmm0, %xmm7, %xmm2 +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmovss %xmm7, %xmm2, %xmm2 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm2, %xmm2, %k1 +; AVX512VL-NEXT: vmaxss %xmm2, %xmm6, %xmm0 +; AVX512VL-NEXT: vmovss %xmm6, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxss %xmm0, %xmm5, %xmm0 +; AVX512VL-NEXT: vmovss %xmm5, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxss %xmm0, %xmm1, %xmm0 +; AVX512VL-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxss %xmm0, %xmm4, %xmm0 +; AVX512VL-NEXT: vmovss %xmm4, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxss %xmm0, %xmm3, %xmm0 +; AVX512VL-NEXT: vmovss %xmm3, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxss %xmm0, %xmm8, %xmm0 +; AVX512VL-NEXT: vmovss %xmm8, %xmm0, 
%xmm0 {%k1} +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %a0) ret float %1 } @@ -128,53 +408,259 @@ define float @test_v8f32(<8 x float> %a0) { define float @test_v16f32(<16 x float> %a0) { ; SSE2-LABEL: test_v16f32: ; SSE2: # %bb.0: -; SSE2-NEXT: maxps %xmm3, %xmm1 -; SSE2-NEXT: maxps %xmm2, %xmm0 -; SSE2-NEXT: maxps %xmm1, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE2-NEXT: maxps %xmm1, %xmm0 +; SSE2-NEXT: movaps %xmm2, %xmm4 +; SSE2-NEXT: maxps %xmm0, %xmm4 +; SSE2-NEXT: cmpunordps %xmm0, %xmm0 +; SSE2-NEXT: andps %xmm0, %xmm2 +; SSE2-NEXT: andnps %xmm4, %xmm0 +; SSE2-NEXT: orps %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm3, %xmm2 +; SSE2-NEXT: maxps %xmm1, %xmm2 +; SSE2-NEXT: cmpunordps %xmm1, %xmm1 +; SSE2-NEXT: andps %xmm1, %xmm3 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: maxps %xmm0, %xmm2 +; SSE2-NEXT: cmpunordps %xmm0, %xmm0 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: andnps %xmm2, %xmm0 +; SSE2-NEXT: orps %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] -; SSE2-NEXT: maxss %xmm1, %xmm0 +; SSE2-NEXT: cmpunordss %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: andps %xmm2, %xmm3 +; SSE2-NEXT: maxss %xmm0, %xmm2 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: maxss %xmm1, %xmm3 +; SSE2-NEXT: cmpunordss %xmm1, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm4 +; SSE2-NEXT: andnps %xmm3, %xmm4 +; SSE2-NEXT: andps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm4, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: maxss %xmm1, %xmm2 +; SSE2-NEXT: cmpunordss %xmm1, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: andnps %xmm2, %xmm3 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v16f32: ; SSE41: # %bb.0: -; SSE41-NEXT: maxps %xmm3, %xmm1 -; SSE41-NEXT: maxps %xmm2, %xmm0 -; SSE41-NEXT: maxps %xmm1, %xmm0 -; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE41-NEXT: maxps %xmm1, %xmm0 -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: maxss %xmm1, %xmm0 +; SSE41-NEXT: movaps %xmm2, %xmm4 +; SSE41-NEXT: maxps %xmm0, %xmm4 +; SSE41-NEXT: cmpunordps %xmm0, %xmm0 +; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm4 +; SSE41-NEXT: movaps %xmm3, %xmm2 +; SSE41-NEXT: maxps %xmm1, %xmm2 +; SSE41-NEXT: cmpunordps %xmm1, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm0 +; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm2 +; SSE41-NEXT: movaps %xmm2, %xmm1 +; SSE41-NEXT: maxps %xmm4, %xmm1 +; SSE41-NEXT: cmpunordps %xmm4, %xmm4 +; SSE41-NEXT: movaps %xmm4, %xmm0 +; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] +; SSE41-NEXT: movaps %xmm1, %xmm0 +; SSE41-NEXT: cmpunordss %xmm1, %xmm0 +; SSE41-NEXT: movaps %xmm0, %xmm3 +; SSE41-NEXT: andps %xmm2, %xmm3 +; SSE41-NEXT: maxss %xmm1, %xmm2 +; SSE41-NEXT: andnps %xmm2, %xmm0 +; SSE41-NEXT: orps %xmm3, %xmm0 +; SSE41-NEXT: movaps %xmm1, %xmm2 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] +; SSE41-NEXT: movaps %xmm2, %xmm3 +; SSE41-NEXT: maxss %xmm0, %xmm3 +; SSE41-NEXT: cmpunordss %xmm0, %xmm0 +; 
SSE41-NEXT: movaps %xmm0, %xmm4 +; SSE41-NEXT: andnps %xmm3, %xmm4 +; SSE41-NEXT: andps %xmm2, %xmm0 +; SSE41-NEXT: orps %xmm4, %xmm0 +; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3] +; SSE41-NEXT: movaps %xmm1, %xmm2 +; SSE41-NEXT: maxss %xmm0, %xmm2 +; SSE41-NEXT: cmpunordss %xmm0, %xmm0 +; SSE41-NEXT: movaps %xmm0, %xmm3 +; SSE41-NEXT: andnps %xmm2, %xmm3 +; SSE41-NEXT: andps %xmm1, %xmm0 +; SSE41-NEXT: orps %xmm3, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v16f32: ; AVX: # %bb.0: -; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxps %ymm0, %ymm1, %ymm2 +; AVX-NEXT: vcmpunordps %ymm0, %ymm0, %ymm0 +; AVX-NEXT: vblendvps %ymm0, %ymm1, %ymm2, %ymm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm2 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm3 +; AVX-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm1 +; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] +; AVX-NEXT: vmaxss %xmm1, %xmm3, %xmm1 +; AVX-NEXT: vblendvps %xmm2, %xmm3, %xmm1, %xmm1 +; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,3,3,3] +; AVX-NEXT: vmaxss %xmm1, %xmm3, %xmm1 +; AVX-NEXT: vblendvps %xmm2, %xmm3, %xmm1, %xmm1 +; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] +; AVX-NEXT: vmaxss %xmm1, %xmm3, %xmm1 +; AVX-NEXT: vblendvps %xmm2, %xmm3, %xmm1, %xmm1 +; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] +; AVX-NEXT: vmaxss %xmm1, %xmm3, %xmm1 +; AVX-NEXT: vblendvps %xmm2, %xmm3, %xmm1, %xmm1 +; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; -; AVX512-LABEL: test_v16f32: -; AVX512: # %bb.0: -; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512BW-LABEL: test_v16f32: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; AVX512BW-NEXT: vmaxss %xmm0, %xmm2, %xmm3 +; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vmaxss %xmm3, %xmm2, %xmm3 +; AVX512BW-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3] +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vmaxss %xmm3, %xmm2, %xmm3 +; AVX512BW-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vmaxss %xmm3, %xmm1, %xmm3 +; 
AVX512BW-NEXT: vmovss %xmm1, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vmaxss %xmm3, %xmm2, %xmm3 +; AVX512BW-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vmaxss %xmm3, %xmm2, %xmm3 +; AVX512BW-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm2 +; AVX512BW-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3] +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vmaxss %xmm3, %xmm1, %xmm3 +; AVX512BW-NEXT: vmovss %xmm1, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vmaxss %xmm3, %xmm2, %xmm3 +; AVX512BW-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vmaxss %xmm3, %xmm1, %xmm3 +; AVX512BW-NEXT: vmovss %xmm1, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0] +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vmaxss %xmm3, %xmm1, %xmm3 +; AVX512BW-NEXT: vmovss %xmm1, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm0 +; AVX512BW-NEXT: vpermilps {{.*#+}} xmm1 = xmm2[3,3,3,3] +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vmaxss %xmm3, %xmm1, %xmm2 +; AVX512BW-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} +; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512BW-NEXT: vcmpunordss %xmm2, %xmm2, %k1 +; AVX512BW-NEXT: vmaxss %xmm2, %xmm0, %xmm2 +; AVX512BW-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1} +; AVX512BW-NEXT: vcmpunordss %xmm2, %xmm2, %k1 +; AVX512BW-NEXT: vmaxss %xmm2, %xmm1, %xmm2 +; AVX512BW-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512BW-NEXT: vcmpunordss %xmm2, %xmm2, %k1 +; AVX512BW-NEXT: vmaxss %xmm2, %xmm1, %xmm2 +; AVX512BW-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} +; AVX512BW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] +; AVX512BW-NEXT: vcmpunordss %xmm2, %xmm2, %k1 +; AVX512BW-NEXT: vmaxss %xmm2, %xmm1, %xmm0 +; AVX512BW-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: test_v16f32: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vextractf32x4 $3, %zmm0, %xmm3 +; AVX512VL-NEXT: vpermilps {{.*#+}} xmm8 = xmm3[3,3,3,3] +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm9 = xmm3[1,0] +; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm10 = xmm3[1,1,3,3] +; AVX512VL-NEXT: vextractf32x4 $2, %zmm0, %xmm6 +; AVX512VL-NEXT: vpermilps {{.*#+}} xmm11 = xmm6[3,3,3,3] +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm12 = xmm6[1,0] +; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm13 = xmm6[1,1,3,3] +; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX512VL-NEXT: vpermilps {{.*#+}} xmm14 = xmm2[3,3,3,3] +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm15 = xmm2[1,0] +; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm16 = xmm2[1,1,3,3] +; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0] +; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm5 = xmm0[1,1,3,3] +; AVX512VL-NEXT: vmaxss %xmm0, %xmm5, %xmm7 +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmovss %xmm5, %xmm7, %xmm7 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm7, %xmm7, %k1 +; AVX512VL-NEXT: vmaxss %xmm7, %xmm4, %xmm0 +; AVX512VL-NEXT: vmovss %xmm4, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxss %xmm0, %xmm1, %xmm0 +; AVX512VL-NEXT: vmovss %xmm1, %xmm0, %xmm0 
{%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxss %xmm0, %xmm2, %xmm0 +; AVX512VL-NEXT: vmovss %xmm2, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxss %xmm0, %xmm16, %xmm0 +; AVX512VL-NEXT: vmovss %xmm16, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxss %xmm0, %xmm15, %xmm0 +; AVX512VL-NEXT: vmovss %xmm15, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxss %xmm0, %xmm14, %xmm0 +; AVX512VL-NEXT: vmovss %xmm14, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxss %xmm0, %xmm6, %xmm0 +; AVX512VL-NEXT: vmovss %xmm6, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxss %xmm0, %xmm13, %xmm0 +; AVX512VL-NEXT: vmovss %xmm13, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxss %xmm0, %xmm12, %xmm0 +; AVX512VL-NEXT: vmovss %xmm12, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxss %xmm0, %xmm11, %xmm0 +; AVX512VL-NEXT: vmovss %xmm11, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxss %xmm0, %xmm3, %xmm0 +; AVX512VL-NEXT: vmovss %xmm3, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxss %xmm0, %xmm10, %xmm0 +; AVX512VL-NEXT: vmovss %xmm10, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxss %xmm0, %xmm9, %xmm0 +; AVX512VL-NEXT: vmovss %xmm9, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxss %xmm0, %xmm8, %xmm0 +; AVX512VL-NEXT: vmovss %xmm8, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a0) ret float %1 } @@ -186,50 +672,106 @@ define float @test_v16f32(<16 x float> %a0) { define double @test_v2f64(<2 x double> %a0) { ; SSE-LABEL: test_v2f64: ; SSE: # %bb.0: +; SSE-NEXT: movapd %xmm0, %xmm2 +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: maxsd %xmm1, %xmm0 +; SSE-NEXT: cmpunordsd %xmm0, %xmm1 +; SSE-NEXT: movapd %xmm1, %xmm3 +; SSE-NEXT: andpd %xmm2, %xmm3 +; SSE-NEXT: maxsd %xmm0, %xmm2 +; SSE-NEXT: andnpd %xmm2, %xmm1 +; SSE-NEXT: orpd %xmm3, %xmm1 +; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v2f64: ; AVX: # %bb.0: ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 +; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f64: ; AVX512: # %bb.0: -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512-NEXT: vmaxsd %xmm0, %xmm2, %xmm1 +; AVX512-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmovsd %xmm2, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vmovapd %xmm1, %xmm0 ; AVX512-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %a0) ret double %1 } define double @test_v4f64(<4 x double> %a0) { -; SSE-LABEL: test_v4f64: -; SSE: # %bb.0: -; SSE-NEXT: maxpd %xmm1, %xmm0 -; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: maxsd %xmm1, %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: 
test_v4f64: +; SSE2: # %bb.0: +; SSE2-NEXT: movapd %xmm1, %xmm2 +; SSE2-NEXT: maxpd %xmm0, %xmm2 +; SSE2-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE2-NEXT: andpd %xmm0, %xmm1 +; SSE2-NEXT: andnpd %xmm2, %xmm0 +; SSE2-NEXT: orpd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; SSE2-NEXT: movapd %xmm0, %xmm1 +; SSE2-NEXT: cmpunordsd %xmm0, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm3 +; SSE2-NEXT: andpd %xmm2, %xmm3 +; SSE2-NEXT: maxsd %xmm0, %xmm2 +; SSE2-NEXT: andnpd %xmm2, %xmm1 +; SSE2-NEXT: orpd %xmm3, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_v4f64: +; SSE41: # %bb.0: +; SSE41-NEXT: movapd %xmm1, %xmm2 +; SSE41-NEXT: maxpd %xmm0, %xmm2 +; SSE41-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2 +; SSE41-NEXT: movapd %xmm2, %xmm1 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] +; SSE41-NEXT: movapd %xmm2, %xmm0 +; SSE41-NEXT: cmpunordsd %xmm2, %xmm0 +; SSE41-NEXT: movapd %xmm0, %xmm3 +; SSE41-NEXT: andpd %xmm1, %xmm3 +; SSE41-NEXT: maxsd %xmm2, %xmm1 +; SSE41-NEXT: andnpd %xmm1, %xmm0 +; SSE41-NEXT: orpd %xmm3, %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_v4f64: ; AVX: # %bb.0: ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] +; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] +; AVX-NEXT: vmaxsd %xmm0, %xmm3, %xmm4 +; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendvpd %xmm0, %xmm3, %xmm4, %xmm0 +; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm3 +; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vblendvpd %xmm3, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vmaxsd %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vblendvpd %xmm1, %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4f64: ; AVX512: # %bb.0: ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] +; AVX512-NEXT: vmaxsd %xmm0, %xmm3, %xmm4 +; AVX512-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmovsd %xmm3, %xmm4, %xmm4 {%k1} +; AVX512-NEXT: vcmpunordsd %xmm4, %xmm4, %k1 +; AVX512-NEXT: vmaxsd %xmm4, %xmm1, %xmm0 +; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} +; AVX512-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmaxsd %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %a0) @@ -237,83 +779,325 @@ define double @test_v4f64(<4 x double> %a0) { } define double @test_v8f64(<8 x double> %a0) { -; SSE-LABEL: test_v8f64: -; SSE: # %bb.0: -; SSE-NEXT: maxpd %xmm3, %xmm1 -; SSE-NEXT: maxpd %xmm2, %xmm0 -; SSE-NEXT: maxpd %xmm1, %xmm0 -; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: maxsd %xmm1, %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_v8f64: +; SSE2: # %bb.0: +; SSE2-NEXT: movapd %xmm2, %xmm4 +; SSE2-NEXT: maxpd %xmm0, %xmm4 +; SSE2-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE2-NEXT: andpd %xmm0, %xmm2 +; SSE2-NEXT: andnpd %xmm4, %xmm0 +; SSE2-NEXT: orpd %xmm2, %xmm0 +; SSE2-NEXT: movapd %xmm3, %xmm2 +; SSE2-NEXT: maxpd %xmm1, %xmm2 +; SSE2-NEXT: cmpunordpd %xmm1, %xmm1 +; SSE2-NEXT: andpd %xmm1, %xmm3 +; 
SSE2-NEXT: andnpd %xmm2, %xmm1 +; SSE2-NEXT: orpd %xmm3, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm2 +; SSE2-NEXT: maxpd %xmm0, %xmm2 +; SSE2-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE2-NEXT: andpd %xmm0, %xmm1 +; SSE2-NEXT: andnpd %xmm2, %xmm0 +; SSE2-NEXT: orpd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; SSE2-NEXT: movapd %xmm0, %xmm1 +; SSE2-NEXT: cmpunordsd %xmm0, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm3 +; SSE2-NEXT: andpd %xmm2, %xmm3 +; SSE2-NEXT: maxsd %xmm0, %xmm2 +; SSE2-NEXT: andnpd %xmm2, %xmm1 +; SSE2-NEXT: orpd %xmm3, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_v8f64: +; SSE41: # %bb.0: +; SSE41-NEXT: movapd %xmm2, %xmm4 +; SSE41-NEXT: maxpd %xmm0, %xmm4 +; SSE41-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4 +; SSE41-NEXT: movapd %xmm3, %xmm2 +; SSE41-NEXT: maxpd %xmm1, %xmm2 +; SSE41-NEXT: cmpunordpd %xmm1, %xmm1 +; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2 +; SSE41-NEXT: movapd %xmm2, %xmm1 +; SSE41-NEXT: maxpd %xmm4, %xmm1 +; SSE41-NEXT: cmpunordpd %xmm4, %xmm4 +; SSE41-NEXT: movapd %xmm4, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movapd %xmm1, %xmm2 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] +; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: cmpunordsd %xmm1, %xmm0 +; SSE41-NEXT: movapd %xmm0, %xmm3 +; SSE41-NEXT: andpd %xmm2, %xmm3 +; SSE41-NEXT: maxsd %xmm1, %xmm2 +; SSE41-NEXT: andnpd %xmm2, %xmm0 +; SSE41-NEXT: orpd %xmm3, %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_v8f64: ; AVX: # %bb.0: -; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxpd %ymm0, %ymm1, %ymm2 +; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0 +; AVX-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 +; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm3 +; AVX-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 +; AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; -; AVX512-LABEL: test_v8f64: -; AVX512: # %bb.0: -; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512BW-LABEL: test_v8f64: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm2 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm8 = xmm2[1,0] +; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm3 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0] +; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm5 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm7 = xmm0[1,0] +; AVX512BW-NEXT: vmaxsd %xmm0, %xmm7, %xmm1 +; AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vmovsd %xmm7, %xmm1, %xmm1 {%k1} +; AVX512BW-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; AVX512BW-NEXT: vmaxsd %xmm1, %xmm5, %xmm0 +; AVX512BW-NEXT: vmovsd %xmm5, %xmm0, %xmm0 {%k1} +; 
AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vmaxsd %xmm0, %xmm6, %xmm0 +; AVX512BW-NEXT: vmovsd %xmm6, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vmaxsd %xmm0, %xmm3, %xmm0 +; AVX512BW-NEXT: vmovsd %xmm3, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vmaxsd %xmm0, %xmm4, %xmm0 +; AVX512BW-NEXT: vmovsd %xmm4, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vmaxsd %xmm0, %xmm2, %xmm0 +; AVX512BW-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vmaxsd %xmm0, %xmm8, %xmm0 +; AVX512BW-NEXT: vmovsd %xmm8, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: test_v8f64: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vextractf32x4 $3, %zmm0, %xmm1 +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm8 = xmm1[1,0] +; AVX512VL-NEXT: vextractf32x4 $2, %zmm0, %xmm3 +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0] +; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm5 +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0] +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm7 = xmm0[1,0] +; AVX512VL-NEXT: vmaxsd %xmm0, %xmm7, %xmm2 +; AVX512VL-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmovsd %xmm7, %xmm2, %xmm2 {%k1} +; AVX512VL-NEXT: vcmpunordsd %xmm2, %xmm2, %k1 +; AVX512VL-NEXT: vmaxsd %xmm2, %xmm5, %xmm0 +; AVX512VL-NEXT: vmovsd %xmm5, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxsd %xmm0, %xmm6, %xmm0 +; AVX512VL-NEXT: vmovsd %xmm6, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxsd %xmm0, %xmm3, %xmm0 +; AVX512VL-NEXT: vmovsd %xmm3, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxsd %xmm0, %xmm4, %xmm0 +; AVX512VL-NEXT: vmovsd %xmm4, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxsd %xmm0, %xmm1, %xmm0 +; AVX512VL-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmaxsd %xmm0, %xmm8, %xmm0 +; AVX512VL-NEXT: vmovsd %xmm8, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> %a0) ret double %1 } define double @test_v16f64(<16 x double> %a0) { -; SSE-LABEL: test_v16f64: -; SSE: # %bb.0: -; SSE-NEXT: maxpd %xmm6, %xmm2 -; SSE-NEXT: maxpd %xmm4, %xmm0 -; SSE-NEXT: maxpd %xmm2, %xmm0 -; SSE-NEXT: maxpd %xmm7, %xmm3 -; SSE-NEXT: maxpd %xmm5, %xmm1 -; SSE-NEXT: maxpd %xmm3, %xmm1 -; SSE-NEXT: maxpd %xmm1, %xmm0 -; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: maxsd %xmm1, %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_v16f64: +; SSE2: # %bb.0: +; SSE2-NEXT: movapd %xmm4, %xmm8 +; SSE2-NEXT: maxpd %xmm0, %xmm8 +; SSE2-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE2-NEXT: andpd %xmm0, %xmm4 +; SSE2-NEXT: andnpd %xmm8, %xmm0 +; SSE2-NEXT: orpd %xmm4, %xmm0 +; SSE2-NEXT: movapd %xmm6, %xmm4 +; SSE2-NEXT: maxpd %xmm2, %xmm4 +; SSE2-NEXT: cmpunordpd %xmm2, %xmm2 +; SSE2-NEXT: andpd %xmm2, %xmm6 +; SSE2-NEXT: andnpd %xmm4, %xmm2 +; SSE2-NEXT: orpd %xmm6, %xmm2 +; SSE2-NEXT: movapd %xmm2, %xmm4 +; SSE2-NEXT: maxpd %xmm0, %xmm4 +; SSE2-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE2-NEXT: andpd %xmm0, %xmm2 +; SSE2-NEXT: andnpd %xmm4, %xmm0 +; SSE2-NEXT: orpd %xmm2, %xmm0 +; SSE2-NEXT: movapd %xmm5, %xmm2 +; SSE2-NEXT: maxpd %xmm1, %xmm2 +; SSE2-NEXT: cmpunordpd %xmm1, 
%xmm1 +; SSE2-NEXT: andpd %xmm1, %xmm5 +; SSE2-NEXT: andnpd %xmm2, %xmm1 +; SSE2-NEXT: orpd %xmm5, %xmm1 +; SSE2-NEXT: movapd %xmm7, %xmm2 +; SSE2-NEXT: maxpd %xmm3, %xmm2 +; SSE2-NEXT: cmpunordpd %xmm3, %xmm3 +; SSE2-NEXT: andpd %xmm3, %xmm7 +; SSE2-NEXT: andnpd %xmm2, %xmm3 +; SSE2-NEXT: orpd %xmm7, %xmm3 +; SSE2-NEXT: movapd %xmm3, %xmm2 +; SSE2-NEXT: maxpd %xmm1, %xmm2 +; SSE2-NEXT: cmpunordpd %xmm1, %xmm1 +; SSE2-NEXT: andpd %xmm1, %xmm3 +; SSE2-NEXT: andnpd %xmm2, %xmm1 +; SSE2-NEXT: orpd %xmm3, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm2 +; SSE2-NEXT: maxpd %xmm0, %xmm2 +; SSE2-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE2-NEXT: andpd %xmm0, %xmm1 +; SSE2-NEXT: andnpd %xmm2, %xmm0 +; SSE2-NEXT: orpd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; SSE2-NEXT: movapd %xmm0, %xmm1 +; SSE2-NEXT: cmpunordsd %xmm0, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm3 +; SSE2-NEXT: andpd %xmm2, %xmm3 +; SSE2-NEXT: maxsd %xmm0, %xmm2 +; SSE2-NEXT: andnpd %xmm2, %xmm1 +; SSE2-NEXT: orpd %xmm3, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_v16f64: +; SSE41: # %bb.0: +; SSE41-NEXT: movapd %xmm3, %xmm8 +; SSE41-NEXT: movapd %xmm4, %xmm3 +; SSE41-NEXT: maxpd %xmm0, %xmm3 +; SSE41-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm3 +; SSE41-NEXT: movapd %xmm6, %xmm4 +; SSE41-NEXT: maxpd %xmm2, %xmm4 +; SSE41-NEXT: cmpunordpd %xmm2, %xmm2 +; SSE41-NEXT: movapd %xmm2, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm4 +; SSE41-NEXT: movapd %xmm4, %xmm2 +; SSE41-NEXT: maxpd %xmm3, %xmm2 +; SSE41-NEXT: cmpunordpd %xmm3, %xmm3 +; SSE41-NEXT: movapd %xmm3, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 +; SSE41-NEXT: movapd %xmm5, %xmm3 +; SSE41-NEXT: maxpd %xmm1, %xmm3 +; SSE41-NEXT: cmpunordpd %xmm1, %xmm1 +; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm3 +; SSE41-NEXT: movapd %xmm7, %xmm1 +; SSE41-NEXT: maxpd %xmm8, %xmm1 +; SSE41-NEXT: cmpunordpd %xmm8, %xmm8 +; SSE41-NEXT: movapd %xmm8, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm1 +; SSE41-NEXT: movapd %xmm1, %xmm4 +; SSE41-NEXT: maxpd %xmm3, %xmm4 +; SSE41-NEXT: cmpunordpd %xmm3, %xmm3 +; SSE41-NEXT: movapd %xmm3, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4 +; SSE41-NEXT: movapd %xmm4, %xmm1 +; SSE41-NEXT: maxpd %xmm2, %xmm1 +; SSE41-NEXT: cmpunordpd %xmm2, %xmm2 +; SSE41-NEXT: movapd %xmm2, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: movapd %xmm1, %xmm2 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] +; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: cmpunordsd %xmm1, %xmm0 +; SSE41-NEXT: movapd %xmm0, %xmm3 +; SSE41-NEXT: andpd %xmm2, %xmm3 +; SSE41-NEXT: maxsd %xmm1, %xmm2 +; SSE41-NEXT: andnpd %xmm2, %xmm0 +; SSE41-NEXT: orpd %xmm3, %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_v16f64: ; AVX: # %bb.0: -; AVX-NEXT: vmaxpd %ymm3, %ymm1, %ymm1 -; AVX-NEXT: vmaxpd %ymm2, %ymm0, %ymm0 -; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxpd %ymm0, %ymm2, %ymm4 +; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0 +; AVX-NEXT: vblendvpd %ymm0, %ymm2, %ymm4, %ymm0 +; AVX-NEXT: vmaxpd %ymm1, %ymm3, %ymm2 +; AVX-NEXT: vcmpunordpd %ymm1, %ymm1, %ymm1 +; AVX-NEXT: vblendvpd %ymm1, %ymm3, %ymm2, %ymm1 +; AVX-NEXT: vmaxpd %ymm0, %ymm1, %ymm2 +; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0 +; AVX-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 
+; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm3 +; AVX-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 +; AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v16f64: ; AVX512: # %bb.0: -; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmaxpd %zmm0, %zmm1, %zmm2 +; AVX512-NEXT: vcmpunordpd %zmm0, %zmm0, %k1 +; AVX512-NEXT: vmovapd %zmm1, %zmm2 {%k1} +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm2[1,0] +; AVX512-NEXT: vmaxsd %xmm2, %xmm0, %xmm1 +; AVX512-NEXT: vcmpunordsd %xmm2, %xmm2, %k1 +; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; AVX512-NEXT: vextractf128 $1, %ymm2, %xmm0 +; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; AVX512-NEXT: vextractf32x4 $2, %zmm2, %xmm0 +; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; AVX512-NEXT: vextractf32x4 $3, %zmm2, %xmm0 +; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512-NEXT: vmaxsd %xmm1, %xmm2, %xmm0 +; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> %a0) ret double %1 } +declare float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float>) declare float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float>) +declare float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float>) declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>) declare float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float>) declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float>) diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll index f25852f0c6a85..28e812748abaa 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll @@ -10,68 +10,176 @@ ; vXf32 ; +define float @test_v1f32(<1 x float> %a0) { +; ALL-LABEL: test_v1f32: +; ALL: # %bb.0: +; ALL-NEXT: retq + %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> %a0) + ret float %1 +} + define float @test_v2f32(<2 x float> %a0) { ; SSE2-LABEL: test_v2f32: ; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = 
xmm2[1,1],xmm0[1,1] ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] -; SSE2-NEXT: minss %xmm1, %xmm0 +; SSE2-NEXT: cmpunordss %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: andps %xmm2, %xmm3 +; SSE2-NEXT: minss %xmm0, %xmm2 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v2f32: ; SSE41: # %bb.0: -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: minss %xmm1, %xmm0 +; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; SSE41-NEXT: movaps %xmm0, %xmm1 +; SSE41-NEXT: cmpunordss %xmm0, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm3 +; SSE41-NEXT: andps %xmm2, %xmm3 +; SSE41-NEXT: minss %xmm0, %xmm2 +; SSE41-NEXT: andnps %xmm2, %xmm1 +; SSE41-NEXT: orps %xmm3, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v2f32: ; AVX: # %bb.0: ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminss %xmm0, %xmm1, %xmm2 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f32: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; AVX512-NEXT: vminss %xmm0, %xmm2, %xmm1 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %a0) ret float %1 } +define float @test_v3f32(<3 x float> %a0) { +; SSE2-LABEL: test_v3f32: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1] +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: cmpunordss %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: andps %xmm2, %xmm3 +; SSE2-NEXT: minss %xmm0, %xmm2 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: minss %xmm1, %xmm2 +; SSE2-NEXT: cmpunordss %xmm1, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: andnps %xmm2, %xmm3 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_v3f32: +; SSE41: # %bb.0: +; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; SSE41-NEXT: movaps %xmm0, %xmm1 +; SSE41-NEXT: cmpunordss %xmm0, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm3 +; SSE41-NEXT: andps %xmm2, %xmm3 +; SSE41-NEXT: minss %xmm0, %xmm2 +; SSE41-NEXT: andnps %xmm2, %xmm1 +; SSE41-NEXT: orps %xmm3, %xmm1 +; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE41-NEXT: movaps %xmm0, %xmm2 +; SSE41-NEXT: minss %xmm1, %xmm2 +; SSE41-NEXT: cmpunordss %xmm1, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm3 +; SSE41-NEXT: andnps %xmm2, %xmm3 +; SSE41-NEXT: andps %xmm0, %xmm1 +; SSE41-NEXT: orps %xmm3, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: test_v3f32: +; AVX: # %bb.0: +; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX-NEXT: vminss %xmm0, %xmm1, %xmm2 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm3 +; AVX-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm1 +; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX-NEXT: vminss %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; 
AVX-NEXT: retq +; +; AVX512-LABEL: test_v3f32: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm2 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} +; AVX512-NEXT: vcmpunordss %xmm2, %xmm2, %k1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vminss %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; AVX512-NEXT: retq + %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float> %a0) + ret float %1 +} + define float @test_v4f32(<4 x float> %a0) { ; SSE2-LABEL: test_v4f32: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE2-NEXT: minps %xmm1, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3] +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] +; SSE2-NEXT: movaps %xmm0, %xmm3 +; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1] +; SSE2-NEXT: minss %xmm3, %xmm0 +; SSE2-NEXT: minss %xmm2, %xmm0 ; SSE2-NEXT: minss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v4f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE41-NEXT: minps %xmm1, %xmm0 -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3] +; SSE41-NEXT: movaps %xmm0, %xmm2 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] +; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] +; SSE41-NEXT: minss %xmm3, %xmm0 +; SSE41-NEXT: minss %xmm2, %xmm0 ; SSE41-NEXT: minss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v4f32: ; AVX: # %bb.0: -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] +; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] +; AVX-NEXT: vminss %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vminss %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4f32: ; AVX512: # %bb.0: -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] +; AVX512-NEXT: vminss %xmm3, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm2, %xmm0, %xmm0 ; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %a0) @@ -82,43 +190,67 @@ define float @test_v8f32(<8 x float> %a0) { ; SSE2-LABEL: test_v8f32: ; SSE2: # %bb.0: ; SSE2-NEXT: minps %xmm1, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1] ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE2-NEXT: minps %xmm1, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] -; SSE2-NEXT: minss %xmm1, %xmm0 +; SSE2-NEXT: minss %xmm2, %xmm1 +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] +; SSE2-NEXT: minss %xmm2, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE2-NEXT: minss %xmm0, 
%xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v8f32: ; SSE41: # %bb.0: ; SSE41-NEXT: minps %xmm1, %xmm0 +; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE41-NEXT: minps %xmm1, %xmm0 -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: minss %xmm1, %xmm0 +; SSE41-NEXT: minss %xmm2, %xmm1 +; SSE41-NEXT: movaps %xmm0, %xmm2 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] +; SSE41-NEXT: minss %xmm2, %xmm1 +; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE41-NEXT: minss %xmm0, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v8f32: ; AVX: # %bb.0: ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,3,3,3] +; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] +; AVX-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] +; AVX-NEXT: vpermilps {{.*#+}} xmm5 = xmm0[3,3,3,3] +; AVX-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0] +; AVX-NEXT: vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3] +; AVX-NEXT: vminss %xmm7, %xmm0, %xmm0 +; AVX-NEXT: vminss %xmm6, %xmm0, %xmm0 +; AVX-NEXT: vminss %xmm5, %xmm0, %xmm0 ; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminss %xmm4, %xmm0, %xmm0 +; AVX-NEXT: vminss %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vminss %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v8f32: ; AVX512: # %bb.0: ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,3,3,3] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] +; AVX512-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] +; AVX512-NEXT: vpermilps {{.*#+}} xmm5 = xmm0[3,3,3,3] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0] +; AVX512-NEXT: vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3] +; AVX512-NEXT: vminss %xmm7, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm6, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm5, %xmm0, %xmm0 ; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm4, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm3, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm2, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %a0) @@ -131,12 +263,16 @@ define float @test_v16f32(<16 x float> %a0) { ; SSE2-NEXT: minps %xmm3, %xmm1 ; SSE2-NEXT: minps %xmm2, %xmm0 ; SSE2-NEXT: minps %xmm1, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1] ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE2-NEXT: minps %xmm1, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] -; SSE2-NEXT: minss %xmm1, %xmm0 +; SSE2-NEXT: minss %xmm2, %xmm1 +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] +; SSE2-NEXT: minss %xmm2, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE2-NEXT: minss %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v16f32: @@ -144,35 +280,69 @@ define float @test_v16f32(<16 x float> %a0) { ; SSE41-NEXT: minps 
%xmm3, %xmm1 ; SSE41-NEXT: minps %xmm2, %xmm0 ; SSE41-NEXT: minps %xmm1, %xmm0 +; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE41-NEXT: minps %xmm1, %xmm0 -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: minss %xmm1, %xmm0 +; SSE41-NEXT: minss %xmm2, %xmm1 +; SSE41-NEXT: movaps %xmm0, %xmm2 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] +; SSE41-NEXT: minss %xmm2, %xmm1 +; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE41-NEXT: minss %xmm0, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v16f32: ; AVX: # %bb.0: ; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminss %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX-NEXT: vminss %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3] +; AVX-NEXT: vminss %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-NEXT: vminss %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; AVX-NEXT: vminss %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX-NEXT: vminss %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; AVX-NEXT: vminss %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v16f32: ; AVX512: # %bb.0: -; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm1 +; AVX512-NEXT: vpermilps {{.*#+}} xmm8 = xmm1[3,3,3,3] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm9 = xmm1[1,0] +; AVX512-NEXT: vmovshdup {{.*#+}} xmm10 = xmm1[1,1,3,3] +; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm5 +; AVX512-NEXT: vpermilps {{.*#+}} xmm11 = xmm5[3,3,3,3] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm12 = xmm5[1,0] +; AVX512-NEXT: vmovshdup {{.*#+}} xmm13 = xmm5[1,1,3,3] +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX512-NEXT: vpermilps {{.*#+}} xmm14 = xmm3[3,3,3,3] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm15 = xmm3[1,0] +; AVX512-NEXT: vmovshdup {{.*#+}} xmm7 = xmm3[1,1,3,3] +; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0] +; AVX512-NEXT: vmovshdup {{.*#+}} xmm6 = xmm0[1,1,3,3] +; AVX512-NEXT: vminss %xmm6, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm4, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm3, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm7, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm15, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm14, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm5, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm13, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm12, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm11, %xmm0, %xmm0 ; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm10, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm9, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm8, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan float 
@llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %a0) @@ -218,18 +388,22 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX-LABEL: test_v4f64: ; AVX: # %bb.0: ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] +; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] +; AVX-NEXT: vminsd %xmm3, %xmm0, %xmm0 ; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminsd %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4f64: ; AVX512: # %bb.0: ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] +; AVX512-NEXT: vminsd %xmm3, %xmm0, %xmm0 ; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vminsd %xmm2, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %a0) @@ -250,21 +424,31 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX-LABEL: test_v8f64: ; AVX: # %bb.0: ; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-NEXT: vminsd %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX-NEXT: vminsd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v8f64: ; AVX512: # %bb.0: -; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] +; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm3 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0] +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm5 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm7 = xmm0[1,0] +; AVX512-NEXT: vminsd %xmm7, %xmm0, %xmm0 +; AVX512-NEXT: vminsd %xmm5, %xmm0, %xmm0 +; AVX512-NEXT: vminsd %xmm6, %xmm0, %xmm0 +; AVX512-NEXT: vminsd %xmm3, %xmm0, %xmm0 +; AVX512-NEXT: vminsd %xmm4, %xmm0, %xmm0 ; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vminsd %xmm2, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> %a0) @@ -274,12 +458,12 @@ define double @test_v8f64(<8 x double> %a0) { define double @test_v16f64(<16 x double> %a0) { ; SSE-LABEL: test_v16f64: ; SSE: # %bb.0: -; SSE-NEXT: minpd %xmm6, %xmm2 -; SSE-NEXT: minpd %xmm4, %xmm0 -; SSE-NEXT: minpd %xmm2, %xmm0 ; SSE-NEXT: minpd %xmm7, %xmm3 ; SSE-NEXT: minpd %xmm5, %xmm1 ; SSE-NEXT: minpd %xmm3, %xmm1 +; SSE-NEXT: minpd %xmm6, %xmm2 +; SSE-NEXT: minpd %xmm4, %xmm0 +; SSE-NEXT: minpd %xmm2, %xmm0 ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] @@ -291,29 +475,41 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX-NEXT: vminpd %ymm3, %ymm1, %ymm1 ; AVX-NEXT: vminpd %ymm2, %ymm0, %ymm0 ; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminpd 
%xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-NEXT: vminsd %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX-NEXT: vminsd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v16f64: ; AVX512: # %bb.0: ; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX512-NEXT: vminsd %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512-NEXT: vminsd %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2 +; AVX512-NEXT: vminsd %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512-NEXT: vminsd %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0 +; AVX512-NEXT: vminsd %xmm0, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vminsd %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> %a0) ret double %1 } +declare float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float>) declare float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float>) +declare float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float>) declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>) declare float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float>) declare float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float>) diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin.ll index d6c681f507522..1d7436eaa8a44 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmin.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmin.ll @@ -13,27 +13,46 @@ define float @test_v2f32(<2 x float> %a0) { ; SSE2-LABEL: test_v2f32: ; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1] ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] -; SSE2-NEXT: minss %xmm1, %xmm0 +; SSE2-NEXT: cmpunordss %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: andps %xmm2, %xmm3 +; SSE2-NEXT: minss %xmm0, %xmm2 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v2f32: ; SSE41: # %bb.0: -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: minss %xmm1, %xmm0 +; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; SSE41-NEXT: movaps %xmm0, %xmm1 +; SSE41-NEXT: cmpunordss %xmm0, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm3 +; SSE41-NEXT: andps %xmm2, %xmm3 +; SSE41-NEXT: minss %xmm0, %xmm2 +; SSE41-NEXT: andnps %xmm2, %xmm1 +; SSE41-NEXT: orps %xmm3, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v2f32: ; AVX: # %bb.0: ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminss %xmm0, %xmm1, %xmm2 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f32: ; AVX512: # %bb.0: -; 
AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; AVX512-NEXT: vminss %xmm0, %xmm2, %xmm1 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %a0) ret float %1 @@ -42,37 +61,95 @@ define float @test_v2f32(<2 x float> %a0) { define float @test_v4f32(<4 x float> %a0) { ; SSE2-LABEL: test_v4f32: ; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: movaps %xmm0, %xmm3 +; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1] ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE2-NEXT: minps %xmm1, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] -; SSE2-NEXT: minss %xmm1, %xmm0 +; SSE2-NEXT: cmpunordss %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm4 +; SSE2-NEXT: andps %xmm3, %xmm4 +; SSE2-NEXT: minss %xmm0, %xmm3 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] +; SSE2-NEXT: andnps %xmm3, %xmm1 +; SSE2-NEXT: orps %xmm4, %xmm1 +; SSE2-NEXT: movaps %xmm2, %xmm3 +; SSE2-NEXT: minss %xmm1, %xmm3 +; SSE2-NEXT: cmpunordss %xmm1, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm4 +; SSE2-NEXT: andnps %xmm3, %xmm4 +; SSE2-NEXT: andps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm4, %xmm1 +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: minss %xmm1, %xmm2 +; SSE2-NEXT: cmpunordss %xmm1, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: andnps %xmm2, %xmm3 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v4f32: ; SSE41: # %bb.0: +; SSE41-NEXT: movaps %xmm0, %xmm2 +; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE41-NEXT: minps %xmm1, %xmm0 -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: minss %xmm1, %xmm0 +; SSE41-NEXT: cmpunordss %xmm0, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm4 +; SSE41-NEXT: andps %xmm3, %xmm4 +; SSE41-NEXT: minss %xmm0, %xmm3 +; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] +; SSE41-NEXT: andnps %xmm3, %xmm1 +; SSE41-NEXT: orps %xmm4, %xmm1 +; SSE41-NEXT: movaps %xmm2, %xmm3 +; SSE41-NEXT: minss %xmm1, %xmm3 +; SSE41-NEXT: cmpunordss %xmm1, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm4 +; SSE41-NEXT: andnps %xmm3, %xmm4 +; SSE41-NEXT: andps %xmm2, %xmm1 +; SSE41-NEXT: orps %xmm4, %xmm1 +; SSE41-NEXT: movaps %xmm0, %xmm2 +; SSE41-NEXT: minss %xmm1, %xmm2 +; SSE41-NEXT: cmpunordss %xmm1, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm3 +; SSE41-NEXT: andnps %xmm2, %xmm3 +; SSE41-NEXT: andps %xmm0, %xmm1 +; SSE41-NEXT: orps %xmm3, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v4f32: ; AVX: # %bb.0: -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] +; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] +; AVX-NEXT: vminss %xmm0, %xmm3, %xmm4 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendvps %xmm0, %xmm3, %xmm4, %xmm0 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm3 +; 
AVX-NEXT: vminss %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vblendvps %xmm3, %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vminss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4f32: ; AVX512: # %bb.0: -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] +; AVX512-NEXT: vminss %xmm0, %xmm3, %xmm4 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1} +; AVX512-NEXT: vcmpunordss %xmm4, %xmm4, %k1 +; AVX512-NEXT: vminss %xmm4, %xmm2, %xmm0 +; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm0 {%k1} +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %a0) ret float %1 @@ -81,46 +158,170 @@ define float @test_v4f32(<4 x float> %a0) { define float @test_v8f32(<8 x float> %a0) { ; SSE2-LABEL: test_v8f32: ; SSE2: # %bb.0: -; SSE2-NEXT: minps %xmm1, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE2-NEXT: minps %xmm1, %xmm0 +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: minps %xmm0, %xmm2 +; SSE2-NEXT: cmpunordps %xmm0, %xmm0 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: andnps %xmm2, %xmm0 +; SSE2-NEXT: orps %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] -; SSE2-NEXT: minss %xmm1, %xmm0 +; SSE2-NEXT: cmpunordss %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: andps %xmm2, %xmm3 +; SSE2-NEXT: minss %xmm0, %xmm2 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: minss %xmm1, %xmm3 +; SSE2-NEXT: cmpunordss %xmm1, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm4 +; SSE2-NEXT: andnps %xmm3, %xmm4 +; SSE2-NEXT: andps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm4, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: minss %xmm1, %xmm2 +; SSE2-NEXT: cmpunordss %xmm1, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: andnps %xmm2, %xmm3 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v8f32: ; SSE41: # %bb.0: -; SSE41-NEXT: minps %xmm1, %xmm0 -; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE41-NEXT: minps %xmm1, %xmm0 -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: minss %xmm1, %xmm0 +; SSE41-NEXT: movaps %xmm1, %xmm2 +; SSE41-NEXT: minps %xmm0, %xmm2 +; SSE41-NEXT: cmpunordps %xmm0, %xmm0 +; SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm2 +; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] +; SSE41-NEXT: movaps %xmm2, %xmm0 +; SSE41-NEXT: cmpunordss %xmm2, %xmm0 +; SSE41-NEXT: movaps %xmm0, %xmm3 +; SSE41-NEXT: andps %xmm1, %xmm3 +; SSE41-NEXT: minss %xmm2, %xmm1 +; SSE41-NEXT: andnps %xmm1, %xmm0 +; SSE41-NEXT: orps %xmm3, %xmm0 +; SSE41-NEXT: movaps %xmm2, %xmm1 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] +; SSE41-NEXT: movaps %xmm1, 
%xmm3 +; SSE41-NEXT: minss %xmm0, %xmm3 +; SSE41-NEXT: cmpunordss %xmm0, %xmm0 +; SSE41-NEXT: movaps %xmm0, %xmm4 +; SSE41-NEXT: andnps %xmm3, %xmm4 +; SSE41-NEXT: andps %xmm1, %xmm0 +; SSE41-NEXT: orps %xmm4, %xmm0 +; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,3,3,3] +; SSE41-NEXT: movaps %xmm2, %xmm1 +; SSE41-NEXT: minss %xmm0, %xmm1 +; SSE41-NEXT: cmpunordss %xmm0, %xmm0 +; SSE41-NEXT: movaps %xmm0, %xmm3 +; SSE41-NEXT: andnps %xmm1, %xmm3 +; SSE41-NEXT: andps %xmm2, %xmm0 +; SSE41-NEXT: orps %xmm3, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v8f32: ; AVX: # %bb.0: ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpermilps {{.*#+}} xmm8 = xmm1[3,3,3,3] +; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] +; AVX-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] +; AVX-NEXT: vpermilps {{.*#+}} xmm5 = xmm0[3,3,3,3] +; AVX-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0] +; AVX-NEXT: vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3] +; AVX-NEXT: vminss %xmm0, %xmm7, %xmm2 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendvps %xmm0, %xmm7, %xmm2, %xmm0 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vminss %xmm0, %xmm6, %xmm0 +; AVX-NEXT: vblendvps %xmm2, %xmm6, %xmm0, %xmm0 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vminss %xmm0, %xmm5, %xmm0 +; AVX-NEXT: vblendvps %xmm2, %xmm5, %xmm0, %xmm0 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vminss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vminss %xmm0, %xmm4, %xmm0 +; AVX-NEXT: vblendvps %xmm1, %xmm4, %xmm0, %xmm0 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vminss %xmm0, %xmm3, %xmm0 +; AVX-NEXT: vblendvps %xmm1, %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vminss %xmm0, %xmm8, %xmm0 +; AVX-NEXT: vblendvps %xmm1, %xmm8, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; -; AVX512-LABEL: test_v8f32: -; AVX512: # %bb.0: -; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512BW-LABEL: test_v8f32: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX512BW-NEXT: vpermilps {{.*#+}} xmm8 = xmm3[3,3,3,3] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm3[1,0] +; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3] +; AVX512BW-NEXT: vpermilps {{.*#+}} xmm5 = xmm0[3,3,3,3] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0] +; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3] +; AVX512BW-NEXT: vminss %xmm0, %xmm7, %xmm1 +; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vmovss %xmm7, %xmm1, %xmm1 {%k1} +; AVX512BW-NEXT: vcmpunordss %xmm1, %xmm1, %k1 +; AVX512BW-NEXT: vminss %xmm1, %xmm6, %xmm0 +; AVX512BW-NEXT: vmovss %xmm6, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vminss %xmm0, %xmm5, %xmm0 +; AVX512BW-NEXT: vmovss %xmm5, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vminss %xmm0, %xmm3, %xmm0 +; AVX512BW-NEXT: vmovss %xmm3, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: 
vcmpunordss %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vminss %xmm0, %xmm4, %xmm0 +; AVX512BW-NEXT: vmovss %xmm4, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vminss %xmm0, %xmm2, %xmm0 +; AVX512BW-NEXT: vmovss %xmm2, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vminss %xmm0, %xmm8, %xmm0 +; AVX512BW-NEXT: vmovss %xmm8, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: test_v8f32: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX512VL-NEXT: vpermilps {{.*#+}} xmm8 = xmm1[3,3,3,3] +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] +; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] +; AVX512VL-NEXT: vpermilps {{.*#+}} xmm5 = xmm0[3,3,3,3] +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0] +; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3] +; AVX512VL-NEXT: vminss %xmm0, %xmm7, %xmm2 +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmovss %xmm7, %xmm2, %xmm2 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm2, %xmm2, %k1 +; AVX512VL-NEXT: vminss %xmm2, %xmm6, %xmm0 +; AVX512VL-NEXT: vmovss %xmm6, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminss %xmm0, %xmm5, %xmm0 +; AVX512VL-NEXT: vmovss %xmm5, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminss %xmm0, %xmm1, %xmm0 +; AVX512VL-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminss %xmm0, %xmm4, %xmm0 +; AVX512VL-NEXT: vmovss %xmm4, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminss %xmm0, %xmm3, %xmm0 +; AVX512VL-NEXT: vmovss %xmm3, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminss %xmm0, %xmm8, %xmm0 +; AVX512VL-NEXT: vmovss %xmm8, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %a0) ret float %1 } @@ -128,53 +329,259 @@ define float @test_v8f32(<8 x float> %a0) { define float @test_v16f32(<16 x float> %a0) { ; SSE2-LABEL: test_v16f32: ; SSE2: # %bb.0: -; SSE2-NEXT: minps %xmm3, %xmm1 -; SSE2-NEXT: minps %xmm2, %xmm0 -; SSE2-NEXT: minps %xmm1, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE2-NEXT: minps %xmm1, %xmm0 +; SSE2-NEXT: movaps %xmm2, %xmm4 +; SSE2-NEXT: minps %xmm0, %xmm4 +; SSE2-NEXT: cmpunordps %xmm0, %xmm0 +; SSE2-NEXT: andps %xmm0, %xmm2 +; SSE2-NEXT: andnps %xmm4, %xmm0 +; SSE2-NEXT: orps %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm3, %xmm2 +; SSE2-NEXT: minps %xmm1, %xmm2 +; SSE2-NEXT: cmpunordps %xmm1, %xmm1 +; SSE2-NEXT: andps %xmm1, %xmm3 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: minps %xmm0, %xmm2 +; SSE2-NEXT: cmpunordps %xmm0, %xmm0 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: andnps %xmm2, %xmm0 +; SSE2-NEXT: orps %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] -; SSE2-NEXT: minss %xmm1, %xmm0 +; SSE2-NEXT: cmpunordss %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: andps %xmm2, %xmm3 +; SSE2-NEXT: minss %xmm0, %xmm2 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: minss %xmm1, %xmm3 +; 
SSE2-NEXT: cmpunordss %xmm1, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm4 +; SSE2-NEXT: andnps %xmm3, %xmm4 +; SSE2-NEXT: andps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm4, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: minss %xmm1, %xmm2 +; SSE2-NEXT: cmpunordss %xmm1, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: andnps %xmm2, %xmm3 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v16f32: ; SSE41: # %bb.0: -; SSE41-NEXT: minps %xmm3, %xmm1 -; SSE41-NEXT: minps %xmm2, %xmm0 -; SSE41-NEXT: minps %xmm1, %xmm0 -; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE41-NEXT: minps %xmm1, %xmm0 -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: minss %xmm1, %xmm0 +; SSE41-NEXT: movaps %xmm2, %xmm4 +; SSE41-NEXT: minps %xmm0, %xmm4 +; SSE41-NEXT: cmpunordps %xmm0, %xmm0 +; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm4 +; SSE41-NEXT: movaps %xmm3, %xmm2 +; SSE41-NEXT: minps %xmm1, %xmm2 +; SSE41-NEXT: cmpunordps %xmm1, %xmm1 +; SSE41-NEXT: movaps %xmm1, %xmm0 +; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm2 +; SSE41-NEXT: movaps %xmm2, %xmm1 +; SSE41-NEXT: minps %xmm4, %xmm1 +; SSE41-NEXT: cmpunordps %xmm4, %xmm4 +; SSE41-NEXT: movaps %xmm4, %xmm0 +; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] +; SSE41-NEXT: movaps %xmm1, %xmm0 +; SSE41-NEXT: cmpunordss %xmm1, %xmm0 +; SSE41-NEXT: movaps %xmm0, %xmm3 +; SSE41-NEXT: andps %xmm2, %xmm3 +; SSE41-NEXT: minss %xmm1, %xmm2 +; SSE41-NEXT: andnps %xmm2, %xmm0 +; SSE41-NEXT: orps %xmm3, %xmm0 +; SSE41-NEXT: movaps %xmm1, %xmm2 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] +; SSE41-NEXT: movaps %xmm2, %xmm3 +; SSE41-NEXT: minss %xmm0, %xmm3 +; SSE41-NEXT: cmpunordss %xmm0, %xmm0 +; SSE41-NEXT: movaps %xmm0, %xmm4 +; SSE41-NEXT: andnps %xmm3, %xmm4 +; SSE41-NEXT: andps %xmm2, %xmm0 +; SSE41-NEXT: orps %xmm4, %xmm0 +; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3] +; SSE41-NEXT: movaps %xmm1, %xmm2 +; SSE41-NEXT: minss %xmm0, %xmm2 +; SSE41-NEXT: cmpunordss %xmm0, %xmm0 +; SSE41-NEXT: movaps %xmm0, %xmm3 +; SSE41-NEXT: andnps %xmm2, %xmm3 +; SSE41-NEXT: andps %xmm1, %xmm0 +; SSE41-NEXT: orps %xmm3, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v16f32: ; AVX: # %bb.0: -; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminps %ymm0, %ymm1, %ymm2 +; AVX-NEXT: vcmpunordps %ymm0, %ymm0, %ymm0 +; AVX-NEXT: vblendvps %ymm0, %ymm1, %ymm2, %ymm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminss %xmm0, %xmm1, %xmm2 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm3 +; AVX-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm1 +; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] +; AVX-NEXT: vminss %xmm1, %xmm3, %xmm1 +; AVX-NEXT: vblendvps %xmm2, %xmm3, %xmm1, %xmm1 +; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,3,3,3] +; AVX-NEXT: vminss %xmm1, %xmm3, %xmm1 +; AVX-NEXT: vblendvps %xmm2, %xmm3, %xmm1, %xmm1 +; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-NEXT: vminss %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcmpunordss %xmm1, 
%xmm1, %xmm2 +; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] +; AVX-NEXT: vminss %xmm1, %xmm3, %xmm1 +; AVX-NEXT: vblendvps %xmm2, %xmm3, %xmm1, %xmm1 +; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] +; AVX-NEXT: vminss %xmm1, %xmm3, %xmm1 +; AVX-NEXT: vblendvps %xmm2, %xmm3, %xmm1, %xmm1 +; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; AVX-NEXT: vminss %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; -; AVX512-LABEL: test_v16f32: -; AVX512: # %bb.0: -; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512BW-LABEL: test_v16f32: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; AVX512BW-NEXT: vminss %xmm0, %xmm2, %xmm3 +; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vminss %xmm3, %xmm2, %xmm3 +; AVX512BW-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3] +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vminss %xmm3, %xmm2, %xmm3 +; AVX512BW-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vminss %xmm3, %xmm1, %xmm3 +; AVX512BW-NEXT: vmovss %xmm1, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vminss %xmm3, %xmm2, %xmm3 +; AVX512BW-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vminss %xmm3, %xmm2, %xmm3 +; AVX512BW-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm2 +; AVX512BW-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3] +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vminss %xmm3, %xmm1, %xmm3 +; AVX512BW-NEXT: vmovss %xmm1, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vminss %xmm3, %xmm2, %xmm3 +; AVX512BW-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vminss %xmm3, %xmm1, %xmm3 +; AVX512BW-NEXT: vmovss %xmm1, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0] +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vminss %xmm3, %xmm1, %xmm3 +; AVX512BW-NEXT: vmovss %xmm1, %xmm3, %xmm3 {%k1} +; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm0 +; AVX512BW-NEXT: vpermilps {{.*#+}} xmm1 = xmm2[3,3,3,3] +; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1 +; AVX512BW-NEXT: vminss %xmm3, %xmm1, %xmm2 +; AVX512BW-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} +; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512BW-NEXT: vcmpunordss %xmm2, %xmm2, %k1 +; AVX512BW-NEXT: vminss %xmm2, %xmm0, %xmm2 +; AVX512BW-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1} +; AVX512BW-NEXT: 
vcmpunordss %xmm2, %xmm2, %k1 +; AVX512BW-NEXT: vminss %xmm2, %xmm1, %xmm2 +; AVX512BW-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512BW-NEXT: vcmpunordss %xmm2, %xmm2, %k1 +; AVX512BW-NEXT: vminss %xmm2, %xmm1, %xmm2 +; AVX512BW-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} +; AVX512BW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] +; AVX512BW-NEXT: vcmpunordss %xmm2, %xmm2, %k1 +; AVX512BW-NEXT: vminss %xmm2, %xmm1, %xmm0 +; AVX512BW-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: test_v16f32: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vextractf32x4 $3, %zmm0, %xmm3 +; AVX512VL-NEXT: vpermilps {{.*#+}} xmm8 = xmm3[3,3,3,3] +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm9 = xmm3[1,0] +; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm10 = xmm3[1,1,3,3] +; AVX512VL-NEXT: vextractf32x4 $2, %zmm0, %xmm6 +; AVX512VL-NEXT: vpermilps {{.*#+}} xmm11 = xmm6[3,3,3,3] +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm12 = xmm6[1,0] +; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm13 = xmm6[1,1,3,3] +; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX512VL-NEXT: vpermilps {{.*#+}} xmm14 = xmm2[3,3,3,3] +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm15 = xmm2[1,0] +; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm16 = xmm2[1,1,3,3] +; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0] +; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm5 = xmm0[1,1,3,3] +; AVX512VL-NEXT: vminss %xmm0, %xmm5, %xmm7 +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmovss %xmm5, %xmm7, %xmm7 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm7, %xmm7, %k1 +; AVX512VL-NEXT: vminss %xmm7, %xmm4, %xmm0 +; AVX512VL-NEXT: vmovss %xmm4, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminss %xmm0, %xmm1, %xmm0 +; AVX512VL-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminss %xmm0, %xmm2, %xmm0 +; AVX512VL-NEXT: vmovss %xmm2, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminss %xmm0, %xmm16, %xmm0 +; AVX512VL-NEXT: vmovss %xmm16, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminss %xmm0, %xmm15, %xmm0 +; AVX512VL-NEXT: vmovss %xmm15, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminss %xmm0, %xmm14, %xmm0 +; AVX512VL-NEXT: vmovss %xmm14, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminss %xmm0, %xmm6, %xmm0 +; AVX512VL-NEXT: vmovss %xmm6, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminss %xmm0, %xmm13, %xmm0 +; AVX512VL-NEXT: vmovss %xmm13, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminss %xmm0, %xmm12, %xmm0 +; AVX512VL-NEXT: vmovss %xmm12, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminss %xmm0, %xmm11, %xmm0 +; AVX512VL-NEXT: vmovss %xmm11, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminss %xmm0, %xmm3, %xmm0 +; AVX512VL-NEXT: vmovss %xmm3, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminss %xmm0, %xmm10, %xmm0 +; AVX512VL-NEXT: vmovss %xmm10, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminss %xmm0, %xmm9, %xmm0 +; AVX512VL-NEXT: vmovss %xmm9, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; 
AVX512VL-NEXT: vminss %xmm0, %xmm8, %xmm0 +; AVX512VL-NEXT: vmovss %xmm8, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %a0) ret float %1 } @@ -186,50 +593,176 @@ define float @test_v16f32(<16 x float> %a0) { define double @test_v2f64(<2 x double> %a0) { ; SSE-LABEL: test_v2f64: ; SSE: # %bb.0: +; SSE-NEXT: movapd %xmm0, %xmm2 +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: minsd %xmm1, %xmm0 +; SSE-NEXT: cmpunordsd %xmm0, %xmm1 +; SSE-NEXT: movapd %xmm1, %xmm3 +; SSE-NEXT: andpd %xmm2, %xmm3 +; SSE-NEXT: minsd %xmm0, %xmm2 +; SSE-NEXT: andnpd %xmm2, %xmm1 +; SSE-NEXT: orpd %xmm3, %xmm1 +; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v2f64: ; AVX: # %bb.0: ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminsd %xmm0, %xmm1, %xmm2 +; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f64: ; AVX512: # %bb.0: -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512-NEXT: vminsd %xmm0, %xmm2, %xmm1 +; AVX512-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmovsd %xmm2, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vmovapd %xmm1, %xmm0 ; AVX512-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a0) ret double %1 } +define double @test_v3f64(<3 x double> %a0) { +; SSE2-LABEL: test_v3f64: +; SSE2: # %bb.0: +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],mem[1] +; SSE2-NEXT: movapd %xmm2, %xmm1 +; SSE2-NEXT: minpd %xmm0, %xmm1 +; SSE2-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE2-NEXT: andpd %xmm0, %xmm2 +; SSE2-NEXT: andnpd %xmm1, %xmm0 +; SSE2-NEXT: orpd %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; SSE2-NEXT: movapd %xmm0, %xmm1 +; SSE2-NEXT: cmpunordsd %xmm0, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm3 +; SSE2-NEXT: andpd %xmm2, %xmm3 +; SSE2-NEXT: minsd %xmm0, %xmm2 +; SSE2-NEXT: andnpd %xmm2, %xmm1 +; SSE2-NEXT: orpd %xmm3, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_v3f64: +; SSE41: # %bb.0: +; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],mem[1] +; SSE41-NEXT: movapd %xmm2, %xmm1 +; SSE41-NEXT: minpd %xmm0, %xmm1 +; SSE41-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movapd %xmm1, %xmm2 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] +; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: cmpunordsd %xmm1, %xmm0 +; SSE41-NEXT: movapd %xmm0, %xmm3 +; SSE41-NEXT: andpd %xmm2, %xmm3 +; SSE41-NEXT: minsd %xmm1, %xmm2 +; SSE41-NEXT: andnpd %xmm2, %xmm0 +; SSE41-NEXT: orpd %xmm3, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: test_v3f64: +; AVX: # %bb.0: +; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX-NEXT: vminsd %xmm0, %xmm1, %xmm2 +; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm3 +; AVX-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 +; AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v3f64: +; AVX512: # %bb.0: +; AVX512-NEXT: 
vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vminsd %xmm0, %xmm1, %xmm2 +; AVX512-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1} +; AVX512-NEXT: vcmpunordsd %xmm2, %xmm2, %k1 +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vminsd %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %1 = call double @llvm.experimental.vector.reduce.fmin.v3f64(<3 x double> %a0) + ret double %1 +} + define double @test_v4f64(<4 x double> %a0) { -; SSE-LABEL: test_v4f64: -; SSE: # %bb.0: -; SSE-NEXT: minpd %xmm1, %xmm0 -; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: minsd %xmm1, %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_v4f64: +; SSE2: # %bb.0: +; SSE2-NEXT: movapd %xmm1, %xmm2 +; SSE2-NEXT: minpd %xmm0, %xmm2 +; SSE2-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE2-NEXT: andpd %xmm0, %xmm1 +; SSE2-NEXT: andnpd %xmm2, %xmm0 +; SSE2-NEXT: orpd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; SSE2-NEXT: movapd %xmm0, %xmm1 +; SSE2-NEXT: cmpunordsd %xmm0, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm3 +; SSE2-NEXT: andpd %xmm2, %xmm3 +; SSE2-NEXT: minsd %xmm0, %xmm2 +; SSE2-NEXT: andnpd %xmm2, %xmm1 +; SSE2-NEXT: orpd %xmm3, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_v4f64: +; SSE41: # %bb.0: +; SSE41-NEXT: movapd %xmm1, %xmm2 +; SSE41-NEXT: minpd %xmm0, %xmm2 +; SSE41-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2 +; SSE41-NEXT: movapd %xmm2, %xmm1 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] +; SSE41-NEXT: movapd %xmm2, %xmm0 +; SSE41-NEXT: cmpunordsd %xmm2, %xmm0 +; SSE41-NEXT: movapd %xmm0, %xmm3 +; SSE41-NEXT: andpd %xmm1, %xmm3 +; SSE41-NEXT: minsd %xmm2, %xmm1 +; SSE41-NEXT: andnpd %xmm1, %xmm0 +; SSE41-NEXT: orpd %xmm3, %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_v4f64: ; AVX: # %bb.0: ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] +; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] +; AVX-NEXT: vminsd %xmm0, %xmm3, %xmm4 +; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendvpd %xmm0, %xmm3, %xmm4, %xmm0 +; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm3 +; AVX-NEXT: vminsd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vblendvpd %xmm3, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vminsd %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vblendvpd %xmm1, %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4f64: ; AVX512: # %bb.0: ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] +; AVX512-NEXT: vminsd %xmm0, %xmm3, %xmm4 +; AVX512-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmovsd %xmm3, %xmm4, %xmm4 {%k1} +; AVX512-NEXT: vcmpunordsd %xmm4, %xmm4, %k1 +; AVX512-NEXT: vminsd %xmm4, %xmm1, %xmm0 +; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} +; AVX512-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512-NEXT: vminsd %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %a0) @@ 
-237,76 +770,316 @@ define double @test_v4f64(<4 x double> %a0) { } define double @test_v8f64(<8 x double> %a0) { -; SSE-LABEL: test_v8f64: -; SSE: # %bb.0: -; SSE-NEXT: minpd %xmm3, %xmm1 -; SSE-NEXT: minpd %xmm2, %xmm0 -; SSE-NEXT: minpd %xmm1, %xmm0 -; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: minsd %xmm1, %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_v8f64: +; SSE2: # %bb.0: +; SSE2-NEXT: movapd %xmm2, %xmm4 +; SSE2-NEXT: minpd %xmm0, %xmm4 +; SSE2-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE2-NEXT: andpd %xmm0, %xmm2 +; SSE2-NEXT: andnpd %xmm4, %xmm0 +; SSE2-NEXT: orpd %xmm2, %xmm0 +; SSE2-NEXT: movapd %xmm3, %xmm2 +; SSE2-NEXT: minpd %xmm1, %xmm2 +; SSE2-NEXT: cmpunordpd %xmm1, %xmm1 +; SSE2-NEXT: andpd %xmm1, %xmm3 +; SSE2-NEXT: andnpd %xmm2, %xmm1 +; SSE2-NEXT: orpd %xmm3, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm2 +; SSE2-NEXT: minpd %xmm0, %xmm2 +; SSE2-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE2-NEXT: andpd %xmm0, %xmm1 +; SSE2-NEXT: andnpd %xmm2, %xmm0 +; SSE2-NEXT: orpd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; SSE2-NEXT: movapd %xmm0, %xmm1 +; SSE2-NEXT: cmpunordsd %xmm0, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm3 +; SSE2-NEXT: andpd %xmm2, %xmm3 +; SSE2-NEXT: minsd %xmm0, %xmm2 +; SSE2-NEXT: andnpd %xmm2, %xmm1 +; SSE2-NEXT: orpd %xmm3, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_v8f64: +; SSE41: # %bb.0: +; SSE41-NEXT: movapd %xmm2, %xmm4 +; SSE41-NEXT: minpd %xmm0, %xmm4 +; SSE41-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4 +; SSE41-NEXT: movapd %xmm3, %xmm2 +; SSE41-NEXT: minpd %xmm1, %xmm2 +; SSE41-NEXT: cmpunordpd %xmm1, %xmm1 +; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2 +; SSE41-NEXT: movapd %xmm2, %xmm1 +; SSE41-NEXT: minpd %xmm4, %xmm1 +; SSE41-NEXT: cmpunordpd %xmm4, %xmm4 +; SSE41-NEXT: movapd %xmm4, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movapd %xmm1, %xmm2 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] +; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: cmpunordsd %xmm1, %xmm0 +; SSE41-NEXT: movapd %xmm0, %xmm3 +; SSE41-NEXT: andpd %xmm2, %xmm3 +; SSE41-NEXT: minsd %xmm1, %xmm2 +; SSE41-NEXT: andnpd %xmm2, %xmm0 +; SSE41-NEXT: orpd %xmm3, %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_v8f64: ; AVX: # %bb.0: -; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminpd %ymm0, %ymm1, %ymm2 +; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0 +; AVX-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminsd %xmm0, %xmm1, %xmm2 +; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm3 +; AVX-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 +; AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; -; AVX512-LABEL: test_v8f64: -; AVX512: # %bb.0: -; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminsd 
%xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512BW-LABEL: test_v8f64: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm2 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm8 = xmm2[1,0] +; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm3 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0] +; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm5 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm7 = xmm0[1,0] +; AVX512BW-NEXT: vminsd %xmm0, %xmm7, %xmm1 +; AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vmovsd %xmm7, %xmm1, %xmm1 {%k1} +; AVX512BW-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; AVX512BW-NEXT: vminsd %xmm1, %xmm5, %xmm0 +; AVX512BW-NEXT: vmovsd %xmm5, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vminsd %xmm0, %xmm6, %xmm0 +; AVX512BW-NEXT: vmovsd %xmm6, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vminsd %xmm0, %xmm3, %xmm0 +; AVX512BW-NEXT: vmovsd %xmm3, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vminsd %xmm0, %xmm4, %xmm0 +; AVX512BW-NEXT: vmovsd %xmm4, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vminsd %xmm0, %xmm2, %xmm0 +; AVX512BW-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vminsd %xmm0, %xmm8, %xmm0 +; AVX512BW-NEXT: vmovsd %xmm8, %xmm0, %xmm0 {%k1} +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: test_v8f64: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vextractf32x4 $3, %zmm0, %xmm1 +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm8 = xmm1[1,0] +; AVX512VL-NEXT: vextractf32x4 $2, %zmm0, %xmm3 +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0] +; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm5 +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0] +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm7 = xmm0[1,0] +; AVX512VL-NEXT: vminsd %xmm0, %xmm7, %xmm2 +; AVX512VL-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmovsd %xmm7, %xmm2, %xmm2 {%k1} +; AVX512VL-NEXT: vcmpunordsd %xmm2, %xmm2, %k1 +; AVX512VL-NEXT: vminsd %xmm2, %xmm5, %xmm0 +; AVX512VL-NEXT: vmovsd %xmm5, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminsd %xmm0, %xmm6, %xmm0 +; AVX512VL-NEXT: vmovsd %xmm6, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminsd %xmm0, %xmm3, %xmm0 +; AVX512VL-NEXT: vmovsd %xmm3, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminsd %xmm0, %xmm4, %xmm0 +; AVX512VL-NEXT: vmovsd %xmm4, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminsd %xmm0, %xmm1, %xmm0 +; AVX512VL-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vminsd %xmm0, %xmm8, %xmm0 +; AVX512VL-NEXT: vmovsd %xmm8, %xmm0, %xmm0 {%k1} +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> %a0) ret double %1 } define double @test_v16f64(<16 x double> %a0) { -; SSE-LABEL: test_v16f64: -; SSE: # %bb.0: -; SSE-NEXT: minpd %xmm6, %xmm2 -; SSE-NEXT: minpd %xmm4, %xmm0 -; SSE-NEXT: minpd %xmm2, %xmm0 -; SSE-NEXT: minpd %xmm7, %xmm3 -; SSE-NEXT: minpd %xmm5, %xmm1 -; SSE-NEXT: minpd %xmm3, %xmm1 -; SSE-NEXT: minpd %xmm1, %xmm0 -; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: minsd %xmm1, %xmm0 -; 
SSE-NEXT: retq +; SSE2-LABEL: test_v16f64: +; SSE2: # %bb.0: +; SSE2-NEXT: movapd %xmm4, %xmm8 +; SSE2-NEXT: minpd %xmm0, %xmm8 +; SSE2-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE2-NEXT: andpd %xmm0, %xmm4 +; SSE2-NEXT: andnpd %xmm8, %xmm0 +; SSE2-NEXT: orpd %xmm4, %xmm0 +; SSE2-NEXT: movapd %xmm6, %xmm4 +; SSE2-NEXT: minpd %xmm2, %xmm4 +; SSE2-NEXT: cmpunordpd %xmm2, %xmm2 +; SSE2-NEXT: andpd %xmm2, %xmm6 +; SSE2-NEXT: andnpd %xmm4, %xmm2 +; SSE2-NEXT: orpd %xmm6, %xmm2 +; SSE2-NEXT: movapd %xmm2, %xmm4 +; SSE2-NEXT: minpd %xmm0, %xmm4 +; SSE2-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE2-NEXT: andpd %xmm0, %xmm2 +; SSE2-NEXT: andnpd %xmm4, %xmm0 +; SSE2-NEXT: orpd %xmm2, %xmm0 +; SSE2-NEXT: movapd %xmm5, %xmm2 +; SSE2-NEXT: minpd %xmm1, %xmm2 +; SSE2-NEXT: cmpunordpd %xmm1, %xmm1 +; SSE2-NEXT: andpd %xmm1, %xmm5 +; SSE2-NEXT: andnpd %xmm2, %xmm1 +; SSE2-NEXT: orpd %xmm5, %xmm1 +; SSE2-NEXT: movapd %xmm7, %xmm2 +; SSE2-NEXT: minpd %xmm3, %xmm2 +; SSE2-NEXT: cmpunordpd %xmm3, %xmm3 +; SSE2-NEXT: andpd %xmm3, %xmm7 +; SSE2-NEXT: andnpd %xmm2, %xmm3 +; SSE2-NEXT: orpd %xmm7, %xmm3 +; SSE2-NEXT: movapd %xmm3, %xmm2 +; SSE2-NEXT: minpd %xmm1, %xmm2 +; SSE2-NEXT: cmpunordpd %xmm1, %xmm1 +; SSE2-NEXT: andpd %xmm1, %xmm3 +; SSE2-NEXT: andnpd %xmm2, %xmm1 +; SSE2-NEXT: orpd %xmm3, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm2 +; SSE2-NEXT: minpd %xmm0, %xmm2 +; SSE2-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE2-NEXT: andpd %xmm0, %xmm1 +; SSE2-NEXT: andnpd %xmm2, %xmm0 +; SSE2-NEXT: orpd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; SSE2-NEXT: movapd %xmm0, %xmm1 +; SSE2-NEXT: cmpunordsd %xmm0, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm3 +; SSE2-NEXT: andpd %xmm2, %xmm3 +; SSE2-NEXT: minsd %xmm0, %xmm2 +; SSE2-NEXT: andnpd %xmm2, %xmm1 +; SSE2-NEXT: orpd %xmm3, %xmm1 +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_v16f64: +; SSE41: # %bb.0: +; SSE41-NEXT: movapd %xmm3, %xmm8 +; SSE41-NEXT: movapd %xmm4, %xmm3 +; SSE41-NEXT: minpd %xmm0, %xmm3 +; SSE41-NEXT: cmpunordpd %xmm0, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm3 +; SSE41-NEXT: movapd %xmm6, %xmm4 +; SSE41-NEXT: minpd %xmm2, %xmm4 +; SSE41-NEXT: cmpunordpd %xmm2, %xmm2 +; SSE41-NEXT: movapd %xmm2, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm4 +; SSE41-NEXT: movapd %xmm4, %xmm2 +; SSE41-NEXT: minpd %xmm3, %xmm2 +; SSE41-NEXT: cmpunordpd %xmm3, %xmm3 +; SSE41-NEXT: movapd %xmm3, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 +; SSE41-NEXT: movapd %xmm5, %xmm3 +; SSE41-NEXT: minpd %xmm1, %xmm3 +; SSE41-NEXT: cmpunordpd %xmm1, %xmm1 +; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm3 +; SSE41-NEXT: movapd %xmm7, %xmm1 +; SSE41-NEXT: minpd %xmm8, %xmm1 +; SSE41-NEXT: cmpunordpd %xmm8, %xmm8 +; SSE41-NEXT: movapd %xmm8, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm1 +; SSE41-NEXT: movapd %xmm1, %xmm4 +; SSE41-NEXT: minpd %xmm3, %xmm4 +; SSE41-NEXT: cmpunordpd %xmm3, %xmm3 +; SSE41-NEXT: movapd %xmm3, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4 +; SSE41-NEXT: movapd %xmm4, %xmm1 +; SSE41-NEXT: minpd %xmm2, %xmm1 +; SSE41-NEXT: cmpunordpd %xmm2, %xmm2 +; SSE41-NEXT: movapd %xmm2, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: movapd %xmm1, %xmm2 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] +; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: cmpunordsd %xmm1, %xmm0 +; SSE41-NEXT: movapd %xmm0, %xmm3 +; SSE41-NEXT: andpd %xmm2, %xmm3 +; SSE41-NEXT: minsd %xmm1, %xmm2 +; SSE41-NEXT: andnpd %xmm2, %xmm0 +; SSE41-NEXT: orpd %xmm3, %xmm0 +; SSE41-NEXT: retq ; ; 
AVX-LABEL: test_v16f64: ; AVX: # %bb.0: -; AVX-NEXT: vminpd %ymm3, %ymm1, %ymm1 -; AVX-NEXT: vminpd %ymm2, %ymm0, %ymm0 -; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminpd %ymm0, %ymm2, %ymm4 +; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0 +; AVX-NEXT: vblendvpd %ymm0, %ymm2, %ymm4, %ymm0 +; AVX-NEXT: vminpd %ymm1, %ymm3, %ymm2 +; AVX-NEXT: vcmpunordpd %ymm1, %ymm1, %ymm1 +; AVX-NEXT: vblendvpd %ymm1, %ymm3, %ymm2, %ymm1 +; AVX-NEXT: vminpd %ymm0, %ymm1, %ymm2 +; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0 +; AVX-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminsd %xmm0, %xmm1, %xmm2 +; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm3 +; AVX-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 +; AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v16f64: ; AVX512: # %bb.0: -; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vminpd %zmm0, %zmm1, %zmm2 +; AVX512-NEXT: vcmpunordpd %zmm0, %zmm0, %k1 +; AVX512-NEXT: vmovapd %zmm1, %zmm2 {%k1} +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm2[1,0] +; AVX512-NEXT: vminsd %xmm2, %xmm0, %xmm1 +; AVX512-NEXT: vcmpunordsd %xmm2, %xmm2, %k1 +; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; AVX512-NEXT: vextractf128 $1, %ymm2, %xmm0 +; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; AVX512-NEXT: vextractf32x4 $2, %zmm2, %xmm0 +; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; AVX512-NEXT: vextractf32x4 $3, %zmm2, %xmm0 +; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512-NEXT: vminsd %xmm1, %xmm2, %xmm0 +; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> %a0) @@ -319,6 +1092,7 @@ declare float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float>) declare float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float>) declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>) +declare double @llvm.experimental.vector.reduce.fmin.v3f64(<3 x double>) declare double 
@llvm.experimental.vector.reduce.fmin.v4f64(<4 x double>)
declare double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double>)
declare double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double>)

From 6cfd38d03d5fc3cde929ebf82529415595e8ef8e Mon Sep 17 00:00:00 2001
From: David Green
Date: Sat, 12 Sep 2020 14:31:26 +0100
Subject: [PATCH 0449/1079] [ARM] Fixup single source mla reductions.

This fixes a complication on top of D87276. If we are sign extending around
a mul with the two operands that are the same, instcombine will helpfully
convert one of the sexts to a zext. Reverse that so that we again generate a
reduction.

Differential Revision: https://reviews.llvm.org/D87287
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp | 20 +
 llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll | 280 +-------
 .../CodeGen/Thumb2/mve-vecreduce-mlapred.ll | 598 +-----------------
 3 files changed, 34 insertions(+), 864 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 83d89de7b4772..943dc467025dd 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14890,6 +14890,26 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
   if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
     return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
                        DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
+
+  // Some complications. We can get a case where the two inputs of the mul are
+  // the same, then the output sext will have been helpfully converted to a
+  // zext. Turn it back.
+  SDValue Op = N0;
+  if (Op->getOpcode() == ISD::VSELECT)
+    Op = Op->getOperand(1);
+  if (Op->getOpcode() == ISD::ZERO_EXTEND &&
+      Op->getOperand(0)->getOpcode() == ISD::MUL) {
+    SDValue Mul = Op->getOperand(0);
+    if (Mul->getOperand(0) == Mul->getOperand(1) &&
+        Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
+      SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
+      if (Op != N0)
+        Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
+                          N0->getOperand(0), Ext, N0->getOperand(2));
+      return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
+    }
+  }
+
   return SDValue();
 }

diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
index 8cef85de3d956..b83b51b6f564f 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
@@ -201,67 +201,7 @@ entry:
 define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_sextzext(<8 x i16> %x, <8 x i16> %y) {
 ; CHECK-LABEL: add_v8i16_v8i32_v8i64_sextzext:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.u16 r0, q0[0]
-; CHECK-NEXT: vmov.32 q1[0], r0
-; CHECK-NEXT: vmov.u16 r0, q0[1]
-; CHECK-NEXT: vmov.32 q1[1], r0
-; CHECK-NEXT: vmov.u16 r0, q0[2]
-; CHECK-NEXT: vmov.32 q1[2], r0
-; CHECK-NEXT: vmov.u16 r0, q0[3]
-; CHECK-NEXT: vmov.32 q1[3], r0
-; CHECK-NEXT: vmullb.s16 q2, q1, q1
-; CHECK-NEXT: vmov.i64 q1, #0xffffffff
-; CHECK-NEXT: vmov.f32 s12, s8
-; CHECK-NEXT: vmov.f32 s14, s9
-; CHECK-NEXT: vand q3, q3, q1
-; CHECK-NEXT: vmov r2, s14
-; CHECK-NEXT: vmov r3, s12
-; CHECK-NEXT: vmov r0, s15
-; CHECK-NEXT: vmov r1, s13
-; CHECK-NEXT: vmov.f32 s12, s10
-; CHECK-NEXT: vmov.f32 s14, s11
-; CHECK-NEXT: vand q2, q3, q1
-; CHECK-NEXT: adds r2, r2, r3
-; CHECK-NEXT: vmov r3, s8
-; CHECK-NEXT: adcs r0, r1
-; CHECK-NEXT: vmov r1, s9
-; CHECK-NEXT: adds r2, r2, r3
-; CHECK-NEXT: vmov r3, s10
-; CHECK-NEXT: adcs r0, r1
-; CHECK-NEXT: vmov r1, s11
-; CHECK-NEXT: adds
r2, r2, r3 -; CHECK-NEXT: adcs r0, r1 -; CHECK-NEXT: vmov.u16 r1, q0[4] -; CHECK-NEXT: vmov.32 q2[0], r1 -; CHECK-NEXT: vmov.u16 r1, q0[5] -; CHECK-NEXT: vmov.32 q2[1], r1 -; CHECK-NEXT: vmov.u16 r1, q0[6] -; CHECK-NEXT: vmov.32 q2[2], r1 -; CHECK-NEXT: vmov.u16 r1, q0[7] -; CHECK-NEXT: vmov.32 q2[3], r1 -; CHECK-NEXT: vmullb.s16 q0, q2, q2 -; CHECK-NEXT: vmov.f32 s8, s0 -; CHECK-NEXT: vmov.f32 s10, s1 -; CHECK-NEXT: vand q2, q2, q1 -; CHECK-NEXT: vmov r3, s8 -; CHECK-NEXT: vmov r1, s9 -; CHECK-NEXT: adds r2, r2, r3 -; CHECK-NEXT: vmov r3, s10 -; CHECK-NEXT: adcs r0, r1 -; CHECK-NEXT: vmov r1, s11 -; CHECK-NEXT: vmov.f32 s8, s2 -; CHECK-NEXT: vmov.f32 s10, s3 -; CHECK-NEXT: vand q0, q2, q1 -; CHECK-NEXT: adds r2, r2, r3 -; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: adcs r0, r1 -; CHECK-NEXT: vmov r1, s1 -; CHECK-NEXT: adds r2, r2, r3 -; CHECK-NEXT: vmov r3, s3 -; CHECK-NEXT: adcs r1, r0 -; CHECK-NEXT: vmov r0, s2 -; CHECK-NEXT: adds r0, r0, r2 -; CHECK-NEXT: adcs r1, r3 +; CHECK-NEXT: vmlalv.s16 r0, r1, q0, q0 ; CHECK-NEXT: bx lr entry: %xx = sext <8 x i16> %x to <8 x i32> @@ -371,80 +311,7 @@ entry: define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_sextzext(<16 x i8> %x, <16 x i8> %y) { ; CHECK-LABEL: add_v16i8_v16i16_v16i32_sextzext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.u8 r0, q0[8] -; CHECK-NEXT: vmov.16 q1[0], r0 -; CHECK-NEXT: vmov.u8 r0, q0[9] -; CHECK-NEXT: vmov.16 q1[1], r0 -; CHECK-NEXT: vmov.u8 r0, q0[10] -; CHECK-NEXT: vmov.16 q1[2], r0 -; CHECK-NEXT: vmov.u8 r0, q0[11] -; CHECK-NEXT: vmov.16 q1[3], r0 -; CHECK-NEXT: vmov.u8 r0, q0[12] -; CHECK-NEXT: vmov.16 q1[4], r0 -; CHECK-NEXT: vmov.u8 r0, q0[13] -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmov.u8 r0, q0[14] -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov.u8 r0, q0[15] -; CHECK-NEXT: vmov.16 q1[7], r0 -; CHECK-NEXT: vmullb.s8 q1, q1, q1 -; CHECK-NEXT: vmov.u16 r0, q1[4] -; CHECK-NEXT: vmov.32 q2[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[5] -; CHECK-NEXT: vmov.32 q2[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[6] -; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[7] -; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: vmov.u8 r0, q0[0] -; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov.u8 r0, q0[1] -; CHECK-NEXT: vmov.16 q3[1], r0 -; CHECK-NEXT: vmov.u8 r0, q0[2] -; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov.u8 r0, q0[3] -; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov.u8 r0, q0[4] -; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov.u8 r0, q0[5] -; CHECK-NEXT: vmov.16 q3[5], r0 -; CHECK-NEXT: vmov.u8 r0, q0[6] -; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov.u8 r0, q0[7] -; CHECK-NEXT: vmov.16 q3[7], r0 -; CHECK-NEXT: vmovlb.u16 q2, q2 -; CHECK-NEXT: vmullb.s8 q0, q3, q3 -; CHECK-NEXT: vmov.u16 r0, q0[4] -; CHECK-NEXT: vmov.32 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[5] -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[6] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q0[7] -; CHECK-NEXT: vmov.32 q3[3], r0 -; CHECK-NEXT: vmov.u16 r0, q1[0] -; CHECK-NEXT: vmovlb.u16 q3, q3 -; CHECK-NEXT: vadd.i32 q2, q3, q2 -; CHECK-NEXT: vmov.32 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[1] -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[3] -; CHECK-NEXT: vmov.32 q3[3], r0 -; CHECK-NEXT: vmov.u16 r0, q0[0] -; CHECK-NEXT: vmovlb.u16 q1, q3 -; CHECK-NEXT: vmov.32 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[2] -; CHECK-NEXT: vmov.32 q3[2], r0 
-; CHECK-NEXT: vmov.u16 r0, q0[3] -; CHECK-NEXT: vmov.32 q3[3], r0 -; CHECK-NEXT: vmovlb.u16 q0, q3 -; CHECK-NEXT: vadd.i32 q0, q0, q1 -; CHECK-NEXT: vadd.i32 q0, q0, q2 -; CHECK-NEXT: vaddv.u32 r0, q0 +; CHECK-NEXT: vmlav.s8 r0, q0, q0 ; CHECK-NEXT: bx lr entry: %xx = sext <16 x i8> %x to <16 x i16> @@ -1238,72 +1105,8 @@ entry: define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_acc_sextzext(<8 x i16> %x, <8 x i16> %y, i64 %a) { ; CHECK-LABEL: add_v8i16_v8i32_v8i64_acc_sextzext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: vmov.u16 r2, q0[0] -; CHECK-NEXT: vmov.32 q1[0], r2 -; CHECK-NEXT: vmov.u16 r2, q0[1] -; CHECK-NEXT: vmov.32 q1[1], r2 -; CHECK-NEXT: vmov.u16 r2, q0[2] -; CHECK-NEXT: vmov.32 q1[2], r2 -; CHECK-NEXT: vmov.u16 r2, q0[3] -; CHECK-NEXT: vmov.32 q1[3], r2 -; CHECK-NEXT: vmullb.s16 q2, q1, q1 -; CHECK-NEXT: vmov.i64 q1, #0xffffffff -; CHECK-NEXT: vmov.f32 s12, s8 -; CHECK-NEXT: vmov.f32 s14, s9 -; CHECK-NEXT: vand q3, q3, q1 -; CHECK-NEXT: vmov r2, s14 -; CHECK-NEXT: vmov r3, s12 -; CHECK-NEXT: vmov r12, s15 -; CHECK-NEXT: vmov lr, s13 -; CHECK-NEXT: vmov.f32 s12, s10 -; CHECK-NEXT: vmov.f32 s14, s11 -; CHECK-NEXT: vand q2, q3, q1 -; CHECK-NEXT: adds r4, r3, r2 -; CHECK-NEXT: vmov r3, s8 -; CHECK-NEXT: vmov r2, s9 -; CHECK-NEXT: adc.w r12, r12, lr -; CHECK-NEXT: adds r3, r3, r4 -; CHECK-NEXT: vmov r4, s11 -; CHECK-NEXT: adc.w r12, r12, r2 -; CHECK-NEXT: vmov r2, s10 -; CHECK-NEXT: adds.w lr, r3, r2 -; CHECK-NEXT: adc.w r3, r12, r4 -; CHECK-NEXT: vmov.u16 r4, q0[4] -; CHECK-NEXT: vmov.32 q2[0], r4 -; CHECK-NEXT: vmov.u16 r4, q0[5] -; CHECK-NEXT: vmov.32 q2[1], r4 -; CHECK-NEXT: vmov.u16 r4, q0[6] -; CHECK-NEXT: vmov.32 q2[2], r4 -; CHECK-NEXT: vmov.u16 r4, q0[7] -; CHECK-NEXT: vmov.32 q2[3], r4 -; CHECK-NEXT: vmullb.s16 q0, q2, q2 -; CHECK-NEXT: vmov.f32 s8, s0 -; CHECK-NEXT: vmov.f32 s10, s1 -; CHECK-NEXT: vand q2, q2, q1 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vmov r4, s9 -; CHECK-NEXT: adds.w r12, lr, r2 -; CHECK-NEXT: vmov r2, s10 -; CHECK-NEXT: adcs r3, r4 -; CHECK-NEXT: vmov r4, s11 -; CHECK-NEXT: vmov.f32 s8, s2 -; CHECK-NEXT: vmov.f32 s10, s3 -; CHECK-NEXT: vand q0, q2, q1 -; CHECK-NEXT: adds.w r12, r12, r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: adcs r3, r4 -; CHECK-NEXT: vmov r4, s1 -; CHECK-NEXT: adds.w r12, r12, r2 -; CHECK-NEXT: vmov r2, s2 -; CHECK-NEXT: adcs r3, r4 -; CHECK-NEXT: vmov r4, s3 -; CHECK-NEXT: adds.w r2, r2, r12 -; CHECK-NEXT: adcs r3, r4 -; CHECK-NEXT: adds r0, r0, r2 -; CHECK-NEXT: adcs r1, r3 -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: vmlalva.s16 r0, r1, q0, q0 +; CHECK-NEXT: bx lr entry: %xx = sext <8 x i16> %x to <8 x i32> %m = mul <8 x i32> %xx, %xx @@ -1427,80 +1230,7 @@ entry: define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_acc_sextzext(<16 x i8> %x, i32 %a) { ; CHECK-LABEL: add_v16i8_v16i16_v16i32_acc_sextzext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.u8 r1, q0[8] -; CHECK-NEXT: vmov.16 q1[0], r1 -; CHECK-NEXT: vmov.u8 r1, q0[9] -; CHECK-NEXT: vmov.16 q1[1], r1 -; CHECK-NEXT: vmov.u8 r1, q0[10] -; CHECK-NEXT: vmov.16 q1[2], r1 -; CHECK-NEXT: vmov.u8 r1, q0[11] -; CHECK-NEXT: vmov.16 q1[3], r1 -; CHECK-NEXT: vmov.u8 r1, q0[12] -; CHECK-NEXT: vmov.16 q1[4], r1 -; CHECK-NEXT: vmov.u8 r1, q0[13] -; CHECK-NEXT: vmov.16 q1[5], r1 -; CHECK-NEXT: vmov.u8 r1, q0[14] -; CHECK-NEXT: vmov.16 q1[6], r1 -; CHECK-NEXT: vmov.u8 r1, q0[15] -; CHECK-NEXT: vmov.16 q1[7], r1 -; CHECK-NEXT: vmullb.s8 q1, q1, q1 -; CHECK-NEXT: vmov.u16 r1, q1[4] -; CHECK-NEXT: vmov.32 q2[0], r1 -; CHECK-NEXT: 
vmov.u16 r1, q1[5] -; CHECK-NEXT: vmov.32 q2[1], r1 -; CHECK-NEXT: vmov.u16 r1, q1[6] -; CHECK-NEXT: vmov.32 q2[2], r1 -; CHECK-NEXT: vmov.u16 r1, q1[7] -; CHECK-NEXT: vmov.32 q2[3], r1 -; CHECK-NEXT: vmov.u8 r1, q0[0] -; CHECK-NEXT: vmov.16 q3[0], r1 -; CHECK-NEXT: vmov.u8 r1, q0[1] -; CHECK-NEXT: vmov.16 q3[1], r1 -; CHECK-NEXT: vmov.u8 r1, q0[2] -; CHECK-NEXT: vmov.16 q3[2], r1 -; CHECK-NEXT: vmov.u8 r1, q0[3] -; CHECK-NEXT: vmov.16 q3[3], r1 -; CHECK-NEXT: vmov.u8 r1, q0[4] -; CHECK-NEXT: vmov.16 q3[4], r1 -; CHECK-NEXT: vmov.u8 r1, q0[5] -; CHECK-NEXT: vmov.16 q3[5], r1 -; CHECK-NEXT: vmov.u8 r1, q0[6] -; CHECK-NEXT: vmov.16 q3[6], r1 -; CHECK-NEXT: vmov.u8 r1, q0[7] -; CHECK-NEXT: vmov.16 q3[7], r1 -; CHECK-NEXT: vmovlb.u16 q2, q2 -; CHECK-NEXT: vmullb.s8 q0, q3, q3 -; CHECK-NEXT: vmov.u16 r1, q0[4] -; CHECK-NEXT: vmov.32 q3[0], r1 -; CHECK-NEXT: vmov.u16 r1, q0[5] -; CHECK-NEXT: vmov.32 q3[1], r1 -; CHECK-NEXT: vmov.u16 r1, q0[6] -; CHECK-NEXT: vmov.32 q3[2], r1 -; CHECK-NEXT: vmov.u16 r1, q0[7] -; CHECK-NEXT: vmov.32 q3[3], r1 -; CHECK-NEXT: vmov.u16 r1, q1[0] -; CHECK-NEXT: vmovlb.u16 q3, q3 -; CHECK-NEXT: vadd.i32 q2, q3, q2 -; CHECK-NEXT: vmov.32 q3[0], r1 -; CHECK-NEXT: vmov.u16 r1, q1[1] -; CHECK-NEXT: vmov.32 q3[1], r1 -; CHECK-NEXT: vmov.u16 r1, q1[2] -; CHECK-NEXT: vmov.32 q3[2], r1 -; CHECK-NEXT: vmov.u16 r1, q1[3] -; CHECK-NEXT: vmov.32 q3[3], r1 -; CHECK-NEXT: vmov.u16 r1, q0[0] -; CHECK-NEXT: vmovlb.u16 q1, q3 -; CHECK-NEXT: vmov.32 q3[0], r1 -; CHECK-NEXT: vmov.u16 r1, q0[1] -; CHECK-NEXT: vmov.32 q3[1], r1 -; CHECK-NEXT: vmov.u16 r1, q0[2] -; CHECK-NEXT: vmov.32 q3[2], r1 -; CHECK-NEXT: vmov.u16 r1, q0[3] -; CHECK-NEXT: vmov.32 q3[3], r1 -; CHECK-NEXT: vmovlb.u16 q0, q3 -; CHECK-NEXT: vadd.i32 q0, q0, q1 -; CHECK-NEXT: vadd.i32 q0, q0, q2 -; CHECK-NEXT: vaddva.u32 r0, q0 +; CHECK-NEXT: vmlava.s8 r0, q0, q0 ; CHECK-NEXT: bx lr entry: %xx = sext <16 x i8> %x to <16 x i16> diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll index fd268fd4c5a9a..02d124890c6bb 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll @@ -273,130 +273,8 @@ entry: define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_sextzext(<8 x i16> %x, <8 x i16> %y, <8 x i16> %b) { ; CHECK-LABEL: add_v8i16_v8i32_v8i64_sextzext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: vmov.i8 q1, #0x0 -; CHECK-NEXT: vmov.i8 q3, #0xff -; CHECK-NEXT: vcmp.i16 eq, q2, zr -; CHECK-NEXT: vpsel q2, q3, q1 -; CHECK-NEXT: vmov.u16 r0, q2[0] -; CHECK-NEXT: vmov.32 q1[0], r0 -; CHECK-NEXT: vmov.u16 r0, q2[1] -; CHECK-NEXT: vmov.32 q1[1], r0 -; CHECK-NEXT: vmov.u16 r0, q2[2] -; CHECK-NEXT: vmov.32 q1[2], r0 -; CHECK-NEXT: vmov.u16 r0, q2[3] -; CHECK-NEXT: vmov.32 q1[3], r0 -; CHECK-NEXT: vcmp.i32 ne, q1, zr -; CHECK-NEXT: vmrs r0, p0 -; CHECK-NEXT: and r1, r0, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: vmov.32 q4[0], r1 -; CHECK-NEXT: vmov.32 q4[1], r1 -; CHECK-NEXT: ubfx r1, r0, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: vmov.32 q4[2], r1 -; CHECK-NEXT: vmov.32 q4[3], r1 -; CHECK-NEXT: vmov.u16 r1, q0[0] -; CHECK-NEXT: vmov.32 q1[0], r1 -; CHECK-NEXT: vmov.u16 r1, q0[1] -; CHECK-NEXT: vmov.32 q1[1], r1 -; CHECK-NEXT: vmov.u16 r1, q0[2] -; CHECK-NEXT: vmov.32 q1[2], r1 -; CHECK-NEXT: vmov.u16 r1, q0[3] -; CHECK-NEXT: vmov.32 q1[3], r1 -; CHECK-NEXT: vmullb.s16 q3, q1, q1 -; CHECK-NEXT: vmov.i64 q1, #0xffffffff -; 
CHECK-NEXT: vmov.f32 s20, s12 -; CHECK-NEXT: vmov.f32 s22, s13 -; CHECK-NEXT: vand q5, q5, q1 -; CHECK-NEXT: vand q4, q5, q4 -; CHECK-NEXT: vmov.f32 s20, s14 -; CHECK-NEXT: vmov r3, s18 -; CHECK-NEXT: vmov r1, s16 -; CHECK-NEXT: vmov r12, s19 -; CHECK-NEXT: vmov r2, s17 -; CHECK-NEXT: vmov.f32 s22, s15 -; CHECK-NEXT: vand q3, q5, q1 -; CHECK-NEXT: adds r1, r1, r3 -; CHECK-NEXT: ubfx r3, r0, #8, #1 -; CHECK-NEXT: rsb.w r3, r3, #0 -; CHECK-NEXT: ubfx r0, r0, #12, #1 -; CHECK-NEXT: vmov.32 q4[0], r3 -; CHECK-NEXT: rsb.w r0, r0, #0 -; CHECK-NEXT: vmov.32 q4[1], r3 -; CHECK-NEXT: adc.w r2, r2, r12 -; CHECK-NEXT: vmov.32 q4[2], r0 -; CHECK-NEXT: vmov.32 q4[3], r0 -; CHECK-NEXT: vand q3, q3, q4 -; CHECK-NEXT: vmov r3, s12 -; CHECK-NEXT: vmov r0, s13 -; CHECK-NEXT: adds r1, r1, r3 -; CHECK-NEXT: vmov r3, s15 -; CHECK-NEXT: adcs r2, r0 -; CHECK-NEXT: vmov r0, s14 -; CHECK-NEXT: adds.w r12, r1, r0 -; CHECK-NEXT: adc.w r1, r2, r3 -; CHECK-NEXT: vmov.u16 r2, q2[4] -; CHECK-NEXT: vmov.32 q3[0], r2 -; CHECK-NEXT: vmov.u16 r2, q2[5] -; CHECK-NEXT: vmov.32 q3[1], r2 -; CHECK-NEXT: vmov.u16 r2, q2[6] -; CHECK-NEXT: vmov.32 q3[2], r2 -; CHECK-NEXT: vmov.u16 r2, q2[7] -; CHECK-NEXT: vmov.32 q3[3], r2 -; CHECK-NEXT: vcmp.i32 ne, q3, zr -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: and r3, r2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: vmov.32 q2[0], r3 -; CHECK-NEXT: vmov.32 q2[1], r3 -; CHECK-NEXT: ubfx r3, r2, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: vmov.32 q2[2], r3 -; CHECK-NEXT: vmov.32 q2[3], r3 -; CHECK-NEXT: vmov.u16 r3, q0[4] -; CHECK-NEXT: vmov.32 q3[0], r3 -; CHECK-NEXT: vmov.u16 r3, q0[5] -; CHECK-NEXT: vmov.32 q3[1], r3 -; CHECK-NEXT: vmov.u16 r3, q0[6] -; CHECK-NEXT: vmov.32 q3[2], r3 -; CHECK-NEXT: vmov.u16 r3, q0[7] -; CHECK-NEXT: vmov.32 q3[3], r3 -; CHECK-NEXT: vmullb.s16 q0, q3, q3 -; CHECK-NEXT: vmov.f32 s12, s0 -; CHECK-NEXT: vmov.f32 s14, s1 -; CHECK-NEXT: vand q3, q3, q1 -; CHECK-NEXT: vand q2, q3, q2 -; CHECK-NEXT: vmov.f32 s12, s2 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov r3, s9 -; CHECK-NEXT: vmov.f32 s14, s3 -; CHECK-NEXT: vand q0, q3, q1 -; CHECK-NEXT: adds.w r12, r12, r0 -; CHECK-NEXT: vmov r0, s10 -; CHECK-NEXT: adcs r1, r3 -; CHECK-NEXT: vmov r3, s11 -; CHECK-NEXT: adds.w r0, r0, r12 -; CHECK-NEXT: adcs r1, r3 -; CHECK-NEXT: ubfx r3, r2, #8, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: ubfx r2, r2, #12, #1 -; CHECK-NEXT: vmov.32 q2[0], r3 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vmov.32 q2[1], r3 -; CHECK-NEXT: vmov.32 q2[2], r2 -; CHECK-NEXT: vmov.32 q2[3], r2 -; CHECK-NEXT: vand q0, q0, q2 -; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: vmov r2, s1 -; CHECK-NEXT: adds r0, r0, r3 -; CHECK-NEXT: vmov r3, s2 -; CHECK-NEXT: adcs r1, r2 -; CHECK-NEXT: vmov r2, s3 -; CHECK-NEXT: adds r0, r0, r3 -; CHECK-NEXT: adcs r1, r2 -; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: vpt.i16 eq, q2, zr +; CHECK-NEXT: vmlalvt.s16 r0, r1, q0, q0 ; CHECK-NEXT: bx lr entry: %c = icmp eq <8 x i16> %b, zeroinitializer @@ -580,174 +458,8 @@ entry: define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_sextzext(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b) { ; CHECK-LABEL: add_v16i8_v16i16_v16i32_sextzext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: .pad #32 -; CHECK-NEXT: sub sp, #32 -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vcmp.i8 eq, q2, zr -; CHECK-NEXT: vmov.i8 q2, #0xff -; CHECK-NEXT: vmov.i8 q0, #0x0 -; CHECK-NEXT: vpsel q1, q2, q0 -; CHECK-NEXT: vmov q3, q2 
-; CHECK-NEXT: vmov.u8 r0, q1[0] -; CHECK-NEXT: vstrw.32 q2, [sp] @ 16-byte Spill -; CHECK-NEXT: vmov.16 q2[0], r0 -; CHECK-NEXT: vmov.u8 r0, q1[1] -; CHECK-NEXT: vmov.16 q2[1], r0 -; CHECK-NEXT: vmov.u8 r0, q1[2] -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmov.u8 r0, q1[3] -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov.u8 r0, q1[4] -; CHECK-NEXT: vmov.16 q2[4], r0 -; CHECK-NEXT: vmov.u8 r0, q1[5] -; CHECK-NEXT: vmov.16 q2[5], r0 -; CHECK-NEXT: vmov.u8 r0, q1[6] -; CHECK-NEXT: vmov.16 q2[6], r0 -; CHECK-NEXT: vmov.u8 r0, q1[7] -; CHECK-NEXT: vmov.16 q2[7], r0 -; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vcmp.i16 ne, q2, zr -; CHECK-NEXT: vmov.i32 q6, #0x0 -; CHECK-NEXT: vpsel q5, q3, q0 -; CHECK-NEXT: vmov q7, q6 -; CHECK-NEXT: vmov.u16 r0, q5[4] -; CHECK-NEXT: vmov.i32 q2, #0xffff -; CHECK-NEXT: vmov.32 q0[0], r0 -; CHECK-NEXT: vmov.u16 r0, q5[5] -; CHECK-NEXT: vmov.32 q0[1], r0 -; CHECK-NEXT: vmov.u16 r0, q5[6] -; CHECK-NEXT: vmov.32 q0[2], r0 -; CHECK-NEXT: vmov.u16 r0, q5[7] -; CHECK-NEXT: vmov.32 q0[3], r0 -; CHECK-NEXT: vmov.u8 r0, q4[0] -; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov.u8 r0, q4[1] -; CHECK-NEXT: vmov.16 q3[1], r0 -; CHECK-NEXT: vmov.u8 r0, q4[2] -; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov.u8 r0, q4[3] -; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov.u8 r0, q4[4] -; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov.u8 r0, q4[5] -; CHECK-NEXT: vmov.16 q3[5], r0 -; CHECK-NEXT: vmov.u8 r0, q4[6] -; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov.u8 r0, q4[7] -; CHECK-NEXT: vmov.16 q3[7], r0 -; CHECK-NEXT: vcmp.i32 ne, q0, zr -; CHECK-NEXT: vmullb.s8 q3, q3, q3 -; CHECK-NEXT: vmov.u16 r0, q3[4] -; CHECK-NEXT: vmov.32 q0[0], r0 -; CHECK-NEXT: vmov.u16 r0, q3[5] -; CHECK-NEXT: vmov.32 q0[1], r0 -; CHECK-NEXT: vmov.u16 r0, q3[6] -; CHECK-NEXT: vmov.32 q0[2], r0 -; CHECK-NEXT: vmov.u16 r0, q3[7] -; CHECK-NEXT: vmov.32 q0[3], r0 -; CHECK-NEXT: vmov.u8 r0, q1[8] -; CHECK-NEXT: vpst -; CHECK-NEXT: vandt q7, q0, q2 -; CHECK-NEXT: vmov.16 q0[0], r0 -; CHECK-NEXT: vmov.u8 r0, q1[9] -; CHECK-NEXT: vmov.16 q0[1], r0 -; CHECK-NEXT: vmov.u8 r0, q1[10] -; CHECK-NEXT: vmov.16 q0[2], r0 -; CHECK-NEXT: vmov.u8 r0, q1[11] -; CHECK-NEXT: vmov.16 q0[3], r0 -; CHECK-NEXT: vmov.u8 r0, q1[12] -; CHECK-NEXT: vmov.16 q0[4], r0 -; CHECK-NEXT: vmov.u8 r0, q1[13] -; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vmov.u8 r0, q1[14] -; CHECK-NEXT: vmov.16 q0[6], r0 -; CHECK-NEXT: vmov.u8 r0, q1[15] -; CHECK-NEXT: vmov.16 q0[7], r0 -; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload -; CHECK-NEXT: vcmp.i16 ne, q0, zr -; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: vpsel q0, q1, q0 -; CHECK-NEXT: vmov.u16 r0, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r0 -; CHECK-NEXT: vmov.u16 r0, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r0 -; CHECK-NEXT: vmov.u8 r0, q4[8] -; CHECK-NEXT: vcmp.i32 ne, q1, zr -; CHECK-NEXT: vmov.16 q1[0], r0 -; CHECK-NEXT: vmov.u8 r0, q4[9] -; CHECK-NEXT: vmov.16 q1[1], r0 -; CHECK-NEXT: vmov.u8 r0, q4[10] -; CHECK-NEXT: vmov.16 q1[2], r0 -; CHECK-NEXT: vmov.u8 r0, q4[11] -; CHECK-NEXT: vmov.16 q1[3], r0 -; CHECK-NEXT: vmov.u8 r0, q4[12] -; CHECK-NEXT: vmov.16 q1[4], r0 -; CHECK-NEXT: vmov.u8 r0, q4[13] -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmov.u8 r0, q4[14] -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov.u8 r0, q4[15] -; CHECK-NEXT: vmov.16 q1[7], r0 -; CHECK-NEXT: vmullb.s8 q1, 
q1, q1 -; CHECK-NEXT: vmov.u16 r0, q1[4] -; CHECK-NEXT: vmov.32 q4[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[5] -; CHECK-NEXT: vmov.32 q4[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[6] -; CHECK-NEXT: vmov.32 q4[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[7] -; CHECK-NEXT: vmov.32 q4[3], r0 -; CHECK-NEXT: vmov.u16 r0, q5[0] -; CHECK-NEXT: vmovlb.u16 q4, q4 -; CHECK-NEXT: vpst -; CHECK-NEXT: vaddt.i32 q7, q7, q4 -; CHECK-NEXT: vmov.32 q4[0], r0 -; CHECK-NEXT: vmov.u16 r0, q5[1] -; CHECK-NEXT: vmov.32 q4[1], r0 -; CHECK-NEXT: vmov.u16 r0, q5[2] -; CHECK-NEXT: vmov.32 q4[2], r0 -; CHECK-NEXT: vmov.u16 r0, q5[3] -; CHECK-NEXT: vmov.32 q4[3], r0 -; CHECK-NEXT: vmov.u16 r0, q3[0] -; CHECK-NEXT: vcmp.i32 ne, q4, zr -; CHECK-NEXT: vmov.32 q4[0], r0 -; CHECK-NEXT: vmov.u16 r0, q3[1] -; CHECK-NEXT: vmov.32 q4[1], r0 -; CHECK-NEXT: vmov.u16 r0, q3[2] -; CHECK-NEXT: vmov.32 q4[2], r0 -; CHECK-NEXT: vmov.u16 r0, q3[3] -; CHECK-NEXT: vmov.32 q4[3], r0 -; CHECK-NEXT: vmov.u16 r0, q0[0] -; CHECK-NEXT: vpst -; CHECK-NEXT: vandt q6, q4, q2 -; CHECK-NEXT: vmov.32 q2[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov.32 q2[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[2] -; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: vmov.u16 r0, q0[3] -; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: vmov.u16 r0, q1[0] -; CHECK-NEXT: vmov.32 q0[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[1] -; CHECK-NEXT: vmov.32 q0[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: vmov.32 q0[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[3] -; CHECK-NEXT: vmov.32 q0[3], r0 -; CHECK-NEXT: vmovlb.u16 q0, q0 -; CHECK-NEXT: vpt.i32 ne, q2, zr -; CHECK-NEXT: vaddt.i32 q6, q6, q0 -; CHECK-NEXT: vadd.i32 q0, q6, q7 -; CHECK-NEXT: vaddv.u32 r0, q0 -; CHECK-NEXT: add sp, #32 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpt.i8 eq, q2, zr +; CHECK-NEXT: vmlavt.s8 r0, q0, q0 ; CHECK-NEXT: bx lr entry: %c = icmp eq <16 x i8> %b, zeroinitializer @@ -2095,135 +1807,9 @@ entry: define arm_aapcs_vfpcc i64 @add_v8i16_v8i32_v8i64_acc_sextzext(<8 x i16> %x, <8 x i16> %y, <8 x i16> %b, i64 %a) { ; CHECK-LABEL: add_v8i16_v8i32_v8i64_acc_sextzext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: vmov.i8 q1, #0x0 -; CHECK-NEXT: vmov.i8 q3, #0xff -; CHECK-NEXT: vcmp.i16 eq, q2, zr -; CHECK-NEXT: vpsel q2, q3, q1 -; CHECK-NEXT: vmov.u16 r2, q2[0] -; CHECK-NEXT: vmov.32 q1[0], r2 -; CHECK-NEXT: vmov.u16 r2, q2[1] -; CHECK-NEXT: vmov.32 q1[1], r2 -; CHECK-NEXT: vmov.u16 r2, q2[2] -; CHECK-NEXT: vmov.32 q1[2], r2 -; CHECK-NEXT: vmov.u16 r2, q2[3] -; CHECK-NEXT: vmov.32 q1[3], r2 -; CHECK-NEXT: vcmp.i32 ne, q1, zr -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: and r3, r2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: vmov.32 q4[0], r3 -; CHECK-NEXT: vmov.32 q4[1], r3 -; CHECK-NEXT: ubfx r3, r2, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: vmov.32 q4[2], r3 -; CHECK-NEXT: vmov.32 q4[3], r3 -; CHECK-NEXT: vmov.u16 r3, q0[0] -; CHECK-NEXT: vmov.32 q1[0], r3 -; CHECK-NEXT: vmov.u16 r3, q0[1] -; CHECK-NEXT: vmov.32 q1[1], r3 -; CHECK-NEXT: vmov.u16 r3, q0[2] -; CHECK-NEXT: vmov.32 q1[2], r3 -; CHECK-NEXT: vmov.u16 r3, q0[3] -; CHECK-NEXT: vmov.32 q1[3], r3 -; CHECK-NEXT: vmullb.s16 q3, q1, q1 -; CHECK-NEXT: vmov.i64 q1, #0xffffffff -; CHECK-NEXT: vmov.f32 s20, s12 -; CHECK-NEXT: vmov.f32 s22, s13 -; CHECK-NEXT: vand q5, q5, q1 -; CHECK-NEXT: vand q4, q5, q4 -; CHECK-NEXT: vmov.f32 s20, s14 -; CHECK-NEXT: vmov r3, s18 
-; CHECK-NEXT: vmov r4, s16 -; CHECK-NEXT: vmov r12, s19 -; CHECK-NEXT: vmov lr, s17 -; CHECK-NEXT: vmov.f32 s22, s15 -; CHECK-NEXT: vand q3, q5, q1 -; CHECK-NEXT: adds r5, r4, r3 -; CHECK-NEXT: ubfx r3, r2, #8, #1 -; CHECK-NEXT: rsb.w r3, r3, #0 -; CHECK-NEXT: ubfx r2, r2, #12, #1 -; CHECK-NEXT: vmov.32 q4[0], r3 -; CHECK-NEXT: rsb.w r2, r2, #0 -; CHECK-NEXT: vmov.32 q4[1], r3 -; CHECK-NEXT: adc.w r4, lr, r12 -; CHECK-NEXT: vmov.32 q4[2], r2 -; CHECK-NEXT: vmov.32 q4[3], r2 -; CHECK-NEXT: vand q3, q3, q4 -; CHECK-NEXT: vmov r3, s12 -; CHECK-NEXT: vmov r2, s13 -; CHECK-NEXT: adds r3, r3, r5 -; CHECK-NEXT: vmov r5, s15 -; CHECK-NEXT: adcs r2, r4 -; CHECK-NEXT: vmov r4, s14 -; CHECK-NEXT: adds.w r12, r3, r4 -; CHECK-NEXT: adc.w r3, r2, r5 -; CHECK-NEXT: vmov.u16 r2, q2[4] -; CHECK-NEXT: vmov.32 q3[0], r2 -; CHECK-NEXT: vmov.u16 r2, q2[5] -; CHECK-NEXT: vmov.32 q3[1], r2 -; CHECK-NEXT: vmov.u16 r2, q2[6] -; CHECK-NEXT: vmov.32 q3[2], r2 -; CHECK-NEXT: vmov.u16 r2, q2[7] -; CHECK-NEXT: vmov.32 q3[3], r2 -; CHECK-NEXT: vcmp.i32 ne, q3, zr -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: and r5, r2, #1 -; CHECK-NEXT: rsbs r5, r5, #0 -; CHECK-NEXT: vmov.32 q2[0], r5 -; CHECK-NEXT: vmov.32 q2[1], r5 -; CHECK-NEXT: ubfx r5, r2, #4, #1 -; CHECK-NEXT: rsbs r5, r5, #0 -; CHECK-NEXT: vmov.32 q2[2], r5 -; CHECK-NEXT: vmov.32 q2[3], r5 -; CHECK-NEXT: vmov.u16 r5, q0[4] -; CHECK-NEXT: vmov.32 q3[0], r5 -; CHECK-NEXT: vmov.u16 r5, q0[5] -; CHECK-NEXT: vmov.32 q3[1], r5 -; CHECK-NEXT: vmov.u16 r5, q0[6] -; CHECK-NEXT: vmov.32 q3[2], r5 -; CHECK-NEXT: vmov.u16 r5, q0[7] -; CHECK-NEXT: vmov.32 q3[3], r5 -; CHECK-NEXT: vmullb.s16 q0, q3, q3 -; CHECK-NEXT: vmov.f32 s12, s0 -; CHECK-NEXT: vmov.f32 s14, s1 -; CHECK-NEXT: vand q3, q3, q1 -; CHECK-NEXT: vand q2, q3, q2 -; CHECK-NEXT: vmov.f32 s12, s2 -; CHECK-NEXT: vmov r4, s8 -; CHECK-NEXT: vmov r5, s9 -; CHECK-NEXT: vmov.f32 s14, s3 -; CHECK-NEXT: vand q0, q3, q1 -; CHECK-NEXT: adds.w r4, r4, r12 -; CHECK-NEXT: adc.w r12, r3, r5 -; CHECK-NEXT: vmov r3, s10 -; CHECK-NEXT: vmov r5, s11 -; CHECK-NEXT: adds r3, r3, r4 -; CHECK-NEXT: ubfx r4, r2, #8, #1 -; CHECK-NEXT: rsb.w r4, r4, #0 -; CHECK-NEXT: ubfx r2, r2, #12, #1 -; CHECK-NEXT: vmov.32 q2[0], r4 -; CHECK-NEXT: rsb.w r2, r2, #0 -; CHECK-NEXT: vmov.32 q2[1], r4 -; CHECK-NEXT: adc.w r5, r5, r12 -; CHECK-NEXT: vmov.32 q2[2], r2 -; CHECK-NEXT: vmov.32 q2[3], r2 -; CHECK-NEXT: vand q0, q0, q2 -; CHECK-NEXT: vmov r4, s0 -; CHECK-NEXT: vmov r2, s1 -; CHECK-NEXT: adds r3, r3, r4 -; CHECK-NEXT: vmov r4, s2 -; CHECK-NEXT: adcs r2, r5 -; CHECK-NEXT: vmov r5, s3 -; CHECK-NEXT: adds r3, r3, r4 -; CHECK-NEXT: adcs r2, r5 -; CHECK-NEXT: adds r0, r0, r3 -; CHECK-NEXT: adcs r1, r2 -; CHECK-NEXT: vpop {d8, d9, d10, d11} -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: vpt.i16 eq, q2, zr +; CHECK-NEXT: vmlalvat.s16 r0, r1, q0, q0 +; CHECK-NEXT: bx lr entry: %c = icmp eq <8 x i16> %b, zeroinitializer %xx = sext <8 x i16> %x to <8 x i32> @@ -2421,174 +2007,8 @@ entry: define arm_aapcs_vfpcc i32 @add_v16i8_v16i16_v16i32_acc_sextzext(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b, i32 %a) { ; CHECK-LABEL: add_v16i8_v16i16_v16i32_acc_sextzext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: .pad #32 -; CHECK-NEXT: sub sp, #32 -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vcmp.i8 eq, q2, zr -; CHECK-NEXT: vmov.i8 q2, #0xff -; CHECK-NEXT: vmov.i8 q0, #0x0 -; CHECK-NEXT: vpsel q1, q2, q0 -; CHECK-NEXT: vmov q3, q2 -; CHECK-NEXT: vmov.u8 r1, 
q1[0] -; CHECK-NEXT: vstrw.32 q2, [sp] @ 16-byte Spill -; CHECK-NEXT: vmov.16 q2[0], r1 -; CHECK-NEXT: vmov.u8 r1, q1[1] -; CHECK-NEXT: vmov.16 q2[1], r1 -; CHECK-NEXT: vmov.u8 r1, q1[2] -; CHECK-NEXT: vmov.16 q2[2], r1 -; CHECK-NEXT: vmov.u8 r1, q1[3] -; CHECK-NEXT: vmov.16 q2[3], r1 -; CHECK-NEXT: vmov.u8 r1, q1[4] -; CHECK-NEXT: vmov.16 q2[4], r1 -; CHECK-NEXT: vmov.u8 r1, q1[5] -; CHECK-NEXT: vmov.16 q2[5], r1 -; CHECK-NEXT: vmov.u8 r1, q1[6] -; CHECK-NEXT: vmov.16 q2[6], r1 -; CHECK-NEXT: vmov.u8 r1, q1[7] -; CHECK-NEXT: vmov.16 q2[7], r1 -; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vcmp.i16 ne, q2, zr -; CHECK-NEXT: vmov.i32 q6, #0x0 -; CHECK-NEXT: vpsel q5, q3, q0 -; CHECK-NEXT: vmov q7, q6 -; CHECK-NEXT: vmov.u16 r1, q5[4] -; CHECK-NEXT: vmov.i32 q2, #0xffff -; CHECK-NEXT: vmov.32 q0[0], r1 -; CHECK-NEXT: vmov.u16 r1, q5[5] -; CHECK-NEXT: vmov.32 q0[1], r1 -; CHECK-NEXT: vmov.u16 r1, q5[6] -; CHECK-NEXT: vmov.32 q0[2], r1 -; CHECK-NEXT: vmov.u16 r1, q5[7] -; CHECK-NEXT: vmov.32 q0[3], r1 -; CHECK-NEXT: vmov.u8 r1, q4[0] -; CHECK-NEXT: vmov.16 q3[0], r1 -; CHECK-NEXT: vmov.u8 r1, q4[1] -; CHECK-NEXT: vmov.16 q3[1], r1 -; CHECK-NEXT: vmov.u8 r1, q4[2] -; CHECK-NEXT: vmov.16 q3[2], r1 -; CHECK-NEXT: vmov.u8 r1, q4[3] -; CHECK-NEXT: vmov.16 q3[3], r1 -; CHECK-NEXT: vmov.u8 r1, q4[4] -; CHECK-NEXT: vmov.16 q3[4], r1 -; CHECK-NEXT: vmov.u8 r1, q4[5] -; CHECK-NEXT: vmov.16 q3[5], r1 -; CHECK-NEXT: vmov.u8 r1, q4[6] -; CHECK-NEXT: vmov.16 q3[6], r1 -; CHECK-NEXT: vmov.u8 r1, q4[7] -; CHECK-NEXT: vmov.16 q3[7], r1 -; CHECK-NEXT: vcmp.i32 ne, q0, zr -; CHECK-NEXT: vmullb.s8 q3, q3, q3 -; CHECK-NEXT: vmov.u16 r1, q3[4] -; CHECK-NEXT: vmov.32 q0[0], r1 -; CHECK-NEXT: vmov.u16 r1, q3[5] -; CHECK-NEXT: vmov.32 q0[1], r1 -; CHECK-NEXT: vmov.u16 r1, q3[6] -; CHECK-NEXT: vmov.32 q0[2], r1 -; CHECK-NEXT: vmov.u16 r1, q3[7] -; CHECK-NEXT: vmov.32 q0[3], r1 -; CHECK-NEXT: vmov.u8 r1, q1[8] -; CHECK-NEXT: vpst -; CHECK-NEXT: vandt q7, q0, q2 -; CHECK-NEXT: vmov.16 q0[0], r1 -; CHECK-NEXT: vmov.u8 r1, q1[9] -; CHECK-NEXT: vmov.16 q0[1], r1 -; CHECK-NEXT: vmov.u8 r1, q1[10] -; CHECK-NEXT: vmov.16 q0[2], r1 -; CHECK-NEXT: vmov.u8 r1, q1[11] -; CHECK-NEXT: vmov.16 q0[3], r1 -; CHECK-NEXT: vmov.u8 r1, q1[12] -; CHECK-NEXT: vmov.16 q0[4], r1 -; CHECK-NEXT: vmov.u8 r1, q1[13] -; CHECK-NEXT: vmov.16 q0[5], r1 -; CHECK-NEXT: vmov.u8 r1, q1[14] -; CHECK-NEXT: vmov.16 q0[6], r1 -; CHECK-NEXT: vmov.u8 r1, q1[15] -; CHECK-NEXT: vmov.16 q0[7], r1 -; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload -; CHECK-NEXT: vcmp.i16 ne, q0, zr -; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: vpsel q0, q1, q0 -; CHECK-NEXT: vmov.u16 r1, q0[4] -; CHECK-NEXT: vmov.32 q1[0], r1 -; CHECK-NEXT: vmov.u16 r1, q0[5] -; CHECK-NEXT: vmov.32 q1[1], r1 -; CHECK-NEXT: vmov.u16 r1, q0[6] -; CHECK-NEXT: vmov.32 q1[2], r1 -; CHECK-NEXT: vmov.u16 r1, q0[7] -; CHECK-NEXT: vmov.32 q1[3], r1 -; CHECK-NEXT: vmov.u8 r1, q4[8] -; CHECK-NEXT: vcmp.i32 ne, q1, zr -; CHECK-NEXT: vmov.16 q1[0], r1 -; CHECK-NEXT: vmov.u8 r1, q4[9] -; CHECK-NEXT: vmov.16 q1[1], r1 -; CHECK-NEXT: vmov.u8 r1, q4[10] -; CHECK-NEXT: vmov.16 q1[2], r1 -; CHECK-NEXT: vmov.u8 r1, q4[11] -; CHECK-NEXT: vmov.16 q1[3], r1 -; CHECK-NEXT: vmov.u8 r1, q4[12] -; CHECK-NEXT: vmov.16 q1[4], r1 -; CHECK-NEXT: vmov.u8 r1, q4[13] -; CHECK-NEXT: vmov.16 q1[5], r1 -; CHECK-NEXT: vmov.u8 r1, q4[14] -; CHECK-NEXT: vmov.16 q1[6], r1 -; CHECK-NEXT: vmov.u8 r1, q4[15] -; CHECK-NEXT: vmov.16 q1[7], r1 -; CHECK-NEXT: vmullb.s8 q1, q1, q1 -; CHECK-NEXT: 
vmov.u16 r1, q1[4]
-; CHECK-NEXT: vmov.32 q4[0], r1
-; CHECK-NEXT: vmov.u16 r1, q1[5]
-; CHECK-NEXT: vmov.32 q4[1], r1
-; CHECK-NEXT: vmov.u16 r1, q1[6]
-; CHECK-NEXT: vmov.32 q4[2], r1
-; CHECK-NEXT: vmov.u16 r1, q1[7]
-; CHECK-NEXT: vmov.32 q4[3], r1
-; CHECK-NEXT: vmov.u16 r1, q5[0]
-; CHECK-NEXT: vmovlb.u16 q4, q4
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vaddt.i32 q7, q7, q4
-; CHECK-NEXT: vmov.32 q4[0], r1
-; CHECK-NEXT: vmov.u16 r1, q5[1]
-; CHECK-NEXT: vmov.32 q4[1], r1
-; CHECK-NEXT: vmov.u16 r1, q5[2]
-; CHECK-NEXT: vmov.32 q4[2], r1
-; CHECK-NEXT: vmov.u16 r1, q5[3]
-; CHECK-NEXT: vmov.32 q4[3], r1
-; CHECK-NEXT: vmov.u16 r1, q3[0]
-; CHECK-NEXT: vcmp.i32 ne, q4, zr
-; CHECK-NEXT: vmov.32 q4[0], r1
-; CHECK-NEXT: vmov.u16 r1, q3[1]
-; CHECK-NEXT: vmov.32 q4[1], r1
-; CHECK-NEXT: vmov.u16 r1, q3[2]
-; CHECK-NEXT: vmov.32 q4[2], r1
-; CHECK-NEXT: vmov.u16 r1, q3[3]
-; CHECK-NEXT: vmov.32 q4[3], r1
-; CHECK-NEXT: vmov.u16 r1, q0[0]
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vandt q6, q4, q2
-; CHECK-NEXT: vmov.32 q2[0], r1
-; CHECK-NEXT: vmov.u16 r1, q0[1]
-; CHECK-NEXT: vmov.32 q2[1], r1
-; CHECK-NEXT: vmov.u16 r1, q0[2]
-; CHECK-NEXT: vmov.32 q2[2], r1
-; CHECK-NEXT: vmov.u16 r1, q0[3]
-; CHECK-NEXT: vmov.32 q2[3], r1
-; CHECK-NEXT: vmov.u16 r1, q1[0]
-; CHECK-NEXT: vmov.32 q0[0], r1
-; CHECK-NEXT: vmov.u16 r1, q1[1]
-; CHECK-NEXT: vmov.32 q0[1], r1
-; CHECK-NEXT: vmov.u16 r1, q1[2]
-; CHECK-NEXT: vmov.32 q0[2], r1
-; CHECK-NEXT: vmov.u16 r1, q1[3]
-; CHECK-NEXT: vmov.32 q0[3], r1
-; CHECK-NEXT: vmovlb.u16 q0, q0
-; CHECK-NEXT: vpt.i32 ne, q2, zr
-; CHECK-NEXT: vaddt.i32 q6, q6, q0
-; CHECK-NEXT: vadd.i32 q0, q6, q7
-; CHECK-NEXT: vaddva.u32 r0, q0
-; CHECK-NEXT: add sp, #32
-; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT: vpt.i8 eq, q2, zr
+; CHECK-NEXT: vmlavat.s8 r0, q0, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c = icmp eq <16 x i8> %b, zeroinitializer

From d030aad7893a8cf7a68877b8b55eed1cd632411a Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 12 Sep 2020 14:31:26 +0100
Subject: [PATCH 0450/1079] [InstCombine][X86] Add tests for masked
 load/stores with comparisons.

As detailed on PR11210, if the mask is known to come from a (sign extended)
bool vector (e.g. comparisons) then we can represent it with a generic masked
load/store without losing anything.
---
 .../InstCombine/X86/x86-masked-memops.ll | 107 ++++++++++++++----
 1 file changed, 87 insertions(+), 20 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll b/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll
index d845dcb5cac4d..2975b1c274795 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll
@@ -12,7 +12,21 @@ define <4 x float> @mload(i8* %f, <4 x i32> %mask) {
 ;
 %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> %mask)
 ret <4 x float> %ld
+}
+
+; TODO: If the mask comes from a comparison, convert to an LLVM intrinsic. The backend should optimize further.
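; An illustrative sketch, not part of this patch: assuming the mask is the
; sign-extended <4 x i1> comparison result described above, the target-specific
; call could be rewritten to the generic masked-load intrinsic (the stores
; below would use @llvm.masked.store analogously), e.g.:
;   %vptr = bitcast i8* %f to <4 x float>*
;   %gld = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %vptr, i32 1, <4 x i1> %icmp, <4 x float> undef)
; The names %vptr and %gld are hypothetical, introduced only for this sketch.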
+define <4 x float> @mload_v4f32_cmp(i8* %f, <4 x i32> %src) { +; CHECK-LABEL: @mload_v4f32_cmp( +; CHECK-NEXT: [[ICMP:%.*]] = icmp ne <4 x i32> [[SRC:%.*]], zeroinitializer +; CHECK-NEXT: [[MASK:%.*]] = sext <4 x i1> [[ICMP]] to <4 x i32> +; CHECK-NEXT: [[LD:%.*]] = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* [[F:%.*]], <4 x i32> [[MASK]]) +; CHECK-NEXT: ret <4 x float> [[LD]] +; + %icmp = icmp ne <4 x i32> %src, zeroinitializer + %mask = sext <4 x i1> %icmp to <4 x i32> + %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> %mask) + ret <4 x float> %ld } ; Zero mask returns a zero vector. @@ -23,7 +37,6 @@ define <4 x float> @mload_zeros(i8* %f) { ; %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> zeroinitializer) ret <4 x float> %ld - } ; Only the sign bit matters. @@ -34,7 +47,6 @@ define <4 x float> @mload_fake_ones(i8* %f) { ; %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> ) ret <4 x float> %ld - } ; All mask bits are set, so this is just a vector load. @@ -47,7 +59,6 @@ define <4 x float> @mload_real_ones(i8* %f) { ; %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> ) ret <4 x float> %ld - } ; It's a constant mask, so convert to an LLVM intrinsic. The backend should optimize further. @@ -60,7 +71,6 @@ define <4 x float> @mload_one_one(i8* %f) { ; %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> ) ret <4 x float> %ld - } ; Try doubles. @@ -73,7 +83,6 @@ define <2 x double> @mload_one_one_double(i8* %f) { ; %ld = tail call <2 x double> @llvm.x86.avx.maskload.pd(i8* %f, <2 x i64> ) ret <2 x double> %ld - } ; Try 256-bit FP ops. @@ -86,7 +95,24 @@ define <8 x float> @mload_v8f32(i8* %f) { ; %ld = tail call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %f, <8 x i32> ) ret <8 x float> %ld +} +define <8 x float> @mload_v8f32_cmp(i8* %f, <8 x float> %src0, <8 x float> %src1) { +; CHECK-LABEL: @mload_v8f32_cmp( +; CHECK-NEXT: [[ICMP0:%.*]] = fcmp one <8 x float> [[SRC0:%.*]], zeroinitializer +; CHECK-NEXT: [[ICMP1:%.*]] = fcmp one <8 x float> [[SRC1:%.*]], zeroinitializer +; CHECK-NEXT: [[MASK1:%.*]] = and <8 x i1> [[ICMP0]], [[ICMP1]] +; CHECK-NEXT: [[MASK:%.*]] = sext <8 x i1> [[MASK1]] to <8 x i32> +; CHECK-NEXT: [[LD:%.*]] = tail call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* [[F:%.*]], <8 x i32> [[MASK]]) +; CHECK-NEXT: ret <8 x float> [[LD]] +; + %icmp0 = fcmp one <8 x float> %src0, zeroinitializer + %icmp1 = fcmp one <8 x float> %src1, zeroinitializer + %ext0 = sext <8 x i1> %icmp0 to <8 x i32> + %ext1 = sext <8 x i1> %icmp1 to <8 x i32> + %mask = and <8 x i32> %ext0, %ext1 + %ld = tail call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %f, <8 x i32> %mask) + ret <8 x float> %ld } define <4 x double> @mload_v4f64(i8* %f) { @@ -97,7 +123,6 @@ define <4 x double> @mload_v4f64(i8* %f) { ; %ld = tail call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %f, <4 x i64> ) ret <4 x double> %ld - } ; Try the AVX2 variants. 
@@ -110,7 +135,6 @@ define <4 x i32> @mload_v4i32(i8* %f) { ; %ld = tail call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %f, <4 x i32> ) ret <4 x i32> %ld - } define <2 x i64> @mload_v2i64(i8* %f) { @@ -121,7 +145,6 @@ define <2 x i64> @mload_v2i64(i8* %f) { ; %ld = tail call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %f, <2 x i64> ) ret <2 x i64> %ld - } define <8 x i32> @mload_v8i32(i8* %f) { @@ -132,7 +155,6 @@ define <8 x i32> @mload_v8i32(i8* %f) { ; %ld = tail call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %f, <8 x i32> ) ret <8 x i32> %ld - } define <4 x i64> @mload_v4i64(i8* %f) { @@ -143,9 +165,20 @@ define <4 x i64> @mload_v4i64(i8* %f) { ; %ld = tail call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %f, <4 x i64> ) ret <4 x i64> %ld - } +define <4 x i64> @mload_v4i64_cmp(i8* %f, <4 x i64> %src) { +; CHECK-LABEL: @mload_v4i64_cmp( +; CHECK-NEXT: [[SRC_LOBIT:%.*]] = ashr <4 x i64> [[SRC:%.*]], +; CHECK-NEXT: [[SRC_LOBIT_NOT:%.*]] = xor <4 x i64> [[SRC_LOBIT]], +; CHECK-NEXT: [[LD:%.*]] = tail call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* [[F:%.*]], <4 x i64> [[SRC_LOBIT_NOT]]) +; CHECK-NEXT: ret <4 x i64> [[LD]] +; + %icmp = icmp sge <4 x i64> %src, zeroinitializer + %mask = sext <4 x i1> %icmp to <4 x i64> + %ld = tail call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %f, <4 x i64> %mask) + ret <4 x i64> %ld +} ;; MASKED STORES @@ -158,7 +191,21 @@ define void @mstore(i8* %f, <4 x i32> %mask, <4 x float> %v) { ; tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> %mask, <4 x float> %v) ret void +} +; TODO: If the mask comes from a comparison, convert to an LLVM intrinsic. The backend should optimize further. + +define void @mstore_v4f32_cmp(i8* %f, <4 x i32> %src, <4 x float> %v) { +; CHECK-LABEL: @mstore_v4f32_cmp( +; CHECK-NEXT: [[ICMP:%.*]] = icmp eq <4 x i32> [[SRC:%.*]], zeroinitializer +; CHECK-NEXT: [[MASK:%.*]] = sext <4 x i1> [[ICMP]] to <4 x i32> +; CHECK-NEXT: tail call void @llvm.x86.avx.maskstore.ps(i8* [[F:%.*]], <4 x i32> [[MASK]], <4 x float> [[V:%.*]]) +; CHECK-NEXT: ret void +; + %icmp = icmp eq <4 x i32> %src, zeroinitializer + %mask = sext <4 x i1> %icmp to <4 x i32> + tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> %mask, <4 x float> %v) + ret void } ; Zero mask is a nop. @@ -169,7 +216,6 @@ define void @mstore_zeros(i8* %f, <4 x float> %v) { ; tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> zeroinitializer, <4 x float> %v) ret void - } ; Only the sign bit matters. @@ -180,7 +226,6 @@ define void @mstore_fake_ones(i8* %f, <4 x float> %v) { ; tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> , <4 x float> %v) ret void - } ; All mask bits are set, so this is just a vector store. @@ -193,7 +238,6 @@ define void @mstore_real_ones(i8* %f, <4 x float> %v) { ; tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> , <4 x float> %v) ret void - } ; It's a constant mask, so convert to an LLVM intrinsic. The backend should optimize further. @@ -206,7 +250,6 @@ define void @mstore_one_one(i8* %f, <4 x float> %v) { ; tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> , <4 x float> %v) ret void - } ; Try doubles. @@ -219,7 +262,6 @@ define void @mstore_one_one_double(i8* %f, <2 x double> %v) { ; tail call void @llvm.x86.avx.maskstore.pd(i8* %f, <2 x i64> , <2 x double> %v) ret void - } ; Try 256-bit FP ops. 
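; Illustrative sketch (not from this patch; the function name is invented):
; the store-side analogue of the load fold above. A compare-fed mask lets the
; x86 maskstore become the target-independent masked store, again with
; alignment 1 since the x86 intrinsic guarantees none.
define void @mstore_v4f32_cmp_generic(i8* %f, <4 x i32> %src, <4 x float> %v) {
  %icmp = icmp eq <4 x i32> %src, zeroinitializer
  %ptr = bitcast i8* %f to <4 x float>*
  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v, <4 x float>* %ptr, i32 1, <4 x i1> %icmp)
  ret void
}
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)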
@@ -232,7 +274,6 @@ define void @mstore_v8f32(i8* %f, <8 x float> %v) {
 ;
   tail call void @llvm.x86.avx.maskstore.ps.256(i8* %f, <8 x i32> , <8 x float> %v)
   ret void
-
 }
 
 define void @mstore_v4f64(i8* %f, <4 x double> %v) {
@@ -243,7 +284,20 @@ define void @mstore_v4f64(i8* %f, <4 x double> %v) {
 ;
   tail call void @llvm.x86.avx.maskstore.pd.256(i8* %f, <4 x i64> , <4 x double> %v)
   ret void
+}
+define void @mstore_v4f64_cmp(i8* %f, <4 x i32> %src, <4 x double> %v) {
+; CHECK-LABEL: @mstore_v4f64_cmp(
+; CHECK-NEXT: [[SRC_LOBIT:%.*]] = ashr <4 x i32> [[SRC:%.*]],
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[SRC_LOBIT]],
+; CHECK-NEXT: [[DOTNOT:%.*]] = sext <4 x i32> [[TMP1]] to <4 x i64>
+; CHECK-NEXT: tail call void @llvm.x86.avx.maskstore.pd.256(i8* [[F:%.*]], <4 x i64> [[DOTNOT]], <4 x double> [[V:%.*]])
+; CHECK-NEXT: ret void
+;
+  %icmp = icmp sge <4 x i32> %src, zeroinitializer
+  %mask = sext <4 x i1> %icmp to <4 x i64>
+  tail call void @llvm.x86.avx.maskstore.pd.256(i8* %f, <4 x i64> %mask, <4 x double> %v)
+  ret void
 }
 
 ; Try the AVX2 variants.
@@ -256,7 +310,6 @@ define void @mstore_v4i32(i8* %f, <4 x i32> %v) {
 ;
   tail call void @llvm.x86.avx2.maskstore.d(i8* %f, <4 x i32> , <4 x i32> %v)
   ret void
-
 }
 
 define void @mstore_v2i64(i8* %f, <2 x i64> %v) {
@@ -278,7 +331,6 @@ define void @mstore_v8i32(i8* %f, <8 x i32> %v) {
 ;
   tail call void @llvm.x86.avx2.maskstore.d.256(i8* %f, <8 x i32> , <8 x i32> %v)
   ret void
-
 }
 
 define void @mstore_v4i64(i8* %f, <4 x i64> %v) {
@@ -289,7 +341,24 @@ define void @mstore_v4i64(i8* %f, <4 x i64> %v) {
 ;
   tail call void @llvm.x86.avx2.maskstore.q.256(i8* %f, <4 x i64> , <4 x i64> %v)
   ret void
+}
+define void @mstore_v4i64_cmp(i8* %f, <4 x i64> %src0, <4 x i64> %src1, <4 x i64> %v) {
+; CHECK-LABEL: @mstore_v4i64_cmp(
+; CHECK-NEXT: [[ICMP0:%.*]] = icmp eq <4 x i64> [[SRC0:%.*]], zeroinitializer
+; CHECK-NEXT: [[ICMP1:%.*]] = icmp ne <4 x i64> [[SRC1:%.*]], zeroinitializer
+; CHECK-NEXT: [[MASK1:%.*]] = and <4 x i1> [[ICMP0]], [[ICMP1]]
+; CHECK-NEXT: [[MASK:%.*]] = sext <4 x i1> [[MASK1]] to <4 x i64>
+; CHECK-NEXT: tail call void @llvm.x86.avx2.maskstore.q.256(i8* [[F:%.*]], <4 x i64> [[MASK]], <4 x i64> [[V:%.*]])
+; CHECK-NEXT: ret void
+;
+  %icmp0 = icmp eq <4 x i64> %src0, zeroinitializer
+  %icmp1 = icmp ne <4 x i64> %src1, zeroinitializer
+  %ext0 = sext <4 x i1> %icmp0 to <4 x i64>
+  %ext1 = sext <4 x i1> %icmp1 to <4 x i64>
+  %mask = and <4 x i64> %ext0, %ext1
+  tail call void @llvm.x86.avx2.maskstore.q.256(i8* %f, <4 x i64> %mask, <4 x i64> %v)
+  ret void
 }
 
 ; The original SSE2 masked store variant.
@@ -300,10 +369,8 @@ define void @mstore_v16i8_sse2_zeros(<16 x i8> %d, i8* %p) {
 ;
   tail call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %d, <16 x i8> zeroinitializer, i8* %p)
   ret void
-
 }
-
 declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>)
 declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>)
 declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>)

From 78de7297abe2e8fa782682168989c70e3cb34a5c Mon Sep 17 00:00:00 2001
From: Tyker
Date: Sat, 12 Sep 2020 13:36:45 +0200
Subject: [PATCH 0451/1079] Reland [AssumeBundles] Use operand bundles to
 encode alignment assumptions

NOTE: There is a mailing list discussion on this:
http://lists.llvm.org/pipermail/llvm-dev/2019-December/137632.html

Complementary to the assumption outliner prototype in D71692, this patch
shows how we could simplify the code emitted for an alignment assumption.
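As a minimal before/after sketch (the value names are invented; the shapes
match the test updates below), the old expansion materialized the mask check
explicitly:

  %ptrint = ptrtoint double* %p to i64
  %maskedptr = and i64 %ptrint, 63
  %maskcond = icmp eq i64 %maskedptr, 0
  call void @llvm.assume(i1 %maskcond)

whereas the operand-bundle form carries the pointer and alignment on a
trivially true assume:

  call void @llvm.assume(i1 true) [ "align"(double* %p, i64 64) ]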
The generated code is smaller, less fragile, and it makes it easier to
recognize the additional use as an "assumption use".

As mentioned in D71692 and on the mailing list, we could adopt this scheme,
and similar schemes for other patterns, without adopting the assumption
outlining.
---
 clang/lib/CodeGen/CodeGenFunction.cpp | 36 +++++-
 clang/test/CodeGen/align_value.cpp | 30 +----
 clang/test/CodeGen/alloc-align-attr.c | 44 ++-----
 ...ssume-aligned-and-alloc-align-attributes.c | 8 +-
 clang/test/CodeGen/builtin-align-array.c | 32 ++---
 clang/test/CodeGen/builtin-align.c | 24 +---
 clang/test/CodeGen/builtin-assume-aligned.c | 32 +----
 ...mption-attribute-align_value-on-lvalue.cpp | 8 +-
 ...tion-attribute-align_value-on-paramvar.cpp | 2 +-
 ...ibute-alloc_align-on-function-variable.cpp | 10 +-
 ...tion-attribute-alloc_align-on-function.cpp | 2 +-
 ...-assume_aligned-on-function-two-params.cpp | 10 +-
 ...n-attribute-assume_aligned-on-function.cpp | 2 +-
 ...n_assume_aligned-three-params-variable.cpp | 10 +-
 ...on-builtin_assume_aligned-three-params.cpp | 10 +-
 ...tion-builtin_assume_aligned-two-params.cpp | 8 +-
 .../catch-alignment-assumption-openmp.cpp | 8 +-
 .../non-power-of-2-alignment-assumptions.c | 13 +-
 clang/test/OpenMP/simd_codegen.cpp | 16 ---
 clang/test/OpenMP/simd_metadata.c | 117 +++++++----------
 ...s_distribute_parallel_for_simd_codegen.cpp | 5 +-
 llvm/include/llvm/IR/IRBuilder.h | 28 ++--
 .../Scalar/AlignmentFromAssumptions.h | 6 +-
 llvm/lib/Analysis/AssumeBundleQueries.cpp | 13 +-
 llvm/lib/IR/IRBuilder.cpp | 77 ++++------
 llvm/lib/IR/Verifier.cpp | 23 +++-
 .../InstCombine/InstCombineCalls.cpp | 15 ++-
 .../Scalar/AlignmentFromAssumptions.cpp | 121 +++++------------
 .../AlignmentFromAssumptions/simple.ll | 75 ++++------
 .../AlignmentFromAssumptions/simple32.ll | 114 ++++-------------
 llvm/test/Transforms/Inline/align.ll | 15 +--
 llvm/test/Transforms/Inline/byref-align.ll | 9 +-
 llvm/test/Transforms/InstCombine/assume.ll | 1 +
 .../inlining-alignment-assumptions.ll | 27 +---
 llvm/test/Verifier/assume-bundles.ll | 16 ++-
 .../Analysis/AssumeBundleQueriesTest.cpp | 38 ++++++
 36 files changed, 372 insertions(+), 633 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index e7f81087f0d20..016c7105b52dc 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -2157,13 +2157,39 @@ void CodeGenFunction::emitAlignmentAssumption(llvm::Value *PtrValue,
                                               SourceLocation AssumptionLoc,
                                               llvm::Value *Alignment,
                                               llvm::Value *OffsetValue) {
-  llvm::Value *TheCheck;
-  llvm::Instruction *Assumption = Builder.CreateAlignmentAssumption(
-      CGM.getDataLayout(), PtrValue, Alignment, OffsetValue, &TheCheck);
+  if (Alignment->getType() != IntPtrTy)
+    Alignment =
+        Builder.CreateIntCast(Alignment, IntPtrTy, false, "casted.align");
+  if (OffsetValue && OffsetValue->getType() != IntPtrTy)
+    OffsetValue =
+        Builder.CreateIntCast(OffsetValue, IntPtrTy, true, "casted.offset");
+  llvm::Value *TheCheck = nullptr;
   if (SanOpts.has(SanitizerKind::Alignment)) {
-    emitAlignmentAssumptionCheck(PtrValue, Ty, Loc, AssumptionLoc, Alignment,
-                                 OffsetValue, TheCheck, Assumption);
+    llvm::Value *PtrIntValue =
+        Builder.CreatePtrToInt(PtrValue, IntPtrTy, "ptrint");
+
+    if (OffsetValue) {
+      bool IsOffsetZero = false;
+      if (const auto *CI = dyn_cast<llvm::ConstantInt>(OffsetValue))
+        IsOffsetZero = CI->isZero();
+
+      if (!IsOffsetZero)
+        PtrIntValue = Builder.CreateSub(PtrIntValue, OffsetValue, "offsetptr");
+    }
+
+    llvm::Value *Zero = 
llvm::ConstantInt::get(IntPtrTy, 0); + llvm::Value *Mask = + Builder.CreateSub(Alignment, llvm::ConstantInt::get(IntPtrTy, 1)); + llvm::Value *MaskedPtr = Builder.CreateAnd(PtrIntValue, Mask, "maskedptr"); + TheCheck = Builder.CreateICmpEQ(MaskedPtr, Zero, "maskcond"); } + llvm::Instruction *Assumption = Builder.CreateAlignmentAssumption( + CGM.getDataLayout(), PtrValue, Alignment, OffsetValue); + + if (!SanOpts.has(SanitizerKind::Alignment)) + return; + emitAlignmentAssumptionCheck(PtrValue, Ty, Loc, AssumptionLoc, Alignment, + OffsetValue, TheCheck, Assumption); } void CodeGenFunction::emitAlignmentAssumption(llvm::Value *PtrValue, diff --git a/clang/test/CodeGen/align_value.cpp b/clang/test/CodeGen/align_value.cpp index acbfbaf2ba5c7..a18cb651fe4c0 100644 --- a/clang/test/CodeGen/align_value.cpp +++ b/clang/test/CodeGen/align_value.cpp @@ -29,10 +29,7 @@ struct ad_struct { // CHECK-NEXT: [[TMP0:%.*]] = load %struct.ad_struct*, %struct.ad_struct** [[X_ADDR]], align 8 // CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_AD_STRUCT:%.*]], %struct.ad_struct* [[TMP0]], i32 0, i32 0 // CHECK-NEXT: [[TMP1:%.*]] = load double*, double** [[A]], align 8 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint double* [[TMP1]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 63 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[TMP1]], i64 64) ] // CHECK-NEXT: ret double* [[TMP1]] // double *foo(ad_struct& x) { @@ -48,10 +45,7 @@ double *foo(ad_struct& x) { // CHECK-NEXT: [[TMP0:%.*]] = load %struct.ad_struct*, %struct.ad_struct** [[X_ADDR]], align 8 // CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_AD_STRUCT:%.*]], %struct.ad_struct* [[TMP0]], i32 0, i32 0 // CHECK-NEXT: [[TMP1:%.*]] = load double*, double** [[A]], align 8 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint double* [[TMP1]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 63 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[TMP1]], i64 64) ] // CHECK-NEXT: ret double* [[TMP1]] // double *goo(ad_struct *x) { @@ -66,10 +60,7 @@ double *goo(ad_struct *x) { // CHECK-NEXT: store double** [[X]], double*** [[X_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load double**, double*** [[X_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP0]], align 8 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint double* [[TMP1]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 63 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[TMP1]], i64 64) ] // CHECK-NEXT: ret double* [[TMP1]] // double *bar(aligned_double *x) { @@ -84,10 +75,7 @@ double *bar(aligned_double *x) { // CHECK-NEXT: store double** [[X]], double*** [[X_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load double**, double*** [[X_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP0]], align 8 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint double* [[TMP1]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 63 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[TMP1]], i64 64) ] 
// CHECK-NEXT: ret double* [[TMP1]] // double *car(aligned_double &x) { @@ -103,10 +91,7 @@ double *car(aligned_double &x) { // CHECK-NEXT: [[TMP0:%.*]] = load double**, double*** [[X_ADDR]], align 8 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double*, double** [[TMP0]], i64 5 // CHECK-NEXT: [[TMP1:%.*]] = load double*, double** [[ARRAYIDX]], align 8 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint double* [[TMP1]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 63 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[TMP1]], i64 64) ] // CHECK-NEXT: ret double* [[TMP1]] // double *dar(aligned_double *x) { @@ -118,10 +103,7 @@ aligned_double eep(); // CHECK-LABEL: define {{[^@]+}}@_Z3retv() #0 // CHECK-NEXT: entry: // CHECK-NEXT: [[CALL:%.*]] = call double* @_Z3eepv() -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint double* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 63 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[CALL]], i64 64) ] // CHECK-NEXT: ret double* [[CALL]] // double *ret() { diff --git a/clang/test/CodeGen/alloc-align-attr.c b/clang/test/CodeGen/alloc-align-attr.c index 9517c50dbb1db..44a57291b47c8 100644 --- a/clang/test/CodeGen/alloc-align-attr.c +++ b/clang/test/CodeGen/alloc-align-attr.c @@ -11,12 +11,8 @@ __INT32_TYPE__*m1(__INT32_TYPE__ i) __attribute__((alloc_align(1))); // CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK-NEXT: [[CALL:%.*]] = call i32* @m1(i32 [[TMP0]]) -// CHECK-NEXT: [[ALIGNMENTCAST:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[MASK:%.*]] = sub i64 [[ALIGNMENTCAST]], 1 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK]] -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[CALL]], i64 [[CASTED_ALIGN]]) ] // CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[CALL]], align 4 // CHECK-NEXT: ret i32 [[TMP1]] // @@ -32,12 +28,8 @@ __INT32_TYPE__ test1(__INT32_TYPE__ a) { // CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[A_ADDR]], align 8 // CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 // CHECK-NEXT: [[CALL:%.*]] = call i32* @m1(i32 [[CONV]]) -// CHECK-NEXT: [[ALIGNMENTCAST:%.*]] = zext i32 [[CONV]] to i64 -// CHECK-NEXT: [[MASK:%.*]] = sub i64 [[ALIGNMENTCAST]], 1 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK]] -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = zext i32 [[CONV]] to i64 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[CALL]], i64 [[CASTED_ALIGN]]) ] // CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[CALL]], align 4 // CHECK-NEXT: ret i32 [[TMP1]] // @@ -55,11 +47,7 @@ __INT32_TYPE__ *m2(__SIZE_TYPE__ i) __attribute__((alloc_align(1))); // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64 // CHECK-NEXT: [[CALL:%.*]] = call 
i32* @m2(i64 [[CONV]]) -// CHECK-NEXT: [[MASK:%.*]] = sub i64 [[CONV]], 1 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK]] -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[CALL]], i64 [[CONV]]) ] // CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[CALL]], align 4 // CHECK-NEXT: ret i32 [[TMP1]] // @@ -75,11 +63,7 @@ __INT32_TYPE__ test3(__INT32_TYPE__ a) { // CHECK-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[A_ADDR]], align 8 // CHECK-NEXT: [[CALL:%.*]] = call i32* @m2(i64 [[TMP0]]) -// CHECK-NEXT: [[MASK:%.*]] = sub i64 [[TMP0]], 1 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK]] -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[CALL]], i64 [[TMP0]]) ] // CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[CALL]], align 4 // CHECK-NEXT: ret i32 [[TMP1]] // @@ -115,12 +99,8 @@ __INT32_TYPE__ *m3(struct Empty s, __int128_t i) __attribute__((alloc_align(2))) // CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* [[TMP4]], i32 0, i32 1 // CHECK-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 // CHECK-NEXT: [[CALL:%.*]] = call i32* @m3(i64 [[TMP6]], i64 [[TMP8]]) -// CHECK-NEXT: [[ALIGNMENTCAST:%.*]] = trunc i128 [[TMP3]] to i64 -// CHECK-NEXT: [[MASK:%.*]] = sub i64 [[ALIGNMENTCAST]], 1 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK]] -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = trunc i128 [[TMP3]] to i64 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[CALL]], i64 [[CASTED_ALIGN]]) ] // CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[CALL]], align 4 // CHECK-NEXT: ret i32 [[TMP9]] // @@ -157,12 +137,8 @@ __INT32_TYPE__ *m4(struct MultiArgs s, __int128_t i) __attribute__((alloc_align( // CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* [[TMP9]], i32 0, i32 1 // CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 // CHECK-NEXT: [[CALL:%.*]] = call i32* @m4(i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP11]], i64 [[TMP13]]) -// CHECK-NEXT: [[ALIGNMENTCAST:%.*]] = trunc i128 [[TMP3]] to i64 -// CHECK-NEXT: [[MASK:%.*]] = sub i64 [[ALIGNMENTCAST]], 1 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK]] -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = trunc i128 [[TMP3]] to i64 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[CALL]], i64 [[CASTED_ALIGN]]) ] // CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[CALL]], align 4 // CHECK-NEXT: ret i32 [[TMP14]] // diff --git a/clang/test/CodeGen/assume-aligned-and-alloc-align-attributes.c b/clang/test/CodeGen/assume-aligned-and-alloc-align-attributes.c index fa4ee8db12e7f..cd8a6f19b4f49 100644 --- a/clang/test/CodeGen/assume-aligned-and-alloc-align-attributes.c +++ b/clang/test/CodeGen/assume-aligned-and-alloc-align-attributes.c @@ -36,12 
+36,8 @@ void *t2_immediate2() { // CHECK-NEXT: store i32 [[ALIGNMENT:%.*]], i32* [[ALIGNMENT_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ALIGNMENT_ADDR]], align 4 // CHECK-NEXT: [[CALL:%.*]] = call align 32 i8* @my_aligned_alloc(i32 320, i32 [[TMP0]]) -// CHECK-NEXT: [[ALIGNMENTCAST:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[MASK:%.*]] = sub i64 [[ALIGNMENTCAST]], 1 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK]] -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[CALL]], i64 [[TMP1]]) ] // CHECK-NEXT: ret i8* [[CALL]] // void *t3_variable(int alignment) { diff --git a/clang/test/CodeGen/builtin-align-array.c b/clang/test/CodeGen/builtin-align-array.c index 97235c33b7fbe..31f7b42b56170 100644 --- a/clang/test/CodeGen/builtin-align-array.c +++ b/clang/test/CodeGen/builtin-align-array.c @@ -4,7 +4,7 @@ extern int func(char *c); -// CHECK-LABEL: define {{[^@]+}}@test_array() #0 +// CHECK-LABEL: @test_array( // CHECK-NEXT: entry: // CHECK-NEXT: [[BUF:%.*]] = alloca [1024 x i8], align 16 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BUF]], i64 0, i64 44 @@ -12,10 +12,7 @@ extern int func(char *c); // CHECK-NEXT: [[ALIGNED_INTPTR:%.*]] = and i64 [[INTPTR]], -16 // CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[ALIGNED_INTPTR]], [[INTPTR]] // CHECK-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, i8* [[ARRAYIDX]], i64 [[DIFF]] -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[ALIGNED_RESULT]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 15 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[ALIGNED_RESULT]], i64 16) ] // CHECK-NEXT: [[CALL:%.*]] = call i32 @func(i8* [[ALIGNED_RESULT]]) // CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BUF]], i64 0, i64 22 // CHECK-NEXT: [[INTPTR2:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 @@ -23,13 +20,10 @@ extern int func(char *c); // CHECK-NEXT: [[ALIGNED_INTPTR4:%.*]] = and i64 [[OVER_BOUNDARY]], -32 // CHECK-NEXT: [[DIFF5:%.*]] = sub i64 [[ALIGNED_INTPTR4]], [[INTPTR2]] // CHECK-NEXT: [[ALIGNED_RESULT6:%.*]] = getelementptr inbounds i8, i8* [[ARRAYIDX1]], i64 [[DIFF5]] -// CHECK-NEXT: [[PTRINT7:%.*]] = ptrtoint i8* [[ALIGNED_RESULT6]] to i64 -// CHECK-NEXT: [[MASKEDPTR8:%.*]] = and i64 [[PTRINT7]], 31 -// CHECK-NEXT: [[MASKCOND9:%.*]] = icmp eq i64 [[MASKEDPTR8]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND9]]) -// CHECK-NEXT: [[CALL10:%.*]] = call i32 @func(i8* [[ALIGNED_RESULT6]]) -// CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BUF]], i64 0, i64 16 -// CHECK-NEXT: [[SRC_ADDR:%.*]] = ptrtoint i8* [[ARRAYIDX11]] to i64 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[ALIGNED_RESULT6]], i64 32) ] +// CHECK-NEXT: [[CALL7:%.*]] = call i32 @func(i8* [[ALIGNED_RESULT6]]) +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BUF]], i64 0, i64 16 +// CHECK-NEXT: [[SRC_ADDR:%.*]] = ptrtoint i8* [[ARRAYIDX8]] to i64 // CHECK-NEXT: [[SET_BITS:%.*]] = and i64 [[SRC_ADDR]], 63 // CHECK-NEXT: [[IS_ALIGNED:%.*]] = icmp eq i64 [[SET_BITS]], 0 // CHECK-NEXT: [[CONV:%.*]] 
= zext i1 [[IS_ALIGNED]] to i32 @@ -42,7 +36,7 @@ int test_array(void) { return __builtin_is_aligned(&buf[16], 64); } -// CHECK-LABEL: define {{[^@]+}}@test_array_should_not_mask() #0 +// CHECK-LABEL: @test_array_should_not_mask( // CHECK-NEXT: entry: // CHECK-NEXT: [[BUF:%.*]] = alloca [1024 x i8], align 32 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BUF]], i64 0, i64 64 @@ -50,10 +44,7 @@ int test_array(void) { // CHECK-NEXT: [[ALIGNED_INTPTR:%.*]] = and i64 [[INTPTR]], -16 // CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[ALIGNED_INTPTR]], [[INTPTR]] // CHECK-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, i8* [[ARRAYIDX]], i64 [[DIFF]] -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[ALIGNED_RESULT]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 15 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[ALIGNED_RESULT]], i64 16) ] // CHECK-NEXT: [[CALL:%.*]] = call i32 @func(i8* [[ALIGNED_RESULT]]) // CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BUF]], i64 0, i64 32 // CHECK-NEXT: [[INTPTR2:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 @@ -61,11 +52,8 @@ int test_array(void) { // CHECK-NEXT: [[ALIGNED_INTPTR4:%.*]] = and i64 [[OVER_BOUNDARY]], -32 // CHECK-NEXT: [[DIFF5:%.*]] = sub i64 [[ALIGNED_INTPTR4]], [[INTPTR2]] // CHECK-NEXT: [[ALIGNED_RESULT6:%.*]] = getelementptr inbounds i8, i8* [[ARRAYIDX1]], i64 [[DIFF5]] -// CHECK-NEXT: [[PTRINT7:%.*]] = ptrtoint i8* [[ALIGNED_RESULT6]] to i64 -// CHECK-NEXT: [[MASKEDPTR8:%.*]] = and i64 [[PTRINT7]], 31 -// CHECK-NEXT: [[MASKCOND9:%.*]] = icmp eq i64 [[MASKEDPTR8]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND9]]) -// CHECK-NEXT: [[CALL10:%.*]] = call i32 @func(i8* [[ALIGNED_RESULT6]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[ALIGNED_RESULT6]], i64 32) ] +// CHECK-NEXT: [[CALL7:%.*]] = call i32 @func(i8* [[ALIGNED_RESULT6]]) // CHECK-NEXT: ret i32 1 // int test_array_should_not_mask(void) { diff --git a/clang/test/CodeGen/builtin-align.c b/clang/test/CodeGen/builtin-align.c index 7e66e2b5c0b9b..60f7fc99c1d4d 100644 --- a/clang/test/CodeGen/builtin-align.c +++ b/clang/test/CodeGen/builtin-align.c @@ -122,11 +122,7 @@ _Bool is_aligned(TYPE ptr, unsigned align) { // CHECK-VOID_PTR-NEXT: [[ALIGNED_INTPTR:%.*]] = and i64 [[OVER_BOUNDARY]], [[INVERTED_MASK]] // CHECK-VOID_PTR-NEXT: [[DIFF:%.*]] = sub i64 [[ALIGNED_INTPTR]], [[INTPTR]] // CHECK-VOID_PTR-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 [[DIFF]] -// CHECK-VOID_PTR-NEXT: [[MASK1:%.*]] = sub i64 [[ALIGNMENT]], 1 -// CHECK-VOID_PTR-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[ALIGNED_RESULT]] to i64 -// CHECK-VOID_PTR-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK1]] -// CHECK-VOID_PTR-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-VOID_PTR-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-VOID_PTR-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[ALIGNED_RESULT]], i64 [[ALIGNMENT]]) ] // CHECK-VOID_PTR-NEXT: ret i8* [[ALIGNED_RESULT]] // // CHECK-FLOAT_PTR-LABEL: define {{[^@]+}}@align_up @@ -142,11 +138,7 @@ _Bool is_aligned(TYPE ptr, unsigned align) { // CHECK-FLOAT_PTR-NEXT: [[TMP0:%.*]] = bitcast float* [[PTR]] to i8* // CHECK-FLOAT_PTR-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 [[DIFF]] // CHECK-FLOAT_PTR-NEXT: [[TMP1:%.*]] = bitcast i8* 
[[ALIGNED_RESULT]] to float* -// CHECK-FLOAT_PTR-NEXT: [[MASK1:%.*]] = sub i64 [[ALIGNMENT]], 1 -// CHECK-FLOAT_PTR-NEXT: [[PTRINT:%.*]] = ptrtoint float* [[TMP1]] to i64 -// CHECK-FLOAT_PTR-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK1]] -// CHECK-FLOAT_PTR-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-FLOAT_PTR-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-FLOAT_PTR-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP1]], i64 [[ALIGNMENT]]) ] // CHECK-FLOAT_PTR-NEXT: ret float* [[TMP1]] // // CHECK-LONG-LABEL: define {{[^@]+}}@align_up @@ -184,11 +176,7 @@ TYPE align_up(TYPE ptr, unsigned align) { // CHECK-VOID_PTR-NEXT: [[ALIGNED_INTPTR:%.*]] = and i64 [[INTPTR]], [[INVERTED_MASK]] // CHECK-VOID_PTR-NEXT: [[DIFF:%.*]] = sub i64 [[ALIGNED_INTPTR]], [[INTPTR]] // CHECK-VOID_PTR-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 [[DIFF]] -// CHECK-VOID_PTR-NEXT: [[MASK1:%.*]] = sub i64 [[ALIGNMENT]], 1 -// CHECK-VOID_PTR-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[ALIGNED_RESULT]] to i64 -// CHECK-VOID_PTR-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK1]] -// CHECK-VOID_PTR-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-VOID_PTR-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-VOID_PTR-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[ALIGNED_RESULT]], i64 [[ALIGNMENT]]) ] // CHECK-VOID_PTR-NEXT: ret i8* [[ALIGNED_RESULT]] // // CHECK-FLOAT_PTR-LABEL: define {{[^@]+}}@align_down @@ -203,11 +191,7 @@ TYPE align_up(TYPE ptr, unsigned align) { // CHECK-FLOAT_PTR-NEXT: [[TMP0:%.*]] = bitcast float* [[PTR]] to i8* // CHECK-FLOAT_PTR-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 [[DIFF]] // CHECK-FLOAT_PTR-NEXT: [[TMP1:%.*]] = bitcast i8* [[ALIGNED_RESULT]] to float* -// CHECK-FLOAT_PTR-NEXT: [[MASK1:%.*]] = sub i64 [[ALIGNMENT]], 1 -// CHECK-FLOAT_PTR-NEXT: [[PTRINT:%.*]] = ptrtoint float* [[TMP1]] to i64 -// CHECK-FLOAT_PTR-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK1]] -// CHECK-FLOAT_PTR-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-FLOAT_PTR-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-FLOAT_PTR-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP1]], i64 [[ALIGNMENT]]) ] // CHECK-FLOAT_PTR-NEXT: ret float* [[TMP1]] // // CHECK-LONG-LABEL: define {{[^@]+}}@align_down diff --git a/clang/test/CodeGen/builtin-assume-aligned.c b/clang/test/CodeGen/builtin-assume-aligned.c index 90693cc215200..b9f1ebfbdcf58 100644 --- a/clang/test/CodeGen/builtin-assume-aligned.c +++ b/clang/test/CodeGen/builtin-assume-aligned.c @@ -8,10 +8,7 @@ // CHECK-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[TMP1]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[TMP1]], i64 32, i64 0) ] // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* // CHECK-NEXT: store i32* [[TMP2]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8 @@ -31,10 +28,7 @@ int test1(int *a) { // CHECK-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* 
[[TMP0]] to i8* -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[TMP1]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[TMP1]], i64 32, i64 0) ] // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* // CHECK-NEXT: store i32* [[TMP2]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8 @@ -54,10 +48,7 @@ int test2(int *a) { // CHECK-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[TMP1]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[TMP1]], i64 32) ] // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* // CHECK-NEXT: store i32* [[TMP2]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8 @@ -81,11 +72,7 @@ int test3(int *a) { // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP2]] to i64 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[TMP1]] to i64 -// CHECK-NEXT: [[OFFSETPTR:%.*]] = sub i64 [[PTRINT]], [[CONV]] -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[OFFSETPTR]], 31 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[TMP1]], i64 32, i64 [[CONV]]) ] // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP1]] to i32* // CHECK-NEXT: store i32* [[TMP3]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8 @@ -115,11 +102,7 @@ int *m2() __attribute__((assume_aligned(64, 12))); // CHECK-LABEL: define {{[^@]+}}@test6() #0 // CHECK-NEXT: entry: // CHECK-NEXT: [[CALL:%.*]] = call i32* (...) 
@m2() -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[CALL]] to i64 -// CHECK-NEXT: [[OFFSETPTR:%.*]] = sub i64 [[PTRINT]], 12 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[OFFSETPTR]], 63 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[CALL]], i64 64, i64 12) ] // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4 // CHECK-NEXT: ret i32 [[TMP0]] // @@ -134,10 +117,7 @@ int test6() { // CHECK-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[TMP1]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 536870911 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[TMP1]], i64 536870912) ] // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* // CHECK-NEXT: store i32* [[TMP2]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8 diff --git a/clang/test/CodeGen/catch-alignment-assumption-attribute-align_value-on-lvalue.cpp b/clang/test/CodeGen/catch-alignment-assumption-attribute-align_value-on-lvalue.cpp index 96d264190bec7..fb2b1a76116e9 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-attribute-align_value-on-lvalue.cpp +++ b/clang/test/CodeGen/catch-alignment-assumption-attribute-align_value-on-lvalue.cpp @@ -21,9 +21,9 @@ char **load_from_ac_struct(struct ac_struct *x) { // CHECK-NEXT: %[[X_RELOADED:.*]] = load %[[STRUCT_AC_STRUCT]]*, %[[STRUCT_AC_STRUCT]]** %[[STRUCT_AC_STRUCT_ADDR]], align 8 // CHECK: %[[A_ADDR:.*]] = getelementptr inbounds %[[STRUCT_AC_STRUCT]], %[[STRUCT_AC_STRUCT]]* %[[X_RELOADED]], i32 0, i32 0 // CHECK: %[[A:.*]] = load i8**, i8*** %[[A_ADDR]], align 8 - // CHECK-NEXT: %[[PTRINT:.*]] = ptrtoint i8** %[[A]] to i64 - // CHECK-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[PTRINT]], 2147483647 - // CHECK-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 + // CHECK-SANITIZE-NEXT: %[[PTRINT:.*]] = ptrtoint i8** %[[A]] to i64 + // CHECK-SANITIZE-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[PTRINT]], 2147483647 + // CHECK-SANITIZE-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 // CHECK-SANITIZE-NEXT: %[[PTRINT_DUP:.*]] = ptrtoint i8** %[[A]] to i64, !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[MASKCOND]], label %[[CONT:.*]], label %[[HANDLER_ALIGNMENT_ASSUMPTION:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_ALIGNMENT_ASSUMPTION]]: @@ -32,7 +32,7 @@ char **load_from_ac_struct(struct ac_struct *x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8** %[[A]], i64 2147483648) ] // CHECK-NEXT: ret i8** %[[A]] // CHECK-NEXT: } #line 100 diff --git a/clang/test/CodeGen/catch-alignment-assumption-attribute-align_value-on-paramvar.cpp b/clang/test/CodeGen/catch-alignment-assumption-attribute-align_value-on-paramvar.cpp index 0e3fa750c66c3..46f7d09ae2aa5 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-attribute-align_value-on-paramvar.cpp +++ b/clang/test/CodeGen/catch-alignment-assumption-attribute-align_value-on-paramvar.cpp @@ 
-24,7 +24,7 @@ char **passthrough(__attribute__((align_value(0x80000000))) char **x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-SANITIZE-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-SANITIZE-NEXT: call void @llvm.assume(i1 true) [ "align"(i8** %[[X_RELOADED]], i64 2147483648) ] // CHECK-NEXT: ret i8** %[[X_RELOADED]] // CHECK-NEXT: } #line 100 diff --git a/clang/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function-variable.cpp b/clang/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function-variable.cpp index 591eaa0e13131..40abbc3871996 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function-variable.cpp +++ b/clang/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function-variable.cpp @@ -30,10 +30,10 @@ char **caller(char **x, unsigned long alignment) { // CHECK-NEXT: %[[X_RELOADED:.*]] = load i8**, i8*** %[[X_ADDR]], align 8 // CHECK-NEXT: %[[ALIGNMENT_RELOADED:.*]] = load i64, i64* %[[ALIGNMENT_ADDR]], align 8 // CHECK-NEXT: %[[X_RETURNED:.*]] = call i8** @[[PASSTHROUGH]](i8** %[[X_RELOADED]], i64 %[[ALIGNMENT_RELOADED]]) - // CHECK-NEXT: %[[MASK:.*]] = sub i64 %[[ALIGNMENT_RELOADED]], 1 - // CHECK-NEXT: %[[PTRINT:.*]] = ptrtoint i8** %[[X_RETURNED]] to i64 - // CHECK-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[PTRINT]], %[[MASK]] - // CHECK-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 + // CHECK-SANITIZE-NEXT: %[[PTRINT:.*]] = ptrtoint i8** %[[X_RETURNED]] to i64 + // CHECK-SANITIZE-NEXT: %[[MASK:.*]] = sub i64 %[[ALIGNMENT_RELOADED]], 1 + // CHECK-SANITIZE-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[PTRINT]], %[[MASK]] + // CHECK-SANITIZE-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 // CHECK-SANITIZE-NEXT: %[[PTRINT_DUP:.*]] = ptrtoint i8** %[[X_RETURNED]] to i64, !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[MASKCOND]], label %[[CONT:.*]], label %[[HANDLER_ALIGNMENT_ASSUMPTION:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_ALIGNMENT_ASSUMPTION]]: @@ -42,7 +42,7 @@ char **caller(char **x, unsigned long alignment) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8** %[[X_RETURNED]], i64 %1) ] // CHECK-NEXT: ret i8** %[[X_RETURNED]] // CHECK-NEXT: } #line 100 diff --git a/clang/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function.cpp b/clang/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function.cpp index a41357933f918..87d903c69716c 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function.cpp +++ b/clang/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function.cpp @@ -39,7 +39,7 @@ char **caller(char **x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-SANITIZE-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-SANITIZE-NEXT: call void @llvm.assume(i1 true) [ "align"(i8** %[[X_RETURNED]], i64 128) ] // CHECK-NEXT: ret i8** %[[X_RETURNED]] // CHECK-NEXT: } #line 100 diff --git a/clang/test/CodeGen/catch-alignment-assumption-attribute-assume_aligned-on-function-two-params.cpp 
b/clang/test/CodeGen/catch-alignment-assumption-attribute-assume_aligned-on-function-two-params.cpp index e78667ce16e06..ecc96bcf6a53b 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-attribute-assume_aligned-on-function-two-params.cpp +++ b/clang/test/CodeGen/catch-alignment-assumption-attribute-assume_aligned-on-function-two-params.cpp @@ -24,10 +24,10 @@ char **caller(char **x) { // CHECK-NEXT: store i8** %[[X]], i8*** %[[X_ADDR]], align 8 // CHECK-NEXT: %[[X_RELOADED:.*]] = load i8**, i8*** %[[X_ADDR]], align 8 // CHECK-NEXT: %[[X_RETURNED:.*]] = call i8** @[[PASSTHROUGH]](i8** %[[X_RELOADED]]) - // CHECK-NEXT: %[[PTRINT:.*]] = ptrtoint i8** %[[X_RETURNED]] to i64 - // CHECK-NEXT: %[[OFFSETPTR:.*]] = sub i64 %[[PTRINT]], 42 - // CHECK-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[OFFSETPTR]], 2147483647 - // CHECK-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 + // CHECK-SANITIZE-NEXT: %[[PTRINT:.*]] = ptrtoint i8** %[[X_RETURNED]] to i64 + // CHECK-SANITIZE-NEXT: %[[OFFSETPTR:.*]] = sub i64 %[[PTRINT]], 42 + // CHECK-SANITIZE-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[OFFSETPTR]], 2147483647 + // CHECK-SANITIZE-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 // CHECK-SANITIZE-NEXT: %[[PTRINT_DUP:.*]] = ptrtoint i8** %[[X_RETURNED]] to i64, !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[MASKCOND]], label %[[CONT:.*]], label %[[HANDLER_ALIGNMENT_ASSUMPTION:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_ALIGNMENT_ASSUMPTION]]: @@ -36,7 +36,7 @@ char **caller(char **x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8** %[[X_RETURNED]], i64 2147483648, i64 42) ] // CHECK-NEXT: ret i8** %[[X_RETURNED]] // CHECK-NEXT: } #line 100 diff --git a/clang/test/CodeGen/catch-alignment-assumption-attribute-assume_aligned-on-function.cpp b/clang/test/CodeGen/catch-alignment-assumption-attribute-assume_aligned-on-function.cpp index f750bbd77d42f..5bbc5843b89f8 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-attribute-assume_aligned-on-function.cpp +++ b/clang/test/CodeGen/catch-alignment-assumption-attribute-assume_aligned-on-function.cpp @@ -36,7 +36,7 @@ char **caller(char **x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-SANITIZE-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-SANITIZE-NEXT: call void @llvm.assume(i1 true) [ "align"(i8** %[[X_RETURNED]], i64 128) ] // CHECK-NEXT: ret i8** %[[X_RETURNED]] // CHECK-NEXT: } #line 100 diff --git a/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-three-params-variable.cpp b/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-three-params-variable.cpp index 4306e322f5fb6..9c8944ba280b4 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-three-params-variable.cpp +++ b/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-three-params-variable.cpp @@ -16,10 +16,10 @@ void *caller(char **x, unsigned long offset) { // CHECK-NEXT: %[[X_RELOADED:.*]] = load i8**, i8*** %[[X_ADDR]], align 8 // CHECK-NEXT: %[[BITCAST:.*]] = bitcast i8** %[[X_RELOADED]] to i8* // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, i64* %[[OFFSET_ADDR]], align 8 - // CHECK-NEXT: %[[PTRINT:.*]] = ptrtoint i8* 
%[[BITCAST]] to i64 - // CHECK-NEXT: %[[OFFSETPTR:.*]] = sub i64 %[[PTRINT]], %[[OFFSET_RELOADED]] - // CHECK-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[OFFSETPTR]], 536870911 - // CHECK-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 + // CHECK-SANITIZE-NEXT: %[[PTRINT:.*]] = ptrtoint i8* %[[BITCAST]] to i64 + // CHECK-SANITIZE-NEXT: %[[OFFSETPTR:.*]] = sub i64 %[[PTRINT]], %[[OFFSET_RELOADED]] + // CHECK-SANITIZE-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[OFFSETPTR]], 536870911 + // CHECK-SANITIZE-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 // CHECK-SANITIZE-NEXT: %[[PTRINT_DUP:.*]] = ptrtoint i8* %[[BITCAST]] to i64, !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[MASKCOND]], label %[[CONT:.*]], label %[[HANDLER_ALIGNMENT_ASSUMPTION:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_ALIGNMENT_ASSUMPTION]]: @@ -28,7 +28,7 @@ void *caller(char **x, unsigned long offset) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* %[[BITCAST]], i64 536870912, i64 %[[OFFSET_RELOADED]]) ] // CHECK-NEXT: ret i8* %[[BITCAST]] // CHECK-NEXT: } #line 100 diff --git a/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-three-params.cpp b/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-three-params.cpp index 27f53e92bed89..9f61e08106a01 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-three-params.cpp +++ b/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-three-params.cpp @@ -13,10 +13,10 @@ void *caller(char **x) { // CHECK-NEXT: store i8** %[[X]], i8*** %[[X_ADDR]], align 8 // CHECK-NEXT: %[[X_RELOADED:.*]] = load i8**, i8*** %[[X_ADDR]], align 8 // CHECK-NEXT: %[[BITCAST:.*]] = bitcast i8** %[[X_RELOADED]] to i8* - // CHECK-NEXT: %[[PTRINT:.*]] = ptrtoint i8* %[[BITCAST]] to i64 - // CHECK-NEXT: %[[OFFSETPTR:.*]] = sub i64 %[[PTRINT]], 42 - // CHECK-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[OFFSETPTR]], 536870911 - // CHECK-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 + // CHECK-SANITIZE-NEXT: %[[PTRINT:.*]] = ptrtoint i8* %[[BITCAST]] to i64 + // CHECK-SANITIZE-NEXT: %[[OFFSETPTR:.*]] = sub i64 %[[PTRINT]], 42 + // CHECK-SANITIZE-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[OFFSETPTR]], 536870911 + // CHECK-SANITIZE-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 // CHECK-SANITIZE-NEXT: %[[PTRINT_DUP:.*]] = ptrtoint i8* %[[BITCAST]] to i64, !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[MASKCOND]], label %[[CONT:.*]], label %[[HANDLER_ALIGNMENT_ASSUMPTION:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_ALIGNMENT_ASSUMPTION]]: @@ -25,7 +25,7 @@ void *caller(char **x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* %[[BITCAST]], i64 536870912, i64 42) ] // CHECK-NEXT: ret i8* %[[BITCAST]] // CHECK-NEXT: } #line 100 diff --git a/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-two-params.cpp b/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-two-params.cpp index 5412270f37619..20bed646ff951 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-two-params.cpp +++ 
b/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-two-params.cpp @@ -13,9 +13,9 @@ void *caller(char **x) { // CHECK-NEXT: store i8** %[[X]], i8*** %[[X_ADDR]], align 8 // CHECK-NEXT: %[[X_RELOADED:.*]] = load i8**, i8*** %[[X_ADDR]], align 8 // CHECK-NEXT: %[[BITCAST:.*]] = bitcast i8** %[[X_RELOADED]] to i8* - // CHECK-NEXT: %[[PTRINT:.*]] = ptrtoint i8* %[[BITCAST]] to i64 - // CHECK-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[PTRINT]], 536870911 - // CHECK-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 + // CHECK-SANITIZE-NEXT: %[[PTRINT:.*]] = ptrtoint i8* %[[BITCAST]] to i64 + // CHECK-SANITIZE-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[PTRINT]], 536870911 + // CHECK-SANITIZE-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 // CHECK-SANITIZE-NEXT: %[[PTRINT_DUP:.*]] = ptrtoint i8* %[[BITCAST]] to i64, !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[MASKCOND]], label %[[CONT:.*]], label %[[HANDLER_ALIGNMENT_ASSUMPTION:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_ALIGNMENT_ASSUMPTION]]: @@ -24,7 +24,7 @@ void *caller(char **x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* %[[BITCAST]], i64 536870912) ] // CHECK-NEXT: ret i8* %[[BITCAST]] // CHECK-NEXT: } #line 100 diff --git a/clang/test/CodeGen/catch-alignment-assumption-openmp.cpp b/clang/test/CodeGen/catch-alignment-assumption-openmp.cpp index 6d75ee0858dac..353f2fd7f17bd 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-openmp.cpp +++ b/clang/test/CodeGen/catch-alignment-assumption-openmp.cpp @@ -12,9 +12,9 @@ void func(char *data) { // CHECK-NEXT: %[[DATA_ADDR:.*]] = alloca i8*, align 8 // CHECK: store i8* %[[DATA]], i8** %[[DATA_ADDR]], align 8 // CHECK: %[[DATA_RELOADED:.*]] = load i8*, i8** %[[DATA_ADDR]], align 8 - // CHECK-NEXT: %[[PTRINT:.*]] = ptrtoint i8* %[[DATA_RELOADED]] to i64 - // CHECK-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[PTRINT]], 1073741823 - // CHECK-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 + // CHECK-SANITIZE-NEXT: %[[PTRINT:.*]] = ptrtoint i8* %[[DATA_RELOADED]] to i64 + // CHECK-SANITIZE-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[PTRINT]], 1073741823 + // CHECK-SANITIZE-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 // CHECK-SANITIZE-NEXT: %[[PTRINT_DUP:.*]] = ptrtoint i8* %[[DATA_RELOADED]] to i64, !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[MASKCOND]], label %[[CONT:.*]], label %[[HANDLER_ALIGNMENT_ASSUMPTION:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_ALIGNMENT_ASSUMPTION]]: @@ -23,7 +23,7 @@ void func(char *data) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* %[[DATA_RELOADED]], i64 1073741824) ] #line 100 #pragma omp for simd aligned(data : 0x40000000) diff --git a/clang/test/CodeGen/non-power-of-2-alignment-assumptions.c b/clang/test/CodeGen/non-power-of-2-alignment-assumptions.c index 9467f6228dfc4..b8ce1699f7ed0 100644 --- a/clang/test/CodeGen/non-power-of-2-alignment-assumptions.c +++ b/clang/test/CodeGen/non-power-of-2-alignment-assumptions.c @@ -9,12 +9,8 @@ void *__attribute__((alloc_align(1))) alloc(int align); // CHECK-NEXT: store i32 [[ALIGN:%.*]], i32* 
[[ALIGN_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ALIGN_ADDR]], align 4 // CHECK-NEXT: [[CALL:%.*]] = call i8* @alloc(i32 [[TMP0]]) -// CHECK-NEXT: [[ALIGNMENTCAST:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[MASK:%.*]] = sub i64 [[ALIGNMENTCAST]], 1 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK]] -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[CALL]], i64 [[TMP1]]) ] // CHECK-NEXT: ret void // void t0(int align) { @@ -25,10 +21,7 @@ void t0(int align) { // CHECK-NEXT: [[ALIGN_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: store i32 [[ALIGN:%.*]], i32* [[ALIGN_ADDR]], align 4 // CHECK-NEXT: [[CALL:%.*]] = call i8* @alloc(i32 7) -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 6 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[CALL]], i64 7) ] // CHECK-NEXT: ret void // void t1(int align) { diff --git a/clang/test/OpenMP/simd_codegen.cpp b/clang/test/OpenMP/simd_codegen.cpp index 8ba87dce82fcb..335dfd78cacea 100644 --- a/clang/test/OpenMP/simd_codegen.cpp +++ b/clang/test/OpenMP/simd_codegen.cpp @@ -817,25 +817,9 @@ void parallel_simd(float *a) { // TERM_DEBUG: !{{[0-9]+}} = !DILocation(line: [[@LINE-11]], // CHECK-LABEL: S8 -// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64 -// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64 -// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64 -// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64 - -// CHECK-DAG: and i64 %{{.+}}, 15 -// CHECK-DAG: icmp eq i64 %{{.+}}, 0 // CHECK-DAG: call void @llvm.assume(i1 - -// CHECK-DAG: and i64 %{{.+}}, 7 -// CHECK-DAG: icmp eq i64 %{{.+}}, 0 // CHECK-DAG: call void @llvm.assume(i1 - -// CHECK-DAG: and i64 %{{.+}}, 15 -// CHECK-DAG: icmp eq i64 %{{.+}}, 0 // CHECK-DAG: call void @llvm.assume(i1 - -// CHECK-DAG: and i64 %{{.+}}, 3 -// CHECK-DAG: icmp eq i64 %{{.+}}, 0 // CHECK-DAG: call void @llvm.assume(i1 struct SS { SS(): a(0) {} diff --git a/clang/test/OpenMP/simd_metadata.c b/clang/test/OpenMP/simd_metadata.c index f0ae0200dd08e..18133e3b6c2e7 100644 --- a/clang/test/OpenMP/simd_metadata.c +++ b/clang/test/OpenMP/simd_metadata.c @@ -21,30 +21,21 @@ void h1(float *c, float *a, double b[], int size) // CHECK-LABEL: define void @h1 int t = 0; #pragma omp simd safelen(16) linear(t) aligned(c:32) aligned(a,b) -// CHECK: [[C_PTRINT:%.+]] = ptrtoint -// CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31 -// CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]]) -// CHECK: [[A_PTRINT:%.+]] = ptrtoint - -// X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 -// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31 -// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63 -// PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 -// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 - -// CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]]) -// CHECK: [[B_PTRINT:%.+]] = ptrtoint - -// X86-NEXT: 
[[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 -// X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 -// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63 -// PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 -// PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 - -// CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]]) + // CHECK: call void @llvm.assume(i1 true) [ "align"(float* [[PTR4:%.*]], {{i64|i32}} 32) ] + // CHECK-NEXT: load + + // X86-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 16) ] + // X86-AVX-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 32) ] + // X86-AVX512-NEXT:call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 64) ] + // PPC-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 16) ] + // PPC-QPX-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 16) ] + // CHECK-NEXT: load + + // X86-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 16) ] + // X86-AVX-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 32) ] + // X86-AVX512-NEXT:call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 64) ] + // PPC-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 16) ] + // PPC-QPX-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 32) ] for (int i = 0; i < size; ++i) { c[i] = a[i] * a[i] + b[i] * b[t]; ++t; @@ -52,30 +43,21 @@ void h1(float *c, float *a, double b[], int size) // do not emit llvm.access.group metadata due to usage of safelen clause. 
// CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.access.group {{![0-9]+}} #pragma omp simd safelen(16) linear(t) aligned(c:32) aligned(a,b) simdlen(8) -// CHECK: [[C_PTRINT:%.+]] = ptrtoint -// CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31 -// CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]]) -// CHECK: [[A_PTRINT:%.+]] = ptrtoint - -// X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 -// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31 -// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63 -// PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 -// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 - -// CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]]) -// CHECK: [[B_PTRINT:%.+]] = ptrtoint - -// X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 -// X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 -// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63 -// PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 -// PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 - -// CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]]) + // CHECK: call void @llvm.assume(i1 true) [ "align"(float* [[PTR4:%.*]], {{i64|i32}} 32) ] + // CHECK-NEXT: load + + // X86-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 16) ] + // X86-AVX-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 32) ] + // X86-AVX512-NEXT:call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 64) ] + // PPC-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 16) ] + // PPC-QPX-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 16) ] + // CHECK-NEXT: load + + // X86-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 16) ] + // X86-AVX-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 32) ] + // X86-AVX512-NEXT:call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 64) ] + // PPC-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 16) ] + // PPC-QPX-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 32) ] for (int i = 0; i < size; ++i) { c[i] = a[i] * a[i] + b[i] * b[t]; ++t; @@ -83,30 +65,21 @@ void h1(float *c, float *a, double b[], int size) // do not emit llvm.access.group metadata due to usage of safelen clause. 
// CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.access.group {{![0-9]+}} #pragma omp simd linear(t) aligned(c:32) aligned(a,b) simdlen(8) -// CHECK: [[C_PTRINT:%.+]] = ptrtoint -// CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31 -// CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]]) -// CHECK: [[A_PTRINT:%.+]] = ptrtoint - -// X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 -// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31 -// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63 -// PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 -// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 - -// CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]]) -// CHECK: [[B_PTRINT:%.+]] = ptrtoint - -// X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 -// X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 -// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63 -// PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 -// PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 - -// CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]]) + // CHECK: call void @llvm.assume(i1 true) [ "align"(float* [[PTR4:%.*]], {{i64|i32}} 32) ] + // CHECK-NEXT: load + + // X86-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 16) ] + // X86-AVX-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 32) ] + // X86-AVX512-NEXT:call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 64) ] + // PPC-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 16) ] + // PPC-QPX-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 16) ] + // CHECK-NEXT: load + + // X86-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 16) ] + // X86-AVX-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 32) ] + // X86-AVX512-NEXT:call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 64) ] + // PPC-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 16) ] + // PPC-QPX-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 32) ] for (int i = 0; i < size; ++i) { c[i] = a[i] * a[i] + b[i] * b[t]; ++t; diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp index d2031d6d214b1..7dff11951d9f8 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp @@ -101,10 +101,7 @@ int target_teams_fun(int *g){ // CK1: define internal void @[[OUTL1]]({{.+}}) // CK1: [[ARRDECAY:%.+]] = getelementptr inbounds [1000 x i32], [1000 x i32]* %{{.+}}, i{{32|64}} 0, i{{32|64}} 0 - // CK1: [[ARR_CAST:%.+]] = ptrtoint i32* [[ARRDECAY]] to i{{32|64}} - // CK1: [[MASKED_PTR:%.+]] = and i{{32|64}} [[ARR_CAST]], 7 - // CK1: [[COND:%.+]] = icmp eq i{{32|64}} [[MASKED_PTR]], 0 - // CK1: call void @llvm.assume(i1 [[COND]]) + // CK1: call void @llvm.assume(i1 true) [ "align"(i32* 
[[ARRDECAY]], {{i64|i32}} 8) ]
  // CK1: call void @__kmpc_for_static_init_4(
  // CK1: call void {{.+}} @__kmpc_fork_call(
  // CK1: call void @__kmpc_for_static_fini(
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index f223fadcce23f..5fa3620791856 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -785,7 +785,11 @@ class IRBuilderBase {
   /// Create an assume intrinsic call that allows the optimizer to
   /// assume that the provided condition will be true.
-  CallInst *CreateAssumption(Value *Cond);
+  ///
+  /// The optional argument \p OpBundles specifies operand bundles that are
+  /// added to the call instruction.
+  CallInst *CreateAssumption(Value *Cond,
+                             ArrayRef<OperandBundleDef> OpBundles = llvm::None);
 
   /// Create a call to the experimental.gc.statepoint intrinsic to
   /// start a new statepoint sequence.
@@ -2513,13 +2517,11 @@ class IRBuilderBase {
 private:
   /// Helper function that creates an assume intrinsic call that
-  /// represents an alignment assumption on the provided Ptr, Mask, Type
-  /// and Offset. It may be sometimes useful to do some other logic
-  /// based on this alignment check, thus it can be stored into 'TheCheck'.
+  /// represents an alignment assumption on the provided pointer \p PtrValue
+  /// with offset \p OffsetValue and alignment value \p AlignValue.
   CallInst *CreateAlignmentAssumptionHelper(const DataLayout &DL,
-                                            Value *PtrValue, Value *Mask,
-                                            Type *IntPtrTy, Value *OffsetValue,
-                                            Value **TheCheck);
+                                            Value *PtrValue, Value *AlignValue,
+                                            Value *OffsetValue);
 
 public:
   /// Create an assume intrinsic call that represents an alignment
@@ -2528,13 +2530,9 @@ class IRBuilderBase {
   /// An optional offset can be provided, and if it is provided, the offset
   /// must be subtracted from the provided pointer to get the pointer with the
   /// specified alignment.
-  ///
-  /// It may be sometimes useful to do some other logic
-  /// based on this alignment check, thus it can be stored into 'TheCheck'.
   CallInst *CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue,
                                       unsigned Alignment,
-                                      Value *OffsetValue = nullptr,
-                                      Value **TheCheck = nullptr);
+                                      Value *OffsetValue = nullptr);
 
   /// Create an assume intrinsic call that represents an alignment
   /// assumption on the provided pointer.
@@ -2543,15 +2541,11 @@ class IRBuilderBase {
   /// must be subtracted from the provided pointer to get the pointer with the
   /// specified alignment.
   ///
-  /// It may be sometimes useful to do some other logic
-  /// based on this alignment check, thus it can be stored into 'TheCheck'.
-  ///
   /// This overload handles the condition where the Alignment is dependent
   /// on an existing value rather than a static value.
 CallInst *CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue,
                                     Value *Alignment,
-                                    Value *OffsetValue = nullptr,
-                                    Value **TheCheck = nullptr);
+                                    Value *OffsetValue = nullptr);
 };
 
 /// This provides a uniform API for creating instructions and inserting
diff --git a/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h b/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
index be119b8ab8552..10b6e1c6a21b6 100644
--- a/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
+++ b/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
@@ -37,9 +37,9 @@ struct AlignmentFromAssumptionsPass
   ScalarEvolution *SE = nullptr;
   DominatorTree *DT = nullptr;
 
-  bool extractAlignmentInfo(CallInst *I, Value *&AAPtr, const SCEV *&AlignSCEV,
-                            const SCEV *&OffSCEV);
-  bool processAssumption(CallInst *I);
+  bool extractAlignmentInfo(CallInst *I, unsigned Idx, Value *&AAPtr,
+                            const SCEV *&AlignSCEV, const SCEV *&OffSCEV);
+  bool processAssumption(CallInst *I, unsigned Idx);
 };
 }
diff --git a/llvm/lib/Analysis/AssumeBundleQueries.cpp b/llvm/lib/Analysis/AssumeBundleQueries.cpp
index 9539af6d9d457..0084e2f13f5f9 100644
--- a/llvm/lib/Analysis/AssumeBundleQueries.cpp
+++ b/llvm/lib/Analysis/AssumeBundleQueries.cpp
@@ -108,10 +108,17 @@ llvm::getKnowledgeFromBundle(CallInst &Assume,
   Result.AttrKind = Attribute::getAttrKindFromName(BOI.Tag->getKey());
   if (bundleHasArgument(BOI, ABA_WasOn))
     Result.WasOn = getValueFromBundleOpInfo(Assume, BOI, ABA_WasOn);
+  auto GetArgOr1 = [&](unsigned Idx) -> unsigned {
+    if (auto *ConstInt = dyn_cast<ConstantInt>(
+            getValueFromBundleOpInfo(Assume, BOI, ABA_Argument + Idx)))
+      return ConstInt->getZExtValue();
+    return 1;
+  };
   if (BOI.End - BOI.Begin > ABA_Argument)
-    Result.ArgValue =
-        cast<ConstantInt>(getValueFromBundleOpInfo(Assume, BOI, ABA_Argument))
-            ->getZExtValue();
+    Result.ArgValue = GetArgOr1(0);
+  if (Result.AttrKind == Attribute::Alignment)
+    if (BOI.End - BOI.Begin > ABA_Argument + 1)
+      Result.ArgValue = MinAlign(Result.ArgValue, GetArgOr1(1));
   return Result;
 }
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index d6eeffd44b368..febfe189df6ea 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -72,8 +72,9 @@ Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) {
 static CallInst *createCallHelper(Function *Callee, ArrayRef<Value *> Ops,
                                   IRBuilderBase *Builder,
                                   const Twine &Name = "",
-                                  Instruction *FMFSource = nullptr) {
-  CallInst *CI = Builder->CreateCall(Callee, Ops, Name);
+                                  Instruction *FMFSource = nullptr,
+                                  ArrayRef<OperandBundleDef> OpBundles = {}) {
+  CallInst *CI = Builder->CreateCall(Callee, Ops, OpBundles, Name);
   if (FMFSource)
     CI->copyFastMathFlags(FMFSource);
   return CI;
 }
@@ -450,14 +451,16 @@ CallInst *IRBuilderBase::CreateInvariantStart(Value *Ptr, ConstantInt *Size) {
   return createCallHelper(TheFn, Ops, this);
 }
 
-CallInst *IRBuilderBase::CreateAssumption(Value *Cond) {
+CallInst *
+IRBuilderBase::CreateAssumption(Value *Cond,
+                                ArrayRef<OperandBundleDef> OpBundles) {
   assert(Cond->getType() == getInt1Ty() &&
          "an assumption condition must be of type i1");
 
   Value *Ops[] = { Cond };
   Module *M = BB->getParent()->getParent();
   Function *FnAssume = Intrinsic::getDeclaration(M, Intrinsic::assume);
-  return createCallHelper(FnAssume, Ops, this);
+  return createCallHelper(FnAssume, Ops, this, "", nullptr, OpBundles);
 }
 
 /// Create a call to a Masked Load intrinsic.
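
For illustration, a caller can now attach alignment knowledge directly through
the new overload. A minimal sketch (illustrative helper name, not part of this
patch) that mirrors the CreateAlignmentAssumptionHelper rewrite below:

  // Emits: call void @llvm.assume(i1 true) [ "align"(i8* %ptr, i64 32) ]
  static CallInst *emitAlignAssume(IRBuilderBase &B, Value *Ptr) {
    SmallVector<Value *, 2> Args = {Ptr, B.getInt64(32)};
    OperandBundleDefT<Value *> AlignOB("align", Args);
    return B.CreateAssumption(B.getTrue(), {AlignOB});
  }
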
@@ -1113,63 +1116,37 @@ Value *IRBuilderBase::CreatePreserveStructAccessIndex(
   return Fn;
 }
 
-CallInst *IRBuilderBase::CreateAlignmentAssumptionHelper(
-    const DataLayout &DL, Value *PtrValue, Value *Mask, Type *IntPtrTy,
-    Value *OffsetValue, Value **TheCheck) {
-  Value *PtrIntValue = CreatePtrToInt(PtrValue, IntPtrTy, "ptrint");
-
-  if (OffsetValue) {
-    bool IsOffsetZero = false;
-    if (const auto *CI = dyn_cast<ConstantInt>(OffsetValue))
-      IsOffsetZero = CI->isZero();
-
-    if (!IsOffsetZero) {
-      if (OffsetValue->getType() != IntPtrTy)
-        OffsetValue = CreateIntCast(OffsetValue, IntPtrTy, /*isSigned*/ true,
-                                    "offsetcast");
-      PtrIntValue = CreateSub(PtrIntValue, OffsetValue, "offsetptr");
-    }
-  }
-
-  Value *Zero = ConstantInt::get(IntPtrTy, 0);
-  Value *MaskedPtr = CreateAnd(PtrIntValue, Mask, "maskedptr");
-  Value *InvCond = CreateICmpEQ(MaskedPtr, Zero, "maskcond");
-  if (TheCheck)
-    *TheCheck = InvCond;
-
-  return CreateAssumption(InvCond);
+CallInst *IRBuilderBase::CreateAlignmentAssumptionHelper(const DataLayout &DL,
+                                                         Value *PtrValue,
+                                                         Value *AlignValue,
+                                                         Value *OffsetValue) {
+  SmallVector<Value *, 4> Vals({PtrValue, AlignValue});
+  if (OffsetValue)
+    Vals.push_back(OffsetValue);
+  OperandBundleDefT<Value *> AlignOpB("align", Vals);
+  return CreateAssumption(ConstantInt::getTrue(getContext()), {AlignOpB});
 }
 
-CallInst *IRBuilderBase::CreateAlignmentAssumption(
-    const DataLayout &DL, Value *PtrValue, unsigned Alignment,
-    Value *OffsetValue, Value **TheCheck) {
+CallInst *IRBuilderBase::CreateAlignmentAssumption(const DataLayout &DL,
+                                                   Value *PtrValue,
+                                                   unsigned Alignment,
+                                                   Value *OffsetValue) {
   assert(isa<PointerType>(PtrValue->getType()) &&
          "trying to create an alignment assumption on a non-pointer?");
   assert(Alignment != 0 && "Invalid Alignment");
   auto *PtrTy = cast<PointerType>(PtrValue->getType());
   Type *IntPtrTy = getIntPtrTy(DL, PtrTy->getAddressSpace());
-
-  Value *Mask = ConstantInt::get(IntPtrTy, Alignment - 1);
-  return CreateAlignmentAssumptionHelper(DL, PtrValue, Mask, IntPtrTy,
-                                         OffsetValue, TheCheck);
+  Value *AlignValue = ConstantInt::get(IntPtrTy, Alignment);
+  return CreateAlignmentAssumptionHelper(DL, PtrValue, AlignValue, OffsetValue);
 }
 
-CallInst *IRBuilderBase::CreateAlignmentAssumption(
-    const DataLayout &DL, Value *PtrValue, Value *Alignment,
-    Value *OffsetValue, Value **TheCheck) {
+CallInst *IRBuilderBase::CreateAlignmentAssumption(const DataLayout &DL,
+                                                   Value *PtrValue,
+                                                   Value *Alignment,
+                                                   Value *OffsetValue) {
   assert(isa<PointerType>(PtrValue->getType()) &&
          "trying to create an alignment assumption on a non-pointer?");
-  auto *PtrTy = cast<PointerType>(PtrValue->getType());
-  Type *IntPtrTy = getIntPtrTy(DL, PtrTy->getAddressSpace());
-
-  if (Alignment->getType() != IntPtrTy)
-    Alignment = CreateIntCast(Alignment, IntPtrTy, /*isSigned*/ false,
-                              "alignmentcast");
-
-  Value *Mask = CreateSub(Alignment, ConstantInt::get(IntPtrTy, 1), "mask");
-
-  return CreateAlignmentAssumptionHelper(DL, PtrValue, Mask, IntPtrTy,
-                                         OffsetValue, TheCheck);
+  return CreateAlignmentAssumptionHelper(DL, PtrValue, Alignment, OffsetValue);
 }
 
 IRBuilderDefaultInserter::~IRBuilderDefaultInserter() {}
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 6cae21e3cfe1a..783c492dbeae1 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4483,21 +4483,32 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
       Assert(Elem.Tag->getKey() == "ignore" ||
                  Attribute::isExistingAttribute(Elem.Tag->getKey()),
              "tags must be valid attribute names");
-      Assert(Elem.End - Elem.Begin <= 2, "to many arguments");
       Attribute::AttrKind Kind =
           Attribute::getAttrKindFromName(Elem.Tag->getKey());
+      unsigned ArgCount = Elem.End - Elem.Begin;
+      if (Kind == Attribute::Alignment) {
+        Assert(ArgCount <= 3 && ArgCount >= 2,
+               "alignment assumptions should have 2 or 3 arguments");
+        Assert(Call.getOperand(Elem.Begin)->getType()->isPointerTy(),
+               "first argument should be a pointer");
+        Assert(Call.getOperand(Elem.Begin + 1)->getType()->isIntegerTy(),
+               "second argument should be an integer");
+        if (ArgCount == 3)
+          Assert(Call.getOperand(Elem.Begin + 2)->getType()->isIntegerTy(),
+                 "third argument should be an integer if present");
+        return;
+      }
+      Assert(ArgCount <= 2, "to many arguments");
       if (Kind == Attribute::None)
         break;
       if (Attribute::doesAttrKindHaveArgument(Kind)) {
-        Assert(Elem.End - Elem.Begin == 2,
-               "this attribute should have 2 arguments");
+        Assert(ArgCount == 2, "this attribute should have 2 arguments");
         Assert(isa<ConstantInt>(Call.getOperand(Elem.Begin + 1)),
                "the second argument should be a constant integral value");
       } else if (isFuncOnlyAttr(Kind)) {
-        Assert((Elem.End - Elem.Begin) == 0, "this attribute has no argument");
+        Assert((ArgCount) == 0, "this attribute has no argument");
       } else if (!isFuncOrArgAttr(Kind)) {
-        Assert((Elem.End - Elem.Begin) == 1,
-               "this attribute should have one argument");
+        Assert((ArgCount) == 1, "this attribute should have one argument");
       }
     }
     break;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 334e4e3e74abb..90571bd033670 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1461,11 +1461,16 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     break;
   case Intrinsic::assume: {
     Value *IIOperand = II->getArgOperand(0);
+    SmallVector<OperandBundleDef, 4> OpBundles;
+    II->getOperandBundlesAsDefs(OpBundles);
+    bool HasOpBundles = !OpBundles.empty();
     // Remove an assume if it is followed by an identical assume.
     // TODO: Do we need this? Unless there are conflicting assumptions, the
     // computeKnownBits(IIOperand) below here eliminates redundant assumes.
     Instruction *Next = II->getNextNonDebugInstruction();
-    if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
+    if (HasOpBundles &&
+        match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))) &&
+        !cast<IntrinsicInst>(Next)->hasOperandBundles())
       return eraseInstFromFunction(CI);
 
     // Canonicalize assume(a && b) -> assume(a); assume(b);
@@ -1475,14 +1480,15 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     Value *AssumeIntrinsic = II->getCalledOperand();
     Value *A, *B;
     if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
-      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, II->getName());
+      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles,
+                         II->getName());
       Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
       return eraseInstFromFunction(*II);
     }
 
     // assume(!(a || b)) -> assume(!a); assume(!b);
     if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
       Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
-                         Builder.CreateNot(A), II->getName());
+                         Builder.CreateNot(A), OpBundles, II->getName());
       Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
                          Builder.CreateNot(B), II->getName());
       return eraseInstFromFunction(*II);
@@ -1498,7 +1504,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
         isValidAssumeForContext(II, LHS, &DT)) {
       MDNode *MD = MDNode::get(II->getContext(), None);
       LHS->setMetadata(LLVMContext::MD_nonnull, MD);
-      return eraseInstFromFunction(*II);
+      if (!HasOpBundles)
+        return eraseInstFromFunction(*II);
 
       // TODO: apply nonnull return attributes to calls and invokes
       // TODO: apply range metadata for range check patterns?
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 5c008585869cd..bccf94fc217fe 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -15,6 +15,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/Instructions.h"
 #include "llvm/InitializePasses.h"
 #define AA_NAME "alignment-from-assumptions"
 #define DEBUG_TYPE AA_NAME
@@ -203,103 +204,33 @@ static Align getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
 }
 
 bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I,
+                                                        unsigned Idx,
                                                         Value *&AAPtr,
                                                         const SCEV *&AlignSCEV,
                                                         const SCEV *&OffSCEV) {
-  // An alignment assume must be a statement about the least-significant
-  // bits of the pointer being zero, possibly with some offset.
-  ICmpInst *ICI = dyn_cast<ICmpInst>(I->getArgOperand(0));
-  if (!ICI)
+  Type *Int64Ty = Type::getInt64Ty(I->getContext());
+  OperandBundleUse AlignOB = I->getOperandBundleAt(Idx);
+  if (AlignOB.getTagName() != "align")
     return false;
-
-  // This must be an expression of the form: x & m == 0.
-  if (ICI->getPredicate() != ICmpInst::ICMP_EQ)
-    return false;
-
-  // Swap things around so that the RHS is 0.
-  Value *CmpLHS = ICI->getOperand(0);
-  Value *CmpRHS = ICI->getOperand(1);
-  const SCEV *CmpLHSSCEV = SE->getSCEV(CmpLHS);
-  const SCEV *CmpRHSSCEV = SE->getSCEV(CmpRHS);
-  if (CmpLHSSCEV->isZero())
-    std::swap(CmpLHS, CmpRHS);
-  else if (!CmpRHSSCEV->isZero())
-    return false;
-
-  BinaryOperator *CmpBO = dyn_cast<BinaryOperator>(CmpLHS);
-  if (!CmpBO || CmpBO->getOpcode() != Instruction::And)
-    return false;
-
-  // Swap things around so that the right operand of the and is a constant
-  // (the mask); we cannot deal with variable masks.
-  Value *AndLHS = CmpBO->getOperand(0);
-  Value *AndRHS = CmpBO->getOperand(1);
-  const SCEV *AndLHSSCEV = SE->getSCEV(AndLHS);
-  const SCEV *AndRHSSCEV = SE->getSCEV(AndRHS);
-  if (isa<SCEVConstant>(AndLHSSCEV)) {
-    std::swap(AndLHS, AndRHS);
-    std::swap(AndLHSSCEV, AndRHSSCEV);
-  }
-
-  const SCEVConstant *MaskSCEV = dyn_cast<SCEVConstant>(AndRHSSCEV);
-  if (!MaskSCEV)
-    return false;
-
-  // The mask must have some trailing ones (otherwise the condition is
-  // trivial and tells us nothing about the alignment of the left operand).
-  unsigned TrailingOnes = MaskSCEV->getAPInt().countTrailingOnes();
-  if (!TrailingOnes)
-    return false;
-
-  // Cap the alignment at the maximum with which LLVM can deal (and make sure
-  // we don't overflow the shift).
-  uint64_t Alignment;
-  TrailingOnes = std::min(TrailingOnes,
-    unsigned(sizeof(unsigned) * CHAR_BIT - 1));
-  Alignment = std::min(1u << TrailingOnes, +Value::MaximumAlignment);
-
-  Type *Int64Ty = Type::getInt64Ty(I->getParent()->getParent()->getContext());
-  AlignSCEV = SE->getConstant(Int64Ty, Alignment);
-
-  // The LHS might be a ptrtoint instruction, or it might be the pointer
-  // with an offset.
-  AAPtr = nullptr;
-  OffSCEV = nullptr;
-  if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(AndLHS)) {
-    AAPtr = PToI->getPointerOperand();
+  assert(AlignOB.Inputs.size() >= 2);
+  AAPtr = AlignOB.Inputs[0].get();
+  // TODO: Consider accumulating the offset to the base.
+  AAPtr = AAPtr->stripPointerCastsSameRepresentation();
+  AlignSCEV = SE->getSCEV(AlignOB.Inputs[1].get());
+  AlignSCEV = SE->getTruncateOrZeroExtend(AlignSCEV, Int64Ty);
+  if (AlignOB.Inputs.size() == 3)
+    OffSCEV = SE->getSCEV(AlignOB.Inputs[2].get());
+  else
     OffSCEV = SE->getZero(Int64Ty);
-  } else if (const SCEVAddExpr* AndLHSAddSCEV =
-             dyn_cast<SCEVAddExpr>(AndLHSSCEV)) {
-    // Try to find the ptrtoint; subtract it and the rest is the offset.
-    for (SCEVAddExpr::op_iterator J = AndLHSAddSCEV->op_begin(),
-         JE = AndLHSAddSCEV->op_end(); J != JE; ++J)
-      if (const SCEVUnknown *OpUnk = dyn_cast<SCEVUnknown>(*J))
-        if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(OpUnk->getValue())) {
-          AAPtr = PToI->getPointerOperand();
-          OffSCEV = SE->getMinusSCEV(AndLHSAddSCEV, *J);
-          break;
-        }
-  }
-
-  if (!AAPtr)
-    return false;
-
-  // Sign extend the offset to 64 bits (so that it is like all of the other
-  // expressions).
-  unsigned OffSCEVBits = OffSCEV->getType()->getPrimitiveSizeInBits();
-  if (OffSCEVBits < 64)
-    OffSCEV = SE->getSignExtendExpr(OffSCEV, Int64Ty);
-  else if (OffSCEVBits > 64)
-    return false;
-
-  AAPtr = AAPtr->stripPointerCasts();
+  OffSCEV = SE->getTruncateOrZeroExtend(OffSCEV, Int64Ty);
   return true;
 }
 
-bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) {
+bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
+                                                     unsigned Idx) {
   Value *AAPtr;
   const SCEV *AlignSCEV, *OffSCEV;
-  if (!extractAlignmentInfo(ACall, AAPtr, AlignSCEV, OffSCEV))
+  if (!extractAlignmentInfo(ACall, Idx, AAPtr, AlignSCEV, OffSCEV))
     return false;
 
   // Skip ConstantPointerNull and UndefValue. Assumptions on these shouldn't
@@ -317,13 +248,14 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) {
       continue;
 
     if (Instruction *K = dyn_cast<Instruction>(J))
-      if (isValidAssumeForContext(ACall, K, DT))
         WorkList.push_back(K);
   }
 
   while (!WorkList.empty()) {
     Instruction *J = WorkList.pop_back_val();
     if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
+      if (!isValidAssumeForContext(ACall, J, DT))
+        continue;
       Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
                                            LI->getPointerOperand(), SE);
       if (NewAlignment > LI->getAlign()) {
@@ -331,6 +263,8 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) {
         ++NumLoadAlignChanged;
       }
     } else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
+      if (!isValidAssumeForContext(ACall, J, DT))
+        continue;
       Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
                                            SI->getPointerOperand(), SE);
       if (NewAlignment > SI->getAlign()) {
@@ -338,6 +272,8 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) {
         ++NumStoreAlignChanged;
       }
     } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(J)) {
+      if (!isValidAssumeForContext(ACall, J, DT))
+        continue;
       Align NewDestAlignment =
           getNewAlignment(AASCEV, AlignSCEV, OffSCEV, MI->getDest(), SE);
 
@@ -369,7 +305,7 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) {
     Visited.insert(J);
     for (User *UJ : J->users()) {
       Instruction *K = cast<Instruction>(UJ);
-      if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DT))
+      if (!Visited.count(K))
         WorkList.push_back(K);
     }
   }
@@ -396,8 +332,11 @@ bool AlignmentFromAssumptionsPass::runImpl(Function &F, AssumptionCache &AC,
 
   bool Changed = false;
   for (auto &AssumeVH : AC.assumptions())
-    if (AssumeVH)
-      Changed |= processAssumption(cast<CallInst>(AssumeVH));
+    if (AssumeVH) {
+      CallInst *Call = cast<CallInst>(AssumeVH);
+      for (unsigned Idx = 0; Idx < Call->getNumOperandBundles(); Idx++)
+        Changed |= processAssumption(Call, Idx);
+    }
 
   return Changed;
 }
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll b/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
index 14e764f042c7a..610fd448c3b98 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
@@ -4,10 +4,7 @@ target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
 
 define i32 @foo(i32* nocapture %a) nounwind uwtable readonly {
 entry:
-  %ptrint = ptrtoint i32* %a to i64
-  %maskedptr = and i64 %ptrint, 31
-  %maskcond = icmp eq i64 %maskedptr, 0
-  tail call void @llvm.assume(i1 %maskcond)
+  tail call void @llvm.assume(i1 true) ["align"(i32* %a, i32 32)]
   %0 = load i32, i32* %a, align 4
   ret i32 %0
 
@@ -18,11 +15,7 @@ entry:
 
 define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly {
 entry:
-  %ptrint = ptrtoint i32* %a to i64
-  %offsetptr = add i64 %ptrint, 24
-  %maskedptr = and i64 %offsetptr, 31
-  %maskcond = icmp eq i64 %maskedptr, 0
-  tail call void @llvm.assume(i1 %maskcond)
+  tail call void @llvm.assume(i1 true) ["align"(i32* %a, i32 32, i32 24)]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 2
   %0 = load i32, i32* %arrayidx, align 4
   ret i32 %0
@@ -34,11 +27,7 @@ entry:
 
 define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly {
 entry:
-  %ptrint = ptrtoint i32* %a to i64
-  %offsetptr = add i64 %ptrint, 28
-  %maskedptr = and i64 %offsetptr, 31
-  %maskcond = icmp eq i64 %maskedptr, 0
-  tail call void @llvm.assume(i1 %maskcond)
+  tail call void @llvm.assume(i1 true) ["align"(i32* %a, i32 32, i32 28)]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 -1
   %0 = load i32, i32* %arrayidx, align 4
   ret i32 %0
@@ -50,10 +39,7 @@ entry:
define i32 @goo(i32* nocapture %a) nounwind uwtable readonly { entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + tail call void @llvm.assume(i1 true) ["align"(i32* %a, i32 32, i32 0)] %0 = load i32, i32* %a, align 4 ret i32 %0 @@ -64,10 +50,7 @@ entry: define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly { entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + tail call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32, i32 0)] br label %for.body for.body: ; preds = %entry, %for.body @@ -98,10 +81,7 @@ for.end: ; preds = %for.body ; load(a, i0+i1+i2+32) define void @hoo2(i32* nocapture %a, i64 %id, i64 %num) nounwind uwtable readonly { entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + tail call void @llvm.assume(i1 true) ["align"(i32* %a, i8 32, i64 0)] %id.mul = shl nsw i64 %id, 6 %num.mul = shl nsw i64 %num, 6 br label %for0.body @@ -147,10 +127,7 @@ return: define i32 @joo(i32* nocapture %a) nounwind uwtable readonly { entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + tail call void @llvm.assume(i1 true) ["align"(i32* %a, i8 32, i8 0)] br label %for.body for.body: ; preds = %entry, %for.body @@ -175,16 +152,13 @@ for.end: ; preds = %for.body define i32 @koo(i32* nocapture %a) nounwind uwtable readonly { entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ] %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + tail call void @llvm.assume(i1 true) ["align"(i32* %a, i8 32, i8 0)] %0 = load i32, i32* %arrayidx, align 4 %add = add nsw i32 %0, %r.06 %indvars.iv.next = add i64 %indvars.iv, 4 @@ -203,10 +177,7 @@ for.end: ; preds = %for.body define i32 @koo2(i32* nocapture %a) nounwind uwtable readonly { entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + tail call void @llvm.assume(i1 true) ["align"(i32* %a, i128 32, i128 0)] br label %for.body for.body: ; preds = %entry, %for.body @@ -231,10 +202,7 @@ for.end: ; preds = %for.body define i32 @moo(i32* nocapture %a) nounwind uwtable { entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + tail call void @llvm.assume(i1 true) ["align"(i32* %a, i16 32)] %0 = bitcast i32* %a to i8* tail call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 64, i1 false) ret i32 undef @@ -246,15 +214,9 @@ entry: define i32 @moo2(i32* nocapture %a, i32* nocapture %b) nounwind uwtable { entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) - %ptrint1 = ptrtoint i32* %b to i64 - %maskedptr3 = and i64 %ptrint1, 127 - %maskcond4 = icmp eq i64 %maskedptr3, 0 - tail call void @llvm.assume(i1 
%maskcond4) + tail call void @llvm.assume(i1 true) ["align"(i32* %b, i32 128)] %0 = bitcast i32* %a to i8* + tail call void @llvm.assume(i1 true) ["align"(i8* %0, i16 32)] %1 = bitcast i32* %b to i8* tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 64, i1 false) ret i32 undef @@ -264,6 +226,19 @@ entry: ; CHECK: ret i32 undef } +define i32 @moo3(i32* nocapture %a, i32* nocapture %b) nounwind uwtable { +entry: + %0 = bitcast i32* %a to i8* + tail call void @llvm.assume(i1 true) ["align"(i8* %0, i16 32), "align"(i32* %b, i32 128)] + %1 = bitcast i32* %b to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 64, i1 false) + ret i32 undef + +; CHECK-LABEL: @moo3 +; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* align 32 %0, i8* align 128 %1, i64 64, i1 false) +; CHECK: ret i32 undef +} + declare void @llvm.assume(i1) nounwind declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll b/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll index 3f0819e3641b3..453899c15c4fb 100644 --- a/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll +++ b/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll @@ -7,18 +7,12 @@ define i32 @foo(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32) ] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 32 ; CHECK-NEXT: ret i32 [[TMP0]] ; entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32)] %0 = load i32, i32* %a, align 4 ret i32 %0 @@ -28,21 +22,13 @@ define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-LABEL: define {{[^@]+}}@foo2 ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[OFFSETPTR:%.*]] = add i64 [[PTRINT]], 24 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[OFFSETPTR]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32, i64 24) ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 16 ; CHECK-NEXT: ret i32 [[TMP0]] ; entry: - %ptrint = ptrtoint i32* %a to i64 - %offsetptr = add i64 %ptrint, 24 - %maskedptr = and i64 %offsetptr, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32, i64 24)] %arrayidx = getelementptr inbounds i32, i32* %a, i64 2 %0 = load i32, i32* %arrayidx, align 4 ret i32 %0 @@ -53,21 +39,13 @@ define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-LABEL: define {{[^@]+}}@foo2a ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[OFFSETPTR:%.*]] = add i64 [[PTRINT]], 28 -; 
CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[OFFSETPTR]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32, i64 28) ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 -1 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 32 ; CHECK-NEXT: ret i32 [[TMP0]] ; entry: - %ptrint = ptrtoint i32* %a to i64 - %offsetptr = add i64 %ptrint, 28 - %maskedptr = and i64 %offsetptr, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32, i64 28)] %arrayidx = getelementptr inbounds i32, i32* %a, i64 -1 %0 = load i32, i32* %arrayidx, align 4 ret i32 %0 @@ -78,18 +56,12 @@ define i32 @goo(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-LABEL: define {{[^@]+}}@goo ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32) ] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 32 ; CHECK-NEXT: ret i32 [[TMP0]] ; entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32)] %0 = load i32, i32* %a, align 4 ret i32 %0 @@ -99,10 +71,7 @@ define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-LABEL: define {{[^@]+}}@hoo ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32) ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -119,10 +88,7 @@ define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-NEXT: ret i32 [[ADD_LCSSA]] ; entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32)] br label %for.body for.body: ; preds = %entry, %for.body @@ -146,10 +112,7 @@ define i32 @joo(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-LABEL: define {{[^@]+}}@joo ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32) ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 4, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -166,10 +129,7 @@ define i32 @joo(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-NEXT: ret i32 [[ADD_LCSSA]] ; entry: - 
%ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32)] br label %for.body for.body: ; preds = %entry, %for.body @@ -193,10 +153,7 @@ define i32 @koo(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-LABEL: define {{[^@]+}}@koo ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32) ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -213,10 +170,7 @@ define i32 @koo(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-NEXT: ret i32 [[ADD_LCSSA]] ; entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32)] br label %for.body for.body: ; preds = %entry, %for.body @@ -240,10 +194,7 @@ define i32 @koo2(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-LABEL: define {{[^@]+}}@koo2 ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32) ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ -4, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -260,10 +211,7 @@ define i32 @koo2(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-NEXT: ret i32 [[ADD_LCSSA]] ; entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32)] br label %for.body for.body: ; preds = %entry, %for.body @@ -287,19 +235,13 @@ define i32 @moo(i32* nocapture %a) nounwind uwtable { ; CHECK-LABEL: define {{[^@]+}}@moo ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #1 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32) ] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* ; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP0]], i8 0, i64 64, i1 false) ; CHECK-NEXT: ret i32 undef ; entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32)] %0 = bitcast i32* %a to i8* tail call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 64, i1 false) ret i32 undef @@ -310,28 +252,16 @@ define i32 @moo2(i32* nocapture %a, i32* nocapture %b) nounwind uwtable { ; CHECK-LABEL: 
define {{[^@]+}}@moo2 ; CHECK-SAME: (i32* nocapture [[A:%.*]], i32* nocapture [[B:%.*]]) #1 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) -; CHECK-NEXT: [[PTRINT1:%.*]] = ptrtoint i32* [[B]] to i64 -; CHECK-NEXT: [[MASKEDPTR3:%.*]] = and i64 [[PTRINT1]], 127 -; CHECK-NEXT: [[MASKCOND4:%.*]] = icmp eq i64 [[MASKEDPTR3]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[B]], i64 128) ] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[B]] to i8* ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 32 [[TMP0]], i8* align 128 [[TMP1]], i64 64, i1 false) ; CHECK-NEXT: ret i32 undef ; entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) - %ptrint1 = ptrtoint i32* %b to i64 - %maskedptr3 = and i64 %ptrint1, 127 - %maskcond4 = icmp eq i64 %maskedptr3, 0 - tail call void @llvm.assume(i1 %maskcond4) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32)] + call void @llvm.assume(i1 true) ["align"(i32* %b, i64 128)] %0 = bitcast i32* %a to i8* %1 = bitcast i32* %b to i8* tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 64, i1 false) diff --git a/llvm/test/Transforms/Inline/align.ll b/llvm/test/Transforms/Inline/align.ll index ede6c3fa7bcf4..f3a5184564850 100644 --- a/llvm/test/Transforms/Inline/align.ll +++ b/llvm/test/Transforms/Inline/align.ll @@ -23,10 +23,7 @@ define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 { ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (float* nocapture [[A:%.*]], float* nocapture readonly [[C:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint float* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 127 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[A]], i64 128) ] ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[C]], align 4 ; CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds float, float* [[A]], i64 5 ; CHECK-NEXT: store float [[TMP0]], float* [[ARRAYIDX_I]], align 4 @@ -87,14 +84,8 @@ define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture rea ; CHECK-LABEL: define {{[^@]+}}@foo2 ; CHECK-SAME: (float* nocapture [[A:%.*]], float* nocapture [[B:%.*]], float* nocapture readonly [[C:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint float* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 127 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) -; CHECK-NEXT: [[PTRINT1:%.*]] = ptrtoint float* [[B]] to i64 -; CHECK-NEXT: [[MASKEDPTR2:%.*]] = and i64 [[PTRINT1]], 127 -; CHECK-NEXT: [[MASKCOND3:%.*]] = icmp eq i64 [[MASKEDPTR2]], 0 -; CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND3]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[A]], i64 128) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[B]], i64 128) ] ; CHECK-NEXT: [[TMP0:%.*]] = load float, 
float* [[C]], align 4 ; CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds float, float* [[A]], i64 5 ; CHECK-NEXT: store float [[TMP0]], float* [[ARRAYIDX_I]], align 4 diff --git a/llvm/test/Transforms/Inline/byref-align.ll b/llvm/test/Transforms/Inline/byref-align.ll index fb70db2af449d..4a94bd8bfe13a 100644 --- a/llvm/test/Transforms/Inline/byref-align.ll +++ b/llvm/test/Transforms/Inline/byref-align.ll @@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu" ; should be inserted. define void @byref_callee(float* align(128) byref(float) nocapture %a, float* %b) #0 { ; CHECK-LABEL: define {{[^@]+}}@byref_callee -; CHECK-SAME: (float* nocapture byref(float) align 128 [[A:%.*]], float* [[B:%.*]]) #0 +; CHECK-SAME: (float* nocapture byref(float) align 128 [[A:%.*]], float* [[B:%.*]]) [[ATTR0:#.*]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LOAD:%.*]] = load float, float* [[A]], align 4 ; CHECK-NEXT: [[B_IDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 8 @@ -26,12 +26,9 @@ entry: define void @byref_caller(float* nocapture align 64 %a, float* %b) #0 { ; CHECK-LABEL: define {{[^@]+}}@byref_caller -; CHECK-SAME: (float* nocapture align 64 [[A:%.*]], float* [[B:%.*]]) #0 +; CHECK-SAME: (float* nocapture align 64 [[A:%.*]], float* [[B:%.*]]) [[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint float* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 127 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[A]], i64 128) ] ; CHECK-NEXT: [[LOAD_I:%.*]] = load float, float* [[A]], align 4 ; CHECK-NEXT: [[B_IDX_I:%.*]] = getelementptr inbounds float, float* [[B]], i64 8 ; CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[LOAD_I]], 2.000000e+00 diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll index 8ca24caa2aa1b..a988eea894450 100644 --- a/llvm/test/Transforms/InstCombine/assume.ll +++ b/llvm/test/Transforms/InstCombine/assume.ll @@ -346,6 +346,7 @@ define i32 @assumption_conflicts_with_known_bits(i32 %a, i32 %b) { define void @debug_interference(i8 %x) { ; CHECK-LABEL: @debug_interference( ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i8 [[X:%.*]], 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 false) ; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, [[META7:metadata !.*]], metadata !DIExpression()), [[DBG9:!dbg !.*]] ; CHECK-NEXT: tail call void @llvm.assume(i1 false) ; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, [[META7]], metadata !DIExpression()), [[DBG9]] diff --git a/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll b/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll index 61287e35005ff..2605701d231d2 100644 --- a/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll +++ b/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll @@ -41,10 +41,7 @@ define void @caller1(i1 %c, i64* align 1 %ptr) { ; ASSUMPTIONS-ON-NEXT: br i1 [[C:%.*]], label [[TRUE2_CRITEDGE:%.*]], label [[FALSE1:%.*]] ; ASSUMPTIONS-ON: false1: ; ASSUMPTIONS-ON-NEXT: store volatile i64 1, i64* [[PTR:%.*]], align 8 -; ASSUMPTIONS-ON-NEXT: [[PTRINT:%.*]] = ptrtoint i64* [[PTR]] to i64 -; ASSUMPTIONS-ON-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 7 -; ASSUMPTIONS-ON-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; ASSUMPTIONS-ON-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; ASSUMPTIONS-ON-NEXT: call void 
@llvm.assume(i1 true) [ "align"(i64* [[PTR]], i64 8) ] ; ASSUMPTIONS-ON-NEXT: store volatile i64 0, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 @@ -54,10 +51,7 @@ define void @caller1(i1 %c, i64* align 1 %ptr) { ; ASSUMPTIONS-ON-NEXT: store volatile i64 3, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: ret void ; ASSUMPTIONS-ON: true2.critedge: -; ASSUMPTIONS-ON-NEXT: [[PTRINT_C:%.*]] = ptrtoint i64* [[PTR]] to i64 -; ASSUMPTIONS-ON-NEXT: [[MASKEDPTR_C:%.*]] = and i64 [[PTRINT_C]], 7 -; ASSUMPTIONS-ON-NEXT: [[MASKCOND_C:%.*]] = icmp eq i64 [[MASKEDPTR_C]], 0 -; ASSUMPTIONS-ON-NEXT: tail call void @llvm.assume(i1 [[MASKCOND_C]]) +; ASSUMPTIONS-ON-NEXT: call void @llvm.assume(i1 true) [ "align"(i64* [[PTR]], i64 8) ] ; ASSUMPTIONS-ON-NEXT: store volatile i64 0, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 @@ -94,26 +88,17 @@ false2: ; This test checks that alignment assumptions do not prevent SROA. ; See PR45763. -define internal void @callee2(i64* noalias sret align 8 %arg) { +define internal void @callee2(i64* noalias sret align 32 %arg) { store i64 0, i64* %arg, align 8 ret void } define amdgpu_kernel void @caller2() { -; ASSUMPTIONS-OFF-LABEL: @caller2( -; ASSUMPTIONS-OFF-NEXT: ret void -; -; ASSUMPTIONS-ON-LABEL: @caller2( -; ASSUMPTIONS-ON-NEXT: [[ALLOCA:%.*]] = alloca i64, align 8, addrspace(5) -; ASSUMPTIONS-ON-NEXT: [[CAST:%.*]] = addrspacecast i64 addrspace(5)* [[ALLOCA]] to i64* -; ASSUMPTIONS-ON-NEXT: [[PTRINT:%.*]] = ptrtoint i64* [[CAST]] to i64 -; ASSUMPTIONS-ON-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 7 -; ASSUMPTIONS-ON-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; ASSUMPTIONS-ON-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) -; ASSUMPTIONS-ON-NEXT: ret void +; CHECK-LABEL: @caller2( +; CHECK-NEXT: ret void ; %alloca = alloca i64, align 8, addrspace(5) %cast = addrspacecast i64 addrspace(5)* %alloca to i64* - call void @callee2(i64* sret align 8 %cast) + call void @callee2(i64* sret align 32 %cast) ret void } diff --git a/llvm/test/Verifier/assume-bundles.ll b/llvm/test/Verifier/assume-bundles.ll index 302421715c797..6e260f25129ee 100644 --- a/llvm/test/Verifier/assume-bundles.ll +++ b/llvm/test/Verifier/assume-bundles.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: not opt -verify < %s 2>&1 | FileCheck %s declare void @llvm.assume(i1) @@ -6,14 +7,21 @@ define void @func(i32* %P, i32 %P1, i32* %P2, i32* %P3) { ; CHECK: tags must be valid attribute names call void @llvm.assume(i1 true) ["adazdazd"()] ; CHECK: the second argument should be a constant integral value - call void @llvm.assume(i1 true) ["align"(i32* %P, i32 %P1)] + call void @llvm.assume(i1 true) ["dereferenceable"(i32* %P, i32 %P1)] ; CHECK: to many arguments - call void @llvm.assume(i1 true) ["align"(i32* %P, i32 8, i32 8)] + call void @llvm.assume(i1 true) ["dereferenceable"(i32* %P, i32 8, i32 8)] ; CHECK: this attribute should have 2 arguments - call void @llvm.assume(i1 true) ["align"(i32* %P)] + call void @llvm.assume(i1 true) ["dereferenceable"(i32* %P)] ; CHECK: this attribute has no argument - call void @llvm.assume(i1 true) ["align"(i32* %P, i32 4), "cold"(i32* %P)] + call void @llvm.assume(i1 true) ["dereferenceable"(i32* %P, i32 4), "cold"(i32* %P)] ; CHECK: this attribute should have one argument call void 
@llvm.assume(i1 true) ["noalias"()] + call void @llvm.assume(i1 true) ["align"(i32* %P, i32 %P1, i32 4)] +; CHECK: alignment assumptions should have 2 or 3 arguments + call void @llvm.assume(i1 true) ["align"(i32* %P, i32 %P1, i32 4, i32 4)] +; CHECK: second argument should be an integer + call void @llvm.assume(i1 true) ["align"(i32* %P, i32* %P2)] +; CHECK: third argument should be an integer if present + call void @llvm.assume(i1 true) ["align"(i32* %P, i32 %P1, i32* %P2)] ret void } diff --git a/llvm/unittests/Analysis/AssumeBundleQueriesTest.cpp b/llvm/unittests/Analysis/AssumeBundleQueriesTest.cpp index d35a77fa379be..946368e1cb947 100644 --- a/llvm/unittests/Analysis/AssumeBundleQueriesTest.cpp +++ b/llvm/unittests/Analysis/AssumeBundleQueriesTest.cpp @@ -546,3 +546,41 @@ TEST(AssumeQueryAPI, AssumptionCache) { ASSERT_EQ(AR[0].Index, 1u); ASSERT_EQ(AR[0].Assume, &*First); } + +TEST(AssumeQueryAPI, Alignment) { + LLVMContext C; + SMDiagnostic Err; + std::unique_ptr Mod = parseAssemblyString( + "declare void @llvm.assume(i1)\n" + "define void @test(i32* %P, i32* %P1, i32* %P2, i32 %I3, i1 %B) {\n" + "call void @llvm.assume(i1 true) [\"align\"(i32* %P, i32 8, i32 %I3)]\n" + "call void @llvm.assume(i1 true) [\"align\"(i32* %P1, i32 %I3, i32 " + "%I3)]\n" + "call void @llvm.assume(i1 true) [\"align\"(i32* %P2, i32 16, i32 8)]\n" + "ret void\n}\n", + Err, C); + if (!Mod) + Err.print("AssumeQueryAPI", errs()); + + Function *F = Mod->getFunction("test"); + BasicBlock::iterator Start = F->begin()->begin(); + IntrinsicInst *II; + RetainedKnowledge RK; + II = cast(&*Start); + RK = getKnowledgeFromBundle(*II, II->bundle_op_info_begin()[0]); + ASSERT_EQ(RK.AttrKind, Attribute::Alignment); + ASSERT_EQ(RK.WasOn, F->getArg(0)); + ASSERT_EQ(RK.ArgValue, 1u); + Start++; + II = cast(&*Start); + RK = getKnowledgeFromBundle(*II, II->bundle_op_info_begin()[0]); + ASSERT_EQ(RK.AttrKind, Attribute::Alignment); + ASSERT_EQ(RK.WasOn, F->getArg(1)); + ASSERT_EQ(RK.ArgValue, 1u); + Start++; + II = cast(&*Start); + RK = getKnowledgeFromBundle(*II, II->bundle_op_info_begin()[0]); + ASSERT_EQ(RK.AttrKind, Attribute::Alignment); + ASSERT_EQ(RK.WasOn, F->getArg(2)); + ASSERT_EQ(RK.ArgValue, 8u); +} From 2e61cd1295e0031b2379af2b65373e2798a551cb Mon Sep 17 00:00:00 2001 From: Evgeny Leviant Date: Sat, 12 Sep 2020 16:53:12 +0300 Subject: [PATCH 0452/1079] [MachineScheduler] Fix operand scheduling for pre/post-increment loads Differential revision: https://reviews.llvm.org/D87557 --- llvm/lib/Target/AArch64/AArch64InstrFormats.td | 8 ++++---- llvm/test/tools/llvm-mca/AArch64/Exynos/load.s | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 25d478ebfc055..61155087cbe28 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -3939,7 +3939,7 @@ class LoadPreIdx sz, bit V, bits<2> opc, RegisterOperand regtype, (outs GPR64sp:$wback, regtype:$Rt), (ins GPR64sp:$Rn, simm9:$offset), asm, "$Rn = $wback,@earlyclobber $wback", []>, - Sched<[WriteLD, WriteAdr]>; + Sched<[WriteAdr, WriteLD]>; let mayStore = 1, mayLoad = 0 in class StorePreIdx sz, bit V, bits<2> opc, RegisterOperand regtype, @@ -3985,7 +3985,7 @@ class LoadPostIdx sz, bit V, bits<2> opc, RegisterOperand regtype, (outs GPR64sp:$wback, regtype:$Rt), (ins GPR64sp:$Rn, simm9:$offset), asm, "$Rn = $wback,@earlyclobber $wback", []>, - Sched<[WriteLD, WriteAdr]>; + Sched<[WriteAdr, 
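For orientation (illustrative, not part of the patch): the Sched<[...]> entries
map positionally onto an instruction's definitions, and these pre/post-indexed
loads define the updated base register first, per the
(outs GPR64sp:$wback, regtype:$Rt) lists above. For an assumed pre-increment
load

  ldr x1, [x0, #8]!   // x0 is $wback, x1 is $Rt

the old order [WriteLD, WriteAdr] charged the load latency (WriteLD) to the
base update in x0 and the cheap address-generation latency (WriteAdr) to the
loaded value in x1; the swapped order pairs $wback with WriteAdr and $Rt with
WriteLD, which the llvm-mca expectations below reflect.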
@@ -4082,7 +4082,7 @@ class LoadPairPreIdx<bits<2> opc, bit V, RegisterOperand regtype,
     : BaseLoadStorePairPreIdx,
-      Sched<[WriteLD, WriteLDHi, WriteAdr]>;
+      Sched<[WriteAdr, WriteLD, WriteLDHi]>;
 
 let mayStore = 1, mayLoad = 0 in
 class StorePairPreIdx<bits<2> opc, bit V, RegisterOperand regtype,
@@ -4123,7 +4123,7 @@ class LoadPairPostIdx<bits<2> opc, bit V, RegisterOperand regtype,
     : BaseLoadStorePairPostIdx,
-      Sched<[WriteLD, WriteLDHi, WriteAdr]>;
+      Sched<[WriteAdr, WriteLD, WriteLDHi]>;
 
 let mayStore = 1, mayLoad = 0 in
 class StorePairPostIdx<bits<2> opc, bit V, RegisterOperand regtype,
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/load.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/load.s
index 04f30d353ae0d..2e90e5ab6f162 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Exynos/load.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/load.s
@@ -20,7 +20,7 @@ ldpsw x0, x1, [sp, #8]!
 
 # ALL: Iterations: 100
 # ALL-NEXT: Instructions: 1200
-# ALL-NEXT: Total Cycles: 1904
+# ALL-NEXT: Total Cycles: 1304
 # M3-NEXT: Total uOps: 1600
 
 # M4-NEXT: Total uOps: 1400
@@ -28,11 +28,11 @@ ldpsw x0, x1, [sp, #8]!
 
 # ALL: Dispatch Width: 6
-# M3-NEXT: uOps Per Cycle: 0.84
-# M4-NEXT: uOps Per Cycle: 0.74
-# M5-NEXT: uOps Per Cycle: 0.74
+# M3-NEXT: uOps Per Cycle: 1.23
+# M4-NEXT: uOps Per Cycle: 1.07
+# M5-NEXT: uOps Per Cycle: 1.07
 
-# ALL-NEXT: IPC: 0.63
+# ALL-NEXT: IPC: 0.92
 # ALL-NEXT: Block RThroughput: 6.0
 
 # ALL: Instruction Info:

From a874d63344093752c912d01de60211f65745ea6f Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Sat, 12 Sep 2020 14:23:36 +0100
Subject: [PATCH 0453/1079] [Clang] Add option to allow marking pass-by-value
 args as noalias.

After the recent discussion on cfe-dev 'Can indirect class parameters be
noalias?' [1], it seems like using noalias is problematic for current C++,
but should be allowed for C-only code.

This patch introduces a new option to let the user indicate that it is safe
to mark indirect class parameters as noalias. Note that this also applies
to external callers, e.g. it might not be safe to use this flag for C
functions that are called by C++ functions.

In targets that allocate indirect arguments in the called function, this
enables more aggressive optimizations with respect to memory operations and
brings a ~1% - 2% codesize reduction for some programs.
[1] : http://lists.llvm.org/pipermail/cfe-dev/2020-July/066353.html

Reviewed By: rjmccall

Differential Revision: https://reviews.llvm.org/D85473
---
 clang/include/clang/Basic/CodeGenOptions.def  |  4 +
 clang/include/clang/Driver/Options.td         |  3 +
 clang/lib/CodeGen/CGCall.cpp                  |  7 ++
 clang/lib/Frontend/CompilerInvocation.cpp     |  2 +
 clang/test/CodeGen/pass-by-value-noalias.c    | 16 ++++
 .../test/CodeGenCXX/pass-by-value-noalias.cpp | 73 +++++++++++++++++++
 .../test/CodeGenObjC/pass-by-value-noalias.m  | 22 ++++++
 7 files changed, 127 insertions(+)
 create mode 100644 clang/test/CodeGen/pass-by-value-noalias.c
 create mode 100644 clang/test/CodeGenCXX/pass-by-value-noalias.cpp
 create mode 100644 clang/test/CodeGenObjC/pass-by-value-noalias.m

diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index ec77f68062e7a..740d544710510 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -395,6 +395,10 @@ CODEGENOPT(KeepStaticConsts, 1, 0)
 /// Whether to not follow the AAPCS that enforce at least one read before storing to a volatile bitfield
 CODEGENOPT(ForceAAPCSBitfieldLoad, 1, 0)
 
+/// Assume that by-value parameters do not alias any other values.
+CODEGENOPT(PassByValueIsNoAlias, 1, 0)
+
+
 #undef CODEGENOPT
 #undef ENUM_CODEGENOPT
 #undef VALUE_CODEGENOPT
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 922ad580a53e7..f196c1b72d27f 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4322,6 +4322,9 @@ def fno_signed_wchar : Flag<["-"], "fno-signed-wchar">,
 def fcompatibility_qualified_id_block_param_type_checking : Flag<["-"], "fcompatibility-qualified-id-block-type-checking">,
   HelpText<"Allow using blocks with parameters of more specific type than "
            "the type system guarantees when a parameter is qualified id">;
+def fpass_by_value_is_noalias: Flag<["-"], "fpass-by-value-is-noalias">,
+  HelpText<"Allows assuming by-value parameters do not alias any other value. "
+           "Has no effect on non-trivially-copyable classes in C++.">, Group<f_Group>;
 
 // FIXME: Remove these entirely once functionality/tests have been excised.
 def fobjc_gc_only : Flag<["-"], "fobjc-gc-only">, Group<f_Group>,
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index a4b35edb1bd9d..adb68979568e7 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2201,6 +2201,13 @@ void CodeGenModule::ConstructAttributeList(
       if (AI.getIndirectByVal())
         Attrs.addByValAttr(getTypes().ConvertTypeForMem(ParamType));
 
+      auto *Decl = ParamType->getAsRecordDecl();
+      if (CodeGenOpts.PassByValueIsNoAlias && Decl &&
+          Decl->getArgPassingRestrictions() == RecordDecl::APK_CanPassInRegs)
+        // When calling the function, the pointer passed in will be the only
+        // reference to the underlying object. Mark it accordingly.
+        Attrs.addAttribute(llvm::Attribute::NoAlias);
+
       // TODO: We could add the byref attribute if not byval, but it would
       // require updating many testcases.
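To make the effect concrete before the cc1 plumbing and tests that follow, a hedged sketch (file and names are illustrative, not from the patch; the expected IR mirrors the patch's own arm64 tests):

// sketch.c (valid as C and as C++), built roughly as:
//   clang -cc1 -fpass-by-value-is-noalias -triple arm64-apple-iphoneos -emit-llvm sketch.c
// A trivially copyable record too large for registers on arm64 is passed
// indirectly; with the flag the parameter is expected to lower to
//   define i32 @sum(%struct.Big* noalias %v)
// and without the flag to the same signature minus noalias.
struct Big { int a, b, c, d, e, f; };

int sum(struct Big v) { return v.a + v.f; }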
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index fbccff11562c1..0d8b0f9d07ef5 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1453,6 +1453,8 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, std::string(Args.getLastArgValue(OPT_fsymbol_partition_EQ)); Opts.ForceAAPCSBitfieldLoad = Args.hasArg(OPT_ForceAAPCSBitfieldLoad); + + Opts.PassByValueIsNoAlias = Args.hasArg(OPT_fpass_by_value_is_noalias); return Success; } diff --git a/clang/test/CodeGen/pass-by-value-noalias.c b/clang/test/CodeGen/pass-by-value-noalias.c new file mode 100644 index 0000000000000..f77ce2b1e35bb --- /dev/null +++ b/clang/test/CodeGen/pass-by-value-noalias.c @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -fpass-by-value-is-noalias -triple arm64-apple-iphoneos -emit-llvm -disable-llvm-optzns %s -o - 2>&1 | FileCheck --check-prefix=WITH_NOALIAS %s +// RUN: %clang_cc1 -triple arm64-apple-iphoneos -emit-llvm -disable-llvm-optzns %s -o - 2>&1 | FileCheck --check-prefix=NO_NOALIAS %s + +// A struct large enough so it is not passed in registers on ARM64. +struct Foo { + int a; + int b; + int c; + int d; + int e; + int f; +}; + +// WITH_NOALIAS: define void @take(%struct.Foo* noalias %arg) +// NO_NOALIAS: define void @take(%struct.Foo* %arg) +void take(struct Foo arg) {} diff --git a/clang/test/CodeGenCXX/pass-by-value-noalias.cpp b/clang/test/CodeGenCXX/pass-by-value-noalias.cpp new file mode 100644 index 0000000000000..fd96a36d3d6e5 --- /dev/null +++ b/clang/test/CodeGenCXX/pass-by-value-noalias.cpp @@ -0,0 +1,73 @@ +// RUN: %clang_cc1 -fpass-by-value-is-noalias -triple arm64-apple-iphoneos -emit-llvm -disable-llvm-optzns %s -o - 2>&1 | FileCheck --check-prefix=WITH_NOALIAS %s +// RUN: %clang_cc1 -triple arm64-apple-iphoneos -emit-llvm -disable-llvm-optzns %s -o - 2>&1 | FileCheck --check-prefix=NO_NOALIAS %s + +// A trivial struct large enough so it is not passed in registers on ARM64. +struct Foo { + int a; + int b; + int c; + int d; + int e; + int f; +}; + +// Make sure noalias is added to indirect arguments with trivially copyable types +// if -fpass-by-value-is-noalias is provided. + +// WITH_NOALIAS: define void @_Z4take3Foo(%struct.Foo* noalias %arg) +// NO_NOALIAS: define void @_Z4take3Foo(%struct.Foo* %arg) +void take(Foo arg) {} + +int G; + +// NonTrivial is not trivially-copyable, because it has a non-trivial copy +// constructor. +struct NonTrivial { + int a; + int b; + int c; + int d; + int e; + int f; + + NonTrivial(const NonTrivial &Other) { + a = G + 10 + Other.a; + } +}; + +// Make sure noalias is not added to indirect arguments that are not trivially +// copyable even if -fpass-by-value-is-noalias is provided. + +// WITH_NOALIAS: define void @_Z4take10NonTrivial(%struct.NonTrivial* %arg) +// NO_NOALIAS: define void @_Z4take10NonTrivial(%struct.NonTrivial* %arg) +void take(NonTrivial arg) {} + +// Escape examples. Pointers to the objects passed to take() may escape, depending on whether a temporary copy is created or not (e.g. due to NRVO). +struct A { + A(A **where) : data{"hello world 1"} { + *where = this; //Escaped pointer 1 (proposed UB?) 
+  }
+
+  A() : data{"hello world 2"} {}
+
+  char data[32];
+};
+A *p;
+
+// WITH_NOALIAS: define void @_Z4take1A(%struct.A* noalias %arg)
+// NO_NOALIAS: define void @_Z4take1A(%struct.A* %arg)
+void take(A arg) {}
+
+// WITH_NOALIAS: define void @_Z7CreateAPP1A(%struct.A* noalias sret align 1 %agg.result, %struct.A** %where)
+// NO_NOALIAS: define void @_Z7CreateAPP1A(%struct.A* noalias sret align 1 %agg.result, %struct.A** %where)
+A CreateA(A **where) {
+  A justlikethis;
+  *where = &justlikethis; // Escaped pointer 2 (should also be UB, then)
+  return justlikethis;
+}
+
+// elsewhere, perhaps compiled by a smarter compiler that doesn't make a copy here
+void test() {
+  take({&p});        // 1
+  take(CreateA(&p)); // 2
+}
diff --git a/clang/test/CodeGenObjC/pass-by-value-noalias.m b/clang/test/CodeGenObjC/pass-by-value-noalias.m
new file mode 100644
index 0000000000000..08252800dba2f
--- /dev/null
+++ b/clang/test/CodeGenObjC/pass-by-value-noalias.m
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -fpass-by-value-is-noalias -triple arm64-apple-iphoneos -emit-llvm -disable-llvm-optzns -fobjc-runtime-has-weak -fobjc-arc -fobjc-dispatch-method=mixed %s -o - 2>&1 | FileCheck --check-prefix=WITH_NOALIAS %s
+// RUN: %clang_cc1 -triple arm64-apple-iphoneos -emit-llvm -disable-llvm-optzns -fobjc-runtime-has-weak -fobjc-arc -fobjc-dispatch-method=mixed %s -o - 2>&1 | FileCheck --check-prefix=NO_NOALIAS %s
+
+@interface Bar
+@property char value;
+@end
+
+// A struct large enough so it is not passed in registers on ARM64, but with a
+// weak reference, so noalias should not be added even with
+// -fpass-by-value-is-noalias.
+struct Foo {
+  int a;
+  int b;
+  int c;
+  int d;
+  int e;
+  Bar *__weak f;
+};
+
+// WITH_NOALIAS: define void @take(%struct.Foo* %arg)
+// NO_NOALIAS: define void @take(%struct.Foo* %arg)
+void take(struct Foo arg) {}

From 3170d54842655d6d936aae32b7d0bc92fce7f22e Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 12 Sep 2020 15:02:30 +0100
Subject: [PATCH 0454/1079] [InstCombine][X86] Convert masked load/stores with
 (sign extended) bool vector masks to generic intrinsics.

As detailed on PR11210, if the mask is known to come from a (sign extended)
bool vector (e.g. comparisons) then we can represent it with a generic masked
load/store without losing anything.

We already do something similar for BLENDV -> SELECT conversion.
---
 .../Target/X86/X86InstCombineIntrinsic.cpp    | 89 ++++++++++---------
 .../InstCombine/X86/x86-masked-memops.ll      | 24 ++---
 2 files changed, 58 insertions(+), 55 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index d93f22d0365c0..2390a98183692 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -32,6 +32,23 @@ static Constant *getNegativeIsTrueBoolVec(Constant *V) {
   return V;
 }
 
+/// Convert the x86 XMM integer vector mask to a vector of bools based on
+/// each element's most significant bit (the sign bit).
+static Value *getBoolVecFromMask(Value *Mask) {
+  // Fold Constant Mask.
+  if (auto *ConstantMask = dyn_cast<Constant>(Mask))
+    return getNegativeIsTrueBoolVec(ConstantMask);
+
+  // Mask was extended from a boolean vector.
+  Value *ExtMask;
+  if (PatternMatch::match(
+          Mask, PatternMatch::m_SExt(PatternMatch::m_Value(ExtMask))) &&
+      ExtMask->getType()->isIntOrIntVectorTy(1))
+    return ExtMask;
+
+  return nullptr;
+}
+
 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
 // XMM register mask efficiently, we could transform all x86 masked intrinsics
 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
@@ -40,32 +57,26 @@ static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
   Value *Mask = II.getOperand(1);
   Constant *ZeroVec = Constant::getNullValue(II.getType());
 
-  // Special case a zero mask since that's not a ConstantDataVector.
-  // This masked load instruction creates a zero vector.
+  // Zero Mask - masked load instruction creates a zero vector.
   if (isa<ConstantAggregateZero>(Mask))
     return IC.replaceInstUsesWith(II, ZeroVec);
 
-  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
-  if (!ConstMask)
-    return nullptr;
-
-  // The mask is constant. Convert this x86 intrinsic to the LLVM instrinsic
-  // to allow target-independent optimizations.
-
-  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
-  // the LLVM intrinsic definition for the pointer argument.
-  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
-  PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
-  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
-
-  // Second, convert the x86 XMM integer vector mask to a vector of bools based
-  // on each element's most significant bit (the sign bit).
-  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
+  // The mask is constant or extended from a bool vector. Convert this x86
+  // intrinsic to the LLVM intrinsic to allow target-independent optimizations.
+  if (Value *BoolMask = getBoolVecFromMask(Mask)) {
+    // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
+    // the LLVM intrinsic definition for the pointer argument.
+    unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
+    PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
+    Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
+
+    // The pass-through vector for an x86 masked load is a zero vector.
+    CallInst *NewMaskedLoad =
+        IC.Builder.CreateMaskedLoad(PtrCast, Align(1), BoolMask, ZeroVec);
+    return IC.replaceInstUsesWith(II, NewMaskedLoad);
+  }
 
-  // The pass-through vector for an x86 masked load is a zero vector.
-  CallInst *NewMaskedLoad =
-      IC.Builder.CreateMaskedLoad(PtrCast, Align(1), BoolMask, ZeroVec);
-  return IC.replaceInstUsesWith(II, NewMaskedLoad);
+  return nullptr;
 }
 
 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
@@ -76,8 +87,7 @@ static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
   Value *Mask = II.getOperand(1);
   Value *Vec = II.getOperand(2);
 
-  // Special case a zero mask since that's not a ConstantDataVector:
-  // this masked store instruction does nothing.
+  // Zero Mask - this masked store instruction does nothing.
   if (isa<ConstantAggregateZero>(Mask)) {
     IC.eraseInstFromFunction(II);
     return true;
   }
@@ -88,28 +98,21 @@
   if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
     return false;
 
-  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
-  if (!ConstMask)
-    return false;
-
-  // The mask is constant. Convert this x86 instrinsic to the LLVM instrinsic
-  // to allow target-independent optimizations.
+  // The mask is constant or extended from a bool vector. Convert this x86
+  // intrinsic to the LLVM intrinsic to allow target-independent optimizations.
+  if (Value *BoolMask = getBoolVecFromMask(Mask)) {
+    unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
+    PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
+    Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
 
-  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
-  // the LLVM intrinsic definition for the pointer argument.
-  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
-  PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
-  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
+    IC.Builder.CreateMaskedStore(Vec, PtrCast, Align(1), BoolMask);
 
-  // Second, convert the x86 XMM integer vector mask to a vector of bools based
-  // on each element's most significant bit (the sign bit).
-  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
-
-  IC.Builder.CreateMaskedStore(Vec, PtrCast, Align(1), BoolMask);
+    // 'Replace uses' doesn't work for stores. Erase the original masked store.
+    IC.eraseInstFromFunction(II);
+    return true;
+  }
 
-  // 'Replace uses' doesn't work for stores. Erase the original masked store.
-  IC.eraseInstFromFunction(II);
-  return true;
+  return false;
 }
 
 static Value *simplifyX86immShift(const IntrinsicInst &II,
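The test updates just below show the IR-level effect. As a hedged side sketch (the helper name is ours, not LLVM API), this is the target-independent form the combine canonicalizes to, emitted directly with IRBuilder:

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// VecTy is the loaded vector type, Ptr the scalar i8* operand the x86
// intrinsic took, and BoolMask a <N x i1> mask such as an icmp result.
static Value *emitGenericMaskedLoad(IRBuilder<> &Builder, VectorType *VecTy,
                                    Value *Ptr, Value *BoolMask) {
  unsigned AS = Ptr->getType()->getPointerAddressSpace();
  Value *VecPtr =
      Builder.CreateBitCast(Ptr, VecTy->getPointerTo(AS), "castvec");
  // x86 masked loads zero the disabled lanes, so pass a zero pass-through.
  return Builder.CreateMaskedLoad(VecPtr, Align(1), BoolMask,
                                  Constant::getNullValue(VecTy));
}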
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll b/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll
index 2975b1c274795..ff4c05164d000 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll
@@ -14,14 +14,14 @@ define <4 x float> @mload(i8* %f, <4 x i32> %mask) {
   ret <4 x float> %ld
 }
 
-; TODO: If the mask comes from a comparison, convert to an LLVM intrinsic. The backend should optimize further.
+; If the mask comes from a comparison, convert to an LLVM intrinsic. The backend should optimize further.
define <4 x float> @mload_v4f32_cmp(i8* %f, <4 x i32> %src) { ; CHECK-LABEL: @mload_v4f32_cmp( ; CHECK-NEXT: [[ICMP:%.*]] = icmp ne <4 x i32> [[SRC:%.*]], zeroinitializer -; CHECK-NEXT: [[MASK:%.*]] = sext <4 x i1> [[ICMP]] to <4 x i32> -; CHECK-NEXT: [[LD:%.*]] = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* [[F:%.*]], <4 x i32> [[MASK]]) -; CHECK-NEXT: ret <4 x float> [[LD]] +; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[CASTVEC]], i32 1, <4 x i1> [[ICMP]], <4 x float> zeroinitializer) +; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %icmp = icmp ne <4 x i32> %src, zeroinitializer %mask = sext <4 x i1> %icmp to <4 x i32> @@ -102,9 +102,9 @@ define <8 x float> @mload_v8f32_cmp(i8* %f, <8 x float> %src0, <8 x float> %src1 ; CHECK-NEXT: [[ICMP0:%.*]] = fcmp one <8 x float> [[SRC0:%.*]], zeroinitializer ; CHECK-NEXT: [[ICMP1:%.*]] = fcmp one <8 x float> [[SRC1:%.*]], zeroinitializer ; CHECK-NEXT: [[MASK1:%.*]] = and <8 x i1> [[ICMP0]], [[ICMP1]] -; CHECK-NEXT: [[MASK:%.*]] = sext <8 x i1> [[MASK1]] to <8 x i32> -; CHECK-NEXT: [[LD:%.*]] = tail call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* [[F:%.*]], <8 x i32> [[MASK]]) -; CHECK-NEXT: ret <8 x float> [[LD]] +; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <8 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[CASTVEC]], i32 1, <8 x i1> [[MASK1]], <8 x float> zeroinitializer) +; CHECK-NEXT: ret <8 x float> [[TMP1]] ; %icmp0 = fcmp one <8 x float> %src0, zeroinitializer %icmp1 = fcmp one <8 x float> %src1, zeroinitializer @@ -193,13 +193,13 @@ define void @mstore(i8* %f, <4 x i32> %mask, <4 x float> %v) { ret void } -; TODO: If the mask comes from a comparison, convert to an LLVM intrinsic. The backend should optimize further. +; If the mask comes from a comparison, convert to an LLVM intrinsic. The backend should optimize further. define void @mstore_v4f32_cmp(i8* %f, <4 x i32> %src, <4 x float> %v) { ; CHECK-LABEL: @mstore_v4f32_cmp( ; CHECK-NEXT: [[ICMP:%.*]] = icmp eq <4 x i32> [[SRC:%.*]], zeroinitializer -; CHECK-NEXT: [[MASK:%.*]] = sext <4 x i1> [[ICMP]] to <4 x i32> -; CHECK-NEXT: tail call void @llvm.x86.avx.maskstore.ps(i8* [[F:%.*]], <4 x i32> [[MASK]], <4 x float> [[V:%.*]]) +; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x float>* +; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> [[V:%.*]], <4 x float>* [[CASTVEC]], i32 1, <4 x i1> [[ICMP]]) ; CHECK-NEXT: ret void ; %icmp = icmp eq <4 x i32> %src, zeroinitializer @@ -348,8 +348,8 @@ define void @mstore_v4i64_cmp(i8* %f, <4 x i64> %src0, <4 x i64> %src1, <4 x i64 ; CHECK-NEXT: [[ICMP0:%.*]] = icmp eq <4 x i64> [[SRC0:%.*]], zeroinitializer ; CHECK-NEXT: [[ICMP1:%.*]] = icmp ne <4 x i64> [[SRC1:%.*]], zeroinitializer ; CHECK-NEXT: [[MASK1:%.*]] = and <4 x i1> [[ICMP0]], [[ICMP1]] -; CHECK-NEXT: [[MASK:%.*]] = sext <4 x i1> [[MASK1]] to <4 x i64> -; CHECK-NEXT: tail call void @llvm.x86.avx2.maskstore.q.256(i8* [[F:%.*]], <4 x i64> [[MASK]], <4 x i64> [[V:%.*]]) +; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x i64>* +; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> [[V:%.*]], <4 x i64>* [[CASTVEC]], i32 1, <4 x i1> [[MASK1]]) ; CHECK-NEXT: ret void ; %icmp0 = icmp eq <4 x i64> %src0, zeroinitializer From 8ce75e2778daf0492421fb524986756ef7e84b2b Mon Sep 17 00:00:00 2001 From: "Paul C. 
Anagnostopoulos" Date: Sat, 12 Sep 2020 11:50:01 -0400 Subject: [PATCH 0455/1079] TableGen: change a couple of member names to clarify their use. --- llvm/include/llvm/TableGen/Record.h | 21 +++++++++++---------- llvm/lib/TableGen/Record.cpp | 4 ++-- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h index a082fe5d74a1f..5d67ef4455cf6 100644 --- a/llvm/include/llvm/TableGen/Record.h +++ b/llvm/include/llvm/TableGen/Record.h @@ -67,6 +67,7 @@ class RecTy { private: RecTyKind Kind; + /// ListRecTy of the list that has elements of this type. ListRecTy *ListTy = nullptr; public: @@ -190,14 +191,14 @@ class StringRecTy : public RecTy { bool typeIsConvertibleTo(const RecTy *RHS) const override; }; -/// 'list' - Represent a list of values, all of which must be of -/// the specified type. +/// 'list' - Represent a list of element values, all of which must be of +/// the specified type. The type is stored in ElementTy. class ListRecTy : public RecTy { friend ListRecTy *RecTy::getListTy(); - RecTy *Ty; + RecTy *ElementTy; - explicit ListRecTy(RecTy *T) : RecTy(ListRecTyKind), Ty(T) {} + explicit ListRecTy(RecTy *T) : RecTy(ListRecTyKind), ElementTy(T) {} public: static bool classof(const RecTy *RT) { @@ -205,7 +206,7 @@ class ListRecTy : public RecTy { } static ListRecTy *get(RecTy *T) { return T->getListTy(); } - RecTy *getElementType() const { return Ty; } + RecTy *getElementType() const { return ElementTy; } std::string getAsString() const override; @@ -420,14 +421,14 @@ inline raw_ostream &operator<<(raw_ostream &OS, const Init &I) { I.print(OS); return OS; } -/// This is the common super-class of types that have a specific, -/// explicit, type. +/// This is the common superclass of types that have a specific, +/// explicit, type, stored in ValueTy. class TypedInit : public Init { - RecTy *Ty; + RecTy *ValueTy; protected: explicit TypedInit(InitKind K, RecTy *T, uint8_t Opc = 0) - : Init(K, Opc), Ty(T) {} + : Init(K, Opc), ValueTy(T) {} public: TypedInit(const TypedInit &) = delete; @@ -438,7 +439,7 @@ class TypedInit : public Init { I->getKind() <= IK_LastTypedInit; } - RecTy *getType() const { return Ty; } + RecTy *getType() const { return ValueTy; } Init *getCastTo(RecTy *Ty) const override; Init *convertInitializerTo(RecTy *Ty) const override; diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index d3db004196b8b..3c40d45c1e051 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -128,12 +128,12 @@ bool StringRecTy::typeIsConvertibleTo(const RecTy *RHS) const { } std::string ListRecTy::getAsString() const { - return "list<" + Ty->getAsString() + ">"; + return "list<" + ElementTy->getAsString() + ">"; } bool ListRecTy::typeIsConvertibleTo(const RecTy *RHS) const { if (const auto *ListTy = dyn_cast(RHS)) - return Ty->typeIsConvertibleTo(ListTy->getElementType()); + return ElementTy->typeIsConvertibleTo(ListTy->getElementType()); return false; } From 74760bb00fb9b78a2fe12242716bd6976b8c3566 Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 12 Sep 2020 17:47:04 +0100 Subject: [PATCH 0456/1079] [LV][ARM] Add preferInloopReduction target hook. This allows the backend to tell the vectorizer to produce inloop reductions through a TTI hook. For the moment on ARM under MVE this means allowing integer add reductions of the correct size. In the future this can include integer min/max too, under -Os. 
Differential Revision: https://reviews.llvm.org/D75512 --- .../llvm/Analysis/TargetTransformInfo.h | 10 ++ .../llvm/Analysis/TargetTransformInfoImpl.h | 5 + llvm/lib/Analysis/TargetTransformInfo.cpp | 5 + .../lib/Target/ARM/ARMTargetTransformInfo.cpp | 14 ++ llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 3 + .../Transforms/Vectorize/LoopVectorize.cpp | 10 +- .../LoopVectorize/ARM/mve-reduction-types.ll | 42 ++--- .../LoopVectorize/ARM/mve-reductions.ll | 168 +++++++++--------- 8 files changed, 151 insertions(+), 106 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index ffbec74c61d02..9bf821fa1e3b8 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1288,6 +1288,10 @@ class TargetTransformInfo { bool useReductionIntrinsic(unsigned Opcode, Type *Ty, ReductionFlags Flags) const; + /// \returns True if the target prefers reductions in loop. + bool preferInLoopReduction(unsigned Opcode, Type *Ty, + ReductionFlags Flags) const; + /// \returns True if the target prefers reductions select kept in the loop /// when tail folding. i.e. /// loop: @@ -1592,6 +1596,8 @@ class TargetTransformInfo::Concept { VectorType *VecTy) const = 0; virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty, ReductionFlags) const = 0; + virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, + ReductionFlags) const = 0; virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags) const = 0; virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0; @@ -2094,6 +2100,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { ReductionFlags Flags) const override { return Impl.useReductionIntrinsic(Opcode, Ty, Flags); } + bool preferInLoopReduction(unsigned Opcode, Type *Ty, + ReductionFlags Flags) const override { + return Impl.preferInLoopReduction(Opcode, Ty, Flags); + } bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags Flags) const override { return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index bb70b97870804..7f42074119667 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -660,6 +660,11 @@ class TargetTransformInfoImplBase { return false; } + bool preferInLoopReduction(unsigned Opcode, Type *Ty, + TTI::ReductionFlags Flags) const { + return false; + } + bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const { return false; diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 52c88180c9ec5..2ffe4ff5a8238 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1013,6 +1013,11 @@ bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode, Type *Ty, return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags); } +bool TargetTransformInfo::preferInLoopReduction(unsigned Opcode, Type *Ty, + ReductionFlags Flags) const { + return TTIImpl->preferInLoopReduction(Opcode, Ty, Flags); +} + bool TargetTransformInfo::preferPredicatedReductionSelect( unsigned Opcode, Type *Ty, ReductionFlags Flags) const { return TTIImpl->preferPredicatedReductionSelect(Opcode, Ty, Flags); diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp 
b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index c789b35f32af5..2f89e807c1c5d 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1861,6 +1861,20 @@ bool ARMTTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
   return ST->hasMVEIntegerOps();
 }
 
+bool ARMTTIImpl::preferInLoopReduction(unsigned Opcode, Type *Ty,
+                                       TTI::ReductionFlags Flags) const {
+  if (!ST->hasMVEIntegerOps())
+    return false;
+
+  unsigned ScalarBits = Ty->getScalarSizeInBits();
+  switch (Opcode) {
+  case Instruction::Add:
+    return ScalarBits <= 32;
+  default:
+    return false;
+  }
+}
+
 bool ARMTTIImpl::preferPredicatedReductionSelect(
     unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const {
   if (!ST->hasMVEIntegerOps())
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 508bb9e21d3af..8b0fe30152a32 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -186,6 +186,9 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                              TTI::ReductionFlags Flags) const;
 
+  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
+                             TTI::ReductionFlags Flags) const;
+
   bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                        TTI::ReductionFlags Flags) const;
 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b9f7ae71d0cf2..545540efc2841 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6883,7 +6883,7 @@ void LoopVectorizationCostModel::collectInLoopReductions() {
   // For the moment, without predicated reduction instructions, we do not
   // support inloop reductions whilst folding the tail, and hence in those cases
   // all reductions are currently out of the loop.
-  if (!PreferInLoopReductions || foldTailByMasking())
+  if (foldTailByMasking())
     return;
 
   for (auto &Reduction : Legal->getReductionVars()) {
@@ -6894,6 +6894,14 @@ void LoopVectorizationCostModel::collectInLoopReductions() {
     if (RdxDesc.getRecurrenceType() != Phi->getType())
       continue;
 
+    // If the target would prefer this reduction to happen "in-loop", then we
+    // want to record it as such.
+    unsigned Opcode = RdxDesc.getRecurrenceBinOp(RdxDesc.getRecurrenceKind());
+    if (!PreferInLoopReductions &&
+        !TTI.preferInLoopReduction(Opcode, Phi->getType(),
+                                   TargetTransformInfo::ReductionFlags()))
+      continue;
+
     // Check that we can correctly put the reductions into the loop, by
     // finding the chain of operations that leads from the phi to the loop
     // exit value.
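As a reading aid for the test updates that follow: an in-loop reduction keeps a scalar accumulator PHI and calls llvm.experimental.vector.reduce.add inside vector.body each iteration, where the out-of-loop form kept a vector PHI and reduced once in middle.block. A hedged sketch (a hypothetical free-function restatement of the MVE rule above, not from the patch) of the shape a target's preference takes:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Prefer in-loop reductions only for integer adds of scalar width <= 32
// bits; everything else stays as an out-of-loop reduction.
static bool preferInLoopReductionSketch(unsigned Opcode, Type *Ty,
                                        TargetTransformInfo::ReductionFlags) {
  return Opcode == Instruction::Add && Ty->getScalarSizeInBits() <= 32;
}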
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll index a315c7c7ca692..34a1c83721d4c 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll @@ -18,7 +18,7 @@ define i32 @mla_i32(i8* noalias nocapture readonly %A, i8* noalias nocapture rea ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 @@ -31,17 +31,17 @@ define i32 @mla_i32(i8* noalias nocapture readonly %A, i8* noalias nocapture rea ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP7]], align 1 ; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[WIDE_LOAD1]] to <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP4]] -; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[TMP9]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP9]]) +; CHECK-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP10]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -58,7 +58,7 @@ define i32 @mla_i32(i8* noalias nocapture readonly %A, i8* noalias nocapture rea ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !2 ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] @@ -102,7 +102,7 @@ define i32 @mla_i8(i8* noalias nocapture readonly %A, i8* noalias nocapture read ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 
; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 @@ -115,17 +115,17 @@ define i32 @mla_i8(i8* noalias nocapture readonly %A, i8* noalias nocapture read ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP7]], align 1 ; CHECK-NEXT: [[TMP8:%.*]] = sext <16 x i8> [[WIDE_LOAD1]] to <16 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <16 x i32> [[TMP8]], [[TMP4]] -; CHECK-NEXT: [[TMP10]] = add <16 x i32> [[TMP9]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> [[TMP9]]) +; CHECK-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> [[TMP10]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -142,7 +142,7 @@ define i32 @mla_i8(i8* noalias nocapture readonly %A, i8* noalias nocapture read ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !5 ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] @@ -186,23 +186,23 @@ define i32 @add_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], 
i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP5]] = add i32 [[TMP4]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -214,7 +214,7 @@ define i32 @add_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !7 ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll index 0d4cc31677b80..677142e3c37af 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll @@ -195,23 +195,23 @@ define i32 @add_i32_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP4:%.*]] = call i32 
@llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP5]] = add i32 [[TMP4]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -223,7 +223,7 @@ define i32 @add_i32_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !2 ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] @@ -263,24 +263,24 @@ define i32 @add_i16_i32(i16* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <4 x i16>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> -; CHECK-NEXT: [[TMP5]] = add <4 x i32> [[VEC_PHI]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[TMP6]] = add i32 [[TMP5]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i32 
@llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -293,7 +293,7 @@ define i32 @add_i16_i32(i16* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !5 ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] @@ -334,24 +334,24 @@ define i32 @add_i8_i32(i8* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <4 x i8>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> -; CHECK-NEXT: [[TMP5]] = add <4 x i32> [[VEC_PHI]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[TMP6]] = add i32 [[TMP5]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: 
for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -364,7 +364,7 @@ define i32 @add_i8_i32(i8* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !7 ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] @@ -404,23 +404,23 @@ define signext i16 @add_i16_i16(i16* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <8 x i16>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 2 -; CHECK-NEXT: [[TMP4]] = add <8 x i16> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP5]] = add i16 [[TMP4]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !8 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !8 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP4]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -432,7 +432,7 @@ define signext i16 @add_i16_i16(i16* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !9 ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; 
CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] @@ -472,24 +472,24 @@ define signext i16 @add_i8_i16(i8* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <8 x i8>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i16> -; CHECK-NEXT: [[TMP5]] = add <8 x i16> [[VEC_PHI]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP4]]) +; CHECK-NEXT: [[TMP6]] = add i16 [[TMP5]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !10 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !10 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -502,7 +502,7 @@ define signext i16 @add_i8_i16(i8* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !11 ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] @@ -542,23 +542,23 @@ define zeroext i8 @add_i8_i8(i8* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i8 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], 
[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4]] = add <16 x i8> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP4:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP5]] = add i8 [[TMP4]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !12 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !12 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> [[TMP4]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -570,7 +570,7 @@ define zeroext i8 @add_i8_i8(i8* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !13 ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i8 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i8 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] @@ -815,7 +815,7 @@ define i32 @mla_i32_i32(i32* nocapture readonly %x, i32* nocapture readonly %y, ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 @@ -826,17 +826,17 @@ define i32 @mla_i32_i32(i32* nocapture readonly %x, i32* nocapture readonly %y, ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP8]] = add <4 x i32> [[TMP7]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP8:%.*]] = call 
i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) +; CHECK-NEXT: [[TMP9]] = add i32 [[TMP8]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !14 +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !14 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -851,7 +851,7 @@ define i32 @mla_i32_i32(i32* nocapture readonly %x, i32* nocapture readonly %y, ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !15 ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] @@ -893,7 +893,7 @@ define i32 @mla_i16_i32(i16* nocapture readonly %x, i16* nocapture readonly %y, ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 0 @@ -906,17 +906,17 @@ define i32 @mla_i16_i32(i16* nocapture readonly %x, i16* nocapture readonly %y, ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2 ; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP4]] -; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[TMP9]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP9]]) +; CHECK-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !16 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !16 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP10]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -933,7 +933,7 @@ define i32 @mla_i16_i32(i16* nocapture readonly %x, i16* nocapture readonly %y, ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !17 ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] @@ -977,7 +977,7 @@ define i32 @mla_i8_i32(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 @@ -990,17 +990,17 @@ define i32 @mla_i8_i32(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP7]], align 1 ; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw <4 x i32> [[TMP8]], [[TMP4]] -; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[TMP9]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP9]]) +; CHECK-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !18 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !18 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP10]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, 
[[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -1017,7 +1017,7 @@ define i32 @mla_i8_i32(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !19 ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] @@ -1061,7 +1061,7 @@ define signext i16 @mla_i16_i16(i16* nocapture readonly %x, i16* nocapture reado ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 0 @@ -1072,17 +1072,17 @@ define signext i16 @mla_i16_i16(i16* nocapture readonly %x, i16* nocapture reado ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <8 x i16>* ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP6]], align 2 ; CHECK-NEXT: [[TMP7:%.*]] = mul <8 x i16> [[WIDE_LOAD1]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP8]] = add <8 x i16> [[TMP7]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP8:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP7]]) +; CHECK-NEXT: [[TMP9]] = add i16 [[TMP8]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !20 +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !20 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP10:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP8]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_013:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -1097,7 +1097,7 @@ define signext i16 
@mla_i16_i16(i16* nocapture readonly %x, i16* nocapture reado ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !21 ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] @@ -1139,7 +1139,7 @@ define signext i16 @mla_i8_i16(i8* nocapture readonly %x, i8* nocapture readonly ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 @@ -1152,17 +1152,17 @@ define signext i16 @mla_i8_i16(i8* nocapture readonly %x, i8* nocapture readonly ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP7]], align 1 ; CHECK-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[WIDE_LOAD1]] to <8 x i16> ; CHECK-NEXT: [[TMP9:%.*]] = mul nuw <8 x i16> [[TMP8]], [[TMP4]] -; CHECK-NEXT: [[TMP10]] = add <8 x i16> [[TMP9]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP10:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP9]]) +; CHECK-NEXT: [[TMP11]] = add i16 [[TMP10]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !22 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !22 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP12:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP10]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_013:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -1179,7 +1179,7 @@ define signext i16 @mla_i8_i16(i8* nocapture readonly %x, i8* nocapture readonly ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !23 ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], 
[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] @@ -1223,7 +1223,7 @@ define zeroext i8 @mla_i8_i8(i8* nocapture readonly %x, i8* nocapture readonly % ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i8 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 @@ -1234,17 +1234,17 @@ define zeroext i8 @mla_i8_i8(i8* nocapture readonly %x, i8* nocapture readonly % ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <16 x i8>* ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP6]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = mul <16 x i8> [[WIDE_LOAD1]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP8]] = add <16 x i8> [[TMP7]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP8:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> [[TMP7]]) +; CHECK-NEXT: [[TMP9]] = add i8 [[TMP8]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !24 +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !24 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP10:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> [[TMP8]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_012:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -1259,7 +1259,7 @@ define zeroext i8 @mla_i8_i8(i8* nocapture readonly %x, i8* nocapture readonly % ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !25 ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i8 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i8 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] From 3de9e3e493baed93e1aa0e99b04a0b11f370a939 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 12 Sep 2020 18:28:57 +0100 Subject: [PATCH 0457/1079] [DSE] Precommit test case with loop carried dependence. 
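For orientation, here is a minimal C++ analogue of the loop-carried pattern the new test encodes. This is a sketch only: the function name, array size, and bounds are illustrative assumptions, and the committed tests below are the LLVM IR equivalents.

  #include <cstdio>

  // Stand-in for the IR tests' opaque @use(i32).
  void use(int v) { std::printf("%d\n", v); }

  int main() {
    int A[100] = {};
    for (int i = 0; i + 1 < 100; ++i) {
      use(A[i]);      // for i >= 1, reads the 10 stored by iteration i - 1
      A[i + 1] = 10;  // loop-carried: read back by the next iteration
      use(A[i]);      // second read of the same slot in this iteration
    }
  }

Because each iteration's store is read by the following iteration, dead store elimination must keep it.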
--- .../multiblock-loop-carried-dependence.ll | 140 ++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loop-carried-dependence.ll diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loop-carried-dependence.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loop-carried-dependence.ll new file mode 100644 index 0000000000000..76292374e1f92 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loop-carried-dependence.ll @@ -0,0 +1,140 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py + +; RUN: opt -dse -S %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512" + +declare void @use(i32) + +; Test cases with a loop carried dependence in %loop.2, where %l.2 reads the +; value stored by the previous iteration. Hence, the store in %loop.2 is not +; dead at the end of the function or after the call to lifetime.end(). + +define void @test.1() { +; CHECK-LABEL: @test.1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca [100 x i32], align 4 +; CHECK-NEXT: br label [[LOOP_1:%.*]] +; CHECK: loop.1: +; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_1_NEXT:%.*]], [[LOOP_1]] ] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[A]], i64 0, i64 [[IV_1]] +; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[IV_1_NEXT]] = add nsw i64 [[IV_1]], 1 +; CHECK-NEXT: [[C_1:%.*]] = icmp slt i64 [[IV_1_NEXT]], 100 +; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_1]], label [[LOOP_2_PH:%.*]] +; CHECK: loop.2.ph: +; CHECK-NEXT: br label [[LOOP_2:%.*]] +; CHECK: loop.2: +; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[IV_2_NEXT:%.*]], [[LOOP_2]] ], [ 0, [[LOOP_2_PH]] ] +; CHECK-NEXT: [[PTR_IV_2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[A]], i64 0, i64 [[IV_2]] +; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR_IV_2]], align 4 +; CHECK-NEXT: call void @use(i32 [[L_0]]) +; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[PTR_IV_2]], align 4 +; CHECK-NEXT: call void @use(i32 [[L_1]]) +; CHECK-NEXT: [[IV_2_NEXT]] = add nsw i64 [[IV_2]], 1 +; CHECK-NEXT: [[C_2:%.*]] = icmp slt i64 [[IV_2_NEXT]], 100 +; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_2]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %A = alloca [100 x i32], align 4 + br label %loop.1 + +loop.1: + %iv.1 = phi i64 [ 1, %entry ], [ %iv.1.next, %loop.1 ] + %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %iv.1 + store i32 0, i32* %arrayidx1, align 4 + %iv.1.next = add nsw i64 %iv.1, 1 + %c.1 = icmp slt i64 %iv.1.next, 100 + br i1 %c.1, label %loop.1, label %loop.2.ph + +loop.2.ph: + br label %loop.2 + +loop.2: + %iv.2 = phi i64 [ %iv.2.next, %loop.2 ], [ 0, %loop.2.ph ] + %ptr.iv.2 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %iv.2 + %l.0 = load i32, i32* %ptr.iv.2, align 4 + call void @use(i32 %l.0) + %add = add nsw i64 %iv.2, 1 + %ptr.iv.2.add.1 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %add + store i32 10, i32* %ptr.iv.2.add.1, align 4 + %l.1 = load i32, i32* %ptr.iv.2, align 4 + call void @use(i32 %l.1) + %iv.2.next = add nsw i64 %iv.2, 1 + %c.2 = icmp slt i64 %iv.2.next, 100 + br i1 %c.2, label %loop.2, label %exit + +exit: + ret void +} + +define void @test.2() { +; CHECK-LABEL: @test.2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca [100 x i32], align 4 +; CHECK-NEXT: [[A_CAST:%.*]] = bitcast 
[100 x i32]* [[A]] to i8* +; CHECK-NEXT: br label [[LOOP_1:%.*]] +; CHECK: loop.1: +; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_1_NEXT:%.*]], [[LOOP_1]] ] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[A]], i64 0, i64 [[IV_1]] +; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[IV_1_NEXT]] = add nsw i64 [[IV_1]], 1 +; CHECK-NEXT: [[C_1:%.*]] = icmp slt i64 [[IV_1_NEXT]], 100 +; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_1]], label [[LOOP_2_PH:%.*]] +; CHECK: loop.2.ph: +; CHECK-NEXT: br label [[LOOP_2:%.*]] +; CHECK: loop.2: +; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[IV_2_NEXT:%.*]], [[LOOP_2]] ], [ 0, [[LOOP_2_PH]] ] +; CHECK-NEXT: [[PTR_IV_2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[A]], i64 0, i64 [[IV_2]] +; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR_IV_2]], align 4 +; CHECK-NEXT: call void @use(i32 [[L_0]]) +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[IV_2]], 1 +; CHECK-NEXT: [[PTR_IV_2_ADD_1:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[A]], i64 0, i64 [[ADD]] +; CHECK-NEXT: store i32 10, i32* [[PTR_IV_2_ADD_1]], align 4 +; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[PTR_IV_2]], align 4 +; CHECK-NEXT: call void @use(i32 [[L_1]]) +; CHECK-NEXT: [[IV_2_NEXT]] = add nsw i64 [[IV_2]], 1 +; CHECK-NEXT: [[C_2:%.*]] = icmp slt i64 [[IV_2_NEXT]], 100 +; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_2]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 400, i8* nonnull [[A_CAST]]) +; CHECK-NEXT: ret void +; +entry: + %A = alloca [100 x i32], align 4 + %A.cast = bitcast [100 x i32]* %A to i8* + br label %loop.1 + +loop.1: + %iv.1 = phi i64 [ 1, %entry ], [ %iv.1.next, %loop.1 ] + %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %iv.1 + store i32 0, i32* %arrayidx1, align 4 + %iv.1.next = add nsw i64 %iv.1, 1 + %c.1 = icmp slt i64 %iv.1.next, 100 + br i1 %c.1, label %loop.1, label %loop.2.ph + +loop.2.ph: + br label %loop.2 + +loop.2: + %iv.2 = phi i64 [ %iv.2.next, %loop.2 ], [ 0, %loop.2.ph ] + %ptr.iv.2 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %iv.2 + %l.0 = load i32, i32* %ptr.iv.2, align 4 + call void @use(i32 %l.0) + %add = add nsw i64 %iv.2, 1 + %ptr.iv.2.add.1 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %add + store i32 10, i32* %ptr.iv.2.add.1, align 4 + %l.1 = load i32, i32* %ptr.iv.2, align 4 + call void @use(i32 %l.1) + %iv.2.next = add nsw i64 %iv.2, 1 + %c.2 = icmp slt i64 %iv.2.next, 100 + br i1 %c.2, label %loop.2, label %exit + +exit: + call void @llvm.lifetime.end.p0i8(i64 400, i8* nonnull %A.cast) #5 + ret void +} + +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) From e082dee2b5885bba65e20b22b088bcaca5546984 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 12 Sep 2020 18:57:26 +0100 Subject: [PATCH 0458/1079] [DSE] Bail out on MemoryPhis when deleting stores at end of function. When deleting stores at the end of a function, we have to do PHI translation, otherwise we might miss reads in different iterations of a loop. See multiblock-loop-carried-dependence.ll for details. This fixes a mis-compile and surprisingly also increases the number of eliminated stores from 26047 to 26572 for MultiSource/SPEC2000/SPEC2006 on X86 with -O3 -flto. This is most likely because we save budget by not exploring through MemoryPhis, which are less likely to result in valid candidates for elimination. The issue was reported post-commit for fb109c42d91c. 
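The shape of the fix, as a self-contained toy model (an illustrative sketch under the assumption that plain structs can stand in for MemorySSA's MemoryAccess/MemoryPhi classes; the real change is the diff below): the use-walk used to push a MemoryPhi's users and continue, which can skip a read reached through a back edge, so it now conservatively treats the store as live.

  #include <cassert>
  #include <vector>

  // Toy stand-in for a MemorySSA access: either a phi or a plain use/def.
  struct Access {
    bool IsPhi;
    std::vector<Access *> Users;
  };

  // Conservative "is this store unread until the end of the function?" walk.
  bool deadAtEndOfFunction(const Access &Store) {
    std::vector<Access *> WorkList(Store.Users.begin(), Store.Users.end());
    for (size_t I = 0; I < WorkList.size(); ++I) {
      Access *UseAccess = WorkList[I];
      // Without PHI translation, a loop-carried read may hide behind a phi,
      // so bail out instead of pushing its users (the old behavior).
      if (UseAccess->IsPhi)
        return false;
      for (Access *U : UseAccess->Users)
        WorkList.push_back(U);
    }
    return true; // no potential reader found
  }

  int main() {
    Access Phi{true, {}};
    Access Store{false, {&Phi}};
    assert(!deadAtEndOfFunction(Store)); // a phi user keeps the store live
  }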
--- .../Transforms/Scalar/DeadStoreElimination.cpp | 10 ++++++---- .../MSSA/multiblock-loop-carried-dependence.ll | 3 +++ .../MSSA/multiblock-malloc-free.ll | 1 + .../MSSA/multiblock-memintrinsics.ll | 18 ++++++++++++++++++ 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 10b00287552ab..16f4ea2f900c1 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1776,10 +1776,12 @@ struct DSEState { } MemoryAccess *UseAccess = WorkList[I]; - if (isa<MemoryPhi>(UseAccess)) { - PushMemUses(UseAccess); - continue; - } + // Simply adding the users of MemoryPhi to the worklist is not enough, + // because we might miss read clobbers in different iterations of a loop, + // for example. + // TODO: Add support for phi translation to handle the loop case. + if (isa<MemoryPhi>(UseAccess)) + return false; // TODO: Checking for aliasing is expensive. Consider reducing the amount // of times this is called and/or caching it. diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loop-carried-dependence.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loop-carried-dependence.ll index 76292374e1f92..b168dcaa859eb 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loop-carried-dependence.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loop-carried-dependence.ll @@ -29,6 +29,9 @@ define void @test.1() { ; CHECK-NEXT: [[PTR_IV_2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[A]], i64 0, i64 [[IV_2]] ; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR_IV_2]], align 4 ; CHECK-NEXT: call void @use(i32 [[L_0]]) +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[IV_2]], 1 +; CHECK-NEXT: [[PTR_IV_2_ADD_1:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[A]], i64 0, i64 [[ADD]] +; CHECK-NEXT: store i32 10, i32* [[PTR_IV_2_ADD_1]], align 4 ; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[PTR_IV_2]], align 4 ; CHECK-NEXT: call void @use(i32 [[L_1]]) ; CHECK-NEXT: [[IV_2_NEXT]] = add nsw i64 [[IV_2]], 1 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll index 56f8ee6487d9d..f60a8e536a0be 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll @@ -180,6 +180,7 @@ define void @test27() { ; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: [[M:%.*]] = call noalias i8* @malloc(i64 10) +; CHECK-NEXT: store i8 1, i8* [[M]], align 1 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[R:%.*]] = phi i8* [ null, [[BB1:%.*]] ], [ [[M]], [[BB2]] ] diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll index 58ef70c1b541b..b22f5b60d7584 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll @@ -123,10 +123,18 @@ bb3: define void @alloca_1(i1 %c) { ; CHECK-LABEL: @alloca_1( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[P_ALLOCA:%.*]] = alloca [32 x i32], align 4 +; CHECK-NEXT: [[P:%.*]] = bitcast [32 x i32]* [[P_ALLOCA]] to i32* +; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 +;
CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) ; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 +; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: ret void @@ -152,10 +160,20 @@ bb3: define void @alloca_2(i1 %c) { ; CHECK-LABEL: @alloca_2( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[P_ALLOCA:%.*]] = alloca [32 x i32], align 4 +; CHECK-NEXT: [[P:%.*]] = bitcast [32 x i32]* [[P_ALLOCA]] to i32* +; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 +; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) ; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 +; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 +; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: ret void From d85ac6d577ac5d4a7812e6cd3b0171f5e356c805 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 12 Sep 2020 19:19:49 +0100 Subject: [PATCH 0459/1079] [DSE] Adjust coroutines test after e082dee2b588. --- llvm/test/Transforms/Coroutines/ArgAddr.ll | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/test/Transforms/Coroutines/ArgAddr.ll b/llvm/test/Transforms/Coroutines/ArgAddr.ll index b711f1f12c9fa..99e418599c671 100644 --- a/llvm/test/Transforms/Coroutines/ArgAddr.ll +++ b/llvm/test/Transforms/Coroutines/ArgAddr.ll @@ -46,18 +46,19 @@ entry: call void @llvm.coro.destroy(i8* %hdl) ret i32 0 ; CHECK: call void @ctor -; CHECK-NEXT: %dec1.spill.addr.i = getelementptr inbounds i8, i8* %call.i, i64 16 +; CHECK-NEXT: %dec1.spill.addr.i = getelementptr inbounds i8, i8* %call.i, i64 20 ; CHECK-NEXT: bitcast i8* %dec1.spill.addr.i to i32* ; CHECK-NEXT: store i32 4 ; CHECK-NEXT: call void @print(i32 4) -; CHECK-NEXT: %index.addr5.i = getelementptr inbounds i8, i8* %call.i, i64 20 -; CHECK-NEXT: bitcast i8* %index.addr5.i to i1* +; CHECK-NEXT: %index.addr13.i = getelementptr inbounds i8, i8* %call.i, i64 24 +; CHECK-NEXT: bitcast i8* %index.addr13.i to i1* ; CHECK-NEXT: store i1 false -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8( +; CHECK-NEXT: store i32 3 ; CHECK-NEXT: store i32 3 ; CHECK-NEXT: call void @print(i32 3) ; CHECK-NEXT: store i1 false ; CHECK-NEXT: store i32 2 +; CHECK-NEXT: store i32 2 ; CHECK-NEXT: call void @print(i32 2) ; CHECK: ret i32 0 } From ad3d6f993d9f7ff3a54c5a716ccc918026fa0252 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 12 Sep 2020 11:42:18 -0700 Subject: [PATCH 0460/1079] [SelectionDAG][X86][ARM][AArch64] Add ISD opcode for __builtin_parity. Expand it to shifts and xors. Clang emits (and (ctpop X), 1) for __builtin_parity. If ctpop isn't natively supported by the target, this leads to poor codegen due to the expansion of ctpop being more complex than what is needed for parity. 
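Parity needs only the low bit of the population count, and that bit can be computed with log2(width) shift-and-XOR folds followed by a mask. A small C++ model of the sequence (illustrative only, not the DAG-building code):

  #include <cassert>
  #include <cstdint>

  // Fold the word onto itself so bit 0 accumulates the XOR of all bits,
  // i.e. the parity; then mask everything else off.
  uint32_t parity32(uint32_t x) {
    x ^= x >> 16;
    x ^= x >> 8;
    x ^= x >> 4;
    x ^= x >> 2;
    x ^= x >> 1;
    return x & 1;
  }

  int main() {
    assert(parity32(0x0) == 0);
    assert(parity32(0xB) == 1);        // 0b1011: three set bits
    assert(parity32(0xFFFFFFFF) == 0); // 32 set bits
  }

This five-step fold is what the new expansion falls back to when CTPOP is not legal for the type.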
This adds a DAG combine to convert the pattern to ISD::PARITY before operation legalization. Type legalization is updated to handle Expanding and Promoting this operation. If, after type legalization, CTPOP is supported for this type, LegalizeDAG will turn it back into CTPOP+AND. Otherwise LegalizeDAG will emit a series of shifts and xors followed by an AND with 1. I've avoided vectors in this patch to avoid additional legalization complexity. X86 previously had a custom DAG combiner for this. This is now moved to Custom lowering for the new opcode. There is a minor regression in vector-reduce-xor-bool.ll, but a follow-up patch can easily fix that. Fixes PR47433 Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D87209 --- llvm/include/llvm/CodeGen/ISDOpcodes.h | 1 + llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 19 ++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 26 +++ .../SelectionDAG/LegalizeIntegerTypes.cpp | 21 +- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 3 +- .../SelectionDAG/SelectionDAGDumper.cpp | 1 + llvm/lib/CodeGen/TargetLoweringBase.cpp | 1 + llvm/lib/Target/X86/X86ISelLowering.cpp | 146 ++++++-------- llvm/test/CodeGen/AArch64/parity.ll | 161 +++++++++++++++ llvm/test/CodeGen/ARM/parity.ll | 162 +++++++++++++++ llvm/test/CodeGen/X86/parity.ll | 189 +++++++++++++++++- .../CodeGen/X86/vector-reduce-xor-bool.ll | 12 +- 12 files changed, 642 insertions(+), 100 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/parity.ll create mode 100644 llvm/test/CodeGen/ARM/parity.ll diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index ae08d6e9313d6..ba5a5d6e87519 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -598,6 +598,7 @@ enum NodeType { CTLZ, CTPOP, BITREVERSE, + PARITY, /// Bit counting operators with an undefined result for zero inputs. CTTZ_ZERO_UNDEF, diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index eaa70444578a4..3aaf5e01d26a4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5574,6 +5574,25 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue V = combineShiftAnd1ToBitTest(N, DAG)) return V; + // fold (and (ctpop X), 1) -> parity X + // Only do this before op legalization as it might be turned back into ctpop. + // TODO: Support vectors? + if (!LegalOperations && isOneConstant(N1) && N0.hasOneUse()) { + SDValue Tmp = N0; + + // It's possible the ctpop has been truncated, but since we only care about + // the LSB we can look through it.
+ if (Tmp.getOpcode() == ISD::TRUNCATE && Tmp.getOperand(0).hasOneUse()) + Tmp = Tmp.getOperand(0); + + if (Tmp.getOpcode() == ISD::CTPOP) { + SDLoc dl(N); + SDValue Parity = + DAG.getNode(ISD::PARITY, dl, Tmp.getValueType(), Tmp.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Parity); + } + } + return SDValue(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 7751ebb7705a3..71ba228b53f6f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -181,6 +181,7 @@ class SelectionDAGLegalize { SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl); SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl); + SDValue ExpandPARITY(SDValue Op, const SDLoc &dl); SDValue ExpandExtractFromVectorThroughStack(SDValue Op); SDValue ExpandInsertToVectorThroughStack(SDValue Op); @@ -2785,6 +2786,28 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) { } } +/// Open code the operations for PARITY of the specified operation. +SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) { + EVT VT = Op.getValueType(); + EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + unsigned Sz = VT.getScalarSizeInBits(); + + // If CTPOP is legal, use it. Otherwise use shifts and xor. + SDValue Result; + if (TLI.isOperationLegal(ISD::CTPOP, VT)) { + Result = DAG.getNode(ISD::CTPOP, dl, VT, Op); + } else { + Result = Op; + for (unsigned i = Log2_32_Ceil(Sz); i != 0;) { + SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, Result, + DAG.getConstant(1 << (--i), dl, ShVT)); + Result = DAG.getNode(ISD::XOR, dl, VT, Result, Shift); + } + } + + return DAG.getNode(ISD::AND, dl, VT, Result, DAG.getConstant(1, dl, VT)); +} + bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { LLVM_DEBUG(dbgs() << "Trying to expand node\n"); SmallVector<SDValue, 8> Results; @@ -2816,6 +2839,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::BSWAP: Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); break; + case ISD::PARITY: + Results.push_back(ExpandPARITY(Node->getOperand(0), dl)); + break; case ISD::FRAMEADDR: case ISD::RETURNADDR: case ISD::FRAME_TO_ARGS_OFFSET: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index bfe1b365efc4d..0000fcb1dde1b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -62,7 +62,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::Constant: Res = PromoteIntRes_Constant(N); break; case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break; - case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break; + case ISD::PARITY: + case ISD::CTPOP: Res = PromoteIntRes_CTPOP_PARITY(N); break; case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; case ISD::EXTRACT_VECTOR_ELT: @@ -503,10 +504,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { NVT)); } -SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { - // Zero extend to the promoted type and do the count there. +SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) { + // Zero extend to the promoted type and do the count or parity there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::CTPOP, SDLoc(N), Op.getValueType(), Op); + return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op); } SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { @@ -1980,6 +1981,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break; case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; + case ISD::PARITY: ExpandIntRes_PARITY(N, Lo, Hi); break; case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break; case ISD::CTLZ_ZERO_UNDEF: @@ -2772,6 +2774,17 @@ void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi); } +void DAGTypeLegalizer::ExpandIntRes_PARITY(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + // parity(HiLo) -> parity(Lo^Hi) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + Lo = + DAG.getNode(ISD::PARITY, dl, NVT, DAG.getNode(ISD::XOR, dl, NVT, Lo, Hi)); + Hi = DAG.getConstant(0, dl, NVT); +} + void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 34c563672753d..86f4fcc023dd9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -311,7 +311,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); SDValue PromoteIntRes_Constant(SDNode *N); SDValue PromoteIntRes_CTLZ(SDNode *N); - SDValue PromoteIntRes_CTPOP(SDNode *N); + SDValue PromoteIntRes_CTPOP_PARITY(SDNode *N); SDValue PromoteIntRes_CTTZ(SDNode *N); SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); @@ -431,6 +431,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_PARITY (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index fcd09b6141677..f854a4f4d35f8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -412,6 +412,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; case ISD::CTLZ: return "ctlz"; case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; + case ISD::PARITY: return "parity"; // Trampolines case ISD::INIT_TRAMPOLINE: return "init_trampoline"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 958bb7939046b..7ef37db68a28b 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -692,6 +692,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::BITREVERSE, VT, 
Expand); + setOperationAction(ISD::PARITY, VT, Expand); // These library functions default to expand. setOperationAction(ISD::FROUND, VT, Expand); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 8913dff47df42..5f7721267db0e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -385,6 +385,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTruncStoreAction(MVT::f80, MVT::f16, Expand); setTruncStoreAction(MVT::f128, MVT::f16, Expand); + setOperationAction(ISD::PARITY, MVT::i8, Custom); if (Subtarget.hasPOPCNT()) { setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32); } else { @@ -395,6 +396,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTPOP , MVT::i64 , Expand); else setOperationAction(ISD::CTPOP , MVT::i64 , Custom); + + setOperationAction(ISD::PARITY, MVT::i16, Custom); + setOperationAction(ISD::PARITY, MVT::i32, Custom); + if (Subtarget.is64Bit()) + setOperationAction(ISD::PARITY, MVT::i64, Custom); } setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); @@ -28865,6 +28871,58 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget, return DAG.getNode(ISD::OR, DL, VT, Lo, Hi); } +static SDValue LowerPARITY(SDValue Op, const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + SDLoc DL(Op); + SDValue X = Op.getOperand(0); + MVT VT = Op.getSimpleValueType(); + + // Special case. If the input fits in 8-bits we can use a single 8-bit TEST. + if (VT == MVT::i8 || + DAG.MaskedValueIsZero(X, APInt::getBitsSetFrom(VT.getSizeInBits(), 8))) { + X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X); + SDValue Flags = DAG.getNode(X86ISD::CMP, DL, MVT::i32, X, + DAG.getConstant(0, DL, MVT::i8)); + // Copy the inverse of the parity flag into a register with setcc. + SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG); + // Extend to the original type. + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp); + } + + if (VT == MVT::i64) { + // Xor the high and low 16-bits together using a 32-bit operation. + SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, + DAG.getNode(ISD::SRL, DL, MVT::i64, X, + DAG.getConstant(32, DL, MVT::i8))); + SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); + X = DAG.getNode(ISD::XOR, DL, MVT::i32, Lo, Hi); + } + + if (VT != MVT::i16) { + // Xor the high and low 16-bits together using a 32-bit operation. + SDValue Hi16 = DAG.getNode(ISD::SRL, DL, MVT::i32, X, + DAG.getConstant(16, DL, MVT::i8)); + X = DAG.getNode(ISD::XOR, DL, MVT::i32, X, Hi16); + } else { + // If the input is 16-bits, we need to extend to use an i32 shift below. + X = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, X); + } + + // Finally xor the low 2 bytes together and use a 8-bit flag setting xor. + // This should allow an h-reg to be used to save a shift. + SDValue Hi = DAG.getNode( + ISD::TRUNCATE, DL, MVT::i8, + DAG.getNode(ISD::SRL, DL, MVT::i32, X, DAG.getConstant(8, DL, MVT::i8))); + SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X); + SDVTList VTs = DAG.getVTList(MVT::i8, MVT::i32); + SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1); + + // Copy the inverse of the parity flag into a register with setcc. + SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG); + // Extend to the original type. 
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp); +} + static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { unsigned NewOpc = 0; @@ -29483,6 +29541,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ATOMIC_LOAD_AND: return lowerAtomicArith(Op, DAG, Subtarget); case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG, Subtarget); case ISD::BITREVERSE: return LowerBITREVERSE(Op, Subtarget, DAG); + case ISD::PARITY: return LowerPARITY(Op, Subtarget, DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG); case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, Subtarget, DAG); @@ -43285,89 +43344,6 @@ static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG, return SDValue(); } -// Look for (and (ctpop X), 1) which is the IR form of __builtin_parity. -// Turn it into series of XORs and a setnp. -static SDValue combineParity(SDNode *N, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - - // RHS needs to be 1. - if (!isOneConstant(N1)) - return SDValue(); - - // Popcnt may be truncated. - if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) - N0 = N0.getOperand(0); - - // LHS needs to be a single use CTPOP. - if (N0.getOpcode() != ISD::CTPOP || !N0.hasOneUse()) - return SDValue(); - - EVT VT = N0.getValueType(); - - // We only support 64-bit and 32-bit. 64-bit requires special handling - // unless the 64-bit popcnt instruction is legal. - if (VT != MVT::i32 && VT != MVT::i64) - return SDValue(); - - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLI.isTypeLegal(VT) && TLI.isOperationLegal(ISD::CTPOP, VT)) - return SDValue(); - - SDLoc DL(N); - SDValue X = N0.getOperand(0); - - // Special case. If the input fits in 8-bits we can use a single 8-bit TEST. - if (DAG.MaskedValueIsZero(X, APInt::getBitsSetFrom(VT.getSizeInBits(), 8))) { - X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X); - SDValue Flags = DAG.getNode(X86ISD::CMP, DL, MVT::i32, X, - DAG.getConstant(0, DL, MVT::i8)); - // Copy the inverse of the parity flag into a register with setcc. - SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG); - // Extend or truncate to the original type. - return DAG.getZExtOrTrunc(Setnp, DL, N->getValueType(0)); - } - - // If this is 64-bit, its always best to xor the two 32-bit pieces together - // even if we have popcnt. - if (VT == MVT::i64) { - SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, - DAG.getNode(ISD::SRL, DL, VT, X, - DAG.getConstant(32, DL, MVT::i8))); - SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); - X = DAG.getNode(ISD::XOR, DL, MVT::i32, Lo, Hi); - // Generate a 32-bit parity idiom. This will bring us back here if we need - // to expand it too. - SDValue Parity = DAG.getNode(ISD::AND, DL, MVT::i32, - DAG.getNode(ISD::CTPOP, DL, MVT::i32, X), - DAG.getConstant(1, DL, MVT::i32)); - return DAG.getZExtOrTrunc(Parity, DL, N->getValueType(0)); - } - assert(VT == MVT::i32 && "Unexpected VT!"); - - // Xor the high and low 16-bits together using a 32-bit operation. - SDValue Hi16 = DAG.getNode(ISD::SRL, DL, VT, X, - DAG.getConstant(16, DL, MVT::i8)); - X = DAG.getNode(ISD::XOR, DL, VT, X, Hi16); - - // Finally xor the low 2 bytes together and use a 8-bit flag setting xor. - // This should allow an h-reg to be used to save a shift. - // FIXME: We only get an h-reg in 32-bit mode. 
- SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, - DAG.getNode(ISD::SRL, DL, VT, X, - DAG.getConstant(8, DL, MVT::i8))); - SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X); - SDVTList VTs = DAG.getVTList(MVT::i8, MVT::i32); - SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1); - - // Copy the inverse of the parity flag into a register with setcc. - SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG); - // Extend or truncate to the original type. - return DAG.getZExtOrTrunc(Setnp, DL, N->getValueType(0)); -} - - // Look for (and (bitcast (vXi1 (concat_vectors (vYi1 setcc), undef,))), C) // Where C is a mask containing the same number of bits as the setcc and // where the setcc will freely 0 upper bits of k-register. We can replace the @@ -43459,10 +43435,6 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, } } - // This must be done before legalization has expanded the ctpop. - if (SDValue V = combineParity(N, DAG, Subtarget)) - return V; - // Match all-of bool scalar reductions into a bitcast/movmsk + cmp. // TODO: Support multiple SrcOps. if (VT == MVT::i1) { diff --git a/llvm/test/CodeGen/AArch64/parity.ll b/llvm/test/CodeGen/AArch64/parity.ll new file mode 100644 index 0000000000000..bdddb6f1069ce --- /dev/null +++ b/llvm/test/CodeGen/AArch64/parity.ll @@ -0,0 +1,161 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s + +define i4 @parity_4(i4 %x) { +; CHECK-LABEL: parity_4: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xf +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i4 @llvm.ctpop.i4(i4 %x) + %2 = and i4 %1, 1 + ret i4 %2 +} + +define i8 @parity_8(i8 %x) { +; CHECK-LABEL: parity_8: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i8 @llvm.ctpop.i8(i8 %x) + %2 = and i8 %1, 1 + ret i8 %2 +} + +define i16 @parity_16(i16 %x) { +; CHECK-LABEL: parity_16: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: eor w8, w8, w8, lsr #8 +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i16 @llvm.ctpop.i16(i16 %x) + %2 = and i16 %1, 1 + ret i16 %2 +} + +define i17 @parity_17(i17 %x) { +; CHECK-LABEL: parity_17: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0x1ffff +; CHECK-NEXT: eor w8, w8, w8, lsr #16 +; CHECK-NEXT: eor w8, w8, w8, lsr #8 +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i17 @llvm.ctpop.i17(i17 %x) + %2 = and i17 %1, 1 + ret i17 %2 +} + +define i32 @parity_32(i32 %x) { +; CHECK-LABEL: parity_32: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w0, lsr #16 +; CHECK-NEXT: eor w8, w8, w8, lsr #8 +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i32 @llvm.ctpop.i32(i32 %x) + %2 = and i32 %1, 1 + ret i32 %2 +} + +define i64 @parity_64(i64 %x) { +; CHECK-LABEL: parity_64: +; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x0, x0, lsr #32 +; CHECK-NEXT: 
eor x8, x8, x8, lsr #16 +; CHECK-NEXT: eor x8, x8, x8, lsr #8 +; CHECK-NEXT: eor x8, x8, x8, lsr #4 +; CHECK-NEXT: eor x8, x8, x8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and x0, x8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.ctpop.i64(i64 %x) + %2 = and i64 %1, 1 + ret i64 %2 +} + +define i32 @parity_64_trunc(i64 %x) { +; CHECK-LABEL: parity_64_trunc: +; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x0, x0, lsr #32 +; CHECK-NEXT: eor x8, x8, x8, lsr #16 +; CHECK-NEXT: eor x8, x8, x8, lsr #8 +; CHECK-NEXT: eor x8, x8, x8, lsr #4 +; CHECK-NEXT: eor x8, x8, x8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.ctpop.i64(i64 %x) + %2 = trunc i64 %1 to i32 + %3 = and i32 %2, 1 + ret i32 %3 +} + +define i8 @parity_32_trunc(i32 %x) { +; CHECK-LABEL: parity_32_trunc: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w0, lsr #16 +; CHECK-NEXT: eor w8, w8, w8, lsr #8 +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i32 @llvm.ctpop.i32(i32 %x) + %2 = trunc i32 %1 to i8 + %3 = and i8 %2, 1 + ret i8 %3 +} + +define i32 @parity_8_zext(i8 %x) { +; CHECK-LABEL: parity_8_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %a = zext i8 %x to i32 + %b = tail call i32 @llvm.ctpop.i32(i32 %a) + %c = and i32 %b, 1 + ret i32 %c +} + +define i32 @parity_8_mask(i32 %x) { +; CHECK-LABEL: parity_8_mask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %a = and i32 %x, 255 + %b = tail call i32 @llvm.ctpop.i32(i32 %a) + %c = and i32 %b, 1 + ret i32 %c +} + +declare i4 @llvm.ctpop.i4(i4 %x) +declare i8 @llvm.ctpop.i8(i8 %x) +declare i16 @llvm.ctpop.i16(i16 %x) +declare i17 @llvm.ctpop.i17(i17 %x) +declare i32 @llvm.ctpop.i32(i32 %x) +declare i64 @llvm.ctpop.i64(i64 %x) diff --git a/llvm/test/CodeGen/ARM/parity.ll b/llvm/test/CodeGen/ARM/parity.ll new file mode 100644 index 0000000000000..40c0d7bd32f11 --- /dev/null +++ b/llvm/test/CodeGen/ARM/parity.ll @@ -0,0 +1,162 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple arm-eabi -mattr=+v6t2 | FileCheck %s + +define i4 @parity_4(i4 %x) { +; CHECK-LABEL: parity_4: +; CHECK: @ %bb.0: +; CHECK-NEXT: and r0, r0, #15 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i4 @llvm.ctpop.i4(i4 %x) + %2 = and i4 %1, 1 + ret i4 %2 +} + +define i8 @parity_8(i8 %x) { +; CHECK-LABEL: parity_8: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i8 @llvm.ctpop.i8(i8 %x) + %2 = and i8 %1, 1 + ret i8 %2 +} + +define i16 @parity_16(i16 %x) { +; CHECK-LABEL: parity_16: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: eor r0, r0, r0, lsr #8 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr 
+ %1 = tail call i16 @llvm.ctpop.i16(i16 %x) + %2 = and i16 %1, 1 + ret i16 %2 +} + +define i17 @parity_17(i17 %x) { +; CHECK-LABEL: parity_17: +; CHECK: @ %bb.0: +; CHECK-NEXT: bfc r0, #17, #15 +; CHECK-NEXT: eor r0, r0, r0, lsr #16 +; CHECK-NEXT: eor r0, r0, r0, lsr #8 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i17 @llvm.ctpop.i17(i17 %x) + %2 = and i17 %1, 1 + ret i17 %2 +} + +define i32 @parity_32(i32 %x) { +; CHECK-LABEL: parity_32: +; CHECK: @ %bb.0: +; CHECK-NEXT: eor r0, r0, r0, lsr #16 +; CHECK-NEXT: eor r0, r0, r0, lsr #8 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i32 @llvm.ctpop.i32(i32 %x) + %2 = and i32 %1, 1 + ret i32 %2 +} + +define i64 @parity_64(i64 %x) { +; CHECK-LABEL: parity_64: +; CHECK: @ %bb.0: +; CHECK-NEXT: eor r0, r0, r1 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: eor r0, r0, r0, lsr #16 +; CHECK-NEXT: eor r0, r0, r0, lsr #8 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i64 @llvm.ctpop.i64(i64 %x) + %2 = and i64 %1, 1 + ret i64 %2 +} + +define i32 @parity_64_trunc(i64 %x) { +; CHECK-LABEL: parity_64_trunc: +; CHECK: @ %bb.0: +; CHECK-NEXT: eor r0, r0, r1 +; CHECK-NEXT: eor r0, r0, r0, lsr #16 +; CHECK-NEXT: eor r0, r0, r0, lsr #8 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i64 @llvm.ctpop.i64(i64 %x) + %2 = trunc i64 %1 to i32 + %3 = and i32 %2, 1 + ret i32 %3 +} + +define i8 @parity_32_trunc(i32 %x) { +; CHECK-LABEL: parity_32_trunc: +; CHECK: @ %bb.0: +; CHECK-NEXT: eor r0, r0, r0, lsr #16 +; CHECK-NEXT: eor r0, r0, r0, lsr #8 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i32 @llvm.ctpop.i32(i32 %x) + %2 = trunc i32 %1 to i8 + %3 = and i8 %2, 1 + ret i8 %3 +} + +define i32 @parity_8_zext(i8 %x) { +; CHECK-LABEL: parity_8_zext: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %a = zext i8 %x to i32 + %b = tail call i32 @llvm.ctpop.i32(i32 %a) + %c = and i32 %b, 1 + ret i32 %c +} + +define i32 @parity_8_mask(i32 %x) { +; CHECK-LABEL: parity_8_mask: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %a = and i32 %x, 255 + %b = tail call i32 @llvm.ctpop.i32(i32 %a) + %c = and i32 %b, 1 + ret i32 %c +} + +declare i4 @llvm.ctpop.i4(i4 %x) +declare i8 @llvm.ctpop.i8(i8 %x) +declare i16 @llvm.ctpop.i16(i16 %x) +declare i17 @llvm.ctpop.i17(i17 %x) +declare i32 @llvm.ctpop.i32(i32 %x) +declare i64 @llvm.ctpop.i64(i64 %x) diff --git a/llvm/test/CodeGen/X86/parity.ll b/llvm/test/CodeGen/X86/parity.ll index 6289ab482426c..d7344a4a2ed78 100644 --- a/llvm/test/CodeGen/X86/parity.ll +++ b/llvm/test/CodeGen/X86/parity.ll @@ -4,6 +4,187 @@ ; RUN: llc < %s -mtriple=i686-unknown-unknown 
-mattr=+popcnt | FileCheck %s --check-prefix=X86-POPCNT ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X64-POPCNT +define i4 @parity_4(i4 %x) { +; X86-NOPOPCNT-LABEL: parity_4: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: testb $15, {{[0-9]+}}(%esp) +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_4: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: testb $15, %dil +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_4: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: testb $15, {{[0-9]+}}(%esp) +; X86-POPCNT-NEXT: setnp %al +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_4: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: testb $15, %dil +; X64-POPCNT-NEXT: setnp %al +; X64-POPCNT-NEXT: retq + %1 = tail call i4 @llvm.ctpop.i4(i4 %x) + %2 = and i4 %1, 1 + ret i4 %2 +} + +define i8 @parity_8(i8 %x) { +; X86-NOPOPCNT-LABEL: parity_8: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_8: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: testb %dil, %dil +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_8: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-POPCNT-NEXT: setnp %al +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_8: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: testb %dil, %dil +; X64-POPCNT-NEXT: setnp %al +; X64-POPCNT-NEXT: retq + %1 = tail call i8 @llvm.ctpop.i8(i8 %x) + %2 = and i8 %1, 1 + ret i8 %2 +} + +define i16 @parity_16(i16 %x) { +; X86-NOPOPCNT-LABEL: parity_16: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOPOPCNT-NEXT: xorl %eax, %eax +; X86-NOPOPCNT-NEXT: xorb %ch, %cl +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_16: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: movl %edi, %ecx +; X64-NOPOPCNT-NEXT: xorl %eax, %eax +; X64-NOPOPCNT-NEXT: xorb %ch, %cl +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_16: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: popcntw {{[0-9]+}}(%esp), %ax +; X86-POPCNT-NEXT: andl $1, %eax +; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_16: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: popcntw %di, %ax +; X64-POPCNT-NEXT: andl $1, %eax +; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X64-POPCNT-NEXT: retq + %1 = tail call i16 @llvm.ctpop.i16(i16 %x) + %2 = and i16 %1, 1 + ret i16 %2 +} + +define i16 @parity_16_load(i16* %x) { +; X86-NOPOPCNT-LABEL: parity_16_load: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOPOPCNT-NEXT: movzwl (%eax), %ecx +; X86-NOPOPCNT-NEXT: xorl %eax, %eax +; X86-NOPOPCNT-NEXT: xorb %ch, %cl +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_16_load: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: movzwl (%rdi), %ecx +; X64-NOPOPCNT-NEXT: xorl %eax, %eax +; X64-NOPOPCNT-NEXT: xorb %ch, %cl +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_16_load: +; 
X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: popcntw (%eax), %ax +; X86-POPCNT-NEXT: andl $1, %eax +; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_16_load: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: popcntw (%rdi), %ax +; X64-POPCNT-NEXT: andl $1, %eax +; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X64-POPCNT-NEXT: retq + %1 = load i16, i16* %x + %2 = tail call i16 @llvm.ctpop.i16(i16 %1) + %3 = and i16 %2, 1 + ret i16 %3 +} + +define i17 @parity_17(i17 %x) { +; X86-NOPOPCNT-LABEL: parity_17: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOPOPCNT-NEXT: movl %ecx, %eax +; X86-NOPOPCNT-NEXT: andl $131071, %eax # imm = 0x1FFFF +; X86-NOPOPCNT-NEXT: movl %eax, %edx +; X86-NOPOPCNT-NEXT: shrl $16, %edx +; X86-NOPOPCNT-NEXT: xorl %eax, %edx +; X86-NOPOPCNT-NEXT: xorl %eax, %eax +; X86-NOPOPCNT-NEXT: xorb %dl, %ch +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_17: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: movl %edi, %eax +; X64-NOPOPCNT-NEXT: andl $131071, %eax # imm = 0x1FFFF +; X64-NOPOPCNT-NEXT: movl %eax, %ecx +; X64-NOPOPCNT-NEXT: shrl $16, %ecx +; X64-NOPOPCNT-NEXT: xorl %eax, %ecx +; X64-NOPOPCNT-NEXT: shrl $8, %edi +; X64-NOPOPCNT-NEXT: xorl %eax, %eax +; X64-NOPOPCNT-NEXT: xorb %cl, %dil +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_17: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: movl $131071, %eax # imm = 0x1FFFF +; X86-POPCNT-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: popcntl %eax, %eax +; X86-POPCNT-NEXT: andl $1, %eax +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_17: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: andl $131071, %edi # imm = 0x1FFFF +; X64-POPCNT-NEXT: popcntl %edi, %eax +; X64-POPCNT-NEXT: andl $1, %eax +; X64-POPCNT-NEXT: retq + %1 = tail call i17 @llvm.ctpop.i17(i17 %x) + %2 = and i17 %1, 1 + ret i17 %2 +} + define i32 @parity_32(i32 %x) { ; X86-NOPOPCNT-LABEL: parity_32: ; X86-NOPOPCNT: # %bb.0: @@ -157,14 +338,14 @@ define i8 @parity_32_trunc(i32 %x) { ; X86-POPCNT-LABEL: parity_32_trunc: ; X86-POPCNT: # %bb.0: ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax -; X86-POPCNT-NEXT: andb $1, %al +; X86-POPCNT-NEXT: andl $1, %eax ; X86-POPCNT-NEXT: # kill: def $al killed $al killed $eax ; X86-POPCNT-NEXT: retl ; ; X64-POPCNT-LABEL: parity_32_trunc: ; X64-POPCNT: # %bb.0: ; X64-POPCNT-NEXT: popcntl %edi, %eax -; X64-POPCNT-NEXT: andb $1, %al +; X64-POPCNT-NEXT: andl $1, %eax ; X64-POPCNT-NEXT: # kill: def $al killed $al killed $eax ; X64-POPCNT-NEXT: retq %1 = tail call i32 @llvm.ctpop.i32(i32 %x) @@ -241,5 +422,9 @@ define i32 @parity_8_mask(i32 %x) { ret i32 %c } +declare i4 @llvm.ctpop.i4(i4 %x) +declare i8 @llvm.ctpop.i8(i8 %x) +declare i16 @llvm.ctpop.i16(i16 %x) +declare i17 @llvm.ctpop.i17(i17 %x) declare i32 @llvm.ctpop.i32(i32 %x) declare i64 @llvm.ctpop.i64(i64 %x) diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll index fb019ffd99e9b..06a428c514a78 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll @@ -53,7 +53,7 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) { ; AVX512VL-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512VL-NEXT: vptestmq %xmm0, %xmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: testb %al, %al +; AVX512VL-NEXT: testb $3, %al ; AVX512VL-NEXT: setnp %al ; 
AVX512VL-NEXT:    retq
 %a = trunc <2 x i64> %0 to <2 x i1>
@@ -103,7 +103,7 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) {
 ; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
-; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    testb $15, %al
 ; AVX512VL-NEXT:    setnp %al
 ; AVX512VL-NEXT:    retq
 %a = trunc <4 x i32> %0 to <4 x i1>
@@ -251,7 +251,7 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) {
 ; AVX512VL-NEXT:    vpsllq $63, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vptestmq %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
-; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    testb $15, %al
 ; AVX512VL-NEXT:    setnp %al
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
@@ -974,7 +974,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
-; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    testb $3, %al
 ; AVX512VL-NEXT:    setnp %al
 ; AVX512VL-NEXT:    retq
 %a = icmp eq <2 x i64> %0, zeroinitializer
@@ -1025,7 +1025,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmd %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
-; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    testb $15, %al
 ; AVX512VL-NEXT:    setnp %al
 ; AVX512VL-NEXT:    retq
 %a = icmp eq <4 x i32> %0, zeroinitializer
@@ -1214,7 +1214,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
-; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    testb $15, %al
 ; AVX512VL-NEXT:    setnp %al
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq

From cc76965b19085519278bff1052059e03769b71e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Miku=C5=82a?=
Date: Sat, 12 Sep 2020 22:00:42 +0300
Subject: [PATCH 0461/1079] [MinGW] Use lib prefix for libraries

In the MinGW world, the UNIX-like `lib` prefix is preferred for
libraries. This patch adjusts the CMake files to follow that convention.
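For illustration, a minimal CMake sketch of the naming convention this
moves to; the `example` target and source file are hypothetical, not
part of the patch:

  # Toy project showing how the shared-library prefix now differs per toolchain.
  add_library(example SHARED example.cpp)
  if(MSVC)
    # Only MSVC keeps the empty prefix, producing example.dll.
    set_target_properties(example PROPERTIES PREFIX "")
  endif()
  # A MinGW build falls through to CMake's default "lib" prefix,
  # yielding libexample.dll, as GNU toolchains expect.
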
Differential Revision: https://reviews.llvm.org/D87517
---
 clang/tools/libclang/CMakeLists.txt    | 2 +-
 lldb/source/API/CMakeLists.txt         | 4 ++--
 llvm/cmake/modules/AddLLVM.cmake       | 2 +-
 llvm/tools/llvm-config/llvm-config.cpp | 1 +
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/clang/tools/libclang/CMakeLists.txt b/clang/tools/libclang/CMakeLists.txt
index c3b9ab6ffb9b0..88279ff7dae67 100644
--- a/clang/tools/libclang/CMakeLists.txt
+++ b/clang/tools/libclang/CMakeLists.txt
@@ -101,7 +101,7 @@ if (WIN32 AND ENABLE_SHARED AND ENABLE_STATIC)
   unset(ENABLE_STATIC)
 endif()

-if(WIN32)
+if(MSVC)
   set(output_name "libclang")
 else()
   set(output_name "clang")
diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt
index 8a7f28c01a9c2..aeb1f15e294b2 100644
--- a/lldb/source/API/CMakeLists.txt
+++ b/lldb/source/API/CMakeLists.txt
@@ -182,10 +182,10 @@ if (NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
   set_target_properties(liblldb_exports PROPERTIES FOLDER "lldb misc")
 endif()

-if ( CMAKE_SYSTEM_NAME MATCHES "Windows" )
+if (MSVC)
   # Only MSVC has the ABI compatibility problem and avoids using FindPythonLibs,
   # so only it needs to explicitly link against ${Python3_LIBRARIES}
-  if (MSVC AND LLDB_ENABLE_PYTHON)
+  if (LLDB_ENABLE_PYTHON)
     target_link_libraries(liblldb PRIVATE ${Python3_LIBRARIES})
   endif()
 else()
diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
index a40cf17426fe0..e57abea427530 100644
--- a/llvm/cmake/modules/AddLLVM.cmake
+++ b/llvm/cmake/modules/AddLLVM.cmake
@@ -567,7 +567,7 @@ function(llvm_add_library name)
     endif()

     if(ARG_SHARED)
-      if(WIN32)
+      if(MSVC)
        set_target_properties(${name} PROPERTIES
          PREFIX ""
          )
diff --git a/llvm/tools/llvm-config/llvm-config.cpp b/llvm/tools/llvm-config/llvm-config.cpp
index a9d3f64aaa5b3..1a2f04552d137 100644
--- a/llvm/tools/llvm-config/llvm-config.cpp
+++ b/llvm/tools/llvm-config/llvm-config.cpp
@@ -381,6 +381,7 @@ int main(int argc, char **argv) {
     SharedExt = "dll";
     SharedVersionedExt = LLVM_DYLIB_VERSION ".dll";
     if (HostTriple.isOSCygMing()) {
+      SharedPrefix = "lib";
       StaticExt = "a";
       StaticPrefix = "lib";
     } else {

From bb613044b6800b8ccc238232677f905bda423819 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Miku=C5=82a?=
Date: Sat, 12 Sep 2020 22:02:11 +0300
Subject: [PATCH 0462/1079] [MinGW][clang-shlib] Build by default on MinGW

It builds without errors and makes it possible to use
CLANG_LINK_CLANG_DYLIB=1.

Differential Revision: https://reviews.llvm.org/D87547
---
 clang/tools/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/tools/CMakeLists.txt b/clang/tools/CMakeLists.txt
index e46c3669a2c2b..85a85812a8d41 100644
--- a/clang/tools/CMakeLists.txt
+++ b/clang/tools/CMakeLists.txt
@@ -15,7 +15,7 @@ add_clang_subdirectory(c-index-test)
 add_clang_subdirectory(clang-rename)
 add_clang_subdirectory(clang-refactor)

-if(UNIX)
+if(UNIX OR MINGW)
   add_clang_subdirectory(clang-shlib)
 endif()

From 7da941939902768af25ffa45149695a0a5f15951 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Miku=C5=82a?=
Date: Sat, 12 Sep 2020 22:03:22 +0300
Subject: [PATCH 0463/1079] [MinGW][libclang] Allow simultaneous shared and static lib

It builds fine for MinGW on Windows.
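For context, a rough CMake sketch of what "simultaneous shared and
static" means here; it mirrors the libclang gating changed below, and
the comments are explanatory, not from the source:

  # ENABLE_SHARED/ENABLE_STATIC select which flavors of libclang get built.
  # Before this patch, any Windows build dropped the static flavor when both
  # were requested; now only MSVC does, so a MinGW build can produce both
  # libclang.dll and libclang.a from one configuration.
  if (MSVC AND ENABLE_SHARED AND ENABLE_STATIC)
    unset(ENABLE_STATIC)
  endif()
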
Differential Revision: https://reviews.llvm.org/D87539 --- clang/tools/libclang/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/tools/libclang/CMakeLists.txt b/clang/tools/libclang/CMakeLists.txt index 88279ff7dae67..15f7ff94dfead 100644 --- a/clang/tools/libclang/CMakeLists.txt +++ b/clang/tools/libclang/CMakeLists.txt @@ -97,7 +97,7 @@ if(NOT LLVM_ENABLE_PIC OR LIBCLANG_BUILD_STATIC) set(ENABLE_STATIC STATIC) endif() -if (WIN32 AND ENABLE_SHARED AND ENABLE_STATIC) +if (MSVC AND ENABLE_SHARED AND ENABLE_STATIC) unset(ENABLE_STATIC) endif() From c34a99fe589b870354c9a7863b79d882c74f7d50 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 12 Sep 2020 20:31:46 +0200 Subject: [PATCH 0464/1079] [InstCombine] Add extra use tests for abs canonicalization (NFC) --- llvm/test/Transforms/InstCombine/abs-1.ll | 103 +++++++++++++++++++++- 1 file changed, 99 insertions(+), 4 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/abs-1.ll b/llvm/test/Transforms/InstCombine/abs-1.ll index 08cab94e3dfc2..f879b165f4b81 100644 --- a/llvm/test/Transforms/InstCombine/abs-1.ll +++ b/llvm/test/Transforms/InstCombine/abs-1.ll @@ -461,6 +461,7 @@ define i8 @shifty_abs_commute3(i8 %x) { ; Negative test - don't transform if it would increase instruction count. declare void @extra_use(i8) +declare void @extra_use_i1(i1) define i8 @shifty_abs_too_many_uses(i8 %x) { ; CHECK-LABEL: @shifty_abs_too_many_uses( @@ -534,8 +535,8 @@ define i8 @negate_abs(i8 %x) { ; CHECK-LABEL: @negate_abs( ; CHECK-NEXT: [[N:%.*]] = sub i8 0, [[X:%.*]] ; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X]], 0 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 [[X]], i8 [[N]] -; CHECK-NEXT: ret i8 [[S]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C]], i8 [[X]], i8 [[N]] +; CHECK-NEXT: ret i8 [[TMP1]] ; %n = sub i8 0, %x %c = icmp slt i8 %x, 0 @@ -548,8 +549,8 @@ define <2 x i8> @negate_nabs(<2 x i8> %x) { ; CHECK-LABEL: @negate_nabs( ; CHECK-NEXT: [[N:%.*]] = sub <2 x i8> zeroinitializer, [[X:%.*]] ; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i8> [[X]], zeroinitializer -; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[N]], <2 x i8> [[X]] -; CHECK-NEXT: ret <2 x i8> [[S]] +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i8> [[N]], <2 x i8> [[X]] +; CHECK-NEXT: ret <2 x i8> [[TMP1]] ; %n = sub <2 x i8> zeroinitializer, %x %c = icmp slt <2 x i8> %x, zeroinitializer @@ -647,3 +648,97 @@ define i64 @infinite_loop_constant_expression_abs(i64 %arg) { %t3 = select i1 %t1, i64 %t2, i64 %t ret i64 %t3 } + +define i8 @abs_extra_use_icmp(i8 %x) { +; CHECK-LABEL: @abs_extra_use_icmp( +; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X:%.*]], 0 +; CHECK-NEXT: call void @extra_use_i1(i1 [[C]]) +; CHECK-NEXT: [[N:%.*]] = sub i8 0, [[X]] +; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 [[N]], i8 [[X]] +; CHECK-NEXT: ret i8 [[S]] +; + %c = icmp slt i8 %x, 0 + call void @extra_use_i1(i1 %c) + %n = sub i8 0, %x + %s = select i1 %c, i8 %n, i8 %x + ret i8 %s +} + +define i8 @abs_extra_use_sub(i8 %x) { +; CHECK-LABEL: @abs_extra_use_sub( +; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X:%.*]], 0 +; CHECK-NEXT: [[N:%.*]] = sub i8 0, [[X]] +; CHECK-NEXT: call void @extra_use(i8 [[N]]) +; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 [[N]], i8 [[X]] +; CHECK-NEXT: ret i8 [[S]] +; + %c = icmp slt i8 %x, 0 + %n = sub i8 0, %x + call void @extra_use(i8 %n) + %s = select i1 %c, i8 %n, i8 %x + ret i8 %s +} + +define i8 @abs_extra_use_icmp_sub(i8 %x) { +; CHECK-LABEL: @abs_extra_use_icmp_sub( +; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X:%.*]], 0 +; 
CHECK-NEXT: call void @extra_use_i1(i1 [[C]]) +; CHECK-NEXT: [[N:%.*]] = sub i8 0, [[X]] +; CHECK-NEXT: call void @extra_use(i8 [[N]]) +; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 [[N]], i8 [[X]] +; CHECK-NEXT: ret i8 [[S]] +; + %c = icmp slt i8 %x, 0 + call void @extra_use_i1(i1 %c) + %n = sub i8 0, %x + call void @extra_use(i8 %n) + %s = select i1 %c, i8 %n, i8 %x + ret i8 %s +} + +define i8 @nabs_extra_use_icmp(i8 %x) { +; CHECK-LABEL: @nabs_extra_use_icmp( +; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X:%.*]], 0 +; CHECK-NEXT: call void @extra_use_i1(i1 [[C]]) +; CHECK-NEXT: [[N:%.*]] = sub i8 0, [[X]] +; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 [[X]], i8 [[N]] +; CHECK-NEXT: ret i8 [[S]] +; + %c = icmp slt i8 %x, 0 + call void @extra_use_i1(i1 %c) + %n = sub i8 0, %x + %s = select i1 %c, i8 %x, i8 %n + ret i8 %s +} + +define i8 @nabs_extra_use_sub(i8 %x) { +; CHECK-LABEL: @nabs_extra_use_sub( +; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X:%.*]], 0 +; CHECK-NEXT: [[N:%.*]] = sub i8 0, [[X]] +; CHECK-NEXT: call void @extra_use(i8 [[N]]) +; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 [[X]], i8 [[N]] +; CHECK-NEXT: ret i8 [[S]] +; + %c = icmp slt i8 %x, 0 + %n = sub i8 0, %x + call void @extra_use(i8 %n) + %s = select i1 %c, i8 %x, i8 %n + ret i8 %s +} + +define i8 @nabs_extra_use_icmp_sub(i8 %x) { +; CHECK-LABEL: @nabs_extra_use_icmp_sub( +; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X:%.*]], 0 +; CHECK-NEXT: call void @extra_use_i1(i1 [[C]]) +; CHECK-NEXT: [[N:%.*]] = sub i8 0, [[X]] +; CHECK-NEXT: call void @extra_use(i8 [[N]]) +; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 [[X]], i8 [[N]] +; CHECK-NEXT: ret i8 [[S]] +; + %c = icmp slt i8 %x, 0 + call void @extra_use_i1(i1 %c) + %n = sub i8 0, %x + call void @extra_use(i8 %n) + %s = select i1 %c, i8 %x, i8 %n + ret i8 %s +} From c55c14837e148b817de989106560328219df342b Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 12 Sep 2020 12:05:25 -0700 Subject: [PATCH 0465/1079] [gcov] Clean up by getting llvm.dbg.cu earlier --- .../Instrumentation/GCOVProfiling.cpp | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index cc8b92e21c7ce..15355ff8efd17 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -112,11 +112,11 @@ class GCOVProfiler { private: // Create the .gcno files for the Module based on DebugInfo. - void emitProfileNotes(); + void emitProfileNotes(NamedMDNode *CUNode); // Modify the program to track transitions along edges and call into the // profiling runtime to emit .gcda files when run. 
- bool emitProfileArcs(); + bool emitProfileArcs(NamedMDNode *CUNode); bool isFunctionInstrumented(const Function &F); std::vector createRegexesFromString(StringRef RegexesStr); @@ -550,14 +550,19 @@ bool GCOVProfiler::runOnModule( this->GetTLI = std::move(GetTLI); Ctx = &M.getContext(); + NamedMDNode *CUNode = M.getNamedMetadata("llvm.dbg.cu"); + if (!CUNode) + return false; + bool Modified = AddFlushBeforeForkAndExec(); FilterRe = createRegexesFromString(Options.Filter); ExcludeRe = createRegexesFromString(Options.Exclude); - if (Options.EmitNotes) emitProfileNotes(); + if (Options.EmitNotes) + emitProfileNotes(CUNode); if (Options.EmitData) - Modified |= emitProfileArcs(); + Modified |= emitProfileArcs(CUNode); return Modified; } @@ -683,10 +688,7 @@ bool GCOVProfiler::AddFlushBeforeForkAndExec() { return !Forks.empty() || !Execs.empty(); } -void GCOVProfiler::emitProfileNotes() { - NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); - if (!CU_Nodes) return; - +void GCOVProfiler::emitProfileNotes(NamedMDNode *CUNode) { int Version; { uint8_t c3 = Options.Version[0]; @@ -696,12 +698,12 @@ void GCOVProfiler::emitProfileNotes() { : (c3 - '0') * 10 + c1 - '0'; } - for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + for (unsigned i = 0, e = CUNode->getNumOperands(); i != e; ++i) { // Each compile unit gets its own .gcno file. This means that whether we run // this pass over the original .o's as they're produced, or run it after // LTO, we'll generate the same .gcno files. - auto *CU = cast(CU_Nodes->getOperand(i)); + auto *CU = cast(CUNode->getOperand(i)); // Skip module skeleton (and module) CUs. if (CU->getDWOId()) @@ -818,12 +820,9 @@ void GCOVProfiler::emitProfileNotes() { } } -bool GCOVProfiler::emitProfileArcs() { - NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); - if (!CU_Nodes) return false; - +bool GCOVProfiler::emitProfileArcs(NamedMDNode *CUNode) { bool Result = false; - for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + for (unsigned i = 0, e = CUNode->getNumOperands(); i != e; ++i) { SmallVector, 8> CountersBySP; for (auto &F : M->functions()) { DISubprogram *SP = F.getSubprogram(); From 412c9c0bf2a8ccbda2d925575891a51ef5df846e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 12 Sep 2020 12:17:40 -0700 Subject: [PATCH 0466/1079] [gcov] emitProfileArcs: iterate over GCOVFunction's instead of Function's to avoid duplicated filtering --- .../Instrumentation/GCOVProfiling.cpp | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 15355ff8efd17..56f6a045501c8 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -322,14 +322,14 @@ namespace { // object users can construct, the blocks and lines will be rooted here. class GCOVFunction : public GCOVRecord { public: - GCOVFunction(GCOVProfiler *P, Function *F, const DISubprogram *SP, + GCOVFunction(GCOVProfiler *P, Function &F, const DISubprogram *SP, unsigned EndLine, uint32_t Ident, int Version) - : GCOVRecord(P), SP(SP), EndLine(EndLine), Ident(Ident), + : GCOVRecord(P), F(F), SP(SP), EndLine(EndLine), Ident(Ident), Version(Version), EntryBlock(P, 0), ReturnBlock(P, 1) { LLVM_DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n"); bool ExitBlockBeforeBody = Version >= 48; uint32_t i = ExitBlockBeforeBody ? 
2 : 1; - for (BasicBlock &BB : *F) + for (BasicBlock &BB : F) Blocks.insert(std::make_pair(&BB, GCOVBlock(P, i++))); if (!ExitBlockBeforeBody) ReturnBlock.Number = i; @@ -424,6 +424,8 @@ namespace { getBlock(&I).writeOut(); } + Function &F; + private: const DISubprogram *SP; unsigned EndLine; @@ -736,7 +738,7 @@ void GCOVProfiler::emitProfileNotes(NamedMDNode *CUNode) { // single successor, so split the entry block to make sure of that. BasicBlock &EntryBlock = F.getEntryBlock(); - Funcs.push_back(std::make_unique(this, &F, SP, EndLine, + Funcs.push_back(std::make_unique(this, F, SP, EndLine, FunctionIdent++, Version)); GCOVFunction &Func = *Funcs.back(); @@ -824,15 +826,8 @@ bool GCOVProfiler::emitProfileArcs(NamedMDNode *CUNode) { bool Result = false; for (unsigned i = 0, e = CUNode->getNumOperands(); i != e; ++i) { SmallVector, 8> CountersBySP; - for (auto &F : M->functions()) { - DISubprogram *SP = F.getSubprogram(); - unsigned EndLine; - if (!SP) continue; - if (!functionHasLines(F, EndLine) || !isFunctionInstrumented(F)) - continue; - // TODO: Functions using scope-based EH are currently not supported. - if (isUsingScopeBasedEH(F)) continue; - + for (const GCOVFunction &GF : make_pointee_range(Funcs)) { + Function &F = GF.F; DenseMap, unsigned> EdgeToCounter; unsigned Edges = 0; EdgeToCounter[{nullptr, &F.getEntryBlock()}] = Edges++; @@ -854,7 +849,7 @@ bool GCOVProfiler::emitProfileArcs(NamedMDNode *CUNode) { GlobalValue::InternalLinkage, Constant::getNullValue(CounterTy), "__llvm_gcov_ctr"); - CountersBySP.push_back(std::make_pair(Counters, SP)); + CountersBySP.emplace_back(Counters, F.getSubprogram()); // If a BB has several predecessors, use a PHINode to select // the correct counter. From 7d3825ed954aa1578790b96a8a544d034ea112f6 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 12 Sep 2020 12:34:43 -0700 Subject: [PATCH 0467/1079] Revert "[gcov] emitProfileArcs: iterate over GCOVFunction's instead of Function's to avoid duplicated filtering" This reverts commit 412c9c0bf2a8ccbda2d925575891a51ef5df846e. --- .../Instrumentation/GCOVProfiling.cpp | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 56f6a045501c8..15355ff8efd17 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -322,14 +322,14 @@ namespace { // object users can construct, the blocks and lines will be rooted here. class GCOVFunction : public GCOVRecord { public: - GCOVFunction(GCOVProfiler *P, Function &F, const DISubprogram *SP, + GCOVFunction(GCOVProfiler *P, Function *F, const DISubprogram *SP, unsigned EndLine, uint32_t Ident, int Version) - : GCOVRecord(P), F(F), SP(SP), EndLine(EndLine), Ident(Ident), + : GCOVRecord(P), SP(SP), EndLine(EndLine), Ident(Ident), Version(Version), EntryBlock(P, 0), ReturnBlock(P, 1) { LLVM_DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n"); bool ExitBlockBeforeBody = Version >= 48; uint32_t i = ExitBlockBeforeBody ? 
2 : 1; - for (BasicBlock &BB : F) + for (BasicBlock &BB : *F) Blocks.insert(std::make_pair(&BB, GCOVBlock(P, i++))); if (!ExitBlockBeforeBody) ReturnBlock.Number = i; @@ -424,8 +424,6 @@ namespace { getBlock(&I).writeOut(); } - Function &F; - private: const DISubprogram *SP; unsigned EndLine; @@ -738,7 +736,7 @@ void GCOVProfiler::emitProfileNotes(NamedMDNode *CUNode) { // single successor, so split the entry block to make sure of that. BasicBlock &EntryBlock = F.getEntryBlock(); - Funcs.push_back(std::make_unique(this, F, SP, EndLine, + Funcs.push_back(std::make_unique(this, &F, SP, EndLine, FunctionIdent++, Version)); GCOVFunction &Func = *Funcs.back(); @@ -826,8 +824,15 @@ bool GCOVProfiler::emitProfileArcs(NamedMDNode *CUNode) { bool Result = false; for (unsigned i = 0, e = CUNode->getNumOperands(); i != e; ++i) { SmallVector, 8> CountersBySP; - for (const GCOVFunction &GF : make_pointee_range(Funcs)) { - Function &F = GF.F; + for (auto &F : M->functions()) { + DISubprogram *SP = F.getSubprogram(); + unsigned EndLine; + if (!SP) continue; + if (!functionHasLines(F, EndLine) || !isFunctionInstrumented(F)) + continue; + // TODO: Functions using scope-based EH are currently not supported. + if (isUsingScopeBasedEH(F)) continue; + DenseMap, unsigned> EdgeToCounter; unsigned Edges = 0; EdgeToCounter[{nullptr, &F.getEntryBlock()}] = Edges++; @@ -849,7 +854,7 @@ bool GCOVProfiler::emitProfileArcs(NamedMDNode *CUNode) { GlobalValue::InternalLinkage, Constant::getNullValue(CounterTy), "__llvm_gcov_ctr"); - CountersBySP.emplace_back(Counters, F.getSubprogram()); + CountersBySP.push_back(std::make_pair(Counters, SP)); // If a BB has several predecessors, use a PHINode to select // the correct counter. From e8e3693ceaa1afe267f21d2ba8d9565ea8fe7c12 Mon Sep 17 00:00:00 2001 From: "Paul C. Anagnostopoulos" Date: Fri, 11 Sep 2020 09:49:27 -0400 Subject: [PATCH 0468/1079] Change range operator from deprecated '-' to '...' 
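The change is purely syntactic. A small TableGen sketch, not taken from
the patch, contrasting the two spellings:

  class Reg<int n> { int Num = n; }

  foreach i = 0-3 in        // deprecated '-' range punctuation
    def A#i : Reg<i>;

  foreach i = 0...3 in      // preferred '...' punctuation
    def B#i : Reg<i>;

  class Enc<bits<8> op> {
    bits<16> Inst;
    let Inst{15...8} = op;  // previously written as Inst{15-8}
  }
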
--- .../test/TableGen/AllowDuplicateRegisterNames.td | 2 +- llvm/test/TableGen/BigEncoder.td | 12 ++++++------ llvm/test/TableGen/BitOffsetDecoder.td | 16 ++++++++-------- llvm/test/TableGen/BitsInit.td | 2 +- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/llvm/test/TableGen/AllowDuplicateRegisterNames.td b/llvm/test/TableGen/AllowDuplicateRegisterNames.td index 2ba63c434ca5f..897a628fe64b8 100644 --- a/llvm/test/TableGen/AllowDuplicateRegisterNames.td +++ b/llvm/test/TableGen/AllowDuplicateRegisterNames.td @@ -27,7 +27,7 @@ class ArchReg alt, list altidx> def ABIRegAltName : RegAltNameIndex; -foreach i = 0-3 in { +foreach i = 0...3 in { def R#i#_32 : ArchReg<"r"#i, ["x"#i], [ABIRegAltName]>; def R#i#_64 : ArchReg<"r"#i, ["x"#i], [ABIRegAltName]>; } diff --git a/llvm/test/TableGen/BigEncoder.td b/llvm/test/TableGen/BigEncoder.td index 5c4bc016e269c..9b9d382433508 100644 --- a/llvm/test/TableGen/BigEncoder.td +++ b/llvm/test/TableGen/BigEncoder.td @@ -19,8 +19,8 @@ def foo : Instruction { let InOperandList = (ins i32imm:$factor); field bits<65> Inst; bits<32> factor; - let Inst{7-0} = 0xAA; - let Inst{14-8} = factor{6-0}; // no offset + let Inst{7...0} = 0xAA; + let Inst{14...8} = factor{6...0}; // no offset let AsmString = "foo $factor"; field bits<16> SoftFail = 0; } @@ -29,8 +29,8 @@ def bar : Instruction { let InOperandList = (ins i32imm:$factor); field bits<65> Inst; bits<32> factor; - let Inst{7-0} = 0xBB; - let Inst{15-8} = factor{10-3}; // offset by 3 + let Inst{7...0} = 0xBB; + let Inst{15...8} = factor{10...3}; // offset by 3 let AsmString = "bar $factor"; field bits<16> SoftFail = 0; } @@ -39,8 +39,8 @@ def biz : Instruction { let InOperandList = (ins i32imm:$factor); field bits<65> Inst; bits<32> factor; - let Inst{7-0} = 0xCC; - let Inst{11-8,15-12} = factor{10-3}; // offset by 3, multipart + let Inst{7...0} = 0xCC; + let Inst{11...8,15...12} = factor{10...3}; // offset by 3, multipart let AsmString = "biz $factor"; field bits<16> SoftFail = 0; } diff --git a/llvm/test/TableGen/BitOffsetDecoder.td b/llvm/test/TableGen/BitOffsetDecoder.td index a928664398f0f..f94e8d4f09789 100644 --- a/llvm/test/TableGen/BitOffsetDecoder.td +++ b/llvm/test/TableGen/BitOffsetDecoder.td @@ -19,8 +19,8 @@ def foo : Instruction { let InOperandList = (ins i32imm:$factor); field bits<16> Inst; bits<32> factor; - let Inst{7-0} = 0xAA; - let Inst{14-8} = factor{6-0}; // no offset + let Inst{7...0} = 0xAA; + let Inst{14...8} = factor{6...0}; // no offset let AsmString = "foo $factor"; field bits<16> SoftFail = 0; } @@ -29,8 +29,8 @@ def bar : Instruction { let InOperandList = (ins i32imm:$factor); field bits<16> Inst; bits<32> factor; - let Inst{7-0} = 0xBB; - let Inst{15-8} = factor{10-3}; // offset by 3 + let Inst{7...0} = 0xBB; + let Inst{15...8} = factor{10...3}; // offset by 3 let AsmString = "bar $factor"; field bits<16> SoftFail = 0; } @@ -39,8 +39,8 @@ def biz : Instruction { let InOperandList = (ins i32imm:$factor); field bits<16> Inst; bits<32> factor; - let Inst{7-0} = 0xCC; - let Inst{11-8,15-12} = factor{10-3}; // offset by 3, multipart + let Inst{7...0} = 0xCC; + let Inst{11...8,15...12} = factor{10...3}; // offset by 3, multipart let AsmString = "biz $factor"; field bits<16> SoftFail = 0; } @@ -49,8 +49,8 @@ def baz : Instruction { let InOperandList = (ins Myi32:$factor); field bits<16> Inst; bits<32> factor; - let Inst{7-0} = 0xDD; - let Inst{15-8} = factor{11-4}; // offset by 4 + custom decode + let Inst{7...0} = 0xDD; + let Inst{15...8} = factor{11...4}; // offset by 4 
+ custom decode let AsmString = "baz $factor"; field bits<16> SoftFail = 0; } diff --git a/llvm/test/TableGen/BitsInit.td b/llvm/test/TableGen/BitsInit.td index 16d2d07753ad7..6f9acd346ba88 100644 --- a/llvm/test/TableGen/BitsInit.td +++ b/llvm/test/TableGen/BitsInit.td @@ -38,7 +38,7 @@ def { bits<2> D8 = { 0 }; // type mismatch. RHS doesn't have enough bits bits<8> E; - let E{7-0} = {0,0,1,?,?,?,?,?}; + let E{7..0} = {0,0,1,?,?,?,?,?}; let E{3-0} = 0b0010; bits<8> F1 = { 0, 1, 0b1001, 0, 0b0 }; // ok From 93b4f8538267e620de4a36e7cf0abc0d4f8d7c10 Mon Sep 17 00:00:00 2001 From: "Paul C. Anagnostopoulos" Date: Fri, 11 Sep 2020 10:26:26 -0400 Subject: [PATCH 0469/1079] Update TableGen test files to use the new '...' range punctuation. --- llvm/test/TableGen/BitsInit.td | 6 ++-- llvm/test/TableGen/DAGDefaultOps.td | 16 +++++----- llvm/test/TableGen/ForeachLoop.td | 4 +-- llvm/test/TableGen/HwModeEncodeDecode.td | 14 ++++---- llvm/test/TableGen/JSON.td | 4 +-- llvm/test/TableGen/ListSlices.td | 4 +-- llvm/test/TableGen/UnsetBitInit.td | 4 +-- llvm/test/TableGen/cond-let.td | 14 ++++---- .../TableGen/dag-isel-regclass-emit-enum.td | 2 +- llvm/test/TableGen/defset.td | 2 +- llvm/test/TableGen/foreach-variable-range.td | 32 +++++++++---------- llvm/test/TableGen/if.td | 12 +++---- llvm/test/TableGen/ifstmt.td | 6 ++-- llvm/test/TableGen/list-element-bitref.td | 4 +-- llvm/test/TableGen/range-lists.td | 3 +- llvm/test/TableGen/simplify-patfrag.td | 2 +- llvm/test/TableGen/trydecode-emission3.td | 4 +-- 17 files changed, 67 insertions(+), 66 deletions(-) diff --git a/llvm/test/TableGen/BitsInit.td b/llvm/test/TableGen/BitsInit.td index 6f9acd346ba88..c5527aebb9417 100644 --- a/llvm/test/TableGen/BitsInit.td +++ b/llvm/test/TableGen/BitsInit.td @@ -38,8 +38,8 @@ def { bits<2> D8 = { 0 }; // type mismatch. RHS doesn't have enough bits bits<8> E; - let E{7..0} = {0,0,1,?,?,?,?,?}; - let E{3-0} = 0b0010; + let E{7...0} = {0,0,1,?,?,?,?,?}; + let E{3...0} = 0b0010; bits<8> F1 = { 0, 1, 0b1001, 0, 0b0 }; // ok bits<7> F2 = { 0, 1, 0b1001, 0, 0b0 }; // LHS doesn't have enough bits @@ -50,7 +50,7 @@ def { bits<8> G3 = { 0, 1, { 0b1001 }, 0, 0b0 }; // ok bits<16> H; - let H{15-0} = { { 0b11001100 }, 0b00110011 }; + let H{15...0} = { { 0b11001100 }, 0b00110011 }; bits<16> I = { G1, G2 }; // Make sure we can initialise ints with bits<> values. 
diff --git a/llvm/test/TableGen/DAGDefaultOps.td b/llvm/test/TableGen/DAGDefaultOps.td index 1c98c4d8d07be..702a2232db305 100644 --- a/llvm/test/TableGen/DAGDefaultOps.td +++ b/llvm/test/TableGen/DAGDefaultOps.td @@ -16,10 +16,10 @@ class TestEncoding : Instruction { } class TestReg : Register<"R"#index, []> { - let HWEncoding{15-4} = 0; - let HWEncoding{3-0} = !cast>(index); + let HWEncoding{15...4} = 0; + let HWEncoding{3...0} = !cast>(index); } -foreach i = 0-15 in +foreach i = 0...15 in def "R"#i : TestReg; def Reg : RegisterClass<"TestTarget", [i32], 32, (sequence "R%d", 0, 15)>; @@ -36,11 +36,11 @@ class RRI Opcode> : TestEncoding { field bits<4> src1; field bits<4> src2; field bits<16> imm; - let Inst{31-28} = Opcode; - let Inst{27-24} = dest; - let Inst{23-20} = src1; - let Inst{19-16} = src2; - let Inst{15-0} = imm; + let Inst{31...28} = Opcode; + let Inst{27...24} = dest; + let Inst{23...20} = src1; + let Inst{19...16} = src2; + let Inst{15...0} = imm; } def AddRRI : RRI<"add", 0b0001>; diff --git a/llvm/test/TableGen/ForeachLoop.td b/llvm/test/TableGen/ForeachLoop.td index ce8d44c7526e7..173285b5e722f 100644 --- a/llvm/test/TableGen/ForeachLoop.td +++ b/llvm/test/TableGen/ForeachLoop.td @@ -7,7 +7,7 @@ class Register { // CHECK-NOT: !strconcat -foreach i = 0-3 in +foreach i = 0...3 in def Q#i : Register<"Q"#i, i>; // CHECK: def Q0 @@ -50,7 +50,7 @@ foreach i = [0, 1, 2, 3, 4, 5, 6, 7] in // CHECK: string Name = "R7"; // CHECK: int Index = 7; -foreach i = {0-3,9-7} in { +foreach i = {0...3,9...7} in { def S#i : Register<"Q"#i, i>; def : Register<"T"#i, i>; } diff --git a/llvm/test/TableGen/HwModeEncodeDecode.td b/llvm/test/TableGen/HwModeEncodeDecode.td index 1c9b86ff26a75..bac432271888b 100644 --- a/llvm/test/TableGen/HwModeEncodeDecode.td +++ b/llvm/test/TableGen/HwModeEncodeDecode.td @@ -22,9 +22,9 @@ def fooTypeEncA : InstructionEncoding { field bits<32> SoftFail = 0; bits<32> Inst; bits<8> factor; - let Inst{7-0} = factor; - let Inst{3-2} = 0b11; - let Inst{1-0} = 0b00; + let Inst{7...0} = factor; + let Inst{3...2} = 0b11; + let Inst{1...0} = 0b00; } def fooTypeEncB : InstructionEncoding { @@ -32,8 +32,8 @@ def fooTypeEncB : InstructionEncoding { field bits<32> SoftFail = 0; bits<32> Inst; bits<8> factor; - let Inst{15-8} = factor; - let Inst{1-0} = 0b11; + let Inst{15...8} = factor; + let Inst{1...0} = 0b11; } let OutOperandList = (outs) in { @@ -52,8 +52,8 @@ def bar: Instruction { bits<32> Inst; bits<32> SoftFail; bits<8> factor; - let Inst{31-24} = factor; - let Inst{1-0} = 0b10; + let Inst{31...24} = factor; + let Inst{1...0} = 0b10; let AsmString = "bar $factor"; } diff --git a/llvm/test/TableGen/JSON.td b/llvm/test/TableGen/JSON.td index 968c2577fa993..3fb2ec4014fbc 100644 --- a/llvm/test/TableGen/JSON.td +++ b/llvm/test/TableGen/JSON.td @@ -97,8 +97,8 @@ def VarObj : Variables { bits<2> undef_bits; bits<4> ref_bits; - let ref_bits{3-2} = 0b10; - let ref_bits{1-0} = undef_bits{1-0}; + let ref_bits{3...2} = 0b10; + let ref_bits{1...0} = undef_bits{1...0}; // CHECK: data['VarObj']['ref_bits'][3] == 1 // CHECK: data['VarObj']['ref_bits'][2] == 0 // CHECK: data['VarObj']['ref_bits'][1]['kind'] == 'varbit' diff --git a/llvm/test/TableGen/ListSlices.td b/llvm/test/TableGen/ListSlices.td index cbb2326a95c00..2f40334798b28 100644 --- a/llvm/test/TableGen/ListSlices.td +++ b/llvm/test/TableGen/ListSlices.td @@ -6,12 +6,12 @@ def A { } def B { - list X = [10, 20, 30, 4, 1, 1231, 20] [2-4,2,2,0-6]; + list X = [10, 20, 30, 4, 1, 1231, 20] [2...4,2,2,0...6]; list Y = X[4,5]; int Z 
= X[4]; - list C = A.B[1-4]; + list C = A.B[1...4]; list> AA = [X, Y]; diff --git a/llvm/test/TableGen/UnsetBitInit.td b/llvm/test/TableGen/UnsetBitInit.td index 694847358f66c..07e37e08efab3 100644 --- a/llvm/test/TableGen/UnsetBitInit.td +++ b/llvm/test/TableGen/UnsetBitInit.td @@ -21,7 +21,7 @@ def A { bit P; bit Q; - let Inst{7-2} = 0x3f; + let Inst{7...2} = 0x3f; let Inst{1} = P; let Inst{0} = Q; @@ -34,7 +34,7 @@ class x { } class y B> : x { - let A{21-20} = B; + let A{21...20} = B; } def z : y<{0,?}>; diff --git a/llvm/test/TableGen/cond-let.td b/llvm/test/TableGen/cond-let.td index 044878f2ab8e3..4e46445cc327a 100644 --- a/llvm/test/TableGen/cond-let.td +++ b/llvm/test/TableGen/cond-let.td @@ -11,13 +11,13 @@ class C x, bits<4> y, bit z> { y{1}: x{1}, y{0}: x{2}, {1} :?); - let n{10-9}= !cond(x{2}: y{3-2}, - x{1}: y{2-1}, - x{1}: y{1-0}, - {1} : ?); - let n{8-6} = !cond(x{2}: 0b010, 1 : 0b110); - let n{5-4} = !cond(x{1}: y{3-2}, 1 : {0, 1}); - let n{3-0} = !cond(x{0}: y{3-0}, 1 : {z, y{2}, y{1}, y{0}}); + let n{10...9}= !cond(x{2}: y{3...2}, + x{1}: y{2...1}, + x{1}: y{1...0}, + {1} : ?); + let n{8...6} = !cond(x{2}: 0b010, 1 : 0b110); + let n{5...4} = !cond(x{1}: y{3...2}, 1 : {0, 1}); + let n{3...0} = !cond(x{0}: y{3...0}, 1 : {z, y{2}, y{1}, y{0}}); } diff --git a/llvm/test/TableGen/dag-isel-regclass-emit-enum.td b/llvm/test/TableGen/dag-isel-regclass-emit-enum.td index 0002614fd5748..462bb3f2cd6da 100644 --- a/llvm/test/TableGen/dag-isel-regclass-emit-enum.td +++ b/llvm/test/TableGen/dag-isel-regclass-emit-enum.td @@ -12,7 +12,7 @@ let Namespace = "TestNamespace" in { def R0 : Register<"r0">; -foreach i = 0-127 in { +foreach i = 0...127 in { def GPR#i : RegisterClass<"TestTarget", [i32], 32, (add R0)>; } diff --git a/llvm/test/TableGen/defset.td b/llvm/test/TableGen/defset.td index 3c5fb68ea7ef0..ef9f54ba6e2db 100644 --- a/llvm/test/TableGen/defset.td +++ b/llvm/test/TableGen/defset.td @@ -40,7 +40,7 @@ multiclass C { defset list As = { def A0 : A<1>; - foreach i = 1-2 in { + foreach i = 1...2 in { def A#i : A; } defset list SubAs = { diff --git a/llvm/test/TableGen/foreach-variable-range.td b/llvm/test/TableGen/foreach-variable-range.td index 3ddb2c08ff20e..2a576d247a351 100644 --- a/llvm/test/TableGen/foreach-variable-range.td +++ b/llvm/test/TableGen/foreach-variable-range.td @@ -13,84 +13,84 @@ def Constants : ConstantsImpl; // CHECK-DAG: def var_bound_whitespaceA0 // CHECK-DAG: def var_bound_whitespaceA1 // CHECK-DAG: def var_bound_whitespaceA2 -foreach Index = Constants.Zero - Constants.Two in { +foreach Index = Constants.Zero ... Constants.Two in { def var_bound_whitespaceA#Index; } // CHECK-DAG: def var_bound_whitespaceB0 // CHECK-DAG: def var_bound_whitespaceB1 // CHECK-DAG: def var_bound_whitespaceB2 -foreach Index = Constants.Zero-Constants.Two in { +foreach Index = Constants.Zero...Constants.Two in { def var_bounds_whitespaceB#Index; } // CHECK-DAG: def var_bound_whitespaceC0 // CHECK-DAG: def var_bound_whitespaceC1 // CHECK-DAG: def var_bound_whitespaceC2 -foreach Index = Constants.Zero -Constants.Two in { +foreach Index = Constants.Zero ...Constants.Two in { def var_bounds_whitespaceC#Index; } // CHECK-DAG: def var_bound_whitespaceD0 // CHECK-DAG: def var_bound_whitespaceD1 // CHECK-DAG: def var_bound_whitespaceD2 -foreach Index = Constants.Zero- Constants.Two in { +foreach Index = Constants.Zero... 
Constants.Two in { def var_bounds_whitespaceD#Index; } // CHECK-DAG: def const_lower_whitespaceA0 // CHECK-DAG: def const_lower_whitespaceA1 // CHECK-DAG: def const_lower_whitespaceA2 -foreach Index = 0 - Constants.Two in { +foreach Index = 0 ... Constants.Two in { def const_lower_whitespaceA#Index; } // CHECK-DAG: def const_lower_whitespaceB0 // CHECK-DAG: def const_lower_whitespaceB1 // CHECK-DAG: def const_lower_whitespaceB2 -foreach Index = 0-Constants.Two in { +foreach Index = 0...Constants.Two in { def const_lower_whitespaceB#Index; } // CHECK-DAG: def const_lower_whitespaceC0 // CHECK-DAG: def const_lower_whitespaceC1 // CHECK-DAG: def const_lower_whitespaceC2 -foreach Index = 0 -Constants.Two in { +foreach Index = 0 ...Constants.Two in { def const_lower_whitespaceC#Index; } // CHECK-DAG: def const_lower_whitespaceD0 // CHECK-DAG: def const_lower_whitespaceD1 // CHECK-DAG: def const_lower_whitespaceD2 -foreach Index = 0- Constants.Two in { +foreach Index = 0... Constants.Two in { def const_lower_whitespaceD#Index; } // CHECK-DAG: def const_upper_whitespaceA0 // CHECK-DAG: def const_upper_whitespaceA1 // CHECK-DAG: def const_upper_whitespaceA2 -foreach Index = Constants.Zero - 2 in { +foreach Index = Constants.Zero ... 2 in { def const_upper_whitespaceA#Index; } // CHECK-DAG: def const_upper_whitespaceB0 // CHECK-DAG: def const_upper_whitespaceB1 // CHECK-DAG: def const_upper_whitespaceB2 -foreach Index = Constants.Zero-2 in { +foreach Index = Constants.Zero...2 in { def const_upper_whitespaceB#Index; } // CHECK-DAG: def const_upper_whitespaceC0 // CHECK-DAG: def const_upper_whitespaceC1 // CHECK-DAG: def const_upper_whitespaceC2 -foreach Index = Constants.Zero -2 in { +foreach Index = Constants.Zero ...2 in { def const_upper_whitespaceC#Index; } // CHECK-DAG: def const_upper_whitespaceD0 // CHECK-DAG: def const_upper_whitespaceD1 // CHECK-DAG: def const_upper_whitespaceD2 -foreach Index = Constants.Zero- 2 in { +foreach Index = Constants.Zero... 
2 in { def const_upper_whitespaceD#Index; } @@ -98,7 +98,7 @@ foreach Index = Constants.Zero- 2 in { // CHECK-DAG: def multi_rangeA1 // CHECK-DAG: def multi_rangeA2 // CHECK-DAG: def multi_rangeA3 -foreach Index = {Constants.Zero-Constants.One, Constants.Two-Constants.Three} in { +foreach Index = {Constants.Zero...Constants.One, Constants.Two...Constants.Three} in { def multi_rangeA#Index; } @@ -107,7 +107,7 @@ foreach Index = {Constants.Zero-Constants.One, Constants.Two-Constants.Three} in // CHECK-DAG: def multi_rangeB3 // CHECK-DAG: def multi_rangeB4 // CHECK-DAG: def multi_rangeB5 -foreach Index = {0-Constants.One, Constants.Three-Constants.Five} in { +foreach Index = {0...Constants.One, Constants.Three...Constants.Five} in { def multi_rangeB#Index; } @@ -115,7 +115,7 @@ foreach Index = {0-Constants.One, Constants.Three-Constants.Five} in { // CHECK-DAG: def multi_rangeC1 // CHECK-DAG: def multi_rangeC2 // CHECK-DAG: def multi_rangeC3 -foreach Index = {0-Constants.One, 2-Constants.Three} in { +foreach Index = {0...Constants.One, 2...Constants.Three} in { def multi_rangeC#Index; } @@ -123,6 +123,6 @@ foreach Index = {0-Constants.One, 2-Constants.Three} in { // CHECK-DAG: def multi_rangeD1 // CHECK-DAG: def multi_rangeD2 // CHECK-DAG: def multi_rangeD3 -foreach Index = {0-1, Constants.Two-3} in { +foreach Index = {0...1, Constants.Two...3} in { def multi_rangeD#Index; } diff --git a/llvm/test/TableGen/if.td b/llvm/test/TableGen/if.td index a6af59e72830d..1fbee6966ff38 100644 --- a/llvm/test/TableGen/if.td +++ b/llvm/test/TableGen/if.td @@ -11,12 +11,12 @@ class C x, bits<4> y, bit z> { !if(y{2}, x{0}, !if(y{1}, x{1}, !if(y{0}, x{2}, ?)))); - let n{10-9}= !if(x{2}, y{3-2}, - !if(x{1}, y{2-1}, - !if(x{0}, y{1-0}, ?))); - let n{8-6} = !if(x{2}, 0b010, 0b110); - let n{5-4} = !if(x{1}, y{3-2}, {0, 1}); - let n{3-0} = !if(x{0}, y{3-0}, {z, y{2}, y{1}, y{0}}); + let n{10...9}= !if(x{2}, y{3...2}, + !if(x{1}, y{2...1}, + !if(x{0}, y{1...0}, ?))); + let n{8...6} = !if(x{2}, 0b010, 0b110); + let n{5...4} = !if(x{1}, y{3...2}, {0, 1}); + let n{3...0} = !if(x{0}, y{3...0}, {z, y{2}, y{1}, y{0}}); } def C1 : C<{1, 0, 1}, {0, 1, 0, 1}, 0>; diff --git a/llvm/test/TableGen/ifstmt.td b/llvm/test/TableGen/ifstmt.td index 22354310e7baf..5c0093a9a9ea1 100644 --- a/llvm/test/TableGen/ifstmt.td +++ b/llvm/test/TableGen/ifstmt.td @@ -15,7 +15,7 @@ if 1 then def aYes; // CHECK: def bNotThree2 // CHECK: def bNotThree4 // CHECK: def bThree3 -foreach i = 1-4 in { +foreach i = 1...4 in { if !eq(i, 3) then { def "bThree" # i; } else { @@ -61,8 +61,8 @@ defm c3: Multi<3>; // CHECK-NOT: def dThenElse1 // CHECK-NOT: def dThenElse11 // CHECK: def dThenThen01 -foreach i = 0-1 in - foreach j = 0-1 in +foreach i = 0...1 in + foreach j = 0...1 in if !eq(i,0) then if !eq(j,1) then def "dThenThen"#i#j; diff --git a/llvm/test/TableGen/list-element-bitref.td b/llvm/test/TableGen/list-element-bitref.td index 0f59b537fa6d6..4aae62f329de1 100644 --- a/llvm/test/TableGen/list-element-bitref.td +++ b/llvm/test/TableGen/list-element-bitref.td @@ -2,8 +2,8 @@ // XFAIL: vg_leak class C> L> { - bits<2> V0 = L[0]{1-0}; - bits<2> V1 = L[1]{3-2}; + bits<2> V0 = L[0]{1...0}; + bits<2> V1 = L[1]{3...2}; string V2 = !if(L[0]{0}, "Odd", "Even"); } diff --git a/llvm/test/TableGen/range-lists.td b/llvm/test/TableGen/range-lists.td index 82f4338323e52..85e0939f2ec0e 100644 --- a/llvm/test/TableGen/range-lists.td +++ b/llvm/test/TableGen/range-lists.td @@ -1,7 +1,8 @@ // RUN: llvm-tblgen %s | FileCheck %s // XFAIL: vg_leak -// This file has tests 
for range lists and range pieces. +// This file has tests for range lists and range pieces. Some use the +// deprecated '-' range punctuation just to be sure it still works. // These are tests for bits ranges. diff --git a/llvm/test/TableGen/simplify-patfrag.td b/llvm/test/TableGen/simplify-patfrag.td index 693658317d5d0..904c29696a6e2 100644 --- a/llvm/test/TableGen/simplify-patfrag.td +++ b/llvm/test/TableGen/simplify-patfrag.td @@ -9,7 +9,7 @@ def Demo : Target { } // Some registers which can hold ints or floats -foreach i = 0-7 in +foreach i = 0...7 in def "R" # i: Register<"r" # i>; def GPR : RegisterClass<"Demo", [i32, f32], 32, (sequence "R%u", 0, 7)>; diff --git a/llvm/test/TableGen/trydecode-emission3.td b/llvm/test/TableGen/trydecode-emission3.td index 8fc5150a0d8ea..84ce4f9a749b1 100644 --- a/llvm/test/TableGen/trydecode-emission3.td +++ b/llvm/test/TableGen/trydecode-emission3.td @@ -28,8 +28,8 @@ def InstBOp : Operand { def InstB : TestInstruction { bits<2> op; - let Inst{7-2} = {0,0,0,0,0,0}; - let Inst{1-0} = op; + let Inst{7...2} = {0,0,0,0,0,0}; + let Inst{1...0} = op; let OutOperandList = (outs InstBOp:$op); let AsmString = "InstB"; } From bdd1eba37b64e64c2d93d3e79223b5933d631447 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 12 Sep 2020 22:39:39 +0200 Subject: [PATCH 0470/1079] [ARM] Add additional vecreduce float legalization test (NFC) --- .../vecreduce-fadd-legalization-soft-float.ll | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll index f3eeb11a17fd2..164cfe1d88488 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll @@ -1,10 +1,49 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK +declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half, <4 x half>) declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>) declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double, <2 x double>) declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128, <2 x fp128>) +define half @test_v4f16(<4 x half> %a) nounwind { +; CHECK-LABEL: test_v4f16: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: mov r7, #255 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: orr r7, r7, #65280 +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: and r0, r1, r7 +; CHECK-NEXT: mov r5, r2 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: and r0, r4, r7 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: and r0, r6, r7 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: and r0, r5, r7 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: bl __aeabi_fadd +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: bl __aeabi_fadd +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: bl __aeabi_fadd +; CHECK-NEXT: bl __aeabi_f2h +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: mov pc, lr + %b = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half 0.0, <4 x half> %a) + ret half %b +} + define float @test_v4f32(<4 x float> %a) nounwind { ; CHECK-LABEL: 
test_v4f32: ; CHECK: @ %bb.0: From d6fadc49e3d7eb0977bca3ff92bf156bd059fcd4 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 12 Sep 2020 13:51:53 -0700 Subject: [PATCH 0471/1079] [gcov] Process .gcda immediately after the accompanying .gcno instead of doing all .gcda after all .gcno i.e. change the work flow from * .gcno for function A * .gcno for function B * .gcno for function C * .gcda for function A * .gcda for function B * .gcda for function C to * .gcno for function A * .gcda for function A * .gcno for function B * .gcda for function B * .gcno for function C * .gcda for function C Currently there is duplicate logic in .gcno & .gcda processing: how functions are filtered, which edges are instrumented, etc. This refactor enables simplification. Since we always process .gcno, in -fprofile-arcs -fno-test-coverage mode, __llvm_internal_gcov_emit_function_args.0 will have non-zero checksums. --- clang/test/CodeGen/code-coverage.c | 2 +- .../Instrumentation/GCOVProfiling.cpp | 307 +++++++++--------- 2 files changed, 152 insertions(+), 157 deletions(-) diff --git a/clang/test/CodeGen/code-coverage.c b/clang/test/CodeGen/code-coverage.c index 5a663135e2f03..014dd9cfb5a7b 100644 --- a/clang/test/CodeGen/code-coverage.c +++ b/clang/test/CodeGen/code-coverage.c @@ -38,7 +38,7 @@ int test2(int b) { // CHECK: @__llvm_internal_gcov_emit_function_args.0 = internal unnamed_addr constant [2 x %0] -// CHECK-SAME: [%0 zeroinitializer, %0 { i32 1, i32 0, i32 0 }] +// CHECK-SAME: [%0 { i32 0, i32 {{[-0-9]+}}, i32 {{[-0-9]+}} }, %0 { i32 1, i32 {{[-0-9]+}}, i32 {{[-0-9]+}} }] // CHECK: @__llvm_internal_gcov_emit_file_info = internal unnamed_addr constant [1 x %2] /// 0x3330342a '3' '0' '4' '*' diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 15355ff8efd17..68df0af4892af 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -116,7 +116,11 @@ class GCOVProfiler { // Modify the program to track transitions along edges and call into the // profiling runtime to emit .gcda files when run. - bool emitProfileArcs(NamedMDNode *CUNode); + void instrumentFunction( + Function &F, + SmallVectorImpl> &CountersBySP); + void emitGlobalConstructor( + SmallVectorImpl> &CountersBySP); bool isFunctionInstrumented(const Function &F); std::vector createRegexesFromString(StringRef RegexesStr); @@ -551,19 +555,15 @@ bool GCOVProfiler::runOnModule( Ctx = &M.getContext(); NamedMDNode *CUNode = M.getNamedMetadata("llvm.dbg.cu"); - if (!CUNode) + if (!CUNode || (!Options.EmitNotes && !Options.EmitData)) return false; bool Modified = AddFlushBeforeForkAndExec(); FilterRe = createRegexesFromString(Options.Filter); ExcludeRe = createRegexesFromString(Options.Exclude); - - if (Options.EmitNotes) - emitProfileNotes(CUNode); - if (Options.EmitData) - Modified |= emitProfileArcs(CUNode); - return Modified; + emitProfileNotes(CUNode); + return Modified || Options.EmitData; } PreservedAnalyses GCOVProfilerPass::run(Module &M, @@ -698,6 +698,7 @@ void GCOVProfiler::emitProfileNotes(NamedMDNode *CUNode) { : (c3 - '0') * 10 + c1 - '0'; } + bool EmitGCDA = Options.EmitData; for (unsigned i = 0, e = CUNode->getNumOperands(); i != e; ++i) { // Each compile unit gets its own .gcno file. 
This means that whether we run // this pass over the original .o's as they're produced, or run it after @@ -709,16 +710,8 @@ void GCOVProfiler::emitProfileNotes(NamedMDNode *CUNode) { if (CU->getDWOId()) continue; - std::error_code EC; - raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC, - sys::fs::OF_None); - if (EC) { - Ctx->emitError(Twine("failed to open coverage notes file for writing: ") + - EC.message()); - continue; - } - std::vector EdgeDestinations; + SmallVector, 8> CountersBySP; Endian = M->getDataLayout().isLittleEndian() ? support::endianness::little : support::endianness::big; @@ -789,165 +782,167 @@ void GCOVProfiler::emitProfileNotes(NamedMDNode *CUNode) { } Line = 0; } + if (EmitGCDA) + instrumentFunction(F, CountersBySP); } char Tmp[4]; JamCRC JC; JC.update(EdgeDestinations); - os = &out; uint32_t Stamp = JC.getCRC(); FileChecksums.push_back(Stamp); - if (Endian == support::endianness::big) { - out.write("gcno", 4); - out.write(Options.Version, 4); - } else { - out.write("oncg", 4); - std::reverse_copy(Options.Version, Options.Version + 4, Tmp); - out.write(Tmp, 4); + + if (Options.EmitNotes) { + std::error_code EC; + raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC, + sys::fs::OF_None); + if (EC) { + Ctx->emitError( + Twine("failed to open coverage notes file for writing: ") + + EC.message()); + continue; + } + os = &out; + if (Endian == support::endianness::big) { + out.write("gcno", 4); + out.write(Options.Version, 4); + } else { + out.write("oncg", 4); + std::reverse_copy(Options.Version, Options.Version + 4, Tmp); + out.write(Tmp, 4); + } + write(Stamp); + if (Version >= 90) + writeString(""); // unuseful current_working_directory + if (Version >= 80) + write(0); // unuseful has_unexecuted_blocks + + for (auto &Func : Funcs) + Func->writeOut(Stamp); + + write(0); + write(0); + out.close(); + } + + if (EmitGCDA) { + emitGlobalConstructor(CountersBySP); + EmitGCDA = false; } - write(Stamp); - if (Version >= 90) - writeString(""); // unuseful current_working_directory - if (Version >= 80) - write(0); // unuseful has_unexecuted_blocks - - for (auto &Func : Funcs) - Func->writeOut(Stamp); - - write(0); - write(0); - out.close(); } } -bool GCOVProfiler::emitProfileArcs(NamedMDNode *CUNode) { - bool Result = false; - for (unsigned i = 0, e = CUNode->getNumOperands(); i != e; ++i) { - SmallVector, 8> CountersBySP; - for (auto &F : M->functions()) { - DISubprogram *SP = F.getSubprogram(); - unsigned EndLine; - if (!SP) continue; - if (!functionHasLines(F, EndLine) || !isFunctionInstrumented(F)) - continue; - // TODO: Functions using scope-based EH are currently not supported. 
- if (isUsingScopeBasedEH(F)) continue; - - DenseMap, unsigned> EdgeToCounter; - unsigned Edges = 0; - EdgeToCounter[{nullptr, &F.getEntryBlock()}] = Edges++; - for (auto &BB : F) { - Instruction *TI = BB.getTerminator(); - if (isa(TI)) { - EdgeToCounter[{&BB, nullptr}] = Edges++; - } else { - for (BasicBlock *Succ : successors(TI)) { - EdgeToCounter[{&BB, Succ}] = Edges++; - } - } +void GCOVProfiler::instrumentFunction( + Function &F, + SmallVectorImpl> &CountersBySP) { + DISubprogram *SP = F.getSubprogram(); + DenseMap, unsigned> EdgeToCounter; + unsigned Edges = 0; + EdgeToCounter[{nullptr, &F.getEntryBlock()}] = Edges++; + for (auto &BB : F) { + Instruction *TI = BB.getTerminator(); + if (isa(TI)) { + EdgeToCounter[{&BB, nullptr}] = Edges++; + } else { + for (BasicBlock *Succ : successors(TI)) { + EdgeToCounter[{&BB, Succ}] = Edges++; } + } + } - ArrayType *CounterTy = - ArrayType::get(Type::getInt64Ty(*Ctx), Edges); - GlobalVariable *Counters = - new GlobalVariable(*M, CounterTy, false, - GlobalValue::InternalLinkage, - Constant::getNullValue(CounterTy), - "__llvm_gcov_ctr"); - CountersBySP.push_back(std::make_pair(Counters, SP)); - - // If a BB has several predecessors, use a PHINode to select - // the correct counter. - for (auto &BB : F) { - // The phi node must be at the begin of the BB. - IRBuilder<> BuilderForPhi(&*BB.begin()); - IRBuilder<> Builder(&*BB.getFirstInsertionPt()); - Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx); - Value *V; - if (&BB == &F.getEntryBlock()) { - auto It = EdgeToCounter.find({nullptr, &BB}); - V = Builder.CreateConstInBoundsGEP2_64(Counters->getValueType(), - Counters, 0, It->second); - } else { - const unsigned EdgeCount = - std::distance(pred_begin(&BB), pred_end(&BB)); - if (EdgeCount == 0) - continue; - PHINode *Phi = BuilderForPhi.CreatePHI(Int64PtrTy, EdgeCount); - for (BasicBlock *Pred : predecessors(&BB)) { - auto It = EdgeToCounter.find({Pred, &BB}); - assert(It != EdgeToCounter.end()); - const unsigned Edge = It->second; - Value *EdgeCounter = BuilderForPhi.CreateConstInBoundsGEP2_64( - Counters->getValueType(), Counters, 0, Edge); - Phi->addIncoming(EdgeCounter, Pred); - V = Phi; - } - } - - if (Options.Atomic) { - Builder.CreateAtomicRMW(AtomicRMWInst::Add, V, Builder.getInt64(1), - AtomicOrdering::Monotonic); - } else { - Value *Count = - Builder.CreateLoad(Builder.getInt64Ty(), V, "gcov_ctr"); - Count = Builder.CreateAdd(Count, Builder.getInt64(1)); - Builder.CreateStore(Count, V); - } + ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(*Ctx), Edges); + GlobalVariable *Counters = + new GlobalVariable(*M, CounterTy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(CounterTy), "__llvm_gcov_ctr"); + CountersBySP.push_back(std::make_pair(Counters, SP)); - Instruction *TI = BB.getTerminator(); - if (isa(TI)) { - auto It = EdgeToCounter.find({&BB, nullptr}); - assert(It != EdgeToCounter.end()); - const unsigned Edge = It->second; - Value *Counter = Builder.CreateConstInBoundsGEP2_64( - Counters->getValueType(), Counters, 0, Edge); - if (Options.Atomic) { - Builder.CreateAtomicRMW(AtomicRMWInst::Add, Counter, - Builder.getInt64(1), - AtomicOrdering::Monotonic); - } else { - Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), Counter); - Count = Builder.CreateAdd(Count, Builder.getInt64(1)); - Builder.CreateStore(Count, Counter); - } - } + // If a BB has several predecessors, use a PHINode to select + // the correct counter. + for (auto &BB : F) { + // The phi node must be at the begin of the BB. 
+ IRBuilder<> BuilderForPhi(&*BB.begin()); + IRBuilder<> Builder(&*BB.getFirstInsertionPt()); + Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx); + Value *V; + if (&BB == &F.getEntryBlock()) { + auto It = EdgeToCounter.find({nullptr, &BB}); + V = Builder.CreateConstInBoundsGEP2_64(Counters->getValueType(), Counters, + 0, It->second); + } else { + const unsigned EdgeCount = std::distance(pred_begin(&BB), pred_end(&BB)); + if (EdgeCount == 0) + continue; + PHINode *Phi = BuilderForPhi.CreatePHI(Int64PtrTy, EdgeCount); + for (BasicBlock *Pred : predecessors(&BB)) { + auto It = EdgeToCounter.find({Pred, &BB}); + assert(It != EdgeToCounter.end()); + const unsigned Edge = It->second; + Value *EdgeCounter = BuilderForPhi.CreateConstInBoundsGEP2_64( + Counters->getValueType(), Counters, 0, Edge); + Phi->addIncoming(EdgeCounter, Pred); + V = Phi; } } - Function *WriteoutF = insertCounterWriteout(CountersBySP); - Function *ResetF = insertReset(CountersBySP); - - // Create a small bit of code that registers the "__llvm_gcov_writeout" to - // be executed at exit and the "__llvm_gcov_flush" function to be executed - // when "__gcov_flush" is called. - FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Function *F = Function::Create(FTy, GlobalValue::InternalLinkage, - "__llvm_gcov_init", M); - F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - F->setLinkage(GlobalValue::InternalLinkage); - F->addFnAttr(Attribute::NoInline); - if (Options.NoRedZone) - F->addFnAttr(Attribute::NoRedZone); - - BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F); - IRBuilder<> Builder(BB); - - FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - auto *PFTy = PointerType::get(FTy, 0); - FTy = FunctionType::get(Builder.getVoidTy(), {PFTy, PFTy}, false); - - // Initialize the environment and register the local writeout, flush and - // reset functions. - FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy); - Builder.CreateCall(GCOVInit, {WriteoutF, ResetF}); - Builder.CreateRetVoid(); + if (Options.Atomic) { + Builder.CreateAtomicRMW(AtomicRMWInst::Add, V, Builder.getInt64(1), + AtomicOrdering::Monotonic); + } else { + Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), V, "gcov_ctr"); + Count = Builder.CreateAdd(Count, Builder.getInt64(1)); + Builder.CreateStore(Count, V); + } - appendToGlobalCtors(*M, F, 0); - Result = true; + Instruction *TI = BB.getTerminator(); + if (isa(TI)) { + auto It = EdgeToCounter.find({&BB, nullptr}); + assert(It != EdgeToCounter.end()); + const unsigned Edge = It->second; + Value *Counter = Builder.CreateConstInBoundsGEP2_64( + Counters->getValueType(), Counters, 0, Edge); + if (Options.Atomic) { + Builder.CreateAtomicRMW(AtomicRMWInst::Add, Counter, + Builder.getInt64(1), AtomicOrdering::Monotonic); + } else { + Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), Counter); + Count = Builder.CreateAdd(Count, Builder.getInt64(1)); + Builder.CreateStore(Count, Counter); + } + } } +} + +void GCOVProfiler::emitGlobalConstructor( + SmallVectorImpl> &CountersBySP) { + Function *WriteoutF = insertCounterWriteout(CountersBySP); + Function *ResetF = insertReset(CountersBySP); + + // Create a small bit of code that registers the "__llvm_gcov_writeout" to + // be executed at exit and the "__llvm_gcov_flush" function to be executed + // when "__gcov_flush" is called. 
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + Function *F = Function::Create(FTy, GlobalValue::InternalLinkage, + "__llvm_gcov_init", M); + F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + F->setLinkage(GlobalValue::InternalLinkage); + F->addFnAttr(Attribute::NoInline); + if (Options.NoRedZone) + F->addFnAttr(Attribute::NoRedZone); + + BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F); + IRBuilder<> Builder(BB); + + FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + auto *PFTy = PointerType::get(FTy, 0); + FTy = FunctionType::get(Builder.getVoidTy(), {PFTy, PFTy}, false); + + // Initialize the environment and register the local writeout, flush and + // reset functions. + FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy); + Builder.CreateCall(GCOVInit, {WriteoutF, ResetF}); + Builder.CreateRetVoid(); - return Result; + appendToGlobalCtors(*M, F, 0); } FunctionCallee GCOVProfiler::getStartFileFunc(const TargetLibraryInfo *TLI) { From 04febd30a8dab3ff4b6e6032f1a1a9f4725f8267 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Thu, 23 Jul 2020 15:06:21 -0700 Subject: [PATCH 0472/1079] [lld][WebAssembly] Error on import/export of mutable global without `mutable-globals` feature Also add the +mutable-globals features in clang when building with `-fPIC` since the linker will generate mutable globals imports and exports in that case. Differential Revision: https://reviews.llvm.org/D87537 --- clang/lib/Driver/ToolChains/WebAssembly.cpp | 21 +++++++++++++++++++ clang/test/Driver/wasm-toolchain.c | 11 ++++++++++ lld/test/wasm/Inputs/undefined-globals.s | 4 ++-- lld/test/wasm/emit-relocs-fpic.s | 4 ++-- lld/test/wasm/gc-imports.s | 6 +++--- lld/test/wasm/mutable-globals.s | 13 ++++++++++++ lld/test/wasm/pie.ll | 2 +- lld/test/wasm/shared.ll | 2 +- lld/wasm/Writer.cpp | 23 +++++++++++++++++++++ 9 files changed, 77 insertions(+), 9 deletions(-) create mode 100644 lld/test/wasm/mutable-globals.s diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp index 10168736400f8..d953082470aab 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -243,6 +243,27 @@ void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs, CC1Args.push_back("+sign-ext"); } + if (!DriverArgs.hasFlag(options::OPT_mmutable_globals, + options::OPT_mno_mutable_globals, false)) { + // -fPIC implies +mutable-globals because the PIC ABI used by the linker + // depends on importing and exporting mutable globals. 
+ llvm::Reloc::Model RelocationModel; + unsigned PICLevel; + bool IsPIE; + std::tie(RelocationModel, PICLevel, IsPIE) = + ParsePICArgs(*this, DriverArgs); + if (RelocationModel == llvm::Reloc::PIC_) { + if (DriverArgs.hasFlag(options::OPT_mno_mutable_globals, + options::OPT_mmutable_globals, false)) { + getDriver().Diag(diag::err_drv_argument_not_allowed_with) + << "-fPIC" + << "-mno-mutable-globals"; + } + CC1Args.push_back("-target-feature"); + CC1Args.push_back("+mutable-globals"); + } + } + if (DriverArgs.getLastArg(options::OPT_fwasm_exceptions)) { // '-fwasm-exceptions' is not compatible with '-mno-exception-handling' if (DriverArgs.hasFlag(options::OPT_mno_exception_handing, diff --git a/clang/test/Driver/wasm-toolchain.c b/clang/test/Driver/wasm-toolchain.c index ad8b000ad2250..3c2eb66f9e199 100644 --- a/clang/test/Driver/wasm-toolchain.c +++ b/clang/test/Driver/wasm-toolchain.c @@ -119,3 +119,14 @@ // RUN: | FileCheck -check-prefix=CHECK-REACTOR %s // CHECK-REACTOR: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" // CHECK-REACTOR: wasm-ld{{.*}}" "crt1-reactor.o" "--entry" "_initialize" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" + +// -fPIC implies +mutable-globals + +// RUN: %clang %s -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=%s/no-sysroot-there -fPIC 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-PIC %s +// CHECK-PIC: clang{{.*}}" "-cc1" {{.*}} "-target-feature" "+mutable-globals" + +// '-mno-mutable-globals' is not allowed with '-fPIC' +// RUN: %clang %s -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=%s/no-sysroot-there -fPIC -mno-mutable-globals %s 2>&1 \ +// RUN: | FileCheck -check-prefix=PIC_NO_MUTABLE_GLOBALS %s +// PIC_NO_MUTABLE_GLOBALS: error: invalid argument '-fPIC' not allowed with '-mno-mutable-globals' diff --git a/lld/test/wasm/Inputs/undefined-globals.s b/lld/test/wasm/Inputs/undefined-globals.s index 607d7942d0037..54dc4189a7770 100644 --- a/lld/test/wasm/Inputs/undefined-globals.s +++ b/lld/test/wasm/Inputs/undefined-globals.s @@ -7,5 +7,5 @@ use_undef_global: global.get used_undef_global end_function -.globaltype unused_undef_global, i64 -.globaltype used_undef_global, i64 +.globaltype unused_undef_global, i64, immutable +.globaltype used_undef_global, i64, immutable diff --git a/lld/test/wasm/emit-relocs-fpic.s b/lld/test/wasm/emit-relocs-fpic.s index c70e1e6751098..1d81ca62786be 100644 --- a/lld/test/wasm/emit-relocs-fpic.s +++ b/lld/test/wasm/emit-relocs-fpic.s @@ -1,6 +1,6 @@ -# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -o %t.o < %s +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ret32.s -o %t.ret32.o -# RUN: wasm-ld -pie --export-all --no-gc-sections --no-entry --emit-relocs -o %t.wasm %t.o %t.ret32.o +# RUN: wasm-ld -pie --export-all --no-check-features --no-gc-sections --no-entry --emit-relocs -o %t.wasm %t.o %t.ret32.o # RUN: obj2yaml %t.wasm | FileCheck %s load_hidden_data: diff --git a/lld/test/wasm/gc-imports.s b/lld/test/wasm/gc-imports.s index 6564b5c1a7d87..1f8bca9064e09 100644 --- a/lld/test/wasm/gc-imports.s +++ b/lld/test/wasm/gc-imports.s @@ -31,7 +31,7 @@ _start: # CHECK-NEXT: Field: used_undef_global # CHECK-NEXT: Kind: GLOBAL # CHECK-NEXT: GlobalType: I64 -# CHECK-NEXT: GlobalMutable: true +# CHECK-NEXT: GlobalMutable: false # CHECK-NEXT: - Type: # CHECK: - Type: CUSTOM # CHECK-NEXT: Name: name @@ -62,12 +62,12 @@ _start: # NO-GC-NEXT: Field: 
unused_undef_global # NO-GC-NEXT: Kind: GLOBAL # NO-GC-NEXT: GlobalType: I64 -# NO-GC-NEXT: GlobalMutable: true +# NO-GC-NEXT: GlobalMutable: false # NO-GC-NEXT: - Module: env # NO-GC-NEXT: Field: used_undef_global # NO-GC-NEXT: Kind: GLOBAL # NO-GC-NEXT: GlobalType: I64 -# NO-GC-NEXT: GlobalMutable: true +# NO-GC-NEXT: GlobalMutable: false # NO-GC-NEXT: - Type: # NO-GC: - Type: CUSTOM # NO-GC-NEXT: Name: name diff --git a/lld/test/wasm/mutable-globals.s b/lld/test/wasm/mutable-globals.s new file mode 100644 index 0000000000000..98f216e1bebc8 --- /dev/null +++ b/lld/test/wasm/mutable-globals.s @@ -0,0 +1,13 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s +# RUN: not wasm-ld %t.o -o %t.wasm 2>&1 | FileCheck %s + +.globl _start +_start: + .functype _start () -> () + i32.const 1 + global.set foo + end_function + +.globaltype foo, i32 + +# CHECK: error: mutable global imported but 'mutable-globals' feature not present in inputs: `foo`. Use --no-check-features to suppress. diff --git a/lld/test/wasm/pie.ll b/lld/test/wasm/pie.ll index c576e7c7bf706..a203d31798c96 100644 --- a/lld/test/wasm/pie.ll +++ b/lld/test/wasm/pie.ll @@ -1,4 +1,4 @@ -; RUN: llc -relocation-model=pic -filetype=obj %s -o %t.o +; RUN: llc -relocation-model=pic -mattr=+mutable-globals -filetype=obj %s -o %t.o ; RUN: wasm-ld --no-gc-sections --allow-undefined -pie -o %t.wasm %t.o ; RUN: obj2yaml %t.wasm | FileCheck %s diff --git a/lld/test/wasm/shared.ll b/lld/test/wasm/shared.ll index 89fae3342ac2a..59c1855bed563 100644 --- a/lld/test/wasm/shared.ll +++ b/lld/test/wasm/shared.ll @@ -1,4 +1,4 @@ -; RUN: llc -relocation-model=pic -filetype=obj %s -o %t.o +; RUN: llc -relocation-model=pic -mattr=+mutable-globals -filetype=obj %s -o %t.o ; RUN: wasm-ld -shared -o %t.wasm %t.o ; RUN: obj2yaml %t.wasm | FileCheck %s diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 495050c0b6319..fb4b79c5f6342 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -461,6 +461,29 @@ void Writer::populateTargetFeatures() { if (!config->checkFeatures) return; + if (!config->relocatable && used.count("mutable-globals") == 0) { + for (Symbol *sym : symtab->getSymbols()) { + if (auto *global = dyn_cast(sym)) { + if (global->getGlobalType()->Mutable) { + if (!sym->isLive()) + continue; + if (!sym->isUsedInRegularObj) + continue; + if (sym->isUndefined() && sym->isWeak() && !config->relocatable) + continue; + if (sym->isUndefined()) + error(Twine("mutable global imported but 'mutable-globals' feature " + "not present in inputs: `") + + toString(*sym) + "`. Use --no-check-features to suppress."); + else if (sym->isExported()) + error(Twine("mutable global exported but 'mutable-globals' feature " + "not present in inputs: `") + + toString(*sym) + "`. 
Use --no-check-features to suppress."); + } + } + } + } + if (config->sharedMemory) { if (disallowed.count("shared-mem")) error("--shared-memory is disallowed by " + disallowed["shared-mem"] + From c2f8bc986fb39f6a72aafd5dd0d31ec29ad8ce9b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 13 Sep 2020 00:21:39 +0200 Subject: [PATCH 0473/1079] [ARM] Add tests for fmin/max + inf folds (NFC) --- llvm/test/CodeGen/ARM/fminmax-folds.ll | 256 +++++++++++++++++++++++++ 1 file changed, 256 insertions(+) diff --git a/llvm/test/CodeGen/ARM/fminmax-folds.ll b/llvm/test/CodeGen/ARM/fminmax-folds.ll index 35fdcd1d0d6fd..6bf251ef95cbd 100644 --- a/llvm/test/CodeGen/ARM/fminmax-folds.ll +++ b/llvm/test/CodeGen/ARM/fminmax-folds.ll @@ -41,3 +41,259 @@ define float @test_minimum_const_nan(float %x) { %r = call float @llvm.minimum.f32(float %x, float 0x7fff000000000000) ret float %r } + +define float @test_minnum_const_inf(float %x) { +; CHECK-LABEL: test_minnum_const_inf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI4_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vminnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI4_0: +; CHECK-NEXT: .long 0x7f800000 @ float +Inf + %r = call float @llvm.minnum.f32(float %x, float 0x7ff0000000000000) + ret float %r +} + +define float @test_maxnum_const_inf(float %x) { +; CHECK-LABEL: test_maxnum_const_inf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI5_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI5_0: +; CHECK-NEXT: .long 0x7f800000 @ float +Inf + %r = call float @llvm.maxnum.f32(float %x, float 0x7ff0000000000000) + ret float %r +} + +define float @test_maximum_const_inf(float %x) { +; CHECK-LABEL: test_maximum_const_inf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI6_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmax.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI6_0: +; CHECK-NEXT: .long 0x7f800000 @ float +Inf + %r = call float @llvm.maximum.f32(float %x, float 0x7ff0000000000000) + ret float %r +} + +define float @test_minimum_const_inf(float %x) { +; CHECK-LABEL: test_minimum_const_inf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI7_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmin.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI7_0: +; CHECK-NEXT: .long 0x7f800000 @ float +Inf + %r = call float @llvm.minimum.f32(float %x, float 0x7ff0000000000000) + ret float %r +} + +define float @test_minnum_const_ninf(float %x) { +; CHECK-LABEL: test_minnum_const_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI8_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vminnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI8_0: +; CHECK-NEXT: .long 0xff800000 @ float -Inf + %r = call float @llvm.minnum.f32(float %x, float 0xfff0000000000000) + ret float %r +} + +define float @test_maxnum_const_ninf(float %x) { +; CHECK-LABEL: test_maxnum_const_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI9_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI9_0: +; CHECK-NEXT: .long 
0xff800000 @ float -Inf + %r = call float @llvm.maxnum.f32(float %x, float 0xfff0000000000000) + ret float %r +} + +define float @test_maximum_const_ninf(float %x) { +; CHECK-LABEL: test_maximum_const_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI10_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmax.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI10_0: +; CHECK-NEXT: .long 0xff800000 @ float -Inf + %r = call float @llvm.maximum.f32(float %x, float 0xfff0000000000000) + ret float %r +} + +define float @test_minimum_const_ninf(float %x) { +; CHECK-LABEL: test_minimum_const_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI11_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmin.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI11_0: +; CHECK-NEXT: .long 0xff800000 @ float -Inf + %r = call float @llvm.minimum.f32(float %x, float 0xfff0000000000000) + ret float %r +} + +define float @test_minnum_const_inf_nnan(float %x) { +; CHECK-LABEL: test_minnum_const_inf_nnan: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI12_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vminnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI12_0: +; CHECK-NEXT: .long 0x7f800000 @ float +Inf + %r = call nnan float @llvm.minnum.f32(float %x, float 0x7ff0000000000000) + ret float %r +} + +define float @test_maxnum_const_inf_nnan(float %x) { +; CHECK-LABEL: test_maxnum_const_inf_nnan: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI13_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI13_0: +; CHECK-NEXT: .long 0x7f800000 @ float +Inf + %r = call nnan float @llvm.maxnum.f32(float %x, float 0x7ff0000000000000) + ret float %r +} + +define float @test_maximum_const_inf_nnan(float %x) { +; CHECK-LABEL: test_maximum_const_inf_nnan: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI14_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmax.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI14_0: +; CHECK-NEXT: .long 0x7f800000 @ float +Inf + %r = call nnan float @llvm.maximum.f32(float %x, float 0x7ff0000000000000) + ret float %r +} + +define float @test_minimum_const_inf_nnan(float %x) { +; CHECK-LABEL: test_minimum_const_inf_nnan: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI15_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmin.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI15_0: +; CHECK-NEXT: .long 0x7f800000 @ float +Inf + %r = call nnan float @llvm.minimum.f32(float %x, float 0x7ff0000000000000) + ret float %r +} + +define float @test_minnum_const_ninf_nnan(float %x) { +; CHECK-LABEL: test_minnum_const_ninf_nnan: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI16_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vminnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI16_0: +; CHECK-NEXT: .long 0xff800000 @ float -Inf + %r = call nnan float @llvm.minnum.f32(float %x, float 0xfff0000000000000) + ret float %r +} + +define float @test_maxnum_const_ninf_nnan(float %x) { +; CHECK-LABEL: test_maxnum_const_ninf_nnan: 
+; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI17_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI17_0: +; CHECK-NEXT: .long 0xff800000 @ float -Inf + %r = call nnan float @llvm.maxnum.f32(float %x, float 0xfff0000000000000) + ret float %r +} + +define float @test_maximum_const_ninf_nnan(float %x) { +; CHECK-LABEL: test_maximum_const_ninf_nnan: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI18_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmax.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI18_0: +; CHECK-NEXT: .long 0xff800000 @ float -Inf + %r = call nnan float @llvm.maximum.f32(float %x, float 0xfff0000000000000) + ret float %r +} + +define float @test_minimum_const_ninf_nnan(float %x) { +; CHECK-LABEL: test_minimum_const_ninf_nnan: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI19_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmin.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI19_0: +; CHECK-NEXT: .long 0xff800000 @ float -Inf + %r = call nnan float @llvm.minimum.f32(float %x, float 0xfff0000000000000) + ret float %r +} From cc2da5554b5ee5d5939222af263699a9d0bf2049 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Fri, 27 Mar 2020 16:52:27 -0700 Subject: [PATCH 0474/1079] [lld][WebAssembly] Add initial support for -Map/--print-map Differential Revision: https://reviews.llvm.org/D77187 --- lld/test/ELF/map-file.s | 2 +- lld/test/wasm/early-exit-for-bad-paths.s | 8 +- lld/test/wasm/map-file.s | 47 +++++++ lld/wasm/CMakeLists.txt | 1 + lld/wasm/Config.h | 1 + lld/wasm/Driver.cpp | 7 +- lld/wasm/InputChunks.h | 4 +- lld/wasm/MapFile.cpp | 148 +++++++++++++++++++++++ lld/wasm/MapFile.h | 21 ++++ lld/wasm/Options.td | 6 + lld/wasm/OutputSections.cpp | 7 +- lld/wasm/OutputSections.h | 20 ++- lld/wasm/Symbols.h | 2 +- lld/wasm/Writer.cpp | 4 + 14 files changed, 268 insertions(+), 10 deletions(-) create mode 100644 lld/test/wasm/map-file.s create mode 100644 lld/wasm/MapFile.cpp create mode 100644 lld/wasm/MapFile.h diff --git a/lld/test/ELF/map-file.s b/lld/test/ELF/map-file.s index 1cd3b9087cbea..55b6b9e672812 100644 --- a/lld/test/ELF/map-file.s +++ b/lld/test/ELF/map-file.s @@ -11,7 +11,7 @@ # RUN: ld.lld %t1.o %t2.o %t3.o %t4.a %t5.so -o %t -M | FileCheck --match-full-lines --strict-whitespace %s # RUN: ld.lld %t1.o %t2.o %t3.o %t4.a %t5.so -o %t --print-map | FileCheck --match-full-lines -strict-whitespace %s # RUN: ld.lld %t1.o %t2.o %t3.o %t4.a %t5.so -o %t -Map=%t.map -# RUN: FileCheck -strict-whitespace %s < %t.map +# RUN: FileCheck -match-full-lines -strict-whitespace %s < %t.map .global _start _start: diff --git a/lld/test/wasm/early-exit-for-bad-paths.s b/lld/test/wasm/early-exit-for-bad-paths.s index 2866bfa62f865..21cec318e4490 100644 --- a/lld/test/wasm/early-exit-for-bad-paths.s +++ b/lld/test/wasm/early-exit-for-bad-paths.s @@ -4,10 +4,16 @@ # RUN: FileCheck %s -check-prefixes=NO-DIR-OUTPUT,CHECK # RUN: not wasm-ld %t.o -o %s/dir_is_a_file 2>&1 | \ # RUN: FileCheck %s -check-prefixes=DIR-IS-OUTPUT,CHECK -# TODO(sbc): check similar check for -Map file once we add that option + +# RUN: not wasm-ld %t.o -o %t -Map=does_not_exist/output 2>&1 | \ +# RUN: FileCheck %s -check-prefixes=NO-DIR-MAP,CHECK +# RUN: not wasm-ld %t.o -o %t -Map=%s/dir_is_a_file 2>&1 | \ +# RUN: FileCheck %s 
-check-prefixes=DIR-IS-MAP,CHECK # NO-DIR-OUTPUT: error: cannot open output file does_not_exist/output: # DIR-IS-OUTPUT: error: cannot open output file {{.*}}/dir_is_a_file: +# NO-DIR-MAP: error: cannot open map file does_not_exist/output: +# DIR-IS-MAP: error: cannot open map file {{.*}}/dir_is_a_file: # We should exit before doing the actual link. If an undefined symbol error is # discovered we haven't bailed out early as expected. diff --git a/lld/test/wasm/map-file.s b/lld/test/wasm/map-file.s new file mode 100644 index 0000000000000..c2ec089ccb137 --- /dev/null +++ b/lld/test/wasm/map-file.s @@ -0,0 +1,47 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t1.o +# RUN: wasm-ld %t1.o -o %t -M | FileCheck --match-full-lines --strict-whitespace %s +# RUN: wasm-ld %t1.o -o %t -print-map | FileCheck --match-full-lines --strict-whitespace %s +# RUN: wasm-ld %t1.o -o %t -Map=%t.map +# RUN: FileCheck --match-full-lines --strict-whitespace %s < %t.map + +bar: + .functype bar () -> () + i32.const somedata + end_function + + .globl _start +_start: + .functype _start () -> () + call bar + end_function + +.section .data.somedata,"",@ +somedata: + .int32 123 +.size somedata, 4 + +.section .debug_info,"",@ + .int32 bar + +# CHECK: Addr Off Size Out In Symbol +# CHECK-NEXT: - 8 6 TYPE +# CHECK-NEXT: - e 5 FUNCTION +# CHECK-NEXT: - 13 7 TABLE +# CHECK-NEXT: - 1a 5 MEMORY +# CHECK-NEXT: - 1f a GLOBAL +# CHECK-NEXT: - 29 15 EXPORT +# CHECK-NEXT: - 3e 15 CODE +# CHECK-NEXT: - 3f 9 {{.*}}{{/|\\}}map-file.s.tmp1.o:(bar) +# CHECK-NEXT: - 3f 9 bar +# CHECK-NEXT: - 48 9 {{.*}}{{/|\\}}map-file.s.tmp1.o:(_start) +# CHECK-NEXT: - 48 9 _start +# CHECK-NEXT: - 53 d DATA +# CHECK-NEXT: 400 54 4 .data +# CHECK-NEXT: 400 5a 4 {{.*}}{{/|\\}}map-file.s.tmp1.o:(.data.somedata) +# CHECK-NEXT: 400 5a 4 somedata +# CHECK-NEXT: - 60 12 CUSTOM(.debug_info) +# CHECK-NEXT: - 72 17 CUSTOM(name) + +# RUN: not wasm-ld %t1.o -o /dev/null -Map=/ 2>&1 \ +# RUN: | FileCheck -check-prefix=FAIL %s +# FAIL: wasm-ld: error: cannot open map file / diff --git a/lld/wasm/CMakeLists.txt b/lld/wasm/CMakeLists.txt index cd46f0a826ac9..37902ededa0c7 100644 --- a/lld/wasm/CMakeLists.txt +++ b/lld/wasm/CMakeLists.txt @@ -7,6 +7,7 @@ add_lld_library(lldWasm InputChunks.cpp InputFiles.cpp LTO.cpp + MapFile.cpp MarkLive.cpp OutputSections.cpp Relocations.cpp diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index e8d018f09bf6e..cd6d57333a212 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -58,6 +58,7 @@ struct Configuration { llvm::StringRef thinLTOJobs; llvm::StringRef entry; + llvm::StringRef mapFile; llvm::StringRef outputFile; llvm::StringRef thinLTOCacheDir; diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 7307aaa3f7be1..09318421574c2 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -344,6 +344,7 @@ static void readConfigs(opt::InputArgList &args) { config->importTable = args.hasArg(OPT_import_table); config->ltoo = args::getInteger(args, OPT_lto_O, 2); config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); + config->mapFile = args.getLastArgValue(OPT_Map); config->optimize = args::getInteger(args, OPT_O, 0); config->outputFile = args.getLastArgValue(OPT_o); config->relocatable = args.hasArg(OPT_relocatable); @@ -410,6 +411,9 @@ static void readConfigs(opt::InputArgList &args) { for (StringRef s : arg->getValues()) config->features->push_back(std::string(s)); } + + if (args.hasArg(OPT_print_map)) + config->mapFile = "-"; } // Some Config members do not directly correspond to 
any particular
@@ -795,7 +799,8 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
   // find that it failed because there was a mistake in their command-line.
   if (auto e = tryCreateFile(config->outputFile))
     error("cannot open output file " + config->outputFile + ": " + e.message());
-  // TODO(sbc): add check for map file too once we add support for that.
+  if (auto e = tryCreateFile(config->mapFile))
+    error("cannot open map file " + config->mapFile + ": " + e.message());

   if (errorCount())
     return;
diff --git a/lld/wasm/InputChunks.h b/lld/wasm/InputChunks.h
index cadff6883fa4f..be91b19ed452c 100644
--- a/lld/wasm/InputChunks.h
+++ b/lld/wasm/InputChunks.h
@@ -57,6 +57,8 @@ class InputChunk {
   void writeRelocations(llvm::raw_ostream &os) const;

   ObjFile *file;
+  OutputSection *outputSec = nullptr;
+  // Offset within the output section
   int32_t outputOffset = 0;

   // Signals that the section is part of the output. The garbage collector,
@@ -214,8 +216,6 @@ class InputSection : public InputChunk {
   StringRef getDebugName() const override { return StringRef(); }
   uint32_t getComdat() const override { return UINT32_MAX; }

-  OutputSection *outputSec = nullptr;
-
 protected:
   ArrayRef<uint8_t> data() const override { return section.Content; }

diff --git a/lld/wasm/MapFile.cpp b/lld/wasm/MapFile.cpp
new file mode 100644
index 0000000000000..a08d2a97d74a4
--- /dev/null
+++ b/lld/wasm/MapFile.cpp
@@ -0,0 +1,148 @@
+//===- MapFile.cpp --------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the -Map option. It lists, in order and
+// hierarchically, the output sections, input sections, input files, and
+// symbols:
+//
+//     Addr     Off      Size   Out     In      Symbol
+//        -     00000015 10     .text
+//        -     0000000e 10             test.o:(.text)
+//        -     00000000 5                      local
+//        -     00000000 5                      f(int)
+//
+//===----------------------------------------------------------------------===//

+#include "MapFile.h"
+#include "InputFiles.h"
+#include "OutputSections.h"
+#include "OutputSegment.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "lld/Common/Strings.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/Parallel.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace lld;
+using namespace lld::wasm;
+
+using SymbolMapTy = DenseMap<const InputChunk *, SmallVector<Symbol *, 4>>;
+
+// Print out the first three columns of a line.
+static void writeHeader(raw_ostream &os, int64_t vma, uint64_t lma,
+                        uint64_t size) {
+  // Not all entries in the map have a virtual memory address (e.g. functions)
+  if (vma == -1)
+    os << format("       - %8llx %8llx ", lma, size);
+  else
+    os << format("%8llx %8llx %8llx ", vma, lma, size);
+}
+
+// Returns a list of all symbols that we want to print out.
+static std::vector<Symbol *> getSymbols() {
+  std::vector<Symbol *> v;
+  for (InputFile *file : symtab->objectFiles)
+    for (Symbol *b : file->getSymbols())
+      if (auto *dr = dyn_cast<Symbol>(b))
+        if ((!isa<SectionSymbol>(dr)) && dr->isLive() &&
+            (dr->getFile() == file))
+          v.push_back(dr);
+  return v;
+}
+
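
As a worked example of the three numeric columns a symbol line gets (values taken from the map-file.s test above, field names from the code that follows): for the data symbol somedata, the line "400 5a 4 somedata" is, in order, the virtual address, the file offset, and the size. A sketch of the arithmetic only, assuming a DefinedData *dd and its owning InputChunk chunk:

  uint64_t vma = dd->getVirtualAddress();           // 0x400
  uint64_t off = chunk.outputSec->getOffset()       // file offset of the DATA section
                 + chunk.outputOffset + dd->offset; // -> 0x5a
  uint64_t size = dd->getSize();                    // 4

Functions have no virtual address, so their lines print "-" in the Addr column, as getSymbolStrings() below implements.
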
+// Returns a map from sections to their symbols.
+static SymbolMapTy getSectionSyms(ArrayRef<Symbol *> syms) {
+  SymbolMapTy ret;
+  for (Symbol *dr : syms)
+    ret[dr->getChunk()].push_back(dr);
+  return ret;
+}
+
+// Construct a map from symbols to their stringified representations.
+// Demangling symbols (which is what toString() does) is slow, so
+// we do that in batch using parallel-for.
+static DenseMap<Symbol *, std::string>
+getSymbolStrings(ArrayRef<Symbol *> syms) {
+  std::vector<std::string> str(syms.size());
+  parallelForEachN(0, syms.size(), [&](size_t i) {
+    raw_string_ostream os(str[i]);
+    auto &chunk = *syms[i]->getChunk();
+    uint64_t fileOffset = chunk.outputSec->getOffset() + chunk.outputOffset;
+    uint64_t vma = -1;
+    uint64_t size = 0;
+    if (auto *DD = dyn_cast<DefinedData>(syms[i])) {
+      vma = DD->getVirtualAddress();
+      size = DD->getSize();
+      fileOffset += DD->offset;
+    }
+    if (auto *DF = dyn_cast<DefinedFunction>(syms[i])) {
+      size = DF->function->getSize();
+    }
+    writeHeader(os, vma, fileOffset, size);
+    os.indent(16) << toString(*syms[i]);
+  });
+
+  DenseMap<Symbol *, std::string> ret;
+  for (size_t i = 0, e = syms.size(); i < e; ++i)
+    ret[syms[i]] = std::move(str[i]);
+  return ret;
+}
+
+void lld::wasm::writeMapFile(ArrayRef<OutputSection *> outputSections) {
+  if (config->mapFile.empty())
+    return;
+
+  // Open a map file for writing.
+  std::error_code ec;
+  raw_fd_ostream os(config->mapFile, ec, sys::fs::OF_None);
+  if (ec) {
+    error("cannot open " + config->mapFile + ": " + ec.message());
+    return;
+  }
+
+  // Collect symbol info that we want to print out.
+  std::vector<Symbol *> syms = getSymbols();
+  SymbolMapTy sectionSyms = getSectionSyms(syms);
+  DenseMap<Symbol *, std::string> symStr = getSymbolStrings(syms);
+
+  // Print out the header line.
+  os << "    Addr      Off     Size Out     In      Symbol\n";
+
+  for (OutputSection *osec : outputSections) {
+    writeHeader(os, -1, osec->getOffset(), osec->getSize());
+    os << toString(*osec) << '\n';
+    if (auto *code = dyn_cast<CodeSection>(osec)) {
+      for (auto *chunk : code->functions) {
+        writeHeader(os, -1, chunk->outputSec->getOffset() + chunk->outputOffset,
+                    chunk->getSize());
+        os.indent(8) << toString(chunk) << '\n';
+        for (Symbol *sym : sectionSyms[chunk])
+          os << symStr[sym] << '\n';
+      }
+    } else if (auto *data = dyn_cast<DataSection>(osec)) {
+      for (auto *oseg : data->segments) {
+        writeHeader(os, oseg->startVA, data->getOffset() + oseg->sectionOffset,
+                    oseg->size);
+        os << oseg->name << '\n';
+        for (auto *chunk : oseg->inputSegments) {
+          writeHeader(os, oseg->startVA + chunk->outputSegmentOffset,
+                      chunk->outputSec->getOffset() + chunk->outputOffset,
+                      chunk->getSize());
+          os.indent(8) << toString(chunk) << '\n';
+          for (Symbol *sym : sectionSyms[chunk])
+            os << symStr[sym] << '\n';
+        }
+      }
+    }
+  }
+}
diff --git a/lld/wasm/MapFile.h b/lld/wasm/MapFile.h
new file mode 100644
index 0000000000000..ef2cc783a6c2c
--- /dev/null
+++ b/lld/wasm/MapFile.h
@@ -0,0 +1,21 @@
+//===- MapFile.h ------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_WASM_MAPFILE_H +#define LLD_WASM_MAPFILE_H + +#include "llvm/ADT/ArrayRef.h" + +namespace lld { +namespace wasm { +class OutputSection; +void writeMapFile(llvm::ArrayRef outputSections); +} // namespace wasm +} // namespace lld + +#endif diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td index 16c784f74828a..27d54c5cdc648 100644 --- a/lld/wasm/Options.td +++ b/lld/wasm/Options.td @@ -66,6 +66,8 @@ def m: JoinedOrSeparate<["-"], "m">, HelpText<"Set target emulation">; def mllvm: S<"mllvm">, HelpText<"Options to pass to LLVM">; +defm Map: Eq<"Map", "Print a link map to the specified file">; + def no_color_diagnostics: F<"no-color-diagnostics">, HelpText<"Do not use colors in diagnostics">; @@ -84,6 +86,9 @@ defm print_gc_sections: B<"print-gc-sections", "List removed unused sections", "Do not list removed unused sections">; +def print_map: F<"print-map">, + HelpText<"Print a link map to the standard output">; + def relocatable: F<"relocatable">, HelpText<"Create relocatable object file">; defm reproduce: Eq<"reproduce", "Dump linker invocation and input files for debugging">; @@ -181,6 +186,7 @@ def: JoinedOrSeparate<["-"], "e">, Alias; def: J<"entry=">, Alias; def: Flag<["-"], "E">, Alias, HelpText<"Alias for --export-dynamic">; def: Flag<["-"], "i">, Alias; +def: Flag<["-"], "M">, Alias, HelpText<"Alias for --print-map">; def: Flag<["-"], "r">, Alias; def: Flag<["-"], "s">, Alias, HelpText<"Alias for --strip-all">; def: Flag<["-"], "S">, Alias, HelpText<"Alias for --strip-debug">; diff --git a/lld/wasm/OutputSections.cpp b/lld/wasm/OutputSections.cpp index a936562992dd3..dbdabddb9320d 100644 --- a/lld/wasm/OutputSections.cpp +++ b/lld/wasm/OutputSections.cpp @@ -87,6 +87,7 @@ void CodeSection::finalizeContents() { bodySize = codeSectionHeader.size(); for (InputFunction *func : functions) { + func->outputSec = this; func->outputOffset = bodySize; func->calculateSize(); bodySize += func->getSize(); @@ -166,9 +167,11 @@ void DataSection::finalizeContents() { log("Data segment: size=" + Twine(segment->size) + ", startVA=" + Twine::utohexstr(segment->startVA) + ", name=" + segment->name); - for (InputSegment *inputSeg : segment->inputSegments) + for (InputSegment *inputSeg : segment->inputSegments) { + inputSeg->outputSec = this; inputSeg->outputOffset = segment->sectionOffset + segment->header.size() + inputSeg->outputSegmentOffset; + } } createHeader(bodySize); @@ -227,8 +230,8 @@ void CustomSection::finalizeContents() { os.flush(); for (InputSection *section : inputSections) { - section->outputOffset = payloadSize; section->outputSec = this; + section->outputOffset = payloadSize; payloadSize += section->getSize(); } diff --git a/lld/wasm/OutputSections.h b/lld/wasm/OutputSections.h index 1fcb5723df980..444116dac7d8c 100644 --- a/lld/wasm/OutputSections.h +++ b/lld/wasm/OutputSections.h @@ -40,6 +40,7 @@ class OutputSection { void createHeader(size_t bodySize); virtual bool isNeeded() const { return true; } virtual size_t getSize() const = 0; + virtual size_t getOffset() { return offset; } virtual void writeTo(uint8_t *buf) = 0; virtual void finalizeContents() = 0; virtual uint32_t getNumRelocations() const { return 0; } @@ -60,6 +61,10 @@ class CodeSection : public OutputSection { explicit CodeSection(ArrayRef functions) : OutputSection(llvm::wasm::WASM_SEC_CODE), functions(functions) {} + static bool classof(const OutputSection 
*sec) { + return sec->type == llvm::wasm::WASM_SEC_CODE; + } + size_t getSize() const override { return header.size() + bodySize; } void writeTo(uint8_t *buf) override; uint32_t getNumRelocations() const override; @@ -67,8 +72,9 @@ class CodeSection : public OutputSection { bool isNeeded() const override { return functions.size() > 0; } void finalizeContents() override; -protected: ArrayRef functions; + +protected: std::string codeSectionHeader; size_t bodySize = 0; }; @@ -78,6 +84,10 @@ class DataSection : public OutputSection { explicit DataSection(ArrayRef segments) : OutputSection(llvm::wasm::WASM_SEC_DATA), segments(segments) {} + static bool classof(const OutputSection *sec) { + return sec->type == llvm::wasm::WASM_SEC_DATA; + } + size_t getSize() const override { return header.size() + bodySize; } void writeTo(uint8_t *buf) override; uint32_t getNumRelocations() const override; @@ -85,8 +95,9 @@ class DataSection : public OutputSection { bool isNeeded() const override; void finalizeContents() override; -protected: ArrayRef segments; + +protected: std::string dataSectionHeader; size_t bodySize = 0; }; @@ -103,6 +114,11 @@ class CustomSection : public OutputSection { CustomSection(std::string name, ArrayRef inputSections) : OutputSection(llvm::wasm::WASM_SEC_CUSTOM, name), inputSections(inputSections) {} + + static bool classof(const OutputSection *sec) { + return sec->type == llvm::wasm::WASM_SEC_CUSTOM; + } + size_t getSize() const override { return header.size() + nameData.size() + payloadSize; } diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h index 73f555217f260..eed481a0b44da 100644 --- a/lld/wasm/Symbols.h +++ b/lld/wasm/Symbols.h @@ -284,9 +284,9 @@ class DefinedData : public DataSymbol { uint64_t getSize() const { return size; } InputSegment *segment = nullptr; + uint32_t offset = 0; protected: - uint64_t offset = 0; uint64_t size = 0; }; diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index fb4b79c5f6342..82b1aec8d1e92 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -11,6 +11,7 @@ #include "InputChunks.h" #include "InputEvent.h" #include "InputGlobal.h" +#include "MapFile.h" #include "OutputSections.h" #include "OutputSegment.h" #include "Relocations.h" @@ -1137,6 +1138,9 @@ void Writer::run() { log("-- finalizeSections"); finalizeSections(); + log("-- writeMapFile"); + writeMapFile(outputSections); + log("-- openFile"); openFile(); if (errorCount()) From 70daa353e2ae722beddbab02f9a34988c855f318 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Sat, 12 Sep 2020 23:13:20 +0000 Subject: [PATCH 0475/1079] [gn build] Port cc2da5554b5 --- llvm/utils/gn/secondary/lld/wasm/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/lld/wasm/BUILD.gn b/llvm/utils/gn/secondary/lld/wasm/BUILD.gn index c32205f9f9f63..98bc93e3cdc8f 100644 --- a/llvm/utils/gn/secondary/lld/wasm/BUILD.gn +++ b/llvm/utils/gn/secondary/lld/wasm/BUILD.gn @@ -22,6 +22,7 @@ static_library("wasm") { "InputChunks.cpp", "InputFiles.cpp", "LTO.cpp", + "MapFile.cpp", "MarkLive.cpp", "OutputSections.cpp", "Relocations.cpp", From 9d300bc8d2f3cdbd7f2d7cea9fa3667c26840ad0 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Sat, 12 Sep 2020 16:32:24 -0500 Subject: [PATCH 0476/1079] [Hexagon] Avoid widening vectors with non-HVX element types --- .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 28 ++++++++------- llvm/lib/Target/Hexagon/HexagonSubtarget.h | 13 ++++++- .../isel-widen-truncate-illegal-elem.ll | 34 +++++++++++++++++++ 3 files changed, 61 insertions(+), 
14 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-illegal-elem.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index e63cb50a0fb84..65bc2e3577cc4 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -1925,6 +1925,17 @@ HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); unsigned HwWidth = 8*Subtarget.getVectorLength(); + SDValue Op0 = Op.getOperand(0); + MVT ResTy = ty(Op); + MVT OpTy = ty(Op0); + if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy)) + return SDValue(); + + // .-res, op-> Scalar Illegal HVX + // Scalar ok extract(widen) - + // Illegal - widen widen + // HVX - - ok + auto getFactor = [HwWidth](MVT Ty) { unsigned Width = Ty.getSizeInBits(); assert(HwWidth % Width == 0); @@ -1936,15 +1947,6 @@ HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const { return MVT::getVectorVT(Ty.getVectorElementType(), WideLen); }; - SDValue Op0 = Op.getOperand(0); - MVT ResTy = ty(Op); - MVT OpTy = ty(Op0); - - // .-res, op-> Scalar Illegal HVX - // Scalar ok extract(widen) - - // Illegal - widen widen - // HVX - - ok - if (Subtarget.isHVXVectorType(OpTy)) return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Op0); @@ -2053,8 +2055,8 @@ HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N, switch (Opc) { case ISD::TRUNCATE: { assert(shouldWidenToHvx(ty(Op.getOperand(0)), DAG) && "Not widening?"); - SDValue T = WidenHvxTruncate(Op, DAG); - Results.push_back(T); + if (SDValue T = WidenHvxTruncate(Op, DAG)) + Results.push_back(T); break; } case ISD::STORE: { @@ -2089,8 +2091,8 @@ HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N, switch (Opc) { case ISD::TRUNCATE: { assert(shouldWidenToHvx(ty(Op), DAG) && "Not widening?"); - SDValue T = WidenHvxTruncate(Op, DAG); - Results.push_back(T); + if (SDValue T = WidenHvxTruncate(Op, DAG)) + Results.push_back(T); break; } case ISD::BITCAST: diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h index c47b95c5ad2aa..5b71784bac260 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -275,6 +275,17 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { return makeArrayRef(Types); } + bool isHVXElementType(MVT Ty, bool IncludeBool = false) const { + if (!useHVXOps()) + return false; + if (Ty.isVector()) + Ty = Ty.getVectorElementType(); + if (IncludeBool && Ty == MVT::i1) + return true; + ArrayRef ElemTypes = getHVXElementTypes(); + return llvm::find(ElemTypes, Ty) != ElemTypes.end(); + } + bool isHVXVectorType(MVT VecTy, bool IncludeBool = false) const { if (!VecTy.isVector() || !useHVXOps() || VecTy.isScalableVector()) return false; @@ -298,7 +309,7 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { unsigned VecWidth = VecTy.getSizeInBits(); if (VecWidth != 8*HwLen && VecWidth != 16*HwLen) return false; - return llvm::any_of(ElemTypes, [ElemTy] (MVT T) { return ElemTy == T; }); + return llvm::find(ElemTypes, ElemTy) != ElemTypes.end(); } unsigned getTypeAlignment(MVT Ty) const { diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-illegal-elem.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-illegal-elem.ll new file mode 100644 index 0000000000000..3f55d22308c3d --- /dev/null +++ 
b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-illegal-elem.ll @@ -0,0 +1,34 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Check that this does not crash. +; CHECK: vmem + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define dso_local void @f0() local_unnamed_addr #0 { +b0: + %v0 = load i32, i32* undef, align 4 + %v1 = select i1 undef, i32 0, i32 1073741823 + %v2 = shl i32 %v1, 0 + %v3 = sext i32 %v0 to i64 + %v4 = sext i32 %v2 to i64 + %v5 = mul nsw i64 %v4, %v3 + %v6 = lshr i64 %v5, 32 + %v7 = trunc i64 %v6 to i32 + %v8 = sext i32 %v7 to i64 + %v9 = insertelement <32 x i64> undef, i64 %v8, i32 0 + %v10 = shufflevector <32 x i64> %v9, <32 x i64> undef, <32 x i32> zeroinitializer + %v11 = getelementptr i32, i32* null, i32 32 + %v12 = bitcast i32* %v11 to <32 x i32>* + %v13 = load <32 x i32>, <32 x i32>* %v12, align 4 + %v14 = shl <32 x i32> %v13, zeroinitializer + %v15 = sext <32 x i32> %v14 to <32 x i64> + %v16 = mul nsw <32 x i64> %v10, %v15 + %v17 = lshr <32 x i64> %v16, + %v18 = trunc <32 x i64> %v17 to <32 x i32> + store <32 x i32> %v18, <32 x i32>* %v12, align 4 + ret void +} + +attributes #0 = { "target-features"="+hvx-length128b,+hvxv67,+v67,-long-calls" } From 758732a34ed005cb135afcf14c9750a5483a49d3 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 12 Sep 2020 18:09:27 -0700 Subject: [PATCH 0477/1079] [X86] Use ISD::PARITY directly instead of emitting CTPOP and AND from combineHorizontalPredicateResult. We have a PARITY ISD node now so might as well use it. It will get re-expanded later. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5f7721267db0e..34a1517ac70f0 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -39373,10 +39373,8 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract, MVT CmpVT = NumElts == 64 ? MVT::i64 : MVT::i32; if (BinOp == ISD::XOR) { - // parity -> (AND (CTPOP(MOVMSK X)), 1) - SDValue Mask = DAG.getConstant(1, DL, CmpVT); - SDValue Result = DAG.getNode(ISD::CTPOP, DL, CmpVT, Movmsk); - Result = DAG.getNode(ISD::AND, DL, CmpVT, Result, Mask); + // parity -> (PARITY(MOVMSK X)) + SDValue Result = DAG.getNode(ISD::PARITY, DL, CmpVT, Movmsk); return DAG.getZExtOrTrunc(Result, DL, ExtractVT); } From 61d29e0dff0e93f3fa1382fb177634840844b273 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 12 Sep 2020 20:54:48 -0700 Subject: [PATCH 0478/1079] [LegalizeTypes] Remove a few cases from SplitVectorOperand that should never happen. NFC CTTZ, CTLZ, CTPOP, and FCANONICALIZE all have the same input and output types so the operand should have already been legalized when the result type was legalized. 
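
As an illustration of why these cases are unreachable (a hand-written sketch, not from the patch): for a same-typed unary node such as ctpop on <8 x i64>, legalizing the result forces the single operand through the identical type action, so by the time operand legalization runs there is nothing left to split. Only nodes whose operand type can remain illegal while the result type is legal, for example a float-to-int conversion with a legal integer result and an illegal vector-float source, can reach SplitVectorOperand. A hypothetical predicate capturing the invariant:

  // True iff operand splitting can still be pending once all results of a
  // unary node have been legalized; impossible when the two types coincide.
  static bool operandSplitPossible(EVT ResVT, EVT OpVT) {
    return ResVT != OpVT;
  }
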
--- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 509ae2c6bdcb6..9d82d2ed8ec52 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2044,16 +2044,12 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::FP_TO_UINT: case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: - case ISD::CTTZ: - case ISD::CTLZ: - case ISD::CTPOP: case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: case ISD::FTRUNC: - case ISD::FCANONICALIZE: Res = SplitVecOp_UnaryOp(N); break; From 0fb2203cd6c287e7438b7ac2571645066c63eeb6 Mon Sep 17 00:00:00 2001 From: Travis Finkenauer Date: Sun, 13 Sep 2020 05:26:08 +0000 Subject: [PATCH 0479/1079] [Docs] Fix --print-supported-cpus option rendering Adds link/code sample to avoid rendering two dashes as non-ASCII "en dash". Also make wording a complete sentence. Reviewed By: nickdesaulniers, tmfink Differential Revision: https://reviews.llvm.org/D85596 --- clang/docs/CommandGuide/clang.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/docs/CommandGuide/clang.rst b/clang/docs/CommandGuide/clang.rst index 394bd1be24e87..11169e3528940 100644 --- a/clang/docs/CommandGuide/clang.rst +++ b/clang/docs/CommandGuide/clang.rst @@ -338,12 +338,12 @@ number of cross compilers, or may only support a native target. .. option:: --print-supported-cpus Print out a list of supported processors for the given target (specified - through --target= or -arch ). If no target is - specified, the system default target will be used. + through ``--target=`` or :option:`-arch` ````). If no + target is specified, the system default target will be used. .. option:: -mcpu=?, -mtune=? - Aliases of --print-supported-cpus + Acts as an alias for :option:`--print-supported-cpus`. .. option:: -march= From 8cf1ac97cec654923b4f80ad11506bf06ec34f65 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 12 Sep 2020 22:33:41 -0700 Subject: [PATCH 0480/1079] [llvm-cov gcov] Improve accuracy when some edges are not measured Also guard against infinite recursion if GCOV_ARC_ON_TREE edges contain a cycle. 
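
The recovery leans on flow conservation: at every basic block the counts entering must equal the counts leaving, so each unmeasured GCOV_ARC_ON_TREE arc is determined by the measured arcs around it. A self-contained sketch of the propagation idea (types invented for the sketch; the real code works on GCOVBlock/GCOVArc and, as of this patch, tracks a visited set so a malformed cycle of on-tree arcs cannot recurse forever):

  #include <cstdint>
  #include <vector>
  struct Arc;
  struct Block { std::vector<Arc *> in, out; };
  struct Arc { Block *src, *dst; uint64_t count = 0; bool onTree = false; };

  // Returns the flow imbalance at v from everything except pred; flow
  // conservation forces the tree arc toward pred to carry exactly |excess|.
  uint64_t propagate(Block &v, Arc *pred) {
    int64_t excess = 0;
    for (Arc *e : v.in)
      if (e != pred) excess += e->onTree ? propagate(*e->src, e) : e->count;
    for (Arc *e : v.out)
      if (e != pred) excess -= e->onTree ? propagate(*e->dst, e) : e->count;
    uint64_t c = excess < 0 ? uint64_t(-excess) : uint64_t(excess);
    if (pred) pred->count = c;
    return c;
  }
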
--- compiler-rt/test/profile/gcov-basic.c | 2 ++ llvm/include/llvm/ProfileData/GCOV.h | 2 ++ llvm/lib/ProfileData/GCOV.cpp | 11 ++++++++++- llvm/test/tools/llvm-cov/gcov-8.c | 6 +++--- 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/compiler-rt/test/profile/gcov-basic.c b/compiler-rt/test/profile/gcov-basic.c index e00cebf4b781c..0d8be6d7de087 100644 --- a/compiler-rt/test/profile/gcov-basic.c +++ b/compiler-rt/test/profile/gcov-basic.c @@ -27,6 +27,8 @@ // CHECK: Runs:2 +#include + int main(int argc, char *argv[]) { // CHECK: 2: [[@LINE]]:int main if (argc > 1) // CHECK-NEXT: 2: [[@LINE]]: puts("hello"); // CHECK-NEXT: 1: [[@LINE]]: diff --git a/llvm/include/llvm/ProfileData/GCOV.h b/llvm/include/llvm/ProfileData/GCOV.h index f87eab6d3ead2..3c6312f916746 100644 --- a/llvm/include/llvm/ProfileData/GCOV.h +++ b/llvm/include/llvm/ProfileData/GCOV.h @@ -15,6 +15,7 @@ #define LLVM_PROFILEDATA_GCOV_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" @@ -261,6 +262,7 @@ class GCOVFunction { unsigned srcIdx; SmallVector, 0> Blocks; SmallVector, 0> arcs, treeArcs; + DenseSet visited; }; /// GCOVBlock - Collects block information. diff --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp index f8c576d305f05..d4a4a8979e81c 100644 --- a/llvm/lib/ProfileData/GCOV.cpp +++ b/llvm/lib/ProfileData/GCOV.cpp @@ -231,7 +231,11 @@ bool GCOVFile::readGCDA(GCOVBuffer &buf) { sink.addDstEdge(arc.get()); src.addSrcEdge(arc.get()); fn->treeArcs.push_back(std::move(arc)); - fn->propagateCounts(src, nullptr); + + for (GCOVBlock &block : make_pointee_range(fn->Blocks)) + fn->propagateCounts(block, nullptr); + for (size_t i = fn->treeArcs.size() - 1; i; --i) + fn->treeArcs[i - 1]->src.Counter += fn->treeArcs[i - 1]->Count; } } pos += 4 * length; @@ -289,6 +293,11 @@ GCOVBlock &GCOVFunction::getExitBlock() const { // spanning tree, the count for each unmeasured arc (GCOV_ARC_ON_TREE) can be // uniquely identified. uint64_t GCOVFunction::propagateCounts(const GCOVBlock &v, GCOVArc *pred) { + // If GCOV_ARC_ON_TREE edges do form a tree, visited is not needed; otherwise + // this prevents infinite recursion. + if (!visited.insert(&v).second) + return 0; + uint64_t excess = 0; for (GCOVArc *e : v.srcs()) if (e != pred) diff --git a/llvm/test/tools/llvm-cov/gcov-8.c b/llvm/test/tools/llvm-cov/gcov-8.c index 996e4cbe71b33..d557d84130183 100644 --- a/llvm/test/tools/llvm-cov/gcov-8.c +++ b/llvm/test/tools/llvm-cov/gcov-8.c @@ -20,7 +20,7 @@ int main() { // GCOV: 1: [[@LINE]]:in // RUN: llvm-cov gcov gcov-8.c | FileCheck %s --check-prefixes=OUT,OUTFILE // OUT: File 'gcov-8.c' // OUT-NEXT: Lines executed:100.00% of 9 -// OUT-B-NEXT: Branches executed:85.71% of 14 +// OUT-B-NEXT: Branches executed:100.00% of 14 // OUT-B-NEXT: Taken at least once:71.43% of 14 // OUT-B-NEXT: No calls // OUTFILE-NEXT: Creating 'gcov-8.c.gcov' @@ -66,6 +66,6 @@ int main() { // GCOV: 1: [[@LINE]]:in // I-B-NEXT:branch:11,taken // I-B-NEXT:branch:11,nottaken // I-NEXT:lcount:12,4 -// I-B-NEXT:branch:12,notexec -// I-B-NEXT:branch:12,notexec +// I-B-NEXT:branch:12,taken +// I-B-NEXT:branch:12,nottaken // I-NEXT:lcount:14,1 From f086e85eea94a51eb42115496ac5d24f07bc8791 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 12 Sep 2020 22:42:37 -0700 Subject: [PATCH 0481/1079] [gcov] Assign names to some types and loaded values used in @__llvm_internal* This makes the generated IR much more readable. 
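
For readers less familiar with the APIs involved: most IRBuilder Create* methods and StructType::create take an optional trailing name that becomes the textual name of the resulting value or type, so the change amounts to one extra argument per call site. A minimal standalone illustration (an IRBuilder Builder and a pointer value P are assumed; not from the patch):

  // Without a name the load prints as an anonymous value, e.g. "%0 = load ...";
  // with one it prints as "%num_ctrs = load ...", which is what the diff below
  // does throughout the writeout function.
  Value *anon  = Builder.CreateLoad(Builder.getInt32Ty(), P);
  Value *named = Builder.CreateLoad(Builder.getInt32Ty(), P, "num_ctrs");
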
--- clang/test/CodeGen/code-coverage.c | 6 +- .../Instrumentation/GCOVProfiling.cpp | 67 +++++++++++-------- 2 files changed, 43 insertions(+), 30 deletions(-) diff --git a/clang/test/CodeGen/code-coverage.c b/clang/test/CodeGen/code-coverage.c index 014dd9cfb5a7b..39c4556b9ff4b 100644 --- a/clang/test/CodeGen/code-coverage.c +++ b/clang/test/CodeGen/code-coverage.c @@ -37,10 +37,10 @@ int test2(int b) { } -// CHECK: @__llvm_internal_gcov_emit_function_args.0 = internal unnamed_addr constant [2 x %0] -// CHECK-SAME: [%0 { i32 0, i32 {{[-0-9]+}}, i32 {{[-0-9]+}} }, %0 { i32 1, i32 {{[-0-9]+}}, i32 {{[-0-9]+}} }] +// CHECK: @__llvm_internal_gcov_emit_function_args.0 = internal unnamed_addr constant [2 x %emit_function_args_ty] +// CHECK-SAME: [%emit_function_args_ty { i32 0, i32 {{[-0-9]+}}, i32 {{[-0-9]+}} }, %emit_function_args_ty { i32 1, i32 {{[-0-9]+}}, i32 {{[-0-9]+}} }] -// CHECK: @__llvm_internal_gcov_emit_file_info = internal unnamed_addr constant [1 x %2] +// CHECK: @__llvm_internal_gcov_emit_file_info = internal unnamed_addr constant [1 x %file_info] /// 0x3330342a '3' '0' '4' '*' // 304-SAME: i32 858797098 /// 0x3430372a '4' '0' '7' '*' diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 68df0af4892af..734deda99707d 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -1029,15 +1029,19 @@ Function *GCOVProfiler::insertCounterWriteout( // Collect the relevant data into a large constant data structure that we can // walk to write out everything. StructType *StartFileCallArgsTy = StructType::create( - {Builder.getInt8PtrTy(), Builder.getInt32Ty(), Builder.getInt32Ty()}); + {Builder.getInt8PtrTy(), Builder.getInt32Ty(), Builder.getInt32Ty()}, + "start_file_args_ty"); StructType *EmitFunctionCallArgsTy = StructType::create( - {Builder.getInt32Ty(), Builder.getInt32Ty(), Builder.getInt32Ty()}); + {Builder.getInt32Ty(), Builder.getInt32Ty(), Builder.getInt32Ty()}, + "emit_function_args_ty"); StructType *EmitArcsCallArgsTy = StructType::create( - {Builder.getInt32Ty(), Builder.getInt64Ty()->getPointerTo()}); + {Builder.getInt32Ty(), Builder.getInt64Ty()->getPointerTo()}, + "emit_arcs_args_ty"); StructType *FileInfoTy = StructType::create({StartFileCallArgsTy, Builder.getInt32Ty(), EmitFunctionCallArgsTy->getPointerTo(), - EmitArcsCallArgsTy->getPointerTo()}); + EmitArcsCallArgsTy->getPointerTo()}, + "file_info"); Constant *Zero32 = Builder.getInt32(0); // Build an explicit array of two zeros for use in ConstantExpr GEP building. @@ -1147,41 +1151,46 @@ Function *GCOVProfiler::insertCounterWriteout( // The index into the files structure is our loop induction variable. 
Builder.SetInsertPoint(FileLoopHeader); - PHINode *IV = - Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2); + PHINode *IV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2, + "file_idx"); IV->addIncoming(Builder.getInt32(0), BB); auto *FileInfoPtr = Builder.CreateInBoundsGEP( FileInfoArrayTy, FileInfoArrayGV, {Builder.getInt32(0), IV}); auto *StartFileCallArgsPtr = - Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 0); + Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 0, "start_file_args"); auto *StartFileCall = Builder.CreateCall( StartFile, {Builder.CreateLoad(StartFileCallArgsTy->getElementType(0), Builder.CreateStructGEP(StartFileCallArgsTy, - StartFileCallArgsPtr, 0)), + StartFileCallArgsPtr, 0), + "filename"), Builder.CreateLoad(StartFileCallArgsTy->getElementType(1), Builder.CreateStructGEP(StartFileCallArgsTy, - StartFileCallArgsPtr, 1)), + StartFileCallArgsPtr, 1), + "version"), Builder.CreateLoad(StartFileCallArgsTy->getElementType(2), Builder.CreateStructGEP(StartFileCallArgsTy, - StartFileCallArgsPtr, 2))}); + StartFileCallArgsPtr, 2), + "stamp")}); if (auto AK = TLI->getExtAttrForI32Param(false)) StartFileCall->addParamAttr(2, AK); - auto *NumCounters = - Builder.CreateLoad(FileInfoTy->getElementType(1), - Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 1)); + auto *NumCounters = Builder.CreateLoad( + FileInfoTy->getElementType(1), + Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 1), "num_ctrs"); auto *EmitFunctionCallArgsArray = Builder.CreateLoad(FileInfoTy->getElementType(2), - Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 2)); - auto *EmitArcsCallArgsArray = - Builder.CreateLoad(FileInfoTy->getElementType(3), - Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 3)); + Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 2), + "emit_function_args"); + auto *EmitArcsCallArgsArray = Builder.CreateLoad( + FileInfoTy->getElementType(3), + Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 3), "emit_arcs_args"); auto *EnterCounterLoopCond = Builder.CreateICmpSLT(Builder.getInt32(0), NumCounters); Builder.CreateCondBr(EnterCounterLoopCond, CounterLoopHeader, FileLoopLatch); Builder.SetInsertPoint(CounterLoopHeader); - auto *JV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2); + auto *JV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2, + "ctr_idx"); JV->addIncoming(Builder.getInt32(0), FileLoopHeader); auto *EmitFunctionCallArgsPtr = Builder.CreateInBoundsGEP( EmitFunctionCallArgsTy, EmitFunctionCallArgsArray, JV); @@ -1189,14 +1198,16 @@ Function *GCOVProfiler::insertCounterWriteout( EmitFunction, {Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(0), Builder.CreateStructGEP(EmitFunctionCallArgsTy, - EmitFunctionCallArgsPtr, 0)), + EmitFunctionCallArgsPtr, 0), + "ident"), Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(1), Builder.CreateStructGEP(EmitFunctionCallArgsTy, - EmitFunctionCallArgsPtr, 1)), + EmitFunctionCallArgsPtr, 1), + "func_checkssum"), Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(2), Builder.CreateStructGEP(EmitFunctionCallArgsTy, - EmitFunctionCallArgsPtr, - 2))}); + EmitFunctionCallArgsPtr, 2), + "cfg_checksum")}); if (auto AK = TLI->getExtAttrForI32Param(false)) { EmitFunctionCall->addParamAttr(0, AK); EmitFunctionCall->addParamAttr(1, AK); @@ -1208,10 +1219,12 @@ Function *GCOVProfiler::insertCounterWriteout( EmitArcs, {Builder.CreateLoad( EmitArcsCallArgsTy->getElementType(0), - Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 0)), - 
Builder.CreateLoad(EmitArcsCallArgsTy->getElementType(1), - Builder.CreateStructGEP(EmitArcsCallArgsTy, - EmitArcsCallArgsPtr, 1))}); + Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 0), + "num_counters"), + Builder.CreateLoad( + EmitArcsCallArgsTy->getElementType(1), + Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 1), + "counters")}); if (auto AK = TLI->getExtAttrForI32Param(false)) EmitArcsCall->addParamAttr(0, AK); auto *NextJV = Builder.CreateAdd(JV, Builder.getInt32(1)); @@ -1222,7 +1235,7 @@ Function *GCOVProfiler::insertCounterWriteout( Builder.SetInsertPoint(FileLoopLatch); Builder.CreateCall(SummaryInfo, {}); Builder.CreateCall(EndFile, {}); - auto *NextIV = Builder.CreateAdd(IV, Builder.getInt32(1)); + auto *NextIV = Builder.CreateAdd(IV, Builder.getInt32(1), "next_file_idx"); auto *FileLoopCond = Builder.CreateICmpSLT(NextIV, Builder.getInt32(FileInfos.size())); Builder.CreateCondBr(FileLoopCond, FileLoopHeader, ExitBB); From 63182c2ac0b643a60d397274e8a31166fc7243fa Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 13 Sep 2020 00:07:31 -0700 Subject: [PATCH 0482/1079] [gcov] Add spanning tree optimization gcov is an "Edge Profiling with Edge Counters" application according to Optimally Profiling and Tracing Programs (1994). The minimum number of counters necessary is |E|-(|V|-1). The unmeasured edges form a spanning tree. Both GCC --coverage and clang -fprofile-generate leverage this optimization. This patch implements the optimization for clang --coverage. The produced .gcda files are much smaller now. --- clang/test/CodeGen/code-coverage-tsan.c | 1 - compiler-rt/test/profile/Posix/gcov-fork.c | 2 +- .../test/profile/gcov-dump-and-remove.c | 8 +- .../Instrumentation/GCOVProfiling.cpp | 402 +++++++++++------- .../GCOVProfiling/atomic-counter.ll | 3 +- .../split-indirectbr-critical-edges.ll | 61 +++ 6 files changed, 326 insertions(+), 151 deletions(-) create mode 100644 llvm/test/Transforms/GCOVProfiling/split-indirectbr-critical-edges.ll diff --git a/clang/test/CodeGen/code-coverage-tsan.c b/clang/test/CodeGen/code-coverage-tsan.c index 023a99598075f..17f6596aa83df 100644 --- a/clang/test/CodeGen/code-coverage-tsan.c +++ b/clang/test/CodeGen/code-coverage-tsan.c @@ -5,7 +5,6 @@ // CHECK-LABEL: void @foo() /// Two counters are incremented by __tsan_atomic64_fetch_add. 
// CHECK: call i64 @__tsan_atomic64_fetch_add -// CHECK-NEXT: call i64 @__tsan_atomic64_fetch_add // CHECK-NEXT: call i32 @__tsan_atomic32_fetch_sub _Atomic(int) cnt; diff --git a/compiler-rt/test/profile/Posix/gcov-fork.c b/compiler-rt/test/profile/Posix/gcov-fork.c index b89eb64922f0c..e66690a961e2e 100644 --- a/compiler-rt/test/profile/Posix/gcov-fork.c +++ b/compiler-rt/test/profile/Posix/gcov-fork.c @@ -17,7 +17,7 @@ int main(void) { // CHECK-NEXT: 1: [[#@LINE]]: int status; // CHECK-NEXT: -: [[#@LINE]]: func1(); // CHECK-NEXT: 1: [[#@LINE]]: pid_t pid = fork(); // CHECK-NEXT: 1: [[#@LINE]]: - if (pid == -1) return 1; // CHECK-NEXT: 2: [[#@LINE]]: + if (pid == -1) return 1; // CHECK-NEXT: 1: [[#@LINE]]: if (pid) // CHECK-NEXT: 2: [[#@LINE]]: wait(&status); // CHECK-NEXT: 1: [[#@LINE]]: func2(); // CHECK-NEXT: 2: [[#@LINE]]: diff --git a/compiler-rt/test/profile/gcov-dump-and-remove.c b/compiler-rt/test/profile/gcov-dump-and-remove.c index b7f80535aada3..c35640f93b3de 100644 --- a/compiler-rt/test/profile/gcov-dump-and-remove.c +++ b/compiler-rt/test/profile/gcov-dump-and-remove.c @@ -11,10 +11,10 @@ extern void __gcov_dump(void); extern void __gcov_reset(void); extern int remove(const char *); // CHECK: -: [[#@LINE]]:extern int remove -int main(void) { // CHECK-NEXT: #####: [[#@LINE]]: - __gcov_dump(); // CHECK-NEXT: #####: [[#@LINE]]: - __gcov_reset(); // CHECK-NEXT: #####: [[#@LINE]]: - if (remove("gcov-dump-and-remove.gcda") != 0) // CHECK-NEXT: #####: [[#@LINE]]: +int main(void) { // CHECK-NEXT: 1: [[#@LINE]]: + __gcov_dump(); // CHECK-NEXT: 1: [[#@LINE]]: + __gcov_reset(); // CHECK-NEXT: 1: [[#@LINE]]: + if (remove("gcov-dump-and-remove.gcda") != 0) // CHECK-NEXT: 1: [[#@LINE]]: return 1; // CHECK-NEXT: #####: [[#@LINE]]: return 1; // CHECK-NEXT: -: [[#@LINE]]: __gcov_dump(); // CHECK-NEXT: 1: [[#@LINE]]: diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 734deda99707d..437063eef6f95 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "CFGMST.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/STLExtras.h" @@ -20,6 +21,8 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" @@ -53,6 +56,8 @@ namespace endian = llvm::support::endian; #define DEBUG_TYPE "insert-gcov-profiling" enum : uint32_t { + GCOV_ARC_ON_TREE = 1 << 0, + GCOV_TAG_FUNCTION = 0x01000000, GCOV_TAG_BLOCKS = 0x01410000, GCOV_TAG_ARCS = 0x01430000, @@ -94,9 +99,10 @@ class GCOVProfiler { public: GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {} GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {} - bool - runOnModule(Module &M, - std::function GetTLI); + bool runOnModule(Module &M, + function_ref GetBFI, + function_ref GetBPI, + function_ref GetTLI); void write(uint32_t i) { char Bytes[4]; @@ -112,13 +118,12 @@ class GCOVProfiler { private: // Create the .gcno files for the Module based on DebugInfo. 
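  // An illustrative count (editorial sketch, not part of the original patch):
  // for a diamond CFG with blocks {entry, then, else, exit} and edges
  // entry->then, entry->else, then->exit, else->exit, plus the fake
  // exit->entry edge that closes the circulation, |V| = 4 and |E| = 5, so
  // |E| - (|V| - 1) = 2 counters suffice. Measuring, say, entry->then and
  // exit->entry (the invocation count) leaves a spanning tree of unmeasured
  // edges whose counts are recovered by flow conservation: at every block,
  // the incoming edge counts sum to the outgoing edge counts.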
- void emitProfileNotes(NamedMDNode *CUNode); + bool + emitProfileNotes(NamedMDNode *CUNode, bool HasExecOrFork, + function_ref GetBFI, + function_ref GetBPI, + function_ref GetTLI); - // Modify the program to track transitions along edges and call into the - // profiling runtime to emit .gcda files when run. - void instrumentFunction( - Function &F, - SmallVectorImpl> &CountersBySP); void emitGlobalConstructor( SmallVectorImpl> &CountersBySP); @@ -158,6 +163,7 @@ class GCOVProfiler { SmallVector, 16> Funcs; std::vector FilterRe; std::vector ExcludeRe; + DenseSet ExecBlocks; StringMap InstrumentedFiles; }; @@ -173,24 +179,69 @@ class GCOVProfilerLegacyPass : public ModulePass { StringRef getPassName() const override { return "GCOV Profiler"; } bool runOnModule(Module &M) override { - return Profiler.runOnModule(M, [this](Function &F) -> TargetLibraryInfo & { - return getAnalysis().getTLI(F); - }); + auto GetBFI = [this](Function &F) { + return &this->getAnalysis(F).getBFI(); + }; + auto GetBPI = [this](Function &F) { + return &this->getAnalysis(F).getBPI(); + }; + auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; + return Profiler.runOnModule(M, GetBFI, GetBPI, GetTLI); } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); AU.addRequired(); } private: GCOVProfiler Profiler; }; + +struct BBInfo { + BBInfo *Group; + uint32_t Index; + uint32_t Rank = 0; + + BBInfo(unsigned Index) : Group(this), Index(Index) {} + const std::string infoString() const { + return (Twine("Index=") + Twine(Index)).str(); + } +}; + +struct Edge { + // This class implements the CFG edges. Note the CFG can be a multi-graph. + // So there might be multiple edges with same SrcBB and DestBB. + const BasicBlock *SrcBB; + const BasicBlock *DestBB; + uint64_t Weight; + BasicBlock *Place = nullptr; + uint32_t SrcNumber, DstNumber; + bool InMST = false; + bool Removed = false; + bool IsCritical = false; + + Edge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) + : SrcBB(Src), DestBB(Dest), Weight(W) {} + + // Return the information string of an edge. + const std::string infoString() const { + return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + + (IsCritical ? 
"c" : " ") + " W=" + Twine(Weight)) + .str(); + } +}; } char GCOVProfilerLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN( GCOVProfilerLegacyPass, "insert-gcov-profiling", "Insert instrumentation for GCOV profiling", false, false) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END( GCOVProfilerLegacyPass, "insert-gcov-profiling", @@ -275,8 +326,8 @@ namespace { return LinesByFile.try_emplace(Filename, P, Filename).first->second; } - void addEdge(GCOVBlock &Successor) { - OutEdges.push_back(&Successor); + void addEdge(GCOVBlock &Successor, uint32_t Flags) { + OutEdges.emplace_back(&Successor, Flags); } void writeOut() { @@ -310,9 +361,9 @@ namespace { } uint32_t Number; - SmallVector OutEdges; + SmallVector, 4> OutEdges; - private: + private: friend class GCOVFunction; GCOVBlock(GCOVProfiler *P, uint32_t Number) @@ -345,7 +396,7 @@ namespace { FuncChecksum = hash_value(FunctionNameAndLine); } - GCOVBlock &getBlock(BasicBlock *BB) { + GCOVBlock &getBlock(const BasicBlock *BB) { return Blocks.find(BB)->second; } @@ -402,33 +453,41 @@ namespace { LLVM_DEBUG(dbgs() << (Blocks.size() + 1) << " blocks\n"); // Emit edges between blocks. - Function *F = Blocks.begin()->first->getParent(); - write(GCOV_TAG_ARCS); - write(3); - write(0); - write(getBlock(&F->getEntryBlock()).Number); - write(0); // no flags - for (BasicBlock &I : *F) { - GCOVBlock &Block = getBlock(&I); + const uint32_t Outgoing = EntryBlock.OutEdges.size(); + if (Outgoing) { + write(GCOV_TAG_ARCS); + write(Outgoing * 2 + 1); + write(EntryBlock.Number); + for (const auto &E : EntryBlock.OutEdges) { + write(E.first->Number); + write(E.second); + } + } + std::vector Sorted; + Sorted.reserve(Blocks.size()); + for (auto &It : Blocks) + Sorted.push_back(&It.second); + llvm::sort(Sorted, [](GCOVBlock *x, GCOVBlock *y) { + return x->Number < y->Number; + }); + for (GCOVBlock &Block : make_pointee_range(Sorted)) { if (Block.OutEdges.empty()) continue; write(GCOV_TAG_ARCS); write(Block.OutEdges.size() * 2 + 1); write(Block.Number); - for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) { - LLVM_DEBUG(dbgs() << Block.Number << " -> " - << Block.OutEdges[i]->Number << "\n"); - write(Block.OutEdges[i]->Number); - write(0); // no flags + for (const auto &E : Block.OutEdges) { + write(E.first->Number); + write(E.second); } } // Emit lines for each block. 
- for (BasicBlock &I : *F) - getBlock(&I).writeOut(); + for (GCOVBlock &Block : make_pointee_range(Sorted)) + Block.writeOut(); } - private: + public: const DISubprogram *SP; unsigned EndLine; uint32_t Ident; @@ -549,7 +608,9 @@ std::string GCOVProfiler::mangleName(const DICompileUnit *CU, } bool GCOVProfiler::runOnModule( - Module &M, std::function GetTLI) { + Module &M, function_ref GetBFI, + function_ref GetBPI, + function_ref GetTLI) { this->M = &M; this->GetTLI = std::move(GetTLI); Ctx = &M.getContext(); @@ -558,12 +619,12 @@ bool GCOVProfiler::runOnModule( if (!CUNode || (!Options.EmitNotes && !Options.EmitData)) return false; - bool Modified = AddFlushBeforeForkAndExec(); + bool HasExecOrFork = AddFlushBeforeForkAndExec(); FilterRe = createRegexesFromString(Options.Filter); ExcludeRe = createRegexesFromString(Options.Exclude); - emitProfileNotes(CUNode); - return Modified || Options.EmitData; + emitProfileNotes(CUNode, HasExecOrFork, GetBFI, GetBPI, GetTLI); + return true; } PreservedAnalyses GCOVProfilerPass::run(Module &M, @@ -573,9 +634,17 @@ PreservedAnalyses GCOVProfilerPass::run(Module &M, FunctionAnalysisManager &FAM = AM.getResult(M).getManager(); - if (!Profiler.runOnModule(M, [&](Function &F) -> TargetLibraryInfo & { - return FAM.getResult(F); - })) + auto GetBFI = [&FAM](Function &F) { + return &FAM.getResult(F); + }; + auto GetBPI = [&FAM](Function &F) { + return &FAM.getResult(F); + }; + auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & { + return FAM.getResult(F); + }; + + if (!Profiler.runOnModule(M, GetBFI, GetBPI, GetTLI)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -681,6 +750,7 @@ bool GCOVProfiler::AddFlushBeforeForkAndExec() { // dumped FunctionCallee ResetF = M->getOrInsertFunction("llvm_reset_counters", FTy); Builder.CreateCall(ResetF)->setDebugLoc(Loc); + ExecBlocks.insert(Parent); Parent->splitBasicBlock(NextInst); Parent->back().setDebugLoc(Loc); } @@ -688,7 +758,67 @@ bool GCOVProfiler::AddFlushBeforeForkAndExec() { return !Forks.empty() || !Execs.empty(); } -void GCOVProfiler::emitProfileNotes(NamedMDNode *CUNode) { +static BasicBlock *getInstrBB(CFGMST &MST, Edge &E, + const DenseSet &ExecBlocks) { + if (E.InMST || E.Removed) + return nullptr; + + BasicBlock *SrcBB = const_cast(E.SrcBB); + BasicBlock *DestBB = const_cast(E.DestBB); + // For a fake edge, instrument the real BB. + if (SrcBB == nullptr) + return DestBB; + if (DestBB == nullptr) + return SrcBB; + + auto CanInstrument = [](BasicBlock *BB) -> BasicBlock * { + // There are basic blocks (such as catchswitch) cannot be instrumented. + // If the returned first insertion point is the end of BB, skip this BB. + if (BB->getFirstInsertionPt() == BB->end()) + return nullptr; + return BB; + }; + + // Instrument the SrcBB if it has a single successor, + // otherwise, the DestBB if this is not a critical edge. + Instruction *TI = SrcBB->getTerminator(); + if (TI->getNumSuccessors() <= 1 && !ExecBlocks.count(SrcBB)) + return CanInstrument(SrcBB); + if (!E.IsCritical) + return CanInstrument(DestBB); + + // Some IndirectBr critical edges cannot be split by the previous + // SplitIndirectBrCriticalEdges call. Bail out. + const unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); + BasicBlock *InstrBB = + isa(TI) ? 
nullptr : SplitCriticalEdge(TI, SuccNum); + if (!InstrBB) + return nullptr; + + MST.addEdge(SrcBB, InstrBB, 0); + MST.addEdge(InstrBB, DestBB, 0).InMST = true; + E.Removed = true; + + return CanInstrument(InstrBB); +} + +#ifndef NDEBUG +static void dumpEdges(CFGMST &MST, GCOVFunction &GF) { + size_t ID = 0; + for (auto &E : make_pointee_range(MST.AllEdges)) { + GCOVBlock &Src = E.SrcBB ? GF.getBlock(E.SrcBB) : GF.getEntryBlock(); + GCOVBlock &Dst = E.DestBB ? GF.getBlock(E.DestBB) : GF.getReturnBlock(); + dbgs() << " Edge " << ID++ << ": " << Src.Number << "->" << Dst.Number + << E.infoString() << "\n"; + } +} +#endif + +bool GCOVProfiler::emitProfileNotes( + NamedMDNode *CUNode, bool HasExecOrFork, + function_ref GetBFI, + function_ref GetBPI, + function_ref GetTLI) { int Version; { uint8_t c3 = Options.Version[0]; @@ -725,36 +855,79 @@ void GCOVProfiler::emitProfileNotes(NamedMDNode *CUNode) { // TODO: Functions using scope-based EH are currently not supported. if (isUsingScopeBasedEH(F)) continue; - // gcov expects every function to start with an entry block that has a - // single successor, so split the entry block to make sure of that. - BasicBlock &EntryBlock = F.getEntryBlock(); + // Add the function line number to the lines of the entry block + // to have a counter for the function definition. + uint32_t Line = SP->getLine(); + auto Filename = getFilename(SP); + + BranchProbabilityInfo *BPI = GetBPI(F); + BlockFrequencyInfo *BFI = GetBFI(F); + // Split indirectbr critical edges here before computing the MST rather + // than later in getInstrBB() to avoid invalidating it. + SplitIndirectBrCriticalEdges(F, BPI, BFI); + + CFGMST MST(F, /*InstrumentFuncEntry_=*/false, BPI, BFI); + + // getInstrBB can split basic blocks and push elements to AllEdges. + for (size_t I : llvm::seq(0, MST.AllEdges.size())) { + auto &E = *MST.AllEdges[I]; + // For now, disable spanning tree optimization when fork or exec* is + // used. + if (HasExecOrFork) + E.InMST = false; + E.Place = getInstrBB(MST, E, ExecBlocks); + } + // Basic blocks in F are finalized at this point. + BasicBlock &EntryBlock = F.getEntryBlock(); Funcs.push_back(std::make_unique(this, &F, SP, EndLine, FunctionIdent++, Version)); GCOVFunction &Func = *Funcs.back(); - // Add the function line number to the lines of the entry block - // to have a counter for the function definition. - uint32_t Line = SP->getLine(); - auto Filename = getFilename(SP); + // Some non-tree edges are IndirectBr which cannot be split. Ignore them + // as well. + llvm::erase_if(MST.AllEdges, [](std::unique_ptr &E) { + return E->Removed || (!E->InMST && !E->Place); + }); + const size_t Measured = + llvm::partition(MST.AllEdges, + [](std::unique_ptr &E) { return E->Place; }) - + MST.AllEdges.begin(); + for (size_t I : llvm::seq(0, Measured)) { + Edge &E = *MST.AllEdges[I]; + GCOVBlock &Src = + E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock(); + GCOVBlock &Dst = + E.DestBB ? Func.getBlock(E.DestBB) : Func.getReturnBlock(); + E.SrcNumber = Src.Number; + E.DstNumber = Dst.Number; + } + std::stable_sort( + MST.AllEdges.begin(), MST.AllEdges.begin() + Measured, + [](const std::unique_ptr &L, const std::unique_ptr &R) { + return L->SrcNumber != R->SrcNumber ? L->SrcNumber < R->SrcNumber + : L->DstNumber < R->DstNumber; + }); + + for (const Edge &E : make_pointee_range(MST.AllEdges)) { + GCOVBlock &Src = + E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock(); + GCOVBlock &Dst = + E.DestBB ? 
Func.getBlock(E.DestBB) : Func.getReturnBlock(); + Src.addEdge(Dst, E.Place ? 0 : uint32_t(GCOV_ARC_ON_TREE)); + } // Artificial functions such as global initializers if (!SP->isArtificial()) Func.getBlock(&EntryBlock).getFile(Filename).addLine(Line); - Func.getEntryBlock().addEdge(Func.getBlock(&EntryBlock)); - for (auto &BB : F) { - GCOVBlock &Block = Func.getBlock(&BB); - Instruction *TI = BB.getTerminator(); - if (int successors = TI->getNumSuccessors()) { - for (int i = 0; i != successors; ++i) { - Block.addEdge(Func.getBlock(TI->getSuccessor(i))); - } - } else if (isa(TI)) { - Block.addEdge(Func.getReturnBlock()); - } - for (GCOVBlock *Succ : Block.OutEdges) { - uint32_t Idx = Succ->Number; + LLVM_DEBUG(dumpEdges(MST, Func)); + + for (auto &GB : Func.Blocks) { + const BasicBlock &BB = *GB.first; + auto &Block = GB.second; + for (auto Succ : Block.OutEdges) { + uint32_t Idx = Succ.first->Number; do EdgeDestinations.push_back(Idx & 255); while ((Idx >>= 8) > 0); } @@ -782,8 +955,30 @@ void GCOVProfiler::emitProfileNotes(NamedMDNode *CUNode) { } Line = 0; } - if (EmitGCDA) - instrumentFunction(F, CountersBySP); + if (EmitGCDA) { + DISubprogram *SP = F.getSubprogram(); + ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(*Ctx), Measured); + GlobalVariable *Counters = new GlobalVariable( + *M, CounterTy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(CounterTy), "__llvm_gcov_ctr"); + CountersBySP.emplace_back(Counters, SP); + + for (size_t I : llvm::seq(0, Measured)) { + const Edge &E = *MST.AllEdges[I]; + IRBuilder<> Builder(E.Place, E.Place->getFirstInsertionPt()); + Value *V = Builder.CreateConstInBoundsGEP2_64( + Counters->getValueType(), Counters, 0, I); + if (Options.Atomic) { + Builder.CreateAtomicRMW(AtomicRMWInst::Add, V, Builder.getInt64(1), + AtomicOrdering::Monotonic); + } else { + Value *Count = + Builder.CreateLoad(Builder.getInt64Ty(), V, "gcov_ctr"); + Count = Builder.CreateAdd(Count, Builder.getInt64(1)); + Builder.CreateStore(Count, V); + } + } + } } char Tmp[4]; @@ -830,86 +1025,7 @@ void GCOVProfiler::emitProfileNotes(NamedMDNode *CUNode) { EmitGCDA = false; } } -} - -void GCOVProfiler::instrumentFunction( - Function &F, - SmallVectorImpl> &CountersBySP) { - DISubprogram *SP = F.getSubprogram(); - DenseMap, unsigned> EdgeToCounter; - unsigned Edges = 0; - EdgeToCounter[{nullptr, &F.getEntryBlock()}] = Edges++; - for (auto &BB : F) { - Instruction *TI = BB.getTerminator(); - if (isa(TI)) { - EdgeToCounter[{&BB, nullptr}] = Edges++; - } else { - for (BasicBlock *Succ : successors(TI)) { - EdgeToCounter[{&BB, Succ}] = Edges++; - } - } - } - - ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(*Ctx), Edges); - GlobalVariable *Counters = - new GlobalVariable(*M, CounterTy, false, GlobalValue::InternalLinkage, - Constant::getNullValue(CounterTy), "__llvm_gcov_ctr"); - CountersBySP.push_back(std::make_pair(Counters, SP)); - - // If a BB has several predecessors, use a PHINode to select - // the correct counter. - for (auto &BB : F) { - // The phi node must be at the begin of the BB. 
- IRBuilder<> BuilderForPhi(&*BB.begin()); - IRBuilder<> Builder(&*BB.getFirstInsertionPt()); - Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx); - Value *V; - if (&BB == &F.getEntryBlock()) { - auto It = EdgeToCounter.find({nullptr, &BB}); - V = Builder.CreateConstInBoundsGEP2_64(Counters->getValueType(), Counters, - 0, It->second); - } else { - const unsigned EdgeCount = std::distance(pred_begin(&BB), pred_end(&BB)); - if (EdgeCount == 0) - continue; - PHINode *Phi = BuilderForPhi.CreatePHI(Int64PtrTy, EdgeCount); - for (BasicBlock *Pred : predecessors(&BB)) { - auto It = EdgeToCounter.find({Pred, &BB}); - assert(It != EdgeToCounter.end()); - const unsigned Edge = It->second; - Value *EdgeCounter = BuilderForPhi.CreateConstInBoundsGEP2_64( - Counters->getValueType(), Counters, 0, Edge); - Phi->addIncoming(EdgeCounter, Pred); - V = Phi; - } - } - - if (Options.Atomic) { - Builder.CreateAtomicRMW(AtomicRMWInst::Add, V, Builder.getInt64(1), - AtomicOrdering::Monotonic); - } else { - Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), V, "gcov_ctr"); - Count = Builder.CreateAdd(Count, Builder.getInt64(1)); - Builder.CreateStore(Count, V); - } - - Instruction *TI = BB.getTerminator(); - if (isa(TI)) { - auto It = EdgeToCounter.find({&BB, nullptr}); - assert(It != EdgeToCounter.end()); - const unsigned Edge = It->second; - Value *Counter = Builder.CreateConstInBoundsGEP2_64( - Counters->getValueType(), Counters, 0, Edge); - if (Options.Atomic) { - Builder.CreateAtomicRMW(AtomicRMWInst::Add, Counter, - Builder.getInt64(1), AtomicOrdering::Monotonic); - } else { - Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), Counter); - Count = Builder.CreateAdd(Count, Builder.getInt64(1)); - Builder.CreateStore(Count, Counter); - } - } - } + return true; } void GCOVProfiler::emitGlobalConstructor( diff --git a/llvm/test/Transforms/GCOVProfiling/atomic-counter.ll b/llvm/test/Transforms/GCOVProfiling/atomic-counter.ll index 61ee30a4414bf..2c5ea41b6fd81 100644 --- a/llvm/test/Transforms/GCOVProfiling/atomic-counter.ll +++ b/llvm/test/Transforms/GCOVProfiling/atomic-counter.ll @@ -4,8 +4,7 @@ ; CHECK-LABEL: void @empty() ; CHECK-NEXT: entry: -; CHECK-NEXT: %0 = atomicrmw add i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_gcov_ctr, i64 0, i64 0), i64 1 monotonic, !dbg [[DBG:![0-9]+]] -; CHECK-NEXT: %1 = atomicrmw add i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_gcov_ctr, i64 0, i64 1), i64 1 monotonic, !dbg [[DBG]] +; CHECK-NEXT: %0 = atomicrmw add i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_gcov_ctr, i64 0, i64 0), i64 1 monotonic, !dbg [[DBG:![0-9]+]] ; CHECK-NEXT: ret void, !dbg [[DBG]] define dso_local void @empty() !dbg !5 { diff --git a/llvm/test/Transforms/GCOVProfiling/split-indirectbr-critical-edges.ll b/llvm/test/Transforms/GCOVProfiling/split-indirectbr-critical-edges.ll new file mode 100644 index 0000000000000..4d4ffe4021fa1 --- /dev/null +++ b/llvm/test/Transforms/GCOVProfiling/split-indirectbr-critical-edges.ll @@ -0,0 +1,61 @@ +; RUN: mkdir -p %t && cd %t +; RUN: opt < %s -passes=insert-gcov-profiling -S | FileCheck %s + +; CHECK: @__llvm_gcov_ctr = internal global [1 x i64] zeroinitializer + +;; If an indirectbr critical edge cannot be split, ignore it. +;; The edge will not be profiled. 
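+;; (A critical edge leaving an indirectbr is not splittable in general:
+;; splitting would retarget the indirectbr to a new block, invalidating the
+;; blockaddress values it branches on, so SplitCriticalEdge bails out.)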
+; CHECK-LABEL: @cannot_split(
+; CHECK:       indirect.preheader:
+; CHECK-NEXT:    load {{.*}} @__llvm_gcov_ctr
+; CHECK-NOT:     load {{.*}} @__llvm_gcov_ctr
+
+define dso_local i32 @cannot_split(i8* nocapture readonly %p) #0 !dbg !7 {
+entry:
+  %targets = alloca <2 x i8*>, align 16
+  store <2 x i8*> <i8* blockaddress(@cannot_split, %indirect), i8* blockaddress(@cannot_split, %end)>, <2 x i8*>* %targets, align 16, !dbg !9
+  br label %for.cond, !dbg !14
+
+for.cond:                                         ; preds = %for.cond, %entry
+  %p.addr.0 = phi i8* [ %p, %entry ], [ %incdec.ptr, %for.cond ]
+  %0 = load i8, i8* %p.addr.0, align 1, !dbg !15
+  %cmp = icmp eq i8 %0, 7, !dbg !17
+  %incdec.ptr = getelementptr inbounds i8, i8* %p.addr.0, i64 1, !dbg !18
+  br i1 %cmp, label %indirect.preheader, label %for.cond, !dbg !15, !llvm.loop !19
+
+indirect.preheader:                               ; preds = %for.cond
+  %1 = load i8, i8* %incdec.ptr, align 1, !dbg !21
+  %idxprom = sext i8 %1 to i64, !dbg !21
+  %arrayidx4 = getelementptr inbounds <2 x i8*>, <2 x i8*>* %targets, i64 0, i64 %idxprom, !dbg !21
+  %2 = load i8*, i8** %arrayidx4, align 8, !dbg !21
+  br label %indirect
+
+indirect:                                         ; preds = %indirect.preheader, %indirect
+  indirectbr i8* %2, [label %indirect, label %end]
+
+end:                                              ; preds = %indirect
+  ret i32 0, !dbg !22
+}
+
+attributes #0 = { norecurse nounwind readonly uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "a.c", directory: "/tmp/c")
+!2 = !{}
+!3 = !{i32 7, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!7 = distinct !DISubprogram(name: "cannot_split", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!8 = !DISubroutineType(types: !2)
+!9 = !DILocation(line: 3, column: 14, scope: !7)
+!14 = !DILocation(line: 5, column: 3, scope: !7)
+!15 = !DILocation(line: 6, column: 9, scope: !7)
+!17 = !DILocation(line: 6, column: 12, scope: !7)
+!18 = !DILocation(line: 5, column: 12, scope: !7)
+!19 = distinct !{!19, !14, !20}
+!20 = !DILocation(line: 9, column: 5, scope: !7)
+!21 = !DILocation(line: 0, scope: !7)
+!22 = !DILocation(line: 11, column: 3, scope: !7)

From 5f4e9bf6416e45eba483a4e5e263749989fdb3b3 Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Sun, 13 Sep 2020 00:44:32 -0700
Subject: [PATCH 0483/1079] [gcov] Fix memory leak due to
 BranchProbabilityInfoWrapperPass

This is weird.
---
 llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 437063eef6f95..68199f6379d40 100644
--- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -193,7 +193,6 @@ class GCOVProfilerLegacyPass : public ModulePass {

   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<BlockFrequencyInfoWrapperPass>();
-    AU.addRequired<BranchProbabilityInfoWrapperPass>();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
   }

From bec81dc67d9348dad0ea60a9b8804d1413aefe98 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan
Date: Sun, 13 Sep 2020 19:39:49 +0800
Subject: [PATCH 0484/1079] Reland "[PowerPC] Implement instruction clustering
 for stores"

Commit 3c0b3250 introduced store fusion for the PowerPC target, but it
brought failures under the undefined-behavior sanitizer and was reverted.
This patch fixes those failures.
---
 llvm/lib/Target/PowerPC/PPC.td                |  11 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp      | 109 ++++++-
 llvm/lib/Target/PowerPC/PPCInstrInfo.h        |  13 +
 llvm/lib/Target/PowerPC/PPCSubtarget.cpp      |   1 +
 llvm/lib/Target/PowerPC/PPCSubtarget.h        |   2 +
 llvm/lib/Target/PowerPC/PPCTargetMachine.cpp  |   4 +
 .../test/CodeGen/PowerPC/fusion-load-store.ll | 268 ++++++++++++++++++
 .../PowerPC/pcrel-call-linkage-leaf.ll        |   4 +-
 8 files changed, 406 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/fusion-load-store.ll

diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index d94ecc6e84381..81e5b3859a1f5 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -174,6 +174,9 @@ def FeatureAddisLoadFusion : SubtargetFeature<"fuse-addis-load",
                                               "HasAddisLoadFusion", "true",
                                               "Power8 Addis-Load fusion",
                                               [FeatureFusion]>;
+def FeatureStoreFusion : SubtargetFeature<"fuse-store", "HasStoreFusion", "true",
+                                          "Target supports store clustering",
+                                          [FeatureFusion]>;
 def FeatureUnalignedFloats :
   SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
                    "true", "CPU does not trap on unaligned FP access">;
@@ -345,10 +348,12 @@ def ProcessorFeatures {
   // Power10
   // For P10 CPU we assume that all of the existing features from Power9
   // still exist with the exception of those we know are Power9 specific.
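+  // The fusion-related features live in their own list so they can be
+  // concatenated into the P10 feature set below and inherited by later CPUs.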
+  list<SubtargetFeature> FusionFeatures = [FeatureStoreFusion];
   list<SubtargetFeature> P10AdditionalFeatures =
-      [DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs,
-       FeaturePCRelativeMemops, FeatureP10Vector, FeatureMMA,
-       FeaturePairedVectorMemops];
+      !listconcat(FusionFeatures, [
+      DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs,
+      FeaturePCRelativeMemops, FeatureP10Vector, FeatureMMA,
+      FeaturePairedVectorMemops]);
   list<SubtargetFeature> P10SpecificFeatures = [];
   list<SubtargetFeature> P10InheritableFeatures =
       !listconcat(P9InheritableFeatures, P10AdditionalFeatures);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 2423bca42e805..7e5e42fdf47e8 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2222,6 +2222,112 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   return true;
 }

+bool PPCInstrInfo::getMemOperandsWithOffsetWidth(
+    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
+    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+    const TargetRegisterInfo *TRI) const {
+  const MachineOperand *BaseOp;
+  OffsetIsScalable = false;
+  if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
+    return false;
+  BaseOps.push_back(BaseOp);
+  return true;
+}
+
+static bool isLdStSafeToCluster(const MachineInstr &LdSt,
+                                const TargetRegisterInfo *TRI) {
+  // If this is a volatile load/store, don't mess with it.
+  if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
+    return false;
+
+  if (LdSt.getOperand(2).isFI())
+    return true;
+
+  assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");
+  // Can't cluster if the instruction modifies the base register or is in
+  // update form, e.g. ld r2, 3(r2).
+  if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
+    return false;
+
+  return true;
+}
+
+// Only cluster an instruction pair when they have the same opcode and are
+// clusterable according to the PowerPC specification.
+static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
+                                     const PPCSubtarget &Subtarget) {
+  switch (FirstOpc) {
+  default:
+    return false;
+  case PPC::STD:
+  case PPC::STFD:
+  case PPC::STXSD:
+  case PPC::DFSTOREf64:
+    return FirstOpc == SecondOpc;
+  // The PowerPC backend has the opcodes STW/STW8 for the instruction "stw" to
+  // deal with 32-bit and 64-bit instruction selection. They are a clusterable
+  // pair even though the opcodes differ.
+  case PPC::STW:
+  case PPC::STW8:
+    return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
+  }
+}
+
+bool PPCInstrInfo::shouldClusterMemOps(
+    ArrayRef<const MachineOperand *> BaseOps1,
+    ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
+    unsigned NumBytes) const {
+
+  assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
+  const MachineOperand &BaseOp1 = *BaseOps1.front();
+  const MachineOperand &BaseOp2 = *BaseOps2.front();
+  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
+         "Only base registers and frame indices are supported.");
+
+  // NumLoads is the number of memory operations that have already been
+  // clustered. Don't cluster further if at least two ops are clustered.
+  if (NumLoads > 2)
+    return false;
+
+  // Cluster the load/store only when they have the same base
+  // register or FI.
+  if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
+      (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
+      (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
+    return false;
+
+  // Check if the load/store are clusterable according to the PowerPC
+  // specification.
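+  // For example, two STDs off the same base register at offsets 8 and 16
+  // (width 8) satisfy Offset1 + Width1 == Offset2 below and may be clustered;
+  // a gap between the accesses or a width mismatch rejects the pair.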
+  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
+  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
+  unsigned FirstOpc = FirstLdSt.getOpcode();
+  unsigned SecondOpc = SecondLdSt.getOpcode();
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  // Cluster the load/store only when they have the same opcode and are a
+  // clusterable opcode pair according to the PowerPC specification.
+  if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
+    return false;
+
+  // Can't cluster load/stores that have an ordered or volatile memory
+  // reference.
+  if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
+      !isLdStSafeToCluster(SecondLdSt, TRI))
+    return false;
+
+  int64_t Offset1 = 0, Offset2 = 0;
+  unsigned Width1 = 0, Width2 = 0;
+  const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
+  if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
+      !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
+      Width1 != Width2)
+    return false;
+
+  assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
+         "getMemOperandWithOffsetWidth return incorrect base op");
+  // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
+  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
+  return Offset1 + Width1 == Offset2;
+}
+
 /// GetInstSize - Return the number of bytes of code the specified
 /// instruction may be.  This returns the maximum number of bytes.
 ///
@@ -4664,7 +4770,8 @@ bool PPCInstrInfo::getMemOperandWithOffsetWidth(
     return false;

   // Handle only loads/stores with base register followed by immediate offset.
-  if (LdSt.getNumExplicitOperands() != 3)
+  if (!LdSt.getOperand(1).isImm() ||
+      (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
     return false;
   if (!LdSt.getOperand(1).isImm() || (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 75e8224892f4c..2f867b16aa24f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -494,6 +494,19 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                                     int64_t &Offset, unsigned &Width,
                                     const TargetRegisterInfo *TRI) const;

+  /// Get the base operand and byte offset of an instruction that reads/writes
+  /// memory.
+  bool getMemOperandsWithOffsetWidth(
+      const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
+      int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+      const TargetRegisterInfo *TRI) const override;
+
+  /// Returns true if the two given memory operations should be scheduled
+  /// adjacent.
+  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+                           ArrayRef<const MachineOperand *> BaseOps2,
+                           unsigned NumLoads, unsigned NumBytes) const override;
+
   /// Return true if two MIs access different memory addresses and false
   /// otherwise
   bool
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 5546ba9de5d75..1afed172e143b 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -120,6 +120,7 @@ void PPCSubtarget::initializeEnvironment() {
   HasHTM = false;
   HasFloat128 = false;
   HasFusion = false;
+  HasStoreFusion = false;
   HasAddiLoadFusion = false;
   HasAddisLoadFusion = false;
   IsISA3_0 = false;
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index ee430529ad564..4552defd657e5 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -140,6 +140,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
   bool HasHTM;
   bool HasFloat128;
   bool HasFusion;
+  bool HasStoreFusion;
   bool HasAddiLoadFusion;
   bool HasAddisLoadFusion;
   bool IsISA3_0;
@@ -317,6 +318,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
   bool isISA3_1() const { return IsISA3_1; }
   bool useLongCalls() const { return UseLongCalls; }
   bool hasFusion() const { return HasFusion; }
+  bool hasStoreFusion() const { return HasStoreFusion; }
   bool hasAddiLoadFusion() const { return HasAddiLoadFusion; }
   bool hasAddisLoadFusion() const { return HasAddisLoadFusion; }
   bool needsSwapsForVSXMemOps() const {
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 7fd7b82fb4352..6a15b0219252c 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -278,6 +278,8 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
       std::make_unique<GenericScheduler>(C));
   // add DAG Mutations here.
   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+  if (ST.hasStoreFusion())
+    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
   if (ST.hasFusion())
     DAG->addMutation(createPowerPCMacroFusionDAGMutation());

@@ -292,6 +294,8 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler(
           std::make_unique<PPCPostRASchedStrategy>(C) :
           std::make_unique<PostGenericScheduler>(C), true);
   // add DAG Mutations here.
+  if (ST.hasStoreFusion())
+    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
   if (ST.hasFusion())
     DAG->addMutation(createPowerPCMacroFusionDAGMutation());
   return DAG;
diff --git a/llvm/test/CodeGen/PowerPC/fusion-load-store.ll b/llvm/test/CodeGen/PowerPC/fusion-load-store.ll
new file mode 100644
index 0000000000000..75b2eca2168c0
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fusion-load-store.ll
@@ -0,0 +1,268 @@
+; Test if several consecutive loads/stores can be clustered (fused) by the
+; scheduler. The scheduler will print "Cluster ld/st SU(x) - SU(y)" if SU(x)
+; and SU(y) are fused.
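+; A minimal way to reproduce the scheduler log by hand (illustrative
+; invocation only; it assumes an assertions-enabled build of llc):
+;   llc fusion-load-store.ll -mtriple=powerpc64le-unknown-linux-gnu \
+;     -mcpu=pwr10 -debug-only=machine-scheduler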
+ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 \ +; RUN: -mattr=-paired-vector-memops,-pcrelative-memops -verify-misched \ +; RUN: -debug-only=machine-scheduler 2>&1 | FileCheck %s + +define i64 @store_i64(i64* nocapture %P, i64 %v) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i64:%bb.0 +; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) +; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, 24 +; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, 16 +; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, 8 +; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, 32 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i64:%bb.0 +; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) +; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], 16 +; CHECK: SU([[SU1]]): STD renamable $x[[REG]], 8 +; CHECK: SU([[SU2]]): STD renamable $x[[REG]], 24 +; CHECK: SU([[SU3]]): STD renamable $x[[REG]], 32 + %arrayidx = getelementptr inbounds i64, i64* %P, i64 3 + store i64 %v, i64* %arrayidx + %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2 + store i64 %v, i64* %arrayidx1 + %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx2 + %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4 + store i64 %v, i64* %arrayidx3 + ret i64 %v +} + +define i32 @store_i32(i32* nocapture %P, i32 %v) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32:%bb.0 +; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) +; CHECK: SU([[SU2]]): STW %[[REG:[0-9]+]].sub_32:g8rc, 52 +; CHECK: SU([[SU3]]): STW %[[REG]].sub_32:g8rc, 48 +; CHECK: SU([[SU4]]): STW %[[REG]].sub_32:g8rc, 44 +; CHECK: SU([[SU5]]): STW %[[REG]].sub_32:g8rc, 56 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32:%bb.0 +; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) +; CHECK: SU([[SU0]]): STW renamable $r[[REG:[0-9]+]], 48 +; CHECK: SU([[SU1]]): STW renamable $r[[REG]], 44 +; CHECK: SU([[SU2]]): STW renamable $r[[REG]], 52 +; CHECK: SU([[SU3]]): STW renamable $r[[REG]], 56 + %arrayidx = getelementptr inbounds i32, i32* %P, i32 13 + store i32 %v, i32* %arrayidx + %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 12 + store i32 %v, i32* %arrayidx1 + %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 11 + store i32 %v, i32* %arrayidx2 + %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 14 + store i32 %v, i32* %arrayidx3 + ret i32 %v +} + +define void @store_i64_neg(i64* nocapture %P, i64 %v) #0 { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i64_neg:%bb.0 +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) +; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, -24 +; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, -8 +; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, -16 +; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, -32 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i64_neg:%bb.0 +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) +; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], -8 +; CHECK: SU([[SU1]]): STD renamable $x[[REG]], -16 +; CHECK: 
SU([[SU2]]): STD renamable $x[[REG]], -24 +; CHECK: SU([[SU3]]): STD renamable $x[[REG]], -32 + %arrayidx = getelementptr inbounds i64, i64* %P, i64 -3 + store i64 %v, i64* %arrayidx + %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx1 + %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 -2 + store i64 %v, i64* %arrayidx2 + %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 -4 + store i64 %v, i64* %arrayidx3 + ret void +} + +define void @store_i32_neg(i32* nocapture %P, i32 %v) #0 { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32_neg:%bb.0 +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) +; CHECK: SU([[SU2]]): STW %[[REG:[0-9]+]].sub_32:g8rc, -12 +; CHECK: SU([[SU3]]): STW %[[REG]].sub_32:g8rc, -4 +; CHECK: SU([[SU4]]): STW %[[REG]].sub_32:g8rc, -8 +; CHECK: SU([[SU5]]): STW %[[REG]].sub_32:g8rc, -16 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32_neg:%bb.0 +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) +; CHECK:SU([[SU0]]): STW renamable $r[[REG:[0-9]+]], -4 +; CHECK:SU([[SU1]]): STW renamable $r[[REG]], -8 +; CHECK:SU([[SU2]]): STW renamable $r[[REG]], -12 +; CHECK:SU([[SU3]]): STW renamable $r[[REG]], -16 + %arrayidx = getelementptr inbounds i32, i32* %P, i32 -3 + store i32 %v, i32* %arrayidx + %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 -1 + store i32 %v, i32* %arrayidx1 + %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 -2 + store i32 %v, i32* %arrayidx2 + %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 -4 + store i32 %v, i32* %arrayidx3 + ret void +} + +define void @store_double(double* nocapture %P, double %v) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_double:%bb.0 +; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) +; CHECK: SU([[SU2]]): DFSTOREf64 %[[REG:[0-9]+]]:vsfrc, 24 +; CHECK: SU([[SU3]]): DFSTOREf64 %[[REG]]:vsfrc, 8 +; CHECK: SU([[SU4]]): DFSTOREf64 %[[REG]]:vsfrc, 16 +; CHECK: SU([[SU5]]): DFSTOREf64 %[[REG]]:vsfrc, 32 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_double:%bb.0 +; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) +; CHECK: SU([[SU0]]): STFD renamable $f[[REG:[0-9]+]], 8 +; CHECK: SU([[SU1]]): STFD renamable $f[[REG]], 16 +; CHECK: SU([[SU2]]): STFD renamable $f[[REG]], 24 +; CHECK: SU([[SU3]]): STFD renamable $f[[REG]], 32 + %arrayidx = getelementptr inbounds double, double* %P, i64 3 + store double %v, double* %arrayidx + %arrayidx1 = getelementptr inbounds double, double* %P, i64 1 + store double %v, double* %arrayidx1 + %arrayidx2 = getelementptr inbounds double, double* %P, i64 2 + store double %v, double* %arrayidx2 + %arrayidx3 = getelementptr inbounds double, double* %P, i64 4 + store double %v, double* %arrayidx3 + ret void +} + +define void @store_float(float* nocapture %P, float %v) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_float:%bb.0 +; CHECK-NOT: Cluster ld/st +; CHECK-NOT: Cluster ld/st +; CHECK: SU([[SU2]]): DFSTOREf32 %[[REG:[0-9]+]]:vssrc, 12 +; CHECK: SU([[SU3]]): DFSTOREf32 %[[REG]]:vssrc, 4 +; CHECK: SU([[SU4]]): DFSTOREf32 %[[REG]]:vssrc, 8 +; CHECK: SU([[SU5]]): DFSTOREf32 %[[REG]]:vssrc, 16 +; CHECK: ********** MI 
Scheduling ********** +; CHECK-LABEL: store_float:%bb.0 +; CHECK-NOT: Cluster ld/st +; CHECK-NOT: Cluster ld/st +; CHECK: SU([[SU0]]): STFS renamable $f[[REG:[0-9]+]], 12 +; CHECK: SU([[SU1]]): STFS renamable $f[[REG]], 4 +; CHECK: SU([[SU2]]): STFS renamable $f[[REG]], 8 +; CHECK: SU([[SU3]]): STFS renamable $f[[REG]], 16 + %arrayidx = getelementptr inbounds float, float* %P, i64 3 + store float %v, float* %arrayidx + %arrayidx1 = getelementptr inbounds float, float* %P, i64 1 + store float %v, float* %arrayidx1 + %arrayidx2 = getelementptr inbounds float, float* %P, i64 2 + store float %v, float* %arrayidx2 + %arrayidx3 = getelementptr inbounds float, float* %P, i64 4 + store float %v, float* %arrayidx3 + ret void +} + +; Cannot fuse the store/load if there is volatile in between +define i64 @store_volatile(i64* nocapture %P, i64 %v) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_volatile:%bb.0 +; CHECK-NOT: Cluster ld/st +; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, 24 +; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, 16 +; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, 8 +; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, 32 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_volatile:%bb.0 +; CHECK-NOT: Cluster ld/st +; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], 24 +; CHECK: SU([[SU1]]): STD renamable $x[[REG]], 16 +; CHECK: SU([[SU2]]): STD renamable $x[[REG]], 8 +; CHECK: SU([[SU3]]): STD renamable $x[[REG]], 32 + %arrayidx = getelementptr inbounds i64, i64* %P, i64 3 + store volatile i64 %v, i64* %arrayidx + %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2 + store volatile i64 %v, i64* %arrayidx1 + %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1 + store volatile i64 %v, i64* %arrayidx2 + %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4 + store volatile i64 %v, i64* %arrayidx3 + ret i64 %v +} + +@p = common local_unnamed_addr global [100 x i32] zeroinitializer, align 4 + +define void @store_i32_stw_stw8(i32 signext %m, i32 signext %n) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32_stw_stw8:%bb.0 +; CHECK: Cluster ld/st SU([[SU5:[0-9]+]]) - SU([[SU8:[0-9]+]]) +; CHECK: SU([[SU5]]): STW8 %{{[0-9]+}}:g8rc, 24 +; CHECK: SU([[SU8]]): STW %{{[0-9]+}}:gprc, 20 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32_stw_stw8:%bb.0 +; CHECK: Cluster ld/st SU([[SU5:[0-9]+]]) - SU([[SU6:[0-9]+]]) +; CHECK: SU([[SU5]]): STW8 renamable $x{{[0-9]+}}, 24 +; CHECK: SU([[SU6]]): STW renamable $r{{[0-9]+}}, 20 + store i32 9, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 6), align 4 + store i32 %n, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 7), align 4 + %add = add nsw i32 %n, %m + store i32 %add, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 5), align 4 + ret void +} + +define void @store_i32_stw8(i32 signext %m, i32 signext %n) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32_stw8:%bb.0 +; CHECK: Cluster ld/st SU([[SU4:[0-9]+]]) - SU([[SU5:[0-9]+]]) +; CHECK: SU([[SU4]]): STW8 %{{[0-9]+}}:g8rc, 24 +; CHECK: SU([[SU5]]): STW8 %{{[0-9]+}}:g8rc, 28 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32_stw8:%bb.0 +; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) +; CHECK: SU([[SU3]]): STW8 renamable $x{{[0-9]+}}, 24 +; CHECK: SU([[SU4]]): STW8 renamable $x{{[0-9]+}}, 28 + store i32 9, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 6), 
align 4 + store i32 %n, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 7), align 4 + ret void +} + +declare void @bar(i64*) + +define void @store_frame_index(i32 %a, i32 %b) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_frame_index:%bb.0 +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) +; CHECK: SU([[SU2]]): STD %{{[0-9]+}}:g8rc, 0, %stack.0.buf +; CHECK: SU([[SU3]]): STD %{{[0-9]+}}:g8rc, 8, %stack.0.buf + %buf = alloca [8 x i64], align 8 + %0 = bitcast [8 x i64]* %buf to i8* + %conv = zext i32 %a to i64 + %arrayidx = getelementptr inbounds [8 x i64], [8 x i64]* %buf, i64 0, i64 0 + store i64 %conv, i64* %arrayidx, align 8 + %conv1 = zext i32 %b to i64 + %arrayidx2 = getelementptr inbounds [8 x i64], [8 x i64]* %buf, i64 0, i64 1 + store i64 %conv1, i64* %arrayidx2, align 8 + call void @bar(i64* nonnull %arrayidx) + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll index 00cc472092d47..f2da036a37c50 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll @@ -104,15 +104,15 @@ define dso_local signext i32 @X2IsCallerSaved(i32 signext %a, i32 signext %b, i3 ; CHECK-P9-NOT: .localentry ; CHECK-ALL: # %bb.0: # %entry ; CHECK-S-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-S-NEXT: add r11, r4, r3 ; CHECK-S-NEXT: sub r29, r8, r9 ; CHECK-S-NEXT: add r9, r10, r9 ; CHECK-S-NEXT: sub r10, r10, r3 -; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: sub r12, r4, r5 ; CHECK-S-NEXT: add r0, r6, r5 ; CHECK-S-NEXT: sub r2, r6, r7 -; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: add r30, r8, r7 ; CHECK-S-NEXT: mullw r3, r3, r11 ; CHECK-S-NEXT: mullw r3, r3, r5 From e2dee9af8db645fd3c0351da91d3cb09c1dcdd5d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 13 Sep 2020 13:38:05 +0100 Subject: [PATCH 0485/1079] [X86] Add test cases for PR11210 Demonstrates that redundant masked stores may be removed, as long as we're able to replace the AVX/AVX2 masked store with a generic masked store (constant mask or sign-extended bool vector mask). --- .../PhaseOrdering/X86/masked-memory-ops.ll | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops.ll diff --git a/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops.ll b/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops.ll new file mode 100644 index 0000000000000..96535892953f2 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -O2 -S | FileCheck %s --check-prefixes=CHECK,OLDPM +; RUN: opt < %s -passes='default' -aa-pipeline=default -S | FileCheck %s --check-prefixes=CHECK,NEWPM + +target triple = "x86_64--" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) #0 +declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32, <8 x i1>) + +; PR11210: If we have been able to replace a AVX/AVX2 masked store with a +; generic masked store intrinsic, then we should be able to remove dead +; masked stores. 
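+; The rewrite is legal here because the mask operand is the sign-extension of
+; an i1 vector compare, so every lane is known to be all-ones or all-zeros,
+; which matches the lane semantics that llvm.masked.store expects.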
+ +define void @PR11210_v8f32_maskstore_maskstore(i8* %ptr, <8 x float> %x, <8 x float> %y, <8 x i32> %src) { +; CHECK-LABEL: @PR11210_v8f32_maskstore_maskstore( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <8 x i32> [[SRC:%.*]], zeroinitializer +; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[PTR:%.*]] to <8 x float>* +; CHECK-NEXT: tail call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[Y:%.*]], <8 x float>* [[CASTVEC]], i32 1, <8 x i1> [[CMP]]) +; CHECK-NEXT: ret void +; + %cmp = icmp sgt <8 x i32> %src, zeroinitializer + %mask = sext <8 x i1> %cmp to <8 x i32> + call void @llvm.x86.avx.maskstore.ps.256(i8* %ptr, <8 x i32> %mask, <8 x float> %x) + call void @llvm.x86.avx.maskstore.ps.256(i8* %ptr, <8 x i32> %mask, <8 x float> %y) + ret void +} + +; The contents of %mask are unknown so we don't replace this with a generic masked.store. +define void @PR11210_v8f32_maskstore_maskstore_raw_mask(i8* %ptr, <8 x float> %x, <8 x float> %y, <8 x i32> %mask) { +; CHECK-LABEL: @PR11210_v8f32_maskstore_maskstore_raw_mask( +; CHECK-NEXT: tail call void @llvm.x86.avx.maskstore.ps.256(i8* [[PTR:%.*]], <8 x i32> [[MASK:%.*]], <8 x float> [[X:%.*]]) +; CHECK-NEXT: tail call void @llvm.x86.avx.maskstore.ps.256(i8* [[PTR]], <8 x i32> [[MASK]], <8 x float> [[Y:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx.maskstore.ps.256(i8* %ptr, <8 x i32> %mask, <8 x float> %x) + call void @llvm.x86.avx.maskstore.ps.256(i8* %ptr, <8 x i32> %mask, <8 x float> %y) + ret void +} + +; Mix AVX and generic masked stores. +define void @PR11210_v8f32_mstore_maskstore(i8* %ptr, <8 x float> %x, <8 x float> %y, <8 x i32> %src) { +; CHECK-LABEL: @PR11210_v8f32_mstore_maskstore( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <8 x i32> [[SRC:%.*]], zeroinitializer +; CHECK-NEXT: [[PTRF:%.*]] = bitcast i8* [[PTR:%.*]] to <8 x float>* +; CHECK-NEXT: tail call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[Y:%.*]], <8 x float>* [[PTRF]], i32 1, <8 x i1> [[CMP]]) +; CHECK-NEXT: ret void +; + %cmp = icmp sgt <8 x i32> %src, zeroinitializer + %mask = sext <8 x i1> %cmp to <8 x i32> + %ptrf = bitcast i8* %ptr to <8 x float>* + tail call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %x, <8 x float>* %ptrf, i32 1, <8 x i1> %cmp) + call void @llvm.x86.avx.maskstore.ps.256(i8* %ptr, <8 x i32> %mask, <8 x float> %y) + ret void +} + From 2c85f5e642fb599f77aac0de22316c922cfd7cbb Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 13 Sep 2020 17:04:53 +0200 Subject: [PATCH 0486/1079] [ARM] Add tests for fmin/max with largest/smallest float (NFC) --- llvm/test/CodeGen/ARM/fminmax-folds.ll | 416 ++++++++++++++++++++++++- 1 file changed, 400 insertions(+), 16 deletions(-) diff --git a/llvm/test/CodeGen/ARM/fminmax-folds.ll b/llvm/test/CodeGen/ARM/fminmax-folds.ll index 6bf251ef95cbd..01e5ab4a46027 100644 --- a/llvm/test/CodeGen/ARM/fminmax-folds.ll +++ b/llvm/test/CodeGen/ARM/fminmax-folds.ll @@ -106,8 +106,8 @@ define float @test_minimum_const_inf(float %x) { ret float %r } -define float @test_minnum_const_ninf(float %x) { -; CHECK-LABEL: test_minnum_const_ninf: +define float @test_minnum_const_neg_inf(float %x) { +; CHECK-LABEL: test_minnum_const_neg_inf: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr s0, .LCPI8_0 ; CHECK-NEXT: vmov s2, r0 @@ -122,8 +122,8 @@ define float @test_minnum_const_ninf(float %x) { ret float %r } -define float @test_maxnum_const_ninf(float %x) { -; CHECK-LABEL: test_maxnum_const_ninf: +define float @test_maxnum_const_neg_inf(float %x) { +; CHECK-LABEL: test_maxnum_const_neg_inf: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr s0, 
.LCPI9_0 ; CHECK-NEXT: vmov s2, r0 @@ -138,8 +138,8 @@ define float @test_maxnum_const_ninf(float %x) { ret float %r } -define float @test_maximum_const_ninf(float %x) { -; CHECK-LABEL: test_maximum_const_ninf: +define float @test_maximum_const_neg_inf(float %x) { +; CHECK-LABEL: test_maximum_const_neg_inf: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr s0, .LCPI10_0 ; CHECK-NEXT: vmov s2, r0 @@ -154,8 +154,8 @@ define float @test_maximum_const_ninf(float %x) { ret float %r } -define float @test_minimum_const_ninf(float %x) { -; CHECK-LABEL: test_minimum_const_ninf: +define float @test_minimum_const_neg_inf(float %x) { +; CHECK-LABEL: test_minimum_const_neg_inf: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr s0, .LCPI11_0 ; CHECK-NEXT: vmov s2, r0 @@ -234,8 +234,8 @@ define float @test_minimum_const_inf_nnan(float %x) { ret float %r } -define float @test_minnum_const_ninf_nnan(float %x) { -; CHECK-LABEL: test_minnum_const_ninf_nnan: +define float @test_minnum_const_neg_inf_nnan(float %x) { +; CHECK-LABEL: test_minnum_const_neg_inf_nnan: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr s0, .LCPI16_0 ; CHECK-NEXT: vmov s2, r0 @@ -250,8 +250,8 @@ define float @test_minnum_const_ninf_nnan(float %x) { ret float %r } -define float @test_maxnum_const_ninf_nnan(float %x) { -; CHECK-LABEL: test_maxnum_const_ninf_nnan: +define float @test_maxnum_const_neg_inf_nnan(float %x) { +; CHECK-LABEL: test_maxnum_const_neg_inf_nnan: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr s0, .LCPI17_0 ; CHECK-NEXT: vmov s2, r0 @@ -266,8 +266,8 @@ define float @test_maxnum_const_ninf_nnan(float %x) { ret float %r } -define float @test_maximum_const_ninf_nnan(float %x) { -; CHECK-LABEL: test_maximum_const_ninf_nnan: +define float @test_maximum_const_neg_inf_nnan(float %x) { +; CHECK-LABEL: test_maximum_const_neg_inf_nnan: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr s0, .LCPI18_0 ; CHECK-NEXT: vmov s2, r0 @@ -282,8 +282,8 @@ define float @test_maximum_const_ninf_nnan(float %x) { ret float %r } -define float @test_minimum_const_ninf_nnan(float %x) { -; CHECK-LABEL: test_minimum_const_ninf_nnan: +define float @test_minimum_const_neg_inf_nnan(float %x) { +; CHECK-LABEL: test_minimum_const_neg_inf_nnan: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr s0, .LCPI19_0 ; CHECK-NEXT: vmov s2, r0 @@ -297,3 +297,387 @@ define float @test_minimum_const_ninf_nnan(float %x) { %r = call nnan float @llvm.minimum.f32(float %x, float 0xfff0000000000000) ret float %r } + +define float @test_minnum_const_max(float %x) { +; CHECK-LABEL: test_minnum_const_max: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI20_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vminnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI20_0: +; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 + %r = call float @llvm.minnum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_maxnum_const_max(float %x) { +; CHECK-LABEL: test_maxnum_const_max: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI21_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI21_0: +; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 + %r = call float @llvm.maxnum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_maximum_const_max(float %x) { +; CHECK-LABEL: test_maximum_const_max: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI22_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmax.f32 d0, 
d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI22_0: +; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 + %r = call float @llvm.maximum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_minimum_const_max(float %x) { +; CHECK-LABEL: test_minimum_const_max: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI23_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmin.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI23_0: +; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 + %r = call float @llvm.minimum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_minnum_const_neg_max(float %x) { +; CHECK-LABEL: test_minnum_const_neg_max: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI24_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vminnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI24_0: +; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 + %r = call float @llvm.minnum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_maxnum_const_neg_max(float %x) { +; CHECK-LABEL: test_maxnum_const_neg_max: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI25_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI25_0: +; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 + %r = call float @llvm.maxnum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_maximum_const_neg_max(float %x) { +; CHECK-LABEL: test_maximum_const_neg_max: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI26_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmax.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI26_0: +; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 + %r = call float @llvm.maximum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_minimum_const_neg_max(float %x) { +; CHECK-LABEL: test_minimum_const_neg_max: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI27_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmin.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI27_0: +; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 + %r = call float @llvm.minimum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_minnum_const_max_ninf(float %x) { +; CHECK-LABEL: test_minnum_const_max_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI28_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vminnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI28_0: +; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 + %r = call ninf float @llvm.minnum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_maxnum_const_max_ninf(float %x) { +; CHECK-LABEL: test_maxnum_const_max_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI29_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI29_0: +; CHECK-NEXT: .long 0x7f7fffff @ 
float 3.40282347E+38 + %r = call ninf float @llvm.maxnum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_maximum_const_max_ninf(float %x) { +; CHECK-LABEL: test_maximum_const_max_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI30_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmax.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI30_0: +; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 + %r = call ninf float @llvm.maximum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_minimum_const_max_ninf(float %x) { +; CHECK-LABEL: test_minimum_const_max_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI31_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmin.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI31_0: +; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 + %r = call ninf float @llvm.minimum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_minnum_const_neg_max_ninf(float %x) { +; CHECK-LABEL: test_minnum_const_neg_max_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI32_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vminnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI32_0: +; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 + %r = call ninf float @llvm.minnum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_maxnum_const_neg_max_ninf(float %x) { +; CHECK-LABEL: test_maxnum_const_neg_max_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI33_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI33_0: +; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 + %r = call ninf float @llvm.maxnum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_maximum_const_neg_max_ninf(float %x) { +; CHECK-LABEL: test_maximum_const_neg_max_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI34_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmax.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI34_0: +; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 + %r = call ninf float @llvm.maximum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_minimum_const_neg_max_ninf(float %x) { +; CHECK-LABEL: test_minimum_const_neg_max_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI35_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmin.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI35_0: +; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 + %r = call ninf float @llvm.minimum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_minnum_const_max_nnan_ninf(float %x) { +; CHECK-LABEL: test_minnum_const_max_nnan_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI36_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vminnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI36_0: +; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 + %r = call nnan ninf float @llvm.minnum.f32(float 
%x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_maxnum_const_max_nnan_ninf(float %x) { +; CHECK-LABEL: test_maxnum_const_max_nnan_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI37_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI37_0: +; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 + %r = call nnan ninf float @llvm.maxnum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_maximum_const_max_nnan_ninf(float %x) { +; CHECK-LABEL: test_maximum_const_max_nnan_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI38_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmax.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI38_0: +; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 + %r = call nnan ninf float @llvm.maximum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_minimum_const_max_nnan_ninf(float %x) { +; CHECK-LABEL: test_minimum_const_max_nnan_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI39_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmin.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI39_0: +; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 + %r = call nnan ninf float @llvm.minimum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_minnum_const_neg_max_nnan_ninf(float %x) { +; CHECK-LABEL: test_minnum_const_neg_max_nnan_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI40_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vminnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI40_0: +; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 + %r = call nnan ninf float @llvm.minnum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_maxnum_const_neg_max_nnan_ninf(float %x) { +; CHECK-LABEL: test_maxnum_const_neg_max_nnan_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI41_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI41_0: +; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 + %r = call nnan ninf float @llvm.maxnum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_maximum_const_neg_max_nnan_ninf(float %x) { +; CHECK-LABEL: test_maximum_const_neg_max_nnan_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI42_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmax.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI42_0: +; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 + %r = call nnan ninf float @llvm.maximum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_minimum_const_neg_max_nnan_ninf(float %x) { +; CHECK-LABEL: test_minimum_const_neg_max_nnan_ninf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI43_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmin.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI43_0: +; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 + %r = call nnan ninf float 
@llvm.minimum.f32(float %x, float 0xc7efffffe0000000)
+  ret float %r
+}

From 9237fde48139400764377eb73e7e5d3bc5b7fffc Mon Sep 17 00:00:00 2001
From: David Green
Date: Sun, 13 Sep 2020 16:11:01 +0100
Subject: [PATCH 0487/1079] [CGP] Prevent optimizePhiType from iterating forever

The recently added optimizePhiType algorithm had no checks to make sure it
didn't continually iterate back and forth between float and int types. This
means that given an input like store(phi(bitcast(load))), we could convert
that back and forth to store(bitcast(phi(load))). This particular case would
usually have been simplified to a different load type (folding the bitcast
into the load) before CGP, but other cases can occur. The one that came up
was phi(bitcast(phi)), where two phis of different types had bitcasts
between them. That was not helped by a dead bitcast being kept around, which
could make the conversion look profitable.

This adds an extra check of the bitcast Uses or Defs, to make sure that at
least one is grounded and will not end up being converted back. It also
makes sure that dead bitcasts are removed, and there is a minor change to
include newly created Phi nodes in the Visited set so that they do not need
to be revisited.

Differential Revision: https://reviews.llvm.org/D82676
---
 llvm/lib/CodeGen/CodeGenPrepare.cpp         |  28 ++-
 llvm/test/CodeGen/AArch64/convertphitype.ll | 201 +++++++++++++++++++-
 2 files changed, 219 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 3e5dceccf49b0..529975c33ec17 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -5807,6 +5807,12 @@ bool CodeGenPrepare::optimizePhiType(
   Visited.insert(I);
   SmallPtrSet<Instruction *, 4> Defs;
   SmallPtrSet<Instruction *, 4> Uses;
+  // This works by adding extra bitcasts between loads/stores and removing
+  // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi))
+  // we can get into the situation where we remove a bitcast in one iteration
+  // just to add it again in the next. We need to ensure that at least one
+  // bitcast we remove is anchored to something that will not change back.
+  bool AnyAnchored = false;
 
   while (!Worklist.empty()) {
     Instruction *II = Worklist.pop_back_val();
@@ -5840,9 +5846,12 @@ bool CodeGenPrepare::optimizePhiType(
         if (!Defs.count(OpBC)) {
           Defs.insert(OpBC);
           Worklist.push_back(OpBC);
+          AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
+                         !isa<ExtractElementInst>(OpBC->getOperand(0));
         }
-      } else if (!isa<UndefValue>(V))
+      } else if (!isa<UndefValue>(V)) {
         return false;
+      }
     }
   }
@@ -5866,12 +5875,15 @@ bool CodeGenPrepare::optimizePhiType(
         if (OpBC->getType() != ConvertTy)
           return false;
         Uses.insert(OpBC);
-      } else
+        AnyAnchored |=
+            any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
+      } else {
         return false;
+      }
     }
   }
 
-  if (!ConvertTy || !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
+  if (!ConvertTy || !AnyAnchored || !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
     return false;
 
   LLVM_DEBUG(dbgs() << "Converting " << *I << "\n  and connected nodes to "
@@ -5882,11 +5894,13 @@ bool CodeGenPrepare::optimizePhiType(
   ValueToValueMap ValMap;
   ValMap[UndefValue::get(PhiTy)] = UndefValue::get(ConvertTy);
   for (Instruction *D : Defs) {
-    if (isa<BitCastInst>(D))
+    if (isa<BitCastInst>(D)) {
       ValMap[D] = D->getOperand(0);
-    else
+      DeletedInstrs.insert(D);
+    } else {
       ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc",
                                   D->getNextNode());
+    }
   }
   for (PHINode *Phi : PhiNodes)
     ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
@@ -5897,15 +5911,17 @@
     for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
       NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
                           Phi->getIncomingBlock(i));
+    Visited.insert(NewPhi);
   }
   // And finally pipe up the stores and bitcasts
   for (Instruction *U : Uses) {
     if (isa<StoreInst>(U)) {
       DeletedInstrs.insert(U);
       U->replaceAllUsesWith(ValMap[U->getOperand(0)]);
-    } else
+    } else {
       U->setOperand(0,
                     new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U));
+    }
   }
 
   // Save the removed phis to be deleted later.
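To make the failure mode concrete, the following is a minimal hand-written IR
sketch of the phi(bitcast(phi)) case described above. It is not taken from the
patch or from convertphitype.ll; the function and value names are invented for
illustration.

define float @phi_cycle(i1 %cond, i32 %init) {
entry:
  br label %loop

loop:
  ; Two phis of different types feed each other through bitcasts on the
  ; backedge. Converting %phi.i to float removes one bitcast but creates the
  ; mirror-image pattern around %phi.f, so without the AnyAnchored check the
  ; pass could flip the phi types on every iteration.
  %phi.i = phi i32 [ %init, %entry ], [ %val.i, %loop ]
  %phi.f = phi float [ 0.0, %entry ], [ %val.f, %loop ]
  %val.f = bitcast i32 %phi.i to float
  %val.i = bitcast float %phi.f to i32
  br i1 %cond, label %loop, label %exit

exit:
  %r = bitcast i32 %phi.i to float
  ret float %r
}

Neither bitcast here is fed by a load or consumed by a store, so none of the
bitcasts the pass would remove is anchored; with this patch AnyAnchored stays
false and the conversion is skipped.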
diff --git a/llvm/test/CodeGen/AArch64/convertphitype.ll b/llvm/test/CodeGen/AArch64/convertphitype.ll index bb82ea2905c1c..2e3530de378b3 100644 --- a/llvm/test/CodeGen/AArch64/convertphitype.ll +++ b/llvm/test/CodeGen/AArch64/convertphitype.ll @@ -70,14 +70,13 @@ define float @convphi3(i32 *%s, i32 *%d, i32 %n, float %f) { ; CHECK-LABEL: @convphi3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[FB:%.*]] = bitcast float [[F:%.*]] to i32 ; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]] ; CHECK: then: ; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 ; CHECK-NEXT: [[LS_BC:%.*]] = bitcast i32 [[LS]] to float ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[F]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[F:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: ret float [[PHI_TC]] ; entry: @@ -99,14 +98,13 @@ define void @convphi4(i32 *%s, i32 *%d, i32 %n, float %f) { ; CHECK-LABEL: @convphi4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[FB:%.*]] = bitcast float [[F:%.*]] to i32 ; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]] ; CHECK: then: ; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 ; CHECK-NEXT: [[LS_BC:%.*]] = bitcast i32 [[LS]] to float ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[F]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[F:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC:%.*]] = bitcast float [[PHI_TC]] to i32 ; CHECK-NEXT: store i32 [[BC]], i32* [[D:%.*]], align 4 ; CHECK-NEXT: ret void @@ -481,6 +479,201 @@ end: ret float %b } +define void @convphi_stop(i32 *%s, i32 *%d, float *%e, i32 %n) { +; CHECK-LABEL: @convphi_stop( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: else: +; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ] +; CHECK-NEXT: [[B:%.*]] = bitcast i32 [[PHI]] to float +; CHECK-NEXT: store float [[B]], float* [[E:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %cmp15 = icmp sgt i32 %n, 0 + br i1 %cmp15, label %then, label %else + +then: + %ls = load i32, i32* %s, align 4 + br label %end + +else: + %ld = load i32, i32* %d, align 4 + br label %end + +end: + %phi = phi i32 [ %ls, %then ], [ %ld, %else ] + %b = bitcast i32 %phi to float + store float %b, float* %e, align 4 + ret void +} + +define void @convphi_stop2(i32 *%s, i32 *%d, float *%e, i32 %n) { +; CHECK-LABEL: @convphi_stop2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: [[LSB:%.*]] = bitcast i32 [[LS]] to float +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: else: +; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4 +; CHECK-NEXT: [[LDB:%.*]] = bitcast i32 [[LD]] to float +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[LSB]], [[THEN]] ], [ [[LDB]], 
[[ELSE]] ] +; CHECK-NEXT: store float [[PHI]], float* [[E:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %cmp15 = icmp sgt i32 %n, 0 + br i1 %cmp15, label %then, label %else + +then: + %ls = load i32, i32* %s, align 4 + %lsb = bitcast i32 %ls to float + br label %end + +else: + %ld = load i32, i32* %d, align 4 + %ldb = bitcast i32 %ld to float + br label %end + +end: + %phi = phi float [ %lsb, %then ], [ %ldb, %else ] + store float %phi, float* %e, align 4 + ret void +} + +define float @convphi_stop3(i32 *%s, i32 *%d, float *%e, i32 %n) { +; CHECK-LABEL: @convphi_stop3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast i32 [[LS]] to float +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: else: +; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4 +; CHECK-NEXT: [[LD_BC:%.*]] = bitcast i32 [[LD]] to float +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ELSE]] ] +; CHECK-NEXT: store float [[PHI_TC]], float* [[E:%.*]], align 4 +; CHECK-NEXT: ret float [[PHI_TC]] +; +entry: + %cmp15 = icmp sgt i32 %n, 0 + br i1 %cmp15, label %then, label %else + +then: + %ls = load i32, i32* %s, align 4 + br label %end +else: + %ld = load i32, i32* %d, align 4 + br label %end + +end: + %phi = phi i32 [ %ls, %then ], [ %ld, %else ] + %b = bitcast i32 %phi to float + store float %b, float* %e, align 4 + ret float %b +} +define void @convphi_stop4(i32 *%s, i32 *%d, float *%e, i32 %n) { +; CHECK-LABEL: @convphi_stop4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4 +; CHECK-NEXT: [[LD_BC:%.*]] = bitcast i32 [[LD]] to float +; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]] +; CHECK: then: +; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast i32 [[LS]] to float +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: [[BC:%.*]] = bitcast float [[PHI_TC]] to i32 +; CHECK-NEXT: store i32 [[BC]], i32* [[S]], align 4 +; CHECK-NEXT: br i1 [[TMP0]], label [[THEN2:%.*]], label [[END2:%.*]] +; CHECK: then2: +; CHECK-NEXT: [[LF:%.*]] = load float, float* [[E:%.*]], align 4 +; CHECK-NEXT: br label [[END2]] +; CHECK: end2: +; CHECK-NEXT: [[PHI2:%.*]] = phi float [ [[PHI_TC]], [[END]] ], [ [[LF]], [[THEN2]] ] +; CHECK-NEXT: store float [[PHI2]], float* [[E]], align 4 +; CHECK-NEXT: ret void +; +entry: + %cmp15 = icmp sgt i32 %n, 0 + %ld = load i32, i32* %d, align 4 + br i1 %cmp15, label %then, label %end +then: + %ls = load i32, i32* %s, align 4 + br label %end + +end: + %phi = phi i32 [ %ls, %then ], [ %ld, %entry ] + %phib = bitcast i32 %phi to float + store i32 %phi, i32* %s, align 4 + br i1 %cmp15, label %then2, label %end2 + +then2: + %lf = load float, float* %e, align 4 + br label %end2 + +end2: + %phi2 = phi float [ %phib, %end ], [ %lf, %then2 ] + store float %phi2, float* %e, align 4 + ret void +} + +define float @multiuse(i32 *%s, i32 *%d, i32 %n) { +; CHECK-LABEL: @multiuse( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], 
label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: [[A:%.*]] = add i32 [[LS]], 2 +; CHECK-NEXT: store i32 [[A]], i32* [[D:%.*]], align 4 +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: else: +; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[D]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ] +; CHECK-NEXT: [[B:%.*]] = bitcast i32 [[PHI]] to float +; CHECK-NEXT: ret float [[B]] +; +entry: + %cmp15 = icmp sgt i32 %n, 0 + br i1 %cmp15, label %then, label %else + +then: + %ls = load i32, i32* %s, align 4 + %a = add i32 %ls, 2 + store i32 %a, i32* %d, align 4 + br label %end + +else: + %ld = load i32, i32* %d, align 4 + br label %end + +end: + %phi = phi i32 [ %ls, %then ], [ %ld, %else ] + %b = bitcast i32 %phi to float + ret float %b +} From a4c535198643d1541b19f37a468c885a7baa7605 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Mon, 14 Sep 2020 00:19:06 +0800 Subject: [PATCH 0488/1079] [DAGCombiner] Propagate FMF flags in FMA folding DAG combiner folds (fma a 1.0 b) into (fadd a b) but the flag isn't propagated into new fadd. This patch fixes that. Some code in visitFMA is redundant and such support for vector constants is missing. Need follow-up patch to clean. Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D87037 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 13 ++-- llvm/test/CodeGen/PowerPC/fma-combine.ll | 59 ++++++++++++++++++- 2 files changed, 61 insertions(+), 11 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3aaf5e01d26a4..ae976af6557e1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -13185,11 +13185,11 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N1CFP && N1CFP->isZero()) return N2; } - // TODO: The FMA node should have flags that propagate to these nodes. + if (N0CFP && N0CFP->isExactlyValue(1.0)) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2, Flags); if (N1CFP && N1CFP->isExactlyValue(1.0)) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2, Flags); // Canonicalize (fma c, x, y) -> (fma x, c, y) if (isConstantFPBuildVectorOrConstantFP(N0) && @@ -13218,19 +13218,16 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } } - // (fma x, 1, y) -> (fadd x, y) // (fma x, -1, y) -> (fadd (fneg x), y) if (N1CFP) { if (N1CFP->isExactlyValue(1.0)) - // TODO: The FMA node should have flags that propagate to this node. - return DAG.getNode(ISD::FADD, DL, VT, N0, N2); + return DAG.getNode(ISD::FADD, DL, VT, N0, N2, Flags); if (N1CFP->isExactlyValue(-1.0) && (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0); AddToWorklist(RHSNeg.getNode()); - // TODO: The FMA node should have flags that propagate to this node. 
-      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
+      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg, Flags);
     }
 
     // fma (fneg x), K, y -> fma x -K, y
diff --git a/llvm/test/CodeGen/PowerPC/fma-combine.ll b/llvm/test/CodeGen/PowerPC/fma-combine.ll
index bf2abe0b6b837..217d520f89187 100644
--- a/llvm/test/CodeGen/PowerPC/fma-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-combine.ll
@@ -243,17 +243,18 @@ define double @getNegatedExpression_crash(double %x, double %y) {
 define double @fma_flag_propagation(double %a) {
 ; CHECK-FAST-LABEL: fma_flag_propagation:
 ; CHECK-FAST:       # %bb.0: # %entry
-; CHECK-FAST-NEXT:    xssubdp 1, 1, 1
+; CHECK-FAST-NEXT:    xxlxor 1, 1, 1
 ; CHECK-FAST-NEXT:    blr
 ;
 ; CHECK-FAST-NOVSX-LABEL: fma_flag_propagation:
 ; CHECK-FAST-NOVSX:       # %bb.0: # %entry
-; CHECK-FAST-NOVSX-NEXT:    fsub 1, 1, 1
+; CHECK-FAST-NOVSX-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
+; CHECK-FAST-NOVSX-NEXT:    lfs 1, .LCPI6_0@toc@l(3)
 ; CHECK-FAST-NOVSX-NEXT:    blr
 ;
 ; CHECK-LABEL: fma_flag_propagation:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xssubdp 1, 1, 1
+; CHECK-NEXT:    xxlxor 1, 1, 1
 ; CHECK-NEXT:    blr
 entry:
   %0 = fneg double %a
@@ -261,4 +262,56 @@ entry:
   ret double %1
 }
 
+define double @neg_fma_flag_propagation(double %a) {
+; CHECK-FAST-LABEL: neg_fma_flag_propagation:
+; CHECK-FAST:       # %bb.0: # %entry
+; CHECK-FAST-NEXT:    xxlxor 1, 1, 1
+; CHECK-FAST-NEXT:    blr
+;
+; CHECK-FAST-NOVSX-LABEL: neg_fma_flag_propagation:
+; CHECK-FAST-NOVSX:       # %bb.0: # %entry
+; CHECK-FAST-NOVSX-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
+; CHECK-FAST-NOVSX-NEXT:    lfs 1, .LCPI7_0@toc@l(3)
+; CHECK-FAST-NOVSX-NEXT:    blr
+;
+; CHECK-LABEL: neg_fma_flag_propagation:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxlxor 1, 1, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = call reassoc nnan double @llvm.fma.f64(double %a, double -1.0, double %a)
+  ret double %0
+}
+
+define <2 x double> @vec_neg_fma_flag_propagation(<2 x double> %a) {
+; CHECK-FAST-LABEL: vec_neg_fma_flag_propagation:
+; CHECK-FAST:       # %bb.0: # %entry
+; CHECK-FAST-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
+; CHECK-FAST-NEXT:    addi 3, 3, .LCPI8_0@toc@l
+; CHECK-FAST-NEXT:    lxvd2x 0, 0, 3
+; CHECK-FAST-NEXT:    xxswapd 0, 0
+; CHECK-FAST-NEXT:    xvmaddadp 34, 34, 0
+; CHECK-FAST-NEXT:    blr
+;
+; CHECK-FAST-NOVSX-LABEL: vec_neg_fma_flag_propagation:
+; CHECK-FAST-NOVSX:       # %bb.0: # %entry
+; CHECK-FAST-NOVSX-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
+; CHECK-FAST-NOVSX-NEXT:    lfs 1, .LCPI8_0@toc@l(3)
+; CHECK-FAST-NOVSX-NEXT:    fmr 2, 1
+; CHECK-FAST-NOVSX-NEXT:    blr
+;
+; CHECK-LABEL: vec_neg_fma_flag_propagation:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
+; CHECK-NEXT:    addi 3, 3, .LCPI8_0@toc@l
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    xxswapd 0, 0
+; CHECK-NEXT:    xvmaddadp 34, 34, 0
+; CHECK-NEXT:    blr
+entry:
+  %0 = call reassoc nnan <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> <double -1.0, double -1.0>, <2 x double> %a)
+  ret <2 x double> %0
+}
+
 declare double @llvm.fma.f64(double, double, double) nounwind readnone
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

From c0bcd11068fc13e45b253c6c315882097f94c121 Mon Sep 17 00:00:00 2001
From: Raphael Isemann
Date: Sat, 12 Sep 2020 21:49:48 +0200
Subject: [PATCH 0489/1079] [ASTImporter] Add basic support for comparing
 Stmts and compare function bodies

Right now the ASTImporter assumes for most Expr nodes that they are always
equal, which leads to non-compatible declarations ending up being merged.
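As a concrete illustration of the problem, consider the hand-written example
below. It is not taken from the patch or its tests; the namespaces merely
stand in for two separate translation units, and the names are invented.

namespace tu1 {
// First version of the function, as seen in one translation unit.
int value() { return 1; }
} // namespace tu1

namespace tu2 {
// Conflicting version from another translation unit: same signature,
// different body.
int value() { return 2; }
} // namespace tu2

With the old comparison, the two bodies compare as structurally equivalent
because the IntegerLiteral values 1 and 2 are never inspected, so an importer
relying on structural equivalence can merge the two versions of `value`. With
the IsStmtEquivalent overload for IntegerLiteral added below, the bodies now
compare as different.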
This patch adds the basic framework for comparing Stmts (and with that also Exprs) and implements the custom checks for a few Stmt subclasses. I'll implement the remaining subclasses in follow up patches (mostly because there are a lot of subclasses and some of them require further changes like having GNU language in the testing framework) The motivation for this is that in LLDB we try to import libc++ source code and some of the types we are importing there contain expressions (e.g. because they use `enable_if`), so those declarations are currently merged even if they are completely different (e.g. `enable_if ...` and `enable_if ...` are currently considered equal which is clearly not true). Reviewed By: martong, balazske Differential Revision: https://reviews.llvm.org/D87444 --- .../clang/AST/ASTStructuralEquivalence.h | 7 + clang/lib/AST/ASTStructuralEquivalence.cpp | 244 ++++++++++++- .../AST/StructuralEquivalenceTest.cpp | 322 +++++++++++++++++- 3 files changed, 541 insertions(+), 32 deletions(-) diff --git a/clang/include/clang/AST/ASTStructuralEquivalence.h b/clang/include/clang/AST/ASTStructuralEquivalence.h index 36a42070fd281..c958a16aba213 100644 --- a/clang/include/clang/AST/ASTStructuralEquivalence.h +++ b/clang/include/clang/AST/ASTStructuralEquivalence.h @@ -97,6 +97,13 @@ struct StructuralEquivalenceContext { /// \c VisitedDecls members) and can cause faulty equivalent results. bool IsEquivalent(QualType T1, QualType T2); + /// Determine whether the two statements are structurally equivalent. + /// Implementation functions (all static functions in + /// ASTStructuralEquivalence.cpp) must never call this function because that + /// will wreak havoc the internal state (\c DeclsToCheck and + /// \c VisitedDecls members) and can cause faulty equivalent results. + bool IsEquivalent(Stmt *S1, Stmt *S2); + /// Find the index of the given anonymous struct/union within its /// context. /// diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp index 8b5b2444f1e25..fafcfce269d75 100644 --- a/clang/lib/AST/ASTStructuralEquivalence.cpp +++ b/clang/lib/AST/ASTStructuralEquivalence.cpp @@ -68,7 +68,12 @@ #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/ExprConcepts.h" +#include "clang/AST/ExprObjC.h" +#include "clang/AST/ExprOpenMP.h" #include "clang/AST/NestedNameSpecifier.h" +#include "clang/AST/StmtObjC.h" +#include "clang/AST/StmtOpenMP.h" #include "clang/AST/TemplateBase.h" #include "clang/AST/TemplateName.h" #include "clang/AST/Type.h" @@ -149,32 +154,230 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, return true; } -/// Determine structural equivalence of two expressions. -static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, - const Expr *E1, const Expr *E2) { - if (!E1 || !E2) - return E1 == E2; +namespace { +/// Encapsulates Stmt comparison logic. +class StmtComparer { + StructuralEquivalenceContext &Context; + + // IsStmtEquivalent overloads. Each overload compares a specific statement + // and only has to compare the data that is specific to the specific statement + // class. Should only be called from TraverseStmt. 
+
+  bool IsStmtEquivalent(const AddrLabelExpr *E1, const AddrLabelExpr *E2) {
+    return IsStructurallyEquivalent(Context, E1->getLabel(), E2->getLabel());
+  }
+
+  bool IsStmtEquivalent(const AtomicExpr *E1, const AtomicExpr *E2) {
+    return E1->getOp() == E2->getOp();
+  }
+
+  bool IsStmtEquivalent(const BinaryOperator *E1, const BinaryOperator *E2) {
+    return E1->getOpcode() == E2->getOpcode();
+  }
 
-  if (auto *DE1 = dyn_cast<DependentScopeDeclRefExpr>(E1)) {
-    auto *DE2 = dyn_cast<DependentScopeDeclRefExpr>(E2);
-    if (!DE2)
+  bool IsStmtEquivalent(const CallExpr *E1, const CallExpr *E2) {
+    // FIXME: IsStructurallyEquivalent requires non-const Decls.
+    Decl *Callee1 = const_cast<Decl *>(E1->getCalleeDecl());
+    Decl *Callee2 = const_cast<Decl *>(E2->getCalleeDecl());
+
+    // Compare whether both calls know their callee.
+    if (static_cast<bool>(Callee1) != static_cast<bool>(Callee2))
       return false;
+
+    // Both calls have no callee, so nothing to do.
+    if (!static_cast<bool>(Callee1))
+      return true;
+
+    assert(Callee2);
+    return IsStructurallyEquivalent(Context, Callee1, Callee2);
+  }
+
+  bool IsStmtEquivalent(const CharacterLiteral *E1,
+                        const CharacterLiteral *E2) {
+    return E1->getValue() == E2->getValue() && E1->getKind() == E2->getKind();
+  }
+
+  bool IsStmtEquivalent(const ChooseExpr *E1, const ChooseExpr *E2) {
+    return true; // Semantics only depend on children.
+  }
+
+  bool IsStmtEquivalent(const CompoundStmt *E1, const CompoundStmt *E2) {
+    // Number of children is actually checked by the generic children comparison
+    // code, but a CompoundStmt is one of the few statements where the number of
+    // children frequently differs and the number of statements is also always
+    // precomputed. Directly comparing the number of children here is thus
+    // just an optimization.
+    return E1->size() == E2->size();
+  }
+
+  bool IsStmtEquivalent(const DependentScopeDeclRefExpr *DE1,
+                        const DependentScopeDeclRefExpr *DE2) {
     if (!IsStructurallyEquivalent(Context, DE1->getDeclName(),
                                   DE2->getDeclName()))
       return false;
     return IsStructurallyEquivalent(Context, DE1->getQualifier(),
                                     DE2->getQualifier());
-  } else if (auto CastE1 = dyn_cast<CastExpr>(E1)) {
-    auto *CastE2 = dyn_cast<CastExpr>(E2);
-    if (!CastE2)
+  }
+
+  bool IsStmtEquivalent(const Expr *E1, const Expr *E2) {
+    return IsStructurallyEquivalent(Context, E1->getType(), E2->getType());
+  }
+
+  bool IsStmtEquivalent(const ExpressionTraitExpr *E1,
+                        const ExpressionTraitExpr *E2) {
+    return E1->getTrait() == E2->getTrait() && E1->getValue() == E2->getValue();
+  }
+
+  bool IsStmtEquivalent(const FloatingLiteral *E1, const FloatingLiteral *E2) {
+    return E1->isExact() == E2->isExact() && E1->getValue() == E2->getValue();
+  }
+
+  bool IsStmtEquivalent(const ImplicitCastExpr *CastE1,
+                        const ImplicitCastExpr *CastE2) {
+    return IsStructurallyEquivalent(Context, CastE1->getType(),
+                                    CastE2->getType());
+  }
+
+  bool IsStmtEquivalent(const IntegerLiteral *E1, const IntegerLiteral *E2) {
+    return E1->getValue() == E2->getValue();
+  }
+
+  bool IsStmtEquivalent(const ObjCStringLiteral *E1,
+                        const ObjCStringLiteral *E2) {
+    // Just wraps a StringLiteral child.
+ return true; + } + + bool IsStmtEquivalent(const Stmt *S1, const Stmt *S2) { return true; } + + bool IsStmtEquivalent(const SourceLocExpr *E1, const SourceLocExpr *E2) { + return E1->getIdentKind() == E2->getIdentKind(); + } + + bool IsStmtEquivalent(const StmtExpr *E1, const StmtExpr *E2) { + return E1->getTemplateDepth() == E2->getTemplateDepth(); + } + + bool IsStmtEquivalent(const StringLiteral *E1, const StringLiteral *E2) { + return E1->getBytes() == E2->getBytes(); + } + + bool IsStmtEquivalent(const SubstNonTypeTemplateParmExpr *E1, + const SubstNonTypeTemplateParmExpr *E2) { + return IsStructurallyEquivalent(Context, E1->getParameter(), + E2->getParameter()); + } + + bool IsStmtEquivalent(const SubstNonTypeTemplateParmPackExpr *E1, + const SubstNonTypeTemplateParmPackExpr *E2) { + return IsStructurallyEquivalent(Context, E1->getArgumentPack(), + E2->getArgumentPack()); + } + + bool IsStmtEquivalent(const TypeTraitExpr *E1, const TypeTraitExpr *E2) { + if (E1->getTrait() != E2->getTrait()) + return false; + + for (auto Pair : zip_longest(E1->getArgs(), E2->getArgs())) { + Optional Child1 = std::get<0>(Pair); + Optional Child2 = std::get<1>(Pair); + // Different number of args. + if (!Child1 || !Child2) + return false; + + if (!IsStructurallyEquivalent(Context, (*Child1)->getType(), + (*Child2)->getType())) + return false; + } + return true; + } + + bool IsStmtEquivalent(const UnaryExprOrTypeTraitExpr *E1, + const UnaryExprOrTypeTraitExpr *E2) { + if (E1->getKind() != E2->getKind()) + return false; + return IsStructurallyEquivalent(Context, E1->getTypeOfArgument(), + E2->getTypeOfArgument()); + } + + bool IsStmtEquivalent(const UnaryOperator *E1, const UnaryOperator *E2) { + return E1->getOpcode() == E2->getOpcode(); + } + + bool IsStmtEquivalent(const VAArgExpr *E1, const VAArgExpr *E2) { + // Semantics only depend on children. + return true; + } + + /// End point of the traversal chain. + bool TraverseStmt(const Stmt *S1, const Stmt *S2) { return true; } + + // Create traversal methods that traverse the class hierarchy and return + // the accumulated result of the comparison. Each TraverseStmt overload + // calls the TraverseStmt overload of the parent class. For example, + // the TraverseStmt overload for 'BinaryOperator' calls the TraverseStmt + // overload of 'Expr' which then calls the overload for 'Stmt'. +#define STMT(CLASS, PARENT) \ + bool TraverseStmt(const CLASS *S1, const CLASS *S2) { \ + if (!TraverseStmt(static_cast(S1), \ + static_cast(S2))) \ + return false; \ + return IsStmtEquivalent(S1, S2); \ + } +#include "clang/AST/StmtNodes.inc" + +public: + StmtComparer(StructuralEquivalenceContext &C) : Context(C) {} + + /// Determine whether two statements are equivalent. The statements have to + /// be of the same kind. The children of the statements and their properties + /// are not compared by this function. + bool IsEquivalent(const Stmt *S1, const Stmt *S2) { + if (S1->getStmtClass() != S2->getStmtClass()) + return false; + + // Each TraverseStmt walks the class hierarchy from the leaf class to + // the root class 'Stmt' (e.g. 'BinaryOperator' -> 'Expr' -> 'Stmt'). Cast + // the Stmt we have here to its specific subclass so that we call the + // overload that walks the whole class hierarchy from leaf to root (e.g., + // cast to 'BinaryOperator' so that 'Expr' and 'Stmt' is traversed). 
+ switch (S1->getStmtClass()) { + case Stmt::NoStmtClass: + llvm_unreachable("Can't traverse NoStmtClass"); +#define STMT(CLASS, PARENT) \ + case Stmt::StmtClass::CLASS##Class: \ + return TraverseStmt(static_cast(S1), \ + static_cast(S2)); +#define ABSTRACT_STMT(S) +#include "clang/AST/StmtNodes.inc" + } + llvm_unreachable("Invalid statement kind"); + } +}; +} // namespace + +/// Determine structural equivalence of two statements. +static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, + const Stmt *S1, const Stmt *S2) { + if (!S1 || !S2) + return S1 == S2; + + // Compare the statements itself. + StmtComparer Comparer(Context); + if (!Comparer.IsEquivalent(S1, S2)) + return false; + + // Iterate over the children of both statements and also compare them. + for (auto Pair : zip_longest(S1->children(), S2->children())) { + Optional Child1 = std::get<0>(Pair); + Optional Child2 = std::get<1>(Pair); + // One of the statements has a different amount of children than the other, + // so the statements can't be equivalent. + if (!Child1 || !Child2) return false; - if (!IsStructurallyEquivalent(Context, CastE1->getType(), - CastE2->getType())) + if (!IsStructurallyEquivalent(Context, *Child1, *Child2)) return false; - return IsStructurallyEquivalent(Context, CastE1->getSubExpr(), - CastE2->getSubExpr()); } - // FIXME: Handle other kind of expressions! return true; } @@ -1790,6 +1993,15 @@ bool StructuralEquivalenceContext::IsEquivalent(QualType T1, QualType T2) { return !Finish(); } +bool StructuralEquivalenceContext::IsEquivalent(Stmt *S1, Stmt *S2) { + assert(DeclsToCheck.empty()); + assert(VisitedDecls.empty()); + if (!::IsStructurallyEquivalent(*this, S1, S2)) + return false; + + return !Finish(); +} + bool StructuralEquivalenceContext::CheckCommonEquivalence(Decl *D1, Decl *D2) { // Check for equivalent described template. TemplateDecl *Template1 = D1->getDescribedTemplate(); diff --git a/clang/unittests/AST/StructuralEquivalenceTest.cpp b/clang/unittests/AST/StructuralEquivalenceTest.cpp index 2b5ce0fed51d6..d71c65fa3b61a 100644 --- a/clang/unittests/AST/StructuralEquivalenceTest.cpp +++ b/clang/unittests/AST/StructuralEquivalenceTest.cpp @@ -19,14 +19,10 @@ struct StructuralEquivalenceTest : ::testing::Test { std::unique_ptr AST0, AST1; std::string Code0, Code1; // Buffers for SourceManager - // Get a pair of node pointers into the synthesized AST from the given code - // snippets. To determine the returned node, a separate matcher is specified - // for both snippets. The first matching node is returned. - template - std::tuple - makeDecls(const std::string &SrcCode0, const std::string &SrcCode1, - TestLanguage Lang, const MatcherType &Matcher0, - const MatcherType &Matcher1) { + // Parses the source code in the specified language and sets the ASTs of + // the current test instance to the parse result. + void makeASTUnits(const std::string &SrcCode0, const std::string &SrcCode1, + TestLanguage Lang) { this->Code0 = SrcCode0; this->Code1 = SrcCode1; std::vector Args = getCommandLineArgsForTesting(Lang); @@ -35,6 +31,17 @@ struct StructuralEquivalenceTest : ::testing::Test { AST0 = tooling::buildASTFromCodeWithArgs(Code0, Args, InputFileName); AST1 = tooling::buildASTFromCodeWithArgs(Code1, Args, InputFileName); + } + + // Get a pair of node pointers into the synthesized AST from the given code + // snippets. To determine the returned node, a separate matcher is specified + // for both snippets. The first matching node is returned. 
+ template + std::tuple + makeDecls(const std::string &SrcCode0, const std::string &SrcCode1, + TestLanguage Lang, const MatcherType &Matcher0, + const MatcherType &Matcher1) { + makeASTUnits(SrcCode0, SrcCode1, Lang); NodeType *D0 = FirstDeclMatcher().match( AST0->getASTContext().getTranslationUnitDecl(), Matcher0); @@ -47,14 +54,7 @@ struct StructuralEquivalenceTest : ::testing::Test { std::tuple makeTuDecls(const std::string &SrcCode0, const std::string &SrcCode1, TestLanguage Lang) { - this->Code0 = SrcCode0; - this->Code1 = SrcCode1; - std::vector Args = getCommandLineArgsForTesting(Lang); - - const char *const InputFileName = "input.cc"; - - AST0 = tooling::buildASTFromCodeWithArgs(Code0, Args, InputFileName); - AST1 = tooling::buildASTFromCodeWithArgs(Code1, Args, InputFileName); + makeASTUnits(SrcCode0, SrcCode1, Lang); return std::make_tuple(AST0->getASTContext().getTranslationUnitDecl(), AST1->getASTContext().getTranslationUnitDecl()); @@ -80,6 +80,56 @@ struct StructuralEquivalenceTest : ::testing::Test { return makeDecls(SrcCode0, SrcCode1, Lang, Matcher); } + // Wraps a Stmt and the ASTContext that contains it. + struct StmtWithASTContext { + Stmt *S; + ASTContext *Context; + explicit StmtWithASTContext(Stmt &S, ASTContext &Context) + : S(&S), Context(&Context) {} + explicit StmtWithASTContext(FunctionDecl *FD) + : S(FD->getBody()), Context(&FD->getASTContext()) {} + }; + + // Get a pair of node pointers into the synthesized AST from the given code + // snippets. To determine the returned node, a separate matcher is specified + // for both snippets. The first matching node is returned. + template + std::tuple + makeStmts(const std::string &SrcCode0, const std::string &SrcCode1, + TestLanguage Lang, const MatcherType &Matcher0, + const MatcherType &Matcher1) { + makeASTUnits(SrcCode0, SrcCode1, Lang); + + Stmt *S0 = FirstDeclMatcher().match( + AST0->getASTContext().getTranslationUnitDecl(), Matcher0); + Stmt *S1 = FirstDeclMatcher().match( + AST1->getASTContext().getTranslationUnitDecl(), Matcher1); + + return std::make_tuple(StmtWithASTContext(*S0, AST0->getASTContext()), + StmtWithASTContext(*S1, AST1->getASTContext())); + } + + // Get a pair of node pointers into the synthesized AST from the given code + // snippets. The same matcher is used for both snippets. + template + std::tuple + makeStmts(const std::string &SrcCode0, const std::string &SrcCode1, + TestLanguage Lang, const MatcherType &AMatcher) { + return makeStmts(SrcCode0, SrcCode1, Lang, AMatcher, AMatcher); + } + + // Convenience function for makeStmts that wraps the code inside a function + // body. 
+ template + std::tuple + makeWrappedStmts(const std::string &SrcCode0, const std::string &SrcCode1, + TestLanguage Lang, const MatcherType &AMatcher) { + auto Wrap = [](const std::string &Src) { + return "void wrapped() {" + Src + ";}"; + }; + return makeStmts(Wrap(SrcCode0), Wrap(SrcCode1), Lang, AMatcher); + } + bool testStructuralMatch(Decl *D0, Decl *D1) { llvm::DenseSet> NonEquivalentDecls01; llvm::DenseSet> NonEquivalentDecls10; @@ -95,6 +145,26 @@ struct StructuralEquivalenceTest : ::testing::Test { return Eq01; } + bool testStructuralMatch(StmtWithASTContext S0, StmtWithASTContext S1) { + llvm::DenseSet> NonEquivalentDecls01; + llvm::DenseSet> NonEquivalentDecls10; + StructuralEquivalenceContext Ctx01( + *S0.Context, *S1.Context, NonEquivalentDecls01, + StructuralEquivalenceKind::Default, false, false); + StructuralEquivalenceContext Ctx10( + *S1.Context, *S0.Context, NonEquivalentDecls10, + StructuralEquivalenceKind::Default, false, false); + bool Eq01 = Ctx01.IsEquivalent(S0.S, S1.S); + bool Eq10 = Ctx10.IsEquivalent(S1.S, S0.S); + EXPECT_EQ(Eq01, Eq10); + return Eq01; + } + + bool + testStructuralMatch(std::tuple t) { + return testStructuralMatch(get<0>(t), get<1>(t)); + } + bool testStructuralMatch(std::tuple t) { return testStructuralMatch(get<0>(t), get<1>(t)); } @@ -1375,5 +1445,225 @@ TEST_F(StructuralEquivalenceCacheTest, Cycle) { findDeclPair(TU, functionDecl(hasName("x"))))); } +struct StructuralEquivalenceStmtTest : StructuralEquivalenceTest {}; + +/// Fallback matcher to be used only when there is no specific matcher for a +/// Expr subclass. Remove this once all Expr subclasses have their own matcher. +static auto &fallbackExprMatcher = expr; + +TEST_F(StructuralEquivalenceStmtTest, AddrLabelExpr) { + auto t = makeWrappedStmts("lbl: &&lbl;", "lbl: &&lbl;", Lang_CXX03, + addrLabelExpr()); + EXPECT_TRUE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, AddrLabelExprDifferentLabel) { + auto t = makeWrappedStmts("lbl1: lbl2: &&lbl1;", "lbl1: lbl2: &&lbl2;", + Lang_CXX03, addrLabelExpr()); + // FIXME: Should be false. LabelDecl are incorrectly matched. 
+ EXPECT_TRUE(testStructuralMatch(t)); +} + +static const std::string MemoryOrderSrc = R"( +enum memory_order { + memory_order_relaxed, + memory_order_consume, + memory_order_acquire, + memory_order_release, + memory_order_acq_rel, + memory_order_seq_cst +}; +)"; + +TEST_F(StructuralEquivalenceStmtTest, AtomicExpr) { + std::string Prefix = "char a, b; " + MemoryOrderSrc; + auto t = makeStmts( + Prefix + + "void wrapped() { __atomic_load(&a, &b, memory_order_seq_cst); }", + Prefix + + "void wrapped() { __atomic_load(&a, &b, memory_order_seq_cst); }", + Lang_CXX03, atomicExpr()); + EXPECT_TRUE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, AtomicExprDifferentOp) { + std::string Prefix = "char a, b; " + MemoryOrderSrc; + auto t = makeStmts( + Prefix + + "void wrapped() { __atomic_load(&a, &b, memory_order_seq_cst); }", + Prefix + + "void wrapped() { __atomic_store(&a, &b, memory_order_seq_cst); }", + Lang_CXX03, atomicExpr()); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, BinaryOperator) { + auto t = makeWrappedStmts("1 + 1", "1 + 1", Lang_CXX03, binaryOperator()); + EXPECT_TRUE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, BinaryOperatorDifferentOps) { + auto t = makeWrappedStmts("1 + 1", "1 - 1", Lang_CXX03, binaryOperator()); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, CallExpr) { + std::string Src = "int call(); int wrapped() { call(); }"; + auto t = makeStmts(Src, Src, Lang_CXX03, callExpr()); + EXPECT_TRUE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, CallExprDifferentCallee) { + std::string FunctionSrc = "int func1(); int func2();\n"; + auto t = makeStmts(FunctionSrc + "void wrapper() { func1(); }", + FunctionSrc + "void wrapper() { func2(); }", Lang_CXX03, + callExpr()); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, CharacterLiteral) { + auto t = makeWrappedStmts("'a'", "'a'", Lang_CXX03, characterLiteral()); + EXPECT_TRUE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, CharacterLiteralDifferentValues) { + auto t = makeWrappedStmts("'a'", "'b'", Lang_CXX03, characterLiteral()); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, ExpressionTraitExpr) { + auto t = makeWrappedStmts("__is_lvalue_expr(1)", "__is_lvalue_expr(1)", + Lang_CXX03, fallbackExprMatcher()); + EXPECT_TRUE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, ExpressionTraitExprDifferentKind) { + auto t = makeWrappedStmts("__is_lvalue_expr(1)", "__is_rvalue_expr(1)", + Lang_CXX03, fallbackExprMatcher()); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, FloatingLiteral) { + auto t = makeWrappedStmts("1.0", "1.0", Lang_CXX03, fallbackExprMatcher()); + EXPECT_TRUE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, FloatingLiteralDifferentSpelling) { + auto t = makeWrappedStmts("0x10.1p0", "16.0625", Lang_CXX17, + fallbackExprMatcher()); + // Same value but with different spelling is equivalent. 
+ EXPECT_TRUE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, FloatingLiteralDifferentType) { + auto t = makeWrappedStmts("1.0", "1.0f", Lang_CXX03, fallbackExprMatcher()); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, FloatingLiteralDifferentValue) { + auto t = makeWrappedStmts("1.01", "1.0", Lang_CXX03, fallbackExprMatcher()); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, IntegerLiteral) { + auto t = makeWrappedStmts("1", "1", Lang_CXX03, integerLiteral()); + EXPECT_TRUE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, IntegerLiteralDifferentSpelling) { + auto t = makeWrappedStmts("1", "0x1", Lang_CXX03, integerLiteral()); + // Same value but with different spelling is equivalent. + EXPECT_TRUE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, IntegerLiteralDifferentValue) { + auto t = makeWrappedStmts("1", "2", Lang_CXX03, integerLiteral()); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, IntegerLiteralDifferentTypes) { + auto t = makeWrappedStmts("1", "1L", Lang_CXX03, integerLiteral()); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, ObjCStringLiteral) { + auto t = + makeWrappedStmts("@\"a\"", "@\"a\"", Lang_OBJCXX, fallbackExprMatcher()); + EXPECT_TRUE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, ObjCStringLiteralDifferentContent) { + auto t = + makeWrappedStmts("@\"a\"", "@\"b\"", Lang_OBJCXX, fallbackExprMatcher()); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, StringLiteral) { + auto t = makeWrappedStmts("\"a\"", "\"a\"", Lang_CXX03, stringLiteral()); + EXPECT_TRUE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, StringLiteralDifferentContent) { + auto t = makeWrappedStmts("\"a\"", "\"b\"", Lang_CXX03, stringLiteral()); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, StringLiteralDifferentLength) { + auto t = makeWrappedStmts("\"a\"", "\"aa\"", Lang_CXX03, stringLiteral()); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, TypeTraitExpr) { + auto t = makeWrappedStmts("__is_pod(int)", "__is_pod(int)", Lang_CXX03, + fallbackExprMatcher()); + EXPECT_TRUE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, TypeTraitExprDifferentType) { + auto t = makeWrappedStmts("__is_pod(int)", "__is_pod(long)", Lang_CXX03, + fallbackExprMatcher()); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, TypeTraitExprDifferentTrait) { + auto t = makeWrappedStmts( + "__is_pod(int)", "__is_trivially_constructible(int)", Lang_CXX03, expr()); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, TypeTraitExprDifferentTraits) { + auto t = makeWrappedStmts("__is_constructible(int)", + "__is_constructible(int, int)", Lang_CXX03, expr()); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, UnaryExprOrTypeTraitExpr) { + auto t = makeWrappedStmts("sizeof(int)", "sizeof(int)", Lang_CXX03, + unaryExprOrTypeTraitExpr()); + EXPECT_TRUE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, UnaryExprOrTypeTraitExprDifferentKind) { + auto t = makeWrappedStmts("sizeof(int)", "alignof(long)", Lang_CXX11, + unaryExprOrTypeTraitExpr()); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, 
UnaryExprOrTypeTraitExprDifferentType) { + auto t = makeWrappedStmts("sizeof(int)", "sizeof(long)", Lang_CXX03, + unaryExprOrTypeTraitExpr()); + EXPECT_FALSE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, UnaryOperator) { + auto t = makeWrappedStmts("+1", "+1", Lang_CXX03, unaryOperator()); + EXPECT_TRUE(testStructuralMatch(t)); +} + +TEST_F(StructuralEquivalenceStmtTest, UnaryOperatorDifferentOps) { + auto t = makeWrappedStmts("+1", "-1", Lang_CXX03, unaryOperator()); + EXPECT_FALSE(testStructuralMatch(t)); +} + } // end namespace ast_matchers } // end namespace clang From 8889faaed0b7c8545b67b040c380b983264ebc67 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 13 Sep 2020 11:49:14 -0700 Subject: [PATCH 0490/1079] [SelectionDAG] Remove default for 'unsigned' Alignment for getLoad/getStore/getExtLoad/getTruncStore. Add default for MaybeAlign version. NFCI We want to remove the unsigned signatures eventually. This change migrates any that don't explicitly pass an alignment. --- llvm/include/llvm/CodeGen/SelectionDAG.h | 33 ++++++++++++------------ 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 8db5249743064..b5b18f49e104f 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1178,14 +1178,15 @@ class SelectionDAG { /// This function will set the MOLoad flag on MMOFlags, but you can set it if /// you want. The MOStore flag must not be set. SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, - MachinePointerInfo PtrInfo, MaybeAlign Alignment, + MachinePointerInfo PtrInfo, + MaybeAlign Alignment = MaybeAlign(), MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr); /// FIXME: Remove once transition to Align is over. inline SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, - MachinePointerInfo PtrInfo, unsigned Alignment = 0, + MachinePointerInfo PtrInfo, unsigned Alignment, MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr) { @@ -1197,14 +1198,14 @@ class SelectionDAG { SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, - MaybeAlign Alignment, + MaybeAlign Alignment = MaybeAlign(), MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes()); /// FIXME: Remove once transition to Align is over. 
inline SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, - unsigned Alignment = 0, + unsigned Alignment, MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes()) { return getExtLoad(ExtType, dl, VT, Chain, Ptr, PtrInfo, MemVT, @@ -1221,13 +1222,12 @@ class SelectionDAG { MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr); - inline SDValue - getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, - const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, - MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment, - MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, - const AAMDNodes &AAInfo = AAMDNodes(), - const MDNode *Ranges = nullptr) { + inline SDValue getLoad( + ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, + SDValue Chain, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, + EVT MemVT, MaybeAlign Alignment = MaybeAlign(), + MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, + const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr) { // Ensures that codegen never sees a None Alignment. return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, PtrInfo, MemVT, Alignment.getValueOr(getEVTAlign(MemVT)), MMOFlags, AAInfo, @@ -1237,7 +1237,7 @@ class SelectionDAG { inline SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, - MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment = 0, + MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment, MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr) { @@ -1260,7 +1260,7 @@ class SelectionDAG { const AAMDNodes &AAInfo = AAMDNodes()); inline SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, - MachinePointerInfo PtrInfo, MaybeAlign Alignment, + MachinePointerInfo PtrInfo, MaybeAlign Alignment = MaybeAlign(), MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes()) { return getStore(Chain, dl, Val, Ptr, PtrInfo, @@ -1270,7 +1270,7 @@ class SelectionDAG { /// FIXME: Remove once transition to Align is over. inline SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, - MachinePointerInfo PtrInfo, unsigned Alignment = 0, + MachinePointerInfo PtrInfo, unsigned Alignment, MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes()) { return getStore(Chain, dl, Val, Ptr, PtrInfo, MaybeAlign(Alignment), @@ -1285,7 +1285,8 @@ class SelectionDAG { const AAMDNodes &AAInfo = AAMDNodes()); inline SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, - MachinePointerInfo PtrInfo, EVT SVT, MaybeAlign Alignment, + MachinePointerInfo PtrInfo, EVT SVT, + MaybeAlign Alignment = MaybeAlign(), MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes()) { return getTruncStore(Chain, dl, Val, Ptr, PtrInfo, SVT, @@ -1295,7 +1296,7 @@ class SelectionDAG { /// FIXME: Remove once transition to Align is over. 
inline SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, - MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment = 0, + MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment, MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes()) { return getTruncStore(Chain, dl, Val, Ptr, PtrInfo, SVT, From 6e06f1cd0816b03d9336083667a0c71760d6b99f Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sun, 13 Sep 2020 12:54:36 -0700 Subject: [PATCH 0491/1079] GCOVProfiling: Avoid use-after-move Turns out this was use-after-move of function_ref, which is trivially copyable and movable, so the move did nothing and use after move was safe. But since this function_ref is being copied into a std::function, change the function_ref to be std::function to avoid extra layers of type erasure indirection - and then it's a real use after move, and fix that by referring to the moved-to member variable rather than the moved-from parameter. --- .../lib/Transforms/Instrumentation/GCOVProfiling.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 68199f6379d40..c72c44809acc7 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -99,10 +99,10 @@ class GCOVProfiler { public: GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {} GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {} - bool runOnModule(Module &M, - function_ref GetBFI, - function_ref GetBPI, - function_ref GetTLI); + bool + runOnModule(Module &M, function_ref GetBFI, + function_ref GetBPI, + std::function GetTLI); void write(uint32_t i) { char Bytes[4]; @@ -609,7 +609,7 @@ std::string GCOVProfiler::mangleName(const DICompileUnit *CU, bool GCOVProfiler::runOnModule( Module &M, function_ref GetBFI, function_ref GetBPI, - function_ref GetTLI) { + std::function GetTLI) { this->M = &M; this->GetTLI = std::move(GetTLI); Ctx = &M.getContext(); @@ -622,7 +622,7 @@ bool GCOVProfiler::runOnModule( FilterRe = createRegexesFromString(Options.Filter); ExcludeRe = createRegexesFromString(Options.Exclude); - emitProfileNotes(CUNode, HasExecOrFork, GetBFI, GetBPI, GetTLI); + emitProfileNotes(CUNode, HasExecOrFork, GetBFI, GetBPI, this->GetTLI); return true; } From 7940af02baa27e23ebbd9cd09b24ef1b24ea8cec Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sun, 13 Sep 2020 13:07:58 -0700 Subject: [PATCH 0492/1079] Correct end-of-namespace comment to be clang-tidy/LLVM style appropriate --- llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h index b3971e49754ea..2766cc5e6263b 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h +++ b/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h @@ -26,5 +26,5 @@ class GCOVProfilerPass : public PassInfoMixin { GCOVOptions GCOVOpts; }; -} // End llvm namespace +} // namespace llvm #endif From ce89eeee16dd1e7ca6eead3b9d7f256ca583f6e1 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sun, 13 Sep 2020 13:08:17 -0700 Subject: [PATCH 0493/1079] PPCInstrInfo: Fix readability-inconsistent-declaration-parameter-name clang-tidy warning Reduces the chance of confusion when calling the function with autocomplete (will show the 
more accurate/informative variable name), etc. --- llvm/lib/Target/PowerPC/PPCInstrInfo.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 2f867b16aa24f..77ee236020a8a 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -497,8 +497,9 @@ class PPCInstrInfo : public PPCGenInstrInfo { /// Get the base operand and byte offset of an instruction that reads/writes /// memory. bool getMemOperandsWithOffsetWidth( - const MachineInstr &MI, SmallVectorImpl &BaseOps, - int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, + const MachineInstr &LdSt, + SmallVectorImpl &BaseOps, int64_t &Offset, + bool &OffsetIsScalable, unsigned &Width, const TargetRegisterInfo *TRI) const override; /// Returns true if the two given memory operations should be scheduled From cb3e1dd6c31ef0e0c83dcd1b4ef0b65a8b75a673 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 13 Sep 2020 22:16:24 +0200 Subject: [PATCH 0494/1079] [ARM] Add some fmin/fmax tests with commuted operands (NFC) As well as vector commuted operands. --- llvm/test/CodeGen/ARM/fminmax-folds.ll | 248 +++++++++++++++++++------ 1 file changed, 192 insertions(+), 56 deletions(-) diff --git a/llvm/test/CodeGen/ARM/fminmax-folds.ll b/llvm/test/CodeGen/ARM/fminmax-folds.ll index 01e5ab4a46027..30dfd4915d892 100644 --- a/llvm/test/CodeGen/ARM/fminmax-folds.ll +++ b/llvm/test/CodeGen/ARM/fminmax-folds.ll @@ -5,6 +5,10 @@ declare float @llvm.minnum.f32(float, float) declare float @llvm.maxnum.f32(float, float) declare float @llvm.minimum.f32(float, float) declare float @llvm.maximum.f32(float, float) +declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) +declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) +declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>) +declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>) define float @test_minnum_const_nan(float %x) { ; CHECK-LABEL: test_minnum_const_nan: @@ -234,8 +238,8 @@ define float @test_minimum_const_inf_nnan(float %x) { ret float %r } -define float @test_minnum_const_neg_inf_nnan(float %x) { -; CHECK-LABEL: test_minnum_const_neg_inf_nnan: +define float @test_minnum_const_inf_nnan_comm(float %x) { +; CHECK-LABEL: test_minnum_const_inf_nnan_comm: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr s0, .LCPI16_0 ; CHECK-NEXT: vmov s2, r0 @@ -245,6 +249,138 @@ define float @test_minnum_const_neg_inf_nnan(float %x) { ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI16_0: +; CHECK-NEXT: .long 0x7f800000 @ float +Inf + %r = call nnan float @llvm.minnum.f32(float 0x7ff0000000000000, float %x) + ret float %r +} + +define float @test_maxnum_const_inf_nnan_comm(float %x) { +; CHECK-LABEL: test_maxnum_const_inf_nnan_comm: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI17_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI17_0: +; CHECK-NEXT: .long 0x7f800000 @ float +Inf + %r = call nnan float @llvm.maxnum.f32(float 0x7ff0000000000000, float %x) + ret float %r +} + +define float @test_maximum_const_inf_nnan_comm(float %x) { +; CHECK-LABEL: test_maximum_const_inf_nnan_comm: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI18_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmax.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; 
CHECK-NEXT: .LCPI18_0: +; CHECK-NEXT: .long 0x7f800000 @ float +Inf + %r = call nnan float @llvm.maximum.f32(float 0x7ff0000000000000, float %x) + ret float %r +} + +define float @test_minimum_const_inf_nnan_comm(float %x) { +; CHECK-LABEL: test_minimum_const_inf_nnan_comm: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI19_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmin.f32 d0, d1, d0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI19_0: +; CHECK-NEXT: .long 0x7f800000 @ float +Inf + %r = call nnan float @llvm.minimum.f32(float 0x7ff0000000000000, float %x) + ret float %r +} + +define <2 x float> @test_minnum_const_inf_nnan_comm_vec(<2 x float> %x) { +; CHECK-LABEL: test_minnum_const_inf_nnan_comm_vec: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, .LCPI20_0 +; CHECK-NEXT: vmov d17, r0, r1 +; CHECK-NEXT: vminnm.f32 d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 3 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI20_0: +; CHECK-NEXT: .long 0x7f800000 @ float +Inf +; CHECK-NEXT: .long 0x7f800000 @ float +Inf + %r = call nnan <2 x float> @llvm.minnum.v2f32(<2 x float> , <2 x float> %x) + ret <2 x float> %r +} + +define <2 x float> @test_maxnum_const_inf_nnan_comm_vec(<2 x float> %x) { +; CHECK-LABEL: test_maxnum_const_inf_nnan_comm_vec: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, .LCPI21_0 +; CHECK-NEXT: vmov d17, r0, r1 +; CHECK-NEXT: vmaxnm.f32 d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 3 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI21_0: +; CHECK-NEXT: .long 0x7f800000 @ float +Inf +; CHECK-NEXT: .long 0x7f800000 @ float +Inf + %r = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> , <2 x float> %x) + ret <2 x float> %r +} + +define <2 x float> @test_maximum_const_inf_nnan_comm_vec(<2 x float> %x) { +; CHECK-LABEL: test_maximum_const_inf_nnan_comm_vec: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, .LCPI22_0 +; CHECK-NEXT: vmov d17, r0, r1 +; CHECK-NEXT: vmax.f32 d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 3 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI22_0: +; CHECK-NEXT: .long 0x7f800000 @ float +Inf +; CHECK-NEXT: .long 0x7f800000 @ float +Inf + %r = call nnan <2 x float> @llvm.maximum.v2f32(<2 x float> , <2 x float> %x) + ret <2 x float> %r +} + +define <2 x float> @test_minimum_const_inf_nnan_comm_vec(<2 x float> %x) { +; CHECK-LABEL: test_minimum_const_inf_nnan_comm_vec: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, .LCPI23_0 +; CHECK-NEXT: vmov d17, r0, r1 +; CHECK-NEXT: vmin.f32 d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 3 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI23_0: +; CHECK-NEXT: .long 0x7f800000 @ float +Inf +; CHECK-NEXT: .long 0x7f800000 @ float +Inf + %r = call nnan <2 x float> @llvm.minimum.v2f32(<2 x float> , <2 x float> %x) + ret <2 x float> %r +} + +define float @test_minnum_const_neg_inf_nnan(float %x) { +; CHECK-LABEL: test_minnum_const_neg_inf_nnan: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr s0, .LCPI24_0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vminnm.f32 s0, s2, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI24_0: ; CHECK-NEXT: .long 0xff800000 @ float -Inf %r = call nnan float @llvm.minnum.f32(float %x, float 0xfff0000000000000) ret float %r @@ -253,14 +389,14 @@ define float @test_minnum_const_neg_inf_nnan(float %x) { define float 
@test_maxnum_const_neg_inf_nnan(float %x) { ; CHECK-LABEL: test_maxnum_const_neg_inf_nnan: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI17_0 +; CHECK-NEXT: vldr s0, .LCPI25_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI17_0: +; CHECK-NEXT: .LCPI25_0: ; CHECK-NEXT: .long 0xff800000 @ float -Inf %r = call nnan float @llvm.maxnum.f32(float %x, float 0xfff0000000000000) ret float %r @@ -269,14 +405,14 @@ define float @test_maxnum_const_neg_inf_nnan(float %x) { define float @test_maximum_const_neg_inf_nnan(float %x) { ; CHECK-LABEL: test_maximum_const_neg_inf_nnan: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI18_0 +; CHECK-NEXT: vldr s0, .LCPI26_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmax.f32 d0, d1, d0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI18_0: +; CHECK-NEXT: .LCPI26_0: ; CHECK-NEXT: .long 0xff800000 @ float -Inf %r = call nnan float @llvm.maximum.f32(float %x, float 0xfff0000000000000) ret float %r @@ -285,14 +421,14 @@ define float @test_maximum_const_neg_inf_nnan(float %x) { define float @test_minimum_const_neg_inf_nnan(float %x) { ; CHECK-LABEL: test_minimum_const_neg_inf_nnan: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI19_0 +; CHECK-NEXT: vldr s0, .LCPI27_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmin.f32 d0, d1, d0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI19_0: +; CHECK-NEXT: .LCPI27_0: ; CHECK-NEXT: .long 0xff800000 @ float -Inf %r = call nnan float @llvm.minimum.f32(float %x, float 0xfff0000000000000) ret float %r @@ -301,14 +437,14 @@ define float @test_minimum_const_neg_inf_nnan(float %x) { define float @test_minnum_const_max(float %x) { ; CHECK-LABEL: test_minnum_const_max: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI20_0 +; CHECK-NEXT: vldr s0, .LCPI28_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vminnm.f32 s0, s2, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI20_0: +; CHECK-NEXT: .LCPI28_0: ; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %r = call float @llvm.minnum.f32(float %x, float 0x47efffffe0000000) ret float %r @@ -317,14 +453,14 @@ define float @test_minnum_const_max(float %x) { define float @test_maxnum_const_max(float %x) { ; CHECK-LABEL: test_maxnum_const_max: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI21_0 +; CHECK-NEXT: vldr s0, .LCPI29_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI21_0: +; CHECK-NEXT: .LCPI29_0: ; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %r = call float @llvm.maxnum.f32(float %x, float 0x47efffffe0000000) ret float %r @@ -333,14 +469,14 @@ define float @test_maxnum_const_max(float %x) { define float @test_maximum_const_max(float %x) { ; CHECK-LABEL: test_maximum_const_max: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI22_0 +; CHECK-NEXT: vldr s0, .LCPI30_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmax.f32 d0, d1, d0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI22_0: +; CHECK-NEXT: .LCPI30_0: ; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %r = call float @llvm.maximum.f32(float %x, float 0x47efffffe0000000) ret float %r @@ -349,14 +485,14 @@ define float 
@test_maximum_const_max(float %x) { define float @test_minimum_const_max(float %x) { ; CHECK-LABEL: test_minimum_const_max: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI23_0 +; CHECK-NEXT: vldr s0, .LCPI31_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmin.f32 d0, d1, d0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI23_0: +; CHECK-NEXT: .LCPI31_0: ; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %r = call float @llvm.minimum.f32(float %x, float 0x47efffffe0000000) ret float %r @@ -365,14 +501,14 @@ define float @test_minimum_const_max(float %x) { define float @test_minnum_const_neg_max(float %x) { ; CHECK-LABEL: test_minnum_const_neg_max: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI24_0 +; CHECK-NEXT: vldr s0, .LCPI32_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vminnm.f32 s0, s2, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI24_0: +; CHECK-NEXT: .LCPI32_0: ; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 %r = call float @llvm.minnum.f32(float %x, float 0xc7efffffe0000000) ret float %r @@ -381,14 +517,14 @@ define float @test_minnum_const_neg_max(float %x) { define float @test_maxnum_const_neg_max(float %x) { ; CHECK-LABEL: test_maxnum_const_neg_max: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI25_0 +; CHECK-NEXT: vldr s0, .LCPI33_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI25_0: +; CHECK-NEXT: .LCPI33_0: ; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 %r = call float @llvm.maxnum.f32(float %x, float 0xc7efffffe0000000) ret float %r @@ -397,14 +533,14 @@ define float @test_maxnum_const_neg_max(float %x) { define float @test_maximum_const_neg_max(float %x) { ; CHECK-LABEL: test_maximum_const_neg_max: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI26_0 +; CHECK-NEXT: vldr s0, .LCPI34_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmax.f32 d0, d1, d0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI26_0: +; CHECK-NEXT: .LCPI34_0: ; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 %r = call float @llvm.maximum.f32(float %x, float 0xc7efffffe0000000) ret float %r @@ -413,14 +549,14 @@ define float @test_maximum_const_neg_max(float %x) { define float @test_minimum_const_neg_max(float %x) { ; CHECK-LABEL: test_minimum_const_neg_max: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI27_0 +; CHECK-NEXT: vldr s0, .LCPI35_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmin.f32 d0, d1, d0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI27_0: +; CHECK-NEXT: .LCPI35_0: ; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 %r = call float @llvm.minimum.f32(float %x, float 0xc7efffffe0000000) ret float %r @@ -429,14 +565,14 @@ define float @test_minimum_const_neg_max(float %x) { define float @test_minnum_const_max_ninf(float %x) { ; CHECK-LABEL: test_minnum_const_max_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI28_0 +; CHECK-NEXT: vldr s0, .LCPI36_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vminnm.f32 s0, s2, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI28_0: +; CHECK-NEXT: .LCPI36_0: ; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %r = call ninf float @llvm.minnum.f32(float %x, float 0x47efffffe0000000) ret float %r 
@@ -445,14 +581,14 @@ define float @test_minnum_const_max_ninf(float %x) { define float @test_maxnum_const_max_ninf(float %x) { ; CHECK-LABEL: test_maxnum_const_max_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI29_0 +; CHECK-NEXT: vldr s0, .LCPI37_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI29_0: +; CHECK-NEXT: .LCPI37_0: ; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %r = call ninf float @llvm.maxnum.f32(float %x, float 0x47efffffe0000000) ret float %r @@ -461,14 +597,14 @@ define float @test_maxnum_const_max_ninf(float %x) { define float @test_maximum_const_max_ninf(float %x) { ; CHECK-LABEL: test_maximum_const_max_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI30_0 +; CHECK-NEXT: vldr s0, .LCPI38_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmax.f32 d0, d1, d0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI30_0: +; CHECK-NEXT: .LCPI38_0: ; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %r = call ninf float @llvm.maximum.f32(float %x, float 0x47efffffe0000000) ret float %r @@ -477,14 +613,14 @@ define float @test_maximum_const_max_ninf(float %x) { define float @test_minimum_const_max_ninf(float %x) { ; CHECK-LABEL: test_minimum_const_max_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI31_0 +; CHECK-NEXT: vldr s0, .LCPI39_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmin.f32 d0, d1, d0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI31_0: +; CHECK-NEXT: .LCPI39_0: ; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %r = call ninf float @llvm.minimum.f32(float %x, float 0x47efffffe0000000) ret float %r @@ -493,14 +629,14 @@ define float @test_minimum_const_max_ninf(float %x) { define float @test_minnum_const_neg_max_ninf(float %x) { ; CHECK-LABEL: test_minnum_const_neg_max_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI32_0 +; CHECK-NEXT: vldr s0, .LCPI40_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vminnm.f32 s0, s2, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI32_0: +; CHECK-NEXT: .LCPI40_0: ; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 %r = call ninf float @llvm.minnum.f32(float %x, float 0xc7efffffe0000000) ret float %r @@ -509,14 +645,14 @@ define float @test_minnum_const_neg_max_ninf(float %x) { define float @test_maxnum_const_neg_max_ninf(float %x) { ; CHECK-LABEL: test_maxnum_const_neg_max_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI33_0 +; CHECK-NEXT: vldr s0, .LCPI41_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI33_0: +; CHECK-NEXT: .LCPI41_0: ; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 %r = call ninf float @llvm.maxnum.f32(float %x, float 0xc7efffffe0000000) ret float %r @@ -525,14 +661,14 @@ define float @test_maxnum_const_neg_max_ninf(float %x) { define float @test_maximum_const_neg_max_ninf(float %x) { ; CHECK-LABEL: test_maximum_const_neg_max_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI34_0 +; CHECK-NEXT: vldr s0, .LCPI42_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmax.f32 d0, d1, d0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI34_0: +; CHECK-NEXT: .LCPI42_0: ; CHECK-NEXT: .long 
0xff7fffff @ float -3.40282347E+38 %r = call ninf float @llvm.maximum.f32(float %x, float 0xc7efffffe0000000) ret float %r @@ -541,14 +677,14 @@ define float @test_maximum_const_neg_max_ninf(float %x) { define float @test_minimum_const_neg_max_ninf(float %x) { ; CHECK-LABEL: test_minimum_const_neg_max_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI35_0 +; CHECK-NEXT: vldr s0, .LCPI43_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmin.f32 d0, d1, d0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI35_0: +; CHECK-NEXT: .LCPI43_0: ; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 %r = call ninf float @llvm.minimum.f32(float %x, float 0xc7efffffe0000000) ret float %r @@ -557,14 +693,14 @@ define float @test_minimum_const_neg_max_ninf(float %x) { define float @test_minnum_const_max_nnan_ninf(float %x) { ; CHECK-LABEL: test_minnum_const_max_nnan_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI36_0 +; CHECK-NEXT: vldr s0, .LCPI44_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vminnm.f32 s0, s2, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI36_0: +; CHECK-NEXT: .LCPI44_0: ; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %r = call nnan ninf float @llvm.minnum.f32(float %x, float 0x47efffffe0000000) ret float %r @@ -573,14 +709,14 @@ define float @test_minnum_const_max_nnan_ninf(float %x) { define float @test_maxnum_const_max_nnan_ninf(float %x) { ; CHECK-LABEL: test_maxnum_const_max_nnan_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI37_0 +; CHECK-NEXT: vldr s0, .LCPI45_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI37_0: +; CHECK-NEXT: .LCPI45_0: ; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %r = call nnan ninf float @llvm.maxnum.f32(float %x, float 0x47efffffe0000000) ret float %r @@ -589,14 +725,14 @@ define float @test_maxnum_const_max_nnan_ninf(float %x) { define float @test_maximum_const_max_nnan_ninf(float %x) { ; CHECK-LABEL: test_maximum_const_max_nnan_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI38_0 +; CHECK-NEXT: vldr s0, .LCPI46_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmax.f32 d0, d1, d0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI38_0: +; CHECK-NEXT: .LCPI46_0: ; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %r = call nnan ninf float @llvm.maximum.f32(float %x, float 0x47efffffe0000000) ret float %r @@ -605,14 +741,14 @@ define float @test_maximum_const_max_nnan_ninf(float %x) { define float @test_minimum_const_max_nnan_ninf(float %x) { ; CHECK-LABEL: test_minimum_const_max_nnan_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI39_0 +; CHECK-NEXT: vldr s0, .LCPI47_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmin.f32 d0, d1, d0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI39_0: +; CHECK-NEXT: .LCPI47_0: ; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %r = call nnan ninf float @llvm.minimum.f32(float %x, float 0x47efffffe0000000) ret float %r @@ -621,14 +757,14 @@ define float @test_minimum_const_max_nnan_ninf(float %x) { define float @test_minnum_const_neg_max_nnan_ninf(float %x) { ; CHECK-LABEL: test_minnum_const_neg_max_nnan_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI40_0 +; CHECK-NEXT: vldr s0, .LCPI48_0 ; CHECK-NEXT: vmov s2, 
r0 ; CHECK-NEXT: vminnm.f32 s0, s2, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI40_0: +; CHECK-NEXT: .LCPI48_0: ; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 %r = call nnan ninf float @llvm.minnum.f32(float %x, float 0xc7efffffe0000000) ret float %r @@ -637,14 +773,14 @@ define float @test_minnum_const_neg_max_nnan_ninf(float %x) { define float @test_maxnum_const_neg_max_nnan_ninf(float %x) { ; CHECK-LABEL: test_maxnum_const_neg_max_nnan_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI41_0 +; CHECK-NEXT: vldr s0, .LCPI49_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI41_0: +; CHECK-NEXT: .LCPI49_0: ; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 %r = call nnan ninf float @llvm.maxnum.f32(float %x, float 0xc7efffffe0000000) ret float %r @@ -653,14 +789,14 @@ define float @test_maxnum_const_neg_max_nnan_ninf(float %x) { define float @test_maximum_const_neg_max_nnan_ninf(float %x) { ; CHECK-LABEL: test_maximum_const_neg_max_nnan_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI42_0 +; CHECK-NEXT: vldr s0, .LCPI50_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmax.f32 d0, d1, d0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI42_0: +; CHECK-NEXT: .LCPI50_0: ; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 %r = call nnan ninf float @llvm.maximum.f32(float %x, float 0xc7efffffe0000000) ret float %r @@ -669,14 +805,14 @@ define float @test_maximum_const_neg_max_nnan_ninf(float %x) { define float @test_minimum_const_neg_max_nnan_ninf(float %x) { ; CHECK-LABEL: test_minimum_const_neg_max_nnan_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI43_0 +; CHECK-NEXT: vldr s0, .LCPI51_0 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmin.f32 d0, d1, d0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI43_0: +; CHECK-NEXT: .LCPI51_0: ; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 %r = call nnan ninf float @llvm.minimum.f32(float %x, float 0xc7efffffe0000000) ret float %r From b2c32c90bab09a6e2c1f370429db26017a182143 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 13 Sep 2020 14:54:20 -0700 Subject: [PATCH 0495/1079] [llvm-cov gcov] Add -r (--relative-only) && -s (--source-prefix) gcov 4.7 introduced the two options. https://sourceware.org/pipermail/gcc-patches/2011-November/328782.html -r only dumps files with relative paths or absolute paths with the prefix specified by -s. The two options are useful filtering out system header files. 
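For reviewers, the combined -r/-s rule reduces to the minimal sketch below. This is an illustration only, not code from this patch: it uses plain std::string and a hard-coded '/' separator in place of LLVM's SmallString and sys::path, and DisplayDecision/decideDisplayName are invented names (the patch itself stores the result in SourceInfo::displayName and SourceInfo::ignored, and additionally works around a replace_path_prefix corner case noted in a TODO).

#include <string>

struct DisplayDecision {
  std::string displayName;
  bool ignored = false;
};

DisplayDecision decideDisplayName(const std::string &Filename,
                                  const std::string &SourcePrefix,
                                  bool RelativeOnly) {
  DisplayDecision D{Filename, false};
  // -s: strip the prefix, but only when the remainder begins with a
  // separator, so that -s /tmp/h does not mangle /tmp/host/a.c.
  if (!SourcePrefix.empty() && Filename.size() > SourcePrefix.size() &&
      Filename.compare(0, SourcePrefix.size(), SourcePrefix) == 0 &&
      Filename[SourcePrefix.size()] == '/')
    D.displayName = Filename.substr(SourcePrefix.size() + 1);
  // -r: anything still absolute after -s stripping is not reported.
  if (RelativeOnly && !D.displayName.empty() && D.displayName[0] == '/')
    D.ignored = true;
  return D;
}

With -r -s /tmp, /tmp/c/abs-path.c is reported as c/abs-path.c; with -r alone it is skipped entirely, which is what the new relative-only.test expects.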
--- llvm/include/llvm/ProfileData/GCOV.h | 9 ++++- llvm/lib/ProfileData/GCOV.cpp | 29 ++++++++++++-- .../tools/llvm-cov/gcov/Inputs/abs-path.gcda | Bin 0 -> 104 bytes .../tools/llvm-cov/gcov/Inputs/abs-path.gcno | Bin 0 -> 368 bytes .../tools/llvm-cov/gcov/relative-only.test | 37 ++++++++++++++++++ llvm/tools/llvm-cov/gcov.cpp | 11 +++++- 6 files changed, 79 insertions(+), 7 deletions(-) create mode 100644 llvm/test/tools/llvm-cov/gcov/Inputs/abs-path.gcda create mode 100644 llvm/test/tools/llvm-cov/gcov/Inputs/abs-path.gcno create mode 100644 llvm/test/tools/llvm-cov/gcov/relative-only.test diff --git a/llvm/include/llvm/ProfileData/GCOV.h b/llvm/include/llvm/ProfileData/GCOV.h index 3c6312f916746..56b512b6d6065 100644 --- a/llvm/include/llvm/ProfileData/GCOV.h +++ b/llvm/include/llvm/ProfileData/GCOV.h @@ -48,10 +48,11 @@ enum GCOVVersion { V304, V407, V408, V800, V900 }; /// A struct for passing gcov options between functions. struct Options { Options(bool A, bool B, bool C, bool F, bool P, bool U, bool I, bool L, - bool N, bool T, bool X) + bool N, bool R, bool T, bool X, std::string SourcePrefix) : AllBlocks(A), BranchInfo(B), BranchCount(C), FuncCoverage(F), PreservePaths(P), UncondBranch(U), Intermediate(I), LongFileNames(L), - NoOutput(N), UseStdout(T), HashFilenames(X) {} + NoOutput(N), RelativeOnly(R), UseStdout(T), HashFilenames(X), + SourcePrefix(std::move(SourcePrefix)) {} bool AllBlocks; bool BranchInfo; @@ -62,8 +63,10 @@ struct Options { bool Intermediate; bool LongFileNames; bool NoOutput; + bool RelativeOnly; bool UseStdout; bool HashFilenames; + std::string SourcePrefix; }; } // end namespace GCOV @@ -341,9 +344,11 @@ struct GCOVCoverage { struct SourceInfo { StringRef filename; + SmallString<0> displayName; std::string name; std::vector functions; GCOVCoverage coverage; + bool ignored = false; SourceInfo(StringRef filename) : filename(filename) {} }; diff --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp index d4a4a8979e81c..20118a0378b79 100644 --- a/llvm/lib/ProfileData/GCOV.cpp +++ b/llvm/lib/ProfileData/GCOV.cpp @@ -261,8 +261,24 @@ LLVM_DUMP_METHOD void GCOVFile::dump() const { print(dbgs()); } /// reading .gcno and .gcda files. void GCOVFile::collectLineCounts(FileInfo &fi) { assert(fi.sources.empty()); - for (StringRef filename : filenames) + for (StringRef filename : filenames) { fi.sources.emplace_back(filename); + SourceInfo &si = fi.sources.back(); + si.displayName = si.filename; + if (!fi.Options.SourcePrefix.empty() && + sys::path::replace_path_prefix(si.displayName, fi.Options.SourcePrefix, + "") && + !si.displayName.empty()) { + // TODO replace_path_prefix may strip the prefix even if the remaining + // part does not start with a separator. + if (sys::path::is_separator(si.displayName[0])) + si.displayName.erase(si.displayName.begin()); + else + si.displayName = si.filename; + } + if (fi.Options.RelativeOnly && sys::path::is_absolute(si.displayName)) + si.ignored = true; + } for (GCOVFunction &f : *this) { f.collectLineCounts(fi); fi.sources[f.srcIdx].functions.push_back(&f); @@ -664,6 +680,10 @@ void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename, llvm::sort(Filenames); for (StringRef Filename : Filenames) { + SourceInfo &source = sources[file.filenameToIdx.find(Filename)->second]; + if (source.ignored) + continue; + auto AllLines = Options.Intermediate ? 
LineConsumer() : LineConsumer(Filename); std::string CoveragePath = getCoveragePath(Filename, MainFilename); @@ -675,7 +695,7 @@ void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename, raw_ostream &CovOS = !Options.NoOutput && Options.UseStdout ? llvm::outs() : *CovStream; - CovOS << " -: 0:Source:" << Filename << "\n"; + CovOS << " -: 0:Source:" << source.displayName << "\n"; CovOS << " -: 0:Graph:" << GCNOFile << "\n"; CovOS << " -: 0:Data:" << GCDAFile << "\n"; CovOS << " -: 0:Runs:" << RunCount << "\n"; @@ -683,7 +703,7 @@ void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename, CovOS << " -: 0:Programs:" << ProgramCount << "\n"; const LineData &Line = LineInfo[Filename]; - GCOVCoverage FileCoverage(Filename); + GCOVCoverage FileCoverage(source.displayName); for (uint32_t LineIndex = 0; LineIndex < Line.LastLine || !AllLines.empty(); ++LineIndex) { if (Options.BranchInfo) { @@ -767,7 +787,6 @@ void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename, } } } - SourceInfo &source = sources[file.filenameToIdx.find(Filename)->second]; source.name = CoveragePath; source.coverage = FileCoverage; } @@ -928,6 +947,8 @@ void FileInfo::printFuncCoverage(raw_ostream &OS) const { // printFileCoverage - Print per-file coverage info. void FileInfo::printFileCoverage(raw_ostream &OS) const { for (const SourceInfo &source : sources) { + if (source.ignored) + continue; const GCOVCoverage &Coverage = source.coverage; OS << "File '" << Coverage.Name << "'\n"; printCoverage(OS, Coverage); diff --git a/llvm/test/tools/llvm-cov/gcov/Inputs/abs-path.gcda b/llvm/test/tools/llvm-cov/gcov/Inputs/abs-path.gcda new file mode 100644 index 0000000000000000000000000000000000000000..806dc6a2aa0f52edff3e5d97c1c519adbf4b8047 GIT binary patch literal 104 zcmYdHNlw=?G;q3Z9M{Rfz_5@BNH9V%h++m3uZs%U481p%?r1hx0#w2XQv;L&0kE3w R?(KLB*JGI9giATbcYXBR{Tm}KMwQrq3z6QATKF9f+6WL8>!K3EcDFW9{q`iXiO5PLx8 Yg4BqERgmXSPM}_xJF(dTHX29(0PA%sPyhe` literal 0 HcmV?d00001 diff --git a/llvm/test/tools/llvm-cov/gcov/relative-only.test b/llvm/test/tools/llvm-cov/gcov/relative-only.test new file mode 100644 index 0000000000000..157441e7673f5 --- /dev/null +++ b/llvm/test/tools/llvm-cov/gcov/relative-only.test @@ -0,0 +1,37 @@ +# Test -r (--relative-only) and -s (--source-prefix). +RUN: rm -rf %t && mkdir %t && cd %t +RUN: cp %S/Inputs/abs-path.gcno %S/Inputs/abs-path.gcda . + +RUN: llvm-cov gcov abs-path.gcda | FileCheck %s +RUN: rm abs-path.c.gcov a.h.gcov +CHECK: File '/tmp/c/abs-path.c' +CHECK: File '/tmp/h/a.h' + +# If there is no source file with a relative path, nothing is dumped. +RUN: llvm-cov gcov -r abs-path.gcda 2>&1 | count 0 +RUN: llvm-cov gcov -r -s /t abs-path.gcda 2>&1 | count 0 +RUN: not ls abs-path.c.gcov 2> /dev/null + +# -s strips a prefix from filenames and can change filtering of -r. +RUN: llvm-cov gcov -r -s /tmp abs-path.gcda | FileCheck %s --check-prefix=STRIP1 --match-full-lines --strict-whitespace +RUN: FileCheck %s --check-prefix=STRIP1_C < abs-path.c.gcov +RUN: FileCheck %s --check-prefix=STRIP1_H < a.h.gcov + +# Test full option names. 
+RUN: llvm-cov gcov --relative-only --source-prefix=/tmp abs-path.gcda | FileCheck %s --check-prefix=STRIP1 --match-full-lines --strict-whitespace + + STRIP1:File 'c/abs-path.c' + STRIP1-NEXT:Lines executed:100.00% of 1 + STRIP1-NEXT:Creating 'abs-path.c.gcov' +STRIP1-EMPTY: + STRIP1-NEXT:File 'h/a.h' + STRIP1-NEXT:Lines executed:0.00% of 1 + STRIP1-NEXT:Creating 'a.h.gcov' + +STRIP1_C: 0:Source:c/abs-path.c +STRIP1_H: 0:Source:h/a.h + +RUN: llvm-cov gcov -r -s /tmp/h abs-path.gcda | FileCheck %s --check-prefix=STRIP2 + +STRIP2-NOT: File +STRIP2: File 'a.h' diff --git a/llvm/tools/llvm-cov/gcov.cpp b/llvm/tools/llvm-cov/gcov.cpp index d99e792c68a95..858f4cee79045 100644 --- a/llvm/tools/llvm-cov/gcov.cpp +++ b/llvm/tools/llvm-cov/gcov.cpp @@ -131,6 +131,14 @@ int gcovMain(int argc, const char *argv[]) { cl::desc("Preserve path components")); cl::alias PreservePathsA("preserve-paths", cl::aliasopt(PreservePaths)); + cl::opt RelativeOnly( + "r", cl::Grouping, + cl::desc("Only dump files with relative paths or absolute paths with the " + "prefix specified by -s")); + cl::alias RelativeOnlyA("relative-only", cl::aliasopt(RelativeOnly)); + cl::opt SourcePrefix("s", cl::desc("Source prefix to elide")); + cl::alias SourcePrefixA("source-prefix", cl::aliasopt(SourcePrefix)); + cl::opt UseStdout("t", cl::Grouping, cl::init(false), cl::desc("Print to stdout")); cl::alias UseStdoutA("stdout", cl::aliasopt(UseStdout)); @@ -157,7 +165,8 @@ int gcovMain(int argc, const char *argv[]) { GCOV::Options Options(AllBlocks, BranchProb, BranchCount, FuncSummary, PreservePaths, UncondBranch, Intermediate, LongNames, - NoOutput, UseStdout, HashFilenames); + NoOutput, RelativeOnly, UseStdout, HashFilenames, + SourcePrefix); for (const auto &SourceFile : SourceFiles) reportCoverage(SourceFile, ObjectDir, InputGCNO, InputGCDA, DumpGCOV, From 44664a54483def1692ea75925bfce0053e76bef0 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 13 Sep 2020 15:17:14 -0700 Subject: [PATCH 0496/1079] [llvm-cov gcov][test] Unsupport Windows --- llvm/test/tools/llvm-cov/gcov/relative-only.test | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/tools/llvm-cov/gcov/relative-only.test b/llvm/test/tools/llvm-cov/gcov/relative-only.test index 157441e7673f5..20be39683fbeb 100644 --- a/llvm/test/tools/llvm-cov/gcov/relative-only.test +++ b/llvm/test/tools/llvm-cov/gcov/relative-only.test @@ -1,4 +1,5 @@ # Test -r (--relative-only) and -s (--source-prefix). +# UNSUPPORTED: system-windows RUN: rm -rf %t && mkdir %t && cd %t RUN: cp %S/Inputs/abs-path.gcno %S/Inputs/abs-path.gcda . From 783ba64a8950768d412555abd52bbc65156d4fb5 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Sun, 13 Sep 2020 14:22:20 -0700 Subject: [PATCH 0497/1079] [JITLink] Improve formatting for Edge, Block and Symbol debugging output. --- llvm/lib/ExecutionEngine/JITLink/JITLink.cpp | 34 +++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp index 5105ec4951484..71ec88639a5b7 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp @@ -93,6 +93,7 @@ const char *getScopeName(Scope S) { raw_ostream &operator<<(raw_ostream &OS, const Block &B) { return OS << formatv("{0:x16}", B.getAddress()) << " -- " << formatv("{0:x16}", B.getAddress() + B.getSize()) << ": " + << "size = " << formatv("{0:x}", B.getSize()) << ", " << (B.isZeroFill() ? 
"zero-fill" : "content") << ", align = " << B.getAlignment() << ", align-ofs = " << B.getAlignmentOffset() @@ -126,10 +127,10 @@ raw_ostream &operator<<(raw_ostream &OS, const Symbol &Sym) { break; } OS << (Sym.isLive() ? '+' : '-') - << ", size = " << formatv("{0:x8}", Sym.getSize()) + << ", size = " << formatv("{0:x}", Sym.getSize()) << ", addr = " << formatv("{0:x16}", Sym.getAddress()) << " (" << formatv("{0:x16}", Sym.getAddressable().getAddress()) << " + " - << formatv("{0:x8}", Sym.getOffset()); + << formatv("{0:x}", Sym.getOffset()); if (Sym.isDefined()) OS << " " << Sym.getBlock().getSection().getName(); OS << ")>"; @@ -139,8 +140,33 @@ raw_ostream &operator<<(raw_ostream &OS, const Symbol &Sym) { void printEdge(raw_ostream &OS, const Block &B, const Edge &E, StringRef EdgeKindName) { OS << "edge@" << formatv("{0:x16}", B.getAddress() + E.getOffset()) << ": " - << formatv("{0:x16}", B.getAddress()) << " + " << E.getOffset() << " -- " - << EdgeKindName << " -> " << E.getTarget() << " + " << E.getAddend(); + << formatv("{0:x16}", B.getAddress()) << " + " + << formatv("{0:x}", E.getOffset()) << " -- " << EdgeKindName << " -> "; + + auto &TargetSym = E.getTarget(); + if (TargetSym.hasName()) + OS << TargetSym.getName(); + else { + auto &TargetBlock = TargetSym.getBlock(); + auto &TargetSec = TargetBlock.getSection(); + JITTargetAddress SecAddress = ~JITTargetAddress(0); + for (auto *B : TargetSec.blocks()) + if (B->getAddress() < SecAddress) + SecAddress = B->getAddress(); + + JITTargetAddress SecDelta = TargetSym.getAddress() - SecAddress; + OS << formatv("{0:x16}", TargetSym.getAddress()) << " (section " + << TargetSec.getName(); + if (SecDelta) + OS << " + " << formatv("{0:x}", SecDelta); + OS << " / block " << formatv("{0:x16}", TargetBlock.getAddress()); + if (TargetSym.getOffset()) + OS << " + " << formatv("{0:x}", TargetSym.getOffset()); + OS << ")"; + } + + if (E.getAddend() != 0) + OS << " + " << E.getAddend(); } Section::~Section() { From 56b33391d3a42ef8e6fd1bcdcbcbb72bfb562092 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 13 Sep 2020 19:51:20 -0700 Subject: [PATCH 0498/1079] [SelectionDAG] Move ISD:PARITY formation from DAGCombine to SimplifyDemandedBits. Previously, we formed ISD::PARITY by looking for (and (ctpop X), 1) but the AND might be separated from the ctpop. For example if the parity result is multiplied by 2, we'll pull the AND through the shift. So to handle more cases, move to SimplifyDemandedBits where we can handle more cases that result in only the LSB of the CTPOP being used. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 19 ---- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 5 + .../CodeGen/SelectionDAG/TargetLowering.cpp | 11 +++ llvm/test/CodeGen/X86/parity.ll | 94 +++++++++++++++++++ 4 files changed, 110 insertions(+), 19 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ae976af6557e1..e4a5176019689 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5574,25 +5574,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue V = combineShiftAnd1ToBitTest(N, DAG)) return V; - // fold (and (ctpop X), 1) -> parity X - // Only do this before op legalization as it might be turned back into ctpop. - // TODO: Support vectors? 
- if (!LegalOperations && isOneConstant(N1) && N0.hasOneUse()) { - SDValue Tmp = N0; - - // It's possible the ctpop has been truncated, but since we only care about - // the LSB we can look through it. - if (Tmp.getOpcode() == ISD::TRUNCATE && Tmp.getOperand(0).hasOneUse()) - Tmp = Tmp.getOperand(0); - - if (Tmp.getOpcode() == ISD::CTPOP) { - SDLoc dl(N); - SDValue Parity = - DAG.getNode(ISD::PARITY, dl, Tmp.getValueType(), Tmp.getOperand(0)); - return DAG.getNode(ISD::TRUNCATE, dl, VT, Parity); - } - } - return SDValue(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 1cc2ec77ebceb..93b40803089e1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3053,6 +3053,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1); break; } + case ISD::PARITY: { + // Parity returns 0 everywhere but the LSB. + Known.Zero.setBitsFrom(1); + break; + } case ISD::LOAD: { LoadSDNode *LD = cast(Op); const Constant *Cst = TLI->getTargetConstantFromLoad(LD); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index ea2344e4f5515..b7f5ab3d6b85d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1748,6 +1748,17 @@ bool TargetLowering::SimplifyDemandedBits( Known.Zero = Known2.Zero.byteSwap(); break; } + case ISD::CTPOP: { + // If only 1 bit is demanded, replace with PARITY as long as we're before + // op legalization. + // FIXME: Limit to scalars for now. + if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector()) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT, + Op.getOperand(0))); + + Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); + break; + } case ISD::SIGN_EXTEND_INREG: { SDValue Op0 = Op.getOperand(0); EVT ExVT = cast(Op.getOperand(1))->getVT(); diff --git a/llvm/test/CodeGen/X86/parity.ll b/llvm/test/CodeGen/X86/parity.ll index d7344a4a2ed78..4bc225cba5476 100644 --- a/llvm/test/CodeGen/X86/parity.ll +++ b/llvm/test/CodeGen/X86/parity.ll @@ -422,6 +422,100 @@ define i32 @parity_8_mask(i32 %x) { ret i32 %c } +define i32 @parity_32_shift(i32 %0) { +; X86-NOPOPCNT-LABEL: parity_32_shift: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOPOPCNT-NEXT: movl %eax, %ecx +; X86-NOPOPCNT-NEXT: shrl $16, %ecx +; X86-NOPOPCNT-NEXT: xorl %eax, %ecx +; X86-NOPOPCNT-NEXT: xorl %eax, %eax +; X86-NOPOPCNT-NEXT: xorb %ch, %cl +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: addl %eax, %eax +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_32_shift: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: movl %edi, %ecx +; X64-NOPOPCNT-NEXT: shrl $16, %ecx +; X64-NOPOPCNT-NEXT: xorl %edi, %ecx +; X64-NOPOPCNT-NEXT: xorl %eax, %eax +; X64-NOPOPCNT-NEXT: xorb %ch, %cl +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: addl %eax, %eax +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_32_shift: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: andl $1, %eax +; X86-POPCNT-NEXT: addl %eax, %eax +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_32_shift: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: popcntl %edi, %eax +; X64-POPCNT-NEXT: andl $1, %eax +; X64-POPCNT-NEXT: addl %eax, %eax +; X64-POPCNT-NEXT: retq + %2 = tail call i32 @llvm.ctpop.i32(i32 %0) + %3 
= shl nuw nsw i32 %2, 1 + %4 = and i32 %3, 2 + ret i32 %4 +} + +define i64 @parity_64_shift(i64 %0) { +; X86-NOPOPCNT-LABEL: parity_64_shift: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOPOPCNT-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X86-NOPOPCNT-NEXT: movl %eax, %ecx +; X86-NOPOPCNT-NEXT: shrl $16, %ecx +; X86-NOPOPCNT-NEXT: xorl %eax, %ecx +; X86-NOPOPCNT-NEXT: xorl %eax, %eax +; X86-NOPOPCNT-NEXT: xorb %ch, %cl +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: addl %eax, %eax +; X86-NOPOPCNT-NEXT: xorl %edx, %edx +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_64_shift: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: movq %rdi, %rax +; X64-NOPOPCNT-NEXT: shrq $32, %rax +; X64-NOPOPCNT-NEXT: xorl %edi, %eax +; X64-NOPOPCNT-NEXT: movl %eax, %ecx +; X64-NOPOPCNT-NEXT: shrl $16, %ecx +; X64-NOPOPCNT-NEXT: xorl %eax, %ecx +; X64-NOPOPCNT-NEXT: xorl %eax, %eax +; X64-NOPOPCNT-NEXT: xorb %ch, %cl +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: addq %rax, %rax +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_64_shift: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: popcntl %eax, %eax +; X86-POPCNT-NEXT: andl $1, %eax +; X86-POPCNT-NEXT: addl %eax, %eax +; X86-POPCNT-NEXT: xorl %edx, %edx +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_64_shift: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: popcntq %rdi, %rax +; X64-POPCNT-NEXT: andl $1, %eax +; X64-POPCNT-NEXT: addq %rax, %rax +; X64-POPCNT-NEXT: retq + %2 = tail call i64 @llvm.ctpop.i64(i64 %0) + %3 = shl nuw nsw i64 %2, 1 + %4 = and i64 %3, 2 + ret i64 %4 +} + declare i4 @llvm.ctpop.i4(i4 %x) declare i8 @llvm.ctpop.i8(i8 %x) declare i16 @llvm.ctpop.i16(i16 %x) From 6e42cadf106ccdc7759dd8af113ecf797220de47 Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Sun, 13 Sep 2020 16:54:47 -0700 Subject: [PATCH 0499/1079] [docs] Document LLVM_EXTERNALIZE_DEBUGINFO CMake option Add `LLVM_EXTERNALIZE_DEBUGINFO` to CMake.rst. This should help make dSYM generation more discoverable. Differential Revision: https://reviews.llvm.org/D87591 --- llvm/docs/CMake.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst index 96994dbd8fda9..5a73b7d45211c 100644 --- a/llvm/docs/CMake.rst +++ b/llvm/docs/CMake.rst @@ -461,6 +461,10 @@ LLVM-specific variables **LLVM_PARALLEL_LINK_JOBS**:STRING Define the maximum number of concurrent link jobs. +**LLVM_EXTERNALIZE_DEBUGINFO**:BOOL + Generate dSYM files and strip executables and libraries (Darwin Only). + Defaults to OFF. + **LLVM_USE_CRT_{target}**:STRING On Windows, tells which version of the C runtime library (CRT) should be used. For example, -DLLVM_USE_CRT_RELEASE=MT would statically link the CRT into the From 88690a965892e82cac05a162a9d10e2ce4e2355f Mon Sep 17 00:00:00 2001 From: Yevgeny Rouban Date: Mon, 14 Sep 2020 11:42:23 +0700 Subject: [PATCH 0500/1079] [CodeGenPrepare] Fix zapping dead operands of assume This patch fixes a problem of the commit 52cc97a0. A test case is created to demonstrate the crash caused by the instruction iterator invalidated by the recursive removal of dead operands of assume. The solution restarts from the blocks's first instruction in case CurInstIterator is invalidated by RecursivelyDeleteTriviallyDeadInstructions(). 
Reviewed By: bkramer Differential Revision: https://reviews.llvm.org/D87434 --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 8 +++--- .../recursively-delete-dead-instructions.ll | 27 +++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/recursively-delete-dead-instructions.ll diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 529975c33ec17..bb0bad74fb698 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2047,9 +2047,11 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { Value *Operand = II->getOperand(0); II->eraseFromParent(); // Prune the operand, it's most likely dead. - RecursivelyDeleteTriviallyDeadInstructions( - Operand, TLInfo, nullptr, - [&](Value *V) { removeAllAssertingVHReferences(V); }); + resetIteratorIfInvalidatedWhileCalling(BB, [&]() { + RecursivelyDeleteTriviallyDeadInstructions( + Operand, TLInfo, nullptr, + [&](Value *V) { removeAllAssertingVHReferences(V); }); + }); return true; } diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/recursively-delete-dead-instructions.ll b/llvm/test/Transforms/CodeGenPrepare/X86/recursively-delete-dead-instructions.ll new file mode 100644 index 0000000000000..0366b7d7e6d2e --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/recursively-delete-dead-instructions.ll @@ -0,0 +1,27 @@ +; RUN: opt -codegenprepare -S -mtriple=x86_64-linux < %s | FileCheck %s + +declare void @llvm.assume(i1 noundef) nounwind willreturn + +; Recursively deleting dead operands of assume() may result in its next +; instruction deleted and the iterator pointing to the next instruction +; invalidated. This prevents the following simple loop in +; CodeGenPrepare::optimizeBlock() unless CurInstIterator is fixed: +; +; CurInstIterator = BB.begin(); +; while (CurInstIterator != BB.end()) +; optimizeInst(&*CurInstIterator++, ModifiedDT); +; +define i32 @test_assume_in_loop(i1 %cond1, i1 %cond2) { +; CHECK-LABEL: @test_assume_in_loop( +; CHECK-NEXT: entry: +entry: + br label %loop + +; CHECK: loop: +; CHECK-NEXT: br label %loop +loop: + %cond3 = phi i1 [%cond1, %entry], [%cond4, %loop] + call void @llvm.assume(i1 %cond3) + %cond4 = icmp ult i1 %cond1, %cond2 + br label %loop +} From f1cd6593da3ad763eb3f7aaf7761d06fb303493a Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Sat, 12 Sep 2020 21:54:14 +0700 Subject: [PATCH 0501/1079] [AST][FPEnv] Keep FP options in trailing storage of CastExpr This is recommit of 6c8041aa0f, reverted in de044f7562 because of some fails. Original commit message is below. This change allow a CastExpr to have optional FPOptionsOverride object, stored in trailing storage. Of all cast nodes only ImplicitCastExpr, CStyleCastExpr, CXXFunctionalCastExpr and CXXStaticCastExpr are allowed to have FPOptions. 
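For readers new to the trailing-storage idiom, the sketch below shows the core mechanism in isolation. It is illustrative only: Node, FPOverride, and create are invented names for this example, and the real AST nodes use llvm::TrailingObjects together with the new CastExprBits.HasFPFeatures bit rather than hand-rolled pointer arithmetic.

#include <cassert>
#include <cstddef>
#include <new>

struct FPOverride { unsigned Bits = 0; };

class Node {
  unsigned HasFPFeatures : 1; // mirrors CastExprBits.HasFPFeatures

  explicit Node(bool HasFP) : HasFPFeatures(HasFP) {}

  // The optional payload lives immediately after the node in memory,
  // so nodes without overridden FP options pay no size cost at all.
  FPOverride *getTrailingFPFeatures() {
    assert(HasFPFeatures && "no trailing storage was allocated");
    return reinterpret_cast<FPOverride *>(this + 1);
  }

public:
  static Node *create(const FPOverride *FP) {
    bool HasFP = FP != nullptr;
    std::size_t Size = sizeof(Node) + (HasFP ? sizeof(FPOverride) : 0);
    void *Mem = ::operator new(Size);
    Node *N = new (Mem) Node(HasFP);
    if (HasFP)
      new (N->getTrailingFPFeatures()) FPOverride(*FP);
    return N;
  }

  bool hasStoredFPFeatures() const { return HasFPFeatures; }
  FPOverride getStoredFPFeatures() { return *getTrailingFPFeatures(); }
};

Because the amount of trailing storage is fixed at allocation time, deserialization has to know up front whether the payload is present; that is why the CreateEmpty functions in this patch take a HasFPFeatures flag alongside PathSize.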
Differential Revision: https://reviews.llvm.org/D85960 --- clang/include/clang/AST/Expr.h | 117 +++++++++++---- clang/include/clang/AST/ExprCXX.h | 139 +++++++++++------- clang/include/clang/AST/ExprObjC.h | 4 +- clang/include/clang/AST/Stmt.h | 3 + clang/include/clang/AST/TextNodeDumper.h | 1 + clang/include/clang/Basic/LangOptions.h | 2 + clang/lib/AST/ASTImporter.cpp | 15 +- clang/lib/AST/Expr.cpp | 55 +++++-- clang/lib/AST/ExprCXX.cpp | 61 ++++---- clang/lib/AST/TextNodeDumper.cpp | 10 ++ clang/lib/Analysis/BodyFarm.cpp | 16 +- clang/lib/CodeGen/CGBlocks.cpp | 2 +- clang/lib/CodeGen/CGObjC.cpp | 13 +- clang/lib/CodeGen/CGStmtOpenMP.cpp | 2 +- .../Frontend/Rewrite/RewriteModernObjC.cpp | 7 +- clang/lib/Frontend/Rewrite/RewriteObjC.cpp | 7 +- clang/lib/Sema/Sema.cpp | 3 +- clang/lib/Sema/SemaCast.cpp | 29 ++-- clang/lib/Sema/SemaDecl.cpp | 8 +- clang/lib/Sema/SemaDeclCXX.cpp | 9 +- clang/lib/Sema/SemaExpr.cpp | 11 +- clang/lib/Sema/SemaExprCXX.cpp | 13 +- clang/lib/Sema/SemaExprObjC.cpp | 12 +- clang/lib/Sema/SemaInit.cpp | 34 +++-- clang/lib/Sema/SemaLambda.cpp | 5 +- clang/lib/Sema/SemaObjCProperty.cpp | 14 +- clang/lib/Sema/SemaOpenMP.cpp | 12 +- clang/lib/Sema/SemaOverload.cpp | 23 +-- clang/lib/Sema/SemaStmt.cpp | 7 +- clang/lib/Sema/SemaTemplate.cpp | 2 +- clang/lib/Serialization/ASTReaderStmt.cpp | 29 +++- clang/lib/Serialization/ASTWriterDecl.cpp | 1 + clang/lib/Serialization/ASTWriterStmt.cpp | 6 +- clang/test/AST/ast-dump-fpfeatures.cpp | 43 +++++- 34 files changed, 460 insertions(+), 255 deletions(-) diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 26e52ad367f81..1672fd707c6d2 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -3440,9 +3440,11 @@ class CastExpr : public Expr { } CXXBaseSpecifier **path_buffer(); + friend class ASTStmtReader; + protected: CastExpr(StmtClass SC, QualType ty, ExprValueKind VK, const CastKind kind, - Expr *op, unsigned BasePathSize) + Expr *op, unsigned BasePathSize, bool HasFPFeatures) : Expr(SC, ty, VK, OK_Ordinary), Op(op) { CastExprBits.Kind = kind; CastExprBits.PartOfExplicitCast = false; @@ -3451,17 +3453,27 @@ class CastExpr : public Expr { "BasePathSize overflow!"); setDependence(computeDependence(this)); assert(CastConsistency()); + CastExprBits.HasFPFeatures = HasFPFeatures; } /// Construct an empty cast. - CastExpr(StmtClass SC, EmptyShell Empty, unsigned BasePathSize) - : Expr(SC, Empty) { + CastExpr(StmtClass SC, EmptyShell Empty, unsigned BasePathSize, + bool HasFPFeatures) + : Expr(SC, Empty) { CastExprBits.PartOfExplicitCast = false; CastExprBits.BasePathSize = BasePathSize; + CastExprBits.HasFPFeatures = HasFPFeatures; assert((CastExprBits.BasePathSize == BasePathSize) && "BasePathSize overflow!"); } + /// Return a pointer to the trailing FPOptions. + /// \pre hasStoredFPFeatures() == true + FPOptionsOverride *getTrailingFPFeatures(); + const FPOptionsOverride *getTrailingFPFeatures() const { + return const_cast(this)->getTrailingFPFeatures(); + } + public: CastKind getCastKind() const { return (CastKind) CastExprBits.Kind; } void setCastKind(CastKind K) { CastExprBits.Kind = K; } @@ -3506,6 +3518,28 @@ class CastExpr : public Expr { return getTargetFieldForToUnionCast(getType(), getSubExpr()->getType()); } + bool hasStoredFPFeatures() const { return CastExprBits.HasFPFeatures; } + + /// Get FPOptionsOverride from trailing storage. 
+  FPOptionsOverride getStoredFPFeatures() const {
+    assert(hasStoredFPFeatures());
+    return *getTrailingFPFeatures();
+  }
+
+  // Get the FP features status of this operation. Only meaningful for
+  // operations on floating point types.
+  FPOptions getFPFeaturesInEffect(const LangOptions &LO) const {
+    if (hasStoredFPFeatures())
+      return getStoredFPFeatures().applyOverrides(LO);
+    return FPOptions::defaultWithoutTrailingStorage(LO);
+  }
+
+  FPOptionsOverride getFPFeatures() const {
+    if (hasStoredFPFeatures())
+      return getStoredFPFeatures();
+    return FPOptionsOverride();
+  }
+
   static const FieldDecl *getTargetFieldForToUnionCast(QualType unionType,
                                                        QualType opType);
   static const FieldDecl *getTargetFieldForToUnionCast(const RecordDecl *RD,
@@ -3543,21 +3577,35 @@ class CastExpr : public Expr {
 /// @endcode
 class ImplicitCastExpr final
     : public CastExpr,
-      private llvm::TrailingObjects<ImplicitCastExpr, CXXBaseSpecifier *> {
+      private llvm::TrailingObjects<ImplicitCastExpr, CXXBaseSpecifier *,
+                                    FPOptionsOverride> {
 
   ImplicitCastExpr(QualType ty, CastKind kind, Expr *op,
-                   unsigned BasePathLength, ExprValueKind VK)
-      : CastExpr(ImplicitCastExprClass, ty, VK, kind, op, BasePathLength) { }
+                   unsigned BasePathLength, FPOptionsOverride FPO,
+                   ExprValueKind VK)
+      : CastExpr(ImplicitCastExprClass, ty, VK, kind, op, BasePathLength,
+                 FPO.requiresTrailingStorage()) {
+    if (hasStoredFPFeatures())
+      *getTrailingFPFeatures() = FPO;
+  }
 
   /// Construct an empty implicit cast.
-  explicit ImplicitCastExpr(EmptyShell Shell, unsigned PathSize)
-      : CastExpr(ImplicitCastExprClass, Shell, PathSize) { }
+  explicit ImplicitCastExpr(EmptyShell Shell, unsigned PathSize,
+                            bool HasFPFeatures)
+      : CastExpr(ImplicitCastExprClass, Shell, PathSize, HasFPFeatures) {}
+
+  unsigned numTrailingObjects(OverloadToken<CXXBaseSpecifier *>) const {
+    return path_size();
+  }
 
 public:
   enum OnStack_t { OnStack };
   ImplicitCastExpr(OnStack_t _, QualType ty, CastKind kind, Expr *op,
-                   ExprValueKind VK)
-      : CastExpr(ImplicitCastExprClass, ty, VK, kind, op, 0) {
+                   ExprValueKind VK, FPOptionsOverride FPO)
+      : CastExpr(ImplicitCastExprClass, ty, VK, kind, op, 0,
+                 FPO.requiresTrailingStorage()) {
+    if (hasStoredFPFeatures())
+      *getTrailingFPFeatures() = FPO;
   }
 
   bool isPartOfExplicitCast() const { return CastExprBits.PartOfExplicitCast; }
@@ -3568,10 +3616,10 @@ class ImplicitCastExpr final
   static ImplicitCastExpr *Create(const ASTContext &Context, QualType T,
                                   CastKind Kind, Expr *Operand,
                                   const CXXCastPath *BasePath,
-                                  ExprValueKind Cat);
+                                  ExprValueKind Cat, FPOptionsOverride FPO);
 
   static ImplicitCastExpr *CreateEmpty(const ASTContext &Context,
-                                       unsigned PathSize);
+                                       unsigned PathSize, bool HasFPFeatures);
 
   SourceLocation getBeginLoc() const LLVM_READONLY {
     return getSubExpr()->getBeginLoc();
@@ -3612,12 +3660,14 @@ class ExplicitCastExpr : public CastExpr {
 protected:
   ExplicitCastExpr(StmtClass SC, QualType exprTy, ExprValueKind VK,
                    CastKind kind, Expr *op, unsigned PathSize,
-                   TypeSourceInfo *writtenTy)
-      : CastExpr(SC, exprTy, VK, kind, op, PathSize), TInfo(writtenTy) {}
+                   bool HasFPFeatures, TypeSourceInfo *writtenTy)
+      : CastExpr(SC, exprTy, VK, kind, op, PathSize, HasFPFeatures),
+        TInfo(writtenTy) {}
 
   /// Construct an empty explicit cast.
-  ExplicitCastExpr(StmtClass SC, EmptyShell Shell, unsigned PathSize)
-      : CastExpr(SC, Shell, PathSize) { }
+  ExplicitCastExpr(StmtClass SC, EmptyShell Shell, unsigned PathSize,
+                   bool HasFPFeatures)
+      : CastExpr(SC, Shell, PathSize, HasFPFeatures) {}
 
 public:
   /// getTypeInfoAsWritten - Returns the type source info for the type
@@ -3640,29 +3690,38 @@ class ExplicitCastExpr : public CastExpr {
 /// (Type)expr. For example: @c (int)f.
 class CStyleCastExpr final
     : public ExplicitCastExpr,
-      private llvm::TrailingObjects<CStyleCastExpr, CXXBaseSpecifier *> {
+      private llvm::TrailingObjects<CStyleCastExpr, CXXBaseSpecifier *,
+                                    FPOptionsOverride> {
   SourceLocation LPLoc; // the location of the left paren
   SourceLocation RPLoc; // the location of the right paren
 
   CStyleCastExpr(QualType exprTy, ExprValueKind vk, CastKind kind, Expr *op,
-                 unsigned PathSize, TypeSourceInfo *writtenTy,
-                 SourceLocation l, SourceLocation r)
-      : ExplicitCastExpr(CStyleCastExprClass, exprTy, vk, kind, op, PathSize,
-                         writtenTy), LPLoc(l), RPLoc(r) {}
+                 unsigned PathSize, FPOptionsOverride FPO,
+                 TypeSourceInfo *writtenTy, SourceLocation l, SourceLocation r)
+      : ExplicitCastExpr(CStyleCastExprClass, exprTy, vk, kind, op, PathSize,
+                         FPO.requiresTrailingStorage(), writtenTy),
+        LPLoc(l), RPLoc(r) {
+    if (hasStoredFPFeatures())
+      *getTrailingFPFeatures() = FPO;
+  }
 
   /// Construct an empty C-style explicit cast.
-  explicit CStyleCastExpr(EmptyShell Shell, unsigned PathSize)
-      : ExplicitCastExpr(CStyleCastExprClass, Shell, PathSize) { }
+  explicit CStyleCastExpr(EmptyShell Shell, unsigned PathSize,
+                          bool HasFPFeatures)
+      : ExplicitCastExpr(CStyleCastExprClass, Shell, PathSize, HasFPFeatures) {}
+
+  unsigned numTrailingObjects(OverloadToken<CXXBaseSpecifier *>) const {
+    return path_size();
+  }
 
 public:
-  static CStyleCastExpr *Create(const ASTContext &Context, QualType T,
-                                ExprValueKind VK, CastKind K,
-                                Expr *Op, const CXXCastPath *BasePath,
-                                TypeSourceInfo *WrittenTy, SourceLocation L,
-                                SourceLocation R);
+  static CStyleCastExpr *
+  Create(const ASTContext &Context, QualType T, ExprValueKind VK, CastKind K,
+         Expr *Op, const CXXCastPath *BasePath, FPOptionsOverride FPO,
+         TypeSourceInfo *WrittenTy, SourceLocation L, SourceLocation R);
 
   static CStyleCastExpr *CreateEmpty(const ASTContext &Context,
-                                     unsigned PathSize);
+                                     unsigned PathSize, bool HasFPFeatures);
 
   SourceLocation getLParenLoc() const { return LPLoc; }
   void setLParenLoc(SourceLocation L) { LPLoc = L; }
diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h
index 6b4b57eca9bea..0ba5e417fd58e 100644
--- a/clang/include/clang/AST/ExprCXX.h
+++ b/clang/include/clang/AST/ExprCXX.h
@@ -374,16 +374,17 @@ class CXXNamedCastExpr : public ExplicitCastExpr {
 protected:
   friend class ASTStmtReader;
 
-  CXXNamedCastExpr(StmtClass SC, QualType ty, ExprValueKind VK,
-                   CastKind kind, Expr *op, unsigned PathSize,
+  CXXNamedCastExpr(StmtClass SC, QualType ty, ExprValueKind VK, CastKind kind,
+                   Expr *op, unsigned PathSize, bool HasFPFeatures,
                    TypeSourceInfo *writtenTy, SourceLocation l,
-                   SourceLocation RParenLoc,
-                   SourceRange AngleBrackets)
-      : ExplicitCastExpr(SC, ty, VK, kind, op, PathSize, writtenTy), Loc(l),
-        RParenLoc(RParenLoc), AngleBrackets(AngleBrackets) {}
+                   SourceLocation RParenLoc, SourceRange AngleBrackets)
+      : ExplicitCastExpr(SC, ty, VK, kind, op, PathSize, HasFPFeatures,
+                         writtenTy),
+        Loc(l), RParenLoc(RParenLoc), AngleBrackets(AngleBrackets) {}
 
-  explicit CXXNamedCastExpr(StmtClass SC, EmptyShell Shell, unsigned PathSize)
-      : ExplicitCastExpr(SC, Shell, PathSize) {}
+  explicit CXXNamedCastExpr(StmtClass SC, EmptyShell Shell, unsigned PathSize,
+                            bool HasFPFeatures)
+      : ExplicitCastExpr(SC, Shell, PathSize, HasFPFeatures) {}
 
 public:
   const char *getCastName() const;
@@ -419,29 +420,39 @@ class CXXNamedCastExpr : public ExplicitCastExpr {
 /// \c static_cast<int>(1.0).
 class CXXStaticCastExpr final
     : public CXXNamedCastExpr,
-      private llvm::TrailingObjects<CXXStaticCastExpr, CXXBaseSpecifier *> {
+      private llvm::TrailingObjects<CXXStaticCastExpr, CXXBaseSpecifier *,
+                                    FPOptionsOverride> {
   CXXStaticCastExpr(QualType ty, ExprValueKind vk, CastKind kind, Expr *op,
                     unsigned pathSize, TypeSourceInfo *writtenTy,
-                    SourceLocation l, SourceLocation RParenLoc,
-                    SourceRange AngleBrackets)
+                    FPOptionsOverride FPO, SourceLocation l,
+                    SourceLocation RParenLoc, SourceRange AngleBrackets)
       : CXXNamedCastExpr(CXXStaticCastExprClass, ty, vk, kind, op, pathSize,
-                         writtenTy, l, RParenLoc, AngleBrackets) {}
+                         FPO.requiresTrailingStorage(), writtenTy, l, RParenLoc,
+                         AngleBrackets) {
+    if (hasStoredFPFeatures())
+      *getTrailingFPFeatures() = FPO;
+  }
 
-  explicit CXXStaticCastExpr(EmptyShell Empty, unsigned PathSize)
-      : CXXNamedCastExpr(CXXStaticCastExprClass, Empty, PathSize) {}
+  explicit CXXStaticCastExpr(EmptyShell Empty, unsigned PathSize,
+                             bool HasFPFeatures)
+      : CXXNamedCastExpr(CXXStaticCastExprClass, Empty, PathSize,
+                         HasFPFeatures) {}
+
+  unsigned numTrailingObjects(OverloadToken<CXXBaseSpecifier *>) const {
+    return path_size();
+  }
 
 public:
   friend class CastExpr;
   friend TrailingObjects;
 
-  static CXXStaticCastExpr *Create(const ASTContext &Context, QualType T,
-                                   ExprValueKind VK, CastKind K, Expr *Op,
-                                   const CXXCastPath *Path,
-                                   TypeSourceInfo *Written, SourceLocation L,
-                                   SourceLocation RParenLoc,
-                                   SourceRange AngleBrackets);
+  static CXXStaticCastExpr *
+  Create(const ASTContext &Context, QualType T, ExprValueKind VK, CastKind K,
+         Expr *Op, const CXXCastPath *Path, TypeSourceInfo *Written,
+         FPOptionsOverride FPO, SourceLocation L, SourceLocation RParenLoc,
+         SourceRange AngleBrackets);
   static CXXStaticCastExpr *CreateEmpty(const ASTContext &Context,
-                                        unsigned PathSize);
+                                        unsigned PathSize, bool hasFPFeatures);
 
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == CXXStaticCastExprClass;
@@ -456,15 +467,17 @@ class CXXStaticCastExpr final
 class CXXDynamicCastExpr final
     : public CXXNamedCastExpr,
      private llvm::TrailingObjects<CXXDynamicCastExpr, CXXBaseSpecifier *> {
-  CXXDynamicCastExpr(QualType ty, ExprValueKind VK, CastKind kind,
-                     Expr *op, unsigned pathSize, TypeSourceInfo *writtenTy,
+  CXXDynamicCastExpr(QualType ty, ExprValueKind VK, CastKind kind, Expr *op,
+                     unsigned pathSize, TypeSourceInfo *writtenTy,
                      SourceLocation l, SourceLocation RParenLoc,
                      SourceRange AngleBrackets)
       : CXXNamedCastExpr(CXXDynamicCastExprClass, ty, VK, kind, op, pathSize,
-                         writtenTy, l, RParenLoc, AngleBrackets) {}
+                         /*HasFPFeatures*/ false, writtenTy, l, RParenLoc,
+                         AngleBrackets) {}
 
   explicit CXXDynamicCastExpr(EmptyShell Empty, unsigned pathSize)
-      : CXXNamedCastExpr(CXXDynamicCastExprClass, Empty, pathSize) {}
+      : CXXNamedCastExpr(CXXDynamicCastExprClass, Empty, pathSize,
+                         /*HasFPFeatures*/ false) {}
 
 public:
   friend class CastExpr;
@@ -499,16 +512,17 @@ class CXXReinterpretCastExpr final
     : public CXXNamedCastExpr,
       private llvm::TrailingObjects<CXXReinterpretCastExpr,
                                     CXXBaseSpecifier *> {
-  CXXReinterpretCastExpr(QualType ty, ExprValueKind vk, CastKind kind,
-                         Expr *op, unsigned pathSize,
-                         TypeSourceInfo *writtenTy, SourceLocation l,
-                         SourceLocation RParenLoc,
+  CXXReinterpretCastExpr(QualType ty, ExprValueKind vk, CastKind kind, Expr *op,
+                         unsigned pathSize, TypeSourceInfo *writtenTy,
+                         SourceLocation l, SourceLocation RParenLoc,
                          SourceRange AngleBrackets)
       : CXXNamedCastExpr(CXXReinterpretCastExprClass, ty, vk, kind, op,
-                         pathSize, writtenTy, l, RParenLoc, AngleBrackets) {}
+                         pathSize, /*HasFPFeatures*/ false, writtenTy, l,
+                         RParenLoc, AngleBrackets) {}
 
   CXXReinterpretCastExpr(EmptyShell Empty, unsigned pathSize)
-      : CXXNamedCastExpr(CXXReinterpretCastExprClass, Empty, pathSize) {}
+      : CXXNamedCastExpr(CXXReinterpretCastExprClass, Empty, pathSize,
+                         /*HasFPFeatures*/ false) {}
 
 public:
   friend class CastExpr;
@@ -541,11 +555,13 @@ class CXXConstCastExpr final
   CXXConstCastExpr(QualType ty, ExprValueKind VK, Expr *op,
                    TypeSourceInfo *writtenTy, SourceLocation l,
                    SourceLocation RParenLoc, SourceRange AngleBrackets)
-      : CXXNamedCastExpr(CXXConstCastExprClass, ty, VK, CK_NoOp, op,
-                         0, writtenTy, l, RParenLoc, AngleBrackets) {}
+      : CXXNamedCastExpr(CXXConstCastExprClass, ty, VK, CK_NoOp, op, 0,
+                         /*HasFPFeatures*/ false, writtenTy, l, RParenLoc,
+                         AngleBrackets) {}
 
   explicit CXXConstCastExpr(EmptyShell Empty)
-      : CXXNamedCastExpr(CXXConstCastExprClass, Empty, 0) {}
+      : CXXNamedCastExpr(CXXConstCastExprClass, Empty, 0,
+                         /*HasFPFeatures*/ false) {}
 
 public:
   friend class CastExpr;
@@ -578,10 +594,12 @@ class CXXAddrspaceCastExpr final
                        TypeSourceInfo *writtenTy, SourceLocation l,
                        SourceLocation RParenLoc, SourceRange AngleBrackets)
       : CXXNamedCastExpr(CXXAddrspaceCastExprClass, ty, VK, Kind, op, 0,
-                         writtenTy, l, RParenLoc, AngleBrackets) {}
+                         /*HasFPFeatures*/ false, writtenTy, l, RParenLoc,
+                         AngleBrackets) {}
 
   explicit CXXAddrspaceCastExpr(EmptyShell Empty)
-      : CXXNamedCastExpr(CXXAddrspaceCastExprClass, Empty, 0) {}
+      : CXXNamedCastExpr(CXXAddrspaceCastExprClass, Empty, 0,
+                         /*HasFPFeatures*/ false) {}
 
 public:
   friend class CastExpr;
@@ -1693,34 +1711,43 @@ class CXXInheritedCtorInitExpr : public Expr {
 /// \endcode
 class CXXFunctionalCastExpr final
     : public ExplicitCastExpr,
-      private llvm::TrailingObjects<CXXFunctionalCastExpr,
                                    CXXBaseSpecifier *> {
+      private llvm::TrailingObjects<CXXFunctionalCastExpr, CXXBaseSpecifier *,
+                                    FPOptionsOverride> {
   SourceLocation LParenLoc;
   SourceLocation RParenLoc;
 
   CXXFunctionalCastExpr(QualType ty, ExprValueKind VK,
-                        TypeSourceInfo *writtenTy,
-                        CastKind kind, Expr *castExpr, unsigned pathSize,
-                        SourceLocation lParenLoc, SourceLocation rParenLoc)
-      : ExplicitCastExpr(CXXFunctionalCastExprClass, ty, VK, kind,
-                         castExpr, pathSize, writtenTy),
-        LParenLoc(lParenLoc), RParenLoc(rParenLoc) {}
+                        TypeSourceInfo *writtenTy, CastKind kind,
+                        Expr *castExpr, unsigned pathSize,
+                        FPOptionsOverride FPO, SourceLocation lParenLoc,
+                        SourceLocation rParenLoc)
+      : ExplicitCastExpr(CXXFunctionalCastExprClass, ty, VK, kind, castExpr,
+                         pathSize, FPO.requiresTrailingStorage(), writtenTy),
+        LParenLoc(lParenLoc), RParenLoc(rParenLoc) {
+    if (hasStoredFPFeatures())
+      *getTrailingFPFeatures() = FPO;
+  }
+
+  explicit CXXFunctionalCastExpr(EmptyShell Shell, unsigned PathSize,
+                                 bool HasFPFeatures)
+      : ExplicitCastExpr(CXXFunctionalCastExprClass, Shell, PathSize,
+                         HasFPFeatures) {}
 
-  explicit CXXFunctionalCastExpr(EmptyShell Shell, unsigned PathSize)
-      : ExplicitCastExpr(CXXFunctionalCastExprClass, Shell, PathSize) {}
+  unsigned numTrailingObjects(OverloadToken<CXXBaseSpecifier *>) const {
+    return path_size();
+  }
 
 public:
   friend class CastExpr;
   friend TrailingObjects;
 
-  static CXXFunctionalCastExpr *Create(const ASTContext &Context, QualType T,
-                                       ExprValueKind VK,
-                                       TypeSourceInfo *Written,
-                                       CastKind Kind, Expr *Op,
-                                       const CXXCastPath *Path,
-                                       SourceLocation LPLoc,
-                                       SourceLocation RPLoc);
-  static CXXFunctionalCastExpr *CreateEmpty(const ASTContext &Context,
-                                            unsigned PathSize);
+  static CXXFunctionalCastExpr *
+  Create(const ASTContext &Context, QualType T, ExprValueKind VK,
+         TypeSourceInfo *Written, CastKind Kind, Expr *Op,
+         const CXXCastPath *Path, FPOptionsOverride FPO, SourceLocation LPLoc,
+         SourceLocation RPLoc);
+  static CXXFunctionalCastExpr *
+  CreateEmpty(const ASTContext &Context, unsigned PathSize, bool HasFPFeatures);
 
   SourceLocation getLParenLoc() const { return LParenLoc; }
   void setLParenLoc(SourceLocation L) { LParenLoc = L; }
@@ -4828,11 +4855,11 @@ class BuiltinBitCastExpr final
   BuiltinBitCastExpr(QualType T, ExprValueKind VK, CastKind CK, Expr *SrcExpr,
                      TypeSourceInfo *DstType, SourceLocation KWLoc,
                      SourceLocation RParenLoc)
-      : ExplicitCastExpr(BuiltinBitCastExprClass, T, VK, CK, SrcExpr, 0,
+      : ExplicitCastExpr(BuiltinBitCastExprClass, T, VK, CK, SrcExpr, 0, false,
                          DstType),
         KWLoc(KWLoc), RParenLoc(RParenLoc) {}
 
   BuiltinBitCastExpr(EmptyShell Empty)
-      : ExplicitCastExpr(BuiltinBitCastExprClass, Empty, 0) {}
+      : ExplicitCastExpr(BuiltinBitCastExprClass, Empty, 0, false) {}
 
   SourceLocation getBeginLoc() const LLVM_READONLY { return KWLoc; }
   SourceLocation getEndLoc() const LLVM_READONLY { return RParenLoc; }
diff --git a/clang/include/clang/AST/ExprObjC.h b/clang/include/clang/AST/ExprObjC.h
index 4b39d9ab96a6a..17eec51726978 100644
--- a/clang/include/clang/AST/ExprObjC.h
+++ b/clang/include/clang/AST/ExprObjC.h
@@ -1639,12 +1639,12 @@ class ObjCBridgedCastExpr final
                       CastKind CK, SourceLocation BridgeKeywordLoc,
                       TypeSourceInfo *TSInfo, Expr *Operand)
       : ExplicitCastExpr(ObjCBridgedCastExprClass, TSInfo->getType(), VK_RValue,
-                         CK, Operand, 0, TSInfo),
+                         CK, Operand, 0, false, TSInfo),
         LParenLoc(LParenLoc), BridgeKeywordLoc(BridgeKeywordLoc), Kind(Kind) {}
 
   /// Construct an empty Objective-C bridged cast.
   explicit ObjCBridgedCastExpr(EmptyShell Shell)
-      : ExplicitCastExpr(ObjCBridgedCastExprClass, Shell, 0) {}
+      : ExplicitCastExpr(ObjCBridgedCastExprClass, Shell, 0, false) {}
 
   SourceLocation getLParenLoc() const { return LParenLoc; }
 
diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h
index 1e04e64727a08..4a6e8182e5a06 100644
--- a/clang/include/clang/AST/Stmt.h
+++ b/clang/include/clang/AST/Stmt.h
@@ -521,6 +521,9 @@ class alignas(void *) Stmt {
     unsigned Kind : 6;
     unsigned PartOfExplicitCast : 1; // Only set for ImplicitCastExpr.
 
+    /// True if the cast expression has some floating-point features.
+    unsigned HasFPFeatures : 1;
+
     /// The number of CXXBaseSpecifiers in the cast. 14 bits would be enough
     /// here. ([implimits] Direct and indirect base classes [16384]).
     unsigned BasePathSize;
 
diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h
index f68a5dbfc2a0d..15ca348f47667 100644
--- a/clang/include/clang/AST/TextNodeDumper.h
+++ b/clang/include/clang/AST/TextNodeDumper.h
@@ -270,6 +270,7 @@ class TextNodeDumper
   void VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *Node);
   void VisitCXXThisExpr(const CXXThisExpr *Node);
   void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *Node);
+  void VisitCXXStaticCastExpr(const CXXStaticCastExpr *Node);
   void VisitCXXUnresolvedConstructExpr(const CXXUnresolvedConstructExpr *Node);
   void VisitCXXConstructExpr(const CXXConstructExpr *Node);
   void VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *Node);
diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 2c8bb55cb5d93..3614496ded967 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -497,6 +497,8 @@ class FPOptionsOverride {
   FPOptionsOverride() {}
   FPOptionsOverride(const LangOptions &LO)
       : Options(LO), OverrideMask(OverrideMaskBits) {}
+  FPOptionsOverride(FPOptions FPO)
+      : Options(FPO), OverrideMask(OverrideMaskBits) {}
 
   bool requiresTrailingStorage() const { return OverrideMask != 0; }
 
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 7334d5b659e20..dd3c8518c2a3e 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -6930,7 +6930,7 @@ ExpectedStmt ASTNodeImporter::VisitImplicitCastExpr(ImplicitCastExpr *E) {
 
   return ImplicitCastExpr::Create(
       Importer.getToContext(), *ToTypeOrErr, E->getCastKind(), *ToSubExprOrErr,
-      &(*ToBasePathOrErr), E->getValueKind());
+      &(*ToBasePathOrErr), E->getValueKind(), E->getFPFeatures());
 }
 
 ExpectedStmt ASTNodeImporter::VisitExplicitCastExpr(ExplicitCastExpr *E) {
@@ -6957,8 +6957,8 @@ ExpectedStmt ASTNodeImporter::VisitExplicitCastExpr(ExplicitCastExpr *E) {
       return ToRParenLocOrErr.takeError();
     return CStyleCastExpr::Create(
         Importer.getToContext(), ToType, E->getValueKind(), E->getCastKind(),
-        ToSubExpr, ToBasePath, ToTypeInfoAsWritten, *ToLParenLocOrErr,
-        *ToRParenLocOrErr);
+        ToSubExpr, ToBasePath, CCE->getFPFeatures(), ToTypeInfoAsWritten,
+        *ToLParenLocOrErr, *ToRParenLocOrErr);
   }
 
   case Stmt::CXXFunctionalCastExprClass: {
@@ -6971,8 +6971,8 @@ ExpectedStmt ASTNodeImporter::VisitExplicitCastExpr(ExplicitCastExpr *E) {
       return ToRParenLocOrErr.takeError();
     return CXXFunctionalCastExpr::Create(
         Importer.getToContext(), ToType, E->getValueKind(), ToTypeInfoAsWritten,
-        E->getCastKind(), ToSubExpr, ToBasePath, *ToLParenLocOrErr,
-        *ToRParenLocOrErr);
+        E->getCastKind(), ToSubExpr, ToBasePath, FCE->getFPFeatures(),
+        *ToLParenLocOrErr, *ToRParenLocOrErr);
   }
 
   case Stmt::ObjCBridgedCastExprClass: {
@@ -7815,10 +7815,11 @@ ExpectedStmt ASTNodeImporter::VisitCXXNamedCastExpr(CXXNamedCastExpr *E) {
   if (!ToBasePathOrErr)
     return ToBasePathOrErr.takeError();
 
-  if (isa<CXXStaticCastExpr>(E)) {
+  if (auto CCE = dyn_cast<CXXStaticCastExpr>(E)) {
     return CXXStaticCastExpr::Create(
         Importer.getToContext(), ToType, VK, CK, ToSubExpr, &(*ToBasePathOrErr),
-        ToTypeInfoAsWritten, ToOperatorLoc, ToRParenLoc, ToAngleBrackets);
+        ToTypeInfoAsWritten, CCE->getFPFeatures(), ToOperatorLoc, ToRParenLoc,
+        ToAngleBrackets);
   } else if (isa<CXXDynamicCastExpr>(E)) {
     return CXXDynamicCastExpr::Create(
         Importer.getToContext(), ToType, VK, CK, ToSubExpr, &(*ToBasePathOrErr),
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 15f3df0fd2168..b664224aa7323 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -1892,19 +1892,42 @@ const FieldDecl *CastExpr::getTargetFieldForToUnionCast(const RecordDecl *RD,
   return nullptr;
 }
 
+FPOptionsOverride *CastExpr::getTrailingFPFeatures() {
+  assert(hasStoredFPFeatures());
+  switch (getStmtClass()) {
+  case ImplicitCastExprClass:
+    return static_cast<ImplicitCastExpr *>(this)
+        ->getTrailingObjects<FPOptionsOverride>();
+  case CStyleCastExprClass:
+    return static_cast<CStyleCastExpr *>(this)
+        ->getTrailingObjects<FPOptionsOverride>();
+  case CXXFunctionalCastExprClass:
+    return static_cast<CXXFunctionalCastExpr *>(this)
+        ->getTrailingObjects<FPOptionsOverride>();
+  case CXXStaticCastExprClass:
+    return static_cast<CXXStaticCastExpr *>(this)
+        ->getTrailingObjects<FPOptionsOverride>();
+  default:
+    llvm_unreachable("Cast does not have FPFeatures");
+  }
+}
+
 ImplicitCastExpr *ImplicitCastExpr::Create(const ASTContext &C, QualType T,
                                            CastKind Kind, Expr *Operand,
                                            const CXXCastPath *BasePath,
-                                           ExprValueKind VK) {
+                                           ExprValueKind VK,
+                                           FPOptionsOverride FPO) {
   unsigned PathSize = (BasePath ? BasePath->size() : 0);
-  void *Buffer = C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *>(PathSize));
+  void *Buffer =
+      C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *, FPOptionsOverride>(
+          PathSize, FPO.requiresTrailingStorage()));
   // Per C++ [conv.lval]p3, lvalue-to-rvalue conversions on class and
   // std::nullptr_t have special semantics not captured by CK_LValueToRValue.
   assert((Kind != CK_LValueToRValue ||
           !(T->isNullPtrType() || T->getAsCXXRecordDecl())) &&
          "invalid type for lvalue-to-rvalue conversion");
   ImplicitCastExpr *E =
-      new (Buffer) ImplicitCastExpr(T, Kind, Operand, PathSize, VK);
+      new (Buffer) ImplicitCastExpr(T, Kind, Operand, PathSize, FPO, VK);
   if (PathSize)
     std::uninitialized_copy_n(BasePath->data(), BasePath->size(),
                               E->getTrailingObjects<CXXBaseSpecifier *>());
@@ -1912,21 +1935,26 @@ ImplicitCastExpr *ImplicitCastExpr::Create(const ASTContext &C, QualType T,
 }
 
 ImplicitCastExpr *ImplicitCastExpr::CreateEmpty(const ASTContext &C,
-                                                unsigned PathSize) {
-  void *Buffer = C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *>(PathSize));
-  return new (Buffer) ImplicitCastExpr(EmptyShell(), PathSize);
+                                                unsigned PathSize,
+                                                bool HasFPFeatures) {
+  void *Buffer =
+      C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *, FPOptionsOverride>(
+          PathSize, HasFPFeatures));
+  return new (Buffer) ImplicitCastExpr(EmptyShell(), PathSize, HasFPFeatures);
 }
 
-
 CStyleCastExpr *CStyleCastExpr::Create(const ASTContext &C, QualType T,
                                        ExprValueKind VK, CastKind K, Expr *Op,
                                        const CXXCastPath *BasePath,
+                                       FPOptionsOverride FPO,
                                        TypeSourceInfo *WrittenTy,
                                        SourceLocation L, SourceLocation R) {
   unsigned PathSize = (BasePath ? BasePath->size() : 0);
-  void *Buffer = C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *>(PathSize));
+  void *Buffer =
+      C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *, FPOptionsOverride>(
+          PathSize, FPO.requiresTrailingStorage()));
   CStyleCastExpr *E =
-      new (Buffer) CStyleCastExpr(T, VK, K, Op, PathSize, WrittenTy, L, R);
+      new (Buffer) CStyleCastExpr(T, VK, K, Op, PathSize, FPO, WrittenTy, L, R);
   if (PathSize)
     std::uninitialized_copy_n(BasePath->data(), BasePath->size(),
                               E->getTrailingObjects<CXXBaseSpecifier *>());
@@ -1934,9 +1962,12 @@ CStyleCastExpr *CStyleCastExpr::Create(const ASTContext &C, QualType T,
 }
 
 CStyleCastExpr *CStyleCastExpr::CreateEmpty(const ASTContext &C,
-                                            unsigned PathSize) {
-  void *Buffer = C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *>(PathSize));
-  return new (Buffer) CStyleCastExpr(EmptyShell(), PathSize);
+                                            unsigned PathSize,
+                                            bool HasFPFeatures) {
+  void *Buffer =
+      C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *, FPOptionsOverride>(
+          PathSize, HasFPFeatures));
+  return new (Buffer) CStyleCastExpr(EmptyShell(), PathSize, HasFPFeatures);
 }
 
 /// getOpcodeStr - Turn an Opcode enum value into the punctuation char it
diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp
index 3d61496f30e2a..3f3f2303587dd 100644
--- a/clang/lib/AST/ExprCXX.cpp
+++ b/clang/lib/AST/ExprCXX.cpp
@@ -690,19 +690,18 @@ const char *CXXNamedCastExpr::getCastName() const {
   }
 }
 
-CXXStaticCastExpr *CXXStaticCastExpr::Create(const ASTContext &C, QualType T,
-                                             ExprValueKind VK,
-                                             CastKind K, Expr *Op,
-                                             const CXXCastPath *BasePath,
-                                             TypeSourceInfo *WrittenTy,
-                                             SourceLocation L,
-                                             SourceLocation RParenLoc,
-                                             SourceRange AngleBrackets) {
+CXXStaticCastExpr *
+CXXStaticCastExpr::Create(const ASTContext &C, QualType T, ExprValueKind VK,
+                          CastKind K, Expr *Op, const CXXCastPath *BasePath,
+                          TypeSourceInfo *WrittenTy, FPOptionsOverride FPO,
+                          SourceLocation L, SourceLocation RParenLoc,
+                          SourceRange AngleBrackets) {
   unsigned PathSize = (BasePath ? BasePath->size() : 0);
-  void *Buffer = C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *>(PathSize));
-  auto *E =
-      new (Buffer) CXXStaticCastExpr(T, VK, K, Op, PathSize, WrittenTy, L,
-                                     RParenLoc, AngleBrackets);
+  void *Buffer =
+      C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *, FPOptionsOverride>(
+          PathSize, FPO.requiresTrailingStorage()));
+  auto *E = new (Buffer) CXXStaticCastExpr(T, VK, K, Op, PathSize, WrittenTy,
+                                           FPO, L, RParenLoc, AngleBrackets);
   if (PathSize)
     std::uninitialized_copy_n(BasePath->data(), BasePath->size(),
                               E->getTrailingObjects<CXXBaseSpecifier *>());
@@ -710,9 +709,12 @@ CXXStaticCastExpr *CXXStaticCastExpr::Create(const ASTContext &C, QualType T,
 }
 
 CXXStaticCastExpr *CXXStaticCastExpr::CreateEmpty(const ASTContext &C,
-                                                  unsigned PathSize) {
-  void *Buffer = C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *>(PathSize));
-  return new (Buffer) CXXStaticCastExpr(EmptyShell(), PathSize);
+                                                  unsigned PathSize,
+                                                  bool HasFPFeatures) {
+  void *Buffer =
+      C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *, FPOptionsOverride>(
+          PathSize, HasFPFeatures));
+  return new (Buffer) CXXStaticCastExpr(EmptyShell(), PathSize, HasFPFeatures);
 }
 
 CXXDynamicCastExpr *CXXDynamicCastExpr::Create(const ASTContext &C, QualType T,
@@ -823,25 +825,30 @@ CXXAddrspaceCastExpr *CXXAddrspaceCastExpr::CreateEmpty(const ASTContext &C) {
   return new (C) CXXAddrspaceCastExpr(EmptyShell());
 }
 
-CXXFunctionalCastExpr *
-CXXFunctionalCastExpr::Create(const ASTContext &C, QualType T, ExprValueKind VK,
-                              TypeSourceInfo *Written, CastKind K, Expr *Op,
-                              const CXXCastPath *BasePath,
-                              SourceLocation L, SourceLocation R) {
+CXXFunctionalCastExpr *CXXFunctionalCastExpr::Create(
+    const ASTContext &C, QualType T, ExprValueKind VK, TypeSourceInfo *Written,
+    CastKind K, Expr *Op, const CXXCastPath *BasePath, FPOptionsOverride FPO,
+    SourceLocation L, SourceLocation R) {
   unsigned PathSize = (BasePath ? BasePath->size() : 0);
-  void *Buffer = C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *>(PathSize));
-  auto *E =
-      new (Buffer) CXXFunctionalCastExpr(T, VK, Written, K, Op, PathSize, L, R);
+  void *Buffer =
+      C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *, FPOptionsOverride>(
+          PathSize, FPO.requiresTrailingStorage()));
+  auto *E = new (Buffer)
+      CXXFunctionalCastExpr(T, VK, Written, K, Op, PathSize, FPO, L, R);
   if (PathSize)
     std::uninitialized_copy_n(BasePath->data(), BasePath->size(),
                               E->getTrailingObjects<CXXBaseSpecifier *>());
   return E;
 }
 
-CXXFunctionalCastExpr *
-CXXFunctionalCastExpr::CreateEmpty(const ASTContext &C, unsigned PathSize) {
-  void *Buffer = C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *>(PathSize));
-  return new (Buffer) CXXFunctionalCastExpr(EmptyShell(), PathSize);
+CXXFunctionalCastExpr *CXXFunctionalCastExpr::CreateEmpty(const ASTContext &C,
+                                                          unsigned PathSize,
+                                                          bool HasFPFeatures) {
+  void *Buffer =
+      C.Allocate(totalSizeToAlloc<CXXBaseSpecifier *, FPOptionsOverride>(
+          PathSize, HasFPFeatures));
+  return new (Buffer)
+      CXXFunctionalCastExpr(EmptyShell(), PathSize, HasFPFeatures);
 }
 
 SourceLocation CXXFunctionalCastExpr::getBeginLoc() const {
diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp
index 16c4c3736a4a3..acbc0434931dc 100644
--- a/clang/lib/AST/TextNodeDumper.cpp
+++ b/clang/lib/AST/TextNodeDumper.cpp
@@ -964,6 +964,8 @@ void TextNodeDumper::VisitCastExpr(const CastExpr *Node) {
   }
   dumpBasePath(OS, Node);
   OS << ">";
+  if (Node->hasStoredFPFeatures())
+    printFPOptions(Node->getFPFeatures());
 }
 
 void TextNodeDumper::VisitImplicitCastExpr(const ImplicitCastExpr *Node) {
@@ -1132,6 +1134,14 @@ void TextNodeDumper::VisitCXXFunctionalCastExpr(
     const CXXFunctionalCastExpr *Node) {
   OS << " functional cast to " << Node->getTypeAsWritten().getAsString()
     << " <" << Node->getCastKindName() << ">";
+  if (Node->hasStoredFPFeatures())
+    printFPOptions(Node->getFPFeatures());
+}
+
+void TextNodeDumper::VisitCXXStaticCastExpr(const CXXStaticCastExpr *Node) {
+  VisitCXXNamedCastExpr(Node);
+  if (Node->hasStoredFPFeatures())
+    printFPOptions(Node->getFPFeatures());
 }
 
 void TextNodeDumper::VisitCXXUnresolvedConstructExpr(
diff --git a/clang/lib/Analysis/BodyFarm.cpp b/clang/lib/Analysis/BodyFarm.cpp
index f68b06487f98e..603da67156254 100644
--- a/clang/lib/Analysis/BodyFarm.cpp
+++ b/clang/lib/Analysis/BodyFarm.cpp
@@ -166,23 +166,21 @@ ASTMaker::makeLvalueToRvalue(const VarDecl *Arg,
 
 ImplicitCastExpr *ASTMaker::makeImplicitCast(const Expr *Arg, QualType Ty,
                                              CastKind CK) {
   return ImplicitCastExpr::Create(C, Ty,
-                                  /* CastKind=*/ CK,
-                                  /* Expr=*/ const_cast<Expr *>(Arg),
-                                  /* CXXCastPath=*/ nullptr,
-                                  /* ExprValueKind=*/ VK_RValue);
+                                  /* CastKind=*/CK,
+                                  /* Expr=*/const_cast<Expr *>(Arg),
+                                  /* CXXCastPath=*/nullptr,
+                                  /* ExprValueKind=*/VK_RValue,
+                                  /* FPFeatures */ FPOptionsOverride());
 }
 
 Expr *ASTMaker::makeIntegralCast(const Expr *Arg, QualType Ty) {
   if (Arg->getType() == Ty)
     return const_cast<Expr *>(Arg);
-
-  return ImplicitCastExpr::Create(C, Ty, CK_IntegralCast,
-                                  const_cast<Expr *>(Arg), nullptr, VK_RValue);
+  return makeImplicitCast(Arg, Ty, CK_IntegralCast);
 }
 
 ImplicitCastExpr *ASTMaker::makeIntegralCastToBoolean(const Expr *Arg) {
-  return ImplicitCastExpr::Create(C, C.BoolTy, CK_IntegralToBoolean,
-                                  const_cast<Expr *>(Arg), nullptr, VK_RValue);
+  return makeImplicitCast(Arg, C.BoolTy, CK_IntegralToBoolean);
 }
 
 ObjCBoolLiteralExpr *ASTMaker::makeObjCBool(bool Val) {
diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index 615b782350414..ac5559a93d9cc 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -1024,7 +1024,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
                         type, VK_LValue, SourceLocation());
 
     ImplicitCastExpr l2r(ImplicitCastExpr::OnStack, type, CK_LValueToRValue,
-                         &declRef, VK_RValue);
+                         &declRef, VK_RValue, FPOptionsOverride());
     // FIXME: Pass a specific location for the expr init so that the store is
     // attributed to a reasonable location - otherwise it may be attributed to
     // locations of subexpressions in the initialization.
diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp
index 26dfb6259a290..99b896ae34886 100644
--- a/clang/lib/CodeGen/CGObjC.cpp
+++ b/clang/lib/CodeGen/CGObjC.cpp
@@ -1449,9 +1449,9 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
   ValueDecl *selfDecl = setterMethod->getSelfDecl();
   DeclRefExpr self(getContext(), selfDecl, false, selfDecl->getType(),
                    VK_LValue, SourceLocation());
-  ImplicitCastExpr selfLoad(ImplicitCastExpr::OnStack,
-                            selfDecl->getType(), CK_LValueToRValue, &self,
-                            VK_RValue);
+  ImplicitCastExpr selfLoad(ImplicitCastExpr::OnStack, selfDecl->getType(),
+                            CK_LValueToRValue, &self, VK_RValue,
+                            FPOptionsOverride());
   ObjCIvarRefExpr ivarRef(ivar, ivar->getType().getNonReferenceType(),
                           SourceLocation(), SourceLocation(),
                           &selfLoad, true, true);
@@ -1462,7 +1462,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
                    SourceLocation());
   ImplicitCastExpr argLoad(ImplicitCastExpr::OnStack,
                            argType.getUnqualifiedType(), CK_LValueToRValue,
-                           &arg, VK_RValue);
+                           &arg, VK_RValue, FPOptionsOverride());
 
   // The property type can differ from the ivar type in some situations with
   // Objective-C pointer types, we can always bit cast the RHS in these cases.
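
[Editor's aside, not part of the patch: the Expr.h accessors above give clients a uniform way to ask any cast for the floating-point environment in effect at that point, whether or not the cast carries trailing storage. The sketch below is illustrative only; `allowsContractionAt` is a hypothetical helper name, while `hasStoredFPFeatures`, `getFPFeaturesInEffect`, and the `FPOptions` contraction queries are the interfaces shown in the diffs above.]

```cpp
#include "clang/AST/Expr.h"
#include "clang/Basic/LangOptions.h"

using namespace clang;

// Hypothetical helper: returns true if FP contraction (e.g. fusing a*b+c
// into an fma) is permitted at this cast. getFPFeaturesInEffect() folds the
// stored override -- when the cast has trailing storage -- into the global
// language options; casts without storage just report the defaults.
static bool allowsContractionAt(const CastExpr *CE, const LangOptions &LO) {
  FPOptions FPO = CE->getFPFeaturesInEffect(LO);
  return FPO.allowFPContractWithinStatement() ||
         FPO.allowFPContractAcrossStatement();
}
```
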
@@ -1483,9 +1483,8 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
   } else if (ivarRef.getType()->isPointerType()) {
     argCK = CK_BitCast;
   }
-  ImplicitCastExpr argCast(ImplicitCastExpr::OnStack,
-                           ivarRef.getType(), argCK, &argLoad,
-                           VK_RValue);
+  ImplicitCastExpr argCast(ImplicitCastExpr::OnStack, ivarRef.getType(), argCK,
+                           &argLoad, VK_RValue, FPOptionsOverride());
   Expr *finalArg = &argLoad;
   if (!getContext().hasSameUnqualifiedType(ivarRef.getType(),
                                            argLoad.getType()))
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index b9260892bd215..19dc9a87f239c 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -4137,7 +4137,7 @@ createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
   PrivateVD->setInitStyle(VarDecl::CInit);
   PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
                                               InitRef, /*BasePath=*/nullptr,
-                                              VK_RValue));
+                                              VK_RValue, FPOptionsOverride()));
   Data.FirstprivateVars.emplace_back(OrigRef);
   Data.FirstprivateCopies.emplace_back(PrivateRef);
   Data.FirstprivateInits.emplace_back(InitRef);
diff --git a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
index 8c41e71ef0187..c0c81221b2344 100644
--- a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
+++ b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
@@ -586,7 +586,8 @@ namespace {
                                       CastKind Kind, Expr *E) {
       TypeSourceInfo *TInfo = Ctx->getTrivialTypeSourceInfo(Ty, SourceLocation());
       return CStyleCastExpr::Create(*Ctx, Ty, VK_RValue, Kind, E, nullptr,
-                                    TInfo, SourceLocation(), SourceLocation());
+                                    FPOptionsOverride(), TInfo,
+                                    SourceLocation(), SourceLocation());
     }
 
     bool ImplementationIsNonLazy(const ObjCImplDecl *OD) const {
@@ -2105,8 +2106,8 @@ RewriteModernObjC::SynthesizeCallToFunctionDecl(FunctionDecl *FD,
   // Now, we cast the reference to a pointer to the objc_msgSend type.
   QualType pToFunc = Context->getPointerType(msgSendType);
   ImplicitCastExpr *ICE =
-      ImplicitCastExpr::Create(*Context, pToFunc, CK_FunctionToPointerDecay,
-                               DRE, nullptr, VK_RValue);
+      ImplicitCastExpr::Create(*Context, pToFunc, CK_FunctionToPointerDecay,
+                               DRE, nullptr, VK_RValue, FPOptionsOverride());
 
   const auto *FT = msgSendType->castAs<FunctionProtoType>();
   CallExpr *Exp =
diff --git a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
index 4ecd6e95de10e..990509a84b06c 100644
--- a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
+++ b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
@@ -492,7 +492,8 @@ namespace {
                                       CastKind Kind, Expr *E) {
       TypeSourceInfo *TInfo = Ctx->getTrivialTypeSourceInfo(Ty, SourceLocation());
       return CStyleCastExpr::Create(*Ctx, Ty, VK_RValue, Kind, E, nullptr,
-                                    TInfo, SourceLocation(), SourceLocation());
+                                    FPOptionsOverride(), TInfo,
+                                    SourceLocation(), SourceLocation());
     }
 
     StringLiteral *getStringLiteral(StringRef Str) {
@@ -2022,8 +2023,8 @@ RewriteObjC::SynthesizeCallToFunctionDecl(FunctionDecl *FD,
   // Now, we cast the reference to a pointer to the objc_msgSend type.
   QualType pToFunc = Context->getPointerType(msgSendType);
   ImplicitCastExpr *ICE =
-      ImplicitCastExpr::Create(*Context, pToFunc, CK_FunctionToPointerDecay,
-                               DRE, nullptr, VK_RValue);
+      ImplicitCastExpr::Create(*Context, pToFunc, CK_FunctionToPointerDecay,
+                               DRE, nullptr, VK_RValue, FPOptionsOverride());
 
   const auto *FT = msgSendType->castAs<FunctionProtoType>();
 
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 47484c5be9c9b..375fe3b28dec3 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -586,7 +586,8 @@ ExprResult Sema::ImpCastExprToType(Expr *E, QualType Ty,
     }
   }
 
-  return ImplicitCastExpr::Create(Context, Ty, Kind, E, BasePath, VK);
+  return ImplicitCastExpr::Create(Context, Ty, Kind, E, BasePath, VK,
+                                  CurFPFeatureOverrides());
 }
 
 /// ScalarTypeToBooleanCastKind - Returns the cast kind corresponding
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index 726900c59f20e..f718154ce6db8 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -105,10 +105,10 @@ namespace {
       // If this is an unbridged cast, wrap the result in an implicit
       // cast that yields the unbridged-cast placeholder type.
       if (IsARCUnbridgedCast) {
-        castExpr = ImplicitCastExpr::Create(Self.Context,
-                                            Self.Context.ARCUnbridgedCastTy,
-                                            CK_Dependent, castExpr, nullptr,
-                                            castExpr->getValueKind());
+        castExpr = ImplicitCastExpr::Create(
+            Self.Context, Self.Context.ARCUnbridgedCastTy, CK_Dependent,
+            castExpr, nullptr, castExpr->getValueKind(),
+            Self.CurFPFeatureOverrides());
       }
       updatePartOfExplicitCastFlags(castExpr);
       return castExpr;
@@ -361,11 +361,10 @@ Sema::BuildCXXNamedCast(SourceLocation OpLoc, tok::TokenKind Kind,
       DiscardMisalignedMemberAddress(DestType.getTypePtr(), E);
     }
 
-    return Op.complete(CXXStaticCastExpr::Create(Context, Op.ResultType,
-                                                 Op.ValueKind, Op.Kind, Op.SrcExpr.get(),
-                                                 &Op.BasePath, DestTInfo,
-                                                 OpLoc, Parens.getEnd(),
-                                                 AngleBrackets));
+    return Op.complete(CXXStaticCastExpr::Create(
+        Context, Op.ResultType, Op.ValueKind, Op.Kind, Op.SrcExpr.get(),
+        &Op.BasePath, DestTInfo, CurFPFeatureOverrides(), OpLoc,
+        Parens.getEnd(), AngleBrackets));
   }
   }
 }
@@ -3033,9 +3032,9 @@ ExprResult Sema::BuildCStyleCastExpr(SourceLocation LPLoc,
   // -Wcast-qual
   DiagnoseCastQual(Op.Self, Op.SrcExpr, Op.DestType);
 
-  return Op.complete(CStyleCastExpr::Create(Context, Op.ResultType,
-                     Op.ValueKind, Op.Kind, Op.SrcExpr.get(),
-                     &Op.BasePath, CastTypeInfo, LPLoc, RPLoc));
+  return Op.complete(CStyleCastExpr::Create(
+      Context, Op.ResultType, Op.ValueKind, Op.Kind, Op.SrcExpr.get(),
+      &Op.BasePath, CurFPFeatureOverrides(), CastTypeInfo, LPLoc, RPLoc));
 }
 
 ExprResult Sema::BuildCXXFunctionalCastExpr(TypeSourceInfo *CastTypeInfo,
@@ -3058,7 +3057,7 @@ ExprResult Sema::BuildCXXFunctionalCastExpr(TypeSourceInfo *CastTypeInfo,
   if (auto *ConstructExpr = dyn_cast<CXXConstructExpr>(SubExpr))
     ConstructExpr->setParenOrBraceRange(SourceRange(LPLoc, RPLoc));
 
-  return Op.complete(CXXFunctionalCastExpr::Create(Context, Op.ResultType,
-                     Op.ValueKind, CastTypeInfo, Op.Kind,
-                     Op.SrcExpr.get(), &Op.BasePath, LPLoc, RPLoc));
+  return Op.complete(CXXFunctionalCastExpr::Create(
+      Context, Op.ResultType, Op.ValueKind, CastTypeInfo, Op.Kind,
+      Op.SrcExpr.get(), &Op.BasePath, CurFPFeatureOverrides(), LPLoc, RPLoc));
 }
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index a9e6113dc7bb5..4ede2f9192f4f 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -18172,11 +18172,9 @@ void Sema::ActOnEnumBody(SourceLocation EnumLoc, SourceRange BraceRange,
     // Adjust the Expr initializer and type.
     if (ECD->getInitExpr() &&
        !Context.hasSameType(NewTy, ECD->getInitExpr()->getType()))
-      ECD->setInitExpr(ImplicitCastExpr::Create(Context, NewTy,
-                                                CK_IntegralCast,
-                                                ECD->getInitExpr(),
-                                                /*base paths*/ nullptr,
-                                                VK_RValue));
+      ECD->setInitExpr(ImplicitCastExpr::Create(
+          Context, NewTy, CK_IntegralCast, ECD->getInitExpr(),
+          /*base paths*/ nullptr, VK_RValue, FPOptionsOverride()));
 
     if (getLangOpts().CPlusPlus)
       // C++ [dcl.enum]p4: Following the closing brace of an
       // enum-specifier, each enumerator has the type of its
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 0a4f75ad341b1..6558a4f6d8b20 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -1185,7 +1185,8 @@ static bool checkTupleLikeDecomposition(Sema &S,
     //   an xvalue otherwise
     if (!Src->getType()->isLValueReferenceType())
       E = ImplicitCastExpr::Create(S.Context, E.get()->getType(), CK_NoOp,
-                                   E.get(), nullptr, VK_XValue);
+                                   E.get(), nullptr, VK_XValue,
+                                   FPOptionsOverride());
 
     TemplateArgumentListInfo Args(Loc, Loc);
     Args.addArgument(
@@ -14869,9 +14870,9 @@ void Sema::DefineImplicitLambdaToBlockPointerConversion(
   //   (since it's unusable otherwise); in the case where we inline the
   //   block literal, it has block literal lifetime semantics.
   if (!BuildBlock.isInvalid() && !getLangOpts().ObjCAutoRefCount)
-    BuildBlock = ImplicitCastExpr::Create(Context, BuildBlock.get()->getType(),
-                                          CK_CopyAndAutoreleaseBlockObject,
-                                          BuildBlock.get(), nullptr, VK_RValue);
+    BuildBlock = ImplicitCastExpr::Create(
+        Context, BuildBlock.get()->getType(), CK_CopyAndAutoreleaseBlockObject,
+        BuildBlock.get(), nullptr, VK_RValue, FPOptionsOverride());
 
   if (BuildBlock.isInvalid()) {
     Diag(CurrentLocation, diag::note_lambda_to_block_conv);
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index d6f0a12106fe0..9a4b3e31e850c 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -695,7 +695,8 @@ ExprResult Sema::DefaultLvalueConversion(Expr *E) {
   // C++ [conv.lval]p3:
   //   If T is cv std::nullptr_t, the result is a null pointer constant.
   CastKind CK = T->isNullPtrType() ? CK_NullToPointer : CK_LValueToRValue;
-  Res = ImplicitCastExpr::Create(Context, T, CK, E, nullptr, VK_RValue);
+  Res = ImplicitCastExpr::Create(Context, T, CK, E, nullptr, VK_RValue,
+                                 FPOptionsOverride());
 
   // C11 6.3.2.1p2:
   //   ... if the lvalue has atomic type, the value has the non-atomic version
@@ -703,7 +704,7 @@ ExprResult Sema::DefaultLvalueConversion(Expr *E) {
   if (const AtomicType *Atomic = T->getAs<AtomicType>()) {
     T = Atomic->getValueType().getUnqualifiedType();
     Res = ImplicitCastExpr::Create(Context, T, CK_AtomicToNonAtomic, Res.get(),
-                                   nullptr, VK_RValue);
+                                   nullptr, VK_RValue, FPOptionsOverride());
   }
 
   return Res;
@@ -6960,9 +6961,9 @@ void Sema::maybeExtendBlockObject(ExprResult &E) {
   // Only do this in an r-value context.
   if (!getLangOpts().ObjCAutoRefCount) return;
 
-  E = ImplicitCastExpr::Create(Context, E.get()->getType(),
-                               CK_ARCExtendBlockObject, E.get(),
-                               /*base path*/ nullptr, VK_RValue);
+  E = ImplicitCastExpr::Create(
+      Context, E.get()->getType(), CK_ARCExtendBlockObject, E.get(),
+      /*base path*/ nullptr, VK_RValue, FPOptionsOverride());
   Cleanup.setExprNeedsCleanups(true);
 }
 
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index d1fcdf3545278..b5d4276f22b46 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -1503,7 +1503,8 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo,
                       : SourceRange(LParenOrBraceLoc, RParenOrBraceLoc);
     Result = CXXFunctionalCastExpr::Create(
         Context, ResultType, Expr::getValueKindForType(Ty), TInfo, CK_NoOp,
-        Result.get(), /*Path=*/nullptr, Locs.getBegin(), Locs.getEnd());
+        Result.get(), /*Path=*/nullptr, CurFPFeatureOverrides(),
+        Locs.getBegin(), Locs.getEnd());
   }
 
   return Result;
@@ -2204,7 +2205,7 @@ Sema::BuildCXXNew(SourceRange Range, bool UseGlobal,
                             SizeTy, SourceLocation());
   ImplicitCastExpr DesiredAlignment(ImplicitCastExpr::OnStack, AlignValT,
                                     CK_IntegralCast, &AlignmentLiteral,
-                                    VK_RValue);
+                                    VK_RValue, FPOptionsOverride());
 
   // Adjust placement args by prepending conjured size and alignment exprs.
   llvm::SmallVector<Expr *, 8> CallArgs;
@@ -3915,7 +3916,8 @@ static ExprResult BuildCXXCastArgument(Sema &S,
       // Record usage of conversion in an implicit cast.
       Result = ImplicitCastExpr::Create(S.Context, Result.get()->getType(),
                                         CK_UserDefinedConversion, Result.get(),
-                                        nullptr, Result.get()->getValueKind());
+                                        nullptr, Result.get()->getValueKind(),
+                                        S.CurFPFeatureOverrides());
 
       return S.MaybeBindToTemporary(Result.get());
     }
@@ -4096,7 +4098,8 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
     if (const AtomicType *FromAtomic = FromType->getAs<AtomicType>()) {
       FromType = FromAtomic->getValueType().getUnqualifiedType();
       From = ImplicitCastExpr::Create(Context, FromType, CK_AtomicToNonAtomic,
-                                      From, /*BasePath=*/nullptr, VK_RValue);
+                                      From, /*BasePath=*/nullptr, VK_RValue,
+                                      FPOptionsOverride());
     }
     break;
 
@@ -6840,7 +6843,7 @@ ExprResult Sema::MaybeBindToTemporary(Expr *E) {
     CastKind ck = (ReturnsRetained ? CK_ARCConsumeObject
                                    : CK_ARCReclaimReturnedObject);
     return ImplicitCastExpr::Create(Context, E->getType(), ck, E, nullptr,
-                                    VK_RValue);
+                                    VK_RValue, FPOptionsOverride());
   }
 
   if (E->getType().isDestructedType() == QualType::DK_nontrivial_c_struct)
diff --git a/clang/lib/Sema/SemaExprObjC.cpp b/clang/lib/Sema/SemaExprObjC.cpp
index 228a1ec3ba1f9..2c088c8b15a3f 100644
--- a/clang/lib/Sema/SemaExprObjC.cpp
+++ b/clang/lib/Sema/SemaExprObjC.cpp
@@ -4462,8 +4462,8 @@ Sema::CheckObjCConversion(SourceRange castRange, QualType castType,
   // If the result is +1, consume it here.
   case ACC_plusOne:
     castExpr = ImplicitCastExpr::Create(Context, castExpr->getType(),
-                                        CK_ARCConsumeObject, castExpr,
-                                        nullptr, VK_RValue);
+                                        CK_ARCConsumeObject, castExpr, nullptr,
+                                        VK_RValue, FPOptionsOverride());
     Cleanup.setExprNeedsCleanups(true);
     return ACR_okay;
   }
@@ -4689,9 +4689,9 @@ ExprResult Sema::BuildObjCBridgedCast(SourceLocation LParenLoc,
 
   case OBC_BridgeRetained:
     // Produce the object before casting it.
-    SubExpr = ImplicitCastExpr::Create(Context, FromType,
-                                       CK_ARCProduceObject,
-                                       SubExpr, nullptr, VK_RValue);
+    SubExpr = ImplicitCastExpr::Create(Context, FromType, CK_ARCProduceObject,
+                                       SubExpr, nullptr, VK_RValue,
+                                       FPOptionsOverride());
     break;
 
   case OBC_BridgeTransfer: {
@@ -4730,7 +4730,7 @@ ExprResult Sema::BuildObjCBridgedCast(SourceLocation LParenLoc,
   if (MustConsume) {
     Cleanup.setExprNeedsCleanups(true);
     Result = ImplicitCastExpr::Create(Context, T, CK_ARCConsumeObject, Result,
-                                      nullptr, VK_RValue);
+                                      nullptr, VK_RValue, FPOptionsOverride());
   }
 
   return Result;
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index f63d600032ce4..ab82f85a086e5 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -2890,8 +2890,9 @@ InitListChecker::CheckDesignatedInitializer(const InitializedEntity &Entity,
         Expr *Init = new (Context) IntegerLiteral(
             Context, CodeUnit, PromotedCharTy, SubExpr->getExprLoc());
         if (CharTy != PromotedCharTy)
-          Init = ImplicitCastExpr::Create(Context, CharTy, CK_IntegralCast,
-                                          Init, nullptr, VK_RValue);
+          Init =
+              ImplicitCastExpr::Create(Context, CharTy, CK_IntegralCast, Init,
+                                       nullptr, VK_RValue, FPOptionsOverride());
         StructuredList->updateInit(Context, i, Init);
       }
     } else {
@@ -2912,8 +2913,9 @@ InitListChecker::CheckDesignatedInitializer(const InitializedEntity &Entity,
         Expr *Init = new (Context) IntegerLiteral(
             Context, CodeUnit, PromotedCharTy, SubExpr->getExprLoc());
         if (CharTy != PromotedCharTy)
-          Init = ImplicitCastExpr::Create(Context, CharTy, CK_IntegralCast,
-                                          Init, nullptr, VK_RValue);
+          Init =
+              ImplicitCastExpr::Create(Context, CharTy, CK_IntegralCast, Init,
+                                       nullptr, VK_RValue, FPOptionsOverride());
         StructuredList->updateInit(Context, i, Init);
       }
     }
@@ -8019,9 +8021,9 @@ ExprResult InitializationSequence::Perform(Sema &S,
           (Step->Kind == SK_CastDerivedToBaseXValue ? VK_XValue : VK_RValue);
 
-      CurInit =
-          ImplicitCastExpr::Create(S.Context, Step->Type, CK_DerivedToBase,
-                                   CurInit.get(), &BasePath, VK);
+      CurInit = ImplicitCastExpr::Create(S.Context, Step->Type,
+                                         CK_DerivedToBase, CurInit.get(),
+                                         &BasePath, VK, FPOptionsOverride());
       break;
     }
 
@@ -8150,9 +8152,9 @@ ExprResult InitializationSequence::Perform(Sema &S,
       if (CreatedObject && checkAbstractType(CurInit.get()->getType()))
         return ExprError();
 
-      CurInit = ImplicitCastExpr::Create(S.Context, CurInit.get()->getType(),
-                                         CastKind, CurInit.get(), nullptr,
-                                         CurInit.get()->getValueKind());
+      CurInit = ImplicitCastExpr::Create(
+          S.Context, CurInit.get()->getType(), CastKind, CurInit.get(), nullptr,
+          CurInit.get()->getValueKind(), S.CurFPFeatureOverrides());
 
       if (shouldBindAsTemporary(Entity))
         // The overall entity is temporary, so this expression should be
@@ -8493,9 +8495,9 @@ ExprResult InitializationSequence::Perform(Sema &S,
       break;
 
     case SK_ProduceObjCObject:
-      CurInit =
-          ImplicitCastExpr::Create(S.Context, Step->Type, CK_ARCProduceObject,
-                                   CurInit.get(), nullptr, VK_RValue);
+      CurInit = ImplicitCastExpr::Create(
+          S.Context, Step->Type, CK_ARCProduceObject, CurInit.get(), nullptr,
+          VK_RValue, FPOptionsOverride());
       break;
 
     case SK_StdInitializerList: {
@@ -8549,9 +8551,9 @@ ExprResult InitializationSequence::Perform(Sema &S,
         // Case 1b and 1c
         // No cast from integer to sampler is needed.
         if (!Var->hasGlobalStorage()) {
-          CurInit = ImplicitCastExpr::Create(S.Context, Step->Type,
-                                             CK_LValueToRValue, Init,
-                                             /*BasePath=*/nullptr, VK_RValue);
+          CurInit = ImplicitCastExpr::Create(
+              S.Context, Step->Type, CK_LValueToRValue, Init,
+              /*BasePath=*/nullptr, VK_RValue, FPOptionsOverride());
           break;
         }
         // Case 1a
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index c9f2854f7accf..0b081f39299e9 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -680,8 +680,9 @@ static void adjustBlockReturnsToEnum(Sema &S, ArrayRef<ReturnStmt *> returns,
     ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(retValue);
 
     Expr *E = (cleanups ? cleanups->getSubExpr() : retValue);
-    E = ImplicitCastExpr::Create(S.Context, returnType, CK_IntegralCast,
-                                 E, /*base path*/ nullptr, VK_RValue);
+    E = ImplicitCastExpr::Create(S.Context, returnType, CK_IntegralCast, E,
+                                 /*base path*/ nullptr, VK_RValue,
+                                 FPOptionsOverride());
     if (cleanups) {
       cleanups->setSubExpr(E);
     } else {
diff --git a/clang/lib/Sema/SemaObjCProperty.cpp b/clang/lib/Sema/SemaObjCProperty.cpp
index e301c62dd2c0b..fdc30fe6f6576 100644
--- a/clang/lib/Sema/SemaObjCProperty.cpp
+++ b/clang/lib/Sema/SemaObjCProperty.cpp
@@ -1464,10 +1464,9 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
           DeclRefExpr(Context, SelfDecl, false, SelfDecl->getType(),
                       VK_LValue, PropertyDiagLoc);
       MarkDeclRefReferenced(SelfExpr);
-      Expr *LoadSelfExpr =
-          ImplicitCastExpr::Create(Context, SelfDecl->getType(),
-                                   CK_LValueToRValue, SelfExpr, nullptr,
-                                   VK_RValue);
+      Expr *LoadSelfExpr = ImplicitCastExpr::Create(
+          Context, SelfDecl->getType(), CK_LValueToRValue, SelfExpr, nullptr,
+          VK_RValue, FPOptionsOverride());
       Expr *IvarRefExpr =
         new (Context) ObjCIvarRefExpr(Ivar,
                                       Ivar->getUsageType(SelfDecl->getType()),
@@ -1528,10 +1527,9 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
           DeclRefExpr(Context, SelfDecl, false, SelfDecl->getType(),
                       VK_LValue, PropertyDiagLoc);
       MarkDeclRefReferenced(SelfExpr);
-      Expr *LoadSelfExpr =
-          ImplicitCastExpr::Create(Context, SelfDecl->getType(),
-                                   CK_LValueToRValue, SelfExpr, nullptr,
-                                   VK_RValue);
+      Expr *LoadSelfExpr = ImplicitCastExpr::Create(
+          Context, SelfDecl->getType(), CK_LValueToRValue, SelfExpr, nullptr,
+          VK_RValue, FPOptionsOverride());
       Expr *lhs =
         new (Context) ObjCIvarRefExpr(Ivar,
                                       Ivar->getUsageType(SelfDecl->getType()),
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 352f52d2f6260..1aeb52a213f6e 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -15388,12 +15388,12 @@ static bool actOnOMPReductionKindClause(
       if (!BasePath.empty()) {
         LHS = S.DefaultLvalueConversion(LHS.get());
         RHS = S.DefaultLvalueConversion(RHS.get());
-        LHS = ImplicitCastExpr::Create(Context, PtrRedTy,
-                                       CK_UncheckedDerivedToBase, LHS.get(),
-                                       &BasePath, LHS.get()->getValueKind());
-        RHS = ImplicitCastExpr::Create(Context, PtrRedTy,
-                                       CK_UncheckedDerivedToBase, RHS.get(),
-                                       &BasePath, RHS.get()->getValueKind());
+        LHS = ImplicitCastExpr::Create(
+            Context, PtrRedTy, CK_UncheckedDerivedToBase, LHS.get(), &BasePath,
+            LHS.get()->getValueKind(), FPOptionsOverride());
+        RHS = ImplicitCastExpr::Create(
+            Context, PtrRedTy, CK_UncheckedDerivedToBase, RHS.get(), &BasePath,
+            RHS.get()->getValueKind(), FPOptionsOverride());
       }
       FunctionProtoType::ExtProtoInfo EPI;
       QualType Params[] = {PtrRedTy, PtrRedTy};
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 71341e5688fe0..95d110e754f45 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -5862,7 +5862,8 @@ diagnoseNoViableConversion(Sema &SemaRef, SourceLocation Loc, Expr *&From,
       // Record usage of conversion in an implicit cast.
       From = ImplicitCastExpr::Create(SemaRef.Context, Result.get()->getType(),
                                       CK_UserDefinedConversion, Result.get(),
-                                      nullptr, Result.get()->getValueKind());
+                                      nullptr, Result.get()->getValueKind(),
+                                      SemaRef.CurFPFeatureOverrides());
     }
     return false;
   }
@@ -5891,7 +5892,8 @@ static bool recordConversion(Sema &SemaRef, SourceLocation Loc, Expr *&From,
   // Record usage of conversion in an implicit cast.
   From = ImplicitCastExpr::Create(SemaRef.Context, Result.get()->getType(),
                                   CK_UserDefinedConversion, Result.get(),
-                                  nullptr, Result.get()->getValueKind());
+                                  nullptr, Result.get()->getValueKind(),
+                                  SemaRef.CurFPFeatureOverrides());
   return false;
 }
 
@@ -7296,8 +7298,8 @@ void Sema::AddConversionCandidate(
                             VK_LValue, From->getBeginLoc());
   ImplicitCastExpr ConversionFn(ImplicitCastExpr::OnStack,
                                 Context.getPointerType(Conversion->getType()),
-                                CK_FunctionToPointerDecay,
-                                &ConversionRef, VK_RValue);
+                                CK_FunctionToPointerDecay, &ConversionRef,
+                                VK_RValue, FPOptionsOverride());
 
   QualType ConversionType = Conversion->getConversionType();
   if (!isCompleteType(From->getBeginLoc(), ConversionType)) {
@@ -14422,9 +14424,9 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj,
     if (Call.isInvalid())
       return ExprError();
     // Record usage of conversion in an implicit cast.
-    Call = ImplicitCastExpr::Create(Context, Call.get()->getType(),
-                                    CK_UserDefinedConversion, Call.get(),
-                                    nullptr, VK_RValue);
+    Call = ImplicitCastExpr::Create(
+        Context, Call.get()->getType(), CK_UserDefinedConversion, Call.get(),
+        nullptr, VK_RValue, CurFPFeatureOverrides());
 
     return BuildCallExpr(S, Call.get(), LParenLoc, Args, RParenLoc);
   }
@@ -14829,10 +14831,9 @@ Expr *Sema::FixOverloadedFunctionReference(Expr *E, DeclAccessPair Found,
     if (SubExpr == ICE->getSubExpr())
       return ICE;
 
-    return ImplicitCastExpr::Create(Context, ICE->getType(),
-                                    ICE->getCastKind(),
-                                    SubExpr, nullptr,
-                                    ICE->getValueKind());
+    return ImplicitCastExpr::Create(Context, ICE->getType(), ICE->getCastKind(),
+                                    SubExpr, nullptr, ICE->getValueKind(),
+                                    CurFPFeatureOverrides());
   }
 
   if (auto *GSE = dyn_cast<GenericSelectionExpr>(E)) {
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index c44636ad1b395..5b4aaa678974b 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -3095,7 +3095,7 @@ static void TryMoveInitialization(Sema& S,
                                   bool ConvertingConstructorsOnly,
                                   ExprResult &Res) {
   ImplicitCastExpr AsRvalue(ImplicitCastExpr::OnStack, Value->getType(),
-                            CK_NoOp, Value, VK_XValue);
+                            CK_NoOp, Value, VK_XValue, FPOptionsOverride());
 
   Expr *InitExpr = &AsRvalue;
 
@@ -3150,8 +3150,9 @@ static void TryMoveInitialization(Sema& S,
         // Promote "AsRvalue" to the heap, since we now need this
         // expression node to persist.
-        Value = ImplicitCastExpr::Create(S.Context, Value->getType(), CK_NoOp,
-                                         Value, nullptr, VK_XValue);
+        Value =
+            ImplicitCastExpr::Create(S.Context, Value->getType(), CK_NoOp, Value,
+                                     nullptr, VK_XValue, FPOptionsOverride());
 
         // Complete type-checking the initialization of the return type
         // using the constructor we found.
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 6721b07253292..e1a563850970a 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -7478,7 +7478,7 @@ Sema::BuildExpressionFromIntegralTemplateArgument(const TemplateArgument &Arg,
     // FIXME: This is a hack. We need a better way to handle substituted
     // non-type template parameters.
     E = CStyleCastExpr::Create(Context, OrigT, VK_RValue, CK_IntegralCast, E,
-                               nullptr,
+                               nullptr, CurFPFeatureOverrides(),
                                Context.getTrivialTypeSourceInfo(OrigT, Loc),
                                Loc, Loc);
   }
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index e261044f7cb14..c154c146727e9 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -1082,6 +1082,8 @@ void ASTStmtReader::VisitCastExpr(CastExpr *E) {
   VisitExpr(E);
   unsigned NumBaseSpecs = Record.readInt();
   assert(NumBaseSpecs == E->path_size());
+  unsigned HasFPFeatures = Record.readInt();
+  assert(E->hasStoredFPFeatures() == HasFPFeatures);
   E->setSubExpr(Record.readSubExpr());
   E->setCastKind((CastKind)Record.readInt());
   CastExpr::path_iterator BaseI = E->path_begin();
@@ -1090,6 +1092,9 @@ void ASTStmtReader::VisitCastExpr(CastExpr *E) {
     *BaseSpec = Record.readCXXBaseSpecifier();
     *BaseI++ = BaseSpec;
   }
+  if (HasFPFeatures)
+    *E->getTrailingFPFeatures() =
+        FPOptionsOverride::getFromOpaqueInt(Record.readInt());
 }
 
 void ASTStmtReader::VisitBinaryOperator(BinaryOperator *E) {
@@ -2893,13 +2898,17 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
       break;
 
     case EXPR_IMPLICIT_CAST:
-      S = ImplicitCastExpr::CreateEmpty(Context,
-                       /*PathSize*/ Record[ASTStmtReader::NumExprFields]);
+      S = ImplicitCastExpr::CreateEmpty(
+          Context,
+          /*PathSize*/ Record[ASTStmtReader::NumExprFields],
+          /*HasFPFeatures*/ Record[ASTStmtReader::NumExprFields + 1]);
       break;
 
    case EXPR_CSTYLE_CAST:
-      S = CStyleCastExpr::CreateEmpty(Context,
-                       /*PathSize*/ Record[ASTStmtReader::NumExprFields]);
+      S = CStyleCastExpr::CreateEmpty(
+          Context,
+          /*PathSize*/ Record[ASTStmtReader::NumExprFields],
+          /*HasFPFeatures*/ Record[ASTStmtReader::NumExprFields + 1]);
      break;
 
     case EXPR_COMPOUND_LITERAL:
@@ -3501,8 +3510,10 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
       break;
 
     case EXPR_CXX_STATIC_CAST:
-      S = CXXStaticCastExpr::CreateEmpty(Context,
-                       /*PathSize*/ Record[ASTStmtReader::NumExprFields]);
+      S = CXXStaticCastExpr::CreateEmpty(
+          Context,
+          /*PathSize*/ Record[ASTStmtReader::NumExprFields],
+          /*HasFPFeatures*/ Record[ASTStmtReader::NumExprFields + 1]);
       break;
 
     case EXPR_CXX_DYNAMIC_CAST:
@@ -3524,8 +3535,10 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
       break;
 
     case EXPR_CXX_FUNCTIONAL_CAST:
-      S = CXXFunctionalCastExpr::CreateEmpty(Context,
-                       /*PathSize*/ Record[ASTStmtReader::NumExprFields]);
+      S = CXXFunctionalCastExpr::CreateEmpty(
+          Context,
+          /*PathSize*/ Record[ASTStmtReader::NumExprFields],
+          /*HasFPFeatures*/ Record[ASTStmtReader::NumExprFields + 1]);
       break;
 
     case EXPR_BUILTIN_BIT_CAST:
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index 2d250674057c3..911fcb4095474 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -2346,6 +2346,7 @@ void ASTWriter::WriteDeclAbbrevs() {
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); //GetObjectKind
   // CastExpr
   Abv->Add(BitCodeAbbrevOp(0)); // PathSize
+  Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // HasFPFeatures
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 6)); // CastKind
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // PartOfExplicitCast
   // ImplicitCastExpr
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 4e3e1fdc346fc..0121f25832073 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -946,12 +946,16 @@ void ASTStmtWriter::VisitObjCBridgedCastExpr(ObjCBridgedCastExpr *E) {
 void ASTStmtWriter::VisitCastExpr(CastExpr *E) {
   VisitExpr(E);
   Record.push_back(E->path_size());
+  Record.push_back(E->hasStoredFPFeatures());
   Record.AddStmt(E->getSubExpr());
   Record.push_back(E->getCastKind()); // FIXME: stable encoding
 
   for (CastExpr::path_iterator PI = E->path_begin(), PE = E->path_end();
        PI != PE; ++PI)
     Record.AddCXXBaseSpecifier(**PI);
+
+  if (E->hasStoredFPFeatures())
+    Record.push_back(E->getFPFeatures().getAsOpaqueInt());
 }
 
 void ASTStmtWriter::VisitBinaryOperator(BinaryOperator *E) {
@@ -1003,7 +1007,7 @@ void ASTStmtWriter::VisitImplicitCastExpr(ImplicitCastExpr *E) {
   VisitCastExpr(E);
   Record.push_back(E->isPartOfExplicitCast());
 
-  if (E->path_size() == 0)
+  if (E->path_size() == 0 && !E->hasStoredFPFeatures())
     AbbrevToUse = Writer.getExprImplicitCastAbbrev();
 
   Code = serialization::EXPR_IMPLICIT_CAST;
diff --git a/clang/test/AST/ast-dump-fpfeatures.cpp b/clang/test/AST/ast-dump-fpfeatures.cpp
index f3925aebbe752..01af3a8fd7e9c 100644
--- a/clang/test/AST/ast-dump-fpfeatures.cpp
+++ b/clang/test/AST/ast-dump-fpfeatures.cpp
@@ -36,8 +36,49 @@ float func_03(float x) {
 // CHECK-NEXT: ReturnStmt
 // CHECK-NEXT: CallExpr {{.*}} FPContractMode=0
 
+int func_04(float x) {
+#pragma STDC FP_CONTRACT ON
+  return x;
+}
+
+// CHECK: FunctionDecl {{.*}} func_04 'int (float)'
+// CHECK-NEXT: ParmVarDecl {{.*}} x 'float'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' FPContractMode=1
+
+float func_05(double x) {
+#pragma STDC FP_CONTRACT ON
+  return (float)x;
+}
+
+// CHECK: FunctionDecl {{.*}} func_05 'float (double)'
+// CHECK-NEXT: ParmVarDecl {{.*}} x 'double'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} FPContractMode=1
+
+float func_06(double x) {
+#pragma STDC FP_CONTRACT ON
+  return float(x);
+}
+
+// CHECK: FunctionDecl {{.*}} func_06 'float (double)'
+// CHECK-NEXT: ParmVarDecl {{.*}} x 'double'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CXXFunctionalCastExpr {{.*}} FPContractMode=1
+
+float func_07(double x) {
+#pragma STDC FP_CONTRACT ON
+  return static_cast<float>(x);
+}
+
+// CHECK: FunctionDecl {{.*}} func_07 'float (double)'
+// CHECK-NEXT: ParmVarDecl {{.*}} x 'double'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CXXStaticCastExpr {{.*}} FPContractMode=1
 
 #pragma STDC FENV_ROUND FE_DOWNWARD
 
@@ -87,7 +128,7 @@ T func_14(T x, T y) {
 }
 
 float func_15(float x, float y) {
-#pragma STDC FPENV_ROUND FE_DOWNWARD
+#pragma STDC FENV_ROUND FE_DOWNWARD
   return func_14(x, y);
 }
 

From 4d7b19454397103620394dcceaf29592ef195231 Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Sun, 13 Sep 2020 23:00:59 -0700
Subject: [PATCH 0502/1079] [llvm-cov gcov] Refactor counting and reporting

The current organization of FileInfo and the utility functions it
references on GCOVFile, GCOVFunction, and GCOVBlock is messy. Some members
of FileInfo are just copied from GCOVFile. FileInfo::print (.gcov output
and --intermediate output) is interleaved with branch statistics and the
computation of line execution counts. --intermediate has to do redundant
.gcov output to gather branch statistics.
This patch deletes lots of code and introduces a clearer work flow: ``` fn collectFunction for each block b for each line lineNum let line be LineInfo of the file on lineNum line.exists = 1 increment function's lines & linesExec if necessary increment line.count line.blocks.push_back(&b) fn collectSourceLine compute cycle counts count = incoming_counts + cycle_counts if line.exists ++summary->lines if line.count ++summary->linesExec fn collectSource for each line call collectSourceLine fn main for each function call collectFunction print function summary for each source file call collectSource print file summary annotate the source file with line execution counts if -i print intermediate file ``` The output order of functions and files now follows the original order in .gcno files. --- llvm/include/llvm/ProfileData/GCOV.h | 149 +---- llvm/lib/ProfileData/GCOV.cpp | 734 +++++++++++------------ llvm/test/tools/llvm-cov/gcov-fake-4.2.c | 1 + llvm/test/tools/llvm-cov/llvm-cov.test | 20 +- llvm/tools/llvm-cov/gcov.cpp | 4 +- 5 files changed, 389 insertions(+), 519 deletions(-) diff --git a/llvm/include/llvm/ProfileData/GCOV.h b/llvm/include/llvm/ProfileData/GCOV.h index 56b512b6d6065..452cf458f4e98 100644 --- a/llvm/include/llvm/ProfileData/GCOV.h +++ b/llvm/include/llvm/ProfileData/GCOV.h @@ -39,7 +39,6 @@ namespace llvm { class GCOVFunction; class GCOVBlock; -class FileInfo; namespace GCOV { @@ -191,28 +190,26 @@ class GCOVFile { bool readGCNO(GCOVBuffer &Buffer); bool readGCDA(GCOVBuffer &Buffer); GCOV::GCOVVersion getVersion() const { return Version; } - uint32_t getChecksum() const { return Checksum; } void print(raw_ostream &OS) const; void dump() const; - void collectLineCounts(FileInfo &FI); std::vector filenames; StringMap filenameToIdx; -private: +public: bool GCNOInitialized = false; GCOV::GCOVVersion Version; uint32_t Checksum = 0; StringRef cwd; - SmallVector, 16> Functions; + SmallVector, 16> functions; std::map IdentToFunction; uint32_t RunCount = 0; uint32_t ProgramCount = 0; using iterator = pointee_iterator< SmallVectorImpl>::const_iterator>; - iterator begin() const { return iterator(Functions.begin()); } - iterator end() const { return iterator(Functions.end()); } + iterator begin() const { return iterator(functions.begin()); } + iterator end() const { return iterator(functions.end()); } }; struct GCOVArc { @@ -223,8 +220,8 @@ struct GCOVArc { GCOVBlock &src; GCOVBlock &dst; uint32_t flags; - uint64_t Count = 0; - uint64_t CyclesCount = 0; + uint64_t count = 0; + uint64_t cycleCount = 0; }; /// GCOVFunction - Collects function information. 
@@ -237,20 +234,16 @@ class GCOVFunction { StringRef getName() const { return Name; } StringRef getFilename() const; - size_t getNumBlocks() const { return Blocks.size(); } uint64_t getEntryCount() const; GCOVBlock &getExitBlock() const; - BlockIterator block_begin() const { return Blocks.begin(); } - BlockIterator block_end() const { return Blocks.end(); } - iterator_range blocks() const { - return make_range(block_begin(), block_end()); + iterator_range blocksRange() const { + return make_range(blocks.begin(), blocks.end()); } - uint64_t propagateCounts(const GCOVBlock &v, GCOVArc *arc); + uint64_t propagateCounts(const GCOVBlock &v, GCOVArc *pred); void print(raw_ostream &OS) const; void dump() const; - void collectLineCounts(FileInfo &FI); GCOVFile &file; uint32_t ident = 0; @@ -263,40 +256,29 @@ class GCOVFunction { uint8_t artificial = 0; StringRef Name; unsigned srcIdx; - SmallVector, 0> Blocks; + SmallVector, 0> blocks; SmallVector, 0> arcs, treeArcs; DenseSet visited; }; /// GCOVBlock - Collects block information. class GCOVBlock { - struct EdgeWeight { - EdgeWeight(GCOVBlock *D) : Dst(D) {} - - GCOVBlock *Dst; - uint64_t Count = 0; - }; - public: using EdgeIterator = SmallVectorImpl::const_iterator; - using BlockVector = SmallVector; + using BlockVector = SmallVector; using BlockVectorLists = SmallVector; using Edges = SmallVector; - GCOVBlock(GCOVFunction &P, uint32_t N) : Parent(P), Number(N) {} + GCOVBlock(uint32_t N) : number(N) {} - const GCOVFunction &getParent() const { return Parent; } - void addLine(uint32_t N) { Lines.push_back(N); } - uint32_t getLastLine() const { return Lines.back(); } - uint64_t getCount() const { return Counter; } + void addLine(uint32_t N) { lines.push_back(N); } + uint32_t getLastLine() const { return lines.back(); } + uint64_t getCount() const { return count; } void addSrcEdge(GCOVArc *Edge) { pred.push_back(Edge); } void addDstEdge(GCOVArc *Edge) { succ.push_back(Edge); } - size_t getNumSrcEdges() const { return pred.size(); } - size_t getNumDstEdges() const { return succ.size(); } - iterator_range srcs() const { return make_range(pred.begin(), pred.end()); } @@ -307,7 +289,6 @@ class GCOVBlock { void print(raw_ostream &OS) const; void dump() const; - void collectLineCounts(FileInfo &FI); static uint64_t getCycleCount(const Edges &Path); static void unblock(const GCOVBlock *U, BlockVector &Blocked, @@ -320,105 +301,15 @@ class GCOVBlock { static uint64_t getLineCount(const BlockVector &Blocks); public: - GCOVFunction &Parent; - uint32_t Number; - uint64_t Counter = 0; + uint32_t number; + uint64_t count = 0; SmallVector pred; SmallVector succ; - SmallVector Lines; -}; - -struct GCOVCoverage { - GCOVCoverage() = default; - GCOVCoverage(StringRef Name) : Name(Name) {} - - StringRef Name; - - uint32_t LogicalLines = 0; - uint32_t LinesExec = 0; - - uint32_t Branches = 0; - uint32_t BranchesExec = 0; - uint32_t BranchesTaken = 0; -}; - -struct SourceInfo { - StringRef filename; - SmallString<0> displayName; - std::string name; - std::vector functions; - GCOVCoverage coverage; - bool ignored = false; - SourceInfo(StringRef filename) : filename(filename) {} + SmallVector lines; }; -class FileInfo { -protected: - // It is unlikely--but possible--for multiple functions to be on the same - // line. - // Therefore this typedef allows LineData.Functions to store multiple - // functions - // per instance. This is rare, however, so optimize for the common case. 
- using FunctionVector = SmallVector; - using FunctionLines = DenseMap; - using BlockVector = SmallVector; - using BlockLines = DenseMap; - - struct LineData { - LineData() = default; - - BlockLines Blocks; - FunctionLines Functions; - uint32_t LastLine = 0; - }; - -public: - friend class GCOVFile; - FileInfo(const GCOV::Options &Options) : Options(Options) {} - - void addBlockLine(StringRef Filename, uint32_t Line, const GCOVBlock *Block) { - if (Line > LineInfo[Filename].LastLine) - LineInfo[Filename].LastLine = Line; - LineInfo[Filename].Blocks[Line - 1].push_back(Block); - } - - void addFunctionLine(StringRef Filename, uint32_t Line, - const GCOVFunction *Function) { - if (Line > LineInfo[Filename].LastLine) - LineInfo[Filename].LastLine = Line; - LineInfo[Filename].Functions[Line - 1].push_back(Function); - } - - void setRunCount(uint32_t Runs) { RunCount = Runs; } - void setProgramCount(uint32_t Programs) { ProgramCount = Programs; } - void print(raw_ostream &OS, StringRef MainFilename, StringRef GCNOFile, - StringRef GCDAFile, GCOVFile &file); - -protected: - std::string getCoveragePath(StringRef Filename, StringRef MainFilename); - std::unique_ptr openCoveragePath(StringRef CoveragePath); - void printFunctionSummary(raw_ostream &OS, const FunctionVector &Funcs) const; - void printBlockInfo(raw_ostream &OS, const GCOVBlock &Block, - uint32_t LineIndex, uint32_t &BlockNo) const; - void printBranchInfo(raw_ostream &OS, const GCOVBlock &Block, - GCOVCoverage &Coverage, uint32_t &EdgeNo); - void printUncondBranchInfo(raw_ostream &OS, uint32_t &EdgeNo, - uint64_t Count) const; - - void printCoverage(raw_ostream &OS, const GCOVCoverage &Coverage) const; - void printFuncCoverage(raw_ostream &OS) const; - void printFileCoverage(raw_ostream &OS) const; - - const GCOV::Options &Options; - StringMap LineInfo; - uint32_t RunCount = 0; - uint32_t ProgramCount = 0; - - using FuncCoverageMap = MapVector; - - FuncCoverageMap FuncCoverages; - std::vector sources; -}; +void gcovOneInput(const GCOV::Options &options, StringRef filename, + StringRef gcno, StringRef gcda, GCOVFile &file); } // end namespace llvm diff --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp index 20118a0378b79..0597797c6561b 100644 --- a/llvm/lib/ProfileData/GCOV.cpp +++ b/llvm/lib/ProfileData/GCOV.cpp @@ -17,11 +17,12 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Format.h" -#include "llvm/Support/Path.h" #include "llvm/Support/MD5.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include #include +#include using namespace llvm; @@ -39,6 +40,59 @@ enum : uint32_t { GCOV_TAG_PROGRAM_SUMMARY = 0xa3000000, }; +namespace { +struct Summary { + Summary(StringRef Name) : Name(Name) {} + + StringRef Name; + uint64_t lines = 0; + uint64_t linesExec = 0; + uint64_t branches = 0; + uint64_t branchesExec = 0; + uint64_t branchesTaken = 0; +}; + +struct LineInfo { + SmallVector blocks; + uint64_t count = 0; + bool exists = false; +}; + +struct SourceInfo { + StringRef filename; + SmallString<0> displayName; + std::vector> startLineToFunctions; + std::vector lines; + bool ignored = false; + SourceInfo(StringRef filename) : filename(filename) {} +}; + +class Context { +public: + Context(const GCOV::Options &Options) : options(Options) {} + void print(StringRef filename, StringRef gcno, StringRef gcda, + GCOVFile &file); + +private: + std::string getCoveragePath(StringRef filename, StringRef mainFilename) const; + void printFunctionDetails(const 
GCOVFunction &f, raw_ostream &os) const; + void printBranchInfo(const GCOVBlock &Block, uint32_t &edgeIdx, + raw_ostream &OS) const; + void printSummary(const Summary &summary, raw_ostream &os) const; + + void collectFunction(GCOVFunction &f, Summary &summary); + void collectSourceLine(SourceInfo &si, Summary *summary, LineInfo &line, + size_t lineNum) const; + void collectSource(SourceInfo &si, Summary &summary) const; + void annotateSource(SourceInfo &si, const GCOVFile &file, StringRef gcno, + StringRef gcda, raw_ostream &os) const; + void printSourceToIntermediate(const SourceInfo &si, raw_ostream &os) const; + + const GCOV::Options &options; + std::vector sources; +}; +} // namespace + //===----------------------------------------------------------------------===// // GCOVFile implementation. @@ -61,8 +115,8 @@ bool GCOVFile::readGCNO(GCOVBuffer &buf) { if (!buf.readInt(length)) return false; if (tag == GCOV_TAG_FUNCTION) { - Functions.push_back(std::make_unique(*this)); - fn = Functions.back().get(); + functions.push_back(std::make_unique(*this)); + fn = functions.back().get(); fn->ident = buf.getWord(); fn->linenoChecksum = buf.getWord(); if (Version >= GCOV::V407) @@ -90,24 +144,24 @@ bool GCOVFile::readGCNO(GCOVBuffer &buf) { if (Version < GCOV::V800) { for (uint32_t i = 0; i != length; ++i) { buf.getWord(); // Ignored block flags - fn->Blocks.push_back(std::make_unique(*fn, i)); + fn->blocks.push_back(std::make_unique(i)); } } else { uint32_t num = buf.getWord(); for (uint32_t i = 0; i != num; ++i) - fn->Blocks.push_back(std::make_unique(*fn, i)); + fn->blocks.push_back(std::make_unique(i)); } } else if (tag == GCOV_TAG_ARCS && fn) { uint32_t srcNo = buf.getWord(); - if (srcNo >= fn->Blocks.size()) { + if (srcNo >= fn->blocks.size()) { errs() << "unexpected block number: " << srcNo << " (in " - << fn->Blocks.size() << ")\n"; + << fn->blocks.size() << ")\n"; return false; } - GCOVBlock *src = fn->Blocks[srcNo].get(); + GCOVBlock *src = fn->blocks[srcNo].get(); for (uint32_t i = 0, e = (length - 1) / 2; i != e; ++i) { uint32_t dstNo = buf.getWord(), flags = buf.getWord(); - GCOVBlock *dst = fn->Blocks[dstNo].get(); + GCOVBlock *dst = fn->blocks[dstNo].get(); auto arc = std::make_unique(*src, *dst, flags); src->addDstEdge(arc.get()); dst->addSrcEdge(arc.get()); @@ -118,12 +172,12 @@ bool GCOVFile::readGCNO(GCOVBuffer &buf) { } } else if (tag == GCOV_TAG_LINES && fn) { uint32_t srcNo = buf.getWord(); - if (srcNo >= fn->Blocks.size()) { + if (srcNo >= fn->blocks.size()) { errs() << "unexpected block number: " << srcNo << " (in " - << fn->Blocks.size() << ")\n"; + << fn->blocks.size() << ")\n"; return false; } - GCOVBlock &Block = *fn->Blocks[srcNo]; + GCOVBlock &Block = *fn->blocks[srcNo]; for (;;) { uint32_t line = buf.getWord(); if (line) @@ -218,24 +272,24 @@ bool GCOVFile::readGCDA(GCOVBuffer &buf) { return false; } for (std::unique_ptr &arc : fn->arcs) { - if (!buf.readInt64(arc->Count)) + if (!buf.readInt64(arc->count)) return false; - arc->src.Counter += arc->Count; + arc->src.count += arc->count; } - if (fn->Blocks.size() >= 2) { - GCOVBlock &src = *fn->Blocks[0]; + if (fn->blocks.size() >= 2) { + GCOVBlock &src = *fn->blocks[0]; GCOVBlock &sink = - Version < GCOV::V408 ? *fn->Blocks.back() : *fn->Blocks[1]; + Version < GCOV::V408 ? 
*fn->blocks.back() : *fn->blocks[1]; auto arc = std::make_unique(sink, src, GCOV_ARC_ON_TREE); sink.addDstEdge(arc.get()); src.addSrcEdge(arc.get()); fn->treeArcs.push_back(std::move(arc)); - for (GCOVBlock &block : make_pointee_range(fn->Blocks)) + for (GCOVBlock &block : fn->blocksRange()) fn->propagateCounts(block, nullptr); for (size_t i = fn->treeArcs.size() - 1; i; --i) - fn->treeArcs[i - 1]->src.Counter += fn->treeArcs[i - 1]->Count; + fn->treeArcs[i - 1]->src.count += fn->treeArcs[i - 1]->count; } } pos += 4 * length; @@ -257,36 +311,6 @@ void GCOVFile::print(raw_ostream &OS) const { LLVM_DUMP_METHOD void GCOVFile::dump() const { print(dbgs()); } #endif -/// collectLineCounts - Collect line counts. This must be used after -/// reading .gcno and .gcda files. -void GCOVFile::collectLineCounts(FileInfo &fi) { - assert(fi.sources.empty()); - for (StringRef filename : filenames) { - fi.sources.emplace_back(filename); - SourceInfo &si = fi.sources.back(); - si.displayName = si.filename; - if (!fi.Options.SourcePrefix.empty() && - sys::path::replace_path_prefix(si.displayName, fi.Options.SourcePrefix, - "") && - !si.displayName.empty()) { - // TODO replace_path_prefix may strip the prefix even if the remaining - // part does not start with a separator. - if (sys::path::is_separator(si.displayName[0])) - si.displayName.erase(si.displayName.begin()); - else - si.displayName = si.filename; - } - if (fi.Options.RelativeOnly && sys::path::is_absolute(si.displayName)) - si.ignored = true; - } - for (GCOVFunction &f : *this) { - f.collectLineCounts(fi); - fi.sources[f.srcIdx].functions.push_back(&f); - } - fi.setRunCount(RunCount); - fi.setProgramCount(ProgramCount); -} - bool GCOVArc::onTree() const { return flags & GCOV_ARC_ON_TREE; } //===----------------------------------------------------------------------===// @@ -297,11 +321,11 @@ StringRef GCOVFunction::getFilename() const { return file.filenames[srcIdx]; } /// getEntryCount - Get the number of times the function was called by /// retrieving the entry block's count. uint64_t GCOVFunction::getEntryCount() const { - return Blocks.front()->getCount(); + return blocks.front()->getCount(); } GCOVBlock &GCOVFunction::getExitBlock() const { - return file.getVersion() < GCOV::V408 ? *Blocks.back() : *Blocks[1]; + return file.getVersion() < GCOV::V408 ? *blocks.back() : *blocks[1]; } // For each basic block, the sum of incoming edge counts equals the sum of @@ -317,21 +341,21 @@ uint64_t GCOVFunction::propagateCounts(const GCOVBlock &v, GCOVArc *pred) { uint64_t excess = 0; for (GCOVArc *e : v.srcs()) if (e != pred) - excess += e->onTree() ? propagateCounts(e->src, e) : e->Count; + excess += e->onTree() ? propagateCounts(e->src, e) : e->count; for (GCOVArc *e : v.dsts()) if (e != pred) - excess -= e->onTree() ? propagateCounts(e->dst, e) : e->Count; + excess -= e->onTree() ? propagateCounts(e->dst, e) : e->count; if (int64_t(excess) < 0) excess = -excess; if (pred) - pred->Count = excess; + pred->count = excess; return excess; } void GCOVFunction::print(raw_ostream &OS) const { OS << "===== " << Name << " (" << ident << ") @ " << getFilename() << ":" << startLine << "\n"; - for (const auto &Block : Blocks) + for (const auto &Block : blocks) Block->print(OS); } @@ -342,33 +366,16 @@ LLVM_DUMP_METHOD void GCOVFunction::dump() const { print(dbgs()); } /// collectLineCounts - Collect line counts. This must be used after /// reading .gcno and .gcda files. 
-void GCOVFunction::collectLineCounts(FileInfo &FI) { - // If the line number is zero, this is a function that doesn't actually appear - // in the source file, so there isn't anything we can do with it. - if (startLine == 0) - return; - - for (const auto &Block : Blocks) - Block->collectLineCounts(FI); - FI.addFunctionLine(getFilename(), startLine, this); -} //===----------------------------------------------------------------------===// // GCOVBlock implementation. -/// collectLineCounts - Collect line counts. This must be used after -/// reading .gcno and .gcda files. -void GCOVBlock::collectLineCounts(FileInfo &FI) { - for (uint32_t N : Lines) - FI.addBlockLine(Parent.getFilename(), N, this); -} - void GCOVBlock::print(raw_ostream &OS) const { - OS << "Block : " << Number << " Counter : " << Counter << "\n"; + OS << "Block : " << number << " Counter : " << count << "\n"; if (!pred.empty()) { OS << "\tSource Edges : "; for (const GCOVArc *Edge : pred) - OS << Edge->src.Number << " (" << Edge->Count << "), "; + OS << Edge->src.number << " (" << Edge->count << "), "; OS << "\n"; } if (!succ.empty()) { @@ -376,13 +383,13 @@ void GCOVBlock::print(raw_ostream &OS) const { for (const GCOVArc *Edge : succ) { if (Edge->flags & GCOV_ARC_ON_TREE) OS << '*'; - OS << Edge->dst.Number << " (" << Edge->Count << "), "; + OS << Edge->dst.number << " (" << Edge->count << "), "; } OS << "\n"; } - if (!Lines.empty()) { + if (!lines.empty()) { OS << "\tLines : "; - for (uint32_t N : Lines) + for (uint32_t N : lines) OS << (N) << ","; OS << "\n"; } @@ -404,10 +411,10 @@ LLVM_DUMP_METHOD void GCOVBlock::dump() const { print(dbgs()); } uint64_t GCOVBlock::getCycleCount(const Edges &Path) { uint64_t CycleCount = std::numeric_limits::max(); for (auto E : Path) { - CycleCount = std::min(E->CyclesCount, CycleCount); + CycleCount = std::min(E->cycleCount, CycleCount); } for (auto E : Path) { - E->CyclesCount -= CycleCount; + E->cycleCount -= CycleCount; } return CycleCount; } @@ -490,31 +497,6 @@ void GCOVBlock::getCyclesCount(const BlockVector &Blocks, uint64_t &Count) { } } -/// Get the count for the list of blocks which lie on the same line. -uint64_t GCOVBlock::getLineCount(const BlockVector &blocks) { - uint64_t count = 0; - for (const GCOVBlock *block : blocks) { - if (block->Number == 0) { - // For nonstandard control flows, arcs into the exit block may be - // duplicately counted (fork) or not be counted (abnormal exit), and thus - // the (exit,entry) counter may be inaccurate. Count the entry block with - // the outgoing arcs. - for (const GCOVArc *arc : block->succ) - count += arc->Count; - } else { - // Add counts from predecessors that are not on the same line. - for (const GCOVArc *arc : block->pred) - if (!llvm::is_contained(blocks, &arc->src)) - count += arc->Count; - } - for (GCOVArc *arc : block->succ) - arc->CyclesCount = arc->Count; - } - - GCOVBlock::getCyclesCount(blocks, count); - return count; -} - //===----------------------------------------------------------------------===// // FileInfo implementation. @@ -635,23 +617,23 @@ static std::string mangleCoveragePath(StringRef Filename, bool PreservePaths) { return std::string(Result.str()); } -std::string FileInfo::getCoveragePath(StringRef Filename, - StringRef MainFilename) { - if (Options.NoOutput) +std::string Context::getCoveragePath(StringRef filename, + StringRef mainFilename) const { + if (options.NoOutput) // This is probably a bug in gcov, but when -n is specified, paths aren't // mangled at all, and the -l and -p options are ignored. 
Here, we do the // same. - return std::string(Filename); + return std::string(filename); std::string CoveragePath; - if (Options.LongFileNames && !Filename.equals(MainFilename)) + if (options.LongFileNames && !filename.equals(mainFilename)) CoveragePath = - mangleCoveragePath(MainFilename, Options.PreservePaths) + "##"; - CoveragePath += mangleCoveragePath(Filename, Options.PreservePaths); - if (Options.HashFilenames) { + mangleCoveragePath(mainFilename, options.PreservePaths) + "##"; + CoveragePath += mangleCoveragePath(filename, options.PreservePaths); + if (options.HashFilenames) { MD5 Hasher; MD5::MD5Result Result; - Hasher.update(Filename.str()); + Hasher.update(filename.str()); Hasher.final(Result); CoveragePath += "##" + std::string(Result.digest()); } @@ -659,301 +641,301 @@ std::string FileInfo::getCoveragePath(StringRef Filename, return CoveragePath; } -std::unique_ptr -FileInfo::openCoveragePath(StringRef CoveragePath) { - std::error_code EC; - auto OS = - std::make_unique(CoveragePath, EC, sys::fs::OF_Text); - if (EC) { - errs() << EC.message() << "\n"; - return std::make_unique(); +void Context::collectFunction(GCOVFunction &f, Summary &summary) { + SourceInfo &si = sources[f.srcIdx]; + if (f.startLine >= si.startLineToFunctions.size()) + si.startLineToFunctions.resize(f.startLine + 1); + si.startLineToFunctions[f.startLine].push_back(&f); + for (const GCOVBlock &b : f.blocksRange()) { + if (b.lines.empty()) + continue; + uint32_t maxLineNum = *std::max_element(b.lines.begin(), b.lines.end()); + if (maxLineNum >= si.lines.size()) + si.lines.resize(maxLineNum + 1); + for (uint32_t lineNum : b.lines) { + LineInfo &line = si.lines[lineNum]; + if (!line.exists) + ++summary.lines; + if (line.count == 0 && b.count) + ++summary.linesExec; + line.exists = true; + line.count += b.count; + line.blocks.push_back(&b); + } } - return std::move(OS); } -/// print - Print source files with collected line count information. -void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename, - StringRef GCNOFile, StringRef GCDAFile, GCOVFile &file) { - SmallVector Filenames; - for (const auto &LI : LineInfo) - Filenames.push_back(LI.first()); - llvm::sort(Filenames); - - for (StringRef Filename : Filenames) { - SourceInfo &source = sources[file.filenameToIdx.find(Filename)->second]; - if (source.ignored) - continue; - - auto AllLines = - Options.Intermediate ? LineConsumer() : LineConsumer(Filename); - std::string CoveragePath = getCoveragePath(Filename, MainFilename); - std::unique_ptr CovStream; - if (Options.NoOutput || Options.Intermediate) - CovStream = std::make_unique(); - else if (!Options.UseStdout) - CovStream = openCoveragePath(CoveragePath); - raw_ostream &CovOS = - !Options.NoOutput && Options.UseStdout ? 
llvm::outs() : *CovStream; - - CovOS << " -: 0:Source:" << source.displayName << "\n"; - CovOS << " -: 0:Graph:" << GCNOFile << "\n"; - CovOS << " -: 0:Data:" << GCDAFile << "\n"; - CovOS << " -: 0:Runs:" << RunCount << "\n"; - if (file.getVersion() < GCOV::V900) - CovOS << " -: 0:Programs:" << ProgramCount << "\n"; - - const LineData &Line = LineInfo[Filename]; - GCOVCoverage FileCoverage(source.displayName); - for (uint32_t LineIndex = 0; LineIndex < Line.LastLine || !AllLines.empty(); - ++LineIndex) { - if (Options.BranchInfo) { - FunctionLines::const_iterator FuncsIt = Line.Functions.find(LineIndex); - if (FuncsIt != Line.Functions.end()) - printFunctionSummary(CovOS, FuncsIt->second); - } +void Context::collectSourceLine(SourceInfo &si, Summary *summary, + LineInfo &line, size_t lineNum) const { + uint64_t count = 0; + for (const GCOVBlock *b : line.blocks) { + if (b->number == 0) { + // For nonstandard control flows, arcs into the exit block may be + // duplicately counted (fork) or not be counted (abnormal exit), and thus + // the (exit,entry) counter may be inaccurate. Count the entry block with + // the outgoing arcs. + for (const GCOVArc *arc : b->succ) + count += arc->count; + } else { + // Add counts from predecessors that are not on the same line. + for (const GCOVArc *arc : b->pred) + if (!llvm::is_contained(line.blocks, &arc->src)) + count += arc->count; + } + for (GCOVArc *arc : b->succ) + arc->cycleCount = arc->count; + } - BlockLines::const_iterator BlocksIt = Line.Blocks.find(LineIndex); - if (BlocksIt == Line.Blocks.end()) { - // No basic blocks are on this line. Not an executable line of code. - CovOS << " -:"; - AllLines.printNext(CovOS, LineIndex + 1); - } else { - const BlockVector &Blocks = BlocksIt->second; - - // Add up the block counts to form line counts. - DenseMap LineExecs; - for (const GCOVBlock *Block : Blocks) { - if (Options.FuncCoverage) { - // This is a slightly convoluted way to most accurately gather line - // statistics for functions. Basically what is happening is that we - // don't want to count a single line with multiple blocks more than - // once. However, we also don't simply want to give the total line - // count to every function that starts on the line. Thus, what is - // happening here are two things: - // 1) Ensure that the number of logical lines is only incremented - // once per function. - // 2) If there are multiple blocks on the same line, ensure that the - // number of lines executed is incremented as long as at least - // one of the blocks are executed. 
- const GCOVFunction *Function = &Block->getParent(); - if (FuncCoverages.find(Function) == FuncCoverages.end()) { - std::pair KeyValue( - Function, GCOVCoverage(Function->getName())); - FuncCoverages.insert(KeyValue); - } - GCOVCoverage &FuncCoverage = FuncCoverages.find(Function)->second; - - if (LineExecs.find(Function) == LineExecs.end()) { - if (Block->getCount()) { - ++FuncCoverage.LinesExec; - LineExecs[Function] = true; - } else { - LineExecs[Function] = false; - } - ++FuncCoverage.LogicalLines; - } else if (!LineExecs[Function] && Block->getCount()) { - ++FuncCoverage.LinesExec; - LineExecs[Function] = true; - } - } - } + GCOVBlock::getCyclesCount(line.blocks, count); + line.count = count; + if (line.exists) { + ++summary->lines; + if (line.count != 0) + ++summary->linesExec; + } - const uint64_t LineCount = GCOVBlock::getLineCount(Blocks); - if (LineCount == 0) - CovOS << " #####:"; - else { - CovOS << format("%9" PRIu64 ":", LineCount); - ++FileCoverage.LinesExec; - } - ++FileCoverage.LogicalLines; - - AllLines.printNext(CovOS, LineIndex + 1); - - uint32_t BlockNo = 0; - uint32_t EdgeNo = 0; - for (const GCOVBlock *Block : Blocks) { - // Only print block and branch information at the end of the block. - if (Block->getLastLine() != LineIndex + 1) - continue; - if (Options.AllBlocks) - printBlockInfo(CovOS, *Block, LineIndex, BlockNo); - if (Options.BranchInfo) { - size_t NumEdges = Block->getNumDstEdges(); - if (NumEdges > 1) - printBranchInfo(CovOS, *Block, FileCoverage, EdgeNo); - else if (Options.UncondBranch && NumEdges == 1) - printUncondBranchInfo(CovOS, EdgeNo, Block->succ[0]->Count); - } - } + if (options.BranchInfo) + for (const GCOVBlock *b : line.blocks) { + if (b->getLastLine() != lineNum) + continue; + int branches = 0, execBranches = 0, takenBranches = 0; + for (const GCOVArc *arc : b->succ) { + ++branches; + if (count != 0) + ++execBranches; + if (arc->count != 0) + ++takenBranches; + } + if (branches > 1) { + summary->branches += branches; + summary->branchesExec += execBranches; + summary->branchesTaken += takenBranches; } } - source.name = CoveragePath; - source.coverage = FileCoverage; +} + +void Context::collectSource(SourceInfo &si, Summary &summary) const { + size_t lineNum = 0; + for (LineInfo &line : si.lines) { + collectSourceLine(si, &summary, line, lineNum); + ++lineNum; } +} - if (Options.Intermediate && !Options.NoOutput) { - // gcov 7.* unexpectedly create multiple .gcov files, which was fixed in 8.0 - // (PR GCC/82702). We create just one file. - std::string outputPath(sys::path::filename(MainFilename)); - std::error_code ec; - raw_fd_ostream os(outputPath + ".gcov", ec, sys::fs::OF_Text); - if (ec) { - errs() << ec.message() << "\n"; - return; +void Context::annotateSource(SourceInfo &si, const GCOVFile &file, + StringRef gcno, StringRef gcda, + raw_ostream &os) const { + auto source = + options.Intermediate ? 
LineConsumer() : LineConsumer(si.filename); + + os << " -: 0:Source:" << si.displayName << '\n'; + os << " -: 0:Graph:" << gcno << '\n'; + os << " -: 0:Data:" << gcda << '\n'; + os << " -: 0:Runs:" << file.RunCount << '\n'; + if (file.Version < GCOV::V900) + os << " -: 0:Programs:" << file.ProgramCount << '\n'; + + for (size_t lineNum = 1; !source.empty(); ++lineNum) { + if (lineNum >= si.lines.size()) { + os << " -:"; + source.printNext(os, lineNum); + continue; } - for (const SourceInfo &source : sources) { - os << "file:" << source.filename << '\n'; - for (const GCOVFunction *f : source.functions) - os << "function:" << f->startLine << ',' << f->getEntryCount() << ',' - << f->Name << '\n'; - const LineData &line = LineInfo[source.filename]; - for (uint32_t lineNum = 0; lineNum != line.LastLine; ++lineNum) { - BlockLines::const_iterator BlocksIt = line.Blocks.find(lineNum); - if (BlocksIt == line.Blocks.end()) - continue; - const BlockVector &blocks = BlocksIt->second; - // GCC 8 (r254259) added third field for Ada: - // lcount:<line>,<count>,<has_unexecuted_block> - // We don't need the third field. - os << "lcount:" << (lineNum + 1) << ',' - << GCOVBlock::getLineCount(blocks) << '\n'; - - if (!Options.BranchInfo) - continue; - for (const GCOVBlock *block : blocks) { - if (block->getLastLine() != lineNum + 1 || - block->getNumDstEdges() < 2) - continue; - for (const GCOVArc *arc : block->dsts()) { - const char *type = block->getCount() - ? arc->Count ? "taken" : "nottaken" - : "notexec"; - os << "branch:" << (lineNum + 1) << ',' << type << '\n'; - } + const LineInfo &line = si.lines[lineNum]; + if (options.BranchInfo && lineNum < si.startLineToFunctions.size()) + for (const auto *f : si.startLineToFunctions[lineNum]) + printFunctionDetails(*f, os); + if (!line.exists) + os << " -:"; + else if (line.count == 0) + os << " #####:"; + else + os << format("%9" PRIu64 ":", line.count); + source.printNext(os, lineNum); + + uint32_t blockIdx = 0, edgeIdx = 0; + for (const GCOVBlock *b : line.blocks) { + if (b->getLastLine() != lineNum) + continue; + if (options.AllBlocks) { + if (b->getCount() == 0) + os << " $$$$$:"; + else + os << format("%9" PRIu64 ":", b->count); + os << format("%5u-block %2u\n", lineNum, blockIdx++); + } + if (options.BranchInfo) { + size_t NumEdges = b->succ.size(); + if (NumEdges > 1) + printBranchInfo(*b, edgeIdx, os); + else if (options.UncondBranch && NumEdges == 1) { + uint64_t count = b->succ[0]->count; + os << format("unconditional %2u ", edgeIdx++) + << formatBranchInfo(options, count, count) << '\n'; } } } } +} + +void Context::printSourceToIntermediate(const SourceInfo &si, + raw_ostream &os) const { + os << "file:" << si.filename << '\n'; + for (const auto &fs : si.startLineToFunctions) + for (const GCOVFunction *f : fs) + os << "function:" << f->startLine << ',' << f->getEntryCount() << ',' + << f->Name << '\n'; + for (size_t lineNum = 1, size = si.lines.size(); lineNum < size; ++lineNum) { + const LineInfo &line = si.lines[lineNum]; + if (line.blocks.empty()) + continue; + // GCC 8 (r254259) added third field for Ada: + // lcount:<line>,<count>,<has_unexecuted_block> + // We don't need the third field. + os << "lcount:" << lineNum << ',' << line.count << '\n'; - if (!Options.UseStdout) { - // FIXME: There is no way to detect calls given current instrumentation. 
- if (Options.FuncCoverage) - printFuncCoverage(InfoOS); - printFileCoverage(InfoOS); + if (!options.BranchInfo) + continue; + for (const GCOVBlock *b : line.blocks) { + if (b->succ.size() < 2 || b->getLastLine() != lineNum) + continue; + for (const GCOVArc *arc : b->succ) { + const char *type = + b->getCount() ? arc->count ? "taken" : "nottaken" : "notexec"; + os << "branch:" << lineNum << ',' << type << '\n'; + } + } } } -/// printFunctionSummary - Print function and block summary. -void FileInfo::printFunctionSummary(raw_ostream &OS, - const FunctionVector &Funcs) const { - for (const GCOVFunction *Func : Funcs) { - uint64_t EntryCount = Func->getEntryCount(); - uint32_t BlocksExec = 0; - const GCOVBlock &ExitBlock = Func->getExitBlock(); - uint64_t exitCount = 0; - for (const GCOVArc *arc : ExitBlock.pred) - exitCount += arc->Count; - for (const GCOVBlock &Block : Func->blocks()) - if (Block.Number != 0 && &Block != &ExitBlock && Block.getCount()) - ++BlocksExec; - - OS << "function " << Func->getName() << " called " << EntryCount - << " returned " << formatPercentage(exitCount, EntryCount) - << "% blocks executed " - << formatPercentage(BlocksExec, Func->getNumBlocks() - 2) << "%\n"; +void Context::print(StringRef filename, StringRef gcno, StringRef gcda, + GCOVFile &file) { + for (StringRef filename : file.filenames) { + sources.emplace_back(filename); + SourceInfo &si = sources.back(); + si.displayName = si.filename; + if (!options.SourcePrefix.empty() && + sys::path::replace_path_prefix(si.displayName, options.SourcePrefix, + "") && + !si.displayName.empty()) { + // TODO replace_path_prefix may strip the prefix even if the remaining + // part does not start with a separator. + if (sys::path::is_separator(si.displayName[0])) + si.displayName.erase(si.displayName.begin()); + else + si.displayName = si.filename; + } + if (options.RelativeOnly && sys::path::is_absolute(si.displayName)) + si.ignored = true; } -} -/// printBlockInfo - Output counts for each block. -void FileInfo::printBlockInfo(raw_ostream &OS, const GCOVBlock &Block, - uint32_t LineIndex, uint32_t &BlockNo) const { - if (Block.getCount() == 0) - OS << " $$$$$:"; - else - OS << format("%9" PRIu64 ":", Block.getCount()); - OS << format("%5u-block %2u\n", LineIndex + 1, BlockNo++); -} + raw_ostream &os = llvm::outs(); + for (GCOVFunction &f : make_pointee_range(file.functions)) { + Summary summary(f.Name); + collectFunction(f, summary); + if (options.FuncCoverage && !options.UseStdout) { + os << "Function '" << summary.Name << "'\n"; + printSummary(summary, os); + os << '\n'; + } + } -/// printBranchInfo - Print conditional branch probabilities. -void FileInfo::printBranchInfo(raw_ostream &OS, const GCOVBlock &Block, - GCOVCoverage &Coverage, uint32_t &EdgeNo) { - SmallVector BranchCounts; - uint64_t TotalCounts = 0; - for (const GCOVArc *Edge : Block.dsts()) { - BranchCounts.push_back(Edge->Count); - TotalCounts += Edge->Count; - if (Block.getCount()) - ++Coverage.BranchesExec; - if (Edge->Count) - ++Coverage.BranchesTaken; - ++Coverage.Branches; - - if (Options.FuncCoverage) { - const GCOVFunction *Function = &Block.getParent(); - GCOVCoverage &FuncCoverage = FuncCoverages.find(Function)->second; - if (Block.getCount()) - ++FuncCoverage.BranchesExec; - if (Edge->Count) - ++FuncCoverage.BranchesTaken; - ++FuncCoverage.Branches; + for (SourceInfo &si : sources) { + if (si.ignored) + continue; + Summary summary(si.displayName); + collectSource(si, summary); + + // Print file summary unless -t is specified. 
+ std::string gcovName = getCoveragePath(si.filename, filename); + if (!options.UseStdout) { + os << "File '" << summary.Name << "'\n"; + printSummary(summary, os); + if (!options.NoOutput && !options.Intermediate) + os << "Creating '" << gcovName << "'\n"; + os << '\n'; } + + if (options.NoOutput || options.Intermediate) + continue; + Optional os; + if (!options.UseStdout) { + std::error_code ec; + os.emplace(gcovName, ec, sys::fs::OF_Text); + if (ec) { + errs() << ec.message() << '\n'; + continue; + } + } + annotateSource(si, file, gcno, gcda, + options.UseStdout ? llvm::outs() : *os); } - for (uint64_t N : BranchCounts) - OS << format("branch %2u ", EdgeNo++) - << formatBranchInfo(Options, N, TotalCounts) << "\n"; + if (options.Intermediate && !options.NoOutput) { + // gcov 7.* unexpectedly create multiple .gcov files, which was fixed in 8.0 + // (PR GCC/82702). We create just one file. + std::string outputPath(sys::path::filename(filename)); + std::error_code ec; + raw_fd_ostream os(outputPath + ".gcov", ec, sys::fs::OF_Text); + if (ec) { + errs() << ec.message() << '\n'; + return; + } + + for (const SourceInfo &si : sources) + printSourceToIntermediate(si, os); + } } -/// printUncondBranchInfo - Print unconditional branch probabilities. -void FileInfo::printUncondBranchInfo(raw_ostream &OS, uint32_t &EdgeNo, - uint64_t Count) const { - OS << format("unconditional %2u ", EdgeNo++) - << formatBranchInfo(Options, Count, Count) << "\n"; +void Context::printFunctionDetails(const GCOVFunction &f, + raw_ostream &os) const { + const uint64_t entryCount = f.getEntryCount(); + uint32_t blocksExec = 0; + const GCOVBlock &exitBlock = f.getExitBlock(); + uint64_t exitCount = 0; + for (const GCOVArc *arc : exitBlock.pred) + exitCount += arc->count; + for (const GCOVBlock &b : f.blocksRange()) + if (b.number != 0 && &b != &exitBlock && b.getCount()) + ++blocksExec; + + os << "function " << f.getName() << " called " << entryCount << " returned " + << formatPercentage(exitCount, entryCount) << "% blocks executed " + << formatPercentage(blocksExec, f.blocks.size() - 2) << "%\n"; } -// printCoverage - Print generic coverage info used by both printFuncCoverage -// and printFileCoverage. -void FileInfo::printCoverage(raw_ostream &OS, - const GCOVCoverage &Coverage) const { - OS << format("Lines executed:%.2f%% of %u\n", - double(Coverage.LinesExec) * 100 / Coverage.LogicalLines, - Coverage.LogicalLines); - if (Options.BranchInfo) { - if (Coverage.Branches) { - OS << format("Branches executed:%.2f%% of %u\n", - double(Coverage.BranchesExec) * 100 / Coverage.Branches, - Coverage.Branches); - OS << format("Taken at least once:%.2f%% of %u\n", - double(Coverage.BranchesTaken) * 100 / Coverage.Branches, - Coverage.Branches); - } else { - OS << "No branches\n"; - } - OS << "No calls\n"; // to be consistent with gcov - } +/// printBranchInfo - Print conditional branch probabilities. +void Context::printBranchInfo(const GCOVBlock &Block, uint32_t &edgeIdx, + raw_ostream &os) const { + uint64_t total = 0; + for (const GCOVArc *arc : Block.dsts()) + total += arc->count; + for (const GCOVArc *arc : Block.dsts()) + os << format("branch %2u ", edgeIdx++) + << formatBranchInfo(options, arc->count, total) << '\n'; } -// printFuncCoverage - Print per-function coverage info. 
-void FileInfo::printFuncCoverage(raw_ostream &OS) const { - for (const auto &FC : FuncCoverages) { - const GCOVCoverage &Coverage = FC.second; - OS << "Function '" << Coverage.Name << "'\n"; - printCoverage(OS, Coverage); - OS << "\n"; +void Context::printSummary(const Summary &summary, raw_ostream &os) const { + os << format("Lines executed:%.2f%% of %u\n", + double(summary.linesExec) * 100 / summary.lines, summary.lines); + if (options.BranchInfo) { + if (summary.branches == 0) { + os << "No branches\n"; + } else { + os << format("Branches executed:%.2f%% of %u\n", + double(summary.branchesExec) * 100 / summary.branches, + summary.branches); + os << format("Taken at least once:%.2f%% of %u\n", + double(summary.branchesTaken) * 100 / summary.branches, + summary.branches); + } + os << "No calls\n"; } } -// printFileCoverage - Print per-file coverage info. -void FileInfo::printFileCoverage(raw_ostream &OS) const { - for (const SourceInfo &source : sources) { - if (source.ignored) - continue; - const GCOVCoverage &Coverage = source.coverage; - OS << "File '" << Coverage.Name << "'\n"; - printCoverage(OS, Coverage); - if (!Options.NoOutput && !Options.Intermediate) - OS << "Creating '" << source.name << "'\n"; - OS << "\n"; - } +void llvm::gcovOneInput(const GCOV::Options &options, StringRef filename, + StringRef gcno, StringRef gcda, GCOVFile &file) { + Context fi(options); + fi.print(filename, gcno, gcda, file); } diff --git a/llvm/test/tools/llvm-cov/gcov-fake-4.2.c b/llvm/test/tools/llvm-cov/gcov-fake-4.2.c index 7e8eb2f2a5ff2..470a14ff7e41c 100644 --- a/llvm/test/tools/llvm-cov/gcov-fake-4.2.c +++ b/llvm/test/tools/llvm-cov/gcov-fake-4.2.c @@ -1,6 +1,7 @@ /// Test that llvm-cov supports a fake gcov 4.2 format used before clang 11. // RUN: rm -rf %t && mkdir %t && cd %t +// RUN: echo -e '\n\n\n\n\n\n\n\n\n' > test.cpp && echo > test.h // RUN: llvm-cov gcov test. --gcno=%S/Inputs/gcov-fake-4.2.gcno --gcda=%S/Inputs/gcov-fake-4.2.gcda | FileCheck %s // RUN: FileCheck %s --check-prefix=C < test.cpp.gcov // RUN: FileCheck %s --check-prefix=H < test.h.gcov diff --git a/llvm/test/tools/llvm-cov/llvm-cov.test b/llvm/test/tools/llvm-cov/llvm-cov.test index 2256501cd5ea2..4a3b81ce2b7e3 100644 --- a/llvm/test/tools/llvm-cov/llvm-cov.test +++ b/llvm/test/tools/llvm-cov/llvm-cov.test @@ -38,7 +38,7 @@ RUN: llvm-cov gcov -n test.c | FileCheck %s --check-prefix=OUT # Print to stdout. 
RUN: llvm-cov gcov -t test.c > stdout RUN: llvm-cov gcov --stdout test.c | cmp stdout - -RUN: cat test_no_options.h.gcov test_no_options.cpp.gcov | diff -u - stdout +RUN: cat test_no_options.cpp.gcov test_no_options.h.gcov | diff -u - stdout RUN: llvm-cov gcov -n -t test.c | count 0 RUN: llvm-cov gcov test_paths.cpp 2>/dev/null | FileCheck %s --check-prefix=MISSING @@ -84,12 +84,7 @@ RUN: llvm-cov gcov test.c -a -b -f | FileCheck %s --check-prefixes=OUT,OUTFILE,O RUN: FileCheck %s --check-prefixes=C,C-A,C-B --match-full-lines --strict-whitespace < test.cpp.gcov RUN: FileCheck %s --check-prefixes=H,H-A,H-B --match-full-lines --strict-whitespace < test.h.gcov - OUT-F:Function '_ZN1AC2Ev' - OUT-F-NEXT:Lines executed:100.00% of 1 - OUT-FB-NEXT:No branches - OUT-FB-NEXT:No calls - OUT-F-EMPTY: - OUT-F-NEXT:Function '_ZN1A1BEv' + OUT-F:Function '_ZN1A1BEv' OUT-F-NEXT:Lines executed:100.00% of 1 OUT-FB-NEXT:No branches OUT-FB-NEXT:No calls @@ -121,14 +116,17 @@ RUN: FileCheck %s --check-prefixes=H,H-A,H-B --match-full-lines --strict-whitesp OUT-F-EMPTY: OUT-F-NEXT:Function '_Z15initialize_gridv' OUT-F-NEXT:Lines executed:100.00% of 5 - OUT-FB-NEXT:Branches executed:100.00% of 4 - OUT-FB-NEXT:Taken at least once:100.00% of 4 + OUT-FB-NEXT:No branches OUT-FB-NEXT:No calls OUT-F-EMPTY: OUT-F-NEXT:Function 'main' OUT-F-NEXT:Lines executed:92.00% of 25 - OUT-FB-NEXT:Branches executed:100.00% of 11 - OUT-FB-NEXT:Taken at least once:81.82% of 11 + OUT-FB-NEXT:No branches + OUT-FB-NEXT:No calls + OUT-F-EMPTY: + OUT-F-NEXT:Function '_ZN1AC2Ev' + OUT-F-NEXT:Lines executed:100.00% of 1 + OUT-FB-NEXT:No branches OUT-FB-NEXT:No calls OUT-F-EMPTY: OUT:File 'test.cpp' diff --git a/llvm/tools/llvm-cov/gcov.cpp b/llvm/tools/llvm-cov/gcov.cpp index 858f4cee79045..8d2876b6f42ee 100644 --- a/llvm/tools/llvm-cov/gcov.cpp +++ b/llvm/tools/llvm-cov/gcov.cpp @@ -77,9 +77,7 @@ static void reportCoverage(StringRef SourceFile, StringRef ObjectDir, if (DumpGCOV) GF.print(errs()); - FileInfo FI(Options); - GF.collectLineCounts(FI); - FI.print(llvm::outs(), SourceFile, GCNO, GCDA, GF); + gcovOneInput(Options, SourceFile, GCNO, GCDA, GF); } int gcovMain(int argc, const char *argv[]) { From 163863604f9c1ad3add238f9e8fb32cfd136f894 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Mon, 14 Sep 2020 08:43:56 +0200 Subject: [PATCH 0503/1079] [analyzer] Evaluate PredefinedExpressions We did not evaluate such expressions, just returned `Unknown` for such cases. After this patch, we will be able to access a unique value identifying a template instantiation via the value of the `PRETTY_FUNCTION` predefined expression. Reviewed By: vsavchenko Differential Revision: https://reviews.llvm.org/D87004 --- clang/lib/StaticAnalyzer/Core/Environment.cpp | 1 + clang/lib/StaticAnalyzer/Core/SValBuilder.cpp | 8 ++ clang/test/Analysis/eval-predefined-exprs.cpp | 109 ++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 clang/test/Analysis/eval-predefined-exprs.cpp diff --git a/clang/lib/StaticAnalyzer/Core/Environment.cpp b/clang/lib/StaticAnalyzer/Core/Environment.cpp index 556ff6af15de2..cba20b967b6fa 100644 --- a/clang/lib/StaticAnalyzer/Core/Environment.cpp +++ b/clang/lib/StaticAnalyzer/Core/Environment.cpp @@ -116,6 +116,7 @@ SVal Environment::getSVal(const EnvironmentEntry &Entry, case Stmt::StringLiteralClass: case Stmt::TypeTraitExprClass: case Stmt::SizeOfPackExprClass: + case Stmt::PredefinedExprClass: // Known constants; defer to SValBuilder. 
return svalBuilder.getConstantVal(cast<Expr>(S)).getValue(); diff --git a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp index 32d2a3e30708e..72b8ada1dfab9 100644 --- a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp @@ -306,6 +306,14 @@ Optional<SVal> SValBuilder::getConstantVal(const Expr *E) { return makeLoc(getRegionManager().getStringRegion(SL)); } + case Stmt::PredefinedExprClass: { + const auto *PE = cast<PredefinedExpr>(E); + assert(PE->getFunctionName() && + "Since we analyze only instantiated functions, PredefinedExpr " + "should have a function name."); + return makeLoc(getRegionManager().getStringRegion(PE->getFunctionName())); + } + // Fast-path some expressions to avoid the overhead of going through the AST's // constant evaluator case Stmt::CharacterLiteralClass: { diff --git a/clang/test/Analysis/eval-predefined-exprs.cpp b/clang/test/Analysis/eval-predefined-exprs.cpp new file mode 100644 index 0000000000000..cc48a264f2d32 --- /dev/null +++ b/clang/test/Analysis/eval-predefined-exprs.cpp @@ -0,0 +1,109 @@ +// RUN: %clang_analyze_cc1 -std=c++17 -analyzer-checker=core,debug.ExprInspection -verify %s +// +// RUN: %clang_analyze_cc1 -std=c++17 -analyzer-checker=core,debug.ExprInspection -verify \ +// RUN: -triple i386-pc-win32 -fms-compatibility -fms-extensions -DANALYZER_MS %s + +template <typename T> +void clang_analyzer_dump(const T *); +void clang_analyzer_warnIfReached(); + +void builtin_unique_stable_name_of_lambda() { + auto y = [] {}; + clang_analyzer_dump(__builtin_unique_stable_name(y)); + // expected-warning@-1 {{&Element{"_ZTSZ36builtin_unique_stable_name_of_lambdavEUlvE11_12",0 S64b,char}}} +} + +template <typename T, int Value, typename U> +void func(U param) { + clang_analyzer_dump(__func__); + clang_analyzer_dump(__FUNCTION__); + clang_analyzer_dump(__PRETTY_FUNCTION__); + // expected-warning@-3 {{&Element{"func",0 S64b,char}}} + // expected-warning@-3 {{&Element{"func",0 S64b,char}}} + // expected-warning@-3 {{&Element{"void func(U) [T = Class, Value = 42, U = char]",0 S64b,char}}} + +#ifdef ANALYZER_MS + clang_analyzer_dump(__FUNCDNAME__); + clang_analyzer_dump(L__FUNCTION__); + clang_analyzer_dump(__FUNCSIG__); + clang_analyzer_dump(L__FUNCSIG__); + // expected-warning@-4 {{&Element{"??$func@UClass@?1??foo@@YAXXZ@$0CK@D@@YAXD@Z",0 S64b,char}}} + // expected-warning@-4 {{&Element{L"func",0 S64b,wchar_t}}} + // expected-warning@-4 {{&Element{"void __cdecl func(U) [T = Class, Value = 42, U = char]",0 S64b,char}}} + // expected-warning@-4 {{&Element{L"void __cdecl func(U) [T = Class, Value = 42, U = char]",0 S64b,wchar_t}}} +#endif +} + +void foo() { + clang_analyzer_dump(__func__); + clang_analyzer_dump(__FUNCTION__); + clang_analyzer_dump(__PRETTY_FUNCTION__); + // expected-warning@-3 {{&Element{"foo",0 S64b,char}}} + // expected-warning@-3 {{&Element{"foo",0 S64b,char}}} + // expected-warning@-3 {{&Element{"void foo()",0 S64b,char}}} + +#ifdef ANALYZER_MS + clang_analyzer_dump(__FUNCDNAME__); + clang_analyzer_dump(L__FUNCTION__); + clang_analyzer_dump(__FUNCSIG__); + clang_analyzer_dump(L__FUNCSIG__); + // expected-warning@-4 {{&Element{"?foo@@YAXXZ",0 S64b,char}}} + // expected-warning@-4 {{&Element{L"foo",0 S64b,wchar_t}}} + // expected-warning@-4 {{&Element{"void __cdecl foo(void)",0 S64b,char}}} + // expected-warning@-4 {{&Element{L"void __cdecl foo(void)",0 S64b,wchar_t}}} +#endif + + func<struct Class, 42>('b'); // instantiate template +} + +void test_builtin_unique_stable_name(int a) { + clang_analyzer_dump(__builtin_unique_stable_name(a)); + // 
expected-warning@-1 {{&Element{"_ZTSi",0 S64b,char}}} +} + +struct A { + A() { + clang_analyzer_dump(__func__); + clang_analyzer_dump(__FUNCTION__); + clang_analyzer_dump(__PRETTY_FUNCTION__); + // expected-warning@-3 {{&Element{"A",0 S64b,char}}} + // expected-warning@-3 {{&Element{"A",0 S64b,char}}} + // expected-warning@-3 {{&Element{"A::A()",0 S64b,char}}} + +#ifdef ANALYZER_MS + clang_analyzer_dump(__FUNCDNAME__); + clang_analyzer_dump(L__FUNCTION__); + clang_analyzer_dump(__FUNCSIG__); + clang_analyzer_dump(L__FUNCSIG__); + // expected-warning@-4 {{&Element{"??0A@@QAE@XZ",0 S64b,char}}} + // expected-warning@-4 {{&Element{L"A",0 S64b,wchar_t}}} + // expected-warning@-4 {{&Element{"__thiscall A::A(void)",0 S64b,char}}} + // expected-warning@-4 {{&Element{L"__thiscall A::A(void)",0 S64b,wchar_t}}} +#endif + } + ~A() { + clang_analyzer_dump(__func__); + clang_analyzer_dump(__FUNCTION__); + clang_analyzer_dump(__PRETTY_FUNCTION__); + // expected-warning@-3 {{&Element{"~A",0 S64b,char}}} + // expected-warning@-3 {{&Element{"~A",0 S64b,char}}} + // expected-warning@-3 {{&Element{"A::~A()",0 S64b,char}}} + +#ifdef ANALYZER_MS + clang_analyzer_dump(__FUNCDNAME__); + clang_analyzer_dump(L__FUNCTION__); + clang_analyzer_dump(__FUNCSIG__); + clang_analyzer_dump(L__FUNCSIG__); + // expected-warning@-4 {{&Element{"??1A@@QAE@XZ",0 S64b,char}}} + // expected-warning@-4 {{&Element{L"~A",0 S64b,wchar_t}}} + // expected-warning@-4 {{&Element{"__thiscall A::~A(void)",0 S64b,char}}} + // expected-warning@-4 {{&Element{L"__thiscall A::~A(void)",0 S64b,wchar_t}}} +#endif + } + + template int dependent() { + // We should not analyze dependent functions. + // Such functions have no function name of predefined expressions such as: '__func__' etc. + clang_analyzer_warnIfReached(); // no-warning + } +}; From d7ae9696e31f6484de4ff4c10bca144d7e61320c Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Mon, 14 Sep 2020 08:43:56 +0200 Subject: [PATCH 0504/1079] [analyzer][docs][NFC] Document the ento namespace in the llvm/Lexicon Document the `ento` namespace in the Lexicon according to @nicolas17 on the mailing list (http://lists.llvm.org/pipermail/cfe-dev/2020-August/066577.html). The analyzer lived at different namespaces at different times. Originally lived at the `GR` aka. (Graph Reachability) namespace [7], later it moved under the `ento` namespace [9]. The Static Analyzer's code lived at many other places as well: `Analysis` -[2]-> `Checker` -[5]-> `GR` -[10]> `entoSA` -[11]-> `StaticAnalyzer` The relevant code motion, refactor commits, cfe-dev mailing in chronological order: 1) 2008-03-15 Make a major restructuring of the clang tree: introduce a ... 7a51313d8a0a358bb92eb5dbf8fd846b7c48e7fe 2) 2010-01-25 Split libAnalysis into two libraries: libAnalysis and libChecker d6b8708643219776b1f0f41df32c5eccf065ed5b 3) 2010-12-21 Reorganization of Checker files http://lists.llvm.org/pipermail/cfe-dev/2010-December/012694.html 4) 2010-12-22 Refactoring: include/clang/Checker -> include/clang/GR 8d602a8aa8e6697509465d8a5473fc41cb1a382e 5) 2010-12-22 Refactoring: lib/Checker -> lib/GR 2ff5ab1516e48c2fff0138f953d887b5e695214b 6) 2010-12-22 Refactoring: Move checkers into lib/GR/Checkers and their own a700e976b658860418bc145ec0bdacd4f1db3264 7) 2010-12-22 Refactoring: Move stuff into namespace 'GR' ca08fba4141f1d3ae6193b3c81fb6ba8fb10d7dc 8) 2010-12-22 Refactoring: Drop the 'GR' prefix. 
1696f508e2fe95793ca8bb70d78b88023b6b8625 9) 2010-12-23 Rename static analyzer namespace 'GR' to 'ento' 98857c986078c6e6a10910628dbabf75ae735b76 10) 2010-12-23 Rename headers: 'clang/GR' 'clang/EntoSA' and update Makefile ef33f0996c6a625767690395f3cfb41afb84db5a 11) 2010-12-23 Chris Lattner has strong opinions about directory d99bd55a5e092774214ba31fc5a871bfc31e711c 12) 2010-12-24 Remove the EntoSA directories. 9d6af5328e3a61641a125b17125952fa1a6bf11d Reviewed By: Szelethus,martong,ASDenysPetrov,xazax.hun Differential Revision: https://reviews.llvm.org/D86446 --- llvm/docs/Lexicon.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/llvm/docs/Lexicon.rst b/llvm/docs/Lexicon.rst index cf194eb0d1d3d..03090827ffe48 100644 --- a/llvm/docs/Lexicon.rst +++ b/llvm/docs/Lexicon.rst @@ -92,6 +92,19 @@ D **DSE** Dead Store Elimination +E +- + +**ento** + This namespace houses the + `Clang Static Analyzer `_. + It is an abbreviaton of `entomology `_. + + *"Entomology is the scientific study of insects."* + + In the past, this namespace had not only the name `GR` (aka. Graph Reachability) + but also `entoSA`. + F - From cdacffe4acc083dfb1cccb6458420eed09f9d093 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Mon, 14 Sep 2020 08:43:56 +0200 Subject: [PATCH 0505/1079] [analyzer][z3] Use more elaborate Z3 variable names Previously, it was a tedious task to comprehend Z3 dumps. We will use the same name prefix just as we use in the corresponding dump method For all `SymbolData` values: `$###` -> `conj_$###` `$###` -> `derived_$###` `$###` -> `extent_$###` `$###` -> `meta_$###` `$###` -> `reg_$###` Reviewed By: xazax.hun,mikhail.ramalho Differential Revision: https://reviews.llvm.org/D86223 --- .../Core/PathSensitive/SMTConstraintManager.h | 3 +-- .../Core/PathSensitive/SMTConv.h | 18 +++++++++------ .../Core/PathSensitive/SymExpr.h | 3 +++ .../Core/PathSensitive/SymbolManager.h | 10 +++++++++ .../lib/StaticAnalyzer/Core/SymbolManager.cpp | 22 ++++++++++++------- clang/test/Analysis/z3/pretty-dump.c | 17 ++++++++++++++ 6 files changed, 56 insertions(+), 17 deletions(-) create mode 100644 clang/test/Analysis/z3/pretty-dump.c diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h index 6a0f5f10874e3..07fc73a670f35 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h @@ -122,8 +122,7 @@ class SMTConstraintManager : public clang::ento::SimpleConstraintManager { // this method tries to get the interpretation (the actual value) from // the solver, which is currently not cached. - llvm::SMTExprRef Exp = - SMTConv::fromData(Solver, SD->getSymbolID(), Ty, Ctx.getTypeSize(Ty)); + llvm::SMTExprRef Exp = SMTConv::fromData(Solver, Ctx, SD); Solver->reset(); addStateConstraints(State); diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConv.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConv.h index bdebe238829e8..2d0f169260a45 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConv.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConv.h @@ -319,11 +319,16 @@ class SMTConv { } /// Construct an SMTSolverRef from a SymbolData. 
- static inline llvm::SMTExprRef fromData(llvm::SMTSolverRef &Solver, - const SymbolID ID, const QualType &Ty, - uint64_t BitWidth) { - llvm::Twine Name = "$" + llvm::Twine(ID); - return Solver->mkSymbol(Name.str().c_str(), mkSort(Solver, Ty, BitWidth)); + static inline llvm::SMTExprRef + fromData(llvm::SMTSolverRef &Solver, ASTContext &Ctx, const SymbolData *Sym) { + const SymbolID ID = Sym->getSymbolID(); + const QualType Ty = Sym->getType(); + const uint64_t BitWidth = Ctx.getTypeSize(Ty); + + llvm::SmallString<16> Str; + llvm::raw_svector_ostream OS(Str); + OS << Sym->getKindStr() << ID; + return Solver->mkSymbol(Str.c_str(), mkSort(Solver, Ty, BitWidth)); } // Wrapper to generate SMTSolverRef from SymbolCast data. @@ -422,8 +427,7 @@ class SMTConv { if (RetTy) *RetTy = Sym->getType(); - return fromData(Solver, SD->getSymbolID(), Sym->getType(), - Ctx.getTypeSize(Sym->getType())); + return fromData(Solver, Ctx, SD); } if (const SymbolCast *SC = dyn_cast(Sym)) { diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h index abfcd1d80faa4..2f4ac6ba5f975 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h @@ -126,6 +126,9 @@ class SymbolData : public SymExpr { public: ~SymbolData() override = default; + /// Get a string representation of the kind of the region. + virtual StringRef getKindStr() const = 0; + SymbolID getSymbolID() const { return Sym; } unsigned computeComplexity() const override { diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h index 390ced8c29f8f..75dfbde5c1519 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h @@ -59,6 +59,8 @@ class SymbolRegionValue : public SymbolData { Profile(profile, R); } + StringRef getKindStr() const override; + void dumpToStream(raw_ostream &os) const override; const MemRegion *getOriginRegion() const override { return getRegion(); } @@ -99,6 +101,8 @@ class SymbolConjured : public SymbolData { QualType getType() const override; + StringRef getKindStr() const override; + void dumpToStream(raw_ostream &os) const override; static void Profile(llvm::FoldingSetNodeID& profile, const Stmt *S, @@ -141,6 +145,8 @@ class SymbolDerived : public SymbolData { QualType getType() const override; + StringRef getKindStr() const override; + void dumpToStream(raw_ostream &os) const override; const MemRegion *getOriginRegion() const override { return getRegion(); } @@ -177,6 +183,8 @@ class SymbolExtent : public SymbolData { QualType getType() const override; + StringRef getKindStr() const override; + void dumpToStream(raw_ostream &os) const override; static void Profile(llvm::FoldingSetNodeID& profile, const SubRegion *R) { @@ -226,6 +234,8 @@ class SymbolMetadata : public SymbolData { QualType getType() const override; + StringRef getKindStr() const override; + void dumpToStream(raw_ostream &os) const override; static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R, diff --git a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp index ae40ad910d843..700f91aed610f 100644 --- a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp @@ -35,6 +35,12 @@ using namespace 
ento; void SymExpr::anchor() {} +StringRef SymbolConjured::getKindStr() const { return "conj_$"; } +StringRef SymbolDerived::getKindStr() const { return "derived_$"; } +StringRef SymbolExtent::getKindStr() const { return "extent_$"; } +StringRef SymbolMetadata::getKindStr() const { return "meta_$"; } +StringRef SymbolRegionValue::getKindStr() const { return "reg_$"; } + LLVM_DUMP_METHOD void SymExpr::dump() const { dumpToStream(llvm::errs()); } void BinarySymExpr::dumpToStreamImpl(raw_ostream &OS, const SymExpr *Sym) { @@ -65,7 +71,7 @@ void SymbolCast::dumpToStream(raw_ostream &os) const { } void SymbolConjured::dumpToStream(raw_ostream &os) const { - os << "conj_$" << getSymbolID() << '{' << T.getAsString() << ", LC" + os << getKindStr() << getSymbolID() << '{' << T.getAsString() << ", LC" << LCtx->getID(); if (S) os << ", S" << S->getID(LCtx->getDecl()->getASTContext()); @@ -75,24 +81,24 @@ void SymbolConjured::dumpToStream(raw_ostream &os) const { } void SymbolDerived::dumpToStream(raw_ostream &os) const { - os << "derived_$" << getSymbolID() << '{' - << getParentSymbol() << ',' << getRegion() << '}'; + os << getKindStr() << getSymbolID() << '{' << getParentSymbol() << ',' + << getRegion() << '}'; } void SymbolExtent::dumpToStream(raw_ostream &os) const { - os << "extent_$" << getSymbolID() << '{' << getRegion() << '}'; + os << getKindStr() << getSymbolID() << '{' << getRegion() << '}'; } void SymbolMetadata::dumpToStream(raw_ostream &os) const { - os << "meta_$" << getSymbolID() << '{' - << getRegion() << ',' << T.getAsString() << '}'; + os << getKindStr() << getSymbolID() << '{' << getRegion() << ',' + << T.getAsString() << '}'; } void SymbolData::anchor() {} void SymbolRegionValue::dumpToStream(raw_ostream &os) const { - os << "reg_$" << getSymbolID() - << '<' << getType().getAsString() << ' ' << R << '>'; + os << getKindStr() << getSymbolID() << '<' << getType().getAsString() << ' ' + << R << '>'; } bool SymExpr::symbol_iterator::operator==(const symbol_iterator &X) const { diff --git a/clang/test/Analysis/z3/pretty-dump.c b/clang/test/Analysis/z3/pretty-dump.c new file mode 100644 index 0000000000000..811da172e7490 --- /dev/null +++ b/clang/test/Analysis/z3/pretty-dump.c @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -analyze -analyzer-constraints=z3 -setup-static-analyzer \ +// RUN: -analyzer-checker=core,debug.ExprInspection %s 2>&1 | FileCheck %s +// +// REQUIRES: z3 +// +// Works only with the z3 constraint manager. + +void clang_analyzer_printState(); + +void foo(int x) { + if (x == 3) { + clang_analyzer_printState(); + (void)x; + // CHECK: "constraints": [ + // CHECK-NEXT: { "symbol": "(reg_$[[#]]) == 3", "range": "(= reg_$[[#]] #x00000003)" } + } +} From 15bff4dec4360985a6a058a7e42a4ffd590dc665 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Thu, 10 Sep 2020 11:54:58 +0100 Subject: [PATCH 0506/1079] [CodeGen] Fix bug in IncrementPointer In an earlier patch I meant to add the correct flags to the ADD node when incrementing the pointer, but forgot to pass them to SelectionDAG::getNode. 
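In sketch form, the mistake and the fix looked roughly like this (illustrative only; the exact context is in the diff below):

    SDNodeFlags Flags;
    Flags.setNoUnsignedWrap(true);
    // Before: Flags was computed but never passed, so the ADD node lost its
    // no-unsigned-wrap flag.
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement);
    // After: hand the flags to getNode explicitly.
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement,
                      Flags);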
Differential Revision: https://reviews.llvm.org/D87496 --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 9d82d2ed8ec52..b09303e5219eb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1006,7 +1006,8 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, Flags.setNoUnsignedWrap(true); if (ScaledOffset) *ScaledOffset += IncrementSize; - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement, + Flags); } else { MPI = N->getPointerInfo().getWithOffset(IncrementSize); // Increment the pointer to the other half. From 4946802c5f406b050cbb1524d0fd03cf3fd7b0dc Mon Sep 17 00:00:00 2001 From: Simon Wallis Date: Mon, 14 Sep 2020 08:52:59 +0100 Subject: [PATCH 0507/1079] [ARM] Fix so immediates and pc relative checks Treating an SoImm offset as a multiple of 4 between -1020 and 1020 mis-handles the second of a pair of 16-bit constants where the offset is a multiple of 2 but not a multiple of 4, leading to an LLVM ERROR: out of range pc-relative fixup value For 32-bit and larger (64-bit) constants, continue to treat an SoImm offset as a multiple of 4 between -1020 and 1020. For smaller (16-bit) constants, treat an SoImm offset as a multiple of 1 between -255 and 255. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D86949 --- llvm/lib/Target/ARM/ARMConstantIslandPass.cpp | 28 ++++++--- .../ARM/constant-island-SOImm-limit16.mir | 62 +++++++++++++++++++ 2 files changed, 81 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/ARM/constant-island-SOImm-limit16.mir diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index 204e57fefb9a5..86da5a24d3407 100644 --- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -775,15 +775,25 @@ initializeFunctionInfo(const std::vector &CPEMIs) { // Taking the address of a CP entry. case ARM::LEApcrel: - case ARM::LEApcrelJT: - // This takes a SoImm, which is 8 bit immediate rotated. We'll - // pretend the maximum offset is 255 * 4. Since each instruction - // 4 byte wide, this is always correct. We'll check for other - // displacements that fits in a SoImm as well. - Bits = 8; - Scale = 4; - NegOk = true; - IsSoImm = true; + case ARM::LEApcrelJT: { + // This takes a SoImm, which is 8 bit immediate rotated. We'll + // pretend the maximum offset is 255 * 4. Since each instruction + // 4 byte wide, this is always correct. We'll check for other + // displacements that fits in a SoImm as well. + Bits = 8; + NegOk = true; + IsSoImm = true; + unsigned CPI = I.getOperand(op).getIndex(); + MachineInstr *CPEMI = CPEMIs[CPI]; + const Align CPEAlign = getCPEAlign(CPEMI); + const unsigned LogCPEAlign = Log2(CPEAlign); + if (LogCPEAlign >= 2) + Scale = 4; + else + // For constants with less than 4-byte alignment, + // we'll pretend the maximum offset is 255 * 1. 
+ Scale = 1; + } break; case ARM::t2LEApcrel: case ARM::t2LEApcrelJT: diff --git a/llvm/test/CodeGen/ARM/constant-island-SOImm-limit16.mir b/llvm/test/CodeGen/ARM/constant-island-SOImm-limit16.mir new file mode 100644 index 0000000000000..223a3b0b33b13 --- /dev/null +++ b/llvm/test/CodeGen/ARM/constant-island-SOImm-limit16.mir @@ -0,0 +1,62 @@ +# RUN: sed -e "s/SPACEBYTES/100/g" %s | sed -e "s/OFFSET/116/g" > %t.mir +# RUN: llc %t.mir --filetype=obj -start-before=arm-cp-islands -o - | \ +# RUN: llvm-objdump --arch=armv8a --disassemble - | FileCheck %t.mir + +# RUN: sed -e "s/SPACEBYTES/400/g" %s | sed -e "s/OFFSET/12/g" > %t.mir +# RUN: llc %t.mir --filetype=obj -start-before=arm-cp-islands -o - | \ +# RUN: llvm-objdump --arch=armv8a --disassemble - | FileCheck %t.mir + +# RUN: sed -e "s/SPACEBYTES/800/g" %s | sed -e "s/OFFSET/12/g" > %t.mir +# RUN: llc %t.mir --filetype=obj -start-before=arm-cp-islands -o - | \ +# RUN: llvm-objdump --arch=armv8a --disassemble - | FileCheck %t.mir + +--- | + target triple = "armv8.2a-arm-none-eabi" + + define dso_local i32 @main() #0 { ret i32 0 } + + attributes #0 = { "frame-pointer"="all" } !4 = !{i32 210} + +... +--- + +name: main +alignment: 4 +tracksRegLiveness: true +constants: + +- + id: 0 + value: half 0xH5440 + alignment: 2 +- + id: 1 + value: half 0xH5441 + alignment: 2 + +machineFunctionInfo: {} +body: | + + bb.0 (%ir-block.0): + liveins: $lr + + $sp = frame-setup STMDB_UPD $sp, 14, $noreg, killed $r11, killed $lr + $r11 = frame-setup MOVr killed $sp, 14, $noreg, $noreg + $sp = frame-setup SUBri killed $sp, 80, 14, $noreg, $noreg + + ; Test handling of 16-bit constant pool entries. + ; 2 consecutive entries: 1 is 4-byte aligned, 1 is not 4-byte aligned. + + renamable $r1 = LEApcrel %const.0, 14, $noreg + renamable $r1 = LDRH killed renamable $r1, $noreg, 0, 14, $noreg :: (load 2 from constant-pool) + renamable $r1 = LEApcrel %const.1, 14, $noreg + renamable $r1 = LDRH killed renamable $r1, $noreg, 0, 14, $noreg :: (load 2 from constant-pool) + + renamable $r0 = SPACE SPACEBYTES, undef renamable $r0 + + $sp = frame-destroy MOVr $r11, 14, $noreg, $noreg + $sp = frame-destroy LDMIA_RET $sp, 14, $noreg, def $r11, def $pc, implicit killed $r0 + + # CHECK: add r1, pc, #OFFSET +--- +... From 9a4476072e152881e00179bef2c6da9fea9b274e Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 11 Sep 2020 22:00:36 +0100 Subject: [PATCH 0508/1079] [UnifyLoopExits] Fix non-deterministic iteration order This was causing random minor codegen differences in shaders compiled with the AMDGPU backend. 
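A minimal self-contained sketch of the underlying issue (the function and variable names here are illustrative, not from the pass): DenseMap hashes its pointer keys, so its iteration order can differ from run to run, while MapVector iterates in insertion order.

    #include "llvm/ADT/MapVector.h"
    #include "llvm/Support/raw_ostream.h"

    void dumpInsertionOrder() {
      int A = 0, B = 1, C = 2;
      // A DenseMap<int *, int> here would have no stable iteration order,
      // because the pointer keys hash differently in each process.
      llvm::MapVector<int *, int> Stable;
      for (int *K : {&A, &B, &C})
        Stable[K] = *K;
      for (auto &KV : Stable)
        llvm::errs() << KV.second << '\n'; // always prints 0, 1, 2
    }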
Differential Revision: https://reviews.llvm.org/D87548
---
 llvm/lib/Transforms/Utils/UnifyLoopExits.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index b10deee3907c7..6eacb9a20e4c0 100644
--- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -16,6 +16,7 @@
 //
 //===----------------------------------------------------------------------===//

+#include "llvm/ADT/MapVector.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/InitializePasses.h"
@@ -80,7 +81,7 @@ static void restoreSSA(const DominatorTree &DT, const Loop *L,
                        const SetVector<BasicBlock *> &Incoming,
                        BasicBlock *LoopExitBlock) {
   using InstVector = SmallVector<Instruction *, 8>;
-  using IIMap = DenseMap<Instruction *, InstVector>;
+  using IIMap = MapVector<Instruction *, InstVector>;
   IIMap ExternalUsers;
   for (auto BB : L->blocks()) {
     for (auto &I : *BB) {

From 0008fb343704bafc3469703be930b8a65d7c47fa Mon Sep 17 00:00:00 2001
From: Kamil Rytarowski
Date: Mon, 14 Sep 2020 10:10:49 +0200
Subject: [PATCH 0509/1079] [compiler-rt] [netbsd] Use internal_ptrace() instead of ptrace()

---
 .../sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp
index 1ed21343254d5..63ef00d2750a3 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp
@@ -131,7 +131,7 @@ bool ThreadSuspender::SuspendAllThreads() {
     pl.pl_lwpid = 0;

     int val;
-    while ((val = ptrace(op, pid_, (void *)&pl, sizeof(pl))) != -1 &&
+    while ((val = internal_ptrace(op, pid_, (void *)&pl, sizeof(pl))) != -1 &&
           pl.pl_lwpid != 0) {
      suspended_threads_list_.Append(pl.pl_lwpid);
      VReport(2, "Appended thread %d in process %d.\n", pl.pl_lwpid, pid_);

From bfcb824ba5287f96c5b9f1009d10af37b7eb9519 Mon Sep 17 00:00:00 2001
From: David Stenberg
Date: Mon, 14 Sep 2020 09:38:54 +0200
Subject: [PATCH 0510/1079] [JumpThreading] Fix an incorrect Modified status

This fixes PR47297.

When ProcessBlock() was able to constant fold the terminator's condition, but not do any more transformations, the function would return false, which would lead to the JumpThreading pass returning an incorrect modified status. This patch makes it so that ProcessBlock() returns true in such cases. This will trigger an unnecessary invocation of ProcessBlock() in such cases, but that should be rare.

This was caught using the check introduced by D80916.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D87392
---
 llvm/lib/Transforms/Scalar/JumpThreading.cpp | 6 +++-
 .../JumpThreading/constant-fold-status.ll | 28 +++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/JumpThreading/constant-fold-status.ll

diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 354afc710f31c..8b1ad336c8a59 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1047,6 +1047,9 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
       return false; // Must be an invoke or callbr.
   }

+  // Keep track of whether we constant folded the condition in this invocation.
+  bool ConstantFolded = false;
+
   // Run constant folding to see if we can reduce the condition to a simple
   // constant.
   if (Instruction *I = dyn_cast<Instruction>(Condition)) {
@@ -1057,6 +1060,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
       if (isInstructionTriviallyDead(I, TLI))
         I->eraseFromParent();
       Condition = SimpleVal;
+      ConstantFolded = true;
     }
   }

@@ -1107,7 +1111,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
     // FIXME: Unify this with code below.
     if (ProcessThreadableEdges(Condition, BB, Preference, Terminator))
       return true;
-    return false;
+    return ConstantFolded;
   }

   if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
diff --git a/llvm/test/Transforms/JumpThreading/constant-fold-status.ll b/llvm/test/Transforms/JumpThreading/constant-fold-status.ll
new file mode 100644
index 0000000000000..95cf8bab7a5ed
--- /dev/null
+++ b/llvm/test/Transforms/JumpThreading/constant-fold-status.ll
@@ -0,0 +1,28 @@
+; RUN: opt -jump-threading < %s -S -o - | FileCheck %s
+
+; Reproducer for PR47297.
+
+; The pass previously did not report a correct Modified status in the case
+; where a terminator's condition was successfully constant folded, but there
+; were no other transformations done. This was caught by the pass return
+; status check that is hidden under EXPENSIVE_CHECKS.
+
+; CHECK-LABEL: entry:
+; CHECK-NEXT: br i1 icmp eq (i32 ptrtoint (i16* @a to i32), i32 0), label %overflow, label %cont
+
+@a = internal global i16 0
+
+define void @foo(i16 %d) {
+entry:
+  %.not = icmp eq i16 zext (i1 icmp ne (i32 ptrtoint (i16* @a to i32), i32 0) to i16), 0
+  br i1 %.not, label %overflow, label %cont
+
+overflow: ; preds = %entry
+  call void @bar()
+  br label %cont
+
+cont: ; preds = %overflow, %entry
+  ret void
+}
+
+declare void @bar()

From 09b8871f8d81ce2777afe836604f392a2af9e620 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Mon, 14 Sep 2020 10:39:25 +0200
Subject: [PATCH 0511/1079] AMDGPU/GlobalISel/Emitter Support for predicate code that uses operands

Predicates with 'let PredicateCodeUsesOperands = 1' want to examine matched operands. When we encounter predicate code that uses operands, analyze its named operand arguments and create a map between argument index and name. Later, when a named leaf node is encountered, emit GIM_RecordNamedOperand, which stores that operand at its argument index in the operand list. This operand list is then passed as an argument to the predicate's C++ code.
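Roughly, the predicate function that TableGen emits then receives the recorded operands as an extra argument. A hand-written sketch (the predicate name below is invented, not real emitter output):

    bool MyTargetInstructionSelector::testMIPredicate_MI(
        unsigned PredicateID, const MachineInstr &MI,
        const std::array<const MachineOperand *, 3> &Operands) const {
      switch (PredicateID) {
      case GIPFP_MI_Predicate_example_pat:
        // Operands[i] was filled in by GIM_RecordNamedOperand, in the order
        // in which the names appear in the predicate's argument list.
        return Operands[0]->getReg() != Operands[1]->getReg();
      }
      llvm_unreachable("Unknown predicate");
    }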
Differential Revision: https://reviews.llvm.org/D87285
---
 .../CodeGen/GlobalISel/InstructionSelector.h | 18 ++-
 .../GlobalISel/InstructionSelectorImpl.h | 17 +-
 .../AMDGPU/AMDGPUInstructionSelector.cpp | 1 +
 .../Target/AMDGPU/AMDGPUInstructionSelector.h | 1 +
 llvm/lib/Target/AMDGPU/VOP3Instructions.td | 20 ++-
 .../test/CodeGen/AMDGPU/GlobalISel/add_shl.ll | 149 ++++++++++++++++++
 llvm/test/TableGen/GlobalISelEmitter.td | 2 +-
 .../GlobalISelEmitterCustomPredicate.td | 47 +++---
 llvm/utils/TableGen/GlobalISelEmitter.cpp | 79 +++++++++-
 9 files changed, 302 insertions(+), 32 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/add_shl.ll

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
index 17c1ec36c24fe..bf9991eb08de1 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -254,6 +254,15 @@ enum {
   /// - OtherOpIdx - Other operand index
   GIM_CheckIsSameOperand,

+  /// Predicates with 'let PredicateCodeUsesOperands = 1' need to examine some
+  /// named operands that will be recorded in RecordedOperands. Names of these
+  /// operands are referenced in the predicate argument list. The emitter
+  /// determines StoreIdx (which corresponds to the order in which names appear
+  /// in the argument list).
+  /// - InsnID - Instruction ID
+  /// - OpIdx - Operand index
+  /// - StoreIdx - Store location in RecordedOperands.
+  GIM_RecordNamedOperand,
+
   /// Fail the current try-block, or completely fail to match if there is no
   /// current try-block.
   GIM_Reject,
@@ -446,6 +455,11 @@ class InstructionSelector {
     std::vector<ComplexRendererFns::value_type> Renderers;
     RecordedMIVector MIs;
     DenseMap<unsigned, unsigned> TempRegisters;
+    /// Named operands that a predicate with 'let PredicateCodeUsesOperands = 1'
+    /// references in its argument list. Operands are inserted at the index set
+    /// by the emitter, which corresponds to the order in which names appear in
+    /// the argument list. Currently such predicates don't have more than 3
+    /// arguments.
+    std::array<const MachineOperand *, 3> RecordedOperands;

     MatcherState(unsigned MaxRenderers);
   };
@@ -506,7 +520,9 @@ class InstructionSelector {
     llvm_unreachable(
         "Subclasses must override this with a tablegen-erated function");
   }
-  virtual bool testMIPredicate_MI(unsigned, const MachineInstr &) const {
+  virtual bool testMIPredicate_MI(
+      unsigned, const MachineInstr &,
+      const std::array<const MachineOperand *, 3> &Operands) const {
     llvm_unreachable(
         "Subclasses must override this with a tablegen-erated function");
   }
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
index 1f1fb5aca8757..bcb84c337f5e9 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
@@ -367,7 +367,8 @@ bool InstructionSelector::executeMatchTable(
       assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
       assert(Predicate > GIPFP_MI_Invalid && "Expected a valid predicate");

-      if (!testMIPredicate_MI(Predicate, *State.MIs[InsnID]))
+      if (!testMIPredicate_MI(Predicate, *State.MIs[InsnID],
+                              State.RecordedOperands))
         if (handleReject() == RejectAndGiveUp)
           return false;
       break;
@@ -617,6 +618,20 @@ bool InstructionSelector::executeMatchTable(
       break;
     }

+    case GIM_RecordNamedOperand: {
+      int64_t InsnID = MatchTable[CurrentIdx++];
+      int64_t OpIdx = MatchTable[CurrentIdx++];
+      uint64_t StoreIdx = MatchTable[CurrentIdx++];
+
+      DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+                      dbgs() << CurrentIdx << ": GIM_RecordNamedOperand(MIs["
+                             << InsnID << "]->getOperand(" << OpIdx
+                             << "), StoreIdx=" << StoreIdx << ")\n");
+      assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
+      assert(StoreIdx < State.RecordedOperands.size() && "Index out of range");
+      State.RecordedOperands[StoreIdx] = &State.MIs[InsnID]->getOperand(OpIdx);
+      break;
+    }
     case GIM_CheckRegBankForClass: {
       int64_t InsnID = MatchTable[CurrentIdx++];
       int64_t OpIdx = MatchTable[CurrentIdx++];
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 3f39f6f21c1cc..3f8782b2a66ee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -72,6 +72,7 @@ const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
 void AMDGPUInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits &KB,
                                         CodeGenCoverage &CoverageInfo) {
   MRI = &MF.getRegInfo();
+  Subtarget = &MF.getSubtarget<GCNSubtarget>();
   InstructionSelector::setupMF(MF, KB, CoverageInfo);
 }

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 2176e2b549511..bd25c67964bfa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -50,6 +50,7 @@ class SIRegisterInfo;
 class AMDGPUInstructionSelector final : public InstructionSelector {
 private:
   MachineRegisterInfo *MRI;
+  const GCNSubtarget *Subtarget;

 public:
   AMDGPUInstructionSelector(const GCNSubtarget &STI,
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 3048bcc610c76..c4546f989c70d 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -605,16 +605,24 @@ class ThreeOpFrag : PatFrag<
   let PredicateCodeUsesOperands = 1;

   // The divergence predicate is irrelevant in GlobalISel, as we have
-  // proper register bank checks.
We also force all VOP instruction - // operands to VGPR, so we should not need to check the constant bus - // restriction. + // proper register bank checks. We just need to verify the constant + // bus restriction when all the sources are considered. // // FIXME: With unlucky SGPR operands, we could penalize code by // blocking folding SGPR->VGPR copies later. // FIXME: There's no register bank verifier - // FIXME: Should add a way for the emitter to recognize this is a - // trivially true predicate to eliminate the check. - let GISelPredicateCode = [{return true;}]; + let GISelPredicateCode = [{ + const int ConstantBusLimit = Subtarget->getConstantBusLimit(AMDGPU::V_ADD3_U32); + int ConstantBusUses = 0; + for (unsigned i = 0; i < 3; ++i) { + const RegisterBank *RegBank = RBI.getRegBank(Operands[i]->getReg(), MRI, TRI); + if (RegBank->getID() == AMDGPU::SGPRRegBankID) { + if (++ConstantBusUses > ConstantBusLimit) + return false; + } + } + return true; + }]; } let SubtargetPredicate = isGFX9Plus in { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add_shl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add_shl.ll new file mode 100644 index 0000000000000..0e232bf5945d8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add_shl.ll @@ -0,0 +1,149 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s + +; =================================================================================== +; V_ADD_LSHL_U32 +; =================================================================================== + +define amdgpu_ps float @add_shl(i32 %a, i32 %b, i32 %c) { +; VI-LABEL: add_shl: +; VI: ; %bb.0: +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; VI-NEXT: v_lshlrev_b32_e32 v0, v2, v0 +; VI-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: add_shl: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_add_lshl_u32 v0, v0, v1, v2 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: add_shl: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_add_lshl_u32 v0, v0, v1, v2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog + %x = add i32 %a, %b + %result = shl i32 %x, %c + %bc = bitcast i32 %result to float + ret float %bc +} + +define amdgpu_ps float @add_shl_vgpr_c(i32 inreg %a, i32 inreg %b, i32 %c) { +; VI-LABEL: add_shl_vgpr_c: +; VI: ; %bb.0: +; VI-NEXT: s_add_i32 s2, s2, s3 +; VI-NEXT: v_lshlrev_b32_e64 v0, v0, s2 +; VI-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: add_shl_vgpr_c: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_add_i32 s2, s2, s3 +; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s2 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: add_shl_vgpr_c: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_add_i32 s2, s2, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_lshlrev_b32_e64 v0, v0, s2 +; GFX10-NEXT: ; return to shader part epilog + %x = add i32 %a, %b + %result = shl i32 %x, %c + %bc = bitcast i32 %result to float + ret float %bc +} + +define amdgpu_ps float @add_shl_vgpr_ac(i32 %a, i32 inreg %b, i32 %c) { +; VI-LABEL: add_shl_vgpr_ac: +; VI: ; %bb.0: +; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 +; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 +; VI-NEXT: ; return to shader part epilog +; +; 
GFX9-LABEL: add_shl_vgpr_ac: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_add_lshl_u32 v0, v0, s2, v1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: add_shl_vgpr_ac: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_add_lshl_u32 v0, v0, s2, v1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog + %x = add i32 %a, %b + %result = shl i32 %x, %c + %bc = bitcast i32 %result to float + ret float %bc +} + +define amdgpu_ps float @add_shl_vgpr_const(i32 %a, i32 %b) { +; VI-LABEL: add_shl_vgpr_const: +; VI: ; %bb.0: +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; VI-NEXT: v_lshlrev_b32_e32 v0, 9, v0 +; VI-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: add_shl_vgpr_const: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_add_lshl_u32 v0, v0, v1, 9 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: add_shl_vgpr_const: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_add_lshl_u32 v0, v0, v1, 9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog + %x = add i32 %a, %b + %result = shl i32 %x, 9 + %bc = bitcast i32 %result to float + ret float %bc +} + +define amdgpu_ps float @add_shl_vgpr_const_inline_const(i32 %a) { +; VI-LABEL: add_shl_vgpr_const_inline_const: +; VI: ; %bb.0: +; VI-NEXT: v_add_u32_e32 v0, vcc, 0x3f4, v0 +; VI-NEXT: v_lshlrev_b32_e32 v0, 9, v0 +; VI-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: add_shl_vgpr_const_inline_const: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v1, 0x3f4 +; GFX9-NEXT: v_add_lshl_u32 v0, v0, v1, 9 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: add_shl_vgpr_const_inline_const: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_add_lshl_u32 v0, v0, 0x3f4, 9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog + %x = add i32 %a, 1012 + %result = shl i32 %x, 9 + %bc = bitcast i32 %result to float + ret float %bc +} + +define amdgpu_ps float @add_shl_vgpr_inline_const_x2(i32 %a) { +; VI-LABEL: add_shl_vgpr_inline_const_x2: +; VI: ; %bb.0: +; VI-NEXT: v_add_u32_e32 v0, vcc, 3, v0 +; VI-NEXT: v_lshlrev_b32_e32 v0, 9, v0 +; VI-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: add_shl_vgpr_inline_const_x2: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_add_lshl_u32 v0, v0, 3, 9 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: add_shl_vgpr_inline_const_x2: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_add_lshl_u32 v0, v0, 3, 9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog + %x = add i32 %a, 3 + %result = shl i32 %x, 9 + %bc = bitcast i32 %result to float + ret float %bc +} diff --git a/llvm/test/TableGen/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter.td index ed7bed3f711f0..c77630ba80151 100644 --- a/llvm/test/TableGen/GlobalISelEmitter.td +++ b/llvm/test/TableGen/GlobalISelEmitter.td @@ -78,7 +78,7 @@ def HasC : Predicate<"Subtarget->hasC()"> { let RecomputePerFunction = 1; } // CHECK-NEXT: bool testImmPredicate_APInt(unsigned PredicateID, const APInt &Imm) const override; // CHECK-NEXT: bool testImmPredicate_APFloat(unsigned PredicateID, const APFloat &Imm) const override; // CHECK-NEXT: const int64_t *getMatchTable() const override; -// CHECK-NEXT: bool testMIPredicate_MI(unsigned PredicateID, const MachineInstr &MI) const override; +// CHECK-NEXT: bool testMIPredicate_MI(unsigned PredicateID, const MachineInstr &MI, const std::array &Operands) const override; // CHECK-NEXT: #endif // ifdef GET_GLOBALISEL_TEMPORARIES_DECL // CHECK-LABEL: #ifdef GET_GLOBALISEL_TEMPORARIES_INIT diff --git 
a/llvm/test/TableGen/GlobalISelEmitterCustomPredicate.td b/llvm/test/TableGen/GlobalISelEmitterCustomPredicate.td index d985ef5da9245..6f6320f6389d0 100644 --- a/llvm/test/TableGen/GlobalISelEmitterCustomPredicate.td +++ b/llvm/test/TableGen/GlobalISelEmitterCustomPredicate.td @@ -45,61 +45,67 @@ def and_or_pat : PatFrag< let GISelPredicateCode = [{ return doesComplexCheck(MI); }]; + let PredicateCodeUsesOperands = 1; } -// CHECK: GIM_Try, /*On fail goto*//*Label 0*/ {{[0-9]+}}, // Rule ID 1 // +// CHECK: GIM_Try, /*On fail goto*//*Label 0*/ 99, // Rule ID 2 // // CHECK-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/3, // CHECK-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_AND, // CHECK-NEXT: // MIs[0] dst // CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/0, /*Type*/GILLT_s32, // CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/0, /*RC*/Test::DRegsRegClassID, -// CHECK-NEXT: // MIs[0] Operand 1 +// CHECK-NEXT: // MIs[0] src2 // CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s32, -// CHECK-NEXT: GIM_RecordInsn, /*DefineMI*/1, /*MI*/0, /*OpIdx*/1, // MIs[1] +// CHECK-NEXT: GIM_RecordNamedOperand, /*MI*/0, /*Op*/1, /*StoreIdx*/2, // Name : pred:2:z +// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/1, /*RC*/Test::DRegsRegClassID, +// CHECK-NEXT: // MIs[0] Operand 2 +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_RecordInsn, /*DefineMI*/1, /*MI*/0, /*OpIdx*/2, // MIs[1] // CHECK-NEXT: GIM_CheckNumOperands, /*MI*/1, /*Expected*/3, // CHECK-NEXT: GIM_CheckOpcode, /*MI*/1, TargetOpcode::G_OR, // CHECK-NEXT: // MIs[1] Operand 0 -// CHECK-NEXT:GIM_CheckType, /*MI*/1, /*Op*/0, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_CheckType, /*MI*/1, /*Op*/0, /*Type*/GILLT_s32, // CHECK-NEXT: // MIs[1] src0 // CHECK-NEXT: GIM_CheckType, /*MI*/1, /*Op*/1, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_RecordNamedOperand, /*MI*/1, /*Op*/1, /*StoreIdx*/0, // Name : pred:2:x // CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/1, /*Op*/1, /*RC*/Test::DRegsRegClassID, // CHECK-NEXT: // MIs[1] src1 // CHECK-NEXT: GIM_CheckType, /*MI*/1, /*Op*/2, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_RecordNamedOperand, /*MI*/1, /*Op*/2, /*StoreIdx*/1, // Name : pred:2:y // CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/1, /*Op*/2, /*RC*/Test::DRegsRegClassID, -// CHECK-NEXT: // MIs[0] src2 -// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_s32, -// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/2, /*RC*/Test::DRegsRegClassID, // CHECK-NEXT: GIM_CheckCxxInsnPredicate, /*MI*/0, /*FnId*/GIPFP_MI_Predicate_and_or_pat, // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/1, -// CHECK-NEXT: // (and:{ *:[i32] } (or:{ *:[i32] } DOP:{ *:[i32] }:$src0, DOP:{ *:[i32] }:$src1), DOP:{ *:[i32] }:$src2)<> => (AND_OR:{ *:[i32] } DOP:{ *:[i32] }:$src0, DOP:{ *:[i32] }:$src1, DOP:{ *:[i32] }:$src2) +// CHECK-NEXT: // (and:{ *:[i32] } DOP:{ *:[i32] }:$src2:$pred:2:z, (or:{ *:[i32] } DOP:{ *:[i32] }:$src0:$pred:2:x, DOP:{ *:[i32] }:$src1:$pred:2:y))<> => (AND_OR:{ *:[i32] } DOP:{ *:[i32] }:$src0, DOP:{ *:[i32] }:$src1, DOP:{ *:[i32] }:$src2) // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::AND_OR, - -// CHECK: GIM_Try, /*On fail goto*//*Label 1*/ {{[0-9]+}}, // Rule ID 2 // +// CHECK: GIM_Try, /*On fail goto*//*Label 1*/ 198, // Rule ID 1 // // CHECK-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/3, // CHECK-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_AND, // CHECK-NEXT: // MIs[0] dst // CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/0, /*Type*/GILLT_s32, // CHECK-NEXT: 
GIM_CheckRegBankForClass, /*MI*/0, /*Op*/0, /*RC*/Test::DRegsRegClassID, -// CHECK-NEXT: // MIs[0] src2 +// CHECK-NEXT: // MIs[0] Operand 1 // CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s32, -// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/1, /*RC*/Test::DRegsRegClassID, -// CHECK-NEXT: // MIs[0] Operand 2 -// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_s32, -// CHECK-NEXT: GIM_RecordInsn, /*DefineMI*/1, /*MI*/0, /*OpIdx*/2, // MIs[1] +// CHECK-NEXT: GIM_RecordInsn, /*DefineMI*/1, /*MI*/0, /*OpIdx*/1, // MIs[1] // CHECK-NEXT: GIM_CheckNumOperands, /*MI*/1, /*Expected*/3, // CHECK-NEXT: GIM_CheckOpcode, /*MI*/1, TargetOpcode::G_OR, // CHECK-NEXT: // MIs[1] Operand 0 // CHECK-NEXT: GIM_CheckType, /*MI*/1, /*Op*/0, /*Type*/GILLT_s32, // CHECK-NEXT: // MIs[1] src0 // CHECK-NEXT: GIM_CheckType, /*MI*/1, /*Op*/1, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_RecordNamedOperand, /*MI*/1, /*Op*/1, /*StoreIdx*/0, // Name : pred:2:x // CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/1, /*Op*/1, /*RC*/Test::DRegsRegClassID, // CHECK-NEXT: // MIs[1] src1 // CHECK-NEXT: GIM_CheckType, /*MI*/1, /*Op*/2, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_RecordNamedOperand, /*MI*/1, /*Op*/2, /*StoreIdx*/1, // Name : pred:2:y // CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/1, /*Op*/2, /*RC*/Test::DRegsRegClassID, +// CHECK-NEXT: // MIs[0] src2 +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_RecordNamedOperand, /*MI*/0, /*Op*/2, /*StoreIdx*/2, // Name : pred:2:z +// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/2, /*RC*/Test::DRegsRegClassID, // CHECK-NEXT: GIM_CheckCxxInsnPredicate, /*MI*/0, /*FnId*/GIPFP_MI_Predicate_and_or_pat, // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/1, -// CHECK-NEXT: // (and:{ *:[i32] } DOP:{ *:[i32] }:$src2, (or:{ *:[i32] } DOP:{ *:[i32] }:$src0, DOP:{ *:[i32] }:$src1))<> => (AND_OR:{ *:[i32] } DOP:{ *:[i32] }:$src0, DOP:{ *:[i32] }:$src1, DOP:{ *:[i32] }:$src2) +// CHECK-NEXT: // (and:{ *:[i32] } (or:{ *:[i32] } DOP:{ *:[i32] }:$src0:$pred:2:x, DOP:{ *:[i32] }:$src1:$pred:2:y), DOP:{ *:[i32] }:$src2:$pred:2:z)<> => (AND_OR:{ *:[i32] } DOP:{ *:[i32] }:$src0, DOP:{ *:[i32] }:$src1, DOP:{ *:[i32] }:$src2) // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::AND_OR, // Test commutative, standalone pattern. 
@@ -115,9 +121,11 @@ def sub3_pat : PatFrag< let GISelPredicateCode = [{ return doesComplexCheck(MI); }]; + + let PredicateCodeUsesOperands = 1; } -// CHECK: GIM_Try, /*On fail goto*//*Label 2*/ {{[0-9]+}}, // Rule ID 0 // +// CHECK: GIM_Try, /*On fail goto*//*Label 2*/ 285, // Rule ID 0 // // CHECK-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/3, // CHECK-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_SUB, // CHECK-NEXT: // MIs[0] dst @@ -132,13 +140,16 @@ def sub3_pat : PatFrag< // CHECK-NEXT: GIM_CheckType, /*MI*/1, /*Op*/0, /*Type*/GILLT_s32, // CHECK-NEXT: // MIs[1] src0 // CHECK-NEXT: GIM_CheckType, /*MI*/1, /*Op*/1, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_RecordNamedOperand, /*MI*/1, /*Op*/1, /*StoreIdx*/0, // Name : pred:1:x // CHECK-NEXT: // MIs[1] src1 // CHECK-NEXT: GIM_CheckType, /*MI*/1, /*Op*/2, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_RecordNamedOperand, /*MI*/1, /*Op*/2, /*StoreIdx*/1, // Name : pred:1:y // CHECK-NEXT: // MIs[0] src2 // CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_RecordNamedOperand, /*MI*/0, /*Op*/2, /*StoreIdx*/2, // Name : pred:1:z // CHECK-NEXT: GIM_CheckCxxInsnPredicate, /*MI*/0, /*FnId*/GIPFP_MI_Predicate_sub3_pat, // CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/1, -// CHECK-NEXT: // (sub:{ *:[i32] } (sub:{ *:[i32] } i32:{ *:[i32] }:$src0, i32:{ *:[i32] }:$src1), i32:{ *:[i32] }:$src2)<> => (SUB3:{ *:[i32] } i32:{ *:[i32] }:$src0, i32:{ *:[i32] }:$src1, i32:{ *:[i32] }:$src2) +// CHECK-NEXT: // (sub:{ *:[i32] } (sub:{ *:[i32] } i32:{ *:[i32] }:$src0:$pred:1:x, i32:{ *:[i32] }:$src1:$pred:1:y), i32:{ *:[i32] }:$src2:$pred:1:z)<> => (SUB3:{ *:[i32] } i32:{ *:[i32] }:$src0, i32:{ *:[i32] }:$src1, i32:{ *:[i32] }:$src2) // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::SUB3, // Test a non-commutative pattern. diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index d74cfae629f54..0fe1571cff136 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -389,6 +389,10 @@ getNameForFeatureBitset(const std::vector &FeatureBitset) { return Name; } +static std::string getScopedName(unsigned Scope, const std::string &Name) { + return ("pred:" + Twine(Scope) + ":" + Name).str(); +} + //===- MatchTable Helpers -------------------------------------------------===// class MatchTable; @@ -1102,6 +1106,7 @@ class PredicateMatcher { OPM_PointerToAny, OPM_RegBank, OPM_MBB, + OPM_RecordNamedOperand, }; protected: @@ -1290,6 +1295,40 @@ class PointerToAnyOperandMatcher : public OperandPredicateMatcher { } }; +/// Generates code to record named operand in RecordedOperands list at StoreIdx. +/// Predicates with 'let PredicateCodeUsesOperands = 1' get RecordedOperands as +/// an argument to predicate's c++ code once all operands have been matched. 
+class RecordNamedOperandMatcher : public OperandPredicateMatcher { +protected: + unsigned StoreIdx; + std::string Name; + +public: + RecordNamedOperandMatcher(unsigned InsnVarID, unsigned OpIdx, + unsigned StoreIdx, StringRef Name) + : OperandPredicateMatcher(OPM_RecordNamedOperand, InsnVarID, OpIdx), + StoreIdx(StoreIdx), Name(Name) {} + + static bool classof(const PredicateMatcher *P) { + return P->getKind() == OPM_RecordNamedOperand; + } + + bool isIdentical(const PredicateMatcher &B) const override { + return OperandPredicateMatcher::isIdentical(B) && + StoreIdx == cast(&B)->StoreIdx && + Name.compare(cast(&B)->Name) == 0; + } + + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_RecordNamedOperand") + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID) + << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx) + << MatchTable::Comment("StoreIdx") << MatchTable::IntValue(StoreIdx) + << MatchTable::Comment("Name : " + Name) << MatchTable::LineBreak; + } +}; + /// Generates code to check that an operand is a particular target constant. class ComplexPatternOperandMatcher : public OperandPredicateMatcher { protected: @@ -3459,6 +3498,16 @@ class GlobalISelEmitter { // Rule coverage information. Optional RuleCoverage; + /// Variables used to help with collecting of named operands for predicates + /// with 'let PredicateCodeUsesOperands = 1'. WaitingForNamedOperands is set + /// to the number of named operands that predicate expects. Store locations in + /// StoreIdxForName correspond to the order in which operand names appear in + /// predicate's argument list. + /// When we visit named leaf operand and WaitingForNamedOperands is not zero, + /// add matcher that will record operand and decrease counter. + unsigned WaitingForNamedOperands = 0; + StringMap StoreIdxForName; + void gatherOpcodeValues(); void gatherTypeIDValues(); void gatherNodeEquivs(); @@ -3511,7 +3560,8 @@ class GlobalISelEmitter { void emitCxxPredicateFns(raw_ostream &OS, StringRef CodeFieldName, StringRef TypeIdentifier, StringRef ArgType, - StringRef ArgName, StringRef AdditionalDeclarations, + StringRef ArgName, StringRef AdditionalArgs, + StringRef AdditionalDeclarations, std::function Filter); void emitImmPredicateFns(raw_ostream &OS, StringRef TypeIdentifier, StringRef ArgType, @@ -3863,6 +3913,15 @@ Expected GlobalISelEmitter::createAndImportSelDAGMatcher( return std::move(Error); if (Predicate.hasGISelPredicateCode()) { + if (Predicate.usesOperands()) { + assert(WaitingForNamedOperands == 0 && + "previous predicate didn't find all operands or " + "nested predicate that uses operands"); + TreePattern *TP = Predicate.getOrigPatFragRecord(); + WaitingForNamedOperands = TP->getNumArgs(); + for (unsigned i = 0; i < WaitingForNamedOperands; ++i) + StoreIdxForName[getScopedName(Call.Scope, TP->getArgName(i))] = i; + } InsnMatcher.addPredicate(Predicate); continue; } @@ -4141,6 +4200,13 @@ Error GlobalISelEmitter::importChildMatcher( if (auto *ChildDefInit = dyn_cast(SrcChild->getLeafValue())) { auto *ChildRec = ChildDefInit->getDef(); + if (WaitingForNamedOperands) { + auto PA = SrcChild->getNamesAsPredicateArg().begin(); + std::string Name = getScopedName(PA->getScope(), PA->getIdentifier()); + OM.addPredicate(StoreIdxForName[Name], Name); + --WaitingForNamedOperands; + } + // Check for register classes. 
if (ChildRec->isSubClassOf("RegisterClass") || ChildRec->isSubClassOf("RegisterOperand")) { @@ -5236,7 +5302,8 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { // trouble than it's worth. void GlobalISelEmitter::emitCxxPredicateFns( raw_ostream &OS, StringRef CodeFieldName, StringRef TypeIdentifier, - StringRef ArgType, StringRef ArgName, StringRef AdditionalDeclarations, + StringRef ArgType, StringRef ArgName, StringRef AdditionalArgs, + StringRef AdditionalDeclarations, std::function Filter) { std::vector MatchedRecords; const auto &Defs = RK.getAllDerivedDefinitions("PatFrag"); @@ -5261,7 +5328,7 @@ void GlobalISelEmitter::emitCxxPredicateFns( OS << "bool " << Target.getName() << "InstructionSelector::test" << ArgName << "Predicate_" << TypeIdentifier << "(unsigned PredicateID, " << ArgType << " " - << ArgName << ") const {\n" + << ArgName << AdditionalArgs <<") const {\n" << AdditionalDeclarations; if (!AdditionalDeclarations.empty()) OS << "\n"; @@ -5287,12 +5354,13 @@ void GlobalISelEmitter::emitImmPredicateFns( raw_ostream &OS, StringRef TypeIdentifier, StringRef ArgType, std::function Filter) { return emitCxxPredicateFns(OS, "ImmediateCode", TypeIdentifier, ArgType, - "Imm", "", Filter); + "Imm", "", "", Filter); } void GlobalISelEmitter::emitMIPredicateFns(raw_ostream &OS) { return emitCxxPredicateFns( OS, "GISelPredicateCode", "MI", "const MachineInstr &", "MI", + ", const std::array &Operands", " const MachineFunction &MF = *MI.getParent()->getParent();\n" " const MachineRegisterInfo &MRI = MF.getRegInfo();\n" " (void)MRI;", @@ -5525,7 +5593,8 @@ void GlobalISelEmitter::run(raw_ostream &OS) { << " bool testImmPredicate_APFloat(unsigned PredicateID, const APFloat " "&Imm) const override;\n" << " const int64_t *getMatchTable() const override;\n" - << " bool testMIPredicate_MI(unsigned PredicateID, const MachineInstr &MI) " + << " bool testMIPredicate_MI(unsigned PredicateID, const MachineInstr &MI" + ", const std::array &Operands) " "const override;\n" << "#endif // ifdef GET_GLOBALISEL_TEMPORARIES_DECL\n\n"; From 30667c967d3f420d3f53fb1c9c2465550a1112df Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 8 Jul 2020 21:49:38 +0200 Subject: [PATCH 0512/1079] [clangd] Add error() function for creating formatv-style llvm::Errors. NFC Summary: This is considerably terser than the makeStringError and friends, and avoids verbosity cliffs that discourage adding log information. It follows the syntax used in log/elog/vlog/dlog that have been successful. The main caveats are: - it's strictly out-of-place in logger.h, though kind of fits thematically and in implementation - it claims the "error" identifier, which seems a bit too opinionated to put higher up in llvm I've updated some users of StringError mostly at random - there are lots more mechanical changes but I'd like to get this reviewed before making them all. 
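For example, call sites shrink from the createStringError pattern to a single formatv-style call (a usage sketch mirroring the changes below):

    // Before:
    return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                   "Path not absolute: " + Path);
    // After:
    return error("Path not absolute: {0}", Path);

    // With a convertible error code:
    return error(llvm::errc::invalid_argument,
                 "Range's end position ({0}) is before start position ({1})",
                 End, Start);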
Reviewers: kbobyrev, hokein Subscribers: mgorny, ilya-biryukov, javed.absar, MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D83419 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 28 +++------ clang-tools-extra/clangd/ClangdServer.cpp | 3 +- clang-tools-extra/clangd/CodeComplete.cpp | 3 +- clang-tools-extra/clangd/DraftStore.cpp | 23 +++---- clang-tools-extra/clangd/JSONTransport.cpp | 8 +-- clang-tools-extra/clangd/PathMapping.cpp | 8 +-- clang-tools-extra/clangd/RIFF.cpp | 19 ++---- clang-tools-extra/clangd/TUScheduler.cpp | 3 +- .../clangd/index/Serialization.cpp | 33 +++++----- clang-tools-extra/clangd/support/Logger.cpp | 23 +++++++ clang-tools-extra/clangd/support/Logger.h | 26 ++++++++ .../clangd/unittests/CMakeLists.txt | 1 + .../clangd/unittests/LoggerTests.cpp | 62 +++++++++++++++++++ 13 files changed, 160 insertions(+), 80 deletions(-) create mode 100644 clang-tools-extra/clangd/unittests/LoggerTests.cpp diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 6ebb71c3b4d13..4cc1feabb15f7 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -147,13 +147,9 @@ llvm::Error validateEdits(const DraftStore &DraftMgr, const FileEdits &FE) { if (!InvalidFileCount) return llvm::Error::success(); if (InvalidFileCount == 1) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "File must be saved first: " + - LastInvalidFile); - return llvm::createStringError( - llvm::inconvertibleErrorCode(), - "Files must be saved first: " + LastInvalidFile + " (and " + - llvm::to_string(InvalidFileCount - 1) + " others)"); + return error("File must be saved first: {0}", LastInvalidFile); + return error("Files must be saved first: {0} (and {1} others)", + LastInvalidFile, InvalidFileCount - 1); } } // namespace @@ -284,10 +280,9 @@ class ClangdLSPServer::MessageHandler : public Transport::MessageHandler { } } if (OldestCB) - OldestCB->second(llvm::createStringError( - llvm::inconvertibleErrorCode(), - llvm::formatv("failed to receive a client reply for request ({0})", - OldestCB->first))); + OldestCB->second( + error("failed to receive a client reply for request ({0})", + OldestCB->first)); return ID; } @@ -661,8 +656,7 @@ void ClangdLSPServer::onSync(const NoParams &Params, if (Server->blockUntilIdleForTest(/*TimeoutSeconds=*/60)) Reply(nullptr); else - Reply(llvm::createStringError(llvm::inconvertibleErrorCode(), - "Not idle after a minute")); + Reply(error("Not idle after a minute")); } void ClangdLSPServer::onDocumentDidOpen( @@ -729,9 +723,7 @@ void ClangdLSPServer::onCommand(const ExecuteCommandParams &Params, std::string Reason = Response->failureReason ? 
*Response->failureReason : "unknown reason"; - return Reply(llvm::createStringError( - llvm::inconvertibleErrorCode(), - ("edits were not applied: " + Reason).c_str())); + return Reply(error("edits were not applied: {0}", Reason)); } return Reply(SuccessMessage); }); @@ -752,9 +744,7 @@ void ClangdLSPServer::onCommand(const ExecuteCommandParams &Params, Params.tweakArgs) { auto Code = DraftMgr.getDraft(Params.tweakArgs->file.file()); if (!Code) - return Reply(llvm::createStringError( - llvm::inconvertibleErrorCode(), - "trying to apply a code action for a non-added file")); + return Reply(error("trying to apply a code action for a non-added file")); auto Action = [this, ApplyEdit, Reply = std::move(Reply), File = Params.tweakArgs->file, Code = std::move(*Code)]( diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index d204e87c143b4..a571ff56ce4c4 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -342,8 +342,7 @@ void ClangdServer::signatureHelp(PathRef File, Position Pos, const auto *PreambleData = IP->Preamble; if (!PreambleData) - return CB(llvm::createStringError(llvm::inconvertibleErrorCode(), - "Failed to parse includes")); + return CB(error("Failed to parse includes")); ParseInputs ParseInput{IP->Command, &TFS, IP->Contents.str()}; ParseInput.Index = Index; diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp index 92ebc4c39f64c..4d5b2975c9aee 100644 --- a/clang-tools-extra/clangd/CodeComplete.cpp +++ b/clang-tools-extra/clangd/CodeComplete.cpp @@ -333,8 +333,7 @@ struct CodeCompletionBuilder { return ResolvedInserted.takeError(); auto Spelled = Includes.calculateIncludePath(*ResolvedInserted, FileName); if (!Spelled) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "Header not on include path"); + return error("Header not on include path"); return std::make_pair( std::move(*Spelled), Includes.shouldInsertInclude(*ResolvedDeclaring, *ResolvedInserted)); diff --git a/clang-tools-extra/clangd/DraftStore.cpp b/clang-tools-extra/clangd/DraftStore.cpp index bef48ddfa37d6..1299efbfba9fa 100644 --- a/clang-tools-extra/clangd/DraftStore.cpp +++ b/clang-tools-extra/clangd/DraftStore.cpp @@ -64,9 +64,9 @@ llvm::Expected DraftStore::updateDraft( auto EntryIt = Drafts.find(File); if (EntryIt == Drafts.end()) { - return llvm::make_error( - "Trying to do incremental update on non-added document: " + File, - llvm::errc::invalid_argument); + return error(llvm::errc::invalid_argument, + "Trying to do incremental update on non-added document: {0}", + File); } Draft &D = EntryIt->second; std::string Contents = EntryIt->second.Contents; @@ -89,11 +89,9 @@ llvm::Expected DraftStore::updateDraft( return EndIndex.takeError(); if (*EndIndex < *StartIndex) - return llvm::make_error( - llvm::formatv( - "Range's end position ({0}) is before start position ({1})", End, - Start), - llvm::errc::invalid_argument); + return error(llvm::errc::invalid_argument, + "Range's end position ({0}) is before start position ({1})", + End, Start); // Since the range length between two LSP positions is dependent on the // contents of the buffer we compute the range length between the start and @@ -106,11 +104,10 @@ llvm::Expected DraftStore::updateDraft( lspLength(Contents.substr(*StartIndex, *EndIndex - *StartIndex)); if (Change.rangeLength && ComputedRangeLength != *Change.rangeLength) - return llvm::make_error( - llvm::formatv("Change's rangeLength ({0}) 
doesn't match the " - "computed range length ({1}).", - *Change.rangeLength, ComputedRangeLength), - llvm::errc::invalid_argument); + return error(llvm::errc::invalid_argument, + "Change's rangeLength ({0}) doesn't match the " + "computed range length ({1}).", + *Change.rangeLength, ComputedRangeLength); std::string NewContents; NewContents.reserve(*StartIndex + Change.text.length() + diff --git a/clang-tools-extra/clangd/JSONTransport.cpp b/clang-tools-extra/clangd/JSONTransport.cpp index fa86baf6c5816..c591da0db47d3 100644 --- a/clang-tools-extra/clangd/JSONTransport.cpp +++ b/clang-tools-extra/clangd/JSONTransport.cpp @@ -51,12 +51,10 @@ llvm::json::Object encodeError(llvm::Error E) { } llvm::Error decodeError(const llvm::json::Object &O) { - std::string Msg = - std::string(O.getString("message").getValueOr("Unspecified error")); + llvm::StringRef Msg = O.getString("message").getValueOr("Unspecified error"); if (auto Code = O.getInteger("code")) - return llvm::make_error(std::move(Msg), ErrorCode(*Code)); - return llvm::make_error(std::move(Msg), - llvm::inconvertibleErrorCode()); + return llvm::make_error(Msg.str(), ErrorCode(*Code)); + return error(Msg.str()); } class JSONTransport : public Transport { diff --git a/clang-tools-extra/clangd/PathMapping.cpp b/clang-tools-extra/clangd/PathMapping.cpp index eb568b917966d..0cd9d22b998ca 100644 --- a/clang-tools-extra/clangd/PathMapping.cpp +++ b/clang-tools-extra/clangd/PathMapping.cpp @@ -8,6 +8,7 @@ #include "PathMapping.h" #include "Transport.h" #include "URI.h" +#include "support/Logger.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Errno.h" @@ -156,8 +157,7 @@ llvm::Expected parsePath(llvm::StringRef Path) { Converted = "/" + Converted; return Converted; } - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "Path not absolute: " + Path); + return error("Path not absolute: {0}", Path); } } // namespace @@ -174,9 +174,7 @@ parsePathMappings(llvm::StringRef RawPathMappings) { std::tie(PathPair, Rest) = Rest.split(","); std::tie(ClientPath, ServerPath) = PathPair.split("="); if (ClientPath.empty() || ServerPath.empty()) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "Not a valid path mapping pair: " + - PathPair); + return error("Not a valid path mapping pair: {0}", PathPair); llvm::Expected ParsedClientPath = parsePath(ClientPath); if (!ParsedClientPath) return ParsedClientPath.takeError(); diff --git a/clang-tools-extra/clangd/RIFF.cpp b/clang-tools-extra/clangd/RIFF.cpp index f59200bd58561..8423580f9b46d 100644 --- a/clang-tools-extra/clangd/RIFF.cpp +++ b/clang-tools-extra/clangd/RIFF.cpp @@ -7,35 +7,28 @@ //===----------------------------------------------------------------------===// #include "RIFF.h" +#include "support/Logger.h" #include "llvm/Support/Endian.h" -#include "llvm/Support/Error.h" namespace clang { namespace clangd { namespace riff { -static llvm::Error makeError(const llvm::Twine &Msg) { - return llvm::make_error(Msg, - llvm::inconvertibleErrorCode()); -} - llvm::Expected readChunk(llvm::StringRef &Stream) { if (Stream.size() < 8) - return makeError("incomplete chunk header: " + llvm::Twine(Stream.size()) + - " bytes available"); + return error("incomplete chunk header: {0} bytes available", Stream.size()); Chunk C; std::copy(Stream.begin(), Stream.begin() + 4, C.ID.begin()); Stream = Stream.drop_front(4); uint32_t Len = llvm::support::endian::read32le(Stream.take_front(4).begin()); Stream = Stream.drop_front(4); if (Stream.size() < Len) - return 
makeError("truncated chunk: want " + llvm::Twine(Len) + ", got " + - llvm::Twine(Stream.size())); + return error("truncated chunk: want {0}, got {1}", Len, Stream.size()); C.Data = Stream.take_front(Len); Stream = Stream.drop_front(Len); if ((Len % 2) && !Stream.empty()) { // Skip padding byte. if (Stream.front()) - return makeError("nonzero padding byte"); + return error("nonzero padding byte"); Stream = Stream.drop_front(); } return std::move(C); @@ -57,9 +50,9 @@ llvm::Expected readFile(llvm::StringRef Stream) { if (!RIFF) return RIFF.takeError(); if (RIFF->ID != fourCC("RIFF")) - return makeError("not a RIFF container: root is " + fourCCStr(RIFF->ID)); + return error("not a RIFF container: root is {0}", fourCCStr(RIFF->ID)); if (RIFF->Data.size() < 4) - return makeError("RIFF chunk too short"); + return error("RIFF chunk too short"); File F; std::copy(RIFF->Data.begin(), RIFF->Data.begin() + 4, F.Type.begin()); for (llvm::StringRef Body = RIFF->Data.drop_front(4); !Body.empty();) diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp index ed367005177b2..c408c8c0731de 100644 --- a/clang-tools-extra/clangd/TUScheduler.cpp +++ b/clang-tools-extra/clangd/TUScheduler.cpp @@ -717,8 +717,7 @@ void ASTWorker::runWithAST( [&AST, this]() { IdleASTs.put(this, std::move(*AST)); }); // Run the user-provided action. if (!*AST) - return Action(llvm::make_error( - "invalid AST", llvm::errc::invalid_argument)); + return Action(error(llvm::errc::invalid_argument, "invalid AST")); vlog("ASTWorker running {0} on version {2} of {1}", Name, FileName, FileInputs.Version); Action(InputsAndAST{FileInputs, **AST}); diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp index 11d70b550642b..c099a30c4d348 100644 --- a/clang-tools-extra/clangd/index/Serialization.cpp +++ b/clang-tools-extra/clangd/index/Serialization.cpp @@ -25,10 +25,6 @@ namespace clang { namespace clangd { namespace { -llvm::Error makeError(const llvm::Twine &Msg) { - return llvm::make_error(Msg, - llvm::inconvertibleErrorCode()); -} // IO PRIMITIVES // We use little-endian 32 bit ints, sometimes with variable-length encoding. 
@@ -199,7 +195,7 @@ llvm::Expected readStringTable(llvm::StringRef Data) { Reader R(Data); size_t UncompressedSize = R.consume32(); if (R.err()) - return makeError("Truncated string table"); + return error("Truncated string table"); llvm::StringRef Uncompressed; llvm::SmallString<1> UncompressedStorage; @@ -218,12 +214,12 @@ llvm::Expected readStringTable(llvm::StringRef Data) { for (Reader R(Uncompressed); !R.eof();) { auto Len = R.rest().find(0); if (Len == llvm::StringRef::npos) - return makeError("Bad string table: not null terminated"); + return error("Bad string table: not null terminated"); Table.Strings.push_back(Saver.save(R.consume(Len))); R.consume8(); } if (R.err()) - return makeError("Truncated string table"); + return error("Truncated string table"); return std::move(Table); } @@ -426,24 +422,23 @@ llvm::Expected readRIFF(llvm::StringRef Data) { if (!RIFF) return RIFF.takeError(); if (RIFF->Type != riff::fourCC("CdIx")) - return makeError("wrong RIFF filetype: " + riff::fourCCStr(RIFF->Type)); + return error("wrong RIFF filetype: {0}", riff::fourCCStr(RIFF->Type)); llvm::StringMap Chunks; for (const auto &Chunk : RIFF->Chunks) Chunks.try_emplace(llvm::StringRef(Chunk.ID.data(), Chunk.ID.size()), Chunk.Data); if (!Chunks.count("meta")) - return makeError("missing meta chunk"); + return error("missing meta chunk"); Reader Meta(Chunks.lookup("meta")); auto SeenVersion = Meta.consume32(); if (SeenVersion != Version) - return makeError("wrong version: want " + llvm::Twine(Version) + ", got " + - llvm::Twine(SeenVersion)); + return error("wrong version: want {0}, got {1}", Version, SeenVersion); // meta chunk is checked above, as we prefer the "version mismatch" error. for (llvm::StringRef RequiredChunk : {"stri"}) if (!Chunks.count(RequiredChunk)) - return makeError("missing required chunk " + RequiredChunk); + return error("missing required chunk {0}", RequiredChunk); auto Strings = readStringTable(Chunks.lookup("stri")); if (!Strings) @@ -464,7 +459,7 @@ llvm::Expected readRIFF(llvm::StringRef Data) { Include = Result.Sources->try_emplace(Include).first->getKey(); } if (SrcsReader.err()) - return makeError("malformed or truncated include uri"); + return error("malformed or truncated include uri"); } if (Chunks.count("symb")) { @@ -473,7 +468,7 @@ llvm::Expected readRIFF(llvm::StringRef Data) { while (!SymbolReader.eof()) Symbols.insert(readSymbol(SymbolReader, Strings->Strings)); if (SymbolReader.err()) - return makeError("malformed or truncated symbol"); + return error("malformed or truncated symbol"); Result.Symbols = std::move(Symbols).build(); } if (Chunks.count("refs")) { @@ -485,7 +480,7 @@ llvm::Expected readRIFF(llvm::StringRef Data) { Refs.insert(RefsBundle.first, Ref); } if (RefsReader.err()) - return makeError("malformed or truncated refs"); + return error("malformed or truncated refs"); Result.Refs = std::move(Refs).build(); } if (Chunks.count("rela")) { @@ -496,13 +491,13 @@ llvm::Expected readRIFF(llvm::StringRef Data) { Relations.insert(Relation); } if (RelationsReader.err()) - return makeError("malformed or truncated relations"); + return error("malformed or truncated relations"); Result.Relations = std::move(Relations).build(); } if (Chunks.count("cmdl")) { Reader CmdReader(Chunks.lookup("cmdl")); if (CmdReader.err()) - return makeError("malformed or truncated commandline section"); + return error("malformed or truncated commandline section"); InternedCompileCommand Cmd = readCompileCommand(CmdReader, Strings->Strings); Result.Cmd.emplace(); @@ -660,8 +655,8 @@ 
@@ -660,8 +655,8 @@ llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef Data) {
   } else if (auto YAMLContents = readYAML(Data)) {
     return std::move(*YAMLContents);
   } else {
-    return makeError("Not a RIFF file and failed to parse as YAML: " +
-                     llvm::toString(YAMLContents.takeError()));
+    return error("Not a RIFF file and failed to parse as YAML: {0}",
+                 YAMLContents.takeError());
   }
 }
 
diff --git a/clang-tools-extra/clangd/support/Logger.cpp b/clang-tools-extra/clangd/support/Logger.cpp
index 768d2e52210b2..4a5d7d63bed46 100644
--- a/clang-tools-extra/clangd/support/Logger.cpp
+++ b/clang-tools-extra/clangd/support/Logger.cpp
@@ -9,6 +9,7 @@
 #include "support/Logger.h"
 #include "support/Trace.h"
 #include "llvm/Support/Chrono.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/raw_ostream.h"
 #include <mutex>
@@ -58,5 +59,27 @@ void StreamLogger::log(Logger::Level Level,
   Logs.flush();
 }
 
+namespace {
+// Like llvm::StringError but with fewer options and no gratuitous copies.
+class SimpleStringError : public llvm::ErrorInfo<SimpleStringError> {
+  std::error_code EC;
+  std::string Message;
+
+public:
+  SimpleStringError(std::error_code EC, std::string &&Message)
+      : EC(EC), Message(std::move(Message)) {}
+  void log(llvm::raw_ostream &OS) const override { OS << Message; }
+  std::string message() const override { return Message; }
+  std::error_code convertToErrorCode() const override { return EC; }
+  static char ID;
+};
+char SimpleStringError::ID;
+
+} // namespace
+
+llvm::Error detail::error(std::error_code EC, std::string &&Msg) {
+  return llvm::make_error<SimpleStringError>(EC, std::move(Msg));
+}
+
 } // namespace clangd
 } // namespace clang
diff --git a/clang-tools-extra/clangd/support/Logger.h b/clang-tools-extra/clangd/support/Logger.h
index 72d1408bdc77c..0674671aa8e12 100644
--- a/clang-tools-extra/clangd/support/Logger.h
+++ b/clang-tools-extra/clangd/support/Logger.h
@@ -45,6 +45,8 @@ template <typename... Ts>
 void log(Logger::Level L, const char *Fmt, Ts &&... Vals) {
   detail::log(L, llvm::formatv(Fmt, detail::wrap(std::forward<Ts>(Vals))...));
 }
+
+llvm::Error error(std::error_code, std::string &&);
 } // namespace detail
 
 // Clangd logging functions write to a global logger set by LoggingSession.
@@ -67,6 +69,30 @@ template <typename... Ts> void log(const char *Fmt, Ts &&... Vals) {
 template <typename... Ts> void vlog(const char *Fmt, Ts &&... Vals) {
   detail::log(Logger::Verbose, Fmt, std::forward<Ts>(Vals)...);
 }
+// error() constructs an llvm::Error object, using formatv()-style arguments.
+// It is not automatically logged! (This function is a little out of place).
+// The error simply embeds the message string.
+template <typename... Ts>
+llvm::Error error(std::error_code EC, const char *Fmt, Ts &&... Vals) {
+  // We must render the formatv_object eagerly, while references are valid.
+  return detail::error(
+      EC, llvm::formatv(Fmt, detail::wrap(std::forward<Ts>(Vals))...).str());
+}
+// Overload with no error_code conversion, the error will be inconvertible.
+template <typename... Ts> llvm::Error error(const char *Fmt, Ts &&... Vals) {
+  return detail::error(
+      llvm::inconvertibleErrorCode(),
+      llvm::formatv(Fmt, detail::wrap(std::forward<Ts>(Vals))...).str());
+}
+// Overload to avoid formatv complexity for simple strings.
+inline llvm::Error error(std::error_code EC, std::string Msg) {
+  return detail::error(EC, std::move(Msg));
+}
+// Overload for simple strings with no error_code conversion.
+inline llvm::Error error(std::string Msg) {
+  return detail::error(llvm::inconvertibleErrorCode(), std::move(Msg));
+}
+
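Taken together, the four overloads above cover the common call shapes. A
minimal usage sketch (the call sites are hypothetical; only the overloads are
from this patch):

  llvm::Error E1 = error("no index loaded");                  // inconvertible
  llvm::Error E2 = error(llvm::errc::invalid_argument,
                         "unknown option {0}", Name);         // carries a code
  // The formatv_object is rendered to a std::string inside error(), so the
  // arguments only need to outlive the call itself, not the returned Error.
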
 // dlog only logs if --debug was passed, or --debug_only=Basename.
 // This level would be enabled in a targeted way when debugging.
 #define dlog(...) \
diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt
index 966fa9630852b..2167b5e210e22 100644
--- a/clang-tools-extra/clangd/unittests/CMakeLists.txt
+++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt
@@ -62,6 +62,7 @@ add_unittest(ClangdUnitTests ClangdTests
   IndexActionTests.cpp
   IndexTests.cpp
   JSONTransportTests.cpp
+  LoggerTests.cpp
   LSPClient.cpp
   ModulesTests.cpp
   ParsedASTTests.cpp
diff --git a/clang-tools-extra/clangd/unittests/LoggerTests.cpp b/clang-tools-extra/clangd/unittests/LoggerTests.cpp
new file mode 100644
index 0000000000000..3d2194d79090d
--- /dev/null
+++ b/clang-tools-extra/clangd/unittests/LoggerTests.cpp
@@ -0,0 +1,62 @@
+//===-- LoggerTests.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "support/Logger.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+TEST(ErrorTest, Overloads) {
+  EXPECT_EQ("foo", llvm::toString(error("foo")));
+  // Inconvertible to error code when none is specified.
+  // Don't actually try to convert, it'll crash.
+  handleAllErrors(error("foo"), [&](const llvm::ErrorInfoBase &EI) {
+    EXPECT_EQ(llvm::inconvertibleErrorCode(), EI.convertToErrorCode());
+  });
+
+  EXPECT_EQ("foo 42", llvm::toString(error("foo {0}", 42)));
+  handleAllErrors(error("foo {0}", 42), [&](const llvm::ErrorInfoBase &EI) {
+    EXPECT_EQ(llvm::inconvertibleErrorCode(), EI.convertToErrorCode());
+  });
+
+  EXPECT_EQ("foo", llvm::toString(error(llvm::errc::invalid_argument, "foo")));
+  EXPECT_EQ(llvm::errc::invalid_argument,
+            llvm::errorToErrorCode(error(llvm::errc::invalid_argument, "foo")));
+
+  EXPECT_EQ("foo 42",
+            llvm::toString(error(llvm::errc::invalid_argument, "foo {0}", 42)));
+  EXPECT_EQ(llvm::errc::invalid_argument,
+            llvm::errorToErrorCode(
+                error(llvm::errc::invalid_argument, "foo {0}", 42)));
+}
+
+TEST(ErrorTest, Lifetimes) {
+  llvm::Optional<llvm::Error> Err;
+  {
+    // Check the error contains the value when error() was called.
+    std::string S = "hello, world";
+    Err = error("S={0}", llvm::StringRef(S));
+    S = "garbage";
+  }
+  EXPECT_EQ("S=hello, world", llvm::toString(std::move(*Err)));
+}
+
+TEST(ErrorTest, ConsumeError) {
+  llvm::Error Foo = error("foo");
+  llvm::Error Bar = error("bar: {0}", std::move(Foo));
+  EXPECT_EQ("bar: foo", llvm::toString(std::move(Bar)));
+  // No assert for unchecked Foo.
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
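The ConsumeError test is worth a second look: an llvm::Error can itself be a
formatv() argument, so errors can be chained by rendering the inner message
into the outer one. Presumably detail::wrap takes care of consuming the inner
Error, which is why no unchecked-Error assertion fires. The call shape,
reusing the names from the test above:

  llvm::Error Foo = error("foo");
  llvm::Error Bar = error("bar: {0}", std::move(Foo)); // Foo is consumed here
  // llvm::toString(std::move(Bar)) == "bar: foo"
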
From 119e57be76266bf524a4e3b45e01dd8c2c1e9d35 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot
Date: Mon, 14 Sep 2020 08:53:33 +0000
Subject: [PATCH 0513/1079] [gn build] Port 30667c967d3

---
 .../gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn
index dfd320164feb8..f732e837a88ef 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn
@@ -63,6 +63,7 @@ unittest("ClangdTests") {
     "IndexTests.cpp",
     "JSONTransportTests.cpp",
     "LSPClient.cpp",
+    "LoggerTests.cpp",
    "ModulesTests.cpp",
     "ParsedASTTests.cpp",
     "PathMappingTests.cpp",

From d3af441dfeb69d4c2a91b427e3d7a57e04c59201 Mon Sep 17 00:00:00 2001
From: Jeremy Morse
Date: Mon, 14 Sep 2020 09:55:38 +0100
Subject: [PATCH 0514/1079] [DebugInstrRef][1/9] Add fields for instr-ref
 variable locations

Add a DBG_INSTR_REF instruction and a "debug instruction number" field to
MachineInstr. The two allow variable values to be specified by identifying
where the value is computed, rather than the register it lies in, like so:

    %0 = fooinst, debug-instr-number 1
    [...]
    DBG_INSTR_REF 1, 0

See the original RFC for motivation:
http://lists.llvm.org/pipermail/llvm-dev/2020-February/139440.html

This patch is NFCI; it only adds fields and other boilerplate.

Differential Revision: https://reviews.llvm.org/D85741
---
 llvm/include/llvm/CodeGen/MachineFunction.h |  9 ++++++
 llvm/include/llvm/CodeGen/MachineInstr.h    | 18 +++++++++++-
 llvm/include/llvm/Support/TargetOpcodes.def |  4 +++
 llvm/include/llvm/Target/Target.td          |  6 ++++
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp  |  5 ++++
 llvm/lib/CodeGen/MachineInstr.cpp           | 32 +++++++++++++++------
 6 files changed, 64 insertions(+), 10 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index 0ea2da9910f39..247716df78825 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -431,6 +431,11 @@ class MachineFunction {
   using VariableDbgInfoMapTy = SmallVector<VariableDbgInfo, 4>;
   VariableDbgInfoMapTy VariableDbgInfos;
 
+  /// A count of how many instructions in the function have had numbers
+  /// assigned to them. Used for debug value tracking, to determine the
+  /// next instruction number.
+  unsigned DebugInstrNumberingCount = 0;
+
   MachineFunction(Function &F, const LLVMTargetMachine &Target,
                   const TargetSubtargetInfo &STI, unsigned FunctionNum,
                   MachineModuleInfo &MMI);
@@ -1076,6 +1081,10 @@ class MachineFunction {
   /// the same callee.
   void moveCallSiteInfo(const MachineInstr *Old,
                         const MachineInstr *New);
+
+  unsigned getNewDebugInstrNum() {
+    return ++DebugInstrNumberingCount;
+  }
 };
 
 //===--------------------------------------------------------------------===//
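MachineFunction only hands out the next number; attaching it to an instruction
and referring to it happens through the MachineInstr API in the next hunk. A
hedged sketch of how a later patch in the series might use it (hypothetical
code, mirroring the "DBG_INSTR_REF 1, 0" example from the commit message):

  // Lazily assign a unique number to the defining instruction...
  unsigned InstrNum = DefMI.getDebugInstrNum();
  // ...then describe the variable as "operand 0 of instruction InstrNum".
  BuildMI(MBB, InsertPt, DL, TII.get(TargetOpcode::DBG_INSTR_REF))
      .addImm(InstrNum) // which instruction computes the value
      .addImm(0)        // which of its defs
      .addMetadata(Variable)
      .addMetadata(Expression);
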
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 2c912b177384b..957ec2124e0ae 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -249,6 +249,10 @@ class MachineInstr
 
   DebugLoc debugLoc;                      // Source line information.
 
+  /// Unique instruction number. Used by DBG_INSTR_REFs to refer to the values
+  /// defined by this instruction.
+  unsigned DebugInstrNum;
+
   // Intrusive list support
   friend struct ilist_traits<MachineInstr>;
   friend struct ilist_callback_traits<MachineBasicBlock>;
@@ -444,6 +448,14 @@ class MachineInstr
   /// this DBG_LABEL instruction.
   const DILabel *getDebugLabel() const;
 
+  /// Fetch the instruction number of this MachineInstr. If it does not have
+  /// one already, a new and unique number will be assigned.
+  unsigned getDebugInstrNum();
+
+  /// Examine the instruction number of this MachineInstr. May be zero if
+  /// it hasn't been assigned a number yet.
+  unsigned peekDebugInstrNum() const { return DebugInstrNum; }
+
   /// Emit an error referring to the source location of this instruction.
   /// This should only be used for inline assembly that is somehow
   /// impossible to compile. Other errors should have been handled much
@@ -1145,7 +1157,10 @@ class MachineInstr
   bool isDebugValue() const { return getOpcode() == TargetOpcode::DBG_VALUE; }
   bool isDebugLabel() const { return getOpcode() == TargetOpcode::DBG_LABEL; }
-  bool isDebugInstr() const { return isDebugValue() || isDebugLabel(); }
+  bool isDebugRef() const { return getOpcode() == TargetOpcode::DBG_INSTR_REF; }
+  bool isDebugInstr() const {
+    return isDebugValue() || isDebugLabel() || isDebugRef();
+  }
 
   bool isDebugOffsetImm() const { return getDebugOffset().isImm(); }
 
@@ -1238,6 +1253,7 @@ class MachineInstr
     case TargetOpcode::EH_LABEL:
     case TargetOpcode::GC_LABEL:
     case TargetOpcode::DBG_VALUE:
+    case TargetOpcode::DBG_INSTR_REF:
     case TargetOpcode::DBG_LABEL:
     case TargetOpcode::LIFETIME_START:
     case TargetOpcode::LIFETIME_END:
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index db36fc42aa2a2..2e464b395d7d9 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -77,6 +77,10 @@ HANDLE_TARGET_OPCODE(SUBREG_TO_REG)
 /// DBG_VALUE - a mapping of the llvm.dbg.value intrinsic
 HANDLE_TARGET_OPCODE(DBG_VALUE)
 
+/// DBG_INSTR_REF - A mapping of llvm.dbg.value referring to the instruction
+/// that defines the value, rather than a virtual register.
+HANDLE_TARGET_OPCODE(DBG_INSTR_REF)
+
 /// DBG_LABEL - a mapping of the llvm.dbg.label intrinsic
 HANDLE_TARGET_OPCODE(DBG_LABEL)
 
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index e56927540f51c..8fba826f21874 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1100,6 +1100,12 @@ def DBG_VALUE : StandardPseudoInstruction {
   let AsmString = "DBG_VALUE";
   let hasSideEffects = 0;
 }
+def DBG_INSTR_REF : StandardPseudoInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins variable_ops);
+  let AsmString = "DBG_INSTR_REF";
+  let hasSideEffects = 0;
+}
 def DBG_LABEL : StandardPseudoInstruction {
   let OutOperandList = (outs);
   let InOperandList = (ins unknown:$label);
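Because isDebugInstr() now also covers DBG_INSTR_REF (see the MachineInstr.h
hunk above), the usual guard in codegen passes keeps filtering out the new
pseudo-instruction without modification. A minimal sketch of that idiom:

  for (MachineInstr &MI : MBB) {
    if (MI.isDebugInstr())
      continue; // skips DBG_VALUE, DBG_LABEL, and now DBG_INSTR_REF
    // ... transform only real instructions ...
  }
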
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index cdacedc723217..7a141819950a9 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1142,6 +1142,11 @@ void AsmPrinter::emitFunctionBody() {
           emitInstruction(&MI);
         }
         break;
+      case TargetOpcode::DBG_INSTR_REF:
+        // This instruction reference will have been resolved to a machine
+        // location, and a nearby DBG_VALUE created. We can safely ignore
+        // the instruction reference.
+        break;
       case TargetOpcode::DBG_LABEL:
         if (isVerbose()) {
           if (!emitDebugLabelComment(&MI, *this))
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 457db8d50ca9e..ebae5eb380de8 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -116,7 +116,7 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
 /// the MCInstrDesc.
 MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
                            DebugLoc dl, bool NoImp)
-    : MCID(&tid), debugLoc(std::move(dl)) {
+    : MCID(&tid), debugLoc(std::move(dl)), DebugInstrNum(0) {
   assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
 
   // Reserve space for the expected number of operands.
@@ -130,10 +130,12 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
     addImplicitDefUseOperands(MF);
 }
 
-/// MachineInstr ctor - Copies MachineInstr arg exactly
-///
+/// MachineInstr ctor - Copies MachineInstr arg exactly.
+/// Does not copy the number from debug instruction numbering, to preserve
+/// uniqueness.
 MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
-    : MCID(&MI.getDesc()), Info(MI.Info), debugLoc(MI.getDebugLoc()) {
+    : MCID(&MI.getDesc()), Info(MI.Info), debugLoc(MI.getDebugLoc()),
+      DebugInstrNum(0) {
   assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
 
   CapOperands = OperandCapacity::get(MI.getNumOperands());
@@ -839,27 +841,27 @@ const DILabel *MachineInstr::getDebugLabel() const {
 }
 
 const MachineOperand &MachineInstr::getDebugVariableOp() const {
-  assert(isDebugValue() && "not a DBG_VALUE");
+  assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
   return getOperand(2);
 }
 
 MachineOperand &MachineInstr::getDebugVariableOp() {
-  assert(isDebugValue() && "not a DBG_VALUE");
+  assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
   return getOperand(2);
 }
 
 const DILocalVariable *MachineInstr::getDebugVariable() const {
-  assert(isDebugValue() && "not a DBG_VALUE");
+  assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
   return cast<DILocalVariable>(getOperand(2).getMetadata());
 }
 
 MachineOperand &MachineInstr::getDebugExpressionOp() {
-  assert(isDebugValue() && "not a DBG_VALUE");
+  assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
   return getOperand(3);
 }
 
 const DIExpression *MachineInstr::getDebugExpression() const {
-  assert(isDebugValue() && "not a DBG_VALUE");
+  assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
   return cast<DIExpression>(getOperand(3).getMetadata());
 }
 
@@ -1757,6 +1759,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
     HeapAllocMarker->printAsOperand(OS, MST);
   }
 
+  if (DebugInstrNum) {
+    if (!FirstOp)
+      OS << ",";
+    OS << " debug-instr-number " << DebugInstrNum;
+  }
+
   if (!SkipDebugLoc) {
     if (const DebugLoc &DL = getDebugLoc()) {
       if (!FirstOp)
@@ -2231,3 +2239,9 @@ MachineInstr::getFoldedRestoreSize(const TargetInstrInfo *TII) const {
     return getSpillSlotSize(Accesses, getMF()->getFrameInfo());
   return None;
 }
+
+unsigned MachineInstr::getDebugInstrNum() {
+  if (DebugInstrNum == 0)
+    DebugInstrNum = getParent()->getParent()->getNewDebugInstrNum();
+  return DebugInstrNum;
+}

From 574dd60547179a2c143ac14cdd6f5f5a40156d54 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya
Date: Fri, 11 Sep 2020 11:40:54 +0200
Subject: [PATCH 0515/1079] [clangd] Track tweaks that fail the apply stage

Differential Revision: https://reviews.llvm.org/D87501
---
 clang-tools-extra/clangd/ClangdServer.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)
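The patch pairs the existing tweak_attempt counter with a tweak_failed
counter, both labelled by tweak id, so the two series can be divided to get a
per-tweak failure rate. The counter idiom, as it appears in the diff below
(trace::Metric is clangd's metrics facility):

  static constexpr trace::Metric TweakFailed(
      "tweak_failed", trace::Metric::Counter, "tweak_id");
  ...
  TweakFailed.record(1, TweakID); // bump the counter, labelled with the id
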
diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp
index a571ff56ce4c4..27d1a2dc7cdce 100644
--- a/clang-tools-extra/clangd/ClangdServer.cpp
+++ b/clang-tools-extra/clangd/ClangdServer.cpp
@@ -536,9 +536,12 @@ void ClangdServer::enumerateTweaks(PathRef File, Range Sel,
 
 void ClangdServer::applyTweak(PathRef File, Range Sel, StringRef TweakID,
                               Callback<Tweak::Effect> CB) {
-  // Tracks number of times a tweak has been applied.
+  // Tracks number of times a tweak has been attempted.
   static constexpr trace::Metric TweakAttempt(
       "tweak_attempt", trace::Metric::Counter, "tweak_id");
+  // Tracks number of times a tweak has failed to produce edits.
+  static constexpr trace::Metric TweakFailed(
+      "tweak_failed", trace::Metric::Counter, "tweak_id");
   TweakAttempt.record(1, TweakID);
   auto Action = [File = File.str(), Sel, TweakID = TweakID.str(),
                  CB = std::move(CB),
@@ -569,6 +572,8 @@ void ClangdServer::applyTweak(PathRef File, Range Sel, StringRef TweakID,
         if (llvm::Error Err = reformatEdit(E, Style))
           elog("Failed to format {0}: {1}", It.first(), std::move(Err));
       }
+    } else {
+      TweakFailed.record(1, TweakID);
     }
     return CB(std::move(*Effect));
   };

From 4232bccfb461fb9bc1ca83f0cbbda2b11f92bda8 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 14 Sep 2020 10:27:35 +0100
Subject: [PATCH 0516/1079] [CodeGen][X86] Regenerate minmax reduction sequence
 tests to match arithmetic tests.

avx512-reduceIntrin.c wasn't bothering with the exhaustive
alloca/store/load/bitcast checks, and avx512-reduceMinMaxIntrin.c shouldn't
need to either.

This makes the tests much easier to maintain, as the update script still
doesn't work properly on x86 targets.
---
 .../CodeGen/X86/avx512-reduceMinMaxIntrin.c   | 2769 ++---------------
 1 file changed, 327 insertions(+), 2442 deletions(-)

diff --git a/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c b/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c
index c1eebb6f3bc93..b02bd7c66658d 100644
--- a/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c
+++ b/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c
@@ -2,2536 +2,421 @@
 
 #include <immintrin.h>
 
-// CHECK-LABEL: define i64 @test_mm512_reduce_max_epi64(<8 x i64> %__W) #0 {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__A_ADDR_I5_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x
i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <8 x i64> [[TMP5]], [[TMP6]] -// CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] -// CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP9:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = icmp sgt <8 x i64> [[TMP13]], [[TMP14]] -// CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] -// CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = icmp sgt <8 x i64> [[TMP21]], [[TMP22]] -// CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] -// CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP25]], i32 0 -// CHECK-NEXT: ret i64 [[VECEXT_I]] long long test_mm512_reduce_max_epi64(__m512i __W){ +// CHECK-LABEL: @test_mm512_reduce_max_epi64( +// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> +// CHECK: icmp sgt <8 x i64> %{{.*}}, %{{.*}} +// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: shufflevector <8 x i64> 
%{{.*}}, <8 x i64> %{{.*}}, <8 x i32> +// CHECK: icmp sgt <8 x i64> %{{.*}}, %{{.*}} +// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> +// CHECK: icmp sgt <8 x i64> %{{.*}}, %{{.*}} +// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_reduce_max_epi64(__W); } -// CHECK-LABEL: define i64 @test_mm512_reduce_max_epu64(<8 x i64> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <8 x i64> [[TMP5]], [[TMP6]] -// CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] -// CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP9:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: 
[[TMP15:%.*]] = icmp ugt <8 x i64> [[TMP13]], [[TMP14]] -// CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] -// CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = icmp ugt <8 x i64> [[TMP21]], [[TMP22]] -// CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] -// CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP25]], i32 0 -// CHECK-NEXT: ret i64 [[VECEXT_I]] unsigned long long test_mm512_reduce_max_epu64(__m512i __W){ - return _mm512_reduce_max_epu64(__W); +// CHECK-LABEL: @test_mm512_reduce_max_epu64( +// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> +// CHECK: icmp ugt <8 x i64> %{{.*}}, %{{.*}} +// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> +// CHECK: icmp ugt <8 x i64> %{{.*}}, %{{.*}} +// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> +// CHECK: icmp ugt <8 x i64> %{{.*}}, %{{.*}} +// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: extractelement <8 x i64> %{{.*}}, i32 0 + return _mm512_reduce_max_epu64(__W); } -// CHECK-LABEL: define double @test_mm512_reduce_max_pd(<8 x double> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I9_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* 
[[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store <8 x double> [[TMP0]], <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: store <4 x double> [[EXTRACT_I]], <4 x double>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: store <4 x double> [[EXTRACT2_I]], <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP3]], <4 x double>* [[__A_ADDR_I10_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* [[__B_ADDR_I11_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I10_I]], align 32 -// CHECK-NEXT: [[TMP6:%.*]] = load <4 x double>, <4 x double>* [[__B_ADDR_I11_I]], align 32 -// CHECK-NEXT: [[TMP7:%.*]] = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> [[TMP5]], <4 x double> [[TMP6]]) #2 -// CHECK-NEXT: store <4 x double> [[TMP7]], <4 x double>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP8:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> undef, <2 x i32> -// CHECK-NEXT: store <2 x double> [[EXTRACT4_I]], <2 x double>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> undef, <2 x i32> -// CHECK-NEXT: store <2 x double> [[EXTRACT5_I]], <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP11:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[__A_ADDR_I8_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[__B_ADDR_I9_I]], align 16 -// CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I8_I]], align 16 -// CHECK-NEXT: [[TMP13:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I9_I]], align 16 -// CHECK-NEXT: [[TMP14:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP12]], <2 x double> [[TMP13]]) #2 -// CHECK-NEXT: store <2 x double> [[TMP14]], <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP15:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP16:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> [[TMP16]], <2 x i32> -// CHECK-NEXT: store <2 x double> [[SHUFFLE_I]], <2 x double>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP17:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP18:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP18]], <2 x double>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP19:%.*]] = load <2 x double>, <2 x double>* 
[[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP20:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP21:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP19]], <2 x double> [[TMP20]]) #2 -// CHECK-NEXT: store <2 x double> [[TMP21]], <2 x double>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP22:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP22]], i32 0 -// CHECK-NEXT: ret double [[VECEXT_I]] double test_mm512_reduce_max_pd(__m512d __W){ +// CHECK-LABEL: @test_mm512_reduce_max_pd( +// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> +// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> +// CHECK: call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) +// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> +// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> +// CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) +// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> +// CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) +// CHECK: extractelement <2 x double> %{{.*}}, i32 0 return _mm512_reduce_max_pd(__W); } -// CHECK-LABEL: define i64 @test_mm512_reduce_min_epi64(<8 x i64> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP7:%.*]] = icmp slt <8 x i64> [[TMP5]], [[TMP6]] -// CHECK-NEXT: [[TMP8:%.*]] = 
select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] -// CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP9:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = icmp slt <8 x i64> [[TMP13]], [[TMP14]] -// CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] -// CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = icmp slt <8 x i64> [[TMP21]], [[TMP22]] -// CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] -// CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP25]], i32 0 -// CHECK-NEXT: ret i64 [[VECEXT_I]] long long test_mm512_reduce_min_epi64(__m512i __W){ +// CHECK-LABEL: @test_mm512_reduce_min_epi64( +// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> +// CHECK: icmp slt <8 x i64> %{{.*}}, %{{.*}} +// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> +// CHECK: icmp slt <8 x i64> %{{.*}}, %{{.*}} +// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> +// CHECK: icmp slt <8 x i64> %{{.*}}, %{{.*}} +// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_reduce_min_epi64(__W); } -// CHECK-LABEL: define i64 @test_mm512_reduce_min_epu64(<8 x i64> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: 
[[__B_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP7:%.*]] = icmp ult <8 x i64> [[TMP5]], [[TMP6]] -// CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] -// CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP9:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = icmp ult <8 x i64> [[TMP13]], [[TMP14]] -// CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] -// CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* 
[[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = icmp ult <8 x i64> [[TMP21]], [[TMP22]] -// CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] -// CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP25]], i32 0 -// CHECK-NEXT: ret i64 [[VECEXT_I]] unsigned long long test_mm512_reduce_min_epu64(__m512i __W){ +// CHECK-LABEL: @test_mm512_reduce_min_epu64( +// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> +// CHECK: icmp ult <8 x i64> %{{.*}}, %{{.*}} +// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> +// CHECK: icmp ult <8 x i64> %{{.*}}, %{{.*}} +// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> +// CHECK: icmp ult <8 x i64> %{{.*}}, %{{.*}} +// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: extractelement <8 x i64> %{{.*}}, i32 0 return _mm512_reduce_min_epu64(__W); } -// CHECK-LABEL: define double @test_mm512_reduce_min_pd(<8 x double> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I9_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store <8 x double> [[TMP0]], <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: store <4 x double> [[EXTRACT_I]], <4 x double>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: store <4 x double> [[EXTRACT2_I]], <4 x double>* [[__T2_I]], align 32 -// 
CHECK-NEXT: [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP3]], <4 x double>* [[__A_ADDR_I10_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* [[__B_ADDR_I11_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I10_I]], align 32 -// CHECK-NEXT: [[TMP6:%.*]] = load <4 x double>, <4 x double>* [[__B_ADDR_I11_I]], align 32 -// CHECK-NEXT: [[TMP7:%.*]] = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> [[TMP5]], <4 x double> [[TMP6]]) #2 -// CHECK-NEXT: store <4 x double> [[TMP7]], <4 x double>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP8:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> undef, <2 x i32> -// CHECK-NEXT: store <2 x double> [[EXTRACT4_I]], <2 x double>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> undef, <2 x i32> -// CHECK-NEXT: store <2 x double> [[EXTRACT5_I]], <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP11:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[__A_ADDR_I8_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[__B_ADDR_I9_I]], align 16 -// CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I8_I]], align 16 -// CHECK-NEXT: [[TMP13:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I9_I]], align 16 -// CHECK-NEXT: [[TMP14:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP12]], <2 x double> [[TMP13]]) #2 -// CHECK-NEXT: store <2 x double> [[TMP14]], <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP15:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP16:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> [[TMP16]], <2 x i32> -// CHECK-NEXT: store <2 x double> [[SHUFFLE_I]], <2 x double>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP17:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP18:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP18]], <2 x double>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP19:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP20:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP21:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP19]], <2 x double> [[TMP20]]) #2 -// CHECK-NEXT: store <2 x double> [[TMP21]], <2 x double>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP22:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP22]], i32 0 -// CHECK-NEXT: ret double [[VECEXT_I]] double test_mm512_reduce_min_pd(__m512d __W){ +// CHECK-LABEL: @test_mm512_reduce_min_pd( +// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> +// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> 
undef, <4 x i32>
+// CHECK: call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
+// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32>
+// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32>
+// CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32>
+// CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+// CHECK: extractelement <2 x double> %{{.*}}, i32 0
 return _mm512_reduce_min_pd(__W);
 }
-// CHECK-LABEL: define i64 @test_mm512_mask_reduce_max_epi64(i8 zeroext %__M, <8 x i64> %__W) #0 {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8
-// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1
-// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1
-// CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store i64 -9223372036854775808, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x i64> undef, i64 [[TMP2]], i32 0
-// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x i64> [[VECINIT_I_I]], i64 [[TMP3]], i32 1
-// CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x i64> [[VECINIT1_I_I]], i64 [[TMP4]], i32 2
-// CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x i64> [[VECINIT2_I_I]], i64 [[TMP5]], i32 3
-// CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x i64> [[VECINIT3_I_I]], i64 [[TMP6]], i32 4
-// CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x i64> [[VECINIT4_I_I]], i64 [[TMP7]], i32 5
-// CHECK-NEXT: [[TMP8:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP8]], i32 6
-// CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP9]], i32 7
-// CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64
-// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64
-// CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1
-// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* [[__W_ADDR_I_I]], align 64
-// CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1
-// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__A_ADDR_I11_I]], align 64
-// CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1
-// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I11_I]], align 64
-// CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1>
-// CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]]
-// CHECK-NEXT: store <8 x i64> [[TMP17]], <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP18]], <8 x i64> [[TMP19]], <8 x i32>
-// CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64
-// CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__A_ADDR_I9_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__B_ADDR_I10_I]], align 64
-// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64
-// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64
-// CHECK-NEXT: [[TMP24:%.*]] = icmp sgt <8 x i64> [[TMP22]], [[TMP23]]
-// CHECK-NEXT: [[TMP25:%.*]] = select <8 x i1> [[TMP24]], <8 x i64> [[TMP22]], <8 x i64> [[TMP23]]
-// CHECK-NEXT: store <8 x i64> [[TMP25]], <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP26]], <8 x i64> [[TMP27]], <8 x i32>
-// CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T3_I]], align 64
-// CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__A_ADDR_I7_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I8_I]], align 64
-// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64
-// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64
-// CHECK-NEXT: [[TMP32:%.*]] = icmp sgt <8 x i64> [[TMP30]], [[TMP31]]
-// CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP32]], <8 x i64> [[TMP30]], <8 x i64> [[TMP31]]
-// CHECK-NEXT: store <8 x i64> [[TMP33]], <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[TMP34:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[TMP35:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[SHUFFLE5_I:%.*]] = shufflevector <8 x i64> [[TMP34]], <8 x i64> [[TMP35]], <8 x i32>
-// CHECK-NEXT: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__T5_I]], align 64
-// CHECK-NEXT: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* [[__B_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP40:%.*]] = icmp sgt <8 x i64> [[TMP38]], [[TMP39]]
-// CHECK-NEXT: [[TMP41:%.*]] = select <8 x i1> [[TMP40]], <8 x i64> [[TMP38]], <8 x i64> [[TMP39]]
-// CHECK-NEXT: store <8 x i64> [[TMP41]], <8 x i64>* [[__T6_I]], align 64
-// CHECK-NEXT: [[TMP42:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64
-// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP42]], i32 0
-// CHECK-NEXT: ret i64 [[VECEXT_I]]
 long long test_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __W){
+// CHECK-LABEL: @test_mm512_mask_reduce_max_epi64(
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32>
+// CHECK: icmp sgt <8 x i64> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32>
+// CHECK: icmp sgt <8 x i64> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32>
+// CHECK: icmp sgt <8 x i64> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
 return _mm512_mask_reduce_max_epi64(__M, __W);
 }
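The retained checks above encode the intrinsic's whole contract: masked-off lanes are first replaced with the smallest signed 64-bit value, then three shufflevector/icmp sgt/select rounds fold eight lanes down to one. A minimal scalar model of that contract (plain C; the function name is illustrative, not part of the test):

#include <stdint.h>

/* Scalar model: a masked-off lane contributes the identity
   INT64_MIN, then a signed max-fold runs over all eight lanes. */
static int64_t mask_reduce_max_epi64_model(uint8_t m, const int64_t v[8]) {
  int64_t r = INT64_MIN;
  for (int i = 0; i < 8; ++i) {
    int64_t x = ((m >> i) & 1) ? v[i] : INT64_MIN;
    if (x > r)
      r = x;
  }
  return r;
}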
-// CHECK-LABEL: define i64 @test_mm512_mask_reduce_max_epu64(i8 zeroext %__M, <8 x i64> %__W) #0 {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__A_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1
-// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1
-// CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store i8 [[TMP2]], i8* [[__U_ADDR_I_I]], align 1
-// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I9_I]], align 64
-// CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1
-// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64
-// CHECK-NEXT: store <8 x i64> zeroinitializer, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64
-// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64
-// CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-// CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]]
-// CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP9:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <8 x i32>
-// CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64
-// CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I7_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I10_I]], align 64
-// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64
-// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64
-// CHECK-NEXT: [[TMP15:%.*]] = icmp ugt <8 x i64> [[TMP13]], [[TMP14]]
-// CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]]
-// CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[SHUFFLE2_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32>
-// CHECK-NEXT: store <8 x i64> [[SHUFFLE2_I]], <8 x i64>* [[__T3_I]], align 64
-// CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I6_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I8_I]], align 64
-// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I6_I]], align 64
-// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64
-// CHECK-NEXT: [[TMP23:%.*]] = icmp ugt <8 x i64> [[TMP21]], [[TMP22]]
-// CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]]
-// CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[SHUFFLE4_I:%.*]] = shufflevector <8 x i64> [[TMP25]], <8 x i64> [[TMP26]], <8 x i32>
-// CHECK-NEXT: store <8 x i64> [[SHUFFLE4_I]], <8 x i64>* [[__T5_I]], align 64
-// CHECK-NEXT: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP27]], <8 x i64>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__B_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP31:%.*]] = icmp ugt <8 x i64> [[TMP29]], [[TMP30]]
-// CHECK-NEXT: [[TMP32:%.*]] = select <8 x i1> [[TMP31]], <8 x i64> [[TMP29]], <8 x i64> [[TMP30]]
-// CHECK-NEXT: store <8 x i64> [[TMP32]], <8 x i64>* [[__T6_I]], align 64
-// CHECK-NEXT: [[TMP33:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64
-// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP33]], i32 0
-// CHECK-NEXT: ret i64 [[VECEXT_I]]
 unsigned long test_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __W){
+// CHECK-LABEL: @test_mm512_mask_reduce_max_epu64(
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32>
+// CHECK: icmp ugt <8 x i64> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32>
+// CHECK: icmp ugt <8 x i64> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32>
+// CHECK: icmp ugt <8 x i64> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
 return _mm512_mask_reduce_max_epu64(__M, __W);
 }
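For the unsigned variant the masked identity is zero (the select against zeroinitializer in the deleted checks) and the compares are icmp ugt. The three shuffle rounds implement a stride-halving fold, sketched scalarly below; reduce_max_u64_tree is an illustrative name, not something the test defines:

#include <stdint.h>

/* Stride-halving fold mirroring the three shufflevector/select
   rounds: pair lanes at stride 4, then 2, then 1; lane 0 wins. */
static uint64_t reduce_max_u64_tree(uint64_t v[8]) {
  for (int stride = 4; stride >= 1; stride /= 2)
    for (int i = 0; i < stride; ++i)
      if (v[i + stride] > v[i])
        v[i] = v[i + stride];
  return v[0];
}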
-// CHECK-LABEL: define double @test_mm512_mask_reduce_max_pd(i8 zeroext %__M, <8 x double> %__W) #0 {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca double, align 8
-// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x double>, align 64
-// CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <8 x double>, align 64
-// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x double>, align 64
-// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x double>, align 32
-// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x double>, align 32
-// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64
-// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32
-// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32
-// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32
-// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64
-// CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1
-// CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64
-// CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1
-// CHECK-NEXT: store <8 x double> [[TMP1]], <8 x double>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store double 0xFFF0000000000000, double* [[__W_ADDR_I_I]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0
-// CHECK-NEXT: [[TMP3:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x double> [[VECINIT_I_I]], double [[TMP3]], i32 1
-// CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x double> [[VECINIT1_I_I]], double [[TMP4]], i32 2
-// CHECK-NEXT: [[TMP5:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x double> [[VECINIT2_I_I]], double [[TMP5]], i32 3
-// CHECK-NEXT: [[TMP6:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x double> [[VECINIT3_I_I]], double [[TMP6]], i32 4
-// CHECK-NEXT: [[TMP7:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x double> [[VECINIT4_I_I]], double [[TMP7]], i32 5
-// CHECK-NEXT: [[TMP8:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x double> [[VECINIT5_I_I]], double [[TMP8]], i32 6
-// CHECK-NEXT: [[TMP9:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x double> [[VECINIT6_I_I]], double [[TMP9]], i32 7
-// CHECK-NEXT: store <8 x double> [[VECINIT7_I_I]], <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64
-// CHECK-NEXT: [[TMP10:%.*]] = load <8 x double>, <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64
-// CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1
-// CHECK-NEXT: [[TMP12:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store <8 x double> [[TMP10]], <8 x double>* [[__W2_ADDR_I_I]], align 64
-// CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1
-// CHECK-NEXT: store <8 x double> [[TMP12]], <8 x double>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1
-// CHECK-NEXT: [[TMP14:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP15:%.*]] = load <8 x double>, <8 x double>* [[__W2_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1>
-// CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x double> [[TMP14]], <8 x double> [[TMP15]]
-// CHECK-NEXT: store <8 x double> [[TMP17]], <8 x double>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP18:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP18]], <8 x double> undef, <4 x i32>
-// CHECK-NEXT: store <4 x double> [[EXTRACT_I]], <4 x double>* [[__T1_I]], align 32
-// CHECK-NEXT: [[TMP19:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x double> [[TMP19]], <8 x double> undef, <4 x i32>
-// CHECK-NEXT: store <4 x double> [[EXTRACT4_I]], <4 x double>* [[__T2_I]], align 32
-// CHECK-NEXT: [[TMP20:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32
-// CHECK-NEXT: [[TMP21:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32
-// CHECK-NEXT: store <4 x double> [[TMP20]], <4 x double>* [[__A_ADDR_I12_I]], align 32
-// CHECK-NEXT: store <4 x double> [[TMP21]], <4 x double>* [[__B_ADDR_I13_I]], align 32
-// CHECK-NEXT: [[TMP22:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I12_I]], align 32
-// CHECK-NEXT: [[TMP23:%.*]] = load <4 x double>, <4 x double>* [[__B_ADDR_I13_I]], align 32
-// CHECK-NEXT: [[TMP24:%.*]] = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> [[TMP22]], <4 x double> [[TMP23]]) #2
-// CHECK-NEXT: store <4 x double> [[TMP24]], <4 x double>* [[__T3_I]], align 32
-// CHECK-NEXT: [[TMP25:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32
-// CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <4 x double> [[TMP25]], <4 x double> undef, <2 x i32>
-// CHECK-NEXT: store <2 x double> [[EXTRACT6_I]], <2 x double>* [[__T4_I]], align 16
-// CHECK-NEXT: [[TMP26:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32
-// CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <4 x double> [[TMP26]], <4 x double> undef, <2 x i32>
-// CHECK-NEXT: store <2 x double> [[EXTRACT7_I]], <2 x double>* [[__T5_I]], align 16
-// CHECK-NEXT: [[TMP27:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16
-// CHECK-NEXT: [[TMP28:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16
-// CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* [[__A_ADDR_I10_I]], align 16
-// CHECK-NEXT: store <2 x double> [[TMP28]], <2 x double>* [[__B_ADDR_I11_I]], align 16
-// CHECK-NEXT: [[TMP29:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I10_I]], align 16
-// CHECK-NEXT: [[TMP30:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I11_I]], align 16
-// CHECK-NEXT: [[TMP31:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP29]], <2 x double> [[TMP30]]) #2
-// CHECK-NEXT: store <2 x double> [[TMP31]], <2 x double>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP32:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP33:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16
-// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP32]], <2 x double> [[TMP33]], <2 x i32>
-// CHECK-NEXT: store <2 x double> [[SHUFFLE_I]], <2 x double>* [[__T7_I]], align 16
-// CHECK-NEXT: [[TMP34:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP35:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16
-// CHECK-NEXT: store <2 x double> [[TMP34]], <2 x double>* [[__A2_ADDR_I_I]], align 16
-// CHECK-NEXT: store <2 x double> [[TMP35]], <2 x double>* [[__B_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP36:%.*]] = load <2 x double>, <2 x double>* [[__A2_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP37:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP38:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP36]], <2 x double> [[TMP37]]) #2
-// CHECK-NEXT: store <2 x double> [[TMP38]], <2 x double>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP39:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16
-// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP39]], i32 0
-// CHECK-NEXT: ret double [[VECEXT_I]]
 double test_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __W){
+// CHECK-LABEL: @test_mm512_mask_reduce_max_pd(
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32>
+// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32>
+// CHECK: call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
+// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32>
+// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32>
+// CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32>
+// CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+// CHECK: extractelement <2 x double> %{{.*}}, i32 0
 return _mm512_mask_reduce_max_pd(__M, __W);
 }
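The double-precision path narrows instead of shuffling in place: the 512-bit halves feed llvm.x86.avx.max.pd.256, the 256-bit result splits into 128-bit halves for llvm.x86.sse2.max.pd, and a final lane swap plus one more maxpd leaves the answer in element 0, with -infinity as the masked identity. A hedged scalar model (fmax stands in for maxpd and ignores maxpd's operand-order NaN behavior):

#include <math.h>
#include <stdint.h>

/* Scalar model with the -INFINITY identity for masked-off lanes. */
static double mask_reduce_max_pd_model(uint8_t m, const double v[8]) {
  double r = -INFINITY;
  for (int i = 0; i < 8; ++i)
    r = fmax(r, ((m >> i) & 1) ? v[i] : -INFINITY);
  return r;
}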
-// CHECK-LABEL: define i64 @test_mm512_mask_reduce_min_epi64(i8 zeroext %__M, <8 x i64> %__W) #0 {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8
-// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1
-// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1
-// CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store i64 9223372036854775807, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x i64> undef, i64 [[TMP2]], i32 0
-// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x i64> [[VECINIT_I_I]], i64 [[TMP3]], i32 1
-// CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x i64> [[VECINIT1_I_I]], i64 [[TMP4]], i32 2
-// CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x i64> [[VECINIT2_I_I]], i64 [[TMP5]], i32 3
-// CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x i64> [[VECINIT3_I_I]], i64 [[TMP6]], i32 4
-// CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x i64> [[VECINIT4_I_I]], i64 [[TMP7]], i32 5
-// CHECK-NEXT: [[TMP8:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP8]], i32 6
-// CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP9]], i32 7
-// CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64
-// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64
-// CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1
-// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* [[__W_ADDR_I_I]], align 64
-// CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1
-// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__A_ADDR_I11_I]], align 64
-// CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1
-// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I11_I]], align 64
-// CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1>
-// CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]]
-// CHECK-NEXT: store <8 x i64> [[TMP17]], <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP18]], <8 x i64> [[TMP19]], <8 x i32>
-// CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64
-// CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__A_ADDR_I9_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__B_ADDR_I10_I]], align 64
-// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64
-// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64
-// CHECK-NEXT: [[TMP24:%.*]] = icmp slt <8 x i64> [[TMP22]], [[TMP23]]
-// CHECK-NEXT: [[TMP25:%.*]] = select <8 x i1> [[TMP24]], <8 x i64> [[TMP22]], <8 x i64> [[TMP23]]
-// CHECK-NEXT: store <8 x i64> [[TMP25]], <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP26]], <8 x i64> [[TMP27]], <8 x i32>
-// CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T3_I]], align 64
-// CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__A_ADDR_I7_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I8_I]], align 64
-// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64
-// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64
-// CHECK-NEXT: [[TMP32:%.*]] = icmp slt <8 x i64> [[TMP30]], [[TMP31]]
-// CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP32]], <8 x i64> [[TMP30]], <8 x i64> [[TMP31]]
-// CHECK-NEXT: store <8 x i64> [[TMP33]], <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[TMP34:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[TMP35:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[SHUFFLE5_I:%.*]] = shufflevector <8 x i64> [[TMP34]], <8 x i64> [[TMP35]], <8 x i32>
-// CHECK-NEXT: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__T5_I]], align 64
-// CHECK-NEXT: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* [[__B_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP40:%.*]] = icmp slt <8 x i64> [[TMP38]], [[TMP39]]
-// CHECK-NEXT: [[TMP41:%.*]] = select <8 x i1> [[TMP40]], <8 x i64> [[TMP38]], <8 x i64> [[TMP39]]
-// CHECK-NEXT: store <8 x i64> [[TMP41]], <8 x i64>* [[__T6_I]], align 64
-// CHECK-NEXT: [[TMP42:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64
-// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP42]], i32 0
-// CHECK-NEXT: ret i64 [[VECEXT_I]]
 long long test_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __W){
+// CHECK-LABEL: @test_mm512_mask_reduce_min_epi64(
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32>
+// CHECK: icmp slt <8 x i64> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32>
+// CHECK: icmp slt <8 x i64> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32>
+// CHECK: icmp slt <8 x i64> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
 return _mm512_mask_reduce_min_epi64(__M, __W);
 }
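The signed-min case only flips the predicate and the identity. A minimal scalar model (illustrative name, plain C):

#include <stdint.h>

/* Signed-min model: the identity is INT64_MAX, i.e. the
   9223372036854775807 splat the old checks spelled out lane by lane. */
static int64_t mask_reduce_min_epi64_model(uint8_t m, const int64_t v[8]) {
  int64_t r = INT64_MAX;
  for (int i = 0; i < 8; ++i) {
    int64_t x = ((m >> i) & 1) ? v[i] : INT64_MAX;
    if (x < r)
      r = x;
  }
  return r;
}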
-// CHECK-LABEL: define i64 @test_mm512_mask_reduce_min_epu64(i8 zeroext %__M, <8 x i64> %__W) #0 {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8
-// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1
-// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1
-// CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store i64 -1, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x i64> undef, i64 [[TMP2]], i32 0
-// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x i64> [[VECINIT_I_I]], i64 [[TMP3]], i32 1
-// CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x i64> [[VECINIT1_I_I]], i64 [[TMP4]], i32 2
-// CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x i64> [[VECINIT2_I_I]], i64 [[TMP5]], i32 3
-// CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x i64> [[VECINIT3_I_I]], i64 [[TMP6]], i32 4
-// CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x i64> [[VECINIT4_I_I]], i64 [[TMP7]], i32 5
-// CHECK-NEXT: [[TMP8:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP8]], i32 6
-// CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP9]], i32 7
-// CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64
-// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64
-// CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1
-// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* [[__W_ADDR_I_I]], align 64
-// CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1
-// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__A_ADDR_I11_I]], align 64
-// CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1
-// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I11_I]], align 64
-// CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1>
-// CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]]
-// CHECK-NEXT: store <8 x i64> [[TMP17]], <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP18]], <8 x i64> [[TMP19]], <8 x i32>
-// CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64
-// CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__A_ADDR_I9_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__B_ADDR_I10_I]], align 64
-// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64
-// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64
-// CHECK-NEXT: [[TMP24:%.*]] = icmp ult <8 x i64> [[TMP22]], [[TMP23]]
-// CHECK-NEXT: [[TMP25:%.*]] = select <8 x i1> [[TMP24]], <8 x i64> [[TMP22]], <8 x i64> [[TMP23]]
-// CHECK-NEXT: store <8 x i64> [[TMP25]], <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP26]], <8 x i64> [[TMP27]], <8 x i32>
-// CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T3_I]], align 64
-// CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__A_ADDR_I7_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I8_I]], align 64
-// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64
-// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64
-// CHECK-NEXT: [[TMP32:%.*]] = icmp ult <8 x i64> [[TMP30]], [[TMP31]]
-// CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP32]], <8 x i64> [[TMP30]], <8 x i64> [[TMP31]]
-// CHECK-NEXT: store <8 x i64> [[TMP33]], <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[TMP34:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[TMP35:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[SHUFFLE5_I:%.*]] = shufflevector <8 x i64> [[TMP34]], <8 x i64> [[TMP35]], <8 x i32>
-// CHECK-NEXT: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__T5_I]], align 64
-// CHECK-NEXT: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* [[__B_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP40:%.*]] = icmp ult <8 x i64> [[TMP38]], [[TMP39]]
-// CHECK-NEXT: [[TMP41:%.*]] = select <8 x i1> [[TMP40]], <8 x i64> [[TMP38]], <8 x i64> [[TMP39]]
-// CHECK-NEXT: store <8 x i64> [[TMP41]], <8 x i64>* [[__T6_I]], align 64
-// CHECK-NEXT: [[TMP42:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64
-// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP42]], i32 0
-// CHECK-NEXT: ret i64 [[VECEXT_I]]
-long long test_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __W){
- return _mm512_mask_reduce_min_epu64(__M, __W);
+unsigned long long test_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __W){
+// CHECK-LABEL: @test_mm512_mask_reduce_min_epu64(
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32>
+// CHECK: icmp ult <8 x i64> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32>
+// CHECK: icmp ult <8 x i64> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32>
+// CHECK: icmp ult <8 x i64> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+ return _mm512_mask_reduce_min_epu64(__M, __W);
 }
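Besides swapping the checks, this hunk also corrects the test's return type to unsigned long long to match the intrinsic. The unsigned-min identity is all-ones, the i64 -1 splat in the deleted checks; modeled scalarly (illustrative name):

#include <stdint.h>

/* Unsigned-min model: masked-off lanes contribute UINT64_MAX. */
static uint64_t mask_reduce_min_epu64_model(uint8_t m, const uint64_t v[8]) {
  uint64_t r = UINT64_MAX;
  for (int i = 0; i < 8; ++i) {
    uint64_t x = ((m >> i) & 1) ? v[i] : UINT64_MAX;
    if (x < r)
      r = x;
  }
  return r;
}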
-// CHECK-LABEL: define double @test_mm512_mask_reduce_min_pd(i8 zeroext %__M, <8 x double> %__W) #0 {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca double, align 8
-// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x double>, align 64
-// CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <8 x double>, align 64
-// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x double>, align 64
-// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x double>, align 32
-// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x double>, align 32
-// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64
-// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32
-// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32
-// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32
-// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16
-// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64
-// CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1
-// CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64
-// CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1
-// CHECK-NEXT: store <8 x double> [[TMP1]], <8 x double>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store double 0x7FF0000000000000, double* [[__W_ADDR_I_I]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0
-// CHECK-NEXT: [[TMP3:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x double> [[VECINIT_I_I]], double [[TMP3]], i32 1
-// CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x double> [[VECINIT1_I_I]], double [[TMP4]], i32 2
-// CHECK-NEXT: [[TMP5:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x double> [[VECINIT2_I_I]], double [[TMP5]], i32 3
-// CHECK-NEXT: [[TMP6:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x double> [[VECINIT3_I_I]], double [[TMP6]], i32 4
-// CHECK-NEXT: [[TMP7:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x double> [[VECINIT4_I_I]], double [[TMP7]], i32 5
-// CHECK-NEXT: [[TMP8:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x double> [[VECINIT5_I_I]], double [[TMP8]], i32 6
-// CHECK-NEXT: [[TMP9:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
-// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x double> [[VECINIT6_I_I]], double [[TMP9]], i32 7
-// CHECK-NEXT: store <8 x double> [[VECINIT7_I_I]], <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64
-// CHECK-NEXT: [[TMP10:%.*]] = load <8 x double>, <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64
-// CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1
-// CHECK-NEXT: [[TMP12:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store <8 x double> [[TMP10]], <8 x double>* [[__W2_ADDR_I_I]], align 64
-// CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1
-// CHECK-NEXT: store <8 x double> [[TMP12]], <8 x double>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1
-// CHECK-NEXT: [[TMP14:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP15:%.*]] = load <8 x double>, <8 x double>* [[__W2_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1>
-// CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x double> [[TMP14]], <8 x double> [[TMP15]]
-// CHECK-NEXT: store <8 x double> [[TMP17]], <8 x double>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP18:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP18]], <8 x double> undef, <4 x i32>
-// CHECK-NEXT: store <4 x double> [[EXTRACT_I]], <4 x double>* [[__T1_I]], align 32
-// CHECK-NEXT: [[TMP19:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x double> [[TMP19]], <8 x double> undef, <4 x i32>
-// CHECK-NEXT: store <4 x double> [[EXTRACT4_I]], <4 x double>* [[__T2_I]], align 32
-// CHECK-NEXT: [[TMP20:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32
-// CHECK-NEXT: [[TMP21:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32
-// CHECK-NEXT: store <4 x double> [[TMP20]], <4 x double>* [[__A_ADDR_I12_I]], align 32
-// CHECK-NEXT: store <4 x double> [[TMP21]], <4 x double>* [[__B_ADDR_I13_I]], align 32
-// CHECK-NEXT: [[TMP22:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I12_I]], align 32
-// CHECK-NEXT: [[TMP23:%.*]] = load <4 x double>, <4 x double>* [[__B_ADDR_I13_I]], align 32
-// CHECK-NEXT: [[TMP24:%.*]] = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> [[TMP22]], <4 x double> [[TMP23]]) #2
-// CHECK-NEXT: store <4 x double> [[TMP24]], <4 x double>* [[__T3_I]], align 32
-// CHECK-NEXT: [[TMP25:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32
-// CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <4 x double> [[TMP25]], <4 x double> undef, <2 x i32>
-// CHECK-NEXT: store <2 x double> [[EXTRACT6_I]], <2 x double>* [[__T4_I]], align 16
-// CHECK-NEXT: [[TMP26:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32
-// CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <4 x double> [[TMP26]], <4 x double> undef, <2 x i32>
-// CHECK-NEXT: store <2 x double> [[EXTRACT7_I]], <2 x double>* [[__T5_I]], align 16
-// CHECK-NEXT: [[TMP27:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16
-// CHECK-NEXT: [[TMP28:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16
-// CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* [[__A_ADDR_I10_I]], align 16
-// CHECK-NEXT: store <2 x double> [[TMP28]], <2 x double>* [[__B_ADDR_I11_I]], align 16
-// CHECK-NEXT: [[TMP29:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I10_I]], align 16
-// CHECK-NEXT: [[TMP30:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I11_I]], align 16
-// CHECK-NEXT: [[TMP31:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP29]], <2 x double> [[TMP30]]) #2
-// CHECK-NEXT: store <2 x double> [[TMP31]], <2 x double>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP32:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP33:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16
-// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP32]], <2 x double> [[TMP33]], <2 x i32>
-// CHECK-NEXT: store <2 x double> [[SHUFFLE_I]], <2 x double>* [[__T7_I]], align 16
-// CHECK-NEXT: [[TMP34:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP35:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16
-// CHECK-NEXT: store <2 x double> [[TMP34]], <2 x double>* [[__A2_ADDR_I_I]], align 16
-// CHECK-NEXT: store <2 x double> [[TMP35]], <2 x double>* [[__B_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP36:%.*]] = load <2 x double>, <2 x double>* [[__A2_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP37:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP38:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP36]], <2 x double> [[TMP37]]) #2
-// CHECK-NEXT: store <2 x double> [[TMP38]], <2 x double>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP39:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16
-// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP39]], i32 0
-// CHECK-NEXT: ret double [[VECEXT_I]]
 double test_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __W){
+// CHECK-LABEL: @test_mm512_mask_reduce_min_pd(
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32>
+// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32>
+// CHECK: call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
+// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32>
+// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32>
+// CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32>
+// CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+// CHECK: extractelement <2 x double> %{{.*}}, i32 0
 return _mm512_mask_reduce_min_pd(__M, __W);
 }
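Here the masked identity is +infinity (the 0x7FF0000000000000 splat), so masked-off lanes can never win the min. A usage sketch under the assumption of an AVX-512F target; the wrapper name is illustrative:

#include <immintrin.h>

/* Fold only the even lanes: mask 0x55 selects lanes 0, 2, 4, 6,
   and the odd lanes contribute the +infinity identity. */
static double smallest_even_lane(__m512d v) {
  return _mm512_mask_reduce_min_pd((__mmask8)0x55, v);
}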
-// CHECK-LABEL: define i32 @test_mm512_reduce_max_epi32(<8 x i64> %__W) #0 {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32
-// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32
-// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32
-// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32
-// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32
-// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16
-// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> undef, <4 x i32>
-// CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> undef, <4 x i32>
-// CHECK-NEXT: store <4 x i64> [[EXTRACT2_I]], <4 x i64>* [[__T2_I]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32
-// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* [[__A_ADDR_I_I]], align 32
-// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[__B_ADDR_I_I]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP5]] to <8 x i32>
-// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32
-// CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP7]] to <8 x i32>
-// CHECK-NEXT: [[TMP9:%.*]] = icmp sgt <8 x i32> [[TMP6]], [[TMP8]]
-// CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[TMP6]], <8 x i32> [[TMP8]]
-// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i32> [[TMP10]] to <4 x i64>
-// CHECK-NEXT: store <4 x i64> [[TMP11]], <4 x i64>* [[__T3_I]], align 32
-// CHECK-NEXT: [[TMP12:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32
-// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> undef, <2 x i32>
-// CHECK-NEXT: store <2 x i64> [[EXTRACT4_I]], <2 x i64>* [[__T4_I]], align 16
-// CHECK-NEXT: [[TMP13:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32
-// CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x i64> [[TMP13]], <4 x i64> undef, <2 x i32>
-// CHECK-NEXT: store <2 x i64> [[EXTRACT5_I]], <2 x i64>* [[__T5_I]], align 16
-// CHECK-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16
-// CHECK-NEXT: [[TMP15:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[__V1_ADDR_I12_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[__V2_ADDR_I13_I]], align 16
-// CHECK-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I12_I]], align 16
-// CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i64> [[TMP16]] to <4 x i32>
-// CHECK-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I13_I]], align 16
-// CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i64> [[TMP18]] to <4 x i32>
-// CHECK-NEXT: [[TMP20:%.*]] = icmp sgt <4 x i32> [[TMP17]], [[TMP19]]
-// CHECK-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP20]], <4 x i32> [[TMP17]], <4 x i32> [[TMP19]]
-// CHECK-NEXT: [[TMP22:%.*]] = bitcast <4 x i32> [[TMP21]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[TMP22]], <2 x i64>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP23:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP24:%.*]] = bitcast <2 x i64> [[TMP23]] to <4 x i32>
-// CHECK-NEXT: [[TMP25:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP26:%.*]] = bitcast <2 x i64> [[TMP25]] to <4 x i32>
-// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP24]], <4 x i32> [[TMP26]], <4 x i32>
-// CHECK-NEXT: [[TMP27:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[__T7_I]], align 16
-// CHECK-NEXT: [[TMP28:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP29:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP28]], <2 x i64>* [[__V1_ADDR_I10_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[__V2_ADDR_I11_I]], align 16
-// CHECK-NEXT: [[TMP30:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I10_I]], align 16
-// CHECK-NEXT: [[TMP31:%.*]] = bitcast <2 x i64> [[TMP30]] to <4 x i32>
-// CHECK-NEXT: [[TMP32:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I11_I]], align 16
-// CHECK-NEXT: [[TMP33:%.*]] = bitcast <2 x i64> [[TMP32]] to <4 x i32>
-// CHECK-NEXT: [[TMP34:%.*]] = icmp sgt <4 x i32> [[TMP31]], [[TMP33]]
-// CHECK-NEXT: [[TMP35:%.*]] = select <4 x i1> [[TMP34]], <4 x i32> [[TMP31]], <4 x i32> [[TMP33]]
-// CHECK-NEXT: [[TMP36:%.*]] = bitcast <4 x i32> [[TMP35]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[TMP36]], <2 x i64>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP37:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP38:%.*]] = bitcast <2 x i64> [[TMP37]] to <4 x i32>
-// CHECK-NEXT: [[TMP39:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP40:%.*]] = bitcast <2 x i64> [[TMP39]] to <4 x i32>
-// CHECK-NEXT: [[SHUFFLE8_I:%.*]] = shufflevector <4 x i32> [[TMP38]], <4 x i32> [[TMP40]], <4 x i32>
-// CHECK-NEXT: [[TMP41:%.*]] = bitcast <4 x i32> [[SHUFFLE8_I]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[TMP41]], <2 x i64>* [[__T9_I]], align 16
-// CHECK-NEXT: [[TMP42:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP42]], <2 x i64>* [[__V1_ADDR_I_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP43]], <2 x i64>* [[__V2_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP44:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP45:%.*]] = bitcast <2 x i64> [[TMP44]] to <4 x i32>
-// CHECK-NEXT: [[TMP46:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP47:%.*]] = bitcast <2 x i64> [[TMP46]] to <4 x i32>
-// CHECK-NEXT: [[TMP48:%.*]] = icmp sgt <4 x i32> [[TMP45]], [[TMP47]]
-// CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[TMP45]], <4 x i32> [[TMP47]]
-// CHECK-NEXT: [[TMP50:%.*]] = bitcast <4 x i32> [[TMP49]] to <2 x i64>
-// CHECK-NEXT: [[TMP51:%.*]] = bitcast <2 x i64> [[TMP50]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[TMP51]], <4 x i32>* [[__T10_I]], align 16
-// CHECK-NEXT: [[TMP52:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16
-// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP52]], i32 0
-// CHECK-NEXT: ret i32 [[VECEXT_I]]
 int test_mm512_reduce_max_epi32(__m512i __W){
+// CHECK-LABEL: @test_mm512_reduce_max_epi32(
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32>
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32>
+// CHECK: icmp sgt <8 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32>
+// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32>
+// CHECK: icmp sgt <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32>
+// CHECK: icmp sgt <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32>
+// CHECK: icmp sgt <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 return _mm512_reduce_max_epi32(__W);
 }
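The 32-bit reductions fold sixteen lanes, so there is one extra round: the checks narrow 512 to 256 to 128 bits, then run two in-register shuffle rounds, bitcasting between <2 x i64> and <4 x i32> around each compare. A scalar model of the result (illustrative name, plain C):

#include <stdint.h>

/* 16-lane signed max-fold; unmasked, so no identity is needed
   and the answer is whatever ends up in lane 0. */
static int32_t reduce_max_epi32_model(const int32_t v[16]) {
  int32_t r = v[0];
  for (int i = 1; i < 16; ++i)
    if (v[i] > r)
      r = v[i];
  return r;
}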
align 64 -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> undef, <4 x i32> -// CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> undef, <4 x i32> -// CHECK-NEXT: store <4 x i64> [[EXTRACT2_I]], <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP5]] to <8 x i32> -// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP7]] to <8 x i32> -// CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <8 x i32> [[TMP6]], [[TMP8]] -// CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[TMP6]], <8 x i32> [[TMP8]] -// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i32> [[TMP10]] to <4 x i64> -// CHECK-NEXT: store <4 x i64> [[TMP11]], <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP12:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> undef, <2 x i32> -// CHECK-NEXT: store <2 x i64> [[EXTRACT4_I]], <2 x i64>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP13:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x i64> [[TMP13]], <4 x i64> undef, <2 x i32> -// CHECK-NEXT: store <2 x i64> [[EXTRACT5_I]], <2 x i64>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP15:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[__V1_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[__V2_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i64> [[TMP16]] to <4 x i32> -// CHECK-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i64> [[TMP18]] to <4 x i32> -// CHECK-NEXT: [[TMP20:%.*]] = icmp ugt <4 x i32> [[TMP17]], [[TMP19]] -// CHECK-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP20]], <4 x i32> [[TMP17]], <4 x i32> [[TMP19]] -// CHECK-NEXT: [[TMP22:%.*]] = bitcast <4 x i32> [[TMP21]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP22]], <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP23:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP24:%.*]] = bitcast <2 x i64> [[TMP23]] to <4 x i32> -// CHECK-NEXT: [[TMP25:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP26:%.*]] = bitcast <2 x i64> [[TMP25]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP24]], <4 x i32> [[TMP26]], <4 x i32> -// CHECK-NEXT: [[TMP27:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP28:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP29:%.*]] = 
load <2 x i64>, <2 x i64>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP28]], <2 x i64>* [[__V1_ADDR_I10_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[__V2_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP30:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I10_I]], align 16 -// CHECK-NEXT: [[TMP31:%.*]] = bitcast <2 x i64> [[TMP30]] to <4 x i32> -// CHECK-NEXT: [[TMP32:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP33:%.*]] = bitcast <2 x i64> [[TMP32]] to <4 x i32> -// CHECK-NEXT: [[TMP34:%.*]] = icmp ugt <4 x i32> [[TMP31]], [[TMP33]] -// CHECK-NEXT: [[TMP35:%.*]] = select <4 x i1> [[TMP34]], <4 x i32> [[TMP31]], <4 x i32> [[TMP33]] -// CHECK-NEXT: [[TMP36:%.*]] = bitcast <4 x i32> [[TMP35]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP36]], <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP37:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP38:%.*]] = bitcast <2 x i64> [[TMP37]] to <4 x i32> -// CHECK-NEXT: [[TMP39:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP40:%.*]] = bitcast <2 x i64> [[TMP39]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE8_I:%.*]] = shufflevector <4 x i32> [[TMP38]], <4 x i32> [[TMP40]], <4 x i32> -// CHECK-NEXT: [[TMP41:%.*]] = bitcast <4 x i32> [[SHUFFLE8_I]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP41]], <2 x i64>* [[__T9_I]], align 16 -// CHECK-NEXT: [[TMP42:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP42]], <2 x i64>* [[__V1_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP43]], <2 x i64>* [[__V2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP44:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP45:%.*]] = bitcast <2 x i64> [[TMP44]] to <4 x i32> -// CHECK-NEXT: [[TMP46:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP47:%.*]] = bitcast <2 x i64> [[TMP46]] to <4 x i32> -// CHECK-NEXT: [[TMP48:%.*]] = icmp ugt <4 x i32> [[TMP45]], [[TMP47]] -// CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[TMP45]], <4 x i32> [[TMP47]] -// CHECK-NEXT: [[TMP50:%.*]] = bitcast <4 x i32> [[TMP49]] to <2 x i64> -// CHECK-NEXT: [[TMP51:%.*]] = bitcast <2 x i64> [[TMP50]] to <4 x i32> -// CHECK-NEXT: store <4 x i32> [[TMP51]], <4 x i32>* [[__T10_I]], align 16 -// CHECK-NEXT: [[TMP52:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP52]], i32 0 -// CHECK-NEXT: ret i32 [[VECEXT_I]] unsigned int test_mm512_reduce_max_epu32(__m512i __W){ - return _mm512_reduce_max_epu32(__W); +// CHECK-LABEL: @test_mm512_reduce_max_epu32( +// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> +// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> +// CHECK: icmp ugt <8 x i32> %{{.*}}, %{{.*}} +// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} +// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> +// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> +// CHECK: icmp ugt <4 x i32> %{{.*}}, %{{.*}} +// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> +// CHECK: icmp ugt <4 x i32> %{{.*}}, %{{.*}} +// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: shufflevector <4 x i32> 
%{{.*}}, <4 x i32> %{{.*}}, <4 x i32> +// CHECK: icmp ugt <4 x i32> %{{.*}}, %{{.*}} +// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: extractelement <4 x i32> %{{.*}}, i32 0 + return _mm512_reduce_max_epu32(__W); } -// CHECK-LABEL: define float @test_mm512_reduce_max_ps(<16 x float> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store <16 x float> [[TMP0]], <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x float> [[TMP1]] to <8 x double> -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x double> [[EXTRACT_I]] to <8 x float> -// CHECK-NEXT: store <8 x float> [[TMP3]], <8 x float>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x float> [[TMP4]] to <8 x double> -// CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x double> [[EXTRACT2_I]] to <8 x float> -// CHECK-NEXT: store <8 x float> [[TMP6]], <8 x float>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP7:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP8:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[__A_ADDR_I14_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP8]], <8 x float>* [[__B_ADDR_I15_I]], align 32 -// CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I14_I]], align 32 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x float>, <8 x float>* [[__B_ADDR_I15_I]], align 32 -// CHECK-NEXT: [[TMP11:%.*]] = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> [[TMP9]], <8 x float> [[TMP10]]) #2 -// CHECK-NEXT: store <8 x float> [[TMP11]], <8 x float>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP12:%.*]] = load <8 x float>, <8 x 
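The concise checks above encode the same log2 tree reduction that the deleted
autogenerated block spelled out line by line: extract the two halves of the
vector, combine them lanewise (icmp + select for the integer forms), and
repeat until a single lane is left. A minimal C sketch of that shape,
assuming AVX512F/AVX2/SSE4.1 intrinsics; the names here are illustrative,
not clang's header implementation:

#include <immintrin.h>

// Illustrative sketch, not clang's header code: the tree reduction the
// CHECK lines describe for _mm512_reduce_max_epi32. Each step halves the
// number of live lanes with an extract/shuffle followed by a vector max.
static inline int reduce_max_epi32_sketch(__m512i v) {
  __m256i lo8 = _mm512_extracti64x4_epi64(v, 0);          // low 256 bits
  __m256i hi8 = _mm512_extracti64x4_epi64(v, 1);          // high 256 bits
  __m256i m8  = _mm256_max_epi32(lo8, hi8);               // 8 lanes live
  __m128i m4  = _mm_max_epi32(_mm256_extracti128_si256(m8, 0),
                              _mm256_extracti128_si256(m8, 1)); // 4 lanes
  __m128i m2  = _mm_max_epi32(m4,
                  _mm_shuffle_epi32(m4, _MM_SHUFFLE(1, 0, 3, 2))); // 2 lanes
  __m128i m1  = _mm_max_epi32(m2,
                  _mm_shuffle_epi32(m2, _MM_SHUFFLE(2, 3, 0, 1))); // 1 lane
  return _mm_cvtsi128_si32(m1);               // the "extractelement ... i32 0"
}

The unsigned variant is structurally identical; only the comparison changes
(icmp ugt, i.e. _mm_max_epu32), which is the one difference between the two
CHECK blocks above.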
-// CHECK-LABEL: define float @test_mm512_reduce_max_ps(<16 x float> %__W) #0 {
 float test_mm512_reduce_max_ps(__m512 __W){
+// CHECK-LABEL: define float @test_mm512_reduce_max_ps(
+// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32>
+// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32>
+// CHECK: call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
+// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32>
+// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32>
+// CHECK: call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
+// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32>
+// CHECK: call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
+// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32>
+// CHECK: call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
+// CHECK: extractelement <4 x float> %{{.*}}, i32 0
   return _mm512_reduce_max_ps(__W);
 }
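For the float reduction the combining step is a vector max intrinsic rather
than icmp + select, so the checks match calls to @llvm.x86.avx.max.ps.256 and
@llvm.x86.sse.max.ps, and the 256-bit halves come out through <8 x double>
bitcasts because the header extracts them as packed double. A sketch under
the same assumptions as the previous one:

#include <immintrin.h>

// Illustrative sketch: MAXPS once at 256-bit width, then three times at
// 128-bit width, then extract lane 0. The double casts mirror the
// <8 x double> shuffles visible in the IR checks.
static inline float reduce_max_ps_sketch(__m512 v) {
  __m256 lo = _mm512_castps512_ps256(v);
  __m256 hi = _mm256_castpd_ps(
      _mm512_extractf64x4_pd(_mm512_castps_pd(v), 1));
  __m256 m8 = _mm256_max_ps(lo, hi);                       // avx.max.ps.256
  __m128 m4 = _mm_max_ps(_mm256_castps256_ps128(m8),
                         _mm256_extractf128_ps(m8, 1));    // sse.max.ps
  __m128 m2 = _mm_max_ps(m4, _mm_shuffle_ps(m4, m4, _MM_SHUFFLE(1, 0, 3, 2)));
  __m128 m1 = _mm_max_ps(m2, _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(2, 3, 0, 1)));
  return _mm_cvtss_f32(m1);                                // lane 0
}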
-// CHECK-LABEL: define i32 @test_mm512_reduce_min_epi32(<8 x i64> %__W) #0 {
 int test_mm512_reduce_min_epi32(__m512i __W){
+// CHECK-LABEL: @test_mm512_reduce_min_epi32(
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32>
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32>
+// CHECK: icmp slt <8 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32>
+// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32>
+// CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32>
+// CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32>
+// CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: extractelement <4 x i32> %{{.*}}, i32 0
   return _mm512_reduce_min_epi32(__W);
 }
-// CHECK-LABEL: define i32 @test_mm512_reduce_min_epu32(<8 x i64> %__W) #0 {
 unsigned int test_mm512_reduce_min_epu32(__m512i __W){
-  return _mm512_reduce_min_epu32(__W);
+// CHECK-LABEL: @test_mm512_reduce_min_epu32(
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32>
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32>
+// CHECK: icmp ult <8 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32>
+// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32>
+// CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32>
+// CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32>
+// CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: extractelement <4 x i32> %{{.*}}, i32 0
+  return _mm512_reduce_min_epu32(__W);
 }
-// CHECK-LABEL: define float @test_mm512_reduce_min_ps(<16 x float> %__W) #0 {
 float test_mm512_reduce_min_ps(__m512 __W){
+// CHECK-LABEL: define float @test_mm512_reduce_min_ps(
+// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32>
+// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32>
+// CHECK: call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
+// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32>
+// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32>
+// CHECK: call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
+// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32>
+// CHECK: call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
+// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32>
+// CHECK: call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
+// CHECK: extractelement <4 x float> %{{.*}}, i32 0
   return _mm512_reduce_min_ps(__W);
 }
-// CHECK-LABEL: define i32 @test_mm512_mask_reduce_max_epi32(i16 zeroext %__M, <8 x i64> %__W) #0 {
 int test_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __W){
+// CHECK-LABEL: @test_mm512_mask_reduce_max_epi32(
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32>
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32>
+// CHECK: icmp sgt <8 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32>
+// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32>
+// CHECK: icmp sgt <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32>
+// CHECK: icmp sgt <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32>
+// CHECK: icmp sgt <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: extractelement <4 x i32> %{{.*}}, i32 0
   return _mm512_mask_reduce_max_epi32(__M, __W);
 }
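The _mm512_mask_* forms first merge the operation's identity element into the
masked-off lanes: INT32_MIN for signed max (the -2147483648 splat in the
deleted block), zeroinitializer for the unsigned max below, and -infinity
(0xFFF0000000000000) for the float version. That merge is the i16-to-<16 x i1>
bitcast plus <16 x i32> select the first two checks verify; the reduction
afterwards is unchanged. A hedged sketch, reusing reduce_max_epi32_sketch
from the earlier example:

#include <immintrin.h>
#include <limits.h>

// Illustrative sketch: inactive lanes are replaced by the identity for
// signed max (INT_MIN) before the ordinary tree reduction runs.
static inline int mask_reduce_max_epi32_sketch(__mmask16 m, __m512i v) {
  v = _mm512_mask_mov_epi32(_mm512_set1_epi32(INT_MIN), m, v);
  return reduce_max_epi32_sketch(v);  // tree reduction sketched earlier
}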
CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2
-// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2
-// CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store i16 [[TMP2]], i16* [[__U_ADDR_I_I]], align 2
-// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP4:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2
-// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP5]] to <16 x i32>
-// CHECK-NEXT: store <8 x i64> zeroinitializer, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64
-// CHECK-NEXT: [[TMP7:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64
-// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP7]] to <16 x i32>
-// CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
-// CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP6]], <16 x i32> [[TMP8]]
-// CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP10]] to <8 x i64>
-// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP12]], <8 x i64> undef, <4 x i32>
-// CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32
-// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[EXTRACT3_I:%.*]] = shufflevector <8 x i64> [[TMP13]], <8 x i64> undef, <4 x i32>
-// CHECK-NEXT: store <4 x i64> [[EXTRACT3_I]], <4 x i64>* [[__T2_I]], align 32
-// CHECK-NEXT: [[TMP14:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32
-// CHECK-NEXT: [[TMP15:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32
-// CHECK-NEXT: store <4 x i64> [[TMP14]], <4 x i64>* [[__A2_ADDR_I_I]], align 32
-// CHECK-NEXT: store <4 x i64> [[TMP15]], <4 x i64>* [[__B_ADDR_I_I]], align 32
-// CHECK-NEXT: [[TMP16:%.*]] = load <4 x i64>, <4 x i64>* [[__A2_ADDR_I_I]], align 32
-// CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i64> [[TMP16]] to <8 x i32>
-// CHECK-NEXT: [[TMP18:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32
-// CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i64> [[TMP18]] to <8 x i32>
-// CHECK-NEXT: [[TMP20:%.*]] = icmp ugt <8 x i32> [[TMP17]], [[TMP19]]
-// CHECK-NEXT: [[TMP21:%.*]] = select <8 x i1> [[TMP20]], <8 x i32> [[TMP17]], <8 x i32> [[TMP19]]
-// CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i32> [[TMP21]] to <4 x i64>
-// CHECK-NEXT: store <4 x i64> [[TMP22]], <4 x i64>* [[__T3_I]], align 32
-// CHECK-NEXT: [[TMP23:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32
-// CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x i64> [[TMP23]], <4 x i64> undef, <2 x i32>
-// CHECK-NEXT: store <2 x i64> [[EXTRACT5_I]], <2 x i64>* [[__T4_I]], align 16
-// CHECK-NEXT: [[TMP24:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32
-// CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <4 x i64> [[TMP24]], <4 x i64> undef, <2 x i32>
-// CHECK-NEXT: store <2 x i64> [[EXTRACT6_I]], <2 x i64>* [[__T5_I]], align 16
-// CHECK-NEXT: [[TMP25:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16
-// CHECK-NEXT: [[TMP26:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[__V1_ADDR_I13_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP26]], <2 x i64>* [[__V2_ADDR_I14_I]], align 16
-// CHECK-NEXT: [[TMP27:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I13_I]], align 16
-// CHECK-NEXT: [[TMP28:%.*]] = bitcast <2 x i64> [[TMP27]] to <4 x i32>
-// CHECK-NEXT: [[TMP29:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I14_I]], align 16
-// CHECK-NEXT: [[TMP30:%.*]] = bitcast <2 x i64> [[TMP29]] to <4 x i32>
-// CHECK-NEXT: [[TMP31:%.*]] = icmp ugt <4 x i32> [[TMP28]], [[TMP30]]
-// CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP31]], <4 x i32> [[TMP28]], <4 x i32> [[TMP30]]
-// CHECK-NEXT: [[TMP33:%.*]] = bitcast <4 x i32> [[TMP32]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP34:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP35:%.*]] = bitcast <2 x i64> [[TMP34]] to <4 x i32>
-// CHECK-NEXT: [[TMP36:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP37:%.*]] = bitcast <2 x i64> [[TMP36]] to <4 x i32>
-// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP35]], <4 x i32> [[TMP37]], <4 x i32>
-// CHECK-NEXT: [[TMP38:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[TMP38]], <2 x i64>* [[__T7_I]], align 16
-// CHECK-NEXT: [[TMP39:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP40:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP39]], <2 x i64>* [[__V1_ADDR_I11_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP40]], <2 x i64>* [[__V2_ADDR_I12_I]], align 16
-// CHECK-NEXT: [[TMP41:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I11_I]], align 16
-// CHECK-NEXT: [[TMP42:%.*]] = bitcast <2 x i64> [[TMP41]] to <4 x i32>
-// CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I12_I]], align 16
-// CHECK-NEXT: [[TMP44:%.*]] = bitcast <2 x i64> [[TMP43]] to <4 x i32>
-// CHECK-NEXT: [[TMP45:%.*]] = icmp ugt <4 x i32> [[TMP42]], [[TMP44]]
-// CHECK-NEXT: [[TMP46:%.*]] = select <4 x i1> [[TMP45]], <4 x i32> [[TMP42]], <4 x i32> [[TMP44]]
-// CHECK-NEXT: [[TMP47:%.*]] = bitcast <4 x i32> [[TMP46]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[TMP47]], <2 x i64>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP48:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP49:%.*]] = bitcast <2 x i64> [[TMP48]] to <4 x i32>
-// CHECK-NEXT: [[TMP50:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP51:%.*]] = bitcast <2 x i64> [[TMP50]] to <4 x i32>
-// CHECK-NEXT: [[SHUFFLE9_I:%.*]] = shufflevector <4 x i32> [[TMP49]], <4 x i32> [[TMP51]], <4 x i32>
-// CHECK-NEXT: [[TMP52:%.*]] = bitcast <4 x i32> [[SHUFFLE9_I]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[TMP52]], <2 x i64>* [[__T9_I]], align 16
-// CHECK-NEXT: [[TMP53:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP54:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP53]], <2 x i64>* [[__V1_ADDR_I_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP54]], <2 x i64>* [[__V2_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP55:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP56:%.*]] = bitcast <2 x i64> [[TMP55]] to <4 x i32>
-// CHECK-NEXT: [[TMP57:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP58:%.*]] = bitcast <2 x i64> [[TMP57]] to <4 x i32>
-// CHECK-NEXT: [[TMP59:%.*]] = icmp ugt <4 x i32> [[TMP56]], [[TMP58]]
-// CHECK-NEXT: [[TMP60:%.*]] = select <4 x i1> [[TMP59]], <4 x i32> [[TMP56]], <4 x i32> [[TMP58]]
-// CHECK-NEXT: [[TMP61:%.*]] = bitcast <4 x i32> [[TMP60]] to <2 x i64>
-// CHECK-NEXT: [[TMP62:%.*]] = bitcast <2 x i64> [[TMP61]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[TMP62]], <4 x i32>* [[__T10_I]], align 16
-// CHECK-NEXT: [[TMP63:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16
-// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP63]], i32 0
-// CHECK-NEXT: ret i32 [[VECEXT_I]]
 unsigned int test_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __W){
+// CHECK-LABEL: @test_mm512_mask_reduce_max_epu32(
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32>
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32>
+// CHECK: icmp ugt <8 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32>
+// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32>
+// CHECK: icmp ugt <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32>
+// CHECK: icmp ugt <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32>
+// CHECK: icmp ugt <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 return _mm512_mask_reduce_max_epu32(__M, __W);
 }
-// CHECK-LABEL: define float @test_mm512_mask_reduce_max_ps(i16 zeroext %__M, <16 x float> %__W) #0 {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca float, align 4
-// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x float>, align 64
-// CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <16 x float>, align 64
-// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <16 x float>, align 64
-// CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__A_ADDR_I16_I:%.*]] = alloca <8 x float>, align 32
-// CHECK-NEXT: [[__B_ADDR_I17_I:%.*]] = alloca <8 x float>, align 32
-// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64
-// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32
-// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32
-// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32
-// CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64
-// CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2
-// CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 64
-// CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64
-// CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2
-// CHECK-NEXT: store <16 x float> [[TMP1]], <16 x float>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store float 0xFFF0000000000000, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0
-// CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <16 x float> [[VECINIT_I_I]], float [[TMP3]], i32 1
-// CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <16 x float> [[VECINIT1_I_I]], float [[TMP4]], i32 2
-// CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <16 x float> [[VECINIT2_I_I]], float [[TMP5]], i32 3
-// CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <16 x float> [[VECINIT3_I_I]], float [[TMP6]], i32 4
-// CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <16 x float> [[VECINIT4_I_I]], float [[TMP7]], i32 5
-// CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <16 x float> [[VECINIT5_I_I]], float [[TMP8]], i32 6
-// CHECK-NEXT: [[TMP9:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <16 x float> [[VECINIT6_I_I]], float [[TMP9]], i32 7
-// CHECK-NEXT: [[TMP10:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <16 x float> [[VECINIT7_I_I]], float [[TMP10]], i32 8
-// CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT9_I_I:%.*]] = insertelement <16 x float> [[VECINIT8_I_I]], float [[TMP11]], i32 9
-// CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT10_I_I:%.*]] = insertelement <16 x float> [[VECINIT9_I_I]], float [[TMP12]], i32 10
-// CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT11_I_I:%.*]] = insertelement <16 x float> [[VECINIT10_I_I]], float [[TMP13]], i32 11
-// CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT12_I_I:%.*]] = insertelement <16 x float> [[VECINIT11_I_I]], float [[TMP14]], i32 12
-// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT13_I_I:%.*]] = insertelement <16 x float> [[VECINIT12_I_I]], float [[TMP15]], i32 13
-// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x float> [[VECINIT13_I_I]], float [[TMP16]], i32 14
-// CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x float> [[VECINIT14_I_I]], float [[TMP17]], i32 15
-// CHECK-NEXT: store <16 x float> [[VECINIT15_I_I]], <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64
-// CHECK-NEXT: [[TMP18:%.*]] = load <16 x float>, <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64
-// CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2
-// CHECK-NEXT: [[TMP20:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store <16 x float> [[TMP18]], <16 x float>* [[__W2_ADDR_I_I]], align 64
-// CHECK-NEXT: store i16 [[TMP19]], i16* [[__U_ADDR_I_I]], align 2
-// CHECK-NEXT: store <16 x float> [[TMP20]], <16 x float>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP21:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2
-// CHECK-NEXT: [[TMP22:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP23:%.*]] = load <16 x float>, <16 x float>* [[__W2_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP24:%.*]] = bitcast i16 [[TMP21]] to <16 x i1>
-// CHECK-NEXT: [[TMP25:%.*]] = select <16 x i1> [[TMP24]], <16 x float> [[TMP22]], <16 x float> [[TMP23]]
-// CHECK-NEXT: store <16 x float> [[TMP25]], <16 x float>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP26:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP27:%.*]] = bitcast <16 x float> [[TMP26]] to <8 x double>
-// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP27]], <8 x double> undef, <4 x i32>
-// CHECK-NEXT: [[TMP28:%.*]] = bitcast <4 x double> [[EXTRACT_I]] to <8 x float>
-// CHECK-NEXT: store <8 x float> [[TMP28]], <8 x float>* [[__T1_I]], align 32
-// CHECK-NEXT: [[TMP29:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x float> [[TMP29]] to <8 x double>
-// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x double> [[TMP30]], <8 x double> undef, <4 x i32>
-// CHECK-NEXT: [[TMP31:%.*]] = bitcast <4 x double> [[EXTRACT4_I]] to <8 x float>
-// CHECK-NEXT: store <8 x float> [[TMP31]], <8 x float>* [[__T2_I]], align 32
-// CHECK-NEXT: [[TMP32:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32
-// CHECK-NEXT: [[TMP33:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32
-// CHECK-NEXT: store <8 x float> [[TMP32]], <8 x float>* [[__A_ADDR_I16_I]], align 32
-// CHECK-NEXT: store <8 x float> [[TMP33]], <8 x float>* [[__B_ADDR_I17_I]], align 32
-// CHECK-NEXT: [[TMP34:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I16_I]], align 32
-// CHECK-NEXT: [[TMP35:%.*]] = load <8 x float>, <8 x float>* [[__B_ADDR_I17_I]], align 32
-// CHECK-NEXT: [[TMP36:%.*]] = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> [[TMP34]], <8 x float> [[TMP35]]) #2
-// CHECK-NEXT: store <8 x float> [[TMP36]], <8 x float>* [[__T3_I]], align 32
-// CHECK-NEXT: [[TMP37:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32
-// CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <8 x float> [[TMP37]], <8 x float> undef, <4 x i32>
-// CHECK-NEXT: store <4 x float> [[EXTRACT6_I]], <4 x float>* [[__T4_I]], align 16
-// CHECK-NEXT: [[TMP38:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32
-// CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <8 x float> [[TMP38]], <8 x float> undef, <4 x i32>
-// CHECK-NEXT: store <4 x float> [[EXTRACT7_I]], <4 x float>* [[__T5_I]], align 16
-// CHECK-NEXT: [[TMP39:%.*]] = load <4 x float>, <4 x float>* [[__T4_I]], align 16
-// CHECK-NEXT: [[TMP40:%.*]] = load <4 x float>, <4 x float>* [[__T5_I]], align 16
-// CHECK-NEXT: store <4 x float> [[TMP39]], <4 x float>* [[__A_ADDR_I14_I]], align 16
-// CHECK-NEXT: store <4 x float> [[TMP40]], <4 x float>* [[__B_ADDR_I15_I]], align 16
-// CHECK-NEXT: [[TMP41:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I14_I]], align 16
-// CHECK-NEXT: [[TMP42:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I15_I]], align 16
-// CHECK-NEXT: [[TMP43:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP41]], <4 x float> [[TMP42]]) #2
-// CHECK-NEXT: store <4 x float> [[TMP43]], <4 x float>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP44:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP45:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16
-// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[TMP44]], <4 x float> [[TMP45]], <4 x i32>
-// CHECK-NEXT: store <4 x float> [[SHUFFLE_I]], <4 x float>* [[__T7_I]], align 16
-// CHECK-NEXT: [[TMP46:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP47:%.*]] = load <4 x float>, <4 x float>* [[__T7_I]], align 16
-// CHECK-NEXT: store <4 x float> [[TMP46]], <4 x float>* [[__A_ADDR_I12_I]], align 16
-// CHECK-NEXT: store <4 x float> [[TMP47]], <4 x float>* [[__B_ADDR_I13_I]], align 16
-// CHECK-NEXT: [[TMP48:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I12_I]], align 16
-// CHECK-NEXT: [[TMP49:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I13_I]], align 16
-// CHECK-NEXT: [[TMP50:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP48]], <4 x float> [[TMP49]]) #2
-// CHECK-NEXT: store <4 x float> [[TMP50]], <4 x float>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP51:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP52:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16
-// CHECK-NEXT: [[SHUFFLE10_I:%.*]] = shufflevector <4 x float> [[TMP51]], <4 x float> [[TMP52]], <4 x i32>
-// CHECK-NEXT: store <4 x float> [[SHUFFLE10_I]], <4 x float>* [[__T9_I]], align 16
-// CHECK-NEXT: [[TMP53:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP54:%.*]] = load <4 x float>, <4 x float>* [[__T9_I]], align 16
-// CHECK-NEXT: store <4 x float> [[TMP53]], <4 x float>* [[__A2_ADDR_I_I]], align 16
-// CHECK-NEXT: store <4 x float> [[TMP54]], <4 x float>* [[__B_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP55:%.*]] = load <4 x float>, <4 x float>* [[__A2_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP56:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP57:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP55]], <4 x float> [[TMP56]]) #2
-// CHECK-NEXT: store <4 x float> [[TMP57]], <4 x float>* [[__T10_I]], align 16
-// CHECK-NEXT: [[TMP58:%.*]] = load <4 x float>, <4 x float>* [[__T10_I]], align 16
-// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP58]], i32 0
-// CHECK-NEXT: ret float [[VECEXT_I]]
 float test_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __W){
+// CHECK-LABEL: define float @test_mm512_mask_reduce_max_ps(
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32>
+// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32>
+// CHECK: call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
+// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32>
+// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32>
+// CHECK: call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
+// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32>
+// CHECK: call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
+// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32>
+// CHECK: call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
+// CHECK: extractelement <4 x float> %{{.*}}, i32 0
 return _mm512_mask_reduce_max_ps(__M, __W);
 }
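The float variant tested above differs from the integer one in two ways: each combine step is a call to the AVX/SSE max intrinsic instead of icmp plus select, and masked-off lanes are seeded with an identity that can never win the reduction. The removed IR shows that identity explicitly: a splat of 0xFFF0000000000000 (negative infinity) for the float max, where the epu32 test used zeroinitializer. A one-line model of the seeding step (illustrative only, not the intrinsic implementation):

    #include <cmath>

    // Dead lanes receive an identity so they cannot affect the result;
    // -INFINITY mirrors the 0xFFF0000000000000 splat in the removed IR.
    inline float seedLaneForMax(bool Live, float Lane) {
      return Live ? Lane : -INFINITY;
    }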
-// CHECK-LABEL: define i32 @test_mm512_mask_reduce_min_epi32(i16 zeroext %__M, <8 x i64> %__W) #0 {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64
-// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32
-// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32
-// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32
-// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32
-// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32
-// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16
-// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2
-// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2
-// CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store i32 2147483647, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <16 x i32> undef, i32 [[TMP2]], i32 0
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <16 x i32> [[VECINIT_I_I]], i32 [[TMP3]], i32 1
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <16 x i32> [[VECINIT1_I_I]], i32 [[TMP4]], i32 2
-// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <16 x i32> [[VECINIT2_I_I]], i32 [[TMP5]], i32 3
-// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <16 x i32> [[VECINIT3_I_I]], i32 [[TMP6]], i32 4
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <16 x i32> [[VECINIT4_I_I]], i32 [[TMP7]], i32 5
-// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <16 x i32> [[VECINIT5_I_I]], i32 [[TMP8]], i32 6
-// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <16 x i32> [[VECINIT6_I_I]], i32 [[TMP9]], i32 7
-// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <16 x i32> [[VECINIT7_I_I]], i32 [[TMP10]], i32 8
-// CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT9_I_I:%.*]] = insertelement <16 x i32> [[VECINIT8_I_I]], i32 [[TMP11]], i32 9
-// CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT10_I_I:%.*]] = insertelement <16 x i32> [[VECINIT9_I_I]], i32 [[TMP12]], i32 10
-// CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT11_I_I:%.*]] = insertelement <16 x i32> [[VECINIT10_I_I]], i32 [[TMP13]], i32 11
-// CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT12_I_I:%.*]] = insertelement <16 x i32> [[VECINIT11_I_I]], i32 [[TMP14]], i32 12
-// CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT13_I_I:%.*]] = insertelement <16 x i32> [[VECINIT12_I_I]], i32 [[TMP15]], i32 13
-// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x i32> [[VECINIT13_I_I]], i32 [[TMP16]], i32 14
-// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x i32> [[VECINIT14_I_I]], i32 [[TMP17]], i32 15
-// CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64
-// CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64
-// CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i32> [[TMP18]] to <8 x i64>
-// CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2
-// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__W_ADDR_I_I]], align 64
-// CHECK-NEXT: store i16 [[TMP20]], i16* [[__U_ADDR_I_I]], align 2
-// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP22:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2
-// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i64> [[TMP23]] to <16 x i32>
-// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP26:%.*]] = bitcast <8 x i64> [[TMP25]] to <16 x i32>
-// CHECK-NEXT: [[TMP27:%.*]] = bitcast i16 [[TMP22]] to <16 x i1>
-// CHECK-NEXT: [[TMP28:%.*]] = select <16 x i1> [[TMP27]], <16 x i32> [[TMP24]], <16 x i32> [[TMP26]]
-// CHECK-NEXT: [[TMP29:%.*]] = bitcast <16 x i32> [[TMP28]] to <8 x i64>
-// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP30]], <8 x i64> undef, <4 x i32>
-// CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32
-// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x i64> [[TMP31]], <8 x i64> undef, <4 x i32>
-// CHECK-NEXT: store <4 x i64> [[EXTRACT4_I]], <4 x i64>* [[__T2_I]], align 32
-// CHECK-NEXT: [[TMP32:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32
-// CHECK-NEXT: [[TMP33:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32
-// CHECK-NEXT: store <4 x i64> [[TMP32]], <4 x i64>* [[__A2_ADDR_I_I]], align 32
-// CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* [[__B_ADDR_I_I]], align 32
-// CHECK-NEXT: [[TMP34:%.*]] = load <4 x i64>, <4 x i64>* [[__A2_ADDR_I_I]], align 32
-// CHECK-NEXT: [[TMP35:%.*]] = bitcast <4 x i64> [[TMP34]] to <8 x i32>
-// CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32
-// CHECK-NEXT: [[TMP37:%.*]] = bitcast <4 x i64> [[TMP36]] to <8 x i32>
-// CHECK-NEXT: [[TMP38:%.*]] = icmp slt <8 x i32> [[TMP35]], [[TMP37]]
-// CHECK-NEXT: [[TMP39:%.*]] = select <8 x i1> [[TMP38]], <8 x i32> [[TMP35]], <8 x i32> [[TMP37]]
-// CHECK-NEXT: [[TMP40:%.*]] = bitcast <8 x i32> [[TMP39]] to <4 x i64>
-// CHECK-NEXT: store <4 x i64> [[TMP40]], <4 x i64>* [[__T3_I]], align 32
-// CHECK-NEXT: [[TMP41:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32
-// CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <4 x i64> [[TMP41]], <4 x i64> undef, <2 x i32>
-// CHECK-NEXT: store <2 x i64> [[EXTRACT6_I]], <2 x i64>* [[__T4_I]], align 16
-// CHECK-NEXT: [[TMP42:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32
-// CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <4 x i64> [[TMP42]], <4 x i64> undef, <2 x i32>
-// CHECK-NEXT: store <2 x i64> [[EXTRACT7_I]], <2 x i64>* [[__T5_I]], align 16
-// CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16
-// CHECK-NEXT: [[TMP44:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP43]], <2 x i64>* [[__V1_ADDR_I14_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP44]], <2 x i64>* [[__V2_ADDR_I15_I]], align 16
-// CHECK-NEXT: [[TMP45:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I14_I]], align 16
-// CHECK-NEXT: [[TMP46:%.*]] = bitcast <2 x i64> [[TMP45]] to <4 x i32>
-// CHECK-NEXT: [[TMP47:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I15_I]], align 16
-// CHECK-NEXT: [[TMP48:%.*]] = bitcast <2 x i64> [[TMP47]] to <4 x i32>
-// CHECK-NEXT: [[TMP49:%.*]] = icmp slt <4 x i32> [[TMP46]], [[TMP48]]
-// CHECK-NEXT: [[TMP50:%.*]] = select <4 x i1> [[TMP49]], <4 x i32> [[TMP46]], <4 x i32> [[TMP48]]
-// CHECK-NEXT: [[TMP51:%.*]] = bitcast <4 x i32> [[TMP50]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[TMP51]], <2 x i64>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP52:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP53:%.*]] = bitcast <2 x i64> [[TMP52]] to <4 x i32>
-// CHECK-NEXT: [[TMP54:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP55:%.*]] = bitcast <2 x i64> [[TMP54]] to <4 x i32>
-// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP53]], <4 x i32> [[TMP55]], <4 x i32>
-// CHECK-NEXT: [[TMP56:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[TMP56]], <2 x i64>* [[__T7_I]], align 16
-// CHECK-NEXT: [[TMP57:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP58:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP57]], <2 x i64>* [[__V1_ADDR_I12_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP58]], <2 x i64>* [[__V2_ADDR_I13_I]], align 16
-// CHECK-NEXT: [[TMP59:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I12_I]], align 16
-// CHECK-NEXT: [[TMP60:%.*]] = bitcast <2 x i64> [[TMP59]] to <4 x i32>
-// CHECK-NEXT: [[TMP61:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I13_I]], align 16
-// CHECK-NEXT: [[TMP62:%.*]] = bitcast <2 x i64> [[TMP61]] to <4 x i32>
-// CHECK-NEXT: [[TMP63:%.*]] = icmp slt <4 x i32> [[TMP60]], [[TMP62]]
-// CHECK-NEXT: [[TMP64:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP60]], <4 x i32> [[TMP62]]
-// CHECK-NEXT: [[TMP65:%.*]] = bitcast <4 x i32> [[TMP64]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[TMP65]], <2 x i64>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP66:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP67:%.*]] = bitcast <2 x i64> [[TMP66]] to <4 x i32>
-// CHECK-NEXT: [[TMP68:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP69:%.*]] = bitcast <2 x i64> [[TMP68]] to <4 x i32>
-// CHECK-NEXT: [[SHUFFLE10_I:%.*]] = shufflevector <4 x i32> [[TMP67]], <4 x i32> [[TMP69]], <4 x i32>
-// CHECK-NEXT: [[TMP70:%.*]] = bitcast <4 x i32> [[SHUFFLE10_I]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[TMP70]], <2 x i64>* [[__T9_I]], align 16
-// CHECK-NEXT: [[TMP71:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP72:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP71]], <2 x i64>* [[__V1_ADDR_I_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP72]], <2 x i64>* [[__V2_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP73:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP74:%.*]] = bitcast <2 x i64> [[TMP73]] to <4 x i32>
-// CHECK-NEXT: [[TMP75:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP76:%.*]] = bitcast <2 x i64> [[TMP75]] to <4 x i32>
-// CHECK-NEXT: [[TMP77:%.*]] = icmp slt <4 x i32> [[TMP74]], [[TMP76]]
-// CHECK-NEXT: [[TMP78:%.*]] = select <4 x i1> [[TMP77]], <4 x i32> [[TMP74]], <4 x i32> [[TMP76]]
-// CHECK-NEXT: [[TMP79:%.*]] = bitcast <4 x i32> [[TMP78]] to <2 x i64>
-// CHECK-NEXT: [[TMP80:%.*]] = bitcast <2 x i64> [[TMP79]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[TMP80]], <4 x i32>* [[__T10_I]], align 16
-// CHECK-NEXT: [[TMP81:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16
-// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP81]], i32 0
-// CHECK-NEXT: ret i32 [[VECEXT_I]]
 int test_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __W){
+// CHECK-LABEL: @test_mm512_mask_reduce_min_epi32(
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32>
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32>
+// CHECK: icmp slt <8 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32>
+// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32>
+// CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32>
+// CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32>
+// CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 return _mm512_mask_reduce_min_epi32(__M, __W);
 }
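Set beside the max_epu32 test, this one shows that the four integer reductions share the same shuffle tree and differ only in the comparison predicate (slt here, ugt/ult in the unsigned tests) and in the identity splatted into masked lanes (2147483647, i.e. INT32_MAX, in the removed IR above). The per-pair combine reduces to:

    #include <cstdint>

    // The only per-variant difference is the comparison used:
    inline int32_t combineMinEpi32(int32_t A, int32_t B) { return A < B ? A : B; }    // icmp slt
    inline uint32_t combineMinEpu32(uint32_t A, uint32_t B) { return A < B ? A : B; } // icmp ult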
-// CHECK-LABEL: define i32 @test_mm512_mask_reduce_min_epu32(i16 zeroext %__M, <8 x i64> %__W) #0 {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64
-// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32
-// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32
-// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32
-// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32
-// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32
-// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16
-// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2
-// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2
-// CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store i32 -1, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <16 x i32> undef, i32 [[TMP2]], i32 0
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <16 x i32> [[VECINIT_I_I]], i32 [[TMP3]], i32 1
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <16 x i32> [[VECINIT1_I_I]], i32 [[TMP4]], i32 2
-// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <16 x i32> [[VECINIT2_I_I]], i32 [[TMP5]], i32 3
-// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <16 x i32> [[VECINIT3_I_I]], i32 [[TMP6]], i32 4
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <16 x i32> [[VECINIT4_I_I]], i32 [[TMP7]], i32 5
-// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <16 x i32> [[VECINIT5_I_I]], i32 [[TMP8]], i32 6
-// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <16 x i32> [[VECINIT6_I_I]], i32 [[TMP9]], i32 7
-// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <16 x i32> [[VECINIT7_I_I]], i32 [[TMP10]], i32 8
-// CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT9_I_I:%.*]] = insertelement <16 x i32> [[VECINIT8_I_I]], i32 [[TMP11]], i32 9
-// CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT10_I_I:%.*]] = insertelement <16 x i32> [[VECINIT9_I_I]], i32 [[TMP12]], i32 10
-// CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT11_I_I:%.*]] = insertelement <16 x i32> [[VECINIT10_I_I]], i32 [[TMP13]], i32 11
-// CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT12_I_I:%.*]] = insertelement <16 x i32> [[VECINIT11_I_I]], i32 [[TMP14]], i32 12
-// CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT13_I_I:%.*]] = insertelement <16 x i32> [[VECINIT12_I_I]], i32 [[TMP15]], i32 13
-// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x i32> [[VECINIT13_I_I]], i32 [[TMP16]], i32 14
-// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x i32> [[VECINIT14_I_I]], i32 [[TMP17]], i32 15
-// CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64
-// CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64
-// CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i32> [[TMP18]] to <8 x i64>
-// CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2
-// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__W_ADDR_I_I]], align 64
-// CHECK-NEXT: store i16 [[TMP20]], i16* [[__U_ADDR_I_I]], align 2
-// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP22:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2
-// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i64> [[TMP23]] to <16 x i32>
-// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP26:%.*]] = bitcast <8 x i64> [[TMP25]] to <16 x i32>
-// CHECK-NEXT: [[TMP27:%.*]] = bitcast i16 [[TMP22]] to <16 x i1>
-// CHECK-NEXT: [[TMP28:%.*]] = select <16 x i1> [[TMP27]], <16 x i32> [[TMP24]], <16 x i32> [[TMP26]]
-// CHECK-NEXT: [[TMP29:%.*]] = bitcast <16 x i32> [[TMP28]] to <8 x i64>
-// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP30]], <8 x i64> undef, <4 x i32>
-// CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32
-// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x i64> [[TMP31]], <8 x i64> undef, <4 x i32>
-// CHECK-NEXT: store <4 x i64> [[EXTRACT4_I]], <4 x i64>* [[__T2_I]], align 32
-// CHECK-NEXT: [[TMP32:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32
-// CHECK-NEXT: [[TMP33:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32
-// CHECK-NEXT: store <4 x i64> [[TMP32]], <4 x i64>* [[__A2_ADDR_I_I]], align 32
-// CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* [[__B_ADDR_I_I]], align 32
-// CHECK-NEXT: [[TMP34:%.*]] = load <4 x i64>, <4 x i64>* [[__A2_ADDR_I_I]], align 32
-// CHECK-NEXT: [[TMP35:%.*]] = bitcast <4 x i64> [[TMP34]] to <8 x i32>
-// CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32
-// CHECK-NEXT: [[TMP37:%.*]] = bitcast <4 x i64> [[TMP36]] to <8 x i32>
-// CHECK-NEXT: [[TMP38:%.*]] = icmp ult <8 x i32> [[TMP35]], [[TMP37]]
-// CHECK-NEXT: [[TMP39:%.*]] = select <8 x i1> [[TMP38]], <8 x i32> [[TMP35]], <8 x i32> [[TMP37]]
-// CHECK-NEXT: [[TMP40:%.*]] = bitcast <8 x i32> [[TMP39]] to <4 x i64>
-// CHECK-NEXT: store <4 x i64> [[TMP40]], <4 x i64>* [[__T3_I]], align 32
-// CHECK-NEXT: [[TMP41:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32
-// CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <4 x i64> [[TMP41]], <4 x i64> undef, <2 x i32>
-// CHECK-NEXT: store <2 x i64> [[EXTRACT6_I]], <2 x i64>* [[__T4_I]], align 16
-// CHECK-NEXT: [[TMP42:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32
-// CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <4 x i64> [[TMP42]], <4 x i64> undef, <2 x i32>
-// CHECK-NEXT: store <2 x i64> [[EXTRACT7_I]], <2 x i64>* [[__T5_I]], align 16
-// CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16
-// CHECK-NEXT: [[TMP44:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP43]], <2 x i64>* [[__V1_ADDR_I14_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP44]], <2 x i64>* [[__V2_ADDR_I15_I]], align 16
-// CHECK-NEXT: [[TMP45:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I14_I]], align 16
-// CHECK-NEXT: [[TMP46:%.*]] = bitcast <2 x i64> [[TMP45]] to <4 x i32>
-// CHECK-NEXT: [[TMP47:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I15_I]], align 16
-// CHECK-NEXT: [[TMP48:%.*]] = bitcast <2 x i64> [[TMP47]] to <4 x i32>
-// CHECK-NEXT: [[TMP49:%.*]] = icmp ult <4 x i32> [[TMP46]], [[TMP48]]
-// CHECK-NEXT: [[TMP50:%.*]] = select <4 x i1> [[TMP49]], <4 x i32> [[TMP46]], <4 x i32> [[TMP48]]
-// CHECK-NEXT: [[TMP51:%.*]] = bitcast <4 x i32> [[TMP50]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[TMP51]], <2 x i64>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP52:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP53:%.*]] = bitcast <2 x i64> [[TMP52]] to <4 x i32>
-// CHECK-NEXT: [[TMP54:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP55:%.*]] = bitcast <2 x i64> [[TMP54]] to <4 x i32>
-// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP53]], <4 x i32> [[TMP55]], <4 x i32>
-// CHECK-NEXT: [[TMP56:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[TMP56]], <2 x i64>* [[__T7_I]], align 16
-// CHECK-NEXT: [[TMP57:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP58:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP57]], <2 x i64>* [[__V1_ADDR_I12_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP58]], <2 x i64>* [[__V2_ADDR_I13_I]], align 16
-// CHECK-NEXT: [[TMP59:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I12_I]], align 16
-// CHECK-NEXT: [[TMP60:%.*]] = bitcast <2 x i64> [[TMP59]] to <4 x i32>
-// CHECK-NEXT: [[TMP61:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I13_I]], align 16
-// CHECK-NEXT: [[TMP62:%.*]] = bitcast <2 x i64> [[TMP61]] to <4 x i32>
-// CHECK-NEXT: [[TMP63:%.*]] = icmp ult <4 x i32> [[TMP60]], [[TMP62]]
-// CHECK-NEXT: [[TMP64:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP60]], <4 x i32> [[TMP62]]
-// CHECK-NEXT: [[TMP65:%.*]] = bitcast <4 x i32> [[TMP64]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[TMP65]], <2 x i64>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP66:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP67:%.*]] = bitcast <2 x i64> [[TMP66]] to <4 x i32>
-// CHECK-NEXT: [[TMP68:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP69:%.*]] = bitcast <2 x i64> [[TMP68]] to <4 x i32>
-// CHECK-NEXT: [[SHUFFLE10_I:%.*]] = shufflevector <4 x i32> [[TMP67]], <4 x i32> [[TMP69]], <4 x i32>
-// CHECK-NEXT: [[TMP70:%.*]] = bitcast <4 x i32> [[SHUFFLE10_I]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[TMP70]], <2 x i64>* [[__T9_I]], align 16
-// CHECK-NEXT: [[TMP71:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP72:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP71]], <2 x i64>* [[__V1_ADDR_I_I]], align 16
-// CHECK-NEXT: store <2 x i64> [[TMP72]], <2 x i64>* [[__V2_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP73:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP74:%.*]] = bitcast <2 x i64> [[TMP73]] to <4 x i32>
-// CHECK-NEXT: [[TMP75:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP76:%.*]] = bitcast <2 x i64> [[TMP75]] to <4 x i32>
-// CHECK-NEXT: [[TMP77:%.*]] = icmp ult <4 x i32> [[TMP74]], [[TMP76]]
-// CHECK-NEXT: [[TMP78:%.*]] = select <4 x i1> [[TMP77]], <4 x i32> [[TMP74]], <4 x i32> [[TMP76]]
-// CHECK-NEXT: [[TMP79:%.*]] = bitcast <4 x i32> [[TMP78]] to <2 x i64>
-// CHECK-NEXT: [[TMP80:%.*]] = bitcast <2 x i64> [[TMP79]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[TMP80]], <4 x i32>* [[__T10_I]], align 16
-// CHECK-NEXT: [[TMP81:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16
-// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP81]], i32 0
-// CHECK-NEXT: ret i32 [[VECEXT_I]]
 unsigned int test_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __W){
+// CHECK-LABEL: @test_mm512_mask_reduce_min_epu32(
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32>
+// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32>
+// CHECK: icmp ult <8 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32>
+// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32>
+// CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32>
+// CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32>
+// CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}}
+// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+// CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 return _mm512_mask_reduce_min_epu32(__M, __W);
 }
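The motivation for the rewrite is visible in the deleted lines: the autogenerated -O0 checks pin every alloca and SSA temporary by name, so any change that renumbers values invalidates an entire block. The hand-written form pins only the operations that prove the reduction is correct:

    // Old, brittle form: every SSA value is named, so renumbering breaks it.
    //   // CHECK-NEXT: [[TMP49:%.*]] = icmp ult <4 x i32> [[TMP46]], [[TMP48]]
    // New form: only the operation and types are pinned; operands are wildcards.
    //   // CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}}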
-// CHECK-LABEL: define float @test_mm512_mask_reduce_min_ps(i16 zeroext %__M, <16 x float> %__W) #0 {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca float, align 4
-// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x float>, align 64
-// CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <16 x float>, align 64
-// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <16 x float>, align 64
-// CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__A_ADDR_I16_I:%.*]] = alloca <8 x float>, align 32
-// CHECK-NEXT: [[__B_ADDR_I17_I:%.*]] = alloca <8 x float>, align 32
-// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64
-// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32
-// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32
-// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32
-// CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64
-// CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2
-// CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 64
-// CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64
-// CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2
-// CHECK-NEXT: store <16 x float> [[TMP1]], <16 x float>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store float 0x7FF0000000000000, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0
-// CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <16 x float> [[VECINIT_I_I]], float [[TMP3]], i32 1
-// CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <16 x float> [[VECINIT1_I_I]], float [[TMP4]], i32 2
-// CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <16 x float> [[VECINIT2_I_I]], float [[TMP5]], i32 3
-// CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <16 x float> [[VECINIT3_I_I]], float [[TMP6]], i32 4
-// CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <16 x float> [[VECINIT4_I_I]], float [[TMP7]], i32 5
-// CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <16 x float> [[VECINIT5_I_I]], float [[TMP8]], i32 6
-// CHECK-NEXT: [[TMP9:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <16 x float> [[VECINIT6_I_I]], float [[TMP9]], i32 7
-// CHECK-NEXT: [[TMP10:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <16 x float> [[VECINIT7_I_I]], float [[TMP10]], i32 8
-// CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT9_I_I:%.*]] = insertelement <16 x float> [[VECINIT8_I_I]], float [[TMP11]], i32 9
-// CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT10_I_I:%.*]] = insertelement <16 x float> [[VECINIT9_I_I]], float [[TMP12]], i32 10
-// CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT11_I_I:%.*]] = insertelement <16 x float> [[VECINIT10_I_I]], float [[TMP13]], i32 11
-// CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT12_I_I:%.*]] = insertelement <16 x float> [[VECINIT11_I_I]], float [[TMP14]], i32 12
-// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT13_I_I:%.*]] = insertelement <16 x float> [[VECINIT12_I_I]], float [[TMP15]], i32 13
-// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x float> [[VECINIT13_I_I]], float [[TMP16]], i32 14
-// CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
-// CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x float> [[VECINIT14_I_I]], float [[TMP17]], i32 15
-// CHECK-NEXT: store <16 x float> [[VECINIT15_I_I]], <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64
-// CHECK-NEXT: [[TMP18:%.*]] = load <16 x float>, <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64
-// CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2
-// CHECK-NEXT: [[TMP20:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: store <16 x float> [[TMP18]], <16 x float>* [[__W2_ADDR_I_I]], align 64
-// CHECK-NEXT: store i16 [[TMP19]], i16* [[__U_ADDR_I_I]], align 2
-// CHECK-NEXT: store <16 x float> [[TMP20]], <16 x float>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP21:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2
-// CHECK-NEXT: [[TMP22:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP23:%.*]] = load <16 x float>, <16 x float>* [[__W2_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP24:%.*]] = bitcast i16 [[TMP21]] to <16 x i1>
-// CHECK-NEXT: [[TMP25:%.*]] = select <16 x i1> [[TMP24]], <16 x float> [[TMP22]], <16 x float> [[TMP23]]
-// CHECK-NEXT: store <16 x float> [[TMP25]], <16 x float>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP26:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP27:%.*]] = bitcast <16 x float> [[TMP26]] to <8 x double>
-// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP27]], <8 x double> undef, <4 x i32>
-// CHECK-NEXT: [[TMP28:%.*]] = bitcast <4 x double> [[EXTRACT_I]] to <8 x float>
-// CHECK-NEXT: store <8 x float> [[TMP28]], <8 x float>* [[__T1_I]], align 32
-// CHECK-NEXT: [[TMP29:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x float> [[TMP29]] to <8 x double>
-// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x double> [[TMP30]], <8 x double> undef, <4 x i32>
-// CHECK-NEXT: [[TMP31:%.*]] = bitcast <4 x double> [[EXTRACT4_I]] to <8 x float>
-// CHECK-NEXT: store <8 x float> [[TMP31]], <8 x float>* [[__T2_I]], align 32
-// CHECK-NEXT: [[TMP32:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32
-// CHECK-NEXT: [[TMP33:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32
-// CHECK-NEXT: store <8 x float> [[TMP32]], <8 x float>* [[__A_ADDR_I16_I]], align 32
-// CHECK-NEXT: store <8 x float> [[TMP33]], <8 x float>* [[__B_ADDR_I17_I]], align 32
-// CHECK-NEXT: [[TMP34:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I16_I]], align 32
-// CHECK-NEXT: [[TMP35:%.*]] = load <8 x float>, <8 x float>* [[__B_ADDR_I17_I]], align 32
-// CHECK-NEXT: [[TMP36:%.*]] = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> [[TMP34]], <8 x float> [[TMP35]]) #2
-// CHECK-NEXT: store <8 x float> [[TMP36]], <8 x float>* [[__T3_I]], align 32
-// CHECK-NEXT: [[TMP37:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32
-// CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <8 x float> [[TMP37]], <8 x float> undef, <4 x i32>
-// CHECK-NEXT: store <4 x float> [[EXTRACT6_I]], <4 x float>* [[__T4_I]], align 16
-// CHECK-NEXT: [[TMP38:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32
-// CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <8 x float> [[TMP38]], <8 x float> undef, <4 x i32>
-// CHECK-NEXT: store <4 x float> [[EXTRACT7_I]], <4 x float>* [[__T5_I]], align 16
-// CHECK-NEXT: [[TMP39:%.*]] = load <4 x float>, <4 x float>* [[__T4_I]], align 16
-// CHECK-NEXT: [[TMP40:%.*]] = load <4 x float>, <4 x float>* [[__T5_I]], align 16
-// CHECK-NEXT: store <4 x float> [[TMP39]], <4 x float>* [[__A_ADDR_I14_I]], align 16
-// CHECK-NEXT: store <4 x float> [[TMP40]], <4 x float>* [[__B_ADDR_I15_I]], align 16
-// CHECK-NEXT: [[TMP41:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I14_I]], align 16
-// CHECK-NEXT: [[TMP42:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I15_I]], align 16
-// CHECK-NEXT: [[TMP43:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP41]], <4 x float> [[TMP42]]) #2
-// CHECK-NEXT: store <4 x float> [[TMP43]], <4 x float>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP44:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP45:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16
-// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[TMP44]], <4 x float> [[TMP45]], <4 x i32>
-// CHECK-NEXT: store <4 x float> [[SHUFFLE_I]], <4 x float>* [[__T7_I]], align 16
-// CHECK-NEXT: [[TMP46:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16
-// CHECK-NEXT: [[TMP47:%.*]] = load <4 x float>, <4 x float>* [[__T7_I]], align 16
-// CHECK-NEXT: store <4 x float> [[TMP46]], <4 x float>* [[__A_ADDR_I12_I]], align 16
-// CHECK-NEXT: store <4 x float> [[TMP47]], <4 x float>* [[__B_ADDR_I13_I]], align 16
-// CHECK-NEXT: [[TMP48:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I12_I]], align 16
-// CHECK-NEXT: [[TMP49:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I13_I]], align 16
-// CHECK-NEXT: [[TMP50:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP48]], <4 x float> [[TMP49]]) #2
-// CHECK-NEXT: store <4 x float> [[TMP50]], <4 x float>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP51:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP52:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16
-// CHECK-NEXT: [[SHUFFLE10_I:%.*]] = shufflevector <4 x float> [[TMP51]], <4 x float> [[TMP52]], <4 x i32>
-// CHECK-NEXT: store <4 x float> [[SHUFFLE10_I]], <4 x float>* [[__T9_I]], align 16
-// CHECK-NEXT: [[TMP53:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16
-// CHECK-NEXT: [[TMP54:%.*]] = load <4 x float>, <4 x float>* [[__T9_I]], align 16
-// CHECK-NEXT: store <4 x float> [[TMP53]], <4 x float>* [[__A2_ADDR_I_I]], align 16
-// CHECK-NEXT: store <4 x float> [[TMP54]], <4 x float>* [[__B_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP55:%.*]] = load <4 x float>, <4 x float>* [[__A2_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP56:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I_I]], align 16
-// CHECK-NEXT: [[TMP57:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP55]], <4 x float> [[TMP56]]) #2
-// CHECK-NEXT: store <4 x float> [[TMP57]], <4 x float>* [[__T10_I]], align 16
-// CHECK-NEXT: [[TMP58:%.*]] = load <4 x float>, <4 x float>* [[__T10_I]], align 16
-// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP58]], i32 0
-// CHECK-NEXT: ret float [[VECEXT_I]]
 float test_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __W){
+// CHECK-LABEL: define float @test_mm512_mask_reduce_min_ps(
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32>
+// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32>
+// CHECK: call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
+// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32>
+// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32>
+// CHECK: call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
+// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32>
+// CHECK: call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
+// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32>
+// CHECK: call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
+// CHECK: extractelement <4 x float> %{{.*}}, i32 0
 return _mm512_mask_reduce_min_ps(__M, __W);
 }
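The next patch in the series replaces clangd's scattered llvm::createStringError and make_error boilerplate with a single error() helper. Judging from the call sites below, the helper takes an optional error code followed by a formatv-style template. A minimal sketch of such a helper, with signatures inferred from those call sites rather than taken from the actual header (the real definition lives in clangd's support library, which URI.cpp now pulls in via "support/Logger.h"); the real helper also knows how to render llvm::Error arguments such as Path.takeError(), which this sketch omits:

    #include "llvm/Support/Error.h"
    #include "llvm/Support/FormatVariadic.h"
    #include <system_error>
    #include <utility>

    template <typename... Ts>
    llvm::Error error(std::error_code EC, const char *Fmt, Ts &&...Vals) {
      // Format eagerly, then wrap in a StringError carrying the real code.
      return llvm::createStringError(
          EC, "%s", llvm::formatv(Fmt, std::forward<Ts>(Vals)...).str().c_str());
    }

    template <typename... Ts> llvm::Error error(const char *Fmt, Ts &&...Vals) {
      return error(llvm::inconvertibleErrorCode(), Fmt,
                   std::forward<Ts>(Vals)...);
    }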
From 687e1d7121645d23aa5e919ed4d3c0e57af975cd Mon Sep 17 00:00:00 2001
From: Sam McCall
Date: Mon, 14 Sep 2020 11:33:12 +0200
Subject: [PATCH 0517/1079] [clangd] makeStringError,make_error -> error()

---
 clang-tools-extra/clangd/FindSymbols.cpp | 9 ++--
 clang-tools-extra/clangd/IncludeFixer.cpp | 3 +-
 clang-tools-extra/clangd/JSONTransport.cpp | 6 +--
 clang-tools-extra/clangd/Preamble.cpp | 9 ++--
 clang-tools-extra/clangd/SourceCode.cpp | 22 ++++----
 clang-tools-extra/clangd/URI.cpp | 21 +++----
 clang-tools-extra/clangd/index/Background.cpp | 9 ++--
 clang-tools-extra/clangd/index/SymbolID.cpp | 7 ++-
 .../clangd/index/YAMLSerialization.cpp | 9 ++--
 .../index/remote/marshalling/Marshalling.cpp | 27 +++-------
 clang-tools-extra/clangd/refactor/Rename.cpp | 52 ++++++-------------
 clang-tools-extra/clangd/refactor/Tweak.cpp | 12 ++---
 .../clangd/refactor/tweaks/AddUsing.cpp | 10 ++--
 .../clangd/refactor/tweaks/DefineInline.cpp | 28 ++++------
 .../clangd/refactor/tweaks/DefineOutline.cpp | 40 +++++---------
 .../clangd/refactor/tweaks/ExpandAutoType.cpp | 25 ++-------
 .../refactor/tweaks/ExtractFunction.cpp | 8 ++-
 .../tweaks/ObjCLocalizeStringLiteral.cpp | 3 +-
 .../refactor/tweaks/RemoveUsingNamespace.cpp | 4 +-
 .../clangd/refactor/tweaks/SwapIfBranches.cpp | 8 +--
 clang-tools-extra/clangd/tool/ClangdMain.cpp | 14 +++--
 clang-tools-extra/clangd/unittests/TestFS.cpp | 12 ++---
 clang-tools-extra/clangd/xpc/XPCTransport.cpp | 2 +-
 23 files changed, 110 insertions(+), 230 deletions(-)

diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp
index 2471656988250..e37d73103e36d 100644
--- a/clang-tools-extra/clangd/FindSymbols.cpp
+++ b/clang-tools-extra/clangd/FindSymbols.cpp
@@ -43,12 +43,9 @@ struct ScoredSymbolGreater {
 llvm::Expected indexToLSPLocation(const SymbolLocation &Loc,
 llvm::StringRef TUPath) {
 auto Path = URI::resolve(Loc.FileURI, TUPath);
- if (!Path) {
- return llvm::make_error(
- llvm::formatv("Could not resolve path for file '{0}': {1}", Loc.FileURI,
- llvm::toString(Path.takeError())),
- llvm::inconvertibleErrorCode());
- }
+ if (!Path)
+ return error("Could not resolve path for file '{0}': {1}", Loc.FileURI,
+ Path.takeError());
 Location L;
 L.uri = URIForFile::canonicalize(*Path, TUPath);
 Position Start, End;
diff --git a/clang-tools-extra/clangd/IncludeFixer.cpp b/clang-tools-extra/clangd/IncludeFixer.cpp
index 945f4eced88c4..7704ccb82c0f0 100644
--- a/clang-tools-extra/clangd/IncludeFixer.cpp
+++ b/clang-tools-extra/clangd/IncludeFixer.cpp
@@ -153,8 +153,7 @@ std::vector IncludeFixer::fixesForSymbols(const SymbolSlab &Syms) const {
 return ResolvedInserted.takeError();
 auto Spelled = Inserter->calculateIncludePath(*ResolvedInserted, File);
 if (!Spelled)
- return llvm::createStringError(llvm::inconvertibleErrorCode(),
- "Header not on include path");
+ return error("Header not on include path");
 return std::make_pair(
 std::move(*Spelled),
 Inserter->shouldInsertInclude(*ResolvedDeclaring, *ResolvedInserted));
diff --git a/clang-tools-extra/clangd/JSONTransport.cpp b/clang-tools-extra/clangd/JSONTransport.cpp
index c591da0db47d3..eb5a83882b2bd 100644
--- a/clang-tools-extra/clangd/JSONTransport.cpp
+++ b/clang-tools-extra/clangd/JSONTransport.cpp
@@ -12,6 +12,7 @@
 #include "support/Shutdown.h"
 #include "llvm/Support/Errno.h"
 #include "llvm/Support/Error.h"
+#include
 namespace clang {
 namespace clangd {
@@ -100,9 +101,8 @@ class JSONTransport : public Transport {
 llvm::Error loop(MessageHandler &Handler) override {
 while (!feof(In)) {
 if (shutdownRequested())
- return llvm::createStringError(
- std::make_error_code(std::errc::operation_canceled),
- "Got signal, shutting down");
+ return error(std::make_error_code(std::errc::operation_canceled),
+ "Got signal, shutting down");
 if (ferror(In))
 return llvm::errorCodeToError(
 std::error_code(errno, std::system_category()));
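Note that the FindSymbols change above also drops the manual llvm::toString(Path.takeError()) dance: the failed Expected's Error is handed to error() as an ordinary format argument. A hedged usage sketch, where resolvePath is an illustrative stand-in rather than clangd API:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Error.h"
    #include <string>

    llvm::Expected<std::string> resolvePath(llvm::StringRef URI); // illustrative

    llvm::Expected<std::string> locate(llvm::StringRef URI) {
      auto Path = resolvePath(URI);
      if (!Path)
        // error() consumes the Error and renders it into the message,
        // as in the FindSymbols.cpp hunk above.
        return error("Could not resolve path for file '{0}': {1}", URI,
                     Path.takeError());
      return *Path;
    }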
llvm::createStringError(llvm::inconvertibleErrorCode(), - "failed to create compiler invocation"); + return error("failed to create compiler invocation"); CI->getDiagnosticOpts().IgnoreWarnings = true; auto ContentsBuffer = llvm::MemoryBuffer::getMemBuffer(Contents); // This means we're scanning (though not preprocessing) the preamble section @@ -260,14 +259,12 @@ scanPreamble(llvm::StringRef Contents, const tooling::CompileCommand &Cmd) { // also implies missing resolved paths for includes. FS.view(llvm::None), IgnoreDiags); if (Clang->getFrontendOpts().Inputs.empty()) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "compiler instance had no inputs"); + return error("compiler instance had no inputs"); // We are only interested in main file includes. Clang->getPreprocessorOpts().SingleFileParseMode = true; PreprocessOnlyAction Action; if (!Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0])) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "failed BeginSourceFile"); + return error("failed BeginSourceFile"); const auto &SM = Clang->getSourceManager(); Preprocessor &PP = Clang->getPreprocessor(); IncludeStructure Includes; diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp index 2b50aea82fb28..0432097b43488 100644 --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -175,20 +175,17 @@ size_t lspLength(llvm::StringRef Code) { llvm::Expected positionToOffset(llvm::StringRef Code, Position P, bool AllowColumnsBeyondLineLength) { if (P.line < 0) - return llvm::make_error( - llvm::formatv("Line value can't be negative ({0})", P.line), - llvm::errc::invalid_argument); + return error(llvm::errc::invalid_argument, + "Line value can't be negative ({0})", P.line); if (P.character < 0) - return llvm::make_error( - llvm::formatv("Character value can't be negative ({0})", P.character), - llvm::errc::invalid_argument); + return error(llvm::errc::invalid_argument, + "Character value can't be negative ({0})", P.character); size_t StartOfLine = 0; for (int I = 0; I != P.line; ++I) { size_t NextNL = Code.find('\n', StartOfLine); if (NextNL == llvm::StringRef::npos) - return llvm::make_error( - llvm::formatv("Line value is out of range ({0})", P.line), - llvm::errc::invalid_argument); + return error(llvm::errc::invalid_argument, + "Line value is out of range ({0})", P.line); StartOfLine = NextNL + 1; } StringRef Line = @@ -198,10 +195,9 @@ llvm::Expected positionToOffset(llvm::StringRef Code, Position P, bool Valid; size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid); if (!Valid && !AllowColumnsBeyondLineLength) - return llvm::make_error( - llvm::formatv("{0} offset {1} is invalid for line {2}", lspEncoding(), - P.character, P.line), - llvm::errc::invalid_argument); + return error(llvm::errc::invalid_argument, + "{0} offset {1} is invalid for line {2}", lspEncoding(), + P.character, P.line); return StartOfLine + ByteInLine; } diff --git a/clang-tools-extra/clangd/URI.cpp b/clang-tools-extra/clangd/URI.cpp index fad93143a30dd..f9e8fdc46fa7f 100644 --- a/clang-tools-extra/clangd/URI.cpp +++ b/clang-tools-extra/clangd/URI.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "URI.h" +#include "support/Logger.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Error.h" @@ -21,11 +22,6 @@ namespace clang { namespace clangd { namespace { -inline llvm::Error 
make_string_error(const llvm::Twine &Message) { - return llvm::make_error(Message, - llvm::inconvertibleErrorCode()); -} - bool isWindowsPath(llvm::StringRef Path) { return Path.size() > 1 && llvm::isAlpha(Path[0]) && Path[1] == ':'; } @@ -45,9 +41,9 @@ class FileSystemScheme : public URIScheme { getAbsolutePath(llvm::StringRef Authority, llvm::StringRef Body, llvm::StringRef /*HintPath*/) const override { if (!Body.startswith("/")) - return make_string_error("File scheme: expect body to be an absolute " - "path starting with '/': " + - Body); + return error("File scheme: expect body to be an absolute path starting " + "with '/': {0}", + Body); llvm::SmallString<128> Path; if (!Authority.empty()) { // Windows UNC paths e.g. file://server/share => \\server\share @@ -89,7 +85,7 @@ findSchemeByName(llvm::StringRef Scheme) { continue; return URIScheme.instantiate(); } - return make_string_error("Can't find scheme: " + Scheme); + return error("Can't find scheme: {0}", Scheme); } bool shouldEscape(unsigned char C) { @@ -187,12 +183,11 @@ llvm::Expected URI::parse(llvm::StringRef OrigUri) { auto Pos = Uri.find(':'); if (Pos == llvm::StringRef::npos) - return make_string_error("Scheme must be provided in URI: " + OrigUri); + return error("Scheme must be provided in URI: {0}", OrigUri); auto SchemeStr = Uri.substr(0, Pos); U.Scheme = percentDecode(SchemeStr); if (!isValidScheme(U.Scheme)) - return make_string_error(llvm::formatv("Invalid scheme: {0} (decoded: {1})", - SchemeStr, U.Scheme)); + return error("Invalid scheme: {0} (decoded: {1})", SchemeStr, U.Scheme); Uri = Uri.substr(Pos + 1); if (Uri.consume_front("//")) { Pos = Uri.find('/'); @@ -217,7 +212,7 @@ llvm::Expected URI::resolve(llvm::StringRef FileURI, llvm::Expected URI::create(llvm::StringRef AbsolutePath, llvm::StringRef Scheme) { if (!llvm::sys::path::is_absolute(AbsolutePath)) - return make_string_error("Not a valid absolute path: " + AbsolutePath); + return error("Not a valid absolute path: {0}", AbsolutePath); auto S = findSchemeByName(Scheme); if (!S) return S.takeError(); diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp index 2bac6ec39d308..a1aafeaf31a96 100644 --- a/clang-tools-extra/clangd/index/Background.cpp +++ b/clang-tools-extra/clangd/index/Background.cpp @@ -272,15 +272,13 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) { IgnoreDiagnostics IgnoreDiags; auto CI = buildCompilerInvocation(Inputs, IgnoreDiags); if (!CI) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "Couldn't build compiler invocation"); + return error("Couldn't build compiler invocation"); auto Clang = prepareCompilerInstance(std::move(CI), /*Preamble=*/nullptr, std::move(*Buf), std::move(FS), IgnoreDiags); if (!Clang) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "Couldn't build compiler instance"); + return error("Couldn't build compiler instance"); SymbolCollector::Options IndexOpts; // Creates a filter to not collect index results from files with unchanged @@ -318,8 +316,7 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) { const FrontendInputFile &Input = Clang->getFrontendOpts().Inputs.front(); if (!Action->BeginSourceFile(*Clang, Input)) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "BeginSourceFile() failed"); + return error("BeginSourceFile() failed"); if (llvm::Error Err = Action->Execute()) return Err; diff --git a/clang-tools-extra/clangd/index/SymbolID.cpp 
b/clang-tools-extra/clangd/index/SymbolID.cpp index b97103d377ca2..2bb3d4f0b6a0d 100644 --- a/clang-tools-extra/clangd/index/SymbolID.cpp +++ b/clang-tools-extra/clangd/index/SymbolID.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "SymbolID.h" +#include "support/Logger.h" #include "llvm/Support/SHA1.h" namespace clang { @@ -34,12 +35,10 @@ std::string SymbolID::str() const { return llvm::toHex(raw()); } llvm::Expected SymbolID::fromStr(llvm::StringRef Str) { if (Str.size() != RawSize * 2) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "Bad ID length"); + return error("Bad ID length"); for (char C : Str) if (!llvm::isHexDigit(C)) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "Bad hex ID"); + return error("Bad hex ID"); return fromRaw(llvm::fromHex(Str)); } diff --git a/clang-tools-extra/clangd/index/YAMLSerialization.cpp b/clang-tools-extra/clangd/index/YAMLSerialization.cpp index 4f6bd927cc196..d269a3b36eb48 100644 --- a/clang-tools-extra/clangd/index/YAMLSerialization.cpp +++ b/clang-tools-extra/clangd/index/YAMLSerialization.cpp @@ -18,6 +18,7 @@ #include "SymbolLocation.h" #include "SymbolOrigin.h" #include "dex/Dex.h" +#include "support/Logger.h" #include "support/Trace.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" @@ -533,9 +534,7 @@ symbolFromYAML(StringRef YAML, llvm::UniqueStringSaver *Strings) { clangd::Symbol Deserialized; llvm::yaml::Input YAMLInput(YAML, Strings); if (YAMLInput.error()) - return llvm::make_error( - llvm::formatv("Unable to deserialize Symbol from YAML: {0}", YAML), - llvm::inconvertibleErrorCode()); + return error("Unable to deserialize Symbol from YAML: {0}", YAML); YAMLInput >> Deserialized; return Deserialized; } @@ -545,9 +544,7 @@ llvm::Expected refFromYAML(StringRef YAML, clangd::Ref Deserialized; llvm::yaml::Input YAMLInput(YAML, Strings); if (YAMLInput.error()) - return llvm::make_error( - llvm::formatv("Unable to deserialize Symbol from YAML: {0}", YAML), - llvm::inconvertibleErrorCode()); + return error("Unable to deserialize Symbol from YAML: {0}", YAML); YAMLInput >> Deserialized; return Deserialized; } diff --git a/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp b/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp index cfc72ce87be61..839250982a03b 100644 --- a/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp +++ b/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp @@ -45,11 +45,6 @@ llvm::Expected> getIDs(IDRange IDs) { return Result; } -llvm::Error makeStringError(llvm::StringRef Message) { - return llvm::make_error(Message, - llvm::inconvertibleErrorCode()); -} - } // namespace Marshaller::Marshaller(llvm::StringRef RemoteIndexRoot, @@ -132,7 +127,7 @@ Marshaller::fromProtobuf(const RelationsRequest *Message) { llvm::Expected Marshaller::fromProtobuf(const Symbol &Message) { if (!Message.has_info() || !Message.has_canonical_declaration()) - return makeStringError("Missing info or declaration."); + return error("Missing info or declaration."); clangd::Symbol Result; auto ID = SymbolID::fromStr(Message.id()); if (!ID) @@ -170,7 +165,7 @@ llvm::Expected Marshaller::fromProtobuf(const Symbol &Message) { llvm::Expected Marshaller::fromProtobuf(const Ref &Message) { if (!Message.has_location()) - return makeStringError("Missing location."); + return error("Missing location."); clangd::Ref Result; auto Location = fromProtobuf(Message.location()); if (!Location) @@ 
-186,7 +181,7 @@ Marshaller::fromProtobuf(const Relation &Message) { if (!SubjectID) return SubjectID.takeError(); if (!Message.has_object()) - return makeStringError("Missing Object."); + return error("Missing Object."); auto Object = fromProtobuf(Message.object()); if (!Object) return Object.takeError(); @@ -304,10 +299,9 @@ Marshaller::relativePathToURI(llvm::StringRef RelativePath) { assert(RelativePath == llvm::sys::path::convert_to_slash( RelativePath, llvm::sys::path::Style::posix)); if (RelativePath.empty()) - return makeStringError("Empty relative path."); + return error("Empty relative path."); if (llvm::sys::path::is_absolute(RelativePath)) - return makeStringError( - llvm::formatv("RelativePath '{0}' is absolute.", RelativePath).str()); + return error("RelativePath '{0}' is absolute.", RelativePath); llvm::SmallString<256> FullPath = llvm::StringRef(*LocalIndexRoot); llvm::sys::path::append(FullPath, RelativePath); auto Result = URI::createFile(FullPath); @@ -320,16 +314,11 @@ llvm::Expected Marshaller::uriToRelativePath(llvm::StringRef URI) { if (!ParsedURI) return ParsedURI.takeError(); if (ParsedURI->scheme() != "file") - return makeStringError( - llvm::formatv("Can not use URI schemes other than file, given: '{0}'.", - URI) - .str()); + return error("Can not use URI schemes other than file, given: '{0}'.", URI); llvm::SmallString<256> Result = ParsedURI->body(); if (!llvm::sys::path::replace_path_prefix(Result, *RemoteIndexRoot, "")) - return makeStringError( - llvm::formatv("File path '{0}' doesn't start with '{1}'.", Result.str(), - *RemoteIndexRoot) - .str()); + return error("File path '{0}' doesn't start with '{1}'.", Result.str(), + *RemoteIndexRoot); // Make sure the result has UNIX slashes. return llvm::sys::path::convert_to_slash(Result, llvm::sys::path::Style::posix); diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp index ea75de6e86eac..2744caa586485 100644 --- a/clang-tools-extra/clangd/refactor/Rename.cpp +++ b/clang-tools-extra/clangd/refactor/Rename.cpp @@ -213,9 +213,7 @@ llvm::Error makeError(ReasonToReject Reason) { } llvm_unreachable("unhandled reason kind"); }; - return llvm::make_error( - llvm::formatv("Cannot rename symbol: {0}", Message(Reason)), - llvm::inconvertibleErrorCode()); + return error("Cannot rename symbol: {0}", Message(Reason)); } // Return all rename occurrences in the main file. @@ -319,16 +317,11 @@ findOccurrencesOutsideFile(const NamedDecl &RenameDecl, }); if (AffectedFiles.size() >= MaxLimitFiles) - return llvm::make_error( - llvm::formatv("The number of affected files exceeds the max limit {0}", - MaxLimitFiles), - llvm::inconvertibleErrorCode()); - if (HasMore) { - return llvm::make_error( - llvm::formatv("The symbol {0} has too many occurrences", - RenameDecl.getQualifiedNameAsString()), - llvm::inconvertibleErrorCode()); - } + return error("The number of affected files exceeds the max limit {0}", + MaxLimitFiles); + if (HasMore) + return error("The symbol {0} has too many occurrences", + RenameDecl.getQualifiedNameAsString()); // Sort and deduplicate the results, in case that index returns duplications. for (auto &FileAndOccurrences : AffectedFiles) { auto &Ranges = FileAndOccurrences.getValue(); @@ -379,20 +372,15 @@ llvm::Expected renameOutsideFile( // Our heuristics fails to adjust rename ranges to the current state of // the file, it is most likely the index is stale, so we give up the // entire rename. 
- return llvm::make_error( - llvm::formatv("Index results don't match the content of file {0} " - "(the index may be stale)", - FilePath), - llvm::inconvertibleErrorCode()); + return error("Index results don't match the content of file {0} " + "(the index may be stale)", + FilePath); } auto RenameEdit = buildRenameEdit(FilePath, *AffectedFileCode, *RenameRanges, NewName); - if (!RenameEdit) { - return llvm::make_error( - llvm::formatv("fail to build rename edit for file {0}: {1}", FilePath, - llvm::toString(RenameEdit.takeError())), - llvm::inconvertibleErrorCode()); - } + if (!RenameEdit) + return error("failed to rename in file {0}: {1}", FilePath, + RenameEdit.takeError()); if (!RenameEdit->Replacements.empty()) Results.insert({FilePath, std::move(*RenameEdit)}); } @@ -455,14 +443,10 @@ llvm::Expected rename(const RenameInputs &RInputs) { auto Content = SM.getFileManager().getVirtualFileSystem().getBufferForFile(AbsPath); if (!Content) - return llvm::createStringError( - llvm::inconvertibleErrorCode(), - llvm::formatv("Fail to open file {0}: {1}", AbsPath, - Content.getError().message())); + return error("Fail to open file {0}: {1}", AbsPath, + Content.getError().message()); if (!*Content) - return llvm::createStringError( - llvm::inconvertibleErrorCode(), - llvm::formatv("Got no buffer for file {0}", AbsPath)); + return error("Got no buffer for file {0}", AbsPath); return (*Content)->getBuffer().str(); }; @@ -559,10 +543,8 @@ llvm::Expected buildRenameEdit(llvm::StringRef AbsFilePath, auto ShiftedOffset = positionToOffset(InitialCode.substr(LastOffset), Shifted); if (!ShiftedOffset) - return llvm::make_error( - llvm::formatv("fail to convert the position {0} to offset ({1})", P, - llvm::toString(ShiftedOffset.takeError())), - llvm::inconvertibleErrorCode()); + return error("fail to convert the position {0} to offset ({1})", P, + ShiftedOffset.takeError()); LastPos = P; LastOffset += *ShiftedOffset; return LastOffset; diff --git a/clang-tools-extra/clangd/refactor/Tweak.cpp b/clang-tools-extra/clangd/refactor/Tweak.cpp index b1f4dcd69af6b..34b5b2b544dff 100644 --- a/clang-tools-extra/clangd/refactor/Tweak.cpp +++ b/clang-tools-extra/clangd/refactor/Tweak.cpp @@ -80,12 +80,10 @@ llvm::Expected> prepareTweak(StringRef ID, TweakRegistry::entries(), [ID](const TweakRegistry::entry &E) { return E.getName() == ID; }); if (It == TweakRegistry::end()) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "id of the tweak is invalid"); + return error("tweak ID {0} is invalid", ID); std::unique_ptr T = It->instantiate(); if (!T->prepare(S)) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "failed to prepare() a check"); + return error("failed to prepare() tweak {0}", ID); return std::move(T); } @@ -95,10 +93,8 @@ Tweak::Effect::fileEdit(const SourceManager &SM, FileID FID, Edit Ed(SM.getBufferData(FID), std::move(Replacements)); if (auto FilePath = getCanonicalPath(SM.getFileEntryForID(FID), SM)) return std::make_pair(*FilePath, std::move(Ed)); - return llvm::createStringError( - llvm::inconvertibleErrorCode(), - "Failed to get absolute path for edited file: " + - SM.getFileEntryForID(FID)->getName()); + return error("Failed to get absolute path for edited file: {0}", + SM.getFileEntryForID(FID)->getName()); } llvm::Expected diff --git a/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp b/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp index e4900041671a4..d5e6e12b31aad 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp +++ 
b/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp @@ -169,8 +169,7 @@ findInsertionPoint(const Tweak::Selection &Inputs, return Tok.kind() == tok::l_brace; }); if (Tok == Toks.end() || Tok->endLocation().isInvalid()) { - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "Namespace with no {"); + return error("Namespace with no {"); } if (!Tok->endLocation().isMacroID()) { InsertionPointData Out; @@ -183,8 +182,7 @@ findInsertionPoint(const Tweak::Selection &Inputs, // top level decl. auto TLDs = Inputs.AST->getLocalTopLevelDecls(); if (TLDs.empty()) { - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "Cannot find place to insert \"using\""); + return error("Cannot find place to insert \"using\""); } InsertionPointData Out; Out.Loc = SM.getExpansionLoc(TLDs[0]->getBeginLoc()); @@ -272,9 +270,7 @@ Expected AddUsing::apply(const Selection &Inputs) { auto SpelledTokens = TB.spelledForExpanded( TB.expandedTokens(QualifierToRemove.getSourceRange())); if (!SpelledTokens) { - return llvm::createStringError( - llvm::inconvertibleErrorCode(), - "Could not determine length of the qualifier"); + return error("Could not determine length of the qualifier"); } unsigned Length = syntax::Token::range(SM, SpelledTokens->front(), SpelledTokens->back()) diff --git a/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp b/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp index 698d2a406811a..cdd5f9c6595b0 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp @@ -205,18 +205,15 @@ llvm::Expected qualifyAllDecls(const FunctionDecl *FD, } }); - if (HadErrors) { - return llvm::createStringError( - llvm::inconvertibleErrorCode(), - "define inline: Failed to compute qualifiers see logs for details."); - } + if (HadErrors) + return error( + "define inline: Failed to compute qualifiers. See logs for details."); // Get new begin and end positions for the qualified body. auto OrigBodyRange = toHalfOpenFileRange( SM, FD->getASTContext().getLangOpts(), FD->getBody()->getSourceRange()); if (!OrigBodyRange) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "Couldn't get range func body."); + return error("Couldn't get range func body."); unsigned BodyBegin = SM.getFileOffset(OrigBodyRange->getBegin()); unsigned BodyEnd = Replacements.getShiftedCodePosition( @@ -311,9 +308,7 @@ renameParameters(const FunctionDecl *Dest, const FunctionDecl *Source) { ReplaceRange = Lexer::makeFileCharRange(ReplaceRange, SM, LangOpts); // Bail out if we need to replace macro bodies. 
if (ReplaceRange.isInvalid()) { - auto Err = llvm::createStringError( - llvm::inconvertibleErrorCode(), - "Cant rename parameter inside macro body."); + auto Err = error("Cant rename parameter inside macro body."); elog("define inline: {0}", Err); return std::move(Err); } @@ -450,11 +445,8 @@ class DefineInline : public Tweak { const auto &SM = AST.getSourceManager(); auto Semicolon = getSemicolonForDecl(Target); - if (!Semicolon) { - return llvm::createStringError( - llvm::inconvertibleErrorCode(), - "Couldn't find semicolon for target declaration."); - } + if (!Semicolon) + return error("Couldn't find semicolon for target declaration."); auto AddInlineIfNecessary = addInlineIfInHeader(Target); auto ParamReplacements = renameParameters(Target, Source); @@ -479,10 +471,8 @@ class DefineInline : public Tweak { SM.getExpansionRange(CharSourceRange::getCharRange(getBeginLoc(Source), Source->getEndLoc())) .getAsRange()); - if (!DefRange) { - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "Couldn't get range for the source."); - } + if (!DefRange) + return error("Couldn't get range for the source."); unsigned int SourceLen = SM.getFileOffset(DefRange->getEnd()) - SM.getFileOffset(DefRange->getBegin()); const tooling::Replacement DeleteFuncBody(SM, DefRange->getBegin(), diff --git a/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp b/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp index 66d9c4c36b122..ed4d0cc462692 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp @@ -120,8 +120,7 @@ getFunctionSourceAfterReplacements(const FunctionDecl *FD, auto OrigFuncRange = toHalfOpenFileRange( SM, FD->getASTContext().getLangOpts(), FD->getSourceRange()); if (!OrigFuncRange) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "Couldn't get range for function."); + return error("Couldn't get range for function."); assert(!FD->getDescribedFunctionTemplate() && "Define out-of-line doesn't apply to function templates."); @@ -151,9 +150,7 @@ getFunctionSourceCode(const FunctionDecl *FD, llvm::StringRef TargetNamespace, auto &SM = AST.getSourceManager(); auto TargetContext = findContextForNS(TargetNamespace, FD->getDeclContext()); if (!TargetContext) - return llvm::createStringError( - llvm::inconvertibleErrorCode(), - "define outline: couldn't find a context for target"); + return error("define outline: couldn't find a context for target"); llvm::Error Errors = llvm::Error::success(); tooling::Replacements DeclarationCleanups; @@ -219,12 +216,9 @@ getFunctionSourceCode(const FunctionDecl *FD, llvm::StringRef TargetNamespace, assert(A->getLocation().isValid()); if (!AttrTokens || AttrTokens->empty()) { Errors = llvm::joinErrors( - std::move(Errors), - llvm::createStringError( - llvm::inconvertibleErrorCode(), - llvm::StringRef("define outline: Can't move out of line as " - "function has a macro `") + - A->getSpelling() + "` specifier.")); + std::move(Errors), error("define outline: Can't move out of line as " + "function has a macro `{0}` specifier.", + A->getSpelling())); return; } CharSourceRange DelRange = @@ -248,10 +242,8 @@ getFunctionSourceCode(const FunctionDecl *FD, llvm::StringRef TargetNamespace, if (!Spelling) { Errors = llvm::joinErrors( std::move(Errors), - llvm::createStringError( - llvm::inconvertibleErrorCode(), - llvm::formatv("define outline: couldn't remove `{0}` keyword.", - tok::getKeywordSpelling(Kind)))); + error("define outline: couldn't remove `{0}` 
keyword.", + tok::getKeywordSpelling(Kind))); break; } CharSourceRange DelRange = @@ -264,11 +256,8 @@ getFunctionSourceCode(const FunctionDecl *FD, llvm::StringRef TargetNamespace, if (!FoundAny) { Errors = llvm::joinErrors( std::move(Errors), - llvm::createStringError( - llvm::inconvertibleErrorCode(), - llvm::formatv( - "define outline: couldn't find `{0}` keyword to remove.", - tok::getKeywordSpelling(Kind)))); + error("define outline: couldn't find `{0}` keyword to remove.", + tok::getKeywordSpelling(Kind))); } }; @@ -411,15 +400,11 @@ class DefineOutline : public Tweak { auto MainFileName = getCanonicalPath(SM.getFileEntryForID(SM.getMainFileID()), SM); if (!MainFileName) - return llvm::createStringError( - llvm::inconvertibleErrorCode(), - "Couldn't get absolute path for mainfile."); + return error("Couldn't get absolute path for main file."); auto CCFile = getSourceFile(*MainFileName, Sel); if (!CCFile) - return llvm::createStringError( - llvm::inconvertibleErrorCode(), - "Couldn't find a suitable implementation file."); + return error("Couldn't find a suitable implementation file."); auto &FS = Sel.AST->getSourceManager().getFileManager().getVirtualFileSystem(); @@ -427,8 +412,7 @@ class DefineOutline : public Tweak { // FIXME: Maybe we should consider creating the implementation file if it // doesn't exist? if (!Buffer) - return llvm::createStringError(Buffer.getError(), - Buffer.getError().message()); + return llvm::errorCodeToError(Buffer.getError()); auto Contents = Buffer->get()->getBuffer(); auto InsertionPoint = getInsertionPoint( Contents, Source->getQualifiedNameAsString(), Sel.AST->getLangOpts()); diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp index d2dfc4a537d4a..f9db50d934b09 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp @@ -45,11 +45,6 @@ class ExpandAutoType : public Tweak { private: /// Cache the AutoTypeLoc, so that we do not need to search twice. llvm::Optional CachedLocation; - - /// Create an error message with filename and line number in it - llvm::Error createErrorMessage(const std::string& Message, - const Selection &Inputs); - }; REGISTER_TWEAK(ExpandAutoType) @@ -78,21 +73,19 @@ Expected ExpandAutoType::apply(const Selection& Inputs) { // if we can't resolve the type, return an error message if (DeducedType == llvm::None) - return createErrorMessage("Could not deduce type for 'auto' type", Inputs); + return error("Could not deduce type for 'auto' type"); // if it's a lambda expression, return an error message if (isa(*DeducedType) && dyn_cast(*DeducedType)->getDecl()->isLambda()) { - return createErrorMessage("Could not expand type of lambda expression", - Inputs); + return error("Could not expand type of lambda expression"); } // if it's a function expression, return an error message // naively replacing 'auto' with the type will break declarations. 
// FIXME: there are other types that have similar problems if (DeducedType->getTypePtr()->isFunctionPointerType()) { - return createErrorMessage("Could not expand type of function pointer", - Inputs); + return error("Could not expand type of function pointer"); } std::string PrettyTypeName = printType(*DeducedType, @@ -105,18 +98,6 @@ Expected ExpandAutoType::apply(const Selection& Inputs) { return Effect::mainFileEdit(SrcMgr, tooling::Replacements(Expansion)); } -llvm::Error ExpandAutoType::createErrorMessage(const std::string& Message, - const Selection& Inputs) { - auto &SrcMgr = Inputs.AST->getSourceManager(); - std::string ErrorMessage = - Message + ": " + - SrcMgr.getFilename(Inputs.Cursor).str() + " Line " + - std::to_string(SrcMgr.getExpansionLineNumber(Inputs.Cursor)); - - return llvm::createStringError(llvm::inconvertibleErrorCode(), - ErrorMessage.c_str()); -} - } // namespace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp index d4c723e02eebe..6ee5aee37f51c 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp @@ -625,9 +625,8 @@ llvm::Expected getExtractedFunction(ExtractionZone &ExtZone, CapturedZoneInfo CapturedInfo = captureZoneInfo(ExtZone); // Bail out if any break of continue exists if (CapturedInfo.BrokenControlFlow) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - +"Cannot extract break/continue without " - "corresponding loop/switch statement."); + return error("Cannot extract break/continue without corresponding " + "loop/switch statement."); NewFunction ExtractedFunc(getSemicolonPolicy(ExtZone, SM, LangOpts)); ExtractedFunc.BodyRange = ExtZone.ZoneRange; ExtractedFunc.InsertionPoint = ExtZone.getInsertionPoint(); @@ -637,8 +636,7 @@ llvm::Expected getExtractedFunction(ExtractionZone &ExtZone, if (!createParameters(ExtractedFunc, CapturedInfo) || !generateReturnProperties(ExtractedFunc, *ExtZone.EnclosingFunction, CapturedInfo)) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - +"Too complex to extract."); + return error("Too complex to extract."); return ExtractedFunc; } diff --git a/clang-tools-extra/clangd/refactor/tweaks/ObjCLocalizeStringLiteral.cpp b/clang-tools-extra/clangd/refactor/tweaks/ObjCLocalizeStringLiteral.cpp index 2534cf562daa8..894f018aa7968 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/ObjCLocalizeStringLiteral.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/ObjCLocalizeStringLiteral.cpp @@ -68,8 +68,7 @@ ObjCLocalizeStringLiteral::apply(const Selection &Inputs) { const auto &TB = AST->getTokens(); auto Toks = TB.spelledForExpanded(TB.expandedTokens(Str->getSourceRange())); if (!Toks || Toks->empty()) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "Failed to find tokens to replace."); + return error("Failed to find tokens to replace."); // Insert `NSLocalizedString(` before the literal. 
auto Reps = tooling::Replacements(tooling::Replacement( SM, Toks->front().location(), 0, "NSLocalizedString(")); diff --git a/clang-tools-extra/clangd/refactor/tweaks/RemoveUsingNamespace.cpp b/clang-tools-extra/clangd/refactor/tweaks/RemoveUsingNamespace.cpp index e054e33c046a0..9d1a9f12567c4 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/RemoveUsingNamespace.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/RemoveUsingNamespace.cpp @@ -10,6 +10,7 @@ #include "Selection.h" #include "SourceCode.h" #include "refactor/Tweak.h" +#include "support/Logger.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" #include "clang/AST/DeclCXX.h" @@ -73,8 +74,7 @@ removeUsingDirective(ASTContext &Ctx, const UsingDirectiveDecl *D) { llvm::Optional NextTok = Lexer::findNextToken(D->getEndLoc(), SM, Ctx.getLangOpts()); if (!NextTok || NextTok->isNot(tok::semi)) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "no semicolon after using-directive"); + return error("no semicolon after using-directive"); // FIXME: removing the semicolon may be invalid in some obscure cases, e.g. // if (x) using namespace std; else using namespace bar; return tooling::Replacement( diff --git a/clang-tools-extra/clangd/refactor/tweaks/SwapIfBranches.cpp b/clang-tools-extra/clangd/refactor/tweaks/SwapIfBranches.cpp index d6966e699fdbc..d5299f014cc74 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/SwapIfBranches.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/SwapIfBranches.cpp @@ -69,15 +69,11 @@ Expected SwapIfBranches::apply(const Selection &Inputs) { auto ThenRng = toHalfOpenFileRange(SrcMgr, Ctx.getLangOpts(), If->getThen()->getSourceRange()); if (!ThenRng) - return llvm::createStringError( - llvm::inconvertibleErrorCode(), - "Could not obtain range of the 'then' branch. Macros?"); + return error("Could not obtain range of the 'then' branch. Macros?"); auto ElseRng = toHalfOpenFileRange(SrcMgr, Ctx.getLangOpts(), If->getElse()->getSourceRange()); if (!ElseRng) - return llvm::createStringError( - llvm::inconvertibleErrorCode(), - "Could not obtain range of the 'else' branch. Macros?"); + return error("Could not obtain range of the 'else' branch. Macros?"); auto ThenCode = toSourceCode(SrcMgr, *ThenRng); auto ElseCode = toSourceCode(SrcMgr, *ElseRng); diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index dcbaa35238226..cf74ded936320 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -484,9 +484,9 @@ class TestScheme : public URIScheme { // Still require "/" in body to mimic file scheme, as we want lengths of an // equivalent URI in both schemes to be the same. 
if (!Body.startswith("/")) - return llvm::make_error( - "Expect URI body to be an absolute path starting with '/': " + Body, - llvm::inconvertibleErrorCode()); + return error( + "Expect URI body to be an absolute path starting with '/': {0}", + Body); Body = Body.ltrim('/'); llvm::SmallVector Path(Body.begin(), Body.end()); path::native(Path); @@ -497,11 +497,9 @@ class TestScheme : public URIScheme { llvm::Expected uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override { llvm::StringRef Body = AbsolutePath; - if (!Body.consume_front(TestScheme::TestDir)) { - return llvm::make_error( - "Path " + AbsolutePath + " doesn't start with root " + TestDir, - llvm::inconvertibleErrorCode()); - } + if (!Body.consume_front(TestScheme::TestDir)) + return error("Path {0} doesn't start with root {1}", AbsolutePath, + TestDir); return URI("test", /*Authority=*/"", llvm::sys::path::convert_to_slash(Body)); diff --git a/clang-tools-extra/clangd/unittests/TestFS.cpp b/clang-tools-extra/clangd/unittests/TestFS.cpp index 3b2fbc142a28f..ba4010cb45817 100644 --- a/clang-tools-extra/clangd/unittests/TestFS.cpp +++ b/clang-tools-extra/clangd/unittests/TestFS.cpp @@ -100,13 +100,9 @@ class TestScheme : public URIScheme { getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body, llvm::StringRef HintPath) const override { if (!HintPath.startswith(testRoot())) - return llvm::make_error( - "Hint path doesn't start with test root: " + HintPath, - llvm::inconvertibleErrorCode()); + return error("Hint path doesn't start with test root: {0}", HintPath); if (!Body.consume_front("/")) - return llvm::make_error( - "Body of an unittest: URI must start with '/'", - llvm::inconvertibleErrorCode()); + return error("Body of an unittest: URI must start with '/'"); llvm::SmallString<16> Path(Body.begin(), Body.end()); llvm::sys::path::native(Path); return testPath(Path); @@ -116,9 +112,7 @@ class TestScheme : public URIScheme { uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override { llvm::StringRef Body = AbsolutePath; if (!Body.consume_front(testRoot())) - return llvm::make_error( - AbsolutePath + "does not start with " + testRoot(), - llvm::inconvertibleErrorCode()); + return error("{0} does not start with {1}", AbsolutePath, testRoot()); return URI(Scheme, /*Authority=*/"", llvm::sys::path::convert_to_slash(Body)); diff --git a/clang-tools-extra/clangd/xpc/XPCTransport.cpp b/clang-tools-extra/clangd/xpc/XPCTransport.cpp index 50eacf2115eea..9eb083953b965 100644 --- a/clang-tools-extra/clangd/xpc/XPCTransport.cpp +++ b/clang-tools-extra/clangd/xpc/XPCTransport.cpp @@ -41,7 +41,7 @@ Error decodeError(const json::Object &O) { std::string(O.getString("message").getValueOr("Unspecified error")); if (auto Code = O.getInteger("code")) return make_error(std::move(Msg), ErrorCode(*Code)); - return make_error(std::move(Msg), inconvertibleErrorCode()); + return error("{0}", Msg); } // C "closure" for XPCTransport::loop() method From 00e5676cf64740daf99b694d1ac968be141b655f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 14 Sep 2020 11:09:15 +0100 Subject: [PATCH 0518/1079] [LegalizeDAG] Fix MSVC "result of 32-bit shift implicitly converted to 64 bits" warning. NFCI. 
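For context on the diagnostic being silenced here: in 1 << (--i) the shift is performed in
32-bit int, and the result is only then widened to the 64-bit value DAG.getConstant() takes,
which is what MSVC (warning C4334) complains about. A minimal standalone sketch of the same
pattern, illustrative only and not code from this patch:

    #include <cstdint>

    // The shift happens in 32-bit int and the result is then implicitly
    // converted to 64 bits (MSVC C4334); once the shift amount reaches 31
    // the intended 64-bit value can no longer be produced this way.
    uint64_t bitBefore(unsigned i) { return 1 << i; }

    // Performing the shift in 64 bits from the start, as the patch does
    // by writing 1ULL, yields the intended value for any i in [0, 63].
    uint64_t bitAfter(unsigned i) { return 1ULL << i; }

In ExpandPARITY the shift amounts stay small, so the change only silences the warning (hence
the NFCI tag).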
--- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 71ba228b53f6f..541edafc0ef56 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2800,7 +2800,7 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) { Result = Op; for (unsigned i = Log2_32_Ceil(Sz); i != 0;) { SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, Result, - DAG.getConstant(1 << (--i), dl, ShVT)); + DAG.getConstant(1ULL << (--i), dl, ShVT)); Result = DAG.getNode(ISD::XOR, dl, VT, Result, Shift); } } From 0c8f4cd657346fcb25e99a3d2c93a7a12080d667 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Mon, 14 Sep 2020 11:18:21 +0200 Subject: [PATCH 0519/1079] AMDGPU/GlobalISel Add test for non-leaf complex patterns The GlobalISel emitter does not import patterns where a complex sub-operand of a non-leaf complex pattern is referenced more than once. Multiple references of complex patterns with the same name and the same sub-operands represent the same operand. Document this with a test. --- .../GlobalISel/inst-select-fract.f64.mir | 105 ++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir new file mode 100644 index 0000000000000..0110762baed31 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir @@ -0,0 +1,105 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx1010 -enable-unsafe-fp-math -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: fract_f64_neg +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.1: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: fract_f64_neg + ; CHECK: liveins: $sgpr0_sgpr1 + ; CHECK: $vcc_hi = IMPLICIT_DEF + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4) + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3 + ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0, 0 :: (load 8, addrspace 1) + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; CHECK: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; CHECK: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] + ; CHECK: %12:vreg_64 = nofpexcept V_ADD_F64 0, [[COPY3]], 1, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; CHECK: %13:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, %12, 0, 0, implicit $mode, implicit $exec + ; CHECK: %15:vreg_64 = nofpexcept V_ADD_F64 0, %12, 1, %13, 0, 0, implicit $mode, implicit $exec + ; CHECK: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[COPY1]] + ; CHECK: GLOBAL_STORE_DWORDX2 [[COPY5]], %15, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1) + ; CHECK: S_ENDPGM 0 + %2:sgpr(p4) = COPY $sgpr0_sgpr1 + %7:sgpr(s64) = G_CONSTANT i64 36 + %8:sgpr(p4) =
G_PTR_ADD %2, %7(s64) + %9:sgpr(<2 x s64>) = G_LOAD %8(p4) :: (dereferenceable invariant load 16, align 4, addrspace 4) + %10:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 0 + %13:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 64 + %15:sgpr(p1) = G_INTTOPTR %13(s64) + %18:sgpr(s64) = G_LOAD %15(p1) :: (load 8, addrspace 1) + %19:sgpr(s64) = G_FCONSTANT double -0.000000e+00 + %24:sgpr(s64) = G_FNEG %18 + %25:vgpr(s64) = COPY %19(s64) + %26:vgpr(s64) = COPY %24(s64) + %20:vgpr(s64) = G_FADD %25, %26 + %21:vgpr(s64) = G_FFLOOR %20 + %23:vgpr(s64) = G_FNEG %21 + %22:vgpr(s64) = G_FADD %20, %23 + %12:sgpr(p1) = G_INTTOPTR %10(s64) + %27:vgpr(p1) = COPY %12(p1) + G_STORE %22(s64), %27(p1) :: (store 8, addrspace 1) + S_ENDPGM 0 +... + +--- +name: fract_f64_neg_abs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.1: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: fract_f64_neg_abs + ; CHECK: liveins: $sgpr0_sgpr1 + ; CHECK: $vcc_hi = IMPLICIT_DEF + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4) + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3 + ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0, 0 :: (load 8, addrspace 1) + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; CHECK: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; CHECK: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] + ; CHECK: %13:vreg_64 = nofpexcept V_ADD_F64 0, [[COPY3]], 3, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; CHECK: %14:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, %13, 0, 0, implicit $mode, implicit $exec + ; CHECK: %16:vreg_64 = nofpexcept V_ADD_F64 0, %13, 1, %14, 0, 0, implicit $mode, implicit $exec + ; CHECK: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[COPY1]] + ; CHECK: GLOBAL_STORE_DWORDX2 [[COPY5]], %16, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1) + ; CHECK: S_ENDPGM 0 + %2:sgpr(p4) = COPY $sgpr0_sgpr1 + %7:sgpr(s64) = G_CONSTANT i64 36 + %8:sgpr(p4) = G_PTR_ADD %2, %7(s64) + %9:sgpr(<2 x s64>) = G_LOAD %8(p4) :: (dereferenceable invariant load 16, align 4, addrspace 4) + %10:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 0 + %13:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 64 + %15:sgpr(p1) = G_INTTOPTR %13(s64) + %18:sgpr(s64) = G_LOAD %15(p1) :: (load 8, addrspace 1) + %19:sgpr(s64) = G_FABS %18 + %20:sgpr(s64) = G_FCONSTANT double -0.000000e+00 + %25:sgpr(s64) = G_FNEG %19 + %26:vgpr(s64) = COPY %20(s64) + %27:vgpr(s64) = COPY %25(s64) + %21:vgpr(s64) = G_FADD %26, %27 + %22:vgpr(s64) = G_FFLOOR %21 + %24:vgpr(s64) = G_FNEG %22 + %23:vgpr(s64) = G_FADD %21, %24 + %12:sgpr(p1) = G_INTTOPTR %10(s64) + %28:vgpr(p1) = COPY %12(p1) + G_STORE %23(s64), %28(p1) :: (store 8, addrspace 1) + S_ENDPGM 0 +... From 416346d1ca503262983c954ddc861ff4f91347a3 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Mon, 14 Sep 2020 11:37:14 +0200 Subject: [PATCH 0520/1079] AMDGPU/GlobalISel/Emitter Recognize additional 'same operand checks' The "name" of a non-leaf complex pattern (MY_PAT $op1, $op2) is "MY_PAT:op1:op2" and the ones with same "name" represent same operand. Add 'same operand check' for this case. 
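The composed-name scheme can be illustrated with a small standalone sketch (illustrative C++
only; the real logic is the PatternName loop visible in the GlobalISelEmitter.cpp hunk below):

    #include <string>
    #include <vector>

    // Compose the "name" of a non-leaf complex pattern from its operator
    // and its sub-operand names, e.g. ("complex_rr", {"x", "y"}) gives
    // "complex_rr:x:y". Two pattern operands that compose to the same name
    // are treated as the same operand, so the emitter can generate a
    // 'same operand check' for them instead of skipping the pattern.
    std::string complexPatternName(const std::string &Operator,
                                   const std::vector<std::string> &SubOps) {
      std::string Name = Operator;
      for (const std::string &Sub : SubOps)
        Name += ":" + Sub;
      return Name;
    }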
Differential Revision: https://reviews.llvm.org/D87351 --- .../GlobalISel/inst-select-fract.f64.mir | 6 +-- llvm/test/TableGen/GlobalISelEmitter.td | 4 +- .../GlobalISelEmitterSkippedPatterns.td | 2 +- llvm/utils/TableGen/GlobalISelEmitter.cpp | 49 ++++++++++++++----- 4 files changed, 42 insertions(+), 19 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir index 0110762baed31..b450aa8b81962 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir @@ -25,8 +25,7 @@ body: | ; CHECK: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] ; CHECK: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] ; CHECK: %12:vreg_64 = nofpexcept V_ADD_F64 0, [[COPY3]], 1, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; CHECK: %13:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, %12, 0, 0, implicit $mode, implicit $exec - ; CHECK: %15:vreg_64 = nofpexcept V_ADD_F64 0, %12, 1, %13, 0, 0, implicit $mode, implicit $exec + ; CHECK: %15:vreg_64 = nofpexcept V_FRACT_F64_e64 0, %12, 0, 0, implicit $mode, implicit $exec ; CHECK: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[COPY1]] ; CHECK: GLOBAL_STORE_DWORDX2 [[COPY5]], %15, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1) ; CHECK: S_ENDPGM 0 @@ -76,8 +75,7 @@ body: | ; CHECK: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] ; CHECK: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] ; CHECK: %13:vreg_64 = nofpexcept V_ADD_F64 0, [[COPY3]], 3, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; CHECK: %14:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, %13, 0, 0, implicit $mode, implicit $exec - ; CHECK: %16:vreg_64 = nofpexcept V_ADD_F64 0, %13, 1, %14, 0, 0, implicit $mode, implicit $exec + ; CHECK: %16:vreg_64 = nofpexcept V_FRACT_F64_e64 0, %13, 0, 0, implicit $mode, implicit $exec ; CHECK: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[COPY1]] ; CHECK: GLOBAL_STORE_DWORDX2 [[COPY5]], %16, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1) ; CHECK: S_ENDPGM 0 diff --git a/llvm/test/TableGen/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter.td index c77630ba80151..acf5cf55320ee 100644 --- a/llvm/test/TableGen/GlobalISelEmitter.td +++ b/llvm/test/TableGen/GlobalISelEmitter.td @@ -255,7 +255,7 @@ def HasC : Predicate<"Subtarget->hasC()"> { let RecomputePerFunction = 1; } // R19N-NEXT: // MIs[0] src1 // R19N-NEXT: GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s32, // R19N-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/1, /*RC*/MyTarget::GPR32RegClassID, -// R19N-NEXT: // MIs[0] Operand 2 +// R19N-NEXT: // MIs[0] complex_rr:src2a:src2b // R19N-NEXT: GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_s32, // // R19N-NEXT: GIM_CheckComplexPattern, /*MI*/0, /*Op*/2, /*Renderer*/0, GICP_gi_complex_rr, @@ -274,7 +274,7 @@ def HasC : Predicate<"Subtarget->hasC()"> { let RecomputePerFunction = 1; } // R19N-NEXT: // MIs[1] src4 // R19N-NEXT: GIM_CheckType, /*MI*/1, /*Op*/2, /*Type*/GILLT_s32, // R19N-NEXT: GIM_CheckComplexPattern, /*MI*/1, /*Op*/2, /*Renderer*/1, GICP_gi_complex, -// R19N-NEXT: // MIs[1] Operand 3 +// R19N-NEXT: // MIs[1] complex:src5a:src5b // R19N-NEXT: GIM_CheckType, /*MI*/1, /*Op*/3, /*Type*/GILLT_s32, // R19N-NEXT: GIM_CheckComplexPattern, /*MI*/1, /*Op*/3, /*Renderer*/2, GICP_gi_complex, // R19O-NEXT: GIM_CheckRegBankForClass, /*MI*/1, /*Op*/1, /*RC*/MyTarget::GPR32RegClassID, diff --git a/llvm/test/TableGen/GlobalISelEmitterSkippedPatterns.td 
b/llvm/test/TableGen/GlobalISelEmitterSkippedPatterns.td index b9ba1a7d8c554..7c9df02ebd87c 100644 --- a/llvm/test/TableGen/GlobalISelEmitterSkippedPatterns.td +++ b/llvm/test/TableGen/GlobalISelEmitterSkippedPatterns.td @@ -23,7 +23,7 @@ def INSN : I<(outs GPR32:$dst), (ins GPR32:$src1, complex:$src2), []>; //===- Bail out when we define a variable twice wrt complex suboperands. -===// -// CHECK: warning: Skipped pattern: Complex suboperand referenced more than once (Operand: x) +// CHECK: warning: Skipped pattern: Error: Complex suboperand x referenced by different operands: complex_rr:x:y and complex_rr:x:z. def : Pat<(add (complex_rr GPR32:$x, GPR32:$y), (complex_rr GPR32:$x, GPR32:$z)), (INSN GPR32:$z, complex:$y)>; diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index 0fe1571cff136..67b68217cbd87 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -856,6 +856,11 @@ class RuleMatcher : public Matcher { DefinedComplexPatternSubOperandMap; /// A map of Symbolic Names to ComplexPattern sub-operands. DefinedComplexPatternSubOperandMap ComplexSubOperands; + /// A map used to for multiple referenced error check of ComplexSubOperand. + /// ComplexSubOperand can't be referenced multiple from different operands, + /// however multiple references from same operand are allowed since that is + /// how 'same operand checks' are generated. + StringMap ComplexSubOperandsParentName; uint64_t RuleID; static uint64_t NextRuleID; @@ -921,14 +926,24 @@ class RuleMatcher : public Matcher { void definePhysRegOperand(Record *Reg, OperandMatcher &OM); Error defineComplexSubOperand(StringRef SymbolicName, Record *ComplexPattern, - unsigned RendererID, unsigned SubOperandID) { - if (ComplexSubOperands.count(SymbolicName)) - return failedImport( - "Complex suboperand referenced more than once (Operand: " + - SymbolicName + ")"); + unsigned RendererID, unsigned SubOperandID, + StringRef ParentSymbolicName) { + std::string ParentName(ParentSymbolicName); + if (ComplexSubOperands.count(SymbolicName)) { + auto RecordedParentName = ComplexSubOperandsParentName[SymbolicName]; + if (RecordedParentName.compare(ParentName) != 0) + return failedImport("Error: Complex suboperand " + SymbolicName + + " referenced by different operands: " + + RecordedParentName + " and " + ParentName + "."); + // Complex suboperand referenced more than once from same the operand is + // used to generate 'same operand check'. Emitting of + // GIR_ComplexSubOperandRenderer for them is already handled. + return Error::success(); + } ComplexSubOperands[SymbolicName] = std::make_tuple(ComplexPattern, RendererID, SubOperandID); + ComplexSubOperandsParentName[SymbolicName] = ParentName; return Error::success(); } @@ -4100,12 +4115,22 @@ Error GlobalISelEmitter::importChildMatcher( bool OperandIsImmArg, unsigned OpIdx, unsigned &TempOpIdx) { Record *PhysReg = nullptr; - StringRef SrcChildName = getSrcChildName(SrcChild, PhysReg); + std::string SrcChildName = std::string(getSrcChildName(SrcChild, PhysReg)); + if (!SrcChild->isLeaf() && + SrcChild->getOperator()->isSubClassOf("ComplexPattern")) { + // The "name" of a non-leaf complex pattern (MY_PAT $op1, $op2) is + // "MY_PAT:op1:op2" and the ones with same "name" represent same operand. 
+ std::string PatternName = std::string(SrcChild->getOperator()->getName()); + for (unsigned i = 0; i < SrcChild->getNumChildren(); ++i) { + PatternName += ":"; + PatternName += SrcChild->getChild(i)->getName(); + } + SrcChildName = PatternName; + } OperandMatcher &OM = - PhysReg - ? InsnMatcher.addPhysRegInput(PhysReg, OpIdx, TempOpIdx) - : InsnMatcher.addOperand(OpIdx, std::string(SrcChildName), TempOpIdx); + PhysReg ? InsnMatcher.addPhysRegInput(PhysReg, OpIdx, TempOpIdx) + : InsnMatcher.addOperand(OpIdx, SrcChildName, TempOpIdx); if (OM.isSameAsAnotherOperand()) return Error::success(); @@ -4152,9 +4177,9 @@ Error GlobalISelEmitter::importChildMatcher( for (unsigned i = 0, e = SrcChild->getNumChildren(); i != e; ++i) { auto *SubOperand = SrcChild->getChild(i); if (!SubOperand->getName().empty()) { - if (auto Error = Rule.defineComplexSubOperand(SubOperand->getName(), - SrcChild->getOperator(), - RendererID, i)) + if (auto Error = Rule.defineComplexSubOperand( + SubOperand->getName(), SrcChild->getOperator(), RendererID, i, + SrcChildName)) return Error; } } From 6e2a86ed5abfdb75ba9c08ea94ed8dbd41e75c9e Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Mon, 14 Sep 2020 12:03:36 +0200 Subject: [PATCH 0521/1079] AMDGPU/GlobalISel Check for NoNaNsFPMath in isKnownNeverSNaN Check for NoNaNsFPMath function attribute in isKnownNeverSNaN. Function attributes are in held in 'TargetMachine.Options'. Among other things, this allows selection of some patterns imported in D87351 since G_FCANONICALIZE is not generated when isKnownNeverSNaN returns true in lowerFMinNumMaxNum. However we notice some incorrect results since function attributes are not correctly written in TargetMachine.Options when next function is processed. Take a look at @v_test_no_global_nnans_med3_f32_pat0_srcmod0, it has "no-nans-fp-math"="false" but TargetMachine.Options still has it set to true since first function in test file had this attribute set to true. This will be fixed in D87511. 
Differential Revision: https://reviews.llvm.org/D87456 --- llvm/lib/CodeGen/GlobalISel/Utils.cpp | 4 +- .../AMDGPU/AMDGPUInstructionSelector.cpp | 2 +- llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll | 589 ++++++++++++++++++ 3 files changed, 593 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 53e6eff2590e0..070a45951fed1 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "globalisel-utils" @@ -470,7 +471,8 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, if (!DefMI) return false; - if (DefMI->getFlag(MachineInstr::FmNoNans)) + const TargetMachine& TM = DefMI->getMF()->getTarget(); + if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) return true; if (SNaN) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 3f8782b2a66ee..7ed6688439355 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3167,7 +3167,7 @@ AMDGPUInstructionSelector::selectVOP3Mods_nnan(MachineOperand &Root) const { Register Src; unsigned Mods; std::tie(Src, Mods) = selectVOP3ModsImpl(Root); - if (!TM.Options.NoNaNsFPMath && !isKnownNeverNaN(Src, *MRI)) + if (!isKnownNeverNaN(Src, *MRI)) return None; return {{ diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll new file mode 100644 index 0000000000000..d64e97e80a6d1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll @@ -0,0 +1,589 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s + +define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { +; SI-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 +; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: s_mov_b32 s10, 0 +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[8:9], s[2:3] +; SI-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: s_mov_b64 s[8:9], s[4:5] +; SI-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: s_mov_b64 s[8:9], s[6:7] +; SI-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: s_waitcnt vmcnt(2) +; SI-NEXT: v_sub_f32_e32 v2, 0x80000000, v2 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_med3_f32 v2, v2, v3, v4 +; SI-NEXT: s_mov_b64 s[2:3], s[10:11] +; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 +; SI-NEXT: s_endpgm +; +; VI-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0: +; 
VI: ; %bb.0: +; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 +; VI-NEXT: v_lshlrev_b32_e32 v8, 2, v0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s2 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v8 +; VI-NEXT: v_mov_b32_e32 v2, s4 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v3, s5 +; VI-NEXT: v_add_u32_e32 v2, vcc, v2, v8 +; VI-NEXT: v_mov_b32_e32 v4, s6 +; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: v_mov_b32_e32 v5, s7 +; VI-NEXT: v_add_u32_e32 v4, vcc, v4, v8 +; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; VI-NEXT: flat_load_dword v0, v[0:1] +; VI-NEXT: flat_load_dword v1, v[2:3] +; VI-NEXT: flat_load_dword v2, v[4:5] +; VI-NEXT: v_mov_b32_e32 v7, s1 +; VI-NEXT: v_mov_b32_e32 v6, s0 +; VI-NEXT: v_add_u32_e32 v6, vcc, v6, v8 +; VI-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; VI-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) +; VI-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_med3_f32 v0, v0, v1, v2 +; VI-NEXT: flat_store_dword v[6:7], v0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_load_dword v1, v0, s[2:3] +; GFX9-NEXT: global_load_dword v2, v0, s[4:5] +; GFX9-NEXT: global_load_dword v3, v0, s[6:7] +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid + %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid + %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid + %a = load volatile float, float addrspace(1)* %gep0 + %b = load volatile float, float addrspace(1)* %gep1 + %c = load volatile float, float addrspace(1)* %gep2 + %a.fneg = fsub float -0.0, %a + %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b) + %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b) + %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) + %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + store float %med3, float addrspace(1)* %outgep + ret void +} + +define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 { +; SI-LABEL: v_test_no_global_nnans_med3_f32_pat0_srcmod0: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 +; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: s_mov_b32 s10, 0 +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[8:9], s[2:3] +; SI-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: s_mov_b64 s[8:9], s[4:5] +; SI-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: s_mov_b64 s[8:9], s[6:7] +; SI-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: s_waitcnt vmcnt(2) +; SI-NEXT: v_sub_f32_e32 v2, 0x80000000, v2 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_med3_f32 v2, v2, v3, v4 +; SI-NEXT: s_mov_b64 s[2:3], s[10:11] +; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 +; SI-NEXT: s_endpgm +; +; VI-LABEL: 
v_test_no_global_nnans_med3_f32_pat0_srcmod0: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 +; VI-NEXT: v_lshlrev_b32_e32 v8, 2, v0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s2 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v8 +; VI-NEXT: v_mov_b32_e32 v2, s4 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v3, s5 +; VI-NEXT: v_add_u32_e32 v2, vcc, v2, v8 +; VI-NEXT: v_mov_b32_e32 v4, s6 +; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: v_mov_b32_e32 v5, s7 +; VI-NEXT: v_add_u32_e32 v4, vcc, v4, v8 +; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; VI-NEXT: flat_load_dword v0, v[0:1] +; VI-NEXT: flat_load_dword v1, v[2:3] +; VI-NEXT: flat_load_dword v2, v[4:5] +; VI-NEXT: v_mov_b32_e32 v7, s1 +; VI-NEXT: v_mov_b32_e32 v6, s0 +; VI-NEXT: v_add_u32_e32 v6, vcc, v6, v8 +; VI-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; VI-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) +; VI-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_med3_f32 v0, v0, v1, v2 +; VI-NEXT: flat_store_dword v[6:7], v0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: v_test_no_global_nnans_med3_f32_pat0_srcmod0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_load_dword v1, v0, s[2:3] +; GFX9-NEXT: global_load_dword v2, v0, s[4:5] +; GFX9-NEXT: global_load_dword v3, v0, s[6:7] +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid + %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid + %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid + %a = load volatile float, float addrspace(1)* %gep0 + %b = load volatile float, float addrspace(1)* %gep1 + %c = load volatile float, float addrspace(1)* %gep2 + %a.fneg = fsub float -0.0, %a + %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b) + %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b) + %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) + %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + store float %med3, float addrspace(1)* %outgep + ret void +} + +define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { +; SI-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 +; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: s_mov_b32 s10, 0 +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[8:9], s[2:3] +; SI-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: s_mov_b64 s[8:9], s[4:5] +; SI-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: s_mov_b64 s[8:9], s[6:7] +; SI-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: s_mov_b32 s2, 0x80000000 +; SI-NEXT: s_waitcnt vmcnt(2) +; SI-NEXT: v_sub_f32_e32 v2, s2, v2 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_sub_f32_e64 v4, s2, |v4| +; SI-NEXT: v_med3_f32 v2, v2, |v3|, v4 +; SI-NEXT: s_mov_b64 
s[2:3], s[10:11] +; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 +; SI-NEXT: s_endpgm +; +; VI-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 +; VI-NEXT: v_lshlrev_b32_e32 v6, 2, v0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s2 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 +; VI-NEXT: v_mov_b32_e32 v2, s4 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v3, s5 +; VI-NEXT: v_add_u32_e32 v2, vcc, v2, v6 +; VI-NEXT: v_mov_b32_e32 v4, s6 +; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: v_mov_b32_e32 v5, s7 +; VI-NEXT: v_add_u32_e32 v4, vcc, v4, v6 +; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; VI-NEXT: flat_load_dword v7, v[0:1] +; VI-NEXT: flat_load_dword v2, v[2:3] +; VI-NEXT: flat_load_dword v3, v[4:5] +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: s_mov_b32 s2, 0x80000000 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) +; VI-NEXT: v_sub_f32_e32 v4, s2, v7 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_sub_f32_e64 v3, s2, |v3| +; VI-NEXT: v_med3_f32 v2, v4, |v2|, v3 +; VI-NEXT: flat_store_dword v[0:1], v2 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_load_dword v1, v0, s[2:3] +; GFX9-NEXT: global_load_dword v2, v0, s[4:5] +; GFX9-NEXT: global_load_dword v3, v0, s[6:7] +; GFX9-NEXT: s_mov_b32 s2, 0x80000000 +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: v_sub_f32_e32 v1, s2, v1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_sub_f32_e64 v3, s2, |v3| +; GFX9-NEXT: v_med3_f32 v1, v1, |v2|, v3 +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid + %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid + %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid + %a = load volatile float, float addrspace(1)* %gep0 + %b = load volatile float, float addrspace(1)* %gep1 + %c = load volatile float, float addrspace(1)* %gep2 + + %a.fneg = fsub float -0.0, %a + %b.fabs = call float @llvm.fabs.f32(float %b) + %c.fabs = call float @llvm.fabs.f32(float %c) + %c.fabs.fneg = fsub float -0.0, %c.fabs + + %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b.fabs) + %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b.fabs) + %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg) + %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + + store float %med3, float addrspace(1)* %outgep + ret void +} + +define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { +; SI-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 +; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: s_mov_b32 s10, 0 +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[8:9], s[2:3] +; SI-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: 
s_mov_b64 s[8:9], s[4:5] +; SI-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: s_mov_b64 s[8:9], s[6:7] +; SI-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: s_mov_b32 s2, 0x80000000 +; SI-NEXT: s_waitcnt vmcnt(2) +; SI-NEXT: v_sub_f32_e64 v2, s2, |v2| +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_sub_f32_e64 v3, s2, |v3| +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_sub_f32_e64 v4, s2, |v4| +; SI-NEXT: v_med3_f32 v2, v2, v3, v4 +; SI-NEXT: s_mov_b64 s[2:3], s[10:11] +; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 +; SI-NEXT: s_endpgm +; +; VI-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 +; VI-NEXT: v_lshlrev_b32_e32 v6, 2, v0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s2 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 +; VI-NEXT: v_mov_b32_e32 v2, s4 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v3, s5 +; VI-NEXT: v_add_u32_e32 v2, vcc, v2, v6 +; VI-NEXT: v_mov_b32_e32 v4, s6 +; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: v_mov_b32_e32 v5, s7 +; VI-NEXT: v_add_u32_e32 v4, vcc, v4, v6 +; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; VI-NEXT: flat_load_dword v7, v[0:1] +; VI-NEXT: flat_load_dword v2, v[2:3] +; VI-NEXT: flat_load_dword v3, v[4:5] +; VI-NEXT: s_mov_b32 s2, 0x80000000 +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) +; VI-NEXT: v_sub_f32_e64 v4, s2, |v7| +; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) +; VI-NEXT: v_sub_f32_e64 v2, s2, |v2| +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_sub_f32_e64 v3, s2, |v3| +; VI-NEXT: v_med3_f32 v2, v4, v2, v3 +; VI-NEXT: flat_store_dword v[0:1], v2 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_load_dword v1, v0, s[2:3] +; GFX9-NEXT: global_load_dword v2, v0, s[4:5] +; GFX9-NEXT: global_load_dword v3, v0, s[6:7] +; GFX9-NEXT: s_mov_b32 s2, 0x80000000 +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: v_sub_f32_e64 v1, s2, |v1| +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: v_sub_f32_e64 v2, s2, |v2| +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_sub_f32_e64 v3, s2, |v3| +; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid + %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid + %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid + %a = load volatile float, float addrspace(1)* %gep0 + %b = load volatile float, float addrspace(1)* %gep1 + %c = load volatile float, float addrspace(1)* %gep2 + + %a.fabs = call float @llvm.fabs.f32(float %a) + %a.fabs.fneg = fsub float -0.0, %a.fabs + %b.fabs = call float @llvm.fabs.f32(float %b) + %b.fabs.fneg = fsub float -0.0, %b.fabs + %c.fabs = call float @llvm.fabs.f32(float %c) + %c.fabs.fneg = fsub float -0.0, %c.fabs + + %tmp0 = call float @llvm.minnum.f32(float %a.fabs.fneg, float %b.fabs.fneg) + %tmp1 = call float @llvm.maxnum.f32(float %a.fabs.fneg, float %b.fabs.fneg) + %tmp2 = call float 
@llvm.minnum.f32(float %tmp1, float %c.fabs.fneg) + %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + + store float %med3, float addrspace(1)* %outgep + ret void +} + +define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 { +; SI-LABEL: v_nnan_inputs_med3_f32_pat0: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 +; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: s_mov_b32 s10, 0 +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[8:9], s[2:3] +; SI-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: s_mov_b64 s[8:9], s[4:5] +; SI-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: s_mov_b64 s[8:9], s[6:7] +; SI-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: s_waitcnt vmcnt(2) +; SI-NEXT: v_add_f32_e32 v2, 1.0, v2 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_add_f32_e32 v3, 2.0, v3 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_add_f32_e32 v4, 4.0, v4 +; SI-NEXT: v_med3_f32 v2, v2, v3, v4 +; SI-NEXT: s_mov_b64 s[2:3], s[10:11] +; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 +; SI-NEXT: s_endpgm +; +; VI-LABEL: v_nnan_inputs_med3_f32_pat0: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 +; VI-NEXT: v_lshlrev_b32_e32 v6, 2, v0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s2 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 +; VI-NEXT: v_mov_b32_e32 v2, s4 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v3, s5 +; VI-NEXT: v_add_u32_e32 v2, vcc, v2, v6 +; VI-NEXT: v_mov_b32_e32 v4, s6 +; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: v_mov_b32_e32 v5, s7 +; VI-NEXT: v_add_u32_e32 v4, vcc, v4, v6 +; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; VI-NEXT: flat_load_dword v7, v[0:1] +; VI-NEXT: flat_load_dword v2, v[2:3] +; VI-NEXT: flat_load_dword v3, v[4:5] +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) +; VI-NEXT: v_add_f32_e32 v4, 1.0, v7 +; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) +; VI-NEXT: v_add_f32_e32 v2, 2.0, v2 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_add_f32_e32 v3, 4.0, v3 +; VI-NEXT: v_med3_f32 v2, v4, v2, v3 +; VI-NEXT: flat_store_dword v[0:1], v2 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: v_nnan_inputs_med3_f32_pat0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_load_dword v1, v0, s[2:3] +; GFX9-NEXT: global_load_dword v2, v0, s[4:5] +; GFX9-NEXT: global_load_dword v3, v0, s[6:7] +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1 +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: v_add_f32_e32 v2, 2.0, v2 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_add_f32_e32 v3, 4.0, v3 +; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid + %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid + %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid + %a = load volatile float, float 
addrspace(1)* %gep0 + %b = load volatile float, float addrspace(1)* %gep1 + %c = load volatile float, float addrspace(1)* %gep2 + + %a.nnan = fadd nnan float %a, 1.0 + %b.nnan = fadd nnan float %b, 2.0 + %c.nnan = fadd nnan float %c, 4.0 + + %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan) + %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan) + %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan) + %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + store float %med3, float addrspace(1)* %outgep + ret void +} + + +; --------------------------------------------------------------------- +; Negative patterns +; --------------------------------------------------------------------- + +define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 { +; SI-LABEL: v_test_safe_med3_f32_pat0_multi_use0: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 +; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: s_mov_b32 s10, 0 +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[8:9], s[2:3] +; SI-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: s_mov_b64 s[8:9], s[4:5] +; SI-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_mov_b32 s3, s11 +; SI-NEXT: s_mov_b64 s[8:9], s[6:7] +; SI-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_min_f32_e32 v5, v2, v3 +; SI-NEXT: v_max_f32_e32 v2, v2, v3 +; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_min_f32_e32 v2, v2, v4 +; SI-NEXT: v_max_f32_e32 v2, v5, v2 +; SI-NEXT: s_mov_b64 s[2:3], s[10:11] +; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 +; SI-NEXT: s_endpgm +; +; VI-LABEL: v_test_safe_med3_f32_pat0_multi_use0: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 +; VI-NEXT: v_lshlrev_b32_e32 v6, 2, v0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s2 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 +; VI-NEXT: v_mov_b32_e32 v2, s4 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v3, s5 +; VI-NEXT: v_add_u32_e32 v2, vcc, v2, v6 +; VI-NEXT: v_mov_b32_e32 v4, s6 +; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: v_mov_b32_e32 v5, s7 +; VI-NEXT: v_add_u32_e32 v4, vcc, v4, v6 +; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; VI-NEXT: flat_load_dword v7, v[0:1] +; VI-NEXT: flat_load_dword v2, v[2:3] +; VI-NEXT: flat_load_dword v3, v[4:5] +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) +; VI-NEXT: v_min_f32_e32 v4, v7, v2 +; VI-NEXT: v_max_f32_e32 v2, v7, v2 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_min_f32_e32 v2, v2, v3 +; VI-NEXT: v_max_f32_e32 v2, v4, v2 +; VI-NEXT: flat_store_dword v[0:1], v4 +; VI-NEXT: flat_store_dword v[0:1], v2 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: v_test_safe_med3_f32_pat0_multi_use0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_load_dword v1, v0, s[2:3] +; GFX9-NEXT: global_load_dword v2, v0, s[4:5] +; GFX9-NEXT: global_load_dword v3, v0, s[6:7] +; GFX9-NEXT: 
s_waitcnt vmcnt(1) +; GFX9-NEXT: v_min_f32_e32 v4, v1, v2 +; GFX9-NEXT: v_max_f32_e32 v1, v1, v2 +; GFX9-NEXT: global_store_dword v[0:1], v4, off +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX9-NEXT: v_max_f32_e32 v1, v4, v1 +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid + %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid + %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid + %a = load volatile float, float addrspace(1)* %gep0 + %b = load volatile float, float addrspace(1)* %gep1 + %c = load volatile float, float addrspace(1)* %gep2 + %tmp0 = call float @llvm.minnum.f32(float %a, float %b) + store volatile float %tmp0, float addrspace(1)* undef + %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) + %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) + %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + store float %med3, float addrspace(1)* %outgep + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #0 +declare float @llvm.fabs.f32(float) #0 +declare float @llvm.minnum.f32(float, float) #0 +declare float @llvm.maxnum.f32(float, float) #0 +declare double @llvm.minnum.f64(double, double) #0 +declare double @llvm.maxnum.f64(double, double) #0 +declare half @llvm.fabs.f16(half) #0 +declare half @llvm.minnum.f16(half, half) #0 +declare half @llvm.maxnum.f16(half, half) #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" } +attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" } From 816663adb5f1362597c9b2947586e0847c5cdf9b Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Thu, 10 Sep 2020 08:40:17 +0100 Subject: [PATCH 0522/1079] [SVE] In LoopIdiomRecognize::isLegalStore bail out for scalable vectors The function LoopIdiomRecognize::isLegalStore looks for stores in loops that could be transformed into memset or memcpy. However, the algorithm currently requires that we know how big the store is at runtime, i.e. that the store size will not overflow an unsigned integer. For scalable vectors we cannot guarantee this so I have changed the code to bail out for now. In addition, even if we add a way to query the maximum value of vscale in future we will still need to update the algorithm to cope with non-constant strides. The additional cost associated with calculating the memset and memcpy arguments will need to be taken into account as well. This patch also fixes up an implicit TypeSize -> uint64_t cast, thereby removing a warning. 
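To make the shape of the fix concrete, here is a minimal sketch of the new
guard (a sketch only, shown out of context: DL, StoredVal and LegalStoreKind
are the names already in scope in isLegalStore, and isScalable() /
getFixedSize() are the TypeSize accessors this patch relies on):

  // Query the size as a TypeSize and refuse to do any fixed-width
  // arithmetic on it while it is scalable: the vscale multiple is unknown.
  TypeSize SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
  if (SizeInBits.isScalable())
    return LegalStoreKind::None;
  // Making the TypeSize -> uint64_t conversion explicit removes the warning.
  uint64_t FixedBits = SizeInBits.getFixedSize();
  if ((FixedBits & 7) != 0 ||    // not a whole number of bytes
      (FixedBits >> 32) != 0)    // would overflow an unsigned
    return LegalStoreKind::None;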
I've added tests here showing a fixed width vector loop being transformed into memcpy, and a scalable vector loop remaining unchanged: Transforms/LoopIdiom/memcpy-vectors.ll Differential Revision: https://reviews.llvm.org/D87439 --- .../Transforms/Scalar/LoopIdiomRecognize.cpp | 7 ++- .../Transforms/LoopIdiom/memcpy-vectors.ll | 53 +++++++++++++++++++ 2 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/LoopIdiom/memcpy-vectors.ll diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 011d6f487742d..147ccc939ac9f 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -468,8 +468,11 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) { Value *StorePtr = SI->getPointerOperand(); // Reject stores that are so large that they overflow an unsigned. - uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType()); - if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) + // When storing out scalable vectors we bail out for now, since the code + // below currently only works for constant strides. + TypeSize SizeInBits = DL->getTypeSizeInBits(StoredVal->getType()); + if (SizeInBits.isScalable() || (SizeInBits.getFixedSize() & 7) || + (SizeInBits.getFixedSize() >> 32) != 0) return LegalStoreKind::None; // See if the pointer expression is an AddRec like {base,+,1} on the current diff --git a/llvm/test/Transforms/LoopIdiom/memcpy-vectors.ll b/llvm/test/Transforms/LoopIdiom/memcpy-vectors.ll new file mode 100644 index 0000000000000..b4445c70cb57f --- /dev/null +++ b/llvm/test/Transforms/LoopIdiom/memcpy-vectors.ll @@ -0,0 +1,53 @@ +; RUN: opt -loop-idiom -S <%s 2>%t | FileCheck %s +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
+; WARN-NOT: warning
+
+define void @memcpy_fixed_vec(i64* noalias %a, i64* noalias %b) local_unnamed_addr #1 {
+; CHECK-LABEL: @memcpy_fixed_vec(
+; CHECK: entry:
+; CHECK: memcpy
+; CHECK: vector.body
+entry:
+  br label %vector.body
+
+vector.body: ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds i64, i64* %a, i64 %index
+  %1 = bitcast i64* %0 to <2 x i64>*
+  %wide.load = load <2 x i64>, <2 x i64>* %1, align 8
+  %2 = getelementptr inbounds i64, i64* %b, i64 %index
+  %3 = bitcast i64* %2 to <2 x i64>*
+  store <2 x i64> %wide.load, <2 x i64>* %3, align 8
+  %index.next = add nuw nsw i64 %index, 2
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup: ; preds = %vector.body
+  ret void
+}
+
+define void @memcpy_scalable_vec(i64* noalias %a, i64* noalias %b) local_unnamed_addr #1 {
+; CHECK-LABEL: @memcpy_scalable_vec(
+; CHECK: entry:
+; CHECK-NOT: memcpy
+; CHECK: vector.body
+entry:
+  br label %vector.body
+
+vector.body: ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = bitcast i64* %a to <vscale x 2 x i64>*
+  %1 = getelementptr inbounds <vscale x 2 x i64>, <vscale x 2 x i64>* %0, i64 %index
+  %wide.load = load <vscale x 2 x i64>, <vscale x 2 x i64>* %1, align 16
+  %2 = bitcast i64* %b to <vscale x 2 x i64>*
+  %3 = getelementptr inbounds <vscale x 2 x i64>, <vscale x 2 x i64>* %2, i64 %index
+  store <vscale x 2 x i64> %wide.load, <vscale x 2 x i64>* %3, align 16
+  %index.next = add nuw nsw i64 %index, 1
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup: ; preds = %vector.body
+  ret void
+}

From 676febc044ecbb27f8a227d351ced282cfe908cf Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer
Date: Wed, 9 Sep 2020 14:39:51 +0100
Subject: [PATCH 0523/1079] [ARM][MVE] Tail-predication: check get.active.lane.mask's TC value

This adds additional checks for the original scalar loop tripcount value,
i.e. the second argument of get.active.lane.mask, and performs several
sanity checks to see if it is of the form that we expect, similar to what
we already do for the IV, which is the first argument of
get.active.lane.mask.

Differential Revision: https://reviews.llvm.org/D86074
---
 llvm/lib/Target/ARM/MVETailPredication.cpp    |  81 +++++++-
 .../LowOverheadLoops/basic-tail-pred.ll       | 189 ++++++++++++++++++
 .../LowOverheadLoops/tail-pred-const.ll       |  62 +-----
 3 files changed, 263 insertions(+), 69 deletions(-)

diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp
index ef83e36381104..26e21f04c6b9a 100644
--- a/llvm/lib/Target/ARM/MVETailPredication.cpp
+++ b/llvm/lib/Target/ARM/MVETailPredication.cpp
@@ -119,10 +119,10 @@ class MVETailPredication : public LoopPass {
   /// load/stores.
   bool IsPredicatedVectorLoop();
 
-  /// Perform checks on the arguments of @llvm.get.active.lane.mask
-  /// intrinsic: check if the first is a loop induction variable, and for the
-  /// the second check that no overflow can occur in the expression that use
-  /// this backedge-taken count.
+  /// Perform several checks on the arguments of @llvm.get.active.lane.mask
+  /// intrinsic. E.g., check that the loop induction variable and the element
+  /// count are of the form we expect, and also perform overflow checks for
+  /// the new expressions that are created.
  bool IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount,
                        FixedVectorType *VecTy);

@@ -373,10 +373,73 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
       EnableTailPredication == TailPredication::ForceEnabledNoReductions ||
       EnableTailPredication == TailPredication::ForceEnabled;
 
-  // 1) TODO: Check that the TripCount (TC) belongs to this loop (originally).
+  // 1) Check that the original scalar loop TripCount (TC) belongs to this loop.
   // The scalar tripcount corresponds to the number of elements processed by the
   // loop, so we will refer to that from this point on.
-  auto *ElemCountVal = ActiveLaneMask->getOperand(1);
+  Value *ElemCount = ActiveLaneMask->getOperand(1);
+  auto *EC = SE->getSCEV(ElemCount);
+  auto *TC = SE->getSCEV(TripCount);
+  int VectorWidth = VecTy->getNumElements();
+  ConstantInt *ConstElemCount = nullptr;
+
+  if (!SE->isLoopInvariant(EC, L)) {
+    LLVM_DEBUG(dbgs() << "ARM TP: element count must be loop invariant.\n");
+    return false;
+  }
+
+  if ((ConstElemCount = dyn_cast<ConstantInt>(ElemCount))) {
+    ConstantInt *TC = dyn_cast<ConstantInt>(TripCount);
+    if (!TC) {
+      LLVM_DEBUG(dbgs() << "ARM TP: Constant tripcount expected in "
+                           "set.loop.iterations\n");
+      return false;
+    }
+
+    // Calculate 2 tripcount values and check that they are consistent with
+    // each other:
+    // i) The number of loop iterations extracted from the set.loop.iterations
+    //    intrinsic, multiplied by the vector width:
+    uint64_t TC1 = TC->getZExtValue() * VectorWidth;
+
+    // ii) TC1 has to be equal to TC + 1, with the + 1 to compensate for start
+    //     counting from 0.
+    uint64_t TC2 = ConstElemCount->getZExtValue() + 1;
+
+    if (TC1 != TC2) {
+      LLVM_DEBUG(dbgs() << "ARM TP: inconsistent constant tripcount values: "
+                        << TC1 << " from set.loop.iterations, and "
+                        << TC2 << " from get.active.lane.mask\n");
+      return false;
+    }
+  } else {
+    // Smoke tests if the element count is a runtime value. I.e., this isn't
+    // fully generic because that would require a full SCEV visitor here. It
+    // would require extracting the variable from the elementcount SCEV
+    // expression, and match this up with the tripcount SCEV expression. If
+    // this matches up, we know both expressions are bound by the same
+    // variable, and thus we know this tripcount belongs to this loop. The
+    // checks below will catch most cases though.
+    if (isa<SCEVAddExpr>(EC) || isa<SCEVUnknown>(EC)) {
+      // If the element count is a simple AddExpr or SCEVUnknown, which is e.g.
+      // the case when the element count is just a variable %N, we can just see
+      // if it is an operand in the tripcount scev expression.
+      if (isa<SCEVAddExpr>(TC) && !SE->hasOperand(TC, EC)) {
+        LLVM_DEBUG(dbgs() << "ARM TP: Can't verify the element counter\n");
+        return false;
+      }
+    } else if (const SCEVAddRecExpr *AddRecExpr = dyn_cast<SCEVAddRecExpr>(EC)) {
+      // For more complicated AddRecExpr, check that the corresponding loop and
+      // its loop hierarchy contains the trip count loop.
+      if (!AddRecExpr->getLoop()->contains(L)) {
+        LLVM_DEBUG(dbgs() << "ARM TP: Can't verify the element counter\n");
+        return false;
+      }
+    } else {
+      LLVM_DEBUG(dbgs() << "ARM TP: Unsupported SCEV type, can't verify the "
+                           "element counter\n");
+      return false;
+    }
+  }
 
   // 2) Prove that the sub expression is non-negative, i.e.
it doesn't overflow: // @@ -393,9 +456,7 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, // // upperbound(TC) <= UINT_MAX - VectorWidth // - auto *TC = SE->getSCEV(TripCount); unsigned SizeInBits = TripCount->getType()->getScalarSizeInBits(); - int VectorWidth = VecTy->getNumElements(); auto Diff = APInt(SizeInBits, ~0) - APInt(SizeInBits, VectorWidth); uint64_t MaxMinusVW = Diff.getZExtValue(); // FIXME: since ranges can be negative we work with signed ranges here, but @@ -432,9 +493,9 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, // 1. Thus, if the ranges of Ceil and TC are not a single constant but a set, // we first add 0 to TC such that we can do the <= comparison on both sets. // - auto *ElementCount = SE->getSCEV(ElemCountVal); + // Tmp = ElementCount + (VW-1) - auto *ECPlusVWMinus1 = SE->getAddExpr(ElementCount, + auto *ECPlusVWMinus1 = SE->getAddExpr(EC, SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth - 1))); // Ceil = ElementCount + (VW-1) / VW auto *Ceil = SE->getUDivExpr(ECPlusVWMinus1, diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll index fb974048b1ef4..fffa430b7274d 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll @@ -431,6 +431,195 @@ for.cond.cleanup: ret void } +; CHECK-LABEL: const_expected_in_set_loop +; CHECK: call <4 x i1> @llvm.get.active.lane.mask +; CHECK-NOT: vctp +; CHECK: ret void +; +define dso_local void @const_expected_in_set_loop(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { +entry: + %cmp8 = icmp sgt i32 %N, 0 + %0 = add i32 %N, 3 + %1 = lshr i32 %0, 2 + %2 = shl nuw i32 %1, 2 + %3 = add i32 %2, -4 + %4 = lshr i32 %3, 2 + %5 = add nuw nsw i32 %4, 1 + br i1 %cmp8, label %vector.ph, label %for.cond.cleanup + +vector.ph: + call void @llvm.set.loop.iterations.i32(i32 %5) + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %lsr.iv17 = phi i32* [ %scevgep18, %vector.body ], [ %A, %vector.ph ] + %lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ] + %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ] + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %6 = phi i32 [ %5, %vector.ph ], [ %8, %vector.body ] + %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>* + %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* + %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>* + + %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 42) + + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %7 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %7, <4 x i32>* %lsr.iv1719, i32 4, <4 x i1> %active.lane.mask) + %index.next = add i32 %index, 4 + %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 + %scevgep15 = getelementptr i32, i32* %lsr.iv14, i32 4 + %scevgep18 = getelementptr i32, i32* %lsr.iv17, i32 4 + %8 = call i32 @llvm.loop.decrement.reg.i32(i32 %6, i32 1) + %9 = icmp ne i32 %8, 0 + br i1 %9, label %vector.body, label %for.cond.cleanup + 
+for.cond.cleanup: ; preds = %vector.body, %entry + ret void +} + +; CHECK-LABEL: wrong_tripcount_arg +; CHECK: vector.body: +; CHECK: call <4 x i1> @llvm.arm.mve.vctp32 +; CHECK-NOT: call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32 +; CHECK: vector.body35: +; CHECK: call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32 +; CHECK-NOT: call <4 x i1> @llvm.arm.mve.vctp32 +; CHECK: ret void +; +define dso_local void @wrong_tripcount_arg(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32* noalias nocapture %D, i32 %N1, i32 %N2) local_unnamed_addr #0 { +entry: + %cmp29 = icmp sgt i32 %N1, 0 + %0 = add i32 %N1, 3 + %1 = lshr i32 %0, 2 + %2 = shl nuw i32 %1, 2 + %3 = add i32 %2, -4 + %4 = lshr i32 %3, 2 + %5 = add nuw nsw i32 %4, 1 + br i1 %cmp29, label %vector.ph, label %for.cond4.preheader + +vector.ph: ; preds = %entry + call void @llvm.set.loop.iterations.i32(i32 %5) + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %lsr.iv62 = phi i32* [ %scevgep63, %vector.body ], [ %D, %vector.ph ] + %lsr.iv59 = phi i32* [ %scevgep60, %vector.body ], [ %C, %vector.ph ] + %lsr.iv56 = phi i32* [ %scevgep57, %vector.body ], [ %B, %vector.ph ] + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %6 = phi i32 [ %5, %vector.ph ], [ %8, %vector.body ] + %lsr.iv5658 = bitcast i32* %lsr.iv56 to <4 x i32>* + %lsr.iv5961 = bitcast i32* %lsr.iv59 to <4 x i32>* + %lsr.iv6264 = bitcast i32* %lsr.iv62 to <4 x i32>* + %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N1) + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv5658, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %wide.masked.load32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv5961, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %7 = add nsw <4 x i32> %wide.masked.load32, %wide.masked.load + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %7, <4 x i32>* %lsr.iv6264, i32 4, <4 x i1> %active.lane.mask) + %index.next = add i32 %index, 4 + %scevgep57 = getelementptr i32, i32* %lsr.iv56, i32 4 + %scevgep60 = getelementptr i32, i32* %lsr.iv59, i32 4 + %scevgep63 = getelementptr i32, i32* %lsr.iv62, i32 4 + %8 = call i32 @llvm.loop.decrement.reg.i32(i32 %6, i32 1) + %9 = icmp ne i32 %8, 0 + br i1 %9, label %vector.body, label %for.cond4.preheader + +for.cond4.preheader: ; preds = %vector.body, %entry + %cmp527 = icmp sgt i32 %N2, 0 + %10 = add i32 %N2, 3 + %11 = lshr i32 %10, 2 + %12 = shl nuw i32 %11, 2 + %13 = add i32 %12, -4 + %14 = lshr i32 %13, 2 + %15 = add nuw nsw i32 %14, 1 + br i1 %cmp527, label %vector.ph36, label %for.cond.cleanup6 + +vector.ph36: ; preds = %for.cond4.preheader + call void @llvm.set.loop.iterations.i32(i32 %15) + br label %vector.body35 + +vector.body35: ; preds = %vector.body35, %vector.ph36 + %lsr.iv53 = phi i32* [ %scevgep54, %vector.body35 ], [ %A, %vector.ph36 ] + %lsr.iv50 = phi i32* [ %scevgep51, %vector.body35 ], [ %C, %vector.ph36 ] + %lsr.iv = phi i32* [ %scevgep, %vector.body35 ], [ %B, %vector.ph36 ] + %index40 = phi i32 [ 0, %vector.ph36 ], [ %index.next41, %vector.body35 ] + %16 = phi i32 [ %15, %vector.ph36 ], [ %18, %vector.body35 ] + %lsr.iv49 = bitcast i32* %lsr.iv to <4 x i32>* + %lsr.iv5052 = bitcast i32* %lsr.iv50 to <4 x i32>* + %lsr.iv5355 = bitcast i32* %lsr.iv53 to <4 x i32>* + +; This has N1 as the tripcount / element count, which is the tripcount of the +; first loop and not this one: + %active.lane.mask46 = call 
<4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index40, i32 %N1) + + %wide.masked.load47 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv49, i32 4, <4 x i1> %active.lane.mask46, <4 x i32> undef) + %wide.masked.load48 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv5052, i32 4, <4 x i1> %active.lane.mask46, <4 x i32> undef) + %17 = add nsw <4 x i32> %wide.masked.load48, %wide.masked.load47 + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %17, <4 x i32>* %lsr.iv5355, i32 4, <4 x i1> %active.lane.mask46) + %index.next41 = add i32 %index40, 4 + %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 + %scevgep51 = getelementptr i32, i32* %lsr.iv50, i32 4 + %scevgep54 = getelementptr i32, i32* %lsr.iv53, i32 4 + %18 = call i32 @llvm.loop.decrement.reg.i32(i32 %16, i32 1) + %19 = icmp ne i32 %18, 0 + br i1 %19, label %vector.body35, label %for.cond.cleanup6 + +for.cond.cleanup6: ; preds = %vector.body35, %for.cond4.preheader + ret void +} + +; CHECK-LABEL: tripcount_arg_not_invariant +; CHECK: call <4 x i1> @llvm.get.active.lane.mask +; CHECK-NOT: vctp +; CHECK: ret void +; +define dso_local void @tripcount_arg_not_invariant(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { +entry: + %cmp8 = icmp sgt i32 %N, 0 + %0 = add i32 %N, 3 + %1 = lshr i32 %0, 2 + %2 = shl nuw i32 %1, 2 + %3 = add i32 %2, -4 + %4 = lshr i32 %3, 2 + %5 = add nuw nsw i32 %4, 1 + br i1 %cmp8, label %vector.ph, label %for.cond.cleanup + +vector.ph: ; preds = %entry + %trip.count.minus.1 = add i32 %N, -1 + call void @llvm.set.loop.iterations.i32(i32 %5) + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %lsr.iv17 = phi i32* [ %scevgep18, %vector.body ], [ %A, %vector.ph ] + %lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ] + %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ] + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %6 = phi i32 [ %5, %vector.ph ], [ %8, %vector.body ] + + %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>* + %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* + %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>* + + %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %index) + + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %7 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %7, <4 x i32>* %lsr.iv1719, i32 4, <4 x i1> %active.lane.mask) + %index.next = add i32 %index, 4 + %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 + %scevgep15 = getelementptr i32, i32* %lsr.iv14, i32 4 + %scevgep18 = getelementptr i32, i32* %lsr.iv17, i32 4 + %8 = call i32 @llvm.loop.decrement.reg.i32(i32 %6, i32 1) + %9 = icmp ne i32 %8, 0 + ;br i1 %9, label %vector.body, label %for.cond.cleanup + br i1 %9, label %vector.body, label %vector.ph + +for.cond.cleanup: ; preds = %vector.body, %entry + ret void +} + declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>) declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>) diff --git 
a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll
index 4cd0c54c666c8..8bf15aba9d975 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll
@@ -265,13 +265,13 @@ for.cond.cleanup:
   ret void
 }
 
-; CHECK-LABEL: @overflow_BTC_plus_1(
+; CHECK-LABEL: @inconsistent_tripcounts(
 ; CHECK: vector.body:
 ; CHECK-NOT: @llvm.arm.mve.vctp32
 ; CHECK: @llvm.get.active.lane.mask
 ; CHECK: ret void
 ;
-define dso_local void @overflow_BTC_plus_1(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32* noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
+define dso_local void @inconsistent_tripcounts(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32* noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
 entry:
   call void @llvm.set.loop.iterations.i32(i32 8001)
   br label %vector.body
@@ -316,63 +316,7 @@ for.cond.cleanup:
 ;
 define dso_local void @overflow_in_sub(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32* noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
 entry:
-  call void @llvm.set.loop.iterations.i32(i32 8001)
-  br label %vector.body
-
-vector.body:
-  %lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %A, %entry ]
-  %lsr.iv11 = phi i32* [ %scevgep12, %vector.body ], [ %C, %entry ]
-  %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %entry ]
-  %index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
-  %0 = phi i32 [ 8001, %entry ], [ %3, %vector.body ]
-  %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
-  %lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>*
-  %lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>*
-  %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
-  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
-  %induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
-
-; Overflow in the subtraction.
This should hold: -; -; ceil(ElementCount / VectorWidth) >= TripCount -; -; But we have: -; -; ceil(3200 / 4) >= 8001 -; 8000 >= 8001 -; - %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 31999) - - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv10, i32 4, <4 x i1> %1, <4 x i32> undef) - %wide.masked.load9 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1113, i32 4, <4 x i1> %1, <4 x i32> undef) - %2 = add nsw <4 x i32> %wide.masked.load9, %wide.masked.load - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %2, <4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %1) - %index.next = add i32 %index, 4 - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 - %scevgep12 = getelementptr i32, i32* %lsr.iv11, i32 4 - %scevgep15 = getelementptr i32, i32* %lsr.iv14, i32 4 - %3 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1) - %4 = icmp ne i32 %3, 0 - br i1 %4, label %vector.body, label %for.cond.cleanup - -for.cond.cleanup: - ret void -} - -; CHECK-LABEL: @overflow_in_rounding_tripcount( -; CHECK: vector.body: -; CHECK-NOT: @llvm.arm.mve.vctp32 -; CHECK: @llvm.get.active.lane.mask -; CHECK: ret void -; -define dso_local void @overflow_in_rounding_tripcount(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32* noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { -entry: - -; TC = 4294967292 -; 4294967292 <= 4294967291 (MAX - vectorwidth) -; False -; - call void @llvm.set.loop.iterations.i32(i32 4294967291) + call void @llvm.set.loop.iterations.i32(i32 1073741824) br label %vector.body vector.body: From 12232dc181cbe78fbd40a6ed1a89795a2c9a1154 Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Mon, 14 Sep 2020 07:56:39 +0000 Subject: [PATCH 0524/1079] [SyntaxTree][List] Fix: `ParameterDeclarationList` is the `List` inside `ParametersAndQualifiers` Differential Revision: https://reviews.llvm.org/D87598 --- clang/lib/Tooling/Syntax/Tree.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp index ca1e2880af9f2..2bff159696c1c 100644 --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ b/clang/lib/Tooling/Syntax/Tree.cpp @@ -366,7 +366,7 @@ clang::tok::TokenKind syntax::List::getDelimiterTokenKind() { case NodeKind::NestedNameSpecifier: return clang::tok::coloncolon; case NodeKind::CallArguments: - case NodeKind::ParametersAndQualifiers: + case NodeKind::ParameterDeclarationList: return clang::tok::comma; default: llvm_unreachable("This is not a subclass of List, thus " @@ -379,7 +379,7 @@ syntax::List::TerminationKind syntax::List::getTerminationKind() { case NodeKind::NestedNameSpecifier: return TerminationKind::Terminated; case NodeKind::CallArguments: - case NodeKind::ParametersAndQualifiers: + case NodeKind::ParameterDeclarationList: return TerminationKind::Separated; default: llvm_unreachable("This is not a subclass of List, thus " @@ -393,7 +393,7 @@ bool syntax::List::canBeEmpty() { return false; case NodeKind::CallArguments: return true; - case NodeKind::ParametersAndQualifiers: + case NodeKind::ParameterDeclarationList: return true; default: llvm_unreachable("This is not a subclass of List, thus canBeEmpty() " From 0f4cc64fd747fbb33aeccfaccb8873762d2511f2 Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Mon, 14 Sep 2020 07:58:30 +0000 Subject: [PATCH 0525/1079] [SyntaxTree] Provide `List::classof` Differential Revision: https://reviews.llvm.org/D87599 --- 
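A note on the mechanism (an illustrative, self-contained sketch with
stand-in types, not the real clang declarations): `classof` is the hook
that LLVM-style RTTI consults, so providing `List::classof` is what lets
`isa<List>(N)` and `dyn_cast<List>(N)` work on syntax nodes.

  enum class NodeKind {
    Leaf,
    NestedNameSpecifier,
    CallArguments,
    ParameterDeclarationList
  };

  struct Node {
    NodeKind Kind;
    NodeKind getKind() const { return Kind; }
  };

  struct List : Node {
    // isa<>/dyn_cast<> dispatch to this predicate instead of using C++
    // dynamic_cast; it only inspects the kind tag of the node.
    static bool classof(const Node *N) {
      switch (N->getKind()) {
      case NodeKind::NestedNameSpecifier:
      case NodeKind::CallArguments:
      case NodeKind::ParameterDeclarationList:
        return true;
      default:
        return false;
      }
    }
  };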
 clang/include/clang/Tooling/Syntax/Tree.h |  1 +
 clang/lib/Tooling/Syntax/Tree.cpp         | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/clang/include/clang/Tooling/Syntax/Tree.h b/clang/include/clang/Tooling/Syntax/Tree.h
index b49a09344c0fb..5a09d45649694 100644
--- a/clang/include/clang/Tooling/Syntax/Tree.h
+++ b/clang/include/clang/Tooling/Syntax/Tree.h
@@ -213,6 +213,7 @@ class List : public Tree {
   };
 
   using Tree::Tree;
+  static bool classof(const Node *N);
   /// Returns the elements and corresponding delimiters. Missing elements
   /// and delimiters are represented as null pointers.
   ///
diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp
index 2bff159696c1c..1c705f6fd7cfd 100644
--- a/clang/lib/Tooling/Syntax/Tree.cpp
+++ b/clang/lib/Tooling/Syntax/Tree.cpp
@@ -273,6 +273,17 @@ syntax::Node *syntax::Tree::findChild(NodeRole R) {
   return nullptr;
 }
 
+bool classof(const syntax::Node *N) {
+  switch (N->getKind()) {
+  case syntax::NodeKind::NestedNameSpecifier:
+  case syntax::NodeKind::CallArguments:
+  case syntax::NodeKind::ParameterDeclarationList:
+    return true;
+  default:
+    return false;
+  }
+}
+
 std::vector<syntax::List::ElementAndDelimiter<syntax::Node>>
 syntax::List::getElementsAsNodesAndDelimiters() {
   if (!getFirstChild())

From ceb0128509c51100afbf804bda84d82b7ebe06b1 Mon Sep 17 00:00:00 2001
From: Eduardo Caldas
Date: Mon, 14 Sep 2020 08:20:19 +0000
Subject: [PATCH 0526/1079] [SyntaxTree][List] `assertInvariants` for `List`s

Differential Revision: https://reviews.llvm.org/D87600
---
 clang/include/clang/Tooling/Syntax/Tree.h |  6 +++---
 clang/lib/Tooling/Syntax/Tree.cpp         | 23 ++++++++++++++++++-----
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/clang/include/clang/Tooling/Syntax/Tree.h b/clang/include/clang/Tooling/Syntax/Tree.h
index 5a09d45649694..a544fc1827b7d 100644
--- a/clang/include/clang/Tooling/Syntax/Tree.h
+++ b/clang/include/clang/Tooling/Syntax/Tree.h
@@ -237,16 +237,16 @@ class List : public Tree {
   ///
   /// Useful for discovering the correct delimiter to use when adding
   /// elements to empty or one-element lists.
-  clang::tok::TokenKind getDelimiterTokenKind();
+  clang::tok::TokenKind getDelimiterTokenKind() const;
 
-  TerminationKind getTerminationKind();
+  TerminationKind getTerminationKind() const;
 
   /// Whether this list can be empty in syntactically and semantically correct
   /// code.
   ///
   /// This list may be empty when the source code has errors even if
   /// canBeEmpty() returns false.
-  bool canBeEmpty();
+  bool canBeEmpty() const;
 };
 
 } // namespace syntax
diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp
index 1c705f6fd7cfd..1edd2583105aa 100644
--- a/clang/lib/Tooling/Syntax/Tree.cpp
+++ b/clang/lib/Tooling/Syntax/Tree.cpp
@@ -223,7 +223,7 @@ void syntax::Node::assertInvariants() const {
   else
     assert(getParent() != nullptr);
 
-  auto *T = dyn_cast<Tree>(this);
+  const auto *T = dyn_cast<Tree>(this);
   if (!T)
     return;
   for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling()) {
@@ -232,6 +232,19 @@ void syntax::Node::assertInvariants() const {
     assert(!C->isDetached());
     assert(C->getParent() == T);
   }
+
+  const auto *L = dyn_cast<List>(T);
+  if (!L)
+    return;
+  for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling()) {
+    assert(C->getRole() == NodeRole::ListElement ||
+           C->getRole() == NodeRole::ListDelimiter);
+    if (C->getRole() == NodeRole::ListDelimiter) {
+      assert(isa<Leaf>(C));
+      assert(cast<Leaf>(C)->getToken()->kind() == L->getDelimiterTokenKind());
+    }
+  }
+
 #endif
 }
 
@@ -273,7 +286,7 @@ syntax::Node *syntax::Tree::findChild(NodeRole R) {
   return nullptr;
 }
 
-bool classof(const syntax::Node *N) {
+bool syntax::List::classof(const syntax::Node *N) {
   switch (N->getKind()) {
   case syntax::NodeKind::NestedNameSpecifier:
   case syntax::NodeKind::CallArguments:
   case syntax::NodeKind::ParameterDeclarationList:
     return true;
   default:
     return false;
   }
 }
@@ -372,7 +385,7 @@ std::vector<syntax::Node *> syntax::List::getElementsAsNodes() {
   return children;
 }
 
-clang::tok::TokenKind syntax::List::getDelimiterTokenKind() {
+clang::tok::TokenKind syntax::List::getDelimiterTokenKind() const {
   switch (this->getKind()) {
   case NodeKind::NestedNameSpecifier:
     return clang::tok::coloncolon;
@@ -385,7 +398,7 @@ clang::tok::TokenKind syntax::List::getDelimiterTokenKind() const {
 }
 
-syntax::List::TerminationKind syntax::List::getTerminationKind() {
+syntax::List::TerminationKind syntax::List::getTerminationKind() const {
   switch (this->getKind()) {
   case NodeKind::NestedNameSpecifier:
     return TerminationKind::Terminated;
@@ -398,7 +411,7 @@ syntax::List::TerminationKind syntax::List::getTerminationKind() const {
 }
 
-bool syntax::List::canBeEmpty() {
+bool syntax::List::canBeEmpty() const {
   switch (this->getKind()) {
   case NodeKind::NestedNameSpecifier:
     return false;

From dd519bf0b074cfee2879036ec9b55452e53c9d99 Mon Sep 17 00:00:00 2001
From: Meera Nakrani
Date: Mon, 14 Sep 2020 10:57:41 +0000
Subject: [PATCH 0527/1079] [ARM] Selects SSAT/USAT from correct LLVM IR

LLVM canonicalizes these conditional selects into a different pattern than
the one the old code matched. This updates the function to recognize the
new expected patterns and to select SSAT or USAT when they match. Tests
have also been updated to use the new patterns.

Differential Revision: https://reviews.llvm.org/D87379
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp      | 115 +++++++------------
 llvm/test/CodeGen/ARM/ssat.ll                |  80 ++++++-------
 llvm/test/CodeGen/ARM/usat.ll                |  80 ++++++-------
 llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll |  12 +-
 4 files changed, 125 insertions(+), 162 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 943dc467025dd..9c76a0da83eec 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -4998,16 +4998,6 @@ static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
           ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
 }
 
-// Similar to isLowerSaturate(), but checks for upper-saturating conditions.
-static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
-                            const SDValue TrueVal, const SDValue FalseVal,
-                            const ISD::CondCode CC, const SDValue K) {
-  return (isGTorGE(CC) &&
-          ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
-         (isLTorLE(CC) &&
-          ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
-}
-
 // Check if two chained conditionals could be converted into SSAT or USAT.
 //
 // SSAT can replace a set of two conditional selectors that bound a number to an
@@ -5019,6 +5009,10 @@ static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
 //     x < k ? (x < -k ? -k : x) : k
 //     etc.
 //
+// LLVM canonicalizes these to either a min(max()) or a max(min())
+// pattern. This function tries to match one of these and will return true
+// if successful.
+//
 // USAT works similarly to SSAT but bounds on the interval [0, k] where k + 1 is
 // a power of 2.
 //
@@ -5026,9 +5020,9 @@ static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
 // Additionally, the variable is returned in parameter V, the constant in K and
 // usat is set to true if the conditional represents an unsigned saturation
 static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
-                                    uint64_t &K, bool &usat) {
-  SDValue LHS1 = Op.getOperand(0);
-  SDValue RHS1 = Op.getOperand(1);
+                                    uint64_t &K, bool &Usat) {
+  SDValue V1 = Op.getOperand(0);
+  SDValue K1 = Op.getOperand(1);
   SDValue TrueVal1 = Op.getOperand(2);
   SDValue FalseVal1 = Op.getOperand(3);
   ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
@@ -5037,82 +5031,57 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
   if (Op2.getOpcode() != ISD::SELECT_CC)
     return false;
 
-  SDValue LHS2 = Op2.getOperand(0);
-  SDValue RHS2 = Op2.getOperand(1);
+  SDValue V2 = Op2.getOperand(0);
+  SDValue K2 = Op2.getOperand(1);
   SDValue TrueVal2 = Op2.getOperand(2);
   SDValue FalseVal2 = Op2.getOperand(3);
   ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
 
-  // Find out which are the constants and which are the variables
-  // in each conditional
-  SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
-                                                        ? &RHS1
-                                                        : nullptr;
-  SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
-                                                        ? &RHS2
-                                                        : nullptr;
-  SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
-  SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
-  SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
-  SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
-
-  // We must detect cases where the original operations worked with 16- or
-  // 8-bit values. In such case, V2Tmp != V2 because the comparison operations
-  // must work with sign-extended values but the select operations return
-  // the original non-extended value.
-  SDValue V2TmpReg = V2Tmp;
-  if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
-    V2TmpReg = V2Tmp->getOperand(0);
-
-  // Check that the registers and the constants have the correct values
-  // in both conditionals
-  if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
-      V2TmpReg != V2)
-    return false;
+  SDValue V1Tmp = V1;
+  SDValue V2Tmp = V2;
 
-  // Figure out which conditional is saturating the lower/upper bound.
-  const SDValue *LowerCheckOp =
-      isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
-          ? &Op
-          : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
-                ? &Op2
-                : nullptr;
-  const SDValue *UpperCheckOp =
-      isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
-          ? &Op
-          : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
-                ? &Op2
-                : nullptr;
-
-  if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
-    return false;
+  if (V1.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+      V2.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+    V1Tmp = V1.getOperand(0);
+    V2Tmp = V2.getOperand(0);
+  }
+
+  // Check that the registers and the constants match a max(min()) or min(max())
+  // pattern
+  if (V1Tmp == TrueVal1 && V2Tmp == TrueVal2 && K1 == FalseVal1 &&
+      K2 == FalseVal2 &&
+      ((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2)))) {
 
-  // Check that the constant in the lower-bound check is
-  // the opposite of the constant in the upper-bound check
-  // in 1's complement.
-  int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
-  int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
-  int64_t PosVal = std::max(Val1, Val2);
-  int64_t NegVal = std::min(Val1, Val2);
+    // Check that the constant in the lower-bound check is
+    // the opposite of the constant in the upper-bound check
+    // in 1's complement.
+    if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
+      return false;
+
+    int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
+    int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
+    int64_t PosVal = std::max(Val1, Val2);
+    int64_t NegVal = std::min(Val1, Val2);
 
-  if (((Val1 > Val2 && UpperCheckOp == &Op) ||
-       (Val1 < Val2 && UpperCheckOp == &Op2)) &&
-      isPowerOf2_64(PosVal + 1)) {
+    if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) &&
+        !isPowerOf2_64(PosVal + 1))
+      return false;
 
-    // Handle the difference between USAT (unsigned) and SSAT (signed) saturation
+    // Handle the difference between USAT (unsigned) and SSAT (signed)
+    // saturation
     if (Val1 == ~Val2)
-      usat = false;
+      Usat = false;
    else if (NegVal == 0)
-      usat = true;
+      Usat = true;
    else
      return false;
 
-    V = V2;
-    K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
+    V = V2Tmp;
+    // At this point, PosVal is guaranteed to be positive
+    K = (uint64_t) PosVal;
     return true;
  }
-
  return false;
 }

diff --git a/llvm/test/CodeGen/ARM/ssat.ll b/llvm/test/CodeGen/ARM/ssat.ll
index f1e11dd33d1fb..a2027435ed291 100644
--- a/llvm/test/CodeGen/ARM/ssat.ll
+++ b/llvm/test/CodeGen/ARM/ssat.ll
@@ -20,10 +20,10 @@ define i32 @sat_base_32bit(i32 %x) #0 {
 ; V6T2: ssat r0, #24, r0
 ; V4T-NOT: ssat
 entry:
-  %cmpLow = icmp slt i32 %x, -8388608
-  %cmpUp = icmp sgt i32 %x, 8388607
-  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %x
-  %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %saturateUp
+  %0 = icmp slt i32 %x, 8388607
+  %saturateUp = select i1 %0, i32 %x, i32 8388607
+  %1 = icmp sgt i32 %saturateUp, -8388608
+  %saturateLow = select i1 %1, i32 %saturateUp, i32 -8388608
   ret i32 %saturateLow
 }
 
@@ -34,10 +34,10 @@ define i16 @sat_base_16bit(i16 %x) #0 {
 ; V6T2: ssat r0, #12, r0
 ; V4T-NOT: ssat
 entry:
-  %cmpLow = icmp slt i16 %x, -2048
-  %cmpUp = icmp sgt i16 %x, 2047
-  %saturateUp = select i1 %cmpUp, i16 2047, i16 %x
-  %saturateLow = select i1 %cmpLow, i16 -2048, i16 %saturateUp
+  %0 = icmp slt i16 %x, 2047
+  %saturateUp = select i1 %0, i16 %x, i16 2047
+  %1 = icmp sgt i16 %saturateUp, -2048
+  %saturateLow = select i1 %1, i16 %saturateUp, i16 -2048
   ret i16 %saturateLow
 }
 
@@ -48,10 +48,10 @@ define i8 @sat_base_8bit(i8 %x) #0 {
 ; V6T2: ssat r0, #6, r0
 ; V4T-NOT: ssat
 entry:
-  %cmpLow = icmp slt i8 %x, -32
-  %cmpUp = icmp sgt i8 %x, 31
-  %saturateUp = select i1 %cmpUp, i8 31, i8 %x
-  %saturateLow = select i1 %cmpLow, i8 -32, i8 %saturateUp
+  %0 = icmp slt i8 %x, 31
+  %saturateUp = select i1 %0, i8 %x, i8 31
+  %1 = icmp sgt i8 %saturateUp, -32
+  %saturateLow = select i1 %1, i8 %saturateUp, i8 -32
   ret
&Op2 - : nullptr; - - if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp) - return false; + if (V1.getOpcode() == ISD::SIGN_EXTEND_INREG && + V2.getOpcode() == ISD::SIGN_EXTEND_INREG) { + V1Tmp = V1.getOperand(0); + V2Tmp = V2.getOperand(0); + } + + // Check that the registers and the constants match a max(min()) or min(max()) + // pattern + if (V1Tmp == TrueVal1 && V2Tmp == TrueVal2 && K1 == FalseVal1 && + K2 == FalseVal2 && + ((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2)))) { - // Check that the constant in the lower-bound check is - // the opposite of the constant in the upper-bound check - // in 1's complement. - int64_t Val1 = cast(*K1)->getSExtValue(); - int64_t Val2 = cast(*K2)->getSExtValue(); - int64_t PosVal = std::max(Val1, Val2); - int64_t NegVal = std::min(Val1, Val2); + // Check that the constant in the lower-bound check is + // the opposite of the constant in the upper-bound check + // in 1's complement. + if (!isa(K1) || !isa(K2)) + return false; + + int64_t Val1 = cast(K1)->getSExtValue(); + int64_t Val2 = cast(K2)->getSExtValue(); + int64_t PosVal = std::max(Val1, Val2); + int64_t NegVal = std::min(Val1, Val2); - if (((Val1 > Val2 && UpperCheckOp == &Op) || - (Val1 < Val2 && UpperCheckOp == &Op2)) && - isPowerOf2_64(PosVal + 1)) { + if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) && + !isPowerOf2_64(PosVal + 1)) + return false; - // Handle the difference between USAT (unsigned) and SSAT (signed) saturation + // Handle the difference between USAT (unsigned) and SSAT (signed) + // saturation if (Val1 == ~Val2) - usat = false; + Usat = false; else if (NegVal == 0) - usat = true; + Usat = true; else return false; - V = V2; - K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive + V = V2Tmp; + // At this point, PosVal is guaranteed to be positive + K = (uint64_t) PosVal; return true; } - return false; } diff --git a/llvm/test/CodeGen/ARM/ssat.ll b/llvm/test/CodeGen/ARM/ssat.ll index f1e11dd33d1fb..a2027435ed291 100644 --- a/llvm/test/CodeGen/ARM/ssat.ll +++ b/llvm/test/CodeGen/ARM/ssat.ll @@ -20,10 +20,10 @@ define i32 @sat_base_32bit(i32 %x) #0 { ; V6T2: ssat r0, #24, r0 ; V4T-NOT: ssat entry: - %cmpLow = icmp slt i32 %x, -8388608 - %cmpUp = icmp sgt i32 %x, 8388607 - %saturateUp = select i1 %cmpUp, i32 8388607, i32 %x - %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %saturateUp + %0 = icmp slt i32 %x, 8388607 + %saturateUp = select i1 %0, i32 %x, i32 8388607 + %1 = icmp sgt i32 %saturateUp, -8388608 + %saturateLow = select i1 %1, i32 %saturateUp, i32 -8388608 ret i32 %saturateLow } @@ -34,10 +34,10 @@ define i16 @sat_base_16bit(i16 %x) #0 { ; V6T2: ssat r0, #12, r0 ; V4T-NOT: ssat entry: - %cmpLow = icmp slt i16 %x, -2048 - %cmpUp = icmp sgt i16 %x, 2047 - %saturateUp = select i1 %cmpUp, i16 2047, i16 %x - %saturateLow = select i1 %cmpLow, i16 -2048, i16 %saturateUp + %0 = icmp slt i16 %x, 2047 + %saturateUp = select i1 %0, i16 %x, i16 2047 + %1 = icmp sgt i16 %saturateUp, -2048 + %saturateLow = select i1 %1, i16 %saturateUp, i16 -2048 ret i16 %saturateLow } @@ -48,10 +48,10 @@ define i8 @sat_base_8bit(i8 %x) #0 { ; V6T2: ssat r0, #6, r0 ; V4T-NOT: ssat entry: - %cmpLow = icmp slt i8 %x, -32 - %cmpUp = icmp sgt i8 %x, 31 - %saturateUp = select i1 %cmpUp, i8 31, i8 %x - %saturateLow = select i1 %cmpLow, i8 -32, i8 %saturateUp + %0 = icmp slt i8 %x, 31 + %saturateUp = select i1 %0, i8 %x, i8 31 + %1 = icmp sgt i8 %saturateUp, -32 + %saturateLow = select i1 %1, i8 %saturateUp, i8 -32 ret 
i8 %saturateLow } @@ -67,10 +67,10 @@ define i32 @sat_lower_upper_1(i32 %x) #0 { ; V6T2: ssat r0, #24, r0 ; V4T-NOT: ssat entry: - %cmpLow = icmp slt i32 %x, -8388608 %cmpUp = icmp slt i32 %x, 8388607 %saturateUp = select i1 %cmpUp, i32 %x, i32 8388607 - %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %saturateUp + %0 = icmp sgt i32 %saturateUp, -8388608 + %saturateLow = select i1 %0, i32 %saturateUp, i32 -8388608 ret i32 %saturateLow } @@ -80,10 +80,10 @@ define i32 @sat_lower_upper_2(i32 %x) #0 { ; V6T2: ssat r0, #24, r0 ; V4T-NOT: ssat entry: - %cmpLow = icmp sgt i32 %x, -8388608 - %cmpUp = icmp sgt i32 %x, 8388607 - %saturateUp = select i1 %cmpUp, i32 8388607, i32 %x - %saturateLow = select i1 %cmpLow, i32 %saturateUp, i32 -8388608 + %0 = icmp slt i32 %x, 8388607 + %saturateUp = select i1 %0, i32 %x, i32 8388607 + %1 = icmp sgt i32 %saturateUp, -8388608 + %saturateLow = select i1 %1, i32 %saturateUp, i32 -8388608 ret i32 %saturateLow } @@ -93,10 +93,10 @@ define i32 @sat_upper_lower_1(i32 %x) #0 { ; V6T2: ssat r0, #24, r0 ; V4T-NOT: ssat entry: - %cmpUp = icmp slt i32 %x, 8388607 - %cmpLow = icmp slt i32 %x, -8388608 - %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %x - %saturateUp = select i1 %cmpUp, i32 %saturateLow, i32 8388607 + %0 = icmp sgt i32 %x, -8388608 + %saturateLow = select i1 %0, i32 %x, i32 -8388608 + %1 = icmp slt i32 %saturateLow, 8388607 + %saturateUp = select i1 %1, i32 %saturateLow, i32 8388607 ret i32 %saturateUp } @@ -106,10 +106,10 @@ define i32 @sat_upper_lower_2(i32 %x) #0 { ; V6T2: ssat r0, #24, r0 ; V4T-NOT: ssat entry: - %cmpUp = icmp sgt i32 %x, 8388607 - %cmpLow = icmp slt i32 %x, -8388608 - %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %x - %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + %0 = icmp sgt i32 %x, -8388608 + %saturateLow = select i1 %0, i32 %x, i32 -8388608 + %1 = icmp slt i32 %saturateLow, 8388607 + %saturateUp = select i1 %1, i32 %saturateLow, i32 8388607 ret i32 %saturateUp } @@ -119,10 +119,10 @@ define i32 @sat_upper_lower_3(i32 %x) #0 { ; V6T2: ssat r0, #24, r0 ; V4T-NOT: ssat entry: - %cmpUp = icmp slt i32 8388607, %x %cmpLow = icmp sgt i32 %x, -8388608 %saturateLow = select i1 %cmpLow, i32 %x, i32 -8388608 - %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + %0 = icmp slt i32 %saturateLow, 8388607 + %saturateUp = select i1 %0, i32 %saturateLow, i32 8388607 ret i32 %saturateUp } @@ -137,10 +137,10 @@ define i32 @sat_le_ge(i32 %x) #0 { ; V6T2: ssat r0, #24, r0 ; V4T-NOT: ssat entry: - %cmpUp = icmp sle i32 8388607, %x - %cmpLow = icmp sge i32 %x, -8388608 - %saturateLow = select i1 %cmpLow, i32 %x, i32 -8388608 - %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + %0 = icmp sgt i32 %x, -8388608 + %saturateLow = select i1 %0, i32 %x, i32 -8388608 + %1 = icmp slt i32 %saturateLow, 8388607 + %saturateUp = select i1 %1, i32 %saturateLow, i32 8388607 ret i32 %saturateUp } @@ -156,8 +156,8 @@ define i32 @no_sat_missing_lower(i32 %x) #0 { ; CHECK-NOT: ssat entry: %cmpUp = icmp sgt i32 %x, 8388607 - %cmpLow = icmp sgt i32 %x, -8388608 - %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %x + %0 = icmp slt i32 %x, -8388608 + %saturateLow = select i1 %0, i32 %x, i32 -8388608 %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow ret i32 %saturateUp } @@ -169,8 +169,8 @@ define i32 @no_sat_missing_upper(i32 %x) #0 { ; CHECK-NOT: ssat entry: %cmpUp = icmp slt i32 %x, 8388607 - %cmpLow = icmp slt i32 %x, -8388608 - %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %x + %0 = icmp sgt 
i32 %x, -8388608 + %saturateLow = select i1 %0, i32 %x, i32 -8388608 %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow ret i32 %saturateUp } @@ -192,10 +192,10 @@ define i32 @no_sat_incorrect_interval(i32 %x) #0 { ; CHECK-LABEL: no_sat_incorrect_interval: ; CHECK-NOT: ssat entry: - %cmpUp = icmp sgt i32 %x, 8388607 - %cmpLow = icmp slt i32 %x, -19088744 - %saturateLow = select i1 %cmpLow, i32 -19088744, i32 %x - %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + %0 = icmp sgt i32 %x, -19088744 + %saturateLow = select i1 %0, i32 %x, i32 -19088744 + %1 = icmp slt i32 %saturateLow, 8388607 + %saturateUp = select i1 %1, i32 %saturateLow, i32 8388607 ret i32 %saturateUp } diff --git a/llvm/test/CodeGen/ARM/usat.ll b/llvm/test/CodeGen/ARM/usat.ll index 8f19d11ef7bb7..99064386fa504 100644 --- a/llvm/test/CodeGen/ARM/usat.ll +++ b/llvm/test/CodeGen/ARM/usat.ll @@ -22,10 +22,10 @@ define i32 @unsigned_sat_base_32bit(i32 %x) #0 { ; V6T2: usat r0, #23, r0 ; V4T-NOT: usat entry: - %cmpLow = icmp slt i32 %x, 0 - %cmpUp = icmp sgt i32 %x, 8388607 - %saturateUp = select i1 %cmpUp, i32 8388607, i32 %x - %saturateLow = select i1 %cmpLow, i32 0, i32 %saturateUp + %0 = icmp slt i32 %x, 8388607 + %saturateUp = select i1 %0, i32 %x, i32 8388607 + %1 = icmp sgt i32 %saturateUp, 0 + %saturateLow = select i1 %1, i32 %saturateUp, i32 0 ret i32 %saturateLow } @@ -37,10 +37,10 @@ define i16 @unsigned_sat_base_16bit(i16 %x) #0 { ; V6T2: usat r0, #11, r0 ; V4T-NOT: usat entry: - %cmpLow = icmp slt i16 %x, 0 - %cmpUp = icmp sgt i16 %x, 2047 - %saturateUp = select i1 %cmpUp, i16 2047, i16 %x - %saturateLow = select i1 %cmpLow, i16 0, i16 %saturateUp + %0 = icmp slt i16 %x, 2047 + %saturateUp = select i1 %0, i16 %x, i16 2047 + %1 = icmp sgt i16 %saturateUp, 0 + %saturateLow = select i1 %1, i16 %saturateUp, i16 0 ret i16 %saturateLow } @@ -52,10 +52,10 @@ define i8 @unsigned_sat_base_8bit(i8 %x) #0 { ; V6T2: usat r0, #5, r0 ; V4T-NOT: usat entry: - %cmpLow = icmp slt i8 %x, 0 - %cmpUp = icmp sgt i8 %x, 31 - %saturateUp = select i1 %cmpUp, i8 31, i8 %x - %saturateLow = select i1 %cmpLow, i8 0, i8 %saturateUp + %0 = icmp slt i8 %x, 31 + %saturateUp = select i1 %0, i8 %x, i8 31 + %1 = icmp sgt i8 %saturateUp, 0 + %saturateLow = select i1 %1, i8 %saturateUp, i8 0 ret i8 %saturateLow } @@ -71,10 +71,10 @@ define i32 @unsigned_sat_lower_upper_1(i32 %x) #0 { ; V6T2: usat r0, #23, r0 ; V4T-NOT: usat entry: - %cmpLow = icmp slt i32 %x, 0 %cmpUp = icmp slt i32 %x, 8388607 %saturateUp = select i1 %cmpUp, i32 %x, i32 8388607 - %saturateLow = select i1 %cmpLow, i32 0, i32 %saturateUp + %0 = icmp sgt i32 %saturateUp, 0 + %saturateLow = select i1 %0, i32 %saturateUp, i32 0 ret i32 %saturateLow } @@ -85,10 +85,10 @@ define i32 @unsigned_sat_lower_upper_2(i32 %x) #0 { ; V6T2: usat r0, #23, r0 ; V4T-NOT: usat entry: - %cmpLow = icmp sgt i32 %x, 0 - %cmpUp = icmp sgt i32 %x, 8388607 - %saturateUp = select i1 %cmpUp, i32 8388607, i32 %x - %saturateLow = select i1 %cmpLow, i32 %saturateUp, i32 0 + %0 = icmp slt i32 %x, 8388607 + %saturateUp = select i1 %0, i32 %x, i32 8388607 + %1 = icmp sgt i32 %saturateUp, 0 + %saturateLow = select i1 %1, i32 %saturateUp, i32 0 ret i32 %saturateLow } @@ -99,10 +99,10 @@ define i32 @unsigned_sat_upper_lower_1(i32 %x) #0 { ; V6T2: usat r0, #23, r0 ; V4T-NOT: usat entry: - %cmpUp = icmp slt i32 %x, 8388607 - %cmpLow = icmp slt i32 %x, 0 - %saturateLow = select i1 %cmpLow, i32 0, i32 %x - %saturateUp = select i1 %cmpUp, i32 %saturateLow, i32 8388607 + %0 = icmp sgt i32 %x, 0 + 
%saturateLow = select i1 %0, i32 %x, i32 0 + %1 = icmp slt i32 %saturateLow, 8388607 + %saturateUp = select i1 %1, i32 %saturateLow, i32 8388607 ret i32 %saturateUp } @@ -113,10 +113,10 @@ define i32 @unsigned_sat_upper_lower_2(i32 %x) #0 { ; V6T2: usat r0, #23, r0 ; V4T-NOT: usat entry: - %cmpUp = icmp sgt i32 %x, 8388607 - %cmpLow = icmp slt i32 %x, 0 - %saturateLow = select i1 %cmpLow, i32 0, i32 %x - %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + %0 = icmp sgt i32 %x, 0 + %saturateLow = select i1 %0, i32 %x, i32 0 + %1 = icmp slt i32 %saturateLow, 8388607 + %saturateUp = select i1 %1, i32 %saturateLow, i32 8388607 ret i32 %saturateUp } @@ -127,10 +127,10 @@ define i32 @unsigned_sat_upper_lower_3(i32 %x) #0 { ; V6T2: usat r0, #23, r0 ; V4T-NOT: usat entry: - %cmpUp = icmp slt i32 8388607, %x %cmpLow = icmp sgt i32 %x, 0 %saturateLow = select i1 %cmpLow, i32 %x, i32 0 - %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + %0 = icmp slt i32 %saturateLow, 8388607 + %saturateUp = select i1 %0, i32 %saturateLow, i32 8388607 ret i32 %saturateUp } @@ -145,8 +145,8 @@ define i32 @no_unsigned_sat_missing_lower(i32 %x) #0 { ; CHECK-NOT: usat entry: %cmpUp = icmp sgt i32 %x, 8388607 - %cmpLow = icmp sgt i32 %x, 0 - %saturateLow = select i1 %cmpLow, i32 0, i32 %x + %0 = icmp slt i32 %x, 0 + %saturateLow = select i1 %0, i32 %x, i32 0 %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow ret i32 %saturateUp } @@ -158,8 +158,8 @@ define i32 @no_unsigned_sat_missing_upper(i32 %x) #0 { ; CHECK-NOT: usat entry: %cmpUp = icmp slt i32 %x, 8388607 - %cmpLow = icmp slt i32 %x, 0 - %saturateLow = select i1 %cmpLow, i32 0, i32 %x + %0 = icmp sgt i32 %x, 0 + %saturateLow = select i1 %0, i32 %x, i32 0 %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow ret i32 %saturateUp } @@ -169,10 +169,10 @@ define i32 @no_unsigned_sat_incorrect_constant(i32 %x) #0 { ; CHECK-LABEL: no_unsigned_sat_incorrect_constant: ; CHECK-NOT: usat entry: - %cmpUp = icmp sgt i32 %x, 8388607 - %cmpLow = icmp slt i32 %x, 0 - %saturateLow = select i1 %cmpLow, i32 -1, i32 %x - %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + %cmpLow.inv = icmp sgt i32 %x, -1 + %saturateLow = select i1 %cmpLow.inv, i32 %x, i32 -1 + %0 = icmp slt i32 %saturateLow, 8388607 + %saturateUp = select i1 %0, i32 %saturateLow, i32 8388607 ret i32 %saturateUp } @@ -181,10 +181,10 @@ define i32 @no_unsigned_sat_incorrect_interval(i32 %x) #0 { ; CHECK-LABEL: no_unsigned_sat_incorrect_interval: ; CHECK-NOT: usat entry: - %cmpUp = icmp sgt i32 %x, 8388607 - %cmpLow = icmp slt i32 %x, -4 - %saturateLow = select i1 %cmpLow, i32 -4, i32 %x - %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + %0 = icmp sgt i32 %x, -4 + %saturateLow = select i1 %0, i32 %x, i32 -4 + %1 = icmp slt i32 %saturateLow, 8388607 + %saturateUp = select i1 %1, i32 %saturateLow, i32 8388607 ret i32 %saturateUp } diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll index 2ea70f1b06de2..36e620d50758e 100644 --- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -2240,15 +2240,9 @@ define arm_aapcs_vfpcc void @ssatmul_4_q7(i8* nocapture readonly %pSrcA, i8* noc ; CHECK-NEXT: ldrsb r0, [r12], #1 ; CHECK-NEXT: ldrsb r1, [r6], #1 ; CHECK-NEXT: muls r0, r1, r0 -; CHECK-NEXT: asrs r1, r0, #7 -; CHECK-NEXT: cmn.w r1, #128 -; CHECK-NEXT: mvn r1, #127 -; CHECK-NEXT: it gt -; CHECK-NEXT: asrgt r1, r0, #7 -; CHECK-NEXT: cmp r1, #127 -; 
CHECK-NEXT: it ge -; CHECK-NEXT: movge r1, #127 -; CHECK-NEXT: strb r1, [r4], #1 +; CHECK-NEXT: asrs r0, r0, #7 +; CHECK-NEXT: ssat r0, #8, r0 +; CHECK-NEXT: strb r0, [r4], #1 ; CHECK-NEXT: le lr, .LBB13_7 ; CHECK-NEXT: .LBB13_8: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, r5, r6, pc} From eef30334d1daaddf8b4e465be7c0f4aa4f98e208 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 14 Sep 2020 11:56:13 +0100 Subject: [PATCH 0528/1079] [DSE] Precommit test case for invalid elimination of store in loop. --- .../MSSA/multiblock-loops.ll | 58 ++++++++++++++++--- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll index c898cf9bee8ac..75f17d964b136 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll @@ -9,7 +9,7 @@ define void @test13(i32* noalias %P) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR:%.*]] ; CHECK: for: -; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br i1 false, label [[FOR]], label [[END:%.*]] ; CHECK: end: ; CHECK-NEXT: ret void @@ -29,7 +29,7 @@ define void @test14(i32* noalias %P) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR:%.*]] ; CHECK: for: -; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br i1 false, label [[FOR]], label [[END:%.*]] ; CHECK: end: ; CHECK-NEXT: ret void @@ -48,12 +48,12 @@ define void @test18(i32* noalias %P) { ; CHECK-LABEL: @test18( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: store i32 0, i32* [[P]], align 4 ; CHECK-NEXT: br label [[FOR:%.*]] ; CHECK: for: -; CHECK-NEXT: store i8 1, i8* [[P2]] -; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P]] -; CHECK-NEXT: store i8 2, i8* [[P2]] +; CHECK-NEXT: store i8 1, i8* [[P2]], align 1 +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P]], align 4 +; CHECK-NEXT: store i8 2, i8* [[P2]], align 1 ; CHECK-NEXT: br i1 false, label [[FOR]], label [[END:%.*]] ; CHECK: end: ; CHECK-NEXT: ret void @@ -183,7 +183,7 @@ define void @loop_multiple_def_uses(i32* noalias %P) { ; CHECK-NEXT: br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: store i32 1, i32* [[P]], align 4 -; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[P]] +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[P]], align 4 ; CHECK-NEXT: br label [[FOR_HEADER]] ; CHECK: end: ; CHECK-NEXT: store i32 3, i32* [[P]], align 4 @@ -220,7 +220,7 @@ define void @loop_multiple_def_uses_partial_write(i32* noalias %p) { ; CHECK: for.body: ; CHECK-NEXT: [[C:%.*]] = bitcast i32* [[P]] to i8* ; CHECK-NEXT: store i8 1, i8* [[C]], align 4 -; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[P]] +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[P]], align 4 ; CHECK-NEXT: br label [[FOR_HEADER]] ; CHECK: end: ; CHECK-NEXT: store i32 3, i32* [[P]], align 4 @@ -257,7 +257,7 @@ define void @loop_multiple_def_uses_mayalias_write(i32* %p, i32* %q) { ; CHECK-NEXT: br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: store i32 1, i32* [[Q:%.*]], align 4 -; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[P]] +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[P]], align 4 ; CHECK-NEXT: br label [[FOR_HEADER]] ; CHECK: end: ; CHECK-NEXT: store i32 3, i32* [[P]], align 4 @@ -314,3 +314,43 @@ bb1: ; 
preds = %bb1, %bb } declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) + +@x = global [10 x i16] zeroinitializer, align 1 + +; Make sure we do not eliminate the store in %do.body, because it writes to +; multiple locations in the loop and the store in %if.end10 only stores to +; the last one. +define i16 @test_loop_carried_dep() { +; CHECK-LABEL: @test_loop_carried_dep( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[DO_BODY:%.*]] +; CHECK: do.body: +; CHECK-NEXT: [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[IF_END:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i16 [[I_0]], 4 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[IF_END10:%.*]], label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[INC]] = add nuw nsw i16 [[I_0]], 1 +; CHECK-NEXT: br label [[DO_BODY]] +; CHECK: if.end10: +; CHECK-NEXT: store i16 1, i16* [[ARRAYIDX2]], align 1 +; CHECK-NEXT: ret i16 0 +; +entry: + br label %do.body + +do.body: ; preds = %if.end, %entry + %i.0 = phi i16 [ 0, %entry ], [ %inc, %if.end ] + %arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0 + store i16 2, i16* %arrayidx2, align 1 + %exitcond = icmp eq i16 %i.0, 4 + br i1 %exitcond, label %if.end10, label %if.end + +if.end: ; preds = %do.body + %inc = add nuw nsw i16 %i.0, 1 + br label %do.body + +if.end10: ; preds = %do.body + store i16 1, i16* %arrayidx2, align 1 + ret i16 0 +} From f715d81c9df3fb3e047a54899fc749f57c84aeb5 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 14 Sep 2020 11:49:27 +0100 Subject: [PATCH 0529/1079] [DSE] Only eliminate candidates that always store the same loc. AliasAnalysis/MemoryLocation does not account for loops. Two MemoryLocation can be must-overwrite, even if the first one writes multiple locations in a loop. This patch prevents removing such stores, by only considering candidates that are known to be loop invariant, or executed in the same BB. Currently the invariant check is quite conservative and only considers Alloca and Alloca-like instructions and arguments as invariant base pointers. It also considers GEPs with all constant indices and invariant bases as invariant. This can be improved in the future, but the current implementation has only minor impact on the total number of stores eliminated (25903 vs 26047 for the baseline). There are some 2-10% swings for some individual benchmarks. In roughly half of the cases, the number of stores removed increases actually, because we skip candidates that are unlikely to be valid candidates early. --- .../Scalar/DeadStoreElimination.cpp | 37 +++++++++++++++++++ .../MSSA/multiblock-loops.ll | 2 + 2 files changed, 39 insertions(+) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 16f4ea2f900c1..6615f6b1c32e9 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1861,6 +1861,32 @@ struct DSEState { return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc)); } + /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible + /// loop. In particular, this guarantees that it only references a single + /// MemoryLocation during execution of the containing function. 
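+  /// For example, an alloca (or a GEP into an alloca with all-constant
+  /// indices) names the same location on every iteration of any loop, while a
+  /// GEP whose index is a loop induction variable does not.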
+  bool IsGuaranteedLoopInvariant(Value *Ptr) {
+    auto IsGuaranteedLoopInvariantBase = [this](Value *Ptr) {
+      Ptr = Ptr->stripPointerCasts();
+      if (auto *I = dyn_cast<Instruction>(Ptr)) {
+        if (isa<AllocaInst>(Ptr))
+          return true;
+
+        if (isAllocLikeFn(I, &TLI))
+          return true;
+
+        return false;
+      }
+      return true;
+    };
+
+    Ptr = Ptr->stripPointerCasts();
+    if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
+      return IsGuaranteedLoopInvariantBase(GEP->getPointerOperand()) &&
+             GEP->hasAllConstantIndices();
+    }
+    return IsGuaranteedLoopInvariantBase(Ptr);
+  }
+
   // Find a MemoryDef writing to \p DefLoc and dominating \p StartAccess, with
   // no read access between them or on any other path to a function exit block
   // if \p DefLoc is not accessible after the function returns. If there is no
@@ -1992,6 +2018,17 @@ struct DSEState {
         }
         continue;
       } else {
+        // AliasAnalysis does not account for loops. Limit elimination to
+        // candidates for which we can guarantee they always store to the same
+        // memory location and not multiple locations in a loop.
+        if (Current->getBlock() != KillingDef->getBlock() &&
+            !IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) {
+          StepAgain = true;
+          Current = CurrentDef->getDefiningAccess();
+          WalkerStepLimit -= 1;
+          continue;
+        }
+
         int64_t InstWriteOffset, DepWriteOffset;
         auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI,
                               DepWriteOffset, InstWriteOffset, BatchAA, &F);
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll
index 75f17d964b136..dc6004bf71d78 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll
@@ -111,6 +111,7 @@ define void @test_loop(i32 %N, i32* noalias nocapture readonly %A, i32* noalias
 ; CHECK:       for.body4.lr.ph:
 ; CHECK-NEXT:    [[I_028:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_BODY4_LR_PH_PREHEADER]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_028]]
+; CHECK-NEXT:    store i32 0, i32* [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[I_028]], [[N]]
 ; CHECK-NEXT:    br label [[FOR_BODY4:%.*]]
 ; CHECK:       for.body4:
@@ -327,6 +328,7 @@ define i16 @test_loop_carried_dep() {
 ; CHECK:       do.body:
 ; CHECK-NEXT:    [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[IF_END:%.*]] ]
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
+; CHECK-NEXT:    store i16 2, i16* [[ARRAYIDX2]], align 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i16 [[I_0]], 4
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[IF_END10:%.*]], label [[IF_END]]
 ; CHECK:       if.end:

From 06fb4e90649f264a129d3ad2a08fd3492ee78651 Mon Sep 17 00:00:00 2001
From: David Green
Date: Mon, 14 Sep 2020 12:08:34 +0100
Subject: [PATCH 0530/1079] [CGP] Limit converting phi types to simple loads
 and stores

Instcombine limits converting phi types to simple loads and stores. This
does the same in codegenprepare, not processing phis that are not simple.

Note that for volatile loads/stores, ISel will happily convert between
float and int. Atomics are more likely to always be integer. This just
keeps things simple and doesn't process either.
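As a sketch of the transform this limits (hypothetical IR in the style of
the tests below, not taken from them verbatim):

  %ls = load i32, i32* %s, align 4
  %phi = phi i32 [ %ls, %then ], [ %ld, %else ]
  %b = bitcast i32 %phi to float

can become a phi over float with float loads, removing the bitcast. With
this change the rewrite is skipped whenever one of the loads or stores
involved is volatile or atomic (i.e. not simple).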
Differential Revision: https://reviews.llvm.org/D83770
---
 llvm/lib/CodeGen/CodeGenPrepare.cpp         |   4 +-
 llvm/test/CodeGen/AArch64/convertphitype.ll | 200 ++++++++++++++++++++
 2 files changed, 203 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index bb0bad74fb698..45feeae39659b 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -5831,6 +5831,8 @@ bool CodeGenPrepare::optimizePhiType(
         Worklist.push_back(OpPhi);
       }
     } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
+      if (!OpLoad->isSimple())
+        return false;
       if (!Defs.count(OpLoad)) {
         Defs.insert(OpLoad);
         Worklist.push_back(OpLoad);
@@ -5868,7 +5870,7 @@ bool CodeGenPrepare::optimizePhiType(
         Worklist.push_back(OpPhi);
       }
     } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
-      if (OpStore->getOperand(0) != II)
+      if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
         return false;
       Uses.insert(OpStore);
     } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
diff --git a/llvm/test/CodeGen/AArch64/convertphitype.ll b/llvm/test/CodeGen/AArch64/convertphitype.ll
index 2e3530de378b3..bc858aa11eb78 100644
--- a/llvm/test/CodeGen/AArch64/convertphitype.ll
+++ b/llvm/test/CodeGen/AArch64/convertphitype.ll
@@ -677,3 +677,203 @@ end:
   %b = bitcast i32 %phi to float
   ret float %b
 }
+
+define float @convphi_volatile(i32 *%s, i32 *%d, i32 %n) {
+; CHECK-LABEL: @convphi_volatile(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[LS:%.*]] = load volatile i32, i32* [[S:%.*]], align 4
+; CHECK-NEXT:    br label [[END:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ]
+; CHECK-NEXT:    [[B:%.*]] = bitcast i32 [[PHI]] to float
+; CHECK-NEXT:    ret float [[B]]
+;
+; DEBUG-LABEL: @convphi_volatile(
+; DEBUG-NEXT:  entry:
+; DEBUG-NEXT:    [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0, !dbg !358
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata i1 [[CMP15]], metadata !353, metadata !DIExpression()), !dbg !358
+; DEBUG-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]], !dbg !359
+; DEBUG:       then:
+; DEBUG-NEXT:    [[LS:%.*]] = load volatile i32, i32* [[S:%.*]], align 4, !dbg !360
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata i32 [[LS]], metadata !354, metadata !DIExpression()), !dbg !360
+; DEBUG-NEXT:    br label [[END:%.*]], !dbg !361
+; DEBUG:       else:
+; DEBUG-NEXT:    [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4, !dbg !362
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata i32 [[LD]], metadata !355, metadata !DIExpression()), !dbg !362
+; DEBUG-NEXT:    br label [[END]], !dbg !363
+; DEBUG:       end:
+; DEBUG-NEXT:    [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ], !dbg !364
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata i32 [[PHI]], metadata !356, metadata !DIExpression()), !dbg !364
+; DEBUG-NEXT:    [[B:%.*]] = bitcast i32 [[PHI]] to float, !dbg !365
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata float [[B]], metadata !357, metadata !DIExpression()), !dbg !365
+; DEBUG-NEXT:    ret float [[B]], !dbg !366
+;
+entry:
+  %cmp15 = icmp sgt i32 %n, 0
+  br i1 %cmp15, label %then, label %else
+
+then:
+  %ls = load volatile i32, i32* %s, align 4
+  br label %end
+
+else:
+  %ld = load i32, i32* %d, align 4
+  br label %end
+
+end:
+  %phi = phi i32 [ %ls, %then ], [ %ld, %else ]
+  %b = bitcast i32 %phi to float
+  ret float
%b +} + +define void @convphi_volatile2(i32 *%s, i32 *%d, i32 %n, float %f) { +; CHECK-LABEL: @convphi_volatile2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: [[FB:%.*]] = bitcast float [[F:%.*]] to i32 +; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]] +; CHECK: then: +; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[FB]], [[ENTRY:%.*]] ] +; CHECK-NEXT: store volatile i32 [[PHI]], i32* [[D:%.*]], align 4 +; CHECK-NEXT: ret void +; +; DEBUG-LABEL: @convphi_volatile2( +; DEBUG-NEXT: entry: +; DEBUG-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0, !dbg !373 +; DEBUG-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP15]], metadata !369, metadata !DIExpression()), !dbg !373 +; DEBUG-NEXT: [[FB:%.*]] = bitcast float [[F:%.*]] to i32, !dbg !374 +; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[FB]], metadata !370, metadata !DIExpression()), !dbg !374 +; DEBUG-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]], !dbg !375 +; DEBUG: then: +; DEBUG-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4, !dbg !376 +; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[LS]], metadata !371, metadata !DIExpression()), !dbg !376 +; DEBUG-NEXT: br label [[END]], !dbg !377 +; DEBUG: end: +; DEBUG-NEXT: [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[FB]], [[ENTRY:%.*]] ], !dbg !378 +; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[PHI]], metadata !372, metadata !DIExpression()), !dbg !378 +; DEBUG-NEXT: store volatile i32 [[PHI]], i32* [[D:%.*]], align 4, !dbg !379 +; DEBUG-NEXT: ret void, !dbg !380 +; +entry: + %cmp15 = icmp sgt i32 %n, 0 + %fb = bitcast float %f to i32 + br i1 %cmp15, label %then, label %end + +then: + %ls = load i32, i32* %s, align 4 + br label %end + +end: + %phi = phi i32 [ %ls, %then ], [ %fb, %entry ] + store volatile i32 %phi, i32 *%d + ret void +} + +define float @convphi_atomic(i32 *%s, i32 *%d, i32 %n) { +; CHECK-LABEL: @convphi_atomic( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[LS:%.*]] = load atomic i32, i32* [[S:%.*]] acquire, align 4 +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: else: +; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ] +; CHECK-NEXT: [[B:%.*]] = bitcast i32 [[PHI]] to float +; CHECK-NEXT: ret float [[B]] +; +; DEBUG-LABEL: @convphi_atomic( +; DEBUG-NEXT: entry: +; DEBUG-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0, !dbg !388 +; DEBUG-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP15]], metadata !383, metadata !DIExpression()), !dbg !388 +; DEBUG-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]], !dbg !389 +; DEBUG: then: +; DEBUG-NEXT: [[LS:%.*]] = load atomic i32, i32* [[S:%.*]] acquire, align 4, !dbg !390 +; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[LS]], metadata !384, metadata !DIExpression()), !dbg !390 +; DEBUG-NEXT: br label [[END:%.*]], !dbg !391 +; DEBUG: else: +; DEBUG-NEXT: [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4, !dbg !392 +; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[LD]], metadata !385, metadata !DIExpression()), !dbg !392 +; DEBUG-NEXT: br label [[END]], !dbg !393 +; DEBUG: end: +; DEBUG-NEXT: [[PHI:%.*]] = phi i32 [ 
[[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ], !dbg !394 +; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[PHI]], metadata !386, metadata !DIExpression()), !dbg !394 +; DEBUG-NEXT: [[B:%.*]] = bitcast i32 [[PHI]] to float, !dbg !395 +; DEBUG-NEXT: call void @llvm.dbg.value(metadata float [[B]], metadata !387, metadata !DIExpression()), !dbg !395 +; DEBUG-NEXT: ret float [[B]], !dbg !396 +; +entry: + %cmp15 = icmp sgt i32 %n, 0 + br i1 %cmp15, label %then, label %else + +then: + %ls = load atomic i32, i32* %s acquire, align 4 + br label %end + +else: + %ld = load i32, i32* %d, align 4 + br label %end + +end: + %phi = phi i32 [ %ls, %then ], [ %ld, %else ] + %b = bitcast i32 %phi to float + ret float %b +} + +define void @convphi_atomic2(i32 *%s, i32 *%d, i32 %n, float %f) { +; CHECK-LABEL: @convphi_atomic2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: [[FB:%.*]] = bitcast float [[F:%.*]] to i32 +; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]] +; CHECK: then: +; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[FB]], [[ENTRY:%.*]] ] +; CHECK-NEXT: store atomic i32 [[PHI]], i32* [[D:%.*]] release, align 4 +; CHECK-NEXT: ret void +; +; DEBUG-LABEL: @convphi_atomic2( +; DEBUG-NEXT: entry: +; DEBUG-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0, !dbg !403 +; DEBUG-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP15]], metadata !399, metadata !DIExpression()), !dbg !403 +; DEBUG-NEXT: [[FB:%.*]] = bitcast float [[F:%.*]] to i32, !dbg !404 +; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[FB]], metadata !400, metadata !DIExpression()), !dbg !404 +; DEBUG-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]], !dbg !405 +; DEBUG: then: +; DEBUG-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4, !dbg !406 +; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[LS]], metadata !401, metadata !DIExpression()), !dbg !406 +; DEBUG-NEXT: br label [[END]], !dbg !407 +; DEBUG: end: +; DEBUG-NEXT: [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[FB]], [[ENTRY:%.*]] ], !dbg !408 +; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[PHI]], metadata !402, metadata !DIExpression()), !dbg !408 +; DEBUG-NEXT: store atomic i32 [[PHI]], i32* [[D:%.*]] release, align 4, !dbg !409 +; DEBUG-NEXT: ret void, !dbg !410 +; +entry: + %cmp15 = icmp sgt i32 %n, 0 + %fb = bitcast float %f to i32 + br i1 %cmp15, label %then, label %end + +then: + %ls = load i32, i32* %s, align 4 + br label %end + +end: + %phi = phi i32 [ %ls, %then ], [ %fb, %entry ] + store atomic i32 %phi, i32 *%d release, align 4 + ret void +} From 5cac85c931d95f3c94f79837a3bf406eb68edaeb Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Thu, 3 Sep 2020 10:05:25 +0200 Subject: [PATCH 0531/1079] [mlir] Check for type conversion success in std->llvm function conversion Type converter may fail and return nullptr on unconvertible types. The function conversion did not include a check and was attempting to use a nullptr type to construct an LLVM function, leading to a crash. Add a check and return early. The rest of the call stack propagates errors properly. Fixes PR47403. 
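For example, a function signature returning tensor<10 x i32>, as in the test
added below, has no conversion to the LLVM dialect, so the pattern must
return failure instead of dereferencing the null result.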
Reviewed By: mehdi_amini

Differential Revision: https://reviews.llvm.org/D87075
---
 mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp | 2 ++
 mlir/test/Conversion/StandardToLLVM/invalid.mlir      | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
index 2aa589a0fb7b2..62b787153d84b 100644
--- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
+++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
@@ -1112,6 +1112,8 @@ struct FuncOpConversionBase : public ConvertOpToLLVMPattern<FuncOp> {
     TypeConverter::SignatureConversion result(funcOp.getNumArguments());
     auto llvmType = typeConverter.convertFunctionSignature(
         funcOp.getType(), varargsAttr && varargsAttr.getValue(), result);
+    if (!llvmType)
+      return nullptr;
 
     // Propagate argument attributes to all converted arguments obtained after
     // converting a given original argument.
diff --git a/mlir/test/Conversion/StandardToLLVM/invalid.mlir b/mlir/test/Conversion/StandardToLLVM/invalid.mlir
index 469bb9753ec49..5f79cef68ba8e 100644
--- a/mlir/test/Conversion/StandardToLLVM/invalid.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/invalid.mlir
@@ -29,3 +29,8 @@ func @mlir_cast_to_llvm_vec(%0 : vector<1x1xf32>) -> !llvm.vec<1 x float> {
   %1 = llvm.mlir.cast %0 : vector<1x1xf32> to !llvm.vec<1 x float>
   return %1 : !llvm.vec<1 x float>
 }
+
+// -----
+
+// Should not crash on unsupported types in function signatures.
+func @unsupported_signature() -> tensor<10 x i32>

From 0b2e0e80d963f3498705a38e8d02eafe541ca9d6 Mon Sep 17 00:00:00 2001
From: "Dvorskiy, Mikhail"
Date: Mon, 14 Sep 2020 14:20:32 +0300
Subject: [PATCH 0532/1079] [pstl] Support Threading Building Blocks 2020
 (oneTBB) for "tbb" parallel backend.

After the changes the "tbb" parallel backend will work with old TBB
versions (TBB_INTERFACE_VERSION <= 12000) and new ones (TBB 2020 and
greater).

More about oneTBB: https://github.com/oneapi-src/oneTBB

Phabricator Review: https://reviews.llvm.org/D87380
---
 .../pstl/internal/parallel_backend_tbb.h | 448 +++++++++++++++---
 1 file changed, 369 insertions(+), 79 deletions(-)

diff --git a/pstl/include/pstl/internal/parallel_backend_tbb.h b/pstl/include/pstl/internal/parallel_backend_tbb.h
index a9ea0c7456fb4..f1836aace0ae5 100644
--- a/pstl/include/pstl/internal/parallel_backend_tbb.h
+++ b/pstl/include/pstl/internal/parallel_backend_tbb.h
@@ -25,6 +25,7 @@
 #include <tbb/parallel_invoke.h>
 #include <tbb/task_arena.h>
 #include <tbb/tbb_allocator.h>
+#include <tbb/task.h>
 
 #if TBB_INTERFACE_VERSION < 10000
 #    error Intel(R) Threading Building Blocks 2018 is required; older versions are not supported.
@@ -71,7 +72,11 @@ class __buffer
 inline void
 __cancel_execution()
 {
+#if TBB_INTERFACE_VERSION <= 12000
     tbb::task::self().group()->cancel_group_execution();
+#else
+    tbb::task::current_context()->cancel_group_execution();
+#endif
 }
 
 //------------------------------------------------------------------------
@@ -413,17 +418,308 @@ __parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _Up __u, _Tp __init, _
 //------------------------------------------------------------------------
 #define _PSTL_MERGE_CUT_OFF 2000
 
+template <typename _Func>
+class __func_task;
+template <typename _Func>
+class __root_task;
+
+#if TBB_INTERFACE_VERSION <= 12000
+class __task : public tbb::task
+{
+  public:
+    template <typename _Fn>
+    __task*
+    make_continuation(_Fn&& __f)
+    {
+        return new (allocate_continuation()) __func_task<typename std::decay<_Fn>::type>(std::forward<_Fn>(__f));
+    }
+
+    template <typename _Fn>
+    __task*
+    make_child_of(__task* parent, _Fn&& __f)
+    {
+        return new (parent->allocate_child()) __func_task<typename std::decay<_Fn>::type>(std::forward<_Fn>(__f));
+    }
+
+    template <typename _Fn>
+    __task*
+    make_additional_child_of(tbb::task* parent, _Fn&& __f)
+    {
+        return new (tbb::task::allocate_additional_child_of(*parent))
+            __func_task<typename std::decay<_Fn>::type>(std::forward<_Fn>(__f));
+    }
+
+    inline void
+    recycle_as_continuation()
+    {
+        tbb::task::recycle_as_continuation();
+    }
+
+    inline void
+    recycle_as_child_of(__task* parent)
+    {
+        tbb::task::recycle_as_child_of(*parent);
+    }
+
+    inline void
+    spawn(__task* __t)
+    {
+        tbb::task::spawn(*__t);
+    }
+
+    template <typename _Fn>
+    static inline void
+    spawn_root_and_wait(__root_task<_Fn>& __root)
+    {
+        tbb::task::spawn_root_and_wait(*__root._M_task);
+    }
+};
+
+template <typename _Func>
+class __func_task : public __task
+{
+    _Func _M_func;
+
+    tbb::task*
+    execute()
+    {
+        return _M_func(this);
+    };
+
+  public:
+    template <typename _Fn>
+    __func_task(_Fn&& __f) : _M_func{std::forward<_Fn>(__f)}
+    {
+    }
+
+    _Func&
+    body()
+    {
+        return _M_func;
+    }
+};
+
+template <typename _Func>
+class __root_task
+{
+    tbb::task* _M_task;
+
+  public:
+    template <typename... Args>
+    __root_task(Args&&... args)
+        : _M_task{new (tbb::task::allocate_root()) __func_task<_Func>{_Func(std::forward<Args>(args)...)}}
+    {
+    }
+
+    friend class __task;
+    friend class __func_task<_Func>;
+};
+
+#else // TBB_INTERFACE_VERSION <= 12000
+class __task : public tbb::detail::d1::task
+{
+  protected:
+    tbb::detail::d1::small_object_allocator _M_allocator{};
+    tbb::detail::d1::execution_data* _M_execute_data{};
+    __task* _M_parent{};
+    std::atomic<int> _M_refcount{};
+    bool _M_recycle{};
+
+    template <typename _Fn>
+    __task*
+    allocate_func_task(_Fn&& __f)
+    {
+        assert(_M_execute_data != nullptr);
+        tbb::detail::d1::small_object_allocator __alloc{};
+        auto __t =
+            __alloc.new_object<__func_task<typename std::decay<_Fn>::type>>(*_M_execute_data, std::forward<_Fn>(__f));
+        __t->_M_allocator = __alloc;
+        return __t;
+    }
+
+  public:
+    __task*
+    parent()
+    {
+        return _M_parent;
+    }
+
+    void
+    set_ref_count(int __n)
+    {
+        _M_refcount.store(__n, std::memory_order_release);
+    }
+
+    template <typename _Fn>
+    __task*
+    make_continuation(_Fn&& __f)
+    {
+        auto __t = allocate_func_task(std::forward<_Fn&&>(__f));
+        __t->_M_parent = _M_parent;
+        _M_parent = nullptr;
+        return __t;
+    }
+
+    template <typename _Fn>
+    __task*
+    make_child_of(__task* __parent, _Fn&& __f)
+    {
+        auto __t = allocate_func_task(std::forward<_Fn&&>(__f));
+        __t->_M_parent = __parent;
+        return __t;
+    }
+
+    template <typename _Fn>
+    __task*
+    make_additional_child_of(__task* __parent, _Fn&& __f)
+    {
+        auto __t = make_child_of(__parent, std::forward<_Fn>(__f));
+        assert(__parent->_M_refcount.load(std::memory_order_relaxed) > 0);
+        ++__parent->_M_refcount;
+        return __t;
+    }
+
+    inline void
+    recycle_as_continuation()
+    {
+        _M_recycle = true;
+    }
+
+    inline void
+    recycle_as_child_of(__task* parent)
+    {
+        _M_recycle = true;
+        _M_parent = parent;
+    }
+
+    inline void
+    spawn(__task* __t)
+    {
+        assert(_M_execute_data != nullptr);
+        tbb::detail::d1::spawn(*__t, *_M_execute_data->context);
+    }
+
+    template <typename _Fn>
+    static inline void
+    spawn_root_and_wait(__root_task<_Fn>& __root)
+    {
+        tbb::detail::d1::execute_and_wait(*__root._M_func_task, __root._M_context, __root._M_wait_object,
+                                          __root._M_context);
+    }
+
+    template <typename _Func>
+    friend class __func_task;
+};
+
+template <typename _Func>
+class __func_task : public __task
+{
+    _Func _M_func;
+
+    __task*
+    execute(tbb::detail::d1::execution_data& __ed) override
+    {
+        _M_execute_data = &__ed;
+        _M_recycle = false;
+        __task* __next = _M_func(this);
+        return finalize(__next);
+    };
+
+    __task*
+    cancel(tbb::detail::d1::execution_data& __ed) override
+    {
+        return finalize(nullptr);
+    }
+
+    __task*
+    finalize(__task* __next)
+    {
+        bool __recycle = _M_recycle;
+        _M_recycle = false;
+
+        if (__recycle)
+        {
+            return __next;
+        }
+
+        auto __parent = _M_parent;
+        auto __alloc = _M_allocator;
+        auto __ed = _M_execute_data;
+
+        this->~__func_task();
+
+        assert(__parent != nullptr);
+        assert(__parent->_M_refcount.load(std::memory_order_relaxed) > 0);
+        if (--__parent->_M_refcount == 0)
+        {
+            assert(__next == nullptr);
+            __alloc.deallocate(this, *__ed);
+            return __parent;
+        }
+
+        return __next;
+    }
+
+    friend class __root_task<_Func>;
+
+  public:
+    template <typename _Fn>
+    __func_task(_Fn&& __f) : _M_func(std::forward<_Fn>(__f))
+    {
+    }
+
+    _Func&
+    body()
+    {
+        return _M_func;
+    }
+};
+
+template <typename _Func>
+class __root_task : public __task
+{
+    __task*
+    execute(tbb::detail::d1::execution_data& __ed) override
+    {
+        _M_wait_object.release();
+        return nullptr;
+    };
+
+    __task*
+    cancel(tbb::detail::d1::execution_data& __ed) override
+    {
+        _M_wait_object.release();
+        return nullptr;
+    }
+
+    __func_task<_Func>* _M_func_task{};
+    tbb::detail::d1::wait_context _M_wait_object{0};
+    tbb::task_group_context _M_context{};
+
+  public:
+    template <typename... Args>
+    __root_task(Args&&... args) : _M_wait_object{1}
+    {
+        tbb::detail::d1::small_object_allocator __alloc{};
+        _M_func_task = __alloc.new_object<__func_task<_Func>>(_Func(std::forward<Args>(args)...));
+        _M_func_task->_M_allocator = __alloc;
+        _M_func_task->_M_parent = this;
+        _M_refcount.store(1, std::memory_order_relaxed);
+    }
+
+    friend class __task;
+};
+#endif // TBB_INTERFACE_VERSION <= 12000
+
 template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename __M_Compare, typename _Cleanup,
           typename _LeafMerge>
-class __merge_task : public tbb::task
+class __merge_func
 {
     typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1;
     typedef typename std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2;
     typedef typename std::common_type<_DifferenceType1, _DifferenceType2>::type _SizeType;
     typedef typename std::iterator_traits<_RandomAccessIterator1>::value_type _ValueType;
 
-    /*override*/ tbb::task*
-    execute();
     _RandomAccessIterator1 _M_x_beg;
     _RandomAccessIterator2 _M_z_beg;
@@ -529,7 +825,7 @@ class __merge_task : public tbb::task
     };
 
  public:
-    __merge_task(_SizeType __xs, _SizeType __xe, _SizeType __ys, _SizeType __ye, _SizeType __zs, _Compare __comp,
+    __merge_func(_SizeType __xs, _SizeType __xe, _SizeType __ys, _SizeType __ye, _SizeType __zs, _Compare __comp,
                  _Cleanup, _LeafMerge __leaf_merge, _SizeType __nsort, _RandomAccessIterator1 __x_beg,
                  _RandomAccessIterator2 __z_beg, bool __x_orig, bool __y_orig, bool __root)
        : _M_xs(__xs), _M_xe(__xe), _M_ys(__ys), _M_ye(__ye), _M_zs(__zs), _M_x_beg(__x_beg), _M_z_beg(__z_beg),
@@ -554,12 +850,14 @@ class __merge_task : public tbb::task
         _y_orig = __on_off;
     }
 
+    __task*
+    operator()(__task* __self);
+
   private:
-    __merge_task*
-    parent_merge() const
+    __merge_func*
+    parent_merge(__task* __self) const
     {
-        tbb::task* p = (_root ? nullptr : parent());
-        return static_cast<__merge_task*>(p);
+        return _root ? nullptr : &static_cast<__func_task<__merge_func>*>(__self->parent())->body();
     }
 
     bool
     x_less_y()
@@ -615,8 +913,8 @@ class __merge_task : public tbb::task
         _y_orig = !_y_orig;
     }
 
-    tbb::task*
-    merge_ranges()
+    __task*
+    merge_ranges(__task* __self)
     {
         assert(_x_orig == _y_orig); //two merged subrange must be lie into the same buffer
@@ -626,7 +924,7 @@ class __merge_task : public tbb::task
 
         // need to merge {x} and {y}
         if (__n > __merge_cut_off)
-            return split_merging();
+            return split_merging(__self);
 
         //merge to buffer
         if (_x_orig)
@@ -634,7 +932,7 @@ class __merge_task : public tbb::task
             _M_leaf_merge(_M_x_beg + _M_xs, _M_x_beg + _M_xe, _M_x_beg + _M_ys, _M_x_beg + _M_ye, _M_z_beg + _M_zs,
                           _M_comp, __move_value_construct(), __move_value_construct(), __move_range_construct(),
                           __move_range_construct());
-            assert(parent_merge()); //not root merging task
+            assert(parent_merge(__self)); //not root merging task
         }
         //merge to "origin"
         else
@@ -656,13 +954,13 @@ class __merge_task : public tbb::task
         return nullptr;
     }
 
-    tbb::task*
-    process_ranges()
+    __task*
+    process_ranges(__task* __self)
     {
         assert(_x_orig == _y_orig);
         assert(!_split);
 
-        auto p = parent_merge();
+        auto p = parent_merge(__self);
 
         if (!p)
         { //root merging task
@@ -685,7 +983,7 @@ class __merge_task : public tbb::task
             move_y_range(); //parallel moving
         }
         // need to merge {x} and {y}.
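        // (merge_ranges() forks ranges longer than __merge_cut_off into two
        // parallel sub-merges via split_merging())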
-        return merge_ranges();
+        return merge_ranges(__self);
     }
     //else: not root merging task (parent_merge() == NULL)
     //optimization, just for sort algorithm,
     //{x} <= {y}
 
         const auto id_range = _M_zs;
         p->set_odd(id_range, !_x_orig);
-        return merge_ranges();
+        return merge_ranges(__self);
     }
 
     //splitting as merge task into 2 of the same level
-    tbb::task*
-    split_merging()
+    __task*
+    split_merging(__task* __self)
     {
         assert(_x_orig == _y_orig);
         const auto __nx = (_M_xe - _M_xs);
@@ -732,43 +1030,42 @@ class __merge_task : public tbb::task
         }
 
         auto __zm = _M_zs + ((__xm - _M_xs) + (__ym - _M_ys));
+        __merge_func __right_func(__xm, _M_xe, __ym, _M_ye, __zm, _M_comp, _Cleanup(), _M_leaf_merge, _M_nsort,
+                                  _M_x_beg, _M_z_beg, _x_orig, _y_orig, _root);
+        __right_func._split = true;
+        auto __merge_task = __self->make_additional_child_of(__self->parent(), std::move(__right_func));
+        __self->spawn(__merge_task);
+        __self->recycle_as_continuation();
 
-        __merge_task* __right = new (tbb::task::allocate_additional_child_of(*parent()))
-            __merge_task(__xm, _M_xe, __ym, _M_ye, __zm, _M_comp, _Cleanup(), _M_leaf_merge, _M_nsort, _M_x_beg,
-                         _M_z_beg, _x_orig, _y_orig, _root);
-
-        __right->_split = true;
-
-        tbb::task::spawn(*__right);
-        tbb::task::recycle_as_continuation();
         _M_xe = __xm;
         _M_ye = __ym;
         _split = true;
 
-        return this;
+        return __self;
     }
 };
 
 template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename __M_Compare, typename _Cleanup,
           typename _LeafMerge>
-tbb::task*
-__merge_task<_RandomAccessIterator1, _RandomAccessIterator2, __M_Compare, _Cleanup, _LeafMerge>::execute()
+__task*
+__merge_func<_RandomAccessIterator1, _RandomAccessIterator2, __M_Compare, _Cleanup, _LeafMerge>::
+operator()(__task* __self)
 {
     //a. split merge task into 2 of the same level; the special logic,
     //without processing(process_ranges) adjacent sub-ranges x and y
     if (_split)
-        return merge_ranges();
+        return merge_ranges(__self);
 
     //b. General merging of adjacent sub-ranges x and y (with optimization in case of {x} <= {y} )
 
     //1. x and y are in the even buffer
     //2. x and y are in the odd buffer
     if (_x_orig == _y_orig)
-        return process_ranges();
+        return process_ranges(__self);
 
     //3. x is in even buffer, y is in the odd buffer
    //4. x is in odd buffer, y is in the even buffer
-    if (!parent_merge())
+    if (!parent_merge(__self))
     { //root merge task
         if (_x_orig)
             move_x_range();
@@ -788,11 +1085,11 @@ __merge_task<_RandomAccessIterator1, _RandomAccessIterator2, __M_Compare, _Clean
             move_y_range();
     }
 
-    return process_ranges();
+    return process_ranges(__self);
 }
 
 template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Compare, typename _LeafSort>
-class __stable_sort_task : public tbb::task
+class __stable_sort_func
 {
   public:
     typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1;
     typedef typename std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2;
     typedef typename std::common_type<_DifferenceType1, _DifferenceType2>::type _SizeType;
 
  private:
-    /*override*/ tbb::task*
-    execute();
     _RandomAccessIterator1 _M_xs, _M_xe, _M_x_beg;
     _RandomAccessIterator2 _M_zs, _M_z_beg;
     _Compare _M_comp;
     _LeafSort _M_leaf_sort;
     bool _M_root;
     _SizeType _M_nsort; //zero or number of elements to be sorted for partial_sort algorithm
 
  public:
-    __stable_sort_task(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __zs,
+    __stable_sort_func(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __zs,
                        bool __root, _Compare __comp, _LeafSort __leaf_sort, _SizeType __nsort,
                        _RandomAccessIterator1 __x_beg, _RandomAccessIterator2 __z_beg)
        : _M_xs(__xs), _M_xe(__xe), _M_x_beg(__x_beg), _M_zs(__zs), _M_z_beg(__z_beg), _M_comp(__comp),
          _M_leaf_sort(__leaf_sort), _M_root(__root), _M_nsort(__nsort)
     {
     }
+
+    __task*
+    operator()(__task* __self);
 };
 
 #define _PSTL_STABLE_SORT_CUT_OFF 500
 
 template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Compare, typename _LeafSort>
-tbb::task*
-__stable_sort_task<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, _LeafSort>::execute()
+__task*
+__stable_sort_func<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, _LeafSort>::operator()(__task* __self)
 {
     typedef __merge_func<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, __utils::__serial_destroy,
                          __utils::__serial_move_merge>
         _MergeTaskType;
 
@@ -835,34 +1133,27 @@ __stable_sort_task<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, _Le
     if (__n <= __sort_cut_off)
     {
         _M_leaf_sort(_M_xs, _M_xe, _M_comp);
-        assert(!_M_root);
-
-        tbb::task* p = parent();
-        const auto id_range = _M_xs - _M_x_beg;
-
         return nullptr;
     }
 
     const _RandomAccessIterator1 __xm = _M_xs + __n / 2;
     const _RandomAccessIterator2 __zm = _M_zs + (__xm - _M_xs);
     const _RandomAccessIterator2 __ze = _M_zs + __n;
-    _MergeTaskType* __m = new (allocate_continuation()) _MergeTaskType(
-        _M_xs - _M_x_beg, __xm - _M_x_beg, __xm - _M_x_beg, _M_xe - _M_x_beg, _M_zs - _M_z_beg, _M_comp,
-        __utils::__serial_destroy(), __utils::__serial_move_merge(__nmerge), _M_nsort, _M_x_beg, _M_z_beg,
-        /*x_orig*/ true, /*y_orig*/ true, /*root*/ _M_root);
-
+    _MergeTaskType __m(_MergeTaskType(_M_xs - _M_x_beg, __xm - _M_x_beg, __xm - _M_x_beg, _M_xe - _M_x_beg,
                                      _M_zs - _M_z_beg, _M_comp, __utils::__serial_destroy(),
                                      __utils::__serial_move_merge(__nmerge), _M_nsort, _M_x_beg, _M_z_beg,
                                      /*x_orig*/ true, /*y_orig*/ true, /*root*/ _M_root));
+    auto __parent = __self->make_continuation(std::move(__m));
+    __parent->set_ref_count(2);
+    auto __right = __self->make_child_of(
+        __parent, __stable_sort_func(__xm, _M_xe, __zm, false, _M_comp, _M_leaf_sort, _M_nsort, _M_x_beg, _M_z_beg));
+    __self->spawn(__right);
+    __self->recycle_as_child_of(__parent);
     _M_root = false;
-
-    __m->set_ref_count(2);
-    auto __right = new (__m->allocate_child())
-        __stable_sort_task(__xm, _M_xe, __zm, _M_root, _M_comp, _M_leaf_sort, _M_nsort, _M_x_beg, _M_z_beg);
-
-    spawn(*__right);
-    recycle_as_child_of(*__m);
     _M_xe = __xm;
 
-    return this;
+    return __self;
 }
 
 template <typename _ExecutionPolicy, typename _RandomAccessIterator, typename _Compare, typename _LeafSort>
@@ -882,11 +1173,9 @@ __parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __xs, _RandomAc
     if (__n > __sort_cut_off)
     {
         __buffer<_ValueType> __buf(__n);
-        tbb::task* root = new (tbb::task::allocate_root())
-            __stable_sort_task<_RandomAccessIterator, _ValueType*, _Compare, _LeafSort>(
-                __xs, __xe, __buf.get(), true, __comp, __leaf_sort, __nsort, __xs, __buf.get());
-        tbb::task::spawn_root_and_wait(*root);
-
+        __root_task<__stable_sort_func<_RandomAccessIterator, _ValueType*, _Compare, _LeafSort>> __root{
+            __xs, __xe, __buf.get(), true, __comp, __leaf_sort, __nsort, __xs, __buf.get()};
+        __task::spawn_root_and_wait(__root);
         return;
     }
     //serial sort
@@ -899,10 +1188,8 @@ __parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __xs, _RandomAc
 //------------------------------------------------------------------------
 template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _RandomAccessIterator3,
          typename __M_Compare, typename _LeafMerge>
-class __merge_task_static : public tbb::task
+class __merge_func_static
 {
-    /*override*/ tbb::task*
-    execute();
     _RandomAccessIterator1 _M_xs, _M_xe;
     _RandomAccessIterator2 _M_ys, _M_ye;
     _RandomAccessIterator3 _M_zs;
     _Compare _M_comp;
     _LeafMerge _M_leaf_merge;
 
  public:
-    __merge_task_static(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys,
+    __merge_func_static(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys,
                        _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp,
                        _LeafMerge __leaf_merge)
        : _M_xs(__xs), _M_xe(__xe), _M_ys(__ys), _M_ye(__ye), _M_zs(__zs), _M_comp(__comp),
         _M_leaf_merge(__leaf_merge)
     {
     }
+
+    __task*
+    operator()(__task* __self);
 };
 
 //TODO: consider usage of parallel_for with a custom blocked_range
 template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _RandomAccessIterator3,
         typename __M_Compare, typename _LeafMerge>
-tbb::task*
-__merge_task_static<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3, __M_Compare,
-                    _LeafMerge>::execute()
+__task*
+__merge_func_static<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3, __M_Compare, _LeafMerge>::
+operator()(__task* __self)
 {
     typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1;
     typedef typename std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2;
@@ -949,14 +1239,14 @@ __merge_task_static<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAcces
         __ym = std::lower_bound(_M_ys, _M_ye, *__xm, _M_comp);
     }
     const _RandomAccessIterator3 __zm = _M_zs + ((__xm - _M_xs) + (__ym - _M_ys));
-    tbb::task* __right = new (tbb::task::allocate_additional_child_of(*parent()))
-        __merge_task_static(__xm, _M_xe, __ym, _M_ye, __zm, _M_comp, _M_leaf_merge);
-    tbb::task::spawn(*__right);
-    tbb::task::recycle_as_continuation();
+    auto __right = __self->make_additional_child_of(
+        __self->parent(), __merge_func_static(__xm, _M_xe, __ym, _M_ye, __zm, _M_comp, _M_leaf_merge));
+    __self->spawn(__right);
+    __self->recycle_as_continuation();
     _M_xe = __xm;
     _M_ye = __ym;
 
-    return this;
+    return __self;
 }
 
 template <typename _ExecutionPolicy, typename _RandomAccessIterator1, typename _RandomAccessIterator2,
          typename _RandomAccessIterator3, typename _Compare, typename _LeafMerge>
            typedef __merge_func_static<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3,
                                        _Compare, _LeafMerge>
                _TaskType;
-        tbb::task::spawn_root_and_wait(*new (tbb::task::allocate_root())
-                                           _TaskType(__xs, __xe, __ys, __ye, __zs, __comp, __leaf_merge));
+        __root_task<_TaskType> __root{__xs, __xe, __ys, __ye, __zs, __comp, __leaf_merge};
+        __task::spawn_root_and_wait(__root);
     });
     }
 }

From f4eb94e1db88cd5ea2ffac502c9d788eedb1e547 Mon Sep 17 00:00:00 2001
From: Georgii Rymar
Date: Thu, 10 Sep 2020 16:59:06 +0300 Subject: [PATCH 0533/1079] [llvm-readobj/elf][test] - Test all core note types properly. Currently we don't test all core note types that are defined in `getCoreNoteTypeName` in ELFDumper.cpp. Also we don't have a test for an unknown core note type. This patch fixes it. Differential revision: https://reviews.llvm.org/D87453 --- .../tools/llvm-readobj/ELF/note-core.test | 313 +++++++++++++++--- 1 file changed, 264 insertions(+), 49 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/ELF/note-core.test b/llvm/test/tools/llvm-readobj/ELF/note-core.test index c283519aec492..d7ec0c39ca4c2 100644 --- a/llvm/test/tools/llvm-readobj/ELF/note-core.test +++ b/llvm/test/tools/llvm-readobj/ELF/note-core.test @@ -1,8 +1,263 @@ ## Test that note values are interpreted correctly for core files. -# RUN: yaml2obj %s -o %t.o -# RUN: llvm-readelf --notes %t.o | FileCheck %s --check-prefix=GNU -# RUN: llvm-readobj --notes %t.o | FileCheck %s --check-prefix=LLVM +## Check NT_PRSTATUS. +# RUN: yaml2obj %s -DTYPE=0x1 -o %t1.o +# RUN: llvm-readelf --notes %t1.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PRSTATUS (prstatus structure)" +# RUN: llvm-readobj --notes %t1.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PRSTATUS (prstatus structure)" + +## Check NT_FPREGSET. +# RUN: yaml2obj %s -DTYPE=0x2 -o %t2.o +# RUN: llvm-readelf --notes %t2.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_FPREGSET (floating point registers)" +# RUN: llvm-readobj --notes %t2.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_FPREGSET (floating point registers)" + +## Check NT_PRPSINFO. +# RUN: yaml2obj %s -DTYPE=0x3 -o %t3.o +# RUN: llvm-readelf --notes %t3.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PRPSINFO (prpsinfo structure)" +# RUN: llvm-readobj --notes %t3.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PRPSINFO (prpsinfo structure)" + +## Check NT_TASKSTRUCT. +# RUN: yaml2obj %s -DTYPE=0x4 -o %t4.o +# RUN: llvm-readelf --notes %t4.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_TASKSTRUCT (task structure)" +# RUN: llvm-readobj --notes %t4.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_TASKSTRUCT (task structure)" + +## Check NT_AUXV. +# RUN: yaml2obj %s -DTYPE=0x6 -o %t5.o +# RUN: llvm-readelf --notes %t5.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_AUXV (auxiliary vector)" +# RUN: llvm-readobj --notes %t5.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_AUXV (auxiliary vector)" + +## Check NT_PSTATUS. +# RUN: yaml2obj %s -DTYPE=0xA -o %t6.o +# RUN: llvm-readelf --notes %t6.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PSTATUS (pstatus structure)" +# RUN: llvm-readobj --notes %t6.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PSTATUS (pstatus structure)" + +## Check NT_FPREGS. +# RUN: yaml2obj %s -DTYPE=0xC -o %t7.o +# RUN: llvm-readelf --notes %t7.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_FPREGS (floating point registers)" +# RUN: llvm-readobj --notes %t7.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_FPREGS (floating point registers)" + +## Check NT_PSINFO. +# RUN: yaml2obj %s -DTYPE=0xD -o %t8.o +# RUN: llvm-readelf --notes %t8.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PSINFO (psinfo structure)" +# RUN: llvm-readobj --notes %t8.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PSINFO (psinfo structure)" + +## Check NT_LWPSTATUS. 
+# RUN: yaml2obj %s -DTYPE=0x10 -o %t9.o +# RUN: llvm-readelf --notes %t9.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_LWPSTATUS (lwpstatus_t structure)" +# RUN: llvm-readobj --notes %t9.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_LWPSTATUS (lwpstatus_t structure)" + +## Check NT_LWPSINFO. +# RUN: yaml2obj %s -DTYPE=0x11 -o %t10.o +# RUN: llvm-readelf --notes %t10.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_LWPSINFO (lwpsinfo_t structure)" +# RUN: llvm-readobj --notes %t10.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_LWPSINFO (lwpsinfo_t structure)" + +## Check NT_WIN32PSTATUS. +# RUN: yaml2obj %s -DTYPE=0x12 -o %t11.o +# RUN: llvm-readelf --notes %t11.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_WIN32PSTATUS (win32_pstatus structure)" +# RUN: llvm-readobj --notes %t11.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_WIN32PSTATUS (win32_pstatus structure)" + +## Check ELF::NT_PPC_VMX. +# RUN: yaml2obj %s -DTYPE=0x100 -o %t12.o +# RUN: llvm-readelf --notes %t12.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PPC_VMX (ppc Altivec registers)" +# RUN: llvm-readobj --notes %t12.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PPC_VMX (ppc Altivec registers)" + +## Check ELF::NT_PPC_VSX. +# RUN: yaml2obj %s -DTYPE=0x102 -o %t13.o +# RUN: llvm-readelf --notes %t13.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PPC_VSX (ppc VSX registers)" +# RUN: llvm-readobj --notes %t13.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PPC_VSX (ppc VSX registers)" + +## Check ELF::NT_PPC_TAR. +# RUN: yaml2obj %s -DTYPE=0x103 -o %t14.o +# RUN: llvm-readelf --notes %t14.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PPC_TAR (ppc TAR register)" +# RUN: llvm-readobj --notes %t14.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PPC_TAR (ppc TAR register)" + +## Check ELF::NT_PPC_PPR. +# RUN: yaml2obj %s -DTYPE=0x104 -o %t15.o +# RUN: llvm-readelf --notes %t15.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PPC_PPR (ppc PPR register)" +# RUN: llvm-readobj --notes %t15.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PPC_PPR (ppc PPR register)" + +## Check ELF::NT_PPC_DSCR. +# RUN: yaml2obj %s -DTYPE=0x105 -o %t16.o +# RUN: llvm-readelf --notes %t16.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PPC_DSCR (ppc DSCR register)" +# RUN: llvm-readobj --notes %t16.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PPC_DSCR (ppc DSCR register)" + +## Check ELF::NT_PPC_EBB. +# RUN: yaml2obj %s -DTYPE=0x106 -o %t17.o +# RUN: llvm-readelf --notes %t17.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PPC_EBB (ppc EBB registers)" +# RUN: llvm-readobj --notes %t17.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PPC_EBB (ppc EBB registers)" + +## Check ELF::NT_PPC_PMU. +# RUN: yaml2obj %s -DTYPE=0x107 -o %t18.o +# RUN: llvm-readelf --notes %t18.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PPC_PMU (ppc PMU registers)" +# RUN: llvm-readobj --notes %t18.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PPC_PMU (ppc PMU registers)" + +## Check ELF::NT_PPC_TM_CGPR. +# RUN: yaml2obj %s -DTYPE=0x108 -o %t19.o +# RUN: llvm-readelf --notes %t19.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PPC_TM_CGPR (ppc checkpointed GPR registers)" +# RUN: llvm-readobj --notes %t19.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PPC_TM_CGPR (ppc checkpointed GPR registers)" + +## Check ELF::NT_PPC_TM_CFPR. 
+# RUN: yaml2obj %s -DTYPE=0x109 -o %t20.o +# RUN: llvm-readelf --notes %t20.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PPC_TM_CFPR (ppc checkpointed floating point registers)" +# RUN: llvm-readobj --notes %t20.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PPC_TM_CFPR (ppc checkpointed floating point registers)" + +## Check ELF::NT_PPC_TM_CVMX. +# RUN: yaml2obj %s -DTYPE=0x10a -o %t21.o +# RUN: llvm-readelf --notes %t21.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PPC_TM_CVMX (ppc checkpointed Altivec registers)" +# RUN: llvm-readobj --notes %t21.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PPC_TM_CVMX (ppc checkpointed Altivec registers)" + +## Check ELF::NT_PPC_TM_CVSX. +# RUN: yaml2obj %s -DTYPE=0x10b -o %t22.o +# RUN: llvm-readelf --notes %t22.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PPC_TM_CVSX (ppc checkpointed VSX registers)" +# RUN: llvm-readobj --notes %t22.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PPC_TM_CVSX (ppc checkpointed VSX registers)" + +## Check ELF::NT_PPC_TM_SPR. +# RUN: yaml2obj %s -DTYPE=0x10c -o %t23.o +# RUN: llvm-readelf --notes %t23.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PPC_TM_SPR (ppc TM special purpose registers)" +# RUN: llvm-readobj --notes %t23.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PPC_TM_SPR (ppc TM special purpose registers)" + +## Check ELF::NT_PPC_TM_CTAR. +# RUN: yaml2obj %s -DTYPE=0x10d -o %t24.o +# RUN: llvm-readelf --notes %t24.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PPC_TM_CTAR (ppc checkpointed TAR register)" +# RUN: llvm-readobj --notes %t24.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PPC_TM_CTAR (ppc checkpointed TAR register)" + +## Check ELF::NT_PPC_TM_CPPR. +# RUN: yaml2obj %s -DTYPE=0x10e -o %t25.o +# RUN: llvm-readelf --notes %t25.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PPC_TM_CPPR (ppc checkpointed PPR register)" +# RUN: llvm-readobj --notes %t25.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PPC_TM_CPPR (ppc checkpointed PPR register)" + +## Check ELF::NT_PPC_TM_CDSCR. +# RUN: yaml2obj %s -DTYPE=0x10f -o %t26.o +# RUN: llvm-readelf --notes %t26.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PPC_TM_CDSCR (ppc checkpointed DSCR register)" +# RUN: llvm-readobj --notes %t26.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PPC_TM_CDSCR (ppc checkpointed DSCR register)" + +## Check ELF::NT_386_TLS. +# RUN: yaml2obj %s -DTYPE=0x200 -o %t27.o +# RUN: llvm-readelf --notes %t27.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_386_TLS (x86 TLS information)" +# RUN: llvm-readobj --notes %t27.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_386_TLS (x86 TLS information)" + +## Check ELF::NT_386_IOPERM. +# RUN: yaml2obj %s -DTYPE=0x201 -o %t28.o +# RUN: llvm-readelf --notes %t28.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_386_IOPERM (x86 I/O permissions)" +# RUN: llvm-readobj --notes %t28.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_386_IOPERM (x86 I/O permissions)" + +## Check ELF::NT_X86_XSTATE. +# RUN: yaml2obj %s -DTYPE=0x202 -o %t29.o +# RUN: llvm-readelf --notes %t29.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_X86_XSTATE (x86 XSAVE extended state)" +# RUN: llvm-readobj --notes %t29.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_X86_XSTATE (x86 XSAVE extended state)" + +## Check ELF::NT_S390_HIGH_GPRS. 
+# RUN: yaml2obj %s -DTYPE=0x300 -o %t30.o +# RUN: llvm-readelf --notes %t30.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_S390_HIGH_GPRS (s390 upper register halves)" +# RUN: llvm-readobj --notes %t30.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_S390_HIGH_GPRS (s390 upper register halves)" + +## Check ELF::NT_S390_TIMER. +# RUN: yaml2obj %s -DTYPE=0x301 -o %t31.o +# RUN: llvm-readelf --notes %t31.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_S390_TIMER (s390 timer register)" +# RUN: llvm-readobj --notes %t31.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_S390_TIMER (s390 timer register)" + +## Check ELF::NT_S390_TODCMP. +# RUN: yaml2obj %s -DTYPE=0x302 -o %t32.o +# RUN: llvm-readelf --notes %t32.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_S390_TODCMP (s390 TOD comparator register)" +# RUN: llvm-readobj --notes %t32.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_S390_TODCMP (s390 TOD comparator register)" + +## Check ELF::NT_S390_TODPREG. +# RUN: yaml2obj %s -DTYPE=0x303 -o %t33.o +# RUN: llvm-readelf --notes %t33.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_S390_TODPREG (s390 TOD programmable register)" +# RUN: llvm-readobj --notes %t33.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_S390_TODPREG (s390 TOD programmable register)" + +## Check ELF::NT_S390_CTRS. +# RUN: yaml2obj %s -DTYPE=0x304 -o %t34.o +# RUN: llvm-readelf --notes %t34.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_S390_CTRS (s390 control registers)" +# RUN: llvm-readobj --notes %t34.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_S390_CTRS (s390 control registers)" + +## Check ELF::NT_S390_PREFIX. +# RUN: yaml2obj %s -DTYPE=0x305 -o %t35.o +# RUN: llvm-readelf --notes %t35.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_S390_PREFIX (s390 prefix register)" +# RUN: llvm-readobj --notes %t35.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_S390_PREFIX (s390 prefix register)" + +## Check ELF::NT_S390_LAST_BREAK. +# RUN: yaml2obj %s -DTYPE=0x306 -o %t36.o +# RUN: llvm-readelf --notes %t36.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_S390_LAST_BREAK (s390 last breaking event address)" +# RUN: llvm-readobj --notes %t36.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_S390_LAST_BREAK (s390 last breaking event address)" + +## Check ELF::NT_S390_SYSTEM_CALL. +# RUN: yaml2obj %s -DTYPE=0x307 -o %t37.o +# RUN: llvm-readelf --notes %t37.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_S390_SYSTEM_CALL (s390 system call restart data)" +# RUN: llvm-readobj --notes %t37.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_S390_SYSTEM_CALL (s390 system call restart data)" + +## Check ELF::NT_S390_TDB. +# RUN: yaml2obj %s -DTYPE=0x308 -o %t38.o +# RUN: llvm-readelf --notes %t38.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_S390_TDB (s390 transaction diagnostic block)" +# RUN: llvm-readobj --notes %t38.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_S390_TDB (s390 transaction diagnostic block)" + +## Check ELF::NT_S390_VXRS_LOW. +# RUN: yaml2obj %s -DTYPE=0x309 -o %t39.o +# RUN: llvm-readelf --notes %t39.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_S390_VXRS_LOW (s390 vector registers 0-15 upper half)" +# RUN: llvm-readobj --notes %t39.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_S390_VXRS_LOW (s390 vector registers 0-15 upper half)" + +## Check ELF::NT_S390_VXRS_HIGH. 
+# RUN: yaml2obj %s -DTYPE=0x30a -o %t40.o +# RUN: llvm-readelf --notes %t40.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_S390_VXRS_HIGH (s390 vector registers 16-31)" +# RUN: llvm-readobj --notes %t40.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_S390_VXRS_HIGH (s390 vector registers 16-31)" + +## Check ELF::NT_S390_GS_CB. +# RUN: yaml2obj %s -DTYPE=0x30b -o %t41.o +# RUN: llvm-readelf --notes %t41.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_S390_GS_CB (s390 guarded-storage registers)" +# RUN: llvm-readobj --notes %t41.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_S390_GS_CB (s390 guarded-storage registers)" + +## Check ELF::NT_S390_GS_BC. +# RUN: yaml2obj %s -DTYPE=0x30c -o %t42.o +# RUN: llvm-readelf --notes %t42.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_S390_GS_BC (s390 guarded-storage broadcast control)" +# RUN: llvm-readobj --notes %t42.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_S390_GS_BC (s390 guarded-storage broadcast control)" + +## Check ELF::NT_ARM_VFP. +# RUN: yaml2obj %s -DTYPE=0x400 -o %t43.o +# RUN: llvm-readelf --notes %t43.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_ARM_VFP (arm VFP registers)" +# RUN: llvm-readobj --notes %t43.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_ARM_VFP (arm VFP registers)" + +## Check ELF::NT_ARM_TLS. +# RUN: yaml2obj %s -DTYPE=0x401 -o %t44.o +# RUN: llvm-readelf --notes %t44.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_ARM_TLS (AArch TLS registers)" +# RUN: llvm-readobj --notes %t44.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_ARM_TLS (AArch TLS registers)" + +## Check ELF::NT_ARM_HW_BREAK. +# RUN: yaml2obj %s -DTYPE=0x402 -o %t45.o +# RUN: llvm-readelf --notes %t45.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_ARM_HW_BREAK (AArch hardware breakpoint registers)" +# RUN: llvm-readobj --notes %t45.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_ARM_HW_BREAK (AArch hardware breakpoint registers)" + +## Check ELF::NT_ARM_HW_WATCH. +# RUN: yaml2obj %s -DTYPE=0x403 -o %t46.o +# RUN: llvm-readelf --notes %t46.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_ARM_HW_WATCH (AArch hardware watchpoint registers)" +# RUN: llvm-readobj --notes %t46.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_ARM_HW_WATCH (AArch hardware watchpoint registers)" + +## Check ELF::NT_FILE. +# RUN: yaml2obj %s -DTYPE=0x46494c45 -o %t47.o +# RUN: llvm-readelf --notes %t47.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_FILE (mapped files)" +# RUN: llvm-readobj --notes %t47.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_FILE (mapped files)" + +## Check ELF::NT_PRXFPREG. +# RUN: yaml2obj %s -DTYPE=0x46e62b7f -o %t48.o +# RUN: llvm-readelf --notes %t48.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_PRXFPREG (user_xfpregs structure)" +# RUN: llvm-readobj --notes %t48.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_PRXFPREG (user_xfpregs structure)" + +## Check ELF::NT_SIGINFO. +# RUN: yaml2obj %s -DTYPE=0x53494749 -o %t49.o +# RUN: llvm-readelf --notes %t49.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="NT_SIGINFO (siginfo_t data)" +# RUN: llvm-readobj --notes %t49.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="NT_SIGINFO (siginfo_t data)" + +## Check an arbitrary unknown type. 
+# RUN: yaml2obj %s -DTYPE=0x12345678 -o %t50.o +# RUN: llvm-readelf --notes %t50.o | FileCheck %s --check-prefix=CHECK-GNU -DDESC="Unknown note type: (0x12345678)" +# RUN: llvm-readobj --notes %t50.o | FileCheck %s --check-prefix=CHECK-LLVM -DDESC="Unknown (0x12345678)" + +# CHECK-GNU: Owner Data size Description +# CHECK-GNU-NEXT: CORE 0x00000000 [[DESC]] + +# CHECK-LLVM: Note { +# CHECK-LLVM-NEXT: Owner: CORE +# CHECK-LLVM-NEXT: Data size: 0x0 +# CHECK-LLVM-NEXT: Type: [[DESC]] +# CHECK-LLVM-NEXT: } --- !ELF FileHeader: @@ -10,52 +265,12 @@ FileHeader: Data: ELFDATA2LSB Type: ET_CORE Sections: - - Name: .note.foo - Type: SHT_NOTE - # Note: format is 0500000000000000434F524500000000 repeated - Content: 050000000000000001000000434F524500000000050000000000000002000000434F524500000000050000000000000003000000434F524500000000050000000000000004000000434F524500000000050000000000000006000000434F524500000000 + - Name: .note.foo + Type: SHT_NOTE + Notes: + - Name: CORE + Type: [[TYPE]] ProgramHeaders: - - Type: PT_NOTE + - Type: PT_NOTE Sections: - Section: .note.foo - -# GNU: Displaying notes found -# GNU-NEXT: Owner Data size Description -# GNU-NEXT: CORE 0x00000000 NT_PRSTATUS (prstatus structure) -# GNU-NEXT: CORE 0x00000000 NT_FPREGSET (floating point registers) -# GNU-NEXT: CORE 0x00000000 NT_PRPSINFO (prpsinfo structure) -# GNU-NEXT: CORE 0x00000000 NT_TASKSTRUCT (task structure) -# GNU-NEXT: CORE 0x00000000 NT_AUXV (auxiliary vector) - -# LLVM: Notes [ -# LLVM-NEXT: NoteSection { -# LLVM-NEXT: Name: -# LLVM-NEXT: Offset: -# LLVM-NEXT: Size: -# LLVM-NEXT: Note { -# LLVM-NEXT: Owner: CORE -# LLVM-NEXT: Data size: 0x0 -# LLVM-NEXT: Type: NT_PRSTATUS (prstatus structure) -# LLVM-NEXT: } -# LLVM-NEXT: Note { -# LLVM-NEXT: Owner: CORE -# LLVM-NEXT: Data size: 0x0 -# LLVM-NEXT: Type: NT_FPREGSET (floating point registers) -# LLVM-NEXT: } -# LLVM-NEXT: Note { -# LLVM-NEXT: Owner: CORE -# LLVM-NEXT: Data size: 0x0 -# LLVM-NEXT: Type: NT_PRPSINFO (prpsinfo structure) -# LLVM-NEXT: } -# LLVM-NEXT: Note { -# LLVM-NEXT: Owner: CORE -# LLVM-NEXT: Data size: 0x0 -# LLVM-NEXT: Type: NT_TASKSTRUCT (task structure) -# LLVM-NEXT: } -# LLVM-NEXT: Note { -# LLVM-NEXT: Owner: CORE -# LLVM-NEXT: Data size: 0x0 -# LLVM-NEXT: Type: NT_AUXV (auxiliary vector) -# LLVM-NEXT: } -# LLVM-NEXT: } -# LLVM-NEXT: ] From e9c314611bc97dc0d5d4ba384b8d5321f3728b16 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Thu, 10 Sep 2020 15:26:23 +0300 Subject: [PATCH 0534/1079] [llvm-readelf/obj] - Refine and generalize the code that is used to dump notes. There is some code that can be shared between GNU/LLVM styles. Also, this fixes 2 inconsistencies related to dumping unknown note types: 1) For GNU style we printed "Unknown note type: (0x00000003)" in some cases, and "Unknown note type (0x00000003)" (no colon) in other cases. GNU readelf always prints `:`. This patch removes the related code duplication and does the same. 2) For LLVM style in some cases we printed "Unknown note type (0x00000003)", but sometimes just "Unknown (0x00000003)". The latter is the right form, which is consistent with other unknowns that are printed in LLVM style. Rebased on top of D87453. 
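For readers skimming the patch: the refactor boils down to one linear table
search over per-owner note-type tables. A minimal standalone sketch of the
lookup idea (plain C++ with simplified names and made-up table contents; the
real code in ELFDumper.cpp uses StringRef/ArrayRef and the full tables):

  #include <cstddef>
  #include <cstdint>
  #include <cstdio>

  // Mirrors the NoteType struct added by this patch (StringRef replaced by
  // const char * so the sketch builds without LLVM headers).
  struct NoteType {
    uint32_t ID;
    const char *Name;
  };

  static const NoteType GNUNoteTypes[] = {
      {1, "NT_GNU_ABI_TAG (ABI version tag)"},
      {3, "NT_GNU_BUILD_ID (unique build ID bitstring)"},
  };

  // Linear search; an empty result tells the caller to print the unknown-type
  // fallback, which is now spelled the same way for both output styles.
  static const char *findNote(const NoteType *Table, size_t Size,
                              uint32_t Type) {
    for (size_t I = 0; I != Size; ++I)
      if (Table[I].ID == Type)
        return Table[I].Name;
    return "";
  }

  int main() {
    uint32_t Type = 0x2; // Not present in the table above.
    const char *Name = findNote(GNUNoteTypes, 2, Type);
    if (Name[0])
      std::printf("%s\n", Name);
    else
      std::printf("Unknown note type: (0x%08x)\n", Type); // GNU-style form.
    return 0;
  }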
Differential revision: https://reviews.llvm.org/D87454
---
 llvm/test/CodeGen/AMDGPU/elf-notes.ll         |   6 +-
 .../tools/llvm-readobj/ELF/note-freebsd.s     |   4 +-
 llvm/tools/llvm-readobj/ELFDumper.cpp         | 365 ++++++++----------
 3 files changed, 157 insertions(+), 218 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/elf-notes.ll b/llvm/test/CodeGen/AMDGPU/elf-notes.ll
index 3a73b91249d51..0c76f00590264 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-notes.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-notes.ll
@@ -31,8 +31,8 @@
 ; OSABI-HSA: .amd_amdgpu_hsa_metadata
 ; OSABI-HSA-NOT: .amd_amdgpu_pal_metadata
-; OSABI-HSA-ELF: Unknown note type (0x00000001)
-; OSABI-HSA-ELF: Unknown note type (0x00000003)
+; OSABI-HSA-ELF: Unknown note type: (0x00000001)
+; OSABI-HSA-ELF: Unknown note type: (0x00000003)
 ; OSABI-HSA-ELF: NT_AMD_AMDGPU_ISA (ISA Version)
 ; OSABI-HSA-ELF: ISA Version:
 ; OSABI-HSA-ELF: amdgcn-amd-amdhsa--gfx802
@@ -59,7 +59,7 @@
 ; OSABI-PAL-NOT: .amd_amdgpu_hsa_metadata
 ; OSABI-PAL: .amd_amdgpu_pal_metadata
-; OSABI-PAL-ELF: Unknown note type (0x00000003)
+; OSABI-PAL-ELF: Unknown note type: (0x00000003)
 ; OSABI-PAL-ELF: NT_AMD_AMDGPU_ISA (ISA Version)
 ; OSABI-PAL-ELF: ISA Version:
 ; OSABI-PAL-ELF: amdgcn-amd-amdpal--gfx802
diff --git a/llvm/test/tools/llvm-readobj/ELF/note-freebsd.s b/llvm/test/tools/llvm-readobj/ELF/note-freebsd.s
index 3d4b461f1feb2..3caca6cc0d718 100644
--- a/llvm/test/tools/llvm-readobj/ELF/note-freebsd.s
+++ b/llvm/test/tools/llvm-readobj/ELF/note-freebsd.s
@@ -13,7 +13,7 @@
 // GNU-NEXT: FreeBSD 0x00000000 NT_PROCSTAT_FILES (files data)
 // GNU-NEXT: Displaying notes found in: .note.baz
 // GNU-NEXT: Owner Data size Description
-// GNU-NEXT: FreeBSD 0x0000001c Unknown note type (0x00000003)
+// GNU-NEXT: FreeBSD 0x0000001c Unknown note type: (0x00000003)
 // GNU-NEXT: description data: 4c 6f 72 65 6d 20 69 70 73 75 6d 20 64 6f 6c 6f 72 20 73 69 74 20 61 6d 65 74 00 00

 // LLVM: Notes [
@@ -49,7 +49,7 @@
 // LLVM-NEXT: Note {
 // LLVM-NEXT: Owner: FreeBSD
 // LLVM-NEXT: Data size: 0x1C
-// LLVM-NEXT: Type: Unknown note type (0x00000003)
+// LLVM-NEXT: Type: Unknown (0x00000003)
 // LLVM-NEXT: Description data (
 // LLVM-NEXT: 0000: 4C6F7265 6D206970 73756D20 646F6C6F |Lorem ipsum dolo|
 // LLVM-NEXT: 0010: 72207369 7420616D 65740000 |r sit amet..|
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index df3799c8fbe67..47246af570d01 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -203,6 +203,11 @@ struct VerNeed {
   std::vector<VernAux> AuxV;
 };

+struct NoteType {
+  uint32_t ID;
+  StringRef Name;
+};
+
 } // namespace

 template <class ELFT> class Relocation {
@@ -4764,184 +4769,6 @@ template <class ELFT> void GNUStyle<ELFT>::printAddrsig() {
   reportError(createError("--addrsig: not implemented"), this->FileName);
 }

-static StringRef getGenericNoteTypeName(const uint32_t NT) {
-  static const struct {
-    uint32_t ID;
-    const char *Name;
-  } Notes[] = {
-      {ELF::NT_VERSION, "NT_VERSION (version)"},
-      {ELF::NT_ARCH, "NT_ARCH (architecture)"},
-      {ELF::NT_GNU_BUILD_ATTRIBUTE_OPEN, "OPEN"},
-      {ELF::NT_GNU_BUILD_ATTRIBUTE_FUNC, "func"},
-  };
-
-  for (const auto &Note : Notes)
-    if (Note.ID == NT)
-      return Note.Name;
-
-  return "";
-}
-
-static StringRef getCoreNoteTypeName(const uint32_t NT) {
-  static const struct {
-    uint32_t ID;
-    const char *Name;
-  } Notes[] = {
-      {ELF::NT_PRSTATUS, "NT_PRSTATUS (prstatus structure)"},
-      {ELF::NT_FPREGSET, "NT_FPREGSET (floating point registers)"},
-      {ELF::NT_PRPSINFO, "NT_PRPSINFO (prpsinfo structure)"},
-
{ELF::NT_TASKSTRUCT, "NT_TASKSTRUCT (task structure)"}, - {ELF::NT_AUXV, "NT_AUXV (auxiliary vector)"}, - {ELF::NT_PSTATUS, "NT_PSTATUS (pstatus structure)"}, - {ELF::NT_FPREGS, "NT_FPREGS (floating point registers)"}, - {ELF::NT_PSINFO, "NT_PSINFO (psinfo structure)"}, - {ELF::NT_LWPSTATUS, "NT_LWPSTATUS (lwpstatus_t structure)"}, - {ELF::NT_LWPSINFO, "NT_LWPSINFO (lwpsinfo_t structure)"}, - {ELF::NT_WIN32PSTATUS, "NT_WIN32PSTATUS (win32_pstatus structure)"}, - - {ELF::NT_PPC_VMX, "NT_PPC_VMX (ppc Altivec registers)"}, - {ELF::NT_PPC_VSX, "NT_PPC_VSX (ppc VSX registers)"}, - {ELF::NT_PPC_TAR, "NT_PPC_TAR (ppc TAR register)"}, - {ELF::NT_PPC_PPR, "NT_PPC_PPR (ppc PPR register)"}, - {ELF::NT_PPC_DSCR, "NT_PPC_DSCR (ppc DSCR register)"}, - {ELF::NT_PPC_EBB, "NT_PPC_EBB (ppc EBB registers)"}, - {ELF::NT_PPC_PMU, "NT_PPC_PMU (ppc PMU registers)"}, - {ELF::NT_PPC_TM_CGPR, "NT_PPC_TM_CGPR (ppc checkpointed GPR registers)"}, - {ELF::NT_PPC_TM_CFPR, - "NT_PPC_TM_CFPR (ppc checkpointed floating point registers)"}, - {ELF::NT_PPC_TM_CVMX, - "NT_PPC_TM_CVMX (ppc checkpointed Altivec registers)"}, - {ELF::NT_PPC_TM_CVSX, "NT_PPC_TM_CVSX (ppc checkpointed VSX registers)"}, - {ELF::NT_PPC_TM_SPR, "NT_PPC_TM_SPR (ppc TM special purpose registers)"}, - {ELF::NT_PPC_TM_CTAR, "NT_PPC_TM_CTAR (ppc checkpointed TAR register)"}, - {ELF::NT_PPC_TM_CPPR, "NT_PPC_TM_CPPR (ppc checkpointed PPR register)"}, - {ELF::NT_PPC_TM_CDSCR, - "NT_PPC_TM_CDSCR (ppc checkpointed DSCR register)"}, - - {ELF::NT_386_TLS, "NT_386_TLS (x86 TLS information)"}, - {ELF::NT_386_IOPERM, "NT_386_IOPERM (x86 I/O permissions)"}, - {ELF::NT_X86_XSTATE, "NT_X86_XSTATE (x86 XSAVE extended state)"}, - - {ELF::NT_S390_HIGH_GPRS, - "NT_S390_HIGH_GPRS (s390 upper register halves)"}, - {ELF::NT_S390_TIMER, "NT_S390_TIMER (s390 timer register)"}, - {ELF::NT_S390_TODCMP, "NT_S390_TODCMP (s390 TOD comparator register)"}, - {ELF::NT_S390_TODPREG, - "NT_S390_TODPREG (s390 TOD programmable register)"}, - {ELF::NT_S390_CTRS, "NT_S390_CTRS (s390 control registers)"}, - {ELF::NT_S390_PREFIX, "NT_S390_PREFIX (s390 prefix register)"}, - {ELF::NT_S390_LAST_BREAK, - "NT_S390_LAST_BREAK (s390 last breaking event address)"}, - {ELF::NT_S390_SYSTEM_CALL, - "NT_S390_SYSTEM_CALL (s390 system call restart data)"}, - {ELF::NT_S390_TDB, "NT_S390_TDB (s390 transaction diagnostic block)"}, - {ELF::NT_S390_VXRS_LOW, - "NT_S390_VXRS_LOW (s390 vector registers 0-15 upper half)"}, - {ELF::NT_S390_VXRS_HIGH, - "NT_S390_VXRS_HIGH (s390 vector registers 16-31)"}, - {ELF::NT_S390_GS_CB, "NT_S390_GS_CB (s390 guarded-storage registers)"}, - {ELF::NT_S390_GS_BC, - "NT_S390_GS_BC (s390 guarded-storage broadcast control)"}, - - {ELF::NT_ARM_VFP, "NT_ARM_VFP (arm VFP registers)"}, - {ELF::NT_ARM_TLS, "NT_ARM_TLS (AArch TLS registers)"}, - {ELF::NT_ARM_HW_BREAK, - "NT_ARM_HW_BREAK (AArch hardware breakpoint registers)"}, - {ELF::NT_ARM_HW_WATCH, - "NT_ARM_HW_WATCH (AArch hardware watchpoint registers)"}, - - {ELF::NT_FILE, "NT_FILE (mapped files)"}, - {ELF::NT_PRXFPREG, "NT_PRXFPREG (user_xfpregs structure)"}, - {ELF::NT_SIGINFO, "NT_SIGINFO (siginfo_t data)"}, - }; - - for (const auto &Note : Notes) - if (Note.ID == NT) - return Note.Name; - - return ""; -} - -static std::string getGNUNoteTypeName(const uint32_t NT) { - static const struct { - uint32_t ID; - const char *Name; - } Notes[] = { - {ELF::NT_GNU_ABI_TAG, "NT_GNU_ABI_TAG (ABI version tag)"}, - {ELF::NT_GNU_HWCAP, "NT_GNU_HWCAP (DSO-supplied software HWCAP info)"}, - {ELF::NT_GNU_BUILD_ID, "NT_GNU_BUILD_ID (unique build ID 
bitstring)"},
-      {ELF::NT_GNU_GOLD_VERSION, "NT_GNU_GOLD_VERSION (gold version)"},
-      {ELF::NT_GNU_PROPERTY_TYPE_0, "NT_GNU_PROPERTY_TYPE_0 (property note)"},
-  };
-
-  for (const auto &Note : Notes)
-    if (Note.ID == NT)
-      return std::string(Note.Name);
-
-  std::string string;
-  raw_string_ostream OS(string);
-  OS << format("Unknown note type (0x%08x)", NT);
-  return OS.str();
-}
-
-static std::string getFreeBSDNoteTypeName(const uint32_t NT) {
-  static const struct {
-    uint32_t ID;
-    const char *Name;
-  } Notes[] = {
-      {ELF::NT_FREEBSD_THRMISC, "NT_THRMISC (thrmisc structure)"},
-      {ELF::NT_FREEBSD_PROCSTAT_PROC, "NT_PROCSTAT_PROC (proc data)"},
-      {ELF::NT_FREEBSD_PROCSTAT_FILES, "NT_PROCSTAT_FILES (files data)"},
-      {ELF::NT_FREEBSD_PROCSTAT_VMMAP, "NT_PROCSTAT_VMMAP (vmmap data)"},
-      {ELF::NT_FREEBSD_PROCSTAT_GROUPS, "NT_PROCSTAT_GROUPS (groups data)"},
-      {ELF::NT_FREEBSD_PROCSTAT_UMASK, "NT_PROCSTAT_UMASK (umask data)"},
-      {ELF::NT_FREEBSD_PROCSTAT_RLIMIT, "NT_PROCSTAT_RLIMIT (rlimit data)"},
-      {ELF::NT_FREEBSD_PROCSTAT_OSREL, "NT_PROCSTAT_OSREL (osreldate data)"},
-      {ELF::NT_FREEBSD_PROCSTAT_PSSTRINGS,
-       "NT_PROCSTAT_PSSTRINGS (ps_strings data)"},
-      {ELF::NT_FREEBSD_PROCSTAT_AUXV, "NT_PROCSTAT_AUXV (auxv data)"},
-  };
-
-  for (const auto &Note : Notes)
-    if (Note.ID == NT)
-      return std::string(Note.Name);
-
-  std::string string;
-  raw_string_ostream OS(string);
-  OS << format("Unknown note type (0x%08x)", NT);
-  return OS.str();
-}
-
-static std::string getAMDNoteTypeName(const uint32_t NT) {
-  static const struct {
-    uint32_t ID;
-    const char *Name;
-  } Notes[] = {{ELF::NT_AMD_AMDGPU_HSA_METADATA,
-                "NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)"},
-               {ELF::NT_AMD_AMDGPU_ISA, "NT_AMD_AMDGPU_ISA (ISA Version)"},
-               {ELF::NT_AMD_AMDGPU_PAL_METADATA,
-                "NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)"}};
-
-  for (const auto &Note : Notes)
-    if (Note.ID == NT)
-      return std::string(Note.Name);
-
-  std::string string;
-  raw_string_ostream OS(string);
-  OS << format("Unknown note type (0x%08x)", NT);
-  return OS.str();
-}
-
-static std::string getAMDGPUNoteTypeName(const uint32_t NT) {
-  if (NT == ELF::NT_AMDGPU_METADATA)
-    return std::string("NT_AMDGPU_METADATA (AMDGPU Metadata)");
-
-  std::string string;
-  raw_string_ostream OS(string);
-  OS << format("Unknown note type (0x%08x)", NT);
-  return OS.str();
-}
-
 template <typename ELFT>
 static std::string getGNUProperty(uint32_t Type, uint32_t DataSize,
                                   ArrayRef<uint8_t> Data) {
@@ -5291,6 +5118,138 @@ static void printCoreNote(raw_ostream &OS, const CoreNote &Note) {
   }
 }

+static const NoteType GenericNoteTypes[] = {
+    {ELF::NT_VERSION, "NT_VERSION (version)"},
+    {ELF::NT_ARCH, "NT_ARCH (architecture)"},
+    {ELF::NT_GNU_BUILD_ATTRIBUTE_OPEN, "OPEN"},
+    {ELF::NT_GNU_BUILD_ATTRIBUTE_FUNC, "func"},
+};
+
+static const NoteType GNUNoteTypes[] = {
+    {ELF::NT_GNU_ABI_TAG, "NT_GNU_ABI_TAG (ABI version tag)"},
+    {ELF::NT_GNU_HWCAP, "NT_GNU_HWCAP (DSO-supplied software HWCAP info)"},
+    {ELF::NT_GNU_BUILD_ID, "NT_GNU_BUILD_ID (unique build ID bitstring)"},
+    {ELF::NT_GNU_GOLD_VERSION, "NT_GNU_GOLD_VERSION (gold version)"},
+    {ELF::NT_GNU_PROPERTY_TYPE_0, "NT_GNU_PROPERTY_TYPE_0 (property note)"},
+};
+
+static const NoteType FreeBSDNoteTypes[] = {
+    {ELF::NT_FREEBSD_THRMISC, "NT_THRMISC (thrmisc structure)"},
+    {ELF::NT_FREEBSD_PROCSTAT_PROC, "NT_PROCSTAT_PROC (proc data)"},
+    {ELF::NT_FREEBSD_PROCSTAT_FILES, "NT_PROCSTAT_FILES (files data)"},
+    {ELF::NT_FREEBSD_PROCSTAT_VMMAP, "NT_PROCSTAT_VMMAP (vmmap data)"},
+    {ELF::NT_FREEBSD_PROCSTAT_GROUPS, "NT_PROCSTAT_GROUPS (groups data)"},
+
{ELF::NT_FREEBSD_PROCSTAT_UMASK, "NT_PROCSTAT_UMASK (umask data)"}, + {ELF::NT_FREEBSD_PROCSTAT_RLIMIT, "NT_PROCSTAT_RLIMIT (rlimit data)"}, + {ELF::NT_FREEBSD_PROCSTAT_OSREL, "NT_PROCSTAT_OSREL (osreldate data)"}, + {ELF::NT_FREEBSD_PROCSTAT_PSSTRINGS, + "NT_PROCSTAT_PSSTRINGS (ps_strings data)"}, + {ELF::NT_FREEBSD_PROCSTAT_AUXV, "NT_PROCSTAT_AUXV (auxv data)"}, +}; + +static const NoteType AMDNoteTypes[] = { + {ELF::NT_AMD_AMDGPU_HSA_METADATA, + "NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)"}, + {ELF::NT_AMD_AMDGPU_ISA, "NT_AMD_AMDGPU_ISA (ISA Version)"}, + {ELF::NT_AMD_AMDGPU_PAL_METADATA, + "NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)"}, +}; + +static const NoteType AMDGPUNoteTypes[] = { + {ELF::NT_AMDGPU_METADATA, "NT_AMDGPU_METADATA (AMDGPU Metadata)"}, +}; + +static const NoteType CoreNoteTypes[] = { + {ELF::NT_PRSTATUS, "NT_PRSTATUS (prstatus structure)"}, + {ELF::NT_FPREGSET, "NT_FPREGSET (floating point registers)"}, + {ELF::NT_PRPSINFO, "NT_PRPSINFO (prpsinfo structure)"}, + {ELF::NT_TASKSTRUCT, "NT_TASKSTRUCT (task structure)"}, + {ELF::NT_AUXV, "NT_AUXV (auxiliary vector)"}, + {ELF::NT_PSTATUS, "NT_PSTATUS (pstatus structure)"}, + {ELF::NT_FPREGS, "NT_FPREGS (floating point registers)"}, + {ELF::NT_PSINFO, "NT_PSINFO (psinfo structure)"}, + {ELF::NT_LWPSTATUS, "NT_LWPSTATUS (lwpstatus_t structure)"}, + {ELF::NT_LWPSINFO, "NT_LWPSINFO (lwpsinfo_t structure)"}, + {ELF::NT_WIN32PSTATUS, "NT_WIN32PSTATUS (win32_pstatus structure)"}, + + {ELF::NT_PPC_VMX, "NT_PPC_VMX (ppc Altivec registers)"}, + {ELF::NT_PPC_VSX, "NT_PPC_VSX (ppc VSX registers)"}, + {ELF::NT_PPC_TAR, "NT_PPC_TAR (ppc TAR register)"}, + {ELF::NT_PPC_PPR, "NT_PPC_PPR (ppc PPR register)"}, + {ELF::NT_PPC_DSCR, "NT_PPC_DSCR (ppc DSCR register)"}, + {ELF::NT_PPC_EBB, "NT_PPC_EBB (ppc EBB registers)"}, + {ELF::NT_PPC_PMU, "NT_PPC_PMU (ppc PMU registers)"}, + {ELF::NT_PPC_TM_CGPR, "NT_PPC_TM_CGPR (ppc checkpointed GPR registers)"}, + {ELF::NT_PPC_TM_CFPR, + "NT_PPC_TM_CFPR (ppc checkpointed floating point registers)"}, + {ELF::NT_PPC_TM_CVMX, + "NT_PPC_TM_CVMX (ppc checkpointed Altivec registers)"}, + {ELF::NT_PPC_TM_CVSX, "NT_PPC_TM_CVSX (ppc checkpointed VSX registers)"}, + {ELF::NT_PPC_TM_SPR, "NT_PPC_TM_SPR (ppc TM special purpose registers)"}, + {ELF::NT_PPC_TM_CTAR, "NT_PPC_TM_CTAR (ppc checkpointed TAR register)"}, + {ELF::NT_PPC_TM_CPPR, "NT_PPC_TM_CPPR (ppc checkpointed PPR register)"}, + {ELF::NT_PPC_TM_CDSCR, "NT_PPC_TM_CDSCR (ppc checkpointed DSCR register)"}, + + {ELF::NT_386_TLS, "NT_386_TLS (x86 TLS information)"}, + {ELF::NT_386_IOPERM, "NT_386_IOPERM (x86 I/O permissions)"}, + {ELF::NT_X86_XSTATE, "NT_X86_XSTATE (x86 XSAVE extended state)"}, + + {ELF::NT_S390_HIGH_GPRS, "NT_S390_HIGH_GPRS (s390 upper register halves)"}, + {ELF::NT_S390_TIMER, "NT_S390_TIMER (s390 timer register)"}, + {ELF::NT_S390_TODCMP, "NT_S390_TODCMP (s390 TOD comparator register)"}, + {ELF::NT_S390_TODPREG, "NT_S390_TODPREG (s390 TOD programmable register)"}, + {ELF::NT_S390_CTRS, "NT_S390_CTRS (s390 control registers)"}, + {ELF::NT_S390_PREFIX, "NT_S390_PREFIX (s390 prefix register)"}, + {ELF::NT_S390_LAST_BREAK, + "NT_S390_LAST_BREAK (s390 last breaking event address)"}, + {ELF::NT_S390_SYSTEM_CALL, + "NT_S390_SYSTEM_CALL (s390 system call restart data)"}, + {ELF::NT_S390_TDB, "NT_S390_TDB (s390 transaction diagnostic block)"}, + {ELF::NT_S390_VXRS_LOW, + "NT_S390_VXRS_LOW (s390 vector registers 0-15 upper half)"}, + {ELF::NT_S390_VXRS_HIGH, "NT_S390_VXRS_HIGH (s390 vector registers 16-31)"}, + {ELF::NT_S390_GS_CB, "NT_S390_GS_CB (s390 
guarded-storage registers)"},
+    {ELF::NT_S390_GS_BC,
+     "NT_S390_GS_BC (s390 guarded-storage broadcast control)"},
+
+    {ELF::NT_ARM_VFP, "NT_ARM_VFP (arm VFP registers)"},
+    {ELF::NT_ARM_TLS, "NT_ARM_TLS (AArch TLS registers)"},
+    {ELF::NT_ARM_HW_BREAK,
+     "NT_ARM_HW_BREAK (AArch hardware breakpoint registers)"},
+    {ELF::NT_ARM_HW_WATCH,
+     "NT_ARM_HW_WATCH (AArch hardware watchpoint registers)"},
+
+    {ELF::NT_FILE, "NT_FILE (mapped files)"},
+    {ELF::NT_PRXFPREG, "NT_PRXFPREG (user_xfpregs structure)"},
+    {ELF::NT_SIGINFO, "NT_SIGINFO (siginfo_t data)"},
+};
+
+template <class ELFT>
+const StringRef getNoteTypeName(const typename ELFT::Note &Note,
+                                unsigned ELFType) {
+  uint32_t Type = Note.getType();
+  auto FindNote = [&](ArrayRef<NoteType> V) -> StringRef {
+    for (const NoteType &N : V)
+      if (N.ID == Type)
+        return N.Name;
+    return "";
+  };
+
+  StringRef Name = Note.getName();
+  if (Name == "GNU")
+    return FindNote(GNUNoteTypes);
+  if (Name == "FreeBSD")
+    return FindNote(FreeBSDNoteTypes);
+  if (Name == "AMD")
+    return FindNote(AMDNoteTypes);
+  if (Name == "AMDGPU")
+    return FindNote(AMDGPUNoteTypes);
+
+  if (ELFType == ELF::ET_CORE)
+    return FindNote(CoreNoteTypes);
+  return FindNote(GenericNoteTypes);
+}
+
 template <class ELFT> void GNUStyle<ELFT>::printNotes() {
   auto PrintHeader = [&](Optional<StringRef> SecName,
                          const typename ELFT::Off Offset,
@@ -5314,23 +5273,13 @@ template <class ELFT> void GNUStyle<ELFT>::printNotes() {
     // Print the note owner/type.
     OS << "  " << left_justify(Name, 20) << ' '
        << format_hex(Descriptor.size(), 10) << '\t';
-    if (Name == "GNU") {
-      OS << getGNUNoteTypeName(Type) << '\n';
-    } else if (Name == "FreeBSD") {
-      OS << getFreeBSDNoteTypeName(Type) << '\n';
-    } else if (Name == "AMD") {
-      OS << getAMDNoteTypeName(Type) << '\n';
-    } else if (Name == "AMDGPU") {
-      OS << getAMDGPUNoteTypeName(Type) << '\n';
-    } else {
-      StringRef NoteType = this->Obj.getHeader()->e_type == ELF::ET_CORE
-                               ? getCoreNoteTypeName(Type)
-                               : getGenericNoteTypeName(Type);
-      if (!NoteType.empty())
-        OS << NoteType << '\n';
-      else
-        OS << "Unknown note type: (" << format_hex(Type, 10) << ")\n";
-    }
+
+    StringRef NoteType =
+        getNoteTypeName<ELFT>(Note, this->Obj.getHeader()->e_type);
+    if (!NoteType.empty())
+      OS << NoteType << '\n';
+    else
+      OS << "Unknown note type: (" << format_hex(Type, 10) << ")\n";

     // Print the description, or fallback to printing raw bytes for unknown
     // owners.
@@ -6624,24 +6573,14 @@ template <class ELFT> void LLVMStyle<ELFT>::printNotes() {
     // Print the note owner/type.
     W.printString("Owner", Name);
     W.printHex("Data size", Descriptor.size());
-    if (Name == "GNU") {
-      W.printString("Type", getGNUNoteTypeName(Type));
-    } else if (Name == "FreeBSD") {
-      W.printString("Type", getFreeBSDNoteTypeName(Type));
-    } else if (Name == "AMD") {
-      W.printString("Type", getAMDNoteTypeName(Type));
-    } else if (Name == "AMDGPU") {
-      W.printString("Type", getAMDGPUNoteTypeName(Type));
-    } else {
-      StringRef NoteType = this->Obj.getHeader()->e_type == ELF::ET_CORE
-                               ? getCoreNoteTypeName(Type)
-                               : getGenericNoteTypeName(Type);
-      if (!NoteType.empty())
-        W.printString("Type", NoteType);
-      else
-        W.printString("Type",
-                      "Unknown (" + to_string(format_hex(Type, 10)) + ")");
-    }
+
+    StringRef NoteType =
+        getNoteTypeName<ELFT>(Note, this->Obj.getHeader()->e_type);
+    if (!NoteType.empty())
+      W.printString("Type", NoteType);
+    else
+      W.printString("Type",
+                    "Unknown (" + to_string(format_hex(Type, 10)) + ")");

     // Print the description, or fallback to printing raw bytes for unknown
     // owners.
From 412b417bfa79d54ebea1ae8bd0fd359044a133f4 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Mon, 14 Sep 2020 18:28:58 +0700 Subject: [PATCH 0535/1079] [NFC] Add missing `const` statements in SCEV --- llvm/include/llvm/Analysis/ScalarEvolution.h | 16 +++++++++------- llvm/lib/Analysis/ScalarEvolution.cpp | 13 +++++++------ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 8a88645f7cfc5..82dbe380b947a 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -696,7 +696,8 @@ class ScalarEvolution { /// before taking the branch. For loops with multiple exits, it may not be /// the number times that the loop header executes if the loop exits /// prematurely via another branch. - unsigned getSmallConstantTripCount(const Loop *L, BasicBlock *ExitingBlock); + unsigned getSmallConstantTripCount(const Loop *L, + const BasicBlock *ExitingBlock); /// Returns the upper bound of the loop trip count as a normal unsigned /// value. @@ -718,8 +719,7 @@ class ScalarEvolution { /// for getSmallConstantTripCount, this assumes that control exits the loop /// via ExitingBlock. unsigned getSmallConstantTripMultiple(const Loop *L, - BasicBlock *ExitingBlock); - + const BasicBlock *ExitingBlock); /// The terms "backedge taken count" and "exit count" are used /// interchangeably to refer to the number of times the backedge of a loop @@ -737,8 +737,8 @@ class ScalarEvolution { /// For a single exit loop, this value is equivelent to the result of /// getBackedgeTakenCount. The loop is guaranteed to exit (via *some* exit) /// before the backedge is executed (ExitCount + 1) times. Note that there - /// is no guarantee about *which* exit is taken on the exiting iteration. - const SCEV *getExitCount(const Loop *L, BasicBlock *ExitingBlock, + /// is no guarantee about *which* exit is taken on the exiting iteration. + const SCEV *getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind = Exact); /// If the specified loop has a predictable backedge-taken count, return it, @@ -1352,13 +1352,15 @@ class ScalarEvolution { /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via /// this block before this number of iterations, but may exit via another /// block. - const SCEV *getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const; + const SCEV *getExact(const BasicBlock *ExitingBlock, + ScalarEvolution *SE) const; /// Get the max backedge taken count for the loop. const SCEV *getMax(ScalarEvolution *SE) const; /// Get the max backedge taken count for the particular loop exit. - const SCEV *getMax(BasicBlock *ExitingBlock, ScalarEvolution *SE) const; + const SCEV *getMax(const BasicBlock *ExitingBlock, + ScalarEvolution *SE) const; /// Return true if the number of times this backedge is taken is either the /// value returned by getMax or zero. 
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index c5745c0eebadd..e571bad59f3a6 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6392,8 +6392,9 @@ unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) {
   return 0;
 }

-unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L,
-                                                    BasicBlock *ExitingBlock) {
+unsigned
+ScalarEvolution::getSmallConstantTripCount(const Loop *L,
+                                           const BasicBlock *ExitingBlock) {
   assert(ExitingBlock && "Must pass a non-null exiting block!");
   assert(L->isLoopExiting(ExitingBlock) &&
          "Exiting block must actually branch out of the loop!");
@@ -6430,7 +6431,7 @@ unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) {
 /// that control exits the loop via ExitingBlock.
 unsigned
 ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
-                                              BasicBlock *ExitingBlock) {
+                                              const BasicBlock *ExitingBlock) {
   assert(ExitingBlock && "Must pass a non-null exiting block!");
   assert(L->isLoopExiting(ExitingBlock) &&
          "Exiting block must actually branch out of the loop!");
@@ -6461,7 +6462,7 @@ ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
 }

 const SCEV *ScalarEvolution::getExitCount(const Loop *L,
-                                          BasicBlock *ExitingBlock,
+                                          const BasicBlock *ExitingBlock,
                                           ExitCountKind Kind) {
   switch (Kind) {
   case Exact:
@@ -6790,7 +6791,7 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE,
 /// Get the exact not taken count for this loop exit.
 const SCEV *
-ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
+ScalarEvolution::BackedgeTakenInfo::getExact(const BasicBlock *ExitingBlock,
                                              ScalarEvolution *SE) const {
   for (auto &ENT : ExitNotTaken)
     if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
@@ -6800,7 +6801,7 @@ ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
 }

 const SCEV *
-ScalarEvolution::BackedgeTakenInfo::getMax(BasicBlock *ExitingBlock,
+ScalarEvolution::BackedgeTakenInfo::getMax(const BasicBlock *ExitingBlock,
                                            ScalarEvolution *SE) const {
   for (auto &ENT : ExitNotTaken)
     if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())

From 14e191a0e7c54d40327c2367b00261ac4856f4b5 Mon Sep 17 00:00:00 2001
From: Georgii Rymar
Date: Fri, 11 Sep 2020 14:35:06 +0300
Subject: [PATCH 0536/1079] [llvm-readobj] - Cleanup implementation
 LLVMStyle<ELFT>::printAddrsig().

It has the following issues:

1) `getStaticSymbolName` returns `std::string`, but the code assigns a
   result to `Expected<std::string>`.
2) The code uses `unwrapOrError` and never tests the error reported.

This patch fixes these issues.

Differential revision: https://reviews.llvm.org/D87507
---
 llvm/test/tools/llvm-readobj/ELF/addrsig.test | 27 ++++++++++++-----
 llvm/tools/llvm-readobj/ELFDumper.cpp         | 30 +++++++++----------
 2 files changed, 35 insertions(+), 22 deletions(-)

diff --git a/llvm/test/tools/llvm-readobj/ELF/addrsig.test b/llvm/test/tools/llvm-readobj/ELF/addrsig.test
index f6e29c7a46819..24621d80f79e6 100644
--- a/llvm/test/tools/llvm-readobj/ELF/addrsig.test
+++ b/llvm/test/tools/llvm-readobj/ELF/addrsig.test
@@ -31,12 +31,15 @@ Symbols:
 # RUN: llvm-readobj --all %t1.o | FileCheck %s --check-prefix LLVM
 # RUN: llvm-readelf --all %t1.o 2>&1 | FileCheck %s --implicit-check-not=warning --implicit-check-not=error

-## Check we report a warning when SHT_LLVM_ADDRSIG is broken (e.g. contains a malformed uleb128).
+## Check we report a warning when the content of the SHT_LLVM_ADDRSIG section
+## is broken (e.g. 
contains a malformed uleb128).

-# RUN: yaml2obj --docnum=2 %s -o %t2.o
-# RUN: llvm-readobj --addrsig %t2.o 2>&1 | FileCheck %s -DFILE=%t2.o --check-prefix=MALFORMED
+# RUN: yaml2obj --docnum=2 %s -o %t2.1.o
+# RUN: llvm-readobj --addrsig %t2.1.o 2>&1 | FileCheck %s -DFILE=%t2.1.o --check-prefix=MALFORMED

-# MALFORMED: warning: '[[FILE]]': malformed uleb128, extends past end
+# MALFORMED:      Addrsig [
+# MALFORMED-NEXT:   warning: '[[FILE]]': unable to decode SHT_LLVM_ADDRSIG section with index 1: malformed uleb128, extends past end
+# MALFORMED-NEXT: ]

 --- !ELF
 FileHeader:
@@ -44,9 +47,19 @@ FileHeader:
   Data:  ELFDATA2LSB
   Type:  ET_DYN
 Sections:
-  - Name:    .llvm_addrsig
-    Type:    SHT_LLVM_ADDRSIG
-    Content: "FF"
+  - Name:     .llvm_addrsig
+    Type:     SHT_LLVM_ADDRSIG
+    Content:  "FF"
+    ShOffset: [[OFFSET=<none>]]
+
+## Check we report a warning when the content of the SHT_LLVM_ADDRSIG section can't be read.
+
+# RUN: yaml2obj --docnum=2 -DOFFSET=0xffffffff %s -o %t2.2.o
+# RUN: llvm-readobj --addrsig %t2.2.o 2>&1 | FileCheck %s -DFILE=%t2.2.o --check-prefix=BROKEN-SEC
+
+# BROKEN-SEC:      Addrsig [
+# BROKEN-SEC-NEXT:   warning: '[[FILE]]': section [index 1] has a sh_offset (0xffffffff) + sh_size (0x1) that is greater than the file size (0x168)
+# BROKEN-SEC-NEXT: ]

 ## Check we report a warning when SHT_LLVM_ADDRSIG references a symbol that can't be
 ## dumped (e.g. the index value is larger than the number of symbols in .symtab).
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 47246af570d01..a1cf62f546c78 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -6489,26 +6489,26 @@ static Expected<std::vector<uint64_t>> toULEB128Array(ArrayRef<uint8_t> Data) {

 template <class ELFT> void LLVMStyle<ELFT>::printAddrsig() {
   ListScope L(W, "Addrsig");
-  if (!this->dumper()->getDotAddrsigSec())
+  const Elf_Shdr *Sec = this->dumper()->getDotAddrsigSec();
+  if (!Sec)
     return;
-
-  ArrayRef<uint8_t> Contents = unwrapOrError(
-      this->FileName,
-      this->Obj.getSectionContents(this->dumper()->getDotAddrsigSec()));
-  Expected<std::vector<uint64_t>> V = toULEB128Array(Contents);
-  if (!V) {
-    reportWarning(V.takeError(), this->FileName);
+
+  Expected<ArrayRef<uint8_t>> ContentsOrErr = this->Obj.getSectionContents(Sec);
+  if (!ContentsOrErr) {
+    this->reportUniqueWarning(ContentsOrErr.takeError());
     return;
   }
-  for (uint64_t Sym : *V) {
-    Expected<std::string> NameOrErr = this->dumper()->getStaticSymbolName(Sym);
-    if (NameOrErr) {
-      W.printNumber("Sym", *NameOrErr, Sym);
-      continue;
-    }
-    reportWarning(NameOrErr.takeError(), this->FileName);
-    W.printNumber("Sym", "<?>", Sym);
+
+  Expected<std::vector<uint64_t>> SymsOrErr = toULEB128Array(*ContentsOrErr);
+  if (!SymsOrErr) {
+    this->reportUniqueWarning(createError("unable to decode " +
+                                          describe(this->Obj, *Sec) + ": " +
+                                          toString(SymsOrErr.takeError())));
+    return;
   }
+
+  for (uint64_t Sym : *SymsOrErr)
+    W.printNumber("Sym", this->dumper()->getStaticSymbolName(Sym), Sym);
 }

From 7448e64a790bfed10a04a550c14b91429cda07e0 Mon Sep 17 00:00:00 2001
From: Georgii Rymar
Date: Fri, 11 Sep 2020 13:29:33 +0300
Subject: [PATCH 0537/1079] [llvm-readobj/elf] - Don't use unwrapOrError when
 reporting errors about SHT_DYNAMIC sections.

This changes messages reported to stop using dynamic section names
(use `describe()` instead). This allows us to avoid `unwrapOrError`
and improves diagnostics.
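To make the new reporting pattern concrete before the diff: warnings are now
built from a description that is always computable, instead of first
unwrapping the section name. A small self-contained sketch (this `describe()`
is a hypothetical stand-in; the real helper in ELFDumper.cpp also takes the
ELF object and covers every section kind, not just SHT_DYNAMIC):

  #include <cstdio>
  #include <string>

  // Hypothetical, trimmed-down section header.
  struct SectionHeader {
    unsigned Index;
  };

  // Simplified describe(): uses only data that cannot fail to be read, so
  // there is no unwrapOrError step that could itself report an error.
  static std::string describe(const SectionHeader &Sec) {
    return "SHT_DYNAMIC section with index " + std::to_string(Sec.Index);
  }

  int main() {
    SectionHeader DynamicSec{1};
    // Produces the new warning wording checked by the updated tests below.
    std::printf(
        "warning: %s is not contained within the PT_DYNAMIC segment\n",
        describe(DynamicSec).c_str());
    return 0;
  }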
Differential revision: https://reviews.llvm.org/D87503
---
 .../llvm-readobj/ELF/dynamic-not-in-pt-dynamic.test |  4 ++--
 .../llvm-readobj/ELF/non-dynamic-in-pt-dynamic.test |  2 +-
 llvm/tools/llvm-readobj/ELFDumper.cpp               | 10 ++++------
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/llvm/test/tools/llvm-readobj/ELF/dynamic-not-in-pt-dynamic.test b/llvm/test/tools/llvm-readobj/ELF/dynamic-not-in-pt-dynamic.test
index 8c33931468c6b..20dd7c0ef630b 100644
--- a/llvm/test/tools/llvm-readobj/ELF/dynamic-not-in-pt-dynamic.test
+++ b/llvm/test/tools/llvm-readobj/ELF/dynamic-not-in-pt-dynamic.test
@@ -11,7 +11,7 @@
 # RUN: llvm-readelf --dynamic-table %t1.o 2>&1 \
 # RUN:   | FileCheck -DFILE=%t1.o --check-prefixes=WARNING1,GNU1 %s

-# WARNING1: warning: '[[FILE]]': The SHT_DYNAMIC section '.dynamic' is not contained within the PT_DYNAMIC segment
+# WARNING1: warning: '[[FILE]]': SHT_DYNAMIC section with index 1 is not contained within the PT_DYNAMIC segment
 # WARNING1: warning: '[[FILE]]': invalid PT_DYNAMIC size (0x1){{$}}
 # WARNING1: warning: '[[FILE]]': SHT_DYNAMIC section header and PT_DYNAMIC program header disagree about the location of the dynamic table
 # WARNING1: warning: '[[FILE]]': PT_DYNAMIC dynamic table is invalid: SHT_DYNAMIC will be used
@@ -69,7 +69,7 @@ ProgramHeaders:
 # RUN: llvm-readelf --dynamic-table %t2.o 2>&1 \
 # RUN:   | FileCheck -DFILE=%t2.o --check-prefixes=WARNING2,GNU2 %s

-# WARNING2: warning: '[[FILE]]': The SHT_DYNAMIC section '.dynamic' is not contained within the PT_DYNAMIC segment
+# WARNING2: warning: '[[FILE]]': SHT_DYNAMIC section with index 1 is not contained within the PT_DYNAMIC segment
 # WARNING2: warning: '[[FILE]]': SHT_DYNAMIC section header and PT_DYNAMIC program header disagree about the location of the dynamic table

 # LLVM2: DynamicSection [ (1 entries)
diff --git a/llvm/test/tools/llvm-readobj/ELF/non-dynamic-in-pt-dynamic.test b/llvm/test/tools/llvm-readobj/ELF/non-dynamic-in-pt-dynamic.test
index 5905ccb2902cc..12bcdf6b7216b 100644
--- a/llvm/test/tools/llvm-readobj/ELF/non-dynamic-in-pt-dynamic.test
+++ b/llvm/test/tools/llvm-readobj/ELF/non-dynamic-in-pt-dynamic.test
@@ -10,7 +10,7 @@
 # RUN: llvm-readelf --dynamic-table %t1.o 2>&1 \
 # RUN:   | FileCheck %s --DFILE=%t1.o --check-prefixes=WARNING,GNU

-# WARNING: warning: '[[FILE]]': The SHT_DYNAMIC section '.dynamic' is not at the start of PT_DYNAMIC segment
+# WARNING: warning: '[[FILE]]': SHT_DYNAMIC section with index 2 is not at the start of PT_DYNAMIC segment
 # WARNING: warning: '[[FILE]]': invalid PT_DYNAMIC size (0x21){{$}}
 # WARNING: warning: '[[FILE]]': SHT_DYNAMIC section header and PT_DYNAMIC program header disagree about the location of the dynamic table
 # WARNING: warning: '[[FILE]]': PT_DYNAMIC dynamic table is invalid: SHT_DYNAMIC will be used
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index a1cf62f546c78..70584e8a161c8 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1886,19 +1886,17 @@ ELFDumper<ELFT>::findDynamic(const ELFFile<ELFT> *Obj) {
   }

   if (DynamicPhdr && DynamicSec) {
-    StringRef Name =
-        unwrapOrError(ObjF->getFileName(), Obj->getSectionName(DynamicSec));
     if (DynamicSec->sh_addr + DynamicSec->sh_size >
             DynamicPhdr->p_vaddr + DynamicPhdr->p_memsz ||
         DynamicSec->sh_addr < DynamicPhdr->p_vaddr)
-      reportWarning(createError("The SHT_DYNAMIC section '" + Name +
-                                "' is not contained within the "
+      reportWarning(createError(describe(*DynamicSec) +
+                                " is not contained within the "
                                 "PT_DYNAMIC 
segment"),
                     ObjF->getFileName());

     if (DynamicSec->sh_addr != DynamicPhdr->p_vaddr)
-      reportWarning(createError("The SHT_DYNAMIC section '" + Name +
-                                "' is not at the start of "
+      reportWarning(createError(describe(*DynamicSec) +
+                                " is not at the start of "
                                 "PT_DYNAMIC segment"),
                     ObjF->getFileName());
   }

From 7109fc9e42e6b9a56497dcc6a25228d818af4f38 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 14 Sep 2020 13:04:44 +0100
Subject: [PATCH 0538/1079] Don't dereference from a dyn_cast<>. NFCI.

Use cast<> instead which will assert if it fails and not just return null.

Fixes clang static analyzer warning.
---
 llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index 230bc7adc07ab..0abe42d221207 100644
--- a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -166,13 +166,13 @@ class AVROperand : public MCParsedAsmOperand {
     assert(N == 1 && "Invalid number of operands!");
     // The operand is actually a imm8, but we have its bitwise
     // negation in the assembly source, so twiddle it here.
-    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    const auto *CE = cast<MCConstantExpr>(getImm());
     Inst.addOperand(MCOperand::createImm(~(uint8_t)CE->getValue()));
   }

   bool isImmCom8() const {
     if (!isImm()) return false;
-    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    const auto *CE = dyn_cast<MCConstantExpr>(getImm());
     if (!CE) return false;
     int64_t Value = CE->getValue();
     return isUInt<8>(Value);

From 98eaacd73d40eb28d5fa86bc3cfc9371581ee0cb Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 14 Sep 2020 13:24:17 +0100
Subject: [PATCH 0539/1079] Assert we've found both vector types. NFCI.

Fixes clang static analyzer warning about potential null dereferences.
---
 llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
index d8008320696c3..f36b341157036 100644
--- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
+++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
@@ -1062,6 +1062,7 @@ static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP,
     FixSummands(YElType, X);
     XElType = cast<FixedVectorType>(X->getType());
   }
+  assert(XElType && YElType && "Unknown vector types");
   // Check that the summands are of compatible types
   if (XElType != YElType) {
     LLVM_DEBUG(dbgs() << "masked gathers/scatters: incompatible gep offsets\n");

From c799f873cb9feaea265aa3df8f3372949f8263d0 Mon Sep 17 00:00:00 2001
From: Jay Foad
Date: Wed, 3 Jun 2020 10:01:12 +0100
Subject: [PATCH 0540/1079] [AMDGPU] Don't cluster stores

Clustering loads has caching benefits, but as far as I know there is no
advantage to clustering stores on any AMDGPU subtargets. The disadvantage
is that it tends to increase register pressure and restricts scheduling
freedom.
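In scheduler-construction terms the change is a one-line deletion per
scheduler factory; the fragment below condenses the AMDGPUTargetMachine.cpp
hunks that follow (LLVM C++, not standalone, shown only to highlight which
mutation goes away):

  // After this patch: loads are still clustered for cache locality, but no
  // store-clustering mutation is registered.
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  // Removed: DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;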
Differential Revision: https://reviews.llvm.org/D85530
---
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |    4 -
 .../GlobalISel/extractelement-stack-lower.ll  |  959 +++++++++---------
 .../GlobalISel/insertelement-stack-lower.ll   |  634 ++++++------
 .../AMDGPU/GlobalISel/insertelement.i16.ll    |  135 +--
 .../AMDGPU/GlobalISel/insertelement.large.ll  |   42 +-
 .../AMDGPU/GlobalISel/load-unaligned.ll       |   38 +-
 .../AMDGPU/GlobalISel/store-local.128.ll      |  192 ++--
 .../AMDGPU/GlobalISel/store-local.96.ll       |  144 +--
 .../AMDGPU/amdgpu-codegenprepare-idiv.ll      |   24 +-
 .../CodeGen/AMDGPU/call-argument-types.ll     |   48 +-
 llvm/test/CodeGen/AMDGPU/cluster_stores.ll    |   13 +-
 .../fast-unaligned-load-store.global.ll       |   26 +-
 .../fast-unaligned-load-store.private.ll      |   14 +-
 llvm/test/CodeGen/AMDGPU/fshr.ll              |   14 +-
 llvm/test/CodeGen/AMDGPU/half.ll              |    2 +-
 llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll |   15 +-
 .../CodeGen/AMDGPU/local-memory.amdgcn.ll     |    2 +-
 llvm/test/CodeGen/AMDGPU/memory_clause.ll     |    8 +-
 llvm/test/CodeGen/AMDGPU/merge-stores.ll      |    2 +-
 llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll  |   28 +-
 .../AMDGPU/si-triv-disjoint-mem-access.ll     |   12 +-
 llvm/test/CodeGen/AMDGPU/store-local.128.ll   |  282 ++---
 llvm/test/CodeGen/AMDGPU/store-local.96.ll    |  208 ++--
 llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll |  254 ++---
 .../AMDGPU/token-factor-inline-limit-test.ll  |   28 +-
 llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll  |   13 +-
 26 files changed, 1566 insertions(+), 1575 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index f46349cb87df5..ccc493640b292 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -283,7 +283,6 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
   ScheduleDAGMILive *DAG =
       new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
-  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
   DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
   DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
   return DAG;
@@ -294,7 +293,6 @@ createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
   auto DAG = new GCNIterativeScheduler(C,
     GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
-  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
   return DAG;
 }

@@ -308,7 +306,6 @@ createIterativeILPMachineScheduler(MachineSchedContext *C) {
   auto DAG = new GCNIterativeScheduler(C,
     GCNIterativeScheduler::SCHEDULE_ILP);
   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
-  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
   DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
   return DAG;
 }
@@ -604,7 +601,6 @@ class AMDGPUPassConfig : public TargetPassConfig {
   createMachineScheduler(MachineSchedContext *C) const override {
     ScheduleDAGMILive *DAG = createGenericSchedLive(C);
     DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
-    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
     return DAG;
   }

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
index 909c05925e7fe..4f9668f8d3697 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
@@ -40,7 +40,6 @@ define i32 
@v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill
 ; GCN-NEXT: v_add_co_u32_e32 v59, vcc, v15, v11
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill
 ; GCN-NEXT: global_load_dwordx4 v[3:6], v[15:16], off
 ; GCN-NEXT: global_load_dwordx4 v[7:10], v[15:16], off offset:16
 ; GCN-NEXT: v_addc_co_u32_e32 v60, vcc, v16, v12, vcc
@@ -56,214 +55,212 @@ define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-NEXT: v_lshrrev_b32_e64 v0, 6, s33
 ; GCN-NEXT: v_add_u32_e32 v0, 0x100, v0
 ; GCN-NEXT: v_add_u32_e32 v1, 16, v0
-; GCN-NEXT: v_add_u32_e32 v2, 20, v0
 ; GCN-NEXT: s_add_u32 s32, s32, 0x10000
 ; GCN-NEXT: s_sub_u32 s32, s32, 0x10000
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill
 ; GCN-NEXT: global_load_dwordx4 v[47:50], v[48:49], off offset:48
 ; GCN-NEXT: global_load_dwordx4 v[43:46], v[59:60], off
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill
 ; GCN-NEXT: global_load_dwordx4 v[51:54], v[59:60], off offset:16
 ; GCN-NEXT: global_load_dwordx4 v[55:58], v[59:60], off offset:32
 ; GCN-NEXT: global_load_dwordx4 v[59:62], v[59:60], off offset:48
 ; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 20, v0
+; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 24, v0
-; GCN-NEXT: buffer_store_dword v8, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 28, v0
 ; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v10, v2, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 28, v0
+; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 32, v0
-; GCN-NEXT: v_add_u32_e32 v2, 36, v0
 ; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 36, v0
+; GCN-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 40, v0
-; GCN-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 44, v0
 ; GCN-NEXT: buffer_store_dword v13, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 44, v0
+; GCN-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 48, v0
-; GCN-NEXT: buffer_store_dword v14, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 52, v0
 ; GCN-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 52, v0
+; GCN-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 56, v0
-; GCN-NEXT: buffer_store_dword v16, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 60, v0
 ; GCN-NEXT: buffer_store_dword v17, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v18, v2, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 60, v0
+; GCN-NEXT: buffer_store_dword v18, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 64, v0
-; GCN-NEXT: v_add_u32_e32 v2, 0x44, v0
 ; GCN-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x44, v0
+; GCN-NEXT: buffer_store_dword v20, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x48, v0
-; GCN-NEXT: buffer_store_dword v20, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x4c, v0
 ; GCN-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x4c, v0
+; GCN-NEXT: buffer_store_dword v22, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x50, v0
-; GCN-NEXT: buffer_store_dword v22, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x54, v0
 ; GCN-NEXT: buffer_store_dword v23, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x54, v0
+; GCN-NEXT: buffer_store_dword v24, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x58, v0
-; GCN-NEXT: buffer_store_dword v24, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x5c, v0
 ; GCN-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v26, v2, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x5c, v0
+; GCN-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x60, v0
-; GCN-NEXT: v_add_u32_e32 v2, 0x64, v0
 ; GCN-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x64, v0
+; GCN-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x68, v0
-; GCN-NEXT: buffer_store_dword v28, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x6c, v0
 ; GCN-NEXT: buffer_store_dword v29, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x6c, v0
+; GCN-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x70, v0
-; GCN-NEXT: buffer_store_dword v30, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x74, v0
 ; GCN-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x74, v0
+; GCN-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x78, v0
-; GCN-NEXT: buffer_store_dword v32, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x7c, v0
 ; GCN-NEXT: buffer_store_dword v33, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v34, v2, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x7c, v0
+; GCN-NEXT: buffer_store_dword v34, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x80, v0
-; GCN-NEXT: v_add_u32_e32 v2, 0x84, v0
 ; GCN-NEXT: buffer_store_dword v35, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x84, v0
+; GCN-NEXT: buffer_store_dword v36, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x88, v0
-; GCN-NEXT: buffer_store_dword v36, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x8c, v0
 ; GCN-NEXT: buffer_store_dword v37, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x8c, v0
+; GCN-NEXT: buffer_store_dword v38, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x90, v0
-; GCN-NEXT: buffer_store_dword v38, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x94, v0
 ; GCN-NEXT: buffer_store_dword v39, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x94, v0
+; GCN-NEXT: buffer_store_dword v40, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x98, v0
-; GCN-NEXT: buffer_store_dword v40, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x9c, v0
 ; GCN-NEXT: buffer_store_dword v41, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v42, v2, s[0:3], 0 offen
-; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:640 ; 4-byte Folded Reload
+; GCN-NEXT: v_add_u32_e32 v1, 0x9c, v0
+; GCN-NEXT: buffer_store_dword v42, v1, s[0:3], 0 offen
+; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload
 ; GCN-NEXT: v_add_u32_e32 v1, 0xa0, v0
-; GCN-NEXT: v_add_u32_e32 v2, 0xa4, v0
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: v_mov_b32_e32 v8, v15
-; GCN-NEXT: v_mov_b32_e32 v9, v16
 ; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
+; GCN-NEXT: v_mov_b32_e32 v9, v16
+; GCN-NEXT: v_add_u32_e32 v1, 0xa4, v0
+; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_mov_b32_e32 v10, v17
 ; GCN-NEXT: v_add_u32_e32 v1, 0xa8, v0
-; GCN-NEXT: buffer_store_dword v9, v2, s[0:3], 0 offen
 ; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v1, 0xb0, v0
 ; GCN-NEXT: v_mov_b32_e32 v11, v18
-; GCN-NEXT: v_add_u32_e32 v2, 0xac, v0
-; GCN-NEXT: buffer_store_dword v11, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0xb4, v0
+; GCN-NEXT: v_add_u32_e32 v1, 0xac, v0
+; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xb0, v0
 ; GCN-NEXT: buffer_store_dword v47, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xb4, v0
+; GCN-NEXT: buffer_store_dword v48, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0xb8, v0
-; GCN-NEXT: buffer_store_dword v48, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0xbc, v0
 ; GCN-NEXT: buffer_store_dword v49, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v50, v2, s[0:3], 0 offen
-; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload
-; GCN-NEXT: v_add_u32_e32 v2, 0xc4, v0
+; GCN-NEXT: v_add_u32_e32 v1, 0xbc, v0
+; GCN-NEXT: buffer_store_dword v50, v1, s[0:3], 0 offen
+; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload
 ; GCN-NEXT: v_add_u32_e32 v1, 0xc0, v0
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: buffer_store_dword v8, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0xcc, v0
 ; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xc4, v0
+; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0xc8, v0
-; GCN-NEXT: buffer_store_dword v10, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 4, v0
-; GCN-NEXT: buffer_store_dword v4, v2, s[0:3], 0 offen
 ; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v1, 0xd0, v0
-; GCN-NEXT: v_add_u32_e32 v7, 8, v0
-; GCN-NEXT: v_add_u32_e32 v2, 12, v0
-; GCN-NEXT: buffer_store_dword v5, v7, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v6, v2, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xcc, v0
+; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 4, v0
+; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 8, v0
+; GCN-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 12, v0
+; GCN-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen
 ; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:256
-; GCN-NEXT: v_add_u32_e32 v2, 0xd4, v0
-; GCN-NEXT: v_add_u32_e32 v3, 0xd8, v0
-; GCN-NEXT: v_add_u32_e32 v4, 0xdc, v0
+; GCN-NEXT: v_add_u32_e32 v1, 0xd0, v0
 ; GCN-NEXT: buffer_store_dword v51, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v52, v2, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v53, v3, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v54, v4, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xd4, v0
+; GCN-NEXT: buffer_store_dword v52, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xd8, v0
+; GCN-NEXT: buffer_store_dword v53, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xdc, v0
+; GCN-NEXT: buffer_store_dword v54, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0xe0, v0
-; GCN-NEXT: v_add_u32_e32 v2, 0xe4, v0
-; GCN-NEXT: v_add_u32_e32 v3, 0xe8, v0
-; GCN-NEXT: v_add_u32_e32 v4, 0xec, v0
-; GCN-NEXT: v_add_u32_e32 v5, 0xf0, v0
-; GCN-NEXT: v_add_u32_e32 v6, 0xf4, v0
-; GCN-NEXT: v_add_u32_e32 v7, 0xf8, v0
-; GCN-NEXT: v_add_u32_e32 v8, 0xfc, v0
 ; GCN-NEXT: buffer_store_dword v55, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v56, v2, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v57, v3, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v58, v4, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v59, v5, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v60, v6, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v61, v7, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v62, v8, s[0:3], 0 offen
-; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_and_b32_e32 v1, 63, v1
+; GCN-NEXT: v_add_u32_e32 v1, 0xe4, v0
+; GCN-NEXT: buffer_store_dword v56, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xe8, v0
+; GCN-NEXT: buffer_store_dword v57, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xec, v0
+; GCN-NEXT: buffer_store_dword v58, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xf0, v0
+; GCN-NEXT: buffer_store_dword v59, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xf4, v0
+; GCN-NEXT: buffer_store_dword v60, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xf8, v0
+; GCN-NEXT: buffer_store_dword v61, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xfc, v0
+; GCN-NEXT: buffer_store_dword v62, v1, s[0:3], 0 offen
+; GCN-NEXT: v_and_b32_e32 v1, 63, v2
 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v1
 ; GCN-NEXT: v_add_u32_e32 v0, v0, v1
 ; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
@@ -326,7 +323,6 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill
 ; GCN-NEXT: v_add_co_u32_e32 v59, vcc, v15, v11
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill
 ; GCN-NEXT: global_load_dwordx4 v[3:6], v[15:16], off
 ; GCN-NEXT: global_load_dwordx4 v[7:10], v[15:16], off offset:16
 ; GCN-NEXT: v_addc_co_u32_e32 v60, vcc, v16, v12, vcc
@@ -342,217 +338,215 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-NEXT: v_lshrrev_b32_e64 v0, 6, s33
 ; GCN-NEXT: v_add_u32_e32 v0, 0x100, v0
 ; GCN-NEXT: v_add_u32_e32 v1, 16, v0
-; GCN-NEXT: v_add_u32_e32 v2, 20, v0
 ; GCN-NEXT: s_add_u32 s32, s32, 0x10000
 ; GCN-NEXT: s_sub_u32 s32, s32, 0x10000
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill
 ; GCN-NEXT: global_load_dwordx4 v[47:50], v[48:49], off offset:48
 ; GCN-NEXT: global_load_dwordx4 v[43:46], v[59:60], off
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill
 ; GCN-NEXT: global_load_dwordx4 v[51:54], v[59:60], off offset:16
 ; GCN-NEXT: global_load_dwordx4 v[55:58], v[59:60], off offset:32
 ; GCN-NEXT: global_load_dwordx4 v[59:62], v[59:60], off offset:48
 ; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 20, v0
+; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 24, v0
-; GCN-NEXT: buffer_store_dword v8, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 28, v0
 ; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v10, v2, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 28, v0
+; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 32, v0
-; GCN-NEXT: v_add_u32_e32 v2, 36, v0
 ; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 36, v0
+; GCN-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 40, v0
-; GCN-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 44, v0
 ; GCN-NEXT: buffer_store_dword v13, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 44, v0
+; GCN-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 48, v0
-; GCN-NEXT: buffer_store_dword v14, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 52, v0
 ; GCN-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 52, v0
+; GCN-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 56, v0
-; GCN-NEXT: buffer_store_dword v16, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 60, v0
 ; GCN-NEXT: buffer_store_dword v17, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v18, v2, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 60, v0
+; GCN-NEXT: buffer_store_dword v18, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 64, v0
-; GCN-NEXT: v_add_u32_e32 v2, 0x44, v0
 ; GCN-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x44, v0
+; GCN-NEXT: buffer_store_dword v20, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x48, v0
-; GCN-NEXT: buffer_store_dword v20, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x4c, v0
 ; GCN-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x4c, v0
+; GCN-NEXT: buffer_store_dword v22, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x50, v0
-; GCN-NEXT: buffer_store_dword v22, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x54, v0
 ; GCN-NEXT: buffer_store_dword v23, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x54, v0
+; GCN-NEXT: buffer_store_dword v24, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x58, v0
-; GCN-NEXT: buffer_store_dword v24, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x5c, v0
 ; GCN-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v26, v2, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x5c, v0
+; GCN-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x60, v0
-; GCN-NEXT: v_add_u32_e32 v2, 0x64, v0
 ; GCN-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x64, v0
+; GCN-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x68, v0
-; GCN-NEXT: buffer_store_dword v28, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x6c, v0
 ; GCN-NEXT: buffer_store_dword v29, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x6c, v0
+; GCN-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x70, v0
-; GCN-NEXT: buffer_store_dword v30, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x74, v0
 ; GCN-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x74, v0
+; GCN-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x78, v0
-; GCN-NEXT: buffer_store_dword v32, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x7c, v0
 ; GCN-NEXT: buffer_store_dword v33, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v34, v2, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x7c, v0
+; GCN-NEXT: buffer_store_dword v34, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x80, v0
-; GCN-NEXT: v_add_u32_e32 v2, 0x84, v0
 ; GCN-NEXT: buffer_store_dword v35, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x84, v0
+; GCN-NEXT: buffer_store_dword v36, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x88, v0
-; GCN-NEXT: buffer_store_dword v36, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x8c, v0
 ; GCN-NEXT: buffer_store_dword v37, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x8c, v0
+; GCN-NEXT: buffer_store_dword v38, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x90, v0
-; GCN-NEXT: buffer_store_dword v38, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x94, v0
 ; GCN-NEXT: buffer_store_dword v39, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x94, v0
+; GCN-NEXT: buffer_store_dword v40, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x98, v0
-; GCN-NEXT: buffer_store_dword v40, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x9c, v0
 ; GCN-NEXT: buffer_store_dword v41, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v42, v2, s[0:3], 0 offen
-; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:640 ; 4-byte Folded Reload
+; GCN-NEXT: v_add_u32_e32 v1, 0x9c, v0
+; GCN-NEXT: buffer_store_dword v42, v1, s[0:3], 0 offen
+; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload
 ; GCN-NEXT: v_add_u32_e32 v1, 0xa0, v0
-; GCN-NEXT: v_add_u32_e32 v2, 0xa4, v0
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: v_mov_b32_e32 v8, v15
-; GCN-NEXT: v_mov_b32_e32 v9, v16
 ; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
+; GCN-NEXT: v_mov_b32_e32 v9, v16
+; GCN-NEXT: v_add_u32_e32 v1, 0xa4, v0
+; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_mov_b32_e32 v10, v17
 ; GCN-NEXT: v_add_u32_e32 v1, 0xa8, v0
-; GCN-NEXT: buffer_store_dword v9, v2, s[0:3], 0 offen
 ; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v1, 0xb0, v0
 ; GCN-NEXT: v_mov_b32_e32 v11, v18
-; GCN-NEXT: v_add_u32_e32 v2, 0xac, v0
-; GCN-NEXT: buffer_store_dword v11, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0xb4, v0
+; GCN-NEXT: v_add_u32_e32 v1, 0xac, v0
+; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xb0, v0
 ; GCN-NEXT: buffer_store_dword v47, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xb4, v0
+; GCN-NEXT: buffer_store_dword v48, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0xb8, v0
-; GCN-NEXT: buffer_store_dword v48, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0xbc, v0
 ; GCN-NEXT: buffer_store_dword v49, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v50, v2, s[0:3], 0 offen
-; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload
-; GCN-NEXT: v_add_u32_e32 v2, 0xc4, v0
+; GCN-NEXT: v_add_u32_e32 v1, 0xbc, v0
+; GCN-NEXT: buffer_store_dword v50, v1, s[0:3], 0 offen
+; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload
 ; GCN-NEXT: v_add_u32_e32 v1, 0xc0, v0
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: buffer_store_dword v8, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0xcc, v0
 ; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xc4, v0
+; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0xc8, v0
-; GCN-NEXT: buffer_store_dword v10, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 4, v0
-; GCN-NEXT: v_add_u32_e32 v7, 8, v0
 ; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v4, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 12, v0
-; GCN-NEXT: buffer_store_dword v5, v7, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v6, v2, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xcc, v0
+; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 4, v0
+; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 8, v0
+; GCN-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 12, v0
+; GCN-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen
 ; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:256
-; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload
-; GCN-NEXT: v_add_u32_e32 v1, 0xd0, v0
+; GCN-NEXT: v_add_u32_e32 v3, 0xd0, v0
+; GCN-NEXT: buffer_store_dword v51, v3, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v3, 0xd4, v0
-; GCN-NEXT: v_add_u32_e32 v4, 0xd8, v0
-; GCN-NEXT: v_add_u32_e32 v5, 0xdc, v0
-; GCN-NEXT: buffer_store_dword v51, v1, s[0:3], 0 offen
 ; GCN-NEXT: buffer_store_dword v52, v3, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v53, v4, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v54, v5, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v1, 0xe0, v0
+; GCN-NEXT: v_add_u32_e32 v3, 0xd8, v0
+; GCN-NEXT: buffer_store_dword v53, v3, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v3, 0xdc, v0
+; GCN-NEXT: buffer_store_dword v54, v3, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v3, 0xe0, v0
+; GCN-NEXT: buffer_store_dword v55, v3, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v3, 0xe4, v0
-; GCN-NEXT: v_add_u32_e32 v4, 0xe8, v0
-; GCN-NEXT: v_add_u32_e32 v5, 0xec, v0
-; GCN-NEXT: v_add_u32_e32 v6, 0xf0, v0
-; GCN-NEXT: v_add_u32_e32 v7, 0xf4, v0
-; GCN-NEXT: v_add_u32_e32 v8, 0xf8, v0
-; GCN-NEXT: v_add_u32_e32 v9, 0xfc, v0
-; GCN-NEXT: buffer_store_dword v55, v1, s[0:3], 0 offen
 ; GCN-NEXT: buffer_store_dword v56, v3, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v57, v4, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v58, v5, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v59, v6, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v60, v7, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v61, v8, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v62, v9, s[0:3], 0 offen
-; GCN-NEXT: s_waitcnt vmcnt(12)
-; GCN-NEXT: v_lshrrev_b32_e32 v2, 1, v10
-; GCN-NEXT: v_and_b32_e32 v1, 63, v2
+; GCN-NEXT: v_add_u32_e32 v3, 0xe8, v0
+; GCN-NEXT: buffer_store_dword v57, v3, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v3, 0xec, v0
+; GCN-NEXT: buffer_store_dword v58, v3, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v3, 0xf0, v0
+; GCN-NEXT: v_lshrrev_b32_e32 v1, 1, v2
+; GCN-NEXT: buffer_store_dword v59, v3, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v3, 0xf4, v0
+; GCN-NEXT: v_and_b32_e32 v1, 63, v1
+; GCN-NEXT: buffer_store_dword v60, v3, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v3, 0xf8, v0
+; GCN-NEXT: buffer_store_dword v61, v3, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v3, 0xfc, v0
 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v1
 ; GCN-NEXT: v_add_u32_e32 v0, v0, v1
+; GCN-NEXT: buffer_store_dword v62, v3, s[0:3], 0 offen
 ; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
 ; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s33 ; 4-byte Folded Reload
 ; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
@@ -569,7 +563,7 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
 ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
-; GCN-NEXT: v_and_b32_e32 v1, 1, v10
+; GCN-NEXT: v_and_b32_e32 v1, 1, v2
 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 4, v1
 ; GCN-NEXT: s_mov_b32 s33, s6
 ; GCN-NEXT: s_waitcnt vmcnt(15)
@@ -585,9 +579,22 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-LABEL: v_extract_v32i64_varidx:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v15, v0
 ; GCN-NEXT: s_add_u32 s4, s32, 0x3fc0
+; GCN-NEXT: s_mov_b32 s5, 0
 ; GCN-NEXT: s_mov_b32 s6, s33
 ; GCN-NEXT: s_and_b32 s33, s4, 0xffffc000
+; GCN-NEXT: s_movk_i32 s4, 0x80
+; GCN-NEXT: v_mov_b32_e32 v12, s5
+; GCN-NEXT: v_mov_b32_e32 v16, v1
+; GCN-NEXT: v_add_co_u32_e32 v31, vcc, 64, v15
+; GCN-NEXT: v_mov_b32_e32 v11, s4
+; GCN-NEXT: v_addc_co_u32_e32 v32, vcc, 0, v16, vcc
+; GCN-NEXT: v_add_co_u32_e32 v48, vcc, v15, v11
+; GCN-NEXT: v_addc_co_u32_e32 v49, vcc, v16, v12, vcc
+; GCN-NEXT: s_movk_i32 s4, 0xc0
+; GCN-NEXT: v_mov_b32_e32 v12, s5
+; GCN-NEXT: v_mov_b32_e32 v11, s4
 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
@@ -603,41 +610,8 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT: v_mov_b32_e32 v15, v0
-; GCN-NEXT: v_mov_b32_e32 v16, v1
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill
-; GCN-NEXT: global_load_dwordx4 v[0:3], v[15:16], off
-; GCN-NEXT: s_mov_b32 s5, 0
-; GCN-NEXT: v_add_co_u32_e32 v31, vcc, 64, v15
-; GCN-NEXT: s_movk_i32 s4, 0x80
-; GCN-NEXT: v_addc_co_u32_e32 v32, vcc, 0, v16, vcc
-; GCN-NEXT: s_add_u32 s32, s32, 0x10000
-; GCN-NEXT: s_sub_u32 s32, s32, 0x10000
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill
-; GCN-NEXT: v_mov_b32_e32 v12, s5
-; GCN-NEXT: v_mov_b32_e32 v11, s4
-; GCN-NEXT: v_add_co_u32_e32 v48, vcc, v15, v11
-; GCN-NEXT: v_addc_co_u32_e32 v49, vcc, v16, v12, vcc
-; GCN-NEXT: s_movk_i32 s4, 0xc0
-; GCN-NEXT: v_mov_b32_e32 v12, s5
-; GCN-NEXT: v_mov_b32_e32 v11, s4
 ; GCN-NEXT: v_add_co_u32_e32 v59, vcc, v15, v11
+; GCN-NEXT: global_load_dwordx4 v[3:6], v[15:16], off
 ; GCN-NEXT: global_load_dwordx4 v[7:10], v[15:16], off offset:16
 ; GCN-NEXT: v_addc_co_u32_e32 v60, vcc, v16, v12, vcc
 ; GCN-NEXT: global_load_dwordx4 v[11:14], v[15:16], off offset:32
@@ -649,198 +623,215 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-NEXT: global_load_dwordx4 v[35:38], v[48:49], off
 ; GCN-NEXT: global_load_dwordx4 v[39:42], v[48:49], off offset:16
 ; GCN-NEXT: global_load_dwordx4 v[43:46], v[48:49], off offset:32
-; GCN-NEXT: global_load_dwordx4 v[47:50], v[48:49], off offset:48
-; GCN-NEXT: global_load_dwordx4 v[3:6], v[59:60], off
 ; GCN-NEXT: v_lshrrev_b32_e64 v0, 6, s33
 ; GCN-NEXT: v_add_u32_e32 v0, 0x100, v0
 ; GCN-NEXT: v_add_u32_e32 v1, 16, v0
-; GCN-NEXT: v_add_u32_e32 v2, 24, v0
+; GCN-NEXT: s_add_u32 s32, s32, 0x10000
+; GCN-NEXT: s_sub_u32 s32, s32, 0x10000
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill
+; GCN-NEXT: global_load_dwordx4 v[47:50], v[48:49], off offset:48
+; GCN-NEXT: global_load_dwordx4 v[43:46], v[59:60], off
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill
 ; GCN-NEXT: global_load_dwordx4 v[51:54], v[59:60], off offset:16
 ; GCN-NEXT: global_load_dwordx4 v[55:58], v[59:60], off offset:32
 ; GCN-NEXT: global_load_dwordx4 v[59:62], v[59:60], off offset:48
 ; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v9, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 20, v0
-; GCN-NEXT: v_add_u32_e32 v1, 44, v0
-; GCN-NEXT: v_add_u32_e32 v7, 28, v0
-; GCN-NEXT: v_add_u32_e32 v9, 36, v0
-; GCN-NEXT: buffer_store_dword v8, v2, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v10, v7, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v12, v9, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 20, v0
+; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 24, v0
+; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 28, v0
+; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 32, v0
+; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 36, v0
+; GCN-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 40, v0
-; GCN-NEXT: v_add_u32_e32 v3, 32, v0
 ; GCN-NEXT: buffer_store_dword v13, v1, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 48, v0
+; GCN-NEXT: v_add_u32_e32 v1, 44, v0
+; GCN-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 48, v0
+; GCN-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 52, v0
+; GCN-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 56, v0
-; GCN-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v4, 52, v0
-; GCN-NEXT: v_add_u32_e32 v5, 60, v0
-; GCN-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen
 ; GCN-NEXT: buffer_store_dword v17, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v16, v4, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v18, v5, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v1, 0x44, v0
-; GCN-NEXT: v_add_u32_e32 v2, 0x4c, v0
-; GCN-NEXT: buffer_store_dword v20, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v22, v2, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 60, v0
+; GCN-NEXT: buffer_store_dword v18, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 64, v0
-; GCN-NEXT: v_add_u32_e32 v2, 0x48, v0
 ; GCN-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x44, v0
+; GCN-NEXT: buffer_store_dword v20, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x48, v0
+; GCN-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x4c, v0
+; GCN-NEXT: buffer_store_dword v22, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x50, v0
-; GCN-NEXT: buffer_store_dword v21, v2, s[0:3], 0 offen
 ; GCN-NEXT: buffer_store_dword v23, v1, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x58, v0
+; GCN-NEXT: v_add_u32_e32 v1, 0x54, v0
+; GCN-NEXT: buffer_store_dword v24, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x58, v0
+; GCN-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x5c, v0
+; GCN-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x60, v0
-; GCN-NEXT: v_add_u32_e32 v3, 0x54, v0
-; GCN-NEXT: v_add_u32_e32 v4, 0x5c, v0
-; GCN-NEXT: v_add_u32_e32 v5, 0x64, v0
-; GCN-NEXT: v_add_u32_e32 v6, 0x6c, v0
-; GCN-NEXT: buffer_store_dword v25, v2, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v24, v3, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v26, v4, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v28, v5, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v30, v6, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x68, v0
 ; GCN-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x64, v0
+; GCN-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x68, v0
+; GCN-NEXT: buffer_store_dword v29, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x6c, v0
+; GCN-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x70, v0
-; GCN-NEXT: buffer_store_dword v29, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v7, 0x74, v0
-; GCN-NEXT: v_add_u32_e32 v8, 0x7c, v0
-; GCN-NEXT: v_add_u32_e32 v2, 0x78, v0
 ; GCN-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v33, v2, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v32, v7, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v34, v8, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v1, 0x84, v0
-; GCN-NEXT: v_add_u32_e32 v2, 0x8c, v0
-; GCN-NEXT: buffer_store_dword v36, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v38, v2, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x74, v0
+; GCN-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x78, v0
+; GCN-NEXT: buffer_store_dword v33, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x7c, v0
+; GCN-NEXT: buffer_store_dword v34, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x80, v0
-; GCN-NEXT: v_add_u32_e32 v2, 0x88, v0
 ; GCN-NEXT: buffer_store_dword v35, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x84, v0
+; GCN-NEXT: buffer_store_dword v36, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x88, v0
+; GCN-NEXT: buffer_store_dword v37, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x8c, v0
+; GCN-NEXT: buffer_store_dword v38, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0x90, v0
-; GCN-NEXT: buffer_store_dword v37, v2, s[0:3], 0 offen
 ; GCN-NEXT: buffer_store_dword v39, v1, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0x98, v0
+; GCN-NEXT: v_add_u32_e32 v1, 0x94, v0
+; GCN-NEXT: buffer_store_dword v40, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x98, v0
+; GCN-NEXT: buffer_store_dword v41, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0x9c, v0
+; GCN-NEXT: buffer_store_dword v42, v1, s[0:3], 0 offen
+; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload
 ; GCN-NEXT: v_add_u32_e32 v1, 0xa0, v0
-; GCN-NEXT: v_add_u32_e32 v3, 0x94, v0
-; GCN-NEXT: v_add_u32_e32 v4, 0x9c, v0
-; GCN-NEXT: v_add_u32_e32 v5, 0xa4, v0
-; GCN-NEXT: v_add_u32_e32 v6, 0xac, v0
-; GCN-NEXT: buffer_store_dword v41, v2, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v40, v3, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v42, v4, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v44, v5, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v46, v6, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0xa8, v0
-; GCN-NEXT: buffer_store_dword v43, v1, s[0:3], 0 offen
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v8, v15
+; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
+; GCN-NEXT: v_mov_b32_e32 v9, v16
+; GCN-NEXT: v_add_u32_e32 v1, 0xa4, v0
+; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen
+; GCN-NEXT: v_mov_b32_e32 v10, v17
+; GCN-NEXT: v_add_u32_e32 v1, 0xa8, v0
+; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen
+; GCN-NEXT: v_mov_b32_e32 v11, v18
+; GCN-NEXT: v_add_u32_e32 v1, 0xac, v0
+; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0xb0, v0
-; GCN-NEXT: buffer_store_dword v45, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v7, 0xb4, v0
-; GCN-NEXT: v_add_u32_e32 v8, 0xbc, v0
-; GCN-NEXT: v_add_u32_e32 v2, 0xb8, v0
 ; GCN-NEXT: buffer_store_dword v47, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v49, v2, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v48, v7, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v50, v8, s[0:3], 0 offen
-; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:640 ; 4-byte Folded Reload
-; GCN-NEXT: v_add_u32_e32 v2, 0xc8, v0
+; GCN-NEXT: v_add_u32_e32 v1, 0xb4, v0
+; GCN-NEXT: buffer_store_dword v48, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xb8, v0
+; GCN-NEXT: buffer_store_dword v49, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xbc, v0
+; GCN-NEXT: buffer_store_dword v50, v1, s[0:3], 0 offen
+; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload
 ; GCN-NEXT: v_add_u32_e32 v1, 0xc0, v0
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_add_u32_e32 v7, 0xec, v0
-; GCN-NEXT: v_add_u32_e32 v8, 0xf4, v0
-; GCN-NEXT: v_mov_b32_e32 v12, v6
-; GCN-NEXT: buffer_store_dword v5, v2, s[0:3], 0 offen
-; GCN-NEXT: v_mov_b32_e32 v10, v4
-; GCN-NEXT: v_add_u32_e32 v2, 0xc4, v0
-; GCN-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen
-; GCN-NEXT: v_mov_b32_e32 v9, v3
-; GCN-NEXT: v_mov_b32_e32 v11, v5
-; GCN-NEXT: v_add_u32_e32 v3, 0xcc, v0
-; GCN-NEXT: buffer_store_dword v10, v2, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v12, v3, s[0:3], 0 offen
-; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload
-; GCN-NEXT: v_add_u32_e32 v2, 8, v0
+; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xc4, v0
+; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xc8, v0
+; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xcc, v0
+; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 4, v0
+; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 8, v0
+; GCN-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 12, v0
+; GCN-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen
+; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:256
 ; GCN-NEXT: v_add_u32_e32 v1, 0xd0, v0
-; GCN-NEXT: v_add_u32_e32 v3, 12, v0
-; GCN-NEXT: v_add_u32_e32 v4, 0xd4, v0
-; GCN-NEXT: v_add_u32_e32 v5, 0xdc, v0
-; GCN-NEXT: v_add_u32_e32 v6, 0xe4, v0
-; GCN-NEXT: v_add_u32_e32 v9, 0xfc, v0
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 4, v0
-; GCN-NEXT: buffer_store_dword v11, v2, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v13, v3, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:256
-; GCN-NEXT: v_add_u32_e32 v2, 0xd8, v0
-; GCN-NEXT: v_add_u32_e32 v3, 0xe0, v0
 ; GCN-NEXT: buffer_store_dword v51, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xd4, v0
+; GCN-NEXT: buffer_store_dword v52, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xd8, v0
+; GCN-NEXT: buffer_store_dword v53, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xdc, v0
+; GCN-NEXT: buffer_store_dword v54, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xe0, v0
+; GCN-NEXT: buffer_store_dword v55, v1, s[0:3], 0 offen
+; GCN-NEXT: v_add_u32_e32 v1, 0xe4, v0
+; GCN-NEXT: buffer_store_dword v56, v1, s[0:3], 0 offen
 ; GCN-NEXT: v_add_u32_e32 v1, 0xe8, v0
-; GCN-NEXT: buffer_store_dword v53, v2, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v55, v3, s[0:3], 0 offen
-; GCN-NEXT: v_add_u32_e32 v2, 0xf0, v0
-; GCN-NEXT: v_add_u32_e32 v3, 0xf8, v0
 ; GCN-NEXT: buffer_store_dword v57, v1, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v59, v2, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v61, v3, s[0:3], 0
offen -; GCN-NEXT: buffer_store_dword v52, v4, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v54, v5, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v56, v6, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v58, v7, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v60, v8, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v62, v9, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 31, v1 +; GCN-NEXT: v_add_u32_e32 v1, 0xec, v0 +; GCN-NEXT: buffer_store_dword v58, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xf0, v0 +; GCN-NEXT: buffer_store_dword v59, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xf4, v0 +; GCN-NEXT: buffer_store_dword v60, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xf8, v0 +; GCN-NEXT: buffer_store_dword v61, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xfc, v0 +; GCN-NEXT: buffer_store_dword v62, v1, s[0:3], 0 offen +; GCN-NEXT: v_and_b32_e32 v1, 31, v2 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v1 ; GCN-NEXT: v_add_u32_e32 v0, v0, v1 ; GCN-NEXT: v_add_u32_e32 v1, 4, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll index abb422ae7363f..7901f2286b2a6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll @@ -10,362 +10,364 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out. ; GCN-NEXT: s_add_u32 s0, s0, s7 ; GCN-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0 ; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 -; GCN-NEXT: v_mov_b32_e32 v0, 0x100 +; GCN-NEXT: v_mov_b32_e32 v16, 0x100 ; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: v_add_u32_e32 v1, 4, v0 +; GCN-NEXT: v_add_u32_e32 v31, 64, v16 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_load_dwordx16 s[12:27], s[10:11], 0x0 -; GCN-NEXT: s_load_dwordx16 s[68:83], s[10:11], 0x40 -; GCN-NEXT: s_load_dwordx16 s[52:67], s[10:11], 0x80 -; GCN-NEXT: s_load_dwordx16 s[36:51], s[10:11], 0xc0 -; GCN-NEXT: s_movk_i32 s4, 0x50 +; GCN-NEXT: s_load_dwordx16 s[52:67], s[10:11], 0x40 +; GCN-NEXT: s_load_dwordx16 s[36:51], s[10:11], 0x80 +; GCN-NEXT: v_add_u32_e32 v32, 0x44, v16 +; GCN-NEXT: v_add_u32_e32 v33, 0x48, v16 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v4, s13 -; GCN-NEXT: v_mov_b32_e32 v5, s14 -; GCN-NEXT: v_mov_b32_e32 v6, s15 -; GCN-NEXT: v_mov_b32_e32 v8, s16 -; GCN-NEXT: v_mov_b32_e32 v10, s17 -; GCN-NEXT: v_mov_b32_e32 v12, s18 -; GCN-NEXT: v_mov_b32_e32 v14, s19 +; GCN-NEXT: v_mov_b32_e32 v0, s12 +; GCN-NEXT: v_mov_b32_e32 v1, s13 +; GCN-NEXT: v_mov_b32_e32 v2, s14 +; GCN-NEXT: v_mov_b32_e32 v3, s15 +; GCN-NEXT: v_mov_b32_e32 v4, s16 +; GCN-NEXT: v_mov_b32_e32 v5, s17 +; GCN-NEXT: v_mov_b32_e32 v6, s18 +; GCN-NEXT: v_mov_b32_e32 v7, s19 +; GCN-NEXT: v_mov_b32_e32 v8, s20 +; GCN-NEXT: v_mov_b32_e32 v9, s21 +; GCN-NEXT: v_mov_b32_e32 v10, s22 +; GCN-NEXT: v_mov_b32_e32 v11, s23 +; GCN-NEXT: v_mov_b32_e32 v12, s24 +; GCN-NEXT: v_mov_b32_e32 v13, s25 +; GCN-NEXT: v_mov_b32_e32 v14, s26 +; GCN-NEXT: v_mov_b32_e32 v15, s27 +; GCN-NEXT: s_load_dwordx16 s[12:27], s[10:11], 0xc0 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:256 +; GCN-NEXT: v_add_u32_e32 v0, 4, v16 +; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v1, s52 +; GCN-NEXT: buffer_store_dword v1, v31, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v1, s53 +; GCN-NEXT: 
buffer_store_dword v1, v32, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v1, s54 +; GCN-NEXT: buffer_store_dword v1, v33, s[0:3], 0 offen +; GCN-NEXT: s_movk_i32 s4, 0x50 +; GCN-NEXT: v_add_u32_e32 v34, 0x4c, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s55 +; GCN-NEXT: buffer_store_dword v1, v34, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v35, s4, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s56 +; GCN-NEXT: buffer_store_dword v1, v35, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v36, 0x54, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s57 +; GCN-NEXT: buffer_store_dword v1, v36, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v37, 0x58, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s58 +; GCN-NEXT: buffer_store_dword v1, v37, s[0:3], 0 offen ; GCN-NEXT: s_movk_i32 s5, 0x60 -; GCN-NEXT: v_add_u32_e32 v2, 8, v0 -; GCN-NEXT: v_add_u32_e32 v3, 12, v0 -; GCN-NEXT: v_add_u32_e32 v7, 16, v0 -; GCN-NEXT: v_add_u32_e32 v9, 20, v0 -; GCN-NEXT: v_add_u32_e32 v11, 24, v0 -; GCN-NEXT: v_add_u32_e32 v13, 28, v0 -; GCN-NEXT: v_add_u32_e32 v15, 32, v0 -; GCN-NEXT: v_mov_b32_e32 v16, s20 -; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v5, v2, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v8, v7, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v10, v9, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v12, v11, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v14, v13, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v16, v15, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v17, 36, v0 -; GCN-NEXT: v_mov_b32_e32 v18, s21 -; GCN-NEXT: v_mov_b32_e32 v26, s25 -; GCN-NEXT: v_add_u32_e32 v33, 0x44, v0 -; GCN-NEXT: v_mov_b32_e32 v34, s69 -; GCN-NEXT: v_mov_b32_e32 v4, s71 -; GCN-NEXT: v_add_u32_e32 v19, 40, v0 -; GCN-NEXT: v_mov_b32_e32 v20, s22 -; GCN-NEXT: v_add_u32_e32 v21, 44, v0 -; GCN-NEXT: v_mov_b32_e32 v22, s23 -; GCN-NEXT: v_add_u32_e32 v23, 48, v0 -; GCN-NEXT: v_mov_b32_e32 v24, s24 -; GCN-NEXT: v_add_u32_e32 v25, 52, v0 -; GCN-NEXT: v_add_u32_e32 v27, 56, v0 -; GCN-NEXT: v_mov_b32_e32 v28, s26 -; GCN-NEXT: v_add_u32_e32 v29, 60, v0 -; GCN-NEXT: v_mov_b32_e32 v30, s27 -; GCN-NEXT: v_add_u32_e32 v31, 64, v0 -; GCN-NEXT: v_mov_b32_e32 v32, s68 -; GCN-NEXT: buffer_store_dword v18, v17, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v20, v19, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v22, v21, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v24, v23, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v26, v25, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v28, v27, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v30, v29, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v32, v31, s[0:3], 0 offen -; GCN-NEXT: s_movk_i32 s13, 0x70 -; GCN-NEXT: v_add_u32_e32 v35, 0x48, v0 -; GCN-NEXT: v_mov_b32_e32 v36, s70 -; GCN-NEXT: v_add_u32_e32 v37, 0x4c, v0 -; GCN-NEXT: v_add_u32_e32 v38, s4, v0 -; GCN-NEXT: v_mov_b32_e32 v5, s72 -; GCN-NEXT: v_add_u32_e32 v39, 0x54, v0 -; GCN-NEXT: v_mov_b32_e32 v6, s73 -; GCN-NEXT: v_add_u32_e32 v40, 0x58, v0 -; GCN-NEXT: v_mov_b32_e32 v8, s74 -; GCN-NEXT: v_add_u32_e32 v41, 0x5c, v0 -; GCN-NEXT: v_mov_b32_e32 v10, s75 -; GCN-NEXT: v_add_u32_e32 v42, s5, v0 -; GCN-NEXT: v_mov_b32_e32 v12, s76 -; GCN-NEXT: buffer_store_dword v34, v33, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v36, v35, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v4, v37, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v5, v38, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v6, v39, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v8, v40, s[0:3], 0 offen -; 
GCN-NEXT: buffer_store_dword v10, v41, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v12, v42, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v26, 0x64, v0 -; GCN-NEXT: v_mov_b32_e32 v14, s77 -; GCN-NEXT: v_mov_b32_e32 v4, s81 -; GCN-NEXT: s_movk_i32 s14, 0x90 -; GCN-NEXT: s_movk_i32 s15, 0xa0 -; GCN-NEXT: v_add_u32_e32 v28, 0x68, v0 -; GCN-NEXT: v_mov_b32_e32 v16, s78 -; GCN-NEXT: v_add_u32_e32 v30, 0x6c, v0 -; GCN-NEXT: v_mov_b32_e32 v18, s79 -; GCN-NEXT: v_add_u32_e32 v32, s13, v0 -; GCN-NEXT: v_mov_b32_e32 v20, s80 -; GCN-NEXT: v_add_u32_e32 v34, 0x74, v0 -; GCN-NEXT: v_add_u32_e32 v36, 0x78, v0 -; GCN-NEXT: v_mov_b32_e32 v5, s82 -; GCN-NEXT: v_add_u32_e32 v43, 0x7c, v0 -; GCN-NEXT: v_mov_b32_e32 v6, s83 -; GCN-NEXT: v_add_u32_e32 v44, 0x80, v0 -; GCN-NEXT: v_mov_b32_e32 v8, s52 -; GCN-NEXT: buffer_store_dword v14, v26, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v16, v28, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v18, v30, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v20, v32, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v4, v34, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v5, v36, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v6, v43, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v8, v44, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v45, 0x84, v0 -; GCN-NEXT: v_mov_b32_e32 v4, s53 -; GCN-NEXT: s_movk_i32 s16, 0xb0 -; GCN-NEXT: v_add_u32_e32 v46, 0x88, v0 -; GCN-NEXT: v_mov_b32_e32 v5, s54 -; GCN-NEXT: v_add_u32_e32 v47, 0x8c, v0 -; GCN-NEXT: v_mov_b32_e32 v6, s55 -; GCN-NEXT: v_add_u32_e32 v48, s14, v0 -; GCN-NEXT: v_mov_b32_e32 v8, s56 -; GCN-NEXT: v_add_u32_e32 v49, 0x94, v0 -; GCN-NEXT: v_mov_b32_e32 v10, s57 -; GCN-NEXT: v_add_u32_e32 v50, 0x98, v0 -; GCN-NEXT: v_mov_b32_e32 v12, s58 -; GCN-NEXT: v_add_u32_e32 v51, 0x9c, v0 -; GCN-NEXT: v_mov_b32_e32 v14, s59 -; GCN-NEXT: v_add_u32_e32 v52, s15, v0 -; GCN-NEXT: v_mov_b32_e32 v16, s60 -; GCN-NEXT: buffer_store_dword v4, v45, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v5, v46, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v6, v47, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v8, v48, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v10, v49, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v12, v50, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v14, v51, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v16, v52, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v53, 0xa4, v0 -; GCN-NEXT: v_mov_b32_e32 v4, s61 -; GCN-NEXT: s_movk_i32 s17, 0xd0 -; GCN-NEXT: s_movk_i32 s18, 0xe0 -; GCN-NEXT: v_add_u32_e32 v54, 0xa8, v0 -; GCN-NEXT: v_mov_b32_e32 v5, s62 -; GCN-NEXT: v_add_u32_e32 v55, 0xac, v0 -; GCN-NEXT: v_mov_b32_e32 v6, s63 -; GCN-NEXT: v_add_u32_e32 v56, s16, v0 -; GCN-NEXT: v_mov_b32_e32 v8, s64 -; GCN-NEXT: v_add_u32_e32 v57, 0xb4, v0 -; GCN-NEXT: v_mov_b32_e32 v10, s65 -; GCN-NEXT: v_add_u32_e32 v58, 0xb8, v0 -; GCN-NEXT: v_mov_b32_e32 v12, s66 -; GCN-NEXT: v_add_u32_e32 v59, 0xbc, v0 -; GCN-NEXT: v_mov_b32_e32 v14, s67 -; GCN-NEXT: v_add_u32_e32 v60, 0xc0, v0 -; GCN-NEXT: v_mov_b32_e32 v16, s36 -; GCN-NEXT: buffer_store_dword v4, v53, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v5, v54, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v6, v55, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v8, v56, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v10, v57, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v12, v58, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v14, v59, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v16, v60, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v61, 0xc4, v0 -; GCN-NEXT: 
v_mov_b32_e32 v4, s37 +; GCN-NEXT: v_add_u32_e32 v38, 0x5c, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s59 +; GCN-NEXT: buffer_store_dword v1, v38, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v39, s5, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s60 +; GCN-NEXT: buffer_store_dword v1, v39, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v40, 0x64, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s61 +; GCN-NEXT: buffer_store_dword v1, v40, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v41, 0x68, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s62 +; GCN-NEXT: buffer_store_dword v1, v41, s[0:3], 0 offen +; GCN-NEXT: s_movk_i32 s10, 0x70 +; GCN-NEXT: v_add_u32_e32 v42, 0x6c, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s63 +; GCN-NEXT: buffer_store_dword v1, v42, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v43, s10, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s64 +; GCN-NEXT: buffer_store_dword v1, v43, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v44, 0x74, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s65 +; GCN-NEXT: buffer_store_dword v1, v44, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v45, 0x78, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s66 +; GCN-NEXT: buffer_store_dword v1, v45, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v46, 0x7c, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s67 +; GCN-NEXT: buffer_store_dword v1, v46, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v47, 0x80, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s36 +; GCN-NEXT: buffer_store_dword v1, v47, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v48, 0x84, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s37 +; GCN-NEXT: buffer_store_dword v1, v48, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v49, 0x88, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s38 +; GCN-NEXT: buffer_store_dword v1, v49, s[0:3], 0 offen +; GCN-NEXT: s_movk_i32 s11, 0x90 +; GCN-NEXT: v_add_u32_e32 v50, 0x8c, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s39 +; GCN-NEXT: buffer_store_dword v1, v50, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v51, s11, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s40 +; GCN-NEXT: buffer_store_dword v1, v51, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v52, 0x94, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s41 +; GCN-NEXT: buffer_store_dword v1, v52, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v53, 0x98, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s42 +; GCN-NEXT: buffer_store_dword v1, v53, s[0:3], 0 offen +; GCN-NEXT: s_movk_i32 s28, 0xa0 +; GCN-NEXT: v_add_u32_e32 v54, 0x9c, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s43 +; GCN-NEXT: buffer_store_dword v1, v54, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v55, s28, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s44 +; GCN-NEXT: buffer_store_dword v1, v55, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v56, 0xa4, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s45 +; GCN-NEXT: buffer_store_dword v1, v56, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v57, 0xa8, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s46 +; GCN-NEXT: buffer_store_dword v1, v57, s[0:3], 0 offen +; GCN-NEXT: s_movk_i32 s29, 0xb0 +; GCN-NEXT: v_add_u32_e32 v58, 0xac, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s47 +; GCN-NEXT: buffer_store_dword v1, v58, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v59, s29, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s48 +; GCN-NEXT: buffer_store_dword v1, v59, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v60, 0xb4, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s49 +; GCN-NEXT: buffer_store_dword v1, v60, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v61, 0xb8, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s50 +; GCN-NEXT: buffer_store_dword v1, v61, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v62, 0xbc, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s51 +; GCN-NEXT: buffer_store_dword v1, v62, s[0:3], 0 offen +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: 
v_mov_b32_e32 v1, s12 +; GCN-NEXT: v_add_u32_e32 v63, 0xc0, v16 +; GCN-NEXT: buffer_store_dword v1, v63, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v1, s13 +; GCN-NEXT: v_add_u32_e32 v64, 0xc4, v16 +; GCN-NEXT: buffer_store_dword v1, v64, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v1, s14 +; GCN-NEXT: v_add_u32_e32 v65, 0xc8, v16 +; GCN-NEXT: buffer_store_dword v1, v65, s[0:3], 0 offen +; GCN-NEXT: s_movk_i32 s12, 0xd0 +; GCN-NEXT: v_add_u32_e32 v66, 0xcc, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s15 +; GCN-NEXT: buffer_store_dword v1, v66, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v67, s12, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s16 +; GCN-NEXT: buffer_store_dword v1, v67, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v68, 0xd4, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s17 +; GCN-NEXT: buffer_store_dword v1, v68, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v69, 0xd8, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s18 +; GCN-NEXT: buffer_store_dword v1, v69, s[0:3], 0 offen +; GCN-NEXT: s_movk_i32 s13, 0xe0 +; GCN-NEXT: v_add_u32_e32 v70, 0xdc, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s19 +; GCN-NEXT: buffer_store_dword v1, v70, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v71, s13, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s20 +; GCN-NEXT: buffer_store_dword v1, v71, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v72, 0xe4, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s21 +; GCN-NEXT: buffer_store_dword v1, v72, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v73, 0xe8, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s22 +; GCN-NEXT: buffer_store_dword v1, v73, s[0:3], 0 offen +; GCN-NEXT: s_movk_i32 s14, 0xf0 +; GCN-NEXT: v_add_u32_e32 v74, 0xec, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s23 +; GCN-NEXT: buffer_store_dword v1, v74, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v75, s14, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s24 +; GCN-NEXT: buffer_store_dword v1, v75, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v76, 0xf4, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s25 ; GCN-NEXT: s_and_b32 s7, s7, 63 -; GCN-NEXT: s_movk_i32 s19, 0xf0 -; GCN-NEXT: v_add_u32_e32 v62, 0xc8, v0 -; GCN-NEXT: v_mov_b32_e32 v5, s38 -; GCN-NEXT: v_add_u32_e32 v63, 0xcc, v0 -; GCN-NEXT: v_mov_b32_e32 v6, s39 -; GCN-NEXT: v_add_u32_e32 v64, s17, v0 -; GCN-NEXT: v_mov_b32_e32 v8, s40 -; GCN-NEXT: v_add_u32_e32 v65, 0xd4, v0 -; GCN-NEXT: v_mov_b32_e32 v10, s41 -; GCN-NEXT: v_add_u32_e32 v66, 0xd8, v0 -; GCN-NEXT: v_mov_b32_e32 v12, s42 -; GCN-NEXT: v_add_u32_e32 v67, 0xdc, v0 -; GCN-NEXT: v_mov_b32_e32 v14, s43 -; GCN-NEXT: v_add_u32_e32 v68, s18, v0 -; GCN-NEXT: v_mov_b32_e32 v16, s44 -; GCN-NEXT: buffer_store_dword v4, v61, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v5, v62, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v6, v63, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v8, v64, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v10, v65, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v12, v66, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v14, v67, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v16, v68, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v69, 0xe4, v0 -; GCN-NEXT: v_mov_b32_e32 v4, s45 -; GCN-NEXT: v_add_u32_e32 v70, 0xe8, v0 -; GCN-NEXT: v_mov_b32_e32 v5, s46 -; GCN-NEXT: v_add_u32_e32 v71, 0xec, v0 -; GCN-NEXT: v_mov_b32_e32 v6, s47 -; GCN-NEXT: v_add_u32_e32 v72, s19, v0 -; GCN-NEXT: v_mov_b32_e32 v8, s48 -; GCN-NEXT: v_add_u32_e32 v73, 0xf4, v0 -; GCN-NEXT: v_mov_b32_e32 v10, s49 -; GCN-NEXT: v_add_u32_e32 v74, 0xf8, v0 -; GCN-NEXT: v_mov_b32_e32 v12, s50 -; GCN-NEXT: buffer_store_dword v4, v69, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v5, v70, s[0:3], 0 offen -; GCN-NEXT: 
buffer_store_dword v6, v71, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v8, v72, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v10, v73, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v12, v74, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v4, s12 +; GCN-NEXT: buffer_store_dword v1, v76, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v77, 0xf8, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s26 +; GCN-NEXT: v_add_u32_e32 v17, 8, v16 +; GCN-NEXT: buffer_store_dword v1, v77, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v78, 0xfc, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s27 ; GCN-NEXT: s_lshl_b32 s7, s7, 2 -; GCN-NEXT: v_add_u32_e32 v75, 0xfc, v0 -; GCN-NEXT: v_mov_b32_e32 v14, s51 -; GCN-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:256 -; GCN-NEXT: buffer_store_dword v14, v75, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v4, s6 -; GCN-NEXT: v_add_u32_e32 v0, s7, v0 -; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v4, v7, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v5, v9, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v6, v11, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v7, v13, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v8, v15, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v9, v17, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v10, v19, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v11, v21, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v12, v23, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v13, v25, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v14, v27, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v15, v29, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v2, v17, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v18, 12, v16 +; GCN-NEXT: v_add_u32_e32 v19, 16, v16 +; GCN-NEXT: v_add_u32_e32 v20, 20, v16 +; GCN-NEXT: v_add_u32_e32 v21, 24, v16 +; GCN-NEXT: v_add_u32_e32 v22, 28, v16 +; GCN-NEXT: v_add_u32_e32 v23, 32, v16 +; GCN-NEXT: v_add_u32_e32 v24, 36, v16 +; GCN-NEXT: v_add_u32_e32 v25, 40, v16 +; GCN-NEXT: v_add_u32_e32 v26, 44, v16 +; GCN-NEXT: v_add_u32_e32 v27, 48, v16 +; GCN-NEXT: v_add_u32_e32 v28, 52, v16 +; GCN-NEXT: v_add_u32_e32 v29, 56, v16 +; GCN-NEXT: v_add_u32_e32 v30, 60, v16 +; GCN-NEXT: buffer_store_dword v1, v78, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v2, s6 +; GCN-NEXT: v_add_u32_e32 v1, s7, v16 +; GCN-NEXT: buffer_store_dword v3, v18, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v4, v19, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v5, v20, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v6, v21, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v7, v22, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v8, v23, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v9, v24, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v10, v25, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v11, v26, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v12, v27, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v13, v28, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v14, v29, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v15, v30, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v2, v17, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v3, v18, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v4, v19, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v5, v20, s[0:3], 0 offen +; 
GCN-NEXT: buffer_load_dword v6, v21, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v7, v22, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v8, v23, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v9, v24, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v10, v25, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v11, v26, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v12, v27, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v13, v28, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v14, v29, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v15, v30, s[0:3], 0 offen ; GCN-NEXT: buffer_load_dword v16, v31, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v17, v33, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v18, v35, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v19, v37, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v20, v38, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v21, v39, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v22, v40, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v23, v41, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v24, v42, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v25, v26, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v26, v28, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v27, v30, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v28, v32, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v29, v34, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v30, v36, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v31, v43, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v32, v44, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v33, v45, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v34, v46, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v35, v47, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v36, v48, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v37, v49, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v38, v50, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v39, v51, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v40, v52, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v41, v53, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v42, v54, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v43, v55, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v44, v56, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v45, v57, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v46, v58, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v47, v59, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v48, v60, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v49, v61, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v50, v62, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v51, v63, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v52, v64, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v53, v65, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v54, v66, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v55, v67, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v56, v68, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v57, v69, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v58, v70, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v59, v71, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v60, v72, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v61, v73, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v62, v74, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v63, v75, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v17, v32, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v18, v33, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v19, v34, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v20, v35, s[0:3], 0 offen +; GCN-NEXT: 
buffer_load_dword v21, v36, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v22, v37, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v23, v38, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v24, v39, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v25, v40, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v26, v41, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v27, v42, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v28, v43, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v29, v44, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v30, v45, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v31, v46, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v32, v47, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v33, v48, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v34, v49, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v35, v50, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v36, v51, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v37, v52, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v38, v53, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v39, v54, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v40, v55, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v41, v56, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v42, v57, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v43, v58, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v44, v59, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v45, v60, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v46, v61, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v47, v62, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v48, v63, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v49, v64, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v50, v65, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v51, v66, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v52, v67, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v53, v68, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v54, v69, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v55, v70, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v56, v71, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v57, v72, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v58, v73, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v59, v74, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v60, v75, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v61, v76, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v62, v77, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v63, v78, s[0:3], 0 offen ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:256 -; GCN-NEXT: s_add_u32 s6, s8, 16 -; GCN-NEXT: s_addc_u32 s7, s9, 0 -; GCN-NEXT: v_mov_b32_e32 v67, s7 -; GCN-NEXT: v_mov_b32_e32 v66, s6 -; GCN-NEXT: s_add_u32 s6, s8, 32 -; GCN-NEXT: s_addc_u32 s7, s9, 0 ; GCN-NEXT: v_mov_b32_e32 v65, s9 -; GCN-NEXT: s_add_u32 s10, s8, 48 +; GCN-NEXT: s_add_u32 s6, s8, 16 ; GCN-NEXT: v_mov_b32_e32 v64, s8 -; GCN-NEXT: s_addc_u32 s11, s9, 0 +; GCN-NEXT: s_addc_u32 s7, s9, 0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: global_store_dwordx4 v[64:65], v[0:3], off -; GCN-NEXT: global_store_dwordx4 v[66:67], v[4:7], off +; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mov_b32_e32 v0, s6 ; GCN-NEXT: v_mov_b32_e32 v1, s7 -; GCN-NEXT: s_add_u32 s6, s8, 64 -; GCN-NEXT: v_mov_b32_e32 v2, s10 +; GCN-NEXT: s_add_u32 s6, s8, 32 +; GCN-NEXT: global_store_dwordx4 v[0:1], v[4:7], off ; GCN-NEXT: s_addc_u32 s7, s9, 0 -; GCN-NEXT: v_mov_b32_e32 v3, s11 -; GCN-NEXT: s_add_u32 s10, s8, s4 -; GCN-NEXT: s_addc_u32 s11, s9, 0 -; GCN-NEXT: s_add_u32 s4, s8, s5 +; GCN-NEXT: v_mov_b32_e32 v0, s6 +; GCN-NEXT: v_mov_b32_e32 v1, s7 +; GCN-NEXT: 
s_add_u32 s6, s8, 48 ; GCN-NEXT: global_store_dwordx4 v[0:1], v[8:11], off -; GCN-NEXT: global_store_dwordx4 v[2:3], v[12:15], off +; GCN-NEXT: s_addc_u32 s7, s9, 0 ; GCN-NEXT: v_mov_b32_e32 v0, s6 -; GCN-NEXT: s_addc_u32 s5, s9, 0 ; GCN-NEXT: v_mov_b32_e32 v1, s7 -; GCN-NEXT: s_add_u32 s6, s8, s13 -; GCN-NEXT: v_mov_b32_e32 v2, s10 -; GCN-NEXT: v_mov_b32_e32 v3, s11 -; GCN-NEXT: global_store_dwordx4 v[0:1], v[16:19], off -; GCN-NEXT: global_store_dwordx4 v[2:3], v[20:23], off +; GCN-NEXT: s_add_u32 s6, s8, 64 +; GCN-NEXT: global_store_dwordx4 v[0:1], v[12:15], off ; GCN-NEXT: s_addc_u32 s7, s9, 0 +; GCN-NEXT: v_mov_b32_e32 v0, s6 +; GCN-NEXT: v_mov_b32_e32 v1, s7 +; GCN-NEXT: s_add_u32 s6, s8, s4 +; GCN-NEXT: s_addc_u32 s7, s9, 0 +; GCN-NEXT: global_store_dwordx4 v[0:1], v[16:19], off +; GCN-NEXT: v_mov_b32_e32 v0, s6 +; GCN-NEXT: s_add_u32 s4, s8, s5 +; GCN-NEXT: v_mov_b32_e32 v1, s7 +; GCN-NEXT: global_store_dwordx4 v[0:1], v[20:23], off +; GCN-NEXT: s_addc_u32 s5, s9, 0 ; GCN-NEXT: v_mov_b32_e32 v0, s4 ; GCN-NEXT: v_mov_b32_e32 v1, s5 -; GCN-NEXT: s_add_u32 s4, s8, 0x80 -; GCN-NEXT: v_mov_b32_e32 v2, s6 -; GCN-NEXT: s_addc_u32 s5, s9, 0 -; GCN-NEXT: v_mov_b32_e32 v3, s7 -; GCN-NEXT: s_add_u32 s6, s8, s14 +; GCN-NEXT: s_add_u32 s4, s8, s10 ; GCN-NEXT: global_store_dwordx4 v[0:1], v[24:27], off -; GCN-NEXT: global_store_dwordx4 v[2:3], v[28:31], off -; GCN-NEXT: s_addc_u32 s7, s9, 0 +; GCN-NEXT: s_addc_u32 s5, s9, 0 ; GCN-NEXT: v_mov_b32_e32 v0, s4 ; GCN-NEXT: v_mov_b32_e32 v1, s5 -; GCN-NEXT: s_add_u32 s4, s8, s15 -; GCN-NEXT: v_mov_b32_e32 v2, s6 +; GCN-NEXT: s_add_u32 s4, s8, 0x80 +; GCN-NEXT: global_store_dwordx4 v[0:1], v[28:31], off ; GCN-NEXT: s_addc_u32 s5, s9, 0 -; GCN-NEXT: v_mov_b32_e32 v3, s7 -; GCN-NEXT: s_add_u32 s6, s8, s16 +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: s_add_u32 s4, s8, s11 ; GCN-NEXT: global_store_dwordx4 v[0:1], v[32:35], off -; GCN-NEXT: global_store_dwordx4 v[2:3], v[36:39], off -; GCN-NEXT: s_addc_u32 s7, s9, 0 +; GCN-NEXT: s_addc_u32 s5, s9, 0 ; GCN-NEXT: v_mov_b32_e32 v0, s4 ; GCN-NEXT: v_mov_b32_e32 v1, s5 -; GCN-NEXT: s_add_u32 s4, s8, 0xc0 -; GCN-NEXT: v_mov_b32_e32 v2, s6 -; GCN-NEXT: v_mov_b32_e32 v3, s7 +; GCN-NEXT: s_add_u32 s4, s8, s28 +; GCN-NEXT: global_store_dwordx4 v[0:1], v[36:39], off +; GCN-NEXT: s_addc_u32 s5, s9, 0 +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: s_add_u32 s4, s8, s29 ; GCN-NEXT: global_store_dwordx4 v[0:1], v[40:43], off -; GCN-NEXT: global_store_dwordx4 v[2:3], v[44:47], off ; GCN-NEXT: s_addc_u32 s5, s9, 0 ; GCN-NEXT: v_mov_b32_e32 v0, s4 ; GCN-NEXT: v_mov_b32_e32 v1, s5 -; GCN-NEXT: s_add_u32 s4, s8, s17 +; GCN-NEXT: s_add_u32 s4, s8, 0xc0 +; GCN-NEXT: global_store_dwordx4 v[0:1], v[44:47], off ; GCN-NEXT: s_addc_u32 s5, s9, 0 -; GCN-NEXT: v_mov_b32_e32 v2, s4 -; GCN-NEXT: v_mov_b32_e32 v3, s5 -; GCN-NEXT: s_add_u32 s4, s8, s18 +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: s_add_u32 s4, s8, s12 ; GCN-NEXT: global_store_dwordx4 v[0:1], v[48:51], off -; GCN-NEXT: global_store_dwordx4 v[2:3], v[52:55], off ; GCN-NEXT: s_addc_u32 s5, s9, 0 ; GCN-NEXT: v_mov_b32_e32 v0, s4 ; GCN-NEXT: v_mov_b32_e32 v1, s5 -; GCN-NEXT: s_add_u32 s4, s8, s19 +; GCN-NEXT: s_add_u32 s4, s8, s13 +; GCN-NEXT: global_store_dwordx4 v[0:1], v[52:55], off ; GCN-NEXT: s_addc_u32 s5, s9, 0 -; GCN-NEXT: v_mov_b32_e32 v2, s4 -; GCN-NEXT: v_mov_b32_e32 v3, s5 +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: s_add_u32 
s4, s8, s14 ; GCN-NEXT: global_store_dwordx4 v[0:1], v[56:59], off -; GCN-NEXT: global_store_dwordx4 v[2:3], v[60:63], off +; GCN-NEXT: s_addc_u32 s5, s9, 0 +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: global_store_dwordx4 v[0:1], v[60:63], off ; GCN-NEXT: s_endpgm %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr %insert = insertelement <64 x i32> %vec, i32 %val, i32 %idx diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll index 008b09d968870..ffdb1155a9343 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll @@ -1954,7 +1954,7 @@ define amdgpu_ps void @insertelement_s_v16i16_s_s(<16 x i16> addrspace(4)* inreg ; GFX9-NEXT: s_lshr_b32 s7, s5, 1 ; GFX9-NEXT: s_cmp_eq_u32 s7, 1 ; GFX9-NEXT: s_mov_b32 s2, 0xffff -; GFX9-NEXT: v_mov_b32_e32 v8, 0 +; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_cselect_b32 s0, s9, s8 ; GFX9-NEXT: s_cmp_eq_u32 s7, 2 @@ -1997,16 +1997,16 @@ define amdgpu_ps void @insertelement_s_v16i16_s_s(<16 x i16> addrspace(4)* inreg ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 ; GFX9-NEXT: s_add_u32 s0, 0, 16 +; GFX9-NEXT: v_mov_b32_e32 v5, 0 ; GFX9-NEXT: s_addc_u32 s1, 0, 0 -; GFX9-NEXT: v_mov_b32_e32 v4, s4 -; GFX9-NEXT: v_mov_b32_e32 v11, s1 -; GFX9-NEXT: v_mov_b32_e32 v9, 0 -; GFX9-NEXT: v_mov_b32_e32 v5, s5 -; GFX9-NEXT: v_mov_b32_e32 v6, s6 -; GFX9-NEXT: v_mov_b32_e32 v7, s7 -; GFX9-NEXT: v_mov_b32_e32 v10, s0 -; GFX9-NEXT: global_store_dwordx4 v[8:9], v[0:3], off -; GFX9-NEXT: global_store_dwordx4 v[10:11], v[4:7], off +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[0:3], off +; GFX9-NEXT: v_mov_b32_e32 v5, s1 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[0:3], off ; GFX9-NEXT: s_endpgm ; ; GFX8-LABEL: insertelement_s_v16i16_s_s: @@ -2015,7 +2015,7 @@ define amdgpu_ps void @insertelement_s_v16i16_s_s(<16 x i16> addrspace(4)* inreg ; GFX8-NEXT: s_lshr_b32 s7, s5, 1 ; GFX8-NEXT: s_cmp_eq_u32 s7, 1 ; GFX8-NEXT: s_mov_b32 s2, 0xffff -; GFX8-NEXT: v_mov_b32_e32 v8, 0 +; GFX8-NEXT: v_mov_b32_e32 v4, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_cselect_b32 s0, s9, s8 ; GFX8-NEXT: s_cmp_eq_u32 s7, 2 @@ -2058,16 +2058,16 @@ define amdgpu_ps void @insertelement_s_v16i16_s_s(<16 x i16> addrspace(4)* inreg ; GFX8-NEXT: v_mov_b32_e32 v2, s2 ; GFX8-NEXT: v_mov_b32_e32 v3, s3 ; GFX8-NEXT: s_add_u32 s0, 0, 16 +; GFX8-NEXT: v_mov_b32_e32 v5, 0 ; GFX8-NEXT: s_addc_u32 s1, 0, 0 -; GFX8-NEXT: v_mov_b32_e32 v4, s4 -; GFX8-NEXT: v_mov_b32_e32 v11, s1 -; GFX8-NEXT: v_mov_b32_e32 v9, 0 -; GFX8-NEXT: v_mov_b32_e32 v5, s5 -; GFX8-NEXT: v_mov_b32_e32 v6, s6 -; GFX8-NEXT: v_mov_b32_e32 v7, s7 -; GFX8-NEXT: v_mov_b32_e32 v10, s0 -; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[0:3] -; GFX8-NEXT: flat_store_dwordx4 v[10:11], v[4:7] +; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] +; GFX8-NEXT: v_mov_b32_e32 v5, s1 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: v_mov_b32_e32 v1, s5 +; GFX8-NEXT: v_mov_b32_e32 v2, s6 +; GFX8-NEXT: v_mov_b32_e32 v3, s7 +; GFX8-NEXT: v_mov_b32_e32 v4, s0 +; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GFX8-NEXT: s_endpgm ; ; GFX7-LABEL: insertelement_s_v16i16_s_s: @@ -2108,24 +2108,25 @@ define amdgpu_ps void @insertelement_s_v16i16_s_s(<16 x i16> 
addrspace(4)* inreg ; GFX7-NEXT: s_cmp_eq_u32 s7, 4 ; GFX7-NEXT: s_cselect_b32 s4, s16, s12 ; GFX7-NEXT: s_cmp_eq_u32 s7, 5 +; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: s_cselect_b32 s5, s16, s13 ; GFX7-NEXT: s_cmp_eq_u32 s7, 6 -; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: s_cselect_b32 s6, s16, s14 ; GFX7-NEXT: s_cmp_eq_u32 s7, 7 -; GFX7-NEXT: s_cselect_b32 s7, s16, s15 -; GFX7-NEXT: v_mov_b32_e32 v4, s4 ; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 ; GFX7-NEXT: s_mov_b32 s10, -1 ; GFX7-NEXT: s_mov_b32 s11, 0xf000 -; GFX7-NEXT: v_mov_b32_e32 v5, s5 -; GFX7-NEXT: v_mov_b32_e32 v6, s6 -; GFX7-NEXT: v_mov_b32_e32 v7, s7 +; GFX7-NEXT: s_cselect_b32 s7, s16, s15 ; GFX7-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 -; GFX7-NEXT: buffer_store_dwordx4 v[4:7], off, s[8:11], 0 offset:16 +; GFX7-NEXT: s_nop 0 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mov_b32_e32 v2, s6 +; GFX7-NEXT: v_mov_b32_e32 v3, s7 +; GFX7-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16 ; GFX7-NEXT: s_endpgm %vec = load <16 x i16>, <16 x i16> addrspace(4)* %ptr %insert = insertelement <16 x i16> %vec, i16 %val, i32 %idx @@ -2329,23 +2330,23 @@ define amdgpu_ps void @insertelement_s_v16i16_v_s(<16 x i16> addrspace(4)* inreg ; GFX9-NEXT: v_mov_b32_e32 v5, s13 ; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 ; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc -; GFX9-NEXT: v_mov_b32_e32 v4, s12 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, 4 ; GFX9-NEXT: v_mov_b32_e32 v6, s14 ; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6 -; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[0:1] -; GFX9-NEXT: s_add_u32 s0, 0, 16 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc +; GFX9-NEXT: v_mov_b32_e32 v4, s12 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, 4 ; GFX9-NEXT: v_mov_b32_e32 v7, s15 ; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s2, 7 -; GFX9-NEXT: s_addc_u32 s1, 0, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc ; GFX9-NEXT: v_mov_b32_e32 v8, 0 -; GFX9-NEXT: v_mov_b32_e32 v11, s1 +; GFX9-NEXT: s_add_u32 s0, 0, 16 ; GFX9-NEXT: v_mov_b32_e32 v9, 0 -; GFX9-NEXT: v_mov_b32_e32 v10, s0 ; GFX9-NEXT: global_store_dwordx4 v[8:9], v[0:3], off -; GFX9-NEXT: global_store_dwordx4 v[10:11], v[4:7], off +; GFX9-NEXT: s_addc_u32 s1, 0, 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[4:7], off ; GFX9-NEXT: s_endpgm ; ; GFX8-LABEL: insertelement_s_v16i16_v_s: @@ -2390,23 +2391,23 @@ define amdgpu_ps void @insertelement_s_v16i16_v_s(<16 x i16> addrspace(4)* inreg ; GFX8-NEXT: v_mov_b32_e32 v5, s13 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc -; GFX8-NEXT: v_mov_b32_e32 v4, s12 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, 4 ; GFX8-NEXT: v_mov_b32_e32 v6, s14 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6 -; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[0:1] -; GFX8-NEXT: s_add_u32 s0, 0, 16 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc +; GFX8-NEXT: v_mov_b32_e32 v4, s12 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, 4 ; GFX8-NEXT: v_mov_b32_e32 v7, s15 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s2, 7 -; GFX8-NEXT: s_addc_u32 s1, 0, 0 +; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc ; GFX8-NEXT: v_mov_b32_e32 v8, 0 -; GFX8-NEXT: v_mov_b32_e32 v11, s1 +; GFX8-NEXT: s_add_u32 s0, 0, 16 ; GFX8-NEXT: v_mov_b32_e32 v9, 0 -; GFX8-NEXT: v_mov_b32_e32 v10, s0 ; 
GFX8-NEXT: flat_store_dwordx4 v[8:9], v[0:3] -; GFX8-NEXT: flat_store_dwordx4 v[10:11], v[4:7] +; GFX8-NEXT: s_addc_u32 s1, 0, 0 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[4:7] ; GFX8-NEXT: s_endpgm ; ; GFX7-LABEL: insertelement_s_v16i16_v_s: @@ -2509,8 +2510,8 @@ define amdgpu_ps void @insertelement_s_v16i16_s_v(<16 x i16> addrspace(4)* inreg ; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 ; GFX9-NEXT: v_and_or_b32 v9, v1, v0, v2 ; GFX9-NEXT: v_mov_b32_e32 v0, s16 -; GFX9-NEXT: v_mov_b32_e32 v2, s18 ; GFX9-NEXT: v_mov_b32_e32 v1, s17 +; GFX9-NEXT: v_mov_b32_e32 v2, s18 ; GFX9-NEXT: v_mov_b32_e32 v3, s19 ; GFX9-NEXT: v_mov_b32_e32 v4, s20 ; GFX9-NEXT: v_mov_b32_e32 v5, s21 @@ -2518,8 +2519,6 @@ define amdgpu_ps void @insertelement_s_v16i16_s_v(<16 x i16> addrspace(4)* inreg ; GFX9-NEXT: v_mov_b32_e32 v7, s23 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v8 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[0:1] -; GFX9-NEXT: s_add_u32 s0, 0, 16 -; GFX9-NEXT: s_addc_u32 s1, 0, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[12:13] ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[2:3] @@ -2528,11 +2527,13 @@ define amdgpu_ps void @insertelement_s_v16i16_s_v(<16 x i16> addrspace(4)* inreg ; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9] ; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[10:11] ; GFX9-NEXT: v_mov_b32_e32 v8, 0 -; GFX9-NEXT: v_mov_b32_e32 v11, s1 +; GFX9-NEXT: s_add_u32 s0, 0, 16 ; GFX9-NEXT: v_mov_b32_e32 v9, 0 -; GFX9-NEXT: v_mov_b32_e32 v10, s0 ; GFX9-NEXT: global_store_dwordx4 v[8:9], v[0:3], off -; GFX9-NEXT: global_store_dwordx4 v[10:11], v[4:7], off +; GFX9-NEXT: s_addc_u32 s1, 0, 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[4:7], off ; GFX9-NEXT: s_endpgm ; ; GFX8-LABEL: insertelement_s_v16i16_s_v: @@ -2572,8 +2573,8 @@ define amdgpu_ps void @insertelement_s_v16i16_s_v(<16 x i16> addrspace(4)* inreg ; GFX8-NEXT: v_and_b32_e32 v0, v1, v0 ; GFX8-NEXT: v_or_b32_e32 v9, v0, v2 ; GFX8-NEXT: v_mov_b32_e32 v0, s16 -; GFX8-NEXT: v_mov_b32_e32 v2, s18 ; GFX8-NEXT: v_mov_b32_e32 v1, s17 +; GFX8-NEXT: v_mov_b32_e32 v2, s18 ; GFX8-NEXT: v_mov_b32_e32 v3, s19 ; GFX8-NEXT: v_mov_b32_e32 v4, s20 ; GFX8-NEXT: v_mov_b32_e32 v5, s21 @@ -2581,8 +2582,6 @@ define amdgpu_ps void @insertelement_s_v16i16_s_v(<16 x i16> addrspace(4)* inreg ; GFX8-NEXT: v_mov_b32_e32 v7, s23 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v8 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[0:1] -; GFX8-NEXT: s_add_u32 s0, 0, 16 -; GFX8-NEXT: s_addc_u32 s1, 0, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[12:13] ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[2:3] @@ -2591,11 +2590,13 @@ define amdgpu_ps void @insertelement_s_v16i16_s_v(<16 x i16> addrspace(4)* inreg ; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9] ; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[10:11] ; GFX8-NEXT: v_mov_b32_e32 v8, 0 -; GFX8-NEXT: v_mov_b32_e32 v11, s1 +; GFX8-NEXT: s_add_u32 s0, 0, 16 ; GFX8-NEXT: v_mov_b32_e32 v9, 0 -; GFX8-NEXT: v_mov_b32_e32 v10, s0 ; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[0:3] -; GFX8-NEXT: flat_store_dwordx4 v[10:11], v[4:7] +; GFX8-NEXT: s_addc_u32 s1, 0, 0 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[4:7] ; GFX8-NEXT: s_endpgm ; ; GFX7-LABEL: insertelement_s_v16i16_s_v: @@ -2699,8 +2700,8 @@ define amdgpu_ps void @insertelement_s_v16i16_v_v(<16 
x i16> addrspace(4)* inreg ; GFX9-NEXT: v_xor_b32_e32 v1, -1, v1 ; GFX9-NEXT: v_and_or_b32 v9, v2, v1, v0 ; GFX9-NEXT: v_mov_b32_e32 v0, s12 -; GFX9-NEXT: v_mov_b32_e32 v2, s14 ; GFX9-NEXT: v_mov_b32_e32 v1, s13 +; GFX9-NEXT: v_mov_b32_e32 v2, s14 ; GFX9-NEXT: v_mov_b32_e32 v3, s15 ; GFX9-NEXT: v_mov_b32_e32 v4, s16 ; GFX9-NEXT: v_mov_b32_e32 v5, s17 @@ -2708,8 +2709,6 @@ define amdgpu_ps void @insertelement_s_v16i16_v_v(<16 x i16> addrspace(4)* inreg ; GFX9-NEXT: v_mov_b32_e32 v7, s19 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v8 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[0:1] -; GFX9-NEXT: s_add_u32 s0, 0, 16 -; GFX9-NEXT: s_addc_u32 s1, 0, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[12:13] ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[2:3] @@ -2718,11 +2717,13 @@ define amdgpu_ps void @insertelement_s_v16i16_v_v(<16 x i16> addrspace(4)* inreg ; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9] ; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[10:11] ; GFX9-NEXT: v_mov_b32_e32 v8, 0 -; GFX9-NEXT: v_mov_b32_e32 v11, s1 +; GFX9-NEXT: s_add_u32 s0, 0, 16 ; GFX9-NEXT: v_mov_b32_e32 v9, 0 -; GFX9-NEXT: v_mov_b32_e32 v10, s0 ; GFX9-NEXT: global_store_dwordx4 v[8:9], v[0:3], off -; GFX9-NEXT: global_store_dwordx4 v[10:11], v[4:7], off +; GFX9-NEXT: s_addc_u32 s1, 0, 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[4:7], off ; GFX9-NEXT: s_endpgm ; ; GFX8-LABEL: insertelement_s_v16i16_v_v: @@ -2761,8 +2762,8 @@ define amdgpu_ps void @insertelement_s_v16i16_v_v(<16 x i16> addrspace(4)* inreg ; GFX8-NEXT: v_and_b32_e32 v1, v2, v1 ; GFX8-NEXT: v_or_b32_e32 v9, v1, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s12 -; GFX8-NEXT: v_mov_b32_e32 v2, s14 ; GFX8-NEXT: v_mov_b32_e32 v1, s13 +; GFX8-NEXT: v_mov_b32_e32 v2, s14 ; GFX8-NEXT: v_mov_b32_e32 v3, s15 ; GFX8-NEXT: v_mov_b32_e32 v4, s16 ; GFX8-NEXT: v_mov_b32_e32 v5, s17 @@ -2770,8 +2771,6 @@ define amdgpu_ps void @insertelement_s_v16i16_v_v(<16 x i16> addrspace(4)* inreg ; GFX8-NEXT: v_mov_b32_e32 v7, s19 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v8 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[0:1] -; GFX8-NEXT: s_add_u32 s0, 0, 16 -; GFX8-NEXT: s_addc_u32 s1, 0, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[12:13] ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[2:3] @@ -2780,11 +2779,13 @@ define amdgpu_ps void @insertelement_s_v16i16_v_v(<16 x i16> addrspace(4)* inreg ; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9] ; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[10:11] ; GFX8-NEXT: v_mov_b32_e32 v8, 0 -; GFX8-NEXT: v_mov_b32_e32 v11, s1 +; GFX8-NEXT: s_add_u32 s0, 0, 16 ; GFX8-NEXT: v_mov_b32_e32 v9, 0 -; GFX8-NEXT: v_mov_b32_e32 v10, s0 ; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[0:3] -; GFX8-NEXT: flat_store_dwordx4 v[10:11], v[4:7] +; GFX8-NEXT: s_addc_u32 s1, 0, 0 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[4:7] ; GFX8-NEXT: s_endpgm ; ; GFX7-LABEL: insertelement_s_v16i16_v_v: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll index 43692dc81535e..7cad269df704b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll @@ -8,39 +8,39 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, ; GCN-NEXT: v_lshlrev_b32_e32 v64, 8, v0 ; GCN-NEXT: 
s_movk_i32 s4, 0x80 ; GCN-NEXT: s_mov_b32 s5, 0 -; GCN-NEXT: v_mov_b32_e32 v2, s4 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: v_mov_b32_e32 v1, s1 -; GCN-NEXT: v_add_co_u32_e32 v6, vcc, v0, v64 -; GCN-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc -; GCN-NEXT: v_add_co_u32_e32 v0, vcc, 64, v6 -; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v7, vcc -; GCN-NEXT: v_mov_b32_e32 v3, s5 -; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v6, v2 -; GCN-NEXT: s_movk_i32 s4, 0xc0 -; GCN-NEXT: v_mov_b32_e32 v4, s4 -; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v7, v3, vcc -; GCN-NEXT: v_mov_b32_e32 v5, s5 -; GCN-NEXT: v_add_co_u32_e32 v16, vcc, v6, v4 -; GCN-NEXT: v_addc_co_u32_e32 v17, vcc, v7, v5, vcc -; GCN-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:32 +; GCN-NEXT: v_add_co_u32_e32 v4, vcc, v0, v64 +; GCN-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, 64, v4 +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v5, vcc ; GCN-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:16 ; GCN-NEXT: global_load_dwordx4 v[36:39], v[0:1], off offset:32 ; GCN-NEXT: global_load_dwordx4 v[40:43], v[0:1], off offset:48 +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0 +; GCN-NEXT: s_movk_i32 s4, 0xc0 +; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, v5, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v3, s5 +; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v4, v2 +; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v5, v3, vcc ; GCN-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] ; GCN-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:16 ; GCN-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:32 ; GCN-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:48 ; GCN-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:64 -; GCN-NEXT: global_load_dwordx4 v[12:15], v[2:3], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[20:23], v[16:17], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[24:27], v[16:17], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[28:31], v[16:17], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[20:23], v[2:3], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[24:27], v[2:3], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[28:31], v[2:3], off offset:48 ; GCN-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128 ; GCN-NEXT: global_load_dwordx4 v[16:19], v64, s[0:1] offset:192 -; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: s_waitcnt vmcnt(7) ; GCN-NEXT: v_mov_b32_e32 v5, 0x3e7 ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: global_store_dwordx4 v64, v[0:3], s[2:3] offset:128 @@ -55,8 +55,8 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, ; GCN-NEXT: global_store_dwordx4 v64, v[48:51], s[2:3] offset:16 ; GCN-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:32 ; GCN-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:48 -; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:240 ; GCN-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:64 +; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:240 ; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] offset:80 ; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:96 ; GCN-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] 
offset:112
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
index ef28a300590a0..50de683890186 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
@@ -177,35 +177,35 @@ define void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, <4 x i32> %x)
 ; GFX7-LABEL: store_lds_v4i32_align1:
 ; GFX7: ; %bb.0:
 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_mov_b32 m0, -1
 ; GFX7-NEXT: v_lshrrev_b32_e32 v5, 8, v1
 ; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v1
 ; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v1
-; GFX7-NEXT: v_lshrrev_b32_e32 v8, 8, v2
-; GFX7-NEXT: s_mov_b32 m0, -1
-; GFX7-NEXT: v_lshrrev_b32_e32 v9, 16, v2
-; GFX7-NEXT: v_lshrrev_b32_e32 v10, 24, v2
 ; GFX7-NEXT: ds_write_b8 v0, v1
 ; GFX7-NEXT: ds_write_b8 v0, v5 offset:1
 ; GFX7-NEXT: ds_write_b8 v0, v6 offset:2
 ; GFX7-NEXT: ds_write_b8 v0, v7 offset:3
+; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v2
+; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v2
 ; GFX7-NEXT: ds_write_b8 v0, v2 offset:4
-; GFX7-NEXT: ds_write_b8 v0, v8 offset:5
-; GFX7-NEXT: ds_write_b8 v0, v9 offset:6
-; GFX7-NEXT: ds_write_b8 v0, v10 offset:7
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:5
+; GFX7-NEXT: ds_write_b8 v0, v5 offset:6
+; GFX7-NEXT: ds_write_b8 v0, v6 offset:7
 ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v3
 ; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v3
 ; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v3
-; GFX7-NEXT: v_lshrrev_b32_e32 v6, 8, v4
-; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v4
-; GFX7-NEXT: v_lshrrev_b32_e32 v8, 24, v4
 ; GFX7-NEXT: ds_write_b8 v0, v3 offset:8
 ; GFX7-NEXT: ds_write_b8 v0, v1 offset:9
 ; GFX7-NEXT: ds_write_b8 v0, v2 offset:10
 ; GFX7-NEXT: ds_write_b8 v0, v5 offset:11
+; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v4
+; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v4
+; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v4
 ; GFX7-NEXT: ds_write_b8 v0, v4 offset:12
-; GFX7-NEXT: ds_write_b8 v0, v6 offset:13
-; GFX7-NEXT: ds_write_b8 v0, v7 offset:14
-; GFX7-NEXT: ds_write_b8 v0, v8 offset:15
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:13
+; GFX7-NEXT: ds_write_b8 v0, v2 offset:14
+; GFX7-NEXT: ds_write_b8 v0, v3 offset:15
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
   store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 1
@@ -227,17 +227,17 @@ define void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x)
 ; GFX7-NEXT: s_mov_b32 m0, -1
 ; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v1
 ; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v1
-; GFX7-NEXT: v_lshrrev_b32_e32 v7, 8, v2
-; GFX7-NEXT: v_lshrrev_b32_e32 v8, 16, v2
-; GFX7-NEXT: v_lshrrev_b32_e32 v9, 24, v2
 ; GFX7-NEXT: ds_write_b8 v0, v1
 ; GFX7-NEXT: ds_write_b8 v0, v4 offset:1
 ; GFX7-NEXT: ds_write_b8 v0, v5 offset:2
 ; GFX7-NEXT: ds_write_b8 v0, v6 offset:3
+; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v2
+; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v2
 ; GFX7-NEXT: ds_write_b8 v0, v2 offset:4
-; GFX7-NEXT: ds_write_b8 v0, v7 offset:5
-; GFX7-NEXT: ds_write_b8 v0, v8 offset:6
-; GFX7-NEXT: ds_write_b8 v0, v9 offset:7
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:5
+; GFX7-NEXT: ds_write_b8 v0, v4 offset:6
+; GFX7-NEXT: ds_write_b8 v0, v5 offset:7
 ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v3
 ; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v3
 ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll
index 5f71277bb50e7..5b078d41e8d89 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll
@@ -43,50 +43,50 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out,
 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT: v_mov_b32_e32 v1, s4
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
 ; GFX9-NEXT: s_lshr_b32 s5, s0, 8
+; GFX9-NEXT: ds_write_b8 v1, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s5
 ; GFX9-NEXT: s_lshr_b32 s6, s0, 16
 ; GFX9-NEXT: s_lshr_b32 s7, s0, 24
-; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:1
+; GFX9-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:2
+; GFX9-NEXT: v_mov_b32_e32 v0, s7
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:3
+; GFX9-NEXT: v_mov_b32_e32 v0, s1
 ; GFX9-NEXT: s_lshr_b32 s0, s1, 8
-; GFX9-NEXT: v_mov_b32_e32 v2, s5
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:4
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
 ; GFX9-NEXT: s_lshr_b32 s4, s1, 16
 ; GFX9-NEXT: s_lshr_b32 s5, s1, 24
-; GFX9-NEXT: v_mov_b32_e32 v5, s1
-; GFX9-NEXT: v_mov_b32_e32 v6, s0
-; GFX9-NEXT: v_mov_b32_e32 v3, s6
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:5
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:6
+; GFX9-NEXT: v_mov_b32_e32 v0, s5
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:7
+; GFX9-NEXT: v_mov_b32_e32 v0, s2
 ; GFX9-NEXT: s_lshr_b32 s0, s2, 8
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:8
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
 ; GFX9-NEXT: s_lshr_b32 s1, s2, 16
-; GFX9-NEXT: v_mov_b32_e32 v7, s4
-; GFX9-NEXT: v_mov_b32_e32 v4, s7
-; GFX9-NEXT: v_mov_b32_e32 v8, s5
-; GFX9-NEXT: ds_write_b8 v1, v0
-; GFX9-NEXT: ds_write_b8 v1, v2 offset:1
-; GFX9-NEXT: ds_write_b8 v1, v3 offset:2
-; GFX9-NEXT: ds_write_b8 v1, v4 offset:3
-; GFX9-NEXT: ds_write_b8 v1, v5 offset:4
-; GFX9-NEXT: ds_write_b8 v1, v6 offset:5
-; GFX9-NEXT: ds_write_b8 v1, v7 offset:6
-; GFX9-NEXT: ds_write_b8 v1, v8 offset:7
 ; GFX9-NEXT: s_lshr_b32 s4, s2, 24
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_mov_b32_e32 v2, s0
-; GFX9-NEXT: v_mov_b32_e32 v3, s1
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:9
+; GFX9-NEXT: v_mov_b32_e32 v0, s1
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:10
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:11
+; GFX9-NEXT: v_mov_b32_e32 v0, s3
 ; GFX9-NEXT: s_lshr_b32 s0, s3, 8
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:12
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
 ; GFX9-NEXT: s_lshr_b32 s1, s3, 16
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:13
+; GFX9-NEXT: v_mov_b32_e32 v0, s1
 ; GFX9-NEXT: s_lshr_b32 s2, s3, 24
-; GFX9-NEXT: v_mov_b32_e32 v4, s4
-; GFX9-NEXT: v_mov_b32_e32 v5, s3
-; GFX9-NEXT: v_mov_b32_e32 v6, s0
-; GFX9-NEXT: v_mov_b32_e32 v7, s1
-; GFX9-NEXT: v_mov_b32_e32 v8, s2
-; GFX9-NEXT: ds_write_b8 v1, v0 offset:8
-; GFX9-NEXT: ds_write_b8 v1, v2 offset:9
-; GFX9-NEXT: ds_write_b8 v1, v3 offset:10
-; GFX9-NEXT: ds_write_b8 v1, v4 offset:11
-; GFX9-NEXT: ds_write_b8 v1, v5 offset:12
-; GFX9-NEXT: ds_write_b8 v1, v6 offset:13
-; GFX9-NEXT: ds_write_b8 v1, v7 offset:14
-; GFX9-NEXT: ds_write_b8 v1, v8 offset:15
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:14
+; GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:15
 ; GFX9-NEXT: s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v4i32_align1:
@@ -96,50 +96,50 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out,
 ; GFX7-NEXT: s_mov_b32 m0, -1
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT: v_mov_b32_e32 v1, s4
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
 ; GFX7-NEXT: s_lshr_b32 s5, s0, 8
+; GFX7-NEXT: ds_write_b8 v1, v0
+; GFX7-NEXT: v_mov_b32_e32 v0, s5
 ; GFX7-NEXT: s_lshr_b32 s6, s0, 16
 ; GFX7-NEXT: s_lshr_b32 s7, s0, 24
-; GFX7-NEXT: v_mov_b32_e32 v0, s0
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:1
+; GFX7-NEXT: v_mov_b32_e32 v0, s6
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:2
+; GFX7-NEXT: v_mov_b32_e32 v0, s7
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:3
+; GFX7-NEXT: v_mov_b32_e32 v0, s1
 ; GFX7-NEXT: s_lshr_b32 s0, s1, 8
-; GFX7-NEXT: v_mov_b32_e32 v2, s5
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:4
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
 ; GFX7-NEXT: s_lshr_b32 s4, s1, 16
 ; GFX7-NEXT: s_lshr_b32 s5, s1, 24
-; GFX7-NEXT: v_mov_b32_e32 v5, s1
-; GFX7-NEXT: v_mov_b32_e32 v6, s0
-; GFX7-NEXT: v_mov_b32_e32 v3, s6
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:5
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:6
+; GFX7-NEXT: v_mov_b32_e32 v0, s5
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:7
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
 ; GFX7-NEXT: s_lshr_b32 s0, s2, 8
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:8
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
 ; GFX7-NEXT: s_lshr_b32 s1, s2, 16
-; GFX7-NEXT: v_mov_b32_e32 v7, s4
-; GFX7-NEXT: v_mov_b32_e32 v4, s7
-; GFX7-NEXT: v_mov_b32_e32 v8, s5
-; GFX7-NEXT: ds_write_b8 v1, v0
-; GFX7-NEXT: ds_write_b8 v1, v2 offset:1
-; GFX7-NEXT: ds_write_b8 v1, v3 offset:2
-; GFX7-NEXT: ds_write_b8 v1, v4 offset:3
-; GFX7-NEXT: ds_write_b8 v1, v5 offset:4
-; GFX7-NEXT: ds_write_b8 v1, v6 offset:5
-; GFX7-NEXT: ds_write_b8 v1, v7 offset:6
-; GFX7-NEXT: ds_write_b8 v1, v8 offset:7
 ; GFX7-NEXT: s_lshr_b32 s4, s2, 24
-; GFX7-NEXT: v_mov_b32_e32 v0, s2
-; GFX7-NEXT: v_mov_b32_e32 v2, s0
-; GFX7-NEXT: v_mov_b32_e32 v3, s1
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:9
+; GFX7-NEXT: v_mov_b32_e32 v0, s1
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:10
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:11
+; GFX7-NEXT: v_mov_b32_e32 v0, s3
 ; GFX7-NEXT: s_lshr_b32 s0, s3, 8
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:12
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
 ; GFX7-NEXT: s_lshr_b32 s1, s3, 16
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:13
+; GFX7-NEXT: v_mov_b32_e32 v0, s1
 ; GFX7-NEXT: s_lshr_b32 s2, s3, 24
-; GFX7-NEXT: v_mov_b32_e32 v4, s4
-; GFX7-NEXT: v_mov_b32_e32 v5, s3
-; GFX7-NEXT: v_mov_b32_e32 v6, s0
-; GFX7-NEXT: v_mov_b32_e32 v7, s1
-; GFX7-NEXT: v_mov_b32_e32 v8, s2
-; GFX7-NEXT: ds_write_b8 v1, v0 offset:8
-; GFX7-NEXT: ds_write_b8 v1, v2 offset:9
-; GFX7-NEXT: ds_write_b8 v1, v3 offset:10
-; GFX7-NEXT: ds_write_b8 v1, v4 offset:11
-; GFX7-NEXT: ds_write_b8 v1, v5 offset:12
-; GFX7-NEXT: ds_write_b8 v1, v6 offset:13
-; GFX7-NEXT: ds_write_b8 v1, v7 offset:14
-; GFX7-NEXT: ds_write_b8 v1, v8 offset:15
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:14
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:15
 ; GFX7-NEXT: s_endpgm
   store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 1
   ret void
@@ -152,26 +152,26 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out,
 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT: v_mov_b32_e32 v1, s4
-; GFX9-NEXT: s_lshr_b32 s5, s0, 16
 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: s_lshr_b32 s5, s0, 16
+; GFX9-NEXT: ds_write_b16 v1, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s5
+; GFX9-NEXT: ds_write_b16 v1, v0 offset:2
+; GFX9-NEXT: v_mov_b32_e32 v0, s1
 ; GFX9-NEXT: s_lshr_b32 s0, s1, 16
-; GFX9-NEXT: v_mov_b32_e32 v4, s0
+; GFX9-NEXT: ds_write_b16 v1, v0 offset:4
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: ds_write_b16 v1, v0 offset:6
+; GFX9-NEXT: v_mov_b32_e32 v0, s2
 ; GFX9-NEXT: s_lshr_b32 s0, s2, 16
-; GFX9-NEXT: v_mov_b32_e32 v6, s0
+; GFX9-NEXT: ds_write_b16 v1, v0 offset:8
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: ds_write_b16 v1, v0 offset:10
+; GFX9-NEXT: v_mov_b32_e32 v0, s3
 ; GFX9-NEXT: s_lshr_b32 s0, s3, 16
-; GFX9-NEXT: v_mov_b32_e32 v2, s5
-; GFX9-NEXT: v_mov_b32_e32 v3, s1
-; GFX9-NEXT: v_mov_b32_e32 v5, s2
-; GFX9-NEXT: v_mov_b32_e32 v7, s3
-; GFX9-NEXT: v_mov_b32_e32 v8, s0
-; GFX9-NEXT: ds_write_b16 v1, v0
-; GFX9-NEXT: ds_write_b16 v1, v2 offset:2
-; GFX9-NEXT: ds_write_b16 v1, v3 offset:4
-; GFX9-NEXT: ds_write_b16 v1, v4 offset:6
-; GFX9-NEXT: ds_write_b16 v1, v5 offset:8
-; GFX9-NEXT: ds_write_b16 v1, v6 offset:10
-; GFX9-NEXT: ds_write_b16 v1, v7 offset:12
-; GFX9-NEXT: ds_write_b16 v1, v8 offset:14
+; GFX9-NEXT: ds_write_b16 v1, v0 offset:12
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: ds_write_b16 v1, v0 offset:14
 ; GFX9-NEXT: s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v4i32_align2:
@@ -181,26 +181,26 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out,
 ; GFX7-NEXT: s_mov_b32 m0, -1
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT: v_mov_b32_e32 v1, s4
-; GFX7-NEXT: s_lshr_b32 s5, s0, 16
 ; GFX7-NEXT: v_mov_b32_e32 v0, s0
+; GFX7-NEXT: s_lshr_b32 s5, s0, 16
+; GFX7-NEXT: ds_write_b16 v1, v0
+; GFX7-NEXT: v_mov_b32_e32 v0, s5
+; GFX7-NEXT: ds_write_b16 v1, v0 offset:2
+; GFX7-NEXT: v_mov_b32_e32 v0, s1
 ; GFX7-NEXT: s_lshr_b32 s0, s1, 16
-; GFX7-NEXT: v_mov_b32_e32 v4, s0
+; GFX7-NEXT: ds_write_b16 v1, v0 offset:4
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
+; GFX7-NEXT: ds_write_b16 v1, v0 offset:6
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
 ; GFX7-NEXT: s_lshr_b32 s0, s2, 16
-; GFX7-NEXT: v_mov_b32_e32 v6, s0
+; GFX7-NEXT: ds_write_b16 v1, v0 offset:8
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
+; GFX7-NEXT: ds_write_b16 v1, v0 offset:10
+; GFX7-NEXT: v_mov_b32_e32 v0, s3
 ; GFX7-NEXT: s_lshr_b32 s0, s3, 16
-; GFX7-NEXT: v_mov_b32_e32 v2, s5
-; GFX7-NEXT: v_mov_b32_e32 v3, s1
-; GFX7-NEXT: v_mov_b32_e32 v5, s2
-; GFX7-NEXT: v_mov_b32_e32 v7, s3
-; GFX7-NEXT: v_mov_b32_e32 v8, s0
-; GFX7-NEXT: ds_write_b16 v1, v0
-; GFX7-NEXT: ds_write_b16 v1, v2 offset:2
-; GFX7-NEXT: ds_write_b16 v1, v3 offset:4
-; GFX7-NEXT: ds_write_b16 v1, v4 offset:6
-; GFX7-NEXT: ds_write_b16 v1, v5 offset:8
-; GFX7-NEXT: ds_write_b16 v1, v6 offset:10
-; GFX7-NEXT: ds_write_b16 v1, v7 offset:12
-; GFX7-NEXT: ds_write_b16 v1, v8 offset:14
+; GFX7-NEXT: ds_write_b16 v1, v0 offset:12
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
+; GFX7-NEXT: ds_write_b16 v1, v0 offset:14
 ; GFX7-NEXT: s_endpgm
   store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 2
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll
index e96a5163e92f3..538c146601bda 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll
@@ -41,39 +41,39 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out,
 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT: v_mov_b32_e32 v1, s4
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
 ; GFX9-NEXT: s_lshr_b32 s3, s0, 8
+; GFX9-NEXT: ds_write_b8 v1, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s3
 ; GFX9-NEXT: s_lshr_b32 s5, s0, 16
 ; GFX9-NEXT: s_lshr_b32 s6, s0, 24
-; GFX9-NEXT: v_mov_b32_e32 v0, s0
-; GFX9-NEXT: v_mov_b32_e32 v2, s3
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:1
+; GFX9-NEXT: v_mov_b32_e32 v0, s5
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:2
+; GFX9-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:3
+; GFX9-NEXT: v_mov_b32_e32 v0, s1
 ; GFX9-NEXT: s_lshr_b32 s0, s1, 8
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:4
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
 ; GFX9-NEXT: s_lshr_b32 s3, s1, 16
 ; GFX9-NEXT: s_lshr_b32 s4, s1, 24
-; GFX9-NEXT: v_mov_b32_e32 v5, s1
-; GFX9-NEXT: v_mov_b32_e32 v6, s0
-; GFX9-NEXT: v_mov_b32_e32 v7, s3
-; GFX9-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-NEXT: v_mov_b32_e32 v4, s6
-; GFX9-NEXT: v_mov_b32_e32 v8, s4
-; GFX9-NEXT: ds_write_b8 v1, v0
-; GFX9-NEXT: ds_write_b8 v1, v2 offset:1
-; GFX9-NEXT: ds_write_b8 v1, v3 offset:2
-; GFX9-NEXT: ds_write_b8 v1, v4 offset:3
-; GFX9-NEXT: ds_write_b8 v1, v5 offset:4
-; GFX9-NEXT: ds_write_b8 v1, v6 offset:5
-; GFX9-NEXT: ds_write_b8 v1, v7 offset:6
-; GFX9-NEXT: ds_write_b8 v1, v8 offset:7
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:5
+; GFX9-NEXT: v_mov_b32_e32 v0, s3
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:6
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:7
+; GFX9-NEXT: v_mov_b32_e32 v0, s2
 ; GFX9-NEXT: s_lshr_b32 s0, s2, 8
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:8
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
 ; GFX9-NEXT: s_lshr_b32 s1, s2, 16
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:9
+; GFX9-NEXT: v_mov_b32_e32 v0, s1
 ; GFX9-NEXT: s_lshr_b32 s3, s2, 24
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_mov_b32_e32 v2, s0
-; GFX9-NEXT: v_mov_b32_e32 v3, s1
-; GFX9-NEXT: v_mov_b32_e32 v4, s3
-; GFX9-NEXT: ds_write_b8 v1, v0 offset:8
-; GFX9-NEXT: ds_write_b8 v1, v2 offset:9
-; GFX9-NEXT: ds_write_b8 v1, v3 offset:10
-; GFX9-NEXT: ds_write_b8 v1, v4 offset:11
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:10
+; GFX9-NEXT: v_mov_b32_e32 v0, s3
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:11
 ; GFX9-NEXT: s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v3i32_align1:
@@ -83,39 +83,39 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out,
 ; GFX7-NEXT: s_mov_b32 m0, -1
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT: v_mov_b32_e32 v1, s4
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
 ; GFX7-NEXT: s_lshr_b32 s3, s0, 8
+; GFX7-NEXT: ds_write_b8 v1, v0
+; GFX7-NEXT: v_mov_b32_e32 v0, s3
 ; GFX7-NEXT: s_lshr_b32 s5, s0, 16
 ; GFX7-NEXT: s_lshr_b32 s6, s0, 24
-; GFX7-NEXT: v_mov_b32_e32 v0, s0
-; GFX7-NEXT: v_mov_b32_e32 v2, s3
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:1
+; GFX7-NEXT: v_mov_b32_e32 v0, s5
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:2
+; GFX7-NEXT: v_mov_b32_e32 v0, s6
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:3
+; GFX7-NEXT: v_mov_b32_e32 v0, s1
 ; GFX7-NEXT: s_lshr_b32 s0, s1, 8
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:4
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
 ; GFX7-NEXT: s_lshr_b32 s3, s1, 16
 ; GFX7-NEXT: s_lshr_b32 s4, s1, 24
-; GFX7-NEXT: v_mov_b32_e32 v5, s1
-; GFX7-NEXT: v_mov_b32_e32 v6, s0
-; GFX7-NEXT: v_mov_b32_e32 v7, s3
-; GFX7-NEXT: v_mov_b32_e32 v3, s5
-; GFX7-NEXT: v_mov_b32_e32 v4, s6
-; GFX7-NEXT: v_mov_b32_e32 v8, s4
-; GFX7-NEXT: ds_write_b8 v1, v0
-; GFX7-NEXT: ds_write_b8 v1, v2 offset:1
-; GFX7-NEXT: ds_write_b8 v1, v3 offset:2
-; GFX7-NEXT: ds_write_b8 v1, v4 offset:3
-; GFX7-NEXT: ds_write_b8 v1, v5 offset:4
-; GFX7-NEXT: ds_write_b8 v1, v6 offset:5
-; GFX7-NEXT: ds_write_b8 v1, v7 offset:6
-; GFX7-NEXT: ds_write_b8 v1, v8 offset:7
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:5
+; GFX7-NEXT: v_mov_b32_e32 v0, s3
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:6
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:7
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
 ; GFX7-NEXT: s_lshr_b32 s0, s2, 8
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:8
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
 ; GFX7-NEXT: s_lshr_b32 s1, s2, 16
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:9
+; GFX7-NEXT: v_mov_b32_e32 v0, s1
 ; GFX7-NEXT: s_lshr_b32 s3, s2, 24
-; GFX7-NEXT: v_mov_b32_e32 v0, s2
-; GFX7-NEXT: v_mov_b32_e32 v2, s0
-; GFX7-NEXT: v_mov_b32_e32 v3, s1
-; GFX7-NEXT: v_mov_b32_e32 v4, s3
-; GFX7-NEXT: ds_write_b8 v1, v0 offset:8
-; GFX7-NEXT: ds_write_b8 v1, v2 offset:9
-; GFX7-NEXT: ds_write_b8 v1, v3 offset:10
-; GFX7-NEXT: ds_write_b8 v1, v4 offset:11
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:10
+; GFX7-NEXT: v_mov_b32_e32 v0, s3
+; GFX7-NEXT: ds_write_b8 v1, v0 offset:11
 ; GFX7-NEXT: s_endpgm
   store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 1
   ret void
@@ -128,21 +128,21 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out,
 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT: v_mov_b32_e32 v1, s4
-; GFX9-NEXT: s_lshr_b32 s3, s0, 16
 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: s_lshr_b32 s3, s0, 16
+; GFX9-NEXT: ds_write_b16 v1, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s3
+; GFX9-NEXT: ds_write_b16 v1, v0 offset:2
+; GFX9-NEXT: v_mov_b32_e32 v0, s1
 ; GFX9-NEXT: s_lshr_b32 s0, s1, 16
-; GFX9-NEXT: v_mov_b32_e32 v4, s0
+; GFX9-NEXT: ds_write_b16 v1, v0 offset:4
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: ds_write_b16 v1, v0 offset:6
+; GFX9-NEXT: v_mov_b32_e32 v0, s2
 ; GFX9-NEXT: s_lshr_b32 s0, s2, 16
-; GFX9-NEXT: v_mov_b32_e32 v2, s3
-; GFX9-NEXT: v_mov_b32_e32 v3, s1
-; GFX9-NEXT: v_mov_b32_e32 v5, s2
-; GFX9-NEXT: v_mov_b32_e32 v6, s0
-; GFX9-NEXT: ds_write_b16 v1, v0
-; GFX9-NEXT: ds_write_b16 v1, v2 offset:2
-; GFX9-NEXT: ds_write_b16 v1, v3 offset:4
-; GFX9-NEXT: ds_write_b16 v1, v4 offset:6
-; GFX9-NEXT: ds_write_b16 v1, v5 offset:8
-; GFX9-NEXT: ds_write_b16 v1, v6 offset:10
+; GFX9-NEXT: ds_write_b16 v1, v0 offset:8
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: ds_write_b16 v1, v0 offset:10
 ; GFX9-NEXT: s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v3i32_align2:
@@ -152,21 +152,21 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out,
 ; GFX7-NEXT: s_mov_b32 m0, -1
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT: v_mov_b32_e32 v1, s4
-; GFX7-NEXT: s_lshr_b32 s3, s0, 16
 ; GFX7-NEXT: v_mov_b32_e32 v0, s0
+; GFX7-NEXT: s_lshr_b32 s3, s0, 16
+; GFX7-NEXT: ds_write_b16 v1, v0
+; GFX7-NEXT: v_mov_b32_e32 v0, s3
+; GFX7-NEXT: ds_write_b16 v1, v0 offset:2
+; GFX7-NEXT: v_mov_b32_e32 v0, s1
 ; GFX7-NEXT: s_lshr_b32 s0, s1, 16
-; GFX7-NEXT: v_mov_b32_e32 v4, s0
+; GFX7-NEXT: ds_write_b16 v1, v0 offset:4
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
+; GFX7-NEXT: ds_write_b16 v1, v0 offset:6
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
 ; GFX7-NEXT: s_lshr_b32 s0, s2, 16
-; GFX7-NEXT: v_mov_b32_e32 v2, s3
-; GFX7-NEXT: v_mov_b32_e32 v3, s1
-; GFX7-NEXT: v_mov_b32_e32 v5, s2
-; GFX7-NEXT: v_mov_b32_e32 v6, s0
-; GFX7-NEXT: ds_write_b16 v1, v0
-; GFX7-NEXT: ds_write_b16 v1, v2 offset:2
-; GFX7-NEXT: ds_write_b16 v1, v3 offset:4
-; GFX7-NEXT: ds_write_b16 v1, v4 offset:6
-; GFX7-NEXT: ds_write_b16 v1, v5 offset:8
-; GFX7-NEXT: ds_write_b16 v1, v6 offset:10
+; GFX7-NEXT: ds_write_b16 v1, v0 offset:8
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
+; GFX7-NEXT: ds_write_b16 v1, v0 offset:10
 ; GFX7-NEXT: s_endpgm
   store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 2
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
index c44f5dd6bd594..7eec033fa2717 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
@@ -3316,13 +3316,14 @@ define amdgpu_kernel void @udiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x
 ; GCN-NEXT: v_and_b32_e32 v2, s3, v3
 ; GCN-NEXT: v_addc_u32_e32 v0, vcc, 0, v5, vcc
 ; GCN-NEXT: v_and_b32_e32 v3, s3, v4
-; GCN-NEXT: v_lshl_b64 v[0:1], v[0:1], 30
 ; GCN-NEXT: v_lshlrev_b32_e32 v3, 15, v3
+; GCN-NEXT: v_lshl_b64 v[0:1], v[0:1], 30
 ; GCN-NEXT: v_or_b32_e32 v2, v2, v3
 ; GCN-NEXT: v_or_b32_e32 v0, v2, v0
-; GCN-NEXT: v_and_b32_e32 v1, 0x1fff, v1
 ; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
-; GCN-NEXT: buffer_store_short v1, off, s[4:7], 0 offset:4
+; GCN-NEXT: s_waitcnt expcnt(0)
+; GCN-NEXT: v_and_b32_e32 v0, 0x1fff, v1
+; GCN-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4
 ; GCN-NEXT: s_endpgm
   %r = udiv <3 x i15> %x, %y
   store <3 x i15> %r, <3 x i15> addrspace(1)* %out
@@ -3460,9 +3461,10 @@ define amdgpu_kernel void @urem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x
 ; GCN-NEXT: v_lshlrev_b32_e32 v3, 15, v3
 ; GCN-NEXT: v_or_b32_e32 v2, v2, v3
 ; GCN-NEXT: v_or_b32_e32 v0, v2, v0
-; GCN-NEXT: v_and_b32_e32 v1, 0x1fff, v1
 ; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
-; GCN-NEXT: buffer_store_short v1, off, s[4:7], 0 offset:4
+; GCN-NEXT: s_waitcnt expcnt(0)
+; GCN-NEXT: v_and_b32_e32 v0, 0x1fff, v1
+; GCN-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4
 ; GCN-NEXT: s_endpgm
   %r = urem <3 x i15> %x, %y
   store <3 x i15> %r, <3 x i15> addrspace(1)* %out
@@ -3612,9 +3614,10 @@ define amdgpu_kernel void @sdiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x
 ; GCN-NEXT: v_lshlrev_b32_e32 v3, 15, v3
 ; GCN-NEXT: v_or_b32_e32 v2, v2, v3
 ; GCN-NEXT: v_or_b32_e32 v0, v2, v0
-; GCN-NEXT: v_and_b32_e32 v1, 0x1fff, v1
 ; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
-; GCN-NEXT: buffer_store_short v1, off, s[4:7], 0 offset:4
+; GCN-NEXT: s_waitcnt expcnt(0)
+; GCN-NEXT: v_and_b32_e32 v0, 0x1fff, v1
+; GCN-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4
 ; GCN-NEXT: s_endpgm
   %r = sdiv <3 x i15> %x, %y
   store <3 x i15> %r, <3 x i15> addrspace(1)* %out
@@ -3780,13 +3783,14 @@ define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x
 ; GCN-NEXT: v_sub_i32_e32 v3, vcc, s8, v3
 ; GCN-NEXT: v_and_b32_e32 v3, s3, v3
 ; GCN-NEXT: v_subrev_i32_e32 v0, vcc, v1, v0
-; GCN-NEXT: v_lshl_b64 v[0:1], v[0:1], 30
 ; GCN-NEXT: v_lshlrev_b32_e32 v3, 15, v3
+; GCN-NEXT: v_lshl_b64 v[0:1], v[0:1], 30
 ; GCN-NEXT: v_or_b32_e32 v2, v2, v3
 ; GCN-NEXT: v_or_b32_e32 v0, v2, v0
-; GCN-NEXT: v_and_b32_e32 v1, 0x1fff, v1
 ; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
-; GCN-NEXT: buffer_store_short v1, off, s[4:7], 0 offset:4
+; GCN-NEXT: s_waitcnt expcnt(0)
+; GCN-NEXT: v_and_b32_e32 v0, 0x1fff, v1
+; GCN-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4
 ; GCN-NEXT: s_endpgm
   %r = srem <3 x i15> %x, %y
   store <3 x i15> %r, <3 x i15> addrspace(1)* %out
diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
index 52ac3705a490e..fb1cd3bbbaf10 100644
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -744,13 +744,13 @@ entry:
 ; GCN-LABEL: {{^}}tail_call_byval_align16:
 ; GCN-NOT: s32
-; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:8
-; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:12
+; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12
+; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8
 ; GCN: s_getpc_b64
-; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:4
-; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}}
+; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4
+; GCN: buffer_store_dword v33, off, s[0:3], s32{{$}}
 ; GCN-NOT: s32
 ; GCN: s_setpc_b64
 define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
@@ -777,12 +777,12 @@ entry:
 ; GCN-LABEL: {{^}}stack_12xv3i32:
 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
-; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
-; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
-; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
 ; GCN: buffer_store_dword [[REG12]], {{.*$}}
+; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
 ; GCN: buffer_store_dword [[REG13]], {{.*}} offset:4
+; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
 ; GCN: buffer_store_dword [[REG14]], {{.*}} offset:8
+; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:12
 ; GCN: v_mov_b32_e32 v31, 11
 ; GCN: s_getpc
@@ -806,12 +806,12 @@ entry:
 ; GCN-LABEL: {{^}}stack_12xv3f32:
 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000
-; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000
-; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000
-; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000
 ; GCN: buffer_store_dword [[REG12]], {{.*$}}
+; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000
 ; GCN: buffer_store_dword [[REG13]], {{.*}} offset:4
+; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000
 ; GCN: buffer_store_dword [[REG14]], {{.*}} offset:8
+; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000
 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:12
 ; GCN: v_mov_b32_e32 v31, 0x41300000
 ; GCN: s_getpc
@@ -836,20 +836,20 @@ entry:
 ; GCN-LABEL: {{^}}stack_8xv5i32:
 ; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 8
-; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 9
-; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 10
-; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 11
-; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
-; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
-; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
-; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
 ; GCN: buffer_store_dword [[REG8]], {{.*$}}
+; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 9
 ; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4
+; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 10
 ; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8
+; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 11
 ; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12
+; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
 ; GCN: buffer_store_dword [[REG12]], {{.*}} offset:16
+; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
 ; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20
+; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
 ; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24
+; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:28
 ; GCN: v_mov_b32_e32 v31, 7
@@ -870,20 +870,20 @@ entry:
 ; GCN-LABEL: {{^}}stack_8xv5f32:
 ; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 0x41000000
-; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 0x41100000
-; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 0x41200000
-; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 0x41300000
-; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000
-; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000
-; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000
-; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000
 ; GCN: buffer_store_dword [[REG8]], {{.*$}}
+; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 0x41100000
 ; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4
+; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 0x41200000
 ; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8
+; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 0x41300000
 ; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12
+; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000
 ; GCN: buffer_store_dword [[REG12]], {{.*}} offset:16
+; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000
 ; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20
+; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000
 ; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24
+; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000
 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:28
 ; GCN: v_mov_b32_e32 v31, 0x40e00000
diff --git a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll
index bc3bcfe6089af..566899486d954 100644
--- a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll
+++ b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll
@@ -31,9 +31,7 @@ bb:
   %la3 = getelementptr inbounds i32, i32* %lb, i32 6
   %ld3 = load i32, i32* %la3
-; DBG: Cluster ld/st SU([[S1:[0-9]+]]) - SU([[S2:[0-9]+]])
-; DBG: Cluster ld/st SU([[S2]]) - SU([[S3:[0-9]+]])
-; DBG: Cluster ld/st SU([[S3]]) - SU([[S4:[0-9]+]])
+; DBG-NOT: Cluster ld/st
 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[LD1]]
 ; GCN-NEXT: flat_store_dword v[{{[0-9:]+}}], [[LD2]] offset:8
 ; GCN-NEXT: flat_store_dword v[{{[0-9:]+}}], [[LD3]] offset:16
@@ -78,13 +76,11 @@ bb:
   %la3 = getelementptr inbounds i32, i32* %lb, i32 6
   %ld3 = load i32, i32* %la3
-; DBG: Cluster ld/st SU([[S1:[0-9]+]]) - SU([[S2:[0-9]+]])
-; DBG: Cluster ld/st SU([[S2]]) - SU([[S3:[0-9]+]])
-; DBG: Cluster ld/st SU([[S3]]) - SU([[S4:[0-9]+]])
-; GCN: v_add_u32_e32 [[ST2:v[0-9]+]], 1, [[LD2]]
+; DBG-NOT: Cluster ld/st
 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[LD1]]
-; GCN-NEXT: flat_store_dword v[{{[0-9:]+}}], [[ST2]] offset:8
+; GCN: v_add_u32_e32 [[ST2:v[0-9]+]], 1, [[LD2]]
 ; GCN-NEXT: flat_store_dword v[{{[0-9:]+}}], [[LD3]] offset:16
+; GCN-NEXT: flat_store_dword v[{{[0-9:]+}}], [[ST2]] offset:8
 ; GCN-NEXT: flat_store_dword v[{{[0-9:]+}}], [[LD4]] offset:24
   %sa0 = getelementptr inbounds i32, i32* %sb, i32 0
   store i32 %ld0, i32* %sa0
@@ -125,7 +121,6 @@ entry:
 ; CHECK-LABEL: {{^}}no_cluster_image_load:
 ; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
 ; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
-; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
 ; DBG-NOT: {{^}}Cluster ld/st
 define amdgpu_ps void @no_cluster_image_load(<8 x i32> inreg %src1, <8 x i32> inreg %src2, <8 x i32> inreg %dst, i32 %x, i32 %y) {
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll
index 76490407c7447..3b6396f8b63fc 100644
--- a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll
+++ b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll
@@ -156,28 +156,28 @@ define amdgpu_kernel void @global_store_2xi16_align1(i16 addrspace(1)* %p, i16 a
 ; GFX7-ALIGNED-LABEL: global_store_2xi16_align1:
 ; GFX7-ALIGNED: ; %bb.0:
 ; GFX7-ALIGNED-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2
-; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v4, 1
-; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v5, 0
+; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v2, 1
+; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v3, 0
 ; GFX7-ALIGNED-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7-ALIGNED-NEXT: s_add_u32 s2, s0, 2
 ; GFX7-ALIGNED-NEXT: s_addc_u32 s3, s1, 0
-; GFX7-ALIGNED-NEXT: s_add_u32 s4, s0, 1
 ; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v0, s0
-; GFX7-ALIGNED-NEXT: s_addc_u32 s5, s1, 0
+; GFX7-ALIGNED-NEXT: s_add_u32 s4, s0, 1
 ; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v1, s1
+; GFX7-ALIGNED-NEXT: s_addc_u32 s5, s1, 0
+; GFX7-ALIGNED-NEXT: flat_store_byte v[0:1], v2
+; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v0, s4
 ; GFX7-ALIGNED-NEXT: s_add_u32 s0, s0, 3
-; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v2, s4
-; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v3, s5
-; GFX7-ALIGNED-NEXT: flat_store_byte v[0:1], v4
-; GFX7-ALIGNED-NEXT: flat_store_byte v[2:3], v5
+; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v1, s5
+; GFX7-ALIGNED-NEXT: flat_store_byte v[0:1], v3
 ; GFX7-ALIGNED-NEXT: s_addc_u32 s1, s1, 0
 ; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v0, s0
-; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v2, s2
 ; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v1, s1
-; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v4, 2
-; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v3, s3
-; GFX7-ALIGNED-NEXT: flat_store_byte v[0:1], v5
-; GFX7-ALIGNED-NEXT: flat_store_byte v[2:3], v4
+; GFX7-ALIGNED-NEXT: flat_store_byte v[0:1], v3
+; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v2, 2
+; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v1, s3
+; GFX7-ALIGNED-NEXT: flat_store_byte v[0:1], v2
 ; GFX7-ALIGNED-NEXT: s_endpgm
 ;
 ; GFX7-UNALIGNED-LABEL: global_store_2xi16_align1:
diff --git a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll
index 85f9ea173eb5e..3a4778333001d 100644
--- a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll
+++ b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll
@@ -73,9 +73,9 @@ define void @private_store_2xi16_align2(i16 addrspace(5)* %p, i16 addrspace(5)*
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
-; GFX9-NEXT: v_mov_b32_e32 v2, 2
 ; GFX9-NEXT: buffer_store_short v0, v1, s[0:3], 0 offen
-; GFX9-NEXT: buffer_store_short v2, v1, s[0:3], 0 offen offset:2
+; GFX9-NEXT: v_mov_b32_e32 v0, 2
+; GFX9-NEXT: buffer_store_short v0, v1, s[0:3], 0 offen offset:2
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
   %gep.r = getelementptr i16, i16 addrspace(5)* %r, i64 1
@@ -140,14 +140,14 @@ define void @private_store_2xi16_align1(i16 addrspace(5)* %p, i16 addrspace(5)*
 ; GFX7-ALIGNED: ; %bb.0:
 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v3, 1
-; GFX7-ALIGNED-NEXT: v_add_i32_e32 v2, vcc, 2, v1
-; GFX7-ALIGNED-NEXT: v_add_i32_e32 v4, vcc, 1, v1
-; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v5, 0
 ; GFX7-ALIGNED-NEXT: buffer_store_byte v3, v1, s[0:3], 0 offen
-; GFX7-ALIGNED-NEXT: buffer_store_byte v5, v4, s[0:3], 0 offen
+; GFX7-ALIGNED-NEXT: v_add_i32_e32 v2, vcc, 2, v1
+; GFX7-ALIGNED-NEXT: v_add_i32_e32 v3, vcc, 1, v1
+; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v4, 0
 ; GFX7-ALIGNED-NEXT: v_add_i32_e32 v1, vcc, 3, v1
 ; GFX7-ALIGNED-NEXT: v_mov_b32_e32 v0, 2
-; GFX7-ALIGNED-NEXT: buffer_store_byte v5, v1, s[0:3], 0 offen
+; GFX7-ALIGNED-NEXT: buffer_store_byte v4, v3, s[0:3], 0 offen
+; GFX7-ALIGNED-NEXT: buffer_store_byte v4, v1, s[0:3], 0 offen
 ; GFX7-ALIGNED-NEXT: buffer_store_byte v0, v2, s[0:3], 0 offen
 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(0)
 ; GFX7-ALIGNED-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/fshr.ll b/llvm/test/CodeGen/AMDGPU/fshr.ll
index 157330b8bd47d..0733e2877bffc 100644
--- a/llvm/test/CodeGen/AMDGPU/fshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/fshr.ll
@@ -1084,23 +1084,23 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2
 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 8, v3
 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v6
 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 4, v7
-; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24
 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
+; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24
 ; GFX9-NEXT: s_waitcnt vmcnt(1)
 ; GFX9-NEXT: v_lshlrev_b32_e32 v5, 8, v5
-; GFX9-NEXT: v_sub_u32_e32 v2, v2, v7
 ; GFX9-NEXT: v_sub_u32_e32 v1, v1, v6
-; GFX9-NEXT: v_add_u32_e32 v2, 8, v2
+; GFX9-NEXT: v_sub_u32_e32 v2, v2, v7
 ; GFX9-NEXT: v_add_u32_e32 v1, 8, v1
-; GFX9-NEXT: v_alignbit_b32 v2, v4, v3, v2
+; GFX9-NEXT: v_add_u32_e32 v2, 8, v2
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-NEXT: v_alignbit_b32 v1, v8, v5, v1
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v2
+; GFX9-NEXT: v_alignbit_b32 v2, v4, v3, v2
 ; GFX9-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:3
+; GFX9-NEXT: buffer_store_byte_d16_hi v2, v0, s[0:3], 0 offen offset:5
 ; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], 0 offen offset:2
 ; GFX9-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen
-; GFX9-NEXT: buffer_store_byte_d16_hi v2, v0, s[0:3], 0 offen offset:5
-; GFX9-NEXT: buffer_store_byte v3, v0, s[0:3], 0 offen offset:4
+; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v2
+; GFX9-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen offset:4
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/half.ll b/llvm/test/CodeGen/AMDGPU/half.ll
index 1908015f47707..d54058eec30c9 100644
--- a/llvm/test/CodeGen/AMDGPU/half.ll
+++ b/llvm/test/CodeGen/AMDGPU/half.ll
@@ -312,7 +312,6 @@ define amdgpu_kernel void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1
 ; SI: v_cvt_f32_f16_e32
 ; SI: v_cvt_f32_f16_e32
 ; SI: v_cvt_f32_f16_e32
-; SI: v_cvt_f32_f16_e32
 ; GCN: flat_store_dwordx4
@@ -326,6 +325,7 @@ define amdgpu_kernel void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1
 ; SI: v_cvt_f32_f16_e32
 ; SI: v_cvt_f32_f16_e32
 ; SI: v_cvt_f32_f16_e32
+; SI: v_cvt_f32_f16_e32
 ; VI: v_cvt_f32_f16_e32
 ; VI: v_cvt_f32_f16_sdwa
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
index 9b525585d876d..5d8ed0f540427 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
@@ -773,12 +773,13 @@ define amdgpu_kernel void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %
 ; VI-NEXT: v_mov_b32_e32 v1, s11
 ; VI-NEXT: v_mov_b32_e32 v2, s10
 ; VI-NEXT: v_mov_b32_e32 v3, s9
-; VI-NEXT: v_mov_b32_e32 v4, s8
-; VI-NEXT: v_mov_b32_e32 v5, s7
-; VI-NEXT: v_mov_b32_e32 v6, s6
-; VI-NEXT: v_mov_b32_e32 v7, s5
 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
-; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
+; VI-NEXT: s_nop 0
+; VI-NEXT: v_mov_b32_e32 v0, s8
+; VI-NEXT: v_mov_b32_e32 v1, s7
+; VI-NEXT: v_mov_b32_e32 v2, s6
+; VI-NEXT: v_mov_b32_e32 v3, s5
+; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; VI-NEXT: s_endpgm
   %vecins = insertelement <8 x i32> %a, i32 5, i32 %b
   store <8 x i32> %vecins, <8 x i32> addrspace(1)* %out, align 32
@@ -910,9 +911,9 @@ define amdgpu_kernel void @dynamic_insertelement_v3i16(<3 x i16> addrspace(1)* %
 ; SI-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5]
 ; SI-NEXT: s_or_b64 s[4:5], s[8:9], s[4:5]
 ; SI-NEXT: v_mov_b32_e32 v0, s5
-; SI-NEXT: v_mov_b32_e32 v1, s4
 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 offset:4
-; SI-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: dynamic_insertelement_v3i16:
diff --git a/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll b/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll
index ef646d6be267f..d8a82859629c7 100644
--- a/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll
@@ -45,7 +45,7 @@ entry:
 ; GCN: s_barrier
-; SI: v_sub_i32_e32 [[SUB1:v[0-9]+]], vcc, 12, [[ADDRW]]
+; SI-DAG: v_sub_i32_e32 [[SUB1:v[0-9]+]], vcc, 12, [[ADDRW]]
 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[SUB0]]
 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[SUB1]]
 ; CI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, [[SUB]] offset0:3 offset1:7
diff --git a/llvm/test/CodeGen/AMDGPU/memory_clause.ll b/llvm/test/CodeGen/AMDGPU/memory_clause.ll
index e1386d3e07d7f..e17c322a37728 100644
--- a/llvm/test/CodeGen/AMDGPU/memory_clause.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory_clause.ll
@@ -70,16 +70,16 @@ define amdgpu_kernel void @scalar_clause(<4 x i32> addrspace(1)* noalias nocaptu
 ; GCN-NEXT: v_mov_b32_e32 v5, s5
 ; GCN-NEXT: v_mov_b32_e32 v6, s6
 ; GCN-NEXT: v_mov_b32_e32 v7, s7
-; GCN-NEXT: global_store_dwordx4 v[12:13], v[0:3], off
-; GCN-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:16
-; GCN-NEXT: v_mov_b32_e32 v0, s12
 ; GCN-NEXT: v_mov_b32_e32 v9, s9
 ; GCN-NEXT: v_mov_b32_e32 v10, s10
 ; GCN-NEXT: v_mov_b32_e32 v11, s11
+; GCN-NEXT: global_store_dwordx4 v[12:13], v[0:3], off
+; GCN-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:16
+; GCN-NEXT: global_store_dwordx4 v[12:13], v[8:11], off offset:32
+; GCN-NEXT: v_mov_b32_e32 v0, s12
 ; GCN-NEXT: v_mov_b32_e32 v1, s13
 ; GCN-NEXT: v_mov_b32_e32 v2, s14
 ; GCN-NEXT: v_mov_b32_e32 v3, s15
-; GCN-NEXT: global_store_dwordx4 v[12:13], v[8:11], off offset:32
 ; GCN-NEXT: global_store_dwordx4 v[12:13], v[0:3], off offset:48
 ; GCN-NEXT: s_endpgm
 bb:
diff --git a/llvm/test/CodeGen/AMDGPU/merge-stores.ll b/llvm/test/CodeGen/AMDGPU/merge-stores.ll
index 925a2daa93da7..8d3b401c57884 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-stores.ll
+++ b/llvm/test/CodeGen/AMDGPU/merge-stores.ll
@@ -529,8 +529,8 @@ define amdgpu_kernel void @merge_local_store_4_constants_i32(i32 addrspace(3)* %
 ; GCN-LABEL: {{^}}merge_global_store_5_constants_i32:
 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 9{{$}}
 ; GCN-DAG: v_mov_b32_e32 v[[HI4:[0-9]+]], -12{{$}}
-; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 11{{$}}
 ; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI4]]{{\]}}
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 11{{$}}
 ; GCN: buffer_store_dword v[[HI]]
 define amdgpu_kernel void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) {
   store i32 9, i32 addrspace(1)* %out, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
index 58085f89e04a8..ebd7ca184bd35 100644
--- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
@@ -28,14 +28,14 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
 ; GCN-NEXT: s_cbranch_scc1 BB0_3
 ; GCN-NEXT: ; %bb.2: ; %bb.1
 ; GCN-NEXT: s_add_i32 s6, s32, 0x1000
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: v_mov_b32_e32 v2, s6
 ; GCN-NEXT: s_lshl_b32 s7, s10, 2
 ; GCN-NEXT: s_mov_b32 s32, s6
-; GCN-NEXT: v_mov_b32_e32 v2, s6
-; GCN-NEXT: v_mov_b32_e32 v1, 0
-; GCN-NEXT: s_add_i32 s6, s6, s7
-; GCN-NEXT: v_mov_b32_e32 v3, 1
 ; GCN-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen offset:4
+; GCN-NEXT: v_mov_b32_e32 v1, 1
+; GCN-NEXT: s_add_i32 s6, s6, s7
+; GCN-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen offset:4
 ; GCN-NEXT: v_mov_b32_e32 v1, s6
 ; GCN-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen
 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
@@ -98,14 +98,14 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
 ; GCN-NEXT: ; %bb.1: ; %bb.0
 ; GCN-NEXT: s_add_i32 s6, s32, 0x1000
 ; GCN-NEXT: s_and_b32 s6, s6, 0xfffff000
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: v_mov_b32_e32 v2, s6
 ; GCN-NEXT: s_lshl_b32 s7, s7, 2
 ; GCN-NEXT: s_mov_b32 s32, s6
-; GCN-NEXT: v_mov_b32_e32 v2, s6
-; GCN-NEXT: v_mov_b32_e32 v1, 0
-; GCN-NEXT: s_add_i32 s6, s6, s7
-; GCN-NEXT: v_mov_b32_e32 v3, 1
 ; GCN-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen offset:4
+; GCN-NEXT: v_mov_b32_e32 v1, 1
+; GCN-NEXT: s_add_i32 s6, s6, s7
+; GCN-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen offset:4
 ; GCN-NEXT: v_mov_b32_e32 v1, s6
 ; GCN-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen
 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
@@ -166,9 +166,9 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
 ; GCN-NEXT: s_add_i32 s6, s32, 0x1000
 ; GCN-NEXT: v_mov_b32_e32 v2, 0
 ; GCN-NEXT: v_mov_b32_e32 v3, s6
-; GCN-NEXT: v_mov_b32_e32 v6, 1
 ; GCN-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen offset:4
+; GCN-NEXT: v_mov_b32_e32 v2, 1
+; GCN-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen offset:4
 ; GCN-NEXT: v_lshl_add_u32 v2, v4, 2, s6
 ; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
 ; GCN-NEXT: v_and_b32_e32 v3, 0x3ff, v5
@@ -228,9 +228,9 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
 ; GCN-NEXT: s_and_b32 s6, s6, 0xfffff000
 ; GCN-NEXT: v_mov_b32_e32 v2, 0
 ; GCN-NEXT: v_mov_b32_e32 v5, s6
-; GCN-NEXT: v_mov_b32_e32 v6, 1
 ; GCN-NEXT: buffer_store_dword v2, v5, s[0:3], 0 offen
-; GCN-NEXT: buffer_store_dword v6, v5, s[0:3], 0 offen offset:4
+; GCN-NEXT: v_mov_b32_e32 v2, 1
+; GCN-NEXT: buffer_store_dword v2, v5, s[0:3], 0 offen offset:4
 ; GCN-NEXT: v_lshl_add_u32 v2, v3, 2, s6
 ; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
 ; GCN-NEXT: v_and_b32_e32 v3, 0x3ff, v4
diff --git a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
index ee61d6dd0b711..e089ac0afc163 100644
--- a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
@@ -249,13 +249,13 @@ define amdgpu_kernel void @reorder_global_offsets(i32 addrspace(1)* nocapture %o
 ; CI: v_mov_b32
 ; CI: v_mov_b32
-; CI: v_add_i32
-; CI: v_add_i32
+; CI-DAG: v_add_i32
+; CI-DAG: v_add_i32
-; CI: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; CI: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
-; CI: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:36{{$}}
-; CI-NEXT: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:52{{$}}
+; CI-DAG: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CI-DAG: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
+; CI-DAG: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:36{{$}}
+; CI: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:52{{$}}
 ; GFX9: global_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:12
 ; GFX9: global_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:28
diff --git a/llvm/test/CodeGen/AMDGPU/store-local.128.ll b/llvm/test/CodeGen/AMDGPU/store-local.128.ll
index 3fa202768f483..80658fa9ed756 100644
--- a/llvm/test/CodeGen/AMDGPU/store-local.128.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-local.128.ll
@@ -55,42 +55,42 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out,
 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
-; GFX9-NEXT: v_mov_b32_e32 v2, s2
-; GFX9-NEXT: s_lshr_b32 s4, s2, 8
-; GFX9-NEXT: s_lshr_b32 s2, s2, 24
-; GFX9-NEXT: v_mov_b32_e32 v4, s2
-; GFX9-NEXT: s_lshr_b32 s2, s3, 8
-; GFX9-NEXT: v_mov_b32_e32 v6, s0
-; GFX9-NEXT: ds_write_b8 v0, v2 offset:8
-; GFX9-NEXT: ds_write_b8_d16_hi v0, v2 offset:10
-; GFX9-NEXT: v_mov_b32_e32 v2, s2
-; GFX9-NEXT: s_lshr_b32 s2, s0, 8
-; GFX9-NEXT: s_lshr_b32 s0, s0, 24
-; GFX9-NEXT: v_mov_b32_e32 v8, s0
-; GFX9-NEXT: s_lshr_b32 s0, s1, 8
 ; GFX9-NEXT: v_mov_b32_e32 v1, s3
-; GFX9-NEXT: v_mov_b32_e32 v5, s1
-; GFX9-NEXT: ds_write_b8 v0, v6
-; GFX9-NEXT: ds_write_b8_d16_hi v0, v6 offset:2
-; GFX9-NEXT: v_mov_b32_e32 v6, s0
-; GFX9-NEXT: s_lshr_b32 s0, s3, 24
+; GFX9-NEXT: v_mov_b32_e32 v2, s2
 ; GFX9-NEXT: ds_write_b8 v0, v1 offset:12
-; GFX9-NEXT: ds_write_b8 v0, v5 offset:4
-; GFX9-NEXT: ds_write_b8 v0, v2 offset:13
-; GFX9-NEXT: v_mov_b32_e32 v2, s0
-; GFX9-NEXT: s_lshr_b32 s0, s1, 24
-; GFX9-NEXT: v_mov_b32_e32 v3, s4
 ; GFX9-NEXT: ds_write_b8_d16_hi v0, v1 offset:14
-; GFX9-NEXT: ds_write_b8 v0, v2 offset:15
-; GFX9-NEXT: ds_write_b8 v0, v3 offset:9
-; GFX9-NEXT: ds_write_b8 v0, v4 offset:11
-; GFX9-NEXT: ds_write_b8 v0, v6 offset:5
-; GFX9-NEXT: v_mov_b32_e32 v7, s2
+; GFX9-NEXT: ds_write_b8 v0, v2 offset:8
+; GFX9-NEXT: ds_write_b8_d16_hi v0, v2 offset:10
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: ds_write_b8 v0, v1 offset:4
+; GFX9-NEXT: ds_write_b8_d16_hi v0, v1 offset:6
 ; GFX9-NEXT: v_mov_b32_e32 v1, s0
-; GFX9-NEXT: ds_write_b8_d16_hi v0, v5 offset:6
+; GFX9-NEXT: s_lshr_b32 s4, s3, 8
+; GFX9-NEXT: ds_write_b8 v0, v1
+; GFX9-NEXT: ds_write_b8_d16_hi v0, v1 offset:2
+; GFX9-NEXT: v_mov_b32_e32 v1, s4
+; GFX9-NEXT: s_lshr_b32 s3, s3, 24
+; GFX9-NEXT: ds_write_b8 v0, v1 offset:13
+; GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-NEXT: s_lshr_b32 s3, s2, 8
+; GFX9-NEXT: ds_write_b8 v0, v1 offset:15
+; GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-NEXT: s_lshr_b32 s2, s2, 24
+; GFX9-NEXT: ds_write_b8 v0, v1 offset:9
+; GFX9-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-NEXT: s_lshr_b32 s2, s1, 8
+; GFX9-NEXT: ds_write_b8 v0, v1 offset:11
+; GFX9-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-NEXT: s_lshr_b32 s1, s1, 24
+; GFX9-NEXT: ds_write_b8 v0, v1 offset:5
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: s_lshr_b32 s1, s0, 8
 ; GFX9-NEXT: ds_write_b8 v0, v1 offset:7
-; GFX9-NEXT: ds_write_b8 v0, v7 offset:1
-; GFX9-NEXT: ds_write_b8 v0, v8 offset:3
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: s_lshr_b32 s0, s0, 24
+; GFX9-NEXT: ds_write_b8 v0, v1 offset:1
+; GFX9-NEXT: v_mov_b32_e32 v1, s0
+; GFX9-NEXT: ds_write_b8 v0, v1 offset:3
 ; GFX9-NEXT: s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v4i32_align1:
@@ -100,50 +100,50 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out,
 ; GFX7-NEXT: s_mov_b32 m0, -1
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
-; GFX7-NEXT: s_lshr_b32 s4, s3, 8
-; GFX7-NEXT: v_mov_b32_e32 v5, s4
-; GFX7-NEXT: s_lshr_b32 s4, s3, 16
 ; GFX7-NEXT: v_mov_b32_e32 v1, s3
-; GFX7-NEXT: s_lshr_b32 s3, s3, 24
-; GFX7-NEXT: ds_write_b8 v0, v5 offset:13
-; GFX7-NEXT: v_mov_b32_e32 v5, s3
-; GFX7-NEXT: s_lshr_b32 s3, s2, 8
-; GFX7-NEXT: v_mov_b32_e32 v6, s4
-; GFX7-NEXT: ds_write_b8 v0, v5 offset:15
-; GFX7-NEXT: ds_write_b8 v0, v6 offset:14
-; GFX7-NEXT: v_mov_b32_e32 v5, s3
 ; GFX7-NEXT: v_mov_b32_e32 v2, s2
-; GFX7-NEXT: s_lshr_b32 s3, s2, 16
-; GFX7-NEXT: s_lshr_b32 s2, s2, 24
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:12
 ; GFX7-NEXT: ds_write_b8 v0, v2 offset:8
-; GFX7-NEXT: ds_write_b8 v0, v5 offset:9
-; GFX7-NEXT: v_mov_b32_e32 v2, s2
+; GFX7-NEXT: v_mov_b32_e32 v1, s1
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:4
+; GFX7-NEXT: v_mov_b32_e32 v1, s0
+; GFX7-NEXT: s_lshr_b32 s4, s3, 8
+; GFX7-NEXT: ds_write_b8 v0, v1
+; GFX7-NEXT: v_mov_b32_e32 v1, s4
+; GFX7-NEXT: s_lshr_b32 s4, s3, 24
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:13
+; GFX7-NEXT: v_mov_b32_e32 v1, s4
+; GFX7-NEXT: s_lshr_b32 s3, s3, 16
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:15
+; GFX7-NEXT: v_mov_b32_e32 v1, s3
+; GFX7-NEXT: s_lshr_b32 s3, s2, 8
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:14
+; GFX7-NEXT: v_mov_b32_e32 v1, s3
+; GFX7-NEXT: s_lshr_b32 s3, s2, 24
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:9
+; GFX7-NEXT: v_mov_b32_e32 v1, s3
+; GFX7-NEXT: s_lshr_b32 s2, s2, 16
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:11
+; GFX7-NEXT: v_mov_b32_e32 v1, s2
 ; GFX7-NEXT: s_lshr_b32 s2, s1, 8
-; GFX7-NEXT: v_mov_b32_e32 v6, s3
-; GFX7-NEXT: ds_write_b8 v0, v1 offset:12
-; GFX7-NEXT: ds_write_b8 v0, v2 offset:11
-; GFX7-NEXT: ds_write_b8 v0, v6 offset:10
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:10
 ; GFX7-NEXT: v_mov_b32_e32 v1, s2
-; GFX7-NEXT: s_lshr_b32 s2, s1, 16
-; GFX7-NEXT: v_mov_b32_e32 v3, s1
-; GFX7-NEXT: s_lshr_b32 s1, s1, 24
+; GFX7-NEXT: s_lshr_b32 s2, s1, 24
 ; GFX7-NEXT: ds_write_b8 v0, v1 offset:5
+; GFX7-NEXT: v_mov_b32_e32 v1, s2
+; GFX7-NEXT: s_lshr_b32 s1, s1, 16
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:7
 ; GFX7-NEXT: v_mov_b32_e32 v1, s1
 ; GFX7-NEXT: s_lshr_b32 s1, s0, 8
-; GFX7-NEXT: v_mov_b32_e32 v2, s2
-; GFX7-NEXT: ds_write_b8 v0, v1 offset:7
-; GFX7-NEXT: ds_write_b8 v0, v2 offset:6
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:6
 ; GFX7-NEXT: v_mov_b32_e32 v1, s1
-; GFX7-NEXT: v_mov_b32_e32 v4, s0
-; GFX7-NEXT: s_lshr_b32 s1, s0, 16
-; GFX7-NEXT: s_lshr_b32 s0, s0, 24
-; GFX7-NEXT: ds_write_b8 v0, v4
+; GFX7-NEXT: s_lshr_b32 s1, s0, 24
 ; GFX7-NEXT: ds_write_b8 v0, v1 offset:1
-; GFX7-NEXT: v_mov_b32_e32 v2, s1
-; GFX7-NEXT: v_mov_b32_e32 v1, s0
-; GFX7-NEXT: ds_write_b8 v0, v3 offset:4
+; GFX7-NEXT: v_mov_b32_e32 v1, s1
+; GFX7-NEXT: s_lshr_b32 s0, s0, 16
 ; GFX7-NEXT: ds_write_b8 v0, v1 offset:3
-; GFX7-NEXT: ds_write_b8 v0, v2 offset:2
+; GFX7-NEXT: v_mov_b32_e32 v1, s0
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:2
 ; GFX7-NEXT: s_endpgm
 ;
 ; GFX6-LABEL: store_lds_v4i32_align1:
@@ -153,50 +153,50 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out,
 ; GFX6-NEXT: s_mov_b32 m0, -1
 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
-; GFX6-NEXT: s_lshr_b32 s4, s3, 8
-; GFX6-NEXT: v_mov_b32_e32 v5, s4
-; GFX6-NEXT: s_lshr_b32 s4, s3, 16
 ; GFX6-NEXT: v_mov_b32_e32 v1, s3
-; GFX6-NEXT: s_lshr_b32 s3, s3, 24
-; GFX6-NEXT: ds_write_b8 v0, v5 offset:13
-; GFX6-NEXT: v_mov_b32_e32 v5, s3
-; GFX6-NEXT: s_lshr_b32 s3, s2, 8
-; GFX6-NEXT: v_mov_b32_e32 v6, s4
-; GFX6-NEXT: ds_write_b8 v0, v5 offset:15
-; GFX6-NEXT: ds_write_b8 v0, v6 offset:14
-; GFX6-NEXT: v_mov_b32_e32 v5, s3
 ; GFX6-NEXT: v_mov_b32_e32 v2, s2
-; GFX6-NEXT: s_lshr_b32 s3, s2, 16
-; GFX6-NEXT: s_lshr_b32 s2, s2, 24
+; GFX6-NEXT: ds_write_b8 v0, v1 offset:12
 ; GFX6-NEXT: ds_write_b8 v0, v2 offset:8
-; GFX6-NEXT: ds_write_b8 v0, v5 offset:9
-; GFX6-NEXT: v_mov_b32_e32 v2, s2
+; GFX6-NEXT: v_mov_b32_e32 v1, s1
+; GFX6-NEXT: ds_write_b8 v0, v1 offset:4
+; GFX6-NEXT: v_mov_b32_e32 v1, s0
+; GFX6-NEXT: s_lshr_b32 s4, s3, 8
+; GFX6-NEXT: ds_write_b8 v0, v1
+; GFX6-NEXT: v_mov_b32_e32 v1, s4
+; GFX6-NEXT: s_lshr_b32 s4, s3, 24
+; GFX6-NEXT: ds_write_b8 v0, v1 offset:13
+; GFX6-NEXT: v_mov_b32_e32 v1, s4
+; GFX6-NEXT: s_lshr_b32 s3, s3, 16
+; GFX6-NEXT: ds_write_b8 v0, v1 offset:15
+; GFX6-NEXT: v_mov_b32_e32 v1, s3
+; GFX6-NEXT: s_lshr_b32 s3, s2, 8
+; GFX6-NEXT: ds_write_b8 v0, v1 offset:14
+; GFX6-NEXT: v_mov_b32_e32 v1, s3
+; GFX6-NEXT: s_lshr_b32 s3, s2, 24
+; GFX6-NEXT: ds_write_b8 v0, v1 offset:9
+; GFX6-NEXT: v_mov_b32_e32 v1, s3
+; GFX6-NEXT: s_lshr_b32 s2, s2, 16
+; GFX6-NEXT: ds_write_b8 v0, v1 offset:11
+; GFX6-NEXT: v_mov_b32_e32 v1, s2
 ; GFX6-NEXT: s_lshr_b32 s2, s1, 8
-; GFX6-NEXT: v_mov_b32_e32 v6, s3
-; GFX6-NEXT: ds_write_b8 v0, v1 offset:12
-; GFX6-NEXT: ds_write_b8 v0, v2 offset:11
-; GFX6-NEXT: ds_write_b8 v0, v6 offset:10
+; GFX6-NEXT: ds_write_b8 v0, v1 offset:10
 ; GFX6-NEXT: v_mov_b32_e32 v1, s2
-; GFX6-NEXT: s_lshr_b32 s2, s1, 16
-; GFX6-NEXT: v_mov_b32_e32 v2, s1
-; GFX6-NEXT: s_lshr_b32 s1, s1, 24
+; GFX6-NEXT: s_lshr_b32 s2, s1, 24
 ; GFX6-NEXT: ds_write_b8 v0, v1 offset:5
+; GFX6-NEXT: v_mov_b32_e32 v1, s2
+; GFX6-NEXT: s_lshr_b32 s1, s1, 16
+; GFX6-NEXT: ds_write_b8 v0, v1 offset:7
 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
 ; GFX6-NEXT: s_lshr_b32 s1, s0, 8
-; GFX6-NEXT: v_mov_b32_e32 v2, s2
-; GFX6-NEXT: ds_write_b8 v0, v1 offset:7
-; GFX6-NEXT: ds_write_b8 v0, v2 offset:6
+; GFX6-NEXT: ds_write_b8 v0, v1 offset:6
 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
-; GFX6-NEXT: v_mov_b32_e32 v4, s0
-; GFX6-NEXT: s_lshr_b32 s1, s0, 16
-; GFX6-NEXT: s_lshr_b32 s0, s0, 24
-; GFX6-NEXT: ds_write_b8 v0, v4
+; GFX6-NEXT: s_lshr_b32 s1, s0, 24
 ; GFX6-NEXT: ds_write_b8 v0, v1 offset:1
-; GFX6-NEXT: v_mov_b32_e32 v2, s1
-; GFX6-NEXT: v_mov_b32_e32 v1, s0
-; GFX6-NEXT: ds_write_b8 v0, v3 offset:4
+; GFX6-NEXT: v_mov_b32_e32 v1, s1
+; GFX6-NEXT: s_lshr_b32 s0, s0, 16
 ; GFX6-NEXT: ds_write_b8 v0, v1 offset:3
-; GFX6-NEXT: ds_write_b8 v0, v2 offset:2
+; GFX6-NEXT: v_mov_b32_e32 v1, s0
+; GFX6-NEXT: ds_write_b8 v0, v1 offset:2
 ; GFX6-NEXT: s_endpgm
   store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 1
   ret void
@@ -210,17 +210,17 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out,
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
 ; GFX9-NEXT: v_mov_b32_e32 v1, s3
-; GFX9-NEXT: v_mov_b32_e32 v4, s0
 ; GFX9-NEXT: v_mov_b32_e32 v2, s2
-; GFX9-NEXT: v_mov_b32_e32 v3, s1
+; GFX9-NEXT: ds_write_b16 v0, v1 offset:12
 ; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:14
-; GFX9-NEXT: ds_write_b16 v0, v4
-; GFX9-NEXT: ds_write_b16 v0, v3 offset:4
 ; GFX9-NEXT: ds_write_b16 v0, v2 offset:8
-; GFX9-NEXT: ds_write_b16 v0, v1 offset:12
 ; GFX9-NEXT: ds_write_b16_d16_hi v0, v2 offset:10
-; GFX9-NEXT: ds_write_b16_d16_hi v0, v3 offset:6
-; GFX9-NEXT: ds_write_b16_d16_hi v0, v4 offset:2
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: ds_write_b16 v0, v1 offset:4
+; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:6
+; GFX9-NEXT: v_mov_b32_e32 v1, s0
+; GFX9-NEXT: ds_write_b16 v0, v1
+; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:2
 ; GFX9-NEXT: s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v4i32_align2:
@@ -230,26 +230,26 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out,
 ; GFX7-NEXT: s_mov_b32 m0, -1
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
-; GFX7-NEXT: v_mov_b32_e32 v4, s0
-; GFX7-NEXT: s_lshr_b32 s0, s0, 16
-; GFX7-NEXT: v_mov_b32_e32 v5, s0
-; GFX7-NEXT: s_lshr_b32 s0, s1, 16
-; GFX7-NEXT: v_mov_b32_e32 v3, s1
-; GFX7-NEXT: ds_write_b16 v0, v4
-; GFX7-NEXT: v_mov_b32_e32 v4, s0
-; GFX7-NEXT: s_lshr_b32 s0, s2, 16
+; GFX7-NEXT: v_mov_b32_e32 v1, s3
 ; GFX7-NEXT: v_mov_b32_e32 v2, s2
-; GFX7-NEXT: ds_write_b16 v0, v3 offset:4
-; GFX7-NEXT: v_mov_b32_e32 v3, s0
-; GFX7-NEXT: s_lshr_b32 s0, s3, 16
+; GFX7-NEXT: ds_write_b16 v0, v1 offset:12
 ; GFX7-NEXT: ds_write_b16 v0, v2 offset:8
-; GFX7-NEXT: v_mov_b32_e32 v2, s0
+; GFX7-NEXT: v_mov_b32_e32 v1, s1
+; GFX7-NEXT: ds_write_b16 v0, v1 offset:4
+; GFX7-NEXT: v_mov_b32_e32 v1, s0
+; GFX7-NEXT: s_lshr_b32 s3, s3, 16
+; GFX7-NEXT: ds_write_b16 v0, v1
 ; GFX7-NEXT: v_mov_b32_e32 v1, s3
-; GFX7-NEXT: ds_write_b16 v0, v2 offset:14
-; GFX7-NEXT: ds_write_b16 v0, v1 offset:12
-; GFX7-NEXT: ds_write_b16 v0, v3 offset:10
-; GFX7-NEXT: ds_write_b16 v0, v4 offset:6
-; GFX7-NEXT: ds_write_b16 v0, v5 offset:2
+; GFX7-NEXT: s_lshr_b32 s2, s2, 16
+; GFX7-NEXT: ds_write_b16 v0, v1 offset:14
+; GFX7-NEXT: v_mov_b32_e32 v1, s2
+; GFX7-NEXT: s_lshr_b32 s1, s1, 16
+; GFX7-NEXT: ds_write_b16 v0, v1 offset:10
+; GFX7-NEXT: v_mov_b32_e32 v1, s1
+; GFX7-NEXT: s_lshr_b32 s0, s0, 16
+; GFX7-NEXT: ds_write_b16 v0, v1 offset:6
+; GFX7-NEXT: v_mov_b32_e32 v1, s0
+; GFX7-NEXT: ds_write_b16 v0, v1 offset:2
 ; GFX7-NEXT: s_endpgm
 ;
 ; GFX6-LABEL: store_lds_v4i32_align2:
@@ -259,26 +259,26 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out,
 ; GFX6-NEXT: s_mov_b32 m0, -1
 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
-; GFX6-NEXT: v_mov_b32_e32 v4, s0
-; GFX6-NEXT: s_lshr_b32 s0, s0, 16
-; GFX6-NEXT: v_mov_b32_e32 v5, s0
-; GFX6-NEXT: s_lshr_b32 s0, s1, 16
-; GFX6-NEXT: v_mov_b32_e32 v3, s1
-; GFX6-NEXT: ds_write_b16 v0, v4
-; GFX6-NEXT: v_mov_b32_e32 v4, s0
-; GFX6-NEXT: s_lshr_b32 s0, s2, 16
+; GFX6-NEXT: v_mov_b32_e32 v1, s3
 ; GFX6-NEXT: v_mov_b32_e32 v2, s2
-; GFX6-NEXT: ds_write_b16 v0, v3 offset:4
-; GFX6-NEXT: v_mov_b32_e32 v3, s0
-; GFX6-NEXT: s_lshr_b32 s0, s3, 16
+; GFX6-NEXT: ds_write_b16 v0, v1 offset:12
 ; GFX6-NEXT: ds_write_b16 v0, v2 offset:8
-; GFX6-NEXT: v_mov_b32_e32 v2, s0
+; GFX6-NEXT: v_mov_b32_e32 v1, s1
+; GFX6-NEXT: ds_write_b16 v0, v1 offset:4
+; GFX6-NEXT: v_mov_b32_e32 v1, s0
+; GFX6-NEXT: s_lshr_b32 s3, s3, 16
+; GFX6-NEXT: ds_write_b16 v0, v1
 ; GFX6-NEXT: v_mov_b32_e32 v1, s3
-; GFX6-NEXT: ds_write_b16 v0, v2 offset:14
-; GFX6-NEXT: ds_write_b16 v0, v1 offset:12
-; GFX6-NEXT: ds_write_b16 v0, v3 offset:10
-; GFX6-NEXT: ds_write_b16 v0, v4 offset:6
-; GFX6-NEXT: ds_write_b16 v0, v5 offset:2
+; GFX6-NEXT: s_lshr_b32 s2, s2, 16
+; GFX6-NEXT: ds_write_b16 v0, v1 offset:14
+; GFX6-NEXT: v_mov_b32_e32 v1, s2
+; GFX6-NEXT: s_lshr_b32 s1, s1, 16
+; GFX6-NEXT: ds_write_b16 v0, v1 offset:10
+; GFX6-NEXT: v_mov_b32_e32 v1, s1
+; GFX6-NEXT: s_lshr_b32 s0, s0, 16
+; GFX6-NEXT: ds_write_b16 v0, v1 offset:6
+; GFX6-NEXT: v_mov_b32_e32 v1, s0
+; GFX6-NEXT: ds_write_b16 v0, v1 offset:2
 ; GFX6-NEXT: s_endpgm
   store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 2
   ret void
@@ -307,10 +307,10 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out,
 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
 ; GFX7-NEXT: v_mov_b32_e32 v1, s0
 ; GFX7-NEXT: v_mov_b32_e32 v2, s1
-; GFX7-NEXT: v_mov_b32_e32 v3, s2
-; GFX7-NEXT: v_mov_b32_e32 v4, s3
 ; GFX7-NEXT: ds_write2_b32 v0, v1, v2 offset1:1
-; GFX7-NEXT: ds_write2_b32 v0, v3, v4 offset0:2 offset1:3
+; GFX7-NEXT: v_mov_b32_e32 v1, s2
+; GFX7-NEXT: v_mov_b32_e32 v2, s3
+; GFX7-NEXT: ds_write2_b32 v0, v1, v2 offset0:2 offset1:3
 ; GFX7-NEXT: s_endpgm
 ;
 ; GFX6-LABEL: store_lds_v4i32_align4:
diff --git a/llvm/test/CodeGen/AMDGPU/store-local.96.ll b/llvm/test/CodeGen/AMDGPU/store-local.96.ll
index 351b632d06479..41fdb1cbd61be 100644
--- a/llvm/test/CodeGen/AMDGPU/store-local.96.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-local.96.ll
@@ -36,10 +36,10 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3
 ; GFX6-NEXT: s_mov_b32 m0, -1
 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT: v_mov_b32_e32 v2, s4
-; GFX6-NEXT: v_mov_b32_e32 v3, s2
+; GFX6-NEXT: v_mov_b32_e32 v1, s2
+; GFX6-NEXT: ds_write_b32 v2, v1 offset:8
 ; GFX6-NEXT: v_mov_b32_e32 v0, s0
 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
-; GFX6-NEXT: ds_write_b32 v2, v3 offset:8
 ; GFX6-NEXT: ds_write_b64 v2, v[0:1]
 ; GFX6-NEXT: s_endpgm
   store <3 x i32> %x, <3 x i32> addrspace(3)* %out
@@ -53,33 +53,33 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out,
 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
-; GFX9-NEXT: s_lshr_b32 s3, s2, 8
-; GFX9-NEXT: v_mov_b32_e32 v4, s0
-; GFX9-NEXT: v_mov_b32_e32 v2, s3
-; GFX9-NEXT: s_lshr_b32 s3, s0, 8
-; GFX9-NEXT: s_lshr_b32 s0, s0, 24
-; GFX9-NEXT: v_mov_b32_e32 v6, s0
-; GFX9-NEXT: s_lshr_b32 s0, s1, 8
 ; GFX9-NEXT: v_mov_b32_e32 v1, s2
-; GFX9-NEXT: v_mov_b32_e32 v3, s1
-; GFX9-NEXT: ds_write_b8 v0, v4
-; GFX9-NEXT: ds_write_b8_d16_hi v0, v4 offset:2
-; GFX9-NEXT: v_mov_b32_e32 v4, s0
-; GFX9-NEXT: s_lshr_b32 s0, s2, 24
+; GFX9-NEXT: v_mov_b32_e32 v2, s1
 ; GFX9-NEXT: ds_write_b8 v0, v1 offset:8
-; GFX9-NEXT: ds_write_b8 v0, v3 offset:4
-; GFX9-NEXT: ds_write_b8 v0, v2 offset:9
-; GFX9-NEXT: v_mov_b32_e32 v2, s0
-; GFX9-NEXT: s_lshr_b32 s0, s1, 24
 ; GFX9-NEXT: ds_write_b8_d16_hi v0, v1 offset:10
-; GFX9-NEXT: ds_write_b8 v0, v2 offset:11
-; GFX9-NEXT: ds_write_b8 v0, v4 offset:5
-; GFX9-NEXT: v_mov_b32_e32 v5, s3
+; GFX9-NEXT: ds_write_b8 v0, v2 offset:4
+; GFX9-NEXT: ds_write_b8_d16_hi v0, v2 offset:6
 ; GFX9-NEXT: v_mov_b32_e32 v1, s0
-; GFX9-NEXT: ds_write_b8_d16_hi v0, v3 offset:6
+; GFX9-NEXT: s_lshr_b32 s3, s2, 8
+; GFX9-NEXT: ds_write_b8 v0, v1
+; GFX9-NEXT: ds_write_b8_d16_hi v0, v1 offset:2
+; GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-NEXT: s_lshr_b32 s2, s2, 24
+; GFX9-NEXT: ds_write_b8 v0, v1 offset:9
+; GFX9-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-NEXT: s_lshr_b32 s2, s1, 8
+; GFX9-NEXT: ds_write_b8 v0, v1 offset:11
+; GFX9-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-NEXT: s_lshr_b32 s1, s1, 24
+; GFX9-NEXT: ds_write_b8 v0, v1 offset:5
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: s_lshr_b32 s1, s0, 8
 ; GFX9-NEXT: ds_write_b8 v0, v1 offset:7
-; GFX9-NEXT: ds_write_b8 v0, v5 offset:1
-; GFX9-NEXT: ds_write_b8 v0, v6 offset:3
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: s_lshr_b32 s0, s0, 24
+; GFX9-NEXT: ds_write_b8 v0, v1 offset:1
+; GFX9-NEXT: v_mov_b32_e32 v1, s0
+; GFX9-NEXT: ds_write_b8 v0, v1 offset:3
 ; GFX9-NEXT: s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v3i32_align1:
@@ -89,39 +89,39 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out,
 ; GFX7-NEXT: s_mov_b32 m0, -1
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
-; GFX7-NEXT: s_lshr_b32 s3, s2, 8
-; GFX7-NEXT: v_mov_b32_e32 v4, s3
 ; GFX7-NEXT: v_mov_b32_e32 v1, s2
-; GFX7-NEXT: s_lshr_b32 s3, s2, 16
-; GFX7-NEXT: s_lshr_b32 s2, s2, 24
+; GFX7-NEXT: v_mov_b32_e32 v2, s1
 ; GFX7-NEXT: ds_write_b8 v0, v1 offset:8
-; GFX7-NEXT: ds_write_b8 v0, v4 offset:9
+; GFX7-NEXT: ds_write_b8 v0, v2 offset:4
+; GFX7-NEXT: v_mov_b32_e32 v1, s0
+; GFX7-NEXT: s_lshr_b32 s3, s2, 8
+; GFX7-NEXT: ds_write_b8 v0, v1
+; GFX7-NEXT: v_mov_b32_e32 v1, s3
+; GFX7-NEXT: s_lshr_b32 s3, s2, 24
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:9
+; GFX7-NEXT: v_mov_b32_e32 v1, s3
+; GFX7-NEXT: s_lshr_b32 s2, s2, 16
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:11
 ; GFX7-NEXT: v_mov_b32_e32 v1, s2
 ; GFX7-NEXT: s_lshr_b32 s2, s1, 8
-; GFX7-NEXT: v_mov_b32_e32 v5, s3
-; GFX7-NEXT: ds_write_b8 v0, v1 offset:11
-; GFX7-NEXT: ds_write_b8 v0, v5 offset:10
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:10
 ; GFX7-NEXT: v_mov_b32_e32 v1, s2
-; GFX7-NEXT: s_lshr_b32 s2, s1, 16
-; GFX7-NEXT: v_mov_b32_e32 v2, s1
-; GFX7-NEXT: s_lshr_b32 s1, s1, 24
+; GFX7-NEXT: s_lshr_b32 s2, s1, 24
 ; GFX7-NEXT: ds_write_b8 v0, v1 offset:5
+; GFX7-NEXT: v_mov_b32_e32 v1, s2
+; GFX7-NEXT: s_lshr_b32 s1, s1, 16
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:7
 ; GFX7-NEXT: v_mov_b32_e32 v1, s1
 ; GFX7-NEXT: s_lshr_b32 s1, s0, 8
-; GFX7-NEXT: v_mov_b32_e32 v4, s2
-; GFX7-NEXT: ds_write_b8 v0, v1 offset:7
-; GFX7-NEXT: ds_write_b8 v0, v4 offset:6
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:6
 ; GFX7-NEXT: v_mov_b32_e32 v1, s1
-; GFX7-NEXT: v_mov_b32_e32 v3, s0
-; GFX7-NEXT: s_lshr_b32 s1, s0, 16
-; GFX7-NEXT: s_lshr_b32 s0, s0, 24
-; GFX7-NEXT: ds_write_b8 v0, v3
+; GFX7-NEXT: s_lshr_b32 s1, s0, 24
 ; GFX7-NEXT: ds_write_b8 v0, v1 offset:1
-; GFX7-NEXT: v_mov_b32_e32 v4, s1
-; GFX7-NEXT: v_mov_b32_e32 v1, s0
-; GFX7-NEXT: ds_write_b8 v0, v2 offset:4
+; GFX7-NEXT: v_mov_b32_e32 v1, s1
+; GFX7-NEXT: s_lshr_b32 s0, s0, 16
 ; GFX7-NEXT: ds_write_b8 v0, v1 offset:3
-; GFX7-NEXT: ds_write_b8 v0, v4 offset:2
+; GFX7-NEXT: v_mov_b32_e32 v1, s0
+; GFX7-NEXT: ds_write_b8 v0, v1 offset:2
 ; GFX7-NEXT: s_endpgm
 ;
 ; GFX6-LABEL: store_lds_v3i32_align1:
@@ -131,39 +131,39 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out,
 ; GFX6-NEXT: s_mov_b32 m0, -1
 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
-; GFX6-NEXT: s_lshr_b32 s3, s2, 8
-; GFX6-NEXT: v_mov_b32_e32 v4, s3
 ; GFX6-NEXT: v_mov_b32_e32 v1, s2
-; GFX6-NEXT: s_lshr_b32 s3, s2, 16
-; GFX6-NEXT: s_lshr_b32 s2, s2, 24
+; GFX6-NEXT: v_mov_b32_e32 v2, s1
 ; GFX6-NEXT: ds_write_b8 v0, v1 offset:8
-; GFX6-NEXT: ds_write_b8 v0, v4 offset:9
+; GFX6-NEXT: ds_write_b8 v0, v2 offset:4
+; GFX6-NEXT: v_mov_b32_e32 v1, s0
+; GFX6-NEXT: s_lshr_b32 s3, s2, 8
+; GFX6-NEXT: ds_write_b8 v0, v1
+; GFX6-NEXT: v_mov_b32_e32 v1, s3
+; GFX6-NEXT: s_lshr_b32 s3, s2, 24
+; GFX6-NEXT: ds_write_b8 v0, v1 offset:9
+; GFX6-NEXT: v_mov_b32_e32 v1, s3
+; GFX6-NEXT: s_lshr_b32 s2, s2, 16
+; GFX6-NEXT: ds_write_b8 v0, v1 offset:11
 ; GFX6-NEXT: v_mov_b32_e32 v1, s2
 ; GFX6-NEXT: s_lshr_b32 s2, s1, 8
-; GFX6-NEXT: v_mov_b32_e32 v5, s3
-; GFX6-NEXT: ds_write_b8 v0, v1 offset:11
-; GFX6-NEXT: ds_write_b8 v0, v5 offset:10
+; GFX6-NEXT: ds_write_b8 v0, v1 offset:10
 ; GFX6-NEXT: v_mov_b32_e32 v1, s2
-; GFX6-NEXT: s_lshr_b32 s2, s1, 16
-; GFX6-NEXT: v_mov_b32_e32 v2, s1
-; GFX6-NEXT: s_lshr_b32 s1, s1, 24
+; GFX6-NEXT: s_lshr_b32 s2, s1, 24
 ; GFX6-NEXT: ds_write_b8 v0, v1 offset:5
+; GFX6-NEXT: v_mov_b32_e32 v1, s2
+; GFX6-NEXT: s_lshr_b32 s1, s1, 16
+; GFX6-NEXT: ds_write_b8 v0, v1 offset:7
 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
 ; GFX6-NEXT: s_lshr_b32 s1, s0, 8
-; GFX6-NEXT: v_mov_b32_e32 v4, s2
-; GFX6-NEXT: ds_write_b8 v0, v1 offset:7
-; GFX6-NEXT: ds_write_b8 v0, v4 offset:6
+; GFX6-NEXT: ds_write_b8 v0, v1 offset:6
 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
-; GFX6-NEXT: v_mov_b32_e32 v3, s0
-; GFX6-NEXT: s_lshr_b32 s1, s0, 16
-; GFX6-NEXT: s_lshr_b32 s0, s0, 24
-; GFX6-NEXT: ds_write_b8 v0, v3
+; GFX6-NEXT: s_lshr_b32 s1, s0, 24
 ; GFX6-NEXT: ds_write_b8 v0, v1 offset:1
-; GFX6-NEXT: v_mov_b32_e32 v4, s1
-; GFX6-NEXT: v_mov_b32_e32 v1, s0
-; GFX6-NEXT: ds_write_b8 v0, v2 offset:4
+; GFX6-NEXT: v_mov_b32_e32 v1, s1
+; GFX6-NEXT: s_lshr_b32 s0, s0, 16
 ; GFX6-NEXT: ds_write_b8 v0, v1 offset:3
-; GFX6-NEXT: ds_write_b8 v0, v4 offset:2
+; GFX6-NEXT: v_mov_b32_e32 v1, s0
+; GFX6-NEXT: ds_write_b8 v0, v1 offset:2
 ; GFX6-NEXT: s_endpgm
   store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 1
   ret void
@@ -178,13 +178,13 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out,
 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
 ; GFX9-NEXT: v_mov_b32_e32 v1, s2
 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: v_mov_b32_e32 v3, s0
+; GFX9-NEXT: ds_write_b16 v0, v1 offset:8
 ; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:10
-; GFX9-NEXT: ds_write_b16 v0, v3
 ; GFX9-NEXT: ds_write_b16 v0, v2 offset:4
-; GFX9-NEXT: ds_write_b16 v0, v1 offset:8
 ; GFX9-NEXT: ds_write_b16_d16_hi v0, v2 offset:6
-; GFX9-NEXT: ds_write_b16_d16_hi v0, v3 offset:2
+; GFX9-NEXT: v_mov_b32_e32 v1, s0
+; GFX9-NEXT: ds_write_b16 v0, v1
+; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:2
 ; GFX9-NEXT: s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v3i32_align2:
@@ -194,21 +194,21 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out,
 ; GFX7-NEXT: s_mov_b32 m0, -1
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
-; GFX7-NEXT: v_mov_b32_e32 v3, s0
-; GFX7-NEXT: s_lshr_b32 s0, s0, 16
-; GFX7-NEXT: v_mov_b32_e32 v4, s0
-; GFX7-NEXT: s_lshr_b32 s0, s1, 16
+; GFX7-NEXT: v_mov_b32_e32 v1, s2
 ; GFX7-NEXT: v_mov_b32_e32 v2, s1
-; GFX7-NEXT: ds_write_b16 v0, v3
-; GFX7-NEXT: v_mov_b32_e32 v3, s0
-; GFX7-NEXT: s_lshr_b32 s0, s2, 16
+; GFX7-NEXT: ds_write_b16 v0, v1 offset:8
 ; GFX7-NEXT: ds_write_b16 v0, v2 offset:4
-; GFX7-NEXT: v_mov_b32_e32 v2, s0
+; GFX7-NEXT: v_mov_b32_e32 v1, s0
+; GFX7-NEXT: s_lshr_b32 s2, s2, 16
+; GFX7-NEXT: ds_write_b16 v0, v1
 ; GFX7-NEXT: v_mov_b32_e32 v1, s2
-; GFX7-NEXT: ds_write_b16 v0, v2 offset:10
-; GFX7-NEXT: ds_write_b16 v0, v1 offset:8
-; GFX7-NEXT: ds_write_b16 v0, v3 offset:6
-; GFX7-NEXT: ds_write_b16 v0, v4 offset:2
+; GFX7-NEXT: s_lshr_b32 s1, s1, 16
+; GFX7-NEXT: ds_write_b16 v0, v1 offset:10
+; GFX7-NEXT: v_mov_b32_e32 v1, s1
+; GFX7-NEXT: s_lshr_b32 s0, s0, 16
+; GFX7-NEXT: ds_write_b16 v0, v1 offset:6
+; GFX7-NEXT: v_mov_b32_e32 v1, s0
+; GFX7-NEXT: ds_write_b16 v0, v1 offset:2
 ; GFX7-NEXT: s_endpgm
 ;
 ; GFX6-LABEL: store_lds_v3i32_align2:
@@ -218,21 +218,21 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out,
 ; GFX6-NEXT: s_mov_b32 m0, -1
 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
-; GFX6-NEXT: v_mov_b32_e32 v3, s0
-; GFX6-NEXT: s_lshr_b32 s0, s0, 16
-; GFX6-NEXT: v_mov_b32_e32 v4, s0
-; GFX6-NEXT: s_lshr_b32 s0, s1, 16
+; GFX6-NEXT: v_mov_b32_e32 v1, s2
 ; GFX6-NEXT: v_mov_b32_e32 v2, s1
-; GFX6-NEXT: ds_write_b16 v0, v3
-; GFX6-NEXT: v_mov_b32_e32 v3, s0
-; GFX6-NEXT: s_lshr_b32 s0, s2, 16
+; GFX6-NEXT: ds_write_b16 v0, v1 offset:8
 ; GFX6-NEXT: ds_write_b16 v0, v2 offset:4
-; GFX6-NEXT: v_mov_b32_e32 v2, s0
+; GFX6-NEXT: v_mov_b32_e32 v1, s0
+; GFX6-NEXT: s_lshr_b32 s2, s2, 16
+; GFX6-NEXT: ds_write_b16 v0, v1
 ; GFX6-NEXT: v_mov_b32_e32 v1, s2
-; GFX6-NEXT: ds_write_b16 v0, v2 offset:10
-; GFX6-NEXT: ds_write_b16 v0,
v1 offset:8 -; GFX6-NEXT: ds_write_b16 v0, v3 offset:6 -; GFX6-NEXT: ds_write_b16 v0, v4 offset:2 +; GFX6-NEXT: s_lshr_b32 s1, s1, 16 +; GFX6-NEXT: ds_write_b16 v0, v1 offset:10 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: s_lshr_b32 s0, s0, 16 +; GFX6-NEXT: ds_write_b16 v0, v1 offset:6 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: ds_write_b16 v0, v1 offset:2 ; GFX6-NEXT: s_endpgm store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 2 ret void @@ -260,9 +260,9 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, ; GFX7-NEXT: v_mov_b32_e32 v0, s4 ; GFX7-NEXT: v_mov_b32_e32 v1, s0 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: v_mov_b32_e32 v3, s2 ; GFX7-NEXT: ds_write2_b32 v0, v1, v2 offset1:1 -; GFX7-NEXT: ds_write_b32 v0, v3 offset:8 +; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: ds_write_b32 v0, v1 offset:8 ; GFX7-NEXT: s_endpgm ; ; GFX6-LABEL: store_lds_v3i32_align4: @@ -302,10 +302,10 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v2, s4 -; GFX7-NEXT: v_mov_b32_e32 v3, s2 +; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: ds_write_b32 v2, v1 offset:8 ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: ds_write_b32 v2, v3 offset:8 ; GFX7-NEXT: ds_write_b64 v2, v[0:1] ; GFX7-NEXT: s_endpgm ; @@ -316,10 +316,10 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v2, s4 -; GFX6-NEXT: v_mov_b32_e32 v3, s2 +; GFX6-NEXT: v_mov_b32_e32 v1, s2 +; GFX6-NEXT: ds_write_b32 v2, v1 offset:8 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: ds_write_b32 v2, v3 offset:8 ; GFX6-NEXT: ds_write_b64 v2, v[0:1] ; GFX6-NEXT: s_endpgm store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 8 @@ -359,10 +359,10 @@ define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v2, s4 -; GFX6-NEXT: v_mov_b32_e32 v3, s2 +; GFX6-NEXT: v_mov_b32_e32 v1, s2 +; GFX6-NEXT: ds_write_b32 v2, v1 offset:8 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: ds_write_b32 v2, v3 offset:8 ; GFX6-NEXT: ds_write_b64 v2, v[0:1] ; GFX6-NEXT: s_endpgm store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 16 diff --git a/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll b/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll index 70c5655fe8117..90336ca79ac29 100644 --- a/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll +++ b/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll @@ -6,14 +6,14 @@ define void @local_store_i56(i56 addrspace(3)* %ptr, i56 %arg) #0 { ; CIVI-LABEL: local_store_i56: ; CIVI: ; %bb.0: -; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CIVI-NEXT: s_mov_b32 m0, -1 -; CIVI-NEXT: ds_write_b32 v0, v1 -; CIVI-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; CIVI-NEXT: ds_write_b16 v0, v2 offset:4 -; CIVI-NEXT: ds_write_b8 v0, v1 offset:6 -; CIVI-NEXT: s_waitcnt lgkmcnt(0) -; CIVI-NEXT: s_setpc_b64 s[30:31] +; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CIVI-NEXT: s_mov_b32 m0, -1 +; CIVI-NEXT: ds_write_b16 v0, v2 offset:4 +; CIVI-NEXT: ds_write_b32 v0, v1 +; CIVI-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; CIVI-NEXT: ds_write_b8 v0, v1 offset:6 +; CIVI-NEXT: s_waitcnt lgkmcnt(0) +; CIVI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: local_store_i56: ; 
GFX9: ; %bb.0: @@ -30,70 +30,70 @@ define void @local_store_i56(i56 addrspace(3)* %ptr, i56 %arg) #0 { define amdgpu_kernel void @local_store_i55(i55 addrspace(3)* %ptr, i55 %arg) #0 { ; HAWAII-LABEL: local_store_i55: ; HAWAII: ; %bb.0: -; HAWAII-NEXT: s_or_b32 s0, s4, 14 -; HAWAII-NEXT: v_mov_b32_e32 v0, s0 -; HAWAII-NEXT: v_mov_b32_e32 v1, s5 -; HAWAII-NEXT: flat_load_ubyte v0, v[0:1] -; HAWAII-NEXT: s_load_dword s0, s[4:5], 0x0 -; HAWAII-NEXT: s_load_dword s1, s[4:5], 0x2 -; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x3 -; HAWAII-NEXT: s_mov_b32 m0, -1 -; HAWAII-NEXT: s_waitcnt lgkmcnt(0) -; HAWAII-NEXT: v_mov_b32_e32 v1, s0 -; HAWAII-NEXT: v_mov_b32_e32 v2, s1 -; HAWAII-NEXT: v_mov_b32_e32 v3, s2 -; HAWAII-NEXT: s_waitcnt vmcnt(0) -; HAWAII-NEXT: v_and_b32_e32 v0, 0x7f, v0 -; HAWAII-NEXT: ds_write_b8 v1, v0 offset:6 -; HAWAII-NEXT: ds_write_b16 v1, v3 offset:4 -; HAWAII-NEXT: ds_write_b32 v1, v2 -; HAWAII-NEXT: s_endpgm +; HAWAII-NEXT: s_or_b32 s0, s4, 14 +; HAWAII-NEXT: v_mov_b32_e32 v0, s0 +; HAWAII-NEXT: v_mov_b32_e32 v1, s5 +; HAWAII-NEXT: flat_load_ubyte v0, v[0:1] +; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x3 +; HAWAII-NEXT: s_load_dword s0, s[4:5], 0x0 +; HAWAII-NEXT: s_load_dword s1, s[4:5], 0x2 +; HAWAII-NEXT: s_mov_b32 m0, -1 +; HAWAII-NEXT: s_waitcnt lgkmcnt(0) +; HAWAII-NEXT: v_mov_b32_e32 v1, s0 +; HAWAII-NEXT: v_mov_b32_e32 v3, s2 +; HAWAII-NEXT: v_mov_b32_e32 v2, s1 +; HAWAII-NEXT: ds_write_b16 v1, v3 offset:4 +; HAWAII-NEXT: s_waitcnt vmcnt(0) +; HAWAII-NEXT: v_and_b32_e32 v0, 0x7f, v0 +; HAWAII-NEXT: ds_write_b8 v1, v0 offset:6 +; HAWAII-NEXT: ds_write_b32 v1, v2 +; HAWAII-NEXT: s_endpgm ; ; FIJI-LABEL: local_store_i55: ; FIJI: ; %bb.0: -; FIJI-NEXT: s_or_b32 s0, s4, 14 -; FIJI-NEXT: v_mov_b32_e32 v0, s0 -; FIJI-NEXT: v_mov_b32_e32 v1, s5 -; FIJI-NEXT: flat_load_ubyte v0, v[0:1] -; FIJI-NEXT: s_load_dword s0, s[4:5], 0x0 -; FIJI-NEXT: s_load_dword s1, s[4:5], 0x8 -; FIJI-NEXT: s_load_dword s2, s[4:5], 0xc -; FIJI-NEXT: s_mov_b32 m0, -1 -; FIJI-NEXT: s_waitcnt lgkmcnt(0) -; FIJI-NEXT: v_mov_b32_e32 v1, s0 -; FIJI-NEXT: v_mov_b32_e32 v3, s1 -; FIJI-NEXT: s_and_b32 s3, s2, 0xffff -; FIJI-NEXT: v_mov_b32_e32 v2, s2 -; FIJI-NEXT: s_waitcnt vmcnt(0) -; FIJI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; FIJI-NEXT: v_or_b32_e32 v0, s3, v0 -; FIJI-NEXT: v_bfe_u32 v0, v0, 16, 7 -; FIJI-NEXT: ds_write_b8 v1, v0 offset:6 -; FIJI-NEXT: ds_write_b16 v1, v2 offset:4 -; FIJI-NEXT: ds_write_b32 v1, v3 -; FIJI-NEXT: s_endpgm +; FIJI-NEXT: s_or_b32 s0, s4, 14 +; FIJI-NEXT: v_mov_b32_e32 v0, s0 +; FIJI-NEXT: v_mov_b32_e32 v1, s5 +; FIJI-NEXT: flat_load_ubyte v0, v[0:1] +; FIJI-NEXT: s_load_dword s0, s[4:5], 0x0 +; FIJI-NEXT: s_load_dword s1, s[4:5], 0x8 +; FIJI-NEXT: s_load_dword s2, s[4:5], 0xc +; FIJI-NEXT: s_mov_b32 m0, -1 +; FIJI-NEXT: s_waitcnt lgkmcnt(0) +; FIJI-NEXT: v_mov_b32_e32 v1, s0 +; FIJI-NEXT: v_mov_b32_e32 v3, s1 +; FIJI-NEXT: s_and_b32 s3, s2, 0xffff +; FIJI-NEXT: v_mov_b32_e32 v2, s2 +; FIJI-NEXT: ds_write_b16 v1, v2 offset:4 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; FIJI-NEXT: v_or_b32_e32 v0, s3, v0 +; FIJI-NEXT: v_bfe_u32 v0, v0, 16, 7 +; FIJI-NEXT: ds_write_b8 v1, v0 offset:6 +; FIJI-NEXT: ds_write_b32 v1, v3 +; FIJI-NEXT: s_endpgm ; ; GFX9-LABEL: local_store_i55: ; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: global_load_ubyte_d16_hi v2, v[0:1], off offset:14 -; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX9-NEXT: s_load_dword s1, s[4:5], 0x8 -; 
GFX9-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NEXT: s_and_b32 s3, s2, 0xffff -; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_e32 v2, s3, v2 -; GFX9-NEXT: v_and_b32_e32 v2, 0x7fffff, v2 -; GFX9-NEXT: ds_write_b8_d16_hi v0, v2 offset:6 -; GFX9-NEXT: ds_write_b16 v0, v1 offset:4 -; GFX9-NEXT: ds_write_b32 v0, v3 -; GFX9-NEXT: s_endpgm +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: global_load_ubyte_d16_hi v2, v[0:1], off offset:14 +; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX9-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX9-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-NEXT: s_and_b32 s3, s2, 0xffff +; GFX9-NEXT: ds_write_b16 v0, v1 offset:4 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_or_b32_e32 v1, s3, v2 +; GFX9-NEXT: v_and_b32_e32 v1, 0x7fffff, v1 +; GFX9-NEXT: ds_write_b8_d16_hi v0, v1 offset:6 +; GFX9-NEXT: ds_write_b32 v0, v3 +; GFX9-NEXT: s_endpgm store i55 %arg, i55 addrspace(3)* %ptr, align 8 ret void } @@ -101,31 +101,31 @@ define amdgpu_kernel void @local_store_i55(i55 addrspace(3)* %ptr, i55 %arg) #0 define amdgpu_kernel void @local_store_i48(i48 addrspace(3)* %ptr, i48 %arg) #0 { ; HAWAII-LABEL: local_store_i48: ; HAWAII: ; %bb.0: -; HAWAII-NEXT: s_load_dword s0, s[4:5], 0x0 -; HAWAII-NEXT: s_load_dword s1, s[4:5], 0x2 -; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x3 -; HAWAII-NEXT: s_mov_b32 m0, -1 -; HAWAII-NEXT: s_waitcnt lgkmcnt(0) -; HAWAII-NEXT: v_mov_b32_e32 v0, s0 -; HAWAII-NEXT: v_mov_b32_e32 v2, s1 -; HAWAII-NEXT: v_mov_b32_e32 v1, s2 -; HAWAII-NEXT: ds_write_b16 v0, v1 offset:4 -; HAWAII-NEXT: ds_write_b32 v0, v2 -; HAWAII-NEXT: s_endpgm +; HAWAII-NEXT: s_load_dword s0, s[4:5], 0x0 +; HAWAII-NEXT: s_load_dword s1, s[4:5], 0x2 +; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x3 +; HAWAII-NEXT: s_mov_b32 m0, -1 +; HAWAII-NEXT: s_waitcnt lgkmcnt(0) +; HAWAII-NEXT: v_mov_b32_e32 v0, s0 +; HAWAII-NEXT: v_mov_b32_e32 v1, s2 +; HAWAII-NEXT: ds_write_b16 v0, v1 offset:4 +; HAWAII-NEXT: v_mov_b32_e32 v1, s1 +; HAWAII-NEXT: ds_write_b32 v0, v1 +; HAWAII-NEXT: s_endpgm ; ; FIJI-LABEL: local_store_i48: ; FIJI: ; %bb.0: -; FIJI-NEXT: s_load_dword s0, s[4:5], 0x0 -; FIJI-NEXT: s_load_dword s1, s[4:5], 0x8 -; FIJI-NEXT: s_load_dword s2, s[4:5], 0xc -; FIJI-NEXT: s_mov_b32 m0, -1 -; FIJI-NEXT: s_waitcnt lgkmcnt(0) -; FIJI-NEXT: v_mov_b32_e32 v0, s0 -; FIJI-NEXT: v_mov_b32_e32 v2, s1 -; FIJI-NEXT: v_mov_b32_e32 v1, s2 -; FIJI-NEXT: ds_write_b16 v0, v1 offset:4 -; FIJI-NEXT: ds_write_b32 v0, v2 -; FIJI-NEXT: s_endpgm +; FIJI-NEXT: s_load_dword s0, s[4:5], 0x0 +; FIJI-NEXT: s_load_dword s1, s[4:5], 0x8 +; FIJI-NEXT: s_load_dword s2, s[4:5], 0xc +; FIJI-NEXT: s_mov_b32 m0, -1 +; FIJI-NEXT: s_waitcnt lgkmcnt(0) +; FIJI-NEXT: v_mov_b32_e32 v0, s0 +; FIJI-NEXT: v_mov_b32_e32 v1, s2 +; FIJI-NEXT: ds_write_b16 v0, v1 offset:4 +; FIJI-NEXT: v_mov_b32_e32 v1, s1 +; FIJI-NEXT: ds_write_b32 v0, v1 +; FIJI-NEXT: s_endpgm ; ; GFX9-LABEL: local_store_i48: ; GFX9: ; %bb.0: @@ -146,35 +146,35 @@ define amdgpu_kernel void @local_store_i48(i48 addrspace(3)* %ptr, i48 %arg) #0 define amdgpu_kernel void @local_store_i65(i65 addrspace(3)* %ptr, i65 %arg) #0 { ; HAWAII-LABEL: local_store_i65: ; HAWAII: ; %bb.0: -; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x0 -; 
HAWAII-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 -; HAWAII-NEXT: s_load_dword s3, s[4:5], 0x4 -; HAWAII-NEXT: s_mov_b32 m0, -1 -; HAWAII-NEXT: s_waitcnt lgkmcnt(0) -; HAWAII-NEXT: v_mov_b32_e32 v2, s2 -; HAWAII-NEXT: v_mov_b32_e32 v0, s0 -; HAWAII-NEXT: s_and_b32 s3, s3, 1 -; HAWAII-NEXT: v_mov_b32_e32 v3, s3 -; HAWAII-NEXT: v_mov_b32_e32 v1, s1 -; HAWAII-NEXT: ds_write_b8 v2, v3 offset:8 -; HAWAII-NEXT: ds_write_b64 v2, v[0:1] -; HAWAII-NEXT: s_endpgm +; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x0 +; HAWAII-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; HAWAII-NEXT: s_load_dword s3, s[4:5], 0x4 +; HAWAII-NEXT: s_mov_b32 m0, -1 +; HAWAII-NEXT: s_waitcnt lgkmcnt(0) +; HAWAII-NEXT: v_mov_b32_e32 v2, s2 +; HAWAII-NEXT: s_and_b32 s3, s3, 1 +; HAWAII-NEXT: v_mov_b32_e32 v0, s3 +; HAWAII-NEXT: ds_write_b8 v2, v0 offset:8 +; HAWAII-NEXT: v_mov_b32_e32 v0, s0 +; HAWAII-NEXT: v_mov_b32_e32 v1, s1 +; HAWAII-NEXT: ds_write_b64 v2, v[0:1] +; HAWAII-NEXT: s_endpgm ; ; FIJI-LABEL: local_store_i65: ; FIJI: ; %bb.0: -; FIJI-NEXT: s_load_dword s2, s[4:5], 0x0 -; FIJI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; FIJI-NEXT: s_load_dword s3, s[4:5], 0x10 -; FIJI-NEXT: s_mov_b32 m0, -1 -; FIJI-NEXT: s_waitcnt lgkmcnt(0) -; FIJI-NEXT: v_mov_b32_e32 v2, s2 -; FIJI-NEXT: v_mov_b32_e32 v0, s0 -; FIJI-NEXT: s_and_b32 s3, s3, 1 -; FIJI-NEXT: v_mov_b32_e32 v3, s3 -; FIJI-NEXT: v_mov_b32_e32 v1, s1 -; FIJI-NEXT: ds_write_b8 v2, v3 offset:8 -; FIJI-NEXT: ds_write_b64 v2, v[0:1] -; FIJI-NEXT: s_endpgm +; FIJI-NEXT: s_load_dword s2, s[4:5], 0x0 +; FIJI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; FIJI-NEXT: s_load_dword s3, s[4:5], 0x10 +; FIJI-NEXT: s_mov_b32 m0, -1 +; FIJI-NEXT: s_waitcnt lgkmcnt(0) +; FIJI-NEXT: v_mov_b32_e32 v2, s2 +; FIJI-NEXT: s_and_b32 s3, s3, 1 +; FIJI-NEXT: v_mov_b32_e32 v0, s3 +; FIJI-NEXT: ds_write_b8 v2, v0 offset:8 +; FIJI-NEXT: v_mov_b32_e32 v0, s0 +; FIJI-NEXT: v_mov_b32_e32 v1, s1 +; FIJI-NEXT: ds_write_b64 v2, v[0:1] +; FIJI-NEXT: s_endpgm ; ; GFX9-LABEL: local_store_i65: ; GFX9: ; %bb.0: @@ -218,22 +218,22 @@ define void @local_store_i13(i13 addrspace(3)* %ptr, i13 %arg) #0 { define void @local_store_i17(i17 addrspace(3)* %ptr, i17 %arg) #0 { ; CIVI-LABEL: local_store_i17: ; CIVI: ; %bb.0: -; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CIVI-NEXT: s_mov_b32 m0, -1 -; CIVI-NEXT: v_bfe_u32 v2, v1, 16, 1 -; CIVI-NEXT: ds_write_b16 v0, v1 -; CIVI-NEXT: ds_write_b8 v0, v2 offset:2 -; CIVI-NEXT: s_waitcnt lgkmcnt(0) -; CIVI-NEXT: s_setpc_b64 s[30:31] +; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CIVI-NEXT: s_mov_b32 m0, -1 +; CIVI-NEXT: ds_write_b16 v0, v1 +; CIVI-NEXT: v_bfe_u32 v1, v1, 16, 1 +; CIVI-NEXT: ds_write_b8 v0, v1 offset:2 +; CIVI-NEXT: s_waitcnt lgkmcnt(0) +; CIVI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: local_store_i17: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_and_b32_e32 v2, 0x1ffff, v1 -; GFX9-NEXT: ds_write_b16 v0, v1 -; GFX9-NEXT: ds_write_b8_d16_hi v0, v2 offset:2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_write_b16 v0, v1 +; GFX9-NEXT: v_and_b32_e32 v1, 0x1ffff, v1 +; GFX9-NEXT: ds_write_b8_d16_hi v0, v1 offset:2 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] store i17 %arg, i17 addrspace(3)* %ptr, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/token-factor-inline-limit-test.ll b/llvm/test/CodeGen/AMDGPU/token-factor-inline-limit-test.ll index 1648c7fe37ccb..e10cd44c6f3b0 100644 --- 
a/llvm/test/CodeGen/AMDGPU/token-factor-inline-limit-test.ll +++ b/llvm/test/CodeGen/AMDGPU/token-factor-inline-limit-test.ll @@ -5,37 +5,37 @@ ; GCN-LABEL: {{^}}token_factor_inline_limit_test: ; GCN-TFILD: v_mov_b32_e32 [[REG8:v[0-9]+]], 8 -; GCN-TFILD: v_mov_b32_e32 [[REG9:v[0-9]+]], 9 -; GCN-TFILD: v_mov_b32_e32 [[REG10:v[0-9]+]], 10 -; GCN-TFILD: v_mov_b32_e32 [[REG11:v[0-9]+]], 11 -; GCN-TFILD: v_mov_b32_e32 [[REG12:v[0-9]+]], 12 -; GCN-TFILD: v_mov_b32_e32 [[REG13:v[0-9]+]], 13 -; GCN-TFILD: v_mov_b32_e32 [[REG14:v[0-9]+]], 14 -; GCN-TFILD: v_mov_b32_e32 [[REG15:v[0-9]+]], 15 ; GCN-TFILD: buffer_store_dword [[REG8]], {{.*$}} +; GCN-TFILD: v_mov_b32_e32 [[REG9:v[0-9]+]], 9 ; GCN-TFILD: buffer_store_dword [[REG9]], {{.*}} offset:4 +; GCN-TFILD: v_mov_b32_e32 [[REG10:v[0-9]+]], 10 ; GCN-TFILD: buffer_store_dword [[REG10]], {{.*}} offset:8 +; GCN-TFILD: v_mov_b32_e32 [[REG11:v[0-9]+]], 11 ; GCN-TFILD: buffer_store_dword [[REG11]], {{.*}} offset:12 +; GCN-TFILD: v_mov_b32_e32 [[REG12:v[0-9]+]], 12 ; GCN-TFILD: buffer_store_dword [[REG12]], {{.*}} offset:16 +; GCN-TFILD: v_mov_b32_e32 [[REG13:v[0-9]+]], 13 ; GCN-TFILD: buffer_store_dword [[REG13]], {{.*}} offset:20 +; GCN-TFILD: v_mov_b32_e32 [[REG14:v[0-9]+]], 14 ; GCN-TFILD: buffer_store_dword [[REG14]], {{.*}} offset:24 +; GCN-TFILD: v_mov_b32_e32 [[REG15:v[0-9]+]], 15 ; GCN-TFILD: buffer_store_dword [[REG15]], {{.*}} offset:28 ; GCN-TFIL7: v_mov_b32_e32 [[REG15:v[0-9]+]], 15 -; GCN-TFIL7: v_mov_b32_e32 [[REG14:v[0-9]+]], 14 -; GCN-TFIL7: v_mov_b32_e32 [[REG13:v[0-9]+]], 13 -; GCN-TFIL7: v_mov_b32_e32 [[REG12:v[0-9]+]], 12 -; GCN-TFIL7: v_mov_b32_e32 [[REG11:v[0-9]+]], 11 -; GCN-TFIL7: v_mov_b32_e32 [[REG10:v[0-9]+]], 10 -; GCN-TFIL7: v_mov_b32_e32 [[REG9:v[0-9]+]], 9 -; GCN-TFIL7: v_mov_b32_e32 [[REG8:v[0-9]+]], 8 ; GCN-TFIL7: buffer_store_dword [[REG15]], {{.*}} offset:28 +; GCN-TFIL7: v_mov_b32_e32 [[REG14:v[0-9]+]], 14 ; GCN-TFIL7: buffer_store_dword [[REG14]], {{.*}} offset:24 +; GCN-TFIL7: v_mov_b32_e32 [[REG13:v[0-9]+]], 13 ; GCN-TFIL7: buffer_store_dword [[REG13]], {{.*}} offset:20 +; GCN-TFIL7: v_mov_b32_e32 [[REG12:v[0-9]+]], 12 ; GCN-TFIL7: buffer_store_dword [[REG12]], {{.*}} offset:16 +; GCN-TFIL7: v_mov_b32_e32 [[REG11:v[0-9]+]], 11 ; GCN-TFIL7: buffer_store_dword [[REG11]], {{.*}} offset:12 +; GCN-TFIL7: v_mov_b32_e32 [[REG10:v[0-9]+]], 10 ; GCN-TFIL7: buffer_store_dword [[REG10]], {{.*}} offset:8 +; GCN-TFIL7: v_mov_b32_e32 [[REG9:v[0-9]+]], 9 ; GCN-TFIL7: buffer_store_dword [[REG9]], {{.*}} offset:4 +; GCN-TFIL7: v_mov_b32_e32 [[REG8:v[0-9]+]], 8 ; GCN-TFIL7: buffer_store_dword [[REG8]], {{.*$}} ; GCN: v_mov_b32_e32 v31, 7 diff --git a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll index bff7cf6809905..a56137757b411 100644 --- a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll +++ b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll @@ -135,12 +135,13 @@ define amdgpu_kernel void @widen_i17_constant_load(i17 addrspace(4)* %arg) { ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_add_i32 s7, s7, 34 ; SI-NEXT: s_or_b32 s7, s7, 4 -; SI-NEXT: s_bfe_u32 s8, s7, 0x10010 ; SI-NEXT: v_mov_b32_e32 v0, s7 -; SI-NEXT: s_mov_b32 s7, s3 -; SI-NEXT: v_mov_b32_e32 v1, s8 +; SI-NEXT: s_bfe_u32 s8, s7, 0x10010 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 -; SI-NEXT: buffer_store_byte v1, off, s[4:7], 0 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s8 +; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: widen_i17_constant_load: @@ 
-157,9 +158,9 @@ define amdgpu_kernel void @widen_i17_constant_load(i17 addrspace(4)* %arg) {
 ; VI-NEXT:    s_or_b32 s0, s0, 4
 ; VI-NEXT:    v_mov_b32_e32 v4, s0
 ; VI-NEXT:    s_bfe_u32 s0, s0, 0x10010
-; VI-NEXT:    v_mov_b32_e32 v5, s0
 ; VI-NEXT:    flat_store_short v[0:1], v4
-; VI-NEXT:    flat_store_byte v[2:3], v5
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    flat_store_byte v[2:3], v0
 ; VI-NEXT:    s_endpgm
   %load = load i17, i17 addrspace(4)* %arg, align 4
   %add = add i17 %load, 34

From 7bb9a2f996a33fde689fc0b7603fce0115fb92b4 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Mon, 14 Sep 2020 09:06:41 -0400
Subject: [PATCH 0541/1079] [InstSimplify] fix miscompiles with
 maximum/minimum intrinsics

As discussed in the sibling codegen functionality patch D87571, this
transform was created with D52766, but it is not correct. The incorrect
test diffs were missed during review, but the 'TODO' comment about this
functionality was still in the code - we need 'nnan' to enable this fold.

---
 llvm/lib/Analysis/InstructionSimplify.cpp          |  4 ++--
 .../InstSimplify/floating-point-arithmetic.ll      | 12 ++++++++----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index f7f5105f9383c..271e79df71531 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5476,9 +5476,9 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
     bool UseNegInf = IID == Intrinsic::minnum || IID == Intrinsic::minimum;
     const APFloat *C;
     if ((match(Op0, m_APFloat(C)) && C->isInfinity() &&
-         C->isNegative() == UseNegInf) ||
+         C->isNegative() == UseNegInf && !PropagateNaN) ||
         (match(Op1, m_APFloat(C)) && C->isInfinity() &&
-         C->isNegative() == UseNegInf))
+         C->isNegative() == UseNegInf && !PropagateNaN))
       return ConstantFP::getInfinity(ReturnType, UseNegInf);
 
     // TODO: minnum(nnan x, inf) -> x
diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
index 8b606dca2e21f..0707f08bf69ba 100644
--- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -1064,7 +1064,8 @@ define float @minimum_x_y_minimum_z(float %x, float %y, float %z) {
 
 define float @minimum_neginf(float %x) {
 ; CHECK-LABEL: @minimum_neginf(
-; CHECK-NEXT:    ret float 0xFFF0000000000000
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float 0xFFF0000000000000)
+; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.minimum.f32(float %x, float 0xFFF0000000000000)
   ret float %val
@@ -1072,7 +1073,8 @@ define float @minimum_neginf(float %x) {
 
 define <2 x double> @minimum_neginf_commute_vec(<2 x double> %x) {
 ; CHECK-LABEL: @minimum_neginf_commute_vec(
-; CHECK-NEXT:    ret <2 x double> <double 0xFFF0000000000000, double 0xFFF0000000000000>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x double> @llvm.minimum.v2f64(<2 x double> <double 0xFFF0000000000000, double 0xFFF0000000000000>, <2 x double> [[X:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[R]]
 ;
   %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> <double 0xFFF0000000000000, double 0xFFF0000000000000>, <2 x double> %x)
   ret <2 x double> %r
@@ -1158,7 +1160,8 @@ define float @maximum_x_y_maximum_z(float %x, float %y, float %z) {
 
 define <2 x double> @maximum_inf(<2 x double> %x) {
 ; CHECK-LABEL: @maximum_inf(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>
+; CHECK-NEXT:    [[VAL:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[X:%.*]], <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>)
+; CHECK-NEXT:    ret <2 x double> [[VAL]]
 ;
   %val = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>)
   ret <2 x double> %val
@@ -1166,7 +1169,8 @@ define <2 x double> @maximum_inf(<2 x double> %x) {
 
 define float @maximum_inf_commute(float %x) {
 ; CHECK-LABEL: @maximum_inf_commute(
-; CHECK-NEXT:    ret float 0x7FF0000000000000
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.maximum.f32(float 0x7FF0000000000000, float [[X:%.*]])
+; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.maximum.f32(float 0x7FF0000000000000, float %x)
   ret float %val
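
A note on why the un-guarded fold in the patch above is a miscompile: per
the LLVM LangRef, llvm.minimum/llvm.maximum implement the NaN-propagating
IEEE-754 minimum/maximum, so minimum(NaN, -inf) is NaN, not -inf, and the
constant-infinity fold is only sound when the variable operand cannot be
NaN. A minimal IR sketch of the two cases (illustrative function names,
not part of the patch):

; Cannot fold: if %x is NaN, the call returns NaN, so replacing it with
; -inf would be exactly the miscompile described above.
define float @minimum_neginf_may_be_nan(float %x) {
  %r = call float @llvm.minimum.f32(float %x, float 0xFFF0000000000000)
  ret float %r
}

; Could fold: 'nnan' excludes the NaN case, so -inf is the result for
; every remaining input - the guard the commit message asks for.
define float @minimum_neginf_nnan(float %x) {
  %r = call nnan float @llvm.minimum.f32(float %x, float 0xFFF0000000000000)
  ret float %r
}

declare float @llvm.minimum.f32(float, float)
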
From 08baa979235ab98cf13497dde813ab8ae58b11cb Mon Sep 17 00:00:00 2001
From: David Green
Date: Mon, 14 Sep 2020 14:26:10 +0100
Subject: [PATCH 0542/1079] [ARM] Enable tail predication for reduction
 tests. NFC

---
 .../LoopVectorize/ARM/mve-reductions.ll            | 791 ++++++------
 1 file changed, 239 insertions(+), 552 deletions(-)

diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
index 677142e3c37af..614d055730d88 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -loop-vectorize < %s -S -o - | FileCheck %s
+; RUN: opt -loop-vectorize -instcombine -simplifycfg -tail-predication=enabled < %s -S -o - | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-arm-none-eabi"
@@ -8,23 +8,18 @@ define i64 @add_i64_i64(i64* nocapture readonly %x, i32 %n) #0 {
 ; CHECK-LABEL: @add_i64_i64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; CHECK-NEXT:    br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; CHECK:       for.body.preheader:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-NEXT:    br i1 [[CMP6]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[X:%.*]], i32 [[I_08]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[ADD]] = add nsw i64 [[TMP0]], [[R_07]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
-; CHECK:       for.cond.cleanup.loopexit:
-; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ]
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
 ; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:    [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    ret i64 [[R_0_LCSSA]]
 ;
 entry:
@@ -51,24 +46,19 @@ define i64 @add_i32_i64(i32* nocapture readonly %x, i32 %n) #0 {
 ; CHECK-LABEL: @add_i32_i64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; CHECK-NEXT:    br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; CHECK:       for.body.preheader:
-; CHECK-NEXT:    br label 
[[FOR_BODY:%.*]] +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[I_08]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64 ; CHECK-NEXT: [[ADD]] = add nsw i64 [[R_07]], [[CONV]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ] -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i64 [[R_0_LCSSA]] ; entry: @@ -96,24 +86,19 @@ define i64 @add_i16_i64(i16* nocapture readonly %x, i32 %n) #0 { ; CHECK-LABEL: @add_i16_i64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[I_08]] ; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i64 ; CHECK-NEXT: [[ADD]] = add nsw i64 [[R_07]], [[CONV]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ] -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i64 [[R_0_LCSSA]] ; entry: @@ -141,24 +126,19 @@ define i64 @add_i8_i64(i8* nocapture readonly %x, i32 %n) #0 { ; CHECK-LABEL: @add_i8_i64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label 
[[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[I_08]] ; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i64 ; CHECK-NEXT: [[ADD]] = add nuw nsw i64 [[R_07]], [[CONV]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ] -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i64 [[R_0_LCSSA]] ; entry: @@ -185,48 +165,28 @@ define i32 @add_i32_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-LABEL: @add_i32_i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[CMP6]], label [[VECTOR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 3 +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP5]] = add i32 [[TMP4]], [[VEC_PHI]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: 
[[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3]] = add <4 x i32> [[VEC_PHI]], [[TMP2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP7]], [[R_07]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !2 -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP3]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[R_0_LCSSA]] ; entry: @@ -253,50 +213,29 @@ define i32 @add_i16_i32(i16* nocapture readonly %x, i32 %n) #0 { ; CHECK-LABEL: @add_i16_i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[CMP6]], label [[VECTOR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 3 +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds i16, i16* [[X:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <4 x i16>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: [[TMP6]] = add i32 [[TMP5]], [[VEC_PHI]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <4 x i16>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP1]], i32 2, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i16> undef) +; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[WIDE_MASKED_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[VEC_PHI]], [[TMP3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP2:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[X]], i32 [[I_08]] -; CHECK-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[R_07]], [[CONV]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !5 -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[R_0_LCSSA]] ; entry: @@ -324,50 +263,29 @@ define i32 @add_i8_i32(i8* nocapture readonly %x, i32 %n) #0 { ; CHECK-LABEL: 
@add_i8_i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[CMP6]], label [[VECTOR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 3 +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <4 x i8>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: [[TMP6]] = add i32 [[TMP5]], [[VEC_PHI]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* [[TMP1]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> undef) +; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[WIDE_MASKED_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[VEC_PHI]], [[TMP3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X]], i32 [[I_08]] -; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* 
[[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP8]] to i32 -; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[R_07]], [[CONV]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !7 -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[R_0_LCSSA]] ; entry: @@ -394,48 +312,28 @@ define signext i16 @add_i16_i16(i16* nocapture readonly %x, i32 %n) #0 { ; CHECK-LABEL: @add_i16_i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[CMP8]], label [[VECTOR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 8 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <8 x i16>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP5]] = add i16 [[TMP4]], [[VEC_PHI]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[N]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <8 x i16>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP1]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> undef) +; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> [[WIDE_MASKED_LOAD]], <8 x i16> zeroinitializer +; CHECK-NEXT: [[TMP3]] = add <8 x i16> [[VEC_PHI]], [[TMP2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !8 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; 
CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[R_09:%.*]] = phi i16 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[X]], i32 [[I_010]] -; CHECK-NEXT: [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[ADD]] = add i16 [[TMP7]], [[R_09]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_010]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !9 -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP3]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i16 [[R_0_LCSSA]] ; entry: @@ -462,50 +360,29 @@ define signext i16 @add_i8_i16(i8* nocapture readonly %x, i32 %n) #0 { ; CHECK-LABEL: @add_i8_i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[CMP8]], label [[VECTOR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 8 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <8 x i8>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP4]]) -; CHECK-NEXT: [[TMP6]] = add i16 [[TMP5]], [[VEC_PHI]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; 
CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[N]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* [[TMP1]], i32 1, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i8> undef) +; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[WIDE_MASKED_LOAD]] to <8 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> [[TMP2]], <8 x i16> zeroinitializer +; CHECK-NEXT: [[TMP4]] = add <8 x i16> [[VEC_PHI]], [[TMP3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !10 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[R_09:%.*]] = phi i16 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X]], i32 [[I_010]] -; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP8]] to i16 -; CHECK-NEXT: [[ADD]] = add i16 [[R_09]], [[CONV]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_010]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !11 -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP4]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i16 [[R_0_LCSSA]] ; entry: @@ -532,48 +409,28 @@ define zeroext i8 @add_i8_i8(i8* nocapture readonly %x, i32 %n) #0 { ; CHECK-LABEL: @add_i8_i8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 16 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[CMP7]], label [[VECTOR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 16 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 15 +; 
CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -16 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i8 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP5]] = add i8 [[TMP4]], [[VEC_PHI]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 [[N]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP1]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> undef) +; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> [[WIDE_MASKED_LOAD]], <16 x i8> zeroinitializer +; CHECK-NEXT: [[TMP3]] = add <16 x i8> [[VEC_PHI]], [[TMP2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !12 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[R_08:%.*]] = phi i8 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X]], i32 [[I_09]] -; CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ADD]] = add i8 [[TMP7]], [[R_08]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_09]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !13 -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i8 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> [[TMP3]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i8 [ 0, 
[[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i8 [[R_0_LCSSA]] ; entry: @@ -599,12 +456,10 @@ define i64 @mla_i64_i64(i64* nocapture readonly %x, i64* nocapture readonly %y, ; CHECK-LABEL: @mla_i64_i64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[R_09:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[R_09:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[X:%.*]], i32 [[I_010]] ; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[Y:%.*]], i32 [[I_010]] @@ -613,12 +468,9 @@ define i64 @mla_i64_i64(i64* nocapture readonly %x, i64* nocapture readonly %y, ; CHECK-NEXT: [[ADD]] = add nsw i64 [[MUL]], [[R_09]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_010]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ] -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i64 [[R_0_LCSSA]] ; entry: @@ -647,12 +499,10 @@ define i64 @mla_i32_i64(i32* nocapture readonly %x, i32* nocapture readonly %y, ; CHECK-LABEL: @mla_i32_i64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[R_09:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[R_09:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[I_010]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[Y:%.*]], i32 [[I_010]] @@ -662,12 +512,9 @@ define i64 @mla_i32_i64(i32* nocapture readonly %x, i32* nocapture readonly %y, ; CHECK-NEXT: [[ADD]] = add nsw i64 [[R_09]], [[CONV]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_010]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] -; CHECK: 
for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ] -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i64 [[R_0_LCSSA]] ; entry: @@ -697,12 +544,10 @@ define i64 @mla_i16_i64(i16* nocapture readonly %x, i16* nocapture readonly %y, ; CHECK-LABEL: @mla_i16_i64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_012:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[R_011:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[I_012:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[R_011:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[I_012]] ; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 @@ -714,12 +559,9 @@ define i64 @mla_i16_i64(i16* nocapture readonly %x, i16* nocapture readonly %y, ; CHECK-NEXT: [[ADD]] = add nsw i64 [[R_011]], [[CONV3]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_012]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ] -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i64 [[R_0_LCSSA]] ; entry: @@ -751,12 +593,10 @@ define i64 @mla_i8_i64(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 ; CHECK-LABEL: @mla_i8_i64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_012:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[R_011:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[I_012:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[R_011:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[I_012]] ; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i32 @@ -768,12 +608,9 @@ define i64 @mla_i8_i64(i8* 
nocapture readonly %x, i8* nocapture readonly %y, i32 ; CHECK-NEXT: [[ADD]] = add nuw nsw i64 [[R_011]], [[CONV3]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_012]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ] -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i64 [[R_0_LCSSA]] ; entry: @@ -805,56 +642,32 @@ define i32 @mla_i32_i32(i32* nocapture readonly %x, i32* nocapture readonly %y, ; CHECK-LABEL: @mla_i32_i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[CMP8]], label [[VECTOR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 3 +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[Y:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[Y:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) -; CHECK-NEXT: [[TMP9]] = add i32 [[TMP8]], [[VEC_PHI]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x 
i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP4]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6]] = add <4 x i32> [[VEC_PHI]], [[TMP5]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !14 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP7:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[R_09:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[I_010]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[Y]], i32 [[I_010]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], [[TMP11]] -; CHECK-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[R_09]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_010]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !15 -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP6]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[R_0_LCSSA]] ; entry: @@ -883,60 +696,34 @@ define i32 @mla_i16_i32(i16* nocapture readonly %x, i16* nocapture readonly %y, ; CHECK-LABEL: @mla_i16_i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP9]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[CMP9]], label [[VECTOR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 3 +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 
[ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <4 x i16>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2 -; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP4]] -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP9]]) -; CHECK-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <4 x i16>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP1]], i32 2, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i16> undef) +; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[WIDE_MASKED_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <4 x i16>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP4]], i32 2, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i16> undef) +; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i16> [[WIDE_MASKED_LOAD1]] to <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <4 x i32> [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP6]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8]] = add <4 x i32> [[VEC_PHI]], [[TMP7]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !16 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[R_010:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], 
[[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[X]], i32 [[I_011]] -; CHECK-NEXT: [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[Y]], i32 [[I_011]] -; CHECK-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 -; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP14]] to i32 -; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]] -; CHECK-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[R_010]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_011]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !17 -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[R_0_LCSSA]] ; entry: @@ -967,60 +754,34 @@ define i32 @mla_i8_i32(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 ; CHECK-LABEL: @mla_i8_i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP9]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[CMP9]], label [[VECTOR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 3 +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <4 x i8>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[Y:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <4 x i8>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP7]], align 1 -; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw <4 x i32> [[TMP8]], [[TMP4]] -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP9]]) -; CHECK-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ 
[[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* [[TMP1]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> undef) +; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[WIDE_MASKED_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[Y:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <4 x i8>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* [[TMP4]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> undef) +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[WIDE_MASKED_LOAD1]] to <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw <4 x i32> [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP6]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8]] = add <4 x i32> [[VEC_PHI]], [[TMP7]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !18 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP9:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[R_010:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X]], i32 [[I_011]] -; CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP13]] to i32 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[Y]], i32 [[I_011]] -; CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[TMP14]] to i32 -; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV2]], [[CONV]] -; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[MUL]], [[R_010]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_011]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !19 -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] ; 
CHECK-NEXT: ret i32 [[R_0_LCSSA]] ; entry: @@ -1051,56 +812,32 @@ define signext i16 @mla_i16_i16(i16* nocapture readonly %x, i16* nocapture reado ; CHECK-LABEL: @mla_i16_i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[CMP11]], label [[VECTOR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 8 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 0 +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[N]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <8 x i16>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP1]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> undef) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <8 x i16>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <8 x i16>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP6]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = mul <8 x i16> [[WIDE_LOAD1]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP8:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP7]]) -; CHECK-NEXT: [[TMP9]] = add i16 [[TMP8]], [[VEC_PHI]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP3]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> undef) +; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i16> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP5:%.*]] = select <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> [[TMP4]], <8 x i16> zeroinitializer +; CHECK-NEXT: [[TMP6]] = add <8 x i16> [[VEC_PHI]], [[TMP5]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !20 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp 
eq i32 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_013:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[R_012:%.*]] = phi i16 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[X]], i32 [[I_013]] -; CHECK-NEXT: [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[Y]], i32 [[I_013]] -; CHECK-NEXT: [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 -; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[TMP12]], [[TMP11]] -; CHECK-NEXT: [[ADD]] = add i16 [[MUL]], [[R_012]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_013]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !21 -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[TMP8:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP6]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i16 [[R_0_LCSSA]] ; entry: @@ -1129,60 +866,34 @@ define signext i16 @mla_i8_i16(i8* nocapture readonly %x, i8* nocapture readonly ; CHECK-LABEL: @mla_i8_i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[CMP11]], label [[VECTOR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 8 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <8 x i8>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[Y:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* 
[[TMP6]] to <8 x i8>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP7]], align 1 -; CHECK-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[WIDE_LOAD1]] to <8 x i16> -; CHECK-NEXT: [[TMP9:%.*]] = mul nuw <8 x i16> [[TMP8]], [[TMP4]] -; CHECK-NEXT: [[TMP10:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP9]]) -; CHECK-NEXT: [[TMP11]] = add i16 [[TMP10]], [[VEC_PHI]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[N]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* [[TMP1]], i32 1, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i8> undef) +; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[WIDE_MASKED_LOAD]] to <8 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[Y:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <8 x i8>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* [[TMP4]], i32 1, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i8> undef) +; CHECK-NEXT: [[TMP5:%.*]] = zext <8 x i8> [[WIDE_MASKED_LOAD1]] to <8 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = mul nuw <8 x i16> [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> [[TMP6]], <8 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8]] = add <8 x i16> [[VEC_PHI]], [[TMP7]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !22 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP11:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_013:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[R_012:%.*]] = phi i16 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X]], i32 [[I_013]] -; CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP13]] to i16 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[Y]], i32 [[I_013]] -; CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[TMP14]] to i16 -; CHECK-NEXT: [[MUL:%.*]] = mul nuw i16 [[CONV2]], [[CONV]] -; CHECK-NEXT: [[ADD]] = add i16 [[MUL]], [[R_012]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_013]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !23 -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: 
[[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[TMP10:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP8]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i16 [[R_0_LCSSA]] ; entry: @@ -1213,56 +924,32 @@ define zeroext i8 @mla_i8_i8(i8* nocapture readonly %x, i8* nocapture readonly % ; CHECK-LABEL: @mla_i8_i8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 16 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[CMP10]], label [[VECTOR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 16 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 15 +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -16 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i8 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 [[N]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP1]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> undef) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[Y:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[Y:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <16 x i8>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP6]], align 1 -; CHECK-NEXT: [[TMP7:%.*]] = mul <16 x i8> [[WIDE_LOAD1]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP8:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> [[TMP7]]) -; CHECK-NEXT: [[TMP9]] = add i8 [[TMP8]], [[VEC_PHI]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> undef) +; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i8> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> [[TMP4]], <16 x i8> zeroinitializer +; CHECK-NEXT: [[TMP6]] = add <16 x i8> [[VEC_PHI]], 
[[TMP5]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !24 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_012:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[R_011:%.*]] = phi i8 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X]], i32 [[I_012]] -; CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[Y]], i32 [[I_012]] -; CHECK-NEXT: [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[TMP12]], [[TMP11]] -; CHECK-NEXT: [[ADD]] = add i8 [[MUL]], [[R_011]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_012]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !25 -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i8 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[TMP8:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> [[TMP6]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i8 [[R_0_LCSSA]] ; entry: From 9868ea764f31b0fd4ec250867807aa0ad7958abf Mon Sep 17 00:00:00 2001 From: jasonliu Date: Fri, 11 Sep 2020 14:26:26 +0000 Subject: [PATCH 0543/1079] [XCOFF][AIX] Handle TOC entries that could not be reached by positive range in small code model Summary: In the small code model, the AIX assembler cannot handle labels that cannot be reached within the [-0x8000, 0x8000) range from the TOC base. So when generating assembly, we need to help the assembler by subtracting a known offset from such labels to keep the actual encoded value within [-0x8000, 0x8000).
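As an illustration, using the numbers from the 32-bit test added below: TOC entry 8193 sits at notional offset 8193 * 4 = 0x8004 from the TOC base, which does not fit in the signed 16-bit D field of the load. The signed 16-bit truncation of 0x8004 is 0x8004 - 0x10000 = -0x7FFC, so the adjustment is 0x10000 and the reference is emitted as L..C8193-65536; the assembler then computes 0x8004 - 65536 = -32764, which is encodable, matching the 80 04 halfword in the disassembly check.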
Reviewed By: hubert.reinterpretcast, Xiangling_L Differential Revision: https://reviews.llvm.org/D86879 --- llvm/lib/MC/XCOFFObjectWriter.cpp | 16 +++-- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 52 +++++++++++---- llvm/test/CodeGen/PowerPC/aix-overflow-toc.py | 66 +++++++++++++++++++ llvm/test/CodeGen/PowerPC/lit.local.cfg | 2 + 4 files changed, 116 insertions(+), 20 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/aix-overflow-toc.py diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp index 5047b5041aa75..d6cee3bb59bb8 100644 --- a/llvm/lib/MC/XCOFFObjectWriter.cpp +++ b/llvm/lib/MC/XCOFFObjectWriter.cpp @@ -49,7 +49,6 @@ namespace { constexpr unsigned DefaultSectionAlign = 4; constexpr int16_t MaxSectionIndex = INT16_MAX; -constexpr uint16_t MaxTOCSizeInARegion = UINT16_MAX; // Packs the csect's alignment and type into a byte. uint8_t getEncodedType(const MCSectionXCOFF *); @@ -431,12 +430,15 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm, FixedValue = getVirtualAddress(SymA, SymASec) + Target.getConstant(); else if (Type == XCOFF::RelocationType::R_TOC || Type == XCOFF::RelocationType::R_TOCL) { - // The FixedValue should be the TC entry offset from TOC-base. - FixedValue = SectionMap[SymASec]->Address - TOCCsects.front().Address; - if (FixedValue >= MaxTOCSizeInARegion) - report_fatal_error( - "handling of TOC entries could not fit in the initial TOC " - "entry region is not yet supported"); + // The FixedValue should be the TOC entry offset from the TOC-base plus any + // constant offset value. + const int64_t TOCEntryOffset = SectionMap[SymASec]->Address - + TOCCsects.front().Address + + Target.getConstant(); + if (Type == XCOFF::RelocationType::R_TOC && !isInt<16>(TOCEntryOffset)) + report_fatal_error("TOCEntryOffset overflows in small code model mode"); + + FixedValue = TOCEntryOffset; } assert( diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 8f1477012bfdd..f950e748158f5 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -579,6 +579,38 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { } } #endif + + auto getTOCRelocAdjustedExprForXCOFF = [this](const MCExpr *Expr, + ptrdiff_t OriginalOffset) { + // Apply an offset to the TOC-based expression such that the adjusted + // notional offset from the TOC base (to be encoded into the instruction's D + // or DS field) is the signed 16-bit truncation of the original notional + // offset from the TOC base. + // This is consistent with the treatment used both by XL C/C++ and + // by AIX ld -r. + ptrdiff_t Adjustment = + OriginalOffset - llvm::SignExtend32<16>(OriginalOffset); + return MCBinaryExpr::createAdd( + Expr, MCConstantExpr::create(-Adjustment, OutContext), OutContext); + }; + + auto getTOCEntryLoadingExprForXCOFF = + [IsPPC64, getTOCRelocAdjustedExprForXCOFF, + this](const MCSymbol *MOSymbol, const MCExpr *Expr) -> const MCExpr * { + const unsigned EntryByteSize = IsPPC64 ? 8 : 4; + const auto TOCEntryIter = TOC.find(MOSymbol); + assert(TOCEntryIter != TOC.end() && + "Could not find the TOC entry for this symbol."); + const ptrdiff_t EntryDistanceFromTOCBase = + (TOCEntryIter - TOC.begin()) * EntryByteSize; + constexpr int16_t PositiveTOCRange = INT16_MAX; + + if (EntryDistanceFromTOCBase > PositiveTOCRange) + return getTOCRelocAdjustedExprForXCOFF(Expr, EntryDistanceFromTOCBase); + + return Expr; + }; + // Lower multi-instruction pseudo operations. 
switch (MI->getOpcode()) { default: break; @@ -725,6 +757,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { assert( TM.getCodeModel() == CodeModel::Small && "This pseudo should only be selected for 32-bit small code model."); + Exp = getTOCEntryLoadingExprForXCOFF(MOSymbol, Exp); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); return; @@ -753,17 +786,20 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && "Invalid operand!"); + // Map the operand to its corresponding MCSymbol. + const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + // Map the machine operand to its corresponding MCSymbol, then map the // global address operand to be a reference to the TOC entry we will // synthesize later. - MCSymbol *TOCEntry = - lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO, *this)); + MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); const MCSymbolRefExpr::VariantKind VK = IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC; const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry, VK, OutContext); - TmpInst.getOperand(1) = MCOperand::createExpr(Exp); + TmpInst.getOperand(1) = MCOperand::createExpr( + IsAIX ? getTOCEntryLoadingExprForXCOFF(MOSymbol, Exp) : Exp); EmitToStreamer(*OutStreamer, TmpInst); return; } @@ -1821,16 +1857,6 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) { PPCTargetStreamer *TS = static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer()); - const unsigned EntryByteSize = Subtarget->isPPC64() ? 8 : 4; - const unsigned TOCEntriesByteSize = TOC.size() * EntryByteSize; - // TODO: If TOC entries' size is larger than 32768, then we run out of - // positive displacement to reach the TOC entry. We need to decide how to - // handle entries' size larger than that later. - if (TOCEntriesByteSize > 32767) { - report_fatal_error("Handling of TOC entry displacement larger than 32767 " - "is not yet implemented."); - } - for (auto &I : TOC) { // Setup the csect for the current TC entry. MCSectionXCOFF *TCEntry = cast<MCSectionXCOFF>( diff --git a/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py b/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py new file mode 100644 index 0000000000000..5e56b6f9fa250 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py @@ -0,0 +1,66 @@ +# RUN: python %s > %t.ll +# RUN: llc -mtriple powerpc-ibm-aix-xcoff -code-model=small -mcpu=pwr4 -mattr=-altivec -O0 < %t.ll | \ +# RUN: FileCheck --check-prefix=ASM32 %s + +# RUN: llc -mtriple powerpc64-ibm-aix-xcoff -code-model=small -mcpu=pwr4 -mattr=-altivec -O0 < %t.ll | \ +# RUN: FileCheck --check-prefix=ASM64 %s + +# RUN: llc -mtriple powerpc-ibm-aix-xcoff -code-model=small -mcpu=pwr4 -mattr=-altivec -O0 \ +# RUN: -filetype=obj -o %t.o < %t.ll +# RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck --check-prefix=DIS32 %s + +# RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff \ +# RUN: -mcpu=pwr4 -mattr=-altivec -filetype=obj -o %t.o 2>&1 < %t.ll | \ +# RUN: FileCheck --check-prefix=XCOFF64 %s +# XCOFF64: LLVM ERROR: 64-bit XCOFF object files are not supported yet.
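+
+# Why 12290 entries: each global gets one TOC entry (4 bytes on 32-bit,
+# 8 bytes on 64-bit), so the positive displacement range [0, 0x8000) is
+# exhausted after 8192 entries on 32-bit and 4096 entries on 64-bit; 12290
+# entries also push the 64-bit offsets past 12288 * 8 = 0x18000, so the
+# checks below cover both the -65536 and the -131072 adjustments.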
+ +numentries = 12290 +for x in range(0, numentries): + print("@a%d = global i32 0, align 4" % (x)) + +print("define void @foo() {") +print("entry:") +for x in range(0, numentries): + print("store i32 1, i32* @a%d, align 4" % (x)) +print("ret void") +print("}") + +# 32-bit assembly check +# ASM32: lwz 3, L..C0(2) +# ASM32: lwz 3, L..C1(2) + +# ASM32: lwz 3, L..C8191(2) +# ASM32: lwz 3, L..C8192-65536(2) +# ASM32: lwz 3, L..C8193-65536(2) + +# ASM32: lwz 3, L..C12288-65536(2) +# ASM32: lwz 3, L..C12289-65536(2) + +# 64-bit assembly check +# ASM64: ld 3, L..C0(2) +# ASM64: ld 3, L..C1(2) + +# ASM64: ld 3, L..C4095(2) +# ASM64: ld 3, L..C4096-65536(2) +# ASM64: ld 3, L..C4097-65536(2) + +# ASM64: ld 3, L..C12287-65536(2) +# ASM64: ld 3, L..C12288-131072(2) +# ASM64: ld 3, L..C12289-131072(2) + +# DIS32: 0: 80 62 00 00 lwz 3, 0(2) +# DIS32: 00000002: R_TOC (idx: 24590) a0[TC] +# DIS32: c: 80 62 00 04 lwz 3, 4(2) +# DIS32: 0000000e: R_TOC (idx: 24592) a1[TC] + +# DIS32: fffc: 80 62 7f fc lwz 3, 32764(2) +# DIS32: 0000fffe: R_TOC (idx: 40972) a8191[TC] +# DIS32: 10004: 80 62 80 00 lwz 3, -32768(2) +# DIS32: 00010006: R_TOC (idx: 40974) a8192[TC] +# DIS32: 1000c: 80 62 80 04 lwz 3, -32764(2) +# DIS32: 0001000e: R_TOC (idx: 40976) a8193[TC] + +# DIS32: 18004: 80 62 c0 00 lwz 3, -16384(2) +# DIS32: 00018006: R_TOC (idx: 49166) a12288[TC] +# DIS32: 1800c: 80 62 c0 04 lwz 3, -16380(2) +# DIS32: 0001800e: R_TOC (idx: 49168) a12289[TC] diff --git a/llvm/test/CodeGen/PowerPC/lit.local.cfg b/llvm/test/CodeGen/PowerPC/lit.local.cfg index 091332439b186..1dbbf92fcf5e3 100644 --- a/llvm/test/CodeGen/PowerPC/lit.local.cfg +++ b/llvm/test/CodeGen/PowerPC/lit.local.cfg @@ -1,2 +1,4 @@ if not 'PowerPC' in config.root.targets: config.unsupported = True + +config.suffixes.add('.py') From 5df9cb5bc71fc880a05ff7a1a2af727c7ce3cab3 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 14 Sep 2020 10:07:26 -0400 Subject: [PATCH 0544/1079] [InstSimplify] fix test comments; NFC --- .../InstSimplify/floating-point-arithmetic.ll | 67 ++++++++++--------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll index 0707f08bf69ba..b26ef69c0e01c 100644 --- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll +++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll @@ -1060,36 +1060,6 @@ define float @minimum_x_y_minimum_z(float %x, float %y, float %z) { ret float %b } -; minimum(X, -INF) --> -INF - -define float @minimum_neginf(float %x) { -; CHECK-LABEL: @minimum_neginf( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float 0xFFF0000000000000) -; CHECK-NEXT: ret float [[VAL]] -; - %val = call float @llvm.minimum.f32(float %x, float 0xFFF0000000000000) - ret float %val -} - -define <2 x double> @minimum_neginf_commute_vec(<2 x double> %x) { -; CHECK-LABEL: @minimum_neginf_commute_vec( -; CHECK-NEXT: [[R:%.*]] = call <2 x double> @llvm.minimum.v2f64(<2 x double> , <2 x double> [[X:%.*]]) -; CHECK-NEXT: ret <2 x double> [[R]] -; - %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> , <2 x double> %x) - ret <2 x double> %r -} - -; negative test - -define float @minimum_inf(float %x) { -; CHECK-LABEL: @minimum_inf( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.minimum.f32(float 0x7FF0000000000000, float [[X:%.*]]) -; CHECK-NEXT: ret float [[VAL]] -; - %val = call float @llvm.minimum.f32(float 0x7FF0000000000000, float %x) - ret float %val -} 
define float @maximum_x_maximum_x_y(float %x, float %y) { ; CHECK-LABEL: @maximum_x_maximum_x_y( ; CHECK-NEXT: [[A:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float [[Y:%.*]]) @@ -1156,7 +1126,40 @@ define float @maximum_x_y_maximum_z(float %x, float %y, float %z) { ret float %b } -; maximum(X, INF) --> INF +; negative test - minimum(X, -INF) != -INF because X could be NaN + +define float @minimum_neginf(float %x) { +; CHECK-LABEL: @minimum_neginf( +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float 0xFFF0000000000000) +; CHECK-NEXT: ret float [[VAL]] +; + %val = call float @llvm.minimum.f32(float %x, float 0xFFF0000000000000) + ret float %val +} + +; negative test - minimum(-INF, X) != -INF because X could be NaN + +define <2 x double> @minimum_neginf_commute_vec(<2 x double> %x) { +; CHECK-LABEL: @minimum_neginf_commute_vec( +; CHECK-NEXT: [[R:%.*]] = call <2 x double> @llvm.minimum.v2f64(<2 x double> , <2 x double> [[X:%.*]]) +; CHECK-NEXT: ret <2 x double> [[R]] +; + %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> , <2 x double> %x) + ret <2 x double> %r +} + +; TODO: minimum(INF, X) --> X + +define float @minimum_inf(float %x) { +; CHECK-LABEL: @minimum_inf( +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.minimum.f32(float 0x7FF0000000000000, float [[X:%.*]]) +; CHECK-NEXT: ret float [[VAL]] +; + %val = call float @llvm.minimum.f32(float 0x7FF0000000000000, float %x) + ret float %val +} + +; negative test - maximum(X, INF) != INF because X could be NaN define <2 x double> @maximum_inf(<2 x double> %x) { ; CHECK-LABEL: @maximum_inf( @@ -1167,6 +1170,8 @@ define <2 x double> @maximum_inf(<2 x double> %x) { ret <2 x double> %val } +; negative test - maximum(INF, X) != INF because X could be NaN + define float @maximum_inf_commute(float %x) { ; CHECK-LABEL: @maximum_inf_commute( ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.maximum.f32(float 0x7FF0000000000000, float [[X:%.*]]) From dae68fdf9ece930ad158e15966cb99a15636e8c7 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 14 Sep 2020 10:24:19 -0400 Subject: [PATCH 0545/1079] [InstSimplify] add/move tests for fmin/fmax; NFC The new tests are duplicated from the sibling patch for codegen: D87571 --- .../InstSimplify/floating-point-arithmetic.ll | 653 +--------- .../Transforms/InstSimplify/fminmax-folds.ll | 1116 +++++++++++++++++ 2 files changed, 1117 insertions(+), 652 deletions(-) create mode 100644 llvm/test/Transforms/InstSimplify/fminmax-folds.ll diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll index b26ef69c0e01c..b1dd69c19f813 100644 --- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll +++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll @@ -223,6 +223,7 @@ define float @PR22688(float %x) { declare float @llvm.fabs.f32(float) declare <2 x float> @llvm.fabs.v2f32(<2 x float>) declare float @llvm.sqrt.f32(float) +declare float @llvm.maxnum.f32(float, float) define float @fabs_select_positive_constants(i32 %c) { ; CHECK-LABEL: @fabs_select_positive_constants( @@ -529,658 +530,6 @@ define float @fabs_select_positive_constants_vector_extract(i32 %c) { ret float %fabs } -declare float @llvm.minnum.f32(float, float) -declare float @llvm.maxnum.f32(float, float) -declare double @llvm.minnum.f64(double, double) -declare double @llvm.maxnum.f64(double, double) -declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) -declare <2 x double> @llvm.maxnum.v2f64(<2 x 
double>, <2 x double>) - -; From the LangRef for minnum/maxnum: -; "If either operand is a NaN, returns the other non-NaN operand." - -define double @maxnum_nan_op0(double %x) { -; CHECK-LABEL: @maxnum_nan_op0( -; CHECK-NEXT: ret double [[X:%.*]] -; - %r = call double @llvm.maxnum.f64(double 0x7ff8000000000000, double %x) - ret double %r -} - -define double @maxnum_nan_op1(double %x) { -; CHECK-LABEL: @maxnum_nan_op1( -; CHECK-NEXT: ret double [[X:%.*]] -; - %r = call double @llvm.maxnum.f64(double %x, double 0x7ff800000000dead) - ret double %r -} - -define double @minnum_nan_op0(double %x) { -; CHECK-LABEL: @minnum_nan_op0( -; CHECK-NEXT: ret double [[X:%.*]] -; - %r = call double @llvm.minnum.f64(double 0x7ff8000dead00000, double %x) - ret double %r -} - -define double @minnum_nan_op1(double %x) { -; CHECK-LABEL: @minnum_nan_op1( -; CHECK-NEXT: ret double [[X:%.*]] -; - %r = call double @llvm.minnum.f64(double %x, double 0x7ff800dead00dead) - ret double %r -} - -define <2 x double> @maxnum_nan_op0_vec(<2 x double> %x) { -; CHECK-LABEL: @maxnum_nan_op0_vec( -; CHECK-NEXT: ret <2 x double> [[X:%.*]] -; - %r = call <2 x double> @llvm.maxnum.v2f64(<2 x double> , <2 x double> %x) - ret <2 x double> %r -} - -define <2 x double> @maxnum_nan_op1_vec(<2 x double> %x) { -; CHECK-LABEL: @maxnum_nan_op1_vec( -; CHECK-NEXT: ret <2 x double> [[X:%.*]] -; - %r = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> ) - ret <2 x double> %r -} - -define <2 x double> @minnum_nan_op0_vec(<2 x double> %x) { -; CHECK-LABEL: @minnum_nan_op0_vec( -; CHECK-NEXT: ret <2 x double> [[X:%.*]] -; - %r = call <2 x double> @llvm.minnum.v2f64(<2 x double> , <2 x double> %x) - ret <2 x double> %r -} - -define <2 x double> @minnum_nan_op1_vec(<2 x double> %x) { -; CHECK-LABEL: @minnum_nan_op1_vec( -; CHECK-NEXT: ret <2 x double> [[X:%.*]] -; - %r = call <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> ) - ret <2 x double> %r -} - -define float @maxnum_undef_op1(float %x) { -; CHECK-LABEL: @maxnum_undef_op1( -; CHECK-NEXT: ret float [[X:%.*]] -; - %val = call float @llvm.maxnum.f32(float %x, float undef) - ret float %val -} - -define float @maxnum_undef_op0(float %x) { -; CHECK-LABEL: @maxnum_undef_op0( -; CHECK-NEXT: ret float [[X:%.*]] -; - %val = call float @llvm.maxnum.f32(float undef, float %x) - ret float %val -} - -define float @minnum_undef_op1(float %x) { -; CHECK-LABEL: @minnum_undef_op1( -; CHECK-NEXT: ret float [[X:%.*]] -; - %val = call float @llvm.minnum.f32(float %x, float undef) - ret float %val -} - -define float @minnum_undef_op0(float %x) { -; CHECK-LABEL: @minnum_undef_op0( -; CHECK-NEXT: ret float [[X:%.*]] -; - %val = call float @llvm.minnum.f32(float undef, float %x) - ret float %val -} - -define float @minnum_undef_undef(float %x) { -; CHECK-LABEL: @minnum_undef_undef( -; CHECK-NEXT: ret float undef -; - %val = call float @llvm.minnum.f32(float undef, float undef) - ret float %val -} - -define float @maxnum_undef_undef(float %x) { -; CHECK-LABEL: @maxnum_undef_undef( -; CHECK-NEXT: ret float undef -; - %val = call float @llvm.maxnum.f32(float undef, float undef) - ret float %val -} - -define float @minnum_same_args(float %x) { -; CHECK-LABEL: @minnum_same_args( -; CHECK-NEXT: ret float [[X:%.*]] -; - %y = call float @llvm.minnum.f32(float %x, float %x) - ret float %y -} - -define float @maxnum_same_args(float %x) { -; CHECK-LABEL: @maxnum_same_args( -; CHECK-NEXT: ret float [[X:%.*]] -; - %y = call float @llvm.maxnum.f32(float %x, float %x) - ret float %y -} - -define 
float @minnum_x_minnum_x_y(float %x, float %y) { -; CHECK-LABEL: @minnum_x_minnum_x_y( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: ret float [[A]] -; - %a = call float @llvm.minnum.f32(float %x, float %y) - %b = call float @llvm.minnum.f32(float %x, float %a) - ret float %b -} - -define float @minnum_y_minnum_x_y(float %x, float %y) { -; CHECK-LABEL: @minnum_y_minnum_x_y( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: ret float [[A]] -; - %a = call float @llvm.minnum.f32(float %x, float %y) - %b = call float @llvm.minnum.f32(float %y, float %a) - ret float %b -} - -define float @minnum_x_y_minnum_x(float %x, float %y) { -; CHECK-LABEL: @minnum_x_y_minnum_x( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: ret float [[A]] -; - %a = call float @llvm.minnum.f32(float %x, float %y) - %b = call float @llvm.minnum.f32(float %a, float %x) - ret float %b -} - -define float @minnum_x_y_minnum_y(float %x, float %y) { -; CHECK-LABEL: @minnum_x_y_minnum_y( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: ret float [[A]] -; - %a = call float @llvm.minnum.f32(float %x, float %y) - %b = call float @llvm.minnum.f32(float %a, float %y) - ret float %b -} - -; negative test - -define float @minnum_z_minnum_x_y(float %x, float %y, float %z) { -; CHECK-LABEL: @minnum_z_minnum_x_y( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: [[B:%.*]] = call float @llvm.minnum.f32(float [[Z:%.*]], float [[A]]) -; CHECK-NEXT: ret float [[B]] -; - %a = call float @llvm.minnum.f32(float %x, float %y) - %b = call float @llvm.minnum.f32(float %z, float %a) - ret float %b -} - -; negative test - -define float @minnum_x_y_minnum_z(float %x, float %y, float %z) { -; CHECK-LABEL: @minnum_x_y_minnum_z( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: [[B:%.*]] = call float @llvm.minnum.f32(float [[A]], float [[Z:%.*]]) -; CHECK-NEXT: ret float [[B]] -; - %a = call float @llvm.minnum.f32(float %x, float %y) - %b = call float @llvm.minnum.f32(float %a, float %z) - ret float %b -} - -; minnum(X, -INF) --> -INF - -define float @minnum_neginf(float %x) { -; CHECK-LABEL: @minnum_neginf( -; CHECK-NEXT: ret float 0xFFF0000000000000 -; - %val = call float @llvm.minnum.f32(float %x, float 0xFFF0000000000000) - ret float %val -} - -define <2 x double> @minnum_neginf_commute_vec(<2 x double> %x) { -; CHECK-LABEL: @minnum_neginf_commute_vec( -; CHECK-NEXT: ret <2 x double> -; - %r = call <2 x double> @llvm.minnum.v2f64(<2 x double> , <2 x double> %x) - ret <2 x double> %r -} - -; negative test - -define float @minnum_inf(float %x) { -; CHECK-LABEL: @minnum_inf( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.minnum.f32(float 0x7FF0000000000000, float [[X:%.*]]) -; CHECK-NEXT: ret float [[VAL]] -; - %val = call float @llvm.minnum.f32(float 0x7FF0000000000000, float %x) - ret float %val -} -define float @maxnum_x_maxnum_x_y(float %x, float %y) { -; CHECK-LABEL: @maxnum_x_maxnum_x_y( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: ret float [[A]] -; - %a = call float @llvm.maxnum.f32(float %x, float %y) - %b = call float @llvm.maxnum.f32(float %x, float %a) - ret float %b -} - -define float @maxnum_y_maxnum_x_y(float %x, float %y) { -; CHECK-LABEL: 
@maxnum_y_maxnum_x_y( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: ret float [[A]] -; - %a = call float @llvm.maxnum.f32(float %x, float %y) - %b = call float @llvm.maxnum.f32(float %y, float %a) - ret float %b -} - -define float @maxnum_x_y_maxnum_x(float %x, float %y) { -; CHECK-LABEL: @maxnum_x_y_maxnum_x( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: ret float [[A]] -; - %a = call float @llvm.maxnum.f32(float %x, float %y) - %b = call float @llvm.maxnum.f32(float %a, float %x) - ret float %b -} - -define float @maxnum_x_y_maxnum_y(float %x, float %y) { -; CHECK-LABEL: @maxnum_x_y_maxnum_y( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: ret float [[A]] -; - %a = call float @llvm.maxnum.f32(float %x, float %y) - %b = call float @llvm.maxnum.f32(float %a, float %y) - ret float %b -} - -; negative test - -define float @maxnum_z_maxnum_x_y(float %x, float %y, float %z) { -; CHECK-LABEL: @maxnum_z_maxnum_x_y( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: [[B:%.*]] = call float @llvm.maxnum.f32(float [[Z:%.*]], float [[A]]) -; CHECK-NEXT: ret float [[B]] -; - %a = call float @llvm.maxnum.f32(float %x, float %y) - %b = call float @llvm.maxnum.f32(float %z, float %a) - ret float %b -} - -; negative test - -define float @maxnum_x_y_maxnum_z(float %x, float %y, float %z) { -; CHECK-LABEL: @maxnum_x_y_maxnum_z( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: [[B:%.*]] = call float @llvm.maxnum.f32(float [[A]], float [[Z:%.*]]) -; CHECK-NEXT: ret float [[B]] -; - %a = call float @llvm.maxnum.f32(float %x, float %y) - %b = call float @llvm.maxnum.f32(float %a, float %z) - ret float %b -} - -; maxnum(X, INF) --> INF - -define <2 x double> @maxnum_inf(<2 x double> %x) { -; CHECK-LABEL: @maxnum_inf( -; CHECK-NEXT: ret <2 x double> -; - %val = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double>) - ret <2 x double> %val -} - -define float @maxnum_inf_commute(float %x) { -; CHECK-LABEL: @maxnum_inf_commute( -; CHECK-NEXT: ret float 0x7FF0000000000000 -; - %val = call float @llvm.maxnum.f32(float 0x7FF0000000000000, float %x) - ret float %val -} - -; negative test - -define float @maxnum_neginf(float %x) { -; CHECK-LABEL: @maxnum_neginf( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.maxnum.f32(float 0xFFF0000000000000, float [[X:%.*]]) -; CHECK-NEXT: ret float [[VAL]] -; - %val = call float @llvm.maxnum.f32(float 0xFFF0000000000000, float %x) - ret float %val -} - -declare float @llvm.minimum.f32(float, float) -declare float @llvm.maximum.f32(float, float) -declare double @llvm.minimum.f64(double, double) -declare double @llvm.maximum.f64(double, double) -declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>) -declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>) - -; From the LangRef for minimum/maximum: -; "If either operand is a NaN, returns NaN." 
- -define double @maximum_nan_op0(double %x) { -; CHECK-LABEL: @maximum_nan_op0( -; CHECK-NEXT: ret double 0x7FF8000000000000 -; - %r = call double @llvm.maximum.f64(double 0x7ff8000000000000, double %x) - ret double %r -} - -define double @maximum_nan_op1(double %x) { -; CHECK-LABEL: @maximum_nan_op1( -; CHECK-NEXT: ret double 0x7FF800000000DEAD -; - %r = call double @llvm.maximum.f64(double %x, double 0x7ff800000000dead) - ret double %r -} - -define double @minimum_nan_op0(double %x) { -; CHECK-LABEL: @minimum_nan_op0( -; CHECK-NEXT: ret double 0x7FF8000DEAD00000 -; - %r = call double @llvm.minimum.f64(double 0x7ff8000dead00000, double %x) - ret double %r -} - -define double @minimum_nan_op1(double %x) { -; CHECK-LABEL: @minimum_nan_op1( -; CHECK-NEXT: ret double 0x7FF800DEAD00DEAD -; - %r = call double @llvm.minimum.f64(double %x, double 0x7ff800dead00dead) - ret double %r -} - -define <2 x double> @maximum_nan_op0_vec(<2 x double> %x) { -; CHECK-LABEL: @maximum_nan_op0_vec( -; CHECK-NEXT: ret <2 x double> -; - %r = call <2 x double> @llvm.maximum.v2f64(<2 x double> , <2 x double> %x) - ret <2 x double> %r -} - -define <2 x double> @maximum_nan_op1_vec(<2 x double> %x) { -; CHECK-LABEL: @maximum_nan_op1_vec( -; CHECK-NEXT: ret <2 x double> -; - %r = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> ) - ret <2 x double> %r -} - -define <2 x double> @minimum_nan_op0_vec(<2 x double> %x) { -; CHECK-LABEL: @minimum_nan_op0_vec( -; CHECK-NEXT: ret <2 x double> -; - %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> , <2 x double> %x) - ret <2 x double> %r -} - -define <2 x double> @minimum_nan_op1_vec(<2 x double> %x) { -; CHECK-LABEL: @minimum_nan_op1_vec( -; CHECK-NEXT: ret <2 x double> -; - %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> ) - ret <2 x double> %r -} - -define float @maximum_undef_op1(float %x) { -; CHECK-LABEL: @maximum_undef_op1( -; CHECK-NEXT: ret float [[X:%.*]] -; - %val = call float @llvm.maximum.f32(float %x, float undef) - ret float %val -} - -define float @maximum_undef_op0(float %x) { -; CHECK-LABEL: @maximum_undef_op0( -; CHECK-NEXT: ret float [[X:%.*]] -; - %val = call float @llvm.maximum.f32(float undef, float %x) - ret float %val -} - -define float @minimum_undef_op1(float %x) { -; CHECK-LABEL: @minimum_undef_op1( -; CHECK-NEXT: ret float [[X:%.*]] -; - %val = call float @llvm.minimum.f32(float %x, float undef) - ret float %val -} - -define float @minimum_undef_op0(float %x) { -; CHECK-LABEL: @minimum_undef_op0( -; CHECK-NEXT: ret float [[X:%.*]] -; - %val = call float @llvm.minimum.f32(float undef, float %x) - ret float %val -} - -define float @minimum_undef_undef(float %x) { -; CHECK-LABEL: @minimum_undef_undef( -; CHECK-NEXT: ret float undef -; - %val = call float @llvm.minimum.f32(float undef, float undef) - ret float %val -} - -define float @maximum_undef_undef(float %x) { -; CHECK-LABEL: @maximum_undef_undef( -; CHECK-NEXT: ret float undef -; - %val = call float @llvm.maximum.f32(float undef, float undef) - ret float %val -} - -define float @minimum_same_args(float %x) { -; CHECK-LABEL: @minimum_same_args( -; CHECK-NEXT: ret float [[X:%.*]] -; - %y = call float @llvm.minimum.f32(float %x, float %x) - ret float %y -} - -define float @maximum_same_args(float %x) { -; CHECK-LABEL: @maximum_same_args( -; CHECK-NEXT: ret float [[X:%.*]] -; - %y = call float @llvm.maximum.f32(float %x, float %x) - ret float %y -} - -define float @minimum_x_minimum_x_y(float %x, float %y) { -; CHECK-LABEL: @minimum_x_minimum_x_y( 
-; CHECK-NEXT: [[A:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: ret float [[A]] -; - %a = call float @llvm.minimum.f32(float %x, float %y) - %b = call float @llvm.minimum.f32(float %x, float %a) - ret float %b -} - -define float @minimum_y_minimum_x_y(float %x, float %y) { -; CHECK-LABEL: @minimum_y_minimum_x_y( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: ret float [[A]] -; - %a = call float @llvm.minimum.f32(float %x, float %y) - %b = call float @llvm.minimum.f32(float %y, float %a) - ret float %b -} - -define float @minimum_x_y_minimum_x(float %x, float %y) { -; CHECK-LABEL: @minimum_x_y_minimum_x( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: ret float [[A]] -; - %a = call float @llvm.minimum.f32(float %x, float %y) - %b = call float @llvm.minimum.f32(float %a, float %x) - ret float %b -} - -define float @minimum_x_y_minimum_y(float %x, float %y) { -; CHECK-LABEL: @minimum_x_y_minimum_y( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: ret float [[A]] -; - %a = call float @llvm.minimum.f32(float %x, float %y) - %b = call float @llvm.minimum.f32(float %a, float %y) - ret float %b -} - -; negative test - -define float @minimum_z_minimum_x_y(float %x, float %y, float %z) { -; CHECK-LABEL: @minimum_z_minimum_x_y( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: [[B:%.*]] = call float @llvm.minimum.f32(float [[Z:%.*]], float [[A]]) -; CHECK-NEXT: ret float [[B]] -; - %a = call float @llvm.minimum.f32(float %x, float %y) - %b = call float @llvm.minimum.f32(float %z, float %a) - ret float %b -} - -; negative test - -define float @minimum_x_y_minimum_z(float %x, float %y, float %z) { -; CHECK-LABEL: @minimum_x_y_minimum_z( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: [[B:%.*]] = call float @llvm.minimum.f32(float [[A]], float [[Z:%.*]]) -; CHECK-NEXT: ret float [[B]] -; - %a = call float @llvm.minimum.f32(float %x, float %y) - %b = call float @llvm.minimum.f32(float %a, float %z) - ret float %b -} - -define float @maximum_x_maximum_x_y(float %x, float %y) { -; CHECK-LABEL: @maximum_x_maximum_x_y( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: ret float [[A]] -; - %a = call float @llvm.maximum.f32(float %x, float %y) - %b = call float @llvm.maximum.f32(float %x, float %a) - ret float %b -} - -define float @maximum_y_maximum_x_y(float %x, float %y) { -; CHECK-LABEL: @maximum_y_maximum_x_y( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: ret float [[A]] -; - %a = call float @llvm.maximum.f32(float %x, float %y) - %b = call float @llvm.maximum.f32(float %y, float %a) - ret float %b -} - -define float @maximum_x_y_maximum_x(float %x, float %y) { -; CHECK-LABEL: @maximum_x_y_maximum_x( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: ret float [[A]] -; - %a = call float @llvm.maximum.f32(float %x, float %y) - %b = call float @llvm.maximum.f32(float %a, float %x) - ret float %b -} - -define float @maximum_x_y_maximum_y(float %x, float %y) { -; CHECK-LABEL: @maximum_x_y_maximum_y( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: ret float [[A]] -; - %a 
= call float @llvm.maximum.f32(float %x, float %y) - %b = call float @llvm.maximum.f32(float %a, float %y) - ret float %b -} - -; negative test - -define float @maximum_z_maximum_x_y(float %x, float %y, float %z) { -; CHECK-LABEL: @maximum_z_maximum_x_y( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: [[B:%.*]] = call float @llvm.maximum.f32(float [[Z:%.*]], float [[A]]) -; CHECK-NEXT: ret float [[B]] -; - %a = call float @llvm.maximum.f32(float %x, float %y) - %b = call float @llvm.maximum.f32(float %z, float %a) - ret float %b -} - -; negative test - -define float @maximum_x_y_maximum_z(float %x, float %y, float %z) { -; CHECK-LABEL: @maximum_x_y_maximum_z( -; CHECK-NEXT: [[A:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float [[Y:%.*]]) -; CHECK-NEXT: [[B:%.*]] = call float @llvm.maximum.f32(float [[A]], float [[Z:%.*]]) -; CHECK-NEXT: ret float [[B]] -; - %a = call float @llvm.maximum.f32(float %x, float %y) - %b = call float @llvm.maximum.f32(float %a, float %z) - ret float %b -} - -; negative test - minimum(X, -INF) != -INF because X could be NaN - -define float @minimum_neginf(float %x) { -; CHECK-LABEL: @minimum_neginf( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float 0xFFF0000000000000) -; CHECK-NEXT: ret float [[VAL]] -; - %val = call float @llvm.minimum.f32(float %x, float 0xFFF0000000000000) - ret float %val -} - -; negative test - minimum(-INF, X) != -INF because X could be NaN - -define <2 x double> @minimum_neginf_commute_vec(<2 x double> %x) { -; CHECK-LABEL: @minimum_neginf_commute_vec( -; CHECK-NEXT: [[R:%.*]] = call <2 x double> @llvm.minimum.v2f64(<2 x double> , <2 x double> [[X:%.*]]) -; CHECK-NEXT: ret <2 x double> [[R]] -; - %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> , <2 x double> %x) - ret <2 x double> %r -} - -; TODO: minimum(INF, X) --> X - -define float @minimum_inf(float %x) { -; CHECK-LABEL: @minimum_inf( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.minimum.f32(float 0x7FF0000000000000, float [[X:%.*]]) -; CHECK-NEXT: ret float [[VAL]] -; - %val = call float @llvm.minimum.f32(float 0x7FF0000000000000, float %x) - ret float %val -} - -; negative test - maximum(X, INF) != INF because X could be NaN - -define <2 x double> @maximum_inf(<2 x double> %x) { -; CHECK-LABEL: @maximum_inf( -; CHECK-NEXT: [[VAL:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[X:%.*]], <2 x double> ) -; CHECK-NEXT: ret <2 x double> [[VAL]] -; - %val = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double>) - ret <2 x double> %val -} - -; negative test - maximum(INF, X) != INF because X could be NaN - -define float @maximum_inf_commute(float %x) { -; CHECK-LABEL: @maximum_inf_commute( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.maximum.f32(float 0x7FF0000000000000, float [[X:%.*]]) -; CHECK-NEXT: ret float [[VAL]] -; - %val = call float @llvm.maximum.f32(float 0x7FF0000000000000, float %x) - ret float %val -} - ; Y - (Y - X) --> X define float @fsub_fsub_common_op(float %x, float %y) { diff --git a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll new file mode 100644 index 0000000000000..5d502d22cccab --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll @@ -0,0 +1,1116 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instsimplify -S | FileCheck %s + +declare float @llvm.minnum.f32(float, float) +declare float @llvm.maxnum.f32(float, 
float) +declare float @llvm.minimum.f32(float, float) +declare float @llvm.maximum.f32(float, float) +declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) +declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) +declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>) +declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>) + +declare double @llvm.minnum.f64(double, double) +declare double @llvm.maxnum.f64(double, double) +declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) +declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) +declare double @llvm.minimum.f64(double, double) +declare double @llvm.maximum.f64(double, double) +declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>) +declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>) + +define float @test_minnum_const_nan(float %x) { +; CHECK-LABEL: @test_minnum_const_nan( +; CHECK-NEXT: ret float [[X:%.*]] +; + %r = call float @llvm.minnum.f32(float %x, float 0x7fff000000000000) + ret float %r +} + +define float @test_maxnum_const_nan(float %x) { +; CHECK-LABEL: @test_maxnum_const_nan( +; CHECK-NEXT: ret float [[X:%.*]] +; + %r = call float @llvm.maxnum.f32(float %x, float 0x7fff000000000000) + ret float %r +} + +define float @test_maximum_const_nan(float %x) { +; CHECK-LABEL: @test_maximum_const_nan( +; CHECK-NEXT: ret float 0x7FFF000000000000 +; + %r = call float @llvm.maximum.f32(float %x, float 0x7fff000000000000) + ret float %r +} + +define float @test_minimum_const_nan(float %x) { +; CHECK-LABEL: @test_minimum_const_nan( +; CHECK-NEXT: ret float 0x7FFF000000000000 +; + %r = call float @llvm.minimum.f32(float %x, float 0x7fff000000000000) + ret float %r +} + +define float @test_minnum_const_inf(float %x) { +; CHECK-LABEL: @test_minnum_const_inf( +; CHECK-NEXT: [[R:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 0x7FF0000000000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call float @llvm.minnum.f32(float %x, float 0x7ff0000000000000) + ret float %r +} + +define float @test_maxnum_const_inf(float %x) { +; CHECK-LABEL: @test_maxnum_const_inf( +; CHECK-NEXT: ret float 0x7FF0000000000000 +; + %r = call float @llvm.maxnum.f32(float %x, float 0x7ff0000000000000) + ret float %r +} + +define float @test_maximum_const_inf(float %x) { +; CHECK-LABEL: @test_maximum_const_inf( +; CHECK-NEXT: [[R:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float 0x7FF0000000000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call float @llvm.maximum.f32(float %x, float 0x7ff0000000000000) + ret float %r +} + +define float @test_minimum_const_inf(float %x) { +; CHECK-LABEL: @test_minimum_const_inf( +; CHECK-NEXT: [[R:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float 0x7FF0000000000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call float @llvm.minimum.f32(float %x, float 0x7ff0000000000000) + ret float %r +} + +define float @test_minnum_const_neg_inf(float %x) { +; CHECK-LABEL: @test_minnum_const_neg_inf( +; CHECK-NEXT: ret float 0xFFF0000000000000 +; + %r = call float @llvm.minnum.f32(float %x, float 0xfff0000000000000) + ret float %r +} + +define float @test_maxnum_const_neg_inf(float %x) { +; CHECK-LABEL: @test_maxnum_const_neg_inf( +; CHECK-NEXT: [[R:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float 0xFFF0000000000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call float @llvm.maxnum.f32(float %x, float 0xfff0000000000000) + ret float %r +} + +define float @test_maximum_const_neg_inf(float %x) { +; CHECK-LABEL: 
@test_maximum_const_neg_inf( +; CHECK-NEXT: [[R:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float 0xFFF0000000000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call float @llvm.maximum.f32(float %x, float 0xfff0000000000000) + ret float %r +} + +define float @test_minimum_const_neg_inf(float %x) { +; CHECK-LABEL: @test_minimum_const_neg_inf( +; CHECK-NEXT: [[R:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float 0xFFF0000000000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call float @llvm.minimum.f32(float %x, float 0xfff0000000000000) + ret float %r +} + +define float @test_minnum_const_inf_nnan(float %x) { +; CHECK-LABEL: @test_minnum_const_inf_nnan( +; CHECK-NEXT: [[R:%.*]] = call nnan float @llvm.minnum.f32(float [[X:%.*]], float 0x7FF0000000000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call nnan float @llvm.minnum.f32(float %x, float 0x7ff0000000000000) + ret float %r +} + +define float @test_maxnum_const_inf_nnan(float %x) { +; CHECK-LABEL: @test_maxnum_const_inf_nnan( +; CHECK-NEXT: ret float 0x7FF0000000000000 +; + %r = call nnan float @llvm.maxnum.f32(float %x, float 0x7ff0000000000000) + ret float %r +} + +define float @test_maximum_const_inf_nnan(float %x) { +; CHECK-LABEL: @test_maximum_const_inf_nnan( +; CHECK-NEXT: [[R:%.*]] = call nnan float @llvm.maximum.f32(float [[X:%.*]], float 0x7FF0000000000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call nnan float @llvm.maximum.f32(float %x, float 0x7ff0000000000000) + ret float %r +} + +define float @test_minimum_const_inf_nnan(float %x) { +; CHECK-LABEL: @test_minimum_const_inf_nnan( +; CHECK-NEXT: [[R:%.*]] = call nnan float @llvm.minimum.f32(float [[X:%.*]], float 0x7FF0000000000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call nnan float @llvm.minimum.f32(float %x, float 0x7ff0000000000000) + ret float %r +} + +define float @test_minnum_const_inf_nnan_comm(float %x) { +; CHECK-LABEL: @test_minnum_const_inf_nnan_comm( +; CHECK-NEXT: [[R:%.*]] = call nnan float @llvm.minnum.f32(float 0x7FF0000000000000, float [[X:%.*]]) +; CHECK-NEXT: ret float [[R]] +; + %r = call nnan float @llvm.minnum.f32(float 0x7ff0000000000000, float %x) + ret float %r +} + +define float @test_maxnum_const_inf_nnan_comm(float %x) { +; CHECK-LABEL: @test_maxnum_const_inf_nnan_comm( +; CHECK-NEXT: ret float 0x7FF0000000000000 +; + %r = call nnan float @llvm.maxnum.f32(float 0x7ff0000000000000, float %x) + ret float %r +} + +define float @test_maximum_const_inf_nnan_comm(float %x) { +; CHECK-LABEL: @test_maximum_const_inf_nnan_comm( +; CHECK-NEXT: [[R:%.*]] = call nnan float @llvm.maximum.f32(float 0x7FF0000000000000, float [[X:%.*]]) +; CHECK-NEXT: ret float [[R]] +; + %r = call nnan float @llvm.maximum.f32(float 0x7ff0000000000000, float %x) + ret float %r +} + +define float @test_minimum_const_inf_nnan_comm(float %x) { +; CHECK-LABEL: @test_minimum_const_inf_nnan_comm( +; CHECK-NEXT: [[R:%.*]] = call nnan float @llvm.minimum.f32(float 0x7FF0000000000000, float [[X:%.*]]) +; CHECK-NEXT: ret float [[R]] +; + %r = call nnan float @llvm.minimum.f32(float 0x7ff0000000000000, float %x) + ret float %r +} + +define <2 x float> @test_minnum_const_inf_nnan_comm_vec(<2 x float> %x) { +; CHECK-LABEL: @test_minnum_const_inf_nnan_comm_vec( +; CHECK-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.minnum.v2f32(<2 x float> , <2 x float> [[X:%.*]]) +; CHECK-NEXT: ret <2 x float> [[R]] +; + %r = call nnan <2 x float> @llvm.minnum.v2f32(<2 x float> , <2 x float> %x) + ret <2 x float> %r +} + +define <2 x float> @test_maxnum_const_inf_nnan_comm_vec(<2 x 
float> %x) { +; CHECK-LABEL: @test_maxnum_const_inf_nnan_comm_vec( +; CHECK-NEXT: ret <2 x float> +; + %r = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> , <2 x float> %x) + ret <2 x float> %r +} + +define <2 x float> @test_maximum_const_inf_nnan_comm_vec(<2 x float> %x) { +; CHECK-LABEL: @test_maximum_const_inf_nnan_comm_vec( +; CHECK-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.maximum.v2f32(<2 x float> , <2 x float> [[X:%.*]]) +; CHECK-NEXT: ret <2 x float> [[R]] +; + %r = call nnan <2 x float> @llvm.maximum.v2f32(<2 x float> , <2 x float> %x) + ret <2 x float> %r +} + +define <2 x float> @test_minimum_const_inf_nnan_comm_vec(<2 x float> %x) { +; CHECK-LABEL: @test_minimum_const_inf_nnan_comm_vec( +; CHECK-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.minimum.v2f32(<2 x float> , <2 x float> [[X:%.*]]) +; CHECK-NEXT: ret <2 x float> [[R]] +; + %r = call nnan <2 x float> @llvm.minimum.v2f32(<2 x float> , <2 x float> %x) + ret <2 x float> %r +} + +define float @test_minnum_const_neg_inf_nnan(float %x) { +; CHECK-LABEL: @test_minnum_const_neg_inf_nnan( +; CHECK-NEXT: ret float 0xFFF0000000000000 +; + %r = call nnan float @llvm.minnum.f32(float %x, float 0xfff0000000000000) + ret float %r +} + +define float @test_maxnum_const_neg_inf_nnan(float %x) { +; CHECK-LABEL: @test_maxnum_const_neg_inf_nnan( +; CHECK-NEXT: [[R:%.*]] = call nnan float @llvm.maxnum.f32(float [[X:%.*]], float 0xFFF0000000000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call nnan float @llvm.maxnum.f32(float %x, float 0xfff0000000000000) + ret float %r +} + +define float @test_maximum_const_neg_inf_nnan(float %x) { +; CHECK-LABEL: @test_maximum_const_neg_inf_nnan( +; CHECK-NEXT: [[R:%.*]] = call nnan float @llvm.maximum.f32(float [[X:%.*]], float 0xFFF0000000000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call nnan float @llvm.maximum.f32(float %x, float 0xfff0000000000000) + ret float %r +} + +define float @test_minimum_const_neg_inf_nnan(float %x) { +; CHECK-LABEL: @test_minimum_const_neg_inf_nnan( +; CHECK-NEXT: [[R:%.*]] = call nnan float @llvm.minimum.f32(float [[X:%.*]], float 0xFFF0000000000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call nnan float @llvm.minimum.f32(float %x, float 0xfff0000000000000) + ret float %r +} + +define float @test_minnum_const_max(float %x) { +; CHECK-LABEL: @test_minnum_const_max( +; CHECK-NEXT: [[R:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call float @llvm.minnum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_maxnum_const_max(float %x) { +; CHECK-LABEL: @test_maxnum_const_max( +; CHECK-NEXT: [[R:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call float @llvm.maxnum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_maximum_const_max(float %x) { +; CHECK-LABEL: @test_maximum_const_max( +; CHECK-NEXT: [[R:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call float @llvm.maximum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_minimum_const_max(float %x) { +; CHECK-LABEL: @test_minimum_const_max( +; CHECK-NEXT: [[R:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call float @llvm.minimum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_minnum_const_neg_max(float %x) 
{ +; CHECK-LABEL: @test_minnum_const_neg_max( +; CHECK-NEXT: [[R:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 0xC7EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call float @llvm.minnum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_maxnum_const_neg_max(float %x) { +; CHECK-LABEL: @test_maxnum_const_neg_max( +; CHECK-NEXT: [[R:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float 0xC7EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call float @llvm.maxnum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_maximum_const_neg_max(float %x) { +; CHECK-LABEL: @test_maximum_const_neg_max( +; CHECK-NEXT: [[R:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float 0xC7EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call float @llvm.maximum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_minimum_const_neg_max(float %x) { +; CHECK-LABEL: @test_minimum_const_neg_max( +; CHECK-NEXT: [[R:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float 0xC7EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call float @llvm.minimum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_minnum_const_max_ninf(float %x) { +; CHECK-LABEL: @test_minnum_const_max_ninf( +; CHECK-NEXT: [[R:%.*]] = call ninf float @llvm.minnum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call ninf float @llvm.minnum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_maxnum_const_max_ninf(float %x) { +; CHECK-LABEL: @test_maxnum_const_max_ninf( +; CHECK-NEXT: [[R:%.*]] = call ninf float @llvm.maxnum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call ninf float @llvm.maxnum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_maximum_const_max_ninf(float %x) { +; CHECK-LABEL: @test_maximum_const_max_ninf( +; CHECK-NEXT: [[R:%.*]] = call ninf float @llvm.maximum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call ninf float @llvm.maximum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_minimum_const_max_ninf(float %x) { +; CHECK-LABEL: @test_minimum_const_max_ninf( +; CHECK-NEXT: [[R:%.*]] = call ninf float @llvm.minimum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call ninf float @llvm.minimum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_minnum_const_neg_max_ninf(float %x) { +; CHECK-LABEL: @test_minnum_const_neg_max_ninf( +; CHECK-NEXT: [[R:%.*]] = call ninf float @llvm.minnum.f32(float [[X:%.*]], float 0xC7EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call ninf float @llvm.minnum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_maxnum_const_neg_max_ninf(float %x) { +; CHECK-LABEL: @test_maxnum_const_neg_max_ninf( +; CHECK-NEXT: [[R:%.*]] = call ninf float @llvm.maxnum.f32(float [[X:%.*]], float 0xC7EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call ninf float @llvm.maxnum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_maximum_const_neg_max_ninf(float %x) { +; CHECK-LABEL: @test_maximum_const_neg_max_ninf( +; CHECK-NEXT: [[R:%.*]] = call ninf float @llvm.maximum.f32(float [[X:%.*]], float 0xC7EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call ninf float @llvm.maximum.f32(float %x, 
float 0xc7efffffe0000000) + ret float %r +} + +define float @test_minimum_const_neg_max_ninf(float %x) { +; CHECK-LABEL: @test_minimum_const_neg_max_ninf( +; CHECK-NEXT: [[R:%.*]] = call ninf float @llvm.minimum.f32(float [[X:%.*]], float 0xC7EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call ninf float @llvm.minimum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_minnum_const_max_nnan_ninf(float %x) { +; CHECK-LABEL: @test_minnum_const_max_nnan_ninf( +; CHECK-NEXT: [[R:%.*]] = call nnan ninf float @llvm.minnum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call nnan ninf float @llvm.minnum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_maxnum_const_max_nnan_ninf(float %x) { +; CHECK-LABEL: @test_maxnum_const_max_nnan_ninf( +; CHECK-NEXT: [[R:%.*]] = call nnan ninf float @llvm.maxnum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call nnan ninf float @llvm.maxnum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_maximum_const_max_nnan_ninf(float %x) { +; CHECK-LABEL: @test_maximum_const_max_nnan_ninf( +; CHECK-NEXT: [[R:%.*]] = call nnan ninf float @llvm.maximum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call nnan ninf float @llvm.maximum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_minimum_const_max_nnan_ninf(float %x) { +; CHECK-LABEL: @test_minimum_const_max_nnan_ninf( +; CHECK-NEXT: [[R:%.*]] = call nnan ninf float @llvm.minimum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call nnan ninf float @llvm.minimum.f32(float %x, float 0x47efffffe0000000) + ret float %r +} + +define float @test_minnum_const_neg_max_nnan_ninf(float %x) { +; CHECK-LABEL: @test_minnum_const_neg_max_nnan_ninf( +; CHECK-NEXT: [[R:%.*]] = call nnan ninf float @llvm.minnum.f32(float [[X:%.*]], float 0xC7EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call nnan ninf float @llvm.minnum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_maxnum_const_neg_max_nnan_ninf(float %x) { +; CHECK-LABEL: @test_maxnum_const_neg_max_nnan_ninf( +; CHECK-NEXT: [[R:%.*]] = call nnan ninf float @llvm.maxnum.f32(float [[X:%.*]], float 0xC7EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call nnan ninf float @llvm.maxnum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_maximum_const_neg_max_nnan_ninf(float %x) { +; CHECK-LABEL: @test_maximum_const_neg_max_nnan_ninf( +; CHECK-NEXT: [[R:%.*]] = call nnan ninf float @llvm.maximum.f32(float [[X:%.*]], float 0xC7EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call nnan ninf float @llvm.maximum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +define float @test_minimum_const_neg_max_nnan_ninf(float %x) { +; CHECK-LABEL: @test_minimum_const_neg_max_nnan_ninf( +; CHECK-NEXT: [[R:%.*]] = call nnan ninf float @llvm.minimum.f32(float [[X:%.*]], float 0xC7EFFFFFE0000000) +; CHECK-NEXT: ret float [[R]] +; + %r = call nnan ninf float @llvm.minimum.f32(float %x, float 0xc7efffffe0000000) + ret float %r +} + +; From the LangRef for minnum/maxnum: +; "If either operand is a NaN, returns the other non-NaN operand." 
+ +define double @maxnum_nan_op0(double %x) { +; CHECK-LABEL: @maxnum_nan_op0( +; CHECK-NEXT: ret double [[X:%.*]] +; + %r = call double @llvm.maxnum.f64(double 0x7ff8000000000000, double %x) + ret double %r +} + +define double @maxnum_nan_op1(double %x) { +; CHECK-LABEL: @maxnum_nan_op1( +; CHECK-NEXT: ret double [[X:%.*]] +; + %r = call double @llvm.maxnum.f64(double %x, double 0x7ff800000000dead) + ret double %r +} + +define double @minnum_nan_op0(double %x) { +; CHECK-LABEL: @minnum_nan_op0( +; CHECK-NEXT: ret double [[X:%.*]] +; + %r = call double @llvm.minnum.f64(double 0x7ff8000dead00000, double %x) + ret double %r +} + +define double @minnum_nan_op1(double %x) { +; CHECK-LABEL: @minnum_nan_op1( +; CHECK-NEXT: ret double [[X:%.*]] +; + %r = call double @llvm.minnum.f64(double %x, double 0x7ff800dead00dead) + ret double %r +} + +define <2 x double> @maxnum_nan_op0_vec(<2 x double> %x) { +; CHECK-LABEL: @maxnum_nan_op0_vec( +; CHECK-NEXT: ret <2 x double> [[X:%.*]] +; + %r = call <2 x double> @llvm.maxnum.v2f64(<2 x double> , <2 x double> %x) + ret <2 x double> %r +} + +define <2 x double> @maxnum_nan_op1_vec(<2 x double> %x) { +; CHECK-LABEL: @maxnum_nan_op1_vec( +; CHECK-NEXT: ret <2 x double> [[X:%.*]] +; + %r = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> ) + ret <2 x double> %r +} + +define <2 x double> @minnum_nan_op0_vec(<2 x double> %x) { +; CHECK-LABEL: @minnum_nan_op0_vec( +; CHECK-NEXT: ret <2 x double> [[X:%.*]] +; + %r = call <2 x double> @llvm.minnum.v2f64(<2 x double> , <2 x double> %x) + ret <2 x double> %r +} + +define <2 x double> @minnum_nan_op1_vec(<2 x double> %x) { +; CHECK-LABEL: @minnum_nan_op1_vec( +; CHECK-NEXT: ret <2 x double> [[X:%.*]] +; + %r = call <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> ) + ret <2 x double> %r +} + +define float @maxnum_undef_op1(float %x) { +; CHECK-LABEL: @maxnum_undef_op1( +; CHECK-NEXT: ret float [[X:%.*]] +; + %val = call float @llvm.maxnum.f32(float %x, float undef) + ret float %val +} + +define float @maxnum_undef_op0(float %x) { +; CHECK-LABEL: @maxnum_undef_op0( +; CHECK-NEXT: ret float [[X:%.*]] +; + %val = call float @llvm.maxnum.f32(float undef, float %x) + ret float %val +} + +define float @minnum_undef_op1(float %x) { +; CHECK-LABEL: @minnum_undef_op1( +; CHECK-NEXT: ret float [[X:%.*]] +; + %val = call float @llvm.minnum.f32(float %x, float undef) + ret float %val +} + +define float @minnum_undef_op0(float %x) { +; CHECK-LABEL: @minnum_undef_op0( +; CHECK-NEXT: ret float [[X:%.*]] +; + %val = call float @llvm.minnum.f32(float undef, float %x) + ret float %val +} + +define float @minnum_undef_undef(float %x) { +; CHECK-LABEL: @minnum_undef_undef( +; CHECK-NEXT: ret float undef +; + %val = call float @llvm.minnum.f32(float undef, float undef) + ret float %val +} + +define float @maxnum_undef_undef(float %x) { +; CHECK-LABEL: @maxnum_undef_undef( +; CHECK-NEXT: ret float undef +; + %val = call float @llvm.maxnum.f32(float undef, float undef) + ret float %val +} + +define float @minnum_same_args(float %x) { +; CHECK-LABEL: @minnum_same_args( +; CHECK-NEXT: ret float [[X:%.*]] +; + %y = call float @llvm.minnum.f32(float %x, float %x) + ret float %y +} + +define float @maxnum_same_args(float %x) { +; CHECK-LABEL: @maxnum_same_args( +; CHECK-NEXT: ret float [[X:%.*]] +; + %y = call float @llvm.maxnum.f32(float %x, float %x) + ret float %y +} + +define float @minnum_x_minnum_x_y(float %x, float %y) { +; CHECK-LABEL: @minnum_x_minnum_x_y( +; CHECK-NEXT: [[A:%.*]] = call float 
@llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: ret float [[A]] +; + %a = call float @llvm.minnum.f32(float %x, float %y) + %b = call float @llvm.minnum.f32(float %x, float %a) + ret float %b +} + +define float @minnum_y_minnum_x_y(float %x, float %y) { +; CHECK-LABEL: @minnum_y_minnum_x_y( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: ret float [[A]] +; + %a = call float @llvm.minnum.f32(float %x, float %y) + %b = call float @llvm.minnum.f32(float %y, float %a) + ret float %b +} + +define float @minnum_x_y_minnum_x(float %x, float %y) { +; CHECK-LABEL: @minnum_x_y_minnum_x( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: ret float [[A]] +; + %a = call float @llvm.minnum.f32(float %x, float %y) + %b = call float @llvm.minnum.f32(float %a, float %x) + ret float %b +} + +define float @minnum_x_y_minnum_y(float %x, float %y) { +; CHECK-LABEL: @minnum_x_y_minnum_y( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: ret float [[A]] +; + %a = call float @llvm.minnum.f32(float %x, float %y) + %b = call float @llvm.minnum.f32(float %a, float %y) + ret float %b +} + +; negative test + +define float @minnum_z_minnum_x_y(float %x, float %y, float %z) { +; CHECK-LABEL: @minnum_z_minnum_x_y( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: [[B:%.*]] = call float @llvm.minnum.f32(float [[Z:%.*]], float [[A]]) +; CHECK-NEXT: ret float [[B]] +; + %a = call float @llvm.minnum.f32(float %x, float %y) + %b = call float @llvm.minnum.f32(float %z, float %a) + ret float %b +} + +; negative test + +define float @minnum_x_y_minnum_z(float %x, float %y, float %z) { +; CHECK-LABEL: @minnum_x_y_minnum_z( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: [[B:%.*]] = call float @llvm.minnum.f32(float [[A]], float [[Z:%.*]]) +; CHECK-NEXT: ret float [[B]] +; + %a = call float @llvm.minnum.f32(float %x, float %y) + %b = call float @llvm.minnum.f32(float %a, float %z) + ret float %b +} + +; minnum(X, -INF) --> -INF + +define float @minnum_neginf(float %x) { +; CHECK-LABEL: @minnum_neginf( +; CHECK-NEXT: ret float 0xFFF0000000000000 +; + %val = call float @llvm.minnum.f32(float %x, float 0xFFF0000000000000) + ret float %val +} + +define <2 x double> @minnum_neginf_commute_vec(<2 x double> %x) { +; CHECK-LABEL: @minnum_neginf_commute_vec( +; CHECK-NEXT: ret <2 x double> +; + %r = call <2 x double> @llvm.minnum.v2f64(<2 x double> , <2 x double> %x) + ret <2 x double> %r +} + +; negative test + +define float @minnum_inf(float %x) { +; CHECK-LABEL: @minnum_inf( +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.minnum.f32(float 0x7FF0000000000000, float [[X:%.*]]) +; CHECK-NEXT: ret float [[VAL]] +; + %val = call float @llvm.minnum.f32(float 0x7FF0000000000000, float %x) + ret float %val +} +define float @maxnum_x_maxnum_x_y(float %x, float %y) { +; CHECK-LABEL: @maxnum_x_maxnum_x_y( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: ret float [[A]] +; + %a = call float @llvm.maxnum.f32(float %x, float %y) + %b = call float @llvm.maxnum.f32(float %x, float %a) + ret float %b +} + +define float @maxnum_y_maxnum_x_y(float %x, float %y) { +; CHECK-LABEL: @maxnum_y_maxnum_x_y( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: ret float [[A]] 
+; + %a = call float @llvm.maxnum.f32(float %x, float %y) + %b = call float @llvm.maxnum.f32(float %y, float %a) + ret float %b +} + +define float @maxnum_x_y_maxnum_x(float %x, float %y) { +; CHECK-LABEL: @maxnum_x_y_maxnum_x( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: ret float [[A]] +; + %a = call float @llvm.maxnum.f32(float %x, float %y) + %b = call float @llvm.maxnum.f32(float %a, float %x) + ret float %b +} + +define float @maxnum_x_y_maxnum_y(float %x, float %y) { +; CHECK-LABEL: @maxnum_x_y_maxnum_y( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: ret float [[A]] +; + %a = call float @llvm.maxnum.f32(float %x, float %y) + %b = call float @llvm.maxnum.f32(float %a, float %y) + ret float %b +} + +; negative test + +define float @maxnum_z_maxnum_x_y(float %x, float %y, float %z) { +; CHECK-LABEL: @maxnum_z_maxnum_x_y( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: [[B:%.*]] = call float @llvm.maxnum.f32(float [[Z:%.*]], float [[A]]) +; CHECK-NEXT: ret float [[B]] +; + %a = call float @llvm.maxnum.f32(float %x, float %y) + %b = call float @llvm.maxnum.f32(float %z, float %a) + ret float %b +} + +; negative test + +define float @maxnum_x_y_maxnum_z(float %x, float %y, float %z) { +; CHECK-LABEL: @maxnum_x_y_maxnum_z( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: [[B:%.*]] = call float @llvm.maxnum.f32(float [[A]], float [[Z:%.*]]) +; CHECK-NEXT: ret float [[B]] +; + %a = call float @llvm.maxnum.f32(float %x, float %y) + %b = call float @llvm.maxnum.f32(float %a, float %z) + ret float %b +} + +; maxnum(X, INF) --> INF + +define <2 x double> @maxnum_inf(<2 x double> %x) { +; CHECK-LABEL: @maxnum_inf( +; CHECK-NEXT: ret <2 x double> +; + %val = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double>) + ret <2 x double> %val +} + +define float @maxnum_inf_commute(float %x) { +; CHECK-LABEL: @maxnum_inf_commute( +; CHECK-NEXT: ret float 0x7FF0000000000000 +; + %val = call float @llvm.maxnum.f32(float 0x7FF0000000000000, float %x) + ret float %val +} + +; negative test + +define float @maxnum_neginf(float %x) { +; CHECK-LABEL: @maxnum_neginf( +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.maxnum.f32(float 0xFFF0000000000000, float [[X:%.*]]) +; CHECK-NEXT: ret float [[VAL]] +; + %val = call float @llvm.maxnum.f32(float 0xFFF0000000000000, float %x) + ret float %val +} + +; From the LangRef for minimum/maximum: +; "If either operand is a NaN, returns NaN." 
+ +define double @maximum_nan_op0(double %x) { +; CHECK-LABEL: @maximum_nan_op0( +; CHECK-NEXT: ret double 0x7FF8000000000000 +; + %r = call double @llvm.maximum.f64(double 0x7ff8000000000000, double %x) + ret double %r +} + +define double @maximum_nan_op1(double %x) { +; CHECK-LABEL: @maximum_nan_op1( +; CHECK-NEXT: ret double 0x7FF800000000DEAD +; + %r = call double @llvm.maximum.f64(double %x, double 0x7ff800000000dead) + ret double %r +} + +define double @minimum_nan_op0(double %x) { +; CHECK-LABEL: @minimum_nan_op0( +; CHECK-NEXT: ret double 0x7FF8000DEAD00000 +; + %r = call double @llvm.minimum.f64(double 0x7ff8000dead00000, double %x) + ret double %r +} + +define double @minimum_nan_op1(double %x) { +; CHECK-LABEL: @minimum_nan_op1( +; CHECK-NEXT: ret double 0x7FF800DEAD00DEAD +; + %r = call double @llvm.minimum.f64(double %x, double 0x7ff800dead00dead) + ret double %r +} + +define <2 x double> @maximum_nan_op0_vec(<2 x double> %x) { +; CHECK-LABEL: @maximum_nan_op0_vec( +; CHECK-NEXT: ret <2 x double> +; + %r = call <2 x double> @llvm.maximum.v2f64(<2 x double> , <2 x double> %x) + ret <2 x double> %r +} + +define <2 x double> @maximum_nan_op1_vec(<2 x double> %x) { +; CHECK-LABEL: @maximum_nan_op1_vec( +; CHECK-NEXT: ret <2 x double> +; + %r = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> ) + ret <2 x double> %r +} + +define <2 x double> @minimum_nan_op0_vec(<2 x double> %x) { +; CHECK-LABEL: @minimum_nan_op0_vec( +; CHECK-NEXT: ret <2 x double> +; + %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> , <2 x double> %x) + ret <2 x double> %r +} + +define <2 x double> @minimum_nan_op1_vec(<2 x double> %x) { +; CHECK-LABEL: @minimum_nan_op1_vec( +; CHECK-NEXT: ret <2 x double> +; + %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> ) + ret <2 x double> %r +} + +define float @maximum_undef_op1(float %x) { +; CHECK-LABEL: @maximum_undef_op1( +; CHECK-NEXT: ret float [[X:%.*]] +; + %val = call float @llvm.maximum.f32(float %x, float undef) + ret float %val +} + +define float @maximum_undef_op0(float %x) { +; CHECK-LABEL: @maximum_undef_op0( +; CHECK-NEXT: ret float [[X:%.*]] +; + %val = call float @llvm.maximum.f32(float undef, float %x) + ret float %val +} + +define float @minimum_undef_op1(float %x) { +; CHECK-LABEL: @minimum_undef_op1( +; CHECK-NEXT: ret float [[X:%.*]] +; + %val = call float @llvm.minimum.f32(float %x, float undef) + ret float %val +} + +define float @minimum_undef_op0(float %x) { +; CHECK-LABEL: @minimum_undef_op0( +; CHECK-NEXT: ret float [[X:%.*]] +; + %val = call float @llvm.minimum.f32(float undef, float %x) + ret float %val +} + +define float @minimum_undef_undef(float %x) { +; CHECK-LABEL: @minimum_undef_undef( +; CHECK-NEXT: ret float undef +; + %val = call float @llvm.minimum.f32(float undef, float undef) + ret float %val +} + +define float @maximum_undef_undef(float %x) { +; CHECK-LABEL: @maximum_undef_undef( +; CHECK-NEXT: ret float undef +; + %val = call float @llvm.maximum.f32(float undef, float undef) + ret float %val +} + +define float @minimum_same_args(float %x) { +; CHECK-LABEL: @minimum_same_args( +; CHECK-NEXT: ret float [[X:%.*]] +; + %y = call float @llvm.minimum.f32(float %x, float %x) + ret float %y +} + +define float @maximum_same_args(float %x) { +; CHECK-LABEL: @maximum_same_args( +; CHECK-NEXT: ret float [[X:%.*]] +; + %y = call float @llvm.maximum.f32(float %x, float %x) + ret float %y +} + +define float @minimum_x_minimum_x_y(float %x, float %y) { +; CHECK-LABEL: @minimum_x_minimum_x_y( 
+; CHECK-NEXT: [[A:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: ret float [[A]] +; + %a = call float @llvm.minimum.f32(float %x, float %y) + %b = call float @llvm.minimum.f32(float %x, float %a) + ret float %b +} + +define float @minimum_y_minimum_x_y(float %x, float %y) { +; CHECK-LABEL: @minimum_y_minimum_x_y( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: ret float [[A]] +; + %a = call float @llvm.minimum.f32(float %x, float %y) + %b = call float @llvm.minimum.f32(float %y, float %a) + ret float %b +} + +define float @minimum_x_y_minimum_x(float %x, float %y) { +; CHECK-LABEL: @minimum_x_y_minimum_x( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: ret float [[A]] +; + %a = call float @llvm.minimum.f32(float %x, float %y) + %b = call float @llvm.minimum.f32(float %a, float %x) + ret float %b +} + +define float @minimum_x_y_minimum_y(float %x, float %y) { +; CHECK-LABEL: @minimum_x_y_minimum_y( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: ret float [[A]] +; + %a = call float @llvm.minimum.f32(float %x, float %y) + %b = call float @llvm.minimum.f32(float %a, float %y) + ret float %b +} + +; negative test + +define float @minimum_z_minimum_x_y(float %x, float %y, float %z) { +; CHECK-LABEL: @minimum_z_minimum_x_y( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: [[B:%.*]] = call float @llvm.minimum.f32(float [[Z:%.*]], float [[A]]) +; CHECK-NEXT: ret float [[B]] +; + %a = call float @llvm.minimum.f32(float %x, float %y) + %b = call float @llvm.minimum.f32(float %z, float %a) + ret float %b +} + +; negative test + +define float @minimum_x_y_minimum_z(float %x, float %y, float %z) { +; CHECK-LABEL: @minimum_x_y_minimum_z( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: [[B:%.*]] = call float @llvm.minimum.f32(float [[A]], float [[Z:%.*]]) +; CHECK-NEXT: ret float [[B]] +; + %a = call float @llvm.minimum.f32(float %x, float %y) + %b = call float @llvm.minimum.f32(float %a, float %z) + ret float %b +} + +define float @maximum_x_maximum_x_y(float %x, float %y) { +; CHECK-LABEL: @maximum_x_maximum_x_y( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: ret float [[A]] +; + %a = call float @llvm.maximum.f32(float %x, float %y) + %b = call float @llvm.maximum.f32(float %x, float %a) + ret float %b +} + +define float @maximum_y_maximum_x_y(float %x, float %y) { +; CHECK-LABEL: @maximum_y_maximum_x_y( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: ret float [[A]] +; + %a = call float @llvm.maximum.f32(float %x, float %y) + %b = call float @llvm.maximum.f32(float %y, float %a) + ret float %b +} + +define float @maximum_x_y_maximum_x(float %x, float %y) { +; CHECK-LABEL: @maximum_x_y_maximum_x( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: ret float [[A]] +; + %a = call float @llvm.maximum.f32(float %x, float %y) + %b = call float @llvm.maximum.f32(float %a, float %x) + ret float %b +} + +define float @maximum_x_y_maximum_y(float %x, float %y) { +; CHECK-LABEL: @maximum_x_y_maximum_y( +; CHECK-NEXT: [[A:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: ret float [[A]] +; + %a 
= call float @llvm.maximum.f32(float %x, float %y)
+  %b = call float @llvm.maximum.f32(float %a, float %y)
+  ret float %b
+}
+
+; negative test
+
+define float @maximum_z_maximum_x_y(float %x, float %y, float %z) {
+; CHECK-LABEL: @maximum_z_maximum_x_y(
+; CHECK-NEXT:    [[A:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = call float @llvm.maximum.f32(float [[Z:%.*]], float [[A]])
+; CHECK-NEXT:    ret float [[B]]
+;
+  %a = call float @llvm.maximum.f32(float %x, float %y)
+  %b = call float @llvm.maximum.f32(float %z, float %a)
+  ret float %b
+}
+
+; negative test
+
+define float @maximum_x_y_maximum_z(float %x, float %y, float %z) {
+; CHECK-LABEL: @maximum_x_y_maximum_z(
+; CHECK-NEXT:    [[A:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = call float @llvm.maximum.f32(float [[A]], float [[Z:%.*]])
+; CHECK-NEXT:    ret float [[B]]
+;
+  %a = call float @llvm.maximum.f32(float %x, float %y)
+  %b = call float @llvm.maximum.f32(float %a, float %z)
+  ret float %b
+}
+
+; negative test - minimum(X, -INF) != -INF because X could be NaN
+
+define float @minimum_neginf(float %x) {
+; CHECK-LABEL: @minimum_neginf(
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float 0xFFF0000000000000)
+; CHECK-NEXT:    ret float [[VAL]]
+;
+  %val = call float @llvm.minimum.f32(float %x, float 0xFFF0000000000000)
+  ret float %val
+}
+
+; negative test - minimum(-INF, X) != -INF because X could be NaN
+
+define <2 x double> @minimum_neginf_commute_vec(<2 x double> %x) {
+; CHECK-LABEL: @minimum_neginf_commute_vec(
+; CHECK-NEXT:    [[R:%.*]] = call <2 x double> @llvm.minimum.v2f64(<2 x double> <double 0xFFF0000000000000, double 0xFFF0000000000000>, <2 x double> [[X:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[R]]
+;
+  %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> <double 0xFFF0000000000000, double 0xFFF0000000000000>, <2 x double> %x)
+  ret <2 x double> %r
+}
+
+; TODO: minimum(INF, X) --> X
+
+define float @minimum_inf(float %x) {
+; CHECK-LABEL: @minimum_inf(
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.minimum.f32(float 0x7FF0000000000000, float [[X:%.*]])
+; CHECK-NEXT:    ret float [[VAL]]
+;
+  %val = call float @llvm.minimum.f32(float 0x7FF0000000000000, float %x)
+  ret float %val
+}
+
+; negative test - maximum(X, INF) != INF because X could be NaN
+
+define <2 x double> @maximum_inf(<2 x double> %x) {
+; CHECK-LABEL: @maximum_inf(
+; CHECK-NEXT:    [[VAL:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[X:%.*]], <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>)
+; CHECK-NEXT:    ret <2 x double> [[VAL]]
+;
+  %val = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>)
+  ret <2 x double> %val
+}
+
+; negative test - maximum(INF, X) != INF because X could be NaN
+
+define float @maximum_inf_commute(float %x) {
+; CHECK-LABEL: @maximum_inf_commute(
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.maximum.f32(float 0x7FF0000000000000, float [[X:%.*]])
+; CHECK-NEXT:    ret float [[VAL]]
+;
+  %val = call float @llvm.maximum.f32(float 0x7FF0000000000000, float %x)
+  ret float %val
+}

From 22c583c3d03a6750d6474ad46e5d52eb9974e2b0 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Mon, 14 Sep 2020 10:32:11 -0400
Subject: [PATCH 0546/1079] [InstSimplify] reduce code duplication for fmin/fmax folds; NFC

We use the same code structure for folding integer min/max.
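As a reader's aid (not part of the patch): the fold order that the
refactored code implements can be modeled with a short standalone C++
sketch. The enum and function names below are invented for illustration;
only the ordering of the rules mirrors simplifyBinaryIntrinsic.

  // Sketch of the fmin/fmax constant folds, assuming the constant operand
  // has already been canonicalized to the right-hand side (as in the patch).
  #include <cmath>
  #include <optional>

  enum class FPMinMax { MinNum, MaxNum, Minimum, Maximum };

  std::optional<double> foldConstantRHS(FPMinMax K, double X, double C) {
    // minimum/maximum propagate NaN; minnum/maxnum return the other operand.
    bool PropagateNaN = K == FPMinMax::Minimum || K == FPMinMax::Maximum;
    if (std::isnan(C))
      return PropagateNaN ? C : X;
    // min(X, -Inf) --> -Inf and max(X, +Inf) --> +Inf, but only for the
    // NaN-ignoring 'num' variants, because X could still be NaN.
    bool UseNegInf = K == FPMinMax::MinNum || K == FPMinMax::Minimum;
    if (std::isinf(C) && std::signbit(C) == UseNegInf && !PropagateNaN)
      return C;
    return std::nullopt; // no fold; keep the intrinsic call
  }

For example, foldConstantRHS(FPMinMax::MinNum, x, -inf) yields -inf for
any x (the minnum_neginf test above), while the Minimum variant refuses
the fold because a NaN x must win.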
---
 llvm/lib/Analysis/InstructionSimplify.cpp | 39 +++++++++++------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 271e79df71531..9933360a3a1a3 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5447,19 +5447,32 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
     // If the arguments are the same, this is a no-op.
     if (Op0 == Op1)
       return Op0;
 
-    // If one argument is undef, return the other argument.
-    if (Q.isUndefValue(Op0))
-      return Op1;
+    // Canonicalize constant operand as Op1.
+    if (isa<Constant>(Op0))
+      std::swap(Op0, Op1);
+
+    // If an argument is undef, return the other argument.
     if (Q.isUndefValue(Op1))
       return Op0;
 
-    // If one argument is NaN, return other or NaN appropriately.
+    // If an argument is NaN, return other or NaN appropriately.
     bool PropagateNaN = IID == Intrinsic::minimum || IID == Intrinsic::maximum;
-    if (match(Op0, m_NaN()))
-      return PropagateNaN ? Op0 : Op1;
     if (match(Op1, m_NaN()))
       return PropagateNaN ? Op1 : Op0;
 
+    // min(X, -Inf) --> -Inf
+    // max(X, +Inf) --> +Inf
+    bool UseNegInf = IID == Intrinsic::minnum || IID == Intrinsic::minimum;
+    const APFloat *C;
+    if (match(Op1, m_APFloat(C)) && C->isInfinity() &&
+        C->isNegative() == UseNegInf && !PropagateNaN)
+      return ConstantFP::getInfinity(ReturnType, UseNegInf);
+
+    // TODO: minimum(nnan x, inf) -> x
+    // TODO: minnum(nnan ninf x, flt_max) -> x
+    // TODO: maximum(nnan x, -inf) -> x
+    // TODO: maxnum(nnan ninf x, -flt_max) -> x
+
     // Min/max of the same operation with common operand:
     // m(m(X, Y)), X --> m(X, Y) (4 commuted variants)
     if (auto *M0 = dyn_cast<IntrinsicInst>(Op0))
@@ -5471,20 +5484,6 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
         (M1->getOperand(0) == Op0 || M1->getOperand(1) == Op0))
       return Op1;
 
-    // min(X, -Inf) --> -Inf (and commuted variant)
-    // max(X, +Inf) --> +Inf (and commuted variant)
-    bool UseNegInf = IID == Intrinsic::minnum || IID == Intrinsic::minimum;
-    const APFloat *C;
-    if ((match(Op0, m_APFloat(C)) && C->isInfinity() &&
-         C->isNegative() == UseNegInf && !PropagateNaN) ||
-        (match(Op1, m_APFloat(C)) && C->isInfinity() &&
-         C->isNegative() == UseNegInf && !PropagateNaN))
-      return ConstantFP::getInfinity(ReturnType, UseNegInf);
-
-    // TODO: minnum(nnan x, inf) -> x
-    // TODO: minnum(nnan ninf x, flt_max) -> x
-    // TODO: maxnum(nnan x, -inf) -> x
-    // TODO: maxnum(nnan ninf x, -flt_max) -> x
     break;
   }
   default:

From ef7a255c037ca462f71ddd3d2b5a46310b08f6eb Mon Sep 17 00:00:00 2001
From: Lubomir Litchev
Date: Wed, 9 Sep 2020 12:34:08 -0700
Subject: [PATCH 0547/1079] Add support for casting elements in vectors for
 certain Std dialect type conversion operations.

Added support to the Std dialect cast operations to do casts in vector
types when feasible.
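Illustrative note (not part of the patch): the shared rule is "same vector
shape, then recurse on the element types". A standalone C++ sketch with
stand-in types (not the real MLIR classes) looks like this:

  #include <cstdint>
  #include <vector>

  // Stand-in for mlir::Type: a scalar kind, optionally wrapped in a vector.
  enum class Scalar { Float, SignlessInt };
  struct Type {
    Scalar Kind;
    std::vector<int64_t> Shape; // empty means scalar
    bool isVector() const { return !Shape.empty(); }
    Type getElementType() const { return {Kind, {}}; }
    bool isFloat() const { return !isVector() && Kind == Scalar::Float; }
    bool isSignlessInteger() const {
      return !isVector() && Kind == Scalar::SignlessInt;
    }
  };

  // Mirrors the shape of areVectorCastSimpleCompatible below: two vectors
  // are cast compatible iff shapes match and elements pass the predicate.
  static bool vectorCastCompatible(const Type &A, const Type &B,
                                   bool (*AreElementsCompatible)(const Type &,
                                                                 const Type &)) {
    return A.isVector() && B.isVector() && A.Shape == B.Shape &&
           AreElementsCompatible(A.getElementType(), B.getElementType());
  }

  // Example client, modeled on SIToFPOp::areCastCompatible.
  static bool siToFpCompatible(const Type &A, const Type &B) {
    if (A.isSignlessInteger() && B.isFloat())
      return true;
    return vectorCastCompatible(A, B, siToFpCompatible);
  }

With this structure, i32 -> f32 stays legal and vector<4xi32> ->
vector<4xf32> becomes legal, while vectors with mismatched shapes are
still rejected.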
Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D87410 --- .../mlir/Dialect/StandardOps/IR/Ops.td | 16 ++--- mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 48 +++++++++---- .../StandardToLLVM/convert-to-llvmir.mlir | 71 +++++++++++++++++++ 3 files changed, 113 insertions(+), 22 deletions(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index afdc3edae86c3..4d0cf76ec9d8b 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -2443,10 +2443,10 @@ def SignExtendIOp : Std_Op<"sexti", def SIToFPOp : CastOp<"sitofp">, Arguments<(ins AnyType:$in)> { let summary = "cast from integer type to floating-point"; let description = [{ - Cast from a value interpreted as signed integer to the corresponding - floating-point value. If the value cannot be exactly represented, it is - rounded using the default rounding mode. Only scalars are currently - supported. + Cast from a value interpreted as signed or vector of signed integers to the + corresponding floating-point scalar or vector value. If the value cannot be + exactly represented, it is rounded using the default rounding mode. Scalars + and vector types are currently supported. }]; let extraClassDeclaration = [{ @@ -3124,10 +3124,10 @@ def TruncateIOp : Std_Op<"trunci", [NoSideEffect, SameOperandsAndResultShape]> { def UIToFPOp : CastOp<"uitofp">, Arguments<(ins AnyType:$in)> { let summary = "cast from unsigned integer type to floating-point"; let description = [{ - Cast from a value interpreted as unsigned integer to the corresponding - floating-point value. If the value cannot be exactly represented, it is - rounded using the default rounding mode. Only scalars are currently - supported. + Cast from a value interpreted as unsigned integer or vector of unsigned + integers to the corresponding scalar or vector floating-point value. If the + value cannot be exactly represented, it is rounded using the default + rounding mode. Scalars and vector types are currently supported. }]; let extraClassDeclaration = [{ diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index cf085a604b46b..c77bc12cca333 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -217,6 +217,26 @@ static LogicalResult foldMemRefCast(Operation *op) { return success(folded); } +//===----------------------------------------------------------------------===// +// Common cast compatibility check for vector types. +//===----------------------------------------------------------------------===// + +/// This method checks for cast compatibility of vector types. +/// If 'a' and 'b' are vector types, and they are cast compatible, +/// it calls the 'areElementsCastCompatible' function to check for +/// element cast compatibility. +/// Returns 'true' if the vector types are cast compatible, and 'false' +/// otherwise. 
+static bool areVectorCastSimpleCompatible(
+    Type a, Type b, function_ref<bool(Type, Type)> areElementsCastCompatible) {
+  if (auto va = a.dyn_cast<VectorType>())
+    if (auto vb = b.dyn_cast<VectorType>())
+      return va.getShape().equals(vb.getShape()) &&
+             areElementsCastCompatible(va.getElementType(),
+                                       vb.getElementType());
+  return false;
+}
+
 //===----------------------------------------------------------------------===//
 // AddFOp
 //===----------------------------------------------------------------------===//
@@ -1816,11 +1836,7 @@ bool FPExtOp::areCastCompatible(Type a, Type b) {
   if (auto fa = a.dyn_cast<FloatType>())
     if (auto fb = b.dyn_cast<FloatType>())
       return fa.getWidth() < fb.getWidth();
-  if (auto va = a.dyn_cast<VectorType>())
-    if (auto vb = b.dyn_cast<VectorType>())
-      return va.getShape().equals(vb.getShape()) &&
-             areCastCompatible(va.getElementType(), vb.getElementType());
-  return false;
+  return areVectorCastSimpleCompatible(a, b, areCastCompatible);
 }
 
 //===----------------------------------------------------------------------===//
@@ -1828,7 +1844,9 @@ bool FPExtOp::areCastCompatible(Type a, Type b) {
 //===----------------------------------------------------------------------===//
 
 bool FPToSIOp::areCastCompatible(Type a, Type b) {
-  return a.isa<FloatType>() && b.isSignlessInteger();
+  if (a.isa<FloatType>() && b.isSignlessInteger())
+    return true;
+  return areVectorCastSimpleCompatible(a, b, areCastCompatible);
 }
 
 //===----------------------------------------------------------------------===//
@@ -1836,7 +1854,9 @@ bool FPToSIOp::areCastCompatible(Type a, Type b) {
 //===----------------------------------------------------------------------===//
 
 bool FPToUIOp::areCastCompatible(Type a, Type b) {
-  return a.isa<FloatType>() && b.isSignlessInteger();
+  if (a.isa<FloatType>() && b.isSignlessInteger())
+    return true;
+  return areVectorCastSimpleCompatible(a, b, areCastCompatible);
 }
 
 //===----------------------------------------------------------------------===//
@@ -1847,11 +1867,7 @@ bool FPTruncOp::areCastCompatible(Type a, Type b) {
   if (auto fa = a.dyn_cast<FloatType>())
     if (auto fb = b.dyn_cast<FloatType>())
       return fa.getWidth() > fb.getWidth();
-  if (auto va = a.dyn_cast<VectorType>())
-    if (auto vb = b.dyn_cast<VectorType>())
-      return va.getShape().equals(vb.getShape()) &&
-             areCastCompatible(va.getElementType(), vb.getElementType());
-  return false;
+  return areVectorCastSimpleCompatible(a, b, areCastCompatible);
 }
 
 //===----------------------------------------------------------------------===//
@@ -2291,7 +2307,9 @@ OpFoldResult SignedRemIOp::fold(ArrayRef<Attribute> operands) {
 
 // sitofp is applicable from integer types to float types.
 bool SIToFPOp::areCastCompatible(Type a, Type b) {
-  return a.isSignlessInteger() && b.isa<FloatType>();
+  if (a.isSignlessInteger() && b.isa<FloatType>())
+    return true;
+  return areVectorCastSimpleCompatible(a, b, areCastCompatible);
 }
 
 //===----------------------------------------------------------------------===//
@@ -2371,7 +2389,9 @@ OpFoldResult SubIOp::fold(ArrayRef<Attribute> operands) {
 
 // uitofp is applicable from integer types to float types.
bool UIToFPOp::areCastCompatible(Type a, Type b) { - return a.isSignlessInteger() && b.isa(); + if (a.isSignlessInteger() && b.isa()) + return true; + return areVectorCastSimpleCompatible(a, b, areCastCompatible); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir index 62be4783e364b..bb0363b1cba52 100644 --- a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir @@ -594,6 +594,24 @@ func @sitofp(%arg0 : i32, %arg1 : i64) { return } +// Checking conversion of integer vectors to floating point vector types. +// CHECK-LABEL: @sitofp_vector +func @sitofp_vector(%arg0 : vector<2xi16>, %arg1 : vector<2xi32>, %arg2 : vector<2xi64>) { +// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.vec<2 x i16> to !llvm.vec<2 x float> + %0 = sitofp %arg0: vector<2xi16> to vector<2xf32> +// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.vec<2 x i16> to !llvm.vec<2 x double> + %1 = sitofp %arg0: vector<2xi16> to vector<2xf64> +// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.vec<2 x i32> to !llvm.vec<2 x float> + %2 = sitofp %arg1: vector<2xi32> to vector<2xf32> +// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.vec<2 x i32> to !llvm.vec<2 x double> + %3 = sitofp %arg1: vector<2xi32> to vector<2xf64> +// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.vec<2 x i64> to !llvm.vec<2 x float> + %4 = sitofp %arg2: vector<2xi64> to vector<2xf32> +// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.vec<2 x i64> to !llvm.vec<2 x double> + %5 = sitofp %arg2: vector<2xi64> to vector<2xf64> + return +} + // Checking conversion of unsigned integer types to floating point. // CHECK-LABEL: @uitofp func @uitofp(%arg0 : i32, %arg1 : i64) { @@ -646,6 +664,24 @@ func @fptosi(%arg0 : f32, %arg1 : f64) { return } +// Checking conversion of floating point vectors to integer vector types. +// CHECK-LABEL: @fptosi_vector +func @fptosi_vector(%arg0 : vector<2xf16>, %arg1 : vector<2xf32>, %arg2 : vector<2xf64>) { +// CHECK-NEXT: = llvm.fptosi {{.*}} : !llvm.vec<2 x half> to !llvm.vec<2 x i32> + %0 = fptosi %arg0: vector<2xf16> to vector<2xi32> +// CHECK-NEXT: = llvm.fptosi {{.*}} : !llvm.vec<2 x half> to !llvm.vec<2 x i64> + %1 = fptosi %arg0: vector<2xf16> to vector<2xi64> +// CHECK-NEXT: = llvm.fptosi {{.*}} : !llvm.vec<2 x float> to !llvm.vec<2 x i32> + %2 = fptosi %arg1: vector<2xf32> to vector<2xi32> +// CHECK-NEXT: = llvm.fptosi {{.*}} : !llvm.vec<2 x float> to !llvm.vec<2 x i64> + %3 = fptosi %arg1: vector<2xf32> to vector<2xi64> +// CHECK-NEXT: = llvm.fptosi {{.*}} : !llvm.vec<2 x double> to !llvm.vec<2 x i32> + %4 = fptosi %arg2: vector<2xf64> to vector<2xi32> +// CHECK-NEXT: = llvm.fptosi {{.*}} : !llvm.vec<2 x double> to !llvm.vec<2 x i64> + %5 = fptosi %arg2: vector<2xf64> to vector<2xi64> + return +} + // Checking conversion of floating point to integer types. // CHECK-LABEL: @fptoui func @fptoui(%arg0 : f32, %arg1 : f64) { @@ -660,6 +696,41 @@ func @fptoui(%arg0 : f32, %arg1 : f64) { return } +// Checking conversion of floating point vectors to integer vector types. 
+// CHECK-LABEL: @fptoui_vector +func @fptoui_vector(%arg0 : vector<2xf16>, %arg1 : vector<2xf32>, %arg2 : vector<2xf64>) { +// CHECK-NEXT: = llvm.fptoui {{.*}} : !llvm.vec<2 x half> to !llvm.vec<2 x i32> + %0 = fptoui %arg0: vector<2xf16> to vector<2xi32> +// CHECK-NEXT: = llvm.fptoui {{.*}} : !llvm.vec<2 x half> to !llvm.vec<2 x i64> + %1 = fptoui %arg0: vector<2xf16> to vector<2xi64> +// CHECK-NEXT: = llvm.fptoui {{.*}} : !llvm.vec<2 x float> to !llvm.vec<2 x i32> + %2 = fptoui %arg1: vector<2xf32> to vector<2xi32> +// CHECK-NEXT: = llvm.fptoui {{.*}} : !llvm.vec<2 x float> to !llvm.vec<2 x i64> + %3 = fptoui %arg1: vector<2xf32> to vector<2xi64> +// CHECK-NEXT: = llvm.fptoui {{.*}} : !llvm.vec<2 x double> to !llvm.vec<2 x i32> + %4 = fptoui %arg2: vector<2xf64> to vector<2xi32> +// CHECK-NEXT: = llvm.fptoui {{.*}} : !llvm.vec<2 x double> to !llvm.vec<2 x i64> + %5 = fptoui %arg2: vector<2xf64> to vector<2xi64> + return +} + +// Checking conversion of integer vectors to floating point vector types. +// CHECK-LABEL: @uitofp_vector +func @uitofp_vector(%arg0 : vector<2xi16>, %arg1 : vector<2xi32>, %arg2 : vector<2xi64>) { +// CHECK-NEXT: = llvm.uitofp {{.*}} : !llvm.vec<2 x i16> to !llvm.vec<2 x float> + %0 = uitofp %arg0: vector<2xi16> to vector<2xf32> +// CHECK-NEXT: = llvm.uitofp {{.*}} : !llvm.vec<2 x i16> to !llvm.vec<2 x double> + %1 = uitofp %arg0: vector<2xi16> to vector<2xf64> +// CHECK-NEXT: = llvm.uitofp {{.*}} : !llvm.vec<2 x i32> to !llvm.vec<2 x float> + %2 = uitofp %arg1: vector<2xi32> to vector<2xf32> +// CHECK-NEXT: = llvm.uitofp {{.*}} : !llvm.vec<2 x i32> to !llvm.vec<2 x double> + %3 = uitofp %arg1: vector<2xi32> to vector<2xf64> +// CHECK-NEXT: = llvm.uitofp {{.*}} : !llvm.vec<2 x i64> to !llvm.vec<2 x float> + %4 = uitofp %arg2: vector<2xi64> to vector<2xf32> +// CHECK-NEXT: = llvm.uitofp {{.*}} : !llvm.vec<2 x i64> to !llvm.vec<2 x double> + %5 = uitofp %arg2: vector<2xi64> to vector<2xf64> + return +} // Checking conversion of integer types to floating point. // CHECK-LABEL: @fptrunc From 71a16e40f78adee12663816edf6635b96dca09dc Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 11 Sep 2020 10:15:56 -0400 Subject: [PATCH 0548/1079] [libcxx] ostream{,buf}_iterator::difference_type changes in C++20 In C++20, since P0896R4, std::ostream_iterator and std::ostreambuf_iterator must have std::ptrdiff_t instead of void as a difference_type. Tests by Casey Carter (thanks!). 
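The observable requirement can be checked directly. A hedged sketch of such a
check, assuming a conforming standard library; the version guard mirrors the
`TEST_STD_VER` / `_LIBCPP_STD_VER` guards used in the patch:

#include <cstddef>
#include <iterator>
#include <type_traits>

// P0896R4: from C++20 on, ostream_iterator (and ostreambuf_iterator) must
// expose std::ptrdiff_t as difference_type; before C++20 it was void.
using Diff = std::ostream_iterator<int>::difference_type;
#if __cplusplus > 201703L
static_assert(std::is_same<Diff, std::ptrdiff_t>::value, "C++20 rule");
#else
static_assert(std::is_same<Diff, void>::value, "pre-C++20 rule");
#endif

int main() {}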
Differential Revision: https://reviews.llvm.org/D87459 --- libcxx/include/iterator | 34 +++++++++++++++---- .../ostream.iterator/types.pass.cpp | 9 +++++ .../ostreambuf.iterator/types.pass.cpp | 9 +++++ 3 files changed, 45 insertions(+), 7 deletions(-) diff --git a/libcxx/include/iterator b/libcxx/include/iterator index a13214fca5e4b..36571a50b8bc5 100644 --- a/libcxx/include/iterator +++ b/libcxx/include/iterator @@ -1052,9 +1052,19 @@ class _LIBCPP_TEMPLATE_VIS ostream_iterator : public iterator { public: - typedef _CharT char_type; - typedef _Traits traits_type; - typedef basic_ostream<_CharT,_Traits> ostream_type; + typedef output_iterator_tag iterator_category; + typedef void value_type; +#if _LIBCPP_STD_VER > 17 + typedef std::ptrdiff_t difference_type; +#else + typedef void difference_type; +#endif + typedef void pointer; + typedef void reference; + typedef _CharT char_type; + typedef _Traits traits_type; + typedef basic_ostream<_CharT, _Traits> ostream_type; + private: ostream_type* __out_stream_; const char_type* __delim_; @@ -1151,10 +1161,20 @@ class _LIBCPP_TEMPLATE_VIS ostreambuf_iterator : public iterator { public: - typedef _CharT char_type; - typedef _Traits traits_type; - typedef basic_streambuf<_CharT,_Traits> streambuf_type; - typedef basic_ostream<_CharT,_Traits> ostream_type; + typedef output_iterator_tag iterator_category; + typedef void value_type; +#if _LIBCPP_STD_VER > 17 + typedef std::ptrdiff_t difference_type; +#else + typedef void difference_type; +#endif + typedef void pointer; + typedef void reference; + typedef _CharT char_type; + typedef _Traits traits_type; + typedef basic_streambuf<_CharT, _Traits> streambuf_type; + typedef basic_ostream<_CharT, _Traits> ostream_type; + private: streambuf_type* __sbuf_; public: diff --git a/libcxx/test/std/iterators/stream.iterators/ostream.iterator/types.pass.cpp b/libcxx/test/std/iterators/stream.iterators/ostream.iterator/types.pass.cpp index 950c7dfe8c0b5..739e39d62b78f 100644 --- a/libcxx/test/std/iterators/stream.iterators/ostream.iterator/types.pass.cpp +++ b/libcxx/test/std/iterators/stream.iterators/ostream.iterator/types.pass.cpp @@ -19,6 +19,7 @@ // typedef basic_istream istream_type; // ... +#include #include #include @@ -33,7 +34,11 @@ int main(int, char**) #else static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); +#if TEST_STD_VER <= 17 static_assert((std::is_same::value), ""); +#else + static_assert((std::is_same::value), ""); +#endif static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); #endif @@ -47,7 +52,11 @@ int main(int, char**) #else static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); +#if TEST_STD_VER <= 17 static_assert((std::is_same::value), ""); +#else + static_assert((std::is_same::value), ""); +#endif static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); #endif diff --git a/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/types.pass.cpp b/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/types.pass.cpp index 671a09bb7a3fa..2a4e6ffa5e6b6 100644 --- a/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/types.pass.cpp +++ b/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/types.pass.cpp @@ -19,6 +19,7 @@ // typedef basic_ostream ostream_type; // ... 
+#include #include #include #include @@ -34,7 +35,11 @@ int main(int, char**) #else static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); +#if TEST_STD_VER <= 17 static_assert((std::is_same::value), ""); +#else + static_assert((std::is_same::value), ""); +#endif static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); #endif @@ -50,7 +55,11 @@ int main(int, char**) #else static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); +#if TEST_STD_VER <= 17 static_assert((std::is_same::value), ""); +#else + static_assert((std::is_same::value), ""); +#endif static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); #endif From 3b7708e2deb48befcef764fb69f9217f55ac1155 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 14 Sep 2020 15:37:47 +0100 Subject: [PATCH 0549/1079] Assert we've found the size of each (non-overlapping) structure. NFCI. Fixes clang static analyzer warning. --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index c55403920d8fa..5384e9196896b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -7692,6 +7692,7 @@ class MappableExprsHandler { break; } } + assert(Size && "Failed to determine structure size"); CombinedInfo.BasePointers.push_back(BP.getPointer()); CombinedInfo.Pointers.push_back(LB.getPointer()); CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( From f07f3c72375b872bfb988f7531d4e0485233ade1 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 14 Sep 2020 15:33:50 +0100 Subject: [PATCH 0550/1079] [MemorySSA] Precommit test case for PR47498. --- .../Analysis/MemorySSA/phi-translation.ll | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/llvm/test/Analysis/MemorySSA/phi-translation.ll b/llvm/test/Analysis/MemorySSA/phi-translation.ll index 3909437b12303..0844760327b18 100644 --- a/llvm/test/Analysis/MemorySSA/phi-translation.ll +++ b/llvm/test/Analysis/MemorySSA/phi-translation.ll @@ -369,3 +369,59 @@ for.end: ; preds = %for.body ret i32 0 } +declare i1 @should_exit(i32) readnone +declare void @init([32 x i32]*) + +; Test case for PR47498. +; %l.1 may read the result of `store i32 10, i32* %p.1` in %storebb, because +; after %storebb has been executed, %loop.1.header might be executed again. +; Make sure %l.1's defining access is the MemoryPhi in the block. 
+define void @dont_merge_noalias_complex_2(i32 %arg, i32 %arg1) { +; CHECK-LABEL: define void @dont_merge_noalias_complex_2( + +; CHECK-LABEL: entry: +; CHECK: ; 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: call void @init([32 x i32]* %tmp) + +; CHECK-LABEL: loop.1.header: +; CHECK-NEXT: ; 4 = MemoryPhi({entry,1},{loop.1.latch,3}) +; NOLIMIT: ; MemoryUse(1) MayAlias +; LIMIT: ; MemoryUse(4) MayAlias +; CHECK-NEXT: %l.1 = load i32, i32* %p.1, align 4 + +; CHECK-LABEL: loop.1.latch: +; CHECK-NEXT: ; 3 = MemoryPhi({loop.1.header,4},{storebb,2}) + +; CHECK-LABEL: storebb: +; NOLIMIT: ; MemoryUse(1) MayAlias +; LIMIT: ; MemoryUse(4) MayAlias +; CHECK-NEXT: %l.2 = load i32, i32* %p.2, align 4 +; CHECK-NEXT: ; 2 = MemoryDef(4) +; CHECK-NEXT: store i32 10, i32* %p.1, align 4 +entry: + %tmp = alloca [32 x i32], align 16 + call void @init([32 x i32]* %tmp) + br label %loop.1.header + +loop.1.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.1.latch ] + %iv.next = add nuw nsw i64 %iv, 1 + %p.1 = getelementptr inbounds [32 x i32], [32 x i32]* %tmp, i64 0, i64 %iv.next + %l.1 = load i32, i32* %p.1, align 4 + %tmp244 = icmp ult i64 %iv, 10 + br i1 %tmp244, label %loop.1.latch, label %storebb + +loop.1.latch: + %ec = call i1 @should_exit(i32 %l.1) + br i1 %ec, label %exit, label %loop.1.header + +storebb: + %iv.add2 = add nuw nsw i64 %iv, 2 + %p.2 = getelementptr inbounds [32 x i32], [32 x i32]* %tmp, i64 0, i64 %iv.add2 + %l.2 = load i32, i32* %p.2, align 4 + store i32 10, i32* %p.1, align 4 + br label %loop.1.latch + +exit: + ret void +} From c4f1b3144184e4c276a7e7c801cbcd4ac3c573ba Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 14 Sep 2020 15:51:17 +0100 Subject: [PATCH 0551/1079] [MemorySSA] Make sure PerformedPhiTrans is updated for each visited def. 1ce82015f6d0 added a fix to restrict phi optimizations after phi translations. But the current use of performedPhiTranslation only checked whether phi translation happened for the first iterator and missed cases where phi translations happens at subsequent iterators/upwards defs. This patch changes upward_defs_iteartor to take a pointer to a bool, so we can easily ensure the final value includes all visited defs, while still being able to conveniently use it with make_range & co. 
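The shape of the fix is a shared out-parameter: every iterator created during
the walk points at one caller-owned bool, so a phi translation performed by
any of them is recorded. A minimal standalone sketch; the struct below is a
simplified stand-in for upward_defs_iterator, not the real class:

struct UpwardDefsIter {
  bool *PerformedPhiTranslation = nullptr;
  void fillInCurrentPair(bool translated) {
    if (translated && PerformedPhiTranslation)
      *PerformedPhiTranslation = true; // accumulate, never reset
  }
};

int main() {
  bool AnyTranslation = false;
  // Both iterators share the caller's flag, so the final value reflects
  // every visited def, not just the first iterator's result.
  UpwardDefsIter A{&AnyTranslation}, B{&AnyTranslation};
  A.fillInCurrentPair(false);
  B.fillInCurrentPair(true); // a later iterator performs the translation
  return AnyTranslation ? 0 : 1;
}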
--- llvm/include/llvm/Analysis/MemorySSA.h | 20 ++++++++++--------- llvm/lib/Analysis/MemorySSA.cpp | 4 ++-- .../Analysis/MemorySSA/phi-translation.ll | 7 +++---- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h index 3ec09e8c0a45e..5878b53fa3726 100644 --- a/llvm/include/llvm/Analysis/MemorySSA.h +++ b/llvm/include/llvm/Analysis/MemorySSA.h @@ -1181,9 +1181,11 @@ class upward_defs_iterator using BaseT = upward_defs_iterator::iterator_facade_base; public: - upward_defs_iterator(const MemoryAccessPair &Info, DominatorTree *DT) + upward_defs_iterator(const MemoryAccessPair &Info, DominatorTree *DT, + bool *PerformedPhiTranslation = nullptr) : DefIterator(Info.first), Location(Info.second), - OriginalAccess(Info.first), DT(DT) { + OriginalAccess(Info.first), DT(DT), + PerformedPhiTranslation(PerformedPhiTranslation) { CurrentPair.first = nullptr; WalkingPhi = Info.first && isa(Info.first); @@ -1214,8 +1216,6 @@ class upward_defs_iterator BasicBlock *getPhiArgBlock() const { return DefIterator.getPhiArgBlock(); } - bool performedPhiTranslation() const { return PerformedPhiTranslation; } - private: void fillInCurrentPair() { CurrentPair.first = *DefIterator; @@ -1228,7 +1228,8 @@ class upward_defs_iterator false)) { if (Translator.getAddr() != Location.Ptr) { CurrentPair.second = Location.getWithNewPtr(Translator.getAddr()); - PerformedPhiTranslation = true; + if (PerformedPhiTranslation) + *PerformedPhiTranslation = true; return; } } else { @@ -1245,12 +1246,13 @@ class upward_defs_iterator MemoryAccess *OriginalAccess = nullptr; DominatorTree *DT = nullptr; bool WalkingPhi = false; - bool PerformedPhiTranslation = false; + bool *PerformedPhiTranslation = nullptr; }; -inline upward_defs_iterator upward_defs_begin(const MemoryAccessPair &Pair, - DominatorTree &DT) { - return upward_defs_iterator(Pair, &DT); +inline upward_defs_iterator +upward_defs_begin(const MemoryAccessPair &Pair, DominatorTree &DT, + bool *PerformedPhiTranslation = nullptr) { + return upward_defs_iterator(Pair, &DT, PerformedPhiTranslation); } inline upward_defs_iterator upward_defs_end() { return upward_defs_iterator(); } diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp index f54f04460a4d7..14fa11988362d 100644 --- a/llvm/lib/Analysis/MemorySSA.cpp +++ b/llvm/lib/Analysis/MemorySSA.cpp @@ -603,13 +603,13 @@ template class ClobberWalker { void addSearches(MemoryPhi *Phi, SmallVectorImpl &PausedSearches, ListIndex PriorNode) { - auto UpwardDefsBegin = upward_defs_begin({Phi, Paths[PriorNode].Loc}, DT); + auto UpwardDefsBegin = upward_defs_begin({Phi, Paths[PriorNode].Loc}, DT, + &PerformedPhiTranslation); auto UpwardDefs = make_range(UpwardDefsBegin, upward_defs_end()); for (const MemoryAccessPair &P : UpwardDefs) { PausedSearches.push_back(Paths.size()); Paths.emplace_back(P.second, P.first, PriorNode); } - PerformedPhiTranslation |= UpwardDefsBegin.performedPhiTranslation(); } /// Represents a search that terminated after finding a clobber. 
This clobber diff --git a/llvm/test/Analysis/MemorySSA/phi-translation.ll b/llvm/test/Analysis/MemorySSA/phi-translation.ll index 0844760327b18..1274e365066d6 100644 --- a/llvm/test/Analysis/MemorySSA/phi-translation.ll +++ b/llvm/test/Analysis/MemorySSA/phi-translation.ll @@ -384,10 +384,9 @@ define void @dont_merge_noalias_complex_2(i32 %arg, i32 %arg1) { ; CHECK-NEXT: call void @init([32 x i32]* %tmp) ; CHECK-LABEL: loop.1.header: -; CHECK-NEXT: ; 4 = MemoryPhi({entry,1},{loop.1.latch,3}) -; NOLIMIT: ; MemoryUse(1) MayAlias -; LIMIT: ; MemoryUse(4) MayAlias -; CHECK-NEXT: %l.1 = load i32, i32* %p.1, align 4 +; CHECK-NEXT: ; 4 = MemoryPhi({entry,1},{loop.1.latch,3}) +; CHECK: ; MemoryUse(4) MayAlias +; CHECK-NEXT: %l.1 = load i32, i32* %p.1, align 4 ; CHECK-LABEL: loop.1.latch: ; CHECK-NEXT: ; 3 = MemoryPhi({loop.1.header,4},{storebb,2}) From 7526376164801cc758c94217931ab025bc226b0e Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 14 Sep 2020 11:13:09 -0400 Subject: [PATCH 0552/1079] [InstSimplify] allow folds for fmin/fmax with 'ninf' maxnum(ninf X, +FLT_MAX) --> +FLT_MAX minnum(ninf X, -FLT_MAX) --> -FLT_MAX This is based on the similar codegen transform proposed in: D87571 --- llvm/lib/Analysis/InstructionSimplify.cpp | 31 ++++++++++++------- .../Transforms/InstSimplify/fminmax-folds.ll | 12 +++---- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 9933360a3a1a3..88cfe5a1fa855 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5455,23 +5455,30 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, if (Q.isUndefValue(Op1)) return Op0; - // If an argument is NaN, return other or NaN appropriately. bool PropagateNaN = IID == Intrinsic::minimum || IID == Intrinsic::maximum; + bool IsMin = IID == Intrinsic::minimum || IID == Intrinsic::minnum; + + // minnum(X, nan) -> X + // maxnum(X, nan) -> X + // minimum(X, nan) -> nan + // maximum(X, nan) -> nan if (match(Op1, m_NaN())) return PropagateNaN ? Op1 : Op0; - // min(X, -Inf) --> -Inf - // max(X, +Inf) --> +Inf - bool UseNegInf = IID == Intrinsic::minnum || IID == Intrinsic::minimum; + // In the following folds, inf can be replaced with the largest finite + // float, if the ninf flag is set. 
const APFloat *C; - if (match(Op1, m_APFloat(C)) && C->isInfinity() && - C->isNegative() == UseNegInf && !PropagateNaN) - return ConstantFP::getInfinity(ReturnType, UseNegInf); - - // TODO: minimum(nnan x, inf) -> x - // TODO: minnum(nnan ninf x, flt_max) -> x - // TODO: maximum(nnan x, -inf) -> x - // TODO: maxnum(nnan ninf x, -flt_max) -> x + if (match(Op1, m_APFloat(C)) && + (C->isInfinity() || (Q.CxtI->hasNoInfs() && C->isLargest()))) { + // min(X, -Inf) --> -Inf + // max(X, +Inf) --> +Inf + if (C->isNegative() == IsMin && !PropagateNaN) + return ConstantFP::get(ReturnType, *C); + // TODO: minimum(nnan x, inf) -> x + // TODO: minnum(nnan ninf x, flt_max) -> x + // TODO: maximum(nnan x, -inf) -> x + // TODO: maxnum(nnan ninf x, -flt_max) -> x + } // Min/max of the same operation with common operand: // m(m(X, Y)), X --> m(X, Y) (4 commuted variants) diff --git a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll index 5d502d22cccab..3811ae81e8d39 100644 --- a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll +++ b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll @@ -344,8 +344,7 @@ define float @test_minnum_const_max_ninf(float %x) { define float @test_maxnum_const_max_ninf(float %x) { ; CHECK-LABEL: @test_maxnum_const_max_ninf( -; CHECK-NEXT: [[R:%.*]] = call ninf float @llvm.maxnum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float 0x47EFFFFFE0000000 ; %r = call ninf float @llvm.maxnum.f32(float %x, float 0x47efffffe0000000) ret float %r @@ -371,8 +370,7 @@ define float @test_minimum_const_max_ninf(float %x) { define float @test_minnum_const_neg_max_ninf(float %x) { ; CHECK-LABEL: @test_minnum_const_neg_max_ninf( -; CHECK-NEXT: [[R:%.*]] = call ninf float @llvm.minnum.f32(float [[X:%.*]], float 0xC7EFFFFFE0000000) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float 0xC7EFFFFFE0000000 ; %r = call ninf float @llvm.minnum.f32(float %x, float 0xc7efffffe0000000) ret float %r @@ -416,8 +414,7 @@ define float @test_minnum_const_max_nnan_ninf(float %x) { define float @test_maxnum_const_max_nnan_ninf(float %x) { ; CHECK-LABEL: @test_maxnum_const_max_nnan_ninf( -; CHECK-NEXT: [[R:%.*]] = call nnan ninf float @llvm.maxnum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float 0x47EFFFFFE0000000 ; %r = call nnan ninf float @llvm.maxnum.f32(float %x, float 0x47efffffe0000000) ret float %r @@ -443,8 +440,7 @@ define float @test_minimum_const_max_nnan_ninf(float %x) { define float @test_minnum_const_neg_max_nnan_ninf(float %x) { ; CHECK-LABEL: @test_minnum_const_neg_max_nnan_ninf( -; CHECK-NEXT: [[R:%.*]] = call nnan ninf float @llvm.minnum.f32(float [[X:%.*]], float 0xC7EFFFFFE0000000) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float 0xC7EFFFFFE0000000 ; %r = call nnan ninf float @llvm.minnum.f32(float %x, float 0xc7efffffe0000000) ret float %r From 916b43403588a85425bbc82712427cf53ed877cc Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Tue, 8 Sep 2020 22:33:02 +0000 Subject: [PATCH 0553/1079] Sema: add support for `__attribute__((__swift_objc_members__))` This adds the `__swift_objc_members__` attribute to the semantic analysis. It allows for annotating ObjC interfaces to provide Swift semantics indicating that the types derived from this interface will be back-bridged to Objective-C to allow interoperability with Objective-C and Swift. 
This is based on the work of the original changes in https://github.com/llvm/llvm-project-staging/commit/8afaf3aad2af43cfedca7a24cd817848c4e95c0c Differential Revision: https://reviews.llvm.org/D87395 Reviewed By: Aaron Ballman, Dmitri Gribenko --- clang/include/clang/Basic/Attr.td | 6 +++++ clang/include/clang/Basic/AttrDocs.td | 10 ++++++++ clang/lib/Sema/SemaDeclAttr.cpp | 3 +++ ...a-attribute-supported-attributes-list.test | 1 + clang/test/SemaObjC/attr-swift_objc_members.m | 24 +++++++++++++++++++ 5 files changed, 44 insertions(+) create mode 100644 clang/test/SemaObjC/attr-swift_objc_members.m diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 1790ae01497fb..3221cf23c4b53 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2130,6 +2130,12 @@ def Regparm : TypeAttr { let ASTNode = 0; } +def SwiftObjCMembers : Attr { + let Spellings = [GNU<"swift_objc_members">]; + let Subjects = SubjectList<[ObjCInterface], ErrorDiag>; + let Documentation = [SwiftObjCMembersDocs]; +} + def SwiftError : InheritableAttr { let Spellings = [GNU<"swift_error">]; let Args = [ diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 2fffc0daabee3..939f52dae3d5a 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -3476,6 +3476,16 @@ Swift. }]; } +def SwiftObjCMembersDocs : Documentation { + let Category = SwiftDocs; + let Heading = "swift_objc_members"; + let Content = [{ +This attribute indicates that Swift subclasses and members of Swift extensions +of this class will be implicitly marked with the ``@objcMembers`` Swift +attribute, exposing them back to Objective-C. + }]; +} + def SwiftErrorDocs : Documentation { let Category = SwiftDocs; let Heading = "swift_error"; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index e317211d8bee8..bf9d8497f5a26 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -7536,6 +7536,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, case ParsedAttr::AT_SwiftError: handleSwiftError(S, D, AL); break; + case ParsedAttr::AT_SwiftObjCMembers: + handleSimpleAttribute(S, D, AL); + break; // XRay attributes. 
case ParsedAttr::AT_XRayLogArgs: diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index 12800b9d54eaa..dcf7cd2b7f1a4 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -150,6 +150,7 @@ // CHECK-NEXT: SwiftError (SubjectMatchRule_function, SubjectMatchRule_objc_method) // CHECK-NEXT: SwiftErrorResult (SubjectMatchRule_variable_is_parameter) // CHECK-NEXT: SwiftIndirectResult (SubjectMatchRule_variable_is_parameter) +// CHECK-NEXT: SwiftObjCMembers (SubjectMatchRule_objc_interface) // CHECK-NEXT: TLSModel (SubjectMatchRule_variable_is_thread_local) // CHECK-NEXT: Target (SubjectMatchRule_function) // CHECK-NEXT: TestTypestate (SubjectMatchRule_function_is_member) diff --git a/clang/test/SemaObjC/attr-swift_objc_members.m b/clang/test/SemaObjC/attr-swift_objc_members.m new file mode 100644 index 0000000000000..81328b6245947 --- /dev/null +++ b/clang/test/SemaObjC/attr-swift_objc_members.m @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -verify -fsyntax-only %s + +#if !__has_attribute(swift_objc_members) +#error cannot verify presence of swift_objc_members attribute +#endif + +__attribute__((__swift_objc_members__)) +__attribute__((__objc_root_class__)) +@interface I +@end + +__attribute__((swift_objc_members)) +@protocol P +@end +// expected-error@-3 {{'swift_objc_members' attribute only applies to Objective-C interfaces}} + +__attribute__((swift_objc_members)) +extern void f(void); +// expected-error@-2 {{'swift_objc_members' attribute only applies to Objective-C interfaces}} + +// expected-error@+1 {{'__swift_objc_members__' attribute takes no arguments}} +__attribute__((__swift_objc_members__("J"))) +@interface J +@end From 55d371abd7f470496f45d960c29bb66da0e81aee Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 14 Sep 2020 11:42:34 -0400 Subject: [PATCH 0554/1079] [InstSimplify] add folds for fmin/fmax with 'nnan' maximum(nnan X, +INF) --> +INF minimum(nnan X, -INF) --> -INF This is based on the similar codegen transform proposed in: D87571 --- llvm/lib/Analysis/InstructionSimplify.cpp | 9 ++++++--- .../Transforms/InstSimplify/fminmax-folds.ll | 18 ++++++------------ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 88cfe5a1fa855..716af06769f9e 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5470,10 +5470,13 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, const APFloat *C; if (match(Op1, m_APFloat(C)) && (C->isInfinity() || (Q.CxtI->hasNoInfs() && C->isLargest()))) { - // min(X, -Inf) --> -Inf - // max(X, +Inf) --> +Inf - if (C->isNegative() == IsMin && !PropagateNaN) + // minnum(X, -inf) -> -inf + // maxnum(X, +inf) -> +inf + // minimum(X, -inf) -> -inf if nnan + // maximum(X, +inf) -> +inf if nnan + if (C->isNegative() == IsMin && (!PropagateNaN || Q.CxtI->hasNoNaNs())) return ConstantFP::get(ReturnType, *C); + // TODO: minimum(nnan x, inf) -> x // TODO: minnum(nnan ninf x, flt_max) -> x // TODO: maximum(nnan x, -inf) -> x diff --git a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll index 3811ae81e8d39..f05837a8c2f66 100644 --- a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll +++ b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll @@ -140,8 +140,7 @@ 
define float @test_maxnum_const_inf_nnan(float %x) {
 
 define float @test_maximum_const_inf_nnan(float %x) {
 ; CHECK-LABEL: @test_maximum_const_inf_nnan(
-; CHECK-NEXT:    [[R:%.*]] = call nnan float @llvm.maximum.f32(float [[X:%.*]], float 0x7FF0000000000000)
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    ret float 0x7FF0000000000000
 ;
   %r = call nnan float @llvm.maximum.f32(float %x, float 0x7ff0000000000000)
   ret float %r
@@ -175,8 +174,7 @@ define float @test_maxnum_const_inf_nnan_comm(float %x) {
 
 define float @test_maximum_const_inf_nnan_comm(float %x) {
 ; CHECK-LABEL: @test_maximum_const_inf_nnan_comm(
-; CHECK-NEXT:    [[R:%.*]] = call nnan float @llvm.maximum.f32(float 0x7FF0000000000000, float [[X:%.*]])
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    ret float 0x7FF0000000000000
 ;
   %r = call nnan float @llvm.maximum.f32(float 0x7ff0000000000000, float %x)
   ret float %r
@@ -210,8 +208,7 @@ define <2 x float> @test_maxnum_const_inf_nnan_comm_vec(<2 x float> %x) {
 
 define <2 x float> @test_maximum_const_inf_nnan_comm_vec(<2 x float> %x) {
 ; CHECK-LABEL: @test_maximum_const_inf_nnan_comm_vec(
-; CHECK-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.maximum.v2f32(<2 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000>, <2 x float> [[X:%.*]])
-; CHECK-NEXT:    ret <2 x float> [[R]]
+; CHECK-NEXT:    ret <2 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000>
 ;
   %r = call nnan <2 x float> @llvm.maximum.v2f32(<2 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000>, <2 x float> %x)
   ret <2 x float> %r
@@ -254,8 +251,7 @@ define float @test_maximum_const_neg_inf_nnan(float %x) {
 
 define float @test_minimum_const_neg_inf_nnan(float %x) {
 ; CHECK-LABEL: @test_minimum_const_neg_inf_nnan(
-; CHECK-NEXT:    [[R:%.*]] = call nnan float @llvm.minimum.f32(float [[X:%.*]], float 0xFFF0000000000000)
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    ret float 0xFFF0000000000000
 ;
   %r = call nnan float @llvm.minimum.f32(float %x, float 0xfff0000000000000)
   ret float %r
@@ -422,8 +418,7 @@ define float @test_maxnum_const_max_nnan_ninf(float %x) {
 
 define float @test_maximum_const_max_nnan_ninf(float %x) {
 ; CHECK-LABEL: @test_maximum_const_max_nnan_ninf(
-; CHECK-NEXT:    [[R:%.*]] = call nnan ninf float @llvm.maximum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000)
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    ret float 0x47EFFFFFE0000000
 ;
   %r = call nnan ninf float @llvm.maximum.f32(float %x, float 0x47efffffe0000000)
   ret float %r
@@ -466,8 +461,7 @@ define float @test_maximum_const_neg_max_nnan_ninf(float %x) {
 
 define float @test_minimum_const_neg_max_nnan_ninf(float %x) {
 ; CHECK-LABEL: @test_minimum_const_neg_max_nnan_ninf(
-; CHECK-NEXT:    [[R:%.*]] = call nnan ninf float @llvm.minimum.f32(float [[X:%.*]], float 0xC7EFFFFFE0000000)
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    ret float 0xC7EFFFFFE0000000
 ;
   %r = call nnan ninf float @llvm.minimum.f32(float %x, float 0xc7efffffe0000000)
   ret float %r

From ed0abc8ad3f3be99f40c25238ec42065a8ba077f Mon Sep 17 00:00:00 2001
From: Tim Keith
Date: Mon, 14 Sep 2020 09:10:45 -0700
Subject: [PATCH 0555/1079] [flang] Correctly detect overlapping integer cases

Integer case values were being compared as unsigned by operator< on
evaluate::value::Integer. Change that to signed so that overlap can be
detected correctly. Explicit CompareUnsigned and BLT are still available if
unsigned comparison is needed.
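The underlying pitfall is ordinary two's-complement behavior: the bit pattern
of -1 compares greater than 0 when interpreted unsigned. A small standalone
check, with plain 32-bit machine integers standing in for
evaluate::value::Integer:

#include <cassert>
#include <cstdint>

int main() {
  int32_t a = -1, b = 0;
  // Signed ordering: -1 < 0, so CASE (-1:) and CASE (:0) overlap.
  assert(a < b);
  // The same bit patterns compared unsigned: 0xFFFFFFFF > 0, which is why
  // an operator< built on unsigned comparison missed the overlap.
  assert(static_cast<uint32_t>(a) > static_cast<uint32_t>(b));
  return 0;
}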
Fixes https://bugs.llvm.org/show_bug.cgi?id=47309 Differential Revision: https://reviews.llvm.org/D87595 --- flang/include/flang/Evaluate/integer.h | 10 +++++----- flang/test/Semantics/case01.f90 | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/flang/include/flang/Evaluate/integer.h b/flang/include/flang/Evaluate/integer.h index 6b91cb250c98e..20b6731768de8 100644 --- a/flang/include/flang/Evaluate/integer.h +++ b/flang/include/flang/Evaluate/integer.h @@ -176,22 +176,22 @@ class Integer { constexpr Integer &operator=(const Integer &) = default; constexpr bool operator<(const Integer &that) const { - return CompareUnsigned(that) == Ordering::Less; + return CompareSigned(that) == Ordering::Less; } constexpr bool operator<=(const Integer &that) const { - return CompareUnsigned(that) != Ordering::Greater; + return CompareSigned(that) != Ordering::Greater; } constexpr bool operator==(const Integer &that) const { - return CompareUnsigned(that) == Ordering::Equal; + return CompareSigned(that) == Ordering::Equal; } constexpr bool operator!=(const Integer &that) const { return !(*this == that); } constexpr bool operator>=(const Integer &that) const { - return CompareUnsigned(that) != Ordering::Less; + return CompareSigned(that) != Ordering::Less; } constexpr bool operator>(const Integer &that) const { - return CompareUnsigned(that) == Ordering::Greater; + return CompareSigned(that) == Ordering::Greater; } // Left-justified mask (e.g., MASKL(1) has only its sign bit set) diff --git a/flang/test/Semantics/case01.f90 b/flang/test/Semantics/case01.f90 index e1965db573b6d..6342233a727e8 100644 --- a/flang/test/Semantics/case01.f90 +++ b/flang/test/Semantics/case01.f90 @@ -163,3 +163,17 @@ program selectCaseProg end select end program + +program test_overlap + integer :: i + !OK: these cases do not overlap + select case(i) + case(0:) + case(:-1) + end select + select case(i) + case(-1:) + !ERROR: CASE (:0_4) conflicts with previous cases + case(:0) + end select +end From c92d1aa44b132597d57523a90342b3e620dbdb1e Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Mon, 14 Sep 2020 12:20:58 -0400 Subject: [PATCH 0556/1079] [libc] Decouple string functions. This revision removes dependencies that exist between different string functions. This allows for the libc user to use a specific function X of this library without also depending on Y and Z. 
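The mechanism, sketched below: shared logic moves into `static inline`
helpers in an internal header, so each public entry point inlines what it
needs instead of creating a link-time dependency on another public symbol.
A simplified illustration, not the actual libc sources:

#include <cstddef>

namespace internal {
// Header-only helper: inlined into each caller, so using one public entry
// point does not drag another public symbol into the link.
static inline size_t string_length(const char *src) {
  size_t len = 0;
  while (src[len] != '\0')
    ++len;
  return len;
}
} // namespace internal

// Public entry point now depends only on the internal helper.
size_t my_strlen(const char *src) { return internal::string_length(src); }

int main() { return my_strlen("abc") == 3 ? 0 : 1; }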
Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D87421
---
 libc/src/string/CMakeLists.txt | 10 +++++-----
 libc/src/string/memchr.cpp     |  9 ++++-----
 libc/src/string/strcat.cpp     |  4 ++--
 libc/src/string/strcpy.cpp     |  4 ++--
 libc/src/string/string_utils.h | 18 ++++++++++++++++++
 libc/src/string/strlen.cpp     |  6 ++----
 libc/src/string/strnlen.cpp    |  8 ++++----
 7 files changed, 37 insertions(+), 22 deletions(-)

diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index a347f2bf52675..8a2adbe08e0b0 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -16,8 +16,7 @@ add_entrypoint_object(
     strcat.h
   DEPENDS
     .strcpy
-    .strlen
-    libc.include.string
+    .string_utils
 )
 
 add_entrypoint_object(
@@ -28,8 +27,7 @@ add_entrypoint_object(
     strcpy.h
   DEPENDS
     .memcpy
-    .strlen
-    libc.include.string
+    .string_utils
 )
 
 add_entrypoint_object(
@@ -56,6 +54,8 @@ add_entrypoint_object(
     memchr.cpp
   HDRS
     memchr.h
+  DEPENDS
+    .string_utils
 )
 
 add_entrypoint_object(
@@ -81,7 +81,7 @@ add_entrypoint_object(
   HDRS
     strnlen.h
   DEPENDS
-    .memchr
+    .string_utils
 )
 
 add_entrypoint_object(
diff --git a/libc/src/string/memchr.cpp b/libc/src/string/memchr.cpp
index 303f78185f49c..c95e2724f1a16 100644
--- a/libc/src/string/memchr.cpp
+++ b/libc/src/string/memchr.cpp
@@ -7,6 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/string/memchr.h"
+#include "src/string/string_utils.h"
+
 #include "src/__support/common.h"
 #include <stddef.h>
 
@@ -14,11 +16,8 @@ namespace __llvm_libc {
 
 // TODO: Look at performance benefits of comparing words.
 void *LLVM_LIBC_ENTRYPOINT(memchr)(const void *src, int c, size_t n) {
-  const unsigned char *str = reinterpret_cast<const unsigned char *>(src);
-  const unsigned char ch = c;
-  for (; n && *str != ch; --n, ++str)
-    ;
-  return n ?
const_cast<unsigned char *>(str) : nullptr;
+  return internal::find_first_character(
+      reinterpret_cast<const unsigned char *>(src), c, n);
 }
 
 } // namespace __llvm_libc
diff --git a/libc/src/string/strcat.cpp b/libc/src/string/strcat.cpp
index c02de2d21b93f..f5e8616f022ac 100644
--- a/libc/src/string/strcat.cpp
+++ b/libc/src/string/strcat.cpp
@@ -8,7 +8,7 @@
 
 #include "src/string/strcat.h"
 #include "src/string/strcpy.h"
-#include "src/string/strlen.h"
+#include "src/string/string_utils.h"
 
 #include "src/__support/common.h"
 
@@ -16,7 +16,7 @@ namespace __llvm_libc {
 
 char *LLVM_LIBC_ENTRYPOINT(strcat)(char *__restrict dest,
                                    const char *__restrict src) {
-  __llvm_libc::strcpy(dest + __llvm_libc::strlen(dest), src);
+  __llvm_libc::strcpy(dest + internal::string_length(dest), src);
   return dest;
 }
 
diff --git a/libc/src/string/strcpy.cpp b/libc/src/string/strcpy.cpp
index 6927d9d3ec898..69a40c9f53925 100644
--- a/libc/src/string/strcpy.cpp
+++ b/libc/src/string/strcpy.cpp
@@ -7,8 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/string/strcpy.h"
-#include "src/string/strlen.h"
 #include "src/string/memcpy.h"
+#include "src/string/string_utils.h"
 
 #include "src/__support/common.h"
 
@@ -17,7 +17,7 @@ namespace __llvm_libc {
 char *LLVM_LIBC_ENTRYPOINT(strcpy)(char *__restrict dest,
                                    const char *__restrict src) {
   return reinterpret_cast<char *>(
-      __llvm_libc::memcpy(dest, src, __llvm_libc::strlen(src) + 1));
+      __llvm_libc::memcpy(dest, src, internal::string_length(src) + 1));
 }
 
 } // namespace __llvm_libc
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 234246c10b065..dfb2c8af45279 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -15,6 +15,24 @@
 namespace __llvm_libc {
 namespace internal {
 
+// Returns the length of a string, denoted by the first occurrence
+// of a null terminator.
+static inline size_t string_length(const char *src) {
+  size_t length;
+  for (length = 0; *src; ++src, ++length)
+    ;
+  return length;
+}
+
+// Returns the first occurrence of 'ch' within the first 'n' characters of
+// 'src'. If 'ch' is not found, returns nullptr.
+static inline void *find_first_character(const unsigned char *src,
+                                         unsigned char ch, size_t n) {
+  for (; n && *src != ch; --n, ++src)
+    ;
+  return n ? const_cast<unsigned char *>(src) : nullptr;
+}
+
 // Returns the maximum length span that contains only characters not found in
 // 'segment'. If no characters are found, returns the length of 'src'.
 static inline size_t complementary_span(const char *src, const char *segment) {
diff --git a/libc/src/string/strlen.cpp b/libc/src/string/strlen.cpp
index 0b7597ec52b6f..81e1f17e7c118 100644
--- a/libc/src/string/strlen.cpp
+++ b/libc/src/string/strlen.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/string/strlen.h"
+#include "src/string/string_utils.h"
 
 #include "src/__support/common.h"
 
@@ -15,10 +16,7 @@ namespace __llvm_libc {
 
 // TODO: investigate the performance of this function.
 // There might be potential for compiler optimization.
size_t LLVM_LIBC_ENTRYPOINT(strlen)(const char *src) {
-  const char *end = src;
-  while (*end != '\0')
-    ++end;
-  return end - src;
+  return internal::string_length(src);
 }
 
 } // namespace __llvm_libc
diff --git a/libc/src/string/strnlen.cpp b/libc/src/string/strnlen.cpp
index 17dd6e171504a..ea8fa9c26d54b 100644
--- a/libc/src/string/strnlen.cpp
+++ b/libc/src/string/strnlen.cpp
@@ -7,17 +7,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/string/strnlen.h"
+#include "src/string/string_utils.h"
 
 #include "src/__support/common.h"
-#include "src/string/memchr.h"
 #include <stddef.h>
 
 namespace __llvm_libc {
 
 size_t LLVM_LIBC_ENTRYPOINT(strnlen)(const char *src, size_t n) {
-  const char *temp =
-      reinterpret_cast<const char *>(__llvm_libc::memchr(src, '\0', n));
-  return temp ? temp - src : n;
+  const void *temp = internal::find_first_character(
+      reinterpret_cast<const unsigned char *>(src), '\0', n);
+  return temp ? reinterpret_cast<const char *>(temp) - src : n;
 }
 
 } // namespace __llvm_libc

From 94921e9f8ad04793638e02a6104f63e06ae62b9e Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Fri, 11 Sep 2020 09:31:37 -0700
Subject: [PATCH 0557/1079] [ELF] Define a reportRangeError() overload for
 thunks and tidy up recent PPC64 thunk range errors

Prefer `errorOrWarn` to `fatal` for recoverable errors and graceful
degradation when --noinhibit-exec is specified. Mention the destination
symbol, otherwise the diagnostic is not really actionable.

Two errors are not tested but the patch does not intend to add the coverage.

Reviewed By: grimar

Differential Revision: https://reviews.llvm.org/D87486
---
 lld/ELF/Relocations.cpp                          | 11 +++++++++++
 lld/ELF/Target.h                                 |  2 ++
 lld/ELF/Thunks.cpp                               |  7 ++++---
 lld/test/ELF/ppc64-toc-call-to-pcrel-long-jump.s |  5 ++++-
 4 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 3080d53c33295..1ff47244c9903 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -113,6 +113,17 @@ void elf::reportRangeError(uint8_t *loc, const Relocation &rel, const Twine &v,
               ", " + Twine(max).str() + "]" + hint);
 }
 
+void elf::reportRangeError(uint8_t *loc, int64_t v, int n, const Symbol &sym,
+                           const Twine &msg) {
+  ErrorPlace errPlace = getErrorPlace(loc);
+  std::string hint;
+  if (!sym.getName().empty())
+    hint = "; references " + lld::toString(sym) + getDefinedLocation(sym);
+  errorOrWarn(errPlace.loc + msg + " is out of range: " + Twine(v) +
+              " is not in [" + Twine(llvm::minIntN(n)) + ", " +
+              Twine(llvm::maxIntN(n)) + "]" + hint);
+}
+
 namespace {
 // Build a bitmask with one bit set for each RelExpr.
 //
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index e53ac4d066272..9399ecf526f4f 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -229,6 +229,8 @@ template <class ELFT> bool isMipsPIC(const Defined *sym);
 
 void reportRangeError(uint8_t *loc, const Relocation &rel, const Twine &v,
                       int64_t min, uint64_t max);
+void reportRangeError(uint8_t *loc, int64_t v, int n, const Symbol &sym,
+                      const Twine &msg);
 
 // Make sure that V can be represented as an N bit signed integer.
 inline void checkInt(uint8_t *loc, int64_t v, int n, const Relocation &rel) {
diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp
index 6a8ea4dc0e48f..684ff5154a332 100644
--- a/lld/ELF/Thunks.cpp
+++ b/lld/ELF/Thunks.cpp
@@ -896,7 +896,7 @@ void PPC64R2SaveStub::writeTo(uint8_t *buf) {
   int64_t offset = destination.getVA() - (getThunkTargetSym()->getVA() + 4);
   // The branch offset needs to fit in 26 bits.
if (!isInt<26>(offset)) - fatal("R2 save stub branch offset is too large: " + Twine(offset)); + reportRangeError(buf, offset, 26, destination, "R2 save stub offset"); write32(buf + 0, 0xf8410018); // std r2,24(r1) write32(buf + 4, 0x48000000 | (offset & 0x03fffffc)); // b } @@ -910,7 +910,7 @@ void PPC64R2SaveStub::addSymbols(ThunkSection &isec) { void PPC64R12SetupStub::writeTo(uint8_t *buf) { int64_t offset = destination.getVA() - getThunkTargetSym()->getVA(); if (!isInt<34>(offset)) - fatal("offset must fit in 34 bits to encode in the instruction"); + reportRangeError(buf, offset, 34, destination, "R12 setup stub offset"); uint64_t paddi = PADDI_R12_NO_DISP | (((offset >> 16) & 0x3ffff) << 32) | (offset & 0xffff); @@ -927,7 +927,8 @@ void PPC64R12SetupStub::addSymbols(ThunkSection &isec) { void PPC64PCRelPLTStub::writeTo(uint8_t *buf) { int64_t offset = destination.getGotPltVA() - getThunkTargetSym()->getVA(); if (!isInt<34>(offset)) - fatal("offset must fit in 34 bits to encode in the instruction"); + reportRangeError(buf, offset, 34, destination, + "PC-relative PLT stub offset"); uint64_t pld = PLD_R12_NO_DISP | (((offset >> 16) & 0x3ffff) << 32) | (offset & 0xffff); diff --git a/lld/test/ELF/ppc64-toc-call-to-pcrel-long-jump.s b/lld/test/ELF/ppc64-toc-call-to-pcrel-long-jump.s index a6e99db8c5c0b..4175ba3131082 100644 --- a/lld/test/ELF/ppc64-toc-call-to-pcrel-long-jump.s +++ b/lld/test/ELF/ppc64-toc-call-to-pcrel-long-jump.s @@ -10,7 +10,10 @@ # RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t.o # RUN: not ld.lld -T %t.script %t.o -o /dev/null 2>&1 | FileCheck %s -# CHECK: error: R2 save stub branch offset is too large: -268501028 +# CHECK: error: R2 save stub offset is out of range: -268501028 is not in [-33554432, 33554431]; references callee +# CHECK-NEXT: >>> defined in {{.*}}.o + +# RUN: ld.lld -T %t.script %t.o -o /dev/null --noinhibit-exec .section .text_callee, "ax", %progbits callee: From ce6dd973ac556a326c38bd7667b4fb448f215d09 Mon Sep 17 00:00:00 2001 From: Tim Keith Date: Mon, 14 Sep 2020 09:59:49 -0700 Subject: [PATCH 0558/1079] [flang] Fix analyzed form of type-bound assignment Change the analyzed form of type-bound assignment to match that of call statements. Resolve the binding name to a specific subprogram when possible by using `GetBindingResolution`. Otherwise leave it as a type-bound procedure call. 
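The decision the patch adds can be sketched as follows; `Symbol` and the
helper are simplified stand-ins for the semantics classes, chosen to mirror
the two analyzed forms in the new test (a direct `CALL s1(...)` when the
binding resolves statically, a type-bound `CALL x%b1(...)` otherwise):

struct Symbol {
  const Symbol *resolution = nullptr; // non-null if statically resolvable
};

// Mirrors the patched logic: devirtualize to the specific procedure when a
// binding resolution exists; otherwise keep the type-bound call and mark
// which actual argument is the passed object.
static const Symbol *resolveBoundAssignment(const Symbol &specific, int i,
                                            int &passedObjectIndex) {
  if (specific.resolution)
    return specific.resolution; // analyzed form: CALL s1(x, 1_4)
  passedObjectIndex = i;        // analyzed form: CALL x%b1(1_4)
  return &specific;
}

int main() {
  Symbol bound;
  int passed = -1;
  const Symbol *proc = resolveBoundAssignment(bound, 0, passed);
  return (proc == &bound && passed == 0) ? 0 : 1;
}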
Differential Revision: https://reviews.llvm.org/D87541 --- flang/lib/Semantics/expression.cpp | 27 +++++---- flang/test/Semantics/defined-ops.f90 | 88 ++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 10 deletions(-) create mode 100644 flang/test/Semantics/defined-ops.f90 diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index ae53559ea5db2..fcce08db6ef6d 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -1684,7 +1684,6 @@ auto ExpressionAnalyzer::AnalyzeProcedureComponentRef( const parser::ProcComponentRef &pcr, ActualArguments &&arguments) -> std::optional { const parser::StructureComponent &sc{pcr.v.thing}; - const auto &name{sc.component.source}; if (MaybeExpr base{Analyze(sc.base)}) { if (const Symbol * sym{sc.component.symbol}) { if (auto *dtExpr{UnwrapExpr>(*base)}) { @@ -1722,7 +1721,7 @@ auto ExpressionAnalyzer::AnalyzeProcedureComponentRef( } } } - Say(name, + Say(sc.component.source, "Base of procedure component reference is not a derived-type object"_err_en_US); } } @@ -2940,18 +2939,26 @@ std::optional ArgumentAnalyzer::GetDefinedAssignmentProc() { context_.EmitGenericResolutionError(*symbol); } } - for (std::size_t passIndex{0}; passIndex < actuals_.size(); ++passIndex) { - if (const Symbol * specific{FindBoundOp(oprName, passIndex)}) { - proc = specific; + int passedObjectIndex{-1}; + for (std::size_t i{0}; i < actuals_.size(); ++i) { + if (const Symbol * specific{FindBoundOp(oprName, i)}) { + if (const Symbol * + resolution{GetBindingResolution(GetType(i), *specific)}) { + proc = resolution; + } else { + proc = specific; + passedObjectIndex = i; + } } } - if (proc) { - ActualArguments actualsCopy{actuals_}; - actualsCopy[1]->Parenthesize(); - return ProcedureRef{ProcedureDesignator{*proc}, std::move(actualsCopy)}; - } else { + if (!proc) { return std::nullopt; } + ActualArguments actualsCopy{actuals_}; + if (passedObjectIndex >= 0) { + actualsCopy[passedObjectIndex]->set_isPassedObject(); + } + return ProcedureRef{ProcedureDesignator{*proc}, std::move(actualsCopy)}; } void ArgumentAnalyzer::Dump(llvm::raw_ostream &os) { diff --git a/flang/test/Semantics/defined-ops.f90 b/flang/test/Semantics/defined-ops.f90 new file mode 100644 index 0000000000000..24e72677c6eb1 --- /dev/null +++ b/flang/test/Semantics/defined-ops.f90 @@ -0,0 +1,88 @@ +! RUN: %f18 -funparse %s 2>&1 | FileCheck %s + +! Check the analyzed form of a defined operator or assignment. + +! Type-bound defined assignment +module m1 + type :: t + contains + procedure :: b1 => s1 + procedure, pass(y) :: b2 => s2 + generic :: assignment(=) => b1, b2 + end type +contains + subroutine s1(x, y) + class(t), intent(out) :: x + integer, intent(in) :: y + end + subroutine s2(x, y) + real, intent(out) :: x + class(t), intent(in) :: y + end + subroutine test1(x) + type(t) :: x + real :: a + !CHECK: CALL s1(x,1_4) + x = 1 + !CHECK: CALL s2(a,x) + a = x + end + subroutine test2(x) + class(t) :: x + real :: a + !CHECK: CALL x%b1(1_4) + x = 1 + !CHECK: CALL x%b2(a) + a = x + end +end + +! Type-bound operator +module m2 + type :: t2 + contains + procedure, pass(x2) :: b2 => f + generic :: operator(+) => b2 + end type +contains + integer pure function f(x1, x2) + class(t2), intent(in) :: x1 + class(t2), intent(in) :: x2 + end + subroutine test2(x, y) + class(t2) :: x + type(t2) :: y + !CHECK: i=f(x,y) + i = x + y + !CHECK: i=x%b2(y) + i = y + x + end +end module + +! 
Non-type-bound assignment and operator +module m3 + type t + end type + interface assignment(=) + subroutine s1(x, y) + import + class(t), intent(out) :: x + integer, intent(in) :: y + end + end interface + interface operator(+) + integer function f(x, y) + import + class(t), intent(in) :: x, y + end + end interface +contains + subroutine test(x, y) + class(t) :: x, y + !CHECK: CALL s1(x,2_4) + x = 2 + !CHECK: i=f(x,y) + i = x + y + end +end + From 7841e21c98495ba5e33e0d2507d985bd5b938445 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Mon, 14 Sep 2020 10:16:44 -0700 Subject: [PATCH 0559/1079] Let -basic-block-sections=labels emit basicblock metadata in a new .bb_addr_map section, instead of emitting special unary-encoded symbols. This patch introduces the new .bb_addr_map section feature which allows us to emit the bits needed for mapping binary profiles to basic blocks into a separate section. The format of the emitted data is represented as follows. It includes a header for every function: | Address of the function | -> 8 bytes (pointer size) | Number of basic blocks in this function (>0) | -> ULEB128 The header is followed by a BB record for every basic block. These records are ordered in the same order as MachineBasicBlocks are placed in the function. Each BB Info is structured as follows: | Offset of the basic block relative to function begin | -> ULEB128 | Binary size of the basic block | -> ULEB128 | BB metadata | -> ULEB128 [ MBB.isReturn() OR MBB.hasTailCall() << 1 OR MBB.isEHPad() << 2 ] The new feature will replace the existing "BB labels" functionality with -basic-block-sections=labels. The .bb_addr_map section scrubs the specially-encoded BB symbols from the binary and makes it friendly to profilers and debuggers. Furthermore, the new feature reduces the binary size overhead from 70% bloat to only 12%. For more information and results please refer to the RFC: https://lists.llvm.org/pipermail/llvm-dev/2020-July/143512.html Reviewed By: MaskRay, snehasish Differential Revision: https://reviews.llvm.org/D85408 --- clang/docs/UsersManual.rst | 9 +- clang/test/CodeGen/basic-block-sections.c | 17 ++-- llvm/include/llvm/CodeGen/AsmPrinter.h | 2 + llvm/include/llvm/CodeGen/MachineFunction.h | 3 - llvm/include/llvm/MC/MCObjectFileInfo.h | 2 + llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 87 +++++++++++++------ llvm/lib/CodeGen/BasicBlockSections.cpp | 20 ++--- llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 2 - llvm/lib/CodeGen/MachineBasicBlock.cpp | 26 ++---- llvm/lib/CodeGen/MachineFunction.cpp | 27 ------ llvm/lib/MC/MCObjectFileInfo.cpp | 18 ++++ ...lock-sections-labels-functions-sections.ll | 35 ++++++++ .../X86/basic-block-sections-labels.ll | 65 +++++++++----- 13 files changed, 184 insertions(+), 129 deletions(-) create mode 100644 llvm/test/CodeGen/X86/basic-block-sections-labels-functions-sections.ll diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 1a1aea2ae5382..2d0d71443dfda 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1700,9 +1700,12 @@ are listed below. **-fbasic-block-sections=[labels, all, list=, none]** - Controls whether Clang emits a label for each basic block. Further, with - values "all" and "list=arg", each basic block or a subset of basic blocks - can be placed in its own unique section. + Controls how Clang emits text sections for basic blocks. With values ``all`` + and ``list=``, each basic block or a subset of basic blocks can be placed + in its own unique section. 
+  emitted, but a ``.bb_addr_map`` section is emitted which includes address
+  offsets for each basic block in the program, relative to the parent function
+  address.

   With the ``list=<arg>`` option, a file containing the subset of basic blocks
   that need to be placed in unique sections can be specified. The format of the
diff --git a/clang/test/CodeGen/basic-block-sections.c b/clang/test/CodeGen/basic-block-sections.c
index 6cdea79f0fa7b..dc414d70ba5f9 100644
--- a/clang/test/CodeGen/basic-block-sections.c
+++ b/clang/test/CodeGen/basic-block-sections.c
@@ -1,12 +1,11 @@
 // REQUIRES: x86-registered-target

-// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S -o - < %s | FileCheck %s --check-prefix=PLAIN
-// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S -fbasic-block-sections=all -fbasic-block-sections=none -o - < %s | FileCheck %s --check-prefix=PLAIN
+// RUN: %clang_cc1 -triple x86_64 -S -o - < %s | FileCheck %s --check-prefix=PLAIN
+// RUN: %clang_cc1 -triple x86_64 -S -fbasic-block-sections=all -fbasic-block-sections=none -o - < %s | FileCheck %s --check-prefix=PLAIN

-// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S -fbasic-block-sections=labels -o - < %s | FileCheck %s --check-prefix=BB_LABELS
-// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S -fbasic-block-sections=all -o - < %s | FileCheck %s --check-prefix=BB_WORLD --check-prefix=BB_ALL
-// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S -fbasic-block-sections=list=%S/Inputs/basic-block-sections.funcnames -o - < %s | FileCheck %s --check-prefix=BB_WORLD --check-prefix=BB_LIST
-// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S -fbasic-block-sections=all -funique-basic-block-section-names -o - < %s | FileCheck %s --check-prefix=UNIQUE
+// RUN: %clang_cc1 -triple x86_64 -S -fbasic-block-sections=all -o - < %s | FileCheck %s --check-prefix=BB_WORLD --check-prefix=BB_ALL
+// RUN: %clang_cc1 -triple x86_64 -S -fbasic-block-sections=list=%S/Inputs/basic-block-sections.funcnames -o - < %s | FileCheck %s --check-prefix=BB_WORLD --check-prefix=BB_LIST
+// RUN: %clang_cc1 -triple x86_64 -S -fbasic-block-sections=all -funique-basic-block-section-names -o - < %s | FileCheck %s --check-prefix=UNIQUE

 int world(int a) {
   if (a > 10)
@@ -26,12 +25,6 @@ int another(int a) {
 // PLAIN-NOT: section
 // PLAIN: world:
 //
-// BB_LABELS-NOT: section
-// BB_LABELS: world:
-// BB_LABELS: a.BB.world:
-// BB_LABELS: aa.BB.world:
-// BB_LABELS: a.BB.another:
-//
 // BB_WORLD: .section .text.world,"ax",@progbits{{$}}
 // BB_WORLD: world:
 // BB_WORLD: .section .text.world,"ax",@progbits,unique
diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index eab6eb52b86cf..c157bb0672ba3 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -342,6 +342,8 @@ class AsmPrinter : public MachineFunctionPass {

   void emitStackSizeSection(const MachineFunction &MF);

+  void emitBBAddrMapSection(const MachineFunction &MF);
+
   void emitRemarksSection(remarks::RemarkStreamer &RS);

   enum CFIMoveType { CFI_M_None, CFI_M_EH, CFI_M_Debug };
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index 247716df78825..8f80eca939fd4 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -510,9 +510,6 @@ class MachineFunction {

   void setBBSectionsType(BasicBlockSection V) { BBSectionsType = V; }

-  /// Creates basic block Labels for this function.
-  void createBBLabels();
-
   /// Assign IsBeginSection IsEndSection fields for basic blocks in this
   /// function.
   void assignBeginEndSections();
diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h
index ca04d8e8d3b68..8c6bcba2332b1 100644
--- a/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -338,6 +338,8 @@ class MCObjectFileInfo {

   MCSection *getStackSizesSection(const MCSection &TextSec) const;

+  MCSection *getBBAddrMapSection(const MCSection &TextSec) const;
+
   // ELF specific sections.
   MCSection *getDataRelROSection() const { return DataRelROSection; }
   const MCSection *getMergeableConst4Section() const {
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 7a141819950a9..01370baa4fd12 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1023,6 +1023,46 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
                                  MCConstantExpr::create(FrameOffset, OutContext));
 }

+/// Returns the BB metadata to be emitted in the bb_addr_map section for a given
+/// basic block. This can be used to capture more precise profile information.
+/// We use the last 3 bits (LSBs) to encode the following information:
+///  * (1): set if return block (ret or tail call).
+///  * (2): set if ends with a tail call.
+///  * (3): set if exception handling (EH) landing pad.
+/// The remaining bits are zero.
+static unsigned getBBAddrMapMetadata(const MachineBasicBlock &MBB) {
+  const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
+  return ((unsigned)MBB.isReturnBlock()) |
+         ((!MBB.empty() && TII->isTailCall(MBB.back())) << 1) |
+         (MBB.isEHPad() << 2);
+}
+
+void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
+  MCSection *BBAddrMapSection =
+      getObjFileLowering().getBBAddrMapSection(*MF.getSection());
+  assert(BBAddrMapSection && ".bb_addr_map section is not initialized.");
+
+  const MCSymbol *FunctionSymbol = getFunctionBegin();
+
+  OutStreamer->PushSection();
+  OutStreamer->SwitchSection(BBAddrMapSection);
+  OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize());
+  // Emit the total number of basic blocks in this function.
+  OutStreamer->emitULEB128IntValue(MF.size());
+  // Emit BB Information for each basic block in the function.
+  for (const MachineBasicBlock &MBB : MF) {
+    const MCSymbol *MBBSymbol =
+        MBB.pred_empty() ? FunctionSymbol : MBB.getSymbol();
+    // Emit the basic block offset.
+    emitLabelDifferenceAsULEB128(MBBSymbol, FunctionSymbol);
+    // Emit the basic block size. When BBs have alignments, their size cannot
+    // always be computed from their offsets.
+    emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol);
+    OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB));
+  }
+  OutStreamer->PopSection();
+}
+
 void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {
   if (!MF.getTarget().Options.EmitStackSizeSection)
     return;
@@ -1179,34 +1219,26 @@ void AsmPrinter::emitFunctionBody() {
     }

     // We must emit temporary symbol for the end of this basic block, if either
-    // we have BBLabels enabled and we want to emit size directive for the BBs,
-    // or if this basic blocks marks the end of a section (except the section
-    // containing the entry basic block as the end symbol for that section is
-    // CurrentFnEnd).
-    if ((MAI->hasDotTypeDotSizeDirective() && MF->hasBBLabels()) ||
-        (MBB.isEndSection() && !MBB.sameSection(&MF->front())))
+    // we have BBLabels enabled or if this basic block marks the end of a
+    // section (except the section containing the entry basic block as the end
+    // symbol for that section is CurrentFnEnd).
+    if (MF->hasBBLabels() ||
+        (MAI->hasDotTypeDotSizeDirective() && MBB.isEndSection() &&
+         !MBB.sameSection(&MF->front())))
       OutStreamer->emitLabel(MBB.getEndSymbol());

-    // Helper for emitting the size directive associated with a basic block
-    // symbol.
-    auto emitELFSizeDirective = [&](MCSymbol *SymForSize) {
-      const MCExpr *SizeExp = MCBinaryExpr::createSub(
-          MCSymbolRefExpr::create(MBB.getEndSymbol(), OutContext),
-          MCSymbolRefExpr::create(SymForSize, OutContext), OutContext);
-      OutStreamer->emitELFSize(SymForSize, SizeExp);
-    };
-
-    // Emit size directive for the size of each basic block, if BBLabels is
-    // enabled.
-    if (MAI->hasDotTypeDotSizeDirective() && MF->hasBBLabels())
-      emitELFSizeDirective(MBB.getSymbol());
-
-    // Emit size directive for the size of each basic block section once we
-    // get to the end of that section.
     if (MBB.isEndSection()) {
+      // The size directive for the section containing the entry block is
+      // handled separately by the function section.
       if (!MBB.sameSection(&MF->front())) {
-        if (MAI->hasDotTypeDotSizeDirective())
-          emitELFSizeDirective(CurrentSectionBeginSym);
+        if (MAI->hasDotTypeDotSizeDirective()) {
+          // Emit the size directive for the basic block section.
+          const MCExpr *SizeExp = MCBinaryExpr::createSub(
+              MCSymbolRefExpr::create(MBB.getEndSymbol(), OutContext),
+              MCSymbolRefExpr::create(CurrentSectionBeginSym, OutContext),
+              OutContext);
+          OutStreamer->emitELFSize(CurrentSectionBeginSym, SizeExp);
+        }
         MBBSectionRanges[MBB.getSectionIDNum()] =
             MBBSectionRange{CurrentSectionBeginSym, MBB.getEndSymbol()};
       }
@@ -1298,6 +1330,11 @@ void AsmPrinter::emitFunctionBody() {
       HI.Handler->endFunction(MF);
   }

+  // Emit section containing BB address offsets and their metadata, when
+  // BB labels are requested for this function.
+  if (MF->hasBBLabels())
+    emitBBAddrMapSection(*MF);
+
   // Emit section containing stack size metadata.
  emitStackSizeSection(*MF);

@@ -1807,7 +1844,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
       F.hasFnAttribute("function-instrument") ||
       F.hasFnAttribute("xray-instruction-threshold") ||
       needFuncLabelsForEHOrDebugInfo(MF) || NeedsLocalForSize ||
-      MF.getTarget().Options.EmitStackSizeSection) {
+      MF.getTarget().Options.EmitStackSizeSection || MF.hasBBLabels()) {
     CurrentFnBegin = createTempSymbol("func_begin");
     if (NeedsLocalForSize)
       CurrentFnSymForSize = CurrentFnBegin;
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index a3c366004c7f3..421c1d896a0f1 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -48,19 +48,11 @@
 // Basic Block Labels
 // ==================
 //
-// With -fbasic-block-sections=labels, or when a basic block is placed in a
-// unique section, it is labelled with a symbol. This allows easy mapping of
-// virtual addresses from PMU profiles back to the corresponding basic blocks.
-// Since the number of basic blocks is large, the labeling bloats the symbol
-// table sizes and the string table sizes significantly. While the binary size
-// does increase, it does not affect performance as the symbol table is not
-// loaded in memory during run-time.
The string table size bloat is kept very -// minimal using a unary naming scheme that uses string suffix compression. The -// basic blocks for function foo are named "a.BB.foo", "aa.BB.foo", ... This -// turns out to be very good for string table sizes and the bloat in the string -// table size for a very large binary is ~8 %. The naming also allows using -// the --symbol-ordering-file option in LLD to arbitrarily reorder the -// sections. +// With -fbasic-block-sections=labels, we emit the offsets of BB addresses of +// every function into a .bb_addr_map section. Along with the function symbols, +// this allows for mapping of virtual addresses in PMU profiles back to the +// corresponding basic blocks. This logic is implemented in AsmPrinter. This +// pass only assigns the BBSectionType of every function to ``labels``. // //===----------------------------------------------------------------------===// @@ -304,7 +296,6 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { if (BBSectionsType == BasicBlockSection::Labels) { MF.setBBSectionsType(BBSectionsType); - MF.createBBLabels(); return true; } @@ -314,7 +305,6 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { FuncBBClusterInfo)) return true; MF.setBBSectionsType(BBSectionsType); - MF.createBBLabels(); assignSections(MF, FuncBBClusterInfo); // We make sure that the cluster including the entry basic block precedes all diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 945a560de3ca9..030c3d3e23ab4 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -451,10 +451,8 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, } // Check Basic Block Section Flags. if (MF.getTarget().getBBSectionsType() == BasicBlockSection::Labels) { - MF.createBBLabels(); MF.setBBSectionsType(BasicBlockSection::Labels); } else if (MF.hasBBSections()) { - MF.createBBLabels(); MF.assignBeginEndSections(); } PFS.SM = &SM; diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index ebdd17fc728d3..b260af72043b4 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -60,28 +60,11 @@ MCSymbol *MachineBasicBlock::getSymbol() const { if (!CachedMCSymbol) { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); - auto Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); - assert(getNumber() >= 0 && "cannot get label for unreachable MBB"); - - // We emit a non-temporary symbol for every basic block if we have BBLabels - // or -- with basic block sections -- when a basic block begins a section. - // With basic block symbols, we use a unary encoding which can - // compress the symbol names significantly. For basic block sections where - // this block is the first in a cluster, we use a non-temp descriptive name. - // Otherwise we fall back to use temp label. - if (MF->hasBBLabels()) { - auto Iter = MF->getBBSectionsSymbolPrefix().begin(); - if (getNumber() < 0 || - getNumber() >= (int)MF->getBBSectionsSymbolPrefix().size()) - report_fatal_error("Unreachable MBB: " + Twine(getNumber())); - // The basic blocks for function foo are named a.BB.foo, aa.BB.foo, and - // so on. - std::string Prefix(Iter + 1, Iter + getNumber() + 1); - std::reverse(Prefix.begin(), Prefix.end()); - CachedMCSymbol = - Ctx.getOrCreateSymbol(Twine(Prefix) + ".BB." 
+ Twine(MF->getName())); - } else if (MF->hasBBSections() && isBeginSection()) { + // We emit a non-temporary symbol -- with a descriptive name -- if it begins + // a section (with basic block sections). Otherwise we fall back to use temp + // label. + if (MF->hasBBSections() && isBeginSection()) { SmallString<5> Suffix; if (SectionID == MBBSectionID::ColdSectionID) { Suffix += ".cold"; @@ -92,6 +75,7 @@ MCSymbol *MachineBasicBlock::getSymbol() const { } CachedMCSymbol = Ctx.getOrCreateSymbol(MF->getName() + Suffix); } else { + const StringRef Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber())); diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 0950d6497e433..e4473fd124dfc 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -341,33 +341,6 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { MBBNumbering.resize(BlockNo); } -/// This is used with -fbasic-block-sections or -fbasicblock-labels option. -/// A unary encoding of basic block labels is done to keep ".strtab" sizes -/// small. -void MachineFunction::createBBLabels() { - const TargetInstrInfo *TII = getSubtarget().getInstrInfo(); - this->BBSectionsSymbolPrefix.resize(getNumBlockIDs(), 'a'); - for (auto MBBI = begin(), E = end(); MBBI != E; ++MBBI) { - assert( - (MBBI->getNumber() >= 0 && MBBI->getNumber() < (int)getNumBlockIDs()) && - "BasicBlock number was out of range!"); - // 'a' - Normal block. - // 'r' - Return block. - // 'l' - Landing Pad. - // 'L' - Return and landing pad. - bool isEHPad = MBBI->isEHPad(); - bool isRetBlock = MBBI->isReturnBlock() && !TII->isTailCall(MBBI->back()); - char type = 'a'; - if (isEHPad && isRetBlock) - type = 'L'; - else if (isEHPad) - type = 'l'; - else if (isRetBlock) - type = 'r'; - BBSectionsSymbolPrefix[MBBI->getNumber()] = type; - } -} - /// This method iterates over the basic blocks and assigns their IsBeginSection /// and IsEndSection fields. This must be called after MBB layout is finalized /// and the SectionID's are assigned to MBBs. 
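As a concrete companion to the format described in this patch's commit message,
a standalone decoder for one function entry of the .bb_addr_map section might
look like the sketch below. It is illustrative only: readULEB128, BBEntry, and
decodeFunction are hypothetical local helpers rather than LLVM APIs, and an
8-byte little-endian function address is assumed.

  #include <cstdint>
  #include <cstring>
  #include <vector>

  // Local helper, not an LLVM API: decode one ULEB128 value and advance P.
  static uint64_t readULEB128(const uint8_t *&P) {
    uint64_t Value = 0;
    unsigned Shift = 0;
    uint8_t Byte;
    do {
      Byte = *P++;
      Value |= uint64_t(Byte & 0x7f) << Shift;
      Shift += 7;
    } while (Byte & 0x80);
    return Value;
  }

  struct BBEntry {
    uint64_t Offset;  // Start of the BB, relative to the function address.
    uint64_t Size;    // Binary size of the BB.
    bool IsReturn;    // Metadata bit 0: return block (ret or tail call).
    bool HasTailCall; // Metadata bit 1: ends with a tail call.
    bool IsEHPad;     // Metadata bit 2: exception handling landing pad.
  };

  // Decodes one per-function record: an 8-byte address, a ULEB128 block
  // count, then one (offset, size, metadata) triple of ULEB128s per block.
  static std::vector<BBEntry> decodeFunction(const uint8_t *&P,
                                             uint64_t &FuncAddress) {
    std::memcpy(&FuncAddress, P, 8); // assumes little-endian host and target
    P += 8;
    uint64_t NumBlocks = readULEB128(P);
    std::vector<BBEntry> Blocks;
    for (uint64_t I = 0; I < NumBlocks; ++I) {
      BBEntry E;
      E.Offset = readULEB128(P);
      E.Size = readULEB128(P);
      uint64_t Meta = readULEB128(P);
      E.IsReturn = Meta & 1;
      E.HasTailCall = Meta & 2;
      E.IsEHPad = Meta & 4;
      Blocks.push_back(E);
    }
    return Blocks;
  }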
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index 927294fcd7e15..0660780c15a18 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -953,3 +953,21 @@ MCObjectFileInfo::getStackSizesSection(const MCSection &TextSec) const {
                             GroupName, MCSection::NonUniqueID,
                             cast<MCSymbolELF>(TextSec.getBeginSymbol()));
 }
+
+MCSection *
+MCObjectFileInfo::getBBAddrMapSection(const MCSection &TextSec) const {
+  if (Env != IsELF)
+    return nullptr;
+
+  const MCSectionELF &ElfSec = static_cast<const MCSectionELF &>(TextSec);
+  unsigned Flags = ELF::SHF_LINK_ORDER;
+  StringRef GroupName;
+  if (const MCSymbol *Group = ElfSec.getGroup()) {
+    GroupName = Group->getName();
+    Flags |= ELF::SHF_GROUP;
+  }
+
+  return Ctx->getELFSection(".bb_addr_map", ELF::SHT_PROGBITS, Flags, 0,
+                            GroupName, MCSection::NonUniqueID,
+                            cast<MCSymbolELF>(TextSec.getBeginSymbol()));
+}
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-labels-functions-sections.ll b/llvm/test/CodeGen/X86/basic-block-sections-labels-functions-sections.ll
new file mode 100644
index 0000000000000..1142a8a1ec1ba
--- /dev/null
+++ b/llvm/test/CodeGen/X86/basic-block-sections-labels-functions-sections.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=labels | FileCheck %s
+
+$_Z4fooTIiET_v = comdat any
+
+define dso_local i32 @_Z3barv() {
+  ret i32 0
+}
+;; Check we add SHF_LINK_ORDER for .bb_addr_map and link it with the corresponding .text sections.
+; CHECK: .section .text._Z3barv,"ax",@progbits
+; CHECK-LABEL: _Z3barv:
+; CHECK-NEXT: [[BAR_BEGIN:.Lfunc_begin[0-9]+]]:
+; CHECK: .section .bb_addr_map,"o",@progbits,.text._Z3barv{{$}}
+; CHECK-NEXT: .quad [[BAR_BEGIN]]
+
+
+define dso_local i32 @_Z3foov() {
+  %1 = call i32 @_Z4fooTIiET_v()
+  ret i32 %1
+}
+; CHECK: .section .text._Z3foov,"ax",@progbits
+; CHECK-LABEL: _Z3foov:
+; CHECK-NEXT: [[FOO_BEGIN:.Lfunc_begin[0-9]+]]:
+; CHECK: .section .bb_addr_map,"o",@progbits,.text._Z3foov{{$}}
+; CHECK-NEXT: .quad [[FOO_BEGIN]]
+
+
+define linkonce_odr dso_local i32 @_Z4fooTIiET_v() comdat {
+  ret i32 0
+}
+;; Check we add .bb_addr_map section to a COMDAT group with the corresponding .text section if such a COMDAT exists.
+; CHECK: .section .text._Z4fooTIiET_v,"axG",@progbits,_Z4fooTIiET_v,comdat
+; CHECK-LABEL: _Z4fooTIiET_v:
+; CHECK-NEXT: [[FOOCOMDAT_BEGIN:.Lfunc_begin[0-9]+]]:
+; CHECK: .section .bb_addr_map,"Go",@progbits,_Z4fooTIiET_v,comdat,.text._Z4fooTIiET_v{{$}}
+; CHECK-NEXT: .quad [[FOOCOMDAT_BEGIN]]
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-labels.ll b/llvm/test/CodeGen/X86/basic-block-sections-labels.ll
index 80aaf79c115a4..267132c92e982 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-labels.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-labels.ll
@@ -1,23 +1,24 @@
 ; Check the basic block sections labels option
-; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=labels | FileCheck %s -check-prefix=LINUX-LABELS
+; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=labels | FileCheck %s

-define void @_Z3bazb(i1 zeroext) {
-  %2 = alloca i8, align 1
-  %3 = zext i1 %0 to i8
-  store i8 %3, i8* %2, align 1
-  %4 = load i8, i8* %2, align 1
-  %5 = trunc i8 %4 to i1
-  br i1 %5, label %6, label %8
+define void @_Z3bazb(i1 zeroext) personality i32 (...)* @__gxx_personality_v0 {
+  br i1 %0, label %2, label %7

-6:                                                ; preds = %1
-  %7 = call i32 @_Z3barv()
-  br label %10
+2:
+  %3 = invoke i32 @_Z3barv()
+          to label %7 unwind label %5
+  br label %9

-8:                                                ; preds = %1
-  %9 = call i32 @_Z3foov()
-  br label %10
+5:
+  landingpad { i8*, i32 }
+          catch i8* null
+  br label %9

-10:                                               ; preds = %8, %6
+7:
+  %8 = call i32 @_Z3foov()
+  br label %9
+
+9:
   ret void
 }

@@ -25,9 +26,31 @@

 declare i32 @_Z3barv() #1

 declare i32 @_Z3foov() #1

-; LINUX-LABELS: .section
-; LINUX-LABELS: _Z3bazb:
-; LINUX-LABELS-NOT: .section
-; LINUX-LABELS: r.BB._Z3bazb:
-; LINUX-LABELS-NOT: .section
-; LINUX-LABELS: rr.BB._Z3bazb:
+declare i32 @__gxx_personality_v0(...)
+
+; CHECK-LABEL: _Z3bazb:
+; CHECK-LABEL: .Lfunc_begin0:
+; CHECK-LABEL: .LBB_END0_0:
+; CHECK-LABEL: .LBB0_1:
+; CHECK-LABEL: .LBB_END0_1:
+; CHECK-LABEL: .LBB0_2:
+; CHECK-LABEL: .LBB_END0_2:
+; CHECK-LABEL: .LBB0_3:
+; CHECK-LABEL: .LBB_END0_3:
+; CHECK-LABEL: .Lfunc_end0:
+
+; CHECK: .section .bb_addr_map,"o",@progbits,.text
+; CHECK-NEXT: .quad .Lfunc_begin0
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .uleb128 .Lfunc_begin0-.Lfunc_begin0
+; CHECK-NEXT: .uleb128 .LBB_END0_0-.Lfunc_begin0
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .uleb128 .LBB0_1-.Lfunc_begin0
+; CHECK-NEXT: .uleb128 .LBB_END0_1-.LBB0_1
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .uleb128 .LBB0_2-.Lfunc_begin0
+; CHECK-NEXT: .uleb128 .LBB_END0_2-.LBB0_2
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .uleb128 .LBB0_3-.Lfunc_begin0
+; CHECK-NEXT: .uleb128 .LBB_END0_3-.LBB0_3
+; CHECK-NEXT: .byte 5

From 4ff4708d39b790bf7231ad0fa4e7cfddb4e26f95 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 14 Sep 2020 18:16:17 +0100
Subject: [PATCH 0560/1079] collectBitParts - use const references. NFCI.

Fixes clang-tidy warnings first noticed on D87452.
---
 llvm/lib/Transforms/Utils/Local.cpp | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 41349457e2b95..0b848feddf8ee 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -2795,10 +2795,10 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
   if (Instruction *I = dyn_cast<Instruction>(V)) {
     // If this is an or instruction, it may be an inner node of the bswap.
    if (I->getOpcode() == Instruction::Or) {
-      auto &A = collectBitParts(I->getOperand(0), MatchBSwaps,
-                                MatchBitReversals, BPS, Depth + 1);
-      auto &B = collectBitParts(I->getOperand(1), MatchBSwaps,
-                                MatchBitReversals, BPS, Depth + 1);
+      const auto &A = collectBitParts(I->getOperand(0), MatchBSwaps,
+                                      MatchBitReversals, BPS, Depth + 1);
+      const auto &B = collectBitParts(I->getOperand(1), MatchBSwaps,
+                                      MatchBitReversals, BPS, Depth + 1);
       if (!A || !B)
         return Result;
@@ -2830,8 +2830,8 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
       if (BitShift > BitWidth)
         return Result;
-      auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
-                                  MatchBitReversals, BPS, Depth + 1);
+      const auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
+                                        MatchBitReversals, BPS, Depth + 1);
       if (!Res)
         return Result;
       Result = Res;
@@ -2862,8 +2862,8 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
       if (!MatchBitReversals && NumMaskedBits % 8 != 0)
         return Result;
-      auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
-                                  MatchBitReversals, BPS, Depth + 1);
+      const auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
+                                        MatchBitReversals, BPS, Depth + 1);
       if (!Res)
         return Result;
       Result = Res;
@@ -2877,8 +2877,8 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
     // If this is a zext instruction zero extend the result.
     if (I->getOpcode() == Instruction::ZExt) {
-      auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
-                                  MatchBitReversals, BPS, Depth + 1);
+      const auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
+                                        MatchBitReversals, BPS, Depth + 1);
       if (!Res)
         return Result;

From 132e57bc597bd3f50174b7d286c43f76b47f11c1 Mon Sep 17 00:00:00 2001
From: Walter Erquinigo
Date: Tue, 1 Sep 2020 18:52:14 -0700
Subject: [PATCH 0561/1079] Retry of D84974 - Fix a small issue caused by a
 conflicting name (GetObject) on Windows.

The fix was to rename the internal GetObject function to GetNextObject.
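For background on the clash itself: on Windows, <windows.h> (via wingdi.h)
defines GetObject as a macro expanding to GetObjectA or GetObjectW, so any
method named GetObject is silently rewritten by the preprocessor in every
translation unit that includes that header. A minimal, Windows-only
illustration (a sketch, not lldb-vscode code):

  #include <windows.h> // wingdi.h: #define GetObject GetObjectA (or ...W)

  struct Adapter {
    // After preprocessing, this declares GetObjectA()/GetObjectW(); a
    // definition in a .cpp file that does not include <windows.h> still
    // defines GetObject(), producing confusing build/link errors. Renaming
    // the method (here, to GetNextObject) sidesteps the macro entirely.
    int GetObject();
    int GetNextObject(); // unaffected by the macro
  };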
--- .../tools/lldb-vscode/lldbvscode_testcase.py | 14 +- .../test/tools/lldb-vscode/vscode.py | 30 +++- .../tools/lldb-vscode/runInTerminal/Makefile | 3 + .../runInTerminal/TestVSCode_runInTerminal.py | 48 +++++ .../tools/lldb-vscode/runInTerminal/main.c | 11 ++ lldb/tools/lldb-vscode/JSONUtils.cpp | 40 +++++ lldb/tools/lldb-vscode/JSONUtils.h | 12 ++ lldb/tools/lldb-vscode/VSCode.cpp | 70 +++++++- lldb/tools/lldb-vscode/VSCode.h | 45 +++++ lldb/tools/lldb-vscode/lldb-vscode.cpp | 167 ++++++++++-------- lldb/tools/lldb-vscode/package.json | 5 + 11 files changed, 363 insertions(+), 82 deletions(-) create mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile create mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py create mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/main.c diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py index fa5a9c0db1ebd..5710751ec34bf 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py @@ -282,7 +282,7 @@ def launch(self, program=None, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None, sourcePath=None, debuggerRoot=None, launchCommands=None, - sourceMap=None, disconnectAutomatically=True): + sourceMap=None, disconnectAutomatically=True, runInTerminal=False): '''Sending launch request to vscode ''' @@ -316,10 +316,16 @@ def cleanup(): sourcePath=sourcePath, debuggerRoot=debuggerRoot, launchCommands=launchCommands, - sourceMap=sourceMap) + sourceMap=sourceMap, + runInTerminal=runInTerminal) if not (response and response['success']): self.assertTrue(response['success'], 'launch failed (%s)' % (response['message'])) + # We need to trigger a request_configurationDone after we've successfully + # attached a runInTerminal process to finish initialization. + if runInTerminal: + self.vscode.request_configurationDone() + def build_and_launch(self, program, args=None, cwd=None, env=None, stopOnEntry=False, disableASLR=True, @@ -327,7 +333,7 @@ def build_and_launch(self, program, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None, sourcePath=None, - debuggerRoot=None): + debuggerRoot=None, runInTerminal=False): '''Build the default Makefile target, create the VSCode debug adaptor, and launch the process. 
        '''
@@ -337,4 +343,4 @@ def build_and_launch(self, program, args=None, cwd=None, env=None,
         self.launch(program, args, cwd, env, stopOnEntry, disableASLR,
                     disableSTDIO, shellExpandArguments, trace,
                     initCommands, preRunCommands, stopCommands, exitCommands,
-                    terminateCommands, sourcePath, debuggerRoot)
+                    terminateCommands, sourcePath, debuggerRoot, runInTerminal=runInTerminal)
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py
index 6b1c1c961b545..834e33ef5c3da 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py
@@ -300,12 +300,29 @@ def send_recv(self, command):
         self.send_packet(command)
         done = False
         while not done:
-            response = self.recv_packet(filter_type='response')
-            if response is None:
+            response_or_request = self.recv_packet(filter_type=['response', 'request'])
+            if response_or_request is None:
                 desc = 'no response for "%s"' % (command['command'])
                 raise ValueError(desc)
-            self.validate_response(command, response)
-            return response
+            if response_or_request['type'] == 'response':
+                self.validate_response(command, response_or_request)
+                return response_or_request
+            else:
+                if response_or_request['command'] == 'runInTerminal':
+                    subprocess.Popen(response_or_request['arguments']['args'],
+                                     env=response_or_request['arguments']['env'])
+                    self.send_packet({
+                        "type": "response",
+                        "seq": -1,
+                        "request_seq": response_or_request['seq'],
+                        "success": True,
+                        "command": "runInTerminal",
+                        "body": {}
+                    }, set_sequence=False)
+                else:
+                    desc = 'unknown reverse request "%s"' % (response_or_request['command'])
+                    raise ValueError(desc)
+
         return None

     def wait_for_event(self, filter=None, timeout=None):
@@ -599,7 +616,8 @@ def request_launch(self, program, args=None, cwd=None, env=None,
                        trace=False, initCommands=None, preRunCommands=None,
                        stopCommands=None, exitCommands=None,
                        terminateCommands=None ,sourcePath=None,
-                       debuggerRoot=None, launchCommands=None, sourceMap=None):
+                       debuggerRoot=None, launchCommands=None, sourceMap=None,
+                       runInTerminal=False):
         args_dict = {
             'program': program
         }
@@ -638,6 +656,8 @@ def request_launch(self, program, args=None, cwd=None, env=None,
             args_dict['launchCommands'] = launchCommands
         if sourceMap:
             args_dict['sourceMap'] = sourceMap
+        if runInTerminal:
+            args_dict['runInTerminal'] = runInTerminal
         command_dict = {
             'command': 'launch',
             'type': 'request',
diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile b/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile
new file mode 100644
index 0000000000000..10495940055b6
--- /dev/null
+++ b/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile
@@ -0,0 +1,3 @@
+C_SOURCES := main.c
+
+include Makefile.rules
diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py b/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py
new file mode 100644
index 0000000000000..6a463dfacc1f9
--- /dev/null
+++ b/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py
@@ -0,0 +1,48 @@
+"""
+Test lldb-vscode runInTerminal reverse request
+"""
+
+
+import unittest2
+import vscode
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+import lldbvscode_testcase
+import time
+import os
+
+
+class TestVSCode_runInTerminal(lldbvscode_testcase.VSCodeTestCaseBase):
+
+    mydir = TestBase.compute_mydir(__file__)
+
+    @skipUnlessDarwin
+    @skipIfRemote
+    def test_runInTerminal(self):
+        '''
+        Tests the "runInTerminal" reverse request. It makes sure that the IDE can
+        launch the inferior with the correct environment variables and arguments.
+        '''
+        program = self.getBuildArtifact("a.out")
+        source = 'main.c'
+        self.build_and_launch(program, stopOnEntry=True, runInTerminal=True, args=["foobar"], env=["FOO=bar"])
+        breakpoint_line = line_number(source, '// breakpoint')
+
+        self.set_source_breakpoints(source, [breakpoint_line])
+        self.continue_to_next_stop()
+
+        # We verify we actually stopped inside the loop
+        counter = int(self.vscode.get_local_variable_value('counter'))
+        self.assertTrue(counter > 0)
+
+        # We verify we were able to set the launch arguments
+        argc = int(self.vscode.get_local_variable_value('argc'))
+        self.assertEqual(argc, 2)
+
+        argv1 = self.vscode.request_evaluate('argv[1]')['body']['result']
+        self.assertIn('foobar', argv1)
+
+        # We verify we were able to set the environment
+        env = self.vscode.request_evaluate('foo')['body']['result']
+        self.assertIn('bar', env)
diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c b/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c
new file mode 100644
index 0000000000000..676bd830e657b
--- /dev/null
+++ b/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c
@@ -0,0 +1,11 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+int main(int argc, char *argv[]) {
+  const char *foo = getenv("FOO");
+  for (int counter = 1;; counter++) {
+    sleep(1); // breakpoint
+  }
+  return 0;
+}
diff --git a/lldb/tools/lldb-vscode/JSONUtils.cpp b/lldb/tools/lldb-vscode/JSONUtils.cpp
index 36156ca2c42f9..044bfd13ec463 100644
--- a/lldb/tools/lldb-vscode/JSONUtils.cpp
+++ b/lldb/tools/lldb-vscode/JSONUtils.cpp
@@ -998,4 +998,44 @@ llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit) {
   return llvm::json::Value(std::move(object));
 }

+/// See
+/// https://microsoft.github.io/debug-adapter-protocol/specification#Reverse_Requests_RunInTerminal
+llvm::json::Object
+CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request) {
+  llvm::json::Object reverse_request;
+  reverse_request.try_emplace("type", "request");
+  reverse_request.try_emplace("command", "runInTerminal");
+
+  llvm::json::Object run_in_terminal_args;
+  // This tells the IDE to open an integrated terminal, instead of opening the
+  // terminal in a new window.
+  run_in_terminal_args.try_emplace("kind", "integrated");
+
+  auto launch_request_arguments = launch_request.getObject("arguments");
+  std::vector<std::string> args = GetStrings(launch_request_arguments, "args");
+  // The program path must be the first entry in the "args" field
+  args.insert(args.begin(),
+              GetString(launch_request_arguments, "program").str());
+  run_in_terminal_args.try_emplace("args", args);
+
+  const auto cwd = GetString(launch_request_arguments, "cwd");
+  if (!cwd.empty())
+    run_in_terminal_args.try_emplace("cwd", cwd);
+
+  // We need to convert the input list of environment variables into a
+  // dictionary
+  std::vector<std::string> envs = GetStrings(launch_request_arguments, "env");
+  llvm::json::Object environment;
+  for (const std::string &env : envs) {
+    size_t index = env.find("=");
+    environment.try_emplace(env.substr(0, index), env.substr(index + 1));
+  }
+  run_in_terminal_args.try_emplace("env",
+                                   llvm::json::Value(std::move(environment)));
+
+  reverse_request.try_emplace(
+      "arguments", llvm::json::Value(std::move(run_in_terminal_args)));
+  return reverse_request;
+}
+
 } // namespace lldb_vscode
diff --git a/lldb/tools/lldb-vscode/JSONUtils.h b/lldb/tools/lldb-vscode/JSONUtils.h
index df4428f390ba2..88cbef9e5fdd4 100644
--- a/lldb/tools/lldb-vscode/JSONUtils.h
+++ b/lldb/tools/lldb-vscode/JSONUtils.h
@@ -443,6 +443,18 @@ llvm::json::Value CreateVariable(lldb::SBValue v, int64_t variablesReference,

 llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit);

+/// Create a runInTerminal reverse request object
+///
+/// \param[in] launch_request
+///     The original launch_request object whose fields are used to construct
+///     the reverse request object.
+///
+/// \return
+///     A "runInTerminal" JSON object that follows the specification outlined
+///     by Microsoft.
+llvm::json::Object
+CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request);
+
 } // namespace lldb_vscode

 #endif
diff --git a/lldb/tools/lldb-vscode/VSCode.cpp b/lldb/tools/lldb-vscode/VSCode.cpp
index 537cae7868631..9450cdf3132a1 100644
--- a/lldb/tools/lldb-vscode/VSCode.cpp
+++ b/lldb/tools/lldb-vscode/VSCode.cpp
@@ -38,7 +38,8 @@ VSCode::VSCode()
           {"swift_catch", "Swift Catch", lldb::eLanguageTypeSwift},
           {"swift_throw", "Swift Throw", lldb::eLanguageTypeSwift}}),
       focus_tid(LLDB_INVALID_THREAD_ID), sent_terminated_event(false),
-      stop_at_entry(false), is_attach(false) {
+      stop_at_entry(false), is_attach(false),
+      reverse_request_seq(0), waiting_for_run_in_terminal(false) {
   const char *log_file_path = getenv("LLDBVSCODE_LOG");
 #if defined(_WIN32)
 // Windows opens stdout and stdin in text mode which converts \n to 13,10
@@ -362,4 +363,71 @@ void VSCode::SetTarget(const lldb::SBTarget target) {
   }
 }

+PacketStatus VSCode::GetNextObject(llvm::json::Object &object) {
+  std::string json = ReadJSON();
+  if (json.empty())
+    return PacketStatus::EndOfFile;
+
+  llvm::StringRef json_sref(json);
+  llvm::Expected<llvm::json::Value> json_value = llvm::json::parse(json_sref);
+  if (!json_value) {
+    auto error = json_value.takeError();
+    if (log) {
+      std::string error_str;
+      llvm::raw_string_ostream strm(error_str);
+      strm << error;
+      strm.flush();
+      *log << "error: failed to parse JSON: " << error_str << std::endl
+           << json << std::endl;
+    }
+    return PacketStatus::JSONMalformed;
+  }
+  if (!json_value->getAsObject()) {
+    if (log)
+      *log << "error: json packet isn't an object" << std::endl;
+    return PacketStatus::JSONNotObject;
+  }
+  object = *json_value->getAsObject();
+  return PacketStatus::Success;
+}
+
+bool VSCode::HandleObject(const llvm::json::Object &object) {
+  const auto packet_type = GetString(object, "type");
+  if (packet_type == "request") {
+    const auto command = GetString(object, "command");
+    auto handler_pos = request_handlers.find(std::string(command));
+    if (handler_pos != request_handlers.end()) {
+      handler_pos->second(object);
+      return true; // Success
+    } else {
+      if (log)
+        *log << "error: unhandled command \"" << command.data() << "\""
+             << std::endl;
+      return false; // Fail
+    }
+  }
+  return false;
+}
+
+PacketStatus VSCode::SendReverseRequest(llvm::json::Object request,
+                                        llvm::json::Object &response) {
+  request.try_emplace("seq", ++reverse_request_seq);
+  SendJSON(llvm::json::Value(std::move(request)));
+  while (true) {
+    PacketStatus status = GetNextObject(response);
+    const auto packet_type = GetString(response, "type");
+    if (packet_type == "response")
+      return status;
+    else {
+      // Not our response, we got another packet
+      HandleObject(response);
+    }
+  }
+  return PacketStatus::EndOfFile;
+}
+
+void VSCode::RegisterRequestCallback(std::string request,
+                                     RequestCallback callback) {
+  request_handlers[request] = callback;
+}
+
 } // namespace lldb_vscode
diff --git a/lldb/tools/lldb-vscode/VSCode.h b/lldb/tools/lldb-vscode/VSCode.h
index 88a0c08de2454..28e9eef13d6b3 100644
--- a/lldb/tools/lldb-vscode/VSCode.h
+++ b/lldb/tools/lldb-vscode/VSCode.h
@@ -9,6 +9,7 @@
 #ifndef LLDB_TOOLS_LLDB_VSCODE_VSCODE_H
 #define LLDB_TOOLS_LLDB_VSCODE_VSCODE_H

+#include <condition_variable>
 #include <iostream>
 #include <map>
 #include <set>
@@ -19,6 +20,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/JSON.h"
 #include "llvm/Support/raw_ostream.h"

 #include "lldb/API/SBAttachInfo.h"
@@ -65,6 +67,15 @@ enum class OutputType { Console, Stdout, Stderr, Telemetry };
 enum VSCodeBroadcasterBits { eBroadcastBitStopEventThread = 1u << 0 };

+typedef void (*RequestCallback)(const llvm::json::Object &command);
+
+enum class PacketStatus {
+  Success = 0,
+  EndOfFile,
+  JSONMalformed,
+  JSONNotObject
+};
+
 struct VSCode {
   InputStream input;
   OutputStream output;
@@ -91,6 +102,10 @@ struct VSCode {
   bool sent_terminated_event;
   bool stop_at_entry;
   bool is_attach;
+  uint32_t reverse_request_seq;
+  std::map<std::string, RequestCallback> request_handlers;
+  std::condition_variable request_in_terminal_cv;
+  bool waiting_for_run_in_terminal;
   // Keep track of the last stop thread index IDs as threads won't go away
   // unless we send a "thread" event to indicate the thread exited.
   llvm::DenseSet<lldb::tid_t> thread_ids;
@@ -152,6 +167,36 @@ struct VSCode {
   /// Set given target object as a current target for lldb-vscode and start
   /// listening for its breakpoint events.
   void SetTarget(const lldb::SBTarget target);
+
+  const std::map<std::string, RequestCallback> &GetRequestHandlers();
+
+  PacketStatus GetNextObject(llvm::json::Object &object);
+  bool HandleObject(const llvm::json::Object &object);
+
+  /// Send a Debug Adapter Protocol reverse request to the IDE
+  ///
+  /// \param[in] request
+  ///     The payload of the request to send.
+  ///
+  /// \param[out] response
+  ///     The response of the IDE. It might be undefined if there was an error.
+  ///
+  /// \return
+  ///     A \a PacketStatus object indicating the success or failure of the
+  ///     request.
+  PacketStatus SendReverseRequest(llvm::json::Object request,
+                                  llvm::json::Object &response);
+
+  /// Registers a callback handler for a Debug Adapter Protocol request
+  ///
+  /// \param[in] request
+  ///     The name of the request following the Debug Adapter Protocol
+  ///     specification.
+  ///
+  /// \param[in] callback
+  ///     The callback to execute when the given request is triggered by the
+  ///     IDE.
+  void RegisterRequestCallback(std::string request, RequestCallback callback);
 };

 extern VSCode g_vsc;
diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp
index 7d7d0f9ebe91c..08973ec0f171c 100644
--- a/lldb/tools/lldb-vscode/lldb-vscode.cpp
+++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp
@@ -384,7 +384,12 @@ void EventThreadFunction() {
         break;
       case lldb::eStateSuspended:
         break;
-      case lldb::eStateStopped:
+      case lldb::eStateStopped: {
+        if (g_vsc.waiting_for_run_in_terminal) {
+          g_vsc.waiting_for_run_in_terminal = false;
+          g_vsc.request_in_terminal_cv.notify_one();
+        }
+      }
         // Only report a stopped event if the process was not restarted.
         if (!lldb::SBProcess::GetRestartedFromEvent(event)) {
           SendStdOutStdErr(process);
@@ -1374,6 +1379,9 @@ void request_initialize(const llvm::json::Object &request) {
     filters.emplace_back(CreateExceptionBreakpointFilter(exc_bp));
   }
   body.try_emplace("exceptionBreakpointFilters", std::move(filters));
+  // The debug adapter supports launching a debuggee in the integrated VSCode
+  // terminal.
+  body.try_emplace("supportsRunInTerminalRequest", true);
   // The debug adapter supports stepping back via the stepBack and
   // reverseContinue requests.
   body.try_emplace("supportsStepBack", false);
@@ -1433,6 +1441,49 @@ void request_initialize(const llvm::json::Object &request) {
   g_vsc.SendJSON(llvm::json::Value(std::move(response)));
 }

+void request_runInTerminal(const llvm::json::Object &launch_request,
+                           llvm::json::Object &launch_response) {
+  // We have already created a target that has a valid "program" path to the
+  // executable. We will attach to the next process whose name matches that
+  // of the target.
+  g_vsc.is_attach = true;
+  lldb::SBAttachInfo attach_info;
+  lldb::SBError error;
+  attach_info.SetWaitForLaunch(true, /*async*/ true);
+  g_vsc.target.Attach(attach_info, error);
+
+  llvm::json::Object reverse_request =
+      CreateRunInTerminalReverseRequest(launch_request);
+  llvm::json::Object reverse_response;
+  lldb_vscode::PacketStatus status =
+      g_vsc.SendReverseRequest(reverse_request, reverse_response);
+  if (status != lldb_vscode::PacketStatus::Success)
+    error.SetErrorString("Process cannot be launched by IDE.");
+
+  if (error.Success()) {
+    // Wait for the attach stop event to happen or for a timeout.
+    g_vsc.waiting_for_run_in_terminal = true;
+    static std::mutex mutex;
+    std::unique_lock<std::mutex> locker(mutex);
+    g_vsc.request_in_terminal_cv.wait_for(locker, std::chrono::seconds(10));
+
+    auto attached_pid = g_vsc.target.GetProcess().GetProcessID();
+    if (attached_pid == LLDB_INVALID_PROCESS_ID)
+      error.SetErrorString("Failed to attach to a process");
+    else
+      SendProcessEvent(Attach);
+  }
+
+  if (error.Fail()) {
+    launch_response["success"] = llvm::json::Value(false);
+    EmplaceSafeString(launch_response, "message",
+                      std::string(error.GetCString()));
+  } else {
+    launch_response["success"] = llvm::json::Value(true);
+    g_vsc.SendJSON(CreateEventObject("initialized"));
+  }
+}
+
 // "LaunchRequest": {
 //   "allOf": [ { "$ref": "#/definitions/Request" }, {
 //     "type": "object",
@@ -1505,6 +1556,12 @@ void request_launch(const llvm::json::Object &request) {
     return;
   }

+  if (GetBoolean(arguments, "runInTerminal", false)) {
+    request_runInTerminal(request, response);
+    g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+    return;
+  }
+
   // Instantiate a launch info instance for the target.
   auto launch_info = g_vsc.target.GetLaunchInfo();

@@ -2831,39 +2888,35 @@ void request__testGetTargetBreakpoints(const llvm::json::Object &request) {
   g_vsc.SendJSON(llvm::json::Value(std::move(response)));
 }

-const std::map<std::string, RequestCallback> &GetRequestHandlers() {
-#define REQUEST_CALLBACK(name)                                                 \
-  { #name, request_##name }
-  static std::map<std::string, RequestCallback> g_request_handlers = {
-      // VSCode Debug Adaptor requests
-      REQUEST_CALLBACK(attach),
-      REQUEST_CALLBACK(completions),
-      REQUEST_CALLBACK(continue),
-      REQUEST_CALLBACK(configurationDone),
-      REQUEST_CALLBACK(disconnect),
-      REQUEST_CALLBACK(evaluate),
-      REQUEST_CALLBACK(exceptionInfo),
-      REQUEST_CALLBACK(getCompileUnits),
-      REQUEST_CALLBACK(initialize),
-      REQUEST_CALLBACK(launch),
-      REQUEST_CALLBACK(next),
-      REQUEST_CALLBACK(pause),
-      REQUEST_CALLBACK(scopes),
-      REQUEST_CALLBACK(setBreakpoints),
-      REQUEST_CALLBACK(setExceptionBreakpoints),
-      REQUEST_CALLBACK(setFunctionBreakpoints),
-      REQUEST_CALLBACK(setVariable),
-      REQUEST_CALLBACK(source),
-      REQUEST_CALLBACK(stackTrace),
-      REQUEST_CALLBACK(stepIn),
-      REQUEST_CALLBACK(stepOut),
-      REQUEST_CALLBACK(threads),
-      REQUEST_CALLBACK(variables),
-      // Testing requests
-      REQUEST_CALLBACK(_testGetTargetBreakpoints),
-  };
-#undef REQUEST_CALLBACK
-  return g_request_handlers;
+void RegisterRequestCallbacks() {
+  g_vsc.RegisterRequestCallback("attach", request_attach);
+  g_vsc.RegisterRequestCallback("completions", request_completions);
+  g_vsc.RegisterRequestCallback("continue", request_continue);
+  g_vsc.RegisterRequestCallback("configurationDone", request_configurationDone);
+  g_vsc.RegisterRequestCallback("disconnect", request_disconnect);
+  g_vsc.RegisterRequestCallback("evaluate", request_evaluate);
+  g_vsc.RegisterRequestCallback("exceptionInfo", request_exceptionInfo);
+  g_vsc.RegisterRequestCallback("getCompileUnits", request_getCompileUnits);
+  g_vsc.RegisterRequestCallback("initialize", request_initialize);
+  g_vsc.RegisterRequestCallback("launch", request_launch);
+  g_vsc.RegisterRequestCallback("next", request_next);
+  g_vsc.RegisterRequestCallback("pause", request_pause);
+  g_vsc.RegisterRequestCallback("scopes", request_scopes);
+  g_vsc.RegisterRequestCallback("setBreakpoints", request_setBreakpoints);
+  g_vsc.RegisterRequestCallback("setExceptionBreakpoints",
+                                request_setExceptionBreakpoints);
+  g_vsc.RegisterRequestCallback("setFunctionBreakpoints",
+                                request_setFunctionBreakpoints);
+  g_vsc.RegisterRequestCallback("setVariable", request_setVariable);
+  g_vsc.RegisterRequestCallback("source", request_source);
+  g_vsc.RegisterRequestCallback("stackTrace", request_stackTrace);
+  g_vsc.RegisterRequestCallback("stepIn", request_stepIn);
+  g_vsc.RegisterRequestCallback("stepOut", request_stepOut);
+  g_vsc.RegisterRequestCallback("threads", request_threads);
+  g_vsc.RegisterRequestCallback("variables", request_variables);
+  // Testing requests
+  g_vsc.RegisterRequestCallback("_testGetTargetBreakpoints",
+                                request__testGetTargetBreakpoints);
 }

 } // anonymous namespace
@@ -2895,6 +2948,8 @@ int main(int argc, char *argv[]) {
   // Initialize LLDB first before we do anything.
   lldb::SBDebugger::Initialize();

+  RegisterRequestCallbacks();
+
   int portno = -1;

   LLDBVSCodeOptTable T;
@@ -2937,49 +2992,17 @@ int main(int argc, char *argv[]) {
     g_vsc.output.descriptor = StreamDescriptor::from_file(fileno(stdout), false);
   }

-  auto request_handlers = GetRequestHandlers();
   uint32_t packet_idx = 0;
   while (!g_vsc.sent_terminated_event) {
-    std::string json = g_vsc.ReadJSON();
-    if (json.empty())
+    llvm::json::Object object;
+    lldb_vscode::PacketStatus status = g_vsc.GetObject(object);
+    if (status == lldb_vscode::PacketStatus::EndOfFile)
       break;
+    if (status != lldb_vscode::PacketStatus::Success)
+      return 1; // Fatal error

-    llvm::StringRef json_sref(json);
-    llvm::Expected<llvm::json::Value> json_value = llvm::json::parse(json_sref);
-    if (!json_value) {
-      auto error = json_value.takeError();
-      if (g_vsc.log) {
-        std::string error_str;
-        llvm::raw_string_ostream strm(error_str);
-        strm << error;
-        strm.flush();
-
-        *g_vsc.log << "error: failed to parse JSON: " << error_str << std::endl
-                   << json << std::endl;
-      }
-      return 1;
-    }
-
-    auto object = json_value->getAsObject();
-    if (!object) {
-      if (g_vsc.log)
-        *g_vsc.log << "error: json packet isn't a object" << std::endl;
+    if (!g_vsc.HandleObject(object))
       return 1;
-    }
-
-    const auto packet_type = GetString(object, "type");
-    if (packet_type == "request") {
-      const auto command = GetString(object, "command");
-      auto handler_pos = request_handlers.find(std::string(command));
-      if (handler_pos != request_handlers.end()) {
-        handler_pos->second(*object);
-      } else {
-        if (g_vsc.log)
-          *g_vsc.log << "error: unhandled command \"" << command.data()
-                     << std::endl;
-        return 1;
-      }
-    }
     ++packet_idx;
   }
diff --git a/lldb/tools/lldb-vscode/package.json b/lldb/tools/lldb-vscode/package.json
index 29ca06dd17d63..9077ab51dd7fa 100644
--- a/lldb/tools/lldb-vscode/package.json
+++ b/lldb/tools/lldb-vscode/package.json
@@ -175,6 +175,11 @@
         "type": "array",
         "description": "Commands executed at the end of debugging session.",
         "default": []
+      },
+      "runInTerminal": {
+        "type": "boolean",
+        "description": "Launch the program inside an integrated terminal in the IDE.
Useful for debugging interactive command line programs", + "default": false } } }, From 7235326fb2342227d478d63378d2ba4d5e2418db Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 14 Sep 2020 13:51:23 -0400 Subject: [PATCH 0562/1079] [libc++] Upgrade the Clang on build bots --- libcxx/utils/docker/debian9/buildbot/Dockerfile | 1 - libcxx/utils/docker/debian9/buildbot/docker-compose.yml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/libcxx/utils/docker/debian9/buildbot/Dockerfile b/libcxx/utils/docker/debian9/buildbot/Dockerfile index ea2ac9d55933e..7da50687b9527 100644 --- a/libcxx/utils/docker/debian9/buildbot/Dockerfile +++ b/libcxx/utils/docker/debian9/buildbot/Dockerfile @@ -14,7 +14,6 @@ ADD install-packages.sh /tmp/ RUN /tmp/install-packages.sh && rm /tmp/install-packages.sh COPY --from=ericwf/gcc:5.5.0 /compiler /opt/gcc-5 -COPY --from=ericwf/llvm:9.x /compiler /opt/llvm-9 FROM base-image as worker-image diff --git a/libcxx/utils/docker/debian9/buildbot/docker-compose.yml b/libcxx/utils/docker/debian9/buildbot/docker-compose.yml index f9a2a2ad9c31c..b65a91e4e255c 100644 --- a/libcxx/utils/docker/debian9/buildbot/docker-compose.yml +++ b/libcxx/utils/docker/debian9/buildbot/docker-compose.yml @@ -5,7 +5,7 @@ services: context: https://github.com/llvm/llvm-project.git#master:libcxx/utils/docker/debian9/buildbot args: gcc_tot: "ericwf/gcc:9.2.0" - llvm_tot: "ericwf/llvm:9.x" + llvm_tot: "ericwf/llvm:trunk-2020-09-11" image: llvm-buildbot-worker volumes: - /var/run/docker.sock:/var/run/docker.sock From a3bc0401d436d8c7d2dd5b54e13b81333d53bdff Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Mon, 14 Sep 2020 10:53:48 -0700 Subject: [PATCH 0563/1079] Fix 132e57bc597bd3f50174b7d286c43f76b47f11c1 Compile error found in http://lab.llvm.org:8011/builders/lldb-x86_64-debian/builds/17403/steps/build/logs/stdio Simple fix --- lldb/tools/lldb-vscode/lldb-vscode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index 08973ec0f171c..3b0817c71e62f 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -2995,7 +2995,7 @@ int main(int argc, char *argv[]) { uint32_t packet_idx = 0; while (!g_vsc.sent_terminated_event) { llvm::json::Object object; - lldb_vscode::PacketStatus status = g_vsc.GetObject(object); + lldb_vscode::PacketStatus status = g_vsc.GetNextObject(object); if (status == lldb_vscode::PacketStatus::EndOfFile) break; if (status != lldb_vscode::PacketStatus::Success) From 8e69c3cde8eed94be226bdef1ff6cedda3a33bc4 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 13 Sep 2020 00:12:30 +0200 Subject: [PATCH 0564/1079] [DAGCombiner] Fold fmin/fmax with INF / FLT_MAX Similar to D87415, this folds the various float min/max opcodes with a constant INF or -INF operand, or FLT_MAX / -FLT_MAX operand if the ninf flag is set. Some of the folds are only possible under nnan. The fminnum(X, INF) with nnan and fmaxnum(X, -INF) with nnan cases are needed to improve the VECREDUCE_FMIN/FMAX lowerings on X86, the rest is here for the sake of completeness. 
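To make the fold table concrete, the decision logic this patch adds to
visitFMinMax can be restated as the following standalone sketch (illustrative
C++ only; it mirrors the opcode and flag conditions in the diff below rather
than any LLVM API):

  #include <optional>

  enum class MinMaxOp { FMinNum, FMaxNum, FMinimum, FMaximum };

  // For op(X, C) where C is +/-inf (or +/-FLT_MAX, which the ninf flag lets
  // us treat as infinity): returns true if the result folds to the constant
  // C, false if it folds to X, and nullopt if no fold applies.
  std::optional<bool> foldsToConstant(MinMaxOp Op, bool ConstIsNegative,
                                      bool HasNoNaNs) {
    bool PropagatesNaN = Op == MinMaxOp::FMinimum || Op == MinMaxOp::FMaximum;
    bool IsMin = Op == MinMaxOp::FMinNum || Op == MinMaxOp::FMinimum;
    // minnum(X, -inf) -> -inf and maxnum(X, +inf) -> +inf unconditionally;
    // minimum/maximum additionally need nnan, since a NaN X would propagate.
    if (IsMin == ConstIsNegative && (!PropagatesNaN || HasNoNaNs))
      return true;
    // minimum(X, +inf) -> X and maximum(X, -inf) -> X unconditionally;
    // minnum/maxnum need nnan, since minnum(NaN, +inf) is +inf, not NaN.
    if (IsMin != ConstIsNegative && (PropagatesNaN || HasNoNaNs))
      return false;
    return std::nullopt;
  }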
Differential Revision: https://reviews.llvm.org/D87571 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 51 +++- llvm/test/CodeGen/ARM/fminmax-folds.ll | 271 ++---------------- 2 files changed, 63 insertions(+), 259 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e4a5176019689..48e964c107619 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -14037,13 +14037,16 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { } static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N, - APFloat (*Op)(const APFloat &, const APFloat &), - bool PropagatesNaN) { + APFloat (*Op)(const APFloat &, const APFloat &)) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); + const SDNodeFlags Flags = N->getFlags(); + unsigned Opc = N->getOpcode(); + bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM; + bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM; if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); @@ -14054,32 +14057,54 @@ static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N, // Canonicalize to constant on RHS. if (isConstantFPBuildVectorOrConstantFP(N0) && !isConstantFPBuildVectorOrConstantFP(N1)) - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Flags); - // minnum(X, nan) -> X - // maxnum(X, nan) -> X - // minimum(X, nan) -> nan - // maximum(X, nan) -> nan - if (N1CFP && N1CFP->isNaN()) - return PropagatesNaN ? N->getOperand(1) : N->getOperand(0); + if (N1CFP) { + const APFloat &AF = N1CFP->getValueAPF(); + + // minnum(X, nan) -> X + // maxnum(X, nan) -> X + // minimum(X, nan) -> nan + // maximum(X, nan) -> nan + if (AF.isNaN()) + return PropagatesNaN ? N->getOperand(1) : N->getOperand(0); + + // In the following folds, inf can be replaced with the largest finite + // float, if the ninf flag is set. 
+ if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) { + // minnum(X, -inf) -> -inf + // maxnum(X, +inf) -> +inf + // minimum(X, -inf) -> -inf if nnan + // maximum(X, +inf) -> +inf if nnan + if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs())) + return N->getOperand(1); + + // minnum(X, +inf) -> X if nnan + // maxnum(X, -inf) -> X if nnan + // minimum(X, +inf) -> X + // maximum(X, -inf) -> X + if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs())) + return N->getOperand(0); + } + } return SDValue(); } SDValue DAGCombiner::visitFMINNUM(SDNode *N) { - return visitFMinMax(DAG, N, minnum, /* PropagatesNaN */ false); + return visitFMinMax(DAG, N, minnum); } SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { - return visitFMinMax(DAG, N, maxnum, /* PropagatesNaN */ false); + return visitFMinMax(DAG, N, maxnum); } SDValue DAGCombiner::visitFMINIMUM(SDNode *N) { - return visitFMinMax(DAG, N, minimum, /* PropagatesNaN */ true); + return visitFMinMax(DAG, N, minimum); } SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) { - return visitFMinMax(DAG, N, maximum, /* PropagatesNaN */ true); + return visitFMinMax(DAG, N, maximum); } SDValue DAGCombiner::visitFABS(SDNode *N) { diff --git a/llvm/test/CodeGen/ARM/fminmax-folds.ll b/llvm/test/CodeGen/ARM/fminmax-folds.ll index 30dfd4915d892..b13426c7c0500 100644 --- a/llvm/test/CodeGen/ARM/fminmax-folds.ll +++ b/llvm/test/CodeGen/ARM/fminmax-folds.ll @@ -65,15 +65,9 @@ define float @test_minnum_const_inf(float %x) { define float @test_maxnum_const_inf(float %x) { ; CHECK-LABEL: test_maxnum_const_inf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI5_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: movw r0, #0 +; CHECK-NEXT: movt r0, #32640 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI5_0: -; CHECK-NEXT: .long 0x7f800000 @ float +Inf %r = call float @llvm.maxnum.f32(float %x, float 0x7ff0000000000000) ret float %r } @@ -97,15 +91,7 @@ define float @test_maximum_const_inf(float %x) { define float @test_minimum_const_inf(float %x) { ; CHECK-LABEL: test_minimum_const_inf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI7_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmin.f32 d0, d1, d0 -; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI7_0: -; CHECK-NEXT: .long 0x7f800000 @ float +Inf %r = call float @llvm.minimum.f32(float %x, float 0x7ff0000000000000) ret float %r } @@ -113,15 +99,9 @@ define float @test_minimum_const_inf(float %x) { define float @test_minnum_const_neg_inf(float %x) { ; CHECK-LABEL: test_minnum_const_neg_inf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI8_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vminnm.f32 s0, s2, s0 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: movw r0, #0 +; CHECK-NEXT: movt r0, #65408 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI8_0: -; CHECK-NEXT: .long 0xff800000 @ float -Inf %r = call float @llvm.minnum.f32(float %x, float 0xfff0000000000000) ret float %r } @@ -145,15 +125,7 @@ define float @test_maxnum_const_neg_inf(float %x) { define float @test_maximum_const_neg_inf(float %x) { ; CHECK-LABEL: test_maximum_const_neg_inf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI10_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmax.f32 d0, d1, d0 -; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI10_0: -; CHECK-NEXT: .long 
0xff800000 @ float -Inf %r = call float @llvm.maximum.f32(float %x, float 0xfff0000000000000) ret float %r } @@ -177,15 +149,7 @@ define float @test_minimum_const_neg_inf(float %x) { define float @test_minnum_const_inf_nnan(float %x) { ; CHECK-LABEL: test_minnum_const_inf_nnan: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI12_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vminnm.f32 s0, s2, s0 -; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI12_0: -; CHECK-NEXT: .long 0x7f800000 @ float +Inf %r = call nnan float @llvm.minnum.f32(float %x, float 0x7ff0000000000000) ret float %r } @@ -193,15 +157,9 @@ define float @test_minnum_const_inf_nnan(float %x) { define float @test_maxnum_const_inf_nnan(float %x) { ; CHECK-LABEL: test_maxnum_const_inf_nnan: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI13_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: movw r0, #0 +; CHECK-NEXT: movt r0, #32640 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI13_0: -; CHECK-NEXT: .long 0x7f800000 @ float +Inf %r = call nnan float @llvm.maxnum.f32(float %x, float 0x7ff0000000000000) ret float %r } @@ -209,15 +167,9 @@ define float @test_maxnum_const_inf_nnan(float %x) { define float @test_maximum_const_inf_nnan(float %x) { ; CHECK-LABEL: test_maximum_const_inf_nnan: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI14_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmax.f32 d0, d1, d0 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: movw r0, #0 +; CHECK-NEXT: movt r0, #32640 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI14_0: -; CHECK-NEXT: .long 0x7f800000 @ float +Inf %r = call nnan float @llvm.maximum.f32(float %x, float 0x7ff0000000000000) ret float %r } @@ -225,15 +177,7 @@ define float @test_maximum_const_inf_nnan(float %x) { define float @test_minimum_const_inf_nnan(float %x) { ; CHECK-LABEL: test_minimum_const_inf_nnan: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI15_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmin.f32 d0, d1, d0 -; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI15_0: -; CHECK-NEXT: .long 0x7f800000 @ float +Inf %r = call nnan float @llvm.minimum.f32(float %x, float 0x7ff0000000000000) ret float %r } @@ -241,15 +185,7 @@ define float @test_minimum_const_inf_nnan(float %x) { define float @test_minnum_const_inf_nnan_comm(float %x) { ; CHECK-LABEL: test_minnum_const_inf_nnan_comm: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI16_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vminnm.f32 s0, s2, s0 -; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI16_0: -; CHECK-NEXT: .long 0x7f800000 @ float +Inf %r = call nnan float @llvm.minnum.f32(float 0x7ff0000000000000, float %x) ret float %r } @@ -257,15 +193,9 @@ define float @test_minnum_const_inf_nnan_comm(float %x) { define float @test_maxnum_const_inf_nnan_comm(float %x) { ; CHECK-LABEL: test_maxnum_const_inf_nnan_comm: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI17_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: movw r0, #0 +; CHECK-NEXT: movt r0, #32640 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI17_0: -; CHECK-NEXT: .long 0x7f800000 @ float +Inf %r = call nnan float @llvm.maxnum.f32(float 0x7ff0000000000000, float %x) 
ret float %r } @@ -273,15 +203,9 @@ define float @test_maxnum_const_inf_nnan_comm(float %x) { define float @test_maximum_const_inf_nnan_comm(float %x) { ; CHECK-LABEL: test_maximum_const_inf_nnan_comm: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI18_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmax.f32 d0, d1, d0 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: movw r0, #0 +; CHECK-NEXT: movt r0, #32640 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI18_0: -; CHECK-NEXT: .long 0x7f800000 @ float +Inf %r = call nnan float @llvm.maximum.f32(float 0x7ff0000000000000, float %x) ret float %r } @@ -289,15 +213,7 @@ define float @test_maximum_const_inf_nnan_comm(float %x) { define float @test_minimum_const_inf_nnan_comm(float %x) { ; CHECK-LABEL: test_minimum_const_inf_nnan_comm: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI19_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmin.f32 d0, d1, d0 -; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI19_0: -; CHECK-NEXT: .long 0x7f800000 @ float +Inf %r = call nnan float @llvm.minimum.f32(float 0x7ff0000000000000, float %x) ret float %r } @@ -305,16 +221,7 @@ define float @test_minimum_const_inf_nnan_comm(float %x) { define <2 x float> @test_minnum_const_inf_nnan_comm_vec(<2 x float> %x) { ; CHECK-LABEL: test_minnum_const_inf_nnan_comm_vec: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d16, .LCPI20_0 -; CHECK-NEXT: vmov d17, r0, r1 -; CHECK-NEXT: vminnm.f32 d16, d17, d16 -; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 3 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI20_0: -; CHECK-NEXT: .long 0x7f800000 @ float +Inf -; CHECK-NEXT: .long 0x7f800000 @ float +Inf %r = call nnan <2 x float> @llvm.minnum.v2f32(<2 x float> , <2 x float> %x) ret <2 x float> %r } @@ -323,8 +230,6 @@ define <2 x float> @test_maxnum_const_inf_nnan_comm_vec(<2 x float> %x) { ; CHECK-LABEL: test_maxnum_const_inf_nnan_comm_vec: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, .LCPI21_0 -; CHECK-NEXT: vmov d17, r0, r1 -; CHECK-NEXT: vmaxnm.f32 d16, d17, d16 ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 3 @@ -340,8 +245,6 @@ define <2 x float> @test_maximum_const_inf_nnan_comm_vec(<2 x float> %x) { ; CHECK-LABEL: test_maximum_const_inf_nnan_comm_vec: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, .LCPI22_0 -; CHECK-NEXT: vmov d17, r0, r1 -; CHECK-NEXT: vmax.f32 d16, d17, d16 ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 3 @@ -356,16 +259,7 @@ define <2 x float> @test_maximum_const_inf_nnan_comm_vec(<2 x float> %x) { define <2 x float> @test_minimum_const_inf_nnan_comm_vec(<2 x float> %x) { ; CHECK-LABEL: test_minimum_const_inf_nnan_comm_vec: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d16, .LCPI23_0 -; CHECK-NEXT: vmov d17, r0, r1 -; CHECK-NEXT: vmin.f32 d16, d17, d16 -; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 3 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI23_0: -; CHECK-NEXT: .long 0x7f800000 @ float +Inf -; CHECK-NEXT: .long 0x7f800000 @ float +Inf %r = call nnan <2 x float> @llvm.minimum.v2f32(<2 x float> , <2 x float> %x) ret <2 x float> %r } @@ -373,15 +267,9 @@ define <2 x float> @test_minimum_const_inf_nnan_comm_vec(<2 x float> %x) { define float @test_minnum_const_neg_inf_nnan(float %x) { ; CHECK-LABEL: test_minnum_const_neg_inf_nnan: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI24_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vminnm.f32 s0, s2, s0 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: 
movw r0, #0 +; CHECK-NEXT: movt r0, #65408 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI24_0: -; CHECK-NEXT: .long 0xff800000 @ float -Inf %r = call nnan float @llvm.minnum.f32(float %x, float 0xfff0000000000000) ret float %r } @@ -389,15 +277,7 @@ define float @test_minnum_const_neg_inf_nnan(float %x) { define float @test_maxnum_const_neg_inf_nnan(float %x) { ; CHECK-LABEL: test_maxnum_const_neg_inf_nnan: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI25_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 -; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI25_0: -; CHECK-NEXT: .long 0xff800000 @ float -Inf %r = call nnan float @llvm.maxnum.f32(float %x, float 0xfff0000000000000) ret float %r } @@ -405,15 +285,7 @@ define float @test_maxnum_const_neg_inf_nnan(float %x) { define float @test_maximum_const_neg_inf_nnan(float %x) { ; CHECK-LABEL: test_maximum_const_neg_inf_nnan: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI26_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmax.f32 d0, d1, d0 -; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI26_0: -; CHECK-NEXT: .long 0xff800000 @ float -Inf %r = call nnan float @llvm.maximum.f32(float %x, float 0xfff0000000000000) ret float %r } @@ -421,15 +293,9 @@ define float @test_maximum_const_neg_inf_nnan(float %x) { define float @test_minimum_const_neg_inf_nnan(float %x) { ; CHECK-LABEL: test_minimum_const_neg_inf_nnan: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI27_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmin.f32 d0, d1, d0 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: movw r0, #0 +; CHECK-NEXT: movt r0, #65408 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI27_0: -; CHECK-NEXT: .long 0xff800000 @ float -Inf %r = call nnan float @llvm.minimum.f32(float %x, float 0xfff0000000000000) ret float %r } @@ -581,15 +447,9 @@ define float @test_minnum_const_max_ninf(float %x) { define float @test_maxnum_const_max_ninf(float %x) { ; CHECK-LABEL: test_maxnum_const_max_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI37_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: movw r0, #65535 +; CHECK-NEXT: movt r0, #32639 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI37_0: -; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %r = call ninf float @llvm.maxnum.f32(float %x, float 0x47efffffe0000000) ret float %r } @@ -613,15 +473,7 @@ define float @test_maximum_const_max_ninf(float %x) { define float @test_minimum_const_max_ninf(float %x) { ; CHECK-LABEL: test_minimum_const_max_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI39_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmin.f32 d0, d1, d0 -; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI39_0: -; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %r = call ninf float @llvm.minimum.f32(float %x, float 0x47efffffe0000000) ret float %r } @@ -629,15 +481,8 @@ define float @test_minimum_const_max_ninf(float %x) { define float @test_minnum_const_neg_max_ninf(float %x) { ; CHECK-LABEL: test_minnum_const_neg_max_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI40_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vminnm.f32 s0, s2, s0 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: mvn r0, #8388608 ; CHECK-NEXT: bx lr -; 
CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI40_0: -; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 %r = call ninf float @llvm.minnum.f32(float %x, float 0xc7efffffe0000000) ret float %r } @@ -661,15 +506,7 @@ define float @test_maxnum_const_neg_max_ninf(float %x) { define float @test_maximum_const_neg_max_ninf(float %x) { ; CHECK-LABEL: test_maximum_const_neg_max_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI42_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmax.f32 d0, d1, d0 -; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI42_0: -; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 %r = call ninf float @llvm.maximum.f32(float %x, float 0xc7efffffe0000000) ret float %r } @@ -693,15 +530,7 @@ define float @test_minimum_const_neg_max_ninf(float %x) { define float @test_minnum_const_max_nnan_ninf(float %x) { ; CHECK-LABEL: test_minnum_const_max_nnan_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI44_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vminnm.f32 s0, s2, s0 -; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI44_0: -; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %r = call nnan ninf float @llvm.minnum.f32(float %x, float 0x47efffffe0000000) ret float %r } @@ -709,15 +538,9 @@ define float @test_minnum_const_max_nnan_ninf(float %x) { define float @test_maxnum_const_max_nnan_ninf(float %x) { ; CHECK-LABEL: test_maxnum_const_max_nnan_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI45_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: movw r0, #65535 +; CHECK-NEXT: movt r0, #32639 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI45_0: -; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %r = call nnan ninf float @llvm.maxnum.f32(float %x, float 0x47efffffe0000000) ret float %r } @@ -725,15 +548,9 @@ define float @test_maxnum_const_max_nnan_ninf(float %x) { define float @test_maximum_const_max_nnan_ninf(float %x) { ; CHECK-LABEL: test_maximum_const_max_nnan_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI46_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmax.f32 d0, d1, d0 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: movw r0, #65535 +; CHECK-NEXT: movt r0, #32639 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI46_0: -; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %r = call nnan ninf float @llvm.maximum.f32(float %x, float 0x47efffffe0000000) ret float %r } @@ -741,15 +558,7 @@ define float @test_maximum_const_max_nnan_ninf(float %x) { define float @test_minimum_const_max_nnan_ninf(float %x) { ; CHECK-LABEL: test_minimum_const_max_nnan_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI47_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmin.f32 d0, d1, d0 -; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI47_0: -; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 %r = call nnan ninf float @llvm.minimum.f32(float %x, float 0x47efffffe0000000) ret float %r } @@ -757,15 +566,8 @@ define float @test_minimum_const_max_nnan_ninf(float %x) { define float @test_minnum_const_neg_max_nnan_ninf(float %x) { ; CHECK-LABEL: test_minnum_const_neg_max_nnan_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI48_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vminnm.f32 s0, s2, s0 -; CHECK-NEXT: vmov r0, s0 +; 
CHECK-NEXT: mvn r0, #8388608 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI48_0: -; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 %r = call nnan ninf float @llvm.minnum.f32(float %x, float 0xc7efffffe0000000) ret float %r } @@ -773,15 +575,7 @@ define float @test_minnum_const_neg_max_nnan_ninf(float %x) { define float @test_maxnum_const_neg_max_nnan_ninf(float %x) { ; CHECK-LABEL: test_maxnum_const_neg_max_nnan_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI49_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmaxnm.f32 s0, s2, s0 -; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI49_0: -; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 %r = call nnan ninf float @llvm.maxnum.f32(float %x, float 0xc7efffffe0000000) ret float %r } @@ -789,15 +583,7 @@ define float @test_maxnum_const_neg_max_nnan_ninf(float %x) { define float @test_maximum_const_neg_max_nnan_ninf(float %x) { ; CHECK-LABEL: test_maximum_const_neg_max_nnan_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI50_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmax.f32 d0, d1, d0 -; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI50_0: -; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 %r = call nnan ninf float @llvm.maximum.f32(float %x, float 0xc7efffffe0000000) ret float %r } @@ -805,15 +591,8 @@ define float @test_maximum_const_neg_max_nnan_ninf(float %x) { define float @test_minimum_const_neg_max_nnan_ninf(float %x) { ; CHECK-LABEL: test_minimum_const_neg_max_nnan_ninf: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s0, .LCPI51_0 -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmin.f32 d0, d1, d0 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: mvn r0, #8388608 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI51_0: -; CHECK-NEXT: .long 0xff7fffff @ float -3.40282347E+38 %r = call nnan ninf float @llvm.minimum.f32(float %x, float 0xc7efffffe0000000) ret float %r } From cfff88c03cf9e9b72906a41fd11e06721d54f293 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 10 Sep 2020 18:45:53 +0200 Subject: [PATCH 0565/1079] [InstCombine] Simplify select operand based on equality condition For selects of the type X == Y ? A : B, check if we can simplify A by using the X == Y equality and replace the operand if that's possible. We already try to do this in InstSimplify, but will only fold if the result of the simplification is the same as B, in which case the select can be dropped entirely. Here the select will be retained, just one operand simplified. As we are performing an actual replacement here, we don't have problems with refinement / poison values. Differential Revision: https://reviews.llvm.org/D87480 --- .../InstCombine/InstCombineSelect.cpp | 30 ++++++++++++++----- llvm/test/Transforms/InstCombine/rem.ll | 3 +- .../InstCombine/select-binop-cmp.ll | 15 ++++------ llvm/test/Transforms/InstCombine/select.ll | 15 ++++------ 4 files changed, 35 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 378132011aba2..ce473410f4caf 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1165,15 +1165,32 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp, /// /// We can't replace %sel with %add unless we strip away the flags. 
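/// (The select may be guarding against poison: under the %cmp equality the
/// flag-carrying %add can be poison, and the select deliberately picks the
/// other arm in that case.)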
/// TODO: Wrapping flags could be preserved in some cases with better analysis. -static Value *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp, - const SimplifyQuery &Q) { +static Instruction *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp, + const SimplifyQuery &Q, + InstCombiner &IC) { if (!Cmp.isEquality()) return nullptr; // Canonicalize the pattern to ICMP_EQ by swapping the select operands. Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue(); - if (Cmp.getPredicate() == ICmpInst::ICMP_NE) + bool Swapped = false; + if (Cmp.getPredicate() == ICmpInst::ICMP_NE) { std::swap(TrueVal, FalseVal); + Swapped = true; + } + + // In X == Y ? f(X) : Z, try to evaluate f(X) and replace the operand. + // Take care to avoid replacing X == Y ? X : Z with X == Y ? Y : Z, as that + // would lead to an infinite replacement cycle. + Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1); + if (TrueVal != CmpLHS) + if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, + /* AllowRefinement */ true)) + return IC.replaceOperand(Sel, Swapped ? 2 : 1, V); + if (TrueVal != CmpRHS) + if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, + /* AllowRefinement */ true)) + return IC.replaceOperand(Sel, Swapped ? 2 : 1, V); auto *FalseInst = dyn_cast(FalseVal); if (!FalseInst) @@ -1198,12 +1215,11 @@ static Value *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp, // We have an 'EQ' comparison, so the select's false value will propagate. // Example: // (X == 42) ? 43 : (X + 1) --> (X == 42) ? (X + 1) : (X + 1) --> X + 1 - Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1); if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, /* AllowRefinement */ false) == TrueVal || SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, /* AllowRefinement */ false) == TrueVal) { - return FalseVal; + return IC.replaceInstUsesWith(Sel, FalseVal); } // Restore poison-generating flags if the transform did not apply. @@ -1439,8 +1455,8 @@ tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp, /// Visit a SelectInst that has an ICmpInst as its first operand. 
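/// (foldSelectValueEquivalence now returns an Instruction rather than a
/// Value so that it can report in-place operand replacements as well as
/// full folds.)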
Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI) { - if (Value *V = foldSelectValueEquivalence(SI, *ICI, SQ)) - return replaceInstUsesWith(SI, V); + if (Instruction *NewSel = foldSelectValueEquivalence(SI, *ICI, SQ, *this)) + return NewSel; if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, *this)) return NewSel; diff --git a/llvm/test/Transforms/InstCombine/rem.ll b/llvm/test/Transforms/InstCombine/rem.ll index 2b9f5326dd152..37d81f2ebf6a0 100644 --- a/llvm/test/Transforms/InstCombine/rem.ll +++ b/llvm/test/Transforms/InstCombine/rem.ll @@ -50,8 +50,7 @@ define i8 @big_divisor(i8 %x) { define i5 @biggest_divisor(i5 %x) { ; CHECK-LABEL: @biggest_divisor( ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i5 [[X:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[DOTNOT]] to i5 -; CHECK-NEXT: [[REM:%.*]] = add i5 [[TMP1]], [[X]] +; CHECK-NEXT: [[REM:%.*]] = select i1 [[DOTNOT]], i5 0, i5 [[X]] ; CHECK-NEXT: ret i5 [[REM]] ; %rem = urem i5 %x, -1 diff --git a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll index 4173c31b2acb1..aa450f8af8b7e 100644 --- a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll +++ b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll @@ -564,12 +564,10 @@ define <2 x i8> @select_xor_icmp_vec_bad(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) ret <2 x i8> %C } -; TODO: support for undefs, check for an identity constant does not handle them yet -define <2 x i8> @select_xor_icmp_vec_bad_2(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) { -; CHECK-LABEL: @select_xor_icmp_vec_bad_2( +define <2 x i8> @select_xor_icmp_vec_undef(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) { +; CHECK-LABEL: @select_xor_icmp_vec_undef( ; CHECK-NEXT: [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = xor <2 x i8> [[X]], [[Z:%.*]] -; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[B]], <2 x i8> [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[Z:%.*]], <2 x i8> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i8> [[C]] ; %A = icmp eq <2 x i8> %x, @@ -604,11 +602,10 @@ define i32 @select_add_icmp_bad(i32 %x, i32 %y, i32 %z) { ret i32 %C } -define i32 @select_and_icmp_bad(i32 %x, i32 %y, i32 %z) { -; CHECK-LABEL: @select_and_icmp_bad( +define i32 @select_and_icmp_zero(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @select_and_icmp_zero( ; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[B:%.*]] = and i32 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 0, i32 [[Y:%.*]] ; CHECK-NEXT: ret i32 [[C]] ; %A = icmp eq i32 %x, 0 diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index d9a4f4bdbd473..c4c282e9cacf4 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -2606,8 +2606,7 @@ define i32 @pr47322_more_poisonous_replacement(i32 %arg) { define i8 @select_replacement_add_eq(i8 %x, i8 %y) { ; CHECK-LABEL: @select_replacement_add_eq( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 1 -; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], 1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[ADD]], i8 [[Y:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 2, i8 [[Y:%.*]] ; CHECK-NEXT: ret i8 [[SEL]] ; %cmp = icmp eq i8 %x, 1 @@ -2620,8 +2619,7 @@ define i8 @select_replacement_add_ne(i8 %x, i8 %y) { ; CHECK-LABEL: @select_replacement_add_ne( ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[X:%.*]], 1 ; 
CHECK-NEXT: call void @use(i1 [[CMP]]) -; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], 1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[Y:%.*]], i8 [[ADD]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[Y:%.*]], i8 2 ; CHECK-NEXT: ret i8 [[SEL]] ; %cmp = icmp ne i8 %x, 1 @@ -2634,8 +2632,7 @@ define i8 @select_replacement_add_ne(i8 %x, i8 %y) { define i8 @select_replacement_add_nuw(i8 %x, i8 %y) { ; CHECK-LABEL: @select_replacement_add_nuw( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 1 -; CHECK-NEXT: [[ADD:%.*]] = add nuw i8 [[X]], 1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[ADD]], i8 [[Y:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 2, i8 [[Y:%.*]] ; CHECK-NEXT: ret i8 [[SEL]] ; %cmp = icmp eq i8 %x, 1 @@ -2647,8 +2644,7 @@ define i8 @select_replacement_add_nuw(i8 %x, i8 %y) { define i8 @select_replacement_sub(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @select_replacement_sub( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[Y]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[SUB]], i8 [[Z:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 0, i8 [[Z:%.*]] ; CHECK-NEXT: ret i8 [[SEL]] ; %cmp = icmp eq i8 %x, %y @@ -2661,8 +2657,7 @@ define i8 @select_replacement_shift(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @select_replacement_shift( ; CHECK-NEXT: [[SHR:%.*]] = lshr exact i8 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[SHR]], [[Y:%.*]] -; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[Y]], 1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[SHL]], i8 [[Z:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[X]], i8 [[Z:%.*]] ; CHECK-NEXT: ret i8 [[SEL]] ; %shr = lshr exact i8 %x, 1 From da17e0d5c1dfabcba887e323b1aabc8cc4342cd6 Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Mon, 14 Sep 2020 14:07:33 -0400 Subject: [PATCH 0566/1079] [ms] [llvm-ml] Add missing built-in type aliases Add signed aliases for integral types, as well as the "DF" abbreviation for the FWORD type. 
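For example (mirroring the new builtin_types.test), declarations such as
the following now assemble:

  t1_signed SBYTE -1
  t2_signed SWORD -2
  t3_signed SDWORD -3
  t4_short  DF 4
  t5_signed SQWORD -4611686018427387904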
Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D87246 --- llvm/lib/MC/MCParser/MasmParser.cpp | 8 +++ llvm/test/tools/llvm-ml/builtin_types.test | 77 ++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 llvm/test/tools/llvm-ml/builtin_types.test diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 4d62174f7e5e4..ea18cf8936ded 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -623,6 +623,7 @@ class MasmParser : public MCAsmParser { DK_SQWORD, DK_DB, DK_DD, + DK_DF, DK_DQ, DK_DW, DK_REAL4, @@ -2114,6 +2115,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, case DK_DD: return parseDirectiveValue(IDVal, 4); case DK_FWORD: + case DK_DF: return parseDirectiveValue(IDVal, 6); case DK_QWORD: case DK_SQWORD: @@ -2325,21 +2327,26 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, Lex(); return parseDirectiveEquate(nextVal, IDVal, DirKind); case DK_BYTE: + case DK_SBYTE: case DK_DB: Lex(); return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc); case DK_WORD: + case DK_SWORD: case DK_DW: Lex(); return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc); case DK_DWORD: + case DK_SDWORD: case DK_DD: Lex(); return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc); case DK_FWORD: + case DK_DF: Lex(); return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc); case DK_QWORD: + case DK_SQWORD: case DK_DQ: Lex(); return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc); @@ -6284,6 +6291,7 @@ void MasmParser::initializeDirectiveKindMap() { // DirectiveKindMap[".noaltmacro"] = DK_NOALTMACRO; DirectiveKindMap["db"] = DK_DB; DirectiveKindMap["dd"] = DK_DD; + DirectiveKindMap["df"] = DK_DF; DirectiveKindMap["dq"] = DK_DQ; DirectiveKindMap["dw"] = DK_DW; DirectiveKindMap["echo"] = DK_ECHO; diff --git a/llvm/test/tools/llvm-ml/builtin_types.test b/llvm/test/tools/llvm-ml/builtin_types.test new file mode 100644 index 0000000000000..b99c491cb8dd8 --- /dev/null +++ b/llvm/test/tools/llvm-ml/builtin_types.test @@ -0,0 +1,77 @@ +# RUN: llvm-ml -filetype=asm %s | FileCheck %s + +.data + +t1_long BYTE 1 +t1_short DB 1 +t1_signed SBYTE -1 + +; CHECK-LABEL: t1_long: +; CHECK: .byte 1 +; CHECK-LABEL: t1_short: +; CHECK: .byte 1 +; CHECK-LABEL: t1_signed: +; CHECK: .byte -1 + +t2_long WORD 2 +t2_short DW 2 +t2_signed SWORD -2 + +; CHECK-LABEL: t2_long: +; CHECK: .short 2 +; CHECK-LABEL: t2_short: +; CHECK: .short 2 +; CHECK-LABEL: t2_signed: +; CHECK: .short -2 + +t3_long DWORD 3 +t3_short DD 3 +t3_signed SDWORD -3 + +; CHECK-LABEL: t3_long: +; CHECK: .long 3 +; CHECK-LABEL: t3_short: +; CHECK: .long 3 +; CHECK-LABEL: t3_signed: +; CHECK: .long -3 + +t4_long FWORD 4 +t4_short DF 4 +t4_long_large FWORD 4294967298 +t4_short_large FWORD 4294967298 + +; CHECK-LABEL: t4_long: +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .short 0 +; CHECK-LABEL: t4_short: +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .short 0 +; CHECK-LABEL: t4_long_large: +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .short 1 +; CHECK-LABEL: t4_short_large: +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .short 1 + +t5_long QWORD 4611686018427387904 +t5_short DQ 4611686018427387904 +t5_signed SQWORD -4611686018427387904 + +; CHECK-LABEL: t5_long: +; CHECK-NEXT: .quad 4611686018427387904 +; CHECK-LABEL: t5_short: +; CHECK-NEXT: .quad 4611686018427387904 +; CHECK-LABEL: t5_signed: +; CHECK-NEXT: .quad -4611686018427387904 + +t6_single REAL4 1.3 +t6_double REAL8 1.3 + +; CHECK-LABEL: t6_single: +; CHECK-NEXT: .long 1067869798 +; CHECK-LABEL: 
t6_double: +; CHECK-NEXT: .quad 4608533498688228557 + +.code + +END From 7c44ee8e1937c7402a106f3fa6a356caa73a14e8 Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Mon, 14 Sep 2020 14:11:29 -0400 Subject: [PATCH 0567/1079] [ms] [llvm-ml] Fix struct padding logic MASM structs are end-padded to have size a multiple of the smaller of the requested alignment and the size of their largest field (taken recursively, if they have a field of STRUCT type). This matches the behavior of ml.exe and ml64.exe. Our original implementation followed the MASM 6.0 documentation, which instead specified that MASM structs were padded to a multiple of their requested alignment. Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D87248 --- llvm/lib/MC/MCParser/MasmParser.cpp | 22 ++++++---- llvm/test/tools/llvm-ml/struct_alignment.test | 44 +++++++++++++++++++ 2 files changed, 58 insertions(+), 8 deletions(-) create mode 100644 llvm/test/tools/llvm-ml/struct_alignment.test diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index ea18cf8936ded..c1917d729c856 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -124,10 +124,12 @@ struct StructInfo { bool IsUnion = false; size_t Alignment = 0; size_t Size = 0; + size_t AlignmentSize = 0; std::vector Fields; StringMap FieldsByName; - FieldInfo &addField(StringRef FieldName, FieldType FT, size_t FieldSize); + FieldInfo &addField(StringRef FieldName, FieldType FT, + size_t FieldAlignmentSize); StructInfo() = default; @@ -331,7 +333,7 @@ struct FieldInfo { }; FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT, - size_t FieldSize) { + size_t FieldAlignmentSize) { if (!FieldName.empty()) FieldsByName[FieldName] = Fields.size(); Fields.emplace_back(FT); @@ -339,9 +341,10 @@ FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT, if (IsUnion) { Field.Offset = 0; } else { - Size = llvm::alignTo(Size, std::min(Alignment, FieldSize)); + Size = llvm::alignTo(Size, std::min(Alignment, FieldAlignmentSize)); Field.Offset = Size; } + AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize); return Field; } @@ -3973,7 +3976,8 @@ bool MasmParser::emitStructValues(const StructInfo &Structure) { // Declare a field in the current struct. bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) { StructInfo &OwningStruct = StructInProgress.back(); - FieldInfo &Field = OwningStruct.addField(Name, FT_STRUCT, Structure.Size); + FieldInfo &Field = + OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize); StructFieldInfo &StructInfo = Field.Contents.StructInfo; StructInfo.Structure = Structure; @@ -4101,8 +4105,10 @@ bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) { return Error(NameLoc, "mismatched name in ENDS directive; expected '" + StructInProgress.back().Name + "'"); StructInfo Structure = StructInProgress.pop_back_val(); - // Pad to make the structure's size divisible by its alignment. - Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment); + // Pad to make the structure's size divisible by the smaller of its alignment + // and the size of its largest field. 
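+  // For example, a struct declared with alignment 8 whose largest field is
+  // a six-byte FWORD ends up six bytes long (a multiple of min(8, 6) = 6),
+  // matching ml64.exe; the new struct_alignment.test covers this case.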
+ Structure.Size = llvm::alignTo( + Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize)); Structs[Name.lower()] = Structure; if (parseToken(AsmToken::EndOfStatement)) @@ -4147,8 +4153,8 @@ bool MasmParser::parseDirectiveNestedEnds() { else ParentStruct.Size += Structure.Size; } else { - FieldInfo &Field = - ParentStruct.addField(Structure.Name, FT_STRUCT, Structure.Size); + FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT, + Structure.AlignmentSize); StructFieldInfo &StructInfo = Field.Contents.StructInfo; Field.Type = Structure.Size; Field.LengthOf = 1; diff --git a/llvm/test/tools/llvm-ml/struct_alignment.test b/llvm/test/tools/llvm-ml/struct_alignment.test new file mode 100644 index 0000000000000..cfe803872c3ba --- /dev/null +++ b/llvm/test/tools/llvm-ml/struct_alignment.test @@ -0,0 +1,44 @@ +; RUN: llvm-ml -filetype=asm %s | FileCheck %s + +.data + +FOO STRUCT 8 + f FWORD -1 +FOO ENDS + +t1 FOO <> +; CHECK-LABEL: t1: +; CHECK-NEXT: .long 4294967295 +; CHECK-NEXT: .short 65535 +; CHECK-NOT: .zero + +BAZ STRUCT + b BYTE 3 DUP (-1) + f FWORD -1 +BAZ ENDS + +FOOBAR STRUCT 8 + f1 BAZ <> + f2 BAZ <> + h BYTE -1 +FOOBAR ENDS + +t2 FOOBAR <> +; CHECK-LABEL: t2: +; CHECK-NEXT: .byte -1 +; CHECK-NEXT: .byte -1 +; CHECK-NEXT: .byte -1 +; CHECK-NEXT: .long 4294967295 +; CHECK-NEXT: .short 65535 +; CHECK-NEXT: .zero 3 +; CHECK-NEXT: .byte -1 +; CHECK-NEXT: .byte -1 +; CHECK-NEXT: .byte -1 +; CHECK-NEXT: .long 4294967295 +; CHECK-NEXT: .short 65535 +; CHECK-NEXT: .byte -1 +; CHECK-NEXT: .zero 2 + +.code + +END From 20201dc76aaf68eb940eb14bfc6dd4983292fb79 Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Mon, 14 Sep 2020 14:25:39 -0400 Subject: [PATCH 0568/1079] [ms] [llvm-ml] Add support for size queries in MASM Add support for size inference, sizeof, typeof, and lengthof. Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D86947 --- llvm/include/llvm/MC/MCParser/MCAsmParser.h | 28 +- .../llvm/MC/MCParser/MCTargetAsmParser.h | 2 +- llvm/lib/MC/MCParser/AsmParser.cpp | 14 +- llvm/lib/MC/MCParser/MasmParser.cpp | 243 ++++++++++++------ .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 2 +- .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 171 +++++++++--- llvm/test/tools/llvm-ml/size_inference.test | 27 ++ llvm/test/tools/llvm-ml/struct.test | 108 ++++---- llvm/test/tools/llvm-ml/type_operators.test | 237 +++++++++++++++++ 9 files changed, 650 insertions(+), 182 deletions(-) create mode 100644 llvm/test/tools/llvm-ml/size_inference.test create mode 100644 llvm/test/tools/llvm-ml/type_operators.test diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h index a68066e0f50b5..2040810eac141 100644 --- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h @@ -90,6 +90,20 @@ struct InlineAsmIdentifierInfo { IdKind Kind; }; +// Generic type information for an assembly object. +// All sizes measured in bytes. +struct AsmTypeInfo { + StringRef Name; + unsigned Size = 0; + unsigned ElementSize = 0; + unsigned Length = 0; +}; + +struct AsmFieldInfo { + AsmTypeInfo Type; + unsigned Offset = 0; +}; + /// Generic Sema callback for assembly parser. 
class MCAsmParserSemaCallback { public: @@ -170,12 +184,15 @@ class MCAsmParser { virtual bool isParsingMasm() const { return false; } - virtual bool lookUpField(StringRef Name, StringRef &Type, - unsigned &Offset) const { + virtual bool lookUpField(StringRef Name, AsmFieldInfo &Info) const { return true; } - virtual bool lookUpField(StringRef Base, StringRef Member, StringRef &Type, - unsigned &Offset) const { + virtual bool lookUpField(StringRef Base, StringRef Member, + AsmFieldInfo &Info) const { + return true; + } + + virtual bool lookUpType(StringRef Name, AsmTypeInfo &Info) const { return true; } @@ -281,7 +298,8 @@ class MCAsmParser { /// \param Res - The value of the expression. The result is undefined /// on error. /// \return - False on success. - virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) = 0; + virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, + AsmTypeInfo *TypeInfo) = 0; /// Parse an arbitrary expression, assuming that an initial '(' has /// already been consumed. diff --git a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h index 1d10c66b4201f..5d6511372f6e1 100644 --- a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h @@ -370,7 +370,7 @@ class MCTargetAsmParser : public MCAsmParserExtension { // Target-specific parsing of expression. virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { - return getParser().parsePrimaryExpr(Res, EndLoc); + return getParser().parsePrimaryExpr(Res, EndLoc, nullptr); } virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 497f73e411057..f5a06f0a91fe0 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -244,7 +244,8 @@ class AsmParser : public MCAsmParser { bool parseExpression(const MCExpr *&Res); bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override; - bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override; + bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, + AsmTypeInfo *TypeInfo) override; bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override; bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res, SMLoc &EndLoc) override; @@ -1068,7 +1069,8 @@ bool AsmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) { /// primaryexpr ::= number /// primaryexpr ::= '.' /// primaryexpr ::= ~,+,- primaryexpr -bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { +bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, + AsmTypeInfo *TypeInfo) { SMLoc FirstTokenLoc = getLexer().getLoc(); AsmToken::TokenKind FirstTokenKind = Lexer.getKind(); switch (FirstTokenKind) { @@ -1079,7 +1081,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { return true; case AsmToken::Exclaim: Lex(); // Eat the operator. - if (parsePrimaryExpr(Res, EndLoc)) + if (parsePrimaryExpr(Res, EndLoc, TypeInfo)) return true; Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc); return false; @@ -1238,19 +1240,19 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { return parseBracketExpr(Res, EndLoc); case AsmToken::Minus: Lex(); // Eat the operator. 
- if (parsePrimaryExpr(Res, EndLoc)) + if (parsePrimaryExpr(Res, EndLoc, TypeInfo)) return true; Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc); return false; case AsmToken::Plus: Lex(); // Eat the operator. - if (parsePrimaryExpr(Res, EndLoc)) + if (parsePrimaryExpr(Res, EndLoc, TypeInfo)) return true; Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc); return false; case AsmToken::Tilde: Lex(); // Eat the operator. - if (parsePrimaryExpr(Res, EndLoc)) + if (parsePrimaryExpr(Res, EndLoc, TypeInfo)) return true; Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc); return false; diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index c1917d729c856..cc82ffbcb7cb6 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" @@ -122,14 +123,14 @@ struct FieldInfo; struct StructInfo { StringRef Name; bool IsUnion = false; - size_t Alignment = 0; - size_t Size = 0; - size_t AlignmentSize = 0; + unsigned Alignment = 0; + unsigned Size = 0; + unsigned AlignmentSize = 0; std::vector Fields; StringMap FieldsByName; FieldInfo &addField(StringRef FieldName, FieldType FT, - size_t FieldAlignmentSize); + unsigned FieldAlignmentSize); StructInfo() = default; @@ -319,13 +320,13 @@ struct FieldInfo { size_t Offset = 0; // Total size of the field (= LengthOf * Type). - size_t SizeOf = 0; + unsigned SizeOf = 0; // Number of elements in the field (1 if scalar, >1 if an array). - size_t LengthOf = 0; + unsigned LengthOf = 0; // Size of a single entry in this field, in bytes ("type" in MASM standards). - size_t Type = 0; + unsigned Type = 0; FieldInitializer Contents; @@ -333,9 +334,9 @@ struct FieldInfo { }; FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT, - size_t FieldAlignmentSize) { + unsigned FieldAlignmentSize) { if (!FieldName.empty()) - FieldsByName[FieldName] = Fields.size(); + FieldsByName[FieldName.lower()] = Fields.size(); Fields.emplace_back(FT); FieldInfo &Field = Fields.back(); if (IsUnion) { @@ -390,8 +391,8 @@ class MasmParser : public MCAsmParser { /// Maps struct tags to struct definitions. StringMap Structs; - /// Maps data location names to user-defined types. - StringMap KnownType; + /// Maps data location names to types. + StringMap KnownType; /// Stack of active macro instantiations. 
std::vector ActiveMacros; @@ -494,10 +495,11 @@ class MasmParser : public MCAsmParser { bool isParsingMasm() const override { return true; } - bool lookUpField(StringRef Name, StringRef &Type, - unsigned &Offset) const override; - bool lookUpField(StringRef Base, StringRef Member, StringRef &Type, - unsigned &Offset) const override; + bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override; + bool lookUpField(StringRef Base, StringRef Member, + AsmFieldInfo &Info) const override; + + bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override; bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, @@ -509,7 +511,8 @@ class MasmParser : public MCAsmParser { bool parseExpression(const MCExpr *&Res); bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override; - bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override; + bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, + AsmTypeInfo *TypeInfo) override; bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override; bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res, SMLoc &EndLoc) override; @@ -568,7 +571,7 @@ class MasmParser : public MCAsmParser { static void DiagHandler(const SMDiagnostic &Diag, void *Context); bool lookUpField(const StructInfo &Structure, StringRef Member, - StringRef &Type, unsigned &Offset) const; + AsmFieldInfo &Info) const; /// Should we emit DWARF describing this assembler source? (Returns false if /// the source has .file directives, which means we don't want to generate @@ -756,23 +759,24 @@ class MasmParser : public MCAsmParser { bool parseScalarInstList( unsigned Size, SmallVectorImpl &Values, const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement); - bool emitIntegralValues(unsigned Size); + bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr); bool addIntegralField(StringRef Name, unsigned Size); bool parseDirectiveValue(StringRef IDVal, unsigned Size); - bool parseDirectiveNamedValue(StringRef IDVal, unsigned Size, StringRef Name, - SMLoc NameLoc); + bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size, + StringRef Name, SMLoc NameLoc); // "real4", "real8" - bool emitRealValues(const fltSemantics &Semantics); + bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr); bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size); bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics, size_t Size); bool parseRealInstList( const fltSemantics &Semantics, SmallVectorImpl &Values, const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement); - bool parseDirectiveNamedRealValue(StringRef IDVal, - const fltSemantics &Semantics, size_t Size, - StringRef Name, SMLoc NameLoc); + bool parseDirectiveNamedRealValue(StringRef TypeName, + const fltSemantics &Semantics, + unsigned Size, StringRef Name, + SMLoc NameLoc); bool parseOptionalAngleBracketOpen(); bool parseAngleBracketClose(const Twine &Msg = "expected '>'"); @@ -816,7 +820,7 @@ class MasmParser : public MCAsmParser { const StructInitializer &Initializer); // User-defined types (structs, unions): - bool emitStructValues(const StructInfo &Structure); + bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr); bool addStructField(StringRef Name, const StructInfo &Structure); bool parseDirectiveStructValue(const StructInfo &Structure, StringRef Directive, SMLoc DirLoc); @@ -1321,7 +1325,8 @@ bool MasmParser::parseBracketExpr(const MCExpr *&Res, 
SMLoc &EndLoc) { /// primaryexpr ::= number /// primaryexpr ::= '.' /// primaryexpr ::= ~,+,-,'not' primaryexpr -bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { +bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, + AsmTypeInfo *TypeInfo) { SMLoc FirstTokenLoc = getLexer().getLoc(); AsmToken::TokenKind FirstTokenKind = Lexer.getKind(); switch (FirstTokenKind) { @@ -1332,7 +1337,7 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { return true; case AsmToken::Exclaim: Lex(); // Eat the operator. - if (parsePrimaryExpr(Res, EndLoc)) + if (parsePrimaryExpr(Res, EndLoc, nullptr)) return true; Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc); return false; @@ -1360,7 +1365,7 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { } // Parse named bitwise negation. if (Identifier.equals_lower("not")) { - if (parsePrimaryExpr(Res, EndLoc)) + if (parsePrimaryExpr(Res, EndLoc, nullptr)) return true; Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc); return false; @@ -1415,24 +1420,19 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { } // Find the field offset if used. - StringRef Type; - unsigned Offset = 0; + AsmFieldInfo Info; Split = SymbolName.split('.'); - if (!Split.second.empty()) { + if (Split.second.empty()) { + } else { SymbolName = Split.first; - if (Structs.count(SymbolName.lower()) && - !lookUpField(SymbolName, Split.second, Type, Offset)) { - // This is actually a reference to a field offset. - Res = MCConstantExpr::create(Offset, getContext()); - return false; - } - - auto TypeIt = KnownType.find(SymbolName); - if (TypeIt == KnownType.end() || - lookUpField(*TypeIt->second, Split.second, Type, Offset)) { + if (lookUpField(SymbolName, Split.second, Info)) { std::pair BaseMember = Split.second.split('.'); StringRef Base = BaseMember.first, Member = BaseMember.second; - lookUpField(Base, Member, Type, Offset); + lookUpField(Base, Member, Info); + } else if (Structs.count(SymbolName.lower())) { + // This is actually a reference to a field offset. + Res = MCConstantExpr::create(Info.Offset, getContext()); + return false; } } @@ -1458,13 +1458,23 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { // Otherwise create a symbol ref. const MCExpr *SymRef = MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc); - if (Offset) { - Res = MCBinaryExpr::create(MCBinaryExpr::Add, SymRef, - MCConstantExpr::create(Offset, getContext()), - getContext()); + if (Info.Offset) { + Res = MCBinaryExpr::create( + MCBinaryExpr::Add, SymRef, + MCConstantExpr::create(Info.Offset, getContext()), getContext()); } else { Res = SymRef; } + if (TypeInfo) { + if (Info.Type.Name.empty()) { + auto TypeIt = KnownType.find(Identifier.lower()); + if (TypeIt != KnownType.end()) { + Info.Type = TypeIt->second; + } + } + + *TypeInfo = Info.Type; + } return false; } case AsmToken::BigNum: @@ -1528,19 +1538,19 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { return parseBracketExpr(Res, EndLoc); case AsmToken::Minus: Lex(); // Eat the operator. - if (parsePrimaryExpr(Res, EndLoc)) + if (parsePrimaryExpr(Res, EndLoc, nullptr)) return true; Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc); return false; case AsmToken::Plus: Lex(); // Eat the operator. 
- if (parsePrimaryExpr(Res, EndLoc)) + if (parsePrimaryExpr(Res, EndLoc, nullptr)) return true; Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc); return false; case AsmToken::Tilde: Lex(); // Eat the operator. - if (parsePrimaryExpr(Res, EndLoc)) + if (parsePrimaryExpr(Res, EndLoc, nullptr)) return true; Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc); return false; @@ -3309,7 +3319,7 @@ bool MasmParser::parseScalarInstList(unsigned Size, return false; } -bool MasmParser::emitIntegralValues(unsigned Size) { +bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) { SmallVector Values; if (checkForValidSection() || parseScalarInstList(Size, Values)) return true; @@ -3317,6 +3327,8 @@ bool MasmParser::emitIntegralValues(unsigned Size) { for (auto Value : Values) { emitIntValue(Value, Size); } + if (Count) + *Count = Values.size(); return false; } @@ -3356,16 +3368,24 @@ bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) { /// parseDirectiveNamedValue /// ::= name (byte | word | ... ) [ expression (, expression)* ] -bool MasmParser::parseDirectiveNamedValue(StringRef IDVal, unsigned Size, +bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size, StringRef Name, SMLoc NameLoc) { if (StructInProgress.empty()) { // Initialize named data value. MCSymbol *Sym = getContext().getOrCreateSymbol(Name); getStreamer().emitLabel(Sym); - if (emitIntegralValues(Size)) - return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); + unsigned Count; + if (emitIntegralValues(Size, &Count)) + return addErrorSuffix(" in '" + Twine(TypeName) + "' directive"); + + AsmTypeInfo Type; + Type.Name = TypeName; + Type.Size = Size * Count; + Type.ElementSize = Size; + Type.Length = Count; + KnownType[Name.lower()] = Type; } else if (addIntegralField(Name, Size)) { - return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); + return addErrorSuffix(" in '" + Twine(TypeName) + "' directive"); } return false; @@ -3482,7 +3502,8 @@ bool MasmParser::parseRealInstList(const fltSemantics &Semantics, } // Initialize real data values. -bool MasmParser::emitRealValues(const fltSemantics &Semantics) { +bool MasmParser::emitRealValues(const fltSemantics &Semantics, + unsigned *Count) { if (checkForValidSection()) return true; @@ -3494,6 +3515,8 @@ bool MasmParser::emitRealValues(const fltSemantics &Semantics) { getStreamer().emitIntValue(AsInt.getLimitedValue(), AsInt.getBitWidth() / 8); } + if (Count) + *Count = ValuesAsInt.size(); return false; } @@ -3536,18 +3559,26 @@ bool MasmParser::parseDirectiveRealValue(StringRef IDVal, /// parseDirectiveNamedRealValue /// ::= name (real4 | real8) [ expression (, expression)* ] -bool MasmParser::parseDirectiveNamedRealValue(StringRef IDVal, +bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName, const fltSemantics &Semantics, - size_t Size, StringRef Name, + unsigned Size, StringRef Name, SMLoc NameLoc) { if (StructInProgress.empty()) { // Initialize named data value. 
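    // The symbol's type is also recorded (in KnownType) below, so that later
    // SIZEOF/LENGTHOF/TYPE queries against it can be answered.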
MCSymbol *Sym = getContext().getOrCreateSymbol(Name); getStreamer().emitLabel(Sym); - if (emitRealValues(Semantics)) - return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); + unsigned Count; + if (emitRealValues(Semantics, &Count)) + return addErrorSuffix(" in '" + TypeName + "' directive"); + + AsmTypeInfo Type; + Type.Name = TypeName; + Type.Size = Size * Count; + Type.ElementSize = Size; + Type.Length = Count; + KnownType[Name.lower()] = Type; } else if (addRealField(Name, Semantics, Size)) { - return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); + return addErrorSuffix(" in '" + TypeName + "' directive"); } return false; } @@ -3960,7 +3991,8 @@ bool MasmParser::emitStructInitializer(const StructInfo &Structure, } // Set data values from initializers. -bool MasmParser::emitStructValues(const StructInfo &Structure) { +bool MasmParser::emitStructValues(const StructInfo &Structure, + unsigned *Count) { std::vector Initializers; if (parseStructInstList(Structure, Initializers)) return true; @@ -3970,6 +4002,8 @@ bool MasmParser::emitStructValues(const StructInfo &Structure) { return true; } + if (Count) + *Count = Initializers.size(); return false; } @@ -4020,9 +4054,15 @@ bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure, // Initialize named data value. MCSymbol *Sym = getContext().getOrCreateSymbol(Name); getStreamer().emitLabel(Sym); - KnownType[Name] = &Structure; - if (emitStructValues(Structure)) + unsigned Count; + if (emitStructValues(Structure, &Count)) return true; + AsmTypeInfo Type; + Type.Name = Structure.Name; + Type.Size = Structure.Size * Count; + Type.ElementSize = Structure.Size; + Type.Length = Count; + KnownType[Name.lower()] = Type; } else if (addStructField(Name, Structure)) { return addErrorSuffix(" in '" + Twine(Directive) + "' directive"); } @@ -6564,37 +6604,39 @@ static int rewritesSort(const AsmRewrite *AsmRewriteA, llvm_unreachable("Unstable rewrite sort."); } -bool MasmParser::lookUpField(StringRef Name, StringRef &Type, - unsigned &Offset) const { +bool MasmParser::lookUpField(StringRef Name, AsmFieldInfo &Info) const { const std::pair BaseMember = Name.split('.'); const StringRef Base = BaseMember.first, Member = BaseMember.second; - return lookUpField(Base, Member, Type, Offset); + return lookUpField(Base, Member, Info); } -bool MasmParser::lookUpField(StringRef Base, StringRef Member, StringRef &Type, - unsigned &Offset) const { +bool MasmParser::lookUpField(StringRef Base, StringRef Member, + AsmFieldInfo &Info) const { if (Base.empty()) return true; - unsigned BaseOffset = 0; - if (Base.contains('.') && !lookUpField(Base, Type, BaseOffset)) - Base = Type; - - auto TypeIt = KnownType.find(Base); - if (TypeIt != KnownType.end()) - return lookUpField(*TypeIt->second, Member, Type, Offset); + AsmFieldInfo BaseInfo; + if (Base.contains('.') && !lookUpField(Base, BaseInfo)) + Base = BaseInfo.Type.Name; auto StructIt = Structs.find(Base.lower()); + auto TypeIt = KnownType.find(Base.lower()); + if (TypeIt != KnownType.end()) { + StructIt = Structs.find(TypeIt->second.Name.lower()); + } if (StructIt != Structs.end()) - return lookUpField(StructIt->second, Member, Type, Offset); + return lookUpField(StructIt->second, Member, Info); return true; } bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member, - StringRef &Type, unsigned &Offset) const { + AsmFieldInfo &Info) const { if (Member.empty()) { - Type = Structure.Name; + Info.Type.Name = Structure.Name; + Info.Type.Size = Structure.Size; + 
Info.Type.ElementSize = Structure.Size; + Info.Type.Length = 1; return false; } @@ -6603,7 +6645,7 @@ bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member, auto StructIt = Structs.find(FieldName.lower()); if (StructIt != Structs.end()) - return lookUpField(StructIt->second, FieldMember, Type, Offset); + return lookUpField(StructIt->second, FieldMember, Info); auto FieldIt = Structure.FieldsByName.find(FieldName.lower()); if (FieldIt == Structure.FieldsByName.end()) @@ -6611,9 +6653,12 @@ bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member, const FieldInfo &Field = Structure.Fields[FieldIt->second]; if (FieldMember.empty()) { - Offset += Field.Offset; + Info.Offset += Field.Offset; + Info.Type.Size = Field.SizeOf; + Info.Type.ElementSize = Field.Type; + Info.Type.Length = Field.LengthOf; if (Field.Contents.FT == FT_STRUCT) - Type = Field.Contents.StructInfo.Structure.Name; + Info.Type.Name = Field.Contents.StructInfo.Structure.Name; return false; } @@ -6621,14 +6666,44 @@ bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member, return true; const StructFieldInfo &StructInfo = Field.Contents.StructInfo; - bool Result = lookUpField(StructInfo.Structure, FieldMember, Type, Offset); - if (Result) + if (lookUpField(StructInfo.Structure, FieldMember, Info)) return true; - Offset += Field.Offset; + Info.Offset += Field.Offset; return false; } +bool MasmParser::lookUpType(StringRef Name, AsmTypeInfo &Info) const { + unsigned Size = StringSwitch(Name) + .CasesLower("byte", "db", "sbyte", 1) + .CasesLower("word", "dw", "sword", 2) + .CasesLower("dword", "dd", "sdword", 4) + .CasesLower("fword", "df", 6) + .CasesLower("qword", "dq", "sqword", 8) + .CaseLower("real4", 4) + .CaseLower("real8", 8) + .Default(0); + if (Size) { + Info.Name = Name; + Info.ElementSize = Size; + Info.Length = 1; + Info.Size = Size; + return false; + } + + auto StructIt = Structs.find(Name.lower()); + if (StructIt != Structs.end()) { + const StructInfo &Structure = StructIt->second; + Info.Name = Name; + Info.ElementSize = Structure.Size; + Info.Length = 1; + Info.Size = Structure.Size; + return false; + } + + return true; +} + bool MasmParser::parseMSInlineAsm( void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, SmallVectorImpl> &OpDecls, diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index d2eb7c1726e27..0460d861aebea 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -2541,7 +2541,7 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { // This syntax is not compatible with syntax of standard // MC expressions (due to the trailing '|'). 
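    // (No MASM type information is needed here, so the new TypeInfo
    // out-parameter of parsePrimaryExpr is passed as nullptr below.)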
SMLoc EndLoc; - if (getParser().parsePrimaryExpr(Expr, EndLoc)) + if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) return MatchOperand_ParseFail; } else { if (Parser.parseExpression(Expr)) diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 5694105dcbd11..361a6c04e3f21 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -32,6 +32,7 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -150,6 +151,13 @@ class X86AsmParser : public MCTargetAsmParser { IOK_TYPE, }; + enum MasmOperatorKind { + MOK_INVALID = 0, + MOK_LENGTHOF, + MOK_SIZEOF, + MOK_TYPE, + }; + class InfixCalculator { typedef std::pair< InfixCalculatorTok, int64_t > ICToken; SmallVector InfixOperatorStack; @@ -367,7 +375,7 @@ class X86AsmParser : public MCTargetAsmParser { bool MemExpr; bool OffsetOperator; SMLoc OffsetOperatorLoc; - StringRef CurType; + AsmTypeInfo CurType; bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) { if (Sym) { @@ -395,7 +403,10 @@ class X86AsmParser : public MCTargetAsmParser { unsigned getScale() { return Scale; } const MCExpr *getSym() { return Sym; } StringRef getSymName() { return SymName; } - StringRef getType() { return CurType; } + StringRef getType() { return CurType.Name; } + unsigned getSize() { return CurType.Size; } + unsigned getElementSize() { return CurType.ElementSize; } + unsigned getLength() { return CurType.Length; } int64_t getImm() { return Imm + IC.execute(); } bool isValidEndState() { return State == IES_RBRAC || State == IES_INTEGER; @@ -628,7 +639,8 @@ class X86AsmParser : public MCTargetAsmParser { } bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName, const InlineAsmIdentifierInfo &IDInfo, - bool ParsingMSInlineAsm, StringRef &ErrMsg) { + const AsmTypeInfo &Type, bool ParsingMSInlineAsm, + StringRef &ErrMsg) { // InlineAsm: Treat an enum value as an integer if (ParsingMSInlineAsm) if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) @@ -647,6 +659,7 @@ class X86AsmParser : public MCTargetAsmParser { case IES_NOT: case IES_INIT: case IES_LBRAC: + case IES_LPAREN: if (setSymRef(SymRef, SymRefName, ErrMsg)) return true; MemExpr = true; @@ -654,6 +667,7 @@ class X86AsmParser : public MCTargetAsmParser { IC.pushOperand(IC_IMM); if (ParsingMSInlineAsm) Info = IDInfo; + setTypeInfo(Type); break; } return false; @@ -752,6 +766,8 @@ class X86AsmParser : public MCTargetAsmParser { case IES_RPAREN: State = IES_PLUS; IC.pushOperator(IC_PLUS); + CurType.Length = 1; + CurType.Size = CurType.ElementSize; break; case IES_INIT: case IES_CAST: @@ -835,8 +851,8 @@ class X86AsmParser : public MCTargetAsmParser { } } bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID, - const InlineAsmIdentifierInfo &IDInfo, bool ParsingMSInlineAsm, - StringRef &ErrMsg) { + const InlineAsmIdentifierInfo &IDInfo, + bool ParsingMSInlineAsm, StringRef &ErrMsg) { PrevState = State; switch (State) { default: @@ -860,19 +876,19 @@ class X86AsmParser : public MCTargetAsmParser { } return false; } - void onCast(StringRef Type) { + void onCast(AsmTypeInfo Info) { PrevState = State; switch (State) { default: State = IES_ERROR; break; case IES_LPAREN: - setType(Type); + setTypeInfo(Info); State = IES_CAST; break; } } - void setType(StringRef 
Type) { CurType = Type; } + void setTypeInfo(AsmTypeInfo Type) { CurType = Type; } }; bool Error(SMLoc L, const Twine &Msg, SMRange Range = None, @@ -909,6 +925,8 @@ class X86AsmParser : public MCTargetAsmParser { bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End); unsigned IdentifyIntelInlineAsmOperator(StringRef Name); unsigned ParseIntelInlineAsmOperator(unsigned OpKind); + unsigned IdentifyMasmOperator(StringRef Name); + bool ParseMasmOperator(unsigned OpKind, int64_t &Val); bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands); bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM, bool &ParseError, SMLoc &End); @@ -1653,6 +1671,13 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { if (ParseIntelDotOperator(SM, End)) return true; break; + case AsmToken::Dollar: + if (!Parser.isParsingMasm()) { + if ((Done = SM.isValidEndState())) + break; + return Error(Tok.getLoc(), "unknown token in expression"); + } + LLVM_FALLTHROUGH; case AsmToken::At: case AsmToken::String: case AsmToken::Identifier: { @@ -1664,7 +1689,10 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { const AsmToken &NextTok = getLexer().peekTok(); if (NextTok.is(AsmToken::Identifier) && NextTok.getIdentifier().equals_lower("ptr")) { - SM.onCast(Identifier); + AsmTypeInfo Info; + if (Parser.lookUpType(Identifier, Info)) + return Error(Tok.getLoc(), "unknown type"); + SM.onCast(Info); // Eat type and PTR. consumeToken(); End = consumeToken(); @@ -1689,16 +1717,15 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { if (SM.onRegister(Reg, ErrMsg)) return Error(IdentLoc, ErrMsg); - StringRef Type; - unsigned Offset = 0; + AsmFieldInfo Info; SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data()); - if (Parser.lookUpField(Field, Type, Offset)) + if (Parser.lookUpField(Field, Info)) return Error(FieldStartLoc, "unknown offset"); else if (SM.onPlus(ErrMsg)) return Error(getTok().getLoc(), ErrMsg); - else if (SM.onInteger(Offset, ErrMsg)) + else if (SM.onInteger(Info.Offset, ErrMsg)) return Error(IdentLoc, ErrMsg); - SM.setType(Type); + SM.setTypeInfo(Info.Type); End = consumeToken(); break; @@ -1714,6 +1741,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { } // Symbol reference, when parsing assembly content InlineAsmIdentifierInfo Info; + AsmTypeInfo Type; const MCExpr *Val; if (isParsingMSInlineAsm() || Parser.isParsingMasm()) { // MS Dot Operator expression @@ -1740,13 +1768,24 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { return Error(IdentLoc, "expected identifier"); if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End)) return true; - else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg)) + else if (SM.onIdentifierExpr(Val, Identifier, Info, Type, true, ErrMsg)) return Error(IdentLoc, ErrMsg); break; } - if (getParser().parsePrimaryExpr(Val, End)) { + if (Parser.isParsingMasm()) { + if (unsigned OpKind = IdentifyMasmOperator(Identifier)) { + int64_t Val; + if (ParseMasmOperator(OpKind, Val)) + return true; + if (SM.onInteger(Val, ErrMsg)) + return Error(IdentLoc, ErrMsg); + break; + } + } + if (getParser().parsePrimaryExpr(Val, End, &Type)) { return Error(Tok.getLoc(), "Unexpected identifier!"); - } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) { + } else if (SM.onIdentifierExpr(Val, Identifier, Info, Type, false, + ErrMsg)) { return Error(IdentLoc, ErrMsg); } break; 
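A rough sketch of what the two new code paths above accept, using hypothetical
labels in the style of the llvm-ml tests added later in this patch: the
parenthesized PTR cast is resolved through Parser.lookUpType, and the size
operators are constant-folded through ParseMasmOperator.

    .data
    arr WORD 4 DUP (?)
    .code
    mov ax, (WORD PTR [ebx])  ; cast path: WORD resolves to a 2-byte access
    mov ecx, lengthof(arr)    ; operator path: folds to the constant 4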
@@ -1769,8 +1808,9 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
         return Error(Loc, "invalid reference to undefined symbol");
       StringRef Identifier = Sym->getName();
       InlineAsmIdentifierInfo Info;
-      if (SM.onIdentifierExpr(Val, Identifier, Info, isParsingMSInlineAsm(),
-                              ErrMsg))
+      AsmTypeInfo Type;
+      if (SM.onIdentifierExpr(Val, Identifier, Info, Type,
+                              isParsingMSInlineAsm(), ErrMsg))
         return Error(Loc, ErrMsg);
       End = consumeToken();
     } else {
@@ -1957,8 +1997,7 @@ bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) {
 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
                                          SMLoc &End) {
   const AsmToken &Tok = getTok();
-  StringRef Type;
-  unsigned Offset = 0;
+  AsmFieldInfo Info;
 
   // Drop the optional '.'.
   StringRef DotDispStr = Tok.getString();
@@ -1969,27 +2008,28 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
   if (Tok.is(AsmToken::Real)) {
     APInt DotDisp;
     DotDispStr.getAsInteger(10, DotDisp);
-    Offset = DotDisp.getZExtValue();
+    Info.Offset = DotDisp.getZExtValue();
   } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
             Tok.is(AsmToken::Identifier)) {
     const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
     const StringRef Base = BaseMember.first, Member = BaseMember.second;
-    if (getParser().lookUpField(SM.getType(), DotDispStr, Type, Offset) &&
-        getParser().lookUpField(SM.getSymName(), DotDispStr, Type, Offset) &&
-        getParser().lookUpField(DotDispStr, Type, Offset) &&
+    if (getParser().lookUpField(SM.getType(), DotDispStr, Info) &&
+        getParser().lookUpField(SM.getSymName(), DotDispStr, Info) &&
+        getParser().lookUpField(DotDispStr, Info) &&
         (!SemaCallback ||
-         SemaCallback->LookupInlineAsmField(Base, Member, Offset)))
+         SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset)))
       return Error(Tok.getLoc(), "Unable to lookup field reference!");
-  } else
+  } else {
     return Error(Tok.getLoc(), "Unexpected token type!");
+  }
 
   // Eat the DotExpression and update End
   End = SMLoc::getFromPointer(DotDispStr.data());
   const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
   while (Tok.getLoc().getPointer() < DotExprEndLoc)
     Lex();
-  SM.addImm(Offset);
-  SM.setType(Type);
+  SM.addImm(Info.Offset);
+  SM.setTypeInfo(Info.Type);
   return false;
 }
 
@@ -2004,7 +2044,7 @@ bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
   if (!isParsingMSInlineAsm()) {
     if ((getTok().isNot(AsmToken::Identifier) &&
         getTok().isNot(AsmToken::String)) ||
-        getParser().parsePrimaryExpr(Val, End))
+        getParser().parsePrimaryExpr(Val, End, nullptr))
       return Error(Start, "unexpected token!");
   } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
     return Error(Start, "unable to lookup expression");
@@ -2059,6 +2099,73 @@ unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
   return CVal;
 }
 
+// Query a candidate string for being a MASM operator; report back its kind,
+// or MOK_INVALID if it does not evaluate to a known one.
+unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
+  return StringSwitch<unsigned>(Name.lower())
+      .Case("type", MOK_TYPE)
+      .Cases("size", "sizeof", MOK_SIZEOF)
+      .Cases("length", "lengthof", MOK_LENGTHOF)
+      .Default(MOK_INVALID);
+}
+
+/// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators. The LENGTHOF operator
+/// returns the number of elements in an array. It returns the value 1 for
+/// non-array variables. The SIZEOF operator returns the size of a type or
+/// variable in bytes.
A variable's size is the product of its LENGTH and TYPE. +/// The TYPE operator returns the size of a variable. If the variable is an +/// array, TYPE returns the size of a single element. +bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) { + MCAsmParser &Parser = getParser(); + SMLoc OpLoc = Parser.getTok().getLoc(); + Parser.Lex(); // Eat operator. + + Val = 0; + if (OpKind == MOK_SIZEOF || OpKind == MOK_TYPE) { + // Check for SIZEOF() and TYPE(). + bool InParens = Parser.getTok().is(AsmToken::LParen); + const AsmToken &IDTok = InParens ? getLexer().peekTok() : Parser.getTok(); + AsmTypeInfo Type; + if (IDTok.is(AsmToken::Identifier) && + !Parser.lookUpType(IDTok.getIdentifier(), Type)) { + Val = Type.Size; + + // Eat tokens. + if (InParens) + parseToken(AsmToken::LParen); + parseToken(AsmToken::Identifier); + if (InParens) + parseToken(AsmToken::RParen); + } + } + + if (!Val) { + IntelExprStateMachine SM; + SMLoc End, Start = Parser.getTok().getLoc(); + if (ParseIntelExpression(SM, End)) + return true; + + switch (OpKind) { + default: + llvm_unreachable("Unexpected operand kind!"); + case MOK_SIZEOF: + Val = SM.getSize(); + break; + case MOK_LENGTHOF: + Val = SM.getLength(); + break; + case MOK_TYPE: + Val = SM.getElementSize(); + break; + } + + if (!Val) + return Error(OpLoc, "expression has unknown type", SMRange(Start, End)); + } + + return false; +} + bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) { Size = StringSwitch(getTok().getString()) .Cases("BYTE", "byte", 8) @@ -2161,6 +2268,8 @@ bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) { unsigned BaseReg = SM.getBaseReg(); unsigned IndexReg = SM.getIndexReg(); unsigned Scale = SM.getScale(); + if (!PtrInOperand) + Size = SM.getElementSize() << 3; if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP && (IndexReg == X86::ESP || IndexReg == X86::RSP)) @@ -2617,7 +2726,7 @@ bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { Res = X86MCExpr::create(RegNo, Parser.getContext()); return false; } - return Parser.parsePrimaryExpr(Res, EndLoc); + return Parser.parsePrimaryExpr(Res, EndLoc, nullptr); } bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, diff --git a/llvm/test/tools/llvm-ml/size_inference.test b/llvm/test/tools/llvm-ml/size_inference.test new file mode 100644 index 0000000000000..c24eb51fad42a --- /dev/null +++ b/llvm/test/tools/llvm-ml/size_inference.test @@ -0,0 +1,27 @@ +; RUN: not llvm-ml -filetype=asm %s 2>&1 | FileCheck %s --dump-input=always + +.data + +FOO STRUCT + dword_field DWORD 3 + byte_field BYTE 4 DUP (1) +FOO ENDS + +var FOO <> + +.code + +t1 PROC + +mov eax, var.byte_field +; CHECK: error: invalid operand for instruction + +mov eax, [var].byte_field +; CHECK: error: invalid operand for instruction + +mov eax, [var.byte_field] +; CHECK: error: invalid operand for instruction + +t1 ENDP + +END diff --git a/llvm/test/tools/llvm-ml/struct.test b/llvm/test/tools/llvm-ml/struct.test index 38fc763fc7e1f..facd7c14e4f4d 100644 --- a/llvm/test/tools/llvm-ml/struct.test +++ b/llvm/test/tools/llvm-ml/struct.test @@ -78,70 +78,70 @@ t2 FOOBAR <"gh",,<10,11>,<12>,"ijk"> .code t3: -mov eax, t2.f.h -mov eax, [t2].f.h -mov eax, [t2.f.h] +mov al, t2.f.h +mov al, [t2].f.h +mov al, [t2.f.h] ; CHECK: t3: -; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov al, byte ptr [rip + t2+11] +; CHECK-NEXT: mov al, byte ptr [rip + 
t2+11] +; CHECK-NEXT: mov al, byte ptr [rip + t2+11] t4: -mov eax, j.FOOBAR.f.h -mov eax, j.baz.b +mov al, j.FOOBAR.f.h +mov al, j.baz.b ; CHECK: t4: -; CHECK-NEXT: mov eax, dword ptr [rip + j+11] -; CHECK-NEXT: mov eax, dword ptr [rip + j+1] +; CHECK-NEXT: mov al, byte ptr [rip + j+11] +; CHECK-NEXT: mov al, byte ptr [rip + j+1] t5: -mov eax, [ebx].FOOBAR.f.h -mov eax, [ebx.FOOBAR].f.h -mov eax, [ebx.FOOBAR.f.h] +mov al, [ebx].FOOBAR.f.h +mov al, [ebx.FOOBAR].f.h +mov al, [ebx.FOOBAR.f.h] ; CHECK: t5: -; CHECK-NEXT: mov eax, dword ptr [ebx + 11] -; CHECK-NEXT: mov eax, dword ptr [ebx + 11] -; CHECK-NEXT: mov eax, dword ptr [ebx + 11] +; CHECK-NEXT: mov al, byte ptr [ebx + 11] +; CHECK-NEXT: mov al, byte ptr [ebx + 11] +; CHECK-NEXT: mov al, byte ptr [ebx + 11] t6: -mov eax, t2.FOOBAR.f.h -mov eax, [t2].FOOBAR.f.h -mov eax, [t2.FOOBAR].f.h -mov eax, [t2.FOOBAR.f.h] +mov al, t2.FOOBAR.f.h +mov al, [t2].FOOBAR.f.h +mov al, [t2.FOOBAR].f.h +mov al, [t2.FOOBAR.f.h] ; CHECK: t6: -; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov al, byte ptr [rip + t2+11] +; CHECK-NEXT: mov al, byte ptr [rip + t2+11] +; CHECK-NEXT: mov al, byte ptr [rip + t2+11] +; CHECK-NEXT: mov al, byte ptr [rip + t2+11] t7: -mov eax, [ebx].FOOBAR.e.b -mov eax, [ebx.FOOBAR].e.b -mov eax, [ebx.FOOBAR.e].b -mov eax, [ebx.FOOBAR.e.b] +mov al, [ebx].FOOBAR.e.b +mov al, [ebx.FOOBAR].e.b +mov al, [ebx.FOOBAR.e].b +mov al, [ebx.FOOBAR.e.b] ; CHECK: t7: -; CHECK-NEXT: mov eax, dword ptr [ebx + 9] -; CHECK-NEXT: mov eax, dword ptr [ebx + 9] -; CHECK-NEXT: mov eax, dword ptr [ebx + 9] -; CHECK-NEXT: mov eax, dword ptr [ebx + 9] +; CHECK-NEXT: mov al, byte ptr [ebx + 9] +; CHECK-NEXT: mov al, byte ptr [ebx + 9] +; CHECK-NEXT: mov al, byte ptr [ebx + 9] +; CHECK-NEXT: mov al, byte ptr [ebx + 9] t8: -mov eax, t2.FOOBAR.e.b -mov eax, [t2].FOOBAR.e.b -mov eax, [t2.FOOBAR].e.b -mov eax, [t2.FOOBAR.e].b -mov eax, [t2.FOOBAR.e.b] +mov al, t2.FOOBAR.e.b +mov al, [t2].FOOBAR.e.b +mov al, [t2.FOOBAR].e.b +mov al, [t2.FOOBAR.e].b +mov al, [t2.FOOBAR.e.b] ; CHECK: t8: -; CHECK-NEXT: mov eax, dword ptr [rip + t2+9] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+9] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+9] -; CHECK-NEXT: mov eax, dword ptr [rip + (t2+8)+1] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+9] +; CHECK-NEXT: mov al, byte ptr [rip + t2+9] +; CHECK-NEXT: mov al, byte ptr [rip + t2+9] +; CHECK-NEXT: mov al, byte ptr [rip + t2+9] +; CHECK-NEXT: mov al, byte ptr [rip + (t2+8)+1] +; CHECK-NEXT: mov al, byte ptr [rip + t2+9] QUUX STRUCT u DWORD ? 
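The rewritten CHECK lines above and below reduce to one behavioral change: a
struct field reference now carries its own element size, so the emitted memory
operand uses the field's width instead of the old dword default. A minimal
sketch, assuming a hypothetical PAIR struct rather than the FOOBAR/QUUX types
used in this test:

    .data
    PAIR STRUCT
      lo BYTE ?
      hi WORD ?
    PAIR ENDS
    p PAIR <>
    .code
    mov al, p.lo  ; BYTE field: emitted as a byte ptr access at offset 0
    mov ax, p.hi  ; WORD field: emitted as a word ptr access at offset 1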
@@ -159,20 +159,20 @@ QUUX ENDS t9: mov eax, [ebx].QUUX.u -mov eax, [ebx].QUUX.v +mov ax, [ebx].QUUX.v mov eax, [ebx].QUUX.w -mov eax, [ebx].QUUX.x -mov eax, [ebx].QUUX.y -mov eax, [ebx].QUUX.after_struct +mov al, [ebx].QUUX.x +mov al, [ebx].QUUX.y +mov al, [ebx].QUUX.after_struct mov eax, [ebx].QUUX.z ; CHECK: t9: ; CHECK-NEXT: mov eax, dword ptr [ebx] +; CHECK-NEXT: mov ax, word ptr [ebx + 4] ; CHECK-NEXT: mov eax, dword ptr [ebx + 4] -; CHECK-NEXT: mov eax, dword ptr [ebx + 4] -; CHECK-NEXT: mov eax, dword ptr [ebx + 4] -; CHECK-NEXT: mov eax, dword ptr [ebx + 5] -; CHECK-NEXT: mov eax, dword ptr [ebx + 4] +; CHECK-NEXT: mov al, byte ptr [ebx + 4] +; CHECK-NEXT: mov al, byte ptr [ebx + 5] +; CHECK-NEXT: mov al, byte ptr [ebx + 4] ; CHECK-NEXT: mov eax, dword ptr [ebx + 8] t10: @@ -184,11 +184,11 @@ mov eax, FOOBAR.f.h ; CHECK-NEXT: mov eax, 11 t11: -mov eax, (FOOBAR PTR [ebx]).f -mov eax, (FOOBAR PTR t1).f +mov ax, (FOOBAR PTR [ebx]).f +mov ax, (FOOBAR PTR t1).f ; CHECK: t11: -; CHECK-NEXT: mov eax, dword ptr [ebx + 10] -; CHECK-NEXT: mov eax, dword ptr [rip + t1+10] +; CHECK-NEXT: mov ax, word ptr [ebx + 10] +; CHECK-NEXT: mov ax, word ptr [rip + t1+10] END diff --git a/llvm/test/tools/llvm-ml/type_operators.test b/llvm/test/tools/llvm-ml/type_operators.test new file mode 100644 index 0000000000000..b8546927e3efb --- /dev/null +++ b/llvm/test/tools/llvm-ml/type_operators.test @@ -0,0 +1,237 @@ +# RUN: llvm-ml -filetype=asm %s | FileCheck %s + +.data + +FOO STRUCT 2 + x BYTE ? + y WORD 5 DUP (?) +FOO ENDS + +.code + +t1: +; CHECK-LABEL: t1: + +mov eax, sizeof BYTE +mov eax, (sizeof sBYTE) +mov eax, sizeof(Db) +mov eax, type BYTE +mov eax, (type sBYTE) +mov eax, type(Db) +; CHECK: mov eax, 1 +; CHECK: mov eax, 1 +; CHECK: mov eax, 1 +; CHECK: mov eax, 1 +; CHECK: mov eax, 1 +; CHECK: mov eax, 1 + +mov eax, sizeof(word) +mov eax, type(word) +; CHECK: mov eax, 2 +; CHECK: mov eax, 2 +mov eax, sizeof(dword) +mov eax, type(dword) +; CHECK: mov eax, 4 +; CHECK: mov eax, 4 +mov eax, sizeof(fword) +mov eax, type(fword) +; CHECK: mov eax, 6 +; CHECK: mov eax, 6 +mov eax, sizeof(qword) +mov eax, type(qword) +; CHECK: mov eax, 8 +; CHECK: mov eax, 8 + +mov eax, sizeof(real4) +mov eax, type(real4) +; CHECK: mov eax, 4 +; CHECK: mov eax, 4 +mov eax, sizeof(real8) +mov eax, type(real8) +; CHECK: mov eax, 8 +; CHECK: mov eax, 8 + +mov eax, sizeof(FOO) +mov eax, type(FOO) +; CHECK: mov eax, 12 +; CHECK: mov eax, 12 + + +t2_full BYTE "ab" +t2_short DB ? +t2_signed SBYTE 3 DUP (?) + +t2: +; CHECK-LABEL: t2: + +mov eax, sizeof(t2_full) +mov eax, lengthof(t2_full) +mov eax, type(t2_full) +; CHECK: mov eax, 2 +; CHECK: mov eax, 2 +; CHECK: mov eax, 1 + +mov eax, sizeof(t2_short) +mov eax, lengthof(t2_short) +mov eax, type(t2_short) +; CHECK: mov eax, 1 +; CHECK: mov eax, 1 +; CHECK: mov eax, 1 + +mov eax, sizeof(t2_signed) +mov eax, lengthof(t2_signed) +mov eax, type(t2_signed) +; CHECK: mov eax, 3 +; CHECK: mov eax, 3 +; CHECK: mov eax, 1 + + +t3_full WORD 2 DUP (?) +t3_short DW ? +t3_signed SWORD 3 DUP (?) + +t3: +; CHECK-LABEL: t3: + +mov eax, sizeof(t3_full) +mov eax, lengthof(t3_full) +mov eax, type(t3_full) +; CHECK: mov eax, 4 +; CHECK: mov eax, 2 +; CHECK: mov eax, 2 + +mov eax, sizeof(t3_short) +mov eax, lengthof(t3_short) +mov eax, type(t3_short) +; CHECK: mov eax, 2 +; CHECK: mov eax, 1 +; CHECK: mov eax, 2 + +mov eax, sizeof(t3_signed) +mov eax, lengthof(t3_signed) +mov eax, type(t3_signed) +; CHECK: mov eax, 6 +; CHECK: mov eax, 3 +; CHECK: mov eax, 2 + + +t4_full DWORD 2 DUP (?) +t4_short DD ? 
+t4_signed SDWORD 3 DUP (?) + +t4: +; CHECK-LABEL: t4: + +mov eax, sizeof(t4_full) +mov eax, lengthof(t4_full) +mov eax, type(t4_full) +; CHECK: mov eax, 8 +; CHECK: mov eax, 2 +; CHECK: mov eax, 4 + +mov eax, sizeof(t4_short) +mov eax, lengthof(t4_short) +mov eax, type(t4_short) +; CHECK: mov eax, 4 +; CHECK: mov eax, 1 +; CHECK: mov eax, 4 + +mov eax, sizeof(t4_signed) +mov eax, lengthof(t4_signed) +mov eax, type(t4_signed) +; CHECK: mov eax, 12 +; CHECK: mov eax, 3 +; CHECK: mov eax, 4 + + +t5_full FWORD 2 DUP (?) +t5_short DF ? + +t5: +; CHECK-LABEL: t5: + +mov eax, sizeof(t5_full) +mov eax, lengthof(t5_full) +mov eax, type(t5_full) +; CHECK: mov eax, 12 +; CHECK: mov eax, 2 +; CHECK: mov eax, 6 + +mov eax, sizeof(t5_short) +mov eax, lengthof(t5_short) +mov eax, type(t5_short) +; CHECK: mov eax, 6 +; CHECK: mov eax, 1 +; CHECK: mov eax, 6 + + +t6_full QWORD 2 DUP (?) +t6_short DQ ? +t6_signed SQWORD 3 DUP (?) + +t6: +; CHECK-LABEL: t6: + +mov eax, sizeof(t6_full) +mov eax, lengthof(t6_full) +mov eax, type(t6_full) +; CHECK: mov eax, 16 +; CHECK: mov eax, 2 +; CHECK: mov eax, 8 + +mov eax, sizeof(t6_short) +mov eax, lengthof(t6_short) +mov eax, type(t6_short) +; CHECK: mov eax, 8 +; CHECK: mov eax, 1 +; CHECK: mov eax, 8 + +mov eax, sizeof(t6_signed) +mov eax, lengthof(t6_signed) +mov eax, type(t6_signed) +; CHECK: mov eax, 24 +; CHECK: mov eax, 3 +; CHECK: mov eax, 8 + + +t7_single REAL4 2 DUP (?) +t7_double REAL8 ? + +t7: +; CHECK-LABEL: t7: + +mov eax, sizeof(t7_single) +mov eax, lengthof(t7_single) +mov eax, type(t7_single) +; CHECK: mov eax, 8 +; CHECK: mov eax, 2 +; CHECK: mov eax, 4 + +mov eax, sizeof(t7_double) +mov eax, lengthof(t7_double) +mov eax, type(t7_double) +; CHECK: mov eax, 8 +; CHECK: mov eax, 1 +; CHECK: mov eax, 8 + + +t8_var FOO <>, <> + +t8: +; CHECK-LABEL: t8: + +mov eax, sizeof(t8_var) +mov eax, lengthof(t8_var) +mov eax, type(t8_var) +; CHECK: mov eax, 24 +; CHECK: mov eax, 2 +; CHECK: mov eax, 12 + +mov eax, sizeof(t8_var.y) +mov eax, lengthof(t8_var.y) +mov eax, type(t8_var.y) +; CHECK: mov eax, 10 +; CHECK: mov eax, 5 +; CHECK: mov eax, 2 + +END From 23a2b03221c5664fefc658c3eb26e7b6ecd1a1e8 Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Mon, 14 Sep 2020 14:32:33 -0400 Subject: [PATCH 0569/1079] [ms] [llvm-ml] Add basic support for SEH, including PROC FRAME Add basic support for SEH, including PROC FRAME Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D86948 --- llvm/lib/MC/MCParser/COFFMasmParser.cpp | 66 +++++++++++++------ llvm/lib/MC/MCParser/MasmParser.cpp | 12 +++- .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 15 +++-- llvm/test/tools/llvm-ml/proc.test | 18 +++++ llvm/test/tools/llvm-ml/proc_frame.test | 34 ++++++++++ 5 files changed, 118 insertions(+), 27 deletions(-) create mode 100644 llvm/test/tools/llvm-ml/proc.test create mode 100644 llvm/test/tools/llvm-ml/proc_frame.test diff --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp index b7c48e92961b3..532ded038043f 100644 --- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp +++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp @@ -53,6 +53,9 @@ class COFFMasmParser : public MCAsmParserExtension { bool ParseDirectiveSegmentEnd(StringRef, SMLoc); bool ParseDirectiveIncludelib(StringRef, SMLoc); + bool ParseSEHDirectiveAllocStack(StringRef, SMLoc); + bool ParseSEHDirectiveEndProlog(StringRef, SMLoc); + bool IgnoreDirective(StringRef, SMLoc) { while (!getLexer().is(AsmToken::EndOfStatement)) { Lex(); @@ -65,13 +68,10 @@ class COFFMasmParser : public 
MCAsmParserExtension { MCAsmParserExtension::Initialize(Parser); // x64 directives - // .allocstack - // .endprolog - // .pushframe - // .pushreg - // .savereg - // .savexmm128 - // .setframe + addDirectiveHandler<&COFFMasmParser::ParseSEHDirectiveAllocStack>( + ".allocstack"); + addDirectiveHandler<&COFFMasmParser::ParseSEHDirectiveEndProlog>( + ".endprolog"); // Code label directives // label @@ -92,16 +92,12 @@ class COFFMasmParser : public MCAsmParserExtension { // Data allocation directives // align - // byte/sbyte - // dword/sdword // even - // fword - // qword - // real4 - // real8 + // mmword // real10 // tbyte - // word/sword + // xmmword + // ymmword // Listing control directives addDirectiveHandler<&COFFMasmParser::IgnoreDirective>(".cref"); @@ -133,14 +129,11 @@ class COFFMasmParser : public MCAsmParserExtension { // .fpo addDirectiveHandler<&COFFMasmParser::ParseDirectiveIncludelib>( "includelib"); - // mmword // option // popcontext // pushcontext // .radix // .safeseh - // xmmword - // ymmword // Procedure directives addDirectiveHandler<&COFFMasmParser::ParseDirectiveEndProc>("endp"); @@ -148,7 +141,7 @@ class COFFMasmParser : public MCAsmParserExtension { addDirectiveHandler<&COFFMasmParser::ParseDirectiveProc>("proc"); // proto - // Processor directives + // Processor directives; all ignored addDirectiveHandler<&COFFMasmParser::IgnoreDirective>(".386"); addDirectiveHandler<&COFFMasmParser::IgnoreDirective>(".386P"); addDirectiveHandler<&COFFMasmParser::IgnoreDirective>(".387"); @@ -202,11 +195,8 @@ class COFFMasmParser : public MCAsmParserExtension { // substr (equivalent to TEXTEQU @SubStr()) // Structure and record directives - // ends // record - // struct // typedef - // union } bool ParseSectionDirectiveCode(StringRef, SMLoc) { @@ -234,6 +224,7 @@ class COFFMasmParser : public MCAsmParserExtension { } StringRef CurrentProcedure; + bool CurrentProcedureFramed; public: COFFMasmParser() = default; @@ -361,8 +352,17 @@ bool COFFMasmParser::ParseDirectiveProc(StringRef Directive, SMLoc Loc) { getStreamer().EmitCOFFSymbolType(0x20); getStreamer().EndCOFFSymbolDef(); + bool Framed = false; + if (getLexer().is(AsmToken::Identifier) && + getTok().getString().equals_lower("frame")) { + Lex(); + Framed = true; + getStreamer().EmitWinCFIStartProc(Sym, Loc); + } getStreamer().emitLabel(Sym, Loc); + CurrentProcedure = Label; + CurrentProcedureFramed = Framed; return false; } bool COFFMasmParser::ParseDirectiveEndProc(StringRef Directive, SMLoc Loc) { @@ -376,6 +376,30 @@ bool COFFMasmParser::ParseDirectiveEndProc(StringRef Directive, SMLoc Loc) { else if (CurrentProcedure != Label) return Error(LabelLoc, "endp does not match current procedure '" + CurrentProcedure + "'"); + + if (CurrentProcedureFramed) { + getStreamer().EmitWinCFIEndProc(Loc); + } + CurrentProcedure = ""; + CurrentProcedureFramed = false; + return false; +} + +bool COFFMasmParser::ParseSEHDirectiveAllocStack(StringRef Directive, + SMLoc Loc) { + int64_t Size; + SMLoc SizeLoc = getTok().getLoc(); + if (getParser().parseAbsoluteExpression(Size)) + return Error(SizeLoc, "expected integer size"); + if (Size % 8 != 0) + return Error(SizeLoc, "stack size must be a multiple of 8"); + getStreamer().EmitWinCFIAllocStack(static_cast(Size), Loc); + return false; +} + +bool COFFMasmParser::ParseSEHDirectiveEndProlog(StringRef Directive, + SMLoc Loc) { + getStreamer().EmitWinCFIEndProlog(Loc); return false; } diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 
cc82ffbcb7cb6..ca9b2df7cf231 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -726,7 +726,12 @@ class MasmParser : public MCAsmParser { DK_STRUCT, DK_UNION, DK_ENDS, - DK_END + DK_END, + DK_PUSHFRAME, + DK_PUSHREG, + DK_SAVEREG, + DK_SAVEXMM128, + DK_SETFRAME, }; /// Maps directive name --> DirectiveKind enum, for directives parsed by this @@ -6333,6 +6338,11 @@ void MasmParser::initializeDirectiveKindMap() { DirectiveKindMap[".erridni"] = DK_ERRIDNI; DirectiveKindMap[".erre"] = DK_ERRE; DirectiveKindMap[".errnz"] = DK_ERRNZ; + DirectiveKindMap[".pushframe"] = DK_PUSHFRAME; + DirectiveKindMap[".pushreg"] = DK_PUSHREG; + DirectiveKindMap[".savereg"] = DK_SAVEREG; + DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128; + DirectiveKindMap[".setframe"] = DK_SETFRAME; // DirectiveKindMap[".altmacro"] = DK_ALTMACRO; // DirectiveKindMap[".noaltmacro"] = DK_NOALTMACRO; DirectiveKindMap["db"] = DK_DB; diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 361a6c04e3f21..3270932a76d08 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -4172,15 +4172,20 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveFPOEndPrologue(DirectiveID.getLoc()); else if (IDVal == ".cv_fpo_endproc") return parseDirectiveFPOEndProc(DirectiveID.getLoc()); - else if (IDVal == ".seh_pushreg") + else if (IDVal == ".seh_pushreg" || + (Parser.isParsingMasm() && IDVal.equals_lower(".pushreg"))) return parseDirectiveSEHPushReg(DirectiveID.getLoc()); - else if (IDVal == ".seh_setframe") + else if (IDVal == ".seh_setframe" || + (Parser.isParsingMasm() && IDVal.equals_lower(".setframe"))) return parseDirectiveSEHSetFrame(DirectiveID.getLoc()); - else if (IDVal == ".seh_savereg") + else if (IDVal == ".seh_savereg" || + (Parser.isParsingMasm() && IDVal.equals_lower(".savereg"))) return parseDirectiveSEHSaveReg(DirectiveID.getLoc()); - else if (IDVal == ".seh_savexmm") + else if (IDVal == ".seh_savexmm" || + (Parser.isParsingMasm() && IDVal.equals_lower(".savexmm128"))) return parseDirectiveSEHSaveXMM(DirectiveID.getLoc()); - else if (IDVal == ".seh_pushframe") + else if (IDVal == ".seh_pushframe" || + (Parser.isParsingMasm() && IDVal.equals_lower(".pushframe"))) return parseDirectiveSEHPushFrame(DirectiveID.getLoc()); return true; diff --git a/llvm/test/tools/llvm-ml/proc.test b/llvm/test/tools/llvm-ml/proc.test new file mode 100644 index 0000000000000..ad117f7fb1dde --- /dev/null +++ b/llvm/test/tools/llvm-ml/proc.test @@ -0,0 +1,18 @@ +# RUN: llvm-ml -m32 -filetype=asm %s | FileCheck %s +# RUN: llvm-ml -m64 -filetype=asm %s | FileCheck %s + +.code + +t1 PROC + ret +t1 ENDP + +; CHECK: .def t1 +; CHECK-NEXT: .scl 2 +; CHECK-NEXT: .type 32 +; CHECK-NEXT: .endef + +; CHECK: t1: +; CHECK: ret + +END diff --git a/llvm/test/tools/llvm-ml/proc_frame.test b/llvm/test/tools/llvm-ml/proc_frame.test new file mode 100644 index 0000000000000..3bf1c3a3ca4ba --- /dev/null +++ b/llvm/test/tools/llvm-ml/proc_frame.test @@ -0,0 +1,34 @@ +# RUN: llvm-ml -m64 -filetype=asm %s | FileCheck %s + +.code + +t1 PROC FRAME + push rbp + .pushreg rbp + mov rbp, rsp + .setframe rbp, 0 + pushfq + .allocstack 8 + .endprolog + ret +t1 ENDP + +; CHECK: .def t1 +; CHECK-NEXT: .scl 2 +; CHECK-NEXT: .type 32 +; CHECK-NEXT: .endef + +; CHECK: .seh_proc t1 + +; CHECK: t1: +; CHECK: push rbp +; CHECK: .seh_pushreg rbp +; CHECK: mov rbp, rsp +; CHECK: .seh_setframe rbp, 0 +; CHECK: 
pushfq +; CHECK: .seh_stackalloc 8 +; CHECK: .seh_endprologue +; CHECK: ret +; CHECK: .seh_endproc + +END From c0e3996bc7087a27e685c734480c0b92ff427d37 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 14 Sep 2020 20:37:28 +0200 Subject: [PATCH 0570/1079] [ARM] Add more tests for vecreduce soft float legalization (NFC) This mirrors the existing fadd tests to fmul, fmin and fmax. --- .../vecreduce-fmax-legalization-soft-float.ll | 142 ++++++++++++++++++ .../vecreduce-fmin-legalization-soft-float.ll | 142 ++++++++++++++++++ .../vecreduce-fmul-legalization-soft-float.ll | 102 +++++++++++++ 3 files changed, 386 insertions(+) create mode 100644 llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll create mode 100644 llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll create mode 100644 llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll new file mode 100644 index 0000000000000..e3852924f008a --- /dev/null +++ b/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll @@ -0,0 +1,142 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK + +declare half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half>) +declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>) +declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>) +declare fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128>) + +define half @test_v4f16(<4 x half> %a) nounwind { +; CHECK-LABEL: test_v4f16: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: mov r6, #255 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: orr r6, r6, #65280 +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: and r0, r1, r6 +; CHECK-NEXT: mov r8, r2 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: and r0, r4, r6 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: bl __aeabi_fcmpgt +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: and r0, r7, r6 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: and r0, r8, r6 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: bl __aeabi_fcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r6, r7 +; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: movne r4, r5 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: bl __aeabi_fcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r4, r6 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: bl __aeabi_f2h +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: mov pc, lr + %b = call fast half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %a) + ret half %b +} + +define float @test_v4f32(<4 x float> %a) nounwind { +; CHECK-LABEL: test_v4f32: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: mov r5, r2 +; CHECK-NEXT: bl __aeabi_fcmpgt +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: bl __aeabi_fcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne 
r5, r7 +; CHECK-NEXT: cmp r8, #0 +; CHECK-NEXT: movne r4, r6 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: bl __aeabi_fcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r4, r5 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: mov pc, lr + %b = call fast float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a) + ret float %b +} + +define double @test_v2f64(<2 x double> %a) nounwind { +; CHECK-LABEL: test_v2f64: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: mov r5, r2 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: moveq r7, r5 +; CHECK-NEXT: moveq r6, r4 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: mov pc, lr + %b = call fast double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %a) + ret double %b +} + +define fp128 @test_v2f128(<2 x fp128> %a) nounwind { +; CHECK-LABEL: test_v2f128: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #20 +; CHECK-NEXT: sub sp, sp, #20 +; CHECK-NEXT: ldr r8, [sp, #68] +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: ldr r9, [sp, #64] +; CHECK-NEXT: mov r5, r2 +; CHECK-NEXT: ldr r10, [sp, #60] +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: ldr r11, [sp, #56] +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: str r8, [sp, #12] +; CHECK-NEXT: str r9, [sp, #8] +; CHECK-NEXT: str r10, [sp, #4] +; CHECK-NEXT: str r11, [sp] +; CHECK-NEXT: bl __gttf2 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movle r7, r11 +; CHECK-NEXT: movle r6, r10 +; CHECK-NEXT: movle r5, r9 +; CHECK-NEXT: movle r4, r8 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: add sp, sp, #20 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: mov pc, lr + %b = call fast fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a) + ret fp128 %b +} diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll new file mode 100644 index 0000000000000..35e4c5dc5ad54 --- /dev/null +++ b/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll @@ -0,0 +1,142 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK + +declare half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half>) +declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>) +declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>) +declare fp128 @llvm.experimental.vector.reduce.fmin.v2f128(<2 x fp128>) + +define half @test_v4f16(<4 x half> %a) nounwind { +; CHECK-LABEL: test_v4f16: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: mov r6, #255 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: orr r6, r6, #65280 +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: and r0, r1, r6 +; CHECK-NEXT: mov r8, r2 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: and r0, r4, r6 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: bl __aeabi_fcmplt +; CHECK-NEXT: mov 
r9, r0 +; CHECK-NEXT: and r0, r7, r6 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: and r0, r8, r6 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: bl __aeabi_fcmplt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r6, r7 +; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: movne r4, r5 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: bl __aeabi_fcmplt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r4, r6 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: bl __aeabi_f2h +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: mov pc, lr + %b = call fast half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %a) + ret half %b +} + +define float @test_v4f32(<4 x float> %a) nounwind { +; CHECK-LABEL: test_v4f32: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: mov r5, r2 +; CHECK-NEXT: bl __aeabi_fcmplt +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: bl __aeabi_fcmplt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r5, r7 +; CHECK-NEXT: cmp r8, #0 +; CHECK-NEXT: movne r4, r6 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: bl __aeabi_fcmplt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r4, r5 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: mov pc, lr + %b = call fast float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %a) + ret float %b +} + +define double @test_v2f64(<2 x double> %a) nounwind { +; CHECK-LABEL: test_v2f64: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: mov r5, r2 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: bl __aeabi_dcmplt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: moveq r7, r5 +; CHECK-NEXT: moveq r6, r4 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: mov pc, lr + %b = call fast double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a) + ret double %b +} + +define fp128 @test_v2f128(<2 x fp128> %a) nounwind { +; CHECK-LABEL: test_v2f128: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #20 +; CHECK-NEXT: sub sp, sp, #20 +; CHECK-NEXT: ldr r8, [sp, #68] +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: ldr r9, [sp, #64] +; CHECK-NEXT: mov r5, r2 +; CHECK-NEXT: ldr r10, [sp, #60] +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: ldr r11, [sp, #56] +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: str r8, [sp, #12] +; CHECK-NEXT: str r9, [sp, #8] +; CHECK-NEXT: str r10, [sp, #4] +; CHECK-NEXT: str r11, [sp] +; CHECK-NEXT: bl __lttf2 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movpl r7, r11 +; CHECK-NEXT: movpl r6, r10 +; CHECK-NEXT: movpl r5, r9 +; CHECK-NEXT: movpl r4, r8 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: add sp, sp, #20 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: mov pc, lr + %b = call fast fp128 @llvm.experimental.vector.reduce.fmin.v2f128(<2 x fp128> %a) + ret fp128 %b +} diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll 
b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll new file mode 100644 index 0000000000000..88bc9e9726dae --- /dev/null +++ b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll @@ -0,0 +1,102 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK + +declare half @llvm.experimental.vector.reduce.v2.fmul.f16.v4f16(half, <4 x half>) +declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>) +declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double, <2 x double>) +declare fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128, <2 x fp128>) + +define half @test_v4f16(<4 x half> %a) nounwind { +; CHECK-LABEL: test_v4f16: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: mov r7, #255 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: orr r7, r7, #65280 +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: and r0, r1, r7 +; CHECK-NEXT: mov r5, r2 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: and r0, r4, r7 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: and r0, r6, r7 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: and r0, r5, r7 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: bl __aeabi_fmul +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: bl __aeabi_fmul +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: bl __aeabi_fmul +; CHECK-NEXT: bl __aeabi_f2h +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: mov pc, lr + %b = call fast half @llvm.experimental.vector.reduce.v2.fmul.f16.v4f16(half 1.0, <4 x half> %a) + ret half %b +} + +define float @test_v4f32(<4 x float> %a) nounwind { +; CHECK-LABEL: test_v4f32: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: mov r1, r2 +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: bl __aeabi_fmul +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: bl __aeabi_fmul +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: bl __aeabi_fmul +; CHECK-NEXT: pop {r4, r5, r6, lr} +; CHECK-NEXT: mov pc, lr + %b = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.0, <4 x float> %a) + ret float %b +} + +define double @test_v2f64(<2 x double> %a) nounwind { +; CHECK-LABEL: test_v2f64: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl __aeabi_dmul +; CHECK-NEXT: pop {r11, lr} +; CHECK-NEXT: mov pc, lr + %b = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double 1.0, <2 x double> %a) + ret double %b +} + +define fp128 @test_v2f128(<2 x fp128> %a) nounwind { +; CHECK-LABEL: test_v2f128: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: ldr r12, [sp, #36] +; CHECK-NEXT: str r12, [sp, #12] +; CHECK-NEXT: ldr r12, [sp, #32] +; CHECK-NEXT: str r12, [sp, #8] +; CHECK-NEXT: ldr r12, [sp, #28] +; CHECK-NEXT: str r12, [sp, #4] +; CHECK-NEXT: ldr r12, [sp, #24] +; CHECK-NEXT: str r12, [sp] +; CHECK-NEXT: bl __multf3 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: pop {r11, lr} +; CHECK-NEXT: mov pc, lr + %b = call fast fp128 
@llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128 0xL00000000000000003fff00000000000000, <2 x fp128> %a) + ret fp128 %b +} From 53f36f06afbc02d1ab96e3789b41ddeafe31f40e Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 12 Sep 2020 22:38:51 +0200 Subject: [PATCH 0571/1079] [Legalize][ARM][X86] Add float legalization for VECREDUCE This adds SoftenFloatRes, PromoteFloatRes and SoftPromoteHalfRes legalizations for VECREDUCE, to fill the remaining hole in the SDAG legalization. These legalizations simply expand the reduction and let it be recursively legalized. For the PromoteFloatRes case at least it is possible to do better than that, but it's pretty tricky (because we need to consider the interaction of three different vector legalizations and the type promotion) and probably not really worthwhile. I haven't added ExpandFloatRes support, as I am not familiar with ppc_fp128. Differential Revision: https://reviews.llvm.org/D87569 --- .../SelectionDAG/LegalizeFloatTypes.cpp | 39 +++++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 3 + llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 11 +- .../vecreduce-fadd-legalization-soft-float.ll | 45 +++--- .../vecreduce-fmax-legalization-soft-float.ll | 135 ++++++------------ .../vecreduce-fmin-legalization-soft-float.ll | 135 ++++++------------ .../vecreduce-fmul-legalization-soft-float.ll | 45 +++--- .../CodeGen/X86/vector-reduce-fmax-nnan.ll | 65 +++++++++ .../CodeGen/X86/vector-reduce-fmin-nnan.ll | 66 +++++++++ 9 files changed, 300 insertions(+), 244 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 2399525de6659..27105060c785c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -134,6 +134,12 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_FMIN: + case ISD::VECREDUCE_FMAX: + R = SoftenFloatRes_VECREDUCE(N); + break; } // If R is null, the sub-method took care of registering the result. @@ -772,6 +778,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { return Tmp.first; } +SDValue DAGTypeLegalizer::SoftenFloatRes_VECREDUCE(SDNode *N) { + // Expand and soften recursively. + ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduce(N, DAG)); + return SDValue(); +} + //===----------------------------------------------------------------------===// // Convert Float Operand to Integer @@ -2232,6 +2244,12 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break; case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_FMIN: + case ISD::VECREDUCE_FMAX: + R = PromoteFloatRes_VECREDUCE(N); + break; } if (R.getNode()) @@ -2463,6 +2481,15 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) { N->getValueType(0))); } +SDValue DAGTypeLegalizer::PromoteFloatRes_VECREDUCE(SDNode *N) { + // Expand and promote recursively. + // TODO: This is non-optimal, but dealing with the concurrently happening + // vector-legalization is non-trivial. 
We could do something similar to + // PromoteFloatRes_EXTRACT_VECTOR_ELT here. + ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduce(N, DAG)); + return SDValue(); +} + SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) { EVT VT = N->getValueType(0); @@ -2571,6 +2598,12 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::UINT_TO_FP: R = SoftPromoteHalfRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftPromoteHalfRes_UNDEF(N); break; case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_FMIN: + case ISD::VECREDUCE_FMAX: + R = SoftPromoteHalfRes_VECREDUCE(N); + break; } if (R.getNode()) @@ -2763,6 +2796,12 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BinOp(SDNode *N) { return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); } +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_VECREDUCE(SDNode *N) { + // Expand and soften recursively. + ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduce(N, DAG)); + return SDValue(); +} + //===----------------------------------------------------------------------===// // Half Operand Soft Promotion //===----------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 86f4fcc023dd9..fbbb35cb905f2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -548,6 +548,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_UNDEF(SDNode *N); SDValue SoftenFloatRes_VAARG(SDNode *N); SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); + SDValue SoftenFloatRes_VECREDUCE(SDNode *N); // Convert Float Operand to Integer. bool SoftenFloatOperand(SDNode *N, unsigned OpNo); @@ -666,6 +667,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteFloatRes_UNDEF(SDNode *N); SDValue BitcastToInt_ATOMIC_SWAP(SDNode *N); SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N); + SDValue PromoteFloatRes_VECREDUCE(SDNode *N); bool PromoteFloatOperand(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo); @@ -703,6 +705,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftPromoteHalfRes_UnaryOp(SDNode *N); SDValue SoftPromoteHalfRes_XINT_TO_FP(SDNode *N); SDValue SoftPromoteHalfRes_UNDEF(SDNode *N); + SDValue SoftPromoteHalfRes_VECREDUCE(SDNode *N); bool SoftPromoteHalfOperand(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_BITCAST(SDNode *N); diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 8b0fe30152a32..3ffe31ba883c4 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -197,16 +197,7 @@ class ARMTTIImpl : public BasicTTIImplBase { case Intrinsic::experimental_vector_reduce_v2_fadd: case Intrinsic::experimental_vector_reduce_v2_fmul: // We don't have legalization support for ordered FP reductions. - if (!II->getFastMathFlags().allowReassoc()) - return true; - // Can't legalize reductions with soft floats. - return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs(); - - case Intrinsic::experimental_vector_reduce_fmin: - case Intrinsic::experimental_vector_reduce_fmax: - // Can't legalize reductions with soft floats. 
- return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs(); - + return !II->getFastMathFlags().allowReassoc(); default: // Don't expand anything else, let legalization deal with it. return false; diff --git a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll index 164cfe1d88488..aaa376a0ba6e9 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll @@ -11,31 +11,28 @@ define half @test_v4f16(<4 x half> %a) nounwind { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: mov r7, #255 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: orr r7, r7, #65280 -; CHECK-NEXT: mov r4, r3 -; CHECK-NEXT: and r0, r1, r7 +; CHECK-NEXT: mov r4, #255 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: orr r4, r4, #65280 ; CHECK-NEXT: mov r5, r2 +; CHECK-NEXT: and r0, r3, r4 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: and r0, r4, r7 +; CHECK-NEXT: and r0, r5, r4 ; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: and r0, r6, r7 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: and r0, r7, r4 ; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: and r0, r5, r7 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: and r0, r6, r4 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r1, r0 -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: bl __aeabi_fadd -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_fadd -; CHECK-NEXT: mov r1, r0 -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __aeabi_fadd ; CHECK-NEXT: bl __aeabi_f2h ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} @@ -47,20 +44,16 @@ define half @test_v4f16(<4 x half> %a) nounwind { define float @test_v4f32(<4 x float> %a) nounwind { ; CHECK-LABEL: test_v4f32: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: mov r1, r2 +; CHECK-NEXT: .save {r4, r5, r11, lr} +; CHECK-NEXT: push {r4, r5, r11, lr} ; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: mov r5, r2 ; CHECK-NEXT: bl __aeabi_fadd -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_fadd -; CHECK-NEXT: mov r1, r0 -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_fadd -; CHECK-NEXT: pop {r4, r5, r6, lr} +; CHECK-NEXT: pop {r4, r5, r11, lr} ; CHECK-NEXT: mov pc, lr %b = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %a) ret float %b diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll index e3852924f008a..586a02b92bf3c 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll @@ -9,44 +9,33 @@ declare fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128>) define half @test_v4f16(<4 x half> %a) nounwind { ; CHECK-LABEL: test_v4f16: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: mov r6, #255 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: mov r4, #255 ; CHECK-NEXT: mov 
r7, r0 -; CHECK-NEXT: orr r6, r6, #65280 -; CHECK-NEXT: mov r4, r3 -; CHECK-NEXT: and r0, r1, r6 -; CHECK-NEXT: mov r8, r2 +; CHECK-NEXT: orr r4, r4, #65280 +; CHECK-NEXT: mov r5, r2 +; CHECK-NEXT: and r0, r3, r4 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: and r0, r4, r6 +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: and r0, r5, r4 ; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_fcmpgt -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: and r0, r7, r6 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: and r0, r7, r4 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: and r0, r8, r6 +; CHECK-NEXT: and r0, r6, r4 ; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: bl __aeabi_fcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movne r6, r7 -; CHECK-NEXT: cmp r9, #0 -; CHECK-NEXT: movne r4, r5 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_fcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movne r4, r6 -; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: bl fmaxf +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: bl fmaxf +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: bl fmaxf ; CHECK-NEXT: bl __aeabi_f2h -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: mov pc, lr %b = call fast half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %a) ret half %b @@ -55,30 +44,16 @@ define half @test_v4f16(<4 x half> %a) nounwind { define float @test_v4f32(<4 x float> %a) nounwind { ; CHECK-LABEL: test_v4f32: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: mov r0, r1 -; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: .save {r4, r5, r11, lr} +; CHECK-NEXT: push {r4, r5, r11, lr} ; CHECK-NEXT: mov r4, r3 ; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: bl __aeabi_fcmpgt -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: bl fmaxf ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_fcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movne r5, r7 -; CHECK-NEXT: cmp r8, #0 -; CHECK-NEXT: movne r4, r6 -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: bl fmaxf ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_fcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movne r4, r5 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: bl fmaxf +; CHECK-NEXT: pop {r4, r5, r11, lr} ; CHECK-NEXT: mov pc, lr %b = call fast float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a) ret float %b @@ -87,19 +62,10 @@ define float @test_v4f32(<4 x float> %a) nounwind { define double @test_v2f64(<2 x double> %a) nounwind { ; CHECK-LABEL: test_v2f64: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} -; CHECK-NEXT: mov r4, r3 -; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r7, r5 -; CHECK-NEXT: moveq r6, r4 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl fmax +; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: mov pc, lr %b = call fast double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %a) ret double %b @@ -108,34 
+74,21 @@ define double @test_v2f64(<2 x double> %a) nounwind { define fp128 @test_v2f128(<2 x fp128> %a) nounwind { ; CHECK-LABEL: test_v2f128: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #20 -; CHECK-NEXT: sub sp, sp, #20 -; CHECK-NEXT: ldr r8, [sp, #68] -; CHECK-NEXT: mov r4, r3 -; CHECK-NEXT: ldr r9, [sp, #64] -; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: ldr r10, [sp, #60] -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: ldr r11, [sp, #56] -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: str r8, [sp, #12] -; CHECK-NEXT: str r9, [sp, #8] -; CHECK-NEXT: str r10, [sp, #4] -; CHECK-NEXT: str r11, [sp] -; CHECK-NEXT: bl __gttf2 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movle r7, r11 -; CHECK-NEXT: movle r6, r10 -; CHECK-NEXT: movle r5, r9 -; CHECK-NEXT: movle r4, r8 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: add sp, sp, #20 -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: ldr r12, [sp, #36] +; CHECK-NEXT: str r12, [sp, #12] +; CHECK-NEXT: ldr r12, [sp, #32] +; CHECK-NEXT: str r12, [sp, #8] +; CHECK-NEXT: ldr r12, [sp, #28] +; CHECK-NEXT: str r12, [sp, #4] +; CHECK-NEXT: ldr r12, [sp, #24] +; CHECK-NEXT: str r12, [sp] +; CHECK-NEXT: bl fmaxl +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: mov pc, lr %b = call fast fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a) ret fp128 %b diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll index 35e4c5dc5ad54..b64e4473981bb 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll @@ -9,44 +9,33 @@ declare fp128 @llvm.experimental.vector.reduce.fmin.v2f128(<2 x fp128>) define half @test_v4f16(<4 x half> %a) nounwind { ; CHECK-LABEL: test_v4f16: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: mov r6, #255 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: mov r4, #255 ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: orr r6, r6, #65280 -; CHECK-NEXT: mov r4, r3 -; CHECK-NEXT: and r0, r1, r6 -; CHECK-NEXT: mov r8, r2 +; CHECK-NEXT: orr r4, r4, #65280 +; CHECK-NEXT: mov r5, r2 +; CHECK-NEXT: and r0, r3, r4 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: and r0, r4, r6 +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: and r0, r5, r4 ; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_fcmplt -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: and r0, r7, r6 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: and r0, r7, r4 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: and r0, r8, r6 +; CHECK-NEXT: and r0, r6, r4 ; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: bl __aeabi_fcmplt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movne r6, r7 -; CHECK-NEXT: cmp r9, #0 -; CHECK-NEXT: movne r4, r5 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_fcmplt -; CHECK-NEXT: cmp 
r0, #0 -; CHECK-NEXT: movne r4, r6 -; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: bl fminf +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: bl fminf +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: bl fminf ; CHECK-NEXT: bl __aeabi_f2h -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: mov pc, lr %b = call fast half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %a) ret half %b @@ -55,30 +44,16 @@ define half @test_v4f16(<4 x half> %a) nounwind { define float @test_v4f32(<4 x float> %a) nounwind { ; CHECK-LABEL: test_v4f32: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: mov r0, r1 -; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: .save {r4, r5, r11, lr} +; CHECK-NEXT: push {r4, r5, r11, lr} ; CHECK-NEXT: mov r4, r3 ; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: bl __aeabi_fcmplt -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: bl fminf ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_fcmplt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movne r5, r7 -; CHECK-NEXT: cmp r8, #0 -; CHECK-NEXT: movne r4, r6 -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: bl fminf ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_fcmplt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movne r4, r5 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: bl fminf +; CHECK-NEXT: pop {r4, r5, r11, lr} ; CHECK-NEXT: mov pc, lr %b = call fast float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %a) ret float %b @@ -87,19 +62,10 @@ define float @test_v4f32(<4 x float> %a) nounwind { define double @test_v2f64(<2 x double> %a) nounwind { ; CHECK-LABEL: test_v2f64: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} -; CHECK-NEXT: mov r4, r3 -; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: bl __aeabi_dcmplt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r7, r5 -; CHECK-NEXT: moveq r6, r4 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl fmin +; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: mov pc, lr %b = call fast double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a) ret double %b @@ -108,34 +74,21 @@ define double @test_v2f64(<2 x double> %a) nounwind { define fp128 @test_v2f128(<2 x fp128> %a) nounwind { ; CHECK-LABEL: test_v2f128: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #20 -; CHECK-NEXT: sub sp, sp, #20 -; CHECK-NEXT: ldr r8, [sp, #68] -; CHECK-NEXT: mov r4, r3 -; CHECK-NEXT: ldr r9, [sp, #64] -; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: ldr r10, [sp, #60] -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: ldr r11, [sp, #56] -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: str r8, [sp, #12] -; CHECK-NEXT: str r9, [sp, #8] -; CHECK-NEXT: str r10, [sp, #4] -; CHECK-NEXT: str r11, [sp] -; CHECK-NEXT: bl __lttf2 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movpl r7, r11 -; CHECK-NEXT: movpl r6, r10 -; CHECK-NEXT: movpl r5, r9 -; CHECK-NEXT: movpl r4, r8 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: add sp, sp, #20 -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; 
CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: ldr r12, [sp, #36] +; CHECK-NEXT: str r12, [sp, #12] +; CHECK-NEXT: ldr r12, [sp, #32] +; CHECK-NEXT: str r12, [sp, #8] +; CHECK-NEXT: ldr r12, [sp, #28] +; CHECK-NEXT: str r12, [sp, #4] +; CHECK-NEXT: ldr r12, [sp, #24] +; CHECK-NEXT: str r12, [sp] +; CHECK-NEXT: bl fminl +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: mov pc, lr %b = call fast fp128 @llvm.experimental.vector.reduce.fmin.v2f128(<2 x fp128> %a) ret fp128 %b diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll index 88bc9e9726dae..62111e5f0f342 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll @@ -11,31 +11,28 @@ define half @test_v4f16(<4 x half> %a) nounwind { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: mov r7, #255 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: orr r7, r7, #65280 -; CHECK-NEXT: mov r4, r3 -; CHECK-NEXT: and r0, r1, r7 +; CHECK-NEXT: mov r4, #255 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: orr r4, r4, #65280 ; CHECK-NEXT: mov r5, r2 +; CHECK-NEXT: and r0, r3, r4 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: and r0, r4, r7 +; CHECK-NEXT: and r0, r5, r4 ; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: and r0, r6, r7 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: and r0, r7, r4 ; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: and r0, r5, r7 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: and r0, r6, r4 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r1, r0 -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: bl __aeabi_fmul -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_fmul -; CHECK-NEXT: mov r1, r0 -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __aeabi_fmul ; CHECK-NEXT: bl __aeabi_f2h ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} @@ -47,20 +44,16 @@ define half @test_v4f16(<4 x half> %a) nounwind { define float @test_v4f32(<4 x float> %a) nounwind { ; CHECK-LABEL: test_v4f32: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: mov r1, r2 +; CHECK-NEXT: .save {r4, r5, r11, lr} +; CHECK-NEXT: push {r4, r5, r11, lr} ; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: mov r5, r2 ; CHECK-NEXT: bl __aeabi_fmul -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_fmul -; CHECK-NEXT: mov r1, r0 -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_fmul -; CHECK-NEXT: pop {r4, r5, r6, lr} +; CHECK-NEXT: pop {r4, r5, r11, lr} ; CHECK-NEXT: mov pc, lr %b = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.0, <4 x float> %a) ret float %b diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll index d304a925d24a0..dd3378411ecc8 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll @@ -498,6 +498,69 @@ define double @test_v16f64(<16 x double> %a0) { ret double %1 } +define half @test_v2f16(<2 x half> %a0) nounwind { +; SSE-LABEL: test_v2f16: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbx 
+; SSE-NEXT: subq $16, %rsp +; SSE-NEXT: movl %edi, %ebx +; SSE-NEXT: movzwl %si, %edi +; SSE-NEXT: callq __gnu_h2f_ieee +; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; SSE-NEXT: movzwl %bx, %edi +; SSE-NEXT: callq __gnu_h2f_ieee +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: cmpunordss %xmm0, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm2 +; SSE-NEXT: movaps (%rsp), %xmm3 # 16-byte Reload +; SSE-NEXT: andps %xmm3, %xmm2 +; SSE-NEXT: maxss %xmm0, %xmm3 +; SSE-NEXT: andnps %xmm3, %xmm1 +; SSE-NEXT: orps %xmm2, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: callq __gnu_f2h_ieee +; SSE-NEXT: addq $16, %rsp +; SSE-NEXT: popq %rbx +; SSE-NEXT: retq +; +; AVX-LABEL: test_v2f16: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbx +; AVX-NEXT: subq $16, %rsp +; AVX-NEXT: movl %esi, %ebx +; AVX-NEXT: movzwl %di, %edi +; AVX-NEXT: callq __gnu_h2f_ieee +; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX-NEXT: movzwl %bx, %edi +; AVX-NEXT: callq __gnu_h2f_ieee +; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Reload +; AVX-NEXT: # xmm2 = mem[0],zero,zero,zero +; AVX-NEXT: vmaxss %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vcmpunordss %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: callq __gnu_f2h_ieee +; AVX-NEXT: addq $16, %rsp +; AVX-NEXT: popq %rbx +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v2f16: +; AVX512: # %bb.0: +; AVX512-NEXT: movzwl %di, %eax +; AVX512-NEXT: vmovd %eax, %xmm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: movzwl %si, %eax +; AVX512-NEXT: vmovd %eax, %xmm1 +; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1 +; AVX512-NEXT: vmaxss %xmm0, %xmm1, %xmm2 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} +; AVX512-NEXT: vcvtps2ph $4, %xmm2, %xmm0 +; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; AVX512-NEXT: retq + %1 = call nnan half @llvm.experimental.vector.reduce.fmax.v2f16(<2 x half> %a0) + ret half %1 +} declare float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float>) declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>) declare float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float>) @@ -508,3 +571,5 @@ declare double @llvm.experimental.vector.reduce.fmax.v3f64(<3 x double>) declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>) declare double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double>) declare double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double>) + +declare half @llvm.experimental.vector.reduce.fmax.v2f16(<2 x half>) diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll index 28e812748abaa..4354463dfdc28 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll @@ -507,6 +507,70 @@ define double @test_v16f64(<16 x double> %a0) { ret double %1 } +define half @test_v2f16(<2 x half> %a0) nounwind { +; SSE-LABEL: test_v2f16: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbx +; SSE-NEXT: subq $16, %rsp +; SSE-NEXT: movl %edi, %ebx +; SSE-NEXT: movzwl %si, %edi +; SSE-NEXT: callq __gnu_h2f_ieee +; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; SSE-NEXT: movzwl %bx, %edi +; SSE-NEXT: callq __gnu_h2f_ieee +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: cmpunordss %xmm0, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm2 +; SSE-NEXT: movaps (%rsp), %xmm3 # 16-byte Reload +; SSE-NEXT: andps %xmm3, %xmm2 +; SSE-NEXT: minss %xmm0, %xmm3 +; SSE-NEXT: andnps %xmm3, %xmm1 +; SSE-NEXT: orps 
%xmm2, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: callq __gnu_f2h_ieee +; SSE-NEXT: addq $16, %rsp +; SSE-NEXT: popq %rbx +; SSE-NEXT: retq +; +; AVX-LABEL: test_v2f16: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbx +; AVX-NEXT: subq $16, %rsp +; AVX-NEXT: movl %esi, %ebx +; AVX-NEXT: movzwl %di, %edi +; AVX-NEXT: callq __gnu_h2f_ieee +; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX-NEXT: movzwl %bx, %edi +; AVX-NEXT: callq __gnu_h2f_ieee +; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Reload +; AVX-NEXT: # xmm2 = mem[0],zero,zero,zero +; AVX-NEXT: vminss %xmm2, %xmm0, %xmm1 +; AVX-NEXT: vcmpunordss %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: callq __gnu_f2h_ieee +; AVX-NEXT: addq $16, %rsp +; AVX-NEXT: popq %rbx +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v2f16: +; AVX512: # %bb.0: +; AVX512-NEXT: movzwl %di, %eax +; AVX512-NEXT: vmovd %eax, %xmm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: movzwl %si, %eax +; AVX512-NEXT: vmovd %eax, %xmm1 +; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1 +; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm2 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} +; AVX512-NEXT: vcvtps2ph $4, %xmm2, %xmm0 +; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; AVX512-NEXT: retq + %1 = call nnan half @llvm.experimental.vector.reduce.fmin.v2f16(<2 x half> %a0) + ret half %1 +} + declare float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float>) declare float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float>) declare float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float>) @@ -518,3 +582,5 @@ declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>) declare double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double>) declare double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double>) declare double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double>) + +declare half @llvm.experimental.vector.reduce.fmin.v2f16(<2 x half>) From abf1c82dcc5c54f2bbd65eb7b30cc40de2bd7147 Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Fri, 11 Sep 2020 10:33:33 -0400 Subject: [PATCH 0572/1079] [libc] Extend MPFRMatcher to handle 2-input-1-output and support hypot function. 
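With this, a test for a two-input math function can compare the libc result
against MPFR directly. A minimal usage sketch (hypothetical test body; the
macro and type spellings are those declared in MPFRUtils.h, and hypotf stands
for whichever implementation is under test):

  // Assumed: x and y are the float inputs being exercised by the test.
  namespace mpfr = __llvm_libc::testing::mpfr;
  float x = 3.0f, y = 4.0f;
  mpfr::BinaryInput<float> input{x, y};
  ASSERT_MPFR_MATCH(mpfr::Operation::Hypot, input, __llvm_libc::hypotf(x, y),
                    0.5);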
Differential Revision: https://reviews.llvm.org/D87514
---
 libc/utils/MPFRWrapper/MPFRUtils.cpp | 73 ++++++++++++++++++++++++++++
 libc/utils/MPFRWrapper/MPFRUtils.h   | 20 +++++++-
 2 files changed, 91 insertions(+), 2 deletions(-)

diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index 0520d8ae3ed91..56764e9740b01 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -133,6 +133,12 @@ class MPFRNumber {
     return result;
   }
 
+  MPFRNumber hypot(const MPFRNumber &b) {
+    MPFRNumber result;
+    mpfr_hypot(result.value, value, b.value, MPFR_RNDN);
+    return result;
+  }
+
   MPFRNumber remquo(const MPFRNumber &divisor, int &quotient) {
     MPFRNumber remainder;
     long q;
@@ -276,6 +282,18 @@ unaryOperationTwoOutputs(Operation op, InputType input, int &output) {
   }
 }
 
+template <typename InputType>
+cpp::EnableIfType<cpp::IsFloatingPointType<InputType>::Value, MPFRNumber>
+binaryOperationOneOutput(Operation op, InputType x, InputType y) {
+  MPFRNumber inputX(x), inputY(y);
+  switch (op) {
+  case Operation::Hypot:
+    return inputX.hypot(inputY);
+  default:
+    __builtin_unreachable();
+  }
+}
+
 template <typename InputType>
 cpp::EnableIfType<cpp::IsFloatingPointType<InputType>::Value, MPFRNumber>
 binaryOperationTwoOutputs(Operation op, InputType x, InputType y, int &output) {
@@ -401,6 +419,41 @@ template void explainBinaryOperationTwoOutputsError<long double>(
     Operation, const BinaryInput<long double> &,
     const BinaryOutput<long double> &, testutils::StreamWrapper &);
 
+template <typename T>
+void explainBinaryOperationOneOutputError(Operation op,
+                                          const BinaryInput<T> &input,
+                                          T libcResult,
+                                          testutils::StreamWrapper &OS) {
+  MPFRNumber mpfrX(input.x);
+  MPFRNumber mpfrY(input.y);
+  FPBits<T> xbits(input.x);
+  FPBits<T> ybits(input.y);
+  MPFRNumber mpfrResult = binaryOperationOneOutput(op, input.x, input.y);
+  MPFRNumber mpfrMatchValue(libcResult);
+
+  OS << "Input decimal: x: " << mpfrX.str() << " y: " << mpfrY.str() << '\n';
+  __llvm_libc::fputil::testing::describeValue("First input bits: ", input.x,
+                                              OS);
+  __llvm_libc::fputil::testing::describeValue("Second input bits: ", input.y,
+                                              OS);
+
+  OS << "Libc result: " << mpfrMatchValue.str() << '\n'
+     << "MPFR result: " << mpfrResult.str() << '\n';
+  __llvm_libc::fputil::testing::describeValue(
+      "Libc floating point result bits: ", libcResult, OS);
+  __llvm_libc::fputil::testing::describeValue(
+      "              MPFR rounded bits: ", mpfrResult.as<T>(), OS);
+  OS << "ULP error: " << std::to_string(mpfrResult.ulp(libcResult)) << '\n';
+}
+
+template void explainBinaryOperationOneOutputError<float>(
+    Operation, const BinaryInput<float> &, float, testutils::StreamWrapper &);
+template void explainBinaryOperationOneOutputError<double>(
+    Operation, const BinaryInput<double> &, double, testutils::StreamWrapper &);
+template void explainBinaryOperationOneOutputError<long double>(
+    Operation, const BinaryInput<long double> &, long double,
+    testutils::StreamWrapper &);
+
 template <typename T>
 bool compareUnaryOperationSingleOutput(Operation op, T input, T libcResult,
                                        double ulpError) {
@@ -480,6 +533,26 @@ template bool compareBinaryOperationTwoOutputs<long double>(
     Operation, const BinaryInput<long double> &,
     const BinaryOutput<long double> &, double);
 
+template <typename T>
+bool compareBinaryOperationOneOutput(Operation op, const BinaryInput<T> &input,
+                                     T libcResult, double ulpError) {
+  MPFRNumber mpfrResult = binaryOperationOneOutput(op, input.x, input.y);
+  double ulp = mpfrResult.ulp(libcResult);
+
+  bool bitsAreEven = ((FPBits<T>(libcResult).bitsAsUInt() & 1) == 0);
+  return (ulp < ulpError) ||
+         ((ulp == ulpError) && ((ulp != 0.5) || bitsAreEven));
+}
+
+template bool compareBinaryOperationOneOutput<float>(Operation,
+                                                     const BinaryInput<float> &,
+                                                     float, double);
+template bool
+compareBinaryOperationOneOutput<double>(Operation, const BinaryInput<double> &,
+                                        double, double);
+template bool compareBinaryOperationOneOutput<long double>(
+    Operation, const BinaryInput<long double> &, long double, double);
+
 } // namespace internal
 
 } // namespace mpfr
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h
index b46f09dd5e558..6fb9fe5c47b65 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.h
+++ b/libc/utils/MPFRWrapper/MPFRUtils.h
@@ -47,7 +47,7 @@ enum class Operation : int {
   // input and produce a single floating point number of the same type as
   // output.
   BeginBinaryOperationsSingleOutput,
-  // TODO: Add operations like hypot.
+  Hypot,
   EndBinaryOperationsSingleOutput,
 
   // Operations which take two floating point numbers of the same type as
@@ -109,6 +109,10 @@ bool compareBinaryOperationTwoOutputs(Operation op, const BinaryInput<T> &input,
                                       const BinaryOutput<T> &libcOutput,
                                       double t);
 
+template <typename T>
+bool compareBinaryOperationOneOutput(Operation op, const BinaryInput<T> &input,
+                                     T libcOutput, double t);
+
 template <typename T>
 void explainUnaryOperationSingleOutputError(Operation op, T input, T matchValue,
                                             testutils::StreamWrapper &OS);
@@ -122,6 +126,12 @@ void explainBinaryOperationTwoOutputsError(Operation op,
                                            const BinaryOutput<T> &matchValue,
                                            testutils::StreamWrapper &OS);
 
+template <typename T>
+void explainBinaryOperationOneOutputError(Operation op,
+                                          const BinaryInput<T> &input,
+                                          T matchValue,
+                                          testutils::StreamWrapper &OS);
+
 template <Operation op, typename InputType, typename OutputType>
 class MPFRMatcher : public testing::Matcher<OutputType> {
   InputType input;
@@ -153,7 +163,7 @@ class MPFRMatcher : public testing::Matcher<OutputType> {
 
   template <typename T>
   static bool match(const BinaryInput<T> &in, T out, double tolerance) {
-    // TODO: Implement the comparision function and error reporter.
+    return compareBinaryOperationOneOutput(op, in, out, tolerance);
   }
 
   template <typename T>
@@ -183,6 +193,12 @@ class MPFRMatcher : public testing::Matcher<OutputType> {
                           testutils::StreamWrapper &OS) {
     explainBinaryOperationTwoOutputsError(op, in, out, OS);
   }
+
+  template <typename T>
+  static void explainError(const BinaryInput<T> &in, T out,
+                           testutils::StreamWrapper &OS) {
+    explainBinaryOperationOneOutputError(op, in, out, OS);
+  }
 };

} // namespace internal

From f06090243d870c2c0f6f1551eff0688a45fab298 Mon Sep 17 00:00:00 2001
From: Louis Dionne
Date: Mon, 14 Sep 2020 15:12:13 -0400
Subject: [PATCH 0573/1079] [libc++] Use LLVM 11 instead of trunk on build bots

Somehow the snapshot of LLVM trunk we use was seeing failures.
---
 libcxx/utils/docker/debian9/buildbot/docker-compose.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/utils/docker/debian9/buildbot/docker-compose.yml b/libcxx/utils/docker/debian9/buildbot/docker-compose.yml
index b65a91e4e255c..bd61dea4871c6 100644
--- a/libcxx/utils/docker/debian9/buildbot/docker-compose.yml
+++ b/libcxx/utils/docker/debian9/buildbot/docker-compose.yml
@@ -5,7 +5,7 @@ services:
       context: https://github.com/llvm/llvm-project.git#master:libcxx/utils/docker/debian9/buildbot
       args:
         gcc_tot: "ericwf/gcc:9.2.0"
-        llvm_tot: "ericwf/llvm:trunk-2020-09-11"
+        llvm_tot: "ericwf/llvm:11.x"
     image: llvm-buildbot-worker
     volumes:
       - /var/run/docker.sock:/var/run/docker.sock

From cc947207283f934c72af0eb0b1a08978c59d40a2 Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Mon, 14 Sep 2020 21:11:56 +0200
Subject: [PATCH 0574/1079] [AArch64] Add additional vecreduce fmax/fmin
 legalization tests (NFC)

Add a vector widening test with ninf flag to the existing fmax tests,
and mirror them over into fmin tests.
--- .../AArch64/vecreduce-fmax-legalization.ll | 12 +++ .../AArch64/vecreduce-fmin-legalization.ll | 89 +++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll index 7d6d424d64a94..5fd7116e9068b 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll @@ -56,6 +56,18 @@ define float @test_v3f32(<3 x float> %a) nounwind { ret float %b } +define float @test_v3f32_ninf(<3 x float> %a) nounwind { +; CHECK-LABEL: test_v3f32_ninf: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #2143289344 +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: mov v0.s[3], v1.s[0] +; CHECK-NEXT: fmaxnmv s0, v0.4s +; CHECK-NEXT: ret + %b = call nnan ninf float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a) + ret float %b +} + define fp128 @test_v2f128(<2 x fp128> %a) nounwind { ; CHECK-LABEL: test_v2f128: ; CHECK: // %bb.0: diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll new file mode 100644 index 0000000000000..7a37c0d047a13 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK + +declare half @llvm.experimental.vector.reduce.fmin.v1f16(<1 x half> %a) +declare float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> %a) +declare double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %a) +declare fp128 @llvm.experimental.vector.reduce.fmin.v1f128(<1 x fp128> %a) + +declare float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float> %a) +declare fp128 @llvm.experimental.vector.reduce.fmin.v2f128(<2 x fp128> %a) +declare float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %a) + +define half @test_v1f16(<1 x half> %a) nounwind { +; CHECK-LABEL: test_v1f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %b = call nnan half @llvm.experimental.vector.reduce.fmin.v1f16(<1 x half> %a) + ret half %b +} + +define float @test_v1f32(<1 x float> %a) nounwind { +; CHECK-LABEL: test_v1f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: ret + %b = call nnan float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> %a) + ret float %b +} + +define double @test_v1f64(<1 x double> %a) nounwind { +; CHECK-LABEL: test_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %b = call nnan double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %a) + ret double %b +} + +define fp128 @test_v1f128(<1 x fp128> %a) nounwind { +; CHECK-LABEL: test_v1f128: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %b = call nnan fp128 @llvm.experimental.vector.reduce.fmin.v1f128(<1 x fp128> %a) + ret fp128 %b +} + +define float @test_v3f32(<3 x float> %a) nounwind { +; CHECK-LABEL: test_v3f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #2143289344 +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: mov v0.s[3], v1.s[0] +; CHECK-NEXT: fminnmv s0, v0.4s +; CHECK-NEXT: ret + %b = call nnan float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float> %a) + ret float %b +} + +define float @test_v3f32_ninf(<3 x float> %a) nounwind { +; CHECK-LABEL: test_v3f32_ninf: +; CHECK: // %bb.0: +; 
CHECK-NEXT: mov w8, #2143289344
+; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-NEXT: fminnmv s0, v0.4s
+; CHECK-NEXT: ret
+  %b = call nnan ninf float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float> %a)
+  ret float %b
+}
+
+define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
+; CHECK-LABEL: test_v2f128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: b fminl
+  %b = call nnan fp128 @llvm.experimental.vector.reduce.fmin.v2f128(<2 x fp128> %a)
+  ret fp128 %b
+}
+
+define float @test_v16f32(<16 x float> %a) nounwind {
+; CHECK-LABEL: test_v16f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fminnm v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: fminnm v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: fminnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fminnmv s0, v0.4s
+; CHECK-NEXT: ret
+  %b = call nnan float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %a)
+  ret float %b
+}

From c0f199e5667a862819d333847059cfaa95354111 Mon Sep 17 00:00:00 2001
From: Kamau Bridgeman
Date: Fri, 11 Sep 2020 10:33:33 -0400
Subject: [PATCH 0575/1079] [PowerPC] Implement Thread Local Storage Support
 for Local Exec

This patch adds initial support for the Local Exec Thread Local Storage
model, producing code sequences and relocations that conform to the ABI
when PC-relative memory operations are used.

Patch by: Kamau Bridgeman

Differential Revision: https://reviews.llvm.org/D83404
---
 .../llvm/BinaryFormat/ELFRelocs/PowerPC64.def |  2 +
 .../MCTargetDesc/PPCELFObjectWriter.cpp       |  8 +-
 llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp   |  2 +
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   | 11 +++
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |  5 ++
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |  2 +
 llvm/lib/Target/PowerPC/PPCInstrPrefix.td     |  4 +
 llvm/lib/Target/PowerPC/PPCMCInstLower.cpp    |  2 +
 .../CodeGen/PowerPC/pcrel-tls-local-exec.ll   | 74 +++++++++++++++++++
 .../pcrel-tls-local-exec-address-load-reloc.s | 15 ++++
 .../pcrel-tls-local-exec-value-load-reloc.s   | 16 ++++
 11 files changed, 140 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/pcrel-tls-local-exec.ll
 create mode 100644 llvm/test/MC/PowerPC/pcrel-tls-local-exec-address-load-reloc.s
 create mode 100644 llvm/test/MC/PowerPC/pcrel-tls-local-exec-value-load-reloc.s

diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def
index 2cf021a4cf6f2..901af679b9150 100644
--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def
+++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def
@@ -100,6 +100,7 @@
 #undef R_PPC64_PCREL_OPT
 #undef R_PPC64_PCREL34
 #undef R_PPC64_GOT_PCREL34
+#undef R_PPC64_TPREL34
 #undef R_PPC64_GOT_TLSGD_PCREL34
 #undef R_PPC64_GOT_TPREL_PCREL34
 #undef R_PPC64_IRELATIVE
@@ -200,6 +201,7 @@ ELF_RELOC(R_PPC64_REL24_NOTOC, 116)
 ELF_RELOC(R_PPC64_PCREL_OPT, 123)
 ELF_RELOC(R_PPC64_PCREL34, 132)
 ELF_RELOC(R_PPC64_GOT_PCREL34, 133)
+ELF_RELOC(R_PPC64_TPREL34, 146)
 ELF_RELOC(R_PPC64_GOT_TLSGD_PCREL34, 148)
 ELF_RELOC(R_PPC64_GOT_TPREL_PCREL34, 150)
 ELF_RELOC(R_PPC64_IRELATIVE, 248)
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 006cd57f517e9..601e11d4ee8e5 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -419,7 +419,13 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
     }
     break;
   case PPC::fixup_ppc_imm34:
-    report_fatal_error("Unsupported Modifier for fixup_ppc_imm34.");
+
switch (Modifier) { + default: + report_fatal_error("Unsupported Modifier for fixup_ppc_imm34."); + case MCSymbolRefExpr::VK_TPREL: + Type = ELF::R_PPC64_TPREL34; + break; + } break; case FK_Data_8: switch (Modifier) { diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 62bb5cc1e8062..a70e7468a15b2 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -691,6 +691,8 @@ bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) { SDValue Offset = LD->getOffset(); if (!Offset.isUndef()) return false; + if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR) + return false; SDLoc dl(LD); EVT MemVT = LD->getMemoryVT(); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 469fe9701d065..66711f69a6457 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1512,6 +1512,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR"; case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR: return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR"; + case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR: + return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR"; case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; case PPCISD::FNMSUB: return "PPCISD::FNMSUB"; case PPCISD::STRICT_FADDRTZ: @@ -3015,6 +3017,15 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, TLSModel::Model Model = TM.getTLSModel(GV); if (Model == TLSModel::LocalExec) { + if (Subtarget.isUsingPCRelativeCalls()) { + SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64); + SDValue TGA = DAG.getTargetGlobalAddress( + GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG)); + SDValue MatAddr = + DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA); + return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr); + } + SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_HA); SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 05c9a5d314133..3e900e2ce2999 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -441,6 +441,11 @@ namespace llvm { /// through an add like PADDI. TLS_DYNAMIC_MAT_PCREL_ADDR, + /// TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address + /// when using local exec access models, and when prefixed instructions are + /// available. This is used with ADD_TLS to produce an add like PADDI. 
+ TLS_LOCAL_EXEC_MAT_ADDR, + // Constrained conversion from floating point to int STRICT_FCTIDZ = ISD::FIRST_TARGET_STRICTFP_OPCODE, STRICT_FCTIWZ, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index bf7ad639ab6e4..30605a22ea399 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -368,6 +368,8 @@ def PPCprobedalloca : SDNode<"PPCISD::PROBED_ALLOCA", SDTDynOp, [SDNPHasChain]>; def PPCmatpcreladdr : SDNode<"PPCISD::MAT_PCREL_ADDR", SDTIntUnaryOp, []>; def PPCtlsdynamatpcreladdr : SDNode<"PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR", SDTIntUnaryOp, []>; +def PPCtlslocalexecmataddr : SDNode<"PPCISD::TLS_LOCAL_EXEC_MAT_ADDR", + SDTIntUnaryOp, []>; //===----------------------------------------------------------------------===// // PowerPC specific transformation functions and pattern fragments. diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td index 73321dec99d37..55872a493dd68 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -829,6 +829,10 @@ let Predicates = [PCRelativeMemops], AddedComplexity = 500 in { // PPCtlsdynamatpcreladdr node is used for TLS dynamic models to materialize // tls global address with paddi instruction. def : Pat<(PPCtlsdynamatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>; + // PPCtlslocalexecmataddr node is used for TLS local exec models to + // materialize tls global address with paddi instruction. + def : Pat<(PPCaddTls i64:$in, (PPCtlslocalexecmataddr tglobaltlsaddr:$addr)), + (PADDI8 $in, $addr)>; } let Predicates = [PrefixInstrs] in { diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index 795abed413e04..1358bec8e36f8 100644 --- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -86,6 +86,8 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, RefKind = MCSymbolRefExpr::VK_PCREL; else if (MO.getTargetFlags() == (PPCII::MO_PCREL_FLAG | PPCII::MO_GOT_FLAG)) RefKind = MCSymbolRefExpr::VK_PPC_GOT_PCREL; + else if (MO.getTargetFlags() == (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG)) + RefKind = MCSymbolRefExpr::VK_TPREL; else if (MO.getTargetFlags() == PPCII::MO_GOT_TLSGD_PCREL_FLAG) RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL; else if (MO.getTargetFlags() == PPCII::MO_GOT_TPREL_PCREL_FLAG) diff --git a/llvm/test/CodeGen/PowerPC/pcrel-tls-local-exec.ll b/llvm/test/CodeGen/PowerPC/pcrel-tls-local-exec.ll new file mode 100644 index 0000000000000..47245991d82fc --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pcrel-tls-local-exec.ll @@ -0,0 +1,74 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -enable-ppc-pcrel-tls -mcpu=pwr10 -ppc-asm-full-reg-names \ +; RUN: < %s | FileCheck %s --check-prefix=CHECK-S +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -enable-ppc-pcrel-tls -mcpu=pwr10 -ppc-asm-full-reg-names \ +; RUN: --filetype=obj < %s | llvm-objdump --no-show-raw-insn --mcpu=pwr10 -dr - \ +; RUN: | FileCheck %s --check-prefix=CHECK-O + +; These test cases are to ensure that when using pc relative memory operations +; ABI correct code and relocations are produced for the Local Exec TLS Model. 
+
+@x = thread_local global i32 0, align 4
+@y = thread_local global [5 x i32] [i32 0, i32 0, i32 0, i32 0, i32 0], align 4
+
+define i32* @LocalExecAddressLoad() {
+; CHECK-S-LABEL: LocalExecAddressLoad:
+; CHECK-S: # %bb.0: # %entry
+; CHECK-S-NEXT: paddi r3, r13, x@TPREL, 0
+; CHECK-S-NEXT: blr
+; CHECK-O-LABEL: <LocalExecAddressLoad>:
+; CHECK-O: 0: paddi 3, 13, 0, 0
+; CHECK-O-NEXT: 0000000000000000: R_PPC64_TPREL34 x
+; CHECK-O-NEXT: 8: blr
+entry:
+  ret i32* @x
+}
+
+define i32 @LocalExecValueLoad() {
+; CHECK-S-LABEL: LocalExecValueLoad:
+; CHECK-S: # %bb.0: # %entry
+; CHECK-S-NEXT: paddi r3, r13, x@TPREL, 0
+; CHECK-S-NEXT: lwz r3, 0(r3)
+; CHECK-S-NEXT: blr
+; CHECK-O-LABEL: <LocalExecValueLoad>:
+; CHECK-O: 20: paddi 3, 13, 0, 0
+; CHECK-O-NEXT: 0000000000000020: R_PPC64_TPREL34 x
+; CHECK-O-NEXT: 28: lwz 3, 0(3)
+; CHECK-O-NEXT: 2c: blr
+entry:
+  %0 = load i32, i32* @x, align 4
+  ret i32 %0
+}
+
+define i32 @LocalExecValueLoadOffset() {
+; CHECK-S-LABEL: LocalExecValueLoadOffset:
+; CHECK-S: # %bb.0: # %entry
+; CHECK-S-NEXT: paddi r3, r13, y@TPREL, 0
+; CHECK-S-NEXT: lwz r3, 12(r3)
+; CHECK-S-NEXT: blr
+; CHECK-O-LABEL: <LocalExecValueLoadOffset>:
+; CHECK-O: 40: paddi 3, 13, 0, 0
+; CHECK-O-NEXT: 0000000000000040: R_PPC64_TPREL34 y
+; CHECK-O-NEXT: 48: lwz 3, 12(3)
+; CHECK-O-NEXT: 4c: blr
+entry:
+  %0 = load i32, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @y, i64 0, i64 3), align 4
+  ret i32 %0
+}
+
+
+define i32* @LocalExecValueLoadOffsetNoLoad() {
+; CHECK-S-LABEL: LocalExecValueLoadOffsetNoLoad:
+; CHECK-S: # %bb.0: # %entry
+; CHECK-S-NEXT: paddi r3, r13, y@TPREL, 0
+; CHECK-S-NEXT: addi r3, r3, 12
+; CHECK-S-NEXT: blr
+; CHECK-O-LABEL: <LocalExecValueLoadOffsetNoLoad>:
+; CHECK-O: 60: paddi 3, 13, 0, 0
+; CHECK-O-NEXT: 0000000000000060: R_PPC64_TPREL34 y
+; CHECK-O-NEXT: 68: addi 3, 3, 12
+; CHECK-O-NEXT: 6c: blr
+entry:
+  ret i32* getelementptr inbounds ([5 x i32], [5 x i32]* @y, i64 0, i64 3)
+}
diff --git a/llvm/test/MC/PowerPC/pcrel-tls-local-exec-address-load-reloc.s b/llvm/test/MC/PowerPC/pcrel-tls-local-exec-address-load-reloc.s
new file mode 100644
index 0000000000000..ae3eb8b886623
--- /dev/null
+++ b/llvm/test/MC/PowerPC/pcrel-tls-local-exec-address-load-reloc.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple=powerpc64le-unknown-unknown -filetype=obj %s 2>&1 | \
+# RUN: FileCheck %s -check-prefix=MC
+# RUN: llvm-mc -triple=powerpc64le-unknown-unknown -filetype=obj %s | \
+# RUN: llvm-readobj -r - | FileCheck %s -check-prefix=READOBJ
+
+# This test checks that on Power PC we can correctly convert x@TPREL
+# into R_PPC64_TPREL34 for local exec relocations with address loaded.
+
+# MC-NOT: error: invalid variant
+
+# READOBJ: 0x0 R_PPC64_TPREL34 x 0x0
+
+LocalExec:
+  paddi 3, 13, x@TPREL, 0
+  blr
diff --git a/llvm/test/MC/PowerPC/pcrel-tls-local-exec-value-load-reloc.s b/llvm/test/MC/PowerPC/pcrel-tls-local-exec-value-load-reloc.s
new file mode 100644
index 0000000000000..6ebee2ff9cffb
--- /dev/null
+++ b/llvm/test/MC/PowerPC/pcrel-tls-local-exec-value-load-reloc.s
@@ -0,0 +1,16 @@
+# RUN: llvm-mc -triple=powerpc64le-unknown-unknown -filetype=obj %s 2>&1 | \
+# RUN: FileCheck %s -check-prefix=MC
+# RUN: llvm-mc -triple=powerpc64le-unknown-unknown -filetype=obj %s | \
+# RUN: llvm-readobj -r - | FileCheck %s -check-prefix=READOBJ
+
+# This test checks that on Power PC we can correctly convert x@TPREL
+# into R_PPC64_TPREL34 for local exec relocations with the value loaded.
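+# (In the Local Exec model the @TPREL operand of paddi materializes the
+# offset of x from the thread pointer in r13, so a single R_PPC64_TPREL34
+# relocation on the paddi is all that is needed.)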
+
+# MC-NOT: error: invalid variant
+
+# READOBJ: 0x0 R_PPC64_TPREL34 x 0x0
+
+LocalExecLoad:
+  paddi 3, 13, x@TPREL, 0
+  lwz 3, 0(3)
+  blr

From f6f34024e9a4870eea6733dcbab6de89cc435262 Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Tue, 8 Sep 2020 11:37:03 -0700
Subject: [PATCH 0576/1079] [ELF] Add documentation for --warn-backrefs: a GNU
 ld compatibility checking tool (and, to a lesser extent, a layering
 detection tool)

Differential Revision: https://reviews.llvm.org/D86762
---
 lld/docs/ELF/warn_backrefs.rst | 99 ++++++++++++++++++++++++++++
 lld/docs/index.rst             |  1 +
 2 files changed, 100 insertions(+)
 create mode 100644 lld/docs/ELF/warn_backrefs.rst

diff --git a/lld/docs/ELF/warn_backrefs.rst b/lld/docs/ELF/warn_backrefs.rst
new file mode 100644
index 0000000000000..d4388f9afbb42
--- /dev/null
+++ b/lld/docs/ELF/warn_backrefs.rst
@@ -0,0 +1,99 @@
+--warn-backrefs
+===============
+
+``--warn-backrefs`` gives a warning when an undefined symbol reference is
+resolved by a definition in an archive to the left of it on the command line.
+
+A linker such as GNU ld makes a single pass over the input files from left to
+right, maintaining the set of undefined symbol references from the files loaded
+so far. When encountering an archive, or an object file surrounded by
+``--start-lib`` and ``--end-lib``, it will be searched for resolving
+symbol definitions; this may result in input files being loaded, updating the
+set of undefined symbol references. When all resolving definitions have been
+loaded from the archive, the linker moves on to the next file and will not
+return to it. This means that an input file to the right of an archive cannot
+have an undefined symbol resolved by an archive to the left of it. For example:
+
+    ld def.a ref.o
+
+will result in an ``undefined reference`` error. If there are no cyclic
+references, the archives can be ordered in such a way that there are no
+backward references. If there are cyclic references then the ``--start-group``
+and ``--end-group`` options can be used, or the same archive can be placed on
+the command line twice.
+
+LLD remembers the symbol table of archives that it has previously seen, so if
+there is a reference from an input file to the right of an archive, LLD will
+still search that archive for resolving any undefined references. This means
+that an archive only needs to be included once on the command line and the
+``--start-group`` and ``--end-group`` options are redundant.
+
+A consequence of the differing archive searching semantics is that the same
+linker command line can result in different outcomes. A link may succeed with
+LLD that will fail with GNU ld, or, even worse, both links may succeed but
+select different objects from different archives that both define the same
+symbols.
+
+The ``--warn-backrefs`` option provides information that helps identify cases
+where LLD and GNU ld archive selection may differ.
+
+  % ld.lld --warn-backrefs ... -lB -lA
+  ld.lld: warning: backward reference detected: system in A.a(a.o) refers to B.a(b.o)
+
+  % ld.lld --warn-backrefs ... --start-lib B/b.o --end-lib --start-lib A/a.o --end-lib
+  ld.lld: warning: backward reference detected: system in A/a.o refers to B/b.o
+
+  # To suppress the warning, you can specify --warn-backrefs-exclude= to match B/b.o or B.a(b.o)
+
+The ``--warn-backrefs`` option can also provide a check to enforce a
+topological order of archives, which can be useful to detect layering
+violations (albeit unable to catch all cases).
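+
+For example, if ``B`` is a lower-level library than ``A``, listing the
+higher-level archive first keeps the check clean (a hypothetical command
+line; the library and object names are purely illustrative):
+
+  % ld.lld --warn-backrefs main.o A.a B.a   # references flow left to right: no warning
+  % ld.lld --warn-backrefs main.o B.a A.a   # A.a(a.o) refers back to B.a(b.o): warning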
+There are two cases where GNU ld will result in an ``undefined reference``
+error:
+
+* If adding the dependency does not form a cycle: conceptually ``A`` is a
+  higher-level library while ``B`` is at a lower level. When you are developing
+  an application ``P`` which depends on ``A``, but does not directly depend on
+  ``B``, your link may fail surprisingly with ``undefined symbol:
+  symbol_defined_in_B`` if the used/linked part of ``A`` happens to need some
+  components of ``B``. It is inappropriate for ``P`` to add a dependency on
+  ``B`` since ``P`` does not use ``B`` directly.
+* If adding the dependency forms a cycle, e.g. ``B->C->A ~> B``. ``A``
+  is supposed to be at the lowest level while ``B`` is supposed to be at the
+  highest level. When you are developing ``C_test`` testing ``C``, your link may
+  fail surprisingly with ``undefined symbol`` if there is somehow a dependency on
+  some components of ``B``. You could fix the issue by adding the missing
+  dependency (``B``); however, then every test (``A_test``, ``B_test``,
+  ``C_test``) will link against every library. This defeats the purpose of
+  splitting ``B``, ``C`` and ``A`` into separate libraries and makes binaries
+  unnecessarily large. Moreover, the layering violation makes lower-level
+  libraries (e.g. ``A``) vulnerable to changes to higher-level libraries (e.g.
+  ``B``, ``C``).
+
+Resolution:
+
+* Add a dependency from ``A`` to ``B``.
+* The reference may be unintended and can be removed.
+* The dependency may be intentionally omitted because there are multiple
+  libraries like ``B``. Consider linking ``B`` with object semantics by
+  surrounding it with ``--whole-archive`` and ``--no-whole-archive``.
+* In the case of a circular dependency, sometimes merging the libraries is the
+  best option.
+
+There are two cases, resembling a library sandwich, where GNU ld will select a
+different object.
+
+* ``A.a B A2.so``: ``A.a`` may be used as an interceptor (e.g. it provides some
+  optimized libc functions and ``A2`` is libc). ``B`` does not need to know
+  about ``A.a``, and ``A.a`` may be pulled into the link by another part of the
+  program. For linker portability, consider ``--whole-archive`` and
+  ``--no-whole-archive``.
+
+* ``A.a B A2.a``: similar to the above case but ``--warn-backrefs`` does not
+  flag the problem, because ``A2.a`` may be a replica of ``A.a``, which is
+  redundant but benign. In some cases ``A.a`` and ``B`` should be surrounded by
+  a pair of ``--start-group`` and ``--end-group``. This is especially common
+  among system libraries (e.g. ``-lc __isnanl references -lm``, ``-lc
+  _IO_funlockfile references -lpthread``, ``-lc __gcc_personality_v0 references
+  -lgcc_eh``, and ``-lpthread _Unwind_GetCFA references -lunwind``).
+
+  In C++, this is likely an ODR violation. We probably need a dedicated option
+  for ODR detection.
diff --git a/lld/docs/index.rst b/lld/docs/index.rst
index b820d57e3d354..900ad8219fe07 100644
--- a/lld/docs/index.rst
+++ b/lld/docs/index.rst
@@ -177,3 +177,4 @@ document soon.
    Partitions
    ReleaseNotes
    ELF/linker_script
+   ELF/warn_backrefs

From 4208ea3e19f8e3e8cd35e6f5a6c43f4aa066c6ec Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 14 Sep 2020 12:52:54 -0700
Subject: [PATCH 0577/1079] [FastISel] Bail out of selectGetElementPtr for
 vector GEPs.

The code that decomposes the GEP into ADD/MUL doesn't work properly
for vector GEPs. It can create bad COPY instructions or possibly
assert. For now just bail out to SelectionDAG.
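For illustration, the kind of IR that now takes the SelectionDAG path is a
GEP whose pointer operand is a vector (a reduced sketch in the spirit of the
PR45906 test added below, not the exact reproducer):

  %tmp = getelementptr inbounds %struct.foo, <8 x %struct.foo*> %ptr, i64 0, i32 1

FastISel previously tried to decompose such GEPs into scalar ADD/MUL
sequences, which is where the bad copies came from.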
Fixes PR45906
---
 llvm/lib/CodeGen/SelectionDAG/FastISel.cpp    |  6 +++
 .../test/CodeGen/X86/masked_gather_scatter.ll | 47 +++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 1b924037c3be0..178614cdadf4a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -690,6 +690,12 @@ bool FastISel::selectGetElementPtr(const User *I) {
   Register N = getRegForValue(I->getOperand(0));
   if (!N) // Unhandled operand. Halt "fast" selection and bail.
     return false;
+
+  // FIXME: The code below does not handle vector GEPs. Halt "fast" selection
+  // and bail.
+  if (isa<VectorType>(I->getType()))
+    return false;
+
   bool NIsKill = hasTrivialKill(I->getOperand(0));
 
   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 88418fd85fe52..c82efa56655ea 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -3421,3 +3421,50 @@ define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) {
   ret void
 }
 
+%struct.foo = type { i8*, i64, i16, i16, i32 }
+
+; This used to cause fast-isel to generate bad copy instructions that would
+; cause an error in copyPhysReg.
+define <8 x i64> @pr45906(<8 x %struct.foo*> %ptr) {
+; KNL_64-LABEL: pr45906:
+; KNL_64: # %bb.0: # %bb
+; KNL_64-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
+; KNL_64-NEXT: kxnorw %k0, %k0, %k1
+; KNL_64-NEXT: vpgatherqq (,%zmm1), %zmm0 {%k1}
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: pr45906:
+; KNL_32: # %bb.0: # %bb
+; KNL_32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
+; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm1
+; KNL_32-NEXT: kxnorw %k0, %k0, %k1
+; KNL_32-NEXT: vpgatherdq (,%ymm1), %zmm0 {%k1}
+; KNL_32-NEXT: retl
+;
+; SKX_SMALL-LABEL: pr45906:
+; SKX_SMALL: # %bb.0: # %bb
+; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
+; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
+; SKX_SMALL-NEXT: vpgatherqq (,%zmm1), %zmm0 {%k1}
+; SKX_SMALL-NEXT: retq
+;
+; SKX_LARGE-LABEL: pr45906:
+; SKX_LARGE: # %bb.0: # %bb
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpaddq (%rax){1to8}, %zmm0, %zmm1
+; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
+; SKX_LARGE-NEXT: vpgatherqq (,%zmm1), %zmm0 {%k1}
+; SKX_LARGE-NEXT: retq
+;
+; SKX_32-LABEL: pr45906:
+; SKX_32: # %bb.0: # %bb
+; SKX_32-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm1
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vpgatherdq (,%ymm1), %zmm0 {%k1}
+; SKX_32-NEXT: retl
+bb:
+  %tmp = getelementptr inbounds %struct.foo, <8 x %struct.foo*> %ptr, i64 0, i32 1
+  %tmp1 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> %tmp, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i64> undef)
+  ret <8 x i64> %tmp1
+}
+declare <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*>, i32, <8 x i1>, <8 x i64>)

From becf15527583380b510ce269ee51abd364551f13 Mon Sep 17 00:00:00 2001
From: Louis Dionne
Date: Mon, 14 Sep 2020 16:12:47 -0400
Subject: [PATCH 0578/1079] [libc++] Add comment in atomic test to explain why
 part of it is disabled on Apple

---
 libcxx/test/std/atomics/types.pass.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/test/std/atomics/types.pass.cpp b/libcxx/test/std/atomics/types.pass.cpp
index 5740b758035ea..891bbbbd6d515 100644
--- a/libcxx/test/std/atomics/types.pass.cpp
+++ b/libcxx/test/std/atomics/types.pass.cpp
@@
-155,7 +155,7 @@ int main(int, char**) test(); test(); -#ifndef __APPLE__ +#ifndef __APPLE__ // Apple doesn't ship libatomic /* These aren't going to be lock-free, so some libatomic.a is necessary. From 226d80ebe20e2d796af6c1bc43d9fbdfbb9d4a07 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Mon, 14 Sep 2020 09:12:13 -0700 Subject: [PATCH 0579/1079] [MemProf] Rename HeapProfiler to MemProfiler for consistency This is consistent with the clang option added in 7ed8124d46f94601d5f1364becee9cee8538265e, and the comments on the runtime patch in D87120. Differential Revision: https://reviews.llvm.org/D87622 --- clang/include/clang/Basic/CodeGenOptions.def | 2 +- clang/include/clang/Driver/SanitizerArgs.h | 4 +- clang/lib/CodeGen/BackendUtil.cpp | 22 +- clang/lib/Driver/SanitizerArgs.cpp | 4 +- clang/lib/Driver/ToolChains/CommonArgs.cpp | 12 +- clang/lib/Frontend/CompilerInvocation.cpp | 2 +- clang/test/Driver/fmemprof.cpp | 4 +- llvm/include/llvm/InitializePasses.h | 4 +- .../Transforms/Instrumentation/HeapProfiler.h | 49 ---- .../Transforms/Instrumentation/MemProfiler.h | 49 ++++ llvm/lib/Passes/PassBuilder.cpp | 14 +- llvm/lib/Passes/PassRegistry.def | 4 +- .../Transforms/Instrumentation/CMakeLists.txt | 2 +- .../Instrumentation/Instrumentation.cpp | 4 +- .../{HeapProfiler.cpp => MemProfiler.cpp} | 238 +++++++++--------- .../Instrumentation/HeapProfiler/basic.ll | 32 +-- .../instrumentation-use-callbacks.ll | 26 +- .../HeapProfiler/masked-load-store.ll | 76 +++--- .../HeapProfiler/scale-granularity.ll | 8 +- .../HeapProfiler/version-mismatch-check.ll | 12 +- 20 files changed, 283 insertions(+), 285 deletions(-) delete mode 100644 llvm/include/llvm/Transforms/Instrumentation/HeapProfiler.h create mode 100644 llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h rename llvm/lib/Transforms/Instrumentation/{HeapProfiler.cpp => MemProfiler.cpp} (68%) diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 740d544710510..feb4ed01f6e86 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -145,7 +145,7 @@ CODEGENOPT(IncrementalLinkerCompatible, 1, 0) ///< Emit an object file which can ///< linker. CODEGENOPT(MergeAllConstants , 1, 1) ///< Merge identical constants. CODEGENOPT(MergeFunctions , 1, 0) ///< Set when -fmerge-functions is enabled. -CODEGENOPT(HeapProf , 1, 0) ///< Set when -fmemory-profile is enabled. +CODEGENOPT(MemProf , 1, 0) ///< Set when -fmemory-profile is enabled. CODEGENOPT(MSVolatile , 1, 0) ///< Set when /volatile:ms is enabled. CODEGENOPT(NoCommon , 1, 0) ///< Set when -fno-common or C++ is enabled. CODEGENOPT(NoDwarfDirectoryAsm , 1, 0) ///< Set when -fno-dwarf-directory-asm is diff --git a/clang/include/clang/Driver/SanitizerArgs.h b/clang/include/clang/Driver/SanitizerArgs.h index 95d6bcf35c786..ac2b817be1dc5 100644 --- a/clang/include/clang/Driver/SanitizerArgs.h +++ b/clang/include/clang/Driver/SanitizerArgs.h @@ -55,7 +55,7 @@ class SanitizerArgs { bool MinimalRuntime = false; // True if cross-dso CFI support if provided by the system (i.e. Android). bool ImplicitCfiRuntime = false; - bool NeedsHeapProfRt = false; + bool NeedsMemProfRt = false; public: /// Parses the sanitizer arguments from an argument list. 
@@ -63,7 +63,7 @@ class SanitizerArgs { bool needsSharedRt() const { return SharedRuntime; } - bool needsHeapProfRt() const { return NeedsHeapProfRt; } + bool needsMemProfRt() const { return NeedsMemProfRt; } bool needsAsanRt() const { return Sanitizers.has(SanitizerKind::Address); } bool needsHwasanRt() const { return Sanitizers.has(SanitizerKind::HWAddress); diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 258f5fe69ff89..472d86ea2e360 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -67,8 +67,8 @@ #include "llvm/Transforms/Instrumentation/BoundsChecking.h" #include "llvm/Transforms/Instrumentation/GCOVProfiler.h" #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" -#include "llvm/Transforms/Instrumentation/HeapProfiler.h" #include "llvm/Transforms/Instrumentation/InstrProfiling.h" +#include "llvm/Transforms/Instrumentation/MemProfiler.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" @@ -268,10 +268,10 @@ static bool asanUseGlobalsGC(const Triple &T, const CodeGenOptions &CGOpts) { return false; } -static void addHeapProfilerPasses(const PassManagerBuilder &Builder, - legacy::PassManagerBase &PM) { - PM.add(createHeapProfilerFunctionPass()); - PM.add(createModuleHeapProfilerLegacyPassPass()); +static void addMemProfilerPasses(const PassManagerBuilder &Builder, + legacy::PassManagerBase &PM) { + PM.add(createMemProfilerFunctionPass()); + PM.add(createModuleMemProfilerLegacyPassPass()); } static void addAddressSanitizerPasses(const PassManagerBuilder &Builder, @@ -672,11 +672,11 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, if (LangOpts.Coroutines) addCoroutinePassesToExtensionPoints(PMBuilder); - if (CodeGenOpts.HeapProf) { + if (CodeGenOpts.MemProf) { PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast, - addHeapProfilerPasses); + addMemProfilerPasses); PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0, - addHeapProfilerPasses); + addMemProfilerPasses); } if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) { @@ -1384,9 +1384,9 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( } } - if (CodeGenOpts.HeapProf) { - MPM.addPass(createModuleToFunctionPassAdaptor(HeapProfilerPass())); - MPM.addPass(ModuleHeapProfilerPass()); + if (CodeGenOpts.MemProf) { + MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); + MPM.addPass(ModuleMemProfilerPass()); } if (LangOpts.Sanitize.has(SanitizerKind::HWAddress)) { diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index 0cb1e7b5282b6..be726adc6d04a 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -866,8 +866,8 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, LinkCXXRuntimes) || D.CCCIsCXX(); - NeedsHeapProfRt = Args.hasFlag(options::OPT_fmemory_profile, - options::OPT_fno_memory_profile, false); + NeedsMemProfRt = Args.hasFlag(options::OPT_fmemory_profile, + options::OPT_fno_memory_profile, false); // Finally, initialize the set of available and recoverable sanitizers. 
Sanitizers.Mask |= Kinds; diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 4a946721a551e..5dc5d834136e5 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -706,10 +706,10 @@ collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, if (!Args.hasArg(options::OPT_shared) && !TC.getTriple().isAndroid()) HelperStaticRuntimes.push_back("asan-preinit"); } - if (SanArgs.needsHeapProfRt() && SanArgs.linkRuntimes()) { - SharedRuntimes.push_back("heapprof"); + if (SanArgs.needsMemProfRt() && SanArgs.linkRuntimes()) { + SharedRuntimes.push_back("memprof"); if (!Args.hasArg(options::OPT_shared) && !TC.getTriple().isAndroid()) - HelperStaticRuntimes.push_back("heapprof-preinit"); + HelperStaticRuntimes.push_back("memprof-preinit"); } if (SanArgs.needsUbsanRt() && SanArgs.linkRuntimes()) { if (SanArgs.requiresMinimalRuntime()) @@ -748,11 +748,11 @@ collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, StaticRuntimes.push_back("asan_cxx"); } - if (!SanArgs.needsSharedRt() && SanArgs.needsHeapProfRt() && + if (!SanArgs.needsSharedRt() && SanArgs.needsMemProfRt() && SanArgs.linkRuntimes()) { - StaticRuntimes.push_back("heapprof"); + StaticRuntimes.push_back("memprof"); if (SanArgs.linkCXXRuntimes()) - StaticRuntimes.push_back("heapprof_cxx"); + StaticRuntimes.push_back("memprof_cxx"); } if (!SanArgs.needsSharedRt() && SanArgs.needsHwasanRt() && SanArgs.linkRuntimes()) { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 0d8b0f9d07ef5..8393ebe9c07a1 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1033,7 +1033,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, Opts.ThinLinkBitcodeFile = std::string(Args.getLastArgValue(OPT_fthin_link_bitcode_EQ)); - Opts.HeapProf = Args.hasArg(OPT_fmemory_profile); + Opts.MemProf = Args.hasArg(OPT_fmemory_profile); Opts.MSVolatile = Args.hasArg(OPT_fms_volatile); diff --git a/clang/test/Driver/fmemprof.cpp b/clang/test/Driver/fmemprof.cpp index a2b740e1e6e5e..69686442d4103 100644 --- a/clang/test/Driver/fmemprof.cpp +++ b/clang/test/Driver/fmemprof.cpp @@ -1,6 +1,6 @@ // RUN: %clangxx -target x86_64-linux-gnu -fmemory-profile %s -### 2>&1 | FileCheck %s // RUN: %clangxx -target x86_64-linux-gnu -fmemory-profile -fno-memory-profile %s -### 2>&1 | FileCheck %s --check-prefix=OFF // CHECK: "-cc1" {{.*}} "-fmemory-profile" -// CHECK: ld{{.*}}libclang_rt.heapprof{{.*}}libclang_rt.heapprof_cxx +// CHECK: ld{{.*}}libclang_rt.memprof{{.*}}libclang_rt.memprof_cxx // OFF-NOT: "-fmemory-profile" -// OFF-NOT: libclang_rt.heapprof +// OFF-NOT: libclang_rt.memprof diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 63ae19d8495db..f9a9604d1305c 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -176,7 +176,7 @@ void initializeGlobalSplitPass(PassRegistry&); void initializeGlobalsAAWrapperPassPass(PassRegistry&); void initializeGuardWideningLegacyPassPass(PassRegistry&); void initializeHardwareLoopsPass(PassRegistry&); -void initializeHeapProfilerLegacyPassPass(PassRegistry &); +void initializeMemProfilerLegacyPassPass(PassRegistry &); void initializeHotColdSplittingLegacyPassPass(PassRegistry&); void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &); void initializeIPSCCPLegacyPassPass(PassRegistry&); @@ -305,7 
+305,7 @@ void initializeMergeICmpsLegacyPassPass(PassRegistry &);
 void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry&);
 void initializeMetaRenamerPass(PassRegistry&);
 void initializeModuleDebugInfoPrinterPass(PassRegistry&);
-void initializeModuleHeapProfilerLegacyPassPass(PassRegistry &);
+void initializeModuleMemProfilerLegacyPassPass(PassRegistry &);
 void initializeModuleSummaryIndexWrapperPassPass(PassRegistry&);
 void initializeModuloScheduleTestPass(PassRegistry&);
 void initializeMustExecutePrinterPass(PassRegistry&);
diff --git a/llvm/include/llvm/Transforms/Instrumentation/HeapProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/HeapProfiler.h
deleted file mode 100644
index 21943616c5e1b..0000000000000
--- a/llvm/include/llvm/Transforms/Instrumentation/HeapProfiler.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===--------- Definition of the HeapProfiler class -------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the HeapProfiler class.
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_HEAPPROFILER_H
-#define LLVM_TRANSFORMS_INSTRUMENTATION_HEAPPROFILER_H
-
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/PassManager.h"
-
-namespace llvm {
-
-/// Public interface to the heap profiler pass for instrumenting code to
-/// profile heap memory accesses.
-///
-/// The profiler itself is a function pass that works by inserting various
-/// calls to the HeapProfiler runtime library functions. The runtime library
-/// essentially replaces malloc() and free() with custom implementations that
-/// record data about the allocations.
-class HeapProfilerPass : public PassInfoMixin<HeapProfilerPass> {
-public:
-  explicit HeapProfilerPass();
-  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-};
-
-/// Public interface to the heap profiler module pass for instrumenting code
-/// to profile heap memory allocations and accesses.
-class ModuleHeapProfilerPass : public PassInfoMixin<ModuleHeapProfilerPass> {
-public:
-  explicit ModuleHeapProfilerPass();
-  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
-};
-
-// Insert HeapProfiler instrumentation
-FunctionPass *createHeapProfilerFunctionPass();
-ModulePass *createModuleHeapProfilerLegacyPassPass();
-
-} // namespace llvm
-
-#endif
diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
new file mode 100644
index 0000000000000..6918a24183b0d
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
@@ -0,0 +1,49 @@
+//===--------- Definition of the MemProfiler class --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MemProfiler class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_MEMPROFILER_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_MEMPROFILER_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// Public interface to the memory profiler pass for instrumenting code to
+/// profile memory accesses.
+///
+/// The profiler itself is a function pass that works by inserting various
+/// calls to the MemProfiler runtime library functions. The runtime library
+/// essentially replaces malloc() and free() with custom implementations that
+/// record data about the allocations.
+class MemProfilerPass : public PassInfoMixin<MemProfilerPass> {
+public:
+  explicit MemProfilerPass();
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// Public interface to the memory profiler module pass for instrumenting code
+/// to profile memory allocations and accesses.
+class ModuleMemProfilerPass : public PassInfoMixin<ModuleMemProfilerPass> {
+public:
+  explicit ModuleMemProfilerPass();
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+// Insert MemProfiler instrumentation
+FunctionPass *createMemProfilerFunctionPass();
+ModulePass *createModuleMemProfilerLegacyPassPass();
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index bae84784628d6..c47f612e71991 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -111,9 +111,9 @@
 #include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
 #include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
 #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
-#include "llvm/Transforms/Instrumentation/HeapProfiler.h"
 #include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
+#include "llvm/Transforms/Instrumentation/MemProfiler.h"
 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
 #include "llvm/Transforms/Instrumentation/PoisonChecking.h"
@@ -261,9 +261,9 @@ static cl::opt<bool>
                             cl::Hidden,
                             cl::desc("Enable inline deferral during PGO"));
 
-static cl::opt<bool> EnableHeapProfiler("enable-heap-prof", cl::init(false),
-                                        cl::Hidden, cl::ZeroOrMore,
-                                        cl::desc("Enable heap profiler"));
+static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::init(false),
+                                       cl::Hidden, cl::ZeroOrMore,
+                                       cl::desc("Enable memory profiler"));
 
 PipelineTuningOptions::PipelineTuningOptions() {
   LoopInterleaving = true;
@@ -1042,9 +1042,9 @@ ModulePassManager PassBuilder::buildModuleSimplificationPipeline(
 
   MPM.addPass(buildInlinerPipeline(Level, Phase, DebugLogging));
 
-  if (EnableHeapProfiler && Phase != ThinLTOPhase::PreLink) {
-    MPM.addPass(createModuleToFunctionPassAdaptor(HeapProfilerPass()));
-    MPM.addPass(ModuleHeapProfilerPass());
+  if (EnableMemProfiler && Phase != ThinLTOPhase::PreLink) {
+    MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
+    MPM.addPass(ModuleMemProfilerPass());
   }
 
   return MPM;
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index b0d1d2a63a830..4b4f71a718702 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -98,7 +98,7 @@ MODULE_PASS("msan-module", MemorySanitizerPass({}))
 MODULE_PASS("tsan-module", ThreadSanitizerPass())
 MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false))
MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass()) -MODULE_PASS("heapprof-module", ModuleHeapProfilerPass()) +MODULE_PASS("memprof-module", ModuleMemProfilerPass()) MODULE_PASS("poison-checking", PoisonCheckingPass()) #undef MODULE_PASS @@ -279,7 +279,7 @@ FUNCTION_PASS("kasan", AddressSanitizerPass(true, false, false)) FUNCTION_PASS("msan", MemorySanitizerPass({})) FUNCTION_PASS("kmsan", MemorySanitizerPass({0, false, /*Kernel=*/true})) FUNCTION_PASS("tsan", ThreadSanitizerPass()) -FUNCTION_PASS("heapprof", HeapProfilerPass()) +FUNCTION_PASS("memprof", MemProfilerPass()) #undef FUNCTION_PASS #ifndef FUNCTION_PASS_WITH_PARAMS diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt index 1fc0b140be035..63bc57ac9c440 100644 --- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -5,7 +5,7 @@ add_llvm_component_library(LLVMInstrumentation ControlHeightReduction.cpp DataFlowSanitizer.cpp GCOVProfiling.cpp - HeapProfiler.cpp + MemProfiler.cpp MemorySanitizer.cpp IndirectCallPromotion.cpp Instrumentation.cpp diff --git a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp index 5cf3c2e3e11b3..cfdf3cad97f73 100644 --- a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -105,8 +105,8 @@ Comdat *llvm::GetOrCreateFunctionComdat(Function &F, Triple &T, void llvm::initializeInstrumentation(PassRegistry &Registry) { initializeAddressSanitizerLegacyPassPass(Registry); initializeModuleAddressSanitizerLegacyPassPass(Registry); - initializeHeapProfilerLegacyPassPass(Registry); - initializeModuleHeapProfilerLegacyPassPass(Registry); + initializeMemProfilerLegacyPassPass(Registry); + initializeModuleMemProfilerLegacyPassPass(Registry); initializeBoundsCheckingLegacyPassPass(Registry); initializeControlHeightReductionLegacyPassPass(Registry); initializeGCOVProfilerLegacyPassPass(Registry); diff --git a/llvm/lib/Transforms/Instrumentation/HeapProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp similarity index 68% rename from llvm/lib/Transforms/Instrumentation/HeapProfiler.cpp rename to llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 5f8671d7d88fc..7f2a5ae1a189a 100644 --- a/llvm/lib/Transforms/Instrumentation/HeapProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -1,4 +1,4 @@ -//===- HeapProfiler.cpp - heap allocation and access profiler -------------===// +//===- MemProfiler.cpp - memory allocation and access profiler ------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,15 +6,15 @@ // //===----------------------------------------------------------------------===// // -// This file is a part of HeapProfiler. Memory accesses are instrumented +// This file is a part of MemProfiler. Memory accesses are instrumented // to increment the access count held in a shadow memory location, or // alternatively to call into the runtime. Memory intrinsic calls (memmove, -// memcpy, memset) are changed to call the heap profiling runtime version +// memcpy, memset) are changed to call the memory profiling runtime version // instead. 
// //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation/HeapProfiler.h" +#include "llvm/Transforms/Instrumentation/MemProfiler.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" @@ -39,9 +39,9 @@ using namespace llvm; -#define DEBUG_TYPE "heapprof" +#define DEBUG_TYPE "memprof" -constexpr int LLVM_HEAP_PROFILER_VERSION = 1; +constexpr int LLVM_MEM_PROFILER_VERSION = 1; // Size of memory mapped to a single shadow location. constexpr uint64_t DefaultShadowGranularity = 64; @@ -49,74 +49,74 @@ constexpr uint64_t DefaultShadowGranularity = 64; // Scale from granularity down to shadow size. constexpr uint64_t DefaultShadowScale = 3; -constexpr char HeapProfModuleCtorName[] = "heapprof.module_ctor"; -constexpr uint64_t HeapProfCtorAndDtorPriority = 1; +constexpr char MemProfModuleCtorName[] = "memprof.module_ctor"; +constexpr uint64_t MemProfCtorAndDtorPriority = 1; // On Emscripten, the system needs more than one priorities for constructors. -constexpr uint64_t HeapProfEmscriptenCtorAndDtorPriority = 50; -constexpr char HeapProfInitName[] = "__heapprof_init"; -constexpr char HeapProfVersionCheckNamePrefix[] = - "__heapprof_version_mismatch_check_v"; +constexpr uint64_t MemProfEmscriptenCtorAndDtorPriority = 50; +constexpr char MemProfInitName[] = "__memprof_init"; +constexpr char MemProfVersionCheckNamePrefix[] = + "__memprof_version_mismatch_check_v"; -constexpr char HeapProfShadowMemoryDynamicAddress[] = - "__heapprof_shadow_memory_dynamic_address"; +constexpr char MemProfShadowMemoryDynamicAddress[] = + "__memprof_shadow_memory_dynamic_address"; // Command-line flags. static cl::opt ClInsertVersionCheck( - "heapprof-guard-against-version-mismatch", + "memprof-guard-against-version-mismatch", cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden, cl::init(true)); // This flag may need to be replaced with -f[no-]memprof-reads. -static cl::opt ClInstrumentReads("heapprof-instrument-reads", +static cl::opt ClInstrumentReads("memprof-instrument-reads", cl::desc("instrument read instructions"), cl::Hidden, cl::init(true)); static cl::opt - ClInstrumentWrites("heapprof-instrument-writes", + ClInstrumentWrites("memprof-instrument-writes", cl::desc("instrument write instructions"), cl::Hidden, cl::init(true)); static cl::opt ClInstrumentAtomics( - "heapprof-instrument-atomics", + "memprof-instrument-atomics", cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, cl::init(true)); static cl::opt ClUseCalls( - "heapprof-use-callbacks", + "memprof-use-callbacks", cl::desc("Use callbacks instead of inline instrumentation sequences."), cl::Hidden, cl::init(false)); static cl::opt - ClMemoryAccessCallbackPrefix("heapprof-memory-access-callback-prefix", + ClMemoryAccessCallbackPrefix("memprof-memory-access-callback-prefix", cl::desc("Prefix for memory access callbacks"), - cl::Hidden, cl::init("__heapprof_")); + cl::Hidden, cl::init("__memprof_")); // These flags allow to change the shadow mapping. 
// The shadow mapping looks like // Shadow = ((Mem & mask) >> scale) + offset -static cl::opt ClMappingScale("heapprof-mapping-scale", - cl::desc("scale of heapprof shadow mapping"), +static cl::opt ClMappingScale("memprof-mapping-scale", + cl::desc("scale of memprof shadow mapping"), cl::Hidden, cl::init(DefaultShadowScale)); static cl::opt - ClMappingGranularity("heapprof-mapping-granularity", - cl::desc("granularity of heapprof shadow mapping"), + ClMappingGranularity("memprof-mapping-granularity", + cl::desc("granularity of memprof shadow mapping"), cl::Hidden, cl::init(DefaultShadowGranularity)); // Debug flags. -static cl::opt ClDebug("heapprof-debug", cl::desc("debug"), cl::Hidden, +static cl::opt ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden, cl::init(0)); -static cl::opt ClDebugFunc("heapprof-debug-func", cl::Hidden, +static cl::opt ClDebugFunc("memprof-debug-func", cl::Hidden, cl::desc("Debug func")); -static cl::opt ClDebugMin("heapprof-debug-min", cl::desc("Debug min inst"), +static cl::opt ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"), cl::Hidden, cl::init(-1)); -static cl::opt ClDebugMax("heapprof-debug-max", cl::desc("Debug max inst"), +static cl::opt ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"), cl::Hidden, cl::init(-1)); STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); @@ -139,8 +139,8 @@ struct ShadowMapping { }; static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) { - return TargetTriple.isOSEmscripten() ? HeapProfEmscriptenCtorAndDtorPriority - : HeapProfCtorAndDtorPriority; + return TargetTriple.isOSEmscripten() ? MemProfEmscriptenCtorAndDtorPriority + : MemProfCtorAndDtorPriority; } struct InterestingMemoryAccess { @@ -151,10 +151,10 @@ struct InterestingMemoryAccess { Value *MaybeMask = nullptr; }; -/// Instrument the code in module to profile heap accesses. -class HeapProfiler { +/// Instrument the code in module to profile memory accesses. 
+class MemProfiler { public: - HeapProfiler(Module &M) { + MemProfiler(Module &M) { C = &(M.getContext()); LongSize = M.getDataLayout().getPointerSizeInBits(); IntptrTy = Type::getIntNTy(*C, LongSize); @@ -177,7 +177,7 @@ class HeapProfiler { void instrumentMemIntrinsic(MemIntrinsic *MI); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool instrumentFunction(Function &F); - bool maybeInsertHeapProfInitAtFunctionEntry(Function &F); + bool maybeInsertMemProfInitAtFunctionEntry(Function &F); bool insertDynamicShadowAtFunctionEntry(Function &F); private: @@ -189,68 +189,67 @@ class HeapProfiler { ShadowMapping Mapping; // These arrays is indexed by AccessIsWrite - FunctionCallee HeapProfMemoryAccessCallback[2]; - FunctionCallee HeapProfMemoryAccessCallbackSized[2]; + FunctionCallee MemProfMemoryAccessCallback[2]; + FunctionCallee MemProfMemoryAccessCallbackSized[2]; - FunctionCallee HeapProfMemmove, HeapProfMemcpy, HeapProfMemset; + FunctionCallee MemProfMemmove, MemProfMemcpy, MemProfMemset; Value *DynamicShadowOffset = nullptr; }; -class HeapProfilerLegacyPass : public FunctionPass { +class MemProfilerLegacyPass : public FunctionPass { public: static char ID; - explicit HeapProfilerLegacyPass() : FunctionPass(ID) { - initializeHeapProfilerLegacyPassPass(*PassRegistry::getPassRegistry()); + explicit MemProfilerLegacyPass() : FunctionPass(ID) { + initializeMemProfilerLegacyPassPass(*PassRegistry::getPassRegistry()); } - StringRef getPassName() const override { return "HeapProfilerFunctionPass"; } + StringRef getPassName() const override { return "MemProfilerFunctionPass"; } bool runOnFunction(Function &F) override { - HeapProfiler Profiler(*F.getParent()); + MemProfiler Profiler(*F.getParent()); return Profiler.instrumentFunction(F); } }; -class ModuleHeapProfiler { +class ModuleMemProfiler { public: - ModuleHeapProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); } + ModuleMemProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); } bool instrumentModule(Module &); private: Triple TargetTriple; ShadowMapping Mapping; - Function *HeapProfCtorFunction = nullptr; + Function *MemProfCtorFunction = nullptr; }; -class ModuleHeapProfilerLegacyPass : public ModulePass { +class ModuleMemProfilerLegacyPass : public ModulePass { public: static char ID; - explicit ModuleHeapProfilerLegacyPass() : ModulePass(ID) { - initializeModuleHeapProfilerLegacyPassPass( - *PassRegistry::getPassRegistry()); + explicit ModuleMemProfilerLegacyPass() : ModulePass(ID) { + initializeModuleMemProfilerLegacyPassPass(*PassRegistry::getPassRegistry()); } - StringRef getPassName() const override { return "ModuleHeapProfiler"; } + StringRef getPassName() const override { return "ModuleMemProfiler"; } void getAnalysisUsage(AnalysisUsage &AU) const override {} bool runOnModule(Module &M) override { - ModuleHeapProfiler HeapProfiler(M); - return HeapProfiler.instrumentModule(M); + ModuleMemProfiler MemProfiler(M); + return MemProfiler.instrumentModule(M); } }; } // end anonymous namespace -HeapProfilerPass::HeapProfilerPass() {} +MemProfilerPass::MemProfilerPass() {} -PreservedAnalyses HeapProfilerPass::run(Function &F, - AnalysisManager &AM) { +PreservedAnalyses MemProfilerPass::run(Function &F, + AnalysisManager &AM) { Module &M = *F.getParent(); - HeapProfiler Profiler(M); + MemProfiler Profiler(M); if (Profiler.instrumentFunction(F)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); @@ -258,41 +257,41 @@ PreservedAnalyses HeapProfilerPass::run(Function &F, return 
PreservedAnalyses::all(); } -ModuleHeapProfilerPass::ModuleHeapProfilerPass() {} +ModuleMemProfilerPass::ModuleMemProfilerPass() {} -PreservedAnalyses ModuleHeapProfilerPass::run(Module &M, - AnalysisManager &AM) { - ModuleHeapProfiler Profiler(M); +PreservedAnalyses ModuleMemProfilerPass::run(Module &M, + AnalysisManager &AM) { + ModuleMemProfiler Profiler(M); if (Profiler.instrumentModule(M)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } -char HeapProfilerLegacyPass::ID = 0; +char MemProfilerLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(HeapProfilerLegacyPass, "heapprof", - "HeapProfiler: profile heap allocations and accesses.", +INITIALIZE_PASS_BEGIN(MemProfilerLegacyPass, "memprof", + "MemProfiler: profile memory allocations and accesses.", false, false) -INITIALIZE_PASS_END(HeapProfilerLegacyPass, "heapprof", - "HeapProfiler: profile heap allocations and accesses.", +INITIALIZE_PASS_END(MemProfilerLegacyPass, "memprof", + "MemProfiler: profile memory allocations and accesses.", false, false) -FunctionPass *llvm::createHeapProfilerFunctionPass() { - return new HeapProfilerLegacyPass(); +FunctionPass *llvm::createMemProfilerFunctionPass() { + return new MemProfilerLegacyPass(); } -char ModuleHeapProfilerLegacyPass::ID = 0; +char ModuleMemProfilerLegacyPass::ID = 0; -INITIALIZE_PASS(ModuleHeapProfilerLegacyPass, "heapprof-module", - "HeapProfiler: profile heap allocations and accesses." +INITIALIZE_PASS(ModuleMemProfilerLegacyPass, "memprof-module", + "MemProfiler: profile memory allocations and accesses." "ModulePass", false, false) -ModulePass *llvm::createModuleHeapProfilerLegacyPassPass() { - return new ModuleHeapProfilerLegacyPass(); +ModulePass *llvm::createModuleMemProfilerLegacyPassPass() { + return new ModuleMemProfilerLegacyPass(); } -Value *HeapProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) { +Value *MemProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) { // (Shadow & mask) >> scale Shadow = IRB.CreateAnd(Shadow, Mapping.Mask); Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); @@ -302,17 +301,17 @@ Value *HeapProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) { } // Instrument memset/memmove/memcpy -void HeapProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) { +void MemProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) { IRBuilder<> IRB(MI); if (isa(MI)) { IRB.CreateCall( - isa(MI) ? HeapProfMemmove : HeapProfMemcpy, + isa(MI) ? MemProfMemmove : MemProfMemcpy, {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()), IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()), IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); } else if (isa(MI)) { IRB.CreateCall( - HeapProfMemset, + MemProfMemset, {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()), IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false), IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); @@ -321,7 +320,7 @@ void HeapProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) { } Optional -HeapProfiler::isInterestingMemoryAccess(Instruction *I) const { +MemProfiler::isInterestingMemoryAccess(Instruction *I) const { // Do not instrument the load fetching the dynamic shadow address. 
if (DynamicShadowOffset == I) return None; @@ -409,11 +408,10 @@ HeapProfiler::isInterestingMemoryAccess(Instruction *I) const { return Access; } -void HeapProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, - Value *Mask, Instruction *I, - Value *Addr, unsigned Alignment, - uint32_t TypeSize, - bool IsWrite) { +void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask, + Instruction *I, Value *Addr, + unsigned Alignment, + uint32_t TypeSize, bool IsWrite) { auto *VTy = cast( cast(Addr->getType())->getElementType()); uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType()); @@ -446,8 +444,8 @@ void HeapProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, } } -void HeapProfiler::instrumentMop(Instruction *I, const DataLayout &DL, - InterestingMemoryAccess &Access) { +void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL, + InterestingMemoryAccess &Access) { if (Access.IsWrite) NumInstrumentedWrites++; else @@ -465,14 +463,14 @@ void HeapProfiler::instrumentMop(Instruction *I, const DataLayout &DL, } } -void HeapProfiler::instrumentAddress(Instruction *OrigIns, - Instruction *InsertBefore, Value *Addr, - uint32_t TypeSize, bool IsWrite) { +void MemProfiler::instrumentAddress(Instruction *OrigIns, + Instruction *InsertBefore, Value *Addr, + uint32_t TypeSize, bool IsWrite) { IRBuilder<> IRB(InsertBefore); Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); if (ClUseCalls) { - IRB.CreateCall(HeapProfMemoryAccessCallback[IsWrite], AddrLong); + IRB.CreateCall(MemProfMemoryAccessCallback[IsWrite], AddrLong); return; } @@ -488,24 +486,24 @@ void HeapProfiler::instrumentAddress(Instruction *OrigIns, IRB.CreateStore(ShadowValue, ShadowAddr); } -bool ModuleHeapProfiler::instrumentModule(Module &M) { +bool ModuleMemProfiler::instrumentModule(Module &M) { // Create a module constructor. - std::string HeapProfVersion = std::to_string(LLVM_HEAP_PROFILER_VERSION); + std::string MemProfVersion = std::to_string(LLVM_MEM_PROFILER_VERSION); std::string VersionCheckName = - ClInsertVersionCheck ? (HeapProfVersionCheckNamePrefix + HeapProfVersion) + ClInsertVersionCheck ? 
(MemProfVersionCheckNamePrefix + MemProfVersion) : ""; - std::tie(HeapProfCtorFunction, std::ignore) = - createSanitizerCtorAndInitFunctions(M, HeapProfModuleCtorName, - HeapProfInitName, /*InitArgTypes=*/{}, + std::tie(MemProfCtorFunction, std::ignore) = + createSanitizerCtorAndInitFunctions(M, MemProfModuleCtorName, + MemProfInitName, /*InitArgTypes=*/{}, /*InitArgs=*/{}, VersionCheckName); const uint64_t Priority = getCtorAndDtorPriority(TargetTriple); - appendToGlobalCtors(M, HeapProfCtorFunction, Priority); + appendToGlobalCtors(M, MemProfCtorFunction, Priority); return true; } -void HeapProfiler::initializeCallbacks(Module &M) { +void MemProfiler::initializeCallbacks(Module &M) { IRBuilder<> IRB(*C); for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { @@ -513,68 +511,68 @@ void HeapProfiler::initializeCallbacks(Module &M) { SmallVector Args2 = {IntptrTy, IntptrTy}; SmallVector Args1{1, IntptrTy}; - HeapProfMemoryAccessCallbackSized[AccessIsWrite] = + MemProfMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr + "N", FunctionType::get(IRB.getVoidTy(), Args2, false)); - HeapProfMemoryAccessCallback[AccessIsWrite] = + MemProfMemoryAccessCallback[AccessIsWrite] = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr, FunctionType::get(IRB.getVoidTy(), Args1, false)); } - HeapProfMemmove = M.getOrInsertFunction( + MemProfMemmove = M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy); - HeapProfMemcpy = M.getOrInsertFunction( - ClMemoryAccessCallbackPrefix + "memcpy", IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy); - HeapProfMemset = M.getOrInsertFunction( - ClMemoryAccessCallbackPrefix + "memset", IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy); + MemProfMemcpy = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memcpy", + IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IntptrTy); + MemProfMemset = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset", + IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt32Ty(), IntptrTy); } -bool HeapProfiler::maybeInsertHeapProfInitAtFunctionEntry(Function &F) { +bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) { // For each NSObject descendant having a +load method, this method is invoked // by the ObjC runtime before any of the static constructors is called. - // Therefore we need to instrument such methods with a call to __heapprof_init + // Therefore we need to instrument such methods with a call to __memprof_init // at the beginning in order to initialize our runtime before any access to // the shadow memory. // We cannot just ignore these methods, because they may call other // instrumented functions. 
if (F.getName().find(" load]") != std::string::npos) { - FunctionCallee HeapProfInitFunction = - declareSanitizerInitFunction(*F.getParent(), HeapProfInitName, {}); + FunctionCallee MemProfInitFunction = + declareSanitizerInitFunction(*F.getParent(), MemProfInitName, {}); IRBuilder<> IRB(&F.front(), F.front().begin()); - IRB.CreateCall(HeapProfInitFunction, {}); + IRB.CreateCall(MemProfInitFunction, {}); return true; } return false; } -bool HeapProfiler::insertDynamicShadowAtFunctionEntry(Function &F) { +bool MemProfiler::insertDynamicShadowAtFunctionEntry(Function &F) { IRBuilder<> IRB(&F.front().front()); Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal( - HeapProfShadowMemoryDynamicAddress, IntptrTy); + MemProfShadowMemoryDynamicAddress, IntptrTy); DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress); return true; } -bool HeapProfiler::instrumentFunction(Function &F) { +bool MemProfiler::instrumentFunction(Function &F) { if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; if (ClDebugFunc == F.getName()) return false; - if (F.getName().startswith("__heapprof_")) + if (F.getName().startswith("__memprof_")) return false; bool FunctionModified = false; - // If needed, insert __heapprof_init. + // If needed, insert __memprof_init. // This function needs to be called even if the function body is not // instrumented. - if (maybeInsertHeapProfInitAtFunctionEntry(F)) + if (maybeInsertMemProfInitAtFunctionEntry(F)) FunctionModified = true; - LLVM_DEBUG(dbgs() << "HEAPPROF instrumenting:\n" << F << "\n"); + LLVM_DEBUG(dbgs() << "MEMPROF instrumenting:\n" << F << "\n"); initializeCallbacks(*F.getParent()); @@ -607,8 +605,8 @@ bool HeapProfiler::instrumentFunction(Function &F) { if (NumInstrumented > 0) FunctionModified = true; - LLVM_DEBUG(dbgs() << "HEAPPROF done instrumenting: " << FunctionModified - << " " << F << "\n"); + LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified << " " + << F << "\n"); return FunctionModified; } diff --git a/llvm/test/Instrumentation/HeapProfiler/basic.ll b/llvm/test/Instrumentation/HeapProfiler/basic.ll index a26dae15f5090..cf6320414bd38 100644 --- a/llvm/test/Instrumentation/HeapProfiler/basic.ll +++ b/llvm/test/Instrumentation/HeapProfiler/basic.ll @@ -1,15 +1,15 @@ ; Test basic address sanitizer instrumentation. 
; -; RUN: opt < %s -heapprof -heapprof-module -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s -; RUN: opt < %s -heapprof -heapprof-module -heapprof-mapping-scale=5 -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s +; RUN: opt < %s -memprof -memprof-module -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -memprof -memprof-module -memprof-mapping-scale=5 -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s -; We need the requires since both heapprof and heapprof-module require reading module level metadata which is done once by the heapprof-globals-md analysis -; RUN: opt < %s -passes='function(heapprof),module(heapprof-module)' -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s -; RUN: opt < %s -passes='function(heapprof),module(heapprof-module)' -heapprof-mapping-scale=5 -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s +; We need the requires since both memprof and memprof-module require reading module level metadata which is done once by the memprof-globals-md analysis +; RUN: opt < %s -passes='function(memprof),module(memprof-module)' -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -passes='function(memprof),module(memprof-module)' -memprof-mapping-scale=5 -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" -; CHECK: @llvm.global_ctors = {{.*}}@heapprof.module_ctor +; CHECK: @llvm.global_ctors = {{.*}}@memprof.module_ctor define i32 @test_load(i32* %a) { entry: @@ -17,7 +17,7 @@ entry: ret i32 %tmp1 } ; CHECK-LABEL: @test_load -; CHECK: %[[SHADOW_OFFSET:[^ ]*]] = load i64, i64* @__heapprof_shadow_memory_dynamic_address +; CHECK: %[[SHADOW_OFFSET:[^ ]*]] = load i64, i64* @__memprof_shadow_memory_dynamic_address ; CHECK-NEXT: %[[LOAD_ADDR:[^ ]*]] = ptrtoint i32* %a to i64 ; CHECK-NEXT: %[[MASKED_ADDR:[^ ]*]] = and i64 %[[LOAD_ADDR]], -64 ; CHECK-S3-NEXT: %[[SHIFTED_ADDR:[^ ]*]] = lshr i64 %[[MASKED_ADDR]], 3 @@ -37,7 +37,7 @@ entry: ret void } ; CHECK-LABEL: @test_store -; CHECK: %[[SHADOW_OFFSET:[^ ]*]] = load i64, i64* @__heapprof_shadow_memory_dynamic_address +; CHECK: %[[SHADOW_OFFSET:[^ ]*]] = load i64, i64* @__memprof_shadow_memory_dynamic_address ; CHECK-NEXT: %[[STORE_ADDR:[^ ]*]] = ptrtoint i32* %a to i64 ; CHECK-NEXT: %[[MASKED_ADDR:[^ ]*]] = and i64 %[[STORE_ADDR]], -64 ; CHECK-S3-NEXT: %[[SHIFTED_ADDR:[^ ]*]] = lshr i64 %[[MASKED_ADDR]], 3 @@ -127,14 +127,14 @@ define void @i80test(i80* %a, i80* %b) nounwind uwtable { ; CHECK: store i80 %t, i80* %b ; CHECK: ret void -; heapprof should not instrument functions with available_externally linkage. +; memprof should not instrument functions with available_externally linkage. 
define available_externally i32 @f_available_externally(i32* %a) { entry: %tmp1 = load i32, i32* %a ret i32 %tmp1 } ; CHECK-LABEL: @f_available_externally -; CHECK-NOT: __heapprof_shadow_memory_dynamic_address +; CHECK-NOT: __memprof_shadow_memory_dynamic_address ; CHECK: ret i32 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind @@ -150,9 +150,9 @@ define void @memintr_test(i8* %a, i8* %b) nounwind uwtable { } ; CHECK-LABEL: memintr_test -; CHECK: __heapprof_memset -; CHECK: __heapprof_memmove -; CHECK: __heapprof_memcpy +; CHECK: __memprof_memset +; CHECK: __memprof_memmove +; CHECK: __memprof_memcpy ; CHECK: ret void declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture writeonly, i8, i64, i32) nounwind @@ -161,7 +161,7 @@ declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture w define void @memintr_element_atomic_test(i8* %a, i8* %b) nounwind uwtable { ; This is a canary test to make sure that these don't get lowered into calls that don't - ; have the element-atomic property. Eventually, heapprof will have to be enhanced to lower + ; have the element-atomic property. Eventually, memprof will have to be enhanced to lower ; these properly. ; CHECK-LABEL: memintr_element_atomic_test ; CHECK: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %a, i8 0, i64 100, i32 1) @@ -175,5 +175,5 @@ define void @memintr_element_atomic_test(i8* %a, i8* %b) nounwind uwtable { } -; CHECK: define internal void @heapprof.module_ctor() -; CHECK: call void @__heapprof_init() +; CHECK: define internal void @memprof.module_ctor() +; CHECK: call void @__memprof_init() diff --git a/llvm/test/Instrumentation/HeapProfiler/instrumentation-use-callbacks.ll b/llvm/test/Instrumentation/HeapProfiler/instrumentation-use-callbacks.ll index 9df3df47d3d0a..e97274347588e 100644 --- a/llvm/test/Instrumentation/HeapProfiler/instrumentation-use-callbacks.ll +++ b/llvm/test/Instrumentation/HeapProfiler/instrumentation-use-callbacks.ll @@ -1,31 +1,31 @@ -; Test heapprof internal compiler flags: -; -heapprof-use-callbacks -; -heapprof-memory-access-callback-prefix +; Test memprof internal compiler flags: +; -memprof-use-callbacks +; -memprof-memory-access-callback-prefix -; RUN: opt < %s -heapprof -heapprof-module -heapprof-use-callbacks -S | FileCheck %s --check-prefix=CHECK-CALL --check-prefix=CHECK-CALL-DEFAULT -; RUN: opt < %s -heapprof -heapprof-module -heapprof-use-callbacks -heapprof-memory-access-callback-prefix=__foo_ -S | FileCheck %s --check-prefix=CHECK-CALL --check-prefix=CHECK-CALL-CUSTOM -; RUN: opt < %s -heapprof -heapprof-module -heapprof-use-callbacks=false -S | FileCheck %s --check-prefix=CHECK-INLINE -; RUN: opt < %s -heapprof -heapprof-module -S | FileCheck %s --check-prefix=CHECK-INLINE +; RUN: opt < %s -memprof -memprof-module -memprof-use-callbacks -S | FileCheck %s --check-prefix=CHECK-CALL --check-prefix=CHECK-CALL-DEFAULT +; RUN: opt < %s -memprof -memprof-module -memprof-use-callbacks -memprof-memory-access-callback-prefix=__foo_ -S | FileCheck %s --check-prefix=CHECK-CALL --check-prefix=CHECK-CALL-CUSTOM +; RUN: opt < %s -memprof -memprof-module -memprof-use-callbacks=false -S | FileCheck %s --check-prefix=CHECK-INLINE +; RUN: opt < %s -memprof -memprof-module -S | FileCheck %s --check-prefix=CHECK-INLINE target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" define 
void @test_load(i32* %a, i64* %b, i512* %c, i80* %d) { entry: ; CHECK-CALL: %[[LOAD_ADDR1:[^ ]*]] = ptrtoint i32* %a to i64 -; CHECK-CALL-DEFAULT: call void @__heapprof_load(i64 %[[LOAD_ADDR1]]) +; CHECK-CALL-DEFAULT: call void @__memprof_load(i64 %[[LOAD_ADDR1]]) ; CHECK-CALL-CUSTOM: call void @__foo_load(i64 %[[LOAD_ADDR1]]) ; CHECK-CALL: %[[LOAD_ADDR2:[^ ]*]] = ptrtoint i64* %b to i64 -; CHECK-CALL-DEFAULT: call void @__heapprof_load(i64 %[[LOAD_ADDR2]]) +; CHECK-CALL-DEFAULT: call void @__memprof_load(i64 %[[LOAD_ADDR2]]) ; CHECK-CALL-CUSTOM: call void @__foo_load(i64 %[[LOAD_ADDR2]]) ; CHECK-CALL: %[[LOAD_ADDR3:[^ ]*]] = ptrtoint i512* %c to i64 -; CHECK-CALL-DEFAULT: call void @__heapprof_load(i64 %[[LOAD_ADDR3]]) +; CHECK-CALL-DEFAULT: call void @__memprof_load(i64 %[[LOAD_ADDR3]]) ; CHECK-CALL-CUSTOM: call void @__foo_load(i64 %[[LOAD_ADDR3]]) ; CHECK-CALL: %[[LOAD_ADDR4:[^ ]*]] = ptrtoint i80* %d to i64 -; CHECK-CALL-DEFAULT: call void @__heapprof_load(i64 %[[LOAD_ADDR4]]) +; CHECK-CALL-DEFAULT: call void @__memprof_load(i64 %[[LOAD_ADDR4]]) ; CHECK-CALL-CUSTOM: call void @__foo_load(i64 %[[LOAD_ADDR4]]) -; CHECK-CALL-DEFAULT-NOT: call void @__heapprof_load +; CHECK-CALL-DEFAULT-NOT: call void @__memprof_load ; CHECK-CALL-CUSTOM-NOT: call void @__foo_load -; CHECK-INLINE-NOT: call void @__heapprof_load +; CHECK-INLINE-NOT: call void @__memprof_load %tmp1 = load i32, i32* %a, align 4 %tmp2 = load i64, i64* %b, align 8 %tmp3 = load i512, i512* %c, align 32 diff --git a/llvm/test/Instrumentation/HeapProfiler/masked-load-store.ll b/llvm/test/Instrumentation/HeapProfiler/masked-load-store.ll index fa493a454ef10..dfae33d717b89 100644 --- a/llvm/test/Instrumentation/HeapProfiler/masked-load-store.ll +++ b/llvm/test/Instrumentation/HeapProfiler/masked-load-store.ll @@ -1,12 +1,12 @@ -; RUN: opt < %s -heapprof -heapprof-use-callbacks -S \ +; RUN: opt < %s -memprof -memprof-use-callbacks -S \ ; RUN: | FileCheck %s -check-prefix=LOAD -check-prefix=STORE -check-prefix=ALL -; RUN: opt < %s -heapprof -heapprof-use-callbacks -heapprof-instrument-reads=0 -S \ +; RUN: opt < %s -memprof -memprof-use-callbacks -memprof-instrument-reads=0 -S \ ; RUN: | FileCheck %s -check-prefix=NOLOAD -check-prefix=STORE -check-prefix=ALL -; RUN: opt < %s -heapprof -heapprof-use-callbacks -heapprof-instrument-writes=0 -S \ +; RUN: opt < %s -memprof -memprof-use-callbacks -memprof-instrument-writes=0 -S \ ; RUN: | FileCheck %s -check-prefix=LOAD -check-prefix=NOSTORE -check-prefix=ALL -; RUN: opt < %s -heapprof -heapprof-use-callbacks -heapprof-instrument-reads=0 -heapprof-instrument-writes=0 -S \ +; RUN: opt < %s -memprof -memprof-use-callbacks -memprof-instrument-reads=0 -memprof-instrument-writes=0 -S \ ; RUN: | FileCheck %s -check-prefix=NOLOAD -check-prefix=NOSTORE -check-prefix=ALL -; Support heap profiling instrumentation for constant-mask llvm.masked.{load,store} +; Support memory profiling instrumentation for constant-mask llvm.masked.{load,store} target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -22,16 +22,16 @@ declare void @llvm.masked.store.v4p0i32.p0v4p0i32(<4 x i32*>, <4 x i32*>*, i32, define void @store.v4f32.1110(<4 x float> %arg) { ; ALL-LABEL: @store.v4f32.1110 %p = load <4 x float>*, <4 x float>** @v4f32, align 8 -; NOSTORE-NOT: call void @__heapprof_store +; NOSTORE-NOT: call void @__memprof_store ; STORE: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0 ; STORE: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64 -; STORE: call void 
@__heapprof_store(i64 [[PGEP0]]) +; STORE: call void @__memprof_store(i64 [[PGEP0]]) ; STORE: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 1 ; STORE: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP1]] to i64 -; STORE: call void @__heapprof_store(i64 [[PGEP1]]) +; STORE: call void @__memprof_store(i64 [[PGEP1]]) ; STORE: [[GEP2:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 2 ; STORE: [[PGEP2:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP2]] to i64 -; STORE: call void @__heapprof_store(i64 [[PGEP2]]) +; STORE: call void @__memprof_store(i64 [[PGEP2]]) ; STORE: tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> ) tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> ) ret void @@ -40,19 +40,19 @@ define void @store.v4f32.1110(<4 x float> %arg) { define void @store.v8i32.10010110(<8 x i32> %arg) { ; ALL-LABEL: @store.v8i32.10010110 %p = load <8 x i32>*, <8 x i32>** @v8i32, align 8 -; NOSTORE-NOT: call void @__heapprof_store +; NOSTORE-NOT: call void @__memprof_store ; STORE: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 0 ; STORE: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP0]] to i64 -; STORE: call void @__heapprof_store(i64 [[PGEP0]]) +; STORE: call void @__memprof_store(i64 [[PGEP0]]) ; STORE: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 3 ; STORE: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP3]] to i64 -; STORE: call void @__heapprof_store(i64 [[PGEP3]]) +; STORE: call void @__memprof_store(i64 [[PGEP3]]) ; STORE: [[GEP5:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 5 ; STORE: [[PGEP5:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP5]] to i64 -; STORE: call void @__heapprof_store(i64 [[PGEP5]]) +; STORE: call void @__memprof_store(i64 [[PGEP5]]) ; STORE: [[GEP6:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 6 ; STORE: [[PGEP6:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP6]] to i64 -; STORE: call void @__heapprof_store(i64 [[PGEP6]]) +; STORE: call void @__memprof_store(i64 [[PGEP6]]) ; STORE: tail call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %arg, <8 x i32>* %p, i32 8, <8 x i1> ) tail call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %arg, <8 x i32>* %p, i32 8, <8 x i1> ) ret void @@ -61,10 +61,10 @@ define void @store.v8i32.10010110(<8 x i32> %arg) { define void @store.v4i64.0001(<4 x i32*> %arg) { ; ALL-LABEL: @store.v4i64.0001 %p = load <4 x i32*>*, <4 x i32*>** @v4i64, align 8 -; NOSTORE-NOT: call void @__heapprof_store +; NOSTORE-NOT: call void @__memprof_store ; STORE: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x i32*>, <4 x i32*>* %p, i64 0, i64 3 ; STORE: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint i32** [[GEP3]] to i64 -; STORE: call void @__heapprof_store(i64 [[PGEP3]]) +; STORE: call void @__memprof_store(i64 [[PGEP3]]) ; STORE: tail call void @llvm.masked.store.v4p0i32.p0v4p0i32(<4 x i32*> %arg, <4 x i32*>* %p, i32 8, <4 x i1> ) tail call void @llvm.masked.store.v4p0i32.p0v4p0i32(<4 x i32*> %arg, <4 x i32*>* %p, i32 8, <4 x i1> ) ret void @@ -78,7 +78,7 @@ define void @store.v4f32.variable(<4 x float> %arg, <4 x i1> %mask) { ; STORE: [[THEN0]]: ; STORE: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0 ; STORE: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64 -; STORE: call void @__heapprof_store(i64 [[PGEP0]]) +; STORE: call void @__memprof_store(i64 [[PGEP0]]) ; STORE: br label %[[AFTER0]] ; STORE: 
[[AFTER0]]: @@ -87,7 +87,7 @@ define void @store.v4f32.variable(<4 x float> %arg, <4 x i1> %mask) { ; STORE: [[THEN1]]: ; STORE: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 1 ; STORE: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP1]] to i64 -; STORE: call void @__heapprof_store(i64 [[PGEP1]]) +; STORE: call void @__memprof_store(i64 [[PGEP1]]) ; STORE: br label %[[AFTER1]] ; STORE: [[AFTER1]]: @@ -96,7 +96,7 @@ define void @store.v4f32.variable(<4 x float> %arg, <4 x i1> %mask) { ; STORE: [[THEN2]]: ; STORE: [[GEP2:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 2 ; STORE: [[PGEP2:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP2]] to i64 -; STORE: call void @__heapprof_store(i64 [[PGEP2]]) +; STORE: call void @__memprof_store(i64 [[PGEP2]]) ; STORE: br label %[[AFTER2]] ; STORE: [[AFTER2]]: @@ -105,7 +105,7 @@ define void @store.v4f32.variable(<4 x float> %arg, <4 x i1> %mask) { ; STORE: [[THEN3]]: ; STORE: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 3 ; STORE: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP3]] to i64 -; STORE: call void @__heapprof_store(i64 [[PGEP3]]) +; STORE: call void @__memprof_store(i64 [[PGEP3]]) ; STORE: br label %[[AFTER3]] ; STORE: [[AFTER3]]: @@ -120,12 +120,12 @@ define void @store.v4f32.1010.split(<4 x float> %arg) { %p = load <4 x float>*, <4 x float>** @v4f32, align 8 ; STORE: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0 ; STORE: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64 -; STORE: call void @__heapprof_store(i64 [[PGEP0]]) +; STORE: call void @__memprof_store(i64 [[PGEP0]]) ; STORE: tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> ) tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> ) ; STORE: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 2 ; STORE: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP1]] to i64 -; STORE: call void @__heapprof_store(i64 [[PGEP1]]) +; STORE: call void @__memprof_store(i64 [[PGEP1]]) ; STORE: tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> ) tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> ) ret void @@ -139,19 +139,19 @@ declare <4 x i32*> @llvm.masked.load.v4p0i32.p0v4p0i32(<4 x i32*>*, i32, <4 x i1 define <8 x i32> @load.v8i32.11100001(<8 x i32> %arg) { ; ALL-LABEL: @load.v8i32.11100001 %p = load <8 x i32>*, <8 x i32>** @v8i32, align 8 -; NOLOAD-NOT: call void @__heapprof_load +; NOLOAD-NOT: call void @__memprof_load ; LOAD: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 0 ; LOAD: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP0]] to i64 -; LOAD: call void @__heapprof_load(i64 [[PGEP0]]) +; LOAD: call void @__memprof_load(i64 [[PGEP0]]) ; LOAD: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 1 ; LOAD: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP1]] to i64 -; LOAD: call void @__heapprof_load(i64 [[PGEP1]]) +; LOAD: call void @__memprof_load(i64 [[PGEP1]]) ; LOAD: [[GEP2:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 2 ; LOAD: [[PGEP2:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP2]] to i64 -; LOAD: call void @__heapprof_load(i64 [[PGEP2]]) +; LOAD: call void @__memprof_load(i64 [[PGEP2]]) ; LOAD: [[GEP7:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 7 ; LOAD: 
[[PGEP7:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP7]] to i64 -; LOAD: call void @__heapprof_load(i64 [[PGEP7]]) +; LOAD: call void @__memprof_load(i64 [[PGEP7]]) ; LOAD: tail call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %p, i32 8, <8 x i1> , <8 x i32> %arg) %res = tail call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %p, i32 8, <8 x i1> , <8 x i32> %arg) ret <8 x i32> %res @@ -160,13 +160,13 @@ define <8 x i32> @load.v8i32.11100001(<8 x i32> %arg) { define <4 x float> @load.v4f32.1001(<4 x float> %arg) { ; ALL-LABEL: @load.v4f32.1001 %p = load <4 x float>*, <4 x float>** @v4f32, align 8 -; NOLOAD-NOT: call void @__heapprof_load +; NOLOAD-NOT: call void @__memprof_load ; LOAD: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0 ; LOAD: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64 -; LOAD: call void @__heapprof_load(i64 [[PGEP0]]) +; LOAD: call void @__memprof_load(i64 [[PGEP0]]) ; LOAD: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 3 ; LOAD: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP3]] to i64 -; LOAD: call void @__heapprof_load(i64 [[PGEP3]]) +; LOAD: call void @__memprof_load(i64 [[PGEP3]]) ; LOAD: tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> , <4 x float> %arg) %res = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> , <4 x float> %arg) ret <4 x float> %res @@ -175,10 +175,10 @@ define <4 x float> @load.v4f32.1001(<4 x float> %arg) { define <4 x i32*> @load.v4i64.0001(<4 x i32*> %arg) { ; ALL-LABEL: @load.v4i64.0001 %p = load <4 x i32*>*, <4 x i32*>** @v4i64, align 8 -; NOLOAD-NOT: call void @__heapprof_load +; NOLOAD-NOT: call void @__memprof_load ; LOAD: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x i32*>, <4 x i32*>* %p, i64 0, i64 3 ; LOAD: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint i32** [[GEP3]] to i64 -; LOAD: call void @__heapprof_load(i64 [[PGEP3]]) +; LOAD: call void @__memprof_load(i64 [[PGEP3]]) ; LOAD: tail call <4 x i32*> @llvm.masked.load.v4p0i32.p0v4p0i32(<4 x i32*>* %p, i32 8, <4 x i1> , <4 x i32*> %arg) %res = tail call <4 x i32*> @llvm.masked.load.v4p0i32.p0v4p0i32(<4 x i32*>* %p, i32 8, <4 x i1> , <4 x i32*> %arg) ret <4 x i32*> %res @@ -192,7 +192,7 @@ define <4 x float> @load.v4f32.variable(<4 x float> %arg, <4 x i1> %mask) { ; LOAD: [[THEN0]]: ; LOAD: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0 ; LOAD: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64 -; LOAD: call void @__heapprof_load(i64 [[PGEP0]]) +; LOAD: call void @__memprof_load(i64 [[PGEP0]]) ; LOAD: br label %[[AFTER0]] ; LOAD: [[AFTER0]]: @@ -201,7 +201,7 @@ define <4 x float> @load.v4f32.variable(<4 x float> %arg, <4 x i1> %mask) { ; LOAD: [[THEN1]]: ; LOAD: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 1 ; LOAD: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP1]] to i64 -; LOAD: call void @__heapprof_load(i64 [[PGEP1]]) +; LOAD: call void @__memprof_load(i64 [[PGEP1]]) ; LOAD: br label %[[AFTER1]] ; LOAD: [[AFTER1]]: @@ -210,7 +210,7 @@ define <4 x float> @load.v4f32.variable(<4 x float> %arg, <4 x i1> %mask) { ; LOAD: [[THEN2]]: ; LOAD: [[GEP2:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 2 ; LOAD: [[PGEP2:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP2]] to i64 -; LOAD: call void @__heapprof_load(i64 [[PGEP2]]) +; LOAD: call void @__memprof_load(i64 [[PGEP2]]) ; LOAD: br label %[[AFTER2]] ; LOAD: [[AFTER2]]: @@ -219,7 +219,7 @@ define <4 x 
float> @load.v4f32.variable(<4 x float> %arg, <4 x i1> %mask) { ; LOAD: [[THEN3]]: ; LOAD: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 3 ; LOAD: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP3]] to i64 -; LOAD: call void @__heapprof_load(i64 [[PGEP3]]) +; LOAD: call void @__memprof_load(i64 [[PGEP3]]) ; LOAD: br label %[[AFTER3]] ; LOAD: [[AFTER3]]: @@ -234,12 +234,12 @@ define <4 x float> @load.v4f32.1001.split(<4 x float> %arg) { %p = load <4 x float>*, <4 x float>** @v4f32, align 8 ; LOAD: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0 ; LOAD: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64 -; LOAD: call void @__heapprof_load(i64 [[PGEP0]]) +; LOAD: call void @__memprof_load(i64 [[PGEP0]]) ; LOAD: %res = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> , <4 x float> %arg) %res = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> , <4 x float> %arg) ; LOAD: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 3 ; LOAD: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP3]] to i64 -; LOAD: call void @__heapprof_load(i64 [[PGEP3]]) +; LOAD: call void @__memprof_load(i64 [[PGEP3]]) ; LOAD: tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> , <4 x float> %res) %res2 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> , <4 x float> %res) ret <4 x float> %res2 diff --git a/llvm/test/Instrumentation/HeapProfiler/scale-granularity.ll b/llvm/test/Instrumentation/HeapProfiler/scale-granularity.ll index c8c3a6d605db3..ff68584ed7f02 100644 --- a/llvm/test/Instrumentation/HeapProfiler/scale-granularity.ll +++ b/llvm/test/Instrumentation/HeapProfiler/scale-granularity.ll @@ -1,8 +1,8 @@ -; Test that the scale (-heapprof-mapping-scale) and granularity (-heapprof-mapping-granularity) command-line options work as expected +; Test that the scale (-memprof-mapping-scale) and granularity (-memprof-mapping-granularity) command-line options work as expected ; -; RUN: opt < %s -heapprof -heapprof-module -heapprof-mapping-granularity 32 -S | FileCheck --check-prefix=CHECK-GRAN %s -; RUN: opt < %s -heapprof -heapprof-module -heapprof-mapping-scale 1 -S | FileCheck --check-prefix=CHECK-SCALE %s -; RUN: opt < %s -heapprof -heapprof-module -heapprof-mapping-granularity 16 -heapprof-mapping-scale 0 -S | FileCheck --check-prefix=CHECK-BOTH %s +; RUN: opt < %s -memprof -memprof-module -memprof-mapping-granularity 32 -S | FileCheck --check-prefix=CHECK-GRAN %s +; RUN: opt < %s -memprof -memprof-module -memprof-mapping-scale 1 -S | FileCheck --check-prefix=CHECK-SCALE %s +; RUN: opt < %s -memprof -memprof-module -memprof-mapping-granularity 16 -memprof-mapping-scale 0 -S | FileCheck --check-prefix=CHECK-BOTH %s target triple = "x86_64-unknown-linux-gnu" define i32 @read(i32* %a) { diff --git a/llvm/test/Instrumentation/HeapProfiler/version-mismatch-check.ll b/llvm/test/Instrumentation/HeapProfiler/version-mismatch-check.ll index 84e039551d702..d53e23cff471b 100644 --- a/llvm/test/Instrumentation/HeapProfiler/version-mismatch-check.ll +++ b/llvm/test/Instrumentation/HeapProfiler/version-mismatch-check.ll @@ -1,12 +1,12 @@ -; Check that the HeapProf module constructor guards against compiler/runtime version +; Check that the MemProf module constructor guards against compiler/runtime version ; mismatch. 
-; RUN: opt < %s -heapprof-module -S | FileCheck %s
-; RUN: opt < %s -heapprof-module -heapprof-guard-against-version-mismatch=0 -S | FileCheck %s --check-prefix=NOGUARD
+; RUN: opt < %s -memprof-module -S | FileCheck %s
+; RUN: opt < %s -memprof-module -memprof-guard-against-version-mismatch=0 -S | FileCheck %s --check-prefix=NOGUARD
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
 
-; CHECK-LABEL: define internal void @heapprof.module_ctor()
-; CHECK: call void @__heapprof_version_mismatch_check_v1
-; NOGUARD-NOT: call void @__heapprof_version_mismatch_check_
+; CHECK-LABEL: define internal void @memprof.module_ctor()
+; CHECK: call void @__memprof_version_mismatch_check_v1
+; NOGUARD-NOT: call void @__memprof_version_mismatch_check_

From 2ad38f7a46b59a5b6653239245d29590d7977b29 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot
Date: Mon, 14 Sep 2020 20:16:21 +0000
Subject: [PATCH 0580/1079] [gn build] Port 226d80ebe20

---
 .../gn/secondary/llvm/lib/Transforms/Instrumentation/BUILD.gn | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Instrumentation/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Instrumentation/BUILD.gn
index dbac54ab97041..edcf13309a578 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Instrumentation/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Instrumentation/BUILD.gn
@@ -16,11 +16,11 @@ static_library("Instrumentation") {
     "DataFlowSanitizer.cpp",
     "GCOVProfiling.cpp",
     "HWAddressSanitizer.cpp",
-    "HeapProfiler.cpp",
     "IndirectCallPromotion.cpp",
     "InstrOrderFile.cpp",
     "InstrProfiling.cpp",
     "Instrumentation.cpp",
+    "MemProfiler.cpp",
    "MemorySanitizer.cpp",
     "PGOInstrumentation.cpp",
     "PGOMemOPSizeOpt.cpp",

From c2590de30df23ef0db39b496cdec62a83a61fbfa Mon Sep 17 00:00:00 2001
From: Arthur Eubanks
Date: Mon, 31 Aug 2020 18:36:11 -0700
Subject: [PATCH 0581/1079] [docs][NewPM] Add docs for writing NPM passes

So as not to conflict with the legacy PM example passes under
llvm/lib/Transforms/Hello, this is under HelloNew. This makes the
CMakeLists.txt and general directory structure less confusing for people
following the example.

Much of the doc structure was taken from WritingAnLLVMPass.rst.

This adds a HelloWorld pass which simply prints out each function name.

More will follow after this, e.g. passes over different units of IR, analyses.
https://llvm.org/docs/WritingAnLLVMPass.html contains a lot more.
Reviewed By: ychen, asbirlea

Differential Revision: https://reviews.llvm.org/D86979
---
 llvm/docs/UserGuides.rst                      |   5 +
 llvm/docs/WritingAnLLVMNewPMPass.rst          | 209 ++++++++++++++++++
 llvm/docs/WritingAnLLVMPass.rst               |   4 +
 .../llvm/Transforms/HelloNew/HelloWorld.h     |  23 ++
 llvm/lib/Passes/LLVMBuild.txt                 |   2 +-
 llvm/lib/Passes/PassBuilder.cpp               |   1 +
 llvm/lib/Passes/PassRegistry.def              |   1 +
 llvm/lib/Transforms/CMakeLists.txt            |   1 +
 llvm/lib/Transforms/HelloNew/CMakeLists.txt   |   6 +
 llvm/lib/Transforms/HelloNew/HelloWorld.cpp   |  17 ++
 llvm/lib/Transforms/HelloNew/LLVMBuild.txt    |  22 ++
 llvm/lib/Transforms/LLVMBuild.txt             |   2 +-
 llvm/test/Transforms/HelloNew/helloworld.ll   |  12 +
 .../gn/secondary/llvm/lib/Passes/BUILD.gn     |   1 +
 .../llvm/lib/Transforms/HelloNew/BUILD.gn     |   9 +
 15 files changed, 313 insertions(+), 2 deletions(-)
 create mode 100644 llvm/docs/WritingAnLLVMNewPMPass.rst
 create mode 100644 llvm/include/llvm/Transforms/HelloNew/HelloWorld.h
 create mode 100644 llvm/lib/Transforms/HelloNew/CMakeLists.txt
 create mode 100644 llvm/lib/Transforms/HelloNew/HelloWorld.cpp
 create mode 100644 llvm/lib/Transforms/HelloNew/LLVMBuild.txt
 create mode 100644 llvm/test/Transforms/HelloNew/helloworld.ll
 create mode 100644 llvm/utils/gn/secondary/llvm/lib/Transforms/HelloNew/BUILD.gn

diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst
index 2e0cffb711ef9..00e99db297f78 100644
--- a/llvm/docs/UserGuides.rst
+++ b/llvm/docs/UserGuides.rst
@@ -54,6 +54,7 @@ intermediate LLVM representation.
    TableGenFundamentals
    Vectorizers
    WritingAnLLVMPass
+   WritingAnLLVMNewPMPass
    WritingAnLLVMBackend
    yaml2obj
 
@@ -107,6 +108,10 @@ Optimizations
 :doc:`WritingAnLLVMPass`
    Information on how to write LLVM transformations and analyses.
 
+:doc:`WritingAnLLVMNewPMPass`
+   Information on how to write LLVM transformations under the new pass
+   manager.
+
 :doc:`Passes`
    A list of optimizations and analyses implemented in LLVM.
 
diff --git a/llvm/docs/WritingAnLLVMNewPMPass.rst b/llvm/docs/WritingAnLLVMNewPMPass.rst
new file mode 100644
index 0000000000000..a876ec4ceb005
--- /dev/null
+++ b/llvm/docs/WritingAnLLVMNewPMPass.rst
@@ -0,0 +1,209 @@
+====================
+Writing an LLVM Pass
+====================
+
+.. program:: opt
+
+.. contents::
+    :local:
+
+Introduction --- What is a pass?
+================================
+
+The LLVM pass framework is an important part of the LLVM system, because LLVM
+passes are where most of the interesting parts of the compiler exist. Passes
+perform the transformations and optimizations that make up the compiler; they
+build the analysis results that are used by these transformations, and they
+are, above all, a structuring technique for compiler code.
+
+Unlike passes under the legacy pass manager where the pass interface is
+defined via inheritance, passes under the new pass manager rely on
+concept-based polymorphism, meaning there is no explicit interface (see
+comments in ``PassManager.h`` for more details). All LLVM passes inherit from
+the CRTP mix-in ``PassInfoMixin<PassT>``. The pass should have a ``run()``
+method which returns a ``PreservedAnalyses`` and takes in some unit of IR
+along with an analysis manager. For example, a function pass would have a
+``PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);`` method.
+
+We start by showing you how to construct a pass, from setting up the build,
+creating the pass, to executing and testing it. Looking at existing passes is
+always a great way to learn details.
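+
+As a rough sketch of what "concept-based polymorphism" means here (an
+illustration of the idea only, not the actual code in ``PassManager.h``;
+the names below are simplified), the pass manager type-erases anything that
+provides a suitable ``run()`` method:
+
+.. code-block:: c++
+
+  // Illustrative sketch; the real templates in PassManager.h are more general.
+  struct FunctionPassConcept {
+    virtual ~FunctionPassConcept() = default;
+    virtual PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) = 0;
+  };
+
+  template <typename PassT>
+  struct FunctionPassModel : FunctionPassConcept {
+    PassT Pass;
+    FunctionPassModel(PassT P) : Pass(std::move(P)) {}
+    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) override {
+      // Any type with a matching run() method works; the pass itself needs
+      // no common base class.
+      return Pass.run(F, AM);
+    }
+  };
+
+Any class with a matching ``run()`` method can be wrapped this way, which is
+why no explicit pass interface is required beyond the ``PassInfoMixin``
+boilerplate.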
+
+Quick Start --- Writing hello world
+===================================
+
+Here we describe how to write the "hello world" of passes. The "HelloWorld"
+pass is designed to simply print out the name of non-external functions that
+exist in the program being compiled. It does not modify the program at all,
+it just inspects it.
+
+The code below already exists; feel free to create a pass with a different
+name alongside the HelloWorld source files.
+
+.. _writing-an-llvm-npm-pass-build:
+
+Setting up the build
+--------------------
+
+First, configure and build LLVM as described in :doc:`GettingStarted`.
+
+Next, we will reuse an existing directory (creating a new directory involves
+modifying more ``CMakeLists.txt``s and ``LLVMBuild.txt``s than we want). For
+this example, we'll use ``llvm/lib/Transforms/HelloNew/HelloWorld.cpp``,
+which has already been created. If you'd like to create your own pass, add a
+new source file into ``llvm/lib/Transforms/HelloNew/CMakeLists.txt`` under
+``HelloWorld.cpp``:
+
+.. code-block:: cmake
+
+  add_llvm_component_library(LLVMHelloWorld
+    HelloWorld.cpp
+
+    DEPENDS
+    intrinsics_gen
+    )
+
+Now that we have the build set up for a new pass, we need to write the code
+for the pass itself.
+
+.. _writing-an-llvm-npm-pass-basiccode:
+
+Basic code required
+-------------------
+
+Now that the build is set up for a new pass, we just have to write it.
+
+First we need to define the pass in a header file. We'll create
+``llvm/include/llvm/Transforms/HelloNew/HelloWorld.h``. The file should
+contain the following boilerplate:
+
+.. code-block:: c++
+
+  #ifndef LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
+  #define LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
+
+  #include "llvm/IR/PassManager.h"
+
+  namespace llvm {
+
+  class HelloWorldPass : public PassInfoMixin<HelloWorldPass> {
+  public:
+    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+  };
+
+  } // namespace llvm
+
+  #endif // LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
+
+This creates the class for the pass with a declaration of the ``run()``
+method which actually runs the pass. Inheriting from ``PassInfoMixin<PassT>``
+sets up some more boilerplate so that we don't have to write it ourselves.
+
+Our class is in the ``llvm`` namespace so that we don't pollute the global
+namespace.
+
+Next we'll create ``llvm/lib/Transforms/HelloNew/HelloWorld.cpp``, starting
+with
+
+.. code-block:: c++
+
+  #include "llvm/Transforms/HelloNew/HelloWorld.h"
+
+... to include the header file we just created.
+
+.. code-block:: c++
+
+  using namespace llvm;
+
+... is required because the functions from the include files live in the llvm
+namespace. This should only be done in non-header files.
+
+Next we have the pass's ``run()`` definition:
+
+.. code-block:: c++
+
+  PreservedAnalyses HelloWorldPass::run(Function &F,
+                                        FunctionAnalysisManager &AM) {
+    errs() << F.getName() << "\n";
+    return PreservedAnalyses::all();
+  }
+
+... which simply prints out the name of the function to stderr. The pass
+manager will ensure that the pass will be run on every function in a module.
+The ``PreservedAnalyses`` return value says that all analyses (e.g. dominator
+tree) are still valid after this pass since we didn't modify any functions.
+
+That's it for the pass itself. Now in order to "register" the pass, we need
+to add it to a couple of places. Add the following to
+``llvm/lib/Passes/PassRegistry.def`` in the ``FUNCTION_PASS`` section:
+
+.. code-block:: c++
+
+  FUNCTION_PASS("helloworld", HelloWorldPass())
+
+... which adds the pass under the name "helloworld".
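+
+To get a feel for why that single line is enough, the pipeline parser in
+``PassBuilder.cpp`` consumes the registry roughly as follows (a simplified
+sketch of the mechanism, not the exact upstream macro or return type):
+
+.. code-block:: c++
+
+  // Sketch: while parsing "-passes=...", each registry entry expands into an
+  // if-statement that maps the pass name string to a call that adds the pass.
+  #define FUNCTION_PASS(NAME, CREATE_PASS)                                    \
+    if (Name == NAME) {                                                       \
+      FPM.addPass(CREATE_PASS);                                               \
+      return true;                                                            \
+    }
+  #include "PassRegistry.def"
+
+So ``-passes=helloworld`` resolves the string "helloworld" to
+``HelloWorldPass()``.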
+
+``llvm/lib/Passes/PassRegistry.def`` is #include'd into
+``llvm/lib/Passes/PassBuilder.cpp`` multiple times for various reasons. Since
+it constructs our pass, we need to also add the proper #include in
+``llvm/lib/Passes/PassBuilder.cpp``:
+
+.. code-block:: c++
+
+  #include "llvm/Transforms/HelloNew/HelloWorld.h"
+
+This should be all the code necessary for our pass. Now it's time to compile
+and run it.
+
+Running a pass with ``opt``
+---------------------------
+
+Now that we have a brand new shiny pass, we can build :program:`opt` and use
+it to run some LLVM IR through the pass.
+
+.. code-block:: console
+
+  $ ninja -C build/ opt
+  # or whatever build system/build directory you are using
+
+  $ cat /tmp/a.ll
+  define i32 @foo() {
+    %a = add i32 2, 3
+    ret i32 %a
+  }
+
+  define void @bar() {
+    ret void
+  }
+
+  $ build/bin/opt -disable-output /tmp/a.ll -passes=helloworld
+  foo
+  bar
+
+Our pass ran and printed the names of functions as expected!
+
+Testing a pass
+--------------
+
+Testing our pass is important to prevent future regressions. We'll add a lit
+test at ``llvm/test/Transforms/HelloNew/helloworld.ll``. See
+:doc:`TestingGuide` for more information on testing.
+
+.. code-block:: llvm
+
+  $ cat llvm/test/Transforms/HelloNew/helloworld.ll
+  ; RUN: opt -disable-output -passes=helloworld %s 2>&1 | FileCheck %s
+
+  ; CHECK: {{^}}foo{{$}}
+  define i32 @foo() {
+    %a = add i32 2, 3
+    ret i32 %a
+  }
+
+  ; CHECK-NEXT: {{^}}bar{{$}}
+  define void @bar() {
+    ret void
+  }
+
+  $ ninja -C build check-llvm
+  # runs our new test alongside all other llvm lit tests
diff --git a/llvm/docs/WritingAnLLVMPass.rst b/llvm/docs/WritingAnLLVMPass.rst
index 88f481ba6b076..7a24659e62942 100644
--- a/llvm/docs/WritingAnLLVMPass.rst
+++ b/llvm/docs/WritingAnLLVMPass.rst
@@ -34,6 +34,10 @@ We start by showing you how to construct a pass, everything from setting up
 the code, to compiling, loading, and executing it. After the basics are down,
 more advanced features are discussed.
 
+This document deals with the legacy pass manager. LLVM is transitioning to
+the new pass manager, which has its own way of defining passes. For more
+details, see :doc:`WritingAnLLVMNewPMPass`.
+
 Quick Start --- Writing hello world
 ===================================
 
diff --git a/llvm/include/llvm/Transforms/HelloNew/HelloWorld.h b/llvm/include/llvm/Transforms/HelloNew/HelloWorld.h
new file mode 100644
index 0000000000000..6c753032f913c
--- /dev/null
+++ b/llvm/include/llvm/Transforms/HelloNew/HelloWorld.h
@@ -0,0 +1,23 @@
+//===-- HelloWorld.h - Example Transformations ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
+#define LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class HelloWorldPass : public PassInfoMixin<HelloWorldPass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
diff --git a/llvm/lib/Passes/LLVMBuild.txt b/llvm/lib/Passes/LLVMBuild.txt
index 3e7a391154137..f49f7828d2b93 100644
--- a/llvm/lib/Passes/LLVMBuild.txt
+++ b/llvm/lib/Passes/LLVMBuild.txt
@@ -18,4 +18,4 @@ type = Library
 name = Passes
 parent = Libraries
-required_libraries = AggressiveInstCombine Analysis Core Coroutines IPO InstCombine ObjCARC Scalar Support Target TransformUtils Vectorize Instrumentation
+required_libraries = AggressiveInstCombine Analysis Core Coroutines HelloNew IPO InstCombine ObjCARC Scalar Support Target TransformUtils Vectorize Instrumentation
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index c47f612e71991..cd64aecd81d73 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -75,6 +75,7 @@
 #include "llvm/Transforms/Coroutines/CoroEarly.h"
 #include "llvm/Transforms/Coroutines/CoroElide.h"
 #include "llvm/Transforms/Coroutines/CoroSplit.h"
+#include "llvm/Transforms/HelloNew/HelloWorld.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
 #include "llvm/Transforms/IPO/Attributor.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 4b4f71a718702..1d70db3063470 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -197,6 +197,7 @@ FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/false)
 FUNCTION_PASS("make-guards-explicit", MakeGuardsExplicitPass())
 FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/true))
 FUNCTION_PASS("gvn-hoist", GVNHoistPass())
+FUNCTION_PASS("helloworld", HelloWorldPass())
 FUNCTION_PASS("instcombine", InstCombinePass())
 FUNCTION_PASS("instcount", InstCountPass())
 FUNCTION_PASS("instsimplify", InstSimplifyPass())
diff --git a/llvm/lib/Transforms/CMakeLists.txt b/llvm/lib/Transforms/CMakeLists.txt
index dda5f6de11e32..2a0abebdf19b5 100644
--- a/llvm/lib/Transforms/CMakeLists.txt
+++ b/llvm/lib/Transforms/CMakeLists.txt
@@ -6,6 +6,7 @@ add_subdirectory(Scalar)
 add_subdirectory(IPO)
 add_subdirectory(Vectorize)
 add_subdirectory(Hello)
+add_subdirectory(HelloNew)
 add_subdirectory(ObjCARC)
 add_subdirectory(Coroutines)
 add_subdirectory(CFGuard)
diff --git a/llvm/lib/Transforms/HelloNew/CMakeLists.txt b/llvm/lib/Transforms/HelloNew/CMakeLists.txt
new file mode 100644
index 0000000000000..a7a1a5b93b062
--- /dev/null
+++ b/llvm/lib/Transforms/HelloNew/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_component_library(LLVMHelloNew
+  HelloWorld.cpp
+
+  DEPENDS
+  intrinsics_gen
+  )
diff --git a/llvm/lib/Transforms/HelloNew/HelloWorld.cpp b/llvm/lib/Transforms/HelloNew/HelloWorld.cpp
new file mode 100644
index 0000000000000..dea94f8a8f627
--- /dev/null
+++ b/llvm/lib/Transforms/HelloNew/HelloWorld.cpp
@@ -0,0 +1,17 @@
+//===-- HelloWorld.cpp - Example Transformations --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/HelloNew/HelloWorld.h" + +using namespace llvm; + +PreservedAnalyses HelloWorldPass::run(Function &F, + FunctionAnalysisManager &AM) { + errs() << F.getName() << "\n"; + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/Transforms/HelloNew/LLVMBuild.txt b/llvm/lib/Transforms/HelloNew/LLVMBuild.txt new file mode 100644 index 0000000000000..cc66fb07c3e9d --- /dev/null +++ b/llvm/lib/Transforms/HelloNew/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/Transforms/HelloNew/LLVMBuild.txt ------------------*- Conf -*--===; +; +; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = HelloNew +parent = Transforms +library_name = HelloNew +required_libraries = Core diff --git a/llvm/lib/Transforms/LLVMBuild.txt b/llvm/lib/Transforms/LLVMBuild.txt index 5fb5efcc068c8..6c6a6bb317fa8 100644 --- a/llvm/lib/Transforms/LLVMBuild.txt +++ b/llvm/lib/Transforms/LLVMBuild.txt @@ -15,7 +15,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = AggressiveInstCombine Coroutines IPO InstCombine Instrumentation Scalar Utils Vectorize ObjCARC CFGuard +subdirectories = AggressiveInstCombine Coroutines HelloNew IPO InstCombine Instrumentation Scalar Utils Vectorize ObjCARC CFGuard [component_0] type = Group diff --git a/llvm/test/Transforms/HelloNew/helloworld.ll b/llvm/test/Transforms/HelloNew/helloworld.ll new file mode 100644 index 0000000000000..48817c24801ae --- /dev/null +++ b/llvm/test/Transforms/HelloNew/helloworld.ll @@ -0,0 +1,12 @@ +; RUN: opt -disable-output -passes=helloworld %s 2>&1 | FileCheck %s + +; CHECK: {{^}}foo{{$}} +define i32 @foo() { + %a = add i32 2, 3 + ret i32 %a +} + +; CHECK-NEXT: {{^}}bar{{$}} +define void @bar() { + ret void +} diff --git a/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn index 9afe48db159b2..bb8a671dd6a7d 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn @@ -8,6 +8,7 @@ static_library("Passes") { "//llvm/lib/Target", "//llvm/lib/Transforms/AggressiveInstCombine", "//llvm/lib/Transforms/Coroutines", + "//llvm/lib/Transforms/HelloNew", "//llvm/lib/Transforms/IPO", "//llvm/lib/Transforms/InstCombine", "//llvm/lib/Transforms/Instrumentation", diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/HelloNew/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/HelloNew/BUILD.gn new file mode 100644 index 0000000000000..5e6167324a4ae --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/HelloNew/BUILD.gn @@ -0,0 +1,9 @@ +static_library("HelloNew") { + output_name = "LLVMHelloNew" + deps = [ + "//llvm/lib/Analysis", + "//llvm/lib/IR", + "//llvm/lib/Support", + ] + sources = [ "HelloWorld.cpp" ] +} From 
9d01612db48fa27d18c6320974b8d711572e5c67 Mon Sep 17 00:00:00 2001
From: Vitaly Buka
Date: Mon, 14 Sep 2020 13:32:14 -0700
Subject: [PATCH 0582/1079] [Asan] Fix false leak report

If a user thread is in the allocator, the allocator may have no pointer
into the future user part of the allocated block yet. AddrIsInside
ignores such pointers and LSan reports a false memory leak.

Reviewed By: morehouse

Differential Revision: https://reviews.llvm.org/D87552
---
 compiler-rt/lib/asan/asan_allocator.cpp       | 14 ++++------
 .../test/asan/TestCases/redzone_noleak.cpp    | 28 +++++++++++++++++++
 2 files changed, 34 insertions(+), 8 deletions(-)
 create mode 100644 compiler-rt/test/asan/TestCases/redzone_noleak.cpp

diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp
index 8cc7de3a9862b..e4028dc10f48e 100644
--- a/compiler-rt/lib/asan/asan_allocator.cpp
+++ b/compiler-rt/lib/asan/asan_allocator.cpp
@@ -1111,19 +1111,17 @@ void GetAllocatorGlobalRange(uptr *begin, uptr *end) {
   *end = *begin + sizeof(__asan::get_allocator());
 }
 
-uptr PointsIntoChunk(void* p) {
+uptr PointsIntoChunk(void *p) {
   uptr addr = reinterpret_cast<uptr>(p);
   __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddrFastLocked(addr);
   if (!m || atomic_load(&m->chunk_state, memory_order_acquire) !=
                 __asan::CHUNK_ALLOCATED)
     return 0;
-  uptr chunk = m->Beg();
-  if (m->AddrIsInside(addr, /*locked_version=*/true))
-    return chunk;
-  if (IsSpecialCaseOfOperatorNew0(chunk, m->UsedSize(/*locked_version*/ true),
-                                  addr))
-    return chunk;
-  return 0;
+  // AsanChunk presence means that we point into some block from underlying
+  // allocators. Don't check whether p points into user memory, since until
+  // the return from AsanAllocator::Allocate we may have no such
+  // pointer anywhere. But we must already have a pointer to GetBlockBegin().
+  return m->Beg();
 }
 
 uptr GetUserBegin(uptr chunk) {
diff --git a/compiler-rt/test/asan/TestCases/redzone_noleak.cpp b/compiler-rt/test/asan/TestCases/redzone_noleak.cpp
new file mode 100644
index 0000000000000..f122c05e5108e
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/redzone_noleak.cpp
@@ -0,0 +1,28 @@
+// Test that pointers into the left redzone are counted as reachable memory.
+// If a user thread is inside ASan allocator code, we may have no
+// pointers into the user part of memory yet. However we should have a pointer
+// into the allocated memory chunk.
+//
+// RUN: %clangxx_asan %s -o %t
+// RUN: %run %t 2>&1
+
+#include <cstdio>
+
+void *pointers[1000];
+void **cur = pointers;
+
+void leak(int n, int offset) {
+  printf("%d %d\n", n, offset);
+  for (int i = 0; i < 3; ++i)
+    *(cur++) = (new int[n]) + offset;
+}
+
+int main(int argc, char **argv) {
+  for (int n = 1; n < 10000000; n = n * 2) {
+    leak(n, 0);
+    leak(n, -1);
+  }
+  return 0;
+}

From 7d1ed69c8aad00f3ba1e917da54508489de6d610 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Federico=20Lebr=C3=B3n?=
Date: Mon, 14 Sep 2020 20:01:07 +0000
Subject: [PATCH 0583/1079] Make namespace handling uniform across dialect
 backends.

Now backends spell out which namespace they want to be in, instead of relying
on clients #including them inside already-opened namespaces. This also means
that cppNamespaces should be fully qualified, and there's no implicit
"::mlir::" prepended to them anymore.
Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D86811 --- .../include/flang/Optimizer/Dialect/FIROps.h | 3 +- .../include/flang/Optimizer/Dialect/FIROps.td | 1 + flang/lib/Optimizer/Dialect/FIROps.cpp | 3 - mlir/examples/toy/Ch2/include/toy/Dialect.h | 6 +- mlir/examples/toy/Ch2/include/toy/Ops.td | 2 +- mlir/examples/toy/Ch3/include/toy/Dialect.h | 6 +- mlir/examples/toy/Ch3/include/toy/Ops.td | 2 +- mlir/examples/toy/Ch4/include/toy/Dialect.h | 6 +- mlir/examples/toy/Ch4/include/toy/Ops.td | 2 +- mlir/examples/toy/Ch5/include/toy/Dialect.h | 6 +- mlir/examples/toy/Ch5/include/toy/Ops.td | 2 +- mlir/examples/toy/Ch6/include/toy/Dialect.h | 6 +- mlir/examples/toy/Ch6/include/toy/Ops.td | 2 +- mlir/examples/toy/Ch7/include/toy/Dialect.h | 6 + mlir/examples/toy/Ch7/include/toy/Ops.td | 2 +- mlir/include/mlir/Dialect/AVX512/AVX512.td | 2 +- .../mlir/Dialect/AVX512/AVX512Dialect.h | 8 +- mlir/include/mlir/Dialect/GPU/GPUBase.td | 1 + mlir/include/mlir/Dialect/GPU/GPUDialect.h | 5 +- .../mlir/Dialect/GPU/ParallelLoopMapper.h | 3 + .../include/mlir/Dialect/LLVMIR/LLVMAVX512.td | 2 +- .../mlir/Dialect/LLVMIR/LLVMAVX512Dialect.h | 6 - .../include/mlir/Dialect/LLVMIR/LLVMDialect.h | 7 +- .../include/mlir/Dialect/LLVMIR/LLVMOpBase.td | 2 +- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 10 +- .../include/mlir/Dialect/LLVMIR/NVVMDialect.h | 6 - mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 2 +- .../mlir/Dialect/LLVMIR/ROCDLDialect.h | 6 - mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 2 +- .../mlir/Dialect/Linalg/IR/LinalgBase.td | 1 + .../mlir/Dialect/Linalg/IR/LinalgOps.h | 5 +- .../Linalg/IR/LinalgStructuredOpsInterface.td | 1 + .../mlir/Dialect/Linalg/IR/LinalgTypes.h | 3 +- mlir/include/mlir/Dialect/OpenACC/OpenACC.h | 7 +- .../mlir/Dialect/OpenACC/OpenACCOps.td | 2 +- .../mlir/Dialect/OpenMP/OpenMPDialect.h | 8 +- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 2 +- mlir/include/mlir/Dialect/PDL/IR/PDL.h | 4 - mlir/include/mlir/Dialect/PDL/IR/PDLBase.td | 2 +- .../mlir/Dialect/PDLInterp/IR/PDLInterp.h | 5 - .../mlir/Dialect/PDLInterp/IR/PDLInterpOps.td | 2 +- mlir/include/mlir/Dialect/Quant/QuantOps.h | 6 - .../mlir/Dialect/Quant/QuantOpsBase.td | 1 + mlir/include/mlir/Dialect/SCF/SCF.h | 6 +- mlir/include/mlir/Dialect/SCF/SCFOps.td | 2 +- .../mlir/Dialect/SPIRV/SPIRVAttributes.h | 2 +- mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td | 13 +- .../include/mlir/Dialect/SPIRV/SPIRVDialect.h | 4 +- mlir/include/mlir/Dialect/SPIRV/SPIRVOps.h | 4 + mlir/include/mlir/Dialect/SPIRV/SPIRVTypes.h | 56 +++--- mlir/include/mlir/Dialect/Shape/IR/Shape.h | 6 +- .../mlir/Dialect/Shape/IR/ShapeBase.td | 2 +- mlir/include/mlir/Dialect/Vector/VectorOps.h | 6 +- mlir/include/mlir/Dialect/Vector/VectorOps.td | 2 +- mlir/include/mlir/IR/OpBase.td | 4 +- mlir/include/mlir/TableGen/Operator.h | 11 ++ mlir/lib/Dialect/AVX512/IR/AVX512Dialect.cpp | 5 - mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 5 - .../GPU/Transforms/ParallelLoopMapper.cpp | 3 +- .../Dialect/LLVMIR/IR/LLVMAVX512Dialect.cpp | 4 - mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 6 - mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp | 5 - mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 6 - mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 4 - mlir/lib/Dialect/PDL/IR/PDL.cpp | 6 - mlir/lib/Dialect/SCF/SCF.cpp | 4 - mlir/lib/Dialect/SPIRV/SPIRVAttributes.cpp | 3 +- mlir/lib/Dialect/SPIRV/SPIRVOps.cpp | 4 + mlir/lib/Dialect/Shape/IR/Shape.cpp | 6 - mlir/lib/Dialect/Vector/VectorOps.cpp | 6 - mlir/lib/TableGen/Operator.cpp | 18 +- 
mlir/test/lib/Dialect/Test/TestDialect.h | 3 +- mlir/test/lib/Dialect/Test/TestOps.td | 2 +- mlir/test/mlir-tblgen/op-attribute.td | 16 ++ mlir/test/mlir-tblgen/op-decl.td | 4 +- mlir/tools/mlir-tblgen/DialectGen.cpp | 13 ++ mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp | 65 ++++--- mlir/tools/mlir-tblgen/OpFormatGen.cpp | 174 +++++++++--------- mlir/tools/mlir-tblgen/RewriterGen.cpp | 5 +- 79 files changed, 328 insertions(+), 323 deletions(-) diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.h b/flang/include/flang/Optimizer/Dialect/FIROps.h index ece775bd6ffee..fe5e944fe267d 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.h +++ b/flang/include/flang/Optimizer/Dialect/FIROps.h @@ -41,9 +41,10 @@ mlir::ParseResult parseSelector(mlir::OpAsmParser &parser, mlir::OpAsmParser::OperandType &selector, mlir::Type &type); +} // namespace fir + #define GET_OP_CLASSES #include "flang/Optimizer/Dialect/FIROps.h.inc" -} // namespace fir #endif // OPTIMIZER_DIALECT_FIROPS_H diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 0bc543882a268..e232ec5f01115 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -21,6 +21,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td" def fir_Dialect : Dialect { let name = "fir"; + let cppNamespace = "::fir"; } // Types and predicates diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 36334167184d5..079d16d74181a 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -1552,11 +1552,8 @@ fir::GlobalOp fir::createGlobalOp(mlir::Location loc, mlir::ModuleOp module, return modBuilder.create(loc, name, type, attrs); } -namespace fir { - // Tablegen operators #define GET_OP_CLASSES #include "flang/Optimizer/Dialect/FIROps.cpp.inc" -} // namespace fir diff --git a/mlir/examples/toy/Ch2/include/toy/Dialect.h b/mlir/examples/toy/Ch2/include/toy/Dialect.h index 4ddc63c2b4dc8..8bcad903c5387 100644 --- a/mlir/examples/toy/Ch2/include/toy/Dialect.h +++ b/mlir/examples/toy/Ch2/include/toy/Dialect.h @@ -34,12 +34,12 @@ class ToyDialect : public mlir::Dialect { static llvm::StringRef getDialectNamespace() { return "toy"; } }; +} // end namespace toy +} // end namespace mlir + /// Include the auto-generated header file containing the declarations of the /// toy operations. #define GET_OP_CLASSES #include "toy/Ops.h.inc" -} // end namespace toy -} // end namespace mlir - #endif // MLIR_TUTORIAL_TOY_DIALECT_H_ diff --git a/mlir/examples/toy/Ch2/include/toy/Ops.td b/mlir/examples/toy/Ch2/include/toy/Ops.td index 4a56edb57b3ec..db01e226384b1 100644 --- a/mlir/examples/toy/Ch2/include/toy/Ops.td +++ b/mlir/examples/toy/Ch2/include/toy/Ops.td @@ -20,7 +20,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td" // can define our operations. def Toy_Dialect : Dialect { let name = "toy"; - let cppNamespace = "toy"; + let cppNamespace = "::mlir::toy"; } // Base class for toy dialect operations. 
This operation inherits from the base diff --git a/mlir/examples/toy/Ch3/include/toy/Dialect.h b/mlir/examples/toy/Ch3/include/toy/Dialect.h index 4ddc63c2b4dc8..8bcad903c5387 100644 --- a/mlir/examples/toy/Ch3/include/toy/Dialect.h +++ b/mlir/examples/toy/Ch3/include/toy/Dialect.h @@ -34,12 +34,12 @@ class ToyDialect : public mlir::Dialect { static llvm::StringRef getDialectNamespace() { return "toy"; } }; +} // end namespace toy +} // end namespace mlir + /// Include the auto-generated header file containing the declarations of the /// toy operations. #define GET_OP_CLASSES #include "toy/Ops.h.inc" -} // end namespace toy -} // end namespace mlir - #endif // MLIR_TUTORIAL_TOY_DIALECT_H_ diff --git a/mlir/examples/toy/Ch3/include/toy/Ops.td b/mlir/examples/toy/Ch3/include/toy/Ops.td index f7320ebc1d12d..d889b81bef0a4 100644 --- a/mlir/examples/toy/Ch3/include/toy/Ops.td +++ b/mlir/examples/toy/Ch3/include/toy/Ops.td @@ -19,7 +19,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td" // can define our operations. def Toy_Dialect : Dialect { let name = "toy"; - let cppNamespace = "toy"; + let cppNamespace = "::mlir::toy"; } // Base class for toy dialect operations. This operation inherits from the base diff --git a/mlir/examples/toy/Ch4/include/toy/Dialect.h b/mlir/examples/toy/Ch4/include/toy/Dialect.h index b1a38ec60a0cf..0853347408925 100644 --- a/mlir/examples/toy/Ch4/include/toy/Dialect.h +++ b/mlir/examples/toy/Ch4/include/toy/Dialect.h @@ -36,12 +36,12 @@ class ToyDialect : public mlir::Dialect { static llvm::StringRef getDialectNamespace() { return "toy"; } }; +} // end namespace toy +} // end namespace mlir + /// Include the auto-generated header file containing the declarations of the /// toy operations. #define GET_OP_CLASSES #include "toy/Ops.h.inc" -} // end namespace toy -} // end namespace mlir - #endif // MLIR_TUTORIAL_TOY_DIALECT_H_ diff --git a/mlir/examples/toy/Ch4/include/toy/Ops.td b/mlir/examples/toy/Ch4/include/toy/Ops.td index 48c08a6a9369c..2ce4692e63f28 100644 --- a/mlir/examples/toy/Ch4/include/toy/Ops.td +++ b/mlir/examples/toy/Ch4/include/toy/Ops.td @@ -21,7 +21,7 @@ include "toy/ShapeInferenceInterface.td" // can define our operations. def Toy_Dialect : Dialect { let name = "toy"; - let cppNamespace = "toy"; + let cppNamespace = "::mlir::toy"; } // Base class for toy dialect operations. This operation inherits from the base diff --git a/mlir/examples/toy/Ch5/include/toy/Dialect.h b/mlir/examples/toy/Ch5/include/toy/Dialect.h index b1a38ec60a0cf..0853347408925 100644 --- a/mlir/examples/toy/Ch5/include/toy/Dialect.h +++ b/mlir/examples/toy/Ch5/include/toy/Dialect.h @@ -36,12 +36,12 @@ class ToyDialect : public mlir::Dialect { static llvm::StringRef getDialectNamespace() { return "toy"; } }; +} // end namespace toy +} // end namespace mlir + /// Include the auto-generated header file containing the declarations of the /// toy operations. #define GET_OP_CLASSES #include "toy/Ops.h.inc" -} // end namespace toy -} // end namespace mlir - #endif // MLIR_TUTORIAL_TOY_DIALECT_H_ diff --git a/mlir/examples/toy/Ch5/include/toy/Ops.td b/mlir/examples/toy/Ch5/include/toy/Ops.td index 210513f22fec1..2a746bb2d800a 100644 --- a/mlir/examples/toy/Ch5/include/toy/Ops.td +++ b/mlir/examples/toy/Ch5/include/toy/Ops.td @@ -21,7 +21,7 @@ include "toy/ShapeInferenceInterface.td" // can define our operations. def Toy_Dialect : Dialect { let name = "toy"; - let cppNamespace = "toy"; + let cppNamespace = "::mlir::toy"; } // Base class for toy dialect operations. 
This operation inherits from the base diff --git a/mlir/examples/toy/Ch6/include/toy/Dialect.h b/mlir/examples/toy/Ch6/include/toy/Dialect.h index b1a38ec60a0cf..0853347408925 100644 --- a/mlir/examples/toy/Ch6/include/toy/Dialect.h +++ b/mlir/examples/toy/Ch6/include/toy/Dialect.h @@ -36,12 +36,12 @@ class ToyDialect : public mlir::Dialect { static llvm::StringRef getDialectNamespace() { return "toy"; } }; +} // end namespace toy +} // end namespace mlir + /// Include the auto-generated header file containing the declarations of the /// toy operations. #define GET_OP_CLASSES #include "toy/Ops.h.inc" -} // end namespace toy -} // end namespace mlir - #endif // MLIR_TUTORIAL_TOY_DIALECT_H_ diff --git a/mlir/examples/toy/Ch6/include/toy/Ops.td b/mlir/examples/toy/Ch6/include/toy/Ops.td index a92f597fd178b..d9a612d00fe9c 100644 --- a/mlir/examples/toy/Ch6/include/toy/Ops.td +++ b/mlir/examples/toy/Ch6/include/toy/Ops.td @@ -21,7 +21,7 @@ include "toy/ShapeInferenceInterface.td" // can define our operations. def Toy_Dialect : Dialect { let name = "toy"; - let cppNamespace = "toy"; + let cppNamespace = "::mlir::toy"; } // Base class for toy dialect operations. This operation inherits from the base diff --git a/mlir/examples/toy/Ch7/include/toy/Dialect.h b/mlir/examples/toy/Ch7/include/toy/Dialect.h index 4eceb422efa63..fb2927834779b 100644 --- a/mlir/examples/toy/Ch7/include/toy/Dialect.h +++ b/mlir/examples/toy/Ch7/include/toy/Dialect.h @@ -50,6 +50,9 @@ class ToyDialect : public mlir::Dialect { static llvm::StringRef getDialectNamespace() { return "toy"; } }; +} // end namespace toy +} // end namespace mlir + //===----------------------------------------------------------------------===// // Toy Operations //===----------------------------------------------------------------------===// @@ -59,6 +62,9 @@ class ToyDialect : public mlir::Dialect { #define GET_OP_CLASSES #include "toy/Ops.h.inc" +namespace mlir { +namespace toy { + //===----------------------------------------------------------------------===// // Toy Types //===----------------------------------------------------------------------===// diff --git a/mlir/examples/toy/Ch7/include/toy/Ops.td b/mlir/examples/toy/Ch7/include/toy/Ops.td index ab0cf9dbb0ff6..dc9472c569a9f 100644 --- a/mlir/examples/toy/Ch7/include/toy/Ops.td +++ b/mlir/examples/toy/Ch7/include/toy/Ops.td @@ -21,7 +21,7 @@ include "toy/ShapeInferenceInterface.td" // can define our operations. def Toy_Dialect : Dialect { let name = "toy"; - let cppNamespace = "toy"; + let cppNamespace = "::mlir::toy"; } // Base class for toy dialect operations. 
This operation inherits from the base diff --git a/mlir/include/mlir/Dialect/AVX512/AVX512.td b/mlir/include/mlir/Dialect/AVX512/AVX512.td index e1ed35c50e875..eee24ce1d5d54 100644 --- a/mlir/include/mlir/Dialect/AVX512/AVX512.td +++ b/mlir/include/mlir/Dialect/AVX512/AVX512.td @@ -21,7 +21,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td" def AVX512_Dialect : Dialect { let name = "avx512"; - let cppNamespace = "avx512"; + let cppNamespace = "::mlir::avx512"; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/AVX512/AVX512Dialect.h b/mlir/include/mlir/Dialect/AVX512/AVX512Dialect.h index 544fb7c2a495f..aae3dbdf179fb 100644 --- a/mlir/include/mlir/Dialect/AVX512/AVX512Dialect.h +++ b/mlir/include/mlir/Dialect/AVX512/AVX512Dialect.h @@ -17,15 +17,9 @@ #include "mlir/IR/OpDefinition.h" #include "mlir/Interfaces/SideEffectInterfaces.h" -namespace mlir { -namespace avx512 { +#include "mlir/Dialect/AVX512/AVX512Dialect.h.inc" #define GET_OP_CLASSES #include "mlir/Dialect/AVX512/AVX512.h.inc" -#include "mlir/Dialect/AVX512/AVX512Dialect.h.inc" - -} // namespace avx512 -} // namespace mlir - #endif // MLIR_DIALECT_AVX512_AVX512DIALECT_H_ diff --git a/mlir/include/mlir/Dialect/GPU/GPUBase.td b/mlir/include/mlir/Dialect/GPU/GPUBase.td index 32e0952a15b41..5641d60b0e285 100644 --- a/mlir/include/mlir/Dialect/GPU/GPUBase.td +++ b/mlir/include/mlir/Dialect/GPU/GPUBase.td @@ -21,6 +21,7 @@ include "mlir/IR/OpBase.td" def GPU_Dialect : Dialect { let name = "gpu"; + let cppNamespace = "::mlir::gpu"; let hasOperationAttrVerify = 1; let extraClassDeclaration = [{ diff --git a/mlir/include/mlir/Dialect/GPU/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/GPUDialect.h index 35daee29aa6af..b55b0c8a3396a 100644 --- a/mlir/include/mlir/Dialect/GPU/GPUDialect.h +++ b/mlir/include/mlir/Dialect/GPU/GPUDialect.h @@ -34,12 +34,13 @@ struct KernelDim3 { Value z; }; +} // end namespace gpu +} // end namespace mlir + #include "mlir/Dialect/GPU/GPUOpsDialect.h.inc" #define GET_OP_CLASSES #include "mlir/Dialect/GPU/GPUOps.h.inc" -} // end namespace gpu -} // end namespace mlir #endif // MLIR_DIALECT_GPU_GPUDIALECT_H diff --git a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h index 298ec0c803f0f..8bce2fd0ad2bb 100644 --- a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h +++ b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h @@ -27,8 +27,11 @@ struct LogicalResult; class Operation; class Region; +} // namespace mlir + #include "mlir/Dialect/GPU/ParallelLoopMapperAttr.h.inc" +namespace mlir { namespace scf { class ParallelOp; } diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAVX512.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAVX512.td index 12668c4da41be..fcc90a2a801ed 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAVX512.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAVX512.td @@ -21,7 +21,7 @@ include "mlir/Dialect/LLVMIR/LLVMOpBase.td" def LLVMAVX512_Dialect : Dialect { let name = "llvm_avx512"; - let cppNamespace = "LLVM"; + let cppNamespace = "::mlir::LLVM"; } //----------------------------------------------------------------------------// diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAVX512Dialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMAVX512Dialect.h index 27b98fd189107..c028fda514fe0 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAVX512Dialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAVX512Dialect.h @@ -16,15 +16,9 @@ #include "mlir/IR/Dialect.h" #include 
"mlir/IR/OpDefinition.h" -namespace mlir { -namespace LLVM { - #define GET_OP_CLASSES #include "mlir/Dialect/LLVMIR/LLVMAVX512.h.inc" #include "mlir/Dialect/LLVMIR/LLVMAVX512Dialect.h.inc" -} // namespace LLVM -} // namespace mlir - #endif // MLIR_DIALECT_LLVMIR_LLVMAVX512DIALECT_H_ diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h index 2f465f07a97e4..5c16f33e9fc06 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h @@ -49,18 +49,23 @@ struct LLVMTypeStorage; struct LLVMDialectImpl; } // namespace detail +} // namespace LLVM +} // namespace mlir + ///// Ops ///// #define GET_OP_CLASSES #include "mlir/Dialect/LLVMIR/LLVMOps.h.inc" #include "mlir/Dialect/LLVMIR/LLVMOpsDialect.h.inc" +namespace mlir { +namespace LLVM { /// Create an LLVM global containing the string "value" at the module containing /// surrounding the insertion point of builder. Obtain the address of that /// global and use it to compute the address of the first character in the /// string (operations inserted at the builder insertion point). Value createGlobalString(Location loc, OpBuilder &builder, StringRef name, - StringRef value, LLVM::Linkage linkage); + StringRef value, Linkage linkage); /// LLVM requires some operations to be inside of a Module operation. This /// function confirms that the Operation has the desired properties. diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td index 10755a436115f..a6be8ef6d8bae 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td @@ -23,7 +23,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td" def LLVM_Dialect : Dialect { let name = "llvm"; - let cppNamespace = "LLVM"; + let cppNamespace = "::mlir::LLVM"; /// FIXME: at the moment this is a dependency of the translation to LLVM IR, /// not really one of this dialect per-se. 
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index b5bf4ac779727..626bc4b889892 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -59,7 +59,7 @@ def LLVM_VoidResultTypeOpBuilder : OpBuilder<
   "OpBuilder &builder, OperationState &result, Type resultType, "
   "ValueRange operands, ArrayRef<NamedAttribute> attributes = {}",
   [{
-    auto llvmType = resultType.dyn_cast<LLVM::LLVMType>(); (void)llvmType;
+    auto llvmType = resultType.dyn_cast<LLVMType>(); (void)llvmType;
     assert(llvmType && "result must be an LLVM type");
     assert(llvmType.isVoidTy() &&
            "for zero-result operands, only 'void' is accepted as result type");
@@ -301,7 +301,7 @@ def LLVM_LoadOp :
     "unsigned alignment = 0, bool isVolatile = false, "
     "bool isNonTemporal = false",
     [{
-      auto type = addr.getType().cast<LLVM::LLVMType>().getPointerElementTy();
+      auto type = addr.getType().cast<LLVMType>().getPointerElementTy();
       build(b, result, type, addr, alignment, isVolatile, isNonTemporal);
     }]>,
   OpBuilder<
@@ -494,8 +494,8 @@ def LLVM_ShuffleVectorOp
     "OpBuilder &b, OperationState &result, Value v1, Value v2, "
     "ArrayAttr mask, ArrayRef<NamedAttribute> attrs = {}">];
   let verifier = [{
-    auto wrappedVectorType1 = v1().getType().cast<LLVM::LLVMType>();
-    auto wrappedVectorType2 = v2().getType().cast<LLVM::LLVMType>();
+    auto wrappedVectorType1 = v1().getType().cast<LLVMType>();
+    auto wrappedVectorType2 = v2().getType().cast<LLVMType>();
     if (!wrappedVectorType2.isVectorTy())
       return emitOpError("expected LLVM IR Dialect vector type for operand #2");
     if (wrappedVectorType1.getVectorElementType() !=
@@ -770,7 +770,7 @@ def LLVM_LLVMFuncOp
 
   let builders = [
     OpBuilder<"OpBuilder &builder, OperationState &result, StringRef name, "
-              "LLVMType type, LLVM::Linkage linkage = LLVM::Linkage::External, "
+              "LLVMType type, Linkage linkage = Linkage::External, "
              "ArrayRef<NamedAttribute> attrs = {}, "
              "ArrayRef<MutableDictionaryAttr> argAttrs = {}">
  ];
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
index 9cc5314bdb901..fff82e3b9f4f4 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
@@ -19,16 +19,10 @@
 #include "mlir/IR/OpDefinition.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 
-namespace mlir {
-namespace NVVM {
-
 ///// Ops /////
 #define GET_OP_CLASSES
 #include "mlir/Dialect/LLVMIR/NVVMOps.h.inc"
 
 #include "mlir/Dialect/LLVMIR/NVVMOpsDialect.h.inc"
 
-} // namespace NVVM
-} // namespace mlir
-
 #endif /* MLIR_DIALECT_LLVMIR_NVVMDIALECT_H_ */
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 7d47e5012ac9a..5f72ad35a6701 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -22,7 +22,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td"
 
 def NVVM_Dialect : Dialect {
   let name = "nvvm";
-  let cppNamespace = "NVVM";
+  let cppNamespace = "::mlir::NVVM";
   let dependentDialects = ["LLVM::LLVMDialect"];
 }
 
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h b/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h
index eb40373c3f117..b00b8ac0b125a 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h
@@ -27,16 +27,10 @@
 #include "mlir/IR/OpDefinition.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 
-namespace mlir {
-namespace ROCDL {
-
 ///// Ops /////
 #define GET_OP_CLASSES
 #include "mlir/Dialect/LLVMIR/ROCDLOps.h.inc"
 
 #include "mlir/Dialect/LLVMIR/ROCDLOpsDialect.h.inc"
 
-} // namespace ROCDL
-} // namespace mlir
-
 #endif /* MLIR_DIALECT_LLVMIR_ROCDLDIALECT_H_ */
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index f85c4f02899b4..c6d2ded073e63 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -22,7 +22,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td"
 
 def ROCDL_Dialect : Dialect {
   let name = "rocdl";
-  let cppNamespace = "ROCDL";
+  let cppNamespace = "::mlir::ROCDL";
   let dependentDialects = ["LLVM::LLVMDialect"];
 }
 
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgBase.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgBase.td
index 7955345f69668..8ac82b768ad3f 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgBase.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgBase.td
@@ -31,6 +31,7 @@ def Linalg_Dialect : Dialect {
     are also available and should be read first before going in the details of
    the op semantics.
   }];
+  let cppNamespace = "::mlir::linalg";
 }
 
 // Whether a type is a RangeType.
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h
index 21bff4185abf8..09fc11bc49175 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h
@@ -85,6 +85,9 @@ AffineMap extractOrIdentityMap(Optional<AffineMap> maybeMap, unsigned rank,
 SmallVector<AffineExpr, 4> concat(ArrayRef<AffineExpr> a,
                                   ArrayRef<AffineExpr> b);
 
+} // namespace linalg
+} // namespace mlir
+
 #include "mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterfaces.h.inc"
 
 #define GET_OP_CLASSES
@@ -93,7 +96,5 @@ SmallVector<AffineExpr, 4> concat(ArrayRef<AffineExpr> a,
 #define GET_OP_CLASSES
 #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.h.inc"
 
-} // namespace linalg
-} // namespace mlir
 
 #endif // MLIR_DIALECT_LINALG_LINALGOPS_H_
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td
index f32b70efd87e1..0e8216cc4268f 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td
@@ -18,6 +18,7 @@ include "mlir/Dialect/Linalg/IR/LinalgBase.td"
 // The linalg 'LinalgStructuredInterface' provides access to the 'LinalgOp'
 // interface.
 def LinalgStructuredInterface : OpInterface<"LinalgOp"> {
+  let cppNamespace = "::mlir::linalg";
   let methods = [
     //===------------------------------------------------------------------===//
     // Loop types handling.
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgTypes.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgTypes.h
index 18b2c3aaa53d1..a4e32b9263e8c 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgTypes.h
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgTypes.h
@@ -12,11 +12,12 @@
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/Types.h"
 
+#include "mlir/Dialect/Linalg/IR/LinalgOpsDialect.h.inc"
+
 namespace mlir {
 class MLIRContext;
 
 namespace linalg {
-#include "mlir/Dialect/Linalg/IR/LinalgOpsDialect.h.inc"
 
 /// A RangeType represents a minimal range abstraction (min, max, step).
/// It is constructed by calling the linalg.range op with three values index of
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
index 8f5e1daf9aebc..40700e6d1b736 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
@@ -16,15 +16,14 @@
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/OpDefinition.h"
 
+#include "mlir/Dialect/OpenACC/OpenACCOpsDialect.h.inc"
 #include "mlir/Dialect/OpenACC/OpenACCOpsEnums.h.inc"
 
-namespace mlir {
-namespace acc {
-
 #define GET_OP_CLASSES
 #include "mlir/Dialect/OpenACC/OpenACCOps.h.inc"
 
-#include "mlir/Dialect/OpenACC/OpenACCOpsDialect.h.inc"
+namespace mlir {
+namespace acc {
 
 /// Enumeration used to encode the execution mapping on a loop construct.
 /// They refer directly to the OpenACC 3.0 standard:
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 30d6f435b75fa..c0178ebe9e48a 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -24,7 +24,7 @@ def OpenACC_Dialect : Dialect {
 
     This dialect models the construct from the OpenACC 3.0 directive language.
   }];
-  let cppNamespace = "acc";
+  let cppNamespace = "::mlir::acc";
 }
 
 // Base class for OpenACC dialect ops.
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h
index 8f0bb93e1043e..0715b9ddd394c 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h
@@ -16,16 +16,10 @@
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/OpDefinition.h"
 
+#include "mlir/Dialect/OpenMP/OpenMPOpsDialect.h.inc"
 #include "mlir/Dialect/OpenMP/OpenMPOpsEnums.h.inc"
 
-namespace mlir {
-namespace omp {
-
 #define GET_OP_CLASSES
 #include "mlir/Dialect/OpenMP/OpenMPOps.h.inc"
 
-#include "mlir/Dialect/OpenMP/OpenMPOpsDialect.h.inc"
-
-} // namespace omp
-} // namespace mlir
-
 #endif // MLIR_DIALECT_OPENMP_OPENMPDIALECT_H_
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index eb92745d6fa5e..3ac7f2c5dda53 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -19,7 +19,7 @@ include "mlir/Dialect/OpenMP/OmpCommon.td"
 
 def OpenMP_Dialect : Dialect {
   let name = "omp";
-  let cppNamespace = "omp";
+  let cppNamespace = "::mlir::omp";
 }
 
 class OpenMP_Op<string mnemonic, list<OpTrait> traits = []> :
diff --git a/mlir/include/mlir/Dialect/PDL/IR/PDL.h b/mlir/include/mlir/Dialect/PDL/IR/PDL.h
index 64dbf8f74399f..14136021d26ce 100644
--- a/mlir/include/mlir/Dialect/PDL/IR/PDL.h
+++ b/mlir/include/mlir/Dialect/PDL/IR/PDL.h
@@ -19,8 +19,6 @@
 #include "mlir/IR/SymbolTable.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 
-namespace mlir {
-namespace pdl {
 //===----------------------------------------------------------------------===//
 // PDL Dialect
 //===----------------------------------------------------------------------===//
@@ -34,7 +32,5 @@ namespace pdl {
 #define GET_OP_CLASSES
 #include "mlir/Dialect/PDL/IR/PDLOps.h.inc"
 
-} // end namespace pdl
-} // end namespace mlir
 
 #endif // MLIR_DIALECT_PDL_IR_PDL_H_
diff --git a/mlir/include/mlir/Dialect/PDL/IR/PDLBase.td b/mlir/include/mlir/Dialect/PDL/IR/PDLBase.td
index 9802bf9431572..b372e594e2e73 100644
--- a/mlir/include/mlir/Dialect/PDL/IR/PDLBase.td
+++ b/mlir/include/mlir/Dialect/PDL/IR/PDLBase.td
@@ -63,7 +63,7 @@ def PDL_Dialect : Dialect {
   }];
 
   let name = "pdl";
-  let
cppNamespace = "mlir::pdl"; + let cppNamespace = "::mlir::pdl"; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/PDLInterp/IR/PDLInterp.h b/mlir/include/mlir/Dialect/PDLInterp/IR/PDLInterp.h index 6d895679b3d65..07c7f84c80784 100644 --- a/mlir/include/mlir/Dialect/PDLInterp/IR/PDLInterp.h +++ b/mlir/include/mlir/Dialect/PDLInterp/IR/PDLInterp.h @@ -18,8 +18,6 @@ #include "mlir/Interfaces/InferTypeOpInterface.h" #include "mlir/Interfaces/SideEffectInterfaces.h" -namespace mlir { -namespace pdl_interp { //===----------------------------------------------------------------------===// // PDLInterp Dialect //===----------------------------------------------------------------------===// @@ -33,7 +31,4 @@ namespace pdl_interp { #define GET_OP_CLASSES #include "mlir/Dialect/PDLInterp/IR/PDLInterpOps.h.inc" -} // end namespace pdl_interp -} // end namespace mlir - #endif // MLIR_DIALECT_PDLINTERP_IR_PDLINTERP_H_ diff --git a/mlir/include/mlir/Dialect/PDLInterp/IR/PDLInterpOps.td b/mlir/include/mlir/Dialect/PDLInterp/IR/PDLInterpOps.td index 58a2032a21825..e95162bb65806 100644 --- a/mlir/include/mlir/Dialect/PDLInterp/IR/PDLInterpOps.td +++ b/mlir/include/mlir/Dialect/PDLInterp/IR/PDLInterpOps.td @@ -34,7 +34,7 @@ def PDLInterp_Dialect : Dialect { }]; let name = "pdl_interp"; - let cppNamespace = "mlir::pdl_interp"; + let cppNamespace = "::mlir::pdl_interp"; let dependentDialects = ["pdl::PDLDialect"]; } diff --git a/mlir/include/mlir/Dialect/Quant/QuantOps.h b/mlir/include/mlir/Dialect/Quant/QuantOps.h index 234a2b44c6f6b..00a6032a2fea0 100644 --- a/mlir/include/mlir/Dialect/Quant/QuantOps.h +++ b/mlir/include/mlir/Dialect/Quant/QuantOps.h @@ -18,15 +18,9 @@ #include "mlir/Interfaces/SideEffectInterfaces.h" #include "llvm/Support/MathExtras.h" -namespace mlir { -namespace quant { - #include "mlir/Dialect/Quant/QuantOpsDialect.h.inc" #define GET_OP_CLASSES #include "mlir/Dialect/Quant/QuantOps.h.inc" -} // namespace quant -} // namespace mlir - #endif // MLIR_DIALECT_QUANT_QUANTOPS_H_ diff --git a/mlir/include/mlir/Dialect/Quant/QuantOpsBase.td b/mlir/include/mlir/Dialect/Quant/QuantOpsBase.td index aa7c311e20a3f..10339fcbcf5d8 100644 --- a/mlir/include/mlir/Dialect/Quant/QuantOpsBase.td +++ b/mlir/include/mlir/Dialect/Quant/QuantOpsBase.td @@ -17,6 +17,7 @@ include "mlir/IR/OpBase.td" def Quantization_Dialect : Dialect { let name = "quant"; + let cppNamespace = "::mlir::quant"; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/SCF/SCF.h b/mlir/include/mlir/Dialect/SCF/SCF.h index 3974b58cbfbba..55c8cbf5fa744 100644 --- a/mlir/include/mlir/Dialect/SCF/SCF.h +++ b/mlir/include/mlir/Dialect/SCF/SCF.h @@ -23,14 +23,18 @@ namespace mlir { namespace scf { - void buildTerminatedBody(OpBuilder &builder, Location loc); +} // namespace scf +} // namespace mlir #include "mlir/Dialect/SCF/SCFOpsDialect.h.inc" #define GET_OP_CLASSES #include "mlir/Dialect/SCF/SCFOps.h.inc" +namespace mlir { +namespace scf { + // Insert `loop.yield` at the end of the only region's only block if it // does not have a terminator already. If a new `loop.yield` is inserted, // the location is specified by `loc`. 
If the region is empty, insert a new
diff --git a/mlir/include/mlir/Dialect/SCF/SCFOps.td b/mlir/include/mlir/Dialect/SCF/SCFOps.td
index 59ba50fbe2322..179b4d773a3a4 100644
--- a/mlir/include/mlir/Dialect/SCF/SCFOps.td
+++ b/mlir/include/mlir/Dialect/SCF/SCFOps.td
@@ -19,7 +19,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td"
 
 def SCF_Dialect : Dialect {
   let name = "scf";
-  let cppNamespace = "scf";
+  let cppNamespace = "::mlir::scf";
 }
 
 // Base class for SCF dialect ops.
diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVAttributes.h b/mlir/include/mlir/Dialect/SPIRV/SPIRVAttributes.h
index b1909b3675535..a743fa9c30d98 100644
--- a/mlir/include/mlir/Dialect/SPIRV/SPIRVAttributes.h
+++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVAttributes.h
@@ -17,10 +17,10 @@
 #include "mlir/IR/Attributes.h"
 #include "mlir/Support/LLVM.h"
 
-namespace mlir {
 // Pull in SPIR-V attribute definitions for target and ABI.
 #include "mlir/Dialect/SPIRV/TargetAndABI.h.inc"
 
+namespace mlir {
 namespace spirv {
 enum class Capability : uint32_t;
 enum class Extension;
diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td
index 21f926a1500c5..1fa72bf4dcaba 100644
--- a/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td
+++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td
@@ -45,7 +45,7 @@ def SPIRV_Dialect : Dialect {
     high-level designs and implementation structures of the SPIR-V dialect.
   }];
 
-  let cppNamespace = "spirv";
+  let cppNamespace = "::mlir::spirv";
   let hasConstantMaterializer = 1;
   let hasOperationAttrVerify = 1;
   let hasRegionArgAttrVerify = 1;
@@ -226,21 +226,24 @@ class Capability<list<I32EnumAttrCase> capabilities> : Availability {
   let instance = "ref";
 }
 
+class SPIRVOpInterface<string name> : OpInterface<name> {
+  let cppNamespace = "::mlir::spirv";
+}
 // TODO: the following interfaces definitions are duplicating with the above.
 // Remove them once we are able to support dialect-specific contents in ODS.
-def QueryMinVersionInterface : OpInterface<"QueryMinVersionInterface"> {
+def QueryMinVersionInterface : SPIRVOpInterface<"QueryMinVersionInterface"> {
   let methods = [InterfaceMethod<"", "::mlir::spirv::Version", "getMinVersion">];
 }
-def QueryMaxVersionInterface : OpInterface<"QueryMaxVersionInterface"> {
+def QueryMaxVersionInterface : SPIRVOpInterface<"QueryMaxVersionInterface"> {
   let methods = [InterfaceMethod<"", "::mlir::spirv::Version", "getMaxVersion">];
 }
-def QueryExtensionInterface : OpInterface<"QueryExtensionInterface"> {
+def QueryExtensionInterface : SPIRVOpInterface<"QueryExtensionInterface"> {
   let methods = [InterfaceMethod<
     "",
     "::llvm::SmallVector<::llvm::ArrayRef<::mlir::spirv::Extension>, 1>",
     "getExtensions">];
 }
-def QueryCapabilityInterface : OpInterface<"QueryCapabilityInterface"> {
+def QueryCapabilityInterface : SPIRVOpInterface<"QueryCapabilityInterface"> {
   let methods = [InterfaceMethod<
     "",
     "::llvm::SmallVector<::llvm::ArrayRef<::mlir::spirv::Capability>, 1>",
diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVDialect.h b/mlir/include/mlir/Dialect/SPIRV/SPIRVDialect.h
index 2cffebec60ea6..1b37abb937644 100644
--- a/mlir/include/mlir/Dialect/SPIRV/SPIRVDialect.h
+++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVDialect.h
@@ -20,9 +20,9 @@ namespace spirv {
 
 enum class Decoration : uint32_t;
 
-#include "mlir/Dialect/SPIRV/SPIRVOpsDialect.h.inc"
-
 } // end namespace spirv
 } // end namespace mlir
 
+#include "mlir/Dialect/SPIRV/SPIRVOpsDialect.h.inc"
+
 #endif // MLIR_DIALECT_SPIRV_SPIRVDIALECT_H_
diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVOps.h b/mlir/include/mlir/Dialect/SPIRV/SPIRVOps.h
index 01a2c6081643a..61568df03dcd8 100644
--- a/mlir/include/mlir/Dialect/SPIRV/SPIRVOps.h
+++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVOps.h
@@ -28,11 +28,15 @@ class VerCapExtAttr;
 // TableGen'erated operation interfaces for querying versions, extensions, and
 // capabilities.
 #include "mlir/Dialect/SPIRV/SPIRVAvailability.h.inc"
+} // namespace spirv
+} // namespace mlir
 
 // TablenGen'erated operation declarations.
 #define GET_OP_CLASSES
 #include "mlir/Dialect/SPIRV/SPIRVOps.h.inc"
 
+namespace mlir {
+namespace spirv {
 // TableGen'erated helper functions.
 //
 // Get the name used in the Op to refer to an enum value of the given
diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVTypes.h b/mlir/include/mlir/Dialect/SPIRV/SPIRVTypes.h
index 2d224effdee35..43fb708c7908d 100644
--- a/mlir/include/mlir/Dialect/SPIRV/SPIRVTypes.h
+++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVTypes.h
@@ -77,25 +77,25 @@ class SPIRVType : public Type {
   /// The extension requirements for each type are following the
   /// ((Extension::A OR Extension::B) AND (Extension::C OR Extension::D))
   /// convention.
-  using ExtensionArrayRefVector = SmallVectorImpl<ArrayRef<spirv::Extension>>;
+  using ExtensionArrayRefVector = SmallVectorImpl<ArrayRef<Extension>>;
 
   /// Appends to `extensions` the extensions needed for this type to appear in
   /// the given `storage` class. This method does not guarantee the uniqueness
   /// of extensions; the same extension may be appended multiple times.
   void getExtensions(ExtensionArrayRefVector &extensions,
-                     Optional<spirv::StorageClass> storage = llvm::None);
+                     Optional<StorageClass> storage = llvm::None);
 
   /// The capability requirements for each type are following the
   /// ((Capability::A OR Extension::B) AND (Capability::C OR Capability::D))
   /// convention.
-  using CapabilityArrayRefVector = SmallVectorImpl<ArrayRef<spirv::Capability>>;
+  using CapabilityArrayRefVector = SmallVectorImpl<ArrayRef<Capability>>;
 
   /// Appends to `capabilities` the capabilities needed for this type to appear
   /// in the given `storage` class.
This method does not guarantee the /// uniqueness of capabilities; the same capability may be appended multiple /// times. void getCapabilities(CapabilityArrayRefVector &capabilities, - Optional storage = llvm::None); + Optional storage = llvm::None); /// Returns the size in bytes for each type. If no size can be calculated, /// returns `llvm::None`. Note that if the type has explicit layout, it is @@ -116,9 +116,9 @@ class ScalarType : public SPIRVType { static bool isValid(IntegerType); void getExtensions(SPIRVType::ExtensionArrayRefVector &extensions, - Optional storage = llvm::None); + Optional storage = llvm::None); void getCapabilities(SPIRVType::CapabilityArrayRefVector &capabilities, - Optional storage = llvm::None); + Optional storage = llvm::None); Optional getSizeInBytes(); }; @@ -144,9 +144,9 @@ class CompositeType : public SPIRVType { bool hasCompileTimeKnownNumElements() const; void getExtensions(SPIRVType::ExtensionArrayRefVector &extensions, - Optional storage = llvm::None); + Optional storage = llvm::None); void getCapabilities(SPIRVType::CapabilityArrayRefVector &capabilities, - Optional storage = llvm::None); + Optional storage = llvm::None); Optional getSizeInBytes(); }; @@ -172,9 +172,9 @@ class ArrayType : public Type::TypeBase storage = llvm::None); + Optional storage = llvm::None); void getCapabilities(SPIRVType::CapabilityArrayRefVector &capabilities, - Optional storage = llvm::None); + Optional storage = llvm::None); /// Returns the array size in bytes. Since array type may have an explicit /// stride declaration (in bytes), we also include it in the calculation. @@ -215,9 +215,9 @@ class ImageType // TODO: Add support for Access qualifier void getExtensions(SPIRVType::ExtensionArrayRefVector &extensions, - Optional storage = llvm::None); + Optional storage = llvm::None); void getCapabilities(SPIRVType::CapabilityArrayRefVector &capabilities, - Optional storage = llvm::None); + Optional storage = llvm::None); }; // SPIR-V pointer type @@ -233,9 +233,9 @@ class PointerType : public Type::TypeBase storage = llvm::None); + Optional storage = llvm::None); void getCapabilities(SPIRVType::CapabilityArrayRefVector &capabilities, - Optional storage = llvm::None); + Optional storage = llvm::None); }; // SPIR-V run-time array type @@ -257,9 +257,9 @@ class RuntimeArrayType unsigned getArrayStride() const; void getExtensions(SPIRVType::ExtensionArrayRefVector &extensions, - Optional storage = llvm::None); + Optional storage = llvm::None); void getCapabilities(SPIRVType::CapabilityArrayRefVector &capabilities, - Optional storage = llvm::None); + Optional storage = llvm::None); }; // SPIR-V struct type @@ -335,21 +335,21 @@ class StructType : public Type::TypeBase &memberDecorations) const; - // Returns in `decorationsInfo` all the spirv::Decorations (apart from - // Offset) associated with the `i`-th member of the StructType. + // Returns in `decorationsInfo` all the Decorations (apart from Offset) + // associated with the `i`-th member of the StructType. 
void getMemberDecorations(unsigned i, SmallVectorImpl &decorationsInfo) const; void getExtensions(SPIRVType::ExtensionArrayRefVector &extensions, - Optional storage = llvm::None); + Optional storage = llvm::None); void getCapabilities(SPIRVType::CapabilityArrayRefVector &capabilities, - Optional storage = llvm::None); + Optional storage = llvm::None); }; llvm::hash_code @@ -362,21 +362,21 @@ class CooperativeMatrixNVType public: using Base::Base; - static CooperativeMatrixNVType get(Type elementType, spirv::Scope scope, + static CooperativeMatrixNVType get(Type elementType, Scope scope, unsigned rows, unsigned columns); Type getElementType() const; /// Return the scope of the cooperative matrix. - spirv::Scope getScope() const; + Scope getScope() const; /// return the number of rows of the matrix. unsigned getRows() const; /// return the number of columns of the matrix. unsigned getColumns() const; void getExtensions(SPIRVType::ExtensionArrayRefVector &extensions, - Optional storage = llvm::None); + Optional storage = llvm::None); void getCapabilities(SPIRVType::CapabilityArrayRefVector &capabilities, - Optional storage = llvm::None); + Optional storage = llvm::None); }; // SPIR-V matrix type @@ -412,9 +412,9 @@ class MatrixType : public Type::TypeBase storage = llvm::None); + Optional storage = llvm::None); void getCapabilities(SPIRVType::CapabilityArrayRefVector &capabilities, - Optional storage = llvm::None); + Optional storage = llvm::None); }; } // end namespace spirv diff --git a/mlir/include/mlir/Dialect/Shape/IR/Shape.h b/mlir/include/mlir/Dialect/Shape/IR/Shape.h index cc601bdedaca6..f40d6154544ae 100644 --- a/mlir/include/mlir/Dialect/Shape/IR/Shape.h +++ b/mlir/include/mlir/Dialect/Shape/IR/Shape.h @@ -67,12 +67,12 @@ class WitnessType : public Type::TypeBase { using Base::Base; }; +} // namespace shape +} // namespace mlir + #define GET_OP_CLASSES #include "mlir/Dialect/Shape/IR/ShapeOps.h.inc" #include "mlir/Dialect/Shape/IR/ShapeOpsDialect.h.inc" -} // namespace shape -} // namespace mlir - #endif // MLIR_SHAPE_IR_SHAPE_H diff --git a/mlir/include/mlir/Dialect/Shape/IR/ShapeBase.td b/mlir/include/mlir/Dialect/Shape/IR/ShapeBase.td index 754dfcd6452f3..b038819bca3d1 100644 --- a/mlir/include/mlir/Dialect/Shape/IR/ShapeBase.td +++ b/mlir/include/mlir/Dialect/Shape/IR/ShapeBase.td @@ -36,7 +36,7 @@ def ShapeDialect : Dialect { concatting etc. on how to combine them). 
}]; - let cppNamespace = "shape"; + let cppNamespace = "::mlir::shape"; let hasConstantMaterializer = 1; } diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.h b/mlir/include/mlir/Dialect/Vector/VectorOps.h index 562e07f98774d..2354cc6abd890 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.h +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.h @@ -128,13 +128,11 @@ namespace impl { AffineMap getTransferMinorIdentityMap(MemRefType memRefType, VectorType vectorType); } // namespace impl +} // end namespace vector +} // end namespace mlir #define GET_OP_CLASSES #include "mlir/Dialect/Vector/VectorOps.h.inc" - #include "mlir/Dialect/Vector/VectorOpsDialect.h.inc" -} // end namespace vector -} // end namespace mlir - #endif // MLIR_DIALECT_VECTOR_VECTOROPS_H diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td index dceb850ad929c..3cb1265b38ce3 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td @@ -19,7 +19,7 @@ include "mlir/Interfaces/VectorInterfaces.td" def Vector_Dialect : Dialect { let name = "vector"; - let cppNamespace = "vector"; + let cppNamespace = "::mlir::vector"; let hasConstantMaterializer = 1; } diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index 29f139f25069b..ec0e229ae627d 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -1672,7 +1672,7 @@ class OpTrait; // purpose to wrap around C++ symbol string with this class is to make // traits specified for ops in TableGen less alien and more integrated. class NativeOpTrait : OpTrait { - string trait = "OpTrait::" # prop; + string trait = "::mlir::OpTrait::" # prop; } // ParamNativeOpTrait corresponds to the template-parameterized traits in the @@ -1687,7 +1687,7 @@ class ParamNativeOpTrait // affects op definition generator internals, like how op builders and // operand/attribute/result getters are generated. class GenInternalOpTrait : OpTrait { - string trait = "OpTrait::" # prop; + string trait = "::mlir::OpTrait::" # prop; } // PredOpTrait is an op trait implemented by way of a predicate on the op. diff --git a/mlir/include/mlir/TableGen/Operator.h b/mlir/include/mlir/TableGen/Operator.h index d7fac87af0be2..34c5506503644 100644 --- a/mlir/include/mlir/TableGen/Operator.h +++ b/mlir/include/mlir/TableGen/Operator.h @@ -242,6 +242,17 @@ class Operator { // debugging purposes. void print(llvm::raw_ostream &os) const; + // A helper RAII class to emit nested namespaces for this op. + class NamespaceEmitter { + public: + NamespaceEmitter(raw_ostream &os, Operator &op); + ~NamespaceEmitter(); + + private: + raw_ostream &os; + SmallVector namespaces; + }; + // Return whether all the result types are known. 
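The NamespaceEmitter declared above is what lets every generated op land inside its dialect's cppNamespace; the matching definitions appear in Operator.cpp further down in this patch. As a minimal, self-contained sketch of the same RAII idea, assuming plain std::string splitting in place of the llvm::SplitString and SmallVector used by the real class:

// Self-contained sketch of the RAII namespace-emitter pattern; the real
// class reads the dialect's cppNamespace from an Operator instead of
// taking a raw string.
#include <iostream>
#include <string>
#include <vector>

class NamespaceEmitterSketch {
public:
  NamespaceEmitterSketch(std::ostream &os, const std::string &cppNamespace)
      : os(os) {
    // Split "::mlir::spirv" into {"mlir", "spirv"}, skipping the empty
    // piece produced by the leading "::".
    for (size_t pos = 0; pos <= cppNamespace.size();) {
      size_t next = cppNamespace.find("::", pos);
      if (next == std::string::npos)
        next = cppNamespace.size();
      if (next > pos)
        namespaces.push_back(cppNamespace.substr(pos, next - pos));
      pos = next + 2;
    }
    for (const std::string &ns : namespaces)
      os << "namespace " << ns << " {\n";
  }
  ~NamespaceEmitterSketch() {
    // Close in reverse order so the nesting stays balanced.
    for (auto it = namespaces.rbegin(); it != namespaces.rend(); ++it)
      os << "} // namespace " << *it << "\n";
  }

private:
  std::ostream &os;
  std::vector<std::string> namespaces;
};

int main() {
  NamespaceEmitterSketch scope(std::cout, "::mlir::spirv");
  std::cout << "class FooOp;\n"; // everything printed here is properly nested
} // destructor prints the closing braces

Opening the namespaces in the constructor and closing them in reverse order in the destructor keeps the braces balanced even if the emission code between them returns early.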
bool allResultTypesKnown() const { return allResultsHaveKnownTypes; }; diff --git a/mlir/lib/Dialect/AVX512/IR/AVX512Dialect.cpp b/mlir/lib/Dialect/AVX512/IR/AVX512Dialect.cpp index 3595970c38f25..697f00864b15b 100644 --- a/mlir/lib/Dialect/AVX512/IR/AVX512Dialect.cpp +++ b/mlir/lib/Dialect/AVX512/IR/AVX512Dialect.cpp @@ -25,10 +25,5 @@ void avx512::AVX512Dialect::initialize() { >(); } -namespace mlir { -namespace avx512 { #define GET_OP_CLASSES #include "mlir/Dialect/AVX512/AVX512.cpp.inc" -} // namespace avx512 -} // namespace mlir - diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index 58f9480c37be0..7dc74f21e2fbf 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -777,10 +777,5 @@ static void print(OpAsmPrinter &p, GPUModuleOp op) { /*printBlockTerminators=*/false); } -// Namespace avoids ambiguous ReturnOpAdaptor. -namespace mlir { -namespace gpu { #define GET_OP_CLASSES #include "mlir/Dialect/GPU/GPUOps.cpp.inc" -} // namespace gpu -} // namespace mlir diff --git a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp index b42929039a974..b953bad676276 100644 --- a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp @@ -23,10 +23,9 @@ using namespace mlir; using namespace mlir::gpu; using namespace mlir::scf; +#include "mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc" #include "mlir/Dialect/GPU/ParallelLoopMapperEnums.cpp.inc" namespace mlir { - -#include "mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc" namespace gpu { StringRef getMappingAttrName() { return "mapping"; } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMAVX512Dialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMAVX512Dialect.cpp index 9f7e66b0ae0a9..512234cc87646 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMAVX512Dialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMAVX512Dialect.cpp @@ -27,9 +27,5 @@ void LLVM::LLVMAVX512Dialect::initialize() { >(); } -namespace mlir { -namespace LLVM { #define GET_OP_CLASSES #include "mlir/Dialect/LLVMIR/LLVMAVX512.cpp.inc" -} // namespace LLVM -} // namespace mlir diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index cc809b581c843..e13a83854b1e3 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -16,7 +16,6 @@ #include "mlir/Dialect/LLVMIR/NVVMDialect.h" -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Builders.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Operation.h" @@ -146,10 +145,5 @@ void NVVMDialect::initialize() { allowUnknownOperations(); } -namespace mlir { -namespace NVVM { #define GET_OP_CLASSES #include "mlir/Dialect/LLVMIR/NVVMOps.cpp.inc" -} // namespace NVVM -} // namespace mlir - diff --git a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp index 70c3558638e6a..afdd9537c6792 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp @@ -91,10 +91,5 @@ void ROCDLDialect::initialize() { allowUnknownOperations(); } -namespace mlir { -namespace ROCDL { #define GET_OP_CLASSES #include "mlir/Dialect/LLVMIR/ROCDLOps.cpp.inc" -} // namespace ROCDL -} // namespace mlir - diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 7071cd385f770..efe2e45f78ea9 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ 
b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -1096,9 +1096,6 @@ static LogicalResult verify(PoolingSumOp op) { return verifySingleInputPoolingOp(op); } -namespace mlir { -namespace linalg { - #include "mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterfaces.cpp.inc" #define GET_OP_CLASSES @@ -1107,9 +1104,6 @@ namespace linalg { #define GET_OP_CLASSES #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc" -} // namespace linalg -} // namespace mlir - AffineMap mlir::linalg::extractOrIdentityMap(Optional maybeMap, unsigned rank, MLIRContext *context) { diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 217588289e851..ec47177df84ce 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -271,9 +271,5 @@ static ParseResult parseParallelOp(OpAsmParser &parser, return success(); } -namespace mlir { -namespace omp { #define GET_OP_CLASSES #include "mlir/Dialect/OpenMP/OpenMPOps.cpp.inc" -} // namespace omp -} // namespace mlir diff --git a/mlir/lib/Dialect/PDL/IR/PDL.cpp b/mlir/lib/Dialect/PDL/IR/PDL.cpp index 082229b6b3944..a0b9c969becf6 100644 --- a/mlir/lib/Dialect/PDL/IR/PDL.cpp +++ b/mlir/lib/Dialect/PDL/IR/PDL.cpp @@ -454,11 +454,5 @@ static LogicalResult verify(TypeOp op) { // TableGen'd op method definitions //===----------------------------------------------------------------------===// -namespace mlir { -namespace pdl { - #define GET_OP_CLASSES #include "mlir/Dialect/PDL/IR/PDLOps.cpp.inc" - -} // end namespace pdl -} // end namespace mlir diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp index 498246315d642..e36ffc2e6b815 100644 --- a/mlir/lib/Dialect/SCF/SCF.cpp +++ b/mlir/lib/Dialect/SCF/SCF.cpp @@ -899,9 +899,5 @@ static void print(OpAsmPrinter &p, scf::YieldOp op) { // TableGen'd op method definitions //===----------------------------------------------------------------------===// -namespace mlir { -namespace scf { #define GET_OP_CLASSES #include "mlir/Dialect/SCF/SCFOps.cpp.inc" -} // namespace scf -} // namespace mlir diff --git a/mlir/lib/Dialect/SPIRV/SPIRVAttributes.cpp b/mlir/lib/Dialect/SPIRV/SPIRVAttributes.cpp index c2bf4840ddc84..6773862a8cd73 100644 --- a/mlir/lib/Dialect/SPIRV/SPIRVAttributes.cpp +++ b/mlir/lib/Dialect/SPIRV/SPIRVAttributes.cpp @@ -16,9 +16,10 @@ using namespace mlir; // DictionaryDict derived attributes //===----------------------------------------------------------------------===// -namespace mlir { #include "mlir/Dialect/SPIRV/TargetAndABI.cpp.inc" +namespace mlir { + //===----------------------------------------------------------------------===// // Attribute storage classes //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp index c171a755891bb..a16dc1c8bc35d 100644 --- a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp @@ -3266,11 +3266,15 @@ namespace spirv { // TableGen'erated operation interfaces for querying versions, extensions, and // capabilities. #include "mlir/Dialect/SPIRV/SPIRVAvailability.cpp.inc" +} // namespace spirv +} // namespace mlir // TablenGen'erated operation definitions. #define GET_OP_CLASSES #include "mlir/Dialect/SPIRV/SPIRVOps.cpp.inc" +namespace mlir { +namespace spirv { // TableGen'erated operation availability interface implementations. 
#include "mlir/Dialect/SPIRV/SPIRVOpAvailabilityImpl.inc" diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp index bcfaa896f63d2..cd722870f5072 100644 --- a/mlir/lib/Dialect/Shape/IR/Shape.cpp +++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp @@ -938,11 +938,5 @@ static void print(OpAsmPrinter &p, ReduceOp op) { p.printOptionalAttrDict(op.getAttrs()); } -namespace mlir { -namespace shape { - #define GET_OP_CLASSES #include "mlir/Dialect/Shape/IR/ShapeOps.cpp.inc" - -} // namespace shape -} // namespace mlir diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index d00e56297532c..c2b6f31cf1143 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -2688,11 +2688,5 @@ void mlir::vector::populateVectorToVectorCanonicalizationPatterns( TransposeFolder>(context); } -namespace mlir { -namespace vector { - #define GET_OP_CLASSES #include "mlir/Dialect/Vector/VectorOps.cpp.inc" - -} // namespace vector -} // namespace mlir diff --git a/mlir/lib/TableGen/Operator.cpp b/mlir/lib/TableGen/Operator.cpp index 0586cd837e073..24dffa36e13ee 100644 --- a/mlir/lib/TableGen/Operator.cpp +++ b/mlir/lib/TableGen/Operator.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormatVariadic.h" @@ -278,7 +279,7 @@ void Operator::populateTypeInferenceInfo( // Skip cases currently being custom generated. // TODO: Remove special cases. - if (getTrait("OpTrait::SameOperandsAndResultType")) + if (getTrait("::mlir::OpTrait::SameOperandsAndResultType")) return; // We create equivalence classes of argument/result types where arguments @@ -565,6 +566,21 @@ void Operator::print(llvm::raw_ostream &os) const { } } +Operator::NamespaceEmitter::NamespaceEmitter(raw_ostream &os, Operator &op) + : os(os) { + auto dialect = op.getDialect(); + if (!dialect) + return; + llvm::SplitString(dialect.getCppNamespace(), namespaces, "::"); + for (StringRef ns : namespaces) + os << "namespace " << ns << " {\n"; +} + +Operator::NamespaceEmitter::~NamespaceEmitter() { + for (StringRef ns : llvm::reverse(namespaces)) + os << "} // namespace " << ns << "\n"; +} + auto Operator::VariableDecoratorIterator::unwrap(llvm::Init *init) -> VariableDecorator { return VariableDecorator(cast(init)->getDef()); diff --git a/mlir/test/lib/Dialect/Test/TestDialect.h b/mlir/test/lib/Dialect/Test/TestDialect.h index 34fc1a9534e8d..09f84d1ac1339 100644 --- a/mlir/test/lib/Dialect/Test/TestDialect.h +++ b/mlir/test/lib/Dialect/Test/TestDialect.h @@ -29,7 +29,6 @@ #include "TestOpEnums.h.inc" -namespace mlir { #include "TestOpStructs.h.inc" #include "TestOpsDialect.h.inc" @@ -37,8 +36,8 @@ namespace mlir { #define GET_OP_CLASSES #include "TestOps.h.inc" +namespace mlir { void registerTestDialect(DialectRegistry ®istry); - } // end namespace mlir #endif // MLIR_TESTDIALECT_H diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index f03c953396a4a..9ae36ed1710c0 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -22,7 +22,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td" def Test_Dialect : Dialect { let name = "test"; - let cppNamespace = ""; + let cppNamespace = "::mlir"; let hasOperationAttrVerify = 1; let hasRegionArgAttrVerify = 1; let hasRegionResultAttrVerify = 1; diff --git 
a/mlir/test/mlir-tblgen/op-attribute.td b/mlir/test/mlir-tblgen/op-attribute.td index edb387cfa2d49..457aeab18d9ea 100644 --- a/mlir/test/mlir-tblgen/op-attribute.td +++ b/mlir/test/mlir-tblgen/op-attribute.td @@ -275,3 +275,19 @@ def SomeTypedArrayAttr : TypedArrayAttrBase; // RECORD-LABEL: def SomeTypedArrayAttr // RECORD: Attr elementAttr = SomeAttr; + +def Test_Dialect_2 : Dialect { + let name = "dialect_2"; +} +def MyStruct : StructAttr<"MyStruct", Test_Dialect_2, +[StructFieldAttr<"potatoes", I64ElementsAttr>]> { + let description = "A structure describing a number of potatoes."; +} + +def StructAttrOp : NS_Op<"struct_attr_op", []> { + let arguments = (ins + MyStruct:$potatoes + ); +} + +// DECL: dialect_2::MyStruct potatoes(); diff --git a/mlir/test/mlir-tblgen/op-decl.td b/mlir/test/mlir-tblgen/op-decl.td index d1b11556be308..8390dea18ae9e 100644 --- a/mlir/test/mlir-tblgen/op-decl.td +++ b/mlir/test/mlir-tblgen/op-decl.td @@ -61,8 +61,8 @@ def NS_AOp : NS_Op<"a_op", [IsolatedFromAbove, IsolatedFromAbove]> { // CHECK: ::mlir::ValueRange odsOperands; // CHECK: }; -// CHECK: class AOp : public ::mlir::Op::Impl, OpTrait::AtLeastNResults<1>::Impl, OpTrait::ZeroSuccessor, OpTrait::AtLeastNOperands<1>::Impl, OpTrait::IsIsolatedFromAbove -// CHECK-NOT: OpTrait::IsIsolatedFromAbove +// CHECK: class AOp : public ::mlir::Op::Impl, ::mlir::OpTrait::AtLeastNResults<1>::Impl, ::mlir::OpTrait::ZeroSuccessor, ::mlir::OpTrait::AtLeastNOperands<1>::Impl, ::mlir::OpTrait::IsIsolatedFromAbove +// CHECK-NOT: ::mlir::OpTrait::IsIsolatedFromAbove // CHECK: public: // CHECK: using Op::Op; // CHECK: using Adaptor = AOpAdaptor; diff --git a/mlir/tools/mlir-tblgen/DialectGen.cpp b/mlir/tools/mlir-tblgen/DialectGen.cpp index 3a19379da8a3a..4a9ec48b777e2 100644 --- a/mlir/tools/mlir-tblgen/DialectGen.cpp +++ b/mlir/tools/mlir-tblgen/DialectGen.cpp @@ -153,6 +153,15 @@ static void emitDialectDecl(Dialect &dialect, dialectsOs << llvm::formatv(dialectRegistrationTemplate, dependentDialect); } + + // Emit all nested namespaces. + StringRef cppNamespace = dialect.getCppNamespace(); + llvm::SmallVector namespaces; + llvm::SplitString(cppNamespace, namespaces, "::"); + + for (auto ns : namespaces) + os << "namespace " << ns << " {\n"; + // Emit the start of the decl. std::string cppName = dialect.getCppClassName(); os << llvm::formatv(dialectDeclBeginStr, cppName, dialect.getName(), @@ -179,6 +188,10 @@ static void emitDialectDecl(Dialect &dialect, // End the dialect decl. os << "};\n"; + + // Close all nested namespaces in reverse order. + for (auto ns : llvm::reverse(namespaces)) + os << "} // namespace " << ns << "\n"; } static bool emitDialectDecls(const llvm::RecordKeeper &recordKeeper, diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index 0b3ad38b035ff..7f1d729e81b13 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -494,6 +494,7 @@ void OpEmitter::genAttrGetters() { FmtContext fctx; fctx.withBuilder("::mlir::Builder(this->getContext())"); + Dialect opDialect = op.getDialect(); // Emit the derived attribute body. auto emitDerivedAttr = [&](StringRef name, Attribute attr) { auto &method = opClass.newMethod(attr.getReturnType(), name); @@ -503,7 +504,16 @@ void OpEmitter::genAttrGetters() { // Emit with return type specified. 
auto emitAttrWithReturnType = [&](StringRef name, Attribute attr) { - auto &method = opClass.newMethod(attr.getReturnType(), name); + Dialect attrDialect = attr.getDialect(); + // Does the current operation have a different namespace than the attribute? + bool differentNamespace = + attrDialect && opDialect && attrDialect != opDialect; + std::string returnType = differentNamespace + ? (llvm::Twine(attrDialect.getCppNamespace()) + + "::" + attr.getReturnType()) + .str() + : attr.getReturnType().str(); + auto &method = opClass.newMethod(returnType, name); auto &body = method.body(); body << " auto attr = " << name << "Attr();\n"; if (attr.hasDefaultValue()) { @@ -684,9 +694,9 @@ static void generateNamedOperandGetters(const Operator &op, Class &opClass, const int numNormalOperands = numOperands - numVariadicOperands; const auto *sameVariadicSize = - op.getTrait("OpTrait::SameVariadicOperandSize"); + op.getTrait("::mlir::OpTrait::SameVariadicOperandSize"); const auto *attrSizedOperands = - op.getTrait("OpTrait::AttrSizedOperandSegments"); + op.getTrait("::mlir::OpTrait::AttrSizedOperandSegments"); if (numVariadicOperands > 1 && !sameVariadicSize && !attrSizedOperands) { PrintFatalError(op.getLoc(), "op has multiple variadic operands but no " @@ -748,7 +758,8 @@ void OpEmitter::genNamedOperandGetters() { } void OpEmitter::genNamedOperandSetters() { - auto *attrSizedOperands = op.getTrait("OpTrait::AttrSizedOperandSegments"); + auto *attrSizedOperands = + op.getTrait("::mlir::OpTrait::AttrSizedOperandSegments"); for (int i = 0, e = op.getNumOperands(); i != e; ++i) { const auto &operand = op.getOperand(i); if (operand.name.empty()) @@ -775,9 +786,10 @@ void OpEmitter::genNamedResultGetters() { // If we have more than one variadic results, we need more complicated logic // to calculate the value range for each result. - const auto *sameVariadicSize = op.getTrait("OpTrait::SameVariadicResultSize"); + const auto *sameVariadicSize = + op.getTrait("::mlir::OpTrait::SameVariadicResultSize"); const auto *attrSizedResults = - op.getTrait("OpTrait::AttrSizedResultSegments"); + op.getTrait("::mlir::OpTrait::AttrSizedResultSegments"); if (numVariadicResults > 1 && !sameVariadicSize && !attrSizedResults) { PrintFatalError(op.getLoc(), "op has multiple variadic results but no " @@ -1213,7 +1225,7 @@ void OpEmitter::genBuilder() { // use the first operand or attribute's type as all result types // to facilitate different call patterns. if (op.getNumVariableLengthResults() == 0) { - if (op.getTrait("OpTrait::SameOperandsAndResultType")) { + if (op.getTrait("::mlir::OpTrait::SameOperandsAndResultType")) { // If the operation has a single variadic input, then the build method // generated by `genUseOperandAsResultTypeSeparateParamBuilder` will be // ambiguous with the one generated by @@ -1230,7 +1242,7 @@ void OpEmitter::genBuilder() { if (!shouldGenerateInferredTypeCollectiveParamBuilder()) genUseOperandAsResultTypeCollectiveParamBuilder(); } - if (op.getTrait("OpTrait::FirstAttrDerivedResultType")) + if (op.getTrait("::mlir::OpTrait::FirstAttrDerivedResultType")) genUseAttrAsResultTypeBuilder(); } } @@ -1435,7 +1447,7 @@ void OpEmitter::genCodeForAddingArgAndRegionForBuilder(OpMethodBody &body, } // If the operation has the operand segment size attribute, add it here. 
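The lambda change above prefixes the attribute dialect's namespace only when it differs from the op's, which is what makes the dialect_2::MyStruct getter in the op-attribute.td test come out qualified. A hedged sketch of that decision in isolation, with string stand-ins for tblgen's Dialect and Attribute wrappers (the names below are illustrative, not from the patch):

#include <string>

// Qualify the getter's return type only when the attribute's dialect
// differs from the op's, so same-dialect getters keep their short spelling.
std::string attrGetterReturnType(const std::string &opNamespace,
                                 const std::string &attrNamespace,
                                 const std::string &returnType) {
  bool differentNamespace = !attrNamespace.empty() && !opNamespace.empty() &&
                            attrNamespace != opNamespace;
  return differentNamespace ? attrNamespace + "::" + returnType : returnType;
}

// attrGetterReturnType("::mlir", "dialect_2", "MyStruct")
//   -> "dialect_2::MyStruct", matching the DECL check line above.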
- if (op.getTrait("OpTrait::AttrSizedOperandSegments")) { + if (op.getTrait("::mlir::OpTrait::AttrSizedOperandSegments")) { body << " " << builderOpState << ".addAttribute(\"operand_segment_sizes\", " "odsBuilder.getI32VectorAttr({"; @@ -1695,7 +1707,7 @@ void OpEmitter::genTypeInterfaceMethods() { continue; // TODO: We could verify equality here, but skipping that for verification. } - os << " return success();"; + os << " return ::mlir::success();"; } void OpEmitter::genParser() { @@ -1735,7 +1747,7 @@ void OpEmitter::genVerifier() { auto &body = method.body(); body << " if (failed(" << op.getAdaptorName() << "(*this).verify(this->getLoc()))) " - << "return failure();\n"; + << "return ::mlir::failure();\n"; auto *valueInit = def.getValueInit("verifier"); CodeInit *codeInit = dyn_cast(valueInit); @@ -1904,21 +1916,21 @@ static void addSizeCountTrait(OpClass &opClass, StringRef traitKind, int numTotal, int numVariadic) { if (numVariadic != 0) { if (numTotal == numVariadic) - opClass.addTrait("OpTrait::Variadic" + traitKind + "s"); + opClass.addTrait("::mlir::OpTrait::Variadic" + traitKind + "s"); else - opClass.addTrait("OpTrait::AtLeastN" + traitKind + "s<" + + opClass.addTrait("::mlir::OpTrait::AtLeastN" + traitKind + "s<" + Twine(numTotal - numVariadic) + ">::Impl"); return; } switch (numTotal) { case 0: - opClass.addTrait("OpTrait::Zero" + traitKind); + opClass.addTrait("::mlir::OpTrait::Zero" + traitKind); break; case 1: - opClass.addTrait("OpTrait::One" + traitKind); + opClass.addTrait("::mlir::OpTrait::One" + traitKind); break; default: - opClass.addTrait("OpTrait::N" + traitKind + "s<" + Twine(numTotal) + + opClass.addTrait("::mlir::OpTrait::N" + traitKind + "s<" + Twine(numTotal) + ">::Impl"); break; } @@ -1947,20 +1959,21 @@ void OpEmitter::genTraits() { // Add operand size trait. if (numVariadicOperands != 0) { if (numOperands == numVariadicOperands) - opClass.addTrait("OpTrait::VariadicOperands"); + opClass.addTrait("::mlir::OpTrait::VariadicOperands"); else - opClass.addTrait("OpTrait::AtLeastNOperands<" + + opClass.addTrait("::mlir::OpTrait::AtLeastNOperands<" + Twine(numOperands - numVariadicOperands) + ">::Impl"); } else { switch (numOperands) { case 0: - opClass.addTrait("OpTrait::ZeroOperands"); + opClass.addTrait("::mlir::OpTrait::ZeroOperands"); break; case 1: - opClass.addTrait("OpTrait::OneOperand"); + opClass.addTrait("::mlir::OpTrait::OneOperand"); break; default: - opClass.addTrait("OpTrait::NOperands<" + Twine(numOperands) + ">::Impl"); + opClass.addTrait("::mlir::OpTrait::NOperands<" + Twine(numOperands) + + ">::Impl"); break; } } @@ -2042,7 +2055,7 @@ OpOperandAdaptorEmitter::OpOperandAdaptorEmitter(const Operator &op) adaptor.newField("::mlir::ValueRange", "odsOperands"); adaptor.newField("::mlir::DictionaryAttr", "odsAttrs"); const auto *attrSizedOperands = - op.getTrait("OpTrait::AttrSizedOperandSegments"); + op.getTrait("::mlir::OpTrait::AttrSizedOperandSegments"); { auto &constructor = adaptor.newConstructor( attrSizedOperands @@ -2125,11 +2138,11 @@ void OpOperandAdaptorEmitter::addVerification() { // getODSOperands()/getODSResults() in the rest of the verifier. 
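Every trait rename in this file follows one mechanical rule: any string that will be spliced into generated C++ must name the trait by absolute path, because the generated class is no longer guaranteed to sit inside namespace mlir. A small illustrative helper (not part of the patch) showing the shape of the strings that addSizeCountTrait and genTraits now build with Twine:

#include <string>

// Illustrative only: compose a fully qualified trait spelling.
std::string atLeastNTrait(const std::string &kind, int n) {
  // e.g. atLeastNTrait("Operand", 1) ==
  //      "::mlir::OpTrait::AtLeastNOperands<1>::Impl"
  return "::mlir::OpTrait::AtLeastN" + kind + "s<" + std::to_string(n) +
         ">::Impl";
}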
for (auto &trait : op.getTraits()) { if (auto *t = dyn_cast(&trait)) { - if (t->getTrait() == "OpTrait::AttrSizedOperandSegments") { + if (t->getTrait() == "::mlir::OpTrait::AttrSizedOperandSegments") { body << formatv(checkAttrSizedValueSegmentsCode, "operand_segment_sizes", op.getNumOperands(), "operand"); - } else if (t->getTrait() == "OpTrait::AttrSizedResultSegments") { + } else if (t->getTrait() == "::mlir::OpTrait::AttrSizedResultSegments") { body << formatv(checkAttrSizedValueSegmentsCode, "result_segment_sizes", op.getNumResults(), "result"); } @@ -2144,7 +2157,7 @@ void OpOperandAdaptorEmitter::addVerification() { "' op \"", /*emitVerificationRequiringOp*/ false, verifyCtx, body); - body << " return success();"; + body << " return ::mlir::success();"; } void OpOperandAdaptorEmitter::emitDecl(const Operator &op, raw_ostream &os) { @@ -2165,6 +2178,7 @@ static void emitOpClasses(const std::vector &defs, raw_ostream &os, os << "#undef GET_OP_FWD_DEFINES\n"; for (auto *def : defs) { Operator op(*def); + Operator::NamespaceEmitter emitter(os, op); os << "class " << op.getCppClassName() << ";\n"; } os << "#endif\n\n"; @@ -2173,6 +2187,7 @@ static void emitOpClasses(const std::vector &defs, raw_ostream &os, IfDefScope scope("GET_OP_CLASSES", os); for (auto *def : defs) { Operator op(*def); + Operator::NamespaceEmitter emitter(os, op); if (emitDecl) { os << formatv(opCommentHeader, op.getQualCppClassName(), "declarations"); OpOperandAdaptorEmitter::emitDecl(op, os); diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index 1542e9c55e41c..5e10413577223 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -439,14 +439,14 @@ static bool shouldFormatSymbolNameAttr(const NamedAttribute *attr) { /// {1}: The type for the attribute. const char *const attrParserCode = R"( if (parser.parseAttribute({0}Attr{1}, "{0}", result.attributes)) - return failure(); + return ::mlir::failure(); )"; const char *const optionalAttrParserCode = R"( { ::mlir::OptionalParseResult parseResult = parser.parseOptionalAttribute({0}Attr{1}, "{0}", result.attributes); if (parseResult.hasValue() && failed(*parseResult)) - return failure(); + return ::mlir::failure(); } )"; @@ -455,7 +455,7 @@ const char *const optionalAttrParserCode = R"( /// {0}: The name of the attribute. 
const char *const symbolNameAttrParserCode = R"( if (parser.parseSymbolName({0}Attr, "{0}", result.attributes)) - return failure(); + return ::mlir::failure(); )"; const char *const optionalSymbolNameAttrParserCode = R"( // Parsing an optional symbol name doesn't fail, so no need to check the @@ -476,7 +476,7 @@ const char *const enumAttrParserCode = R"( auto loc = parser.getCurrentLocation(); if (parser.parseAttribute(attrVal, parser.getBuilder().getNoneType(), "{0}", attrStorage)) - return failure(); + return ::mlir::failure(); auto attrOptional = {1}::{2}(attrVal.getValue()); if (!attrOptional) @@ -498,7 +498,7 @@ const char *const optionalEnumAttrParserCode = R"( "{0}", attrStorage); if (parseResult.hasValue()) { if (failed(*parseResult)) - return failure(); + return ::mlir::failure(); auto attrOptional = {1}::{2}(attrVal.getValue()); if (!attrOptional) @@ -517,7 +517,7 @@ const char *const optionalEnumAttrParserCode = R"( const char *const variadicOperandParserCode = R"( {0}OperandsLoc = parser.getCurrentLocation(); if (parser.parseOperandList({0}Operands)) - return failure(); + return ::mlir::failure(); )"; const char *const optionalOperandParserCode = R"( { @@ -527,7 +527,7 @@ const char *const optionalOperandParserCode = R"( parser.parseOptionalOperand(operand); if (parseResult.hasValue()) { if (failed(*parseResult)) - return failure(); + return ::mlir::failure(); {0}Operands.push_back(operand); } } @@ -535,7 +535,7 @@ const char *const optionalOperandParserCode = R"( const char *const operandParserCode = R"( {0}OperandsLoc = parser.getCurrentLocation(); if (parser.parseOperand({0}RawOperands[0])) - return failure(); + return ::mlir::failure(); )"; /// The code snippet used to generate a parser call for a type list. @@ -543,7 +543,7 @@ const char *const operandParserCode = R"( /// {0}: The name for the type list. const char *const variadicTypeParserCode = R"( if (parser.parseTypeList({0}Types)) - return failure(); + return ::mlir::failure(); )"; const char *const optionalTypeParserCode = R"( { @@ -552,14 +552,14 @@ const char *const optionalTypeParserCode = R"( parser.parseOptionalType(optionalType); if (parseResult.hasValue()) { if (failed(*parseResult)) - return failure(); + return ::mlir::failure(); {0}Types.push_back(optionalType); } } )"; const char *const typeParserCode = R"( if (parser.parseType({0}RawTypes[0])) - return failure(); + return ::mlir::failure(); )"; /// The code snippet used to generate a parser call for a functional type. @@ -569,7 +569,7 @@ const char *const typeParserCode = R"( const char *const functionalTypeParserCode = R"( ::mlir::FunctionType {0}__{1}_functionType; if (parser.parseType({0}__{1}_functionType)) - return failure(); + return ::mlir::failure(); {0}Types = {0}__{1}_functionType.getInputs(); {1}Types = {0}__{1}_functionType.getResults(); )"; @@ -583,14 +583,14 @@ const char *regionListParserCode = R"( auto firstRegionResult = parser.parseOptionalRegion(region); if (firstRegionResult.hasValue()) { if (failed(*firstRegionResult)) - return failure(); + return ::mlir::failure(); {0}Regions.emplace_back(std::move(region)); // Parse any trailing regions. while (succeeded(parser.parseOptionalComma())) { region = std::make_unique<::mlir::Region>(); if (parser.parseRegion(*region)) - return failure(); + return ::mlir::failure(); {0}Regions.emplace_back(std::move(region)); } } @@ -610,7 +610,7 @@ const char *regionListEnsureTerminatorParserCode = R"( /// {0}: The name of the region. 
const char *optionalRegionParserCode = R"( if (parser.parseOptionalRegion(*{0}Region)) - return failure(); + return ::mlir::failure(); )"; /// The code snippet used to generate a parser call for a region. @@ -618,7 +618,7 @@ const char *optionalRegionParserCode = R"( /// {0}: The name of the region. const char *regionParserCode = R"( if (parser.parseRegion(*{0}Region)) - return failure(); + return ::mlir::failure(); )"; /// The code snippet used to ensure a region has a terminator. @@ -637,13 +637,13 @@ const char *successorListParserCode = R"( auto firstSucc = parser.parseOptionalSuccessor(succ); if (firstSucc.hasValue()) { if (failed(*firstSucc)) - return failure(); + return ::mlir::failure(); {0}Successors.emplace_back(succ); // Parse any trailing successors. while (succeeded(parser.parseOptionalComma())) { if (parser.parseSuccessor(succ)) - return failure(); + return ::mlir::failure(); {0}Successors.emplace_back(succ); } } @@ -655,7 +655,7 @@ const char *successorListParserCode = R"( /// {0}: The name of the successor. const char *successorParserCode = R"( if (parser.parseSuccessor({0}Successor)) - return failure(); + return ::mlir::failure(); )"; namespace { @@ -889,7 +889,7 @@ static void genCustomDirectiveParser(CustomDirective *dir, OpMethodBody &body) { genCustomParameterParser(param, body); body << "))\n" - << " return failure();\n"; + << " return ::mlir::failure();\n"; // After parsing, add handling for any of the optional constructs. for (Element ¶m : dir->getArguments()) { @@ -949,7 +949,7 @@ void OperationFormat::genParser(Operator &op, OpClass &opClass) { genParserSuccessorResolution(op, body); genParserVariadicSegmentResolution(op, body); - body << " return success();\n"; + body << " return ::mlir::success();\n"; } void OperationFormat::genElementParser(Element *element, OpMethodBody &body, @@ -1007,7 +1007,7 @@ void OperationFormat::genElementParser(Element *element, OpMethodBody &body, } else if (LiteralElement *literal = dyn_cast(element)) { body << " if (parser.parse"; genLiteralParser(literal->getLiteral(), body); - body << ")\n return failure();\n"; + body << ")\n return ::mlir::failure();\n"; /// Arguments. } else if (auto *attr = dyn_cast(element)) { @@ -1081,14 +1081,14 @@ void OperationFormat::genElementParser(Element *element, OpMethodBody &body, body << " if (parser.parseOptionalAttrDict" << (attrDict->isWithKeyword() ? "WithKeyword" : "") << "(result.attributes))\n" - << " return failure();\n"; + << " return ::mlir::failure();\n"; } else if (auto *customDir = dyn_cast(element)) { genCustomDirectiveParser(customDir, body); } else if (isa(element)) { body << " ::llvm::SMLoc allOperandLoc = parser.getCurrentLocation();\n" << " if (parser.parseOperandList(allOperands))\n" - << " return failure();\n"; + << " return ::mlir::failure();\n"; } else if (isa(element)) { body << llvm::formatv(regionListParserCode, "full"); @@ -1197,7 +1197,7 @@ void OperationFormat::genParserTypeResolution(Operator &op, if (allOperands) { body << " if (parser.resolveOperands(allOperands, allOperandTypes, " "allOperandLoc, result.operands))\n" - " return failure();\n"; + " return ::mlir::failure();\n"; return; } @@ -1214,7 +1214,7 @@ void OperationFormat::genParserTypeResolution(Operator &op, body << op.operand_begin()->name << "Operands"; } body << ", allOperandTypes, parser.getNameLoc(), result.operands))\n" - << " return failure();\n"; + << " return ::mlir::failure();\n"; return; } // Handle the case where all of the operands were grouped together. 
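These raw-string parser snippets are pasted verbatim into generated parse functions, and after this patch those functions can be emitted into an arbitrary dialect namespace, so an unqualified success() or failure() would no longer resolve. A reduced illustration of the lookup problem, using a stand-in LogicalResult rather than the real mlir/Support type:

// Stand-in for mlir::LogicalResult; illustrative only.
namespace mlir {
struct LogicalResult {
  bool failed;
};
inline LogicalResult failure() { return {true}; }
inline LogicalResult success() { return {false}; }
} // namespace mlir

namespace mycompany {
namespace mydialect {
::mlir::LogicalResult parseThing(bool sawError) {
  if (sawError)
    return ::mlir::failure(); // resolves from any enclosing namespace
  return ::mlir::success();   // an unqualified success() would not compile here
}
} // namespace mydialect
} // namespace mycompany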
@@ -1238,7 +1238,7 @@ void OperationFormat::genParserTypeResolution(Operator &op, } body << ", allOperandLoc, result.operands))\n" - << " return failure();\n"; + << " return ::mlir::failure();\n"; return; } @@ -1270,7 +1270,7 @@ void OperationFormat::genParserTypeResolution(Operator &op, // overload. if (verifyOperandAndTypeSize) body << ", " << operand.name << "OperandsLoc"; - body << ", result.operands))\n return failure();\n"; + body << ", result.operands))\n return ::mlir::failure();\n"; } } @@ -1314,7 +1314,8 @@ void OperationFormat::genParserSuccessorResolution(Operator &op, void OperationFormat::genParserVariadicSegmentResolution(Operator &op, OpMethodBody &body) { - if (!allOperands && op.getTrait("OpTrait::AttrSizedOperandSegments")) { + if (!allOperands && + op.getTrait("::mlir::OpTrait::AttrSizedOperandSegments")) { body << " result.addAttribute(\"operand_segment_sizes\", " << "parser.getBuilder().getI32VectorAttr({"; auto interleaveFn = [&](const NamedTypeConstraint &operand) { @@ -1328,7 +1329,8 @@ void OperationFormat::genParserVariadicSegmentResolution(Operator &op, body << "}));\n"; } - if (!allResultTypes && op.getTrait("OpTrait::AttrSizedResultSegments")) { + if (!allResultTypes && + op.getTrait("::mlir::OpTrait::AttrSizedResultSegments")) { body << " result.addAttribute(\"result_segment_sizes\", " << "parser.getBuilder().getI32VectorAttr({"; auto interleaveFn = [&](const NamedTypeConstraint &result) { @@ -1369,9 +1371,11 @@ static void genAttrDictPrinter(OperationFormat &fmt, Operator &op, body << " p.printOptionalAttrDict" << (withKeyword ? "WithKeyword" : "") << "(getAttrs(), /*elidedAttrs=*/{"; // Elide the variadic segment size attributes if necessary. - if (!fmt.allOperands && op.getTrait("OpTrait::AttrSizedOperandSegments")) + if (!fmt.allOperands && + op.getTrait("::mlir::OpTrait::AttrSizedOperandSegments")) body << "\"operand_segment_sizes\", "; - if (!fmt.allResultTypes && op.getTrait("OpTrait::AttrSizedResultSegments")) + if (!fmt.allResultTypes && + op.getTrait("::mlir::OpTrait::AttrSizedResultSegments")) body << "\"result_segment_sizes\", "; llvm::interleaveComma( fmt.usedAttributes, body, @@ -1607,7 +1611,7 @@ void OperationFormat::genElementPrinter(Element *element, OpMethodBody &body, } void OperationFormat::genPrinter(Operator &op, OpClass &opClass) { - auto &method = opClass.newMethod("void", "print", "OpAsmPrinter &p"); + auto &method = opClass.newMethod("void", "print", "::mlir::OpAsmPrinter &p"); auto &body = method.body(); // Emit the operation name, trimming the prefix if this is the standard @@ -2004,16 +2008,16 @@ class FormatParser { if (curToken.getKind() != kind) return emitError(curToken.getLoc(), msg); consumeToken(); - return success(); + return ::mlir::success(); } LogicalResult emitError(llvm::SMLoc loc, const Twine &msg) { lexer.emitError(loc, msg); - return failure(); + return ::mlir::failure(); } LogicalResult emitErrorAndNote(llvm::SMLoc loc, const Twine &msg, const Twine ¬e) { lexer.emitErrorAndNote(loc, msg, note); - return failure(); + return ::mlir::failure(); } //===--------------------------------------------------------------------===// @@ -2045,7 +2049,7 @@ LogicalResult FormatParser::parse() { while (curToken.getKind() != Token::eof) { std::unique_ptr element; if (failed(parseElement(element, /*isTopLevel=*/true))) - return failure(); + return ::mlir::failure(); fmt.elements.push_back(std::move(element)); } @@ -2075,11 +2079,11 @@ LogicalResult FormatParser::parse() { failed(verifyResults(loc, variableTyResolver)) || 
failed(verifyOperands(loc, variableTyResolver)) || failed(verifyRegions(loc)) || failed(verifySuccessors(loc))) - return failure(); + return ::mlir::failure(); // Collect the set of used attributes in the format. fmt.usedAttributes = seenAttrs.takeVector(); - return success(); + return ::mlir::success(); } LogicalResult FormatParser::verifyAttributes(llvm::SMLoc loc) { @@ -2093,8 +2097,8 @@ LogicalResult FormatParser::verifyAttributes(llvm::SMLoc loc) { iteratorStack.emplace_back(fmt.elements.begin(), fmt.elements.end()); while (!iteratorStack.empty()) if (failed(verifyAttributes(loc, iteratorStack))) - return failure(); - return success(); + return ::mlir::failure(); + return ::mlir::success(); } /// Verify the attribute elements at the back of the given stack of iterators. LogicalResult FormatParser::verifyAttributes( @@ -2109,7 +2113,7 @@ LogicalResult FormatParser::verifyAttributes( if (auto *optional = dyn_cast(element)) { auto elements = optional->getElements(); iteratorStack.emplace_back(elements.begin(), elements.end()); - return success(); + return ::mlir::success(); } // We are checking for an attribute element followed by a `:`, so there is @@ -2145,7 +2149,7 @@ LogicalResult FormatParser::verifyAttributes( } } iteratorStack.pop_back(); - return success(); + return ::mlir::success(); } LogicalResult FormatParser::verifyOperands( @@ -2193,13 +2197,13 @@ LogicalResult FormatParser::verifyOperands( auto it = buildableTypes.insert({*builder, buildableTypes.size()}); fmt.operandTypes[i].setBuilderIdx(it.first->second); } - return success(); + return ::mlir::success(); } LogicalResult FormatParser::verifyRegions(llvm::SMLoc loc) { // Check that all of the regions are within the format. if (hasAllRegions) - return success(); + return ::mlir::success(); for (unsigned i = 0, e = op.getNumRegions(); i != e; ++i) { const NamedRegion ®ion = op.getRegion(i); @@ -2211,7 +2215,7 @@ LogicalResult FormatParser::verifyRegions(llvm::SMLoc loc) { "' directive to the custom assembly format"); } } - return success(); + return ::mlir::success(); } LogicalResult FormatParser::verifyResults( @@ -2219,7 +2223,7 @@ LogicalResult FormatParser::verifyResults( llvm::StringMap &variableTyResolver) { // If we format all of the types together, there is nothing to check. if (fmt.allResultTypes) - return success(); + return ::mlir::success(); // Check that all of the result types can be inferred. auto &buildableTypes = fmt.buildableTypes; @@ -2252,13 +2256,13 @@ LogicalResult FormatParser::verifyResults( auto it = buildableTypes.insert({*builder, buildableTypes.size()}); fmt.resultTypes[i].setBuilderIdx(it.first->second); } - return success(); + return ::mlir::success(); } LogicalResult FormatParser::verifySuccessors(llvm::SMLoc loc) { // Check that all of the successors are within the format. 
if (hasAllSuccessors) - return success(); + return ::mlir::success(); for (unsigned i = 0, e = op.getNumSuccessors(); i != e; ++i) { const NamedSuccessor &successor = op.getSuccessor(i); @@ -2270,7 +2274,7 @@ LogicalResult FormatParser::verifySuccessors(llvm::SMLoc loc) { "' directive to the custom assembly format"); } } - return success(); + return ::mlir::success(); } void FormatParser::handleAllTypesMatchConstraint( @@ -2368,7 +2372,7 @@ LogicalResult FormatParser::parseVariable(std::unique_ptr &element, if (isTopLevel && !seenAttrs.insert(attr)) return emitError(loc, "attribute '" + name + "' is already bound"); element = std::make_unique(attr); - return success(); + return ::mlir::success(); } /// Operands if (const NamedTypeConstraint *operand = findArg(op.getOperands(), name)) { @@ -2377,7 +2381,7 @@ LogicalResult FormatParser::parseVariable(std::unique_ptr &element, return emitError(loc, "operand '" + name + "' is already bound"); } element = std::make_unique(operand); - return success(); + return ::mlir::success(); } /// Regions if (const NamedRegion *region = findArg(op.getRegions(), name)) { @@ -2386,14 +2390,14 @@ LogicalResult FormatParser::parseVariable(std::unique_ptr &element, if (hasAllRegions || !seenRegions.insert(region).second) return emitError(loc, "region '" + name + "' is already bound"); element = std::make_unique(region); - return success(); + return ::mlir::success(); } /// Results. if (const auto *result = findArg(op.getResults(), name)) { if (isTopLevel) return emitError(loc, "results can not be used at the top level"); element = std::make_unique(result); - return success(); + return ::mlir::success(); } /// Successors. if (const auto *successor = findArg(op.getSuccessors(), name)) { @@ -2402,7 +2406,7 @@ LogicalResult FormatParser::parseVariable(std::unique_ptr &element, if (hasAllSuccessors || !seenSuccessors.insert(successor).second) return emitError(loc, "successor '" + name + "' is already bound"); element = std::make_unique(successor); - return success(); + return ::mlir::success(); } return emitError(loc, "expected variable to refer to an argument, region, " "result, or successor"); @@ -2450,7 +2454,7 @@ LogicalResult FormatParser::parseLiteral(std::unique_ptr &element) { return emitError(literalTok.getLoc(), "expected valid literal"); element = std::make_unique(value); - return success(); + return ::mlir::success(); } LogicalResult FormatParser::parseOptional(std::unique_ptr &element, @@ -2467,11 +2471,11 @@ LogicalResult FormatParser::parseOptional(std::unique_ptr &element, Optional anchorIdx; do { if (failed(parseOptionalChildElement(elements, seenVariables, anchorIdx))) - return failure(); + return ::mlir::failure(); } while (curToken.getKind() != Token::r_paren); consumeToken(); if (failed(parseToken(Token::question, "expected '?' after optional group"))) - return failure(); + return ::mlir::failure(); // The optional group is required to have an anchor. 
if (!anchorIdx) @@ -2494,22 +2498,22 @@ LogicalResult FormatParser::parseOptional(std::unique_ptr &element, if (!seenVariables.count(var)) return emitError(curLoc, "type directive can only refer to variables " "within the optional group"); - return success(); + return ::mlir::success(); }; for (auto &ele : elements) { if (auto *typeEle = dyn_cast(ele.get())) { if (failed(checkTypeOperand(typeEle->getOperand()))) - return failure(); + return ::mlir::failure(); } else if (auto *typeEle = dyn_cast(ele.get())) { if (failed(checkTypeOperand(typeEle->getInputs())) || failed(checkTypeOperand(typeEle->getResults()))) - return failure(); + return ::mlir::failure(); } } optionalVariables.insert(seenVariables.begin(), seenVariables.end()); element = std::make_unique(std::move(elements), *anchorIdx); - return success(); + return ::mlir::success(); } LogicalResult FormatParser::parseOptionalChildElement( @@ -2519,7 +2523,7 @@ LogicalResult FormatParser::parseOptionalChildElement( llvm::SMLoc childLoc = curToken.getLoc(); childElements.push_back({}); if (failed(parseElement(childElements.back(), /*isTopLevel=*/true))) - return failure(); + return ::mlir::failure(); // Check to see if this element is the anchor of the optional group. bool isAnchor = curToken.getKind() == Token::caret; @@ -2538,7 +2542,7 @@ LogicalResult FormatParser::parseOptionalChildElement( if (isAnchor && !attrEle->getVar()->attr.isOptional()) return emitError(childLoc, "only optional attributes can be used to " "anchor an optional group"); - return success(); + return ::mlir::success(); }) // Only optional-like(i.e. variadic) operands can be within an optional // group. @@ -2547,12 +2551,12 @@ LogicalResult FormatParser::parseOptionalChildElement( return emitError(childLoc, "only variable length operands can be " "used within an optional group"); seenVariables.insert(ele->getVar()); - return success(); + return ::mlir::success(); }) .Case([&](RegionVariable *) { // TODO: When ODS has proper support for marking "optional" regions, add // a check here. - return success(); + return ::mlir::success(); }) // Literals, custom directives, and type directives may be used, // but they can't anchor the group. @@ -2561,7 +2565,7 @@ LogicalResult FormatParser::parseOptionalChildElement( if (isAnchor) return emitError(childLoc, "only variables can be used to anchor " "an optional group"); - return success(); + return ::mlir::success(); }) .Default([&](Element *) { return emitError(childLoc, "only literals, types, and variables can be " @@ -2581,7 +2585,7 @@ FormatParser::parseAttrDictDirective(std::unique_ptr &element, hasAttrDict = true; element = std::make_unique(withKeyword); - return success(); + return ::mlir::success(); } LogicalResult @@ -2592,7 +2596,7 @@ FormatParser::parseCustomDirective(std::unique_ptr &element, // Parse the custom directive name. 
if (failed( parseToken(Token::less, "expected '<' before custom directive name"))) - return failure(); + return ::mlir::failure(); Token nameTok = curToken; if (failed(parseToken(Token::identifier, @@ -2601,13 +2605,13 @@ FormatParser::parseCustomDirective(std::unique_ptr &element, "expected '>' after custom directive name")) || failed(parseToken(Token::l_paren, "expected '(' before custom directive parameters"))) - return failure(); + return ::mlir::failure(); // Parse the child elements for this optional group.= std::vector> elements; do { if (failed(parseCustomDirectiveParameter(elements))) - return failure(); + return ::mlir::failure(); if (curToken.getKind() != Token::comma) break; consumeToken(); @@ -2615,7 +2619,7 @@ FormatParser::parseCustomDirective(std::unique_ptr &element, if (failed(parseToken(Token::r_paren, "expected ')' after custom directive parameters"))) - return failure(); + return ::mlir::failure(); // After parsing all of the elements, ensure that all type directives refer // only to variables. @@ -2630,7 +2634,7 @@ FormatParser::parseCustomDirective(std::unique_ptr &element, element = std::make_unique(nameTok.getSpelling(), std::move(elements)); - return success(); + return ::mlir::success(); } LogicalResult FormatParser::parseCustomDirectiveParameter( @@ -2638,7 +2642,7 @@ LogicalResult FormatParser::parseCustomDirectiveParameter( llvm::SMLoc childLoc = curToken.getLoc(); parameters.push_back({}); if (failed(parseElement(parameters.back(), /*isTopLevel=*/true))) - return failure(); + return ::mlir::failure(); // Verify that the element can be placed within a custom directive. if (!isa &element, failed(parseToken(Token::comma, "expected ',' after inputs argument")) || failed(parseTypeDirectiveOperand(results)) || failed(parseToken(Token::r_paren, "expected ')' after argument list"))) - return failure(); + return ::mlir::failure(); element = std::make_unique(std::move(inputs), std::move(results)); - return success(); + return ::mlir::success(); } LogicalResult @@ -2679,7 +2683,7 @@ FormatParser::parseOperandsDirective(std::unique_ptr &element, fmt.allOperands = true; } element = std::make_unique(); - return success(); + return ::mlir::success(); } LogicalResult @@ -2691,7 +2695,7 @@ FormatParser::parseRegionsDirective(std::unique_ptr &element, return emitError(loc, "'regions' directive creates overlap in format"); hasAllRegions = true; element = std::make_unique(); - return success(); + return ::mlir::success(); } LogicalResult @@ -2701,7 +2705,7 @@ FormatParser::parseResultsDirective(std::unique_ptr &element, return emitError(loc, "'results' directive can not be used as a " "top-level directive"); element = std::make_unique(); - return success(); + return ::mlir::success(); } LogicalResult @@ -2714,7 +2718,7 @@ FormatParser::parseSuccessorsDirective(std::unique_ptr &element, return emitError(loc, "'successors' directive creates overlap in format"); hasAllSuccessors = true; element = std::make_unique(); - return success(); + return ::mlir::success(); } LogicalResult @@ -2728,16 +2732,16 @@ FormatParser::parseTypeDirective(std::unique_ptr &element, Token tok, if (failed(parseToken(Token::l_paren, "expected '(' before argument list")) || failed(parseTypeDirectiveOperand(operand)) || failed(parseToken(Token::r_paren, "expected ')' after argument list"))) - return failure(); + return ::mlir::failure(); element = std::make_unique(std::move(operand)); - return success(); + return ::mlir::success(); } LogicalResult FormatParser::parseTypeDirectiveOperand(std::unique_ptr 
&element) { llvm::SMLoc loc = curToken.getLoc(); if (failed(parseElement(element, /*isTopLevel=*/false))) - return failure(); + return ::mlir::failure(); if (isa(element.get())) return emitError( loc, "'type' directive operand expects variable or directive operand"); @@ -2765,7 +2769,7 @@ FormatParser::parseTypeDirectiveOperand(std::unique_ptr &element) { } else { return emitError(loc, "invalid argument to 'type' directive"); } - return success(); + return ::mlir::success(); } //===----------------------------------------------------------------------===// diff --git a/mlir/tools/mlir-tblgen/RewriterGen.cpp b/mlir/tools/mlir-tblgen/RewriterGen.cpp index 9884d1ccb077d..9b2f35f566246 100644 --- a/mlir/tools/mlir-tblgen/RewriterGen.cpp +++ b/mlir/tools/mlir-tblgen/RewriterGen.cpp @@ -887,8 +887,9 @@ std::string PatternEmitter::handleOpCreation(DagNode tree, int resultIndex, // special cases listed below, DRR needs to supply types for all results // when building an op. bool isSameOperandsAndResultType = - resultOp.getTrait("OpTrait::SameOperandsAndResultType"); - bool useFirstAttr = resultOp.getTrait("OpTrait::FirstAttrDerivedResultType"); + resultOp.getTrait("::mlir::OpTrait::SameOperandsAndResultType"); + bool useFirstAttr = + resultOp.getTrait("::mlir::OpTrait::FirstAttrDerivedResultType"); if (isSameOperandsAndResultType || useFirstAttr) { // We know how to deduce the result type for ops with these traits and we've From db94df04fbfaa26cc3fda1ef77af32776bd10f21 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Mon, 14 Sep 2020 15:45:57 -0500 Subject: [PATCH 0584/1079] Update PowerPC backend ownership in CODE_OWNERS.TXT --- llvm/CODE_OWNERS.TXT | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/CODE_OWNERS.TXT b/llvm/CODE_OWNERS.TXT index cc1a568032a41..543858c29bd81 100644 --- a/llvm/CODE_OWNERS.TXT +++ b/llvm/CODE_OWNERS.TXT @@ -85,7 +85,11 @@ D: Branch weights and BlockFrequencyInfo N: Hal Finkel E: hfinkel@anl.gov -D: The loop reroller, alias analysis and the PowerPC target +D: The loop reroller and alias analysis + +N: Nemanja Ivanovic +E: nemanja.i.ibm@gmail.com +D: PowerPC Backend N: Dan Gohman E: llvm@sunfishcode.online From f859c30ecbbbeb33a90b00b76044a688b2e71879 Mon Sep 17 00:00:00 2001 From: Austin Kerbow Date: Sun, 13 Sep 2020 22:32:48 -0700 Subject: [PATCH 0585/1079] [AMDGPU] Add XDL resource to scheduling model Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D87621 --- llvm/lib/Target/AMDGPU/SISchedule.td | 13 ++++-- .../CodeGen/AMDGPU/schedule-xdl-resource.ll | 44 +++++++++++++++++++ 2 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/schedule-xdl-resource.ll diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td index 932381c99e0b0..d6dff4b9c8899 100644 --- a/llvm/lib/Target/AMDGPU/SISchedule.td +++ b/llvm/lib/Target/AMDGPU/SISchedule.td @@ -104,6 +104,9 @@ def HWVALU : ProcResource<1> { def HWRC : ProcResource<1> { // Register destination cache let BufferSize = 1; } +def HWXDL : ProcResource<1> { // MFMA CU + let BufferSize = 0; +} class HWWriteRes resources, int latency> : WriteRes { @@ -138,9 +141,13 @@ multiclass SICommonWriteRes { def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; - def : HWVALUWriteRes; - def : HWVALUWriteRes; - def : HWVALUWriteRes; + + let ResourceCycles = [2] in + def : HWWriteRes; + let ResourceCycles = [8] in + def : HWWriteRes; + let ResourceCycles = [16] in + def : HWWriteRes; def : ReadAdvance; def : 
InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32$")>; diff --git a/llvm/test/CodeGen/AMDGPU/schedule-xdl-resource.ll b/llvm/test/CodeGen/AMDGPU/schedule-xdl-resource.ll new file mode 100644 index 0000000000000..6beddf8fe947a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/schedule-xdl-resource.ll @@ -0,0 +1,44 @@ +; RUN: llc -march=amdgcn -mcpu=gfx908 -debug-only=machine-scheduler -verify-machineinstrs < %s 2>&1 | FileCheck -enable-var-scope %s +; REQUIRES: asserts + +declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half>, <4 x half>, <32 x float>, i32, i32, i32) + +; CHECK: CritRes: {{[0-9]+}} HWXDL +; CHECK: Picking: Cand SU([[nid:[0-9]+]]) RES-DEMAND +; CHECK: Scheduling SU([[nid]]) {{.*}} V_MFMA_F32_32X32X4F16 +define amdgpu_kernel void @schedule-xdl-resource(<32 x float> addrspace(1)* %in, <32 x float> addrspace(1)* %out, <4 x half> addrspace(3)* %lds, i32 %stride) #0 { + %in_ptr.1 = getelementptr <32 x float>, <32 x float> addrspace(1)* %in, i32 %stride + %in_ptr.2 = getelementptr <32 x float>, <32 x float> addrspace(1)* %in_ptr.1, i32 %stride + %in_ptr.3 = getelementptr <32 x float>, <32 x float> addrspace(1)* %in_ptr.2, i32 %stride + %in.load.1 = load <32 x float>, <32 x float> addrspace (1)* %in_ptr.1 + %in.load.2 = load <32 x float>, <32 x float> addrspace (1)* %in_ptr.2 + %in.load.3 = load <32 x float>, <32 x float> addrspace (1)* %in_ptr.3 + %lds_ptr.1 = getelementptr <4 x half>, <4 x half> addrspace(3)* %lds, i32 %stride + %lds_ptr.2 = getelementptr <4 x half>, <4 x half> addrspace(3)* %lds_ptr.1, i32 %stride + %lds_ptr.3 = getelementptr <4 x half>, <4 x half> addrspace(3)* %lds_ptr.2, i32 %stride + %lds.load.1 = load <4 x half>, <4 x half> addrspace(3)* %lds_ptr.1 + %lds.load.2 = load <4 x half>, <4 x half> addrspace(3)* %lds_ptr.2 + %lds.load.3 = load <4 x half>, <4 x half> addrspace(3)* %lds_ptr.3 + %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %lds.load.1, <4 x half> %lds.load.1, <32 x float> %in.load.1, i32 1, i32 1, i32 1) + %mai.2 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %lds.load.2, <4 x half> %lds.load.2, <32 x float> %in.load.2, i32 1, i32 1, i32 1) + %mai.3 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %lds.load.3, <4 x half> %lds.load.3, <32 x float> %in.load.3, i32 1, i32 1, i32 1) + %mai.4 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %lds.load.1, <4 x half> %lds.load.1, <32 x float> %in.load.1, i32 2, i32 2, i32 2) + %mai.5 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %lds.load.2, <4 x half> %lds.load.2, <32 x float> %in.load.2, i32 2, i32 2, i32 2) + %mai.6 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %lds.load.3, <4 x half> %lds.load.3, <32 x float> %in.load.3, i32 2, i32 2, i32 2) + %out_ptr.1 = getelementptr <32 x float>, <32 x float> addrspace(1)* %out, i32 %stride + %out_ptr.2 = getelementptr <32 x float>, <32 x float> addrspace(1)* %out_ptr.1, i32 %stride + %out_ptr.3 = getelementptr <32 x float>, <32 x float> addrspace(1)* %out_ptr.2, i32 %stride + %out_ptr.4 = getelementptr <32 x float>, <32 x float> addrspace(1)* %out_ptr.3, i32 %stride + %out_ptr.5 = getelementptr <32 x float>, <32 x float> addrspace(1)* %out_ptr.4, i32 %stride + %out_ptr.6 = getelementptr <32 x float>, <32 x float> addrspace(1)* %out_ptr.5, i32 %stride + store <32 x float> %mai.1, <32 x float> addrspace(1)* %out_ptr.1 + store <32 x float> %mai.2, <32 x float> addrspace(1)* %out_ptr.2 + store <32 x float> 
%mai.3, <32 x float> addrspace(1)* %out_ptr.3 + store <32 x float> %mai.4, <32 x float> addrspace(1)* %out_ptr.4 + store <32 x float> %mai.5, <32 x float> addrspace(1)* %out_ptr.5 + store <32 x float> %mai.6, <32 x float> addrspace(1)* %out_ptr.6 + + ret void +} + +attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" } From c193a689b475f91e63adb25dc5855f7a7f068c9a Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 14 Sep 2020 13:54:50 -0700 Subject: [PATCH 0586/1079] [SelectionDAG] Use Align/MaybeAlign in calls to getLoad/getStore/getExtLoad/getTruncStore. The versions that take 'unsigned' will be removed in the future. I tried to use getOriginalAlign instead of getAlign in some places. getAlign factors in the minimum alignment implied by the offset in the pointer info. Since we're also passing the pointer info we can use the original alignment. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D87592 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 66 ++++++++----------- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 8 +-- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 27 ++++---- .../SelectionDAG/SelectionDAGBuilder.cpp | 17 ++--- .../CodeGen/SelectionDAG/TargetLowering.cpp | 41 ++++++------ .../Target/AArch64/AArch64ISelLowering.cpp | 24 ++++--- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 6 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 10 +-- llvm/lib/Target/ARM/ARMISelLowering.cpp | 11 ++-- llvm/lib/Target/AVR/AVRISelLowering.cpp | 8 +-- .../Target/Hexagon/HexagonISelDAGToDAG.cpp | 4 +- llvm/lib/Target/Mips/MipsISelLowering.cpp | 8 +-- llvm/lib/Target/Mips/MipsSEISelLowering.cpp | 4 +- llvm/lib/Target/Sparc/SparcISelLowering.cpp | 4 +- .../SystemZ/SystemZSelectionDAGInfo.cpp | 5 +- .../WebAssembly/WebAssemblyISelLowering.cpp | 4 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 58 ++++++++-------- llvm/lib/Target/XCore/XCoreISelLowering.cpp | 17 +++-- llvm/test/CodeGen/AArch64/sadd_sat_vec.ll | 4 +- llvm/test/CodeGen/AArch64/ssub_sat_vec.ll | 4 +- llvm/test/CodeGen/AArch64/uadd_sat_vec.ll | 4 +- llvm/test/CodeGen/AArch64/usub_sat_vec.ll | 4 +- .../CodeGen/AMDGPU/private-element-size.ll | 10 +-- llvm/test/CodeGen/PowerPC/aix-cc-abi.ll | 46 ++++++------- 24 files changed, 186 insertions(+), 208 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 48e964c107619..909698ded4edc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7048,7 +7048,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { SDValue NewStore = DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(), - FirstStore->getPointerInfo(), FirstStore->getAlignment()); + FirstStore->getPointerInfo(), FirstStore->getAlign()); // Rely on other DAG combine rules to remove the other individual stores. DAG.ReplaceAllUsesWith(N, NewStore.getNode()); @@ -7231,10 +7231,10 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { if (!Allowed || !Fast) return SDValue(); - SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, - SDLoc(N), VT, Chain, FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), MemVT, - FirstLoad->getAlignment()); + SDValue NewLoad = + DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT, + Chain, FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign()); // Transfer chain users from old loads to the new load. 
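// Redirecting the chain outputs is what keeps the memory ordering intact:
// anything that was chained after one of the original narrow loads ends up
// chained after the new combined load instead.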
for (LoadSDNode *L : Loads) @@ -9789,7 +9789,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { SDValue BasePtr = LN0->getBasePtr(); for (unsigned Idx = 0; Idx < NumSplits; Idx++) { const unsigned Offset = Idx * Stride; - const unsigned Align = MinAlign(LN0->getAlignment(), Offset); + const Align Align = commonAlignment(LN0->getAlign(), Offset); SDValue SplitLoad = DAG.getExtLoad( ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr, @@ -11015,7 +11015,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ShAmt = AdjustBigEndianShift(ShAmt); uint64_t PtrOff = ShAmt / 8; - unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); + Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff); SDLoc DL(LN0); // The original load itself didn't wrap, so an offset within it doesn't. SDNodeFlags Flags; @@ -11735,7 +11735,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { *LN0->getMemOperand())) { SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), LN0->getAlignment(), + LN0->getPointerInfo(), LN0->getAlign(), LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); return Load; @@ -15712,8 +15712,6 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, // Figure out the offset for the store and the alignment of the access. unsigned StOffset; - unsigned NewAlign = St->getAlignment(); - if (DAG.getDataLayout().isLittleEndian()) StOffset = ByteShift; else @@ -15723,7 +15721,6 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, if (StOffset) { SDLoc DL(IVal); Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL); - NewAlign = MinAlign(NewAlign, StOffset); } // Truncate down to the new size. @@ -15732,7 +15729,8 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, ++OpsNarrowed; return DAG .getStore(St->getChain(), SDLoc(St), IVal, Ptr, - St->getPointerInfo().getWithOffset(StOffset), NewAlign); + St->getPointerInfo().getWithOffset(StOffset), + St->getOriginalAlign()); } /// Look for sequence of load / op / store where op is one of 'or', 'xor', and @@ -16145,9 +16143,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( // make sure we use trunc store if it's necessary to be legal. SDValue NewStore; if (!UseTrunc) { - NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), - FirstInChain->getAlignment()); + NewStore = + DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), FirstInChain->getAlign()); } else { // Must be realized as a trunc store EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); @@ -16159,8 +16157,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( NewStore = DAG.getTruncStore( NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, - FirstInChain->getAlignment(), - FirstInChain->getMemOperand()->getFlags()); + FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags()); } // Replace all merged stores with the new store. 
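The commonAlignment(LN0->getAlign(), Offset) calls in the hunks above are the Align-typed replacement for the old MinAlign(LN0->getAlignment(), Offset) pattern: both compute the largest power of two guaranteed to divide the address of the offset access. A small standalone sketch of that relationship (minAlign and commonAlignment are local stand-in names, simplified from llvm/Support/MathExtras.h and llvm/Support/Alignment.h, not the exact headers):

    #include <cassert>
    #include <cstdint>

    // Lowest set bit of (A | B): the largest power of two that divides both a
    // power-of-two alignment A and a byte offset B. Same formula as llvm::MinAlign.
    static uint64_t minAlign(uint64_t A, uint64_t B) {
      return (A | B) & (1 + ~(A | B));
    }

    // Alignment of (Ptr + Offset) when Ptr is known to be A-aligned; this is what
    // llvm::commonAlignment(Align A, uint64_t Offset) returns, as a raw integer.
    static uint64_t commonAlignment(uint64_t A, uint64_t Offset) {
      return minAlign(A, Offset);
    }

    int main() {
      assert(commonAlignment(16, 4) == 4);   // 16-aligned base plus 4 is only 4-aligned
      assert(commonAlignment(16, 32) == 16); // a multiple of the alignment keeps it
      assert(commonAlignment(8, 0) == 8);    // offset zero changes nothing
      return 0;
    }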
@@ -16691,7 +16688,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, } LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); + Align FirstStoreAlign = FirstInChain->getAlign(); LoadSDNode *FirstLoad = cast(LoadNodes[0].MemNode); // Scan the memory operations on the chain and find the first @@ -16786,7 +16783,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, // the NumElem refers to array/index size. unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); NumElem = std::min(LastLegalType, NumElem); - unsigned FirstLoadAlign = FirstLoad->getAlignment(); + Align FirstLoadAlign = FirstLoad->getAlign(); if (NumElem < 2) { // We know that candidate stores are in order and of correct @@ -16798,8 +16795,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, // can here. unsigned NumSkip = 1; while ((NumSkip < LoadNodes.size()) && - (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) && + (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign)) NumSkip++; StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip); @@ -16872,11 +16869,10 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, FirstLoad->getChain(), FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), JointMemOpVT, FirstLoadAlign, LdMMOFlags); - NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), JointMemOpVT, - FirstInChain->getAlignment(), - FirstInChain->getMemOperand()->getFlags()); + NewStore = DAG.getTruncStore( + NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), JointMemOpVT, + FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags()); } // Transfer chain users from old loads to the new load. @@ -17078,17 +17074,15 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); - unsigned Alignment = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), - ST->getAlignment(), MMOFlags, AAInfo); + ST->getOriginalAlign(), MMOFlags, AAInfo); Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL); - Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), - Alignment, MMOFlags, AAInfo); + ST->getOriginalAlign(), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, St0, St1); } @@ -17421,7 +17415,6 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { return SDValue(); // Start to split store. - unsigned Alignment = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); @@ -17434,13 +17427,12 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { SDValue Ptr = ST->getBasePtr(); // Lower value store. 
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), - ST->getAlignment(), MMOFlags, AAInfo); + ST->getOriginalAlign(), MMOFlags, AAInfo); Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL); // Higher value store. - SDValue St1 = - DAG.getStore(St0, DL, Hi, Ptr, - ST->getPointerInfo().getWithOffset(HalfValBitSize / 8), - Alignment / 2, MMOFlags, AAInfo); + SDValue St1 = DAG.getStore( + St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8), + ST->getOriginalAlign(), MMOFlags, AAInfo); return St1; } @@ -21229,7 +21221,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // It is safe to replace the two loads if they have different alignments, // but the new load must be the minimum (most restrictive) alignment of the // inputs. - unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment()); + Align Alignment = std::min(LLD->getAlign(), RLD->getAlign()); MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags(); if (!RLD->isInvariant()) MMOFlags &= ~MachineMemOperand::MOInvariant; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 541edafc0ef56..9a718480aee8f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1772,9 +1772,9 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl, SDValue Chain) { // Create the stack frame object. - unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment( + Align SrcAlign = DAG.getDataLayout().getPrefTypeAlign( SrcOp.getValueType().getTypeForEVT(*DAG.getContext())); - SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign); + SDValue FIPtr = DAG.CreateStackTemporary(SlotVT.getStoreSize(), SrcAlign); FrameIndexSDNode *StackPtrFI = cast(FIPtr); int SPFI = StackPtrFI->getIndex(); @@ -1785,7 +1785,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, unsigned SlotSize = SlotVT.getSizeInBits(); unsigned DestSize = DestVT.getSizeInBits(); Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); - unsigned DestAlign = DAG.getDataLayout().getPrefTypeAlignment(DestType); + Align DestAlign = DAG.getDataLayout().getPrefTypeAlign(DestType); // Emit a store to the stack slot. Use a truncstore if the input value is // later than DestVT. @@ -1803,7 +1803,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, // Result is a load from the stack slot. 
if (SlotSize == DestSize) return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign); - + assert(SlotSize < DestSize && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT, DestAlign); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 93b40803089e1..f94e0a034807c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6103,7 +6103,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Store = DAG.getStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), - DstPtrInfo.getWithOffset(DstOff), Alignment.value(), MMOFlags); + DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags); OutChains.push_back(Store); } } @@ -6127,13 +6127,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, ISD::EXTLOAD, dl, NVT, Chain, DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl), SrcPtrInfo.getWithOffset(SrcOff), VT, - commonAlignment(*SrcAlign, SrcOff).value(), SrcMMOFlags); + commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags); OutLoadChains.push_back(Value.getValue(1)); Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), - DstPtrInfo.getWithOffset(DstOff), VT, Alignment.value(), MMOFlags); + DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags); OutStoreChains.push_back(Store); } SrcOff += VTSize; @@ -6253,10 +6253,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; - Value = DAG.getLoad( - VT, dl, Chain, - DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl), - SrcPtrInfo.getWithOffset(SrcOff), SrcAlign->value(), SrcMMOFlags); + Value = + DAG.getLoad(VT, dl, Chain, + DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl), + SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -6268,10 +6268,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, unsigned VTSize = VT.getSizeInBits() / 8; SDValue Store; - Store = DAG.getStore( - Chain, dl, LoadValues[i], - DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), - DstPtrInfo.getWithOffset(DstOff), Alignment.value(), MMOFlags); + Store = + DAG.getStore(Chain, dl, LoadValues[i], + DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), + DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags); OutChains.push_back(Store); DstOff += VTSize; } @@ -6371,7 +6371,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Store = DAG.getStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), - DstPtrInfo.getWithOffset(DstOff), Alignment.value(), + DstPtrInfo.getWithOffset(DstOff), Alignment, isVol ? 
MachineMemOperand::MOVolatile : MachineMemOperand::MONone); OutChains.push_back(Store); DstOff += VT.getSizeInBits() / 8; @@ -7036,8 +7036,7 @@ SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getPointerInfo(), - LD->getMemoryVT(), LD->getAlignment(), MMOFlags, - LD->getAAInfo()); + LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo()); } SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 7bcbb7ccddc8d..057ebebe87d73 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2539,7 +2539,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, SDLoc dl = getCurSDLoc(); SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); const Module &M = *ParentBB->getParent()->getFunction().getParent(); - unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext())); + Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext())); // Generate code to load the content of the guard slot. SDValue GuardVal = DAG.getLoad( @@ -6380,7 +6380,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } else { EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); const Value *Global = TLI.getSDagStackGuard(M); - unsigned Align = DL->getPrefTypeAlignment(Global->getType()); + Align Align = DL->getPrefTypeAlign(Global->getType()); Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global), MachinePointerInfo(Global, 0), Align, MachineMemOperand::MOVolatile); @@ -6411,9 +6411,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. 
- Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack( - DAG.getMachineFunction(), FI), - /* Alignment = */ 0, MachineMemOperand::MOVolatile); + Res = DAG.getStore( + Chain, sdl, Src, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + MaybeAlign(), MachineMemOperand::MOVolatile); setValue(&I, Res); DAG.setRoot(Res); return; @@ -7245,9 +7246,9 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, } SDValue Ptr = Builder.getValue(PtrVal); - SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, - Ptr, MachinePointerInfo(PtrVal), - /* Alignment = */ 1); + SDValue LoadVal = + Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr, + MachinePointerInfo(PtrVal), Align(1)); if (!ConstantMemory) Builder.PendingLoads.push_back(LoadVal.getValue(1)); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b7f5ab3d6b85d..3446ee0efc450 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3601,10 +3601,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (bestOffset != 0) Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl); - unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); - SDValue NewLoad = DAG.getLoad( - newVT, dl, Lod->getChain(), Ptr, - Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign); + SDValue NewLoad = + DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, + Lod->getPointerInfo().getWithOffset(bestOffset), + Lod->getOriginalAlign()); return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), @@ -6817,7 +6817,7 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, // the codegen worse. SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR, - LD->getPointerInfo(), SrcIntVT, LD->getAlignment(), + LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo()); SmallVector Vals; @@ -6854,7 +6854,7 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, SDValue ScalarLoad = DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), - SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride), + SrcEltVT, LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo()); BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride)); @@ -6917,7 +6917,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, } return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(), - ST->getAlignment(), ST->getMemOperand()->getFlags(), + ST->getOriginalAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()); } @@ -6937,8 +6937,8 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, // This scalar TruncStore may be illegal, but we legalize it later. SDValue Store = DAG.getTruncStore( Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride), - MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride), - ST->getMemOperand()->getFlags(), ST->getAAInfo()); + MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(), + ST->getAAInfo()); Stores.push_back(Store); } @@ -7003,7 +7003,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { // Load one integer register's worth from the original location. 
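// Note that getOriginalAlign() is used rather than recomputing
// MinAlign(getAlignment(), Offset): as the commit message above explains, the
// pointer info already carries the offset, so the original alignment of the
// access is the right value to pass.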
SDValue Load = DAG.getLoad( RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset), - MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(), + LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo()); // Follow the load with a store to the stack slot. Remember the store. Stores.push_back(DAG.getStore( @@ -7022,8 +7022,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset), MemVT, - MinAlign(LD->getAlignment(), Offset), - LD->getMemOperand()->getFlags(), LD->getAAInfo()); + LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), + LD->getAAInfo()); // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. @@ -7053,7 +7053,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2); NumBits >>= 1; - unsigned Alignment = LD->getAlignment(); + Align Alignment = LD->getOriginalAlign(); unsigned IncrementSize = NumBits / 8; ISD::LoadExtType HiExtType = LD->getExtensionType(); @@ -7071,8 +7071,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize)); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - NewLoadedVT, MinAlign(Alignment, IncrementSize), - LD->getMemOperand()->getFlags(), LD->getAAInfo()); + NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), + LD->getAAInfo()); } else { Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), @@ -7081,8 +7081,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize)); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - NewLoadedVT, MinAlign(Alignment, IncrementSize), - LD->getMemOperand()->getFlags(), LD->getAAInfo()); + NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), + LD->getAAInfo()); } // aggregate the two parts @@ -7106,7 +7106,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, SDValue Ptr = ST->getBasePtr(); SDValue Val = ST->getValue(); EVT VT = Val.getValueType(); - int Alignment = ST->getAlignment(); + Align Alignment = ST->getOriginalAlign(); auto &MF = DAG.getMachineFunction(); EVT StoreMemVT = ST->getMemoryVT(); @@ -7163,7 +7163,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, // Store it to the final location. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, ST->getPointerInfo().getWithOffset(Offset), - MinAlign(ST->getAlignment(), Offset), + ST->getOriginalAlign(), ST->getMemOperand()->getFlags())); // Increment the pointers. Offset += RegBytes; @@ -7185,7 +7185,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, Stores.push_back( DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, ST->getPointerInfo().getWithOffset(Offset), LoadMemVT, - MinAlign(ST->getAlignment(), Offset), + ST->getOriginalAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo())); // The order of the stores doesn't matter - say it with a TokenFactor. 
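// (A TokenFactor takes several chains and yields one chain that depends on all
// of them without ordering its operands relative to each other, which is the
// "order doesn't matter" property used here.)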
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); @@ -7213,7 +7213,6 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, ST->getMemOperand()->getFlags()); Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize)); - Alignment = MinAlign(Alignment, IncrementSize); Store2 = DAG.getTruncStore( Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 6745b848f0eda..f9be060248522 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -5273,7 +5273,7 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, SDValue FuncTLVGet = DAG.getLoad( PtrMemVT, DL, Chain, DescAddr, MachinePointerInfo::getGOT(DAG.getMachineFunction()), - /* Alignment = */ PtrMemVT.getSizeInBits() / 8, + Align(PtrMemVT.getSizeInBits() / 8), MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); Chain = FuncTLVGet.getValue(1); @@ -6302,8 +6302,8 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, // void *__stack at offset 0 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT); - MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList, - MachinePointerInfo(SV), /* Alignment = */ 8)); + MemOps.push_back( + DAG.getStore(Chain, DL, Stack, VAList, MachinePointerInfo(SV), Align(8))); // void *__gr_top at offset 8 int GPRSize = FuncInfo->getVarArgsGPRSize(); @@ -6318,8 +6318,7 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, DAG.getConstant(GPRSize, DL, PtrVT)); MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr, - MachinePointerInfo(SV, 8), - /* Alignment = */ 8)); + MachinePointerInfo(SV, 8), Align(8))); } // void *__vr_top at offset 16 @@ -6334,23 +6333,22 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, DAG.getConstant(FPRSize, DL, PtrVT)); MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr, - MachinePointerInfo(SV, 16), - /* Alignment = */ 8)); + MachinePointerInfo(SV, 16), Align(8))); } // int __gr_offs at offset 24 SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT)); - MemOps.push_back(DAG.getStore( - Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr, - MachinePointerInfo(SV, 24), /* Alignment = */ 4)); + MemOps.push_back( + DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), + GROffsAddr, MachinePointerInfo(SV, 24), Align(4))); // int __vr_offs at offset 28 SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT)); - MemOps.push_back(DAG.getStore( - Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr, - MachinePointerInfo(SV, 28), /* Alignment = */ 4)); + MemOps.push_back( + DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), + VROffsAddr, MachinePointerInfo(SV, 28), Align(4))); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index be8742c8dd47e..5fb072ff18aeb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4164,9 +4164,9 @@ SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG, auto SrcPtrInfo = MachinePointerInfo::getStack(MF, Offset); SDValue Ptr = DAG.getFrameIndex(FI, MVT::i32); - return 
DAG.getLoad(VT, SL, DAG.getEntryNode(), Ptr, SrcPtrInfo, 4, + return DAG.getLoad(VT, SL, DAG.getEntryNode(), Ptr, SrcPtrInfo, Align(4), MachineMemOperand::MODereferenceable | - MachineMemOperand::MOInvariant); + MachineMemOperand::MOInvariant); } SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG, @@ -4178,7 +4178,7 @@ SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG, MachinePointerInfo DstInfo = MachinePointerInfo::getStack(MF, Offset); SDValue Ptr = DAG.getConstant(Offset, SL, MVT::i32); - SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, 4, + SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, Align(4), MachineMemOperand::MODereferenceable); return Store; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d88ad58d3ab49..d5712206da91e 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1665,9 +1665,9 @@ SDValue SITargetLowering::lowerKernargMemParameter( // TODO: If we passed in the base kernel offset we could have a better // alignment than 4, but we don't really need it. SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, AlignDownOffset); - SDValue Load = DAG.getLoad(MVT::i32, SL, Chain, Ptr, PtrInfo, 4, + SDValue Load = DAG.getLoad(MVT::i32, SL, Chain, Ptr, PtrInfo, Align(4), MachineMemOperand::MODereferenceable | - MachineMemOperand::MOInvariant); + MachineMemOperand::MOInvariant); SDValue ShiftAmt = DAG.getConstant(OffsetDiff * 8, SL, MVT::i32); SDValue Extract = DAG.getNode(ISD::SRL, SL, MVT::i32, Load, ShiftAmt); @@ -3074,8 +3074,8 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, MemOpChains.push_back(Cpy); } else { - SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, - Alignment ? Alignment->value() : 0); + SDValue Store = + DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, Alignment); MemOpChains.push_back(Store); } } @@ -5231,7 +5231,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL, // be available and how do we get it? 
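// The queue descriptor pointer is assumed to be 64-byte aligned, so the
// alignment that can be claimed for the aperture field loaded below is the
// common alignment of 64 and the field's byte offset within the descriptor.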
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); return DAG.getLoad(MVT::i32, DL, QueuePtr.getValue(1), Ptr, PtrInfo, - MinAlign(64, StructOffset), + commonAlignment(Align(64), StructOffset), MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 9c76a0da83eec..d9ccd86802c75 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2517,9 +2517,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), Callee, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), - /* Alignment = */ 0, MachineMemOperand::MODereferenceable | - MachineMemOperand::MOInvariant); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(), + MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant); } else if (Subtarget->isTargetCOFF()) { assert(Subtarget->isTargetWindows() && "Windows is the only supported COFF target"); @@ -3328,8 +3328,7 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op, SDValue Chain = DAG.getEntryNode(); SDValue FuncTLVGet = DAG.getLoad( MVT::i32, DL, Chain, DescAddr, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), - /* Alignment = */ 4, + MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4), MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); Chain = FuncTLVGet.getValue(1); @@ -15336,7 +15335,7 @@ static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) { SDValue NewLoad = DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset, LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT, - Alignment.value(), MMOFlags, AAInfo); + Alignment, MMOFlags, AAInfo); Loads.push_back(NewLoad); Chains.push_back(SDValue(NewLoad.getNode(), 1)); } diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index bf9b32e1278e3..a816c2412b08c 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -676,7 +676,7 @@ SDValue AVRTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { SDValue FI = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), getPointerTy(DL)); return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), - MachinePointerInfo(SV), 0); + MachinePointerInfo(SV)); } SDValue AVRTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -1096,8 +1096,7 @@ SDValue AVRTargetLowering::LowerFormalArguments( // from this parameter. 
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DL)); InVals.push_back(DAG.getLoad(LocVT, dl, Chain, FIN, - MachinePointerInfo::getFixedStack(MF, FI), - 0)); + MachinePointerInfo::getFixedStack(MF, FI))); } } @@ -1230,8 +1229,7 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getStore(Chain, DL, Arg, PtrOff, - MachinePointerInfo::getStack(MF, VA.getLocMemOffset()), - 0); + MachinePointerInfo::getStack(MF, VA.getLocMemOffset())); } } diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index b4b389a7b9568..bdd5c7dd151e2 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -231,10 +231,10 @@ SDNode *HexagonDAGToDAGISel::StoreInstrForLoadIntrinsic(MachineSDNode *LoadN, if (Size >= 4) TS = CurDAG->getStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, PI, - Size); + Align(Size)); else TS = CurDAG->getTruncStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, - PI, MVT::getIntegerVT(Size * 8), Size); + PI, MVT::getIntegerVT(Size * 8), Align(Size)); SDNode *StoreN; { diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 2da35020006e2..3416a56a1de18 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -3025,8 +3025,8 @@ SDValue MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); int FI = MFI.CreateFixedObject(Arg.getValueSizeInBits() / 8, Offset, false); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - return DAG.getStore(Chain, DL, Arg, FIN, MachinePointerInfo(), - /* Alignment = */ 0, MachineMemOperand::MOVolatile); + return DAG.getStore(Chain, DL, Arg, FIN, MachinePointerInfo(), MaybeAlign(), + MachineMemOperand::MOVolatile); } void MipsTargetLowering:: @@ -4404,7 +4404,7 @@ void MipsTargetLowering::passByValArg( SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, DAG.getConstant(OffsetInBytes, DL, PtrTy)); SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr, - MachinePointerInfo(), Alignment.value()); + MachinePointerInfo(), Alignment); MemOpChains.push_back(LoadVal.getValue(1)); unsigned ArgReg = ArgRegs[FirstReg + I]; RegsToPass.push_back(std::make_pair(ArgReg, LoadVal)); @@ -4431,7 +4431,7 @@ void MipsTargetLowering::passByValArg( PtrTy)); SDValue LoadVal = DAG.getExtLoad( ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(), - MVT::getIntegerVT(LoadSizeInBytes * 8), Alignment.value()); + MVT::getIntegerVT(LoadSizeInBytes * 8), Alignment); MemOpChains.push_back(LoadVal.getValue(1)); // Shift the loaded value. 
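Across the target changes in this patch the call-site migration has one shape: a raw unsigned alignment argument, where 0 conventionally meant "use the ABI alignment of the value type", becomes an explicit Align, and "no alignment specified" is now spelled MaybeAlign(). A minimal stand-in for that fallback behavior (Align and MaybeAlign here are simplified local types and resolve() is an invented helper, not the real llvm::Align machinery):

    #include <cassert>
    #include <cstdint>
    #include <optional>

    struct Align {
      uint64_t Value;
      explicit Align(uint64_t V) : Value(V) {}
    };
    using MaybeAlign = std::optional<Align>;

    // Conceptually what the new overloads do: an empty MaybeAlign falls back to
    // the ABI alignment of the type, matching the old "alignment == 0" convention.
    static Align resolve(MaybeAlign A, Align ABITypeAlign) {
      return A ? *A : ABITypeAlign;
    }

    int main() {
      assert(resolve(MaybeAlign(), Align(8)).Value == 8);         // unspecified -> ABI alignment
      assert(resolve(MaybeAlign(Align(2)), Align(8)).Value == 2); // explicit value wins
      return 0;
    }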
diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp index bdf29c53cbd54..4a448a5f7c681 100644 --- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp @@ -2307,7 +2307,7 @@ static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), - /* Alignment = */ 16); + Align(16)); } SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, @@ -2382,7 +2382,7 @@ static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), - /* Alignment = */ 16); + Align(16)); } SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 116352e083829..c0c79b6f59c61 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -2139,7 +2139,7 @@ SDValue SparcTargetLowering::LowerF128_LibCallArg(SDValue Chain, int FI = MFI.CreateStackObject(16, Align(8), false); SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); Chain = DAG.getStore(Chain, DL, Entry.Node, FIPtr, MachinePointerInfo(), - /* Alignment = */ 8); + Align(8)); Entry.Node = FIPtr; Entry.Ty = PointerType::getUnqual(ArgTy); @@ -2198,7 +2198,7 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG, // Load RetPtr to get the return value. return DAG.getLoad(Op.getValueType(), SDLoc(Op), Chain, RetPtr, - MachinePointerInfo(), /* Alignment = */ 8); + MachinePointerInfo(), Align(8)); } SDValue SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS, diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 6b4f35e5ba2b4..ca5ca7257bab2 100644 --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -117,9 +117,8 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset( return Chain1; SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, DAG.getConstant(1, DL, PtrVT)); - SDValue Chain2 = - DAG.getStore(Chain, DL, Byte, Dst2, DstPtrInfo.getWithOffset(1), - /* Alignment = */ 1); + SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2, + DstPtrInfo.getWithOffset(1), Align(1)); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); } } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 8f5b7301e6532..425f8b86c9fbc 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -904,7 +904,7 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, DAG.getConstant(Offset, DL, PtrVT)); Chains.push_back( DAG.getStore(Chain, DL, Arg, Add, - MachinePointerInfo::getFixedStack(MF, FI, Offset), 0)); + MachinePointerInfo::getFixedStack(MF, FI, Offset))); } if (!Chains.empty()) Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); @@ -1331,7 +1331,7 @@ SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op, SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL, MFI->getVarargBufferVreg(), PtrVT); return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1), - 
MachinePointerInfo(SV), 0); + MachinePointerInfo(SV)); } SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 34a1517ac70f0..a704ac3345123 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19835,17 +19835,15 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG, // Load the 64-bit value into an XMM register. SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Op.getOperand(OpNo)); - SDValue CLod0 = - DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), - /* Alignment = */ 16); + SDValue CLod0 = DAG.getLoad( + MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Align(16)); SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, DAG.getBitcast(MVT::v4i32, XR1), CLod0); - SDValue CLod1 = - DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), - /* Alignment = */ 16); + SDValue CLod1 = DAG.getLoad( + MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Align(16)); SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1); SDValue Sub; SDValue Chain; @@ -20211,17 +20209,17 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, // Make a 64-bit buffer, and use it to build an FILD. SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64, 8); int SSFI = cast(StackSlot)->getIndex(); + Align SlotAlign(8); MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI); if (SrcVT == MVT::i32) { SDValue OffsetSlot = DAG.getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), dl); - SDValue Store1 = - DAG.getStore(Chain, dl, Src, StackSlot, MPI, 8 /*Align*/); + SDValue Store1 = DAG.getStore(Chain, dl, Src, StackSlot, MPI, SlotAlign); SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32), - OffsetSlot, MPI.getWithOffset(4), 4); + OffsetSlot, MPI.getWithOffset(4), SlotAlign); std::pair Tmp = - BuildFILD(DstVT, MVT::i64, dl, Store2, StackSlot, MPI, Align(8), DAG); + BuildFILD(DstVT, MVT::i64, dl, Store2, StackSlot, MPI, SlotAlign, DAG); if (IsStrict) return DAG.getMergeValues({Tmp.first, Tmp.second}, dl); @@ -20237,7 +20235,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore); } SDValue Store = - DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, Align(8)); + DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, SlotAlign); // For i64 source, we need to add the appropriate power of 2 if the input // was negative. We must be careful to do the computation in x87 extended // precision, not in SSE. 
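The comment above captures the whole trick behind this lowering: x87 FILD only performs signed integer loads, so an unsigned 64-bit source is loaded as if it were signed and then corrected by adding 2^64 whenever the sign bit was set. A self-contained illustration of the correction step (u64_to_fp is an illustrative name, and plain double is used for brevity, whereas the real lowering uses x87 extended precision for exactly the reason the comment gives):

    #include <cassert>
    #include <cstdint>

    double u64_to_fp(uint64_t X) {
      // Reinterpret as signed, which is what a signed 64-bit FP load would see.
      int64_t S = static_cast<int64_t>(X);
      double D = static_cast<double>(S);
      // If the value was "negative" as a signed number, add 2^64 to undo the wrap.
      if (S < 0)
        D += 18446744073709551616.0; // 2^64
      return D;
    }

    int main() {
      assert(u64_to_fp(0) == 0.0);
      assert(u64_to_fp(42) == 42.0);
      // UINT64_MAX is 2^64 - 1, which rounds to 2^64 in double precision.
      assert(u64_to_fp(UINT64_MAX) == 18446744073709551616.0);
      return 0;
    }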
@@ -20245,7 +20243,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDValue Ops[] = { Store, StackSlot }; SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, MVT::i64, MPI, - Align(8), MachineMemOperand::MOLoad); + SlotAlign, MachineMemOperand::MOLoad); Chain = Fild.getValue(1); @@ -26298,9 +26296,8 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(2, dl, MVT::i64)); - OutChains[1] = - DAG.getStore(Root, dl, FPtr, Addr, MachinePointerInfo(TrmpAddr, 2), - /* Alignment = */ 2); + OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, + MachinePointerInfo(TrmpAddr, 2), Align(2)); // Load the 'nest' parameter value into R10. // R10 is specified in X86CallingConv.td @@ -26312,9 +26309,8 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(12, dl, MVT::i64)); - OutChains[3] = - DAG.getStore(Root, dl, Nest, Addr, MachinePointerInfo(TrmpAddr, 12), - /* Alignment = */ 2); + OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, + MachinePointerInfo(TrmpAddr, 12), Align(2)); // Jump to the nested function. OpCode = (JMP64r << 8) | REX_WB; // jmpq *... @@ -26394,22 +26390,20 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(1, dl, MVT::i32)); - OutChains[1] = - DAG.getStore(Root, dl, Nest, Addr, MachinePointerInfo(TrmpAddr, 1), - /* Alignment = */ 1); + OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, + MachinePointerInfo(TrmpAddr, 1), Align(1)); const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode. Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(5, dl, MVT::i32)); - OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, dl, MVT::i8), - Addr, MachinePointerInfo(TrmpAddr, 5), - /* Alignment = */ 1); + OutChains[2] = + DAG.getStore(Root, dl, DAG.getConstant(JMP, dl, MVT::i8), Addr, + MachinePointerInfo(TrmpAddr, 5), Align(1)); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(6, dl, MVT::i32)); - OutChains[3] = - DAG.getStore(Root, dl, Disp, Addr, MachinePointerInfo(TrmpAddr, 6), - /* Alignment = */ 1); + OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, + MachinePointerInfo(TrmpAddr, 6), Align(1)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } @@ -27197,8 +27191,8 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); Entry.Node = StackPtr; - InChain = DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr, - MPI, /* Alignment = */ 16); + InChain = + DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr, MPI, Align(16)); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Ty = PointerType::get(ArgTy,0); Entry.IsSExt = false; @@ -29059,7 +29053,7 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); Chain = DAG.getStore(Node->getChain(), dl, Node->getOperand(2), StackPtr, - MPI, /*Align*/ 0, MachineMemOperand::MOStore); + MPI, MaybeAlign(), MachineMemOperand::MOStore); SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); SDValue LdOps[] = {Chain, StackPtr}; SDValue Value = diff --git a/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/llvm/lib/Target/XCore/XCoreISelLowering.cpp index 573aee02533db..db3dd7fb14383 100644 --- a/llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ 
b/llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -443,16 +443,15 @@ SDValue XCoreTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } if (LD->getAlignment() == 2) { - SDValue Low = - DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain, BasePtr, - LD->getPointerInfo(), MVT::i16, - /* Alignment = */ 2, LD->getMemOperand()->getFlags()); + SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain, BasePtr, + LD->getPointerInfo(), MVT::i16, Align(2), + LD->getMemOperand()->getFlags()); SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(2, DL, MVT::i32)); SDValue High = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain, HighAddr, LD->getPointerInfo().getWithOffset(2), MVT::i16, - /* Alignment = */ 2, LD->getMemOperand()->getFlags()); + Align(2), LD->getMemOperand()->getFlags()); SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, DAG.getConstant(16, DL, MVT::i32)); SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Low, HighShifted); @@ -502,14 +501,14 @@ SDValue XCoreTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDValue Low = Value; SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value, DAG.getConstant(16, dl, MVT::i32)); - SDValue StoreLow = DAG.getTruncStore( - Chain, dl, Low, BasePtr, ST->getPointerInfo(), MVT::i16, - /* Alignment = */ 2, ST->getMemOperand()->getFlags()); + SDValue StoreLow = + DAG.getTruncStore(Chain, dl, Low, BasePtr, ST->getPointerInfo(), + MVT::i16, Align(2), ST->getMemOperand()->getFlags()); SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, DAG.getConstant(2, dl, MVT::i32)); SDValue StoreHigh = DAG.getTruncStore( Chain, dl, High, HighAddr, ST->getPointerInfo().getWithOffset(2), - MVT::i16, /* Alignment = */ 2, ST->getMemOperand()->getFlags()); + MVT::i16, Align(2), ST->getMemOperand()->getFlags()); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh); } diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index 893ed6445462f..1ae1ee43beeef 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -159,8 +159,8 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind { ; CHECK-NEXT: ushr v0.2s, v0.2s, #24 ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w8, [x2, #1] ; CHECK-NEXT: strb w9, [x2] +; CHECK-NEXT: strb w8, [x2, #1] ; CHECK-NEXT: ret %x = load <2 x i8>, <2 x i8>* %px %y = load <2 x i8>, <2 x i8>* %py @@ -201,8 +201,8 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind { ; CHECK-NEXT: ushr v0.2s, v0.2s, #16 ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w8, [x2, #2] ; CHECK-NEXT: strh w9, [x2] +; CHECK-NEXT: strh w8, [x2, #2] ; CHECK-NEXT: ret %x = load <2 x i16>, <2 x i16>* %px %y = load <2 x i16>, <2 x i16>* %py diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index 2cf6e896bed0a..c5a55f23913ae 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -160,8 +160,8 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind { ; CHECK-NEXT: ushr v0.2s, v0.2s, #24 ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w8, [x2, #1] ; CHECK-NEXT: strb w9, [x2] +; CHECK-NEXT: strb w8, [x2, #1] ; CHECK-NEXT: ret %x = load <2 x i8>, <2 x i8>* %px %y = load <2 x i8>, <2 x i8>* %py @@ -202,8 +202,8 @@ define void @v2i16(<2 x i16>* 
%px, <2 x i16>* %py, <2 x i16>* %pz) nounwind { ; CHECK-NEXT: ushr v0.2s, v0.2s, #16 ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w8, [x2, #2] ; CHECK-NEXT: strh w9, [x2] +; CHECK-NEXT: strh w8, [x2, #2] ; CHECK-NEXT: ret %x = load <2 x i16>, <2 x i16>* %px %y = load <2 x i16>, <2 x i16>* %py diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index 40bbac2c05579..5f92f713573d1 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -159,8 +159,8 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind { ; CHECK-NEXT: ushr v0.2s, v0.2s, #24 ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w8, [x2, #1] ; CHECK-NEXT: strb w9, [x2] +; CHECK-NEXT: strb w8, [x2, #1] ; CHECK-NEXT: ret %x = load <2 x i8>, <2 x i8>* %px %y = load <2 x i8>, <2 x i8>* %py @@ -201,8 +201,8 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind { ; CHECK-NEXT: ushr v0.2s, v0.2s, #16 ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w8, [x2, #2] ; CHECK-NEXT: strh w9, [x2] +; CHECK-NEXT: strh w8, [x2, #2] ; CHECK-NEXT: ret %x = load <2 x i16>, <2 x i16>* %px %y = load <2 x i16>, <2 x i16>* %py diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index 3eacf03dc6a87..08114f49bdeb7 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -160,8 +160,8 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind { ; CHECK-NEXT: ushr v0.2s, v0.2s, #24 ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w8, [x2, #1] ; CHECK-NEXT: strb w9, [x2] +; CHECK-NEXT: strb w8, [x2, #1] ; CHECK-NEXT: ret %x = load <2 x i8>, <2 x i8>* %px %y = load <2 x i8>, <2 x i8>* %py @@ -202,8 +202,8 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind { ; CHECK-NEXT: ushr v0.2s, v0.2s, #16 ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w8, [x2, #2] ; CHECK-NEXT: strh w9, [x2] +; CHECK-NEXT: strh w8, [x2, #2] ; CHECK-NEXT: ret %x = load <2 x i16>, <2 x i16>* %px %y = load <2 x i16>, <2 x i16>* %py diff --git a/llvm/test/CodeGen/AMDGPU/private-element-size.ll b/llvm/test/CodeGen/AMDGPU/private-element-size.ll index 843f554b05134..94bebe7a31fcb 100644 --- a/llvm/test/CodeGen/AMDGPU/private-element-size.ll +++ b/llvm/test/CodeGen/AMDGPU/private-element-size.ll @@ -141,8 +141,8 @@ entry: ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:24{{$}} ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:28{{$}} -; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} ; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}} +; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} define amdgpu_kernel void @private_elt_size_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -177,8 +177,8 @@ entry: ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:24{{$}} ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:28{{$}} -; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} ; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}} +; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 
offen{{$}} define amdgpu_kernel void @private_elt_size_f64(double addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -224,10 +224,10 @@ entry: ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:40{{$}} ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:44{{$}} -; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} -; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}} -; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:8{{$}} ; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:12{{$}} +; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:8{{$}} +; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}} +; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} define amdgpu_kernel void @private_elt_size_v2i64(<2 x i64> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll index 3105f5ba5829a..0682d022c5e3f 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll @@ -1842,23 +1842,23 @@ entry: ; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.10, $r2 :: (load 4 from got) ; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.11, $r2 :: (load 4 from got) ; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 56, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 60, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 60, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 64, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 68, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 68, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 72, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 76, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 76, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 80, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[SCRATCHREG:[0-9]+]], 84, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW renamable $r[[SCRATCHREG:[0-9]+]], 84, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 88, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 92, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 92, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 96, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 100, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 100, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 104, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 108, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 108, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 112, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed 
renamable $r[[SCRATCHREG:[0-9]+]], 116, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 116, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 120, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 124, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 124, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 128, $r1 :: (store 4) ; 32BIT-DAG: renamable $r[[REGF1:[0-9]+]] = LWZtoc @f14, $r2 :: (load 4 from got) ; 32BIT-DAG: renamable $r3 = LWZ 0, killed renamable $r[[REGF1]] :: (load 4 from @f14) @@ -2243,33 +2243,33 @@ define void @caller_mix() { ; 32BIT-DAG: $r9 = LI 7 ; 32BIT-DAG: $r10 = LI 8 ; 32BIT-DAG: STW killed renamable $r[[REG1:[0-9]+]], 56, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[REG2:[0-9]+]], 60, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW renamable $r[[REG2:[0-9]+]], 60, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[REG3:[0-9]+]], 64, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[REG4:[0-9]+]], 68, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW renamable $r[[REG4:[0-9]+]], 68, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[REG5:[0-9]+]], 72, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[REG6:[0-9]+]], 76, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW renamable $r[[REG6:[0-9]+]], 76, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[REG7:[0-9]+]], 80, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[REG8:[0-9]+]], 84, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW renamable $r[[REG8:[0-9]+]], 84, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[REG9:[0-9]+]], 88, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[REG10:[0-9]+]], 92, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW renamable $r[[REG10:[0-9]+]], 92, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[REG11:[0-9]+]], 96, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[REG12:[0-9]+]], 100, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW renamable $r[[REG12:[0-9]+]], 100, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[REG13:[0-9]+]], 104, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[REG14:[0-9]+]], 108, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW renamable $r[[REG14:[0-9]+]], 108, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[REG15:[0-9]+]], 112, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[REG16:[0-9]+]], 116, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW renamable $r[[REG16:[0-9]+]], 116, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[REG17:[0-9]+]], 120, $r1 :: (store 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[REG18:[0-9]+]], 128, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[REG19:[0-9]+]], 124, $r1 :: (store 4 + 4) -; 32BIT-DAG: STW killed renamable $r[[REG20:[0-9]+]], 132, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW renamable $r[[REG19:[0-9]+]], 124, $r1 :: (store 4 + 4, align 8) +; 32BIT-DAG: STW killed renamable $r[[REG20:[0-9]+]], 132, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[REG21:[0-9]+]], 136, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[REG22:[0-9]+]], 140, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW killed renamable $r[[REG22:[0-9]+]], 140, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[REG23:[0-9]+]], 144, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed 
renamable $r[[REG24:[0-9]+]], 148, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[REG25:[0-9]+]], 152, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[REG26:[0-9]+]], 156, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW killed renamable $r[[REG26:[0-9]+]], 156, $r1 :: (store 4 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[REG27:[0-9]+]], 160, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[REG28:[0-9]+]], 164, $r1 :: (store 4 + 4) +; 32BIT-DAG: STW killed renamable $r[[REG28:[0-9]+]], 164, $r1 :: (store 4 + 4, align 8) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit $f10, implicit $f11, implicit $f12, implicit $f13, implicit $r2, implicit-def $r1, implicit-def dead $r3 ; 32BIT-NEXT: ADJCALLSTACKUP 168, 0, implicit-def dead $r1, implicit $r1 From 1ec02efee9b1d01cde89f31ca9ba6a46b7662ac5 Mon Sep 17 00:00:00 2001 From: zoecarver Date: Mon, 14 Sep 2020 13:52:42 -0700 Subject: [PATCH 0587/1079] [libc++] Make rotate a constexpr. This patch makes `std::rotate` a constexpr. In doing so, this patch also updates the internal `__move` and `__move_backward` functions to be constexpr. Reviewed By: ldionne Differential Revision: https://reviews.llvm.org/D65721 --- libcxx/include/algorithm | 68 +++++++++++++------ libcxx/include/iterator | 16 ++--- .../alg.move/move.pass.cpp | 42 +++++++++++- .../alg.move/move_backward.pass.cpp | 20 +++++- .../alg.rotate/rotate.pass.cpp | 11 ++- libcxx/www/cxx2a_status.html | 2 +- 6 files changed, 123 insertions(+), 36 deletions(-) diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index 83e49f19ab987..37f2b4dd76263 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -1631,7 +1631,7 @@ search_n(_ForwardIterator __first, _ForwardIterator __last, _Size __count, const // copy template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR _Iter __unwrap_iter(_Iter __i) { @@ -1639,7 +1639,7 @@ __unwrap_iter(_Iter __i) } template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1653,7 +1653,7 @@ __unwrap_iter(move_iterator<_Tp*> __i) #if _LIBCPP_DEBUG_LEVEL < 2 template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1665,7 +1665,7 @@ __unwrap_iter(__wrap_iter<_Tp*> __i) } template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1679,7 +1679,7 @@ __unwrap_iter(__wrap_iter __i) #else template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1859,18 +1859,28 @@ copy_n(_InputIterator __first, _Size __orig_n, _OutputIterator __result) // move +// __move_constexpr exists so that __move doesn't call itself when delegating to the constexpr +// version of __move. 
template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 _OutputIterator -__move(_InputIterator __first, _InputIterator __last, _OutputIterator __result) +__move_constexpr(_InputIterator __first, _InputIterator __last, _OutputIterator __result) { for (; __first != __last; ++__first, (void) ++__result) *__result = _VSTD::move(*__first); return __result; } +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +_OutputIterator +__move(_InputIterator __first, _InputIterator __last, _OutputIterator __result) +{ + return __move_constexpr(__first, __last, __result); +} + template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 typename enable_if < is_same::type, _Up>::value && @@ -1879,6 +1889,8 @@ typename enable_if >::type __move(_Tp* __first, _Tp* __last, _Up* __result) { + if (__libcpp_is_constant_evaluated()) + return __move_constexpr(__first, __last, __result); const size_t __n = static_cast(__last - __first); if (__n > 0) _VSTD::memmove(__result, __first, __n * sizeof(_Up)); @@ -1886,7 +1898,7 @@ __move(_Tp* __first, _Tp* __last, _Up* __result) } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator move(_InputIterator __first, _InputIterator __last, _OutputIterator __result) { @@ -1895,18 +1907,28 @@ move(_InputIterator __first, _InputIterator __last, _OutputIterator __result) // move_backward +// __move_backward_constexpr exists so that __move_backward doesn't call itself when delegating to +// the constexpr version of __move_backward. template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 _OutputIterator -__move_backward(_InputIterator __first, _InputIterator __last, _OutputIterator __result) +__move_backward_constexpr(_InputIterator __first, _InputIterator __last, _OutputIterator __result) { while (__first != __last) *--__result = _VSTD::move(*--__last); return __result; } +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +_OutputIterator +__move_backward(_InputIterator __first, _InputIterator __last, _OutputIterator __result) +{ + return __move_backward_constexpr(__first, __last, __result); +} + template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 typename enable_if < is_same::type, _Up>::value && @@ -1915,6 +1937,8 @@ typename enable_if >::type __move_backward(_Tp* __first, _Tp* __last, _Up* __result) { + if (__libcpp_is_constant_evaluated()) + return __move_backward_constexpr(__first, __last, __result); const size_t __n = static_cast(__last - __first); if (__n > 0) { @@ -1925,7 +1949,7 @@ __move_backward(_Tp* __first, _Tp* __last, _Up* __result) } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _BidirectionalIterator2 move_backward(_BidirectionalIterator1 __first, _BidirectionalIterator1 __last, _BidirectionalIterator2 __result) @@ -2333,7 +2357,7 @@ reverse_copy(_BidirectionalIterator __first, _BidirectionalIterator __last, _Out // rotate template -_ForwardIterator +_LIBCPP_CONSTEXPR_AFTER_CXX11 _ForwardIterator __rotate_left(_ForwardIterator __first, _ForwardIterator __last) { typedef typename iterator_traits<_ForwardIterator>::value_type value_type; @@ -2344,7 +2368,7 @@ __rotate_left(_ForwardIterator __first, _ForwardIterator __last) } template -_BidirectionalIterator +_LIBCPP_CONSTEXPR_AFTER_CXX11 
_BidirectionalIterator __rotate_right(_BidirectionalIterator __first, _BidirectionalIterator __last) { typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type; @@ -2356,7 +2380,7 @@ __rotate_right(_BidirectionalIterator __first, _BidirectionalIterator __last) } template -_ForwardIterator +_LIBCPP_CONSTEXPR_AFTER_CXX14 _ForwardIterator __rotate_forward(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) { _ForwardIterator __i = __middle; @@ -2392,7 +2416,7 @@ __rotate_forward(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIt template inline _LIBCPP_INLINE_VISIBILITY -_Integral +_LIBCPP_CONSTEXPR_AFTER_CXX14 _Integral __algo_gcd(_Integral __x, _Integral __y) { do @@ -2405,7 +2429,7 @@ __algo_gcd(_Integral __x, _Integral __y) } template -_RandomAccessIterator +_LIBCPP_CONSTEXPR_AFTER_CXX14 _RandomAccessIterator __rotate_gcd(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last) { typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; @@ -2441,7 +2465,7 @@ __rotate_gcd(_RandomAccessIterator __first, _RandomAccessIterator __middle, _Ran template inline _LIBCPP_INLINE_VISIBILITY -_ForwardIterator +_LIBCPP_CONSTEXPR_AFTER_CXX11 _ForwardIterator __rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last, _VSTD::forward_iterator_tag) { @@ -2456,7 +2480,7 @@ __rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator _ template inline _LIBCPP_INLINE_VISIBILITY -_BidirectionalIterator +_LIBCPP_CONSTEXPR_AFTER_CXX11 _BidirectionalIterator __rotate(_BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last, _VSTD::bidirectional_iterator_tag) { @@ -2473,7 +2497,7 @@ __rotate(_BidirectionalIterator __first, _BidirectionalIterator __middle, _Bidir template inline _LIBCPP_INLINE_VISIBILITY -_RandomAccessIterator +_LIBCPP_CONSTEXPR_AFTER_CXX11 _RandomAccessIterator __rotate(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, _VSTD::random_access_iterator_tag) { @@ -2491,7 +2515,7 @@ __rotate(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomA template inline _LIBCPP_INLINE_VISIBILITY -_ForwardIterator +_LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) { if (__first == __middle) diff --git a/libcxx/include/iterator b/libcxx/include/iterator index 36571a50b8bc5..45516db24e7cd 100644 --- a/libcxx/include/iterator +++ b/libcxx/include/iterator @@ -1393,13 +1393,13 @@ operator+(typename __wrap_iter<_Iter>::difference_type, __wrap_iter<_Iter>) _NOE template _Op _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 copy(_Ip, _Ip, _Op); template _B2 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 copy_backward(_B1, _B1, _B2); -template _Op _LIBCPP_INLINE_VISIBILITY move(_Ip, _Ip, _Op); -template _B2 _LIBCPP_INLINE_VISIBILITY move_backward(_B1, _B1, _B2); +template _Op _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 move(_Ip, _Ip, _Op); +template _B2 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 move_backward(_B1, _B1, _B2); #if _LIBCPP_DEBUG_LEVEL < 2 template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1410,7 +1410,7 @@ __unwrap_iter(__wrap_iter<_Tp*>); #else template -inline _LIBCPP_INLINE_VISIBILITY 
_LIBCPP_CONSTEXPR_IF_NODEBUG +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1604,12 +1604,12 @@ private: template friend _LIBCPP_CONSTEXPR_AFTER_CXX17 _Op copy(_Ip, _Ip, _Op); template friend _LIBCPP_CONSTEXPR_AFTER_CXX17 _B2 copy_backward(_B1, _B1, _B2); - template friend _Op move(_Ip, _Ip, _Op); - template friend _B2 move_backward(_B1, _B1, _B2); + template friend _LIBCPP_CONSTEXPR_AFTER_CXX17 _Op move(_Ip, _Ip, _Op); + template friend _LIBCPP_CONSTEXPR_AFTER_CXX17 _B2 move_backward(_B1, _B1, _B2); #if _LIBCPP_DEBUG_LEVEL < 2 template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend + _LIBCPP_CONSTEXPR friend typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1618,7 +1618,7 @@ private: __unwrap_iter(__wrap_iter<_Tp*>); #else template - inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG + inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR typename enable_if < is_trivially_copy_assignable<_Tp>::value, diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move.pass.cpp index cdb126d4942ce..721a568750f19 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move.pass.cpp @@ -13,6 +13,10 @@ // OutIter // move(InIter first, InIter last, OutIter result); +// UNSUPPORTED: clang-6, clang-7 +// UNSUPPORTED: apple-clang-9, apple-clang-10, apple-clang-11 +// UNSUPPORTED: gcc-5, gcc-6, gcc-7, gcc-8 + #include #include #include @@ -21,11 +25,11 @@ #include "test_iterators.h" template -void +_LIBCPP_CONSTEXPR_AFTER_CXX17 bool test() { const unsigned N = 1000; - int ia[N]; + int ia[N] = {}; for (unsigned i = 0; i < N; ++i) ia[i] = i; int ib[N] = {0}; @@ -34,6 +38,8 @@ test() assert(base(r) == ib+N); for (unsigned i = 0; i < N; ++i) assert(ia[i] == ib[i]); + + return true; } #if TEST_STD_VER >= 11 @@ -128,5 +134,37 @@ int main(int, char**) test1*, std::unique_ptr*>(); #endif // TEST_STD_VER >= 11 +#if TEST_STD_VER > 17 + static_assert(test, input_iterator >()); + static_assert(test, forward_iterator >()); + static_assert(test, bidirectional_iterator >()); + static_assert(test, random_access_iterator >()); + static_assert(test, int*>()); + + static_assert(test, input_iterator >()); + static_assert(test, forward_iterator >()); + static_assert(test, bidirectional_iterator >()); + static_assert(test, random_access_iterator >()); + static_assert(test, int*>()); + + static_assert(test, input_iterator >()); + static_assert(test, forward_iterator >()); + static_assert(test, bidirectional_iterator >()); + static_assert(test, random_access_iterator >()); + static_assert(test, int*>()); + + static_assert(test, input_iterator >()); + static_assert(test, forward_iterator >()); + static_assert(test, bidirectional_iterator >()); + static_assert(test, random_access_iterator >()); + static_assert(test, int*>()); + + static_assert(test >()); + static_assert(test >()); + static_assert(test >()); + static_assert(test >()); + static_assert(test()); +#endif // TEST_STD_VER > 17 + return 0; } diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move_backward.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move_backward.pass.cpp index 365c1a1158d7e..1a845cc1a88ff 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move_backward.pass.cpp +++ 
b/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move_backward.pass.cpp @@ -21,11 +21,11 @@ #include "test_iterators.h" template -void +_LIBCPP_CONSTEXPR_AFTER_CXX17 bool test() { const unsigned N = 1000; - int ia[N]; + int ia[N] = {}; for (unsigned i = 0; i < N; ++i) ia[i] = i; int ib[N] = {0}; @@ -34,6 +34,8 @@ test() assert(base(r) == ib); for (unsigned i = 0; i < N; ++i) assert(ia[i] == ib[i]); + + return true; } #if TEST_STD_VER >= 11 @@ -82,5 +84,19 @@ int main(int, char**) test1*, std::unique_ptr*>(); #endif // TEST_STD_VER >= 11 +#if TEST_STD_VER > 17 + static_assert(test, bidirectional_iterator >()); + static_assert(test, random_access_iterator >()); + static_assert(test, int*>()); + + static_assert(test, bidirectional_iterator >()); + static_assert(test, random_access_iterator >()); + static_assert(test, int*>()); + + static_assert(test >()); + static_assert(test >()); + static_assert(test()); +#endif // TEST_STD_VER > 17 + return 0; } diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp index 007faf685bfc2..7c905bc83f0fd 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp @@ -20,7 +20,7 @@ #include "test_iterators.h" template -void +_LIBCPP_CONSTEXPR_AFTER_CXX17 bool test() { int ia[] = {0}; @@ -209,6 +209,8 @@ test() assert(ig[3] == 0); assert(ig[4] == 1); assert(ig[5] == 2); + + return true; } #if TEST_STD_VER >= 11 @@ -435,5 +437,12 @@ int main(int, char**) #endif +#if TEST_STD_VER > 17 + static_assert(test >()); + static_assert(test >()); + static_assert(test >()); + static_assert(test()); +#endif // TEST_STD_VER > 17 + return 0; } diff --git a/libcxx/www/cxx2a_status.html b/libcxx/www/cxx2a_status.html index 73a2c50c71c90..c6ccd9681d759 100644 --- a/libcxx/www/cxx2a_status.html +++ b/libcxx/www/cxx2a_status.html @@ -261,7 +261,7 @@

Paper Status

The missing bits in P0600 are in [mem.res.class], [mem.poly.allocator.class], and [container.node.overview]

-The missing bits in P0202 are in copy, copy_backwards, move, and move_backwards (and the ones that call them: copy_n, rotate_copy, merge, set_union, set_difference, and set_symmetric_difference). This is because the first four algorithms have specializations that call memmove which is not constexpr. See Bug 25165
+The missing bits in P0202 are in copy and copy_backwards (and the ones that call them: copy_n, rotate_copy, merge, set_union, set_difference, and set_symmetric_difference). This is because the first four algorithms have specializations that call memmove which is not constexpr. See Bug 25165

Library Working group Issues Status

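The dispatch idiom that patch 0587 introduces for `__move` is easier to see without libc++'s macros. The following is a minimal standalone sketch, not libc++ code: the names `move_loop` and `fast_move` are invented here, and the public C++20 `std::is_constant_evaluated()` stands in for the library-internal `__libcpp_is_constant_evaluated()`. The point it illustrates is that the trivially-copyable fast path may call `memmove` at run time but must fall back to an element-wise loop during constant evaluation, because `memmove` is not usable in a constant expression.

#include <cassert>
#include <cstddef>
#include <cstring>
#include <type_traits>
#include <utility>

// Element-wise fallback; legal in a constant expression.
template <class T>
constexpr T* move_loop(T* first, T* last, T* result) {
  for (; first != last; ++first, ++result)
    *result = std::move(*first);
  return result;
}

// Run-time fast path with a compile-time escape hatch.
template <class T>
constexpr T* fast_move(T* first, T* last, T* result) {
  static_assert(std::is_trivially_copyable<T>::value,
                "the memmove path requires trivial copies");
  if (std::is_constant_evaluated())        // C++20 public equivalent of the
    return move_loop(first, last, result); // internal check used in the patch
  const std::size_t n = static_cast<std::size_t>(last - first);
  if (n > 0)
    std::memmove(result, first, n * sizeof(T)); // never reached at compile time
  return result + n;
}

constexpr bool moves_at_compile_time() {
  int from[3] = {1, 2, 3};
  int to[3] = {};
  fast_move(from, from + 3, to);
  return to[0] == 1 && to[1] == 2 && to[2] == 3;
}

int main() {
  static_assert(moves_at_compile_time(), "constant evaluation takes the loop");
  int from[3] = {4, 5, 6};
  int to[3] = {};
  fast_move(from, from + 3, to);           // run time takes memmove
  assert(to[0] == 4 && to[1] == 5 && to[2] == 6);
  return 0;
}

libc++ itself additionally splits the loop out under the separate name `__move_constexpr`, as the comment added by the patch explains, so that the pointer overload of `__move` can delegate to it without recursing into itself through overload resolution.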
From b552a30283cea1e9d3f90aef3cdd2ac24c366062 Mon Sep 17 00:00:00 2001 From: Nicholas-Baron Date: Mon, 14 Sep 2020 16:37:41 -0400 Subject: [PATCH 0588/1079] [libc++] Finish implementing P0202R3 cppreference lists the support for this paper as partial. I found 4 functions which the paper marks as `constexpr`, but did not use the appropriate macro. Differential Revision: https://reviews.llvm.org/D84275 --- libcxx/include/algorithm | 7 +- .../alg.rotate/rotate_copy.pass.cpp | 258 +++++++++--------- .../alg.sorting/alg.merge/merge.pass.cpp | 39 ++- .../alg.sorting/alg.merge/merge_comp.pass.cpp | 36 +-- libcxx/www/cxx2a_status.html | 2 +- 5 files changed, 170 insertions(+), 172 deletions(-) diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index 37f2b4dd76263..5d09b6c3c0150 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -2529,7 +2529,7 @@ rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __l // rotate_copy template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator rotate_copy(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last, _OutputIterator __result) { @@ -4394,6 +4394,7 @@ binary_search(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __va // merge template +_LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator __merge(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2, _OutputIterator __result, _Compare __comp) @@ -4417,7 +4418,7 @@ __merge(_InputIterator1 __first1, _InputIterator1 __last1, } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator merge(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2, _OutputIterator __result, _Compare __comp) @@ -4427,7 +4428,7 @@ merge(_InputIterator1 __first1, _InputIterator1 __last1, } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator merge(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2, _OutputIterator __result) diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate_copy.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate_copy.pass.cpp index d66bf8caad6e6..8acb1a129e386 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate_copy.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate_copy.pass.cpp @@ -18,139 +18,139 @@ #include "test_macros.h" #include "test_iterators.h" -// #if TEST_STD_VER > 17 -// TEST_CONSTEXPR bool test_constexpr() { -// int ia[] = {1, 3, 5, 2, 5, 6}; -// int ib[std::size(ia)] = {0}; -// -// const size_t N = 2; -// const auto middle = std::begin(ia) + N; -// auto it = std::rotate_copy(std::begin(ia), middle, std::end(ia), std::begin(ib)); -// -// return std::distance(std::begin(ib), it) == std::size(ia) -// && std::equal (std::begin(ia), middle, std::begin(ib) + std::size(ia) - N) -// && std::equal (middle, std::end(ia), std::begin(ib)) -// ; -// } -// #endif template -void -test() -{ - int ia[] = {0, 1, 2, 3}; - const unsigned sa = sizeof(ia)/sizeof(ia[0]); - int ib[sa] = {0}; - - OutIter r = std::rotate_copy(InIter(ia), InIter(ia), InIter(ia), OutIter(ib)); - assert(base(r) == ib); - - r = std::rotate_copy(InIter(ia), InIter(ia), InIter(ia+1), OutIter(ib)); - 
assert(base(r) == ib+1); - assert(ib[0] == 0); - - r = std::rotate_copy(InIter(ia), InIter(ia+1), InIter(ia+1), OutIter(ib)); - assert(base(r) == ib+1); - assert(ib[0] == 0); - - r = std::rotate_copy(InIter(ia), InIter(ia), InIter(ia+2), OutIter(ib)); - assert(base(r) == ib+2); - assert(ib[0] == 0); - assert(ib[1] == 1); - - r = std::rotate_copy(InIter(ia), InIter(ia+1), InIter(ia+2), OutIter(ib)); - assert(base(r) == ib+2); - assert(ib[0] == 1); - assert(ib[1] == 0); - - r = std::rotate_copy(InIter(ia), InIter(ia+2), InIter(ia+2), OutIter(ib)); - assert(base(r) == ib+2); - assert(ib[0] == 0); - assert(ib[1] == 1); - - r = std::rotate_copy(InIter(ia), InIter(ia), InIter(ia+3), OutIter(ib)); - assert(base(r) == ib+3); - assert(ib[0] == 0); - assert(ib[1] == 1); - assert(ib[2] == 2); - - r = std::rotate_copy(InIter(ia), InIter(ia+1), InIter(ia+3), OutIter(ib)); - assert(base(r) == ib+3); - assert(ib[0] == 1); - assert(ib[1] == 2); - assert(ib[2] == 0); - - r = std::rotate_copy(InIter(ia), InIter(ia+2), InIter(ia+3), OutIter(ib)); - assert(base(r) == ib+3); - assert(ib[0] == 2); - assert(ib[1] == 0); - assert(ib[2] == 1); - - r = std::rotate_copy(InIter(ia), InIter(ia+3), InIter(ia+3), OutIter(ib)); - assert(base(r) == ib+3); - assert(ib[0] == 0); - assert(ib[1] == 1); - assert(ib[2] == 2); - - r = std::rotate_copy(InIter(ia), InIter(ia), InIter(ia+4), OutIter(ib)); - assert(base(r) == ib+4); - assert(ib[0] == 0); - assert(ib[1] == 1); - assert(ib[2] == 2); - assert(ib[3] == 3); - - r = std::rotate_copy(InIter(ia), InIter(ia+1), InIter(ia+4), OutIter(ib)); - assert(base(r) == ib+4); - assert(ib[0] == 1); - assert(ib[1] == 2); - assert(ib[2] == 3); - assert(ib[3] == 0); - - r = std::rotate_copy(InIter(ia), InIter(ia+2), InIter(ia+4), OutIter(ib)); - assert(base(r) == ib+4); - assert(ib[0] == 2); - assert(ib[1] == 3); - assert(ib[2] == 0); - assert(ib[3] == 1); - - r = std::rotate_copy(InIter(ia), InIter(ia+3), InIter(ia+4), OutIter(ib)); - assert(base(r) == ib+4); - assert(ib[0] == 3); - assert(ib[1] == 0); - assert(ib[2] == 1); - assert(ib[3] == 2); - - r = std::rotate_copy(InIter(ia), InIter(ia+4), InIter(ia+4), OutIter(ib)); - assert(base(r) == ib+4); - assert(ib[0] == 0); - assert(ib[1] == 1); - assert(ib[2] == 2); - assert(ib[3] == 3); +TEST_CONSTEXPR_CXX20 void test() { + int ia[] = {0, 1, 2, 3}; + const unsigned sa = sizeof(ia) / sizeof(ia[0]); + int ib[sa] = {0}; + + OutIter r = std::rotate_copy(InIter(ia), InIter(ia), InIter(ia), OutIter(ib)); + assert(base(r) == ib); + + r = std::rotate_copy(InIter(ia), InIter(ia), InIter(ia + 1), OutIter(ib)); + assert(base(r) == ib + 1); + assert(ib[0] == 0); + + r = std::rotate_copy(InIter(ia), InIter(ia + 1), InIter(ia + 1), OutIter(ib)); + assert(base(r) == ib + 1); + assert(ib[0] == 0); + + r = std::rotate_copy(InIter(ia), InIter(ia), InIter(ia + 2), OutIter(ib)); + assert(base(r) == ib + 2); + assert(ib[0] == 0); + assert(ib[1] == 1); + + r = std::rotate_copy(InIter(ia), InIter(ia + 1), InIter(ia + 2), OutIter(ib)); + assert(base(r) == ib + 2); + assert(ib[0] == 1); + assert(ib[1] == 0); + + r = std::rotate_copy(InIter(ia), InIter(ia + 2), InIter(ia + 2), OutIter(ib)); + assert(base(r) == ib + 2); + assert(ib[0] == 0); + assert(ib[1] == 1); + + r = std::rotate_copy(InIter(ia), InIter(ia), InIter(ia + 3), OutIter(ib)); + assert(base(r) == ib + 3); + assert(ib[0] == 0); + assert(ib[1] == 1); + assert(ib[2] == 2); + + r = std::rotate_copy(InIter(ia), InIter(ia + 1), InIter(ia + 3), OutIter(ib)); + assert(base(r) == ib + 3); + assert(ib[0] == 1); 
+ assert(ib[1] == 2); + assert(ib[2] == 0); + + r = std::rotate_copy(InIter(ia), InIter(ia + 2), InIter(ia + 3), OutIter(ib)); + assert(base(r) == ib + 3); + assert(ib[0] == 2); + assert(ib[1] == 0); + assert(ib[2] == 1); + + r = std::rotate_copy(InIter(ia), InIter(ia + 3), InIter(ia + 3), OutIter(ib)); + assert(base(r) == ib + 3); + assert(ib[0] == 0); + assert(ib[1] == 1); + assert(ib[2] == 2); + + r = std::rotate_copy(InIter(ia), InIter(ia), InIter(ia + 4), OutIter(ib)); + assert(base(r) == ib + 4); + assert(ib[0] == 0); + assert(ib[1] == 1); + assert(ib[2] == 2); + assert(ib[3] == 3); + + r = std::rotate_copy(InIter(ia), InIter(ia + 1), InIter(ia + 4), OutIter(ib)); + assert(base(r) == ib + 4); + assert(ib[0] == 1); + assert(ib[1] == 2); + assert(ib[2] == 3); + assert(ib[3] == 0); + + r = std::rotate_copy(InIter(ia), InIter(ia + 2), InIter(ia + 4), OutIter(ib)); + assert(base(r) == ib + 4); + assert(ib[0] == 2); + assert(ib[1] == 3); + assert(ib[2] == 0); + assert(ib[3] == 1); + + r = std::rotate_copy(InIter(ia), InIter(ia + 3), InIter(ia + 4), OutIter(ib)); + assert(base(r) == ib + 4); + assert(ib[0] == 3); + assert(ib[1] == 0); + assert(ib[2] == 1); + assert(ib[3] == 2); + + r = std::rotate_copy(InIter(ia), InIter(ia + 4), InIter(ia + 4), OutIter(ib)); + assert(base(r) == ib + 4); + assert(ib[0] == 0); + assert(ib[1] == 1); + assert(ib[2] == 2); + assert(ib[3] == 3); + + { + int ints[] = {1, 3, 5, 2, 5, 6}; + int const n_ints = sizeof(ints)/sizeof(int); + int zeros[n_ints] = {0}; + + const size_t N = 2; + const auto middle = std::begin(ints) + N; + auto it = std::rotate_copy(std::begin(ints), middle, std::end(ints), std::begin(zeros)); + assert(std::distance(std::begin(zeros), it) == n_ints); + assert(std::equal(std::begin(ints), middle, std::begin(zeros) + n_ints - N)); + assert(std::equal(middle, std::end(ints), std::begin(zeros))); + } +} + +TEST_CONSTEXPR_CXX20 bool all_tests() { + test, output_iterator >(); + test, forward_iterator >(); + test, bidirectional_iterator >(); + test, random_access_iterator >(); + test, int*>(); + + test, output_iterator >(); + test, forward_iterator >(); + test, bidirectional_iterator >(); + test, random_access_iterator >(); + test, int*>(); + + test >(); + test >(); + test >(); + test >(); + test(); + + return true; } -int main(int, char**) -{ - test, output_iterator >(); - test, forward_iterator >(); - test, bidirectional_iterator >(); - test, random_access_iterator >(); - test, int*>(); - - test, output_iterator >(); - test, forward_iterator >(); - test, bidirectional_iterator >(); - test, random_access_iterator >(); - test, int*>(); - - test >(); - test >(); - test >(); - test >(); - test(); - -// #if TEST_STD_VER > 17 -// static_assert(test_constexpr()); -// #endif +int main(int, char**) { + all_tests(); +#if TEST_STD_VER > 17 + static_assert(all_tests()); +#endif return 0; } diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.merge/merge.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.merge/merge.pass.cpp index 6c6f0c46d446f..167da9aa2dddf 100644 --- a/libcxx/test/std/algorithms/alg.sorting/alg.merge/merge.pass.cpp +++ b/libcxx/test/std/algorithms/alg.sorting/alg.merge/merge.pass.cpp @@ -24,28 +24,26 @@ #include "test_macros.h" #include "test_iterators.h" - -// #if TEST_STD_VER > 17 -// TEST_CONSTEXPR bool test_constexpr() { -// int ia[] = {0, 1, 2, 3, 4}; -// int ib[] = {2, 4, 6, 8}; -// int ic[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -// const int expected[] = {0, 1, 2, 2, 3, 4, 4, 6, 8}; -// -// auto it = std::merge(std::begin(ia), 
std::end(ia), std::begin(ib), std::end(ib), std::begin(ic)); -// return std::distance(std::begin(ic), it) == (std::size(ia) + std::size(ib)) -// && *it == 0 -// && std::equal(std::begin(ic), it, std::begin(expected), std::end(expected)) -// ; -// } -// #endif +#if TEST_STD_VER > 17 +TEST_CONSTEXPR bool test_constexpr() { + int ia[] = {0, 1, 2, 3, 4}; + int ib[] = {2, 4, 6, 8}; + int ic[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + const int expected[] = {0, 1, 2, 2, 3, 4, 4, 6, 8}; + + auto it = std::merge(std::begin(ia), std::end(ia), std::begin(ib), + std::end(ib), std::begin(ic)); + assert(std::distance(std::begin(ic), it) == (std::size(ia) + std::size(ib))); + assert(*it == 0); + assert(std::equal(std::begin(ic), it, std::begin(expected), std::end(expected))); + return true; +} +#endif std::mt19937 randomness; template -void -test() -{ +void test() { { unsigned N = 100000; int* ia = new int[N]; @@ -242,9 +240,8 @@ int main(int, char**) test(); #if TEST_STD_VER > 17 -// Not yet - waiting on std::copy -// static_assert(test_constexpr()); + static_assert(test_constexpr()); #endif - return 0; + return 0; } diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.merge/merge_comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.merge/merge_comp.pass.cpp index afa7073581e54..8d2dbb7268587 100644 --- a/libcxx/test/std/algorithms/alg.sorting/alg.merge/merge_comp.pass.cpp +++ b/libcxx/test/std/algorithms/alg.sorting/alg.merge/merge_comp.pass.cpp @@ -28,22 +28,23 @@ #include "test_iterators.h" #include "counting_predicates.h" -// #if TEST_STD_VER > 17 -// TEST_CONSTEXPR bool test_constexpr() { -// int ia[] = {0, 1, 2, 3, 4}; -// int ib[] = {2, 4, 6, 8}; -// int ic[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -// const int expected[] = {0, 1, 2, 2, 3, 4, 4, 6, 8}; -// -// auto it = std::merge(std::begin(ia), std::end(ia), -// std::begin(ib), std::end(ib), -// std::begin(ic), [](int a, int b) {return a == b; }); -// return std::distance(std::begin(ic), it) == (std::size(ia) + std::size(ib)) -// && *it == 0 -// && std::equal(std::begin(ic), it, std::begin(expected), std::end(expected)) -// ; -// } -// #endif +#if TEST_STD_VER > 17 +TEST_CONSTEXPR bool test_constexpr() { + int ia[] = {0, 1, 2, 3, 4}; + int ib[] = {2, 4, 6, 8}; + int ic[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + const int expected[] = {0, 1, 2, 2, 3, 4, 4, 6, 8}; + + auto it = + std::merge(std::begin(ia), std::end(ia), std::begin(ib), std::end(ib), + std::begin(ic), [](int a, int b) { return a == b; }); + assert(std::distance(std::begin(ic), it) == (std::size(ia) + std::size(ib))); + assert(*it == 0); + assert( + std::equal(std::begin(ic), it, std::begin(expected), std::end(expected))); + return true; +} +#endif std::mt19937 randomness; @@ -253,8 +254,7 @@ int main(int, char**) test(); #if TEST_STD_VER > 17 -// Not yet - waiting on std::copy -// static_assert(test_constexpr()); + static_assert(test_constexpr()); #endif return 0; diff --git a/libcxx/www/cxx2a_status.html b/libcxx/www/cxx2a_status.html index c6ccd9681d759..88df02bcb117d 100644 --- a/libcxx/www/cxx2a_status.html +++ b/libcxx/www/cxx2a_status.html @@ -261,7 +261,7 @@

Paper Status

The missing bits in P0600 are in [mem.res.class], [mem.poly.allocator.class], and [container.node.overview]

-The missing bits in P0202 are in copy and copy_backwards (and the ones that call them: copy_n, rotate_copy, merge, set_union, set_difference, and set_symmetric_difference). This is because the first four algorithms have specializations that call memmove which is not constexpr. See Bug 25165
+The missing bits in P0202 are in copy and copy_backwards (and the ones that call them: copy_n, set_union, set_difference, and set_symmetric_difference). This is because the first two algorithms have specializations that call memmove which is not constexpr. See Bug 25165

Library Working group Issues Status

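The test changes in patch 0588 all follow one pattern: the test body is a `constexpr` function returning `bool`, executed once at run time through `assert` and once at compile time through `static_assert`. Below is a self-contained sketch of that pattern for `merge`; it is an illustration, not a file from the patch, and it assumes a C++20 toolchain whose `<algorithm>` already ships P0202's constexpr additions.

#include <algorithm>
#include <cassert>
#include <iterator>

// One test body, exercised both at run time and at compile time.
constexpr bool merge_test() {
  int ia[] = {0, 1, 2, 3, 4};
  int ib[] = {2, 4, 6, 8};
  int ic[9] = {};
  const int expected[] = {0, 1, 2, 2, 3, 4, 4, 6, 8};

  int* it = std::merge(std::begin(ia), std::end(ia),
                       std::begin(ib), std::end(ib), std::begin(ic));
  return it - std::begin(ic) == 9 &&
         std::equal(std::begin(ic), it,
                    std::begin(expected), std::end(expected));
}

int main() {
  assert(merge_test());        // run-time evaluation
  static_assert(merge_test()); // compile-time evaluation; needs constexpr merge
  return 0;
}

Returning `true` rather than `void` is what lets the same body feed `static_assert`: a failure surfaces as a compile error during constant evaluation and as an assertion failure at run time.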
From 1dac073bdd95799ae2f3a40ba2073c34fd037f1b Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 14 Sep 2020 21:02:33 +0000 Subject: [PATCH 0589/1079] Fix MLIR standalone example to properly handle namespace The ODS TableGen backend now requires dialects to spell out which namespace they are nested in, as an absolute path. --- .../standalone/include/Standalone/StandaloneDialect.h | 6 ------ .../standalone/include/Standalone/StandaloneDialect.td | 2 +- mlir/examples/standalone/include/Standalone/StandaloneOps.h | 6 ------ mlir/examples/standalone/lib/Standalone/StandaloneOps.cpp | 4 ---- 4 files changed, 1 insertion(+), 17 deletions(-) diff --git a/mlir/examples/standalone/include/Standalone/StandaloneDialect.h b/mlir/examples/standalone/include/Standalone/StandaloneDialect.h index ac1ac86a178e4..d3eb24cc308df 100644 --- a/mlir/examples/standalone/include/Standalone/StandaloneDialect.h +++ b/mlir/examples/standalone/include/Standalone/StandaloneDialect.h @@ -11,12 +11,6 @@ #include "mlir/IR/Dialect.h" -namespace mlir { -namespace standalone { - #include "Standalone/StandaloneOpsDialect.h.inc" -} // namespace standalone -} // namespace mlir - #endif // STANDALONE_STANDALONEDIALECT_H diff --git a/mlir/examples/standalone/include/Standalone/StandaloneDialect.td b/mlir/examples/standalone/include/Standalone/StandaloneDialect.td index 403a83a712b15..a7fd789376e22 100644 --- a/mlir/examples/standalone/include/Standalone/StandaloneDialect.td +++ b/mlir/examples/standalone/include/Standalone/StandaloneDialect.td @@ -23,7 +23,7 @@ def Standalone_Dialect : Dialect { illustrate the basic setup required to develop MLIR-based tools without working inside of the LLVM source tree. }]; - let cppNamespace = "standalone"; + let cppNamespace = "::mlir::standalone"; } //===----------------------------------------------------------------------===// diff --git a/mlir/examples/standalone/include/Standalone/StandaloneOps.h b/mlir/examples/standalone/include/Standalone/StandaloneOps.h index 18b02aff856de..5a8c5d1040e62 100644 --- a/mlir/examples/standalone/include/Standalone/StandaloneOps.h +++ b/mlir/examples/standalone/include/Standalone/StandaloneOps.h @@ -13,13 +13,7 @@ #include "mlir/IR/OpDefinition.h" #include "mlir/Interfaces/SideEffectInterfaces.h" -namespace mlir { -namespace standalone { - #define GET_OP_CLASSES #include "Standalone/StandaloneOps.h.inc" -} // namespace standalone -} // namespace mlir - #endif // STANDALONE_STANDALONEOPS_H diff --git a/mlir/examples/standalone/lib/Standalone/StandaloneOps.cpp b/mlir/examples/standalone/lib/Standalone/StandaloneOps.cpp index f15bf02b36af7..497eb98705d83 100644 --- a/mlir/examples/standalone/lib/Standalone/StandaloneOps.cpp +++ b/mlir/examples/standalone/lib/Standalone/StandaloneOps.cpp @@ -10,9 +10,5 @@ #include "Standalone/StandaloneDialect.h" #include "mlir/IR/OpImplementation.h" -namespace mlir { -namespace standalone { #define GET_OP_CLASSES #include "Standalone/StandaloneOps.cpp.inc" -} // namespace standalone -} // namespace mlir From b3445c839fac0bbe174f85e39e9b08756c847465 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 14 Sep 2020 17:05:49 -0400 Subject: [PATCH 0590/1079] [libc++][test] Portability fix of std::strstreambuf constructor test The standard does not require that the constructor `strstreambuf(streamsize alsize_arg = 0)` leave the stream array unallocated when called with a parameter `alsize_arg > 0`. 
Conformant implementations of this constructor may allocate a minimum of `alsize_arg` bytes, forcing the `str()` method to return a non-null pointer. Thanks to Andrey Maksimov for the patch. Differential Revision: https://reviews.llvm.org/D72465 --- .../depr.strstreambuf/depr.strstreambuf.cons/default.pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/default.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/default.pass.cpp index a7a3fbcf96f42..6ec30127ae592 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/default.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/default.pass.cpp @@ -26,7 +26,7 @@ int main(int, char**) } { std::strstreambuf s(1024); - assert(s.str() == nullptr); + LIBCPP_ASSERT(s.str() == nullptr); assert(s.pcount() == 0); } From 44da6c2369da239517cd073f96688895081bc395 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Mon, 14 Sep 2020 14:23:20 -0700 Subject: [PATCH 0591/1079] [docs] Update OrcV1 removal timeline. --- llvm/docs/ORCv2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/ORCv2.rst b/llvm/docs/ORCv2.rst index 0396fb0ad8111..67ce6e3d103d3 100644 --- a/llvm/docs/ORCv2.rst +++ b/llvm/docs/ORCv2.rst @@ -468,7 +468,7 @@ are now referred to as ORCv1. The majority of the ORCv1 layers and utilities were renamed with a 'Legacy' prefix in LLVM 8.0, and have deprecation warnings attached in LLVM 9.0. In LLVM -10.0 ORCv1 will be removed entirely. +12.0 ORCv1 will be removed entirely. Transitioning from ORCv1 to ORCv2 should be easy for most clients. Most of the ORCv1 layers and utilities have ORCv2 counterparts [2]_ that can be directly From f3d834485448b42e72c2d908a8be3d02285bd660 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 14 Sep 2020 12:07:17 -0700 Subject: [PATCH 0592/1079] [PruneEH][NFC] Use CallGraphUpdater in PruneEH In preparation for porting the pass to NPM. Reviewed By: asbirlea Differential Revision: https://reviews.llvm.org/D87632 --- llvm/lib/Transforms/IPO/PruneEH.cpp | 72 +++++++++++++---------------- 1 file changed, 33 insertions(+), 39 deletions(-) diff --git a/llvm/lib/Transforms/IPO/PruneEH.cpp b/llvm/lib/Transforms/IPO/PruneEH.cpp index a16dc664db64d..3f3b18771cd5f 100644 --- a/llvm/lib/Transforms/IPO/PruneEH.cpp +++ b/llvm/lib/Transforms/IPO/PruneEH.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" @@ -27,8 +28,10 @@ #include "llvm/InitializePasses.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils/CallGraphUpdater.h" #include "llvm/Transforms/Utils/Local.h" #include + using namespace llvm; #define DEBUG_TYPE "prune-eh" @@ -45,11 +48,10 @@ namespace { // runOnSCC - Analyze the SCC, performing the transformation if possible. 
bool runOnSCC(CallGraphSCC &SCC) override; - }; } -static bool SimplifyFunction(Function *F, CallGraph &CG); -static void DeleteBasicBlock(BasicBlock *BB, CallGraph &CG); +static bool SimplifyFunction(Function *F, CallGraphUpdater &CGU); +static void DeleteBasicBlock(BasicBlock *BB, CallGraphUpdater &CGU); char PruneEH::ID = 0; INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh", @@ -60,20 +62,17 @@ INITIALIZE_PASS_END(PruneEH, "prune-eh", Pass *llvm::createPruneEHPass() { return new PruneEH(); } -static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) { - SmallPtrSet SCCNodes; +static bool runImpl(CallGraphUpdater &CGU, SetVector &Functions) { +#ifndef NDEBUG + for (auto *F : Functions) + assert(F && "null Function"); +#endif bool MadeChange = false; - // Fill SCCNodes with the elements of the SCC. Used for quickly - // looking up whether a given CallGraphNode is in this SCC. - for (CallGraphNode *I : SCC) - SCCNodes.insert(I); - // First pass, scan all of the functions in the SCC, simplifying them // according to what we know. - for (CallGraphNode *I : SCC) - if (Function *F = I->getFunction()) - MadeChange |= SimplifyFunction(F, CG); + for (Function *F : Functions) + MadeChange |= SimplifyFunction(F, CGU); // Next, check to see if any callees might throw or if there are any external // functions in this SCC: if so, we cannot prune any functions in this SCC. @@ -83,13 +82,8 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) { // obviously the SCC might throw. // bool SCCMightUnwind = false, SCCMightReturn = false; - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); - (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) { - Function *F = (*I)->getFunction(); - if (!F) { - SCCMightUnwind = true; - SCCMightReturn = true; - } else if (!F->hasExactDefinition()) { + for (Function *F : Functions) { + if (!F->hasExactDefinition()) { SCCMightUnwind |= !F->doesNotThrow(); SCCMightReturn |= !F->doesNotReturn(); } else { @@ -125,10 +119,9 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) { bool InstMightUnwind = true; if (const auto *CI = dyn_cast(&I)) { if (Function *Callee = CI->getCalledFunction()) { - CallGraphNode *CalleeNode = CG[Callee]; // If the callee is outside our current SCC then we may throw // because it might. If it is inside, do nothing. - if (SCCNodes.count(CalleeNode) > 0) + if (Functions.contains(Callee)) InstMightUnwind = false; } } @@ -140,18 +133,15 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) { if (IA->hasSideEffects()) SCCMightReturn = true; } - + } if (SCCMightUnwind && SCCMightReturn) break; - } } } // If the SCC doesn't unwind or doesn't throw, note this fact. if (!SCCMightUnwind || !SCCMightReturn) - for (CallGraphNode *I : SCC) { - Function *F = I->getFunction(); - + for (Function *F : Functions) { if (!SCCMightUnwind && !F->hasFnAttribute(Attribute::NoUnwind)) { F->addFnAttr(Attribute::NoUnwind); MadeChange = true; @@ -163,30 +153,35 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) { } } - for (CallGraphNode *I : SCC) { + for (Function *F : Functions) { // Convert any invoke instructions to non-throwing functions in this node // into call instructions with a branch. This makes the exception blocks // dead. 
- if (Function *F = I->getFunction()) - MadeChange |= SimplifyFunction(F, CG); + MadeChange |= SimplifyFunction(F, CGU); } return MadeChange; } - bool PruneEH::runOnSCC(CallGraphSCC &SCC) { if (skipSCC(SCC)) return false; + SetVector Functions; + for (auto &N : SCC) { + if (auto *F = N->getFunction()) + Functions.insert(F); + } CallGraph &CG = getAnalysis().getCallGraph(); - return runImpl(SCC, CG); + CallGraphUpdater CGU; + CGU.initialize(CG, SCC); + return runImpl(CGU, Functions); } // SimplifyFunction - Given information about callees, simplify the specified // function if we have invokes to non-unwinding functions or code after calls to // no-return functions. -static bool SimplifyFunction(Function *F, CallGraph &CG) { +static bool SimplifyFunction(Function *F, CallGraphUpdater &CGU) { bool MadeChange = false; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { if (InvokeInst *II = dyn_cast(BB->getTerminator())) @@ -196,7 +191,7 @@ static bool SimplifyFunction(Function *F, CallGraph &CG) { // If the unwind block is now dead, nuke it. if (pred_empty(UnwindBlock)) - DeleteBasicBlock(UnwindBlock, CG); // Delete the new BB. + DeleteBasicBlock(UnwindBlock, CGU); // Delete the new BB. ++NumRemoved; MadeChange = true; @@ -216,7 +211,7 @@ static bool SimplifyFunction(Function *F, CallGraph &CG) { BB->getInstList().pop_back(); new UnreachableInst(BB->getContext(), &*BB); - DeleteBasicBlock(New, CG); // Delete the new BB. + DeleteBasicBlock(New, CGU); // Delete the new BB. MadeChange = true; ++NumUnreach; break; @@ -229,12 +224,11 @@ static bool SimplifyFunction(Function *F, CallGraph &CG) { /// DeleteBasicBlock - remove the specified basic block from the program, /// updating the callgraph to reflect any now-obsolete edges due to calls that /// exist in the BB. -static void DeleteBasicBlock(BasicBlock *BB, CallGraph &CG) { +static void DeleteBasicBlock(BasicBlock *BB, CallGraphUpdater &CGU) { assert(pred_empty(BB) && "BB is not dead!"); Instruction *TokenInst = nullptr; - CallGraphNode *CGN = CG[BB->getParent()]; for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) { --I; @@ -246,9 +240,9 @@ static void DeleteBasicBlock(BasicBlock *BB, CallGraph &CG) { if (auto *Call = dyn_cast(&*I)) { const Function *Callee = Call->getCalledFunction(); if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID())) - CGN->removeCallEdgeFor(*Call); + CGU.removeCallSite(*Call); else if (!Callee->isIntrinsic()) - CGN->removeCallEdgeFor(*Call); + CGU.removeCallSite(*Call); } if (!I->use_empty()) From 5881bf0050398f4bb2d9761167d06a9ecfc8a371 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Mon, 14 Sep 2020 13:39:52 -0700 Subject: [PATCH 0593/1079] [flang] More clean-up of CookedSource API The std::string holding the content of a CookedSource no longer needs to be exposed in its API after the recent work that allows the parsing context to hold multiple instances of a CookedSource. So clean the API. These changes were extracted from some work in progress that was made easier by the API changes. 
Differential Revision: https://reviews.llvm.org/D87635 --- flang/include/flang/Parser/parse-state.h | 2 +- flang/include/flang/Parser/provenance.h | 15 +++------------ flang/include/flang/Semantics/semantics.h | 4 ++-- flang/lib/Lower/OpenACC.cpp | 2 +- flang/lib/Parser/prescan.cpp | 19 ++++++++----------- flang/lib/Parser/provenance.cpp | 10 +++++++++- flang/tools/f18/f18.cpp | 2 +- flang/unittests/Evaluate/intrinsics.cpp | 4 ++-- 8 files changed, 27 insertions(+), 31 deletions(-) diff --git a/flang/include/flang/Parser/parse-state.h b/flang/include/flang/Parser/parse-state.h index 5d96e95e4da7f..00291bac4dbb8 100644 --- a/flang/include/flang/Parser/parse-state.h +++ b/flang/include/flang/Parser/parse-state.h @@ -36,7 +36,7 @@ class ParseState { public: // TODO: Add a constructor for parsing a normalized module file. ParseState(const CookedSource &cooked) - : p_{&cooked.data().front()}, limit_{&cooked.data().back() + 1} {} + : p_{cooked.AsCharBlock().begin()}, limit_{cooked.AsCharBlock().end()} {} ParseState(const ParseState &that) : p_{that.p_}, limit_{that.limit_}, context_{that.context_}, userState_{that.userState_}, inFixedForm_{that.inFixedForm_}, diff --git a/flang/include/flang/Parser/provenance.h b/flang/include/flang/Parser/provenance.h index 52aac931e8995..1f0a0a90e7019 100644 --- a/flang/include/flang/Parser/provenance.h +++ b/flang/include/flang/Parser/provenance.h @@ -167,6 +167,7 @@ class AllSources { const std::string &message, bool echoSourceLine = false) const; const SourceFile *GetSourceFile( Provenance, std::size_t *offset = nullptr) const; + const char *GetSource(ProvenanceRange) const; std::optional GetSourcePosition(Provenance) const; std::optional GetFirstFileProvenance() const; std::string GetPath(Provenance) const; // __FILE__ @@ -219,16 +220,7 @@ class AllSources { // single instances of CookedSource. 
class CookedSource { public: - const std::string &data() const { return data_; } - - bool Contains(const char *p) const { - return p >= &data_.front() && p <= &data_.back() + 1; - } - bool Contains(CharBlock range) const { - return !range.empty() && Contains(range.begin()) && - Contains(range.end() - 1); - } - + CharBlock AsCharBlock() const { return CharBlock{data_}; } std::optional GetProvenanceRange(CharBlock) const; std::optional GetCharBlock(ProvenanceRange) const; @@ -253,7 +245,6 @@ class CookedSource { std::size_t BufferedBytes() const; void Marshal(AllSources &); // marshals text into one contiguous block void CompileProvenanceRangeToOffsetMappings(AllSources &); - std::string AcquireData() { return std::move(data_); } llvm::raw_ostream &Dump(llvm::raw_ostream &) const; private: @@ -276,7 +267,7 @@ class AllCookedSources { template // const char * or CharBlock const CookedSource *Find(A x) const { for (const auto &c : cooked_) { - if (c.Contains(x)) { + if (c.AsCharBlock().Contains(x)) { return &c; } } diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h index 4c2c0e75992a4..de3d9aeac144e 100644 --- a/flang/include/flang/Semantics/semantics.h +++ b/flang/include/flang/Semantics/semantics.h @@ -204,10 +204,10 @@ class SemanticsContext { class Semantics { public: explicit Semantics(SemanticsContext &context, parser::Program &program, - const parser::CookedSource &cooked, bool debugModuleWriter = false) + parser::CharBlock charBlock, bool debugModuleWriter = false) : context_{context}, program_{program} { context.set_debugModuleWriter(debugModuleWriter); - context.globalScope().AddSourceRange(parser::CharBlock{cooked.data()}); + context.globalScope().AddSourceRange(charBlock); } SemanticsContext &context() const { return context_; } diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 7202d4ec03199..5c8c29e491d66 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -1,4 +1,4 @@ -//===-- OpenMP.cpp -- OpenACC directive lowering --------------------------===// +//===-- OpenACC.cpp -- OpenACC directive lowering -------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp index 8e8e57c1334d9..3eb909fc1ae86 100644 --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -62,11 +62,8 @@ static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) { void Prescanner::Prescan(ProvenanceRange range) { startProvenance_ = range.start(); - std::size_t offset{0}; - const SourceFile *source{ - allSources_.GetSourceFile(startProvenance_, &offset)}; - CHECK(source); - start_ = source->content().data() + offset; + start_ = allSources_.GetSource(range); + CHECK(start_); limit_ = start_ + range.size(); nextLine_ = start_; const bool beganInFixedForm{inFixedForm_}; @@ -75,7 +72,7 @@ void Prescanner::Prescan(ProvenanceRange range) { "too many nested INCLUDE/#include files, possibly circular"_err_en_US); return; } - while (nextLine_ < limit_) { + while (!IsAtEnd()) { Statement(); } if (inFixedForm_ != beganInFixedForm) { @@ -232,7 +229,7 @@ void Prescanner::Statement() { } TokenSequence Prescanner::TokenizePreprocessorDirective() { - CHECK(nextLine_ < limit_ && !inPreprocessorDirective_); + CHECK(!IsAtEnd() && !inPreprocessorDirective_); inPreprocessorDirective_ = true; BeginStatementAndAdvance(); TokenSequence tokens; @@ -360,7 +357,7 @@ void Prescanner::SkipCComments() { break; } } else if (inPreprocessorDirective_ && at_[0] == '\\' && at_ + 2 < limit_ && - at_[1] == '\n' && nextLine_ < limit_) { + at_[1] == '\n' && !IsAtEnd()) { BeginSourceLineAndAdvance(); } else { break; @@ -804,7 +801,7 @@ bool Prescanner::IsNextLinePreprocessorDirective() const { } bool Prescanner::SkipCommentLine(bool afterAmpersand) { - if (nextLine_ >= limit_) { + if (IsAtEnd()) { if (afterAmpersand && prescannerNesting_ > 0) { // A continuation marker at the end of the last line in an // include file inhibits the newline for that line. @@ -843,7 +840,7 @@ bool Prescanner::SkipCommentLine(bool afterAmpersand) { } const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) { - if (nextLine_ >= limit_) { + if (IsAtEnd()) { return nullptr; } tabInCurrentLine_ = false; @@ -995,7 +992,7 @@ bool Prescanner::FreeFormContinuation() { // arguments to span multiple lines. bool Prescanner::IsImplicitContinuation() const { return !inPreprocessorDirective_ && !inCharLiteral_ && - delimiterNesting_ > 0 && nextLine_ < limit_ && + delimiterNesting_ > 0 && !IsAtEnd() && ClassifyLine(nextLine_).kind == LineClassification::Kind::Source; } diff --git a/flang/lib/Parser/provenance.cpp b/flang/lib/Parser/provenance.cpp index bcb871bd7cb41..46a0dc9268225 100644 --- a/flang/lib/Parser/provenance.cpp +++ b/flang/lib/Parser/provenance.cpp @@ -301,6 +301,14 @@ const SourceFile *AllSources::GetSourceFile( origin.u); } +const char *AllSources::GetSource(ProvenanceRange range) const { + Provenance start{range.start()}; + const Origin &origin{MapToOrigin(start)}; + return origin.covers.Contains(range) + ? 
&origin[origin.covers.MemberOffset(start)] + : nullptr; +} + std::optional AllSources::GetSourcePosition( Provenance prov) const { const Origin &origin{MapToOrigin(prov)}; @@ -402,7 +410,7 @@ const AllSources::Origin &AllSources::MapToOrigin(Provenance at) const { std::optional CookedSource::GetProvenanceRange( CharBlock cookedRange) const { - if (!Contains(cookedRange)) { + if (!AsCharBlock().Contains(cookedRange)) { return std::nullopt; } ProvenanceRange first{provenanceMap_.Map(cookedRange.begin() - &data_[0])}; diff --git a/flang/tools/f18/f18.cpp b/flang/tools/f18/f18.cpp index a33a167686e49..54a905133db76 100644 --- a/flang/tools/f18/f18.cpp +++ b/flang/tools/f18/f18.cpp @@ -251,7 +251,7 @@ std::string CompileFortran(std::string path, Fortran::parser::Options options, driver.dumpSymbols || driver.dumpUnparseWithSymbols || driver.getDefinition || driver.getSymbolsSources) { Fortran::semantics::Semantics semantics{semanticsContext, parseTree, - parsing.cooked(), driver.debugModuleWriter}; + parsing.cooked().AsCharBlock(), driver.debugModuleWriter}; semantics.Perform(); semantics.EmitMessages(llvm::errs()); if (driver.dumpSymbols) { diff --git a/flang/unittests/Evaluate/intrinsics.cpp b/flang/unittests/Evaluate/intrinsics.cpp index 4f2a21dfe6048..52507b8ef8b67 100644 --- a/flang/unittests/Evaluate/intrinsics.cpp +++ b/flang/unittests/Evaluate/intrinsics.cpp @@ -26,10 +26,10 @@ class CookedStrings { } void Marshal() { cooked_.Marshal(allSources_); } parser::CharBlock operator()(const std::string &s) { - return {cooked_.data().data() + offsets_[s], s.size()}; + return {cooked_.AsCharBlock().begin() + offsets_[s], s.size()}; } parser::ContextualMessages Messages(parser::Messages &buffer) { - return parser::ContextualMessages{cooked_.data(), &buffer}; + return parser::ContextualMessages{cooked_.AsCharBlock(), &buffer}; } void Emit(llvm::raw_ostream &o, const parser::Messages &messages) { messages.Emit(o, allCookedSources_); From ed653184ac6385945e32535feb7af2876ec52d40 Mon Sep 17 00:00:00 2001 From: zoecarver Date: Mon, 14 Sep 2020 14:47:43 -0700 Subject: [PATCH 0594/1079] Revert "[libc++] Make rotate a constexpr." This reverts commit 1ec02efee9b1d01cde89f31ca9ba6a46b7662ac5. 
--- libcxx/include/algorithm | 68 ++++++------------- libcxx/include/iterator | 16 ++--- .../alg.move/move.pass.cpp | 42 +----------- .../alg.move/move_backward.pass.cpp | 20 +----- .../alg.rotate/rotate.pass.cpp | 11 +-- libcxx/www/cxx2a_status.html | 3 +- 6 files changed, 36 insertions(+), 124 deletions(-) diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index 5d09b6c3c0150..8c8bc748606d4 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -1631,7 +1631,7 @@ search_n(_ForwardIterator __first, _ForwardIterator __last, _Size __count, const // copy template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _Iter __unwrap_iter(_Iter __i) { @@ -1639,7 +1639,7 @@ __unwrap_iter(_Iter __i) } template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1653,7 +1653,7 @@ __unwrap_iter(move_iterator<_Tp*> __i) #if _LIBCPP_DEBUG_LEVEL < 2 template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1665,7 +1665,7 @@ __unwrap_iter(__wrap_iter<_Tp*> __i) } template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1679,7 +1679,7 @@ __unwrap_iter(__wrap_iter __i) #else template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1859,28 +1859,18 @@ copy_n(_InputIterator __first, _Size __orig_n, _OutputIterator __result) // move -// __move_constexpr exists so that __move doesn't call itself when delegating to the constexpr -// version of __move. 
template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +inline _LIBCPP_INLINE_VISIBILITY _OutputIterator -__move_constexpr(_InputIterator __first, _InputIterator __last, _OutputIterator __result) +__move(_InputIterator __first, _InputIterator __last, _OutputIterator __result) { for (; __first != __last; ++__first, (void) ++__result) *__result = _VSTD::move(*__first); return __result; } -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -_OutputIterator -__move(_InputIterator __first, _InputIterator __last, _OutputIterator __result) -{ - return __move_constexpr(__first, __last, __result); -} - template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +inline _LIBCPP_INLINE_VISIBILITY typename enable_if < is_same::type, _Up>::value && @@ -1889,8 +1879,6 @@ typename enable_if >::type __move(_Tp* __first, _Tp* __last, _Up* __result) { - if (__libcpp_is_constant_evaluated()) - return __move_constexpr(__first, __last, __result); const size_t __n = static_cast(__last - __first); if (__n > 0) _VSTD::memmove(__result, __first, __n * sizeof(_Up)); @@ -1898,7 +1886,7 @@ __move(_Tp* __first, _Tp* __last, _Up* __result) } template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +inline _LIBCPP_INLINE_VISIBILITY _OutputIterator move(_InputIterator __first, _InputIterator __last, _OutputIterator __result) { @@ -1907,28 +1895,18 @@ move(_InputIterator __first, _InputIterator __last, _OutputIterator __result) // move_backward -// __move_backward_constexpr exists so that __move_backward doesn't call itself when delegating to -// the constexpr version of __move_backward. template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +inline _LIBCPP_INLINE_VISIBILITY _OutputIterator -__move_backward_constexpr(_InputIterator __first, _InputIterator __last, _OutputIterator __result) +__move_backward(_InputIterator __first, _InputIterator __last, _OutputIterator __result) { while (__first != __last) *--__result = _VSTD::move(*--__last); return __result; } -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -_OutputIterator -__move_backward(_InputIterator __first, _InputIterator __last, _OutputIterator __result) -{ - return __move_backward_constexpr(__first, __last, __result); -} - template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +inline _LIBCPP_INLINE_VISIBILITY typename enable_if < is_same::type, _Up>::value && @@ -1937,8 +1915,6 @@ typename enable_if >::type __move_backward(_Tp* __first, _Tp* __last, _Up* __result) { - if (__libcpp_is_constant_evaluated()) - return __move_backward_constexpr(__first, __last, __result); const size_t __n = static_cast(__last - __first); if (__n > 0) { @@ -1949,7 +1925,7 @@ __move_backward(_Tp* __first, _Tp* __last, _Up* __result) } template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +inline _LIBCPP_INLINE_VISIBILITY _BidirectionalIterator2 move_backward(_BidirectionalIterator1 __first, _BidirectionalIterator1 __last, _BidirectionalIterator2 __result) @@ -2357,7 +2333,7 @@ reverse_copy(_BidirectionalIterator __first, _BidirectionalIterator __last, _Out // rotate template -_LIBCPP_CONSTEXPR_AFTER_CXX11 _ForwardIterator +_ForwardIterator __rotate_left(_ForwardIterator __first, _ForwardIterator __last) { typedef typename iterator_traits<_ForwardIterator>::value_type value_type; @@ -2368,7 +2344,7 @@ __rotate_left(_ForwardIterator __first, _ForwardIterator __last) } template -_LIBCPP_CONSTEXPR_AFTER_CXX11 _BidirectionalIterator 
+_BidirectionalIterator __rotate_right(_BidirectionalIterator __first, _BidirectionalIterator __last) { typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type; @@ -2380,7 +2356,7 @@ __rotate_right(_BidirectionalIterator __first, _BidirectionalIterator __last) } template -_LIBCPP_CONSTEXPR_AFTER_CXX14 _ForwardIterator +_ForwardIterator __rotate_forward(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) { _ForwardIterator __i = __middle; @@ -2416,7 +2392,7 @@ __rotate_forward(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIt template inline _LIBCPP_INLINE_VISIBILITY -_LIBCPP_CONSTEXPR_AFTER_CXX14 _Integral +_Integral __algo_gcd(_Integral __x, _Integral __y) { do @@ -2429,7 +2405,7 @@ __algo_gcd(_Integral __x, _Integral __y) } template -_LIBCPP_CONSTEXPR_AFTER_CXX14 _RandomAccessIterator +_RandomAccessIterator __rotate_gcd(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last) { typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; @@ -2465,7 +2441,7 @@ __rotate_gcd(_RandomAccessIterator __first, _RandomAccessIterator __middle, _Ran template inline _LIBCPP_INLINE_VISIBILITY -_LIBCPP_CONSTEXPR_AFTER_CXX11 _ForwardIterator +_ForwardIterator __rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last, _VSTD::forward_iterator_tag) { @@ -2480,7 +2456,7 @@ __rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator _ template inline _LIBCPP_INLINE_VISIBILITY -_LIBCPP_CONSTEXPR_AFTER_CXX11 _BidirectionalIterator +_BidirectionalIterator __rotate(_BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last, _VSTD::bidirectional_iterator_tag) { @@ -2497,7 +2473,7 @@ __rotate(_BidirectionalIterator __first, _BidirectionalIterator __middle, _Bidir template inline _LIBCPP_INLINE_VISIBILITY -_LIBCPP_CONSTEXPR_AFTER_CXX11 _RandomAccessIterator +_RandomAccessIterator __rotate(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, _VSTD::random_access_iterator_tag) { @@ -2515,7 +2491,7 @@ __rotate(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomA template inline _LIBCPP_INLINE_VISIBILITY -_LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator +_ForwardIterator rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) { if (__first == __middle) diff --git a/libcxx/include/iterator b/libcxx/include/iterator index 45516db24e7cd..36571a50b8bc5 100644 --- a/libcxx/include/iterator +++ b/libcxx/include/iterator @@ -1393,13 +1393,13 @@ operator+(typename __wrap_iter<_Iter>::difference_type, __wrap_iter<_Iter>) _NOE template _Op _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 copy(_Ip, _Ip, _Op); template _B2 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 copy_backward(_B1, _B1, _B2); -template _Op _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 move(_Ip, _Ip, _Op); -template _B2 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 move_backward(_B1, _B1, _B2); +template _Op _LIBCPP_INLINE_VISIBILITY move(_Ip, _Ip, _Op); +template _B2 _LIBCPP_INLINE_VISIBILITY move_backward(_B1, _B1, _B2); #if _LIBCPP_DEBUG_LEVEL < 2 template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1410,7 +1410,7 @@ __unwrap_iter(__wrap_iter<_Tp*>); #else template -inline _LIBCPP_INLINE_VISIBILITY 
_LIBCPP_CONSTEXPR +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1604,12 +1604,12 @@ private: template friend _LIBCPP_CONSTEXPR_AFTER_CXX17 _Op copy(_Ip, _Ip, _Op); template friend _LIBCPP_CONSTEXPR_AFTER_CXX17 _B2 copy_backward(_B1, _B1, _B2); - template friend _LIBCPP_CONSTEXPR_AFTER_CXX17 _Op move(_Ip, _Ip, _Op); - template friend _LIBCPP_CONSTEXPR_AFTER_CXX17 _B2 move_backward(_B1, _B1, _B2); + template friend _Op move(_Ip, _Ip, _Op); + template friend _B2 move_backward(_B1, _B1, _B2); #if _LIBCPP_DEBUG_LEVEL < 2 template - _LIBCPP_CONSTEXPR friend + _LIBCPP_CONSTEXPR_IF_NODEBUG friend typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1618,7 +1618,7 @@ private: __unwrap_iter(__wrap_iter<_Tp*>); #else template - inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR + inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG typename enable_if < is_trivially_copy_assignable<_Tp>::value, diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move.pass.cpp index 721a568750f19..cdb126d4942ce 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move.pass.cpp @@ -13,10 +13,6 @@ // OutIter // move(InIter first, InIter last, OutIter result); -// UNSUPPORTED: clang-6, clang-7 -// UNSUPPORTED: apple-clang-9, apple-clang-10, apple-clang-11 -// UNSUPPORTED: gcc-5, gcc-6, gcc-7, gcc-8 - #include #include #include @@ -25,11 +21,11 @@ #include "test_iterators.h" template -_LIBCPP_CONSTEXPR_AFTER_CXX17 bool +void test() { const unsigned N = 1000; - int ia[N] = {}; + int ia[N]; for (unsigned i = 0; i < N; ++i) ia[i] = i; int ib[N] = {0}; @@ -38,8 +34,6 @@ test() assert(base(r) == ib+N); for (unsigned i = 0; i < N; ++i) assert(ia[i] == ib[i]); - - return true; } #if TEST_STD_VER >= 11 @@ -134,37 +128,5 @@ int main(int, char**) test1*, std::unique_ptr*>(); #endif // TEST_STD_VER >= 11 -#if TEST_STD_VER > 17 - static_assert(test, input_iterator >()); - static_assert(test, forward_iterator >()); - static_assert(test, bidirectional_iterator >()); - static_assert(test, random_access_iterator >()); - static_assert(test, int*>()); - - static_assert(test, input_iterator >()); - static_assert(test, forward_iterator >()); - static_assert(test, bidirectional_iterator >()); - static_assert(test, random_access_iterator >()); - static_assert(test, int*>()); - - static_assert(test, input_iterator >()); - static_assert(test, forward_iterator >()); - static_assert(test, bidirectional_iterator >()); - static_assert(test, random_access_iterator >()); - static_assert(test, int*>()); - - static_assert(test, input_iterator >()); - static_assert(test, forward_iterator >()); - static_assert(test, bidirectional_iterator >()); - static_assert(test, random_access_iterator >()); - static_assert(test, int*>()); - - static_assert(test >()); - static_assert(test >()); - static_assert(test >()); - static_assert(test >()); - static_assert(test()); -#endif // TEST_STD_VER > 17 - return 0; } diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move_backward.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move_backward.pass.cpp index 1a845cc1a88ff..365c1a1158d7e 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move_backward.pass.cpp +++ 
b/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move_backward.pass.cpp @@ -21,11 +21,11 @@ #include "test_iterators.h" template -_LIBCPP_CONSTEXPR_AFTER_CXX17 bool +void test() { const unsigned N = 1000; - int ia[N] = {}; + int ia[N]; for (unsigned i = 0; i < N; ++i) ia[i] = i; int ib[N] = {0}; @@ -34,8 +34,6 @@ test() assert(base(r) == ib); for (unsigned i = 0; i < N; ++i) assert(ia[i] == ib[i]); - - return true; } #if TEST_STD_VER >= 11 @@ -84,19 +82,5 @@ int main(int, char**) test1*, std::unique_ptr*>(); #endif // TEST_STD_VER >= 11 -#if TEST_STD_VER > 17 - static_assert(test, bidirectional_iterator >()); - static_assert(test, random_access_iterator >()); - static_assert(test, int*>()); - - static_assert(test, bidirectional_iterator >()); - static_assert(test, random_access_iterator >()); - static_assert(test, int*>()); - - static_assert(test >()); - static_assert(test >()); - static_assert(test()); -#endif // TEST_STD_VER > 17 - return 0; } diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp index 7c905bc83f0fd..007faf685bfc2 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp @@ -20,7 +20,7 @@ #include "test_iterators.h" template -_LIBCPP_CONSTEXPR_AFTER_CXX17 bool +void test() { int ia[] = {0}; @@ -209,8 +209,6 @@ test() assert(ig[3] == 0); assert(ig[4] == 1); assert(ig[5] == 2); - - return true; } #if TEST_STD_VER >= 11 @@ -437,12 +435,5 @@ int main(int, char**) #endif -#if TEST_STD_VER > 17 - static_assert(test >()); - static_assert(test >()); - static_assert(test >()); - static_assert(test()); -#endif // TEST_STD_VER > 17 - return 0; } diff --git a/libcxx/www/cxx2a_status.html b/libcxx/www/cxx2a_status.html index 88df02bcb117d..f4fdba219bfce 100644 --- a/libcxx/www/cxx2a_status.html +++ b/libcxx/www/cxx2a_status.html @@ -261,8 +261,7 @@

Paper Status

The missing bits in P0600 are in [mem.res.class], [mem.poly.allocator.class], and [container.node.overview]

-The missing bits in P0202 are in copy and copy_backwards (and the ones that call them: copy_n, set_union, set_difference, and set_symmetric_difference). This is because the first two algorithms have specializations that call memmove which is not constexpr. See Bug 25165
-
+The missing bits in P0202 are in copy, copy_backwards, move, and move_backwards (and the ones that call them: copy_n, rotate_copy, merge, set_union, set_difference, and set_symmetric_difference). This is because the first four algorithms have specializations that call memmove which is not constexpr. See Bug 25165

Library Working group Issues Status

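The status-page paragraphs above spell out the constraint driving both this revert and the re-application that follows: memmove cannot be called during constant evaluation, so the trivially-copyable fast path needs an explicit compile-time/runtime dispatch. A minimal self-contained sketch of that pattern, using the standard std::is_constant_evaluated (C++20) rather than libc++'s internal __libcpp_is_constant_evaluated, and a hypothetical move_range in place of __move:

#include <cstring>
#include <type_traits>
#include <utility>

// Constant evaluation takes the element-wise loop; at runtime the
// trivially-copyable case falls through to the memmove fast path.
template <class T>
constexpr T* move_range(T* first, T* last, T* result) {
  static_assert(std::is_trivially_copyable_v<T>, "sketch assumes trivially copyable T");
  if (std::is_constant_evaluated()) {
    for (; first != last; ++first, ++result)
      *result = std::move(*first);
    return result;
  }
  const std::size_t n = static_cast<std::size_t>(last - first);
  if (n > 0)
    std::memmove(result, first, n * sizeof(T));
  return result + n;
}

constexpr bool test_move_range() {
  int src[3] = {1, 2, 3};
  int dst[3] = {};
  move_range(src, src + 3, dst);
  return dst[0] == 1 && dst[1] == 2 && dst[2] == 3;
}
static_assert(test_move_range()); // exercises the constant-evaluated branch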
From 3ed89b51da38f081fedb57727076262abb81d149 Mon Sep 17 00:00:00 2001 From: zoecarver Date: Mon, 14 Sep 2020 18:11:08 -0400 Subject: [PATCH 0595/1079] [Take 2] [libc++] Make rotate a constexpr. This patch makes `std::rotate` a constexpr. In doing so, this patch also updates the internal `__move` and `__move_backward` functions to be constexpr. This patch was previously reverted in ed653184ac63 because it was missing some UNSUPPORTED markup for older compilers. This commit adds it. Differential Revision: https://reviews.llvm.org/D65721 --- libcxx/include/algorithm | 68 +++++++++++++------ libcxx/include/iterator | 16 ++--- .../alg.move/move.pass.cpp | 43 +++++++++++- .../alg.move/move_backward.pass.cpp | 24 ++++++- .../alg.rotate/rotate.pass.cpp | 15 +++- .../alg.rotate/rotate_copy.pass.cpp | 4 ++ .../alg.sorting/alg.merge/merge.pass.cpp | 4 ++ .../alg.sorting/alg.merge/merge_comp.pass.cpp | 4 ++ libcxx/www/cxx2a_status.html | 3 +- 9 files changed, 145 insertions(+), 36 deletions(-) diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index 8c8bc748606d4..5d09b6c3c0150 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -1631,7 +1631,7 @@ search_n(_ForwardIterator __first, _ForwardIterator __last, _Size __count, const // copy template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR _Iter __unwrap_iter(_Iter __i) { @@ -1639,7 +1639,7 @@ __unwrap_iter(_Iter __i) } template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1653,7 +1653,7 @@ __unwrap_iter(move_iterator<_Tp*> __i) #if _LIBCPP_DEBUG_LEVEL < 2 template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1665,7 +1665,7 @@ __unwrap_iter(__wrap_iter<_Tp*> __i) } template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1679,7 +1679,7 @@ __unwrap_iter(__wrap_iter __i) #else template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1859,18 +1859,28 @@ copy_n(_InputIterator __first, _Size __orig_n, _OutputIterator __result) // move +// __move_constexpr exists so that __move doesn't call itself when delegating to the constexpr +// version of __move.
template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 _OutputIterator -__move(_InputIterator __first, _InputIterator __last, _OutputIterator __result) +__move_constexpr(_InputIterator __first, _InputIterator __last, _OutputIterator __result) { for (; __first != __last; ++__first, (void) ++__result) *__result = _VSTD::move(*__first); return __result; } +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +_OutputIterator +__move(_InputIterator __first, _InputIterator __last, _OutputIterator __result) +{ + return __move_constexpr(__first, __last, __result); +} + template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 typename enable_if < is_same::type, _Up>::value && @@ -1879,6 +1889,8 @@ typename enable_if >::type __move(_Tp* __first, _Tp* __last, _Up* __result) { + if (__libcpp_is_constant_evaluated()) + return __move_constexpr(__first, __last, __result); const size_t __n = static_cast(__last - __first); if (__n > 0) _VSTD::memmove(__result, __first, __n * sizeof(_Up)); @@ -1886,7 +1898,7 @@ __move(_Tp* __first, _Tp* __last, _Up* __result) } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator move(_InputIterator __first, _InputIterator __last, _OutputIterator __result) { @@ -1895,18 +1907,28 @@ move(_InputIterator __first, _InputIterator __last, _OutputIterator __result) // move_backward +// __move_backward_constexpr exists so that __move_backward doesn't call itself when delegating to +// the constexpr version of __move_backward. template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 _OutputIterator -__move_backward(_InputIterator __first, _InputIterator __last, _OutputIterator __result) +__move_backward_constexpr(_InputIterator __first, _InputIterator __last, _OutputIterator __result) { while (__first != __last) *--__result = _VSTD::move(*--__last); return __result; } +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +_OutputIterator +__move_backward(_InputIterator __first, _InputIterator __last, _OutputIterator __result) +{ + return __move_backward_constexpr(__first, __last, __result); +} + template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 typename enable_if < is_same::type, _Up>::value && @@ -1915,6 +1937,8 @@ typename enable_if >::type __move_backward(_Tp* __first, _Tp* __last, _Up* __result) { + if (__libcpp_is_constant_evaluated()) + return __move_backward_constexpr(__first, __last, __result); const size_t __n = static_cast(__last - __first); if (__n > 0) { @@ -1925,7 +1949,7 @@ __move_backward(_Tp* __first, _Tp* __last, _Up* __result) } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _BidirectionalIterator2 move_backward(_BidirectionalIterator1 __first, _BidirectionalIterator1 __last, _BidirectionalIterator2 __result) @@ -2333,7 +2357,7 @@ reverse_copy(_BidirectionalIterator __first, _BidirectionalIterator __last, _Out // rotate template -_ForwardIterator +_LIBCPP_CONSTEXPR_AFTER_CXX11 _ForwardIterator __rotate_left(_ForwardIterator __first, _ForwardIterator __last) { typedef typename iterator_traits<_ForwardIterator>::value_type value_type; @@ -2344,7 +2368,7 @@ __rotate_left(_ForwardIterator __first, _ForwardIterator __last) } template -_BidirectionalIterator +_LIBCPP_CONSTEXPR_AFTER_CXX11 
_BidirectionalIterator __rotate_right(_BidirectionalIterator __first, _BidirectionalIterator __last) { typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type; @@ -2356,7 +2380,7 @@ __rotate_right(_BidirectionalIterator __first, _BidirectionalIterator __last) } template -_ForwardIterator +_LIBCPP_CONSTEXPR_AFTER_CXX14 _ForwardIterator __rotate_forward(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) { _ForwardIterator __i = __middle; @@ -2392,7 +2416,7 @@ __rotate_forward(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIt template inline _LIBCPP_INLINE_VISIBILITY -_Integral +_LIBCPP_CONSTEXPR_AFTER_CXX14 _Integral __algo_gcd(_Integral __x, _Integral __y) { do @@ -2405,7 +2429,7 @@ __algo_gcd(_Integral __x, _Integral __y) } template -_RandomAccessIterator +_LIBCPP_CONSTEXPR_AFTER_CXX14 _RandomAccessIterator __rotate_gcd(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last) { typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; @@ -2441,7 +2465,7 @@ __rotate_gcd(_RandomAccessIterator __first, _RandomAccessIterator __middle, _Ran template inline _LIBCPP_INLINE_VISIBILITY -_ForwardIterator +_LIBCPP_CONSTEXPR_AFTER_CXX11 _ForwardIterator __rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last, _VSTD::forward_iterator_tag) { @@ -2456,7 +2480,7 @@ __rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator _ template inline _LIBCPP_INLINE_VISIBILITY -_BidirectionalIterator +_LIBCPP_CONSTEXPR_AFTER_CXX11 _BidirectionalIterator __rotate(_BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last, _VSTD::bidirectional_iterator_tag) { @@ -2473,7 +2497,7 @@ __rotate(_BidirectionalIterator __first, _BidirectionalIterator __middle, _Bidir template inline _LIBCPP_INLINE_VISIBILITY -_RandomAccessIterator +_LIBCPP_CONSTEXPR_AFTER_CXX11 _RandomAccessIterator __rotate(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, _VSTD::random_access_iterator_tag) { @@ -2491,7 +2515,7 @@ __rotate(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomA template inline _LIBCPP_INLINE_VISIBILITY -_ForwardIterator +_LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) { if (__first == __middle) diff --git a/libcxx/include/iterator b/libcxx/include/iterator index 36571a50b8bc5..45516db24e7cd 100644 --- a/libcxx/include/iterator +++ b/libcxx/include/iterator @@ -1393,13 +1393,13 @@ operator+(typename __wrap_iter<_Iter>::difference_type, __wrap_iter<_Iter>) _NOE template _Op _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 copy(_Ip, _Ip, _Op); template _B2 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 copy_backward(_B1, _B1, _B2); -template _Op _LIBCPP_INLINE_VISIBILITY move(_Ip, _Ip, _Op); -template _B2 _LIBCPP_INLINE_VISIBILITY move_backward(_B1, _B1, _B2); +template _Op _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 move(_Ip, _Ip, _Op); +template _B2 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 move_backward(_B1, _B1, _B2); #if _LIBCPP_DEBUG_LEVEL < 2 template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1410,7 +1410,7 @@ __unwrap_iter(__wrap_iter<_Tp*>); #else template -inline _LIBCPP_INLINE_VISIBILITY 
_LIBCPP_CONSTEXPR_IF_NODEBUG +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1604,12 +1604,12 @@ private: template friend _LIBCPP_CONSTEXPR_AFTER_CXX17 _Op copy(_Ip, _Ip, _Op); template friend _LIBCPP_CONSTEXPR_AFTER_CXX17 _B2 copy_backward(_B1, _B1, _B2); - template friend _Op move(_Ip, _Ip, _Op); - template friend _B2 move_backward(_B1, _B1, _B2); + template friend _LIBCPP_CONSTEXPR_AFTER_CXX17 _Op move(_Ip, _Ip, _Op); + template friend _LIBCPP_CONSTEXPR_AFTER_CXX17 _B2 move_backward(_B1, _B1, _B2); #if _LIBCPP_DEBUG_LEVEL < 2 template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend + _LIBCPP_CONSTEXPR friend typename enable_if < is_trivially_copy_assignable<_Tp>::value, @@ -1618,7 +1618,7 @@ private: __unwrap_iter(__wrap_iter<_Tp*>); #else template - inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG + inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR typename enable_if < is_trivially_copy_assignable<_Tp>::value, diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move.pass.cpp index cdb126d4942ce..7e69c54797c82 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move.pass.cpp @@ -13,6 +13,11 @@ // OutIter // move(InIter first, InIter last, OutIter result); +// Older compilers don't support std::is_constant_evaluated +// UNSUPPORTED: clang-4, clang-5, clang-6, clang-7, clang-8 +// UNSUPPORTED: apple-clang-9, apple-clang-10, apple-clang-11 +// UNSUPPORTED: gcc-5, gcc-6, gcc-7, gcc-8 + #include #include #include @@ -21,11 +26,11 @@ #include "test_iterators.h" template -void +_LIBCPP_CONSTEXPR_AFTER_CXX17 bool test() { const unsigned N = 1000; - int ia[N]; + int ia[N] = {}; for (unsigned i = 0; i < N; ++i) ia[i] = i; int ib[N] = {0}; @@ -34,6 +39,8 @@ test() assert(base(r) == ib+N); for (unsigned i = 0; i < N; ++i) assert(ia[i] == ib[i]); + + return true; } #if TEST_STD_VER >= 11 @@ -128,5 +135,37 @@ int main(int, char**) test1*, std::unique_ptr*>(); #endif // TEST_STD_VER >= 11 +#if TEST_STD_VER > 17 + static_assert(test, input_iterator >()); + static_assert(test, forward_iterator >()); + static_assert(test, bidirectional_iterator >()); + static_assert(test, random_access_iterator >()); + static_assert(test, int*>()); + + static_assert(test, input_iterator >()); + static_assert(test, forward_iterator >()); + static_assert(test, bidirectional_iterator >()); + static_assert(test, random_access_iterator >()); + static_assert(test, int*>()); + + static_assert(test, input_iterator >()); + static_assert(test, forward_iterator >()); + static_assert(test, bidirectional_iterator >()); + static_assert(test, random_access_iterator >()); + static_assert(test, int*>()); + + static_assert(test, input_iterator >()); + static_assert(test, forward_iterator >()); + static_assert(test, bidirectional_iterator >()); + static_assert(test, random_access_iterator >()); + static_assert(test, int*>()); + + static_assert(test >()); + static_assert(test >()); + static_assert(test >()); + static_assert(test >()); + static_assert(test()); +#endif // TEST_STD_VER > 17 + return 0; } diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move_backward.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move_backward.pass.cpp index 365c1a1158d7e..5e1afe857cca2 100644 --- 
a/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move_backward.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move_backward.pass.cpp @@ -6,6 +6,10 @@ // //===----------------------------------------------------------------------===// +// Older compilers don't support std::is_constant_evaluated +// UNSUPPORTED: clang-4, clang-5, clang-6, clang-7, clang-8 +// UNSUPPORTED: apple-clang-9, apple-clang-10 + // // template @@ -21,11 +25,11 @@ #include "test_iterators.h" template -void +_LIBCPP_CONSTEXPR_AFTER_CXX17 bool test() { const unsigned N = 1000; - int ia[N]; + int ia[N] = {}; for (unsigned i = 0; i < N; ++i) ia[i] = i; int ib[N] = {0}; @@ -34,6 +38,8 @@ test() assert(base(r) == ib); for (unsigned i = 0; i < N; ++i) assert(ia[i] == ib[i]); + + return true; } #if TEST_STD_VER >= 11 @@ -82,5 +88,19 @@ int main(int, char**) test1*, std::unique_ptr*>(); #endif // TEST_STD_VER >= 11 +#if TEST_STD_VER > 17 + static_assert(test, bidirectional_iterator >()); + static_assert(test, random_access_iterator >()); + static_assert(test, int*>()); + + static_assert(test, bidirectional_iterator >()); + static_assert(test, random_access_iterator >()); + static_assert(test, int*>()); + + static_assert(test >()); + static_assert(test >()); + static_assert(test()); +#endif // TEST_STD_VER > 17 + return 0; } diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp index 007faf685bfc2..2617f9a6a126e 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp @@ -12,6 +12,10 @@ // Iter // rotate(Iter first, Iter middle, Iter last); +// Older compilers don't support std::is_constant_evaluated +// UNSUPPORTED: clang-4, clang-5, clang-6, clang-7, clang-8 +// UNSUPPORTED: apple-clang-9, apple-clang-10 + #include #include #include @@ -20,7 +24,7 @@ #include "test_iterators.h" template -void +_LIBCPP_CONSTEXPR_AFTER_CXX17 bool test() { int ia[] = {0}; @@ -209,6 +213,8 @@ test() assert(ig[3] == 0); assert(ig[4] == 1); assert(ig[5] == 2); + + return true; } #if TEST_STD_VER >= 11 @@ -435,5 +441,12 @@ int main(int, char**) #endif +#if TEST_STD_VER > 17 + static_assert(test >()); + static_assert(test >()); + static_assert(test >()); + static_assert(test()); +#endif // TEST_STD_VER > 17 + return 0; } diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate_copy.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate_copy.pass.cpp index 8acb1a129e386..d9dca0c6ebf09 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate_copy.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate_copy.pass.cpp @@ -12,6 +12,10 @@ // constexpr OutIter // constexpr after C++17 // rotate_copy(InIter first, InIter middle, InIter last, OutIter result); +// Older compilers don't support std::is_constant_evaluated +// UNSUPPORTED: clang-4, clang-5, clang-6, clang-7, clang-8 +// UNSUPPORTED: apple-clang-9, apple-clang-10 + #include #include diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.merge/merge.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.merge/merge.pass.cpp index 167da9aa2dddf..8730ecdbd572b 100644 --- a/libcxx/test/std/algorithms/alg.sorting/alg.merge/merge.pass.cpp +++ b/libcxx/test/std/algorithms/alg.sorting/alg.merge/merge.pass.cpp @@ -8,6 +8,10 
@@ // // REQUIRES: long_tests +// Older compilers don't support std::is_constant_evaluated +// UNSUPPORTED: clang-4, clang-5, clang-6, clang-7, clang-8 +// UNSUPPORTED: apple-clang-9, apple-clang-10 + // // template diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.merge/merge_comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.merge/merge_comp.pass.cpp index 8d2dbb7268587..376ffd0d1d59a 100644 --- a/libcxx/test/std/algorithms/alg.sorting/alg.merge/merge_comp.pass.cpp +++ b/libcxx/test/std/algorithms/alg.sorting/alg.merge/merge_comp.pass.cpp @@ -8,6 +8,10 @@ // // REQUIRES: long_tests +// Older compilers don't support std::is_constant_evaluated +// UNSUPPORTED: clang-4, clang-5, clang-6, clang-7, clang-8 +// UNSUPPORTED: apple-clang-9, apple-clang-10 + // // template diff --git a/libcxx/www/cxx2a_status.html b/libcxx/www/cxx2a_status.html --- a/libcxx/www/cxx2a_status.html +++ b/libcxx/www/cxx2a_status.html @@ -261,7 +261,8 @@ Paper Status

The missing bits in P0600 are in [mem.res.class], [mem.poly.allocator.class], and [container.node.overview]

-The missing bits in P0202 are in copy, copy_backwards, move, and move_backwards (and the ones that call them: copy_n, rotate_copy, merge, set_union, set_difference, and set_symmetric_difference). This is because the first four algorithms have specializations that call memmove which is not constexpr. See Bug 25165
+The missing bits in P0202 are in copy and copy_backwards (and the ones that call them: copy_n, set_union, set_difference, and set_symmetric_difference). This is because the first two algorithms have specializations that call memmove which is not constexpr. See Bug 25165
+

Library Working group Issues Status

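With the patch re-applied, std::rotate (and the __move/__move_backward machinery underneath it) participates in constant evaluation in C++20 mode, which is exactly what the new static_assert coverage in the tests exercises. A small usage sketch under that assumption (the function name is illustrative):

#include <algorithm>
#include <iterator>

// Rotates {0,1,2,3,4,5} left by two positions entirely at compile time.
constexpr bool rotate_at_compile_time() {
  int a[] = {0, 1, 2, 3, 4, 5};
  std::rotate(std::begin(a), std::begin(a) + 2, std::end(a));
  return a[0] == 2 && a[3] == 5 && a[4] == 0 && a[5] == 1;
}
static_assert(rotate_at_compile_time()); // fails to compile if rotate is not constexpr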
From cdbfb47998cd37ab0384ad944fa8e4ba1e1b47d0 Mon Sep 17 00:00:00 2001 From: Peter Steinfeld Date: Fri, 11 Sep 2020 11:02:04 -0700 Subject: [PATCH 0596/1079] [flang] Fix bug for forward referenced type A type name in an IMPLICIT declaration that was later used in a PARAMETER statement caused problems because the default symbol scope had not yet been initialized. I avoided dereferencing in the situation where the default scope was uninitialized and added a test that triggers the problem. Also, once I corrected the bad dereference, the compiler was putting out misleading error messages. The underlying error is due to violating section 7.5.10, paragraph 4, which states: A structure constructor shall not appear before the referenced type is defined. I fixed this by testing to see if a type that is used in a structure constructor is forward referenced. Differential Revision: https://reviews.llvm.org/D87535 --- flang/lib/Semantics/expression.cpp | 15 +++++++++++---- flang/test/Semantics/bad-forward-type.f90 | 3 +-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index fcce08db6ef6d..5a2a7df9fb98d 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -1996,11 +1996,18 @@ MaybeExpr ExpressionAnalyzer::Analyze(const parser::FunctionReference &funcRef, const auto &designator{std::get(call.t)}; if (const auto *name{std::get_if(&designator.u)}) { semantics::Scope &scope{context_.FindScope(name->source)}; + semantics::DerivedTypeSpec dtSpec{ + name->source, derivedType.GetUltimate()}; + if (dtSpec.IsForwardReferenced()) { + Say(call.source, + "Cannot construct value for derived type '%s' " + "before it is defined"_err_en_US, + name->source); + return std::nullopt; + } const semantics::DeclTypeSpec &type{ - semantics::FindOrInstantiateDerivedType(scope, - semantics::DerivedTypeSpec{ - name->source, derivedType.GetUltimate()}, - context_)}; + semantics::FindOrInstantiateDerivedType( + scope, std::move(dtSpec), context_)}; auto &mutableRef{const_cast(funcRef)}; *structureConstructor = mutableRef.ConvertToStructureConstructor(type.derivedTypeSpec()); diff --git a/flang/test/Semantics/bad-forward-type.f90 b/flang/test/Semantics/bad-forward-type.f90 index 2a8cbc0c9b1af..b7857e1f8af42 100644 --- a/flang/test/Semantics/bad-forward-type.f90 +++ b/flang/test/Semantics/bad-forward-type.f90 @@ -72,9 +72,8 @@ subroutine s7(x) end subroutine subroutine s8 - !ERROR: Derived type 't2' was used but never defined - !ERROR: The derived type 't2' was forward-referenced but not defined implicit type(t2)(x) + !ERROR: Cannot construct value for derived type 't2' before it is defined parameter(y=t2(12.3)) type t2 real :: c From 670c276232ec2233323fab5ad4c1aeef923e9e48 Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Thu, 3 Sep 2020 16:06:14 -0700 Subject: [PATCH 0597/1079] [GlobalISel] Add G_UNMERGE_VALUES(G_MERGE_VALUES) combine Add the matching and applying function to the combiner helper for G_UNMERGE_VALUES(G_MERGE_VALUES). This combine also supports any merge-like input nodes, like G_BUILD_VECTOR, and is robust against bitcasts in between the unmerge and merge nodes. When the input type of the merge node and the output type of the unmerge node are not the same, but the sizes are, the combine still applies but creates bitcasts between the sources and the destinations instead of reusing the sources directly.
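The rule the combine implements can be summarized compactly. Below is a toy C++ model of that decision (illustrative only: the real code operates on MachineInstr and Register through CombinerHelper, and LLTModel/classify are invented names):

enum class Action { ReuseDirectly, InsertBitcast, DontCombine };

// Toy stand-in for llvm::LLT: Id distinguishes distinct types of equal size,
// e.g. s32 vs <2 x s16>.
struct LLTModel { unsigned SizeInBits; unsigned Id; };

constexpr Action classify(LLTModel mergeSrcTy, LLTModel unmergeDstTy) {
  if (mergeSrcTy.Id == unmergeDstTy.Id)
    return Action::ReuseDirectly; // unmerge defs are replaced by the merge sources
  if (mergeSrcTy.SizeInBits == unmergeDstTy.SizeInBits)
    return Action::InsertBitcast; // same size, different type: cast source to def
  return Action::DontCombine;     // e.g. s32 merge sources unmerged into s16 defs
}

static_assert(classify({32, 0}, {32, 0}) == Action::ReuseDirectly);
static_assert(classify({32, 0}, {32, 1}) == Action::InsertBitcast);
static_assert(classify({32, 0}, {16, 2}) == Action::DontCombine);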
Long term, the artifact combiner should probably reuse that helper, but as of today, it doesn't use any outside helper, so I kept it this way. Differential Revision: https://reviews.llvm.org/D87117 --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 8 + .../include/llvm/Target/GlobalISel/Combine.td | 11 +- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 59 ++++++ .../AArch64/GlobalISel/combine-unmerge.mir | 183 ++++++++++++++++++ 4 files changed, 260 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index a403f870ee5eb..44aa7a96aa730 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -244,6 +244,14 @@ class CombinerHelper { bool applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal); bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount); + /// Transform G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z. + bool + matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, + SmallVectorImpl &Operands); + bool + applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, + SmallVectorImpl &Operands); + /// Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space. bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg); bool applyCombineI2PToP2I(MachineInstr &MI, Register &Reg); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 5c7e395d54976..e8a92012782c1 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -394,6 +394,15 @@ def fneg_fneg_fold: GICombineRule < (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }]) >; +// Fold (unmerge(merge x, y, z)) -> z, y, z. +def unmerge_merge_matchinfo : GIDefMatchData<"SmallVector">; +def unmerge_merge : GICombineRule< + (defs root:$d, unmerge_merge_matchinfo:$info), + (match (wip_match_opcode G_UNMERGE_VALUES): $d, + [{ return Helper.matchCombineUnmergeMergeToPlainValues(*${d}, ${info}); }]), + (apply [{ return Helper.applyCombineUnmergeMergeToPlainValues(*${d}, ${info}); }]) +>; + // FIXME: These should use the custom predicate feature once it lands. 
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -424,4 +433,4 @@ def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain, shl_ashr_to_sext_inreg, sext_inreg_of_load, width_reduction_combines, select_combines, known_bits_simplifications, ext_ext_fold, - not_cmp_fold, opt_brcond_by_inverting_cond]>; + not_cmp_fold, opt_brcond_by_inverting_cond, unmerge_merge]>; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 377bbd6526597..1ec2a3f1e26fa 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1553,6 +1553,65 @@ bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, return true; } +static Register peekThroughBitcast(Register Reg, + const MachineRegisterInfo &MRI) { + while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg)))) + ; + + return Reg; +} + +bool CombinerHelper::matchCombineUnmergeMergeToPlainValues( + MachineInstr &MI, SmallVectorImpl &Operands) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + Register SrcReg = + peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI); + + MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); + if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES && + SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR && + SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS) + return false; + + // Check the source type of the merge. + LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg()); + LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg()); + bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits(); + if (SrcMergeTy != Dst0Ty && !SameSize) + return false; + // They are the same now (modulo a bitcast). + // We can collect all the src registers. + for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx; + ++Idx) + Operands.push_back(SrcInstr->getOperand(Idx).getReg()); + return true; +} + +bool CombinerHelper::applyCombineUnmergeMergeToPlainValues( + MachineInstr &MI, SmallVectorImpl &Operands) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + assert((MI.getNumOperands() - 1 == Operands.size()) && + "Not enough operands to replace all defs"); + unsigned NumElems = MI.getNumOperands() - 1; + + LLT SrcTy = MRI.getType(Operands[0]); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + bool CanReuseInputDirectly = DstTy == SrcTy; + Builder.setInstrAndDebugLoc(MI); + for (unsigned Idx = 0; Idx < NumElems; ++Idx) { + Register DstReg = MI.getOperand(Idx).getReg(); + Register SrcReg = Operands[Idx]; + if (CanReuseInputDirectly) + replaceRegWith(MRI, DstReg, SrcReg); + else + Builder.buildCast(DstReg, SrcReg); + } + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir new file mode 100644 index 0000000000000..73401374ef9db --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir @@ -0,0 +1,183 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s + +# Simple unmerge(merge) case with two operands. 
+# The sources of the merge can be used in place of +# the destinations of the unmerge. +--- +name: test_combine_unmerge_merge +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_merge + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: $w0 = COPY [[DEF]](s32) + ; CHECK: $w1 = COPY [[DEF1]](s32) + %0:_(s32) = G_IMPLICIT_DEF + %1:_(s32) = G_IMPLICIT_DEF + %2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %2(s64) + $w0 = COPY %3(s32) + $w1 = COPY %4(s32) +... + +# Simple unmerge(merge) case with three operands. +# The sources of the merge can be used in place of +# the destinations of the unmerge. +--- +name: test_combine_unmerge_merge_3ops +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_merge_3ops + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: $w0 = COPY [[DEF]](s32) + ; CHECK: $w1 = COPY [[DEF1]](s32) + ; CHECK: $w2 = COPY [[DEF2]](s32) + %0:_(s32) = G_IMPLICIT_DEF + %1:_(s32) = G_IMPLICIT_DEF + %5:_(s32) = G_IMPLICIT_DEF + %2:_(s96) = G_MERGE_VALUES %0(s32), %1(s32), %5(s32) + %3:_(s32), %4:_(s32), %6:_(s32) = G_UNMERGE_VALUES %2(s96) + $w0 = COPY %3(s32) + $w1 = COPY %4(s32) + $w2 = COPY %6(s32) +... + +# Simple unmerge(buildvector) case with two operands. +# The sources of the buildvector can be used in place of +# the destinations of the unmerge. +--- +name: test_combine_unmerge_build_vector +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_build_vector + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: $w0 = COPY [[DEF]](s32) + ; CHECK: $w1 = COPY [[DEF1]](s32) + %0:_(s32) = G_IMPLICIT_DEF + %1:_(s32) = G_IMPLICIT_DEF + %2:_(<2 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32) + %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %2(<2 x s32>) + $w0 = COPY %3(s32) + $w1 = COPY %4(s32) +... + +# Simple unmerge(buildvector) case with three operands. +# The sources of the buildvector can be used in place of +# the destinations of the unmerge. +--- +name: test_combine_unmerge_buildvector_3ops +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_buildvector_3ops + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: $w0 = COPY [[DEF]](s32) + ; CHECK: $w1 = COPY [[DEF1]](s32) + ; CHECK: $w2 = COPY [[DEF2]](s32) + %0:_(s32) = G_IMPLICIT_DEF + %1:_(s32) = G_IMPLICIT_DEF + %5:_(s32) = G_IMPLICIT_DEF + %2:_(<3 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %5(s32) + %3:_(s32), %4:_(s32), %6:_(s32) = G_UNMERGE_VALUES %2(<3 x s32>) + $w0 = COPY %3(s32) + $w1 = COPY %4(s32) + $w2 = COPY %6(s32) +... + +# Simple unmerge(concatvectors) case. +# The sources of the concatvectors can be used in place of +# the destinations of the unmerge. +--- +name: test_combine_unmerge_concat_vectors +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_concat_vectors + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $w1 + ; CHECK: $w0 = COPY [[COPY]](<2 x s16>) + ; CHECK: $w1 = COPY [[COPY1]](<2 x s16>) + %0:_(<2 x s16>) = COPY $w0 + %1:_(<2 x s16>) = COPY $w1 + %2:_(<4 x s16>) = G_CONCAT_VECTORS %0(<2 x s16>), %1(<2 x s16>) + %3:_(<2 x s16>), %4:_(<2 x s16>) = G_UNMERGE_VALUES %2(<4 x s16>) + $w0 = COPY %3(<2 x s16>) + $w1 = COPY %4(<2 x s16>) +... 
+ +# Unmerge(merge) case with two operands and a bitcast in the middle. +# The sources of the merge can be used in place of +# the destinations of the unmerge. +--- +name: test_combine_unmerge_bitcast_merge +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_bitcast_merge + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: $w0 = COPY [[DEF]](s32) + ; CHECK: $w1 = COPY [[DEF1]](s32) + %0:_(s32) = G_IMPLICIT_DEF + %1:_(s32) = G_IMPLICIT_DEF + %2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %5:_(<2 x s32>) = G_BITCAST %2(s64) + %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %5(<2 x s32>) + $w0 = COPY %3(s32) + $w1 = COPY %4(s32) +... + +# Unmerge(merge) with incompatible types: unmerge destTy != merge inputTy. +# The sources of the merge cannot be used in place of +# the destinations of the unmerge, since the types don't match. +--- +name: test_combine_unmerge_merge_incompatible_types +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_merge_incompatible_types + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32) + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[MV]](s64) + ; CHECK: $h0 = COPY [[UV]](s16) + ; CHECK: $h1 = COPY [[UV1]](s16) + ; CHECK: $h2 = COPY [[UV2]](s16) + ; CHECK: $h3 = COPY [[UV3]](s16) + %0:_(s32) = G_IMPLICIT_DEF + %1:_(s32) = G_IMPLICIT_DEF + %2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %2(s64) + $h0 = COPY %3(s16) + $h1 = COPY %4(s16) + $h2 = COPY %5(s16) + $h3 = COPY %6(s16) +... + +# Unmerge(concatvectors) with incompatible types: unmerge destTy != merge inputTy +# but destTy.size() == inputTy.size(). +# The sources of the concatvectors can be used in place of +# the destinations of the unmerge with a bitcast since the sizes +# match. +--- +name: test_combine_unmerge_merge_incompatible_types_but_same_size +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_merge_incompatible_types_but_same_size + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $w1 + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK: $w0 = COPY [[BITCAST]](s32) + ; CHECK: $w1 = COPY [[BITCAST1]](s32) + %0:_(<2 x s16>) = COPY $w0 + %1:_(<2 x s16>) = COPY $w1 + %2:_(<4 x s16>) = G_CONCAT_VECTORS %0(<2 x s16>), %1(<2 x s16>) + %5:_(s64) = G_BITCAST %2(<4 x s16>) + %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %5(s64) + $w0 = COPY %3(s32) + $w1 = COPY %4(s32) +... + From 39ec36415df5162fcffae09fde9b931e336a6f3d Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 14 Sep 2020 15:55:17 -0700 Subject: [PATCH 0598/1079] Revert "[docs][NewPM] Add docs for writing NPM passes" This reverts commit c2590de30df23ef0db39b496cdec62a83a61fbfa. 
Breaks shared libs build --- llvm/docs/UserGuides.rst | 5 - llvm/docs/WritingAnLLVMNewPMPass.rst | 209 ------------------ llvm/docs/WritingAnLLVMPass.rst | 4 - .../llvm/Transforms/HelloNew/HelloWorld.h | 23 -- llvm/lib/Passes/LLVMBuild.txt | 2 +- llvm/lib/Passes/PassBuilder.cpp | 1 - llvm/lib/Passes/PassRegistry.def | 1 - llvm/lib/Transforms/CMakeLists.txt | 1 - llvm/lib/Transforms/HelloNew/CMakeLists.txt | 6 - llvm/lib/Transforms/HelloNew/HelloWorld.cpp | 17 -- llvm/lib/Transforms/HelloNew/LLVMBuild.txt | 22 -- llvm/lib/Transforms/LLVMBuild.txt | 2 +- llvm/test/Transforms/HelloNew/helloworld.ll | 12 - .../gn/secondary/llvm/lib/Passes/BUILD.gn | 1 - .../llvm/lib/Transforms/HelloNew/BUILD.gn | 9 - 15 files changed, 2 insertions(+), 313 deletions(-) delete mode 100644 llvm/docs/WritingAnLLVMNewPMPass.rst delete mode 100644 llvm/include/llvm/Transforms/HelloNew/HelloWorld.h delete mode 100644 llvm/lib/Transforms/HelloNew/CMakeLists.txt delete mode 100644 llvm/lib/Transforms/HelloNew/HelloWorld.cpp delete mode 100644 llvm/lib/Transforms/HelloNew/LLVMBuild.txt delete mode 100644 llvm/test/Transforms/HelloNew/helloworld.ll delete mode 100644 llvm/utils/gn/secondary/llvm/lib/Transforms/HelloNew/BUILD.gn diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst index 00e99db297f78..2e0cffb711ef9 100644 --- a/llvm/docs/UserGuides.rst +++ b/llvm/docs/UserGuides.rst @@ -54,7 +54,6 @@ intermediate LLVM representation. TableGenFundamentals Vectorizers WritingAnLLVMPass - WritingAnLLVMNewPMPass WritingAnLLVMBackend yaml2obj @@ -108,10 +107,6 @@ Optimizations :doc:`WritingAnLLVMPass` Information on how to write LLVM transformations and analyses. -:doc:`WritingAnLLVMNewPMPass` - Information on how to write LLVM transformations under the new pass - manager. - :doc:`Passes` A list of optimizations and analyses implemented in LLVM. diff --git a/llvm/docs/WritingAnLLVMNewPMPass.rst b/llvm/docs/WritingAnLLVMNewPMPass.rst deleted file mode 100644 index a876ec4ceb005..0000000000000 --- a/llvm/docs/WritingAnLLVMNewPMPass.rst +++ /dev/null @@ -1,209 +0,0 @@ -==================== -Writing an LLVM Pass -==================== - -.. program:: opt - -.. contents:: - :local: - -Introduction --- What is a pass? -================================ - -The LLVM pass framework is an important part of the LLVM system, because LLVM -passes are where most of the interesting parts of the compiler exist. Passes -perform the transformations and optimizations that make up the compiler, they -build the analysis results that are used by these transformations, and they -are, above all, a structuring technique for compiler code. - -Unlike passes under the legacy pass manager where the pass interface is -defined via inheritance, passes under the new pass manager rely on -concept-based polymorphism, meaning there is no explicit interface (see -comments in ``PassManager.h`` for more details). All LLVM passes inherit from -the CRTP mix-in ``PassInfoMixin``. The pass should have a ``run()`` -method which returns a ``PreservedAnalyses`` and takes in some unit of IR -along with an analysis manager. For example, a function pass would have a -``PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);`` method. - -We start by showing you how to construct a pass, from setting up the build, -creating the pass, to executing and testing it. Looking at existing passes is -always a great way to learn details. 
- -Quick Start --- Writing hello world -=================================== - -Here we describe how to write the "hello world" of passes. The "HelloWorld" -pass is designed to simply print out the name of non-external functions that -exist in the program being compiled. It does not modify the program at all, -it just inspects it. - -The code below already exists; feel free to create a pass with a different -name alongside the HelloWorld source files. - -.. _writing-an-llvm-npm-pass-build: - -Setting up the build --------------------- - -First, configure and build LLVM as described in :doc:`GettingStarted`. - -Next, we will reuse an existing directory (creating a new directory involves -modifying more ``CMakeLists.txt``s and ``LLVMBuild.txt``s than we want). For -this example, we'll use ``llvm/lib/Transforms/HelloNew/HelloWorld.cpp``, -which has already been created. If you'd like to create your own pass, add a -new source file into ``llvm/lib/Transforms/HelloNew/CMakeLists.txt`` under -``HelloWorld.cpp``: - -.. code-block:: cmake - - add_llvm_component_library(LLVMHelloWorld - HelloWorld.cpp - - DEPENDS - intrinsics_gen - ) - -Now that we have the build set up for a new pass, we need to write the code -for the pass itself. - -.. _writing-an-llvm-npm-pass-basiccode: - -Basic code required -------------------- - -Now that the build is setup for a new pass, we just have to write it. - -First we need to define the pass in a header file. We'll create -``llvm/include/llvm/Transforms/HelloNew/HelloWorld.h``. The file should -contain the following boilerplate: - -.. code-block:: c++ - - #ifndef LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H - #define LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H - - #include "llvm/IR/PassManager.h" - - namespace llvm { - - class HelloWorldPass : public PassInfoMixin { - public: - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); - }; - - } // namespace llvm - - #endif // LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H - -This creates the class for the pass with a declaration of the ``run()`` -method which actually runs the pass. Inheriting from ``PassInfoMixin`` -sets up some more boilerplate so that we don't have to write it ourselves. - -Our class is in the ``llvm`` namespace so that we don't pollute the global -namespace. - -Next we'll create ``llvm/lib/Transforms/HelloNew/HelloWorld.cpp``, starting -with - -.. code-block:: c++ - - #include "llvm/Transforms/HelloNew/HelloWorld.h" - -... to include the header file we just created. - -.. code-block:: c++ - - using namespace llvm; - -... is required because the functions from the include files live in the llvm -namespace. This should only be done in non-header files. - -Next we have the pass's ``run()`` definition: - -.. code-block:: c++ - - PreservedAnalyses HelloWorldPass::run(Function &F, - FunctionAnalysisManager &AM) { - errs() << F.getName() << "\n"; - return PreservedAnalyses::all(); - } - -... which simply prints out the name of the function to stderr. The pass -manager will ensure that the pass will be run on every function in a module. -The ``PreservedAnalyses`` return value says that all analyses (e.g. dominator -tree) are still valid after this pass since we didn't modify any functions. - -That's it for the pass itself. Now in order to "register" the pass, we need -to add it to a couple places. Add the following to -``llvm\lib\Passes\PassRegistry.def`` in the ``FUNCTION_PASS`` section - -.. code-block:: c++ - - FUNCTION_PASS("helloworld", HelloWorldPass()) - -... which adds the pass under the name "helloworld". 
- -``llvm\lib\Passes\PassRegistry.def`` is #include'd into -``llvm\lib\Passes\PassBuilder.cpp`` multiple times for various reasons. Since -it constructs our pass, we need to also add the proper #include in -``llvm\lib\Passes\PassBuilder.cpp``: - -.. code-block:: c++ - - #include "llvm/Transforms/HelloNew/HelloWorld.h" - -This should be all the code necessary for our pass, now it's time to compile -and run it. - -Running a pass with ``opt`` ---------------------------- - -Now that you have a brand new shiny pass, we can build :program:`opt` and use -it to run some LLVM IR through the pass. - -.. code-block:: console - - $ ninja -C build/ opt - # or whatever build system/build directory you are using - - $ cat /tmp/a.ll - define i32 @foo() { - %a = add i32 2, 3 - ret i32 %a - } - - define void @bar() { - ret void - } - - $ build/bin/opt -disable-output /tmp/a.ll -passes=helloworld - foo - bar - -Our pass ran and printed the names of functions as expected! - -Testing a pass --------------- - -Testing our pass is important to prevent future regressions. We'll add a lit -test at ``llvm/test/Transforms/HelloNew/helloworld.ll``. See -:doc:`TestingGuide` for more information on testing. - -.. code-block:: llvm - - $ cat llvm/test/Transforms/HelloNew/helloworld.ll - ; RUN: opt -disable-output -passes=helloworld %s 2>&1 | FileCheck %s - - ; CHECK: {{^}}foo{{$}} - define i32 @foo() { - %a = add i32 2, 3 - ret i32 %a - } - - ; CHECK-NEXT: {{^}}bar{{$}} - define void @bar() { - ret void - } - - $ ninja -C build check-llvm - # runs our new test alongside all other llvm lit tests diff --git a/llvm/docs/WritingAnLLVMPass.rst b/llvm/docs/WritingAnLLVMPass.rst index 7a24659e62942..88f481ba6b076 100644 --- a/llvm/docs/WritingAnLLVMPass.rst +++ b/llvm/docs/WritingAnLLVMPass.rst @@ -34,10 +34,6 @@ We start by showing you how to construct a pass, everything from setting up the code, to compiling, loading, and executing it. After the basics are down, more advanced features are discussed. -This document deals with the legacy pass manager. LLVM is transitioning to -the new pass manager, which has its own way of defining passes. For more -details, see :doc:`WritingAnLLVMNewPMPass`. - Quick Start --- Writing hello world =================================== diff --git a/llvm/include/llvm/Transforms/HelloNew/HelloWorld.h b/llvm/include/llvm/Transforms/HelloNew/HelloWorld.h deleted file mode 100644 index 6c753032f913c..0000000000000 --- a/llvm/include/llvm/Transforms/HelloNew/HelloWorld.h +++ /dev/null @@ -1,23 +0,0 @@ -//===-- HelloWorld.h - Example Transformations ------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
-#define LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
-
-#include "llvm/IR/PassManager.h"
-
-namespace llvm {
-
-class HelloWorldPass : public PassInfoMixin<HelloWorldPass> {
-public:
-  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-};
-
-} // namespace llvm
-
-#endif // LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
diff --git a/llvm/lib/Passes/LLVMBuild.txt b/llvm/lib/Passes/LLVMBuild.txt
index f49f7828d2b93..3e7a391154137 100644
--- a/llvm/lib/Passes/LLVMBuild.txt
+++ b/llvm/lib/Passes/LLVMBuild.txt
@@ -18,4 +18,4 @@ type = Library
 name = Passes
 parent = Libraries
-required_libraries = AggressiveInstCombine Analysis Core Coroutines HelloNew IPO InstCombine ObjCARC Scalar Support Target TransformUtils Vectorize Instrumentation
+required_libraries = AggressiveInstCombine Analysis Core Coroutines IPO InstCombine ObjCARC Scalar Support Target TransformUtils Vectorize Instrumentation
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index cd64aecd81d73..c47f612e71991 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -75,7 +75,6 @@
 #include "llvm/Transforms/Coroutines/CoroEarly.h"
 #include "llvm/Transforms/Coroutines/CoroElide.h"
 #include "llvm/Transforms/Coroutines/CoroSplit.h"
-#include "llvm/Transforms/HelloNew/HelloWorld.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
 #include "llvm/Transforms/IPO/Attributor.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 1d70db3063470..4b4f71a718702 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -197,7 +197,6 @@ FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/false)
 FUNCTION_PASS("make-guards-explicit", MakeGuardsExplicitPass())
 FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/true))
 FUNCTION_PASS("gvn-hoist", GVNHoistPass())
-FUNCTION_PASS("helloworld", HelloWorldPass())
 FUNCTION_PASS("instcombine", InstCombinePass())
 FUNCTION_PASS("instcount", InstCountPass())
 FUNCTION_PASS("instsimplify", InstSimplifyPass())
diff --git a/llvm/lib/Transforms/CMakeLists.txt b/llvm/lib/Transforms/CMakeLists.txt
index 2a0abebdf19b5..dda5f6de11e32 100644
--- a/llvm/lib/Transforms/CMakeLists.txt
+++ b/llvm/lib/Transforms/CMakeLists.txt
@@ -6,7 +6,6 @@ add_subdirectory(Scalar)
 add_subdirectory(IPO)
 add_subdirectory(Vectorize)
 add_subdirectory(Hello)
-add_subdirectory(HelloNew)
 add_subdirectory(ObjCARC)
 add_subdirectory(Coroutines)
 add_subdirectory(CFGuard)
diff --git a/llvm/lib/Transforms/HelloNew/CMakeLists.txt b/llvm/lib/Transforms/HelloNew/CMakeLists.txt
deleted file mode 100644
index a7a1a5b93b062..0000000000000
--- a/llvm/lib/Transforms/HelloNew/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-add_llvm_component_library(LLVMHelloNew
-  HelloWorld.cpp
-
-  DEPENDS
-  intrinsics_gen
-  )
diff --git a/llvm/lib/Transforms/HelloNew/HelloWorld.cpp b/llvm/lib/Transforms/HelloNew/HelloWorld.cpp
deleted file mode 100644
index dea94f8a8f627..0000000000000
--- a/llvm/lib/Transforms/HelloNew/HelloWorld.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-//===-- HelloWorld.cpp - Example Transformations --------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/HelloNew/HelloWorld.h" - -using namespace llvm; - -PreservedAnalyses HelloWorldPass::run(Function &F, - FunctionAnalysisManager &AM) { - errs() << F.getName() << "\n"; - return PreservedAnalyses::all(); -} diff --git a/llvm/lib/Transforms/HelloNew/LLVMBuild.txt b/llvm/lib/Transforms/HelloNew/LLVMBuild.txt deleted file mode 100644 index cc66fb07c3e9d..0000000000000 --- a/llvm/lib/Transforms/HelloNew/LLVMBuild.txt +++ /dev/null @@ -1,22 +0,0 @@ -;===- ./lib/Transforms/HelloNew/LLVMBuild.txt ------------------*- Conf -*--===; -; -; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -; See https://llvm.org/LICENSE.txt for license information. -; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = HelloNew -parent = Transforms -library_name = HelloNew -required_libraries = Core diff --git a/llvm/lib/Transforms/LLVMBuild.txt b/llvm/lib/Transforms/LLVMBuild.txt index 6c6a6bb317fa8..5fb5efcc068c8 100644 --- a/llvm/lib/Transforms/LLVMBuild.txt +++ b/llvm/lib/Transforms/LLVMBuild.txt @@ -15,7 +15,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = AggressiveInstCombine Coroutines HelloNew IPO InstCombine Instrumentation Scalar Utils Vectorize ObjCARC CFGuard +subdirectories = AggressiveInstCombine Coroutines IPO InstCombine Instrumentation Scalar Utils Vectorize ObjCARC CFGuard [component_0] type = Group diff --git a/llvm/test/Transforms/HelloNew/helloworld.ll b/llvm/test/Transforms/HelloNew/helloworld.ll deleted file mode 100644 index 48817c24801ae..0000000000000 --- a/llvm/test/Transforms/HelloNew/helloworld.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: opt -disable-output -passes=helloworld %s 2>&1 | FileCheck %s - -; CHECK: {{^}}foo{{$}} -define i32 @foo() { - %a = add i32 2, 3 - ret i32 %a -} - -; CHECK-NEXT: {{^}}bar{{$}} -define void @bar() { - ret void -} diff --git a/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn index bb8a671dd6a7d..9afe48db159b2 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn @@ -8,7 +8,6 @@ static_library("Passes") { "//llvm/lib/Target", "//llvm/lib/Transforms/AggressiveInstCombine", "//llvm/lib/Transforms/Coroutines", - "//llvm/lib/Transforms/HelloNew", "//llvm/lib/Transforms/IPO", "//llvm/lib/Transforms/InstCombine", "//llvm/lib/Transforms/Instrumentation", diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/HelloNew/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/HelloNew/BUILD.gn deleted file mode 100644 index 5e6167324a4ae..0000000000000 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/HelloNew/BUILD.gn +++ /dev/null @@ -1,9 +0,0 @@ -static_library("HelloNew") { - output_name = "LLVMHelloNew" - deps = [ - "//llvm/lib/Analysis", - "//llvm/lib/IR", - "//llvm/lib/Support", - ] - sources = [ "HelloWorld.cpp" ] -} From 
46f9137e43f3eb2de9990765a4c482b45b0f8dd5 Mon Sep 17 00:00:00 2001 From: Aditya Nandakumar Date: Mon, 14 Sep 2020 15:43:52 -0700 Subject: [PATCH 0599/1079] [GISel]: Add combine for G_FABS to G_FABS https://reviews.llvm.org/D87554 Patch adds one new GICombinerRule for G_FABS. The combine rule folds G_FABS(G_FABS(X)) to G_FABS(X). Patch additionally adds new combiner tests for the AArch64 target to test this new combiner rule. Patch by mkitzan. --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 4 +++ .../include/llvm/Target/GlobalISel/Combine.td | 12 ++++++- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 15 +++++++++ .../AArch64/GlobalISel/combine-fabs.mir | 32 +++++++++++++++++++ 4 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-fabs.mir diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 44aa7a96aa730..8a5e80386e7ee 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -280,6 +280,10 @@ class CombinerHelper { /// Transform fneg(fneg(x)) to x. bool matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg); + /// Match fabs(fabs(x)) to fabs(x). + bool matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src); + bool applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src); + /// Return true if any explicit use operand on \p MI is defined by a /// G_IMPLICIT_DEF. bool matchAnyExplicitUseIsUndef(MachineInstr &MI); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index e8a92012782c1..f99252935db42 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -403,6 +403,15 @@ def unmerge_merge : GICombineRule< (apply [{ return Helper.applyCombineUnmergeMergeToPlainValues(*${d}, ${info}); }]) >; +// Fold (fabs (fabs x)) -> (fabs x). +def fabs_fabs_fold_matchinfo : GIDefMatchData<"Register">; +def fabs_fabs_fold: GICombineRule< + (defs root:$root, fabs_fabs_fold_matchinfo:$matchinfo), + (match (wip_match_opcode G_FABS):$root, + [{ return Helper.matchCombineFAbsOfFAbs(*${root}, ${matchinfo}); }]), + (apply [{ return Helper.applyCombineFAbsOfFAbs(*${root}, ${matchinfo}); }]) +>; + // FIXME: These should use the custom predicate feature once it lands. 
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                     undef_to_negative_one,
@@ -433,4 +442,5 @@ def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
     shl_ashr_to_sext_inreg, sext_inreg_of_load,
     width_reduction_combines, select_combines,
     known_bits_simplifications, ext_ext_fold,
-    not_cmp_fold, opt_brcond_by_inverting_cond, unmerge_merge]>;
+    not_cmp_fold, opt_brcond_by_inverting_cond,
+    unmerge_merge, fabs_fabs_fold]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 1ec2a3f1e26fa..a2a7d6b928d4a 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1878,6 +1878,21 @@ bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
   return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg)));
 }
 
+bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
+  assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
+  Src = MI.getOperand(1).getReg();
+  Register AbsSrc;
+  return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
+}
+
+bool CombinerHelper::applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
+  assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
+  Register Dst = MI.getOperand(0).getReg();
+  MI.eraseFromParent();
+  replaceRegWith(MRI, Dst, Src);
+  return true;
+}
+
 bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
   return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
     return MO.isReg() &&
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fabs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fabs.mir
new file mode 100644
index 0000000000000..32aa60fe6045f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fabs.mir
@@ -0,0 +1,32 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+
+---
+name:            test_combine_fabs_fabs
+body:             |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: test_combine_fabs_fabs
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]]
+    ; CHECK: $w0 = COPY [[FABS]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_FABS %0(s32)
+    %2:_(s32) = G_FABS %1(s32)
+    $w0 = COPY %2(s32)
+...
+---
+name:            test_combine_fabs_fabs_vec
+body:             |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_combine_fabs_fabs_vec
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0
+    ; CHECK: [[FABS:%[0-9]+]]:_(<2 x s32>) = G_FABS [[COPY]]
+    ; CHECK: $x0 = COPY [[FABS]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $x0
+    %1:_(<2 x s32>) = G_FABS %0(<2 x s32>)
+    %2:_(<2 x s32>) = G_FABS %1(<2 x s32>)
+    $x0 = COPY %2(<2 x s32>)
...

From 10b12d4035de40e5eaedddda82d9c533854eefcb Mon Sep 17 00:00:00 2001
From: Arthur Eubanks
Date: Mon, 31 Aug 2020 18:36:11 -0700
Subject: [PATCH 0600/1079] Reland [docs][NewPM] Add docs for writing NPM
 passes

As to not conflict with the legacy PM example passes under
llvm/lib/Transforms/Hello, this is under HelloNew. This makes the
CMakeLists.txt and general directory structure less confusing for people
following the example.

Much of the doc structure was taken from WritingAnLLVMPass.rst.

This adds a HelloWorld pass which simply prints out each function name.
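For illustration (this simply mirrors the ``opt`` invocation shown in the new
document itself; the function names are the ones used there), running
``opt -disable-output -passes=helloworld`` over a module defining ``foo`` and
``bar`` prints:

  foo
  bar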
More will follow after this, e.g. passes over different units of IR, analyses.
https://llvm.org/docs/WritingAnLLVMPass.html contains a lot more.

Relanded with missing "Support" dependency in LLVMBuild.txt.

Reviewed By: ychen, asbirlea

Differential Revision: https://reviews.llvm.org/D86979
---
 llvm/docs/UserGuides.rst                      |   5 +
 llvm/docs/WritingAnLLVMNewPMPass.rst          | 209 ++++++++++++++++++
 llvm/docs/WritingAnLLVMPass.rst               |   4 +
 .../llvm/Transforms/HelloNew/HelloWorld.h     |  23 ++
 llvm/lib/Passes/LLVMBuild.txt                 |   2 +-
 llvm/lib/Passes/PassBuilder.cpp               |   1 +
 llvm/lib/Passes/PassRegistry.def              |   1 +
 llvm/lib/Transforms/CMakeLists.txt            |   1 +
 llvm/lib/Transforms/HelloNew/CMakeLists.txt   |   6 +
 llvm/lib/Transforms/HelloNew/HelloWorld.cpp   |  17 ++
 llvm/lib/Transforms/HelloNew/LLVMBuild.txt    |  22 ++
 llvm/lib/Transforms/LLVMBuild.txt             |   2 +-
 llvm/test/Transforms/HelloNew/helloworld.ll   |  12 +
 .../gn/secondary/llvm/lib/Passes/BUILD.gn     |   1 +
 .../llvm/lib/Transforms/HelloNew/BUILD.gn     |   9 +
 15 files changed, 313 insertions(+), 2 deletions(-)
 create mode 100644 llvm/docs/WritingAnLLVMNewPMPass.rst
 create mode 100644 llvm/include/llvm/Transforms/HelloNew/HelloWorld.h
 create mode 100644 llvm/lib/Transforms/HelloNew/CMakeLists.txt
 create mode 100644 llvm/lib/Transforms/HelloNew/HelloWorld.cpp
 create mode 100644 llvm/lib/Transforms/HelloNew/LLVMBuild.txt
 create mode 100644 llvm/test/Transforms/HelloNew/helloworld.ll
 create mode 100644 llvm/utils/gn/secondary/llvm/lib/Transforms/HelloNew/BUILD.gn

diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst
index 2e0cffb711ef9..00e99db297f78 100644
--- a/llvm/docs/UserGuides.rst
+++ b/llvm/docs/UserGuides.rst
@@ -54,6 +54,7 @@ intermediate LLVM representation.
    TableGenFundamentals
    Vectorizers
    WritingAnLLVMPass
+   WritingAnLLVMNewPMPass
    WritingAnLLVMBackend
    yaml2obj
 
@@ -107,6 +108,10 @@ Optimizations
 :doc:`WritingAnLLVMPass`
   Information on how to write LLVM transformations and analyses.
 
+:doc:`WritingAnLLVMNewPMPass`
+  Information on how to write LLVM transformations under the new pass
+  manager.
+
 :doc:`Passes`
   A list of optimizations and analyses implemented in LLVM.
 
diff --git a/llvm/docs/WritingAnLLVMNewPMPass.rst b/llvm/docs/WritingAnLLVMNewPMPass.rst
new file mode 100644
index 0000000000000..a876ec4ceb005
--- /dev/null
+++ b/llvm/docs/WritingAnLLVMNewPMPass.rst
@@ -0,0 +1,209 @@
+====================
+Writing an LLVM Pass
+====================
+
+.. program:: opt
+
+.. contents::
+    :local:
+
+Introduction --- What is a pass?
+================================
+
+The LLVM pass framework is an important part of the LLVM system, because LLVM
+passes are where most of the interesting parts of the compiler exist. Passes
+perform the transformations and optimizations that make up the compiler, they
+build the analysis results that are used by these transformations, and they
+are, above all, a structuring technique for compiler code.
+
+Unlike passes under the legacy pass manager where the pass interface is
+defined via inheritance, passes under the new pass manager rely on
+concept-based polymorphism, meaning there is no explicit interface (see
+comments in ``PassManager.h`` for more details). All LLVM passes inherit from
+the CRTP mix-in ``PassInfoMixin<PassT>``. The pass should have a ``run()``
+method which returns a ``PreservedAnalyses`` and takes in some unit of IR
+along with an analysis manager. For example, a function pass would have a
+``PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);`` method.
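+
+As a rough sketch (the class name ``ExampleModulePass`` is illustrative, not
+an existing LLVM class), the corresponding declaration for a pass over a
+whole module would be:
+
+.. code-block:: c++
+
+  class ExampleModulePass : public PassInfoMixin<ExampleModulePass> {
+  public:
+    PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+  };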
+
+We start by showing you how to construct a pass, from setting up the build,
+creating the pass, to executing and testing it. Looking at existing passes is
+always a great way to learn details.

+Quick Start --- Writing hello world
+===================================
+
+Here we describe how to write the "hello world" of passes. The "HelloWorld"
+pass is designed to simply print out the name of non-external functions that
+exist in the program being compiled. It does not modify the program at all,
+it just inspects it.
+
+The code below already exists; feel free to create a pass with a different
+name alongside the HelloWorld source files.
+
+.. _writing-an-llvm-npm-pass-build:
+
+Setting up the build
+--------------------
+
+First, configure and build LLVM as described in :doc:`GettingStarted`.
+
+Next, we will reuse an existing directory (creating a new directory involves
+modifying more ``CMakeLists.txt``s and ``LLVMBuild.txt``s than we want). For
+this example, we'll use ``llvm/lib/Transforms/HelloNew/HelloWorld.cpp``,
+which has already been created. If you'd like to create your own pass, add a
+new source file into ``llvm/lib/Transforms/HelloNew/CMakeLists.txt`` under
+``HelloWorld.cpp``:
+
+.. code-block:: cmake
+
+  add_llvm_component_library(LLVMHelloWorld
+    HelloWorld.cpp
+
+    DEPENDS
+    intrinsics_gen
+    )
+
+Now that we have the build set up for a new pass, we need to write the code
+for the pass itself.
+
+.. _writing-an-llvm-npm-pass-basiccode:
+
+Basic code required
+-------------------
+
+Now that the build is setup for a new pass, we just have to write it.
+
+First we need to define the pass in a header file. We'll create
+``llvm/include/llvm/Transforms/HelloNew/HelloWorld.h``. The file should
+contain the following boilerplate:
+
+.. code-block:: c++
+
+  #ifndef LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
+  #define LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
+
+  #include "llvm/IR/PassManager.h"
+
+  namespace llvm {
+
+  class HelloWorldPass : public PassInfoMixin<HelloWorldPass> {
+  public:
+    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+  };
+
+  } // namespace llvm
+
+  #endif // LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
+
+This creates the class for the pass with a declaration of the ``run()``
+method which actually runs the pass. Inheriting from ``PassInfoMixin<PassT>``
+sets up some more boilerplate so that we don't have to write it ourselves.
+
+Our class is in the ``llvm`` namespace so that we don't pollute the global
+namespace.
+
+Next we'll create ``llvm/lib/Transforms/HelloNew/HelloWorld.cpp``, starting
+with
+
+.. code-block:: c++
+
+  #include "llvm/Transforms/HelloNew/HelloWorld.h"
+
+... to include the header file we just created.
+
+.. code-block:: c++
+
+  using namespace llvm;
+
+... is required because the functions from the include files live in the llvm
+namespace. This should only be done in non-header files.
+
+Next we have the pass's ``run()`` definition:
+
+.. code-block:: c++
+
+  PreservedAnalyses HelloWorldPass::run(Function &F,
+                                        FunctionAnalysisManager &AM) {
+    errs() << F.getName() << "\n";
+    return PreservedAnalyses::all();
+  }
+
+... which simply prints out the name of the function to stderr. The pass
+manager will ensure that the pass will be run on every function in a module.
+The ``PreservedAnalyses`` return value says that all analyses (e.g. dominator
+tree) are still valid after this pass since we didn't modify any functions.
+
+That's it for the pass itself. Now in order to "register" the pass, we need
+to add it to a couple places. 
Add the following to +``llvm\lib\Passes\PassRegistry.def`` in the ``FUNCTION_PASS`` section + +.. code-block:: c++ + + FUNCTION_PASS("helloworld", HelloWorldPass()) + +... which adds the pass under the name "helloworld". + +``llvm\lib\Passes\PassRegistry.def`` is #include'd into +``llvm\lib\Passes\PassBuilder.cpp`` multiple times for various reasons. Since +it constructs our pass, we need to also add the proper #include in +``llvm\lib\Passes\PassBuilder.cpp``: + +.. code-block:: c++ + + #include "llvm/Transforms/HelloNew/HelloWorld.h" + +This should be all the code necessary for our pass, now it's time to compile +and run it. + +Running a pass with ``opt`` +--------------------------- + +Now that you have a brand new shiny pass, we can build :program:`opt` and use +it to run some LLVM IR through the pass. + +.. code-block:: console + + $ ninja -C build/ opt + # or whatever build system/build directory you are using + + $ cat /tmp/a.ll + define i32 @foo() { + %a = add i32 2, 3 + ret i32 %a + } + + define void @bar() { + ret void + } + + $ build/bin/opt -disable-output /tmp/a.ll -passes=helloworld + foo + bar + +Our pass ran and printed the names of functions as expected! + +Testing a pass +-------------- + +Testing our pass is important to prevent future regressions. We'll add a lit +test at ``llvm/test/Transforms/HelloNew/helloworld.ll``. See +:doc:`TestingGuide` for more information on testing. + +.. code-block:: llvm + + $ cat llvm/test/Transforms/HelloNew/helloworld.ll + ; RUN: opt -disable-output -passes=helloworld %s 2>&1 | FileCheck %s + + ; CHECK: {{^}}foo{{$}} + define i32 @foo() { + %a = add i32 2, 3 + ret i32 %a + } + + ; CHECK-NEXT: {{^}}bar{{$}} + define void @bar() { + ret void + } + + $ ninja -C build check-llvm + # runs our new test alongside all other llvm lit tests diff --git a/llvm/docs/WritingAnLLVMPass.rst b/llvm/docs/WritingAnLLVMPass.rst index 88f481ba6b076..7a24659e62942 100644 --- a/llvm/docs/WritingAnLLVMPass.rst +++ b/llvm/docs/WritingAnLLVMPass.rst @@ -34,6 +34,10 @@ We start by showing you how to construct a pass, everything from setting up the code, to compiling, loading, and executing it. After the basics are down, more advanced features are discussed. +This document deals with the legacy pass manager. LLVM is transitioning to +the new pass manager, which has its own way of defining passes. For more +details, see :doc:`WritingAnLLVMNewPMPass`. + Quick Start --- Writing hello world =================================== diff --git a/llvm/include/llvm/Transforms/HelloNew/HelloWorld.h b/llvm/include/llvm/Transforms/HelloNew/HelloWorld.h new file mode 100644 index 0000000000000..6c753032f913c --- /dev/null +++ b/llvm/include/llvm/Transforms/HelloNew/HelloWorld.h @@ -0,0 +1,23 @@ +//===-- HelloWorld.h - Example Transformations ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
+#define LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class HelloWorldPass : public PassInfoMixin<HelloWorldPass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
diff --git a/llvm/lib/Passes/LLVMBuild.txt b/llvm/lib/Passes/LLVMBuild.txt
index 3e7a391154137..f49f7828d2b93 100644
--- a/llvm/lib/Passes/LLVMBuild.txt
+++ b/llvm/lib/Passes/LLVMBuild.txt
@@ -18,4 +18,4 @@ type = Library
 name = Passes
 parent = Libraries
-required_libraries = AggressiveInstCombine Analysis Core Coroutines IPO InstCombine ObjCARC Scalar Support Target TransformUtils Vectorize Instrumentation
+required_libraries = AggressiveInstCombine Analysis Core Coroutines HelloNew IPO InstCombine ObjCARC Scalar Support Target TransformUtils Vectorize Instrumentation
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index c47f612e71991..cd64aecd81d73 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -75,6 +75,7 @@
 #include "llvm/Transforms/Coroutines/CoroEarly.h"
 #include "llvm/Transforms/Coroutines/CoroElide.h"
 #include "llvm/Transforms/Coroutines/CoroSplit.h"
+#include "llvm/Transforms/HelloNew/HelloWorld.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
 #include "llvm/Transforms/IPO/Attributor.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 4b4f71a718702..1d70db3063470 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -197,6 +197,7 @@ FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/false)
 FUNCTION_PASS("make-guards-explicit", MakeGuardsExplicitPass())
 FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/true))
 FUNCTION_PASS("gvn-hoist", GVNHoistPass())
+FUNCTION_PASS("helloworld", HelloWorldPass())
 FUNCTION_PASS("instcombine", InstCombinePass())
 FUNCTION_PASS("instcount", InstCountPass())
 FUNCTION_PASS("instsimplify", InstSimplifyPass())
diff --git a/llvm/lib/Transforms/CMakeLists.txt b/llvm/lib/Transforms/CMakeLists.txt
index dda5f6de11e32..2a0abebdf19b5 100644
--- a/llvm/lib/Transforms/CMakeLists.txt
+++ b/llvm/lib/Transforms/CMakeLists.txt
@@ -6,6 +6,7 @@ add_subdirectory(Scalar)
 add_subdirectory(IPO)
 add_subdirectory(Vectorize)
 add_subdirectory(Hello)
+add_subdirectory(HelloNew)
 add_subdirectory(ObjCARC)
 add_subdirectory(Coroutines)
 add_subdirectory(CFGuard)
diff --git a/llvm/lib/Transforms/HelloNew/CMakeLists.txt b/llvm/lib/Transforms/HelloNew/CMakeLists.txt
new file mode 100644
index 0000000000000..a7a1a5b93b062
--- /dev/null
+++ b/llvm/lib/Transforms/HelloNew/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_component_library(LLVMHelloNew
+  HelloWorld.cpp
+
+  DEPENDS
+  intrinsics_gen
+  )
diff --git a/llvm/lib/Transforms/HelloNew/HelloWorld.cpp b/llvm/lib/Transforms/HelloNew/HelloWorld.cpp
new file mode 100644
index 0000000000000..dea94f8a8f627
--- /dev/null
+++ b/llvm/lib/Transforms/HelloNew/HelloWorld.cpp
@@ -0,0 +1,17 @@
+//===-- HelloWorld.cpp - Example Transformations --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/HelloNew/HelloWorld.h" + +using namespace llvm; + +PreservedAnalyses HelloWorldPass::run(Function &F, + FunctionAnalysisManager &AM) { + errs() << F.getName() << "\n"; + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/Transforms/HelloNew/LLVMBuild.txt b/llvm/lib/Transforms/HelloNew/LLVMBuild.txt new file mode 100644 index 0000000000000..06d3c81333b78 --- /dev/null +++ b/llvm/lib/Transforms/HelloNew/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/Transforms/HelloNew/LLVMBuild.txt ------------------*- Conf -*--===; +; +; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = HelloNew +parent = Transforms +library_name = HelloNew +required_libraries = Core Support diff --git a/llvm/lib/Transforms/LLVMBuild.txt b/llvm/lib/Transforms/LLVMBuild.txt index 5fb5efcc068c8..6c6a6bb317fa8 100644 --- a/llvm/lib/Transforms/LLVMBuild.txt +++ b/llvm/lib/Transforms/LLVMBuild.txt @@ -15,7 +15,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = AggressiveInstCombine Coroutines IPO InstCombine Instrumentation Scalar Utils Vectorize ObjCARC CFGuard +subdirectories = AggressiveInstCombine Coroutines HelloNew IPO InstCombine Instrumentation Scalar Utils Vectorize ObjCARC CFGuard [component_0] type = Group diff --git a/llvm/test/Transforms/HelloNew/helloworld.ll b/llvm/test/Transforms/HelloNew/helloworld.ll new file mode 100644 index 0000000000000..48817c24801ae --- /dev/null +++ b/llvm/test/Transforms/HelloNew/helloworld.ll @@ -0,0 +1,12 @@ +; RUN: opt -disable-output -passes=helloworld %s 2>&1 | FileCheck %s + +; CHECK: {{^}}foo{{$}} +define i32 @foo() { + %a = add i32 2, 3 + ret i32 %a +} + +; CHECK-NEXT: {{^}}bar{{$}} +define void @bar() { + ret void +} diff --git a/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn index 9afe48db159b2..bb8a671dd6a7d 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn @@ -8,6 +8,7 @@ static_library("Passes") { "//llvm/lib/Target", "//llvm/lib/Transforms/AggressiveInstCombine", "//llvm/lib/Transforms/Coroutines", + "//llvm/lib/Transforms/HelloNew", "//llvm/lib/Transforms/IPO", "//llvm/lib/Transforms/InstCombine", "//llvm/lib/Transforms/Instrumentation", diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/HelloNew/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/HelloNew/BUILD.gn new file mode 100644 index 0000000000000..5e6167324a4ae --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/HelloNew/BUILD.gn @@ -0,0 +1,9 @@ +static_library("HelloNew") { + output_name = "LLVMHelloNew" + deps = [ + "//llvm/lib/Analysis", + "//llvm/lib/IR", + "//llvm/lib/Support", + ] + sources = [ "HelloWorld.cpp" ] +} From 
6352381039c43c66f01a23be19472f7e611ffcdf Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek
Date: Mon, 14 Sep 2020 16:06:10 -0500
Subject: [PATCH 0601/1079] [Hexagon] Some HVX DAG combines

1. VINSERTW0 x, undef -> x
2. VROR (VROR x, a), b -> VROR x, a+b
---
 .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 36 ++++++++++++++-----
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 65bc2e3577cc4..51804e5f53277 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -2112,22 +2112,40 @@ HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
       const {
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
+
   const SDLoc &dl(N);
+  SelectionDAG &DAG = DCI.DAG;
   SDValue Op(N, 0);
-
   unsigned Opc = Op.getOpcode();
-  if (Opc == ISD::VSELECT) {
-    // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
-    SDValue Cond = Op.getOperand(0);
-    if (Cond->getOpcode() == ISD::XOR) {
-      SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
-      if (C1->getOpcode() == HexagonISD::QTRUE) {
-        SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0,
-                                       Op.getOperand(2), Op.getOperand(1));
-        return VSel;
+
+  switch (Opc) {
+    case ISD::VSELECT: {
+      // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
+      SDValue Cond = Op.getOperand(0);
+      if (Cond->getOpcode() == ISD::XOR) {
+        SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
+        if (C1->getOpcode() == HexagonISD::QTRUE)
+          return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0,
+                             Op.getOperand(2), Op.getOperand(1));
       }
+      break;
     }
+    case HexagonISD::VINSERTW0:
+      if (isUndef(Op.getOperand(1)))
+        return Op.getOperand(0);
+      break;
+    case HexagonISD::VROR: {
+      SDValue Op0 = Op.getOperand(0);
+      if (Op0.getOpcode() == HexagonISD::VROR) {
+        SDValue Vec = Op0.getOperand(0);
+        SDValue Rot0 = Op.getOperand(1), Rot1 = Op0.getOperand(1);
+        SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
+        return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
      }
+      break;
    }
  }
+
  return SDValue();
 }

From bb877d1af2ec2f624caa380350c8da00c984e754 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek
Date: Mon, 14 Sep 2020 14:04:54 -0500
Subject: [PATCH 0602/1079] [Hexagon] Widen loads and handle
 any-/sign-/zero-extensions

---
 .../Target/Hexagon/HexagonISelLowering.cpp    |  24 ++++
 llvm/lib/Target/Hexagon/HexagonISelLowering.h |   5 +
 .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 131 +++++++++++++++---
 llvm/lib/Target/Hexagon/HexagonPatternsHVX.td |  22 ++-
 .../test/CodeGen/Hexagon/autohvx/widen-ext.ll |  99 +++++++++++++
 5 files changed, 256 insertions(+), 25 deletions(-)
 create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/widen-ext.ll

diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 645d28de2b20d..20e5e5a91b124 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1863,6 +1863,8 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case HexagonISD::VALIGN:        return "HexagonISD::VALIGN";
   case HexagonISD::VALIGNADDR:    return "HexagonISD::VALIGNADDR";
   case HexagonISD::VPACKL:        return "HexagonISD::VPACKL";
+  case HexagonISD::VUNPACK:       return "HexagonISD::VUNPACK";
+  case HexagonISD::VUNPACKU:      return "HexagonISD::VUNPACKU";
   case HexagonISD::OP_END:        break;
   }
   return nullptr;
@@ -2650,6 +2652,28 @@ HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
   llvm_unreachable("Invalid type for zero");
 }
 
+SDValue
+HexagonTargetLowering::appendUndef(SDValue Val, MVT ResTy, SelectionDAG &DAG)
+      const {
+  MVT ValTy = ty(Val);
+  assert(ValTy.getVectorElementType() == ResTy.getVectorElementType());
+
+  unsigned ValLen = ValTy.getVectorNumElements();
+  unsigned ResLen = ResTy.getVectorNumElements();
+  if (ValLen == ResLen)
+    return Val;
+
+  const SDLoc &dl(Val);
+  assert(ValLen < ResLen);
+  assert(ResLen % ValLen == 0);
+
+  SmallVector<SDValue, 4> Concats = {Val};
+  for (unsigned i = 1, e = ResLen / ValLen; i < e; ++i)
+    Concats.push_back(DAG.getUNDEF(ValTy));
+
+  return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, Concats);
+}
+
 SDValue
 HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   MVT VecTy = ty(Op);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index a396ff8ef7ec2..cc34a4cd03963 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -94,6 +94,8 @@ enum NodeType : unsigned {
                // the low halfwords and pack them into the first 32
               // halfwords of the output. The rest of the output is
               // unspecified.
+  VUNPACK,    // Unpacking into low elements with sign extension.
+  VUNPACKU,   // Unpacking into low elements with zero extension.
 
   OP_END
 };
@@ -367,6 +369,7 @@ class HexagonTargetLowering : public TargetLowering {
   SDValue contractPredicate(SDValue Vec64, const SDLoc &dl,
                             SelectionDAG &DAG) const;
   SDValue getVectorShiftByInt(SDValue Op, SelectionDAG &DAG) const;
+  SDValue appendUndef(SDValue Val, MVT ResTy, SelectionDAG &DAG) const;
 
   bool isUndef(SDValue Op) const {
     if (Op.isMachineOpcode())
@@ -481,7 +484,9 @@ class HexagonTargetLowering : public TargetLowering {
   SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const;
+  SDValue WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue WidenHvxStore(SDValue Op, SelectionDAG &DAG) const;
+  SDValue WidenHvxExtend(SDValue Op, SelectionDAG &DAG) const;
   SDValue WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const;
 
   std::pair<const TargetRegisterClass*, uint8_t>
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 51804e5f53277..a61d79ab3364a 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -234,8 +234,12 @@ HexagonTargetLowering::initializeHVXLowering() {
         MVT VecTy = MVT::getVectorVT(ElemTy, N);
         auto Action = getPreferredVectorAction(VecTy);
         if (Action == TargetLoweringBase::TypeWidenVector) {
-          setOperationAction(ISD::STORE,    VecTy, Custom);
-          setOperationAction(ISD::TRUNCATE, VecTy, Custom);
+          setOperationAction(ISD::LOAD,        VecTy, Custom);
+          setOperationAction(ISD::STORE,       VecTy, Custom);
+          setOperationAction(ISD::TRUNCATE,    VecTy, Custom);
+          setOperationAction(ISD::ANY_EXTEND,  VecTy, Custom);
+          setOperationAction(ISD::SIGN_EXTEND, VecTy, Custom);
+          setOperationAction(ISD::ZERO_EXTEND, VecTy, Custom);
         }
       }
     }
@@ -1886,6 +1890,38 @@ HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
   llvm_unreachable(Name.c_str());
 }
 
+SDValue
+HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
+  const SDLoc &dl(Op);
+  auto *LoadN = cast<LoadSDNode>(Op.getNode());
+  assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
+  assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
+         "Not widening loads of i1 yet");
+
+  SDValue Chain = 
LoadN->getChain(); + SDValue Base = LoadN->getBasePtr(); + SDValue Offset = DAG.getUNDEF(MVT::i32); + + MVT ResTy = ty(Op); + unsigned HwLen = Subtarget.getVectorLength(); + unsigned ResLen = ResTy.getStoreSize(); + assert(ResLen < HwLen && "vsetq(v1) prerequisite"); + + MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); + SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, + {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG); + + MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen); + MachineFunction &MF = DAG.getMachineFunction(); + auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen); + + SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask, + DAG.getUNDEF(LoadTy), LoadTy, MemOp, + ISD::UNINDEXED, ISD::NON_EXTLOAD, false); + SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG); + return DAG.getMergeValues({Value, Chain}, dl); +} + SDValue HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); @@ -1912,12 +1948,45 @@ HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const { assert(ValueLen < HwLen && "vsetq(v1) prerequisite"); MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); - SDValue StoreQ = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, - {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG); + SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, + {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG); MachineFunction &MF = DAG.getMachineFunction(); - auto *MOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen); - return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, StoreQ, ty(Value), - MOp, ISD::UNINDEXED, false, false); + auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen); + return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value), + MemOp, ISD::UNINDEXED, false, false); +} + +SDValue +HexagonTargetLowering::WidenHvxExtend(SDValue Op, SelectionDAG &DAG) const { + const SDLoc &dl(Op); + unsigned HwWidth = 8*Subtarget.getVectorLength(); + + SDValue Op0 = Op.getOperand(0); + MVT ResTy = ty(Op); + MVT OpTy = ty(Op0); + if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy)) + return SDValue(); + + // .-res, op-> ScalarVec Illegal HVX + // Scalar ok - - + // Illegal widen(insert) widen - + // HVX - widen ok + + auto getFactor = [HwWidth](MVT Ty) { + unsigned Width = Ty.getSizeInBits(); + return HwWidth > Width ? HwWidth / Width : 1; + }; + + auto getWideTy = [getFactor](MVT Ty) { + unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty); + return MVT::getVectorVT(Ty.getVectorElementType(), WideLen); + }; + + unsigned Opcode = Op.getOpcode() == ISD::SIGN_EXTEND ? 
HexagonISD::VUNPACK
+                                                      : HexagonISD::VUNPACKU;
+  SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
+  SDValue WideRes = DAG.getNode(Opcode, dl, getWideTy(ResTy), WideOp);
+  return WideRes;
 }
 
 SDValue
@@ -1931,10 +2000,10 @@ HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const {
   if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
     return SDValue();
 
-  // .-res, op->  Scalar     Illegal         HVX
-  //  Scalar      ok         extract(widen)  -
-  //  Illegal     -          widen           widen
-  //  HVX         -          -               ok
+  // .-res, op->  ScalarVec  Illegal         HVX
+  //  Scalar      ok         extract(widen)  -
+  //  Illegal     -          widen           widen
+  //  HVX         -          -               ok
 
   auto getFactor = [HwWidth](MVT Ty) {
     unsigned Width = Ty.getSizeInBits();
@@ -1952,17 +2021,13 @@ HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const {
 
   assert(!isTypeLegal(OpTy) && "HVX-widening a truncate of scalar?");
 
-  MVT WideOpTy = getWideTy(OpTy);
-  SmallVector<SDValue, 4> Concats = {Op0};
-  for (int i = 0, e = getFactor(OpTy) - 1; i != e; ++i)
-    Concats.push_back(DAG.getUNDEF(OpTy));
-
-  SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideOpTy, Concats);
-  SDValue V = DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Cat);
+  SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
+  SDValue WideRes = DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy),
+                                WideOp);
   // If the original result wasn't legal and was supposed to be widened,
   // we're done.
   if (shouldWidenToHvx(ResTy, DAG))
-    return V;
+    return WideRes;
 
   // The original result type wasn't meant to be widened to HVX, so
   // leave it as it is. Standard legalization should be able to deal
   // node).
   assert(ResTy.isVector());
   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResTy,
-                     {V, getZero(dl, MVT::i32, DAG)});
+                     {WideRes, getZero(dl, MVT::i32, DAG)});
 }
 
 SDValue
@@ -2053,12 +2118,18 @@ HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
   SDValue Op(N, 0);
 
   switch (Opc) {
-    case ISD::TRUNCATE: {
+    case ISD::ANY_EXTEND:
+    case ISD::SIGN_EXTEND:
+    case ISD::ZERO_EXTEND:
+      assert(shouldWidenToHvx(ty(Op.getOperand(0)), DAG) && "Not widening?");
+      if (SDValue T = WidenHvxExtend(Op, DAG))
+        Results.push_back(T);
+      break;
+    case ISD::TRUNCATE:
       assert(shouldWidenToHvx(ty(Op.getOperand(0)), DAG) && "Not widening?");
       if (SDValue T = WidenHvxTruncate(Op, DAG))
        Results.push_back(T);
      break;
-    }
    case ISD::STORE: {
      assert(shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG) &&
             "Not widening?");
@@ -2089,11 +2160,25 @@ HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
   unsigned Opc = N->getOpcode();
   SDValue Op(N, 0);
   switch (Opc) {
-    case ISD::TRUNCATE: {
+    case ISD::ANY_EXTEND:
+    case ISD::SIGN_EXTEND:
+    case ISD::ZERO_EXTEND:
+      assert(shouldWidenToHvx(ty(Op), DAG) && "Not widening?");
+      if (SDValue T = WidenHvxExtend(Op, DAG))
+        Results.push_back(T);
+      break;
+    case ISD::TRUNCATE:
      assert(shouldWidenToHvx(ty(Op), DAG) && "Not widening?");
      if (SDValue T = WidenHvxTruncate(Op, DAG))
        Results.push_back(T);
      break;
+    case ISD::LOAD: {
+      assert(shouldWidenToHvx(ty(Op), DAG) && "Not widening?");
+      SDValue Load = WidenHvxLoad(Op, DAG);
+      assert(Load->getOpcode() == ISD::MERGE_VALUES);
+      Results.push_back(Load.getOperand(0));
+      Results.push_back(Load.getOperand(1));
+      break;
     }
    case ISD::BITCAST:
      if (isHvxBoolTy(ty(N->getOperand(0)))) {
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index 630fd7a17040d..64e24f2466263 100644
--- 
a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -41,6 +41,8 @@ def HexagonQCAT: SDNode<"HexagonISD::QCAT", SDTVecBinOp>; def HexagonQTRUE: SDNode<"HexagonISD::QTRUE", SDTVecLeaf>; def HexagonQFALSE: SDNode<"HexagonISD::QFALSE", SDTVecLeaf>; def HexagonVPACKL: SDNode<"HexagonISD::VPACKL", SDTVecUnaryOp>; +def HexagonVUNPACK: SDNode<"HexagonISD::VUNPACK", SDTVecUnaryOp>; +def HexagonVUNPACKU: SDNode<"HexagonISD::VUNPACKU", SDTVecUnaryOp>; def vzero: PatFrag<(ops), (HexagonVZERO)>; def qtrue: PatFrag<(ops), (HexagonQTRUE)>; @@ -48,8 +50,10 @@ def qfalse: PatFrag<(ops), (HexagonQFALSE)>; def qcat: PatFrag<(ops node:$Qs, node:$Qt), (HexagonQCAT node:$Qs, node:$Qt)>; -def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>; -def vpackl: PatFrag<(ops node:$Vs), (HexagonVPACKL node:$Vs)>; +def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>; +def vpackl: PatFrag<(ops node:$Vs), (HexagonVPACKL node:$Vs)>; +def vunpack: PatFrag<(ops node:$Vs), (HexagonVUNPACK node:$Vs)>; +def vunpacku: PatFrag<(ops node:$Vs), (HexagonVUNPACKU node:$Vs)>; def VSxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackb $Vs)>; def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh $Vs)>; @@ -416,6 +420,20 @@ let Predicates = [UseHVX] in { def: Pat<(VecI8 (vpackl HWI32:$Vs)), (V6_vdealb4w (HiVec $Vs), (LoVec $Vs))>; def: Pat<(VecI16 (vpackl HWI32:$Vs)), (V6_vpackeh (HiVec $Vs), (LoVec $Vs))>; + def: Pat<(VecI16 (vunpack HVI8:$Vs)), (LoVec (VSxtb $Vs))>; + def: Pat<(VecI32 (vunpack HVI8:$Vs)), (LoVec (VSxth (LoVec (VSxtb $Vs))))>; + def: Pat<(VecI32 (vunpack HVI16:$Vs)), (LoVec (VSxth $Vs))>; + def: Pat<(VecPI16 (vunpack HVI8:$Vs)), (VSxtb $Vs)>; + def: Pat<(VecPI32 (vunpack HVI8:$Vs)), (VSxth (LoVec (VSxtb $Vs)))>; + def: Pat<(VecPI32 (vunpack HVI32:$Vs)), (VSxth $Vs)>; + + def: Pat<(VecI16 (vunpacku HVI8:$Vs)), (LoVec (VZxtb $Vs))>; + def: Pat<(VecI32 (vunpacku HVI8:$Vs)), (LoVec (VZxth (LoVec (VZxtb $Vs))))>; + def: Pat<(VecI32 (vunpacku HVI16:$Vs)), (LoVec (VZxth $Vs))>; + def: Pat<(VecPI16 (vunpacku HVI8:$Vs)), (VZxtb $Vs)>; + def: Pat<(VecPI32 (vunpacku HVI8:$Vs)), (VZxth (LoVec (VZxtb $Vs)))>; + def: Pat<(VecPI32 (vunpacku HVI32:$Vs)), (VZxth $Vs)>; + def: Pat<(VecI16 (bswap HVI16:$Vs)), (V6_vdelta HvxVR:$Vs, (V6_lvsplatw (A2_tfrsi 0x01010101)))>; def: Pat<(VecI32 (bswap HVI32:$Vs)), diff --git a/llvm/test/CodeGen/Hexagon/autohvx/widen-ext.ll b/llvm/test/CodeGen/Hexagon/autohvx/widen-ext.ll new file mode 100644 index 0000000000000..eb4f115220820 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/widen-ext.ll @@ -0,0 +1,99 @@ +; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s + +; v32i8 -> v32i16 +; CHECK-LABEL: f0: +; CHECK: r[[R0:[0-9]+]] = #64 +; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0) +; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b) +; CHECK: q[[Q0:[0-3]]] = vsetq(r[[R0]]) +; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V2]] +define void @f0(<32 x i8>* %a0, <32 x i16>* %a1) #0 { + %v0 = load <32 x i8>, <32 x i8>* %a0, align 128 + %v1 = sext <32 x i8> %v0 to <32 x i16> + store <32 x i16> %v1, <32 x i16>* %a1, align 128 + ret void +} + +; v32i8 -> v32i32 +; CHECK-LABEL: f1: +; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0) +; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b) +; CHECK: v[[V3:[0-9]+]]:[[V4:[0-9]+]].w = vunpack(v[[V2]].h) +; CHECK: vmem(r1+#0) = v[[V4]] +define void @f1(<32 x i8>* %a0, <32 x i32>* %a1) #0 { + %v0 = load <32 x i8>, <32 x i8>* %a0, align 128 + %v1 = sext <32 x i8> %v0 to <32 x i32> + store <32 x i32> %v1, <32 x i32>* %a1, 
align 128 + ret void +} + +; v64i8 -> v64i16 +; CHECK-LABEL: f2: +; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0) +; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b) +; CHECK: vmem(r1+#0) = v[[V2]] +define void @f2(<64 x i8>* %a0, <64 x i16>* %a1) #0 { + %v0 = load <64 x i8>, <64 x i8>* %a0, align 128 + %v1 = sext <64 x i8> %v0 to <64 x i16> + store <64 x i16> %v1, <64 x i16>* %a1, align 128 + ret void +} + +; v64i8 -> v64i32 +; CHECK-LABEL: f3: +; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0) +; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b) +; CHECK: v[[V3:[0-9]+]]:[[V4:[0-9]+]].w = vunpack(v[[V2]].h) +; CHECK-DAG: vmem(r1+#0) = v[[V4]] +; CHECK-DAG: vmem(r1+#1) = v[[V3]] +define void @f3(<64 x i8>* %a0, <64 x i32>* %a1) #0 { + %v0 = load <64 x i8>, <64 x i8>* %a0, align 128 + %v1 = sext <64 x i8> %v0 to <64 x i32> + store <64 x i32> %v1, <64 x i32>* %a1, align 128 + ret void +} + +; v16i16 -> v16i32 +; CHECK-LABEL: f4: +; CHECK: r[[R0:[0-9]+]] = #64 +; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0) +; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].w = vunpack(v[[V0]].h) +; CHECK: q[[Q0:[0-3]]] = vsetq(r[[R0]]) +; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V2]] +define void @f4(<16 x i16>* %a0, <16 x i32>* %a1) #0 { + %v0 = load <16 x i16>, <16 x i16>* %a0, align 128 + %v1 = sext <16 x i16> %v0 to <16 x i32> + store <16 x i32> %v1, <16 x i32>* %a1, align 128 + ret void +} + +; v32i16 -> v32i32 +; CHECK-LABEL: f5: +; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0) +; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].w = vunpack(v[[V0]].h) +; CHECK: vmem(r1+#0) = v[[V2]] +define void @f5(<32 x i16>* %a0, <32 x i32>* %a1) #0 { + %v0 = load <32 x i16>, <32 x i16>* %a0, align 128 + %v1 = sext <32 x i16> %v0 to <32 x i32> + store <32 x i32> %v1, <32 x i32>* %a1, align 128 + ret void +} + +; v8i8 -> v8i32 +; CHECK-LABEL: f6: +; CHECK: r[[R0:[0-9]+]]:[[R1:[0-9]+]] = memd(r0+#0) +; CHECK-DAG: v[[V0:[0-9]+]].w = vinsert(r[[R0]]) +; CHECK-DAG: v[[V0]].w = vinsert(r[[R1]]) +; CHECK-DAG: q[[Q0:[0-3]]] = vsetq +; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b) +; CHECK: v[[V3:[0-9]+]]:[[V4:[0-9]+]].w = vunpack(v[[V2]].h) +; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V4]] +define void @f6(<8 x i8>* %a0, <8 x i32>* %a1) #0 { + %v0 = load <8 x i8>, <8 x i8>* %a0, align 128 + %v1 = sext <8 x i8> %v0 to <8 x i32> + store <8 x i32> %v1, <8 x i32>* %a1, align 128 + ret void +} + +attributes #0 = { "target-cpu"="hexagonv65" "target-features"="+hvx,+hvx-length128b,-packets" } + From f35617ad809b978635d10c0c39553840a03ac41f Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 14 Sep 2020 16:37:41 -0500 Subject: [PATCH 0603/1079] [Hexagon] Add more detailed testcase for widening truncates --- .../CodeGen/Hexagon/autohvx/widen-trunc.ll | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/widen-trunc.ll diff --git a/llvm/test/CodeGen/Hexagon/autohvx/widen-trunc.ll b/llvm/test/CodeGen/Hexagon/autohvx/widen-trunc.ll new file mode 100644 index 0000000000000..e23fcb0e427ae --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/widen-trunc.ll @@ -0,0 +1,106 @@ +; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s + +; If the "rx = #N, vsetq(rx)" get reordered with the rest, update the test. 
+ +; v32i16 -> v32i8 +; CHECK-LABEL: f0: +; CHECK: r[[R0:[0-9]+]] = #32 +; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0) +; CHECK: v[[V1:[0-9]+]].b = vdeal(v[[V0]].b) +; CHECK: q[[Q0:[0-3]]] = vsetq(r[[R0]]) +; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V1]] +define void @f0(<32 x i16>* %a0, <32 x i8>* %a1) #0 { + %v0 = load <32 x i16>, <32 x i16>* %a0, align 128 + %v1 = trunc <32 x i16> %v0 to <32 x i8> + store <32 x i8> %v1, <32 x i8>* %a1, align 128 + ret void +} + +; v32i32 -> v32i8 +; CHECK-LABEL: f1: +; CHECK: r[[R0:[0-9]+]] = #32 +; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0) +; CHECK: v[[V1:[0-9]+]].b = vdeale({{.*}},v[[V0]].b) +; CHECK: q[[Q0:[0-3]]] = vsetq(r[[R0]]) +; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V1]] +define void @f1(<32 x i32>* %a0, <32 x i8>* %a1) #0 { + %v0 = load <32 x i32>, <32 x i32>* %a0, align 128 + %v1 = trunc <32 x i32> %v0 to <32 x i8> + store <32 x i8> %v1, <32 x i8>* %a1, align 128 + ret void +} + +; v64i16 -> v64i8 +; CHECK-LABEL: f2: +; CHECK: r[[R0:[0-9]+]] = #64 +; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0) +; CHECK: v[[V1:[0-9]+]].b = vdeal(v[[V0]].b) +; CHECK: q[[Q0:[0-3]]] = vsetq(r[[R0]]) +; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V1]] +define void @f2(<64 x i16>* %a0, <64 x i8>* %a1) #0 { + %v0 = load <64 x i16>, <64 x i16>* %a0, align 128 + %v1 = trunc <64 x i16> %v0 to <64 x i8> + store <64 x i8> %v1, <64 x i8>* %a1, align 128 + ret void +} + +; v64i32 -> v64i8 +; CHECK-LABEL: f3: +; CHECK-DAG: v[[V0:[0-9]+]] = vmem(r0+#0) +; CHECK-DAG: v[[V1:[0-9]+]] = vmem(r0+#1) +; CHECK-DAG: q[[Q0:[0-3]]] = vsetq +; CHECK: v[[V2:[0-9]+]].b = vdeale(v[[V1]].b,v[[V0]].b) +; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V2]] +define void @f3(<64 x i32>* %a0, <64 x i8>* %a1) #0 { + %v0 = load <64 x i32>, <64 x i32>* %a0, align 128 + %v1 = trunc <64 x i32> %v0 to <64 x i8> + store <64 x i8> %v1, <64 x i8>* %a1, align 128 + ret void +} + +; v16i32 -> v16i16 +; CHECK-LABEL: f4: +; CHECK: r[[R0:[0-9]+]] = #32 +; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0) +; CHECK: v[[V1:[0-9]+]].h = vdeal(v[[V0]].h) +; CHECK: q[[Q0:[0-3]]] = vsetq(r[[R0]]) +; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V1]] +define void @f4(<16 x i32>* %a0, <16 x i16>* %a1) #0 { + %v0 = load <16 x i32>, <16 x i32>* %a0, align 128 + %v1 = trunc <16 x i32> %v0 to <16 x i16> + store <16 x i16> %v1, <16 x i16>* %a1, align 128 + ret void +} + +; v32i32 -> v32i16 +; CHECK-LABEL: f5: +; CHECK: r[[R0:[0-9]+]] = #64 +; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0) +; CHECK: v[[V1:[0-9]+]].h = vdeal(v[[V0]].h) +; CHECK: q[[Q0:[0-3]]] = vsetq(r[[R0]]) +; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V1]] +define void @f5(<32 x i32>* %a0, <32 x i16>* %a1) #0 { + %v0 = load <32 x i32>, <32 x i32>* %a0, align 128 + %v1 = trunc <32 x i32> %v0 to <32 x i16> + store <32 x i16> %v1, <32 x i16>* %a1, align 128 + ret void +} + +; v8i32 -> v8i8 +; CHECK-LABEL: f6: +; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0) +; CHECK: v[[V1:[0-9]+]].b = vdeale({{.*}},v[[V0]].b) +; CHECK: vmem(r[[R0:[0-9]+]]+#0) = v[[V1]] +; CHECK-DAG: r[[R1:[0-9]+]] = memw(r[[R0]]+#0) +; CHECK-DAG: r[[R2:[0-9]+]] = memw(r[[R0]]+#4) +; CHECK: memd(r1+#0) = r[[R2]]:[[R1]] +define void @f6(<8 x i32>* %a0, <8 x i8>* %a1) #0 { + %v0 = load <8 x i32>, <8 x i32>* %a0, align 128 + %v1 = trunc <8 x i32> %v0 to <8 x i8> + store <8 x i8> %v1, <8 x i8>* %a1, align 128 + ret void +} + + +attributes #0 = { "target-cpu"="hexagonv65" "target-features"="+hvx,+hvx-length128b,-packets" } + From da55e9ba1273284f1af61bceeaeb25e487838034 Mon Sep 17 00:00:00 2001 From: Chris Hamilton Date: Mon, 14 Sep 2020 18:12:12 -0500 Subject: [PATCH 0604/1079] [Sema] 
Address-space sensitive index check for unbounded arrays Check applied to unbounded (incomplete) arrays and pointers to spot cases where the computed address is beyond the largest possible addressable extent of the array, based on the address space in which the array is delcared, or which the pointer refers to. Check helps to avoid cases of nonsense pointer math and array indexing which could lead to linker failures or runtime exceptions. Of particular interest when building for embedded systems with small address spaces. Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D86796 --- .../clang/Basic/DiagnosticSemaKinds.td | 8 ++ clang/lib/Sema/SemaChecking.cpp | 85 ++++++++++++++++--- clang/test/Sema/const-eval.c | 8 +- clang/test/Sema/unbounded-array-bounds.c | 70 +++++++++++++++ .../SemaCXX/constant-expression-cxx1y.cpp | 3 +- 5 files changed, 157 insertions(+), 17 deletions(-) create mode 100644 clang/test/Sema/unbounded-array-bounds.c diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index e0d700c66724a..e0be2072bb6e2 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8847,6 +8847,14 @@ def warn_array_index_precedes_bounds : Warning< def warn_array_index_exceeds_bounds : Warning< "array index %0 is past the end of the array (which contains %1 " "element%s2)">, InGroup; +def warn_ptr_arith_exceeds_max_addressable_bounds : Warning< + "the pointer incremented by %0 refers past the last possible element for an array in %1-bit " + "address space containing %2-bit (%3-byte) elements (max possible %4 element%s5)">, + InGroup; +def warn_array_index_exceeds_max_addressable_bounds : Warning< + "array index %0 refers past the last possible element for an array in %1-bit " + "address space containing %2-bit (%3-byte) elements (max possible %4 element%s5)">, + InGroup; def note_array_declared_here : Note< "array %0 declared here">; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index f2b70be1d431b..dbfa329993c8b 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -14038,11 +14038,11 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, const ConstantArrayType *ArrayTy = Context.getAsConstantArrayType(BaseExpr->getType()); - if (!ArrayTy) - return; - - const Type *BaseType = ArrayTy->getElementType().getTypePtr(); - if (EffectiveType->isDependentType() || BaseType->isDependentType()) + const Type *BaseType = + ArrayTy == nullptr ? 
nullptr : ArrayTy->getElementType().getTypePtr(); + bool IsUnboundedArray = (BaseType == nullptr); + if (EffectiveType->isDependentType() || + (!IsUnboundedArray && BaseType->isDependentType())) return; Expr::EvalResult Result; @@ -14059,6 +14059,69 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, if (const MemberExpr *ME = dyn_cast(BaseExpr)) ND = ME->getMemberDecl(); + if (IsUnboundedArray) { + if (index.isUnsigned() || !index.isNegative()) { + const auto &ASTC = getASTContext(); + unsigned AddrBits = + ASTC.getTargetInfo().getPointerWidth(ASTC.getTargetAddressSpace( + EffectiveType->getCanonicalTypeInternal())); + if (index.getBitWidth() < AddrBits) + index = index.zext(AddrBits); + CharUnits ElemCharUnits = ASTC.getTypeSizeInChars(EffectiveType); + llvm::APInt ElemBytes(index.getBitWidth(), ElemCharUnits.getQuantity()); + // If index has more active bits than address space, we already know + // we have a bounds violation to warn about. Otherwise, compute + // address of (index + 1)th element, and warn about bounds violation + // only if that address exceeds address space. + if (index.getActiveBits() <= AddrBits) { + bool Overflow; + llvm::APInt Product(index); + Product += 1; + Product = Product.umul_ov(ElemBytes, Overflow); + if (!Overflow && Product.getActiveBits() <= AddrBits) + return; + } + + // Need to compute max possible elements in address space, since that + // is included in diag message. + llvm::APInt MaxElems = llvm::APInt::getMaxValue(AddrBits); + MaxElems = MaxElems.zext(std::max(AddrBits + 1, ElemBytes.getBitWidth())); + MaxElems += 1; + ElemBytes = ElemBytes.zextOrTrunc(MaxElems.getBitWidth()); + MaxElems = MaxElems.udiv(ElemBytes); + + unsigned DiagID = + ASE ? diag::warn_array_index_exceeds_max_addressable_bounds + : diag::warn_ptr_arith_exceeds_max_addressable_bounds; + + // Diag message shows element size in bits and in "bytes" (platform- + // dependent CharUnits) + DiagRuntimeBehavior(BaseExpr->getBeginLoc(), BaseExpr, + PDiag(DiagID) + << index.toString(10, true) << AddrBits + << (unsigned)ASTC.toBits(ElemCharUnits) + << ElemBytes.toString(10, false) + << MaxElems.toString(10, false) + << (unsigned)MaxElems.getLimitedValue(~0U) + << IndexExpr->getSourceRange()); + + if (!ND) { + // Try harder to find a NamedDecl to point at in the note. + while (const auto *ASE = dyn_cast(BaseExpr)) + BaseExpr = ASE->getBase()->IgnoreParenCasts(); + if (const auto *DRE = dyn_cast(BaseExpr)) + ND = DRE->getDecl(); + if (const auto *ME = dyn_cast(BaseExpr)) + ND = ME->getMemberDecl(); + } + + if (ND) + DiagRuntimeBehavior(ND->getBeginLoc(), BaseExpr, + PDiag(diag::note_array_declared_here) << ND); + } + return; + } + if (index.isUnsigned() || !index.isNegative()) { // It is possible that the type of the base expression after // IgnoreParenCasts is incomplete, even though the type of the base @@ -14121,9 +14184,8 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, } } - unsigned DiagID = diag::warn_ptr_arith_exceeds_bounds; - if (ASE) - DiagID = diag::warn_array_index_exceeds_bounds; + unsigned DiagID = ASE ? diag::warn_array_index_exceeds_bounds + : diag::warn_ptr_arith_exceeds_bounds; DiagRuntimeBehavior(BaseExpr->getBeginLoc(), BaseExpr, PDiag(DiagID) << index.toString(10, true) @@ -14144,12 +14206,11 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, if (!ND) { // Try harder to find a NamedDecl to point at in the note. 
- while (const ArraySubscriptExpr *ASE = - dyn_cast<ArraySubscriptExpr>(BaseExpr)) + while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(BaseExpr)) BaseExpr = ASE->getBase()->IgnoreParenCasts(); - if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(BaseExpr)) + if (const auto *DRE = dyn_cast<DeclRefExpr>(BaseExpr)) ND = DRE->getDecl(); - if (const MemberExpr *ME = dyn_cast<MemberExpr>(BaseExpr)) + if (const auto *ME = dyn_cast<MemberExpr>(BaseExpr)) ND = ME->getMemberDecl(); } diff --git a/clang/test/Sema/const-eval.c b/clang/test/Sema/const-eval.c index bbcbb0e25237e..c94539ab1de27 100644 --- a/clang/test/Sema/const-eval.c +++ b/clang/test/Sema/const-eval.c @@ -140,10 +140,10 @@ EVAL_EXPR(52, &pr24622 == (void *)&PR24622); // expected-error {{must have a con // We evaluate these by providing 2s' complement semantics in constant // expressions, like we do for integers. -void *PR28739a = (__int128)(unsigned long)-1 + &PR28739a; -void *PR28739b = &PR28739b + (__int128)(unsigned long)-1; -__int128 PR28739c = (&PR28739c + (__int128)(unsigned long)-1) - &PR28739c; -void *PR28739d = &(&PR28739d)[(__int128)(unsigned long)-1]; +void *PR28739a = (__int128)(unsigned long)-1 + &PR28739a; // expected-warning {{the pointer incremented by 18446744073709551615 refers past the last possible element for an array in 64-bit address space containing 64-bit (8-byte) elements (max possible 2305843009213693952 elements)}} +void *PR28739b = &PR28739b + (__int128)(unsigned long)-1; // expected-warning {{refers past the last possible element}} +__int128 PR28739c = (&PR28739c + (__int128)(unsigned long)-1) - &PR28739c; // expected-warning {{refers past the last possible element}} +void *PR28739d = &(&PR28739d)[(__int128)(unsigned long)-1]; // expected-warning {{refers past the last possible element}} struct PR35214_X { int k; diff --git a/clang/test/Sema/unbounded-array-bounds.c b/clang/test/Sema/unbounded-array-bounds.c new file mode 100644 index 0000000000000..18a8225b84697 --- /dev/null +++ b/clang/test/Sema/unbounded-array-bounds.c @@ -0,0 +1,70 @@ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-X86-ADDR64 %s \ +// RUN: --implicit-check-not 'past the last possible element' +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-I386-ADDR32 %s \ +// RUN: --implicit-check-not 'past the last possible element' +// RUN: %clang_cc1 -triple avr-pc-linux-gnu -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-AVR-ADDR16 %s \ +// RUN: --implicit-check-not 'past the last possible element' + +struct S { + long long a; + char b; + long long c; + short d; +}; + +struct S s[]; + +void f1() { + ++s[3].a; + ++s[7073650413200313099].b; + // CHECK-X86-ADDR64: :[[@LINE-1]]:5: warning: array index 7073650413200313099 refers past the last possible element for an array in 64-bit address space containing 256-bit (32-byte) elements (max possible 576460752303423488 elements) + // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements) + // CHECK-AVR-ADDR16: :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) + ++s[7073650].c; + // CHECK-AVR-ADDR16: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) +} + +long long ll[]; + +void f2() { + ++ll[3]; + ++ll[2705843009213693952]; + // CHECK-X86-ADDR64: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible
2305843009213693952 elements) + // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 536870912 elements) + // CHECK-AVR-ADDR16: :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 8192 elements) + ++ll[847073650]; + // CHECK-I386-ADDR32: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 536870912 elements) + // CHECK-AVR-ADDR16: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 8192 elements) +} + +void f3(struct S p[]) { + ++p[3].a; + ++p[7073650413200313099].b; + // CHECK-X86-ADDR64: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible 576460752303423488 elements) + // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements) + // CHECK-AVR-ADDR16: :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) + ++p[7073650].c; + // CHECK-AVR-ADDR16: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) +} + +void f4(struct S *p) { + p += 3; + p += 7073650413200313099; + // CHECK-X86-ADDR64: :[[@LINE-1]]:3: warning: the pointer incremented by 7073650413200313099 refers past the last possible element for an array in 64-bit address space containing 256-bit (32-byte) elements (max possible 576460752303423488 elements) + // CHECK-I386-ADDR32: :[[@LINE-2]]:3: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements) + // CHECK-AVR-ADDR16: :[[@LINE-3]]:3: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) + p += 7073650; + // CHECK-AVR-ADDR16: :[[@LINE-1]]:3: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) +} + +struct BQ { + struct S bigblock[3276]; +}; + +struct BQ bq[]; + +void f5() { + ++bq[0].bigblock[0].a; + ++bq[1].bigblock[0].a; + // CHECK-AVR-ADDR16: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 1 element) +} diff --git a/clang/test/SemaCXX/constant-expression-cxx1y.cpp b/clang/test/SemaCXX/constant-expression-cxx1y.cpp index 8bc4f88a63a96..7fe71d4853508 100644 --- a/clang/test/SemaCXX/constant-expression-cxx1y.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx1y.cpp @@ -1018,8 +1018,9 @@ constexpr int S = sum(Cs); // expected-error{{must be initialized by a constant } constexpr void PR28739(int n) { // expected-error {{never produces a constant}} - int *p = &n; + int *p = &n; // expected-note {{declared here}} p += (__int128)(unsigned long)-1; // expected-note {{cannot refer to element 18446744073709551615 of non-array object in a constant expression}} + // expected-warning@-1 {{the pointer incremented by 18446744073709551615 refers past the last possible element for an array in 64-bit address space containing 32-bit (4-byte) elements (max possible 4611686018427387904 elements)}} } constexpr void Void(int n) { From 32515938901685bcbc438d5f5bb03cb8a9f4c637 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 14 Sep 2020 16:28:11 -0700 Subject: [PATCH 0605/1079] [X86] Place new constant node in topological order in X86DAGToDAGISel::matchBitExtract. 
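When matchBitExtract builds the 'control' value, the constant it creates for the shift amount can end up after its user in the DAG's topological ordering, which later ordering-sensitive updates then trip over. A minimal sketch of the fixed sequence, lifted from the hunk below (only the insertDAGNode call for C8 is new):

  SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
  // Place the fresh constant before Node in the topological order.
  insertDAGNode(*CurDAG, SDValue(Node, 0), C8);
  SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8);
  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);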
Fixes PR47525 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 1 + llvm/test/CodeGen/X86/pr47525.ll | 42 +++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 llvm/test/CodeGen/X86/pr47525.ll diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 840f132ec6664..3b5a29ef31fcf 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3502,6 +3502,7 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { // Shift NBits left by 8 bits, thus producing 'control'. // This makes the low 8 bits to be zero. SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8); + insertDAGNode(*CurDAG, SDValue(Node, 0), C8); SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8); insertDAGNode(*CurDAG, SDValue(Node, 0), Control); diff --git a/llvm/test/CodeGen/X86/pr47525.ll b/llvm/test/CodeGen/X86/pr47525.ll new file mode 100644 index 0000000000000..e0f01f3c51152 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr47525.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=bmi | FileCheck %s + +@a = external local_unnamed_addr global i32, align 4 +@f = external local_unnamed_addr global i32, align 4 + +define void @g(i32* %x, i32* %y, i32* %z) { +; CHECK-LABEL: g: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{.*}}(%rip), %eax +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: sete %cl +; CHECK-NEXT: addl %ecx, %ecx +; CHECK-NEXT: orl (%rdi), %ecx +; CHECK-NEXT: movl $0, (%rsi) +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: shll $8, %eax +; CHECK-NEXT: bextrl %eax, {{.*}}(%rip), %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: movl %eax, (%rdx) +; CHECK-NEXT: retq entry: + %0 = load i32, i32* @a, align 4 + %1 = tail call i32 asm "", "=r,r,~{dirflag},~{fpsr},~{flags}"(i32 %0) + %2 = icmp eq i32 %1, 0 + %shl1 = select i1 %2, i32 2, i32 0 + %3 = load i32, i32* %x, align 4 + %or = or i32 %3, %shl1 + store i32 0, i32* %y, align 4 + %4 = tail call i32 asm "", "=r,~{dirflag},~{fpsr},~{flags}"() + %notmask = shl nsw i32 -1, %4 + %sub = xor i32 %notmask, -1 + %5 = load i32, i32* @f, align 4 + %and4 = and i32 %5, %sub + %or6 = or i32 %and4, %or + store i32 %or6, i32* %z, align 4 + ret void +} From a36278c2f8b5ba7e964ef2cdc14ef8c3f8b8a045 Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Fri, 4 Sep 2020 14:27:42 -0700 Subject: [PATCH 0606/1079] [GlobalISel] Add G_UNMERGE(Cst) -> Cst1, Cst2, ... combine Add a combiner helper that replaces G_UNMERGE of big constants with direct uses of smaller constants.
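For illustration, a minimal sketch of the transformation on a made-up 64-bit value (8589934593, i.e. 0x0000000200000001, split into two 32-bit pieces; the new MIR tests below also cover 8-bit, 13-bit and floating-point cases):

  %0:_(s64) = G_CONSTANT i64 8589934593
  %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0(s64)

becomes

  %1:_(s32) = G_CONSTANT i32 1
  %2:_(s32) = G_CONSTANT i32 2

The low piece is emitted first; each subsequent piece is the source constant logically shifted right by the destination width and truncated.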
Differential Revision: https://reviews.llvm.org/D87166 --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 6 + .../include/llvm/Target/GlobalISel/Combine.td | 11 +- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 42 ++++ .../AArch64/GlobalISel/combine-unmerge.mir | 111 +++++++++++ .../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll | 182 ++++++++---------- .../CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll | 160 +++++++-------- .../CodeGen/AMDGPU/GlobalISel/srem.i64.ll | 168 ++++++++-------- .../test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll | 182 ++++++++---------- .../CodeGen/AMDGPU/GlobalISel/udiv.i64.ll | 100 +++++----- .../CodeGen/AMDGPU/GlobalISel/urem.i64.ll | 104 +++++----- 10 files changed, 603 insertions(+), 463 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 8a5e80386e7ee..2854025b01910 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -252,6 +252,12 @@ class CombinerHelper { applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl<Register> &Operands); + /// Transform G_UNMERGE Constant -> Constant1, Constant2, ... + bool matchCombineUnmergeConstant(MachineInstr &MI, + SmallVectorImpl<APInt> &Csts); + bool applyCombineUnmergeConstant(MachineInstr &MI, + SmallVectorImpl<APInt> &Csts); + /// Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space. bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg); bool applyCombineI2PToP2I(MachineInstr &MI, Register &Reg); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index f99252935db42..95da231f517f7 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -412,6 +412,15 @@ def fabs_fabs_fold: GICombineRule< (apply [{ return Helper.applyCombineFAbsOfFAbs(*${root}, ${matchinfo}); }]) >; +// Fold (unmerge cst) -> cst1, cst2, ... +def unmerge_cst_matchinfo : GIDefMatchData<"SmallVector<APInt>">; +def unmerge_cst : GICombineRule< + (defs root:$d, unmerge_cst_matchinfo:$info), + (match (wip_match_opcode G_UNMERGE_VALUES): $d, + [{ return Helper.matchCombineUnmergeConstant(*${d}, ${info}); }]), + (apply [{ return Helper.applyCombineUnmergeConstant(*${d}, ${info}); }]) +>; + // FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -443,4 +452,4 @@ def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain, width_reduction_combines, select_combines, known_bits_simplifications, ext_ext_fold, not_cmp_fold, opt_brcond_by_inverting_cond, - unmerge_merge, fabs_fabs_fold]>; + unmerge_merge, fabs_fabs_fold, unmerge_cst]>; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index a2a7d6b928d4a..ccc75d44a9ab9 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1612,6 +1612,48 @@ bool CombinerHelper::applyCombineUnmergeMergeToPlainValues( return true; } +bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI, + SmallVectorImpl<APInt> &Csts) { + unsigned SrcIdx = MI.getNumOperands() - 1; + Register SrcReg = MI.getOperand(SrcIdx).getReg(); + MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); + if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT && + SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT) + return false; + // Break down the big constant in smaller ones. + const MachineOperand &CstVal = SrcInstr->getOperand(1); + APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT + ? CstVal.getCImm()->getValue() + : CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); + + LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg()); + unsigned ShiftAmt = Dst0Ty.getSizeInBits(); + // Unmerge a constant. + for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) { + Csts.emplace_back(Val.trunc(ShiftAmt)); + Val = Val.lshr(ShiftAmt); + } + + return true; +} + +bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, + SmallVectorImpl<APInt> &Csts) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + assert((MI.getNumOperands() - 1 == Csts.size()) && + "Not enough operands to replace all defs"); + unsigned NumElems = MI.getNumOperands() - 1; + Builder.setInstrAndDebugLoc(MI); + for (unsigned Idx = 0; Idx < NumElems; ++Idx) { + Register DstReg = MI.getOperand(Idx).getReg(); + Builder.buildConstant(DstReg, Csts[Idx]); + } + + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir index 73401374ef9db..52f0836efec42 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir @@ -181,3 +181,114 @@ body: | $w1 = COPY %4(s32) ... +# Unmerge a constant into a bunch of smaller constant. +# Constant is 0x0102030405060708090a0b0c0d0e0f10 and we break it down into +# bytes: +# cst1 0x10 +# cst2 0x0f +# cst3 0x0e +# ...
+--- +name: test_combine_unmerge_cst +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_cst + ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 16 + ; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 15 + ; CHECK: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 14 + ; CHECK: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 13 + ; CHECK: [[C4:%[0-9]+]]:_(s8) = G_CONSTANT i8 12 + ; CHECK: [[C5:%[0-9]+]]:_(s8) = G_CONSTANT i8 11 + ; CHECK: [[C6:%[0-9]+]]:_(s8) = G_CONSTANT i8 10 + ; CHECK: [[C7:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 + ; CHECK: [[C8:%[0-9]+]]:_(s8) = G_CONSTANT i8 8 + ; CHECK: [[C9:%[0-9]+]]:_(s8) = G_CONSTANT i8 7 + ; CHECK: [[C10:%[0-9]+]]:_(s8) = G_CONSTANT i8 6 + ; CHECK: [[C11:%[0-9]+]]:_(s8) = G_CONSTANT i8 5 + ; CHECK: [[C12:%[0-9]+]]:_(s8) = G_CONSTANT i8 4 + ; CHECK: [[C13:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 + ; CHECK: [[C14:%[0-9]+]]:_(s8) = G_CONSTANT i8 2 + ; CHECK: [[C15:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; CHECK: $b0 = COPY [[C]](s8) + ; CHECK: $b1 = COPY [[C1]](s8) + ; CHECK: $b2 = COPY [[C2]](s8) + ; CHECK: $b3 = COPY [[C3]](s8) + ; CHECK: $b4 = COPY [[C4]](s8) + ; CHECK: $b5 = COPY [[C5]](s8) + ; CHECK: $b6 = COPY [[C6]](s8) + ; CHECK: $b7 = COPY [[C7]](s8) + ; CHECK: $b8 = COPY [[C8]](s8) + ; CHECK: $b9 = COPY [[C9]](s8) + ; CHECK: $b10 = COPY [[C10]](s8) + ; CHECK: $b11 = COPY [[C11]](s8) + ; CHECK: $b12 = COPY [[C12]](s8) + ; CHECK: $b13 = COPY [[C13]](s8) + ; CHECK: $b14 = COPY [[C14]](s8) + ; CHECK: $b15 = COPY [[C15]](s8) + %0:_(s128) = G_CONSTANT i128 1339673755198158349044581307228491536 + %1:_(s8),%2:_(s8),%3:_(s8),%4:_(s8),%5:_(s8),%6:_(s8),%7:_(s8),%8:_(s8),%9:_(s8),%10:_(s8),%11:_(s8),%12:_(s8),%13:_(s8),%14:_(s8),%15:_(s8),%16:_(s8) = G_UNMERGE_VALUES %0(s128) + $b0 = COPY %1(s8) + $b1 = COPY %2(s8) + $b2 = COPY %3(s8) + $b3 = COPY %4(s8) + $b4 = COPY %5(s8) + $b5 = COPY %6(s8) + $b6 = COPY %7(s8) + $b7 = COPY %8(s8) + $b8 = COPY %9(s8) + $b9 = COPY %10(s8) + $b10 = COPY %11(s8) + $b11 = COPY %12(s8) + $b12 = COPY %13(s8) + $b13 = COPY %14(s8) + $b14 = COPY %15(s8) + $b15 = COPY %16(s8) +... + +# Unmerge a constant on a non-power of 2 type into a bunch of smaller constant. +# Constant is a 3 | 2 | 1 in chunks of 13-bit. +--- +name: test_combine_unmerge_cst_36bit +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_cst_36bit + ; CHECK: [[C:%[0-9]+]]:_(s13) = G_CONSTANT i13 1 + ; CHECK: [[C1:%[0-9]+]]:_(s13) = G_CONSTANT i13 2 + ; CHECK: [[C2:%[0-9]+]]:_(s13) = G_CONSTANT i13 3 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[C]](s13) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[C1]](s13) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s16) = G_ZEXT [[C2]](s13) + ; CHECK: $h0 = COPY [[ZEXT]](s16) + ; CHECK: $h1 = COPY [[ZEXT1]](s16) + ; CHECK: $h2 = COPY [[ZEXT2]](s16) + %0:_(s39) = G_CONSTANT i39 201342977 + %1:_(s13),%2:_(s13),%3:_(s13) = G_UNMERGE_VALUES %0(s39) + %4:_(s16) = G_ZEXT %1(s13) + %5:_(s16) = G_ZEXT %2(s13) + %6:_(s16) = G_ZEXT %3(s13) + $h0 = COPY %4(s16) + $h1 = COPY %5(s16) + $h2 = COPY %6(s16) +... + +# Unmerge floating point constant. 
+--- +name: test_combine_unmerge_fpcst +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_fpcst + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 + ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3 + ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK: $h0 = COPY [[C]](s16) + ; CHECK: $h1 = COPY [[C1]](s16) + ; CHECK: $h2 = COPY [[C2]](s16) + ; CHECK: $h3 = COPY [[C3]](s16) + %0:_(s64) = G_FCONSTANT double 0x0004000300020001 + %1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(s64) + $h0 = COPY %1(s16) + $h1 = COPY %2(s16) + $h2 = COPY %3(s16) + $h3 = COPY %4(s16) +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll index dad8a5ac58e8d..26a8d81120548 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll @@ -4999,24 +4999,22 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] ; GFX6-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[4:5] ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX6-NEXT: s_brev_b32 s8, 1 -; GFX6-NEXT: v_mov_b32_e32 v1, s8 -; GFX6-NEXT: v_add_i32_e64 v4, s[6:7], 0, v0 +; GFX6-NEXT: v_bfrev_b32_e32 v10, 1 +; GFX6-NEXT: v_add_i32_e64 v1, s[6:7], 0, v0 ; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX6-NEXT: v_addc_u32_e64 v1, s[6:7], v0, v1, s[6:7] -; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v4, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX6-NEXT: v_addc_u32_e64 v4, s[6:7], v0, v10, s[6:7] +; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc ; GFX6-NEXT: v_add_i32_e32 v4, vcc, v2, v6 ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, v3, v7, vcc ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] ; GFX6-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[6:7] ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v5 -; GFX6-NEXT: v_mov_b32_e32 v3, s8 -; GFX6-NEXT: v_add_i32_e64 v6, s[6:7], 0, v2 -; GFX6-NEXT: v_addc_u32_e64 v3, s[6:7], v2, v3, s[6:7] +; GFX6-NEXT: v_add_i32_e64 v3, s[6:7], 0, v2 +; GFX6-NEXT: v_addc_u32_e64 v6, s[6:7], v2, v10, s[6:7] ; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_saddsat_v2i64: @@ -5027,24 +5025,22 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] ; GFX8-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[4:5] ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX8-NEXT: s_brev_b32 s8, 1 -; GFX8-NEXT: v_mov_b32_e32 v1, s8 -; GFX8-NEXT: v_add_u32_e64 v4, s[6:7], 0, v0 +; GFX8-NEXT: v_bfrev_b32_e32 v10, 1 +; GFX8-NEXT: v_add_u32_e64 v1, s[6:7], 0, v0 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX8-NEXT: v_addc_u32_e64 v1, s[6:7], v0, v1, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v4, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX8-NEXT: v_addc_u32_e64 v4, s[6:7], v0, v10, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc ; GFX8-NEXT: v_add_u32_e32 v4, vcc, v2, v6 ; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v3, v7, vcc ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] ; GFX8-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[6:7] ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v5 -; GFX8-NEXT: v_mov_b32_e32 v3, s8 -; GFX8-NEXT: v_add_u32_e64 v6, s[6:7], 0, v2 -; 
GFX8-NEXT: v_addc_u32_e64 v3, s[6:7], v2, v3, s[6:7] +; GFX8-NEXT: v_add_u32_e64 v3, s[6:7], 0, v2 +; GFX8-NEXT: v_addc_u32_e64 v6, s[6:7], v2, v10, s[6:7] ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_saddsat_v2i64: @@ -5055,56 +5051,53 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] ; GFX9-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[4:5] ; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX9-NEXT: s_brev_b32 s8, 1 -; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: v_add_co_u32_e64 v4, s[6:7], 0, v0 +; GFX9-NEXT: v_bfrev_b32_e32 v10, 1 +; GFX9-NEXT: v_add_co_u32_e64 v1, s[6:7], 0, v0 ; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX9-NEXT: v_addc_co_u32_e64 v1, s[6:7], v0, v1, s[6:7] -; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v4, s[6:7], v0, v10, s[6:7] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v2, v6 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v3, v7, vcc ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] ; GFX9-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[6:7] ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v5 -; GFX9-NEXT: v_mov_b32_e32 v3, s8 -; GFX9-NEXT: v_add_co_u32_e64 v6, s[6:7], 0, v2 -; GFX9-NEXT: v_addc_co_u32_e64 v3, s[6:7], v2, v3, s[6:7] +; GFX9-NEXT: v_add_co_u32_e64 v3, s[6:7], 0, v2 +; GFX9-NEXT: v_addc_co_u32_e64 v6, s[6:7], v2, v10, s[6:7] ; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_saddsat_v2i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mov_b32_e32 v9, v0 -; GFX10-NEXT: v_mov_b32_e32 v10, v1 -; GFX10-NEXT: v_mov_b32_e32 v13, v2 -; GFX10-NEXT: v_mov_b32_e32 v14, v3 +; GFX10-NEXT: v_mov_b32_e32 v14, v0 +; GFX10-NEXT: v_mov_b32_e32 v15, v1 +; GFX10-NEXT: v_mov_b32_e32 v17, v2 +; GFX10-NEXT: v_mov_b32_e32 v18, v3 ; GFX10-NEXT: v_cmp_gt_i64_e64 s4, 0, v[4:5] -; GFX10-NEXT: v_add_co_u32_e64 v19, vcc_lo, v9, v4 -; GFX10-NEXT: s_brev_b32 s8, 1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, v10, v5, vcc_lo -; GFX10-NEXT: v_add_co_u32_e64 v23, vcc_lo, v13, v6 +; GFX10-NEXT: v_add_co_u32_e64 v8, vcc_lo, v14, v4 ; GFX10-NEXT: v_cmp_gt_i64_e64 s6, 0, v[6:7] -; GFX10-NEXT: v_add_co_ci_u32_e32 v24, vcc_lo, v14, v7, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v15, v5, vcc_lo +; GFX10-NEXT: v_add_co_u32_e64 v19, vcc_lo, v17, v6 ; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, v18, v7, vcc_lo +; GFX10-NEXT: v_ashrrev_i32_e32 v12, 31, v9 +; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[8:9], v[14:15] ; GFX10-NEXT: v_ashrrev_i32_e32 v0, 31, v20 -; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[19:20], v[9:10] -; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v24 -; GFX10-NEXT: v_add_co_u32_e64 v4, s5, v0, 0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s5, s8, v0, s5 -; GFX10-NEXT: v_cmp_lt_i64_e64 s5, v[23:24], v[13:14] -; GFX10-NEXT: v_add_co_u32_e64 v2, s7, v1, 0 +; GFX10-NEXT: v_add_co_u32_e64 v1, s5, 
v12, 0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s5, 0x80000000, v12, s5 +; GFX10-NEXT: v_cmp_lt_i64_e64 s5, v[19:20], v[17:18] +; GFX10-NEXT: v_add_co_u32_e64 v2, s7, v0, 0 ; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s7, s8, v1, s7 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v19, v4, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v1, v20, v5, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s7, 0x80000000, v0, s7 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc_lo ; GFX10-NEXT: s_xor_b32 vcc_lo, s6, s5 -; GFX10-NEXT: v_cndmask_b32_e32 v2, v23, v2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v3, v24, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, v19, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v3, v20, v3, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) ret <2 x i64> %result @@ -6225,15 +6218,14 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX6-NEXT: v_ashrrev_i32_e32 v11, 31, v19 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5] -; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX6-NEXT: v_cndmask_b32_e32 v2, v11, v8, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX6-NEXT: v_add_i32_e32 v0, vcc, 0, v0 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX6-NEXT: s_brev_b32 s4, 1 -; GFX6-NEXT: v_mov_b32_e32 v8, s4 +; GFX6-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX6-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc -; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v3, v20, vcc ; GFX6-NEXT: v_and_b32_e32 v8, 1, v10 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc @@ -6248,43 +6240,42 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX6-NEXT: s_cmp_lt_u32 s6, 64 ; GFX6-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] -; GFX6-NEXT: s_cselect_b32 s5, 1, 0 +; GFX6-NEXT: s_cselect_b32 s4, 1, 0 ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] ; GFX6-NEXT: s_cmp_eq_u32 s6, 0 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX6-NEXT: v_cmp_gt_u64_e32 vcc, 0, v[12:13] -; GFX6-NEXT: s_cselect_b32 s9, 1, 0 +; GFX6-NEXT: s_cselect_b32 s5, 1, 0 ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX6-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[14:15] ; GFX6-NEXT: v_ashr_i64 v[12:13], v[10:11], s6 ; GFX6-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v11 +; GFX6-NEXT: s_and_b32 s5, 1, s5 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX6-NEXT: v_xor_b32_e32 v14, v5, v4 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[8:9], s6 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[10:11], s8 -; GFX6-NEXT: s_and_b32 s6, 1, s5 +; GFX6-NEXT: s_and_b32 s6, 1, s4 ; GFX6-NEXT: v_or_b32_e32 v6, v4, v6 ; GFX6-NEXT: v_or_b32_e32 v7, v5, v7 ; GFX6-NEXT: v_ashr_i64 v[4:5], v[10:11], s7 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX6-NEXT: s_and_b32 s6, 1, s9 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX6-NEXT: s_and_b32 s5, 1, s5 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX6-NEXT: s_and_b32 s4, 1, s4 +; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v11 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, 
v9, vcc -; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v15, v12, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v7, v15, v13, vcc ; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0, v4 ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; GFX6-NEXT: v_mov_b32_e32 v12, s4 ; GFX6-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc -; GFX6-NEXT: v_addc_u32_e32 v7, vcc, v7, v12, vcc +; GFX6-NEXT: v_addc_u32_e32 v7, vcc, v7, v20, vcc ; GFX6-NEXT: v_and_b32_e32 v12, 1, v14 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc @@ -6334,15 +6325,14 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX8-NEXT: v_ashrrev_i32_e32 v11, 31, v19 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX8-NEXT: v_cndmask_b32_e32 v2, v11, v8, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX8-NEXT: s_brev_b32 s4, 1 -; GFX8-NEXT: v_mov_b32_e32 v8, s4 +; GFX8-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc -; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v3, v20, vcc ; GFX8-NEXT: v_and_b32_e32 v8, 1, v10 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc @@ -6357,43 +6347,42 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX8-NEXT: s_cmp_lt_u32 s6, 64 ; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] -; GFX8-NEXT: s_cselect_b32 s5, 1, 0 +; GFX8-NEXT: s_cselect_b32 s4, 1, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] ; GFX8-NEXT: s_cmp_eq_u32 s6, 0 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX8-NEXT: v_cmp_gt_u64_e32 vcc, 0, v[12:13] -; GFX8-NEXT: s_cselect_b32 s9, 1, 0 +; GFX8-NEXT: s_cselect_b32 s5, 1, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX8-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[14:15] ; GFX8-NEXT: v_ashrrev_i64 v[12:13], s6, v[10:11] ; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v11 +; GFX8-NEXT: s_and_b32 s5, 1, s5 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX8-NEXT: v_xor_b32_e32 v14, v5, v4 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], s6, v[8:9] ; GFX8-NEXT: v_lshlrev_b64 v[6:7], s8, v[10:11] -; GFX8-NEXT: s_and_b32 s6, 1, s5 +; GFX8-NEXT: s_and_b32 s6, 1, s4 ; GFX8-NEXT: v_or_b32_e32 v6, v4, v6 ; GFX8-NEXT: v_or_b32_e32 v7, v5, v7 ; GFX8-NEXT: v_ashrrev_i64 v[4:5], s7, v[10:11] ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX8-NEXT: s_and_b32 s6, 1, s9 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX8-NEXT: s_and_b32 s5, 1, s5 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX8-NEXT: s_and_b32 s4, 1, s4 +; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v11 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v15, v12, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v7, v15, v13, vcc ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0, v4 ; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; GFX8-NEXT: v_mov_b32_e32 v12, s4 ; 
GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc -; GFX8-NEXT: v_addc_u32_e32 v7, vcc, v7, v12, vcc +; GFX8-NEXT: v_addc_u32_e32 v7, vcc, v7, v20, vcc ; GFX8-NEXT: v_and_b32_e32 v12, 1, v14 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc @@ -6443,15 +6432,14 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX9-NEXT: v_ashrrev_i32_e32 v11, 31, v19 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5] -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX9-NEXT: v_cndmask_b32_e32 v2, v11, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: s_brev_b32 s4, 1 -; GFX9-NEXT: v_mov_b32_e32 v8, s4 +; GFX9-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v8, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v20, vcc ; GFX9-NEXT: v_and_b32_e32 v8, 1, v10 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc @@ -6466,43 +6454,42 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX9-NEXT: s_cmp_lt_u32 s6, 64 ; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] -; GFX9-NEXT: s_cselect_b32 s5, 1, 0 +; GFX9-NEXT: s_cselect_b32 s4, 1, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] ; GFX9-NEXT: s_cmp_eq_u32 s6, 0 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, 0, v[12:13] -; GFX9-NEXT: s_cselect_b32 s9, 1, 0 +; GFX9-NEXT: s_cselect_b32 s5, 1, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[14:15] ; GFX9-NEXT: v_ashrrev_i64 v[12:13], s6, v[10:11] ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX9-NEXT: v_ashrrev_i32_e32 v15, 31, v11 +; GFX9-NEXT: s_and_b32 s5, 1, s5 ; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX9-NEXT: v_xor_b32_e32 v14, v5, v4 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], s6, v[8:9] ; GFX9-NEXT: v_lshlrev_b64 v[6:7], s8, v[10:11] -; GFX9-NEXT: s_and_b32 s6, 1, s5 +; GFX9-NEXT: s_and_b32 s6, 1, s4 ; GFX9-NEXT: v_or_b32_e32 v6, v4, v6 ; GFX9-NEXT: v_or_b32_e32 v7, v5, v7 ; GFX9-NEXT: v_ashrrev_i64 v[4:5], s7, v[10:11] ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX9-NEXT: s_and_b32 s6, 1, s9 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX9-NEXT: s_and_b32 s5, 1, s5 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX9-NEXT: s_and_b32 s4, 1, s4 +; GFX9-NEXT: v_ashrrev_i32_e32 v15, 31, v11 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v15, v12, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v7, v15, v13, vcc ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, 0, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc -; GFX9-NEXT: v_mov_b32_e32 v12, s4 ; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v12, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v20, vcc ; GFX9-NEXT: v_and_b32_e32 v12, 1, v14 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc @@ -6561,7 
+6548,6 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 ; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s8 ; GFX10-NEXT: v_xor_b32_e32 v9, v10, v20 -; GFX10-NEXT: s_brev_b32 s8, 1 ; GFX10-NEXT: s_cmp_lt_u32 s5, 64 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v16, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc_lo @@ -6571,7 +6557,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, v2, 0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, 0, v0, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, s8, v1, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX10-NEXT: v_add_co_u32_e64 v8, s4, v26, v12 ; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s4, v27, v13, s4 @@ -6619,7 +6605,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX10-NEXT: v_cmp_ne_u32_e64 s5, 0, v7 ; GFX10-NEXT: v_add_co_ci_u32_e64 v7, s4, 0, v3, s4 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v19, v21, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e64 v12, s4, s8, v4, s4 +; GFX10-NEXT: v_add_co_ci_u32_e64 v12, s4, 0x80000000, v4, s4 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v5, s5 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, v6, s5 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v10, v7, s5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll index 9e2f881ee8df8..f188fc05f3637 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -1057,10 +1057,9 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) { ; CHECK-LABEL: v_sdiv_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s6, 0x1000 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xfffff000 +; CHECK-NEXT: s_movk_i32 s6, 0xf000 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 @@ -1075,9 +1074,9 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s7, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1104,9 +1103,9 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e64 v6, s[4:5], v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v7, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v6 -; CHECK-NEXT: v_mul_hi_u32 v10, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v9, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v6 +; CHECK-NEXT: v_mul_hi_u32 v10, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v9, s6, v2 ; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v10 @@ -1114,6 +1113,7 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_mul_lo_u32 v10, v2, v7 ; CHECK-NEXT: v_mul_hi_u32 v5, v2, v9 ; CHECK-NEXT: v_mul_hi_u32 v9, v6, v9 +; 
CHECK-NEXT: s_movk_i32 s6, 0x1000 ; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] ; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v8, v5 @@ -1502,10 +1502,9 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-LABEL: v_sdiv_v2i64_pow2k_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_movk_i32 s6, 0x1000 -; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x1000 ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_mov_b32 s7, 0xfffff000 +; CGP-NEXT: s_movk_i32 s6, 0xf000 ; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 ; CGP-NEXT: v_mov_b32_e32 v7, v4 ; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v6 @@ -1520,19 +1519,19 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 +; CGP-NEXT: s_movk_i32 s7, 0x1000 ; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 ; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 ; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 @@ -1553,9 +1552,9 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v8, v9, vcc ; CGP-NEXT: v_mul_lo_u32 v11, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v10 -; CGP-NEXT: v_mul_hi_u32 v14, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v13, s7, v7 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v10 +; CGP-NEXT: v_mul_hi_u32 v14, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v13, s6, v7 ; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v9 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 @@ -1563,7 +1562,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_mul_lo_u32 v14, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v9, v7, v13 ; CGP-NEXT: v_mul_hi_u32 v13, v10, v13 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v14 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 @@ -1588,6 +1587,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 ; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 @@ -1606,9 +1606,9 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: 
v_mul_hi_u32 v12, s7, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v11 @@ -1617,8 +1617,8 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v10 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 -; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 +; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 @@ -1627,7 +1627,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 ; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc @@ -1646,9 +1646,9 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CGP-NEXT: v_mul_lo_u32 v8, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 @@ -1677,9 +1677,9 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], v7, v8, vcc ; CGP-NEXT: v_mul_lo_u32 v10, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v4 ; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v11 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 @@ -1734,9 +1734,9 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v8, s6, v5 -; CGP-NEXT: v_mul_hi_u32 v10, s6, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v5 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v4 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 @@ -1745,8 +1745,8 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v8 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v2 -; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s6, v2 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 +; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s7, v2 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 @@ -1755,7 +1755,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc ; 
CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v2 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v2 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc @@ -1780,10 +1780,9 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-LABEL: v_sdiv_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xffed2705 +; CHECK-NEXT: s_mov_b32 s6, 0xffed2705 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 @@ -1798,9 +1797,9 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s7, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1827,9 +1826,9 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e64 v6, s[4:5], v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v7, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v6 -; CHECK-NEXT: v_mul_hi_u32 v10, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v9, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v6 +; CHECK-NEXT: v_mul_hi_u32 v10, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v9, s6, v2 ; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v10 @@ -1837,6 +1836,7 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_lo_u32 v10, v2, v7 ; CHECK-NEXT: v_mul_hi_u32 v5, v2, v9 ; CHECK-NEXT: v_mul_hi_u32 v9, v6, v9 +; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb ; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] ; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v8, v5 @@ -2225,10 +2225,9 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-LABEL: v_sdiv_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_mov_b32 s6, 0x12d8fb -; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_mov_b32 s7, 0xffed2705 +; CGP-NEXT: s_mov_b32 s6, 0xffed2705 ; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 ; CGP-NEXT: v_mov_b32_e32 v7, v4 ; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v6 @@ -2243,19 +2242,19 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 +; CGP-NEXT: s_mov_b32 s7, 0x12d8fb ; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, 
v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 ; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 ; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 @@ -2276,9 +2275,9 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v8, v9, vcc ; CGP-NEXT: v_mul_lo_u32 v11, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v10 -; CGP-NEXT: v_mul_hi_u32 v14, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v13, s7, v7 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v10 +; CGP-NEXT: v_mul_hi_u32 v14, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v13, s6, v7 ; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v9 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 @@ -2286,7 +2285,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_mul_lo_u32 v14, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v9, v7, v13 ; CGP-NEXT: v_mul_hi_u32 v13, v10, v13 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v14 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 @@ -2311,6 +2310,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 ; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 @@ -2329,9 +2329,9 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v11 @@ -2340,8 +2340,8 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v10 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 -; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 +; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 @@ -2350,7 +2350,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 ; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc @@ -2369,9 +2369,9 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CGP-NEXT: v_mul_lo_u32 v8, -1, v4 -; CGP-NEXT: 
v_mul_lo_u32 v9, s7, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 @@ -2400,9 +2400,9 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], v7, v8, vcc ; CGP-NEXT: v_mul_lo_u32 v10, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v4 ; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v11 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 @@ -2457,9 +2457,9 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v8, s6, v5 -; CGP-NEXT: v_mul_hi_u32 v10, s6, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v5 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v4 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 @@ -2468,8 +2468,8 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v8 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v2 -; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s6, v2 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 +; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s7, v2 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 @@ -2478,7 +2478,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v2 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v2 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll index 2217e17358b33..f769b826b1ea8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -1037,10 +1037,9 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-LABEL: v_srem_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s6, 0x1000 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xfffff000 +; CHECK-NEXT: s_movk_i32 s6, 0xf000 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 @@ -1055,9 +1054,9 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s7, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s7, v2 -; CHECK-NEXT: 
v_mul_lo_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1084,9 +1083,9 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e64 v6, s[4:5], v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v7, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v6 -; CHECK-NEXT: v_mul_hi_u32 v10, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v9, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v6 +; CHECK-NEXT: v_mul_hi_u32 v10, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v9, s6, v2 ; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v10 @@ -1094,6 +1093,7 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_mul_lo_u32 v10, v2, v7 ; CHECK-NEXT: v_mul_hi_u32 v5, v2, v9 ; CHECK-NEXT: v_mul_hi_u32 v9, v6, v9 +; CHECK-NEXT: s_movk_i32 s6, 0x1000 ; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] ; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v8, v5 @@ -1478,10 +1478,9 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-LABEL: v_srem_v2i64_pow2k_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_movk_i32 s6, 0x1000 -; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x1000 ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_mov_b32 s7, 0xfffff000 +; CGP-NEXT: s_movk_i32 s6, 0xf000 ; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 ; CGP-NEXT: v_mov_b32_e32 v7, v4 ; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v6 @@ -1496,19 +1495,19 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 +; CGP-NEXT: s_movk_i32 s7, 0x1000 ; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 ; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 ; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 @@ -1529,9 +1528,9 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v8, v9, vcc ; CGP-NEXT: v_mul_lo_u32 v11, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v10 -; CGP-NEXT: v_mul_hi_u32 v14, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v13, s7, v7 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v10 +; CGP-NEXT: v_mul_hi_u32 v14, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v13, s6, v7 ; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v9 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 @@ -1539,7 +1538,7 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_mul_lo_u32 v14, v7, v11 ; 
CGP-NEXT: v_mul_hi_u32 v9, v7, v13 ; CGP-NEXT: v_mul_hi_u32 v13, v10, v13 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v14 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 @@ -1564,6 +1563,7 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 ; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 @@ -1582,9 +1582,9 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v8, s6, v8 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v7, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v8 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v7 +; CGP-NEXT: v_mul_hi_u32 v7, s7, v7 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 @@ -1592,20 +1592,20 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v7 ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v8 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s6, v0 +; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v0 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v9 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v9 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc -; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s6, v9 +; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s7, v9 ; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 ; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc @@ -1619,9 +1619,9 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc ; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; CGP-NEXT: v_mul_lo_u32 v8, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 @@ -1651,9 +1651,9 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], v7, v8, vcc ; CGP-NEXT: v_mul_lo_u32 v10, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v4 ; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v11 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, 
v13 @@ -1708,9 +1708,9 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v5, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s6, v4 -; CGP-NEXT: v_mul_hi_u32 v4, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v5, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v4 +; CGP-NEXT: v_mul_hi_u32 v4, s7, v4 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 @@ -1718,20 +1718,20 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v5 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v2 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s6, v2 +; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s7, v2 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v7 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v7 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s6, v7 +; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v7 ; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc @@ -1752,10 +1752,9 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-LABEL: v_srem_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xffed2705 +; CHECK-NEXT: s_mov_b32 s6, 0xffed2705 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 @@ -1770,9 +1769,9 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s7, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1799,9 +1798,9 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e64 v6, s[4:5], v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v7, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v6 -; CHECK-NEXT: v_mul_hi_u32 v10, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v9, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v6 +; CHECK-NEXT: v_mul_hi_u32 v10, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v9, s6, v2 ; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v10 @@ -1809,6 +1808,7 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_lo_u32 v10, v2, v7 ; CHECK-NEXT: v_mul_hi_u32 v5, v2, v9 ; 
CHECK-NEXT: v_mul_hi_u32 v9, v6, v9 +; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb ; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] ; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v8, v5 @@ -2193,10 +2193,9 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-LABEL: v_srem_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_mov_b32 s6, 0x12d8fb -; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_mov_b32 s7, 0xffed2705 +; CGP-NEXT: s_mov_b32 s6, 0xffed2705 ; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 ; CGP-NEXT: v_mov_b32_e32 v7, v4 ; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v6 @@ -2211,19 +2210,19 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 +; CGP-NEXT: s_mov_b32 s7, 0x12d8fb ; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 ; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 ; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 @@ -2244,9 +2243,9 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v8, v9, vcc ; CGP-NEXT: v_mul_lo_u32 v11, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v10 -; CGP-NEXT: v_mul_hi_u32 v14, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v13, s7, v7 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v10 +; CGP-NEXT: v_mul_hi_u32 v14, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v13, s6, v7 ; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v9 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 @@ -2254,7 +2253,7 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_mul_lo_u32 v14, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v9, v7, v13 ; CGP-NEXT: v_mul_hi_u32 v13, v10, v13 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v14 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 @@ -2279,6 +2278,7 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 ; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 @@ -2297,9 +2297,9 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v8, s6, v8 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v7, s6, v7 +; CGP-NEXT: 
v_mul_lo_u32 v8, s7, v8 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v7 +; CGP-NEXT: v_mul_hi_u32 v7, s7, v7 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 @@ -2307,20 +2307,20 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v7 ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v8 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s6, v0 +; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v0 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v9 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v9 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc -; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s6, v9 +; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s7, v9 ; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 ; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc @@ -2334,9 +2334,9 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc ; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; CGP-NEXT: v_mul_lo_u32 v8, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 @@ -2366,9 +2366,9 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], v7, v8, vcc ; CGP-NEXT: v_mul_lo_u32 v10, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v4 ; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v11 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 @@ -2423,9 +2423,9 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v5, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s6, v4 -; CGP-NEXT: v_mul_hi_u32 v4, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v5, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v4 +; CGP-NEXT: v_mul_hi_u32 v4, s7, v4 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 @@ -2433,20 +2433,20 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v5 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v2 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 
; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s6, v2 +; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s7, v2 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v7 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v7 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s6, v7 +; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v7 ; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll index d2c65aa5a1784..76aa2f511b141 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll @@ -4984,24 +4984,22 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] ; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[4:5] ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX6-NEXT: s_brev_b32 s8, 1 -; GFX6-NEXT: v_mov_b32_e32 v1, s8 -; GFX6-NEXT: v_add_i32_e64 v4, s[6:7], 0, v0 +; GFX6-NEXT: v_bfrev_b32_e32 v10, 1 +; GFX6-NEXT: v_add_i32_e64 v1, s[6:7], 0, v0 ; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX6-NEXT: v_addc_u32_e64 v1, s[6:7], v0, v1, s[6:7] -; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v4, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX6-NEXT: v_addc_u32_e64 v4, s[6:7], v0, v10, s[6:7] +; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v2, v6 ; GFX6-NEXT: v_subb_u32_e32 v5, vcc, v3, v7, vcc ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] ; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[6:7] ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v5 -; GFX6-NEXT: v_mov_b32_e32 v3, s8 -; GFX6-NEXT: v_add_i32_e64 v6, s[6:7], 0, v2 -; GFX6-NEXT: v_addc_u32_e64 v3, s[6:7], v2, v3, s[6:7] +; GFX6-NEXT: v_add_i32_e64 v3, s[6:7], 0, v2 +; GFX6-NEXT: v_addc_u32_e64 v6, s[6:7], v2, v10, s[6:7] ; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_ssubsat_v2i64: @@ -5012,24 +5010,22 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] ; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[4:5] ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX8-NEXT: s_brev_b32 s8, 1 -; GFX8-NEXT: v_mov_b32_e32 v1, s8 -; GFX8-NEXT: v_add_u32_e64 v4, s[6:7], 0, v0 +; GFX8-NEXT: v_bfrev_b32_e32 v10, 1 +; GFX8-NEXT: v_add_u32_e64 v1, s[6:7], 0, v0 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX8-NEXT: v_addc_u32_e64 v1, s[6:7], v0, v1, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v4, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX8-NEXT: v_addc_u32_e64 v4, s[6:7], v0, v10, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v2, v6 ; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v3, v7, vcc ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] ; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[6:7] ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v5 -; GFX8-NEXT: v_mov_b32_e32 
v3, s8 -; GFX8-NEXT: v_add_u32_e64 v6, s[6:7], 0, v2 -; GFX8-NEXT: v_addc_u32_e64 v3, s[6:7], v2, v3, s[6:7] +; GFX8-NEXT: v_add_u32_e64 v3, s[6:7], 0, v2 +; GFX8-NEXT: v_addc_u32_e64 v6, s[6:7], v2, v10, s[6:7] ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_ssubsat_v2i64: @@ -5040,56 +5036,53 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] ; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[4:5] ; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX9-NEXT: s_brev_b32 s8, 1 -; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: v_add_co_u32_e64 v4, s[6:7], 0, v0 +; GFX9-NEXT: v_bfrev_b32_e32 v10, 1 +; GFX9-NEXT: v_add_co_u32_e64 v1, s[6:7], 0, v0 ; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX9-NEXT: v_addc_co_u32_e64 v1, s[6:7], v0, v1, s[6:7] -; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v4, s[6:7], v0, v10, s[6:7] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc ; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, v2, v6 ; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v3, v7, vcc ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] ; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[6:7] ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v5 -; GFX9-NEXT: v_mov_b32_e32 v3, s8 -; GFX9-NEXT: v_add_co_u32_e64 v6, s[6:7], 0, v2 -; GFX9-NEXT: v_addc_co_u32_e64 v3, s[6:7], v2, v3, s[6:7] +; GFX9-NEXT: v_add_co_u32_e64 v3, s[6:7], 0, v2 +; GFX9-NEXT: v_addc_co_u32_e64 v6, s[6:7], v2, v10, s[6:7] ; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_ssubsat_v2i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mov_b32_e32 v9, v0 -; GFX10-NEXT: v_mov_b32_e32 v10, v1 -; GFX10-NEXT: v_mov_b32_e32 v13, v2 -; GFX10-NEXT: v_mov_b32_e32 v14, v3 +; GFX10-NEXT: v_mov_b32_e32 v14, v0 +; GFX10-NEXT: v_mov_b32_e32 v15, v1 +; GFX10-NEXT: v_mov_b32_e32 v17, v2 +; GFX10-NEXT: v_mov_b32_e32 v18, v3 ; GFX10-NEXT: v_cmp_lt_i64_e64 s4, 0, v[4:5] -; GFX10-NEXT: v_sub_co_u32_e64 v19, vcc_lo, v9, v4 -; GFX10-NEXT: s_brev_b32 s8, 1 -; GFX10-NEXT: v_sub_co_ci_u32_e32 v20, vcc_lo, v10, v5, vcc_lo -; GFX10-NEXT: v_sub_co_u32_e64 v23, vcc_lo, v13, v6 +; GFX10-NEXT: v_sub_co_u32_e64 v8, vcc_lo, v14, v4 ; GFX10-NEXT: v_cmp_lt_i64_e64 s6, 0, v[6:7] -; GFX10-NEXT: v_sub_co_ci_u32_e32 v24, vcc_lo, v14, v7, vcc_lo +; GFX10-NEXT: v_sub_co_ci_u32_e32 v9, vcc_lo, v15, v5, vcc_lo +; GFX10-NEXT: v_sub_co_u32_e64 v19, vcc_lo, v17, v6 ; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_sub_co_ci_u32_e32 v20, vcc_lo, v18, v7, vcc_lo +; GFX10-NEXT: v_ashrrev_i32_e32 v12, 31, v9 +; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[8:9], v[14:15] ; GFX10-NEXT: v_ashrrev_i32_e32 v0, 31, v20 -; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[19:20], v[9:10] -; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v24 -; GFX10-NEXT: v_add_co_u32_e64 v4, s5, v0, 0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s5, s8, v0, s5 -; GFX10-NEXT: v_cmp_lt_i64_e64 s5, v[23:24], v[13:14] -; GFX10-NEXT: v_add_co_u32_e64 
v2, s7, v1, 0 +; GFX10-NEXT: v_add_co_u32_e64 v1, s5, v12, 0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s5, 0x80000000, v12, s5 +; GFX10-NEXT: v_cmp_lt_i64_e64 s5, v[19:20], v[17:18] +; GFX10-NEXT: v_add_co_u32_e64 v2, s7, v0, 0 ; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s7, s8, v1, s7 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v19, v4, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v1, v20, v5, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s7, 0x80000000, v0, s7 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc_lo ; GFX10-NEXT: s_xor_b32 vcc_lo, s6, s5 -; GFX10-NEXT: v_cndmask_b32_e32 v2, v23, v2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v3, v24, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, v19, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v3, v20, v3, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) ret <2 x i64> %result @@ -6210,15 +6203,14 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX6-NEXT: v_ashrrev_i32_e32 v11, 31, v19 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5] -; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX6-NEXT: v_cndmask_b32_e32 v2, v11, v8, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX6-NEXT: v_add_i32_e32 v0, vcc, 0, v0 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX6-NEXT: s_brev_b32 s4, 1 -; GFX6-NEXT: v_mov_b32_e32 v8, s4 +; GFX6-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX6-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc -; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v3, v20, vcc ; GFX6-NEXT: v_and_b32_e32 v8, 1, v10 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc @@ -6233,43 +6225,42 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX6-NEXT: s_cmp_lt_u32 s6, 64 ; GFX6-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] -; GFX6-NEXT: s_cselect_b32 s5, 1, 0 +; GFX6-NEXT: s_cselect_b32 s4, 1, 0 ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] ; GFX6-NEXT: s_cmp_eq_u32 s6, 0 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX6-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[12:13] -; GFX6-NEXT: s_cselect_b32 s9, 1, 0 +; GFX6-NEXT: s_cselect_b32 s5, 1, 0 ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[14:15] ; GFX6-NEXT: v_ashr_i64 v[12:13], v[10:11], s6 ; GFX6-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v11 +; GFX6-NEXT: s_and_b32 s5, 1, s5 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX6-NEXT: v_xor_b32_e32 v14, v5, v4 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[8:9], s6 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[10:11], s8 -; GFX6-NEXT: s_and_b32 s6, 1, s5 +; GFX6-NEXT: s_and_b32 s6, 1, s4 ; GFX6-NEXT: v_or_b32_e32 v6, v4, v6 ; GFX6-NEXT: v_or_b32_e32 v7, v5, v7 ; GFX6-NEXT: v_ashr_i64 v[4:5], v[10:11], s7 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX6-NEXT: s_and_b32 s6, 1, s9 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX6-NEXT: s_and_b32 s5, 1, s5 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX6-NEXT: s_and_b32 s4, 1, s4 +; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v11 ; GFX6-NEXT: v_cndmask_b32_e32 
v4, v4, v8, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v15, v12, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v7, v15, v13, vcc ; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0, v4 ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; GFX6-NEXT: v_mov_b32_e32 v12, s4 ; GFX6-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc -; GFX6-NEXT: v_addc_u32_e32 v7, vcc, v7, v12, vcc +; GFX6-NEXT: v_addc_u32_e32 v7, vcc, v7, v20, vcc ; GFX6-NEXT: v_and_b32_e32 v12, 1, v14 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc @@ -6319,15 +6310,14 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX8-NEXT: v_ashrrev_i32_e32 v11, 31, v19 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX8-NEXT: v_cndmask_b32_e32 v2, v11, v8, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX8-NEXT: s_brev_b32 s4, 1 -; GFX8-NEXT: v_mov_b32_e32 v8, s4 +; GFX8-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc -; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v3, v20, vcc ; GFX8-NEXT: v_and_b32_e32 v8, 1, v10 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc @@ -6342,43 +6332,42 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX8-NEXT: s_cmp_lt_u32 s6, 64 ; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] -; GFX8-NEXT: s_cselect_b32 s5, 1, 0 +; GFX8-NEXT: s_cselect_b32 s4, 1, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] ; GFX8-NEXT: s_cmp_eq_u32 s6, 0 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[12:13] -; GFX8-NEXT: s_cselect_b32 s9, 1, 0 +; GFX8-NEXT: s_cselect_b32 s5, 1, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[14:15] ; GFX8-NEXT: v_ashrrev_i64 v[12:13], s6, v[10:11] ; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v11 +; GFX8-NEXT: s_and_b32 s5, 1, s5 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX8-NEXT: v_xor_b32_e32 v14, v5, v4 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], s6, v[8:9] ; GFX8-NEXT: v_lshlrev_b64 v[6:7], s8, v[10:11] -; GFX8-NEXT: s_and_b32 s6, 1, s5 +; GFX8-NEXT: s_and_b32 s6, 1, s4 ; GFX8-NEXT: v_or_b32_e32 v6, v4, v6 ; GFX8-NEXT: v_or_b32_e32 v7, v5, v7 ; GFX8-NEXT: v_ashrrev_i64 v[4:5], s7, v[10:11] ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX8-NEXT: s_and_b32 s6, 1, s9 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX8-NEXT: s_and_b32 s5, 1, s5 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX8-NEXT: s_and_b32 s4, 1, s4 +; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v11 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v15, v12, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v7, v15, v13, vcc ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0, v4 ; GFX8-NEXT: v_addc_u32_e32 v5, 
vcc, 0, v5, vcc -; GFX8-NEXT: v_mov_b32_e32 v12, s4 ; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc -; GFX8-NEXT: v_addc_u32_e32 v7, vcc, v7, v12, vcc +; GFX8-NEXT: v_addc_u32_e32 v7, vcc, v7, v20, vcc ; GFX8-NEXT: v_and_b32_e32 v12, 1, v14 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc @@ -6428,15 +6417,14 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX9-NEXT: v_ashrrev_i32_e32 v11, 31, v19 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5] -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX9-NEXT: v_cndmask_b32_e32 v2, v11, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: s_brev_b32 s4, 1 -; GFX9-NEXT: v_mov_b32_e32 v8, s4 +; GFX9-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v8, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v20, vcc ; GFX9-NEXT: v_and_b32_e32 v8, 1, v10 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc @@ -6451,43 +6439,42 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX9-NEXT: s_cmp_lt_u32 s6, 64 ; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] -; GFX9-NEXT: s_cselect_b32 s5, 1, 0 +; GFX9-NEXT: s_cselect_b32 s4, 1, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] ; GFX9-NEXT: s_cmp_eq_u32 s6, 0 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[12:13] -; GFX9-NEXT: s_cselect_b32 s9, 1, 0 +; GFX9-NEXT: s_cselect_b32 s5, 1, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[14:15] ; GFX9-NEXT: v_ashrrev_i64 v[12:13], s6, v[10:11] ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX9-NEXT: v_ashrrev_i32_e32 v15, 31, v11 +; GFX9-NEXT: s_and_b32 s5, 1, s5 ; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX9-NEXT: v_xor_b32_e32 v14, v5, v4 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], s6, v[8:9] ; GFX9-NEXT: v_lshlrev_b64 v[6:7], s8, v[10:11] -; GFX9-NEXT: s_and_b32 s6, 1, s5 +; GFX9-NEXT: s_and_b32 s6, 1, s4 ; GFX9-NEXT: v_or_b32_e32 v6, v4, v6 ; GFX9-NEXT: v_or_b32_e32 v7, v5, v7 ; GFX9-NEXT: v_ashrrev_i64 v[4:5], s7, v[10:11] ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX9-NEXT: s_and_b32 s6, 1, s9 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX9-NEXT: s_and_b32 s5, 1, s5 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX9-NEXT: s_and_b32 s4, 1, s4 +; GFX9-NEXT: v_ashrrev_i32_e32 v15, 31, v11 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v15, v12, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v7, v15, v13, vcc ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, 0, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc -; GFX9-NEXT: v_mov_b32_e32 v12, s4 ; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v12, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v20, vcc ; GFX9-NEXT: v_and_b32_e32 v12, 1, v14 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; 
GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc @@ -6546,7 +6533,6 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 ; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s8 ; GFX10-NEXT: v_xor_b32_e32 v9, v10, v20 -; GFX10-NEXT: s_brev_b32 s8, 1 ; GFX10-NEXT: s_cmp_lt_u32 s5, 64 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v16, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc_lo @@ -6556,7 +6542,7 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, v2, 0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, 0, v0, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, s8, v1, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX10-NEXT: v_sub_co_u32_e64 v8, s4, v26, v12 ; GFX10-NEXT: v_sub_co_ci_u32_e64 v9, s4, v27, v13, s4 @@ -6604,7 +6590,7 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX10-NEXT: v_cmp_ne_u32_e64 s5, 0, v7 ; GFX10-NEXT: v_add_co_ci_u32_e64 v7, s4, 0, v3, s4 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v19, v21, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e64 v12, s4, s8, v4, s4 +; GFX10-NEXT: v_add_co_ci_u32_e64 v12, s4, 0x80000000, v4, s4 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v5, s5 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, v6, s5 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v10, v7, s5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll index 402ae90219eb0..f0984a2397368 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -963,22 +963,22 @@ define i64 @v_udiv_i64_pow2k_denom(i64 %num) { ; CHECK-LABEL: v_udiv_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s6, 0x1000 -; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v2, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xfffff000 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 -; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 +; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 +; CHECK-NEXT: s_movk_i32 s6, 0xf000 +; CHECK-NEXT: s_movk_i32 s7, 0x1000 +; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; CHECK-NEXT: v_trunc_f32_e32 v3, v3 ; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 -; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 @@ -1005,10 +1005,10 @@ define i64 @v_udiv_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v3, v4, vcc ; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v5 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, 
v5 ; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 ; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 ; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 @@ -1055,11 +1055,11 @@ define i64 @v_udiv_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, 0, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v2 ; CHECK-NEXT: v_addc_u32_e32 v9, vcc, 0, v3, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 @@ -1069,16 +1069,16 @@ define i64 @v_udiv_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 ; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v1, v4, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v4 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 ; CHECK-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc -; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc @@ -1364,14 +1364,14 @@ define <2 x i64> @v_udiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-LABEL: v_udiv_v2i64_pow2k_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x1000 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: s_movk_i32 s8, 0xf000 ; CGP-NEXT: s_movk_i32 s10, 0x1000 -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, s10 -; CGP-NEXT: s_mov_b32 s8, 0xfffff000 -; CGP-NEXT: v_mov_b32_e32 v6, v5 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 -; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v5 +; CGP-NEXT: v_mov_b32_e32 v6, v4 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 @@ -1624,22 +1624,22 @@ define i64 @v_udiv_i64_oddk_denom(i64 %num) { ; CHECK-LABEL: v_udiv_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb -; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v2, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xffed2705 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 -; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 +; CHECK-NEXT: s_mov_b32 s6, 0xffed2705 +; CHECK-NEXT: s_mov_b32 s7, 0x12d8fb +; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; CHECK-NEXT: v_trunc_f32_e32 v3, v3 ; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 ; CHECK-NEXT: 
v_cvt_u32_f32_e32 v3, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 -; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 @@ -1666,10 +1666,10 @@ define i64 @v_udiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v3, v4, vcc ; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v5 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 ; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 ; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 ; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 @@ -1716,11 +1716,11 @@ define i64 @v_udiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, 0, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v2 ; CHECK-NEXT: v_addc_u32_e32 v9, vcc, 0, v3, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 @@ -1730,16 +1730,16 @@ define i64 @v_udiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 ; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v1, v4, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v4 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 ; CHECK-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc -; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc @@ -2025,14 +2025,14 @@ define <2 x i64> @v_udiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-LABEL: v_udiv_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_mov_b32 s10, 0x12d8fb -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, s10 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 ; CGP-NEXT: s_mov_b32 s8, 0xffed2705 -; CGP-NEXT: v_mov_b32_e32 v6, v5 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 -; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v5 +; CGP-NEXT: s_mov_b32 s10, 0x12d8fb +; CGP-NEXT: v_mov_b32_e32 v6, v4 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 ; CGP-NEXT: v_mul_f32_e32 v4, 
0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll index 348f38ef250e4..e79c300a56b84 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -949,22 +949,22 @@ define i64 @v_urem_i64_pow2k_denom(i64 %num) { ; CHECK-LABEL: v_urem_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s6, 0x1000 -; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v2, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xfffff000 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 -; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 +; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 +; CHECK-NEXT: s_movk_i32 s6, 0xf000 +; CHECK-NEXT: s_movk_i32 s7, 0x1000 +; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; CHECK-NEXT: v_trunc_f32_e32 v3, v3 ; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 -; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 @@ -991,10 +991,10 @@ define i64 @v_urem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v3, v4, vcc ; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v5 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 ; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 ; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 ; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 @@ -1041,30 +1041,30 @@ define i64 @v_urem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, 0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, s7, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v3, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v3, s7, v3 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 ; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v1, v2, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s6, v0 +; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s7, v0 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: 
v_cmp_le_u32_e32 vcc, s6, v4 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s7, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s6, v4 +; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s7, v4 ; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc @@ -1344,14 +1344,14 @@ define <2 x i64> @v_urem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-LABEL: v_urem_v2i64_pow2k_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x1000 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: s_movk_i32 s8, 0xf000 ; CGP-NEXT: s_movk_i32 s10, 0x1000 -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, s10 -; CGP-NEXT: s_mov_b32 s8, 0xfffff000 -; CGP-NEXT: v_mov_b32_e32 v6, v5 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 -; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v5 +; CGP-NEXT: v_mov_b32_e32 v6, v4 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 @@ -1600,22 +1600,22 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) { ; CHECK-LABEL: v_urem_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb -; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v2, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xffed2705 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 -; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 +; CHECK-NEXT: s_mov_b32 s6, 0xffed2705 +; CHECK-NEXT: s_mov_b32 s7, 0x12d8fb +; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; CHECK-NEXT: v_trunc_f32_e32 v3, v3 ; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 -; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 @@ -1642,10 +1642,10 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v3, v4, vcc ; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v5 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 ; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 ; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 ; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 @@ -1692,30 +1692,30 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 +; 
CHECK-NEXT: v_mul_lo_u32 v5, s7, v2
 ; CHECK-NEXT: v_mul_lo_u32 v6, 0, v2
-; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2
+; CHECK-NEXT: v_mul_hi_u32 v2, s7, v2
 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT: v_mul_lo_u32 v3, s6, v3
+; CHECK-NEXT: v_mul_lo_u32 v3, s7, v3
 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3
 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
 ; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v1, v2, vcc
 ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2
-; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0
+; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0
 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5]
 ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v3
 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5]
 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s6, v0
+; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s7, v0
 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v4
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s7, v4
 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1
 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
-; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s6, v4
+; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s7, v4
 ; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
@@ -1995,14 +1995,14 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-LABEL: v_urem_v2i64_oddk_denom:
 ; CGP: ; %bb.0:
 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT: s_mov_b32 s10, 0x12d8fb
-; CGP-NEXT: v_cvt_f32_ubyte0_e32 v4, 0
-; CGP-NEXT: v_cvt_f32_u32_e32 v5, s10
+; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb
+; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
 ; CGP-NEXT: s_mov_b32 s8, 0xffed2705
-; CGP-NEXT: v_mov_b32_e32 v6, v5
-; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4
-; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v4
-; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v5
+; CGP-NEXT: s_mov_b32 s10, 0x12d8fb
+; CGP-NEXT: v_mov_b32_e32 v6, v4
+; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
+; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5
+; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
 ; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6
 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5

From 1d70984fa220f966ddcecd7906c5f10368fe1b93 Mon Sep 17 00:00:00 2001
From: Vitaly Buka
Date: Mon, 14 Sep 2020 16:32:25 -0700
Subject: [PATCH 0607/1079] [Asan] Accept __lsan_ignore_object for redzone pointer

The check that the pointer is inside the user part of the chunk does not
add any value, but it's the last user of AddrIsInside. I'd like to
simplify AsanChunk in followup patches.
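As an illustrative sketch only (modeled on the lsan_annotations.cpp test
updated below; the variable name and the deliberate leak exist purely for
this example), the annotation now also succeeds when the passed pointer
lands in a chunk's redzone rather than in its user region:

    #include <sanitizer/lsan_interface.h>

    int *z;

    int main() {
      z = new int;
      // z - 1 points into the chunk's left redzone, not the user region;
      // GetAsanChunkByAddr still resolves the chunk, so it is tagged as
      // ignored instead of being rejected as invalid.
      __lsan_ignore_object(z - 1);
      z = nullptr;  // deliberately leak the ignored allocation
      return 0;
    }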
Reviewed By: morehouse

Differential Revision: https://reviews.llvm.org/D87642
---
 compiler-rt/lib/asan/asan_allocator.cpp | 19 +++++++------------
 .../test/asan/TestCases/lsan_annotations.cpp | 12 ++++++++++--
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp
index e4028dc10f48e..5aeb4d14e9a3e 100644
--- a/compiler-rt/lib/asan/asan_allocator.cpp
+++ b/compiler-rt/lib/asan/asan_allocator.cpp
@@ -162,9 +162,6 @@ class AsanChunk : public ChunkBase {
     }
     return reinterpret_cast<void *>(Beg() - RZLog2Size(rz_log));
   }
-  bool AddrIsInside(uptr addr, bool locked_version = false) {
-    return (addr >= Beg()) && (addr < Beg() + UsedSize(locked_version));
-  }
 };

 struct QuarantineCallback {
@@ -1172,16 +1169,14 @@ void ForEachChunk(ForEachChunkCallback callback, void *arg) {
 IgnoreObjectResult IgnoreObjectLocked(const void *p) {
   uptr addr = reinterpret_cast<uptr>(p);
   __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddr(addr);
-  if (!m) return kIgnoreObjectInvalid;
-  if ((atomic_load(&m->chunk_state, memory_order_acquire) ==
-       __asan::CHUNK_ALLOCATED) &&
-      m->AddrIsInside(addr)) {
-    if (m->lsan_tag == kIgnored)
-      return kIgnoreObjectAlreadyIgnored;
-    m->lsan_tag = __lsan::kIgnored;
-    return kIgnoreObjectSuccess;
+  if (!m || (atomic_load(&m->chunk_state, memory_order_acquire) !=
+             __asan::CHUNK_ALLOCATED)) {
+    return kIgnoreObjectInvalid;
   }
-  return kIgnoreObjectInvalid;
+  if (m->lsan_tag == kIgnored)
+    return kIgnoreObjectAlreadyIgnored;
+  m->lsan_tag = __lsan::kIgnored;
+  return kIgnoreObjectSuccess;
 }

 } // namespace __lsan

diff --git a/compiler-rt/test/asan/TestCases/lsan_annotations.cpp b/compiler-rt/test/asan/TestCases/lsan_annotations.cpp
index f52b0ff66a8df..ce7c19b8f2d05 100644
--- a/compiler-rt/test/asan/TestCases/lsan_annotations.cpp
+++ b/compiler-rt/test/asan/TestCases/lsan_annotations.cpp
@@ -5,12 +5,20 @@
 #include <sanitizer/lsan_interface.h>
 #include <stdlib.h>

+int *x, *y, *z;
+
 int main() {
-  int *x = new int;
+  x = new int;
   __lsan_ignore_object(x);
+
   {
     __lsan::ScopedDisabler disabler;
-    double *y = new double;
+    y = new int;
   }
+
+  z = new int;
+  __lsan_ignore_object(z - 1);
+
+  x = y = z = nullptr;
   return 0;
 }

From e6bc7037d386184d94bf68b184d0ac62b96a4098 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Mon, 14 Sep 2020 16:38:48 -0700
Subject: [PATCH 0608/1079] [AArch64] Statepoint support for AArch64.
Differential Revision: https://reviews.llvm.org/D66012 Patch By: loicottet (with major rebase by me) --- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 46 ++++ .../Target/AArch64/AArch64ISelLowering.cpp | 1 + llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 7 + .../Target/AArch64/AArch64RegisterInfo.cpp | 5 +- .../AArch64/AArch64TargetTransformInfo.cpp | 4 + .../AArch64/statepoint-call-lowering.ll | 218 ++++++++++++++++++ 6 files changed, 279 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 3a94820dac8d3..8cbd60d749708 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -95,6 +95,8 @@ class AArch64AsmPrinter : public AsmPrinter { const MachineInstr &MI); void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, const MachineInstr &MI); + void LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI); void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); @@ -944,6 +946,47 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0)); } +void AArch64AsmPrinter::LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI) { + StatepointOpers SOpers(&MI); + if (unsigned PatchBytes = SOpers.getNumPatchBytes()) { + assert(PatchBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); + for (unsigned i = 0; i < PatchBytes; i += 4) + EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0)); + } else { + // Lower call target and choose correct opcode + const MachineOperand &CallTarget = SOpers.getCallTarget(); + MCOperand CallTargetMCOp; + unsigned CallOpcode; + switch (CallTarget.getType()) { + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + MCInstLowering.lowerOperand(CallTarget, CallTargetMCOp); + CallOpcode = AArch64::BL; + break; + case MachineOperand::MO_Immediate: + CallTargetMCOp = MCOperand::createImm(CallTarget.getImm()); + CallOpcode = AArch64::BL; + break; + case MachineOperand::MO_Register: + CallTargetMCOp = MCOperand::createReg(CallTarget.getReg()); + CallOpcode = AArch64::BLR; + break; + default: + llvm_unreachable("Unsupported operand type in statepoint call target"); + break; + } + + EmitToStreamer(OutStreamer, + MCInstBuilder(CallOpcode).addOperand(CallTargetMCOp)); + } + + auto &Ctx = OutStreamer.getContext(); + MCSymbol *MILabel = Ctx.createTempSymbol(); + OutStreamer.emitLabel(MILabel); + SM.recordStatepoint(*MILabel, MI); +} + void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) { Register DestReg = MI.getOperand(0).getReg(); if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) { @@ -1225,6 +1268,9 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { case TargetOpcode::PATCHPOINT: return LowerPATCHPOINT(*OutStreamer, SM, *MI); + case TargetOpcode::STATEPOINT: + return LowerSTATEPOINT(*OutStreamer, SM, *MI); + case TargetOpcode::PATCHABLE_FUNCTION_ENTER: LowerPATCHABLE_FUNCTION_ENTER(*MI); return; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index f9be060248522..8206614547839 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ 
-1804,6 +1804,7 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( case TargetOpcode::STACKMAP: case TargetOpcode::PATCHPOINT: + case TargetOpcode::STATEPOINT: return emitPatchPoint(MI, BB); case AArch64::CATCHRET: diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 9e37d0292e7a7..fb26b2430bf0c 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -107,6 +107,13 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { NumBytes = PatchPointOpers(&MI).getNumPatchBytes(); assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); break; + case TargetOpcode::STATEPOINT: + NumBytes = StatepointOpers(&MI).getNumPatchBytes(); + assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); + // No patch bytes means a normal call inst is emitted + if (NumBytes == 0) + NumBytes = 4; + break; case AArch64::TLSDESC_CALLSEQ: // This gets lowered to an instruction sequence which takes 16 bytes NumBytes = 16; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index b3694411966b5..e0685d766655a 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -611,9 +611,10 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum).getTargetFlags() & AArch64II::MO_TAGGED; Register FrameReg; - // Special handling of dbg_value, stackmap and patchpoint instructions. + // Special handling of dbg_value, stackmap patchpoint statepoint instructions. if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STACKMAP || - MI.getOpcode() == TargetOpcode::PATCHPOINT) { + MI.getOpcode() == TargetOpcode::PATCHPOINT || + MI.getOpcode() == TargetOpcode::STATEPOINT) { StackOffset Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, /*PreferFP=*/true, diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 5f5da63b21b64..fb23bc641573e 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -192,6 +192,10 @@ int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) return TTI::TCC_Free; break; + case Intrinsic::experimental_gc_statepoint: + if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TTI::TCC_Free; + break; } return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind); } diff --git a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll new file mode 100644 index 0000000000000..9819f64a9546a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll @@ -0,0 +1,218 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s | FileCheck %s +; A collection of basic functionality tests for statepoint lowering - most +; interesting cornercases are exercised through the x86 tests. 
+ +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +%struct = type { i64, i64 } + +declare zeroext i1 @return_i1() +declare zeroext i32 @return_i32() +declare i32* @return_i32ptr() +declare float @return_float() +declare %struct @return_struct() +declare void @varargf(i32, ...) + +define i1 @test_i1_return() gc "statepoint-example" { +; CHECK-LABEL: test_i1_return: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: bl return_i1 +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: and w0, w0, #0x1 +; CHECK-NEXT: ret +; This is just checking that a i1 gets lowered normally when there's no extra +; state arguments to the statepoint +entry: + %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0) + %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + ret i1 %call1 +} + +define i32 @test_i32_return() gc "statepoint-example" { +; CHECK-LABEL: test_i32_return: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: bl return_i32 +; CHECK-NEXT: .Ltmp3: +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* @return_i32, i32 0, i32 0, i32 0, i32 0) + %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token) + ret i32 %call1 +} + +define i32* @test_i32ptr_return() gc "statepoint-example" { +; CHECK-LABEL: test_i32ptr_return: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: bl return_i32ptr +; CHECK-NEXT: .Ltmp4: +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, i32* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p0i32f(i64 0, i32 0, i32* ()* @return_i32ptr, i32 0, i32 0, i32 0, i32 0) + %call1 = call i32* @llvm.experimental.gc.result.p0i32(token %safepoint_token) + ret i32* %call1 +} + +define float @test_float_return() gc "statepoint-example" { +; CHECK-LABEL: test_float_return: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: bl return_float +; CHECK-NEXT: .Ltmp5: +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, float ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_f32f(i64 0, i32 0, float ()* @return_float, i32 0, i32 0, i32 0, i32 0) + %call1 = call float @llvm.experimental.gc.result.f32(token %safepoint_token) + ret float %call1 +} + +define %struct @test_struct_return() gc "statepoint-example" { +; CHECK-LABEL: test_struct_return: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: bl return_struct +; CHECK-NEXT: .Ltmp6: +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, %struct ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_structf(i64 0, i32 0, %struct ()* @return_struct, i32 0, i32 0, i32 0, i32 0) + %call1 = call %struct @llvm.experimental.gc.result.struct(token %safepoint_token) + ret %struct %call1 +} + +define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" { +; CHECK-LABEL: test_relocate: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #16 // =16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: str x0, [sp, #8] +; CHECK-NEXT: bl return_i1 +; CHECK-NEXT: .Ltmp7: +; CHECK-NEXT: and w0, w0, #0x1 +; CHECK-NEXT: add sp, sp, #16 // =16 +; CHECK-NEXT: ret +; Check that an ununsed relocate has no code-generation impact +entry: + %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %a)] + %call1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 0) + %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + ret i1 %call2 +} + +define void @test_void_vararg() gc "statepoint-example" { +; CHECK-LABEL: test_void_vararg: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: mov w1, #43 +; CHECK-NEXT: bl varargf +; CHECK-NEXT: .Ltmp8: +; CHECK-NEXT: ret +; Check a statepoint wrapping a *void* returning vararg function works +entry: + %safepoint_token = tail call token (i64, i32, void (i32, ...)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(i64 0, i32 0, void (i32, ...)* @varargf, i32 2, i32 0, i32 42, i32 43, i32 0, i32 0) + ;; if we try to use the result from a statepoint wrapping a + ;; non-void-returning varargf, we will experience a crash. + ret void +} + +define i1 @test_i1_return_patchable() gc "statepoint-example" { +; CHECK-LABEL: test_i1_return_patchable: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: nop +; CHECK-NEXT: .Ltmp9: +; CHECK-NEXT: and w0, w0, #0x1 +; CHECK-NEXT: ret +; A patchable variant of test_i1_return +entry: + %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 4, i1 ()*null, i32 0, i32 0, i32 0, i32 0) + %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + ret i1 %call1 +} + +declare void @consume(i32 addrspace(1)* %obj) + +define i1 @test_cross_bb(i32 addrspace(1)* %a, i1 %external_cond) gc "statepoint-example" { +; CHECK-LABEL: test_cross_bb: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: mov w20, w1 +; CHECK-NEXT: str x0, [sp, #8] +; CHECK-NEXT: bl return_i1 +; CHECK-NEXT: .Ltmp10: +; CHECK-NEXT: tbz w20, #0, .LBB8_2 +; CHECK-NEXT: // %bb.1: // %left +; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: ldr x0, [sp, #8] +; CHECK-NEXT: bl consume +; CHECK-NEXT: and w0, w19, #0x1 +; CHECK-NEXT: b .LBB8_3 +; CHECK-NEXT: .LBB8_2: // %right +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: .LBB8_3: // %right +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %a)] + br i1 %external_cond, label %left, label %right + +left: + %call1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 0) + %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + call void @consume(i32 addrspace(1)* %call1) + ret i1 %call2 + +right: + ret i1 true +} + +%struct2 = type { i64, i64, i64 } + +declare void @consume_attributes(i32, i8* nest, i32, %struct2* byval) + +define void @test_attributes(%struct2* byval %s) gc "statepoint-example" { +; CHECK-LABEL: test_attributes: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #32 // =32 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: ldr x8, [sp, #48] +; CHECK-NEXT: ldr q0, [sp, #32] +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: mov w1, #17 +; CHECK-NEXT: mov x18, xzr +; CHECK-NEXT: str x8, [sp, #16] +; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: bl consume_attributes +; CHECK-NEXT: .Ltmp11: +; CHECK-NEXT: add sp, sp, #32 // =32 +; CHECK-NEXT: ret +entry: +; Check that arguments with attributes are lowered correctly. +; We call a function that has a nest argument and a byval argument. + %statepoint_token = call token (i64, i32, void (i32, i8*, i32, %struct2*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi32p0i8i32p0s_struct2sf(i64 0, i32 0, void (i32, i8*, i32, %struct2*)* @consume_attributes, i32 4, i32 0, i32 42, i8* nest null, i32 17, %struct2* byval %s, i32 0, i32 0) + ret void +} + +declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...) +declare i1 @llvm.experimental.gc.result.i1(token) + +declare token @llvm.experimental.gc.statepoint.p0f_i32f(i64, i32, i32 ()*, i32, i32, ...) +declare i32 @llvm.experimental.gc.result.i32(token) + +declare token @llvm.experimental.gc.statepoint.p0f_p0i32f(i64, i32, i32* ()*, i32, i32, ...) +declare i32* @llvm.experimental.gc.result.p0i32(token) + +declare token @llvm.experimental.gc.statepoint.p0f_f32f(i64, i32, float ()*, i32, i32, ...) +declare float @llvm.experimental.gc.result.f32(token) + +declare token @llvm.experimental.gc.statepoint.p0f_structf(i64, i32, %struct ()*, i32, i32, ...) +declare %struct @llvm.experimental.gc.result.struct(token) + +declare token @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(i64, i32, void (i32, ...)*, i32, i32, ...) + +declare token @llvm.experimental.gc.statepoint.p0f_isVoidi32p0i8i32p0s_struct2sf(i64, i32, void (i32, i8*, i32, %struct2*)*, i32, i32, ...) + +declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32) From 4706880f06fbaf5f95dab2b6fd4cd2a5cf1693e6 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Mon, 14 Sep 2020 14:09:01 -0700 Subject: [PATCH 0609/1079] [flang] Allow Fortran comments after #include path C-style /*comments*/ are removed during preprocessing directive tokenization, but Fortran !comments need to be specifically allowed. Fixes LLVM bugzilla 47466. 
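For example, with this change the prescanner stays quiet about

  #include "empty.h" ! comment
  #include "empty.h" /* comment */

while

  #include "empty.h" comment

still draws the "#include: extra stuff ignored after file name" warning; the new include-comment.F90 test below covers these cases.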
Differential Revision: https://reviews.llvm.org/D87638 --- flang/lib/Parser/preprocessor.cpp | 16 +++++++++------- flang/test/Preprocessing/empty.h | 0 flang/test/Preprocessing/include-comment.F90 | 18 ++++++++++++++++++ 3 files changed, 27 insertions(+), 7 deletions(-) create mode 100644 flang/test/Preprocessing/empty.h create mode 100644 flang/test/Preprocessing/include-comment.F90 diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp index a1f07967d9b08..823adda8e95af 100644 --- a/flang/lib/Parser/preprocessor.cpp +++ b/flang/lib/Parser/preprocessor.cpp @@ -540,7 +540,7 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) { return; } std::string include; - if (dir.TokenAt(j).ToString() == "<") { + if (dir.TokenAt(j).ToString() == "<") { // #include <foo> std::size_t k{j + 1}; if (k >= tokens) { prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), @@ -553,15 +553,12 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) { if (k >= tokens) { prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), "#include: expected '>' at end of included file"_en_US); - } else if (k + 1 < tokens) { - prescanner->Say(dir.GetIntervalProvenanceRange(k + 1, tokens - k - 1), - "#include: extra stuff ignored after '>'"_en_US); } TokenSequence braced{dir, j + 1, k - j - 1}; include = ReplaceMacros(braced, *prescanner).ToString(); - } else if (j + 1 == tokens && - (include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" && - include.substr(include.size() - 1, 1) == "\"") { + j = k; + } else if ((include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" && + include.substr(include.size() - 1, 1) == "\"") { // #include "foo" include = include.substr(1, include.size() - 2); } else { prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1), @@ -573,6 +570,11 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) { "#include: empty include file name"_err_en_US); return; } + j = dir.SkipBlanks(j + 1); + if (j < tokens && dir.TokenAt(j).ToString() != "!") { + prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), + "#include: extra stuff ignored after file name"_en_US); + } std::string buf; llvm::raw_string_ostream error{buf}; const SourceFile *included{allSources_.Open(include, error)}; diff --git a/flang/test/Preprocessing/empty.h b/flang/test/Preprocessing/empty.h new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/flang/test/Preprocessing/include-comment.F90 b/flang/test/Preprocessing/include-comment.F90 new file mode 100644 index 0000000000000..6ac475f76e46e --- /dev/null +++ b/flang/test/Preprocessing/include-comment.F90 @@ -0,0 +1,18 @@ +! RUN: %f18 -I%S -E %s 2>&1 | FileCheck %s +! CHECK-NOT: :3: +#include <empty.h> ! comment +! CHECK-NOT: :5: +#include <empty.h> /* comment */ +! CHECK-NOT: :7: +#include <empty.h> !comment +! CHECK: :9:20: #include: extra stuff ignored after file name +#include <empty.h> comment +! CHECK-NOT: :11: +#include "empty.h" ! comment +! CHECK-NOT: :13: +#include "empty.h" /* comment */ +! CHECK-NOT: :15: +#include "empty.h" !comment +! CHECK: :17:20: #include: extra stuff ignored after file name +#include "empty.h" comment +end From da1aaa0b7080049e0d6ef82a4a6784e89c20f059 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 14 Sep 2020 16:33:08 -0700 Subject: [PATCH 0610/1079] Revert "[X86] Place new constant node in topological order in X86DAGToDAGISel::matchBitExtract." I got the bug number wrong.
This reverts commit 32515938901685bcbc438d5f5bb03cb8a9f4c637. --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 1 - llvm/test/CodeGen/X86/pr47525.ll | 42 ------------------------- 2 files changed, 43 deletions(-) delete mode 100644 llvm/test/CodeGen/X86/pr47525.ll diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 3b5a29ef31fcf..840f132ec6664 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3502,7 +3502,6 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { // Shift NBits left by 8 bits, thus producing 'control'. // This makes the low 8 bits to be zero. SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8); - insertDAGNode(*CurDAG, SDValue(Node, 0), C8); SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8); insertDAGNode(*CurDAG, SDValue(Node, 0), Control); diff --git a/llvm/test/CodeGen/X86/pr47525.ll b/llvm/test/CodeGen/X86/pr47525.ll deleted file mode 100644 index e0f01f3c51152..0000000000000 --- a/llvm/test/CodeGen/X86/pr47525.ll +++ /dev/null @@ -1,42 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=bmi | FileCheck %s - -@a = external local_unnamed_addr global i32, align 4 -@f = external local_unnamed_addr global i32, align 4 - -define void @g(i32* %x, i32* %y, i32* %z) { -; CHECK-LABEL: g: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl {{.*}}(%rip), %eax -; CHECK-NEXT: #APP -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: sete %cl -; CHECK-NEXT: addl %ecx, %ecx -; CHECK-NEXT: orl (%rdi), %ecx -; CHECK-NEXT: movl $0, (%rsi) -; CHECK-NEXT: #APP -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: shll $8, %eax -; CHECK-NEXT: bextrl %eax, {{.*}}(%rip), %eax -; CHECK-NEXT: orl %ecx, %eax -; CHECK-NEXT: movl %eax, (%rdx) -; CHECK-NEXT: retq -entry: - %0 = load i32, i32* @a, align 4 - %1 = tail call i32 asm "", "=r,r,~{dirflag},~{fpsr},~{flags}"(i32 %0) - %2 = icmp eq i32 %1, 0 - %shl1 = select i1 %2, i32 2, i32 0 - %3 = load i32, i32* %x, align 4 - %or = or i32 %3, %shl1 - store i32 0, i32* %y, align 4 - %4 = tail call i32 asm "", "=r,~{dirflag},~{fpsr},~{flags}"() - %notmask = shl nsw i32 -1, %4 - %sub = xor i32 %notmask, -1 - %5 = load i32, i32* @f, align 4 - %and4 = and i32 %5, %sub - %or6 = or i32 %and4, %or - store i32 %or6, i32* %z, align 4 - ret void -} From 46673763fe598aa9d3f0edaf1ba7a1645c4eacfe Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 14 Sep 2020 16:33:23 -0700 Subject: [PATCH 0611/1079] [X86] Place new constant node in topological order in X86DAGToDAGISel::matchBitExtract Fixes PR47482 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 1 + llvm/test/CodeGen/X86/pr47482.ll | 42 +++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 llvm/test/CodeGen/X86/pr47482.ll diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 840f132ec6664..3b5a29ef31fcf 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3502,6 +3502,7 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { // Shift NBits left by 8 bits, thus producing 'control'. // This makes the low 8 bits to be zero. 
SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8); + insertDAGNode(*CurDAG, SDValue(Node, 0), C8); SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8); insertDAGNode(*CurDAG, SDValue(Node, 0), Control); diff --git a/llvm/test/CodeGen/X86/pr47482.ll b/llvm/test/CodeGen/X86/pr47482.ll new file mode 100644 index 0000000000000..e0f01f3c51152 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr47482.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=bmi | FileCheck %s + +@a = external local_unnamed_addr global i32, align 4 +@f = external local_unnamed_addr global i32, align 4 + +define void @g(i32* %x, i32* %y, i32* %z) { +; CHECK-LABEL: g: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{.*}}(%rip), %eax +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: sete %cl +; CHECK-NEXT: addl %ecx, %ecx +; CHECK-NEXT: orl (%rdi), %ecx +; CHECK-NEXT: movl $0, (%rsi) +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: shll $8, %eax +; CHECK-NEXT: bextrl %eax, {{.*}}(%rip), %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: movl %eax, (%rdx) +; CHECK-NEXT: retq +entry: + %0 = load i32, i32* @a, align 4 + %1 = tail call i32 asm "", "=r,r,~{dirflag},~{fpsr},~{flags}"(i32 %0) + %2 = icmp eq i32 %1, 0 + %shl1 = select i1 %2, i32 2, i32 0 + %3 = load i32, i32* %x, align 4 + %or = or i32 %3, %shl1 + store i32 0, i32* %y, align 4 + %4 = tail call i32 asm "", "=r,~{dirflag},~{fpsr},~{flags}"() + %notmask = shl nsw i32 -1, %4 + %sub = xor i32 %notmask, -1 + %5 = load i32, i32* @f, align 4 + %and4 = and i32 %5, %sub + %or6 = or i32 %and4, %or + store i32 %or6, i32* %z, align 4 + ret void +} From b2cf572b562048f54b774d9cef88cf792a33ab31 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Mon, 14 Sep 2020 16:11:45 -0700 Subject: [PATCH 0612/1079] [flang] Respect BZ mode in exponent parts, too The Fortran standard discusses BZ mode (treat blanks as zero digits) explicitly in its effect on the editing of the digits prior to the exponent part, but doesn't mention it in description of the exponent part. Other compilers honor BZ mode in the exponent, so we should do so too. So "1 e 1 " is 1.E11 in BZ mode. Differential Revision: https://reviews.llvm.org/D87653 --- flang/runtime/edit-input.cpp | 23 +++++++++++++++++------ flang/unittests/Runtime/hello.cpp | 1 + 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp index 998edc954ba75..da281aa68e435 100644 --- a/flang/runtime/edit-input.cpp +++ b/flang/runtime/edit-input.cpp @@ -180,10 +180,11 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io, first == 'E' || first == 'D' || first == 'Q') { Put('.'); // input field is normalized to a fraction auto start{got}; + bool bzMode{(edit.modes.editingFlags & blankZero) != 0}; for (; next; next = io.NextInField(remaining)) { char32_t ch{*next}; if (ch == ' ' || ch == '\t') { - if (edit.modes.editingFlags & blankZero) { + if (bzMode) { ch = '0'; // BZ mode - treat blank as if it were zero } else { continue; @@ -206,19 +207,29 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io, if (next && (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' || *next == 'q' || *next == 'Q')) { + // Optional exponent letter. Blanks are allowed between the + // optional exponent letter and the exponent value. 
io.SkipSpaces(remaining); next = io.NextInField(remaining); } - exponent = -edit.modes.scale; // default exponent is -kP + // The default exponent is -kP, but the scale factor doesn't affect + // an explicit exponent. + exponent = -edit.modes.scale; if (next && - (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9'))) { + (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') || + (bzMode && (*next == ' ' || *next == '\t')))) { bool negExpo{*next == '-'}; if (negExpo || *next == '+') { next = io.NextInField(remaining); } - for (exponent = 0; next && (*next >= '0' && *next <= '9'); - next = io.NextInField(remaining)) { - exponent = 10 * exponent + *next - '0'; + for (exponent = 0; next; next = io.NextInField(remaining)) { + if (*next >= '0' && *next <= '9') { + exponent = 10 * exponent + *next - '0'; + } else if (bzMode && (*next == ' ' || *next == '\t')) { + exponent = 10 * exponent; + } else { + break; + } } if (negExpo) { exponent = -exponent; diff --git a/flang/unittests/Runtime/hello.cpp b/flang/unittests/Runtime/hello.cpp index c38aedf4f6549..c1daccae383ac 100644 --- a/flang/unittests/Runtime/hello.cpp +++ b/flang/unittests/Runtime/hello.cpp @@ -481,6 +481,7 @@ int main() { realInTest("(-1P,F18.0)", " 125", 0x4093880000000000); // 1250 realInTest("(1P,F18.0)", " 125", 0x4029000000000000); // 12.5 realInTest("(BZ,F18.0)", " 125 ", 0x4093880000000000); // 1250 + realInTest("(BZ,F18.0)", " 125 . e +1 ", 0x42a6bcc41e900000); // 1.25e13 realInTest("(DC,F18.0)", " 12,5", 0x4029000000000000); listInputTest(); From 8bd0dc5bfe23fdfba110eefd33ff658289a307ab Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 14 Sep 2020 17:16:46 -0400 Subject: [PATCH 0613/1079] [libc++abi] Do not declare __cxa_finalize and __cxa_atexit in <cxxabi.h> These functions are not defined by libc++abi, so they don't belong in <cxxabi.h>. Differential Revision: https://reviews.llvm.org/D75795 --- libcxxabi/include/cxxabi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libcxxabi/include/cxxabi.h b/libcxxabi/include/cxxabi.h index 29e28a69a9195..43ce6f5f740d5 100644 --- a/libcxxabi/include/cxxabi.h +++ b/libcxxabi/include/cxxabi.h @@ -137,9 +137,9 @@ __cxa_vec_cctor(void *dest_array, void *src_array, size_t element_count, void (*destructor)(void *)); // 3.3.5.3 Runtime API -extern _LIBCXXABI_FUNC_VIS int __cxa_atexit(void (*f)(void *), void *p, - void *d); -extern _LIBCXXABI_FUNC_VIS int __cxa_finalize(void *); +// These functions are part of the C++ ABI, but they are not defined in libc++abi: +// int __cxa_atexit(void (*)(void *), void *, void *); +// void __cxa_finalize(void *); // 3.4 Demangler API extern _LIBCXXABI_FUNC_VIS char *__cxa_demangle(const char *mangled_name, From d2321129bda712a0e7ee222c7cb6a62e5ca5b6f4 Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Fri, 4 Sep 2020 17:09:38 -0700 Subject: [PATCH 0614/1079] [GlobalISel] Add `X,Y = G_UNMERGE Z` -> X = G_TRUNC Z Add a combiner helper that replaces G_UNMERGE where all the destination lanes are dead except the first one with a G_TRUNC.
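For example (the first case in the new combine-unmerge.mir test below), when only the first result is live,

  %1:_(s16), %2:_(s16), %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %0(s64)
  $h0 = COPY %1(s16)

combines to

  %1:_(s16) = G_TRUNC %0(s64)
  $h0 = COPY %1(s16)

Vector sources and destinations are first bitcast to scalars, since truncating a vector would truncate every lane instead of keeping the full low bits.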
Differential Revision: https://reviews.llvm.org/D87174 --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 4 + .../include/llvm/Target/GlobalISel/Combine.td | 10 +- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 33 + .../AArch64/GlobalISel/combine-unmerge.mir | 77 + .../AMDGPU/GlobalISel/combine-shl-narrow.mir | 16 +- ...legalize-llvm.amdgcn.image.store.2d.d16.ll | 39 +- .../GlobalISel/llvm.amdgcn.s.buffer.load.ll | 123 +- .../postlegalizercombiner-select.mir | 5 +- .../regbankselect-amdgcn.s.buffer.load.ll | 1374 ++++++++++++++++- 9 files changed, 1581 insertions(+), 100 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 2854025b01910..d740aa07848e5 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -258,6 +258,10 @@ class CombinerHelper { bool applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl &Csts); + /// Transform X, Y = G_UNMERGE Z -> X = G_TRUNC Z. + bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI); + bool applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI); + /// Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space. bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg); bool applyCombineI2PToP2I(MachineInstr &MI, Register &Reg); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 95da231f517f7..be76980b55006 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -421,6 +421,14 @@ def unmerge_cst : GICombineRule< (apply [{ return Helper.applyCombineUnmergeConstant(*${d}, ${info}); }]) >; +// Transform x,y = unmerge z -> x = trunc z. +def unmerge_dead_to_trunc : GICombineRule< + (defs root:$d), + (match (wip_match_opcode G_UNMERGE_VALUES): $d, + [{ return Helper.matchCombineUnmergeWithDeadLanesToTrunc(*${d}); }]), + (apply [{ return Helper.applyCombineUnmergeWithDeadLanesToTrunc(*${d}); }]) +>; + // FIXME: These should use the custom predicate feature once it lands. def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -452,4 +460,4 @@ def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain, width_reduction_combines, select_combines, known_bits_simplifications, ext_ext_fold, not_cmp_fold, opt_brcond_by_inverting_cond, - unmerge_merge, fabs_fabs_fold, unmerge_cst]>; + unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc]>; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index ccc75d44a9ab9..f622b8a089fb5 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1654,6 +1654,39 @@ bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, return true; } +bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + // Check that all the lanes are dead except the first one. 
+ for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) { + if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg())) + return false; + } + return true; +} + +bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { + Builder.setInstrAndDebugLoc(MI); + Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); + // Truncating a vector is going to truncate every single lane, + // whereas we want the full lowbits. + // Do the operation on a scalar instead. + LLT SrcTy = MRI.getType(SrcReg); + if (SrcTy.isVector()) + SrcReg = + Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0); + + Register Dst0Reg = MI.getOperand(0).getReg(); + LLT Dst0Ty = MRI.getType(Dst0Reg); + if (Dst0Ty.isVector()) { + auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg); + Builder.buildCast(Dst0Reg, MIB); + } else + Builder.buildTrunc(Dst0Reg, SrcReg); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir index 52f0836efec42..64ce862274396 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir @@ -292,3 +292,80 @@ body: | $h2 = COPY %3(s16) $h3 = COPY %4(s16) ... + +# Transform unmerge into trunc when only the first definition is live. +--- +name: test_combine_unmerge_dead_to_trunc +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64) + ; CHECK: $h0 = COPY [[TRUNC]](s16) + %0:_(s64) = COPY $x0 + %1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(s64) + $h0 = COPY %1(s16) +... + +# Don't transform unmerge into trunc when middle lanes are live. +--- +name: test_dont_combine_unmerge_dead_to_trunc +body: | + bb.1: + ; CHECK-LABEL: name: test_dont_combine_unmerge_dead_to_trunc + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: $h0 = COPY [[UV2]](s16) + %0:_(s64) = COPY $x0 + %1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(s64) + $h0 = COPY %3(s16) +... + +# Transform unmerge into trunc when only the first definition is live, even +# if the input and output types are vectors. +--- +name: test_combine_unmerge_dead_to_trunc_vec_in_n_out +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc_vec_in_n_out + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<2 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[TRUNC]](s32) + ; CHECK: $w0 = COPY [[BITCAST1]](<2 x s16>) + %0:_(<2 x s32>) = COPY $x0 + %1:_(<2 x s16>),%2:_(<2 x s16>) = G_UNMERGE_VALUES %0(<2 x s32>) + $w0 = COPY %1(<2 x s16>) +... + +# Transform unmerge into trunc when only the first definition is live, even +# if the input type is vector. 
+--- +name: test_combine_unmerge_dead_to_trunc_vec_in +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc_vec_in + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<2 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s64) + ; CHECK: $h0 = COPY [[TRUNC]](s16) + %0:_(<2 x s32>) = COPY $x0 + %1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(<2 x s32>) + $h0 = COPY %1(s16) +... + +# Transform unmerge into trunc when only the first definition is live, even +# if the output type are vector. +--- +name: test_combine_unmerge_dead_to_trunc_vec_out +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc_vec_out + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[TRUNC]](s32) + ; CHECK: $w0 = COPY [[BITCAST]](<2 x s16>) + %0:_(s64) = COPY $x0 + %1:_(<2 x s16>),%2:_(<2 x s16>) = G_UNMERGE_VALUES %0(s64) + $w0 = COPY %1(<2 x s16>) +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-narrow.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-narrow.mir index 41d0260c81f20..1cc5c9ce659d8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-narrow.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-narrow.mir @@ -12,9 +12,9 @@ body: | ; CHECK-LABEL: name: narrow_shl_s64_32_s64amt ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[UV]](s32) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[TRUNC]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_CONSTANT i64 32 @@ -32,9 +32,9 @@ body: | ; CHECK-LABEL: name: narrow_shl_s64_32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[UV]](s32) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[TRUNC]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 32 @@ -52,9 +52,9 @@ body: | ; CHECK-LABEL: name: narrow_shl_s64_33 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C]](s32) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C1]](s32), [[SHL]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) @@ -93,9 +93,9 @@ body: | ; CHECK-LABEL: name: narrow_shl_s64_63 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C]](s32) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C1]](s32), [[SHL]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll index 387630adabcee..390b91ea80c11 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll @@ -110,15 +110,16 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; UNPACKED: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) + ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) - ; UNPACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; UNPACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; UNPACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; UNPACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32) ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8) ; UNPACKED: S_ENDPGM 0 @@ -140,9 +141,29 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) - ; PACKED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; PACKED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; PACKED: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]] + ; PACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[DEF]](<2 x s16>) + ; PACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8) + ; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[EXTRACT]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll index 7ff60e57d9646..43d7968832335 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll @@ -174,22 +174,20 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4) ; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 - ; GFX6: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 - ; GFX6: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5 - ; GFX6: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8 - ; GFX6: [[COPY8:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub9_sub10_sub11 - ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0 - ; GFX6: [[COPY10:%[0-9]+]]:sreg_32 = COPY 
[[COPY5]].sub1 - ; GFX6: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 + ; GFX6: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]] + ; GFX6: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2 + ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0 + ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1 + ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2 + ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; GFX7-LABEL: name: s_buffer_load_v3i32 @@ -203,22 +201,20 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4) ; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 - ; GFX7: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 - ; GFX7: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5 - ; GFX7: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8 - ; GFX7: [[COPY8:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub9_sub10_sub11 - ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0 - ; GFX7: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1 - ; GFX7: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 + ; GFX7: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]] + ; GFX7: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2 + ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0 + ; GFX7: 
[[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1 + ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2 + ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; GFX8-LABEL: name: s_buffer_load_v3i32 @@ -232,22 +228,20 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4) ; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 - ; GFX8: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 - ; GFX8: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5 - ; GFX8: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8 - ; GFX8: [[COPY8:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub9_sub10_sub11 - ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0 - ; GFX8: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1 - ; GFX8: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 + ; GFX8: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]] + ; GFX8: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2 + ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0 + ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1 + ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2 + ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - 
; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1600,15 +1594,12 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; GFX6: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]] ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11 ; GFX6: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5 - ; GFX6: [[COPY9:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8 - ; GFX6: [[COPY10:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub9_sub10_sub11 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2 - ; GFX6: $vgpr0 = COPY [[COPY11]] - ; GFX6: $vgpr1 = COPY [[COPY12]] - ; GFX6: $vgpr2 = COPY [[COPY13]] + ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 + ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1 + ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2 + ; GFX6: $vgpr0 = COPY [[COPY8]] + ; GFX6: $vgpr1 = COPY [[COPY9]] + ; GFX6: $vgpr2 = COPY [[COPY10]] ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX7-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): @@ -1626,15 +1617,12 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; GFX7: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]] ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11 ; GFX7: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5 - ; GFX7: [[COPY9:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8 - ; GFX7: [[COPY10:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub9_sub10_sub11 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2 - ; GFX7: $vgpr0 = COPY [[COPY11]] - ; GFX7: $vgpr1 = COPY [[COPY12]] - ; GFX7: $vgpr2 = COPY [[COPY13]] + ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 + ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1 + ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2 + ; GFX7: $vgpr0 = COPY [[COPY8]] + ; GFX7: $vgpr1 = COPY [[COPY9]] + ; GFX7: $vgpr2 = COPY [[COPY10]] ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX8-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): @@ -1652,15 +1640,12 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; GFX8: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]] ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, 
[[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11 ; GFX8: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5 - ; GFX8: [[COPY9:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8 - ; GFX8: [[COPY10:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub9_sub10_sub11 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2 - ; GFX8: $vgpr0 = COPY [[COPY11]] - ; GFX8: $vgpr1 = COPY [[COPY12]] - ; GFX8: $vgpr2 = COPY [[COPY13]] + ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 + ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1 + ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2 + ; GFX8: $vgpr0 = COPY [[COPY8]] + ; GFX8: $vgpr1 = COPY [[COPY9]] + ; GFX8: $vgpr2 = COPY [[COPY10]] ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x float> %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir index b8109fe6c87cf..1941ad593f96d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir @@ -37,8 +37,9 @@ body: | ; GCN-LABEL: name: select_from_same_results_of_unmerge_values ; GCN: liveins: $vgpr0 ; GCN: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; GCN: $vgpr0 = COPY [[UV]](s32) + ; GCN: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[DEF]](<2 x s32>) + ; GCN: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64) + ; GCN: $vgpr0 = COPY [[TRUNC]](s32) ; GCN: SI_RETURN_TO_EPILOG $vgpr0 %0:_(<2 x s32>) = G_IMPLICIT_DEF %1:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll index 670c9898c2798..96b66d48e23dd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -amdgpu-global-isel-new-legality -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -simplify-mir -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s -; RUN: llc -amdgpu-global-isel-new-legality -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -simplify-mir -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s +; RUN: llc -amdgpu-global-isel-new-legality -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -simplify-mir -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s --check-prefix=GREEDY ; Natural mapping define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { @@ -18,6 +18,20 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) ; CHECK: $sgpr0 = COPY [[INT]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GREEDY-LABEL: name: s_buffer_load_i32 + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 4) + ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) + ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; GREEDY: $sgpr0 = COPY [[INT]](s32) + ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret i32 %val } @@ -41,6 +55,24 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg ; CHECK: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) ; CHECK: $sgpr1 = COPY [[INT1]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GREEDY-LABEL: name: s_buffer_load_v2i32 + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 8, align 4) + ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) + ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; GREEDY: $sgpr0 = COPY [[INT]](s32) + ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; GREEDY: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; GREEDY: $sgpr1 = COPY [[INT1]](s32) + ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x i32> %val } @@ -58,18 +90,46 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 12, align 4) ; CHECK: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; CHECK: [[UV:%[0-9]+]]:sgpr(<3 x s32>), [[UV1:%[0-9]+]]:sgpr(<3 x s32>), [[UV2:%[0-9]+]]:sgpr(<3 x s32>), [[UV3:%[0-9]+]]:sgpr(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; CHECK: [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[UV]](<3 x s32>) - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY 
[[UV4]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[BITCAST]](s384) + ; CHECK: [[BITCAST1:%[0-9]+]]:sgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) + ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) + ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) ; CHECK: $sgpr0 = COPY [[INT]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) ; CHECK: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) ; CHECK: $sgpr1 = COPY [[INT1]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) + ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) ; CHECK: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) ; CHECK: $sgpr2 = COPY [[INT2]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 + ; GREEDY-LABEL: name: s_buffer_load_v3i32 + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 12, align 4) + ; GREEDY: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF + ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) + ; GREEDY: [[BITCAST:%[0-9]+]]:sgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) + ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[BITCAST]](s384) + ; GREEDY: [[BITCAST1:%[0-9]+]]:sgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) + ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) + ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; GREEDY: $sgpr0 = COPY [[INT]](s32) + ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; GREEDY: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; GREEDY: $sgpr1 = COPY [[INT1]](s32) + ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) + ; GREEDY: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) + ; GREEDY: $sgpr2 = COPY [[INT2]](s32) + ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x i32> %val } @@ -111,6 +171,42 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg ; CHECK: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) ; CHECK: $sgpr7 = COPY [[INT7]](s32) ; CHECK: SI_RETURN_TO_EPILOG 
implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 + ; GREEDY-LABEL: name: s_buffer_load_v8i32 + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 32, align 4) + ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) + ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; GREEDY: $sgpr0 = COPY [[INT]](s32) + ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; GREEDY: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; GREEDY: $sgpr1 = COPY [[INT1]](s32) + ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) + ; GREEDY: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) + ; GREEDY: $sgpr2 = COPY [[INT2]](s32) + ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) + ; GREEDY: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) + ; GREEDY: $sgpr3 = COPY [[INT3]](s32) + ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) + ; GREEDY: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) + ; GREEDY: $sgpr4 = COPY [[INT4]](s32) + ; GREEDY: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) + ; GREEDY: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) + ; GREEDY: $sgpr5 = COPY [[INT5]](s32) + ; GREEDY: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) + ; GREEDY: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) + ; GREEDY: $sgpr6 = COPY [[INT6]](s32) + ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) + ; GREEDY: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) + ; GREEDY: $sgpr7 = COPY [[INT7]](s32) + ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x i32> %val } @@ -176,6 +272,66 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr ; CHECK: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32) ; CHECK: $sgpr15 = COPY [[INT15]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit 
$sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 + ; GREEDY-LABEL: name: s_buffer_load_v16i32 + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 64, align 4) + ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) + ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; GREEDY: $sgpr0 = COPY [[INT]](s32) + ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; GREEDY: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; GREEDY: $sgpr1 = COPY [[INT1]](s32) + ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) + ; GREEDY: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) + ; GREEDY: $sgpr2 = COPY [[INT2]](s32) + ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) + ; GREEDY: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) + ; GREEDY: $sgpr3 = COPY [[INT3]](s32) + ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) + ; GREEDY: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) + ; GREEDY: $sgpr4 = COPY [[INT4]](s32) + ; GREEDY: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) + ; GREEDY: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) + ; GREEDY: $sgpr5 = COPY [[INT5]](s32) + ; GREEDY: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) + ; GREEDY: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) + ; GREEDY: $sgpr6 = COPY [[INT6]](s32) + ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) + ; GREEDY: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) + ; GREEDY: $sgpr7 = COPY [[INT7]](s32) + ; GREEDY: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32) + ; GREEDY: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32) + ; GREEDY: $sgpr8 = COPY [[INT8]](s32) + ; GREEDY: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32) + ; GREEDY: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32) + ; GREEDY: $sgpr9 = COPY [[INT9]](s32) + ; GREEDY: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32) + ; GREEDY: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32) + 
; GREEDY: $sgpr10 = COPY [[INT10]](s32) + ; GREEDY: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32) + ; GREEDY: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32) + ; GREEDY: $sgpr11 = COPY [[INT11]](s32) + ; GREEDY: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32) + ; GREEDY: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32) + ; GREEDY: $sgpr12 = COPY [[INT12]](s32) + ; GREEDY: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32) + ; GREEDY: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32) + ; GREEDY: $sgpr13 = COPY [[INT13]](s32) + ; GREEDY: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32) + ; GREEDY: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32) + ; GREEDY: $sgpr14 = COPY [[INT14]](s32) + ; GREEDY: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32) + ; GREEDY: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32) + ; GREEDY: $sgpr15 = COPY [[INT15]](s32) + ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x i32> %val } @@ -196,6 +352,20 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val } @@ -217,6 +387,22 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; GREEDY-LABEL: name: s_buffer_load_v2f32_vgpr_offset + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + 
; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 8, align 4) + ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) + ; GREEDY: $vgpr0 = COPY [[UV]](s32) + ; GREEDY: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x float> %val } @@ -238,12 +424,38 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; CHECK: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) ; CHECK: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[COPY5]](<4 x s32>), [[COPY6]](<4 x s32>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(<3 x s32>), [[UV1:%[0-9]+]]:vgpr(<3 x s32>), [[UV2:%[0-9]+]]:vgpr(<3 x s32>), [[UV3:%[0-9]+]]:vgpr(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; CHECK: [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV]](<3 x s32>) - ; CHECK: $vgpr0 = COPY [[UV4]](s32) - ; CHECK: $vgpr1 = COPY [[UV5]](s32) - ; CHECK: $vgpr2 = COPY [[UV6]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[BITCAST]](s384) + ; CHECK: [[BITCAST1:%[0-9]+]]:vgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GREEDY-LABEL: name: s_buffer_load_v3f32_vgpr_offset + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF + ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) + ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) + ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[COPY5]](<4 x s32>), [[COPY6]](<4 x s32>) + ; GREEDY: [[BITCAST:%[0-9]+]]:vgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) + ; 
GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[BITCAST]](s384) + ; GREEDY: [[BITCAST1:%[0-9]+]]:vgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) + ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) + ; GREEDY: $vgpr0 = COPY [[UV]](s32) + ; GREEDY: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x float> %val } @@ -267,6 +479,24 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r ; CHECK: $vgpr2 = COPY [[UV2]](s32) ; CHECK: $vgpr3 = COPY [[UV3]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GREEDY-LABEL: name: s_buffer_load_v4f32_vgpr_offset + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; GREEDY: $vgpr0 = COPY [[UV]](s32) + ; GREEDY: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <4 x float> %val } @@ -296,6 +526,30 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r ; CHECK: $vgpr6 = COPY [[UV6]](s32) ; CHECK: $vgpr7 = COPY [[UV7]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD 
[[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY: $vgpr0 = COPY [[UV]](s32) + ; GREEDY: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY: $vgpr4 = COPY [[UV4]](s32) + ; GREEDY: $vgpr5 = COPY [[UV5]](s32) + ; GREEDY: $vgpr6 = COPY [[UV6]](s32) + ; GREEDY: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val } @@ -335,6 +589,40 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg ; CHECK: $vgpr14 = COPY [[UV14]](s32) ; CHECK: $vgpr15 = COPY [[UV15]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GREEDY-LABEL: name: s_buffer_load_v16f32_vgpr_offset + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4) + ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) + ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), 
[[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) + ; GREEDY: $vgpr0 = COPY [[UV]](s32) + ; GREEDY: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY: $vgpr4 = COPY [[UV4]](s32) + ; GREEDY: $vgpr5 = COPY [[UV5]](s32) + ; GREEDY: $vgpr6 = COPY [[UV6]](s32) + ; GREEDY: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY: $vgpr8 = COPY [[UV8]](s32) + ; GREEDY: $vgpr9 = COPY [[UV9]](s32) + ; GREEDY: $vgpr10 = COPY [[UV10]](s32) + ; GREEDY: $vgpr11 = COPY [[UV11]](s32) + ; GREEDY: $vgpr12 = COPY [[UV12]](s32) + ; GREEDY: $vgpr13 = COPY [[UV13]](s32) + ; GREEDY: $vgpr14 = COPY [[UV14]](s32) + ; GREEDY: $vgpr15 = COPY [[UV15]](s32) + ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val } @@ -356,6 +644,22 @@ define amdgpu_ps void @s_buffer_load_i96_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) ; CHECK: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store 12 into `i96 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: S_ENDPGM 0 + ; GREEDY-LABEL: name: s_buffer_load_i96_vgpr_offset + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) + ; GREEDY: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store 12 into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; GREEDY: S_ENDPGM 0 %val = call i96 @llvm.amdgcn.s.buffer.load.i96(<4 x i32> %rsrc, i32 %soffset, i32 0) store i96 %val, i96 addrspace(1)* undef ret void @@ -384,6 +688,27 @@ define amdgpu_ps void @s_buffer_load_i256_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) ; CHECK: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store 16 into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) ; CHECK: S_ENDPGM 0 + ; GREEDY-LABEL: name: s_buffer_load_i256_vgpr_offset + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; 
GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) + ; GREEDY: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) + ; GREEDY: G_STORE [[UV]](s128), [[DEF]](p1) :: (store 16 into `i256 addrspace(1)* undef`, align 8, addrspace 1) + ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store 16 into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; GREEDY: S_ENDPGM 0 %val = call i256 @llvm.amdgcn.s.buffer.load.i256(<4 x i32> %rsrc, i32 %soffset, i32 0) store i256 %val, i256 addrspace(1)* undef ret void @@ -420,6 +745,35 @@ define amdgpu_ps void @s_buffer_load_i512_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) ; CHECK: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store 16 into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) ; CHECK: S_ENDPGM 0 + ; GREEDY-LABEL: name: s_buffer_load_i512_vgpr_offset + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4) + ; GREEDY: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) + ; GREEDY: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), 
[[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) + ; GREEDY: G_STORE [[UV]](s128), [[DEF]](p1) :: (store 16 into `i512 addrspace(1)* undef`, align 8, addrspace 1) + ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store 16 into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; GREEDY: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store 16 into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) + ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; GREEDY: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store 16 into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) + ; GREEDY: S_ENDPGM 0 %val = call i512 @llvm.amdgcn.s.buffer.load.i512(<4 x i32> %rsrc, i32 %soffset, i32 0) store i512 %val, i512 addrspace(1)* undef ret void @@ -448,6 +802,27 @@ define amdgpu_ps void @s_buffer_load_v16i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) ; CHECK: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store 16 into `<16 x i16> addrspace(1)* undef` + 16, align 32, addrspace 1) ; CHECK: S_ENDPGM 0 + ; GREEDY-LABEL: name: s_buffer_load_v16i16_vgpr_offset + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) + ; GREEDY: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) + ; GREEDY: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) + ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store 16 into `<16 x i16> addrspace(1)* undef` + 16, align 32, addrspace 1) + ; GREEDY: S_ENDPGM 0 %val = call <16 x i16> @llvm.amdgcn.s.buffer.load.v16i16(<4 x i32> %rsrc, i32 %soffset, i32 0) store <16 x i16> %val, <16 x i16> addrspace(1)* undef ret void @@ -484,6 +859,35 @@ define amdgpu_ps void @s_buffer_load_v32i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; CHECK: 
[[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) ; CHECK: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef` + 48, align 64, addrspace 1) ; CHECK: S_ENDPGM 0 + ; GREEDY-LABEL: name: s_buffer_load_v32i16_vgpr_offset + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4) + ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) + ; GREEDY: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) + ; GREEDY: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) + ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef` + 16, align 64, addrspace 1) + ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; GREEDY: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef` + 32, align 64, addrspace 1) + ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; GREEDY: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef` + 48, align 64, addrspace 1) + ; GREEDY: S_ENDPGM 0 %val = call <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32> %rsrc, i32 %soffset, i32 0) store <32 x i16> %val, <32 x i16> addrspace(1)* undef ret void @@ -512,6 +916,27 @@ define amdgpu_ps void @s_buffer_load_v4i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) ; CHECK: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store 16 into `<4 x i64> addrspace(1)* undef` + 16, align 32, 
addrspace 1) ; CHECK: S_ENDPGM 0 + ; GREEDY-LABEL: name: s_buffer_load_v4i64_vgpr_offset + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) + ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) + ; GREEDY: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) + ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store 16 into `<4 x i64> addrspace(1)* undef` + 16, align 32, addrspace 1) + ; GREEDY: S_ENDPGM 0 %val = call <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32> %rsrc, i32 %soffset, i32 0) store <4 x i64> %val, <4 x i64> addrspace(1)* undef ret void @@ -548,6 +973,35 @@ define amdgpu_ps void @s_buffer_load_v8i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) ; CHECK: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef` + 48, align 64, addrspace 1) ; CHECK: S_ENDPGM 0 + ; GREEDY-LABEL: name: s_buffer_load_v8i64_vgpr_offset + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), 
[[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4) + ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) + ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) + ; GREEDY: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) + ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef` + 16, align 64, addrspace 1) + ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; GREEDY: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef` + 32, align 64, addrspace 1) + ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; GREEDY: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef` + 48, align 64, addrspace 1) + ; GREEDY: S_ENDPGM 0 %val = call <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32> %rsrc, i32 %soffset, i32 0) store <8 x i64> %val, <8 x i64> addrspace(1)* undef ret void @@ -576,6 +1030,27 @@ define amdgpu_ps void @s_buffer_load_v4p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) ; CHECK: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store 16 into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, align 32, addrspace 1) ; CHECK: S_ENDPGM 0 + ; GREEDY-LABEL: name: s_buffer_load_v4p1_vgpr_offset + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) + ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) + ; GREEDY: G_STORE [[UV]](<2 x p1>), 
[[DEF]](p1) :: (store 16 into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1)
+ ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16
+ ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
+ ; GREEDY: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store 16 into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, align 32, addrspace 1)
+ ; GREEDY: S_ENDPGM 0
%val = call <4 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v4p1i8(<4 x i32> %rsrc, i32 %soffset, i32 0)
store <4 x i8 addrspace(1)*> %val, <4 x i8 addrspace(1)*> addrspace(1)* undef
ret void
@@ -612,6 +1087,35 @@ define amdgpu_ps void @s_buffer_load_v8p1_vgpr_offset(<4 x i32> inreg %rsrc, i32
; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64)
; CHECK: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, align 64, addrspace 1)
; CHECK: S_ENDPGM 0
+ ; GREEDY-LABEL: name: s_buffer_load_v8p1_vgpr_offset
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4)
+ ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>)
+ ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>)
+ ; GREEDY: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1)
+ ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16
+ ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
+ ; GREEDY: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, align 64, addrspace 1)
+ ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32
+ ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64)
+ ; GREEDY: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 64, addrspace 1)
+ ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48
+ ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64)
+ ; GREEDY: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, align 64, addrspace 1)
+ ; GREEDY: S_ENDPGM 0
%val = call <8 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v8p1i8(<4 x i32> %rsrc, i32 %soffset, i32 0)
store <8 x i8 addrspace(1)*> %val, <8 x i8 addrspace(1)*> addrspace(1)* undef
ret void
@@ -635,6 +1139,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %
; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load 4)
; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092
+ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
+ ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load 4)
+ ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32)
+ ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4092
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret float %val
@@ -658,6 +1179,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %
; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load 4)
; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095
+ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
+ ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load 4)
+ ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32)
+ ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4095
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret float %val
@@ -680,6 +1218,22 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %
; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4)
; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096
+ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
+ ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4)
+ ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32)
+ ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4096
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret float %val
@@ -714,6 +1268,33 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32>
; CHECK: $vgpr6 = COPY [[UV6]](s32)
; CHECK: $vgpr7 = COPY [[UV7]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064
+ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
+ ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>)
+ ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>)
+ ; GREEDY: $vgpr0 = COPY [[UV]](s32)
+ ; GREEDY: $vgpr1 = COPY [[UV1]](s32)
+ ; GREEDY: $vgpr2 = COPY [[UV2]](s32)
+ ; GREEDY: $vgpr3 = COPY [[UV3]](s32)
+ ; GREEDY: $vgpr4 = COPY [[UV4]](s32)
+ ; GREEDY: $vgpr5 = COPY [[UV5]](s32)
+ ; GREEDY: $vgpr6 = COPY [[UV6]](s32)
+ ; GREEDY: $vgpr7 = COPY [[UV7]](s32)
+ ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%soffset = add i32 %soffset.base, 4064
%val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x float> %val
@@ -747,6 +1328,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32>
; CHECK: $vgpr6 = COPY [[UV6]](s32)
; CHECK: $vgpr7 = COPY [[UV7]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068
+ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
+ ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>)
+ ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>)
+ ; GREEDY: $vgpr0 = COPY [[UV]](s32)
+ ; GREEDY: $vgpr1 = COPY [[UV1]](s32)
+ ; GREEDY: $vgpr2 = COPY [[UV2]](s32)
+ ; GREEDY: $vgpr3 = COPY [[UV3]](s32)
+ ; GREEDY: $vgpr4 = COPY [[UV4]](s32)
+ ; GREEDY: $vgpr5 = COPY [[UV5]](s32)
+ ; GREEDY: $vgpr6 = COPY [[UV6]](s32)
+ ; GREEDY: $vgpr7 = COPY [[UV7]](s32)
+ ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%soffset = add i32 %soffset.base, 4068
%val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x float> %val
@@ -790,6 +1397,43 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3
; CHECK: $vgpr14 = COPY [[UV14]](s32)
; CHECK: $vgpr15 = COPY [[UV15]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GREEDY-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032
+ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
+ ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4)
+ ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>)
+ ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>)
+ ; GREEDY: $vgpr0 = COPY [[UV]](s32)
+ ; GREEDY: $vgpr1 = COPY [[UV1]](s32)
+ ; GREEDY: $vgpr2 = COPY [[UV2]](s32)
+ ; GREEDY: $vgpr3 = COPY [[UV3]](s32)
+ ; GREEDY: $vgpr4 = COPY [[UV4]](s32)
+ ; GREEDY: $vgpr5 = COPY [[UV5]](s32)
+ ; GREEDY: $vgpr6 = COPY [[UV6]](s32)
+ ; GREEDY: $vgpr7 = COPY [[UV7]](s32)
+ ; GREEDY: $vgpr8 = COPY [[UV8]](s32)
+ ; GREEDY: $vgpr9 = COPY [[UV9]](s32)
+ ; GREEDY: $vgpr10 = COPY [[UV10]](s32)
+ ; GREEDY: $vgpr11 = COPY [[UV11]](s32)
+ ; GREEDY: $vgpr12 = COPY [[UV12]](s32)
+ ; GREEDY: $vgpr13 = COPY [[UV13]](s32)
+ ; GREEDY: $vgpr14 = COPY [[UV14]](s32)
+ ; GREEDY: $vgpr15 = COPY [[UV15]](s32)
+ ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
%soffset = add i32 %soffset.base, 4032
%val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <16 x float> %val
@@ -832,6 +1476,42 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3
; CHECK: $vgpr14 = COPY [[UV14]](s32)
; CHECK: $vgpr15 = COPY [[UV15]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GREEDY-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036
+ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
+ ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4)
+ ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>)
+ ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>)
+ ; GREEDY: $vgpr0 = COPY [[UV]](s32)
+ ; GREEDY: $vgpr1 = COPY [[UV1]](s32)
+ ; GREEDY: $vgpr2 = COPY [[UV2]](s32)
+ ; GREEDY: $vgpr3 = COPY [[UV3]](s32)
+ ; GREEDY: $vgpr4 = COPY [[UV4]](s32)
+ ; GREEDY: $vgpr5 = COPY [[UV5]](s32)
+ ; GREEDY: $vgpr6 = COPY [[UV6]](s32)
+ ; GREEDY: $vgpr7 = COPY [[UV7]](s32)
+ ; GREEDY: $vgpr8 = COPY [[UV8]](s32)
+ ; GREEDY: $vgpr9 = COPY [[UV9]](s32)
+ ; GREEDY: $vgpr10 = COPY [[UV10]](s32)
+ ; GREEDY: $vgpr11 = COPY [[UV11]](s32)
+ ; GREEDY: $vgpr12 = COPY [[UV12]](s32)
+ ; GREEDY: $vgpr13 = COPY [[UV13]](s32)
+ ; GREEDY: $vgpr14 = COPY [[UV14]](s32)
+ ; GREEDY: $vgpr15 = COPY [[UV15]](s32)
+ ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
%soffset = add i32 %soffset.base, 4036
%val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <16 x float> %val
@@ -878,6 +1558,45 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %
; CHECK: bb.4:
; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
+ ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GREEDY: bb.2:
+ ; GREEDY: successors: %bb.3, %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2
+ ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %8(s32), %bb.2
+ ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: bb.3:
+ ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GREEDY: bb.4:
+ ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32)
+ ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret float %val
}
@@ -924,6 +1643,46 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %
; CHECK: bb.4:
; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092
+ ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
+ ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
+ ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GREEDY: bb.2:
+ ; GREEDY: successors: %bb.3, %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %18, %bb.2
+ ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
+ ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load 4)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: bb.3:
+ ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GREEDY: bb.4:
+ ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32)
+ ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4092
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret float %val
@@ -972,6 +1731,47 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %
; CHECK: bb.4:
; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096
+ ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
+ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
+ ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
+ ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GREEDY: bb.2:
+ ; GREEDY: successors: %bb.3, %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %19, %bb.2
+ ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
+ ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: bb.3:
+ ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GREEDY: bb.4:
+ ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32)
+ ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4096
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret float %val
@@ -1018,6 +1818,45 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc)
; CHECK: bb.4:
; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095
+ ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
+ ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GREEDY: bb.2:
+ ; GREEDY: successors: %bb.3, %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2
+ ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %7(s32), %bb.2
+ ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load 4 + 4095, align 1)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: bb.3:
+ ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GREEDY: bb.4:
+ ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32)
+ ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0)
ret float %val
}
@@ -1063,6 +1902,45 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc)
; CHECK: bb.4:
; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
+ ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GREEDY: bb.2:
+ ; GREEDY: successors: %bb.3, %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2
+ ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %7(s32), %bb.2
+ ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: bb.3:
+ ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GREEDY: bb.4:
+ ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32)
+ ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0)
ret float %val
}
@@ -1122,6 +2000,58 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %
; CHECK: $vgpr6 = COPY [[UV8]](s32)
; CHECK: $vgpr7 = COPY [[UV9]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064
+ ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
+ ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
+ ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GREEDY: bb.2:
+ ; GREEDY: successors: %bb.3, %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %30, %bb.2
+ ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %21(<4 x s32>), %bb.2
+ ; GREEDY: [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
+ ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: bb.3:
+ ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GREEDY: bb.4:
+ ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>)
+ ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>)
+ ; GREEDY: $vgpr0 = COPY [[UV2]](s32)
+ ; GREEDY: $vgpr1 = COPY [[UV3]](s32)
+ ; GREEDY: $vgpr2 = COPY [[UV4]](s32)
+ ; GREEDY: $vgpr3 = COPY [[UV5]](s32)
+ ; GREEDY: $vgpr4 = COPY [[UV6]](s32)
+ ; GREEDY: $vgpr5 = COPY [[UV7]](s32)
+ ; GREEDY: $vgpr6 = COPY [[UV8]](s32)
+ ; GREEDY: $vgpr7 = COPY [[UV9]](s32)
+ ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%soffset = add i32 %soffset.base, 4064
%val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x float> %val
@@ -1183,6 +2113,59 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %
; CHECK: $vgpr6 = COPY [[UV8]](s32)
; CHECK: $vgpr7 = COPY [[UV9]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068
+ ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
+ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
+ ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
+ ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GREEDY: bb.2:
+ ; GREEDY: successors: %bb.3, %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %31, %bb.2
+ ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
+ ; GREEDY: [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %23(<4 x s32>), %bb.2
+ ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: bb.3:
+ ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GREEDY: bb.4:
+ ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>)
+ ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>)
+ ; GREEDY: $vgpr0 = COPY [[UV2]](s32)
+ ; GREEDY: $vgpr1 = COPY [[UV3]](s32)
+ ; GREEDY: $vgpr2 = COPY [[UV4]](s32)
+ ; GREEDY: $vgpr3 = COPY [[UV5]](s32)
+ ; GREEDY: $vgpr4 = COPY [[UV6]](s32)
+ ; GREEDY: $vgpr5 = COPY [[UV7]](s32)
+ ; GREEDY: $vgpr6 = COPY [[UV8]](s32)
+ ; GREEDY: $vgpr7 = COPY [[UV9]](s32)
+ ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%soffset = add i32 %soffset.base, 4068
%val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x float> %val
@@ -1242,6 +2225,59 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %
; CHECK: $vgpr6 = COPY [[UV8]](s32)
; CHECK: $vgpr7 = COPY [[UV9]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096
+ ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
+ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
+ ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
+ ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GREEDY: bb.2:
+ ; GREEDY: successors: %bb.3, %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %31, %bb.2
+ ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
+ ; GREEDY: [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %23(<4 x s32>), %bb.2
+ ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: bb.3:
+ ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GREEDY: bb.4:
+ ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>)
+ ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>)
+ ; GREEDY: $vgpr0 = COPY [[UV2]](s32)
+ ; GREEDY: $vgpr1 = COPY [[UV3]](s32)
+ ; GREEDY: $vgpr2 = COPY [[UV4]](s32)
+ ; GREEDY: $vgpr3 = COPY [[UV5]](s32)
+ ; GREEDY: $vgpr4 = COPY [[UV6]](s32)
+ ; GREEDY: $vgpr5 = COPY [[UV7]](s32)
+ ; GREEDY: $vgpr6 = COPY [[UV8]](s32)
+ ; GREEDY: $vgpr7 = COPY [[UV9]](s32)
+ ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%soffset = add i32 %soffset.base, 4096
%val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x float> %val
@@ -1300,6 +2336,58 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
; CHECK: $vgpr6 = COPY [[UV8]](s32)
; CHECK: $vgpr7 = COPY [[UV9]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000
+ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
+ ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
+ ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GREEDY: bb.2:
+ ; GREEDY: successors: %bb.3, %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %30, %bb.2
+ ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %21(<4 x s32>), %bb.2
+ ; GREEDY: [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
+ ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: bb.3:
+ ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GREEDY: bb.4:
+ ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>)
+ ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>)
+ ; GREEDY: $vgpr0 = COPY [[UV2]](s32)
+ ; GREEDY: $vgpr1 = COPY [[UV3]](s32)
+ ; GREEDY: $vgpr2 = COPY [[UV4]](s32)
+ ; GREEDY: $vgpr3 = COPY [[UV5]](s32)
+ ; GREEDY: $vgpr4 = COPY [[UV6]](s32)
+ ; GREEDY: $vgpr5 = COPY [[UV7]](s32)
+ ; GREEDY: $vgpr6 = COPY [[UV8]](s32)
+ ; GREEDY: $vgpr7 = COPY [[UV9]](s32)
+ ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%soffset = add i32 %offset.base, 5000
%val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x float> %val
@@ -1358,6 +2446,58 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
; CHECK: $vgpr6 = COPY [[UV8]](s32)
; CHECK: $vgpr7 = COPY [[UV9]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076
+ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
+ ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
+ ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GREEDY: bb.2:
+ ; GREEDY: successors: %bb.3, %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %30, %bb.2
+ ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %21(<4 x s32>), %bb.2
+ ; GREEDY: [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
+ ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: bb.3:
+ ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GREEDY: bb.4:
+ ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>)
+ ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>)
+ ; GREEDY: $vgpr0 = COPY [[UV2]](s32)
+ ; GREEDY: $vgpr1 = COPY [[UV3]](s32)
+ ; GREEDY: $vgpr2 = COPY [[UV4]](s32)
+ ; GREEDY: $vgpr3 = COPY [[UV5]](s32)
+ ; GREEDY: $vgpr4 = COPY [[UV6]](s32)
+ ; GREEDY: $vgpr5 = COPY [[UV7]](s32)
+ ; GREEDY: $vgpr6 = COPY [[UV8]](s32)
+ ; GREEDY: $vgpr7 = COPY [[UV9]](s32)
+ ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%soffset = add i32 %offset.base, 4076
%val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x float> %val
@@ -1416,6 +2556,58 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
; CHECK: $vgpr6 = COPY [[UV8]](s32)
; CHECK: $vgpr7 = COPY [[UV9]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080
+ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
+ ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
+ ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GREEDY: bb.2:
+ ; GREEDY: successors: %bb.3, %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %30, %bb.2
+ ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %21(<4 x s32>), %bb.2
+ ; GREEDY: [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
+ ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: bb.3:
+ ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GREEDY: bb.4:
+ ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>)
+ ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>)
+ ; GREEDY: $vgpr0 = COPY [[UV2]](s32)
+ ; GREEDY: $vgpr1 = COPY [[UV3]](s32)
+ ; GREEDY: $vgpr2 = COPY [[UV4]](s32)
+ ; GREEDY: $vgpr3 = COPY [[UV5]](s32)
+ ; GREEDY: $vgpr4 = COPY [[UV6]](s32)
+ ; GREEDY: $vgpr5 = COPY [[UV7]](s32)
+ ; GREEDY: $vgpr6 = COPY [[UV8]](s32)
+ ; GREEDY: $vgpr7 = COPY [[UV9]](s32)
+ ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%soffset = add i32 %offset.base, 4080
%val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x float> %val
@@ -1473,6 +2665,57 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4
; CHECK: $vgpr6 = COPY [[UV8]](s32)
; CHECK: $vgpr7 = COPY [[UV9]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064
+ ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
+ ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GREEDY: bb.2:
+ ; GREEDY: successors: %bb.3, %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %30, %bb.2
+ ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %21(<4 x s32>), %bb.2
+ ; GREEDY: [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
+ ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load 16 + 4064, align 4)
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load 16 + 4064, align 4)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: bb.3:
+ ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GREEDY: bb.4:
+ ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>)
+ ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>)
+ ; GREEDY: $vgpr0 = COPY [[UV2]](s32)
+ ; GREEDY: $vgpr1 = COPY [[UV3]](s32)
+ ; GREEDY: $vgpr2 = COPY [[UV4]](s32)
+ ; GREEDY: $vgpr3 = COPY [[UV5]](s32)
+ ; GREEDY: $vgpr4 = COPY [[UV6]](s32)
+ ; GREEDY: $vgpr5 = COPY [[UV7]](s32)
+ ; GREEDY: $vgpr6 = COPY [[UV8]](s32)
+ ; GREEDY: $vgpr7 = COPY
[[UV9]](s32) + ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) ret <8 x float> %val } @@ -1494,6 +2737,22 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg % ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load 4) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; GREEDY: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.v, %offset.s %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) ret float %val @@ -1516,6 +2775,22 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg % ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load 4) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; GREEDY: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.s, %offset.v %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) ret float %val @@ -1542,6 +2817,26 @@ define 
amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load 4) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, %offset.s %offset = add i32 %offset.base, 1024 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -1569,6 +2864,26 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load 4) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY: SI_RETURN_TO_EPILOG implicit 
$vgpr0 %offset.base = add i32 %offset.s, %offset.v %offset = add i32 %offset.base, 1024 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -1595,6 +2910,24 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load 4) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY5]], [[C]] + ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, 1024 %offset = add i32 %offset.base, %offset.v %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -1621,6 +2954,25 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load 4) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr + ; GREEDY: bb.1 (%ir-block.0): + ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY: 
SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, 1024 %offset = add i32 %offset.base, %offset.s %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) From b3afad046301d8bb1f4471aceaad704b87de3a69 Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Wed, 9 Sep 2020 18:03:00 -0700 Subject: [PATCH 0615/1079] [GlobalISel] Add a `X, Y = G_UNMERGE(G_ZEXT Z)` -> X = G_ZEXT Z; Y = 0 combine Add a combiner helper to transform unmerge of zext into one zext and a constant 0 Differential Revision: https://reviews.llvm.org/D87427 --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 4 + .../include/llvm/Target/GlobalISel/Combine.td | 11 +- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 61 +++++++ .../AArch64/GlobalISel/combine-unmerge.mir | 107 ++++++++++++ .../CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll | 155 +++++++---------- .../AMDGPU/GlobalISel/shl-ext-reduce.ll | 4 +- .../CodeGen/AMDGPU/GlobalISel/srem.i32.ll | 159 +++++++----------- .../CodeGen/AMDGPU/GlobalISel/udiv.i32.ll | 157 +++++++---------- .../CodeGen/AMDGPU/GlobalISel/urem.i32.ll | 157 +++++++---------- 9 files changed, 424 insertions(+), 391 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index d740aa07848e5..3fd55386b054b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -262,6 +262,10 @@ class CombinerHelper { bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI); bool applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI); + /// Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0 + bool matchCombineUnmergeZExtToZExt(MachineInstr &MI); + bool applyCombineUnmergeZExtToZExt(MachineInstr &MI); + /// Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space. bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg); bool applyCombineI2PToP2I(MachineInstr &MI, Register &Reg); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index be76980b55006..fa75d7d95489b 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -429,6 +429,14 @@ def unmerge_dead_to_trunc : GICombineRule< (apply [{ return Helper.applyCombineUnmergeWithDeadLanesToTrunc(*${d}); }]) >; +// Transform x,y = unmerge(zext(z)) -> x = zext z; y = 0. +def unmerge_zext_to_zext : GICombineRule< + (defs root:$d), + (match (wip_match_opcode G_UNMERGE_VALUES): $d, + [{ return Helper.matchCombineUnmergeZExtToZExt(*${d}); }]), + (apply [{ return Helper.applyCombineUnmergeZExtToZExt(*${d}); }]) +>; + // FIXME: These should use the custom predicate feature once it lands. 
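+// Illustrative MIR for unmerge_zext_to_zext (a sketch only; the value names are arbitrary, and the combine-unmerge.mir tests added below exercise the same patterns): +//   %ext:_(s64) = G_ZEXT %z(s32) +//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %ext(s64) +// combines so that %lo reuses %z directly and %hi becomes G_CONSTANT i32 0. +// If the first definition were wider than the zext source, %lo would instead +// be rebuilt as a G_ZEXT of %z.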
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -460,4 +468,5 @@ def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain, width_reduction_combines, select_combines, known_bits_simplifications, ext_ext_fold, not_cmp_fold, opt_brcond_by_inverting_cond, - unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc]>; + unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc, + unmerge_zext_to_zext]>; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index f622b8a089fb5..5eff975127d77 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1687,6 +1687,67 @@ bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { return true; } +bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + Register Dst0Reg = MI.getOperand(0).getReg(); + LLT Dst0Ty = MRI.getType(Dst0Reg); + // G_ZEXT on vector applies to each lane, so it will + // affect all destinations. Therefore we won't be able + // to simplify the unmerge to just the first definition. + if (Dst0Ty.isVector()) + return false; + Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + if (SrcTy.isVector()) + return false; + + Register ZExtSrcReg; + if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg)))) + return false; + + // Finally we can replace the first definition with + // a zext of the source if the definition is big enough to hold + // all of ZExtSrc bits. + LLT ZExtSrcTy = MRI.getType(ZExtSrcReg); + return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits(); +} + +bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + + Register Dst0Reg = MI.getOperand(0).getReg(); + + MachineInstr *ZExtInstr = + MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg()); + assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT && + "Expecting a G_ZEXT"); + + Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg(); + LLT Dst0Ty = MRI.getType(Dst0Reg); + LLT ZExtSrcTy = MRI.getType(ZExtSrcReg); + + Builder.setInstrAndDebugLoc(MI); + + if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) { + Builder.buildZExt(Dst0Reg, ZExtSrcReg); + } else { + assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() && + "ZExt src doesn't fit in destination"); + replaceRegWith(MRI, Dst0Reg, ZExtSrcReg); + } + + Register ZeroReg; + for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) { + if (!ZeroReg) + ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0); + replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg); + } + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir index 64ce862274396..53c75b4d84d95 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir @@ -369,3 +369,110 @@ body: | %1:_(<2 x s16>),%2:_(<2 x s16>) = G_UNMERGE_VALUES %0(s64) $w0 = COPY %1(<2 x s16>) ... + +# Transform unmerge(zext) into zext. 
+# In this test, the source of the zext is the same size as the first definition +# of the unmerge. Therefore we can just reuse the input of the zext for +# this definition. +--- +name: test_combine_unmerge_zext_to_zext_same_size +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_zext_to_zext_same_size + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: $w0 = COPY [[COPY]](s32) + ; CHECK: $w1 = COPY [[C]](s32) + %0:_(s32) = COPY $w0 + %3:_(s64) = G_ZEXT %0(s32) + %1:_(s32),%2:_(s32) = G_UNMERGE_VALUES %3(s64) + $w0 = COPY %1(s32) + $w1 = COPY %2(s32) +... + +# Transform unmerge(zext) into zext. +# In this test, the source of the zext is smaller than the first definition +# of the unmerge. Therefore a G_ZEXT is required. +--- +name: test_combine_unmerge_zext_to_zext +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_zext_to_zext + ; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $b0 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[COPY]](s8) + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; CHECK: $h0 = COPY [[ZEXT]](s16) + ; CHECK: $h1 = COPY [[C]](s16) + ; CHECK: $h2 = COPY [[C]](s16) + ; CHECK: $h3 = COPY [[C]](s16) + %0:_(s8) = COPY $b0 + %3:_(s64) = G_ZEXT %0(s8) + %1:_(s16),%2:_(s16),%4:_(s16),%5:_(s16) = G_UNMERGE_VALUES %3(s64) + $h0 = COPY %1(s16) + $h1 = COPY %2(s16) + $h2 = COPY %4(s16) + $h3 = COPY %5(s16) +... + +# Check that we don't apply the unmerge(zext) to zext transformation +# when the first destination of the unmerge is smaller than the source +# of the zext. +--- +name: test_dont_combine_unmerge_zext_to_zext_src_bigger +body: | + bb.1: + ; CHECK-LABEL: name: test_dont_combine_unmerge_zext_to_zext_src_bigger + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ZEXT]](s64) + ; CHECK: $h0 = COPY [[UV]](s16) + ; CHECK: $h1 = COPY [[UV1]](s16) + ; CHECK: $h2 = COPY [[UV2]](s16) + ; CHECK: $h3 = COPY [[UV3]](s16) + %0:_(s32) = COPY $w0 + %3:_(s64) = G_ZEXT %0(s32) + %1:_(s16),%2:_(s16),%4:_(s16),%5:_(s16) = G_UNMERGE_VALUES %3(s64) + $h0 = COPY %1(s16) + $h1 = COPY %2(s16) + $h2 = COPY %4(s16) + $h3 = COPY %5(s16) +... + +# Check that we don't apply the unmerge(zext) to zext transformation +# when the input zext deals with a vector type. +--- +name: test_dont_combine_unmerge_zext_to_zext_src_vector +body: | + bb.1: + ; CHECK-LABEL: name: test_dont_combine_unmerge_zext_to_zext_src_vector + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $w0 + ; CHECK: [[ZEXT:%[0-9]+]]:_(<2 x s32>) = G_ZEXT [[COPY]](<2 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](<2 x s32>) + ; CHECK: $w0 = COPY [[UV]](s32) + ; CHECK: $w1 = COPY [[UV1]](s32) + %0:_(<2 x s16>) = COPY $w0 + %3:_(<2 x s32>) = G_ZEXT %0(<2 x s16>) + %1:_(s32),%2:_(s32) = G_UNMERGE_VALUES %3(<2 x s32>) + $w0 = COPY %1(s32) + $w1 = COPY %2(s32) +... + +# Check that we don't apply the unmerge(zext) to zext transformation +# when the destination type is a vector type. +# We could actually handle this case but we would need to insert a cast.
+--- +name: test_dont_combine_unmerge_zext_to_zext_dst_vector +body: | + bb.1: + ; CHECK-LABEL: name: test_dont_combine_unmerge_zext_to_zext_dst_vector + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[ZEXT]](s64) + ; CHECK: $w0 = COPY [[UV]](<2 x s16>) + ; CHECK: $w1 = COPY [[UV1]](<2 x s16>) + %0:_(s32) = COPY $w0 + %3:_(s64) = G_ZEXT %0(s32) + %1:_(<2 x s16>),%2:_(<2 x s16>) = G_UNMERGE_VALUES %3(s64) + $w0 = COPY %1(<2 x s16>) + $w1 = COPY %2(<2 x s16>) +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll index 57737aeb886fa..3aee949b5bde6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll @@ -50,20 +50,16 @@ define i32 @v_sdiv_i32(i32 %num, i32 %den) { ; CGP-NEXT: v_xor_b32_e32 v1, v1, v3 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 -; CGP-NEXT: v_mul_lo_u32 v5, v0, 0 ; CGP-NEXT: v_rcp_f32_e32 v2, v2 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2 -; CGP-NEXT: v_mul_lo_u32 v6, v2, 0 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v3 +; CGP-NEXT: v_mul_lo_u32 v5, 0, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v3 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; CGP-NEXT: v_mul_lo_u32 v3, 0, v2 ; CGP-NEXT: v_mul_hi_u32 v2, v0, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; CGP-NEXT: v_mul_lo_u32 v3, v2, v1 ; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2 @@ -127,34 +123,29 @@ define amdgpu_ps i32 @s_sdiv_i32(i32 inreg %num, i32 inreg %den) { ; CGP-NEXT: s_add_i32 s0, s0, s2 ; CGP-NEXT: s_add_i32 s1, s1, s3 ; CGP-NEXT: s_xor_b32 s0, s0, s2 -; CGP-NEXT: s_xor_b32 s5, s1, s3 -; CGP-NEXT: v_cvt_f32_u32_e32 v0, s5 -; CGP-NEXT: s_sub_i32 s1, 0, s5 -; CGP-NEXT: s_bfe_u64 s[2:3], s[0:1], 0x200000 +; CGP-NEXT: s_xor_b32 s2, s1, s3 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, s2 +; CGP-NEXT: s_sub_i32 s1, 0, s2 ; CGP-NEXT: v_rcp_f32_e32 v0, v0 -; CGP-NEXT: v_mul_lo_u32 v1, s2, 0 ; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CGP-NEXT: v_mul_lo_u32 v2, s1, v0 -; CGP-NEXT: v_mul_lo_u32 v3, v0, 0 -; CGP-NEXT: v_mul_lo_u32 v4, 0, v2 -; CGP-NEXT: v_mul_hi_u32 v2, v0, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_mul_lo_u32 v2, s3, v0 -; CGP-NEXT: v_mul_hi_u32 v0, s2, v0 +; CGP-NEXT: v_mul_lo_u32 v1, s1, v0 +; CGP-NEXT: v_mul_lo_u32 v2, 0, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_mul_lo_u32 v1, 0, v0 +; CGP-NEXT: v_mul_hi_u32 v0, s0, v0 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CGP-NEXT: v_mul_lo_u32 v1, v0, s5 +; CGP-NEXT: v_mul_lo_u32 v1, v0, s2 ; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, s0, v1 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s5, v1 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s2, v1 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; CGP-NEXT: v_subrev_i32_e64 v2, s[0:1], s5, v1 +; CGP-NEXT: v_subrev_i32_e64 v2, s[0:1], s2, v1 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s5, 
v1 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s2, v1 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; CGP-NEXT: v_xor_b32_e32 v0, s4, v0 ; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s4, v0 @@ -246,36 +237,28 @@ define <2 x i32> @v_sdiv_v2i32(<2 x i32> %num, <2 x i32> %den) { ; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 -; CGP-NEXT: v_mul_lo_u32 v6, v0, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3 -; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v3 -; CGP-NEXT: v_mul_lo_u32 v11, v1, 0 +; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 ; CGP-NEXT: v_rcp_f32_e32 v4, v4 -; CGP-NEXT: v_rcp_f32_e32 v7, v7 +; CGP-NEXT: v_rcp_f32_e32 v6, v6 ; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 +; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v4 -; CGP-NEXT: v_mul_lo_u32 v12, v4, 0 -; CGP-NEXT: v_mul_lo_u32 v10, v10, v7 -; CGP-NEXT: v_mul_lo_u32 v13, v7, 0 -; CGP-NEXT: v_mul_lo_u32 v14, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v7, v7, v6 +; CGP-NEXT: v_mul_lo_u32 v10, 0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, v4, v5 -; CGP-NEXT: v_mul_lo_u32 v15, 0, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; CGP-NEXT: v_mul_lo_u32 v11, 0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7 +; CGP-NEXT: v_mul_lo_u32 v6, 0, v4 ; CGP-NEXT: v_mul_hi_u32 v4, v0, v4 -; CGP-NEXT: v_mul_lo_u32 v10, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v7, 0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v11 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v6, v4, v2 @@ -715,42 +698,34 @@ define <2 x i32> @v_sdiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) { ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 ; CGP-NEXT: v_xor_b32_e32 v4, v4, v6 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_mul_lo_u32 v8, v0, 0 ; CGP-NEXT: v_xor_b32_e32 v5, v5, v7 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; CGP-NEXT: v_mul_lo_u32 v9, v1, 0 ; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 ; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 ; CGP-NEXT: v_cvt_f32_u32_e32 v6, v2 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v10, v3 -; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v8, v3 +; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v3 ; CGP-NEXT: v_rcp_f32_e32 v6, v6 -; CGP-NEXT: v_rcp_f32_e32 v10, v10 +; CGP-NEXT: v_rcp_f32_e32 v8, v8 ; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; CGP-NEXT: v_mul_f32_e32 v10, 0x4f7ffffe, v10 +; CGP-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v10, v10 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v6 -; CGP-NEXT: v_mul_lo_u32 v12, v6, 0 -; CGP-NEXT: v_mul_lo_u32 v11, v11, v10 -; CGP-NEXT: v_mul_lo_u32 v13, v10, 0 -; CGP-NEXT: v_mul_lo_u32 v14, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v9, v9, v8 +; CGP-NEXT: v_mul_lo_u32 v10, 0, v7 ; CGP-NEXT: v_mul_hi_u32 
v7, v6, v7 -; CGP-NEXT: v_mul_lo_u32 v15, 0, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v10, v11 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v12, v7 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_mul_lo_u32 v11, 0, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v11 -; CGP-NEXT: v_mul_lo_u32 v10, 0, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v6 ; CGP-NEXT: v_mul_hi_u32 v6, v0, v6 -; CGP-NEXT: v_mul_lo_u32 v11, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 ; CGP-NEXT: v_mul_lo_u32 v8, v6, v2 @@ -828,20 +803,16 @@ define i32 @v_sdiv_i32_24bit(i32 %num, i32 %den) { ; CGP-NEXT: v_and_b32_e32 v1, s4, v1 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 -; CGP-NEXT: v_mul_lo_u32 v4, v0, 0 ; CGP-NEXT: v_rcp_f32_e32 v2, v2 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2 -; CGP-NEXT: v_mul_lo_u32 v5, v2, 0 -; CGP-NEXT: v_mul_lo_u32 v6, 0, v3 +; CGP-NEXT: v_mul_lo_u32 v4, 0, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; CGP-NEXT: v_mul_lo_u32 v3, 0, v2 ; CGP-NEXT: v_mul_hi_u32 v2, v0, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; CGP-NEXT: v_mul_lo_u32 v3, v2, v1 ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2 @@ -937,36 +908,28 @@ define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) { ; CGP-NEXT: v_and_b32_e32 v3, s4, v3 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 -; CGP-NEXT: v_mul_lo_u32 v6, v0, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3 -; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v3 -; CGP-NEXT: v_mul_lo_u32 v9, v1, 0 +; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 ; CGP-NEXT: v_rcp_f32_e32 v4, v4 -; CGP-NEXT: v_rcp_f32_e32 v7, v7 +; CGP-NEXT: v_rcp_f32_e32 v6, v6 ; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 +; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v4 -; CGP-NEXT: v_mul_lo_u32 v10, v4, 0 -; CGP-NEXT: v_mul_lo_u32 v8, v8, v7 -; CGP-NEXT: v_mul_lo_u32 v11, v7, 0 -; CGP-NEXT: v_mul_lo_u32 v12, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v7, v7, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, v4, v5 -; CGP-NEXT: v_mul_lo_u32 v13, 0, v8 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v8 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 +; CGP-NEXT: 
v_add_i32_e32 v5, vcc, v6, v7 +; CGP-NEXT: v_mul_lo_u32 v6, 0, v4 ; CGP-NEXT: v_mul_hi_u32 v4, v0, v4 -; CGP-NEXT: v_mul_lo_u32 v8, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v7, 0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v6, v4, v2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll index b2f3dd8b2bf41..74832a1cfb257 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll @@ -10,7 +10,7 @@ define amdgpu_ps i64 @s_shl_i64_zext_i32(i32 inreg %x) { ; GCN: ; %bb.0: ; GCN-NEXT: s_andn2_b32 s0, s0, -2.0 ; GCN-NEXT: s_lshl_b32 s0, s0, 2 -; GCN-NEXT: s_bfe_u64 s[0:1], s[0:1], 0x200000 +; GCN-NEXT: s_mov_b32 s1, 0 ; GCN-NEXT: ; return to shader part epilog %and = and i32 %x, 1073741823 %ext = zext i32 %and to i64 @@ -37,7 +37,7 @@ define amdgpu_ps i64 @s_shl_i64_sext_i32(i32 inreg %x) { ; GCN: ; %bb.0: ; GCN-NEXT: s_and_b32 s0, s0, 0x1fffffff ; GCN-NEXT: s_lshl_b32 s0, s0, 2 -; GCN-NEXT: s_bfe_u64 s[0:1], s[0:1], 0x200000 +; GCN-NEXT: s_mov_b32 s1, 0 ; GCN-NEXT: ; return to shader part epilog %and = and i32 %x, 536870911 %ext = sext i32 %and to i64 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll index 320d814be8a94..ec1b610fdd819 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll @@ -46,20 +46,16 @@ define i32 @v_srem_i32(i32 %num, i32 %den) { ; CGP-NEXT: v_xor_b32_e32 v1, v1, v3 ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v1 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v1 -; CGP-NEXT: v_mul_lo_u32 v5, v0, 0 ; CGP-NEXT: v_rcp_f32_e32 v3, v3 ; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v3 -; CGP-NEXT: v_mul_lo_u32 v6, v3, 0 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 +; CGP-NEXT: v_mul_lo_u32 v5, 0, v4 ; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 ; CGP-NEXT: v_mul_lo_u32 v4, 0, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v0, v3 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v1 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 @@ -112,29 +108,24 @@ define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) { ; ; CGP-LABEL: s_srem_i32: ; CGP: ; %bb.0: -; CGP-NEXT: s_ashr_i32 s4, s0, 31 -; CGP-NEXT: s_ashr_i32 s2, s1, 31 -; CGP-NEXT: s_add_i32 s0, s0, s4 -; CGP-NEXT: s_add_i32 s1, s1, s2 -; CGP-NEXT: s_xor_b32 s0, s0, s4 -; CGP-NEXT: s_xor_b32 s1, s1, s2 +; CGP-NEXT: s_ashr_i32 s2, s0, 31 +; CGP-NEXT: s_ashr_i32 s3, s1, 31 +; CGP-NEXT: s_add_i32 s0, s0, s2 +; CGP-NEXT: s_add_i32 s1, s1, s3 +; CGP-NEXT: s_xor_b32 s0, s0, s2 +; CGP-NEXT: s_xor_b32 s1, s1, s3 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, s1 -; CGP-NEXT: s_sub_i32 s5, 0, s1 -; CGP-NEXT: s_bfe_u64 s[2:3], s[0:1], 0x200000 +; CGP-NEXT: s_sub_i32 s3, 0, s1 ; CGP-NEXT: v_rcp_f32_e32 v0, v0 -; CGP-NEXT: v_mul_lo_u32 v1, s2, 0 ; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CGP-NEXT: v_mul_lo_u32 v2, s5, v0 -; CGP-NEXT: v_mul_lo_u32 v3, v0, 0 -; CGP-NEXT: v_mul_lo_u32 v4, 0, v2 -; CGP-NEXT: v_mul_hi_u32 v2, v0, v2 -; 
CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_mul_lo_u32 v2, s3, v0 -; CGP-NEXT: v_mul_hi_u32 v0, s2, v0 +; CGP-NEXT: v_mul_lo_u32 v1, s3, v0 +; CGP-NEXT: v_mul_lo_u32 v2, 0, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_mul_lo_u32 v1, 0, v0 +; CGP-NEXT: v_mul_hi_u32 v0, s0, v0 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 ; CGP-NEXT: v_mul_lo_u32 v0, v0, s1 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, s0, v0 @@ -144,8 +135,8 @@ define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) { ; CGP-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s1, v0 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; CGP-NEXT: v_xor_b32_e32 v0, s4, v0 -; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s4, v0 +; CGP-NEXT: v_xor_b32_e32 v0, s2, v0 +; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0 ; CGP-NEXT: v_readfirstlane_b32 s0, v0 ; CGP-NEXT: ; return to shader part epilog %result = srem i32 %num, %den @@ -226,36 +217,28 @@ define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) { ; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 ; CGP-NEXT: v_cvt_f32_u32_e32 v5, v2 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v2 -; CGP-NEXT: v_mul_lo_u32 v8, v0, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v9, v3 -; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v3 -; CGP-NEXT: v_mul_lo_u32 v11, v1, 0 +; CGP-NEXT: v_cvt_f32_u32_e32 v8, v3 +; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v3 ; CGP-NEXT: v_rcp_f32_e32 v5, v5 -; CGP-NEXT: v_rcp_f32_e32 v9, v9 +; CGP-NEXT: v_rcp_f32_e32 v8, v8 ; CGP-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5 -; CGP-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 +; CGP-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 ; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v5 -; CGP-NEXT: v_mul_lo_u32 v12, v5, 0 -; CGP-NEXT: v_mul_lo_u32 v10, v10, v9 -; CGP-NEXT: v_mul_lo_u32 v13, v9, 0 -; CGP-NEXT: v_mul_lo_u32 v14, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v9, v9, v8 +; CGP-NEXT: v_mul_lo_u32 v10, 0, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v5, v7 -; CGP-NEXT: v_mul_lo_u32 v15, 0, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v9, v10 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v12, v7 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; CGP-NEXT: v_mul_lo_u32 v11, 0, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v10 -; CGP-NEXT: v_mul_lo_u32 v9, 0, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, v0, v5 -; CGP-NEXT: v_mul_lo_u32 v10, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v11 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v2 @@ -661,41 +644,33 @@ define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) { ; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_mul_lo_u32 v8, v0, 0 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; CGP-NEXT: v_mul_lo_u32 v9, v1, 0 ; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 ; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 ; 
CGP-NEXT: v_cvt_f32_u32_e32 v6, v2 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v10, v3 -; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v8, v3 +; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v3 ; CGP-NEXT: v_rcp_f32_e32 v6, v6 -; CGP-NEXT: v_rcp_f32_e32 v10, v10 +; CGP-NEXT: v_rcp_f32_e32 v8, v8 ; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; CGP-NEXT: v_mul_f32_e32 v10, 0x4f7ffffe, v10 +; CGP-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v10, v10 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v6 -; CGP-NEXT: v_mul_lo_u32 v12, v6, 0 -; CGP-NEXT: v_mul_lo_u32 v11, v11, v10 -; CGP-NEXT: v_mul_lo_u32 v13, v10, 0 -; CGP-NEXT: v_mul_lo_u32 v14, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v9, v9, v8 +; CGP-NEXT: v_mul_lo_u32 v10, 0, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 -; CGP-NEXT: v_mul_lo_u32 v15, 0, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v10, v11 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v12, v7 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_mul_lo_u32 v11, 0, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v11 -; CGP-NEXT: v_mul_lo_u32 v10, 0, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v6 ; CGP-NEXT: v_mul_hi_u32 v6, v0, v6 -; CGP-NEXT: v_mul_lo_u32 v11, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 ; CGP-NEXT: v_mul_lo_u32 v6, v6, v2 @@ -766,20 +741,16 @@ define i32 @v_srem_i32_24bit(i32 %num, i32 %den) { ; CGP-NEXT: v_and_b32_e32 v1, s4, v1 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 -; CGP-NEXT: v_mul_lo_u32 v4, v0, 0 ; CGP-NEXT: v_rcp_f32_e32 v2, v2 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2 -; CGP-NEXT: v_mul_lo_u32 v5, v2, 0 -; CGP-NEXT: v_mul_lo_u32 v6, 0, v3 +; CGP-NEXT: v_mul_lo_u32 v4, 0, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; CGP-NEXT: v_mul_lo_u32 v3, 0, v2 ; CGP-NEXT: v_mul_hi_u32 v2, v0, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; CGP-NEXT: v_mul_lo_u32 v2, v2, v1 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 @@ -867,36 +838,28 @@ define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) { ; CGP-NEXT: v_and_b32_e32 v3, s4, v3 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 -; CGP-NEXT: v_mul_lo_u32 v6, v0, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3 -; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v3 -; CGP-NEXT: v_mul_lo_u32 v9, v1, 0 +; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 ; CGP-NEXT: v_rcp_f32_e32 v4, v4 -; CGP-NEXT: v_rcp_f32_e32 v7, v7 +; CGP-NEXT: v_rcp_f32_e32 v6, v6 ; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 +; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; 
CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v4 -; CGP-NEXT: v_mul_lo_u32 v10, v4, 0 -; CGP-NEXT: v_mul_lo_u32 v8, v8, v7 -; CGP-NEXT: v_mul_lo_u32 v11, v7, 0 -; CGP-NEXT: v_mul_lo_u32 v12, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v7, v7, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, v4, v5 -; CGP-NEXT: v_mul_lo_u32 v13, 0, v8 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v8 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7 +; CGP-NEXT: v_mul_lo_u32 v6, 0, v4 ; CGP-NEXT: v_mul_hi_u32 v4, v0, v4 -; CGP-NEXT: v_mul_lo_u32 v8, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v7, 0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll index 54eebc9205796..6e0ffe656dfa2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll @@ -34,20 +34,16 @@ define i32 @v_udiv_i32(i32 %num, i32 %den) { ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 -; CGP-NEXT: v_mul_lo_u32 v4, v0, 0 ; CGP-NEXT: v_rcp_f32_e32 v2, v2 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2 -; CGP-NEXT: v_mul_lo_u32 v5, v2, 0 -; CGP-NEXT: v_mul_lo_u32 v6, 0, v3 +; CGP-NEXT: v_mul_lo_u32 v4, 0, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; CGP-NEXT: v_mul_lo_u32 v3, 0, v2 ; CGP-NEXT: v_mul_hi_u32 v2, v0, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; CGP-NEXT: v_mul_lo_u32 v3, v2, v1 ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2 @@ -95,22 +91,17 @@ define amdgpu_ps i32 @s_udiv_i32(i32 inreg %num, i32 inreg %den) { ; CGP-LABEL: s_udiv_i32: ; CGP: ; %bb.0: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, s1 -; CGP-NEXT: s_sub_i32 s4, 0, s1 -; CGP-NEXT: s_bfe_u64 s[2:3], s[0:1], 0x200000 +; CGP-NEXT: s_sub_i32 s2, 0, s1 ; CGP-NEXT: v_rcp_f32_e32 v0, v0 -; CGP-NEXT: v_mul_lo_u32 v1, s2, 0 ; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CGP-NEXT: v_mul_lo_u32 v2, s4, v0 -; CGP-NEXT: v_mul_lo_u32 v3, v0, 0 -; CGP-NEXT: v_mul_lo_u32 v4, 0, v2 -; CGP-NEXT: v_mul_hi_u32 v2, v0, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_mul_lo_u32 v2, s3, v0 -; CGP-NEXT: v_mul_hi_u32 v0, s2, v0 +; CGP-NEXT: v_mul_lo_u32 v1, s2, v0 +; CGP-NEXT: v_mul_lo_u32 v2, 0, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_mul_lo_u32 v1, 0, v0 +; CGP-NEXT: v_mul_hi_u32 v0, s0, v0 ; CGP-NEXT: v_add_i32_e32 
v0, vcc, v1, v0 ; CGP-NEXT: v_mul_lo_u32 v1, v0, s1 ; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0 @@ -178,36 +169,28 @@ define <2 x i32> @v_udiv_v2i32(<2 x i32> %num, <2 x i32> %den) { ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 -; CGP-NEXT: v_mul_lo_u32 v6, v0, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3 -; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v3 -; CGP-NEXT: v_mul_lo_u32 v9, v1, 0 +; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 ; CGP-NEXT: v_rcp_f32_e32 v4, v4 -; CGP-NEXT: v_rcp_f32_e32 v7, v7 +; CGP-NEXT: v_rcp_f32_e32 v6, v6 ; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 +; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v4 -; CGP-NEXT: v_mul_lo_u32 v10, v4, 0 -; CGP-NEXT: v_mul_lo_u32 v8, v8, v7 -; CGP-NEXT: v_mul_lo_u32 v11, v7, 0 -; CGP-NEXT: v_mul_lo_u32 v12, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v7, v7, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, v4, v5 -; CGP-NEXT: v_mul_lo_u32 v13, 0, v8 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v8 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7 +; CGP-NEXT: v_mul_lo_u32 v6, 0, v4 ; CGP-NEXT: v_mul_hi_u32 v4, v0, v4 -; CGP-NEXT: v_mul_lo_u32 v8, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v7, 0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v6, v4, v2 @@ -553,42 +536,34 @@ define <2 x i32> @v_udiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) { ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CGP-NEXT: s_movk_i32 s4, 0x1000 -; CGP-NEXT: v_mul_lo_u32 v4, v0, 0 -; CGP-NEXT: v_mul_lo_u32 v5, v1, 0 ; CGP-NEXT: v_lshl_b32_e32 v2, s4, v2 ; CGP-NEXT: v_lshl_b32_e32 v3, s4, v3 -; CGP-NEXT: v_cvt_f32_u32_e32 v6, v2 -; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v8, v3 -; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 +; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 +; CGP-NEXT: v_rcp_f32_e32 v4, v4 ; CGP-NEXT: v_rcp_f32_e32 v6, v6 -; CGP-NEXT: v_rcp_f32_e32 v8, v8 +; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 ; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; CGP-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CGP-NEXT: v_mul_lo_u32 v5, v5, v4 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v6 -; CGP-NEXT: v_mul_lo_u32 v10, v6, 0 -; CGP-NEXT: v_mul_lo_u32 v9, v9, v8 -; CGP-NEXT: v_mul_lo_u32 v11, v8, 0 -; CGP-NEXT: v_mul_lo_u32 v12, 0, v7 -; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 -; CGP-NEXT: v_mul_lo_u32 v13, 0, v9 -; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; 
CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9 -; CGP-NEXT: v_mul_lo_u32 v8, 0, v6 -; CGP-NEXT: v_mul_hi_u32 v6, v0, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v4, v5 ; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7 +; CGP-NEXT: v_mul_lo_u32 v6, 0, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v0, v4 +; CGP-NEXT: v_mul_lo_u32 v7, 0, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v6, v4, v2 ; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v4 ; CGP-NEXT: v_mul_lo_u32 v8, v5, v3 @@ -651,20 +626,16 @@ define i32 @v_udiv_i32_24bit(i32 %num, i32 %den) { ; CGP-NEXT: v_and_b32_e32 v1, s4, v1 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 -; CGP-NEXT: v_mul_lo_u32 v4, v0, 0 ; CGP-NEXT: v_rcp_f32_e32 v2, v2 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2 -; CGP-NEXT: v_mul_lo_u32 v5, v2, 0 -; CGP-NEXT: v_mul_lo_u32 v6, 0, v3 +; CGP-NEXT: v_mul_lo_u32 v4, 0, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; CGP-NEXT: v_mul_lo_u32 v3, 0, v2 ; CGP-NEXT: v_mul_hi_u32 v2, v0, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; CGP-NEXT: v_mul_lo_u32 v3, v2, v1 ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2 @@ -742,36 +713,28 @@ define <2 x i32> @v_udiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) { ; CGP-NEXT: v_and_b32_e32 v3, s4, v3 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 -; CGP-NEXT: v_mul_lo_u32 v6, v0, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3 -; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v3 -; CGP-NEXT: v_mul_lo_u32 v9, v1, 0 +; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 ; CGP-NEXT: v_rcp_f32_e32 v4, v4 -; CGP-NEXT: v_rcp_f32_e32 v7, v7 +; CGP-NEXT: v_rcp_f32_e32 v6, v6 ; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 +; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v4 -; CGP-NEXT: v_mul_lo_u32 v10, v4, 0 -; CGP-NEXT: v_mul_lo_u32 v8, v8, v7 -; CGP-NEXT: v_mul_lo_u32 v11, v7, 0 -; CGP-NEXT: v_mul_lo_u32 v12, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v7, v7, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, v4, v5 -; CGP-NEXT: v_mul_lo_u32 v13, 0, v8 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; CGP-NEXT: 
v_add_i32_e32 v7, vcc, v9, v7 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v8 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7 +; CGP-NEXT: v_mul_lo_u32 v6, 0, v4 ; CGP-NEXT: v_mul_hi_u32 v4, v0, v4 -; CGP-NEXT: v_mul_lo_u32 v8, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v7, 0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v6, v4, v2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll index f331deea89e54..500e967c86d64 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll @@ -32,20 +32,16 @@ define i32 @v_urem_i32(i32 %num, i32 %den) { ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 -; CGP-NEXT: v_mul_lo_u32 v4, v0, 0 ; CGP-NEXT: v_rcp_f32_e32 v2, v2 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2 -; CGP-NEXT: v_mul_lo_u32 v5, v2, 0 -; CGP-NEXT: v_mul_lo_u32 v6, 0, v3 +; CGP-NEXT: v_mul_lo_u32 v4, 0, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; CGP-NEXT: v_mul_lo_u32 v3, 0, v2 ; CGP-NEXT: v_mul_hi_u32 v2, v0, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; CGP-NEXT: v_mul_lo_u32 v2, v2, v1 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 @@ -89,22 +85,17 @@ define amdgpu_ps i32 @s_urem_i32(i32 inreg %num, i32 inreg %den) { ; CGP-LABEL: s_urem_i32: ; CGP: ; %bb.0: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, s1 -; CGP-NEXT: s_sub_i32 s4, 0, s1 -; CGP-NEXT: s_bfe_u64 s[2:3], s[0:1], 0x200000 +; CGP-NEXT: s_sub_i32 s2, 0, s1 ; CGP-NEXT: v_rcp_f32_e32 v0, v0 -; CGP-NEXT: v_mul_lo_u32 v1, s2, 0 ; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CGP-NEXT: v_mul_lo_u32 v2, s4, v0 -; CGP-NEXT: v_mul_lo_u32 v3, v0, 0 -; CGP-NEXT: v_mul_lo_u32 v4, 0, v2 -; CGP-NEXT: v_mul_hi_u32 v2, v0, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_mul_lo_u32 v2, s3, v0 -; CGP-NEXT: v_mul_hi_u32 v0, s2, v0 +; CGP-NEXT: v_mul_lo_u32 v1, s2, v0 +; CGP-NEXT: v_mul_lo_u32 v2, 0, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_mul_lo_u32 v1, 0, v0 +; CGP-NEXT: v_mul_hi_u32 v0, s0, v0 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 ; CGP-NEXT: v_mul_lo_u32 v0, v0, s1 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, s0, v0 @@ -167,36 +158,28 @@ define <2 x i32> @v_urem_v2i32(<2 x i32> %num, <2 x i32> %den) { ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 -; CGP-NEXT: v_mul_lo_u32 v6, v0, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3 -; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v3 -; CGP-NEXT: v_mul_lo_u32 v9, v1, 0 +; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 ; CGP-NEXT: v_rcp_f32_e32 v4, v4 -; CGP-NEXT: v_rcp_f32_e32 v7, v7 +; CGP-NEXT: v_rcp_f32_e32 v6, v6 ; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 -; CGP-NEXT: 
v_mul_f32_e32 v7, 0x4f7ffffe, v7 +; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v4 -; CGP-NEXT: v_mul_lo_u32 v10, v4, 0 -; CGP-NEXT: v_mul_lo_u32 v8, v8, v7 -; CGP-NEXT: v_mul_lo_u32 v11, v7, 0 -; CGP-NEXT: v_mul_lo_u32 v12, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v7, v7, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, v4, v5 -; CGP-NEXT: v_mul_lo_u32 v13, 0, v8 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v8 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7 +; CGP-NEXT: v_mul_lo_u32 v6, 0, v4 ; CGP-NEXT: v_mul_hi_u32 v4, v0, v4 -; CGP-NEXT: v_mul_lo_u32 v8, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v7, 0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v2 @@ -496,42 +479,34 @@ define <2 x i32> @v_urem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) { ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CGP-NEXT: s_movk_i32 s4, 0x1000 -; CGP-NEXT: v_mul_lo_u32 v4, v0, 0 -; CGP-NEXT: v_mul_lo_u32 v5, v1, 0 ; CGP-NEXT: v_lshl_b32_e32 v2, s4, v2 ; CGP-NEXT: v_lshl_b32_e32 v3, s4, v3 -; CGP-NEXT: v_cvt_f32_u32_e32 v6, v2 -; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v8, v3 -; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 +; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 +; CGP-NEXT: v_rcp_f32_e32 v4, v4 ; CGP-NEXT: v_rcp_f32_e32 v6, v6 -; CGP-NEXT: v_rcp_f32_e32 v8, v8 +; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 ; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; CGP-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CGP-NEXT: v_mul_lo_u32 v5, v5, v4 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v6 -; CGP-NEXT: v_mul_lo_u32 v10, v6, 0 -; CGP-NEXT: v_mul_lo_u32 v9, v9, v8 -; CGP-NEXT: v_mul_lo_u32 v11, v8, 0 -; CGP-NEXT: v_mul_lo_u32 v12, 0, v7 -; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 -; CGP-NEXT: v_mul_lo_u32 v13, 0, v9 -; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9 -; CGP-NEXT: v_mul_lo_u32 v8, 0, v6 -; CGP-NEXT: v_mul_hi_u32 v6, v0, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v4, v5 ; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; CGP-NEXT: 
v_add_i32_e32 v7, vcc, v9, v7 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7 +; CGP-NEXT: v_mul_lo_u32 v6, 0, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v0, v4 +; CGP-NEXT: v_mul_lo_u32 v7, 0, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v2 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v3 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 @@ -588,20 +563,16 @@ define i32 @v_urem_i32_24bit(i32 %num, i32 %den) { ; CGP-NEXT: v_and_b32_e32 v1, s4, v1 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 -; CGP-NEXT: v_mul_lo_u32 v4, v0, 0 ; CGP-NEXT: v_rcp_f32_e32 v2, v2 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2 -; CGP-NEXT: v_mul_lo_u32 v5, v2, 0 -; CGP-NEXT: v_mul_lo_u32 v6, 0, v3 +; CGP-NEXT: v_mul_lo_u32 v4, 0, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; CGP-NEXT: v_mul_lo_u32 v3, 0, v2 ; CGP-NEXT: v_mul_hi_u32 v2, v0, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; CGP-NEXT: v_mul_lo_u32 v2, v2, v1 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 @@ -674,36 +645,28 @@ define <2 x i32> @v_urem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) { ; CGP-NEXT: v_and_b32_e32 v3, s4, v3 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 -; CGP-NEXT: v_mul_lo_u32 v6, v0, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3 -; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v3 -; CGP-NEXT: v_mul_lo_u32 v9, v1, 0 +; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 ; CGP-NEXT: v_rcp_f32_e32 v4, v4 -; CGP-NEXT: v_rcp_f32_e32 v7, v7 +; CGP-NEXT: v_rcp_f32_e32 v6, v6 ; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 +; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v4 -; CGP-NEXT: v_mul_lo_u32 v10, v4, 0 -; CGP-NEXT: v_mul_lo_u32 v8, v8, v7 -; CGP-NEXT: v_mul_lo_u32 v11, v7, 0 -; CGP-NEXT: v_mul_lo_u32 v12, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v7, v7, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, v4, v5 -; CGP-NEXT: v_mul_lo_u32 v13, 0, v8 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v8 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7 +; CGP-NEXT: v_mul_lo_u32 v6, 0, v4 ; CGP-NEXT: v_mul_hi_u32 v4, v0, v4 -; CGP-NEXT: v_mul_lo_u32 v8, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v7, 0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v2 From 1f837265eb082441337a42420bf415a99c3f4baa Mon Sep 17 00:00:00 2001 From: Xun Li Date: Mon, 14 Sep 2020 18:56:31 -0700 
Subject: [PATCH 0616/1079] [Coroutines] Fix a typo in documentation

In the example, the variable that crosses the suspend point was referred to
incorrectly; fix it.

Differential Revision: https://reviews.llvm.org/D83563
---
 llvm/docs/Coroutines.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst
index 3f7cddef9b37d..5afb33fa0a0ab 100644
--- a/llvm/docs/Coroutines.rst
+++ b/llvm/docs/Coroutines.rst
@@ -257,10 +257,10 @@ Coroutine Transformation
 One of the steps of coroutine lowering is building the coroutine frame. The
 def-use chains are analyzed to determine which objects need be kept alive across
 suspend points. In the coroutine shown in the previous section, use of virtual register
-`%n.val` is separated from the definition by a suspend point, therefore, it
+`%inc` is separated from the definition by a suspend point, therefore, it
 cannot reside on the stack frame since the latter goes away once the coroutine
 is suspended and control is returned back to the caller. An i32 slot is
-allocated in the coroutine frame and `%n.val` is spilled and reloaded from that
+allocated in the coroutine frame and `%inc` is spilled and reloaded from that
 slot as needed.

 We also store addresses of the resume and destroy functions so that the

From 042c23506869b4ae9a49d2c4bc5ea6e6baeabe78 Mon Sep 17 00:00:00 2001
From: Petr Hosek
Date: Mon, 14 Sep 2020 17:44:12 -0700
Subject: [PATCH 0617/1079] [DebugInfo] Remove dots from getFilenameByIndex
 return value

When concatenating a directory with a filename in getFilenameByIndex, we might
end up with a path that contains extra dots. For example, if the inputs are
/path and ./example, we would return /path/./example. Run
sys::path::remove_dots on the output to eliminate the unnecessary dots.

Differential Revision: https://reviews.llvm.org/D87657
---
 llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp     | 1 +
 llvm/test/tools/llvm-symbolizer/frame-fortran.s | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 678f58694e0b5..e7662fc5d295a 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -1391,6 +1391,7 @@ bool DWARFDebugLine::Prologue::getFileNameByIndex(
   // sys::path::append skips empty strings.
   sys::path::append(FilePath, Style, IncludeDir, FileName);
+  sys::path::remove_dots(FilePath, /*remove_dot_dot=*/true, Style);
   Result = std::string(FilePath.str());
   return true;
 }

diff --git a/llvm/test/tools/llvm-symbolizer/frame-fortran.s b/llvm/test/tools/llvm-symbolizer/frame-fortran.s
index 744236fd76f9c..0cd6f2838a6b5 100644
--- a/llvm/test/tools/llvm-symbolizer/frame-fortran.s
+++ b/llvm/test/tools/llvm-symbolizer/frame-fortran.s
@@ -13,7 +13,7 @@

 // CHECK: foo
 // CHECK-NEXT: array
-// CHECK-NEXT: /home/ubuntu{{/|\\}}.{{/|\\}}example.cpp:1
+// CHECK-NEXT: /home/ubuntu{{/|\\}}example.cpp:1
 // CHECK-NEXT: -24 8 ??

  .file "example.cpp"

From 2c12b056bececd3fce3d5a3b731b4ff8fa6dfbbb Mon Sep 17 00:00:00 2001
From: Sam Clegg
Date: Mon, 14 Sep 2020 19:20:25 -0700
Subject: [PATCH 0618/1079] [lld][WebAssembly] Allow globals imports via
 import_name/import_module

This feature already exists but was limited to function symbols.
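For reference, a consolidated view of the check this patch generalizes (a
sketch, not new logic; it assumes lld's UndefinedFunction and UndefinedGlobal
symbol classes, which the Relocations.cpp hunk below operates on):

  // After this patch: any undefined function or global that carries an
  // explicit import name may legitimately stay undefined at link time.
  static bool allowUndefined(const Symbol *sym) {
    if (auto *f = dyn_cast<UndefinedFunction>(sym))
      if (f->importName)
        return true;
    if (auto *g = dyn_cast<UndefinedGlobal>(sym))
      if (g->importName)
        return true;
    // Otherwise fall back to the linker-wide policy flags.
    return (config->allowUndefined ||
            config->allowUndefinedSymbols.count(sym->getName()) != 0);
  }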
Differential Revision: https://reviews.llvm.org/D87666
---
 lld/test/wasm/mutable-globals.s |  2 ++
 lld/wasm/Relocations.cpp        | 11 +++++++----
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/lld/test/wasm/mutable-globals.s b/lld/test/wasm/mutable-globals.s
index 98f216e1bebc8..ea856e5112895 100644
--- a/lld/test/wasm/mutable-globals.s
+++ b/lld/test/wasm/mutable-globals.s
@@ -9,5 +9,7 @@ _start:
 end_function

 .globaltype foo, i32
+.import_module foo, env
+.import_name foo, foo

# CHECK: error: mutable global imported but 'mutable-globals' feature not present in inputs: `foo`. Use --no-check-features to suppress.

diff --git a/lld/wasm/Relocations.cpp b/lld/wasm/Relocations.cpp
index 2559e0f869cce..0a364d1a53ac4 100644
--- a/lld/wasm/Relocations.cpp
+++ b/lld/wasm/Relocations.cpp
@@ -21,10 +21,13 @@ static bool requiresGOTAccess(const Symbol *sym) {
 }

 static bool allowUndefined(const Symbol* sym) {
-  // Undefined functions with explicit import name are allowed to be undefined
-  // at link time.
-  if (auto *F = dyn_cast<UndefinedFunction>(sym))
-    if (F->importName)
+  // Undefined functions and globals with explicit import name are allowed to be
+  // undefined at link time.
+  if (auto *f = dyn_cast<UndefinedFunction>(sym))
+    if (f->importName)
+      return true;
+  if (auto *g = dyn_cast<UndefinedGlobal>(sym))
+    if (g->importName)
       return true;
   return (config->allowUndefined ||
           config->allowUndefinedSymbols.count(sym->getName()) != 0);

From 380e746bcca87baa5c746854b44d6a5cea6f7bde Mon Sep 17 00:00:00 2001
From: Igor Kudrin
Date: Tue, 15 Sep 2020 11:29:48 +0700
Subject: [PATCH 0619/1079] [DebugInfo] Fix methods of AsmPrinter to emit
 values corresponding to the DWARF format (1/19).

These methods are used to emit values which are 32-bit in DWARF32 and 64-bit
in DWARF64. The patch fixes them so that they choose the length automatically,
depending on the DWARF format set in the Context.

Differential Revision: https://reviews.llvm.org/D87008
---
 llvm/include/llvm/CodeGen/AsmPrinter.h        |  18 +-
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |   9 +
 .../CodeGen/AsmPrinter/AsmPrinterDwarf.cpp    |  12 +-
 .../unittests/CodeGen/AsmPrinterDwarfTest.cpp | 253 ++++++++++++++++++
 llvm/unittests/CodeGen/CMakeLists.txt         |   4 +
 llvm/unittests/CodeGen/TestAsmPrinter.cpp     |  88 ++++++
 llvm/unittests/CodeGen/TestAsmPrinter.h       |  82 ++++++
 7 files changed, 456 insertions(+), 10 deletions(-)
 create mode 100644 llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp
 create mode 100644 llvm/unittests/CodeGen/TestAsmPrinter.cpp
 create mode 100644 llvm/unittests/CodeGen/TestAsmPrinter.h

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index c157bb0672ba3..89d266b4286b9 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -216,6 +216,11 @@ class AsmPrinter : public MachineFunctionPass {
   uint16_t getDwarfVersion() const;
   void setDwarfVersion(uint16_t Version);

+  bool isDwarf64() const;
+
+  /// Returns 4 for DWARF32 and 8 for DWARF64.
+  unsigned int getDwarfOffsetByteSize() const;
+
   bool isPositionIndependent() const;

   /// Return true if assembly output should contain comments.
@@ -562,9 +567,6 @@ class AsmPrinter : public MachineFunctionPass {
     emitLabelPlusOffset(Label, 0, Size, IsSectionRelative);
   }

-  /// Emit something like ".long Label + Offset".
- void emitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const; - //===------------------------------------------------------------------===// // Dwarf Emission Helper Routines //===------------------------------------------------------------------===// @@ -593,18 +595,24 @@ class AsmPrinter : public MachineFunctionPass { void emitDwarfSymbolReference(const MCSymbol *Label, bool ForceOffset = false) const; - /// Emit the 4-byte offset of a string from the start of its section. + /// Emit the 4- or 8-byte offset of a string from the start of its section. /// /// When possible, emit a DwarfStringPool section offset without any /// relocations, and without using the symbol. Otherwise, defers to \a /// emitDwarfSymbolReference(). + /// + /// The length of the emitted value depends on the DWARF format. void emitDwarfStringOffset(DwarfStringPoolEntry S) const; - /// Emit the 4-byte offset of a string from the start of its section. + /// Emit the 4-or 8-byte offset of a string from the start of its section. void emitDwarfStringOffset(DwarfStringPoolEntryRef S) const { emitDwarfStringOffset(S.getEntry()); } + /// Emit something like ".long Label + Offset" or ".quad Label + Offset" + /// depending on the DWARF format. + void emitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const; + /// Emit reference to a call site with a specified encoding void emitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Encoding) const; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 01370baa4fd12..35a40bb277b93 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -3432,3 +3432,12 @@ uint16_t AsmPrinter::getDwarfVersion() const { void AsmPrinter::setDwarfVersion(uint16_t Version) { OutStreamer->getContext().setDwarfVersion(Version); } + +bool AsmPrinter::isDwarf64() const { + return OutStreamer->getContext().getDwarfFormat() == dwarf::DWARF64; +} + +unsigned int AsmPrinter::getDwarfOffsetByteSize() const { + return dwarf::getDwarfOffsetByteSize( + OutStreamer->getContext().getDwarfFormat()); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index b6a9a95683603..7f8f6c646925a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -154,19 +154,22 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label, if (!ForceOffset) { // On COFF targets, we have to emit the special .secrel32 directive. if (MAI->needsDwarfSectionOffsetDirective()) { + assert(!isDwarf64() && + "emitting DWARF64 is not implemented for COFF targets"); OutStreamer->EmitCOFFSecRel32(Label, /*Offset=*/0); return; } // If the format uses relocations with dwarf, refer to the symbol directly. if (MAI->doesDwarfUseRelocationsAcrossSections()) { - OutStreamer->emitSymbolValue(Label, 4); + OutStreamer->emitSymbolValue(Label, getDwarfOffsetByteSize()); return; } } // Otherwise, emit it as a label difference from the start of the section. - emitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4); + emitLabelDifference(Label, Label->getSection().getBeginSymbol(), + getDwarfOffsetByteSize()); } void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const { @@ -177,12 +180,11 @@ void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const { } // Just emit the offset directly; no need for symbol math. 
- emitInt32(S.Offset); + OutStreamer->emitIntValue(S.Offset, getDwarfOffsetByteSize()); } void AsmPrinter::emitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const { - // TODO: Support DWARF64 - emitLabelPlusOffset(Label, Offset, 4); + emitLabelPlusOffset(Label, Offset, getDwarfOffsetByteSize()); } void AsmPrinter::emitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo, diff --git a/llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp b/llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp new file mode 100644 index 0000000000000..948b8851149d9 --- /dev/null +++ b/llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp @@ -0,0 +1,253 @@ +//===- llvm/unittest/CodeGen/AsmPrinterDwarfTest.cpp ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "TestAsmPrinter.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Testing/Support/Error.h" + +using namespace llvm; +using testing::_; +using testing::SaveArg; + +namespace { + +class AsmPrinterFixtureBase : public testing::Test { + void setupTestPrinter(const std::string &TripleStr, unsigned DwarfVersion, + dwarf::DwarfFormat DwarfFormat) { + auto ExpectedTestPrinter = + TestAsmPrinter::create(TripleStr, DwarfVersion, DwarfFormat); + ASSERT_THAT_EXPECTED(ExpectedTestPrinter, Succeeded()); + TestPrinter = std::move(ExpectedTestPrinter.get()); + } + +protected: + bool init(const std::string &TripleStr, unsigned DwarfVersion, + dwarf::DwarfFormat DwarfFormat) { + setupTestPrinter(TripleStr, DwarfVersion, DwarfFormat); + return TestPrinter != nullptr; + } + + std::unique_ptr TestPrinter; +}; + +class AsmPrinterEmitDwarfSymbolReferenceTest : public AsmPrinterFixtureBase { +protected: + bool init(const std::string &TripleStr, unsigned DwarfVersion, + dwarf::DwarfFormat DwarfFormat) { + if (!AsmPrinterFixtureBase::init(TripleStr, DwarfVersion, DwarfFormat)) + return false; + + // Create a symbol which will be emitted in the tests and associate it + // with a section because that is required in some code paths. 
+ + Val = TestPrinter->getCtx().createTempSymbol(); + Sec = TestPrinter->getCtx().getELFSection(".tst", ELF::SHT_PROGBITS, 0); + SecBeginSymbol = Sec->getBeginSymbol(); + TestPrinter->getMS().SwitchSection(Sec); + TestPrinter->getMS().emitLabel(Val); + return true; + } + + MCSymbol *Val = nullptr; + MCSection *Sec = nullptr; + MCSymbol *SecBeginSymbol = nullptr; +}; + +TEST_F(AsmPrinterEmitDwarfSymbolReferenceTest, COFF) { + if (!init("x86_64-pc-windows", /*DwarfVersion=*/4, dwarf::DWARF32)) + return; + + EXPECT_CALL(TestPrinter->getMS(), EmitCOFFSecRel32(Val, 0)); + TestPrinter->getAP()->emitDwarfSymbolReference(Val, false); +} + +TEST_F(AsmPrinterEmitDwarfSymbolReferenceTest, COFFForceOffset) { + if (!init("x86_64-pc-windows", /*DwarfVersion=*/4, dwarf::DWARF32)) + return; + + EXPECT_CALL(TestPrinter->getMS(), + emitAbsoluteSymbolDiff(Val, SecBeginSymbol, 4)); + TestPrinter->getAP()->emitDwarfSymbolReference(Val, true); +} + +TEST_F(AsmPrinterEmitDwarfSymbolReferenceTest, ELFDWARF32) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF32)) + return; + + const MCExpr *Arg0 = nullptr; + EXPECT_CALL(TestPrinter->getMS(), emitValueImpl(_, 4, _)) + .WillOnce(SaveArg<0>(&Arg0)); + TestPrinter->getAP()->emitDwarfSymbolReference(Val, false); + + const MCSymbolRefExpr *ActualArg0 = dyn_cast_or_null(Arg0); + ASSERT_NE(ActualArg0, nullptr); + EXPECT_EQ(&(ActualArg0->getSymbol()), Val); +} + +TEST_F(AsmPrinterEmitDwarfSymbolReferenceTest, ELFDWARF32ForceOffset) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF32)) + return; + + EXPECT_CALL(TestPrinter->getMS(), + emitAbsoluteSymbolDiff(Val, SecBeginSymbol, 4)); + TestPrinter->getAP()->emitDwarfSymbolReference(Val, true); +} + +TEST_F(AsmPrinterEmitDwarfSymbolReferenceTest, ELFDWARF64) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF64)) + return; + + const MCExpr *Arg0 = nullptr; + EXPECT_CALL(TestPrinter->getMS(), emitValueImpl(_, 8, _)) + .WillOnce(SaveArg<0>(&Arg0)); + TestPrinter->getAP()->emitDwarfSymbolReference(Val, false); + + const MCSymbolRefExpr *ActualArg0 = dyn_cast_or_null(Arg0); + ASSERT_NE(ActualArg0, nullptr); + EXPECT_EQ(&(ActualArg0->getSymbol()), Val); +} + +TEST_F(AsmPrinterEmitDwarfSymbolReferenceTest, ELFDWARF64ForceOffset) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF64)) + return; + + EXPECT_CALL(TestPrinter->getMS(), + emitAbsoluteSymbolDiff(Val, SecBeginSymbol, 8)); + TestPrinter->getAP()->emitDwarfSymbolReference(Val, true); +} + +class AsmPrinterEmitDwarfStringOffsetTest : public AsmPrinterFixtureBase { +protected: + bool init(const std::string &TripleStr, unsigned DwarfVersion, + dwarf::DwarfFormat DwarfFormat) { + if (!AsmPrinterFixtureBase::init(TripleStr, DwarfVersion, DwarfFormat)) + return false; + + Val.Index = DwarfStringPoolEntry::NotIndexed; + Val.Symbol = TestPrinter->getCtx().createTempSymbol(); + Val.Offset = 42; + return true; + } + + DwarfStringPoolEntry Val; +}; + +TEST_F(AsmPrinterEmitDwarfStringOffsetTest, DWARF32) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF32)) + return; + + const MCExpr *Arg0 = nullptr; + EXPECT_CALL(TestPrinter->getMS(), emitValueImpl(_, 4, _)) + .WillOnce(SaveArg<0>(&Arg0)); + TestPrinter->getAP()->emitDwarfStringOffset(Val); + + const MCSymbolRefExpr *ActualArg0 = dyn_cast_or_null(Arg0); + ASSERT_NE(ActualArg0, nullptr); + EXPECT_EQ(&(ActualArg0->getSymbol()), Val.Symbol); +} + +TEST_F(AsmPrinterEmitDwarfStringOffsetTest, + DWARF32NoRelocationsAcrossSections) { + if (!init("x86_64-pc-linux", 
/*DwarfVersion=*/4, dwarf::DWARF32)) + return; + + TestPrinter->setDwarfUsesRelocationsAcrossSections(false); + EXPECT_CALL(TestPrinter->getMS(), emitIntValue(Val.Offset, 4)); + TestPrinter->getAP()->emitDwarfStringOffset(Val); +} + +TEST_F(AsmPrinterEmitDwarfStringOffsetTest, DWARF64) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF64)) + return; + + const MCExpr *Arg0 = nullptr; + EXPECT_CALL(TestPrinter->getMS(), emitValueImpl(_, 8, _)) + .WillOnce(SaveArg<0>(&Arg0)); + TestPrinter->getAP()->emitDwarfStringOffset(Val); + + const MCSymbolRefExpr *ActualArg0 = dyn_cast_or_null(Arg0); + ASSERT_NE(ActualArg0, nullptr); + EXPECT_EQ(&(ActualArg0->getSymbol()), Val.Symbol); +} + +TEST_F(AsmPrinterEmitDwarfStringOffsetTest, + DWARF64NoRelocationsAcrossSections) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF64)) + return; + + TestPrinter->setDwarfUsesRelocationsAcrossSections(false); + EXPECT_CALL(TestPrinter->getMS(), emitIntValue(Val.Offset, 8)); + TestPrinter->getAP()->emitDwarfStringOffset(Val); +} + +class AsmPrinterEmitDwarfOffsetTest : public AsmPrinterFixtureBase { +protected: + bool init(const std::string &TripleStr, unsigned DwarfVersion, + dwarf::DwarfFormat DwarfFormat) { + if (!AsmPrinterFixtureBase::init(TripleStr, DwarfVersion, DwarfFormat)) + return false; + + Label = TestPrinter->getCtx().createTempSymbol(); + return true; + } + + MCSymbol *Label = nullptr; + uint64_t Offset = 42; +}; + +TEST_F(AsmPrinterEmitDwarfOffsetTest, DWARF32) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF32)) + return; + + const MCExpr *Arg0 = nullptr; + EXPECT_CALL(TestPrinter->getMS(), emitValueImpl(_, 4, _)) + .WillOnce(SaveArg<0>(&Arg0)); + TestPrinter->getAP()->emitDwarfOffset(Label, Offset); + + const MCBinaryExpr *ActualArg0 = dyn_cast_or_null(Arg0); + ASSERT_NE(ActualArg0, nullptr); + EXPECT_EQ(ActualArg0->getOpcode(), MCBinaryExpr::Add); + + const MCSymbolRefExpr *ActualLHS = + dyn_cast_or_null(ActualArg0->getLHS()); + ASSERT_NE(ActualLHS, nullptr); + EXPECT_EQ(&(ActualLHS->getSymbol()), Label); + + const MCConstantExpr *ActualRHS = + dyn_cast_or_null(ActualArg0->getRHS()); + ASSERT_NE(ActualRHS, nullptr); + EXPECT_EQ(static_cast(ActualRHS->getValue()), Offset); +} + +TEST_F(AsmPrinterEmitDwarfOffsetTest, DWARF64) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF64)) + return; + + const MCExpr *Arg0 = nullptr; + EXPECT_CALL(TestPrinter->getMS(), emitValueImpl(_, 8, _)) + .WillOnce(SaveArg<0>(&Arg0)); + TestPrinter->getAP()->emitDwarfOffset(Label, Offset); + + const MCBinaryExpr *ActualArg0 = dyn_cast_or_null(Arg0); + ASSERT_NE(ActualArg0, nullptr); + EXPECT_EQ(ActualArg0->getOpcode(), MCBinaryExpr::Add); + + const MCSymbolRefExpr *ActualLHS = + dyn_cast_or_null(ActualArg0->getLHS()); + ASSERT_NE(ActualLHS, nullptr); + EXPECT_EQ(&(ActualLHS->getSymbol()), Label); + + const MCConstantExpr *ActualRHS = + dyn_cast_or_null(ActualArg0->getRHS()); + ASSERT_NE(ActualRHS, nullptr); + EXPECT_EQ(static_cast(ActualRHS->getValue()), Offset); +} + +} // end namespace diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt index 831eb66e82cf4..3af8b7f742970 100644 --- a/llvm/unittests/CodeGen/CMakeLists.txt +++ b/llvm/unittests/CodeGen/CMakeLists.txt @@ -15,6 +15,7 @@ set(LLVM_LINK_COMPONENTS add_llvm_unittest(CodeGenTests AArch64SelectionDAGTest.cpp + AsmPrinterDwarfTest.cpp DIEHashTest.cpp LowLevelTypeTest.cpp LexicalScopesTest.cpp @@ -25,6 +26,9 @@ add_llvm_unittest(CodeGenTests 
 ScalableVectorMVTsTest.cpp
 TypeTraitsTest.cpp
 TargetOptionsTest.cpp
+ TestAsmPrinter.cpp
 )

 add_subdirectory(GlobalISel)
+
+target_link_libraries(CodeGenTests PRIVATE LLVMTestingSupport)

diff --git a/llvm/unittests/CodeGen/TestAsmPrinter.cpp b/llvm/unittests/CodeGen/TestAsmPrinter.cpp
new file mode 100644
index 0000000000000..7d04202067689
--- /dev/null
+++ b/llvm/unittests/CodeGen/TestAsmPrinter.cpp
@@ -0,0 +1,88 @@
+//===--- unittests/CodeGen/TestAsmPrinter.cpp -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TestAsmPrinter.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+using ::testing::StrictMock;
+
+// Note: a non-const reference argument cannot be passed through
+// testing::StrictMock, thus, we pass a pointer and dereference it here.
+MockMCStreamer::MockMCStreamer(MCContext *Ctx) : MCStreamer(*Ctx) {}
+
+MockMCStreamer::~MockMCStreamer() = default;
+
+TestAsmPrinter::TestAsmPrinter() = default;
+
+TestAsmPrinter::~TestAsmPrinter() = default;
+
+llvm::Expected<std::unique_ptr<TestAsmPrinter>>
+TestAsmPrinter::create(const std::string &TripleStr, uint16_t DwarfVersion,
+                       dwarf::DwarfFormat DwarfFormat) {
+  std::string ErrorStr;
+  const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrorStr);
+  if (!TheTarget)
+    return std::unique_ptr<TestAsmPrinter>();
+
+  std::unique_ptr<TestAsmPrinter> TestPrinter(new TestAsmPrinter);
+  if (llvm::Error E =
+          TestPrinter->init(TheTarget, TripleStr, DwarfVersion, DwarfFormat))
+    return std::move(E);
+
+  return std::move(TestPrinter);
+}
+
+// Note: based on dwarfgen::Generator::init() from
+// llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp
+llvm::Error TestAsmPrinter::init(const Target *TheTarget, StringRef TripleName,
+                                 uint16_t DwarfVersion,
+                                 dwarf::DwarfFormat DwarfFormat) {
+  TM.reset(TheTarget->createTargetMachine(TripleName, "", "", TargetOptions(),
+                                          None));
+  if (!TM)
+    return make_error<StringError>("no target machine for target " + TripleName,
+                                   inconvertibleErrorCode());
+
+  MC.reset(new MCContext(TM->getMCAsmInfo(), TM->getMCRegisterInfo(),
+                         TM->getObjFileLowering()));
+  TM->getObjFileLowering()->Initialize(*MC, *TM);
+
+  MS = new StrictMock<MockMCStreamer>(MC.get());
+
+  Asm.reset(
+      TheTarget->createAsmPrinter(*TM, std::unique_ptr<MCStreamer>(MS)));
+  if (!Asm)
+    return make_error<StringError>("no asm printer for target " + TripleName,
+                                   inconvertibleErrorCode());
+
+  // Set the DWARF version correctly on all classes that we use.
+  MC->setDwarfVersion(DwarfVersion);
+  Asm->setDwarfVersion(DwarfVersion);
+
+  // Set the DWARF format.
+ MC->setDwarfFormat(DwarfFormat); + + return Error::success(); +} + +void TestAsmPrinter::setDwarfUsesRelocationsAcrossSections(bool Enable) { + struct HackMCAsmInfo : MCAsmInfo { + void setDwarfUsesRelocationsAcrossSections(bool Enable) { + DwarfUsesRelocationsAcrossSections = Enable; + } + }; + static_cast(const_cast(TM->getMCAsmInfo())) + ->setDwarfUsesRelocationsAcrossSections(Enable); +} diff --git a/llvm/unittests/CodeGen/TestAsmPrinter.h b/llvm/unittests/CodeGen/TestAsmPrinter.h new file mode 100644 index 0000000000000..65e557b9b4a60 --- /dev/null +++ b/llvm/unittests/CodeGen/TestAsmPrinter.h @@ -0,0 +1,82 @@ +//===--- unittests/CodeGen/TestAsmPrinter.h ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_UNITTESTS_CODEGEN_TESTASMPRINTER_H +#define LLVM_UNITTESTS_CODEGEN_TESTASMPRINTER_H + +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/MC/MCStreamer.h" +#include "gmock/gmock.h" + +#include + +namespace llvm { +class AsmPrinter; +class MCContext; +class Target; +class TargetMachine; + +class MockMCStreamer : public MCStreamer { +public: + explicit MockMCStreamer(MCContext *Ctx); + ~MockMCStreamer(); + + // These methods are pure virtual in MCStreamer, thus, have to be overridden: + + MOCK_METHOD2(emitSymbolAttribute, + bool(MCSymbol *Symbol, MCSymbolAttr Attribute)); + MOCK_METHOD3(emitCommonSymbol, + void(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment)); + MOCK_METHOD5(emitZerofill, + void(MCSection *Section, MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment, SMLoc Loc)); + + // The following are mock methods to be used in tests. + + MOCK_METHOD2(emitIntValue, void(uint64_t Value, unsigned Size)); + MOCK_METHOD3(emitValueImpl, + void(const MCExpr *Value, unsigned Size, SMLoc Loc)); + MOCK_METHOD3(emitAbsoluteSymbolDiff, + void(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Size)); + MOCK_METHOD2(EmitCOFFSecRel32, void(MCSymbol const *Symbol, uint64_t Offset)); +}; + +class TestAsmPrinter { + std::unique_ptr MC; + MockMCStreamer *MS = nullptr; // Owned by AsmPrinter + std::unique_ptr TM; + std::unique_ptr Asm; + + /// Private constructor; call TestAsmPrinter::create(...) + /// to create an instance. + TestAsmPrinter(); + + /// Initialize an AsmPrinter instance with a mocked MCStreamer. + llvm::Error init(const Target *TheTarget, StringRef TripleStr, + uint16_t DwarfVersion, dwarf::DwarfFormat DwarfFormat); + +public: + /// Create an AsmPrinter and accompanied objects. + /// Returns ErrorSuccess() with an empty value if the requested target is not + /// supported so that the corresponding test can be gracefully skipped. + static llvm::Expected> + create(const std::string &TripleStr, uint16_t DwarfVersion, + dwarf::DwarfFormat DwarfFormat); + + ~TestAsmPrinter(); + + void setDwarfUsesRelocationsAcrossSections(bool Enable); + + AsmPrinter *getAP() const { return Asm.get(); } + MCContext &getCtx() const { return *MC; } + MockMCStreamer &getMS() const { return *MS; } +}; + +} // end namespace llvm + +#endif // LLVM_UNITTESTS_CODEGEN_TESTASMPRINTER_H From a8058c6f8d1d3a360986f05b74f548995b384fcd Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 15 Sep 2020 11:30:02 +0700 Subject: [PATCH 0620/1079] [DebugInfo] Fix DIE value emitters to be compatible with DWARF64 (2/19). 
DW_FORM_sec_offset and DW_FORM_strp imply values of different sizes with DWARF32 and DWARF64. The patch fixes DIE value classes to use correct sizes when emitting their values. For DIELocList it ensures that the requested DWARF form matches the current DWARF format because that class uses a method that selects the size automatically. Differential Revision: https://reviews.llvm.org/D87009 --- llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 20 +-- llvm/unittests/CodeGen/CMakeLists.txt | 1 + llvm/unittests/CodeGen/DIETest.cpp | 189 ++++++++++++++++++++++++++ 3 files changed, 202 insertions(+), 8 deletions(-) create mode 100644 llvm/unittests/CodeGen/DIETest.cpp diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index f1d2551281871..b78a47545458b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -476,8 +476,7 @@ unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_data8: return 8; case dwarf::DW_FORM_sec_offset: - // FIXME: add support for DWARF64 - return 4; + return AP->getDwarfOffsetByteSize(); default: llvm_unreachable("DIE Value form not supported yet"); } @@ -505,8 +504,7 @@ unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { return 4; case dwarf::DW_FORM_sec_offset: case dwarf::DW_FORM_strp: - // FIXME: add support for DWARF64 - return 4; + return AP->getDwarfOffsetByteSize(); case dwarf::DW_FORM_addr: return AP->MAI->getCodePointerSize(); default: @@ -551,8 +549,7 @@ unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_data4: return 4; case dwarf::DW_FORM_sec_offset: - // FIXME: add support for DWARF64 - return 4; + return AP->getDwarfOffsetByteSize(); default: llvm_unreachable("DIE Value form not supported yet"); } @@ -822,10 +819,17 @@ unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_loclistx: return getULEB128Size(Index); case dwarf::DW_FORM_data4: + assert(!AP->isDwarf64() && + "DW_FORM_data4 is not suitable to emit a pointer to a location list " + "in the 64-bit DWARF format"); return 4; + case dwarf::DW_FORM_data8: + assert(AP->isDwarf64() && + "DW_FORM_data8 is not suitable to emit a pointer to a location list " + "in the 32-bit DWARF format"); + return 8; case dwarf::DW_FORM_sec_offset: - // FIXME: add support for DWARF64 - return 4; + return AP->getDwarfOffsetByteSize(); default: llvm_unreachable("DIE Value form not supported yet"); } diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt index 3af8b7f742970..817ddb1bbf26c 100644 --- a/llvm/unittests/CodeGen/CMakeLists.txt +++ b/llvm/unittests/CodeGen/CMakeLists.txt @@ -17,6 +17,7 @@ add_llvm_unittest(CodeGenTests AArch64SelectionDAGTest.cpp AsmPrinterDwarfTest.cpp DIEHashTest.cpp + DIETest.cpp LowLevelTypeTest.cpp LexicalScopesTest.cpp MachineInstrBundleIteratorTest.cpp diff --git a/llvm/unittests/CodeGen/DIETest.cpp b/llvm/unittests/CodeGen/DIETest.cpp new file mode 100644 index 0000000000000..4640d65e69580 --- /dev/null +++ b/llvm/unittests/CodeGen/DIETest.cpp @@ -0,0 +1,189 @@ +//===- llvm/unittest/CodeGen/DIETest.cpp ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/DIE.h" +#include "TestAsmPrinter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Testing/Support/Error.h" + +using namespace llvm; +using testing::_; +using testing::SaveArg; + +namespace { + +using DIETestParams = + std::tuple; + +class DIEFixtureBase : public testing::TestWithParam { +protected: + void SetUp() override { + unsigned Version; + dwarf::DwarfFormat Format; + std::tie(Version, Format, Form, Size) = GetParam(); + auto ExpectedTestPrinter = + TestAsmPrinter::create("x86_64-pc-linux", Version, Format); + ASSERT_THAT_EXPECTED(ExpectedTestPrinter, Succeeded()); + TestPrinter = std::move(ExpectedTestPrinter.get()); + } + + dwarf::Form Form; + unsigned Size; + std::unique_ptr TestPrinter; +}; + +struct DIEExprFixture : public DIEFixtureBase { + void SetUp() override { + DIEFixtureBase::SetUp(); + if (!TestPrinter) + return; + + Val = MCConstantExpr::create(42, TestPrinter->getCtx()); + } + + const MCExpr *Val = nullptr; +}; + +TEST_P(DIEExprFixture, SizeOf) { + if (!TestPrinter) + return; + + DIEExpr Tst(Val); + EXPECT_EQ(Size, Tst.SizeOf(TestPrinter->getAP(), Form)); +} + +TEST_P(DIEExprFixture, EmitValue) { + if (!TestPrinter) + return; + + DIEExpr Tst(Val); + EXPECT_CALL(TestPrinter->getMS(), emitValueImpl(Val, Size, _)); + Tst.emitValue(TestPrinter->getAP(), Form); +} + +INSTANTIATE_TEST_CASE_P( + DIETestParams, DIEExprFixture, + testing::Values( + DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_data4, 4u}, + DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_data8, 8u}, + DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_sec_offset, 4u}, + DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_data4, 4u}, + DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_data8, 8u}, + DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_sec_offset, 8u}), ); + +struct DIELabelFixture : public DIEFixtureBase { + void SetUp() override { + DIEFixtureBase::SetUp(); + if (!TestPrinter) + return; + + Val = TestPrinter->getCtx().createTempSymbol(); + } + + const MCSymbol *Val = nullptr; +}; + +TEST_P(DIELabelFixture, SizeOf) { + if (!TestPrinter) + return; + + DIELabel Tst(Val); + EXPECT_EQ(Size, Tst.SizeOf(TestPrinter->getAP(), Form)); +} + +TEST_P(DIELabelFixture, EmitValue) { + if (!TestPrinter) + return; + + DIELabel Tst(Val); + + const MCExpr *Arg0 = nullptr; + EXPECT_CALL(TestPrinter->getMS(), emitValueImpl(_, Size, _)) + .WillOnce(SaveArg<0>(&Arg0)); + Tst.emitValue(TestPrinter->getAP(), Form); + + const MCSymbolRefExpr *ActualArg0 = dyn_cast_or_null(Arg0); + ASSERT_NE(ActualArg0, nullptr); + EXPECT_EQ(&(ActualArg0->getSymbol()), Val); +} + +INSTANTIATE_TEST_CASE_P( + DIETestParams, DIELabelFixture, + testing::Values( + DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_data4, 4u}, + DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_sec_offset, 4u}, + DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_strp, 4u}, + DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_addr, 8u}, + DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_data4, 4u}, + DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_sec_offset, 8u}, + DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_strp, 8u}, + DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_addr, 8u}), ); + +struct DIEDeltaFixture : public DIEFixtureBase { + void SetUp() override { + DIEFixtureBase::SetUp(); + if (!TestPrinter) + return; + + Hi = TestPrinter->getCtx().createTempSymbol(); + Lo = 
TestPrinter->getCtx().createTempSymbol(); + } + + const MCSymbol *Hi = nullptr; + const MCSymbol *Lo = nullptr; +}; + +TEST_P(DIEDeltaFixture, SizeOf) { + if (!TestPrinter) + return; + + DIEDelta Tst(Hi, Lo); + EXPECT_EQ(Size, Tst.SizeOf(TestPrinter->getAP(), Form)); +} + +TEST_P(DIEDeltaFixture, EmitValue) { + if (!TestPrinter) + return; + + DIEDelta Tst(Hi, Lo); + EXPECT_CALL(TestPrinter->getMS(), emitAbsoluteSymbolDiff(Hi, Lo, Size)); + Tst.emitValue(TestPrinter->getAP(), Form); +} + +INSTANTIATE_TEST_CASE_P( + DIETestParams, DIEDeltaFixture, + testing::Values( + DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_data4, 4u}, + DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_sec_offset, 4u}, + DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_data4, 4u}, + DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_sec_offset, 8u}), ); + +struct DIELocListFixture : public DIEFixtureBase { + void SetUp() override { DIEFixtureBase::SetUp(); } +}; + +TEST_P(DIELocListFixture, SizeOf) { + if (!TestPrinter) + return; + + DIELocList Tst(999); + EXPECT_EQ(Size, Tst.SizeOf(TestPrinter->getAP(), Form)); +} + +INSTANTIATE_TEST_CASE_P( + DIETestParams, DIELocListFixture, + testing::Values( + DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_loclistx, 2u}, + DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_data4, 4u}, + DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_sec_offset, 4u}, + DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_loclistx, 2u}, + DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_data8, 8u}, + DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_sec_offset, 8u}), ); + +} // end namespace From c3c501f5d79130fe9bbe4f6ca689f2d83f92373e Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 15 Sep 2020 11:30:10 +0700 Subject: [PATCH 0621/1079] [DebugInfo] Add new emitting methods for values which depend on the DWARF format (3/19). These methods are going to be used in subsequent patches. Differential Revision: https://reviews.llvm.org/D87010 --- llvm/include/llvm/CodeGen/AsmPrinter.h | 18 +++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 5 + .../CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 28 +++++ .../unittests/CodeGen/AsmPrinterDwarfTest.cpp | 117 ++++++++++++++++++ 4 files changed, 168 insertions(+) diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 89d266b4286b9..11ba36aee5a80 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -221,6 +221,9 @@ class AsmPrinter : public MachineFunctionPass { /// Returns 4 for DWARF32 and 8 for DWARF64. unsigned int getDwarfOffsetByteSize() const; + /// Returns 4 for DWARF32 and 12 for DWARF64. + unsigned int getUnitLengthFieldByteSize() const; + bool isPositionIndependent() const; /// Return true if assembly output should contain comments. @@ -613,6 +616,21 @@ class AsmPrinter : public MachineFunctionPass { /// depending on the DWARF format. void emitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const; + /// Emit 32- or 64-bit value depending on the DWARF format. + void emitDwarfLengthOrOffset(uint64_t Value) const; + + /// Emit a special value of 0xffffffff if producing 64-bit debugging info. + void maybeEmitDwarf64Mark() const; + + /// Emit a unit length field. The actual format, DWARF32 or DWARF64, is chosen + /// according to the settings. + void emitDwarfUnitLength(uint64_t Length, const Twine &Comment) const; + + /// Emit a unit length field. The actual format, DWARF32 or DWARF64, is chosen + /// according to the settings. 
+ void emitDwarfUnitLength(const MCSymbol *Hi, const MCSymbol *Lo, + const Twine &Comment) const; + /// Emit reference to a call site with a specified encoding void emitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Encoding) const; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 35a40bb277b93..7d8355c049693 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -3441,3 +3441,8 @@ unsigned int AsmPrinter::getDwarfOffsetByteSize() const { return dwarf::getDwarfOffsetByteSize( OutStreamer->getContext().getDwarfFormat()); } + +unsigned int AsmPrinter::getUnitLengthFieldByteSize() const { + return dwarf::getUnitLengthFieldByteSize( + OutStreamer->getContext().getDwarfFormat()); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 7f8f6c646925a..594b41bcea53f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" +#include using namespace llvm; #define DEBUG_TYPE "asm-printer" @@ -187,6 +188,33 @@ void AsmPrinter::emitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const { emitLabelPlusOffset(Label, Offset, getDwarfOffsetByteSize()); } +void AsmPrinter::emitDwarfLengthOrOffset(uint64_t Value) const { + assert(isDwarf64() || Value <= UINT32_MAX); + OutStreamer->emitIntValue(Value, getDwarfOffsetByteSize()); +} + +void AsmPrinter::maybeEmitDwarf64Mark() const { + if (!isDwarf64()) + return; + OutStreamer->AddComment("DWARF64 Mark"); + OutStreamer->emitInt32(dwarf::DW_LENGTH_DWARF64); +} + +void AsmPrinter::emitDwarfUnitLength(uint64_t Length, + const Twine &Comment) const { + assert(isDwarf64() || Length <= dwarf::DW_LENGTH_lo_reserved); + maybeEmitDwarf64Mark(); + OutStreamer->AddComment(Comment); + OutStreamer->emitIntValue(Length, getDwarfOffsetByteSize()); +} + +void AsmPrinter::emitDwarfUnitLength(const MCSymbol *Hi, const MCSymbol *Lo, + const Twine &Comment) const { + maybeEmitDwarf64Mark(); + OutStreamer->AddComment(Comment); + OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, getDwarfOffsetByteSize()); +} + void AsmPrinter::emitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Encoding) const { // The least significant 3 bits specify the width of the encoding diff --git a/llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp b/llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp index 948b8851149d9..5c53f39fd9a3e 100644 --- a/llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp +++ b/llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp @@ -14,6 +14,7 @@ using namespace llvm; using testing::_; +using testing::InSequence; using testing::SaveArg; namespace { @@ -250,4 +251,120 @@ TEST_F(AsmPrinterEmitDwarfOffsetTest, DWARF64) { EXPECT_EQ(static_cast(ActualRHS->getValue()), Offset); } +class AsmPrinterEmitDwarfLengthOrOffsetTest : public AsmPrinterFixtureBase { +protected: + uint64_t Val = 42; +}; + +TEST_F(AsmPrinterEmitDwarfLengthOrOffsetTest, DWARF32) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF32)) + return; + + EXPECT_CALL(TestPrinter->getMS(), emitIntValue(Val, 4)); + TestPrinter->getAP()->emitDwarfLengthOrOffset(Val); +} + +TEST_F(AsmPrinterEmitDwarfLengthOrOffsetTest, DWARF64) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF64)) + return; + + 
EXPECT_CALL(TestPrinter->getMS(), emitIntValue(Val, 8)); + TestPrinter->getAP()->emitDwarfLengthOrOffset(Val); +} + +class AsmPrinterGetUnitLengthFieldByteSizeTest : public AsmPrinterFixtureBase { +}; + +TEST_F(AsmPrinterGetUnitLengthFieldByteSizeTest, DWARF32) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF32)) + return; + + EXPECT_EQ(TestPrinter->getAP()->getUnitLengthFieldByteSize(), 4u); +} + +TEST_F(AsmPrinterGetUnitLengthFieldByteSizeTest, DWARF64) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF64)) + return; + + EXPECT_EQ(TestPrinter->getAP()->getUnitLengthFieldByteSize(), 12u); +} + +class AsmPrinterMaybeEmitDwarf64MarkTest : public AsmPrinterFixtureBase {}; + +TEST_F(AsmPrinterMaybeEmitDwarf64MarkTest, DWARF32) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF32)) + return; + + EXPECT_CALL(TestPrinter->getMS(), emitIntValue(_, _)).Times(0); + TestPrinter->getAP()->maybeEmitDwarf64Mark(); +} + +TEST_F(AsmPrinterMaybeEmitDwarf64MarkTest, DWARF64) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF64)) + return; + + EXPECT_CALL(TestPrinter->getMS(), emitIntValue(dwarf::DW_LENGTH_DWARF64, 4)); + TestPrinter->getAP()->maybeEmitDwarf64Mark(); +} + +class AsmPrinterEmitDwarfUnitLengthAsIntTest : public AsmPrinterFixtureBase { +protected: + uint64_t Val = 42; +}; + +TEST_F(AsmPrinterEmitDwarfUnitLengthAsIntTest, DWARF32) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF32)) + return; + + EXPECT_CALL(TestPrinter->getMS(), emitIntValue(Val, 4)); + TestPrinter->getAP()->emitDwarfUnitLength(Val, ""); +} + +TEST_F(AsmPrinterEmitDwarfUnitLengthAsIntTest, DWARF64) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF64)) + return; + + InSequence S; + EXPECT_CALL(TestPrinter->getMS(), emitIntValue(dwarf::DW_LENGTH_DWARF64, 4)); + EXPECT_CALL(TestPrinter->getMS(), emitIntValue(Val, 8)); + + TestPrinter->getAP()->emitDwarfUnitLength(Val, ""); +} + +class AsmPrinterEmitDwarfUnitLengthAsHiLoDiffTest + : public AsmPrinterFixtureBase { +protected: + bool init(const std::string &TripleStr, unsigned DwarfVersion, + dwarf::DwarfFormat DwarfFormat) { + if (!AsmPrinterFixtureBase::init(TripleStr, DwarfVersion, DwarfFormat)) + return false; + + Hi = TestPrinter->getCtx().createTempSymbol(); + Lo = TestPrinter->getCtx().createTempSymbol(); + return true; + } + + MCSymbol *Hi = nullptr; + MCSymbol *Lo = nullptr; +}; + +TEST_F(AsmPrinterEmitDwarfUnitLengthAsHiLoDiffTest, DWARF32) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF32)) + return; + + EXPECT_CALL(TestPrinter->getMS(), emitAbsoluteSymbolDiff(Hi, Lo, 4)); + TestPrinter->getAP()->emitDwarfUnitLength(Hi, Lo, ""); +} + +TEST_F(AsmPrinterEmitDwarfUnitLengthAsHiLoDiffTest, DWARF64) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF64)) + return; + + InSequence S; + EXPECT_CALL(TestPrinter->getMS(), emitIntValue(dwarf::DW_LENGTH_DWARF64, 4)); + EXPECT_CALL(TestPrinter->getMS(), emitAbsoluteSymbolDiff(Hi, Lo, 8)); + + TestPrinter->getAP()->emitDwarfUnitLength(Hi, Lo, ""); +} + } // end namespace From 982b31fad2983eef08dbbddb2d58c635bdf6cf08 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 15 Sep 2020 11:30:18 +0700 Subject: [PATCH 0622/1079] [DebugInfo] Add the -dwarf64 switch to llc and other internal tools (4/19). The patch adds a switch to enable emitting debug info in the 64-bit DWARF format. 
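Condensed from the DwarfDebug constructor change below, the gating condition
reads as follows (a sketch for orientation; the hunk in DwarfDebug.cpp is
authoritative):

  // All four conditions must hold before DWARF64 emission is enabled:
  bool Dwarf64 = Asm->TM.Options.MCOptions.Dwarf64 && // -dwarf64 was given
                 DwarfVersion >= 3 &&   // DWARF64 was introduced in DWARFv3
                 TT.isArch64Bit() &&    // needs 64-bit relocations
                 TT.isOSBinFormatELF(); // only ELF is supported for now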
Most emitters for sections will be updated in the subsequent patches, whereas
for .debug_line and .debug_frame the emitters are in the MC library, which is
already updated.

For now, the switch is enabled only for 64-bit ELF targets.

Differential Revision: https://reviews.llvm.org/D87011
---
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp    |  7 +++
 .../test/DebugInfo/X86/debug-frame-dwarf64.ll | 37 ++++++++++++
 llvm/test/DebugInfo/X86/debug-line-dwarf64.ll | 35 +++++++++++
 llvm/test/DebugInfo/X86/dwarf64-support.ll    | 59 +++++++++++++++++++
 4 files changed, 138 insertions(+)
 create mode 100644 llvm/test/DebugInfo/X86/debug-frame-dwarf64.ll
 create mode 100644 llvm/test/DebugInfo/X86/debug-line-dwarf64.ll
 create mode 100644 llvm/test/DebugInfo/X86/dwarf64-support.ll

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 64d57aa9402c8..858a89ccab608 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -373,6 +373,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
   DwarfVersion =
       TT.isNVPTX() ? 2 : (DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION);

+  bool Dwarf64 = Asm->TM.Options.MCOptions.Dwarf64 &&
+                 DwarfVersion >= 3 &&   // DWARF64 was introduced in DWARFv3.
+                 TT.isArch64Bit() &&    // DWARF64 requires 64-bit relocations.
+                 TT.isOSBinFormatELF(); // Support only ELF for now.
+
   UseRangesSection = !NoDwarfRangesSection && !TT.isNVPTX();

   // Use sections as references. Force for NVPTX.
@@ -414,6 +419,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
       DwarfVersion >= 5 || (UseGNUDebugMacro && !useSplitDwarf());

   Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
+  Asm->OutStreamer->getContext().setDwarfFormat(Dwarf64 ? dwarf::DWARF64
+                                                        : dwarf::DWARF32);
 }

 // Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.

diff --git a/llvm/test/DebugInfo/X86/debug-frame-dwarf64.ll b/llvm/test/DebugInfo/X86/debug-frame-dwarf64.ll
new file mode 100644
index 0000000000000..8efb739a0d621
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/debug-frame-dwarf64.ll
@@ -0,0 +1,37 @@
+; This checks that .debug_frame can be generated in the DWARF64 format.
+ +; RUN: llc -mtriple=x86_64 -dwarf64 -force-dwarf-frame-section -filetype=obj %s -o %t +; RUN: llvm-dwarfdump -debug-frame %t | FileCheck %s + +; CHECK: .debug_frame contents: +; CHECK: 00000000 {{.+}} ffffffffffffffff CIE +; CHECK-NEXT: Format: DWARF64 +; CHECK: {{.+}} 0000000000000000 FDE cie=00000000 pc= +; CHECK-NEXT: Format: DWARF64 + +; IR generated and reduced from: +; $ cat foo.c +; void foo() { } +; $ clang -g -S -emit-llvm foo.c -o foo.ll + +target triple = "x86_64-unknown-linux-gnu" + +define dso_local void @foo() #0 !dbg !7 { + ret void, !dbg !10 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "foo.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 12.0.0"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !9) +!9 = !{null} +!10 = !DILocation(line: 1, column: 14, scope: !7) diff --git a/llvm/test/DebugInfo/X86/debug-line-dwarf64.ll b/llvm/test/DebugInfo/X86/debug-line-dwarf64.ll new file mode 100644 index 0000000000000..e5045f1495063 --- /dev/null +++ b/llvm/test/DebugInfo/X86/debug-line-dwarf64.ll @@ -0,0 +1,35 @@ +; This checks that .debug_line can be generated in the DWARF64 format. + +; RUN: llc -mtriple=x86_64 -dwarf-version=3 -dwarf64 -filetype=obj %s -o %t3 +; RUN: llvm-dwarfdump -debug-line %t3 | FileCheck %s + +; CHECK: .debug_line contents: +; CHECK-NEXT: debug_line[0x00000000] +; CHECK-NEXT: Line table prologue: +; CHECK-NEXT: total_length: +; CHECK-NEXT: format: DWARF64 + +; IR generated and reduced from: +; $ cat foo.c +; int foo; +; $ clang -g -S -emit-llvm foo.c -o foo.ll + +target triple = "x86_64-unknown-linux-gnu" + +@foo = dso_local global i32 0, align 4, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9} +!llvm.ident = !{!10} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "foo", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "foo.c", directory: "/tmp") +!4 = !{} +!5 = !{!0} +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{i32 7, !"Dwarf Version", i32 4} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{!"clang version 12.0.0"} diff --git a/llvm/test/DebugInfo/X86/dwarf64-support.ll b/llvm/test/DebugInfo/X86/dwarf64-support.ll new file mode 100644 index 0000000000000..6790cafd551eb --- /dev/null +++ b/llvm/test/DebugInfo/X86/dwarf64-support.ll @@ -0,0 +1,59 @@ +; This checks cases when the 64-bit DWARF debug info should not be generated +; even if '-dwarf64' is specified. + +; The 64-bit DWARF format was introduced in DWARFv3, so the '-dwarf64' switch +; should be ignored for earlier versions. 
+
+; RUN: llc -mtriple=x86_64 -dwarf-version=2 -dwarf64 -filetype=obj %s -o - | \
+; RUN: llvm-dwarfdump -debug-line - | \
+; RUN: FileCheck %s --check-prefixes=ELF64,CHECK
+
+; DWARF64 requires 64-bit relocations, so it is not produced for 32-bit targets.
+; RUN: llc -mtriple=i386 -dwarf-version=5 -dwarf64 -filetype=obj %s -o - | \
+; RUN: llvm-dwarfdump -debug-line - | \
+; RUN: FileCheck %s --check-prefixes=ELF32,CHECK
+
+; DWARF64 is enabled only for ELF targets. The switch should be ignored for COFF.
+; RUN: llc -mtriple=x86_64-windows-gnu -dwarf-version=5 -dwarf64 -filetype=obj %s -o - | \
+; RUN: llvm-dwarfdump -debug-line - | \
+; RUN: FileCheck %s --check-prefixes=COFF,CHECK
+
+; DWARF64 is enabled only for ELF targets. The switch should be ignored for Mach-O.
+; RUN: llc -mtriple=x86_64-apple-darwin -dwarf-version=5 -dwarf64 -filetype=obj %s -o - | \
+; RUN: llvm-dwarfdump -debug-line - | \
+; RUN: FileCheck %s --check-prefixes=MACHO,CHECK
+
+; ELF64: file format elf64-x86-64
+; ELF32: file format elf32-i386
+; COFF: file format COFF-x86-64
+; MACHO: file format Mach-O 64-bit x86-64
+
+; CHECK: .debug_line contents:
+; CHECK-NEXT: debug_line[0x00000000]
+; CHECK-NEXT: Line table prologue:
+; CHECK-NEXT: total_length:
+; CHECK-NEXT: format: DWARF32
+
+; IR generated and reduced from:
+; $ cat foo.c
+; int foo;
+; $ clang -g -S -emit-llvm foo.c -o foo.ll
+
+target triple = "x86_64-unknown-linux-gnu"
+
+@foo = dso_local global i32 0, align 4, !dbg !0
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!7, !8, !9}
+!llvm.ident = !{!10}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "foo", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None)
+!3 = !DIFile(filename: "foo.c", directory: "/tmp")
+!4 = !{}
+!5 = !{!0}
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = !{i32 7, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"wchar_size", i32 4}
+!10 = !{!"clang version 12.0.0"}

From 5dd1c59188988a030dfc80bd20729534f3a41b46 Mon Sep 17 00:00:00 2001
From: Igor Kudrin
Date: Tue, 15 Sep 2020 11:30:30 +0700
Subject: [PATCH 0623/1079] [DebugInfo] Fix emitting DWARF64 compilation units
 (5/19).

The patch also adds a method to choose an appropriate DWARF form to
represent section offsets according to the version and format of the
debug info being produced.
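
For reference, the selection rule the new method implements can be
modeled by a small standalone function. This is a sketch with
illustrative names, not the LLVM API; the real method is a member of
DwarfDebug and returns values of LLVM's dwarf::Form enum:

  #include <cassert>
  #include <cstdint>

  enum class Form { SecOffset, Data8, Data4 };

  Form sectionOffsetForm(uint16_t DwarfVersion, bool IsDwarf64) {
    // DW_FORM_sec_offset exists since DWARFv4 and scales with the format.
    if (DwarfVersion >= 4)
      return Form::SecOffset;
    // DWARF64 was introduced in DWARFv3, so it cannot appear earlier.
    assert(!IsDwarf64 || DwarfVersion == 3);
    // Pre-v4 offsets are plain data: 8 bytes in DWARF64, 4 in DWARF32.
    return IsDwarf64 ? Form::Data8 : Form::Data4;
  }

  int main() {
    assert(sectionOffsetForm(5, true) == Form::SecOffset);
    assert(sectionOffsetForm(3, true) == Form::Data8);
    assert(sectionOffsetForm(2, false) == Form::Data4);
  }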
Differential Revision: https://reviews.llvm.org/D87014 --- llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 2 + .../lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 4 +- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 9 +++ llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 6 ++ llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp | 4 +- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 11 ++-- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 6 +- llvm/test/DebugInfo/X86/debug-info-dwarf64.ll | 63 +++++++++++++++++++ llvm/unittests/CodeGen/DIETest.cpp | 2 + 9 files changed, 93 insertions(+), 14 deletions(-) create mode 100644 llvm/test/DebugInfo/X86/debug-info-dwarf64.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index b78a47545458b..4f1ae04714fc1 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -502,6 +502,8 @@ unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_data4: return 4; + case dwarf::DW_FORM_data8: + return 8; case dwarf::DW_FORM_sec_offset: case dwarf::DW_FORM_strp: return AP->getDwarfOffsetByteSize(); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 78015897408d5..6d8186a5ee2b3 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -289,8 +289,8 @@ class DwarfCompileUnit final : public DwarfUnit { return DwarfUnit::getHeaderSize() + DWOIdSize; } unsigned getLength() { - return sizeof(uint32_t) + // Length field - getHeaderSize() + getUnitDie().getSize(); + return Asm->getUnitLengthFieldByteSize() + // Length field + getHeaderSize() + getUnitDie().getSize(); } void emitHeader(bool UseOffsets) override; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 858a89ccab608..763f5dd49dba4 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -3358,6 +3358,15 @@ uint16_t DwarfDebug::getDwarfVersion() const { return Asm->OutStreamer->getContext().getDwarfVersion(); } +dwarf::Form DwarfDebug::getDwarfSectionOffsetForm() const { + if (Asm->getDwarfVersion() >= 4) + return dwarf::Form::DW_FORM_sec_offset; + assert((!Asm->isDwarf64() || (Asm->getDwarfVersion() == 3)) && + "DWARF64 is not defined prior DWARFv3"); + return Asm->isDwarf64() ? dwarf::Form::DW_FORM_data8 + : dwarf::Form::DW_FORM_data4; +} + const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) { return SectionLabels.find(S)->second; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index ba0bb84367035..34c88f1a9c605 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -729,6 +729,12 @@ class DwarfDebug : public DebugHandlerBase { /// Returns the Dwarf Version. uint16_t getDwarfVersion() const; + /// Returns a suitable DWARF form to represent a section offset, i.e. + /// * DW_FORM_sec_offset for DWARF version >= 4; + /// * DW_FORM_data8 for 64-bit DWARFv3; + /// * DW_FORM_data4 for 32-bit DWARFv3 and DWARFv2. 
+ dwarf::Form getDwarfSectionOffsetForm() const; + /// Returns the previous CU that was being updated const DwarfCompileUnit *getPrevCU() const { return PrevCU; } void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 812e6383288fc..d9004c4453b5a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -79,8 +79,8 @@ void DwarfFile::computeSizeAndOffsets() { unsigned DwarfFile::computeSizeAndOffsetsForUnit(DwarfUnit *TheU) { // CU-relative offset is reset to 0 here. - unsigned Offset = sizeof(int32_t) + // Length of Unit Info - TheU->getHeaderSize(); // Unit-specific headers + unsigned Offset = Asm->getUnitLengthFieldByteSize() + // Length of Unit Info + TheU->getHeaderSize(); // Unit-specific headers // The return value here is CU-relative, after laying out // all of the CU DIE. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 40c741077d1ad..89174414b4654 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1695,15 +1695,15 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) { void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) { // Emit size of content not including length itself - Asm->OutStreamer->AddComment("Length of Unit"); if (!DD->useSectionsAsReferences()) { StringRef Prefix = isDwoUnit() ? "debug_info_dwo_" : "debug_info_"; MCSymbol *BeginLabel = Asm->createTempSymbol(Prefix + "start"); EndLabel = Asm->createTempSymbol(Prefix + "end"); - Asm->emitLabelDifference(EndLabel, BeginLabel, 4); + Asm->emitDwarfUnitLength(EndLabel, BeginLabel, "Length of Unit"); Asm->OutStreamer->emitLabel(BeginLabel); } else - Asm->emitInt32(getHeaderSize() + getUnitDie().getSize()); + Asm->emitDwarfUnitLength(getHeaderSize() + getUnitDie().getSize(), + "Length of Unit"); Asm->OutStreamer->AddComment("DWARF version number"); unsigned Version = DD->getDwarfVersion(); @@ -1759,10 +1759,7 @@ DIE::value_iterator DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label, const MCSymbol *Sec) { if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - return addLabel(Die, Attribute, - DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset - : dwarf::DW_FORM_data4, - Label); + return addLabel(Die, Attribute, DD->getDwarfSectionOffsetForm(), Label); return addSectionDelta(Die, Attribute, Label, Sec); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 7147da33e631e..cc91aec68b8a7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -253,9 +253,9 @@ class DwarfUnit : public DIEUnit { /// Compute the size of a header for this unit, not including the initial /// length field. virtual unsigned getHeaderSize() const { - return sizeof(int16_t) + // DWARF version number - sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t) + // Pointer Size (in bytes) + return sizeof(int16_t) + // DWARF version number + Asm->getDwarfOffsetByteSize() + // Offset Into Abbrev. Section + sizeof(int8_t) + // Pointer Size (in bytes) (DD->getDwarfVersion() >= 5 ? 
sizeof(int8_t) : 0); // DWARF v5 unit type } diff --git a/llvm/test/DebugInfo/X86/debug-info-dwarf64.ll b/llvm/test/DebugInfo/X86/debug-info-dwarf64.ll new file mode 100644 index 0000000000000..7f988b43a9fd4 --- /dev/null +++ b/llvm/test/DebugInfo/X86/debug-info-dwarf64.ll @@ -0,0 +1,63 @@ +; This checks that .debug_info can be generated in the DWARF64 format. + +; RUN: llc -mtriple=x86_64 -dwarf-version=3 -dwarf64 -filetype=obj %s -o %t3 +; RUN: llvm-dwarfdump -debug-abbrev -debug-info -v %t3 | \ +; RUN: FileCheck %s --check-prefixes=CHECK,DWARFv3 + +; RUN: llc -mtriple=x86_64 -dwarf-version=4 -dwarf64 -filetype=obj %s -o %t4 +; RUN: llvm-dwarfdump -debug-abbrev -debug-info -v %t4 | \ +; RUN: FileCheck %s --check-prefixes=CHECK,DWARFv4 + +; CHECK: .debug_abbrev contents: +; CHECK: [1] DW_TAG_compile_unit DW_CHILDREN_yes +; CHECK-NEXT: DW_AT_producer DW_FORM_strp +; CHECK-NEXT: DW_AT_language DW_FORM_data2 +; CHECK-NEXT: DW_AT_name DW_FORM_strp +; DWARFv3-NEXT: DW_AT_stmt_list DW_FORM_data8 +; DWARFv4-NEXT: DW_AT_stmt_list DW_FORM_sec_offset +; CHECK-NEXT: DW_AT_comp_dir DW_FORM_strp +; CHECK: [2] DW_TAG_variable DW_CHILDREN_no +; CHECK-NEXT: DW_AT_name DW_FORM_strp +; CHECK-NEXT: DW_AT_type DW_FORM_ref4 +; CHECK: [3] DW_TAG_base_type DW_CHILDREN_no +; CHECK-NEXT: DW_AT_name DW_FORM_strp + +; CHECK: .debug_info contents: +; CHECK: Compile Unit: length = 0x{{([[:xdigit:]]{16})}}, format = DWARF64, +; CHECK: DW_TAG_compile_unit [1] * +; CHECK-NEXT: DW_AT_producer [DW_FORM_strp] ( .debug_str[0x{{([[:xdigit:]]{16})}}] = "clang version 12.0.0") +; CHECK-NEXT: DW_AT_language [DW_FORM_data2] (DW_LANG_C99) +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{([[:xdigit:]]{16})}}] = "foo.c") +; DWARFv3-NEXT: DW_AT_stmt_list [DW_FORM_data8] (0x0000000000000000) +; DWARFv4-NEXT: DW_AT_stmt_list [DW_FORM_sec_offset] (0x0000000000000000) +; CHECK-NEXT: DW_AT_comp_dir [DW_FORM_strp] ( .debug_str[0x{{([[:xdigit:]]{16})}}] = "/tmp") +; CHECK: DW_TAG_variable [2] +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{([[:xdigit:]]{16})}}] = "foo") +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.+}} => {{.+}} "int") +; CHECK: DW_TAG_base_type [3] +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{([[:xdigit:]]{16})}}] = "int") + +; IR generated and reduced from: +; $ cat foo.c +; int foo; +; $ clang -g -S -emit-llvm foo.c -o foo.ll + +target triple = "x86_64-unknown-linux-gnu" + +@foo = dso_local global i32 0, align 4, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9} +!llvm.ident = !{!10} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "foo", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "foo.c", directory: "/tmp") +!4 = !{} +!5 = !{!0} +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{i32 7, !"Dwarf Version", i32 4} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{!"clang version 12.0.0"} diff --git a/llvm/unittests/CodeGen/DIETest.cpp b/llvm/unittests/CodeGen/DIETest.cpp index 4640d65e69580..08227b6d2088c 100644 --- a/llvm/unittests/CodeGen/DIETest.cpp +++ b/llvm/unittests/CodeGen/DIETest.cpp @@ -117,10 +117,12 @@ INSTANTIATE_TEST_CASE_P( DIETestParams, 
DIELabelFixture, testing::Values(
        DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_data4, 4u},
+       DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_data8, 8u},
        DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_sec_offset, 4u},
        DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_strp, 4u},
        DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_addr, 8u},
        DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_data4, 4u},
+       DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_data8, 8u},
        DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_sec_offset, 8u},
        DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_strp, 8u},
        DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_addr, 8u}), );

From cae7c1eb781d591aa3d16ec6bc3a8fe1ace6e4ef Mon Sep 17 00:00:00 2001
From: Igor Kudrin
Date: Tue, 15 Sep 2020 11:30:38 +0700
Subject: [PATCH 0624/1079] [DebugInfo] Use a common method to determine a
 suitable form for section offsets (6/19).

This is mostly an NFC patch because the involved methods are used when
emitting DWO files, which is incompatible with DWARFv3, or for platforms
where DWARF64 is not supported yet.

Differential Revision: https://reviews.llvm.org/D87015
---
 llvm/lib/CodeGen/AsmPrinter/DIE.cpp       | 2 ++
 llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 8 ++------
 llvm/unittests/CodeGen/DIETest.cpp        | 2 ++
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 4f1ae04714fc1..4ec470b63db84 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -550,6 +550,8 @@ unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   switch (Form) {
   case dwarf::DW_FORM_data4:
     return 4;
+  case dwarf::DW_FORM_data8:
+    return 8;
   case dwarf::DW_FORM_sec_offset:
     return AP->getDwarfOffsetByteSize();
   default:
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 89174414b4654..0173a8ea2fac4 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -300,10 +300,7 @@ void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) {
 
 void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
                                  uint64_t Integer) {
-  if (DD->getDwarfVersion() >= 4)
-    addUInt(Die, Attribute, dwarf::DW_FORM_sec_offset, Integer);
-  else
-    addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer);
+  addUInt(Die, Attribute, DD->getDwarfSectionOffsetForm(), Integer);
 }
 
 unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
@@ -1750,8 +1747,7 @@ DIE::value_iterator
 DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
                            const MCSymbol *Hi, const MCSymbol *Lo) {
   return Die.addValue(DIEValueAllocator, Attribute,
-                      DD->getDwarfVersion() >= 4 ?
dwarf::DW_FORM_sec_offset - : dwarf::DW_FORM_data4, + DD->getDwarfSectionOffsetForm(), new (DIEValueAllocator) DIEDelta(Hi, Lo)); } diff --git a/llvm/unittests/CodeGen/DIETest.cpp b/llvm/unittests/CodeGen/DIETest.cpp index 08227b6d2088c..44fb0c0bf6c88 100644 --- a/llvm/unittests/CodeGen/DIETest.cpp +++ b/llvm/unittests/CodeGen/DIETest.cpp @@ -162,8 +162,10 @@ INSTANTIATE_TEST_CASE_P( DIETestParams, DIEDeltaFixture, testing::Values( DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_data4, 4u}, + DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_data8, 8u}, DIETestParams{4, dwarf::DWARF32, dwarf::DW_FORM_sec_offset, 4u}, DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_data4, 4u}, + DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_data8, 8u}, DIETestParams{4, dwarf::DWARF64, dwarf::DW_FORM_sec_offset, 8u}), ); struct DIELocListFixture : public DIEFixtureBase { From 26f1f18831342e9c5e137e68d067d7383d72f30d Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 15 Sep 2020 11:30:46 +0700 Subject: [PATCH 0625/1079] [DebugInfo] Fix emitting the DW_AT_location attribute for 64-bit DWARFv3 (7/19). The patch uses a common method to determine the appropriate form for the value of the attribute. Differential Revision: https://reviews.llvm.org/D87016 --- .../lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 8 +++----- .../DebugInfo/X86/DW_AT_location-reference.ll | 17 ++++++++++++++--- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 602b1bceddc3c..4f8c206d66d65 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1346,11 +1346,9 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, /// Add a Dwarf loclistptr attribute data and value. void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index) { - dwarf::Form Form = dwarf::DW_FORM_data4; - if (DD->getDwarfVersion() == 4) - Form =dwarf::DW_FORM_sec_offset; - if (DD->getDwarfVersion() >= 5) - Form =dwarf::DW_FORM_loclistx; + dwarf::Form Form = (DD->getDwarfVersion() >= 5) + ? 
dwarf::DW_FORM_loclistx + : DD->getDwarfSectionOffsetForm(); Die.addValue(DIEValueAllocator, Attribute, Form, DIELocList(Index)); } diff --git a/llvm/test/DebugInfo/X86/DW_AT_location-reference.ll b/llvm/test/DebugInfo/X86/DW_AT_location-reference.ll index d516a4c5d0813..3fe6330d9ae9e 100644 --- a/llvm/test/DebugInfo/X86/DW_AT_location-reference.ll +++ b/llvm/test/DebugInfo/X86/DW_AT_location-reference.ll @@ -1,8 +1,17 @@ ; RUN: llc -O1 -filetype=obj -mtriple=x86_64-apple-darwin < %s > %t -; RUN: llvm-dwarfdump -v %t | FileCheck %s +; RUN: llvm-dwarfdump -v %t | FileCheck %s --check-prefixes=CHECK,DWARFv4 ; RUN: llvm-objdump -r %t | FileCheck --check-prefix=DARWIN %s + ; RUN: llc -O1 -filetype=obj -mtriple=x86_64-pc-linux-gnu < %s > %t -; RUN: llvm-dwarfdump -v %t | FileCheck %s +; RUN: llvm-dwarfdump -v %t | FileCheck %s --check-prefixes=CHECK,DWARFv4 +; RUN: llvm-objdump -r %t | FileCheck --check-prefix=LINUX %s + +; RUN: llc -dwarf-version=3 -O1 -filetype=obj -mtriple=x86_64-pc-linux-gnu < %s > %t +; RUN: llvm-dwarfdump -debug-info -v %t | FileCheck %s --check-prefixes=CHECK,DWARF32v3 +; RUN: llvm-objdump -r %t | FileCheck --check-prefix=LINUX %s + +; RUN: llc -dwarf64 -dwarf-version=3 -O1 -filetype=obj -mtriple=x86_64-pc-linux-gnu < %s > %t +; RUN: llvm-dwarfdump -debug-info -v %t | FileCheck %s --check-prefixes=CHECK,DWARF64v3 ; RUN: llvm-objdump -r %t | FileCheck --check-prefix=LINUX %s ; PR9493 @@ -31,7 +40,9 @@ ; // The 'x' variable and its symbol reference location ; CHECK: .debug_info contents: ; CHECK: DW_TAG_variable -; CHECK-NEXT: DW_AT_location [DW_FORM_sec_offset] (0x00000000 +; DWARF32v3-NEXT: DW_AT_location [DW_FORM_data4] (0x00000000 +; DWARF64v3-NEXT: DW_AT_location [DW_FORM_data8] (0x00000000 +; DWARFv4-NEXT: DW_AT_location [DW_FORM_sec_offset] (0x00000000 ; Check that the location contains only 4 ranges. ; CHECK-NEXT: [0x{{[0-9a-f]*}}, 0x{{[0-9a-f]*}}) ; CHECK-NEXT: [0x{{[0-9a-f]*}}, 0x{{[0-9a-f]*}}) From 383d34c077ae7f845bf751936f59f12598e60b3e Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 15 Sep 2020 11:30:53 +0700 Subject: [PATCH 0626/1079] [DebugInfo] Fix emitting DWARF64 .debug_str_offsets sections (8/19). The patch fixes calculating the size of the table and emitting the unit length field. Differential Revision: https://reviews.llvm.org/D87017 --- .../CodeGen/AsmPrinter/DwarfStringPool.cpp | 6 +- .../X86/debug-str-offsets-dwarf64.ll | 57 +++++++++++++++++++ 2 files changed, 60 insertions(+), 3 deletions(-) create mode 100644 llvm/test/DebugInfo/X86/debug-str-offsets-dwarf64.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index a43929d8e8f70..731d7c19c47b5 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -58,13 +58,13 @@ void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm, if (getNumIndexedStrings() == 0) return; Asm.OutStreamer->SwitchSection(Section); - unsigned EntrySize = 4; - // FIXME: DWARF64 + unsigned EntrySize = Asm.getDwarfOffsetByteSize(); // We are emitting the header for a contribution to the string offsets // table. The header consists of an entry with the contribution's // size (not including the size of the length field), the DWARF version and // 2 bytes of padding. 
-  Asm.emitInt32(getNumIndexedStrings() * EntrySize + 4);
+  Asm.emitDwarfUnitLength(getNumIndexedStrings() * EntrySize + 4,
+                          "Length of String Offsets Set");
   Asm.emitInt16(Asm.getDwarfVersion());
   Asm.emitInt16(0);
   // Define the symbol that marks the start of the contribution. It is
diff --git a/llvm/test/DebugInfo/X86/debug-str-offsets-dwarf64.ll b/llvm/test/DebugInfo/X86/debug-str-offsets-dwarf64.ll
new file mode 100644
index 0000000000000..043c72e9b3c48
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/debug-str-offsets-dwarf64.ll
@@ -0,0 +1,57 @@
+; This checks that .debug_str_offsets can be generated in the DWARF64 format.
+
+; RUN: llc -mtriple=x86_64 -dwarf-version=5 -dwarf64 -filetype=obj %s -o %t
+; RUN: llvm-dwarfdump -debug-info -debug-str -debug-str-offsets -v %t | \
+; RUN: FileCheck %s
+
+; CHECK: .debug_info contents:
+; CHECK-NEXT: Compile Unit: {{.*}}, format = DWARF64,
+; CHECK: DW_TAG_compile_unit [1] *
+; CHECK: DW_AT_producer [DW_FORM_strx1] (indexed (00000000) string = "clang version 12.0.0")
+; CHECK: DW_AT_name [DW_FORM_strx1] (indexed (00000001) string = "foo.c")
+; CHECK: DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x0000000000000010)
+; CHECK: DW_AT_comp_dir [DW_FORM_strx1] (indexed (00000002) string = "/tmp")
+; CHECK: DW_TAG_variable [2]
+; CHECK: DW_AT_name [DW_FORM_strx1] (indexed (00000003) string = "foo")
+; CHECK: DW_TAG_base_type [3]
+; CHECK: DW_AT_name [DW_FORM_strx1] (indexed (00000004) string = "int")
+
+; CHECK: .debug_str contents:
+; CHECK-NEXT: 0x00000000: "clang version 12.0.0"
+; CHECK-NEXT: 0x00000015: "foo.c"
+; CHECK-NEXT: 0x0000001b: "/tmp"
+; CHECK-NEXT: 0x00000020: "foo"
+; CHECK-NEXT: 0x00000024: "int"
+
+; CHECK: .debug_str_offsets contents:
+; CHECK-NEXT: 0x00000000: Contribution size = 44, Format = DWARF64, Version = 5
+; CHECK-NEXT: 0x00000010: 0000000000000000 "clang version 12.0.0"
+; CHECK-NEXT: 0x00000018: 0000000000000015 "foo.c"
+; CHECK-NEXT: 0x00000020: 000000000000001b "/tmp"
+; CHECK-NEXT: 0x00000028: 0000000000000020 "foo"
+; CHECK-NEXT: 0x00000030: 0000000000000024 "int"
+
+; IR generated and reduced from:
+; $ cat foo.c
+; int foo;
+; $ clang -g -S -emit-llvm foo.c -o foo.ll
+
+target triple = "x86_64-unknown-linux-gnu"
+
+@foo = dso_local global i32 0, align 4, !dbg !0
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!7, !8, !9}
+!llvm.ident = !{!10}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "foo", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None)
+!3 = !DIFile(filename: "foo.c", directory: "/tmp")
+!4 = !{}
+!5 = !{!0}
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = !{i32 7, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"wchar_size", i32 4}
+!10 = !{!"clang version 12.0.0"}

From 924dc5807690f9ee0a84e407e8cb943511845bf5 Mon Sep 17 00:00:00 2001
From: Igor Kudrin
Date: Tue, 15 Sep 2020 11:31:00 +0700
Subject: [PATCH 0627/1079] [DebugInfo] Fix emitting DWARF64 DWO compilation
 units and string offset tables (9/19).

These two fixes belong together because llvm-dwarfdump is unable to dump
a table when another one is malformed.
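
Both fixes follow the same pattern as the rest of the series: a
hard-coded 4-byte emission becomes a format-aware AsmPrinter query.
A standalone model of the two widths involved (illustrative code, not
the AsmPrinter implementation):

  #include <cassert>

  struct FormatModel {
    bool IsDwarf64;
    // Width of offsets and of values emitted via emitDwarfLengthOrOffset().
    unsigned offsetByteSize() const { return IsDwarf64 ? 8 : 4; }
    // Width of an initial length field; DWARF64 prepends the 0xffffffff
    // escape to the 8-byte length.
    unsigned unitLengthFieldByteSize() const { return IsDwarf64 ? 12 : 4; }
  };

  int main() {
    FormatModel D32{false}, D64{true};
    assert(D32.offsetByteSize() == 4 && D64.offsetByteSize() == 8);
    assert(D32.unitLengthFieldByteSize() == 4 &&
           D64.unitLengthFieldByteSize() == 12);
  }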
Differential Revision: https://reviews.llvm.org/D87018 --- .../CodeGen/AsmPrinter/DwarfStringPool.cpp | 2 +- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 2 +- .../DebugInfo/X86/debug-info-dwo-dwarf64.ll | 32 +++++++++++ .../X86/debug-str-offsets-dwo-dwarf64.ll | 56 +++++++++++++++++++ 4 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 llvm/test/DebugInfo/X86/debug-info-dwo-dwarf64.ll create mode 100644 llvm/test/DebugInfo/X86/debug-str-offsets-dwo-dwarf64.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index 731d7c19c47b5..a4cb497ec5024 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -120,7 +120,7 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, } Asm.OutStreamer->SwitchSection(OffsetSection); - unsigned size = 4; // FIXME: DWARF64 is 8. + unsigned size = Asm.getDwarfOffsetByteSize(); for (const auto &Entry : Entries) if (UseRelativeOffsets) Asm.emitDwarfStringOffset(Entry->getValue()); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 0173a8ea2fac4..8f738936bd516 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1720,7 +1720,7 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) { Asm->OutStreamer->AddComment("Offset Into Abbrev. Section"); const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); if (UseOffsets) - Asm->emitInt32(0); + Asm->emitDwarfLengthOrOffset(0); else Asm->emitDwarfSymbolReference( TLOF.getDwarfAbbrevSection()->getBeginSymbol(), false); diff --git a/llvm/test/DebugInfo/X86/debug-info-dwo-dwarf64.ll b/llvm/test/DebugInfo/X86/debug-info-dwo-dwarf64.ll new file mode 100644 index 0000000000000..acc2fded69129 --- /dev/null +++ b/llvm/test/DebugInfo/X86/debug-info-dwo-dwarf64.ll @@ -0,0 +1,32 @@ +; This checks that .debug_info.dwo can be generated in the DWARF64 format. 
+ +; RUN: llc -mtriple=x86_64 -dwarf-version=5 -dwarf64 -split-dwarf-file=foo.dwo -filetype=obj %s -o %t +; RUN: llvm-dwarfdump -debug-info %t | FileCheck %s + +; CHECK: .debug_info.dwo contents: +; CHECK-NEXT: Compile Unit: {{.+}}, format = DWARF64, version = 0x0005, unit_type = DW_UT_split_compile, abbr_offset = 0x0000, + +; IR generated and reduced from: +; $ cat foo.c +; int foo; +; $ clang -g -S -emit-llvm foo.c -o foo.ll + +target triple = "x86_64-unknown-linux-gnu" + +@foo = dso_local global i32 0, align 4, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9} +!llvm.ident = !{!10} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "foo", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "foo.c", directory: "/tmp") +!4 = !{} +!5 = !{!0} +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{i32 7, !"Dwarf Version", i32 4} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{!"clang version 12.0.0"} diff --git a/llvm/test/DebugInfo/X86/debug-str-offsets-dwo-dwarf64.ll b/llvm/test/DebugInfo/X86/debug-str-offsets-dwo-dwarf64.ll new file mode 100644 index 0000000000000..1366c195f60be --- /dev/null +++ b/llvm/test/DebugInfo/X86/debug-str-offsets-dwo-dwarf64.ll @@ -0,0 +1,56 @@ +; This checks that .debug_str_offsets.dwo can be generated in the DWARF64 format. + +; RUN: llc -mtriple=x86_64 -dwarf-version=5 -dwarf64 -split-dwarf-file=foo.dwo -filetype=obj %s -o %t +; RUN: llvm-dwarfdump -debug-info -debug-str -debug-str-offsets -v %t | \ +; RUN: FileCheck %s + +; CHECK: .debug_info.dwo contents: +; CHECK-NEXT: Compile Unit: {{.*}}, format = DWARF64, +; CHECK: DW_TAG_compile_unit [1] * +; CHECK: DW_AT_producer [DW_FORM_strx1] (indexed (00000002) string = "clang version 12.0.0") +; CHECK: DW_AT_name [DW_FORM_strx1] (indexed (00000003) string = "foo.c") +; CHECK: DW_AT_dwo_name [DW_FORM_strx1] (indexed (00000004) string = "foo.dwo") +; CHECK: DW_TAG_variable [2] +; CHECK: DW_AT_name [DW_FORM_strx1] (indexed (00000000) string = "foo") +; CHECK: DW_TAG_base_type [3] +; CHECK: DW_AT_name [DW_FORM_strx1] (indexed (00000001) string = "int") + +; CHECK: .debug_str.dwo contents: +; CHECK-NEXT: 0x00000000: "foo" +; CHECK-NEXT: 0x00000004: "int" +; CHECK-NEXT: 0x00000008: "clang version 12.0.0" +; CHECK-NEXT: 0x0000001d: "foo.c" +; CHECK-NEXT: 0x00000023: "foo.dwo" + +; CHECK: .debug_str_offsets.dwo contents: +; CHECK-NEXT: 0x00000000: Contribution size = 44, Format = DWARF64, Version = 5 +; CHECK-NEXT: 0x00000010: 0000000000000000 "foo" +; CHECK-NEXT: 0x00000018: 0000000000000004 "int" +; CHECK-NEXT: 0x00000020: 0000000000000008 "clang version 12.0.0" +; CHECK-NEXT: 0x00000028: 000000000000001d "foo.c" +; CHECK-NEXT: 0x00000030: 0000000000000023 "foo.dwo" + +; IR generated and reduced from: +; $ cat foo.c +; int foo; +; $ clang -g -S -emit-llvm foo.c -o foo.ll + +target triple = "x86_64-unknown-linux-gnu" + +@foo = dso_local global i32 0, align 4, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9} +!llvm.ident = !{!10} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "foo", scope: !2, file: !3, line: 1, type: !6, isLocal: 
false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "foo.c", directory: "/tmp") +!4 = !{} +!5 = !{!0} +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{i32 7, !"Dwarf Version", i32 4} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{!"clang version 12.0.0"} From 18f23b3ecc6d0cec31c655b7ae9054cf0edf630e Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 15 Sep 2020 11:31:07 +0700 Subject: [PATCH 0628/1079] [DebugInfo] Fix emitting DWARF64 type units (10/19). The patch fixes emitting the offset to the type DIE. All other fields are already fixed in previous patches. Differential Revision: https://reviews.llvm.org/D87021 --- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 3 +- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 2 +- .../test/DebugInfo/X86/debug-types-dwarf64.ll | 55 +++++++++++++++++++ 3 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 llvm/test/DebugInfo/X86/debug-types-dwarf64.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 8f738936bd516..b469f91401f2c 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1739,8 +1739,7 @@ void DwarfTypeUnit::emitHeader(bool UseOffsets) { Asm->OutStreamer->emitIntValue(TypeSignature, sizeof(TypeSignature)); Asm->OutStreamer->AddComment("Type DIE Offset"); // In a skeleton type unit there is no type DIE so emit a zero offset. - Asm->OutStreamer->emitIntValue(Ty ? Ty->getOffset() : 0, - sizeof(Ty->getOffset())); + Asm->emitDwarfLengthOrOffset(Ty ? Ty->getOffset() : 0); } DIE::value_iterator diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index cc91aec68b8a7..918e5045828d5 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -356,7 +356,7 @@ class DwarfTypeUnit final : public DwarfUnit { void emitHeader(bool UseOffsets) override; unsigned getHeaderSize() const override { return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature - sizeof(uint32_t); // Type DIE Offset + Asm->getDwarfOffsetByteSize(); // Type DIE Offset } void addGlobalName(StringRef Name, const DIE &Die, const DIScope *Context) override; diff --git a/llvm/test/DebugInfo/X86/debug-types-dwarf64.ll b/llvm/test/DebugInfo/X86/debug-types-dwarf64.ll new file mode 100644 index 0000000000000..7e88d7ef6a3ba --- /dev/null +++ b/llvm/test/DebugInfo/X86/debug-types-dwarf64.ll @@ -0,0 +1,55 @@ +; This checks that .debug_types can be generated in the DWARF64 format. 
+ +; RUN: llc -mtriple=x86_64 -dwarf-version=4 -dwarf64 -generate-type-units -filetype=obj %s -o %t +; RUN: llvm-dwarfdump -debug-types -v %t | FileCheck %s + +; CHECK: .debug_types contents: +; CHECK-NEXT: Type Unit: {{.+}}, format = DWARF64, {{.+}}, type_offset = 0x[[OFF:.+]] (next unit at + +; CHECK: 0x00000027: DW_TAG_type_unit + +; CHECK: 0x0000[[OFF]]: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_calling_convention +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ({{.+}} = "Foo") + +; CHECK: 0x{{.+}}: DW_TAG_member +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ({{.+}} = "bar") +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x[[BTOFF:.+]] => {0x0000[[BTOFF]]} "int") + +; CHECK: 0x{{.+}}: NULL + +; CHECK: 0x0000[[BTOFF]]: DW_TAG_base_type [4] +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ({{.+}} = "int") + +; CHECK: 0x{{.+}}: NULL + +; IR generated and reduced from: +; $ cat foo.cc +; struct Foo { int bar; }; +; Foo foo; +; $ clang -g -S -emit-llvm foo.cc -o foo.ll + +target triple = "x86_64-unknown-linux-gnu" + +%struct.Foo = type { i32 } + +@foo = dso_local global %struct.Foo zeroinitializer, align 4, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!10, !11, !12} +!llvm.ident = !{!13} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "foo", scope: !2, file: !3, line: 2, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "foo.cc", directory: "/tmp") +!4 = !{} +!5 = !{!0} +!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Foo", file: !3, line: 1, size: 32, flags: DIFlagTypePassByValue, elements: !7, identifier: "_ZTS3Foo") +!7 = !{!8} +!8 = !DIDerivedType(tag: DW_TAG_member, name: "bar", scope: !6, file: !3, line: 1, baseType: !9, size: 32) +!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !{i32 7, !"Dwarf Version", i32 4} +!11 = !{i32 2, !"Debug Info Version", i32 3} +!12 = !{i32 1, !"wchar_size", i32 4} +!13 = !{!"clang version 12.0.0"} From b118030f3fa68b308d149d7d4303e0623ead0463 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 15 Sep 2020 11:31:14 +0700 Subject: [PATCH 0629/1079] [DebugInfo] Fix emitting DWARF64 .debug_aranges sections (11/19). The patch fixes calculating the size of the table and emitting the fields which depend on the DWARF format by using methods that choose appropriate sizes automatically. Differential Revision: https://reviews.llvm.org/D87012 --- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 16 ++++---- .../DebugInfo/X86/debug-aranges-dwarf64.ll | 39 +++++++++++++++++++ 2 files changed, 47 insertions(+), 8 deletions(-) create mode 100644 llvm/test/DebugInfo/X86/debug-aranges-dwarf64.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 763f5dd49dba4..2938444e0ff72 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2850,23 +2850,23 @@ void DwarfDebug::emitDebugARanges() { // Emit size of content not including length itself. 
unsigned ContentSize = - sizeof(int16_t) + // DWARF ARange version number - sizeof(int32_t) + // Offset of CU in the .debug_info section - sizeof(int8_t) + // Pointer Size (in bytes) - sizeof(int8_t); // Segment Size (in bytes) + sizeof(int16_t) + // DWARF ARange version number + Asm->getDwarfOffsetByteSize() + // Offset of CU in the .debug_info + // section + sizeof(int8_t) + // Pointer Size (in bytes) + sizeof(int8_t); // Segment Size (in bytes) unsigned TupleSize = PtrSize * 2; // 7.20 in the Dwarf specs requires the table to be aligned to a tuple. - unsigned Padding = - offsetToAlignment(sizeof(int32_t) + ContentSize, Align(TupleSize)); + unsigned Padding = offsetToAlignment( + Asm->getUnitLengthFieldByteSize() + ContentSize, Align(TupleSize)); ContentSize += Padding; ContentSize += (List.size() + 1) * TupleSize; // For each compile unit, write the list of spans it covers. - Asm->OutStreamer->AddComment("Length of ARange Set"); - Asm->emitInt32(ContentSize); + Asm->emitDwarfUnitLength(ContentSize, "Length of ARange Set"); Asm->OutStreamer->AddComment("DWARF Arange version number"); Asm->emitInt16(dwarf::DW_ARANGES_VERSION); Asm->OutStreamer->AddComment("Offset Into Debug Info Section"); diff --git a/llvm/test/DebugInfo/X86/debug-aranges-dwarf64.ll b/llvm/test/DebugInfo/X86/debug-aranges-dwarf64.ll new file mode 100644 index 0000000000000..7e037ac125009 --- /dev/null +++ b/llvm/test/DebugInfo/X86/debug-aranges-dwarf64.ll @@ -0,0 +1,39 @@ +; This checks that .debug_aranges can be generated in the DWARF64 format. + +; RUN: llc -mtriple=x86_64 -dwarf64 -generate-arange-section -filetype=obj %s -o %t +; RUN: llvm-dwarfdump -debug-aranges %t | FileCheck %s + +; CHECK: .debug_aranges contents: +; CHECK-NEXT: Address Range Header: +; CHECK-SAME: length = 0x0000000000000034, +; CHECK-SAME: format = DWARF64, +; CHECK-SAME: version = 0x0002, +; CHECK-SAME: cu_offset = 0x0000000000000000, +; CHECK-SAME: addr_size = 0x08, +; CHECK-SAME: seg_size = 0x00 +; CHECK-NEXT: [0x0000000000000000, 0x0000000000000004) + +; IR generated and reduced from: +; $ cat foo.c +; int foo; +; $ clang -g -S -emit-llvm foo.c -o foo.ll + +target triple = "x86_64-unknown-linux-gnu" + +@foo = dso_local global i32 0, align 4, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9} +!llvm.ident = !{!10} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "foo", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "foo.c", directory: "/tmp") +!4 = !{} +!5 = !{!0} +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{i32 7, !"Dwarf Version", i32 4} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{!"clang version 12.0.0"} From 03b09c6b68bbce80bea47db40ad85809d363b260 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 15 Sep 2020 11:31:20 +0700 Subject: [PATCH 0630/1079] [DebugInfo] Fix emitting pre-v5 name lookup tables in the DWARF64 format (12/19). The transition is done by using methods of AsmPrinter which automatically emit values in compliance with the selected DWARF format. 
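
In these tables every offset-like field shares the format's offset
width; only the 2-byte version stays fixed. A standalone model of the
length value stored in a set's header (a sketch of the layout, not the
emitter; GNU-style sets would add one descriptor byte per entry):

  #include <cassert>
  #include <cstdint>
  #include <string>
  #include <vector>

  uint64_t pubSetBodySize(const std::vector<std::string> &Names,
                          bool IsDwarf64) {
    uint64_t Off = IsDwarf64 ? 8 : 4;
    uint64_t Size = 2 + Off + Off;  // version, CU offset, CU length
    for (const std::string &N : Names)
      Size += Off + N.size() + 1;   // DIE offset + NUL-terminated name
    return Size + Off;              // terminating zero "End Mark"
  }

  int main() {
    // Matches the added test: pubnames with "foo" dumps length = 0x26,
    // and pubtypes with "Foo" and "int" dumps length = 0x32, in DWARF64.
    assert(pubSetBodySize({"foo"}, true) == 0x26);
    assert(pubSetBodySize({"Foo", "int"}, true) == 0x32);
  }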
Differential Revision: https://reviews.llvm.org/D87013 --- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 10 ++-- .../DebugInfo/X86/debug-pubtables-dwarf64.ll | 54 +++++++++++++++++++ 2 files changed, 59 insertions(+), 5 deletions(-) create mode 100644 llvm/test/DebugInfo/X86/debug-pubtables-dwarf64.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 2938444e0ff72..ced05a27c4e65 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2336,10 +2336,10 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name, TheU = Skeleton; // Emit the header. - Asm->OutStreamer->AddComment("Length of Public " + Name + " Info"); MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin"); MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end"); - Asm->emitLabelDifference(EndLabel, BeginLabel, 4); + Asm->emitDwarfUnitLength(EndLabel, BeginLabel, + "Length of Public " + Name + " Info"); Asm->OutStreamer->emitLabel(BeginLabel); @@ -2350,7 +2350,7 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name, emitSectionReference(*TheU); Asm->OutStreamer->AddComment("Compilation Unit Length"); - Asm->emitInt32(TheU->getLength()); + Asm->emitDwarfLengthOrOffset(TheU->getLength()); // Emit the pubnames for this compilation unit. for (const auto &GI : Globals) { @@ -2358,7 +2358,7 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name, const DIE *Entity = GI.second; Asm->OutStreamer->AddComment("DIE offset"); - Asm->emitInt32(Entity->getOffset()); + Asm->emitDwarfLengthOrOffset(Entity->getOffset()); if (GnuStyle) { dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity); @@ -2373,7 +2373,7 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name, } Asm->OutStreamer->AddComment("End Mark"); - Asm->emitInt32(0); + Asm->emitDwarfLengthOrOffset(0); Asm->OutStreamer->emitLabel(EndLabel); } diff --git a/llvm/test/DebugInfo/X86/debug-pubtables-dwarf64.ll b/llvm/test/DebugInfo/X86/debug-pubtables-dwarf64.ll new file mode 100644 index 0000000000000..5ac3551e68d35 --- /dev/null +++ b/llvm/test/DebugInfo/X86/debug-pubtables-dwarf64.ll @@ -0,0 +1,54 @@ +; This checks that .debug_pubnames and .debug_pubtypes can be generated in the DWARF64 format. 
+ +; RUN: llc -mtriple=x86_64 -dwarf64 -filetype=obj %s -o %t +; RUN: llvm-dwarfdump -debug-info -debug-pubnames -debug-pubtypes %t | FileCheck %s + +; CHECK: .debug_info contents: +; CHECK: 0x[[VAR:.+]]: DW_TAG_variable +; CHECK-NEXT: DW_AT_name ("foo") +; CHECK: 0x[[STRUCT:.+]]: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_name ("Foo") +; CHECK: 0x[[BASET:.+]]: DW_TAG_base_type +; CHECK-NEXT: DW_AT_name ("int") + +; CHECK: .debug_pubnames contents: +; CHECK-NEXT: length = 0x0000000000000026, format = DWARF64, version = 0x0002, unit_offset = +; CHECK-NEXT: Offset Name +; CHECK-NEXT: 0x00000000[[VAR]] "foo" + +; CHECK: .debug_pubtypes contents: +; CHECK-NEXT: length = 0x0000000000000032, format = DWARF64, version = 0x0002, unit_offset = +; CHECK-NEXT: Offset Name +; CHECK-NEXT: 0x00000000[[STRUCT]] "Foo" +; CHECK-NEXT: 0x00000000[[BASET]] "int" + +; IR generated and reduced from: +; $ cat foo.c +; struct Foo { int bar; }; +; struct Foo foo; +; $ clang -g -gpubnames -S -emit-llvm foo.c -o foo.ll + +target triple = "x86_64-unknown-linux-gnu" + +%struct.Foo = type { i32 } + +@foo = dso_local global %struct.Foo zeroinitializer, align 4, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!10, !11, !12} +!llvm.ident = !{!13} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "foo", scope: !2, file: !3, line: 2, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false) +!3 = !DIFile(filename: "foo.c", directory: "/tmp") +!4 = !{} +!5 = !{!0} +!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Foo", file: !3, line: 1, size: 32, elements: !7) +!7 = !{!8} +!8 = !DIDerivedType(tag: DW_TAG_member, name: "bar", scope: !6, file: !3, line: 1, baseType: !9, size: 32) +!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !{i32 7, !"Dwarf Version", i32 4} +!11 = !{i32 2, !"Debug Info Version", i32 3} +!12 = !{i32 1, !"wchar_size", i32 4} +!13 = !{!"clang version 12.0.0"} From f9b242fe24f764166f818b3260c0635fc0bef6e9 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 15 Sep 2020 11:31:28 +0700 Subject: [PATCH 0631/1079] [DebugInfo] Fix emitting DWARF64 .debug_rnglists sections (13/19). The size of the offsets in the table depends on the DWARF format. 
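
Concretely, the offsets array that follows the header holds one entry
per range list, each a label difference from the table base emitted at
the offset width, so the lists themselves start right after the array.
A minimal model (illustrative, not the emitter):

  #include <cassert>

  // Distance from the table base to the first range list: the base is
  // followed by Count offset-array entries of the format's offset width.
  unsigned firstListOffset(unsigned Count, bool IsDwarf64) {
    unsigned Width = IsDwarf64 ? 8 : 4;
    return Count * Width;
  }

  int main() {
    // Matches the updated test: with a single list the entry reads 0x4
    // under DWARF32 and 0x8 under DWARF64.
    assert(firstListOffset(1, false) == 0x4);
    assert(firstListOffset(1, true) == 0x8);
  }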
Differential Revision: https://reviews.llvm.org/D87019 --- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 3 +- .../DebugInfo/X86/split-dwarf-v5-ranges.ll | 41 +++++++++++-------- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index ced05a27c4e65..77a723a88f744 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2549,7 +2549,8 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, Asm->OutStreamer->emitLabel(Holder.getRnglistsTableBaseSym()); for (const RangeSpanList &List : Holder.getRangeLists()) - Asm->emitLabelDifference(List.Label, Holder.getRnglistsTableBaseSym(), 4); + Asm->emitLabelDifference(List.Label, Holder.getRnglistsTableBaseSym(), + Asm->getDwarfOffsetByteSize()); return TableEnd; } diff --git a/llvm/test/DebugInfo/X86/split-dwarf-v5-ranges.ll b/llvm/test/DebugInfo/X86/split-dwarf-v5-ranges.ll index 183787620b7d3..bf9b24387c15d 100644 --- a/llvm/test/DebugInfo/X86/split-dwarf-v5-ranges.ll +++ b/llvm/test/DebugInfo/X86/split-dwarf-v5-ranges.ll @@ -1,22 +1,29 @@ -; RUN: llc -split-dwarf-file=foo.dwo -mtriple=x86_64-unknown-linux-gnu -filetype=obj < %s \ -; RUN: | llvm-dwarfdump -v -debug-info -debug-rnglists - | FileCheck %s +; RUN: llc -split-dwarf-file=foo.dwo -mtriple=x86_64-unknown-linux-gnu -filetype=obj %s -o %t32 +; RUN: llvm-dwarfdump -v -debug-info -debug-rnglists %t32 | \ +; RUN: FileCheck %s --check-prefixes=CHECK,DWARF32 -; CHECK: .debug_info contents: -; CHECK: .debug_info.dwo contents: -; CHECK: DW_AT_ranges [DW_FORM_rnglistx] (indexed (0x0) rangelist = 0x00000010 -; CHECK: [0x0000000000000001, 0x000000000000000c) ".text" -; CHECK: [0x000000000000000e, 0x0000000000000013) ".text") +; RUN: llc -dwarf64 -split-dwarf-file=foo.dwo -mtriple=x86_64-unknown-linux-gnu -filetype=obj %s -o %t64 +; RUN: llvm-dwarfdump -v -debug-info -debug-rnglists %t64 | \ +; RUN: FileCheck %s --check-prefixes=CHECK,DWARF64 -; CHECK: .debug_rnglists.dwo contents: -; CHECK: 0x00000000: range list header: length = 0x00000015, format = DWARF32, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000001 -; CHECK: offsets: [ -; CHECK: 0x00000004 => 0x00000010 -; CHECK: ] -; CHECK: ranges: -; CHECK: 0x00000010: [DW_RLE_base_addressx]: 0x0000000000000000 -; CHECK: 0x00000012: [DW_RLE_offset_pair ]: 0x0000000000000001, 0x000000000000000c => [0x0000000000000001, 0x000000000000000c) -; CHECK: 0x00000015: [DW_RLE_offset_pair ]: 0x000000000000000e, 0x0000000000000013 => [0x000000000000000e, 0x0000000000000013) -; CHECK: 0x00000018: [DW_RLE_end_of_list ] +; CHECK: .debug_info contents: +; CHECK: .debug_info.dwo contents: +; CHECK: DW_AT_ranges [DW_FORM_rnglistx] (indexed (0x0) rangelist = 0x[[#%.8x,RNG_OFF:]] +; CHECK: [0x0000000000000001, 0x000000000000000c) ".text" +; CHECK: [0x000000000000000e, 0x0000000000000013) ".text") + +; CHECK: .debug_rnglists.dwo contents: +; DWARF32: 0x00000000: range list header: length = 0x00000015, format = DWARF32, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000001 +; DWARF64: 0x00000000: range list header: length = 0x0000000000000019, format = DWARF64, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000001 +; CHECK: offsets: [ +; DWARF32: 0x00000004 => 0x[[#RNG_OFF]] +; DWARF64: 0x0000000000000008 => 0x[[#RNG_OFF]] +; CHECK: ] +; CHECK: ranges: +; CHECK: 0x[[#RNG_OFF]]: [DW_RLE_base_addressx]: 0x0000000000000000 +; 
CHECK: 0x[[#RNG_OFF+2]]: [DW_RLE_offset_pair ]: 0x0000000000000001, 0x000000000000000c => [0x0000000000000001, 0x000000000000000c) +; CHECK: 0x[[#RNG_OFF+5]]: [DW_RLE_offset_pair ]: 0x000000000000000e, 0x0000000000000013 => [0x000000000000000e, 0x0000000000000013) +; CHECK: 0x[[#RNG_OFF+8]]: [DW_RLE_end_of_list ] ; Function Attrs: noinline optnone uwtable define dso_local void @_Z2f3v() !dbg !7 { From 3158d3dd4b7e5c6e2aff7c81355757d26579f1a3 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 15 Sep 2020 11:31:34 +0700 Subject: [PATCH 0632/1079] [DebugInfo] Fix emitting DWARF64 .debug_loclists sections (14/19). The size of the offsets in the table depends on the DWARF format. Differential Revision: https://reviews.llvm.org/D87020 --- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 3 +- llvm/test/CodeGen/X86/debug-loclists-lto.ll | 20 +++-- llvm/test/CodeGen/X86/debug-loclists.ll | 83 +++++++++++++-------- 3 files changed, 68 insertions(+), 38 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 77a723a88f744..f951483cd5af2 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2569,7 +2569,8 @@ static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm, Asm->OutStreamer->emitLabel(DebugLocs.getSym()); for (const auto &List : DebugLocs.getLists()) - Asm->emitLabelDifference(List.Label, DebugLocs.getSym(), 4); + Asm->emitLabelDifference(List.Label, DebugLocs.getSym(), + Asm->getDwarfOffsetByteSize()); return TableEnd; } diff --git a/llvm/test/CodeGen/X86/debug-loclists-lto.ll b/llvm/test/CodeGen/X86/debug-loclists-lto.ll index 7578e09c84a20..fde8e00920adf 100644 --- a/llvm/test/CodeGen/X86/debug-loclists-lto.ll +++ b/llvm/test/CodeGen/X86/debug-loclists-lto.ll @@ -1,10 +1,18 @@ -; RUN: llc -mtriple=x86_64-pc-linux -filetype=asm -function-sections < %s | FileCheck --implicit-check-not=loclists_table_base %s +; RUN: llc -mtriple=x86_64-pc-linux -filetype=asm -function-sections < %s | \ +; RUN: FileCheck --check-prefixes=CHECK,DWARF32 --implicit-check-not=loclists_table_base %s +; RUN: llc -dwarf64 -mtriple=x86_64-pc-linux -filetype=asm -function-sections < %s | \ +; RUN: FileCheck --check-prefixes=CHECK,DWARF64 --implicit-check-not=loclists_table_base %s -; CHECK: {{^}}.Lloclists_table_base0: -; CHECK-NEXT: .long .Ldebug_loc0-.Lloclists_table_base0 -; CHECK-NEXT: .long .Ldebug_loc1-.Lloclists_table_base0 -; CHECK: .long .Lloclists_table_base0 # DW_AT_loclists_base -; CHECK: .long .Lloclists_table_base0 # DW_AT_loclists_base +; CHECK: {{^}}.Lloclists_table_base0: +; DWARF32-NEXT: .long .Ldebug_loc0-.Lloclists_table_base0 +; DWARF32-NEXT: .long .Ldebug_loc1-.Lloclists_table_base0 +; DWARF64-NEXT: .quad .Ldebug_loc0-.Lloclists_table_base0 +; DWARF64-NEXT: .quad .Ldebug_loc1-.Lloclists_table_base0 + +; DWARF32: .long .Lloclists_table_base0 # DW_AT_loclists_base +; DWARF32: .long .Lloclists_table_base0 # DW_AT_loclists_base +; DWARF64: .quad .Lloclists_table_base0 # DW_AT_loclists_base +; DWARF64: .quad .Lloclists_table_base0 # DW_AT_loclists_base ; Function Attrs: uwtable define dso_local void @_Z2f2v() local_unnamed_addr #0 !dbg !15 { diff --git a/llvm/test/CodeGen/X86/debug-loclists.ll b/llvm/test/CodeGen/X86/debug-loclists.ll index 59f244e62669d..d13ad6a11262e 100644 --- a/llvm/test/CodeGen/X86/debug-loclists.ll +++ b/llvm/test/CodeGen/X86/debug-loclists.ll @@ -1,42 +1,61 @@ ; RUN: llc -mtriple=x86_64-pc-linux -filetype=obj -function-sections -o %t < %s -; RUN: llvm-dwarfdump -v 
-debug-info -debug-loclists %t | FileCheck %s +; RUN: llvm-dwarfdump -v -debug-info -debug-loclists %t | \ +; RUN: FileCheck %s --check-prefixes=CHECK,DWARF32 -; RUN: llc -dwarf-version=5 -split-dwarf-file=foo.dwo -mtriple=x86_64-pc-linux -filetype=obj -function-sections -o %t < %s -; RUN: llvm-dwarfdump -v -debug-info -debug-loclists %t | FileCheck %s --check-prefix=DWO - -; CHECK: DW_TAG_variable -; CHECK-NEXT: DW_AT_location [DW_FORM_loclistx] (indexed (0x0) loclist = 0x00000018: -; CHECK-NEXT: [0x0000000000000000, 0x0000000000000003) ".text._Z2f1ii": DW_OP_consts +3, DW_OP_stack_value -; CHECK-NEXT: [0x0000000000000003, 0x0000000000000004) ".text._Z2f1ii": DW_OP_consts +4, DW_OP_stack_value) -; CHECK-NEXT: DW_AT_name {{.*}} "y" - -; CHECK: DW_TAG_variable -; CHECK-NEXT: DW_AT_location [DW_FORM_loclistx] (indexed (0x1) loclist = 0x00000029: -; CHECK-NEXT: [0x0000000000000000, 0x0000000000000003) ".text._Z2f1ii": DW_OP_consts +5, DW_OP_stack_value) -; CHECK-NEXT: DW_AT_name {{.*}} "x" +; RUN: llc -dwarf64 -mtriple=x86_64-pc-linux -filetype=obj -function-sections -o %t < %s +; RUN: llvm-dwarfdump -v -debug-info -debug-loclists %t | \ +; RUN: FileCheck %s --check-prefixes=CHECK,DWARF64 -; CHECK: DW_TAG_variable -; CHECK-NEXT: DW_AT_location [DW_FORM_loclistx] (indexed (0x2) loclist = 0x00000031: -; CHECK-NEXT: [0x0000000000000003, 0x0000000000000004) ".text._Z2f1ii": DW_OP_reg0 RAX) -; CHECK-NEXT: DW_AT_name {{.*}} "r" - -; CHECK: .debug_loclists contents: -; CHECK-NEXT: 0x00000000: locations list header: length = 0x00000035, format = DWARF32, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000003 +; RUN: llc -dwarf-version=5 -split-dwarf-file=foo.dwo -mtriple=x86_64-pc-linux -filetype=obj -function-sections -o %t < %s +; RUN: llvm-dwarfdump -v -debug-info -debug-loclists %t | \ +; RUN: FileCheck %s --check-prefixes=DWO,DWO32 + +; RUN: llc -dwarf64 -dwarf-version=5 -split-dwarf-file=foo.dwo -mtriple=x86_64-pc-linux -filetype=obj -function-sections -o %t < %s +; RUN: llvm-dwarfdump -v -debug-info -debug-loclists %t | \ +; RUN: FileCheck %s --check-prefixes=DWO,DWO64 + +; CHECK: DW_TAG_variable +; DWARF32-NEXT: DW_AT_location [DW_FORM_loclistx] (indexed (0x0) loclist = 0x00000018: +; DWARF64-NEXT: DW_AT_location [DW_FORM_loclistx] (indexed (0x0) loclist = 0x0000002c: +; CHECK-NEXT: [0x0000000000000000, 0x0000000000000003) ".text._Z2f1ii": DW_OP_consts +3, DW_OP_stack_value +; CHECK-NEXT: [0x0000000000000003, 0x0000000000000004) ".text._Z2f1ii": DW_OP_consts +4, DW_OP_stack_value) +; CHECK-NEXT: DW_AT_name {{.*}} "y" + +; CHECK: DW_TAG_variable +; DWARF32-NEXT: DW_AT_location [DW_FORM_loclistx] (indexed (0x1) loclist = 0x00000029: +; DWARF64-NEXT: DW_AT_location [DW_FORM_loclistx] (indexed (0x1) loclist = 0x0000003d: +; CHECK-NEXT: [0x0000000000000000, 0x0000000000000003) ".text._Z2f1ii": DW_OP_consts +5, DW_OP_stack_value) +; CHECK-NEXT: DW_AT_name {{.*}} "x" + +; CHECK: DW_TAG_variable +; DWARF32-NEXT: DW_AT_location [DW_FORM_loclistx] (indexed (0x2) loclist = 0x00000031: +; DWARF64-NEXT: DW_AT_location [DW_FORM_loclistx] (indexed (0x2) loclist = 0x00000045: +; CHECK-NEXT: [0x0000000000000003, 0x0000000000000004) ".text._Z2f1ii": DW_OP_reg0 RAX) +; CHECK-NEXT: DW_AT_name {{.*}} "r" + +; CHECK: .debug_loclists contents: +; DWARF32-NEXT: 0x00000000: locations list header: length = 0x00000035, format = DWARF32, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000003 +; DWARF64-NEXT: 0x00000000: locations list header: length = 
0x0000000000000041, format = DWARF64, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000003 ; DWO: .debug_loclists.dwo contents: -; DWO-NEXT: 0x00000000: locations list header: length = 0x00000035, format = DWARF32, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000003 - -; CHECK-NEXT: offsets: [ -; CHECK-NEXT: 0x0000000c => 0x00000018 -; CHECK-NEXT: 0x0000001d => 0x00000029 -; CHECK-NEXT: 0x00000025 => 0x00000031 -; CHECK-NEXT: ] +; DWO32-NEXT: 0x00000000: locations list header: length = 0x00000035, format = DWARF32, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000003 +; DWO64-NEXT: 0x00000000: locations list header: length = 0x0000000000000041, format = DWARF64, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000003 + +; CHECK-NEXT: offsets: [ +; DWARF32-NEXT: 0x0000000c => 0x00000018 +; DWARF32-NEXT: 0x0000001d => 0x00000029 +; DWARF32-NEXT: 0x00000025 => 0x00000031 +; DWARF64-NEXT: 0x0000000000000018 => 0x0000002c +; DWARF64-NEXT: 0x0000000000000029 => 0x0000003d +; DWARF64-NEXT: 0x0000000000000031 => 0x00000045 +; CHECK-NEXT: ] ; Don't use startx_length if there's more than one entry, because the shared ; base address will be useful for both the range that does start at the start of ; the function, and the one that doesn't. -; CHECK-NEXT: 0x00000018: +; DWARF32-NEXT: 0x00000018: +; DWARF64-NEXT: 0x0000002c: ; CHECK-NEXT: DW_LLE_base_addressx (0x0000000000000000) ; CHECK-NEXT: DW_LLE_offset_pair (0x0000000000000000, 0x0000000000000003): DW_OP_consts +3, DW_OP_stack_value ; CHECK-NEXT: DW_LLE_offset_pair (0x0000000000000003, 0x0000000000000004): DW_OP_consts +4, DW_OP_stack_value @@ -44,14 +63,16 @@ ; Show that startx_length can be used when the address range starts at the start of the function. -; CHECK: 0x00000029: +; DWARF32: 0x00000029: +; DWARF64: 0x0000003d: ; CHECK-NEXT: DW_LLE_startx_length (0x0000000000000000, 0x0000000000000003): DW_OP_consts +5, DW_OP_stack_value ; CHECK-NEXT: DW_LLE_end_of_list () ; And use a base address when the range doesn't start at an existing/useful ; address in the pool. -; CHECK: 0x00000031: +; DWARF32: 0x00000031: +; DWARF64: 0x00000045: ; CHECK-NEXT: DW_LLE_base_addressx (0x0000000000000000) ; CHECK-NEXT: DW_LLE_offset_pair (0x0000000000000003, 0x0000000000000004): DW_OP_reg0 RAX ; CHECK-NEXT: DW_LLE_end_of_list () From 00ce54689d30fd65c49ebc87a21841e834f2d086 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 15 Sep 2020 11:31:41 +0700 Subject: [PATCH 0633/1079] [DebugInfo] Fix emitting DWARF64 .debug_addr sections (15/19). The patch fixes emitting the header of the table. The content is independent of the DWARF format. 
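
For intuition, the unit length stored in the header counts everything
after the length field itself, and that payload does not change with
the format. A standalone model (a sketch, not the AddressPool code):

  #include <cassert>
  #include <cstdint>

  uint64_t addrTableLength(unsigned NumAddrs, unsigned AddrSize) {
    return 2 /*version*/ + 1 /*addr_size*/ + 1 /*seg_size*/ +
           uint64_t(NumAddrs) * AddrSize;
  }

  int main() {
    // Matches the added test: two 8-byte addresses give length = 0x14
    // under either format; only the length field's own width differs.
    assert(addrTableLength(2, 8) == 0x14);
  }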
Differential Revision: https://reviews.llvm.org/D87022 --- llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp | 4 +- llvm/test/DebugInfo/X86/debug-addr-dwarf64.ll | 44 +++++++++++++++++++ 2 files changed, 45 insertions(+), 3 deletions(-) create mode 100644 llvm/test/DebugInfo/X86/debug-addr-dwarf64.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp index 883aaf5aefc49..3df8e35accc4a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -29,9 +29,7 @@ MCSymbol *AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) { MCSymbol *BeginLabel = Asm.createTempSymbol(Prefix + "start"); MCSymbol *EndLabel = Asm.createTempSymbol(Prefix + "end"); - Asm.OutStreamer->AddComment("Length of contribution"); - Asm.emitLabelDifference(EndLabel, BeginLabel, - 4); // TODO: Support DWARF64 format. + Asm.emitDwarfUnitLength(EndLabel, BeginLabel, "Length of contribution"); Asm.OutStreamer->emitLabel(BeginLabel); Asm.OutStreamer->AddComment("DWARF version number"); Asm.emitInt16(Asm.getDwarfVersion()); diff --git a/llvm/test/DebugInfo/X86/debug-addr-dwarf64.ll b/llvm/test/DebugInfo/X86/debug-addr-dwarf64.ll new file mode 100644 index 0000000000000..5c64d48568a3b --- /dev/null +++ b/llvm/test/DebugInfo/X86/debug-addr-dwarf64.ll @@ -0,0 +1,44 @@ +; This checks that .debug_addr can be generated in the DWARF64 format. + +; RUN: llc -mtriple=x86_64 -dwarf-version=5 -dwarf64 -filetype=obj %s -o %t +; RUN: llvm-dwarfdump -debug-info -debug-addr %t | FileCheck %s + +; CHECK: .debug_info contents: +; CHECK: DW_TAG_compile_unit +; CHECK: DW_AT_addr_base (0x0000000000000010) + +; CHECK: .debug_addr contents: +; CHECK-NEXT: Address table header: length = 0x0000000000000014, format = DWARF64, version = 0x0005, addr_size = 0x08, seg_size = 0x00 +; CHECK-NEXT: Addrs: [ +; CHECK-NEXT: 0x0000000000000000 +; CHECK-NEXT: 0x0000000000000004 +; CHECK-NEXT: ] + +; IR generated and reduced from: +; $ cat foo.c +; int foo; +; int bar; +; $ clang -g -S -emit-llvm foo.c -o foo.ll + +target triple = "x86_64-unknown-linux-gnu" + +@foo = dso_local global i32 0, align 4, !dbg !0 +@bar = dso_local global i32 0, align 4, !dbg !6 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!9, !10, !11} +!llvm.ident = !{!12} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "foo", scope: !2, file: !3, line: 1, type: !8, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "foo.c", directory: "/tmp") +!4 = !{} +!5 = !{!0, !6} +!6 = !DIGlobalVariableExpression(var: !7, expr: !DIExpression()) +!7 = distinct !DIGlobalVariable(name: "bar", scope: !2, file: !3, line: 2, type: !8, isLocal: false, isDefinition: true) +!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!9 = !{i32 7, !"Dwarf Version", i32 4} +!10 = !{i32 2, !"Debug Info Version", i32 3} +!11 = !{i32 1, !"wchar_size", i32 4} +!12 = !{!"clang version 12.0.0"} From a93dd26d8ced81d7d2e9a239a4cc33aaf0ba7c89 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 15 Sep 2020 11:31:49 +0700 Subject: [PATCH 0634/1079] [DebugInfo] Fix emitting DWARF64 .debug_names sections (16/19). The patch fixes emitting the unit length field in the header of the table and offsets to the entry pool. 
Note that while the patch changes the common method to emit offsets, nothing actually changes for Apple accelerator tables, because we do not yet support DWARF64 for those targets. Differential Revision: https://reviews.llvm.org/D87023 --- llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 7 +- .../test/DebugInfo/X86/debug-names-dwarf64.ll | 87 +++++++++++++++++++ 2 files changed, 90 insertions(+), 4 deletions(-) create mode 100644 llvm/test/DebugInfo/X86/debug-names-dwarf64.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp index d7b0ffc48f09d..5ef4a289c346c 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -270,7 +270,7 @@ void AccelTableWriter::emitOffsets(const MCSymbol *Base) const { continue; PrevHash = HashValue; Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i)); - Asm->emitLabelDifference(Hash->Sym, Base, sizeof(uint32_t)); + Asm->emitLabelDifference(Hash->Sym, Base, Asm->getDwarfOffsetByteSize()); } } } @@ -366,9 +366,8 @@ void Dwarf5AccelTableWriter<DataT>::Header::emit( assert(CompUnitCount > 0 && "Index must have at least one CU."); AsmPrinter *Asm = Ctx.Asm; - Asm->OutStreamer->AddComment("Header: unit length"); - Asm->emitLabelDifference(Ctx.ContributionEnd, Ctx.ContributionStart, - sizeof(uint32_t)); + Asm->emitDwarfUnitLength(Ctx.ContributionEnd, Ctx.ContributionStart, + "Header: unit length"); Asm->OutStreamer->emitLabel(Ctx.ContributionStart); Asm->OutStreamer->AddComment("Header: version"); Asm->emitInt16(Version); diff --git a/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll b/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll new file mode 100644 index 0000000000000..3fc91ef85df1f --- /dev/null +++ b/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll @@ -0,0 +1,87 @@ +; This checks that .debug_names can be generated in the DWARF64 format.
+ +; RUN: llc -mtriple=x86_64 -dwarf64 -accel-tables=Dwarf -dwarf-version=5 -filetype=obj %s -o %t +; RUN: llvm-dwarfdump -debug-info -debug-names %t | FileCheck %s +; RUN: llvm-dwarfdump -debug-names -verify %t | FileCheck --check-prefix=VERIFY %s + +; CHECK: .debug_info contents: +; CHECK-NEXT: 0x00000000: Compile Unit: {{.+}}, format = DWARF64, +; CHECK: [[VARDIE:.+]]: DW_TAG_variable +; CHECK-NEXT: DW_AT_name ("foo") +; CHECK: [[TYPEDIE:.+]]: DW_TAG_base_type +; CHECK-NEXT: DW_AT_name ("int") + +; CHECK: .debug_names contents: +; CHECK-NEXT: Name Index @ 0x0 { +; CHECK-NEXT: Header { +; CHECK: Format: DWARF64 +; CHECK-NEXT: Version: 5 +; CHECK-NEXT: CU count: 1 +; CHECK-NEXT: Local TU count: 0 +; CHECK-NEXT: Foreign TU count: 0 +; CHECK-NEXT: Bucket count: 2 +; CHECK-NEXT: Name count: 2 +; CHECK: } +; CHECK-NEXT: Compilation Unit offsets [ +; CHECK-NEXT: CU[0]: 0x00000000 +; CHECK-NEXT: ] +; CHECK-NEXT: Abbreviations [ +; CHECK-NEXT: Abbreviation 0x34 { +; CHECK-NEXT: Tag: DW_TAG_variable +; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +; CHECK-NEXT: } +; CHECK-NEXT: Abbreviation 0x24 { +; CHECK-NEXT: Tag: DW_TAG_base_type +; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +; CHECK-NEXT: } +; CHECK-NEXT: ] +; CHECK-NEXT: Bucket 0 [ +; CHECK-NEXT: Name 1 { +; CHECK-NEXT: Hash: 0xB888030 +; CHECK-NEXT: String: {{.+}} "int" +; CHECK-NEXT: Entry @ {{.+}} { +; CHECK-NEXT: Abbrev: 0x24 +; CHECK-NEXT: Tag: DW_TAG_base_type +; CHECK-NEXT: DW_IDX_die_offset: [[TYPEDIE]] +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: ] +; CHECK-NEXT: Bucket 1 [ +; CHECK-NEXT: Name 2 { +; CHECK-NEXT: Hash: 0xB887389 +; CHECK-NEXT: String: {{.+}} "foo" +; CHECK-NEXT: Entry @ {{.+}} { +; CHECK-NEXT: Abbrev: 0x34 +; CHECK-NEXT: Tag: DW_TAG_variable +; CHECK-NEXT: DW_IDX_die_offset: [[VARDIE]] +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: ] +; CHECK-NEXT: } + +; VERIFY: No errors. + +; IR generated and reduced from: +; $ cat foo.c +; int foo; +; $ clang -g -gpubnames -S -emit-llvm foo.c -o foo.ll + +target triple = "x86_64-unknown-linux-gnu" + +@foo = dso_local global i32 0, align 4, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9} +!llvm.ident = !{!10} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "foo", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false) +!3 = !DIFile(filename: "foo.c", directory: "/tmp") +!4 = !{} +!5 = !{!0} +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{i32 7, !"Dwarf Version", i32 4} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{!"clang version 12.0.0"} From 7e1e4e81cbcac6156005a31d90b604714c92298c Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 15 Sep 2020 11:31:55 +0700 Subject: [PATCH 0635/1079] [DebugInfo] Fix emitting DWARF64 .debug_macro[.dwo] sections (17/19). The patch fixes emitting flags and the debug_line_offset field in the header, as well as the reference to the macro string for a pre-standard GNU .debug_macro extension. 
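For reference, the flags byte of the .debug_macro header packs both properties this patch touches: bit 0 is the offset_size_flag (set when offsets are 8 bytes, i.e. DWARF64) and bit 1 is the debug_line_offset_flag. A minimal sketch of the encoding, with an invented function name; the bit assignments come from the DWARF v5 specification:

    #include <cstdint>

    // Build the .debug_macro header flags byte.
    uint8_t macroHeaderFlags(bool IsDwarf64, bool HasLineOffset) {
      uint8_t Flags = 0;
      if (IsDwarf64)
        Flags |= 0x01; // offset_size_flag: 8-byte offsets follow
      if (HasLineOffset)
        Flags |= 0x02; // debug_line_offset_flag: line table offset present
      return Flags;    // 0x03 for the DWARF64 cases checked in the test below
    }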
Differential Revision: https://reviews.llvm.org/D87024 --- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 27 +++++----- .../test/DebugInfo/X86/debug-macro-dwarf64.ll | 52 +++++++++++++++++++ 2 files changed, 65 insertions(+), 14 deletions(-) create mode 100644 llvm/test/DebugInfo/X86/debug-macro-dwarf64.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index f951483cd5af2..5a97e321ab1a2 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2962,21 +2962,22 @@ static void emitMacroHeader(AsmPrinter *Asm, const DwarfDebug &DD, #define HANDLE_MACRO_FLAG(ID, NAME) MACRO_FLAG_##NAME = ID, #include "llvm/BinaryFormat/Dwarf.def" }; - uint8_t Flags = 0; Asm->OutStreamer->AddComment("Macro information version"); Asm->emitInt16(DwarfVersion >= 5 ? DwarfVersion : 4); - // We are setting Offset and line offset flags unconditionally here, - // since we're only supporting DWARF32 and line offset should be mostly - // present. - // FIXME: Add support for DWARF64. - Flags |= MACRO_FLAG_DEBUG_LINE_OFFSET; - Asm->OutStreamer->AddComment("Flags: 32 bit, debug_line_offset present"); - Asm->emitInt8(Flags); + // We emit the line offset flag unconditionally here, since line offset should + // be mostly present. + if (Asm->isDwarf64()) { + Asm->OutStreamer->AddComment("Flags: 64 bit, debug_line_offset present"); + Asm->emitInt8(MACRO_FLAG_OFFSET_SIZE | MACRO_FLAG_DEBUG_LINE_OFFSET); + } else { + Asm->OutStreamer->AddComment("Flags: 32 bit, debug_line_offset present"); + Asm->emitInt8(MACRO_FLAG_DEBUG_LINE_OFFSET); + } Asm->OutStreamer->AddComment("debug_line_offset"); if (DD.useSplitDwarf()) - Asm->OutStreamer->emitIntValue(0, /*Size=*/4); + Asm->emitDwarfLengthOrOffset(0); else - Asm->OutStreamer->emitSymbolValue(CU.getLineTableStartSym(), /*Size=*/4); + Asm->emitDwarfSymbolReference(CU.getLineTableStartSym()); } void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) { @@ -3019,10 +3020,8 @@ void DwarfDebug::emitMacro(DIMacro &M) { Asm->OutStreamer->AddComment("Line Number"); Asm->emitULEB128(M.getLine()); Asm->OutStreamer->AddComment("Macro String"); - // FIXME: Add support for DWARF64. - Asm->OutStreamer->emitSymbolValue( - InfoHolder.getStringPool().getEntry(*Asm, Str).getSymbol(), - /*Size=*/4); + Asm->emitDwarfSymbolReference( + InfoHolder.getStringPool().getEntry(*Asm, Str).getSymbol()); } } else { Asm->OutStreamer->AddComment(dwarf::MacinfoString(M.getMacinfoType())); diff --git a/llvm/test/DebugInfo/X86/debug-macro-dwarf64.ll b/llvm/test/DebugInfo/X86/debug-macro-dwarf64.ll new file mode 100644 index 0000000000000..8a41922cac12f --- /dev/null +++ b/llvm/test/DebugInfo/X86/debug-macro-dwarf64.ll @@ -0,0 +1,52 @@ +; This checks that .debug_macro[.dwo] can be generated in the DWARF64 format. 
+ +; RUN: llc -mtriple=x86_64 -dwarf-version=4 -dwarf64 -use-gnu-debug-macro -filetype=obj %s -o %t +; RUN: llvm-dwarfdump -debug-macro %t | FileCheck %s --check-prefix=DWARF4 + +; RUN: llc -mtriple=x86_64 -dwarf-version=5 -dwarf64 -filetype=obj %s -o %t +; RUN: llvm-dwarfdump -debug-macro %t | FileCheck %s --check-prefix=DWARF5 + +; RUN: llc -mtriple=x86_64 -dwarf-version=5 -dwarf64 -split-dwarf-file=foo.dwo -filetype=obj %s -o %t +; RUN: llvm-dwarfdump -debug-macro %t | FileCheck %s --check-prefixes=DWARF5,DWO + +; DWARF4: .debug_macro contents: +; DWARF4-NEXT: 0x00000000: +; DWARF4-NEXT: macro header: version = 0x0004, flags = 0x03, format = DWARF64, debug_line_offset = 0x0000000000000000 +; DWARF4-NEXT: DW_MACRO_GNU_start_file - lineno: 0 filenum: 1 +; DWARF4-NEXT: DW_MACRO_GNU_define_indirect - lineno: 1 macro: FOO 1 +; DWARF4-NEXT: DW_MACRO_GNU_undef_indirect - lineno: 2 macro: BAR +; DWARF4-NEXT: DW_MACRO_GNU_end_file + +; DWARF5: .debug_macro contents: +; DWO: .debug_macro.dwo contents: +; DWARF5-NEXT: 0x00000000: +; DWARF5-NEXT: macro header: version = 0x0005, flags = 0x03, format = DWARF64, debug_line_offset = 0x0000000000000000 +; DWARF5-NEXT: DW_MACRO_start_file - lineno: 0 filenum: 0 +; DWARF5-NEXT: DW_MACRO_define_strx - lineno: 1 macro: FOO 1 +; DWARF5-NEXT: DW_MACRO_undef_strx - lineno: 2 macro: BAR +; DWARF5-NEXT: DW_MACRO_end_file + +; IR generated and reduced from: +; $ cat foo.c +; #define FOO 1 +; #undef BAR +; $ clang -g -S -emit-llvm -fdebug-macro foo.c -o foo.ll + +target triple = "x86_64-unknown-linux-gnu" + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!348, !349, !350} +!llvm.ident = !{!351} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, macros: !3, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "foo.c", directory: "/tmp") +!2 = !{} +!3 = !{!4} +!4 = !DIMacroFile(file: !1, nodes: !5) +!5 = !{!6, !7} +!6 = !DIMacro(type: DW_MACINFO_define, line: 1, name: "FOO", value: "1") +!7 = !DIMacro(type: DW_MACINFO_undef, line: 2, name: "BAR") +!348 = !{i32 7, !"Dwarf Version", i32 4} +!349 = !{i32 2, !"Debug Info Version", i32 3} +!350 = !{i32 1, !"wchar_size", i32 4} +!351 = !{!"clang version 12.0.0"} From 8c19ac23bdefceaaf119add8d693e89a6f7d3d81 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 15 Sep 2020 11:32:01 +0700 Subject: [PATCH 0636/1079] [DebugInfo] Make the offset of string pool entries 64-bit (18/19). The string pool is shared among several units in the case of LTO, and it potentially can exceed the limit of 4GiB for an extremely large application. As it is now possible to emit 64-bit debugging info, the limitation can be removed. 
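To make the failure mode concrete, here is a toy pool (not LLVM's implementation) that hands out byte offsets the same way CurrentEndOffset does in the diff below; with a 32-bit offset type the counter would silently wrap once more than 4GiB of unique strings had been interned, making distinct strings alias:

    #include <cstdint>
    #include <string>
    #include <unordered_map>

    // Toy string pool: strings are laid out back to back, each NUL-terminated,
    // and the pool remembers the offset where every unique string begins.
    class ToyStringPool {
      std::unordered_map<std::string, uint64_t> Offsets;
      uint64_t CurrentEndOffset = 0; // a 32-bit type caps the pool at 4 GiB

    public:
      uint64_t getStringOffset(const std::string &S) {
        auto [It, Inserted] = Offsets.try_emplace(S, CurrentEndOffset);
        if (Inserted)
          CurrentEndOffset += S.size() + 1; // payload plus NUL terminator
        return It->second;
      }
    };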
Differential Revision: https://reviews.llvm.org/D87025 --- llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h | 4 ++-- llvm/include/llvm/CodeGen/NonRelocatableStringpool.h | 4 ++-- llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp | 1 - llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h b/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h index e189352a7b2d8..abeba62707c1d 100644 --- a/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h +++ b/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h @@ -21,7 +21,7 @@ struct DwarfStringPoolEntry { static constexpr unsigned NotIndexed = -1; MCSymbol *Symbol; - unsigned Offset; + uint64_t Offset; unsigned Index; bool isIndexed() const { return Index != NotIndexed; } @@ -47,7 +47,7 @@ class DwarfStringPoolEntryRef { assert(getMapEntry()->second.Symbol && "No symbol available!"); return getMapEntry()->second.Symbol; } - unsigned getOffset() const { return getMapEntry()->second.Offset; } + uint64_t getOffset() const { return getMapEntry()->second.Offset; } bool isIndexed() const { return MapEntryAndIndexed.getInt(); } unsigned getIndex() const { assert(isIndexed()); diff --git a/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h b/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h index 56db30ff7d6de..fe07c70d85c59 100644 --- a/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h +++ b/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h @@ -39,7 +39,7 @@ class NonRelocatableStringpool { /// Get the offset of string \p S in the string table. This can insert a new /// element or return the offset of a pre-existing one. - uint32_t getStringOffset(StringRef S) { return getEntry(S).getOffset(); } + uint64_t getStringOffset(StringRef S) { return getEntry(S).getOffset(); } /// Get permanent storage for \p S (but do not necessarily emit \p S in the /// output section). A latter call to getStringOffset() with the same string @@ -57,7 +57,7 @@ private: MapTy Strings; - uint32_t CurrentEndOffset = 0; + uint64_t CurrentEndOffset = 0; unsigned NumEntries = 0; DwarfStringPoolEntryRef EmptyString; std::function<StringRef(StringRef Input)> Translator; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index a4cb497ec5024..1e2c218eaec29 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -33,7 +33,6 @@ DwarfStringPool::getEntryImpl(AsmPrinter &Asm, StringRef Str) { Entry.Symbol = ShouldCreateSymbols ? Asm.createTempSymbol(Prefix) : nullptr; NumBytes += Str.size() + 1; - assert(NumBytes > Entry.Offset && "Unexpected overflow"); } return *I.first; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h index c5f5637fdae3f..79b5df89e3389 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h @@ -28,7 +28,7 @@ class DwarfStringPool { StringMap<EntryTy, BumpPtrAllocator &> Pool; StringRef Prefix; - unsigned NumBytes = 0; + uint64_t NumBytes = 0; unsigned NumIndexedStrings = 0; bool ShouldCreateSymbols; From a845ebd6333d95d58bd6ab18c6ff8bb79686c664 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 15 Sep 2020 11:32:08 +0700 Subject: [PATCH 0637/1079] [DebugInfo] Make offsets of dwarf units 64-bit (19/19). In the case of LTO, several DWARF units can be emitted in one section.
For an extremely large application, they may exceed the limit of 4GiB for 32-bit offsets. As it is now possible to emit 64-bit debugging info, the patch enables storing the larger offsets. Differential Revision: https://reviews.llvm.org/D87026 --- llvm/include/llvm/CodeGen/DIE.h | 6 +++--- llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 4 ++++ llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 4 ++-- llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp | 5 ++++- llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp | 2 +- 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/CodeGen/DIE.h b/llvm/include/llvm/CodeGen/DIE.h index 43ba859fdc79c..fa554be64e79f 100644 --- a/llvm/include/llvm/CodeGen/DIE.h +++ b/llvm/include/llvm/CodeGen/DIE.h @@ -788,7 +788,7 @@ class DIE : IntrusiveBackListNode, public DIEValueList { /// Get the absolute offset within the .debug_info or .debug_types section /// for this DIE. - unsigned getDebugSectionOffset() const; + uint64_t getDebugSectionOffset() const; /// Compute the offset of this DIE and all its children. /// @@ -890,8 +890,8 @@ class DIEUnit { /// /// \returns Section pointer which can be NULL. MCSection *getSection() const { return Section; } - void setDebugSectionOffset(unsigned O) { Offset = O; } - unsigned getDebugSectionOffset() const { return Offset; } + void setDebugSectionOffset(uint64_t O) { Offset = O; } + uint64_t getDebugSectionOffset() const { return Offset; } DIE &getUnitDie() { return Die; } const DIE &getUnitDie() const { return Die; } }; diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp index 5ef4a289c346c..4e45a0ffc60fb 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -591,10 +591,14 @@ void llvm::emitDWARF5AccelTable( } void AppleAccelTableOffsetData::emit(AsmPrinter *Asm) const { + assert(Die.getDebugSectionOffset() <= UINT32_MAX && + "The section offset exceeds the limit."); Asm->emitInt32(Die.getDebugSectionOffset()); } void AppleAccelTableTypeData::emit(AsmPrinter *Asm) const { + assert(Die.getDebugSectionOffset() <= UINT32_MAX && + "The section offset exceeds the limit."); Asm->emitInt32(Die.getDebugSectionOffset()); Asm->emitInt16(Die.getTag()); Asm->emitInt8(0); diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 4ec470b63db84..9b074c89aa93d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -194,7 +194,7 @@ DIEAbbrev DIE::generateAbbrev() const { return Abbrev; } -unsigned DIE::getDebugSectionOffset() const { +uint64_t DIE::getDebugSectionOffset() const { const DIEUnit *Unit = getUnit(); assert(Unit && "DIE must be owned by a DIEUnit to get its absolute offset"); return Unit->getDebugSectionOffset() + getOffset(); @@ -662,7 +662,7 @@ void DIEEntry::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_ref_addr: { // Get the absolute offset for this DIE within the debug info/types section. 
- unsigned Addr = Entry->getDebugSectionOffset(); + uint64_t Addr = Entry->getDebugSectionOffset(); if (const MCSymbol *SectionSym = Entry->getUnit()->getCrossSectionRelativeBaseAddress()) { AP->emitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp index d9004c4453b5a..dee032304b683 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -59,7 +59,7 @@ void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) { // Compute the size and offset for each DIE. void DwarfFile::computeSizeAndOffsets() { // Offset from the first CU in the debug info section is 0 initially. - unsigned SecOffset = 0; + uint64_t SecOffset = 0; // Iterate over each compile unit and set the size and offsets for each // DIE within each compile unit. All offsets are CU relative. @@ -75,6 +75,9 @@ void DwarfFile::computeSizeAndOffsets() { TheU->setDebugSectionOffset(SecOffset); SecOffset += computeSizeAndOffsetsForUnit(TheU.get()); } + if (SecOffset > UINT32_MAX && !Asm->isDwarf64()) + report_fatal_error("The generated debug information is too large " + "for the 32-bit DWARF format."); } unsigned DwarfFile::computeSizeAndOffsetsForUnit(DwarfUnit *TheU) { diff --git a/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp b/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp index 5d53c0d31bdf8..69746dd638ed9 100644 --- a/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp @@ -504,7 +504,7 @@ llvm::Error dwarfgen::Generator::init(Triple TheTriple, uint16_t V) { StringRef dwarfgen::Generator::generate() { // Offset from the first CU in the debug info section is 0 initially. - unsigned SecOffset = 0; + uint64_t SecOffset = 0; // Iterate over each compile unit and set the size and offsets for each // DIE within each compile unit. All offsets are CU relative. From 26c293c23d3b5cf4135fce0b1e61b70d6c4dd930 Mon Sep 17 00:00:00 2001 From: Davide Italiano Date: Mon, 14 Sep 2020 22:29:53 -0700 Subject: [PATCH 0638/1079] [BinaryFormat/MachO] Add a missing constant. Reference: https://opensource.apple.com/source/cctools/cctools-949.0.1/include/mach-o/loader.h.auto.html --- llvm/include/llvm/BinaryFormat/MachO.h | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/include/llvm/BinaryFormat/MachO.h b/llvm/include/llvm/BinaryFormat/MachO.h index e84ed8b643cbb..f5d5ec328b5e7 100644 --- a/llvm/include/llvm/BinaryFormat/MachO.h +++ b/llvm/include/llvm/BinaryFormat/MachO.h @@ -83,6 +83,7 @@ enum { MH_NO_HEAP_EXECUTION = 0x01000000u, MH_APP_EXTENSION_SAFE = 0x02000000u, MH_NLIST_OUTOFSYNC_WITH_DYLDINFO = 0x04000000u, + MH_SIM_SUPPORT = 0x08000000u, MH_DYLIB_IN_CACHE = 0x80000000u, }; From 7b416c5e3683d7120e4ce390e669f89b6a72d423 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 4 Sep 2020 23:42:22 +0300 Subject: [PATCH 0639/1079] [llvm-readobj] [ARMWinEH] Print ARM64 packed unwind info In addition to printing the individual fields, synthesize and print the corresponding prolog for the unwind info (in reverse order, to match how it's printed for non-packed unwind info). 
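As a cross-check of the bit layout involved, the second word of a packed .pdata record can be decoded with plain shifts and masks. The sketch below mirrors the accessors this patch adds to RuntimeFunctionARM64 (the field meanings come from the Microsoft ARM64 exception handling documentation); fed func1's word from the new test, 0x050ae059, it prints length 88, RegF 7, RegI 10, H 0, CR 0 and frame size 160, matching the expected output:

    #include <cstdint>
    #include <cstdio>

    // Decode the second word of an ARM64 packed .pdata record.
    void decodePackedUnwind(uint32_t W) {
      unsigned Flag      = W & 0x3;                  // 1 = packed, 2 = packed fragment
      unsigned Length    = ((W >> 2) & 0x7ff) * 4;   // function length in bytes
      unsigned RegF      = (W >> 13) & 0x7;          // saved d-register code
      unsigned RegI      = (W >> 16) & 0xf;          // saved x19+ register count
      unsigned H         = (W >> 20) & 0x1;          // parameter registers homed?
      unsigned CR        = (W >> 21) & 0x3;          // LR/frame-chain encoding
      unsigned FrameSize = ((W >> 23) & 0x1ff) * 16; // total stack in bytes
      std::printf("flag=%u len=%u RegF=%u RegI=%u H=%u CR=%u frame=%u\n",
                  Flag, Length, RegF, RegI, H, CR, FrameSize);
    }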
Differential Revision: https://reviews.llvm.org/D87370 --- llvm/include/llvm/Support/ARMWinEH.h | 82 +++++ .../llvm-readobj/COFF/arm64-packed-unwind.s | 332 ++++++++++++++++++ llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp | 141 +++++++- llvm/tools/llvm-readobj/ARMWinEHPrinter.h | 4 + 4 files changed, 557 insertions(+), 2 deletions(-) create mode 100644 llvm/test/tools/llvm-readobj/COFF/arm64-packed-unwind.s diff --git a/llvm/include/llvm/Support/ARMWinEH.h b/llvm/include/llvm/Support/ARMWinEH.h index 83ba044ed446d..327aa9804849f 100644 --- a/llvm/include/llvm/Support/ARMWinEH.h +++ b/llvm/include/llvm/Support/ARMWinEH.h @@ -31,6 +31,9 @@ enum class ReturnType { /// RuntimeFunction - An entry in the table of procedure data (.pdata) /// +/// This is ARM specific, but the Function Start RVA, Flag and +/// ExceptionInformationRVA fields work identically for ARM64. +/// /// 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 /// 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 /// +---------------------------------------------------------------+ @@ -204,6 +207,85 @@ inline uint16_t StackAdjustment(const RuntimeFunction &RF) { /// purpose (r0-r15) and VFP (d0-d31) registers. std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF); +/// RuntimeFunctionARM64 - An entry in the table of procedure data (.pdata) +/// +/// 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 +/// 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 +/// +---------------------------------------------------------------+ +/// | Function Start RVA | +/// +-----------------+---+-+-------+-----+---------------------+---+ +/// | Frame Size |CR |H| RegI |RegF | Function Length |Flg| +/// +-----------------+---+-+-------+-----+---------------------+---+ +/// +/// See https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling +/// for the full reference for this struct.
+ +class RuntimeFunctionARM64 { +public: + const support::ulittle32_t BeginAddress; + const support::ulittle32_t UnwindData; + + RuntimeFunctionARM64(const support::ulittle32_t *Data) + : BeginAddress(Data[0]), UnwindData(Data[1]) {} + + RuntimeFunctionARM64(const support::ulittle32_t BeginAddress, + const support::ulittle32_t UnwindData) + : BeginAddress(BeginAddress), UnwindData(UnwindData) {} + + RuntimeFunctionFlag Flag() const { + return RuntimeFunctionFlag(UnwindData & 0x3); + } + + uint32_t ExceptionInformationRVA() const { + assert(Flag() == RuntimeFunctionFlag::RFF_Unpacked && + "unpacked form required for this operation"); + return (UnwindData & ~0x3); + } + + uint32_t PackedUnwindData() const { + assert((Flag() == RuntimeFunctionFlag::RFF_Packed || + Flag() == RuntimeFunctionFlag::RFF_PackedFragment) && + "packed form required for this operation"); + return (UnwindData & ~0x3); + } + uint32_t FunctionLength() const { + assert((Flag() == RuntimeFunctionFlag::RFF_Packed || + Flag() == RuntimeFunctionFlag::RFF_PackedFragment) && + "packed form required for this operation"); + return (((UnwindData & 0x00001ffc) >> 2) << 2); + } + uint8_t RegF() const { + assert((Flag() == RuntimeFunctionFlag::RFF_Packed || + Flag() == RuntimeFunctionFlag::RFF_PackedFragment) && + "packed form required for this operation"); + return ((UnwindData & 0x0000e000) >> 13); + } + uint8_t RegI() const { + assert((Flag() == RuntimeFunctionFlag::RFF_Packed || + Flag() == RuntimeFunctionFlag::RFF_PackedFragment) && + "packed form required for this operation"); + return ((UnwindData & 0x000f0000) >> 16); + } + bool H() const { + assert((Flag() == RuntimeFunctionFlag::RFF_Packed || + Flag() == RuntimeFunctionFlag::RFF_PackedFragment) && + "packed form required for this operation"); + return ((UnwindData & 0x00100000) >> 20); + } + uint8_t CR() const { + assert((Flag() == RuntimeFunctionFlag::RFF_Packed || + Flag() == RuntimeFunctionFlag::RFF_PackedFragment) && + "packed form required for this operation"); + return ((UnwindData & 0x600000) >> 21); + } + uint16_t FrameSize() const { + assert((Flag() == RuntimeFunctionFlag::RFF_Packed || + Flag() == RuntimeFunctionFlag::RFF_PackedFragment) && + "packed form required for this operation"); + return ((UnwindData & 0xff800000) >> 23); + } +}; + /// ExceptionDataRecord - An entry in the table of exception data (.xdata) /// /// The format on ARM is: diff --git a/llvm/test/tools/llvm-readobj/COFF/arm64-packed-unwind.s b/llvm/test/tools/llvm-readobj/COFF/arm64-packed-unwind.s new file mode 100644 index 0000000000000..f8c4d5e3074f9 --- /dev/null +++ b/llvm/test/tools/llvm-readobj/COFF/arm64-packed-unwind.s @@ -0,0 +1,332 @@ +## Check interpretation of the packed unwind info format. 
+ +// REQUIRES: aarch64-registered-target +// RUN: llvm-mc -filetype=obj -triple aarch64-windows %s -o %t.o +// RUN: llvm-readobj --unwind %t.o | FileCheck %s + +// CHECK: UnwindInformation [ +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func1 +// CHECK-NEXT: Fragment: No +// CHECK-NEXT: FunctionLength: 88 +// CHECK-NEXT: RegF: 7 +// CHECK-NEXT: RegI: 10 +// CHECK-NEXT: HomedParameters: No +// CHECK-NEXT: CR: 0 +// CHECK-NEXT: FrameSize: 160 +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: sub sp, sp, #16 +// CHECK-NEXT: stp d14, d15, [sp, #128] +// CHECK-NEXT: stp d12, d13, [sp, #112] +// CHECK-NEXT: stp d10, d11, [sp, #96] +// CHECK-NEXT: stp d8, d9, [sp, #80] +// CHECK-NEXT: stp x27, x28, [sp, #64] +// CHECK-NEXT: stp x25, x26, [sp, #48] +// CHECK-NEXT: stp x23, x24, [sp, #32] +// CHECK-NEXT: stp x21, x22, [sp, #16] +// CHECK-NEXT: stp x19, x20, [sp, #-144]! +// CHECK-NEXT: end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func2 +// CHECK-NEXT: Fragment: No +// CHECK-NEXT: FunctionLength: 48 +// CHECK-NEXT: RegF: 2 +// CHECK-NEXT: RegI: 3 +// CHECK-NEXT: HomedParameters: No +// CHECK-NEXT: CR: 0 +// CHECK-NEXT: FrameSize: 48 +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: str d10, [sp, #40] +// CHECK-NEXT: stp d8, d9, [sp, #24] +// CHECK-NEXT: str x21, [sp, #16] +// CHECK-NEXT: stp x19, x20, [sp, #-48]! +// CHECK-NEXT: end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func3 +// CHECK-NEXT: Fragment: No +// CHECK-NEXT: FunctionLength: 40 +// CHECK-NEXT: RegF: 3 +// CHECK-NEXT: RegI: 1 +// CHECK-NEXT: HomedParameters: No +// CHECK-NEXT: CR: 0 +// CHECK-NEXT: FrameSize: 48 +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: stp d10, d11, [sp, #24] +// CHECK-NEXT: stp d8, d9, [sp, #8] +// CHECK-NEXT: str x19, [sp, #-48]! +// CHECK-NEXT: end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func4 +// CHECK-NEXT: Fragment: No +// CHECK-NEXT: FunctionLength: 24 +// CHECK-NEXT: RegF: 1 +// CHECK-NEXT: RegI: 0 +// CHECK-NEXT: HomedParameters: No +// CHECK-NEXT: CR: 0 +// CHECK-NEXT: FrameSize: 48 +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: sub sp, sp, #32 +// CHECK-NEXT: stp d8, d9, [sp, #-16]! +// CHECK-NEXT: end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func5 +// CHECK-NEXT: Fragment: No +// CHECK-NEXT: FunctionLength: 56 +// CHECK-NEXT: RegF: 0 +// CHECK-NEXT: RegI: 1 +// CHECK-NEXT: HomedParameters: Yes +// CHECK-NEXT: CR: 0 +// CHECK-NEXT: FrameSize: 112 +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: sub sp, sp, #32 +// CHECK-NEXT: stp x6, x7, [sp, #56] +// CHECK-NEXT: stp x4, x5, [sp, #40] +// CHECK-NEXT: stp x2, x3, [sp, #24] +// CHECK-NEXT: stp x0, x1, [sp, #8] +// CHECK-NEXT: str x19, [sp, #-80]! +// CHECK-NEXT: end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func6 +// CHECK-NEXT: Fragment: No +// CHECK-NEXT: FunctionLength: 48 +// CHECK-NEXT: RegF: 0 +// CHECK-NEXT: RegI: 0 +// CHECK-NEXT: HomedParameters: Yes +// CHECK-NEXT: CR: 0 +// CHECK-NEXT: FrameSize: 112 +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: sub sp, sp, #48 +// CHECK-NEXT: stp x6, x7, [sp, #48] +// CHECK-NEXT: stp x4, x5, [sp, #32] +// CHECK-NEXT: stp x2, x3, [sp, #16] +// CHECK-NEXT: stp x0, x1, [sp, #-64]! 
+// CHECK-NEXT: end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func7 +// CHECK-NEXT: Fragment: No +// CHECK-NEXT: FunctionLength: 24 +// CHECK-NEXT: RegF: 0 +// CHECK-NEXT: RegI: 0 +// CHECK-NEXT: HomedParameters: No +// CHECK-NEXT: CR: 1 +// CHECK-NEXT: FrameSize: 32 +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: sub sp, sp, #16 +// CHECK-NEXT: str lr, [sp, #-16]! +// CHECK-NEXT: end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func8 +// CHECK-NEXT: Fragment: No +// CHECK-NEXT: FunctionLength: 24 +// CHECK-NEXT: RegF: 0 +// CHECK-NEXT: RegI: 1 +// CHECK-NEXT: HomedParameters: No +// CHECK-NEXT: CR: 1 +// CHECK-NEXT: FrameSize: 32 +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: sub sp, sp, #16 +// CHECK-NEXT: stp x19, lr, [sp, #-16]! +// CHECK-NEXT: end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func9 +// CHECK-NEXT: Fragment: No +// CHECK-NEXT: FunctionLength: 32 +// CHECK-NEXT: RegF: 0 +// CHECK-NEXT: RegI: 2 +// CHECK-NEXT: HomedParameters: No +// CHECK-NEXT: CR: 1 +// CHECK-NEXT: FrameSize: 32 +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: str lr, [sp, #16] +// CHECK-NEXT: stp x19, x20, [sp, #-32]! +// CHECK-NEXT: end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func10 +// CHECK-NEXT: Fragment: No +// CHECK-NEXT: FunctionLength: 32 +// CHECK-NEXT: RegF: 0 +// CHECK-NEXT: RegI: 3 +// CHECK-NEXT: HomedParameters: No +// CHECK-NEXT: CR: 1 +// CHECK-NEXT: FrameSize: 48 +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: sub sp, sp, #16 +// CHECK-NEXT: stp x21, lr, [sp, #16] +// CHECK-NEXT: stp x19, x20, [sp, #-32]! +// CHECK-NEXT: end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func11 +// CHECK-NEXT: Fragment: No +// CHECK-NEXT: FunctionLength: 32 +// CHECK-NEXT: RegF: 0 +// CHECK-NEXT: RegI: 2 +// CHECK-NEXT: HomedParameters: No +// CHECK-NEXT: CR: 3 +// CHECK-NEXT: FrameSize: 48 +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: mov x29, sp +// CHECK-NEXT: stp x29, lr, [sp, #-32]! +// CHECK-NEXT: stp x19, x20, [sp, #-16]! +// CHECK-NEXT: end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func12 +// CHECK-NEXT: Fragment: No +// CHECK-NEXT: FunctionLength: 40 +// CHECK-NEXT: RegF: 0 +// CHECK-NEXT: RegI: 2 +// CHECK-NEXT: HomedParameters: No +// CHECK-NEXT: CR: 3 +// CHECK-NEXT: FrameSize: 544 +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: mov x29, sp +// CHECK-NEXT: stp x29, lr, [sp, #0] +// CHECK-NEXT: sub sp, sp, #528 +// CHECK-NEXT: stp x19, x20, [sp, #-16]! +// CHECK-NEXT: end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func13 +// CHECK-NEXT: Fragment: No +// CHECK-NEXT: FunctionLength: 48 +// CHECK-NEXT: RegF: 0 +// CHECK-NEXT: RegI: 2 +// CHECK-NEXT: HomedParameters: No +// CHECK-NEXT: CR: 3 +// CHECK-NEXT: FrameSize: 4112 +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: mov x29, sp +// CHECK-NEXT: stp x29, lr, [sp, #0] +// CHECK-NEXT: sub sp, sp, #16 +// CHECK-NEXT: sub sp, sp, #4080 +// CHECK-NEXT: stp x19, x20, [sp, #-16]! 
+// CHECK-NEXT: end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func14 +// CHECK-NEXT: Fragment: No +// CHECK-NEXT: FunctionLength: 32 +// CHECK-NEXT: RegF: 0 +// CHECK-NEXT: RegI: 2 +// CHECK-NEXT: HomedParameters: No +// CHECK-NEXT: CR: 0 +// CHECK-NEXT: FrameSize: 4112 +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: sub sp, sp, #16 +// CHECK-NEXT: sub sp, sp, #4080 +// CHECK-NEXT: stp x19, x20, [sp, #-16]! +// CHECK-NEXT: end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func15 +// CHECK-NEXT: Fragment: No +// CHECK-NEXT: FunctionLength: 24 +// CHECK-NEXT: RegF: 0 +// CHECK-NEXT: RegI: 2 +// CHECK-NEXT: HomedParameters: No +// CHECK-NEXT: CR: 0 +// CHECK-NEXT: FrameSize: 560 +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: sub sp, sp, #544 +// CHECK-NEXT: stp x19, x20, [sp, #-16]! +// CHECK-NEXT: end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func16 +// CHECK-NEXT: Fragment: No +// CHECK-NEXT: FunctionLength: 56 +// CHECK-NEXT: RegF: 0 +// CHECK-NEXT: RegI: 0 +// CHECK-NEXT: HomedParameters: Yes +// CHECK-NEXT: CR: 1 +// CHECK-NEXT: FrameSize: 112 +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: sub sp, sp, #32 +// CHECK-NEXT: stp x6, x7, [sp, #56] +// CHECK-NEXT: stp x4, x5, [sp, #40] +// CHECK-NEXT: stp x2, x3, [sp, #24] +// CHECK-NEXT: stp x0, x1, [sp, #8] +// CHECK-NEXT: str lr, [sp, #-80]! +// CHECK-NEXT: end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: ] + + .text + .globl func1 +func1: +func2: +func3: +func4: +func5: +func6: +func7: +func8: +func9: +func10: +func11: +func12: +func13: +func14: +func15: +func16: + ret + + .section .pdata,"dr" + .long func1@IMGREL + .long 0x050ae059 // FunctionLength=22 RegF=7 RegI=10 H=0 CR=0 FrameSize=10 + .long func2@IMGREL + .long 0x01834031 // FunctionLength=12 RegF=2 RegI=3 H=0 CR=0 FrameSize=3 + .long func3@IMGREL + .long 0x01816029 // FunctionLength=10 RegF=3 RegI=1 H=0 CR=0 FrameSize=3 + .long func4@IMGREL + .long 0x01802019 // FunctionLength=6 RegF=1 RegI=0 H=0 CR=0 FrameSize=3 + .long func5@IMGREL + .long 0x03910039 // FunctionLength=14 RegF=0 RegI=1 H=1 CR=0 FrameSize=7 + .long func6@IMGREL + .long 0x03900031 // FunctionLength=12 RegF=0 RegI=0 H=1 CR=0 FrameSize=7 + .long func7@IMGREL + .long 0x01200019 // FunctionLength=6 RegF=0 RegI=0 H=0 CR=1 FrameSize=2 + .long func8@IMGREL + .long 0x01210019 // FunctionLength=6 RegF=0 RegI=1 H=0 CR=1 FrameSize=2 + .long func9@IMGREL + .long 0x01220021 // FunctionLength=8 RegF=0 RegI=2 H=0 CR=1 FrameSize=2 + .long func10@IMGREL + .long 0x01a30021 // FunctionLength=8 RegF=0 RegI=3 H=0 CR=1 FrameSize=3 + .long func11@IMGREL + .long 0x01e20021 // FunctionLength=8 RegF=0 RegI=2 H=0 CR=3 FrameSize=3 + .long func12@IMGREL + .long 0x11620029 // FunctionLength=10 RegF=0 RegI=2 H=0 CR=3 FrameSize=34 + .long func13@IMGREL + .long 0x80e20031 // FunctionLength=12 RegF=0 RegI=2 H=0 CR=3 FrameSize=257 + .long func14@IMGREL + .long 0x80820021 // FunctionLength=8 RegF=0 RegI=2 H=0 CR=0 FrameSize=257 + .long func15@IMGREL + .long 0x11820019 // FunctionLength=6 RegF=0 RegI=2 H=0 CR=0 FrameSize=34 + .long func16@IMGREL + .long 0x03b00039 // FunctionLength=14 RegF=0 RegI=0 H=1 CR=1 FrameSize=7 diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp index c2a84e3ba4835..46a949b990459 100644 --- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp +++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp @@ -1111,6 +1111,143 @@ bool 
Decoder::dumpPackedEntry(const object::COFFObjectFile &COFF, return true; } +bool Decoder::dumpPackedARM64Entry(const object::COFFObjectFile &COFF, + const SectionRef Section, uint64_t Offset, + unsigned Index, + const RuntimeFunctionARM64 &RF) { + assert((RF.Flag() == RuntimeFunctionFlag::RFF_Packed || + RF.Flag() == RuntimeFunctionFlag::RFF_PackedFragment) && + "unpacked entry cannot be treated as a packed entry"); + + ErrorOr<SymbolRef> Function = getRelocatedSymbol(COFF, Section, Offset); + if (!Function) + Function = getSymbol(COFF, RF.BeginAddress, /*FunctionOnly=*/true); + + StringRef FunctionName; + uint64_t FunctionAddress; + if (Function) { + Expected<StringRef> FunctionNameOrErr = Function->getName(); + if (!FunctionNameOrErr) { + std::string Buf; + llvm::raw_string_ostream OS(Buf); + logAllUnhandledErrors(FunctionNameOrErr.takeError(), OS); + OS.flush(); + report_fatal_error(Buf); + } + FunctionName = *FunctionNameOrErr; + Expected<uint64_t> FunctionAddressOrErr = Function->getAddress(); + if (!FunctionAddressOrErr) { + std::string Buf; + llvm::raw_string_ostream OS(Buf); + logAllUnhandledErrors(FunctionAddressOrErr.takeError(), OS); + OS.flush(); + report_fatal_error(Buf); + } + FunctionAddress = *FunctionAddressOrErr; + } else { + FunctionAddress = COFF.getPE32PlusHeader()->ImageBase + RF.BeginAddress; + } + + SW.printString("Function", formatSymbol(FunctionName, FunctionAddress)); + SW.printBoolean("Fragment", + RF.Flag() == RuntimeFunctionFlag::RFF_PackedFragment); + SW.printNumber("FunctionLength", RF.FunctionLength()); + SW.printNumber("RegF", RF.RegF()); + SW.printNumber("RegI", RF.RegI()); + SW.printBoolean("HomedParameters", RF.H()); + SW.printNumber("CR", RF.CR()); + SW.printNumber("FrameSize", RF.FrameSize() << 4); + ListScope PS(SW, "Prologue"); + + // Synthesize the equivalent prologue according to the documentation + // at https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling, + // printed in reverse order compared to the docs, to match how prologues + // are printed for the non-packed case. + int IntSZ = 8 * RF.RegI(); + if (RF.CR() == 1) + IntSZ += 8; + int FpSZ = 8 * RF.RegF(); + if (RF.RegF()) + FpSZ += 8; + int SavSZ = (IntSZ + FpSZ + 8 * 8 * RF.H() + 0xf) & ~0xf; + int LocSZ = (RF.FrameSize() << 4) - SavSZ; + + if (RF.CR() == 3) { + SW.startLine() << "mov x29, sp\n"; + if (LocSZ <= 512) { + SW.startLine() << format("stp x29, lr, [sp, #-%d]!\n", LocSZ); + } else { + SW.startLine() << "stp x29, lr, [sp, #0]\n"; + } + } + if (LocSZ > 4080) { + SW.startLine() << format("sub sp, sp, #%d\n", LocSZ - 4080); + SW.startLine() << "sub sp, sp, #4080\n"; + } else if ((RF.CR() != 3 && LocSZ > 0) || LocSZ > 512) { + SW.startLine() << format("sub sp, sp, #%d\n", LocSZ); + } + if (RF.H()) { + SW.startLine() << format("stp x6, x7, [sp, #%d]\n", IntSZ + FpSZ + 48); + SW.startLine() << format("stp x4, x5, [sp, #%d]\n", IntSZ + FpSZ + 32); + SW.startLine() << format("stp x2, x3, [sp, #%d]\n", IntSZ + FpSZ + 16); + if (RF.RegI() > 0 || RF.RegF() > 0 || RF.CR() == 1) { + SW.startLine() << format("stp x0, x1, [sp, #%d]\n", IntSZ + FpSZ); + } else { + // This case isn't documented; if neither RegI nor RegF nor CR=1 + // have decremented the stack pointer by SavSZ, we need to do it here + // (as the final stack adjustment of LocSZ excludes SavSZ). + SW.startLine() << format("stp x0, x1, [sp, #-%d]!\n", SavSZ); + } + } + int FloatRegs = RF.RegF() > 0 ?
RF.RegF() + 1 : 0; + for (int I = (FloatRegs + 1) / 2 - 1; I >= 0; I--) { + if (I == (FloatRegs + 1) / 2 - 1 && FloatRegs % 2 == 1) { + // The last register, an odd register without a pair + SW.startLine() << format("str d%d, [sp, #%d]\n", 8 + 2 * I, + IntSZ + 16 * I); + } else if (I == 0 && RF.RegI() == 0 && RF.CR() != 1) { + SW.startLine() << format("stp d%d, d%d, [sp, #-%d]!\n", 8 + 2 * I, + 8 + 2 * I + 1, SavSZ); + } else { + SW.startLine() << format("stp d%d, d%d, [sp, #%d]\n", 8 + 2 * I, + 8 + 2 * I + 1, IntSZ + 16 * I); + } + } + if (RF.CR() == 1 && (RF.RegI() % 2) == 0) { + if (RF.RegI() == 0) + SW.startLine() << format("str lr, [sp, #-%d]!\n", SavSZ); + else + SW.startLine() << format("str lr, [sp, #%d]\n", IntSZ - 8); + } + for (int I = (RF.RegI() + 1) / 2 - 1; I >= 0; I--) { + if (I == (RF.RegI() + 1) / 2 - 1 && RF.RegI() % 2 == 1) { + // The last register, an odd register without a pair + if (RF.CR() == 1) { + if (I == 0) // If this is the only register pair + SW.startLine() << format("stp x%d, lr, [sp, #-%d]!\n", 19 + 2 * I, + SavSZ); + else + SW.startLine() << format("stp x%d, lr, [sp, #%d]\n", 19 + 2 * I, + 16 * I); + } else { + if (I == 0) + SW.startLine() << format("str x%d, [sp, #-%d]!\n", 19 + 2 * I, SavSZ); + else + SW.startLine() << format("str x%d, [sp, #%d]\n", 19 + 2 * I, 16 * I); + } + } else if (I == 0) { + // The first register pair + SW.startLine() << format("stp x19, x20, [sp, #-%d]!\n", SavSZ); + } else { + SW.startLine() << format("stp x%d, x%d, [sp, #%d]\n", 19 + 2 * I, + 19 + 2 * I + 1, 16 * I); + } + } + SW.startLine() << "end\n"; + + return true; +} + bool Decoder::dumpProcedureDataEntry(const COFFObjectFile &COFF, const SectionRef Section, unsigned Index, ArrayRef<uint8_t> Contents) { @@ -1123,8 +1260,8 @@ bool Decoder::dumpProcedureDataEntry(const COFFObjectFile &COFF, if (Entry.Flag() == RuntimeFunctionFlag::RFF_Unpacked) return dumpUnpackedEntry(COFF, Section, Offset, Index, Entry); if (isAArch64) { - SW.startLine() << "Packed unwind data not yet supported for ARM64\n"; - return true; + const RuntimeFunctionARM64 EntryARM64(Data); + return dumpPackedARM64Entry(COFF, Section, Offset, Index, EntryARM64); } return dumpPackedEntry(COFF, Section, Offset, Index, Entry); } diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.h b/llvm/tools/llvm-readobj/ARMWinEHPrinter.h index 36fe5d6f4b2b4..3263841a267bc 100644 --- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.h +++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.h @@ -17,6 +17,7 @@ namespace llvm { namespace ARM { namespace WinEH { class RuntimeFunction; +class RuntimeFunctionARM64; class Decoder { static const size_t PDataEntrySize; @@ -154,6 +155,9 @@ class Decoder { bool dumpPackedEntry(const object::COFFObjectFile &COFF, const object::SectionRef Section, uint64_t Offset, unsigned Index, const RuntimeFunction &Entry); + bool dumpPackedARM64Entry(const object::COFFObjectFile &COFF, + const object::SectionRef Section, uint64_t Offset, + unsigned Index, const RuntimeFunctionARM64 &Entry); bool dumpProcedureDataEntry(const object::COFFObjectFile &COFF, const object::SectionRef Section, unsigned Entry, ArrayRef<uint8_t> Contents); From 61e0b2b4c5fbbea01bb40f28ea0222b87166ccdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Miku=C5=82a?= Date: Tue, 15 Sep 2020 08:39:15 +0300 Subject: [PATCH 0640/1079] [LLD] Allow configuring default ld.lld backend The motivation for this is ld.lld --help targeting MinGW, which currently prints help for the ELF backend unless -m i386pe{,p} is added.
This confuses build systems that grep through linker help to find supported flags. This matches LD from Binutils which always prints help for MinGW when configured to target it. After this change, the backend can still be overridden to any supported ELF/MinGW target by using the correct -m value. Differential Revision: https://reviews.llvm.org/D87418 --- lld/CMakeLists.txt | 6 ++++++ lld/tools/lld/lld.cpp | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/lld/CMakeLists.txt b/lld/CMakeLists.txt index 34a7a68da42c5..8b8c7178c616c 100644 --- a/lld/CMakeLists.txt +++ b/lld/CMakeLists.txt @@ -174,6 +174,12 @@ endif() option(LLD_BUILD_TOOLS "Build the lld tools. If OFF, just generate build targets." ON) +option(LLD_DEFAULT_LD_LLD_IS_MINGW + "Use MinGW as the default backend for ld.lld. If OFF, ELF will be used." OFF) +if (LLD_DEFAULT_LD_LLD_IS_MINGW) + add_definitions("-DLLD_DEFAULT_LD_LLD_IS_MINGW=1") +endif() + if (MSVC) add_definitions(-wd4530) # Suppress 'warning C4530: C++ exception handler used, but unwind semantics are not enabled.' add_definitions(-wd4062) # Suppress 'warning C4062: enumerator X in switch of enum Y is not handled' from system header. diff --git a/lld/tools/lld/lld.cpp b/lld/tools/lld/lld.cpp index 8a8f8d04bbda6..d4e2fbb0309a7 100644 --- a/lld/tools/lld/lld.cpp +++ b/lld/tools/lld/lld.cpp @@ -92,7 +92,12 @@ static bool isPETarget(std::vector<const char *> &v) { continue; return isPETargetName(*(it + 1)); } + +#ifdef LLD_DEFAULT_LD_LLD_IS_MINGW + return true; +#else return false; +#endif } static Flavor parseProgname(StringRef progname) { From e71cda21d71c4c92731ec7fe8345d04395a630b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Miku=C5=82a?= Date: Tue, 15 Sep 2020 09:12:02 +0300 Subject: [PATCH 0641/1079] [Windows][Polly] Disable LLVMPolly module for all compilers on Windows Before this patch, the CMake build disabled loadable modules when compiling with Visual Studio. However, the reason for this is a limitation of the Windows DLLs, so the restriction should apply to any compiler for the Windows platform, such as MinGW, Cygwin, icc, etc. Differential Revision: https://reviews.llvm.org/D87524 --- polly/cmake/CMakeLists.txt | 2 +- polly/lib/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/polly/cmake/CMakeLists.txt b/polly/cmake/CMakeLists.txt index fd8028a8937af..7cc129ba2e906 100644 --- a/polly/cmake/CMakeLists.txt +++ b/polly/cmake/CMakeLists.txt @@ -10,7 +10,7 @@ else() endif() set(POLLY_CONFIG_EXPORTED_TARGETS Polly ${ISL_TARGET}) -if (NOT MSVC AND LLVM_ENABLE_PIC) +if (NOT WIN32 AND LLVM_ENABLE_PIC) # LLVMPolly is a dummy target on Win or if PIC code is disabled. list(APPEND POLLY_CONFIG_EXPORTED_TARGETS LLVMPolly) endif() diff --git a/polly/lib/CMakeLists.txt b/polly/lib/CMakeLists.txt index 113ae5f2eb577..b20358e4b3d67 100644 --- a/polly/lib/CMakeLists.txt +++ b/polly/lib/CMakeLists.txt @@ -137,7 +137,7 @@ endif () # Create a loadable module Polly.so that can be loaded using # LLVM's/clang's "-load" option. -if (MSVC OR NOT LLVM_ENABLE_PIC) +if (WIN32 OR NOT LLVM_ENABLE_PIC) # Add dummy target, either because loadable modules are not supported # as on Windows or because PIC code has been disabled add_custom_target(LLVMPolly) From 3023f057d83a5920e39c647b7eaf677676b3a191 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Mon, 14 Sep 2020 19:44:27 -0700 Subject: [PATCH 0642/1079] [NFC][lsan][fuzzer] Relax fuzzer-leak.test With lsan, we can't guarantee to catch a leak on the same iteration.
--- compiler-rt/test/fuzzer/fuzzer-leak.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/test/fuzzer/fuzzer-leak.test b/compiler-rt/test/fuzzer/fuzzer-leak.test index 2b61811d5d1b7..dd22fdec8677e 100644 --- a/compiler-rt/test/fuzzer/fuzzer-leak.test +++ b/compiler-rt/test/fuzzer/fuzzer-leak.test @@ -7,7 +7,7 @@ RUN: %cpp_compiler %S/LeakTimeoutTest.cpp -o %t-LeakTimeoutTest RUN: rm -rf %t-corpus && mkdir -p %t-corpus RUN: not %run %t-LeakTest -runs=100000 -detect_leaks=1 %t-corpus 2>&1 | FileCheck %s --check-prefix=LEAK_DURING LEAK_DURING: ERROR: LeakSanitizer: detected memory leaks -LEAK_DURING: Direct leak of 4 byte(s) in 1 object(s) allocated from: +LEAK_DURING: Direct leak of {{.*}} byte(s) in {{.*}} object(s) allocated from: LEAK_DURING: INFO: to ignore leaks on libFuzzer side use -detect_leaks=0 LEAK_DURING: Test unit written to ./leak- LEAK_DURING-NOT: DONE From c6aadd2b72cf38142f137278a483fea7eb9bd16f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 14 Sep 2020 22:41:39 -0700 Subject: [PATCH 0643/1079] [X86] Pre-commit test cases for D87593 The memory operand for these is incorrect. --- llvm/test/CodeGen/X86/vmaskmov-offset.ll | 28 ++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/llvm/test/CodeGen/X86/vmaskmov-offset.ll b/llvm/test/CodeGen/X86/vmaskmov-offset.ll index 03fead64bc29e..f6ecb87705ca7 100644 --- a/llvm/test/CodeGen/X86/vmaskmov-offset.ll +++ b/llvm/test/CodeGen/X86/vmaskmov-offset.ll @@ -52,3 +52,31 @@ bb: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> %masked_loaded_vec, <8 x double>* nonnull %stack_output_vec, i32 4, <8 x i1> %mask) ret void } + +define <2 x double> @mload_constmask_v2f64(<2 x double>* %addr, <2 x double> %dst) { + ; CHECK-LABEL: name: mload_constmask_v2f64 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK: liveins: $rdi, $xmm0 + ; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0 + ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK: [[VMOVHPDrm:%[0-9]+]]:vr128 = VMOVHPDrm [[COPY]], [[COPY1]], 1, $noreg, 8, $noreg :: (load 8 from %ir.addr, align 4) + ; CHECK: $xmm0 = COPY [[VMOVHPDrm]] + ; CHECK: RET 0, $xmm0 + %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> <i1 false, i1 true>, <2 x double> %dst) + ret <2 x double> %res +} + +define void @one_mask_bit_set2(<4 x float>* %addr, <4 x float> %val) { + ; CHECK-LABEL: name: one_mask_bit_set2 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK: liveins: $rdi, $xmm0 + ; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0 + ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK: VEXTRACTPSmr [[COPY1]], 1, $noreg, 8, $noreg, [[COPY]], 2 :: (store 4 into %ir.addr) + ; CHECK: RET 0 + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %val, <4 x float>* %addr, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>) + ret void +} + +declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>) declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) From d74e1f3a5119ba6b2b6f49a3e5cfab10ea903d93 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Mon, 14 Sep 2020 23:53:58 -0700 Subject: [PATCH 0644/1079] [NFC][Asan] Don't use MetaData for size Now we have enough space in the ChunkHeader. 45 bits are enough for kMaxAllowedMallocSize. Depends on D87642.
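The 45 bits come from splitting the value into a 13-bit high part and a 32-bit low part, which is what the user_requested_size_hi/lo pair in the diff below does. A condensed sketch of the same scheme, with the member names shortened here:

    #include <cassert>
    #include <cstdint>

    struct SizeField {
      uint16_t Hi : 13; // top 13 bits of the requested size
      uint32_t Lo;      // low 32 bits of the requested size

      void set(uint64_t Size) {
        Lo = static_cast<uint32_t>(Size);
        Hi = static_cast<uint16_t>(Size >> 32);
        // Mirrors the CHECK_EQ in the patch: reject sizes over 45 bits.
        assert(Hi == (Size >> 32) && "size does not fit in 45 bits");
      }
      uint64_t get() const {
        return (static_cast<uint64_t>(Hi) << 32) | Lo;
      }
    };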
Reviewed By: morehouse Differential Revision: https://reviews.llvm.org/D87643 --- compiler-rt/lib/asan/asan_allocator.cpp | 49 +++++++++++++------------ 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index 5aeb4d14e9a3e..f43882fcd8be8 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -94,18 +94,32 @@ class ChunkHeader { u8 rz_log : 3; u8 lsan_tag : 2; - // This field is used for small sizes. For large sizes it is equal to - // SizeClassMap::kMaxSize and the actual size is stored in the - // SecondaryAllocator's metadata. - u32 user_requested_size : 29; // align < 8 -> 0 // else -> log2(min(align, 512)) - 2 - u32 user_requested_alignment_log : 3; + u16 user_requested_alignment_log : 3; private: + u16 user_requested_size_hi : 13; + u32 user_requested_size_lo; atomic_uint64_t alloc_context_id; public: + uptr UsedSize() const { + uptr R = user_requested_size_lo; + if (sizeof(uptr) > sizeof(user_requested_size_lo)) + R += (uptr)user_requested_size_hi << (8 * sizeof(user_requested_size_lo)); + return R; + } + + void SetUsedSize(uptr size) { + user_requested_size_lo = size; + if (sizeof(uptr) > sizeof(user_requested_size_lo)) { + size >>= (8 * sizeof(user_requested_size_lo)); + user_requested_size_hi = size; + CHECK_EQ(user_requested_size_hi, size); + } + } + void SetAllocContext(u32 tid, u32 stack) { AtomicContextStore(&alloc_context_id, tid, stack); } @@ -147,19 +161,10 @@ enum { class AsanChunk : public ChunkBase { public: uptr Beg() { return reinterpret_cast<uptr>(this) + kChunkHeaderSize; } - uptr UsedSize(bool locked_version = false) { - if (user_requested_size != SizeClassMap::kMaxSize) - return user_requested_size; - return *reinterpret_cast<uptr *>( - get_allocator().GetMetaData(AllocBeg(locked_version))); - } - void *AllocBeg(bool locked_version = false) { - if (from_memalign) { - if (locked_version) - return get_allocator().GetBlockBeginFastLocked( - reinterpret_cast<void *>(this)); + + void *AllocBeg() { + if (from_memalign) return get_allocator().GetBlockBegin(reinterpret_cast<void *>(this)); - } return reinterpret_cast<void *>(Beg() - RZLog2Size(rz_log)); } }; @@ -337,7 +342,7 @@ struct Allocator { if (ac && atomic_load(&ac->chunk_state, memory_order_acquire) == CHUNK_ALLOCATED) { uptr beg = ac->Beg(); - uptr end = ac->Beg() + ac->UsedSize(true); + uptr end = ac->Beg() + ac->UsedSize(); uptr chunk_end = chunk + allocated_size; if (chunk < beg && beg < end && end <= chunk_end) { // Looks like a valid AsanChunk in use, poison redzones only.
@@ -552,15 +557,13 @@ struct Allocator { reinterpret_cast<uptr *>(alloc_beg)[0] = kAllocBegMagic; reinterpret_cast<uptr *>(alloc_beg)[1] = chunk_beg; } + CHECK(size); + m->SetUsedSize(size); if (using_primary_allocator) { - CHECK(size); - m->user_requested_size = size; CHECK(allocator.FromPrimary(allocated)); } else { CHECK(!allocator.FromPrimary(allocated)); - m->user_requested_size = SizeClassMap::kMaxSize; uptr *meta = reinterpret_cast<uptr *>(allocator.GetMetaData(allocated)); - meta[0] = size; meta[1] = chunk_beg; } m->user_requested_alignment_log = user_requested_alignment_log; @@ -1151,7 +1154,7 @@ void LsanMetadata::set_tag(ChunkTag value) { uptr LsanMetadata::requested_size() const { __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); - return m->UsedSize(/*locked_version=*/true); + return m->UsedSize(); } u32 LsanMetadata::stack_trace_id() const { From cad961bb24d3b1ec63571e8cac6aa8b16245f95b Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Mon, 14 Sep 2020 23:54:48 -0700 Subject: [PATCH 0645/1079] [NFC][Asan] Remove from_memalign and rz_log Before D87643, they were used to optimize UsedSize(), which was called frequently from the leak scanner. They were also used for calls from QuarantineCallback, but we have a heavy get_allocator().Deallocate call there anyway. Depends on D87643. Reviewed By: morehouse Differential Revision: https://reviews.llvm.org/D87644 --- compiler-rt/lib/asan/asan_allocator.cpp | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index f43882fcd8be8..d136423a3e34a 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -89,9 +89,7 @@ static const uptr kAllocBegMagic = 0xCC6E96B9; class ChunkHeader { public: atomic_uint8_t chunk_state; - u8 from_memalign : 1; u8 alloc_type : 2; - u8 rz_log : 3; u8 lsan_tag : 2; // align < 8 -> 0 @@ -161,12 +159,6 @@ enum { class AsanChunk : public ChunkBase { public: uptr Beg() { return reinterpret_cast<uptr>(this) + kChunkHeaderSize; } - - void *AllocBeg() { - if (from_memalign) - return get_allocator().GetBlockBegin(reinterpret_cast<void *>(this)); - return reinterpret_cast<void *>(Beg() - RZLog2Size(rz_log)); - } }; struct QuarantineCallback { @@ -185,7 +177,7 @@ struct QuarantineCallback { PoisonShadow(m->Beg(), RoundUpTo(m->UsedSize(), SHADOW_GRANULARITY), kAsanHeapLeftRedzoneMagic); - void *p = reinterpret_cast<void *>(m->AllocBeg()); + void *p = get_allocator().GetBlockBegin(m); if (p != m) { uptr *alloc_magic = reinterpret_cast<uptr *>(p); CHECK_EQ(alloc_magic[0], kAllocBegMagic); @@ -541,8 +533,7 @@ struct Allocator { uptr alloc_beg = reinterpret_cast<uptr>(allocated); uptr alloc_end = alloc_beg + needed_size; - uptr beg_plus_redzone = alloc_beg + rz_size; - uptr user_beg = beg_plus_redzone; + uptr user_beg = alloc_beg + rz_size; if (!IsAligned(user_beg, alignment)) user_beg = RoundUpTo(user_beg, alignment); uptr user_end = user_beg + size; @@ -550,8 +541,6 @@ uptr chunk_beg = user_beg - kChunkHeaderSize; AsanChunk *m = reinterpret_cast<AsanChunk *>(chunk_beg); m->alloc_type = alloc_type; - m->rz_log = rz_log; - m->from_memalign = user_beg != beg_plus_redzone; if (alloc_beg != chunk_beg) { CHECK_LE(alloc_beg + 2 * sizeof(uptr), chunk_beg); reinterpret_cast<uptr *>(alloc_beg)[0] = kAllocBegMagic; From c8ddf27ddbbe140d8acbcf1b2d3fdfbba253d02c Mon Sep 17 00:00:00 2001 From: Chris Hamilton Date: Tue, 15 Sep 2020 01:54:41 -0500 Subject: [PATCH 0646/1079] Revert "[Sema] Address-space sensitive index check for unbounded
arrays" This reverts commit da55e9ba1273284f1af61bceeaeb25e487838034. Build bots uncovered coverage gap in testing. Change not ready. --- .../clang/Basic/DiagnosticSemaKinds.td | 8 -- clang/lib/Sema/SemaChecking.cpp | 85 +++---------------- clang/test/Sema/const-eval.c | 8 +- clang/test/Sema/unbounded-array-bounds.c | 70 --------------- .../SemaCXX/constant-expression-cxx1y.cpp | 3 +- 5 files changed, 17 insertions(+), 157 deletions(-) delete mode 100644 clang/test/Sema/unbounded-array-bounds.c diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index e0be2072bb6e2..e0d700c66724a 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8847,14 +8847,6 @@ def warn_array_index_precedes_bounds : Warning< def warn_array_index_exceeds_bounds : Warning< "array index %0 is past the end of the array (which contains %1 " "element%s2)">, InGroup; -def warn_ptr_arith_exceeds_max_addressable_bounds : Warning< - "the pointer incremented by %0 refers past the last possible element for an array in %1-bit " - "address space containing %2-bit (%3-byte) elements (max possible %4 element%s5)">, - InGroup; -def warn_array_index_exceeds_max_addressable_bounds : Warning< - "array index %0 refers past the last possible element for an array in %1-bit " - "address space containing %2-bit (%3-byte) elements (max possible %4 element%s5)">, - InGroup; def note_array_declared_here : Note< "array %0 declared here">; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index dbfa329993c8b..f2b70be1d431b 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -14038,11 +14038,11 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, const ConstantArrayType *ArrayTy = Context.getAsConstantArrayType(BaseExpr->getType()); - const Type *BaseType = - ArrayTy == nullptr ? nullptr : ArrayTy->getElementType().getTypePtr(); - bool IsUnboundedArray = (BaseType == nullptr); - if (EffectiveType->isDependentType() || - (!IsUnboundedArray && BaseType->isDependentType())) + if (!ArrayTy) + return; + + const Type *BaseType = ArrayTy->getElementType().getTypePtr(); + if (EffectiveType->isDependentType() || BaseType->isDependentType()) return; Expr::EvalResult Result; @@ -14059,69 +14059,6 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, if (const MemberExpr *ME = dyn_cast(BaseExpr)) ND = ME->getMemberDecl(); - if (IsUnboundedArray) { - if (index.isUnsigned() || !index.isNegative()) { - const auto &ASTC = getASTContext(); - unsigned AddrBits = - ASTC.getTargetInfo().getPointerWidth(ASTC.getTargetAddressSpace( - EffectiveType->getCanonicalTypeInternal())); - if (index.getBitWidth() < AddrBits) - index = index.zext(AddrBits); - CharUnits ElemCharUnits = ASTC.getTypeSizeInChars(EffectiveType); - llvm::APInt ElemBytes(index.getBitWidth(), ElemCharUnits.getQuantity()); - // If index has more active bits than address space, we already know - // we have a bounds violation to warn about. Otherwise, compute - // address of (index + 1)th element, and warn about bounds violation - // only if that address exceeds address space. 
-      if (index.getActiveBits() <= AddrBits) {
-        bool Overflow;
-        llvm::APInt Product(index);
-        Product += 1;
-        Product = Product.umul_ov(ElemBytes, Overflow);
-        if (!Overflow && Product.getActiveBits() <= AddrBits)
-          return;
-      }
-
-      // Need to compute max possible elements in address space, since that
-      // is included in diag message.
-      llvm::APInt MaxElems = llvm::APInt::getMaxValue(AddrBits);
-      MaxElems = MaxElems.zext(std::max(AddrBits + 1, ElemBytes.getBitWidth()));
-      MaxElems += 1;
-      ElemBytes = ElemBytes.zextOrTrunc(MaxElems.getBitWidth());
-      MaxElems = MaxElems.udiv(ElemBytes);
-
-      unsigned DiagID =
-          ASE ? diag::warn_array_index_exceeds_max_addressable_bounds
-              : diag::warn_ptr_arith_exceeds_max_addressable_bounds;
-
-      // Diag message shows element size in bits and in "bytes" (platform-
-      // dependent CharUnits)
-      DiagRuntimeBehavior(BaseExpr->getBeginLoc(), BaseExpr,
-                          PDiag(DiagID)
-                              << index.toString(10, true) << AddrBits
-                              << (unsigned)ASTC.toBits(ElemCharUnits)
-                              << ElemBytes.toString(10, false)
-                              << MaxElems.toString(10, false)
-                              << (unsigned)MaxElems.getLimitedValue(~0U)
-                              << IndexExpr->getSourceRange());
-
-      if (!ND) {
-        // Try harder to find a NamedDecl to point at in the note.
-        while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(BaseExpr))
-          BaseExpr = ASE->getBase()->IgnoreParenCasts();
-        if (const auto *DRE = dyn_cast<DeclRefExpr>(BaseExpr))
-          ND = DRE->getDecl();
-        if (const auto *ME = dyn_cast<MemberExpr>(BaseExpr))
-          ND = ME->getMemberDecl();
-      }
-
-      if (ND)
-        DiagRuntimeBehavior(ND->getBeginLoc(), BaseExpr,
-                            PDiag(diag::note_array_declared_here) << ND);
-    }
-    return;
-  }
-
  if (index.isUnsigned() || !index.isNegative()) {
    // It is possible that the type of the base expression after
    // IgnoreParenCasts is incomplete, even though the type of the base
@@ -14184,8 +14121,9 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
      }
    }

-    unsigned DiagID = ASE ? diag::warn_array_index_exceeds_bounds
-                          : diag::warn_ptr_arith_exceeds_bounds;
+    unsigned DiagID = diag::warn_ptr_arith_exceeds_bounds;
+    if (ASE)
+      DiagID = diag::warn_array_index_exceeds_bounds;

    DiagRuntimeBehavior(BaseExpr->getBeginLoc(), BaseExpr,
                        PDiag(DiagID) << index.toString(10, true)
@@ -14206,11 +14144,12 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,

  if (!ND) {
    // Try harder to find a NamedDecl to point at in the note.
-    while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(BaseExpr))
+    while (const ArraySubscriptExpr *ASE =
+               dyn_cast<ArraySubscriptExpr>(BaseExpr))
      BaseExpr = ASE->getBase()->IgnoreParenCasts();
-    if (const auto *DRE = dyn_cast<DeclRefExpr>(BaseExpr))
+    if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(BaseExpr))
      ND = DRE->getDecl();
-    if (const auto *ME = dyn_cast<MemberExpr>(BaseExpr))
+    if (const MemberExpr *ME = dyn_cast<MemberExpr>(BaseExpr))
      ND = ME->getMemberDecl();
  }

diff --git a/clang/test/Sema/const-eval.c b/clang/test/Sema/const-eval.c
index c94539ab1de27..bbcbb0e25237e 100644
--- a/clang/test/Sema/const-eval.c
+++ b/clang/test/Sema/const-eval.c
@@ -140,10 +140,10 @@ EVAL_EXPR(52, &pr24622 == (void *)&PR24622); // expected-error {{must have a con

 // We evaluate these by providing 2s' complement semantics in constant
 // expressions, like we do for integers.
-void *PR28739a = (__int128)(unsigned long)-1 + &PR28739a; // expected-warning {{the pointer incremented by 18446744073709551615 refers past the last possible element for an array in 64-bit address space containing 64-bit (8-byte) elements (max possible 2305843009213693952 elements)}} -void *PR28739b = &PR28739b + (__int128)(unsigned long)-1; // expected-warning {{refers past the last possible element}} -__int128 PR28739c = (&PR28739c + (__int128)(unsigned long)-1) - &PR28739c; // expected-warning {{refers past the last possible element}} -void *PR28739d = &(&PR28739d)[(__int128)(unsigned long)-1]; // expected-warning {{refers past the last possible element}} +void *PR28739a = (__int128)(unsigned long)-1 + &PR28739a; +void *PR28739b = &PR28739b + (__int128)(unsigned long)-1; +__int128 PR28739c = (&PR28739c + (__int128)(unsigned long)-1) - &PR28739c; +void *PR28739d = &(&PR28739d)[(__int128)(unsigned long)-1]; struct PR35214_X { int k; diff --git a/clang/test/Sema/unbounded-array-bounds.c b/clang/test/Sema/unbounded-array-bounds.c deleted file mode 100644 index 18a8225b84697..0000000000000 --- a/clang/test/Sema/unbounded-array-bounds.c +++ /dev/null @@ -1,70 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-X86-ADDR64 %s \ -// RUN: --implicit-check-not 'past the last possible element' -// RUN: %clang_cc1 -triple i386-pc-linux-gnu -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-I386-ADDR32 %s \ -// RUN: --implicit-check-not 'past the last possible element' -// RUN: %clang_cc1 -triple avr-pc-linux-gnu -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-AVR-ADDR16 %s \ -// RUN: --implicit-check-not 'past the last possible element' - -struct S { - long long a; - char b; - long long c; - short d; -}; - -struct S s[]; - -void f1() { - ++s[3].a; - ++s[7073650413200313099].b; - // CHECK-X86-ADDR64: :[[@LINE-1]]:5: warning: array index 7073650413200313099 refers past the last possible element for an array in 64-bit address space containing 256-bit (32-byte) elements (max possible 576460752303423488 elements) - // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements) - // CHECK-AVR-ADDR16: :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) - ++s[7073650].c; - // CHECK-AVR-ADDR16: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) -} - -long long ll[]; - -void f2() { - ++ll[3]; - ++ll[2705843009213693952]; - // CHECK-X86-ADDR64: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible 2305843009213693952 elements) - // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 536870912 elements) - // CHECK-AVR-ADDR16: :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 8192 elements) - ++ll[847073650]; - // CHECK-I386-ADDR32: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 536870912 elements) - // CHECK-AVR-ADDR16: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 8192 elements) -} - -void f3(struct S p[]) { - ++p[3].a; - ++p[7073650413200313099].b; - // CHECK-X86-ADDR64: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max 
possible 576460752303423488 elements) - // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements) - // CHECK-AVR-ADDR16: :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) - ++p[7073650].c; - // CHECK-AVR-ADDR16: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) -} - -void f4(struct S *p) { - p += 3; - p += 7073650413200313099; - // CHECK-X86-ADDR64: :[[@LINE-1]]:3: warning: the pointer incremented by 7073650413200313099 refers past the last possible element for an array in 64-bit address space containing 256-bit (32-byte) elements (max possible 576460752303423488 elements) - // CHECK-I386-ADDR32: :[[@LINE-2]]:3: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements) - // CHECK-AVR-ADDR16: :[[@LINE-3]]:3: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) - p += 7073650; - // CHECK-AVR-ADDR16: :[[@LINE-1]]:3: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) -} - -struct BQ { - struct S bigblock[3276]; -}; - -struct BQ bq[]; - -void f5() { - ++bq[0].bigblock[0].a; - ++bq[1].bigblock[0].a; - // CHECK-AVR-ADDR16: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 1 element) -} diff --git a/clang/test/SemaCXX/constant-expression-cxx1y.cpp b/clang/test/SemaCXX/constant-expression-cxx1y.cpp index 7fe71d4853508..8bc4f88a63a96 100644 --- a/clang/test/SemaCXX/constant-expression-cxx1y.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx1y.cpp @@ -1018,9 +1018,8 @@ constexpr int S = sum(Cs); // expected-error{{must be initialized by a constant } constexpr void PR28739(int n) { // expected-error {{never produces a constant}} - int *p = &n; // expected-note {{declared here}} + int *p = &n; p += (__int128)(unsigned long)-1; // expected-note {{cannot refer to element 18446744073709551615 of non-array object in a constant expression}} - // expected-warning@-1 {{the pointer incremented by 18446744073709551615 refers past the last possible element for an array in 64-bit address space containing 32-bit (4-byte) elements (max possible 4611686018427387904 elements)}} } constexpr void Void(int n) { From a61bb7f0980805ef13ca188892ba17f386a2347d Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 15 Sep 2020 00:07:56 -0700 Subject: [PATCH 0647/1079] [NFC][Asan] Reorder bitfields Depends on D87644. 
Reviewed By: morehouse

Differential Revision: https://reviews.llvm.org/D87645
---
 compiler-rt/lib/asan/asan_allocator.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp
index d136423a3e34a..6daaacf63c2ff 100644
--- a/compiler-rt/lib/asan/asan_allocator.cpp
+++ b/compiler-rt/lib/asan/asan_allocator.cpp
@@ -94,10 +94,10 @@ class ChunkHeader {

  // align < 8 -> 0
  // else -> log2(min(align, 512)) - 2
-  u16 user_requested_alignment_log : 3;
+  u8 user_requested_alignment_log : 3;

 private:
-  u16 user_requested_size_hi : 13;
+  u16 user_requested_size_hi;
  u32 user_requested_size_lo;
  atomic_uint64_t alloc_context_id;

From 08507d83be15387c85edb538517b66add9dc6295 Mon Sep 17 00:00:00 2001
From: Vitaly Buka
Date: Tue, 15 Sep 2020 00:12:02 -0700
Subject: [PATCH 0648/1079] [Asan] Cleanup kAllocBegMagic setup

Make it atomic. Wrap it into a class. Set it late, after the chunk is
initialized. Reset it early, while the chunk is still valid.

Depends on D87645.

Reviewed By: morehouse

Differential Revision: https://reviews.llvm.org/D87646
---
 compiler-rt/lib/asan/asan_allocator.cpp | 59 ++++++++++++++++---------
 1 file changed, 39 insertions(+), 20 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp
index 6daaacf63c2ff..0e79c4dbd83c8 100644
--- a/compiler-rt/lib/asan/asan_allocator.cpp
+++ b/compiler-rt/lib/asan/asan_allocator.cpp
@@ -84,7 +84,6 @@ static void AtomicContextLoad(const volatile atomic_uint64_t *atomic_context,
 // ---------------------|
 // M -- magic value kAllocBegMagic
 // B -- address of ChunkHeader pointing to the first 'H'
-static const uptr kAllocBegMagic = 0xCC6E96B9;

 class ChunkHeader {
 public:
@@ -161,6 +160,33 @@ class AsanChunk : public ChunkBase {
  uptr Beg() { return reinterpret_cast<uptr>(this) + kChunkHeaderSize; }
 };

+class LargeChunkHeader {
+  static constexpr uptr kAllocBegMagic = 0xCC6E96B9;
+  atomic_uint64_t magic;
+  AsanChunk *chunk_header;
+
+ public:
+  AsanChunk *Get() {
+    return atomic_load(&magic, memory_order_acquire) == kAllocBegMagic
+               ? chunk_header
+               : reinterpret_cast<AsanChunk *>(this);
+  }
+
+  void Set(AsanChunk *p) {
+    if (p) {
+      chunk_header = p;
+      atomic_store(&magic, kAllocBegMagic, memory_order_release);
+      return;
+    }
+
+    u64 old = kAllocBegMagic;
+    if (!atomic_compare_exchange_strong(&magic, &old, 0,
+                                        memory_order_release)) {
+      CHECK_EQ(old, kAllocBegMagic);
+    }
+  }
+};
+
 struct QuarantineCallback {
  QuarantineCallback(AllocatorCache *cache, BufferedStackTrace *stack)
      : cache_(cache),
@@ -168,6 +194,13 @@ struct QuarantineCallback {
  }

  void Recycle(AsanChunk *m) {
+    void *p = get_allocator().GetBlockBegin(m);
+    if (p != m) {
+      // Clear the magic value, as allocator internals may overwrite the
+      // contents of deallocated chunk, confusing GetAsanChunk lookup.
+      reinterpret_cast<LargeChunkHeader *>(p)->Set(nullptr);
+    }
+
    u8 old_chunk_state = CHUNK_QUARANTINE;
    if (!atomic_compare_exchange_strong(&m->chunk_state, &old_chunk_state,
                                        CHUNK_INVALID, memory_order_acquire)) {
@@ -177,15 +210,6 @@ struct QuarantineCallback {
    PoisonShadow(m->Beg(), RoundUpTo(m->UsedSize(), SHADOW_GRANULARITY),
                 kAsanHeapLeftRedzoneMagic);

-    void *p = get_allocator().GetBlockBegin(m);
-    if (p != m) {
-      uptr *alloc_magic = reinterpret_cast<uptr *>(p);
-      CHECK_EQ(alloc_magic[0], kAllocBegMagic);
-      // Clear the magic value, as allocator internals may overwrite the
-      // contents of deallocated chunk, confusing GetAsanChunk lookup.
-      alloc_magic[0] = 0;
-      CHECK_EQ(alloc_magic[1], reinterpret_cast<uptr>(m));
-    }

    // Statistics.
    AsanStats &thread_stats = GetCurrentThreadStats();
@@ -541,11 +565,6 @@ struct Allocator {
    uptr chunk_beg = user_beg - kChunkHeaderSize;
    AsanChunk *m = reinterpret_cast<AsanChunk *>(chunk_beg);
    m->alloc_type = alloc_type;
-    if (alloc_beg != chunk_beg) {
-      CHECK_LE(alloc_beg + 2 * sizeof(uptr), chunk_beg);
-      reinterpret_cast<uptr *>(alloc_beg)[0] = kAllocBegMagic;
-      reinterpret_cast<uptr *>(alloc_beg)[1] = chunk_beg;
-    }
    CHECK(size);
    m->SetUsedSize(size);
    if (using_primary_allocator) {
@@ -591,6 +610,10 @@ struct Allocator {
 #endif
    // Must be the last mutation of metadata in this function.
    atomic_store(&m->chunk_state, CHUNK_ALLOCATED, memory_order_release);
+    if (alloc_beg != chunk_beg) {
+      CHECK_LE(alloc_beg + sizeof(LargeChunkHeader), chunk_beg);
+      reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(m);
+    }
    ASAN_MALLOC_HOOK(res, size);
    return res;
  }
@@ -763,11 +786,7 @@ struct Allocator {
      uptr *meta = reinterpret_cast<uptr *>(allocator.GetMetaData(alloc_beg));
      p = reinterpret_cast<AsanChunk *>(meta[1]);
    } else {
-      uptr *alloc_magic = reinterpret_cast<uptr *>(alloc_beg);
-      if (alloc_magic[0] == kAllocBegMagic)
-        p = reinterpret_cast<AsanChunk *>(alloc_magic[1]);
-      else
-        p = reinterpret_cast<AsanChunk *>(alloc_beg);
+      p = reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Get();
    }
    if (!p)
      return nullptr;

From 4540d3baad06e060ba1e42c8fb60ba8c32308db5 Mon Sep 17 00:00:00 2001
From: Vitaly Buka
Date: Tue, 15 Sep 2020 00:16:55 -0700
Subject: [PATCH 0649/1079] [NFC][Asan] Return uptr as before D87646
---
 compiler-rt/lib/asan/asan_allocator.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp
index 0e79c4dbd83c8..aae69d4673818 100644
--- a/compiler-rt/lib/asan/asan_allocator.cpp
+++ b/compiler-rt/lib/asan/asan_allocator.cpp
@@ -161,8 +161,9 @@ class AsanChunk : public ChunkBase {
 };

 class LargeChunkHeader {
-  static constexpr uptr kAllocBegMagic = 0xCC6E96B9;
-  atomic_uint64_t magic;
+  static constexpr uptr kAllocBegMagic =
+      FIRST_32_SECOND_64(0xCC6E96B9, 0xCC6E96B9CC6E96B9ULL);
+  atomic_uintptr_t magic;
  AsanChunk *chunk_header;

 public:
@@ -179,7 +180,7 @@ class LargeChunkHeader {
      return;
    }

-    u64 old = kAllocBegMagic;
+    uptr old = kAllocBegMagic;
    if (!atomic_compare_exchange_strong(&magic, &old, 0,
                                        memory_order_release)) {
      CHECK_EQ(old, kAllocBegMagic);

From 86ccf4f728c20dc4d4be04192d6a647c3c9ee819 Mon Sep 17 00:00:00 2001
From: Chris Hamilton
Date: Tue, 15 Sep 2020 02:19:02 -0500
Subject: [PATCH 0650/1079] [NFC] Test commit

From 943b0c8bffc55eba4cebaaffc4bd33856e271e94 Mon Sep 17 00:00:00 2001
From: Vitaly Buka
Date: Tue, 15 Sep 2020 00:22:10 -0700
Subject: [PATCH 0651/1079] [NFC][Asan] Remove chunk pointer from metadata

kAllocBegMagic should be enough. kAllocBegMagic is already set for the
Secondary allocations. kAllocBegMagic is good enough for the Primary,
but it's even safer for the Secondary allocator, as all allocated
blocks come from mmap.

Depends on D87646.
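The LargeChunkHeader that D87646 introduced and this patch leans on is a small publish/lookup pattern: the chunk pointer is written first and the magic last with release ordering, so any reader that observes the magic with acquire ordering also observes a valid pointer. A self-contained sketch of the same idea, using illustrative names rather than the real sanitizer types:

    #include <atomic>
    #include <cstdint>

    struct Chunk;  // stand-in for AsanChunk

    class BigBlockHeader {
      static constexpr uint64_t kMagic = 0xCC6E96B9CC6E96B9ULL;
      std::atomic<uint64_t> magic{0};
      Chunk *chunk = nullptr;

     public:
      // Publish: store the pointer first, then release-store the magic, so a
      // reader that sees the magic also sees the pointer write.
      void Set(Chunk *c) {
        chunk = c;
        magic.store(kMagic, std::memory_order_release);
      }
      // Lookup: only trust the pointer if the magic is present.
      Chunk *Get() const {
        return magic.load(std::memory_order_acquire) == kMagic ? chunk : nullptr;
      }
    };

The real Set(nullptr) path additionally uses a compare-exchange, so clearing a magic that was never set trips a CHECK instead of passing silently.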
Reviewed By: morehouse

Differential Revision: https://reviews.llvm.org/D87647
---
 compiler-rt/lib/asan/asan_allocator.cpp | 29 +++++++------------------
 1 file changed, 8 insertions(+), 21 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp
index aae69d4673818..b1d99699a6e64 100644
--- a/compiler-rt/lib/asan/asan_allocator.cpp
+++ b/compiler-rt/lib/asan/asan_allocator.cpp
@@ -167,10 +167,10 @@ class LargeChunkHeader {
  AsanChunk *chunk_header;

 public:
-  AsanChunk *Get() {
+  AsanChunk *Get() const {
    return atomic_load(&magic, memory_order_acquire) == kAllocBegMagic
               ? chunk_header
-               : reinterpret_cast<AsanChunk *>(this);
+               : nullptr;
  }

  void Set(AsanChunk *p) {
@@ -510,13 +510,10 @@ struct Allocator {
    uptr needed_size = rounded_size + rz_size;
    if (alignment > min_alignment)
      needed_size += alignment;
-    bool using_primary_allocator = true;
    // If we are allocating from the secondary allocator, there will be no
    // automatic right redzone, so add the right redzone manually.
-    if (!PrimaryAllocator::CanAllocate(needed_size, alignment)) {
+    if (!PrimaryAllocator::CanAllocate(needed_size, alignment))
      needed_size += rz_size;
-      using_primary_allocator = false;
-    }
    CHECK(IsAligned(needed_size, min_alignment));
    if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize ||
        size > max_user_defined_malloc_size) {
@@ -568,13 +565,6 @@ struct Allocator {
    m->alloc_type = alloc_type;
    CHECK(size);
    m->SetUsedSize(size);
-    if (using_primary_allocator) {
-      CHECK(allocator.FromPrimary(allocated));
-    } else {
-      CHECK(!allocator.FromPrimary(allocated));
-      uptr *meta = reinterpret_cast<uptr *>(allocator.GetMetaData(allocated));
-      meta[1] = chunk_beg;
-    }
    m->user_requested_alignment_log = user_requested_alignment_log;

    m->SetAllocContext(t ? t->tid() : 0, StackDepotPut(*stack));
@@ -782,15 +772,12 @@ struct Allocator {
  AsanChunk *GetAsanChunk(void *alloc_beg) {
    if (!alloc_beg)
      return nullptr;
-    AsanChunk *p = nullptr;
-    if (!allocator.FromPrimary(alloc_beg)) {
-      uptr *meta = reinterpret_cast<uptr *>(allocator.GetMetaData(alloc_beg));
-      p = reinterpret_cast<AsanChunk *>(meta[1]);
-    } else {
-      p = reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Get();
+    AsanChunk *p = reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Get();
+    if (!p) {
+      if (!allocator.FromPrimary(alloc_beg))
+        return nullptr;
+      p = reinterpret_cast<AsanChunk *>(alloc_beg);
    }
-    if (!p)
-      return nullptr;
    u8 state = atomic_load(&p->chunk_state, memory_order_relaxed);
    // It does not guaranty that Chunk is initialized, but it's
    // definitely not for any other value.

From 69cccb3189d6e0535ab78411a37cfcccf06a58a7 Mon Sep 17 00:00:00 2001
From: David Sherwood
Date: Mon, 7 Sep 2020 09:17:10 +0100
Subject: [PATCH 0652/1079] [SVE] Fix isLoadInvariantInLoop for scalable
 vectors

I've amended the isLoadInvariantInLoop function to bail out for scalable
vectors for now, since the invariant.start intrinsic is only ever
generated by the clang frontend for thread locals or struct and class
constructors, neither of which supports sizeless types. In addition, the
intrinsic itself does not currently support the concept of a scaled
size, which makes it impossible to compare the sizes of different
scalable objects, e.g. <vscale x 32 x i8> and <vscale x 16 x i8>.
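The scaled-size problem the message describes is easy to state in code: a fixed size is a plain bit count, while a scalable size is a minimum bit count multiplied by an unknown hardware factor (vscale), so only a fixed size can be conservatively compared against the intrinsic's byte count. A minimal sketch of the bail-out logic the patch adds — illustrative types, not LLVM's real TypeSize API:

    #include <cstdint>

    struct Size {
      uint64_t min_bits;  // known minimum size in bits
      bool scalable;      // true => real size is min_bits * vscale, unknown here
    };

    // Returns true only if the load of `loc` is provably covered by an
    // invariant.start of `invariant_bytes` (-1 encodes "variable size").
    bool definitelyCovered(Size loc, int64_t invariant_bytes) {
      if (loc.scalable) return false;         // cannot compare scaled sizes
      if (invariant_bytes < 0) return false;  // variable-sized marker
      return loc.min_bits <= uint64_t(invariant_bytes) * 8;
    }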
Added new tests here:

  Transforms/LICM/AArch64/sve-load-hoist.ll
  Transforms/LICM/hoisting.ll

Differential Revision: https://reviews.llvm.org/D87227
---
 llvm/lib/IR/Verifier.cpp                      |  8 +++++
 llvm/lib/Transforms/Scalar/LICM.cpp           | 24 +++++++++++---
 .../Transforms/LICM/AArch64/lit.local.cfg     |  2 ++
 .../Transforms/LICM/AArch64/sve-load-hoist.ll | 30 +++++++++++++++++
 llvm/test/Transforms/LICM/hoisting.ll         | 33 +++++++++++++++++++
 5 files changed, 93 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Transforms/LICM/AArch64/lit.local.cfg
 create mode 100644 llvm/test/Transforms/LICM/AArch64/sve-load-hoist.ll

diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 783c492dbeae1..a5baa2bf16314 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5010,6 +5010,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
    Assert(Size % 16 == 0, "bswap must be an even number of bytes", &Call);
    break;
  }
+  case Intrinsic::invariant_start: {
+    ConstantInt *InvariantSize = dyn_cast<ConstantInt>(Call.getArgOperand(0));
+    Assert(InvariantSize &&
+               (!InvariantSize->isNegative() || InvariantSize->isMinusOne()),
+           "invariant_start parameter must be -1, 0 or a positive number",
+           &Call);
+    break;
+  }
  case Intrinsic::matrix_multiply:
  case Intrinsic::matrix_transpose:
  case Intrinsic::matrix_column_major_load:
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 4bf39ba8f151c..b741d36e37bff 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -940,7 +940,19 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
                                  Loop *CurLoop) {
  Value *Addr = LI->getOperand(0);
  const DataLayout &DL = LI->getModule()->getDataLayout();
-  const uint32_t LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
+  const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
+
+  // It is not currently possible for clang to generate an invariant.start
+  // intrinsic with scalable vector types because we don't support thread local
+  // sizeless types and we don't permit sizeless types in structs or classes.
+  // Furthermore, even if support is added for this in future the intrinsic
+  // itself is defined to have a size of -1 for variable sized objects. This
+  // makes it impossible to verify if the intrinsic envelops our region of
+  // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
+  // types would have a -1 parameter, but the former is clearly double the size
+  // of the latter.
+  if (LocSizeInBits.isScalable())
+    return false;

  // if the type is i8 addrspace(x)*, we know this is the type of
  // llvm.invariant.start operand
@@ -970,13 +982,17 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
    if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
        !II->use_empty())
      continue;
-    unsigned InvariantSizeInBits =
-        cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
+    ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
+    // The intrinsic supports having a -1 argument for variable sized objects
+    // so we should check for that here.
+    if (InvariantSize->isNegative())
+      continue;
+    uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
    // Confirm the invariant.start location size contains the load operand size
    // in bits. Also, the invariant.start should dominate the load, and we
    // should not hoist the load out of a loop that contains this dominating
    // invariant.start.
-    if (LocSizeInBits <= InvariantSizeInBits &&
+    if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits &&
        DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
      return true;
  }
diff --git a/llvm/test/Transforms/LICM/AArch64/lit.local.cfg b/llvm/test/Transforms/LICM/AArch64/lit.local.cfg
new file mode 100644
index 0000000000000..7184443994b69
--- /dev/null
+++ b/llvm/test/Transforms/LICM/AArch64/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AArch64' in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/Transforms/LICM/AArch64/sve-load-hoist.ll b/llvm/test/Transforms/LICM/AArch64/sve-load-hoist.ll
new file mode 100644
index 0000000000000..b0fcdb7d8dfcd
--- /dev/null
+++ b/llvm/test/Transforms/LICM/AArch64/sve-load-hoist.ll
@@ -0,0 +1,30 @@
+; RUN: opt -licm -mtriple aarch64-linux-gnu -mattr=+sve -S < %s | FileCheck %s
+
+define void @no_hoist_load1_nxv2i64(<vscale x 2 x i64>* %out, i8* %in8, i32 %n) {
+; CHECK-LABEL: @no_hoist_load1_nxv2i64(
+; CHECK: entry:
+; CHECK-NOT: load
+; CHECK: for.body:
+; CHECK: load
+entry:
+  %cmp0 = icmp ugt i32 %n, 0
+  %invst = call {}* @llvm.invariant.start.p0i8(i64 16, i8* %in8)
+  %in = bitcast i8* %in8 to <vscale x 2 x i64>*
+  br i1 %cmp0, label %for.body, label %for.end
+
+for.body:
+  %i = phi i32 [0, %entry], [%inc, %for.body]
+  %i2 = zext i32 %i to i64
+  %ptr = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %out, i64 %i2
+  %val = load <vscale x 2 x i64>, <vscale x 2 x i64>* %in, align 16
+  store <vscale x 2 x i64> %val, <vscale x 2 x i64>* %ptr, align 16
+  %inc = add nuw nsw i32 %i, 1
+  %cmp = icmp ult i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
+
diff --git a/llvm/test/Transforms/LICM/hoisting.ll b/llvm/test/Transforms/LICM/hoisting.ll
index 97609fa397e45..00ac0f5756dea 100644
--- a/llvm/test/Transforms/LICM/hoisting.ll
+++ b/llvm/test/Transforms/LICM/hoisting.ll
@@ -360,3 +360,36 @@ loop:

 loopexit:
  ret i32 %sum
 }
+
+; We can't hoist the invariant load out of the loop because
+; the marker is given a variable size (-1).
+define i32 @test_fence5(i8* %addr, i32 %n, i8* %volatile) { +; CHECK-LABEL: @test_fence5 +; CHECK-LABEL: entry +; CHECK: invariant.start +; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8 +; CHECK: br label %loop +entry: + %gep = getelementptr inbounds i8, i8* %addr, i64 8 + %addr.i = bitcast i8* %gep to i32 * + store atomic i32 5, i32 * %addr.i unordered, align 8 + fence release + %invst = call {}* @llvm.invariant.start.p0i8(i64 -1, i8* %gep) + br label %loop + +loop: + %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] + %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ] + %volload = load atomic i8, i8* %volatile unordered, align 8 + fence acquire + %volchk = icmp eq i8 %volload, 0 + %addrld = load atomic i32, i32* %addr.i unordered, align 8 + %sel = select i1 %volchk, i32 0, i32 %addrld + %sum.next = add i32 %sel, %sum + %indvar.next = add i32 %indvar, 1 + %cond = icmp slt i32 %indvar.next, %n + br i1 %cond, label %loop, label %loopexit + +loopexit: + ret i32 %sum +} From e15996b5c6e9609c5902cae12455f43d7ba97a0f Mon Sep 17 00:00:00 2001 From: Han Seoul-Oh Date: Mon, 14 Sep 2020 21:15:16 -0700 Subject: [PATCH 0653/1079] [doc] Fix broken link --- llvm/docs/tutorial/MyFirstLanguageFrontend/LangImpl03.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/tutorial/MyFirstLanguageFrontend/LangImpl03.rst b/llvm/docs/tutorial/MyFirstLanguageFrontend/LangImpl03.rst index c37c9600f51e7..7170b0fb25de0 100644 --- a/llvm/docs/tutorial/MyFirstLanguageFrontend/LangImpl03.rst +++ b/llvm/docs/tutorial/MyFirstLanguageFrontend/LangImpl03.rst @@ -90,7 +90,7 @@ detail, we just need a single instance to pass into APIs that require it. The ``Builder`` object is a helper object that makes it easy to generate LLVM instructions. Instances of the -`IRBuilder `_ +`IRBuilder `_ class template keep track of the current place to insert instructions and has methods to create new instructions. From c1f2fb5184ca79e9d53d51355b380c5441191878 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Tue, 15 Sep 2020 00:48:12 -0700 Subject: [PATCH 0654/1079] [DebugInfo] Support both forward and backward slashes in tests This addresses test failure revealed by 042c23506869. 
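The fix pattern in this commit is mechanical: every literal "/" separator inside a checked path becomes a character class that accepts either separator, since DWARF paths may be emitted with forward or backward slashes depending on the host. FileCheck's {{[/\\]}} is a regex fragment; the same idea in ordinary C++ looks like this (a standalone illustration, not part of the patch):

    #include <cassert>
    #include <regex>
    #include <string>

    int main() {
      // One pattern that accepts both POSIX and Windows separators,
      // mirroring the {{[/\\]}} FileCheck idiom used below.
      std::regex re(R"(/path[/\\]to[/\\]src[/\\]dupl\.cpp)");
      assert(std::regex_search(std::string("/path/to/src/dupl.cpp"), re));
      assert(std::regex_search(std::string("/path\\to\\src\\dupl.cpp"), re));
      return 0;
    }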
--- lld/test/COFF/duplicate-dwarf.s | 12 ++++++------ lld/test/COFF/undefined-symbol-dwarf.s | 4 ++-- lld/test/ELF/conflict-debug-variable2.s | 4 ++-- lld/test/wasm/debuginfo.test | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/lld/test/COFF/duplicate-dwarf.s b/lld/test/COFF/duplicate-dwarf.s index b81c13c4300ae..d3863e9ca366d 100644 --- a/lld/test/COFF/duplicate-dwarf.s +++ b/lld/test/COFF/duplicate-dwarf.s @@ -4,21 +4,21 @@ # RUN: not lld-link -lldmingw -out:%t.exe %t.o %t.dupl.o -entry:_Z4funcv 2>&1 | FileCheck %s # CHECK: error: duplicate symbol: func() -# CHECK-NEXT: >>> defined at /path/to/src{{[/\\]}}dupl.cpp:6 +# CHECK-NEXT: >>> defined at /path{{[/\\]}}to{{[/\\]}}src{{[/\\]}}dupl.cpp:6 # CHECK-NEXT: >>> {{.*}}.o -# CHECK-NEXT: >>> defined at /path/to/src{{[/\\]}}dupl.cpp:6 +# CHECK-NEXT: >>> defined at /path{{[/\\]}}to{{[/\\]}}src{{[/\\]}}dupl.cpp:6 # CHECK-NEXT: >>> {{.*}}.o # CHECK-EMPTY: # CHECK-NEXT: error: duplicate symbol: _var -# CHECK-NEXT: >>> defined at /path/to/src{{[/\\]}}dupl.cpp:1 +# CHECK-NEXT: >>> defined at /path{{[/\\]}}to{{[/\\]}}src{{[/\\]}}dupl.cpp:1 # CHECK-NEXT: >>> {{.*}}.o -# CHECK-NEXT: >>> defined at /path/to/src{{[/\\]}}dupl.cpp:1 +# CHECK-NEXT: >>> defined at /path{{[/\\]}}to{{[/\\]}}src{{[/\\]}}dupl.cpp:1 # CHECK-NEXT: >>> {{.*}}.o # CHECK-EMPTY: # CHECK-NEXT: error: duplicate symbol: A::namespaceVar -# CHECK-NEXT: >>> defined at /path/to/src{{[/\\]}}dupl.cpp:3 +# CHECK-NEXT: >>> defined at /path{{[/\\]}}to{{[/\\]}}src{{[/\\]}}dupl.cpp:3 # CHECK-NEXT: >>> {{.*}}.o -# CHECK-NEXT: >>> defined at /path/to/src{{[/\\]}}dupl.cpp:3 +# CHECK-NEXT: >>> defined at /path{{[/\\]}}to{{[/\\]}}src{{[/\\]}}dupl.cpp:3 # CHECK-NEXT: >>> {{.*}}.o .text diff --git a/lld/test/COFF/undefined-symbol-dwarf.s b/lld/test/COFF/undefined-symbol-dwarf.s index 7e677f88b7e00..4e890987a1f46 100644 --- a/lld/test/COFF/undefined-symbol-dwarf.s +++ b/lld/test/COFF/undefined-symbol-dwarf.s @@ -3,11 +3,11 @@ # RUN: not lld-link /lldmingw /out:%t.exe %t.o /entry:entry 2>&1 | FileCheck %s # CHECK: error: undefined symbol: bar() -# CHECK-NEXT: >>> referenced by /path/to/src{{[/\\]}}undef.cpp:17 +# CHECK-NEXT: >>> referenced by /path{{[/\\]}}to{{[/\\]}}src{{[/\\]}}undef.cpp:17 # CHECK-NEXT: >>> {{.*}}.o:(entry) # CHECK-EMPTY: # CHECK-NEXT: error: undefined symbol: foo() -# CHECK-NEXT: >>> referenced by /path/to/src{{[/\\]}}undef.cpp:7 +# CHECK-NEXT: >>> referenced by /path{{[/\\]}}to{{[/\\]}}src{{[/\\]}}undef.cpp:7 # CHECK-NEXT: >>> {{.*}}.o:(A::afunc()) .text diff --git a/lld/test/ELF/conflict-debug-variable2.s b/lld/test/ELF/conflict-debug-variable2.s index 3fb59e6b4d028..fe134f49730d1 100644 --- a/lld/test/ELF/conflict-debug-variable2.s +++ b/lld/test/ELF/conflict-debug-variable2.s @@ -7,14 +7,14 @@ # INPUT-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000027] = "foo") # INPUT-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x0033 => {0x00000033} "int") # INPUT-NEXT: DW_AT_external [DW_FORM_flag_present] (true) -# INPUT-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/home/path/test.c") +# INPUT-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/home{{[/\\]}}path{{[/\\]}}test.c") # INPUT-NEXT: DW_AT_decl_line [DW_FORM_data1] (1) # INPUT-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x0) # INPUT: DW_TAG_variable # INPUT-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000002f] = "bar") # INPUT-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x0033 => {0x00000033} "int") # INPUT-NEXT: DW_AT_external [DW_FORM_flag_present] (true) -# INPUT-NEXT: DW_AT_decl_file [DW_FORM_data1] 
("/home/path/test.c") +# INPUT-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/home{{[/\\]}}path{{[/\\]}}test.c") # INPUT-NEXT: DW_AT_decl_line [DW_FORM_data1] (2) # INPUT-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x0) diff --git a/lld/test/wasm/debuginfo.test b/lld/test/wasm/debuginfo.test index 2566b74d93bf5..039a051f44faf 100644 --- a/lld/test/wasm/debuginfo.test +++ b/lld/test/wasm/debuginfo.test @@ -16,13 +16,13 @@ CHECK-NEXT: DW_AT_low_pc CHECK-NEXT: DW_AT_high_pc CHECK-NEXT: DW_AT_frame_base CHECK-NEXT: DW_AT_name ("test") -CHECK-NEXT: DW_AT_decl_file ("/Users/yury/llvmwasm{{(/|\\)}}hi.c") +CHECK-NEXT: DW_AT_decl_file ("/Users{{[/\\]}}yury{{[/\\]}}llvmwasm{{(/|\\)}}hi.c") CHECK-NEXT: DW_AT_decl_line (3) CHECK-NEXT: DW_AT_prototyped (true) CHECK: DW_TAG_formal_parameter CHECK-NEXT: DW_AT_name ("t") -CHECK-NEXT: DW_AT_decl_file ("/Users/yury/llvmwasm{{(/|\\)}}hi.c") +CHECK-NEXT: DW_AT_decl_file ("/Users{{[/\\]}}yury{{[/\\]}}llvmwasm{{(/|\\)}}hi.c") CHECK-NEXT: DW_AT_decl_line (3) CHECK: DW_TAG_subprogram @@ -30,7 +30,7 @@ CHECK-NEXT: DW_AT_low_pc CHECK-NEXT: DW_AT_high_pc CHECK-NEXT: DW_AT_frame_base CHECK-NEXT: DW_AT_name ("_start") -CHECK-NEXT: DW_AT_decl_file ("/Users/yury/llvmwasm{{(/|\\)}}hi.c") +CHECK-NEXT: DW_AT_decl_file ("/Users{{[/\\]}}yury{{[/\\]}}llvmwasm{{(/|\\)}}hi.c") CHECK-NEXT: DW_AT_decl_line (7) CHECK: DW_TAG_base_type From 7b58eb50d96b80323504d87ca2f39ee3d7abc7d5 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 10 Sep 2020 19:32:45 +0200 Subject: [PATCH 0655/1079] [Support] Make building with snmalloc work Differential revision: https://reviews.llvm.org/D87471 --- llvm/lib/Support/CMakeLists.txt | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 01bf8febb5407..45fe23c5b5a68 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -65,7 +65,6 @@ if(LLVM_INTEGRATED_CRT_ALLOC) add_definitions(-DENABLE_OVERRIDE -DENABLE_PRELOAD) set(ALLOCATOR_FILES "${LLVM_INTEGRATED_CRT_ALLOC}/rpmalloc/rpmalloc.c") elseif(LLVM_INTEGRATED_CRT_ALLOC MATCHES "snmalloc$") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /std:c++17" PARENT_SCOPE) set(ALLOCATOR_FILES "${LLVM_INTEGRATED_CRT_ALLOC}/src/override/malloc.cc" "${LLVM_INTEGRATED_CRT_ALLOC}/src/override/new.cc") set(system_libs ${system_libs} "mincore.lib" "-INCLUDE:malloc") elseif(LLVM_INTEGRATED_CRT_ALLOC MATCHES "mimalloc$") @@ -249,6 +248,18 @@ endif() set_property(TARGET LLVMSupport PROPERTY LLVM_SYSTEM_LIBS "${llvm_system_libs}") + +if(LLVM_INTEGRATED_CRT_ALLOC) + if(LLVM_INTEGRATED_CRT_ALLOC MATCHES "snmalloc$") + set_property(TARGET LLVMSupport PROPERTY CXX_STANDARD 17) + add_definitions(-D_SILENCE_CXX17_ITERATOR_BASE_CLASS_DEPRECATION_WARNING) + if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND + "${CMAKE_SYSTEM_PROCESSOR}" MATCHES "x86_64") + set_property(TARGET LLVMSupport PROPERTY COMPILE_FLAGS "-mcx16") + endif() + endif() +endif() + if(LLVM_WITH_Z3) target_include_directories(LLVMSupport SYSTEM PRIVATE From 487412988cea99c94f2c58f8fa9eff34600fe684 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Tue, 15 Sep 2020 09:09:59 +0100 Subject: [PATCH 0656/1079] [MVE] Rename of tests making them consistent with tail-predication tests. NFC. 
---
 .../{basic-tail-pred.ll => tail-pred-basic.ll}   | 1 -
 .../{tail-reduce.ll => tail-pred-reduce.ll}      | 5 +++--
 2 files changed, 3 insertions(+), 3 deletions(-)
 rename llvm/test/CodeGen/Thumb2/LowOverheadLoops/{basic-tail-pred.ll => tail-pred-basic.ll} (99%)
 rename llvm/test/CodeGen/Thumb2/LowOverheadLoops/{tail-reduce.ll => tail-pred-reduce.ll} (98%)

diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll
similarity index 99%
rename from llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll
rename to llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll
index fffa430b7274d..22ffa12c93ea4 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s

 ; CHECK-LABEL: mul_v16i8
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll
similarity index 98%
rename from llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll
rename to llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll
index 0c85e89133374..338c980eeb9b0 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll
@@ -135,8 +135,9 @@ for.cond.cleanup:
 }

 ; The vector loop is not guarded with an entry check (N == 0). Check that
-; despite this we can still calculate a precise enough range for the
-; backedge count to safely insert a vctp here.
+; despite this we can still calculate a precise enough range so that
+; the overflow checks for get.active.lane.mask don't reject
+; tail-predication.
 ;
 ; CHECK-LABEL: @reduction_not_guarded
 ;

From 9b4fa854343166dd88e4f2e135239bbf1ce0a16c Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Tue, 15 Sep 2020 10:25:38 +0200
Subject: [PATCH 0657/1079] GlobalISel/IRTranslator resetTargetOptions based on
 function attributes

Update TargetMachine.Options with function attributes before we start
to generate MIR instructions. This gives access to the correct function
attributes via TargetMachine.Options (previously it saw the attributes
of whichever function was translated first). This affects some existing
tests with the "no-nans-fp-math" attribute.

Follow-up on D87456.
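The bug being fixed is a classic stale-cache problem: target options derived from one function's attributes leaked into the translation of every later function. A minimal sketch of the fix's shape — re-derive the per-function options at the top of each run — using stand-in types rather than LLVM's real TargetMachine/Function API:

    #include <set>
    #include <string>

    struct Function {
      std::set<std::string> attrs;  // e.g. {"no-nans-fp-math"}
      bool hasAttr(const std::string &a) const { return attrs.count(a) != 0; }
    };

    struct TargetOptions {
      bool NoNaNsFPMath = false;
      bool NoInfsFPMath = false;
    };

    // Called once per function, before any instructions are generated, so the
    // options never reflect a previously processed function.
    void resetTargetOptions(const Function &f, TargetOptions &opts) {
      opts.NoNaNsFPMath = f.hasAttr("no-nans-fp-math");
      opts.NoInfsFPMath = f.hasAttr("no-infs-fp-math");
    }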
Differential Revision: https://reviews.llvm.org/D87511 --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 1 + llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll | 123 ++++++++++++++---- llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll | 127 +++++-------------- 3 files changed, 132 insertions(+), 119 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 8a39739242002..22c5d3c40dd90 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2917,6 +2917,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { DL = &F.getParent()->getDataLayout(); ORE = std::make_unique(&F); const TargetMachine &TM = MF->getTarget(); + TM.resetTargetOptions(F); EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F); FuncInfo.MF = MF; if (EnableOpts) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll index d64e97e80a6d1..4e7c2959e6aed 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll @@ -105,8 +105,18 @@ define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float ad ; SI-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(2) ; SI-NEXT: v_sub_f32_e32 v2, 0x80000000, v2 +; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; SI-NEXT: v_min_f32_e32 v5, v2, v3 +; SI-NEXT: v_max_f32_e32 v2, v2, v3 ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_med3_f32 v2, v2, v3, v4 +; SI-NEXT: v_mul_f32_e32 v3, 1.0, v4 +; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; SI-NEXT: v_min_f32_e32 v2, v2, v3 +; SI-NEXT: v_mul_f32_e32 v3, 1.0, v5 +; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; SI-NEXT: v_max_f32_e32 v2, v3, v2 ; SI-NEXT: s_mov_b64 s[2:3], s[10:11] ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_endpgm @@ -114,32 +124,42 @@ define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float ad ; VI-LABEL: v_test_no_global_nnans_med3_f32_pat0_srcmod0: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 -; VI-NEXT: v_lshlrev_b32_e32 v8, 2, v0 +; VI-NEXT: v_lshlrev_b32_e32 v6, 2, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v8 +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: v_add_u32_e32 v2, vcc, v2, v8 +; VI-NEXT: v_add_u32_e32 v2, vcc, v2, v6 ; VI-NEXT: v_mov_b32_e32 v4, s6 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: v_mov_b32_e32 v5, s7 -; VI-NEXT: v_add_u32_e32 v4, vcc, v4, v8 +; VI-NEXT: v_add_u32_e32 v4, vcc, v4, v6 ; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: flat_load_dword v1, v[2:3] -; VI-NEXT: flat_load_dword v2, v[4:5] -; VI-NEXT: v_mov_b32_e32 v7, s1 -; VI-NEXT: v_mov_b32_e32 v6, s0 -; VI-NEXT: v_add_u32_e32 v6, vcc, v6, v8 -; VI-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; VI-NEXT: flat_load_dword v7, v[0:1] +; VI-NEXT: flat_load_dword v2, v[2:3] +; VI-NEXT: flat_load_dword v3, v[4:5] +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) -; VI-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; VI-NEXT: v_sub_f32_e32 v4, 0x80000000, v7 +; VI-NEXT: 
s_waitcnt vmcnt(1) lgkmcnt(1) +; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; VI-NEXT: v_mul_f32_e32 v4, 1.0, v4 +; VI-NEXT: v_min_f32_e32 v5, v4, v2 +; VI-NEXT: v_max_f32_e32 v2, v4, v2 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_med3_f32 v0, v0, v1, v2 -; VI-NEXT: flat_store_dword v[6:7], v0 +; VI-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; VI-NEXT: v_min_f32_e32 v2, v2, v3 +; VI-NEXT: v_mul_f32_e32 v3, 1.0, v5 +; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; VI-NEXT: v_max_f32_e32 v2, v3, v2 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_test_no_global_nnans_med3_f32_pat0_srcmod0: @@ -152,8 +172,18 @@ define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float ad ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-NEXT: v_min_f32_e32 v4, v1, v2 +; GFX9-NEXT: v_max_f32_e32 v1, v1, v2 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 +; GFX9-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX9-NEXT: v_max_f32_e32 v2, v4, v4 +; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-NEXT: v_max_f32_e32 v1, v2, v1 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -396,7 +426,13 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, ; SI-NEXT: v_add_f32_e32 v3, 2.0, v3 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_add_f32_e32 v4, 4.0, v4 -; SI-NEXT: v_med3_f32 v2, v2, v3, v4 +; SI-NEXT: v_min_f32_e32 v5, v2, v3 +; SI-NEXT: v_max_f32_e32 v2, v2, v3 +; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; SI-NEXT: v_min_f32_e32 v2, v2, v4 +; SI-NEXT: v_mul_f32_e32 v3, 1.0, v5 +; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; SI-NEXT: v_max_f32_e32 v2, v3, v2 ; SI-NEXT: s_mov_b64 s[2:3], s[10:11] ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_endpgm @@ -429,9 +465,15 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, ; VI-NEXT: v_add_f32_e32 v4, 1.0, v7 ; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) ; VI-NEXT: v_add_f32_e32 v2, 2.0, v2 +; VI-NEXT: v_min_f32_e32 v5, v4, v2 +; VI-NEXT: v_max_f32_e32 v2, v4, v2 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_f32_e32 v3, 4.0, v3 -; VI-NEXT: v_med3_f32 v2, v4, v2, v3 +; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; VI-NEXT: v_min_f32_e32 v2, v2, v3 +; VI-NEXT: v_mul_f32_e32 v3, 1.0, v5 +; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; VI-NEXT: v_max_f32_e32 v2, v3, v2 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -447,9 +489,15 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, ; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_add_f32_e32 v2, 2.0, v2 +; GFX9-NEXT: v_min_f32_e32 v4, v1, v2 +; GFX9-NEXT: v_max_f32_e32 v1, v1, v2 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v3, 4.0, v3 -; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 +; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX9-NEXT: v_max_f32_e32 v2, v4, v4 +; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-NEXT: v_max_f32_e32 v1, v2, v1 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -495,13 +543,20 @@ define amdgpu_kernel void 
@v_test_safe_med3_f32_pat0_multi_use0(float addrspace( ; SI-NEXT: s_mov_b32 s3, s11 ; SI-NEXT: s_mov_b64 s[8:9], s[6:7] ; SI-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: s_waitcnt vmcnt(2) +; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2 ; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_mul_f32_e32 v3, 1.0, v3 ; SI-NEXT: v_min_f32_e32 v5, v2, v3 ; SI-NEXT: v_max_f32_e32 v2, v2, v3 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_mul_f32_e32 v3, 1.0, v4 ; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 -; SI-NEXT: s_waitcnt vmcnt(1) -; SI-NEXT: v_min_f32_e32 v2, v2, v4 -; SI-NEXT: v_max_f32_e32 v2, v5, v2 +; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; SI-NEXT: v_min_f32_e32 v2, v2, v3 +; SI-NEXT: v_mul_f32_e32 v3, 1.0, v5 +; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; SI-NEXT: v_max_f32_e32 v2, v3, v2 ; SI-NEXT: s_mov_b64 s[2:3], s[10:11] ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_endpgm @@ -530,13 +585,20 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(float addrspace( ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) +; VI-NEXT: v_mul_f32_e32 v4, 1.0, v7 ; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) -; VI-NEXT: v_min_f32_e32 v4, v7, v2 -; VI-NEXT: v_max_f32_e32 v2, v7, v2 +; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; VI-NEXT: v_min_f32_e32 v5, v4, v2 +; VI-NEXT: v_max_f32_e32 v2, v4, v2 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2 ; VI-NEXT: v_min_f32_e32 v2, v2, v3 -; VI-NEXT: v_max_f32_e32 v2, v4, v2 -; VI-NEXT: flat_store_dword v[0:1], v4 +; VI-NEXT: v_mul_f32_e32 v3, 1.0, v5 +; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; VI-NEXT: v_max_f32_e32 v2, v3, v2 +; VI-NEXT: flat_store_dword v[0:1], v5 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -548,13 +610,20 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(float addrspace( ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 ; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: v_max_f32_e32 v2, v2, v2 ; GFX9-NEXT: v_min_f32_e32 v4, v1, v2 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v2 ; GFX9-NEXT: global_store_dword v[0:1], v4, off ; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 ; GFX9-NEXT: v_min_f32_e32 v1, v1, v3 -; GFX9-NEXT: v_max_f32_e32 v1, v4, v1 +; GFX9-NEXT: v_max_f32_e32 v2, v4, v4 +; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-NEXT: v_max_f32_e32 v1, v2, v1 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll index 878b93218fd58..71cca1df9157a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll @@ -139,29 +139,17 @@ define amdgpu_kernel void @unsafe_frem_f16(half addrspace(1)* %out, half addrspa ; CI-NEXT: s_mov_b64 s[2:3], s[10:11] ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_mov_b64 s[8:9], s[6:7] -; CI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; CI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:8 +; CI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 +; CI-NEXT: s_mov_b64 s[6:7], s[10:11] ; CI-NEXT: s_waitcnt vmcnt(1) -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: 
s_waitcnt vmcnt(0) ; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_div_scale_f32 v2, s[0:1], v1, v1, v0 -; CI-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 -; CI-NEXT: v_rcp_f32_e32 v4, v2 -; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; CI-NEXT: v_fma_f32 v5, -v2, v4, 1.0 -; CI-NEXT: v_fma_f32 v4, v5, v4, v4 -; CI-NEXT: v_mul_f32_e32 v5, v3, v4 -; CI-NEXT: v_fma_f32 v6, -v2, v5, v3 -; CI-NEXT: v_fma_f32 v5, v6, v4, v5 -; CI-NEXT: v_fma_f32 v2, -v2, v5, v3 -; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 -; CI-NEXT: v_div_fmas_f32 v2, v2, v4, v5 -; CI-NEXT: s_mov_b64 s[6:7], s[10:11] -; CI-NEXT: v_div_fixup_f32 v2, v2, v1, v0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; CI-NEXT: v_rcp_f32_e32 v2, v1 +; CI-NEXT: v_mul_f32_e32 v2, v0, v2 ; CI-NEXT: v_trunc_f32_e32 v2, v2 ; CI-NEXT: v_fma_f32 v0, -v2, v1, v0 -; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; CI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; CI-NEXT: s_endpgm @@ -179,14 +167,9 @@ define amdgpu_kernel void @unsafe_frem_f16(half addrspace(1)* %out, half addrspa ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_load_ushort v0, v[0:1] -; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) -; VI-NEXT: v_cvt_f32_f16_e32 v1, v2 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v3, v0 -; VI-NEXT: v_rcp_f32_e32 v3, v3 -; VI-NEXT: v_mul_f32_e32 v1, v1, v3 -; VI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; VI-NEXT: v_div_fixup_f16 v1, v1, v0, v2 +; VI-NEXT: v_rcp_f16_e32 v1, v0 +; VI-NEXT: v_mul_f16_e32 v1, v2, v1 ; VI-NEXT: v_trunc_f16_e32 v1, v1 ; VI-NEXT: v_fma_f16 v2, -v1, v0, v2 ; VI-NEXT: v_mov_b32_e32 v0, s4 @@ -317,27 +300,16 @@ define amdgpu_kernel void @unsafe_frem_f32(float addrspace(1)* %out, float addrs ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; CI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_load_dword s2, s[6:7], 0x0 -; CI-NEXT: s_load_dword s0, s[8:9], 0x4 -; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: v_mov_b32_e32 v0, s0 -; CI-NEXT: v_div_scale_f32 v1, s[0:1], v0, v0, s2 -; CI-NEXT: v_div_scale_f32 v2, vcc, s2, v0, s2 -; CI-NEXT: v_rcp_f32_e32 v3, v1 -; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; CI-NEXT: v_fma_f32 v4, -v1, v3, 1.0 -; CI-NEXT: v_fma_f32 v3, v4, v3, v3 -; CI-NEXT: v_mul_f32_e32 v4, v2, v3 -; CI-NEXT: v_fma_f32 v5, -v1, v4, v2 -; CI-NEXT: v_fma_f32 v4, v5, v3, v4 -; CI-NEXT: v_fma_f32 v1, -v1, v4, v2 -; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 -; CI-NEXT: v_div_fmas_f32 v1, v1, v3, v4 +; CI-NEXT: s_load_dword s0, s[6:7], 0x0 +; CI-NEXT: s_load_dword s1, s[8:9], 0x4 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: s_mov_b32 s7, 0xf000 -; CI-NEXT: v_div_fixup_f32 v1, v1, v0, s2 -; CI-NEXT: v_trunc_f32_e32 v1, v1 -; CI-NEXT: v_fma_f32 v0, -v1, v0, s2 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: v_mov_b32_e32 v1, s0 +; CI-NEXT: v_rcp_f32_e32 v0, s1 +; CI-NEXT: v_mul_f32_e32 v0, s0, v0 +; CI-NEXT: v_trunc_f32_e32 v0, v0 +; CI-NEXT: v_fma_f32 v0, -v0, s1, v1 ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; CI-NEXT: s_endpgm ; @@ -346,25 +318,14 @@ define amdgpu_kernel void @unsafe_frem_f32(float addrspace(1)* %out, float addrs ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_load_dword s2, s[6:7], 0x0 -; VI-NEXT: s_load_dword s0, s[8:9], 0x10 +; VI-NEXT: s_load_dword s0, s[6:7], 0x0 +; VI-NEXT: s_load_dword s1, s[8:9], 0x10 ; VI-NEXT: 
s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_div_scale_f32 v1, s[0:1], v0, v0, s2 -; VI-NEXT: v_div_scale_f32 v2, vcc, s2, v0, s2 -; VI-NEXT: v_rcp_f32_e32 v3, v1 -; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; VI-NEXT: v_fma_f32 v4, -v1, v3, 1.0 -; VI-NEXT: v_fma_f32 v3, v4, v3, v3 -; VI-NEXT: v_mul_f32_e32 v4, v2, v3 -; VI-NEXT: v_fma_f32 v5, -v1, v4, v2 -; VI-NEXT: v_fma_f32 v4, v5, v3, v4 -; VI-NEXT: v_fma_f32 v1, -v1, v4, v2 -; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 -; VI-NEXT: v_div_fmas_f32 v1, v1, v3, v4 -; VI-NEXT: v_div_fixup_f32 v1, v1, v0, s2 -; VI-NEXT: v_trunc_f32_e32 v1, v1 -; VI-NEXT: v_fma_f32 v2, -v1, v0, s2 +; VI-NEXT: v_mov_b32_e32 v1, s0 +; VI-NEXT: v_rcp_f32_e32 v0, s1 +; VI-NEXT: v_mul_f32_e32 v0, s0, v0 +; VI-NEXT: v_trunc_f32_e32 v0, v0 +; VI-NEXT: v_fma_f32 v2, -v0, s1, v1 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: flat_store_dword v[0:1], v2 @@ -512,21 +473,12 @@ define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double add ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 ; CI-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: v_mov_b32_e32 v0, s2 -; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], s[0:1] -; CI-NEXT: v_div_scale_f64 v[8:9], vcc, s[0:1], v[0:1], s[0:1] -; CI-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] -; CI-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 -; CI-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] -; CI-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 -; CI-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] -; CI-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5] -; CI-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9] -; CI-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7] -; CI-NEXT: v_div_fixup_f64 v[2:3], v[2:3], v[0:1], s[0:1] -; CI-NEXT: v_trunc_f64_e32 v[2:3], v[2:3] -; CI-NEXT: v_fma_f64 v[0:1], -v[2:3], v[0:1], s[0:1] +; CI-NEXT: v_mov_b32_e32 v3, s1 +; CI-NEXT: v_rcp_f64_e32 v[0:1], s[2:3] +; CI-NEXT: v_mov_b32_e32 v2, s0 +; CI-NEXT: v_mul_f64 v[0:1], s[0:1], v[0:1] +; CI-NEXT: v_trunc_f64_e32 v[0:1], v[0:1] +; CI-NEXT: v_fma_f64 v[0:1], -v[0:1], s[2:3], v[2:3] ; CI-NEXT: v_mov_b32_e32 v2, s4 ; CI-NEXT: v_mov_b32_e32 v3, s5 ; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -540,21 +492,12 @@ define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double add ; VI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 ; VI-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v0, s2 -; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], s[0:1] -; VI-NEXT: v_div_scale_f64 v[8:9], vcc, s[0:1], v[0:1], s[0:1] -; VI-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] -; VI-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 -; VI-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] -; VI-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 -; VI-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] -; VI-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5] -; VI-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9] -; VI-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7] -; VI-NEXT: v_div_fixup_f64 v[2:3], v[2:3], v[0:1], s[0:1] -; VI-NEXT: v_trunc_f64_e32 v[2:3], v[2:3] -; VI-NEXT: v_fma_f64 v[0:1], -v[2:3], v[0:1], s[0:1] +; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_rcp_f64_e32 v[0:1], s[2:3] +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: v_mul_f64 v[0:1], s[0:1], v[0:1] +; VI-NEXT: v_trunc_f64_e32 v[0:1], v[0:1] +; VI-NEXT: v_fma_f64 v[0:1], -v[0:1], s[2:3], v[2:3] ; 
 ; VI-NEXT:    v_mov_b32_e32 v2, s4
 ; VI-NEXT:    v_mov_b32_e32 v3, s5
 ; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]

From 4845531fa88cb0f104b5afc5d99abded22623c53 Mon Sep 17 00:00:00 2001
From: Georgii Rymar
Date: Wed, 9 Sep 2020 17:03:53 +0300
Subject: [PATCH 0658/1079] [lib/Object] - Refine interface of ELFFile<ELFT>.
 NFCI.

`ELFFile<ELFT>` has many methods that take pointers, though they assume that
arguments are never null and hence could take references instead. This patch
performs such clean-up.

Differential revision: https://reviews.llvm.org/D87385
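The whole change follows one pattern: an accessor or parameter that can never
be null moves from pointer to reference, so `getHeader()->e_flags` becomes
`getHeader().e_flags` and call sites stop dereferencing. A minimal,
self-contained C++ sketch of that interface shape (hypothetical `File` and
`Header` stand-ins, not the actual LLVM declarations):

// Illustrative sketch only -- `File`/`Header` are hypothetical stand-ins for
// ELFFile<ELFT> and its header struct, not the LLVM classes themselves.
#include <cassert>
#include <cstdint>
#include <iostream>

struct Header {
  uint32_t e_flags = 0;
};

class File {
  Header H;

public:
  // Pointer-returning accessor: the result is never null in practice, but
  // nothing in the type says so, and every caller has to write '->'.
  const Header *getHeaderPtr() const { return &H; }

  // Reference-returning accessor: non-null by construction, callers use '.'.
  const Header &getHeader() const { return H; }
};

// Before the cleanup: the null check (or assert) is the caller's burden.
uint32_t flagsViaPointer(const File *F) {
  assert(F && "never null by contract, but the type cannot enforce it");
  return F->getHeaderPtr()->e_flags;
}

// After the cleanup: the signature itself rules out null.
uint32_t flagsViaReference(const File &F) { return F.getHeader().e_flags; }

int main() {
  File F;
  std::cout << flagsViaPointer(&F) << ' ' << flagsViaReference(F) << '\n';
}

Encoding the non-null contract in the type removes both the `->` noise and
the temptation to add defensive checks the contract already forbids, which is
the entire intent of this NFCI cleanup.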
---
 lld/ELF/Arch/AMDGPU.cpp                     |   2 +-
 lld/ELF/Arch/Hexagon.cpp                    |   2 +-
 lld/ELF/Arch/Mips.cpp                       |   4 +-
 lld/ELF/Arch/MipsArchTree.cpp               |   4 +-
 lld/ELF/Arch/PPC64.cpp                      |   4 +-
 lld/ELF/Arch/RISCV.cpp                      |   4 +-
 lld/ELF/Driver.cpp                          |   2 +-
 lld/ELF/InputFiles.cpp                      |  30 +-
 lld/ELF/InputSection.cpp                    |   4 +-
 lld/ELF/Relocations.cpp                     |   2 +-
 llvm/include/llvm/Object/ELF.h              | 201 ++++------
 llvm/include/llvm/Object/ELFObjectFile.h    |  51 ++--
 .../ExecutionEngine/JITLink/ELF_x86_64.cpp  |  24 +-
 llvm/lib/InterfaceStub/ELFObjHandler.cpp    |   2 +-
 llvm/lib/Object/ELF.cpp                     |   6 +-
 llvm/tools/llvm-objcopy/ELF/Object.cpp      |  30 +-
 llvm/tools/llvm-objdump/ELFDump.cpp         |   6 +-
 llvm/tools/llvm-readobj/ARMEHABIPrinter.h   |  14 +-
 llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h |   8 +-
 llvm/tools/llvm-readobj/ELFDumper.cpp       | 287 +++++++++---------
 llvm/tools/obj2yaml/elf2yaml.cpp            |  70 ++---
 21 files changed, 378 insertions(+), 379 deletions(-)

diff --git a/lld/ELF/Arch/AMDGPU.cpp b/lld/ELF/Arch/AMDGPU.cpp
index 3610a38692d6d..4f4ce0094bbfd 100644
--- a/lld/ELF/Arch/AMDGPU.cpp
+++ b/lld/ELF/Arch/AMDGPU.cpp
@@ -41,7 +41,7 @@ AMDGPU::AMDGPU() {
 }
 
 static uint32_t getEFlags(InputFile *file) {
-  return cast<ObjFile<ELF64LE>>(file)->getObj().getHeader()->e_flags;
+  return cast<ObjFile<ELF64LE>>(file)->getObj().getHeader().e_flags;
 }
 
 uint32_t AMDGPU::calcEFlags() const {
diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp
index 7740ce9a71e03..4896c75c44911 100644
--- a/lld/ELF/Arch/Hexagon.cpp
+++ b/lld/ELF/Arch/Hexagon.cpp
@@ -66,7 +66,7 @@ uint32_t Hexagon::calcEFlags() const {
   // greatest revision in the list of inputs.
   uint32_t ret = 0;
   for (InputFile *f : objectFiles) {
-    uint32_t eflags = cast<ObjFile<ELF32LE>>(f)->getObj().getHeader()->e_flags;
+    uint32_t eflags = cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags;
     if (eflags > ret)
       ret = eflags;
   }
diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp
index fd1c5f5077342..d5eaf94625e00 100644
--- a/lld/ELF/Arch/Mips.cpp
+++ b/lld/ELF/Arch/Mips.cpp
@@ -372,7 +372,7 @@ bool MIPS<ELFT>::needsThunk(RelExpr expr, RelType type, const InputFile *file,
   if (!f)
     return false;
   // If current file has PIC code, LA25 stub is not required.
-  if (f->getObj().getHeader()->e_flags & EF_MIPS_PIC)
+  if (f->getObj().getHeader().e_flags & EF_MIPS_PIC)
     return false;
   auto *d = dyn_cast<Defined>(&s);
   // LA25 is required if target file has PIC code
@@ -749,7 +749,7 @@ template <class ELFT> bool elf::isMipsPIC(const Defined *sym) {
   if (!file)
     return false;
 
-  return file->getObj().getHeader()->e_flags & EF_MIPS_PIC;
+  return file->getObj().getHeader().e_flags & EF_MIPS_PIC;
 }
 
 template <class ELFT> TargetInfo *elf::getMipsTargetInfo() {
diff --git a/lld/ELF/Arch/MipsArchTree.cpp b/lld/ELF/Arch/MipsArchTree.cpp
index 85329c3bef536..77c05a818a5d3 100644
--- a/lld/ELF/Arch/MipsArchTree.cpp
+++ b/lld/ELF/Arch/MipsArchTree.cpp
@@ -297,7 +297,7 @@ static uint32_t getArchFlags(ArrayRef<FileFlags> files) {
 template <class ELFT> uint32_t elf::calcMipsEFlags() {
   std::vector<FileFlags> v;
   for (InputFile *f : objectFiles)
-    v.push_back({f, cast<ObjFile<ELFT>>(f)->getObj().getHeader()->e_flags});
+    v.push_back({f, cast<ObjFile<ELFT>>(f)->getObj().getHeader().e_flags});
   if (v.empty()) {
     // If we don't have any input files, we'll have to rely on the information
     // we can derive from emulation information, since this at least gets us
@@ -363,7 +363,7 @@ uint8_t elf::getMipsFpAbiFlag(uint8_t oldFlag, uint8_t newFlag,
 
 template <class ELFT> static bool isN32Abi(const InputFile *f) {
   if (auto *ef = dyn_cast<ELFFileBase>(f))
-    return ef->template getObj<ELFT>().getHeader()->e_flags & EF_MIPS_ABI2;
+    return ef->template getObj<ELFT>().getHeader().e_flags & EF_MIPS_ABI2;
   return false;
 }
 
diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
index de4321d903994..bdd7d55172132 100644
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -620,8 +620,8 @@ int PPC64::getTlsGdRelaxSkip(RelType type) const {
 
 static uint32_t getEFlags(InputFile *file) {
   if (config->ekind == ELF64BEKind)
-    return cast<ObjFile<ELF64BE>>(file)->getObj().getHeader()->e_flags;
-  return cast<ObjFile<ELF64LE>>(file)->getObj().getHeader()->e_flags;
+    return cast<ObjFile<ELF64BE>>(file)->getObj().getHeader().e_flags;
+  return cast<ObjFile<ELF64LE>>(file)->getObj().getHeader().e_flags;
 }
 
 // This file implements v2 ABI. This function makes sure that all
diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index b340fd00deee6..4cbf925dcfa26 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -104,8 +104,8 @@ RISCV::RISCV() {
 
 static uint32_t getEFlags(InputFile *f) {
   if (config->is64)
-    return cast<ObjFile<ELF64LE>>(f)->getObj().getHeader()->e_flags;
-  return cast<ObjFile<ELF32LE>>(f)->getObj().getHeader()->e_flags;
+    return cast<ObjFile<ELF64LE>>(f)->getObj().getHeader().e_flags;
+  return cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags;
 }
 
 uint32_t RISCV::calcEFlags() const {
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 34f2cd633e425..0f2e80b659879 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1719,7 +1719,7 @@ static void findKeepUniqueSections(opt::InputArgList &args) {
     ArrayRef<Symbol *> syms = obj->getSymbols();
     if (obj->addrsigSec) {
       ArrayRef<uint8_t> contents =
-          check(obj->getObj().getSectionContents(obj->addrsigSec));
+          check(obj->getObj().getSectionContents(*obj->addrsigSec));
       const uint8_t *cur = contents.begin();
       while (cur != contents.end()) {
         unsigned size;
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index acdb5c71efb96..63474b15e451e 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -348,9 +348,9 @@ template <class ELFT> void ELFFileBase::init() {
   // Initialize trivial attributes.
const ELFFile &obj = getObj(); - emachine = obj.getHeader()->e_machine; - osabi = obj.getHeader()->e_ident[llvm::ELF::EI_OSABI]; - abiVersion = obj.getHeader()->e_ident[llvm::ELF::EI_ABIVERSION]; + emachine = obj.getHeader().e_machine; + osabi = obj.getHeader().e_ident[llvm::ELF::EI_OSABI]; + abiVersion = obj.getHeader().e_ident[llvm::ELF::EI_ABIVERSION]; ArrayRef sections = CHECK(obj.sections(), this); @@ -378,7 +378,7 @@ template void ELFFileBase::init() { template uint32_t ObjFile::getSectionIndex(const Elf_Sym &sym) const { return CHECK( - this->getObj().getSectionIndex(&sym, getELFSyms(), shndxTable), + this->getObj().getSectionIndex(sym, getELFSyms(), shndxTable), this); } @@ -566,7 +566,7 @@ void ObjFile::initializeSections(bool ignoreComdats) { if (sec.sh_type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE) cgProfile = - check(obj.template getSectionContentsAsArray(&sec)); + check(obj.template getSectionContentsAsArray(sec)); // SHF_EXCLUDE'ed sections are discarded by the linker. However, // if -r is given, we'll let the final link discard such sections. @@ -595,7 +595,7 @@ void ObjFile::initializeSections(bool ignoreComdats) { ArrayRef entries = - CHECK(obj.template getSectionContentsAsArray(&sec), this); + CHECK(obj.template getSectionContentsAsArray(sec), this); if (entries.empty()) fatal(toString(this) + ": empty SHT_GROUP"); @@ -870,7 +870,7 @@ InputSectionBase *ObjFile::createInputSection(const Elf_Shdr &sec) { if (config->emachine == EM_ARM && sec.sh_type == SHT_ARM_ATTRIBUTES) { ARMAttributeParser attributes; - ArrayRef contents = check(this->getObj().getSectionContents(&sec)); + ArrayRef contents = check(this->getObj().getSectionContents(sec)); if (Error e = attributes.parse(contents, config->ekind == ELF32LEKind ? support::little : support::big)) { @@ -894,7 +894,7 @@ InputSectionBase *ObjFile::createInputSection(const Elf_Shdr &sec) { if (config->emachine == EM_RISCV && sec.sh_type == SHT_RISCV_ATTRIBUTES) { RISCVAttributeParser attributes; - ArrayRef contents = check(this->getObj().getSectionContents(&sec)); + ArrayRef contents = check(this->getObj().getSectionContents(sec)); if (Error e = attributes.parse(contents, support::little)) { auto *isec = make(*this, sec, name); warn(toString(isec) + ": " + llvm::toString(std::move(e))); @@ -919,7 +919,7 @@ InputSectionBase *ObjFile::createInputSection(const Elf_Shdr &sec) { if (config->relocatable) break; ArrayRef data = - CHECK(this->getObj().template getSectionContentsAsArray(&sec), this); + CHECK(this->getObj().template getSectionContentsAsArray(sec), this); if (!data.empty() && data.back() != '\0') { error(toString(this) + ": corrupted dependent libraries section (unterminated string): " + @@ -959,12 +959,12 @@ InputSectionBase *ObjFile::createInputSection(const Elf_Shdr &sec) { ": multiple relocation sections to one section are not supported"); if (sec.sh_type == SHT_RELA) { - ArrayRef rels = CHECK(getObj().relas(&sec), this); + ArrayRef rels = CHECK(getObj().relas(sec), this); target->firstRelocation = rels.begin(); target->numRelocations = rels.size(); target->areRelocsRela = true; } else { - ArrayRef rels = CHECK(getObj().rels(&sec), this); + ArrayRef rels = CHECK(getObj().rels(sec), this); target->firstRelocation = rels.begin(); target->numRelocations = rels.size(); target->areRelocsRela = false; @@ -1065,7 +1065,7 @@ InputSectionBase *ObjFile::createInputSection(const Elf_Shdr &sec) { template StringRef ObjFile::getSectionName(const Elf_Shdr &sec) { - return CHECK(getObj().getSectionName(&sec, sectionStringTable), this); + 
return CHECK(getObj().getSectionName(sec, sectionStringTable), this); } // Initialize this->Symbols. this->Symbols is a parallel array as @@ -1279,7 +1279,7 @@ std::vector SharedFile::parseVerneed(const ELFFile &obj, if (!sec) return {}; std::vector verneeds; - ArrayRef data = CHECK(obj.getSectionContents(sec), this); + ArrayRef data = CHECK(obj.getSectionContents(*sec), this); const uint8_t *verneedBuf = data.begin(); for (unsigned i = 0; i != sec->sh_info; ++i) { if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end()) @@ -1355,7 +1355,7 @@ template void SharedFile::parse() { continue; case SHT_DYNAMIC: dynamicTags = - CHECK(obj.template getSectionContentsAsArray(&sec), this); + CHECK(obj.template getSectionContentsAsArray(sec), this); break; case SHT_GNU_versym: versymSec = &sec; @@ -1414,7 +1414,7 @@ template void SharedFile::parse() { std::vector versyms(size, VER_NDX_GLOBAL); if (versymSec) { ArrayRef versym = - CHECK(obj.template getSectionContentsAsArray(versymSec), + CHECK(obj.template getSectionContentsAsArray(*versymSec), this) .slice(firstGlobal); for (size_t i = 0; i < size; ++i) diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index ad4a12855ad1d..497fb607f4243 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -53,7 +53,7 @@ static ArrayRef getSectionContents(ObjFile &file, const typename ELFT::Shdr &hdr) { if (hdr.sh_type == SHT_NOBITS) return makeArrayRef(nullptr, hdr.sh_size); - return check(file.getObj().getSectionContents(&hdr)); + return check(file.getObj().getSectionContents(hdr)); } InputSectionBase::InputSectionBase(InputFile *file, uint64_t flags, @@ -456,7 +456,7 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { Elf_Shdr_Impl sec = CHECK(file->getObj().sections(), file)[secIdx]; warn("relocation refers to a discarded section: " + - CHECK(file->getObj().getSectionName(&sec), file) + + CHECK(file->getObj().getSectionName(sec), file) + "\n>>> referenced by " + getObjMsg(p->r_offset)); } p->setSymbolAndType(0, 0, false); diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 1ff47244c9903..4c6a70d9034e9 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -692,7 +692,7 @@ static std::string maybeReportDiscarded(Undefined &sym) { if (sym.type == ELF::STT_SECTION) { msg = "relocation refers to a discarded section: "; msg += CHECK( - file->getObj().getSectionName(&objSections[sym.discardedSecIdx]), file); + file->getObj().getSectionName(objSections[sym.discardedSecIdx]), file); } else { msg = "relocation refers to a symbol in a discarded section: " + toString(sym); diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h index 35d2456f7ce20..f4ba2cf66d9f3 100644 --- a/llvm/include/llvm/Object/ELF.h +++ b/llvm/include/llvm/Object/ELF.h @@ -58,11 +58,11 @@ enum PPCInstrMasks : uint64_t { template class ELFFile; template -std::string getSecIndexForError(const ELFFile *Obj, - const typename ELFT::Shdr *Sec) { - auto TableOrErr = Obj->sections(); +std::string getSecIndexForError(const ELFFile &Obj, + const typename ELFT::Shdr &Sec) { + auto TableOrErr = Obj.sections(); if (TableOrErr) - return "[index " + std::to_string(Sec - &TableOrErr->front()) + "]"; + return "[index " + std::to_string(&Sec - &TableOrErr->front()) + "]"; // To make this helper be more convenient for error reporting purposes we // drop the error. But really it should never be triggered. 
Before this point, // our code should have called 'sections()' and reported a proper error on @@ -72,11 +72,11 @@ std::string getSecIndexForError(const ELFFile *Obj, } template -std::string getPhdrIndexForError(const ELFFile *Obj, - const typename ELFT::Phdr *Phdr) { - auto Headers = Obj->program_headers(); +std::string getPhdrIndexForError(const ELFFile &Obj, + const typename ELFT::Phdr &Phdr) { + auto Headers = Obj.program_headers(); if (Headers) - return ("[index " + Twine(Phdr - &Headers->front()) + "]").str(); + return ("[index " + Twine(&Phdr - &Headers->front()) + "]").str(); // See comment in the getSecIndexForError() above. llvm::consumeError(Headers.takeError()); return "[unknown index]"; @@ -134,17 +134,17 @@ class ELFFile { ELFFile(StringRef Object); public: - const Elf_Ehdr *getHeader() const { - return reinterpret_cast(base()); + const Elf_Ehdr &getHeader() const { + return *reinterpret_cast(base()); } template Expected getEntry(uint32_t Section, uint32_t Entry) const; template - Expected getEntry(const Elf_Shdr *Section, uint32_t Entry) const; + Expected getEntry(const Elf_Shdr &Section, uint32_t Entry) const; Expected - getStringTable(const Elf_Shdr *Section, + getStringTable(const Elf_Shdr &Section, WarningHandler WarnHandler = &defaultWarningHandler) const; Expected getStringTableForSymtab(const Elf_Shdr &Section) const; Expected getStringTableForSymtab(const Elf_Shdr &Section, @@ -163,18 +163,18 @@ class ELFFile { std::string getDynamicTagAsString(uint64_t Type) const; /// Get the symbol for a given relocation. - Expected getRelocationSymbol(const Elf_Rel *Rel, + Expected getRelocationSymbol(const Elf_Rel &Rel, const Elf_Shdr *SymTab) const; static Expected create(StringRef Object); bool isLE() const { - return getHeader()->getDataEncoding() == ELF::ELFDATA2LSB; + return getHeader().getDataEncoding() == ELF::ELFDATA2LSB; } bool isMipsELF64() const { - return getHeader()->e_machine == ELF::EM_MIPS && - getHeader()->getFileClass() == ELF::ELFCLASS64; + return getHeader().e_machine == ELF::EM_MIPS && + getHeader().getFileClass() == ELF::ELFCLASS64; } bool isMips64EL() const { return isMipsELF64() && isLE(); } @@ -188,43 +188,43 @@ class ELFFile { Expected symbols(const Elf_Shdr *Sec) const { if (!Sec) return makeArrayRef(nullptr, nullptr); - return getSectionContentsAsArray(Sec); + return getSectionContentsAsArray(*Sec); } - Expected relas(const Elf_Shdr *Sec) const { + Expected relas(const Elf_Shdr &Sec) const { return getSectionContentsAsArray(Sec); } - Expected rels(const Elf_Shdr *Sec) const { + Expected rels(const Elf_Shdr &Sec) const { return getSectionContentsAsArray(Sec); } - Expected relrs(const Elf_Shdr *Sec) const { + Expected relrs(const Elf_Shdr &Sec) const { return getSectionContentsAsArray(Sec); } std::vector decode_relrs(Elf_Relr_Range relrs) const; - Expected> android_relas(const Elf_Shdr *Sec) const; + Expected> android_relas(const Elf_Shdr &Sec) const; /// Iterate over program header table. 
Expected program_headers() const { - if (getHeader()->e_phnum && getHeader()->e_phentsize != sizeof(Elf_Phdr)) + if (getHeader().e_phnum && getHeader().e_phentsize != sizeof(Elf_Phdr)) return createError("invalid e_phentsize: " + - Twine(getHeader()->e_phentsize)); + Twine(getHeader().e_phentsize)); uint64_t HeadersSize = - (uint64_t)getHeader()->e_phnum * getHeader()->e_phentsize; - uint64_t PhOff = getHeader()->e_phoff; + (uint64_t)getHeader().e_phnum * getHeader().e_phentsize; + uint64_t PhOff = getHeader().e_phoff; if (PhOff + HeadersSize < PhOff || PhOff + HeadersSize > getBufSize()) return createError("program headers are longer than binary of size " + Twine(getBufSize()) + ": e_phoff = 0x" + - Twine::utohexstr(getHeader()->e_phoff) + - ", e_phnum = " + Twine(getHeader()->e_phnum) + - ", e_phentsize = " + Twine(getHeader()->e_phentsize)); + Twine::utohexstr(getHeader().e_phoff) + + ", e_phnum = " + Twine(getHeader().e_phnum) + + ", e_phentsize = " + Twine(getHeader().e_phentsize)); auto *Begin = reinterpret_cast(base() + PhOff); - return makeArrayRef(Begin, Begin + getHeader()->e_phnum); + return makeArrayRef(Begin, Begin + getHeader().e_phnum); } /// Get an iterator over notes in a program header. @@ -257,7 +257,7 @@ class ELFFile { assert(Shdr.sh_type == ELF::SHT_NOTE && "Shdr is not of type SHT_NOTE"); ErrorAsOutParameter ErrAsOutParam(&Err); if (Shdr.sh_offset + Shdr.sh_size > getBufSize()) { - Err = createError("SHT_NOTE section " + getSecIndexForError(this, &Shdr) + + Err = createError("SHT_NOTE section " + getSecIndexForError(*this, Shdr) + " has invalid offset (0x" + Twine::utohexstr(Shdr.sh_offset) + ") or size (0x" + Twine::utohexstr(Shdr.sh_size) + ")"); @@ -298,12 +298,12 @@ class ELFFile { Expected getSectionStringTable( Elf_Shdr_Range Sections, WarningHandler WarnHandler = &defaultWarningHandler) const; - Expected getSectionIndex(const Elf_Sym *Sym, Elf_Sym_Range Syms, + Expected getSectionIndex(const Elf_Sym &Sym, Elf_Sym_Range Syms, ArrayRef ShndxTable) const; - Expected getSection(const Elf_Sym *Sym, + Expected getSection(const Elf_Sym &Sym, const Elf_Shdr *SymTab, ArrayRef ShndxTable) const; - Expected getSection(const Elf_Sym *Sym, + Expected getSection(const Elf_Sym &Sym, Elf_Sym_Range Symtab, ArrayRef ShndxTable) const; Expected getSection(uint32_t Index) const; @@ -312,14 +312,14 @@ class ELFFile { uint32_t Index) const; Expected - getSectionName(const Elf_Shdr *Section, + getSectionName(const Elf_Shdr &Section, WarningHandler WarnHandler = &defaultWarningHandler) const; - Expected getSectionName(const Elf_Shdr *Section, + Expected getSectionName(const Elf_Shdr &Section, StringRef DotShstrtab) const; template - Expected> getSectionContentsAsArray(const Elf_Shdr *Sec) const; - Expected> getSectionContents(const Elf_Shdr *Sec) const; - Expected> getSegmentContents(const Elf_Phdr *Phdr) const; + Expected> getSectionContentsAsArray(const Elf_Shdr &Sec) const; + Expected> getSectionContents(const Elf_Shdr &Sec) const; + Expected> getSegmentContents(const Elf_Phdr &Phdr) const; }; using ELF32LEFile = ELFFile; @@ -337,11 +337,11 @@ getSection(typename ELFT::ShdrRange Sections, uint32_t Index) { template inline Expected -getExtendedSymbolTableIndex(const typename ELFT::Sym *Sym, - const typename ELFT::Sym *FirstSym, +getExtendedSymbolTableIndex(const typename ELFT::Sym &Sym, + const typename ELFT::Sym &FirstSym, ArrayRef ShndxTable) { - assert(Sym->st_shndx == ELF::SHN_XINDEX); - unsigned Index = Sym - FirstSym; + assert(Sym.st_shndx == ELF::SHN_XINDEX); + unsigned 
Index = &Sym - &FirstSym; if (Index >= ShndxTable.size()) return createError( "extended symbol index (" + Twine(Index) + @@ -354,12 +354,12 @@ getExtendedSymbolTableIndex(const typename ELFT::Sym *Sym, template Expected -ELFFile::getSectionIndex(const Elf_Sym *Sym, Elf_Sym_Range Syms, +ELFFile::getSectionIndex(const Elf_Sym &Sym, Elf_Sym_Range Syms, ArrayRef ShndxTable) const { - uint32_t Index = Sym->st_shndx; + uint32_t Index = Sym.st_shndx; if (Index == ELF::SHN_XINDEX) { - auto ErrorOrIndex = getExtendedSymbolTableIndex( - Sym, Syms.begin(), ShndxTable); + Expected ErrorOrIndex = + getExtendedSymbolTableIndex(Sym, *Syms.begin(), ShndxTable); if (!ErrorOrIndex) return ErrorOrIndex.takeError(); return *ErrorOrIndex; @@ -371,7 +371,7 @@ ELFFile::getSectionIndex(const Elf_Sym *Sym, Elf_Sym_Range Syms, template Expected -ELFFile::getSection(const Elf_Sym *Sym, const Elf_Shdr *SymTab, +ELFFile::getSection(const Elf_Sym &Sym, const Elf_Shdr *SymTab, ArrayRef ShndxTable) const { auto SymsOrErr = symbols(SymTab); if (!SymsOrErr) @@ -381,7 +381,7 @@ ELFFile::getSection(const Elf_Sym *Sym, const Elf_Shdr *SymTab, template Expected -ELFFile::getSection(const Elf_Sym *Sym, Elf_Sym_Range Symbols, +ELFFile::getSection(const Elf_Sym &Sym, Elf_Sym_Range Symbols, ArrayRef ShndxTable) const { auto IndexOrErr = getSectionIndex(Sym, Symbols, ShndxTable); if (!IndexOrErr) @@ -402,7 +402,7 @@ ELFFile::getSymbol(const Elf_Shdr *Sec, uint32_t Index) const { Elf_Sym_Range Symbols = *SymsOrErr; if (Index >= Symbols.size()) return createError("unable to get symbol from section " + - getSecIndexForError(this, Sec) + + getSecIndexForError(*this, *Sec) + ": invalid symbol index (" + Twine(Index) + ")"); return &Symbols[Index]; } @@ -410,26 +410,26 @@ ELFFile::getSymbol(const Elf_Shdr *Sec, uint32_t Index) const { template template Expected> -ELFFile::getSectionContentsAsArray(const Elf_Shdr *Sec) const { - if (Sec->sh_entsize != sizeof(T) && sizeof(T) != 1) - return createError("section " + getSecIndexForError(this, Sec) + - " has an invalid sh_entsize: " + Twine(Sec->sh_entsize)); +ELFFile::getSectionContentsAsArray(const Elf_Shdr &Sec) const { + if (Sec.sh_entsize != sizeof(T) && sizeof(T) != 1) + return createError("section " + getSecIndexForError(*this, Sec) + + " has an invalid sh_entsize: " + Twine(Sec.sh_entsize)); - uintX_t Offset = Sec->sh_offset; - uintX_t Size = Sec->sh_size; + uintX_t Offset = Sec.sh_offset; + uintX_t Size = Sec.sh_size; if (Size % sizeof(T)) - return createError("section " + getSecIndexForError(this, Sec) + + return createError("section " + getSecIndexForError(*this, Sec) + " has an invalid sh_size (" + Twine(Size) + ") which is not a multiple of its sh_entsize (" + - Twine(Sec->sh_entsize) + ")"); + Twine(Sec.sh_entsize) + ")"); if (std::numeric_limits::max() - Offset < Size) - return createError("section " + getSecIndexForError(this, Sec) + + return createError("section " + getSecIndexForError(*this, Sec) + " has a sh_offset (0x" + Twine::utohexstr(Offset) + ") + sh_size (0x" + Twine::utohexstr(Size) + ") that cannot be represented"); if (Offset + Size > Buf.size()) - return createError("section " + getSecIndexForError(this, Sec) + + return createError("section " + getSecIndexForError(*this, Sec) + " has a sh_offset (0x" + Twine::utohexstr(Offset) + ") + sh_size (0x" + Twine::utohexstr(Size) + ") that is greater than the file size (0x" + @@ -445,17 +445,17 @@ ELFFile::getSectionContentsAsArray(const Elf_Shdr *Sec) const { template Expected> -ELFFile::getSegmentContents(const Elf_Phdr 
*Phdr) const { - uintX_t Offset = Phdr->p_offset; - uintX_t Size = Phdr->p_filesz; +ELFFile::getSegmentContents(const Elf_Phdr &Phdr) const { + uintX_t Offset = Phdr.p_offset; + uintX_t Size = Phdr.p_filesz; if (std::numeric_limits::max() - Offset < Size) - return createError("program header " + getPhdrIndexForError(this, Phdr) + + return createError("program header " + getPhdrIndexForError(*this, Phdr) + " has a p_offset (0x" + Twine::utohexstr(Offset) + ") + p_filesz (0x" + Twine::utohexstr(Size) + ") that cannot be represented"); if (Offset + Size > Buf.size()) - return createError("program header " + getPhdrIndexForError(this, Phdr) + + return createError("program header " + getPhdrIndexForError(*this, Phdr) + " has a p_offset (0x" + Twine::utohexstr(Offset) + ") + p_filesz (0x" + Twine::utohexstr(Size) + ") that is greater than the file size (0x" + @@ -465,13 +465,13 @@ ELFFile::getSegmentContents(const Elf_Phdr *Phdr) const { template Expected> -ELFFile::getSectionContents(const Elf_Shdr *Sec) const { +ELFFile::getSectionContents(const Elf_Shdr &Sec) const { return getSectionContentsAsArray(Sec); } template StringRef ELFFile::getRelocationTypeName(uint32_t Type) const { - return getELFRelocationTypeName(getHeader()->e_machine, Type); + return getELFRelocationTypeName(getHeader().e_machine, Type); } template @@ -507,24 +507,24 @@ void ELFFile::getRelocationTypeName(uint32_t Type, template uint32_t ELFFile::getRelativeRelocationType() const { - return getELFRelativeRelocationType(getHeader()->e_machine); + return getELFRelativeRelocationType(getHeader().e_machine); } template Expected -ELFFile::getRelocationSymbol(const Elf_Rel *Rel, +ELFFile::getRelocationSymbol(const Elf_Rel &Rel, const Elf_Shdr *SymTab) const { - uint32_t Index = Rel->getSymbol(isMips64EL()); + uint32_t Index = Rel.getSymbol(isMips64EL()); if (Index == 0) return nullptr; - return getEntry(SymTab, Index); + return getEntry(*SymTab, Index); } template Expected ELFFile::getSectionStringTable(Elf_Shdr_Range Sections, WarningHandler WarnHandler) const { - uint32_t Index = getHeader()->e_shstrndx; + uint32_t Index = getHeader().e_shstrndx; if (Index == ELF::SHN_XINDEX) { // If the section name string table section index is greater than // or equal to SHN_LORESERVE, then the actual index of the section name @@ -542,7 +542,7 @@ ELFFile::getSectionStringTable(Elf_Shdr_Range Sections, if (Index >= Sections.size()) return createError("section header string table index " + Twine(Index) + " does not exist"); - return getStringTable(&Sections[Index], WarnHandler); + return getStringTable(Sections[Index], WarnHandler); } template ELFFile::ELFFile(StringRef Object) : Buf(Object) {} @@ -558,13 +558,13 @@ Expected> ELFFile::create(StringRef Object) { template Expected ELFFile::sections() const { - const uintX_t SectionTableOffset = getHeader()->e_shoff; + const uintX_t SectionTableOffset = getHeader().e_shoff; if (SectionTableOffset == 0) return ArrayRef(); - if (getHeader()->e_shentsize != sizeof(Elf_Shdr)) + if (getHeader().e_shentsize != sizeof(Elf_Shdr)) return createError("invalid e_shentsize in ELF header: " + - Twine(getHeader()->e_shentsize)); + Twine(getHeader().e_shentsize)); const uint64_t FileSize = Buf.size(); if (SectionTableOffset + sizeof(Elf_Shdr) > FileSize || @@ -581,7 +581,7 @@ Expected ELFFile::sections() const { const Elf_Shdr *First = reinterpret_cast(base() + SectionTableOffset); - uintX_t NumSections = getHeader()->e_shnum; + uintX_t NumSections = getHeader().e_shnum; if (NumSections == 0) NumSections = 
First->sh_size; @@ -612,21 +612,21 @@ Expected ELFFile::getEntry(uint32_t Section, auto SecOrErr = getSection(Section); if (!SecOrErr) return SecOrErr.takeError(); - return getEntry(*SecOrErr, Entry); + return getEntry(**SecOrErr, Entry); } template template -Expected ELFFile::getEntry(const Elf_Shdr *Section, +Expected ELFFile::getEntry(const Elf_Shdr &Section, uint32_t Entry) const { - if (sizeof(T) != Section->sh_entsize) - return createError("section " + getSecIndexForError(this, Section) + + if (sizeof(T) != Section.sh_entsize) + return createError("section " + getSecIndexForError(*this, Section) + " has invalid sh_entsize: expected " + Twine(sizeof(T)) + - ", but got " + Twine(Section->sh_entsize)); - uint64_t Pos = Section->sh_offset + (uint64_t)Entry * sizeof(T); + ", but got " + Twine(Section.sh_entsize)); + uint64_t Pos = Section.sh_offset + (uint64_t)Entry * sizeof(T); if (Pos + sizeof(T) > Buf.size()) return createError("unable to access section " + - getSecIndexForError(this, Section) + " data at 0x" + + getSecIndexForError(*this, Section) + " data at 0x" + Twine::utohexstr(Pos) + ": offset goes past the end of file"); return reinterpret_cast(base() + Pos); @@ -643,14 +643,14 @@ ELFFile::getSection(uint32_t Index) const { template Expected -ELFFile::getStringTable(const Elf_Shdr *Section, +ELFFile::getStringTable(const Elf_Shdr &Section, WarningHandler WarnHandler) const { - if (Section->sh_type != ELF::SHT_STRTAB) + if (Section.sh_type != ELF::SHT_STRTAB) if (Error E = WarnHandler("invalid sh_type for string table section " + - getSecIndexForError(this, Section) + + getSecIndexForError(*this, Section) + ": expected SHT_STRTAB, but got " + object::getELFSectionTypeName( - getHeader()->e_machine, Section->sh_type))) + getHeader().e_machine, Section.sh_type))) return std::move(E); auto V = getSectionContentsAsArray(Section); @@ -659,10 +659,10 @@ ELFFile::getStringTable(const Elf_Shdr *Section, ArrayRef Data = *V; if (Data.empty()) return createError("SHT_STRTAB string table section " + - getSecIndexForError(this, Section) + " is empty"); + getSecIndexForError(*this, Section) + " is empty"); if (Data.back() != '\0') return createError("SHT_STRTAB string table section " + - getSecIndexForError(this, Section) + + getSecIndexForError(*this, Section) + " is non-null terminated"); return StringRef(Data.begin(), Data.size()); } @@ -681,7 +681,7 @@ Expected> ELFFile::getSHNDXTable(const Elf_Shdr &Section, Elf_Shdr_Range Sections) const { assert(Section.sh_type == ELF::SHT_SYMTAB_SHNDX); - auto VOrErr = getSectionContentsAsArray(&Section); + auto VOrErr = getSectionContentsAsArray(Section); if (!VOrErr) return VOrErr.takeError(); ArrayRef V = *VOrErr; @@ -691,10 +691,10 @@ ELFFile::getSHNDXTable(const Elf_Shdr &Section, const Elf_Shdr &SymTable = **SymTableOrErr; if (SymTable.sh_type != ELF::SHT_SYMTAB && SymTable.sh_type != ELF::SHT_DYNSYM) - return createError("SHT_SYMTAB_SHNDX section is linked with " + - object::getELFSectionTypeName(getHeader()->e_machine, - SymTable.sh_type) + - " section (expected SHT_SYMTAB/SHT_DYNSYM)"); + return createError( + "SHT_SYMTAB_SHNDX section is linked with " + + object::getELFSectionTypeName(getHeader().e_machine, SymTable.sh_type) + + " section (expected SHT_SYMTAB/SHT_DYNSYM)"); uint64_t Syms = SymTable.sh_size / sizeof(Elf_Sym); if (V.size() != Syms) @@ -722,15 +722,16 @@ ELFFile::getStringTableForSymtab(const Elf_Shdr &Sec, if (Sec.sh_type != ELF::SHT_SYMTAB && Sec.sh_type != ELF::SHT_DYNSYM) return createError( "invalid sh_type for symbol 
table, expected SHT_SYMTAB or SHT_DYNSYM"); - auto SectionOrErr = object::getSection(Sections, Sec.sh_link); + Expected SectionOrErr = + object::getSection(Sections, Sec.sh_link); if (!SectionOrErr) return SectionOrErr.takeError(); - return getStringTable(*SectionOrErr); + return getStringTable(**SectionOrErr); } template Expected -ELFFile::getSectionName(const Elf_Shdr *Section, +ELFFile::getSectionName(const Elf_Shdr &Section, WarningHandler WarnHandler) const { auto SectionsOrErr = sections(); if (!SectionsOrErr) @@ -742,13 +743,13 @@ ELFFile::getSectionName(const Elf_Shdr *Section, } template -Expected ELFFile::getSectionName(const Elf_Shdr *Section, +Expected ELFFile::getSectionName(const Elf_Shdr &Section, StringRef DotShstrtab) const { - uint32_t Offset = Section->sh_name; + uint32_t Offset = Section.sh_name; if (Offset == 0) return StringRef(); if (Offset >= DotShstrtab.size()) - return createError("a section " + getSecIndexForError(this, Section) + + return createError("a section " + getSecIndexForError(*this, Section) + " has an invalid sh_name (0x" + Twine::utohexstr(Offset) + ") offset which goes past the end of the " diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index 74d4745c1034f..5c12231331be8 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -377,7 +377,7 @@ template class ELFObjectFile : public ELFObjectFileBase { for (const Elf_Shdr &Sec : *SectionsOrErr) { if (Sec.sh_type == ELF::SHT_ARM_ATTRIBUTES || Sec.sh_type == ELF::SHT_RISCV_ATTRIBUTES) { - auto ErrorOrContents = EF.getSectionContents(&Sec); + auto ErrorOrContents = EF.getSectionContents(Sec); if (!ErrorOrContents) return ErrorOrContents.takeError(); @@ -432,7 +432,7 @@ template class ELFObjectFile : public ELFObjectFileBase { Triple::ArchType getArch() const override; Expected getStartAddress() const override; - unsigned getPlatformFlags() const override { return EF.getHeader()->e_flags; } + unsigned getPlatformFlags() const override { return EF.getHeader().e_flags; } const ELFFile *getELFFile() const { return &EF; } @@ -468,7 +468,7 @@ Expected ELFObjectFile::getSymbolName(DataRefImpl Sym) const { if (!StrTabOrErr) return StrTabOrErr.takeError(); const Elf_Shdr *StringTableSec = *StrTabOrErr; - auto SymStrTabOrErr = EF.getStringTable(StringTableSec); + auto SymStrTabOrErr = EF.getStringTable(*StringTableSec); if (!SymStrTabOrErr) return SymStrTabOrErr.takeError(); Expected Name = ESym->getName(*SymStrTabOrErr); @@ -507,9 +507,9 @@ uint64_t ELFObjectFile::getSymbolValueImpl(DataRefImpl Symb) const { if (ESym->st_shndx == ELF::SHN_ABS) return Ret; - const Elf_Ehdr *Header = EF.getHeader(); + const Elf_Ehdr &Header = EF.getHeader(); // Clear the ARM/Thumb or microMIPS indicator flag. 
- if ((Header->e_machine == ELF::EM_ARM || Header->e_machine == ELF::EM_MIPS) && + if ((Header.e_machine == ELF::EM_ARM || Header.e_machine == ELF::EM_MIPS) && ESym->getType() == ELF::STT_FUNC) Ret &= ~1; @@ -533,14 +533,13 @@ ELFObjectFile::getSymbolAddress(DataRefImpl Symb) const { return Result; } - const Elf_Ehdr *Header = EF.getHeader(); auto SymTabOrErr = EF.getSection(Symb.d.a); if (!SymTabOrErr) return SymTabOrErr.takeError(); - const Elf_Shdr *SymTab = *SymTabOrErr; - if (Header->e_type == ELF::ET_REL) { - auto SectionOrErr = EF.getSection(ESym, SymTab, ShndxTable); + if (EF.getHeader().e_type == ELF::ET_REL) { + Expected SectionOrErr = + EF.getSection(*ESym, *SymTabOrErr, ShndxTable); if (!SectionOrErr) return SectionOrErr.takeError(); const Elf_Shdr *Section = *SectionOrErr; @@ -561,11 +560,11 @@ uint32_t ELFObjectFile::getSymbolAlignment(DataRefImpl Symb) const { template uint16_t ELFObjectFile::getEMachine() const { - return EF.getHeader()->e_machine; + return EF.getHeader().e_machine; } template uint16_t ELFObjectFile::getEType() const { - return EF.getHeader()->e_type; + return EF.getHeader().e_type; } template @@ -652,7 +651,7 @@ Expected ELFObjectFile::getSymbolFlags(DataRefImpl Sym) const { // TODO: Test this error. return SymbolsOrErr.takeError(); - if (EF.getHeader()->e_machine == ELF::EM_ARM) { + if (EF.getHeader().e_machine == ELF::EM_ARM) { if (Expected NameOrErr = getSymbolName(Sym)) { StringRef Name = *NameOrErr; if (Name.startswith("$d") || Name.startswith("$t") || @@ -685,7 +684,7 @@ template Expected ELFObjectFile::getSymbolSection(const Elf_Sym *ESym, const Elf_Shdr *SymTab) const { - auto ESecOrErr = EF.getSection(ESym, SymTab, ShndxTable); + auto ESecOrErr = EF.getSection(*ESym, SymTab, ShndxTable); if (!ESecOrErr) return ESecOrErr.takeError(); @@ -717,7 +716,7 @@ void ELFObjectFile::moveSectionNext(DataRefImpl &Sec) const { template Expected ELFObjectFile::getSectionName(DataRefImpl Sec) const { - return EF.getSectionName(&*getSection(Sec)); + return EF.getSectionName(*getSection(Sec)); } template @@ -847,7 +846,7 @@ ELFObjectFile::section_rel_begin(DataRefImpl Sec) const { if (!SectionsOrErr) return relocation_iterator(RelocationRef()); uintptr_t SHT = reinterpret_cast((*SectionsOrErr).begin()); - RelData.d.a = (Sec.p - SHT) / EF.getHeader()->e_shentsize; + RelData.d.a = (Sec.p - SHT) / EF.getHeader().e_shentsize; RelData.d.b = 0; return relocation_iterator(RelocationRef(RelData, this)); } @@ -874,7 +873,7 @@ ELFObjectFile::section_rel_end(DataRefImpl Sec) const { template Expected ELFObjectFile::getRelocatedSection(DataRefImpl Sec) const { - if (EF.getHeader()->e_type != ELF::ET_REL) + if (EF.getHeader().e_type != ELF::ET_REL) return section_end(); const Elf_Shdr *EShdr = getSection(Sec); @@ -933,7 +932,7 @@ uint64_t ELFObjectFile::getRelocationType(DataRefImpl Rel) const { template StringRef ELFObjectFile::getRelocationTypeName(uint32_t Type) const { - return getELFRelocationTypeName(EF.getHeader()->e_machine, Type); + return getELFRelocationTypeName(EF.getHeader().e_machine, Type); } template @@ -1087,9 +1086,9 @@ uint8_t ELFObjectFile::getBytesInAddress() const { template StringRef ELFObjectFile::getFileFormatName() const { bool IsLittleEndian = ELFT::TargetEndianness == support::little; - switch (EF.getHeader()->e_ident[ELF::EI_CLASS]) { + switch (EF.getHeader().e_ident[ELF::EI_CLASS]) { case ELF::ELFCLASS32: - switch (EF.getHeader()->e_machine) { + switch (EF.getHeader().e_machine) { case ELF::EM_386: return "elf32-i386"; case ELF::EM_IAMCU: @@ 
-1123,7 +1122,7 @@ StringRef ELFObjectFile::getFileFormatName() const { return "elf32-unknown"; } case ELF::ELFCLASS64: - switch (EF.getHeader()->e_machine) { + switch (EF.getHeader().e_machine) { case ELF::EM_386: return "elf64-i386"; case ELF::EM_X86_64: @@ -1157,7 +1156,7 @@ StringRef ELFObjectFile::getFileFormatName() const { template Triple::ArchType ELFObjectFile::getArch() const { bool IsLittleEndian = ELFT::TargetEndianness == support::little; - switch (EF.getHeader()->e_machine) { + switch (EF.getHeader().e_machine) { case ELF::EM_386: case ELF::EM_IAMCU: return Triple::x86; @@ -1174,7 +1173,7 @@ template Triple::ArchType ELFObjectFile::getArch() const { case ELF::EM_LANAI: return Triple::lanai; case ELF::EM_MIPS: - switch (EF.getHeader()->e_ident[ELF::EI_CLASS]) { + switch (EF.getHeader().e_ident[ELF::EI_CLASS]) { case ELF::ELFCLASS32: return IsLittleEndian ? Triple::mipsel : Triple::mips; case ELF::ELFCLASS64: @@ -1189,7 +1188,7 @@ template Triple::ArchType ELFObjectFile::getArch() const { case ELF::EM_PPC64: return IsLittleEndian ? Triple::ppc64le : Triple::ppc64; case ELF::EM_RISCV: - switch (EF.getHeader()->e_ident[ELF::EI_CLASS]) { + switch (EF.getHeader().e_ident[ELF::EI_CLASS]) { case ELF::ELFCLASS32: return Triple::riscv32; case ELF::ELFCLASS64: @@ -1210,7 +1209,7 @@ template Triple::ArchType ELFObjectFile::getArch() const { if (!IsLittleEndian) return Triple::UnknownArch; - unsigned MACH = EF.getHeader()->e_flags & ELF::EF_AMDGPU_MACH; + unsigned MACH = EF.getHeader().e_flags & ELF::EF_AMDGPU_MACH; if (MACH >= ELF::EF_AMDGPU_MACH_R600_FIRST && MACH <= ELF::EF_AMDGPU_MACH_R600_LAST) return Triple::r600; @@ -1235,7 +1234,7 @@ template Triple::ArchType ELFObjectFile::getArch() const { template Expected ELFObjectFile::getStartAddress() const { - return EF.getHeader()->e_entry; + return EF.getHeader().e_entry; } template @@ -1245,7 +1244,7 @@ ELFObjectFile::getDynamicSymbolIterators() const { } template bool ELFObjectFile::isRelocatableObject() const { - return EF.getHeader()->e_type == ELF::ET_REL; + return EF.getHeader().e_type == ELF::ET_REL; } } // end namespace object diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp index 8b078690dea24..20295434d2e5a 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp @@ -244,7 +244,7 @@ class ELFLinkGraphBuilder_x86_64 { object::ELFFile::Elf_Shdr_Range sections; SymbolTable SymTab; - bool isRelocatable() { return Obj.getHeader()->e_type == llvm::ELF::ET_REL; } + bool isRelocatable() { return Obj.getHeader().e_type == llvm::ELF::ET_REL; } support::endianness getEndianness(const object::ELFFile &Obj) { @@ -253,7 +253,7 @@ class ELFLinkGraphBuilder_x86_64 { // This could also just become part of a template unsigned getPointerSize(const object::ELFFile &Obj) { - return Obj.getHeader()->getFileClass() == ELF::ELFCLASS64 ? 8 : 4; + return Obj.getHeader().getFileClass() == ELF::ELFCLASS64 ? 
8 : 4; } // We don't technically need this right now @@ -277,7 +277,7 @@ class ELFLinkGraphBuilder_x86_64 { auto StrTabSec = Obj.getSection(SecRef.sh_link); if (!StrTabSec) return StrTabSec.takeError(); - auto StringTable = Obj.getStringTable(*StrTabSec); + auto StringTable = Obj.getStringTable(**StrTabSec); if (!StringTable) return StringTable.takeError(); @@ -310,7 +310,7 @@ class ELFLinkGraphBuilder_x86_64 { Error createNormalizedSections() { LLVM_DEBUG(dbgs() << "Creating normalized sections...\n"); for (auto &SecRef : sections) { - auto Name = Obj.getSectionName(&SecRef); + auto Name = Obj.getSectionName(SecRef); if (!Name) return Name.takeError(); sys::Memory::ProtectionFlags Prot; @@ -343,7 +343,7 @@ class ELFLinkGraphBuilder_x86_64 { if (SecRef.sh_type != ELF::SHT_NOBITS) { // .sections() already checks that the data is not beyond the end of // file - auto contents = Obj.getSectionContentsAsArray(&SecRef); + auto contents = Obj.getSectionContentsAsArray(SecRef); if (!contents) return contents.takeError(); @@ -375,7 +375,7 @@ class ELFLinkGraphBuilder_x86_64 { return make_error("Shouldn't have REL in x64", llvm::inconvertibleErrorCode()); - auto RelSectName = Obj.getSectionName(&SecRef); + auto RelSectName = Obj.getSectionName(SecRef); if (!RelSectName) return RelSectName.takeError(); // Deal with .eh_frame later @@ -386,7 +386,7 @@ class ELFLinkGraphBuilder_x86_64 { if (!UpdateSection) return UpdateSection.takeError(); - auto UpdateSectionName = Obj.getSectionName(*UpdateSection); + auto UpdateSectionName = Obj.getSectionName(**UpdateSection); if (!UpdateSectionName) return UpdateSectionName.takeError(); @@ -397,7 +397,7 @@ class ELFLinkGraphBuilder_x86_64 { *UpdateSectionName, llvm::inconvertibleErrorCode()); - auto Relocations = Obj.relas(&SecRef); + auto Relocations = Obj.relas(SecRef); if (!Relocations) return Relocations.takeError(); @@ -409,7 +409,7 @@ class ELFLinkGraphBuilder_x86_64 { << "Name: " << Obj.getRelocationTypeName(Type) << "\n"; }); auto SymbolIndex = Rela.getSymbol(false); - auto Symbol = Obj.getRelocationSymbol(&Rela, &SymTab); + auto Symbol = Obj.getRelocationSymbol(Rela, &SymTab); if (!Symbol) return Symbol.takeError(); @@ -472,10 +472,10 @@ class ELFLinkGraphBuilder_x86_64 { auto StrTabSec = Obj.getSection(SecRef.sh_link); if (!StrTabSec) return StrTabSec.takeError(); - auto StringTable = Obj.getStringTable(*StrTabSec); + auto StringTable = Obj.getStringTable(**StrTabSec); if (!StringTable) return StringTable.takeError(); - auto Name = Obj.getSectionName(&SecRef); + auto Name = Obj.getSectionName(SecRef); if (!Name) return Name.takeError(); auto Section = G->findSectionByName(*Name); @@ -520,7 +520,7 @@ class ELFLinkGraphBuilder_x86_64 { auto DefinedSection = Obj.getSection(SymRef.st_shndx); if (!DefinedSection) return DefinedSection.takeError(); - auto sectName = Obj.getSectionName(*DefinedSection); + auto sectName = Obj.getSectionName(**DefinedSection); if (!sectName) return Name.takeError(); diff --git a/llvm/lib/InterfaceStub/ELFObjHandler.cpp b/llvm/lib/InterfaceStub/ELFObjHandler.cpp index 82e7a3c8b1baa..cc9a8743cd084 100644 --- a/llvm/lib/InterfaceStub/ELFObjHandler.cpp +++ b/llvm/lib/InterfaceStub/ELFObjHandler.cpp @@ -320,7 +320,7 @@ buildStub(const ELFObjectFile &ElfObj) { DynEnt.StrSize); // Populate Arch from ELF header. - DestStub->Arch = ElfFile->getHeader()->e_machine; + DestStub->Arch = ElfFile->getHeader().e_machine; // Populate SoName from .dynamic entries and dynamic string table. 
if (DynEnt.SONameOffset.hasValue()) { diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index c6e9ee175adc8..5290f8ce05607 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -366,7 +366,7 @@ ELFFile::decode_relrs(Elf_Relr_Range relrs) const { template Expected> -ELFFile::android_relas(const Elf_Shdr *Sec) const { +ELFFile::android_relas(const Elf_Shdr &Sec) const { // This function reads relocations in Android's packed relocation format, // which is based on SLEB128 and delta encoding. Expected> ContentsOrErr = getSectionContents(Sec); @@ -511,7 +511,7 @@ std::string ELFFile::getDynamicTagAsString(unsigned Arch, template std::string ELFFile::getDynamicTagAsString(uint64_t Type) const { - return getDynamicTagAsString(getHeader()->e_machine, Type); + return getDynamicTagAsString(getHeader().e_machine, Type); } template @@ -541,7 +541,7 @@ Expected ELFFile::dynamicEntries() const { for (const Elf_Shdr &Sec : *SectionsOrError) { if (Sec.sh_type == ELF::SHT_DYNAMIC) { Expected> DynOrError = - getSectionContentsAsArray(&Sec); + getSectionContentsAsArray(Sec); if (!DynOrError) return DynOrError.takeError(); Dyn = *DynOrError; diff --git a/llvm/tools/llvm-objcopy/ELF/Object.cpp b/llvm/tools/llvm-objcopy/ELF/Object.cpp index e15fb24f4c425..e19285ee97eac 100644 --- a/llvm/tools/llvm-objcopy/ELF/Object.cpp +++ b/llvm/tools/llvm-objcopy/ELF/Object.cpp @@ -1320,7 +1320,7 @@ void ELFBuilder::readProgramHeaders(const ELFFile &HeadersFile) { ElfHdr.Index = Index++; ElfHdr.OriginalOffset = ElfHdr.Offset = EhdrOffset; - const auto &Ehdr = *HeadersFile.getHeader(); + const typename ELFT::Ehdr &Ehdr = HeadersFile.getHeader(); auto &PrHdr = Obj.ProgramHdrSegment; PrHdr.Type = PT_PHDR; PrHdr.Flags = 0; @@ -1398,7 +1398,7 @@ void ELFBuilder::initSymbolTable(SymbolTableSection *SymTab) { const Elf_Shdr &ShndxSec = *unwrapOrError(ElfFile.getSection(SymTab->getShndxTable()->Index)); ShndxData = unwrapOrError( - ElfFile.template getSectionContentsAsArray(&ShndxSec)); + ElfFile.template getSectionContentsAsArray(ShndxSec)); if (ShndxData.size() != Symbols.size()) error("symbol section index table does not have the same number of " "entries as the symbol table"); @@ -1476,7 +1476,7 @@ SectionBase &ELFBuilder::makeSection(const Elf_Shdr &Shdr) { case SHT_REL: case SHT_RELA: if (Shdr.sh_flags & SHF_ALLOC) { - Data = unwrapOrError(ElfFile.getSectionContents(&Shdr)); + Data = unwrapOrError(ElfFile.getSectionContents(Shdr)); return Obj.addSection(Data); } return Obj.addSection(); @@ -1485,7 +1485,7 @@ SectionBase &ELFBuilder::makeSection(const Elf_Shdr &Shdr) { // mean altering the memory image. There are no special link types or // anything so we can just use a Section. if (Shdr.sh_flags & SHF_ALLOC) { - Data = unwrapOrError(ElfFile.getSectionContents(&Shdr)); + Data = unwrapOrError(ElfFile.getSectionContents(Shdr)); return Obj.addSection
<Section>(Data);
     }
     return Obj.addSection<StringTableSection>();
@@ -1493,16 +1493,16 @@ SectionBase &ELFBuilder<ELFT>::makeSection(const Elf_Shdr &Shdr) {
   case SHT_GNU_HASH:
     // Hash tables should refer to SHT_DYNSYM which we're not going to change.
     // Because of this we don't need to mess with the hash tables either.
-    Data = unwrapOrError(ElfFile.getSectionContents(&Shdr));
+    Data = unwrapOrError(ElfFile.getSectionContents(Shdr));
     return Obj.addSection<Section>(Data);
   case SHT_GROUP:
-    Data = unwrapOrError(ElfFile.getSectionContents(&Shdr));
+    Data = unwrapOrError(ElfFile.getSectionContents(Shdr));
     return Obj.addSection<GroupSection>(Data);
   case SHT_DYNSYM:
-    Data = unwrapOrError(ElfFile.getSectionContents(&Shdr));
+    Data = unwrapOrError(ElfFile.getSectionContents(Shdr));
    return Obj.addSection<DynamicSymbolTableSection>(Data);
   case SHT_DYNAMIC:
-    Data = unwrapOrError(ElfFile.getSectionContents(&Shdr));
+    Data = unwrapOrError(ElfFile.getSectionContents(Shdr));
    return Obj.addSection<DynamicSection>(Data);
   case SHT_SYMTAB: {
     auto &SymTab = Obj.addSection<SymbolTableSection>();
@@ -1517,9 +1517,9 @@ SectionBase &ELFBuilder<ELFT>::makeSection(const Elf_Shdr &Shdr) {
   case SHT_NOBITS:
     return Obj.addSection<Section>
(Data); default: { - Data = unwrapOrError(ElfFile.getSectionContents(&Shdr)); + Data = unwrapOrError(ElfFile.getSectionContents(Shdr)); - StringRef Name = unwrapOrError(ElfFile.getSectionName(&Shdr)); + StringRef Name = unwrapOrError(ElfFile.getSectionName(Shdr)); if (Name.startswith(".zdebug") || (Shdr.sh_flags & ELF::SHF_COMPRESSED)) { uint64_t DecompressedSize, DecompressedAlign; std::tie(DecompressedSize, DecompressedAlign) = @@ -1541,7 +1541,7 @@ template void ELFBuilder::readSectionHeaders() { continue; } auto &Sec = makeSection(Shdr); - Sec.Name = std::string(unwrapOrError(ElfFile.getSectionName(&Shdr))); + Sec.Name = std::string(unwrapOrError(ElfFile.getSectionName(Shdr))); Sec.Type = Sec.OriginalType = Shdr.sh_type; Sec.Flags = Sec.OriginalFlags = Shdr.sh_flags; Sec.Addr = Shdr.sh_addr; @@ -1560,7 +1560,7 @@ template void ELFBuilder::readSectionHeaders() { } template void ELFBuilder::readSections(bool EnsureSymtab) { - uint32_t ShstrIndex = ElfFile.getHeader()->e_shstrndx; + uint32_t ShstrIndex = ElfFile.getHeader().e_shstrndx; if (ShstrIndex == SHN_XINDEX) ShstrIndex = unwrapOrError(ElfFile.getSection(0))->sh_link; @@ -1602,10 +1602,10 @@ template void ELFBuilder::readSections(bool EnsureSymtab) { auto Shdr = unwrapOrError(ElfFile.sections()).begin() + RelSec->Index; if (RelSec->Type == SHT_REL) initRelocations(RelSec, Obj.SymbolTable, - unwrapOrError(ElfFile.rels(Shdr))); + unwrapOrError(ElfFile.rels(*Shdr))); else initRelocations(RelSec, Obj.SymbolTable, - unwrapOrError(ElfFile.relas(Shdr))); + unwrapOrError(ElfFile.relas(*Shdr))); } else if (auto GroupSec = dyn_cast(&Sec)) { initGroupSection(GroupSec); } @@ -1622,7 +1622,7 @@ template void ELFBuilder::build(bool EnsureSymtab) { ELFFile HeadersFile = unwrapOrError(ELFFile::create(toStringRef( {ElfFile.base() + EhdrOffset, ElfFile.getBufSize() - EhdrOffset}))); - auto &Ehdr = *HeadersFile.getHeader(); + auto &Ehdr = HeadersFile.getHeader(); Obj.OSABI = Ehdr.e_ident[EI_OSABI]; Obj.ABIVersion = Ehdr.e_ident[EI_ABIVERSION]; Obj.Type = Ehdr.e_type; diff --git a/llvm/tools/llvm-objdump/ELFDump.cpp b/llvm/tools/llvm-objdump/ELFDump.cpp index 602bc63882527..c7a84385ffd50 100644 --- a/llvm/tools/llvm-objdump/ELFDump.cpp +++ b/llvm/tools/llvm-objdump/ELFDump.cpp @@ -92,7 +92,7 @@ static Error getRelocationValueString(const ELFObjectFile *Obj, return SymSI.takeError(); const typename ELFT::Shdr *SymSec = Obj->getSection((*SymSI)->getRawDataRefImpl()); - auto SecName = EF.getSectionName(SymSec); + auto SecName = EF.getSectionName(*SymSec); if (!SecName) return SecName.takeError(); Fmt << *SecName; @@ -338,10 +338,10 @@ static void printSymbolVersionInfo(const ELFFile *Elf, continue; ArrayRef Contents = - unwrapOrError(Elf->getSectionContents(&Shdr), FileName); + unwrapOrError(Elf->getSectionContents(Shdr), FileName); const typename ELFT::Shdr *StrTabSec = unwrapOrError(Elf->getSection(Shdr.sh_link), FileName); - StringRef StrTab = unwrapOrError(Elf->getStringTable(StrTabSec), FileName); + StringRef StrTab = unwrapOrError(Elf->getStringTable(*StrTabSec), FileName); if (Shdr.sh_type == ELF::SHT_GNU_verneed) printSymbolVersionDependency(Contents, StrTab); diff --git a/llvm/tools/llvm-readobj/ARMEHABIPrinter.h b/llvm/tools/llvm-readobj/ARMEHABIPrinter.h index dfa2a3538d893..613c4b78b1c21 100644 --- a/llvm/tools/llvm-readobj/ARMEHABIPrinter.h +++ b/llvm/tools/llvm-readobj/ARMEHABIPrinter.h @@ -407,7 +407,7 @@ PrinterContext::FindExceptionTable(unsigned IndexSectionIndex, reportError(SymTabOrErr.takeError(), FileName); const Elf_Shdr *SymTab = 
*SymTabOrErr; - for (const Elf_Rel &R : unwrapOrError(FileName, ELF->rels(&Sec))) { + for (const Elf_Rel &R : unwrapOrError(FileName, ELF->rels(Sec))) { if (R.r_offset != static_cast(IndexTableOffset)) continue; @@ -417,9 +417,9 @@ PrinterContext::FindExceptionTable(unsigned IndexSectionIndex, RelA.r_addend = 0; const Elf_Sym *Symbol = - unwrapOrError(FileName, ELF->getRelocationSymbol(&RelA, SymTab)); + unwrapOrError(FileName, ELF->getRelocationSymbol(RelA, SymTab)); - auto Ret = ELF->getSection(Symbol, SymTab, ShndxTable); + auto Ret = ELF->getSection(*Symbol, SymTab, ShndxTable); if (!Ret) report_fatal_error(errorToErrorCode(Ret.takeError()).message()); return *Ret; @@ -432,7 +432,7 @@ template void PrinterContext::PrintExceptionTable(const Elf_Shdr *IT, const Elf_Shdr *EHT, uint64_t TableEntryOffset) const { - Expected> Contents = ELF->getSectionContents(EHT); + Expected> Contents = ELF->getSectionContents(*EHT); if (!Contents) return; @@ -499,7 +499,7 @@ void PrinterContext::PrintOpcodes(const uint8_t *Entry, template void PrinterContext::PrintIndexTable(unsigned SectionIndex, const Elf_Shdr *IT) const { - Expected> Contents = ELF->getSectionContents(IT); + Expected> Contents = ELF->getSectionContents(*IT); if (!Contents) return; @@ -553,7 +553,7 @@ void PrinterContext::PrintIndexTable(unsigned SectionIndex, FindExceptionTable(SectionIndex, Entry * IndexTableEntrySize + 4); if (EHT) - if (auto Name = ELF->getSectionName(EHT)) + if (auto Name = ELF->getSectionName(*EHT)) SW.printString("ExceptionHandlingTable", *Name); uint64_t TableEntryOffset = PREL31(Word1, IT->sh_addr); @@ -575,7 +575,7 @@ void PrinterContext::PrintUnwindInformation() const { DictScope UIT(SW, "UnwindIndexTable"); SW.printNumber("SectionIndex", SectionIndex); - if (auto SectionName = ELF->getSectionName(&Sec)) + if (auto SectionName = ELF->getSectionName(Sec)) SW.printString("SectionName", *SectionName); SW.printHex("SectionOffset", Sec.sh_offset); diff --git a/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h b/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h index 035037f4eebc1..52db477ba7267 100644 --- a/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h +++ b/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h @@ -85,7 +85,7 @@ void PrinterContext::printUnwindInformation() const { reportError(SectionsOrErr.takeError(), ObjF->getFileName()); for (const Elf_Shdr &Shdr : *SectionsOrErr) { - Expected NameOrErr = Obj->getSectionName(&Shdr); + Expected NameOrErr = Obj->getSectionName(Shdr); if (!NameOrErr) reportError(NameOrErr.takeError(), ObjF->getFileName()); if (*NameOrErr == ".eh_frame") @@ -104,13 +104,13 @@ void PrinterContext::printEHFrameHdr(const Elf_Phdr *EHFramePHdr) const { const object::ELFFile *Obj = ObjF->getELFFile(); if (const Elf_Shdr *EHFrameHdr = findSectionByAddress(ObjF, EHFramePHdr->p_vaddr)) { - Expected NameOrErr = Obj->getSectionName(EHFrameHdr); + Expected NameOrErr = Obj->getSectionName(*EHFrameHdr); if (!NameOrErr) reportError(NameOrErr.takeError(), ObjF->getFileName()); W.printString("Corresponding Section", *NameOrErr); } - Expected> Content = Obj->getSegmentContents(EHFramePHdr); + Expected> Content = Obj->getSegmentContents(*EHFramePHdr); if (!Content) reportError(Content.takeError(), ObjF->getFileName()); @@ -181,7 +181,7 @@ void PrinterContext::printEHFrame(const Elf_Shdr *EHFrameShdr) const { W.indent(); Expected> DataOrErr = - ObjF->getELFFile()->getSectionContents(EHFrameShdr); + ObjF->getELFFile()->getSectionContents(*EHFrameShdr); if (!DataOrErr) reportError(DataOrErr.takeError(), ObjF->getFileName()); 
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 70584e8a161c8..86d76b056b924 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -404,7 +404,7 @@ template static std::string describe(const ELFFile &Obj, const typename ELFT::Shdr &Sec) { unsigned SecNdx = &Sec - &cantFail(Obj.sections()).front(); - return (object::getELFSectionTypeName(Obj.getHeader()->e_machine, + return (object::getELFSectionTypeName(Obj.getHeader().e_machine, Sec.sh_type) + " section with index " + Twine(SecNdx)) .str(); @@ -424,7 +424,7 @@ static Expected getLinkAsStrtab(const ELFFile &Obj, return createError("invalid section linked to " + describe(Obj, *Sec) + ": " + toString(StrTabSecOrErr.takeError())); - Expected StrTabOrErr = Obj.getStringTable(*StrTabSecOrErr); + Expected StrTabOrErr = Obj.getStringTable(**StrTabSecOrErr); if (!StrTabOrErr) return createError("invalid string table linked to " + describe(Obj, *Sec) + ": " + toString(StrTabOrErr.takeError())); @@ -443,13 +443,12 @@ getLinkAsSymtab(const ELFFile &Obj, const typename ELFT::Shdr *Sec, ": " + toString(SymtabOrErr.takeError())); if ((*SymtabOrErr)->sh_type != ExpectedType) - return createError("invalid section linked to " + describe(Obj, *Sec) + - ": expected " + - object::getELFSectionTypeName(Obj.getHeader()->e_machine, - ExpectedType) + - ", but got " + - object::getELFSectionTypeName(Obj.getHeader()->e_machine, - (*SymtabOrErr)->sh_type)); + return createError( + "invalid section linked to " + describe(Obj, *Sec) + ": expected " + + object::getELFSectionTypeName(Obj.getHeader().e_machine, ExpectedType) + + ", but got " + + object::getELFSectionTypeName(Obj.getHeader().e_machine, + (*SymtabOrErr)->sh_type)); Expected StrTabOrErr = getLinkAsStrtab(Obj, *SymtabOrErr); if (!StrTabOrErr) @@ -477,7 +476,7 @@ ELFDumper::getVersionTable(const Elf_Shdr *Sec, ArrayRef *SymTab, return createError("the " + describe(*Sec) + " is misaligned"); Expected> VersionsOrErr = - Obj->template getSectionContentsAsArray(Sec); + Obj->template getSectionContentsAsArray(*Sec); if (!VersionsOrErr) return createError("cannot read content of " + describe(*Sec) + ": " + toString(VersionsOrErr.takeError())); @@ -511,7 +510,7 @@ ELFDumper::getVersionDefinitions(const Elf_Shdr *Sec) const { if (!StrTabOrErr) return StrTabOrErr.takeError(); - Expected> ContentsOrErr = Obj->getSectionContents(Sec); + Expected> ContentsOrErr = Obj->getSectionContents(*Sec); if (!ContentsOrErr) return createError("cannot read content of " + describe(*Sec) + ": " + toString(ContentsOrErr.takeError())); @@ -600,7 +599,7 @@ ELFDumper::getVersionDependencies(const Elf_Shdr *Sec) const { else StrTab = *StrTabOrErr; - Expected> ContentsOrErr = Obj->getSectionContents(Sec); + Expected> ContentsOrErr = Obj->getSectionContents(*Sec); if (!ContentsOrErr) return createError("cannot read content of " + describe(*Sec) + ": " + toString(ContentsOrErr.takeError())); @@ -1069,7 +1068,7 @@ Expected ELFDumper::getSymbolVersion(const Elf_Sym *Sym, // Get the corresponding version index entry. 
if (Expected EntryOrErr = ObjF->getELFFile()->template getEntry( - SymbolVersionSection, EntryIndex)) + *SymbolVersionSection, EntryIndex)) return this->getSymbolVersionByIndex((*EntryOrErr)->vs_index, IsDefault); else return EntryOrErr.takeError(); @@ -1084,7 +1083,7 @@ ELFDumper::getRelocationTarget(const Relocation &R, const ELFFile &Obj = *ObjF->getELFFile(); Expected SymOrErr = - Obj.template getEntry(SymTab, R.Symbol); + Obj.template getEntry(*SymTab, R.Symbol); if (!SymOrErr) return SymOrErr.takeError(); const Elf_Sym *Sym = *SymOrErr; @@ -1095,14 +1094,14 @@ ELFDumper::getRelocationTarget(const Relocation &R, // This code block returns the section name. if (Sym->getType() == ELF::STT_SECTION) { Expected SecOrErr = - Obj.getSection(Sym, SymTab, ShndxTable); + Obj.getSection(*Sym, SymTab, ShndxTable); if (!SecOrErr) return SecOrErr.takeError(); // A section symbol describes the section at index 0. if (*SecOrErr == nullptr) return RelSymbol(Sym, ""); - Expected NameOrErr = Obj.getSectionName(*SecOrErr); + Expected NameOrErr = Obj.getSectionName(**SecOrErr); if (!NameOrErr) return NameOrErr.takeError(); return RelSymbol(Sym, NameOrErr->str()); @@ -1227,7 +1226,7 @@ Expected ELFDumper::getSymbolSectionIndex(const Elf_Sym *Symbol, const Elf_Sym *FirstSym) const { return Symbol->st_shndx == SHN_XINDEX - ? object::getExtendedSymbolTableIndex(Symbol, FirstSym, + ? object::getExtendedSymbolTableIndex(*Symbol, *FirstSym, ShndxTable) : Symbol->st_shndx; } @@ -1259,7 +1258,7 @@ ELFDumper::getSymbolSectionName(const Elf_Sym *Symbol, Obj->getSection(SectionIndex); if (!SecOrErr) return SecOrErr.takeError(); - return Obj->getSectionName(*SecOrErr); + return Obj->getSectionName(**SecOrErr); } template @@ -2423,7 +2422,7 @@ const typename ELFT::Shdr * ELFDumper::findSectionByName(StringRef Name) const { const ELFFile *Obj = ObjF->getELFFile(); for (const Elf_Shdr &Shdr : cantFail(Obj->sections())) { - if (Expected NameOrErr = Obj->getSectionName(&Shdr)) { + if (Expected NameOrErr = Obj->getSectionName(Shdr)) { if (*NameOrErr == Name) return &Shdr; } else { @@ -2456,7 +2455,7 @@ std::string ELFDumper::getDynamicEntry(uint64_t Type, }; // Handle custom printing of architecture specific tags - switch (ObjF->getELFFile()->getHeader()->e_machine) { + switch (ObjF->getELFFile()->getHeader().e_machine) { case EM_AARCH64: switch (Type) { case DT_AARCH64_BTI_PLT: @@ -2653,7 +2652,7 @@ namespace { template <> void ELFDumper::printUnwindInfo() { const ELFFile *Obj = ObjF->getELFFile(); - const unsigned Machine = Obj->getHeader()->e_machine; + const unsigned Machine = Obj->getHeader().e_machine; if (Machine == EM_ARM) { ARM::EHABI::PrinterContext Ctx(W, Obj, ObjF->getFileName(), DotSymtabSec); @@ -2832,7 +2831,7 @@ template void ELFDumper::printLoadName() { template void ELFDumper::printArchSpecificInfo() { const ELFFile *Obj = ObjF->getELFFile(); - switch (Obj->getHeader()->e_machine) { + switch (Obj->getHeader().e_machine) { case EM_ARM: case EM_RISCV: printAttributes(); @@ -2867,7 +2866,7 @@ template void ELFDumper::printAttributes() { return; } - const unsigned Machine = Obj->getHeader()->e_machine; + const unsigned Machine = Obj->getHeader().e_machine; assert((Machine == EM_ARM || Machine == EM_RISCV) && "Attributes not implemented."); @@ -2878,7 +2877,7 @@ template void ELFDumper::printAttributes() { continue; ArrayRef Contents = - unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(&Sec)); + unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Sec)); if (Contents[0] != 
ELFAttrs::Format_Version) { reportWarning(createError(Twine("unrecognised FormatVersion: 0x") + Twine::utohexstr(Contents[0])), @@ -2978,7 +2977,7 @@ Error MipsGOTParser::findGOT(Elf_Dyn_Range DynTable, return Error::success(); ArrayRef Content = - unwrapOrError(FileName, Obj->getSectionContents(GotSec)); + unwrapOrError(FileName, Obj->getSectionContents(*GotSec)); GotEntries = Entries(reinterpret_cast(Content.data()), Content.size() / sizeof(Entry)); LocalNum = GotEntries.size(); @@ -3028,7 +3027,7 @@ Error MipsGOTParser::findGOT(Elf_Dyn_Range DynTable, GlobalNum = DynSymTotal - *DtGotSym; ArrayRef Content = - unwrapOrError(FileName, Obj->getSectionContents(GotSec)); + unwrapOrError(FileName, Obj->getSectionContents(*GotSec)); GotEntries = Entries(reinterpret_cast(Content.data()), Content.size() / sizeof(Entry)); GotDynSyms = DynSyms.drop_front(*DtGotSym); @@ -3072,7 +3071,7 @@ Error MipsGOTParser::findPLT(Elf_Dyn_Range DynTable) { Twine::utohexstr(*DtJmpRel)); if (Expected> PltContentOrErr = - Obj->getSectionContents(PltSec)) + Obj->getSectionContents(*PltSec)) PltEntries = Entries(reinterpret_cast(PltContentOrErr->data()), PltContentOrErr->size() / sizeof(Entry)); @@ -3196,13 +3195,13 @@ const typename MipsGOTParser::Elf_Sym * MipsGOTParser::getPltSym(const Entry *E) const { int64_t Offset = std::distance(getPltEntries().data(), E); if (PltRelSec->sh_type == ELF::SHT_REL) { - Elf_Rel_Range Rels = unwrapOrError(FileName, Obj->rels(PltRelSec)); + Elf_Rel_Range Rels = unwrapOrError(FileName, Obj->rels(*PltRelSec)); return unwrapOrError(FileName, - Obj->getRelocationSymbol(&Rels[Offset], PltSymTable)); + Obj->getRelocationSymbol(Rels[Offset], PltSymTable)); } else { - Elf_Rela_Range Rels = unwrapOrError(FileName, Obj->relas(PltRelSec)); + Elf_Rela_Range Rels = unwrapOrError(FileName, Obj->relas(*PltRelSec)); return unwrapOrError(FileName, - Obj->getRelocationSymbol(&Rels[Offset], PltSymTable)); + Obj->getRelocationSymbol(Rels[Offset], PltSymTable)); } } @@ -3299,7 +3298,7 @@ template void ELFDumper::printMipsReginfo() { const ELFFile *Obj = ObjF->getELFFile(); Expected> ContentsOrErr = - Obj->getSectionContents(RegInfoSec); + Obj->getSectionContents(*RegInfoSec); if (!ContentsOrErr) { this->reportUniqueWarning(createError( "unable to read the content of the .reginfo section (" + @@ -3367,7 +3366,7 @@ template void ELFDumper::printMipsOptions() { DictScope GS(W, "MIPS Options"); ArrayRef Data = - unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(MipsOpts)); + unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(*MipsOpts)); const uint8_t *const SecBegin = Data.begin(); while (!Data.empty()) { bool IsSupported; @@ -3407,7 +3406,7 @@ template void ELFDumper::printStackMap() const { }; Expected> ContentOrErr = - Obj->getSectionContents(StackMapSection); + Obj->getSectionContents(*StackMapSection); if (!ContentOrErr) { Warn(ContentOrErr.takeError()); return; @@ -3442,9 +3441,9 @@ static inline void printFields(formatted_raw_ostream &OS, StringRef Str1, template static std::string getSectionHeadersNumString(const ELFFile &Obj, StringRef FileName) { - const typename ELFT::Ehdr *ElfHeader = Obj.getHeader(); - if (ElfHeader->e_shnum != 0) - return to_string(ElfHeader->e_shnum); + const typename ELFT::Ehdr &ElfHeader = Obj.getHeader(); + if (ElfHeader.e_shnum != 0) + return to_string(ElfHeader.e_shnum); ArrayRef Arr = cantFail(Obj.sections()); if (Arr.empty()) @@ -3455,71 +3454,71 @@ static std::string getSectionHeadersNumString(const ELFFile &Obj, template static std::string 
getSectionHeaderTableIndexString(const ELFFile &Obj, StringRef FileName) { - const typename ELFT::Ehdr *ElfHeader = Obj.getHeader(); - if (ElfHeader->e_shstrndx != SHN_XINDEX) - return to_string(ElfHeader->e_shstrndx); + const typename ELFT::Ehdr &ElfHeader = Obj.getHeader(); + if (ElfHeader.e_shstrndx != SHN_XINDEX) + return to_string(ElfHeader.e_shstrndx); ArrayRef Arr = cantFail(Obj.sections()); if (Arr.empty()) return "65535 (corrupt: out of range)"; - return to_string(ElfHeader->e_shstrndx) + " (" + to_string(Arr[0].sh_link) + + return to_string(ElfHeader.e_shstrndx) + " (" + to_string(Arr[0].sh_link) + ")"; } template void GNUStyle::printFileHeaders() { - const Elf_Ehdr *e = this->Obj.getHeader(); + const Elf_Ehdr &e = this->Obj.getHeader(); OS << "ELF Header:\n"; OS << " Magic: "; std::string Str; for (int i = 0; i < ELF::EI_NIDENT; i++) - OS << format(" %02x", static_cast(e->e_ident[i])); + OS << format(" %02x", static_cast(e.e_ident[i])); OS << "\n"; - Str = printEnum(e->e_ident[ELF::EI_CLASS], makeArrayRef(ElfClass)); + Str = printEnum(e.e_ident[ELF::EI_CLASS], makeArrayRef(ElfClass)); printFields(OS, "Class:", Str); - Str = printEnum(e->e_ident[ELF::EI_DATA], makeArrayRef(ElfDataEncoding)); + Str = printEnum(e.e_ident[ELF::EI_DATA], makeArrayRef(ElfDataEncoding)); printFields(OS, "Data:", Str); OS.PadToColumn(2u); OS << "Version:"; OS.PadToColumn(37u); - OS << to_hexString(e->e_ident[ELF::EI_VERSION]); - if (e->e_version == ELF::EV_CURRENT) + OS << to_hexString(e.e_ident[ELF::EI_VERSION]); + if (e.e_version == ELF::EV_CURRENT) OS << " (current)"; OS << "\n"; - Str = printEnum(e->e_ident[ELF::EI_OSABI], makeArrayRef(ElfOSABI)); + Str = printEnum(e.e_ident[ELF::EI_OSABI], makeArrayRef(ElfOSABI)); printFields(OS, "OS/ABI:", Str); printFields(OS, - "ABI Version:", std::to_string(e->e_ident[ELF::EI_ABIVERSION])); - Str = printEnum(e->e_type, makeArrayRef(ElfObjectFileType)); + "ABI Version:", std::to_string(e.e_ident[ELF::EI_ABIVERSION])); + Str = printEnum(e.e_type, makeArrayRef(ElfObjectFileType)); printFields(OS, "Type:", Str); - Str = printEnum(e->e_machine, makeArrayRef(ElfMachineType)); + Str = printEnum(e.e_machine, makeArrayRef(ElfMachineType)); printFields(OS, "Machine:", Str); - Str = "0x" + to_hexString(e->e_version); + Str = "0x" + to_hexString(e.e_version); printFields(OS, "Version:", Str); - Str = "0x" + to_hexString(e->e_entry); + Str = "0x" + to_hexString(e.e_entry); printFields(OS, "Entry point address:", Str); - Str = to_string(e->e_phoff) + " (bytes into file)"; + Str = to_string(e.e_phoff) + " (bytes into file)"; printFields(OS, "Start of program headers:", Str); - Str = to_string(e->e_shoff) + " (bytes into file)"; + Str = to_string(e.e_shoff) + " (bytes into file)"; printFields(OS, "Start of section headers:", Str); std::string ElfFlags; - if (e->e_machine == EM_MIPS) + if (e.e_machine == EM_MIPS) ElfFlags = - printFlags(e->e_flags, makeArrayRef(ElfHeaderMipsFlags), + printFlags(e.e_flags, makeArrayRef(ElfHeaderMipsFlags), unsigned(ELF::EF_MIPS_ARCH), unsigned(ELF::EF_MIPS_ABI), unsigned(ELF::EF_MIPS_MACH)); - else if (e->e_machine == EM_RISCV) - ElfFlags = printFlags(e->e_flags, makeArrayRef(ElfHeaderRISCVFlags)); - Str = "0x" + to_hexString(e->e_flags); + else if (e.e_machine == EM_RISCV) + ElfFlags = printFlags(e.e_flags, makeArrayRef(ElfHeaderRISCVFlags)); + Str = "0x" + to_hexString(e.e_flags); if (!ElfFlags.empty()) Str = Str + ", " + ElfFlags; printFields(OS, "Flags:", Str); - Str = to_string(e->e_ehsize) + " (bytes)"; + Str = to_string(e.e_ehsize) + " 
(bytes)"; printFields(OS, "Size of this header:", Str); - Str = to_string(e->e_phentsize) + " (bytes)"; + Str = to_string(e.e_phentsize) + " (bytes)"; printFields(OS, "Size of program headers:", Str); - Str = to_string(e->e_phnum); + Str = to_string(e.e_phnum); printFields(OS, "Number of program headers:", Str); - Str = to_string(e->e_shentsize) + " (bytes)"; + Str = to_string(e.e_shentsize) + " (bytes)"; printFields(OS, "Size of section headers:", Str); Str = getSectionHeadersNumString(this->Obj, this->FileName); printFields(OS, "Number of section headers:", Str); @@ -3563,11 +3562,11 @@ std::vector getGroups(const ELFFile &Obj, StringRef StrTable = unwrapOrError(FileName, Obj.getStringTableForSymtab(*Symtab)); const Elf_Sym *Sym = unwrapOrError( - FileName, Obj.template getEntry(Symtab, Sec.sh_info)); + FileName, Obj.template getEntry(*Symtab, Sec.sh_info)); auto Data = unwrapOrError( - FileName, Obj.template getSectionContentsAsArray(&Sec)); + FileName, Obj.template getSectionContentsAsArray(Sec)); - StringRef Name = unwrapOrError(FileName, Obj.getSectionName(&Sec)); + StringRef Name = unwrapOrError(FileName, Obj.getSectionName(Sec)); StringRef Signature = StrTable.data() + Sym->st_name; Ret.push_back({Name, maybeDemangle(Signature), @@ -3580,7 +3579,7 @@ std::vector getGroups(const ELFFile &Obj, std::vector &GM = Ret.back().Members; for (uint32_t Ndx : Data.slice(1)) { - auto Sec = unwrapOrError(FileName, Obj.getSection(Ndx)); + const Elf_Shdr &Sec = *unwrapOrError(FileName, Obj.getSection(Ndx)); const StringRef Name = unwrapOrError(FileName, Obj.getSectionName(Sec)); GM.push_back({Name, Ndx}); } @@ -3727,7 +3726,7 @@ template void GNUStyle::printRelocations() { if (Sec.sh_type == ELF::SHT_ANDROID_REL || Sec.sh_type == ELF::SHT_ANDROID_RELA) { Expected> RelasOrErr = - this->Obj.android_relas(&Sec); + this->Obj.android_relas(Sec); if (!RelasOrErr) return RelasOrErr.takeError(); return RelasOrErr->size(); @@ -3735,7 +3734,7 @@ template void GNUStyle::printRelocations() { if (!opts::RawRelr && (Sec.sh_type == ELF::SHT_RELR || Sec.sh_type == ELF::SHT_ANDROID_RELR)) { - Expected RelrsOrErr = this->Obj.relrs(&Sec); + Expected RelrsOrErr = this->Obj.relrs(Sec); if (!RelrsOrErr) return RelrsOrErr.takeError(); return this->Obj.decode_relrs(*RelrsOrErr).size(); @@ -3827,7 +3826,7 @@ template void GNUStyle::printSectionHeaders() { ArrayRef Sections = cantFail(this->Obj.sections()); OS << "There are " << to_string(Sections.size()) << " section headers, starting at offset " - << "0x" << to_hexString(this->Obj.getHeader()->e_shoff, false) << ":\n\n"; + << "0x" << to_hexString(this->Obj.getHeader().e_shoff, false) << ":\n\n"; OS << "Section Headers:\n"; Field Fields[11] = { {"[Nr]", 2}, {"Name", 7}, {"Type", 25}, @@ -3852,15 +3851,15 @@ template void GNUStyle::printSectionHeaders() { Fields[1].Str = ""; else Fields[1].Str = std::string(unwrapOrError( - this->FileName, this->Obj.getSectionName(&Sec, SecStrTable))); + this->FileName, this->Obj.getSectionName(Sec, SecStrTable))); Fields[2].Str = - getSectionTypeString(this->Obj.getHeader()->e_machine, Sec.sh_type); + getSectionTypeString(this->Obj.getHeader().e_machine, Sec.sh_type); Fields[3].Str = to_string(format_hex_no_prefix(Sec.sh_addr, ELFT::Is64Bits ? 
16 : 8)); Fields[4].Str = to_string(format_hex_no_prefix(Sec.sh_offset, 6)); Fields[5].Str = to_string(format_hex_no_prefix(Sec.sh_size, 6)); Fields[6].Str = to_string(format_hex_no_prefix(Sec.sh_entsize, 2)); - Fields[7].Str = getGNUFlags(this->Obj.getHeader()->e_machine, Sec.sh_flags); + Fields[7].Str = getGNUFlags(this->Obj.getHeader().e_machine, Sec.sh_flags); Fields[8].Str = to_string(Sec.sh_link); Fields[9].Str = to_string(Sec.sh_info); Fields[10].Str = to_string(Sec.sh_addralign); @@ -3880,7 +3879,7 @@ template void GNUStyle::printSectionHeaders() { OS << "\n"; ++SectionIndex; } - printSectionDescription(OS, this->Obj.getHeader()->e_machine); + printSectionDescription(OS, this->Obj.getHeader().e_machine); } template @@ -3918,7 +3917,7 @@ std::string GNUStyle::getSymbolSectionNdx(const Elf_Sym *Symbol, return "COM"; case ELF::SHN_XINDEX: { Expected IndexOrErr = object::getExtendedSymbolTableIndex( - Symbol, FirstSym, this->dumper()->getShndxTable()); + *Symbol, *FirstSym, this->dumper()->getShndxTable()); if (!IndexOrErr) { assert(Symbol->st_shndx == SHN_XINDEX && "getSymbolSectionIndex should only fail due to an invalid " @@ -3961,7 +3960,7 @@ void GNUStyle::printSymbol(const Elf_Sym *Symbol, const Elf_Sym *FirstSym, Fields[2].Str = to_string(format_decimal(Symbol->st_size, 5)); unsigned char SymbolType = Symbol->getType(); - if (this->Obj.getHeader()->e_machine == ELF::EM_AMDGPU && + if (this->Obj.getHeader().e_machine == ELF::EM_AMDGPU && SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS) Fields[3].Str = printEnum(SymbolType, makeArrayRef(AMDGPUSymbolTypes)); else @@ -4000,7 +3999,7 @@ void GNUStyle::printHashedSymbol(const Elf_Sym *FirstSym, uint32_t Sym, Fields[3].Str = to_string(format_decimal(Symbol->st_size, 5)); unsigned char SymbolType = Symbol->getType(); - if (this->Obj.getHeader()->e_machine == ELF::EM_AMDGPU && + if (this->Obj.getHeader().e_machine == ELF::EM_AMDGPU && SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS) Fields[4].Str = printEnum(SymbolType, makeArrayRef(AMDGPUSymbolTypes)); else @@ -4227,14 +4226,14 @@ void GNUStyle::printProgramHeaders( template void GNUStyle::printProgramHeaders() { unsigned Bias = ELFT::Is64Bits ? 
8 : 0; - const Elf_Ehdr *Header = this->Obj.getHeader(); + const Elf_Ehdr &Header = this->Obj.getHeader(); Field Fields[8] = {2, 17, 26, 37 + Bias, 48 + Bias, 56 + Bias, 64 + Bias, 68 + Bias}; OS << "\nElf file type is " - << printEnum(Header->e_type, makeArrayRef(ElfObjectFileType)) << "\n" - << "Entry point " << format_hex(Header->e_entry, 3) << "\n" - << "There are " << Header->e_phnum << " program headers," - << " starting at offset " << Header->e_phoff << "\n\n" + << printEnum(Header.e_type, makeArrayRef(ElfObjectFileType)) << "\n" + << "Entry point " << format_hex(Header.e_entry, 3) << "\n" + << "There are " << Header.e_phnum << " program headers," + << " starting at offset " << Header.e_phoff << "\n\n" << "Program Headers:\n"; if (ELFT::Is64Bits) OS << " Type Offset VirtAddr PhysAddr " @@ -4254,7 +4253,7 @@ template void GNUStyle::printProgramHeaders() { } for (const Elf_Phdr &Phdr : *PhdrsOrErr) { - Fields[0].Str = getGNUPtType(Header->e_machine, Phdr.p_type); + Fields[0].Str = getGNUPtType(Header.e_machine, Phdr.p_type); Fields[1].Str = to_string(format_hex(Phdr.p_offset, 8)); Fields[2].Str = to_string(format_hex(Phdr.p_vaddr, Width)); Fields[3].Str = to_string(format_hex(Phdr.p_paddr, Width)); @@ -4322,8 +4321,7 @@ template void GNUStyle::printSectionMapping() { if (checkTLSSections(Phdr, Sec) && checkOffsets(Phdr, Sec) && checkVMA(Phdr, Sec) && checkPTDynamic(Phdr, Sec)) { Sections += - unwrapOrError(this->FileName, this->Obj.getSectionName(&Sec)) - .str() + + unwrapOrError(this->FileName, this->Obj.getSectionName(Sec)).str() + " "; BelongsToSegment.insert(&Sec); } @@ -4337,7 +4335,7 @@ template void GNUStyle::printSectionMapping() { for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { if (BelongsToSegment.find(&Sec) == BelongsToSegment.end()) Sections += - unwrapOrError(this->FileName, this->Obj.getSectionName(&Sec)).str() + + unwrapOrError(this->FileName, this->Obj.getSectionName(Sec)).str() + ' '; } if (!Sections.empty()) { @@ -4478,7 +4476,7 @@ template void GNUStyle::printGNUVersionSectionProlog( const typename ELFT::Shdr *Sec, const Twine &Label, unsigned EntriesNum) { StringRef SecName = - unwrapOrError(this->FileName, this->Obj.getSectionName(Sec)); + unwrapOrError(this->FileName, this->Obj.getSectionName(*Sec)); OS << Label << " section '" << SecName << "' " << "contains " << EntriesNum << " entries:\n"; @@ -4487,7 +4485,7 @@ void GNUStyle::printGNUVersionSectionProlog( this->Obj.getSection(Sec->sh_link); if (SymTabOrErr) SymTabName = - unwrapOrError(this->FileName, this->Obj.getSectionName(*SymTabOrErr)); + unwrapOrError(this->FileName, this->Obj.getSectionName(**SymTabOrErr)); else this->reportUniqueWarning(createError("invalid section linked to " + describe(this->Obj, *Sec) + ": " + @@ -5273,7 +5271,7 @@ template void GNUStyle::printNotes() { << format_hex(Descriptor.size(), 10) << '\t'; StringRef NoteType = - getNoteTypeName(Note, this->Obj.getHeader()->e_type); + getNoteTypeName(Note, this->Obj.getHeader().e_type); if (!NoteType.empty()) OS << NoteType << '\n'; else @@ -5311,11 +5309,11 @@ template void GNUStyle::printNotes() { }; ArrayRef Sections = cantFail(this->Obj.sections()); - if (this->Obj.getHeader()->e_type != ELF::ET_CORE && !Sections.empty()) { - for (const auto &S : Sections) { + if (this->Obj.getHeader().e_type != ELF::ET_CORE && !Sections.empty()) { + for (const Elf_Shdr &S : Sections) { if (S.sh_type != SHT_NOTE) continue; - PrintHeader(expectedToOptional(this->Obj.getSectionName(&S)), S.sh_offset, + 
PrintHeader(expectedToOptional(this->Obj.getSectionName(S)), S.sh_offset, S.sh_size); Error Err = Error::success(); for (auto Note : this->Obj.notes(S, Err)) @@ -5367,7 +5365,7 @@ void DumpStyle::printDependentLibsHelper( OnSectionStart(Shdr); - Expected> ContentsOrErr = Obj.getSectionContents(&Shdr); + Expected> ContentsOrErr = Obj.getSectionContents(Shdr); if (!ContentsOrErr) { Warn(I, toString(ContentsOrErr.takeError())); continue; @@ -5412,7 +5410,7 @@ void DumpStyle::printRelocationsHelper(const Elf_Shdr &Sec) { const bool IsMips64EL = this->Obj.isMips64EL(); switch (Sec.sh_type) { case ELF::SHT_REL: - if (Expected RangeOrErr = Obj.rels(&Sec)) { + if (Expected RangeOrErr = Obj.rels(Sec)) { for (const Elf_Rel &R : *RangeOrErr) printReloc(Relocation(R, IsMips64EL), ++RelNdx, Sec, SymTab); } else { @@ -5420,7 +5418,7 @@ void DumpStyle::printRelocationsHelper(const Elf_Shdr &Sec) { } break; case ELF::SHT_RELA: - if (Expected RangeOrErr = Obj.relas(&Sec)) { + if (Expected RangeOrErr = Obj.relas(Sec)) { for (const Elf_Rela &R : *RangeOrErr) printReloc(Relocation(R, IsMips64EL), ++RelNdx, Sec, SymTab); } else { @@ -5429,7 +5427,7 @@ void DumpStyle::printRelocationsHelper(const Elf_Shdr &Sec) { break; case ELF::SHT_RELR: case ELF::SHT_ANDROID_RELR: { - Expected RangeOrErr = Obj.relrs(&Sec); + Expected RangeOrErr = Obj.relrs(Sec); if (!RangeOrErr) { Warn(RangeOrErr.takeError()); break; @@ -5447,7 +5445,7 @@ void DumpStyle::printRelocationsHelper(const Elf_Shdr &Sec) { } case ELF::SHT_ANDROID_REL: case ELF::SHT_ANDROID_RELA: - if (Expected> RelasOrErr = Obj.android_relas(&Sec)) { + if (Expected> RelasOrErr = Obj.android_relas(Sec)) { for (const Elf_Rela &R : *RelasOrErr) printReloc(Relocation(R, IsMips64EL), ++RelNdx, Sec, SymTab); } else { @@ -5461,7 +5459,7 @@ template StringRef DumpStyle::getPrintableSectionName(const Elf_Shdr &Sec) const { StringRef Name = ""; if (Expected SecNameOrErr = - Obj.getSectionName(&Sec, this->dumper()->WarningHandler)) + Obj.getSectionName(Sec, this->dumper()->WarningHandler)) Name = *SecNameOrErr; else this->reportUniqueWarning(createError("unable to get the name of " + @@ -5659,7 +5657,7 @@ void DumpStyle::printNonRelocatableStackSizes( PrintHeader(); const Elf_Shdr *ElfSec = Obj->getSection(Sec.getRawDataRefImpl()); ArrayRef Contents = - unwrapOrError(this->FileName, EF->getSectionContents(ElfSec)); + unwrapOrError(this->FileName, EF->getSectionContents(*ElfSec)); DataExtractor Data(Contents, Obj->isLittleEndian(), sizeof(Elf_Addr)); uint64_t Offset = 0; while (Offset < Contents.size()) { @@ -5724,7 +5722,7 @@ void DumpStyle::printRelocatableStackSizes( const Elf_Shdr *ContentsSec = Obj->getSection((*RelSecOrErr)->getRawDataRefImpl()); Expected ContentsSectionNameOrErr = - EF->getSectionName(ContentsSec); + EF->getSectionName(*ContentsSec); if (!ContentsSectionNameOrErr) { consumeError(ContentsSectionNameOrErr.takeError()); continue; @@ -5936,7 +5934,7 @@ getMipsAbiFlagsSection(const ELFObjectFile *ObjF, const ELFFile *Obj = ObjF->getELFFile(); constexpr StringRef ErrPrefix = "unable to read the .MIPS.abiflags section: "; - Expected> DataOrErr = Obj->getSectionContents(Sec); + Expected> DataOrErr = Obj->getSectionContents(*Sec); if (!DataOrErr) return createError(ErrPrefix + toString(DataOrErr.takeError())); @@ -5981,21 +5979,21 @@ void GNUStyle::printMipsABIFlags(const ELFObjectFile *ObjF) { } template void LLVMStyle::printFileHeaders() { - const Elf_Ehdr *E = this->Obj.getHeader(); + const Elf_Ehdr &E = this->Obj.getHeader(); { DictScope D(W, "ElfHeader"); { 
DictScope D(W, "Ident"); - W.printBinary("Magic", makeArrayRef(E->e_ident).slice(ELF::EI_MAG0, 4)); - W.printEnum("Class", E->e_ident[ELF::EI_CLASS], makeArrayRef(ElfClass)); - W.printEnum("DataEncoding", E->e_ident[ELF::EI_DATA], + W.printBinary("Magic", makeArrayRef(E.e_ident).slice(ELF::EI_MAG0, 4)); + W.printEnum("Class", E.e_ident[ELF::EI_CLASS], makeArrayRef(ElfClass)); + W.printEnum("DataEncoding", E.e_ident[ELF::EI_DATA], makeArrayRef(ElfDataEncoding)); - W.printNumber("FileVersion", E->e_ident[ELF::EI_VERSION]); + W.printNumber("FileVersion", E.e_ident[ELF::EI_VERSION]); auto OSABI = makeArrayRef(ElfOSABI); - if (E->e_ident[ELF::EI_OSABI] >= ELF::ELFOSABI_FIRST_ARCH && - E->e_ident[ELF::EI_OSABI] <= ELF::ELFOSABI_LAST_ARCH) { - switch (E->e_machine) { + if (E.e_ident[ELF::EI_OSABI] >= ELF::ELFOSABI_FIRST_ARCH && + E.e_ident[ELF::EI_OSABI] <= ELF::ELFOSABI_LAST_ARCH) { + switch (E.e_machine) { case ELF::EM_AMDGPU: OSABI = makeArrayRef(AMDGPUElfOSABI); break; @@ -6007,32 +6005,32 @@ template void LLVMStyle::printFileHeaders() { break; } } - W.printEnum("OS/ABI", E->e_ident[ELF::EI_OSABI], OSABI); - W.printNumber("ABIVersion", E->e_ident[ELF::EI_ABIVERSION]); - W.printBinary("Unused", makeArrayRef(E->e_ident).slice(ELF::EI_PAD)); + W.printEnum("OS/ABI", E.e_ident[ELF::EI_OSABI], OSABI); + W.printNumber("ABIVersion", E.e_ident[ELF::EI_ABIVERSION]); + W.printBinary("Unused", makeArrayRef(E.e_ident).slice(ELF::EI_PAD)); } - W.printEnum("Type", E->e_type, makeArrayRef(ElfObjectFileType)); - W.printEnum("Machine", E->e_machine, makeArrayRef(ElfMachineType)); - W.printNumber("Version", E->e_version); - W.printHex("Entry", E->e_entry); - W.printHex("ProgramHeaderOffset", E->e_phoff); - W.printHex("SectionHeaderOffset", E->e_shoff); - if (E->e_machine == EM_MIPS) - W.printFlags("Flags", E->e_flags, makeArrayRef(ElfHeaderMipsFlags), + W.printEnum("Type", E.e_type, makeArrayRef(ElfObjectFileType)); + W.printEnum("Machine", E.e_machine, makeArrayRef(ElfMachineType)); + W.printNumber("Version", E.e_version); + W.printHex("Entry", E.e_entry); + W.printHex("ProgramHeaderOffset", E.e_phoff); + W.printHex("SectionHeaderOffset", E.e_shoff); + if (E.e_machine == EM_MIPS) + W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderMipsFlags), unsigned(ELF::EF_MIPS_ARCH), unsigned(ELF::EF_MIPS_ABI), unsigned(ELF::EF_MIPS_MACH)); - else if (E->e_machine == EM_AMDGPU) - W.printFlags("Flags", E->e_flags, makeArrayRef(ElfHeaderAMDGPUFlags), + else if (E.e_machine == EM_AMDGPU) + W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderAMDGPUFlags), unsigned(ELF::EF_AMDGPU_MACH)); - else if (E->e_machine == EM_RISCV) - W.printFlags("Flags", E->e_flags, makeArrayRef(ElfHeaderRISCVFlags)); + else if (E.e_machine == EM_RISCV) + W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderRISCVFlags)); else - W.printFlags("Flags", E->e_flags); - W.printNumber("HeaderSize", E->e_ehsize); - W.printNumber("ProgramHeaderEntrySize", E->e_phentsize); - W.printNumber("ProgramHeaderCount", E->e_phnum); - W.printNumber("SectionHeaderEntrySize", E->e_shentsize); + W.printFlags("Flags", E.e_flags); + W.printNumber("HeaderSize", E.e_ehsize); + W.printNumber("ProgramHeaderEntrySize", E.e_phentsize); + W.printNumber("ProgramHeaderCount", E.e_phnum); + W.printNumber("SectionHeaderEntrySize", E.e_shentsize); W.printString("SectionHeaderCount", getSectionHeadersNumString(this->Obj, this->FileName)); W.printString("StringTableSectionIndex", @@ -6133,13 +6131,13 @@ template void LLVMStyle::printSectionHeaders() { int SectionIndex = -1; 
std::vector> FlagsList = - getSectionFlagsForTarget(this->Obj.getHeader()->e_machine); + getSectionFlagsForTarget(this->Obj.getHeader().e_machine); for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { DictScope SectionD(W, "Section"); W.printNumber("Index", ++SectionIndex); W.printNumber("Name", this->getPrintableSectionName(Sec), Sec.sh_name); W.printHex("Type", - object::getELFSectionTypeName(this->Obj.getHeader()->e_machine, + object::getELFSectionTypeName(this->Obj.getHeader().e_machine, Sec.sh_type), Sec.sh_type); W.printFlags("Flags", Sec.sh_flags, makeArrayRef(FlagsList)); @@ -6167,7 +6165,7 @@ template void LLVMStyle::printSectionHeaders() { const Elf_Shdr *SymSec = unwrapOrError(this->FileName, this->Obj.getSection( - &Sym, Symtab, this->dumper()->getShndxTable())); + Sym, Symtab, this->dumper()->getShndxTable())); if (SymSec == &Sec) printSymbol(&Sym, unwrapOrError(this->FileName, this->Obj.symbols(Symtab)) @@ -6179,7 +6177,7 @@ template void LLVMStyle::printSectionHeaders() { if (opts::SectionData && Sec.sh_type != ELF::SHT_NOBITS) { ArrayRef Data = - unwrapOrError(this->FileName, this->Obj.getSectionContents(&Sec)); + unwrapOrError(this->FileName, this->Obj.getSectionContents(Sec)); W.printBinaryBlock( "SectionData", StringRef(reinterpret_cast(Data.data()), Data.size())); @@ -6229,7 +6227,7 @@ void LLVMStyle::printSymbol(const Elf_Sym *Symbol, const Elf_Sym *First, W.printHex("Value", Symbol->st_value); W.printNumber("Size", Symbol->st_size); W.printEnum("Binding", Symbol->getBinding(), makeArrayRef(ElfSymbolBindings)); - if (this->Obj.getHeader()->e_machine == ELF::EM_AMDGPU && + if (this->Obj.getHeader().e_machine == ELF::EM_AMDGPU && SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS) W.printEnum("Type", SymbolType, makeArrayRef(AMDGPUSymbolTypes)); else @@ -6241,7 +6239,7 @@ void LLVMStyle::printSymbol(const Elf_Sym *Symbol, const Elf_Sym *First, else { std::vector> SymOtherFlags(std::begin(ElfSymOtherFlags), std::end(ElfSymOtherFlags)); - if (this->Obj.getHeader()->e_machine == EM_MIPS) { + if (this->Obj.getHeader().e_machine == EM_MIPS) { // Someones in their infinite wisdom decided to make STO_MIPS_MIPS16 // flag overlapped with other ST_MIPS_xxx flags. So consider both // cases separately. @@ -6342,7 +6340,7 @@ template void LLVMStyle::printProgramHeaders() { for (const Elf_Phdr &Phdr : *PhdrsOrErr) { DictScope P(W, "ProgramHeader"); StringRef Type = - segmentTypeToString(this->Obj.getHeader()->e_machine, Phdr.p_type); + segmentTypeToString(this->Obj.getHeader().e_machine, Phdr.p_type); W.printHex("Type", Type.empty() ? 
"Unknown" : Type, Phdr.p_type); W.printHex("Offset", Phdr.p_offset); @@ -6452,7 +6450,7 @@ template void LLVMStyle::printCGProfile() { Expected> CGProfileOrErr = this->Obj.template getSectionContentsAsArray( - this->dumper()->getDotCGProfileSec()); + *this->dumper()->getDotCGProfileSec()); if (!CGProfileOrErr) { this->reportUniqueWarning( createError("unable to dump the SHT_LLVM_CALL_GRAPH_PROFILE section: " + @@ -6491,7 +6489,8 @@ template void LLVMStyle::printAddrsig() { if (!Sec) return; - Expected> ContentsOrErr = this->Obj.getSectionContents(Sec); + Expected> ContentsOrErr = + this->Obj.getSectionContents(*Sec); if (!ContentsOrErr) { this->reportUniqueWarning(ContentsOrErr.takeError()); return; @@ -6573,7 +6572,7 @@ template void LLVMStyle::printNotes() { W.printHex("Data size", Descriptor.size()); StringRef NoteType = - getNoteTypeName(Note, this->Obj.getHeader()->e_type); + getNoteTypeName(Note, this->Obj.getHeader().e_type); if (!NoteType.empty()) W.printString("Type", NoteType); else @@ -6609,12 +6608,12 @@ template void LLVMStyle::printNotes() { }; ArrayRef Sections = cantFail(this->Obj.sections()); - if (this->Obj.getHeader()->e_type != ELF::ET_CORE && !Sections.empty()) { - for (const auto &S : Sections) { + if (this->Obj.getHeader().e_type != ELF::ET_CORE && !Sections.empty()) { + for (const Elf_Shdr &S : Sections) { if (S.sh_type != SHT_NOTE) continue; DictScope D(W, "NoteSection"); - PrintHeader(expectedToOptional(this->Obj.getSectionName(&S)), S.sh_offset, + PrintHeader(expectedToOptional(this->Obj.getSectionName(S)), S.sh_offset, S.sh_size); Error Err = Error::success(); for (auto Note : this->Obj.notes(S, Err)) @@ -6655,7 +6654,7 @@ template void LLVMStyle::printELFLinkerOptions() { continue; Expected> ContentsOrErr = - this->Obj.getSectionContents(&Shdr); + this->Obj.getSectionContents(Shdr); if (!ContentsOrErr) { this->reportUniqueWarning( createError("unable to read the content of the " diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index 22fbdd2ed72e7..a2c78b81a700b 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -124,7 +124,7 @@ ELFDumper::getUniquedSectionName(const Elf_Shdr *Sec) { if (!SectionNames[SecIndex].empty()) return SectionNames[SecIndex]; - auto NameOrErr = Obj.getSectionName(Sec); + auto NameOrErr = Obj.getSectionName(*Sec); if (!NameOrErr) return NameOrErr; StringRef Name = *NameOrErr; @@ -153,7 +153,7 @@ ELFDumper::getUniquedSymbolName(const Elf_Sym *Sym, StringRef StrTable, return SymbolNameOrErr; StringRef Name = *SymbolNameOrErr; if (Name.empty() && Sym->getType() == ELF::STT_SECTION) { - auto ShdrOrErr = Obj.getSection(Sym, SymTab, ShndxTable); + auto ShdrOrErr = Obj.getSection(*Sym, SymTab, ShndxTable); if (!ShdrOrErr) return ShdrOrErr.takeError(); return getUniquedSectionName(*ShdrOrErr); @@ -235,14 +235,14 @@ template Expected ELFDumper::dump() { // Dump header. We do not dump EPh* and ESh* fields. When not explicitly set, // the values are set by yaml2obj automatically and there is no need to dump // them here. 
- Y->Header.Class = ELFYAML::ELF_ELFCLASS(Obj.getHeader()->getFileClass()); - Y->Header.Data = ELFYAML::ELF_ELFDATA(Obj.getHeader()->getDataEncoding()); - Y->Header.OSABI = Obj.getHeader()->e_ident[ELF::EI_OSABI]; - Y->Header.ABIVersion = Obj.getHeader()->e_ident[ELF::EI_ABIVERSION]; - Y->Header.Type = Obj.getHeader()->e_type; - Y->Header.Machine = ELFYAML::ELF_EM(Obj.getHeader()->e_machine); - Y->Header.Flags = Obj.getHeader()->e_flags; - Y->Header.Entry = Obj.getHeader()->e_entry; + Y->Header.Class = ELFYAML::ELF_ELFCLASS(Obj.getHeader().getFileClass()); + Y->Header.Data = ELFYAML::ELF_ELFDATA(Obj.getHeader().getDataEncoding()); + Y->Header.OSABI = Obj.getHeader().e_ident[ELF::EI_OSABI]; + Y->Header.ABIVersion = Obj.getHeader().e_ident[ELF::EI_ABIVERSION]; + Y->Header.Type = Obj.getHeader().e_type; + Y->Header.Machine = ELFYAML::ELF_EM(Obj.getHeader().e_machine); + Y->Header.Flags = Obj.getHeader().e_flags; + Y->Header.Entry = Obj.getHeader().e_entry; // Dump sections auto SectionsOrErr = Obj.sections(); @@ -588,7 +588,7 @@ Error ELFDumper::dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab, return Error::success(); } - auto ShdrOrErr = Obj.getSection(Sym, SymTab, ShndxTable); + auto ShdrOrErr = Obj.getSection(*Sym, SymTab, ShndxTable); if (!ShdrOrErr) return ShdrOrErr.takeError(); const Elf_Shdr *Shdr = *ShdrOrErr; @@ -611,7 +611,7 @@ Error ELFDumper::dumpRelocation(const RelT *Rel, const Elf_Shdr *SymTab, R.Offset = Rel->r_offset; R.Addend = 0; - auto SymOrErr = Obj.getRelocationSymbol(Rel, SymTab); + auto SymOrErr = Obj.getRelocationSymbol(*Rel, SymTab); if (!SymOrErr) return SymOrErr.takeError(); @@ -624,7 +624,7 @@ Error ELFDumper::dumpRelocation(const RelT *Rel, const Elf_Shdr *SymTab, auto StrTabSec = Obj.getSection(SymTab->sh_link); if (!StrTabSec) return StrTabSec.takeError(); - auto StrTabOrErr = Obj.getStringTable(*StrTabSec); + auto StrTabOrErr = Obj.getStringTable(**StrTabSec); if (!StrTabOrErr) return StrTabOrErr.takeError(); @@ -725,7 +725,7 @@ ELFDumper::dumpStackSizesSection(const Elf_Shdr *Shdr) { if (Error E = dumpCommonSection(Shdr, *S)) return std::move(E); - auto ContentOrErr = Obj.getSectionContents(Shdr); + auto ContentOrErr = Obj.getSectionContents(*Shdr); if (!ContentOrErr) return ContentOrErr.takeError(); @@ -758,7 +758,7 @@ ELFDumper::dumpAddrsigSection(const Elf_Shdr *Shdr) { if (Error E = dumpCommonSection(Shdr, *S)) return std::move(E); - auto ContentOrErr = Obj.getSectionContents(Shdr); + auto ContentOrErr = Obj.getSectionContents(*Shdr); if (!ContentOrErr) return ContentOrErr.takeError(); @@ -799,7 +799,7 @@ ELFDumper::dumpLinkerOptionsSection(const Elf_Shdr *Shdr) { if (Error E = dumpCommonSection(Shdr, *S)) return std::move(E); - auto ContentOrErr = Obj.getSectionContents(Shdr); + auto ContentOrErr = Obj.getSectionContents(*Shdr); if (!ContentOrErr) return ContentOrErr.takeError(); @@ -830,7 +830,7 @@ ELFDumper::dumpDependentLibrariesSection(const Elf_Shdr *Shdr) { if (Error E = dumpCommonSection(Shdr, *DL)) return std::move(E); - Expected> ContentOrErr = Obj.getSectionContents(Shdr); + Expected> ContentOrErr = Obj.getSectionContents(*Shdr); if (!ContentOrErr) return ContentOrErr.takeError(); @@ -857,7 +857,7 @@ ELFDumper::dumpCallGraphProfileSection(const Elf_Shdr *Shdr) { if (Error E = dumpCommonSection(Shdr, *S)) return std::move(E); - Expected> ContentOrErr = Obj.getSectionContents(Shdr); + Expected> ContentOrErr = Obj.getSectionContents(*Shdr); if (!ContentOrErr) return ContentOrErr.takeError(); ArrayRef Content = *ContentOrErr; @@ -913,7 +913,7 
@@ ELFDumper::dumpDynamicSection(const Elf_Shdr *Shdr) { if (Error E = dumpCommonSection(Shdr, *S)) return std::move(E); - auto DynTagsOrErr = Obj.template getSectionContentsAsArray(Shdr); + auto DynTagsOrErr = Obj.template getSectionContentsAsArray(*Shdr); if (!DynTagsOrErr) return DynTagsOrErr.takeError(); @@ -936,7 +936,7 @@ ELFDumper::dumpRelocSection(const Elf_Shdr *Shdr) { const Elf_Shdr *SymTab = *SymTabOrErr; if (Shdr->sh_type == ELF::SHT_REL) { - auto Rels = Obj.rels(Shdr); + auto Rels = Obj.rels(*Shdr); if (!Rels) return Rels.takeError(); for (const Elf_Rel &Rel : *Rels) { @@ -946,7 +946,7 @@ ELFDumper::dumpRelocSection(const Elf_Shdr *Shdr) { S->Relocations.push_back(R); } } else { - auto Rels = Obj.relas(Shdr); + auto Rels = Obj.relas(*Shdr); if (!Rels) return Rels.takeError(); for (const Elf_Rela &Rel : *Rels) { @@ -968,7 +968,7 @@ ELFDumper::dumpRelrSection(const Elf_Shdr *Shdr) { if (auto E = dumpCommonSection(Shdr, *S)) return std::move(E); - if (Expected> Relrs = Obj.relrs(Shdr)) { + if (Expected> Relrs = Obj.relrs(*Shdr)) { S->Entries.emplace(); for (Elf_Relr Rel : *Relrs) S->Entries->emplace_back(Rel); @@ -978,7 +978,7 @@ ELFDumper::dumpRelrSection(const Elf_Shdr *Shdr) { consumeError(Relrs.takeError()); } - Expected> ContentOrErr = Obj.getSectionContents(Shdr); + Expected> ContentOrErr = Obj.getSectionContents(*Shdr); if (!ContentOrErr) return ContentOrErr.takeError(); S->Content = *ContentOrErr; @@ -994,7 +994,7 @@ ELFDumper::dumpContentSection(const Elf_Shdr *Shdr) { unsigned SecIndex = Shdr - &Sections[0]; if (SecIndex != 0 || Shdr->sh_type != ELF::SHT_NULL) { - auto ContentOrErr = Obj.getSectionContents(Shdr); + auto ContentOrErr = Obj.getSectionContents(*Shdr); if (!ContentOrErr) return ContentOrErr.takeError(); ArrayRef Content = *ContentOrErr; @@ -1016,7 +1016,7 @@ ELFDumper::dumpSymtabShndxSection(const Elf_Shdr *Shdr) { if (Error E = dumpCommonSection(Shdr, *S)) return std::move(E); - auto EntriesOrErr = Obj.template getSectionContentsAsArray(Shdr); + auto EntriesOrErr = Obj.template getSectionContentsAsArray(*Shdr); if (!EntriesOrErr) return EntriesOrErr.takeError(); for (const Elf_Word &E : *EntriesOrErr) @@ -1042,7 +1042,7 @@ ELFDumper::dumpNoteSection(const Elf_Shdr *Shdr) { if (Error E = dumpCommonSection(Shdr, *S)) return std::move(E); - auto ContentOrErr = Obj.getSectionContents(Shdr); + auto ContentOrErr = Obj.getSectionContents(*Shdr); if (!ContentOrErr) return ContentOrErr.takeError(); @@ -1078,7 +1078,7 @@ ELFDumper::dumpHashSection(const Elf_Shdr *Shdr) { if (Error E = dumpCommonSection(Shdr, *S)) return std::move(E); - auto ContentOrErr = Obj.getSectionContents(Shdr); + auto ContentOrErr = Obj.getSectionContents(*Shdr); if (!ContentOrErr) return ContentOrErr.takeError(); @@ -1119,7 +1119,7 @@ ELFDumper::dumpGnuHashSection(const Elf_Shdr *Shdr) { if (Error E = dumpCommonSection(Shdr, *S)) return std::move(E); - auto ContentOrErr = Obj.getSectionContents(Shdr); + auto ContentOrErr = Obj.getSectionContents(*Shdr); if (!ContentOrErr) return ContentOrErr.takeError(); @@ -1179,11 +1179,11 @@ ELFDumper::dumpVerdefSection(const Elf_Shdr *Shdr) { if (!StringTableShdrOrErr) return StringTableShdrOrErr.takeError(); - auto StringTableOrErr = Obj.getStringTable(*StringTableShdrOrErr); + auto StringTableOrErr = Obj.getStringTable(**StringTableShdrOrErr); if (!StringTableOrErr) return StringTableOrErr.takeError(); - auto Contents = Obj.getSectionContents(Shdr); + auto Contents = Obj.getSectionContents(*Shdr); if (!Contents) return Contents.takeError(); @@ -1224,7 
+1224,7 @@ ELFDumper::dumpSymverSection(const Elf_Shdr *Shdr) { if (Error E = dumpCommonSection(Shdr, *S)) return std::move(E); - auto VersionsOrErr = Obj.template getSectionContentsAsArray(Shdr); + auto VersionsOrErr = Obj.template getSectionContentsAsArray(*Shdr); if (!VersionsOrErr) return VersionsOrErr.takeError(); for (const Elf_Half &E : *VersionsOrErr) @@ -1245,7 +1245,7 @@ ELFDumper::dumpVerneedSection(const Elf_Shdr *Shdr) { S->Info = Shdr->sh_info; - auto Contents = Obj.getSectionContents(Shdr); + auto Contents = Obj.getSectionContents(*Shdr); if (!Contents) return Contents.takeError(); @@ -1253,7 +1253,7 @@ ELFDumper::dumpVerneedSection(const Elf_Shdr *Shdr) { if (!StringTableShdrOrErr) return StringTableShdrOrErr.takeError(); - auto StringTableOrErr = Obj.getStringTable(*StringTableShdrOrErr); + auto StringTableOrErr = Obj.getStringTable(**StringTableShdrOrErr); if (!StringTableOrErr) return StringTableOrErr.takeError(); @@ -1322,7 +1322,7 @@ Expected ELFDumper::dumpGroup(const Elf_Shdr *Shdr) { return SymbolName.takeError(); S->Signature = *SymbolName; - auto MembersOrErr = Obj.template getSectionContentsAsArray(Shdr); + auto MembersOrErr = Obj.template getSectionContentsAsArray(*Shdr); if (!MembersOrErr) return MembersOrErr.takeError(); @@ -1352,7 +1352,7 @@ ELFDumper::dumpMipsABIFlags(const Elf_Shdr *Shdr) { if (Error E = dumpCommonSection(Shdr, *S)) return std::move(E); - auto ContentOrErr = Obj.getSectionContents(Shdr); + auto ContentOrErr = Obj.getSectionContents(*Shdr); if (!ContentOrErr) return ContentOrErr.takeError(); From 58938b544b728ccf90462a7e4854e8a533eb9296 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Tue, 15 Sep 2020 01:46:58 -0700 Subject: [PATCH 0659/1079] [NFC][DebugInfo] Use consistent regex group spelling This is a follow up to c1f2fb5184ca. 
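Both spellings match exactly the same inputs, a single `/` or `\`: `[/\\]` is a bracket expression and `(/|\\)` is an alternation group, so the patch only standardizes on the group form already used elsewhere in these tests (note the untouched `llvmwasm{{(/|\\)}}` occurrences below). A small standalone check of that equivalence; it uses `llvm::Regex` because FileCheck's `{{...}}` blocks are POSIX extended regexes, and it is illustrative only, not part of the patch:

```
// Both the character-class and the group spelling accept "/" and "\".
#include "llvm/Support/Regex.h"
#include <cassert>

int main() {
  llvm::Regex CharClass("^[/\\\\]$"); // old spelling: [/\\]
  llvm::Regex Group("^(/|\\\\)$");    // new spelling: (/|\\)
  for (const char *Sep : {"/", "\\"}) {
    assert(CharClass.match(Sep));
    assert(Group.match(Sep));
  }
  return 0;
}
```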
---
 lld/test/ELF/conflict-debug-variable2.s | 4 ++--
 lld/test/wasm/debuginfo.test | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/lld/test/ELF/conflict-debug-variable2.s b/lld/test/ELF/conflict-debug-variable2.s
index fe134f49730d1..2b5ea882012e9 100644
--- a/lld/test/ELF/conflict-debug-variable2.s
+++ b/lld/test/ELF/conflict-debug-variable2.s
@@ -7,14 +7,14 @@
 # INPUT-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000027] = "foo")
 # INPUT-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x0033 => {0x00000033} "int")
 # INPUT-NEXT: DW_AT_external [DW_FORM_flag_present] (true)
-# INPUT-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/home{{[/\\]}}path{{[/\\]}}test.c")
+# INPUT-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/home{{(/|\\)}}path{{(/|\\)}}test.c")
 # INPUT-NEXT: DW_AT_decl_line [DW_FORM_data1] (1)
 # INPUT-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x0)

 # INPUT: DW_TAG_variable
 # INPUT-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000002f] = "bar")
 # INPUT-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x0033 => {0x00000033} "int")
 # INPUT-NEXT: DW_AT_external [DW_FORM_flag_present] (true)
-# INPUT-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/home{{[/\\]}}path{{[/\\]}}test.c")
+# INPUT-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/home{{(/|\\)}}path{{(/|\\)}}test.c")
 # INPUT-NEXT: DW_AT_decl_line [DW_FORM_data1] (2)
 # INPUT-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x0)

diff --git a/lld/test/wasm/debuginfo.test b/lld/test/wasm/debuginfo.test
index 039a051f44faf..f6aae5a6c2fdd 100644
--- a/lld/test/wasm/debuginfo.test
+++ b/lld/test/wasm/debuginfo.test
@@ -16,13 +16,13 @@ CHECK-NEXT: DW_AT_low_pc
 CHECK-NEXT: DW_AT_high_pc
 CHECK-NEXT: DW_AT_frame_base
 CHECK-NEXT: DW_AT_name ("test")
-CHECK-NEXT: DW_AT_decl_file ("/Users{{[/\\]}}yury{{[/\\]}}llvmwasm{{(/|\\)}}hi.c")
+CHECK-NEXT: DW_AT_decl_file ("/Users{{(/|\\)}}yury{{(/|\\)}}llvmwasm{{(/|\\)}}hi.c")
 CHECK-NEXT: DW_AT_decl_line (3)
 CHECK-NEXT: DW_AT_prototyped (true)

 CHECK: DW_TAG_formal_parameter
 CHECK-NEXT: DW_AT_name ("t")
-CHECK-NEXT: DW_AT_decl_file ("/Users{{[/\\]}}yury{{[/\\]}}llvmwasm{{(/|\\)}}hi.c")
+CHECK-NEXT: DW_AT_decl_file ("/Users{{(/|\\)}}yury{{(/|\\)}}llvmwasm{{(/|\\)}}hi.c")
 CHECK-NEXT: DW_AT_decl_line (3)

 CHECK: DW_TAG_subprogram
@@ -30,7 +30,7 @@ CHECK-NEXT: DW_AT_low_pc
 CHECK-NEXT: DW_AT_high_pc
 CHECK-NEXT: DW_AT_frame_base
 CHECK-NEXT: DW_AT_name ("_start")
-CHECK-NEXT: DW_AT_decl_file ("/Users{{[/\\]}}yury{{[/\\]}}llvmwasm{{(/|\\)}}hi.c")
+CHECK-NEXT: DW_AT_decl_file ("/Users{{(/|\\)}}yury{{(/|\\)}}llvmwasm{{(/|\\)}}hi.c")
 CHECK-NEXT: DW_AT_decl_line (7)

 CHECK: DW_TAG_base_type

From bccd2ec3e216fed04c46df7077462165435703a1 Mon Sep 17 00:00:00 2001
From: Georgii Rymar
Date: Sat, 5 Sep 2020 19:13:50 +0300
Subject: [PATCH 0660/1079] [llvm-readobj/elf] - Simplify and refine the
 implementation which dumps .stack_sizes

Our implementation of stack sizes section dumping heavily uses
`ELFObjectFile`, while the rest of the code uses `ELFFile`. Those APIs
are very different. `ELFObjectFile` is very generic and has
`SectionRef`, `RelocationRef`, `SymbolRef` and other generic concepts.
The `ELFFile` class works directly with `Elf_Shdr`, `Elf_Rel[a]`,
`Elf_Sym` etc., which is probably much cleaner for an ELF dumper.

Also, the `ELFObjectFile` API does not always provide a way to check
for possible errors. E.g., the implementation of `symbol_end()` does
not verify the `sh_size`:

```
template <class ELFT>
basic_symbol_iterator ELFObjectFile<ELFT>::symbol_end() const {
  const Elf_Shdr *SymTab = DotSymtabSec;
  if (!SymTab)
    return symbol_begin();
  DataRefImpl Sym = toDRI(SymTab, SymTab->sh_size / sizeof(Elf_Sym));
  return basic_symbol_iterator(SymbolRef(Sym, this));
}
```

There are many other examples, which makes me think we might win from
switching to the `ELFFile` API, where we already validate the input
data heavily. This patch is the first step in that direction: I've
converted a large portion of the code to use `ELFFile`.

Differential revision: https://reviews.llvm.org/D87362
---
 llvm/tools/llvm-readobj/ELFDumper.cpp | 205 +++++++++++---------------
 1 file changed, 86 insertions(+), 119 deletions(-)

diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 86d76b056b924..e28d4ece226ce 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -722,8 +722,9 @@ template class DumpStyle {
   TYPEDEF_ELF_TYPES(ELFT)

   DumpStyle(ELFDumper *Dumper)
-      : Obj(*Dumper->getElfObject()->getELFFile()), Dumper(Dumper) {
-    FileName = this->Dumper->getElfObject()->getFileName();
+      : Obj(*Dumper->getElfObject()->getELFFile()),
+        ElfObj(*Dumper->getElfObject()), Dumper(Dumper) {
+    FileName = ElfObj.getFileName();
   }

   virtual ~DumpStyle() = default;
@@ -752,17 +753,15 @@ template class DumpStyle {
   virtual void printAddrsig() = 0;
   virtual void printNotes() = 0;
   virtual void printELFLinkerOptions() = 0;
-  virtual void printStackSizes(const ELFObjectFile *Obj) = 0;
-  void printNonRelocatableStackSizes(const ELFObjectFile *Obj,
-                                     std::function PrintHeader);
-  void printRelocatableStackSizes(const ELFObjectFile *Obj,
-                                  std::function PrintHeader);
-  void printFunctionStackSize(const ELFObjectFile *Obj, uint64_t SymValue,
-                              Optional FunctionSec,
+  virtual void printStackSizes() = 0;
+  void printNonRelocatableStackSizes(std::function PrintHeader);
+  void printRelocatableStackSizes(std::function PrintHeader);
+  void printFunctionStackSize(uint64_t SymValue,
+                              Optional FunctionSec,
                               const Elf_Shdr &StackSizeSec, DataExtractor Data,
                               uint64_t *Offset);
-  void printStackSize(const ELFObjectFile *Obj, RelocationRef Rel,
-                      SectionRef FunctionSec, const Elf_Shdr &StackSizeSec,
+  void printStackSize(RelocationRef Rel, const Elf_Shdr *FunctionSec,
+                      const Elf_Shdr &StackSizeSec,
                       const RelocationResolver &Resolver, DataExtractor Data);
   virtual void printStackSizeEntry(uint64_t Size, StringRef FuncName) = 0;
   virtual void printMipsGOT(const MipsGOTParser &Parser) = 0;
@@ -790,6 +789,7 @@ template class DumpStyle {
   StringRef FileName;
   const ELFFile &Obj;
+  const ELFObjectFile &ElfObj;

 private:
   const ELFDumper *Dumper;
@@ -828,7 +828,7 @@ template class GNUStyle : public DumpStyle {
   void printAddrsig() override;
   void printNotes() override;
   void printELFLinkerOptions() override;
-  void printStackSizes(const ELFObjectFile *Obj) override;
+  void printStackSizes() override;
   void printStackSizeEntry(uint64_t Size, StringRef FuncName) override;
   void printMipsGOT(const MipsGOTParser &Parser) override;
   void printMipsPLT(const MipsGOTParser &Parser) override;
@@ -952,7 +952,7 @@ template class LLVMStyle : public DumpStyle {
   void printAddrsig() override;
   void printNotes() override;
   void printELFLinkerOptions() override;
-  void printStackSizes(const ELFObjectFile *Obj) override;
+  void printStackSizes() override;
   void printStackSizeEntry(uint64_t Size, StringRef FuncName) override;
   void printMipsGOT(const 
MipsGOTParser &Parser) override; void printMipsPLT(const MipsGOTParser &Parser) override; @@ -2333,7 +2333,7 @@ template void ELFDumper::printELFLinkerOptions() { } template void ELFDumper::printStackSizes() { - ELFDumperStyle->printStackSizes(ObjF); + ELFDumperStyle->printStackSizes(); } #define LLVM_READOBJ_DT_FLAG_ENT(prefix, enum) \ @@ -5503,16 +5503,6 @@ template void GNUStyle::printDependentLibs() { PrintSection(); } -// Used for printing section names in places where possible errors can be -// ignored. -static StringRef getSectionName(const SectionRef &Sec) { - Expected NameOrErr = Sec.getName(); - if (NameOrErr) - return *NameOrErr; - consumeError(NameOrErr.takeError()); - return ""; -} - // Used for printing symbol names in places where possible errors can be // ignored. static std::string getSymbolName(const ELFSymbolRef &Sym) { @@ -5524,16 +5514,13 @@ static std::string getSymbolName(const ELFSymbolRef &Sym) { } template -void DumpStyle::printFunctionStackSize(const ELFObjectFile *Obj, - uint64_t SymValue, - Optional FunctionSec, - const Elf_Shdr &StackSizeSec, - DataExtractor Data, - uint64_t *Offset) { +void DumpStyle::printFunctionStackSize( + uint64_t SymValue, Optional FunctionSec, + const Elf_Shdr &StackSizeSec, DataExtractor Data, uint64_t *Offset) { // This function ignores potentially erroneous input, unless it is directly // related to stack size reporting. SymbolRef FuncSym; - for (const ELFSymbolRef &Symbol : Obj->symbols()) { + for (const ELFSymbolRef &Symbol : ElfObj.symbols()) { Expected SymAddrOrErr = Symbol.getAddress(); if (!SymAddrOrErr) { consumeError(SymAddrOrErr.takeError()); @@ -5547,7 +5534,8 @@ void DumpStyle::printFunctionStackSize(const ELFObjectFile *Obj, if (Symbol.getELFType() == ELF::STT_FUNC && *SymAddrOrErr == SymValue) { // Check if the symbol is in the right section. FunctionSec == None means // "any section". - if (!FunctionSec || FunctionSec->containsSymbol(Symbol)) { + if (!FunctionSec || + ElfObj.toSectionRef(*FunctionSec).containsSymbol(Symbol)) { FuncSym = Symbol; break; } @@ -5561,7 +5549,7 @@ void DumpStyle::printFunctionStackSize(const ELFObjectFile *Obj, else reportWarning( createError("could not identify function symbol for stack size entry"), - Obj->getFileName()); + FileName); // Extract the size. The expectation is that Offset is pointing to the right // place, i.e. past the function address. @@ -5570,11 +5558,10 @@ void DumpStyle::printFunctionStackSize(const ELFObjectFile *Obj, // getULEB128() does not advance Offset if it is not able to extract a valid // integer. if (*Offset == PrevOffset) { - reportWarning( - createStringError(object_error::parse_failed, - "could not extract a valid stack size in " + - describe(*Obj->getELFFile(), StackSizeSec)), - Obj->getFileName()); + reportWarning(createStringError(object_error::parse_failed, + "could not extract a valid stack size in " + + describe(Obj, StackSizeSec)), + FileName); return; } @@ -5590,9 +5577,8 @@ void GNUStyle::printStackSizeEntry(uint64_t Size, StringRef FuncName) { } template -void DumpStyle::printStackSize(const ELFObjectFile *Obj, - RelocationRef Reloc, - SectionRef FunctionSec, +void DumpStyle::printStackSize(RelocationRef Reloc, + const Elf_Shdr *FunctionSec, const Elf_Shdr &StackSizeSec, const RelocationResolver &Resolver, DataExtractor Data) { @@ -5600,8 +5586,7 @@ void DumpStyle::printStackSize(const ELFObjectFile *Obj, // related to stack size reporting. 
object::symbol_iterator RelocSym = Reloc.getSymbol(); uint64_t RelocSymValue = 0; - StringRef FileStr = Obj->getFileName(); - if (RelocSym != Obj->symbol_end()) { + if (RelocSym != ElfObj.symbol_end()) { // Ensure that the relocation symbol is in the function section, i.e. the // section where the functions whose stack sizes we are reporting are // located. @@ -5610,16 +5595,16 @@ void DumpStyle::printStackSize(const ELFObjectFile *Obj, reportWarning( createError("cannot identify the section for relocation symbol '" + getSymbolName(*RelocSym) + "'"), - FileStr); + FileName); consumeError(SectionOrErr.takeError()); - } else if (*SectionOrErr != FunctionSec) { + } else if (*SectionOrErr != ElfObj.toSectionRef(FunctionSec)) { reportWarning(createError("relocation symbol '" + getSymbolName(*RelocSym) + "' is not in the expected section"), - FileStr); + FileName); // Pretend that the symbol is in the correct section and report its // stack size anyway. - FunctionSec = **SectionOrErr; + FunctionSec = ElfObj.getSection((*SectionOrErr)->getRawDataRefImpl()); } Expected RelocSymValueOrErr = RelocSym->getValue(); @@ -5634,31 +5619,29 @@ void DumpStyle::printStackSize(const ELFObjectFile *Obj, reportUniqueWarning(createStringError( object_error::parse_failed, "found invalid relocation offset (0x" + Twine::utohexstr(Offset) + - ") into " + describe(*Obj->getELFFile(), StackSizeSec) + + ") into " + describe(Obj, StackSizeSec) + " while trying to extract a stack size entry")); return; } uint64_t Addend = Data.getAddress(&Offset); uint64_t SymValue = Resolver(Reloc, RelocSymValue, Addend); - this->printFunctionStackSize(Obj, SymValue, FunctionSec, StackSizeSec, Data, + this->printFunctionStackSize(SymValue, FunctionSec, StackSizeSec, Data, &Offset); } template void DumpStyle::printNonRelocatableStackSizes( - const ELFObjectFile *Obj, std::function PrintHeader) { + std::function PrintHeader) { // This function ignores potentially erroneous input, unless it is directly // related to stack size reporting. 
- const ELFFile *EF = Obj->getELFFile(); - for (const SectionRef &Sec : Obj->sections()) { - if (getSectionName(Sec) != ".stack_sizes") + for (const Elf_Shdr &Sec : cantFail(Obj.sections())) { + if (this->getPrintableSectionName(Sec) != ".stack_sizes") continue; PrintHeader(); - const Elf_Shdr *ElfSec = Obj->getSection(Sec.getRawDataRefImpl()); ArrayRef Contents = - unwrapOrError(this->FileName, EF->getSectionContents(*ElfSec)); - DataExtractor Data(Contents, Obj->isLittleEndian(), sizeof(Elf_Addr)); + unwrapOrError(this->FileName, Obj.getSectionContents(Sec)); + DataExtractor Data(Contents, Obj.isLE(), sizeof(Elf_Addr)); uint64_t Offset = 0; while (Offset < Contents.size()) { // The function address is followed by a ULEB representing the stack @@ -5666,12 +5649,12 @@ void DumpStyle::printNonRelocatableStackSizes( if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Elf_Addr) + 1)) { reportUniqueWarning(createStringError( object_error::parse_failed, - describe(*EF, *ElfSec) + + describe(Obj, Sec) + " ended while trying to extract a stack size entry")); break; } uint64_t SymValue = Data.getAddress(&Offset); - printFunctionStackSize(Obj, SymValue, /*FunctionSec=*/None, *ElfSec, Data, + printFunctionStackSize(SymValue, /*FunctionSec=*/None, Sec, Data, &Offset); } } @@ -5679,17 +5662,13 @@ void DumpStyle::printNonRelocatableStackSizes( template void DumpStyle::printRelocatableStackSizes( - const ELFObjectFile *Obj, std::function PrintHeader) { - const ELFFile *EF = Obj->getELFFile(); - + std::function PrintHeader) { // Build a map between stack size sections and their corresponding relocation // sections. - llvm::MapVector StackSizeRelocMap; - const SectionRef NullSection{}; - - for (const SectionRef &Sec : Obj->sections()) { + llvm::MapVector StackSizeRelocMap; + for (const Elf_Shdr &Sec : cantFail(Obj.sections())) { StringRef SectionName; - if (Expected NameOrErr = Sec.getName()) + if (Expected NameOrErr = Obj.getSectionName(Sec)) SectionName = *NameOrErr; else consumeError(NameOrErr.takeError()); @@ -5697,92 +5676,80 @@ void DumpStyle::printRelocatableStackSizes( // A stack size section that we haven't encountered yet is mapped to the // null section until we find its corresponding relocation section. if (SectionName == ".stack_sizes") - if (StackSizeRelocMap.count(Sec) == 0) { - StackSizeRelocMap[Sec] = NullSection; + if (StackSizeRelocMap + .insert(std::make_pair(&Sec, (const Elf_Shdr *)nullptr)) + .second) continue; - } // Check relocation sections if they are relocating contents of a // stack sizes section. 
- const Elf_Shdr *ElfSec = Obj->getSection(Sec.getRawDataRefImpl()); - uint32_t SectionType = ElfSec->sh_type; - if (SectionType != ELF::SHT_RELA && SectionType != ELF::SHT_REL) + if (Sec.sh_type != ELF::SHT_RELA && Sec.sh_type != ELF::SHT_REL) continue; - Expected RelSecOrErr = Sec.getRelocatedSection(); + Expected RelSecOrErr = Obj.getSection(Sec.sh_info); if (!RelSecOrErr) { - reportUniqueWarning( - createStringError(object_error::parse_failed, - describe(*Obj->getELFFile(), *ElfSec) + - ": failed to get a relocated section: " + - toString(RelSecOrErr.takeError()))); + reportUniqueWarning(createStringError( + object_error::parse_failed, + describe(Obj, Sec) + ": failed to get a relocated section: " + + toString(RelSecOrErr.takeError()))); continue; } - const Elf_Shdr *ContentsSec = - Obj->getSection((*RelSecOrErr)->getRawDataRefImpl()); - Expected ContentsSectionNameOrErr = - EF->getSectionName(*ContentsSec); - if (!ContentsSectionNameOrErr) { - consumeError(ContentsSectionNameOrErr.takeError()); - continue; - } - if (*ContentsSectionNameOrErr != ".stack_sizes") + const Elf_Shdr *ContentsSec = *RelSecOrErr; + if (this->getPrintableSectionName(**RelSecOrErr) != ".stack_sizes") continue; + // Insert a mapping from the stack sizes section to its relocation section. - StackSizeRelocMap[Obj->toSectionRef(ContentsSec)] = Sec; + StackSizeRelocMap[ContentsSec] = &Sec; } for (const auto &StackSizeMapEntry : StackSizeRelocMap) { PrintHeader(); - const SectionRef &StackSizesSec = StackSizeMapEntry.first; - const SectionRef &RelocSec = StackSizeMapEntry.second; - const Elf_Shdr *StackSizesELFSec = - Obj->getSection(StackSizesSec.getRawDataRefImpl()); + const Elf_Shdr *StackSizesELFSec = StackSizeMapEntry.first; + const Elf_Shdr *RelocSec = StackSizeMapEntry.second; // Warn about stack size sections without a relocation section. - if (RelocSec == NullSection) { - reportWarning( - createError(".stack_sizes (" + - describe(*Obj->getELFFile(), *StackSizesELFSec) + - ") does not have a corresponding " - "relocation section"), - Obj->getFileName()); + if (!RelocSec) { + reportWarning(createError(".stack_sizes (" + + describe(Obj, *StackSizesELFSec) + + ") does not have a corresponding " + "relocation section"), + FileName); continue; } // A .stack_sizes section header's sh_link field is supposed to point // to the section that contains the functions whose stack sizes are // described in it. 
-    const SectionRef FunctionSec = Obj->toSectionRef(unwrapOrError(
-        this->FileName, EF->getSection(StackSizesELFSec->sh_link)));
-
+    const Elf_Shdr *FunctionSec = unwrapOrError(
+        this->FileName, Obj.getSection(StackSizesELFSec->sh_link));
     bool (*IsSupportedFn)(uint64_t);
     RelocationResolver Resolver;
-    std::tie(IsSupportedFn, Resolver) = getRelocationResolver(*Obj);
-    auto Contents = unwrapOrError(this->FileName, StackSizesSec.getContents());
-    DataExtractor Data(Contents, Obj->isLittleEndian(), sizeof(Elf_Addr));
+    std::tie(IsSupportedFn, Resolver) = getRelocationResolver(ElfObj);
+    ArrayRef<uint8_t> Contents =
+        unwrapOrError(this->FileName, Obj.getSectionContents(*StackSizesELFSec));
+    DataExtractor Data(Contents, Obj.isLE(), sizeof(Elf_Addr));
+
     size_t I = 0;
-    for (const RelocationRef &Reloc : RelocSec.relocations()) {
+    for (const RelocationRef &Reloc :
+         ElfObj.toSectionRef(RelocSec).relocations()) {
       ++I;
       if (!IsSupportedFn || !IsSupportedFn(Reloc.getType())) {
-        const Elf_Shdr *RelocSecShdr =
-            Obj->getSection(RelocSec.getRawDataRefImpl());
         reportUniqueWarning(createStringError(
            object_error::parse_failed,
-            describe(*EF, *RelocSecShdr) +
+            describe(Obj, *RelocSec) +
                " contains an unsupported relocation with index " + Twine(I) +
-                ": " + EF->getRelocationTypeName(Reloc.getType())));
+                ": " + Obj.getRelocationTypeName(Reloc.getType())));
        continue;
      }
 
-      this->printStackSize(Obj, Reloc, FunctionSec, *StackSizesELFSec, Resolver,
+      this->printStackSize(Reloc, FunctionSec, *StackSizesELFSec, Resolver,
                            Data);
     }
   }
 }
 
 template <class ELFT>
-void GNUStyle<ELFT>::printStackSizes(const ELFObjectFile<ELFT> *Obj) {
+void GNUStyle<ELFT>::printStackSizes() {
   bool HeaderHasBeenPrinted = false;
   auto PrintHeader = [&]() {
     if (HeaderHasBeenPrinted)
@@ -5797,10 +5764,10 @@ void GNUStyle<ELFT>::printStackSizes(const ELFObjectFile<ELFT> *Obj) {
 
   // For non-relocatable objects, look directly for sections whose name starts
   // with .stack_sizes and process the contents.
-  if (Obj->isRelocatableObject())
-    this->printRelocatableStackSizes(Obj, PrintHeader);
+  if (this->Obj.getHeader().e_type == ELF::ET_REL)
+    this->printRelocatableStackSizes(PrintHeader);
   else
-    this->printNonRelocatableStackSizes(Obj, PrintHeader);
+    this->printNonRelocatableStackSizes(PrintHeader);
 }
 
 template <class ELFT>
@@ -6697,12 +6664,12 @@ template <class ELFT> void LLVMStyle<ELFT>::printDependentLibs() {
 }
 
 template <class ELFT>
-void LLVMStyle<ELFT>::printStackSizes(const ELFObjectFile<ELFT> *Obj) {
+void LLVMStyle<ELFT>::printStackSizes() {
   ListScope L(W, "StackSizes");
-  if (Obj->isRelocatableObject())
-    this->printRelocatableStackSizes(Obj, []() {});
+  if (this->Obj.getHeader().e_type == ELF::ET_REL)
+    this->printRelocatableStackSizes([]() {});
   else
-    this->printNonRelocatableStackSizes(Obj, []() {});
+    this->printNonRelocatableStackSizes([]() {});
 }
 
 template <class ELFT>

From fc446935d724e87be515eb465293d82e040eb571 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 15 Sep 2020 10:06:35 +0100
Subject: [PATCH 0661/1079] [X86] detectAVGPattern - accept non-pow2 vectors by padding.

Drop the pow2 vector limitation for AVG generation by padding the vector to
the next pow2, creating the PAVG nodes and then extracting the final
subvector.

Fixes some poor codegen that has been annoying me for years.
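
As a rough scalar illustration of the padding strategy (an editor's sketch,
not the SelectionDAG code below; the helper names are invented and the inputs
are assumed to be equal-length byte vectors), the transform behaves like this
C++, where zero padding stands in for the UNDEF lanes:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Smallest power of two >= N, in the spirit of llvm::PowerOf2Ceil (N > 0).
    static std::size_t powerOf2Ceil(std::size_t N) {
      std::size_t P = 1;
      while (P < N)
        P <<= 1;
      return P;
    }

    // Pad both inputs to the next power-of-2 lane count, take the rounded
    // per-lane average ((a + b + 1) >> 1, i.e. PAVGB semantics), then trim
    // back to the original lane count ("extract the original subvector").
    static std::vector<uint8_t> avgPadToPow2(std::vector<uint8_t> A,
                                             std::vector<uint8_t> B) {
      const std::size_t NumElems = A.size();
      const std::size_t NumElemsPow2 = powerOf2Ceil(NumElems);
      A.resize(NumElemsPow2, 0);
      B.resize(NumElemsPow2, 0);
      std::vector<uint8_t> R(NumElemsPow2);
      for (std::size_t I = 0; I != NumElemsPow2; ++I)
        R[I] = static_cast<uint8_t>((unsigned(A[I]) + unsigned(B[I]) + 1) >> 1);
      R.resize(NumElems);
      return R;
    }

In the DAG version below the padded lanes are UNDEF rather than zero, the
per-lane copy is a build_vector of extract_vector_elt nodes, and the final
trim is an EXTRACT_SUBVECTOR at index 0.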
---
 llvm/lib/Target/X86/X86ISelLowering.cpp |  36 +-
 llvm/test/CodeGen/X86/avg.ll            | 689 ++++--------
 2 files changed, 121 insertions(+), 604 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a704ac3345123..0af3cacb22813 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37860,7 +37860,8 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
   case X86ISD::UNPCKL:
   case X86ISD::UNPCKH:
   case X86ISD::BLENDI:
-  // Saturated Packs.
+  // Integer ops.
+  case X86ISD::AVG:
   case X86ISD::PACKSS:
   case X86ISD::PACKUS:
   // Horizontal Ops.
@@ -44183,8 +44184,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
   unsigned NumElems = VT.getVectorNumElements();
   EVT ScalarVT = VT.getVectorElementType();
-  if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) &&
-        NumElems >= 2 && isPowerOf2_32(NumElems)))
+  if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) && NumElems >= 2))
     return SDValue();
 
   // InScalarVT is the intermediate type in AVG pattern and it should be greater
@@ -44235,6 +44235,29 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
     return DAG.getNode(X86ISD::AVG, DL, Ops[0].getValueType(), Ops);
   };
 
+  auto AVGSplitter = [&](SDValue Op0, SDValue Op1) {
+    // Pad to a power-of-2 vector, split+apply and extract the original vector.
+    unsigned NumElemsPow2 = PowerOf2Ceil(NumElems);
+    EVT Pow2VT = EVT::getVectorVT(*DAG.getContext(), ScalarVT, NumElemsPow2);
+    if (NumElemsPow2 != NumElems) {
+      SmallVector<SDValue, 32> Ops0(NumElemsPow2, DAG.getUNDEF(ScalarVT));
+      SmallVector<SDValue, 32> Ops1(NumElemsPow2, DAG.getUNDEF(ScalarVT));
+      for (unsigned i = 0; i != NumElems; ++i) {
+        SDValue Idx = DAG.getIntPtrConstant(i, DL);
+        Ops0[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op0, Idx);
+        Ops1[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op1, Idx);
+      }
+      Op0 = DAG.getBuildVector(Pow2VT, DL, Ops0);
+      Op1 = DAG.getBuildVector(Pow2VT, DL, Ops1);
+    }
+    SDValue Res =
+        SplitOpsAndApply(DAG, Subtarget, DL, Pow2VT, {Op0, Op1}, AVGBuilder);
+    if (NumElemsPow2 == NumElems)
+      return Res;
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+                       DAG.getIntPtrConstant(0, DL));
+  };
+
   // Take care of the case when one of the operands is a constant vector whose
   // element is in the range [1, 256].
   if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) &&
@@ -44245,9 +44268,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
     SDValue VecOnes = DAG.getConstant(1, DL, InVT);
     Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], VecOnes);
     Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]);
-    return SplitOpsAndApply(DAG, Subtarget, DL, VT,
-                            { Operands[0].getOperand(0), Operands[1] },
-                            AVGBuilder);
+    return AVGSplitter(Operands[0].getOperand(0), Operands[1]);
   }
 
   // Matches 'add like' patterns: add(Op0,Op1) + zext(or(Op0,Op1)).
@@ -44294,8 +44315,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
   }
 
   // The pattern is detected, emit X86ISD::AVG instruction(s).
- return SplitOpsAndApply(DAG, Subtarget, DL, VT, {Operands[0], Operands[1]}, - AVGBuilder); + return AVGSplitter(Operands[0], Operands[1]); } return SDValue(); diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll index 051493a4ab57a..e2139fd20d32c 100644 --- a/llvm/test/CodeGen/X86/avg.ll +++ b/llvm/test/CodeGen/X86/avg.ll @@ -90,157 +90,29 @@ define void @avg_v16i8(<16 x i8>* %a, <16 x i8>* %b) nounwind { define void @avg_v24i8(<24 x i8>* %a, <24 x i8>* %b) nounwind { ; SSE2-LABEL: avg_v24i8: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa (%rdi), %xmm5 -; SSE2-NEXT: movdqa 16(%rdi), %xmm6 ; SSE2-NEXT: movdqa (%rsi), %xmm0 ; SSE2-NEXT: movdqa 16(%rsi), %xmm1 -; SSE2-NEXT: pxor %xmm7, %xmm7 -; SSE2-NEXT: movdqa %xmm5, %xmm4 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm7[8],xmm4[9],xmm7[9],xmm4[10],xmm7[10],xmm4[11],xmm7[11],xmm4[12],xmm7[12],xmm4[13],xmm7[13],xmm4[14],xmm7[14],xmm4[15],xmm7[15] -; SSE2-NEXT: movdqa %xmm4, %xmm8 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm8 = xmm8[4],xmm7[4],xmm8[5],xmm7[5],xmm8[6],xmm7[6],xmm8[7],xmm7[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm7[0],xmm4[1],xmm7[1],xmm4[2],xmm7[2],xmm4[3],xmm7[3] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1],xmm5[2],xmm7[2],xmm5[3],xmm7[3],xmm5[4],xmm7[4],xmm5[5],xmm7[5],xmm5[6],xmm7[6],xmm5[7],xmm7[7] -; SSE2-NEXT: movdqa %xmm5, %xmm9 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm9 = xmm9[4],xmm7[4],xmm9[5],xmm7[5],xmm9[6],xmm7[6],xmm9[7],xmm7[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1],xmm5[2],xmm7[2],xmm5[3],xmm7[3] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7] -; SSE2-NEXT: movdqa %xmm6, %xmm10 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm10 = xmm10[4],xmm7[4],xmm10[5],xmm7[5],xmm10[6],xmm7[6],xmm10[7],xmm7[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm7[8],xmm2[9],xmm7[9],xmm2[10],xmm7[10],xmm2[11],xmm7[11],xmm2[12],xmm7[12],xmm2[13],xmm7[13],xmm2[14],xmm7[14],xmm2[15],xmm7[15] -; SSE2-NEXT: movdqa %xmm2, %xmm3 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm7[4],xmm3[5],xmm7[5],xmm3[6],xmm7[6],xmm3[7],xmm7[7] -; SSE2-NEXT: paddd %xmm8, %xmm3 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm7[0],xmm2[1],xmm7[1],xmm2[2],xmm7[2],xmm2[3],xmm7[3] -; SSE2-NEXT: paddd %xmm4, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3],xmm0[4],xmm7[4],xmm0[5],xmm7[5],xmm0[6],xmm7[6],xmm0[7],xmm7[7] -; SSE2-NEXT: movdqa %xmm0, %xmm4 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm7[4],xmm4[5],xmm7[5],xmm4[6],xmm7[6],xmm4[7],xmm7[7] -; SSE2-NEXT: paddd %xmm9, %xmm4 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] -; SSE2-NEXT: paddd %xmm5, %xmm0 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3],xmm1[4],xmm7[4],xmm1[5],xmm7[5],xmm1[6],xmm7[6],xmm1[7],xmm7[7] -; SSE2-NEXT: movdqa %xmm1, %xmm5 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm7[4],xmm5[5],xmm7[5],xmm5[6],xmm7[6],xmm5[7],xmm7[7] -; SSE2-NEXT: paddd %xmm10, %xmm5 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3] -; SSE2-NEXT: paddd %xmm6, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm6, %xmm6 -; SSE2-NEXT: psubd %xmm6, %xmm3 -; SSE2-NEXT: 
psubd %xmm6, %xmm2 -; SSE2-NEXT: psubd %xmm6, %xmm4 -; SSE2-NEXT: psubd %xmm6, %xmm0 -; SSE2-NEXT: psubd %xmm6, %xmm5 -; SSE2-NEXT: psubd %xmm6, %xmm1 -; SSE2-NEXT: psrld $1, %xmm1 -; SSE2-NEXT: psrld $1, %xmm5 -; SSE2-NEXT: psrld $1, %xmm0 -; SSE2-NEXT: psrld $1, %xmm4 -; SSE2-NEXT: psrld $1, %xmm2 -; SSE2-NEXT: psrld $1, %xmm3 -; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [255,0,255,0,255,0,255,0] -; SSE2-NEXT: pand %xmm6, %xmm3 -; SSE2-NEXT: pand %xmm6, %xmm2 -; SSE2-NEXT: packuswb %xmm3, %xmm2 -; SSE2-NEXT: pand %xmm6, %xmm4 -; SSE2-NEXT: pand %xmm6, %xmm0 -; SSE2-NEXT: packuswb %xmm4, %xmm0 -; SSE2-NEXT: packuswb %xmm2, %xmm0 -; SSE2-NEXT: pand %xmm6, %xmm5 -; SSE2-NEXT: pand %xmm6, %xmm1 -; SSE2-NEXT: packuswb %xmm5, %xmm1 -; SSE2-NEXT: packuswb %xmm1, %xmm1 +; SSE2-NEXT: pavgb (%rdi), %xmm0 +; SSE2-NEXT: pavgb 16(%rdi), %xmm1 ; SSE2-NEXT: movq %xmm1, (%rax) ; SSE2-NEXT: movdqu %xmm0, (%rax) ; SSE2-NEXT: retq ; ; AVX1-LABEL: avg_v24i8: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa (%rdi), %xmm0 -; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero -; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[1,1,1,1] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm8 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero -; AVX1-NEXT: vmovdqa (%rsi), %xmm6 -; AVX1-NEXT: vmovdqa 16(%rsi), %xmm7 -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm6[2,3,2,3] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero -; AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm6[3,3,3,3] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero -; AVX1-NEXT: vpaddd %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm6[1,1,1,1] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero -; AVX1-NEXT: vpaddd %xmm3, %xmm4, %xmm3 -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero -; AVX1-NEXT: vpaddd %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm7[1,1,1,1] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero -; AVX1-NEXT: vpaddd %xmm4, %xmm5, %xmm4 -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero -; AVX1-NEXT: vpaddd %xmm5, %xmm8, %xmm5 -; AVX1-NEXT: vpcmpeqd %xmm6, %xmm6, %xmm6 -; AVX1-NEXT: vpsubd %xmm6, %xmm1, %xmm1 -; AVX1-NEXT: vpsubd %xmm6, %xmm2, %xmm2 -; AVX1-NEXT: vpsubd %xmm6, %xmm3, %xmm3 -; AVX1-NEXT: vpsubd %xmm6, 
%xmm0, %xmm0 -; AVX1-NEXT: vpsubd %xmm6, %xmm4, %xmm4 -; AVX1-NEXT: vpsubd %xmm6, %xmm5, %xmm5 -; AVX1-NEXT: vpsrld $1, %xmm5, %xmm5 -; AVX1-NEXT: vpsrld $1, %xmm4, %xmm4 -; AVX1-NEXT: vpackusdw %xmm4, %xmm5, %xmm4 -; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrld $1, %xmm3, %xmm3 -; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpsrld $1, %xmm2, %xmm2 -; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1 -; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] -; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm4[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX1-NEXT: vmovdqa (%rsi), %xmm0 +; AVX1-NEXT: vmovdqa 16(%rsi), %xmm1 +; AVX1-NEXT: vpavgb (%rdi), %xmm0, %xmm0 +; AVX1-NEXT: vpavgb 16(%rdi), %xmm1, %xmm1 ; AVX1-NEXT: vmovq %xmm1, (%rax) ; AVX1-NEXT: vmovdqu %xmm0, (%rax) ; AVX1-NEXT: retq ; ; AVX2-LABEL: avg_v24i8: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastq 8(%rdi), %xmm0 -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero -; AVX2-NEXT: vpbroadcastq 8(%rsi), %xmm3 -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero -; AVX2-NEXT: vpaddd %ymm3, %ymm0, %ymm0 -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero -; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero -; AVX2-NEXT: vpaddd %ymm3, %ymm2, %ymm2 -; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 -; AVX2-NEXT: vpsubd %ymm3, %ymm0, %ymm0 -; AVX2-NEXT: vpsubd %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpsubd %ymm3, %ymm2, %ymm2 -; AVX2-NEXT: vpsrld $1, %ymm2, %ymm2 -; AVX2-NEXT: vpsrld $1, %ymm1, %ymm1 -; AVX2-NEXT: vpsrld $1, %ymm0, %ymm0 -; AVX2-NEXT: vpackusdw %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vmovdqa (%rsi), %ymm0 +; AVX2-NEXT: vpavgb (%rdi), %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm1 -; AVX2-NEXT: vpackusdw %xmm1, %xmm2, %xmm1 -; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] ; AVX2-NEXT: vmovq %xmm1, (%rax) ; AVX2-NEXT: vmovdqu %xmm0, (%rax) ; AVX2-NEXT: vzeroupper @@ -248,17 +120,11 @@ define void @avg_v24i8(<24 x i8>* %a, <24 x i8>* %b) nounwind { ; ; AVX512-LABEL: avg_v24i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa (%rdi), 
%xmm0 -; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero -; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero -; AVX512-NEXT: vpaddd %ymm2, %ymm1, %ymm1 -; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 -; AVX512-NEXT: vpsubd %ymm2, %ymm1, %ymm1 -; AVX512-NEXT: vpsrld $1, %ymm1, %ymm1 -; AVX512-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512-NEXT: vpavgb (%rsi), %xmm0, %xmm0 -; AVX512-NEXT: vmovdqu %xmm0, (%rax) +; AVX512-NEXT: vmovdqa (%rsi), %ymm0 +; AVX512-NEXT: vpavgb (%rdi), %ymm0, %ymm0 +; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vmovq %xmm1, (%rax) +; AVX512-NEXT: vmovdqu %xmm0, (%rax) ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = load <24 x i8>, <24 x i8>* %a @@ -324,314 +190,60 @@ define void @avg_v32i8(<32 x i8>* %a, <32 x i8>* %b) nounwind { define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind { ; SSE2-LABEL: avg_v48i8: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa (%rdi), %xmm1 -; SSE2-NEXT: movdqa 16(%rdi), %xmm6 -; SSE2-NEXT: movdqa 32(%rdi), %xmm11 -; SSE2-NEXT: movdqa (%rsi), %xmm12 -; SSE2-NEXT: movdqa 16(%rsi), %xmm13 -; SSE2-NEXT: movdqa 32(%rsi), %xmm0 -; SSE2-NEXT: pxor %xmm7, %xmm7 -; SSE2-NEXT: movdqa %xmm1, %xmm4 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm7[8],xmm4[9],xmm7[9],xmm4[10],xmm7[10],xmm4[11],xmm7[11],xmm4[12],xmm7[12],xmm4[13],xmm7[13],xmm4[14],xmm7[14],xmm4[15],xmm7[15] -; SSE2-NEXT: movdqa %xmm4, %xmm2 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm7[4],xmm2[5],xmm7[5],xmm2[6],xmm7[6],xmm2[7],xmm7[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm7[0],xmm4[1],xmm7[1],xmm4[2],xmm7[2],xmm4[3],xmm7[3] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3],xmm1[4],xmm7[4],xmm1[5],xmm7[5],xmm1[6],xmm7[6],xmm1[7],xmm7[7] -; SSE2-NEXT: movdqa %xmm1, %xmm10 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm10 = xmm10[4],xmm7[4],xmm10[5],xmm7[5],xmm10[6],xmm7[6],xmm10[7],xmm7[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3] -; SSE2-NEXT: movdqa %xmm6, %xmm5 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm7[8],xmm5[9],xmm7[9],xmm5[10],xmm7[10],xmm5[11],xmm7[11],xmm5[12],xmm7[12],xmm5[13],xmm7[13],xmm5[14],xmm7[14],xmm5[15],xmm7[15] -; SSE2-NEXT: movdqa %xmm5, %xmm15 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm15 = xmm15[4],xmm7[4],xmm15[5],xmm7[5],xmm15[6],xmm7[6],xmm15[7],xmm7[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1],xmm5[2],xmm7[2],xmm5[3],xmm7[3] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7] -; SSE2-NEXT: movdqa %xmm6, %xmm14 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm14 = xmm14[4],xmm7[4],xmm14[5],xmm7[5],xmm14[6],xmm7[6],xmm14[7],xmm7[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = 
xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] -; SSE2-NEXT: movdqa %xmm12, %xmm3 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm7[8],xmm3[9],xmm7[9],xmm3[10],xmm7[10],xmm3[11],xmm7[11],xmm3[12],xmm7[12],xmm3[13],xmm7[13],xmm3[14],xmm7[14],xmm3[15],xmm7[15] -; SSE2-NEXT: movdqa %xmm3, %xmm8 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm8 = xmm8[4],xmm7[4],xmm8[5],xmm7[5],xmm8[6],xmm7[6],xmm8[7],xmm7[7] -; SSE2-NEXT: paddd %xmm2, %xmm8 -; SSE2-NEXT: movdqa %xmm11, %xmm2 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm7[8],xmm2[9],xmm7[9],xmm2[10],xmm7[10],xmm2[11],xmm7[11],xmm2[12],xmm7[12],xmm2[13],xmm7[13],xmm2[14],xmm7[14],xmm2[15],xmm7[15] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1],xmm3[2],xmm7[2],xmm3[3],xmm7[3] -; SSE2-NEXT: paddd %xmm4, %xmm3 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm12 = xmm12[0],xmm7[0],xmm12[1],xmm7[1],xmm12[2],xmm7[2],xmm12[3],xmm7[3],xmm12[4],xmm7[4],xmm12[5],xmm7[5],xmm12[6],xmm7[6],xmm12[7],xmm7[7] -; SSE2-NEXT: movdqa %xmm12, %xmm9 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm9 = xmm9[4],xmm7[4],xmm9[5],xmm7[5],xmm9[6],xmm7[6],xmm9[7],xmm7[7] -; SSE2-NEXT: paddd %xmm10, %xmm9 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm12 = xmm12[0],xmm7[0],xmm12[1],xmm7[1],xmm12[2],xmm7[2],xmm12[3],xmm7[3] -; SSE2-NEXT: paddd %xmm1, %xmm12 -; SSE2-NEXT: movdqa %xmm13, %xmm4 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm7[8],xmm4[9],xmm7[9],xmm4[10],xmm7[10],xmm4[11],xmm7[11],xmm4[12],xmm7[12],xmm4[13],xmm7[13],xmm4[14],xmm7[14],xmm4[15],xmm7[15] -; SSE2-NEXT: movdqa %xmm4, %xmm10 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm10 = xmm10[4],xmm7[4],xmm10[5],xmm7[5],xmm10[6],xmm7[6],xmm10[7],xmm7[7] -; SSE2-NEXT: paddd %xmm15, %xmm10 -; SSE2-NEXT: movdqa %xmm2, %xmm15 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm15 = xmm15[4],xmm7[4],xmm15[5],xmm7[5],xmm15[6],xmm7[6],xmm15[7],xmm7[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm7[0],xmm2[1],xmm7[1],xmm2[2],xmm7[2],xmm2[3],xmm7[3] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm11 = xmm11[0],xmm7[0],xmm11[1],xmm7[1],xmm11[2],xmm7[2],xmm11[3],xmm7[3],xmm11[4],xmm7[4],xmm11[5],xmm7[5],xmm11[6],xmm7[6],xmm11[7],xmm7[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm7[0],xmm4[1],xmm7[1],xmm4[2],xmm7[2],xmm4[3],xmm7[3] -; SSE2-NEXT: paddd %xmm5, %xmm4 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm13 = xmm13[0],xmm7[0],xmm13[1],xmm7[1],xmm13[2],xmm7[2],xmm13[3],xmm7[3],xmm13[4],xmm7[4],xmm13[5],xmm7[5],xmm13[6],xmm7[6],xmm13[7],xmm7[7] -; SSE2-NEXT: movdqa %xmm13, %xmm1 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm7[4],xmm1[5],xmm7[5],xmm1[6],xmm7[6],xmm1[7],xmm7[7] -; SSE2-NEXT: paddd %xmm14, %xmm1 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm13 = xmm13[0],xmm7[0],xmm13[1],xmm7[1],xmm13[2],xmm7[2],xmm13[3],xmm7[3] -; SSE2-NEXT: paddd %xmm6, %xmm13 -; SSE2-NEXT: movdqa %xmm0, %xmm6 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm7[8],xmm6[9],xmm7[9],xmm6[10],xmm7[10],xmm6[11],xmm7[11],xmm6[12],xmm7[12],xmm6[13],xmm7[13],xmm6[14],xmm7[14],xmm6[15],xmm7[15] -; SSE2-NEXT: movdqa %xmm6, %xmm14 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm14 = xmm14[4],xmm7[4],xmm14[5],xmm7[5],xmm14[6],xmm7[6],xmm14[7],xmm7[7] -; SSE2-NEXT: paddd %xmm15, %xmm14 -; SSE2-NEXT: movdqa %xmm11, %xmm5 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm7[4],xmm5[5],xmm7[5],xmm5[6],xmm7[6],xmm5[7],xmm7[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] -; SSE2-NEXT: paddd %xmm2, %xmm6 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3],xmm0[4],xmm7[4],xmm0[5],xmm7[5],xmm0[6],xmm7[6],xmm0[7],xmm7[7] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm7[4],xmm2[5],xmm7[5],xmm2[6],xmm7[6],xmm2[7],xmm7[7] -; SSE2-NEXT: paddd %xmm5, %xmm2 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm11 = xmm11[0],xmm7[0],xmm11[1],xmm7[1],xmm11[2],xmm7[2],xmm11[3],xmm7[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] -; SSE2-NEXT: paddd %xmm11, %xmm0 -; SSE2-NEXT: pcmpeqd %xmm5, %xmm5 -; SSE2-NEXT: psubd %xmm5, %xmm8 -; SSE2-NEXT: psubd %xmm5, %xmm3 -; SSE2-NEXT: psubd %xmm5, %xmm9 -; SSE2-NEXT: psubd %xmm5, %xmm12 -; SSE2-NEXT: psubd %xmm5, %xmm10 -; SSE2-NEXT: psubd %xmm5, %xmm4 -; SSE2-NEXT: psubd %xmm5, %xmm1 -; SSE2-NEXT: psubd %xmm5, %xmm13 -; SSE2-NEXT: psubd %xmm5, %xmm14 -; SSE2-NEXT: psubd %xmm5, %xmm6 -; SSE2-NEXT: psubd %xmm5, %xmm2 -; SSE2-NEXT: psubd %xmm5, %xmm0 -; SSE2-NEXT: psrld $1, %xmm3 -; SSE2-NEXT: psrld $1, %xmm8 -; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [255,0,255,0,255,0,255,0] -; SSE2-NEXT: pand %xmm7, %xmm8 -; SSE2-NEXT: pand %xmm7, %xmm3 -; SSE2-NEXT: packuswb %xmm8, %xmm3 -; SSE2-NEXT: psrld $1, %xmm12 -; SSE2-NEXT: psrld $1, %xmm9 -; SSE2-NEXT: pand %xmm7, %xmm9 -; SSE2-NEXT: pand %xmm7, %xmm12 -; SSE2-NEXT: packuswb %xmm9, %xmm12 -; SSE2-NEXT: packuswb %xmm3, %xmm12 -; SSE2-NEXT: psrld $1, %xmm4 -; SSE2-NEXT: psrld $1, %xmm10 -; SSE2-NEXT: pand %xmm7, %xmm10 -; SSE2-NEXT: pand %xmm7, %xmm4 -; SSE2-NEXT: packuswb %xmm10, %xmm4 -; SSE2-NEXT: psrld $1, %xmm13 -; SSE2-NEXT: psrld $1, %xmm1 -; SSE2-NEXT: pand %xmm7, %xmm1 -; SSE2-NEXT: pand %xmm7, %xmm13 -; SSE2-NEXT: packuswb %xmm1, %xmm13 -; SSE2-NEXT: packuswb %xmm4, %xmm13 -; SSE2-NEXT: psrld $1, %xmm6 -; SSE2-NEXT: psrld $1, %xmm14 -; SSE2-NEXT: pand %xmm7, %xmm14 -; SSE2-NEXT: pand %xmm7, %xmm6 -; SSE2-NEXT: packuswb %xmm14, %xmm6 -; SSE2-NEXT: psrld $1, %xmm0 -; SSE2-NEXT: psrld $1, %xmm2 -; SSE2-NEXT: pand %xmm7, %xmm2 -; SSE2-NEXT: pand %xmm7, %xmm0 -; SSE2-NEXT: packuswb %xmm2, %xmm0 -; SSE2-NEXT: packuswb %xmm6, %xmm0 +; SSE2-NEXT: movdqa (%rsi), %xmm0 +; SSE2-NEXT: movdqa 16(%rsi), %xmm1 +; SSE2-NEXT: movdqa 32(%rsi), %xmm2 +; SSE2-NEXT: pavgb (%rdi), %xmm0 +; SSE2-NEXT: pavgb 16(%rdi), %xmm1 +; SSE2-NEXT: pavgb 32(%rdi), %xmm2 +; SSE2-NEXT: movdqu %xmm2, (%rax) +; SSE2-NEXT: movdqu %xmm1, (%rax) ; SSE2-NEXT: movdqu %xmm0, (%rax) -; SSE2-NEXT: movdqu %xmm13, (%rax) -; SSE2-NEXT: movdqu %xmm12, (%rax) ; SSE2-NEXT: retq ; ; AVX1-LABEL: avg_v48i8: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa (%rdi), %xmm0 -; AVX1-NEXT: vmovdqa 16(%rdi), %xmm4 -; AVX1-NEXT: vmovdqa 32(%rdi), %xmm1 -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm6 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm7 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm15 = 
xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm11 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,1,1] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm14 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm13 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm4[2,3,2,3] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm4[3,3,3,3] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm4[1,1,1,1] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero -; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX1-NEXT: vmovdqa (%rsi), %xmm0 -; AVX1-NEXT: vmovdqa 16(%rsi), %xmm4 -; AVX1-NEXT: vmovdqa 32(%rsi), %xmm3 -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm3[2,3,2,3] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero -; AVX1-NEXT: vpaddd %xmm2, %xmm5, %xmm12 -; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm3[3,3,3,3] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero -; AVX1-NEXT: vpaddd %xmm5, %xmm6, %xmm10 -; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm3[1,1,1,1] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero -; AVX1-NEXT: vpaddd %xmm6, %xmm7, %xmm9 -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero -; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm8 -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero -; AVX1-NEXT: vpaddd %xmm3, %xmm15, %xmm15 -; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm0[3,3,3,3] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero -; AVX1-NEXT: vpaddd %xmm7, %xmm11, %xmm7 -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero -; AVX1-NEXT: vpaddd %xmm2, %xmm14, %xmm14 -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX1-NEXT: vpaddd %xmm0, %xmm13, %xmm13 -; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm4[2,3,2,3] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm5 = 
xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero -; AVX1-NEXT: vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm5, %xmm5 # 16-byte Folded Reload -; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm4[3,3,3,3] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero -; AVX1-NEXT: vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm6, %xmm6 # 16-byte Folded Reload -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm4[1,1,1,1] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero -; AVX1-NEXT: vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero -; AVX1-NEXT: vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm4, %xmm4 # 16-byte Folded Reload -; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpsubd %xmm3, %xmm12, %xmm11 -; AVX1-NEXT: vpsubd %xmm3, %xmm10, %xmm10 -; AVX1-NEXT: vpsubd %xmm3, %xmm9, %xmm9 -; AVX1-NEXT: vpsubd %xmm3, %xmm8, %xmm8 -; AVX1-NEXT: vpsubd %xmm3, %xmm15, %xmm12 -; AVX1-NEXT: vpsubd %xmm3, %xmm7, %xmm7 -; AVX1-NEXT: vpsubd %xmm3, %xmm14, %xmm0 -; AVX1-NEXT: vpsubd %xmm3, %xmm13, %xmm2 -; AVX1-NEXT: vpsubd %xmm3, %xmm5, %xmm5 -; AVX1-NEXT: vpsubd %xmm3, %xmm6, %xmm6 -; AVX1-NEXT: vpsubd %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpsubd %xmm3, %xmm4, %xmm3 -; AVX1-NEXT: vpsrld $1, %xmm3, %xmm3 -; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1 -; AVX1-NEXT: vpackusdw %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vpsrld $1, %xmm6, %xmm3 -; AVX1-NEXT: vpsrld $1, %xmm5, %xmm4 -; AVX1-NEXT: vpackusdw %xmm3, %xmm4, %xmm3 -; AVX1-NEXT: vpsrld $1, %xmm2, %xmm2 -; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0 -; AVX1-NEXT: vpackusdw %xmm0, %xmm2, %xmm0 -; AVX1-NEXT: vpsrld $1, %xmm7, %xmm2 -; AVX1-NEXT: vpsrld $1, %xmm12, %xmm4 -; AVX1-NEXT: vpackusdw %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsrld $1, %xmm8, %xmm4 -; AVX1-NEXT: vpsrld $1, %xmm9, %xmm5 -; AVX1-NEXT: vpackusdw %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpsrld $1, %xmm10, %xmm5 -; AVX1-NEXT: vpsrld $1, %xmm11, %xmm6 -; AVX1-NEXT: vpackusdw %xmm5, %xmm6, %xmm5 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [255,255,255,255,255,255,255,255] -; AVX1-NEXT: vpand %xmm6, %xmm5, %xmm5 -; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4 -; AVX1-NEXT: vpackuswb %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpand %xmm6, %xmm2, %xmm2 -; AVX1-NEXT: vpand %xmm6, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm2 -; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1 -; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqa 16(%rsi), %xmm1 +; AVX1-NEXT: vmovdqa 32(%rsi), %xmm2 +; AVX1-NEXT: vpavgb 32(%rdi), %xmm2, %xmm2 +; AVX1-NEXT: vpavgb (%rdi), %xmm0, %xmm0 +; AVX1-NEXT: vpavgb 16(%rdi), %xmm1, %xmm1 ; AVX1-NEXT: vmovdqu %xmm1, (%rax) ; AVX1-NEXT: vmovdqu %xmm0, (%rax) -; AVX1-NEXT: vmovdqu %xmm4, (%rax) +; AVX1-NEXT: vmovdqu %xmm2, (%rax) ; AVX1-NEXT: retq ; ; AVX2-LABEL: avg_v48i8: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastq 24(%rdi), %xmm0 -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero -; AVX2-NEXT: 
vpbroadcastq 8(%rdi), %xmm2 -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero -; AVX2-NEXT: vpbroadcastq 40(%rdi), %xmm4 -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero,xmm4[4],zero,zero,zero,xmm4[5],zero,zero,zero,xmm4[6],zero,zero,zero,xmm4[7],zero,zero,zero -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm5 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero -; AVX2-NEXT: vpbroadcastq 24(%rsi), %xmm6 -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero,xmm6[4],zero,zero,zero,xmm6[5],zero,zero,zero,xmm6[6],zero,zero,zero,xmm6[7],zero,zero,zero -; AVX2-NEXT: vpaddd %ymm6, %ymm0, %ymm0 -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm6 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero -; AVX2-NEXT: vpaddd %ymm6, %ymm1, %ymm1 -; AVX2-NEXT: vpbroadcastq 8(%rsi), %xmm6 -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero,xmm6[4],zero,zero,zero,xmm6[5],zero,zero,zero,xmm6[6],zero,zero,zero,xmm6[7],zero,zero,zero -; AVX2-NEXT: vpaddd %ymm6, %ymm2, %ymm2 -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm6 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero -; AVX2-NEXT: vpaddd %ymm6, %ymm3, %ymm3 -; AVX2-NEXT: vpbroadcastq 40(%rsi), %xmm6 -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero,xmm6[4],zero,zero,zero,xmm6[5],zero,zero,zero,xmm6[6],zero,zero,zero,xmm6[7],zero,zero,zero -; AVX2-NEXT: vpaddd %ymm6, %ymm4, %ymm4 -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm6 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero -; AVX2-NEXT: vpaddd %ymm6, %ymm5, %ymm5 -; AVX2-NEXT: vpcmpeqd %ymm6, %ymm6, %ymm6 -; AVX2-NEXT: vpsubd %ymm6, %ymm0, %ymm0 -; AVX2-NEXT: vpsubd %ymm6, %ymm1, %ymm1 -; AVX2-NEXT: vpsubd %ymm6, %ymm2, %ymm2 -; AVX2-NEXT: vpsubd %ymm6, %ymm3, %ymm3 -; AVX2-NEXT: vpsubd %ymm6, %ymm4, %ymm4 -; AVX2-NEXT: vpsubd %ymm6, %ymm5, %ymm5 -; AVX2-NEXT: vpsrld $1, %ymm5, %ymm5 -; AVX2-NEXT: vpsrld $1, %ymm4, %ymm4 -; AVX2-NEXT: vpackusdw %ymm4, %ymm5, %ymm4 -; AVX2-NEXT: vpsrld $1, %ymm3, %ymm3 -; AVX2-NEXT: vpsrld $1, %ymm2, %ymm2 -; AVX2-NEXT: vpackusdw %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrld $1, %ymm1, %ymm1 -; AVX2-NEXT: vpsrld $1, %ymm0, %ymm0 -; AVX2-NEXT: vpackusdw %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3] -; AVX2-NEXT: 
vpand %ymm1, %ymm2, %ymm2 -; AVX2-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm2[2,3],ymm0[2,3] -; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0 -; AVX2-NEXT: vpackuswb %ymm3, %ymm0, %ymm0 -; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm4[0,2,1,3] -; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vmovdqa (%rsi), %ymm0 +; AVX2-NEXT: vpavgb (%rdi), %ymm0, %ymm0 +; AVX2-NEXT: vmovdqa 32(%rsi), %xmm1 +; AVX2-NEXT: vpavgb 32(%rdi), %xmm1, %xmm1 ; AVX2-NEXT: vmovdqu %xmm1, (%rax) ; AVX2-NEXT: vmovdqu %ymm0, (%rax) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: avg_v48i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512-NEXT: vpavgb 32(%rsi), %xmm2, %xmm2 -; AVX512-NEXT: vpavgb (%rsi), %xmm0, %xmm0 -; AVX512-NEXT: vpavgb 16(%rsi), %xmm1, %xmm1 -; AVX512-NEXT: vmovdqu %xmm1, (%rax) -; AVX512-NEXT: vmovdqu %xmm0, (%rax) -; AVX512-NEXT: vmovdqu %xmm2, (%rax) -; AVX512-NEXT: retq +; AVX512F-LABEL: avg_v48i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovdqa (%rsi), %ymm0 +; AVX512F-NEXT: vpavgb (%rdi), %ymm0, %ymm0 +; AVX512F-NEXT: vmovdqa 32(%rsi), %xmm1 +; AVX512F-NEXT: vpavgb 32(%rdi), %xmm1, %xmm1 +; AVX512F-NEXT: vmovdqu %xmm1, (%rax) +; AVX512F-NEXT: vmovdqu %ymm0, (%rax) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: avg_v48i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vmovdqa64 (%rsi), %zmm0 +; AVX512BW-NEXT: vpavgb (%rdi), %zmm0, %zmm0 +; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, (%rax) +; AVX512BW-NEXT: vmovdqu %ymm0, (%rax) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq %1 = load <48 x i8>, <48 x i8>* %a %2 = load <48 x i8>, <48 x i8>* %b %3 = zext <48 x i8> %1 to <48 x i32> @@ -897,193 +509,78 @@ define void @avg_v32i16(<32 x i16>* %a, <32 x i16>* %b) nounwind { define void @avg_v40i16(<40 x i16>* %a, <40 x i16>* %b) nounwind { ; SSE2-LABEL: avg_v40i16: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa 64(%rdi), %xmm10 -; SSE2-NEXT: movdqa (%rdi), %xmm5 -; SSE2-NEXT: movdqa 16(%rdi), %xmm6 -; SSE2-NEXT: movdqa 32(%rdi), %xmm13 -; SSE2-NEXT: movdqa 48(%rdi), %xmm12 -; SSE2-NEXT: movdqa 64(%rsi), %xmm8 -; SSE2-NEXT: movdqa (%rsi), %xmm1 -; SSE2-NEXT: movdqa 16(%rsi), %xmm14 -; SSE2-NEXT: movdqa 32(%rsi), %xmm11 -; SSE2-NEXT: movdqa 48(%rsi), %xmm9 -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: movdqa %xmm5, %xmm3 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3] -; SSE2-NEXT: movdqa %xmm1, %xmm4 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] -; SSE2-NEXT: paddd %xmm3, %xmm4 -; SSE2-NEXT: movdqa %xmm6, %xmm7 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm7 = xmm7[4],xmm2[4],xmm7[5],xmm2[5],xmm7[6],xmm2[6],xmm7[7],xmm2[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] -; SSE2-NEXT: paddd %xmm5, %xmm1 -; SSE2-NEXT: movdqa %xmm14, %xmm3 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; SSE2-NEXT: paddd %xmm7, %xmm3 -; SSE2-NEXT: movdqa %xmm13, %xmm5 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7] -; SSE2-NEXT: punpcklwd {{.*#+}} 
xmm13 = xmm13[0],xmm2[0],xmm13[1],xmm2[1],xmm13[2],xmm2[2],xmm13[3],xmm2[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm14 = xmm14[0],xmm2[0],xmm14[1],xmm2[1],xmm14[2],xmm2[2],xmm14[3],xmm2[3] -; SSE2-NEXT: paddd %xmm6, %xmm14 -; SSE2-NEXT: movdqa %xmm11, %xmm7 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm7 = xmm7[4],xmm2[4],xmm7[5],xmm2[5],xmm7[6],xmm2[6],xmm7[7],xmm2[7] -; SSE2-NEXT: paddd %xmm5, %xmm7 -; SSE2-NEXT: movdqa %xmm12, %xmm5 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm12 = xmm12[0],xmm2[0],xmm12[1],xmm2[1],xmm12[2],xmm2[2],xmm12[3],xmm2[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm11 = xmm11[0],xmm2[0],xmm11[1],xmm2[1],xmm11[2],xmm2[2],xmm11[3],xmm2[3] -; SSE2-NEXT: paddd %xmm13, %xmm11 -; SSE2-NEXT: movdqa %xmm9, %xmm6 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7] -; SSE2-NEXT: paddd %xmm5, %xmm6 -; SSE2-NEXT: movdqa %xmm10, %xmm0 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm9 = xmm9[0],xmm2[0],xmm9[1],xmm2[1],xmm9[2],xmm2[2],xmm9[3],xmm2[3] -; SSE2-NEXT: paddd %xmm12, %xmm9 -; SSE2-NEXT: movdqa %xmm8, %xmm5 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7] -; SSE2-NEXT: paddd %xmm0, %xmm5 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm2[0],xmm10[1],xmm2[1],xmm10[2],xmm2[2],xmm10[3],xmm2[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3] -; SSE2-NEXT: paddd %xmm10, %xmm8 -; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE2-NEXT: psubd %xmm0, %xmm4 -; SSE2-NEXT: psubd %xmm0, %xmm1 -; SSE2-NEXT: psubd %xmm0, %xmm3 -; SSE2-NEXT: psubd %xmm0, %xmm14 -; SSE2-NEXT: psubd %xmm0, %xmm7 -; SSE2-NEXT: psubd %xmm0, %xmm11 -; SSE2-NEXT: psubd %xmm0, %xmm6 -; SSE2-NEXT: psubd %xmm0, %xmm9 -; SSE2-NEXT: psubd %xmm0, %xmm5 -; SSE2-NEXT: psubd %xmm0, %xmm8 -; SSE2-NEXT: psrld $1, %xmm1 -; SSE2-NEXT: psrld $1, %xmm4 -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm4[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; SSE2-NEXT: psrld $1, %xmm14 -; SSE2-NEXT: psrld $1, %xmm3 -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm14[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] -; SSE2-NEXT: psrld $1, %xmm11 -; SSE2-NEXT: psrld $1, %xmm7 -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm7[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm11[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0] -; SSE2-NEXT: psrld $1, %xmm9 -; SSE2-NEXT: psrld $1, %xmm6 -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm6[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = 
xmm0[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm9[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm0[0] -; SSE2-NEXT: psrld $1, %xmm8 -; SSE2-NEXT: psrld $1, %xmm5 -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm5[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm8[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm5 = xmm5[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm0[0] -; SSE2-NEXT: movdqu %xmm5, (%rax) +; SSE2-NEXT: movdqa (%rsi), %xmm0 +; SSE2-NEXT: movdqa 16(%rsi), %xmm1 +; SSE2-NEXT: movdqa 32(%rsi), %xmm2 +; SSE2-NEXT: movdqa 48(%rsi), %xmm3 +; SSE2-NEXT: pavgw (%rdi), %xmm0 +; SSE2-NEXT: pavgw 16(%rdi), %xmm1 +; SSE2-NEXT: pavgw 32(%rdi), %xmm2 +; SSE2-NEXT: pavgw 48(%rdi), %xmm3 +; SSE2-NEXT: movdqa 64(%rsi), %xmm4 +; SSE2-NEXT: pavgw 64(%rdi), %xmm4 ; SSE2-NEXT: movdqu %xmm4, (%rax) ; SSE2-NEXT: movdqu %xmm3, (%rax) ; SSE2-NEXT: movdqu %xmm2, (%rax) ; SSE2-NEXT: movdqu %xmm1, (%rax) +; SSE2-NEXT: movdqu %xmm0, (%rax) ; SSE2-NEXT: retq ; ; AVX1-LABEL: avg_v40i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa (%rdi), %xmm0 -; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3 -; AVX1-NEXT: vmovdqa 64(%rdi), %xmm4 -; AVX1-NEXT: vpavgw 64(%rsi), %xmm4, %xmm4 -; AVX1-NEXT: vpavgw (%rsi), %xmm0, %xmm0 -; AVX1-NEXT: vpavgw 16(%rsi), %xmm1, %xmm1 -; AVX1-NEXT: vpavgw 32(%rsi), %xmm2, %xmm2 -; AVX1-NEXT: vpavgw 48(%rsi), %xmm3, %xmm3 +; AVX1-NEXT: vmovdqa 64(%rsi), %xmm0 +; AVX1-NEXT: vpavgw 64(%rdi), %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa (%rsi), %xmm1 +; AVX1-NEXT: vmovdqa 16(%rsi), %xmm2 +; AVX1-NEXT: vmovdqa 32(%rsi), %xmm3 +; AVX1-NEXT: vmovdqa 48(%rsi), %xmm4 +; AVX1-NEXT: vpavgw (%rdi), %xmm1, %xmm1 +; AVX1-NEXT: vpavgw 16(%rdi), %xmm2, %xmm2 +; AVX1-NEXT: vpavgw 32(%rdi), %xmm3, %xmm3 +; AVX1-NEXT: vpavgw 48(%rdi), %xmm4, %xmm4 +; AVX1-NEXT: vmovdqu %xmm4, (%rax) ; AVX1-NEXT: vmovdqu %xmm3, (%rax) ; AVX1-NEXT: vmovdqu %xmm2, (%rax) ; AVX1-NEXT: vmovdqu %xmm1, (%rax) ; AVX1-NEXT: vmovdqu %xmm0, (%rax) -; AVX1-NEXT: vmovdqu %xmm4, (%rax) ; AVX1-NEXT: retq ; ; AVX2-LABEL: avg_v40i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa (%rdi), %xmm0 -; AVX2-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX2-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX2-NEXT: vmovdqa 48(%rdi), %xmm3 -; AVX2-NEXT: vmovdqa 64(%rdi), %xmm4 -; AVX2-NEXT: vpavgw 64(%rsi), %xmm4, %xmm4 -; AVX2-NEXT: vpavgw (%rsi), %xmm0, %xmm0 -; AVX2-NEXT: vpavgw 16(%rsi), %xmm1, %xmm1 -; AVX2-NEXT: vpavgw 32(%rsi), %xmm2, %xmm2 -; AVX2-NEXT: vpavgw 48(%rsi), %xmm3, %xmm3 -; AVX2-NEXT: vmovdqu %xmm3, (%rax) +; AVX2-NEXT: vmovdqa (%rsi), %ymm0 +; AVX2-NEXT: vmovdqa 32(%rsi), %ymm1 +; AVX2-NEXT: vpavgw (%rdi), %ymm0, %ymm0 +; AVX2-NEXT: vpavgw 32(%rdi), %ymm1, %ymm1 +; AVX2-NEXT: vmovdqa 64(%rsi), %xmm2 +; AVX2-NEXT: vpavgw 64(%rdi), %xmm2, %xmm2 ; AVX2-NEXT: vmovdqu %xmm2, (%rax) -; AVX2-NEXT: vmovdqu %xmm1, (%rax) -; AVX2-NEXT: vmovdqu %xmm0, (%rax) -; AVX2-NEXT: vmovdqu %xmm4, (%rax) +; AVX2-NEXT: vmovdqu %ymm1, (%rax) +; AVX2-NEXT: vmovdqu %ymm0, (%rax) +; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512F-LABEL: avg_v40i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm1 -; 
AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero -; AVX512F-NEXT: vpaddd %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 -; AVX512F-NEXT: vpsubd %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpsrld $1, %ymm2, %ymm2 -; AVX512F-NEXT: vpmovdw %zmm2, %ymm2 -; AVX512F-NEXT: vpavgw (%rsi), %ymm0, %ymm0 -; AVX512F-NEXT: vpavgw 32(%rsi), %ymm1, %ymm1 +; AVX512F-NEXT: vmovdqa 64(%rsi), %xmm0 +; AVX512F-NEXT: vpavgw 64(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: vmovdqa (%rsi), %ymm1 +; AVX512F-NEXT: vmovdqa 32(%rsi), %ymm2 +; AVX512F-NEXT: vpavgw (%rdi), %ymm1, %ymm1 +; AVX512F-NEXT: vpavgw 32(%rdi), %ymm2, %ymm2 +; AVX512F-NEXT: vmovdqu %ymm2, (%rax) ; AVX512F-NEXT: vmovdqu %ymm1, (%rax) -; AVX512F-NEXT: vmovdqu %ymm0, (%rax) -; AVX512F-NEXT: vmovdqu %xmm2, (%rax) +; AVX512F-NEXT: vmovdqu %xmm0, (%rax) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: avg_v40i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm1 -; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero -; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero -; AVX512BW-NEXT: vpaddd %ymm3, %ymm2, %ymm2 -; AVX512BW-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 -; AVX512BW-NEXT: vpsubd %ymm3, %ymm2, %ymm2 -; AVX512BW-NEXT: vpsrld $1, %ymm2, %ymm2 -; AVX512BW-NEXT: vpavgw 32(%rsi), %ymm1, %ymm1 -; AVX512BW-NEXT: vpavgw (%rsi), %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovdw %zmm2, %ymm1 -; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rax) +; AVX512BW-NEXT: vmovdqa64 (%rsi), %zmm0 +; AVX512BW-NEXT: vpavgw (%rdi), %zmm0, %zmm0 +; AVX512BW-NEXT: vmovdqa 64(%rsi), %xmm1 +; AVX512BW-NEXT: vpavgw 64(%rdi), %xmm1, %xmm1 ; AVX512BW-NEXT: vmovdqu %xmm1, (%rax) +; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rax) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq %1 = load <40 x i16>, <40 x i16>* %a From b4b1b84106a03d7b6374090bc0ff04b3a77a0862 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Tue, 15 Sep 2020 10:04:02 +0100 Subject: [PATCH 0662/1079] [MVE] fix typo in llvm debug message. NFC. --- llvm/lib/Target/ARM/MVETailPredication.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp index 26e21f04c6b9a..b2c15be75cd4e 100644 --- a/llvm/lib/Target/ARM/MVETailPredication.cpp +++ b/llvm/lib/Target/ARM/MVETailPredication.cpp @@ -424,14 +424,14 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, // the case when the element count is just a variable %N, we can just see // if it is an operand in the tripcount scev expression. 
      if (isa<SCEVConstant>(TC) && !SE->hasOperand(TC, EC)) {
-        LLVM_DEBUG(dbgs() << "ARM TP: 1Can't verify the element counter\n");
+        LLVM_DEBUG(dbgs() << "ARM TP: Can't verify the element counter\n");
         return false;
       }
     } else if (const SCEVAddRecExpr *AddRecExpr = dyn_cast<SCEVAddRecExpr>(EC)) {
       // For more complicated AddRecExpr, check that the corresponding loop and
       // its loop hierarhy contains the trip count loop.
       if (!AddRecExpr->getLoop()->contains(L)) {
-        LLVM_DEBUG(dbgs() << "ARM TP: 2Can't verify the element counter\n");
+        LLVM_DEBUG(dbgs() << "ARM TP: Can't verify the element counter\n");
         return false;
       }
     } else {

From 5f13d6c1eef7fa4264d143af6e7bafbb74937ccd Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 15 Sep 2020 10:37:25 +0100
Subject: [PATCH 0663/1079] [Transforms][Coroutines] Add missing header path to CMakeLists.txt

Helps Visual Studio check include dependencies.
---
 llvm/lib/Transforms/Coroutines/CMakeLists.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/lib/Transforms/Coroutines/CMakeLists.txt b/llvm/lib/Transforms/Coroutines/CMakeLists.txt
index c1f6d6c8d8d8f..783093c16e60e 100644
--- a/llvm/lib/Transforms/Coroutines/CMakeLists.txt
+++ b/llvm/lib/Transforms/Coroutines/CMakeLists.txt
@@ -6,6 +6,9 @@ add_llvm_component_library(LLVMCoroutines
   CoroFrame.cpp
   CoroSplit.cpp
 
+  ADDITIONAL_HEADER_DIRS
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/Coroutines
+
   DEPENDS
   intrinsics_gen
   )

From 2508ef014e8b01006de4e5ee6fd451d1f68d550f Mon Sep 17 00:00:00 2001
From: Qiu Chaofan
Date: Tue, 15 Sep 2020 17:59:10 +0800
Subject: [PATCH 0664/1079] [SelectionDAG] Remove unused FP constant in getNegatedExpression

Commit 960cbc53 started immediately removing nodes that won't be used, to
avoid compilation-time explosion. This patch extends that removal to
constants, fixing PR47517.

Reviewed By: RKSimon, steven.zhang

Differential Revision: https://reviews.llvm.org/D87614
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp |  4 ++-
 llvm/test/CodeGen/X86/pr47517.ll            | 28 +++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/pr47517.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 3446ee0efc450..749a5e83058e7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5773,8 +5773,10 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
 
     // If we already have the use of the negated floating constant, it is free
     // to negate it even it has multiple uses.
- if (!Op.hasOneUse() && CFP.use_empty()) + if (!Op.hasOneUse() && CFP.use_empty()) { + RemoveDeadNode(CFP); break; + } Cost = NegatibleCost::Neutral; return CFP; } diff --git a/llvm/test/CodeGen/X86/pr47517.ll b/llvm/test/CodeGen/X86/pr47517.ll new file mode 100644 index 0000000000000..6b508acf15dda --- /dev/null +++ b/llvm/test/CodeGen/X86/pr47517.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple x86_64 < %s | FileCheck %s + +; To ensure unused floating point constant is removed in negation +define float @test(float %src, float* %p) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq $0, (%rdi) +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: retq +entry: + %a0 = getelementptr inbounds float, float* %p, i32 0 + %a1 = getelementptr inbounds float, float* %p, i32 1 + store float 0.000000e+00, float* %a0 + store float 0.000000e+00, float* %a1 + %zero = load float, float* %a0 + %fmul1 = fmul fast float %zero, %src + %fadd1 = fadd fast float %fmul1, %zero + %fmul2 = fmul fast float %fadd1, 2.000000e+00 + %fmul3 = fmul fast float %fmul2, %fmul2 + %fmul4 = fmul fast float %fmul2, 2.000000e+00 + %fadd2 = fadd fast float %fmul4, -3.000000e+00 + %fmul5 = fmul fast float %fadd2, %fmul2 + %fadd3 = fadd fast float %fmul2, %src + %fadd4 = fadd fast float %fadd3, %fmul5 + %fmul6 = fmul fast float %fmul3, %fadd4 + ret float %fmul6 +} From 1119bf95be94950da602b268dc96dbb2110cbe15 Mon Sep 17 00:00:00 2001 From: Meera Nakrani Date: Tue, 15 Sep 2020 10:14:30 +0000 Subject: [PATCH 0665/1079] [ARM] Corrected condition in isSaturatingConditional Fixed a small error in an if condition to prevent usat/ssat being generated if (upper constant + 1) is not a power of 2. --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 +- llvm/test/CodeGen/ARM/usat.ll | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index d9ccd86802c75..cfb77f466cd19 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -5062,7 +5062,7 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V, int64_t PosVal = std::max(Val1, Val2); int64_t NegVal = std::min(Val1, Val2); - if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) && + if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) || !isPowerOf2_64(PosVal + 1)) return false; diff --git a/llvm/test/CodeGen/ARM/usat.ll b/llvm/test/CodeGen/ARM/usat.ll index 99064386fa504..ba4e0dd037649 100644 --- a/llvm/test/CodeGen/ARM/usat.ll +++ b/llvm/test/CodeGen/ARM/usat.ll @@ -176,6 +176,18 @@ entry: ret i32 %saturateUp } +; The interval is [0, k] but k+1 is not a power of 2 +define i32 @no_unsigned_sat_incorrect_constant2(i32 %x) #0 { +; CHECK-LABEL: no_unsigned_sat_incorrect_constant2: +; CHECK-NOT: usat +entry: + %0 = icmp sgt i32 %x, 0 + %saturateLow = select i1 %0, i32 %x, i32 0 + %1 = icmp slt i32 %saturateLow, 8388609 + %saturateUp = select i1 %1, i32 %saturateLow, i32 8388609 + ret i32 %saturateUp +} + ; The interval is not [0, k] define i32 @no_unsigned_sat_incorrect_interval(i32 %x) #0 { ; CHECK-LABEL: no_unsigned_sat_incorrect_interval: From 9eab73fa17f5920178a87ee8a5021f4fd6f0f5ef Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 15 Sep 2020 11:18:44 +0100 Subject: [PATCH 0666/1079] [X86] Update SSE/AVX integer MINMAX intrinsics to emit llvm.smax.* etc. 
From 9eab73fa17f5920178a87ee8a5021f4fd6f0f5ef Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 15 Sep 2020 11:18:44 +0100
Subject: [PATCH 0666/1079] [X86] Update SSE/AVX integer MINMAX intrinsics to
 emit llvm.smax.* etc. (PR46851)

We're now getting close to having the necessary analysis/combines etc.
for the new generic llvm smax/smin/umax/umin intrinsics.

This patch updates the SSE/AVX integer MINMAX intrinsics to emit the
generic equivalents instead of the icmp+select code pattern.

Differential Revision: https://reviews.llvm.org/D87603
---
 clang/lib/CodeGen/CGBuiltin.cpp               |  17 +-
 clang/test/CodeGen/X86/avx2-builtins.c        |  36 ++--
 .../CodeGen/X86/avx512-reduceMinMaxIntrin.c   | 174 +++++++-----------
 clang/test/CodeGen/X86/avx512bw-builtins.c    |  72 +++-----
 clang/test/CodeGen/X86/avx512f-builtins.c     |  72 +++-----
 clang/test/CodeGen/X86/avx512vl-builtins.c    | 120 ++++--------
 clang/test/CodeGen/X86/avx512vlbw-builtins.c  |  96 ++++------
 clang/test/CodeGen/X86/sse2-builtins.c        |  12 +-
 clang/test/CodeGen/X86/sse41-builtins.c       |  24 +--
 llvm/lib/IR/AutoUpgrade.cpp                   |  21 +--
 .../CodeGen/X86/avx2-intrinsics-fast-isel.ll  |  48 ++---
 .../CodeGen/X86/sse2-intrinsics-fast-isel.ll  |  16 +-
 .../CodeGen/X86/sse41-intrinsics-fast-isel.ll |  32 ++--
 13 files changed, 262 insertions(+), 478 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b2abc10544e12..3c7f13a006d07 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -11314,15 +11314,6 @@ static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF,
   return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
 }
 
-static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
-                            ArrayRef<Value *> Ops) {
-  Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
-  Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
-
-  assert(Ops.size() == 2);
-  return Res;
-}
-
 // Lowers X86 FMA intrinsics to IR.
 static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                              unsigned BuiltinID, bool IsAddSub) {
@@ -13306,7 +13297,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_pmaxsw512:
   case X86::BI__builtin_ia32_pmaxsd512:
   case X86::BI__builtin_ia32_pmaxsq512:
-    return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
+    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::smax);
   case X86::BI__builtin_ia32_pmaxub128:
   case X86::BI__builtin_ia32_pmaxuw128:
   case X86::BI__builtin_ia32_pmaxud128:
@@ -13319,7 +13310,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_pmaxuw512:
   case X86::BI__builtin_ia32_pmaxud512:
   case X86::BI__builtin_ia32_pmaxuq512:
-    return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
+    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::umax);
   case X86::BI__builtin_ia32_pminsb128:
   case X86::BI__builtin_ia32_pminsw128:
   case X86::BI__builtin_ia32_pminsd128:
@@ -13332,7 +13323,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_pminsw512:
   case X86::BI__builtin_ia32_pminsd512:
   case X86::BI__builtin_ia32_pminsq512:
-    return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
+    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::smin);
   case X86::BI__builtin_ia32_pminub128:
   case X86::BI__builtin_ia32_pminuw128:
   case X86::BI__builtin_ia32_pminud128:
@@ -13345,7 +13336,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_pminuw512:
   case X86::BI__builtin_ia32_pminud512:
   case X86::BI__builtin_ia32_pminuq512:
-    return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
+    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::umin);
 
   case X86::BI__builtin_ia32_pmuludq128:
   case X86::BI__builtin_ia32_pmuludq256:
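The removed EmitX86MinMax helper built the icmp+select pair by hand; the
generic intrinsics express the same lane-wise operation directly. A scalar
C++ sketch of the equivalence that the test updates below rely on (purely
illustrative, with made-up function names; the patch swaps IR patterns, not
C code):

#include <cassert>
#include <cstdint>

// icmp sgt + select, i.e. what EmitX86MinMax used to build for smax.
static int32_t smaxViaSelect(int32_t A, int32_t B) { return A > B ? A : B; }
// icmp ult + select, the pattern previously used for umin.
static uint32_t uminViaSelect(uint32_t A, uint32_t B) { return A < B ? A : B; }

int main() {
  // Per lane, @llvm.smax / @llvm.umin must produce exactly these results.
  assert(smaxViaSelect(-5, 3) == 3);
  assert(uminViaSelect(5u, 0xFFFFFFFFu) == 5u); // unsigned ordering matters
  return 0;
}

The test churn that follows simply re-checks for the intrinsic calls instead
of the icmp+select pairs.

diff --git a/clang/test/CodeGen/X86/avx2-builtins.c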
b/clang/test/CodeGen/X86/avx2-builtins.c index f3de6d1b87474..46717a78b49ed 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -727,85 +727,73 @@ void test_mm256_maskstore_epi64(long long *a, __m256i m, __m256i b) { __m256i test_mm256_max_epi8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_max_epi8 - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK: call <32 x i8> @llvm.smax.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_max_epi8(a, b); } __m256i test_mm256_max_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_max_epi16 - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK: call <16 x i16> @llvm.smax.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_max_epi16(a, b); } __m256i test_mm256_max_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_max_epi32 - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK: call <8 x i32> @llvm.smax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_max_epi32(a, b); } __m256i test_mm256_max_epu8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_max_epu8 - // CHECK: [[CMP:%.*]] = icmp ugt <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK: call <32 x i8> @llvm.umax.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_max_epu8(a, b); } __m256i test_mm256_max_epu16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_max_epu16 - // CHECK: [[CMP:%.*]] = icmp ugt <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK: call <16 x i16> @llvm.umax.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_max_epu16(a, b); } __m256i test_mm256_max_epu32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_max_epu32 - // CHECK: [[CMP:%.*]] = icmp ugt <8 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK: call <8 x i32> @llvm.umax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_max_epu32(a, b); } __m256i test_mm256_min_epi8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_min_epi8 - // CHECK: [[CMP:%.*]] = icmp slt <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK: call <32 x i8> @llvm.smin.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_min_epi8(a, b); } __m256i test_mm256_min_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_min_epi16 - // CHECK: [[CMP:%.*]] = icmp slt <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK: call <16 x i16> @llvm.smin.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_min_epi16(a, b); } __m256i test_mm256_min_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_min_epi32 - // CHECK: [[CMP:%.*]] = icmp slt <8 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK: call <8 x i32> @llvm.smin.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_min_epi32(a, b); } __m256i test_mm256_min_epu8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_min_epu8 - // CHECK: [[CMP:%.*]] = icmp ult <32 x i8> [[X:%.*]], [[Y:%.*]] - 
// CHECK-NEXT: select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK: call <32 x i8> @llvm.umin.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_min_epu8(a, b); } __m256i test_mm256_min_epu16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_min_epu16 - // CHECK: [[CMP:%.*]] = icmp ult <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK: call <16 x i16> @llvm.umin.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_min_epu16(a, b); } __m256i test_mm256_min_epu32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_min_epu32 - // CHECK: [[CMP:%.*]] = icmp ult <8 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK: call <8 x i32> @llvm.umin.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_min_epu32(a, b); } diff --git a/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c b/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c index b02bd7c66658d..923672bb80953 100644 --- a/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c +++ b/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c @@ -5,28 +5,23 @@ long long test_mm512_reduce_max_epi64(__m512i __W){ // CHECK-LABEL: @test_mm512_reduce_max_epi64( // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp sgt <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.smax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp sgt <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.smax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp sgt <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.smax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) +// CHECK: extractelement <8 x i64> %{{.*}}, i32 0 return _mm512_reduce_max_epi64(__W); } unsigned long long test_mm512_reduce_max_epu64(__m512i __W){ // CHECK-LABEL: @test_mm512_reduce_max_epu64( // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp ugt <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.umax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp ugt <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.umax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp ugt <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.umax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: extractelement <8 x i64> %{{.*}}, i32 0 return _mm512_reduce_max_epu64(__W); } @@ -48,28 +43,23 @@ double test_mm512_reduce_max_pd(__m512d __W){ long long test_mm512_reduce_min_epi64(__m512i __W){ // CHECK-LABEL: @test_mm512_reduce_min_epi64( // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp slt <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> 
%{{.*}} +// CHECK: call <8 x i64> @llvm.smin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp slt <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.smin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp slt <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.smin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) +// CHECK: extractelement <8 x i64> %{{.*}}, i32 0 return _mm512_reduce_min_epi64(__W); } unsigned long long test_mm512_reduce_min_epu64(__m512i __W){ // CHECK-LABEL: @test_mm512_reduce_min_epu64( // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp ult <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.umin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp ult <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.umin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp ult <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.umin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: extractelement <8 x i64> %{{.*}}, i32 0 return _mm512_reduce_min_epu64(__W); } @@ -93,14 +83,12 @@ long long test_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __W){ // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp sgt <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.smax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp sgt <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.smax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp sgt <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.smax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) +// CHECK: extractelement <8 x i64> %{{.*}}, i32 0 return _mm512_mask_reduce_max_epi64(__M, __W); } @@ -109,14 +97,12 @@ unsigned long test_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __W){ // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp ugt <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.umax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp ugt <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call 
<8 x i64> @llvm.umax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp ugt <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.umax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) +// CHECK: extractelement <8 x i64> %{{.*}}, i32 0 return _mm512_mask_reduce_max_epu64(__M, __W); } @@ -141,14 +127,12 @@ long long test_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __W){ // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp slt <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.smin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp slt <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.smin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp slt <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.smin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) +// CHECK: extractelement <8 x i64> %{{.*}}, i32 0 return _mm512_mask_reduce_min_epi64(__M, __W); } @@ -157,14 +141,12 @@ unsigned long long test_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __W){ // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp ult <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.umin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp ult <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.umin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> -// CHECK: icmp ult <8 x i64> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} +// CHECK: call <8 x i64> @llvm.umin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) +// CHECK: extractelement <8 x i64> %{{.*}}, i32 0 return _mm512_mask_reduce_min_epu64(__M, __W); } @@ -188,18 +170,14 @@ int test_mm512_reduce_max_epi32(__m512i __W){ // CHECK-LABEL: @test_mm512_reduce_max_epi32( // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> -// CHECK: icmp sgt <8 x i32> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} +// CHECK: call <8 x i32> @llvm.smax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> -// CHECK: icmp sgt <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.smax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, 
<4 x i32> -// CHECK: icmp sgt <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.smax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> -// CHECK: icmp sgt <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.smax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: extractelement <4 x i32> %{{.*}}, i32 0 return _mm512_reduce_max_epi32(__W); } @@ -208,18 +186,14 @@ unsigned int test_mm512_reduce_max_epu32(__m512i __W){ // CHECK-LABEL: @test_mm512_reduce_max_epu32( // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> -// CHECK: icmp ugt <8 x i32> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} +// CHECK: call <8 x i32> @llvm.umax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> -// CHECK: icmp ugt <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.umax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> -// CHECK: icmp ugt <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.umax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> -// CHECK: icmp ugt <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.umax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: extractelement <4 x i32> %{{.*}}, i32 0 return _mm512_reduce_max_epu32(__W); } @@ -244,18 +218,14 @@ int test_mm512_reduce_min_epi32(__m512i __W){ // CHECK-LABEL: @test_mm512_reduce_min_epi32( // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> -// CHECK: icmp slt <8 x i32> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} +// CHECK: call <8 x i32> @llvm.smin.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> -// CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> -// CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> -// CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: extractelement <4 x i32> %{{.*}}, i32 0 return _mm512_reduce_min_epi32(__W); } @@ -264,18 +234,14 @@ unsigned int test_mm512_reduce_min_epu32(__m512i __W){ // CHECK-LABEL: 
@test_mm512_reduce_min_epu32( // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> -// CHECK: icmp ult <8 x i32> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} +// CHECK: call <8 x i32> @llvm.umin.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> -// CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.umin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> -// CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.umin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> -// CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.umin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: extractelement <4 x i32> %{{.*}}, i32 0 return _mm512_reduce_min_epu32(__W); } @@ -302,18 +268,14 @@ int test_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __W){ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> -// CHECK: icmp sgt <8 x i32> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} +// CHECK: call <8 x i32> @llvm.smax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> -// CHECK: icmp sgt <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.smax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> -// CHECK: icmp sgt <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.smax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> -// CHECK: icmp sgt <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.smax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: extractelement <4 x i32> %{{.*}}, i32 0 return _mm512_mask_reduce_max_epi32(__M, __W); } @@ -324,18 +286,14 @@ unsigned int test_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __W){ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> -// CHECK: icmp ugt <8 x i32> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} +// CHECK: call <8 x i32> @llvm.umax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> -// CHECK: icmp ugt <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select 
<4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.umax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> -// CHECK: icmp ugt <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.umax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> -// CHECK: icmp ugt <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.umax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: extractelement <4 x i32> %{{.*}}, i32 0 return _mm512_mask_reduce_max_epu32(__M, __W); } @@ -364,18 +322,14 @@ int test_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __W){ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> -// CHECK: icmp slt <8 x i32> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} +// CHECK: call <8 x i32> @llvm.smin.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> -// CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> -// CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> -// CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: extractelement <4 x i32> %{{.*}}, i32 0 return _mm512_mask_reduce_min_epi32(__M, __W); } @@ -386,18 +340,14 @@ unsigned int test_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __W){ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> -// CHECK: icmp ult <8 x i32> %{{.*}}, %{{.*}} -// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} +// CHECK: call <8 x i32> @llvm.umin.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> -// CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.umin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> -// CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.umin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> -// CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}} -// CHECK: select <4 x i1> %{{.*}}, 
<4 x i32> %{{.*}}, <4 x i32> %{{.*}} +// CHECK: call <4 x i32> @llvm.umin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: extractelement <4 x i32> %{{.*}}, i32 0 return _mm512_mask_reduce_min_epu32(__M, __W); } diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index cc173f1a9cfe6..58b2488f3caf0 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -1088,161 +1088,137 @@ __m512i test_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) { } __m512i test_mm512_max_epi8(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_max_epi8 - // CHECK: [[CMP:%.*]] = icmp sgt <64 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.smax.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) return _mm512_max_epi8(__A,__B); } __m512i test_mm512_maskz_max_epi8(__mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_max_epi8 - // CHECK: [[CMP:%.*]] = icmp sgt <64 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.smax.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_maskz_max_epi8(__M,__A,__B); } __m512i test_mm512_mask_max_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_max_epi8 - // CHECK: [[CMP:%.*]] = icmp sgt <64 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.smax.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_mask_max_epi8(__W,__M,__A,__B); } __m512i test_mm512_max_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_max_epi16 - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.smax.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) return _mm512_max_epi16(__A,__B); } __m512i test_mm512_maskz_max_epi16(__mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_max_epi16 - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.smax.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_maskz_max_epi16(__M,__A,__B); } __m512i test_mm512_mask_max_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_max_epi16 - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.smax.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_mask_max_epi16(__W,__M,__A,__B); } __m512i test_mm512_max_epu8(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_max_epu8 - // CHECK: [[CMP:%.*]] = icmp ugt <64 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], 
<64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.umax.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) return _mm512_max_epu8(__A,__B); } __m512i test_mm512_maskz_max_epu8(__mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_max_epu8 - // CHECK: [[CMP:%.*]] = icmp ugt <64 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.umax.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_maskz_max_epu8(__M,__A,__B); } __m512i test_mm512_mask_max_epu8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_max_epu8 - // CHECK: [[CMP:%.*]] = icmp ugt <64 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.umax.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_mask_max_epu8(__W,__M,__A,__B); } __m512i test_mm512_max_epu16(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_max_epu16 - // CHECK: [[CMP:%.*]] = icmp ugt <32 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.umax.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) return _mm512_max_epu16(__A,__B); } __m512i test_mm512_maskz_max_epu16(__mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_max_epu16 - // CHECK: [[CMP:%.*]] = icmp ugt <32 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.umax.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_maskz_max_epu16(__M,__A,__B); } __m512i test_mm512_mask_max_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_max_epu16 - // CHECK: [[CMP:%.*]] = icmp ugt <32 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.umax.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_mask_max_epu16(__W,__M,__A,__B); } __m512i test_mm512_min_epi8(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_min_epi8 - // CHECK: [[CMP:%.*]] = icmp slt <64 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.smin.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) return _mm512_min_epi8(__A,__B); } __m512i test_mm512_maskz_min_epi8(__mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_min_epi8 - // CHECK: [[CMP:%.*]] = icmp slt <64 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.smin.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_maskz_min_epi8(__M,__A,__B); } __m512i test_mm512_mask_min_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: 
@test_mm512_mask_min_epi8 - // CHECK: [[CMP:%.*]] = icmp slt <64 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.smin.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_mask_min_epi8(__W,__M,__A,__B); } __m512i test_mm512_min_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_min_epi16 - // CHECK: [[CMP:%.*]] = icmp slt <32 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.smin.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) return _mm512_min_epi16(__A,__B); } __m512i test_mm512_maskz_min_epi16(__mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_min_epi16 - // CHECK: [[CMP:%.*]] = icmp slt <32 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.smin.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_maskz_min_epi16(__M,__A,__B); } __m512i test_mm512_mask_min_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_min_epi16 - // CHECK: [[CMP:%.*]] = icmp slt <32 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.smin.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_mask_min_epi16(__W,__M,__A,__B); } __m512i test_mm512_min_epu8(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_min_epu8 - // CHECK: [[CMP:%.*]] = icmp ult <64 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.umin.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) return _mm512_min_epu8(__A,__B); } __m512i test_mm512_maskz_min_epu8(__mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_min_epu8 - // CHECK: [[CMP:%.*]] = icmp ult <64 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.umin.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_maskz_min_epu8(__M,__A,__B); } __m512i test_mm512_mask_min_epu8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_min_epu8 - // CHECK: [[CMP:%.*]] = icmp ult <64 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.umin.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_mask_min_epu8(__W,__M,__A,__B); } __m512i test_mm512_min_epu16(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_min_epu16 - // CHECK: [[CMP:%.*]] = icmp ult <32 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.umin.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) return 
_mm512_min_epu16(__A,__B); } __m512i test_mm512_maskz_min_epu16(__mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_min_epu16 - // CHECK: [[CMP:%.*]] = icmp ult <32 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.umin.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_maskz_min_epu16(__M,__A,__B); } __m512i test_mm512_mask_min_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_min_epu16 - // CHECK: [[CMP:%.*]] = icmp ult <32 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.umin.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_mask_min_epu16(__W,__M,__A,__B); } diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index fb5db4c321748..a4b23eb1cf5e2 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -9882,16 +9882,14 @@ __m512d test_mm512_roundscale_round_pd(__m512d __A) __m512i test_mm512_max_epi32 (__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_max_epi32 - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) return _mm512_max_epi32 (__A,__B); } __m512i test_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_max_epi32 - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} return _mm512_mask_max_epi32 (__W,__M,__A,__B); } @@ -9899,8 +9897,7 @@ __m512i test_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m5 __m512i test_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_max_epi32 - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} return _mm512_maskz_max_epi32 (__M,__A,__B); } @@ -9908,16 +9905,14 @@ __m512i test_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) __m512i test_mm512_max_epi64 (__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_max_epi64 - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) return _mm512_max_epi64 (__A,__B); } __m512i test_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_max_epi64 - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i64> [[X:%.*]], [[Y:%.*]] - 
// CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} return _mm512_mask_max_epi64 (__W,__M,__A,__B); } @@ -9925,8 +9920,7 @@ __m512i test_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m51 __m512i test_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_max_epi64 - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} return _mm512_maskz_max_epi64 (__M,__A,__B); } @@ -9934,16 +9928,14 @@ __m512i test_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) __m512i test_mm512_max_epu64 (__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_max_epu64 - // CHECK: [[CMP:%.*]] = icmp ugt <8 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.umax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) return _mm512_max_epu64 (__A,__B); } __m512i test_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_max_epu64 - // CHECK: [[CMP:%.*]] = icmp ugt <8 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.umax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} return _mm512_mask_max_epu64 (__W,__M,__A,__B); } @@ -9951,8 +9943,7 @@ __m512i test_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m51 __m512i test_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_max_epu64 - // CHECK: [[CMP:%.*]] = icmp ugt <8 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.umax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} return _mm512_maskz_max_epu64 (__M,__A,__B); } @@ -9960,16 +9951,14 @@ __m512i test_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) __m512i test_mm512_max_epu32 (__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_max_epu32 - // CHECK: [[CMP:%.*]] = icmp ugt <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.umax.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) return _mm512_max_epu32 (__A,__B); } __m512i test_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_max_epu32 - // CHECK: [[CMP:%.*]] = icmp ugt <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.umax.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} return _mm512_mask_max_epu32 (__W,__M,__A,__B); } @@ -9977,8 +9966,7 @@ __m512i test_mm512_mask_max_epu32 (__m512i __W, 
__mmask16 __M, __m512i __A, __m5 __m512i test_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_max_epu32 - // CHECK: [[CMP:%.*]] = icmp ugt <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.umax.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} return _mm512_maskz_max_epu32 (__M,__A,__B); } @@ -9986,16 +9974,14 @@ __m512i test_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) __m512i test_mm512_min_epi32 (__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_min_epi32 - // CHECK: [[CMP:%.*]] = icmp slt <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.smin.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) return _mm512_min_epi32 (__A,__B); } __m512i test_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_min_epi32 - // CHECK: [[CMP:%.*]] = icmp slt <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.smin.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} return _mm512_mask_min_epi32 (__W,__M,__A,__B); } @@ -10003,8 +9989,7 @@ __m512i test_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m5 __m512i test_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_min_epi32 - // CHECK: [[CMP:%.*]] = icmp slt <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.smin.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} return _mm512_maskz_min_epi32 (__M,__A,__B); } @@ -10012,16 +9997,14 @@ __m512i test_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) __m512i test_mm512_min_epu32 (__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_min_epu32 - // CHECK: [[CMP:%.*]] = icmp ult <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) return _mm512_min_epu32 (__A,__B); } __m512i test_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_min_epu32 - // CHECK: [[CMP:%.*]] = icmp ult <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} return _mm512_mask_min_epu32 (__W,__M,__A,__B); } @@ -10029,8 +10012,7 @@ __m512i test_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m5 __m512i test_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_min_epu32 - // CHECK: [[CMP:%.*]] = icmp ult <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // 
CHECK: [[RES:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} return _mm512_maskz_min_epu32 (__M,__A,__B); } @@ -10038,16 +10020,14 @@ __m512i test_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) __m512i test_mm512_min_epi64 (__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_min_epi64 - // CHECK: [[CMP:%.*]] = icmp slt <8 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) return _mm512_min_epi64 (__A,__B); } __m512i test_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_min_epi64 - // CHECK: [[CMP:%.*]] = icmp slt <8 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} return _mm512_mask_min_epi64 (__W,__M,__A,__B); } @@ -10055,8 +10035,7 @@ __m512i test_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m51 __m512i test_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_min_epi64 - // CHECK: [[CMP:%.*]] = icmp slt <8 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} return _mm512_maskz_min_epi64 (__M,__A,__B); } @@ -10064,16 +10043,14 @@ __m512i test_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) __m512i test_mm512_min_epu64 (__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_min_epu64 - // CHECK: [[CMP:%.*]] = icmp ult <8 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.umin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) return _mm512_min_epu64 (__A,__B); } __m512i test_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_min_epu64 - // CHECK: [[CMP:%.*]] = icmp ult <8 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.umin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} return _mm512_mask_min_epu64 (__W,__M,__A,__B); } @@ -10081,8 +10058,7 @@ __m512i test_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m51 __m512i test_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_min_epu64 - // CHECK: [[CMP:%.*]] = icmp ult <8 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.umin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} return _mm512_maskz_min_epu64 (__M,__A,__B); } diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index e7965119fb4b9..248cb61d97ae4 100644 --- 
a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -4603,8 +4603,7 @@ __m256i test_mm256_maskz_abs_epi64(__mmask8 __U, __m256i __A) { } __m128i test_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_max_epi32 - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> // CHECK: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} @@ -4612,8 +4611,7 @@ __m128i test_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { } __m128i test_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_max_epi32 - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> // CHECK: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} @@ -4621,8 +4619,7 @@ __m128i test_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i _ } __m256i test_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_max_epi32 - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> // CHECK: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} @@ -4630,8 +4627,7 @@ __m256i test_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) { } __m256i test_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_max_epi32 - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> // CHECK: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} @@ -4639,48 +4635,41 @@ __m256i test_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256 } __m128i test_mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_max_epi64 - // CHECK: [[CMP:%.*]] = icmp sgt <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_maskz_max_epi64(__M,__A,__B); } __m128i test_mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_max_epi64 - // CHECK: [[CMP:%.*]] = icmp sgt <2 x i64> [[X:%.*]], [[Y:%.*]] - // 
CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_mask_max_epi64(__W,__M,__A,__B); } __m128i test_mm_max_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_max_epi64 - // CHECK: [[CMP:%.*]] = icmp sgt <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_max_epi64(__A,__B); } __m256i test_mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_max_epi64 - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_maskz_max_epi64(__M,__A,__B); } __m256i test_mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_max_epi64 - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_mask_max_epi64(__W,__M,__A,__B); } __m256i test_mm256_max_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_max_epi64 - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_max_epi64(__A,__B); } __m128i test_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_max_epu32 - // CHECK: [[CMP:%.*]] = icmp ugt <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> // CHECK: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} @@ -4688,8 +4677,7 @@ __m128i test_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { } __m128i test_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_max_epu32 - // CHECK: [[CMP:%.*]] = icmp ugt <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> // CHECK: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} @@ -4697,8 +4685,7 @@ __m128i test_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i _ } __m256i test_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_max_epu32 - // CHECK: [[CMP:%.*]] = icmp ugt <8 x i32> 
[[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> // CHECK: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} @@ -4706,8 +4693,7 @@ __m256i test_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { } __m256i test_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_max_epu32 - // CHECK: [[CMP:%.*]] = icmp ugt <8 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> // CHECK: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} @@ -4715,48 +4701,41 @@ __m256i test_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256 } __m128i test_mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_max_epu64 - // CHECK: [[CMP:%.*]] = icmp ugt <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_maskz_max_epu64(__M,__A,__B); } __m128i test_mm_max_epu64(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_max_epu64 - // CHECK: [[CMP:%.*]] = icmp ugt <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_max_epu64(__A,__B); } __m128i test_mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_max_epu64 - // CHECK: [[CMP:%.*]] = icmp ugt <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_mask_max_epu64(__W,__M,__A,__B); } __m256i test_mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_max_epu64 - // CHECK: [[CMP:%.*]] = icmp ugt <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_maskz_max_epu64(__M,__A,__B); } __m256i test_mm256_max_epu64(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_max_epu64 - // CHECK: [[CMP:%.*]] = icmp ugt <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_max_epu64(__A,__B); } __m256i test_mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_max_epu64 - // CHECK: 
[[CMP:%.*]] = icmp ugt <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_mask_max_epu64(__W,__M,__A,__B); } __m128i test_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_min_epi32 - // CHECK: [[CMP:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> // CHECK: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} @@ -4764,8 +4743,7 @@ __m128i test_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { } __m128i test_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_min_epi32 - // CHECK: [[CMP:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> // CHECK: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} @@ -4773,8 +4751,7 @@ __m128i test_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i _ } __m256i test_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_min_epi32 - // CHECK: [[CMP:%.*]] = icmp slt <8 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> // CHECK: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} @@ -4782,8 +4759,7 @@ __m256i test_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { } __m256i test_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_min_epi32 - // CHECK: [[CMP:%.*]] = icmp slt <8 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> // CHECK: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} @@ -4791,48 +4767,41 @@ __m256i test_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256 } __m128i test_mm_min_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_min_epi64 - // CHECK: [[CMP:%.*]] = icmp slt <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_min_epi64(__A,__B); } __m128i test_mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_min_epi64 - // 
CHECK: [[CMP:%.*]] = icmp slt <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_mask_min_epi64(__W,__M,__A,__B); } __m128i test_mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_min_epi64 - // CHECK: [[CMP:%.*]] = icmp slt <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_maskz_min_epi64(__M,__A,__B); } __m256i test_mm256_min_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_min_epi64 - // CHECK: [[CMP:%.*]] = icmp slt <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_min_epi64(__A,__B); } __m256i test_mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_min_epi64 - // CHECK: [[CMP:%.*]] = icmp slt <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_mask_min_epi64(__W,__M,__A,__B); } __m256i test_mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_min_epi64 - // CHECK: [[CMP:%.*]] = icmp slt <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_maskz_min_epi64(__M,__A,__B); } __m128i test_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_min_epu32 - // CHECK: [[CMP:%.*]] = icmp ult <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> // CHECK: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} @@ -4840,8 +4809,7 @@ __m128i test_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { } __m128i test_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_min_epu32 - // CHECK: [[CMP:%.*]] = icmp ult <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> // CHECK: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} @@ -4849,8 +4817,7 @@ __m128i test_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i _ } 
__m256i test_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_min_epu32 - // CHECK: [[CMP:%.*]] = icmp ult <8 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> // CHECK: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} @@ -4858,8 +4825,7 @@ __m256i test_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) { } __m256i test_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_min_epu32 - // CHECK: [[CMP:%.*]] = icmp ult <8 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> // CHECK: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} @@ -4867,41 +4833,35 @@ __m256i test_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256 } __m128i test_mm_min_epu64(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_min_epu64 - // CHECK: [[CMP:%.*]] = icmp ult <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_min_epu64(__A,__B); } __m128i test_mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_min_epu64 - // CHECK: [[CMP:%.*]] = icmp ult <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_mask_min_epu64(__W,__M,__A,__B); } __m128i test_mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_min_epu64 - // CHECK: [[CMP:%.*]] = icmp ult <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_maskz_min_epu64(__M,__A,__B); } __m256i test_mm256_min_epu64(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_min_epu64 - // CHECK: [[CMP:%.*]] = icmp ult <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_min_epu64(__A,__B); } __m256i test_mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_min_epu64 - // CHECK: [[CMP:%.*]] = icmp ult <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return 
_mm256_mask_min_epu64(__W,__M,__A,__B); } __m256i test_mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_min_epu64 - // CHECK: [[CMP:%.*]] = icmp ult <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: [[RES:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_maskz_min_epu64(__M,__A,__B); } diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index df2adfdb97be6..36feafd29437b 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -1226,8 +1226,7 @@ __m256i test_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) { } __m128i test_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_max_epi8 - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i8>]] [[RES]] to [[DSTTY:<2 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} @@ -1235,8 +1234,7 @@ __m128i test_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) { } __m128i test_mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_max_epi8 - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i8>]] [[RES]] to [[DSTTY:<2 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} @@ -1244,8 +1242,7 @@ __m128i test_mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i _ } __m256i test_mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_max_epi8 - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<32 x i8>]] [[RES]] to [[DSTTY:<4 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} @@ -1253,8 +1250,7 @@ __m256i test_mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B) { } __m256i test_mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_max_epi8 - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<32 x i8>]] [[RES]] to [[DSTTY:<4 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} @@ 
-1262,8 +1258,7 @@ __m256i test_mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256 } __m128i test_mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_max_epi16 - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<8 x i16>]] [[RES]] to [[DSTTY:<2 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} @@ -1271,8 +1266,7 @@ __m128i test_mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B) { } __m128i test_mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_max_epi16 - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<8 x i16>]] [[RES]] to [[DSTTY:<2 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} @@ -1280,8 +1274,7 @@ __m128i test_mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i _ } __m256i test_mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_max_epi16 - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i16> @llvm.smax.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i16>]] [[RES]] to [[DSTTY:<4 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} @@ -1289,8 +1282,7 @@ __m256i test_mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B) { } __m256i test_mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_max_epi16 - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i16> @llvm.smax.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i16>]] [[RES]] to [[DSTTY:<4 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} @@ -1298,8 +1290,7 @@ __m256i test_mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m25 } __m128i test_mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_max_epu8 - // CHECK: [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i8>]] [[RES]] to [[DSTTY:<2 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} @@ -1307,8 +1298,7 @@ __m128i test_mm_maskz_max_epu8(__mmask16 __M, 
__m128i __A, __m128i __B) { } __m128i test_mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_max_epu8 - // CHECK: [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i8>]] [[RES]] to [[DSTTY:<2 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} @@ -1316,8 +1306,7 @@ __m128i test_mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i _ } __m256i test_mm256_maskz_max_epu8(__mmask32 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_max_epu8 - // CHECK: [[CMP:%.*]] = icmp ugt <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<32 x i8>]] [[RES]] to [[DSTTY:<4 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} @@ -1325,8 +1314,7 @@ __m256i test_mm256_maskz_max_epu8(__mmask32 __M, __m256i __A, __m256i __B) { } __m256i test_mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_max_epu8 - // CHECK: [[CMP:%.*]] = icmp ugt <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<32 x i8>]] [[RES]] to [[DSTTY:<4 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} @@ -1334,8 +1322,7 @@ __m256i test_mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256 } __m128i test_mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_max_epu16 - // CHECK: [[CMP:%.*]] = icmp ugt <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<8 x i16>]] [[RES]] to [[DSTTY:<2 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} @@ -1343,8 +1330,7 @@ __m128i test_mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B) { } __m128i test_mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_max_epu16 - // CHECK: [[CMP:%.*]] = icmp ugt <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<8 x i16>]] [[RES]] to [[DSTTY:<2 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} @@ -1352,8 +1338,7 @@ __m128i test_mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i _ } __m256i test_mm256_maskz_max_epu16(__mmask16 __M, 
__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_max_epu16 - // CHECK: [[CMP:%.*]] = icmp ugt <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i16>]] [[RES]] to [[DSTTY:<4 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} @@ -1361,8 +1346,7 @@ __m256i test_mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B) { } __m256i test_mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_max_epu16 - // CHECK: [[CMP:%.*]] = icmp ugt <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i16>]] [[RES]] to [[DSTTY:<4 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} @@ -1370,8 +1354,7 @@ __m256i test_mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m25 } __m128i test_mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_min_epi8 - // CHECK: [[CMP:%.*]] = icmp slt <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i8>]] [[RES]] to [[DSTTY:<2 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} @@ -1379,8 +1362,7 @@ __m128i test_mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B) { } __m128i test_mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_min_epi8 - // CHECK: [[CMP:%.*]] = icmp slt <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i8>]] [[RES]] to [[DSTTY:<2 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} @@ -1388,8 +1370,7 @@ __m128i test_mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i _ } __m256i test_mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_min_epi8 - // CHECK: [[CMP:%.*]] = icmp slt <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<32 x i8>]] [[RES]] to [[DSTTY:<4 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} @@ -1397,8 +1378,7 @@ __m256i test_mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B) { } __m256i test_mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: 
@test_mm256_mask_min_epi8 - // CHECK: [[CMP:%.*]] = icmp slt <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<32 x i8>]] [[RES]] to [[DSTTY:<4 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} @@ -1406,8 +1386,7 @@ __m256i test_mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256 } __m128i test_mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_min_epi16 - // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<8 x i16>]] [[RES]] to [[DSTTY:<2 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} @@ -1415,8 +1394,7 @@ __m128i test_mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B) { } __m128i test_mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_min_epi16 - // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<8 x i16>]] [[RES]] to [[DSTTY:<2 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} @@ -1424,8 +1402,7 @@ __m128i test_mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i _ } __m256i test_mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_min_epi16 - // CHECK: [[CMP:%.*]] = icmp slt <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i16>]] [[RES]] to [[DSTTY:<4 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} @@ -1433,8 +1410,7 @@ __m256i test_mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B) { } __m256i test_mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_min_epi16 - // CHECK: [[CMP:%.*]] = icmp slt <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i16>]] [[RES]] to [[DSTTY:<4 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} @@ -1442,8 +1418,7 @@ __m256i test_mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m25 } __m128i test_mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_min_epu8 - // CHECK: [[CMP:%.*]] = icmp ult <16 x 
i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i8>]] [[RES]] to [[DSTTY:<2 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} @@ -1451,8 +1426,7 @@ __m128i test_mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B) { } __m128i test_mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_min_epu8 - // CHECK: [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i8>]] [[RES]] to [[DSTTY:<2 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} @@ -1460,8 +1434,7 @@ __m128i test_mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i _ } __m256i test_mm256_maskz_min_epu8(__mmask32 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_min_epu8 - // CHECK: [[CMP:%.*]] = icmp ult <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<32 x i8>]] [[RES]] to [[DSTTY:<4 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} @@ -1469,8 +1442,7 @@ __m256i test_mm256_maskz_min_epu8(__mmask32 __M, __m256i __A, __m256i __B) { } __m256i test_mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_min_epu8 - // CHECK: [[CMP:%.*]] = icmp ult <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK: [[RES:%.*]] = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<32 x i8>]] [[RES]] to [[DSTTY:<4 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} @@ -1478,8 +1450,7 @@ __m256i test_mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256 } __m128i test_mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_min_epu16 - // CHECK: [[CMP:%.*]] = icmp ult <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<8 x i16>]] [[RES]] to [[DSTTY:<2 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} @@ -1487,8 +1458,7 @@ __m128i test_mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B) { } __m128i test_mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_min_epu16 - // CHECK: [[CMP:%.*]] = icmp ult <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x 
i16> [[X]], <8 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<8 x i16>]] [[RES]] to [[DSTTY:<2 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} @@ -1496,8 +1466,7 @@ __m128i test_mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i _ } __m256i test_mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_min_epu16 - // CHECK: [[CMP:%.*]] = icmp ult <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i16>]] [[RES]] to [[DSTTY:<4 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} @@ -1505,8 +1474,7 @@ __m256i test_mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B) { } __m256i test_mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_min_epu16 - // CHECK: [[CMP:%.*]] = icmp ult <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK: [[RES:%.*]] = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i16>]] [[RES]] to [[DSTTY:<4 x i64>]] // CHECK: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 34e3baef84c32..180677de03314 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -752,15 +752,13 @@ void test_mm_maskmoveu_si128(__m128i A, __m128i B, char* C) { __m128i test_mm_max_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_max_epi16 - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK: call <8 x i16> @llvm.smax.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_max_epi16(A, B); } __m128i test_mm_max_epu8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_max_epu8 - // CHECK: [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK: call <16 x i8> @llvm.umax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_max_epu8(A, B); } @@ -784,15 +782,13 @@ void test_mm_mfence() { __m128i test_mm_min_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_min_epi16 - // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK: call <8 x i16> @llvm.smin.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_min_epi16(A, B); } __m128i test_mm_min_epu8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_min_epu8 - // CHECK: [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK: call <16 x i8> @llvm.umin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_min_epu8(A, B); } diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c 
index 5f623ce9c38fd..1e38e3c3355a9 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -248,57 +248,49 @@ __m128 test_mm_insert_ps(__m128 x, __m128 y) { __m128i test_mm_max_epi8(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_max_epi8 - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK: call <16 x i8> @llvm.smax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_max_epi8(x, y); } __m128i test_mm_max_epi32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_max_epi32 - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK: call <4 x i32> @llvm.smax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_max_epi32(x, y); } __m128i test_mm_max_epu16(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_max_epu16 - // CHECK: [[CMP:%.*]] = icmp ugt <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK: call <8 x i16> @llvm.umax.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_max_epu16(x, y); } __m128i test_mm_max_epu32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_max_epu32 - // CHECK: [[CMP:%.*]] = icmp ugt <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK: call <4 x i32> @llvm.umax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_max_epu32(x, y); } __m128i test_mm_min_epi8(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_min_epi8 - // CHECK: [[CMP:%.*]] = icmp slt <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK: call <16 x i8> @llvm.smin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_min_epi8(x, y); } __m128i test_mm_min_epi32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_min_epi32 - // CHECK: [[CMP:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK: call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_min_epi32(x, y); } __m128i test_mm_min_epu16(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_min_epu16 - // CHECK: [[CMP:%.*]] = icmp ult <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK: call <8 x i16> @llvm.umin.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_min_epu16(x, y); } __m128i test_mm_min_epu32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_min_epu32 - // CHECK: [[CMP:%.*]] = icmp ult <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK: call <4 x i32> @llvm.umin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_min_epu32(x, y); } diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 12286264c81df..d27c1b4591496 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1380,19 +1380,6 @@ static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) { return Res; } -static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI, - ICmpInst::Predicate Pred) { - Value *Op0 = CI.getArgOperand(0); - Value *Op1 = CI.getArgOperand(1); - Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1); - Value *Res = Builder.CreateSelect(Cmp, Op0, Op1); - - if (CI.getNumArgOperands() == 4) - Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, 
CI.getArgOperand(2)); - - return Res; -} - static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) { Type *Ty = CI.getType(); @@ -2136,25 +2123,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Name == "sse41.pmaxsd" || Name.startswith("avx2.pmaxs") || Name.startswith("avx512.mask.pmaxs"))) { - Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT); + Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax); } else if (IsX86 && (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" || Name == "sse41.pmaxud" || Name.startswith("avx2.pmaxu") || Name.startswith("avx512.mask.pmaxu"))) { - Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT); + Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax); } else if (IsX86 && (Name == "sse41.pminsb" || Name == "sse2.pmins.w" || Name == "sse41.pminsd" || Name.startswith("avx2.pmins") || Name.startswith("avx512.mask.pmins"))) { - Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT); + Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin); } else if (IsX86 && (Name == "sse2.pminu.b" || Name == "sse41.pminuw" || Name == "sse41.pminud" || Name.startswith("avx2.pminu") || Name.startswith("avx512.mask.pminu"))) { - Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT); + Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin); } else if (IsX86 && (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" || Name == "avx512.pmulu.dq.512" || diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll index 0fe9d0b0d35c8..49f6c2b849b65 100644 --- a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll @@ -1632,11 +1632,11 @@ define <4 x i64> @test_mm256_max_epi8(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-NEXT: ret{{[l|q]}} %arg0 = bitcast <4 x i64> %a0 to <32 x i8> %arg1 = bitcast <4 x i64> %a1 to <32 x i8> - %cmp = icmp sgt <32 x i8> %arg0, %arg1 - %sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1 + %sel = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %arg0, <32 x i8> %arg1) %bc = bitcast <32 x i8> %sel to <4 x i64> ret <4 x i64> %bc } +declare <32 x i8> @llvm.smax.v32i8(<32 x i8>, <32 x i8>) define <4 x i64> @test_mm256_max_epi16(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-LABEL: test_mm256_max_epi16: @@ -1645,11 +1645,11 @@ define <4 x i64> @test_mm256_max_epi16(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-NEXT: ret{{[l|q]}} %arg0 = bitcast <4 x i64> %a0 to <16 x i16> %arg1 = bitcast <4 x i64> %a1 to <16 x i16> - %cmp = icmp sgt <16 x i16> %arg0, %arg1 - %sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1 + %sel = call <16 x i16> @llvm.smax.v16i16(<16 x i16> %arg0, <16 x i16> %arg1) %bc = bitcast <16 x i16> %sel to <4 x i64> ret <4 x i64> %bc } +declare <16 x i16> @llvm.smax.v16i16(<16 x i16>, <16 x i16>) define <4 x i64> @test_mm256_max_epi32(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-LABEL: test_mm256_max_epi32: @@ -1658,11 +1658,11 @@ define <4 x i64> @test_mm256_max_epi32(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-NEXT: ret{{[l|q]}} %arg0 = bitcast <4 x i64> %a0 to <8 x i32> %arg1 = bitcast <4 x i64> %a1 to <8 x i32> - %cmp = icmp sgt <8 x i32> %arg0, %arg1 - %sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1 + %sel = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %arg0, <8 x i32> %arg1) %bc = bitcast <8 x i32> %sel to <4 x i64> ret <4 x i64> %bc } +declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) define <4 x i64> @test_mm256_max_epu8(<4 x i64> %a0, 
<4 x i64> %a1) { ; CHECK-LABEL: test_mm256_max_epu8: @@ -1671,11 +1671,11 @@ define <4 x i64> @test_mm256_max_epu8(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-NEXT: ret{{[l|q]}} %arg0 = bitcast <4 x i64> %a0 to <32 x i8> %arg1 = bitcast <4 x i64> %a1 to <32 x i8> - %cmp = icmp ugt <32 x i8> %arg0, %arg1 - %sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1 + %sel = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %arg0, <32 x i8> %arg1) %bc = bitcast <32 x i8> %sel to <4 x i64> ret <4 x i64> %bc } +declare <32 x i8> @llvm.umax.v32i8(<32 x i8>, <32 x i8>) define <4 x i64> @test_mm256_max_epu16(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-LABEL: test_mm256_max_epu16: @@ -1684,11 +1684,11 @@ define <4 x i64> @test_mm256_max_epu16(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-NEXT: ret{{[l|q]}} %arg0 = bitcast <4 x i64> %a0 to <16 x i16> %arg1 = bitcast <4 x i64> %a1 to <16 x i16> - %cmp = icmp ugt <16 x i16> %arg0, %arg1 - %sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1 + %sel = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %arg0, <16 x i16> %arg1) %bc = bitcast <16 x i16> %sel to <4 x i64> ret <4 x i64> %bc } +declare <16 x i16> @llvm.umax.v16i16(<16 x i16>, <16 x i16>) define <4 x i64> @test_mm256_max_epu32(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-LABEL: test_mm256_max_epu32: @@ -1697,11 +1697,11 @@ define <4 x i64> @test_mm256_max_epu32(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-NEXT: ret{{[l|q]}} %arg0 = bitcast <4 x i64> %a0 to <8 x i32> %arg1 = bitcast <4 x i64> %a1 to <8 x i32> - %cmp = icmp ugt <8 x i32> %arg0, %arg1 - %sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1 + %sel = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %arg0, <8 x i32> %arg1) %bc = bitcast <8 x i32> %sel to <4 x i64> ret <4 x i64> %bc } +declare <8 x i32> @llvm.umax.v8i32(<8 x i32>, <8 x i32>) define <4 x i64> @test_mm256_min_epi8(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-LABEL: test_mm256_min_epi8: @@ -1710,11 +1710,11 @@ define <4 x i64> @test_mm256_min_epi8(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-NEXT: ret{{[l|q]}} %arg0 = bitcast <4 x i64> %a0 to <32 x i8> %arg1 = bitcast <4 x i64> %a1 to <32 x i8> - %cmp = icmp slt <32 x i8> %arg0, %arg1 - %sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1 + %sel = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %arg0, <32 x i8> %arg1) %bc = bitcast <32 x i8> %sel to <4 x i64> ret <4 x i64> %bc } +declare <32 x i8> @llvm.smin.v32i8(<32 x i8>, <32 x i8>) define <4 x i64> @test_mm256_min_epi16(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-LABEL: test_mm256_min_epi16: @@ -1723,11 +1723,11 @@ define <4 x i64> @test_mm256_min_epi16(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-NEXT: ret{{[l|q]}} %arg0 = bitcast <4 x i64> %a0 to <16 x i16> %arg1 = bitcast <4 x i64> %a1 to <16 x i16> - %cmp = icmp slt <16 x i16> %arg0, %arg1 - %sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1 + %sel = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %arg0, <16 x i16> %arg1) %bc = bitcast <16 x i16> %sel to <4 x i64> ret <4 x i64> %bc } +declare <16 x i16> @llvm.smin.v16i16(<16 x i16>, <16 x i16>) define <4 x i64> @test_mm256_min_epi32(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-LABEL: test_mm256_min_epi32: @@ -1736,11 +1736,11 @@ define <4 x i64> @test_mm256_min_epi32(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-NEXT: ret{{[l|q]}} %arg0 = bitcast <4 x i64> %a0 to <8 x i32> %arg1 = bitcast <4 x i64> %a1 to <8 x i32> - %cmp = icmp slt <8 x i32> %arg0, %arg1 - %sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1 + %sel = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %arg0, <8 x i32> %arg1) %bc = 
bitcast <8 x i32> %sel to <4 x i64> ret <4 x i64> %bc } +declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>) define <4 x i64> @test_mm256_min_epu8(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-LABEL: test_mm256_min_epu8: @@ -1749,11 +1749,11 @@ define <4 x i64> @test_mm256_min_epu8(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-NEXT: ret{{[l|q]}} %arg0 = bitcast <4 x i64> %a0 to <32 x i8> %arg1 = bitcast <4 x i64> %a1 to <32 x i8> - %cmp = icmp ult <32 x i8> %arg0, %arg1 - %sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1 + %sel = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %arg0, <32 x i8> %arg1) %bc = bitcast <32 x i8> %sel to <4 x i64> ret <4 x i64> %bc } +declare <32 x i8> @llvm.umin.v32i8(<32 x i8>, <32 x i8>) define <4 x i64> @test_mm256_min_epu16(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-LABEL: test_mm256_min_epu16: @@ -1762,11 +1762,11 @@ define <4 x i64> @test_mm256_min_epu16(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-NEXT: ret{{[l|q]}} %arg0 = bitcast <4 x i64> %a0 to <16 x i16> %arg1 = bitcast <4 x i64> %a1 to <16 x i16> - %cmp = icmp ult <16 x i16> %arg0, %arg1 - %sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1 + %sel = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %arg0, <16 x i16> %arg1) %bc = bitcast <16 x i16> %sel to <4 x i64> ret <4 x i64> %bc } +declare <16 x i16> @llvm.umin.v16i16(<16 x i16>, <16 x i16>) define <4 x i64> @test_mm256_min_epu32(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-LABEL: test_mm256_min_epu32: @@ -1775,11 +1775,11 @@ define <4 x i64> @test_mm256_min_epu32(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-NEXT: ret{{[l|q]}} %arg0 = bitcast <4 x i64> %a0 to <8 x i32> %arg1 = bitcast <4 x i64> %a1 to <8 x i32> - %cmp = icmp ult <8 x i32> %arg0, %arg1 - %sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1 + %sel = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %arg0, <8 x i32> %arg1) %bc = bitcast <8 x i32> %sel to <4 x i64> ret <4 x i64> %bc } +declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>) define i32 @test_mm256_movemask_epi8(<4 x i64> %a0) nounwind { ; CHECK-LABEL: test_mm256_movemask_epi8: diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll index e233bf5be8cfa..e3051f669e18a 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll @@ -2510,11 +2510,11 @@ define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> - %cmp = icmp sgt <8 x i16> %arg0, %arg1 - %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1 + %sel = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %arg0, <8 x i16> %arg1) %bc = bitcast <8 x i16> %sel to <2 x i64> ret <2 x i64> %bc } +declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>) define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; SSE-LABEL: test_mm_max_epu8: @@ -2533,11 +2533,11 @@ define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> - %cmp = icmp ugt <16 x i8> %arg0, %arg1 - %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1 + %sel = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %arg0, <16 x i8> %arg1) %bc = bitcast <16 x i8> %sel to <2 x i64> ret <2 x i64> %bc } +declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>) define <2 x double> 
@test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_max_pd: @@ -2606,11 +2606,11 @@ define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> - %cmp = icmp slt <8 x i16> %arg0, %arg1 - %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1 + %sel = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %arg0, <8 x i16> %arg1) %bc = bitcast <8 x i16> %sel to <2 x i64> ret <2 x i64> %bc } +declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>) define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; SSE-LABEL: test_mm_min_epu8: @@ -2629,11 +2629,11 @@ define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> - %cmp = icmp ult <16 x i8> %arg0, %arg1 - %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1 + %sel = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %arg0, <16 x i8> %arg1) %bc = bitcast <16 x i8> %sel to <2 x i64> ret <2 x i64> %bc } +declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>) define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_min_pd: diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll index 9990ac00eb054..e4db7c09ef6d8 100644 --- a/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll @@ -662,11 +662,11 @@ define <2 x i64> @test_mm_max_epi8(<2 x i64> %a0, <2 x i64> %a1) { ; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> - %cmp = icmp sgt <16 x i8> %arg0, %arg1 - %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1 + %sel = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %arg0, <16 x i8> %arg1) %bc = bitcast <16 x i8> %sel to <2 x i64> ret <2 x i64> %bc } +declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>) define <2 x i64> @test_mm_max_epi32(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_max_epi32: @@ -680,11 +680,11 @@ define <2 x i64> @test_mm_max_epi32(<2 x i64> %a0, <2 x i64> %a1) { ; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %arg1 = bitcast <2 x i64> %a1 to <4 x i32> - %cmp = icmp sgt <4 x i32> %arg0, %arg1 - %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1 + %sel = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %arg0, <4 x i32> %arg1) %bc = bitcast <4 x i32> %sel to <2 x i64> ret <2 x i64> %bc } +declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) define <2 x i64> @test_mm_max_epu16(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_max_epu16: @@ -698,11 +698,11 @@ define <2 x i64> @test_mm_max_epu16(<2 x i64> %a0, <2 x i64> %a1) { ; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> - %cmp = icmp ugt <8 x i16> %arg0, %arg1 - %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1 + %sel = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %arg0, <8 x i16> %arg1) %bc = bitcast <8 x i16> %sel to <2 x i64> ret <2 x i64> %bc } +declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>) define <2 x i64> @test_mm_max_epu32(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_max_epu32: @@ -716,11 +716,11 @@ define <2 x i64> @test_mm_max_epu32(<2 x i64> %a0, <2 x i64> %a1) { ; 
AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %arg1 = bitcast <2 x i64> %a1 to <4 x i32> - %cmp = icmp ugt <4 x i32> %arg0, %arg1 - %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1 + %sel = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %arg0, <4 x i32> %arg1) %bc = bitcast <4 x i32> %sel to <2 x i64> ret <2 x i64> %bc } +declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>) define <2 x i64> @test_mm_min_epi8(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_min_epi8: @@ -734,11 +734,11 @@ define <2 x i64> @test_mm_min_epi8(<2 x i64> %a0, <2 x i64> %a1) { ; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> - %cmp = icmp slt <16 x i8> %arg0, %arg1 - %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1 + %sel = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %arg0, <16 x i8> %arg1) %bc = bitcast <16 x i8> %sel to <2 x i64> ret <2 x i64> %bc } +declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>) define <2 x i64> @test_mm_min_epi32(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_min_epi32: @@ -752,11 +752,11 @@ define <2 x i64> @test_mm_min_epi32(<2 x i64> %a0, <2 x i64> %a1) { ; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %arg1 = bitcast <2 x i64> %a1 to <4 x i32> - %cmp = icmp slt <4 x i32> %arg0, %arg1 - %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1 + %sel = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %arg0, <4 x i32> %arg1) %bc = bitcast <4 x i32> %sel to <2 x i64> ret <2 x i64> %bc } +declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) define <2 x i64> @test_mm_min_epu16(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_min_epu16: @@ -770,11 +770,11 @@ define <2 x i64> @test_mm_min_epu16(<2 x i64> %a0, <2 x i64> %a1) { ; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> - %cmp = icmp ult <8 x i16> %arg0, %arg1 - %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1 + %sel = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %arg0, <8 x i16> %arg1) %bc = bitcast <8 x i16> %sel to <2 x i64> ret <2 x i64> %bc } +declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>) define <2 x i64> @test_mm_min_epu32(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_min_epu32: @@ -788,11 +788,11 @@ define <2 x i64> @test_mm_min_epu32(<2 x i64> %a0, <2 x i64> %a1) { ; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %arg1 = bitcast <2 x i64> %a1 to <4 x i32> - %cmp = icmp ult <4 x i32> %arg0, %arg1 - %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1 + %sel = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %arg0, <4 x i32> %arg1) %bc = bitcast <4 x i32> %sel to <2 x i64> ret <2 x i64> %bc } +declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>) define <2 x i64> @test_mm_minpos_epu16(<2 x i64> %a0) { ; SSE-LABEL: test_mm_minpos_epu16: From b768546fe0cc1d320857a6e080d4c796efb0c00c Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 15 Sep 2020 12:22:47 +0200 Subject: [PATCH 0667/1079] Revert "[InstCombine] Simplify select operand based on equality condition" This reverts commit cfff88c03cf9e9b72906a41fd11e06721d54f293. Sends instcombine into an infinite loop. 
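For context, the fold being reverted rewrote `X == Y ? f(X) : Z` by substituting Y for X and evaluating f (the block deleted from foldSelectValueEquivalence below). A minimal before/after sketch, reconstructed from the select_replacement_add_eq test whose old CHECK lines this patch restores:

```
; With the reverted fold: on the true arm %x is known to equal 1, so
; f(%x) = %x + 1 was evaluated at 1 and the select's true operand replaced.
  %cmp = icmp eq i8 %x, 1
  %add = add i8 %x, 1
  %sel = select i1 %cmp, i8 %add, i8 %y
; became:
;   %sel = select i1 %cmp, i8 2, i8 %y
```

A plausible reading of the loop on the reproducer below (an inference from the deleted code, not something the log states): the refinement-allowed substitution can hand back a value equivalent to the operand it is meant to replace, so the transform keeps reporting a change without making progress: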
``` define i1 @foo(i32 %arg, i32 %arg1) { bb: %tmp = udiv i32 %arg, %arg1 %tmp2 = mul nsw i32 %tmp, %arg1 %tmp3 = icmp eq i32 %tmp2, %arg %tmp4 = select i1 %tmp3, i32 %tmp, i32 undef %tmp5 = icmp sgt i32 %tmp4, 255 ret i1 %tmp5 } ``` --- .../InstCombine/InstCombineSelect.cpp | 30 +++++-------------- llvm/test/Transforms/InstCombine/rem.ll | 3 +- .../InstCombine/select-binop-cmp.ll | 15 ++++++---- llvm/test/Transforms/InstCombine/select.ll | 15 ++++++---- 4 files changed, 28 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index ce473410f4caf..378132011aba2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1165,32 +1165,15 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp, /// /// We can't replace %sel with %add unless we strip away the flags. /// TODO: Wrapping flags could be preserved in some cases with better analysis. -static Instruction *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp, - const SimplifyQuery &Q, - InstCombiner &IC) { +static Value *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp, + const SimplifyQuery &Q) { if (!Cmp.isEquality()) return nullptr; // Canonicalize the pattern to ICMP_EQ by swapping the select operands. Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue(); - bool Swapped = false; - if (Cmp.getPredicate() == ICmpInst::ICMP_NE) { + if (Cmp.getPredicate() == ICmpInst::ICMP_NE) std::swap(TrueVal, FalseVal); - Swapped = true; - } - - // In X == Y ? f(X) : Z, try to evaluate f(X) and replace the operand. - // Take care to avoid replacing X == Y ? X : Z with X == Y ? Y : Z, as that - // would lead to an infinite replacement cycle. - Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1); - if (TrueVal != CmpLHS) - if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, - /* AllowRefinement */ true)) - return IC.replaceOperand(Sel, Swapped ? 2 : 1, V); - if (TrueVal != CmpRHS) - if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, - /* AllowRefinement */ true)) - return IC.replaceOperand(Sel, Swapped ? 2 : 1, V); auto *FalseInst = dyn_cast<Instruction>(FalseVal); if (!FalseInst) @@ -1215,11 +1198,12 @@ static Instruction *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp, // We have an 'EQ' comparison, so the select's false value will propagate. // Example: // (X == 42) ? 43 : (X + 1) --> (X == 42) ? (X + 1) : (X + 1) --> X + 1 + Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1); if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, /* AllowRefinement */ false) == TrueVal || SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, /* AllowRefinement */ false) == TrueVal) { - return IC.replaceInstUsesWith(Sel, FalseVal); + return FalseVal; } // Restore poison-generating flags if the transform did not apply. @@ -1455,8 +1439,8 @@ tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp, /// Visit a SelectInst that has an ICmpInst as its first operand.
Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI) { - if (Instruction *NewSel = foldSelectValueEquivalence(SI, *ICI, SQ, *this)) - return NewSel; + if (Value *V = foldSelectValueEquivalence(SI, *ICI, SQ)) + return replaceInstUsesWith(SI, V); if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, *this)) return NewSel; diff --git a/llvm/test/Transforms/InstCombine/rem.ll b/llvm/test/Transforms/InstCombine/rem.ll index 37d81f2ebf6a0..2b9f5326dd152 100644 --- a/llvm/test/Transforms/InstCombine/rem.ll +++ b/llvm/test/Transforms/InstCombine/rem.ll @@ -50,7 +50,8 @@ define i8 @big_divisor(i8 %x) { define i5 @biggest_divisor(i5 %x) { ; CHECK-LABEL: @biggest_divisor( ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i5 [[X:%.*]], -1 -; CHECK-NEXT: [[REM:%.*]] = select i1 [[DOTNOT]], i5 0, i5 [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[DOTNOT]] to i5 +; CHECK-NEXT: [[REM:%.*]] = add i5 [[TMP1]], [[X]] ; CHECK-NEXT: ret i5 [[REM]] ; %rem = urem i5 %x, -1 diff --git a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll index aa450f8af8b7e..4173c31b2acb1 100644 --- a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll +++ b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll @@ -564,10 +564,12 @@ define <2 x i8> @select_xor_icmp_vec_bad(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) ret <2 x i8> %C } -define <2 x i8> @select_xor_icmp_vec_undef(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) { -; CHECK-LABEL: @select_xor_icmp_vec_undef( +; TODO: support for undefs, check for an identity constant does not handle them yet +define <2 x i8> @select_xor_icmp_vec_bad_2(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) { +; CHECK-LABEL: @select_xor_icmp_vec_bad_2( ; CHECK-NEXT: [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 0, i8 undef> -; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[Z:%.*]], <2 x i8> [[Y:%.*]] +; CHECK-NEXT: [[B:%.*]] = xor <2 x i8> [[X]], [[Z:%.*]] +; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[B]], <2 x i8> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i8> [[C]] ; %A = icmp eq <2 x i8> %x, <i8 0, i8 undef> @@ -602,10 +604,11 @@ define i32 @select_add_icmp_bad(i32 %x, i32 %y, i32 %z) { ret i32 %C } -define i32 @select_and_icmp_zero(i32 %x, i32 %y, i32 %z) { -; CHECK-LABEL: @select_and_icmp_zero( +define i32 @select_and_icmp_bad(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @select_and_icmp_bad( ; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 0, i32 [[Y:%.*]] +; CHECK-NEXT: [[B:%.*]] = and i32 [[X]], [[Z:%.*]] +; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]] ; CHECK-NEXT: ret i32 [[C]] ; %A = icmp eq i32 %x, 0 diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index c4c282e9cacf4..d9a4f4bdbd473 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -2606,7 +2606,8 @@ define i32 @pr47322_more_poisonous_replacement(i32 %arg) { define i8 @select_replacement_add_eq(i8 %x, i8 %y) { ; CHECK-LABEL: @select_replacement_add_eq( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 2, i8 [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[ADD]], i8 [[Y:%.*]] ; CHECK-NEXT: ret i8 [[SEL]] ; %cmp = icmp eq i8 %x, 1 @@ -2619,7 +2620,8 @@ define i8 @select_replacement_add_ne(i8 %x, i8 %y) { ; CHECK-LABEL: @select_replacement_add_ne( ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[X:%.*]], 1 ;
CHECK-NEXT: call void @use(i1 [[CMP]]) -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[Y:%.*]], i8 2 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[Y:%.*]], i8 [[ADD]] ; CHECK-NEXT: ret i8 [[SEL]] ; %cmp = icmp ne i8 %x, 1 @@ -2632,7 +2634,8 @@ define i8 @select_replacement_add_ne(i8 %x, i8 %y) { define i8 @select_replacement_add_nuw(i8 %x, i8 %y) { ; CHECK-LABEL: @select_replacement_add_nuw( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 2, i8 [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add nuw i8 [[X]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[ADD]], i8 [[Y:%.*]] ; CHECK-NEXT: ret i8 [[SEL]] ; %cmp = icmp eq i8 %x, 1 @@ -2644,7 +2647,8 @@ define i8 @select_replacement_add_nuw(i8 %x, i8 %y) { define i8 @select_replacement_sub(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @select_replacement_sub( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 0, i8 [[Z:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[Y]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[SUB]], i8 [[Z:%.*]] ; CHECK-NEXT: ret i8 [[SEL]] ; %cmp = icmp eq i8 %x, %y @@ -2657,7 +2661,8 @@ define i8 @select_replacement_shift(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @select_replacement_shift( ; CHECK-NEXT: [[SHR:%.*]] = lshr exact i8 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[SHR]], [[Y:%.*]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[X]], i8 [[Z:%.*]] +; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[Y]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[SHL]], i8 [[Z:%.*]] ; CHECK-NEXT: ret i8 [[SEL]] ; %shr = lshr exact i8 %x, 1 From c20852300a35a33cb6bf47028f3c95a2640dab9f Mon Sep 17 00:00:00 2001 From: Jakub Lichman Date: Tue, 15 Sep 2020 10:07:29 +0000 Subject: [PATCH 0668/1079] [mlir][integration_test] Linalg Conv folder renamed to CPU Changing directory name to reflect naming convention discussed here: https://llvm.discourse.group/t/vectorops-rfc-add-suite-of-integration-tests-for-vector-dialect-operations/1213 Differential Revision: https://reviews.llvm.org/D87678 --- .../Dialect/Linalg/{Conv => CPU}/test-conv-1d-call.mlir | 0 .../Dialect/Linalg/{Conv => CPU}/test-conv-1d-ncw-call.mlir | 0 .../Dialect/Linalg/{Conv => CPU}/test-conv-1d-nwc-call.mlir | 0 .../Dialect/Linalg/{Conv => CPU}/test-conv-2d-call.mlir | 0 .../Dialect/Linalg/{Conv => CPU}/test-conv-2d-nchw-call.mlir | 0 .../Dialect/Linalg/{Conv => CPU}/test-conv-2d-nhwc-call.mlir | 0 .../Dialect/Linalg/{Conv => CPU}/test-conv-3d-call.mlir | 0 .../Dialect/Linalg/{Conv => CPU}/test-conv-3d-ncdhw-call.mlir | 0 .../Dialect/Linalg/{Conv => CPU}/test-conv-3d-ndhwc-call.mlir | 0 9 files changed, 0 insertions(+), 0 deletions(-) rename mlir/integration_test/Dialect/Linalg/{Conv => CPU}/test-conv-1d-call.mlir (100%) rename mlir/integration_test/Dialect/Linalg/{Conv => CPU}/test-conv-1d-ncw-call.mlir (100%) rename mlir/integration_test/Dialect/Linalg/{Conv => CPU}/test-conv-1d-nwc-call.mlir (100%) rename mlir/integration_test/Dialect/Linalg/{Conv => CPU}/test-conv-2d-call.mlir (100%) rename mlir/integration_test/Dialect/Linalg/{Conv => CPU}/test-conv-2d-nchw-call.mlir (100%) rename mlir/integration_test/Dialect/Linalg/{Conv => CPU}/test-conv-2d-nhwc-call.mlir (100%) rename mlir/integration_test/Dialect/Linalg/{Conv => CPU}/test-conv-3d-call.mlir (100%) rename mlir/integration_test/Dialect/Linalg/{Conv => CPU}/test-conv-3d-ncdhw-call.mlir (100%) rename 
mlir/integration_test/Dialect/Linalg/{Conv => CPU}/test-conv-3d-ndhwc-call.mlir (100%) diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-call.mlir similarity index 100% rename from mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-call.mlir rename to mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-call.mlir diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-ncw-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir similarity index 100% rename from mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-ncw-call.mlir rename to mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-nwc-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir similarity index 100% rename from mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-nwc-call.mlir rename to mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-call.mlir similarity index 100% rename from mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-call.mlir rename to mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-call.mlir diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-nchw-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir similarity index 100% rename from mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-nchw-call.mlir rename to mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-nhwc-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir similarity index 100% rename from mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-nhwc-call.mlir rename to mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-call.mlir similarity index 100% rename from mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-call.mlir rename to mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-call.mlir diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-ncdhw-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir similarity index 100% rename from mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-ncdhw-call.mlir rename to mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-ndhwc-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir similarity index 100% rename from mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-ndhwc-call.mlir rename to mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir From 967c7b6936a66878919568b94643c942cc7de69e Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Mon, 14 Sep 2020 13:47:27 +0200 Subject: [PATCH 0669/1079] [mlir] check for failures when packing function sigunatures in std->llvm conversion When packing function results into a structure during the standard-to-llvm dialect conversion, do not assume the conversion was successful and propagate nullptr as error state. Fixes PR45184. 
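For intuition, the whole fix is the `dyn_cast` to `dyn_cast_or_null` switch in the hunk below: `convertType` returns a null `Type` on failure, and a plain `dyn_cast` asserts on a null value instead of reporting it. A minimal standalone sketch of that contract, using LLVM's generic casting utilities rather than the MLIR `Type` member casts (the helper and its names are invented for illustration):

```
#include "llvm/IR/Constants.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

// Hypothetical helper: classify a possibly-null Value. dyn_cast<ConstantInt>
// would assert when V is null; dyn_cast_or_null tolerates the null and
// returns nullptr, letting the caller propagate the failure -- the same
// pattern packFunctionResults now uses for a failed type conversion.
static int constantSignOrError(Value *V) {
  if (auto *CI = dyn_cast_or_null<ConstantInt>(V))
    return CI->isNegative() ? -1 : 1;
  return 0; // null or not a ConstantInt: report failure instead of crashing
}
```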
Reviewed By: nicolasvasilache Differential Revision: https://reviews.llvm.org/D87605 --- mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp | 2 +- mlir/test/Conversion/StandardToLLVM/invalid.mlir | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index 62b787153d84b..814a2550015d8 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -3390,7 +3390,7 @@ Type LLVMTypeConverter::packFunctionResults(ArrayRef<Type> types) { SmallVector<LLVM::LLVMType, 8> resultTypes; resultTypes.reserve(types.size()); for (auto t : types) { - auto converted = convertType(t).dyn_cast<LLVM::LLVMType>(); + auto converted = convertType(t).dyn_cast_or_null<LLVM::LLVMType>(); if (!converted) return {}; resultTypes.push_back(converted); diff --git a/mlir/test/Conversion/StandardToLLVM/invalid.mlir b/mlir/test/Conversion/StandardToLLVM/invalid.mlir index 5f79cef68ba8e..40acf4bc9d49b 100644 --- a/mlir/test/Conversion/StandardToLLVM/invalid.mlir +++ b/mlir/test/Conversion/StandardToLLVM/invalid.mlir @@ -34,3 +34,7 @@ func @mlir_cast_to_llvm_vec(%0 : vector<1x1xf32>) -> !llvm.vec<1 x float> { // Should not crash on unsupported types in function signatures. func @unsupported_signature() -> tensor<10 x i32> + +// ----- + +func @partially_supported_signature() -> (vector<10 x i32>, tensor<10 x i32>) From cd4edf94cd43754954aff0ddabd704de0f8f7ac0 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 15 Sep 2020 10:28:25 +0100 Subject: [PATCH 0670/1079] Recommit "[ConstraintSystem] Add helpers to deal with linear constraints." This patch recommits "[ConstraintSystem] Add helpers to deal with linear constraints." (it reverts the revert commit 8da6ae4ce1b686c5c13698e4c5ee937811fda6f7). The reason for the revert was using __builtin_multiply_overflow, which is not available for all compilers. The patch has been updated to use MulOverflow from MathExtras.h --- llvm/include/llvm/Analysis/ConstraintSystem.h | 57 +++++++ llvm/lib/Analysis/CMakeLists.txt | 1 + llvm/lib/Analysis/ConstraintSystem.cpp | 142 ++++++++++++++++++ llvm/unittests/Analysis/CMakeLists.txt | 1 + .../Analysis/ConstraintSystemTest.cpp | 82 ++++++++++ llvm/utils/convert-constraint-log-to-z3.py | 69 +++++++++ 6 files changed, 352 insertions(+) create mode 100644 llvm/include/llvm/Analysis/ConstraintSystem.h create mode 100644 llvm/lib/Analysis/ConstraintSystem.cpp create mode 100644 llvm/unittests/Analysis/ConstraintSystemTest.cpp create mode 100755 llvm/utils/convert-constraint-log-to-z3.py diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h new file mode 100644 index 0000000000000..7de787c1fc390 --- /dev/null +++ b/llvm/include/llvm/Analysis/ConstraintSystem.h @@ -0,0 +1,57 @@ +//===- ConstraintSystem.h - A system of linear constraints. --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CONSTRAINTSYSTEM_H +#define LLVM_ANALYSIS_CONSTRAINTSYSTEM_H + +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" + +#include <string> + +namespace llvm { + +class ConstraintSystem { + /// Current linear constraints in the system.
+ /// An entry of the form c0, c1, ... cn represents the following constraint: + /// c0 >= v0 * c1 + .... + v{n-1} * cn + SmallVector<SmallVector<int64_t, 8>, 4> Constraints; + + /// Current greatest common divisor for all coefficients in the system. + uint32_t GCD = 1; + + // Eliminate constraints from the system using Fourier–Motzkin elimination. + bool eliminateUsingFM(); + + /// Print the constraints in the system, using \p Names as variable names. + void dump(ArrayRef<std::string> Names) const; + + /// Print the constraints in the system, using x0...xn as variable names. + void dump() const; + + /// Returns true if there may be a solution for the constraints in the system. + bool mayHaveSolutionImpl(); + +public: + void addVariableRow(const SmallVector<int64_t, 8> &R) { + assert(Constraints.empty() || R.size() == Constraints.back().size()); + for (const auto &C : R) { + auto A = std::abs(C); + GCD = APIntOps::GreatestCommonDivisor({32, (uint32_t)A}, {32, GCD}) + .getZExtValue(); + } + Constraints.push_back(R); + } + + /// Returns true if there may be a solution for the constraints in the system. + bool mayHaveSolution(); +}; +} // namespace llvm + +#endif // LLVM_ANALYSIS_CONSTRAINTSYSTEM_H diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index f50439bc87627..78cc764379e17 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -39,6 +39,7 @@ add_llvm_component_library(LLVMAnalysis CodeMetrics.cpp ConstantFolding.cpp DDG.cpp + ConstraintSystem.cpp Delinearization.cpp DemandedBits.cpp DependenceAnalysis.cpp diff --git a/llvm/lib/Analysis/ConstraintSystem.cpp b/llvm/lib/Analysis/ConstraintSystem.cpp new file mode 100644 index 0000000000000..21115fc946e9b --- /dev/null +++ b/llvm/lib/Analysis/ConstraintSystem.cpp @@ -0,0 +1,142 @@ +//===- ConstraintSystem.cpp - A system of linear constraints. ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ConstraintSystem.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Debug.h" + +#include <algorithm> +#include <string> + +using namespace llvm; + +#define DEBUG_TYPE "constraint-system" + +bool ConstraintSystem::eliminateUsingFM() { + // Implementation of Fourier–Motzkin elimination, with some tricks from the + // paper Pugh, William. "The Omega test: a fast and practical integer + // programming algorithm for dependence + // analysis." + // Supercomputing'91: Proceedings of the 1991 ACM/ + // IEEE conference on Supercomputing. IEEE, 1991.
+ assert(!Constraints.empty() && + "should only be called for non-empty constraint systems"); + unsigned NumVariables = Constraints[0].size(); + SmallVector<SmallVector<int64_t, 8>, 4> NewSystem; + + unsigned NumConstraints = Constraints.size(); + uint32_t NewGCD = 1; + // FIXME do not use copy + for (unsigned R1 = 0; R1 < NumConstraints; R1++) { + if (Constraints[R1][1] == 0) { + SmallVector<int64_t, 8> NR; + NR.push_back(Constraints[R1][0]); + for (unsigned i = 2; i < NumVariables; i++) { + NR.push_back(Constraints[R1][i]); + } + NewSystem.push_back(std::move(NR)); + continue; + } + + // FIXME do not use copy + bool EliminatedInRow = false; + for (unsigned R2 = R1 + 1; R2 < NumConstraints; R2++) { + if (R1 == R2) + continue; + + // FIXME: can we do better than just dropping things here? + if (Constraints[R2][1] == 0) + continue; + + if ((Constraints[R1][1] < 0 && Constraints[R2][1] < 0) || + (Constraints[R1][1] > 0 && Constraints[R2][1] > 0)) + continue; + + unsigned LowerR = R1; + unsigned UpperR = R2; + if (Constraints[UpperR][1] < 0) + std::swap(LowerR, UpperR); + + SmallVector<int64_t, 8> NR; + for (unsigned I = 0; I < NumVariables; I++) { + if (I == 1) + continue; + + int64_t M1, M2, N; + if (MulOverflow(Constraints[UpperR][I], + ((-1) * Constraints[LowerR][1] / GCD), M1)) + return false; + if (MulOverflow(Constraints[LowerR][I], + (Constraints[UpperR][1] / GCD), M2)) + return false; + if (AddOverflow(M1, M2, N)) + return false; + NR.push_back(N); + + NewGCD = APIntOps::GreatestCommonDivisor({32, (uint32_t)NR.back()}, + {32, NewGCD}) + .getZExtValue(); + } + NewSystem.push_back(std::move(NR)); + EliminatedInRow = true; + } + } + Constraints = std::move(NewSystem); + GCD = NewGCD; + + return true; +} + +bool ConstraintSystem::mayHaveSolutionImpl() { + while (!Constraints.empty() && Constraints[0].size() > 1) { + if (!eliminateUsingFM()) + return true; + } + + if (Constraints.empty() || Constraints[0].size() > 1) + return true; + + return all_of(Constraints, [](auto &R) { return R[0] >= 0; }); +} + +void ConstraintSystem::dump(ArrayRef<std::string> Names) const { + if (Constraints.empty()) + return; + + for (auto &Row : Constraints) { + SmallVector<std::string, 16> Parts; + for (unsigned I = 1, S = Row.size(); I < S; ++I) { + if (Row[I] == 0) + continue; + std::string Coefficient = ""; + if (Row[I] != 1) + Coefficient = std::to_string(Row[I]) + " * "; + Parts.push_back(Coefficient + Names[I - 1]); + } + assert(!Parts.empty() && "need to have at least some parts"); + LLVM_DEBUG(dbgs() << join(Parts, std::string(" + ")) + << " <= " << std::to_string(Row[0]) << "\n"); + } +} + +void ConstraintSystem::dump() const { + SmallVector<std::string, 16> Names; + for (unsigned i = 1; i < Constraints.back().size(); ++i) + Names.push_back("x" + std::to_string(i)); + LLVM_DEBUG(dbgs() << "---\n"); + dump(Names); +} + +bool ConstraintSystem::mayHaveSolution() { + dump(); + bool HasSolution = mayHaveSolutionImpl(); + LLVM_DEBUG(dbgs() << (HasSolution ?
"sat" : "unsat") << "\n"); + return HasSolution; +} diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt index eb97f6289b67a..dfe570fd15749 100644 --- a/llvm/unittests/Analysis/CMakeLists.txt +++ b/llvm/unittests/Analysis/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_unittest_with_input_files(AnalysisTests CaptureTrackingTest.cpp CFGTest.cpp CGSCCPassManagerTest.cpp + ConstraintSystemTest.cpp DDGTest.cpp DivergenceAnalysisTest.cpp DomTreeUpdaterTest.cpp diff --git a/llvm/unittests/Analysis/ConstraintSystemTest.cpp b/llvm/unittests/Analysis/ConstraintSystemTest.cpp new file mode 100644 index 0000000000000..2301da7ec296f --- /dev/null +++ b/llvm/unittests/Analysis/ConstraintSystemTest.cpp @@ -0,0 +1,82 @@ +//===--- ConstraintSystemTests.cpp ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ConstraintSystem.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +TEST(ConstraintSloverTest, TestSolutionChecks) { + { + ConstraintSystem CS; + // x + y <= 10, x >= 5, y >= 6, x <= 10, y <= 10 + CS.addVariableRow({10, 1, 1}); + CS.addVariableRow({-5, -1, 0}); + CS.addVariableRow({-6, 0, -1}); + CS.addVariableRow({10, 1, 0}); + CS.addVariableRow({10, 0, 1}); + + EXPECT_FALSE(CS.mayHaveSolution()); + } + + { + ConstraintSystem CS; + // x + y <= 10, x >= 2, y >= 3, x <= 10, y <= 10 + CS.addVariableRow({10, 1, 1}); + CS.addVariableRow({-2, -1, 0}); + CS.addVariableRow({-3, 0, -1}); + CS.addVariableRow({10, 1, 0}); + CS.addVariableRow({10, 0, 1}); + + EXPECT_TRUE(CS.mayHaveSolution()); + } + + { + ConstraintSystem CS; + // x + y <= 10, 10 >= x, 10 >= y; does not have a solution. + CS.addVariableRow({10, 1, 1}); + CS.addVariableRow({-10, -1, 0}); + CS.addVariableRow({-10, 0, -1}); + + EXPECT_FALSE(CS.mayHaveSolution()); + } + + { + ConstraintSystem CS; + // x + y >= 20, 10 >= x, 10 >= y; does HAVE a solution. + CS.addVariableRow({-20, -1, -1}); + CS.addVariableRow({-10, -1, 0}); + CS.addVariableRow({-10, 0, -1}); + + EXPECT_TRUE(CS.mayHaveSolution()); + } + + { + ConstraintSystem CS; + + // 2x + y + 3z <= 10, 2x + y >= 10, y >= 1 + CS.addVariableRow({10, 2, 1, 3}); + CS.addVariableRow({-10, -2, -1, 0}); + CS.addVariableRow({-1, 0, 0, -1}); + + EXPECT_FALSE(CS.mayHaveSolution()); + } + + { + ConstraintSystem CS; + + // 2x + y + 3z <= 10, 2x + y >= 10 + CS.addVariableRow({10, 2, 1, 3}); + CS.addVariableRow({-10, -2, -1, 0}); + + EXPECT_TRUE(CS.mayHaveSolution()); + } +} +} // namespace diff --git a/llvm/utils/convert-constraint-log-to-z3.py b/llvm/utils/convert-constraint-log-to-z3.py new file mode 100755 index 0000000000000..77b0a3d95b6d4 --- /dev/null +++ b/llvm/utils/convert-constraint-log-to-z3.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python + +""" +Helper script to convert the log generated by '-debug-only=constraint-system' +to a Python script that uses Z3 to verify the decisions using Z3's Python API. 
+ +Example usage: + +> cat path/to/file.log +--- +x6 + -1 * x7 <= -1 +x6 + -1 * x7 <= -2 +sat + +> ./convert-constraint-log-to-z3.py path/to/file.log > check.py && python ./check.py + +> cat check.py + from z3 import * +x3 = Int("x3") +x1 = Int("x1") +x2 = Int("x2") +s = Solver() +s.add(x1 + -1 * x2 <= 0) +s.add(x2 + -1 * x3 <= 0) +s.add(-1 * x1 + x3 <= -1) +assert(s.check() == unsat) +print('all checks passed') +""" + + +import argparse +import re + + +def main(): + parser = argparse.ArgumentParser( + description='Convert constraint log to script to verify using Z3.') + parser.add_argument('log_file', metavar='log', type=str, + help='constraint-system log file') + args = parser.parse_args() + + content = '' + with open(args.log_file, 'rt') as f: + content = f.read() + + groups = content.split('---') + var_re = re.compile('x\d+') + + print('from z3 import *') + for group in groups: + constraints = [g.strip() for g in group.split('\n') if g.strip() != ''] + variables = set() + for c in constraints[:-1]: + for m in var_re.finditer(c): + variables.add(m.group()) + if len(variables) == 0: + continue + for v in variables: + print('{} = Int("{}")'.format(v, v)) + print('s = Solver()') + for c in constraints[:-1]: + print('s.add({})'.format(c)) + expected = constraints[-1].strip() + print('assert(s.check() == {})'.format(expected)) + print('print("all checks passed")') + + +if __name__ == '__main__': + main() From bee79cdcc6aa855f4abcaa1f7e7f9df54538496b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 15 Sep 2020 11:44:47 +0100 Subject: [PATCH 0671/1079] SelectionDAGBuilder.h - remove unnecessary includes. NFCI. Reduce to forward declarations and move implicit dependencies down to the cpp files. --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 ++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 8 ++++---- llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 057ebebe87d73..530ede44548ae 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" @@ -82,6 +83,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index e51e7bf89f8e7..4904134a7d400 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -18,7 +18,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" @@ -26,7 +25,6 @@ #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Instruction.h" -#include "llvm/IR/Statepoint.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CodeGen.h" #include 
"llvm/Support/ErrorHandling.h" @@ -39,6 +37,7 @@ namespace llvm { +class AAResults; class AllocaInst; class AtomicCmpXchgInst; class AtomicRMWInst; @@ -63,6 +62,7 @@ class FunctionLoweringInfo; class GCFunctionInfo; class GCRelocateInst; class GCResultInst; +class GCStatepointInst; class IndirectBrInst; class InvokeInst; class LandingPadInst; @@ -388,7 +388,7 @@ class SelectionDAGBuilder { SelectionDAG &DAG; const DataLayout *DL = nullptr; - AliasAnalysis *AA = nullptr; + AAResults *AA = nullptr; const TargetLibraryInfo *LibInfo; class SDAGSwitchLowering : public SwitchCG::SwitchLowering { @@ -442,7 +442,7 @@ class SelectionDAGBuilder { SL(std::make_unique(this, funcinfo)), FuncInfo(funcinfo), SwiftError(swifterror) {} - void init(GCFunctionInfo *gfi, AliasAnalysis *AA, + void init(GCFunctionInfo *gfi, AAResults *AA, const TargetLibraryInfo *li); /// Clear out the current SelectionDAG and the associated state and prepare diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 8650cfceb86c5..ffabe7a5b0411 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -75,6 +75,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" From 1abb4461ea03f1166c13c4dd5fa349d41d02be6a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 15 Sep 2020 12:00:21 +0100 Subject: [PATCH 0672/1079] StatepointLowering.cpp - remove unnecessary includes. NFCI. These are all directly included in StatepointLowering.h --- llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 83c72ca2da39b..7d3fe690cf101 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -14,12 +14,10 @@ #include "StatepointLowering.h" #include "SelectionDAGBuilder.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -30,7 +28,6 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" From 6c1f2a34fbcaa57c3dc0de3f9e4da58da7f328b6 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 15 Sep 2020 12:18:06 +0100 Subject: [PATCH 0673/1079] SpillPlacement.cpp - remove unnecessary includes. NFCI. 
These are all directly included in SpillPlacement.h --- llvm/lib/CodeGen/SpillPlacement.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/lib/CodeGen/SpillPlacement.cpp b/llvm/lib/CodeGen/SpillPlacement.cpp index 36a0ddf67b193..4bb50a285497f 100644 --- a/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/llvm/lib/CodeGen/SpillPlacement.cpp @@ -27,10 +27,7 @@ //===----------------------------------------------------------------------===// #include "SpillPlacement.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SparseSet.h" #include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -39,7 +36,6 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Support/BlockFrequency.h" #include #include #include From a21387c65470417c58021f8d3194a4510bb64f46 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 15 Sep 2020 10:47:02 +0200 Subject: [PATCH 0674/1079] Revert "RegAllocFast: Record internal state based on register units" This seems to have caused incorrect register allocation in some cases, breaking tests in the Zig standard library (PR47278). As discussed on the bug, revert back to green for now. > Record internal state based on register units. This is often more > efficient as there are typically fewer register units to update > compared to iterating over all the aliases of a register. > > Original patch by Matthias Braun, but I've been rebasing and fixing it > for almost 2 years and fixed a few bugs causing intermediate failures > to make this patch independent of the changes in > https://reviews.llvm.org/D52010. This reverts commit 66251f7e1de79a7c1620659b7f58352b8c8e892e, and follow-ups 931a68f26b9a3de853807ffad7b2cd0a2dd30922 and 0671a4c5087d40450603d9d26cf239f1a8b1367e. It also adjusts some test expectations.
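For context, a toy model of the two bookkeeping schemes (an entirely invented example, not code from RegAllocFast): the reverted patch keyed allocator state by register *unit*, while the restored code keys it by whole physical register and therefore needs the extra regDisabled state and the alias walks visible in the diff below.

```
// Toy model: a 32-bit register X0 whose 16-bit halves L0 and H0 are its
// register "units". Keeping one state entry per unit makes a partial
// (subregister) definition a direct update.
#include <array>
#include <cstdio>

enum State : unsigned { Free, Reserved };

std::array<State, 2> UnitState{Free, Free}; // [0] = L0, [1] = H0

void defineX0() { UnitState[0] = UnitState[1] = Reserved; } // whole register
void defineL0() { UnitState[0] = Reserved; }                // subregister only

int main() {
  defineL0();
  // A per-register map would now have to mark X0 "disabled" (an alias is
  // live); the per-unit map simply records that H0 is still free.
  std::printf("L0=%u H0=%u\n", UnitState[0], UnitState[1]);
  return 0;
}
```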
--- llvm/lib/CodeGen/RegAllocFast.cpp | 217 +-- .../arm64-fast-isel-conversion-fallback.ll | 8 +- .../AArch64/arm64-fast-isel-conversion.ll | 8 +- llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll | 8 +- .../CodeGen/AArch64/fast-isel-sp-adjust.ll | 3 +- llvm/test/CodeGen/AArch64/popcount.ll | 37 +- .../AMDGPU/indirect-addressing-term.ll | 12 +- .../AMDGPU/partial-sgpr-to-vgpr-spills.ll | 1260 ++++++++--------- llvm/test/CodeGen/AMDGPU/spill-m0.ll | 95 -- llvm/test/CodeGen/AMDGPU/wwm-reserved.ll | 8 +- llvm/test/CodeGen/ARM/legalize-bitcast.ll | 6 +- .../GlobalISel/llvm-ir/fptosi_and_fptoui.ll | 72 +- llvm/test/CodeGen/Mips/atomic-min-max.ll | 960 ++++++------- llvm/test/CodeGen/Mips/atomic.ll | 282 ++-- llvm/test/CodeGen/Mips/implicit-sret.ll | 14 +- llvm/test/CodeGen/PowerPC/addegluecrash.ll | 10 +- llvm/test/CodeGen/PowerPC/popcount.ll | 14 +- llvm/test/CodeGen/PowerPC/vsx.ll | 54 +- llvm/test/CodeGen/SPARC/fp16-promote.ll | 10 +- .../CodeGen/X86/2009-04-14-IllegalRegs.ll | 29 +- llvm/test/CodeGen/X86/atomic-unordered.ll | 58 +- llvm/test/CodeGen/X86/atomic32.ll | 122 +- llvm/test/CodeGen/X86/atomic64.ll | 40 +- llvm/test/CodeGen/X86/avx-load-store.ll | 22 +- .../CodeGen/X86/avx512-mask-zext-bugfix.ll | 22 +- llvm/test/CodeGen/X86/crash-O0.ll | 9 +- .../CodeGen/X86/extend-set-cc-uses-dbg.ll | 4 +- .../test/CodeGen/X86/fast-isel-nontemporal.ll | 60 +- llvm/test/CodeGen/X86/lvi-hardening-loads.ll | 4 +- llvm/test/CodeGen/X86/mixed-ptr-sizes.ll | 102 +- llvm/test/CodeGen/X86/pr1489.ll | 24 +- llvm/test/CodeGen/X86/pr27591.ll | 14 +- llvm/test/CodeGen/X86/pr30430.ll | 34 +- llvm/test/CodeGen/X86/pr30813.ll | 5 +- llvm/test/CodeGen/X86/pr32241.ll | 18 +- llvm/test/CodeGen/X86/pr32284.ll | 274 ++-- llvm/test/CodeGen/X86/pr32340.ll | 54 +- llvm/test/CodeGen/X86/pr32345.ll | 63 +- llvm/test/CodeGen/X86/pr32451.ll | 23 +- llvm/test/CodeGen/X86/pr34592.ll | 25 +- llvm/test/CodeGen/X86/pr39733.ll | 4 +- llvm/test/CodeGen/X86/pr44749.ll | 24 +- llvm/test/CodeGen/X86/pr47000.ll | 135 +- .../regalloc-fast-missing-live-out-spill.mir | 8 +- llvm/test/CodeGen/X86/swift-return.ll | 41 +- llvm/test/CodeGen/X86/swifterror.ll | 4 +- llvm/test/DebugInfo/X86/op_deref.ll | 8 +- 47 files changed, 2155 insertions(+), 2153 deletions(-) diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index e0742c4508ea0..d93fd8f601c6b 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -106,8 +106,13 @@ namespace { /// that it is alive across blocks. BitVector MayLiveAcrossBlocks; - /// State of a register unit. - enum RegUnitState { + /// State of a physical register. + enum RegState { + /// A disabled register is not available for allocation, but an alias may + /// be in use. A register can only be moved out of the disabled state if + /// all aliases are disabled. + regDisabled, + /// A free register is not currently in use and can be allocated /// immediately without checking aliases. regFree, @@ -121,8 +126,8 @@ namespace { /// register. In that case, LiveVirtRegs contains the inverse mapping. }; - /// Maps each physical register to a RegUnitState enum or virtual register. - std::vector RegUnitStates; + /// Maps each physical register to a RegState enum or a virtual register. 
+ std::vector<unsigned> PhysRegState; SmallVector<Register, 16> VirtDead; SmallVector<MachineInstr *, 32> Coalesced; @@ -184,10 +189,6 @@ namespace { bool isLastUseOfLocalReg(const MachineOperand &MO) const; void addKillFlag(const LiveReg &LRI); -#ifndef NDEBUG - bool verifyRegStateMapping(const LiveReg &LR) const; -#endif - void killVirtReg(LiveReg &LR); void killVirtReg(Register VirtReg); void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR); @@ -195,7 +196,7 @@ namespace { void usePhysReg(MachineOperand &MO); void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg, - unsigned NewState); + RegState NewState); unsigned calcSpillCost(MCPhysReg PhysReg) const; void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg); @@ -228,7 +229,7 @@ namespace { bool mayLiveOut(Register VirtReg); bool mayLiveIn(Register VirtReg); - void dumpState() const; + void dumpState(); }; } // end anonymous namespace @@ -239,8 +240,7 @@ INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false, false) void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) { - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) - RegUnitStates[*UI] = NewState; + PhysRegState[PhysReg] = NewState; } /// This allocates space for the specified virtual register to be held on the @@ -384,23 +384,12 @@ void RegAllocFast::addKillFlag(const LiveReg &LR) { } } -#ifndef NDEBUG -bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const { - for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) { - if (RegUnitStates[*UI] != LR.VirtReg) - return false; - } - - return true; -} -#endif - /// Mark virtreg as no longer available. void RegAllocFast::killVirtReg(LiveReg &LR) { - assert(verifyRegStateMapping(LR) && "Broken RegState mapping"); addKillFlag(LR); - MCPhysReg PhysReg = LR.PhysReg; - setPhysRegState(PhysReg, regFree); + assert(PhysRegState[LR.PhysReg] == LR.VirtReg && + "Broken RegState mapping"); + setPhysRegState(LR.PhysReg, regFree); + LR.PhysReg = 0; } @@ -427,9 +416,7 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, /// Do the actual work of spilling. void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) { - assert(verifyRegStateMapping(LR) && "Broken RegState mapping"); - - MCPhysReg PhysReg = LR.PhysReg; + assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping"); if (LR.Dirty) { // If this physreg is used by the instruction, we want to kill it on the @@ -437,7 +424,7 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) { bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI; LR.Dirty = false; - spill(MI, LR.VirtReg, PhysReg, SpillKill); + spill(MI, LR.VirtReg, LR.PhysReg, SpillKill); if (SpillKill) LR.LastUse = nullptr; // Don't kill register again @@ -473,16 +460,53 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { assert(PhysReg.isPhysical() && "Bad usePhysReg operand"); markRegUsedInInstr(PhysReg); + switch (PhysRegState[PhysReg]) { + case regDisabled: + break; + case regReserved: + PhysRegState[PhysReg] = regFree; + LLVM_FALLTHROUGH; + case regFree: + MO.setIsKill(); + return; + default: + // The physreg was allocated to a virtual register. That means the value we + // wanted has been clobbered. + llvm_unreachable("Instruction uses an allocated register"); + } - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { - switch (RegUnitStates[*UI]) { + // Maybe a superregister is reserved?
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { + MCPhysReg Alias = *AI; + switch (PhysRegState[Alias]) { + case regDisabled: + break; case regReserved: - RegUnitStates[*UI] = regFree; + // Either PhysReg is a subregister of Alias and we mark the + // whole register as free, or PhysReg is the superregister of + // Alias and we mark all the aliases as disabled before freeing + // PhysReg. + // In the latter case, since PhysReg was disabled, this means that + // its value is defined only by physical sub-registers. This check + // is performed by the assert of the default case in this loop. + // Note: The value of the superregister may only be partial + // defined, that is why regDisabled is a valid state for aliases. + assert((TRI->isSuperRegister(PhysReg, Alias) || + TRI->isSuperRegister(Alias, PhysReg)) && + "Instruction is not using a subregister of a reserved register"); LLVM_FALLTHROUGH; case regFree: + if (TRI->isSuperRegister(PhysReg, Alias)) { + // Leave the superregister in the working set. + setPhysRegState(Alias, regFree); + MO.getParent()->addRegisterKilled(Alias, TRI, true); + return; + } + // Some other alias was in the working set - clear it. + setPhysRegState(Alias, regDisabled); break; default: - llvm_unreachable("Unexpected reg unit state"); + llvm_unreachable("Instruction uses an alias of an allocated register"); } } @@ -495,20 +519,38 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { /// similar to defineVirtReg except the physreg is reserved instead of /// allocated. void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, - MCPhysReg PhysReg, unsigned NewState) { - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { - switch (unsigned VirtReg = RegUnitStates[*UI]) { + MCPhysReg PhysReg, RegState NewState) { + markRegUsedInInstr(PhysReg); + switch (Register VirtReg = PhysRegState[PhysReg]) { + case regDisabled: + break; + default: + spillVirtReg(MI, VirtReg); + LLVM_FALLTHROUGH; + case regFree: + case regReserved: + setPhysRegState(PhysReg, NewState); + return; + } + + // This is a disabled register, disable all aliases. + setPhysRegState(PhysReg, NewState); + for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { + MCPhysReg Alias = *AI; + switch (Register VirtReg = PhysRegState[Alias]) { + case regDisabled: + break; default: spillVirtReg(MI, VirtReg); - break; + LLVM_FALLTHROUGH; case regFree: case regReserved: + setPhysRegState(Alias, regDisabled); + if (TRI->isSuperRegister(PhysReg, Alias)) + return; break; } } - - markRegUsedInInstr(PhysReg); - setPhysRegState(PhysReg, NewState); } /// Return the cost of spilling clearing out PhysReg and aliases so it is free @@ -521,24 +563,46 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { << " is already used in instr.\n"); return spillImpossible; } + switch (Register VirtReg = PhysRegState[PhysReg]) { + case regDisabled: + break; + case regFree: + return 0; + case regReserved: + LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " + << printReg(PhysReg, TRI) << " is reserved already.\n"); + return spillImpossible; + default: { + LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && + "Missing VirtReg entry"); + return LRI->Dirty ? spillDirty : spillClean; + } + } - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { - switch (unsigned VirtReg = RegUnitStates[*UI]) { + // This is a disabled register, add up cost of aliases. 
+ LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n"); + unsigned Cost = 0; + for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { + MCPhysReg Alias = *AI; + switch (Register VirtReg = PhysRegState[Alias]) { + case regDisabled: + break; case regFree: + ++Cost; break; case regReserved: - LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " - << printReg(PhysReg, TRI) << " is reserved already.\n"); return spillImpossible; default: { LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && "Missing VirtReg entry"); - return LRI->Dirty ? spillDirty : spillClean; + Cost += LRI->Dirty ? spillDirty : spillClean; + break; } } } - return 0; + return Cost; } /// This method updates local state so that we know that PhysReg is the @@ -845,17 +909,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, if (!Reg || !Reg.isPhysical()) continue; markRegUsedInInstr(Reg); - - for (MCRegUnitIterator UI(Reg, TRI); UI.isValid(); ++UI) { - if (!ThroughRegs.count(RegUnitStates[*UI])) - continue; - - // Need to spill any aliasing registers. - for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) { - for (MCSuperRegIterator SI(*RI, TRI, true); SI.isValid(); ++SI) { - definePhysReg(MI, *SI, regFree); - } - } + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + if (ThroughRegs.count(PhysRegState[*AI])) + definePhysReg(MI, *AI, regFree); } } @@ -919,40 +975,37 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, } #ifndef NDEBUG - -void RegAllocFast::dumpState() const { - for (unsigned Unit = 1, UnitE = TRI->getNumRegUnits(); Unit != UnitE; - ++Unit) { - switch (unsigned VirtReg = RegUnitStates[Unit]) { +void RegAllocFast::dumpState() { + for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { + if (PhysRegState[Reg] == regDisabled) continue; + dbgs() << " " << printReg(Reg, TRI); + switch(PhysRegState[Reg]) { case regFree: break; case regReserved: - dbgs() << " " << printRegUnit(Unit, TRI) << "[P]"; + dbgs() << "*"; break; default: { - dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg); - LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); - assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry"); - if (I->Dirty) - dbgs() << "[D]"; - assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present"); + dbgs() << '=' << printReg(PhysRegState[Reg]); + LiveRegMap::iterator LRI = findLiveVirtReg(PhysRegState[Reg]); + assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && + "Missing VirtReg entry"); + if (LRI->Dirty) + dbgs() << "*"; + assert(LRI->PhysReg == Reg && "Bad inverse map"); break; } } } dbgs() << '\n'; // Check that LiveVirtRegs is the inverse. 
- for (const LiveReg &LR : LiveVirtRegs) { - Register VirtReg = LR.VirtReg; - assert(VirtReg.isVirtual() && "Bad map key"); - MCPhysReg PhysReg = LR.PhysReg; - if (PhysReg != 0) { - assert(Register::isPhysicalRegister(PhysReg) && - "mapped to physreg"); - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { - assert(RegUnitStates[*UI] == VirtReg && "inverse map valid"); - } - } + for (LiveRegMap::iterator i = LiveVirtRegs.begin(), + e = LiveVirtRegs.end(); i != e; ++i) { + if (!i->PhysReg) + continue; + assert(i->VirtReg.isVirtual() && "Bad map key"); + assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value"); + assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); } } #endif @@ -1194,7 +1247,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { this->MBB = &MBB; LLVM_DEBUG(dbgs() << "\nAllocating " << MBB); - RegUnitStates.assign(TRI->getNumRegUnits(), regFree); + PhysRegState.assign(TRI->getNumRegs(), regDisabled); assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); MachineBasicBlock::iterator MII = MBB.begin(); diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll index 7c546936ba27a..392af063eb8a0 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll @@ -4,8 +4,8 @@ define i32 @fptosi_wh(half %a) nounwind ssp { entry: ; CHECK-LABEL: fptosi_wh -; CHECK: fcvt s0, h0 -; CHECK: fcvtzs [[REG:w[0-9]+]], s0 +; CHECK: fcvt s1, h0 +; CHECK: fcvtzs [[REG:w[0-9]+]], s1 ; CHECK: mov w0, [[REG]] %conv = fptosi half %a to i32 ret i32 %conv @@ -15,8 +15,8 @@ entry: define i32 @fptoui_swh(half %a) nounwind ssp { entry: ; CHECK-LABEL: fptoui_swh -; CHECK: fcvt s0, h0 -; CHECK: fcvtzu [[REG:w[0-9]+]], s0 +; CHECK: fcvt s1, h0 +; CHECK: fcvtzu [[REG:w[0-9]+]], s1 ; CHECK: mov w0, [[REG]] %conv = fptoui half %a to i32 ret i32 %conv diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll index d8abf14c1366b..ed03aec07e7da 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll @@ -54,8 +54,8 @@ entry: ; CHECK: ldrh w8, [sp, #12] ; CHECK: str w8, [sp, #8] ; CHECK: ldr w8, [sp, #8] -; CHECK: ; kill: def $x8 killed $w8 -; CHECK: str x8, [sp] +; CHECK: mov x9, x8 +; CHECK: str x9, [sp] ; CHECK: ldr x0, [sp] ; CHECK: ret %a.addr = alloca i8, align 1 @@ -109,8 +109,8 @@ entry: ; CHECK: strh w8, [sp, #12] ; CHECK: ldrsh w8, [sp, #12] ; CHECK: str w8, [sp, #8] -; CHECK: ldrsw x8, [sp, #8] -; CHECK: str x8, [sp] +; CHECK: ldrsw x9, [sp, #8] +; CHECK: str x9, [sp] ; CHECK: ldr x0, [sp] ; CHECK: ret %a.addr = alloca i8, align 1 diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll index e1e889b906c01..6b3e8d747d43d 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll @@ -285,11 +285,11 @@ define i16 @to_half(float %in) { ; FAST: // %bb.0: ; FAST-NEXT: sub sp, sp, #16 // =16 ; FAST-NEXT: .cfi_def_cfa_offset 16 -; FAST-NEXT: fcvt h0, s0 +; FAST-NEXT: fcvt h1, s0 ; FAST-NEXT: // implicit-def: $w0 -; FAST-NEXT: fmov s1, w0 -; FAST-NEXT: mov.16b v1, v0 -; FAST-NEXT: fmov w8, s1 +; FAST-NEXT: fmov s0, w0 +; FAST-NEXT: mov.16b v0, v1 +; FAST-NEXT: fmov w8, s0 ; FAST-NEXT: mov w0, w8 ; FAST-NEXT: str w0, [sp, #12] // 4-byte Folded Spill 
; FAST-NEXT: mov w0, w8 diff --git a/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll b/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll index 22e3ccf2b1209..8d62fb3556661 100644 --- a/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll +++ b/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll @@ -15,7 +15,8 @@ ; CHECK-LABEL: foo: ; CHECK: sub ; CHECK-DAG: mov x[[SP:[0-9]+]], sp -; CHECK-DAG: mov w[[OFFSET:[0-9]+]], #4104 +; CHECK-DAG: mov [[TMP:w[0-9]+]], #4104 +; CHECK: mov w[[OFFSET:[0-9]+]], [[TMP]] ; CHECK: strb w0, [x[[SP]], x[[OFFSET]]] define void @foo(i8 %in) { diff --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll index 105969717e46b..1e796fff710c0 100644 --- a/llvm/test/CodeGen/AArch64/popcount.ll +++ b/llvm/test/CodeGen/AArch64/popcount.ll @@ -10,11 +10,12 @@ define i8 @popcount128(i128* nocapture nonnull readonly %0) { ; CHECK-NEXT: // implicit-def: $q1 ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: cnt v0.16b, v1.16b -; CHECK-NEXT: uaddlv h0, v0.16b +; CHECK-NEXT: cnt v1.16b, v1.16b +; CHECK-NEXT: uaddlv h2, v1.16b ; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: mov v1.16b, v2.16b +; CHECK-NEXT: fmov w1, s1 +; CHECK-NEXT: mov w0, w1 ; CHECK-NEXT: ret Entry: %1 = load i128, i128* %0, align 16 @@ -36,21 +37,21 @@ define i16 @popcount256(i256* nocapture nonnull readonly %0) { ; CHECK-NEXT: // implicit-def: $q1 ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov v1.d[1], x9 -; CHECK-NEXT: cnt v0.16b, v1.16b -; CHECK-NEXT: uaddlv h0, v0.16b +; CHECK-NEXT: cnt v1.16b, v1.16b +; CHECK-NEXT: uaddlv h2, v1.16b ; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: mov v1.16b, v2.16b +; CHECK-NEXT: fmov w10, s1 ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: // implicit-def: $q1 ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: cnt v0.16b, v1.16b -; CHECK-NEXT: uaddlv h0, v0.16b +; CHECK-NEXT: cnt v1.16b, v1.16b +; CHECK-NEXT: uaddlv h2, v1.16b ; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: add w0, w8, w9 +; CHECK-NEXT: mov v1.16b, v2.16b +; CHECK-NEXT: fmov w11, s1 +; CHECK-NEXT: add w0, w11, w10 ; CHECK-NEXT: ret Entry: %1 = load i256, i256* %0, align 16 @@ -69,11 +70,11 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) { ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: cnt v0.16b, v0.16b -; CHECK-NEXT: uaddlv h0, v0.16b -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: // kill: def $x0 killed $w0 +; CHECK-NEXT: uaddlv h1, v0.16b +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov w2, s0 +; CHECK-NEXT: mov w0, w2 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x1, v0.d[1] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll index 3d3b511ab34b7..8999cd91169ac 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll @@ -69,15 +69,15 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) { ; GCN: renamable $vgpr30 = COPY killed renamable $vgpr14 ; GCN: renamable $vgpr31 = COPY killed renamable $vgpr15 ; GCN: renamable $vgpr32 = COPY killed renamable $vgpr16 - ; GCN: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN: 
renamable $sgpr20_sgpr21 = S_MOV_B64 $exec ; GCN: renamable $vgpr1 = IMPLICIT_DEF - ; GCN: renamable $sgpr2_sgpr3 = IMPLICIT_DEF + ; GCN: renamable $sgpr22_sgpr23 = IMPLICIT_DEF ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) ; GCN: SI_SPILL_S128_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 16 into %stack.1, align 4, addrspace 5) ; GCN: SI_SPILL_V512_SAVE killed $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32, %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 64 into %stack.2, align 4, addrspace 5) - ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.3, align 4, addrspace 5) + ; GCN: SI_SPILL_S64_SAVE killed $sgpr20_sgpr21, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.3, align 4, addrspace 5) ; GCN: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) - ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) + ; GCN: SI_SPILL_S64_SAVE killed $sgpr22_sgpr23, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.5, align 4, addrspace 5) @@ -91,8 +91,8 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) { ; GCN: renamable $vgpr18 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, implicit $m0 ; GCN: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode ; GCN: renamable $vgpr19 = COPY renamable $vgpr18 - ; GCN: renamable $sgpr2_sgpr3 = COPY renamable $sgpr4_sgpr5 - ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) + ; GCN: renamable $sgpr6_sgpr7 = COPY renamable $sgpr4_sgpr5 + ; GCN: SI_SPILL_S64_SAVE killed $sgpr6_sgpr7, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.6, align 4, addrspace 5) ; GCN: SI_SPILL_V32_SAVE killed $vgpr19, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll index b119ffd303e08..e991c550c6be0 100644 --- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll @@ -11,7 +11,7 
@@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, i32 %in) #0 { ; GCN-LABEL: spill_sgprs_to_multiple_vgprs: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dword s0, s[0:1], 0xb +; GCN-NEXT: s_load_dword s2, s[0:1], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND @@ -42,354 +42,352 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[84:91] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 0 +; GCN-NEXT: v_writelane_b32 v0, s5, 1 +; GCN-NEXT: v_writelane_b32 v0, s6, 2 +; GCN-NEXT: v_writelane_b32 v0, s7, 3 +; GCN-NEXT: v_writelane_b32 v0, s8, 4 +; GCN-NEXT: v_writelane_b32 v0, s9, 5 +; GCN-NEXT: v_writelane_b32 v0, s10, 6 +; GCN-NEXT: v_writelane_b32 v0, s11, 7 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 8 +; GCN-NEXT: v_writelane_b32 v0, s5, 9 +; GCN-NEXT: v_writelane_b32 v0, s6, 10 +; GCN-NEXT: v_writelane_b32 v0, s7, 11 +; GCN-NEXT: v_writelane_b32 v0, s8, 12 +; GCN-NEXT: v_writelane_b32 v0, s9, 13 +; GCN-NEXT: v_writelane_b32 v0, s10, 14 +; GCN-NEXT: v_writelane_b32 v0, s11, 15 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 16 +; GCN-NEXT: v_writelane_b32 v0, s5, 17 +; GCN-NEXT: v_writelane_b32 v0, s6, 18 +; GCN-NEXT: v_writelane_b32 v0, s7, 19 +; GCN-NEXT: v_writelane_b32 v0, s8, 20 +; GCN-NEXT: v_writelane_b32 v0, s9, 21 +; GCN-NEXT: v_writelane_b32 v0, s10, 22 +; GCN-NEXT: v_writelane_b32 v0, s11, 23 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 24 +; GCN-NEXT: v_writelane_b32 v0, s5, 25 +; GCN-NEXT: v_writelane_b32 v0, s6, 26 +; GCN-NEXT: v_writelane_b32 v0, s7, 27 +; GCN-NEXT: v_writelane_b32 v0, s8, 28 +; GCN-NEXT: v_writelane_b32 v0, s9, 29 +; GCN-NEXT: v_writelane_b32 v0, s10, 30 +; GCN-NEXT: v_writelane_b32 v0, s11, 31 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 32 +; GCN-NEXT: v_writelane_b32 v0, s5, 33 +; GCN-NEXT: v_writelane_b32 v0, s6, 34 +; GCN-NEXT: v_writelane_b32 v0, s7, 35 +; GCN-NEXT: v_writelane_b32 v0, s8, 36 +; GCN-NEXT: v_writelane_b32 v0, s9, 37 +; GCN-NEXT: v_writelane_b32 v0, s10, 38 +; GCN-NEXT: v_writelane_b32 v0, s11, 39 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 40 +; GCN-NEXT: v_writelane_b32 v0, s5, 41 +; GCN-NEXT: v_writelane_b32 v0, s6, 42 +; GCN-NEXT: v_writelane_b32 v0, s7, 43 +; GCN-NEXT: v_writelane_b32 v0, s8, 44 +; GCN-NEXT: v_writelane_b32 v0, s9, 45 +; GCN-NEXT: v_writelane_b32 v0, s10, 46 +; GCN-NEXT: v_writelane_b32 v0, s11, 47 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 48 +; GCN-NEXT: v_writelane_b32 v0, s5, 49 +; GCN-NEXT: v_writelane_b32 v0, s6, 50 +; GCN-NEXT: v_writelane_b32 v0, s7, 51 +; GCN-NEXT: v_writelane_b32 v0, s8, 52 +; GCN-NEXT: v_writelane_b32 v0, s9, 53 +; GCN-NEXT: v_writelane_b32 v0, s10, 54 +; GCN-NEXT: v_writelane_b32 v0, s11, 55 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_mov_b32 s3, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_writelane_b32 v0, s0, 0 -; GCN-NEXT: v_writelane_b32 v0, s4, 1 -; GCN-NEXT: v_writelane_b32 v0, s5, 2 -; GCN-NEXT: v_writelane_b32 v0, s6, 3 -; GCN-NEXT: v_writelane_b32 v0, s7, 4 -; GCN-NEXT: v_writelane_b32 v0, s8, 5 
-; GCN-NEXT: v_writelane_b32 v0, s9, 6 -; GCN-NEXT: v_writelane_b32 v0, s10, 7 -; GCN-NEXT: v_writelane_b32 v0, s11, 8 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 9 -; GCN-NEXT: v_writelane_b32 v0, s1, 10 -; GCN-NEXT: v_writelane_b32 v0, s2, 11 -; GCN-NEXT: v_writelane_b32 v0, s3, 12 -; GCN-NEXT: v_writelane_b32 v0, s4, 13 -; GCN-NEXT: v_writelane_b32 v0, s5, 14 -; GCN-NEXT: v_writelane_b32 v0, s6, 15 -; GCN-NEXT: v_writelane_b32 v0, s7, 16 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 17 -; GCN-NEXT: v_writelane_b32 v0, s1, 18 -; GCN-NEXT: v_writelane_b32 v0, s2, 19 -; GCN-NEXT: v_writelane_b32 v0, s3, 20 -; GCN-NEXT: v_writelane_b32 v0, s4, 21 -; GCN-NEXT: v_writelane_b32 v0, s5, 22 -; GCN-NEXT: v_writelane_b32 v0, s6, 23 -; GCN-NEXT: v_writelane_b32 v0, s7, 24 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 25 -; GCN-NEXT: v_writelane_b32 v0, s1, 26 -; GCN-NEXT: v_writelane_b32 v0, s2, 27 -; GCN-NEXT: v_writelane_b32 v0, s3, 28 -; GCN-NEXT: v_writelane_b32 v0, s4, 29 -; GCN-NEXT: v_writelane_b32 v0, s5, 30 -; GCN-NEXT: v_writelane_b32 v0, s6, 31 -; GCN-NEXT: v_writelane_b32 v0, s7, 32 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 33 -; GCN-NEXT: v_writelane_b32 v0, s1, 34 -; GCN-NEXT: v_writelane_b32 v0, s2, 35 -; GCN-NEXT: v_writelane_b32 v0, s3, 36 -; GCN-NEXT: v_writelane_b32 v0, s4, 37 -; GCN-NEXT: v_writelane_b32 v0, s5, 38 -; GCN-NEXT: v_writelane_b32 v0, s6, 39 -; GCN-NEXT: v_writelane_b32 v0, s7, 40 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 41 -; GCN-NEXT: v_writelane_b32 v0, s1, 42 -; GCN-NEXT: v_writelane_b32 v0, s2, 43 -; GCN-NEXT: v_writelane_b32 v0, s3, 44 -; GCN-NEXT: v_writelane_b32 v0, s4, 45 -; GCN-NEXT: v_writelane_b32 v0, s5, 46 -; GCN-NEXT: v_writelane_b32 v0, s6, 47 -; GCN-NEXT: v_writelane_b32 v0, s7, 48 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 49 -; GCN-NEXT: v_writelane_b32 v0, s1, 50 -; GCN-NEXT: v_writelane_b32 v0, s2, 51 -; GCN-NEXT: v_writelane_b32 v0, s3, 52 -; GCN-NEXT: v_writelane_b32 v0, s4, 53 -; GCN-NEXT: v_writelane_b32 v0, s5, 54 -; GCN-NEXT: v_writelane_b32 v0, s6, 55 -; GCN-NEXT: v_writelane_b32 v0, s7, 56 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s8, 0 -; GCN-NEXT: v_readlane_b32 s9, v0, 0 -; GCN-NEXT: s_cmp_lg_u32 s9, s8 -; GCN-NEXT: v_writelane_b32 v0, s12, 57 -; GCN-NEXT: v_writelane_b32 v0, s13, 58 -; GCN-NEXT: v_writelane_b32 v0, s14, 59 -; GCN-NEXT: v_writelane_b32 v0, s15, 60 -; GCN-NEXT: v_writelane_b32 v0, s16, 61 -; GCN-NEXT: v_writelane_b32 v0, s17, 62 -; GCN-NEXT: v_writelane_b32 v0, s18, 63 -; GCN-NEXT: v_writelane_b32 v1, s19, 0 -; GCN-NEXT: v_writelane_b32 v1, s20, 1 -; GCN-NEXT: v_writelane_b32 v1, s21, 2 -; GCN-NEXT: v_writelane_b32 v1, s22, 3 -; GCN-NEXT: v_writelane_b32 v1, s23, 4 -; GCN-NEXT: v_writelane_b32 v1, s24, 5 -; GCN-NEXT: v_writelane_b32 v1, s25, 6 -; GCN-NEXT: v_writelane_b32 v1, s26, 7 -; GCN-NEXT: v_writelane_b32 v1, s27, 8 -; GCN-NEXT: v_writelane_b32 v1, s36, 9 -; GCN-NEXT: v_writelane_b32 v1, s37, 10 -; GCN-NEXT: v_writelane_b32 v1, s38, 11 -; GCN-NEXT: v_writelane_b32 v1, s39, 12 -; GCN-NEXT: v_writelane_b32 v1, s40, 13 -; GCN-NEXT: v_writelane_b32 v1, s41, 14 -; 
GCN-NEXT: v_writelane_b32 v1, s42, 15 -; GCN-NEXT: v_writelane_b32 v1, s43, 16 -; GCN-NEXT: v_writelane_b32 v1, s44, 17 -; GCN-NEXT: v_writelane_b32 v1, s45, 18 -; GCN-NEXT: v_writelane_b32 v1, s46, 19 -; GCN-NEXT: v_writelane_b32 v1, s47, 20 -; GCN-NEXT: v_writelane_b32 v1, s48, 21 -; GCN-NEXT: v_writelane_b32 v1, s49, 22 -; GCN-NEXT: v_writelane_b32 v1, s50, 23 -; GCN-NEXT: v_writelane_b32 v1, s51, 24 -; GCN-NEXT: v_writelane_b32 v1, s52, 25 -; GCN-NEXT: v_writelane_b32 v1, s53, 26 -; GCN-NEXT: v_writelane_b32 v1, s54, 27 -; GCN-NEXT: v_writelane_b32 v1, s55, 28 -; GCN-NEXT: v_writelane_b32 v1, s56, 29 -; GCN-NEXT: v_writelane_b32 v1, s57, 30 -; GCN-NEXT: v_writelane_b32 v1, s58, 31 -; GCN-NEXT: v_writelane_b32 v1, s59, 32 -; GCN-NEXT: v_writelane_b32 v1, s60, 33 -; GCN-NEXT: v_writelane_b32 v1, s61, 34 -; GCN-NEXT: v_writelane_b32 v1, s62, 35 -; GCN-NEXT: v_writelane_b32 v1, s63, 36 -; GCN-NEXT: v_writelane_b32 v1, s64, 37 -; GCN-NEXT: v_writelane_b32 v1, s65, 38 -; GCN-NEXT: v_writelane_b32 v1, s66, 39 -; GCN-NEXT: v_writelane_b32 v1, s67, 40 -; GCN-NEXT: v_writelane_b32 v1, s68, 41 -; GCN-NEXT: v_writelane_b32 v1, s69, 42 -; GCN-NEXT: v_writelane_b32 v1, s70, 43 -; GCN-NEXT: v_writelane_b32 v1, s71, 44 -; GCN-NEXT: v_writelane_b32 v1, s72, 45 -; GCN-NEXT: v_writelane_b32 v1, s73, 46 -; GCN-NEXT: v_writelane_b32 v1, s74, 47 -; GCN-NEXT: v_writelane_b32 v1, s75, 48 -; GCN-NEXT: v_writelane_b32 v1, s76, 49 -; GCN-NEXT: v_writelane_b32 v1, s77, 50 -; GCN-NEXT: v_writelane_b32 v1, s78, 51 -; GCN-NEXT: v_writelane_b32 v1, s79, 52 -; GCN-NEXT: v_writelane_b32 v1, s80, 53 -; GCN-NEXT: v_writelane_b32 v1, s81, 54 -; GCN-NEXT: v_writelane_b32 v1, s82, 55 -; GCN-NEXT: v_writelane_b32 v1, s83, 56 -; GCN-NEXT: v_writelane_b32 v1, s84, 57 -; GCN-NEXT: v_writelane_b32 v1, s85, 58 -; GCN-NEXT: v_writelane_b32 v1, s86, 59 -; GCN-NEXT: v_writelane_b32 v1, s87, 60 -; GCN-NEXT: v_writelane_b32 v1, s88, 61 -; GCN-NEXT: v_writelane_b32 v1, s89, 62 -; GCN-NEXT: v_writelane_b32 v1, s90, 63 -; GCN-NEXT: v_writelane_b32 v2, s91, 0 -; GCN-NEXT: v_writelane_b32 v2, s0, 1 -; GCN-NEXT: v_writelane_b32 v2, s1, 2 -; GCN-NEXT: v_writelane_b32 v2, s2, 3 -; GCN-NEXT: v_writelane_b32 v2, s3, 4 -; GCN-NEXT: v_writelane_b32 v2, s4, 5 -; GCN-NEXT: v_writelane_b32 v2, s5, 6 -; GCN-NEXT: v_writelane_b32 v2, s6, 7 -; GCN-NEXT: v_writelane_b32 v2, s7, 8 +; GCN-NEXT: s_cmp_lg_u32 s2, s3 +; GCN-NEXT: v_writelane_b32 v0, s12, 56 +; GCN-NEXT: v_writelane_b32 v0, s13, 57 +; GCN-NEXT: v_writelane_b32 v0, s14, 58 +; GCN-NEXT: v_writelane_b32 v0, s15, 59 +; GCN-NEXT: v_writelane_b32 v0, s16, 60 +; GCN-NEXT: v_writelane_b32 v0, s17, 61 +; GCN-NEXT: v_writelane_b32 v0, s18, 62 +; GCN-NEXT: v_writelane_b32 v0, s19, 63 +; GCN-NEXT: v_writelane_b32 v1, s20, 0 +; GCN-NEXT: v_writelane_b32 v1, s21, 1 +; GCN-NEXT: v_writelane_b32 v1, s22, 2 +; GCN-NEXT: v_writelane_b32 v1, s23, 3 +; GCN-NEXT: v_writelane_b32 v1, s24, 4 +; GCN-NEXT: v_writelane_b32 v1, s25, 5 +; GCN-NEXT: v_writelane_b32 v1, s26, 6 +; GCN-NEXT: v_writelane_b32 v1, s27, 7 +; GCN-NEXT: v_writelane_b32 v1, s36, 8 +; GCN-NEXT: v_writelane_b32 v1, s37, 9 +; GCN-NEXT: v_writelane_b32 v1, s38, 10 +; GCN-NEXT: v_writelane_b32 v1, s39, 11 +; GCN-NEXT: v_writelane_b32 v1, s40, 12 +; GCN-NEXT: v_writelane_b32 v1, s41, 13 +; GCN-NEXT: v_writelane_b32 v1, s42, 14 +; GCN-NEXT: v_writelane_b32 v1, s43, 15 +; GCN-NEXT: v_writelane_b32 v1, s44, 16 +; GCN-NEXT: v_writelane_b32 v1, s45, 17 +; GCN-NEXT: v_writelane_b32 v1, s46, 18 +; GCN-NEXT: v_writelane_b32 v1, s47, 19 +; GCN-NEXT: 
v_writelane_b32 v1, s48, 20 +; GCN-NEXT: v_writelane_b32 v1, s49, 21 +; GCN-NEXT: v_writelane_b32 v1, s50, 22 +; GCN-NEXT: v_writelane_b32 v1, s51, 23 +; GCN-NEXT: v_writelane_b32 v1, s52, 24 +; GCN-NEXT: v_writelane_b32 v1, s53, 25 +; GCN-NEXT: v_writelane_b32 v1, s54, 26 +; GCN-NEXT: v_writelane_b32 v1, s55, 27 +; GCN-NEXT: v_writelane_b32 v1, s56, 28 +; GCN-NEXT: v_writelane_b32 v1, s57, 29 +; GCN-NEXT: v_writelane_b32 v1, s58, 30 +; GCN-NEXT: v_writelane_b32 v1, s59, 31 +; GCN-NEXT: v_writelane_b32 v1, s60, 32 +; GCN-NEXT: v_writelane_b32 v1, s61, 33 +; GCN-NEXT: v_writelane_b32 v1, s62, 34 +; GCN-NEXT: v_writelane_b32 v1, s63, 35 +; GCN-NEXT: v_writelane_b32 v1, s64, 36 +; GCN-NEXT: v_writelane_b32 v1, s65, 37 +; GCN-NEXT: v_writelane_b32 v1, s66, 38 +; GCN-NEXT: v_writelane_b32 v1, s67, 39 +; GCN-NEXT: v_writelane_b32 v1, s68, 40 +; GCN-NEXT: v_writelane_b32 v1, s69, 41 +; GCN-NEXT: v_writelane_b32 v1, s70, 42 +; GCN-NEXT: v_writelane_b32 v1, s71, 43 +; GCN-NEXT: v_writelane_b32 v1, s72, 44 +; GCN-NEXT: v_writelane_b32 v1, s73, 45 +; GCN-NEXT: v_writelane_b32 v1, s74, 46 +; GCN-NEXT: v_writelane_b32 v1, s75, 47 +; GCN-NEXT: v_writelane_b32 v1, s76, 48 +; GCN-NEXT: v_writelane_b32 v1, s77, 49 +; GCN-NEXT: v_writelane_b32 v1, s78, 50 +; GCN-NEXT: v_writelane_b32 v1, s79, 51 +; GCN-NEXT: v_writelane_b32 v1, s80, 52 +; GCN-NEXT: v_writelane_b32 v1, s81, 53 +; GCN-NEXT: v_writelane_b32 v1, s82, 54 +; GCN-NEXT: v_writelane_b32 v1, s83, 55 +; GCN-NEXT: v_writelane_b32 v1, s84, 56 +; GCN-NEXT: v_writelane_b32 v1, s85, 57 +; GCN-NEXT: v_writelane_b32 v1, s86, 58 +; GCN-NEXT: v_writelane_b32 v1, s87, 59 +; GCN-NEXT: v_writelane_b32 v1, s88, 60 +; GCN-NEXT: v_writelane_b32 v1, s89, 61 +; GCN-NEXT: v_writelane_b32 v1, s90, 62 +; GCN-NEXT: v_writelane_b32 v1, s91, 63 +; GCN-NEXT: v_writelane_b32 v2, s4, 0 +; GCN-NEXT: v_writelane_b32 v2, s5, 1 +; GCN-NEXT: v_writelane_b32 v2, s6, 2 +; GCN-NEXT: v_writelane_b32 v2, s7, 3 +; GCN-NEXT: v_writelane_b32 v2, s8, 4 +; GCN-NEXT: v_writelane_b32 v2, s9, 5 +; GCN-NEXT: v_writelane_b32 v2, s10, 6 +; GCN-NEXT: v_writelane_b32 v2, s11, 7 ; GCN-NEXT: s_cbranch_scc1 BB0_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s0, v0, 1 -; GCN-NEXT: v_readlane_b32 s1, v0, 2 -; GCN-NEXT: v_readlane_b32 s2, v0, 3 -; GCN-NEXT: v_readlane_b32 s3, v0, 4 -; GCN-NEXT: v_readlane_b32 s4, v0, 5 -; GCN-NEXT: v_readlane_b32 s5, v0, 6 -; GCN-NEXT: v_readlane_b32 s6, v0, 7 -; GCN-NEXT: v_readlane_b32 s7, v0, 8 +; GCN-NEXT: v_readlane_b32 s0, v0, 0 +; GCN-NEXT: v_readlane_b32 s1, v0, 1 +; GCN-NEXT: v_readlane_b32 s2, v0, 2 +; GCN-NEXT: v_readlane_b32 s3, v0, 3 +; GCN-NEXT: v_readlane_b32 s4, v0, 4 +; GCN-NEXT: v_readlane_b32 s5, v0, 5 +; GCN-NEXT: v_readlane_b32 s6, v0, 6 +; GCN-NEXT: v_readlane_b32 s7, v0, 7 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 57 -; GCN-NEXT: v_readlane_b32 s1, v0, 58 -; GCN-NEXT: v_readlane_b32 s2, v0, 59 -; GCN-NEXT: v_readlane_b32 s3, v0, 60 -; GCN-NEXT: v_readlane_b32 s4, v0, 61 -; GCN-NEXT: v_readlane_b32 s5, v0, 62 -; GCN-NEXT: v_readlane_b32 s6, v0, 63 -; GCN-NEXT: v_readlane_b32 s7, v1, 0 +; GCN-NEXT: v_readlane_b32 s0, v0, 56 +; GCN-NEXT: v_readlane_b32 s1, v0, 57 +; GCN-NEXT: v_readlane_b32 s2, v0, 58 +; GCN-NEXT: v_readlane_b32 s3, v0, 59 +; GCN-NEXT: v_readlane_b32 s4, v0, 60 +; GCN-NEXT: v_readlane_b32 s5, v0, 61 +; GCN-NEXT: v_readlane_b32 s6, v0, 62 +; GCN-NEXT: v_readlane_b32 s7, v0, 63 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: 
v_readlane_b32 s0, v1, 1 -; GCN-NEXT: v_readlane_b32 s1, v1, 2 -; GCN-NEXT: v_readlane_b32 s2, v1, 3 -; GCN-NEXT: v_readlane_b32 s3, v1, 4 -; GCN-NEXT: v_readlane_b32 s4, v1, 5 -; GCN-NEXT: v_readlane_b32 s5, v1, 6 -; GCN-NEXT: v_readlane_b32 s6, v1, 7 -; GCN-NEXT: v_readlane_b32 s7, v1, 8 +; GCN-NEXT: v_readlane_b32 s0, v1, 0 +; GCN-NEXT: v_readlane_b32 s1, v1, 1 +; GCN-NEXT: v_readlane_b32 s2, v1, 2 +; GCN-NEXT: v_readlane_b32 s3, v1, 3 +; GCN-NEXT: v_readlane_b32 s4, v1, 4 +; GCN-NEXT: v_readlane_b32 s5, v1, 5 +; GCN-NEXT: v_readlane_b32 s6, v1, 6 +; GCN-NEXT: v_readlane_b32 s7, v1, 7 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 9 -; GCN-NEXT: v_readlane_b32 s1, v1, 10 -; GCN-NEXT: v_readlane_b32 s2, v1, 11 -; GCN-NEXT: v_readlane_b32 s3, v1, 12 -; GCN-NEXT: v_readlane_b32 s4, v1, 13 -; GCN-NEXT: v_readlane_b32 s5, v1, 14 -; GCN-NEXT: v_readlane_b32 s6, v1, 15 -; GCN-NEXT: v_readlane_b32 s7, v1, 16 +; GCN-NEXT: v_readlane_b32 s0, v1, 8 +; GCN-NEXT: v_readlane_b32 s1, v1, 9 +; GCN-NEXT: v_readlane_b32 s2, v1, 10 +; GCN-NEXT: v_readlane_b32 s3, v1, 11 +; GCN-NEXT: v_readlane_b32 s4, v1, 12 +; GCN-NEXT: v_readlane_b32 s5, v1, 13 +; GCN-NEXT: v_readlane_b32 s6, v1, 14 +; GCN-NEXT: v_readlane_b32 s7, v1, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 17 -; GCN-NEXT: v_readlane_b32 s1, v1, 18 -; GCN-NEXT: v_readlane_b32 s2, v1, 19 -; GCN-NEXT: v_readlane_b32 s3, v1, 20 -; GCN-NEXT: v_readlane_b32 s4, v1, 21 -; GCN-NEXT: v_readlane_b32 s5, v1, 22 -; GCN-NEXT: v_readlane_b32 s6, v1, 23 -; GCN-NEXT: v_readlane_b32 s7, v1, 24 +; GCN-NEXT: v_readlane_b32 s0, v1, 16 +; GCN-NEXT: v_readlane_b32 s1, v1, 17 +; GCN-NEXT: v_readlane_b32 s2, v1, 18 +; GCN-NEXT: v_readlane_b32 s3, v1, 19 +; GCN-NEXT: v_readlane_b32 s4, v1, 20 +; GCN-NEXT: v_readlane_b32 s5, v1, 21 +; GCN-NEXT: v_readlane_b32 s6, v1, 22 +; GCN-NEXT: v_readlane_b32 s7, v1, 23 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 25 -; GCN-NEXT: v_readlane_b32 s1, v1, 26 -; GCN-NEXT: v_readlane_b32 s2, v1, 27 -; GCN-NEXT: v_readlane_b32 s3, v1, 28 -; GCN-NEXT: v_readlane_b32 s4, v1, 29 -; GCN-NEXT: v_readlane_b32 s5, v1, 30 -; GCN-NEXT: v_readlane_b32 s6, v1, 31 -; GCN-NEXT: v_readlane_b32 s7, v1, 32 +; GCN-NEXT: v_readlane_b32 s0, v1, 24 +; GCN-NEXT: v_readlane_b32 s1, v1, 25 +; GCN-NEXT: v_readlane_b32 s2, v1, 26 +; GCN-NEXT: v_readlane_b32 s3, v1, 27 +; GCN-NEXT: v_readlane_b32 s4, v1, 28 +; GCN-NEXT: v_readlane_b32 s5, v1, 29 +; GCN-NEXT: v_readlane_b32 s6, v1, 30 +; GCN-NEXT: v_readlane_b32 s7, v1, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 33 -; GCN-NEXT: v_readlane_b32 s1, v1, 34 -; GCN-NEXT: v_readlane_b32 s2, v1, 35 -; GCN-NEXT: v_readlane_b32 s3, v1, 36 -; GCN-NEXT: v_readlane_b32 s4, v1, 37 -; GCN-NEXT: v_readlane_b32 s5, v1, 38 -; GCN-NEXT: v_readlane_b32 s6, v1, 39 -; GCN-NEXT: v_readlane_b32 s7, v1, 40 +; GCN-NEXT: v_readlane_b32 s0, v1, 32 +; GCN-NEXT: v_readlane_b32 s1, v1, 33 +; GCN-NEXT: v_readlane_b32 s2, v1, 34 +; GCN-NEXT: v_readlane_b32 s3, v1, 35 +; GCN-NEXT: v_readlane_b32 s4, v1, 36 +; GCN-NEXT: v_readlane_b32 s5, v1, 37 +; GCN-NEXT: v_readlane_b32 s6, v1, 38 +; GCN-NEXT: v_readlane_b32 s7, v1, 39 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 41 -; GCN-NEXT: v_readlane_b32 s1, v1, 42 -; GCN-NEXT: v_readlane_b32 s2, 
v1, 43 -; GCN-NEXT: v_readlane_b32 s3, v1, 44 -; GCN-NEXT: v_readlane_b32 s4, v1, 45 -; GCN-NEXT: v_readlane_b32 s5, v1, 46 -; GCN-NEXT: v_readlane_b32 s6, v1, 47 -; GCN-NEXT: v_readlane_b32 s7, v1, 48 +; GCN-NEXT: v_readlane_b32 s0, v1, 40 +; GCN-NEXT: v_readlane_b32 s1, v1, 41 +; GCN-NEXT: v_readlane_b32 s2, v1, 42 +; GCN-NEXT: v_readlane_b32 s3, v1, 43 +; GCN-NEXT: v_readlane_b32 s4, v1, 44 +; GCN-NEXT: v_readlane_b32 s5, v1, 45 +; GCN-NEXT: v_readlane_b32 s6, v1, 46 +; GCN-NEXT: v_readlane_b32 s7, v1, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 49 -; GCN-NEXT: v_readlane_b32 s1, v1, 50 -; GCN-NEXT: v_readlane_b32 s2, v1, 51 -; GCN-NEXT: v_readlane_b32 s3, v1, 52 -; GCN-NEXT: v_readlane_b32 s4, v1, 53 -; GCN-NEXT: v_readlane_b32 s5, v1, 54 -; GCN-NEXT: v_readlane_b32 s6, v1, 55 -; GCN-NEXT: v_readlane_b32 s7, v1, 56 +; GCN-NEXT: v_readlane_b32 s0, v1, 48 +; GCN-NEXT: v_readlane_b32 s1, v1, 49 +; GCN-NEXT: v_readlane_b32 s2, v1, 50 +; GCN-NEXT: v_readlane_b32 s3, v1, 51 +; GCN-NEXT: v_readlane_b32 s4, v1, 52 +; GCN-NEXT: v_readlane_b32 s5, v1, 53 +; GCN-NEXT: v_readlane_b32 s6, v1, 54 +; GCN-NEXT: v_readlane_b32 s7, v1, 55 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 57 -; GCN-NEXT: v_readlane_b32 s1, v1, 58 -; GCN-NEXT: v_readlane_b32 s2, v1, 59 -; GCN-NEXT: v_readlane_b32 s3, v1, 60 -; GCN-NEXT: v_readlane_b32 s4, v1, 61 -; GCN-NEXT: v_readlane_b32 s5, v1, 62 -; GCN-NEXT: v_readlane_b32 s6, v1, 63 -; GCN-NEXT: v_readlane_b32 s7, v2, 0 +; GCN-NEXT: v_readlane_b32 s0, v1, 56 +; GCN-NEXT: v_readlane_b32 s1, v1, 57 +; GCN-NEXT: v_readlane_b32 s2, v1, 58 +; GCN-NEXT: v_readlane_b32 s3, v1, 59 +; GCN-NEXT: v_readlane_b32 s4, v1, 60 +; GCN-NEXT: v_readlane_b32 s5, v1, 61 +; GCN-NEXT: v_readlane_b32 s6, v1, 62 +; GCN-NEXT: v_readlane_b32 s7, v1, 63 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 9 -; GCN-NEXT: v_readlane_b32 s1, v0, 10 -; GCN-NEXT: v_readlane_b32 s2, v0, 11 -; GCN-NEXT: v_readlane_b32 s3, v0, 12 -; GCN-NEXT: v_readlane_b32 s4, v0, 13 -; GCN-NEXT: v_readlane_b32 s5, v0, 14 -; GCN-NEXT: v_readlane_b32 s6, v0, 15 -; GCN-NEXT: v_readlane_b32 s7, v0, 16 +; GCN-NEXT: v_readlane_b32 s0, v0, 8 +; GCN-NEXT: v_readlane_b32 s1, v0, 9 +; GCN-NEXT: v_readlane_b32 s2, v0, 10 +; GCN-NEXT: v_readlane_b32 s3, v0, 11 +; GCN-NEXT: v_readlane_b32 s4, v0, 12 +; GCN-NEXT: v_readlane_b32 s5, v0, 13 +; GCN-NEXT: v_readlane_b32 s6, v0, 14 +; GCN-NEXT: v_readlane_b32 s7, v0, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 17 -; GCN-NEXT: v_readlane_b32 s1, v0, 18 -; GCN-NEXT: v_readlane_b32 s2, v0, 19 -; GCN-NEXT: v_readlane_b32 s3, v0, 20 -; GCN-NEXT: v_readlane_b32 s4, v0, 21 -; GCN-NEXT: v_readlane_b32 s5, v0, 22 -; GCN-NEXT: v_readlane_b32 s6, v0, 23 -; GCN-NEXT: v_readlane_b32 s7, v0, 24 +; GCN-NEXT: v_readlane_b32 s0, v0, 16 +; GCN-NEXT: v_readlane_b32 s1, v0, 17 +; GCN-NEXT: v_readlane_b32 s2, v0, 18 +; GCN-NEXT: v_readlane_b32 s3, v0, 19 +; GCN-NEXT: v_readlane_b32 s4, v0, 20 +; GCN-NEXT: v_readlane_b32 s5, v0, 21 +; GCN-NEXT: v_readlane_b32 s6, v0, 22 +; GCN-NEXT: v_readlane_b32 s7, v0, 23 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 25 -; GCN-NEXT: v_readlane_b32 s1, v0, 26 -; GCN-NEXT: v_readlane_b32 s2, v0, 27 -; GCN-NEXT: v_readlane_b32 s3, v0, 28 -; GCN-NEXT: v_readlane_b32 s4, v0, 
29 -; GCN-NEXT: v_readlane_b32 s5, v0, 30 -; GCN-NEXT: v_readlane_b32 s6, v0, 31 -; GCN-NEXT: v_readlane_b32 s7, v0, 32 +; GCN-NEXT: v_readlane_b32 s0, v0, 24 +; GCN-NEXT: v_readlane_b32 s1, v0, 25 +; GCN-NEXT: v_readlane_b32 s2, v0, 26 +; GCN-NEXT: v_readlane_b32 s3, v0, 27 +; GCN-NEXT: v_readlane_b32 s4, v0, 28 +; GCN-NEXT: v_readlane_b32 s5, v0, 29 +; GCN-NEXT: v_readlane_b32 s6, v0, 30 +; GCN-NEXT: v_readlane_b32 s7, v0, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 33 -; GCN-NEXT: v_readlane_b32 s1, v0, 34 -; GCN-NEXT: v_readlane_b32 s2, v0, 35 -; GCN-NEXT: v_readlane_b32 s3, v0, 36 -; GCN-NEXT: v_readlane_b32 s4, v0, 37 -; GCN-NEXT: v_readlane_b32 s5, v0, 38 -; GCN-NEXT: v_readlane_b32 s6, v0, 39 -; GCN-NEXT: v_readlane_b32 s7, v0, 40 +; GCN-NEXT: v_readlane_b32 s0, v0, 32 +; GCN-NEXT: v_readlane_b32 s1, v0, 33 +; GCN-NEXT: v_readlane_b32 s2, v0, 34 +; GCN-NEXT: v_readlane_b32 s3, v0, 35 +; GCN-NEXT: v_readlane_b32 s4, v0, 36 +; GCN-NEXT: v_readlane_b32 s5, v0, 37 +; GCN-NEXT: v_readlane_b32 s6, v0, 38 +; GCN-NEXT: v_readlane_b32 s7, v0, 39 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 41 -; GCN-NEXT: v_readlane_b32 s1, v0, 42 -; GCN-NEXT: v_readlane_b32 s2, v0, 43 -; GCN-NEXT: v_readlane_b32 s3, v0, 44 -; GCN-NEXT: v_readlane_b32 s4, v0, 45 -; GCN-NEXT: v_readlane_b32 s5, v0, 46 -; GCN-NEXT: v_readlane_b32 s6, v0, 47 -; GCN-NEXT: v_readlane_b32 s7, v0, 48 +; GCN-NEXT: v_readlane_b32 s0, v0, 40 +; GCN-NEXT: v_readlane_b32 s1, v0, 41 +; GCN-NEXT: v_readlane_b32 s2, v0, 42 +; GCN-NEXT: v_readlane_b32 s3, v0, 43 +; GCN-NEXT: v_readlane_b32 s4, v0, 44 +; GCN-NEXT: v_readlane_b32 s5, v0, 45 +; GCN-NEXT: v_readlane_b32 s6, v0, 46 +; GCN-NEXT: v_readlane_b32 s7, v0, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 49 -; GCN-NEXT: v_readlane_b32 s1, v0, 50 -; GCN-NEXT: v_readlane_b32 s2, v0, 51 -; GCN-NEXT: v_readlane_b32 s3, v0, 52 -; GCN-NEXT: v_readlane_b32 s4, v0, 53 -; GCN-NEXT: v_readlane_b32 s5, v0, 54 -; GCN-NEXT: v_readlane_b32 s6, v0, 55 -; GCN-NEXT: v_readlane_b32 s7, v0, 56 +; GCN-NEXT: v_readlane_b32 s0, v0, 48 +; GCN-NEXT: v_readlane_b32 s1, v0, 49 +; GCN-NEXT: v_readlane_b32 s2, v0, 50 +; GCN-NEXT: v_readlane_b32 s3, v0, 51 +; GCN-NEXT: v_readlane_b32 s4, v0, 52 +; GCN-NEXT: v_readlane_b32 s5, v0, 53 +; GCN-NEXT: v_readlane_b32 s6, v0, 54 +; GCN-NEXT: v_readlane_b32 s7, v0, 55 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v2, 1 -; GCN-NEXT: v_readlane_b32 s1, v2, 2 -; GCN-NEXT: v_readlane_b32 s2, v2, 3 -; GCN-NEXT: v_readlane_b32 s3, v2, 4 -; GCN-NEXT: v_readlane_b32 s4, v2, 5 -; GCN-NEXT: v_readlane_b32 s5, v2, 6 -; GCN-NEXT: v_readlane_b32 s6, v2, 7 -; GCN-NEXT: v_readlane_b32 s7, v2, 8 +; GCN-NEXT: v_readlane_b32 s0, v2, 0 +; GCN-NEXT: v_readlane_b32 s1, v2, 1 +; GCN-NEXT: v_readlane_b32 s2, v2, 2 +; GCN-NEXT: v_readlane_b32 s3, v2, 3 +; GCN-NEXT: v_readlane_b32 s4, v2, 4 +; GCN-NEXT: v_readlane_b32 s5, v2, 5 +; GCN-NEXT: v_readlane_b32 s6, v2, 6 +; GCN-NEXT: v_readlane_b32 s7, v2, 7 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND @@ -444,195 +442,193 @@ ret: define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32 %in) #1 { ; GCN-LABEL: split_sgpr_spill_2_vgprs: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dword s0, s[0:1], 0xb +; GCN-NEXT: s_load_dword s2, s[0:1], 0xb ; GCN-NEXT: ;;#ASMSTART ; 
GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[36:51] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 0 +; GCN-NEXT: v_writelane_b32 v0, s5, 1 +; GCN-NEXT: v_writelane_b32 v0, s6, 2 +; GCN-NEXT: v_writelane_b32 v0, s7, 3 +; GCN-NEXT: v_writelane_b32 v0, s8, 4 +; GCN-NEXT: v_writelane_b32 v0, s9, 5 +; GCN-NEXT: v_writelane_b32 v0, s10, 6 +; GCN-NEXT: v_writelane_b32 v0, s11, 7 +; GCN-NEXT: v_writelane_b32 v0, s12, 8 +; GCN-NEXT: v_writelane_b32 v0, s13, 9 +; GCN-NEXT: v_writelane_b32 v0, s14, 10 +; GCN-NEXT: v_writelane_b32 v0, s15, 11 +; GCN-NEXT: v_writelane_b32 v0, s16, 12 +; GCN-NEXT: v_writelane_b32 v0, s17, 13 +; GCN-NEXT: v_writelane_b32 v0, s18, 14 +; GCN-NEXT: v_writelane_b32 v0, s19, 15 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 16 +; GCN-NEXT: v_writelane_b32 v0, s5, 17 +; GCN-NEXT: v_writelane_b32 v0, s6, 18 +; GCN-NEXT: v_writelane_b32 v0, s7, 19 +; GCN-NEXT: v_writelane_b32 v0, s8, 20 +; GCN-NEXT: v_writelane_b32 v0, s9, 21 +; GCN-NEXT: v_writelane_b32 v0, s10, 22 +; GCN-NEXT: v_writelane_b32 v0, s11, 23 +; GCN-NEXT: v_writelane_b32 v0, s12, 24 +; GCN-NEXT: v_writelane_b32 v0, s13, 25 +; GCN-NEXT: v_writelane_b32 v0, s14, 26 +; GCN-NEXT: v_writelane_b32 v0, s15, 27 +; GCN-NEXT: v_writelane_b32 v0, s16, 28 +; GCN-NEXT: v_writelane_b32 v0, s17, 29 +; GCN-NEXT: v_writelane_b32 v0, s18, 30 +; GCN-NEXT: v_writelane_b32 v0, s19, 31 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[20:27] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:1] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_mov_b32 s3, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_writelane_b32 v0, s0, 0 -; GCN-NEXT: v_writelane_b32 v0, s4, 1 -; GCN-NEXT: v_writelane_b32 v0, s5, 2 -; GCN-NEXT: v_writelane_b32 v0, s6, 3 -; GCN-NEXT: v_writelane_b32 v0, s7, 4 -; GCN-NEXT: v_writelane_b32 v0, s8, 5 -; GCN-NEXT: v_writelane_b32 v0, s9, 6 -; GCN-NEXT: v_writelane_b32 v0, s10, 7 -; GCN-NEXT: v_writelane_b32 v0, s11, 8 -; GCN-NEXT: v_writelane_b32 v0, s12, 9 -; GCN-NEXT: v_writelane_b32 v0, s13, 10 -; GCN-NEXT: v_writelane_b32 v0, s14, 11 -; GCN-NEXT: v_writelane_b32 v0, s15, 12 -; GCN-NEXT: v_writelane_b32 v0, s16, 13 -; GCN-NEXT: v_writelane_b32 v0, s17, 14 -; GCN-NEXT: v_writelane_b32 v0, s18, 15 -; GCN-NEXT: v_writelane_b32 v0, s19, 16 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:15] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[16:31] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 17 -; GCN-NEXT: v_writelane_b32 v0, s1, 18 -; GCN-NEXT: v_writelane_b32 v0, s2, 19 -; GCN-NEXT: v_writelane_b32 v0, s3, 20 -; GCN-NEXT: v_writelane_b32 v0, s4, 21 -; GCN-NEXT: v_writelane_b32 v0, s5, 22 -; GCN-NEXT: v_writelane_b32 v0, s6, 23 -; GCN-NEXT: v_writelane_b32 v0, s7, 24 -; GCN-NEXT: v_writelane_b32 v0, s8, 25 -; GCN-NEXT: v_writelane_b32 v0, s9, 26 -; GCN-NEXT: v_writelane_b32 v0, s10, 27 -; GCN-NEXT: v_writelane_b32 v0, s11, 28 -; GCN-NEXT: v_writelane_b32 v0, s12, 29 -; GCN-NEXT: v_writelane_b32 v0, s13, 30 -; GCN-NEXT: v_writelane_b32 v0, s14, 31 -; GCN-NEXT: v_writelane_b32 v0, s15, 32 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[8:9] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s10, 0 -; GCN-NEXT: v_readlane_b32 s11, v0, 0 -; GCN-NEXT: s_cmp_lg_u32 s11, s10 -; GCN-NEXT: v_writelane_b32 v0, s36, 33 -; GCN-NEXT: 
v_writelane_b32 v0, s37, 34 -; GCN-NEXT: v_writelane_b32 v0, s38, 35 -; GCN-NEXT: v_writelane_b32 v0, s39, 36 -; GCN-NEXT: v_writelane_b32 v0, s40, 37 -; GCN-NEXT: v_writelane_b32 v0, s41, 38 -; GCN-NEXT: v_writelane_b32 v0, s42, 39 -; GCN-NEXT: v_writelane_b32 v0, s43, 40 -; GCN-NEXT: v_writelane_b32 v0, s44, 41 -; GCN-NEXT: v_writelane_b32 v0, s45, 42 -; GCN-NEXT: v_writelane_b32 v0, s46, 43 -; GCN-NEXT: v_writelane_b32 v0, s47, 44 -; GCN-NEXT: v_writelane_b32 v0, s48, 45 -; GCN-NEXT: v_writelane_b32 v0, s49, 46 -; GCN-NEXT: v_writelane_b32 v0, s50, 47 -; GCN-NEXT: v_writelane_b32 v0, s51, 48 -; GCN-NEXT: v_writelane_b32 v0, s16, 49 -; GCN-NEXT: v_writelane_b32 v0, s17, 50 -; GCN-NEXT: v_writelane_b32 v0, s18, 51 -; GCN-NEXT: v_writelane_b32 v0, s19, 52 -; GCN-NEXT: v_writelane_b32 v0, s20, 53 -; GCN-NEXT: v_writelane_b32 v0, s21, 54 -; GCN-NEXT: v_writelane_b32 v0, s22, 55 -; GCN-NEXT: v_writelane_b32 v0, s23, 56 -; GCN-NEXT: v_writelane_b32 v0, s24, 57 -; GCN-NEXT: v_writelane_b32 v0, s25, 58 -; GCN-NEXT: v_writelane_b32 v0, s26, 59 -; GCN-NEXT: v_writelane_b32 v0, s27, 60 -; GCN-NEXT: v_writelane_b32 v0, s28, 61 -; GCN-NEXT: v_writelane_b32 v0, s29, 62 -; GCN-NEXT: v_writelane_b32 v0, s30, 63 -; GCN-NEXT: v_writelane_b32 v1, s31, 0 -; GCN-NEXT: v_writelane_b32 v1, s0, 1 -; GCN-NEXT: v_writelane_b32 v1, s1, 2 -; GCN-NEXT: v_writelane_b32 v1, s2, 3 -; GCN-NEXT: v_writelane_b32 v1, s3, 4 -; GCN-NEXT: v_writelane_b32 v1, s4, 5 -; GCN-NEXT: v_writelane_b32 v1, s5, 6 -; GCN-NEXT: v_writelane_b32 v1, s6, 7 -; GCN-NEXT: v_writelane_b32 v1, s7, 8 -; GCN-NEXT: v_writelane_b32 v1, s8, 9 -; GCN-NEXT: v_writelane_b32 v1, s9, 10 +; GCN-NEXT: s_cmp_lg_u32 s2, s3 +; GCN-NEXT: v_writelane_b32 v0, s36, 32 +; GCN-NEXT: v_writelane_b32 v0, s37, 33 +; GCN-NEXT: v_writelane_b32 v0, s38, 34 +; GCN-NEXT: v_writelane_b32 v0, s39, 35 +; GCN-NEXT: v_writelane_b32 v0, s40, 36 +; GCN-NEXT: v_writelane_b32 v0, s41, 37 +; GCN-NEXT: v_writelane_b32 v0, s42, 38 +; GCN-NEXT: v_writelane_b32 v0, s43, 39 +; GCN-NEXT: v_writelane_b32 v0, s44, 40 +; GCN-NEXT: v_writelane_b32 v0, s45, 41 +; GCN-NEXT: v_writelane_b32 v0, s46, 42 +; GCN-NEXT: v_writelane_b32 v0, s47, 43 +; GCN-NEXT: v_writelane_b32 v0, s48, 44 +; GCN-NEXT: v_writelane_b32 v0, s49, 45 +; GCN-NEXT: v_writelane_b32 v0, s50, 46 +; GCN-NEXT: v_writelane_b32 v0, s51, 47 +; GCN-NEXT: v_writelane_b32 v0, s4, 48 +; GCN-NEXT: v_writelane_b32 v0, s5, 49 +; GCN-NEXT: v_writelane_b32 v0, s6, 50 +; GCN-NEXT: v_writelane_b32 v0, s7, 51 +; GCN-NEXT: v_writelane_b32 v0, s8, 52 +; GCN-NEXT: v_writelane_b32 v0, s9, 53 +; GCN-NEXT: v_writelane_b32 v0, s10, 54 +; GCN-NEXT: v_writelane_b32 v0, s11, 55 +; GCN-NEXT: v_writelane_b32 v0, s12, 56 +; GCN-NEXT: v_writelane_b32 v0, s13, 57 +; GCN-NEXT: v_writelane_b32 v0, s14, 58 +; GCN-NEXT: v_writelane_b32 v0, s15, 59 +; GCN-NEXT: v_writelane_b32 v0, s16, 60 +; GCN-NEXT: v_writelane_b32 v0, s17, 61 +; GCN-NEXT: v_writelane_b32 v0, s18, 62 +; GCN-NEXT: v_writelane_b32 v0, s19, 63 +; GCN-NEXT: v_writelane_b32 v1, s20, 0 +; GCN-NEXT: v_writelane_b32 v1, s21, 1 +; GCN-NEXT: v_writelane_b32 v1, s22, 2 +; GCN-NEXT: v_writelane_b32 v1, s23, 3 +; GCN-NEXT: v_writelane_b32 v1, s24, 4 +; GCN-NEXT: v_writelane_b32 v1, s25, 5 +; GCN-NEXT: v_writelane_b32 v1, s26, 6 +; GCN-NEXT: v_writelane_b32 v1, s27, 7 +; GCN-NEXT: v_writelane_b32 v1, s0, 8 +; GCN-NEXT: v_writelane_b32 v1, s1, 9 ; GCN-NEXT: s_cbranch_scc1 BB1_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s0, v0, 1 -; GCN-NEXT: v_readlane_b32 s1, v0, 2 -; GCN-NEXT: v_readlane_b32 s2, 
v0, 3 -; GCN-NEXT: v_readlane_b32 s3, v0, 4 -; GCN-NEXT: v_readlane_b32 s4, v0, 5 -; GCN-NEXT: v_readlane_b32 s5, v0, 6 -; GCN-NEXT: v_readlane_b32 s6, v0, 7 -; GCN-NEXT: v_readlane_b32 s7, v0, 8 -; GCN-NEXT: v_readlane_b32 s8, v0, 9 -; GCN-NEXT: v_readlane_b32 s9, v0, 10 -; GCN-NEXT: v_readlane_b32 s10, v0, 11 -; GCN-NEXT: v_readlane_b32 s11, v0, 12 -; GCN-NEXT: v_readlane_b32 s12, v0, 13 -; GCN-NEXT: v_readlane_b32 s13, v0, 14 -; GCN-NEXT: v_readlane_b32 s14, v0, 15 -; GCN-NEXT: v_readlane_b32 s15, v0, 16 +; GCN-NEXT: v_readlane_b32 s0, v0, 0 +; GCN-NEXT: v_readlane_b32 s1, v0, 1 +; GCN-NEXT: v_readlane_b32 s2, v0, 2 +; GCN-NEXT: v_readlane_b32 s3, v0, 3 +; GCN-NEXT: v_readlane_b32 s4, v0, 4 +; GCN-NEXT: v_readlane_b32 s5, v0, 5 +; GCN-NEXT: v_readlane_b32 s6, v0, 6 +; GCN-NEXT: v_readlane_b32 s7, v0, 7 +; GCN-NEXT: v_readlane_b32 s8, v0, 8 +; GCN-NEXT: v_readlane_b32 s9, v0, 9 +; GCN-NEXT: v_readlane_b32 s10, v0, 10 +; GCN-NEXT: v_readlane_b32 s11, v0, 11 +; GCN-NEXT: v_readlane_b32 s12, v0, 12 +; GCN-NEXT: v_readlane_b32 s13, v0, 13 +; GCN-NEXT: v_readlane_b32 s14, v0, 14 +; GCN-NEXT: v_readlane_b32 s15, v0, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 33 -; GCN-NEXT: v_readlane_b32 s1, v0, 34 -; GCN-NEXT: v_readlane_b32 s2, v0, 35 -; GCN-NEXT: v_readlane_b32 s3, v0, 36 -; GCN-NEXT: v_readlane_b32 s4, v0, 37 -; GCN-NEXT: v_readlane_b32 s5, v0, 38 -; GCN-NEXT: v_readlane_b32 s6, v0, 39 -; GCN-NEXT: v_readlane_b32 s7, v0, 40 -; GCN-NEXT: v_readlane_b32 s8, v0, 41 -; GCN-NEXT: v_readlane_b32 s9, v0, 42 -; GCN-NEXT: v_readlane_b32 s10, v0, 43 -; GCN-NEXT: v_readlane_b32 s11, v0, 44 -; GCN-NEXT: v_readlane_b32 s12, v0, 45 -; GCN-NEXT: v_readlane_b32 s13, v0, 46 -; GCN-NEXT: v_readlane_b32 s14, v0, 47 -; GCN-NEXT: v_readlane_b32 s15, v0, 48 +; GCN-NEXT: v_readlane_b32 s0, v0, 32 +; GCN-NEXT: v_readlane_b32 s1, v0, 33 +; GCN-NEXT: v_readlane_b32 s2, v0, 34 +; GCN-NEXT: v_readlane_b32 s3, v0, 35 +; GCN-NEXT: v_readlane_b32 s4, v0, 36 +; GCN-NEXT: v_readlane_b32 s5, v0, 37 +; GCN-NEXT: v_readlane_b32 s6, v0, 38 +; GCN-NEXT: v_readlane_b32 s7, v0, 39 +; GCN-NEXT: v_readlane_b32 s8, v0, 40 +; GCN-NEXT: v_readlane_b32 s9, v0, 41 +; GCN-NEXT: v_readlane_b32 s10, v0, 42 +; GCN-NEXT: v_readlane_b32 s11, v0, 43 +; GCN-NEXT: v_readlane_b32 s12, v0, 44 +; GCN-NEXT: v_readlane_b32 s13, v0, 45 +; GCN-NEXT: v_readlane_b32 s14, v0, 46 +; GCN-NEXT: v_readlane_b32 s15, v0, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 17 -; GCN-NEXT: v_readlane_b32 s1, v0, 18 -; GCN-NEXT: v_readlane_b32 s2, v0, 19 -; GCN-NEXT: v_readlane_b32 s3, v0, 20 -; GCN-NEXT: v_readlane_b32 s4, v0, 21 -; GCN-NEXT: v_readlane_b32 s5, v0, 22 -; GCN-NEXT: v_readlane_b32 s6, v0, 23 -; GCN-NEXT: v_readlane_b32 s7, v0, 24 -; GCN-NEXT: v_readlane_b32 s8, v0, 25 -; GCN-NEXT: v_readlane_b32 s9, v0, 26 -; GCN-NEXT: v_readlane_b32 s10, v0, 27 -; GCN-NEXT: v_readlane_b32 s11, v0, 28 -; GCN-NEXT: v_readlane_b32 s12, v0, 29 -; GCN-NEXT: v_readlane_b32 s13, v0, 30 -; GCN-NEXT: v_readlane_b32 s14, v0, 31 -; GCN-NEXT: v_readlane_b32 s15, v0, 32 +; GCN-NEXT: v_readlane_b32 s0, v0, 16 +; GCN-NEXT: v_readlane_b32 s1, v0, 17 +; GCN-NEXT: v_readlane_b32 s2, v0, 18 +; GCN-NEXT: v_readlane_b32 s3, v0, 19 +; GCN-NEXT: v_readlane_b32 s4, v0, 20 +; GCN-NEXT: v_readlane_b32 s5, v0, 21 +; GCN-NEXT: v_readlane_b32 s6, v0, 22 +; GCN-NEXT: v_readlane_b32 s7, v0, 23 +; GCN-NEXT: v_readlane_b32 s8, v0, 24 +; GCN-NEXT: v_readlane_b32 s9, v0, 
25 +; GCN-NEXT: v_readlane_b32 s10, v0, 26 +; GCN-NEXT: v_readlane_b32 s11, v0, 27 +; GCN-NEXT: v_readlane_b32 s12, v0, 28 +; GCN-NEXT: v_readlane_b32 s13, v0, 29 +; GCN-NEXT: v_readlane_b32 s14, v0, 30 +; GCN-NEXT: v_readlane_b32 s15, v0, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 1 -; GCN-NEXT: v_readlane_b32 s1, v1, 2 -; GCN-NEXT: v_readlane_b32 s2, v1, 3 -; GCN-NEXT: v_readlane_b32 s3, v1, 4 -; GCN-NEXT: v_readlane_b32 s4, v1, 5 -; GCN-NEXT: v_readlane_b32 s5, v1, 6 -; GCN-NEXT: v_readlane_b32 s6, v1, 7 -; GCN-NEXT: v_readlane_b32 s7, v1, 8 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 9 -; GCN-NEXT: v_readlane_b32 s1, v1, 10 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:1] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 49 -; GCN-NEXT: v_readlane_b32 s1, v0, 50 -; GCN-NEXT: v_readlane_b32 s2, v0, 51 -; GCN-NEXT: v_readlane_b32 s3, v0, 52 -; GCN-NEXT: v_readlane_b32 s4, v0, 53 -; GCN-NEXT: v_readlane_b32 s5, v0, 54 -; GCN-NEXT: v_readlane_b32 s6, v0, 55 -; GCN-NEXT: v_readlane_b32 s7, v0, 56 -; GCN-NEXT: v_readlane_b32 s8, v0, 57 -; GCN-NEXT: v_readlane_b32 s9, v0, 58 -; GCN-NEXT: v_readlane_b32 s10, v0, 59 -; GCN-NEXT: v_readlane_b32 s11, v0, 60 -; GCN-NEXT: v_readlane_b32 s12, v0, 61 -; GCN-NEXT: v_readlane_b32 s13, v0, 62 -; GCN-NEXT: v_readlane_b32 s14, v0, 63 -; GCN-NEXT: v_readlane_b32 s15, v1, 0 +; GCN-NEXT: v_readlane_b32 s16, v1, 0 +; GCN-NEXT: v_readlane_b32 s17, v1, 1 +; GCN-NEXT: v_readlane_b32 s18, v1, 2 +; GCN-NEXT: v_readlane_b32 s19, v1, 3 +; GCN-NEXT: v_readlane_b32 s20, v1, 4 +; GCN-NEXT: v_readlane_b32 s21, v1, 5 +; GCN-NEXT: v_readlane_b32 s22, v1, 6 +; GCN-NEXT: v_readlane_b32 s23, v1, 7 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use s[16:23] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_readlane_b32 s24, v1, 8 +; GCN-NEXT: v_readlane_b32 s25, v1, 9 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use s[24:25] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_readlane_b32 s0, v0, 48 +; GCN-NEXT: v_readlane_b32 s1, v0, 49 +; GCN-NEXT: v_readlane_b32 s2, v0, 50 +; GCN-NEXT: v_readlane_b32 s3, v0, 51 +; GCN-NEXT: v_readlane_b32 s4, v0, 52 +; GCN-NEXT: v_readlane_b32 s5, v0, 53 +; GCN-NEXT: v_readlane_b32 s6, v0, 54 +; GCN-NEXT: v_readlane_b32 s7, v0, 55 +; GCN-NEXT: v_readlane_b32 s8, v0, 56 +; GCN-NEXT: v_readlane_b32 s9, v0, 57 +; GCN-NEXT: v_readlane_b32 s10, v0, 58 +; GCN-NEXT: v_readlane_b32 s11, v0, 59 +; GCN-NEXT: v_readlane_b32 s12, v0, 60 +; GCN-NEXT: v_readlane_b32 s13, v0, 61 +; GCN-NEXT: v_readlane_b32 s14, v0, 62 +; GCN-NEXT: v_readlane_b32 s15, v0, 63 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND @@ -667,13 +663,13 @@ ret: define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 { ; GCN-LABEL: no_vgprs_last_sgpr_spill: ; GCN: ; %bb.0: -; GCN-NEXT: s_mov_b32 s56, SCRATCH_RSRC_DWORD0 -; GCN-NEXT: s_mov_b32 s57, SCRATCH_RSRC_DWORD1 -; GCN-NEXT: s_mov_b32 s58, -1 -; GCN-NEXT: s_mov_b32 s59, 0xe8f000 -; GCN-NEXT: s_add_u32 s56, s56, s3 -; GCN-NEXT: s_addc_u32 s57, s57, 0 -; GCN-NEXT: s_load_dword s0, s[0:1], 0xb +; GCN-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s22, -1 +; GCN-NEXT: s_mov_b32 s23, 0xe8f000 +; GCN-NEXT: s_add_u32 s20, s20, s3 +; GCN-NEXT: s_addc_u32 s21, s21, 0 +; GCN-NEXT: s_load_dword s2, s[0:1], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#ASMSTART @@ -692,179 +688,177 @@ define 
amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[36:51] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v31, s4, 0 +; GCN-NEXT: v_writelane_b32 v31, s5, 1 +; GCN-NEXT: v_writelane_b32 v31, s6, 2 +; GCN-NEXT: v_writelane_b32 v31, s7, 3 +; GCN-NEXT: v_writelane_b32 v31, s8, 4 +; GCN-NEXT: v_writelane_b32 v31, s9, 5 +; GCN-NEXT: v_writelane_b32 v31, s10, 6 +; GCN-NEXT: v_writelane_b32 v31, s11, 7 +; GCN-NEXT: v_writelane_b32 v31, s12, 8 +; GCN-NEXT: v_writelane_b32 v31, s13, 9 +; GCN-NEXT: v_writelane_b32 v31, s14, 10 +; GCN-NEXT: v_writelane_b32 v31, s15, 11 +; GCN-NEXT: v_writelane_b32 v31, s16, 12 +; GCN-NEXT: v_writelane_b32 v31, s17, 13 +; GCN-NEXT: v_writelane_b32 v31, s18, 14 +; GCN-NEXT: v_writelane_b32 v31, s19, 15 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v31, s4, 16 +; GCN-NEXT: v_writelane_b32 v31, s5, 17 +; GCN-NEXT: v_writelane_b32 v31, s6, 18 +; GCN-NEXT: v_writelane_b32 v31, s7, 19 +; GCN-NEXT: v_writelane_b32 v31, s8, 20 +; GCN-NEXT: v_writelane_b32 v31, s9, 21 +; GCN-NEXT: v_writelane_b32 v31, s10, 22 +; GCN-NEXT: v_writelane_b32 v31, s11, 23 +; GCN-NEXT: v_writelane_b32 v31, s12, 24 +; GCN-NEXT: v_writelane_b32 v31, s13, 25 +; GCN-NEXT: v_writelane_b32 v31, s14, 26 +; GCN-NEXT: v_writelane_b32 v31, s15, 27 +; GCN-NEXT: v_writelane_b32 v31, s16, 28 +; GCN-NEXT: v_writelane_b32 v31, s17, 29 +; GCN-NEXT: v_writelane_b32 v31, s18, 30 +; GCN-NEXT: v_writelane_b32 v31, s19, 31 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:1] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_mov_b32 s3, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_writelane_b32 v31, s0, 0 -; GCN-NEXT: v_writelane_b32 v31, s4, 1 -; GCN-NEXT: v_writelane_b32 v31, s5, 2 -; GCN-NEXT: v_writelane_b32 v31, s6, 3 -; GCN-NEXT: v_writelane_b32 v31, s7, 4 -; GCN-NEXT: v_writelane_b32 v31, s8, 5 -; GCN-NEXT: v_writelane_b32 v31, s9, 6 -; GCN-NEXT: v_writelane_b32 v31, s10, 7 -; GCN-NEXT: v_writelane_b32 v31, s11, 8 -; GCN-NEXT: v_writelane_b32 v31, s12, 9 -; GCN-NEXT: v_writelane_b32 v31, s13, 10 -; GCN-NEXT: v_writelane_b32 v31, s14, 11 -; GCN-NEXT: v_writelane_b32 v31, s15, 12 -; GCN-NEXT: v_writelane_b32 v31, s16, 13 -; GCN-NEXT: v_writelane_b32 v31, s17, 14 -; GCN-NEXT: v_writelane_b32 v31, s18, 15 -; GCN-NEXT: v_writelane_b32 v31, s19, 16 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:15] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[16:31] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[34:35] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s33, 0 -; GCN-NEXT: v_readlane_b32 s52, v31, 0 -; GCN-NEXT: s_cmp_lg_u32 s52, s33 -; GCN-NEXT: v_writelane_b32 v31, s36, 17 -; GCN-NEXT: v_writelane_b32 v31, s37, 18 -; GCN-NEXT: v_writelane_b32 v31, s38, 19 -; GCN-NEXT: v_writelane_b32 v31, s39, 20 -; GCN-NEXT: v_writelane_b32 v31, s40, 21 -; GCN-NEXT: v_writelane_b32 v31, s41, 22 -; GCN-NEXT: v_writelane_b32 v31, s42, 23 -; GCN-NEXT: v_writelane_b32 v31, s43, 24 -; GCN-NEXT: v_writelane_b32 v31, s44, 25 -; GCN-NEXT: v_writelane_b32 v31, s45, 26 -; GCN-NEXT: v_writelane_b32 v31, s46, 27 -; GCN-NEXT: v_writelane_b32 v31, s47, 28 -; GCN-NEXT: v_writelane_b32 v31, s48, 29 -; GCN-NEXT: v_writelane_b32 v31, s49, 30 -; GCN-NEXT: v_writelane_b32 v31, s50, 31 -; GCN-NEXT: v_writelane_b32 v31, s51, 32 -; GCN-NEXT: v_writelane_b32 v31, s0, 33 -; GCN-NEXT: v_writelane_b32 v31, s1, 34 -; GCN-NEXT: 
v_writelane_b32 v31, s2, 35 -; GCN-NEXT: v_writelane_b32 v31, s3, 36 -; GCN-NEXT: v_writelane_b32 v31, s4, 37 -; GCN-NEXT: v_writelane_b32 v31, s5, 38 -; GCN-NEXT: v_writelane_b32 v31, s6, 39 -; GCN-NEXT: v_writelane_b32 v31, s7, 40 -; GCN-NEXT: v_writelane_b32 v31, s8, 41 -; GCN-NEXT: v_writelane_b32 v31, s9, 42 -; GCN-NEXT: v_writelane_b32 v31, s10, 43 -; GCN-NEXT: v_writelane_b32 v31, s11, 44 -; GCN-NEXT: v_writelane_b32 v31, s12, 45 -; GCN-NEXT: v_writelane_b32 v31, s13, 46 -; GCN-NEXT: v_writelane_b32 v31, s14, 47 -; GCN-NEXT: v_writelane_b32 v31, s15, 48 -; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0 -; GCN-NEXT: v_writelane_b32 v0, s16, 0 -; GCN-NEXT: v_writelane_b32 v0, s17, 1 -; GCN-NEXT: v_writelane_b32 v0, s18, 2 -; GCN-NEXT: v_writelane_b32 v0, s19, 3 -; GCN-NEXT: v_writelane_b32 v0, s20, 4 -; GCN-NEXT: v_writelane_b32 v0, s21, 5 -; GCN-NEXT: v_writelane_b32 v0, s22, 6 -; GCN-NEXT: v_writelane_b32 v0, s23, 7 -; GCN-NEXT: v_writelane_b32 v0, s24, 8 -; GCN-NEXT: v_writelane_b32 v0, s25, 9 -; GCN-NEXT: v_writelane_b32 v0, s26, 10 -; GCN-NEXT: v_writelane_b32 v0, s27, 11 -; GCN-NEXT: v_writelane_b32 v0, s28, 12 -; GCN-NEXT: v_writelane_b32 v0, s29, 13 -; GCN-NEXT: v_writelane_b32 v0, s30, 14 -; GCN-NEXT: v_writelane_b32 v0, s31, 15 -; GCN-NEXT: s_mov_b64 s[16:17], exec -; GCN-NEXT: s_mov_b64 exec, 0xffff -; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v31, s34, 49 -; GCN-NEXT: v_writelane_b32 v31, s35, 50 -; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0 +; GCN-NEXT: s_cmp_lg_u32 s2, s3 +; GCN-NEXT: v_writelane_b32 v31, s36, 32 +; GCN-NEXT: v_writelane_b32 v31, s37, 33 +; GCN-NEXT: v_writelane_b32 v31, s38, 34 +; GCN-NEXT: v_writelane_b32 v31, s39, 35 +; GCN-NEXT: v_writelane_b32 v31, s40, 36 +; GCN-NEXT: v_writelane_b32 v31, s41, 37 +; GCN-NEXT: v_writelane_b32 v31, s42, 38 +; GCN-NEXT: v_writelane_b32 v31, s43, 39 +; GCN-NEXT: v_writelane_b32 v31, s44, 40 +; GCN-NEXT: v_writelane_b32 v31, s45, 41 +; GCN-NEXT: v_writelane_b32 v31, s46, 42 +; GCN-NEXT: v_writelane_b32 v31, s47, 43 +; GCN-NEXT: v_writelane_b32 v31, s48, 44 +; GCN-NEXT: v_writelane_b32 v31, s49, 45 +; GCN-NEXT: v_writelane_b32 v31, s50, 46 +; GCN-NEXT: v_writelane_b32 v31, s51, 47 +; GCN-NEXT: v_writelane_b32 v31, s4, 48 +; GCN-NEXT: v_writelane_b32 v31, s5, 49 +; GCN-NEXT: v_writelane_b32 v31, s6, 50 +; GCN-NEXT: v_writelane_b32 v31, s7, 51 +; GCN-NEXT: v_writelane_b32 v31, s8, 52 +; GCN-NEXT: v_writelane_b32 v31, s9, 53 +; GCN-NEXT: v_writelane_b32 v31, s10, 54 +; GCN-NEXT: v_writelane_b32 v31, s11, 55 +; GCN-NEXT: v_writelane_b32 v31, s12, 56 +; GCN-NEXT: v_writelane_b32 v31, s13, 57 +; GCN-NEXT: v_writelane_b32 v31, s14, 58 +; GCN-NEXT: v_writelane_b32 v31, s15, 59 +; GCN-NEXT: v_writelane_b32 v31, s16, 60 +; GCN-NEXT: v_writelane_b32 v31, s17, 61 +; GCN-NEXT: v_writelane_b32 v31, s18, 62 +; GCN-NEXT: v_writelane_b32 v31, s19, 63 +; GCN-NEXT: buffer_store_dword v0, off, s[20:23], 0 +; GCN-NEXT: v_writelane_b32 v0, s0, 0 +; GCN-NEXT: v_writelane_b32 v0, s1, 1 +; GCN-NEXT: s_mov_b64 s[0:1], exec +; GCN-NEXT: s_mov_b64 exec, 3 +; GCN-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[0:1] +; GCN-NEXT: buffer_load_dword v0, off, s[20:23], 0 ; GCN-NEXT: s_cbranch_scc1 BB2_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s0, v31, 1 -; GCN-NEXT: v_readlane_b32 s1, v31, 2 -; GCN-NEXT: v_readlane_b32 s2, v31, 3 -; GCN-NEXT: v_readlane_b32 s3, v31, 
4 -; GCN-NEXT: v_readlane_b32 s4, v31, 5 -; GCN-NEXT: v_readlane_b32 s5, v31, 6 -; GCN-NEXT: v_readlane_b32 s6, v31, 7 -; GCN-NEXT: v_readlane_b32 s7, v31, 8 -; GCN-NEXT: v_readlane_b32 s8, v31, 9 -; GCN-NEXT: v_readlane_b32 s9, v31, 10 -; GCN-NEXT: v_readlane_b32 s10, v31, 11 -; GCN-NEXT: v_readlane_b32 s11, v31, 12 -; GCN-NEXT: v_readlane_b32 s12, v31, 13 -; GCN-NEXT: v_readlane_b32 s13, v31, 14 -; GCN-NEXT: v_readlane_b32 s14, v31, 15 -; GCN-NEXT: v_readlane_b32 s15, v31, 16 +; GCN-NEXT: v_readlane_b32 s0, v31, 0 +; GCN-NEXT: v_readlane_b32 s1, v31, 1 +; GCN-NEXT: v_readlane_b32 s2, v31, 2 +; GCN-NEXT: v_readlane_b32 s3, v31, 3 +; GCN-NEXT: v_readlane_b32 s4, v31, 4 +; GCN-NEXT: v_readlane_b32 s5, v31, 5 +; GCN-NEXT: v_readlane_b32 s6, v31, 6 +; GCN-NEXT: v_readlane_b32 s7, v31, 7 +; GCN-NEXT: v_readlane_b32 s8, v31, 8 +; GCN-NEXT: v_readlane_b32 s9, v31, 9 +; GCN-NEXT: v_readlane_b32 s10, v31, 10 +; GCN-NEXT: v_readlane_b32 s11, v31, 11 +; GCN-NEXT: v_readlane_b32 s12, v31, 12 +; GCN-NEXT: v_readlane_b32 s13, v31, 13 +; GCN-NEXT: v_readlane_b32 s14, v31, 14 +; GCN-NEXT: v_readlane_b32 s15, v31, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v31, 17 -; GCN-NEXT: v_readlane_b32 s1, v31, 18 -; GCN-NEXT: v_readlane_b32 s2, v31, 19 -; GCN-NEXT: v_readlane_b32 s3, v31, 20 -; GCN-NEXT: v_readlane_b32 s4, v31, 21 -; GCN-NEXT: v_readlane_b32 s5, v31, 22 -; GCN-NEXT: v_readlane_b32 s6, v31, 23 -; GCN-NEXT: v_readlane_b32 s7, v31, 24 -; GCN-NEXT: v_readlane_b32 s8, v31, 25 -; GCN-NEXT: v_readlane_b32 s9, v31, 26 -; GCN-NEXT: v_readlane_b32 s10, v31, 27 -; GCN-NEXT: v_readlane_b32 s11, v31, 28 -; GCN-NEXT: v_readlane_b32 s12, v31, 29 -; GCN-NEXT: v_readlane_b32 s13, v31, 30 -; GCN-NEXT: v_readlane_b32 s14, v31, 31 -; GCN-NEXT: v_readlane_b32 s15, v31, 32 +; GCN-NEXT: v_readlane_b32 s0, v31, 32 +; GCN-NEXT: v_readlane_b32 s1, v31, 33 +; GCN-NEXT: v_readlane_b32 s2, v31, 34 +; GCN-NEXT: v_readlane_b32 s3, v31, 35 +; GCN-NEXT: v_readlane_b32 s4, v31, 36 +; GCN-NEXT: v_readlane_b32 s5, v31, 37 +; GCN-NEXT: v_readlane_b32 s6, v31, 38 +; GCN-NEXT: v_readlane_b32 s7, v31, 39 +; GCN-NEXT: v_readlane_b32 s8, v31, 40 +; GCN-NEXT: v_readlane_b32 s9, v31, 41 +; GCN-NEXT: v_readlane_b32 s10, v31, 42 +; GCN-NEXT: v_readlane_b32 s11, v31, 43 +; GCN-NEXT: v_readlane_b32 s12, v31, 44 +; GCN-NEXT: v_readlane_b32 s13, v31, 45 +; GCN-NEXT: v_readlane_b32 s14, v31, 46 +; GCN-NEXT: v_readlane_b32 s15, v31, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v31, 33 -; GCN-NEXT: v_readlane_b32 s1, v31, 34 -; GCN-NEXT: v_readlane_b32 s2, v31, 35 -; GCN-NEXT: v_readlane_b32 s3, v31, 36 -; GCN-NEXT: v_readlane_b32 s4, v31, 37 -; GCN-NEXT: v_readlane_b32 s5, v31, 38 -; GCN-NEXT: v_readlane_b32 s6, v31, 39 -; GCN-NEXT: v_readlane_b32 s7, v31, 40 -; GCN-NEXT: v_readlane_b32 s8, v31, 41 -; GCN-NEXT: v_readlane_b32 s9, v31, 42 -; GCN-NEXT: v_readlane_b32 s10, v31, 43 -; GCN-NEXT: v_readlane_b32 s11, v31, 44 -; GCN-NEXT: v_readlane_b32 s12, v31, 45 -; GCN-NEXT: v_readlane_b32 s13, v31, 46 -; GCN-NEXT: v_readlane_b32 s14, v31, 47 -; GCN-NEXT: v_readlane_b32 s15, v31, 48 +; GCN-NEXT: v_readlane_b32 s0, v31, 16 +; GCN-NEXT: v_readlane_b32 s1, v31, 17 +; GCN-NEXT: v_readlane_b32 s2, v31, 18 +; GCN-NEXT: v_readlane_b32 s3, v31, 19 +; GCN-NEXT: v_readlane_b32 s4, v31, 20 +; GCN-NEXT: v_readlane_b32 s5, v31, 21 +; GCN-NEXT: v_readlane_b32 s6, v31, 22 +; GCN-NEXT: v_readlane_b32 s7, v31, 23 +; GCN-NEXT: v_readlane_b32 s8, 
v31, 24 +; GCN-NEXT: v_readlane_b32 s9, v31, 25 +; GCN-NEXT: v_readlane_b32 s10, v31, 26 +; GCN-NEXT: v_readlane_b32 s11, v31, 27 +; GCN-NEXT: v_readlane_b32 s12, v31, 28 +; GCN-NEXT: v_readlane_b32 s13, v31, 29 +; GCN-NEXT: v_readlane_b32 s14, v31, 30 +; GCN-NEXT: v_readlane_b32 s15, v31, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b64 s[0:1], exec -; GCN-NEXT: s_mov_b64 exec, 0xffff -; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[0:1] -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s0, v0, 0 -; GCN-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-NEXT: v_readlane_b32 s2, v0, 2 -; GCN-NEXT: v_readlane_b32 s3, v0, 3 -; GCN-NEXT: v_readlane_b32 s4, v0, 4 -; GCN-NEXT: v_readlane_b32 s5, v0, 5 -; GCN-NEXT: v_readlane_b32 s6, v0, 6 -; GCN-NEXT: v_readlane_b32 s7, v0, 7 -; GCN-NEXT: v_readlane_b32 s8, v0, 8 -; GCN-NEXT: v_readlane_b32 s9, v0, 9 -; GCN-NEXT: v_readlane_b32 s10, v0, 10 -; GCN-NEXT: v_readlane_b32 s11, v0, 11 -; GCN-NEXT: v_readlane_b32 s12, v0, 12 -; GCN-NEXT: v_readlane_b32 s13, v0, 13 -; GCN-NEXT: v_readlane_b32 s14, v0, 14 -; GCN-NEXT: v_readlane_b32 s15, v0, 15 +; GCN-NEXT: v_readlane_b32 s0, v31, 48 +; GCN-NEXT: v_readlane_b32 s1, v31, 49 +; GCN-NEXT: v_readlane_b32 s2, v31, 50 +; GCN-NEXT: v_readlane_b32 s3, v31, 51 +; GCN-NEXT: v_readlane_b32 s4, v31, 52 +; GCN-NEXT: v_readlane_b32 s5, v31, 53 +; GCN-NEXT: v_readlane_b32 s6, v31, 54 +; GCN-NEXT: v_readlane_b32 s7, v31, 55 +; GCN-NEXT: v_readlane_b32 s8, v31, 56 +; GCN-NEXT: v_readlane_b32 s9, v31, 57 +; GCN-NEXT: v_readlane_b32 s10, v31, 58 +; GCN-NEXT: v_readlane_b32 s11, v31, 59 +; GCN-NEXT: v_readlane_b32 s12, v31, 60 +; GCN-NEXT: v_readlane_b32 s13, v31, 61 +; GCN-NEXT: v_readlane_b32 s14, v31, 62 +; GCN-NEXT: v_readlane_b32 s15, v31, 63 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v31, 49 -; GCN-NEXT: v_readlane_b32 s1, v31, 50 +; GCN-NEXT: s_mov_b64 s[16:17], exec +; GCN-NEXT: s_mov_b64 exec, 3 +; GCN-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[16:17] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_readlane_b32 s16, v0, 0 +; GCN-NEXT: v_readlane_b32 s17, v0, 1 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:1] +; GCN-NEXT: ; use s[16:17] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: BB2_2: ; %ret ; GCN-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll index 9b629a5f91110..a03318ead716c 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll @@ -77,101 +77,6 @@ endif: ; preds = %else, %if ret void } -; Force save and restore of m0 during SMEM spill -; GCN-LABEL: {{^}}m0_unavailable_spill: - -; GCN: ; def m0, 1 - -; GCN: s_mov_b32 m0, s0 -; GCN: v_interp_mov_f32 - -; GCN: ; clobber m0 - -; TOSMEM: s_mov_b32 s2, m0 -; TOSMEM: s_add_u32 m0, s3, 0x100 -; TOSMEM-NEXT: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Spill -; TOSMEM: s_mov_b32 m0, s2 - -; TOSMEM: s_mov_b64 exec, -; TOSMEM: s_cbranch_execz -; TOSMEM: s_branch - -; TOSMEM: BB{{[0-9]+_[0-9]+}}: -; TOSMEM: s_add_u32 m0, s3, 0x100 -; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Reload - -; GCN-NOT: v_readlane_b32 m0 -; GCN-NOT: s_buffer_store_dword m0 -; GCN-NOT: s_buffer_load_dword m0 -define amdgpu_kernel void @m0_unavailable_spill(i32 %m0.arg) #0 { 
-main_body: - %m0 = call i32 asm sideeffect "; def $0, 1", "={m0}"() #0 - %tmp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %m0.arg) - call void asm sideeffect "; clobber $0", "~{m0}"() #0 - %cmp = fcmp ueq float 0.000000e+00, %tmp - br i1 %cmp, label %if, label %else - -if: ; preds = %main_body - store volatile i32 8, i32 addrspace(1)* undef - br label %endif - -else: ; preds = %main_body - store volatile i32 11, i32 addrspace(1)* undef - br label %endif - -endif: - ret void -} - -; GCN-LABEL: {{^}}restore_m0_lds: -; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]] -; TOSMEM: s_cmp_eq_u32 -; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it -; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_add_u32 m0, s3, 0x100 -; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill -; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_add_u32 m0, s3, 0x200 -; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill -; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_cbranch_scc1 - -; TOSMEM: s_mov_b32 m0, -1 - -; TOSMEM: s_mov_b32 s2, m0 -; TOSMEM: s_add_u32 m0, s3, 0x200 -; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload -; TOSMEM: s_mov_b32 m0, s2 -; TOSMEM: s_waitcnt lgkmcnt(0) - -; TOSMEM: ds_write_b64 - -; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_add_u32 m0, s3, 0x100 -; TOSMEM: s_buffer_load_dword s2, s[88:91], m0 ; 4-byte Folded Reload -; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_waitcnt lgkmcnt(0) -; TOSMEM-NOT: m0 -; TOSMEM: s_mov_b32 m0, s2 -; TOSMEM: ; use m0 - -; TOSMEM: s_dcache_wb -; TOSMEM: s_endpgm -define amdgpu_kernel void @restore_m0_lds(i32 %arg) { - %m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() #0 - %sval = load volatile i64, i64 addrspace(4)* undef - %cmp = icmp eq i32 %arg, 0 - br i1 %cmp, label %ret, label %bb - -bb: - store volatile i64 %sval, i64 addrspace(3)* undef - call void asm sideeffect "; use $0", "{m0}"(i32 %m0) #0 - br label %ret - -ret: - ret void -} - declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0 diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll index 1a48e76a241bb..e4beac77e1be2 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll @@ -94,10 +94,10 @@ define i32 @called(i32 %a) noinline { ; GFX9-LABEL: {{^}}call: define amdgpu_kernel void @call(<4 x i32> inreg %tmp14, i32 inreg %arg) { -; GFX9-O0: v_mov_b32_e32 v0, s0 +; GFX9-O0: v_mov_b32_e32 v0, s2 ; GFX9-O3: v_mov_b32_e32 v2, s0 ; GFX9-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s3 ; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_not_b64 exec, exec %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0) @@ -142,8 +142,8 @@ define amdgpu_kernel void @call_i64(<4 x i32> inreg %tmp14, i64 inreg %arg) { ; GFX9-O0: buffer_store_dword v1 ; GFX9: s_swappc_b64 %tmp134 = call i64 @called_i64(i64 %tmp107) -; GFX9-O0: buffer_load_dword v4 -; GFX9-O0: buffer_load_dword v5 +; GFX9-O0: buffer_load_dword v6 +; GFX9-O0: buffer_load_dword v7 %tmp136 = add i64 %tmp134, %tmp107 %tmp137 = tail call i64 @llvm.amdgcn.wwm.i64(i64 %tmp136) %tmp138 = bitcast i64 %tmp137 to <2 x i32> diff --git a/llvm/test/CodeGen/ARM/legalize-bitcast.ll b/llvm/test/CodeGen/ARM/legalize-bitcast.ll 
index 529775df5fd7d..478ff985bf475 100644 --- a/llvm/test/CodeGen/ARM/legalize-bitcast.ll +++ b/llvm/test/CodeGen/ARM/legalize-bitcast.ll @@ -49,9 +49,9 @@ define i16 @int_to_vec(i80 %in) { ; CHECK-NEXT: vmov.32 d16[0], r0 ; CHECK-NEXT: @ implicit-def: $q9 ; CHECK-NEXT: vmov.f64 d18, d16 -; CHECK-NEXT: vrev32.16 q8, q9 -; CHECK-NEXT: @ kill: def $d16 killed $d16 killed $q8 -; CHECK-NEXT: vmov.u16 r0, d16[0] +; CHECK-NEXT: vrev32.16 q9, q9 +; CHECK-NEXT: @ kill: def $d18 killed $d18 killed $q9 +; CHECK-NEXT: vmov.u16 r0, d18[0] ; CHECK-NEXT: bx lr %vec = bitcast i80 %in to <5 x i16> %e0 = extractelement <5 x i16> %vec, i32 0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll index a98c6eb9fd6cb..c63f24ea692ce 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll @@ -235,15 +235,15 @@ define i32 @f64tou32(double %a) { ; FP32-NEXT: mfc1 $1, $f0 ; FP32-NEXT: lui $2, 16864 ; FP32-NEXT: ori $3, $zero, 0 -; FP32-NEXT: mtc1 $3, $f0 -; FP32-NEXT: mtc1 $2, $f1 -; FP32-NEXT: sub.d $f2, $f12, $f0 -; FP32-NEXT: trunc.w.d $f2, $f2 -; FP32-NEXT: mfc1 $2, $f2 +; FP32-NEXT: mtc1 $3, $f2 +; FP32-NEXT: mtc1 $2, $f3 +; FP32-NEXT: sub.d $f4, $f12, $f2 +; FP32-NEXT: trunc.w.d $f0, $f4 +; FP32-NEXT: mfc1 $2, $f0 ; FP32-NEXT: lui $3, 32768 ; FP32-NEXT: xor $2, $2, $3 ; FP32-NEXT: addiu $3, $zero, 1 -; FP32-NEXT: c.ult.d $f12, $f0 +; FP32-NEXT: c.ult.d $f12, $f2 ; FP32-NEXT: movf $3, $zero, $fcc0 ; FP32-NEXT: andi $3, $3, 1 ; FP32-NEXT: movn $2, $1, $3 @@ -256,15 +256,15 @@ define i32 @f64tou32(double %a) { ; FP64-NEXT: mfc1 $1, $f0 ; FP64-NEXT: lui $2, 16864 ; FP64-NEXT: ori $3, $zero, 0 -; FP64-NEXT: mtc1 $3, $f0 -; FP64-NEXT: mthc1 $2, $f0 -; FP64-NEXT: sub.d $f1, $f12, $f0 -; FP64-NEXT: trunc.w.d $f1, $f1 -; FP64-NEXT: mfc1 $2, $f1 +; FP64-NEXT: mtc1 $3, $f1 +; FP64-NEXT: mthc1 $2, $f1 +; FP64-NEXT: sub.d $f2, $f12, $f1 +; FP64-NEXT: trunc.w.d $f0, $f2 +; FP64-NEXT: mfc1 $2, $f0 ; FP64-NEXT: lui $3, 32768 ; FP64-NEXT: xor $2, $2, $3 ; FP64-NEXT: addiu $3, $zero, 1 -; FP64-NEXT: c.ult.d $f12, $f0 +; FP64-NEXT: c.ult.d $f12, $f1 ; FP64-NEXT: movf $3, $zero, $fcc0 ; FP64-NEXT: andi $3, $3, 1 ; FP64-NEXT: movn $2, $1, $3 @@ -282,15 +282,15 @@ define zeroext i16 @f64tou16(double %a) { ; FP32-NEXT: mfc1 $1, $f0 ; FP32-NEXT: lui $2, 16864 ; FP32-NEXT: ori $3, $zero, 0 -; FP32-NEXT: mtc1 $3, $f0 -; FP32-NEXT: mtc1 $2, $f1 -; FP32-NEXT: sub.d $f2, $f12, $f0 -; FP32-NEXT: trunc.w.d $f2, $f2 -; FP32-NEXT: mfc1 $2, $f2 +; FP32-NEXT: mtc1 $3, $f2 +; FP32-NEXT: mtc1 $2, $f3 +; FP32-NEXT: sub.d $f4, $f12, $f2 +; FP32-NEXT: trunc.w.d $f0, $f4 +; FP32-NEXT: mfc1 $2, $f0 ; FP32-NEXT: lui $3, 32768 ; FP32-NEXT: xor $2, $2, $3 ; FP32-NEXT: addiu $3, $zero, 1 -; FP32-NEXT: c.ult.d $f12, $f0 +; FP32-NEXT: c.ult.d $f12, $f2 ; FP32-NEXT: movf $3, $zero, $fcc0 ; FP32-NEXT: andi $3, $3, 1 ; FP32-NEXT: movn $2, $1, $3 @@ -304,15 +304,15 @@ define zeroext i16 @f64tou16(double %a) { ; FP64-NEXT: mfc1 $1, $f0 ; FP64-NEXT: lui $2, 16864 ; FP64-NEXT: ori $3, $zero, 0 -; FP64-NEXT: mtc1 $3, $f0 -; FP64-NEXT: mthc1 $2, $f0 -; FP64-NEXT: sub.d $f1, $f12, $f0 -; FP64-NEXT: trunc.w.d $f1, $f1 -; FP64-NEXT: mfc1 $2, $f1 +; FP64-NEXT: mtc1 $3, $f1 +; FP64-NEXT: mthc1 $2, $f1 +; FP64-NEXT: sub.d $f2, $f12, $f1 +; FP64-NEXT: trunc.w.d $f0, $f2 +; FP64-NEXT: mfc1 $2, $f0 ; FP64-NEXT: lui $3, 32768 ; FP64-NEXT: xor $2, $2, $3 ; FP64-NEXT: addiu $3, $zero, 1 -; FP64-NEXT: 
c.ult.d $f12, $f0 +; FP64-NEXT: c.ult.d $f12, $f1 ; FP64-NEXT: movf $3, $zero, $fcc0 ; FP64-NEXT: andi $3, $3, 1 ; FP64-NEXT: movn $2, $1, $3 @@ -331,15 +331,15 @@ define zeroext i8 @f64tou8(double %a) { ; FP32-NEXT: mfc1 $1, $f0 ; FP32-NEXT: lui $2, 16864 ; FP32-NEXT: ori $3, $zero, 0 -; FP32-NEXT: mtc1 $3, $f0 -; FP32-NEXT: mtc1 $2, $f1 -; FP32-NEXT: sub.d $f2, $f12, $f0 -; FP32-NEXT: trunc.w.d $f2, $f2 -; FP32-NEXT: mfc1 $2, $f2 +; FP32-NEXT: mtc1 $3, $f2 +; FP32-NEXT: mtc1 $2, $f3 +; FP32-NEXT: sub.d $f4, $f12, $f2 +; FP32-NEXT: trunc.w.d $f0, $f4 +; FP32-NEXT: mfc1 $2, $f0 ; FP32-NEXT: lui $3, 32768 ; FP32-NEXT: xor $2, $2, $3 ; FP32-NEXT: addiu $3, $zero, 1 -; FP32-NEXT: c.ult.d $f12, $f0 +; FP32-NEXT: c.ult.d $f12, $f2 ; FP32-NEXT: movf $3, $zero, $fcc0 ; FP32-NEXT: andi $3, $3, 1 ; FP32-NEXT: movn $2, $1, $3 @@ -353,15 +353,15 @@ define zeroext i8 @f64tou8(double %a) { ; FP64-NEXT: mfc1 $1, $f0 ; FP64-NEXT: lui $2, 16864 ; FP64-NEXT: ori $3, $zero, 0 -; FP64-NEXT: mtc1 $3, $f0 -; FP64-NEXT: mthc1 $2, $f0 -; FP64-NEXT: sub.d $f1, $f12, $f0 -; FP64-NEXT: trunc.w.d $f1, $f1 -; FP64-NEXT: mfc1 $2, $f1 +; FP64-NEXT: mtc1 $3, $f1 +; FP64-NEXT: mthc1 $2, $f1 +; FP64-NEXT: sub.d $f2, $f12, $f1 +; FP64-NEXT: trunc.w.d $f0, $f2 +; FP64-NEXT: mfc1 $2, $f0 ; FP64-NEXT: lui $3, 32768 ; FP64-NEXT: xor $2, $2, $3 ; FP64-NEXT: addiu $3, $zero, 1 -; FP64-NEXT: c.ult.d $f12, $f0 +; FP64-NEXT: c.ult.d $f12, $f1 ; FP64-NEXT: movf $3, $zero, $fcc0 ; FP64-NEXT: andi $3, $3, 1 ; FP64-NEXT: movn $2, $1, $3 diff --git a/llvm/test/CodeGen/Mips/atomic-min-max.ll b/llvm/test/CodeGen/Mips/atomic-min-max.ll index 646af650c00e7..a6200851940cd 100644 --- a/llvm/test/CodeGen/Mips/atomic-min-max.ll +++ b/llvm/test/CodeGen/Mips/atomic-min-max.ll @@ -1154,26 +1154,26 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 65535 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 +; MIPS64-NEXT: nor $6, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB4_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: slt $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movn $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB4_1 +; MIPS64-NEXT: ll $8, 0($1) +; MIPS64-NEXT: slt $11, $8, $5 +; MIPS64-NEXT: move $9, $8 +; MIPS64-NEXT: movn $9, $5, $11 +; MIPS64-NEXT: and $9, $9, $3 +; MIPS64-NEXT: and $10, $8, $6 +; MIPS64-NEXT: or $10, $10, $9 +; MIPS64-NEXT: sc $10, 0($1) +; MIPS64-NEXT: beqz $10, .LBB4_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $7, $8, $3 +; MIPS64-NEXT: srlv $7, $7, $2 +; MIPS64-NEXT: seh $7, $7 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1194,26 +1194,26 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 65535 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 +; MIPS64R6-NEXT: nor $6, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB4_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: 
ll $7, 0($1) -; MIPS64R6-NEXT: slt $10, $7, $5 -; MIPS64R6-NEXT: seleqz $8, $7, $10 -; MIPS64R6-NEXT: selnez $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB4_1 +; MIPS64R6-NEXT: ll $8, 0($1) +; MIPS64R6-NEXT: slt $11, $8, $5 +; MIPS64R6-NEXT: seleqz $9, $8, $11 +; MIPS64R6-NEXT: selnez $11, $5, $11 +; MIPS64R6-NEXT: or $9, $9, $11 +; MIPS64R6-NEXT: and $9, $9, $3 +; MIPS64R6-NEXT: and $10, $8, $6 +; MIPS64R6-NEXT: or $10, $10, $9 +; MIPS64R6-NEXT: sc $10, 0($1) +; MIPS64R6-NEXT: beqzc $10, .LBB4_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $7, $8, $3 +; MIPS64R6-NEXT: srlv $7, $7, $2 +; MIPS64R6-NEXT: seh $7, $7 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1232,28 +1232,28 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 65535 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 +; MIPS64EL-NEXT: nor $6, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB4_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movn $8, $5, $10 +; MIPS64EL-NEXT: ll $8, 0($1) ; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB4_1 +; MIPS64EL-NEXT: and $5, $5, $3 +; MIPS64EL-NEXT: slt $11, $8, $5 +; MIPS64EL-NEXT: move $9, $8 +; MIPS64EL-NEXT: movn $9, $5, $11 +; MIPS64EL-NEXT: and $9, $9, $3 +; MIPS64EL-NEXT: and $10, $8, $6 +; MIPS64EL-NEXT: or $10, $10, $9 +; MIPS64EL-NEXT: sc $10, 0($1) +; MIPS64EL-NEXT: beqz $10, .LBB4_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $7, $8, $3 +; MIPS64EL-NEXT: srlv $7, $7, $2 +; MIPS64EL-NEXT: seh $7, $7 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1273,28 +1273,28 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 +; MIPS64ELR6-NEXT: nor $6, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB4_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $10, $7, $5 -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 -; MIPS64ELR6-NEXT: selnez $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: ll $8, 0($1) ; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; 
MIPS64ELR6-NEXT: beqzc $9, .LBB4_1 +; MIPS64ELR6-NEXT: and $5, $5, $3 +; MIPS64ELR6-NEXT: slt $11, $8, $5 +; MIPS64ELR6-NEXT: seleqz $9, $8, $11 +; MIPS64ELR6-NEXT: selnez $11, $5, $11 +; MIPS64ELR6-NEXT: or $9, $9, $11 +; MIPS64ELR6-NEXT: and $9, $9, $3 +; MIPS64ELR6-NEXT: and $10, $8, $6 +; MIPS64ELR6-NEXT: or $10, $10, $9 +; MIPS64ELR6-NEXT: sc $10, 0($1) +; MIPS64ELR6-NEXT: beqzc $10, .LBB4_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $7, $8, $3 +; MIPS64ELR6-NEXT: srlv $7, $7, $2 +; MIPS64ELR6-NEXT: seh $7, $7 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1635,26 +1635,26 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 65535 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 +; MIPS64-NEXT: nor $6, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB5_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: slt $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movz $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB5_1 +; MIPS64-NEXT: ll $8, 0($1) +; MIPS64-NEXT: slt $11, $8, $5 +; MIPS64-NEXT: move $9, $8 +; MIPS64-NEXT: movz $9, $5, $11 +; MIPS64-NEXT: and $9, $9, $3 +; MIPS64-NEXT: and $10, $8, $6 +; MIPS64-NEXT: or $10, $10, $9 +; MIPS64-NEXT: sc $10, 0($1) +; MIPS64-NEXT: beqz $10, .LBB5_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $7, $8, $3 +; MIPS64-NEXT: srlv $7, $7, $2 +; MIPS64-NEXT: seh $7, $7 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1675,26 +1675,26 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 65535 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 +; MIPS64R6-NEXT: nor $6, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB5_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: slt $10, $7, $5 -; MIPS64R6-NEXT: selnez $8, $7, $10 -; MIPS64R6-NEXT: seleqz $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB5_1 +; MIPS64R6-NEXT: ll $8, 0($1) +; MIPS64R6-NEXT: slt $11, $8, $5 +; MIPS64R6-NEXT: selnez $9, $8, $11 +; MIPS64R6-NEXT: seleqz $11, $5, $11 +; MIPS64R6-NEXT: or $9, $9, $11 +; MIPS64R6-NEXT: and $9, $9, $3 +; MIPS64R6-NEXT: and $10, $8, $6 +; MIPS64R6-NEXT: or $10, $10, $9 +; MIPS64R6-NEXT: sc $10, 0($1) +; MIPS64R6-NEXT: beqzc $10, .LBB5_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $7, $8, $3 +; 
MIPS64R6-NEXT: srlv $7, $7, $2 +; MIPS64R6-NEXT: seh $7, $7 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1713,28 +1713,28 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 65535 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 +; MIPS64EL-NEXT: nor $6, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB5_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movz $8, $5, $10 +; MIPS64EL-NEXT: ll $8, 0($1) ; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB5_1 +; MIPS64EL-NEXT: and $5, $5, $3 +; MIPS64EL-NEXT: slt $11, $8, $5 +; MIPS64EL-NEXT: move $9, $8 +; MIPS64EL-NEXT: movz $9, $5, $11 +; MIPS64EL-NEXT: and $9, $9, $3 +; MIPS64EL-NEXT: and $10, $8, $6 +; MIPS64EL-NEXT: or $10, $10, $9 +; MIPS64EL-NEXT: sc $10, 0($1) +; MIPS64EL-NEXT: beqz $10, .LBB5_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $7, $8, $3 +; MIPS64EL-NEXT: srlv $7, $7, $2 +; MIPS64EL-NEXT: seh $7, $7 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1754,28 +1754,28 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 +; MIPS64ELR6-NEXT: nor $6, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB5_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $10, $7, $5 -; MIPS64ELR6-NEXT: selnez $8, $7, $10 -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: ll $8, 0($1) ; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB5_1 +; MIPS64ELR6-NEXT: and $5, $5, $3 +; MIPS64ELR6-NEXT: slt $11, $8, $5 +; MIPS64ELR6-NEXT: selnez $9, $8, $11 +; MIPS64ELR6-NEXT: seleqz $11, $5, $11 +; MIPS64ELR6-NEXT: or $9, $9, $11 +; MIPS64ELR6-NEXT: and $9, $9, $3 +; MIPS64ELR6-NEXT: and $10, $8, $6 +; MIPS64ELR6-NEXT: or $10, $10, $9 +; MIPS64ELR6-NEXT: sc $10, 0($1) +; MIPS64ELR6-NEXT: beqzc $10, .LBB5_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $7, $8, $3 +; MIPS64ELR6-NEXT: srlv $7, $7, $2 +; MIPS64ELR6-NEXT: seh $7, $7 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; 
MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2116,26 +2116,26 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 65535 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 +; MIPS64-NEXT: nor $6, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB6_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: sltu $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movn $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB6_1 +; MIPS64-NEXT: ll $8, 0($1) +; MIPS64-NEXT: sltu $11, $8, $5 +; MIPS64-NEXT: move $9, $8 +; MIPS64-NEXT: movn $9, $5, $11 +; MIPS64-NEXT: and $9, $9, $3 +; MIPS64-NEXT: and $10, $8, $6 +; MIPS64-NEXT: or $10, $10, $9 +; MIPS64-NEXT: sc $10, 0($1) +; MIPS64-NEXT: beqz $10, .LBB6_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $7, $8, $3 +; MIPS64-NEXT: srlv $7, $7, $2 +; MIPS64-NEXT: seh $7, $7 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2156,26 +2156,26 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 65535 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 +; MIPS64R6-NEXT: nor $6, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB6_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: sltu $10, $7, $5 -; MIPS64R6-NEXT: seleqz $8, $7, $10 -; MIPS64R6-NEXT: selnez $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB6_1 +; MIPS64R6-NEXT: ll $8, 0($1) +; MIPS64R6-NEXT: sltu $11, $8, $5 +; MIPS64R6-NEXT: seleqz $9, $8, $11 +; MIPS64R6-NEXT: selnez $11, $5, $11 +; MIPS64R6-NEXT: or $9, $9, $11 +; MIPS64R6-NEXT: and $9, $9, $3 +; MIPS64R6-NEXT: and $10, $8, $6 +; MIPS64R6-NEXT: or $10, $10, $9 +; MIPS64R6-NEXT: sc $10, 0($1) +; MIPS64R6-NEXT: beqzc $10, .LBB6_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $7, $8, $3 +; MIPS64R6-NEXT: srlv $7, $7, $2 +; MIPS64R6-NEXT: seh $7, $7 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2194,28 +2194,28 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 65535 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 +; MIPS64EL-NEXT: nor $6, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB6_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $10, $7, $5 
-; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movn $8, $5, $10 +; MIPS64EL-NEXT: ll $8, 0($1) ; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB6_1 +; MIPS64EL-NEXT: and $5, $5, $3 +; MIPS64EL-NEXT: sltu $11, $8, $5 +; MIPS64EL-NEXT: move $9, $8 +; MIPS64EL-NEXT: movn $9, $5, $11 +; MIPS64EL-NEXT: and $9, $9, $3 +; MIPS64EL-NEXT: and $10, $8, $6 +; MIPS64EL-NEXT: or $10, $10, $9 +; MIPS64EL-NEXT: sc $10, 0($1) +; MIPS64EL-NEXT: beqz $10, .LBB6_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $7, $8, $3 +; MIPS64EL-NEXT: srlv $7, $7, $2 +; MIPS64EL-NEXT: seh $7, $7 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2235,28 +2235,28 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 +; MIPS64ELR6-NEXT: nor $6, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB6_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $10, $7, $5 -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 -; MIPS64ELR6-NEXT: selnez $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: ll $8, 0($1) ; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB6_1 +; MIPS64ELR6-NEXT: and $5, $5, $3 +; MIPS64ELR6-NEXT: sltu $11, $8, $5 +; MIPS64ELR6-NEXT: seleqz $9, $8, $11 +; MIPS64ELR6-NEXT: selnez $11, $5, $11 +; MIPS64ELR6-NEXT: or $9, $9, $11 +; MIPS64ELR6-NEXT: and $9, $9, $3 +; MIPS64ELR6-NEXT: and $10, $8, $6 +; MIPS64ELR6-NEXT: or $10, $10, $9 +; MIPS64ELR6-NEXT: sc $10, 0($1) +; MIPS64ELR6-NEXT: beqzc $10, .LBB6_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $7, $8, $3 +; MIPS64ELR6-NEXT: srlv $7, $7, $2 +; MIPS64ELR6-NEXT: seh $7, $7 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2597,26 +2597,26 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 65535 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 +; MIPS64-NEXT: nor $6, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB7_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: sltu $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movz $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB7_1 +; MIPS64-NEXT: ll $8, 0($1) +; MIPS64-NEXT: sltu $11, $8, $5 +; MIPS64-NEXT: move $9, $8 +; 
MIPS64-NEXT: movz $9, $5, $11 +; MIPS64-NEXT: and $9, $9, $3 +; MIPS64-NEXT: and $10, $8, $6 +; MIPS64-NEXT: or $10, $10, $9 +; MIPS64-NEXT: sc $10, 0($1) +; MIPS64-NEXT: beqz $10, .LBB7_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $7, $8, $3 +; MIPS64-NEXT: srlv $7, $7, $2 +; MIPS64-NEXT: seh $7, $7 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2637,26 +2637,26 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 65535 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 +; MIPS64R6-NEXT: nor $6, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB7_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: sltu $10, $7, $5 -; MIPS64R6-NEXT: selnez $8, $7, $10 -; MIPS64R6-NEXT: seleqz $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB7_1 +; MIPS64R6-NEXT: ll $8, 0($1) +; MIPS64R6-NEXT: sltu $11, $8, $5 +; MIPS64R6-NEXT: selnez $9, $8, $11 +; MIPS64R6-NEXT: seleqz $11, $5, $11 +; MIPS64R6-NEXT: or $9, $9, $11 +; MIPS64R6-NEXT: and $9, $9, $3 +; MIPS64R6-NEXT: and $10, $8, $6 +; MIPS64R6-NEXT: or $10, $10, $9 +; MIPS64R6-NEXT: sc $10, 0($1) +; MIPS64R6-NEXT: beqzc $10, .LBB7_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $7, $8, $3 +; MIPS64R6-NEXT: srlv $7, $7, $2 +; MIPS64R6-NEXT: seh $7, $7 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2675,28 +2675,28 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 65535 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 +; MIPS64EL-NEXT: nor $6, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB7_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movz $8, $5, $10 +; MIPS64EL-NEXT: ll $8, 0($1) ; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB7_1 +; MIPS64EL-NEXT: and $5, $5, $3 +; MIPS64EL-NEXT: sltu $11, $8, $5 +; MIPS64EL-NEXT: move $9, $8 +; MIPS64EL-NEXT: movz $9, $5, $11 +; MIPS64EL-NEXT: and $9, $9, $3 +; MIPS64EL-NEXT: and $10, $8, $6 +; MIPS64EL-NEXT: or $10, $10, $9 +; MIPS64EL-NEXT: sc $10, 0($1) +; MIPS64EL-NEXT: beqz $10, .LBB7_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $7, $8, $3 +; MIPS64EL-NEXT: srlv $7, $7, $2 +; MIPS64EL-NEXT: seh $7, $7 ; 
MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2716,28 +2716,28 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 +; MIPS64ELR6-NEXT: nor $6, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB7_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $10, $7, $5 -; MIPS64ELR6-NEXT: selnez $8, $7, $10 -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: ll $8, 0($1) ; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB7_1 +; MIPS64ELR6-NEXT: and $5, $5, $3 +; MIPS64ELR6-NEXT: sltu $11, $8, $5 +; MIPS64ELR6-NEXT: selnez $9, $8, $11 +; MIPS64ELR6-NEXT: seleqz $11, $5, $11 +; MIPS64ELR6-NEXT: or $9, $9, $11 +; MIPS64ELR6-NEXT: and $9, $9, $3 +; MIPS64ELR6-NEXT: and $10, $8, $6 +; MIPS64ELR6-NEXT: or $10, $10, $9 +; MIPS64ELR6-NEXT: sc $10, 0($1) +; MIPS64ELR6-NEXT: beqzc $10, .LBB7_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $7, $8, $3 +; MIPS64ELR6-NEXT: srlv $7, $7, $2 +; MIPS64ELR6-NEXT: seh $7, $7 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3079,26 +3079,26 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 255 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 +; MIPS64-NEXT: nor $6, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB8_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: slt $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movn $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB8_1 +; MIPS64-NEXT: ll $8, 0($1) +; MIPS64-NEXT: slt $11, $8, $5 +; MIPS64-NEXT: move $9, $8 +; MIPS64-NEXT: movn $9, $5, $11 +; MIPS64-NEXT: and $9, $9, $3 +; MIPS64-NEXT: and $10, $8, $6 +; MIPS64-NEXT: or $10, $10, $9 +; MIPS64-NEXT: sc $10, 0($1) +; MIPS64-NEXT: beqz $10, .LBB8_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $7, $8, $3 +; MIPS64-NEXT: srlv $7, $7, $2 +; MIPS64-NEXT: seh $7, $7 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3119,26 +3119,26 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 255 ; 
MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 +; MIPS64R6-NEXT: nor $6, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB8_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: slt $10, $7, $5 -; MIPS64R6-NEXT: seleqz $8, $7, $10 -; MIPS64R6-NEXT: selnez $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB8_1 +; MIPS64R6-NEXT: ll $8, 0($1) +; MIPS64R6-NEXT: slt $11, $8, $5 +; MIPS64R6-NEXT: seleqz $9, $8, $11 +; MIPS64R6-NEXT: selnez $11, $5, $11 +; MIPS64R6-NEXT: or $9, $9, $11 +; MIPS64R6-NEXT: and $9, $9, $3 +; MIPS64R6-NEXT: and $10, $8, $6 +; MIPS64R6-NEXT: or $10, $10, $9 +; MIPS64R6-NEXT: sc $10, 0($1) +; MIPS64R6-NEXT: beqzc $10, .LBB8_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $7, $8, $3 +; MIPS64R6-NEXT: srlv $7, $7, $2 +; MIPS64R6-NEXT: seh $7, $7 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3157,28 +3157,28 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 255 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 +; MIPS64EL-NEXT: nor $6, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB8_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movn $8, $5, $10 +; MIPS64EL-NEXT: ll $8, 0($1) ; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB8_1 +; MIPS64EL-NEXT: and $5, $5, $3 +; MIPS64EL-NEXT: slt $11, $8, $5 +; MIPS64EL-NEXT: move $9, $8 +; MIPS64EL-NEXT: movn $9, $5, $11 +; MIPS64EL-NEXT: and $9, $9, $3 +; MIPS64EL-NEXT: and $10, $8, $6 +; MIPS64EL-NEXT: or $10, $10, $9 +; MIPS64EL-NEXT: sc $10, 0($1) +; MIPS64EL-NEXT: beqz $10, .LBB8_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $7, $8, $3 +; MIPS64EL-NEXT: srlv $7, $7, $2 +; MIPS64EL-NEXT: seh $7, $7 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3198,28 +3198,28 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 255 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 +; MIPS64ELR6-NEXT: nor $6, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB8_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $10, $7, $5 -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 -; MIPS64ELR6-NEXT: 
selnez $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: ll $8, 0($1) ; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB8_1 +; MIPS64ELR6-NEXT: and $5, $5, $3 +; MIPS64ELR6-NEXT: slt $11, $8, $5 +; MIPS64ELR6-NEXT: seleqz $9, $8, $11 +; MIPS64ELR6-NEXT: selnez $11, $5, $11 +; MIPS64ELR6-NEXT: or $9, $9, $11 +; MIPS64ELR6-NEXT: and $9, $9, $3 +; MIPS64ELR6-NEXT: and $10, $8, $6 +; MIPS64ELR6-NEXT: or $10, $10, $9 +; MIPS64ELR6-NEXT: sc $10, 0($1) +; MIPS64ELR6-NEXT: beqzc $10, .LBB8_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $7, $8, $3 +; MIPS64ELR6-NEXT: srlv $7, $7, $2 +; MIPS64ELR6-NEXT: seh $7, $7 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3560,26 +3560,26 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 255 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 +; MIPS64-NEXT: nor $6, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB9_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: slt $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movz $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB9_1 +; MIPS64-NEXT: ll $8, 0($1) +; MIPS64-NEXT: slt $11, $8, $5 +; MIPS64-NEXT: move $9, $8 +; MIPS64-NEXT: movz $9, $5, $11 +; MIPS64-NEXT: and $9, $9, $3 +; MIPS64-NEXT: and $10, $8, $6 +; MIPS64-NEXT: or $10, $10, $9 +; MIPS64-NEXT: sc $10, 0($1) +; MIPS64-NEXT: beqz $10, .LBB9_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $7, $8, $3 +; MIPS64-NEXT: srlv $7, $7, $2 +; MIPS64-NEXT: seh $7, $7 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3600,26 +3600,26 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 255 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 +; MIPS64R6-NEXT: nor $6, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB9_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: slt $10, $7, $5 -; MIPS64R6-NEXT: selnez $8, $7, $10 -; MIPS64R6-NEXT: seleqz $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB9_1 +; MIPS64R6-NEXT: ll $8, 0($1) +; MIPS64R6-NEXT: slt $11, $8, $5 +; MIPS64R6-NEXT: selnez $9, $8, $11 +; MIPS64R6-NEXT: seleqz $11, $5, $11 +; MIPS64R6-NEXT: or $9, $9, $11 +; MIPS64R6-NEXT: and $9, $9, $3 +; MIPS64R6-NEXT: and $10, $8, $6 +; MIPS64R6-NEXT: or $10, $10, $9 +; MIPS64R6-NEXT: sc $10, 0($1) 
+; MIPS64R6-NEXT: beqzc $10, .LBB9_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $7, $8, $3 +; MIPS64R6-NEXT: srlv $7, $7, $2 +; MIPS64R6-NEXT: seh $7, $7 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3638,28 +3638,28 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 255 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 +; MIPS64EL-NEXT: nor $6, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB9_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movz $8, $5, $10 +; MIPS64EL-NEXT: ll $8, 0($1) ; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB9_1 +; MIPS64EL-NEXT: and $5, $5, $3 +; MIPS64EL-NEXT: slt $11, $8, $5 +; MIPS64EL-NEXT: move $9, $8 +; MIPS64EL-NEXT: movz $9, $5, $11 +; MIPS64EL-NEXT: and $9, $9, $3 +; MIPS64EL-NEXT: and $10, $8, $6 +; MIPS64EL-NEXT: or $10, $10, $9 +; MIPS64EL-NEXT: sc $10, 0($1) +; MIPS64EL-NEXT: beqz $10, .LBB9_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $7, $8, $3 +; MIPS64EL-NEXT: srlv $7, $7, $2 +; MIPS64EL-NEXT: seh $7, $7 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3679,28 +3679,28 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 255 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 +; MIPS64ELR6-NEXT: nor $6, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB9_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $10, $7, $5 -; MIPS64ELR6-NEXT: selnez $8, $7, $10 -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: ll $8, 0($1) ; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB9_1 +; MIPS64ELR6-NEXT: and $5, $5, $3 +; MIPS64ELR6-NEXT: slt $11, $8, $5 +; MIPS64ELR6-NEXT: selnez $9, $8, $11 +; MIPS64ELR6-NEXT: seleqz $11, $5, $11 +; MIPS64ELR6-NEXT: or $9, $9, $11 +; MIPS64ELR6-NEXT: and $9, $9, $3 +; MIPS64ELR6-NEXT: and $10, $8, $6 +; MIPS64ELR6-NEXT: or $10, $10, $9 +; MIPS64ELR6-NEXT: sc $10, 0($1) +; MIPS64ELR6-NEXT: beqzc $10, .LBB9_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $7, $8, $3 +; MIPS64ELR6-NEXT: srlv $7, $7, $2 +; MIPS64ELR6-NEXT: seh $7, $7 
; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4041,26 +4041,26 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 255 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 +; MIPS64-NEXT: nor $6, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB10_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: sltu $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movn $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB10_1 +; MIPS64-NEXT: ll $8, 0($1) +; MIPS64-NEXT: sltu $11, $8, $5 +; MIPS64-NEXT: move $9, $8 +; MIPS64-NEXT: movn $9, $5, $11 +; MIPS64-NEXT: and $9, $9, $3 +; MIPS64-NEXT: and $10, $8, $6 +; MIPS64-NEXT: or $10, $10, $9 +; MIPS64-NEXT: sc $10, 0($1) +; MIPS64-NEXT: beqz $10, .LBB10_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $7, $8, $3 +; MIPS64-NEXT: srlv $7, $7, $2 +; MIPS64-NEXT: seh $7, $7 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4081,26 +4081,26 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 255 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 +; MIPS64R6-NEXT: nor $6, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB10_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: sltu $10, $7, $5 -; MIPS64R6-NEXT: seleqz $8, $7, $10 -; MIPS64R6-NEXT: selnez $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB10_1 +; MIPS64R6-NEXT: ll $8, 0($1) +; MIPS64R6-NEXT: sltu $11, $8, $5 +; MIPS64R6-NEXT: seleqz $9, $8, $11 +; MIPS64R6-NEXT: selnez $11, $5, $11 +; MIPS64R6-NEXT: or $9, $9, $11 +; MIPS64R6-NEXT: and $9, $9, $3 +; MIPS64R6-NEXT: and $10, $8, $6 +; MIPS64R6-NEXT: or $10, $10, $9 +; MIPS64R6-NEXT: sc $10, 0($1) +; MIPS64R6-NEXT: beqzc $10, .LBB10_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $7, $8, $3 +; MIPS64R6-NEXT: srlv $7, $7, $2 +; MIPS64R6-NEXT: seh $7, $7 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4119,28 +4119,28 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 255 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 +; MIPS64EL-NEXT: nor $6, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB10_1: # %entry ; 
MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movn $8, $5, $10 +; MIPS64EL-NEXT: ll $8, 0($1) ; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB10_1 +; MIPS64EL-NEXT: and $5, $5, $3 +; MIPS64EL-NEXT: sltu $11, $8, $5 +; MIPS64EL-NEXT: move $9, $8 +; MIPS64EL-NEXT: movn $9, $5, $11 +; MIPS64EL-NEXT: and $9, $9, $3 +; MIPS64EL-NEXT: and $10, $8, $6 +; MIPS64EL-NEXT: or $10, $10, $9 +; MIPS64EL-NEXT: sc $10, 0($1) +; MIPS64EL-NEXT: beqz $10, .LBB10_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $7, $8, $3 +; MIPS64EL-NEXT: srlv $7, $7, $2 +; MIPS64EL-NEXT: seh $7, $7 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4160,28 +4160,28 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 255 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 +; MIPS64ELR6-NEXT: nor $6, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB10_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $10, $7, $5 -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 -; MIPS64ELR6-NEXT: selnez $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: ll $8, 0($1) ; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB10_1 +; MIPS64ELR6-NEXT: and $5, $5, $3 +; MIPS64ELR6-NEXT: sltu $11, $8, $5 +; MIPS64ELR6-NEXT: seleqz $9, $8, $11 +; MIPS64ELR6-NEXT: selnez $11, $5, $11 +; MIPS64ELR6-NEXT: or $9, $9, $11 +; MIPS64ELR6-NEXT: and $9, $9, $3 +; MIPS64ELR6-NEXT: and $10, $8, $6 +; MIPS64ELR6-NEXT: or $10, $10, $9 +; MIPS64ELR6-NEXT: sc $10, 0($1) +; MIPS64ELR6-NEXT: beqzc $10, .LBB10_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $7, $8, $3 +; MIPS64ELR6-NEXT: srlv $7, $7, $2 +; MIPS64ELR6-NEXT: seh $7, $7 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4522,26 +4522,26 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 255 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 +; MIPS64-NEXT: nor $6, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB11_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: sltu $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movz $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or 
$9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB11_1 +; MIPS64-NEXT: ll $8, 0($1) +; MIPS64-NEXT: sltu $11, $8, $5 +; MIPS64-NEXT: move $9, $8 +; MIPS64-NEXT: movz $9, $5, $11 +; MIPS64-NEXT: and $9, $9, $3 +; MIPS64-NEXT: and $10, $8, $6 +; MIPS64-NEXT: or $10, $10, $9 +; MIPS64-NEXT: sc $10, 0($1) +; MIPS64-NEXT: beqz $10, .LBB11_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $7, $8, $3 +; MIPS64-NEXT: srlv $7, $7, $2 +; MIPS64-NEXT: seh $7, $7 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4562,26 +4562,26 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 255 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 +; MIPS64R6-NEXT: nor $6, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB11_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: sltu $10, $7, $5 -; MIPS64R6-NEXT: selnez $8, $7, $10 -; MIPS64R6-NEXT: seleqz $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB11_1 +; MIPS64R6-NEXT: ll $8, 0($1) +; MIPS64R6-NEXT: sltu $11, $8, $5 +; MIPS64R6-NEXT: selnez $9, $8, $11 +; MIPS64R6-NEXT: seleqz $11, $5, $11 +; MIPS64R6-NEXT: or $9, $9, $11 +; MIPS64R6-NEXT: and $9, $9, $3 +; MIPS64R6-NEXT: and $10, $8, $6 +; MIPS64R6-NEXT: or $10, $10, $9 +; MIPS64R6-NEXT: sc $10, 0($1) +; MIPS64R6-NEXT: beqzc $10, .LBB11_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $7, $8, $3 +; MIPS64R6-NEXT: srlv $7, $7, $2 +; MIPS64R6-NEXT: seh $7, $7 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4600,28 +4600,28 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 255 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 +; MIPS64EL-NEXT: nor $6, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB11_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movz $8, $5, $10 +; MIPS64EL-NEXT: ll $8, 0($1) ; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB11_1 +; MIPS64EL-NEXT: and $5, $5, $3 +; MIPS64EL-NEXT: sltu $11, $8, $5 +; MIPS64EL-NEXT: move $9, $8 +; MIPS64EL-NEXT: movz $9, $5, $11 +; MIPS64EL-NEXT: and $9, $9, $3 +; MIPS64EL-NEXT: and $10, $8, $6 +; MIPS64EL-NEXT: or $10, $10, $9 +; MIPS64EL-NEXT: sc $10, 0($1) +; MIPS64EL-NEXT: beqz $10, .LBB11_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; 
MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $7, $8, $3 +; MIPS64EL-NEXT: srlv $7, $7, $2 +; MIPS64EL-NEXT: seh $7, $7 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4641,28 +4641,28 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 255 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 +; MIPS64ELR6-NEXT: nor $6, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB11_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $10, $7, $5 -; MIPS64ELR6-NEXT: selnez $8, $7, $10 -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: ll $8, 0($1) ; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB11_1 +; MIPS64ELR6-NEXT: and $5, $5, $3 +; MIPS64ELR6-NEXT: sltu $11, $8, $5 +; MIPS64ELR6-NEXT: selnez $9, $8, $11 +; MIPS64ELR6-NEXT: seleqz $11, $5, $11 +; MIPS64ELR6-NEXT: or $9, $9, $11 +; MIPS64ELR6-NEXT: and $9, $9, $3 +; MIPS64ELR6-NEXT: and $10, $8, $6 +; MIPS64ELR6-NEXT: or $10, $10, $9 +; MIPS64ELR6-NEXT: sc $10, 0($1) +; MIPS64ELR6-NEXT: beqzc $10, .LBB11_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $7, $8, $3 +; MIPS64ELR6-NEXT: srlv $7, $7, $2 +; MIPS64ELR6-NEXT: seh $7, $7 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/Mips/atomic.ll b/llvm/test/CodeGen/Mips/atomic.ll index 59ff83e4969cc..3846fda47b138 100644 --- a/llvm/test/CodeGen/Mips/atomic.ll +++ b/llvm/test/CodeGen/Mips/atomic.ll @@ -2559,28 +2559,28 @@ define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind { ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: andi $3, $1, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 3 +; MIPS64R6O0-NEXT: sll $3, $3, 3 +; MIPS64R6O0-NEXT: ori $5, $zero, 255 +; MIPS64R6O0-NEXT: sllv $5, $5, $3 +; MIPS64R6O0-NEXT: nor $6, $zero, $5 +; MIPS64R6O0-NEXT: sllv $4, $4, $3 ; MIPS64R6O0-NEXT: .LBB8_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: addu $8, $7, $4 -; MIPS64R6O0-NEXT: and $8, $8, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB8_1 +; MIPS64R6O0-NEXT: ll $8, 0($2) +; MIPS64R6O0-NEXT: addu $9, $8, $4 +; MIPS64R6O0-NEXT: and $9, $9, $5 +; MIPS64R6O0-NEXT: and $10, $8, $6 +; MIPS64R6O0-NEXT: 
or $10, $10, $9 +; MIPS64R6O0-NEXT: sc $10, 0($2) +; MIPS64R6O0-NEXT: beqzc $10, .LBB8_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seb $6, $6 +; MIPS64R6O0-NEXT: and $7, $8, $5 +; MIPS64R6O0-NEXT: srlv $7, $7, $3 +; MIPS64R6O0-NEXT: seb $7, $7 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -3075,28 +3075,28 @@ define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind { ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: andi $3, $1, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 3 +; MIPS64R6O0-NEXT: sll $3, $3, 3 +; MIPS64R6O0-NEXT: ori $5, $zero, 255 +; MIPS64R6O0-NEXT: sllv $5, $5, $3 +; MIPS64R6O0-NEXT: nor $6, $zero, $5 +; MIPS64R6O0-NEXT: sllv $4, $4, $3 ; MIPS64R6O0-NEXT: .LBB9_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: subu $8, $7, $4 -; MIPS64R6O0-NEXT: and $8, $8, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB9_1 +; MIPS64R6O0-NEXT: ll $8, 0($2) +; MIPS64R6O0-NEXT: subu $9, $8, $4 +; MIPS64R6O0-NEXT: and $9, $9, $5 +; MIPS64R6O0-NEXT: and $10, $8, $6 +; MIPS64R6O0-NEXT: or $10, $10, $9 +; MIPS64R6O0-NEXT: sc $10, 0($2) +; MIPS64R6O0-NEXT: beqzc $10, .LBB9_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seb $6, $6 +; MIPS64R6O0-NEXT: and $7, $8, $5 +; MIPS64R6O0-NEXT: srlv $7, $7, $3 +; MIPS64R6O0-NEXT: seb $7, $7 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -3601,29 +3601,29 @@ define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind { ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: andi $3, $1, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 3 +; MIPS64R6O0-NEXT: sll $3, $3, 3 +; MIPS64R6O0-NEXT: ori $5, $zero, 255 +; MIPS64R6O0-NEXT: sllv $5, $5, $3 +; MIPS64R6O0-NEXT: nor $6, $zero, $5 +; MIPS64R6O0-NEXT: sllv $4, $4, $3 ; MIPS64R6O0-NEXT: .LBB10_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: and $8, $7, $4 -; MIPS64R6O0-NEXT: nor $8, $zero, $8 -; MIPS64R6O0-NEXT: and $8, $8, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB10_1 +; MIPS64R6O0-NEXT: ll $8, 0($2) +; MIPS64R6O0-NEXT: 
and $9, $8, $4 +; MIPS64R6O0-NEXT: nor $9, $zero, $9 +; MIPS64R6O0-NEXT: and $9, $9, $5 +; MIPS64R6O0-NEXT: and $10, $8, $6 +; MIPS64R6O0-NEXT: or $10, $10, $9 +; MIPS64R6O0-NEXT: sc $10, 0($2) +; MIPS64R6O0-NEXT: beqzc $10, .LBB10_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seb $6, $6 +; MIPS64R6O0-NEXT: and $7, $8, $5 +; MIPS64R6O0-NEXT: srlv $7, $7, $3 +; MIPS64R6O0-NEXT: seb $7, $7 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -4115,27 +4115,27 @@ define signext i8 @AtomicSwap8(i8 signext %newval) nounwind { ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: andi $3, $1, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 3 +; MIPS64R6O0-NEXT: sll $3, $3, 3 +; MIPS64R6O0-NEXT: ori $5, $zero, 255 +; MIPS64R6O0-NEXT: sllv $5, $5, $3 +; MIPS64R6O0-NEXT: nor $6, $zero, $5 +; MIPS64R6O0-NEXT: sllv $4, $4, $3 ; MIPS64R6O0-NEXT: .LBB11_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: and $8, $4, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB11_1 +; MIPS64R6O0-NEXT: ll $8, 0($2) +; MIPS64R6O0-NEXT: and $9, $4, $5 +; MIPS64R6O0-NEXT: and $10, $8, $6 +; MIPS64R6O0-NEXT: or $10, $10, $9 +; MIPS64R6O0-NEXT: sc $10, 0($2) +; MIPS64R6O0-NEXT: beqzc $10, .LBB11_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seb $6, $6 +; MIPS64R6O0-NEXT: and $7, $8, $5 +; MIPS64R6O0-NEXT: srlv $7, $7, $3 +; MIPS64R6O0-NEXT: seb $7, $7 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -4666,32 +4666,32 @@ define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwi ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $6, $zero, $3 +; MIPS64R6O0-NEXT: andi $3, $1, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 3 +; MIPS64R6O0-NEXT: sll $3, $3, 3 +; MIPS64R6O0-NEXT: ori $6, $zero, 255 +; MIPS64R6O0-NEXT: sllv $6, $6, $3 +; MIPS64R6O0-NEXT: nor $7, $zero, $6 ; MIPS64R6O0-NEXT: andi $4, $4, 255 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: sllv $4, $4, $3 ; MIPS64R6O0-NEXT: andi $5, $5, 255 -; MIPS64R6O0-NEXT: sllv $5, $5, $1 +; MIPS64R6O0-NEXT: sllv $5, $5, $3 ; MIPS64R6O0-NEXT: .LBB12_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $8, 0($2) -; MIPS64R6O0-NEXT: and $9, $8, $3 -; MIPS64R6O0-NEXT: bnec $9, 
$4, .LBB12_3 +; MIPS64R6O0-NEXT: ll $9, 0($2) +; MIPS64R6O0-NEXT: and $10, $9, $6 +; MIPS64R6O0-NEXT: bnec $10, $4, .LBB12_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1 -; MIPS64R6O0-NEXT: and $8, $8, $6 -; MIPS64R6O0-NEXT: or $8, $8, $5 -; MIPS64R6O0-NEXT: sc $8, 0($2) -; MIPS64R6O0-NEXT: beqzc $8, .LBB12_1 +; MIPS64R6O0-NEXT: and $9, $9, $7 +; MIPS64R6O0-NEXT: or $9, $9, $5 +; MIPS64R6O0-NEXT: sc $9, 0($2) +; MIPS64R6O0-NEXT: beqzc $9, .LBB12_1 ; MIPS64R6O0-NEXT: .LBB12_3: # %entry -; MIPS64R6O0-NEXT: srlv $7, $9, $1 -; MIPS64R6O0-NEXT: seb $7, $7 +; MIPS64R6O0-NEXT: srlv $8, $10, $3 +; MIPS64R6O0-NEXT: seb $8, $8 ; MIPS64R6O0-NEXT: # %bb.4: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $8, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: # %entry ; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 @@ -5236,28 +5236,28 @@ define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) n ; MIPS64R6O0-NEXT: sll $2, $2, 3 ; MIPS64R6O0-NEXT: ori $3, $zero, 255 ; MIPS64R6O0-NEXT: sllv $3, $3, $2 -; MIPS64R6O0-NEXT: nor $4, $zero, $3 -; MIPS64R6O0-NEXT: andi $7, $5, 255 -; MIPS64R6O0-NEXT: sllv $7, $7, $2 +; MIPS64R6O0-NEXT: nor $7, $zero, $3 +; MIPS64R6O0-NEXT: andi $8, $5, 255 +; MIPS64R6O0-NEXT: sllv $8, $8, $2 ; MIPS64R6O0-NEXT: andi $6, $6, 255 ; MIPS64R6O0-NEXT: sllv $6, $6, $2 ; MIPS64R6O0-NEXT: .LBB13_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $9, 0($1) -; MIPS64R6O0-NEXT: and $10, $9, $3 -; MIPS64R6O0-NEXT: bnec $10, $7, .LBB13_3 +; MIPS64R6O0-NEXT: ll $10, 0($1) +; MIPS64R6O0-NEXT: and $11, $10, $3 +; MIPS64R6O0-NEXT: bnec $11, $8, .LBB13_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1 -; MIPS64R6O0-NEXT: and $9, $9, $4 -; MIPS64R6O0-NEXT: or $9, $9, $6 -; MIPS64R6O0-NEXT: sc $9, 0($1) -; MIPS64R6O0-NEXT: beqzc $9, .LBB13_1 +; MIPS64R6O0-NEXT: and $10, $10, $7 +; MIPS64R6O0-NEXT: or $10, $10, $6 +; MIPS64R6O0-NEXT: sc $10, 0($1) +; MIPS64R6O0-NEXT: beqzc $10, .LBB13_1 ; MIPS64R6O0-NEXT: .LBB13_3: # %entry -; MIPS64R6O0-NEXT: srlv $8, $10, $2 -; MIPS64R6O0-NEXT: seb $8, $8 +; MIPS64R6O0-NEXT: srlv $9, $11, $2 +; MIPS64R6O0-NEXT: seb $9, $9 ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $9, 8($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: # %entry ; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -5775,28 +5775,28 @@ define signext i16 @AtomicLoadAdd16(i16 signext %incr) nounwind { ; MIPS64R6O0-NEXT: ld $1, %got_disp(z)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 2 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 65535 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: andi $3, $1, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 2 +; MIPS64R6O0-NEXT: sll $3, $3, 3 +; MIPS64R6O0-NEXT: ori $5, $zero, 65535 +; MIPS64R6O0-NEXT: sllv $5, $5, $3 +; MIPS64R6O0-NEXT: nor $6, $zero, $5 +; MIPS64R6O0-NEXT: sllv $4, $4, $3 ; MIPS64R6O0-NEXT: .LBB14_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: addu $8, 
$7, $4 -; MIPS64R6O0-NEXT: and $8, $8, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB14_1 +; MIPS64R6O0-NEXT: ll $8, 0($2) +; MIPS64R6O0-NEXT: addu $9, $8, $4 +; MIPS64R6O0-NEXT: and $9, $9, $5 +; MIPS64R6O0-NEXT: and $10, $8, $6 +; MIPS64R6O0-NEXT: or $10, $10, $9 +; MIPS64R6O0-NEXT: sc $10, 0($2) +; MIPS64R6O0-NEXT: beqzc $10, .LBB14_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seh $6, $6 +; MIPS64R6O0-NEXT: and $7, $8, $5 +; MIPS64R6O0-NEXT: srlv $7, $7, $3 +; MIPS64R6O0-NEXT: seh $7, $7 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seh $2, $1 @@ -6359,33 +6359,33 @@ define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) { ; MIPS64R6O0-NEXT: sll $3, $5, 0 ; MIPS64R6O0-NEXT: addu $2, $3, $2 ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 -; MIPS64R6O0-NEXT: and $3, $4, $3 -; MIPS64R6O0-NEXT: andi $4, $4, 3 -; MIPS64R6O0-NEXT: xori $4, $4, 2 -; MIPS64R6O0-NEXT: sll $4, $4, 3 +; MIPS64R6O0-NEXT: daddiu $8, $zero, -4 +; MIPS64R6O0-NEXT: and $8, $4, $8 +; MIPS64R6O0-NEXT: andi $3, $4, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 2 +; MIPS64R6O0-NEXT: sll $3, $3, 3 ; MIPS64R6O0-NEXT: ori $5, $zero, 65535 -; MIPS64R6O0-NEXT: sllv $5, $5, $4 +; MIPS64R6O0-NEXT: sllv $5, $5, $3 ; MIPS64R6O0-NEXT: nor $6, $zero, $5 ; MIPS64R6O0-NEXT: andi $7, $2, 65535 -; MIPS64R6O0-NEXT: sllv $7, $7, $4 +; MIPS64R6O0-NEXT: sllv $7, $7, $3 ; MIPS64R6O0-NEXT: andi $1, $1, 65535 -; MIPS64R6O0-NEXT: sllv $1, $1, $4 +; MIPS64R6O0-NEXT: sllv $1, $1, $3 ; MIPS64R6O0-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $9, 0($3) -; MIPS64R6O0-NEXT: and $10, $9, $5 -; MIPS64R6O0-NEXT: bnec $10, $7, .LBB15_3 +; MIPS64R6O0-NEXT: ll $10, 0($8) +; MIPS64R6O0-NEXT: and $11, $10, $5 +; MIPS64R6O0-NEXT: bnec $11, $7, .LBB15_3 ; MIPS64R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; MIPS64R6O0-NEXT: and $9, $9, $6 -; MIPS64R6O0-NEXT: or $9, $9, $1 -; MIPS64R6O0-NEXT: sc $9, 0($3) -; MIPS64R6O0-NEXT: beqzc $9, .LBB15_1 +; MIPS64R6O0-NEXT: and $10, $10, $6 +; MIPS64R6O0-NEXT: or $10, $10, $1 +; MIPS64R6O0-NEXT: sc $10, 0($8) +; MIPS64R6O0-NEXT: beqzc $10, .LBB15_1 ; MIPS64R6O0-NEXT: .LBB15_3: -; MIPS64R6O0-NEXT: srlv $8, $10, $4 -; MIPS64R6O0-NEXT: seh $8, $8 +; MIPS64R6O0-NEXT: srlv $9, $11, $3 +; MIPS64R6O0-NEXT: seh $9, $9 ; MIPS64R6O0-NEXT: # %bb.4: ; MIPS64R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $9, 8($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seh $2, $1 @@ -7145,8 +7145,8 @@ define i32 @zeroreg() nounwind { ; MIPS64R6O0-NEXT: sc $6, 0($1) ; MIPS64R6O0-NEXT: beqzc $6, .LBB17_1 ; MIPS64R6O0-NEXT: .LBB17_3: # %entry -; MIPS64R6O0-NEXT: xor $1, $5, $3 -; MIPS64R6O0-NEXT: sltiu $2, $1, 1 +; MIPS64R6O0-NEXT: xor $2, $5, $3 +; MIPS64R6O0-NEXT: sltiu $2, $2, 1 ; MIPS64R6O0-NEXT: sync ; MIPS64R6O0-NEXT: jrc $ra ; diff --git a/llvm/test/CodeGen/Mips/implicit-sret.ll b/llvm/test/CodeGen/Mips/implicit-sret.ll index b9f6568e40c92..e86cec37d5100 100644 --- a/llvm/test/CodeGen/Mips/implicit-sret.ll +++ 
b/llvm/test/CodeGen/Mips/implicit-sret.ll @@ -48,8 +48,8 @@ define internal { i32, i128, i64 } @implicit_sret_impl() unnamed_addr nounwind { ; CHECK-NEXT: sd $zero, 8($4) ; CHECK-NEXT: daddiu $3, $zero, 30 ; CHECK-NEXT: sd $3, 24($4) -; CHECK-NEXT: addiu $3, $zero, 10 -; CHECK-NEXT: sw $3, 0($4) +; CHECK-NEXT: addiu $5, $zero, 10 +; CHECK-NEXT: sw $5, 0($4) ; CHECK-NEXT: jr $ra ; CHECK-NEXT: nop ret { i32, i128, i64 } { i32 10, i128 20, i64 30 } @@ -70,12 +70,10 @@ define internal void @test2() unnamed_addr nounwind { ; CHECK-NEXT: lw $3, 4($sp) ; CHECK-NEXT: # implicit-def: $a0_64 ; CHECK-NEXT: move $4, $3 -; CHECK-NEXT: # implicit-def: $v1_64 -; CHECK-NEXT: move $3, $2 -; CHECK-NEXT: # implicit-def: $v0_64 -; CHECK-NEXT: move $2, $1 -; CHECK-NEXT: move $5, $3 -; CHECK-NEXT: move $6, $2 +; CHECK-NEXT: # implicit-def: $a1_64 +; CHECK-NEXT: move $5, $2 +; CHECK-NEXT: # implicit-def: $a2_64 +; CHECK-NEXT: move $6, $1 ; CHECK-NEXT: jal use_sret2 ; CHECK-NEXT: nop ; CHECK-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/addegluecrash.ll b/llvm/test/CodeGen/PowerPC/addegluecrash.ll index c38f377869f86..a1d9805458368 100644 --- a/llvm/test/CodeGen/PowerPC/addegluecrash.ll +++ b/llvm/test/CodeGen/PowerPC/addegluecrash.ll @@ -21,11 +21,11 @@ define void @bn_mul_comba8(i64* nocapture %r, i64* nocapture readonly %a, i64* n ; CHECK-NEXT: addze 5, 5 ; CHECK-NEXT: add 4, 5, 4 ; CHECK-NEXT: cmpld 7, 4, 5 -; CHECK-NEXT: mfocrf 4, 1 -; CHECK-NEXT: rlwinm 4, 4, 29, 31, 31 -; CHECK-NEXT: # implicit-def: $x5 -; CHECK-NEXT: mr 5, 4 -; CHECK-NEXT: clrldi 4, 5, 32 +; CHECK-NEXT: mfocrf 10, 1 +; CHECK-NEXT: rlwinm 10, 10, 29, 31, 31 +; CHECK-NEXT: # implicit-def: $x4 +; CHECK-NEXT: mr 4, 10 +; CHECK-NEXT: clrldi 4, 4, 32 ; CHECK-NEXT: std 4, 0(3) ; CHECK-NEXT: blr %1 = load i64, i64* %a, align 8 diff --git a/llvm/test/CodeGen/PowerPC/popcount.ll b/llvm/test/CodeGen/PowerPC/popcount.ll index fb20f1d3ee43b..170d3d77d0886 100644 --- a/llvm/test/CodeGen/PowerPC/popcount.ll +++ b/llvm/test/CodeGen/PowerPC/popcount.ll @@ -58,17 +58,17 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) { ; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vsl0 ; CHECK-NEXT: mffprd 3, 0 ; CHECK-NEXT: popcntd 3, 3 -; CHECK-NEXT: xxswapd 0, 34 -; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vsl0 -; CHECK-NEXT: mffprd 4, 0 +; CHECK-NEXT: xxswapd 1, 34 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: mffprd 4, 1 ; CHECK-NEXT: popcntd 4, 4 ; CHECK-NEXT: add 3, 4, 3 ; CHECK-NEXT: mtfprd 0, 3 -; CHECK-NEXT: # kill: def $vsl0 killed $f0 +; CHECK-NEXT: fmr 2, 0 ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: mtfprd 1, 3 -; CHECK-NEXT: # kill: def $vsl1 killed $f1 -; CHECK-NEXT: xxmrghd 34, 1, 0 +; CHECK-NEXT: mtfprd 0, 3 +; CHECK-NEXT: fmr 3, 0 +; CHECK-NEXT: xxmrghd 34, 3, 2 ; CHECK-NEXT: blr Entry: %1 = tail call <1 x i128> @llvm.ctpop.v1.i128(<1 x i128> %0) diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll index 4a78218262ca0..39469d63b9078 100644 --- a/llvm/test/CodeGen/PowerPC/vsx.ll +++ b/llvm/test/CodeGen/PowerPC/vsx.ll @@ -1548,8 +1548,8 @@ define <2 x i64> @test46(<2 x float> %a) { ; CHECK-FISL-NEXT: ld r3, -24(r1) ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x vs1, 0, r3 +; CHECK-FISL-NEXT: xxlor v2, vs1, vs1 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test46: @@ -1616,8 +1616,8 @@ define <2 x i64> @test47(<2 x float> %a) { ; 
CHECK-FISL-NEXT: ld r3, -24(r1) ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x vs1, 0, r3 +; CHECK-FISL-NEXT: xxlor v2, vs1, vs1 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test47: @@ -1859,13 +1859,13 @@ define <2 x i64> @test60(<2 x i64> %a, <2 x i64> %b) { ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: lwz r3, -20(r1) -; CHECK-FISL-NEXT: ld r4, -40(r1) -; CHECK-FISL-NEXT: sld r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -20(r1) +; CHECK-FISL-NEXT: ld r3, -40(r1) +; CHECK-FISL-NEXT: sld r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: lwz r3, -28(r1) -; CHECK-FISL-NEXT: ld r4, -48(r1) -; CHECK-FISL-NEXT: sld r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -28(r1) +; CHECK-FISL-NEXT: ld r3, -48(r1) +; CHECK-FISL-NEXT: sld r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 ; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 @@ -1925,13 +1925,13 @@ define <2 x i64> @test61(<2 x i64> %a, <2 x i64> %b) { ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: lwz r3, -20(r1) -; CHECK-FISL-NEXT: ld r4, -40(r1) -; CHECK-FISL-NEXT: srd r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -20(r1) +; CHECK-FISL-NEXT: ld r3, -40(r1) +; CHECK-FISL-NEXT: srd r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: lwz r3, -28(r1) -; CHECK-FISL-NEXT: ld r4, -48(r1) -; CHECK-FISL-NEXT: srd r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -28(r1) +; CHECK-FISL-NEXT: ld r3, -48(r1) +; CHECK-FISL-NEXT: srd r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 ; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 @@ -1991,13 +1991,13 @@ define <2 x i64> @test62(<2 x i64> %a, <2 x i64> %b) { ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: lwz r3, -20(r1) -; CHECK-FISL-NEXT: ld r4, -40(r1) -; CHECK-FISL-NEXT: srad r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -20(r1) +; CHECK-FISL-NEXT: ld r3, -40(r1) +; CHECK-FISL-NEXT: srad r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: lwz r3, -28(r1) -; CHECK-FISL-NEXT: ld r4, -48(r1) -; CHECK-FISL-NEXT: srad r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -28(r1) +; CHECK-FISL-NEXT: ld r3, -48(r1) +; CHECK-FISL-NEXT: srad r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 ; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 @@ -2426,12 +2426,12 @@ define <2 x i32> @test80(i32 %v) { ; CHECK-FISL: # %bb.0: ; CHECK-FISL-NEXT: # kill: def $r3 killed $r3 killed $x3 ; CHECK-FISL-NEXT: stw r3, -16(r1) -; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvw4x vs0, 0, r3 +; CHECK-FISL-NEXT: addi r4, r1, -16 +; CHECK-FISL-NEXT: lxvw4x vs0, 0, r4 ; CHECK-FISL-NEXT: xxspltw v2, vs0, 0 -; CHECK-FISL-NEXT: addis r3, r2, .LCPI65_0@toc@ha -; CHECK-FISL-NEXT: addi r3, r3, .LCPI65_0@toc@l -; CHECK-FISL-NEXT: lxvw4x v3, 0, r3 +; CHECK-FISL-NEXT: addis r4, r2, .LCPI65_0@toc@ha +; CHECK-FISL-NEXT: addi r4, r4, .LCPI65_0@toc@l +; CHECK-FISL-NEXT: lxvw4x v3, 0, r4 ; CHECK-FISL-NEXT: vadduwm v2, v2, v3 ; CHECK-FISL-NEXT: blr ; diff --git a/llvm/test/CodeGen/SPARC/fp16-promote.ll b/llvm/test/CodeGen/SPARC/fp16-promote.ll index 0c402430dadc1..9709322f48a57 100644 --- a/llvm/test/CodeGen/SPARC/fp16-promote.ll +++ b/llvm/test/CodeGen/SPARC/fp16-promote.ll @@ -182,11 +182,11 @@ define void 
@test_fptrunc_double(double %d, half* %p) nounwind { ; V8-UNOPT-NEXT: std %i4, [%fp+-8] ; V8-UNOPT-NEXT: ldd [%fp+-8], %f0 ; V8-UNOPT-NEXT: std %f0, [%fp+-16] -; V8-UNOPT-NEXT: ldd [%fp+-16], %i0 -; V8-UNOPT-NEXT: mov %i0, %i3 -; V8-UNOPT-NEXT: ! kill: def $i1 killed $i1 killed $i0_i1 -; V8-UNOPT-NEXT: mov %i3, %o0 -; V8-UNOPT-NEXT: mov %i1, %o1 +; V8-UNOPT-NEXT: ldd [%fp+-16], %i4 +; V8-UNOPT-NEXT: mov %i4, %i0 +; V8-UNOPT-NEXT: ! kill: def $i5 killed $i5 killed $i4_i5 +; V8-UNOPT-NEXT: mov %i0, %o0 +; V8-UNOPT-NEXT: mov %i5, %o1 ; V8-UNOPT-NEXT: call __truncdfhf2 ; V8-UNOPT-NEXT: st %i2, [%fp+-20] ; V8-UNOPT-NEXT: ld [%fp+-20], %i0 ! 4-byte Folded Reload diff --git a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll index b5635c7e0f067..48ad2a2c07770 100644 --- a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll +++ b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll @@ -8,34 +8,34 @@ define i32 @z() nounwind ssp { ; CHECK-LABEL: z: ; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %ebx ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: subl $148, %esp +; CHECK-NEXT: subl $144, %esp ; CHECK-NEXT: movl L___stack_chk_guard$non_lazy_ptr, %eax ; CHECK-NEXT: movl (%eax), %eax ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movb $48, {{[0-9]+}}(%esp) -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al -; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp) +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movb %cl, {{[0-9]+}}(%esp) ; CHECK-NEXT: movb $15, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: movl $8, %ecx -; CHECK-NEXT: leal {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl $8, %edx +; CHECK-NEXT: leal {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl %edx, %ecx ; CHECK-NEXT: movl %eax, %edi -; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: addl $36, %ecx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload ; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: movl %edx, %ecx ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload -; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload ; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl -; CHECK-NEXT: movb %cl, 32(%eax) -; CHECK-NEXT: movb %cl, 68(%eax) +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bl +; CHECK-NEXT: movb %bl, 32(%eax) +; CHECK-NEXT: movb %bl, 68(%eax) ; CHECK-NEXT: calll _f ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -50,9 +50,10 @@ define i32 @z() nounwind ssp { ; CHECK-NEXT: jne LBB0_3 ; CHECK-NEXT: ## %bb.2: ## %SP_return ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload -; CHECK-NEXT: addl $148, %esp +; CHECK-NEXT: addl $144, %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx ; CHECK-NEXT: retl ; CHECK-NEXT: LBB0_3: ## %CallStackCheckFailBlk ; CHECK-NEXT: calll ___stack_chk_fail diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll index 7a1f34c65c183..16fde4074ea0e 100644 --- a/llvm/test/CodeGen/X86/atomic-unordered.ll +++ b/llvm/test/CodeGen/X86/atomic-unordered.ll @@ -126,8 +126,8 @@ define void @narrow_writeback_and(i64* %ptr) { ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # 
kill: def $eax killed $eax killed $rax ; CHECK-O0-NEXT: andl $-256, %eax -; CHECK-O0-NEXT: # kill: def $rax killed $eax -; CHECK-O0-NEXT: movq %rax, (%rdi) +; CHECK-O0-NEXT: movl %eax, %ecx +; CHECK-O0-NEXT: movq %rcx, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: narrow_writeback_and: @@ -231,10 +231,10 @@ define i128 @load_i128(i128* %ptr) { ; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O0-NEXT: .cfi_offset %rbx, -16 ; CHECK-O0-NEXT: xorl %eax, %eax -; CHECK-O0-NEXT: # kill: def $rax killed $eax -; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; CHECK-O0-NEXT: movl %eax, %ecx +; CHECK-O0-NEXT: movq %rcx, %rax +; CHECK-O0-NEXT: movq %rcx, %rdx +; CHECK-O0-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload ; CHECK-O0-NEXT: lock cmpxchg16b (%rdi) ; CHECK-O0-NEXT: popq %rbx @@ -326,14 +326,14 @@ define i256 @load_i256(i256* %ptr) { ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: callq __atomic_load ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; CHECK-O0-NEXT: movq %rsi, 24(%rdi) -; CHECK-O0-NEXT: movq %rdx, 16(%rdi) -; CHECK-O0-NEXT: movq %rcx, 8(%rdi) -; CHECK-O0-NEXT: movq %rax, (%rdi) +; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; CHECK-O0-NEXT: movq %rdi, 24(%r9) +; CHECK-O0-NEXT: movq %rsi, 16(%r9) +; CHECK-O0-NEXT: movq %rdx, 8(%r9) +; CHECK-O0-NEXT: movq %rax, (%r9) ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; CHECK-O0-NEXT: addq $56, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 @@ -831,8 +831,8 @@ define i64 @load_fold_udiv1(i64* %p) { ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: xorl %ecx, %ecx ; CHECK-O0-NEXT: movl %ecx, %edx -; CHECK-O0-NEXT: movl $15, %ecx -; CHECK-O0-NEXT: divq %rcx +; CHECK-O0-NEXT: movl $15, %esi +; CHECK-O0-NEXT: divq %rsi ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_fold_udiv1: @@ -1024,8 +1024,8 @@ define i64 @load_fold_urem1(i64* %p) { ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: xorl %ecx, %ecx ; CHECK-O0-NEXT: movl %ecx, %edx -; CHECK-O0-NEXT: movl $15, %ecx -; CHECK-O0-NEXT: divq %rcx +; CHECK-O0-NEXT: movl $15, %esi +; CHECK-O0-NEXT: divq %rsi ; CHECK-O0-NEXT: movq %rdx, %rax ; CHECK-O0-NEXT: retq ; @@ -1475,9 +1475,9 @@ define i1 @load_fold_icmp3(i64* %p1, i64* %p2) { ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movq (%rsi), %rcx ; CHECK-O0-NEXT: subq %rcx, %rax -; CHECK-O0-NEXT: sete %cl +; CHECK-O0-NEXT: sete %dl ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movb %cl, %al +; CHECK-O0-NEXT: movb %dl, %al ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_fold_icmp3: @@ -2076,8 +2076,8 @@ define void @rmw_fold_and1(i64* %p, i64 %v) { ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O0-NEXT: andl $15, %eax -; CHECK-O0-NEXT: # kill: def $rax killed $eax -; CHECK-O0-NEXT: movq %rax, (%rdi) +; CHECK-O0-NEXT: movl %eax, %ecx +; CHECK-O0-NEXT: movq %rcx, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_and1: @@ -2541,8 +2541,9 @@ define i16 
@load_i8_anyext_i16(i8* %ptr) { ; CHECK-O0-CUR-LABEL: load_i8_anyext_i16: ; CHECK-O0-CUR: # %bb.0: ; CHECK-O0-CUR-NEXT: movb (%rdi), %al -; CHECK-O0-CUR-NEXT: movzbl %al, %eax -; CHECK-O0-CUR-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-O0-CUR-NEXT: movzbl %al, %ecx +; CHECK-O0-CUR-NEXT: # kill: def $cx killed $cx killed $ecx +; CHECK-O0-CUR-NEXT: movw %cx, %ax ; CHECK-O0-CUR-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_i8_anyext_i16: @@ -2670,12 +2671,13 @@ define i16 @load_combine(i8* %p) { ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movb (%rdi), %al ; CHECK-O0-NEXT: movb 1(%rdi), %cl -; CHECK-O0-NEXT: movzbl %al, %eax -; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax -; CHECK-O0-NEXT: movzbl %cl, %ecx -; CHECK-O0-NEXT: # kill: def $cx killed $cx killed $ecx -; CHECK-O0-NEXT: shlw $8, %cx -; CHECK-O0-NEXT: orw %cx, %ax +; CHECK-O0-NEXT: movzbl %al, %edx +; CHECK-O0-NEXT: # kill: def $dx killed $dx killed $edx +; CHECK-O0-NEXT: movzbl %cl, %esi +; CHECK-O0-NEXT: # kill: def $si killed $si killed $esi +; CHECK-O0-NEXT: shlw $8, %si +; CHECK-O0-NEXT: orw %si, %dx +; CHECK-O0-NEXT: movw %dx, %ax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_combine: diff --git a/llvm/test/CodeGen/X86/atomic32.ll b/llvm/test/CodeGen/X86/atomic32.ll index 05a10966a4f1a..24aebbba60d19 100644 --- a/llvm/test/CodeGen/X86/atomic32.ll +++ b/llvm/test/CodeGen/X86/atomic32.ll @@ -70,8 +70,8 @@ define void @atomic_fetch_and32() nounwind { ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: andl $5, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) -; X64-NEXT: sete %cl -; X64-NEXT: testb $1, %cl +; X64-NEXT: sete %dl +; X64-NEXT: testb $1, %dl ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill @@ -94,8 +94,8 @@ define void @atomic_fetch_and32() nounwind { ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: andl $5, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 -; X86-NEXT: sete %cl -; X86-NEXT: testb $1, %cl +; X86-NEXT: sete %dl +; X86-NEXT: testb $1, %dl ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill @@ -124,8 +124,8 @@ define void @atomic_fetch_or32() nounwind { ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: orl $5, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) -; X64-NEXT: sete %cl -; X64-NEXT: testb $1, %cl +; X64-NEXT: sete %dl +; X64-NEXT: testb $1, %dl ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill @@ -148,8 +148,8 @@ define void @atomic_fetch_or32() nounwind { ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: orl $5, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 -; X86-NEXT: sete %cl -; X86-NEXT: testb $1, %cl +; X86-NEXT: sete %dl +; X86-NEXT: testb $1, %dl ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill @@ -178,8 +178,8 @@ define void @atomic_fetch_xor32() nounwind { ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: xorl $5, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) -; X64-NEXT: sete %cl -; X64-NEXT: testb $1, %cl +; X64-NEXT: sete %dl +; X64-NEXT: testb $1, %dl ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill @@ -202,8 +202,8 @@ define void @atomic_fetch_xor32() nounwind { ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: xorl $5, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 
-; X86-NEXT: sete %cl -; X86-NEXT: testb $1, %cl +; X86-NEXT: sete %dl +; X86-NEXT: testb $1, %dl ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill @@ -234,8 +234,8 @@ define void @atomic_fetch_nand32(i32 %x) nounwind { ; X64-NEXT: andl %edx, %ecx ; X64-NEXT: notl %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) -; X64-NEXT: sete %cl -; X64-NEXT: testb $1, %cl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB5_2 ; X64-NEXT: jmp .LBB5_1 @@ -244,6 +244,7 @@ define void @atomic_fetch_nand32(i32 %x) nounwind { ; ; X86-LABEL: atomic_fetch_nand32: ; X86: # %bb.0: +; X86-NEXT: pushl %ebx ; X86-NEXT: subl $8, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl sc32, %ecx @@ -257,13 +258,14 @@ define void @atomic_fetch_nand32(i32 %x) nounwind { ; X86-NEXT: andl %edx, %ecx ; X86-NEXT: notl %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 -; X86-NEXT: sete %cl -; X86-NEXT: testb $1, %cl +; X86-NEXT: sete %bl +; X86-NEXT: testb $1, %bl ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: jne .LBB5_2 ; X86-NEXT: jmp .LBB5_1 ; X86-NEXT: .LBB5_2: # %atomicrmw.end ; X86-NEXT: addl $8, %esp +; X86-NEXT: popl %ebx ; X86-NEXT: retl %t1 = atomicrmw nand i32* @sc32, i32 %x acquire ret void @@ -283,8 +285,8 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; X64-NEXT: subl %edx, %ecx ; X64-NEXT: cmovgl %eax, %edx ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB6_2 @@ -294,6 +296,7 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; ; X86-CMOV-LABEL: atomic_fetch_max32: ; X86-CMOV: # %bb.0: +; X86-CMOV-NEXT: pushl %ebx ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-CMOV-NEXT: movl sc32, %ecx @@ -307,18 +310,20 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; X86-CMOV-NEXT: subl %edx, %ecx ; X86-CMOV-NEXT: cmovgl %eax, %edx ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %dl -; X86-CMOV-NEXT: testb $1, %dl +; X86-CMOV-NEXT: sete %bl +; X86-CMOV-NEXT: testb $1, %bl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB6_2 ; X86-CMOV-NEXT: jmp .LBB6_1 ; X86-CMOV-NEXT: .LBB6_2: # %atomicrmw.end ; X86-CMOV-NEXT: addl $12, %esp +; X86-CMOV-NEXT: popl %ebx ; X86-CMOV-NEXT: retl ; ; X86-NOCMOV-LABEL: atomic_fetch_max32: ; X86-NOCMOV: # %bb.0: +; X86-NOCMOV-NEXT: pushl %ebx ; X86-NOCMOV-NEXT: pushl %esi ; X86-NOCMOV-NEXT: subl $20, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -347,18 +352,20 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; X86-NOCMOV-NEXT: movl %ecx, %eax ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %dl -; X86-NOCMOV-NEXT: testb $1, %dl +; X86-NOCMOV-NEXT: sete %bl +; X86-NOCMOV-NEXT: testb $1, %bl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB6_2 ; X86-NOCMOV-NEXT: jmp .LBB6_1 ; X86-NOCMOV-NEXT: .LBB6_2: # %atomicrmw.end ; X86-NOCMOV-NEXT: addl $20, %esp ; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_max32: ; X86-NOX87: # 
%bb.0: +; X86-NOX87-NEXT: pushl %ebx ; X86-NOX87-NEXT: pushl %esi ; X86-NOX87-NEXT: subl $20, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -387,14 +394,15 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; X86-NOX87-NEXT: movl %ecx, %eax ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %dl -; X86-NOX87-NEXT: testb $1, %dl +; X86-NOX87-NEXT: sete %bl +; X86-NOX87-NEXT: testb $1, %bl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB6_2 ; X86-NOX87-NEXT: jmp .LBB6_1 ; X86-NOX87-NEXT: .LBB6_2: # %atomicrmw.end ; X86-NOX87-NEXT: addl $20, %esp ; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: popl %ebx ; X86-NOX87-NEXT: retl %t1 = atomicrmw max i32* @sc32, i32 %x acquire ret void @@ -414,8 +422,8 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; X64-NEXT: subl %edx, %ecx ; X64-NEXT: cmovlel %eax, %edx ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB7_2 @@ -425,6 +433,7 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; ; X86-CMOV-LABEL: atomic_fetch_min32: ; X86-CMOV: # %bb.0: +; X86-CMOV-NEXT: pushl %ebx ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-CMOV-NEXT: movl sc32, %ecx @@ -438,18 +447,20 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; X86-CMOV-NEXT: subl %edx, %ecx ; X86-CMOV-NEXT: cmovlel %eax, %edx ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %dl -; X86-CMOV-NEXT: testb $1, %dl +; X86-CMOV-NEXT: sete %bl +; X86-CMOV-NEXT: testb $1, %bl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB7_2 ; X86-CMOV-NEXT: jmp .LBB7_1 ; X86-CMOV-NEXT: .LBB7_2: # %atomicrmw.end ; X86-CMOV-NEXT: addl $12, %esp +; X86-CMOV-NEXT: popl %ebx ; X86-CMOV-NEXT: retl ; ; X86-NOCMOV-LABEL: atomic_fetch_min32: ; X86-NOCMOV: # %bb.0: +; X86-NOCMOV-NEXT: pushl %ebx ; X86-NOCMOV-NEXT: pushl %esi ; X86-NOCMOV-NEXT: subl $20, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -478,18 +489,20 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; X86-NOCMOV-NEXT: movl %ecx, %eax ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %dl -; X86-NOCMOV-NEXT: testb $1, %dl +; X86-NOCMOV-NEXT: sete %bl +; X86-NOCMOV-NEXT: testb $1, %bl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB7_2 ; X86-NOCMOV-NEXT: jmp .LBB7_1 ; X86-NOCMOV-NEXT: .LBB7_2: # %atomicrmw.end ; X86-NOCMOV-NEXT: addl $20, %esp ; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_min32: ; X86-NOX87: # %bb.0: +; X86-NOX87-NEXT: pushl %ebx ; X86-NOX87-NEXT: pushl %esi ; X86-NOX87-NEXT: subl $20, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -518,14 +531,15 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; X86-NOX87-NEXT: movl %ecx, %eax ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %dl -; X86-NOX87-NEXT: testb $1, %dl +; X86-NOX87-NEXT: sete %bl +; X86-NOX87-NEXT: testb $1, %bl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: 
jne .LBB7_2 ; X86-NOX87-NEXT: jmp .LBB7_1 ; X86-NOX87-NEXT: .LBB7_2: # %atomicrmw.end ; X86-NOX87-NEXT: addl $20, %esp ; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: popl %ebx ; X86-NOX87-NEXT: retl %t1 = atomicrmw min i32* @sc32, i32 %x acquire ret void @@ -545,8 +559,8 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; X64-NEXT: subl %edx, %ecx ; X64-NEXT: cmoval %eax, %edx ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB8_2 @@ -556,6 +570,7 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; ; X86-CMOV-LABEL: atomic_fetch_umax32: ; X86-CMOV: # %bb.0: +; X86-CMOV-NEXT: pushl %ebx ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-CMOV-NEXT: movl sc32, %ecx @@ -569,18 +584,20 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; X86-CMOV-NEXT: subl %edx, %ecx ; X86-CMOV-NEXT: cmoval %eax, %edx ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %dl -; X86-CMOV-NEXT: testb $1, %dl +; X86-CMOV-NEXT: sete %bl +; X86-CMOV-NEXT: testb $1, %bl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB8_2 ; X86-CMOV-NEXT: jmp .LBB8_1 ; X86-CMOV-NEXT: .LBB8_2: # %atomicrmw.end ; X86-CMOV-NEXT: addl $12, %esp +; X86-CMOV-NEXT: popl %ebx ; X86-CMOV-NEXT: retl ; ; X86-NOCMOV-LABEL: atomic_fetch_umax32: ; X86-NOCMOV: # %bb.0: +; X86-NOCMOV-NEXT: pushl %ebx ; X86-NOCMOV-NEXT: pushl %esi ; X86-NOCMOV-NEXT: subl $20, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -609,18 +626,20 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; X86-NOCMOV-NEXT: movl %ecx, %eax ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %dl -; X86-NOCMOV-NEXT: testb $1, %dl +; X86-NOCMOV-NEXT: sete %bl +; X86-NOCMOV-NEXT: testb $1, %bl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB8_2 ; X86-NOCMOV-NEXT: jmp .LBB8_1 ; X86-NOCMOV-NEXT: .LBB8_2: # %atomicrmw.end ; X86-NOCMOV-NEXT: addl $20, %esp ; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_umax32: ; X86-NOX87: # %bb.0: +; X86-NOX87-NEXT: pushl %ebx ; X86-NOX87-NEXT: pushl %esi ; X86-NOX87-NEXT: subl $20, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -649,14 +668,15 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; X86-NOX87-NEXT: movl %ecx, %eax ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %dl -; X86-NOX87-NEXT: testb $1, %dl +; X86-NOX87-NEXT: sete %bl +; X86-NOX87-NEXT: testb $1, %bl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB8_2 ; X86-NOX87-NEXT: jmp .LBB8_1 ; X86-NOX87-NEXT: .LBB8_2: # %atomicrmw.end ; X86-NOX87-NEXT: addl $20, %esp ; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: popl %ebx ; X86-NOX87-NEXT: retl %t1 = atomicrmw umax i32* @sc32, i32 %x acquire ret void @@ -676,8 +696,8 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; X64-NEXT: subl %edx, %ecx ; X64-NEXT: cmovbel %eax, %edx ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movl 
%eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB9_2 @@ -687,6 +707,7 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; ; X86-CMOV-LABEL: atomic_fetch_umin32: ; X86-CMOV: # %bb.0: +; X86-CMOV-NEXT: pushl %ebx ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-CMOV-NEXT: movl sc32, %ecx @@ -700,18 +721,20 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; X86-CMOV-NEXT: subl %edx, %ecx ; X86-CMOV-NEXT: cmovbel %eax, %edx ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %dl -; X86-CMOV-NEXT: testb $1, %dl +; X86-CMOV-NEXT: sete %bl +; X86-CMOV-NEXT: testb $1, %bl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB9_2 ; X86-CMOV-NEXT: jmp .LBB9_1 ; X86-CMOV-NEXT: .LBB9_2: # %atomicrmw.end ; X86-CMOV-NEXT: addl $12, %esp +; X86-CMOV-NEXT: popl %ebx ; X86-CMOV-NEXT: retl ; ; X86-NOCMOV-LABEL: atomic_fetch_umin32: ; X86-NOCMOV: # %bb.0: +; X86-NOCMOV-NEXT: pushl %ebx ; X86-NOCMOV-NEXT: pushl %esi ; X86-NOCMOV-NEXT: subl $20, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -740,18 +763,20 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; X86-NOCMOV-NEXT: movl %ecx, %eax ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %dl -; X86-NOCMOV-NEXT: testb $1, %dl +; X86-NOCMOV-NEXT: sete %bl +; X86-NOCMOV-NEXT: testb $1, %bl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB9_2 ; X86-NOCMOV-NEXT: jmp .LBB9_1 ; X86-NOCMOV-NEXT: .LBB9_2: # %atomicrmw.end ; X86-NOCMOV-NEXT: addl $20, %esp ; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_umin32: ; X86-NOX87: # %bb.0: +; X86-NOX87-NEXT: pushl %ebx ; X86-NOX87-NEXT: pushl %esi ; X86-NOX87-NEXT: subl $20, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -780,14 +805,15 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; X86-NOX87-NEXT: movl %ecx, %eax ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %dl -; X86-NOX87-NEXT: testb $1, %dl +; X86-NOX87-NEXT: sete %bl +; X86-NOX87-NEXT: testb $1, %bl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB9_2 ; X86-NOX87-NEXT: jmp .LBB9_1 ; X86-NOX87-NEXT: .LBB9_2: # %atomicrmw.end ; X86-NOX87-NEXT: addl $20, %esp ; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: popl %ebx ; X86-NOX87-NEXT: retl %t1 = atomicrmw umin i32* @sc32, i32 %x acquire ret void diff --git a/llvm/test/CodeGen/X86/atomic64.ll b/llvm/test/CodeGen/X86/atomic64.ll index 963561dc8deb2..8b40380afcb2a 100644 --- a/llvm/test/CodeGen/X86/atomic64.ll +++ b/llvm/test/CodeGen/X86/atomic64.ll @@ -137,12 +137,12 @@ define void @atomic_fetch_and64() nounwind { ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: andl $5, %ecx -; X64-NEXT: # kill: def $rcx killed $ecx -; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) -; X64-NEXT: sete %cl -; X64-NEXT: testb $1, %cl -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movl %ecx, %edx +; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil +; X64-NEXT: movq %rax, %rdx +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; 
X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB2_2 ; X64-NEXT: jmp .LBB2_1 @@ -202,8 +202,8 @@ define void @atomic_fetch_or64() nounwind { ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: orq $5, %rcx ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) -; X64-NEXT: sete %cl -; X64-NEXT: testb $1, %cl +; X64-NEXT: sete %dl +; X64-NEXT: testb $1, %dl ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -265,8 +265,8 @@ define void @atomic_fetch_xor64() nounwind { ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: xorq $5, %rcx ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) -; X64-NEXT: sete %cl -; X64-NEXT: testb $1, %cl +; X64-NEXT: sete %dl +; X64-NEXT: testb $1, %dl ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -330,8 +330,8 @@ define void @atomic_fetch_nand64(i64 %x) nounwind { ; X64-NEXT: andq %rdx, %rcx ; X64-NEXT: notq %rcx ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) -; X64-NEXT: sete %cl -; X64-NEXT: testb $1, %cl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB5_2 ; X64-NEXT: jmp .LBB5_1 @@ -373,8 +373,8 @@ define void @atomic_fetch_max64(i64 %x) nounwind { ; X64-NEXT: subq %rdx, %rcx ; X64-NEXT: cmovgq %rax, %rdx ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB6_2 @@ -471,8 +471,8 @@ define void @atomic_fetch_min64(i64 %x) nounwind { ; X64-NEXT: subq %rdx, %rcx ; X64-NEXT: cmovleq %rax, %rdx ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB7_2 @@ -569,8 +569,8 @@ define void @atomic_fetch_umax64(i64 %x) nounwind { ; X64-NEXT: subq %rdx, %rcx ; X64-NEXT: cmovaq %rax, %rdx ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB8_2 @@ -667,8 +667,8 @@ define void @atomic_fetch_umin64(i64 %x) nounwind { ; X64-NEXT: subq %rdx, %rcx ; X64-NEXT: cmovbeq %rax, %rdx ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB9_2 diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll index f448bfec2ec99..718449d7a771f 100644 --- a/llvm/test/CodeGen/X86/avx-load-store.ll +++ b/llvm/test/CodeGen/X86/avx-load-store.ll @@ -175,8 +175,8 @@ define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp ; CHECK_O0: # %bb.0: ; CHECK_O0-NEXT: # implicit-def: $ymm2 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 -; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) +; 
CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2 +; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq %Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32> @@ -197,8 +197,8 @@ define void @double_save_volatile(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nou ; CHECK_O0: # %bb.0: ; CHECK_O0-NEXT: # implicit-def: $ymm2 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 -; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2 +; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq %Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32> @@ -239,10 +239,10 @@ define void @f_f() nounwind { ; CHECK_O0-NEXT: .LBB9_3: # %cif_mixed_test_all ; CHECK_O0-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967295,0,0,0] ; CHECK_O0-NEXT: vmovdqa %xmm0, %xmm0 -; CHECK_O0-NEXT: # kill: def $ymm0 killed $xmm0 +; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1 ; CHECK_O0-NEXT: # implicit-def: $rax -; CHECK_O0-NEXT: # implicit-def: $ymm1 -; CHECK_O0-NEXT: vmaskmovps %ymm1, %ymm0, (%rax) +; CHECK_O0-NEXT: # implicit-def: $ymm2 +; CHECK_O0-NEXT: vmaskmovps %ymm2, %ymm1, (%rax) ; CHECK_O0-NEXT: .LBB9_4: # %cif_mixed_test_any_check allocas: br i1 undef, label %cif_mask_all, label %cif_mask_mixed @@ -276,8 +276,8 @@ define void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind { ; CHECK_O0-NEXT: vmovdqu 16(%rsi), %xmm1 ; CHECK_O0-NEXT: # implicit-def: $ymm2 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 -; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2 +; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq %b = load <8 x i32>, <8 x i32>* %bp, align 1 @@ -321,8 +321,8 @@ define void @add4i64a16(<4 x i64>* %ret, <4 x i64>* %bp) nounwind { ; CHECK_O0-NEXT: vmovdqa 16(%rsi), %xmm1 ; CHECK_O0-NEXT: # implicit-def: $ymm2 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 -; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2 +; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq %b = load <4 x i64>, <4 x i64>* %bp, align 16 diff --git a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll index 186370ca675c7..c4e009d54ec7a 100755 --- a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll @@ -40,20 +40,22 @@ define void @test_xmm(i32 %shift, i32 %mulp, <2 x i64> %a,i8* %arraydecay,i8* %f ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload ; CHECK-NEXT: vpmovd2m %xmm0, %k0 ; CHECK-NEXT: kmovq %k0, %k1 -; CHECK-NEXT: kmovd %k0, %ecx -; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx -; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: ## kill: def $cx killed $cx killed $ecx -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload -; CHECK-NEXT: movl $4, %edx -; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: kmovd %k0, %esi +; CHECK-NEXT: ## kill: def $sil killed $sil killed $esi +; CHECK-NEXT: movzbl %sil, %edi +; CHECK-NEXT: ## kill: def $di killed $di killed $edi +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload +; CHECK-NEXT: movw %di, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; CHECK-NEXT: movq %rcx, %rdi +; CHECK-NEXT: movl $4, %r8d +; CHECK-NEXT: movl %r8d, %esi +; CHECK-NEXT: movl %r8d, %edx ; CHECK-NEXT: movl %eax, 
{{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; CHECK-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; CHECK-NEXT: callq _calc_expected_mask_val ; CHECK-NEXT: ## kill: def $ax killed $ax killed $rax -; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx ## 2-byte Reload -; CHECK-NEXT: movzwl %cx, %edi +; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %r9w ## 2-byte Reload +; CHECK-NEXT: movzwl %r9w, %edi ; CHECK-NEXT: movzwl %ax, %esi ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload diff --git a/llvm/test/CodeGen/X86/crash-O0.ll b/llvm/test/CodeGen/X86/crash-O0.ll index 9f9e5584d6f21..a93d3dd267b52 100644 --- a/llvm/test/CodeGen/X86/crash-O0.ll +++ b/llvm/test/CodeGen/X86/crash-O0.ll @@ -79,12 +79,11 @@ define i64 @addressModeWith32bitIndex(i32 %V) { ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: ## kill: def $rax killed $eax -; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movq %rcx, %rax ; CHECK-NEXT: cqto -; CHECK-NEXT: movslq %edi, %rcx -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Reload -; CHECK-NEXT: idivq (%rsi,%rcx,8) +; CHECK-NEXT: movslq %edi, %rsi +; CHECK-NEXT: idivq (%rcx,%rsi,8) ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq %gep = getelementptr i64, i64* null, i32 %V diff --git a/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll b/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll index 664d9ded1e0e1..7d05a869be893 100644 --- a/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll +++ b/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll @@ -7,8 +7,8 @@ define void @foo(i32* %p) !dbg !4 { bb: %tmp = load i32, i32* %p, align 4, !dbg !7 ; CHECK: $eax = MOV32rm killed {{.*}} $rdi, {{.*}} debug-location !7 :: (load 4 from %ir.p) - ; CHECK-NEXT: $rax = KILL killed renamable $eax, debug-location !7 - ; CHECK-NEXT: $rcx = MOV64rr $rax, debug-location !7 + ; CHECK-NEXT: $ecx = MOV32rr killed $eax, implicit-def $rcx, debug-location !7 + ; CHECK-NEXT: $rdx = MOV64rr $rcx, debug-location !7 switch i32 %tmp, label %bb7 [ i32 0, label %bb1 diff --git a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll index 7fffa21f0d24d..5d7c83fa19d44 100644 --- a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll +++ b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll @@ -1013,11 +1013,11 @@ define <16 x float> @test_load_nt16xfloat(<16 x float>* nocapture %ptr) { ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: $ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt16xfloat: @@ -1067,11 +1067,11 @@ define <8 x double> @test_load_nt8xdouble(<8 x double>* nocapture %ptr) { ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), 
%xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: $ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xdouble: @@ -1121,11 +1121,11 @@ define <64 x i8> @test_load_nt64xi8(<64 x i8>* nocapture %ptr) { ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: $ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt64xi8: @@ -1175,11 +1175,11 @@ define <32 x i16> @test_load_nt32xi16(<32 x i16>* nocapture %ptr) { ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: $ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt32xi16: @@ -1229,11 +1229,11 @@ define <16 x i32> @test_load_nt16xi32(<16 x i32>* nocapture %ptr) { ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: $ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt16xi32: @@ -1283,11 +1283,11 @@ define <8 x i64> @test_load_nt8xi64(<8 x i64>* nocapture %ptr) { ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: $ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xi64: diff --git a/llvm/test/CodeGen/X86/lvi-hardening-loads.ll b/llvm/test/CodeGen/X86/lvi-hardening-loads.ll index ff8276f6f1c22..e660f306ef75b 100644 --- a/llvm/test/CodeGen/X86/lvi-hardening-loads.ll +++ b/llvm/test/CodeGen/X86/lvi-hardening-loads.ll @@ -117,9 +117,9 @@ if.then: ; preds = %for.body ; X64-NOOPT-NEXT: lfence ; X64-NOOPT-NEXT: movq (%rax,%rcx,8), %rax ; X64-NOOPT-NEXT: lfence -; X64-NOOPT-NEXT: movl (%rax), %eax +; X64-NOOPT-NEXT: movl (%rax), %edx ; X64-NOOPT-NEXT: lfence -; X64-NOOPT-NEXT: movl %eax, -{{[0-9]+}}(%rsp) +; X64-NOOPT-NEXT: movl %edx, 
-{{[0-9]+}}(%rsp) if.end: ; preds = %if.then, %for.body br label %for.inc diff --git a/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll b/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll index ac55e1a1fc653..a1ad7f3c0f534 100644 --- a/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll +++ b/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll @@ -69,8 +69,8 @@ define dso_local void @test_zero_ext(%struct.Foo* %f, i32 addrspace(271)* %i) { ; CHECK-O0-LABEL: test_zero_ext: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: movl %edx, %eax -; CHECK-O0-NEXT: # kill: def $rax killed $eax -; CHECK-O0-NEXT: movq %rax, 8(%rcx) +; CHECK-O0-NEXT: movl %eax, %r8d +; CHECK-O0-NEXT: movq %r8, 8(%rcx) ; CHECK-O0-NEXT: jmp use_foo # TAILCALL entry: %0 = addrspacecast i32 addrspace(271)* %i to i32* @@ -125,23 +125,19 @@ entry: ; Test that null can be passed as a 32-bit pointer. define dso_local void @test_null_arg(%struct.Foo* %f) { -; CHECK-LABEL: test_null_arg: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $40, %rsp -; CHECK: xorl %edx, %edx -; CHECK-NEXT: callq test_noop1 -; CHECK-NEXT: nop -; CHECK-NEXT: addq $40, %rsp -; CHECK-NEXT: retq -; -; CHECK-O0-LABEL: test_null_arg: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: subq $40, %rsp -; CHECK-O0: xorl %edx, %edx -; CHECK-O0-NEXT: callq test_noop1 -; CHECK-O0-NEXT: nop -; CHECK-O0-NEXT: addq $40, %rsp -; CHECK-O0-NEXT: retq +; ALL-LABEL: test_null_arg: +; ALL: # %bb.0: # %entry +; ALL-NEXT: subq $40, %rsp +; ALL-NEXT: .seh_stackalloc 40 +; ALL-NEXT: .seh_endprologue +; ALL-NEXT: xorl %edx, %edx +; ALL-NEXT: callq test_noop1 +; ALL-NEXT: nop +; ALL-NEXT: addq $40, %rsp +; ALL-NEXT: retq +; ALL-NEXT: .seh_handlerdata +; ALL-NEXT: .text +; ALL-NEXT: .seh_endproc entry: call void @test_noop1(%struct.Foo* %f, i32 addrspace(270)* null) ret void @@ -177,8 +173,8 @@ define void @test_unrecognized2(%struct.Foo* %f, i32 addrspace(271)* %i) { ; CHECK-O0-LABEL: test_unrecognized2: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: movl %edx, %eax -; CHECK-O0-NEXT: # kill: def $rax killed $eax -; CHECK-O0-NEXT: movq %rax, 16(%rcx) +; CHECK-O0-NEXT: movl %eax, %r8d +; CHECK-O0-NEXT: movq %r8, 16(%rcx) ; CHECK-O0-NEXT: jmp use_foo # TAILCALL entry: %0 = addrspacecast i32 addrspace(271)* %i to i32 addrspace(9)* @@ -189,16 +185,11 @@ entry: } define i32 @test_load_sptr32(i32 addrspace(270)* %i) { -; CHECK-LABEL: test_load_sptr32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movslq %ecx, %rax -; CHECK-NEXT: movl (%rax), %eax -; CHECK-NEXT: retq -; CHECK-O0-LABEL: test_load_sptr32: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movslq %ecx, %rax -; CHECK-O0-NEXT: movl (%rax), %eax -; CHECK-O0-NEXT: retq +; ALL-LABEL: test_load_sptr32: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movslq %ecx, %rax +; ALL-NEXT: movl (%rax), %eax +; ALL-NEXT: retq entry: %0 = load i32, i32 addrspace(270)* %i, align 4 ret i32 %0 @@ -210,11 +201,12 @@ define i32 @test_load_uptr32(i32 addrspace(271)* %i) { ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movl (%rax), %eax ; CHECK-NEXT: retq +; ; CHECK-O0-LABEL: test_load_uptr32: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: movl %ecx, %eax -; CHECK-O0-NEXT: # kill: def $rax killed $eax -; CHECK-O0-NEXT: movl (%rax), %eax +; CHECK-O0-NEXT: movl %eax, %edx +; CHECK-O0-NEXT: movl (%rdx), %eax ; CHECK-O0-NEXT: retq entry: %0 = load i32, i32 addrspace(271)* %i, align 4 @@ -222,30 +214,21 @@ entry: } define i32 @test_load_ptr64(i32 addrspace(272)* %i) { -; CHECK-LABEL: test_load_ptr64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl (%rcx), %eax -; CHECK-NEXT: retq -; CHECK-O0-LABEL: 
test_load_ptr64: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movl (%rcx), %eax -; CHECK-O0-NEXT: retq +; ALL-LABEL: test_load_ptr64: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movl (%rcx), %eax +; ALL-NEXT: retq entry: %0 = load i32, i32 addrspace(272)* %i, align 8 ret i32 %0 } define void @test_store_sptr32(i32 addrspace(270)* %s, i32 %i) { -; CHECK-LABEL: test_store_sptr32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movslq %ecx, %rax -; CHECK-NEXT: movl %edx, (%rax) -; CHECK-NEXT: retq -; CHECK-O0-LABEL: test_store_sptr32: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movslq %ecx, %rax -; CHECK-O0-NEXT: movl %edx, (%rax) -; CHECK-O0-NEXT: retq +; ALL-LABEL: test_store_sptr32: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movslq %ecx, %rax +; ALL-NEXT: movl %edx, (%rax) +; ALL-NEXT: retq entry: store i32 %i, i32 addrspace(270)* %s, align 4 ret void @@ -257,11 +240,12 @@ define void @test_store_uptr32(i32 addrspace(271)* %s, i32 %i) { ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movl %edx, (%rax) ; CHECK-NEXT: retq +; ; CHECK-O0-LABEL: test_store_uptr32: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: movl %ecx, %eax -; CHECK-O0-NEXT: # kill: def $rax killed $eax -; CHECK-O0-NEXT: movl %edx, (%rax) +; CHECK-O0-NEXT: movl %eax, %r8d +; CHECK-O0-NEXT: movl %edx, (%r8) ; CHECK-O0-NEXT: retq entry: store i32 %i, i32 addrspace(271)* %s, align 4 @@ -269,14 +253,10 @@ entry: } define void @test_store_ptr64(i32 addrspace(272)* %s, i32 %i) { -; CHECK-LABEL: test_store_ptr64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl %edx, (%rcx) -; CHECK-NEXT: retq -; CHECK-O0-LABEL: test_store_ptr64: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movl %edx, (%rcx) -; CHECK-O0-NEXT: retq +; ALL-LABEL: test_store_ptr64: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movl %edx, (%rcx) +; ALL-NEXT: retq entry: store i32 %i, i32 addrspace(272)* %s, align 8 ret void diff --git a/llvm/test/CodeGen/X86/pr1489.ll b/llvm/test/CodeGen/X86/pr1489.ll index d1148eecb0da9..6226ea6caf90f 100644 --- a/llvm/test/CodeGen/X86/pr1489.ll +++ b/llvm/test/CodeGen/X86/pr1489.ll @@ -16,9 +16,9 @@ define i32 @quux() nounwind { ; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E ; CHECK-NEXT: calll _lrintf ; CHECK-NEXT: cmpl $1, %eax -; CHECK-NEXT: setl %al -; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: setl %cl +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movzbl %cl, %eax ; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl @@ -42,9 +42,9 @@ define i32 @foo() nounwind { ; CHECK-NEXT: movl $-1236950581, (%eax) ## imm = 0xB645A1CB ; CHECK-NEXT: calll _lrint ; CHECK-NEXT: cmpl $1, %eax -; CHECK-NEXT: setl %al -; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: setl %cl +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movzbl %cl, %eax ; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl @@ -67,9 +67,9 @@ define i32 @bar() nounwind { ; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E ; CHECK-NEXT: calll _lrintf ; CHECK-NEXT: cmpl $1, %eax -; CHECK-NEXT: setl %al -; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: setl %cl +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movzbl %cl, %eax ; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl @@ -90,9 +90,9 @@ define i32 @baz() nounwind { ; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E ; CHECK-NEXT: calll _lrintf ; CHECK-NEXT: cmpl $1, %eax -; CHECK-NEXT: setl %al -; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: setl %cl +; CHECK-NEXT: andb 
$1, %cl +; CHECK-NEXT: movzbl %cl, %eax ; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl diff --git a/llvm/test/CodeGen/X86/pr27591.ll b/llvm/test/CodeGen/X86/pr27591.ll index 7455584ac698a..97ad6814f1926 100644 --- a/llvm/test/CodeGen/X86/pr27591.ll +++ b/llvm/test/CodeGen/X86/pr27591.ll @@ -9,9 +9,9 @@ define void @test1(i32 %x) #0 { ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: cmpl $0, %edi ; CHECK-NEXT: setne %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movzbl %al, %ecx +; CHECK-NEXT: andl $1, %ecx +; CHECK-NEXT: movl %ecx, %edi ; CHECK-NEXT: callq callee1 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq @@ -27,10 +27,10 @@ define void @test2(i32 %x) #0 { ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: cmpl $0, %edi ; CHECK-NEXT: setne %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: negl %eax -; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movzbl %al, %ecx +; CHECK-NEXT: andl $1, %ecx +; CHECK-NEXT: negl %ecx +; CHECK-NEXT: movl %ecx, %edi ; CHECK-NEXT: callq callee2 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/pr30430.ll b/llvm/test/CodeGen/X86/pr30430.ll index e524245daa112..4d40aa09eeab1 100644 --- a/llvm/test/CodeGen/X86/pr30430.ll +++ b/llvm/test/CodeGen/X86/pr30430.ll @@ -75,28 +75,28 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float ; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] ; CHECK-NEXT: # implicit-def: $ymm2 ; CHECK-NEXT: vmovaps %xmm1, %xmm2 -; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm2 +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[2,3] ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3] ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm3[0] ; CHECK-NEXT: # implicit-def: $ymm3 -; CHECK-NEXT: vmovaps %xmm2, %xmm3 -; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1 -; CHECK-NEXT: # implicit-def: $zmm2 -; CHECK-NEXT: vmovaps %ymm1, %ymm2 -; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0 -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovaps %xmm1, %xmm3 +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm3 +; CHECK-NEXT: 
# implicit-def: $zmm24 +; CHECK-NEXT: vmovaps %zmm3, %zmm24 +; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm24, %zmm24 +; CHECK-NEXT: vmovaps %zmm24, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0 ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp diff --git a/llvm/test/CodeGen/X86/pr30813.ll b/llvm/test/CodeGen/X86/pr30813.ll index 7266c5bd8d015..e3e096bda6c28 100644 --- a/llvm/test/CodeGen/X86/pr30813.ll +++ b/llvm/test/CodeGen/X86/pr30813.ll @@ -1,8 +1,9 @@ ; RUN: llc -mtriple=x86_64-linux-gnu -O0 %s -o - | FileCheck %s ; CHECK: patatino: ; CHECK: .cfi_startproc -; CHECK: movzwl (%rax), %e[[REG0:[abcd]x]] -; CHECK: movq %r[[REG0]], ({{%r[abcd]x}}) +; CHECK: movzwl (%rax), [[REG0:%e[abcd]x]] +; CHECK: movl [[REG0]], %e[[REG1C:[abcd]]]x +; CHECK: movq %r[[REG1C]]x, ({{%r[abcd]x}}) ; CHECK: retq define void @patatino() { diff --git a/llvm/test/CodeGen/X86/pr32241.ll b/llvm/test/CodeGen/X86/pr32241.ll index 1f3d273dfc416..6d628e6962eda 100644 --- a/llvm/test/CodeGen/X86/pr32241.ll +++ b/llvm/test/CodeGen/X86/pr32241.ll @@ -23,14 +23,14 @@ define i32 @_Z3foov() { ; CHECK-NEXT: .LBB0_2: # %lor.end ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-NEXT: cmpl %eax, %ecx +; CHECK-NEXT: movzbl %al, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; CHECK-NEXT: cmpl %ecx, %edx ; CHECK-NEXT: setl %al ; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: xorl $-1, %eax -; CHECK-NEXT: cmpl $0, %eax +; CHECK-NEXT: movzbl %al, %ecx +; CHECK-NEXT: xorl $-1, %ecx +; CHECK-NEXT: cmpl $0, %ecx ; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: jne .LBB0_4 @@ -42,9 +42,9 @@ define i32 @_Z3foov() { ; CHECK-NEXT: .LBB0_4: # %lor.end5 ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: # kill: def $ax killed $ax killed $eax -; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movzbl %al, %ecx +; CHECK-NEXT: # kill: def $cx killed $cx killed $ecx +; CHECK-NEXT: movw %cx, {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: addl $16, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 4 diff --git a/llvm/test/CodeGen/X86/pr32284.ll b/llvm/test/CodeGen/X86/pr32284.ll index 533473663d73b..a1041ab889c23 100644 --- a/llvm/test/CodeGen/X86/pr32284.ll +++ b/llvm/test/CodeGen/X86/pr32284.ll @@ -10,28 +10,28 @@ define void @foo() { ; X86-O0-LABEL: foo: ; X86-O0: # %bb.0: # %entry ; X86-O0-NEXT: xorl %eax, %eax -; X86-O0-NEXT: # kill: def $rax killed $eax -; X86-O0-NEXT: xorl %ecx, %ecx +; X86-O0-NEXT: movl %eax, %ecx +; X86-O0-NEXT: xorl %eax, %eax ; X86-O0-NEXT: movzbl c, %edx -; X86-O0-NEXT: subl %edx, %ecx -; X86-O0-NEXT: movslq %ecx, %rcx -; X86-O0-NEXT: subq %rcx, %rax -; X86-O0-NEXT: # kill: def $al killed $al killed $rax -; X86-O0-NEXT: cmpb $0, %al -; X86-O0-NEXT: setne %al -; X86-O0-NEXT: andb $1, %al -; X86-O0-NEXT: movb %al, -{{[0-9]+}}(%rsp) +; X86-O0-NEXT: subl %edx, %eax +; X86-O0-NEXT: movslq %eax, %rsi +; X86-O0-NEXT: subq %rsi, %rcx +; X86-O0-NEXT: # kill: def $cl killed $cl killed $rcx +; X86-O0-NEXT: cmpb $0, %cl +; X86-O0-NEXT: setne %cl +; X86-O0-NEXT: andb $1, %cl +; X86-O0-NEXT: movb %cl, -{{[0-9]+}}(%rsp) ; X86-O0-NEXT: cmpb $0, c -; X86-O0-NEXT: setne %al -; X86-O0-NEXT: xorb $-1, %al -; X86-O0-NEXT: xorb $-1, %al -; X86-O0-NEXT: andb $1, 
%al -; X86-O0-NEXT: movzbl %al, %eax -; X86-O0-NEXT: movzbl c, %ecx -; X86-O0-NEXT: cmpl %ecx, %eax -; X86-O0-NEXT: setle %al -; X86-O0-NEXT: andb $1, %al -; X86-O0-NEXT: movzbl %al, %eax +; X86-O0-NEXT: setne %cl +; X86-O0-NEXT: xorb $-1, %cl +; X86-O0-NEXT: xorb $-1, %cl +; X86-O0-NEXT: andb $1, %cl +; X86-O0-NEXT: movzbl %cl, %eax +; X86-O0-NEXT: movzbl c, %edx +; X86-O0-NEXT: cmpl %edx, %eax +; X86-O0-NEXT: setle %cl +; X86-O0-NEXT: andb $1, %cl +; X86-O0-NEXT: movzbl %cl, %eax ; X86-O0-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ; X86-O0-NEXT: retq ; @@ -63,13 +63,13 @@ define void @foo() { ; 686-O0-NEXT: xorb $-1, %al ; 686-O0-NEXT: xorb $-1, %al ; 686-O0-NEXT: andb $1, %al -; 686-O0-NEXT: movzbl %al, %eax -; 686-O0-NEXT: movzbl c, %ecx -; 686-O0-NEXT: cmpl %ecx, %eax +; 686-O0-NEXT: movzbl %al, %ecx +; 686-O0-NEXT: movzbl c, %edx +; 686-O0-NEXT: cmpl %edx, %ecx ; 686-O0-NEXT: setle %al ; 686-O0-NEXT: andb $1, %al -; 686-O0-NEXT: movzbl %al, %eax -; 686-O0-NEXT: movl %eax, (%esp) +; 686-O0-NEXT: movzbl %al, %ecx +; 686-O0-NEXT: movl %ecx, (%esp) ; 686-O0-NEXT: addl $8, %esp ; 686-O0-NEXT: .cfi_def_cfa_offset 4 ; 686-O0-NEXT: retl @@ -126,33 +126,33 @@ define void @f1() { ; X86-O0-NEXT: movabsq $8381627093, %rcx # imm = 0x1F3957AD5 ; X86-O0-NEXT: addq %rcx, %rax ; X86-O0-NEXT: cmpq $0, %rax -; X86-O0-NEXT: setne %al -; X86-O0-NEXT: andb $1, %al -; X86-O0-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X86-O0-NEXT: movl var_5, %eax -; X86-O0-NEXT: xorl $-1, %eax -; X86-O0-NEXT: cmpl $0, %eax -; X86-O0-NEXT: setne %al -; X86-O0-NEXT: xorb $-1, %al -; X86-O0-NEXT: andb $1, %al -; X86-O0-NEXT: movzbl %al, %eax -; X86-O0-NEXT: # kill: def $rax killed $eax +; X86-O0-NEXT: setne %dl +; X86-O0-NEXT: andb $1, %dl +; X86-O0-NEXT: movb %dl, -{{[0-9]+}}(%rsp) +; X86-O0-NEXT: movl var_5, %esi +; X86-O0-NEXT: xorl $-1, %esi +; X86-O0-NEXT: cmpl $0, %esi +; X86-O0-NEXT: setne %dl +; X86-O0-NEXT: xorb $-1, %dl +; X86-O0-NEXT: andb $1, %dl +; X86-O0-NEXT: movzbl %dl, %esi +; X86-O0-NEXT: movl %esi, %eax ; X86-O0-NEXT: movslq var_5, %rcx ; X86-O0-NEXT: addq $7093, %rcx # imm = 0x1BB5 ; X86-O0-NEXT: cmpq %rcx, %rax -; X86-O0-NEXT: setg %al -; X86-O0-NEXT: andb $1, %al -; X86-O0-NEXT: movzbl %al, %eax -; X86-O0-NEXT: # kill: def $rax killed $eax +; X86-O0-NEXT: setg %dl +; X86-O0-NEXT: andb $1, %dl +; X86-O0-NEXT: movzbl %dl, %esi +; X86-O0-NEXT: movl %esi, %eax ; X86-O0-NEXT: movq %rax, var_57 -; X86-O0-NEXT: movl var_5, %eax -; X86-O0-NEXT: xorl $-1, %eax -; X86-O0-NEXT: cmpl $0, %eax -; X86-O0-NEXT: setne %al -; X86-O0-NEXT: xorb $-1, %al -; X86-O0-NEXT: andb $1, %al -; X86-O0-NEXT: movzbl %al, %eax -; X86-O0-NEXT: # kill: def $rax killed $eax +; X86-O0-NEXT: movl var_5, %esi +; X86-O0-NEXT: xorl $-1, %esi +; X86-O0-NEXT: cmpl $0, %esi +; X86-O0-NEXT: setne %dl +; X86-O0-NEXT: xorb $-1, %dl +; X86-O0-NEXT: andb $1, %dl +; X86-O0-NEXT: movzbl %dl, %esi +; X86-O0-NEXT: movl %esi, %eax ; X86-O0-NEXT: movq %rax, _ZN8struct_210member_2_0E ; X86-O0-NEXT: retq ; @@ -178,17 +178,20 @@ define void @f1() { ; ; 686-O0-LABEL: f1: ; 686-O0: # %bb.0: # %entry -; 686-O0-NEXT: pushl %ebx +; 686-O0-NEXT: pushl %ebp ; 686-O0-NEXT: .cfi_def_cfa_offset 8 -; 686-O0-NEXT: pushl %edi +; 686-O0-NEXT: pushl %ebx ; 686-O0-NEXT: .cfi_def_cfa_offset 12 -; 686-O0-NEXT: pushl %esi +; 686-O0-NEXT: pushl %edi ; 686-O0-NEXT: .cfi_def_cfa_offset 16 +; 686-O0-NEXT: pushl %esi +; 686-O0-NEXT: .cfi_def_cfa_offset 20 ; 686-O0-NEXT: subl $1, %esp -; 686-O0-NEXT: .cfi_def_cfa_offset 17 -; 686-O0-NEXT: .cfi_offset %esi, -16 -; 686-O0-NEXT: .cfi_offset %edi, -12 
-; 686-O0-NEXT: .cfi_offset %ebx, -8 +; 686-O0-NEXT: .cfi_def_cfa_offset 21 +; 686-O0-NEXT: .cfi_offset %esi, -20 +; 686-O0-NEXT: .cfi_offset %edi, -16 +; 686-O0-NEXT: .cfi_offset %ebx, -12 +; 686-O0-NEXT: .cfi_offset %ebp, -8 ; 686-O0-NEXT: movl var_5, %eax ; 686-O0-NEXT: movl %eax, %ecx ; 686-O0-NEXT: sarl $31, %ecx @@ -214,16 +217,18 @@ define void @f1() { ; 686-O0-NEXT: movl var_5, %edi ; 686-O0-NEXT: subl $-1, %edi ; 686-O0-NEXT: sete %bl -; 686-O0-NEXT: movzbl %bl, %ebx -; 686-O0-NEXT: movl %ebx, _ZN8struct_210member_2_0E +; 686-O0-NEXT: movzbl %bl, %ebp +; 686-O0-NEXT: movl %ebp, _ZN8struct_210member_2_0E ; 686-O0-NEXT: movl $0, _ZN8struct_210member_2_0E+4 ; 686-O0-NEXT: addl $1, %esp -; 686-O0-NEXT: .cfi_def_cfa_offset 16 +; 686-O0-NEXT: .cfi_def_cfa_offset 20 ; 686-O0-NEXT: popl %esi -; 686-O0-NEXT: .cfi_def_cfa_offset 12 +; 686-O0-NEXT: .cfi_def_cfa_offset 16 ; 686-O0-NEXT: popl %edi -; 686-O0-NEXT: .cfi_def_cfa_offset 8 +; 686-O0-NEXT: .cfi_def_cfa_offset 12 ; 686-O0-NEXT: popl %ebx +; 686-O0-NEXT: .cfi_def_cfa_offset 8 +; 686-O0-NEXT: popl %ebp ; 686-O0-NEXT: .cfi_def_cfa_offset 4 ; 686-O0-NEXT: retl ; @@ -305,25 +310,25 @@ define void @f2() { ; X86-O0-NEXT: setne %cl ; X86-O0-NEXT: xorb $-1, %cl ; X86-O0-NEXT: andb $1, %cl -; X86-O0-NEXT: movzbl %cl, %ecx -; X86-O0-NEXT: xorl %ecx, %eax +; X86-O0-NEXT: movzbl %cl, %edx +; X86-O0-NEXT: xorl %edx, %eax ; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax ; X86-O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; X86-O0-NEXT: movzbl var_7, %eax -; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax -; X86-O0-NEXT: cmpw $0, %ax -; X86-O0-NEXT: setne %al -; X86-O0-NEXT: xorb $-1, %al -; X86-O0-NEXT: andb $1, %al -; X86-O0-NEXT: movzbl %al, %eax -; X86-O0-NEXT: movzbl var_7, %ecx -; X86-O0-NEXT: cmpl %ecx, %eax -; X86-O0-NEXT: sete %al -; X86-O0-NEXT: andb $1, %al -; X86-O0-NEXT: movzbl %al, %eax -; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax -; X86-O0-NEXT: # implicit-def: $rcx -; X86-O0-NEXT: movw %ax, (%rcx) +; X86-O0-NEXT: movzbl var_7, %edx +; X86-O0-NEXT: # kill: def $dx killed $dx killed $edx +; X86-O0-NEXT: cmpw $0, %dx +; X86-O0-NEXT: setne %cl +; X86-O0-NEXT: xorb $-1, %cl +; X86-O0-NEXT: andb $1, %cl +; X86-O0-NEXT: movzbl %cl, %esi +; X86-O0-NEXT: movzbl var_7, %edi +; X86-O0-NEXT: cmpl %edi, %esi +; X86-O0-NEXT: sete %cl +; X86-O0-NEXT: andb $1, %cl +; X86-O0-NEXT: movzbl %cl, %esi +; X86-O0-NEXT: # kill: def $si killed $si killed $esi +; X86-O0-NEXT: # implicit-def: $r8 +; X86-O0-NEXT: movw %si, (%r8) ; X86-O0-NEXT: retq ; ; X64-LABEL: f2: @@ -345,33 +350,43 @@ define void @f2() { ; ; 686-O0-LABEL: f2: ; 686-O0: # %bb.0: # %entry +; 686-O0-NEXT: pushl %edi +; 686-O0-NEXT: .cfi_def_cfa_offset 8 +; 686-O0-NEXT: pushl %esi +; 686-O0-NEXT: .cfi_def_cfa_offset 12 ; 686-O0-NEXT: subl $2, %esp -; 686-O0-NEXT: .cfi_def_cfa_offset 6 +; 686-O0-NEXT: .cfi_def_cfa_offset 14 +; 686-O0-NEXT: .cfi_offset %esi, -12 +; 686-O0-NEXT: .cfi_offset %edi, -8 ; 686-O0-NEXT: movzbl var_7, %eax ; 686-O0-NEXT: cmpb $0, var_7 ; 686-O0-NEXT: setne %cl ; 686-O0-NEXT: xorb $-1, %cl ; 686-O0-NEXT: andb $1, %cl -; 686-O0-NEXT: movzbl %cl, %ecx -; 686-O0-NEXT: xorl %ecx, %eax +; 686-O0-NEXT: movzbl %cl, %edx +; 686-O0-NEXT: xorl %edx, %eax ; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax ; 686-O0-NEXT: movw %ax, (%esp) -; 686-O0-NEXT: movzbl var_7, %eax -; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax -; 686-O0-NEXT: cmpw $0, %ax -; 686-O0-NEXT: setne %al -; 686-O0-NEXT: xorb $-1, %al -; 686-O0-NEXT: andb $1, %al -; 686-O0-NEXT: movzbl %al, %eax 
-; 686-O0-NEXT: movzbl var_7, %ecx -; 686-O0-NEXT: cmpl %ecx, %eax -; 686-O0-NEXT: sete %al -; 686-O0-NEXT: andb $1, %al -; 686-O0-NEXT: movzbl %al, %eax -; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax -; 686-O0-NEXT: # implicit-def: $ecx -; 686-O0-NEXT: movw %ax, (%ecx) +; 686-O0-NEXT: movzbl var_7, %edx +; 686-O0-NEXT: # kill: def $dx killed $dx killed $edx +; 686-O0-NEXT: cmpw $0, %dx +; 686-O0-NEXT: setne %cl +; 686-O0-NEXT: xorb $-1, %cl +; 686-O0-NEXT: andb $1, %cl +; 686-O0-NEXT: movzbl %cl, %esi +; 686-O0-NEXT: movzbl var_7, %edi +; 686-O0-NEXT: cmpl %edi, %esi +; 686-O0-NEXT: sete %cl +; 686-O0-NEXT: andb $1, %cl +; 686-O0-NEXT: movzbl %cl, %esi +; 686-O0-NEXT: # kill: def $si killed $si killed $esi +; 686-O0-NEXT: # implicit-def: $edi +; 686-O0-NEXT: movw %si, (%edi) ; 686-O0-NEXT: addl $2, %esp +; 686-O0-NEXT: .cfi_def_cfa_offset 12 +; 686-O0-NEXT: popl %esi +; 686-O0-NEXT: .cfi_def_cfa_offset 8 +; 686-O0-NEXT: popl %edi ; 686-O0-NEXT: .cfi_def_cfa_offset 4 ; 686-O0-NEXT: retl ; @@ -431,35 +446,35 @@ define void @f3() #0 { ; X86-O0-NEXT: movl var_13, %eax ; X86-O0-NEXT: xorl $-1, %eax ; X86-O0-NEXT: movl %eax, %eax -; X86-O0-NEXT: # kill: def $rax killed $eax +; X86-O0-NEXT: movl %eax, %ecx ; X86-O0-NEXT: cmpl $0, var_13 -; X86-O0-NEXT: setne %cl -; X86-O0-NEXT: xorb $-1, %cl -; X86-O0-NEXT: andb $1, %cl -; X86-O0-NEXT: movzbl %cl, %ecx -; X86-O0-NEXT: # kill: def $rcx killed $ecx -; X86-O0-NEXT: movl var_13, %edx -; X86-O0-NEXT: xorl $-1, %edx -; X86-O0-NEXT: xorl var_16, %edx -; X86-O0-NEXT: movl %edx, %edx -; X86-O0-NEXT: # kill: def $rdx killed $edx -; X86-O0-NEXT: andq %rdx, %rcx -; X86-O0-NEXT: orq %rcx, %rax -; X86-O0-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X86-O0-NEXT: setne %dl +; X86-O0-NEXT: xorb $-1, %dl +; X86-O0-NEXT: andb $1, %dl +; X86-O0-NEXT: movzbl %dl, %eax +; X86-O0-NEXT: movl %eax, %esi ; X86-O0-NEXT: movl var_13, %eax ; X86-O0-NEXT: xorl $-1, %eax +; X86-O0-NEXT: xorl var_16, %eax ; X86-O0-NEXT: movl %eax, %eax -; X86-O0-NEXT: # kill: def $rax killed $eax +; X86-O0-NEXT: movl %eax, %edi +; X86-O0-NEXT: andq %rdi, %rsi +; X86-O0-NEXT: orq %rsi, %rcx +; X86-O0-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X86-O0-NEXT: movl var_13, %eax +; X86-O0-NEXT: xorl $-1, %eax +; X86-O0-NEXT: movl %eax, %eax +; X86-O0-NEXT: movl %eax, %ecx ; X86-O0-NEXT: cmpl $0, var_13 -; X86-O0-NEXT: setne %cl -; X86-O0-NEXT: xorb $-1, %cl -; X86-O0-NEXT: andb $1, %cl -; X86-O0-NEXT: movzbl %cl, %ecx -; X86-O0-NEXT: # kill: def $rcx killed $ecx -; X86-O0-NEXT: andq $0, %rcx -; X86-O0-NEXT: orq %rcx, %rax -; X86-O0-NEXT: # kill: def $eax killed $eax killed $rax -; X86-O0-NEXT: movl %eax, var_46 +; X86-O0-NEXT: setne %dl +; X86-O0-NEXT: xorb $-1, %dl +; X86-O0-NEXT: andb $1, %dl +; X86-O0-NEXT: movzbl %dl, %eax +; X86-O0-NEXT: movl %eax, %esi +; X86-O0-NEXT: andq $0, %rsi +; X86-O0-NEXT: orq %rsi, %rcx +; X86-O0-NEXT: # kill: def $ecx killed $ecx killed $rcx +; X86-O0-NEXT: movl %ecx, var_46 ; X86-O0-NEXT: retq ; ; X64-LABEL: f3: @@ -484,28 +499,31 @@ define void @f3() #0 { ; 686-O0-NEXT: .cfi_offset %ebp, -8 ; 686-O0-NEXT: movl %esp, %ebp ; 686-O0-NEXT: .cfi_def_cfa_register %ebp +; 686-O0-NEXT: pushl %edi ; 686-O0-NEXT: pushl %esi ; 686-O0-NEXT: andl $-8, %esp -; 686-O0-NEXT: subl $16, %esp -; 686-O0-NEXT: .cfi_offset %esi, -12 +; 686-O0-NEXT: subl $8, %esp +; 686-O0-NEXT: .cfi_offset %esi, -16 +; 686-O0-NEXT: .cfi_offset %edi, -12 ; 686-O0-NEXT: movl var_13, %eax ; 686-O0-NEXT: movl %eax, %ecx ; 686-O0-NEXT: notl %ecx ; 686-O0-NEXT: testl %eax, %eax -; 686-O0-NEXT: sete %al -; 
686-O0-NEXT: movzbl %al, %eax -; 686-O0-NEXT: movl var_16, %edx -; 686-O0-NEXT: movl %ecx, %esi -; 686-O0-NEXT: xorl %edx, %esi -; 686-O0-NEXT: andl %esi, %eax +; 686-O0-NEXT: sete %dl +; 686-O0-NEXT: movzbl %dl, %eax +; 686-O0-NEXT: movl var_16, %esi +; 686-O0-NEXT: movl %ecx, %edi +; 686-O0-NEXT: xorl %esi, %edi +; 686-O0-NEXT: andl %edi, %eax ; 686-O0-NEXT: orl %eax, %ecx ; 686-O0-NEXT: movl %ecx, (%esp) ; 686-O0-NEXT: movl $0, {{[0-9]+}}(%esp) ; 686-O0-NEXT: movl var_13, %eax ; 686-O0-NEXT: notl %eax ; 686-O0-NEXT: movl %eax, var_46 -; 686-O0-NEXT: leal -4(%ebp), %esp +; 686-O0-NEXT: leal -8(%ebp), %esp ; 686-O0-NEXT: popl %esi +; 686-O0-NEXT: popl %edi ; 686-O0-NEXT: popl %ebp ; 686-O0-NEXT: .cfi_def_cfa %esp, 4 ; 686-O0-NEXT: retl diff --git a/llvm/test/CodeGen/X86/pr32340.ll b/llvm/test/CodeGen/X86/pr32340.ll index 98685b959f642..1e428ac7d83a6 100644 --- a/llvm/test/CodeGen/X86/pr32340.ll +++ b/llvm/test/CodeGen/X86/pr32340.ll @@ -14,37 +14,37 @@ define void @foo() { ; X64-LABEL: foo: ; X64: # %bb.0: # %entry ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: # kill: def $rax killed $eax +; X64-NEXT: movl %eax, %ecx ; X64-NEXT: movw $0, var_825 -; X64-NEXT: movzwl var_32, %ecx +; X64-NEXT: movzwl var_32, %eax ; X64-NEXT: movzwl var_901, %edx -; X64-NEXT: movl %ecx, %esi +; X64-NEXT: movl %eax, %esi ; X64-NEXT: xorl %edx, %esi -; X64-NEXT: movl %ecx, %edx +; X64-NEXT: movl %eax, %edx ; X64-NEXT: xorl %esi, %edx -; X64-NEXT: addl %ecx, %edx -; X64-NEXT: movslq %edx, %rcx -; X64-NEXT: movq %rcx, var_826 -; X64-NEXT: movzwl var_32, %ecx -; X64-NEXT: # kill: def $rcx killed $ecx -; X64-NEXT: movzwl var_901, %edx -; X64-NEXT: xorl $51981, %edx # imm = 0xCB0D -; X64-NEXT: movslq %edx, %rdx -; X64-NEXT: movabsq $-1142377792914660288, %rsi # imm = 0xF02575732E06E440 -; X64-NEXT: xorq %rsi, %rdx -; X64-NEXT: movq %rcx, %rsi -; X64-NEXT: xorq %rdx, %rsi -; X64-NEXT: xorq $-1, %rsi -; X64-NEXT: xorq %rsi, %rcx -; X64-NEXT: movq %rcx, %rdx -; X64-NEXT: orq var_57, %rdx -; X64-NEXT: orq %rdx, %rcx -; X64-NEXT: # kill: def $cx killed $cx killed $rcx -; X64-NEXT: movw %cx, var_900 -; X64-NEXT: cmpq var_28, %rax -; X64-NEXT: setne %al -; X64-NEXT: andb $1, %al -; X64-NEXT: movzbl %al, %eax +; X64-NEXT: addl %eax, %edx +; X64-NEXT: movslq %edx, %rdi +; X64-NEXT: movq %rdi, var_826 +; X64-NEXT: movzwl var_32, %eax +; X64-NEXT: movl %eax, %edi +; X64-NEXT: movzwl var_901, %eax +; X64-NEXT: xorl $51981, %eax # imm = 0xCB0D +; X64-NEXT: movslq %eax, %r8 +; X64-NEXT: movabsq $-1142377792914660288, %r9 # imm = 0xF02575732E06E440 +; X64-NEXT: xorq %r9, %r8 +; X64-NEXT: movq %rdi, %r9 +; X64-NEXT: xorq %r8, %r9 +; X64-NEXT: xorq $-1, %r9 +; X64-NEXT: xorq %r9, %rdi +; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: orq var_57, %r8 +; X64-NEXT: orq %r8, %rdi +; X64-NEXT: # kill: def $di killed $di killed $rdi +; X64-NEXT: movw %di, var_900 +; X64-NEXT: cmpq var_28, %rcx +; X64-NEXT: setne %r10b +; X64-NEXT: andb $1, %r10b +; X64-NEXT: movzbl %r10b, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: movw %ax, var_827 ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/pr32345.ll b/llvm/test/CodeGen/X86/pr32345.ll index 165e0292d4648..d5f7fde77f6d2 100644 --- a/llvm/test/CodeGen/X86/pr32345.ll +++ b/llvm/test/CodeGen/X86/pr32345.ll @@ -15,23 +15,23 @@ define void @foo() { ; X640-NEXT: xorl %ecx, %eax ; X640-NEXT: movzwl var_27, %ecx ; X640-NEXT: xorl %ecx, %eax -; X640-NEXT: cltq -; X640-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X640-NEXT: movslq %eax, %rdx +; X640-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) ; X640-NEXT: 
movzwl var_22, %eax ; X640-NEXT: movzwl var_27, %ecx ; X640-NEXT: xorl %ecx, %eax ; X640-NEXT: movzwl var_27, %ecx ; X640-NEXT: xorl %ecx, %eax -; X640-NEXT: cltq -; X640-NEXT: movzwl var_27, %ecx -; X640-NEXT: subl $16610, %ecx # imm = 0x40E2 -; X640-NEXT: movl %ecx, %ecx -; X640-NEXT: # kill: def $rcx killed $ecx +; X640-NEXT: movslq %eax, %rdx +; X640-NEXT: movzwl var_27, %eax +; X640-NEXT: subl $16610, %eax # imm = 0x40E2 +; X640-NEXT: movl %eax, %eax +; X640-NEXT: movl %eax, %ecx ; X640-NEXT: # kill: def $cl killed $rcx -; X640-NEXT: sarq %cl, %rax -; X640-NEXT: # kill: def $al killed $al killed $rax -; X640-NEXT: # implicit-def: $rcx -; X640-NEXT: movb %al, (%rcx) +; X640-NEXT: sarq %cl, %rdx +; X640-NEXT: # kill: def $dl killed $dl killed $rdx +; X640-NEXT: # implicit-def: $rsi +; X640-NEXT: movb %dl, (%rsi) ; X640-NEXT: retq ; ; 6860-LABEL: foo: @@ -41,37 +41,43 @@ define void @foo() { ; 6860-NEXT: .cfi_offset %ebp, -8 ; 6860-NEXT: movl %esp, %ebp ; 6860-NEXT: .cfi_def_cfa_register %ebp +; 6860-NEXT: pushl %ebx +; 6860-NEXT: pushl %edi +; 6860-NEXT: pushl %esi ; 6860-NEXT: andl $-8, %esp -; 6860-NEXT: subl $24, %esp +; 6860-NEXT: subl $32, %esp +; 6860-NEXT: .cfi_offset %esi, -20 +; 6860-NEXT: .cfi_offset %edi, -16 +; 6860-NEXT: .cfi_offset %ebx, -12 ; 6860-NEXT: movw var_22, %ax ; 6860-NEXT: movzwl var_27, %ecx ; 6860-NEXT: movw %cx, %dx ; 6860-NEXT: xorw %dx, %ax -; 6860-NEXT: # implicit-def: $edx -; 6860-NEXT: movw %ax, %dx -; 6860-NEXT: xorl %ecx, %edx -; 6860-NEXT: # kill: def $dx killed $dx killed $edx -; 6860-NEXT: movzwl %dx, %eax -; 6860-NEXT: movl %eax, {{[0-9]+}}(%esp) +; 6860-NEXT: # implicit-def: $esi +; 6860-NEXT: movw %ax, %si +; 6860-NEXT: xorl %ecx, %esi +; 6860-NEXT: # kill: def $si killed $si killed $esi +; 6860-NEXT: movzwl %si, %ecx +; 6860-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; 6860-NEXT: movl $0, {{[0-9]+}}(%esp) ; 6860-NEXT: movw var_22, %ax ; 6860-NEXT: movzwl var_27, %ecx ; 6860-NEXT: movw %cx, %dx ; 6860-NEXT: xorw %dx, %ax -; 6860-NEXT: # implicit-def: $edx -; 6860-NEXT: movw %ax, %dx -; 6860-NEXT: xorl %ecx, %edx -; 6860-NEXT: # kill: def $dx killed $dx killed $edx -; 6860-NEXT: movzwl %dx, %eax +; 6860-NEXT: # implicit-def: $edi +; 6860-NEXT: movw %ax, %di +; 6860-NEXT: xorl %ecx, %edi +; 6860-NEXT: # kill: def $di killed $di killed $edi +; 6860-NEXT: movzwl %di, %ebx ; 6860-NEXT: # kill: def $cl killed $cl killed $ecx ; 6860-NEXT: addb $30, %cl -; 6860-NEXT: xorl %edx, %edx +; 6860-NEXT: xorl %eax, %eax ; 6860-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; 6860-NEXT: shrdl %cl, %edx, %eax +; 6860-NEXT: shrdl %cl, %eax, %ebx ; 6860-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload ; 6860-NEXT: testb $32, %cl +; 6860-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 6860-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 6860-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 6860-NEXT: jne .LBB0_2 ; 6860-NEXT: # %bb.1: # %bb ; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -81,7 +87,10 @@ define void @foo() { ; 6860-NEXT: # kill: def $al killed $al killed $eax ; 6860-NEXT: # implicit-def: $ecx ; 6860-NEXT: movb %al, (%ecx) -; 6860-NEXT: movl %ebp, %esp +; 6860-NEXT: leal -12(%ebp), %esp +; 6860-NEXT: popl %esi +; 6860-NEXT: popl %edi +; 6860-NEXT: popl %ebx ; 6860-NEXT: popl %ebp ; 6860-NEXT: .cfi_def_cfa %esp, 4 ; 6860-NEXT: retl diff --git a/llvm/test/CodeGen/X86/pr32451.ll b/llvm/test/CodeGen/X86/pr32451.ll index 3b1997234ce55..4754d8e4cf6cb 100644 --- a/llvm/test/CodeGen/X86/pr32451.ll 
+++ b/llvm/test/CodeGen/X86/pr32451.ll @@ -9,24 +9,29 @@ target triple = "x86_64-unknown-linux-gnu" define i8** @japi1_convert_690(i8**, i8***, i32) { ; CHECK-LABEL: japi1_convert_690: ; CHECK: # %bb.0: # %top +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: subl $16, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: .cfi_offset %ebx, -8 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; CHECK-NEXT: calll julia.gc_root_decl -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; CHECK-NEXT: calll jl_get_ptls_states -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; CHECK-NEXT: movl 4(%ecx), %edx -; CHECK-NEXT: movb (%edx), %dl -; CHECK-NEXT: andb $1, %dl -; CHECK-NEXT: movzbl %dl, %edx +; CHECK-NEXT: movb (%edx), %bl +; CHECK-NEXT: andb $1, %bl +; CHECK-NEXT: movzbl %bl, %edx ; CHECK-NEXT: movl %edx, (%esp) -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; CHECK-NEXT: calll jl_box_int32 -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; CHECK-NEXT: movl %eax, (%ecx) ; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %ebx ; CHECK-NEXT: .cfi_def_cfa_offset 4 ; CHECK-NEXT: retl top: diff --git a/llvm/test/CodeGen/X86/pr34592.ll b/llvm/test/CodeGen/X86/pr34592.ll index 25b068c8fad6f..0f73036a4c6c9 100644 --- a/llvm/test/CodeGen/X86/pr34592.ll +++ b/llvm/test/CodeGen/X86/pr34592.ll @@ -10,7 +10,7 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1 ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: andq $-32, %rsp -; CHECK-NEXT: subq $160, %rsp +; CHECK-NEXT: subq $192, %rsp ; CHECK-NEXT: vmovaps 240(%rbp), %ymm8 ; CHECK-NEXT: vmovaps 208(%rbp), %ymm9 ; CHECK-NEXT: vmovaps 176(%rbp), %ymm10 @@ -27,14 +27,14 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1 ; CHECK-NEXT: vpalignr {{.*#+}} ymm2 = ymm2[8,9,10,11,12,13,14,15],ymm11[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm11[16,17,18,19,20,21,22,23] ; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,3,2,0] ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5],ymm2[6,7] -; CHECK-NEXT: vmovaps %xmm7, %xmm2 -; CHECK-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7] -; CHECK-NEXT: # implicit-def: $ymm9 -; CHECK-NEXT: vmovaps %xmm2, %xmm9 -; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload -; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm2[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23] -; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,1,0,3] -; CHECK-NEXT: vpblendd {{.*#+}} ymm9 = ymm9[0,1,2,3],ymm11[4,5,6,7] +; CHECK-NEXT: vmovaps %xmm7, %xmm9 +; CHECK-NEXT: vpslldq {{.*#+}} xmm9 = zero,zero,zero,zero,zero,zero,zero,zero,xmm9[0,1,2,3,4,5,6,7] +; CHECK-NEXT: # implicit-def: $ymm2 +; CHECK-NEXT: vmovaps %xmm9, %xmm2 +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Reload +; CHECK-NEXT: vpalignr {{.*#+}} ymm9 = 
ymm11[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm11[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23] +; CHECK-NEXT: vpermq {{.*#+}} ymm9 = ymm9[0,1,0,3] +; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm9[4,5,6,7] ; CHECK-NEXT: vpblendd {{.*#+}} ymm8 = ymm7[0,1],ymm8[2,3],ymm7[4,5,6,7] ; CHECK-NEXT: vpermq {{.*#+}} ymm8 = ymm8[2,1,1,3] ; CHECK-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,0,1,4,5,4,5] @@ -43,11 +43,14 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1 ; CHECK-NEXT: vmovq {{.*#+}} xmm7 = xmm7[0],zero ; CHECK-NEXT: # implicit-def: $ymm8 ; CHECK-NEXT: vmovaps %xmm7, %xmm8 -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm8[0,1],ymm6[0,1] +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm6 = ymm8[0,1],ymm6[0,1] ; CHECK-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; CHECK-NEXT: vmovaps %ymm5, %ymm1 +; CHECK-NEXT: vmovaps %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; CHECK-NEXT: vmovaps %ymm6, %ymm2 +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm5 # 32-byte Reload ; CHECK-NEXT: vmovaps %ymm3, (%rsp) # 32-byte Spill -; CHECK-NEXT: vmovaps %ymm9, %ymm3 +; CHECK-NEXT: vmovaps %ymm5, %ymm3 ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 diff --git a/llvm/test/CodeGen/X86/pr39733.ll b/llvm/test/CodeGen/X86/pr39733.ll index 31bd5b71d0a6e..cfe5832d7ad66 100644 --- a/llvm/test/CodeGen/X86/pr39733.ll +++ b/llvm/test/CodeGen/X86/pr39733.ll @@ -23,8 +23,8 @@ define void @test55() { ; CHECK-NEXT: vmovaps %xmm1, %xmm2 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0 -; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 -; CHECK-NEXT: vmovdqa %ymm0, (%rsp) +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm2 +; CHECK-NEXT: vmovdqa %ymm2, (%rsp) ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 diff --git a/llvm/test/CodeGen/X86/pr44749.ll b/llvm/test/CodeGen/X86/pr44749.ll index 1012d8c723b13..d465009c7c38a 100644 --- a/llvm/test/CodeGen/X86/pr44749.ll +++ b/llvm/test/CodeGen/X86/pr44749.ll @@ -14,22 +14,20 @@ define i32 @a() { ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: callq _b ; CHECK-NEXT: cvtsi2sd %eax, %xmm0 -; CHECK-NEXT: movq _calloc@{{.*}}(%rip), %rax -; CHECK-NEXT: subq $-1, %rax -; CHECK-NEXT: setne %cl -; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: ## kill: def $rcx killed $ecx -; CHECK-NEXT: leaq {{.*}}(%rip), %rdx +; CHECK-NEXT: movq _calloc@{{.*}}(%rip), %rcx +; CHECK-NEXT: subq $-1, %rcx +; CHECK-NEXT: setne %dl +; CHECK-NEXT: movzbl %dl, %eax +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: leaq {{.*}}(%rip), %rdi ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; CHECK-NEXT: ucomisd %xmm1, %xmm0 -; CHECK-NEXT: setae %cl -; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: ## kill: def $rcx killed $ecx -; CHECK-NEXT: leaq {{.*}}(%rip), %rdx +; CHECK-NEXT: setae %dl +; CHECK-NEXT: movzbl %dl, %eax +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: leaq {{.*}}(%rip), %rdi ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: cvttsd2si %xmm0, %ecx -; CHECK-NEXT: movq %rax, (%rsp) ## 8-byte Spill -; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: cvttsd2si %xmm0, %eax ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/pr47000.ll b/llvm/test/CodeGen/X86/pr47000.ll index 083aa780a07c2..922b6403cc4f4 100755 --- a/llvm/test/CodeGen/X86/pr47000.ll +++ b/llvm/test/CodeGen/X86/pr47000.ll @@ -12,47 +12,51 @@ define <4 x half> @doTheTestMod(<4 x half> %0, 
<4 x half> %1) nounwind { ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: subl $124, %esp -; CHECK-NEXT: movl 144(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: movw 176(%esp), %dx -; CHECK-NEXT: movw 172(%esp), %si -; CHECK-NEXT: movw 168(%esp), %di -; CHECK-NEXT: movw 164(%esp), %bx -; CHECK-NEXT: movw 160(%esp), %bp +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %si +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %di +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %bx +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %bp +; CHECK-NEXT: movw %dx, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx +; CHECK-NEXT: movw %dx, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx +; CHECK-NEXT: movw %dx, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx +; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %dx # 2-byte Reload +; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %dx # 2-byte Reload +; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %bp, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %bp # 2-byte Reload +; CHECK-NEXT: movw %bp, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %si, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %di, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %bx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ebx ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movw 156(%esp), %ax -; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK-NEXT: movw 152(%esp), %ax -; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK-NEXT: movw 148(%esp), %ax -; CHECK-NEXT: movw %ax, 112(%esp) -; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload -; CHECK-NEXT: movw %ax, 114(%esp) -; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload -; CHECK-NEXT: movw %ax, 116(%esp) -; CHECK-NEXT: movw %bp, 118(%esp) -; CHECK-NEXT: movw %dx, 110(%esp) -; CHECK-NEXT: movw %si, 108(%esp) -; CHECK-NEXT: movw %di, 106(%esp) -; CHECK-NEXT: movw %bx, 104(%esp) -; CHECK-NEXT: movzwl 118(%esp), %edx -; CHECK-NEXT: movzwl 116(%esp), %esi -; CHECK-NEXT: movzwl 114(%esp), %edi -; CHECK-NEXT: movzwl 112(%esp), %ebx -; CHECK-NEXT: movzwl 110(%esp), %ebp -; CHECK-NEXT: movzwl 108(%esp), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movzwl 106(%esp), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movzwl 104(%esp), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: movl %ebx, (%eax) ; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movl %ecx, (%eax) ; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: movl %edi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: calll __gnu_h2f_ieee ; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload @@ -68,58 +72,58 @@ define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind { ; CHECK-NEXT: fstps (%eax) ; CHECK-NEXT: calll __gnu_f2h_ieee ; CHECK-NEXT: movl %esp, %ecx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; CHECK-NEXT: movl %edx, (%ecx) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: movl %esi, (%ecx) ; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill ; CHECK-NEXT: calll __gnu_h2f_ieee -; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-NEXT: movl %ecx, (%eax) +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: movl %esi, (%ecx) ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill ; CHECK-NEXT: calll __gnu_h2f_ieee -; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: fstps 4(%eax) +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: fstps 4(%ecx) ; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload -; CHECK-NEXT: fstps (%eax) +; CHECK-NEXT: fstps (%ecx) ; CHECK-NEXT: calll fmodf -; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: fstps (%eax) +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: fstps (%ecx) ; CHECK-NEXT: calll __gnu_f2h_ieee ; CHECK-NEXT: movl %esp, %ecx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; CHECK-NEXT: movl %edx, (%ecx) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: movl %esi, (%ecx) ; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill ; CHECK-NEXT: calll __gnu_h2f_ieee -; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-NEXT: movl %ecx, (%eax) +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: movl %esi, (%ecx) ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill ; CHECK-NEXT: calll __gnu_h2f_ieee -; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: fstps 4(%eax) +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: fstps 4(%ecx) ; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload -; CHECK-NEXT: fstps (%eax) +; CHECK-NEXT: fstps (%ecx) ; CHECK-NEXT: calll fmodf -; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: fstps (%eax) +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: fstps (%ecx) ; CHECK-NEXT: calll __gnu_f2h_ieee ; CHECK-NEXT: movl %esp, %ecx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; CHECK-NEXT: movl %edx, (%ecx) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: movl %esi, (%ecx) ; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill ; CHECK-NEXT: calll __gnu_h2f_ieee -; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-NEXT: movl %ecx, (%eax) +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: movl %esi, (%ecx) ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill ; CHECK-NEXT: calll __gnu_h2f_ieee -; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: fstps 4(%eax) +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: fstps 4(%ecx) ; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 
10-byte Folded Reload -; CHECK-NEXT: fstps (%eax) +; CHECK-NEXT: fstps (%ecx) ; CHECK-NEXT: calll fmodf -; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: fstps (%eax) +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: fstps (%ecx) ; CHECK-NEXT: calll __gnu_f2h_ieee ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: movw %ax, 6(%ecx) @@ -127,9 +131,10 @@ define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind { ; CHECK-NEXT: movw %ax, 4(%ecx) ; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %dx # 2-byte Reload ; CHECK-NEXT: movw %dx, 2(%ecx) -; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %si # 2-byte Reload -; CHECK-NEXT: movw %si, (%ecx) -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %bp # 2-byte Reload +; CHECK-NEXT: movw %bp, (%ecx) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: addl $124, %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir b/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir index 2821f00940ecf..0fe9f60897fd1 100644 --- a/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir +++ b/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir @@ -23,15 +23,15 @@ body: | ; CHECK: successors: %bb.3(0x80000000) ; CHECK: $rax = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) ; CHECK: renamable $ecx = MOV32r0 implicit-def $eflags - ; CHECK: renamable $rcx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit + ; CHECK: renamable $rdx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit ; CHECK: MOV64mi32 killed renamable $rax, 1, $noreg, 0, $noreg, 0 :: (volatile store 8) - ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rcx :: (store 8 into %stack.0) + ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rdx :: (store 8 into %stack.0) ; CHECK: bb.3: ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK: $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) ; CHECK: renamable $ecx = MOV32r0 implicit-def dead $eflags - ; CHECK: renamable $rcx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit - ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed $rcx :: (store 8 into %stack.1) + ; CHECK: renamable $rdx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit + ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed $rdx :: (store 8 into %stack.1) ; CHECK: JMP64r killed renamable $rax bb.0: liveins: $edi, $rsi diff --git a/llvm/test/CodeGen/X86/swift-return.ll b/llvm/test/CodeGen/X86/swift-return.ll index 4934419055acd..c62e92f2cac55 100644 --- a/llvm/test/CodeGen/X86/swift-return.ll +++ b/llvm/test/CodeGen/X86/swift-return.ll @@ -28,10 +28,11 @@ define i16 @test(i32 %key) { ; CHECK-O0-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi ; CHECK-O0-NEXT: callq gen -; CHECK-O0-NEXT: cwtl -; CHECK-O0-NEXT: movsbl %dl, %ecx -; CHECK-O0-NEXT: addl %ecx, %eax -; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-O0-NEXT: movswl %ax, %ecx +; CHECK-O0-NEXT: movsbl %dl, %esi +; CHECK-O0-NEXT: addl %esi, %ecx +; CHECK-O0-NEXT: # kill: def $cx killed $cx killed $ecx +; CHECK-O0-NEXT: movw %cx, %ax ; CHECK-O0-NEXT: popq %rcx ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq @@ -79,16 +80,16 @@ define i32 @test2(i32 %key) #0 { ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi ; CHECK-O0-NEXT: movq %rsp, 
%rax ; CHECK-O0-NEXT: callq gen2 -; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edx -; CHECK-O0-NEXT: movl (%rsp), %esi -; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi -; CHECK-O0-NEXT: addl %edi, %esi -; CHECK-O0-NEXT: addl %edx, %esi -; CHECK-O0-NEXT: addl %ecx, %esi -; CHECK-O0-NEXT: addl %eax, %esi -; CHECK-O0-NEXT: movl %esi, %eax +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %esi +; CHECK-O0-NEXT: movl (%rsp), %edi +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %r8d +; CHECK-O0-NEXT: addl %r8d, %edi +; CHECK-O0-NEXT: addl %esi, %edi +; CHECK-O0-NEXT: addl %edx, %edi +; CHECK-O0-NEXT: addl %ecx, %edi +; CHECK-O0-NEXT: movl %edi, %eax ; CHECK-O0-NEXT: addq $24, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq @@ -263,17 +264,17 @@ define void @consume_i1_ret() { ; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O0-NEXT: callq produce_i1_ret ; CHECK-O0-NEXT: andb $1, %al -; CHECK-O0-NEXT: movzbl %al, %eax -; CHECK-O0-NEXT: movl %eax, var +; CHECK-O0-NEXT: movzbl %al, %esi +; CHECK-O0-NEXT: movl %esi, var ; CHECK-O0-NEXT: andb $1, %dl -; CHECK-O0-NEXT: movzbl %dl, %eax -; CHECK-O0-NEXT: movl %eax, var +; CHECK-O0-NEXT: movzbl %dl, %esi +; CHECK-O0-NEXT: movl %esi, var ; CHECK-O0-NEXT: andb $1, %cl -; CHECK-O0-NEXT: movzbl %cl, %eax -; CHECK-O0-NEXT: movl %eax, var +; CHECK-O0-NEXT: movzbl %cl, %esi +; CHECK-O0-NEXT: movl %esi, var ; CHECK-O0-NEXT: andb $1, %r8b -; CHECK-O0-NEXT: movzbl %r8b, %eax -; CHECK-O0-NEXT: movl %eax, var +; CHECK-O0-NEXT: movzbl %r8b, %esi +; CHECK-O0-NEXT: movl %esi, var ; CHECK-O0-NEXT: popq %rax ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq diff --git a/llvm/test/CodeGen/X86/swifterror.ll b/llvm/test/CodeGen/X86/swifterror.ll index 1afae31b2b8d2..1388c61c18984 100644 --- a/llvm/test/CodeGen/X86/swifterror.ll +++ b/llvm/test/CodeGen/X86/swifterror.ll @@ -790,8 +790,8 @@ a: ; CHECK-O0-LABEL: testAssign4 ; CHECK-O0: callq _foo2 ; CHECK-O0: xorl %eax, %eax -; CHECK-O0: ## kill: def $rax killed $eax -; CHECK-O0: movq %rax, [[SLOT:[-a-z0-9\(\)\%]*]] +; CHECK-O0: movl %eax, %ecx +; CHECK-O0: movq %rcx, [[SLOT:[-a-z0-9\(\)\%]*]] ; CHECK-O0: movq [[SLOT]], %rax ; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]] ; CHECK-O0: movq [[SLOT2]], %r12 diff --git a/llvm/test/DebugInfo/X86/op_deref.ll b/llvm/test/DebugInfo/X86/op_deref.ll index 1b49dc554f7ef..5de9976d6de2a 100644 --- a/llvm/test/DebugInfo/X86/op_deref.ll +++ b/llvm/test/DebugInfo/X86/op_deref.ll @@ -6,10 +6,10 @@ ; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=DWARF3 ; DWARF4: DW_AT_location [DW_FORM_sec_offset] (0x00000000 -; DWARF4-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref +; DWARF4-NEXT: {{.*}}: DW_OP_breg1 RDX+0, DW_OP_deref ; DWARF3: DW_AT_location [DW_FORM_data4] (0x00000000 -; DWARF3-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref +; DWARF3-NEXT: {{.*}}: DW_OP_breg1 RDX+0, DW_OP_deref ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000067] = "vla") @@ -17,8 +17,8 @@ ; Check the DEBUG_VALUE comments for good measure. ; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o - -filetype=asm | FileCheck %s -check-prefix=ASM-CHECK ; vla should have a register-indirect address at one point. 
-; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rcx+0] -; ASM-CHECK: DW_OP_breg2 +; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rdx+0] +; ASM-CHECK: DW_OP_breg1 ; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s --check-prefix=PRETTY-PRINT ; PRETTY-PRINT: DIExpression(DW_OP_deref) From 0a2213c6eb24c9deec738e30509815e5bddd860c Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Fri, 28 Aug 2020 12:31:16 +0200 Subject: [PATCH 0675/1079] [lldb/cmake] Fix testing support library dependencies lldbUtilityHelpers does not depend on lldbSymbolHelpers. Remove that dependency, and add direct lldbSymbolHelpers dependencies where needed. --- lldb/unittests/Expression/CMakeLists.txt | 1 + lldb/unittests/SymbolFile/DWARF/CMakeLists.txt | 3 ++- lldb/unittests/TestingSupport/CMakeLists.txt | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lldb/unittests/Expression/CMakeLists.txt b/lldb/unittests/Expression/CMakeLists.txt index 2f5304ab212d9..0e8230d19bad9 100644 --- a/lldb/unittests/Expression/CMakeLists.txt +++ b/lldb/unittests/Expression/CMakeLists.txt @@ -11,5 +11,6 @@ add_lldb_unittest(ExpressionTests lldbPluginTypeSystemClang lldbUtility lldbUtilityHelpers + lldbSymbolHelpers LLVMTestingSupport ) diff --git a/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt b/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt index 64a7b78c478a1..30620a61dc5fd 100644 --- a/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt +++ b/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt @@ -11,8 +11,9 @@ add_lldb_unittest(SymbolFileDWARFTests lldbPluginSymbolFileDWARF lldbPluginSymbolFilePDB lldbPluginTypeSystemClang - lldbUtilityHelpers lldbPluginPlatformMacOSX + lldbUtilityHelpers + lldbSymbolHelpers LINK_COMPONENTS Support DebugInfoPDB diff --git a/lldb/unittests/TestingSupport/CMakeLists.txt b/lldb/unittests/TestingSupport/CMakeLists.txt index 4599ada1ec506..c62bc3b023b77 100644 --- a/lldb/unittests/TestingSupport/CMakeLists.txt +++ b/lldb/unittests/TestingSupport/CMakeLists.txt @@ -5,7 +5,6 @@ add_lldb_library(lldbUtilityHelpers LINK_LIBS lldbUtility - lldbSymbolHelpers gtest LINK_COMPONENTS From af3789a188116e400dd021bae54d91dc543aca7d Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 15 Sep 2020 13:20:09 +0200 Subject: [PATCH 0676/1079] [lldb] Improve qemu interop for aarch64 qemu calls the "fp" and "lr" registers via their generic names (x29/x30). This mismatch manifested itself as not being able to unwind or display values of some local variables. 
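
As a rough illustration of the idea (not taken from the patch itself), the name
normalization can be sketched as a small standalone program. The
MapRegisterName helper below is an assumed stand-in for the MCBasedABI helper
of the same name, so treat this as a sketch of the mapping rather than the
actual LLDB implementation:

  // Sketch: rewrite the names qemu reports (x29/x30, v0..v31) into the names
  // the MC layer knows (fp/lr, q0..q31) before register numbers are looked up.
  #include <cctype>
  #include <iostream>
  #include <string>

  // Assumed helper: exact match ("x29" -> "fp") or prefix-plus-number match
  // ("v5" -> "q5"); anything else is left untouched.
  static void MapRegisterName(std::string &reg, const std::string &from,
                              const std::string &to) {
    if (reg == from) {
      reg = to;
    } else if (reg.size() > from.size() &&
               reg.compare(0, from.size(), from) == 0 &&
               std::isdigit(static_cast<unsigned char>(reg[from.size()]))) {
      reg = to + reg.substr(from.size());
    }
  }

  static std::string GetMCName(std::string reg) {
    MapRegisterName(reg, "v", "q");    // vector registers: v0..v31 -> q0..q31
    MapRegisterName(reg, "x29", "fp"); // qemu's name for the frame pointer
    MapRegisterName(reg, "x30", "lr"); // qemu's name for the link register
    return reg;
  }

  int main() {
    // Prints "fp lr q5": the names under which the eh_frame/DWARF register
    // numbers can actually be found.
    std::cout << GetMCName("x29") << ' ' << GetMCName("x30") << ' '
              << GetMCName("v5") << '\n';
  }

With a mapping like this in place, a register the stub advertises as "x29"
resolves to the same MC register as "fp", so unwind information expressed in
terms of the frame pointer applies again.
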
---
 .../source/Plugins/ABI/AArch64/ABIAArch64.cpp |  6 ++
 lldb/source/Plugins/ABI/AArch64/ABIAArch64.h  |  5 +-
 .../TestQemuAArch64TargetXml.py               | 73 +++++++++++++++++++
 .../basic_eh_frame-aarch64.yaml               | 25 +++++++
 4 files changed, 105 insertions(+), 4 deletions(-)
 create mode 100644 lldb/test/API/functionalities/gdb_remote_client/TestQemuAArch64TargetXml.py
 create mode 100644 lldb/test/API/functionalities/gdb_remote_client/basic_eh_frame-aarch64.yaml

diff --git a/lldb/source/Plugins/ABI/AArch64/ABIAArch64.cpp b/lldb/source/Plugins/ABI/AArch64/ABIAArch64.cpp
index 5cf9fb4ad37f9..7cae4cc427501 100644
--- a/lldb/source/Plugins/ABI/AArch64/ABIAArch64.cpp
+++ b/lldb/source/Plugins/ABI/AArch64/ABIAArch64.cpp
@@ -33,6 +33,12 @@ ABIAArch64::GetEHAndDWARFNums(llvm::StringRef name) {
   return MCBasedABI::GetEHAndDWARFNums(name);
 }
 
+std::string ABIAArch64::GetMCName(std::string reg) {
+  MapRegisterName(reg, "v", "q");
+  MapRegisterName(reg, "x29", "fp");
+  MapRegisterName(reg, "x30", "lr");
+  return reg;
+}
 uint32_t ABIAArch64::GetGenericNum(llvm::StringRef name) {
   return llvm::StringSwitch<uint32_t>(name)
       .Case("pc", LLDB_REGNUM_GENERIC_PC)
diff --git a/lldb/source/Plugins/ABI/AArch64/ABIAArch64.h b/lldb/source/Plugins/ABI/AArch64/ABIAArch64.h
index 981145e2017e3..bdff648f1b522 100644
--- a/lldb/source/Plugins/ABI/AArch64/ABIAArch64.h
+++ b/lldb/source/Plugins/ABI/AArch64/ABIAArch64.h
@@ -20,10 +20,7 @@ class ABIAArch64: public lldb_private::MCBasedABI {
   std::pair<uint32_t, uint32_t>
   GetEHAndDWARFNums(llvm::StringRef name) override;
 
-  std::string GetMCName(std::string reg) override {
-    MapRegisterName(reg, "v", "q");
-    return reg;
-  }
+  std::string GetMCName(std::string reg) override;
 
   uint32_t GetGenericNum(llvm::StringRef name) override;
 
diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestQemuAArch64TargetXml.py b/lldb/test/API/functionalities/gdb_remote_client/TestQemuAArch64TargetXml.py
new file mode 100644
index 0000000000000..9368de7b055aa
--- /dev/null
+++ b/lldb/test/API/functionalities/gdb_remote_client/TestQemuAArch64TargetXml.py
@@ -0,0 +1,73 @@
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test.decorators import *
+from gdbclientutils import *
+from textwrap import dedent
+
+class MyResponder(MockGDBServerResponder):
+    def qXferRead(self, obj, annex, offset, length):
+        if annex == "target.xml":
+            return dedent("""\
+                <?xml version="1.0"?>
+                <target version="1.0">
+                  <architecture>aarch64</architecture>
+                  <feature name="org.gnu.gdb.aarch64.core">
+                    <reg name="x0" bitsize="64"/>
+                    <reg name="x1" bitsize="64"/>
+                    <reg name="x2" bitsize="64"/>
+                    <reg name="x3" bitsize="64"/>
+                    <reg name="x4" bitsize="64"/>
+                    <reg name="x5" bitsize="64"/>
+                    <reg name="x6" bitsize="64"/>
+                    <reg name="x7" bitsize="64"/>
+                    <reg name="x8" bitsize="64"/>
+                    <reg name="x9" bitsize="64"/>
+                    <reg name="x10" bitsize="64"/>
+                    <reg name="x11" bitsize="64"/>
+                    <reg name="x12" bitsize="64"/>
+                    <reg name="x13" bitsize="64"/>
+                    <reg name="x14" bitsize="64"/>
+                    <reg name="x15" bitsize="64"/>
+                    <reg name="x16" bitsize="64"/>
+                    <reg name="x17" bitsize="64"/>
+                    <reg name="x18" bitsize="64"/>
+                    <reg name="x19" bitsize="64"/>
+                    <reg name="x20" bitsize="64"/>
+                    <reg name="x21" bitsize="64"/>
+                    <reg name="x22" bitsize="64"/>
+                    <reg name="x23" bitsize="64"/>
+                    <reg name="x24" bitsize="64"/>
+                    <reg name="x25" bitsize="64"/>
+                    <reg name="x26" bitsize="64"/>
+                    <reg name="x27" bitsize="64"/>
+                    <reg name="x28" bitsize="64"/>
+                    <reg name="x29" bitsize="64"/>
+                    <reg name="x30" bitsize="64"/>
+                    <reg name="sp" bitsize="64"/>
+                    <reg name="pc" bitsize="64"/>
+                    <reg name="cpsr" bitsize="32"/>
+                  </feature>
+                </target>
+                """), False
+        else:
+            return None, False
+
+class TestQemuAarch64TargetXml(GDBRemoteTestBase):
+
+    @skipIfXmlSupportMissing
+    @skipIfRemote
+    @skipIfLLVMTargetMissing("AArch64")
+    def test_register_augmentation(self):
+        """
+        Test that we correctly associate the register info with the eh_frame
+        register numbers.
+        """
+
+        target = self.createTarget("basic_eh_frame-aarch64.yaml")
+        self.server.responder = MyResponder()
+
+        process = self.connect(target)
+        lldbutil.expect_state_changes(self, self.dbg.GetListener(), process,
+                                      [lldb.eStateStopped])
+        self.filecheck("image show-unwind -n foo", __file__,
+            "--check-prefix=UNWIND")
+# UNWIND: eh_frame UnwindPlan:
+# UNWIND: row[0]: 0: CFA=x29+16 => x30=[CFA-8]
diff --git a/lldb/test/API/functionalities/gdb_remote_client/basic_eh_frame-aarch64.yaml b/lldb/test/API/functionalities/gdb_remote_client/basic_eh_frame-aarch64.yaml
new file mode 100644
index 0000000000000..acc66082495e7
--- /dev/null
+++ b/lldb/test/API/functionalities/gdb_remote_client/basic_eh_frame-aarch64.yaml
@@ -0,0 +1,25 @@
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_EXEC
+  Machine:         EM_AARCH64
+Sections:
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x0000000000401000
+    AddressAlign:    0x0000000000000001
+    Content:         DEADBEEF
+  - Name:            .eh_frame
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x0000000000402000
+    AddressAlign:    0x0000000000000008
+    Content:         0c000000000000000100017C1E0000001c0000001400000000104000000000000100000000000000000C1d109e820000
+Symbols:
+  - Name:            foo
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x0000000000401000
+...

From aa8be5aeead7ad894270aa025e7165169c1a54d2 Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson
Date: Mon, 14 Sep 2020 22:53:54 +0200
Subject: [PATCH 0677/1079] [Scalarizer] Avoid changing name of
 non-instructions

The "takeName" logic in ScalarizerVisitor::gather did not consider that the
value vector could refer to non-instructions, such as global variables. This
patch makes sure that we avoid changing the name of a value if it isn't an
instruction.

Reviewed By: lebedev.ri

Differential Revision: https://reviews.llvm.org/D87685
---
 llvm/lib/Transforms/Scalar/Scalarizer.cpp     |  3 ++-
 .../Transforms/Scalarizer/global-bug-2.ll     | 20 +++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/Scalarizer/global-bug-2.ll

diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 3bc0cbde8c19d..c7fe21f2a3dac 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -398,7 +398,8 @@ void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
       continue;
 
     Instruction *Old = cast<Instruction>(V);
-    CV[I]->takeName(Old);
+    if (isa<Instruction>(CV[I]))
+      CV[I]->takeName(Old);
     Old->replaceAllUsesWith(CV[I]);
     PotentiallyDeadInstrs.emplace_back(Old);
   }
diff --git a/llvm/test/Transforms/Scalarizer/global-bug-2.ll b/llvm/test/Transforms/Scalarizer/global-bug-2.ll
new file mode 100644
index 0000000000000..60f61ab08184b
--- /dev/null
+++ b/llvm/test/Transforms/Scalarizer/global-bug-2.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -scalarizer -S -o - | FileCheck %s
+; RUN: opt < %s -passes='function(scalarizer)' -S | FileCheck %s
+
+; The scalarizer used to change the name of the global variable.
+; Check that we don't do that any longer.
+;
+; CHECK: @c.a = global i16 0, align 1
+
+@c.a = global i16 0, align 1
+
+define void @c() {
+entry:
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.cond1, %entry
+  %d.sroa.0.0 = phi <4 x i16*> [ <i16* @c.a, i16* undef, i16* undef, i16* undef>, %entry ], [ %d.sroa.0.1.vec.insert, %for.cond1 ]
+  %d.sroa.0.0.vec.extract = extractelement <4 x i16*> %d.sroa.0.0, i32 0
+  %d.sroa.0.1.vec.insert = shufflevector <4 x i16*> <i16* @c.a, i16* undef, i16* undef, i16* undef>, <4 x i16*> %d.sroa.0.0, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  br label %for.cond1
+}

From 635b87511ec3d6d2fa8f65a3ed1876f01367584e Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer
Date: Tue, 15 Sep 2020 13:10:30 +0100
Subject: [PATCH 0678/1079] [ARM][MVE] Tail-predication: use unsigned SCEV
 ranges for tripcount

Loop tripcount expressions have a positive range, so use unsigned SCEV ranges
for them.

Differential Revision: https://reviews.llvm.org/D87608
---
 llvm/lib/Target/ARM/MVETailPredication.cpp | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp
index b2c15be75cd4e..987df73970e57 100644
--- a/llvm/lib/Target/ARM/MVETailPredication.cpp
+++ b/llvm/lib/Target/ARM/MVETailPredication.cpp
@@ -457,13 +457,10 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
   //    upperbound(TC) <= UINT_MAX - VectorWidth
   //
   unsigned SizeInBits = TripCount->getType()->getScalarSizeInBits();
-  auto Diff = APInt(SizeInBits, ~0) - APInt(SizeInBits, VectorWidth);
-  uint64_t MaxMinusVW = Diff.getZExtValue();
-  // FIXME: since ranges can be negative we work with signed ranges here, but
-  // we shouldn't extract the zext'ed values for them.
-  uint64_t UpperboundTC = SE->getSignedRange(TC).getUpper().getZExtValue();
+  auto MaxMinusVW = APInt(SizeInBits, ~0) - APInt(SizeInBits, VectorWidth);
+  APInt UpperboundTC = SE->getUnsignedRangeMax(TC);
 
-  if (UpperboundTC > MaxMinusVW && !ForceTailPredication) {
+  if (UpperboundTC.ugt(MaxMinusVW) && !ForceTailPredication) {
     LLVM_DEBUG(dbgs() << "ARM TP: Overflow possible in tripcount rounding:\n";
                dbgs() << "upperbound(TC) <= UINT_MAX - VectorWidth\n";
                dbgs() << UpperboundTC << " <= " << MaxMinusVW << " == false\n";);
@@ -501,8 +498,8 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
   auto *Ceil = SE->getUDivExpr(ECPlusVWMinus1,
       SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth)));
 
-  ConstantRange RangeCeil = SE->getSignedRange(Ceil) ;
-  ConstantRange RangeTC = SE->getSignedRange(TC) ;
+  ConstantRange RangeCeil = SE->getUnsignedRange(Ceil) ;
+  ConstantRange RangeTC = SE->getUnsignedRange(TC) ;
   if (!RangeTC.isSingleElement()) {
     auto ZeroRange =
         ConstantRange(APInt(TripCount->getType()->getScalarSizeInBits(), 0));

From 6d40f35c9fa66d34db88542a77b8f185906ae20b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 15 Sep 2020 12:59:00 +0100
Subject: [PATCH 0679/1079] AliasSetTracker.cpp - remove unnecessary includes.
 NFCI.

These are all directly included in AliasSetTracker.h
---
 llvm/lib/Analysis/AliasSetTracker.cpp | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/llvm/lib/Analysis/AliasSetTracker.cpp b/llvm/lib/Analysis/AliasSetTracker.cpp
index 5cc68f05dc0ec..03f486477b4e1 100644
--- a/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/GuardUtils.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryLocation.h"
@@ -21,7 +20,6 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
@@ -30,15 +28,11 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <cstdint>
-#include <vector>
 
 using namespace llvm;
 
From 796c80526929e672efbdb2dfae1add1cc66c46b8 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 15 Sep 2020 13:09:03 +0100
Subject: [PATCH 0680/1079] ProvenanceAnalysis.h - remove unnecessary
 AliasAnalysis.h include. NFCI.

Forward declare AAResults instead of the (old) AliasAnalysis type.
---
 llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
index 8fd842fd42d64..9e18052641a13 100644
--- a/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
+++ b/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -26,12 +26,12 @@
 #define LLVM_LIB_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
 
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/IR/ValueHandle.h"
 #include <utility>
 
 namespace llvm {
 
+class AAResults;
 class DataLayout;
 class PHINode;
 class SelectInst;
@@ -49,7 +49,7 @@ namespace objcarc {
 /// not two pointers have the same provenance source and thus could
 /// potentially be related.
 class ProvenanceAnalysis {
-  AliasAnalysis *AA;
+  AAResults *AA;
 
   using ValuePairTy = std::pair<const Value *, const Value *>;
   using CachedResultsTy = DenseMap<ValuePairTy, bool>;
@@ -67,9 +67,9 @@ class ProvenanceAnalysis {
   ProvenanceAnalysis(const ProvenanceAnalysis &) = delete;
   ProvenanceAnalysis &operator=(const ProvenanceAnalysis &) = delete;
 
-  void setAA(AliasAnalysis *aa) { AA = aa; }
+  void setAA(AAResults *aa) { AA = aa; }
 
-  AliasAnalysis *getAA() const { return AA; }
+  AAResults *getAA() const { return AA; }
 
   bool related(const Value *A, const Value *B, const DataLayout &DL);
 
From 50d2a5d4c747855dc86a8b66a4a228abb66ca08e Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 15 Sep 2020 13:34:19 +0100
Subject: [PATCH 0681/1079] LoopCacheAnalysis.h - remove unnecessary includes.
 NFCI.
Move remaining dependencies down to LoopCacheAnalysis.cpp
---
 .../include/llvm/Analysis/LoopCacheAnalysis.h | 23 ++++++++++---------
 llvm/lib/Analysis/LoopCacheAnalysis.cpp       | 12 ++++++----
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
index ffec78b6db2c7..832122e8a97ae 100644
--- a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
@@ -14,19 +14,20 @@
 #ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
 #define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
 
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/DependenceAnalysis.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/Pass.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
 
+class AAResults;
+class DependenceInfo;
 class LPMUpdater;
+class ScalarEvolution;
+class SCEV;
+class TargetTransformInfo;
+
 using CacheCostTy = int64_t;
 using LoopVectorTy = SmallVector<Loop *, 8>;
 
@@ -70,7 +71,7 @@ class IndexedReference {
   /// the same cache line iff the distance between them in the innermost
   /// dimension is less than the cache line size. Return None if unsure.
   Optional<bool> hasSpacialReuse(const IndexedReference &Other, unsigned CLS,
-                                 AliasAnalysis &AA) const;
+                                 AAResults &AA) const;
 
   /// Return true if the current object and the indexed reference \p Other
   /// have distance smaller than \p MaxDistance in the dimension associated with
@@ -78,7 +79,7 @@ class IndexedReference {
   /// MaxDistance and None if unsure.
   Optional<bool> hasTemporalReuse(const IndexedReference &Other,
                                   unsigned MaxDistance, const Loop &L,
-                                  DependenceInfo &DI, AliasAnalysis &AA) const;
+                                  DependenceInfo &DI, AAResults &AA) const;
 
   /// Compute the cost of the reference w.r.t. the given loop \p L when it is
   /// considered in the innermost position in the loop nest.
@@ -118,7 +119,7 @@ class IndexedReference {
   /// Return true if the given reference \p Other is definitely aliased with
   /// the indexed reference represented by this class.
-  bool isAliased(const IndexedReference &Other, AliasAnalysis &AA) const;
+  bool isAliased(const IndexedReference &Other, AAResults &AA) const;
 
 private:
   /// True if the reference can be delinearized, false otherwise.
@@ -183,7 +184,7 @@ class CacheCost {
   /// between array elements accessed in a loop so that the elements are
   /// classified to have temporal reuse.
   CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE,
-            TargetTransformInfo &TTI, AliasAnalysis &AA, DependenceInfo &DI,
+            TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI,
             Optional<unsigned> TRT = None);
 
   /// Create a CacheCost for the loop nest rooted by \p Root.
@@ -258,7 +259,7 @@ class CacheCost {
   const LoopInfo &LI;
   ScalarEvolution &SE;
   TargetTransformInfo &TTI;
-  AliasAnalysis &AA;
+  AAResults &AA;
   DependenceInfo &DI;
 };
 
diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
index 6ba247a87c226..47b08a61ccb2a 100644
--- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
@@ -29,7 +29,11 @@
 #include "llvm/ADT/BreadthFirstIterator.h"
 #include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 
@@ -145,7 +149,7 @@ IndexedReference::IndexedReference(Instruction &StoreOrLoadInst,
 
 Optional<bool> IndexedReference::hasSpacialReuse(const IndexedReference &Other,
                                                  unsigned CLS,
-                                                 AliasAnalysis &AA) const {
+                                                 AAResults &AA) const {
   assert(IsValid && "Expecting a valid reference");
 
   if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
@@ -202,7 +206,7 @@
 Optional<bool>
 IndexedReference::hasTemporalReuse(const IndexedReference &Other,
                                    unsigned MaxDistance, const Loop &L,
                                    DependenceInfo &DI,
-                                   AliasAnalysis &AA) const {
+                                   AAResults &AA) const {
   assert(IsValid && "Expecting a valid reference");
 
   if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
@@ -457,7 +461,7 @@ bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript,
 }
 
 bool IndexedReference::isAliased(const IndexedReference &Other,
-                                 AliasAnalysis &AA) const {
+                                 AAResults &AA) const {
   const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst);
   const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst);
   return AA.isMustAlias(Loc1, Loc2);
@@ -476,7 +480,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) {
 
 CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI,
                      ScalarEvolution &SE, TargetTransformInfo &TTI,
-                     AliasAnalysis &AA, DependenceInfo &DI,
+                     AAResults &AA, DependenceInfo &DI,
                      Optional<unsigned> TRT)
     : Loops(Loops), TripCounts(), LoopCosts(),
       TRT((TRT == None) ? Optional<unsigned>(TemporalReuseThreshold) : TRT),
From da104444fafbc8f657f06c2188ab2e8284563e3d Mon Sep 17 00:00:00 2001
From: Louis Dionne
Date: Tue, 15 Sep 2020 08:43:08 -0400
Subject: [PATCH 0682/1079] [libc++] Allow building without threads in
 standalone builds

Setting _LIBCPP_HAS_NO_THREADS is needed when building libcxxabi without
threads in standalone mode. This is useful when targeting WASM.
Otherwise, you get an error like "No thread API" when building
libcxxabi.

It would be better to link against a properly-configured libc++ headers
CMake target when building libc++abi instead, but we don't generate
such targets yet.

Thanks to Matthew Bauer for the patch.

Differential Revision: https://reviews.llvm.org/D60743
---
 libcxxabi/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt
index 96a1c625222a8..10ac112c90d9f 100644
--- a/libcxxabi/CMakeLists.txt
+++ b/libcxxabi/CMakeLists.txt
@@ -352,6 +352,7 @@ if (NOT LIBCXXABI_ENABLE_THREADS)
                     " is also set to ON.")
   endif()
   add_definitions(-D_LIBCXXABI_HAS_NO_THREADS)
+  add_definitions(-D_LIBCPP_HAS_NO_THREADS)
 endif()
 
 if (LIBCXXABI_HAS_EXTERNAL_THREAD_API)
From 98e07b5596c8692c43770bc4e21a2b19467e35f7 Mon Sep 17 00:00:00 2001
From: Felix Berger
Date: Tue, 15 Sep 2020 08:44:13 -0400
Subject: [PATCH 0683/1079] Restrict UnnecessaryCopyInitialization check to
 variables initialized from free functions without arguments

This restriction avoids cases where an alias to an argument is
returned, which could lead to a false positive change.
---
 .../UnnecessaryCopyInitialization.cpp          | 10 +++++++++-
 ...ormance-unnecessary-copy-initialization.cpp | 18 ++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.cpp b/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.cpp
index f7b21a50203cb..03b4450d8ca8c 100644
--- a/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.cpp
+++ b/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.cpp
@@ -54,7 +54,8 @@ void UnnecessaryCopyInitialization::registerMatchers(MatchFinder *Finder) {
       on(declRefExpr(to(varDecl().bind("objectArg")))));
   auto ConstRefReturningFunctionCall =
       callExpr(callee(functionDecl(returns(ConstReference))),
-               unless(callee(cxxMethodDecl())));
+               unless(callee(cxxMethodDecl())))
+          .bind("initFunctionCall");
 
   auto localVarCopiedFrom = [this](const internal::Matcher<Expr> &CopyCtorArg) {
     return compoundStmt(
@@ -96,6 +97,8 @@ void UnnecessaryCopyInitialization::check(
   const auto *ObjectArg = Result.Nodes.getNodeAs<VarDecl>("objectArg");
   const auto *BlockStmt = Result.Nodes.getNodeAs<Stmt>("blockStmt");
   const auto *CtorCall = Result.Nodes.getNodeAs<CXXConstructExpr>("ctorCall");
+  const auto *InitFunctionCall =
+      Result.Nodes.getNodeAs<CallExpr>("initFunctionCall");
 
   TraversalKindScope RAII(*Result.Context, ast_type_traits::TK_AsIs);
 
@@ -113,6 +116,11 @@
     return;
 
   if (OldVar == nullptr) {
+    // Only allow initialization of a const reference from a free function if it
+    // has no arguments. Otherwise it could return an alias to one of its
+    // arguments and the arguments need to be checked for const use as well.
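+    // Illustrative case (editor's sketch, not from the patch): given
+    //   const T &f(const T &a);
+    //   T Orig;
+    //   const T Copy = f(Orig);
+    // the returned reference may alias Orig, so dropping the copy is only
+    // safe if Orig is never mutated afterwards - hence the bail-out below.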
+    if (InitFunctionCall != nullptr && InitFunctionCall->getNumArgs() > 0)
+      return;
     handleCopyFromMethodReturn(*NewVar, *BlockStmt, IssueFix, ObjectArg,
                                *Result.Context);
   } else {
diff --git a/clang-tools-extra/test/clang-tidy/checkers/performance-unnecessary-copy-initialization.cpp b/clang-tools-extra/test/clang-tidy/checkers/performance-unnecessary-copy-initialization.cpp
index 50dcfd8f8bf22..7a70bc18a28c8 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/performance-unnecessary-copy-initialization.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/performance-unnecessary-copy-initialization.cpp
@@ -23,6 +23,9 @@ struct WeirdCopyCtorType {
 ExpensiveToCopyType global_expensive_to_copy_type;
 
 const ExpensiveToCopyType &ExpensiveTypeReference();
+const ExpensiveToCopyType &freeFunctionWithArg(const ExpensiveToCopyType &);
+const ExpensiveToCopyType &freeFunctionWithDefaultArg(
+    const ExpensiveToCopyType *arg = nullptr);
 const TrivialToCopyType &TrivialTypeReference();
 
 void mutate(ExpensiveToCopyType &);
@@ -387,3 +390,18 @@ void implicitVarFalsePositive() {
   for (const Element &E : Container()) {
   }
 }
+
+// This should not trigger the check as the argument could introduce an alias.
+void negativeInitializedFromFreeFunctionWithArg() {
+  ExpensiveToCopyType Orig;
+  const ExpensiveToCopyType Copy = freeFunctionWithArg(Orig);
+}
+
+void negativeInitializedFromFreeFunctionWithDefaultArg() {
+  const ExpensiveToCopyType Copy = freeFunctionWithDefaultArg();
+}
+
+void negativeInitialzedFromFreeFunctionWithNonDefaultArg() {
+  ExpensiveToCopyType Orig;
+  const ExpensiveToCopyType Copy = freeFunctionWithDefaultArg(&Orig);
+}
From db22e70d010744573df19d69ed3de5b84ea60d1c Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Tue, 15 Sep 2020 13:50:11 +0100
Subject: [PATCH 0684/1079] [ConstraintSolver] Add isConditionImplied helper.

This patch adds an isConditionImplied function that takes a constraint
and returns true if the constraint is implied by the current
constraints in the system.

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D84545
---
 llvm/include/llvm/Analysis/ConstraintSystem.h | 11 +++
 llvm/lib/Analysis/ConstraintSystem.cpp        | 10 +++
 .../Analysis/ConstraintSystemTest.cpp         | 73 ++++++++++++++++++-
 3 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h
index 7de787c1fc390..01f09f3daaaa6 100644
--- a/llvm/include/llvm/Analysis/ConstraintSystem.h
+++ b/llvm/include/llvm/Analysis/ConstraintSystem.h
@@ -51,6 +51,17 @@ class ConstraintSystem {
 
   /// Returns true if there may be a solution for the constraints in the system.
   bool mayHaveSolution();
+
+  static SmallVector<int64_t, 8> negate(SmallVector<int64_t, 8> R) {
+    // The negated constraint R is obtained by multiplying by -1 and adding 1 to
+    // the constant.
+    R[0] += 1;
+    for (auto &C : R)
+      C *= -1;
+    return R;
+  }
+
+  bool isConditionImplied(SmallVector<int64_t, 8> R);
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Analysis/ConstraintSystem.cpp b/llvm/lib/Analysis/ConstraintSystem.cpp
index 21115fc946e9b..818cfe0a171eb 100644
--- a/llvm/lib/Analysis/ConstraintSystem.cpp
+++ b/llvm/lib/Analysis/ConstraintSystem.cpp
@@ -140,3 +140,13 @@ bool ConstraintSystem::mayHaveSolution() {
   LLVM_DEBUG(dbgs() << (HasSolution ? "sat" : "unsat") << "\n");
   return HasSolution;
 }
+
+bool ConstraintSystem::isConditionImplied(SmallVector<int64_t, 8> R) {
+  // If there is no solution with the negation of R added to the system, the
+  // condition must hold based on the existing constraints.
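+  // Worked example (editor's illustration, not part of the patch): rows
+  // {c, a1, a2} encode a1*x + a2*y <= c, so negating x + y <= 8, i.e.
+  // {8, 1, 1}, yields {-9, -1, -1}, i.e. -x - y <= -9 or x + y >= 9, using
+  // that for integers !(t <= c) is equivalent to t >= c + 1.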
+  R = ConstraintSystem::negate(R);
+
+  auto NewSystem = *this;
+  NewSystem.addVariableRow(R);
+  return !NewSystem.mayHaveSolution();
+}
diff --git a/llvm/unittests/Analysis/ConstraintSystemTest.cpp b/llvm/unittests/Analysis/ConstraintSystemTest.cpp
index 2301da7ec296f..337a111634186 100644
--- a/llvm/unittests/Analysis/ConstraintSystemTest.cpp
+++ b/llvm/unittests/Analysis/ConstraintSystemTest.cpp
@@ -13,7 +13,7 @@ using namespace llvm;
 
 namespace {
 
-TEST(ConstraintSloverTest, TestSolutionChecks) {
+TEST(ConstraintSolverTest, TestSolutionChecks) {
   {
     ConstraintSystem CS;
     // x + y <= 10, x >= 5, y >= 6, x <= 10, y <= 10
@@ -79,4 +79,75 @@ TEST(ConstraintSloverTest, TestSolutionChecks) {
     EXPECT_TRUE(CS.mayHaveSolution());
   }
 }
+
+TEST(ConstraintSolverTest, IsConditionImplied) {
+  {
+    // For the test below, we assume we know
+    // x <= 5 && y <= 3
+    ConstraintSystem CS;
+    CS.addVariableRow({5, 1, 0});
+    CS.addVariableRow({3, 0, 1});
+
+    // x + y <= 6 does not hold.
+    EXPECT_FALSE(CS.isConditionImplied({6, 1, 1}));
+    // x + y <= 7 does not hold.
+    EXPECT_FALSE(CS.isConditionImplied({7, 1, 1}));
+    // x + y <= 8 does hold.
+    EXPECT_TRUE(CS.isConditionImplied({8, 1, 1}));
+
+    // 2 * x + y <= 12 does not hold.
+    EXPECT_FALSE(CS.isConditionImplied({12, 2, 1}));
+    // 2 * x + y <= 13 does hold.
+    EXPECT_TRUE(CS.isConditionImplied({13, 2, 1}));
+
+    // 2 * x + y <= 12 does not hold.
+    EXPECT_FALSE(CS.isConditionImplied({12, 2, 1}));
+    // 2 * x + y <= 13 does hold.
+    EXPECT_TRUE(CS.isConditionImplied({13, 2, 1}));
+
+    // x <= y == x - y <= 0 does not hold.
+    EXPECT_FALSE(CS.isConditionImplied({0, 1, -1}));
+    // y <= x == -x + y <= 0 does not hold.
+    EXPECT_FALSE(CS.isConditionImplied({0, -1, 1}));
+  }
+
+  {
+    // For the test below, we assume we know
+    // x + 1 <= y + 1 == x - y <= 0
+    ConstraintSystem CS;
+    CS.addVariableRow({0, 1, -1});
+
+    // x <= y == x - y <= 0 does hold.
+    EXPECT_TRUE(CS.isConditionImplied({0, 1, -1}));
+    // y <= x == -x + y <= 0 does not hold.
+    EXPECT_FALSE(CS.isConditionImplied({0, -1, 1}));
+
+    // x <= y + 10 == x - y <= 10 does hold.
+    EXPECT_TRUE(CS.isConditionImplied({10, 1, -1}));
+    // x + 10 <= y == x - y <= -10 does NOT hold.
+    EXPECT_FALSE(CS.isConditionImplied({-10, 1, -1}));
+  }
+
+  {
+    // For the test below, we assume we know
+    // x <= y == x - y <= 0
+    // y <= z == y - z <= 0
+    ConstraintSystem CS;
+    CS.addVariableRow({0, 1, -1, 0});
+    CS.addVariableRow({0, 0, 1, -1});
+
+    // z <= y == -y + z <= 0 does not hold.
+    EXPECT_FALSE(CS.isConditionImplied({0, 0, -1, 1}));
+    // x <= z == x - z <= 0 does hold.
+    EXPECT_TRUE(CS.isConditionImplied({0, 1, 0, -1}));
+  }
+}
+
+TEST(ConstraintSolverTest, IsConditionImpliedOverflow) {
+  ConstraintSystem CS;
+  // Make sure isConditionImplied returns false when there is an overflow.
+  int64_t Limit = std::numeric_limits<int64_t>::max();
+  CS.addVariableRow({Limit - 1, Limit - 2, Limit - 3});
+  EXPECT_FALSE(CS.isConditionImplied({Limit - 1, Limit - 2, Limit - 3}));
+}
 } // namespace
From fe395aecd9e70b815e6490639098d815385f9932 Mon Sep 17 00:00:00 2001
From: sameeran joshi
Date: Sun, 13 Sep 2020 17:24:34 +0530
Subject: [PATCH 0685/1079] [Flang] Add GettingInvolved documentation page and
 sidebar.

Adds a new GettingInvolved page to the documentation, which provides
details about the mailing lists, chats, and calls.

Adds a sidebar page which provides common links on all documentation
pages.
The links include: - Getting Started - Getting Involved - Github Repository - Bug Reports - Code Review Depends on https://reviews.llvm.org/D87242 Reviewed By: richard.barton.arm Differential Revision: https://reviews.llvm.org/D87270 --- flang/docs/GettingInvolved.md | 72 +++++++++++++++++++++++++ flang/docs/_templates/indexsidebar.html | 26 +++++++++ flang/docs/_templates/layout.html | 14 +++++ flang/docs/conf.py | 8 ++- flang/docs/index.md | 1 + 5 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 flang/docs/GettingInvolved.md create mode 100644 flang/docs/_templates/indexsidebar.html create mode 100644 flang/docs/_templates/layout.html diff --git a/flang/docs/GettingInvolved.md b/flang/docs/GettingInvolved.md new file mode 100644 index 0000000000000..a244fbcee56a0 --- /dev/null +++ b/flang/docs/GettingInvolved.md @@ -0,0 +1,72 @@ + +# Getting Involved + +```eval_rst +.. contents:: + :local: +``` + +The Flang Project welcomes contributions of all kinds. +Please feel free to join the mailing list or the slack channel for discussions related to development of Flang. +To understand the status of various developments in Flang please join the respective call. + +## Mailing Lists + +[Developer's List (flang-dev)](http://lists.llvm.org/mailman/listinfo/flang-dev) + + This list is for people who want to be included in technical discussions related to Flang. People post to this list when they have questions about writing code + for or using the Flang tools. It is relatively low volume. + + +[Commits Archive (flang-commits)](http://lists.llvm.org/pipermail/flang-commits) + + This list contains all commit messages that are made when Flang developers + commit code changes to the repository. It also serves as a forum for + patch review (i.e. send patches here). It is useful for those who want to + stay on the bleeding edge of Flang development. This list is high + volume. + +## Chat + +### Flang Slack Workspace + +- There is a Slack workspace dedicated to Flang. +- There are a number of topic-oriented channels available (e.g., #driver, #f18-semantics, #fir). +- Add yourself via the *[invitation link](https://join.slack.com/t/flang-compiler/shared_invite/zt-2pcn51lh-VrRQL_YUOkxA_1CEfMGQhw "title")* + +## Calls + +### Flang Community Biweekly Call + +- General updates on the Flang Project, both LLVM Flang and current Flang. +- Join [Flang Community Biweekly Call](https://nvmeet.webex.com/nvmeet/j.php?MTID=mb4edb8c799f69ec2dc0554acc969a162) +- Time: On Wednesdays 8:30 Pacific Time, on the weeks alternating with regular Flang Community Technical Biweekly Call. +- Minutes: They are sent to [flang-dev](http://lists.llvm.org/mailman/listinfo/flang-dev). Search for `Flang Biweekly Sync - Notes`. + +### Flang Community Technical Biweekly Call + +- Technical topics call. +- Join [Flang Community Technical Biweekly Call](https://bluejeans.com/625064848?src=join_info) +- Time: On Mondays 8:30 Pacific Time, on the weeks alternating with regular Flang Community Biweekly Call. +- The agenda is in this [Google Doc](https://docs.google.com/document/d/1Z2U5UAtJ-Dag5wlMaLaW1KRmNgENNAYynJqLW2j2AZQ/). + +### LLVM Alias Analysis Technical Call + +- For people working on improvements to LLVM alias analysis. +- Join [LLVM Alias Analysis Technical Call](https://bluejeans.com/101176001?src=join_info) +- Time: Tuesdays 10:00 AM Pacific Time, every 4 weeks. +- The agenda is in this [Google Doc](https://docs.google.com/document/d/1ybwEKDVtIbhIhK50qYtwKsL50K-NvB6LfuBsfepBZ9Y/). 
+ +### OpenMP Technical Call + +- Development updates on OpenMP and OpenACC in the Flang Project. +- Join [OpenMP Technical Call](https://bit.ly/39eQW3o) +- Time: Weekly call on every Thursdays 8:00 AM Pacific time. +- Meeting minutes are [here](https://docs.google.com/document/d/1yA-MeJf6RYY-ZXpdol0t7YoDoqtwAyBhFLr5thu5pFI). +- Status tracking [page](https://docs.google.com/spreadsheets/d/1FvHPuSkGbl4mQZRAwCIndvQx9dQboffiD-xD0oqxgU0/edit#gid=0). diff --git a/flang/docs/_templates/indexsidebar.html b/flang/docs/_templates/indexsidebar.html new file mode 100644 index 0000000000000..3c8f1abdf9000 --- /dev/null +++ b/flang/docs/_templates/indexsidebar.html @@ -0,0 +1,26 @@ +{# This template defines sidebar which can be used to provide common links on + all documentation pages. #} + +

+[sidebar body lost in extraction: three heading-plus-link-list sections,
+ "Documentation" (Getting Started), "Getting Involved", and
+ "Additional Links" (GitHub Repository, Bug Reports, Code Review)]
diff --git a/flang/docs/_templates/layout.html b/flang/docs/_templates/layout.html
new file mode 100644
index 0000000000000..12b7731ccca7d
--- /dev/null
+++ b/flang/docs/_templates/layout.html
@@ -0,0 +1,14 @@
+{% extends "!layout.html" %}
+
+{% block extrahead %}
+[stylesheet link lost in extraction]
+{% endblock %}
+
+{% block rootrellink %}
+[breadcrumb links lost in extraction: "Flang Home" | "Documentation"]
+{% endblock %}
diff --git a/flang/docs/conf.py b/flang/docs/conf.py
index 21362fc3449e9..851b233767a91 100644
--- a/flang/docs/conf.py
+++ b/flang/docs/conf.py
@@ -167,7 +167,13 @@ def setup(app):
 #html_use_smartypants = True
 
 # Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
+html_sidebars = {
+    '**': [
+        'indexsidebar.html',
+        'searchbox.html',
+    ]
+}
+
 
 # Additional templates that should be rendered to pages, maps page names to
 # template names.
diff --git a/flang/docs/index.md b/flang/docs/index.md
index 4c07170565227..bd7092a418f33 100644
--- a/flang/docs/index.md
+++ b/flang/docs/index.md
@@ -15,6 +15,7 @@ Flang is LLVM's Fortran frontend
 .. toctree::
    :titlesonly:
 
+   GettingInvolved
    FortranForCProgrammers
    C++style
    C++17
From 3f411e97739ffbdca0077d1c4fdc9c1fc1819019 Mon Sep 17 00:00:00 2001
From: Sam Clegg
Date: Mon, 14 Sep 2020 18:28:26 -0700
Subject: [PATCH 0686/1079] [lld][WebAssembly] Fix --export-all when
 __stack_pointer is present

With https://reviews.llvm.org/D87537 we made it an error to import or
export a mutable global with the +mutable-globals feature present.
However, the scan was of the entire symbol table rather than just the
imports or exports, and the filter didn't match exactly, meaning the
`__stack_pointer` (a mutable global) was always triggering this error
when the `--export-all` flag was used.

This also revealed that we didn't have any test coverage for the
`--export-all` flag.

This change fixes the current breakage on the emscripten-releases
roller.

Differential Revision: https://reviews.llvm.org/D87663
---
 lld/test/wasm/export-all.s   | 48 ++++++++++++++++++++++++++++++++++++
 lld/wasm/SyntheticSections.h |  1 +
 lld/wasm/Writer.cpp          | 31 +++++++++++------------
 3 files changed, 63 insertions(+), 17 deletions(-)
 create mode 100644 lld/test/wasm/export-all.s

diff --git a/lld/test/wasm/export-all.s b/lld/test/wasm/export-all.s
new file mode 100644
index 0000000000000..5f013813cdf17
--- /dev/null
+++ b/lld/test/wasm/export-all.s
@@ -0,0 +1,48 @@
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
+# RUN: wasm-ld --export-all -o %t.wasm %t.o
+# RUN: obj2yaml %t.wasm | FileCheck %s
+
+.globl _start
+
+_start:
+  .functype _start () -> ()
+  i32.const 3
+  global.set __stack_pointer
+  end_function
+
+foo:
+  .functype foo () -> (i32)
+  i32.const 42
+  end_function
+
+.globaltype __stack_pointer, i32
+
+# CHECK:       - Type: EXPORT
+# CHECK-NEXT:    Exports:
+# CHECK-NEXT:      - Name: memory
+# CHECK-NEXT:        Kind: MEMORY
+# CHECK-NEXT:        Index: 0
+# CHECK-NEXT:      - Name: __wasm_call_ctors
+# CHECK-NEXT:        Kind: FUNCTION
+# CHECK-NEXT:        Index: 0
+# CHECK-NEXT:      - Name: _start
+# CHECK-NEXT:        Kind: FUNCTION
+# CHECK-NEXT:        Index: 1
+# CHECK-NEXT:      - Name: __dso_handle
+# CHECK-NEXT:        Kind: GLOBAL
+# CHECK-NEXT:        Index: 1
+# CHECK-NEXT:      - Name: __data_end
+# CHECK-NEXT:        Kind: GLOBAL
+# CHECK-NEXT:        Index: 2
+# CHECK-NEXT:      - Name: __global_base
+# CHECK-NEXT:        Kind: GLOBAL
+# CHECK-NEXT:        Index: 3
+# CHECK-NEXT:      - Name: __heap_base
+# CHECK-NEXT:        Kind: GLOBAL
+# CHECK-NEXT:        Index: 4
+# CHECK-NEXT:      - Name: __memory_base
+# CHECK-NEXT:        Kind: GLOBAL
+# CHECK-NEXT:        Index: 5
+# CHECK-NEXT:      - Name: __table_base
+# CHECK-NEXT:        Kind: GLOBAL
+# CHECK-NEXT:        Index: 6
diff --git a/lld/wasm/SyntheticSections.h b/lld/wasm/SyntheticSections.h
index 3e125ca84e401..335bfe843184a 100644
--- a/lld/wasm/SyntheticSections.h
+++ b/lld/wasm/SyntheticSections.h
@@ -221,6 +221,7 @@ class ExportSection : public SyntheticSection {
   void writeBody() override;
 
   std::vector<llvm::wasm::WasmExport> exports;
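+  // (Illustrative comment, not in the patch: this list is kept in parallel
+  //  with `exports` so Writer::populateTargetFeatures() can check exported
+  //  symbols for mutable globals once the export list is finalized.)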
+  std::vector<const Symbol *> exportedSymbols;
 };
 
 class StartSection : public SyntheticSection {
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 82b1aec8d1e92..8d5b98050cb13 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -463,26 +463,22 @@ void Writer::populateTargetFeatures() {
     return;
 
   if (!config->relocatable && used.count("mutable-globals") == 0) {
-    for (Symbol *sym : symtab->getSymbols()) {
+    for (const Symbol *sym : out.importSec->importedSymbols) {
       if (auto *global = dyn_cast<GlobalSymbol>(sym)) {
         if (global->getGlobalType()->Mutable) {
-          if (!sym->isLive())
-            continue;
-          if (!sym->isUsedInRegularObj)
-            continue;
-          if (sym->isUndefined() && sym->isWeak() && !config->relocatable)
-            continue;
-          if (sym->isUndefined())
-            error(Twine("mutable global imported but 'mutable-globals' feature "
-                        "not present in inputs: `") +
-                  toString(*sym) + "`. Use --no-check-features to suppress.");
-          else if (sym->isExported())
-            error(Twine("mutable global exported but 'mutable-globals' feature "
-                        "not present in inputs: `") +
-                  toString(*sym) + "`. Use --no-check-features to suppress.");
+          error(Twine("mutable global imported but 'mutable-globals' feature "
+                      "not present in inputs: `") +
+                toString(*sym) + "`. Use --no-check-features to suppress.");
         }
       }
     }
+    for (const Symbol *sym : out.exportSec->exportedSymbols) {
+      if (auto *global = dyn_cast<GlobalSymbol>(sym)) {
+        error(Twine("mutable global exported but 'mutable-globals' feature "
+                    "not present in inputs: `") +
+              toString(*sym) + "`. Use --no-check-features to suppress.");
+      }
+    }
   }
 
   if (config->sharedMemory) {
@@ -603,6 +599,7 @@ void Writer::calculateExports() {
 
     LLVM_DEBUG(dbgs() << "Export: " << name << "\n");
     out.exportSec->exports.push_back(export_);
+    out.exportSec->exportedSymbols.push_back(sym);
   }
 }
 
@@ -1075,8 +1072,6 @@ void Writer::run() {
   createSyntheticSections();
   log("-- populateProducers");
   populateProducers();
-  log("-- populateTargetFeatures");
-  populateTargetFeatures();
   log("-- calculateImports");
   calculateImports();
   log("-- layoutMemory");
@@ -1119,6 +1114,8 @@ void Writer::run() {
   calculateCustomSections();
   log("-- populateSymtab");
   populateSymtab();
+  log("-- populateTargetFeatures");
+  populateTargetFeatures();
   log("-- addSections");
   addSections();
From 57752b1449440a7d034d2d1781f586c3c664712e Mon Sep 17 00:00:00 2001
From: Nico Weber
Date: Tue, 15 Sep 2020 09:25:19 -0400
Subject: [PATCH 0687/1079] [gn build] (semi-manually) port 380e746bcca

---
 llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn
index 2cf9a4e05c2dd..220067c0e343a 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn
@@ -13,9 +13,11 @@ unittest("CodeGenTests") {
     "//llvm/lib/Support",
     "//llvm/lib/Target",
     "//llvm/lib/Target:TargetsToBuild",
+    "//llvm/lib/Testing/Support",
   ]
   sources = [
     "AArch64SelectionDAGTest.cpp",
+    "AsmPrinterDwarfTest.cpp",
    "DIEHashTest.cpp",
     "LexicalScopesTest.cpp",
     "LowLevelTypeTest.cpp",
@@ -25,6 +27,7 @@ unittest("CodeGenTests") {
     "PassManagerTest.cpp",
     "ScalableVectorMVTsTest.cpp",
     "TargetOptionsTest.cpp",
+    "TestAsmPrinter.cpp",
     "TypeTraitsTest.cpp",
   ]
   has_custom_main = true
From 802d21cdf08ea43d5c32924ac29c44b00c4a841f Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Fri, 24 Jul 2020 19:36:48 +0100
Subject: [PATCH 0688/1079] [ConstraintElimination] Add initial tests.
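
These cover facts a constraint solver should be able to derive from
dominating branches. A minimal sketch of the pattern dom.ll exercises
(editor's illustration, paraphrasing the first test case below):

  %c.1 = icmp ule i32 %x, 10
  br i1 %c.1, label %bb1, label %bb2
bb1:                              ; on this edge x <= 10 is known, so the
  %c.2 = icmp ule i32 %x, 10      ; identical compare %c.2 is implied (true)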
--- .../Transforms/ConstraintElimination/dom.ll | 136 +++++++ .../ConstraintElimination/geps.2d.ll | 134 +++++++ .../Transforms/ConstraintElimination/geps.ll | 332 ++++++++++++++++++ .../Transforms/ConstraintElimination/i128.ll | 37 ++ .../Transforms/ConstraintElimination/loops.ll | 47 +++ .../Transforms/ConstraintElimination/mixed.ll | 40 +++ .../Transforms/ConstraintElimination/uge.ll | 255 ++++++++++++++ .../ConstraintElimination/ugt-ule.ll | 38 ++ .../Transforms/ConstraintElimination/ule.ll | 254 ++++++++++++++ 9 files changed, 1273 insertions(+) create mode 100644 llvm/test/Transforms/ConstraintElimination/dom.ll create mode 100644 llvm/test/Transforms/ConstraintElimination/geps.2d.ll create mode 100644 llvm/test/Transforms/ConstraintElimination/geps.ll create mode 100644 llvm/test/Transforms/ConstraintElimination/i128.ll create mode 100644 llvm/test/Transforms/ConstraintElimination/loops.ll create mode 100644 llvm/test/Transforms/ConstraintElimination/mixed.ll create mode 100644 llvm/test/Transforms/ConstraintElimination/uge.ll create mode 100644 llvm/test/Transforms/ConstraintElimination/ugt-ule.ll create mode 100644 llvm/test/Transforms/ConstraintElimination/ule.ll diff --git a/llvm/test/Transforms/ConstraintElimination/dom.ll b/llvm/test/Transforms/ConstraintElimination/dom.ll new file mode 100644 index 0000000000000..a6b8629bed78a --- /dev/null +++ b/llvm/test/Transforms/ConstraintElimination/dom.ll @@ -0,0 +1,136 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S %s | FileCheck %s + +; Test cases where both the true and false successors reach the same block, +; dominated by one of them. + +declare void @use(i1) + +define i32 @test1(i32 %x) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp ule i32 [[X:%.*]], 10 +; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[C_2:%.*]] = icmp ule i32 [[X]], 10 +; CHECK-NEXT: call void @use(i1 [[C_2]]) +; CHECK-NEXT: br label [[BB2]] +; CHECK: bb2: +; CHECK-NEXT: [[C_3:%.*]] = icmp ugt i32 [[X]], 10 +; CHECK-NEXT: call void @use(i1 [[C_3]]) +; CHECK-NEXT: ret i32 20 +; +entry: + %c.1 = icmp ule i32 %x, 10 + br i1 %c.1, label %bb1, label %bb2 + +bb1: + %c.2 = icmp ule i32 %x, 10 + call void @use(i1 %c.2) + br label %bb2 + +bb2: + %c.3 = icmp ugt i32 %x, 10 + call void @use(i1 %c.3) + ret i32 20 +} + + +define i32 @test2(i32 %x) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp ule i32 [[X:%.*]], 10 +; CHECK-NEXT: br i1 [[C_1]], label [[BB2:%.*]], label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[C_2:%.*]] = icmp ugt i32 [[X]], 10 +; CHECK-NEXT: call void @use(i1 [[C_2]]) +; CHECK-NEXT: ret i32 20 +; CHECK: bb2: +; CHECK-NEXT: [[C_3:%.*]] = icmp ule i32 [[X]], 10 +; CHECK-NEXT: call void @use(i1 [[C_3]]) +; CHECK-NEXT: br label [[BB1]] +; +entry: + %c.1 = icmp ule i32 %x, 10 + br i1 %c.1, label %bb2, label %bb1 + +bb1: + %c.2 = icmp ugt i32 %x, 10 + call void @use(i1 %c.2) + ret i32 20 + +bb2: + %c.3 = icmp ule i32 %x, 10 + call void @use(i1 %c.3) + br label %bb1 +} + + +; Test cases where the true/false successors are not domianted by the conditional branching block. 
+define i32 @test3(i32 %x, i1 %c) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[BB_COND:%.*]], label [[BB1:%.*]] +; CHECK: bb.cond: +; CHECK-NEXT: [[C_1:%.*]] = icmp ule i32 [[X:%.*]], 10 +; CHECK-NEXT: br i1 [[C_1]], label [[BB1]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[C_2:%.*]] = icmp ule i32 [[X]], 10 +; CHECK-NEXT: call void @use(i1 [[C_2]]) +; CHECK-NEXT: ret i32 10 +; CHECK: bb2: +; CHECK-NEXT: [[C_3:%.*]] = icmp ugt i32 [[X]], 10 +; CHECK-NEXT: call void @use(i1 [[C_3]]) +; CHECK-NEXT: ret i32 20 +; +entry: + br i1 %c, label %bb.cond, label %bb1 + +bb.cond: + %c.1 = icmp ule i32 %x, 10 + br i1 %c.1, label %bb1, label %bb2 + +bb1: + %c.2 = icmp ule i32 %x, 10 + call void @use(i1 %c.2) + ret i32 10 + +bb2: + %c.3 = icmp ugt i32 %x, 10 + call void @use(i1 %c.3) + ret i32 20 +} + +define i32 @test4(i32 %x, i1 %c) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[BB_COND:%.*]], label [[BB2:%.*]] +; CHECK: bb.cond: +; CHECK-NEXT: [[C_1:%.*]] = icmp ule i32 [[X:%.*]], 10 +; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[BB2]] +; CHECK: bb1: +; CHECK-NEXT: [[C_2:%.*]] = icmp ule i32 [[X]], 10 +; CHECK-NEXT: call void @use(i1 [[C_2]]) +; CHECK-NEXT: ret i32 10 +; CHECK: bb2: +; CHECK-NEXT: [[C_3:%.*]] = icmp ugt i32 [[X]], 10 +; CHECK-NEXT: call void @use(i1 [[C_3]]) +; CHECK-NEXT: ret i32 20 +; +entry: + br i1 %c, label %bb.cond, label %bb2 + +bb.cond: + %c.1 = icmp ule i32 %x, 10 + br i1 %c.1, label %bb1, label %bb2 + +bb1: + %c.2 = icmp ule i32 %x, 10 + call void @use(i1 %c.2) + ret i32 10 + +bb2: + %c.3 = icmp ugt i32 %x, 10 + call void @use(i1 %c.3) + ret i32 20 +} diff --git a/llvm/test/Transforms/ConstraintElimination/geps.2d.ll b/llvm/test/Transforms/ConstraintElimination/geps.2d.ll new file mode 100644 index 0000000000000..bb24514404414 --- /dev/null +++ b/llvm/test/Transforms/ConstraintElimination/geps.2d.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S %s | FileCheck %s + +define void @test.not.uge.ult([10 x i8]* %start, i8* %low, i8* %high) { +; CHECK-LABEL: @test.not.uge.ult( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[START:%.*]], i64 1, i64 3 +; CHECK-NEXT: [[C_1:%.*]] = icmp uge i8* [[ADD_PTR_I]], [[HIGH:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: [[START_0:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[START]], i64 10, i64 0 +; CHECK-NEXT: [[C_0:%.*]] = icmp ult i8* [[START_0]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[C_0]]) +; CHECK-NEXT: ret void +; +entry: + %add.ptr.i = getelementptr inbounds [10 x i8], [10 x i8]* %start, i64 1, i64 3 + %c.1 = icmp uge i8* %add.ptr.i, %high + br i1 %c.1, label %if.then, label %if.end + +if.then: ; preds = %entry + ret void + +if.end: ; preds = %entry + %start.0 = getelementptr inbounds [10 x i8], [10 x i8]* %start, i64 10, i64 0 + %c.0 = icmp ult i8* %start.0, %high + call void @use(i1 %c.0) + ret void +} + +define void @test.not.uge.ule([10 x i8]* %start, i8* %low, i8* %high) { +; CHECK-LABEL: @test.not.uge.ule( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[START:%.*]], i64 1, i64 3 +; CHECK-NEXT: [[C:%.*]] = icmp uge i8* [[ADD_PTR_I]], [[HIGH:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; 
CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: [[START_0:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[START]], i64 10, i64 0 +; CHECK-NEXT: [[C_0:%.*]] = icmp ule i8* [[START_0]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[C_0]]) +; CHECK-NEXT: [[START_1:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[START]], i64 2, i64 1 +; CHECK-NEXT: [[C_1:%.*]] = icmp ule i8* [[START_1]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[C_1]]) +; CHECK-NEXT: ret void +; +entry: + %add.ptr.i = getelementptr inbounds [10 x i8], [10 x i8]* %start, i64 1, i64 3 + %c = icmp uge i8* %add.ptr.i, %high + br i1 %c, label %if.then, label %if.end + +if.then: ; preds = %entry + ret void + +if.end: ; preds = %entry + %start.0 = getelementptr inbounds [10 x i8], [10 x i8]* %start, i64 10, i64 0 + %c.0 = icmp ule i8* %start.0, %high + call void @use(i1 %c.0) + %start.1 = getelementptr inbounds [10 x i8], [10 x i8]* %start, i64 2, i64 1 + %c.1 = icmp ule i8* %start.1, %high + call void @use(i1 %c.1) + ret void +} + +define void @test.not.uge.ugt([10 x i8]* %start, i8* %low, i8* %high) { +; CHECK-LABEL: @test.not.uge.ugt( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[START:%.*]], i64 1, i64 3 +; CHECK-NEXT: [[C:%.*]] = icmp uge i8* [[ADD_PTR_I]], [[HIGH:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: [[START_0:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[START]], i64 3, i64 0 +; CHECK-NEXT: [[C_0:%.*]] = icmp ugt i8* [[START_0]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[C_0]]) +; CHECK-NEXT: [[START_1:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[START]], i64 3, i64 1 +; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i8* [[START_1]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[C_1]]) +; CHECK-NEXT: ret void +; +entry: + %add.ptr.i = getelementptr inbounds [10 x i8], [10 x i8]* %start, i64 1, i64 3 + %c = icmp uge i8* %add.ptr.i, %high + br i1 %c, label %if.then, label %if.end + +if.then: ; preds = %entry + ret void + +if.end: ; preds = %entry + %start.0 = getelementptr inbounds [10 x i8], [10 x i8]* %start, i64 3, i64 0 + %c.0 = icmp ugt i8* %start.0, %high + call void @use(i1 %c.0) + + %start.1 = getelementptr inbounds [10 x i8], [10 x i8]* %start, i64 3, i64 1 + %c.1 = icmp ugt i8* %start.1, %high + call void @use(i1 %c.1) + ret void +} + +define void @test.not.uge.uge([10 x i8]* %start, i8* %low, i8* %high) { +; CHECK-LABEL: @test.not.uge.uge( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[START:%.*]], i64 1, i64 3 +; CHECK-NEXT: [[C_1:%.*]] = icmp uge i8* [[ADD_PTR_I]], [[HIGH:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: [[START_0:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[START]], i64 3, i64 0 +; CHECK-NEXT: [[C_0:%.*]] = icmp uge i8* [[START_0]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[C_0]]) +; CHECK-NEXT: ret void +; +entry: + %add.ptr.i = getelementptr inbounds [10 x i8], [10 x i8]* %start, i64 1, i64 3 + %c.1 = icmp uge i8* %add.ptr.i, %high + br i1 %c.1, label %if.then, label %if.end + +if.then: ; preds = %entry + ret void + +if.end: ; preds = %entry + %start.0 = getelementptr inbounds [10 x i8], [10 x i8]* %start, i64 3, i64 0 + %c.0 = icmp uge i8* %start.0, %high + call void @use(i1 %c.0) + + ret void +} + +declare void @use(i1) diff 
--git a/llvm/test/Transforms/ConstraintElimination/geps.ll b/llvm/test/Transforms/ConstraintElimination/geps.ll new file mode 100644 index 0000000000000..0e36ebf07f0f4 --- /dev/null +++ b/llvm/test/Transforms/ConstraintElimination/geps.ll @@ -0,0 +1,332 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S %s | FileCheck %s + +define i32 @test.ult(i32* readonly %src, i32* readnone %min, i32* readnone %max) { +; CHECK-LABEL: @test.ult( +; CHECK-NEXT: check.0.min: +; CHECK-NEXT: [[C_MIN_0:%.*]] = icmp ult i32* [[SRC:%.*]], [[MIN:%.*]] +; CHECK-NEXT: br i1 [[C_MIN_0]], label [[TRAP:%.*]], label [[CHECK_0_MAX:%.*]] +; CHECK: trap: +; CHECK-NEXT: ret i32 10 +; CHECK: check.0.max: +; CHECK-NEXT: [[C_MAX_0:%.*]] = icmp ult i32* [[SRC]], [[MAX:%.*]] +; CHECK-NEXT: br i1 [[C_MAX_0]], label [[CHECK_3_MIN:%.*]], label [[TRAP]] +; CHECK: check.3.min: +; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[SRC]], align 4 +; CHECK-NEXT: [[ADD_PTR_I36:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 3 +; CHECK-NEXT: [[C_3_MIN:%.*]] = icmp ult i32* [[ADD_PTR_I36]], [[MIN]] +; CHECK-NEXT: br i1 [[C_3_MIN]], label [[TRAP]], label [[CHECK_3_MAX:%.*]] +; CHECK: check.3.max: +; CHECK-NEXT: [[C_3_MAX:%.*]] = icmp ult i32* [[ADD_PTR_I36]], [[MAX]] +; CHECK-NEXT: br i1 [[C_3_MAX]], label [[CHECK_1_MIN:%.*]], label [[TRAP]] +; CHECK: check.1.min: +; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[ADD_PTR_I36]], align 4 +; CHECK-NEXT: [[ADD_PTR_I29:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1 +; CHECK-NEXT: [[C_1_MIN:%.*]] = icmp ult i32* [[ADD_PTR_I29]], [[MIN]] +; CHECK-NEXT: br i1 [[C_1_MIN]], label [[TRAP]], label [[CHECK_1_MAX:%.*]] +; CHECK: check.1.max: +; CHECK-NEXT: [[C_1_MAX:%.*]] = icmp ult i32* [[ADD_PTR_I29]], [[MAX]] +; CHECK-NEXT: br i1 [[C_1_MAX]], label [[CHECK_2_MIN:%.*]], label [[TRAP]] +; CHECK: check.2.min: +; CHECK-NEXT: [[L2:%.*]] = load i32, i32* [[ADD_PTR_I29]], align 4 +; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 2 +; CHECK-NEXT: [[C_2_MIN:%.*]] = icmp ult i32* [[ADD_PTR_I]], [[MIN]] +; CHECK-NEXT: br i1 [[C_2_MIN]], label [[TRAP]], label [[CHECK_2_MAX:%.*]] +; CHECK: check.2.max: +; CHECK-NEXT: [[C_2_MAX:%.*]] = icmp ult i32* [[ADD_PTR_I]], [[MAX]] +; CHECK-NEXT: br i1 [[C_2_MAX]], label [[EXIT:%.*]], label [[TRAP]] +; CHECK: exit: +; CHECK-NEXT: [[L3:%.*]] = load i32, i32* [[ADD_PTR_I]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[L1]], [[L0]] +; CHECK-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD]], [[L2]] +; CHECK-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD8]], [[L3]] +; CHECK-NEXT: ret i32 [[ADD9]] +; +check.0.min: + %c.min.0 = icmp ult i32* %src, %min + br i1 %c.min.0, label %trap, label %check.0.max + +trap: ; preds = %check.2.max, %check.2.min, %check.1.max, %check.1.min, %check.3.max, %check.3.min, %check.0.max, %check.0.min + ret i32 10 + +check.0.max: ; preds = %check.0.min + %c.max.0 = icmp ult i32* %src, %max + br i1 %c.max.0, label %check.3.min, label %trap + +check.3.min: ; preds = %check.0.max + %l0 = load i32, i32* %src, align 4 + %add.ptr.i36 = getelementptr inbounds i32, i32* %src, i64 3 + %c.3.min = icmp ult i32* %add.ptr.i36, %min + br i1 %c.3.min, label %trap, label %check.3.max + +check.3.max: ; preds = %check.3.min + %c.3.max = icmp ult i32* %add.ptr.i36, %max + br i1 %c.3.max, label %check.1.min, label %trap + +check.1.min: ; preds = %check.3.max + %l1 = load i32, i32* %add.ptr.i36, align 4 + %add.ptr.i29 = getelementptr inbounds i32, i32* %src, i64 1 + %c.1.min = icmp ult i32* %add.ptr.i29, %min 
+ br i1 %c.1.min, label %trap, label %check.1.max + +check.1.max: ; preds = %check.1.min + %c.1.max = icmp ult i32* %add.ptr.i29, %max + br i1 %c.1.max, label %check.2.min, label %trap + +check.2.min: ; preds = %check.1.max + %l2 = load i32, i32* %add.ptr.i29, align 4 + %add.ptr.i = getelementptr inbounds i32, i32* %src, i64 2 + %c.2.min = icmp ult i32* %add.ptr.i, %min + br i1 %c.2.min, label %trap, label %check.2.max + +check.2.max: ; preds = %check.2.min + %c.2.max = icmp ult i32* %add.ptr.i, %max + br i1 %c.2.max, label %exit, label %trap + +exit: ; preds = %check.2.max + %l3 = load i32, i32* %add.ptr.i, align 4 + %add = add nsw i32 %l1, %l0 + %add8 = add nsw i32 %add, %l2 + %add9 = add nsw i32 %add8, %l3 + ret i32 %add9 +} + +define void @test.not.uge.ult(i8* %start, i8* %low, i8* %high) { +; CHECK-LABEL: @test.not.uge.ult( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[START:%.*]], i64 3 +; CHECK-NEXT: [[C_1:%.*]] = icmp uge i8* [[ADD_PTR_I]], [[HIGH:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: [[T_0:%.*]] = icmp ult i8* [[START]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[T_0]]) +; CHECK-NEXT: [[START_1:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 1 +; CHECK-NEXT: [[T_1:%.*]] = icmp ult i8* [[START_1]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[T_1]]) +; CHECK-NEXT: [[START_2:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 2 +; CHECK-NEXT: [[T_2:%.*]] = icmp ult i8* [[START_2]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[T_2]]) +; CHECK-NEXT: [[START_3:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 3 +; CHECK-NEXT: [[T_3:%.*]] = icmp ult i8* [[START_3]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[T_3]]) +; CHECK-NEXT: [[START_4:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 4 +; CHECK-NEXT: [[C_4:%.*]] = icmp ult i8* [[START_4]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[C_4]]) +; CHECK-NEXT: ret void +; +entry: + %add.ptr.i = getelementptr inbounds i8, i8* %start, i64 3 + %c.1 = icmp uge i8* %add.ptr.i, %high + br i1 %c.1, label %if.then, label %if.end + +if.then: ; preds = %entry + ret void + +if.end: ; preds = %entry + %t.0 = icmp ult i8* %start, %high + call void @use(i1 %t.0) + %start.1 = getelementptr inbounds i8, i8* %start, i64 1 + %t.1 = icmp ult i8* %start.1, %high + call void @use(i1 %t.1) + %start.2 = getelementptr inbounds i8, i8* %start, i64 2 + %t.2 = icmp ult i8* %start.2, %high + call void @use(i1 %t.2) + %start.3 = getelementptr inbounds i8, i8* %start, i64 3 + %t.3 = icmp ult i8* %start.3, %high + call void @use(i1 %t.3) + %start.4 = getelementptr inbounds i8, i8* %start, i64 4 + %c.4 = icmp ult i8* %start.4, %high + call void @use(i1 %c.4) + ret void +} + +define void @test.not.uge.ule(i8* %start, i8* %low, i8* %high) { +; CHECK-LABEL: @test.not.uge.ule( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[START:%.*]], i64 3 +; CHECK-NEXT: [[C_1:%.*]] = icmp uge i8* [[ADD_PTR_I]], [[HIGH:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: [[T_0:%.*]] = icmp ule i8* [[START]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[T_0]]) +; CHECK-NEXT: [[START_1:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 1 +; CHECK-NEXT: [[T_1:%.*]] = icmp ule i8* [[START_1]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[T_1]]) +; CHECK-NEXT: 
[[START_2:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 2 +; CHECK-NEXT: [[T_2:%.*]] = icmp ule i8* [[START_2]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[T_2]]) +; CHECK-NEXT: [[START_3:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 3 +; CHECK-NEXT: [[T_3:%.*]] = icmp ule i8* [[START_3]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[T_3]]) +; CHECK-NEXT: [[START_4:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 4 +; CHECK-NEXT: [[T_4:%.*]] = icmp ule i8* [[START_4]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[T_4]]) +; CHECK-NEXT: [[START_5:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 5 +; CHECK-NEXT: [[C_5:%.*]] = icmp ule i8* [[START_5]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[C_5]]) +; CHECK-NEXT: ret void +; +entry: + %add.ptr.i = getelementptr inbounds i8, i8* %start, i64 3 + %c.1 = icmp uge i8* %add.ptr.i, %high + br i1 %c.1, label %if.then, label %if.end + +if.then: ; preds = %entry + ret void + +if.end: ; preds = %entry + %t.0 = icmp ule i8* %start, %high + call void @use(i1 %t.0) + %start.1 = getelementptr inbounds i8, i8* %start, i64 1 + %t.1 = icmp ule i8* %start.1, %high + call void @use(i1 %t.1) + %start.2 = getelementptr inbounds i8, i8* %start, i64 2 + %t.2 = icmp ule i8* %start.2, %high + call void @use(i1 %t.2) + %start.3 = getelementptr inbounds i8, i8* %start, i64 3 + %t.3 = icmp ule i8* %start.3, %high + call void @use(i1 %t.3) + %start.4 = getelementptr inbounds i8, i8* %start, i64 4 + %t.4 = icmp ule i8* %start.4, %high + call void @use(i1 %t.4) + + %start.5 = getelementptr inbounds i8, i8* %start, i64 5 + %c.5 = icmp ule i8* %start.5, %high + call void @use(i1 %c.5) + + ret void +} + +define void @test.not.uge.ugt(i8* %start, i8* %low, i8* %high) { +; CHECK-LABEL: @test.not.uge.ugt( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[START:%.*]], i64 3 +; CHECK-NEXT: [[C_1:%.*]] = icmp uge i8* [[ADD_PTR_I]], [[HIGH:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: [[F_0:%.*]] = icmp ugt i8* [[START]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[F_0]]) +; CHECK-NEXT: [[START_1:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 1 +; CHECK-NEXT: [[F_1:%.*]] = icmp ugt i8* [[START_1]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[F_1]]) +; CHECK-NEXT: [[START_2:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 2 +; CHECK-NEXT: [[F_2:%.*]] = icmp ugt i8* [[START_2]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[F_2]]) +; CHECK-NEXT: [[START_3:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 3 +; CHECK-NEXT: [[F_3:%.*]] = icmp ugt i8* [[START_3]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[F_3]]) +; CHECK-NEXT: [[START_4:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 4 +; CHECK-NEXT: [[F_4:%.*]] = icmp ugt i8* [[START_4]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[F_4]]) +; CHECK-NEXT: [[START_5:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 5 +; CHECK-NEXT: [[C_5:%.*]] = icmp ugt i8* [[START_5]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[C_5]]) +; CHECK-NEXT: ret void +; +entry: + %add.ptr.i = getelementptr inbounds i8, i8* %start, i64 3 + %c.1 = icmp uge i8* %add.ptr.i, %high + br i1 %c.1, label %if.then, label %if.end + +if.then: ; preds = %entry + ret void + +if.end: ; preds = %entry + %f.0 = icmp ugt i8* %start, %high + call void @use(i1 %f.0) + + %start.1 = getelementptr inbounds i8, i8* %start, i64 1 + %f.1 = icmp ugt i8* %start.1, %high + call void @use(i1 
%f.1) + + %start.2 = getelementptr inbounds i8, i8* %start, i64 2 + %f.2 = icmp ugt i8* %start.2, %high + call void @use(i1 %f.2) + + %start.3 = getelementptr inbounds i8, i8* %start, i64 3 + %f.3 = icmp ugt i8* %start.3, %high + call void @use(i1 %f.3) + + %start.4 = getelementptr inbounds i8, i8* %start, i64 4 + %f.4 = icmp ugt i8* %start.4, %high + call void @use(i1 %f.4) + + %start.5 = getelementptr inbounds i8, i8* %start, i64 5 + %c.5 = icmp ugt i8* %start.5, %high + call void @use(i1 %c.5) + + ret void +} + +define void @test.not.uge.uge(i8* %start, i8* %low, i8* %high) { +; CHECK-LABEL: @test.not.uge.uge( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[START:%.*]], i64 3 +; CHECK-NEXT: [[C_1:%.*]] = icmp uge i8* [[ADD_PTR_I]], [[HIGH:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: [[F_0:%.*]] = icmp ugt i8* [[START]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[F_0]]) +; CHECK-NEXT: [[START_1:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 1 +; CHECK-NEXT: [[F_1:%.*]] = icmp uge i8* [[START_1]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[F_1]]) +; CHECK-NEXT: [[START_2:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 2 +; CHECK-NEXT: [[F_2:%.*]] = icmp uge i8* [[START_2]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[F_2]]) +; CHECK-NEXT: [[START_3:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 3 +; CHECK-NEXT: [[F_3:%.*]] = icmp uge i8* [[START_3]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[F_3]]) +; CHECK-NEXT: [[START_4:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 4 +; CHECK-NEXT: [[C_4:%.*]] = icmp uge i8* [[START_4]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[C_4]]) +; CHECK-NEXT: [[START_5:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 5 +; CHECK-NEXT: [[C_5:%.*]] = icmp uge i8* [[START_5]], [[HIGH]] +; CHECK-NEXT: call void @use(i1 [[C_5]]) +; CHECK-NEXT: ret void +; +entry: + %add.ptr.i = getelementptr inbounds i8, i8* %start, i64 3 + %c.1 = icmp uge i8* %add.ptr.i, %high + br i1 %c.1, label %if.then, label %if.end + +if.then: ; preds = %entry + ret void + +if.end: ; preds = %entry + %f.0 = icmp ugt i8* %start, %high + call void @use(i1 %f.0) + + %start.1 = getelementptr inbounds i8, i8* %start, i64 1 + %f.1 = icmp uge i8* %start.1, %high + call void @use(i1 %f.1) + + %start.2 = getelementptr inbounds i8, i8* %start, i64 2 + %f.2 = icmp uge i8* %start.2, %high + call void @use(i1 %f.2) + + %start.3 = getelementptr inbounds i8, i8* %start, i64 3 + %f.3 = icmp uge i8* %start.3, %high + call void @use(i1 %f.3) + + %start.4 = getelementptr inbounds i8, i8* %start, i64 4 + %c.4 = icmp uge i8* %start.4, %high + call void @use(i1 %c.4) + + %start.5 = getelementptr inbounds i8, i8* %start, i64 5 + %c.5 = icmp uge i8* %start.5, %high + call void @use(i1 %c.5) + + ret void +} + + +declare void @use(i1) +declare void @llvm.trap() diff --git a/llvm/test/Transforms/ConstraintElimination/i128.ll b/llvm/test/Transforms/ConstraintElimination/i128.ll new file mode 100644 index 0000000000000..6a10ea770dd58 --- /dev/null +++ b/llvm/test/Transforms/ConstraintElimination/i128.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S %s | FileCheck %s + +declare void @use(i1) + +define void @test_unsigned_too_large(i128 %x) { +; CHECK-LABEL: @test_unsigned_too_large( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp ule i128 [[X:%.*]], 
12345678901234123123123 +; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[C_2:%.*]] = icmp ult i128 [[X]], -12345678901234123123123 +; CHECK-NEXT: call void @use(i1 [[C_2]]) +; CHECK-NEXT: [[C_3:%.*]] = icmp uge i128 [[X]], -12345678901234123123123 +; CHECK-NEXT: call void @use(i1 [[C_3]]) +; CHECK-NEXT: [[C_4:%.*]] = icmp uge i128 [[X]], -12345678901234123123123 +; CHECK-NEXT: call void @use(i1 [[C_4]]) +; CHECK-NEXT: ret void +; CHECK: bb2: +; CHECK-NEXT: ret void +; +entry: + %c.1 = icmp ule i128 %x, 12345678901234123123123 + br i1 %c.1, label %bb1, label %bb2 + +bb1: + %c.2 = icmp ult i128 %x, -12345678901234123123123 + call void @use(i1 %c.2) + %c.3 = icmp uge i128 %x, -12345678901234123123123 + call void @use(i1 %c.3) + %c.4 = icmp uge i128 %x, -12345678901234123123123 + call void @use(i1 %c.4) + ret void + +bb2: + ret void +} diff --git a/llvm/test/Transforms/ConstraintElimination/loops.ll b/llvm/test/Transforms/ConstraintElimination/loops.ll new file mode 100644 index 0000000000000..be25308c46dfe --- /dev/null +++ b/llvm/test/Transforms/ConstraintElimination/loops.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S %s | FileCheck %s + +; Make sure conditions in loops are not used to simplify themselves. + +define void @loop1(float* %T, float* %x, i32 %points, i32 %trigint) { +; CHECK-LABEL: @loop1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[POINTS:%.*]] to i64 +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 -8 +; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[POINTS]], 1 +; CHECK-NEXT: [[IDX_EXT2:%.*]] = sext i32 [[SHR]] to i64 +; CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds float, float* [[X]], i64 [[IDX_EXT2]] +; CHECK-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds float, float* [[ADD_PTR3]], i64 -8 +; CHECK-NEXT: br label [[DO_BODY:%.*]] +; CHECK: do.body: +; CHECK-NEXT: [[X2_0:%.*]] = phi float* [ [[ADD_PTR4]], [[ENTRY:%.*]] ], [ [[ADD_PTR106:%.*]], [[DO_BODY]] ] +; CHECK-NEXT: [[X1_0:%.*]] = phi float* [ [[ADD_PTR1]], [[ENTRY]] ], [ [[ADD_PTR105:%.*]], [[DO_BODY]] ] +; CHECK-NEXT: [[ADD_PTR105]] = getelementptr inbounds float, float* [[X1_0]], i64 -8 +; CHECK-NEXT: [[ADD_PTR106]] = getelementptr inbounds float, float* [[X2_0]], i64 -8 +; CHECK-NEXT: [[CMP:%.*]] = icmp uge float* [[ADD_PTR106]], [[X]] +; CHECK-NEXT: br i1 [[CMP]], label [[DO_BODY]], label [[DO_END:%.*]] +; CHECK: do.end: +; CHECK-NEXT: ret void +; +entry: + %idx.ext = sext i32 %points to i64 + %add.ptr = getelementptr inbounds float, float* %x, i64 %idx.ext + %add.ptr1 = getelementptr inbounds float, float* %add.ptr, i64 -8 + %shr = ashr i32 %points, 1 + %idx.ext2 = sext i32 %shr to i64 + %add.ptr3 = getelementptr inbounds float, float* %x, i64 %idx.ext2 + %add.ptr4 = getelementptr inbounds float, float* %add.ptr3, i64 -8 + br label %do.body + +do.body: ; preds = %do.body, %entry + %x2.0 = phi float* [ %add.ptr4, %entry ], [ %add.ptr106, %do.body ] + %x1.0 = phi float* [ %add.ptr1, %entry ], [ %add.ptr105, %do.body ] + %add.ptr105 = getelementptr inbounds float, float* %x1.0, i64 -8 + %add.ptr106 = getelementptr inbounds float, float* %x2.0, i64 -8 + %cmp = icmp uge float* %add.ptr106, %x + br i1 %cmp, label %do.body, label %do.end + +do.end: ; preds = %do.body + ret void +} diff --git a/llvm/test/Transforms/ConstraintElimination/mixed.ll 
b/llvm/test/Transforms/ConstraintElimination/mixed.ll new file mode 100644 index 0000000000000..e4a264a8f0a0f --- /dev/null +++ b/llvm/test/Transforms/ConstraintElimination/mixed.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S %s | FileCheck %s + +; Make sure we do not incorrectly add variables to the system. + +define i1 @test(i32* %p1, i32* %p2, i32 %num_rows, i32 %start_row, i1 %c) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[NUM_ROWS:%.*]], [[START_ROW:%.*]] +; CHECK-NEXT: [[L3:%.*]] = load i32, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[L3]], [[START_ROW]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_END36:%.*]], label [[IF_END36]] +; CHECK: if.end36: +; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[P2:%.*]], align 4 +; CHECK-NEXT: [[CMP37:%.*]] = icmp ult i32 [[L1]], [[ADD]] +; CHECK-NEXT: br i1 [[CMP37]], label [[IF_THEN39:%.*]], label [[EXIT:%.*]] +; CHECK: if.then39: +; CHECK-NEXT: [[CMP41:%.*]] = icmp ult i32 [[L1]], [[START_ROW]] +; CHECK-NEXT: ret i1 [[CMP41]] +; CHECK: exit: +; CHECK-NEXT: ret i1 false +; +entry: + %add = add i32 %num_rows, %start_row + %l3 = load i32, i32* %p1, align 4 + %cmp6 = icmp ugt i32 %l3, %start_row + br i1 %c, label %if.end36, label %if.end36 + +if.end36: ; preds = %if.then11 + %l1 = load i32, i32* %p2, align 4 + %cmp37 = icmp ult i32 %l1, %add + br i1 %cmp37, label %if.then39, label %exit + +if.then39: ; preds = %if.end36 + %cmp41 = icmp ult i32 %l1, %start_row + ret i1 %cmp41 + +exit: ; preds = %if.end36 + ret i1 false +} diff --git a/llvm/test/Transforms/ConstraintElimination/uge.ll b/llvm/test/Transforms/ConstraintElimination/uge.ll new file mode 100644 index 0000000000000..ca91733d2af98 --- /dev/null +++ b/llvm/test/Transforms/ConstraintElimination/uge.ll @@ -0,0 +1,255 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S %s | FileCheck %s + +declare void @use(i1) + +define void @test_1_variable_constraint(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @test_1_variable_constraint( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp uge i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[T_1:%.*]] = icmp uge i32 [[X]], [[Y]] +; CHECK-NEXT: call void @use(i1 [[T_1]]) +; CHECK-NEXT: [[C_2:%.*]] = icmp uge i32 [[X]], 10 +; CHECK-NEXT: call void @use(i1 [[C_2]]) +; CHECK-NEXT: [[C_3:%.*]] = icmp uge i32 [[Y]], [[X]] +; CHECK-NEXT: call void @use(i1 [[C_3]]) +; CHECK-NEXT: [[C_4:%.*]] = icmp uge i32 10, [[X]] +; CHECK-NEXT: call void @use(i1 [[C_4]]) +; CHECK-NEXT: ret void +; CHECK: bb2: +; CHECK-NEXT: [[T_2:%.*]] = icmp uge i32 [[Y]], [[X]] +; CHECK-NEXT: call void @use(i1 [[T_2]]) +; CHECK-NEXT: [[F_1:%.*]] = icmp uge i32 [[X]], [[Y]] +; CHECK-NEXT: call void @use(i1 [[F_1]]) +; CHECK-NEXT: [[C_5:%.*]] = icmp uge i32 [[X]], 10 +; CHECK-NEXT: call void @use(i1 [[C_5]]) +; CHECK-NEXT: [[C_6:%.*]] = icmp uge i32 10, [[X]] +; CHECK-NEXT: call void @use(i1 [[C_6]]) +; CHECK-NEXT: ret void +; +entry: + %c.1 = icmp uge i32 %x, %y + br i1 %c.1, label %bb1, label %bb2 + +bb1: + %t.1 = icmp uge i32 %x, %y + call void @use(i1 %t.1) + %c.2 = icmp uge i32 %x, 10 + call void @use(i1 %c.2) + %c.3 = icmp uge i32 %y, %x + call void @use(i1 %c.3) + %c.4 = icmp uge i32 10, %x + call void @use(i1 %c.4) + ret void + +bb2: + %t.2 = icmp uge i32 %y, %x + call void @use(i1 %t.2) + %f.1 = icmp uge i32 %x, %y + call void @use(i1 %f.1) + %c.5 = 
icmp uge i32 %x, 10 + call void @use(i1 %c.5) + %c.6 = icmp uge i32 10, %x + call void @use(i1 %c.6) + ret void +} + +define void @test_1_constant_constraint(i32 %x) { +; CHECK-LABEL: @test_1_constant_constraint( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp uge i32 [[X:%.*]], 10 +; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[T_1:%.*]] = icmp uge i32 [[X]], 10 +; CHECK-NEXT: call void @use(i1 [[T_1]]) +; CHECK-NEXT: [[T_2:%.*]] = icmp uge i32 [[X]], 9 +; CHECK-NEXT: call void @use(i1 [[T_2]]) +; CHECK-NEXT: [[C_2:%.*]] = icmp uge i32 [[X]], 11 +; CHECK-NEXT: call void @use(i1 [[C_2]]) +; CHECK-NEXT: [[C_4:%.*]] = icmp uge i32 10, [[X]] +; CHECK-NEXT: call void @use(i1 [[C_4]]) +; CHECK-NEXT: ret void +; CHECK: bb2: +; CHECK-NEXT: [[T_3:%.*]] = icmp uge i32 11, [[X]] +; CHECK-NEXT: call void @use(i1 [[T_3]]) +; CHECK-NEXT: [[F_1:%.*]] = icmp uge i32 [[X]], 10 +; CHECK-NEXT: call void @use(i1 [[F_1]]) +; CHECK-NEXT: [[F_1_1:%.*]] = icmp uge i32 [[X]], 10 +; CHECK-NEXT: call void @use(i1 [[F_1_1]]) +; CHECK-NEXT: [[C_5:%.*]] = icmp uge i32 [[X]], 9 +; CHECK-NEXT: call void @use(i1 [[C_5]]) +; CHECK-NEXT: [[C_6:%.*]] = icmp uge i32 1, [[X]] +; CHECK-NEXT: call void @use(i1 [[C_6]]) +; CHECK-NEXT: ret void +; +entry: + %c.1 = icmp uge i32 %x, 10 + br i1 %c.1, label %bb1, label %bb2 + +bb1: + %t.1 = icmp uge i32 %x, 10 + call void @use(i1 %t.1) + %t.2 = icmp uge i32 %x, 9 + call void @use(i1 %t.2) + %c.2 = icmp uge i32 %x, 11 + call void @use(i1 %c.2) + %c.4 = icmp uge i32 10, %x + call void @use(i1 %c.4) + ret void + +bb2: + %t.3 = icmp uge i32 11, %x + call void @use(i1 %t.3) + %f.1 = icmp uge i32 %x, 10 + call void @use(i1 %f.1) + + + %f.1.1 = icmp uge i32 %x, 10 + call void @use(i1 %f.1.1) + %c.5 = icmp uge i32 %x, 9 + call void @use(i1 %c.5) + %c.6 = icmp uge i32 1, %x + call void @use(i1 %c.6) + ret void +} + +define i32 @test1(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp uge i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[EXIT:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[C_2:%.*]] = icmp uge i32 [[Y]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[C_2]], label [[BB2:%.*]], label [[EXIT]] +; CHECK: bb2: +; CHECK-NEXT: [[C_3:%.*]] = icmp uge i32 [[X]], [[Z]] +; CHECK-NEXT: br i1 [[C_3]], label [[BB3:%.*]], label [[EXIT]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 10 +; CHECK: exit: +; CHECK-NEXT: ret i32 20 +; +entry: + %c.1 = icmp uge i32 %x, %y + br i1 %c.1, label %bb1, label %exit + +bb1: + %c.2 = icmp uge i32 %y, %z + br i1 %c.2, label %bb2, label %exit + +bb2: + %c.3 = icmp uge i32 %x, %z + br i1 %c.3, label %bb3, label %exit + +bb3: + ret i32 10 + +exit: + ret i32 20 +} + + +define i32 @test2(i32 %x, i32 %y, i32 %z, i32 %a) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp uge i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[EXIT:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[C_2:%.*]] = icmp uge i32 [[Y]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[C_2]], label [[BB2:%.*]], label [[EXIT]] +; CHECK: bb2: +; CHECK-NEXT: [[C_3:%.*]] = icmp uge i32 [[X]], [[A:%.*]] +; CHECK-NEXT: br i1 [[C_3]], label [[BB3:%.*]], label [[EXIT]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 10 +; CHECK: exit: +; CHECK-NEXT: ret i32 20 +; +entry: + %c.1 = icmp uge i32 %x, %y + br i1 %c.1, label %bb1, label %exit + +bb1: + %c.2 = icmp uge i32 %y, %z + br i1 %c.2, label %bb2, label %exit + +bb2: + %c.3 = icmp uge i32 %x, %a + br i1 %c.3, 
label %bb3, label %exit + +bb3: + ret i32 10 + +exit: + ret i32 20 +} + + +define i32 @test3(i32 %x, i32 %y) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp uge i32 [[X:%.*]], 10 +; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[EXIT:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[C_2:%.*]] = icmp uge i32 [[Y:%.*]], 20 +; CHECK-NEXT: br i1 [[C_2]], label [[BB2:%.*]], label [[EXIT]] +; CHECK: bb2: +; CHECK-NEXT: ret i32 10 +; CHECK: exit: +; CHECK-NEXT: ret i32 20 +; +entry: + %c.1 = icmp uge i32 %x, 10 + br i1 %c.1, label %bb1, label %exit + +bb1: + %c.2 = icmp uge i32 %y, 20 + br i1 %c.2, label %bb2, label %exit + +bb2: + ret i32 10 + +exit: + ret i32 20 +} + +define i32 @test4(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp uge i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[EXIT:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[C_2:%.*]] = icmp uge i32 [[Y]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[C_2]], label [[BB2:%.*]], label [[EXIT]] +; CHECK: bb2: +; CHECK-NEXT: [[T_1:%.*]] = icmp uge i32 [[X]], [[Z]] +; CHECK-NEXT: call void @use(i1 [[T_1]]) +; CHECK-NEXT: [[U_1:%.*]] = icmp eq i32 [[X]], [[Z]] +; CHECK-NEXT: call void @use(i1 [[U_1]]) +; CHECK-NEXT: ret i32 10 +; CHECK: exit: +; CHECK-NEXT: ret i32 20 +; +entry: + %c.1 = icmp uge i32 %x, %y + br i1 %c.1, label %bb1, label %exit + +bb1: + %c.2 = icmp uge i32 %y, %z + br i1 %c.2, label %bb2, label %exit + +bb2: + %t.1 = icmp uge i32 %x, %z + call void @use(i1 %t.1) + %u.1 = icmp eq i32 %x, %z + call void @use(i1 %u.1) + ret i32 10 + + +exit: + ret i32 20 +} + + diff --git a/llvm/test/Transforms/ConstraintElimination/ugt-ule.ll b/llvm/test/Transforms/ConstraintElimination/ugt-ule.ll new file mode 100644 index 0000000000000..c49ce7360cd68 --- /dev/null +++ b/llvm/test/Transforms/ConstraintElimination/ugt-ule.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S %s | FileCheck %s + +declare void @use(i1) + +define void @test(i8* %m, i8* %ptr) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i8* [[M:%.*]], [[PTR:%.*]] +; CHECK-NEXT: br i1 [[CMP_1]], label [[BB_1:%.*]], label [[BB_2:%.*]] +; CHECK: bb.1: +; CHECK-NEXT: [[CMP_2:%.*]] = icmp uge i8* [[M]], [[PTR]] +; CHECK-NEXT: call void @use(i1 [[CMP_2]]) +; CHECK-NEXT: ret void +; CHECK: bb.2: +; CHECK-NEXT: br label [[BB_2_NEXT:%.*]] +; CHECK: bb.2.next: +; CHECK-NEXT: [[CMP_3:%.*]] = icmp uge i8* [[M]], [[PTR]] +; CHECK-NEXT: call void @use(i1 [[CMP_3]]) +; CHECK-NEXT: ret void +; +entry: + %cmp.1 = icmp ult i8* %m, %ptr + br i1 %cmp.1, label %bb.1, label %bb.2 + +bb.1: + %cmp.2 = icmp uge i8* %m, %ptr + call void @use(i1 %cmp.2) + ret void + +bb.2: + br label %bb.2.next + +bb.2.next: + %cmp.3 = icmp uge i8* %m, %ptr + call void @use(i1 %cmp.3) + ret void +} diff --git a/llvm/test/Transforms/ConstraintElimination/ule.ll b/llvm/test/Transforms/ConstraintElimination/ule.ll new file mode 100644 index 0000000000000..2cb3750fad243 --- /dev/null +++ b/llvm/test/Transforms/ConstraintElimination/ule.ll @@ -0,0 +1,254 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S %s | FileCheck %s + +declare void @use(i1) + +define void @test_1_variable_constraint(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @test_1_variable_constraint( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp ule i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], 
label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[T_1:%.*]] = icmp ule i32 [[X]], [[Y]] +; CHECK-NEXT: call void @use(i1 [[T_1]]) +; CHECK-NEXT: [[C_2:%.*]] = icmp ule i32 [[X]], 10 +; CHECK-NEXT: call void @use(i1 [[C_2]]) +; CHECK-NEXT: [[C_3:%.*]] = icmp ule i32 [[Y]], [[X]] +; CHECK-NEXT: call void @use(i1 [[C_3]]) +; CHECK-NEXT: [[C_4:%.*]] = icmp ule i32 10, [[X]] +; CHECK-NEXT: call void @use(i1 [[C_4]]) +; CHECK-NEXT: ret void +; CHECK: bb2: +; CHECK-NEXT: [[T_2:%.*]] = icmp ule i32 [[Y]], [[X]] +; CHECK-NEXT: call void @use(i1 [[T_2]]) +; CHECK-NEXT: [[F_1:%.*]] = icmp ule i32 [[X]], [[Y]] +; CHECK-NEXT: call void @use(i1 [[F_1]]) +; CHECK-NEXT: [[C_5:%.*]] = icmp ule i32 [[X]], 10 +; CHECK-NEXT: call void @use(i1 [[C_5]]) +; CHECK-NEXT: [[C_6:%.*]] = icmp ule i32 10, [[X]] +; CHECK-NEXT: call void @use(i1 [[C_6]]) +; CHECK-NEXT: ret void +; +entry: + %c.1 = icmp ule i32 %x, %y + br i1 %c.1, label %bb1, label %bb2 + +bb1: + %t.1 = icmp ule i32 %x, %y + call void @use(i1 %t.1) + %c.2 = icmp ule i32 %x, 10 + call void @use(i1 %c.2) + %c.3 = icmp ule i32 %y, %x + call void @use(i1 %c.3) + %c.4 = icmp ule i32 10, %x + call void @use(i1 %c.4) + ret void + +bb2: + %t.2 = icmp ule i32 %y, %x + call void @use(i1 %t.2) + %f.1 = icmp ule i32 %x, %y + call void @use(i1 %f.1) + %c.5 = icmp ule i32 %x, 10 + call void @use(i1 %c.5) + %c.6 = icmp ule i32 10, %x + call void @use(i1 %c.6) + ret void +} + +define void @test_1_constant_constraint(i32 %x) { +; CHECK-LABEL: @test_1_constant_constraint( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp ule i32 [[X:%.*]], 10 +; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[T_1:%.*]] = icmp ule i32 [[X]], 10 +; CHECK-NEXT: call void @use(i1 [[T_1]]) +; CHECK-NEXT: [[T_2:%.*]] = icmp ule i32 [[X]], 11 +; CHECK-NEXT: call void @use(i1 [[T_2]]) +; CHECK-NEXT: [[C_2:%.*]] = icmp ule i32 [[X]], 9 +; CHECK-NEXT: call void @use(i1 [[C_2]]) +; CHECK-NEXT: [[C_4:%.*]] = icmp ule i32 10, [[X]] +; CHECK-NEXT: call void @use(i1 [[C_4]]) +; CHECK-NEXT: ret void +; CHECK: bb2: +; CHECK-NEXT: [[T_3:%.*]] = icmp ule i32 10, [[X]] +; CHECK-NEXT: call void @use(i1 [[T_3]]) +; CHECK-NEXT: [[F_1:%.*]] = icmp ule i32 [[X]], 9 +; CHECK-NEXT: call void @use(i1 [[F_1]]) +; CHECK-NEXT: [[F_1_1:%.*]] = icmp ule i32 [[X]], 10 +; CHECK-NEXT: call void @use(i1 [[F_1_1]]) +; CHECK-NEXT: [[C_5:%.*]] = icmp ule i32 [[X]], 11 +; CHECK-NEXT: call void @use(i1 [[C_5]]) +; CHECK-NEXT: [[C_6:%.*]] = icmp ule i32 10, [[X]] +; CHECK-NEXT: call void @use(i1 [[C_6]]) +; CHECK-NEXT: ret void +; +entry: + %c.1 = icmp ule i32 %x, 10 + br i1 %c.1, label %bb1, label %bb2 + +bb1: + %t.1 = icmp ule i32 %x, 10 + call void @use(i1 %t.1) + %t.2 = icmp ule i32 %x, 11 + call void @use(i1 %t.2) + %c.2 = icmp ule i32 %x, 9 + call void @use(i1 %c.2) + %c.4 = icmp ule i32 10, %x + call void @use(i1 %c.4) + ret void + +bb2: + %t.3 = icmp ule i32 10, %x + call void @use(i1 %t.3) + %f.1 = icmp ule i32 %x, 9 + call void @use(i1 %f.1) + + + %f.1.1 = icmp ule i32 %x, 10 + call void @use(i1 %f.1.1) + %c.5 = icmp ule i32 %x, 11 + call void @use(i1 %c.5) + %c.6 = icmp ule i32 10, %x + call void @use(i1 %c.6) + ret void +} + + +define i32 @test1(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp ule i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[EXIT:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[C_2:%.*]] = icmp ule i32 [[Y]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[C_2]], label [[BB2:%.*]], 
label [[EXIT]] +; CHECK: bb2: +; CHECK-NEXT: [[C_3:%.*]] = icmp ule i32 [[X]], [[Z]] +; CHECK-NEXT: br i1 [[C_3]], label [[BB3:%.*]], label [[EXIT]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 10 +; CHECK: exit: +; CHECK-NEXT: ret i32 20 +; +entry: + %c.1 = icmp ule i32 %x, %y + br i1 %c.1, label %bb1, label %exit + +bb1: + %c.2 = icmp ule i32 %y, %z + br i1 %c.2, label %bb2, label %exit + +bb2: + %c.3 = icmp ule i32 %x, %z + br i1 %c.3, label %bb3, label %exit + +bb3: + ret i32 10 + +exit: + ret i32 20 +} + + +define i32 @test2(i32 %x, i32 %y, i32 %z, i32 %a) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp ule i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[EXIT:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[C_2:%.*]] = icmp ule i32 [[Y]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[C_2]], label [[BB2:%.*]], label [[EXIT]] +; CHECK: bb2: +; CHECK-NEXT: [[C_3:%.*]] = icmp ule i32 [[X]], [[A:%.*]] +; CHECK-NEXT: br i1 [[C_3]], label [[BB3:%.*]], label [[EXIT]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 10 +; CHECK: exit: +; CHECK-NEXT: ret i32 20 +; +entry: + %c.1 = icmp ule i32 %x, %y + br i1 %c.1, label %bb1, label %exit + +bb1: + %c.2 = icmp ule i32 %y, %z + br i1 %c.2, label %bb2, label %exit + +bb2: + %c.3 = icmp ule i32 %x, %a + br i1 %c.3, label %bb3, label %exit + +bb3: + ret i32 10 + +exit: + ret i32 20 +} + + +define i32 @test3(i32 %x, i32 %y) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp ule i32 [[X:%.*]], 10 +; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[EXIT:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[C_2:%.*]] = icmp ule i32 [[Y:%.*]], 20 +; CHECK-NEXT: br i1 [[C_2]], label [[BB2:%.*]], label [[EXIT]] +; CHECK: bb2: +; CHECK-NEXT: ret i32 10 +; CHECK: exit: +; CHECK-NEXT: ret i32 20 +; +entry: + %c.1 = icmp ule i32 %x, 10 + br i1 %c.1, label %bb1, label %exit + +bb1: + %c.2 = icmp ule i32 %y, 20 + br i1 %c.2, label %bb2, label %exit + +bb2: + ret i32 10 + +exit: + ret i32 20 +} + +define i32 @test4(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp ule i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[EXIT:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[C_2:%.*]] = icmp ule i32 [[Y]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[C_2]], label [[BB2:%.*]], label [[EXIT]] +; CHECK: bb2: +; CHECK-NEXT: [[T_1:%.*]] = icmp ule i32 [[X]], [[Z]] +; CHECK-NEXT: call void @use(i1 [[T_1]]) +; CHECK-NEXT: [[U_1:%.*]] = icmp eq i32 [[X]], [[Z]] +; CHECK-NEXT: call void @use(i1 [[U_1]]) +; CHECK-NEXT: ret i32 10 +; CHECK: exit: +; CHECK-NEXT: ret i32 20 +; +entry: + %c.1 = icmp ule i32 %x, %y + br i1 %c.1, label %bb1, label %exit + +bb1: + %c.2 = icmp ule i32 %y, %z + br i1 %c.2, label %bb2, label %exit + +bb2: + %t.1 = icmp ule i32 %x, %z + call void @use(i1 %t.1) + %u.1 = icmp eq i32 %x, %z + call void @use(i1 %u.1) + ret i32 10 + + +exit: + ret i32 20 +} From 2744c2e2957221c8e9379e2232790c3e56efd90d Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Tue, 15 Sep 2020 11:40:05 +0100 Subject: [PATCH 0689/1079] [libcxx] Disable failing test for no-exceptions build This test tries to create a 2 GiB std::string, catching the bad_alloc exception if the allocation fails. However, for no-exceptions builds there is no way for the error to be reported, so this crashes with a null pointer dereference. 
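
In other words, the existing TEST_HAS_NO_EXCEPTIONS guards around the
try/catch are not enough here: with exceptions disabled the 2 GiB allocation
can still fail, there is just no path on which that failure can be reported,
so the only safe option is presumably to mark the test UNSUPPORTED for
no-exceptions builds.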
Differential revision: https://reviews.llvm.org/D87682 --- .../streambuf.put.area/pbump2gig.pass.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/pbump2gig.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/pbump2gig.pass.cpp index eee48f3dfdb12..e34dbc999592f 100644 --- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/pbump2gig.pass.cpp +++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/pbump2gig.pass.cpp @@ -15,6 +15,10 @@ // // REQUIRES: long_tests +// Unsupported for no-exceptions builds because they have no way to report an +// allocation failure when attempting to allocate the 2GiB string. +// UNSUPPORTED: no-exceptions + #include #include #include "test_macros.h" @@ -28,18 +32,14 @@ struct SB : std::stringbuf int main(int, char**) { -#ifndef TEST_HAS_NO_EXCEPTIONS try { -#endif std::string str(2147483648, 'a'); SB sb; sb.str(str); assert(sb.pubpbase() <= sb.pubpptr()); -#ifndef TEST_HAS_NO_EXCEPTIONS } catch (const std::length_error &) {} // maybe the string can't take 2GB catch (const std::bad_alloc &) {} // maybe we don't have enough RAM -#endif return 0; } From eb66b04cbecfbc971bf8b8abbb4c58dbd4a7564a Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 15 Sep 2020 08:38:51 -0400 Subject: [PATCH 0690/1079] [InstCombine] improve test names; NFC This is not a valid transform unless we can prove that the program does not read errno after the pow call and before some other function changes it. --- llvm/test/Transforms/InstCombine/pow-1.ll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/pow-1.ll b/llvm/test/Transforms/InstCombine/pow-1.ll index 724f004e6ca99..dfb62f6d0af0e 100644 --- a/llvm/test/Transforms/InstCombine/pow-1.ll +++ b/llvm/test/Transforms/InstCombine/pow-1.ll @@ -247,8 +247,8 @@ define <2 x double> @test_simplify6v(<2 x double> %x) { ; Check pow(x, 0.5) -> fabs(sqrt(x)), where x != -infinity. -define float @test_simplify7(float %x) { -; CHECK-LABEL: @test_simplify7( +define float @powf_libcall_to_select_sqrt(float %x) { +; CHECK-LABEL: @powf_libcall_to_select_sqrt( ; ANY-NEXT: [[SQRTF:%.*]] = call float @sqrtf(float [[X:%.*]]) ; ANY-NEXT: [[ABS:%.*]] = call float @llvm.fabs.f32(float [[SQRTF]]) ; ANY-NEXT: [[ISINF:%.*]] = fcmp oeq float [[X]], 0xFFF0000000000000 @@ -275,8 +275,8 @@ define float @test_simplify7(float %x) { ret float %retval } -define double @test_simplify8(double %x) { -; CHECK-LABEL: @test_simplify8( +define double @pow_libcall_to_select_sqrt(double %x) { +; CHECK-LABEL: @pow_libcall_to_select_sqrt( ; LIB-NEXT: [[SQRT:%.*]] = call double @sqrt(double [[X:%.*]]) ; LIB-NEXT: [[ABS:%.*]] = call double @llvm.fabs.f64(double [[SQRT]]) ; LIB-NEXT: [[ISINF:%.*]] = fcmp oeq double [[X]], 0xFFF0000000000000 From 7ffc9aa538dfa3facbbb09d3b0d517a59e967d0e Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 15 Sep 2020 09:21:20 -0400 Subject: [PATCH 0691/1079] [InstCombine] add RUN to show miscompile of pow expansion; NFC The code drops the sqrt op instead of bailing out, so this is very wrong. 
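
Concretely, when the sqrt libcall is unavailable (the new RUN line passes
-disable-builtin sqrt), the expansion keeps only the repeated squarings,
roughly:

  %square = fmul fast double %x, %x      ; x^2
  %1 = fmul fast double %square, %square ; x^4
  %2 = fmul fast double %1, %1           ; x^8
  %3 = fmul fast double %2, %2           ; x^16

so pow(x, 16.5) is folded to x^16 and pow(x, -16.5) to 1/x^16 - the sqrt(x)
factor is silently dropped (see the NOSQRT check lines below).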
--- llvm/test/Transforms/InstCombine/pow-4.ll | 56 +++++++++++++++-------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/pow-4.ll b/llvm/test/Transforms/InstCombine/pow-4.ll index 4aac27fe72f0c..e68dfb857caab 100644 --- a/llvm/test/Transforms/InstCombine/pow-4.ll +++ b/llvm/test/Transforms/InstCombine/pow-4.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -instcombine -S < %s | FileCheck %s +; RUN: opt -instcombine -S < %s | FileCheck %s --check-prefixes=CHECK,SQRT +; RUN: opt -instcombine -S < %s -disable-builtin sqrt | FileCheck %s --check-prefixes=CHECK,NOSQRT declare double @llvm.pow.f64(double, double) declare float @llvm.pow.f32(float, float) @@ -151,31 +152,50 @@ define double @test_simplify_neg_16_5(double %x) { } ; pow(x, 16.5) with double +; FIXME: This is wrong without sqrt. + define double @test_simplify_16_5_libcall(double %x) { -; CHECK-LABEL: @test_simplify_16_5_libcall( -; CHECK-NEXT: [[SQRT:%.*]] = call fast double @sqrt(double [[X:%.*]]) -; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]] -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]] -; CHECK-NEXT: ret double [[TMP4]] +; SQRT-LABEL: @test_simplify_16_5_libcall( +; SQRT-NEXT: [[SQRT:%.*]] = call fast double @sqrt(double [[X:%.*]]) +; SQRT-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]] +; SQRT-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]] +; SQRT-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]] +; SQRT-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] +; SQRT-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]] +; SQRT-NEXT: ret double [[TMP4]] +; +; NOSQRT-LABEL: @test_simplify_16_5_libcall( +; NOSQRT-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]] +; NOSQRT-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]] +; NOSQRT-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]] +; NOSQRT-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] +; NOSQRT-NEXT: ret double [[TMP3]] ; %1 = call fast double @pow(double %x, double 1.650000e+01) ret double %1 } ; pow(x, -16.5) with double +; FIXME: This is wrong without sqrt. 
+
 define double @test_simplify_neg_16_5_libcall(double %x) {
-; CHECK-LABEL: @test_simplify_neg_16_5_libcall(
-; CHECK-NEXT:    [[SQRT:%.*]] = call fast double @sqrt(double [[X:%.*]])
-; CHECK-NEXT:    [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]]
-; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[TMP4]]
-; CHECK-NEXT:    ret double [[RECIPROCAL]]
+; SQRT-LABEL: @test_simplify_neg_16_5_libcall(
+; SQRT-NEXT:    [[SQRT:%.*]] = call fast double @sqrt(double [[X:%.*]])
+; SQRT-NEXT:    [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
+; SQRT-NEXT:    [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
+; SQRT-NEXT:    [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
+; SQRT-NEXT:    [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
+; SQRT-NEXT:    [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]]
+; SQRT-NEXT:    [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[TMP4]]
+; SQRT-NEXT:    ret double [[RECIPROCAL]]
+;
+; NOSQRT-LABEL: @test_simplify_neg_16_5_libcall(
+; NOSQRT-NEXT:    [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
+; NOSQRT-NEXT:    [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
+; NOSQRT-NEXT:    [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
+; NOSQRT-NEXT:    [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
+; NOSQRT-NEXT:    [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[TMP3]]
+; NOSQRT-NEXT:    ret double [[RECIPROCAL]]
 ;
   %1 = call fast double @pow(double %x, double -1.650000e+01)
   ret double %1

From aa57c1c967078a8c02e7fc2c837853dbd7cc66f4 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Tue, 15 Sep 2020 09:27:16 -0400
Subject: [PATCH 0692/1079] [InstCombine] fix bug in pow expansion

There is at least one other bug related to pow -> sqrt transforms:
http://lists.llvm.org/pipermail/llvm-dev/2020-September/145051.html
...but we probably can't solve that without fixing this first.

---
 llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp |  2 ++
 llvm/test/Transforms/InstCombine/pow-4.ll      | 17 ++++-------------
 2 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 34eb9e1b8124f..60b7da7e64feb 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1748,6 +1748,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
     Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
                        Pow->doesNotAccessMemory(), M, B, TLI);
+    if (!Sqrt)
+      return nullptr;
   }
 
   // We will memoize intermediate products of the Addition Chain.
diff --git a/llvm/test/Transforms/InstCombine/pow-4.ll b/llvm/test/Transforms/InstCombine/pow-4.ll
index e68dfb857caab..23cc2d801a160 100644
--- a/llvm/test/Transforms/InstCombine/pow-4.ll
+++ b/llvm/test/Transforms/InstCombine/pow-4.ll
@@ -152,7 +152,6 @@ define double @test_simplify_neg_16_5(double %x) {
 }
 
 ; pow(x, 16.5) with double
-; FIXME: This is wrong without sqrt.
define double @test_simplify_16_5_libcall(double %x) { ; SQRT-LABEL: @test_simplify_16_5_libcall( @@ -165,18 +164,14 @@ define double @test_simplify_16_5_libcall(double %x) { ; SQRT-NEXT: ret double [[TMP4]] ; ; NOSQRT-LABEL: @test_simplify_16_5_libcall( -; NOSQRT-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]] -; NOSQRT-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]] -; NOSQRT-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]] -; NOSQRT-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] -; NOSQRT-NEXT: ret double [[TMP3]] +; NOSQRT-NEXT: [[TMP1:%.*]] = call fast double @pow(double [[X:%.*]], double 1.650000e+01) +; NOSQRT-NEXT: ret double [[TMP1]] ; %1 = call fast double @pow(double %x, double 1.650000e+01) ret double %1 } ; pow(x, -16.5) with double -; FIXME: This is wrong without sqrt. define double @test_simplify_neg_16_5_libcall(double %x) { ; SQRT-LABEL: @test_simplify_neg_16_5_libcall( @@ -190,12 +185,8 @@ define double @test_simplify_neg_16_5_libcall(double %x) { ; SQRT-NEXT: ret double [[RECIPROCAL]] ; ; NOSQRT-LABEL: @test_simplify_neg_16_5_libcall( -; NOSQRT-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]] -; NOSQRT-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]] -; NOSQRT-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]] -; NOSQRT-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] -; NOSQRT-NEXT: [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[TMP3]] -; NOSQRT-NEXT: ret double [[RECIPROCAL]] +; NOSQRT-NEXT: [[TMP1:%.*]] = call fast double @pow(double [[X:%.*]], double -1.650000e+01) +; NOSQRT-NEXT: ret double [[TMP1]] ; %1 = call fast double @pow(double %x, double -1.650000e+01) ret double %1 From 46dc41e1ef9c38cc4cef0a995528bbf58d616a09 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 15 Sep 2020 13:32:47 +0000 Subject: [PATCH 0693/1079] [gn build] Port a8058c6f8d1 --- llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn index 220067c0e343a..fe5ee15605c0b 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn @@ -19,6 +19,7 @@ unittest("CodeGenTests") { "AArch64SelectionDAGTest.cpp", "AsmPrinterDwarfTest.cpp", "DIEHashTest.cpp", + "DIETest.cpp", "LexicalScopesTest.cpp", "LowLevelTypeTest.cpp", "MachineInstrBundleIteratorTest.cpp", From c0809f8d79045941d45c7bd60a12ddd0f6e0811a Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 15 Sep 2020 13:32:48 +0000 Subject: [PATCH 0694/1079] [gn build] Port cd4edf94cd4 --- llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn | 1 + llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn index 1c6d22dd672af..335e54b4f68c5 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn @@ -35,6 +35,7 @@ static_library("Analysis") { "CmpInstAnalysis.cpp", "CodeMetrics.cpp", "ConstantFolding.cpp", + "ConstraintSystem.cpp", "CostModel.cpp", "DDG.cpp", "Delinearization.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn index c4bed481e051b..6adc9866e883f 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn +++ 
b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn
@@ -19,6 +19,7 @@ unittest("AnalysisTests") {
     "CGSCCPassManagerTest.cpp",
     "CallGraphTest.cpp",
     "CaptureTrackingTest.cpp",
+    "ConstraintSystemTest.cpp",
     "DDGTest.cpp",
     "DivergenceAnalysisTest.cpp",
     "DomTreeUpdaterTest.cpp",

From c897a7fb3e2a5c200a3e87a92886eab20d9f7fc7 Mon Sep 17 00:00:00 2001
From: Stephan Herhut
Date: Mon, 14 Sep 2020 11:54:55 +0200
Subject: [PATCH 0695/1079] [mlir][Standard] Add canonicalizer for
 dynamic_tensor_from_elements

This adds canonicalizers for
- extracting an element from a dynamic_tensor_from_elements
- propagating constant operands to the type of dynamic_tensor_from_elements

Differential Revision: https://reviews.llvm.org/D87525
---
 .../mlir/Dialect/StandardOps/IR/Ops.td        |   2 +
 mlir/lib/Dialect/StandardOps/IR/Ops.cpp       | 102 +++++++++++++++++-
 mlir/test/Transforms/canonicalize.mlir        |  76 +++++++++++++
 3 files changed, 177 insertions(+), 3 deletions(-)

diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
index 4d0cf76ec9d8b..b0aa9b9e3c76a 100644
--- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
+++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
@@ -1511,6 +1511,8 @@ def DynamicTensorFromElementsOp : Std_Op<"dynamic_tensor_from_elements",
       "ValueRange dynamicExtents, "
       "function_ref">,
   ];
+
+  let hasCanonicalizer = 1;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
index c77bc12cca333..0c86c87384d33 100644
--- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
+++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
@@ -11,6 +11,7 @@
 #include "mlir/Dialect/CommonFolders.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
+#include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/Function.h"
 #include "mlir/IR/Matchers.h"
@@ -1730,6 +1731,101 @@ void DynamicTensorFromElementsOp::build(
   bodyBuilder(b, result.location, bodyBlock->getArguments());
 }
 
+namespace {
+
+/// Canonicalizes dynamic_tensor_from_elements operations with a constant
+/// operand into the equivalent operation with the operand expressed in the
+/// result type, instead. We also insert a type cast to make sure that the
+/// resulting IR is still well-typed.
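+///
+/// For example (a sketch; see the static_dynamic_tensor_from_elements test in
+/// this change): with %c5 = constant 5 : index, an op of type
+/// tensor<3x?x?x7x?xindex> with extents (%size1, %c5, %size4) is rewritten to
+/// one of type tensor<3x?x5x7x?xindex>, followed by a tensor_cast back to
+/// tensor<3x?x?x7x?xindex>.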
+struct StaticDynamicTensorFromElements + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(DynamicTensorFromElementsOp tensorFromElements, + PatternRewriter &rewriter) const final { + auto resultType = + tensorFromElements.getResult().getType().cast(); + + if (resultType.hasStaticShape()) + return failure(); + + SmallVector newOperands; + SmallVector newShape; + auto operandsIt = tensorFromElements.dynamicExtents().begin(); + + for (int64_t dim : resultType.getShape()) { + if (dim != RankedTensorType::kDynamicSize) { + newShape.push_back(dim); + continue; + } + APInt index; + if (!matchPattern(*operandsIt, m_ConstantInt(&index))) { + newShape.push_back(RankedTensorType::kDynamicSize); + newOperands.push_back(*operandsIt++); + continue; + } + newShape.push_back(index.getSExtValue()); + operandsIt++; + } + + if (newOperands.size() == tensorFromElements.dynamicExtents().size()) + return failure(); + + auto loc = tensorFromElements.getLoc(); + auto newOp = rewriter.create( + loc, RankedTensorType::get(newShape, resultType.getElementType()), + newOperands); + rewriter.inlineRegionBefore(tensorFromElements.body(), newOp.body(), + newOp.body().begin()); + rewriter.replaceOpWithNewOp(tensorFromElements, resultType, + newOp); + return success(); + } +}; + +/// Canonicalizes the pattern of the form +/// +/// %tensor = dynamic_tensor_from_elements %x { +/// ^bb0(%arg0: index): // no predecessors +/// +/// yield %1 : index +/// } : tensor +/// %extracted_element = extract_element %tensor[%c0] : tensor +/// +/// to just with %arg0 replaced by %c0. We only do this if the +/// dynamic_tensor_from_elements operation has no side-effects. +struct ExtractElementFromDynamicTensorFromElements + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(ExtractElementOp extract, + PatternRewriter &rewriter) const final { + auto tensorFromElements = + extract.aggregate().getDefiningOp(); + if (!tensorFromElements || !wouldOpBeTriviallyDead(tensorFromElements)) + return failure(); + + BlockAndValueMapping mapping; + Block *body = tensorFromElements.getBody(); + mapping.map(body->getArguments(), extract.indices()); + for (auto &op : body->without_terminator()) + rewriter.clone(op, mapping); + + auto yield = cast(body->getTerminator()); + + rewriter.replaceOp(extract, mapping.lookupOrDefault(yield.value())); + return success(); + } +}; + +} // namespace + +void DynamicTensorFromElementsOp::getCanonicalizationPatterns( + OwningRewritePatternList &results, MLIRContext *context) { + results.insert(context); +} + //===----------------------------------------------------------------------===// // ExtractElementOp //===----------------------------------------------------------------------===// @@ -1807,16 +1903,16 @@ struct ExtractElementFromTensorFromElements if (extract.indices().size() != 1) return failure(); - auto tensor_from_elements = dyn_cast_or_null( + auto tensorFromElements = dyn_cast_or_null( extract.aggregate().getDefiningOp()); - if (tensor_from_elements == nullptr) + if (tensorFromElements == nullptr) return failure(); APInt index; if (!matchPattern(*extract.indices().begin(), m_ConstantInt(&index))) return failure(); rewriter.replaceOp(extract, - tensor_from_elements.getOperand(index.getZExtValue())); + tensorFromElements.getOperand(index.getZExtValue())); return success(); } }; diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir index 
76fe82588be3e..320418545893e 100644 --- a/mlir/test/Transforms/canonicalize.mlir +++ b/mlir/test/Transforms/canonicalize.mlir @@ -986,3 +986,79 @@ func @extract_element_from_tensor_from_elements(%element : index) -> index { // CHECK: [[ARG]] : index return %extracted_element : index } + +// ----- + +// CHECK-LABEL: func @extract_element_from_dynamic_tensor_from_elements +// CHECK-SAME: %[[IDX:.*]]: index, %[[TENSOR:.*]]: tensor<*xf32> +func @extract_element_from_dynamic_tensor_from_elements(%idx: index, %tensor: tensor<*xf32>) -> index { + %size = rank %tensor : tensor<*xf32> + // CHECK-NEXT: %[[RES:.*]] = dim %[[TENSOR]], %[[IDX]] + %0 = dynamic_tensor_from_elements %size { + ^bb0(%arg0: index): + %1 = dim %tensor, %arg0 : tensor<*xf32> + yield %1 : index + } : tensor + %1 = extract_element %0[%idx] : tensor + // CHECK-NEXT: return %[[RES]] + return %1 : index +} + +// ----- + +// CHECK-LABEL: func @extract_element_from_dynamic_tensor_from_elements_2d +// CHECK-SAME: %[[IDX0:.*]]: index, %[[IDX1:.*]]: index, %[[TENSOR:.*]]: tensor<*xf32> +func @extract_element_from_dynamic_tensor_from_elements_2d(%idx0: index, %idx1: index, %tensor: tensor<*xf32>) -> index { + %size = rank %tensor : tensor<*xf32> + // CHECK-NEXT: %[[DIM0:.*]] = dim %[[TENSOR]], %[[IDX0]] + // CHECK-NEXT: %[[DIM1:.*]] = dim %[[TENSOR]], %[[IDX1]] + // CHECK-NEXT: %[[RES:.*]] = addi %[[DIM0]], %[[DIM1]] + %0 = dynamic_tensor_from_elements %size, %size { + ^bb0(%arg0: index, %arg1: index): + %1 = dim %tensor, %arg0 : tensor<*xf32> + %2 = dim %tensor, %arg1 : tensor<*xf32> + %3 = addi %1, %2 : index + yield %3 : index + } : tensor + %4 = extract_element %0[%idx0, %idx1] : tensor + // CHECK-NEXT: return %[[RES]] + return %4 : index +} + +// ----- + +// CHECK-LABEL: func @extract_element_from_dynamic_tensor_from_elements_sideeffects +// CHECK-SAME: %[[IDX:.*]]: index +func @extract_element_from_dynamic_tensor_from_elements_sideeffects(%idx: index, %tensor: tensor<*xf32>) -> index { + %size = rank %tensor : tensor<*xf32> + %mem = alloc(%size) : memref + // CHECK: %[[DTENSOR:.*]] = dynamic_tensor_from_elements + %0 = dynamic_tensor_from_elements %size { + ^bb0(%arg0: index): + %1 = dim %tensor, %arg0 : tensor<*xf32> + store %1, %mem[%arg0] : memref + yield %1 : index + } : tensor + // CHECK: %[[RES:.*]] = extract_element %[[DTENSOR]][%[[IDX]]] + %1 = extract_element %0[%idx] : tensor + // CHECK-NEXT: return %[[RES]] + return %1 : index +} + +// ----- + +// CHECK-LABEL: @static_dynamic_tensor_from_elements +// CHECK-SAME: %[[SIZE1:.*]]: index, %[[SIZE4:.*]]: index) +func @static_dynamic_tensor_from_elements(%size1: index, %size4: index) -> tensor<3x?x?x7x?xindex> { + %c5 = constant 5 : index + // CHECK: dynamic_tensor_from_elements %[[SIZE1]], %[[SIZE4]] + %0 = dynamic_tensor_from_elements %size1, %c5, %size4 { + ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index): + %1 = constant 32 : index + yield %1 : index + // CHECK: : tensor<3x?x5x7x?xindex> + } : tensor<3x?x?x7x?xindex> + // CHECK: tensor_cast %{{.*}} : tensor<3x?x5x7x?xindex> to tensor<3x?x?x7x?xindex> + return %0 : tensor<3x?x?x7x?xindex> +} + From 2d8f0c05dbe76a31060a729928b9b9d7ebbf0c40 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Tue, 15 Sep 2020 09:48:24 -0400 Subject: [PATCH 0696/1079] [mlir][openacc] Add missing print of vector_length in parallel op This patch adds the missing print for the vector_length in the parallel operation. 
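
With the print in place the operand now round-trips through the textual form,
as in the test added below:

  %vectorLength = constant 128 : index
  acc.parallel vector_length(%vectorLength) {
  }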
Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D87630 --- mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 11 ++++++++--- mlir/test/Dialect/OpenACC/ops.mlir | 12 ++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index b5dfa2c133585..11a774828194e 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -269,22 +269,27 @@ static void print(OpAsmPrinter &printer, ParallelOp &op) { printer << ParallelOp::getOperationName(); // async()? - if (auto async = op.async()) + if (Value async = op.async()) printer << " " << ParallelOp::getAsyncKeyword() << "(" << async << ")"; // wait()? printOperandList(op.waitOperands(), ParallelOp::getWaitKeyword(), printer); // num_gangs()? - if (auto numGangs = op.numGangs()) + if (Value numGangs = op.numGangs()) printer << " " << ParallelOp::getNumGangsKeyword() << "(" << numGangs << ")"; // num_workers()? - if (auto numWorkers = op.numWorkers()) + if (Value numWorkers = op.numWorkers()) printer << " " << ParallelOp::getNumWorkersKeyword() << "(" << numWorkers << ")"; + // vector_length()? + if (Value vectorLength = op.vectorLength()) + printer << " " << ParallelOp::getVectorLengthKeyword() << "(" + << vectorLength << ")"; + // if()? if (Value ifCond = op.ifCond()) printer << " " << ParallelOp::getIfKeyword() << "(" << ifCond << ")"; diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir index 6cdba227d5dab..b534f703e05e2 100644 --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -232,3 +232,15 @@ func @testop() -> () { // CHECK-NEXT: } // CHECK-NEXT: acc.loop tile([[TILESIZE]]: i64, [[TILESIZE]]: i64) { // CHECK-NEXT: } + + +func @testparallelop() -> () { + %vectorLength = constant 128 : index + acc.parallel vector_length(%vectorLength) { + } + return +} + +// CHECK: [[VECTORLENGTH:%.*]] = constant 128 : index +// CHECK-NEXT: acc.parallel vector_length([[VECTORLENGTH]]) { +// CHECK-NEXT: } From 65c6ae3b6aceb934a76c5b10b244edeed80e9cac Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 15 Sep 2020 13:48:40 +0100 Subject: [PATCH 0697/1079] [Utils] isLegalToPromote - Fix missing null check before writing to FailureReason. The FailureReason input parameter maybe null, we check this in all other cases in the method but this one was missed somehow. Fixes clang-tidy warning. --- llvm/lib/Transforms/Utils/CallPromotionUtils.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp index 5a47c1fd0b6cb..7141e4b1e879e 100644 --- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp +++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -430,10 +430,11 @@ bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee, } } for (; I < NumArgs; I++) { - // Vararg functions can have more arguments than paramters. + // Vararg functions can have more arguments than parameters. assert(Callee->isVarArg()); if (CB.paramHasAttr(I, Attribute::StructRet)) { - *FailureReason = "SRet arg to vararg function"; + if (FailureReason) + *FailureReason = "SRet arg to vararg function"; return false; } } From 97a23ab28ad91d589e6c0bb5dee6ae78c154da8a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 15 Sep 2020 14:48:40 +0100 Subject: [PATCH 0698/1079] AMDGPUPrintfRuntimeBinding.cpp - drop unnecessary casts/dyn_casts. NFCI. 
GetElementPtrInst::Create returns a GetElementPtrInst* so we don't need to cast. Similarly IntegerType inherits from the Type base class. Also, I've used auto* in a few places to cleanup the code. Helps fix some clang-tidy warnings which saw the dyn_casts and warned that these can return null. --- .../AMDGPU/AMDGPUPrintfRuntimeBinding.cpp | 40 +++++++++---------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp index 524a34be876ff..31c6c0bb0c2f3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp @@ -379,9 +379,8 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu( ConstantInt::get(Ctx, APInt(32, StringRef("0"), 10)); ZeroIdxList.push_back(zeroInt); - GetElementPtrInst *BufferIdx = - dyn_cast(GetElementPtrInst::Create( - nullptr, pcall, ZeroIdxList, "PrintBuffID", Brnch)); + GetElementPtrInst *BufferIdx = GetElementPtrInst::Create( + nullptr, pcall, ZeroIdxList, "PrintBuffID", Brnch); Type *idPointer = PointerType::get(I32Ty, AMDGPUAS::GLOBAL_ADDRESS); Value *id_gep_cast = @@ -395,8 +394,8 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu( FourthIdxList.push_back(fourInt); // 1st 4 bytes hold the printf_id // the following GEP is the buffer pointer - BufferIdx = cast(GetElementPtrInst::Create( - nullptr, pcall, FourthIdxList, "PrintBuffGep", Brnch)); + BufferIdx = GetElementPtrInst::Create(nullptr, pcall, FourthIdxList, + "PrintBuffGep", Brnch); Type *Int32Ty = Type::getInt32Ty(Ctx); Type *Int64Ty = Type::getInt64Ty(Ctx); @@ -409,17 +408,15 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu( if (ArgType->isFPOrFPVectorTy() && !isa(ArgType)) { Type *IType = (ArgType->isFloatTy()) ? 
Int32Ty : Int64Ty; if (OpConvSpecifiers[ArgCount - 1] == 'f') { - ConstantFP *fpCons = dyn_cast(Arg); - if (fpCons) { - APFloat Val(fpCons->getValueAPF()); + if (auto *FpCons = dyn_cast(Arg)) { + APFloat Val(FpCons->getValueAPF()); bool Lost = false; Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Lost); Arg = ConstantFP::get(Ctx, Val); IType = Int32Ty; - } else { - FPExtInst *FpExt = dyn_cast(Arg); - if (FpExt && FpExt->getType()->isDoubleTy() && + } else if (auto *FpExt = dyn_cast(Arg)) { + if (FpExt->getType()->isDoubleTy() && FpExt->getOperand(0)->getType()->isFloatTy()) { Arg = FpExt->getOperand(0); IType = Int32Ty; @@ -431,9 +428,8 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu( } else if (ArgType->getTypeID() == Type::PointerTyID) { if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) { const char *S = NonLiteralStr; - if (ConstantExpr *ConstExpr = dyn_cast(Arg)) { - GlobalVariable *GV = - dyn_cast(ConstExpr->getOperand(0)); + if (auto *ConstExpr = dyn_cast(Arg)) { + auto *GV = dyn_cast(ConstExpr->getOperand(0)); if (GV && GV->hasInitializer()) { Constant *Init = GV->getInitializer(); ConstantDataArray *CA = dyn_cast(Init); @@ -491,27 +487,27 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu( switch (EleSize) { default: EleCount = TotalSize / 64; - IType = dyn_cast(Type::getInt64Ty(ArgType->getContext())); + IType = Type::getInt64Ty(ArgType->getContext()); break; case 8: if (EleCount >= 8) { EleCount = TotalSize / 64; - IType = dyn_cast(Type::getInt64Ty(ArgType->getContext())); + IType = Type::getInt64Ty(ArgType->getContext()); } else if (EleCount >= 3) { EleCount = 1; - IType = dyn_cast(Type::getInt32Ty(ArgType->getContext())); + IType = Type::getInt32Ty(ArgType->getContext()); } else { EleCount = 1; - IType = dyn_cast(Type::getInt16Ty(ArgType->getContext())); + IType = Type::getInt16Ty(ArgType->getContext()); } break; case 16: if (EleCount >= 3) { EleCount = TotalSize / 64; - IType = dyn_cast(Type::getInt64Ty(ArgType->getContext())); + IType = Type::getInt64Ty(ArgType->getContext()); } else { EleCount = 1; - IType = dyn_cast(Type::getInt32Ty(ArgType->getContext())); + IType = Type::getInt32Ty(ArgType->getContext()); } break; } @@ -539,8 +535,8 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu( (void)StBuff; if (I + 1 == E && ArgCount + 1 == CI->getNumArgOperands()) break; - BufferIdx = dyn_cast(GetElementPtrInst::Create( - nullptr, BufferIdx, BuffOffset, "PrintBuffNextPtr", Brnch)); + BufferIdx = GetElementPtrInst::Create(nullptr, BufferIdx, BuffOffset, + "PrintBuffNextPtr", Brnch); LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n" << *BufferIdx << '\n'); } From e1669843f2aaf1e4929afdd8f125c14536d27664 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Tue, 15 Sep 2020 22:03:50 +0800 Subject: [PATCH 0699/1079] Revert "[SelectionDAG] Remove unused FP constant in getNegatedExpression" 2508ef01 doesn't totally fix the issue since we did not handle the case when unused temporary negated result is the same with the result, which is found by address sanitizer. 
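
(The hazard, presumably: the "unused" negated constant that
RemoveDeadNode(CFP) deletes can be the same node as a negated result the
caller is already holding, so deleting it leaves a dangling node; the
CFP.use_empty() guard alone does not exclude that case.)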
--- .../CodeGen/SelectionDAG/TargetLowering.cpp | 4 +-- llvm/test/CodeGen/X86/pr47517.ll | 28 ------------------- 2 files changed, 1 insertion(+), 31 deletions(-) delete mode 100644 llvm/test/CodeGen/X86/pr47517.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 749a5e83058e7..3446ee0efc450 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5773,10 +5773,8 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, // If we already have the use of the negated floating constant, it is free // to negate it even it has multiple uses. - if (!Op.hasOneUse() && CFP.use_empty()) { - RemoveDeadNode(CFP); + if (!Op.hasOneUse() && CFP.use_empty()) break; - } Cost = NegatibleCost::Neutral; return CFP; } diff --git a/llvm/test/CodeGen/X86/pr47517.ll b/llvm/test/CodeGen/X86/pr47517.ll deleted file mode 100644 index 6b508acf15dda..0000000000000 --- a/llvm/test/CodeGen/X86/pr47517.ll +++ /dev/null @@ -1,28 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple x86_64 < %s | FileCheck %s - -; To ensure unused floating point constant is removed in negation -define float @test(float %src, float* %p) { -; CHECK-LABEL: test: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq $0, (%rdi) -; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: retq -entry: - %a0 = getelementptr inbounds float, float* %p, i32 0 - %a1 = getelementptr inbounds float, float* %p, i32 1 - store float 0.000000e+00, float* %a0 - store float 0.000000e+00, float* %a1 - %zero = load float, float* %a0 - %fmul1 = fmul fast float %zero, %src - %fadd1 = fadd fast float %fmul1, %zero - %fmul2 = fmul fast float %fadd1, 2.000000e+00 - %fmul3 = fmul fast float %fmul2, %fmul2 - %fmul4 = fmul fast float %fmul2, 2.000000e+00 - %fadd2 = fadd fast float %fmul4, -3.000000e+00 - %fmul5 = fmul fast float %fadd2, %fmul2 - %fadd3 = fadd fast float %fmul2, %src - %fadd4 = fadd fast float %fadd3, %fmul5 - %fmul6 = fmul fast float %fmul3, %fadd4 - ret float %fmul6 -} From 65f6810d3a4b0ef1fdaad49e808459fbd133bb20 Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Tue, 15 Sep 2020 08:23:58 -0500 Subject: [PATCH 0700/1079] [LLD][PowerPC] Add support for R_PPC64_TPREL34 used in TLS Local Exec Add Thread Local Storage Local Exec support to LLD. This is to support PC Relative addressing of Local Exec. 
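+## (A sketch of the mechanics, per the PPC64.cpp change above: the 34-bit
+## x@TPREL value is split across the prefixed instruction pair, the bits
+## under mask 0x3ffff0000 going into the prefix word and the bits under mask
+## 0xffff into the suffix word.)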
The patch teaches LLD to handle: ``` paddi r9, r13, x1@tprel ``` The relocation is: ``` R_PPC_TPREL34 ``` Reviewed By: NeHuang, MaskRay Differential Revision: https://reviews.llvm.org/D86608 --- lld/ELF/Arch/PPC64.cpp | 4 ++- lld/test/ELF/ppc64-tls-pcrel-le.s | 56 +++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 lld/test/ELF/ppc64-tls-pcrel-le.s diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index bdd7d55172132..522546331f51f 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -938,6 +938,7 @@ RelExpr PPC64::getRelExpr(RelType type, const Symbol &s, case R_PPC64_TPREL16_HIGHERA: case R_PPC64_TPREL16_HIGHEST: case R_PPC64_TPREL16_HIGHESTA: + case R_PPC64_TPREL34: return R_TLS; case R_PPC64_DTPREL16: case R_PPC64_DTPREL16_DS: @@ -1235,7 +1236,8 @@ void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { (val & si1Mask)); break; } - case R_PPC64_GOT_PCREL34: { + case R_PPC64_GOT_PCREL34: + case R_PPC64_TPREL34: { const uint64_t si0Mask = 0x00000003ffff0000; const uint64_t si1Mask = 0x000000000000ffff; const uint64_t fullMask = 0x0003ffff0000ffff; diff --git a/lld/test/ELF/ppc64-tls-pcrel-le.s b/lld/test/ELF/ppc64-tls-pcrel-le.s new file mode 100644 index 0000000000000..bff7d075eda49 --- /dev/null +++ b/lld/test/ELF/ppc64-tls-pcrel-le.s @@ -0,0 +1,56 @@ +# REQUIRES: ppc +# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYMBOL +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s + +# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYMBOL +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s + +## This test checks the LLD implementation of the Local Exec TLS model +## when using prefixed instructions like paddi. 
+ +# SYMBOL: Symbol table '.symtab' contains 6 entries: +# SYMBOL: 3: 0000000000000000 0 TLS LOCAL DEFAULT 2 x +# SYMBOL-NEXT: 4: 0000000000000004 0 TLS LOCAL DEFAULT 2 y +# SYMBOL-NEXT: 5: 0000000000000008 0 TLS LOCAL DEFAULT 2 z + +# CHECK-LABEL: : +# CHECK: paddi 3, 13, -28672, 0 +# CHECK-NEXT: paddi 3, 13, -28668, 0 +# CHECK-NEXT: paddi 3, 13, -28652, 0 +# CHECK-NEXT: blr + +# CHECK-LABEL: : +# CHECK: paddi 3, 13, -28672, 0 +# CHECK-NEXT: lwz 3, 0(3) +# CHECK-NEXT: paddi 3, 13, -28668, 0 +# CHECK-NEXT: lwz 3, 0(3) +# CHECK-NEXT: paddi 3, 13, -28652, 0 +# CHECK-NEXT: lwz 3, 0(3) +# CHECK-NEXT: blr + +LocalExecAddr: + paddi 3, 13, x@TPREL, 0 + paddi 3, 13, y@TPREL, 0 + paddi 3, 13, z@TPREL+12, 0 + blr + +LocalExecVal: + paddi 3, 13, x@TPREL, 0 + lwz 3, 0(3) + paddi 3, 13, y@TPREL, 0 + lwz 3, 0(3) + paddi 3, 13, z@TPREL+12, 0 + lwz 3, 0(3) + blr + +.section .tbss, "awT", @nobits +x: + .long 0 +y: + .long 0 +z: + .space 20 From 85763e0758fbd238c81f233c6f9510e81c7de177 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Tue, 15 Sep 2020 14:25:00 +0000 Subject: [PATCH 0701/1079] [libc] Fix typo in platform_defs.h.inc Differential Revision: https://reviews.llvm.org/D87687 --- .../config/linux/{platfrom_defs.h.inc => platform_defs.h.inc} | 0 libc/src/__support/CMakeLists.txt | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename libc/config/linux/{platfrom_defs.h.inc => platform_defs.h.inc} (100%) diff --git a/libc/config/linux/platfrom_defs.h.inc b/libc/config/linux/platform_defs.h.inc similarity index 100% rename from libc/config/linux/platfrom_defs.h.inc rename to libc/config/linux/platform_defs.h.inc diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index c1ee46cd62cf6..e9f9579b6d0fe 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -2,8 +2,8 @@ add_gen_header( common DEF_FILE common.h.def PARAMS - platform_defs=../../config/${LIBC_TARGET_OS}/platfrom_defs.h.inc + platform_defs=../../config/${LIBC_TARGET_OS}/platform_defs.h.inc GEN_HDR common.h DATA_FILES - ../../config/${LIBC_TARGET_OS}/platfrom_defs.h.inc + ../../config/${LIBC_TARGET_OS}/platform_defs.h.inc ) From 00d6e7116c208b06e4c85bb58a40e76412be65a6 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Tue, 15 Sep 2020 14:25:34 +0000 Subject: [PATCH 0702/1079] [libc] Add missing LibcFPTestHelpers library Differential Revision: https://reviews.llvm.org/D87690 --- libc/utils/MPFRWrapper/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/utils/MPFRWrapper/CMakeLists.txt b/libc/utils/MPFRWrapper/CMakeLists.txt index 6a3c24e27b158..cc66d1c47d62c 100644 --- a/libc/utils/MPFRWrapper/CMakeLists.txt +++ b/libc/utils/MPFRWrapper/CMakeLists.txt @@ -13,7 +13,7 @@ if(LIBC_TESTS_CAN_USE_MPFR) MPFRUtils.h ) add_dependencies(libcMPFRWrapper libc.utils.CPP.standalone_cpp libc.utils.FPUtil.fputil LibcUnitTest LLVMSupport) - target_link_libraries(libcMPFRWrapper -lmpfr -lgmp LibcUnitTest LLVMSupport) + target_link_libraries(libcMPFRWrapper -lmpfr -lgmp LibcFPTestHelpers LibcUnitTest LLVMSupport) else() message(WARNING "Math tests using MPFR will be skipped.") endif() From e328456a9e6fa8c1ef05e183c1506ed837005847 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Tue, 15 Sep 2020 14:26:04 +0000 Subject: [PATCH 0703/1079] [libc] Add missing TableGen dependency Differential Revision: https://reviews.llvm.org/D87689 --- libc/utils/LibcTableGenUtil/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/libc/utils/LibcTableGenUtil/CMakeLists.txt b/libc/utils/LibcTableGenUtil/CMakeLists.txt index ae887a8bdb03a..d2632a240bd3d 100644 --- a/libc/utils/LibcTableGenUtil/CMakeLists.txt +++ b/libc/utils/LibcTableGenUtil/CMakeLists.txt @@ -2,6 +2,6 @@ add_llvm_library( LibcTableGenUtil APIIndexer.cpp APIIndexer.h - LINK_COMPONENTS Support + LINK_COMPONENTS Support TableGen ) target_include_directories(LibcTableGenUtil PUBLIC ${LIBC_SOURCE_DIR}) From a012bc4c42e4408a18e4c4d67306b79c576df961 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Thu, 3 Sep 2020 13:23:49 +0200 Subject: [PATCH 0704/1079] [analyzer][StdLibraryFunctionsChecker] Elaborate the summary of fread and fwrite Add the BufferSize argument constraint to fread and fwrite. This change itself makes it possible to discover a security critical case, described in SEI-CERT ARR38-C. We also add the not-null constraint on the 3rd arguments. In this patch, I also remove those lambdas that don't take any parameters (Fwrite, Fread, Getc), thus making the code better structured. Differential Revision: https://reviews.llvm.org/D87081 --- .../clang/StaticAnalyzer/Checkers/Checkers.td | 3 + .../Checkers/StdLibraryFunctionsChecker.cpp | 59 ++++++++++--------- .../Analysis/Inputs/system-header-simulator.h | 4 +- .../test/Analysis/analyzer-enabled-checkers.c | 2 +- .../std-c-library-functions-arg-constraints.c | 16 +++++ ...td-c-library-functions-vs-stream-checker.c | 58 ++++++++++++++++++ 6 files changed, 112 insertions(+), 30 deletions(-) create mode 100644 clang/test/Analysis/std-c-library-functions-vs-stream-checker.c diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index a61af45231348..cbc048ba74c42 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -349,6 +349,9 @@ let ParentPackage = APIModeling in { def StdCLibraryFunctionsChecker : Checker<"StdCLibraryFunctions">, HelpText<"Improve modeling of the C standard library functions">, + // Uninitialized value check is a mandatory dependency. This Checker asserts + // that arguments are always initialized. + Dependencies<[CallAndMessageModeling]>, CheckerOptions<[ CmdLineOption FilePtrRestrictTy = getRestrictTy(FilePtrTy); // Templates for summaries that are reused by many functions. 
- auto Getc = [&]() { - return Summary(ArgTypes{FilePtrTy}, RetType{IntTy}, NoEvalCall) - .Case({ReturnValueCondition(WithinRange, - {{EOFv, EOFv}, {0, UCharRangeMax}})}); - }; auto Read = [&](RetType R, RangeInt Max) { return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R}, NoEvalCall) .Case({ReturnValueCondition(LessThanOrEq, ArgNo(2)), ReturnValueCondition(WithinRange, Range(-1, Max))}); }; - auto Fread = [&]() { - return Summary( - ArgTypes{VoidPtrRestrictTy, SizeTy, SizeTy, FilePtrRestrictTy}, - RetType{SizeTy}, NoEvalCall) - .Case({ - ReturnValueCondition(LessThanOrEq, ArgNo(2)), - }) - .ArgConstraint(NotNull(ArgNo(0))); - }; - auto Fwrite = [&]() { - return Summary(ArgTypes{ConstVoidPtrRestrictTy, SizeTy, SizeTy, - FilePtrRestrictTy}, - RetType{SizeTy}, NoEvalCall) - .Case({ - ReturnValueCondition(LessThanOrEq, ArgNo(2)), - }) - .ArgConstraint(NotNull(ArgNo(0))); - }; auto Getline = [&](RetType R, RangeInt Max) { return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R}, NoEvalCall) @@ -1283,19 +1260,45 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( 0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}}))); // The getc() family of functions that returns either a char or an EOF. - addToFunctionSummaryMap("getc", Getc()); - addToFunctionSummaryMap("fgetc", Getc()); + addToFunctionSummaryMap( + {"getc", "fgetc"}, Signature(ArgTypes{FilePtrTy}, RetType{IntTy}), + Summary(NoEvalCall) + .Case({ReturnValueCondition(WithinRange, + {{EOFv, EOFv}, {0, UCharRangeMax}})})); addToFunctionSummaryMap( "getchar", Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall) .Case({ReturnValueCondition( WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})})); // read()-like functions that never return more than buffer size. - addToFunctionSummaryMap("fread", Fread()); - addToFunctionSummaryMap("fwrite", Fwrite()); + auto FreadSummary = + Summary(NoEvalCall) + .Case({ + ReturnValueCondition(LessThanOrEq, ArgNo(2)), + }) + .ArgConstraint(NotNull(ArgNo(0))) + .ArgConstraint(NotNull(ArgNo(3))) + .ArgConstraint(BufferSize(/*Buffer=*/ArgNo(0), /*BufSize=*/ArgNo(1), + /*BufSizeMultiplier=*/ArgNo(2))); + + // size_t fread(void *restrict ptr, size_t size, size_t nitems, + // FILE *restrict stream); + addToFunctionSummaryMap( + "fread", + Signature(ArgTypes{VoidPtrRestrictTy, SizeTy, SizeTy, FilePtrRestrictTy}, + RetType{SizeTy}), + FreadSummary); + // size_t fwrite(const void *restrict ptr, size_t size, size_t nitems, + // FILE *restrict stream); + addToFunctionSummaryMap("fwrite", + Signature(ArgTypes{ConstVoidPtrRestrictTy, SizeTy, + SizeTy, FilePtrRestrictTy}, + RetType{SizeTy}), + FreadSummary); // We are not sure how ssize_t is defined on every platform, so we // provide three variants that should cover common cases. + // FIXME Use lookupTy("ssize_t") instead of the `Read` lambda. // FIXME these are actually defined by POSIX and not by the C standard, we // should handle them together with the rest of the POSIX functions. addToFunctionSummaryMap("read", {Read(IntTy, IntMax), Read(LongTy, LongMax), @@ -1304,11 +1307,13 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( Read(LongLongTy, LongLongMax)}); // getline()-like functions either fail or read at least the delimiter. + // FIXME Use lookupTy("ssize_t") instead of the `Getline` lambda. // FIXME these are actually defined by POSIX and not by the C standard, we // should handle them together with the rest of the POSIX functions. 
addToFunctionSummaryMap("getline", {Getline(IntTy, IntMax), Getline(LongTy, LongMax), Getline(LongLongTy, LongLongMax)}); + // FIXME getdelim's signature is different than getline's! addToFunctionSummaryMap("getdelim", {Getline(IntTy, IntMax), Getline(LongTy, LongMax), Getline(LongLongTy, LongLongMax)}); diff --git a/clang/test/Analysis/Inputs/system-header-simulator.h b/clang/test/Analysis/Inputs/system-header-simulator.h index a98546c7056c9..b72f45a9b0e55 100644 --- a/clang/test/Analysis/Inputs/system-header-simulator.h +++ b/clang/test/Analysis/Inputs/system-header-simulator.h @@ -46,8 +46,8 @@ FILE *fopen(const char *path, const char *mode); FILE *tmpfile(void); FILE *freopen(const char *pathname, const char *mode, FILE *stream); int fclose(FILE *fp); -size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); -size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); +size_t fread(void *restrict, size_t, size_t, FILE *restrict); +size_t fwrite(const void *restrict, size_t, size_t, FILE *restrict); int fputc(int ch, FILE *stream); int fseek(FILE *__stream, long int __off, int __whence); long int ftell(FILE *__stream); diff --git a/clang/test/Analysis/analyzer-enabled-checkers.c b/clang/test/Analysis/analyzer-enabled-checkers.c index bef786a1a59b6..7c00e78c16acd 100644 --- a/clang/test/Analysis/analyzer-enabled-checkers.c +++ b/clang/test/Analysis/analyzer-enabled-checkers.c @@ -6,11 +6,11 @@ // CHECK: OVERVIEW: Clang Static Analyzer Enabled Checkers List // CHECK-EMPTY: +// CHECK-NEXT: core.CallAndMessageModeling // CHECK-NEXT: apiModeling.StdCLibraryFunctions // CHECK-NEXT: apiModeling.TrustNonnull // CHECK-NEXT: apiModeling.llvm.CastValue // CHECK-NEXT: apiModeling.llvm.ReturnValue -// CHECK-NEXT: core.CallAndMessageModeling // CHECK-NEXT: core.CallAndMessage // CHECK-NEXT: core.DivideZero // CHECK-NEXT: core.DynamicTypePropagation diff --git a/clang/test/Analysis/std-c-library-functions-arg-constraints.c b/clang/test/Analysis/std-c-library-functions-arg-constraints.c index 28979abd43b58..afc2ce28efc62 100644 --- a/clang/test/Analysis/std-c-library-functions-arg-constraints.c +++ b/clang/test/Analysis/std-c-library-functions-arg-constraints.c @@ -194,6 +194,22 @@ void test_notnull_symbolic2(FILE *fp, int *buf) { // bugpath-warning{{Function argument constraint is not satisfied}} \ // bugpath-note{{Function argument constraint is not satisfied}} } +typedef __WCHAR_TYPE__ wchar_t; +// This is one test case for the ARR38-C SEI-CERT rule. +void ARR38_C_F(FILE *file) { + enum { BUFFER_SIZE = 1024 }; + wchar_t wbuf[BUFFER_SIZE]; // bugpath-note{{'wbuf' initialized here}} + + const size_t size = sizeof(*wbuf); + const size_t nitems = sizeof(wbuf); + + // The 3rd parameter should be the number of elements to read, not + // the size in bytes. + fread(wbuf, size, nitems, file); // \ + // report-warning{{Function argument constraint is not satisfied}} \ + // bugpath-warning{{Function argument constraint is not satisfied}} \ + // bugpath-note{{Function argument constraint is not satisfied}} +} int __two_constrained_args(int, int); void test_constraints_on_multiple_args(int x, int y) { diff --git a/clang/test/Analysis/std-c-library-functions-vs-stream-checker.c b/clang/test/Analysis/std-c-library-functions-vs-stream-checker.c new file mode 100644 index 0000000000000..61106f1f8d6bc --- /dev/null +++ b/clang/test/Analysis/std-c-library-functions-vs-stream-checker.c @@ -0,0 +1,58 @@ +// Check the case when only the StreamChecker is enabled. 
+// RUN: %clang_analyze_cc1 %s \
+// RUN:   -analyzer-checker=core,alpha.unix.Stream \
+// RUN:   -analyzer-checker=debug.ExprInspection \
+// RUN:   -analyzer-config eagerly-assume=false \
+// RUN:   -triple x86_64-unknown-linux \
+// RUN:   -verify=stream
+
+// Check the case when only the StdLibraryFunctionsChecker is enabled.
+// RUN: %clang_analyze_cc1 %s \
+// RUN:   -analyzer-checker=apiModeling.StdCLibraryFunctions \
+// RUN:   -analyzer-config apiModeling.StdCLibraryFunctions:DisplayLoadedSummaries=true \
+// RUN:   -analyzer-checker=debug.ExprInspection \
+// RUN:   -analyzer-config eagerly-assume=false \
+// RUN:   -triple x86_64-unknown-linux \
+// RUN:   -verify=stdLib 2>&1 | FileCheck %s
+
+// Check the case when both the StreamChecker and the
+// StdLibraryFunctionsChecker are enabled.
+// RUN: %clang_analyze_cc1 %s \
+// RUN:   -analyzer-checker=core,alpha.unix.Stream \
+// RUN:   -analyzer-checker=apiModeling.StdCLibraryFunctions \
+// RUN:   -analyzer-config apiModeling.StdCLibraryFunctions:DisplayLoadedSummaries=true \
+// RUN:   -analyzer-checker=debug.ExprInspection \
+// RUN:   -analyzer-config eagerly-assume=false \
+// RUN:   -triple x86_64-unknown-linux \
+// RUN:   -verify=both 2>&1 | FileCheck %s
+
+// Verify that the summaries are loaded when the StdLibraryFunctionsChecker is
+// enabled.
+// CHECK: Loaded summary for: int getchar()
+// CHECK-NEXT: Loaded summary for: unsigned long fread(void *restrict, size_t, size_t, FILE *restrict)
+// CHECK-NEXT: Loaded summary for: unsigned long fwrite(const void *restrict, size_t, size_t, FILE *restrict)
+
+#include "Inputs/system-header-simulator.h"
+
+void clang_analyzer_eval(int);
+
+void test_fread_fwrite(FILE *fp, int *buf) {
+  fp = fopen("foo", "r");
+  if (!fp)
+    return;
+  size_t x = fwrite(buf, sizeof(int), 10, fp);
+
+  clang_analyzer_eval(x <= 10); // \
+  // stream-warning{{TRUE}} \
+  // stdLib-warning{{TRUE}} \
+  // both-warning{{TRUE}} \
+
+  clang_analyzer_eval(x == 10); // \
+  // stream-warning{{TRUE}} \
+  // stream-warning{{FALSE}} \
+  // stdLib-warning{{UNKNOWN}} \
+  // both-warning{{TRUE}} \
+  // both-warning{{FALSE}}
+
+  fclose(fp);
+}
From 7df873f9c67099a209f0122a1f5411e701a9d425 Mon Sep 17 00:00:00 2001
From: Georgii Rymar
Date: Tue, 1 Sep 2020 11:11:34 +0300
Subject: [PATCH 0705/1079] [llvm-readobj/elf] - Don't crash when the size of a dynamic symbol table, inferred from the hash table, is broken.

Currently we might derive the dynamic symbol table size from the DT_HASH
hash table (using its `nchain` field). It is possible to crash dumpers with
a broken relocation that refers to a symbol with an index that is too
large. To trigger it, the inferred size of the dynamic symbol table should
go past the end of the object. This patch adds a size validation + warning.
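
The essence of the check is a small, overflow-safe bounds test. A minimal
sketch with invented names (the real code lives in ELFDumper.cpp; see the
diff below):

  #include <cstdint>

  // Trust the nchain-derived size only if the whole table stays inside
  // the file. Offset is assumed to have been validated as <= FileSize.
  bool fitsInFile(uint64_t NChain, uint64_t EntSize, uint64_t Offset,
                  uint64_t FileSize) {
    uint64_t DerivedSize = NChain * EntSize; // size implied by DT_HASH nchain
    return DerivedSize <= FileSize - Offset;
  }

Comparing against the remaining bytes (FileSize - Offset) instead of
computing Offset + DerivedSize keeps the comparison itself overflow-free.
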
Differential revision: https://reviews.llvm.org/D86923 --- .../ELF/dyn-symbols-size-from-hash-table.test | 91 +++++++++++++++++++ .../llvm-readobj/ELF/hash-histogram.test | 2 + .../tools/llvm-readobj/ELF/hash-symbols.test | 1 + .../tools/llvm-readobj/ELF/hash-table.test | 2 + llvm/tools/llvm-readobj/ELFDumper.cpp | 17 +++- 5 files changed, 111 insertions(+), 2 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/ELF/dyn-symbols-size-from-hash-table.test b/llvm/test/tools/llvm-readobj/ELF/dyn-symbols-size-from-hash-table.test index df9ff8d95ecad..bd862e2669a1d 100644 --- a/llvm/test/tools/llvm-readobj/ELF/dyn-symbols-size-from-hash-table.test +++ b/llvm/test/tools/llvm-readobj/ELF/dyn-symbols-size-from-hash-table.test @@ -324,3 +324,94 @@ ProgramHeaders: # LLVM3: DynamicSymbols [ # LLVM3: ] + +## Case 4: The size of the dynamic symbol table, inferred from the hash table, is broken. +## It is so large that symbol table goes past the end of the file. We have a dynamic +## relocation which refers to a symbol with an index that is also too large to be +## in the file. Check we report a warning when trying to dump this relocation. + +# RUN: yaml2obj --docnum=3 %s -o %t4.1 + +## Remember the size of the output produced. +# RUN: wc -c %t4.1 > %t4.out.gnu.txt +# RUN: llvm-readelf --sections --dyn-relocations %t4.1 >> %t4.out.gnu.txt 2>&1 +# RUN: FileCheck %s -DFILE=%t4.1 --input-file=%t4.out.gnu.txt --check-prefix=BROKEN-NCHAIN-GNU + +# BROKEN-NCHAIN-GNU: [[#%u, FILESIZE:]] +# BROKEN-NCHAIN-GNU: warning: '[[FILE]]': the size (0x17ffffffe8) of the dynamic symbol table at 0x[[#%x, DYNSYMOFF:]], derived from the hash table, goes past the end of the file (0x[[#%x, FILESIZE]]) and will be ignored + +# BROKEN-NCHAIN-GNU: [Nr] Name Type Address Off +# BROKEN-NCHAIN-GNU: [ 1] .rela.plt RELA 0000000000001000 0000[[#%x, RELAOFF:]] +# BROKEN-NCHAIN-GNU: [ 4] .dynsym DYNSYM 0000000000001078 0000[[#%x, DYNSYMOFF]] + +# BROKEN-NCHAIN-GNU: 'PLT' relocation section at offset 0x[[#%x, RELAOFF]] contains 24 bytes: +# BROKEN-NCHAIN-GNU-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# BROKEN-NCHAIN-GNU-NEXT: warning: '[[FILE]]': unable to get name of the dynamic symbol with index 4292739037: index is greater than or equal to the number of dynamic symbols (1) +# BROKEN-NCHAIN-GNU-NEXT: 0000000000000000 ffddffdd00000000 R_X86_64_NONE + 0 + +# RUN: wc -c %t4.1 > %t4.out.llvm.txt +# RUN: llvm-readobj --sections --dyn-relocations %t4.1 2>&1 >> %t4.out.llvm.txt 2>&1 +# RUN: FileCheck %s -DFILE=%t4.1 --input-file=%t4.out.llvm.txt --check-prefix=BROKEN-NCHAIN-LLVM + +# BROKEN-NCHAIN-LLVM: {{^}}[[#%u, FILESIZE:]] +# BROKEN-NCHAIN-LLVM: warning: '[[FILE]]': the size (0x17ffffffe8) of the dynamic symbol table at 0x[[#%x, DYNSYMOFF:]], derived from the hash table, goes past the end of the file (0x[[#%x, FILESIZE]]) and will be ignored + +# BROKEN-NCHAIN-LLVM: Name: .dynsym +# BROKEN-NCHAIN-LLVM-NEXT: Type: SHT_DYNSYM +# BROKEN-NCHAIN-LLVM-NEXT: Flags [ +# BROKEN-NCHAIN-LLVM-NEXT: SHF_ALLOC +# BROKEN-NCHAIN-LLVM-NEXT: ] +# BROKEN-NCHAIN-LLVM-NEXT: Address: 0x1078 +# BROKEN-NCHAIN-LLVM-NEXT: Offset: 0x[[#%X, DYNSYMOFF]] + +# BROKEN-NCHAIN-LLVM: Dynamic Relocations { +# BROKEN-NCHAIN-LLVM-NEXT: warning: '[[FILE]]': unable to get name of the dynamic symbol with index 4292739037: index is greater than or equal to the number of dynamic symbols (1) +# BROKEN-NCHAIN-LLVM-NEXT: 0x0 R_X86_64_NONE 0x0 +# BROKEN-NCHAIN-LLVM-NEXT: } + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: 
EM_X86_64 +Sections: + - Name: .rela.plt + Type: SHT_RELA + Flags: [ SHF_ALLOC ] + Address: 0x1000 + Relocations: + - Type: R_X86_64_NONE + Symbol: 0xFFDDFFDD + - Name: .dynamic + Type: SHT_DYNAMIC + Flags: [ SHF_ALLOC ] + Entries: + - Tag: DT_PLTRELSZ + Value: 0x18 + - Tag: DT_JMPREL +## 0x1000 - PT_LOAD's p_vaddr (0x1000) == 0x0. +## 0x0 + PT_LOAD's p_offset (0x78) == .rela.plt section offset (0x78). + Value: 0x1000 + - Tag: DT_PLTREL + Value: 0x7 ## 7 == DT_RELA + - Tag: DT_HASH +## 0x1068 - PT_LOAD's p_vaddr (0x1000) == 0x68. +## 0x68 + PT_LOAD's p_offset (0x78) == .hash section offset (0xE0). + Value: 0x1068 + - Tag: DT_NULL + Value: 0x0 + - Name: .hash + Type: SHT_HASH + Flags: [ SHF_ALLOC ] + Bucket: [ 0 ] + Chain: [ 0 ] + NChain: 0xFFFFFFFF +DynamicSymbols: [] +ProgramHeaders: + - Type: PT_LOAD + Sections: + - Section: .rela.plt + - Section: .dynamic + - Section: .hash + VAddr: 0x1000 diff --git a/llvm/test/tools/llvm-readobj/ELF/hash-histogram.test b/llvm/test/tools/llvm-readobj/ELF/hash-histogram.test index b6df8ff2a82ff..d6158e66acc74 100644 --- a/llvm/test/tools/llvm-readobj/ELF/hash-histogram.test +++ b/llvm/test/tools/llvm-readobj/ELF/hash-histogram.test @@ -167,6 +167,7 @@ ProgramHeaders: # RUN: llvm-readelf --elf-hash-histogram %t4.3.o 2>&1 | \ # RUN: FileCheck %s --check-prefix=ERR3 -DFILE=%t4.3.o --implicit-check-not="warning:" # ERR3: warning: '[[FILE]]': hash table nchain (93) differs from symbol count derived from SHT_DYNSYM section header (1){{$}} +# ERR3: warning: '[[FILE]]': the size (0x5d0) of the dynamic symbol table at 0x78, derived from the hash table, goes past the end of the file (0x1d4) and will be ignored ## Case B.2: the hash table ends 1 byte past the EOF. We have a broken nchain ## field that has a value larger than the number of chains. 
@@ -174,6 +175,7 @@ ProgramHeaders: # RUN: llvm-readelf --elf-hash-histogram %t4.4.o 2>&1 | \ # RUN: FileCheck %s --check-prefix=ERR4 -DFILE=%t4.4.o --implicit-check-not="warning:" # ERR4: warning: '[[FILE]]': hash table nchain (94) differs from symbol count derived from SHT_DYNSYM section header (1){{$}} +# ERR4: warning: '[[FILE]]': the size (0x5e0) of the dynamic symbol table at 0x78, derived from the hash table, goes past the end of the file (0x1d4) and will be ignored # ERR4: warning: '[[FILE]]': the hash table at offset 0x54 goes past the end of the file (0x1d4), nbucket = 1, nchain = 94{{$}} --- !ELF diff --git a/llvm/test/tools/llvm-readobj/ELF/hash-symbols.test b/llvm/test/tools/llvm-readobj/ELF/hash-symbols.test index e398ba7af99c6..5b9904bf442ca 100644 --- a/llvm/test/tools/llvm-readobj/ELF/hash-symbols.test +++ b/llvm/test/tools/llvm-readobj/ELF/hash-symbols.test @@ -402,6 +402,7 @@ ProgramHeaders: # RUN: llvm-readelf --hash-symbols %t7.3.o 2>&1 | \ # RUN: FileCheck %s --implicit-check-not="warning:" --check-prefix=NOERR2 -DFILE=%t7.3.o # NOERR2: warning: '[[FILE]]': hash table nchain (93) differs from symbol count derived from SHT_DYNSYM section header (1) +# NOERR2: warning: '[[FILE]]': the size (0x5d0) of the dynamic symbol table at 0x78, derived from the hash table, goes past the end of the file (0x1d4) and will be ignored # NOERR2: Symbol table of .hash for image: # NOERR2-NEXT: Num Buc: Value Size Type Bind Vis Ndx Name # NOERR2-NOT: {{.}} diff --git a/llvm/test/tools/llvm-readobj/ELF/hash-table.test b/llvm/test/tools/llvm-readobj/ELF/hash-table.test index 823c6c8ece9c3..1102d848f03e4 100644 --- a/llvm/test/tools/llvm-readobj/ELF/hash-table.test +++ b/llvm/test/tools/llvm-readobj/ELF/hash-table.test @@ -169,6 +169,7 @@ ProgramHeaders: # RUN: FileCheck %s --check-prefix=NOERR2 -DFILE=%t5.3.o --implicit-check-not="warning:" # NOERR2: warning: '[[FILE]]': hash table nchain (93) differs from symbol count derived from SHT_DYNSYM section header (1) +# NOERR2: warning: '[[FILE]]': the size (0x5d0) of the dynamic symbol table at 0x78, derived from the hash table, goes past the end of the file (0x1d4) and will be ignored # NOERR2: HashTable { # NOERR2-NEXT: Num Buckets: 1 # NOERR2-NEXT: Num Chains: 93 @@ -187,6 +188,7 @@ ProgramHeaders: # RUN: FileCheck %s --check-prefix=ERR3 -DFILE=%t5.4.o --implicit-check-not="warning:" # ERR3: warning: '[[FILE]]': hash table nchain (94) differs from symbol count derived from SHT_DYNSYM section header (1) +# ERR3: warning: '[[FILE]]': the size (0x5e0) of the dynamic symbol table at 0x78, derived from the hash table, goes past the end of the file (0x1d4) and will be ignored # ERR3: HashTable { # ERR3-NEXT: Num Buckets: 1 # ERR3-NEXT: Num Chains: 94 diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index e28d4ece226ce..051308ed7d448 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -2250,8 +2250,21 @@ void ELFDumper::parseDynamicTable(const ELFFile *Obj) { // Derive the dynamic symbol table size from the DT_HASH hash table, if // present. 
-  if (HashTable && DynSymRegion)
-    DynSymRegion->Size = HashTable->nchain * DynSymRegion->EntSize;
+  if (HashTable && DynSymRegion) {
+    const uint64_t FileSize = ObjF->getELFFile()->getBufSize();
+    const uint64_t DerivedSize =
+        (uint64_t)HashTable->nchain * DynSymRegion->EntSize;
+    const uint64_t Offset =
+        (const uint8_t *)DynSymRegion->Addr - ObjF->getELFFile()->base();
+    if (DerivedSize > FileSize - Offset)
+      reportUniqueWarning(createError(
+          "the size (0x" + Twine::utohexstr(DerivedSize) +
+          ") of the dynamic symbol table at 0x" + Twine::utohexstr(Offset) +
+          ", derived from the hash table, goes past the end of the file (0x" +
+          Twine::utohexstr(FileSize) + ") and will be ignored"));
+    else
+      DynSymRegion->Size = HashTable->nchain * DynSymRegion->EntSize;
+  }
 }
 
 template <class ELFT>
From 7c6f5b7fbf5a9eee7f3ef9192c354d1536a8f1c6 Mon Sep 17 00:00:00 2001
From: Kristóf Umann
Date: Tue, 25 Aug 2020 13:49:41 +0200
Subject: [PATCH 0706/1079] [analyzer] Add documentation for alpha.fuchsia.Lock and alpha.core.C11Lock

Differential Revision: https://reviews.llvm.org/D86532
---
 clang/docs/analyzer/checkers.rst       | 37 +++++++++++++++++++
 .../user-docs/CrossTranslationUnit.rst |  2 +
 2 files changed, 39 insertions(+)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 7a294f916bcf9..9fb6782cf5a5e 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1491,6 +1491,23 @@ Warn about assigning non-{0,1} values to boolean variables.
 alpha.core
 ^^^^^^^^^^
 
+.. _alpha-core-C11Lock:
+
+alpha.core.C11Lock
+""""""""""""""""""
+Similarly to :ref:`alpha.unix.PthreadLock <alpha-unix-PthreadLock>`, checks for
+the locking/unlocking of ``mtx_t`` mutexes.
+
+.. code-block:: cpp
+
+  mtx_t mtx1;
+
+  void bad1(void)
+  {
+    mtx_lock(&mtx1);
+    mtx_lock(&mtx1); // warn: This lock has already been acquired
+  }
+
 .. _alpha-core-CallAndMessageUnInitRefArg:
 
 alpha.core.CallAndMessageUnInitRefArg (C,C++, ObjC)
@@ -1868,6 +1885,26 @@ Check for dereference of null smart pointers.
   *P; // warn: dereference of a default constructed smart unique_ptr
 }
 
+alpha.fuchsia
+^^^^^^^^^^^^^
+
+.. _alpha-fuchsia-lock:
+
+alpha.fuchsia.Lock
+""""""""""""""""""
+Similarly to :ref:`alpha.unix.PthreadLock <alpha-unix-PthreadLock>`, checks for
+the locking/unlocking of fuchsia mutexes.
+
+.. code-block:: cpp
+
+  spin_lock_t mtx1;
+
+  void bad1(void)
+  {
+    spin_lock(&mtx1);
+    spin_lock(&mtx1); // warn: This lock has already been acquired
+  }
+
 alpha.llvm
 ^^^^^^^^^^
 
diff --git a/clang/docs/analyzer/user-docs/CrossTranslationUnit.rst b/clang/docs/analyzer/user-docs/CrossTranslationUnit.rst
index 36be82f209ef2..0606185f39e64 100644
--- a/clang/docs/analyzer/user-docs/CrossTranslationUnit.rst
+++ b/clang/docs/analyzer/user-docs/CrossTranslationUnit.rst
@@ -201,6 +201,8 @@ Example usage of scan-build-py:
   ^C
   $
 
+.. _ctu-on-demand:
+
 On-demand analysis
 __________________
 The analysis produces the necessary AST structure of external TUs during analysis. This requires the
From 8985755762a429573af2ce657274772339d3b9db Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Tue, 15 Sep 2020 10:30:35 -0400
Subject: [PATCH 0707/1079] [InstSimplify] add limit folds for fmin/fmax

If the constant operand is the opposite of the min/max value, then the
result must be the other value.
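
For intuition, the same identities hold for the libm functions that these
intrinsics model, as long as X is not a NaN (a standalone illustration,
not part of the patch):

  #include <cassert>
  #include <cmath>
  #include <limits>

  int main() {
    const float inf = std::numeric_limits<float>::infinity();
    float x = 42.0f; // stand-in for any non-NaN value
    assert(std::fmin(x, inf) == x);  // minnum(X, +inf) -> X (needs nnan)
    assert(std::fmax(x, -inf) == x); // maxnum(X, -inf) -> X (needs nnan)
    return 0;
  }

The nnan requirement exists because minnum/maxnum return the non-NaN
operand: minnum(NaN, +inf) is +inf, not NaN. The NaN-propagating
minimum/maximum variants fold away the limit constant unconditionally.
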
This is based on the similar codegen transform proposed in: D87571 --- llvm/lib/Analysis/InstructionSimplify.cpp | 10 ++-- .../Transforms/InstSimplify/fminmax-folds.ll | 51 +++++++------------ .../X86/vector-reductions-expanded.ll | 31 ++++++----- 3 files changed, 38 insertions(+), 54 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 716af06769f9e..9e38a4d8595a2 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5477,10 +5477,12 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, if (C->isNegative() == IsMin && (!PropagateNaN || Q.CxtI->hasNoNaNs())) return ConstantFP::get(ReturnType, *C); - // TODO: minimum(nnan x, inf) -> x - // TODO: minnum(nnan ninf x, flt_max) -> x - // TODO: maximum(nnan x, -inf) -> x - // TODO: maxnum(nnan ninf x, -flt_max) -> x + // minnum(X, +inf) -> X if nnan + // maxnum(X, -inf) -> X if nnan + // minimum(X, +inf) -> X + // maximum(X, -inf) -> X + if (C->isNegative() != IsMin && (PropagateNaN || Q.CxtI->hasNoNaNs())) + return Op0; } // Min/max of the same operation with common operand: diff --git a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll index f05837a8c2f66..c62f76c87faef 100644 --- a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll +++ b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll @@ -79,8 +79,7 @@ define float @test_maximum_const_inf(float %x) { define float @test_minimum_const_inf(float %x) { ; CHECK-LABEL: @test_minimum_const_inf( -; CHECK-NEXT: [[R:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float 0x7FF0000000000000) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float [[X:%.*]] ; %r = call float @llvm.minimum.f32(float %x, float 0x7ff0000000000000) ret float %r @@ -105,8 +104,7 @@ define float @test_maxnum_const_neg_inf(float %x) { define float @test_maximum_const_neg_inf(float %x) { ; CHECK-LABEL: @test_maximum_const_neg_inf( -; CHECK-NEXT: [[R:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float 0xFFF0000000000000) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float [[X:%.*]] ; %r = call float @llvm.maximum.f32(float %x, float 0xfff0000000000000) ret float %r @@ -123,8 +121,7 @@ define float @test_minimum_const_neg_inf(float %x) { define float @test_minnum_const_inf_nnan(float %x) { ; CHECK-LABEL: @test_minnum_const_inf_nnan( -; CHECK-NEXT: [[R:%.*]] = call nnan float @llvm.minnum.f32(float [[X:%.*]], float 0x7FF0000000000000) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float [[X:%.*]] ; %r = call nnan float @llvm.minnum.f32(float %x, float 0x7ff0000000000000) ret float %r @@ -148,8 +145,7 @@ define float @test_maximum_const_inf_nnan(float %x) { define float @test_minimum_const_inf_nnan(float %x) { ; CHECK-LABEL: @test_minimum_const_inf_nnan( -; CHECK-NEXT: [[R:%.*]] = call nnan float @llvm.minimum.f32(float [[X:%.*]], float 0x7FF0000000000000) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float [[X:%.*]] ; %r = call nnan float @llvm.minimum.f32(float %x, float 0x7ff0000000000000) ret float %r @@ -157,8 +153,7 @@ define float @test_minimum_const_inf_nnan(float %x) { define float @test_minnum_const_inf_nnan_comm(float %x) { ; CHECK-LABEL: @test_minnum_const_inf_nnan_comm( -; CHECK-NEXT: [[R:%.*]] = call nnan float @llvm.minnum.f32(float 0x7FF0000000000000, float [[X:%.*]]) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float [[X:%.*]] ; %r = call nnan float @llvm.minnum.f32(float 0x7ff0000000000000, 
float %x) ret float %r @@ -182,8 +177,7 @@ define float @test_maximum_const_inf_nnan_comm(float %x) { define float @test_minimum_const_inf_nnan_comm(float %x) { ; CHECK-LABEL: @test_minimum_const_inf_nnan_comm( -; CHECK-NEXT: [[R:%.*]] = call nnan float @llvm.minimum.f32(float 0x7FF0000000000000, float [[X:%.*]]) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float [[X:%.*]] ; %r = call nnan float @llvm.minimum.f32(float 0x7ff0000000000000, float %x) ret float %r @@ -191,8 +185,7 @@ define float @test_minimum_const_inf_nnan_comm(float %x) { define <2 x float> @test_minnum_const_inf_nnan_comm_vec(<2 x float> %x) { ; CHECK-LABEL: @test_minnum_const_inf_nnan_comm_vec( -; CHECK-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.minnum.v2f32(<2 x float> , <2 x float> [[X:%.*]]) -; CHECK-NEXT: ret <2 x float> [[R]] +; CHECK-NEXT: ret <2 x float> [[X:%.*]] ; %r = call nnan <2 x float> @llvm.minnum.v2f32(<2 x float> , <2 x float> %x) ret <2 x float> %r @@ -216,8 +209,7 @@ define <2 x float> @test_maximum_const_inf_nnan_comm_vec(<2 x float> %x) { define <2 x float> @test_minimum_const_inf_nnan_comm_vec(<2 x float> %x) { ; CHECK-LABEL: @test_minimum_const_inf_nnan_comm_vec( -; CHECK-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.minimum.v2f32(<2 x float> , <2 x float> [[X:%.*]]) -; CHECK-NEXT: ret <2 x float> [[R]] +; CHECK-NEXT: ret <2 x float> [[X:%.*]] ; %r = call nnan <2 x float> @llvm.minimum.v2f32(<2 x float> , <2 x float> %x) ret <2 x float> %r @@ -233,8 +225,7 @@ define float @test_minnum_const_neg_inf_nnan(float %x) { define float @test_maxnum_const_neg_inf_nnan(float %x) { ; CHECK-LABEL: @test_maxnum_const_neg_inf_nnan( -; CHECK-NEXT: [[R:%.*]] = call nnan float @llvm.maxnum.f32(float [[X:%.*]], float 0xFFF0000000000000) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float [[X:%.*]] ; %r = call nnan float @llvm.maxnum.f32(float %x, float 0xfff0000000000000) ret float %r @@ -242,8 +233,7 @@ define float @test_maxnum_const_neg_inf_nnan(float %x) { define float @test_maximum_const_neg_inf_nnan(float %x) { ; CHECK-LABEL: @test_maximum_const_neg_inf_nnan( -; CHECK-NEXT: [[R:%.*]] = call nnan float @llvm.maximum.f32(float [[X:%.*]], float 0xFFF0000000000000) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float [[X:%.*]] ; %r = call nnan float @llvm.maximum.f32(float %x, float 0xfff0000000000000) ret float %r @@ -357,8 +347,7 @@ define float @test_maximum_const_max_ninf(float %x) { define float @test_minimum_const_max_ninf(float %x) { ; CHECK-LABEL: @test_minimum_const_max_ninf( -; CHECK-NEXT: [[R:%.*]] = call ninf float @llvm.minimum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float [[X:%.*]] ; %r = call ninf float @llvm.minimum.f32(float %x, float 0x47efffffe0000000) ret float %r @@ -383,8 +372,7 @@ define float @test_maxnum_const_neg_max_ninf(float %x) { define float @test_maximum_const_neg_max_ninf(float %x) { ; CHECK-LABEL: @test_maximum_const_neg_max_ninf( -; CHECK-NEXT: [[R:%.*]] = call ninf float @llvm.maximum.f32(float [[X:%.*]], float 0xC7EFFFFFE0000000) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float [[X:%.*]] ; %r = call ninf float @llvm.maximum.f32(float %x, float 0xc7efffffe0000000) ret float %r @@ -401,8 +389,7 @@ define float @test_minimum_const_neg_max_ninf(float %x) { define float @test_minnum_const_max_nnan_ninf(float %x) { ; CHECK-LABEL: @test_minnum_const_max_nnan_ninf( -; CHECK-NEXT: [[R:%.*]] = call nnan ninf float @llvm.minnum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000) -; CHECK-NEXT: ret float [[R]] +; 
CHECK-NEXT: ret float [[X:%.*]] ; %r = call nnan ninf float @llvm.minnum.f32(float %x, float 0x47efffffe0000000) ret float %r @@ -426,8 +413,7 @@ define float @test_maximum_const_max_nnan_ninf(float %x) { define float @test_minimum_const_max_nnan_ninf(float %x) { ; CHECK-LABEL: @test_minimum_const_max_nnan_ninf( -; CHECK-NEXT: [[R:%.*]] = call nnan ninf float @llvm.minimum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float [[X:%.*]] ; %r = call nnan ninf float @llvm.minimum.f32(float %x, float 0x47efffffe0000000) ret float %r @@ -443,8 +429,7 @@ define float @test_minnum_const_neg_max_nnan_ninf(float %x) { define float @test_maxnum_const_neg_max_nnan_ninf(float %x) { ; CHECK-LABEL: @test_maxnum_const_neg_max_nnan_ninf( -; CHECK-NEXT: [[R:%.*]] = call nnan ninf float @llvm.maxnum.f32(float [[X:%.*]], float 0xC7EFFFFFE0000000) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float [[X:%.*]] ; %r = call nnan ninf float @llvm.maxnum.f32(float %x, float 0xc7efffffe0000000) ret float %r @@ -452,8 +437,7 @@ define float @test_maxnum_const_neg_max_nnan_ninf(float %x) { define float @test_maximum_const_neg_max_nnan_ninf(float %x) { ; CHECK-LABEL: @test_maximum_const_neg_max_nnan_ninf( -; CHECK-NEXT: [[R:%.*]] = call nnan ninf float @llvm.maximum.f32(float [[X:%.*]], float 0xC7EFFFFFE0000000) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float [[X:%.*]] ; %r = call nnan ninf float @llvm.maximum.f32(float %x, float 0xc7efffffe0000000) ret float %r @@ -1076,8 +1060,7 @@ define <2 x double> @minimum_neginf_commute_vec(<2 x double> %x) { define float @minimum_inf(float %x) { ; CHECK-LABEL: @minimum_inf( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.minimum.f32(float 0x7FF0000000000000, float [[X:%.*]]) -; CHECK-NEXT: ret float [[VAL]] +; CHECK-NEXT: ret float [[X:%.*]] ; %val = call float @llvm.minimum.f32(float 0x7FF0000000000000, float %x) ret float %val diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll index 0e02a01291d84..c3699ff0d6b4f 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll @@ -12,7 +12,7 @@ define i32 @add_v4i32(i32* %p) #0 { ; CHECK-LABEL: @add_v4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !tbaa !0 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, [[TBAA0:!tbaa !.*]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> @@ -51,7 +51,7 @@ define signext i16 @mul_v8i16(i16* %p) #0 { ; CHECK-LABEL: @mul_v8i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[P:%.*]] to <8 x i16>* -; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2, !tbaa !4 +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2, [[TBAA4:!tbaa !.*]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = mul <8 x i16> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> undef, <8 x i32> @@ -95,7 +95,7 @@ define signext i8 @or_v16i8(i8* 
%p) #0 { ; CHECK-LABEL: @or_v16i8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[P:%.*]] to <16 x i8>* -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1, !tbaa !6 +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1, [[TBAA6:!tbaa !.*]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <16 x i8> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x i8> [[BIN_RDX]], <16 x i8> undef, <16 x i32> @@ -141,7 +141,7 @@ define i32 @smin_v4i32(i32* %p) #0 { ; CHECK-LABEL: @smin_v4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !tbaa !0 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, [[TBAA0]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]] @@ -195,7 +195,7 @@ define i32 @umax_v4i32(i32* %p) #0 { ; CHECK-LABEL: @umax_v4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !tbaa !0 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, [[TBAA0]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ugt <4 x i32> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]] @@ -249,7 +249,7 @@ define float @fadd_v4i32(float* %p) #0 { ; CHECK-LABEL: @fadd_v4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !tbaa !7 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, [[TBAA7:!tbaa !.*]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> @@ -290,7 +290,7 @@ define float @fmul_v4i32(float* %p) #0 { ; CHECK-LABEL: @fmul_v4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !tbaa !7 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, [[TBAA7]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul fast <4 x float> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> @@ -330,18 +330,17 @@ for.end: define float @fmin_v4i32(float* %p) #0 { ; CHECK-LABEL: @fmin_v4i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[P:%.*]], align 4, !tbaa !7 -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.minnum.f32(float [[TMP0]], float 0x47EFFFFFE0000000) +; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[P:%.*]], align 4, [[TBAA7]] ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = 
getelementptr inbounds float, float* [[P]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[ARRAYIDX_1]], align 4, !tbaa !7
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.minnum.f32(float [[TMP2]], float [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4, [[TBAA7]]
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.minnum.f32(float [[TMP1]], float [[TMP0]])
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
-; CHECK-NEXT:    [[TMP4:%.*]] = load float, float* [[ARRAYIDX_2]], align 4, !tbaa !7
-; CHECK-NEXT:    [[TMP5:%.*]] = tail call fast float @llvm.minnum.f32(float [[TMP4]], float [[TMP3]])
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[ARRAYIDX_2]], align 4, [[TBAA7]]
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.minnum.f32(float [[TMP3]], float [[TMP2]])
 ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
-; CHECK-NEXT:    [[TMP6:%.*]] = load float, float* [[ARRAYIDX_3]], align 4, !tbaa !7
-; CHECK-NEXT:    [[TMP7:%.*]] = tail call fast float @llvm.minnum.f32(float [[TMP6]], float [[TMP5]])
-; CHECK-NEXT:    ret float [[TMP7]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load float, float* [[ARRAYIDX_3]], align 4, [[TBAA7]]
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call fast float @llvm.minnum.f32(float [[TMP5]], float [[TMP4]])
+; CHECK-NEXT:    ret float [[TMP6]]
 ;
 entry:
   br label %for.cond
From 39c8795141703a7d8313b2448d9d34e856df0b85 Mon Sep 17 00:00:00 2001
From: Marshall Clow
Date: Tue, 15 Sep 2020 09:56:03 -0400
Subject: [PATCH 0708/1079] [libc++] Use allocator_traits to consistently allocate/deallocate/construct/destroy objects in std::any

https://llvm.org/PR45099 notes (correctly) that we're inconsistent in memory
allocation in `std::any`. We allocate memory with `std::allocator<T>::allocate`,
construct with placement new, destroy by calling the destructor directly, and
deallocate by calling `delete`. Most of those are customizable by the user, but
in different ways. The standard is silent on how these things are to be
accomplished.

This patch makes it so we use `allocator_traits<allocator<T>>` for all of these
operations (allocate, construct, destruct, deallocate). This is, at least,
consistent.

Fixes https://llvm.org/PR45099.

Differential Revision: https://reviews.llvm.org/D81133
---
 libcxx/include/any                           |  27 +++-
 .../libcxx/utilities/any/allocator.pass.cpp  | 136 ++++++++++++++++++
 2 files changed, 156 insertions(+), 7 deletions(-)
 create mode 100644 libcxx/test/libcxx/utilities/any/allocator.pass.cpp

diff --git a/libcxx/include/any b/libcxx/include/any
index 36b07c9d7e753..7546f31248772 100644
--- a/libcxx/include/any
+++ b/libcxx/include/any
@@ -82,7 +82,6 @@ namespace std {
 
 #include
 #include
-#include
 #include
 #include
 #include
@@ -368,7 +367,11 @@ namespace __any_imp
   template <class _Tp, class ..._Args>
   _LIBCPP_INLINE_VISIBILITY
   static _Tp& __create(any & __dest, _Args&&...
__args) { - _Tp* __ret = ::new (static_cast(&__dest.__s.__buf)) _Tp(_VSTD::forward<_Args>(__args)...); + typedef allocator<_Tp> _Alloc; + typedef allocator_traits<_Alloc> _ATraits; + _Alloc __a; + _Tp * __ret = static_cast<_Tp*>(static_cast(&__dest.__s.__buf)); + _ATraits::construct(__a, __ret, _VSTD::forward<_Args>(__args)...); __dest.__h = &_SmallHandler::__handle; return *__ret; } @@ -376,8 +379,11 @@ namespace __any_imp private: _LIBCPP_INLINE_VISIBILITY static void __destroy(any & __this) { - _Tp & __value = *static_cast<_Tp *>(static_cast(&__this.__s.__buf)); - __value.~_Tp(); + typedef allocator<_Tp> _Alloc; + typedef allocator_traits<_Alloc> _ATraits; + _Alloc __a; + _Tp * __p = static_cast<_Tp *>(static_cast(&__this.__s.__buf)); + _ATraits::destroy(__a, __p); __this.__h = nullptr; } @@ -445,10 +451,12 @@ namespace __any_imp _LIBCPP_INLINE_VISIBILITY static _Tp& __create(any & __dest, _Args&&... __args) { typedef allocator<_Tp> _Alloc; + typedef allocator_traits<_Alloc> _ATraits; typedef __allocator_destructor<_Alloc> _Dp; _Alloc __a; - unique_ptr<_Tp, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); - _Tp* __ret = ::new ((void*)__hold.get()) _Tp(_VSTD::forward<_Args>(__args)...); + unique_ptr<_Tp, _Dp> __hold(_ATraits::allocate(__a, 1), _Dp(__a, 1)); + _Tp * __ret = __hold.get(); + _ATraits::construct(__a, __ret, _VSTD::forward<_Args>(__args)...); __dest.__s.__ptr = __hold.release(); __dest.__h = &_LargeHandler::__handle; return *__ret; @@ -458,7 +466,12 @@ namespace __any_imp _LIBCPP_INLINE_VISIBILITY static void __destroy(any & __this){ - delete static_cast<_Tp*>(__this.__s.__ptr); + typedef allocator<_Tp> _Alloc; + typedef allocator_traits<_Alloc> _ATraits; + _Alloc __a; + _Tp * __p = static_cast<_Tp *>(__this.__s.__ptr); + _ATraits::destroy(__a, __p); + _ATraits::deallocate(__a, __p, 1); __this.__h = nullptr; } diff --git a/libcxx/test/libcxx/utilities/any/allocator.pass.cpp b/libcxx/test/libcxx/utilities/any/allocator.pass.cpp new file mode 100644 index 0000000000000..c6800eb832bda --- /dev/null +++ b/libcxx/test/libcxx/utilities/any/allocator.pass.cpp @@ -0,0 +1,136 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14 + +// + +// Check that we're consistently using std::allocator_traits to +// allocate/deallocate/construct/destroy objects in std::any. +// See https://llvm.org/PR45099 for details. 
+ +#include +#include +#include +#include +#include +#include + +#include "test_macros.h" + + +// Make sure we don't fit in std::any's SBO +struct Large { char big[sizeof(std::any) + 1]; }; + +// Make sure we fit in std::any's SBO +struct Small { }; + +bool Large_was_allocated = false; +bool Large_was_constructed = false; +bool Large_was_destroyed = false; +bool Large_was_deallocated = false; + +bool Small_was_allocated = false; +bool Small_was_constructed = false; +bool Small_was_destroyed = false; +bool Small_was_deallocated = false; + +namespace std { + template <> + struct allocator { + using value_type = Large; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using propagate_on_container_move_assignment = std::true_type; + using is_always_equal = std::true_type; + + Large* allocate(std::size_t n) { + Large_was_allocated = true; + return static_cast(::operator new(n)); + } + + template + void construct(Large* p, Args&& ...args) { + new (p) Large(std::forward(args)...); + Large_was_constructed = true; + } + + void destroy(Large* p) { + p->~Large(); + Large_was_destroyed = true; + } + + void deallocate(Large* p, std::size_t) { + Large_was_deallocated = true; + return ::operator delete(p); + } + }; + + template <> + struct allocator { + using value_type = Small; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using propagate_on_container_move_assignment = std::true_type; + using is_always_equal = std::true_type; + + Small* allocate(std::size_t n) { + Small_was_allocated = true; + return static_cast(::operator new(n)); + } + + template + void construct(Small* p, Args&& ...args) { + new (p) Small(std::forward(args)...); + Small_was_constructed = true; + } + + void destroy(Small* p) { + p->~Small(); + Small_was_destroyed = true; + } + + void deallocate(Small* p, std::size_t) { + Small_was_deallocated = true; + return ::operator delete(p); + } + }; +} // end namespace std + + +int main(int, char**) { + // Test large types + { + { + std::any a = Large(); + (void)a; + + assert(Large_was_allocated); + assert(Large_was_constructed); + } + + assert(Large_was_destroyed); + assert(Large_was_deallocated); + } + + // Test small types + { + { + std::any a = Small(); + (void)a; + + assert(!Small_was_allocated); + assert(Small_was_constructed); + } + + assert(Small_was_destroyed); + assert(!Small_was_deallocated); + } + + return 0; +} From 2b42d53e5ea4ee648cde5b2f73523f01f2405631 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 15 Sep 2020 15:23:19 +0100 Subject: [PATCH 0709/1079] SLPVectorizer.h - remove unnecessary AliasAnalysis.h include. NFCI. Forward declare AAResults instead of the (old) AliasAnalysis type. Remove includes from SLPVectorizer.cpp that are already included in SLPVectorizer.h. 
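
The underlying pattern, as a minimal sketch (SomePass is a made-up name
for illustration, not the actual header):

  // A forward declaration suffices when a header uses a type only by
  // pointer or reference:
  namespace llvm {
  class AAResults; // no AliasAnalysis.h include needed for this
  } // namespace llvm

  struct SomePass {
    llvm::AAResults *AA = nullptr; // pointer member: no definition required
  };

Only the translation units that actually dereference AA need the full
definition, and those include AliasAnalysis.h themselves.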
--- .../llvm/Transforms/Vectorize/SLPVectorizer.h | 7 ++++--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 13 ++++--------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h index 77236dec75dc2..52a57939209cc 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -22,11 +22,11 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/None.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/PassManager.h" namespace llvm { +class AAResults; class AssumptionCache; class BasicBlock; class CmpInst; @@ -34,6 +34,7 @@ class DataLayout; class DemandedBits; class DominatorTree; class Function; +class GetElementPtrInst; class InsertElementInst; class InsertValueInst; class Instruction; @@ -63,7 +64,7 @@ struct SLPVectorizerPass : public PassInfoMixin { ScalarEvolution *SE = nullptr; TargetTransformInfo *TTI = nullptr; TargetLibraryInfo *TLI = nullptr; - AliasAnalysis *AA = nullptr; + AAResults *AA = nullptr; LoopInfo *LI = nullptr; DominatorTree *DT = nullptr; AssumptionCache *AC = nullptr; @@ -75,7 +76,7 @@ struct SLPVectorizerPass : public PassInfoMixin { // Glue for old PM. bool runImpl(Function &F, ScalarEvolution *SE_, TargetTransformInfo *TTI_, - TargetLibraryInfo *TLI_, AliasAnalysis *AA_, LoopInfo *LI_, + TargetLibraryInfo *TLI_, AAResults *AA_, LoopInfo *LI_, DominatorTree *DT_, AssumptionCache *AC_, DemandedBits *DB_, OptimizationRemarkEmitter *ORE_); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 000bd863a7c54..e73113dab6d45 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -17,11 +17,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Vectorize/SLPVectorizer.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/MapVector.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" @@ -30,7 +27,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" @@ -67,7 +63,6 @@ #include "llvm/IR/Module.h" #include "llvm/IR/NoFolder.h" #include "llvm/IR/Operator.h" -#include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" @@ -507,7 +502,7 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst, } /// \returns the AA location that is being access by the instruction. 
-static MemoryLocation getLocation(Instruction *I, AliasAnalysis *AA) {
+static MemoryLocation getLocation(Instruction *I, AAResults *AA) {
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return MemoryLocation::get(SI);
   if (LoadInst *LI = dyn_cast<LoadInst>(I))
@@ -544,7 +539,7 @@ class BoUpSLP {
       MapVector<Value *, SmallVector<Instruction *, 2>>;
 
   BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
-          TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
+          TargetLibraryInfo *TLi, AAResults *Aa, LoopInfo *Li,
           DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB,
           const DataLayout *DL, OptimizationRemarkEmitter *ORE)
       : F(Func), SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC),
@@ -2240,7 +2235,7 @@ class BoUpSLP {
   ScalarEvolution *SE;
   TargetTransformInfo *TTI;
   TargetLibraryInfo *TLI;
-  AliasAnalysis *AA;
+  AAResults *AA;
   LoopInfo *LI;
   DominatorTree *DT;
   AssumptionCache *AC;
@@ -5708,7 +5703,7 @@ PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &A
 
 bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
                                 TargetTransformInfo *TTI_,
-                                TargetLibraryInfo *TLI_, AliasAnalysis *AA_,
+                                TargetLibraryInfo *TLI_, AAResults *AA_,
                                 LoopInfo *LI_, DominatorTree *DT_,
                                 AssumptionCache *AC_, DemandedBits *DB_,
                                 OptimizationRemarkEmitter *ORE_) {
From 01f5fcd8290349265e6039ad9089b086ea783f00 Mon Sep 17 00:00:00 2001
From: Valentin Clement
Date: Tue, 15 Sep 2020 11:41:50 -0400
Subject: [PATCH 0710/1079] [mlir][openacc] Add loop op verifier

Add a verifier for the loop op in the OpenACC dialect. It checks the basic
restrictions from section 2.9 (Loop construct) of the OpenACC 3.0 spec.

Reviewed By: ftynse

Differential Revision: https://reviews.llvm.org/D87546
---
 .../mlir/Dialect/OpenACC/OpenACCOps.td        | 14 ++--
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp       | 38 +++++++---
 mlir/test/Dialect/OpenACC/invalid.mlir        | 70 +++++++++++++++++++
 mlir/test/Dialect/OpenACC/ops.mlir            | 40 +++++++++--
 4 files changed, 143 insertions(+), 19 deletions(-)
 create mode 100644 mlir/test/Dialect/OpenACC/invalid.mlir

diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index c0178ebe9e48a..0d37215ea4e54 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -200,7 +200,8 @@ def OpenACC_TerminatorOp : OpenACC_Op<"terminator", [Terminator]> {
 //===----------------------------------------------------------------------===//
 
 def OpenACC_LoopOp : OpenACC_Op<"loop",
-    [AttrSizedOperandSegments]> {
+    [AttrSizedOperandSegments,
+     SingleBlockImplicitTerminator<"acc::YieldOp">]> {
 
   let summary = "loop construct";
 
   let description = [{
@@ -228,13 +229,14 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
                        Optional:$gangStatic,
                        Optional:$workerNum,
                        Optional:$vectorLength,
-                       UnitAttr:$loopSeq,
-                       UnitAttr:$loopIndependent,
-                       UnitAttr:$loopAuto,
+                       UnitAttr:$seq,
+                       UnitAttr:$independent,
+                       UnitAttr:$auto_,
                        Variadic:$tileOperands,
                        Variadic:$privateOperands,
                        OptionalAttr:$reductionOp,
-                       Variadic:$reductionOperands);
+                       Variadic:$reductionOperands,
+                       DefaultValuedAttr:$exec_mapping);
 
   let results = (outs Variadic:$results);
 
@@ -256,7 +258,7 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
     static StringRef getReductionKeyword() { return "reduction"; }
   }];
 
-  let verifier = ?;
+  let verifier = [{ return ::verifyLoopOp(*this); }];
 }
 
 // Yield operation for the acc.loop and acc.parallel operations.
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index 11a774828194e..3e4d1c3f0e7dc 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -487,7 +487,7 @@ static void print(OpAsmPrinter &printer, DataOp &op) { /// region attr-dict? static ParseResult parseLoopOp(OpAsmParser &parser, OperationState &result) { Builder &builder = parser.getBuilder(); - unsigned executionMapping = 0; + unsigned executionMapping = OpenACCExecMapping::NONE; SmallVector operandTypes; SmallVector privateOperands, reductionOperands; SmallVector tileOperands; @@ -567,7 +567,7 @@ static ParseResult parseLoopOp(OpAsmParser &parser, OperationState &result) { reductionOperands, operandTypes, result))) return failure(); - if (executionMapping != 0) + if (executionMapping != acc::OpenACCExecMapping::NONE) result.addAttribute(LoopOp::getExecutionMappingAttrName(), builder.getI64IntegerAttr(executionMapping)); @@ -597,13 +597,7 @@ static ParseResult parseLoopOp(OpAsmParser &parser, OperationState &result) { static void print(OpAsmPrinter &printer, LoopOp &op) { printer << LoopOp::getOperationName(); - unsigned execMapping = - (op.getAttrOfType(LoopOp::getExecutionMappingAttrName()) != - nullptr) - ? op.getAttrOfType(LoopOp::getExecutionMappingAttrName()) - .getInt() - : 0; - + unsigned execMapping = op.exec_mapping(); if (execMapping & OpenACCExecMapping::GANG) { printer << " " << LoopOp::getGangKeyword(); Value gangNum = op.gangNum(); @@ -661,5 +655,31 @@ static void print(OpAsmPrinter &printer, LoopOp &op) { LoopOp::getOperandSegmentSizeAttr()}); } +static LogicalResult verifyLoopOp(acc::LoopOp loopOp) { + // auto, independent and seq attribute are mutually exclusive. + if ((loopOp.auto_() && (loopOp.independent() || loopOp.seq())) || + (loopOp.independent() && loopOp.seq())) { + loopOp.emitError("only one of " + acc::LoopOp::getAutoAttrName() + ", " + + acc::LoopOp::getIndependentAttrName() + ", " + + acc::LoopOp::getSeqAttrName() + + " can be present at the same time"); + return failure(); + } + + // Gang, worker and vector are incompatible with seq. + if (loopOp.seq() && loopOp.exec_mapping() != OpenACCExecMapping::NONE) { + loopOp.emitError("gang, worker or vector cannot appear with the seq attr"); + return failure(); + } + + // Check non-empty body(). 
+ if (loopOp.region().empty()) { + loopOp.emitError("expected non-empty body."); + return failure(); + } + + return success(); +} + #define GET_OP_CLASSES #include "mlir/Dialect/OpenACC/OpenACCOps.cpp.inc" diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir new file mode 100644 index 0000000000000..61a13211ba262 --- /dev/null +++ b/mlir/test/Dialect/OpenACC/invalid.mlir @@ -0,0 +1,70 @@ +// RUN: mlir-opt -split-input-file -verify-diagnostics %s + +// expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} +acc.loop gang { + "some.op"() : () -> () + acc.yield +} attributes {seq} + +// ----- + +// expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} +acc.loop worker { + "some.op"() : () -> () + acc.yield +} attributes {seq} + +// ----- + +// expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} +acc.loop vector { + "some.op"() : () -> () + acc.yield +} attributes {seq} + +// ----- + +// expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} +acc.loop gang worker { + "some.op"() : () -> () + acc.yield +} attributes {seq} + +// ----- + +// expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} +acc.loop gang vector { + "some.op"() : () -> () + acc.yield +} attributes {seq} + +// ----- + +// expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} +acc.loop worker vector { + "some.op"() : () -> () + acc.yield +} attributes {seq} + +// ----- + +// expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} +acc.loop gang worker vector { + "some.op"() : () -> () + acc.yield +} attributes {seq} + +// ----- + +// expected-error@+1 {{expected non-empty body.}} +acc.loop { +} + +// ----- + +// expected-error@+1 {{only one of auto, independent, seq can be present at the same time}} +acc.loop { + acc.yield +} attributes {auto_, seq} + +// ----- diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir index b534f703e05e2..b1a78c61d65d9 100644 --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -1,8 +1,8 @@ -// RUN: mlir-opt %s | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s | FileCheck %s // Verify the printed output can be parsed. -// RUN: mlir-opt %s | mlir-opt | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s | mlir-opt -allow-unregistered-dialect | FileCheck %s // Verify the generic form can be parsed. 
-// RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect -mlir-print-op-generic %s | mlir-opt -allow-unregistered-dialect | FileCheck %s
 
 func @compute1(%A: memref<10x10xf32>, %B: memref<10x10xf32>, %C: memref<10x10xf32>) -> memref<10x10xf32> {
   %c0 = constant 0 : index
@@ -186,27 +186,43 @@ func @compute3(%a: memref<10x10xf32>, %b: memref<10x10xf32>, %c: memref<10xf32>,
 // CHECK-NEXT:   return %{{.*}} : memref<10xf32>
 // CHECK-NEXT: }
 
-func @testop() -> () {
+func @testop(%a: memref<10xf32>) -> () {
   %workerNum = constant 1 : i64
   %vectorLength = constant 128 : i64
   %gangNum = constant 8 : i64
   %gangStatic = constant 2 : i64
   %tileSize = constant 2 : i64
   acc.loop gang worker vector {
+    "some.op"() : () -> ()
+    acc.yield
   }
   acc.loop gang(num: %gangNum) {
+    "some.op"() : () -> ()
+    acc.yield
   }
   acc.loop gang(static: %gangStatic) {
+    "some.op"() : () -> ()
+    acc.yield
   }
   acc.loop worker(%workerNum) {
+    "some.op"() : () -> ()
+    acc.yield
   }
   acc.loop vector(%vectorLength) {
+    "some.op"() : () -> ()
+    acc.yield
   }
   acc.loop gang(num: %gangNum) worker vector {
+    "some.op"() : () -> ()
+    acc.yield
   }
   acc.loop gang(num: %gangNum, static: %gangStatic) worker(%workerNum) vector(%vectorLength) {
+    "some.op"() : () -> ()
+    acc.yield
   }
   acc.loop tile(%tileSize : i64, %tileSize : i64) {
+    "some.op"() : () -> ()
+    acc.yield
   }
   return
 }
@@ -217,20 +233,36 @@
 // CHECK-NEXT: [[GANGSTATIC:%.*]] = constant 2 : i64
 // CHECK-NEXT: [[TILESIZE:%.*]] = constant 2 : i64
 // CHECK-NEXT: acc.loop gang worker vector {
+// CHECK-NEXT:   "some.op"() : () -> ()
+// CHECK-NEXT:   acc.yield
 // CHECK-NEXT: }
 // CHECK-NEXT: acc.loop gang(num: [[GANGNUM]]) {
+// CHECK-NEXT:   "some.op"() : () -> ()
+// CHECK-NEXT:   acc.yield
 // CHECK-NEXT: }
 // CHECK-NEXT: acc.loop gang(static: [[GANGSTATIC]]) {
+// CHECK-NEXT:   "some.op"() : () -> ()
+// CHECK-NEXT:   acc.yield
 // CHECK-NEXT: }
 // CHECK-NEXT: acc.loop worker([[WORKERNUM]]) {
+// CHECK-NEXT:   "some.op"() : () -> ()
+// CHECK-NEXT:   acc.yield
 // CHECK-NEXT: }
 // CHECK-NEXT: acc.loop vector([[VECTORLENGTH]]) {
+// CHECK-NEXT:   "some.op"() : () -> ()
+// CHECK-NEXT:   acc.yield
 // CHECK-NEXT: }
 // CHECK-NEXT: acc.loop gang(num: [[GANGNUM]]) worker vector {
+// CHECK-NEXT:   "some.op"() : () -> ()
+// CHECK-NEXT:   acc.yield
 // CHECK-NEXT: }
 // CHECK-NEXT: acc.loop gang(num: [[GANGNUM]], static: [[GANGSTATIC]]) worker([[WORKERNUM]]) vector([[VECTORLENGTH]]) {
+// CHECK-NEXT:   "some.op"() : () -> ()
+// CHECK-NEXT:   acc.yield
 // CHECK-NEXT: }
 // CHECK-NEXT: acc.loop tile([[TILESIZE]]: i64, [[TILESIZE]]: i64) {
+// CHECK-NEXT:   "some.op"() : () -> ()
+// CHECK-NEXT:   acc.yield
 // CHECK-NEXT: }
From dd1d5488e47d0a89217dfd22a726c3d3ad2b4984 Mon Sep 17 00:00:00 2001
From: Kristóf Umann
Date: Tue, 15 Sep 2020 17:43:02 +0200
Subject: [PATCH 0711/1079] [analyzer][Liveness][NFC] Get rid of statement liveness, because such a thing doesn't exist

The summary and very short discussion in D82122 summarize what's happening
here. In short, liveness talks about variables or expressions, anything
that has a value. Well, statements simply don't have one.
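
To make the distinction concrete with a made-up snippet (not from the
patch):

  int f(int a) {
    int x = a + 1; // 'a + 1' is an expression: it yields a value
    if (x > 0)     // 'x > 0' is an expression, so liveness can track it
      return x;    // 'x' is live on this path
    return 0;      // the 'if' statement itself yields no value, so
  }                // "statement liveness" was never a meaningful notion
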
Differential Revision: https://reviews.llvm.org/D82598
---
 .../analyzer/developer-docs/DebugChecks.rst   |   2 +-
 .../clang/Analysis/Analyses/LiveVariables.h   |  18 ++--
 .../clang/StaticAnalyzer/Checkers/Checkers.td |   4 +-
 .../Core/PathSensitive/SymbolManager.h        |   2 +-
 clang/lib/Analysis/LiveVariables.cpp          | 102 +++++++++---------
 .../StaticAnalyzer/Checkers/DebugCheckers.cpp |  10 +-
 clang/lib/StaticAnalyzer/Core/Environment.cpp |  14 +--
 .../lib/StaticAnalyzer/Core/SymbolManager.cpp |   2 +-
 clang/test/Analysis/live-stmts.cpp            |  90 ++++++++++------
 clang/test/Analysis/live-stmts.mm             |  50 +++------
 10 files changed, 150 insertions(+), 144 deletions(-)

diff --git a/clang/docs/analyzer/developer-docs/DebugChecks.rst b/clang/docs/analyzer/developer-docs/DebugChecks.rst
index 48b584a463072..45985a1dfd793 100644
--- a/clang/docs/analyzer/developer-docs/DebugChecks.rst
+++ b/clang/docs/analyzer/developer-docs/DebugChecks.rst
@@ -30,7 +30,7 @@ using a 'dot' format viewer (such as Graphviz on macOS) instead.
 - debug.DumpLiveVars: Show the results of live variable analysis for each
   top-level function being analyzed.

-- debug.DumpLiveStmts: Show the results of live statement analysis for each
+- debug.DumpLiveExprs: Show the results of live expression analysis for each
   top-level function being analyzed.

 - debug.ViewExplodedGraph: Show the Exploded Graphs generated for the
diff --git a/clang/include/clang/Analysis/Analyses/LiveVariables.h b/clang/include/clang/Analysis/Analyses/LiveVariables.h
index 2e7dd5d81678a..8a3dd0c35e64c 100644
--- a/clang/include/clang/Analysis/Analyses/LiveVariables.h
+++ b/clang/include/clang/Analysis/Analyses/LiveVariables.h
@@ -30,22 +30,22 @@ class LiveVariables : public ManagedAnalysis {
   class LivenessValues {
   public:

-    llvm::ImmutableSet<const Stmt *> liveStmts;
+    llvm::ImmutableSet<const Expr *> liveExprs;
     llvm::ImmutableSet<const VarDecl *> liveDecls;
     llvm::ImmutableSet<const BindingDecl *> liveBindings;

     bool equals(const LivenessValues &V) const;

     LivenessValues()
-      : liveStmts(nullptr), liveDecls(nullptr), liveBindings(nullptr) {}
+      : liveExprs(nullptr), liveDecls(nullptr), liveBindings(nullptr) {}

-    LivenessValues(llvm::ImmutableSet<const Stmt *> LiveStmts,
+    LivenessValues(llvm::ImmutableSet<const Expr *> liveExprs,
                    llvm::ImmutableSet<const VarDecl *> LiveDecls,
                    llvm::ImmutableSet<const BindingDecl *> LiveBindings)
-      : liveStmts(LiveStmts), liveDecls(LiveDecls),
+      : liveExprs(liveExprs), liveDecls(LiveDecls),
         liveBindings(LiveBindings) {}

-    bool isLive(const Stmt *S) const;
+    bool isLive(const Expr *E) const;
     bool isLive(const VarDecl *D) const;

     friend class LiveVariables;
@@ -83,17 +83,17 @@ class LiveVariables : public ManagedAnalysis {
   /// only returns liveness information for block-level expressions.
   bool isLive(const Stmt *S, const VarDecl *D);

-  /// Returns true the block-level expression "value" is live
+  /// Returns true the block-level expression value is live
   /// before the given block-level expression (see runOnAllBlocks).
-  bool isLive(const Stmt *Loc, const Stmt *StmtVal);
+  bool isLive(const Stmt *Loc, const Expr *Val);

   /// Print to stderr the variable liveness information associated with
   /// each basic block.
   void dumpBlockLiveness(const SourceManager &M);

-  /// Print to stderr the statement liveness information associated with
+  /// Print to stderr the expression liveness information associated with
   /// each basic block.
-  void dumpStmtLiveness(const SourceManager &M);
+  void dumpExprLiveness(const SourceManager &M);

   void runOnAllBlocks(Observer &obs);

diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index cbc048ba74c42..3540fe5fe55c5 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -1478,8 +1478,8 @@ def LiveVariablesDumper : Checker<"DumpLiveVars">,
   HelpText<"Print results of live variable analysis">,
   Documentation<NotDocumented>;

-def LiveStatementsDumper : Checker<"DumpLiveStmts">,
-  HelpText<"Print results of live statement analysis">,
+def LiveExpressionsDumper : Checker<"DumpLiveExprs">,
+  HelpText<"Print results of live expression analysis">,
   Documentation<NotDocumented>;

 def CFGViewer : Checker<"ViewCFG">,
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
index 75dfbde5c1519..c71cb88f5574c 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
@@ -539,7 +539,7 @@ class SymbolReaper {
   bool isLive(SymbolRef sym);
   bool isLiveRegion(const MemRegion *region);
-  bool isLive(const Stmt *ExprVal, const LocationContext *LCtx) const;
+  bool isLive(const Expr *ExprVal, const LocationContext *LCtx) const;
   bool isLive(const VarRegion *VR, bool includeStoreBindings = false) const;

   /// Unconditionally marks a symbol as live.
diff --git a/clang/lib/Analysis/LiveVariables.cpp b/clang/lib/Analysis/LiveVariables.cpp
index d24c40b457b4b..8cdc4cc5bd613 100644
--- a/clang/lib/Analysis/LiveVariables.cpp
+++ b/clang/lib/Analysis/LiveVariables.cpp
@@ -27,7 +27,7 @@ namespace {
 class LiveVariablesImpl {
 public:
   AnalysisDeclContext &analysisContext;
-  llvm::ImmutableSet<const Stmt *>::Factory SSetFact;
+  llvm::ImmutableSet<const Expr *>::Factory ESetFact;
   llvm::ImmutableSet<const VarDecl *>::Factory DSetFact;
   llvm::ImmutableSet<const BindingDecl *>::Factory BSetFact;
   llvm::DenseMap<const CFGBlock *, LiveVariables::LivenessValues> blocksEndToLiveness;
@@ -45,16 +45,15 @@ class LiveVariablesImpl {
                     LiveVariables::Observer *obs = nullptr);

   void dumpBlockLiveness(const SourceManager& M);
-  void dumpStmtLiveness(const SourceManager& M);
+  void dumpExprLiveness(const SourceManager& M);

   LiveVariablesImpl(AnalysisDeclContext &ac, bool KillAtAssign)
-    : analysisContext(ac),
-      SSetFact(false), // Do not canonicalize ImmutableSets by default.
-      DSetFact(false), // This is a *major* performance win.
-      BSetFact(false),
-      killAtAssign(KillAtAssign) {}
+      : analysisContext(ac),
+        ESetFact(false), // Do not canonicalize ImmutableSets by default.
+        DSetFact(false), // This is a *major* performance win.
+        BSetFact(false), killAtAssign(KillAtAssign) {}
 };
-}
+} // namespace

 static LiveVariablesImpl &getImpl(void *x) {
   return *((LiveVariablesImpl *) x);
@@ -64,8 +63,8 @@ static LiveVariablesImpl &getImpl(void *x) {
 // Operations and queries on LivenessValues.
//===----------------------------------------------------------------------===//

-bool LiveVariables::LivenessValues::isLive(const Stmt *S) const {
-  return liveStmts.contains(S);
+bool LiveVariables::LivenessValues::isLive(const Expr *E) const {
+  return liveExprs.contains(E);
 }

 bool LiveVariables::LivenessValues::isLive(const VarDecl *D) const {
@@ -97,10 +96,10 @@ LiveVariables::LivenessValues
 LiveVariablesImpl::merge(LiveVariables::LivenessValues valsA,
                          LiveVariables::LivenessValues valsB) {

-  llvm::ImmutableSetRef<const Stmt *>
-    SSetRefA(valsA.liveStmts.getRootWithoutRetain(), SSetFact.getTreeFactory()),
-    SSetRefB(valsB.liveStmts.getRootWithoutRetain(), SSetFact.getTreeFactory());
-
+  llvm::ImmutableSetRef<const Expr *> SSetRefA(
+      valsA.liveExprs.getRootWithoutRetain(), ESetFact.getTreeFactory()),
+      SSetRefB(valsB.liveExprs.getRootWithoutRetain(),
+               ESetFact.getTreeFactory());

   llvm::ImmutableSetRef<const VarDecl *>
     DSetRefA(valsA.liveDecls.getRootWithoutRetain(), DSetFact.getTreeFactory()),
@@ -122,7 +121,7 @@ LiveVariablesImpl::merge(LiveVariables::LivenessValues valsA,
 }

 bool LiveVariables::LivenessValues::equals(const LivenessValues &V) const {
-  return liveStmts == V.liveStmts && liveDecls == V.liveDecls;
+  return liveExprs == V.liveExprs && liveDecls == V.liveDecls;
 }

//===----------------------------------------------------------------------===//
@@ -141,8 +140,8 @@ bool LiveVariables::isLive(const Stmt *S, const VarDecl *D) {
   return isAlwaysAlive(D) || getImpl(impl).stmtsToLiveness[S].isLive(D);
 }

-bool LiveVariables::isLive(const Stmt *Loc, const Stmt *S) {
-  return getImpl(impl).stmtsToLiveness[Loc].isLive(S);
+bool LiveVariables::isLive(const Stmt *Loc, const Expr *Val) {
+  return getImpl(impl).stmtsToLiveness[Loc].isLive(Val);
 }

//===----------------------------------------------------------------------===//
@@ -186,27 +185,27 @@ static const VariableArrayType *FindVA(QualType Ty) {
   return nullptr;
 }

-static const Stmt *LookThroughStmt(const Stmt *S) {
-  while (S) {
-    if (const Expr *Ex = dyn_cast<Expr>(S))
-      S = Ex->IgnoreParens();
-    if (const FullExpr *FE = dyn_cast<FullExpr>(S)) {
-      S = FE->getSubExpr();
+static const Expr *LookThroughExpr(const Expr *E) {
+  while (E) {
+    if (const Expr *Ex = dyn_cast<Expr>(E))
+      E = Ex->IgnoreParens();
+    if (const FullExpr *FE = dyn_cast<FullExpr>(E)) {
+      E = FE->getSubExpr();
       continue;
     }
-    if (const OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(S)) {
-      S = OVE->getSourceExpr();
+    if (const OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(E)) {
+      E = OVE->getSourceExpr();
       continue;
     }
     break;
   }
-  return S;
+  return E;
 }

-static void AddLiveStmt(llvm::ImmutableSet<const Stmt *> &Set,
-                        llvm::ImmutableSet<const Stmt *>::Factory &F,
-                        const Stmt *S) {
-  Set = F.add(Set, LookThroughStmt(S));
+static void AddLiveExpr(llvm::ImmutableSet<const Expr *> &Set,
+                        llvm::ImmutableSet<const Expr *>::Factory &F,
+                        const Expr *E) {
+  Set = F.add(Set, LookThroughExpr(E));
 }

 void TransferFunctions::Visit(Stmt *S) {
@@ -215,8 +214,8 @@ void TransferFunctions::Visit(Stmt *S) {

   StmtVisitor<TransferFunctions>::Visit(S);

-  if (isa<Expr>(S)) {
-    val.liveStmts = LV.SSetFact.remove(val.liveStmts, S);
+  if (const auto *E = dyn_cast<Expr>(S)) {
+    val.liveExprs = LV.ESetFact.remove(val.liveExprs, E);
   }

   // Mark all children expressions live.
@@ -233,7 +232,7 @@ void TransferFunctions::Visit(Stmt *S) {
       // Include the implicit "this" pointer as being live.
      CXXMemberCallExpr *CE = cast<CXXMemberCallExpr>(S);
       if (Expr *ImplicitObj = CE->getImplicitObjectArgument()) {
-        AddLiveStmt(val.liveStmts, LV.SSetFact, ImplicitObj);
+        AddLiveExpr(val.liveExprs, LV.ESetFact, ImplicitObj);
       }
       break;
     }
@@ -250,7 +249,7 @@ void TransferFunctions::Visit(Stmt *S) {
       if (const VarDecl *VD = dyn_cast<VarDecl>(DS->getSingleDecl())) {
         for (const VariableArrayType* VA = FindVA(VD->getType());
              VA != nullptr; VA = FindVA(VA->getElementType())) {
-          AddLiveStmt(val.liveStmts, LV.SSetFact, VA->getSizeExpr());
+          AddLiveExpr(val.liveExprs, LV.ESetFact, VA->getSizeExpr());
         }
       }
       break;
@@ -263,7 +262,7 @@ void TransferFunctions::Visit(Stmt *S) {
       if (OpaqueValueExpr *OV = dyn_cast<OpaqueValueExpr>(child))
         child = OV->getSourceExpr();
       child = child->IgnoreParens();
-      val.liveStmts = LV.SSetFact.add(val.liveStmts, child);
+      val.liveExprs = LV.ESetFact.add(val.liveExprs, child);
       return;
     }

@@ -284,36 +283,39 @@ void TransferFunctions::Visit(Stmt *S) {
       // If one of the branches is an expression rather than a compound
       // statement, it will be bad if we mark it as live at the terminator
       // of the if-statement (i.e., immediately after the condition expression).
-      AddLiveStmt(val.liveStmts, LV.SSetFact, cast<IfStmt>(S)->getCond());
+      AddLiveExpr(val.liveExprs, LV.ESetFact, cast<IfStmt>(S)->getCond());
       return;
     }
     case Stmt::WhileStmtClass: {
       // If the loop body is an expression rather than a compound statement,
       // it will be bad if we mark it as live at the terminator of the loop
       // (i.e., immediately after the condition expression).
-      AddLiveStmt(val.liveStmts, LV.SSetFact, cast<WhileStmt>(S)->getCond());
+      AddLiveExpr(val.liveExprs, LV.ESetFact, cast<WhileStmt>(S)->getCond());
       return;
     }
     case Stmt::DoStmtClass: {
       // If the loop body is an expression rather than a compound statement,
       // it will be bad if we mark it as live at the terminator of the loop
       // (i.e., immediately after the condition expression).
-      AddLiveStmt(val.liveStmts, LV.SSetFact, cast<DoStmt>(S)->getCond());
+      AddLiveExpr(val.liveExprs, LV.ESetFact, cast<DoStmt>(S)->getCond());
       return;
     }
     case Stmt::ForStmtClass: {
       // If the loop body is an expression rather than a compound statement,
       // it will be bad if we mark it as live at the terminator of the loop
       // (i.e., immediately after the condition expression).
-      AddLiveStmt(val.liveStmts, LV.SSetFact, cast<ForStmt>(S)->getCond());
+      AddLiveExpr(val.liveExprs, LV.ESetFact, cast<ForStmt>(S)->getCond());
       return;
     }
   }

+  // HACK + FIXME: What is this? One could only guess that this is an attempt to
+  // fish for live values, for example, arguments from a call expression.
+  // Maybe we could take inspiration from UninitializedVariable analysis?
   for (Stmt *Child : S->children()) {
-    if (Child)
-      AddLiveStmt(val.liveStmts, LV.SSetFact, Child);
+    if (const auto *E = dyn_cast_or_null<Expr>(Child))
+      AddLiveExpr(val.liveExprs, LV.ESetFact, E);
   }
 }

@@ -416,7 +418,7 @@ VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *UE)
   const Expr *subEx = UE->getArgumentExpr();
   if (subEx->getType()->isVariableArrayType()) {
     assert(subEx->isLValue());
-    val.liveStmts = LV.SSetFact.add(val.liveStmts, subEx->IgnoreParens());
+    val.liveExprs = LV.ESetFact.add(val.liveExprs, subEx->IgnoreParens());
   }
 }

@@ -613,19 +615,19 @@ void LiveVariablesImpl::dumpBlockLiveness(const SourceManager &M) {
   llvm::errs() << "\n";
 }

-void LiveVariables::dumpStmtLiveness(const SourceManager &M) {
-  getImpl(impl).dumpStmtLiveness(M);
+void LiveVariables::dumpExprLiveness(const SourceManager &M) {
+  getImpl(impl).dumpExprLiveness(M);
 }

-void LiveVariablesImpl::dumpStmtLiveness(const SourceManager &M) {
+void LiveVariablesImpl::dumpExprLiveness(const SourceManager &M) {
   // Don't iterate over blockEndsToLiveness directly because it's not sorted.
-  for (auto I : *analysisContext.getCFG()) {
+  for (const CFGBlock *B : *analysisContext.getCFG()) {

-    llvm::errs() << "\n[ B" << I->getBlockID()
-                 << " (live statements at block exit) ]\n";
-    for (auto S : blocksEndToLiveness[I].liveStmts) {
+    llvm::errs() << "\n[ B" << B->getBlockID()
+                 << " (live expressions at block exit) ]\n";
+    for (const Expr *E : blocksEndToLiveness[B].liveExprs) {
       llvm::errs() << "\n";
-      S->dump();
+      E->dump();
     }
     llvm::errs() << "\n";
   }
diff --git a/clang/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp b/clang/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp
index 03b7cbd1c833d..7cdd78b8adfb7 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp
@@ -131,21 +131,21 @@ bool ento::shouldRegisterLiveVariablesDumper(const CheckerManager &mgr) {
//===----------------------------------------------------------------------===//

 namespace {
-class LiveStatementsDumper : public Checker<check::ASTCodeBody> {
+class LiveExpressionsDumper : public Checker<check::ASTCodeBody> {
 public:
   void checkASTCodeBody(const Decl *D, AnalysisManager& Mgr,
                         BugReporter &BR) const {
     if (LiveVariables *L = Mgr.getAnalysis<LiveVariables>(D))
-      L->dumpStmtLiveness(Mgr.getSourceManager());
+      L->dumpExprLiveness(Mgr.getSourceManager());
   }
 };
 }

-void ento::registerLiveStatementsDumper(CheckerManager &mgr) {
-  mgr.registerChecker<LiveStatementsDumper>();
+void ento::registerLiveExpressionsDumper(CheckerManager &mgr) {
+  mgr.registerChecker<LiveExpressionsDumper>();
 }

-bool ento::shouldRegisterLiveStatementsDumper(const CheckerManager &mgr) {
+bool ento::shouldRegisterLiveExpressionsDumper(const CheckerManager &mgr) {
   return true;
 }
diff --git a/clang/lib/StaticAnalyzer/Core/Environment.cpp b/clang/lib/StaticAnalyzer/Core/Environment.cpp
index cba20b967b6fa..ee74745925283 100644
--- a/clang/lib/StaticAnalyzer/Core/Environment.cpp
+++ b/clang/lib/StaticAnalyzer/Core/Environment.cpp
@@ -191,19 +191,15 @@ EnvironmentManager::removeDeadBindings(Environment Env,
                                F.getTreeFactory());

   // Iterate over the block-expr bindings.
-  for (Environment::iterator I = Env.begin(), E = Env.end();
-       I != E; ++I) {
+  for (Environment::iterator I = Env.begin(), End = Env.end(); I != End; ++I) {
     const EnvironmentEntry &BlkExpr = I.getKey();
     const SVal &X = I.getData();

-    const bool IsBlkExprLive =
-        SymReaper.isLive(BlkExpr.getStmt(), BlkExpr.getLocationContext());
+    const Expr *E = dyn_cast<Expr>(BlkExpr.getStmt());
+    if (!E)
+      continue;

-    assert((isa<Expr>(BlkExpr.getStmt()) || !IsBlkExprLive) &&
-           "Only Exprs can be live, LivenessAnalysis argues about the liveness "
-           "of *values*!");
-
-    if (IsBlkExprLive) {
+    if (SymReaper.isLive(E, BlkExpr.getLocationContext())) {
       // Copy the binding to the new map.
       EBMapRef = EBMapRef.add(BlkExpr, X);
diff --git a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
index 700f91aed610f..79a8eef305768 100644
--- a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
@@ -489,7 +489,7 @@ bool SymbolReaper::isLive(SymbolRef sym) {
 }

 bool
-SymbolReaper::isLive(const Stmt *ExprVal, const LocationContext *ELCtx) const {
+SymbolReaper::isLive(const Expr *ExprVal, const LocationContext *ELCtx) const {
   if (LCtx == nullptr)
     return false;
diff --git a/clang/test/Analysis/live-stmts.cpp b/clang/test/Analysis/live-stmts.cpp
index 1b8a750c5e5ca..16954f30129f7 100644
--- a/clang/test/Analysis/live-stmts.cpp
+++ b/clang/test/Analysis/live-stmts.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_analyze_cc1 -w -analyzer-checker=debug.DumpLiveStmts %s 2>&1\
+// RUN: %clang_analyze_cc1 -w -analyzer-checker=debug.DumpLiveExprs %s 2>&1\
 // RUN:   | FileCheck %s

 int coin();
@@ -7,13 +7,24 @@ int coin();
 int testThatDumperWorks(int x, int y, int z) {
   return x ? y : z;
 }
-// CHECK: [ B0 (live statements at block exit) ]
+
+// [B5 (ENTRY)]
+//  |
+//  V
+// [B4 (x)] ?
[B2 (y)] : [B3 (z)] +// \ / +// ---|---- +// V +// [B1] --> [B0 (EXIT)] +// return + +// CHECK: [ B0 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B1 (live statements at block exit) ] +// CHECK: [ B1 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B2 (live statements at block exit) ] +// CHECK: [ B2 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-NEXT: DeclRefExpr {{.*}} 'y' 'int' // CHECK-EMPTY: @@ -24,7 +35,7 @@ int testThatDumperWorks(int x, int y, int z) { // CHECK-NEXT: `-DeclRefExpr {{.*}} 'x' 'int' // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B3 (live statements at block exit) ] +// CHECK: [ B3 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-NEXT: DeclRefExpr {{.*}} 'y' 'int' // CHECK-EMPTY: @@ -33,7 +44,7 @@ int testThatDumperWorks(int x, int y, int z) { // CHECK-NEXT: ImplicitCastExpr {{.*}} // CHECK-NEXT: `-ImplicitCastExpr {{.*}} // CHECK-NEXT: `-DeclRefExpr {{.*}} 'x' 'int' -// CHECK: [ B4 (live statements at block exit) ] +// CHECK: [ B4 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-NEXT: DeclRefExpr {{.*}} 'y' 'int' // CHECK-EMPTY: @@ -44,7 +55,7 @@ int testThatDumperWorks(int x, int y, int z) { // CHECK-NEXT: `-DeclRefExpr {{.*}} 'x' 'int' // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B5 (live statements at block exit) ] +// CHECK: [ B5 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-NEXT: DeclRefExpr {{.*}} 'y' 'int' // CHECK-EMPTY: @@ -61,22 +72,22 @@ void testIfBranchExpression(bool flag) { e; } } -// CHECK: [ B0 (live statements at block exit) ] +// CHECK: [ B0 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B1 (live statements at block exit) ] +// CHECK: [ B1 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B2 (live statements at block exit) ] +// CHECK: [ B2 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B3 (live statements at block exit) ] +// CHECK: [ B3 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B4 (live statements at block exit) ] +// CHECK: [ B4 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B5 (live statements at block exit) ] +// CHECK: [ B5 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: @@ -89,22 +100,22 @@ void testWhileBodyExpression(bool flag) { e; } } -// CHECK: [ B0 (live statements at block exit) ] +// CHECK: [ B0 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B1 (live statements at block exit) ] +// CHECK: [ B1 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B2 (live statements at block exit) ] +// CHECK: [ B2 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B3 (live statements at block exit) ] +// CHECK: [ B3 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B4 (live statements at block exit) ] +// CHECK: [ B4 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B5 (live statements at block exit) ] +// CHECK: [ B5 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: @@ -118,22 +129,22 @@ void testDoWhileBodyExpression(bool flag) { while (coin()); } } -// CHECK: [ B0 (live statements at block exit) ] +// CHECK: [ B0 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B1 (live statements at block exit) ] +// CHECK: [ B1 (live expressions at block exit) ] // 
CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B2 (live statements at block exit) ] +// CHECK: [ B2 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B3 (live statements at block exit) ] +// CHECK: [ B3 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B4 (live statements at block exit) ] +// CHECK: [ B4 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B5 (live statements at block exit) ] +// CHECK: [ B5 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: @@ -146,22 +157,39 @@ void testForBodyExpression(bool flag) { e; } } -// CHECK: [ B0 (live statements at block exit) ] +// CHECK: [ B0 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B1 (live statements at block exit) ] +// CHECK: [ B1 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B2 (live statements at block exit) ] +// CHECK: [ B2 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B3 (live statements at block exit) ] +// CHECK: [ B3 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B4 (live statements at block exit) ] +// CHECK: [ B4 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK: [ B5 (live statements at block exit) ] +// CHECK: [ B5 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: +void clang_analyzer_eval(bool); + +void test_lambda_refcapture() { + int a = 6; + [&](int &a) { a = 42; }(a); + clang_analyzer_eval(a == 42); // expected-warning{{TRUE}} +} + +// CHECK: [ B0 (live expressions at block exit) ] +// CHECK-EMPTY: +// CHECK-EMPTY: +// CHECK-NEXT: [ B1 (live expressions at block exit) ] +// CHECK-EMPTY: +// CHECK-EMPTY: +// CHECK-NEXT: [ B2 (live expressions at block exit) ] +// CHECK-EMPTY: +// CHECK-EMPTY: diff --git a/clang/test/Analysis/live-stmts.mm b/clang/test/Analysis/live-stmts.mm index a6ddd03ca5d85..8acdd77149ebe 100644 --- a/clang/test/Analysis/live-stmts.mm +++ b/clang/test/Analysis/live-stmts.mm @@ -1,5 +1,5 @@ // RUN: %clang_analyze_cc1 -w -fblocks %s \ -// RUN: -analyzer-checker=debug.DumpLiveStmts \ +// RUN: -analyzer-checker=debug.DumpLiveExprs \ // RUN: 2>&1 | FileCheck %s @interface Item @@ -18,25 +18,25 @@ @interface Collection public: RAII(Blk blk): blk(blk) {} -// CHECK: [ B0 (live statements at block exit) ] +// CHECK: [ B0 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK-NEXT: [ B1 (live statements at block exit) ] +// CHECK-NEXT: [ B1 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK-NEXT: [ B2 (live statements at block exit) ] +// CHECK-NEXT: [ B2 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: ~RAII() { blk(); } -// CHECK-NEXT: [ B0 (live statements at block exit) ] +// CHECK-NEXT: [ B0 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK-NEXT: [ B1 (live statements at block exit) ] +// CHECK-NEXT: [ B1 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK-NEXT: [ B2 (live statements at block exit) ] +// CHECK-NEXT: [ B2 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: }; @@ -45,57 +45,37 @@ void foo(Collection *coll) { RAII raii(^{}); for (Item *item in coll) {} } -// CHECK-NEXT: [ B0 (live statements at block exit) ] +// CHECK-NEXT: [ B0 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK-NEXT: [ B1 (live statements at block exit) ] +// CHECK-NEXT: [ B1 (live expressions 
at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK-NEXT: [ B2 (live statements at block exit) ] -// CHECK-EMPTY: -// CHECK-NEXT: DeclStmt {{.*}} -// CHECK-NEXT: `-VarDecl {{.*}} item 'Item *' +// CHECK-NEXT: [ B2 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-NEXT: ImplicitCastExpr {{.*}} 'Collection *' // CHECK-NEXT: `-DeclRefExpr {{.*}} 'Collection *' lvalue ParmVar {{.*}} 'coll' 'Collection *' // CHECK-EMPTY: -// CHECK-NEXT: CompoundStmt {{.*}} -// CHECK-EMPTY: // CHECK-EMPTY: -// CHECK-NEXT: [ B3 (live statements at block exit) ] -// CHECK-EMPTY: -// CHECK-NEXT: DeclStmt {{.*}} -// CHECK-NEXT: `-VarDecl {{.*}} item 'Item *' +// CHECK-NEXT: [ B3 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-NEXT: ImplicitCastExpr {{.*}} 'Collection *' // CHECK-NEXT: `-DeclRefExpr {{.*}} 'Collection *' lvalue ParmVar {{.*}} 'coll' 'Collection *' // CHECK-EMPTY: -// CHECK-NEXT: CompoundStmt {{.*}} -// CHECK-EMPTY: // CHECK-EMPTY: -// CHECK-NEXT: [ B4 (live statements at block exit) ] -// CHECK-EMPTY: -// CHECK-NEXT: DeclStmt {{.*}} -// CHECK-NEXT: `-VarDecl {{.*}} item 'Item *' +// CHECK-NEXT: [ B4 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-NEXT: ImplicitCastExpr {{.*}} 'Collection *' // CHECK-NEXT: `-DeclRefExpr {{.*}} 'Collection *' lvalue ParmVar {{.*}} 'coll' 'Collection *' // CHECK-EMPTY: -// CHECK-NEXT: CompoundStmt {{.*}} -// CHECK-EMPTY: -// CHECK-EMPTY: -// CHECK-NEXT: [ B5 (live statements at block exit) ] -// CHECK-EMPTY: -// CHECK-NEXT: DeclStmt {{.*}} -// CHECK-NEXT: `-VarDecl {{.*}} item 'Item *' // CHECK-EMPTY: -// CHECK-NEXT: CompoundStmt {{.*}} +// CHECK-NEXT: [ B5 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK-NEXT: [ B0 (live statements at block exit) ] +// CHECK-NEXT: [ B0 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: -// CHECK-NEXT: [ B1 (live statements at block exit) ] +// CHECK-NEXT: [ B1 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-EMPTY: From ec2b0a51977861ed7be92c365ec2636fbf690528 Mon Sep 17 00:00:00 2001 From: jasonliu Date: Tue, 15 Sep 2020 15:50:26 +0000 Subject: [PATCH 0712/1079] [XCOFF] Run resource intense test only on platforms where it makes sense This is a follow up commit for the issue raised in https://reviews.llvm.org/D86879 --- llvm/test/CodeGen/PowerPC/aix-overflow-toc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py b/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py index 5e56b6f9fa250..870f83739dc08 100644 --- a/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py +++ b/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py @@ -1,3 +1,5 @@ +# REQUIRES: system-aix || system-linux + # RUN: python %s > %t.ll # RUN: llc -mtriple powerpc-ibm-aix-xcoff -code-model=small -mcpu=pwr4 -mattr=-altivec -O0 < %t.ll | \ # RUN: FileCheck --check-prefix=ASM32 %s From 74a9c6d7e1c49cd0e3a8e8072b8aa03f7a84caff Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Tue, 15 Sep 2020 11:08:13 -0400 Subject: [PATCH 0713/1079] [libc++] Add a benchmark for std::map operations Before tackling http://llvm.org/PR38722, make sure there is a baseline benchmark. 
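For readers unfamiliar with the harness: the new file drives Google Benchmark
through the in-tree CartesianBenchmarks helpers. A free-standing
microbenchmark in the same spirit, shown here only as a sketch of the pattern
and not part of the patch (it reuses the file's convention that stored keys
are even, so any odd key is a guaranteed miss):

    #include <cstdint>
    #include <map>
    #include "benchmark/benchmark.h"

    // Measure std::map::find for a key that is known to be present.
    static void BM_MapFindHit(benchmark::State &State) {
      std::map<uint64_t, int64_t> M;
      for (uint64_t I = 0; I < 1000; ++I)
        M.emplace(2 * I + 2, 0); // even keys only
      for (auto _ : State)
        benchmark::DoNotOptimize(M.find(42)); // 42 is even and in range: a hit
    }
    BENCHMARK(BM_MapFindHit);
    BENCHMARK_MAIN();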
Differential Revision: https://reviews.llvm.org/D62778
---
 libcxx/benchmarks/map.bench.cpp | 1037 +++++++++++++++++++++++++++++++
 1 file changed, 1037 insertions(+)
 create mode 100644 libcxx/benchmarks/map.bench.cpp

diff --git a/libcxx/benchmarks/map.bench.cpp b/libcxx/benchmarks/map.bench.cpp
new file mode 100644
index 0000000000000..dd1884f65032e
--- /dev/null
+++ b/libcxx/benchmarks/map.bench.cpp
@@ -0,0 +1,1037 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <cstdint>
+#include <map>
+#include <random>
+#include <vector>
+
+#include "CartesianBenchmarks.h"
+#include "benchmark/benchmark.h"
+#include "test_macros.h"
+
+// When VALIDATE is defined the benchmark will run to validate the benchmarks.
+// The time taken by several operations depends on whether or not an element
+// exists. To avoid errors in the benchmark these operations have a validation
+// mode to test the benchmark. Since they are not meant to be benchmarked the
+// number of sizes tested is limited to 1.
+//#define VALIDATE
+
+namespace {
+
+enum class Mode { Hit, Miss };
+
+struct AllModes : EnumValuesAsTuple<AllModes, Mode, 2> {
+  static constexpr const char* Names[] = {"ExistingElement", "NewElement"};
+};
+
+// The positions of the hints to pick:
+// - Begin picks the first item. The item cannot be put before this element.
+// - Third picks the third item. This is just an element with a valid entry
+//   before and after it.
+// - Correct contains the correct hint.
+// - End contains a hint to the end of the map.
+enum class Hint { Begin, Third, Correct, End };
+struct AllHints : EnumValuesAsTuple<AllHints, Hint, 4> {
+  static constexpr const char* Names[] = {"Begin", "Third", "Correct", "End"};
+};
+
+enum class Order { Sorted, Random };
+struct AllOrders : EnumValuesAsTuple<AllOrders, Order, 2> {
+  static constexpr const char* Names[] = {"Sorted", "Random"};
+};
+
+struct TestSets {
+  std::vector<uint64_t> Keys;
+  std::vector<std::map<uint64_t, int64_t> > Maps;
+  std::vector<
+      std::vector<std::map<uint64_t, int64_t>::const_iterator> >
+      Hints;
+};
+
+enum class Shuffle { None, Keys, Hints };
+
+TestSets makeTestingSets(size_t MapSize, Mode mode, Shuffle shuffle,
+                         size_t max_maps) {
+  /*
+   * The shuffle does not retain the random number generator to use the same
+   * set of random numbers for every iteration.
+   */
+  TestSets R;
+
+  int MapCount = std::min(max_maps, 1000000 / MapSize);
+
+  for (uint64_t I = 0; I < MapSize; ++I) {
+    R.Keys.push_back(mode == Mode::Hit ?
2 * I + 2 : 2 * I + 1); + } + if (shuffle == Shuffle::Keys) + std::shuffle(R.Keys.begin(), R.Keys.end(), std::mt19937()); + + for (int M = 0; M < MapCount; ++M) { + auto& map = R.Maps.emplace_back(); + auto& hints = R.Hints.emplace_back(); + for (uint64_t I = 0; I < MapSize; ++I) { + hints.push_back(map.insert(std::make_pair(2 * I + 2, 0)).first); + } + if (shuffle == Shuffle::Hints) + std::shuffle(hints.begin(), hints.end(), std::mt19937()); + } + + return R; +} + +struct Base { + size_t MapSize; + Base(size_t T) : MapSize(T) {} + + std::string baseName() const { return "_MapSize=" + std::to_string(MapSize); } +}; + +//*******************************************************************| +// Member functions | +//*******************************************************************| + +struct ConstructorDefault { + void run(benchmark::State& State) const { + for (auto _ : State) { + benchmark::DoNotOptimize(std::map()); + } + } + + std::string name() const { return "BM_ConstructorDefault"; } +}; + +struct ConstructorIterator : Base { + using Base::Base; + + void run(benchmark::State& State) const { + auto Data = makeTestingSets(MapSize, Mode::Hit, Shuffle::None, 1); + auto& Map = Data.Maps.front(); + while (State.KeepRunningBatch(MapSize)) { +#ifndef VALIDATE + benchmark::DoNotOptimize( + std::map(Map.begin(), Map.end())); +#else + std::map M{Map.begin(), Map.end()}; + if (M != Map) + State.SkipWithError("Map copy not identical"); +#endif + } + } + + std::string name() const { return "BM_ConstructorIterator" + baseName(); } +}; + +struct ConstructorCopy : Base { + using Base::Base; + + void run(benchmark::State& State) const { + auto Data = makeTestingSets(MapSize, Mode::Hit, Shuffle::None, 1); + auto& Map = Data.Maps.front(); + while (State.KeepRunningBatch(MapSize)) { +#ifndef VALIDATE + std::map M(Map); + benchmark::DoNotOptimize(M); +#else + std::map M(Map); + if (M != Map) + State.SkipWithError("Map copy not identical"); +#endif + } + } + + std::string name() const { return "BM_ConstructorCopy" + baseName(); } +}; + +struct ConstructorMove : Base { + using Base::Base; + + void run(benchmark::State& State) const { + auto Data = makeTestingSets(MapSize, Mode::Hit, Shuffle::None, 1000); + while (State.KeepRunningBatch(MapSize * Data.Maps.size())) { + for (auto& Map : Data.Maps) { + std::map M(std::move(Map)); + benchmark::DoNotOptimize(M); + } + State.PauseTiming(); + Data = makeTestingSets(MapSize, Mode::Hit, Shuffle::None, 1000); + State.ResumeTiming(); + } + } + + std::string name() const { return "BM_ConstructorMove" + baseName(); } +}; + +//*******************************************************************| +// Capacity | +//*******************************************************************| + +struct Empty : Base { + using Base::Base; + + void run(benchmark::State& State) const { + auto Data = makeTestingSets(MapSize, Mode::Hit, Shuffle::None, 1); + auto& Map = Data.Maps.front(); + for (auto _ : State) { +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.empty()); +#else + if (Map.empty()) + State.SkipWithError("Map contains an invalid number of elements."); +#endif + } + } + + std::string name() const { return "BM_Empty" + baseName(); } +}; + +struct Size : Base { + using Base::Base; + + void run(benchmark::State& State) const { + auto Data = makeTestingSets(MapSize, Mode::Hit, Shuffle::None, 1); + auto& Map = Data.Maps.front(); + for (auto _ : State) { +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.size()); +#else + if (Map.size() != MapSize) + State.SkipWithError("Map contains 
an invalid number of elements."); +#endif + } + } + + std::string name() const { return "BM_Size" + baseName(); } +}; + +//*******************************************************************| +// Modifiers | +//*******************************************************************| + +struct Clear : Base { + using Base::Base; + + void run(benchmark::State& State) const { + auto Data = makeTestingSets(MapSize, Mode::Hit, Shuffle::None, 1000); + while (State.KeepRunningBatch(MapSize * Data.Maps.size())) { + for (auto& Map : Data.Maps) { + Map.clear(); + benchmark::DoNotOptimize(Map); + } + State.PauseTiming(); + Data = makeTestingSets(MapSize, Mode::Hit, Shuffle::None, 1000); + State.ResumeTiming(); + } + } + + std::string name() const { return "BM_Clear" + baseName(); } +}; + +template +struct Insert : Base { + using Base::Base; + + void run(benchmark::State& State) const { + auto Data = makeTestingSets( + MapSize, Mode(), + Order::value == ::Order::Random ? Shuffle::Keys : Shuffle::None, 1000); + while (State.KeepRunningBatch(MapSize * Data.Maps.size())) { + for (auto& Map : Data.Maps) { + for (auto K : Data.Keys) { +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.insert(std::make_pair(K, 1))); +#else + bool Inserted = Map.insert(std::make_pair(K, 1)).second; + if (Mode() == ::Mode::Hit) { + if (Inserted) + State.SkipWithError("Inserted a duplicate element"); + } else { + if (!Inserted) + State.SkipWithError("Failed to insert e new element"); + } +#endif + } + } + + State.PauseTiming(); + Data = makeTestingSets(MapSize, Mode(), + Order::value == ::Order::Random ? Shuffle::Keys + : Shuffle::None, + 1000); + State.ResumeTiming(); + } + } + + std::string name() const { + return "BM_Insert" + baseName() + Mode::name() + Order::name(); + } +}; + +template +struct InsertHint : Base { + using Base::Base; + + template < ::Hint hint> + typename std::enable_if::type + run(benchmark::State& State) const { + auto Data = makeTestingSets(MapSize, Mode(), Shuffle::None, 1000); + while (State.KeepRunningBatch(MapSize * Data.Maps.size())) { + for (size_t I = 0; I < Data.Maps.size(); ++I) { + auto& Map = Data.Maps[I]; + auto H = Data.Hints[I].begin(); + for (auto K : Data.Keys) { +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.insert(*H, std::make_pair(K, 1))); +#else + auto Inserted = Map.insert(*H, std::make_pair(K, 1)); + if (Mode() == ::Mode::Hit) { + if (Inserted != *H) + State.SkipWithError("Inserted a duplicate element"); + } else { + if (++Inserted != *H) + State.SkipWithError("Failed to insert a new element"); + } +#endif + ++H; + } + } + + State.PauseTiming(); + Data = makeTestingSets(MapSize, Mode(), Shuffle::None, 1000); + State.ResumeTiming(); + } + } + + template < ::Hint hint> + typename std::enable_if::type + run(benchmark::State& State) const { + auto Data = makeTestingSets(MapSize, Mode(), Shuffle::None, 1000); + while (State.KeepRunningBatch(MapSize * Data.Maps.size())) { + for (size_t I = 0; I < Data.Maps.size(); ++I) { + auto& Map = Data.Maps[I]; + auto Third = *(Data.Hints[I].begin() + 2); + for (auto K : Data.Keys) { + auto Itor = hint == ::Hint::Begin + ? Map.begin() + : hint == ::Hint::Third ? 
Third : Map.end(); +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.insert(Itor, std::make_pair(K, 1))); +#else + size_t Size = Map.size(); + Map.insert(Itor, std::make_pair(K, 1)); + if (Mode() == ::Mode::Hit) { + if (Size != Map.size()) + State.SkipWithError("Inserted a duplicate element"); + } else { + if (Size + 1 != Map.size()) + State.SkipWithError("Failed to insert a new element"); + } +#endif + } + } + + State.PauseTiming(); + Data = makeTestingSets(MapSize, Mode(), Shuffle::None, 1000); + State.ResumeTiming(); + } + } + + void run(benchmark::State& State) const { + static constexpr auto h = Hint(); + run(State); + } + + std::string name() const { + return "BM_InsertHint" + baseName() + Mode::name() + Hint::name(); + } +}; + +template +struct InsertAssign : Base { + using Base::Base; + + void run(benchmark::State& State) const { + auto Data = makeTestingSets( + MapSize, Mode(), + Order::value == ::Order::Random ? Shuffle::Keys : Shuffle::None, 1000); + while (State.KeepRunningBatch(MapSize * Data.Maps.size())) { + for (auto& Map : Data.Maps) { + for (auto K : Data.Keys) { +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.insert_or_assign(K, 1)); +#else + bool Inserted = Map.insert_or_assign(K, 1).second; + if (Mode() == ::Mode::Hit) { + if (Inserted) + State.SkipWithError("Inserted a duplicate element"); + } else { + if (!Inserted) + State.SkipWithError("Failed to insert e new element"); + } +#endif + } + } + + State.PauseTiming(); + Data = makeTestingSets(MapSize, Mode(), + Order::value == ::Order::Random ? Shuffle::Keys + : Shuffle::None, + 1000); + State.ResumeTiming(); + } + } + + std::string name() const { + return "BM_InsertAssign" + baseName() + Mode::name() + Order::name(); + } +}; + +template +struct InsertAssignHint : Base { + using Base::Base; + + template < ::Hint hint> + typename std::enable_if::type + run(benchmark::State& State) const { + auto Data = makeTestingSets(MapSize, Mode(), Shuffle::None, 1000); + while (State.KeepRunningBatch(MapSize * Data.Maps.size())) { + for (size_t I = 0; I < Data.Maps.size(); ++I) { + auto& Map = Data.Maps[I]; + auto H = Data.Hints[I].begin(); + for (auto K : Data.Keys) { +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.insert_or_assign(*H, K, 1)); +#else + auto Inserted = Map.insert_or_assign(*H, K, 1); + if (Mode() == ::Mode::Hit) { + if (Inserted != *H) + State.SkipWithError("Inserted a duplicate element"); + } else { + if (++Inserted != *H) + State.SkipWithError("Failed to insert a new element"); + } +#endif + ++H; + } + } + + State.PauseTiming(); + Data = makeTestingSets(MapSize, Mode(), Shuffle::None, 1000); + State.ResumeTiming(); + } + } + + template < ::Hint hint> + typename std::enable_if::type + run(benchmark::State& State) const { + auto Data = makeTestingSets(MapSize, Mode(), Shuffle::None, 1000); + while (State.KeepRunningBatch(MapSize * Data.Maps.size())) { + for (size_t I = 0; I < Data.Maps.size(); ++I) { + auto& Map = Data.Maps[I]; + auto Third = *(Data.Hints[I].begin() + 2); + for (auto K : Data.Keys) { + auto Itor = hint == ::Hint::Begin + ? Map.begin() + : hint == ::Hint::Third ? 
Third : Map.end(); +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.insert_or_assign(Itor, K, 1)); +#else + size_t Size = Map.size(); + Map.insert_or_assign(Itor, K, 1); + if (Mode() == ::Mode::Hit) { + if (Size != Map.size()) + State.SkipWithError("Inserted a duplicate element"); + } else { + if (Size + 1 != Map.size()) + State.SkipWithError("Failed to insert a new element"); + } +#endif + } + } + + State.PauseTiming(); + Data = makeTestingSets(MapSize, Mode(), Shuffle::None, 1000); + State.ResumeTiming(); + } + } + + void run(benchmark::State& State) const { + static constexpr auto h = Hint(); + run(State); + } + + std::string name() const { + return "BM_InsertAssignHint" + baseName() + Mode::name() + Hint::name(); + } +}; + +template +struct Emplace : Base { + using Base::Base; + + void run(benchmark::State& State) const { + + auto Data = makeTestingSets( + MapSize, Mode(), + Order::value == ::Order::Random ? Shuffle::Keys : Shuffle::None, 1000); + while (State.KeepRunningBatch(MapSize * Data.Maps.size())) { + for (auto& Map : Data.Maps) { + for (auto K : Data.Keys) { +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.emplace(K, 1)); +#else + bool Inserted = Map.emplace(K, 1).second; + if (Mode() == ::Mode::Hit) { + if (Inserted) + State.SkipWithError("Emplaced a duplicate element"); + } else { + if (!Inserted) + State.SkipWithError("Failed to emplace a new element"); + } +#endif + } + } + + State.PauseTiming(); + Data = makeTestingSets(MapSize, Mode(), + Order::value == ::Order::Random ? Shuffle::Keys + : Shuffle::None, + 1000); + State.ResumeTiming(); + } + } + + std::string name() const { + return "BM_Emplace" + baseName() + Mode::name() + Order::name(); + } +}; + +template +struct EmplaceHint : Base { + using Base::Base; + + template < ::Hint hint> + typename std::enable_if::type + run(benchmark::State& State) const { + auto Data = makeTestingSets(MapSize, Mode(), Shuffle::None, 1000); + while (State.KeepRunningBatch(MapSize * Data.Maps.size())) { + for (size_t I = 0; I < Data.Maps.size(); ++I) { + auto& Map = Data.Maps[I]; + auto H = Data.Hints[I].begin(); + for (auto K : Data.Keys) { +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.emplace_hint(*H, K, 1)); +#else + auto Inserted = Map.emplace_hint(*H, K, 1); + if (Mode() == ::Mode::Hit) { + if (Inserted != *H) + State.SkipWithError("Emplaced a duplicate element"); + } else { + if (++Inserted != *H) + State.SkipWithError("Failed to emplace a new element"); + } +#endif + ++H; + } + } + + State.PauseTiming(); + Data = makeTestingSets(MapSize, Mode(), Shuffle::None, 1000); + State.ResumeTiming(); + } + } + + template < ::Hint hint> + typename std::enable_if::type + run(benchmark::State& State) const { + auto Data = makeTestingSets(MapSize, Mode(), Shuffle::None, 1000); + while (State.KeepRunningBatch(MapSize * Data.Maps.size())) { + for (size_t I = 0; I < Data.Maps.size(); ++I) { + auto& Map = Data.Maps[I]; + auto Third = *(Data.Hints[I].begin() + 2); + for (auto K : Data.Keys) { + auto Itor = hint == ::Hint::Begin + ? Map.begin() + : hint == ::Hint::Third ? 
Third : Map.end(); +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.emplace_hint(Itor, K, 1)); +#else + size_t Size = Map.size(); + Map.emplace_hint(Itor, K, 1); + if (Mode() == ::Mode::Hit) { + if (Size != Map.size()) + State.SkipWithError("Emplaced a duplicate element"); + } else { + if (Size + 1 != Map.size()) + State.SkipWithError("Failed to emplace a new element"); + } +#endif + } + } + + State.PauseTiming(); + Data = makeTestingSets(MapSize, Mode(), Shuffle::None, 1000); + State.ResumeTiming(); + } + } + + void run(benchmark::State& State) const { + static constexpr auto h = Hint(); + run(State); + } + + std::string name() const { + return "BM_EmplaceHint" + baseName() + Mode::name() + Hint::name(); + } +}; + +template +struct TryEmplace : Base { + using Base::Base; + + void run(benchmark::State& State) const { + + auto Data = makeTestingSets( + MapSize, Mode(), + Order::value == ::Order::Random ? Shuffle::Keys : Shuffle::None, 1000); + while (State.KeepRunningBatch(MapSize * Data.Maps.size())) { + for (auto& Map : Data.Maps) { + for (auto K : Data.Keys) { +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.try_emplace(K, 1)); +#else + bool Inserted = Map.try_emplace(K, 1).second; + if (Mode() == ::Mode::Hit) { + if (Inserted) + State.SkipWithError("Emplaced a duplicate element"); + } else { + if (!Inserted) + State.SkipWithError("Failed to emplace a new element"); + } +#endif + } + } + + State.PauseTiming(); + Data = makeTestingSets(MapSize, Mode(), + Order::value == ::Order::Random ? Shuffle::Keys + : Shuffle::None, + 1000); + State.ResumeTiming(); + } + } + + std::string name() const { + return "BM_TryEmplace" + baseName() + Mode::name() + Order::name(); + } +}; + +template +struct TryEmplaceHint : Base { + using Base::Base; + + template < ::Hint hint> + typename std::enable_if::type + run(benchmark::State& State) const { + auto Data = makeTestingSets(MapSize, Mode(), Shuffle::None, 1000); + while (State.KeepRunningBatch(MapSize * Data.Maps.size())) { + for (size_t I = 0; I < Data.Maps.size(); ++I) { + auto& Map = Data.Maps[I]; + auto H = Data.Hints[I].begin(); + for (auto K : Data.Keys) { +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.try_emplace(*H, K, 1)); +#else + auto Inserted = Map.try_emplace(*H, K, 1); + if (Mode() == ::Mode::Hit) { + if (Inserted != *H) + State.SkipWithError("Emplaced a duplicate element"); + } else { + if (++Inserted != *H) + State.SkipWithError("Failed to emplace a new element"); + } +#endif + ++H; + } + } + + State.PauseTiming(); + Data = makeTestingSets(MapSize, Mode(), Shuffle::None, 1000); + State.ResumeTiming(); + } + } + + template < ::Hint hint> + typename std::enable_if::type + run(benchmark::State& State) const { + auto Data = makeTestingSets(MapSize, Mode(), Shuffle::None, 1000); + while (State.KeepRunningBatch(MapSize * Data.Maps.size())) { + for (size_t I = 0; I < Data.Maps.size(); ++I) { + auto& Map = Data.Maps[I]; + auto Third = *(Data.Hints[I].begin() + 2); + for (auto K : Data.Keys) { + auto Itor = hint == ::Hint::Begin + ? Map.begin() + : hint == ::Hint::Third ? 
Third : Map.end(); +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.try_emplace(Itor, K, 1)); +#else + size_t Size = Map.size(); + Map.try_emplace(Itor, K, 1); + if (Mode() == ::Mode::Hit) { + if (Size != Map.size()) + State.SkipWithError("Emplaced a duplicate element"); + } else { + if (Size + 1 != Map.size()) + State.SkipWithError("Failed to emplace a new element"); + } +#endif + } + } + + State.PauseTiming(); + Data = makeTestingSets(MapSize, Mode(), Shuffle::None, 1000); + State.ResumeTiming(); + } + } + + void run(benchmark::State& State) const { + static constexpr auto h = Hint(); + run(State); + } + + std::string name() const { + return "BM_TryEmplaceHint" + baseName() + Mode::name() + Hint::name(); + } +}; + +template +struct Erase : Base { + using Base::Base; + + void run(benchmark::State& State) const { + auto Data = makeTestingSets( + MapSize, Mode(), + Order::value == ::Order::Random ? Shuffle::Keys : Shuffle::None, 1000); + while (State.KeepRunningBatch(MapSize * Data.Maps.size())) { + for (auto& Map : Data.Maps) { + for (auto K : Data.Keys) { +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.erase(K)); +#else + size_t I = Map.erase(K); + if (Mode() == ::Mode::Hit) { + if (I == 0) + State.SkipWithError("Did not find the existing element"); + } else { + if (I == 1) + State.SkipWithError("Did find the non-existing element"); + } +#endif + } + } + + State.PauseTiming(); + Data = makeTestingSets(MapSize, Mode(), + Order::value == ::Order::Random ? Shuffle::Keys + : Shuffle::None, + 1000); + State.ResumeTiming(); + } + } + + std::string name() const { + return "BM_Erase" + baseName() + Mode::name() + Order::name(); + } +}; + +template +struct EraseIterator : Base { + using Base::Base; + + void run(benchmark::State& State) const { + auto Data = makeTestingSets( + MapSize, Mode::Hit, + Order::value == ::Order::Random ? Shuffle::Hints : Shuffle::None, 1000); + while (State.KeepRunningBatch(MapSize * Data.Maps.size())) { + for (size_t I = 0; I < Data.Maps.size(); ++I) { + auto& Map = Data.Maps[I]; + for (auto H : Data.Hints[I]) { + benchmark::DoNotOptimize(Map.erase(H)); + } +#ifdef VALIDATE + if (!Map.empty()) + State.SkipWithError("Did not erase the entire map"); +#endif + } + + State.PauseTiming(); + Data = makeTestingSets(MapSize, Mode::Hit, + Order::value == ::Order::Random ? Shuffle::Hints + : Shuffle::None, + 1000); + State.ResumeTiming(); + } + } + + std::string name() const { + return "BM_EraseIterator" + baseName() + Order::name(); + } +}; + +struct EraseRange : Base { + using Base::Base; + + void run(benchmark::State& State) const { + auto Data = makeTestingSets(MapSize, Mode::Hit, Shuffle::None, 1000); + while (State.KeepRunningBatch(MapSize * Data.Maps.size())) { + for (auto& Map : Data.Maps) { +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.erase(Map.begin(), Map.end())); +#else + Map.erase(Map.begin(), Map.end()); + if (!Map.empty()) + State.SkipWithError("Did not erase the entire map"); +#endif + } + + State.PauseTiming(); + Data = makeTestingSets(MapSize, Mode::Hit, Shuffle::None, 1000); + State.ResumeTiming(); + } + } + + std::string name() const { return "BM_EraseRange" + baseName(); } +}; + +//*******************************************************************| +// Lookup | +//*******************************************************************| + +template +struct Count : Base { + using Base::Base; + + void run(benchmark::State& State) const { + auto Data = makeTestingSets( + MapSize, Mode(), + Order::value == ::Order::Random ? 
Shuffle::Keys : Shuffle::None, 1); + auto& Map = Data.Maps.front(); + while (State.KeepRunningBatch(MapSize)) { + for (auto K : Data.Keys) { +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.count(K)); +#else + size_t I = Map.count(K); + if (Mode() == ::Mode::Hit) { + if (I == 0) + State.SkipWithError("Did not find the existing element"); + } else { + if (I == 1) + State.SkipWithError("Did find the non-existing element"); + } +#endif + } + } + } + + std::string name() const { + return "BM_Count" + baseName() + Mode::name() + Order::name(); + } +}; + +template +struct Find : Base { + using Base::Base; + + void run(benchmark::State& State) const { + auto Data = makeTestingSets( + MapSize, Mode(), + Order::value == ::Order::Random ? Shuffle::Keys : Shuffle::None, 1); + auto& Map = Data.Maps.front(); + while (State.KeepRunningBatch(MapSize)) { + for (auto K : Data.Keys) { +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.find(K)); +#else + auto Itor = Map.find(K); + if (Mode() == ::Mode::Hit) { + if (Itor == Map.end()) + State.SkipWithError("Did not find the existing element"); + } else { + if (Itor != Map.end()) + State.SkipWithError("Did find the non-existing element"); + } +#endif + } + } + } + + std::string name() const { + return "BM_Find" + baseName() + Mode::name() + Order::name(); + } +}; + +template +struct EqualRange : Base { + using Base::Base; + + void run(benchmark::State& State) const { + auto Data = makeTestingSets( + MapSize, Mode(), + Order::value == ::Order::Random ? Shuffle::Keys : Shuffle::None, 1); + auto& Map = Data.Maps.front(); + while (State.KeepRunningBatch(MapSize)) { + for (auto K : Data.Keys) { +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.equal_range(K)); +#else + auto Range = Map.equal_range(K); + if (Mode() == ::Mode::Hit) { + // Adjust validation for the last element. + auto Key = K; + if (Range.second == Map.end() && K == 2 * MapSize) { + --Range.second; + Key -= 2; + } + if (Range.first == Map.end() || Range.first->first != K || + Range.second == Map.end() || Range.second->first - 2 != Key) + State.SkipWithError("Did not find the existing element"); + } else { + if (Range.first == Map.end() || Range.first->first - 1 != K || + Range.second == Map.end() || Range.second->first - 1 != K) + State.SkipWithError("Did find the non-existing element"); + } +#endif + } + } + } + + std::string name() const { + return "BM_EqualRange" + baseName() + Mode::name() + Order::name(); + } +}; + +template +struct LowerBound : Base { + using Base::Base; + + void run(benchmark::State& State) const { + auto Data = makeTestingSets( + MapSize, Mode(), + Order::value == ::Order::Random ? Shuffle::Keys : Shuffle::None, 1); + auto& Map = Data.Maps.front(); + while (State.KeepRunningBatch(MapSize)) { + for (auto K : Data.Keys) { +#ifndef VALIDATE + benchmark::DoNotOptimize(Map.lower_bound(K)); +#else + auto Itor = Map.lower_bound(K); + if (Mode() == ::Mode::Hit) { + if (Itor == Map.end() || Itor->first != K) + State.SkipWithError("Did not find the existing element"); + } else { + if (Itor == Map.end() || Itor->first - 1 != K) + State.SkipWithError("Did find the non-existing element"); + } +#endif + } + } + } + + std::string name() const { + return "BM_LowerBound" + baseName() + Mode::name() + Order::name(); + } +}; + +template +struct UpperBound : Base { + using Base::Base; + + void run(benchmark::State& State) const { + auto Data = makeTestingSets( + MapSize, Mode(), + Order::value == ::Order::Random ? 
Shuffle::Keys : Shuffle::None, 1);
+    auto& Map = Data.Maps.front();
+    while (State.KeepRunningBatch(MapSize)) {
+      for (auto K : Data.Keys) {
+#ifndef VALIDATE
+        benchmark::DoNotOptimize(Map.upper_bound(K));
+#else
+        std::map<uint64_t, int64_t>::iterator Itor = Map.upper_bound(K);
+        if (Mode() == ::Mode::Hit) {
+          // Adjust validation for the last element.
+          auto Key = K;
+          if (Itor == Map.end() && K == 2 * MapSize) {
+            --Itor;
+            Key -= 2;
+          }
+          if (Itor == Map.end() || Itor->first - 2 != Key)
+            State.SkipWithError("Did not find the existing element");
+        } else {
+          if (Itor == Map.end() || Itor->first - 1 != K)
+            State.SkipWithError("Did find the non-existing element");
+        }
+#endif
+      }
+    }
+  }
+
+  std::string name() const {
+    return "BM_UpperBound" + baseName() + Mode::name() + Order::name();
+  }
+};
+
+} // namespace
+
+int main(int argc, char** argv) {
+  benchmark::Initialize(&argc, argv);
+  if (benchmark::ReportUnrecognizedArguments(argc, argv))
+    return 1;
+
+#ifdef VALIDATE
+  const std::vector<size_t> MapSize{10};
+#else
+  const std::vector<size_t> MapSize{10, 100, 1000, 10000, 100000, 1000000};
+#endif
+
+  // Member functions
+  makeCartesianProductBenchmark<ConstructorDefault>();
+  makeCartesianProductBenchmark<ConstructorIterator>(MapSize);
+  makeCartesianProductBenchmark<ConstructorCopy>(MapSize);
+  makeCartesianProductBenchmark<ConstructorMove>(MapSize);
+
+  // Capacity
+  makeCartesianProductBenchmark<Empty>(MapSize);
+  makeCartesianProductBenchmark<Size>(MapSize);
+
+  // Modifiers
+  makeCartesianProductBenchmark<Clear>(MapSize);
+  makeCartesianProductBenchmark<Insert, AllModes, AllOrders>(MapSize);
+  makeCartesianProductBenchmark<InsertHint, AllModes, AllHints>(MapSize);
+  makeCartesianProductBenchmark<InsertAssign, AllModes, AllOrders>(MapSize);
+  makeCartesianProductBenchmark<InsertAssignHint, AllModes, AllHints>(MapSize);
+
+  makeCartesianProductBenchmark<Emplace, AllModes, AllOrders>(MapSize);
+  makeCartesianProductBenchmark<EmplaceHint, AllModes, AllHints>(MapSize);
+  makeCartesianProductBenchmark<TryEmplace, AllModes, AllOrders>(MapSize);
+  makeCartesianProductBenchmark<TryEmplaceHint, AllModes, AllHints>(MapSize);
+  makeCartesianProductBenchmark<Erase, AllModes, AllOrders>(MapSize);
+  makeCartesianProductBenchmark<EraseIterator, AllOrders>(MapSize);
+  makeCartesianProductBenchmark<EraseRange>(MapSize);
+
+  // Lookup
+  makeCartesianProductBenchmark<Count, AllModes, AllOrders>(MapSize);
+  makeCartesianProductBenchmark<Find, AllModes, AllOrders>(MapSize);
+  makeCartesianProductBenchmark<EqualRange, AllModes, AllOrders>(MapSize);
+  makeCartesianProductBenchmark<LowerBound, AllModes, AllOrders>(MapSize);
+  makeCartesianProductBenchmark<UpperBound, AllModes, AllOrders>(MapSize);
+
+  benchmark::RunSpecifiedBenchmarks();
+}

From 243ffd0cade71ddca9b0dffec1c8e9084b0f7745 Mon Sep 17 00:00:00 2001
From: Guozhi Wei
Date: Tue, 15 Sep 2020 09:18:18 -0700
Subject: [PATCH 0714/1079] [MachineBasicBlock] Fix a typo in function
 copySuccessor

The condition used to decide whether we need to copy the probability should be
reversed.
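Restated outside the diff: the old code asked Orig for a successor probability
exactly when Orig tracked none. The corrected branch, mirroring the one-line
change below, reads:

    // Copy the probability only when the source block actually records
    // successor probabilities; otherwise there is nothing to copy.
    if (!Orig->Probs.empty())
      addSuccessor(*I, Orig->getSuccProbability(I));
    else
      addSuccessorWithoutProb(*I);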
Differential Revision: https://reviews.llvm.org/D87417
---
 llvm/lib/CodeGen/MachineBasicBlock.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index b260af72043b4..42d519970c4d4 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -828,7 +828,7 @@ void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,

 void MachineBasicBlock::copySuccessor(MachineBasicBlock *Orig,
                                       succ_iterator I) {
-  if (Orig->Probs.empty())
+  if (!Orig->Probs.empty())
     addSuccessor(*I, Orig->getSuccProbability(I));
   else
     addSuccessorWithoutProb(*I);

From 4ddd985ca941e48a016e8d7270921b4aa76afbe1 Mon Sep 17 00:00:00 2001
From: Louis Dionne
Date: Tue, 15 Sep 2020 12:29:41 -0400
Subject: [PATCH 0715/1079] NFC: Add whitespace change to .git-blame-ignore-revs

---
 .git-blame-ignore-revs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
index 7c759a1adc950..690ab1d5af575 100644
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -31,3 +31,6 @@ d8f0e6caa91e230a486c948ab643174e40bdf215

 # Remove line-endings added by r320089. NFC.
 100a0eedc00b2bf48bcdc6c209c000745a4a0e48
+
+# Cleanup __config indention. NFC.
+2b772b930e097ed6f06d698a51e291c7fd318baa

From a43e68b58b085797e2f1435765255ebd431db297 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 15 Sep 2020 17:08:45 +0100
Subject: [PATCH 0716/1079] [X86][AVX] lowerShuffleWithSHUFPS - handle missed
 canonicalization cases.

PR47534 exposes a case where calling lowerShuffleWithSHUFPS directly from a
derived repeated mask (found by is128BitLaneRepeatedShuffleMask) results in us
using a non-canonicalized mask.

The missed canonicalization in this case is trivial - just commute the mask so
we have more (swapped) LHS than RHS references so lowerShuffleWithSHUFPS can
handle it.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp        |  6 ++++++
 llvm/test/CodeGen/X86/vector-shuffle-avx512.ll | 15 +++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0af3cacb22813..ecf151ffeb664 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14031,6 +14031,12 @@ static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
       NewMask[2] = Mask[2] < 4 ? 1 : 3;
       NewMask[3] = Mask[2] < 4 ? 3 : 1;
     }
+  } else if (NumV2Elements == 3) {
+    // Ideally canonicalizeShuffleMaskWithCommute should have caught this, but
+    // we can get here due to other paths (e.g repeated mask matching) that we
+    // don't want to do another round of lowerVECTOR_SHUFFLE.
+ ShuffleVectorSDNode::commuteMask(NewMask); + return lowerShuffleWithSHUFPS(DL, VT, NewMask, V2, V1, DAG); } return DAG.getNode(X86ISD::SHUFP, DL, VT, LowV, HighV, getV4X86ShuffleImm8ForMask(NewMask, DL, DAG)); diff --git a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll index ccf1476e6a657..422f64d982bfb 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll @@ -596,6 +596,21 @@ define void @test_demandedelts_pshufb_v32i8_v16i8(<2 x i32>* %src, <8 x i32>* %d ret void } +define <32 x float> @PR47534(<8 x float> %tmp) { +; CHECK-LABEL: PR47534: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [7,25,26,27,7,29,30,31,7,25,26,27,7,29,30,31] +; CHECK-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] +; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 +; CHECK-NEXT: ret{{[l|q]}} + %tmp1 = shufflevector <8 x float> %tmp, <8 x float> undef, <32 x i32> + %tmp2 = shufflevector <32 x float> , <32 x float> undef, <32 x i32> + %tmp18 = shufflevector <32 x float> %tmp2, <32 x float> %tmp1, <32 x i32> + ret <32 x float> %tmp18 +} + %union1= type { <16 x float> } @src1 = external dso_local local_unnamed_addr global %union1, align 64 From 127faae7529aee7e8508abebbc19212ce30bbf27 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 15 Sep 2020 09:36:28 -0700 Subject: [PATCH 0717/1079] [lldb] Add -l/--language option to script command Make it possible to run the script command with a different language than currently selected. $ ./bin/lldb -l python (lldb) script -l lua >>> io.stdout:write("Hello, World!\n") Hello, World! When passing the language option and a raw command, you need to separate the flag from the script code with --. $ ./bin/lldb -l python (lldb) script -l lua -- io.stdout:write("Hello, World!\n") Hello, World! 
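Conceptually, the command splits its input at the -- delimiter before option parsing; a simplified sketch of that flow (using the OptionsWithRaw helper that appears in the diff below, with surrounding code and error handling omitted):

// "script -l lua -- io.stdout:write(...)" arrives as one raw string.
OptionsWithRaw raw_args(command);
if (raw_args.HasArgs()) {
  // Everything before "--" is parsed as ordinary options (-l/--language).
  if (!ParseOptions(raw_args.GetArgs(), result))
    return false;
  // Everything after "--" is forwarded untouched as the script code.
  command = raw_args.GetRawPart();
}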
Differential revision: https://reviews.llvm.org/D86996 --- lldb/source/Commands/CommandObjectScript.cpp | 81 +++++++++++++++++-- lldb/source/Commands/CommandObjectScript.h | 15 ++++ lldb/source/Commands/Options.td | 6 ++ .../ScriptInterpreter/Lua/lua-python.test | 17 ++++ .../test/Shell/ScriptInterpreter/Lua/lua.test | 6 +- .../ScriptInterpreter/Python/python.test | 13 +++ llvm/lib/Support/MemoryBuffer.cpp | 3 +- 7 files changed, 133 insertions(+), 8 deletions(-) create mode 100644 lldb/test/Shell/ScriptInterpreter/Lua/lua-python.test create mode 100644 lldb/test/Shell/ScriptInterpreter/Python/python.test diff --git a/lldb/source/Commands/CommandObjectScript.cpp b/lldb/source/Commands/CommandObjectScript.cpp index e5ae244cade19..9dadf11ebfc89 100644 --- a/lldb/source/Commands/CommandObjectScript.cpp +++ b/lldb/source/Commands/CommandObjectScript.cpp @@ -10,36 +10,107 @@ #include "lldb/Core/Debugger.h" #include "lldb/DataFormatters/DataVisualization.h" #include "lldb/Host/Config.h" +#include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandInterpreter.h" #include "lldb/Interpreter/CommandReturnObject.h" +#include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Interpreter/ScriptInterpreter.h" #include "lldb/Utility/Args.h" using namespace lldb; using namespace lldb_private; -// CommandObjectScript +static constexpr OptionEnumValueElement g_script_option_enumeration[] = { + { + eScriptLanguagePython, + "python", + "Python", + }, + { + eScriptLanguageLua, + "lua", + "Lua", + }, + { + eScriptLanguageNone, + "default", + "The default scripting language.", + }, +}; + +static constexpr OptionEnumValues ScriptOptionEnum() { + return OptionEnumValues(g_script_option_enumeration); +} + +#define LLDB_OPTIONS_script +#include "CommandOptions.inc" + +Status CommandObjectScript::CommandOptions::SetOptionValue( + uint32_t option_idx, llvm::StringRef option_arg, + ExecutionContext *execution_context) { + Status error; + const int short_option = m_getopt_table[option_idx].val; + + switch (short_option) { + case 'l': + language = (lldb::ScriptLanguage)OptionArgParser::ToOptionEnum( + option_arg, GetDefinitions()[option_idx].enum_values, + eScriptLanguageNone, error); + if (!error.Success()) + error.SetErrorStringWithFormat("unrecognized value for language '%s'", + option_arg.str().c_str()); + break; + default: + llvm_unreachable("Unimplemented option"); + } + + return error; +} + +void CommandObjectScript::CommandOptions::OptionParsingStarting( + ExecutionContext *execution_context) { + language = lldb::eScriptLanguageNone; +} + +llvm::ArrayRef +CommandObjectScript::CommandOptions::GetDefinitions() { + return llvm::makeArrayRef(g_script_options); +} CommandObjectScript::CommandObjectScript(CommandInterpreter &interpreter) : CommandObjectRaw( interpreter, "script", "Invoke the script interpreter with provided code and display any " "results. Start the interactive interpreter if no code is supplied.", - "script []") {} + "script [--language --] []") {} CommandObjectScript::~CommandObjectScript() {} bool CommandObjectScript::DoExecute(llvm::StringRef command, CommandReturnObject &result) { - if (m_interpreter.GetDebugger().GetScriptLanguage() == - lldb::eScriptLanguageNone) { + // Try parsing the language option but when the command contains a raw part + // separated by the -- delimiter. 
+ OptionsWithRaw raw_args(command); + if (raw_args.HasArgs()) { + if (!ParseOptions(raw_args.GetArgs(), result)) + return false; + command = raw_args.GetRawPart(); + } + + lldb::ScriptLanguage language = + (m_options.language == lldb::eScriptLanguageNone) + ? m_interpreter.GetDebugger().GetScriptLanguage() + : m_options.language; + + if (language == lldb::eScriptLanguageNone) { result.AppendError( "the script-lang setting is set to none - scripting not available"); result.SetStatus(eReturnStatusFailed); return false; } - ScriptInterpreter *script_interpreter = GetDebugger().GetScriptInterpreter(); + ScriptInterpreter *script_interpreter = + GetDebugger().GetScriptInterpreter(true, language); if (script_interpreter == nullptr) { result.AppendError("no script interpreter"); diff --git a/lldb/source/Commands/CommandObjectScript.h b/lldb/source/Commands/CommandObjectScript.h index 40abf8bd730c7..b9fee7124818a 100644 --- a/lldb/source/Commands/CommandObjectScript.h +++ b/lldb/source/Commands/CommandObjectScript.h @@ -17,9 +17,24 @@ class CommandObjectScript : public CommandObjectRaw { public: CommandObjectScript(CommandInterpreter &interpreter); ~CommandObjectScript() override; + Options *GetOptions() override { return &m_options; } + + class CommandOptions : public Options { + public: + CommandOptions() : Options() {} + ~CommandOptions() override = default; + Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg, + ExecutionContext *execution_context) override; + void OptionParsingStarting(ExecutionContext *execution_context) override; + llvm::ArrayRef GetDefinitions() override; + lldb::ScriptLanguage language = lldb::eScriptLanguageNone; + }; protected: bool DoExecute(llvm::StringRef command, CommandReturnObject &result) override; + +private: + CommandOptions m_options; }; } // namespace lldb_private diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index eacd6de1910c1..b41b1871ad81f 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -717,6 +717,12 @@ let Command = "script add" in { "LLDB event system.">; } +let Command = "script" in { + def script_language : Option<"language", "l">, + EnumArg<"ScriptLang", "ScriptOptionEnum()">, Desc<"Specify the scripting " + " language. 
If none is specified the default scripting language is used.">; +} + let Command = "source info" in { def source_info_count : Option<"count", "c">, Arg<"Count">, Desc<"The number of line entries to display.">; diff --git a/lldb/test/Shell/ScriptInterpreter/Lua/lua-python.test b/lldb/test/Shell/ScriptInterpreter/Lua/lua-python.test new file mode 100644 index 0000000000000..c40b8e068d9fe --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Lua/lua-python.test @@ -0,0 +1,17 @@ +# REQUIRES: lua +# REQUIRES: python +# UNSUPPORTED: lldb-repro + +# RUN: mkdir -p %t +# RUN: cd %t +# RUN: echo "int main() { return 0; }" | %clang_host -x c - -o a.out +# RUN: cat %s | %lldb 2>&1 | FileCheck %s +script -l lua -- +target = lldb.debugger:CreateTarget("a.out") +print("target is valid:", tostring(target:IsValid())) +lldb.debugger:SetSelectedTarget(target) +quit +# CHECK: target is valid: true +script -l python -- +print("selected target: {}".format(lldb.debugger.GetSelectedTarget())) +# CHECK: selected target: a.out diff --git a/lldb/test/Shell/ScriptInterpreter/Lua/lua.test b/lldb/test/Shell/ScriptInterpreter/Lua/lua.test index 70184edbab1a8..28042efa8c813 100644 --- a/lldb/test/Shell/ScriptInterpreter/Lua/lua.test +++ b/lldb/test/Shell/ScriptInterpreter/Lua/lua.test @@ -1,3 +1,7 @@ # REQUIRES: lua -# RUN: %lldb --script-language lua -o 'script print(1000+100+10+1)' 2>&1 | FileCheck %s +# RUN: %lldb --script-language lua -o 'script io.stdout:write(1000+100+10+1, "\n")' 2>&1 | FileCheck %s +# RUN: %lldb --script-language lua -o 'script -- io.stdout:write(1000+100+10+1, "\n")' 2>&1 | FileCheck %s +# RUN: %lldb --script-language lua -o 'script --language default -- io.stdout:write(1000+100+10+1, "\n")' 2>&1 | FileCheck %s +# RUN: %lldb -o 'script -l lua -- io.stdout:write(1000+100+10+1, "\n")' 2>&1 | FileCheck %s +# RUN: %lldb -o 'script --language lua -- io.stdout:write(1000+100+10+1, "\n")' 2>&1 | FileCheck %s # CHECK: 1111 diff --git a/lldb/test/Shell/ScriptInterpreter/Python/python.test b/lldb/test/Shell/ScriptInterpreter/Python/python.test new file mode 100644 index 0000000000000..77d20294bc476 --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/python.test @@ -0,0 +1,13 @@ +# REQUIRES: python +# RUN: %lldb --script-language python -o 'script print("{}".format(1000+100+10+1))' 2>&1 | FileCheck %s +# RUN: %lldb --script-language python -o 'script -- print("{}".format(1000+100+10+1))' 2>&1 | FileCheck %s +# RUN: %lldb --script-language python -o 'script --language default -- print("{}".format(1000+100+10+1))' 2>&1 | FileCheck %s +# RUN: %lldb -o 'script -l python -- print("{}".format(1000+100+10+1))' 2>&1 | FileCheck %s +# RUN: %lldb -o 'script -lpython -- print("{}".format(1000+100+10+1))' 2>&1 | FileCheck %s +# RUN: %lldb -o 'script --language python -- print("{}".format(1000+100+10+1))' 2>&1 | FileCheck %s +# RUN: %lldb -o 'script --language=python -- print("{}".format(1000+100+10+1))' 2>&1 | FileCheck %s +# CHECK: 1111 + +# RUN: %lldb -o 'script --language invalid -- print("{}".format(1000+100+10+1))' 2>&1 | FileCheck %s --check-prefix INVALID +# INVALID: error: unrecognized value for language 'invalid' +# INVALID-NOT: 1111 diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp index 248fb72c49689..e31c8e6b072dd 100644 --- a/llvm/lib/Support/MemoryBuffer.cpp +++ b/llvm/lib/Support/MemoryBuffer.cpp @@ -457,8 +457,7 @@ getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize, MapSize = FileSize; } - if (shouldUseMmap(FD, FileSize, MapSize,
Offset, RequiresNullTerminator, - PageSize, IsVolatile)) { + if (false) { std::error_code EC; std::unique_ptr Result( new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile( From 4452cc4086aca1a424b2cd40da9fa120add522e7 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 14 Sep 2020 15:11:55 -0700 Subject: [PATCH 0718/1079] [VectorCombine] Don't vectorize scalar load under asan/hwasan/memtag/tsan Similar to the tsan suppression in `Utils/VNCoercion.cpp:getLoadLoadClobberFullWidthSize` (rL175034; load widening used by GVN), the D81766 optimization should be suppressed under tsan due to potential spurious data race reports: struct A { int i; const short s; // the load cannot be vectorized because int modify; // it overlaps with bytes being concurrently modified long pad1, pad2; }; // __tsan_read16 does not know that some bytes are undef and accessing is safe Similarly, under asan, users can mark memory regions with `__asan_poison_memory_region`. A widened load can lead to a spurious use-after-poison error. hwasan/memtag should be similarly suppressed. `mustSuppressSpeculation` suppresses asan/hwasan/tsan but not memtag, so we need to exclude memtag in `vectorizeLoadInsert`. Note, memtag suppression can be relaxed if the load is aligned to its granule (usually 16), but that is out of scope of this patch. Reviewed By: spatel, vitalybuka Differential Revision: https://reviews.llvm.org/D87538 --- .../Transforms/Vectorize/VectorCombine.cpp | 7 +- .../test/Transforms/VectorCombine/X86/load.ll | 73 +++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 29e9b92040d43..829f640941ac9 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -98,7 +98,12 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { return false; auto *Load = dyn_cast(Scalar); Type *ScalarTy = Scalar->getType(); - if (!Load || !Load->isSimple()) + // Do not vectorize scalar load (widening) if atomic/volatile or under + // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions + // or create data races non-existent in the source. + if (!Load || !Load->isSimple() || + Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) || + mustSuppressSpeculation(*Load)) return false; auto *Ty = dyn_cast(I.getType()); if (!Ty) diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index f0c5b6ef7ad81..9ea027940ad30 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -292,6 +292,66 @@ define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceabl ret <8 x i16> %r } +; Negative test - disable under asan because widened load can cause spurious +; use-after-poison issues when __asan_poison_memory_region is used.
+ +define <8 x i16> @gep10_load_i16_insert_v8i16_asan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_address { +; CHECK-LABEL: @gep10_load_i16_insert_v8i16_asan( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16 +; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0 +; CHECK-NEXT: ret <8 x i16> [[R]] +; + %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0 + %s = load i16, i16* %gep, align 16 + %r = insertelement <8 x i16> undef, i16 %s, i64 0 + ret <8 x i16> %r +} + +; hwasan and memtag should be similarly suppressed. + +define <8 x i16> @gep10_load_i16_insert_v8i16_hwasan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_hwaddress { +; CHECK-LABEL: @gep10_load_i16_insert_v8i16_hwasan( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16 +; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0 +; CHECK-NEXT: ret <8 x i16> [[R]] +; + %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0 + %s = load i16, i16* %gep, align 16 + %r = insertelement <8 x i16> undef, i16 %s, i64 0 + ret <8 x i16> %r +} + +define <8 x i16> @gep10_load_i16_insert_v8i16_memtag(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_memtag { +; CHECK-LABEL: @gep10_load_i16_insert_v8i16_memtag( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16 +; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0 +; CHECK-NEXT: ret <8 x i16> [[R]] +; + %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0 + %s = load i16, i16* %gep, align 16 + %r = insertelement <8 x i16> undef, i16 %s, i64 0 + ret <8 x i16> %r +} + +; Negative test - disable under tsan because widened load may overlap bytes +; being concurrently modified. tsan does not know that some bytes are undef. + +define <8 x i16> @gep10_load_i16_insert_v8i16_tsan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_thread { +; CHECK-LABEL: @gep10_load_i16_insert_v8i16_tsan( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16 +; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0 +; CHECK-NEXT: ret <8 x i16> [[R]] +; + %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0 + %s = load i16, i16* %gep, align 16 + %r = insertelement <8 x i16> undef, i16 %s, i64 0 + ret <8 x i16> %r +} + ; Negative test - can't safely load the offset vector, but could load+shuffle. define <8 x i16> @gep10_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(31) %p) { @@ -393,3 +453,16 @@ define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p %r = insertelement <2 x float> undef, float %s, i32 0 ret <2 x float> %r } + +; Negative test - suppress load widening for asan/hwasan/memtag/tsan. 
+ +define <2 x float> @load_f32_insert_v2f32_asan(float* align 16 dereferenceable(16) %p) sanitize_address { +; CHECK-LABEL: @load_f32_insert_v2f32_asan( +; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> undef, float [[S]], i32 0 +; CHECK-NEXT: ret <2 x float> [[R]] +; + %s = load float, float* %p, align 4 + %r = insertelement <2 x float> undef, float %s, i32 0 + ret <2 x float> %r +} From 9c73e555104336109bb8327b80f3e6a42a17ef1d Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Tue, 15 Sep 2020 10:06:15 -0700 Subject: [PATCH 0719/1079] Revert "[DebugInfo] Remove dots from getFilenameByIndex return value" This is failing on Windows bots due to path separator normalization. This reverts commit 042c23506869b4ae9a49d2c4bc5ea6e6baeabe78. --- lld/test/COFF/duplicate-dwarf.s | 12 ++++++------ lld/test/COFF/undefined-symbol-dwarf.s | 4 ++-- lld/test/ELF/conflict-debug-variable2.s | 4 ++-- lld/test/wasm/debuginfo.test | 6 +++--- llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 1 - llvm/test/tools/llvm-symbolizer/frame-fortran.s | 2 +- 6 files changed, 14 insertions(+), 15 deletions(-) diff --git a/lld/test/COFF/duplicate-dwarf.s b/lld/test/COFF/duplicate-dwarf.s index d3863e9ca366d..b81c13c4300ae 100644 --- a/lld/test/COFF/duplicate-dwarf.s +++ b/lld/test/COFF/duplicate-dwarf.s @@ -4,21 +4,21 @@ # RUN: not lld-link -lldmingw -out:%t.exe %t.o %t.dupl.o -entry:_Z4funcv 2>&1 | FileCheck %s # CHECK: error: duplicate symbol: func() -# CHECK-NEXT: >>> defined at /path{{[/\\]}}to{{[/\\]}}src{{[/\\]}}dupl.cpp:6 +# CHECK-NEXT: >>> defined at /path/to/src{{[/\\]}}dupl.cpp:6 # CHECK-NEXT: >>> {{.*}}.o -# CHECK-NEXT: >>> defined at /path{{[/\\]}}to{{[/\\]}}src{{[/\\]}}dupl.cpp:6 +# CHECK-NEXT: >>> defined at /path/to/src{{[/\\]}}dupl.cpp:6 # CHECK-NEXT: >>> {{.*}}.o # CHECK-EMPTY: # CHECK-NEXT: error: duplicate symbol: _var -# CHECK-NEXT: >>> defined at /path{{[/\\]}}to{{[/\\]}}src{{[/\\]}}dupl.cpp:1 +# CHECK-NEXT: >>> defined at /path/to/src{{[/\\]}}dupl.cpp:1 # CHECK-NEXT: >>> {{.*}}.o -# CHECK-NEXT: >>> defined at /path{{[/\\]}}to{{[/\\]}}src{{[/\\]}}dupl.cpp:1 +# CHECK-NEXT: >>> defined at /path/to/src{{[/\\]}}dupl.cpp:1 # CHECK-NEXT: >>> {{.*}}.o # CHECK-EMPTY: # CHECK-NEXT: error: duplicate symbol: A::namespaceVar -# CHECK-NEXT: >>> defined at /path{{[/\\]}}to{{[/\\]}}src{{[/\\]}}dupl.cpp:3 +# CHECK-NEXT: >>> defined at /path/to/src{{[/\\]}}dupl.cpp:3 # CHECK-NEXT: >>> {{.*}}.o -# CHECK-NEXT: >>> defined at /path{{[/\\]}}to{{[/\\]}}src{{[/\\]}}dupl.cpp:3 +# CHECK-NEXT: >>> defined at /path/to/src{{[/\\]}}dupl.cpp:3 # CHECK-NEXT: >>> {{.*}}.o .text diff --git a/lld/test/COFF/undefined-symbol-dwarf.s b/lld/test/COFF/undefined-symbol-dwarf.s index 4e890987a1f46..7e677f88b7e00 100644 --- a/lld/test/COFF/undefined-symbol-dwarf.s +++ b/lld/test/COFF/undefined-symbol-dwarf.s @@ -3,11 +3,11 @@ # RUN: not lld-link /lldmingw /out:%t.exe %t.o /entry:entry 2>&1 | FileCheck %s # CHECK: error: undefined symbol: bar() -# CHECK-NEXT: >>> referenced by /path{{[/\\]}}to{{[/\\]}}src{{[/\\]}}undef.cpp:17 +# CHECK-NEXT: >>> referenced by /path/to/src{{[/\\]}}undef.cpp:17 # CHECK-NEXT: >>> {{.*}}.o:(entry) # CHECK-EMPTY: # CHECK-NEXT: error: undefined symbol: foo() -# CHECK-NEXT: >>> referenced by /path{{[/\\]}}to{{[/\\]}}src{{[/\\]}}undef.cpp:7 +# CHECK-NEXT: >>> referenced by /path/to/src{{[/\\]}}undef.cpp:7 # CHECK-NEXT: >>> {{.*}}.o:(A::afunc()) .text diff --git a/lld/test/ELF/conflict-debug-variable2.s b/lld/test/ELF/conflict-debug-variable2.s index 
2b5ea882012e9..3fb59e6b4d028 100644 --- a/lld/test/ELF/conflict-debug-variable2.s +++ b/lld/test/ELF/conflict-debug-variable2.s @@ -7,14 +7,14 @@ # INPUT-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000027] = "foo") # INPUT-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x0033 => {0x00000033} "int") # INPUT-NEXT: DW_AT_external [DW_FORM_flag_present] (true) -# INPUT-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/home{{(/|\\)}}path{{(/|\\)}}test.c") +# INPUT-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/home/path/test.c") # INPUT-NEXT: DW_AT_decl_line [DW_FORM_data1] (1) # INPUT-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x0) # INPUT: DW_TAG_variable # INPUT-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000002f] = "bar") # INPUT-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x0033 => {0x00000033} "int") # INPUT-NEXT: DW_AT_external [DW_FORM_flag_present] (true) -# INPUT-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/home{{(/|\\)}}path{{(/|\\)}}test.c") +# INPUT-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/home/path/test.c") # INPUT-NEXT: DW_AT_decl_line [DW_FORM_data1] (2) # INPUT-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x0) diff --git a/lld/test/wasm/debuginfo.test b/lld/test/wasm/debuginfo.test index f6aae5a6c2fdd..2566b74d93bf5 100644 --- a/lld/test/wasm/debuginfo.test +++ b/lld/test/wasm/debuginfo.test @@ -16,13 +16,13 @@ CHECK-NEXT: DW_AT_low_pc CHECK-NEXT: DW_AT_high_pc CHECK-NEXT: DW_AT_frame_base CHECK-NEXT: DW_AT_name ("test") -CHECK-NEXT: DW_AT_decl_file ("/Users{{(/|\\)}}yury{{(/|\\)}}llvmwasm{{(/|\\)}}hi.c") +CHECK-NEXT: DW_AT_decl_file ("/Users/yury/llvmwasm{{(/|\\)}}hi.c") CHECK-NEXT: DW_AT_decl_line (3) CHECK-NEXT: DW_AT_prototyped (true) CHECK: DW_TAG_formal_parameter CHECK-NEXT: DW_AT_name ("t") -CHECK-NEXT: DW_AT_decl_file ("/Users{{(/|\\)}}yury{{(/|\\)}}llvmwasm{{(/|\\)}}hi.c") +CHECK-NEXT: DW_AT_decl_file ("/Users/yury/llvmwasm{{(/|\\)}}hi.c") CHECK-NEXT: DW_AT_decl_line (3) CHECK: DW_TAG_subprogram @@ -30,7 +30,7 @@ CHECK-NEXT: DW_AT_low_pc CHECK-NEXT: DW_AT_high_pc CHECK-NEXT: DW_AT_frame_base CHECK-NEXT: DW_AT_name ("_start") -CHECK-NEXT: DW_AT_decl_file ("/Users{{(/|\\)}}yury{{(/|\\)}}llvmwasm{{(/|\\)}}hi.c") +CHECK-NEXT: DW_AT_decl_file ("/Users/yury/llvmwasm{{(/|\\)}}hi.c") CHECK-NEXT: DW_AT_decl_line (7) CHECK: DW_TAG_base_type diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index e7662fc5d295a..678f58694e0b5 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -1391,7 +1391,6 @@ bool DWARFDebugLine::Prologue::getFileNameByIndex( // sys::path::append skips empty strings. sys::path::append(FilePath, Style, IncludeDir, FileName); - sys::path::remove_dots(FilePath, /*remove_dot_dot=*/true, Style); Result = std::string(FilePath.str()); return true; } diff --git a/llvm/test/tools/llvm-symbolizer/frame-fortran.s b/llvm/test/tools/llvm-symbolizer/frame-fortran.s index 0cd6f2838a6b5..744236fd76f9c 100644 --- a/llvm/test/tools/llvm-symbolizer/frame-fortran.s +++ b/llvm/test/tools/llvm-symbolizer/frame-fortran.s @@ -13,7 +13,7 @@ // CHECK: foo // CHECK-NEXT: array -// CHECK-NEXT: /home/ubuntu{{/|\\}}example.cpp:1 +// CHECK-NEXT: /home/ubuntu{{/|\\}}.{{/|\\}}example.cpp:1 // CHECK-NEXT: -24 8 ?? .file "example.cpp" From 3a59628f3cc26eb085acfc9cbdc97243ef71a6c5 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 15 Sep 2020 17:52:50 +0100 Subject: [PATCH 0720/1079] Revert "[DSE] Switch to MemorySSA-backed DSE by default." 
This reverts commit fb109c42d91c30c8c7497ef1fd7aff6f2969c6e7. Temporarily revert due to a mis-compile pointed out at D87163. --- clang/test/CodeGen/thinlto-distributed-newpm.ll | 2 +- clang/test/CodeGenObjC/exceptions.m | 3 +++ .../lib/Transforms/Scalar/DeadStoreElimination.cpp | 2 +- llvm/test/Analysis/BasicAA/modref.ll | 1 - llvm/test/CodeGen/AMDGPU/opt-pipeline.ll | 14 ++++++++------ llvm/test/Other/new-pm-defaults.ll | 3 +-- llvm/test/Other/new-pm-lto-defaults.ll | 2 -- llvm/test/Other/new-pm-thinlto-defaults.ll | 3 +-- llvm/test/Other/opt-O2-pipeline.ll | 7 ++++--- llvm/test/Other/opt-O3-pipeline-enable-matrix.ll | 7 ++++--- llvm/test/Other/opt-O3-pipeline.ll | 7 ++++--- llvm/test/Other/opt-Os-pipeline.ll | 7 ++++--- llvm/test/Transforms/Coroutines/ArgAddr.ll | 11 ----------- llvm/test/Transforms/Coroutines/coro-retcon.ll | 1 + .../MSSA/2011-03-25-DSEMiscompile.ll | 2 +- .../MSSA/2011-09-06-EndOfFunction.ll | 2 +- .../DeadStoreElimination/MSSA/2011-09-06-MemCpy.ll | 2 +- .../MSSA/2016-07-17-UseAfterFree.ll | 2 +- .../MSSA/OverwriteStoreBegin.ll | 2 +- .../DeadStoreElimination/MSSA/OverwriteStoreEnd.ll | 2 +- .../DeadStoreElimination/MSSA/PartialStore.ll | 2 +- .../DeadStoreElimination/MSSA/PartialStore2.ll | 4 ++-- .../MSSA/X86/gather-null-pointer.ll | 2 +- .../MSSA/atomic-overlapping.ll | 2 +- .../DeadStoreElimination/MSSA/atomic-todo.ll | 2 +- .../Transforms/DeadStoreElimination/MSSA/atomic.ll | 2 +- .../DeadStoreElimination/MSSA/calloc-store.ll | 2 +- .../MSSA/combined-partial-overwrites.ll | 4 ++-- .../DeadStoreElimination/MSSA/const-pointers.ll | 2 +- .../Transforms/DeadStoreElimination/MSSA/crash.ll | 2 +- .../DeadStoreElimination/MSSA/cs-cs-aliasing.ll | 2 +- .../DeadStoreElimination/MSSA/debug-counter.ll | 8 ++++---- .../DeadStoreElimination/MSSA/debuginfo.ll | 2 +- .../DeadStoreElimination/MSSA/dominate.ll | 2 +- .../DeadStoreElimination/MSSA/fence-todo.ll | 2 +- .../Transforms/DeadStoreElimination/MSSA/fence.ll | 2 +- .../Transforms/DeadStoreElimination/MSSA/free.ll | 2 +- .../DeadStoreElimination/MSSA/inst-limits.ll | 2 +- .../DeadStoreElimination/MSSA/int_sideeffect.ll | 2 +- .../DeadStoreElimination/MSSA/invariant.start.ll | 2 +- .../MSSA/launder.invariant.group.ll | 2 +- .../DeadStoreElimination/MSSA/libcalls.ll | 2 +- .../DeadStoreElimination/MSSA/lifetime.ll | 2 +- .../MSSA/mda-with-dbg-values.ll | 4 ++-- .../MSSA/memcpy-complete-overwrite.ll | 4 ++-- .../DeadStoreElimination/MSSA/memintrinsics.ll | 2 +- .../MSSA/memoryssa-scan-limit.ll | 8 ++++---- .../DeadStoreElimination/MSSA/memset-and-memcpy.ll | 4 ++-- .../MSSA/memset-missing-debugloc.ll | 2 +- .../MSSA/memset-unknown-sizes.ll | 2 +- .../MSSA/merge-stores-big-endian.ll | 2 +- .../DeadStoreElimination/MSSA/merge-stores.ll | 2 +- .../MSSA/multiblock-captures.ll | 2 +- .../MSSA/multiblock-exceptions.ll | 2 +- .../DeadStoreElimination/MSSA/multiblock-loops.ll | 2 +- .../MSSA/multiblock-malloc-free.ll | 2 +- .../MSSA/multiblock-memintrinsics.ll | 2 +- .../MSSA/multiblock-memoryphis.ll | 2 +- .../MSSA/multiblock-multipath-throwing.ll | 2 +- .../MSSA/multiblock-multipath.ll | 2 +- .../MSSA/multiblock-overlap.ll | 4 ++-- .../MSSA/multiblock-partial.ll | 2 +- .../DeadStoreElimination/MSSA/multiblock-simple.ll | 2 +- .../MSSA/multiblock-throwing.ll | 2 +- .../MSSA/multiblock-unreachable.ll | 2 +- .../DeadStoreElimination/MSSA/no-targetdata.ll | 2 +- .../DeadStoreElimination/MSSA/noop-stores.ll | 4 ++-- .../DeadStoreElimination/MSSA/operand-bundles.ll | 2 +- .../DeadStoreElimination/MSSA/overlap.ll | 4 ++-- 
.../DeadStoreElimination/MSSA/pr11390.ll | 2 +- .../pr47285-not-overwritten-on-all-exit-paths.ll | 2 +- .../MSSA/simple-preservation.ll | 2 +- .../DeadStoreElimination/MSSA/simple-todo.ll | 4 ++-- .../Transforms/DeadStoreElimination/MSSA/simple.ll | 4 ++-- .../Transforms/DeadStoreElimination/MSSA/stats.ll | 2 +- .../DeadStoreElimination/MSSA/tail-byval.ll | 2 +- llvm/test/Transforms/MemCpyOpt/memcpy.ll | 3 --- 77 files changed, 110 insertions(+), 119 deletions(-) diff --git a/clang/test/CodeGen/thinlto-distributed-newpm.ll b/clang/test/CodeGen/thinlto-distributed-newpm.ll index 315d668aec0ac..9f9a8bec4ef5d 100644 --- a/clang/test/CodeGen/thinlto-distributed-newpm.ll +++ b/clang/test/CodeGen/thinlto-distributed-newpm.ll @@ -131,12 +131,12 @@ ; CHECK-O: Running pass: JumpThreadingPass on main ; CHECK-O: Running pass: CorrelatedValuePropagationPass on main ; CHECK-O: Running pass: DSEPass on main -; CHECK-O: Running analysis: PostDominatorTreeAnalysis on main ; CHECK-O: Starting {{.*}}Function pass manager run. ; CHECK-O: Running pass: LoopSimplifyPass on main ; CHECK-O: Running pass: LCSSAPass on main ; CHECK-O: Finished {{.*}}Function pass manager run. ; CHECK-O: Running pass: ADCEPass on main +; CHECK-O: Running analysis: PostDominatorTreeAnalysis on main ; CHECK-O: Running pass: SimplifyCFGPass on main ; CHECK-O: Running pass: InstCombinePass on main ; CHECK-O: Finished {{.*}}Function pass manager run. diff --git a/clang/test/CodeGenObjC/exceptions.m b/clang/test/CodeGenObjC/exceptions.m index d95398e710147..55a117bcc3dd5 100644 --- a/clang/test/CodeGenObjC/exceptions.m +++ b/clang/test/CodeGenObjC/exceptions.m @@ -59,6 +59,9 @@ int f2() { // CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[X]] // CHECK-NEXT: [[T2:%.*]] = add nsw i32 [[T1]], -1 + // This store is dead. 
+ // CHECK-NEXT: store i32 [[T2]], i32* [[X]] + // CHECK: store i32 6, i32* [[X]] x++; // CHECK-NEXT: call void asm sideeffect "", "*m,*m"(i32* nonnull [[X]] diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 6615f6b1c32e9..261043743b7de 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -106,7 +106,7 @@ EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::desc("Enable partial store merging in DSE")); static cl::opt - EnableMemorySSA("enable-dse-memoryssa", cl::init(true), cl::Hidden, + EnableMemorySSA("enable-dse-memoryssa", cl::init(false), cl::Hidden, cl::desc("Use the new MemorySSA-backed DSE.")); static cl::opt diff --git a/llvm/test/Analysis/BasicAA/modref.ll b/llvm/test/Analysis/BasicAA/modref.ll index 3ac94ad54f466..9904d13296e89 100644 --- a/llvm/test/Analysis/BasicAA/modref.ll +++ b/llvm/test/Analysis/BasicAA/modref.ll @@ -82,7 +82,6 @@ define void @test3a(i8* %P, i8 %X) { store i8 %Y, i8* %P2 call void @llvm.lifetime.end.p0i8(i64 10, i8* %P) ret void -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 10, i8* %P) ; CHECK-NEXT: ret void } diff --git a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll index b0c0460165e13..31531a43fc3f2 100644 --- a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll @@ -511,14 +511,15 @@ ; GCN-O2-NEXT: Value Propagation ; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O2-NEXT: Function Alias Analysis Results -; GCN-O2-NEXT: Post-Dominator Tree Construction -; GCN-O2-NEXT: Memory SSA +; GCN-O2-NEXT: Phi Values Analysis +; GCN-O2-NEXT: Memory Dependence Analysis ; GCN-O2-NEXT: Dead Store Elimination +; GCN-O2-NEXT: Function Alias Analysis Results +; GCN-O2-NEXT: Memory SSA ; GCN-O2-NEXT: Natural Loop Information ; GCN-O2-NEXT: Canonicalize natural loops ; GCN-O2-NEXT: LCSSA Verifier ; GCN-O2-NEXT: Loop-Closed SSA Form Pass -; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Scalar Evolution Analysis ; GCN-O2-NEXT: Loop Pass Manager ; GCN-O2-NEXT: Loop Invariant Code Motion @@ -870,14 +871,15 @@ ; GCN-O3-NEXT: Value Propagation ; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O3-NEXT: Function Alias Analysis Results -; GCN-O3-NEXT: Post-Dominator Tree Construction -; GCN-O3-NEXT: Memory SSA +; GCN-O3-NEXT: Phi Values Analysis +; GCN-O3-NEXT: Memory Dependence Analysis ; GCN-O3-NEXT: Dead Store Elimination +; GCN-O3-NEXT: Function Alias Analysis Results +; GCN-O3-NEXT: Memory SSA ; GCN-O3-NEXT: Natural Loop Information ; GCN-O3-NEXT: Canonicalize natural loops ; GCN-O3-NEXT: LCSSA Verifier ; GCN-O3-NEXT: Loop-Closed SSA Form Pass -; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: Scalar Evolution Analysis ; GCN-O3-NEXT: Loop Pass Manager ; GCN-O3-NEXT: Loop Invariant Code Motion diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index 02394ee0f6527..59c24acb17f04 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -205,7 +205,6 @@ ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: DSEPass -; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run. 
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass @@ -213,7 +212,7 @@ ; CHECK-O23SZ-NEXT: Running pass: LICMPass ; CHECK-EP-SCALAR-LATE-NEXT: Running pass: NoOpFunctionPass ; CHECK-O-NEXT: Running pass: ADCEPass -; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis +; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll index 21e43abd5f7fb..a3be19ca29f1f 100644 --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -87,8 +87,6 @@ ; CHECK-O2-NEXT: Running analysis: PhiValuesAnalysis ; CHECK-O2-NEXT: Running pass: MemCpyOptPass on foo ; CHECK-O2-NEXT: Running pass: DSEPass on foo -; CHECK-O2-NEXT: Running analysis: MemorySSAAnalysis on foo -; CHECK-O2-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O2-NEXT: Running pass: InstCombinePass on foo ; CHECK-O2-NEXT: Running pass: SimplifyCFGPass on foo ; CHECK-O2-NEXT: Running pass: SCCPPass on foo diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll index 9e5ff8d37f806..0b9b52a57e2a5 100644 --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -178,14 +178,13 @@ ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: DSEPass -; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass ; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LICMPass on Loop at depth 1 containing: %loop ; CHECK-O-NEXT: Running pass: ADCEPass -; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis +; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. 
diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll index 42aa8b0089a54..e606e7cfac171 100644 --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -158,14 +158,15 @@ ; CHECK-NEXT: Value Propagation ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Post-Dominator Tree Construction -; CHECK-NEXT: Memory SSA +; CHECK-NEXT: Phi Values Analysis +; CHECK-NEXT: Memory Dependence Analysis ; CHECK-NEXT: Dead Store Elimination +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass -; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Loop Invariant Code Motion diff --git a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll index 5f78c2f36d509..aaee6f786bac9 100644 --- a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll +++ b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll @@ -163,14 +163,15 @@ ; CHECK-NEXT: Value Propagation ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Post-Dominator Tree Construction -; CHECK-NEXT: Memory SSA +; CHECK-NEXT: Phi Values Analysis +; CHECK-NEXT: Memory Dependence Analysis ; CHECK-NEXT: Dead Store Elimination +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass -; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Loop Invariant Code Motion diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll index 069ef2dbba7e5..b2d2f85ae21be 100644 --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -163,14 +163,15 @@ ; CHECK-NEXT: Value Propagation ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Post-Dominator Tree Construction -; CHECK-NEXT: Memory SSA +; CHECK-NEXT: Phi Values Analysis +; CHECK-NEXT: Memory Dependence Analysis ; CHECK-NEXT: Dead Store Elimination +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass -; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Loop Invariant Code Motion diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll index b7855e6b3856f..cc91707c4b009 100644 --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -144,14 +144,15 @@ ; CHECK-NEXT: Value Propagation ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Post-Dominator Tree Construction -; CHECK-NEXT: Memory SSA +; CHECK-NEXT: Phi Values Analysis +; CHECK-NEXT: Memory Dependence Analysis ; CHECK-NEXT: Dead Store Elimination +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize 
natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass -; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Loop Invariant Code Motion diff --git a/llvm/test/Transforms/Coroutines/ArgAddr.ll b/llvm/test/Transforms/Coroutines/ArgAddr.ll index 99e418599c671..a1cac168ac402 100644 --- a/llvm/test/Transforms/Coroutines/ArgAddr.ll +++ b/llvm/test/Transforms/Coroutines/ArgAddr.ll @@ -46,19 +46,8 @@ entry: call void @llvm.coro.destroy(i8* %hdl) ret i32 0 ; CHECK: call void @ctor -; CHECK-NEXT: %dec1.spill.addr.i = getelementptr inbounds i8, i8* %call.i, i64 20 -; CHECK-NEXT: bitcast i8* %dec1.spill.addr.i to i32* -; CHECK-NEXT: store i32 4 ; CHECK-NEXT: call void @print(i32 4) -; CHECK-NEXT: %index.addr13.i = getelementptr inbounds i8, i8* %call.i, i64 24 -; CHECK-NEXT: bitcast i8* %index.addr13.i to i1* -; CHECK-NEXT: store i1 false -; CHECK-NEXT: store i32 3 -; CHECK-NEXT: store i32 3 ; CHECK-NEXT: call void @print(i32 3) -; CHECK-NEXT: store i1 false -; CHECK-NEXT: store i32 2 -; CHECK-NEXT: store i32 2 ; CHECK-NEXT: call void @print(i32 2) ; CHECK: ret i32 0 } diff --git a/llvm/test/Transforms/Coroutines/coro-retcon.ll b/llvm/test/Transforms/Coroutines/coro-retcon.ll index 0021bb497aad9..13283f05b2661 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon.ll @@ -74,6 +74,7 @@ entry: ; CHECK-NEXT: call void @print(i32 [[INC]]) ; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add i32 [[LOAD]], 1 +; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], align 4 ; CHECK-NEXT: call void @print(i32 [[INC]]) ; CHECK-NEXT: ret i32 0 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-03-25-DSEMiscompile.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-03-25-DSEMiscompile.ll index 25c2d5ffe7f56..c90da22026727 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-03-25-DSEMiscompile.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-03-25-DSEMiscompile.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s ; PR9561 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" target triple = "i386-apple-darwin9.8" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll index 7e46d28a9c47f..b9a0ea76d7fbb 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll @@ -1,4 +1,4 @@ -; RUN: opt -dse -S < %s | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-MemCpy.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-MemCpy.ll index 665d772d03b91..30c95961d2b67 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-MemCpy.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-MemCpy.ll @@ -1,4 +1,4 @@ -; RUN: opt -dse -S < %s | FileCheck %s +; RUN: opt -dse 
-enable-dse-memoryssa -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/2016-07-17-UseAfterFree.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/2016-07-17-UseAfterFree.ll index 3501b43600168..85a749f81d50b 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/2016-07-17-UseAfterFree.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/2016-07-17-UseAfterFree.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S -enable-dse-partial-overwrite-tracking | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S -enable-dse-partial-overwrite-tracking | FileCheck %s ; PR28588 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll index b5d9c40cbdbc3..93e8860bdaf31 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s define void @write4to7(i32* nocapture %p) { ; CHECK-LABEL: @write4to7( diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll index b6ae657d17e5e..1cdeade120a69 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" %struct.vec2 = type { <4 x i32>, <4 x i32> } diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore.ll index 1dd894e6658cc..4f99ec09d2a03 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-partial-store-merging=false -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -enable-dse-partial-store-merging=false -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" ; Ensure that the dead store is deleted in this case. 
It is wholely diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore2.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore2.ll index ebcb0c3808a15..3802d1c22cbec 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore2.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore2.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s --data-layout "e" -dse -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-LE %s -; RUN: opt < %s --data-layout "E" -dse -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-BE %s +; RUN: opt < %s --data-layout "e" -dse -enable-dse-memoryssa -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-LE %s +; RUN: opt < %s --data-layout "E" -dse -enable-dse-memoryssa -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-BE %s ; This test used to hit an assertion (see PR41949). ; diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/X86/gather-null-pointer.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/X86/gather-null-pointer.ll index 6a5f4bb9eb25c..0997ce725b21a 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/X86/gather-null-pointer.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/X86/gather-null-pointer.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -dse -S | FileCheck %s +; RUN: opt < %s -dse -enable-dse-memoryssa -S | FileCheck %s ; Both stores should be emitted because we can't tell if the gather aliases. diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-overlapping.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-overlapping.ll index d23208166136a..5a7bbdd0a6077 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-overlapping.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-overlapping.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse %s -S | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa %s -S | FileCheck %s target datalayout = "e-m:o-p:32:32-Fi8-i64:64-a:0:32-n32-S128" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-todo.ll index b11000570ecc4..8dfb85719c309 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-todo.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic-todo.ll @@ -1,5 +1,5 @@ ; XFAIL: * -; RUN: opt -basic-aa -dse -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -enable-dse-memoryssa -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-macosx10.7.0" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll index 30f799d59ef7f..51129fe2bcadb 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -basic-aa -dse -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -enable-dse-memoryssa -S < %s | FileCheck %s target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-macosx10.7.0" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll index ddb10d7ccc80f..d8fc8136f0d7e 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s declare noalias i8* @calloc(i64, i64) diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/combined-partial-overwrites.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/combined-partial-overwrites.ll index ec1b9a5ee5140..a3bd300c8b782 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/combined-partial-overwrites.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/combined-partial-overwrites.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -dse -enable-dse-partial-store-merging=false < %s | FileCheck --check-prefixes=CHECK,DEFAULT-LIMIT %s -; RUN: opt -S -dse -enable-dse-partial-store-merging=false -dse-memoryssa-partial-store-limit=10 < %s | FileCheck --check-prefixes=CHECK,LARGER-LIMIT %s +; RUN: opt -S -dse -enable-dse-memoryssa -enable-dse-partial-store-merging=false < %s | FileCheck --check-prefixes=CHECK,DEFAULT-LIMIT %s +; RUN: opt -S -dse -enable-dse-memoryssa -enable-dse-partial-store-merging=false -dse-memoryssa-partial-store-limit=10 < %s | FileCheck --check-prefixes=CHECK,LARGER-LIMIT %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/const-pointers.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/const-pointers.ll index a2218b725cd3b..839fdfcf2d2cd 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/const-pointers.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/const-pointers.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -dse -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -enable-dse-memoryssa -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" %t = type { i32 } diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/crash.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/crash.ll index ccee7fb8ba58b..c3860f1fe6421 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/crash.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/crash.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin10.0" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/cs-cs-aliasing.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/cs-cs-aliasing.ll index b403e3382234d..7ae6c450bb560 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/cs-cs-aliasing.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/cs-cs-aliasing.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -dse -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -enable-dse-memoryssa -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git 
a/llvm/test/Transforms/DeadStoreElimination/MSSA/debug-counter.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/debug-counter.ll index b881e38e92f30..9def782900899 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/debug-counter.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/debug-counter.ll @@ -3,16 +3,16 @@ ; REQUIRES: asserts ; Eliminates store to %R in the entry block. -; RUN: opt < %s -basic-aa -dse -debug-counter=dse-memoryssa-skip=0,dse-memoryssa-count=1 -S | FileCheck --check-prefix=SKIP0-COUNT1 %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -debug-counter=dse-memoryssa-skip=0,dse-memoryssa-count=1 -S | FileCheck --check-prefix=SKIP0-COUNT1 %s ; Eliminates store to %P in the entry block. -; RUN: opt < %s -basic-aa -dse -debug-counter=dse-memoryssa-skip=1,dse-memoryssa-count=1 -S | FileCheck --check-prefix=SKIP1-COUNT1 %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -debug-counter=dse-memoryssa-skip=1,dse-memoryssa-count=1 -S | FileCheck --check-prefix=SKIP1-COUNT1 %s ; Eliminates both stores in the entry block. -; RUN: opt < %s -basic-aa -dse -debug-counter=dse-memoryssa-skip=0,dse-memoryssa-count=2 -S | FileCheck --check-prefix=SKIP0-COUNT2 %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -debug-counter=dse-memoryssa-skip=0,dse-memoryssa-count=2 -S | FileCheck --check-prefix=SKIP0-COUNT2 %s ; Eliminates no stores. -; RUN: opt < %s -basic-aa -dse -debug-counter=dse-memoryssa-skip=2,dse-memoryssa-count=1 -S | FileCheck --check-prefix=SKIP2-COUNT1 %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -debug-counter=dse-memoryssa-skip=2,dse-memoryssa-count=1 -S | FileCheck --check-prefix=SKIP2-COUNT1 %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/debuginfo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/debuginfo.ll index b927965dc4054..f4e7e1fd148c5 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/debuginfo.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/debuginfo.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -debugify -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -debugify -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/dominate.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/dominate.ll index 24dd65e07bbc2..32f8699dc61e6 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/dominate.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/dominate.ll @@ -1,4 +1,4 @@ -; RUN: opt -dse -disable-output < %s +; RUN: opt -dse -enable-dse-memoryssa -disable-output < %s ; test that we don't crash declare void @bar() diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll index ab4e65edaab9e..cdd12ef302736 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll @@ -1,6 +1,6 @@ ; XFAIL: * -; RUN: opt -S -basic-aa -dse < %s | FileCheck %s +; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa < %s | FileCheck %s ; We DSE stack alloc'ed and byval locations, in the presence of fences. ; Fence does not make an otherwise thread local store visible. 
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll index 5f2398812e93d..fc72f1d96ddaf 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -basic-aa -dse < %s | FileCheck %s +; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa < %s | FileCheck %s ; We conservative choose to prevent dead store elimination ; across release or stronger fences. It's not required diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll index 66ccc7b4f47b5..13cfb7002cf1e 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-p:64:64:64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll index 6357477ae43be..638571f6f4172 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dse < %s | FileCheck %s +; RUN: opt -S -dse -enable-dse-memoryssa < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; This test is not relevant for DSE with MemorySSA. Non-memory instructions diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/int_sideeffect.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/int_sideeffect.ll index 035e787f6bd7a..6ea0b190f21fb 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/int_sideeffect.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/int_sideeffect.ll @@ -1,4 +1,4 @@ -; RUN: opt -S < %s -dse | FileCheck %s +; RUN: opt -S < %s -dse -enable-dse-memoryssa | FileCheck %s declare void @llvm.sideeffect() diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/invariant.start.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/invariant.start.ll index 27400cd4ed16c..82e168b45f754 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/invariant.start.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/invariant.start.ll @@ -1,5 +1,5 @@ ; Test to make sure llvm.invariant.start calls are not treated as clobbers. 
-; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/launder.invariant.group.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/launder.invariant.group.ll index 28abe2eb5feea..46f3c261f7bc0 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/launder.invariant.group.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/launder.invariant.group.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s ; CHECK-LABEL: void @skipBarrier(i8* %ptr) define void @skipBarrier(i8* %ptr) { diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/libcalls.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/libcalls.ll index ac6efd54ddba6..ceffa47ca8fa9 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/libcalls.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/libcalls.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -inferattrs -basic-aa -dse < %s | FileCheck %s +; RUN: opt -S -inferattrs -basic-aa -dse -enable-dse-memoryssa < %s | FileCheck %s target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll index 9aa3c9c1fd420..29ff7726c4eee 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -basic-aa -dse < %s | FileCheck %s +; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa < %s | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll index 79211609a5400..937f10d3502c7 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -dse -dse-memoryssa-scanlimit=2 < %s | FileCheck %s -; RUN: opt -S -strip-debug -dse -dse-memoryssa-scanlimit=2 < %s | FileCheck %s +; RUN: opt -S -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=2 < %s | FileCheck %s +; RUN: opt -S -strip-debug -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=2 < %s | FileCheck %s ; Test case to check that DSE gets the same result even if we have a dbg value ; between the memcpy. 
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll index 9b1624a931bc3..70c0265813634 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; XFAIL: * -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll index 088752c4ebae7..81ba0a6764a66 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -dse < %s | FileCheck %s +; RUN: opt -S -dse -enable-dse-memoryssa < %s | FileCheck %s declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind declare void @llvm.memmove.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll index 3a8b772b062e0..0e722c56f5f9f 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck --check-prefix=NO-LIMIT %s -; RUN: opt < %s -basic-aa -dse -dse-memoryssa-scanlimit=0 -S | FileCheck --check-prefix=LIMIT-0 %s -; RUN: opt < %s -basic-aa -dse -dse-memoryssa-scanlimit=2 -S | FileCheck --check-prefix=LIMIT-2 %s -; RUN: opt < %s -basic-aa -dse -dse-memoryssa-scanlimit=3 -S | FileCheck --check-prefix=LIMIT-3 %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck --check-prefix=NO-LIMIT %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=0 -S | FileCheck --check-prefix=LIMIT-0 %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=2 -S | FileCheck --check-prefix=LIMIT-2 %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=3 -S | FileCheck --check-prefix=LIMIT-3 %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll index ad888159ffa67..02fc8f22b6b40 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s ; 
RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll index 9229157a9b6ed..c28f0cc901247 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll @@ -2,7 +2,7 @@ ; Test that the getelementptr generated when the dse pass determines that ; a memset can be shortened has the debugloc carried over from the memset. -; RUN: opt -S -march=native -dse < %s| FileCheck %s +; RUN: opt -S -march=native -dse -enable-dse-memoryssa < %s| FileCheck %s ; CHECK: bitcast [5 x i64]* %{{[a-zA-Z_][a-zA-Z0-9_]*}} to i8*, !dbg ; CHECK-NEXT: %{{[0-9]+}} = getelementptr inbounds i8, i8* %0, i64 32, !dbg ![[DBG:[0-9]+]] ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 16 %1, i8 0, i64 8, i1 false), !dbg ![[DBG:[0-9]+]] diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll index bbd0d01ee475f..115540e54a26b 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -S %s | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa -S %s | FileCheck %s declare i8* @_Znwm() local_unnamed_addr #0 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll index 77784ac0c4047..8acc29f3f62e4 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa -enable-dse-partial-store-merging -S < %s | FileCheck %s target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128" define void @byte_by_byte_replacement(i32 *%ptr) { diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll index 8cd593bb00e77..7643c3ba5b9e7 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa -enable-dse-partial-store-merging -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" define void @byte_by_byte_replacement(i32 *%ptr) { diff --git 
a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll index 45f3e2c429754..fc3e99723d6e6 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll index 08a15565e18ff..8357ef9302006 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" declare void @f() diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll index dc6004bf71d78..ba61b3250f5e7 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll index f60a8e536a0be..5c14f92b8d74a 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" declare void @unknown_func() diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll index b22f5b60d7584..df6113928fe53 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" declare void @unknown_func() diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll 
b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll index 1ad2e71f2d59a..0ace57e690fe1 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath-throwing.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath-throwing.ll index 4fe04e5467d3d..944586253bedb 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath-throwing.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath-throwing.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll index ab7a056f7018d..8413251036676 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-overlap.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-overlap.ll index 8a71c73979170..e6e206ef5abc7 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-overlap.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-overlap.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse %s -S | FileCheck --check-prefixes=CHECK,DEFAULT-LIMIT %s -; RUN: opt -dse -dse-memoryssa-partial-store-limit=10 %s -S | FileCheck --check-prefixes=CHECK,LARGER-LIMIT %s +; RUN: opt -dse -enable-dse-memoryssa %s -S | FileCheck --check-prefixes=CHECK,DEFAULT-LIMIT %s +; RUN: opt -dse -enable-dse-memoryssa -dse-memoryssa-partial-store-limit=10 %s -S | FileCheck --check-prefixes=CHECK,LARGER-LIMIT %s %struct.ham = type { [3 x double], [3 x double]} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll index f998bb44a4716..b2a5c04f31fd4 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll index 
334e080bf8dbb..aa09235e76986 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll index c067a907892d9..f6031e86bef07 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" declare void @unknown_func() diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-unreachable.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-unreachable.ll index 6548ec34ae0ac..df08d619f9dcd 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-unreachable.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-unreachable.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -S %s | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa -S %s | FileCheck %s target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/no-targetdata.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/no-targetdata.ll index aec3076678787..7e6a4cdf3a7ce 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/no-targetdata.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/no-targetdata.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -dse -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -enable-dse-memoryssa -S < %s | FileCheck %s declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll index ad93cfc72a7ec..6a9c4b80b3ddf 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll index f3df74be031b7..5940f2bf052bf 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s 
-basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s declare noalias i8* @malloc(i64) "malloc-like" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll index 31bb3234dc421..e3e6b8f583a92 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s declare void @use(i64*) diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/pr11390.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/pr11390.ll index 56ca604eff98b..c58fc18d2a9d6 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/pr11390.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/pr11390.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -dse -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -enable-dse-memoryssa -S < %s | FileCheck %s ; PR11390 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/pr47285-not-overwritten-on-all-exit-paths.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/pr47285-not-overwritten-on-all-exit-paths.ll index 7c3bb913f5f70..aaff809d38d0b 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/pr47285-not-overwritten-on-all-exit-paths.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/pr47285-not-overwritten-on-all-exit-paths.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -S %s | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa -S %s | FileCheck %s @b = local_unnamed_addr global i32 0, align 4 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-preservation.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-preservation.ll index 6aedc1ca01f83..3562c611e76b2 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-preservation.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-preservation.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-knowledge-retention -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -enable-knowledge-retention -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll index 444e139a4cf62..a4d3127d25f3d 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; XFAIL: * -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s 
-aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" ; Remove redundant store if loaded value is in another block inside a loop. diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll index 5ee1a55a7369f..9f719746f9f17 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/stats.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/stats.ll index 990f098533bfa..bd4f6f0e58668 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/stats.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/stats.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -stats -S 2>&1 | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -stats -S 2>&1 | FileCheck %s ; REQUIRES: asserts diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/tail-byval.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/tail-byval.ll index ed2fbd434a75d..ec3bb495182f0 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/tail-byval.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/tail-byval.ll @@ -1,4 +1,4 @@ -; RUN: opt -dse -S < %s | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa -S < %s | FileCheck %s ; Don't eliminate stores to allocas before tail calls to functions that use ; byval. It's correct to mark calls like these as 'tail'. 
To implement this tail diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll index 065230d4be139..1741da030c2ed 100644 --- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll +++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll @@ -73,11 +73,8 @@ define void @test3(%0* noalias sret %agg.result) nounwind { call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %agg.result2, i8* align 16 %x.01, i32 32, i1 false) ret void ; CHECK-LABEL: @test3( -; CHECK-NEXT: %x.0 = alloca -; CHECK-NEXT: %x.01 = bitcast ; CHECK-NEXT: %agg.result1 = bitcast ; CHECK-NEXT: call void @llvm.memcpy -; CHECK-NEXT: %agg.result2 = bitcast ; CHECK-NEXT: ret void }
From 03f1516d6075f42dce95bcf9fde3f6fde97abd35 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 15 Sep 2020 10:20:08 -0700 Subject: [PATCH 0721/1079] [MemoryBuffer] Revert unintended MemoryBuffer change from D86996 Fixes SupportTests MemoryBufferTest.mmapVolatileNoNull --- llvm/lib/Support/MemoryBuffer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp index e31c8e6b072dd..248fb72c49689 100644 --- a/llvm/lib/Support/MemoryBuffer.cpp +++ b/llvm/lib/Support/MemoryBuffer.cpp @@ -457,7 +457,8 @@ getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize, MapSize = FileSize; } - if (false) { + if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator, + PageSize, IsVolatile)) { std::error_code EC; std::unique_ptr<MemoryBuffer> Result( new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile(
From 3bc3983f229f9277d5bea3692b691f72ab8740dd Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Tue, 15 Sep 2020 12:33:31 -0500 Subject: [PATCH 0722/1079] Fix bot failure after ccb4124a4172 The test case has a CHECK line that matches the option on a line containing the string lld surrounded by arbitrary characters. This causes failures when that string appears in the build path. What the test case presumably means to test is the actual invocation of the LLD linker (i.e. a linker that has that string as a suffix). This patch simply removes the erroneous wildcard after the string. --- clang/test/Driver/hip-gz-options.hip | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/hip-gz-options.hip b/clang/test/Driver/hip-gz-options.hip index b2544a42ebedc..705c1be7b94ef 100644 --- a/clang/test/Driver/hip-gz-options.hip +++ b/clang/test/Driver/hip-gz-options.hip @@ -9,6 +9,6 @@ // RUN: -ggdb -gz=zlib 2>&1 | FileCheck %s // CHECK-DAG: {{".*clang.*" .* "--compress-debug-sections=zlib"}} -// CHECK-DAG: {{".*lld.*" .* "--compress-debug-sections=zlib"}} +// CHECK-DAG: {{".*lld" .* "--compress-debug-sections=zlib"}} // CHECK-DAG: {{".*clang.*" .* "--compress-debug-sections=zlib"}} // CHECK: "--compress-debug-sections=zlib"
From 738bab743b5c6cfcf1a1feb116de9e35a3f1e326 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 15 Sep 2020 11:21:47 -0400 Subject: [PATCH 0723/1079] [OPENMP] Add support for allocate vars in untied tasks. Local vars marked with pragma allocate must be allocated by a call to the runtime function and cannot be allocated like other local variables. Instead, we allocate space for a pointer in the private record and store the address returned by the kmpc_alloc call in that pointer. So, for untied tasks ``` #pragma omp task untied { S s; #pragma omp allocate(s) allocator(allocator) s = x; } ``` the compiler generates something like this: ``` struct task_with_privates { S *s; }; void entry(task_with_privates *p) { S *s = p->s; switch(partid) { case 1: p->s = (S*)kmpc_alloc(); kmpc_omp_task(); br exit; case 2: *s = x; kmpc_omp_task(); br exit; case 3: s->~S(); kmpc_free((void*)s); br exit; } exit: } ``` Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D86558 --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 168 +++++++++++------- clang/lib/CodeGen/CGOpenMPRuntime.h | 14 +- clang/lib/CodeGen/CGStmtOpenMP.cpp | 43 +++-- clang/test/OpenMP/allocate_codegen.cpp | 3 + clang/test/OpenMP/for_lastprivate_codegen.cpp | 3 +- clang/test/OpenMP/for_linear_codegen.cpp | 1 + .../test/OpenMP/for_reduction_codegen_UDR.cpp | 1 + .../OpenMP/parallel_firstprivate_codegen.cpp | 2 + .../test/OpenMP/parallel_private_codegen.cpp | 5 +- clang/test/OpenMP/task_codegen.cpp | 61 +++++-- 10 files changed, 207 insertions(+), 94 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 5384e9196896b..e507e434d9e1c 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1526,6 +1526,7 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { FunctionUDMMap.erase(I); } LastprivateConditionalToTypes.erase(CGF.CurFn); + FunctionToUntiedTaskStackMap.erase(CGF.CurFn); } llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { @@ -3382,6 +3383,17 @@ struct PrivateHelpersTy { typedef std::pair<CharUnits, PrivateHelpersTy> PrivateDataTy; } // anonymous namespace +static bool isAllocatableDecl(const VarDecl *VD) { + const VarDecl *CVD = VD->getCanonicalDecl(); + if (!CVD->hasAttr<OMPAllocateDeclAttr>()) + return false; + const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); + // Use the default allocation. + return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || + AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && + !AA->getAllocator()); +} + static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { if (!Privates.empty()) { @@ -3396,9 +3408,12 @@ createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { QualType Type = VD->getType().getNonReferenceType(); // If the private variable is a local variable with lvalue ref type, // allocate the pointer instead of the pointee type. - if (Pair.second.isLocalPrivate() && - VD->getType()->isLValueReferenceType()) - Type = C.getPointerType(Type); + if (Pair.second.isLocalPrivate()) { + if (VD->getType()->isLValueReferenceType()) + Type = C.getPointerType(Type); + if (isAllocatableDecl(VD)) + Type = C.getPointerType(Type); + } FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); if (VD->hasAttrs()) { for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), @@ -3700,6 +3715,8 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, QualType Ty = VD->getType().getNonReferenceType(); if (VD->getType()->isLValueReferenceType()) Ty = C.getPointerType(Ty); + if (isAllocatableDecl(VD)) + Ty = C.getPointerType(Ty); Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), @@ -3780,8 +3797,10 @@ static void emitPrivatesInit(CodeGenFunction &CGF, FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); for (const PrivateDataTy &Pair : Privates) { // Do not initialize private locals.
- if (Pair.second.isLocalPrivate()) + if (Pair.second.isLocalPrivate()) { + ++FI; continue; + } const VarDecl *VD = Pair.second.PrivateCopy; const Expr *Init = VD->getAnyInitializer(); if (Init && (!ForDup || (isa(Init) && @@ -4146,8 +4165,12 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, /*PrivateElemInit=*/nullptr)); ++I; } - for (const VarDecl *VD : Data.PrivateLocals) - Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); + for (const VarDecl *VD : Data.PrivateLocals) { + if (isAllocatableDecl(VD)) + Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); + else + Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); + } llvm::stable_sort(Privates, [](const PrivateDataTy &L, const PrivateDataTy &R) { return L.first > R.first; @@ -11225,44 +11248,27 @@ Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, return CGF.GetAddrOfLocalVar(NativeParam); } -namespace { -/// Cleanup action for allocate support. -class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { -public: - static const int CleanupArgs = 3; - -private: - llvm::FunctionCallee RTLFn; - llvm::Value *Args[CleanupArgs]; - -public: - OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, - ArrayRef CallArgs) - : RTLFn(RTLFn) { - assert(CallArgs.size() == CleanupArgs && - "Size of arguments does not match."); - std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); - } - void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { - if (!CGF.HaveInsertPoint()) - return; - CGF.EmitRuntimeCall(RTLFn, Args); - } -}; -} // namespace - Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) { if (!VD) return Address::invalid(); + Address UntiedAddr = Address::invalid(); + Address UntiedRealAddr = Address::invalid(); + auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); + if (It != FunctionToUntiedTaskStackMap.end()) { + const UntiedLocalVarsAddressesMap &UntiedData = + UntiedLocalVarsStack[It->second]; + auto I = UntiedData.find(VD); + if (I != UntiedData.end()) { + UntiedAddr = I->second.first; + UntiedRealAddr = I->second.second; + } + } const VarDecl *CVD = VD->getCanonicalDecl(); if (CVD->hasAttr()) { - const auto *AA = CVD->getAttr(); // Use the default allocation. - if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || - AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && - !AA->getAllocator()) - return Address::invalid(); + if (!isAllocatableDecl(VD)) + return UntiedAddr; llvm::Value *Size; CharUnits Align = CGM.getContext().getDeclAlign(CVD); if (CVD->getType()->isVariablyModifiedType()) { @@ -11277,43 +11283,80 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, Size = CGM.getSize(Sz.alignTo(Align)); } llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); + const auto *AA = CVD->getAttr(); assert(AA->getAllocator() && "Expected allocator expression for non-default allocator."); llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); // According to the standard, the original allocator type is a enum // (integer). Convert to pointer type, if required. 
- if (Allocator->getType()->isIntegerTy()) - Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); - else if (Allocator->getType()->isPointerTy()) - Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - Allocator, CGM.VoidPtrTy); + Allocator = CGF.EmitScalarConversion( + Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy, + AA->getAllocator()->getExprLoc()); llvm::Value *Args[] = {ThreadID, Size, Allocator}; llvm::Value *Addr = CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_alloc), Args, getName({CVD->getName(), ".void.addr"})); - llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, - Allocator}; llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_free); - - CGF.EHStack.pushCleanup(NormalAndEHCleanup, FiniRTLFn, - llvm::makeArrayRef(FiniArgs)); + QualType Ty = CGM.getContext().getPointerType(CVD->getType()); Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - Addr, - CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), - getName({CVD->getName(), ".addr"})); - return Address(Addr, Align); + Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); + if (UntiedAddr.isValid()) + CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); + + // Cleanup action for allocate support. + class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { + llvm::FunctionCallee RTLFn; + unsigned LocEncoding; + Address Addr; + const Expr *Allocator; + + public: + OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding, + Address Addr, const Expr *Allocator) + : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), + Allocator(Allocator) {} + void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { + if (!CGF.HaveInsertPoint()) + return; + llvm::Value *Args[3]; + Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( + CGF, SourceLocation::getFromRawEncoding(LocEncoding)); + Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Addr.getPointer(), CGF.VoidPtrTy); + llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator); + // According to the standard, the original allocator type is a enum + // (integer). Convert to pointer type, if required. + AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), + CGF.getContext().VoidPtrTy, + Allocator->getExprLoc()); + Args[2] = AllocVal; + + CGF.EmitRuntimeCall(RTLFn, Args); + } + }; + Address VDAddr = + UntiedRealAddr.isValid() ? 
UntiedRealAddr : Address(Addr, Align); + CGF.EHStack.pushCleanup( + NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), + VDAddr, AA->getAllocator()); + if (UntiedRealAddr.isValid()) + if (auto *Region = + dyn_cast_or_null(CGF.CapturedStmtInfo)) + Region->emitUntiedSwitch(CGF); + return VDAddr; } - if (UntiedLocalVarsStack.empty()) - return Address::invalid(); - const UntiedLocalVarsAddressesMap &UntiedData = UntiedLocalVarsStack.back(); - auto It = UntiedData.find(VD); - if (It == UntiedData.end()) - return Address::invalid(); + return UntiedAddr; +} - return It->second; +bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, + const VarDecl *VD) const { + auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); + if (It == FunctionToUntiedTaskStackMap.end()) + return false; + return UntiedLocalVarsStack[It->second].count(VD) > 0; } CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( @@ -11349,11 +11392,14 @@ CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { } CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( - CodeGenModule &CGM, - const llvm::DenseMap, Address> &LocalVars) - : CGM(CGM), NeedToPush(!LocalVars.empty()) { + CodeGenFunction &CGF, + const llvm::DenseMap, + std::pair> &LocalVars) + : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { if (!NeedToPush) return; + CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( + CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 178acaec0aa1f..41fa9f5345aa8 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -253,9 +253,9 @@ class CGOpenMPRuntime { public: UntiedTaskLocalDeclsRAII( - CodeGenModule &CGM, - const llvm::DenseMap, Address> - &LocalVars); + CodeGenFunction &CGF, + const llvm::DenseMap, + std::pair> &LocalVars); ~UntiedTaskLocalDeclsRAII(); }; @@ -432,6 +432,8 @@ class CGOpenMPRuntime { std::tuple>> LastprivateConditionalToTypes; + /// Maps function to the position of the untied task locals stack. + llvm::DenseMap FunctionToUntiedTaskStackMap; /// Type kmp_critical_name, originally defined as typedef kmp_int32 /// kmp_critical_name[8]; llvm::ArrayType *KmpCriticalNameTy; @@ -720,7 +722,8 @@ class CGOpenMPRuntime { llvm::SmallVector NontemporalDeclsStack; using UntiedLocalVarsAddressesMap = - llvm::DenseMap, Address>; + llvm::DenseMap, + std::pair>; llvm::SmallVector UntiedLocalVarsStack; /// Stack for list of addresses of declarations in current context marked as @@ -1882,6 +1885,9 @@ class CGOpenMPRuntime { /// Destroys user defined allocators specified in the uses_allocators clause. void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator); + + /// Returns true if the variable is a local variable in untied task. + bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const; }; /// Class supports emissionof SIMD-only code. 
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 19dc9a87f239c..d656792dea718 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1563,6 +1563,17 @@ static void emitCommonOMPParallelDirective( CapturedVars, IfCond); } +static bool isAllocatableDecl(const VarDecl *VD) { + const VarDecl *CVD = VD->getCanonicalDecl(); + if (!CVD->hasAttr()) + return false; + const auto *AA = CVD->getAttr(); + // Use the default allocation. + return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || + AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && + !AA->getAllocator()); +} + static void emitEmptyBoundParameters(CodeGenFunction &, const OMPExecutableDirective &, llvm::SmallVectorImpl &) {} @@ -1575,12 +1586,7 @@ Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable( if (!VD) return Address::invalid(); const VarDecl *CVD = VD->getCanonicalDecl(); - if (!CVD->hasAttr()) - return Address::invalid(); - const auto *AA = CVD->getAttr(); - // Use the default allocation. - if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && - !AA->getAllocator()) + if (!isAllocatableDecl(CVD)) return Address::invalid(); llvm::Value *Size; CharUnits Align = CGM.getContext().getDeclAlign(CVD); @@ -1596,6 +1602,7 @@ Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable( Size = CGM.getSize(Sz.alignTo(Align)); } + const auto *AA = CVD->getAttr(); assert(AA->getAllocator() && "Expected allocator expression for non-default allocator."); llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); @@ -3931,7 +3938,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, CapturedRegion](CodeGenFunction &CGF, PrePostActionTy &Action) { - llvm::DenseMap, Address> UntiedLocalVars; + llvm::DenseMap, std::pair> + UntiedLocalVars; // Set proper addresses for generated private copies. OMPPrivateScope Scope(CGF); llvm::SmallVector, 16> FirstprivatePtrs; @@ -3976,9 +3984,11 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( QualType Ty = VD->getType().getNonReferenceType(); if (VD->getType()->isLValueReferenceType()) Ty = CGF.getContext().getPointerType(Ty); + if (isAllocatableDecl(VD)) + Ty = CGF.getContext().getPointerType(Ty); Address PrivatePtr = CGF.CreateMemTemp( CGF.getContext().getPointerType(Ty), ".local.ptr.addr"); - UntiedLocalVars.try_emplace(VD, PrivatePtr); + UntiedLocalVars.try_emplace(VD, PrivatePtr, Address::invalid()); CallArgs.push_back(PrivatePtr.getPointer()); } CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( @@ -4002,9 +4012,18 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( // Adjust mapping for internal locals by mapping actual memory instead of // a pointer to this memory. 
for (auto &Pair : UntiedLocalVars) { - Address Replacement(CGF.Builder.CreateLoad(Pair.second), - CGF.getContext().getDeclAlign(Pair.first)); - Pair.getSecond() = Replacement; + if (isAllocatableDecl(Pair.first)) { + llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first); + Address Replacement(Ptr, CGF.getPointerAlign()); + Pair.getSecond().first = Replacement; + Ptr = CGF.Builder.CreateLoad(Replacement); + Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first)); + Pair.getSecond().second = Replacement; + } else { + llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first); + Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first)); + Pair.getSecond().first = Replacement; + } } } if (Data.Reductions) { @@ -4100,7 +4119,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( } (void)InRedScope.Privatize(); - CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF.CGM, + CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF, UntiedLocalVars); Action.Enter(CGF); BodyGen(CGF); diff --git a/clang/test/OpenMP/allocate_codegen.cpp b/clang/test/OpenMP/allocate_codegen.cpp index c068589041af3..068e307697a0c 100644 --- a/clang/test/OpenMP/allocate_codegen.cpp +++ b/clang/test/OpenMP/allocate_codegen.cpp @@ -85,6 +85,7 @@ int main () { // CHECK-NOT: {{__kmpc_alloc|__kmpc_free}} // CHECK: store i32 %{{.+}}, i32* [[V_ADDR]], // CHECK-NEXT: [[V_VAL:%.+]] = load i32, i32* [[V_ADDR]], +// CHECK-NEXT: [[V_VOID_ADDR:%.+]] = bitcast i32* [[V_ADDR]] to i8* // CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[V_VOID_ADDR]], i8* inttoptr (i64 6 to i8*)) // CHECK-NOT: {{__kmpc_alloc|__kmpc_free}} // CHECK: ret i32 [[V_VAL]] @@ -101,7 +102,9 @@ void bar(int a, float &z) { // CHECK: [[Z_ADDR:%.+]] = bitcast i8* [[Z_VOID_PTR]] to float** // CHECK: store float* %{{.+}}, float** [[Z_ADDR]], #pragma omp allocate(a,z) allocator(omp_default_mem_alloc) +// CHECK-NEXT: [[Z_VOID_PTR:%.+]] = bitcast float** [[Z_ADDR]] to i8* // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[Z_VOID_PTR]], i8* inttoptr (i64 1 to i8*)) +// CHECK-NEXT: [[A_VOID_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8* // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[A_VOID_PTR]], i8* inttoptr (i64 1 to i8*)) // CHECK: ret void } diff --git a/clang/test/OpenMP/for_lastprivate_codegen.cpp b/clang/test/OpenMP/for_lastprivate_codegen.cpp index 4fc7b2061ae21..87f109e70e6e9 100644 --- a/clang/test/OpenMP/for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/for_lastprivate_codegen.cpp @@ -654,7 +654,8 @@ int main() { // CHECK-NEXT: br label %[[LAST_DONE]] // CHECK: [[LAST_DONE]] -// CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[F_VOID_PTR]], i8* inttoptr (i64 3 to i8*)) +// CHECK: [[F_VOID_PTR:%.+]] = bitcast float* [[F_PRIV]] to i8* +// CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[F_VOID_PTR]], i8* inttoptr (i64 3 to i8*)) // CHECK-NEXT: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]]) // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/for_linear_codegen.cpp b/clang/test/OpenMP/for_linear_codegen.cpp index fd9d89c38dcb7..548ded3f8644f 100644 --- a/clang/test/OpenMP/for_linear_codegen.cpp +++ b/clang/test/OpenMP/for_linear_codegen.cpp @@ -414,6 +414,7 @@ int main() { // CHECK: [[ADD:%.+]] = add nsw i64 [[LVAR_VAL]], 3 // CHECK: store i64 [[ADD]], i64* [[LVAR_PRIV]], // CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 %{{.+}}) +// CHECK: [[LVAR_VOID_PTR:%.+]] = bitcast i64* [[LVAR_PRIV]] to i8* // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* 
[[LVAR_VOID_PTR]], i8* inttoptr (i64 5 to i8*)) // CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]]) // CHECK: ret void diff --git a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp index 5a20fa187e9c3..ff6ce7847da1a 100644 --- a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp +++ b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp @@ -876,6 +876,7 @@ int main() { // CHECK: getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 4 // CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [4 x [[S_FLOAT_TY]]]** % +// CHECK: [[VAR3_VOID_PTR:%.+]] = bitcast [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]] to i8* // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[VAR3_VOID_PTR]], i8* inttoptr (i64 6 to i8*)) // CHECK: ret void diff --git a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp index 04af45badaea1..97024e0ace1ff 100644 --- a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp @@ -423,6 +423,7 @@ int main() { // CHECK-64: [[T_VAR_VAL:%.+]] = load i32, i32* [[BC]], // CHECK: store i32 [[T_VAR_VAL]], i32* [[T_VAR_PRIV]], // CHECK: store i32 0, i32* [[T_VAR_PRIV]], +// CHECK: [[T_VAR_VOID_PTR:%.+]] = bitcast i32* [[T_VAR_PRIV]] to i8* // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[T_VAR_VOID_PTR]], i8* inttoptr ([[iz]] 1 to i8*)) // CHECK: ret void @@ -584,6 +585,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // ARRAY: [[SIZE:%.+]] = mul nuw i64 %{{.+}}, 8 // ARRAY: [[BC:%.+]] = bitcast double* [[VLA2_PTR]] to i8* // ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC]], i8* align 128 %{{.+}}, i64 [[SIZE]], i1 false) +// ARRAY: [[VLA2_VOID_PTR:%.+]] = bitcast double* [[VLA2_PTR]] to i8* // ARRAY: call void @__kmpc_free(i32 [[GTID]], i8* [[VLA2_VOID_PTR]], i8* inttoptr (i64 8 to i8*)) // ARRAY-NEXT: ret void #endif diff --git a/clang/test/OpenMP/parallel_private_codegen.cpp b/clang/test/OpenMP/parallel_private_codegen.cpp index ceceaf95d49ab..eb575c53f913b 100644 --- a/clang/test/OpenMP/parallel_private_codegen.cpp +++ b/clang/test/OpenMP/parallel_private_codegen.cpp @@ -361,12 +361,13 @@ int main() { // CHECK: [[GTID_ADDR:%.+]] = load i32*, i32** [[GTID_ADDR_PTR]], // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_ADDR]], // CHECK: [[A_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 4, i8* inttoptr (i64 2 to i8*)) -// CHECK: [[A_PRIV:%.+]] = bitcast i8* [[A_VOID_PTR]] to i32* -// CHECK: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REF:%.+]], +// CHECK: [[A_PRIV_ADDR:%.+]] = bitcast i8* [[A_VOID_PTR]] to i32* +// CHECK: store i{{[0-9]+}}* [[A_PRIV_ADDR]], i{{[0-9]+}}** [[REF:%.+]], // CHECK-NEXT: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REF]], // CHECK-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]], // CHECK-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1 // CHECK-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]], +// CHECK-NEXT: [[A_VOID_PTR:%.+]] = bitcast i32* [[A_PRIV_ADDR]] to i8* // CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[A_VOID_PTR]], i8* inttoptr (i64 2 to i8*)) // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp index 3c92ca75b1016..f54499ca38f06 100644 --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm 
%s -o - | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -DUNTIEDRT | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s -DUNTIEDRT +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT // // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s @@ -14,6 +14,19 @@ #ifndef HEADER #define HEADER +enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ +}; + // CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STRUCT_SHAREDS:%.+]] = type { i8*, [2 x [[STRUCT_S:%.+]]]* } // CHECK-DAG: [[STRUCT_SHAREDS1:%.+]] = type { [2 x [[STRUCT_S:%.+]]]* } @@ -258,21 +271,26 @@ int main() { a = 4; c = 5; } -// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]]) -#pragma omp task untied +#pragma omp task untied firstprivate(c) allocate(omp_pteam_mem_alloc:c) { - S s1; + S s1, s2; +#ifdef UNTIEDRT +#pragma omp allocate(s2) allocator(omp_pteam_mem_alloc) +#endif + s2.a = 0; #pragma omp task - a = 4; + a = c = 4; #pragma omp taskyield s1 = S(); + s2.a = 10; #pragma omp taskwait } return a; } // CHECK: define internal i32 [[TASK_ENTRY1]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1) -// CHECK: store i32 15, i32* [[A_PTR:@.+]] +// CHECK: store i32 15, i32* [[A_PTR:@.+]], // CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PTR]] // CHECK: [[A_VAL_I8:%.+]] = trunc i32 [[A_VAL]] to i8 // CHECK: store i8 [[A_VAL_I8]], i8* %{{.+}} @@ -294,10 +312,13 @@ int main() { // CHECK: define internal i32 // CHECK: store i32 4, i32* [[A_PTR]] -// CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1) +// CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %{{.+}}) // UNTIEDRT: [[S1_ADDR_PTR:%.+]] = alloca %struct.S*, -// UNTIEDRT: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, %struct.S** [[S1_ADDR_PTR]]) -// UNTIEDRT: [[S1_ADDR:%.+]] = load %struct.S*, %struct.S** [[S1_ADDR_PTR]], +// UNTIEDRT: [[S2_ADDR_PTR_REF:%.+]] = alloca %struct.S**, +// UNTIEDRT: call void (i8*, ...) 
%{{.+}}(i8* %{{.+}}, %struct.S** [[S1_ADDR_PTR]], %struct.S*** [[S2_ADDR_PTR_REF]]) +// UNTIEDRT-DAG: [[S1_ADDR:%.+]] = load %struct.S*, %struct.S** [[S1_ADDR_PTR]], +// UNTIEDRT-DAG: [[S2_ADDR_PTR:%.+]] = load %struct.S**, %struct.S*** [[S2_ADDR_PTR_REF]], +// UNTIEDRT-DAG: [[S2_ADDR:%.+]] = load %struct.S*, %struct.S** [[S2_ADDR_PTR]], // CHECK: switch i32 %{{.+}}, label %[[DONE:.+]] [ // CHECK: [[DONE]]: @@ -309,16 +330,25 @@ int main() { // UNTIEDRT: br label %[[EXIT:[^,]+]] // UNTIEDRT: call void [[CONSTR:@.+]](%struct.S* [[S1_ADDR]]) +// UNTIEDRT: [[S2_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 %{{.+}}, i64 4, i8* inttoptr (i64 7 to i8*)) +// UNTIEDRT: [[S2_PTR:%.+]] = bitcast i8* [[S2_VOID_PTR]] to %struct.S* +// UNTIEDRT: store %struct.S* [[S2_PTR]], %struct.S** [[S2_ADDR_PTR]], +// UNTIEDRT: load i32*, i32** % +// UNTIEDRT: store i32 2, i32* % +// UNTIEDRT: call i32 @__kmpc_omp_task(% +// UNTIEDRT: br label %[[EXIT]] + +// UNTIEDRT: call void [[CONSTR]](%struct.S* [[S2_ADDR]]) // CHECK: call i8* @__kmpc_omp_task_alloc( // CHECK: call i32 @__kmpc_omp_task(% // CHECK: load i32*, i32** % -// CHECK: store i32 2, i32* % +// CHECK: store i32 {{2|3}}, i32* % // CHECK: call i32 @__kmpc_omp_task(% // UNTIEDRT: br label %[[EXIT]] // CHECK: call i32 @__kmpc_omp_taskyield(% // CHECK: load i32*, i32** % -// CHECK: store i32 3, i32* % +// CHECK: store i32 {{3|4}}, i32* % // CHECK: call i32 @__kmpc_omp_task(% // UNTIEDRT: br label %[[EXIT]] @@ -331,10 +361,13 @@ int main() { // CHECK: call i32 @__kmpc_omp_taskwait(% // CHECK: load i32*, i32** % -// CHECK: store i32 4, i32* % +// CHECK: store i32 {{4|5}}, i32* % // CHECK: call i32 @__kmpc_omp_task(% // UNTIEDRT: br label %[[EXIT]] +// UNTIEDRT: call void [[DESTR]](%struct.S* [[S2_ADDR]]) +// UNTIEDRT: [[S2_VOID_PTR:%.+]] = bitcast %struct.S* [[S2_ADDR]] to i8* +// UNTIEDRT: call void @__kmpc_free(i32 %{{.+}}, i8* [[S2_VOID_PTR]], i8* inttoptr (i64 7 to i8*)) // UNTIEDRT: call void [[DESTR]](%struct.S* [[S1_ADDR]]) // CHECK: br label %[[CLEANUP]] From 54e1bf115429fa28f9783da92f310a4ea991e7c4 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 14 Sep 2020 18:23:08 -0700 Subject: [PATCH 0724/1079] [LoopAccessAnalysis][NewPM] Fix tests to work under NPM Pin RUN lines with -analyze to legacy PM, add corresponding NPM RUN lines. 
Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D87662 --- .../LoopAccessAnalysis/backward-dep-different-types.ll | 2 +- .../test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll | 2 +- .../Analysis/LoopAccessAnalysis/forward-loop-independent.ll | 2 +- .../Analysis/LoopAccessAnalysis/independent-interleaved.ll | 2 +- .../LoopAccessAnalysis/memcheck-for-loop-invariant.ll | 2 +- .../Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll | 5 +++-- .../LoopAccessAnalysis/memcheck-wrapping-pointers.ll | 3 ++- .../LoopAccessAnalysis/multiple-strides-rt-memory-checks.ll | 2 +- .../test/Analysis/LoopAccessAnalysis/non-wrapping-pointer.ll | 2 +- llvm/test/Analysis/LoopAccessAnalysis/nullptr.ll | 2 +- llvm/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll | 2 +- .../LoopAccessAnalysis/pointer-with-unknown-bounds.ll | 2 +- llvm/test/Analysis/LoopAccessAnalysis/pr31098.ll | 2 +- .../Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll | 2 +- .../Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll | 2 +- llvm/test/Analysis/LoopAccessAnalysis/safe-no-checks.ll | 2 +- .../Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll | 2 +- .../Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll | 2 +- .../Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll | 2 +- .../Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll | 2 +- .../Analysis/LoopAccessAnalysis/stride-access-dependence.ll | 2 +- .../test/Analysis/LoopAccessAnalysis/underlying-objects-1.ll | 2 +- .../test/Analysis/LoopAccessAnalysis/underlying-objects-2.ll | 2 +- .../LoopAccessAnalysis/unsafe-and-rt-checks-convergent.ll | 2 +- .../test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll | 2 +- .../LoopAccessAnalysis/wrapping-pointer-versioning.ll | 2 +- 26 files changed, 29 insertions(+), 27 deletions(-) diff --git a/llvm/test/Analysis/LoopAccessAnalysis/backward-dep-different-types.ll b/llvm/test/Analysis/LoopAccessAnalysis/backward-dep-different-types.ll index d8040a31a8dc3..7471adfb62399 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/backward-dep-different-types.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/backward-dep-different-types.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-accesses -analyze < %s | FileCheck %s +; RUN: opt -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s ; In this loop just because we access A through different types (int, float) diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll index 7d3ac09dbb9c4..8d3bfca58eb33 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-accesses -analyze < %s | FileCheck %s +; RUN: opt -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s ; for (unsigned i = 0; i < 100; i++) { diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll index 41e2a2904fb2f..8ad02e15ed73e 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-accesses -analyze < %s | FileCheck %s +; RUN: opt -loop-accesses -analyze 
-enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s ; Check that loop-indepedent forward dependences are discovered properly. diff --git a/llvm/test/Analysis/LoopAccessAnalysis/independent-interleaved.ll b/llvm/test/Analysis/LoopAccessAnalysis/independent-interleaved.ll index fe56ea9ab5939..c4acdf248f93c 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/independent-interleaved.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/independent-interleaved.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -store-to-load-forwarding-conflict-detection=false -loop-accesses -analyze | FileCheck %s +; RUN: opt < %s -store-to-load-forwarding-conflict-detection=false -loop-accesses -analyze -enable-new-pm=0 | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -store-to-load-forwarding-conflict-detection=false -disable-output < %s 2>&1 | FileCheck %s ; This test checks that we prove the strided accesses to be independent before diff --git a/llvm/test/Analysis/LoopAccessAnalysis/memcheck-for-loop-invariant.ll b/llvm/test/Analysis/LoopAccessAnalysis/memcheck-for-loop-invariant.ll index f06bb00ec64aa..0a592488f1534 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/memcheck-for-loop-invariant.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/memcheck-for-loop-invariant.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-accesses -analyze < %s | FileCheck %s +; RUN: opt -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s ; Handle memchecks involving loop-invariant addresses: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll b/llvm/test/Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll index 01813c8a81041..6114b453fa911 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze --loop-accesses %s | FileCheck %s +; RUN: opt -analyze --loop-accesses %s -enable-new-pm=0 | FileCheck %s +; RUN: opt -passes=print-access-info %s -disable-output 2>&1 | FileCheck %s ; This test verifies run-time boundary check of memory accesses. ; The original loop: @@ -18,7 +19,7 @@ ; The loop was vectorized to 4, 32 byte memory access ( <4 x i64> ), ; store a value at *%op touched memory under *%src. 
-;CHECK: Printing analysis 'Loop Access Analysis' for function 'fastCopy' +;CHECK: function 'fastCopy': ;CHECK: (Low: %op High: (32 + %op)) ;CHECK: (Low: %src High: (32 + %src)) diff --git a/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll index 484f2b47b22a1..94034bfd6fbc0 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll @@ -1,4 +1,5 @@ -; RUN: opt -basic-aa -loop-accesses -analyze < %s | FileCheck %s +; RUN: opt -basic-aa -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s +; RUN: opt -passes=print-access-info %s -disable-output 2>&1 | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/Analysis/LoopAccessAnalysis/multiple-strides-rt-memory-checks.ll b/llvm/test/Analysis/LoopAccessAnalysis/multiple-strides-rt-memory-checks.ll index 60c2a3930b5c0..362a1f48be1e8 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/multiple-strides-rt-memory-checks.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/multiple-strides-rt-memory-checks.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-accesses -analyze -S < %s | FileCheck %s +; RUN: opt -loop-accesses -analyze -enable-new-pm=0 -S < %s | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s ; This is the test case from PR26314. diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-wrapping-pointer.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-wrapping-pointer.ll index 99ba107ed09ea..73a981705c0d1 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/non-wrapping-pointer.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/non-wrapping-pointer.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -loop-accesses -analyze < %s | FileCheck %s +; RUN: opt -basic-aa -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -passes='require,require,require,loop(print-access-info)' -aa-pipeline='basic-aa' -disable-output < %s 2>&1 | FileCheck %s ; For this loop: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/nullptr.ll b/llvm/test/Analysis/LoopAccessAnalysis/nullptr.ll index 8fbf47304e800..1c2ac0c9b3b38 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/nullptr.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/nullptr.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-accesses -analyze %s | FileCheck %s +; RUN: opt -loop-accesses -analyze -enable-new-pm=0 %s | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s ; Test that the loop accesses are proven safe in this case. 
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll b/llvm/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll index 4528976a09e65..34dddbe5cc1b3 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-accesses -analyze < %s | FileCheck %s +; RUN: opt -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll b/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll index a10b851bcd1a2..2109a4d0ec4b1 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-accesses -analyze < %s | FileCheck %s +; RUN: opt -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Analysis/LoopAccessAnalysis/pr31098.ll b/llvm/test/Analysis/LoopAccessAnalysis/pr31098.ll index 04b73828f5148..399a395e09315 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/pr31098.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/pr31098.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-accesses -analyze < %s | FileCheck %s +; RUN: opt -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll b/llvm/test/Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll index 921fd4d06314d..8405b0399ffe3 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-accesses -analyze < %s | FileCheck %s +; RUN: opt -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s ; We give up analyzing the dependences in this loop due to non-constant diff --git a/llvm/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll b/llvm/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll index 4285ef0f1170c..8113c8d7106b2 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-accesses -analyze < %s | FileCheck %s +; RUN: opt -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s ; The runtime memory check code and the access grouping diff --git a/llvm/test/Analysis/LoopAccessAnalysis/safe-no-checks.ll b/llvm/test/Analysis/LoopAccessAnalysis/safe-no-checks.ll index 2a937cbe62f6e..647b509450b56 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/safe-no-checks.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/safe-no-checks.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -loop-accesses -analyze < %s | FileCheck %s +; RUN: opt -basic-aa -loop-accesses -analyze 
-enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -passes='require,require,require,loop(print-access-info)' -aa-pipeline='basic-aa' -disable-output < %s 2>&1 | FileCheck %s ; If the arrays don't alias this loop is safe with no memchecks: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll b/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll index 910d49edbb181..9335a21c170e8 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-accesses -analyze < %s | FileCheck %s +; RUN: opt -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s ; Analyze this loop: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll index 611e957168ffd..1b36ac156d22a 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-accesses -analyze | FileCheck -check-prefix=OLDPM %s +; RUN: opt < %s -loop-accesses -analyze -enable-new-pm=0 | FileCheck -check-prefix=OLDPM %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck -check-prefix=NEWPM %s ; Test to confirm LAA will find multiple stores to an invariant address in the diff --git a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll index d21cc6926c3b1..123ccd62503b4 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-accesses -analyze | FileCheck %s +; RUN: opt < %s -loop-accesses -analyze -enable-new-pm=0 | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s ; Test to confirm LAA will not find store to invariant address. 
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll index b25d79b3d0394..e877ce03d8419 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-accesses -analyze | FileCheck %s +; RUN: opt < %s -loop-accesses -analyze -enable-new-pm=0 | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s ; Inner loop has a store to invariant address, but LAA does not need to identify diff --git a/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll b/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll index 4fe6f9f704f71..fc9fe3da8e604 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-accesses -analyze < %s | FileCheck %s +; RUN: opt -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/Analysis/LoopAccessAnalysis/underlying-objects-1.ll b/llvm/test/Analysis/LoopAccessAnalysis/underlying-objects-1.ll index 1204e8359a13a..1ac52a7cf8909 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/underlying-objects-1.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/underlying-objects-1.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -loop-accesses -analyze < %s | FileCheck %s +; RUN: opt -basic-aa -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s ; In: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/underlying-objects-2.ll b/llvm/test/Analysis/LoopAccessAnalysis/underlying-objects-2.ll index dc2232334a7b0..3fd1f72cdce3e 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/underlying-objects-2.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/underlying-objects-2.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -loop-accesses -analyze < %s | FileCheck %s +; RUN: opt -basic-aa -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s ; This loop: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks-convergent.ll b/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks-convergent.ll index 7f42e2730c0dc..c05f8a394e2a7 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks-convergent.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks-convergent.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-accesses -analyze < %s | FileCheck %s +; RUN: opt -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s ; Analyze this loop: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll b/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll index 7fbed6fcc15cf..998e0005aa493 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-accesses -analyze < %s | FileCheck %s +; RUN: opt -loop-accesses -analyze 
-enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -passes='require,require,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s ; Analyze this loop: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll b/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll index 4c058b190d69f..5d26e834e309d 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -loop-accesses -analyze < %s | FileCheck %s -check-prefix=LAA +; RUN: opt -basic-aa -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s -check-prefix=LAA ; RUN: opt -passes='require,require,require,loop(print-access-info)' -aa-pipeline='basic-aa' -disable-output < %s 2>&1 | FileCheck %s --check-prefix=LAA ; RUN: opt -loop-versioning -S < %s | FileCheck %s -check-prefix=LV From 6f66ad13c50ceeaee5c63b1ab47cb1d2a5390500 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 14 Sep 2020 18:45:30 -0700 Subject: [PATCH 0725/1079] [DependenceAnalysis][NewPM] Fix tests to work under NPM All tests had corresponding NPM lines, simply pin non-NPM lines to legacy PM. Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D87665 --- llvm/test/Analysis/DependenceAnalysis/AA.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/Banerjee.ll | 4 ++-- llvm/test/Analysis/DependenceAnalysis/BasePtrBug.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/Constraints.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/Coupled.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/DADelin.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/ExactRDIV.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/ExactSIV.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/GCD.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/Invariant.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/NonAffineExpr.ll | 2 +- .../Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/PR21585.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/Preliminary.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/Propagating.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/Separability.ll | 2 +- .../Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll | 2 +- .../DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/UsefulGEP.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll | 2 +- llvm/test/Analysis/DependenceAnalysis/ZIV.ll | 2 +- 27 files changed, 28 insertions(+), 28 deletions(-) diff --git a/llvm/test/Analysis/DependenceAnalysis/AA.ll b/llvm/test/Analysis/DependenceAnalysis/AA.ll index efb5c8d1ef031..f74c331668453 100644 --- a/llvm/test/Analysis/DependenceAnalysis/AA.ll +++ b/llvm/test/Analysis/DependenceAnalysis/AA.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" \ ; RUN: "-aa-pipeline=basic-aa,tbaa" 2>&1 | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -tbaa -da | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -tbaa -da | FileCheck %s ; CHECK-LABEL: 'Dependence Analysis' for function 'test_no_noalias' ; CHECK: da analyze - 
none! diff --git a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll index 06fa7ad06983f..9f1a2de727e2a 100644 --- a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll +++ b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll @@ -1,9 +1,9 @@ ; RUN: opt < %s -disable-output -da-delinearize=false "-passes=print" \ ; RUN: -aa-pipeline=basic-aa 2>&1 | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -da -da-delinearize=false | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da -da-delinearize=false | FileCheck %s ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -check-prefix=DELIN -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s -check-prefix=DELIN +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s -check-prefix=DELIN target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" diff --git a/llvm/test/Analysis/DependenceAnalysis/BasePtrBug.ll b/llvm/test/Analysis/DependenceAnalysis/BasePtrBug.ll index 7d1e8e22b956c..08a497c87a4ad 100644 --- a/llvm/test/Analysis/DependenceAnalysis/BasePtrBug.ll +++ b/llvm/test/Analysis/DependenceAnalysis/BasePtrBug.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s ; Test that the dependence analysis generates the correct results when using ; an aliased object that points to a different element in the same array. diff --git a/llvm/test/Analysis/DependenceAnalysis/Constraints.ll b/llvm/test/Analysis/DependenceAnalysis/Constraints.ll index d086bf37bb894..130e248ba7f83 100644 --- a/llvm/test/Analysis/DependenceAnalysis/Constraints.ll +++ b/llvm/test/Analysis/DependenceAnalysis/Constraints.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 -; RUN: opt < %s -analyze -basic-aa -da +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da ;; Check that this code doesn't abort. Test case is reduced version of lnt Polybench benchmark test case dynprog. 
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Analysis/DependenceAnalysis/Coupled.ll b/llvm/test/Analysis/DependenceAnalysis/Coupled.ll index 4e81589d3bd9c..3a24813e98def 100644 --- a/llvm/test/Analysis/DependenceAnalysis/Coupled.ll +++ b/llvm/test/Analysis/DependenceAnalysis/Coupled.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" diff --git a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll index 40054aa2187ea..6faa1bccc9008 100644 --- a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll +++ b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8m.main-arm-none-eabi" diff --git a/llvm/test/Analysis/DependenceAnalysis/ExactRDIV.ll b/llvm/test/Analysis/DependenceAnalysis/ExactRDIV.ll index 40e12a784b18a..4c22e86ac8c80 100644 --- a/llvm/test/Analysis/DependenceAnalysis/ExactRDIV.ll +++ b/llvm/test/Analysis/DependenceAnalysis/ExactRDIV.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s ; ModuleID = 'ExactRDIV.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/Analysis/DependenceAnalysis/ExactSIV.ll b/llvm/test/Analysis/DependenceAnalysis/ExactSIV.ll index 720d4166ed1a5..b5f13ebe99161 100644 --- a/llvm/test/Analysis/DependenceAnalysis/ExactSIV.ll +++ b/llvm/test/Analysis/DependenceAnalysis/ExactSIV.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" diff --git a/llvm/test/Analysis/DependenceAnalysis/GCD.ll b/llvm/test/Analysis/DependenceAnalysis/GCD.ll index a3564b7f89553..99c5cef969785 100644 --- a/llvm/test/Analysis/DependenceAnalysis/GCD.ll +++ b/llvm/test/Analysis/DependenceAnalysis/GCD.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -check-prefix=DELIN -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s -check-prefix=DELIN +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s -check-prefix=DELIN target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" 
target triple = "x86_64-apple-macosx10.6.0" diff --git a/llvm/test/Analysis/DependenceAnalysis/Invariant.ll b/llvm/test/Analysis/DependenceAnalysis/Invariant.ll index 5aaa3868cf9af..20358768bc827 100644 --- a/llvm/test/Analysis/DependenceAnalysis/Invariant.ll +++ b/llvm/test/Analysis/DependenceAnalysis/Invariant.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s ; Test for a bug, which caused an assert when an invalid ; SCEVAddRecExpr is created in addToCoefficient. diff --git a/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll b/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll index e222755dd8e45..5642c845a2902 100644 --- a/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll +++ b/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -analyze -basic-aa -da +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da ; RUN: opt < %s -passes="print" ; Test that the dependence analysis pass does seg-fault due to a null pointer diff --git a/llvm/test/Analysis/DependenceAnalysis/NonAffineExpr.ll b/llvm/test/Analysis/DependenceAnalysis/NonAffineExpr.ll index 2561df503913e..642cf67f394d4 100644 --- a/llvm/test/Analysis/DependenceAnalysis/NonAffineExpr.ll +++ b/llvm/test/Analysis/DependenceAnalysis/NonAffineExpr.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 -; RUN: opt < %s -analyze -basic-aa -da +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da ; ; CHECK: da analyze - consistent input [S S]! ; CHECK: da analyze - confused! diff --git a/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll b/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll index d1df4ef63b542..10f57d0fd0fa9 100644 --- a/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll +++ b/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -check-prefix=DELIN -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s -check-prefix=DELIN +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s -check-prefix=DELIN target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" diff --git a/llvm/test/Analysis/DependenceAnalysis/PR21585.ll b/llvm/test/Analysis/DependenceAnalysis/PR21585.ll index 6dd1403cd1354..d76e37a70dfea 100644 --- a/llvm/test/Analysis/DependenceAnalysis/PR21585.ll +++ b/llvm/test/Analysis/DependenceAnalysis/PR21585.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" \ ; RUN: "-aa-pipeline=basic-aa,globals-aa" 2>&1 | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -globals-aa -da | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -globals-aa -da | FileCheck %s define void @i32_subscript(i32* %a) { entry: br label %for.body diff --git a/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll b/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll index 05848a61a7378..ef2757fbc0662 100644 --- a/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll +++ b/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" 
-aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" diff --git a/llvm/test/Analysis/DependenceAnalysis/Propagating.ll b/llvm/test/Analysis/DependenceAnalysis/Propagating.ll index 41640a0b4b657..fe8f40a4fc428 100644 --- a/llvm/test/Analysis/DependenceAnalysis/Propagating.ll +++ b/llvm/test/Analysis/DependenceAnalysis/Propagating.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" diff --git a/llvm/test/Analysis/DependenceAnalysis/Separability.ll b/llvm/test/Analysis/DependenceAnalysis/Separability.ll index bbbc0db4a609f..93803cf5c0694 100644 --- a/llvm/test/Analysis/DependenceAnalysis/Separability.ll +++ b/llvm/test/Analysis/DependenceAnalysis/Separability.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" diff --git a/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll b/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll index 7063f20cd0c30..e6ddafdad96dd 100644 --- a/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll +++ b/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output -passes="print" \ ; RUN: -da-disable-delinearization-checks 2>&1 | FileCheck %s -; RUN: opt < %s -da -analyze -da-disable-delinearization-checks | FileCheck %s +; RUN: opt < %s -da -analyze -enable-new-pm=0 -da-disable-delinearization-checks | FileCheck %s ; CHECK-LABEL: t1 ; CHECK: da analyze - none! diff --git a/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll b/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll index d783d2ec163fc..5dcba2252e303 100644 --- a/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll +++ b/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output -passes="print" \ ; RUN: -da-disable-delinearization-checks 2>&1 | FileCheck %s -; RUN: opt < %s -da -analyze -da-disable-delinearization-checks | FileCheck %s +; RUN: opt < %s -da -analyze -enable-new-pm=0 -da-disable-delinearization-checks | FileCheck %s ; CHECK-LABEL: t1 ; CHECK: da analyze - none! 
diff --git a/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll b/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll index 397ef8a2d3a03..be6b19ead51f7 100644 --- a/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll +++ b/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll b/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll index 0151c7c78404e..6cdb0cacb4913 100644 --- a/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll +++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s ; ModuleID = 'SymbolicRDIV.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll index 7a37107baf913..46a0c27b5c5f1 100644 --- a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll +++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" diff --git a/llvm/test/Analysis/DependenceAnalysis/UsefulGEP.ll b/llvm/test/Analysis/DependenceAnalysis/UsefulGEP.ll index c2d7765b03230..9b3896fa395d7 100644 --- a/llvm/test/Analysis/DependenceAnalysis/UsefulGEP.ll +++ b/llvm/test/Analysis/DependenceAnalysis/UsefulGEP.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 -; RUN: opt < %s -analyze -basic-aa -da +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da ;; Check this doesn't crash. 
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll b/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll index 449cffc7cd036..8e0f516a6d5cd 100644 --- a/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll +++ b/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s ; ModuleID = 'WeakCrossingSIV.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll b/llvm/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll index af9c0bd8f2bb1..9007910b2e36a 100644 --- a/llvm/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll +++ b/llvm/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s ; ModuleID = 'WeakZeroDstSIV.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll b/llvm/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll index 70612a4b5c1c2..8b87c068edb3c 100644 --- a/llvm/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll +++ b/llvm/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s ; ModuleID = 'WeakZeroSrcSIV.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/Analysis/DependenceAnalysis/ZIV.ll b/llvm/test/Analysis/DependenceAnalysis/ZIV.ll index 4e1ea0834e9b5..fe7d9c433f5d9 100644 --- a/llvm/test/Analysis/DependenceAnalysis/ZIV.ll +++ b/llvm/test/Analysis/DependenceAnalysis/ZIV.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -; RUN: opt < %s -analyze -basic-aa -da | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s ; ModuleID = 'ZIV.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" From e0c7641de65fb4dc27fcc44b2e4f2cd570e58bed Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 14 Sep 2020 17:49:58 -0700 Subject: [PATCH 0726/1079] [RegionInfo][NewPM] Fix RegionInfo tests to work under NPM Pin RUN lines with -analyze to legacy PM, add corresponding NPM RUN line if missing. 
Reviewed By: asbirlea Differential Revision: https://reviews.llvm.org/D87658 --- llvm/test/Analysis/RegionInfo/bad_node_traversal.ll | 3 ++- llvm/test/Analysis/RegionInfo/block_sort.ll | 11 +++++++---- llvm/test/Analysis/RegionInfo/cond_loop.ll | 9 ++++++--- .../test/Analysis/RegionInfo/condition_complicated.ll | 9 ++++++--- .../Analysis/RegionInfo/condition_complicated_2.ll | 9 ++++++--- .../Analysis/RegionInfo/condition_forward_edge.ll | 9 ++++++--- llvm/test/Analysis/RegionInfo/condition_same_exit.ll | 9 ++++++--- llvm/test/Analysis/RegionInfo/condition_simple.ll | 9 ++++++--- llvm/test/Analysis/RegionInfo/exit_in_condition.ll | 9 ++++++--- llvm/test/Analysis/RegionInfo/infinite_loop.ll | 3 ++- llvm/test/Analysis/RegionInfo/infinite_loop_2.ll | 10 +++++++--- llvm/test/Analysis/RegionInfo/infinite_loop_3.ll | 11 ++++++++--- llvm/test/Analysis/RegionInfo/infinite_loop_4.ll | 11 ++++++++--- llvm/test/Analysis/RegionInfo/infinite_loop_5_a.ll | 3 ++- llvm/test/Analysis/RegionInfo/infinite_loop_5_b.ll | 3 ++- llvm/test/Analysis/RegionInfo/infinite_loop_5_c.ll | 3 ++- llvm/test/Analysis/RegionInfo/loop_with_condition.ll | 9 ++++++--- llvm/test/Analysis/RegionInfo/loops_1.ll | 9 ++++++--- llvm/test/Analysis/RegionInfo/loops_2.ll | 9 ++++++--- llvm/test/Analysis/RegionInfo/mix_1.ll | 9 ++++++--- .../test/Analysis/RegionInfo/multiple_exiting_edge.ll | 6 ++++-- llvm/test/Analysis/RegionInfo/nested_loops.ll | 9 ++++++--- llvm/test/Analysis/RegionInfo/next.ll | 9 ++++++--- llvm/test/Analysis/RegionInfo/outgoing_edge.ll | 2 +- llvm/test/Analysis/RegionInfo/outgoing_edge_1.ll | 2 +- llvm/test/Analysis/RegionInfo/paper.ll | 9 ++++++--- .../test/Analysis/RegionInfo/two_loops_same_header.ll | 9 ++++++--- llvm/test/Analysis/RegionInfo/unreachable_bb.ll | 2 +- 28 files changed, 137 insertions(+), 68 deletions(-) diff --git a/llvm/test/Analysis/RegionInfo/bad_node_traversal.ll b/llvm/test/Analysis/RegionInfo/bad_node_traversal.ll index 00dd1207af9f0..7e658f6bda68d 100644 --- a/llvm/test/Analysis/RegionInfo/bad_node_traversal.ll +++ b/llvm/test/Analysis/RegionInfo/bad_node_traversal.ll @@ -1,5 +1,6 @@ ; REQUIRES: asserts -; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s +; RUN: opt -passes='print' -disable-output < %s 2>&1 | FileCheck %s ; While working on improvements to the region info analysis, this test ; case caused an incorrect region 3 => 8 to be detected. 
diff --git a/llvm/test/Analysis/RegionInfo/block_sort.ll b/llvm/test/Analysis/RegionInfo/block_sort.ll index ce1a48132901e..ace6849fc848c 100644 --- a/llvm/test/Analysis/RegionInfo/block_sort.ll +++ b/llvm/test/Analysis/RegionInfo/block_sort.ll @@ -1,10 +1,13 @@ ; REQUIRES: asserts -; RUN: opt -regions -analyze < %s | FileCheck %s -; RUN: opt -regions -stats -analyze < %s 2>&1 | FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s +; RUN: opt -regions -stats -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s ; RUN: opt < %s -passes='print' 2>&1 | FileCheck %s +; RUN: opt < %s -passes='print' -stats 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -passes='print' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -passes='print' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @BZ2_blockSort() nounwind { start: diff --git a/llvm/test/Analysis/RegionInfo/cond_loop.ll b/llvm/test/Analysis/RegionInfo/cond_loop.ll index 7dc311a299ce6..9fb2e22b49f1f 100644 --- a/llvm/test/Analysis/RegionInfo/cond_loop.ll +++ b/llvm/test/Analysis/RegionInfo/cond_loop.ll @@ -1,10 +1,13 @@ ; REQUIRES: asserts -; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s ; RUN: opt < %s -passes='print' 2>&1 | FileCheck %s +; RUN: opt < %s -passes='print' -stats 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -passes='print' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -passes='print' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @normal_condition() nounwind { "5": diff --git a/llvm/test/Analysis/RegionInfo/condition_complicated.ll b/llvm/test/Analysis/RegionInfo/condition_complicated.ll index e700503f8a48a..3c1507acf2211 100644 --- a/llvm/test/Analysis/RegionInfo/condition_complicated.ll +++ b/llvm/test/Analysis/RegionInfo/condition_complicated.ll @@ -1,10 +1,13 @@ ; REQUIRES: asserts -; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < 
%s 2>&1 | FileCheck -check-prefix=RNIT %s ; RUN: opt < %s -passes='print' 2>&1 | FileCheck %s +; RUN: opt < %s -passes='print' -stats 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -passes='print' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -passes='print' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s define internal fastcc zeroext i8 @handle_compress() nounwind { end165: diff --git a/llvm/test/Analysis/RegionInfo/condition_complicated_2.ll b/llvm/test/Analysis/RegionInfo/condition_complicated_2.ll index 584ebba6f04b4..12564b3abc4ea 100644 --- a/llvm/test/Analysis/RegionInfo/condition_complicated_2.ll +++ b/llvm/test/Analysis/RegionInfo/condition_complicated_2.ll @@ -1,10 +1,13 @@ ; REQUIRES: asserts -; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s ; RUN: opt < %s -passes='print' 2>&1 | FileCheck %s +; RUN: opt < %s -passes='print' -stats 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -passes='print' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -passes='print' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s define internal fastcc void @compress() nounwind { end33: diff --git a/llvm/test/Analysis/RegionInfo/condition_forward_edge.ll b/llvm/test/Analysis/RegionInfo/condition_forward_edge.ll index cc9a3294e1451..76ae02882a036 100644 --- a/llvm/test/Analysis/RegionInfo/condition_forward_edge.ll +++ b/llvm/test/Analysis/RegionInfo/condition_forward_edge.ll @@ -1,10 +1,13 @@ ; REQUIRES: asserts -; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s ; RUN: opt < %s -passes='print' 2>&1 | FileCheck %s +; RUN: opt < %s -passes='print' -stats 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -passes='print' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -passes='print' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @normal_condition() nounwind { "0": diff --git a/llvm/test/Analysis/RegionInfo/condition_same_exit.ll b/llvm/test/Analysis/RegionInfo/condition_same_exit.ll index f3f443b2ba643..39787409198a5 100644 --- a/llvm/test/Analysis/RegionInfo/condition_same_exit.ll +++ b/llvm/test/Analysis/RegionInfo/condition_same_exit.ll @@ -1,10 +1,13 @@ ; REQUIRES: asserts -; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt 
-regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s ; RUN: opt < %s -passes='print' 2>&1 | FileCheck %s +; RUN: opt < %s -passes='print' -stats 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -passes='print' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -passes='print' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @normal_condition() nounwind { "0": diff --git a/llvm/test/Analysis/RegionInfo/condition_simple.ll b/llvm/test/Analysis/RegionInfo/condition_simple.ll index 67bdb506702eb..f4456825f797a 100644 --- a/llvm/test/Analysis/RegionInfo/condition_simple.ll +++ b/llvm/test/Analysis/RegionInfo/condition_simple.ll @@ -1,10 +1,13 @@ ; REQUIRES: asserts -; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s ; RUN: opt < %s -passes='print' 2>&1 | FileCheck %s +; RUN: opt < %s -passes='print' -stats 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -passes='print' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -passes='print' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @normal_condition() nounwind { "0": diff --git a/llvm/test/Analysis/RegionInfo/exit_in_condition.ll b/llvm/test/Analysis/RegionInfo/exit_in_condition.ll index 8a6d208f479ef..a8c3624ff4e65 100644 --- a/llvm/test/Analysis/RegionInfo/exit_in_condition.ll +++ b/llvm/test/Analysis/RegionInfo/exit_in_condition.ll @@ -1,10 +1,13 @@ ; REQUIRES: asserts -; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s ; RUN: opt < %s -passes='print' 2>&1 | FileCheck %s +; RUN: opt < %s -passes='print' -stats 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -passes='print' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -passes='print' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s define internal fastcc zeroext i8 @handle_compress() nounwind { entry: diff --git 
a/llvm/test/Analysis/RegionInfo/infinite_loop.ll b/llvm/test/Analysis/RegionInfo/infinite_loop.ll
index 35c82ce8e0419..f27bb1a461f60 100644
--- a/llvm/test/Analysis/RegionInfo/infinite_loop.ll
+++ b/llvm/test/Analysis/RegionInfo/infinite_loop.ll
@@ -1,5 +1,6 @@
 ; REQUIRES: asserts
-; RUN: opt -regions -analyze < %s
+; RUN: opt -regions -analyze -enable-new-pm=0 < %s
+; RUN: opt -passes='print<regions>' -disable-output < %s 2>&1
 ; RUN: opt -regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s
 
 define void @normal_condition() nounwind {
diff --git a/llvm/test/Analysis/RegionInfo/infinite_loop_2.ll b/llvm/test/Analysis/RegionInfo/infinite_loop_2.ll
index 76ecdd833c426..8c2cf2578b06a 100644
--- a/llvm/test/Analysis/RegionInfo/infinite_loop_2.ll
+++ b/llvm/test/Analysis/RegionInfo/infinite_loop_2.ll
@@ -1,8 +1,12 @@
 ; REQUIRES: asserts
-; RUN: opt -regions -analyze < %s
+; RUN: opt -regions -analyze -enable-new-pm=0 < %s
 ; RUN: opt -regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s
+; RUN: opt -passes='print<regions>' -disable-output < %s
+; RUN: opt < %s -passes='print<regions>' -stats 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -passes='print<regions>' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -passes='print<regions>' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @normal_condition() nounwind {
 "0":
diff --git a/llvm/test/Analysis/RegionInfo/infinite_loop_3.ll b/llvm/test/Analysis/RegionInfo/infinite_loop_3.ll
index 2b1b643005c01..960730766cbd1 100644
--- a/llvm/test/Analysis/RegionInfo/infinite_loop_3.ll
+++ b/llvm/test/Analysis/RegionInfo/infinite_loop_3.ll
@@ -1,9 +1,14 @@
 ; REQUIRES: asserts
-; RUN: opt -regions -analyze < %s
+; RUN: opt -regions -analyze -enable-new-pm=0 < %s
 ; RUN: opt -regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s
+
+; RUN: opt < %s -passes='print<regions>' 2>&1 | FileCheck %s
+; RUN: opt < %s -passes='print<regions>' -stats 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -passes='print<regions>' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -passes='print<regions>' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @normal_condition() nounwind {
 "0":
diff --git a/llvm/test/Analysis/RegionInfo/infinite_loop_4.ll b/llvm/test/Analysis/RegionInfo/infinite_loop_4.ll
index c3ad028b0e558..8ff8e57783732 100644
--- a/llvm/test/Analysis/RegionInfo/infinite_loop_4.ll
+++ b/llvm/test/Analysis/RegionInfo/infinite_loop_4.ll
@@ -1,8 +1,13 @@
 ; REQUIRES: asserts
-; RUN: opt -regions -analyze < %s
+; RUN: opt -regions -analyze -enable-new-pm=0 < %s
 ; RUN: opt -regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s
+
+; RUN: opt < %s -passes='print<regions>' 2>&1 | FileCheck %s
+; RUN: opt < %s -passes='print<regions>' -stats 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -passes='print<regions>' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -passes='print<regions>' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @normal_condition() nounwind {
 "0":
diff --git a/llvm/test/Analysis/RegionInfo/infinite_loop_5_a.ll b/llvm/test/Analysis/RegionInfo/infinite_loop_5_a.ll
index bf56add87ac11..76f7b247c9664 100644
--- a/llvm/test/Analysis/RegionInfo/infinite_loop_5_a.ll
+++ b/llvm/test/Analysis/RegionInfo/infinite_loop_5_a.ll
@@ -1,4 +1,5 @@
-; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s
+; RUN: opt -passes='print<regions>' -disable-output < %s 2>&1 | FileCheck %s
 
 define void @normal_condition() nounwind {
 "0":
diff --git a/llvm/test/Analysis/RegionInfo/infinite_loop_5_b.ll b/llvm/test/Analysis/RegionInfo/infinite_loop_5_b.ll
index d8602054cd007..9a5ff40cecc42 100644
--- a/llvm/test/Analysis/RegionInfo/infinite_loop_5_b.ll
+++ b/llvm/test/Analysis/RegionInfo/infinite_loop_5_b.ll
@@ -1,4 +1,5 @@
-; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s
+; RUN: opt -passes='print<regions>' -disable-output < %s 2>&1 | FileCheck %s
 
 define void @normal_condition() nounwind {
 "0":
diff --git a/llvm/test/Analysis/RegionInfo/infinite_loop_5_c.ll b/llvm/test/Analysis/RegionInfo/infinite_loop_5_c.ll
index 0508d0a45bda5..fe2c29a72613a 100644
--- a/llvm/test/Analysis/RegionInfo/infinite_loop_5_c.ll
+++ b/llvm/test/Analysis/RegionInfo/infinite_loop_5_c.ll
@@ -1,4 +1,5 @@
-; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s
+; RUN: opt -passes='print<regions>' -disable-output < %s 2>&1 | FileCheck %s
 
 define void @normal_condition() nounwind {
 "0":
diff --git a/llvm/test/Analysis/RegionInfo/loop_with_condition.ll b/llvm/test/Analysis/RegionInfo/loop_with_condition.ll
index 244f253d25df5..1965fed8ee2a6 100644
--- a/llvm/test/Analysis/RegionInfo/loop_with_condition.ll
+++ b/llvm/test/Analysis/RegionInfo/loop_with_condition.ll
@@ -1,11 +1,14 @@
 ; REQUIRES: asserts
-; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s
 ; RUN: opt -regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 ; RUN: opt < %s -passes='print<regions>' 2>&1 | FileCheck %s
+; RUN: opt < %s -passes='print<regions>' -stats 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -passes='print<regions>' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -passes='print<regions>' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @normal_condition() nounwind {
 "0":
diff --git a/llvm/test/Analysis/RegionInfo/loops_1.ll b/llvm/test/Analysis/RegionInfo/loops_1.ll
index 91023198ea296..39f59bf197148 100644
--- a/llvm/test/Analysis/RegionInfo/loops_1.ll
+++ b/llvm/test/Analysis/RegionInfo/loops_1.ll
@@ -1,10 +1,13 @@
 ; REQUIRES: asserts
-; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s
 ; RUN: opt -regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 ; RUN: opt < %s -passes='print<regions>' 2>&1 | FileCheck %s
+; RUN: opt < %s -passes='print<regions>' -stats 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -passes='print<regions>' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -passes='print<regions>' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define internal fastcc zeroext i8 @loops_1() nounwind {
 entry:
diff --git a/llvm/test/Analysis/RegionInfo/loops_2.ll b/llvm/test/Analysis/RegionInfo/loops_2.ll
index 80cd34251d7e6..3973973381766 100644
--- a/llvm/test/Analysis/RegionInfo/loops_2.ll
+++ b/llvm/test/Analysis/RegionInfo/loops_2.ll
@@ -1,10 +1,13 @@
 ; REQUIRES: asserts
-; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s
 ; RUN: opt -regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 ; RUN: opt < %s -passes='print<regions>' 2>&1 | FileCheck %s
+; RUN: opt < %s -passes='print<regions>' -stats 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -passes='print<regions>' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -passes='print<regions>' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @meread_() nounwind {
 entry:
diff --git a/llvm/test/Analysis/RegionInfo/mix_1.ll b/llvm/test/Analysis/RegionInfo/mix_1.ll
index a462119575a79..7637f59d1375c 100644
--- a/llvm/test/Analysis/RegionInfo/mix_1.ll
+++ b/llvm/test/Analysis/RegionInfo/mix_1.ll
@@ -1,11 +1,14 @@
 ; REQUIRES: asserts
-; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s
 ; RUN: opt -regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 ; RUN: opt < %s -passes='print<regions>' 2>&1 | FileCheck %s
+; RUN: opt < %s -passes='print<regions>' -stats 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -passes='print<regions>' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -passes='print<regions>' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @a_linear_impl_fig_1() nounwind {
 "0":
diff --git a/llvm/test/Analysis/RegionInfo/multiple_exiting_edge.ll b/llvm/test/Analysis/RegionInfo/multiple_exiting_edge.ll
index 8de6472299428..0c3860ca3df92 100644
--- a/llvm/test/Analysis/RegionInfo/multiple_exiting_edge.ll
+++ b/llvm/test/Analysis/RegionInfo/multiple_exiting_edge.ll
@@ -1,5 +1,7 @@
-; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s
+; RUN: opt -passes='print<regions>' -print-region-style=bb -disable-output < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -passes='print<regions>' -print-region-style=rn -disable-output < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @normal_condition_0() nounwind {
 bb38: ; preds = %bb34, %bb34, %bb37
diff --git a/llvm/test/Analysis/RegionInfo/nested_loops.ll b/llvm/test/Analysis/RegionInfo/nested_loops.ll
index 5d47d792cd924..980b52460ad40 100644
--- a/llvm/test/Analysis/RegionInfo/nested_loops.ll
+++ b/llvm/test/Analysis/RegionInfo/nested_loops.ll
@@ -1,11 +1,14 @@
 ; REQUIRES: asserts
-; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s
 ; RUN: opt -regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 ; RUN: opt < %s -passes='print<regions>' 2>&1 | FileCheck %s
+; RUN: opt < %s -passes='print<regions>' -stats 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -passes='print<regions>' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -passes='print<regions>' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define internal fastcc zeroext i8 @handle_compress() nounwind {
 entry:
diff --git a/llvm/test/Analysis/RegionInfo/next.ll b/llvm/test/Analysis/RegionInfo/next.ll
index 03aa53e59a490..5976ecadad220 100644
--- a/llvm/test/Analysis/RegionInfo/next.ll
+++ b/llvm/test/Analysis/RegionInfo/next.ll
@@ -1,10 +1,13 @@
 ; REQUIRES: asserts
-; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s
 ; RUN: opt -regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 ; RUN: opt < %s -passes='print<regions>' 2>&1 | FileCheck %s
+; RUN: opt -passes='print<regions>' -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -passes='print<regions>' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -passes='print<regions>' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @MAIN__() nounwind {
 entry:
diff --git a/llvm/test/Analysis/RegionInfo/outgoing_edge.ll b/llvm/test/Analysis/RegionInfo/outgoing_edge.ll
index 39e1a39d7e5b5..db4932f831c6a 100644
--- a/llvm/test/Analysis/RegionInfo/outgoing_edge.ll
+++ b/llvm/test/Analysis/RegionInfo/outgoing_edge.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s
 ; RUN: opt < %s -passes='print<regions>' 2>&1 | FileCheck %s
 
 ; While working on improvements to the region info analysis, this test
diff --git a/llvm/test/Analysis/RegionInfo/outgoing_edge_1.ll b/llvm/test/Analysis/RegionInfo/outgoing_edge_1.ll
index 6f51131a188c5..7f723cd6d4e25 100644
--- a/llvm/test/Analysis/RegionInfo/outgoing_edge_1.ll
+++ b/llvm/test/Analysis/RegionInfo/outgoing_edge_1.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s
 ; RUN: opt < %s -passes='print<regions>' 2>&1 | FileCheck %s
 
 ; While working on improvements to region info analysis, this test
diff --git a/llvm/test/Analysis/RegionInfo/paper.ll b/llvm/test/Analysis/RegionInfo/paper.ll
index bc0fb18a0e276..31ce58dc7d8c9 100644
--- a/llvm/test/Analysis/RegionInfo/paper.ll
+++ b/llvm/test/Analysis/RegionInfo/paper.ll
@@ -1,10 +1,13 @@
 ; REQUIRES: asserts
-; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s
 ; RUN: opt -regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 ; RUN: opt < %s -passes='print<regions>' 2>&1 | FileCheck %s
+; RUN: opt < %s -passes='print<regions>' -stats 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -passes='print<regions>' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -passes='print<regions>' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define void @a_linear_impl_fig_1() nounwind {
 "0":
diff --git a/llvm/test/Analysis/RegionInfo/two_loops_same_header.ll b/llvm/test/Analysis/RegionInfo/two_loops_same_header.ll
index d230d76440f8c..8c6546d2ced5c 100644
--- a/llvm/test/Analysis/RegionInfo/two_loops_same_header.ll
+++ b/llvm/test/Analysis/RegionInfo/two_loops_same_header.ll
@@ -1,10 +1,13 @@
 ; REQUIRES: asserts
-; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s
 ; RUN: opt -regions -stats -disable-output < %s 2>&1 | FileCheck -check-prefix=STAT %s
-; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
-; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s
+; RUN: opt -regions -print-region-style=bb -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze -enable-new-pm=0 < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 ; RUN: opt < %s -passes='print<regions>' 2>&1 | FileCheck %s
+; RUN: opt < %s -passes='print<regions>' -stats 2>&1 | FileCheck -check-prefix=STAT %s
+; RUN: opt -passes='print<regions>' -print-region-style=bb < %s 2>&1 | FileCheck -check-prefix=BBIT %s
+; RUN: opt -passes='print<regions>' -print-region-style=rn < %s 2>&1 | FileCheck -check-prefix=RNIT %s
 
 define internal fastcc zeroext i8 @handle_compress() nounwind {
 entry:
diff --git a/llvm/test/Analysis/RegionInfo/unreachable_bb.ll b/llvm/test/Analysis/RegionInfo/unreachable_bb.ll
index 5dd1be958e71a..6268fff522690 100644
--- a/llvm/test/Analysis/RegionInfo/unreachable_bb.ll
+++ b/llvm/test/Analysis/RegionInfo/unreachable_bb.ll
@@ -1,4 +1,4 @@
-; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -analyze -enable-new-pm=0 < %s | FileCheck %s
 ; RUN: opt < %s -passes='print<regions>' 2>&1 | FileCheck %s
 
 ; We should not crash if there are some bbs that are not reachable.

From 3f69b2140f55ace97c3b7819eb9c19fc682da998 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks
Date: Mon, 14 Sep 2020 18:35:12 -0700
Subject: [PATCH 0727/1079] [NewPM][opt] Fix -globals-aa not being recognized
 as alias analysis in NPM

isAAPassName() was missing MODULE_ALIAS_ANALYSIS; previously only
FUNCTION_ALIAS_ANALYSIS was taken into account.

Reviewed By: asbirlea

Differential Revision: https://reviews.llvm.org/D87664
---
 llvm/lib/Passes/PassBuilder.cpp                | 3 +++
 llvm/test/Analysis/GlobalsModRef/comdat-ipo.ll | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index cd64aecd81d73..03b31c233361d 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -2787,6 +2787,9 @@ Error PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) {
 }
 
 bool PassBuilder::isAAPassName(StringRef PassName) {
+#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
+  if (PassName == NAME) \
+    return true;
 #define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
   if (PassName == NAME) \
     return true;
diff --git a/llvm/test/Analysis/GlobalsModRef/comdat-ipo.ll b/llvm/test/Analysis/GlobalsModRef/comdat-ipo.ll
index f251e01ca69ca..aeeebfd3aede3 100644
--- a/llvm/test/Analysis/GlobalsModRef/comdat-ipo.ll
+++ b/llvm/test/Analysis/GlobalsModRef/comdat-ipo.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -basic-aa -globals-aa -gvn -S | FileCheck %s
+; RUN: opt < %s -basic-aa -globals-aa -gvn -enable-new-pm=0 -S | FileCheck %s
+; RUN: opt < %s -basic-aa -globals-aa -gvn -enable-new-pm=1 -S | FileCheck %s
 
 ; See PR26774

From 9853e84b54d2453f88490381c2ea37deeab1789d Mon Sep 17 00:00:00 2001
From: Arthur Eubanks
Date: Mon, 14 Sep 2020 18:11:09 -0700
Subject: [PATCH 0728/1079] [PostDominators][NewPM] Fix tests to work under
 NPM

Each test has a RUN line pinned to the legacy PM and a new NPM RUN line.
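For example, each fixed test ends up with a pair like the following (taken
verbatim from the infinite-loop.ll hunk below):

  ; RUN: opt < %s -postdomtree -analyze -enable-new-pm=0 | FileCheck %s
  ; RUN: opt < %s -passes='print<postdomtree>' 2>&1 | FileCheck %s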
Reviewed By: asbirlea

Differential Revision: https://reviews.llvm.org/D87660
---
 llvm/test/Analysis/PostDominators/infinite-loop.ll  | 2 +-
 llvm/test/Analysis/PostDominators/infinite-loop2.ll | 2 +-
 llvm/test/Analysis/PostDominators/infinite-loop3.ll | 2 +-
 llvm/test/Analysis/PostDominators/pr1098.ll         | 2 +-
 llvm/test/Analysis/PostDominators/pr24415.ll        | 4 ++--
 llvm/test/Analysis/PostDominators/pr6047_a.ll       | 3 ++-
 llvm/test/Analysis/PostDominators/pr6047_b.ll       | 5 +++--
 llvm/test/Analysis/PostDominators/pr6047_c.ll       | 5 +++--
 llvm/test/Analysis/PostDominators/pr6047_d.ll       | 5 +++--
 9 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/llvm/test/Analysis/PostDominators/infinite-loop.ll b/llvm/test/Analysis/PostDominators/infinite-loop.ll
index 5796b8614dbde..5146fd6e21c0a 100644
--- a/llvm/test/Analysis/PostDominators/infinite-loop.ll
+++ b/llvm/test/Analysis/PostDominators/infinite-loop.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -postdomtree -analyze | FileCheck %s
+; RUN: opt < %s -postdomtree -analyze -enable-new-pm=0 | FileCheck %s
 ; RUN: opt < %s -passes='print<postdomtree>' 2>&1 | FileCheck %s
 
 @a = external global i32, align 4
diff --git a/llvm/test/Analysis/PostDominators/infinite-loop2.ll b/llvm/test/Analysis/PostDominators/infinite-loop2.ll
index 139abb76e9512..de7413e40874f 100644
--- a/llvm/test/Analysis/PostDominators/infinite-loop2.ll
+++ b/llvm/test/Analysis/PostDominators/infinite-loop2.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -postdomtree -analyze | FileCheck %s
+; RUN: opt < %s -postdomtree -analyze -enable-new-pm=0 | FileCheck %s
 ; RUN: opt < %s -passes='print<postdomtree>' 2>&1 | FileCheck %s
 
 @a = external global i32, align 4
diff --git a/llvm/test/Analysis/PostDominators/infinite-loop3.ll b/llvm/test/Analysis/PostDominators/infinite-loop3.ll
index f767df79d3a81..1536004ddc314 100644
--- a/llvm/test/Analysis/PostDominators/infinite-loop3.ll
+++ b/llvm/test/Analysis/PostDominators/infinite-loop3.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -postdomtree -analyze | FileCheck %s
+; RUN: opt < %s -postdomtree -analyze -enable-new-pm=0 | FileCheck %s
 ; RUN: opt < %s -passes='print<postdomtree>' 2>&1 | FileCheck %s
 
 @a = external global i32, align 4
diff --git a/llvm/test/Analysis/PostDominators/pr1098.ll b/llvm/test/Analysis/PostDominators/pr1098.ll
index 1dae0c566f055..62aaf96e0f69f 100644
--- a/llvm/test/Analysis/PostDominators/pr1098.ll
+++ b/llvm/test/Analysis/PostDominators/pr1098.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -postdomtree -analyze | FileCheck %s
+; RUN: opt < %s -postdomtree -analyze -enable-new-pm=0 | FileCheck %s
 ; RUN: opt < %s -passes='print<postdomtree>' 2>&1 | FileCheck %s
 
 ; PR932
diff --git a/llvm/test/Analysis/PostDominators/pr24415.ll b/llvm/test/Analysis/PostDominators/pr24415.ll
index 536c36848b9a5..aaee72758afa6 100644
--- a/llvm/test/Analysis/PostDominators/pr24415.ll
+++ b/llvm/test/Analysis/PostDominators/pr24415.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -postdomtree -analyze | FileCheck %s
+; RUN: opt < %s -postdomtree -analyze -enable-new-pm=0 | FileCheck %s
 ; RUN: opt < %s -passes='print<postdomtree>' 2>&1 | FileCheck %s
 
 ; Function Attrs: nounwind ssp uwtable
@@ -15,4 +15,4 @@ define void @foo() {
 ; CHECK-NEXT: [1] <<exit node>>
 ; CHECK-NEXT: [2] %2
 ; CHECK-NEXT: [2] %1
-; CHECK-NEXT: [3] %0
\ No newline at end of file
+; CHECK-NEXT: [3] %0
diff --git a/llvm/test/Analysis/PostDominators/pr6047_a.ll b/llvm/test/Analysis/PostDominators/pr6047_a.ll
index 32ccbe61271f2..08153f9864c6a 100644
--- a/llvm/test/Analysis/PostDominators/pr6047_a.ll
+++ b/llvm/test/Analysis/PostDominators/pr6047_a.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -postdomtree -analyze | FileCheck %s
+; RUN: opt < %s -postdomtree -analyze -enable-new-pm=0 | FileCheck %s
+; RUN: opt < %s -passes='print<postdomtree>' 2>&1 | FileCheck %s
 define internal void @f() {
 entry:
   br i1 undef, label %bb35, label %bb3.i
diff --git a/llvm/test/Analysis/PostDominators/pr6047_b.ll b/llvm/test/Analysis/PostDominators/pr6047_b.ll
index f1fbb648f5396..6b970b5cf7268 100644
--- a/llvm/test/Analysis/PostDominators/pr6047_b.ll
+++ b/llvm/test/Analysis/PostDominators/pr6047_b.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -postdomtree -analyze | FileCheck %s
+; RUN: opt < %s -postdomtree -analyze -enable-new-pm=0 | FileCheck %s
+; RUN: opt < %s -passes='print<postdomtree>' 2>&1 | FileCheck %s
 define internal void @f() {
 entry:
   br i1 undef, label %a, label %bb3.i
@@ -22,4 +23,4 @@ bb35:
 ; CHECK-NEXT: [3] %bb35.loopexit3
 ; CHECK-NEXT: [2] %a
 ; CHECK-NEXT: [2] %entry
-; CHECK-NEXT: [2] %bb3.i
\ No newline at end of file
+; CHECK-NEXT: [2] %bb3.i
diff --git a/llvm/test/Analysis/PostDominators/pr6047_c.ll b/llvm/test/Analysis/PostDominators/pr6047_c.ll
index 0eef023b418ca..d2a9516ce39c7 100644
--- a/llvm/test/Analysis/PostDominators/pr6047_c.ll
+++ b/llvm/test/Analysis/PostDominators/pr6047_c.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -postdomtree -analyze | FileCheck %s
+; RUN: opt < %s -postdomtree -analyze -enable-new-pm=0 | FileCheck %s
+; RUN: opt < %s -passes='print<postdomtree>' 2>&1 | FileCheck %s
 define internal void @f() {
 entry:
   br i1 undef, label %bb35, label %bb3.i
@@ -194,4 +195,4 @@ bb35:
 ; CHECK-NEXT: [3] %bb35.loopexit3
 ; CHECK-NEXT: [2] %entry
 ; CHECK-NEXT: [2] %bb3.i
-; CHECK-NEXT: Roots: %bb35 %bb3.i
\ No newline at end of file
+; CHECK-NEXT: Roots: %bb35 %bb3.i
diff --git a/llvm/test/Analysis/PostDominators/pr6047_d.ll b/llvm/test/Analysis/PostDominators/pr6047_d.ll
index 45ed86c27f869..93434af6ade83 100644
--- a/llvm/test/Analysis/PostDominators/pr6047_d.ll
+++ b/llvm/test/Analysis/PostDominators/pr6047_d.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -postdomtree -analyze | FileCheck %s
+; RUN: opt < %s -postdomtree -analyze -enable-new-pm=0 | FileCheck %s
+; RUN: opt < %s -passes='print<postdomtree>' 2>&1 | FileCheck %s
 define internal void @f() {
 entry:
   br i1 1, label %a, label %b
@@ -29,4 +30,4 @@ bb35:
 ; CHECK-NEXT: [3] %a
 ; CHECK-NEXT: [3] %entry
 ; CHECK-NEXT: [3] %b
-; CHECK-NEXT: [2] %bb3.i
\ No newline at end of file
+; CHECK-NEXT: [2] %bb3.i

From d9c9a74d0dc5b64c7c8496294ed962d7ce332337 Mon Sep 17 00:00:00 2001
From: Louis Dionne
Date: Tue, 15 Sep 2020 14:19:06 -0400
Subject: [PATCH 0729/1079] [libc++] Add missing friend keyword

Otherwise, we're declaring a non-static member function, and that gives
errors in C++11 because of the change of semantics between C++11 and
C++14 for non-const constexpr member functions. This was always intended
to be a friend declaration.
---
 libcxx/include/iterator | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/include/iterator b/libcxx/include/iterator
index 45516db24e7cd..e2910e9fdc2a1 100644
--- a/libcxx/include/iterator
+++ b/libcxx/include/iterator
@@ -1618,7 +1618,7 @@ private:
     __unwrap_iter(__wrap_iter<_Tp*>);
 #else
   template <class _Tp>
-  inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
+  inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR friend
   typename enable_if
   <
       is_trivially_copy_assignable<_Tp>::value,

From 05134877e64ded64f6c3064173b98893b1ac5fb5 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 15 Sep 2020 11:07:52 -0700
Subject: [PATCH 0730/1079] [X86] Use Align in
 reduceMaskedLoadToScalarLoad/reduceMaskedStoreToScalarStore. Correct
 pointer info.
If we offset the pointer, we also need to offset the pointer info Differential Revision: https://reviews.llvm.org/D87593 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 25 +++++++++++++++--------- llvm/test/CodeGen/X86/vmaskmov-offset.ll | 4 ++-- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ecf151ffeb664..46295d10d2c28 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -44446,7 +44446,8 @@ static int getOneTrueElt(SDValue V) { /// scalar element, and the alignment for the scalar memory access. static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp, SelectionDAG &DAG, SDValue &Addr, - SDValue &Index, unsigned &Alignment) { + SDValue &Index, Align &Alignment, + unsigned &Offset) { int TrueMaskElt = getOneTrueElt(MaskedOp->getMask()); if (TrueMaskElt < 0) return false; @@ -44454,15 +44455,17 @@ static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp, // Get the address of the one scalar element that is specified by the mask // using the appropriate offset from the base pointer. EVT EltVT = MaskedOp->getMemoryVT().getVectorElementType(); + Offset = 0; Addr = MaskedOp->getBasePtr(); if (TrueMaskElt != 0) { - unsigned Offset = TrueMaskElt * EltVT.getStoreSize(); + Offset = TrueMaskElt * EltVT.getStoreSize(); Addr = DAG.getMemBasePlusOffset(Addr, TypeSize::Fixed(Offset), SDLoc(MaskedOp)); } Index = DAG.getIntPtrConstant(TrueMaskElt, SDLoc(MaskedOp)); - Alignment = MinAlign(MaskedOp->getAlignment(), EltVT.getStoreSize()); + Alignment = commonAlignment(MaskedOp->getOriginalAlign(), + EltVT.getStoreSize()); return true; } @@ -44479,8 +44482,9 @@ reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG, // is profitable. Endianness would also have to be considered. SDValue Addr, VecIndex; - unsigned Alignment; - if (!getParamsForOneTrueMaskedElt(ML, DAG, Addr, VecIndex, Alignment)) + Align Alignment; + unsigned Offset; + if (!getParamsForOneTrueMaskedElt(ML, DAG, Addr, VecIndex, Alignment, Offset)) return SDValue(); // Load the one scalar element that is specified by the mask using the @@ -44489,7 +44493,8 @@ reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG, EVT VT = ML->getValueType(0); EVT EltVT = VT.getVectorElementType(); SDValue Load = - DAG.getLoad(EltVT, DL, ML->getChain(), Addr, ML->getPointerInfo(), + DAG.getLoad(EltVT, DL, ML->getChain(), Addr, + ML->getPointerInfo().getWithOffset(Offset), Alignment, ML->getMemOperand()->getFlags()); // Insert the loaded element into the appropriate place in the vector. @@ -44600,8 +44605,9 @@ static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS, // is profitable. Endianness would also have to be considered. SDValue Addr, VecIndex; - unsigned Alignment; - if (!getParamsForOneTrueMaskedElt(MS, DAG, Addr, VecIndex, Alignment)) + Align Alignment; + unsigned Offset; + if (!getParamsForOneTrueMaskedElt(MS, DAG, Addr, VecIndex, Alignment, Offset)) return SDValue(); // Extract the one scalar element that is actually being stored. @@ -44612,7 +44618,8 @@ static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS, MS->getValue(), VecIndex); // Store that element at the appropriate offset from the base pointer. 
-  return DAG.getStore(MS->getChain(), DL, Extract, Addr, MS->getPointerInfo(),
+  return DAG.getStore(MS->getChain(), DL, Extract, Addr,
+                      MS->getPointerInfo().getWithOffset(Offset),
                       Alignment, MS->getMemOperand()->getFlags());
 }
 
diff --git a/llvm/test/CodeGen/X86/vmaskmov-offset.ll b/llvm/test/CodeGen/X86/vmaskmov-offset.ll
index f6ecb87705ca7..a67dcce037508 100644
--- a/llvm/test/CodeGen/X86/vmaskmov-offset.ll
+++ b/llvm/test/CodeGen/X86/vmaskmov-offset.ll
@@ -59,7 +59,7 @@ define <2 x double> @mload_constmask_v2f64(<2 x double>* %addr, <2 x double> %ds
   ; CHECK: liveins: $rdi, $xmm0
   ; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0
   ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
-  ; CHECK: [[VMOVHPDrm:%[0-9]+]]:vr128 = VMOVHPDrm [[COPY]], [[COPY1]], 1, $noreg, 8, $noreg :: (load 8 from %ir.addr, align 4)
+  ; CHECK: [[VMOVHPDrm:%[0-9]+]]:vr128 = VMOVHPDrm [[COPY]], [[COPY1]], 1, $noreg, 8, $noreg :: (load 8 from %ir.addr + 8, align 4)
   ; CHECK: $xmm0 = COPY [[VMOVHPDrm]]
   ; CHECK: RET 0, $xmm0
   %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> <i1 false, i1 true>, <2 x double> %dst)
@@ -72,7 +72,7 @@ define void @one_mask_bit_set2(<4 x float>* %addr, <4 x float> %val) {
   ; CHECK: liveins: $rdi, $xmm0
   ; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0
   ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
-  ; CHECK: VEXTRACTPSmr [[COPY1]], 1, $noreg, 8, $noreg, [[COPY]], 2 :: (store 4 into %ir.addr)
+  ; CHECK: VEXTRACTPSmr [[COPY1]], 1, $noreg, 8, $noreg, [[COPY]], 2 :: (store 4 into %ir.addr + 8)
   ; CHECK: RET 0
   call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %val, <4 x float>* %addr, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>)
   ret void

From ca76d6e94a30b8fe11a63d3a55d3903c7cd25b5d Mon Sep 17 00:00:00 2001
From: Arthur Eubanks
Date: Mon, 14 Sep 2020 17:17:32 -0700
Subject: [PATCH 0731/1079] [Bugpoint][NewPM] Pin bugpoint to legacy PM

Bugpoint has lots of assumptions and hacks around the legacy PM, so
migrating it to the NPM is put off until later. This fixes the tests
under BugPoint when running under the NPM.
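Illustratively, the child opt invocation bugpoint constructs now looks
roughly like this (a sketch based on the OptimizerDriver.cpp change below;
the file names here are invented and the pass arguments vary per reduction):

  opt <opt-args> -enable-new-pm=0 -disable-symbolication -o bugpoint-output.bc bugpoint-input.bc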
Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D87655
---
 llvm/test/BugPoint/unsymbolized.ll      | 2 +-
 llvm/tools/bugpoint/OptimizerDriver.cpp | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/test/BugPoint/unsymbolized.ll b/llvm/test/BugPoint/unsymbolized.ll
index d2060ddee168c..55aadc35884cb 100644
--- a/llvm/test/BugPoint/unsymbolized.ll
+++ b/llvm/test/BugPoint/unsymbolized.ll
@@ -3,7 +3,7 @@
 ; RUN: echo "print('args = ' + str(sys.argv))" >> %t.py
 ; RUN: echo "exit(1)" >> %t.py
 ; RUN: not bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -opt-command=%python -opt-args %t.py | FileCheck %s
-; RUN: not --crash opt -load %llvmshlibdir/BugpointPasses%shlibext %s -bugpoint-crashcalls -disable-symbolication 2>&1 | FileCheck --check-prefix=CRASH %s
+; RUN: not --crash opt -enable-new-pm=0 -load %llvmshlibdir/BugpointPasses%shlibext %s -bugpoint-crashcalls -disable-symbolication 2>&1 | FileCheck --check-prefix=CRASH %s
 ; RUN: not bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -opt-command=%t.non.existent.opt.binary -opt-args %t.py 2>&1 | FileCheck %s --check-prefix=BAD-OPT
 
 ; Test that bugpoint disables symbolication on the opt tool to reduce runtime overhead when opt crashes
diff --git a/llvm/tools/bugpoint/OptimizerDriver.cpp b/llvm/tools/bugpoint/OptimizerDriver.cpp
index 25a970bd68785..ca78735202fcb 100644
--- a/llvm/tools/bugpoint/OptimizerDriver.cpp
+++ b/llvm/tools/bugpoint/OptimizerDriver.cpp
@@ -205,6 +205,9 @@ bool BugDriver::runPasses(Module &Program,
   for (unsigned i = 0, e = OptArgs.size(); i != e; ++i)
     Args.push_back(OptArgs[i]);
+  // Pin to legacy PM since bugpoint has lots of infra and hacks revolving
+  // around the legacy PM.
+  Args.push_back("-enable-new-pm=0");
   Args.push_back("-disable-symbolication");
   Args.push_back("-o");
   Args.push_back(OutputFilename);

From 3d42d549554889ca182e1f3d31b23fa1383c6678 Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Tue, 15 Sep 2020 14:47:23 +0100
Subject: [PATCH 0732/1079] [ConstraintElimination] Add constraint elimination
 pass.

This patch is a first draft of a new pass that adds a more flexible way
to eliminate compares based on more complex constraints collected from
dominating conditions.

In particular, it aims at simplifying conditions of the forms below,
using a forward propagation approach rather than instcombine-style
ad-hoc backwards walking of def-use chains:

  if (x < y)
    if (y < z)
      if (x < z) <- simplify

or

  if (x + 2 < y)
    if (x + 1 < y) <- simplify assuming no wraps

The general approach is to collect conditions and blocks, sort them by
dominance and then iterate over the sorted list. Conditions are turned
into linear inequalities and added to a system containing the linear
inequalities that hold on entry to the block. For blocks, we check each
compare against the system and see if it is implied by the constraints
in the system.

We also keep a stack of processed conditions and remove conditions from
the stack and the constraint system once they go out of scope (= do not
dominate the current block any longer).

Currently there are still at least the following areas for improvement:

* Large unsigned constants cannot currently be added to the system
  (coefficients must be represented as integers).
* The way constraints are managed is currently not very optimized.
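To make the intended transform concrete, here is a small IR sketch (the
function is invented for illustration; it mirrors @test1 in the ule.ll
diff below):

  define i32 @nested_ule(i32 %x, i32 %y, i32 %z) {
  entry:
    %c.1 = icmp ule i32 %x, %y
    br i1 %c.1, label %bb1, label %exit
  bb1:
    %c.2 = icmp ule i32 %y, %z
    br i1 %c.2, label %bb2, label %exit
  bb2:
    ; On entry to bb2 the system contains x <= y and y <= z, which
    ; implies x <= z, so this branch becomes "br i1 true".
    %c.3 = icmp ule i32 %x, %z
    br i1 %c.3, label %bb3, label %exit
  bb3:
    ret i32 10
  exit:
    ret i32 20
  }

Running -constraint-elimination on this folds the dominated compare %c.3
to true, exactly as the CHECK lines in the updated tests show.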
Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D84547
---
 llvm/include/llvm/Analysis/ConstraintSystem.h |  10 +
 llvm/include/llvm/InitializePasses.h          |   1 +
 llvm/include/llvm/Transforms/Scalar.h         |   7 +
 .../lib/Transforms/IPO/PassManagerBuilder.cpp |   8 +
 llvm/lib/Transforms/Scalar/CMakeLists.txt     |   1 +
 .../Scalar/ConstraintElimination.cpp          | 310 ++++++++++++++++++
 llvm/lib/Transforms/Scalar/Scalar.cpp         |   1 +
 .../Transforms/ConstraintElimination/dom.ll   |  10 +-
 .../ConstraintElimination/geps.2d.ll          |   2 +-
 .../Transforms/ConstraintElimination/geps.ll  |  48 +--
 .../Transforms/ConstraintElimination/i128.ll  |   2 +-
 .../Transforms/ConstraintElimination/loops.ll |   2 +-
 .../Transforms/ConstraintElimination/mixed.ll |   2 +-
 .../Transforms/ConstraintElimination/uge.ll   |  22 +-
 .../ConstraintElimination/ugt-ule.ll          |   6 +-
 .../Transforms/ConstraintElimination/ule.ll   |  26 +-
 16 files changed, 398 insertions(+), 60 deletions(-)
 create mode 100644 llvm/lib/Transforms/Scalar/ConstraintElimination.cpp

diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h
index 01f09f3daaaa6..f4e6dfbefc82b 100644
--- a/llvm/include/llvm/Analysis/ConstraintSystem.h
+++ b/llvm/include/llvm/Analysis/ConstraintSystem.h
@@ -49,6 +49,14 @@ class ConstraintSystem {
     Constraints.push_back(R);
   }
 
+  void addVariableRowFill(const SmallVector<int64_t, 8> &R) {
+    for (auto &CR : Constraints) {
+      while (CR.size() != R.size())
+        CR.push_back(0);
+    }
+    addVariableRow(R);
+  }
+
   /// Returns true if there may be a solution for the constraints in the system.
   bool mayHaveSolution();
@@ -62,6 +70,8 @@
   }
 
   bool isConditionImplied(SmallVector<int64_t, 8> R);
+
+  void popLastConstraint() { Constraints.pop_back(); }
 };
 
 } // namespace llvm
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index f9a9604d1305c..83385657ee969 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -113,6 +113,7 @@ void initializeCalledValuePropagationLegacyPassPass(PassRegistry &);
 void initializeCodeGenPreparePass(PassRegistry&);
 void initializeConstantHoistingLegacyPassPass(PassRegistry&);
 void initializeConstantMergeLegacyPassPass(PassRegistry&);
+void initializeConstraintEliminationPass(PassRegistry &);
 void initializeControlHeightReductionLegacyPassPass(PassRegistry&);
 void initializeCorrelatedValuePropagationPass(PassRegistry&);
 void initializeCostModelAnalysisPass(PassRegistry&);
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index 5ab8a0584ad0c..8c525c6895690 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -340,6 +340,13 @@ Pass *createLoopDeletionPass();
 //
 FunctionPass *createConstantHoistingPass();
 
+//===----------------------------------------------------------------------===//
+//
+// ConstraintElimination - This pass eliminates conditions based on found
+// constraints.
+//
+FunctionPass *createConstraintEliminationPass();
+
 //===----------------------------------------------------------------------===//
 //
 // Sink - Code Sinking
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 4b72a95120b38..4aef39c031c5c 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -153,6 +153,11 @@ cl::opt<bool> EnableMatrix(
     "enable-matrix", cl::init(false), cl::Hidden,
    cl::desc("Enable lowering of the matrix intrinsics"));
 
+cl::opt<bool> EnableConstraintElimination(
+    "enable-constraint-elimination", cl::init(false), cl::Hidden,
+    cl::desc(
+        "Enable pass to eliminate conditions based on linear constraints."));
+
 cl::opt<AttributorRunOption> AttributorRun(
     "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
     cl::desc("Enable the attributor inter-procedural deduction pass."),
@@ -381,6 +386,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
     }
   }
 
+  if (EnableConstraintElimination)
+    MPM.add(createConstraintEliminationPass());
+
   if (OptLevel > 1) {
     // Speculative execution if the target has divergent branches; otherwise nop.
     MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index 89173414c16b1..ae62aa0220724 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -4,6 +4,7 @@ add_llvm_component_library(LLVMScalarOpts
   BDCE.cpp
   CallSiteSplitting.cpp
   ConstantHoisting.cpp
+  ConstraintElimination.cpp
   CorrelatedValuePropagation.cpp
   DCE.cpp
   DeadStoreElimination.cpp
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
new file mode 100644
index 0000000000000..8500b831fda6a
--- /dev/null
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -0,0 +1,310 @@
+//===-- ConstraintElimination.cpp - Eliminate conds using constraints. ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Eliminate conditions based on constraints collected from dominating
+// conditions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstraintSystem.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "constraint-elimination"
+
+STATISTIC(NumCondsRemoved, "Number of instructions removed");
+DEBUG_COUNTER(EliminatedCounter, "conds-eliminated",
+              "Controls which conditions are eliminated");
+
+static int64_t MaxConstraintValue = std::numeric_limits<int64_t>::max();
+
+Optional<std::pair<int64_t, Value *>> decompose(Value *V) {
+  if (auto *CI = dyn_cast<ConstantInt>(V)) {
+    if (CI->isNegative() || CI->uge(MaxConstraintValue))
+      return {};
+    return {{CI->getSExtValue(), nullptr}};
+  }
+  auto *GEP = dyn_cast<GetElementPtrInst>(V);
+  if (GEP && GEP->getNumOperands() == 2 &&
+      isa<ConstantInt>(GEP->getOperand(GEP->getNumOperands() - 1))) {
+    return {{cast<ConstantInt>(GEP->getOperand(GEP->getNumOperands() - 1))
+                 ->getSExtValue(),
+             GEP->getPointerOperand()}};
+  }
+  return {{0, V}};
+}
+
+/// Turn a condition \p CmpI into a constraint vector, using indices from \p
+/// Value2Index. If \p ShouldAdd is true, new indices are added for values not
+/// yet in \p Value2Index.
+static SmallVector<int64_t, 8>
+getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
+              DenseMap<Value *, unsigned> &Value2Index, bool ShouldAdd) {
+  Value *A, *B;
+
+  int64_t Offset1 = 0;
+  int64_t Offset2 = 0;
+
+  auto TryToGetIndex = [ShouldAdd,
+                        &Value2Index](Value *V) -> Optional<unsigned> {
+    if (ShouldAdd) {
+      Value2Index.insert({V, Value2Index.size() + 1});
+      return Value2Index[V];
+    }
+    auto I = Value2Index.find(V);
+    if (I == Value2Index.end())
+      return None;
+    return I->second;
+  };
+
+  if (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE)
+    return getConstraint(CmpInst::getSwappedPredicate(Pred), Op1, Op0,
+                         Value2Index, ShouldAdd);
+
+  if (Pred == CmpInst::ICMP_ULE || Pred == CmpInst::ICMP_ULT) {
+    auto ADec = decompose(Op0);
+    auto BDec = decompose(Op1);
+    if (!ADec || !BDec)
+      return {};
+    std::tie(Offset1, A) = *ADec;
+    std::tie(Offset2, B) = *BDec;
+    Offset1 *= -1;
+
+    if (!A && !B)
+      return {};
+
+    auto AIdx = A ? TryToGetIndex(A) : None;
+    auto BIdx = B ? TryToGetIndex(B) : None;
+    if ((A && !AIdx) || (B && !BIdx))
+      return {};
+
+    SmallVector<int64_t, 8> R(Value2Index.size() + 1, 0);
+    if (AIdx)
+      R[*AIdx] = 1;
+    if (BIdx)
+      R[*BIdx] = -1;
+    R[0] = Offset1 + Offset2 + (Pred == CmpInst::ICMP_ULT ? -1 : 0);
+    return R;
+  }
+
+  return {};
+}
+
+static SmallVector<int64_t, 8>
+getConstraint(CmpInst *Cmp, DenseMap<Value *, unsigned> &Value2Index,
+              bool ShouldAdd) {
+  return getConstraint(Cmp->getPredicate(), Cmp->getOperand(0),
+                       Cmp->getOperand(1), Value2Index, ShouldAdd);
+}
+
+/// Represents either a condition that holds on entry to a block or a basic
+/// block, with their respective Dominator DFS in and out numbers.
+struct ConstraintOrBlock {
+  unsigned NumIn;
+  unsigned NumOut;
+  bool IsBlock;
+  bool Not;
+  union {
+    BasicBlock *BB;
+    CmpInst *Condition;
+  };
+
+  ConstraintOrBlock(DomTreeNode *DTN)
+      : NumIn(DTN->getDFSNumIn()), NumOut(DTN->getDFSNumOut()), IsBlock(true),
+        BB(DTN->getBlock()) {}
+  ConstraintOrBlock(DomTreeNode *DTN, CmpInst *Condition, bool Not)
+      : NumIn(DTN->getDFSNumIn()), NumOut(DTN->getDFSNumOut()), IsBlock(false),
+        Not(Not), Condition(Condition) {}
+};
+
+struct StackEntry {
+  unsigned NumIn;
+  unsigned NumOut;
+  CmpInst *Condition;
+  bool IsNot;
+
+  StackEntry(unsigned NumIn, unsigned NumOut, CmpInst *Condition, bool IsNot)
+      : NumIn(NumIn), NumOut(NumOut), Condition(Condition), IsNot(IsNot) {}
+};
+
+static bool eliminateConstraints(Function &F, DominatorTree &DT) {
+  bool Changed = false;
+  DT.updateDFSNumbers();
+  ConstraintSystem CS;
+
+  SmallVector<ConstraintOrBlock, 64> WorkList;
+
+  // First, collect conditions implied by branches and blocks with their
+  // Dominator DFS in and out numbers.
+  for (BasicBlock &BB : F) {
+    if (!DT.getNode(&BB))
+      continue;
+    WorkList.emplace_back(DT.getNode(&BB));
+
+    auto *Br = dyn_cast<BranchInst>(BB.getTerminator());
+    if (!Br || !Br->isConditional())
+      continue;
+    auto *CmpI = dyn_cast<CmpInst>(Br->getCondition());
+    if (!CmpI)
+      continue;
+    if (Br->getSuccessor(0)->getSinglePredecessor())
+      WorkList.emplace_back(DT.getNode(Br->getSuccessor(0)), CmpI, false);
+    if (Br->getSuccessor(1)->getSinglePredecessor())
+      WorkList.emplace_back(DT.getNode(Br->getSuccessor(1)), CmpI, true);
+  }
+
+  // Next, sort worklist by dominance, so that dominating blocks and conditions
+  // come before blocks and conditions dominated by them. If a block and a
+  // condition have the same numbers, the condition comes before the block, as
+  // it holds on entry to the block.
+  sort(WorkList.begin(), WorkList.end(),
+       [](const ConstraintOrBlock &A, const ConstraintOrBlock &B) {
+         return std::tie(A.NumIn, A.IsBlock) < std::tie(B.NumIn, B.IsBlock);
+       });
+
+  // Finally, process ordered worklist and eliminate implied conditions.
+  SmallVector<StackEntry, 16> DFSInStack;
+  DenseMap<Value *, unsigned> Value2Index;
+  for (ConstraintOrBlock &CB : WorkList) {
+    // First, pop entries from the stack that are out-of-scope for CB. Remove
+    // the corresponding entry from the constraint system.
+    while (!DFSInStack.empty()) {
+      auto &E = DFSInStack.back();
+      LLVM_DEBUG(dbgs() << "Top of stack : " << E.NumIn << " " << E.NumOut
+                        << "\n");
+      LLVM_DEBUG(dbgs() << "CB: " << CB.NumIn << " " << CB.NumOut << "\n");
+      bool IsDom = CB.NumIn >= E.NumIn && CB.NumOut <= E.NumOut;
+      if (IsDom)
+        break;
+      LLVM_DEBUG(dbgs() << "Removing " << *E.Condition << " " << E.IsNot
+                        << "\n");
+      DFSInStack.pop_back();
+      CS.popLastConstraint();
+    }
+
+    LLVM_DEBUG({
+      dbgs() << "Processing ";
+      if (CB.IsBlock)
+        dbgs() << *CB.BB;
+      else
+        dbgs() << *CB.Condition;
+      dbgs() << "\n";
+    });
+
+    // For a block, check if any CmpInsts become known based on the current set
+    // of constraints.
+    if (CB.IsBlock) {
+      for (Instruction &I : *CB.BB) {
+        auto *Cmp = dyn_cast<CmpInst>(&I);
+        if (!Cmp)
+          continue;
+        auto R = getConstraint(Cmp, Value2Index, false);
+        if (R.empty())
+          continue;
+        if (CS.isConditionImplied(R)) {
+          if (!DebugCounter::shouldExecute(EliminatedCounter))
+            continue;
+
+          LLVM_DEBUG(dbgs() << "Condition " << *Cmp
+                            << " implied by dominating constraints\n");
+          LLVM_DEBUG({
+            for (auto &E : reverse(DFSInStack))
+              dbgs() << "   C " << *E.Condition << " " << E.IsNot << "\n";
+          });
+          Cmp->replaceAllUsesWith(
+              ConstantInt::getTrue(F.getParent()->getContext()));
+          NumCondsRemoved++;
+          Changed = true;
+        }
+        if (CS.isConditionImplied(ConstraintSystem::negate(R))) {
+          if (!DebugCounter::shouldExecute(EliminatedCounter))
+            continue;
+
+          LLVM_DEBUG(dbgs() << "Condition !" << *Cmp
+                            << " implied by dominating constraints\n");
+          LLVM_DEBUG({
+            for (auto &E : reverse(DFSInStack))
+              dbgs() << "   C " << *E.Condition << " " << E.IsNot << "\n";
+          });
+          Cmp->replaceAllUsesWith(
+              ConstantInt::getFalse(F.getParent()->getContext()));
+          NumCondsRemoved++;
+          Changed = true;
+        }
+      }
+      continue;
+    }
+
+    // Otherwise, add the condition to the system and stack, if we can transform
+    // it into a constraint.
+    auto R = getConstraint(CB.Condition, Value2Index, true);
+    if (R.empty())
+      continue;
+
+    LLVM_DEBUG(dbgs() << "Adding " << *CB.Condition << " " << CB.Not << "\n");
+    if (CB.Not)
+      R = ConstraintSystem::negate(R);
+
+    CS.addVariableRowFill(R);
+    DFSInStack.emplace_back(CB.NumIn, CB.NumOut, CB.Condition, CB.Not);
+  }
+
+  return Changed;
+}
+
+namespace {
+
+class ConstraintElimination : public FunctionPass {
+public:
+  static char ID;
+
+  ConstraintElimination() : FunctionPass(ID) {
+    initializeConstraintEliminationPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    return eliminateConstraints(F, DT);
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+  }
+};
+
+} // end anonymous namespace
+
+char ConstraintElimination::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ConstraintElimination, "constraint-elimination",
+                      "Constraint Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
+INITIALIZE_PASS_END(ConstraintElimination, "constraint-elimination",
+                    "Constraint Elimination", false, false)
+
+FunctionPass *llvm::createConstraintEliminationPass() {
+  return new ConstraintElimination();
+}
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index f4dc6f2996b98..8a740295b19c4 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -38,6 +38,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeAlignmentFromAssumptionsPass(Registry);
   initializeCallSiteSplittingLegacyPassPass(Registry);
   initializeConstantHoistingLegacyPassPass(Registry);
+  initializeConstraintEliminationPass(Registry);
   initializeCorrelatedValuePropagationPass(Registry);
   initializeDCELegacyPassPass(Registry);
   initializeDeadInstEliminationPass(Registry);
diff --git a/llvm/test/Transforms/ConstraintElimination/dom.ll b/llvm/test/Transforms/ConstraintElimination/dom.ll
index a6b8629bed78a..8002697352448 100644
--- a/llvm/test/Transforms/ConstraintElimination/dom.ll
+++ b/llvm/test/Transforms/ConstraintElimination/dom.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by
utils/update_test_checks.py -; RUN: opt -S %s | FileCheck %s +; RUN: opt -constraint-elimination -S %s | FileCheck %s ; Test cases where both the true and false successors reach the same block, ; dominated by one of them. @@ -13,7 +13,7 @@ define i32 @test1(i32 %x) { ; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[C_2:%.*]] = icmp ule i32 [[X]], 10 -; CHECK-NEXT: call void @use(i1 [[C_2]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: ; CHECK-NEXT: [[C_3:%.*]] = icmp ugt i32 [[X]], 10 @@ -47,7 +47,7 @@ define i32 @test2(i32 %x) { ; CHECK-NEXT: ret i32 20 ; CHECK: bb2: ; CHECK-NEXT: [[C_3:%.*]] = icmp ule i32 [[X]], 10 -; CHECK-NEXT: call void @use(i1 [[C_3]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: br label [[BB1]] ; entry: @@ -80,7 +80,7 @@ define i32 @test3(i32 %x, i1 %c) { ; CHECK-NEXT: ret i32 10 ; CHECK: bb2: ; CHECK-NEXT: [[C_3:%.*]] = icmp ugt i32 [[X]], 10 -; CHECK-NEXT: call void @use(i1 [[C_3]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: ret i32 20 ; entry: @@ -110,7 +110,7 @@ define i32 @test4(i32 %x, i1 %c) { ; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[BB2]] ; CHECK: bb1: ; CHECK-NEXT: [[C_2:%.*]] = icmp ule i32 [[X]], 10 -; CHECK-NEXT: call void @use(i1 [[C_2]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: ret i32 10 ; CHECK: bb2: ; CHECK-NEXT: [[C_3:%.*]] = icmp ugt i32 [[X]], 10 diff --git a/llvm/test/Transforms/ConstraintElimination/geps.2d.ll b/llvm/test/Transforms/ConstraintElimination/geps.2d.ll index bb24514404414..35ffadbd85ea1 100644 --- a/llvm/test/Transforms/ConstraintElimination/geps.2d.ll +++ b/llvm/test/Transforms/ConstraintElimination/geps.2d.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S %s | FileCheck %s +; RUN: opt -constraint-elimination -S %s | FileCheck %s define void @test.not.uge.ult([10 x i8]* %start, i8* %low, i8* %high) { ; CHECK-LABEL: @test.not.uge.ult( diff --git a/llvm/test/Transforms/ConstraintElimination/geps.ll b/llvm/test/Transforms/ConstraintElimination/geps.ll index 0e36ebf07f0f4..46763c08b3820 100644 --- a/llvm/test/Transforms/ConstraintElimination/geps.ll +++ b/llvm/test/Transforms/ConstraintElimination/geps.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S %s | FileCheck %s +; RUN: opt -constraint-elimination -S %s | FileCheck %s define i32 @test.ult(i32* readonly %src, i32* readnone %min, i32* readnone %max) { ; CHECK-LABEL: @test.ult( @@ -15,7 +15,7 @@ define i32 @test.ult(i32* readonly %src, i32* readnone %min, i32* readnone %max) ; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[SRC]], align 4 ; CHECK-NEXT: [[ADD_PTR_I36:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 3 ; CHECK-NEXT: [[C_3_MIN:%.*]] = icmp ult i32* [[ADD_PTR_I36]], [[MIN]] -; CHECK-NEXT: br i1 [[C_3_MIN]], label [[TRAP]], label [[CHECK_3_MAX:%.*]] +; CHECK-NEXT: br i1 false, label [[TRAP]], label [[CHECK_3_MAX:%.*]] ; CHECK: check.3.max: ; CHECK-NEXT: [[C_3_MAX:%.*]] = icmp ult i32* [[ADD_PTR_I36]], [[MAX]] ; CHECK-NEXT: br i1 [[C_3_MAX]], label [[CHECK_1_MIN:%.*]], label [[TRAP]] @@ -23,18 +23,18 @@ define i32 @test.ult(i32* readonly %src, i32* readnone %min, i32* readnone %max) ; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[ADD_PTR_I36]], align 4 ; CHECK-NEXT: [[ADD_PTR_I29:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1 ; CHECK-NEXT: [[C_1_MIN:%.*]] = icmp ult i32* [[ADD_PTR_I29]], [[MIN]] -; CHECK-NEXT: br i1 
[[C_1_MIN]], label [[TRAP]], label [[CHECK_1_MAX:%.*]] +; CHECK-NEXT: br i1 false, label [[TRAP]], label [[CHECK_1_MAX:%.*]] ; CHECK: check.1.max: ; CHECK-NEXT: [[C_1_MAX:%.*]] = icmp ult i32* [[ADD_PTR_I29]], [[MAX]] -; CHECK-NEXT: br i1 [[C_1_MAX]], label [[CHECK_2_MIN:%.*]], label [[TRAP]] +; CHECK-NEXT: br i1 true, label [[CHECK_2_MIN:%.*]], label [[TRAP]] ; CHECK: check.2.min: ; CHECK-NEXT: [[L2:%.*]] = load i32, i32* [[ADD_PTR_I29]], align 4 ; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 2 ; CHECK-NEXT: [[C_2_MIN:%.*]] = icmp ult i32* [[ADD_PTR_I]], [[MIN]] -; CHECK-NEXT: br i1 [[C_2_MIN]], label [[TRAP]], label [[CHECK_2_MAX:%.*]] +; CHECK-NEXT: br i1 false, label [[TRAP]], label [[CHECK_2_MAX:%.*]] ; CHECK: check.2.max: ; CHECK-NEXT: [[C_2_MAX:%.*]] = icmp ult i32* [[ADD_PTR_I]], [[MAX]] -; CHECK-NEXT: br i1 [[C_2_MAX]], label [[EXIT:%.*]], label [[TRAP]] +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[TRAP]] ; CHECK: exit: ; CHECK-NEXT: [[L3:%.*]] = load i32, i32* [[ADD_PTR_I]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[L1]], [[L0]] @@ -101,16 +101,16 @@ define void @test.not.uge.ult(i8* %start, i8* %low, i8* %high) { ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: [[T_0:%.*]] = icmp ult i8* [[START]], [[HIGH]] -; CHECK-NEXT: call void @use(i1 [[T_0]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[START_1:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 1 ; CHECK-NEXT: [[T_1:%.*]] = icmp ult i8* [[START_1]], [[HIGH]] -; CHECK-NEXT: call void @use(i1 [[T_1]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[START_2:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 2 ; CHECK-NEXT: [[T_2:%.*]] = icmp ult i8* [[START_2]], [[HIGH]] -; CHECK-NEXT: call void @use(i1 [[T_2]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[START_3:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 3 ; CHECK-NEXT: [[T_3:%.*]] = icmp ult i8* [[START_3]], [[HIGH]] -; CHECK-NEXT: call void @use(i1 [[T_3]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[START_4:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 4 ; CHECK-NEXT: [[C_4:%.*]] = icmp ult i8* [[START_4]], [[HIGH]] ; CHECK-NEXT: call void @use(i1 [[C_4]]) @@ -152,19 +152,19 @@ define void @test.not.uge.ule(i8* %start, i8* %low, i8* %high) { ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: [[T_0:%.*]] = icmp ule i8* [[START]], [[HIGH]] -; CHECK-NEXT: call void @use(i1 [[T_0]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[START_1:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 1 ; CHECK-NEXT: [[T_1:%.*]] = icmp ule i8* [[START_1]], [[HIGH]] -; CHECK-NEXT: call void @use(i1 [[T_1]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[START_2:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 2 ; CHECK-NEXT: [[T_2:%.*]] = icmp ule i8* [[START_2]], [[HIGH]] -; CHECK-NEXT: call void @use(i1 [[T_2]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[START_3:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 3 ; CHECK-NEXT: [[T_3:%.*]] = icmp ule i8* [[START_3]], [[HIGH]] -; CHECK-NEXT: call void @use(i1 [[T_3]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[START_4:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 4 ; CHECK-NEXT: [[T_4:%.*]] = icmp ule i8* [[START_4]], [[HIGH]] -; CHECK-NEXT: call void @use(i1 [[T_4]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[START_5:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 5 ; CHECK-NEXT: [[C_5:%.*]] = icmp ule i8* [[START_5]], [[HIGH]] ; CHECK-NEXT: 
call void @use(i1 [[C_5]]) @@ -211,19 +211,19 @@ define void @test.not.uge.ugt(i8* %start, i8* %low, i8* %high) { ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: [[F_0:%.*]] = icmp ugt i8* [[START]], [[HIGH]] -; CHECK-NEXT: call void @use(i1 [[F_0]]) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[START_1:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 1 ; CHECK-NEXT: [[F_1:%.*]] = icmp ugt i8* [[START_1]], [[HIGH]] -; CHECK-NEXT: call void @use(i1 [[F_1]]) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[START_2:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 2 ; CHECK-NEXT: [[F_2:%.*]] = icmp ugt i8* [[START_2]], [[HIGH]] -; CHECK-NEXT: call void @use(i1 [[F_2]]) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[START_3:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 3 ; CHECK-NEXT: [[F_3:%.*]] = icmp ugt i8* [[START_3]], [[HIGH]] -; CHECK-NEXT: call void @use(i1 [[F_3]]) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[START_4:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 4 ; CHECK-NEXT: [[F_4:%.*]] = icmp ugt i8* [[START_4]], [[HIGH]] -; CHECK-NEXT: call void @use(i1 [[F_4]]) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[START_5:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 5 ; CHECK-NEXT: [[C_5:%.*]] = icmp ugt i8* [[START_5]], [[HIGH]] ; CHECK-NEXT: call void @use(i1 [[C_5]]) @@ -274,16 +274,16 @@ define void @test.not.uge.uge(i8* %start, i8* %low, i8* %high) { ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: [[F_0:%.*]] = icmp ugt i8* [[START]], [[HIGH]] -; CHECK-NEXT: call void @use(i1 [[F_0]]) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[START_1:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 1 ; CHECK-NEXT: [[F_1:%.*]] = icmp uge i8* [[START_1]], [[HIGH]] -; CHECK-NEXT: call void @use(i1 [[F_1]]) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[START_2:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 2 ; CHECK-NEXT: [[F_2:%.*]] = icmp uge i8* [[START_2]], [[HIGH]] -; CHECK-NEXT: call void @use(i1 [[F_2]]) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[START_3:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 3 ; CHECK-NEXT: [[F_3:%.*]] = icmp uge i8* [[START_3]], [[HIGH]] -; CHECK-NEXT: call void @use(i1 [[F_3]]) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[START_4:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 4 ; CHECK-NEXT: [[C_4:%.*]] = icmp uge i8* [[START_4]], [[HIGH]] ; CHECK-NEXT: call void @use(i1 [[C_4]]) diff --git a/llvm/test/Transforms/ConstraintElimination/i128.ll b/llvm/test/Transforms/ConstraintElimination/i128.ll index 6a10ea770dd58..d021db6aa907f 100644 --- a/llvm/test/Transforms/ConstraintElimination/i128.ll +++ b/llvm/test/Transforms/ConstraintElimination/i128.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S %s | FileCheck %s +; RUN: opt -constraint-elimination -S %s | FileCheck %s declare void @use(i1) diff --git a/llvm/test/Transforms/ConstraintElimination/loops.ll b/llvm/test/Transforms/ConstraintElimination/loops.ll index be25308c46dfe..37373e1fbcaf9 100644 --- a/llvm/test/Transforms/ConstraintElimination/loops.ll +++ b/llvm/test/Transforms/ConstraintElimination/loops.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S %s | FileCheck %s +; RUN: opt -constraint-elimination -S %s | FileCheck %s ; Make sure conditions in loops are not used to simplify themselves. 
diff --git a/llvm/test/Transforms/ConstraintElimination/mixed.ll b/llvm/test/Transforms/ConstraintElimination/mixed.ll index e4a264a8f0a0f..c0fb37883f71f 100644 --- a/llvm/test/Transforms/ConstraintElimination/mixed.ll +++ b/llvm/test/Transforms/ConstraintElimination/mixed.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S %s | FileCheck %s +; RUN: opt -constraint-elimination -S %s | FileCheck %s ; Make sure we do not incorrectly add variables to the system. diff --git a/llvm/test/Transforms/ConstraintElimination/uge.ll b/llvm/test/Transforms/ConstraintElimination/uge.ll index ca91733d2af98..bacb9a7f3d917 100644 --- a/llvm/test/Transforms/ConstraintElimination/uge.ll +++ b/llvm/test/Transforms/ConstraintElimination/uge.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S %s | FileCheck %s +; RUN: opt -constraint-elimination -S %s | FileCheck %s declare void @use(i1) @@ -10,7 +10,7 @@ define void @test_1_variable_constraint(i32 %x, i32 %y, i32 %z) { ; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[T_1:%.*]] = icmp uge i32 [[X]], [[Y]] -; CHECK-NEXT: call void @use(i1 [[T_1]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[C_2:%.*]] = icmp uge i32 [[X]], 10 ; CHECK-NEXT: call void @use(i1 [[C_2]]) ; CHECK-NEXT: [[C_3:%.*]] = icmp uge i32 [[Y]], [[X]] @@ -20,9 +20,9 @@ define void @test_1_variable_constraint(i32 %x, i32 %y, i32 %z) { ; CHECK-NEXT: ret void ; CHECK: bb2: ; CHECK-NEXT: [[T_2:%.*]] = icmp uge i32 [[Y]], [[X]] -; CHECK-NEXT: call void @use(i1 [[T_2]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[F_1:%.*]] = icmp uge i32 [[X]], [[Y]] -; CHECK-NEXT: call void @use(i1 [[F_1]]) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[C_5:%.*]] = icmp uge i32 [[X]], 10 ; CHECK-NEXT: call void @use(i1 [[C_5]]) ; CHECK-NEXT: [[C_6:%.*]] = icmp uge i32 10, [[X]] @@ -63,9 +63,9 @@ define void @test_1_constant_constraint(i32 %x) { ; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[T_1:%.*]] = icmp uge i32 [[X]], 10 -; CHECK-NEXT: call void @use(i1 [[T_1]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[T_2:%.*]] = icmp uge i32 [[X]], 9 -; CHECK-NEXT: call void @use(i1 [[T_2]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[C_2:%.*]] = icmp uge i32 [[X]], 11 ; CHECK-NEXT: call void @use(i1 [[C_2]]) ; CHECK-NEXT: [[C_4:%.*]] = icmp uge i32 10, [[X]] @@ -73,11 +73,11 @@ define void @test_1_constant_constraint(i32 %x) { ; CHECK-NEXT: ret void ; CHECK: bb2: ; CHECK-NEXT: [[T_3:%.*]] = icmp uge i32 11, [[X]] -; CHECK-NEXT: call void @use(i1 [[T_3]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[F_1:%.*]] = icmp uge i32 [[X]], 10 -; CHECK-NEXT: call void @use(i1 [[F_1]]) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[F_1_1:%.*]] = icmp uge i32 [[X]], 10 -; CHECK-NEXT: call void @use(i1 [[F_1_1]]) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[C_5:%.*]] = icmp uge i32 [[X]], 9 ; CHECK-NEXT: call void @use(i1 [[C_5]]) ; CHECK-NEXT: [[C_6:%.*]] = icmp uge i32 1, [[X]] @@ -125,7 +125,7 @@ define i32 @test1(i32 %x, i32 %y, i32 %z) { ; CHECK-NEXT: br i1 [[C_2]], label [[BB2:%.*]], label [[EXIT]] ; CHECK: bb2: ; CHECK-NEXT: [[C_3:%.*]] = icmp uge i32 [[X]], [[Z]] -; CHECK-NEXT: br i1 [[C_3]], label [[BB3:%.*]], label [[EXIT]] +; CHECK-NEXT: br i1 true, label [[BB3:%.*]], label [[EXIT]] ; CHECK: bb3: ; CHECK-NEXT: ret i32 10 ; CHECK: exit: @@ 
-225,7 +225,7 @@ define i32 @test4(i32 %x, i32 %y, i32 %z) { ; CHECK-NEXT: br i1 [[C_2]], label [[BB2:%.*]], label [[EXIT]] ; CHECK: bb2: ; CHECK-NEXT: [[T_1:%.*]] = icmp uge i32 [[X]], [[Z]] -; CHECK-NEXT: call void @use(i1 [[T_1]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[U_1:%.*]] = icmp eq i32 [[X]], [[Z]] ; CHECK-NEXT: call void @use(i1 [[U_1]]) ; CHECK-NEXT: ret i32 10 diff --git a/llvm/test/Transforms/ConstraintElimination/ugt-ule.ll b/llvm/test/Transforms/ConstraintElimination/ugt-ule.ll index c49ce7360cd68..cc9eca9a6605f 100644 --- a/llvm/test/Transforms/ConstraintElimination/ugt-ule.ll +++ b/llvm/test/Transforms/ConstraintElimination/ugt-ule.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S %s | FileCheck %s +; RUN: opt -constraint-elimination -S %s | FileCheck %s declare void @use(i1) @@ -10,13 +10,13 @@ define void @test(i8* %m, i8* %ptr) { ; CHECK-NEXT: br i1 [[CMP_1]], label [[BB_1:%.*]], label [[BB_2:%.*]] ; CHECK: bb.1: ; CHECK-NEXT: [[CMP_2:%.*]] = icmp uge i8* [[M]], [[PTR]] -; CHECK-NEXT: call void @use(i1 [[CMP_2]]) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: ret void ; CHECK: bb.2: ; CHECK-NEXT: br label [[BB_2_NEXT:%.*]] ; CHECK: bb.2.next: ; CHECK-NEXT: [[CMP_3:%.*]] = icmp uge i8* [[M]], [[PTR]] -; CHECK-NEXT: call void @use(i1 [[CMP_3]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/ConstraintElimination/ule.ll b/llvm/test/Transforms/ConstraintElimination/ule.ll index 2cb3750fad243..c5356550159e3 100644 --- a/llvm/test/Transforms/ConstraintElimination/ule.ll +++ b/llvm/test/Transforms/ConstraintElimination/ule.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S %s | FileCheck %s +; RUN: opt -constraint-elimination -S %s | FileCheck %s declare void @use(i1) @@ -10,7 +10,7 @@ define void @test_1_variable_constraint(i32 %x, i32 %y, i32 %z) { ; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[T_1:%.*]] = icmp ule i32 [[X]], [[Y]] -; CHECK-NEXT: call void @use(i1 [[T_1]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[C_2:%.*]] = icmp ule i32 [[X]], 10 ; CHECK-NEXT: call void @use(i1 [[C_2]]) ; CHECK-NEXT: [[C_3:%.*]] = icmp ule i32 [[Y]], [[X]] @@ -20,9 +20,9 @@ define void @test_1_variable_constraint(i32 %x, i32 %y, i32 %z) { ; CHECK-NEXT: ret void ; CHECK: bb2: ; CHECK-NEXT: [[T_2:%.*]] = icmp ule i32 [[Y]], [[X]] -; CHECK-NEXT: call void @use(i1 [[T_2]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[F_1:%.*]] = icmp ule i32 [[X]], [[Y]] -; CHECK-NEXT: call void @use(i1 [[F_1]]) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[C_5:%.*]] = icmp ule i32 [[X]], 10 ; CHECK-NEXT: call void @use(i1 [[C_5]]) ; CHECK-NEXT: [[C_6:%.*]] = icmp ule i32 10, [[X]] @@ -63,9 +63,9 @@ define void @test_1_constant_constraint(i32 %x) { ; CHECK-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[T_1:%.*]] = icmp ule i32 [[X]], 10 -; CHECK-NEXT: call void @use(i1 [[T_1]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[T_2:%.*]] = icmp ule i32 [[X]], 11 -; CHECK-NEXT: call void @use(i1 [[T_2]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[C_2:%.*]] = icmp ule i32 [[X]], 9 ; CHECK-NEXT: call void @use(i1 [[C_2]]) ; CHECK-NEXT: [[C_4:%.*]] = icmp ule i32 10, [[X]] @@ -73,14 +73,14 @@ define void @test_1_constant_constraint(i32 %x) { ; CHECK-NEXT: ret void ; CHECK: bb2: ; 
CHECK-NEXT: [[T_3:%.*]] = icmp ule i32 10, [[X]] -; CHECK-NEXT: call void @use(i1 [[T_3]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[F_1:%.*]] = icmp ule i32 [[X]], 9 -; CHECK-NEXT: call void @use(i1 [[F_1]]) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[F_1_1:%.*]] = icmp ule i32 [[X]], 10 -; CHECK-NEXT: call void @use(i1 [[F_1_1]]) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[C_5:%.*]] = icmp ule i32 [[X]], 11 ; CHECK-NEXT: call void @use(i1 [[C_5]]) -; CHECK-NEXT: [[C_6:%.*]] = icmp ule i32 10, [[X]] +; CHECK-NEXT: [[C_6:%.*]] = icmp ule i32 12, [[X]] ; CHECK-NEXT: call void @use(i1 [[C_6]]) ; CHECK-NEXT: ret void ; @@ -110,7 +110,7 @@ bb2: call void @use(i1 %f.1.1) %c.5 = icmp ule i32 %x, 11 call void @use(i1 %c.5) - %c.6 = icmp ule i32 10, %x + %c.6 = icmp ule i32 12, %x call void @use(i1 %c.6) ret void } @@ -126,7 +126,7 @@ define i32 @test1(i32 %x, i32 %y, i32 %z) { ; CHECK-NEXT: br i1 [[C_2]], label [[BB2:%.*]], label [[EXIT]] ; CHECK: bb2: ; CHECK-NEXT: [[C_3:%.*]] = icmp ule i32 [[X]], [[Z]] -; CHECK-NEXT: br i1 [[C_3]], label [[BB3:%.*]], label [[EXIT]] +; CHECK-NEXT: br i1 true, label [[BB3:%.*]], label [[EXIT]] ; CHECK: bb3: ; CHECK-NEXT: ret i32 10 ; CHECK: exit: @@ -226,7 +226,7 @@ define i32 @test4(i32 %x, i32 %y, i32 %z) { ; CHECK-NEXT: br i1 [[C_2]], label [[BB2:%.*]], label [[EXIT]] ; CHECK: bb2: ; CHECK-NEXT: [[T_1:%.*]] = icmp ule i32 [[X]], [[Z]] -; CHECK-NEXT: call void @use(i1 [[T_1]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[U_1:%.*]] = icmp eq i32 [[X]], [[Z]] ; CHECK-NEXT: call void @use(i1 [[U_1]]) ; CHECK-NEXT: ret i32 10 From 32a61531b8181b1fdfa058803444f73ae6ab29ff Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 15 Sep 2020 18:32:17 +0000 Subject: [PATCH 0733/1079] [gn build] Port 3d42d549554 --- llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn index 60fcbe0318713..9d4c7a06c9402 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn @@ -15,6 +15,7 @@ static_library("Scalar") { "BDCE.cpp", "CallSiteSplitting.cpp", "ConstantHoisting.cpp", + "ConstraintElimination.cpp", "CorrelatedValuePropagation.cpp", "DCE.cpp", "DeadStoreElimination.cpp", From 79f22b1f99fd72db9a45c387258d289791f2b9c0 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Mon, 14 Sep 2020 12:20:45 -0700 Subject: [PATCH 0734/1079] Fix .debug_aranges parsing. Code was added that used llvm error checking to parse .debug_aranges, but the error check after parsing the DWARFDebugArangeSet was reversed, so no error was returned and the parsed address ranges were never actually used. This meant we would always fall back on creating our own address ranges by parsing the compile unit's ranges. This was causing problems for cases where the DW_TAG_compile_unit had a single address range by using a DW_AT_low_pc and DW_AT_high_pc attribute pair (not using a DW_AT_ranges attribute), but the .debug_aranges had correct split ranges. In this case we would end up using the single range for the compile unit that encompassed all of the ranges from the .debug_aranges section, which would cause address resolution issues in LLDB where address lookups would fail for certain addresses.
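The convention behind the one-line fix below: llvm::Error converts to true when it carries a failure, so the original check "if (!error) return error;" bailed out on every success instead of on every failure. A minimal sketch of the corrected shape (illustration only; parseOneSet is a hypothetical stand-in for DWARFDebugArangeSet::extract):

    #include "llvm/Support/Error.h"
    #include <vector>

    // Hypothetical stand-in for DWARFDebugArangeSet::extract.
    static llvm::Error parseOneSet(bool ok) {
      if (ok)
        return llvm::Error::success();
      return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                     "malformed arange set");
    }

    static llvm::Error parseAll(const std::vector<bool> &sets) {
      for (bool ok : sets) {
        llvm::Error error = parseOneSet(ok);
        if (error)      // truthy only on failure: propagate the error
          return error;
        // on success, record the ranges from this set and keep going
      }
      return llvm::Error::success();
    }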
Differential Revision: https://reviews.llvm.org/D87626 --- .../SymbolFile/DWARF/DWARFDebugAranges.cpp | 2 +- .../SymbolFile/DWARF/SymbolFileDWARFTests.cpp | 91 +++++++++++++------ 2 files changed, 66 insertions(+), 27 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp index 7dc52c1e2df06..7062c9bfae235 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp @@ -39,7 +39,7 @@ DWARFDebugAranges::extract(const DWARFDataExtractor &debug_aranges_data) { Range range; while (debug_aranges_data.ValidOffset(offset)) { llvm::Error error = set.extract(debug_aranges_data, &offset); - if (!error) + if (error) return error; const uint32_t num_descriptors = set.NumDescriptors(); diff --git a/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp b/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp index 8bf019ea9ed65..4898b94413cab 100644 --- a/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp +++ b/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp @@ -19,6 +19,7 @@ #include "Plugins/SymbolFile/DWARF/DWARFDataExtractor.h" #include "Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.h" #include "Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h" +#include "Plugins/SymbolFile/DWARF/DWARFDebugAranges.h" #include "Plugins/SymbolFile/DWARF/SymbolFileDWARF.h" #include "Plugins/SymbolFile/PDB/SymbolFilePDB.h" #include "Plugins/TypeSystem/Clang/TypeSystemClang.h" @@ -70,7 +71,7 @@ TEST_F(SymbolFileDWARFTests, TestAbilitiesForDWARF) { TEST_F(SymbolFileDWARFTests, TestAbbrevOrder1Start1) { // Test that if we have a .debug_abbrev that contains ordered abbreviation // codes that start at 1, that we get O(1) access. - + const auto byte_order = eByteOrderLittle; const uint8_t addr_size = 4; StreamString encoder(Stream::eBinary, addr_size, byte_order); @@ -81,7 +82,7 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevOrder1Start1) { encoder.PutULEB128(DW_FORM_strp); encoder.PutULEB128(0); encoder.PutULEB128(0); - + encoder.PutULEB128(2); // Abbrev code 2 encoder.PutULEB128(DW_TAG_subprogram); encoder.PutHex8(DW_CHILDREN_no); @@ -89,9 +90,9 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevOrder1Start1) { encoder.PutULEB128(DW_FORM_strp); encoder.PutULEB128(0); encoder.PutULEB128(0); - + encoder.PutULEB128(0); // Abbrev code 0 (termination) - + DWARFDataExtractor data; data.SetData(encoder.GetData(), encoder.GetSize(), byte_order); DWARFAbbreviationDeclarationSet abbrev_set; @@ -101,7 +102,7 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevOrder1Start1) { // Make sure we have O(1) access to each abbreviation by making sure the // index offset is 1 and not UINT32_MAX EXPECT_EQ(abbrev_set.GetIndexOffset(), 1u); - + auto abbrev1 = abbrev_set.GetAbbreviationDeclaration(1); EXPECT_EQ(abbrev1->Tag(), DW_TAG_compile_unit); EXPECT_TRUE(abbrev1->HasChildren()); @@ -115,7 +116,7 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevOrder1Start1) { TEST_F(SymbolFileDWARFTests, TestAbbrevOrder1Start5) { // Test that if we have a .debug_abbrev that contains ordered abbreviation // codes that start at 5, that we get O(1) access. 
- + const auto byte_order = eByteOrderLittle; const uint8_t addr_size = 4; StreamString encoder(Stream::eBinary, addr_size, byte_order); @@ -126,7 +127,7 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevOrder1Start5) { encoder.PutULEB128(DW_FORM_strp); encoder.PutULEB128(0); encoder.PutULEB128(0); - + encoder.PutULEB128(6); // Abbrev code 6 encoder.PutULEB128(DW_TAG_subprogram); encoder.PutHex8(DW_CHILDREN_no); @@ -134,9 +135,9 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevOrder1Start5) { encoder.PutULEB128(DW_FORM_strp); encoder.PutULEB128(0); encoder.PutULEB128(0); - + encoder.PutULEB128(0); // Abbrev code 0 (termination) - + DWARFDataExtractor data; data.SetData(encoder.GetData(), encoder.GetSize(), byte_order); DWARFAbbreviationDeclarationSet abbrev_set; @@ -146,7 +147,7 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevOrder1Start5) { // Make sure we have O(1) access to each abbreviation by making sure the // index offset is 5 and not UINT32_MAX EXPECT_EQ(abbrev_set.GetIndexOffset(), 5u); - + auto abbrev1 = abbrev_set.GetAbbreviationDeclaration(5); EXPECT_EQ(abbrev1->Tag(), DW_TAG_compile_unit); EXPECT_TRUE(abbrev1->HasChildren()); @@ -160,7 +161,7 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevOrder1Start5) { TEST_F(SymbolFileDWARFTests, TestAbbrevOutOfOrder) { // Test that if we have a .debug_abbrev that contains unordered abbreviation // codes, that we can access the information correctly. - + const auto byte_order = eByteOrderLittle; const uint8_t addr_size = 4; StreamString encoder(Stream::eBinary, addr_size, byte_order); @@ -171,7 +172,7 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevOutOfOrder) { encoder.PutULEB128(DW_FORM_strp); encoder.PutULEB128(0); encoder.PutULEB128(0); - + encoder.PutULEB128(1); // Abbrev code 1 encoder.PutULEB128(DW_TAG_subprogram); encoder.PutHex8(DW_CHILDREN_no); @@ -179,9 +180,9 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevOutOfOrder) { encoder.PutULEB128(DW_FORM_strp); encoder.PutULEB128(0); encoder.PutULEB128(0); - + encoder.PutULEB128(0); // Abbrev code 0 (termination) - + DWARFDataExtractor data; data.SetData(encoder.GetData(), encoder.GetSize(), byte_order); DWARFAbbreviationDeclarationSet abbrev_set; @@ -191,7 +192,7 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevOutOfOrder) { // Make sure we don't have O(1) access to each abbreviation by making sure // the index offset is UINT32_MAX EXPECT_EQ(abbrev_set.GetIndexOffset(), UINT32_MAX); - + auto abbrev1 = abbrev_set.GetAbbreviationDeclaration(2); EXPECT_EQ(abbrev1->Tag(), DW_TAG_compile_unit); EXPECT_TRUE(abbrev1->HasChildren()); @@ -205,7 +206,7 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevOutOfOrder) { TEST_F(SymbolFileDWARFTests, TestAbbrevInvalidNULLTag) { // Test that we detect when an abbreviation has a NULL tag and that we get // an error when decoding. - + const auto byte_order = eByteOrderLittle; const uint8_t addr_size = 4; StreamString encoder(Stream::eBinary, addr_size, byte_order); @@ -214,9 +215,9 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevInvalidNULLTag) { encoder.PutHex8(DW_CHILDREN_no); encoder.PutULEB128(0); encoder.PutULEB128(0); - + encoder.PutULEB128(0); // Abbrev code 0 (termination) - + DWARFDataExtractor data; data.SetData(encoder.GetData(), encoder.GetSize(), byte_order); DWARFAbbreviationDeclarationSet abbrev_set; @@ -232,7 +233,7 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevInvalidNULLTag) { TEST_F(SymbolFileDWARFTests, TestAbbrevNullAttrValidForm) { // Test that we detect when an abbreviation has a NULL attribute and a non // NULL form and that we get an error when decoding. 
- + const auto byte_order = eByteOrderLittle; const uint8_t addr_size = 4; StreamString encoder(Stream::eBinary, addr_size, byte_order); @@ -245,7 +246,7 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevNullAttrValidForm) { encoder.PutULEB128(0); encoder.PutULEB128(0); // Abbrev code 0 (termination) - + DWARFDataExtractor data; data.SetData(encoder.GetData(), encoder.GetSize(), byte_order); DWARFAbbreviationDeclarationSet abbrev_set; @@ -255,13 +256,12 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevNullAttrValidForm) { EXPECT_TRUE(bool(error)); EXPECT_EQ("malformed abbreviation declaration attribute", llvm::toString(std::move(error))); - } TEST_F(SymbolFileDWARFTests, TestAbbrevValidAttrNullForm) { // Test that we detect when an abbreviation has a valid attribute and a // NULL form and that we get an error when decoding. - + const auto byte_order = eByteOrderLittle; const uint8_t addr_size = 4; StreamString encoder(Stream::eBinary, addr_size, byte_order); @@ -272,9 +272,9 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevValidAttrNullForm) { encoder.PutULEB128(0); // NULL form encoder.PutULEB128(0); encoder.PutULEB128(0); - + encoder.PutULEB128(0); // Abbrev code 0 (termination) - + DWARFDataExtractor data; data.SetData(encoder.GetData(), encoder.GetSize(), byte_order); DWARFAbbreviationDeclarationSet abbrev_set; @@ -290,7 +290,7 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevMissingTerminator) { // Test that we detect when an abbreviation has a valid attribute and a // form, but is missing the NULL attribute and form that terminates an // abbreviation - + const auto byte_order = eByteOrderLittle; const uint8_t addr_size = 4; StreamString encoder(Stream::eBinary, addr_size, byte_order); @@ -300,7 +300,7 @@ TEST_F(SymbolFileDWARFTests, TestAbbrevMissingTerminator) { encoder.PutULEB128(DW_AT_name); encoder.PutULEB128(DW_FORM_strp); // Don't add the NULL DW_AT and NULL DW_FORM terminator - + DWARFDataExtractor data; data.SetData(encoder.GetData(), encoder.GetSize(), byte_order); DWARFAbbreviationDeclarationSet abbrev_set; @@ -346,3 +346,42 @@ TEST_F(SymbolFileDWARFTests, ParseArangesNonzeroSegmentSize) { llvm::toString(std::move(error))); EXPECT_EQ(off, 12U); // Parser should read no further than the segment size } + +TEST_F(SymbolFileDWARFTests, ParseAranges) { + // Test we can successfully parse a DWARFDebugAranges. The initial error + // checking code had a bug where it would always return an empty address + // ranges for everything in .debug_aranges and no error. 
+ const unsigned char binary_data[] = { + 60, 0, 0, 0, // unit_length + 2, 0, // DWARF version number + 255, 0, 0, 0, // offset into the .debug_info_table + 8, // address size + 0, // segment size + 0, 0, 0, 0, // pad bytes + // BEGIN TUPLES + // First tuple: [0x1000-0x1100) + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Address 0x1000 + 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Size 0x0100 + // Second tuple: [0x2000-0x2100) + 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Address 0x2000 + 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Size 0x0100 + // Terminating tuple + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Terminator + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // Terminator + }; + DWARFDataExtractor data; + data.SetData(static_cast<const void *>(binary_data), sizeof binary_data, + lldb::ByteOrder::eByteOrderLittle); + DWARFDebugAranges debug_aranges; + llvm::Error error = debug_aranges.extract(data); + ASSERT_FALSE(bool(error)); + EXPECT_EQ(debug_aranges.GetNumRanges(), 2u); + EXPECT_EQ(debug_aranges.FindAddress(0x0fff), DW_INVALID_OFFSET); + EXPECT_EQ(debug_aranges.FindAddress(0x1000), 255u); + EXPECT_EQ(debug_aranges.FindAddress(0x1100 - 1), 255u); + EXPECT_EQ(debug_aranges.FindAddress(0x1100), DW_INVALID_OFFSET); + EXPECT_EQ(debug_aranges.FindAddress(0x1fff), DW_INVALID_OFFSET); + EXPECT_EQ(debug_aranges.FindAddress(0x2000), 255u); + EXPECT_EQ(debug_aranges.FindAddress(0x2100 - 1), 255u); + EXPECT_EQ(debug_aranges.FindAddress(0x2100), DW_INVALID_OFFSET); +} From d158e786ccd33f8c9fc3ab008dd9463e252fa36a Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 15 Sep 2020 11:55:10 -0700 Subject: [PATCH 0735/1079] [DemandedBits][NewPM] Pin some tests to legacy PM All tests have corresponding NPM RUN lines. -analyze doesn't work under NPM.
--- llvm/test/Analysis/DemandedBits/add.ll | 2 +- llvm/test/Analysis/DemandedBits/basic.ll | 2 +- llvm/test/Analysis/DemandedBits/intrinsics.ll | 2 +- llvm/test/Analysis/DemandedBits/vectors.ll | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/Analysis/DemandedBits/add.ll b/llvm/test/Analysis/DemandedBits/add.ll index 01673f82c2b36..dfd54525d0740 100644 --- a/llvm/test/Analysis/DemandedBits/add.ll +++ b/llvm/test/Analysis/DemandedBits/add.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s +; RUN: opt -S -demanded-bits -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s ; CHECK-DAG: DemandedBits: 0x1e for %1 = and i32 %a, 9 diff --git a/llvm/test/Analysis/DemandedBits/basic.ll b/llvm/test/Analysis/DemandedBits/basic.ll index 6f44465315e63..a05d3804156a3 100644 --- a/llvm/test/Analysis/DemandedBits/basic.ll +++ b/llvm/test/Analysis/DemandedBits/basic.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s +; RUN: opt -S -demanded-bits -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s ; CHECK-DAG: DemandedBits: 0xff for %1 = add nsw i32 %a, 5 diff --git a/llvm/test/Analysis/DemandedBits/intrinsics.ll b/llvm/test/Analysis/DemandedBits/intrinsics.ll index 6987f14f8b1ba..ec78178ea22dc 100644 --- a/llvm/test/Analysis/DemandedBits/intrinsics.ll +++ b/llvm/test/Analysis/DemandedBits/intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s +; RUN: opt -S -demanded-bits -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s ; CHECK-DAG: DemandedBits: 0xff000000 for %1 = or i32 %x, 1 diff --git a/llvm/test/Analysis/DemandedBits/vectors.ll b/llvm/test/Analysis/DemandedBits/vectors.ll index 36cde05fb7c62..a7835ca799bca 100644 --- a/llvm/test/Analysis/DemandedBits/vectors.ll +++ b/llvm/test/Analysis/DemandedBits/vectors.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s +; RUN: opt -S -demanded-bits -analyze -enable-new-pm=0 < %s | FileCheck %s ; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s ; CHECK-DAG: DemandedBits: 0xff00 for %x = or <2 x i32> %a, zeroinitializer From 558e5c31b66e114f164ad798de1f26b49042ed5e Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 15 Sep 2020 11:59:00 -0700 Subject: [PATCH 0736/1079] [Dominators][NewPM] Pin tests with -analyze to legacy PM -analyze isn't supported in NPM. All affected tests have corresponding NPM RUN lines.
--- llvm/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll | 2 +- llvm/test/Analysis/Dominators/basic.ll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll b/llvm/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll index c036fe22ab87e..6fa3fec0359e5 100644 --- a/llvm/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll +++ b/llvm/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -domtree -break-crit-edges -analyze -domtree | FileCheck %s +; RUN: opt < %s -domtree -break-crit-edges -analyze -domtree -enable-new-pm=0 | FileCheck %s ; RUN: opt < %s -passes='require<domtree>,break-crit-edges,print<domtree>' -disable-output 2>&1| FileCheck %s ; PR932 diff --git a/llvm/test/Analysis/Dominators/basic.ll b/llvm/test/Analysis/Dominators/basic.ll index 353c3397b5da7..afa6f1e9a9b6b 100644 --- a/llvm/test/Analysis/Dominators/basic.ll +++ b/llvm/test/Analysis/Dominators/basic.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -domtree -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OLDPM +; RUN: opt < %s -domtree -analyze -enable-new-pm=0 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OLDPM ; RUN: opt < %s -disable-output -passes='print<domtree>' 2>&1 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-NEWPM define void @test1() { From 583c8ce30c12511a814a1db2923b9809f2a15c54 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 15 Sep 2020 14:59:27 -0400 Subject: [PATCH 0737/1079] [libc++] Fix broken test for std::any and allocators The test was not allocating the right number of bytes. This is my fault, not Marshall's, as I was the one to write the tests for 39c879514170. --- .../test/libcxx/utilities/any/allocator.pass.cpp | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/libcxx/test/libcxx/utilities/any/allocator.pass.cpp b/libcxx/test/libcxx/utilities/any/allocator.pass.cpp index c6800eb832bda..9de8c5e7edff1 100644 --- a/libcxx/test/libcxx/utilities/any/allocator.pass.cpp +++ b/libcxx/test/libcxx/utilities/any/allocator.pass.cpp @@ -35,10 +35,8 @@ bool Large_was_constructed = false; bool Large_was_destroyed = false; bool Large_was_deallocated = false; -bool Small_was_allocated = false; bool Small_was_constructed = false; bool Small_was_destroyed = false; -bool Small_was_deallocated = false; namespace std { template <> @@ -51,7 +49,7 @@ namespace std { Large* allocate(std::size_t n) { Large_was_allocated = true; - return static_cast<Large*>(::operator new(n)); + return static_cast<Large*>(::operator new(n * sizeof(Large))); } template <class ...Args> @@ -79,10 +77,7 @@ namespace std { using propagate_on_container_move_assignment = std::true_type; using is_always_equal = std::true_type; - Small* allocate(std::size_t n) { - Small_was_allocated = true; - return static_cast<Small*>(::operator new(n)); - } + Small* allocate(std::size_t) { assert(false); } template <class ...Args> void construct(Small* p, Args&& ...args) { @@ -95,10 +90,7 @@ namespace std { Small_was_destroyed = true; } - void deallocate(Small* p, std::size_t) { - Small_was_deallocated = true; - return ::operator delete(p); - } + void deallocate(Small*, std::size_t) { assert(false); } }; } // end namespace std @@ -124,12 +116,10 @@ int main(int, char**) { std::any a = Small(); (void)a; - assert(!Small_was_allocated); assert(Small_was_constructed); } assert(Small_was_destroyed); - assert(!Small_was_deallocated); } return 0; From 69f98311ca42127df92527b6fc3be99841a15f12 Mon Sep 17 00:00:00 2001 From: Jonas Toth Date: Sun, 13 Sep 2020 19:30:56 +0200
Subject: [PATCH 0738/1079] [ASTMatchers] extract public matchers from const-analysis into own patch The analysis for const-ness of local variables required a few generally useful matchers, which are extracted into their own patch. They are decompositionDecl and forEachArgumentWithParamType, which also works for calls through function pointers. This is a reupload of https://reviews.llvm.org/D72505, which already landed but had to be reverted due to a GCC crash on powerpc (https://reviews.llvm.org/rG4c48ea68e491cb42f1b5d43ffba89f6a7f0dadc4). Because this took a long time to address, I decided to redo this patch and have a clean workflow. I will try to coordinate with someone who has a PPC to apply this patch and test for the crash. If everything is fine, I intend to just commit. If the crash is still happening, I hope to at least find the cause. Differential Revision: https://reviews.llvm.org/D87588 --- clang/docs/LibASTMatchersReference.html | 132 +++++++++++++++ clang/include/clang/ASTMatchers/ASTMatchers.h | 110 ++++++++++++ clang/lib/ASTMatchers/Dynamic/Registry.cpp | 2 + .../ASTMatchers/ASTMatchersTraversalTest.cpp | 158 ++++++++++++++++++ 4 files changed, 402 insertions(+) diff --git a/clang/docs/LibASTMatchersReference.html b/clang/docs/LibASTMatchersReference.html index eb85e420e7e4d..c4c6de117c1c0 100644 --- a/clang/docs/LibASTMatchersReference.html +++ b/clang/docs/LibASTMatchersReference.html @@ -649,6 +649,30 @@

    Node Matchers

    Matcher<DecompositionDecl>decompositionDeclMatcher<DecompositionDecl>...
    Matches decomposition-declarations.
    +
+Example matches the declaration node with foo and bar, but not
    +number.
    +(matcher = declStmt(has(decompositionDecl())))
    +
    +  int number = 42;
+  auto [foo, bar] = std::make_pair(42, 42);
    +
    Matcher<DecompositionDecl>decompositionDeclMatcher<DecompositionDecl>...
    Matches decomposition-declarations.
    +
+Example matches the declaration node with foo and bar, but not
    +number.
    +(matcher = declStmt(has(decompositionDecl())))
    +
    +  int number = 42;
+  auto [foo, bar] = std::make_pair(42, 42);
    +
    Matcher<NestedNameSpecifierLoc>nestedNameSpecifierLocMatcher<NestedNameSpecifierLoc>...
    Same as nestedNameSpecifier but matches NestedNameSpecifierLoc.
     
    Matcher<CXXConstructExpr>forEachArgumentWithParamTypeMatcher<Expr> ArgMatcher, Matcher<QualType> ParamMatcher
    Matches all arguments and their respective types for a CallExpr or
    +CXXConstructExpr. It is very similar to forEachArgumentWithParam but
    +it works on calls through function pointers as well.
    +
+The difference is that function pointers do not provide access to a
    +ParmVarDecl, but only the QualType for each argument.
    +
    +Given
    +  void f(int i);
    +  int y;
    +  f(y);
    +  void (*f_ptr)(int) = f;
    +  f_ptr(y);
    +callExpr(
    +  forEachArgumentWithParamType(
    +    declRefExpr(to(varDecl(hasName("y")))),
+    qualType(isInteger()).bind("type")
    +))
    +  matches f(y) and f_ptr(y)
    +with declRefExpr(...)
    +  matching int y
    +and qualType(...)
    +  matching int
    +
    Matcher<CXXConstructExpr>forEachArgumentWithParamTypeMatcher<Expr> ArgMatcher, Matcher<QualType> ParamMatcher
    Matches all arguments and their respective types for a CallExpr or
    +CXXConstructExpr. It is very similar to forEachArgumentWithParam but
    +it works on calls through function pointers as well.
    +
+The difference is that function pointers do not provide access to a
    +ParmVarDecl, but only the QualType for each argument.
    +
    +Given
    +  void f(int i);
    +  int y;
    +  f(y);
    +  void (*f_ptr)(int) = f;
    +  f_ptr(y);
    +callExpr(
    +  forEachArgumentWithParamType(
    +    declRefExpr(to(varDecl(hasName("y")))),
+    qualType(isInteger()).bind("type")
    +))
    +  matches f(y) and f_ptr(y)
    +with declRefExpr(...)
    +  matching int y
    +and qualType(...)
    +  matching int
    +
    Matcher<CXXConstructExpr>hasAnyArgumentMatcher<Expr> InnerMatcher
    Matches any argument of a call expression or a constructor call
     expression, or an ObjC-message-send expression.
    @@ -5850,6 +5928,60 @@ 

    AST Traversal Matchers

    Matcher<CallExpr>forEachArgumentWithParamTypeMatcher<Expr> ArgMatcher, Matcher<QualType> ParamMatcher
    Matches all arguments and their respective types for a CallExpr or
    +CXXConstructExpr. It is very similar to forEachArgumentWithParam but
    +it works on calls through function pointers as well.
    +
+The difference is that function pointers do not provide access to a
    +ParmVarDecl, but only the QualType for each argument.
    +
    +Given
    +  void f(int i);
    +  int y;
    +  f(y);
    +  void (*f_ptr)(int) = f;
    +  f_ptr(y);
    +callExpr(
    +  forEachArgumentWithParamType(
    +    declRefExpr(to(varDecl(hasName("y")))),
+    qualType(isInteger()).bind("type")
    +))
    +  matches f(y) and f_ptr(y)
    +with declRefExpr(...)
    +  matching int y
    +and qualType(...)
    +  matching int
    +
    Matcher<CallExpr>forEachArgumentWithParamTypeMatcher<Expr> ArgMatcher, Matcher<QualType> ParamMatcher
    Matches all arguments and their respective types for a CallExpr or
    +CXXConstructExpr. It is very similar to forEachArgumentWithParam but
    +it works on calls through function pointers as well.
    +
+The difference is that function pointers do not provide access to a
    +ParmVarDecl, but only the QualType for each argument.
    +
    +Given
    +  void f(int i);
    +  int y;
    +  f(y);
    +  void (*f_ptr)(int) = f;
    +  f_ptr(y);
    +callExpr(
    +  forEachArgumentWithParamType(
    +    declRefExpr(to(varDecl(hasName("y")))),
+    qualType(isInteger()).bind("type")
    +))
    +  matches f(y) and f_ptr(y)
    +with declRefExpr(...)
    +  matching int y
    +and qualType(...)
    +  matching int
    +
    Matcher<CallExpr>hasAnyArgumentMatcher<Expr> InnerMatcher
    Matches any argument of a call expression or a constructor call
     expression, or an ObjC-message-send expression.
    diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
    index f5c4fe63182ff..e670459fe8a2f 100644
    --- a/clang/include/clang/ASTMatchers/ASTMatchers.h
    +++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
    @@ -334,6 +334,19 @@ AST_MATCHER_P(Stmt, isExpandedFromMacro, llvm::StringRef, MacroName) {
     /// \endcode
 extern const internal::VariadicAllOfMatcher<Decl> decl;
     
    +/// Matches decomposition-declarations.
    +///
+/// Example matches the declaration node with \c foo and \c bar, but not
    +/// \c number.
    +/// (matcher = declStmt(has(decompositionDecl())))
    +///
    +/// \code
    +///   int number = 42;
+///   auto [foo, bar] = std::make_pair(42, 42);
    +/// \endcode
+extern const internal::VariadicAllOfMatcher<DecompositionDecl>
    +    decompositionDecl;
    +
     /// Matches a declaration of a linkage specification.
     ///
     /// Given
    @@ -4349,6 +4362,103 @@ AST_POLYMORPHIC_MATCHER_P2(forEachArgumentWithParam,
       return Matched;
     }
     
    +/// Matches all arguments and their respective types for a \c CallExpr or
    +/// \c CXXConstructExpr. It is very similar to \c forEachArgumentWithParam but
    +/// it works on calls through function pointers as well.
    +///
+/// The difference is that function pointers do not provide access to a
    +/// \c ParmVarDecl, but only the \c QualType for each argument.
    +///
    +/// Given
    +/// \code
    +///   void f(int i);
    +///   int y;
    +///   f(y);
    +///   void (*f_ptr)(int) = f;
    +///   f_ptr(y);
    +/// \endcode
    +/// callExpr(
    +///   forEachArgumentWithParamType(
    +///     declRefExpr(to(varDecl(hasName("y")))),
+///     qualType(isInteger()).bind("type")
    +/// ))
    +///   matches f(y) and f_ptr(y)
    +/// with declRefExpr(...)
    +///   matching int y
    +/// and qualType(...)
    +///   matching int
    +AST_POLYMORPHIC_MATCHER_P2(forEachArgumentWithParamType,
    +                           AST_POLYMORPHIC_SUPPORTED_TYPES(CallExpr,
    +                                                           CXXConstructExpr),
+                           internal::Matcher<Expr>, ArgMatcher,
+                           internal::Matcher<QualType>, ParamMatcher) {
    +  BoundNodesTreeBuilder Result;
    +  // The first argument of an overloaded member operator is the implicit object
    +  // argument of the method which should not be matched against a parameter, so
    +  // we skip over it here.
    +  BoundNodesTreeBuilder Matches;
    +  unsigned ArgIndex = cxxOperatorCallExpr(callee(cxxMethodDecl()))
    +                              .matches(Node, Finder, &Matches)
    +                          ? 1
    +                          : 0;
    +
    +  const FunctionProtoType *FProto = nullptr;
    +
+  if (const auto *Call = dyn_cast<CallExpr>(&Node)) {
    +    if (const auto *Value =
+            dyn_cast_or_null<ValueDecl>(Call->getCalleeDecl())) {
    +      QualType QT = Value->getType().getCanonicalType();
    +
    +      // This does not necessarily lead to a `FunctionProtoType`,
    +      // e.g. K&R functions do not have a function prototype.
    +      if (QT->isFunctionPointerType())
+        FProto = QT->getPointeeType()->getAs<FunctionProtoType>();
    +
    +      if (QT->isMemberFunctionPointerType()) {
+        const auto *MP = QT->getAs<MemberPointerType>();
+        assert(MP && "Must be a member pointer if it's a member function pointer");
+        FProto = MP->getPointeeType()->getAs<FunctionProtoType>();
    +        assert(FProto &&
    +               "The call must have happened through a member function "
    +               "pointer");
    +      }
    +    }
    +  }
    +
    +  int ParamIndex = 0;
    +  bool Matched = false;
    +
    +  for (; ArgIndex < Node.getNumArgs(); ++ArgIndex, ++ParamIndex) {
    +    BoundNodesTreeBuilder ArgMatches(*Builder);
    +    if (ArgMatcher.matches(*(Node.getArg(ArgIndex)->IgnoreParenCasts()), Finder,
    +                           &ArgMatches)) {
    +      BoundNodesTreeBuilder ParamMatches(ArgMatches);
    +
    +      // This test is cheaper compared to the big matcher in the next if.
    +      // Therefore, please keep this order.
    +      if (FProto) {
    +        QualType ParamType = FProto->getParamType(ParamIndex);
    +        if (ParamMatcher.matches(ParamType, Finder, &ParamMatches)) {
    +          Result.addMatch(ParamMatches);
    +          Matched = true;
    +          continue;
    +        }
    +      }
    +      if (expr(anyOf(cxxConstructExpr(hasDeclaration(cxxConstructorDecl(
    +                         hasParameter(ParamIndex, hasType(ParamMatcher))))),
    +                     callExpr(callee(functionDecl(
    +                         hasParameter(ParamIndex, hasType(ParamMatcher)))))))
    +              .matches(Node, Finder, &ParamMatches)) {
    +        Result.addMatch(ParamMatches);
    +        Matched = true;
    +        continue;
    +      }
    +    }
    +  }
    +  *Builder = std::move(Result);
    +  return Matched;
    +}
    +
     /// Matches the ParmVarDecl nodes that are at the N'th position in the parameter
     /// list. The parameter list could be that of either a block, function, or
     /// objc-method.
    diff --git a/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
    index 058dab3333de1..8e62dce4fab52 100644
    --- a/clang/lib/ASTMatchers/Dynamic/Registry.cpp
    +++ b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
    @@ -202,6 +202,7 @@ RegistryMaps::RegistryMaps() {
       REGISTER_MATCHER(cxxUnresolvedConstructExpr);
       REGISTER_MATCHER(decayedType);
       REGISTER_MATCHER(decl);
    +  REGISTER_MATCHER(decompositionDecl);
       REGISTER_MATCHER(declCountIs);
       REGISTER_MATCHER(declRefExpr);
       REGISTER_MATCHER(declStmt);
    @@ -227,6 +228,7 @@ RegistryMaps::RegistryMaps() {
       REGISTER_MATCHER(floatLiteral);
       REGISTER_MATCHER(forEach);
       REGISTER_MATCHER(forEachArgumentWithParam);
    +  REGISTER_MATCHER(forEachArgumentWithParamType);
       REGISTER_MATCHER(forEachConstructorInitializer);
       REGISTER_MATCHER(forEachDescendant);
       REGISTER_MATCHER(forEachOverridden);
    diff --git a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
    index c7db52b37a506..72fbef5cdc175 100644
    --- a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
    +++ b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
    @@ -741,6 +741,164 @@ TEST(ForEachArgumentWithParam, HandlesBoundNodesForNonMatches) {
         std::make_unique>("v", 4)));
     }
     
    +TEST(ForEachArgumentWithParamType, ReportsNoFalsePositives) {
    +  StatementMatcher ArgumentY =
    +      declRefExpr(to(varDecl(hasName("y")))).bind("arg");
    +  TypeMatcher IntType = qualType(isInteger()).bind("type");
    +  StatementMatcher CallExpr =
    +      callExpr(forEachArgumentWithParamType(ArgumentY, IntType));
    +
    +  // IntParam does not match.
    +  EXPECT_TRUE(notMatches("void f(int* i) { int* y; f(y); }", CallExpr));
    +  // ArgumentY does not match.
    +  EXPECT_TRUE(notMatches("void f(int i) { int x; f(x); }", CallExpr));
    +}
    +
    +TEST(ForEachArgumentWithParamType, MatchesCXXMemberCallExpr) {
    +  StatementMatcher ArgumentY =
    +      declRefExpr(to(varDecl(hasName("y")))).bind("arg");
    +  TypeMatcher IntType = qualType(isInteger()).bind("type");
    +  StatementMatcher CallExpr =
    +      callExpr(forEachArgumentWithParamType(ArgumentY, IntType));
    +  EXPECT_TRUE(matchAndVerifyResultTrue(
    +      "struct S {"
    +      "  const S& operator[](int i) { return *this; }"
    +      "};"
    +      "void f(S S1) {"
    +      "  int y = 1;"
    +      "  S1[y];"
    +      "}",
+      CallExpr, std::make_unique<VerifyIdIsBoundTo<QualType>>("type", 1)));
    +
    +  StatementMatcher CallExpr2 =
    +      callExpr(forEachArgumentWithParamType(ArgumentY, IntType));
    +  EXPECT_TRUE(matchAndVerifyResultTrue(
    +      "struct S {"
    +      "  static void g(int i);"
    +      "};"
    +      "void f() {"
    +      "  int y = 1;"
    +      "  S::g(y);"
    +      "}",
+      CallExpr2, std::make_unique<VerifyIdIsBoundTo<QualType>>("type", 1)));
    +}
    +
    +TEST(ForEachArgumentWithParamType, MatchesCallExpr) {
    +  StatementMatcher ArgumentY =
    +      declRefExpr(to(varDecl(hasName("y")))).bind("arg");
    +  TypeMatcher IntType = qualType(isInteger()).bind("type");
    +  StatementMatcher CallExpr =
    +      callExpr(forEachArgumentWithParamType(ArgumentY, IntType));
    +
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      "void f(int i) { int y; f(y); }", CallExpr,
+      std::make_unique<VerifyIdIsBoundTo<QualType>>("type")));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      "void f(int i) { int y; f(y); }", CallExpr,
+      std::make_unique<VerifyIdIsBoundTo<DeclRefExpr>>("arg")));
+
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      "void f(int i, int j) { int y; f(y, y); }", CallExpr,
+      std::make_unique<VerifyIdIsBoundTo<QualType>>("type", 2)));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      "void f(int i, int j) { int y; f(y, y); }", CallExpr,
+      std::make_unique<VerifyIdIsBoundTo<DeclRefExpr>>("arg", 2)));
+}
    +
    +TEST(ForEachArgumentWithParamType, MatchesConstructExpr) {
    +  StatementMatcher ArgumentY =
    +      declRefExpr(to(varDecl(hasName("y")))).bind("arg");
    +  TypeMatcher IntType = qualType(isInteger()).bind("type");
    +  StatementMatcher ConstructExpr =
    +      cxxConstructExpr(forEachArgumentWithParamType(ArgumentY, IntType));
    +
    +  EXPECT_TRUE(matchAndVerifyResultTrue(
    +      "struct C {"
    +      "  C(int i) {}"
    +      "};"
    +      "int y = 0;"
    +      "C Obj(y);",
+      ConstructExpr, std::make_unique<VerifyIdIsBoundTo<QualType>>("type")));
    +  EXPECT_TRUE(matchAndVerifyResultTrue(
    +      "struct C {"
    +      "  C(int i) {}"
    +      "};"
    +      "int y = 0;"
    +      "C Obj(y);",
+      ConstructExpr, std::make_unique<VerifyIdIsBoundTo<DeclRefExpr>>("arg")));
    +}
    +
    +TEST(ForEachArgumentWithParamType, HandlesKandRFunctions) {
    +  StatementMatcher ArgumentY =
    +      declRefExpr(to(varDecl(hasName("y")))).bind("arg");
    +  TypeMatcher IntType = qualType(isInteger()).bind("type");
    +  StatementMatcher CallExpr =
    +      callExpr(forEachArgumentWithParamType(ArgumentY, IntType));
    +
    +  EXPECT_TRUE(matchesC("void f();\n"
    +                       "void call_it(void) { int x, y; f(x, y); }\n"
    +                       "void f(a, b) int a, b; {}\n"
    +                       "void call_it2(void) { int x, y; f(x, y); }",
    +                       CallExpr));
    +}
    +
    +TEST(ForEachArgumentWithParamType, HandlesBoundNodesForNonMatches) {
    +  EXPECT_TRUE(matchAndVerifyResultTrue(
    +      "void g(int i, int j) {"
    +      "  int a;"
    +      "  int b;"
    +      "  int c;"
    +      "  g(a, 0);"
    +      "  g(a, b);"
    +      "  g(0, b);"
    +      "}",
    +      functionDecl(
    +          forEachDescendant(varDecl().bind("v")),
    +          forEachDescendant(callExpr(forEachArgumentWithParamType(
    +              declRefExpr(to(decl(equalsBoundNode("v")))), qualType())))),
    +      std::make_unique>("v", 4)));
    +}
    +
    +TEST(ForEachArgumentWithParamType, MatchesFunctionPtrCalls) {
    +  StatementMatcher ArgumentY =
    +      declRefExpr(to(varDecl(hasName("y")))).bind("arg");
    +  TypeMatcher IntType = qualType(builtinType()).bind("type");
    +  StatementMatcher CallExpr =
    +      callExpr(forEachArgumentWithParamType(ArgumentY, IntType));
    +
    +  EXPECT_TRUE(matchAndVerifyResultTrue(
    +      "void f(int i) {"
    +      "void (*f_ptr)(int) = f; int y; f_ptr(y); }",
+      CallExpr, std::make_unique<VerifyIdIsBoundTo<QualType>>("type")));
    +  EXPECT_TRUE(matchAndVerifyResultTrue(
    +      "void f(int i) {"
    +      "void (*f_ptr)(int) = f; int y; f_ptr(y); }",
+      CallExpr, std::make_unique<VerifyIdIsBoundTo<DeclRefExpr>>("arg")));
    +}
    +
    +TEST(ForEachArgumentWithParamType, MatchesMemberFunctionPtrCalls) {
    +  StatementMatcher ArgumentY =
    +      declRefExpr(to(varDecl(hasName("y")))).bind("arg");
    +  TypeMatcher IntType = qualType(builtinType()).bind("type");
    +  StatementMatcher CallExpr =
    +      callExpr(forEachArgumentWithParamType(ArgumentY, IntType));
    +
    +  StringRef S = "struct A {\n"
    +                "  int f(int i) { return i + 1; }\n"
    +                "  int (A::*x)(int);\n"
    +                "};\n"
    +                "void f() {\n"
    +                "  int y = 42;\n"
    +                "  A a;\n"
    +                "  a.x = &A::f;\n"
    +                "  (a.*(a.x))(y);\n"
    +                "}";
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      S, CallExpr, std::make_unique<VerifyIdIsBoundTo<QualType>>("type")));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      S, CallExpr, std::make_unique<VerifyIdIsBoundTo<DeclRefExpr>>("arg")));
    +}
    +
     TEST(QualType, hasCanonicalType) {
       EXPECT_TRUE(notMatches("typedef int &int_ref;"
                                "int a;"
    
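For readers who want to try the new matcher outside the unit tests, a minimal MatchFinder hook-up looks like this (a sketch, not part of the patch; it assumes the usual LibTooling setup around it):

    #include "clang/ASTMatchers/ASTMatchFinder.h"
    #include "clang/ASTMatchers/ASTMatchers.h"

    using namespace clang;
    using namespace clang::ast_matchers;

    // Dumps every integer-typed call argument, including arguments of
    // calls made through plain function pointers, which
    // forEachArgumentWithParam cannot see.
    class IntArgPrinter : public MatchFinder::MatchCallback {
    public:
      void run(const MatchFinder::MatchResult &Result) override {
        if (const auto *Arg = Result.Nodes.getNodeAs<Expr>("arg"))
          Arg->dumpColor();
      }
    };

    void addIntArgMatcher(MatchFinder &Finder, IntArgPrinter &Printer) {
      Finder.addMatcher(callExpr(forEachArgumentWithParamType(
                                     expr().bind("arg"), qualType(isInteger()))),
                        &Printer);
    }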
    From f975ae4867d1fdfaba11a3ec7e479da8fbfd82d8 Mon Sep 17 00:00:00 2001
    From: Zequan Wu 
    Date: Mon, 14 Sep 2020 10:57:23 -0700
    Subject: [PATCH 0739/1079] [CodeGen][typeid] Emit typeinfo directly if type is
     known at compile-time
    
    Differential Revision: https://reviews.llvm.org/D87425
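In source terms, the two cases this change separates look like the following sketch (it mirrors the updated microsoft-abi-typeid.cpp test below; V stands for any polymorphic class):

    #include <typeinfo>

    struct V { virtual ~V() {} };
    V v;

    // The operand names a complete object, so its dynamic type equals its
    // static type and the type_info constant can be emitted directly.
    const std::type_info *known_type() { return &typeid(v); }

    // A reference parameter may bind to an object of a derived class, so
    // the vtable lookup must still be emitted.
    const std::type_info *dynamic_type(V &r) { return &typeid(r); }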
    ---
     clang/include/clang/AST/ExprCXX.h              |  4 ++++
     clang/lib/AST/ExprCXX.cpp                      | 12 ++++++++++++
     clang/lib/CodeGen/CGExprCXX.cpp                |  3 ++-
     clang/test/CodeGenCXX/microsoft-abi-typeid.cpp |  8 +++++---
     4 files changed, 23 insertions(+), 4 deletions(-)
    
    diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h
    index 0ba5e417fd58e..9658f37723e18 100644
    --- a/clang/include/clang/AST/ExprCXX.h
    +++ b/clang/include/clang/AST/ExprCXX.h
    @@ -858,6 +858,10 @@ class CXXTypeidExpr : public Expr {
       /// evaluated, per C++11 [expr.typeid]p3.
       bool isPotentiallyEvaluated() const;
     
    +  /// Best-effort check if the expression operand refers to a most derived
    +  /// object. This is not a strong guarantee.
    +  bool isMostDerived(ASTContext &Context) const;
    +
   bool isTypeOperand() const { return Operand.is<TypeSourceInfo *>(); }
     
       /// Retrieves the type operand of this typeid() expression after
    diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp
    index 3f3f2303587dd..1fd2b8e3b4e26 100644
    --- a/clang/lib/AST/ExprCXX.cpp
    +++ b/clang/lib/AST/ExprCXX.cpp
    @@ -146,6 +146,18 @@ bool CXXTypeidExpr::isPotentiallyEvaluated() const {
       return false;
     }
     
    +bool CXXTypeidExpr::isMostDerived(ASTContext &Context) const {
    +  assert(!isTypeOperand() && "Cannot call isMostDerived for typeid(type)");
    +  const Expr *E = getExprOperand()->IgnoreParenNoopCasts(Context);
+  if (const auto *DRE = dyn_cast<DeclRefExpr>(E)) {
    +    QualType Ty = DRE->getDecl()->getType();
    +    if (!Ty->isPointerType() && !Ty->isReferenceType())
    +      return true;
    +  }
    +
    +  return false;
    +}
    +
     QualType CXXTypeidExpr::getTypeOperand(ASTContext &Context) const {
       assert(isTypeOperand() && "Cannot call getTypeOperand for typeid(expr)");
       Qualifiers Quals;
    diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp
    index 50b6079bd80bf..e33730b9ae901 100644
    --- a/clang/lib/CodeGen/CGExprCXX.cpp
    +++ b/clang/lib/CodeGen/CGExprCXX.cpp
    @@ -2199,7 +2199,8 @@ llvm::Value *CodeGenFunction::EmitCXXTypeidExpr(const CXXTypeidExpr *E) {
       //   polymorphic class type, the result refers to a std::type_info object
       //   representing the type of the most derived object (that is, the dynamic
       //   type) to which the glvalue refers.
    -  if (E->isPotentiallyEvaluated())
    +  // If the operand is already most derived object, no need to look up vtable.
    +  if (E->isPotentiallyEvaluated() && !E->isMostDerived(getContext()))
         return EmitTypeidFromVTable(*this, E->getExprOperand(),
                                     StdTypeInfoPtrTy);
     
    diff --git a/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp b/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp
    index f3bd7e6fd6c80..8598396f06441 100644
    --- a/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp
    +++ b/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp
    @@ -46,9 +46,11 @@ const std::type_info* test4_typeid() { return &typeid(b); }
     
     const std::type_info* test5_typeid() { return &typeid(v); }
     // CHECK: define dso_local %struct.type_info* @"?test5_typeid@@YAPBUtype_info@@XZ"()
    -// CHECK:        [[RT:%.*]] = call i8* @__RTtypeid(i8* bitcast (%struct.V* @"?v@@3UV@@A" to i8*))
    -// CHECK-NEXT:   [[RET:%.*]] = bitcast i8* [[RT]] to %struct.type_info*
    -// CHECK-NEXT:   ret %struct.type_info* [[RET]]
    +// CHECK:   ret %struct.type_info* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUV@@@8" to %struct.type_info*)
    +
    +const std::type_info *test6_typeid() { return &typeid((V &)v); }
    +// CHECK: define dso_local %struct.type_info* @"?test6_typeid@@YAPBUtype_info@@XZ"()
    +// CHECK:   ret %struct.type_info* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUV@@@8" to %struct.type_info*)
     
     namespace PR26329 {
     struct Polymorphic {
    
    From 05aa997d511eed530305e2f3aa401584d0691186 Mon Sep 17 00:00:00 2001
    From: Albion Fung 
    Date: Tue, 15 Sep 2020 15:18:54 -0400
    Subject: [PATCH 0740/1079] [PowerPC] Implement __int128 vector divide
     operations
    
This patch implements __int128 vector divide operations for ISA 3.1.
    
    Differential Revision: https://reviews.llvm.org/D85453
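At the source level, the new overloads make the following compile and select single divide instructions (a sketch; it assumes a Power10 target, e.g. clang -mcpu=pwr10 -maltivec):

    #include <altivec.h>

    vector unsigned __int128 udiv128(vector unsigned __int128 a,
                                     vector unsigned __int128 b) {
      return vec_div(a, b); // selects vdivuq on ISA 3.1
    }

    vector signed __int128 sdiv128(vector signed __int128 a,
                                   vector signed __int128 b) {
      return vec_div(a, b); // selects vdivsq on ISA 3.1
    }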
    ---
     clang/lib/Headers/altivec.h                    | 12 ++++++++++++
     clang/test/CodeGen/builtins-ppc-p10vector.c    | 13 +++++++++++++
     llvm/lib/Target/PowerPC/PPCISelLowering.cpp    |  2 ++
     llvm/lib/Target/PowerPC/PPCInstrPrefix.td      |  6 ++++--
     llvm/test/CodeGen/PowerPC/p10-vector-divide.ll | 18 ++++++++++++++++++
     5 files changed, 49 insertions(+), 2 deletions(-)
    
    diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
    index 22744adefbefd..51fd3d21b5e1c 100644
    --- a/clang/lib/Headers/altivec.h
    +++ b/clang/lib/Headers/altivec.h
    @@ -3368,6 +3368,18 @@ vec_dive(vector unsigned long long __a, vector unsigned long long __b) {
     }
     #endif
     
    +#ifdef __POWER10_VECTOR__
    +static __inline__ vector unsigned __int128 __ATTRS_o_ai
    +vec_div(vector unsigned __int128 __a, vector unsigned __int128 __b) {
    +  return __a / __b;
    +}
    +
    +static __inline__ vector signed __int128 __ATTRS_o_ai
    +vec_div(vector signed __int128 __a, vector signed __int128 __b) {
    +  return __a / __b;
    +}
+#endif // __POWER10_VECTOR__
    +
     /* vec_dss */
     
     #define vec_dss __builtin_altivec_dss
    diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
    index ad63d646196c3..12ec3a6ab8f3d 100644
    --- a/clang/test/CodeGen/builtins-ppc-p10vector.c
    +++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
    @@ -17,6 +17,7 @@ vector signed int vsia, vsib;
     vector unsigned int vuia, vuib, vuic;
     vector signed long long vslla, vsllb;
     vector unsigned long long vulla, vullb, vullc;
    +vector signed __int128 vsi128a, vsi128b;
     vector unsigned __int128 vui128a, vui128b, vui128c;
     vector float vfa, vfb;
     vector double vda, vdb;
    @@ -69,6 +70,18 @@ vector unsigned long long test_vec_div_ull(void) {
       return vec_div(vulla, vullb);
     }
     
    +vector unsigned __int128 test_vec_div_u128(void) {
    +  // CHECK: udiv <1 x i128>
    +  // CHECK-NEXT: ret <1 x i128>
    +  return vec_div(vui128a, vui128b);
    +}
    +
    +vector signed __int128 test_vec_div_s128(void) {
    +  // CHECK: sdiv <1 x i128>
    +  // CHECK-NEXT: ret <1 x i128>
    +  return vec_div(vsi128a, vsi128b);
    +}
    +
     vector signed int test_vec_dive_si(void) {
       // CHECK: @llvm.ppc.altivec.vdivesw(<4 x i32> %{{.+}}, <4 x i32> %{{.+}})
       // CHECK-NEXT: ret <4 x i32>
    diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    index 66711f69a6457..3b0acfa76ec82 100644
    --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    @@ -888,6 +888,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
           setOperationAction(ISD::SREM, MVT::v2i64, Legal);
           setOperationAction(ISD::UREM, MVT::v4i32, Legal);
           setOperationAction(ISD::SREM, MVT::v4i32, Legal);
    +      setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
    +      setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
         }
     
         setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
    index 55872a493dd68..4e951114b90f1 100644
    --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
    +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
    @@ -1285,9 +1285,11 @@ let Predicates = [IsISA3_1] in {
                                [(set v1i128:$vD, (int_ppc_altivec_vmsumcud
                                      v2i64:$vA, v2i64:$vB, v1i128:$vC))]>;
       def VDIVSQ : VXForm_1<267, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
    -                        "vdivsq $vD, $vA, $vB", IIC_VecGeneral, []>;
    +                        "vdivsq $vD, $vA, $vB", IIC_VecGeneral,
    +                        [(set v1i128:$vD, (sdiv v1i128:$vA, v1i128:$vB))]>;
       def VDIVUQ : VXForm_1<11, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
    -                        "vdivuq $vD, $vA, $vB", IIC_VecGeneral, []>;
    +                        "vdivuq $vD, $vA, $vB", IIC_VecGeneral,
    +                        [(set v1i128:$vD, (udiv v1i128:$vA, v1i128:$vB))]>;
       def VDIVESQ : VXForm_1<779, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                              "vdivesq $vD, $vA, $vB", IIC_VecGeneral, []>;
       def VDIVEUQ : VXForm_1<523, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
    diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll b/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
    index dc21b4fb49eef..b5f36a78b2b26 100644
    --- a/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
    +++ b/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
    @@ -76,6 +76,24 @@ entry:
       ret <4 x i32> %div
     }
     
    +define <1 x i128> @test_vdivsq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
    +; CHECK-LABEL: test_vdivsq:
    +; CHECK:       # %bb.0:
    +; CHECK-NEXT:    vdivsq v2, v2, v3
    +; CHECK-NEXT:    blr
    +  %tmp = sdiv <1 x i128> %x, %y
    +  ret <1 x i128> %tmp
    +}
    +
    +define <1 x i128> @test_vdivuq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
    +; CHECK-LABEL: test_vdivuq:
    +; CHECK:       # %bb.0:
    +; CHECK-NEXT:    vdivuq v2, v2, v3
    +; CHECK-NEXT:    blr
    +  %tmp = udiv <1 x i128> %x, %y
    +  ret <1 x i128> %tmp
    +}
    +
     define <2 x i64> @test_vdivesd(<2 x i64> %a, <2 x i64> %b) {
     ; CHECK-LABEL: test_vdivesd:
     ; CHECK:       # %bb.0: # %entry
    
    From d417488ef5a6cd1089900defcd6d5ae5a1d47fd4 Mon Sep 17 00:00:00 2001
    From: Muhammad Asif Manzoor 
    Date: Tue, 15 Sep 2020 15:20:55 -0400
    Subject: [PATCH 0741/1079] [AArch64][SVE] Add lowering for llvm fsqrt
    
Add the functionality to lower fsqrt for the passthru variant.
    
    Reviewed By: paulwalker-arm
    
    Differential Revision: https://reviews.llvm.org/D87707
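Two entry points funnel into the new node: plain llvm.sqrt.* calls on scalable vectors (covered by the IR tests below) and the aarch64_sve_fsqrt intrinsic that the ACLE builtins produce. A sketch of the latter in C, under the assumption that arm_sve.h and an SVE-enabled target are available (svsqrt_f32_m is the ACLE merging form; treat the exact spelling as an assumption, not something this patch defines):

    #include <arm_sve.h>

    // Square root of every active lane; with an all-true predicate no
    // lanes fall back to the merge value (the first operand).
    svfloat32_t sqrt_all(svfloat32_t x) {
      svbool_t pg = svptrue_b32();
      return svsqrt_f32_m(x, pg, x);
    }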
    ---
     .../Target/AArch64/AArch64ISelLowering.cpp    |  8 +++
     llvm/lib/Target/AArch64/AArch64ISelLowering.h |  1 +
     .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  3 +-
     llvm/test/CodeGen/AArch64/sve-fp.ll           | 69 +++++++++++++++++++
     4 files changed, 80 insertions(+), 1 deletion(-)
    
    diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    index 8206614547839..b961e5a30cd0f 100644
    --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    @@ -145,6 +145,7 @@ static bool isMergePassthruOpcode(unsigned Opc) {
       case AArch64ISD::FROUND_MERGE_PASSTHRU:
       case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
       case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
    +  case AArch64ISD::FSQRT_MERGE_PASSTHRU:
         return true;
       }
     }
    @@ -990,6 +991,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
             setOperationAction(ISD::FROUND, VT, Custom);
             setOperationAction(ISD::FROUNDEVEN, VT, Custom);
             setOperationAction(ISD::FTRUNC, VT, Custom);
    +        setOperationAction(ISD::FSQRT, VT, Custom);
           }
         }
     
    @@ -1502,6 +1504,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
         MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
         MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
         MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
    +    MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
         MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
         MAKE_CASE(AArch64ISD::ADC)
         MAKE_CASE(AArch64ISD::SBC)
    @@ -3385,6 +3388,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       case Intrinsic::aarch64_sve_frintz:
         return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
                            Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
    +  case Intrinsic::aarch64_sve_fsqrt:
    +    return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
    +                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
       case Intrinsic::aarch64_sve_convert_to_svbool: {
         EVT OutVT = Op.getValueType();
         EVT InVT = Op.getOperand(1).getValueType();
    @@ -3696,6 +3702,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
         return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
       case ISD::FTRUNC:
         return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
    +  case ISD::FSQRT:
    +    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
       case ISD::FP_ROUND:
       case ISD::STRICT_FP_ROUND:
         return LowerFP_ROUND(Op, DAG);
    diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
    index d6e511891752a..e34caacd272d1 100644
    --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
    +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
    @@ -102,6 +102,7 @@ enum NodeType : unsigned {
       FRINT_MERGE_PASSTHRU,
       FROUND_MERGE_PASSTHRU,
       FROUNDEVEN_MERGE_PASSTHRU,
    +  FSQRT_MERGE_PASSTHRU,
       FTRUNC_MERGE_PASSTHRU,
       SIGN_EXTEND_INREG_MERGE_PASSTHRU,
       ZERO_EXTEND_INREG_MERGE_PASSTHRU,
    diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    index e01a34242a8d7..63545d30b2d11 100644
    --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    @@ -209,6 +209,7 @@ def AArch64frintx_mt : SDNode<"AArch64ISD::FRINT_MERGE_PASSTHRU", SDT_AArch64Ari
     def AArch64frinta_mt : SDNode<"AArch64ISD::FROUND_MERGE_PASSTHRU", SDT_AArch64Arith>;
     def AArch64frintn_mt : SDNode<"AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU", SDT_AArch64Arith>;
     def AArch64frintz_mt : SDNode<"AArch64ISD::FTRUNC_MERGE_PASSTHRU", SDT_AArch64Arith>;
    +def AArch64fsqrt_mt : SDNode<"AArch64ISD::FSQRT_MERGE_PASSTHRU", SDT_AArch64Arith>;
     
     def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
     def AArch64clasta_n   : SDNode<"AArch64ISD::CLASTA_N",   SDT_AArch64ReduceWithInit>;
    @@ -1430,7 +1431,7 @@ multiclass sve_prefetch;
       defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", null_frag, AArch64frinti_mt>;
       defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>;
    -  defm FSQRT_ZPmZ  : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt",  int_aarch64_sve_fsqrt>;
    +  defm FSQRT_ZPmZ  : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt",  null_frag, AArch64fsqrt_mt>;
     
       let Predicates = [HasBF16, HasSVE] in {
         defm BFDOT_ZZZ    : sve_bfloat_dot<"bfdot", int_aarch64_sve_bfdot>;
    diff --git a/llvm/test/CodeGen/AArch64/sve-fp.ll b/llvm/test/CodeGen/AArch64/sve-fp.ll
    index e4aea2847bc4c..5334e66b22f7e 100644
    --- a/llvm/test/CodeGen/AArch64/sve-fp.ll
    +++ b/llvm/test/CodeGen/AArch64/sve-fp.ll
@@ -480,6 +480,68 @@ define void @float_copy(<vscale x 4 x float>* %P1, <vscale x 4 x float>* %P2) {
       ret void
     }
     
    +; FSQRT
    +
+define <vscale x 8 x half> @fsqrt_nxv8f16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: fsqrt_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> %a)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x half> @fsqrt_nxv4f16(<vscale x 4 x half> %a) {
+; CHECK-LABEL: fsqrt_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> %a)
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @fsqrt_nxv2f16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: fsqrt_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> %a)
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x float> @fsqrt_nxv4f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: fsqrt_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fsqrt z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> %a)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x float> @fsqrt_nxv2f32(<vscale x 2 x float> %a) {
+; CHECK-LABEL: fsqrt_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fsqrt z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x float> @llvm.sqrt.nxv2f32(<vscale x 2 x float> %a)
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x double> @fsqrt_nxv2f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: fsqrt_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fsqrt z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> %a)
+  ret <vscale x 2 x double> %res
+}
    +
 declare <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
@@ -495,5 +557,12 @@ declare <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
 declare <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)
 declare <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)
 
+declare <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half>)
+declare <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half>)
+declare <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half>)
+declare <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x float> @llvm.sqrt.nxv2f32(<vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double>)
+
 ; Function Attrs: nounwind readnone
 declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) #2
    
    From f1a3ab904439a63b21ba1c4521765c46630687c6 Mon Sep 17 00:00:00 2001
    From: Snehasish Kumar 
    Date: Wed, 2 Sep 2020 11:00:46 -0700
    Subject: [PATCH 0742/1079] [clang] Add a command line flag for the Machine
     Function Splitter.
    
    This patch adds a command line flag for the machine function splitter
    (added in rG94faadaca4e1).
    
    -fsplit-machine-functions
    Split machine functions using profile information (x86 ELF). On
other targets, an error is emitted. If profile information is not
provided, a warning is emitted notifying the user that profile
    information is required.
    
    Differential Revision: https://reviews.llvm.org/D87047
    ---
     clang/include/clang/Basic/CodeGenOptions.def |  1 +
     clang/include/clang/Driver/Options.td        |  3 +++
     clang/lib/CodeGen/BackendUtil.cpp            |  1 +
     clang/lib/Driver/ToolChains/Clang.cpp        | 20 ++++++++++++++++++++
     clang/lib/Frontend/CompilerInvocation.cpp    |  2 ++
     clang/test/Driver/fsplit-machine-functions.c |  9 +++++++++
     6 files changed, 36 insertions(+)
     create mode 100644 clang/test/Driver/fsplit-machine-functions.c
    
    diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
    index feb4ed01f6e86..b5da2a9cde1ac 100644
    --- a/clang/include/clang/Basic/CodeGenOptions.def
    +++ b/clang/include/clang/Basic/CodeGenOptions.def
    @@ -162,6 +162,7 @@ CODEGENOPT(NoImplicitFloat   , 1, 0) ///< Set when -mno-implicit-float is enable
     CODEGENOPT(NullPointerIsValid , 1, 0) ///< Assume Null pointer deference is defined.
     CODEGENOPT(CorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt
     CODEGENOPT(UniqueInternalLinkageNames, 1, 0) ///< Internal Linkage symbols get unique names.
    +CODEGENOPT(SplitMachineFunctions, 1, 0) ///< Split machine functions using profile information.
     
     /// When false, this attempts to generate code as if the result of an
     /// overflowing conversion matches the overflowing behavior of a target's native
    diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
    index f196c1b72d27f..5b39ea513b243 100644
    --- a/clang/include/clang/Driver/Options.td
    +++ b/clang/include/clang/Driver/Options.td
    @@ -1996,6 +1996,9 @@ defm unique_internal_linkage_names : OptInFFlag<"unique-internal-linkage-names",
     defm unique_section_names : OptOutFFlag<"unique-section-names",
       "", "Don't use unique names for text and data sections">;
     
    +defm split_machine_functions: OptInFFlag<"split-machine-functions",
    +  "Enable", "Disable", " late function splitting using profile information (x86 ELF)">;
    +
     defm strict_return : OptOutFFlag<"strict-return", "",
       "Don't treat control flow paths that fall off the end of a non-void function as unreachable">;
     
    diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
    index 472d86ea2e360..5fc80d4fae71b 100644
    --- a/clang/lib/CodeGen/BackendUtil.cpp
    +++ b/clang/lib/CodeGen/BackendUtil.cpp
    @@ -514,6 +514,7 @@ static void initTargetOptions(DiagnosticsEngine &Diags,
           Options.BBSectionsFuncListBuf = std::move(*MBOrErr);
       }
     
    +  Options.EnableMachineFunctionSplitter = CodeGenOpts.SplitMachineFunctions;
       Options.FunctionSections = CodeGenOpts.FunctionSections;
       Options.DataSections = CodeGenOpts.DataSections;
       Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames;
    diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
    index 40659ebb1395e..51056960761da 100644
    --- a/clang/lib/Driver/ToolChains/Clang.cpp
    +++ b/clang/lib/Driver/ToolChains/Clang.cpp
    @@ -4911,6 +4911,26 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                        options::OPT_fno_unique_basic_block_section_names, false))
         CmdArgs.push_back("-funique-basic-block-section-names");
     
    +  if (Arg *A = Args.getLastArg(options::OPT_fsplit_machine_functions,
    +                               options::OPT_fno_split_machine_functions)) {
    +    // This codegen pass is only available on x86-elf targets.
    +    if (Triple.isX86() && Triple.isOSBinFormatELF()) {
    +      if (A->getOption().matches(options::OPT_fsplit_machine_functions)) {
    +        // If the flag is enabled but no profile information is available then
    +        // emit a warning.
    +        if (getLastProfileUseArg(Args) || getLastProfileSampleUseArg(Args)) {
    +          A->render(Args, CmdArgs);
    +        } else {
    +          D.Diag(diag::warn_drv_diagnostics_hotness_requires_pgo)
    +              << A->getAsString(Args);
    +        }
    +      }
    +    } else {
    +      D.Diag(diag::err_drv_unsupported_opt_for_target)
    +          << A->getAsString(Args) << TripleStr;
    +    }
    +  }
    +
       Args.AddLastArg(CmdArgs, options::OPT_finstrument_functions,
                       options::OPT_finstrument_functions_after_inlining,
                       options::OPT_finstrument_function_entry_bare);
    diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
    index 8393ebe9c07a1..a88a91182307f 100644
    --- a/clang/lib/Frontend/CompilerInvocation.cpp
    +++ b/clang/lib/Frontend/CompilerInvocation.cpp
    @@ -998,6 +998,8 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
       Opts.UniqueInternalLinkageNames =
           Args.hasArg(OPT_funique_internal_linkage_names);
     
    +  Opts.SplitMachineFunctions = Args.hasArg(OPT_fsplit_machine_functions);
    +
       Opts.MergeFunctions = Args.hasArg(OPT_fmerge_functions);
     
       Opts.NoUseJumpTables = Args.hasArg(OPT_fno_jump_tables);
    diff --git a/clang/test/Driver/fsplit-machine-functions.c b/clang/test/Driver/fsplit-machine-functions.c
    new file mode 100644
    index 0000000000000..e126e4d41edbf
    --- /dev/null
    +++ b/clang/test/Driver/fsplit-machine-functions.c
    @@ -0,0 +1,9 @@
    +// RUN: %clang -### -target x86_64 -fprofile-use=default.profdata -fsplit-machine-functions %s -c 2>&1 | FileCheck -check-prefix=CHECK-OPT %s
    +// RUN: %clang -### -target x86_64 -fprofile-use=default.profdata -fsplit-machine-functions -fno-split-machine-functions %s -c 2>&1 | FileCheck -check-prefix=CHECK-NOOPT %s
    +// RUN: %clang -### -target x86_64 -fsplit-machine-functions %s 2>&1 | FileCheck -check-prefix=CHECK-WARN %s
    +// RUN: not %clang -c -target arm-unknown-linux -fsplit-machine-functions %s 2>&1 | FileCheck -check-prefix=CHECK-TRIPLE %s
    +
    +// CHECK-OPT:       "-fsplit-machine-functions"
    +// CHECK-NOOPT-NOT: "-fsplit-machine-functions"
    +// CHECK-WARN:      warning: argument '-fsplit-machine-functions' requires profile-guided optimization information
    +// CHECK-TRIPLE:    error: unsupported option '-fsplit-machine-functions' for target
    
    From 7d6ca2ec57073b9eabe6808ff1fe0560586c5ffb Mon Sep 17 00:00:00 2001
    From: Matt Arsenault 
    Date: Tue, 15 Sep 2020 13:46:23 -0400
    Subject: [PATCH 0743/1079] InferAddressSpaces: Fix assert with unreachable
     code
    
    Invalid IR in unreachable code is technically valid IR. In this case,
    the address space of the value was never inferred, and we tried to
    rewrite it with an invalid address space value which would assert.
    ---
     .../Transforms/Scalar/InferAddressSpaces.cpp  |  6 ++++
     .../InferAddressSpaces/AMDGPU/self-phi.ll     | 28 +++++++++++++++++++
     .../AMDGPU/unreachable-code-assert.ll         | 27 ++++++++++++++++++
     3 files changed, 61 insertions(+)
     create mode 100644 llvm/test/Transforms/InferAddressSpaces/AMDGPU/self-phi.ll
     create mode 100644 llvm/test/Transforms/InferAddressSpaces/AMDGPU/unreachable-code-assert.ll
    
    diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
    index db9cc58bbfc40..0ed6b593a91c7 100644
    --- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
    +++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
    @@ -997,6 +997,12 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
  SmallVector<const Use *, 32> UndefUsesToFix;
       for (Value* V : Postorder) {
         unsigned NewAddrSpace = InferredAddrSpace.lookup(V);
    +
    +    // In some degenerate cases (e.g. invalid IR in unreachable code), we may
    +    // not even infer the value to have its original address space.
    +    if (NewAddrSpace == UninitializedAddressSpace)
    +      continue;
    +
         if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
           Value *New = cloneValueWithNewAddressSpace(
               V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
    diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/self-phi.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/self-phi.ll
    new file mode 100644
    index 0000000000000..2f6496ab19944
    --- /dev/null
    +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/self-phi.ll
    @@ -0,0 +1,28 @@
    +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
    +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -S -infer-address-spaces %s | FileCheck %s
    +
    +define amdgpu_kernel void @phi_self(i8 addrspace(1)* %arg) {
    +; CHECK-LABEL: @phi_self(
    +; CHECK-NEXT:  entry:
    +; CHECK-NEXT:    br label [[LOOP:%.*]]
    +; CHECK:       loop:
    +; CHECK-NEXT:    [[I:%.*]] = phi i8 addrspace(1)* [ [[I]], [[LOOP]] ], [ [[ARG:%.*]], [[ENTRY:%.*]] ]
    +; CHECK-NEXT:    [[I1:%.*]] = load i8, i8 addrspace(1)* [[I]], align 1
    +; CHECK-NEXT:    [[I2:%.*]] = icmp eq i8 [[I1]], 0
    +; CHECK-NEXT:    br i1 [[I2]], label [[LOOP]], label [[RET:%.*]]
    +; CHECK:       ret:
    +; CHECK-NEXT:    ret void
    +;
    +entry:
    +  %cast = addrspacecast i8 addrspace(1)* %arg to i8*
    +  br label %loop
    +
    +loop:
    +  %i = phi i8* [%i, %loop], [%cast, %entry]
    +  %i1 = load i8, i8* %i, align 1
    +  %i2 = icmp eq i8 %i1, 0
    +  br i1 %i2, label %loop, label %ret
    +
    +ret:
    +  ret void
    +}
    diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/unreachable-code-assert.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/unreachable-code-assert.ll
    new file mode 100644
    index 0000000000000..73001b53634c0
    --- /dev/null
    +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/unreachable-code-assert.ll
    @@ -0,0 +1,27 @@
    +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
    +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -S -infer-address-spaces %s | FileCheck %s
    +
    +define amdgpu_kernel void @subclass_data_assert() {
    +; CHECK-LABEL: @subclass_data_assert(
    +; CHECK-NEXT:  entry:
    +; CHECK-NEXT:    unreachable
    +; CHECK:       strlen.while11:
    +; CHECK-NEXT:    [[I:%.*]] = getelementptr i8, i8* [[I]], i64 1
    +; CHECK-NEXT:    [[I1:%.*]] = load i8, i8* [[I]], align 1
    +; CHECK-NEXT:    [[I2:%.*]] = icmp eq i8 [[I1]], 0
    +; CHECK-NEXT:    br i1 [[I2]], label [[STRLEN_WHILE_DONE12:%.*]], label [[STRLEN_WHILE11:%.*]]
    +; CHECK:       strlen.while.done12:
    +; CHECK-NEXT:    ret void
    +;
    +entry:
    +  unreachable
    +
    +strlen.while11:                                   ; preds = %strlen.while11
    +  %i = getelementptr i8, i8* %i, i64 1
    +  %i1 = load i8, i8* %i, align 1
    +  %i2 = icmp eq i8 %i1, 0
    +  br i1 %i2, label %strlen.while.done12, label %strlen.while11
    +
    +strlen.while.done12:                              ; preds = %strlen.while11
    +  ret void
    +}
    
    From 38ecd6161993ea9632efe0c0bf304bf6c2dee98f Mon Sep 17 00:00:00 2001
    From: Ta-Wei Tu 
    Date: Tue, 15 Sep 2020 15:38:06 -0400
    Subject: [PATCH 0744/1079] [TableGen] Fix invalid comparison function
     `SizeOrder` in `getMatchingSubClassWithSubRegs`
    
Building LLVM with -DEXPENSIVE_CHECKS fails with the following error
message when using libstdc++ in debug mode:
    
    Error: comparison doesn't meet irreflexive requirements,
    assert(!(a < a)).
    
    The patch fixes the comparison function SizeOrder by returning false
    when comparing two equal items.
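
As an illustration (hypothetical types, not the TableGen code itself): a
comparator used with std::sort must be a strict weak ordering, and in
particular irreflexive, which the added A == B early-out restores:

  #include <algorithm>
  #include <vector>

  struct RegClass { unsigned NumMembers; bool IsOriginal; };

  // Sketch of the fixed comparator shape: without the A == B check,
  // comparing an "original" class against itself would return true and
  // trip assert(!(a < a)) in libstdc++'s debug checks.
  static bool SizeOrder(const RegClass *A, const RegClass *B) {
    if (A == B)
      return false; // irreflexive
    if (A->NumMembers == B->NumMembers)
      return A->IsOriginal; // prefer the original class on ties
    return A->NumMembers > B->NumMembers; // larger classes sort first
  }

  int main() {
    RegClass X{4, true}, Y{4, false}, Z{8, false};
    std::vector<RegClass *> V{&X, &Y, &Z};
    std::sort(V.begin(), V.end(), SizeOrder); // order: Z, X, Y
  }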
    ---
     llvm/utils/TableGen/CodeGenRegisters.cpp | 2 ++
     1 file changed, 2 insertions(+)
    
    diff --git a/llvm/utils/TableGen/CodeGenRegisters.cpp b/llvm/utils/TableGen/CodeGenRegisters.cpp
    index eeb715dded43e..18a2de18c3e93 100644
    --- a/llvm/utils/TableGen/CodeGenRegisters.cpp
    +++ b/llvm/utils/TableGen/CodeGenRegisters.cpp
    @@ -999,6 +999,8 @@ CodeGenRegisterClass::getMatchingSubClassWithSubRegs(
                           const CodeGenRegisterClass *B) {
         // If there are multiple, identical register classes, prefer the original
         // register class.
    +    if (A == B)
    +      return false;
         if (A->getMembers().size() == B->getMembers().size())
           return A == this;
         return A->getMembers().size() > B->getMembers().size();
    
    From 516a01b5f36d4188778a34202cd11856d70ac808 Mon Sep 17 00:00:00 2001
    From: Stephen Hines 
    Date: Tue, 15 Sep 2020 12:50:42 -0700
    Subject: [PATCH 0745/1079] Implement __isOSVersionAtLeast for Android
    
    Add the implementation of __isOSVersionAtLeast for Android. Currently,
only the major version is checked against the API level of the platform,
which is an integer. The API level is retrieved by reading the system
    property ro.build.version.sdk (and optionally ro.build.version.codename
    to see if the platform is released or not).
    
    Patch by jiyong@google.com
    
    Bug: 150860940
    Bug: 134795810
    Test: m
    
    Reviewed By: srhines
    
    Differential Revision: https://reviews.llvm.org/D86596
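
A hedged usage sketch (the caller below is hypothetical; in practice clang
emits this call when lowering availability checks):

  #include <stdint.h>

  extern "C" int32_t __isOSVersionAtLeast(int32_t Major, int32_t Minor,
                                          int32_t Subminor);

  bool hasAtLeastApiLevel30(void) {
    // On Android only Major (the API level) is meaningful; Minor and
    // Subminor are accepted for interface compatibility but ignored.
    return __isOSVersionAtLeast(30, 0, 0) != 0;
  }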
    ---
     compiler-rt/lib/builtins/os_version_check.c | 38 +++++++++++++++++++++
     1 file changed, 38 insertions(+)
    
    diff --git a/compiler-rt/lib/builtins/os_version_check.c b/compiler-rt/lib/builtins/os_version_check.c
    index 3794b979434cc..fbc68f58caf76 100644
    --- a/compiler-rt/lib/builtins/os_version_check.c
    +++ b/compiler-rt/lib/builtins/os_version_check.c
    @@ -216,6 +216,44 @@ int32_t __isOSVersionAtLeast(int32_t Major, int32_t Minor, int32_t Subminor) {
       return Subminor <= GlobalSubminor;
     }
     
    +#elif __ANDROID__
    +
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/system_properties.h>
    +
    +static int SdkVersion;
    +static int IsPreRelease;
    +
    +static void readSystemProperties(void) {
    +  char buf[PROP_VALUE_MAX];
    +
    +  if (__system_property_get("ro.build.version.sdk", buf) == 0) {
    +    // When the system property doesn't exist, defaults to future API level.
    +    SdkVersion = __ANDROID_API_FUTURE__;
    +  } else {
    +    SdkVersion = atoi(buf);
    +  }
    +
    +  if (__system_property_get("ro.build.version.codename", buf) == 0) {
    +    IsPreRelease = 1;
    +  } else {
    +    IsPreRelease = strcmp(buf, "REL") != 0;
    +  }
    +  return;
    +}
    +
    +int32_t __isOSVersionAtLeast(int32_t Major, int32_t Minor, int32_t Subminor) {
    +  (int32_t) Minor;
    +  (int32_t) Subminor;
    +  static pthread_once_t once = PTHREAD_ONCE_INIT;
    +  pthread_once(&once, readSystemProperties);
    +
    +  return SdkVersion >= Major ||
    +         (IsPreRelease && Major == __ANDROID_API_FUTURE__);
    +}
    +
     #else
     
     // Silence an empty translation unit warning.
    
    From 00ba1a3de7faad80f7bb75d07a1a5da09a009895 Mon Sep 17 00:00:00 2001
    From: Guillaume Chatelet 
    Date: Tue, 15 Sep 2020 20:03:59 +0000
    Subject: [PATCH 0746/1079] [libc] remove useless headers
    
    ---
     libc/src/string/memcpy.h | 1 -
     libc/src/string/memset.h | 2 +-
     2 files changed, 1 insertion(+), 2 deletions(-)
    
    diff --git a/libc/src/string/memcpy.h b/libc/src/string/memcpy.h
    index 39ca4a46f7f35..f643f1de6294e 100644
    --- a/libc/src/string/memcpy.h
    +++ b/libc/src/string/memcpy.h
    @@ -9,7 +9,6 @@
     #ifndef LLVM_LIBC_SRC_STRING_MEMCPY_H
     #define LLVM_LIBC_SRC_STRING_MEMCPY_H
     
    -#include "include/string.h"
 #include <stddef.h> // size_t
     
     namespace __llvm_libc {
    diff --git a/libc/src/string/memset.h b/libc/src/string/memset.h
    index 611e70705b205..e38eb7d78a976 100644
    --- a/libc/src/string/memset.h
    +++ b/libc/src/string/memset.h
    @@ -9,7 +9,7 @@
     #ifndef LLVM_LIBC_SRC_STRING_MEMSET_H
     #define LLVM_LIBC_SRC_STRING_MEMSET_H
     
    -#include "include/string.h"
+#include <stddef.h> // size_t
     
     namespace __llvm_libc {
     
    
    From 3b7f5166bd11fc6cbf96597d26753e8c3fc0e6ab Mon Sep 17 00:00:00 2001
    From: Huihui Zhang 
    Date: Tue, 15 Sep 2020 13:09:56 -0700
    Subject: [PATCH 0747/1079] [SLPVectorizer][SVE] Skip scalable-vector
     instructions before vectorizeSimpleInstructions.
    
For scalable types, the aggregated size is unknown at compile time.
Skip instructions with scalable types to ensure that the list of instructions
passed to vectorizeSimpleInstructions does not contain any scalable-vector instructions.
    
    Reviewed By: RKSimon
    
    Differential Revision: https://reviews.llvm.org/D87550
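
For context, a small sketch of why the size is unavailable, using LLVM's
TypeSize API from around this patch (a sketch, not code from the change):

  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/Type.h"

  // For a scalable vector, getTypeSizeInBits() returns a TypeSize whose
  // byte count is only known as vscale * getKnownMinSize() at run time;
  // calling getFixedSize() on such a TypeSize would assert.
  static bool hasUsableFixedSize(const llvm::DataLayout &DL, llvm::Type *Ty) {
    llvm::TypeSize TS = DL.getTypeSizeInBits(Ty);
    return !TS.isScalable();
  }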
    ---
     .../Transforms/Vectorize/SLPVectorizer.cpp    |  5 +++
     .../SLPVectorizer/AArch64/insertelement.ll    | 44 +++++++++++++++++++
     2 files changed, 49 insertions(+)
     create mode 100644 llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll
    
    diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    index e73113dab6d45..3347419077e3f 100644
    --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    @@ -7508,6 +7508,11 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
   SmallVector<Instruction *, 8> PostProcessInstructions;
   SmallDenseSet<Instruction *, 4> KeyNodes;
       for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
    +    // Skip instructions with scalable type. The num of elements is unknown at
    +    // compile-time for scalable type.
+    if (isa<ScalableVectorType>(it->getType()))
    +      continue;
    +
         // Skip instructions marked for the deletion.
         if (R.isDeleted(&*it))
           continue;
    diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll
    new file mode 100644
    index 0000000000000..b5cab5d3186af
    --- /dev/null
    +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll
    @@ -0,0 +1,44 @@
    +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
    +; RUN: opt < %s -slp-vectorizer -S 2>%t | FileCheck %s
    +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
    +
    +; WARN-NOT: warning
    +
    +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
    +target triple = "aarch64-unknown-linux-gnu"
    +
    +define <2 x float> @insertelement-fixed-vector() {
    +; CHECK-LABEL: @insertelement-fixed-vector(
    +; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
    +; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
    +; CHECK-NEXT:    [[I0:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0
    +; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
    +; CHECK-NEXT:    [[I1:%.*]] = insertelement <2 x float> [[I0]], float [[TMP3]], i32 1
    +; CHECK-NEXT:    ret <2 x float> [[I1]]
    +;
    +  %f0 = tail call fast float @llvm.fabs.f32(float undef)
    +  %f1 = tail call fast float @llvm.fabs.f32(float undef)
    +  %i0 = insertelement <2 x float> undef, float %f0, i32 0
    +  %i1 = insertelement <2 x float> %i0, float %f1, i32 1
    +  ret <2 x float> %i1
    +}
    +
    +; TODO: llvm.fabs could be optimized in vector form. It's legal to extract
    +; elements from fixed-length vector and insert into scalable vector.
+define <vscale x 2 x float> @insertelement-scalable-vector() {
+; CHECK-LABEL: @insertelement-scalable-vector(
+; CHECK-NEXT:    [[F0:%.*]] = tail call fast float @llvm.fabs.f32(float undef)
+; CHECK-NEXT:    [[F1:%.*]] = tail call fast float @llvm.fabs.f32(float undef)
+; CHECK-NEXT:    [[I0:%.*]] = insertelement <vscale x 2 x float> undef, float [[F0]], i32 0
+; CHECK-NEXT:    [[I1:%.*]] = insertelement <vscale x 2 x float> [[I0]], float [[F1]], i32 1
+; CHECK-NEXT:    ret <vscale x 2 x float> [[I1]]
+;
+  %f0 = tail call fast float @llvm.fabs.f32(float undef)
+  %f1 = tail call fast float @llvm.fabs.f32(float undef)
+  %i0 = insertelement <vscale x 2 x float> undef, float %f0, i32 0
+  %i1 = insertelement <vscale x 2 x float> %i0, float %f1, i32 1
+  ret <vscale x 2 x float> %i1
    +}
    +
    +; Function Attrs: nounwind readnone speculatable willreturn
    +declare float @llvm.fabs.f32(float)
    
    From c19fda9aa073254c0979301bd57d875608329fa2 Mon Sep 17 00:00:00 2001
    From: Guillaume Chatelet 
    Date: Tue, 15 Sep 2020 20:09:50 +0000
    Subject: [PATCH 0748/1079] [libc] use stddef instead of string header
    
    ---
     libc/src/string/bzero.h | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/libc/src/string/bzero.h b/libc/src/string/bzero.h
    index a16e1d097f953..064800bad29b5 100644
    --- a/libc/src/string/bzero.h
    +++ b/libc/src/string/bzero.h
    @@ -9,7 +9,7 @@
     #ifndef LLVM_LIBC_SRC_STRING_BZERO_H
     #define LLVM_LIBC_SRC_STRING_BZERO_H
     
    -#include "include/string.h"
    +#include  // size_t
     
     namespace __llvm_libc {
     
    
    From 7d26d6a1b062f7ce820b02b39d102d5f8f15fa5f Mon Sep 17 00:00:00 2001
    From: Saleem Abdulrasool 
    Date: Tue, 8 Sep 2020 22:49:41 +0000
    Subject: [PATCH 0749/1079] Sema: add support for
     `__attribute__((__swift_bridged_typedef__))`
    
    Extend the semantic attributes that clang processes for Swift to include
    `swift_bridged_typedef`.  This attribute enables typedefs to be bridged
    into Swift with a bridged name.
    
    This is based on the work of the original changes in
    https://github.com/llvm/llvm-project-staging/commit/8afaf3aad2af43cfedca7a24cd817848c4e95c0c
    
    Differential Revision: https://reviews.llvm.org/D87396
    Reviewed By: Aaron Ballman
    ---
     clang/include/clang/Basic/Attr.td             |  6 ++++++
     clang/include/clang/Basic/AttrDocs.td         | 21 +++++++++++++++++++
     clang/lib/Sema/SemaDeclAttr.cpp               |  3 +++
     clang/test/AST/attr-swift_bridged_typedef.m   |  9 ++++++++
     clang/test/AST/attr-swift_bridged_typedef.mm  |  8 +++++++
     ...a-attribute-supported-attributes-list.test |  1 +
     .../SemaObjC/attr-swift_bridged_typedef.m     | 14 +++++++++++++
     7 files changed, 62 insertions(+)
     create mode 100644 clang/test/AST/attr-swift_bridged_typedef.m
     create mode 100644 clang/test/AST/attr-swift_bridged_typedef.mm
     create mode 100644 clang/test/SemaObjC/attr-swift_bridged_typedef.m
    
    diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
    index 3221cf23c4b53..6df3486182604 100644
    --- a/clang/include/clang/Basic/Attr.td
    +++ b/clang/include/clang/Basic/Attr.td
    @@ -2130,6 +2130,12 @@ def Regparm : TypeAttr {
       let ASTNode = 0;
     }
     
    +def SwiftBridgedTypedef : InheritableAttr {
    +  let Spellings = [GNU<"swift_bridged_typedef">];
    +  let Subjects = SubjectList<[TypedefName], ErrorDiag>;
    +  let Documentation = [SwiftBridgedTypedefDocs];
    +}
    +
     def SwiftObjCMembers : Attr {
       let Spellings = [GNU<"swift_objc_members">];
       let Subjects = SubjectList<[ObjCInterface], ErrorDiag>;
    diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
    index 939f52dae3d5a..7aff443e9a12e 100644
    --- a/clang/include/clang/Basic/AttrDocs.td
    +++ b/clang/include/clang/Basic/AttrDocs.td
    @@ -3476,6 +3476,27 @@ Swift.
       }];
     }
     
    +def SwiftBridgedTypedefDocs : Documentation {
    +  let Category = SwiftDocs;
    +  let Heading = "swift_bridged";
    +  let Content = [{
    +The ``swift_bridged_typedef`` attribute indicates that when the typedef to which
    +the attribute appertains is imported into Swift, it should refer to the bridged
    +Swift type (e.g. Swift's ``String``) rather than the Objective-C type as written
    +(e.g. ``NSString``).
    +
    +  .. code-block:: c
    +
    +    @interface NSString;
    +    typedef NSString *AliasedString __attribute__((__swift_bridged_typedef__));
    +
    +    extern void acceptsAliasedString(AliasedString _Nonnull parameter);
    +
    +In this case, the function ``acceptsAliasedString`` will be imported into Swift
    +as a function which accepts a ``String`` type parameter.
    +  }];
    +}
    +
     def SwiftObjCMembersDocs : Documentation {
       let Category = SwiftDocs;
       let Heading = "swift_objc_members";
    diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
    index bf9d8497f5a26..02ffd752233d1 100644
    --- a/clang/lib/Sema/SemaDeclAttr.cpp
    +++ b/clang/lib/Sema/SemaDeclAttr.cpp
    @@ -7533,6 +7533,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
         break;
     
       // Swift attributes.
    +  case ParsedAttr::AT_SwiftBridgedTypedef:
+    handleSimpleAttribute<SwiftBridgedTypedefAttr>(S, D, AL);
    +    break;
       case ParsedAttr::AT_SwiftError:
         handleSwiftError(S, D, AL);
         break;
    diff --git a/clang/test/AST/attr-swift_bridged_typedef.m b/clang/test/AST/attr-swift_bridged_typedef.m
    new file mode 100644
    index 0000000000000..8c7c0987569ec
    --- /dev/null
    +++ b/clang/test/AST/attr-swift_bridged_typedef.m
    @@ -0,0 +1,9 @@
    +// RUN: %clang_cc1 -fsyntax-only -ast-dump %s | FileCheck %s
    +
    +typedef struct T TBridged __attribute((__swift_bridged_typedef__));
    +// CHECK: TypedefDecl {{.*}} TBridged 'struct T'
    +// CHECK: SwiftBridgedTypedefAttr
    +
    +typedef struct T TBridged;
    +// CHECK: TypedefDecl {{.*}} TBridged 'struct T'
    +// CHECK: SwiftBridgedTypedefAttr
    diff --git a/clang/test/AST/attr-swift_bridged_typedef.mm b/clang/test/AST/attr-swift_bridged_typedef.mm
    new file mode 100644
    index 0000000000000..44fd022d5ea79
    --- /dev/null
    +++ b/clang/test/AST/attr-swift_bridged_typedef.mm
    @@ -0,0 +1,8 @@
    +// RUN: %clang_cc1 -fsyntax-only %s -ast-dump | FileCheck %s
    +
    +@interface NSString
    +@end
    +
    +using NSStringAlias __attribute__((__swift_bridged_typedef__)) = NSString *;
    +// CHECK: TypeAliasDecl {{.*}} NSStringAlias 'NSString *'
    +// CHECK: SwiftBridgedTypedefAttr
    diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
    index dcf7cd2b7f1a4..024081b02e3e3 100644
    --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
    +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
    @@ -146,6 +146,7 @@
     // CHECK-NEXT: Section (SubjectMatchRule_function, SubjectMatchRule_variable_is_global, SubjectMatchRule_objc_method, SubjectMatchRule_objc_property)
     // CHECK-NEXT: SetTypestate (SubjectMatchRule_function_is_member)
     // CHECK-NEXT: SpeculativeLoadHardening (SubjectMatchRule_function, SubjectMatchRule_objc_method)
    +// CHECK-NEXT: SwiftBridgedTypedef (SubjectMatchRule_type_alias)
     // CHECK-NEXT: SwiftContext (SubjectMatchRule_variable_is_parameter)
     // CHECK-NEXT: SwiftError (SubjectMatchRule_function, SubjectMatchRule_objc_method)
     // CHECK-NEXT: SwiftErrorResult (SubjectMatchRule_variable_is_parameter)
    diff --git a/clang/test/SemaObjC/attr-swift_bridged_typedef.m b/clang/test/SemaObjC/attr-swift_bridged_typedef.m
    new file mode 100644
    index 0000000000000..2836b886a903d
    --- /dev/null
    +++ b/clang/test/SemaObjC/attr-swift_bridged_typedef.m
    @@ -0,0 +1,14 @@
    +// RUN: %clang_cc1 -verify -fsyntax-only %s
    +
    +@interface NSString
    +@end
    +
    +typedef NSString *NSStringAlias __attribute__((__swift_bridged_typedef__));
    +
    +typedef int IntAlias __attribute__((__swift_bridged_typedef__));
    +
    +struct __attribute__((swift_bridged_typedef)) S {};
    +// expected-error@-1 {{'swift_bridged_typedef' attribute only applies to typedefs}}
    +
    +typedef unsigned char UnsignedChar __attribute__((__swift_bridged_typedef__("UnsignedChar")));
    +// expected-error@-1 {{'__swift_bridged_typedef__' attribute takes no arguments}}
    
    From c3fd2a50ba1395b6c2240f6a688c6a1aa975a1fe Mon Sep 17 00:00:00 2001
    From: Guillaume Chatelet 
    Date: Tue, 15 Sep 2020 20:48:08 +0000
    Subject: [PATCH 0750/1079] [libc] Remove special case for 8 and 16 bytes
    
They don't seem to gain much in real apps, and it's better to favor fewer branches and smaller code.
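
For context, a minimal sketch of the overlap strategy the remaining branches
rely on (assumed to match llvm-libc's CopyBlockOverlap semantics): the head
and tail of the buffer are copied with two fixed-size copies that may overlap,
so count == 8 and count == 16 already fall into the count < 16 and count < 32
branches, where the two copies simply coincide.

  #include <cstddef>
  #include <cstring>

  // Copies 'count' bytes, kBlock <= count < 2 * kBlock, using two
  // fixed-size block copies that overlap in the middle (or coincide
  // exactly when count == kBlock).
  template <size_t kBlock>
  static void CopyBlockOverlap(char *__restrict dst,
                               const char *__restrict src, size_t count) {
    std::memcpy(dst, src, kBlock);                                   // head
    std::memcpy(dst + count - kBlock, src + count - kBlock, kBlock); // tail
  }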
    ---
     libc/src/string/memcpy.cpp     | 4 ----
     libc/src/string/x86/memcpy.cpp | 4 ----
     2 files changed, 8 deletions(-)
    
    diff --git a/libc/src/string/memcpy.cpp b/libc/src/string/memcpy.cpp
    index a8056714a225f..00d66ea677d25 100644
    --- a/libc/src/string/memcpy.cpp
    +++ b/libc/src/string/memcpy.cpp
    @@ -44,12 +44,8 @@ static void memcpy_impl(char *__restrict dst, const char *__restrict src,
         return CopyBlock<4>(dst, src);
       if (count < 8)
         return CopyBlockOverlap<4>(dst, src, count);
    -  if (count == 8)
    -    return CopyBlock<8>(dst, src);
       if (count < 16)
         return CopyBlockOverlap<8>(dst, src, count);
    -  if (count == 16)
    -    return CopyBlock<16>(dst, src);
       if (count < 32)
         return CopyBlockOverlap<16>(dst, src, count);
       if (count < 64)
    diff --git a/libc/src/string/x86/memcpy.cpp b/libc/src/string/x86/memcpy.cpp
    index 811ce5183fe4e..2e2148eb7289b 100644
    --- a/libc/src/string/x86/memcpy.cpp
    +++ b/libc/src/string/x86/memcpy.cpp
    @@ -59,12 +59,8 @@ static void memcpy_x86(char *__restrict dst, const char *__restrict src,
         return CopyBlock<4>(dst, src);
       if (count < 8)
         return CopyBlockOverlap<4>(dst, src, count);
    -  if (count == 8)
    -    return CopyBlock<8>(dst, src);
       if (count < 16)
         return CopyBlockOverlap<8>(dst, src, count);
    -  if (count == 16)
    -    return CopyBlock<16>(dst, src);
       if (count < 32)
         return CopyBlockOverlap<16>(dst, src, count);
       if (count < 64)
    
    From 609f5e050cea760694a46e126e5aa3f62660cae9 Mon Sep 17 00:00:00 2001
    From: Diego Caballero 
    Date: Fri, 4 Sep 2020 11:44:32 -0700
    Subject: [PATCH 0751/1079] [mlir] Rename 'setInsertionPointAfter' to avoid
     ambiguity
    
    Rename 'setInsertionPointAfter(Value)' API to avoid ambiguity with
    'setInsertionPointAfter(Operation *)' for SingleResult operations which
    implicitly convert to Value (see D86756).
    
    Differential Revision: https://reviews.llvm.org/D87155
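
A sketch of the ambiguity being avoided (MyOp is a hypothetical single-result
op type; such classes convert implicitly to both Operation * and Value):

  #include "mlir/IR/Builders.h"

  // Before the rename, both overloads of setInsertionPointAfter were
  // viable for a single-result op, since it converts to Operation * and,
  // via D86756, to Value as well. The renamed API makes the intent explicit.
  void example(mlir::OpBuilder &builder, MyOp op) {
    builder.setInsertionPointAfter(op.getOperation());   // Operation * form
    builder.setInsertionPointAfterValue(op.getResult()); // Value form
  }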
    ---
     mlir/include/mlir/IR/Builders.h                       | 2 +-
     mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp | 2 +-
     2 files changed, 2 insertions(+), 2 deletions(-)
    
    diff --git a/mlir/include/mlir/IR/Builders.h b/mlir/include/mlir/IR/Builders.h
    index 0c30869752ea3..ccf11489add07 100644
    --- a/mlir/include/mlir/IR/Builders.h
    +++ b/mlir/include/mlir/IR/Builders.h
    @@ -333,7 +333,7 @@ class OpBuilder : public Builder {
       /// defining operation. This will cause subsequent insertions to go right
   /// after it. Otherwise, value is a BlockArgument. Sets the insertion point to
       /// the start of its block.
    -  void setInsertionPointAfter(Value val) {
    +  void setInsertionPointAfterValue(Value val) {
         if (Operation *op = val.getDefiningOp()) {
           setInsertionPointAfter(op);
         } else {
    diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
    index 1de7b8957711a..ee52fe44830c4 100644
    --- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
    +++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
    @@ -945,7 +945,7 @@ static bool isUniformDefinition(Value value,
     /// vectorization strategy in 'state'.
     static Value vectorizeUniform(Value value, VectorizationState *state) {
       OpBuilder builder(value.getContext());
    -  builder.setInsertionPointAfter(value);
    +  builder.setInsertionPointAfterValue(value);
     
       auto vectorTy = getVectorType(value.getType(), state->strategy);
   auto bcast = builder.create<vector::BroadcastOp>(value.getLoc(), vectorTy, value);
    
    From 9e3842d60351f986d77dfe0a94f76e4fd895f188 Mon Sep 17 00:00:00 2001
    From: Alexey Bataev 
    Date: Tue, 15 Sep 2020 15:57:11 -0400
    Subject: [PATCH 0752/1079] [OPENMP]Fix codegen for is_device_ptr component,
     captured by reference.
    
The component needs to be mapped as TO instead of as a literal, because a
reference to the component must be passed if the pointer is overaligned.
    
    Reviewed By: jdoerfert
    
    Differential Revision: https://reviews.llvm.org/D84887
    ---
     clang/lib/CodeGen/CGOpenMPRuntime.cpp         | 10 +++--
     .../OpenMP/target_is_device_ptr_codegen.cpp   | 37 +++++++++++++++++++
     2 files changed, 43 insertions(+), 4 deletions(-)
    
    diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
    index e507e434d9e1c..dfd9752c20c9b 100644
    --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
    +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
    @@ -8460,10 +8460,12 @@ class MappableExprsHandler {
         if (DevPointersMap.count(VD)) {
           CombinedInfo.BasePointers.emplace_back(Arg, VD);
           CombinedInfo.Pointers.push_back(Arg);
    -      CombinedInfo.Sizes.push_back(
    -          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
    -                                    CGF.Int64Ty, /*isSigned=*/true));
    -      CombinedInfo.Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
    +      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
    +          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
    +          /*isSigned=*/true));
    +      CombinedInfo.Types.push_back(
    +          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
    +          OMP_MAP_TARGET_PARAM);
           CombinedInfo.Mappers.push_back(nullptr);
           return;
         }
    diff --git a/clang/test/OpenMP/target_is_device_ptr_codegen.cpp b/clang/test/OpenMP/target_is_device_ptr_codegen.cpp
    index 7c2eef577f9f3..a7c585751161e 100644
    --- a/clang/test/OpenMP/target_is_device_ptr_codegen.cpp
    +++ b/clang/test/OpenMP/target_is_device_ptr_codegen.cpp
    @@ -285,4 +285,41 @@ void bar(double *arg){
       ++arg;
     }
     #endif
    +///==========================================================================///
    +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
    +// RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
    +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-64
    +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-32
    +// RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
    +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-32
    +
    +// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
    +// RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
    +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
    +// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
    +// RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
    +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
    +// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
    +#ifdef CK3
    +
    +// CK3-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[SZ:64|32]]] [i{{64|32}} {{8|4}}]
    +// OMP_MAP_TARGET_PARAM = 0x20 | OMP_MAP_TO = 0x1 = 0x21
    +// CK3-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 [[#0x21]]]
    +void bar() {
    +  __attribute__((aligned(64))) double *ptr;
    +  // CK3-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
    +  // CK3-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
    +  // CK3-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
    +  // CK3-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
    +  // CK3-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
    +  // CK3-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to double***
    +  // CK3-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to double***
    +  // CK3-DAG: store double** [[PTR:%.+]], double*** [[CBP1]]
    +  // CK3-DAG: store double** [[PTR]], double*** [[CP1]]
    +
    +  // CK3: call void [[KERNEL:@.+]](double** [[PTR]])
    +#pragma omp target is_device_ptr(ptr)
    +  *ptr = 0;
    +}
    +#endif
     #endif
    
    From c3e6054b07be1340fb255abe1e3c85b911710059 Mon Sep 17 00:00:00 2001
    From: Joseph Huber 
    Date: Tue, 15 Sep 2020 15:04:37 -0400
    Subject: [PATCH 0753/1079] [OpenMP] Additional Information for Libomptarget
     Mappings
    
    Summary:
This patch adds additional support for printing information from Libomptarget
for already existing maps and for printing the final data mapped on the device
at device destruction.
    
Reviewers: jdoerfert, gkistanova
    
    Subscribers: guansong openmp-commits sstefan1 yaxunl
    
    Tags: #OpenMP
    
    Differential Revision: https://reviews.llvm.org/D87722
    ---
     openmp/libomptarget/src/device.cpp    | 20 ++++++++++++++------
     openmp/libomptarget/src/interface.cpp | 21 ++++-----------------
     openmp/libomptarget/src/private.h     | 16 ++++++++++++++++
     3 files changed, 34 insertions(+), 23 deletions(-)
    
    diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
    index fdf625cb71f66..79feebe6f32ba 100644
    --- a/openmp/libomptarget/src/device.cpp
    +++ b/openmp/libomptarget/src/device.cpp
    @@ -17,6 +17,7 @@
     
     #include 
     #include 
    +#include 
     #include 
     
     /// Map between Device ID (i.e. openmp device id) and its DeviceTy.
    @@ -50,7 +51,12 @@ DeviceTy::DeviceTy(RTLInfoTy *RTL)
           ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(), ShadowMtx(),
           MemoryManager(nullptr) {}
     
    -DeviceTy::~DeviceTy() = default;
    +DeviceTy::~DeviceTy() {
    +  if (DeviceID == -1 || getInfoLevel() < 1)
    +    return;
    +
    +  dumpTargetPointerMappings(*this);
    +}
     
     int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
       DataMapMtx.lock();
    @@ -214,11 +220,13 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
           HT.incRefCount();
     
         uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
    -    DP("Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
    -        "Size=%" PRId64 ",%s RefCount=%s\n", (IsImplicit ? " (implicit)" : ""),
    -        DPxPTR(HstPtrBegin), DPxPTR(tp), Size,
    -        (UpdateRefCount ? " updated" : ""),
    -        HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str());
    +    INFO(DeviceID,
    +         "Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
    +         ", "
    +         "Size=%" PRId64 ",%s RefCount=%s\n",
    +         (IsImplicit ? " (implicit)" : ""), DPxPTR(HstPtrBegin), DPxPTR(tp),
    +         Size, (UpdateRefCount ? " updated" : ""),
    +         HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str());
         rc = (void *)tp;
       } else if ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && !IsImplicit) {
         // Explicit extension of mapped data - not allowed.
    diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
    index 084f2ac5aee3c..76a9e766ec76e 100644
    --- a/openmp/libomptarget/src/interface.cpp
    +++ b/openmp/libomptarget/src/interface.cpp
    @@ -24,21 +24,6 @@
     kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default;
     std::mutex TargetOffloadMtx;
     
    -////////////////////////////////////////////////////////////////////////////////
    -/// dump a table of all the host-target pointer pairs on failure
    -static void dumpTargetPointerMappings() {
    -  for (const auto &Device : Devices) {
    -    fprintf(stderr, "Device %d:\n", Device.DeviceID);
    -    fprintf(stderr, "%-18s %-18s %s\n", "Host Ptr", "Target Ptr", "Size (B)");
    -    for (const auto &HostTargetMap : Device.HostDataToTargetMap) {
    -      fprintf(stderr, DPxMOD " " DPxMOD " %lu\n",
    -              DPxPTR(HostTargetMap.HstPtrBegin),
    -              DPxPTR(HostTargetMap.TgtPtrBegin),
    -              HostTargetMap.HstPtrEnd - HostTargetMap.HstPtrBegin);
    -    }
    -  }
    -}
    -
     ////////////////////////////////////////////////////////////////////////////////
     /// manage the success or failure of a target construct
     static void HandleDefaultTargetOffload() {
    @@ -76,9 +61,11 @@ static void HandleTargetOutcome(bool success) {
         case tgt_mandatory:
           if (!success) {
             if (getInfoLevel() > 1)
    -          dumpTargetPointerMappings();
    +          for (const auto &Device : Devices)
    +            dumpTargetPointerMappings(Device);
             else
    -          FAILURE_MESSAGE("run with env LIBOMPTARGET_INFO>1 to dump tables\n");
+          FAILURE_MESSAGE("run with env LIBOMPTARGET_INFO>1 to dump host-target "
    +                          "pointer maps\n");
     
             FATAL_MESSAGE0(1, "failure of target construct while offloading is mandatory");
           }
    diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h
    index f01714808dd4e..17ca81e353f1a 100644
    --- a/openmp/libomptarget/src/private.h
    +++ b/openmp/libomptarget/src/private.h
    @@ -96,4 +96,20 @@ int __kmpc_get_target_offload(void) __attribute__((weak));
     #define TARGET_NAME Libomptarget
     #define DEBUG_PREFIX GETNAME(TARGET_NAME)
     
    +////////////////////////////////////////////////////////////////////////////////
    +/// dump a table of all the host-target pointer pairs on failure
    +static inline void dumpTargetPointerMappings(const DeviceTy &Device) {
    +  if (Device.HostDataToTargetMap.empty())
    +    return;
    +
    +  fprintf(stderr, "Device %d Host-Device Pointer Mappings:\n", Device.DeviceID);
    +  fprintf(stderr, "%-18s %-18s %s\n", "Host Ptr", "Target Ptr", "Size (B)");
    +  for (const auto &HostTargetMap : Device.HostDataToTargetMap) {
    +    fprintf(stderr, DPxMOD " " DPxMOD " %lu\n",
    +            DPxPTR(HostTargetMap.HstPtrBegin),
    +            DPxPTR(HostTargetMap.TgtPtrBegin),
    +            HostTargetMap.HstPtrEnd - HostTargetMap.HstPtrBegin);
    +  }
    +}
    +
     #endif
    
    From 7b4cc0961b142877794645576d2393af43c48069 Mon Sep 17 00:00:00 2001
    From: Xun Li 
    Date: Tue, 15 Sep 2020 15:19:57 -0700
    Subject: [PATCH 0754/1079] [TSAN] Handle musttail call properly in
     EscapeEnumerator (and TSAN)
    
Call instructions with the musttail tag must be lowered as tail calls; anything else can lead to incorrect program behavior.
When TSAN instruments functions, it breaks this contract by adding a call to the TSAN exit function in between the musttail call and the return instruction, and by inserting exception handling code.
This happens through EscapeEnumerator, which adds exception handling code and returns ret instructions as the places to insert instrumentation calls.
This becomes especially problematic for coroutines, because coroutines rely on tail calls to do symmetric transfers properly.
To fix this, this patch moves the insertion point for instrumentation calls to just before the musttail call for ret instructions that follow musttail calls, and skips exception handling for musttail calls.
    
    Differential Revision: https://reviews.llvm.org/D87620
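
For background (illustrative, not part of the patch): symmetric transfer
between coroutines only runs in constant stack space when the resume call is a
genuine tail call, which is why nothing may be inserted between the call and
the return. In source form the contract looks like:

  // Assumption: [[clang::musttail]] is the source-level spelling in newer
  // clang releases; at the IR level it corresponds to the 'musttail' call
  // marker this patch is careful not to break.
  using ResumeFn = void (*)(void *);
  extern ResumeFn nextResume; // hypothetical resume-function pointer

  void resumeNext(void *frame) {
    [[clang::musttail]] return nextResume(frame);
  }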
    ---
     .../lib/Transforms/Utils/EscapeEnumerator.cpp | 25 ++++++++++++++--
     .../ThreadSanitizer/tsan_musttail.ll          | 30 +++++++++++++++++++
     2 files changed, 53 insertions(+), 2 deletions(-)
     create mode 100644 llvm/test/Instrumentation/ThreadSanitizer/tsan_musttail.ll
    
    diff --git a/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
    index cae9d9ee6d709..dca58bcdc0b73 100644
    --- a/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
    +++ b/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
    @@ -41,7 +41,27 @@ IRBuilder<> *EscapeEnumerator::Next() {
     if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
           continue;
     
    -    Builder.SetInsertPoint(TI);
+    // If the ret instruction is followed by a musttail call,
+    // or a bitcast instruction and then a musttail call, we should return
+    // the musttail call as the insertion point to not break the musttail
+    // contract.
+    auto AdjustMustTailCall = [&](Instruction *I) -> Instruction * {
+      auto *RI = dyn_cast<ReturnInst>(I);
+      if (!RI || !RI->getPrevNode())
+        return I;
+      auto *CI = dyn_cast<CallInst>(RI->getPrevNode());
+      if (CI && CI->isMustTailCall())
+        return CI;
+      auto *BI = dyn_cast<BitCastInst>(RI->getPrevNode());
+      if (!BI || !BI->getPrevNode())
+        return I;
+      CI = dyn_cast<CallInst>(BI->getPrevNode());
+      if (CI && CI->isMustTailCall())
+        return CI;
+      return I;
+    };
    +
    +    Builder.SetInsertPoint(AdjustMustTailCall(TI));
         return &Builder;
       }
     
    @@ -54,11 +74,12 @@ IRBuilder<> *EscapeEnumerator::Next() {
         return nullptr;
     
       // Find all 'call' instructions that may throw.
+  // We cannot transform calls with musttail tag.
   SmallVector<CallInst *, 16> Calls;
       for (BasicBlock &BB : F)
         for (Instruction &II : BB)
       if (CallInst *CI = dyn_cast<CallInst>(&II))
    -        if (!CI->doesNotThrow())
    +        if (!CI->doesNotThrow() && !CI->isMustTailCall())
               Calls.push_back(CI);
     
       if (Calls.empty())
    diff --git a/llvm/test/Instrumentation/ThreadSanitizer/tsan_musttail.ll b/llvm/test/Instrumentation/ThreadSanitizer/tsan_musttail.ll
    new file mode 100644
    index 0000000000000..bb681f67e0ecd
    --- /dev/null
    +++ b/llvm/test/Instrumentation/ThreadSanitizer/tsan_musttail.ll
    @@ -0,0 +1,30 @@
+; To test that __tsan_func_exit always happens before the musttail call and that no exception handling code is inserted.
    +; RUN: opt < %s -tsan -S | FileCheck %s
    +
    +define internal i32 @preallocated_musttail(i32* preallocated(i32) %p) sanitize_thread {
    +  %rv = load i32, i32* %p
    +  ret i32 %rv
    +}
    +
    +define i32 @call_preallocated_musttail(i32* preallocated(i32) %a) sanitize_thread {
    +  %r = musttail call i32 @preallocated_musttail(i32* preallocated(i32) %a)
    +  ret i32 %r
    +}
    +
    +; CHECK-LABEL:  define i32 @call_preallocated_musttail(i32* preallocated(i32) %a) 
    +; CHECK:          call void @__tsan_func_exit()
    +; CHECK-NEXT:     %r = musttail call i32 @preallocated_musttail(i32* preallocated(i32) %a)
    +; CHECK-NEXT:     ret i32 %r
    +
    +
    +define i32 @call_preallocated_musttail_cast(i32* preallocated(i32) %a) sanitize_thread {
    +  %r = musttail call i32 @preallocated_musttail(i32* preallocated(i32) %a)
    +  %t = bitcast i32 %r to i32
    +  ret i32 %t
    +}
    +
    +; CHECK-LABEL:  define i32 @call_preallocated_musttail_cast(i32* preallocated(i32) %a)
    +; CHECK:          call void @__tsan_func_exit()
    +; CHECK-NEXT:     %r = musttail call i32 @preallocated_musttail(i32* preallocated(i32) %a)
    +; CHECK-NEXT:     %t = bitcast i32 %r to i32
    +; CHECK-NEXT:     ret i32 %t
    
    From 277de43d88c9d0d57235e3df617d462487e17e20 Mon Sep 17 00:00:00 2001
    From: Stanislav Mekhanoshin 
    Date: Thu, 10 Sep 2020 15:10:52 -0700
    Subject: [PATCH 0755/1079] [AMDGPU] Unify intrinsic ret/nortn interface
    
We have a single noret intrinsic and a lot of special handling
around it. Declare it just like any other intrinsic, but do not
define the rtn instructions themselves.
    
    Differential Revision: https://reviews.llvm.org/D87719
    ---
     llvm/include/llvm/IR/IntrinsicsAMDGPU.td      |  39 ++---
     .../AMDGPU/AMDGPUInstructionSelector.cpp      | 148 ++++++++++++++++--
     .../Target/AMDGPU/AMDGPUInstructionSelector.h |   6 +-
     llvm/lib/Target/AMDGPU/AMDGPUInstructions.td  |   2 +
     .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |  25 +--
     llvm/lib/Target/AMDGPU/BUFInstructions.td     |  46 +++---
     llvm/lib/Target/AMDGPU/FLATInstructions.td    |  27 ++--
     llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  97 ++++++------
     llvm/lib/Target/AMDGPU/SIInstrInfo.td         |  26 +--
     llvm/lib/Target/AMDGPU/SIInstructions.td      |   2 +-
     ...llvm.amdgcn.global.atomic.fadd-with-ret.ll |  10 ++
     .../llvm.amdgcn.global.atomic.fadd.ll         |  16 +-
     .../llvm.amdgcn.raw.buffer.atomic.fadd.ll     |  24 +--
     ...dgcn.struct.buffer.atomic.fadd-with-ret.ll |  11 ++
     .../llvm.amdgcn.struct.buffer.atomic.fadd.ll  |  24 +--
     .../regbankselect-amdgcn-s-buffer-load.mir    |  12 +-
     .../regbankselect-amdgcn.s.buffer.load.ll     | 112 ++++---------
     .../AMDGPU/buffer-intrinsics-mmo-offsets.ll   |  54 +++----
     .../AMDGPU/cgp-addressing-modes-gfx1030.ll    |   1 -
     .../AMDGPU/cgp-addressing-modes-gfx908.ll     |   9 +-
     .../AMDGPU/fail-select-buffer-atomic-fadd.ll  |   6 +-
     llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll |  14 +-
     .../AMDGPU/global-saddr-atomics.gfx908.ll     |  12 +-
     .../CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll |  30 ++--
     .../llvm.amdgcn.raw.buffer.atomic.fadd.ll     |  14 +-
     .../llvm.amdgcn.struct.buffer.atomic.fadd.ll  |  12 +-
     .../test/CodeGen/AMDGPU/shl_add_ptr_global.ll |   4 +-
     27 files changed, 421 insertions(+), 362 deletions(-)
     create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd-with-ret.ll
     create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll
    
    diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
    index 3536facfa9aea..2aff207ce0149 100644
    --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
    +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
    @@ -1012,7 +1012,7 @@ def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
       AMDGPURsrcIntrinsic<2, 0>;
     
     // gfx908 intrinsic
-def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty, 1>;
+def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
     
 class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = 0> : Intrinsic <
       !if(NoRtn, [], [data_ty]),
    @@ -1049,7 +1049,7 @@ def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
       AMDGPURsrcIntrinsic<2, 0>;
     
     // gfx908 intrinsic
-def int_amdgcn_struct_buffer_atomic_fadd : AMDGPUStructBufferAtomic<llvm_anyfloat_ty, 1>;
+def int_amdgcn_struct_buffer_atomic_fadd : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;
     
     
     // Obsolescent tbuffer intrinsics.
    @@ -1181,6 +1181,19 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
       AMDGPURsrcIntrinsic<2, 0>;
     
     def int_amdgcn_buffer_atomic_csub : AMDGPUBufferAtomic;
    +
    +class AMDGPUBufferAtomicFP : Intrinsic <
    +  [llvm_anyfloat_ty],
    +  [LLVMMatchType<0>, // vdata(VGPR)
    +   llvm_v4i32_ty,    // rsrc(SGPR)
    +   llvm_i32_ty,      // vindex(VGPR)
    +   llvm_i32_ty,      // offset(SGPR/VGPR/imm)
    +   llvm_i1_ty],      // slc(imm)
    +  [ImmArg>], "", [SDNPMemOperand]>,
    +  AMDGPURsrcIntrinsic<1, 0>;
    +
    +// Legacy form of the intrinsic. raw and struct forms should be preferred.
    +def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicFP;
     } // defset AMDGPUBufferIntrinsics
     
     // Uses that do not set the done bit should set IntrWriteMem on the
    @@ -1800,27 +1813,7 @@ def int_amdgcn_udot8 :
     // gfx908 intrinsics
     // ===----------------------------------------------------------------------===//
     
    -class AMDGPUBufferAtomicNoRtn : Intrinsic <
    -  [],
    -  [llvm_anyfloat_ty,  // vdata(VGPR)
    -   llvm_v4i32_ty,     // rsrc(SGPR)
    -   llvm_i32_ty,       // vindex(VGPR)
    -   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
    -   llvm_i1_ty],       // slc(imm)
-  [ImmArg<ArgIndex<4>>, IntrWillReturn], "", [SDNPMemOperand]>,
    -  AMDGPURsrcIntrinsic<1, 0>;
    -
    -class AMDGPUGlobalAtomicNoRtn : Intrinsic <
    -  [],
    -  [llvm_anyptr_ty,    // vaddr
    -   llvm_anyfloat_ty],               // vdata(VGPR)
-  [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>], "",
    -  [SDNPMemOperand]>;
    -
    -def int_amdgcn_global_atomic_fadd    : AMDGPUGlobalAtomicNoRtn;
    -
    -// Legacy form of the intrinsic. raw and struct forms should be preferred.
    -def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicNoRtn;
+def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
     
     // llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
     def int_amdgcn_mfma_f32_32x32x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x1f32">,
    diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    index 7ed6688439355..d84d6309bb266 100644
    --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    @@ -30,6 +30,7 @@
     #include "llvm/CodeGen/MachineInstr.h"
     #include "llvm/CodeGen/MachineInstrBuilder.h"
     #include "llvm/CodeGen/MachineRegisterInfo.h"
    +#include "llvm/IR/DiagnosticInfo.h"
     #include "llvm/IR/Type.h"
     #include "llvm/Support/Debug.h"
     #include "llvm/Support/raw_ostream.h"
    @@ -1743,6 +1744,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
         return selectDSAppendConsume(I, false);
       case Intrinsic::amdgcn_s_barrier:
         return selectSBarrier(I);
    +  case Intrinsic::amdgcn_global_atomic_fadd:
    +    return selectGlobalAtomicFaddIntrinsic(I);
       default: {
         return selectImpl(I, *CoverageInfo);
       }
    @@ -2899,6 +2902,123 @@ bool AMDGPUInstructionSelector::selectG_SHUFFLE_VECTOR(
       return true;
     }
     
    +bool AMDGPUInstructionSelector::selectAMDGPU_BUFFER_ATOMIC_FADD(
    +  MachineInstr &MI) const {
    +
    +  MachineBasicBlock *MBB = MI.getParent();
    +  const DebugLoc &DL = MI.getDebugLoc();
    +
    +  if (!MRI->use_nodbg_empty(MI.getOperand(0).getReg())) {
    +    Function &F = MBB->getParent()->getFunction();
    +    DiagnosticInfoUnsupported
    +      NoFpRet(F, "return versions of fp atomics not supported",
    +              MI.getDebugLoc(), DS_Error);
    +    F.getContext().diagnose(NoFpRet);
    +    return false;
    +  }
    +
    +  // FIXME: This is only needed because tablegen requires number of dst operands
    +  // in match and replace pattern to be the same. Otherwise patterns can be
    +  // exported from SDag path.
    +  MachineOperand &VDataIn = MI.getOperand(1);
    +  MachineOperand &VIndex = MI.getOperand(3);
    +  MachineOperand &VOffset = MI.getOperand(4);
    +  MachineOperand &SOffset = MI.getOperand(5);
    +  int16_t Offset = MI.getOperand(6).getImm();
    +
    +  bool HasVOffset = !isOperandImmEqual(VOffset, 0, *MRI);
    +  bool HasVIndex = !isOperandImmEqual(VIndex, 0, *MRI);
    +
    +  unsigned Opcode;
    +  if (HasVOffset) {
    +    Opcode = HasVIndex ? AMDGPU::BUFFER_ATOMIC_ADD_F32_BOTHEN
    +                       : AMDGPU::BUFFER_ATOMIC_ADD_F32_OFFEN;
    +  } else {
    +    Opcode = HasVIndex ? AMDGPU::BUFFER_ATOMIC_ADD_F32_IDXEN
    +                       : AMDGPU::BUFFER_ATOMIC_ADD_F32_OFFSET;
    +  }
    +
    +  if (MRI->getType(VDataIn.getReg()).isVector()) {
    +    switch (Opcode) {
    +    case AMDGPU::BUFFER_ATOMIC_ADD_F32_BOTHEN:
    +      Opcode = AMDGPU::BUFFER_ATOMIC_PK_ADD_F16_BOTHEN;
    +      break;
    +    case AMDGPU::BUFFER_ATOMIC_ADD_F32_OFFEN:
    +      Opcode = AMDGPU::BUFFER_ATOMIC_PK_ADD_F16_OFFEN;
    +      break;
    +    case AMDGPU::BUFFER_ATOMIC_ADD_F32_IDXEN:
    +      Opcode = AMDGPU::BUFFER_ATOMIC_PK_ADD_F16_IDXEN;
    +      break;
    +    case AMDGPU::BUFFER_ATOMIC_ADD_F32_OFFSET:
    +      Opcode = AMDGPU::BUFFER_ATOMIC_PK_ADD_F16_OFFSET;
    +      break;
    +    }
    +  }
    +
    +  auto I = BuildMI(*MBB, MI, DL, TII.get(Opcode));
    +  I.add(VDataIn);
    +
    +  if (Opcode == AMDGPU::BUFFER_ATOMIC_ADD_F32_BOTHEN ||
    +      Opcode == AMDGPU::BUFFER_ATOMIC_PK_ADD_F16_BOTHEN) {
    +    Register IdxReg = MRI->createVirtualRegister(&AMDGPU::VReg_64RegClass);
    +    BuildMI(*MBB, &*I, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
    +      .addReg(VIndex.getReg())
    +      .addImm(AMDGPU::sub0)
    +      .addReg(VOffset.getReg())
    +      .addImm(AMDGPU::sub1);
    +
    +    I.addReg(IdxReg);
    +  } else if (HasVIndex) {
    +    I.add(VIndex);
    +  } else if (HasVOffset) {
    +    I.add(VOffset);
    +  }
    +
    +  I.add(MI.getOperand(2)); // rsrc
    +  I.add(SOffset);
    +  I.addImm(Offset);
    +  renderExtractSLC(I, MI, 7);
    +  I.cloneMemRefs(MI);
    +
    +  MI.eraseFromParent();
    +
    +  return true;
    +}
    +
    +bool AMDGPUInstructionSelector::selectGlobalAtomicFaddIntrinsic(
+  MachineInstr &MI) const {
    +
    +  MachineBasicBlock *MBB = MI.getParent();
    +  const DebugLoc &DL = MI.getDebugLoc();
    +
    +  if (!MRI->use_nodbg_empty(MI.getOperand(0).getReg())) {
    +    Function &F = MBB->getParent()->getFunction();
    +    DiagnosticInfoUnsupported
    +      NoFpRet(F, "return versions of fp atomics not supported",
    +              MI.getDebugLoc(), DS_Error);
    +    F.getContext().diagnose(NoFpRet);
    +    return false;
    +  }
    +
    +  // FIXME: This is only needed because tablegen requires number of dst operands
    +  // in match and replace pattern to be the same. Otherwise patterns can be
    +  // exported from SDag path.
+  auto Addr = selectFlatOffsetImpl<true>(MI.getOperand(2));
    +
    +  Register Data = MI.getOperand(3).getReg();
    +  const unsigned Opc = MRI->getType(Data).isVector() ?
    +    AMDGPU::GLOBAL_ATOMIC_PK_ADD_F16 : AMDGPU::GLOBAL_ATOMIC_ADD_F32;
    +  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc))
    +    .addReg(Addr.first)
    +    .addReg(Data)
    +    .addImm(Addr.second)
    +    .addImm(0) // SLC
    +    .cloneMemRefs(MI);
    +
    +  MI.eraseFromParent();
    +  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
    +}
    +
     bool AMDGPUInstructionSelector::select(MachineInstr &I) {
       if (I.isPHI())
         return selectPHI(I);
    @@ -3018,6 +3138,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
         assert(Intr && "not an image intrinsic with image pseudo");
         return selectImageIntrinsic(I, Intr);
       }
    +  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
    +    return selectAMDGPU_BUFFER_ATOMIC_FADD(I);
       default:
         return selectImpl(I, *CoverageInfo);
       }
    @@ -3260,14 +3382,11 @@ AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
     }
     
 template <bool Signed>
-InstructionSelector::ComplexRendererFns
+std::pair<Register, int64_t>
     AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
       MachineInstr *MI = Root.getParent();
     
    -  InstructionSelector::ComplexRendererFns Default = {{
    -      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
    -      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },  // offset
    -    }};
    +  auto Default = std::make_pair(Root.getReg(), 0);
     
       if (!STI.hasFlatInstOffsets())
         return Default;
    @@ -3287,20 +3406,27 @@ AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
     
       Register BasePtr = OpDef->getOperand(1).getReg();
     
    -  return {{
    -      [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
    -      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
    -    }};
    +  return std::make_pair(BasePtr, Offset.getValue());
     }
     
     InstructionSelector::ComplexRendererFns
     AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
-  return selectFlatOffsetImpl<false>(Root);
+  auto PtrWithOffset = selectFlatOffsetImpl<false>(Root);
    +
    +  return {{
    +      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
    +      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
    +    }};
     }
     
     InstructionSelector::ComplexRendererFns
     AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
-  return selectFlatOffsetImpl<true>(Root);
+  auto PtrWithOffset = selectFlatOffsetImpl<true>(Root);
    +
    +  return {{
    +      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
    +      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
    +    }};
     }
     
     /// Match a zero extend from a 32-bit value to 64-bits.
    diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
    index bd25c67964bfa..578958f120aa0 100644
    --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
    +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
    @@ -141,6 +141,8 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
       bool selectG_EXTRACT_VECTOR_ELT(MachineInstr &I) const;
       bool selectG_INSERT_VECTOR_ELT(MachineInstr &I) const;
       bool selectG_SHUFFLE_VECTOR(MachineInstr &I) const;
    +  bool selectAMDGPU_BUFFER_ATOMIC_FADD(MachineInstr &I) const;
    +  bool selectGlobalAtomicFaddIntrinsic(MachineInstr &I) const;
     
   std::pair<Register, unsigned>
       selectVOP3ModsImpl(MachineOperand &Root) const;
    @@ -180,11 +182,11 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
       selectSmrdSgpr(MachineOperand &Root) const;
     
   template <bool Signed>
-  InstructionSelector::ComplexRendererFns
+  std::pair<Register, int64_t>
       selectFlatOffsetImpl(MachineOperand &Root) const;
    +
       InstructionSelector::ComplexRendererFns
       selectFlatOffset(MachineOperand &Root) const;
    -
       InstructionSelector::ComplexRendererFns
       selectFlatOffsetSigned(MachineOperand &Root) const;
     
    diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
    index fad606c792a92..01c7934e9eb05 100644
    --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
    +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -483,6 +483,8 @@ defm atomic_load_umax : ret_noret_binary_atomic_op<atomic_load_umax>;
 defm atomic_load_umin : ret_noret_binary_atomic_op<atomic_load_umin>;
 defm atomic_load_xor : ret_noret_binary_atomic_op<atomic_load_xor>;
 defm atomic_load_fadd : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
+let MemoryVT = v2f16 in
+defm atomic_load_fadd_v2f16 : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
 defm AMDGPUatomic_cmp_swap : ret_noret_binary_atomic_op<AMDGPUatomic_cmp_swap>;
     
     def load_align8_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
    diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
    index c0bef6a5ada16..fc9315c016bb1 100644
    --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
    +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
    @@ -750,6 +750,9 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
     
       for (MachineInstr &MI : Range) {
         for (MachineOperand &Def : MI.defs()) {
    +      if (MRI.use_nodbg_empty(Def.getReg()))
    +        continue;
    +
           LLT ResTy = MRI.getType(Def.getReg());
           const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
           ResultRegs.push_back(Def.getReg());
    @@ -2971,7 +2974,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
       }
       case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD: {
         applyDefaultMapping(OpdMapper);
    -    executeInWaterfallLoop(MI, MRI, {1, 4});
    +    executeInWaterfallLoop(MI, MRI, {2, 5});
         return;
       }
       case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
    @@ -3929,7 +3932,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
       case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
       case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
    -  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC: {
    +  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
    +  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD: {
         // vdata_out
         OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
     
    @@ -3952,23 +3956,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
         // initialized.
         break;
       }
    -  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD: {
    -    // vdata_in
    -    OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
    -
    -    // rsrc
    -    OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
    -
    -    // vindex
    -    OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
    -
    -    // voffset
    -    OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
    -
    -    // soffset
    -    OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
    -    break;
    -  }
       case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
         // vdata_out
         OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
    diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
    index 45eca4b3216a5..480070505d62b 100644
    --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
    +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
    @@ -1094,14 +1094,12 @@ def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1",
                                            int_amdgcn_buffer_wbinvl1>;
     
     let SubtargetPredicate = HasAtomicFaddInsts in {
    -
     defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN <
    -  "buffer_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret
    +  "buffer_atomic_add_f32", VGPR_32, f32, atomic_load_fadd_global_noret_32
     >;
     defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN <
    -  "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_fadd_global_noret
    +  "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_load_fadd_v2f16_global_noret_32
     >;
    -
     } // End SubtargetPredicate = HasAtomicFaddInsts
     
     //===----------------------------------------------------------------------===//
    @@ -1394,36 +1392,46 @@ defm : BufferAtomicPatterns;
     defm : BufferAtomicPatterns;
     defm : BufferAtomicPatterns;
     
+class NoUseBufferAtomic<SDPatternOperator Op, ValueType vt> : PatFrag <
    +  (ops node:$src0, node:$src1, node:$src2, node:$src3, node:$src4, node:$src5, node:$src6, node:$src7),
    +  (vt (Op $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7)),
    +  [{ return SDValue(N, 0).use_empty(); }]> {
    +
    +  let GISelPredicateCode = [{
    +    return MRI.use_nodbg_empty(MI.getOperand(0).getReg());
    +  }];
    +}
    +
 multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt, string opcode> {
       def : GCNPat<
    -    (name vt:$vdata_in, v4i32:$rsrc, 0,
    -          0, i32:$soffset, timm:$offset,
    -          timm:$cachepolicy, 0),
+    (NoUseBufferAtomic<name, vt> vt:$vdata_in, v4i32:$rsrc, 0,
    +                                 0, i32:$soffset, timm:$offset,
    +                                 timm:$cachepolicy, 0),
     (!cast<MUBUF_Pseudo>(opcode # _OFFSET) getVregSrcForVT<vt>.ret:$vdata_in, SReg_128:$rsrc, SCSrc_b32:$soffset,
    -                                        (as_i16timm $offset), (extract_slc $cachepolicy))
    +                                          (as_i16timm $offset), (extract_slc $cachepolicy))
       >;
     
       def : GCNPat<
    -    (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
    -          0, i32:$soffset, timm:$offset,
    -          timm:$cachepolicy, timm),
+    (NoUseBufferAtomic<name, vt> vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
    +                                 0, i32:$soffset, timm:$offset,
    +                                 timm:$cachepolicy, timm),
     (!cast<MUBUF_Pseudo>(opcode # _IDXEN) getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
    -                                       (as_i16timm $offset), (extract_slc $cachepolicy))
    +                                          (as_i16timm $offset), (extract_slc $cachepolicy))
       >;
     
       def : GCNPat<
    -    (name vt:$vdata_in, v4i32:$rsrc, 0,
    -          i32:$voffset, i32:$soffset, timm:$offset,
    -          timm:$cachepolicy, 0),
+    (NoUseBufferAtomic<name, vt> vt:$vdata_in, v4i32:$rsrc, 0,
    +                                 i32:$voffset, i32:$soffset, timm:$offset,
    +                                 timm:$cachepolicy, 0),
     (!cast<MUBUF_Pseudo>(opcode # _OFFEN) getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
    -                                       (as_i16timm $offset), (extract_slc $cachepolicy))
    +                                          (as_i16timm $offset), (extract_slc $cachepolicy))
       >;
     
       def : GCNPat<
    -    (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
    -          i32:$voffset, i32:$soffset, timm:$offset,
    -          timm:$cachepolicy, timm),
+    (NoUseBufferAtomic<name, vt> vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
    +                                 i32:$voffset, i32:$soffset, timm:$offset,
    +                                 timm:$cachepolicy, timm),
     (!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
       getVregSrcForVT<vt>.ret:$vdata_in,
           (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
    diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
    index f5b6829e89f79..abe29f73a9141 100644
    --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
    +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -78,6 +78,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
       // copy relevant pseudo op flags
       let SubtargetPredicate = ps.SubtargetPredicate;
       let AsmMatchConverter  = ps.AsmMatchConverter;
    +  let OtherPredicates = ps.OtherPredicates;
       let TSFlags = ps.TSFlags;
       let UseNamedOperandTable = ps.UseNamedOperandTable;
     
    @@ -714,16 +715,16 @@ let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
         FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
     } // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
     
    -let SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1 in {
    -
    -defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
    -  "global_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret
    ->;
    -defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
    -  "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_fadd_global_noret
    ->;
    -
    -} // End SubtargetPredicate = HasAtomicFaddInsts
    +let is_flat_global = 1 in {
    +let OtherPredicates = [HasAtomicFaddInsts] in {
    +  defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
    +    "global_atomic_add_f32", VGPR_32, f32
    +  >;
    +  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
    +    "global_atomic_pk_add_f16", VGPR_32, v2f16
    +  >;
    +} // End OtherPredicates = [HasAtomicFaddInsts]
    +} // End is_flat_global = 1
     
     //===----------------------------------------------------------------------===//
     // Flat Patterns
    @@ -1081,8 +1082,10 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", atomic_swap_global_64, i64
     defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
     defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", atomic_load_xor_global_64, i64>;
     
-defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global_noret, f32>;
-defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_PK_ADD_F16, atomic_fadd_global_noret, v2f16>;
+let OtherPredicates = [HasAtomicFaddInsts] in {
+defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_ADD_F32, atomic_load_fadd_global_noret_32, f32>;
+defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_PK_ADD_F16, atomic_load_fadd_v2f16_global_noret_32, v2f16>;
    +}
     
     } // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
     
    diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    index d5712206da91e..7a71c1d35526d 100644
    --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    @@ -1121,7 +1121,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
       case Intrinsic::amdgcn_buffer_atomic_fadd: {
     SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
     
    -    Info.opc = ISD::INTRINSIC_VOID;
    +    Info.opc = ISD::INTRINSIC_W_CHAIN;
         Info.memVT = MVT::getVT(CI.getOperand(0)->getType());
         Info.ptrVal = MFI->getBufferPSV(
       *MF.getSubtarget<GCNSubtarget>().getInstrInfo(),
    @@ -1135,18 +1135,6 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     
         return true;
       }
    -  case Intrinsic::amdgcn_global_atomic_fadd: {
    -    Info.opc = ISD::INTRINSIC_VOID;
    -    Info.memVT = MVT::getVT(CI.getOperand(0)->getType()
    -                            ->getPointerElementType());
    -    Info.ptrVal = CI.getOperand(0);
    -    Info.align.reset();
    -
    -    // FIXME: Should report an atomic ordering here.
    -    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
    -
    -    return true;
    -  }
       case Intrinsic::amdgcn_ds_append:
       case Intrinsic::amdgcn_ds_consume: {
         Info.opc = ISD::INTRINSIC_W_CHAIN;
    @@ -1171,6 +1159,17 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                      MachineMemOperand::MOVolatile;
         return true;
       }
    +  case Intrinsic::amdgcn_global_atomic_fadd: {
    +    Info.opc = ISD::INTRINSIC_W_CHAIN;
    +    Info.memVT = MVT::getVT(CI.getType());
    +    Info.ptrVal = CI.getOperand(0);
    +    Info.align.reset();
    +    Info.flags = MachineMemOperand::MOLoad |
    +                 MachineMemOperand::MOStore |
    +                 MachineMemOperand::MODereferenceable |
    +                 MachineMemOperand::MOVolatile;
    +    return true;
    +  }
       case Intrinsic::amdgcn_ds_gws_init:
       case Intrinsic::amdgcn_ds_gws_barrier:
       case Intrinsic::amdgcn_ds_gws_sema_v:
    @@ -7034,7 +7033,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       case Intrinsic::amdgcn_buffer_atomic_umax:
       case Intrinsic::amdgcn_buffer_atomic_and:
       case Intrinsic::amdgcn_buffer_atomic_or:
    -  case Intrinsic::amdgcn_buffer_atomic_xor: {
    +  case Intrinsic::amdgcn_buffer_atomic_xor:
    +  case Intrinsic::amdgcn_buffer_atomic_fadd: {
     unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
         unsigned IdxEn = 1;
     if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4)))
    @@ -7094,6 +7094,17 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
         case Intrinsic::amdgcn_buffer_atomic_xor:
           Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
           break;
    +    case Intrinsic::amdgcn_buffer_atomic_fadd:
    +      if (!Op.getValue(0).use_empty()) {
    +        DiagnosticInfoUnsupported
    +          NoFpRet(DAG.getMachineFunction().getFunction(),
    +                  "return versions of fp atomics not supported",
    +                  DL.getDebugLoc(), DS_Error);
    +        DAG.getContext()->diagnose(NoFpRet);
    +        return SDValue();
    +      }
    +      Opcode = AMDGPUISD::BUFFER_ATOMIC_FADD;
    +      break;
         default:
           llvm_unreachable("unhandled atomic opcode");
         }
    @@ -7101,6 +7112,10 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
         return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
                                        M->getMemOperand());
       }
    +  case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
    +    return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD);
    +  case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
    +    return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD);
       case Intrinsic::amdgcn_raw_buffer_atomic_swap:
         return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SWAP);
       case Intrinsic::amdgcn_raw_buffer_atomic_add:
    @@ -7226,6 +7241,27 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
         return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
                                        Op->getVTList(), Ops, VT, M->getMemOperand());
       }
    +  case Intrinsic::amdgcn_global_atomic_fadd: {
    +    if (!Op.getValue(0).use_empty()) {
    +      DiagnosticInfoUnsupported
    +        NoFpRet(DAG.getMachineFunction().getFunction(),
    +                "return versions of fp atomics not supported",
    +                DL.getDebugLoc(), DS_Error);
    +      DAG.getContext()->diagnose(NoFpRet);
    +      return SDValue();
    +    }
+    MemSDNode *M = cast<MemSDNode>(Op);
    +    SDValue Ops[] = {
    +      M->getOperand(0), // Chain
    +      M->getOperand(2), // Ptr
    +      M->getOperand(3)  // Value
    +    };
    +
    +    EVT VT = Op.getOperand(3).getValueType();
    +    return DAG.getAtomic(ISD::ATOMIC_LOAD_FADD, DL, VT,
    +                         DAG.getVTList(VT, MVT::Other), Ops,
    +                         M->getMemOperand());
    +  }
       default:
         if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
                 AMDGPU::getImageDimIntrinsicInfo(IntrID))
    @@ -7547,39 +7583,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
         return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
                                        M->getMemoryVT(), M->getMemOperand());
       }
    -  case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
    -    return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD);
    -  case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
    -    return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD);
    -  case Intrinsic::amdgcn_buffer_atomic_fadd: {
-    unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
    -    unsigned IdxEn = 1;
-    if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4)))
    -      IdxEn = Idx->getZExtValue() != 0;
    -    SDValue Ops[] = {
    -      Chain,
    -      Op.getOperand(2), // vdata
    -      Op.getOperand(3), // rsrc
    -      Op.getOperand(4), // vindex
    -      SDValue(),        // voffset -- will be set by setBufferOffsets
    -      SDValue(),        // soffset -- will be set by setBufferOffsets
    -      SDValue(),        // offset -- will be set by setBufferOffsets
    -      DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
    -      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
    -    };
    -    unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
    -    // We don't know the offset if vindex is non-zero, so clear it.
    -    if (IdxEn)
    -      Offset = 0;
    -    EVT VT = Op.getOperand(2).getValueType();
    -
-    auto *M = cast<MemSDNode>(Op);
    -    M->getMemOperand()->setOffset(Offset);
    -
    -    return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_FADD, DL,
    -                                   Op->getVTList(), Ops, VT,
    -                                   M->getMemOperand());
    -  }
       case Intrinsic::amdgcn_end_cf:
         return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other,
                                           Op->getOperand(2), Chain), 0);
    diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
    index 13957a6c1f628..034563a0cbd11 100644
    --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
    +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -173,18 +173,6 @@ class SDBufferAtomic<string opcode> : SDNode <opcode,
 >;
 
-class SDBufferAtomicNoRtn<string opcode> : SDNode <opcode,
-  SDTypeProfile<0, 8,
-      [SDTCisVT<1, v4i32>,  // rsrc
-       SDTCisVT<2, i32>,   // vindex(VGPR)
-       SDTCisVT<3, i32>,   // voffset(VGPR)
-       SDTCisVT<4, i32>,   // soffset(SGPR)
-       SDTCisVT<5, i32>,   // offset(imm)
-       SDTCisVT<6, i32>,   // cachepolicy(imm)
-       SDTCisVT<7, i1>]>,  // idxen(imm)
-  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
->;
    -
     def SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
     def SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
     def SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
    @@ -198,7 +186,7 @@ def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
     def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
     def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
     def SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
    -def SIbuffer_atomic_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_FADD">;
    +def SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">;
     
     def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
       SDTypeProfile<1, 9,
@@ -316,18 +304,6 @@ defm atomic_load_fmax_#as : binary_atomic_op<SIatomic_fmax, 0>;
     } // End let AddressSpaces = ...
     } // End foreach AddrSpace
     
    -def atomic_fadd_global_noret_impl : PatFrag<
    -  (ops node:$ptr, node:$value),
    -  (atomic_load_fadd node:$ptr, node:$value)> {
    -  // FIXME: Move this
    -  let MemoryVT = f32;
    -  let IsAtomic = 1;
    -  let AddressSpaces = StoreAddress_global.AddrSpaces;
    -}
    -
    -def atomic_fadd_global_noret : PatFrags<(ops node:$src0, node:$src1),
    -  [(int_amdgcn_global_atomic_fadd node:$src0, node:$src1),
    -   (atomic_fadd_global_noret_impl node:$src0, node:$src1)]>;
     
     //===----------------------------------------------------------------------===//
     // SDNodes PatFrags for loads/stores with a glue input.
    diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
    index 2ac5f6be65802..5f8f2a4e58479 100644
    --- a/llvm/lib/Target/AMDGPU/SIInstructions.td
    +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
    @@ -2435,7 +2435,7 @@ def G_AMDGPU_BUFFER_ATOMIC_OR : BufferAtomicGenericInstruction;
     def G_AMDGPU_BUFFER_ATOMIC_XOR : BufferAtomicGenericInstruction;
     def G_AMDGPU_BUFFER_ATOMIC_INC : BufferAtomicGenericInstruction;
     def G_AMDGPU_BUFFER_ATOMIC_DEC : BufferAtomicGenericInstruction;
    -def G_AMDGPU_BUFFER_ATOMIC_FADD : BufferAtomicGenericInstruction<1/*NoRtn*/>;
    +def G_AMDGPU_BUFFER_ATOMIC_FADD : BufferAtomicGenericInstruction;
     
     def G_AMDGPU_BUFFER_ATOMIC_CMPSWAP : AMDGPUGenericInstruction {
       let OutOperandList = (outs type0:$dst);
    diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd-with-ret.ll
    new file mode 100644
    index 0000000000000..22e944fc3a116
    --- /dev/null
    +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd-with-ret.ll
    @@ -0,0 +1,10 @@
    +; RUN: not --crash llc -global-isel < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908
    +
    +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* nocapture, float) #0
    +
    +; GFX908: error: {{.*}} return versions of fp atomics not supported
    +
    +define float @global_atomic_fadd_f32_rtn(float addrspace(1)* %ptr, float %data) {
    +  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data)
    +  ret float %ret
    +}
    diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll
    index 60ba088404a2d..70651280003e5 100644
    --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll
    +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll
    @@ -8,7 +8,7 @@ define void @global_atomic_fadd_f32(float addrspace(1)* %ptr, float %data) {
     ; GFX908-NEXT:    global_atomic_add_f32 v[0:1], v2, off
     ; GFX908-NEXT:    s_waitcnt vmcnt(0)
     ; GFX908-NEXT:    s_setpc_b64 s[30:31]
    -  call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %ptr, float %data)
    +  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data)
       ret void
     }
     
    @@ -26,7 +26,7 @@ define void @global_atomic_fadd_f32_off_2048(float addrspace(1)* %ptr, float %da
     ; GFX908-NEXT:    s_waitcnt vmcnt(0)
     ; GFX908-NEXT:    s_setpc_b64 s[30:31]
       %gep = getelementptr float, float addrspace(1)* %ptr, i64 512
    -  call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %gep, float %data)
    +  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %gep, float %data)
       ret void
     }
     
    @@ -44,7 +44,7 @@ define void @global_atomic_fadd_f32_off_neg2047(float addrspace(1)* %ptr, float
     ; GFX908-NEXT:    s_waitcnt vmcnt(0)
     ; GFX908-NEXT:    s_setpc_b64 s[30:31]
       %gep = getelementptr float, float addrspace(1)* %ptr, i64 -511
    -  call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %gep, float %data)
    +  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %gep, float %data)
       ret void
     }
     
    @@ -62,7 +62,7 @@ define amdgpu_kernel void @global_atomic_fadd_f32_off_ss(float addrspace(1)* %pt
     ; GFX908-NEXT:    global_atomic_add_f32 v[0:1], v2, off
     ; GFX908-NEXT:    s_endpgm
       %gep = getelementptr float, float addrspace(1)* %ptr, i64 512
    -  call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %gep, float %data)
    +  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %gep, float %data)
       ret void
     }
     
    @@ -73,7 +73,7 @@ define void @global_atomic_fadd_v2f16(<2 x half> addrspace(1)* %ptr, <2 x half>
     ; GFX908-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off
     ; GFX908-NEXT:    s_waitcnt vmcnt(0)
     ; GFX908-NEXT:    s_setpc_b64 s[30:31]
    -  call void @llvm.amdgcn.global.atomic.fadd.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data)
    +  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data)
       ret void
     }
     
    @@ -91,11 +91,11 @@ define void @global_atomic_fadd_v2f16_off_neg2047(<2 x half> addrspace(1)* %ptr,
     ; GFX908-NEXT:    s_waitcnt vmcnt(0)
     ; GFX908-NEXT:    s_setpc_b64 s[30:31]
       %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 -511
    -  call void @llvm.amdgcn.global.atomic.fadd.p1v2f16.v2f16(<2 x half> addrspace(1)* %gep, <2 x half> %data)
    +  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %gep, <2 x half> %data)
       ret void
     }
     
    -declare void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* nocapture, float) #0
    -declare void @llvm.amdgcn.global.atomic.fadd.p1v2f16.v2f16(<2 x half> addrspace(1)* nocapture, <2 x half>) #0
    +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* nocapture, float) #0
    +declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* nocapture, <2 x half>) #0
     
     attributes #0 = { argmemonly nounwind willreturn }
    diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
    index e9cd9f6ff797c..1cb79ff7fcacf 100644
    --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
    +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
    @@ -16,7 +16,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
       ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
       ; CHECK:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; CHECK:   S_ENDPGM 0
    -  call void @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
    +  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -35,7 +35,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
       ; CHECK:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 4095, align 1, addrspace 4)
       ; CHECK:   S_ENDPGM 0
       %voffset.add = add i32 %voffset, 4095
    -  call void @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
    +  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -52,7 +52,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
       ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
       ; CHECK:   BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 4095, align 1, addrspace 4)
       ; CHECK:   S_ENDPGM 0
    -  call void @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
    +  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -70,7 +70,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_v
       ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
       ; CHECK:   BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; CHECK:   S_ENDPGM 0
    -  call void @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
    +  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -117,7 +117,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgp
       ; CHECK:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
       ; CHECK: bb.4:
       ; CHECK:   S_ENDPGM 0
    -  call void @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
    +  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -162,7 +162,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_v
       ; CHECK:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
       ; CHECK: bb.4:
       ; CHECK:   S_ENDPGM 0
    -  call void @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
    +  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -181,7 +181,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
       ; CHECK:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 4095, align 1, addrspace 4)
       ; CHECK:   S_ENDPGM 0
       %voffset = add i32 %voffset.base, 4095
    -  call void @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
    +  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -200,7 +200,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
       ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
       ; CHECK:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; CHECK:   S_ENDPGM 0
    -  call void @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
    +  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
       ret void
     }
     
    @@ -218,7 +218,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__v
       ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
       ; CHECK:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; CHECK:   S_ENDPGM 0
    -  call void @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
    +  %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -235,11 +235,11 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0
       ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
       ; CHECK:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; CHECK:   S_ENDPGM 0
    -  call void @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
    +  %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
       ret void
     }
     
    -declare void @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg) #0
    -declare void @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg) #0
    +declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg) #0
    +declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg) #0
     
     attributes #0 = { nounwind }
    diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll
    new file mode 100644
    index 0000000000000..99dde6c4d5833
    --- /dev/null
    +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll
    @@ -0,0 +1,11 @@
    +; RUN: not --crash llc -global-isel < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908
    +
    +declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) #0
    +
    +; GFX908: error: {{.*}} return versions of fp atomics not supported
    +
    +define amdgpu_ps float @buffer_atomic_add_f32_rtn(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
    +main_body:
    +  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
    +  ret float %ret
    +}
    diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
    index 4a5e4be7cb819..be0c233577d0b 100644
    --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
    +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
    @@ -18,7 +18,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
       ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
       ; CHECK:   BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; CHECK:   S_ENDPGM 0
    -  call void @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
    +  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -39,7 +39,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
       ; CHECK:   BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 4095, align 1, addrspace 4)
       ; CHECK:   S_ENDPGM 0
       %voffset.add = add i32 %voffset, 4095
    -  call void @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
    +  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -57,7 +57,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
       ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
       ; CHECK:   BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 4095, align 1, addrspace 4)
       ; CHECK:   S_ENDPGM 0
    -  call void @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 4095, i32 %soffset, i32 0)
    +  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 4095, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -76,7 +76,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
       ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
       ; CHECK:   BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; CHECK:   S_ENDPGM 0
    -  call void @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
    +  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -126,7 +126,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__
       ; CHECK:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
       ; CHECK: bb.4:
       ; CHECK:   S_ENDPGM 0
    -  call void @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
    +  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -173,7 +173,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__
       ; CHECK:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
       ; CHECK: bb.4:
       ; CHECK:   S_ENDPGM 0
    -  call void @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
    +  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -194,7 +194,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
       ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
       ; CHECK:   BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; CHECK:   S_ENDPGM 0
    -  call void @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
    +  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
       ret void
     }
     
    @@ -212,7 +212,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
       ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
       ; CHECK:   BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; CHECK:   S_ENDPGM 0
    -  call void @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 2)
    +  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 2)
       ret void
     }
     
    @@ -232,7 +232,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc
       ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
       ; CHECK:   BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; CHECK:   S_ENDPGM 0
    -  call void @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
    +  %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -250,11 +250,11 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc
       ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
       ; CHECK:   BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; CHECK:   S_ENDPGM 0
    -  call void @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
    +  %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
       ret void
     }
     
    -declare void @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) #0
    -declare void @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg) #0
    +declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) #0
    +declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg) #0
     
     attributes #0 = { nounwind }
    diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir
    index f0e2698e52f20..7257357eab8ec 100644
    --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir
    +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir
    @@ -58,14 +58,12 @@ body: |
         ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
         ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
         ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    -    ; CHECK: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -    ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
    +    ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
         ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
         ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
         ; CHECK: .1:
         ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
    -    ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %11, %bb.1
    -    ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %2(<4 x s32>), %bb.1
    +    ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %9, %bb.1
         ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
         ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
         ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
    @@ -105,14 +103,12 @@ body: |
         ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
         ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
         ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    -    ; CHECK: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -    ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
    +    ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
         ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
         ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
         ; CHECK: .1:
         ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
    -    ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %10, %bb.1
    -    ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %2(<4 x s32>), %bb.1
    +    ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %8, %bb.1
         ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
         ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
         ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
    diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
    index 96b66d48e23dd..9e051458ccd19 100644
    --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
    +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
    @@ -1961,16 +1961,12 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %
       ; CHECK:   [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
       ; CHECK:   [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
       ; CHECK:   [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    -  ; CHECK:   [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; CHECK:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; CHECK:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
    +  ; CHECK:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
       ; CHECK:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
       ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
       ; CHECK: bb.2:
       ; CHECK:   successors: %bb.3, %bb.2
    -  ; CHECK:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %30, %bb.2
    -  ; CHECK:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %21(<4 x s32>), %bb.2
    -  ; CHECK:   [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
    +  ; CHECK:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2
       ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
       ; CHECK:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
       ; CHECK:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
    @@ -2013,16 +2009,12 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %
       ; GREEDY:   [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
       ; GREEDY:   [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
       ; GREEDY:   [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    -  ; GREEDY:   [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; GREEDY:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; GREEDY:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
    +  ; GREEDY:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
       ; GREEDY:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
       ; GREEDY:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
       ; GREEDY: bb.2:
       ; GREEDY:   successors: %bb.3, %bb.2
    -  ; GREEDY:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %30, %bb.2
    -  ; GREEDY:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %21(<4 x s32>), %bb.2
    -  ; GREEDY:   [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
    +  ; GREEDY:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2
       ; GREEDY:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
       ; GREEDY:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
       ; GREEDY:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
    @@ -2074,16 +2066,12 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %
       ; CHECK:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
       ; CHECK:   [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
       ; CHECK:   [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    -  ; CHECK:   [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; CHECK:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; CHECK:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
    +  ; CHECK:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
       ; CHECK:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
       ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
       ; CHECK: bb.2:
       ; CHECK:   successors: %bb.3, %bb.2
    -  ; CHECK:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %31, %bb.2
    -  ; CHECK:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
    -  ; CHECK:   [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %23(<4 x s32>), %bb.2
    +  ; CHECK:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2
       ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
       ; CHECK:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
       ; CHECK:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
    @@ -2127,16 +2115,12 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %
       ; GREEDY:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
       ; GREEDY:   [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
       ; GREEDY:   [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    -  ; GREEDY:   [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; GREEDY:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; GREEDY:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
    +  ; GREEDY:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
       ; GREEDY:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
       ; GREEDY:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
       ; GREEDY: bb.2:
       ; GREEDY:   successors: %bb.3, %bb.2
    -  ; GREEDY:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %31, %bb.2
    -  ; GREEDY:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
    -  ; GREEDY:   [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %23(<4 x s32>), %bb.2
    +  ; GREEDY:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2
       ; GREEDY:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
       ; GREEDY:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
       ; GREEDY:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
    @@ -2186,16 +2170,12 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %
       ; CHECK:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
       ; CHECK:   [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
       ; CHECK:   [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    -  ; CHECK:   [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; CHECK:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; CHECK:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
    +  ; CHECK:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
       ; CHECK:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
       ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
       ; CHECK: bb.2:
       ; CHECK:   successors: %bb.3, %bb.2
    -  ; CHECK:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %31, %bb.2
    -  ; CHECK:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
    -  ; CHECK:   [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %23(<4 x s32>), %bb.2
    +  ; CHECK:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2
       ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
       ; CHECK:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
       ; CHECK:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
    @@ -2239,16 +2219,12 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %
       ; GREEDY:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
       ; GREEDY:   [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
       ; GREEDY:   [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    -  ; GREEDY:   [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; GREEDY:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; GREEDY:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
    +  ; GREEDY:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
       ; GREEDY:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
       ; GREEDY:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
       ; GREEDY: bb.2:
       ; GREEDY:   successors: %bb.3, %bb.2
    -  ; GREEDY:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %31, %bb.2
    -  ; GREEDY:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
    -  ; GREEDY:   [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %23(<4 x s32>), %bb.2
    +  ; GREEDY:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2
       ; GREEDY:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
       ; GREEDY:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
       ; GREEDY:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
    @@ -2297,16 +2273,12 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
       ; CHECK:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
       ; CHECK:   [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
       ; CHECK:   [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    -  ; CHECK:   [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; CHECK:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; CHECK:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
    +  ; CHECK:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
       ; CHECK:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
       ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
       ; CHECK: bb.2:
       ; CHECK:   successors: %bb.3, %bb.2
    -  ; CHECK:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %30, %bb.2
    -  ; CHECK:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %21(<4 x s32>), %bb.2
    -  ; CHECK:   [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
    +  ; CHECK:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2
       ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
       ; CHECK:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
       ; CHECK:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
    @@ -2349,16 +2321,12 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
       ; GREEDY:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
       ; GREEDY:   [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
       ; GREEDY:   [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    -  ; GREEDY:   [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; GREEDY:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; GREEDY:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
    +  ; GREEDY:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
       ; GREEDY:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
       ; GREEDY:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
       ; GREEDY: bb.2:
       ; GREEDY:   successors: %bb.3, %bb.2
    -  ; GREEDY:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %30, %bb.2
    -  ; GREEDY:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %21(<4 x s32>), %bb.2
    -  ; GREEDY:   [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
    +  ; GREEDY:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2
       ; GREEDY:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
       ; GREEDY:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
       ; GREEDY:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
    @@ -2407,16 +2375,12 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
       ; CHECK:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
       ; CHECK:   [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
       ; CHECK:   [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    -  ; CHECK:   [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; CHECK:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; CHECK:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
    +  ; CHECK:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
       ; CHECK:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
       ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
       ; CHECK: bb.2:
       ; CHECK:   successors: %bb.3, %bb.2
    -  ; CHECK:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %30, %bb.2
    -  ; CHECK:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %21(<4 x s32>), %bb.2
    -  ; CHECK:   [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
    +  ; CHECK:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2
       ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
       ; CHECK:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
       ; CHECK:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
    @@ -2459,16 +2423,12 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
       ; GREEDY:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
       ; GREEDY:   [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
       ; GREEDY:   [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    -  ; GREEDY:   [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; GREEDY:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; GREEDY:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
    +  ; GREEDY:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
       ; GREEDY:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
       ; GREEDY:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
       ; GREEDY: bb.2:
       ; GREEDY:   successors: %bb.3, %bb.2
    -  ; GREEDY:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %30, %bb.2
    -  ; GREEDY:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %21(<4 x s32>), %bb.2
    -  ; GREEDY:   [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
    +  ; GREEDY:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2
       ; GREEDY:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
       ; GREEDY:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
       ; GREEDY:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
    @@ -2517,16 +2477,12 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
       ; CHECK:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
       ; CHECK:   [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
       ; CHECK:   [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    -  ; CHECK:   [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; CHECK:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; CHECK:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
    +  ; CHECK:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
       ; CHECK:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
       ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
       ; CHECK: bb.2:
       ; CHECK:   successors: %bb.3, %bb.2
    -  ; CHECK:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %30, %bb.2
    -  ; CHECK:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %21(<4 x s32>), %bb.2
    -  ; CHECK:   [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
    +  ; CHECK:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2
       ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
       ; CHECK:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
       ; CHECK:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
    @@ -2569,16 +2525,12 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
       ; GREEDY:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
       ; GREEDY:   [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
       ; GREEDY:   [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    -  ; GREEDY:   [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; GREEDY:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; GREEDY:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
    +  ; GREEDY:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
       ; GREEDY:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
       ; GREEDY:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
       ; GREEDY: bb.2:
       ; GREEDY:   successors: %bb.3, %bb.2
    -  ; GREEDY:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %30, %bb.2
    -  ; GREEDY:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %21(<4 x s32>), %bb.2
    -  ; GREEDY:   [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
    +  ; GREEDY:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2
       ; GREEDY:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
       ; GREEDY:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
       ; GREEDY:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
    @@ -2626,16 +2578,12 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4
       ; CHECK:   [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
       ; CHECK:   [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
       ; CHECK:   [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    -  ; CHECK:   [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; CHECK:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; CHECK:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
    +  ; CHECK:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
       ; CHECK:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
       ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
       ; CHECK: bb.2:
       ; CHECK:   successors: %bb.3, %bb.2
    -  ; CHECK:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %30, %bb.2
    -  ; CHECK:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %21(<4 x s32>), %bb.2
    -  ; CHECK:   [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
    +  ; CHECK:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2
       ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
       ; CHECK:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
       ; CHECK:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
    @@ -2677,16 +2625,12 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4
       ; GREEDY:   [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
       ; GREEDY:   [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
       ; GREEDY:   [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    -  ; GREEDY:   [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; GREEDY:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
    -  ; GREEDY:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
    +  ; GREEDY:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
       ; GREEDY:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
       ; GREEDY:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
       ; GREEDY: bb.2:
       ; GREEDY:   successors: %bb.3, %bb.2
    -  ; GREEDY:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %30, %bb.2
    -  ; GREEDY:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.1, %21(<4 x s32>), %bb.2
    -  ; GREEDY:   [[PHI2:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %22(<4 x s32>), %bb.2
    +  ; GREEDY:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2
       ; GREEDY:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
       ; GREEDY:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
       ; GREEDY:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
    diff --git a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
    index e4f0083a4685c..2c5a3f3d9ba96 100644
    --- a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
    +++ b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
    @@ -15,27 +15,27 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
       ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
    -  ; GCN:   INLINEASM &"", 1
    +  ; GCN:   INLINEASM &"", 1 /* sideeffect attdialect */
       ; GCN:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 32, align 1, addrspace 4)
       ; GCN:   BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
    -  ; GCN:   INLINEASM &"", 1
    +  ; GCN:   INLINEASM &"", 1 /* sideeffect attdialect */
       ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 48, align 1, addrspace 4)
       ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
    -  ; GCN:   INLINEASM &"", 1
    +  ; GCN:   INLINEASM &"", 1 /* sideeffect attdialect */
       ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 64, align 1, addrspace 4)
       ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
    -  ; GCN:   INLINEASM &"", 1
    +  ; GCN:   INLINEASM &"", 1 /* sideeffect attdialect */
       ; GCN:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 80, align 1, addrspace 4)
       ; GCN:   BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
    -  ; GCN:   INLINEASM &"", 1
    +  ; GCN:   INLINEASM &"", 1 /* sideeffect attdialect */
       ; GCN:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY]], %subreg.sub1
       ; GCN:   [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
       ; GCN:   BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 96, align 1, addrspace 4)
    @@ -49,13 +49,13 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
       ; GCN:   [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
       ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF3]].sub0
    -  ; GCN:   INLINEASM &"", 1
    +  ; GCN:   INLINEASM &"", 1 /* sideeffect attdialect */
       ; GCN:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
    -  ; GCN:   BUFFER_ATOMIC_ADD_F32_OFFSET [[V_MOV_B32_e32_1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (load store 4 on custom "TargetCustom7" + 112, addrspace 4)
    -  ; GCN:   BUFFER_ATOMIC_ADD_F32_OFFEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (load store 4 on custom "TargetCustom7", addrspace 4)
    -  ; GCN:   BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (load store 4 on custom "TargetCustom7", addrspace 4)
    -  ; GCN:   BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (load store 4 on custom "TargetCustom7", addrspace 4)
    -  ; GCN:   INLINEASM &"", 1
    +  ; GCN:   BUFFER_ATOMIC_ADD_F32_OFFSET [[V_MOV_B32_e32_1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 112, align 1, addrspace 4)
    +  ; GCN:   BUFFER_ATOMIC_ADD_F32_OFFEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
    +  ; GCN:   BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
    +  ; GCN:   BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
    +  ; GCN:   INLINEASM &"", 1 /* sideeffect attdialect */
       ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 128, align 1, addrspace 4)
       ; GCN:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 64
       ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 128, align 1, addrspace 4)
    @@ -64,7 +64,7 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
       ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY]]
       ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY6]], 128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
    -  ; GCN:   INLINEASM &"", 1
    +  ; GCN:   INLINEASM &"", 1 /* sideeffect attdialect */
       ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 144, align 1, addrspace 4)
       ; GCN:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 72
       ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 144, align 1, addrspace 4)
    @@ -73,7 +73,7 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
       ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY]]
       ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY7]], 144, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
    -  ; GCN:   INLINEASM &"", 1
    +  ; GCN:   INLINEASM &"", 1 /* sideeffect attdialect */
       ; GCN:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 160, align 1, addrspace 4)
       ; GCN:   [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 80
       ; GCN:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_5]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 160, align 1, addrspace 4)
    @@ -82,7 +82,7 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
       ; GCN:   BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY]]
       ; GCN:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[COPY8]], 160, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
    -  ; GCN:   INLINEASM &"", 1
    +  ; GCN:   INLINEASM &"", 1 /* sideeffect attdialect */
       ; GCN:   [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
       ; GCN:   BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 176, align 1, addrspace 4)
       ; GCN:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[DEF4]].sub0
    @@ -101,7 +101,7 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
       ; GCN:   [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
       ; GCN:   BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY13]], 176, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[DEF8]].sub0
    -  ; GCN:   INLINEASM &"", 1
    +  ; GCN:   INLINEASM &"", 1 /* sideeffect attdialect */
       ; GCN:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 192, align 1, addrspace 4)
       ; GCN:   [[S_MOV_B32_9:%[0-9]+]]:sreg_32 = S_MOV_B32 96
       ; GCN:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 192, align 1, addrspace 4)
    @@ -110,7 +110,7 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
       ; GCN:   BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY]]
       ; GCN:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY15]], 192, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
    -  ; GCN:   INLINEASM &"", 1
    +  ; GCN:   INLINEASM &"", 1 /* sideeffect attdialect */
       ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 208, align 1, addrspace 4)
       ; GCN:   [[S_MOV_B32_11:%[0-9]+]]:sreg_32 = S_MOV_B32 104
       ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 208, align 1, addrspace 4)
    @@ -119,7 +119,7 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
       ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   [[COPY16:%[0-9]+]]:sreg_32 = COPY [[COPY]]
       ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY16]], 208, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
    -  ; GCN:   INLINEASM &"", 1
    +  ; GCN:   INLINEASM &"", 1 /* sideeffect attdialect */
       ; GCN:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
       ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY17]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 224, align 1, addrspace 4)
       ; GCN:   [[S_MOV_B32_13:%[0-9]+]]:sreg_32 = S_MOV_B32 112
    @@ -135,7 +135,7 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
       ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY20]], [[S_LOAD_DWORDX4_IMM]], [[COPY21]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
    -  ; GCN:   INLINEASM &"", 1
    +  ; GCN:   INLINEASM &"", 1 /* sideeffect attdialect */
       ; GCN:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
       ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY22]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 240, align 1, addrspace 4)
       ; GCN:   [[S_MOV_B32_15:%[0-9]+]]:sreg_32 = S_MOV_B32 120
    @@ -150,7 +150,7 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
       ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY25]], [[S_LOAD_DWORDX4_IMM]], [[COPY26]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
    -  ; GCN:   INLINEASM &"", 1
    +  ; GCN:   INLINEASM &"", 1 /* sideeffect attdialect */
       ; GCN:   [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
       ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY27]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 256, align 1, addrspace 4)
       ; GCN:   [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
    @@ -164,7 +164,7 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
       ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY30]], [[S_LOAD_DWORDX4_IMM]], [[COPY31]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
    -  ; GCN:   INLINEASM &"", 1
    +  ; GCN:   INLINEASM &"", 1 /* sideeffect attdialect */
       ; GCN:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
       ; GCN:   [[DEF9:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
       ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY32]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 272, align 1, addrspace 4)
    @@ -193,7 +193,7 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
       ; GCN:   [[DEF15:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
       ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   [[COPY43:%[0-9]+]]:vgpr_32 = COPY [[DEF15]].sub0
    -  ; GCN:   INLINEASM &"", 1
    +  ; GCN:   INLINEASM &"", 1 /* sideeffect attdialect */
       ; GCN:   [[COPY44:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
       ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY44]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 288, align 1, addrspace 4)
       ; GCN:   [[COPY45:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
    @@ -207,7 +207,7 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
       ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY47]], [[S_LOAD_DWORDX4_IMM]], [[COPY48]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
       ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
    -  ; GCN:   INLINEASM &"", 1
    +  ; GCN:   INLINEASM &"", 1 /* sideeffect attdialect */
       ; GCN:   [[COPY49:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
       ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY49]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 304, align 1, addrspace 4)
       ; GCN:   [[S_MOV_B32_21:%[0-9]+]]:sreg_32 = S_MOV_B32 152
    @@ -268,10 +268,10 @@ bb.0:
     
       call void asm sideeffect "", "" ()
     
    -  call void @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 0, i32 112, i1 false) #2
    -  call void @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false) #2
    -  call void @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 1, i32 112, i1 false) #2
    -  call void @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 %arg1, i32 112, i1 false) #2
    +  %fadd1 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 0, i32 112, i1 false) #2
    +  %fadd2 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false) #2
    +  %fadd3 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 1, i32 112, i1 false) #2
    +  %fadd4 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 %arg1, i32 112, i1 false) #2
     
       call void asm sideeffect "", "" ()
     
    @@ -392,7 +392,7 @@ declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i
     declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
     declare i32 @llvm.amdgcn.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i1) #2
     declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #2
    -declare void @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1) #2
    +declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1) #2
     declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #0
     declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) #0
     declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32) #2
    diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
    index 0f655dadfa11d..7d3839d213b89 100644
    --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
    +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
    @@ -68,7 +68,6 @@ done:
     
     declare i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* nocapture, i32) #0
     declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
    -declare void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* nocapture, float) #2
     
     attributes #0 = { argmemonly nounwind }
     attributes #1 = { nounwind readnone willreturn }
    diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
    index 840a4ec3dac8f..e14a35e150824 100644
    --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
    +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
    @@ -1,5 +1,4 @@
     ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
    -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
     ; RUN: opt -S -codegenprepare -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefix=OPT %s
     ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefix=GCN %s
     
    @@ -9,14 +8,14 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_fadd_f32(float a
     ; OPT-LABEL: @test_sink_small_offset_global_atomic_fadd_f32(
     ; OPT-NEXT:  entry:
     ; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr float, float addrspace(1)* [[OUT:%.*]], i32 999999
    -; OPT-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #3
    +; OPT-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) [[ATTR3:#.*]]
     ; OPT-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TID]], 0
     ; OPT-NEXT:    br i1 [[CMP]], label [[ENDIF:%.*]], label [[IF:%.*]]
     ; OPT:       if:
     ; OPT-NEXT:    [[TMP0:%.*]] = bitcast float addrspace(1)* [[IN:%.*]] to i8 addrspace(1)*
     ; OPT-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, i8 addrspace(1)* [[TMP0]], i64 28
     ; OPT-NEXT:    [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SUNKADDR]] to float addrspace(1)*
    -; OPT-NEXT:    call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* [[TMP1]], float 2.000000e+00)
    +; OPT-NEXT:    [[FADD2:%.*]] = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* [[TMP1]], float 2.000000e+00)
     ; OPT-NEXT:    [[VAL:%.*]] = load volatile float, float addrspace(1)* undef, align 4
     ; OPT-NEXT:    br label [[ENDIF]]
     ; OPT:       endif:
    @@ -57,7 +56,7 @@ entry:
       br i1 %cmp, label %endif, label %if
     
     if:
    -  call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %in.gep, float 2.0)
    +  %fadd2 = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %in.gep, float 2.0)
       %val = load volatile float, float addrspace(1)* undef
       br label %endif
     
    @@ -71,7 +70,7 @@ done:
     }
     
     declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
    -declare void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* nocapture, float) #2
    +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* nocapture, float) #2
     
     attributes #0 = { argmemonly nounwind }
     attributes #1 = { nounwind readnone willreturn }
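(Note on the rename from @llvm.amdgcn.global.atomic.fadd.p1f32.f32 to @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32: once the intrinsic is overloaded on its result type, the mangled name gains a suffix for the return type ahead of the operand types, so the suffixes now read return type, pointer type, data type:)

  ; .f32     .p1f32            .f32
  ;  return   pointer operand   data operand
  declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* nocapture, float)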
    diff --git a/llvm/test/CodeGen/AMDGPU/fail-select-buffer-atomic-fadd.ll b/llvm/test/CodeGen/AMDGPU/fail-select-buffer-atomic-fadd.ll
    index e52fcc747a710..710bfa9744ad9 100644
    --- a/llvm/test/CodeGen/AMDGPU/fail-select-buffer-atomic-fadd.ll
    +++ b/llvm/test/CodeGen/AMDGPU/fail-select-buffer-atomic-fadd.ll
    @@ -8,12 +8,12 @@
     ; have the instruction available.
     ; FIXME: Should also really make sure the v2f16 version fails.
     
    -; FAIL: LLVM ERROR: Cannot select: {{.+}}: ch = BUFFER_ATOMIC_FADD
    +; FAIL: LLVM ERROR: Cannot select: {{.+}}: f32,ch = BUFFER_ATOMIC_FADD
     define amdgpu_cs void @atomic_fadd(<4 x i32> inreg %arg0) {
    -  call void @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %arg0, i32 0, i32 112, i1 false)
    +  %ret = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %arg0, i32 0, i32 112, i1 false)
       ret void
     }
     
    -declare void @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1 immarg) #0
    +declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1 immarg) #0
     
     attributes #0 = { nounwind }
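(The FAIL pattern changes from "ch = BUFFER_ATOMIC_FADD" to "f32,ch = BUFFER_ATOMIC_FADD" because the SelectionDAG node for the intrinsic now carries two results, the f32 atomic result plus the chain, instead of the chain alone.)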
    diff --git a/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll b/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll
    index 315180dff5fac..af54135d1ceba 100644
    --- a/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll
    +++ b/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll
    @@ -1,12 +1,12 @@
    -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX900 %s
    -; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX908 %s
    +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX900,CAS %s
    +; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX908,CAS %s
     
     ; GCN-LABEL: {{^}}global_atomic_fadd_ret_f32:
    -; GCN: [[LOOP:BB[0-9]+_[0-9]+]]
    -; GCN: v_add_f32_e32
    -; GCN: global_atomic_cmpswap
    -; GCN: s_andn2_b64 exec, exec,
    -; GCN-NEXT: s_cbranch_execnz [[LOOP]]
    +; CAS: [[LOOP:BB[0-9]+_[0-9]+]]
    +; CAS: v_add_f32_e32
    +; CAS: global_atomic_cmpswap
    +; CAS: s_andn2_b64 exec, exec,
    +; CAS-NEXT: s_cbranch_execnz [[LOOP]]
     define amdgpu_kernel void @global_atomic_fadd_ret_f32(float addrspace(1)* %ptr) {
       %result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 seq_cst
       store float %result, float addrspace(1)* undef
    diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx908.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx908.ll
    index fb5a454421550..e8f4504bbccaa 100644
    --- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx908.ll
    +++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx908.ll
    @@ -15,7 +15,7 @@ define amdgpu_ps void @global_fadd_saddr_f32_nortn(i8 addrspace(1)* inreg %sbase
       %zext.offset = zext i32 %voffset to i64
       %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
       %cast.gep0 = bitcast i8 addrspace(1)* %gep0 to float addrspace(1)*
    -  call void @llvm.amdgcn.global.atomic.fadd.f32.p1f32(float addrspace(1)* %cast.gep0, float %data)
    +  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32(float addrspace(1)* %cast.gep0, float %data)
       ret void
     }
     
    @@ -28,7 +28,7 @@ define amdgpu_ps void @global_fadd_saddr_f32_nortn_neg128(i8 addrspace(1)* inreg
       %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
       %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
       %cast.gep1 = bitcast i8 addrspace(1)* %gep1 to float addrspace(1)*
    -  call void @llvm.amdgcn.global.atomic.fadd.f32.p1f32(float addrspace(1)* %cast.gep1, float %data)
    +  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32(float addrspace(1)* %cast.gep1, float %data)
       ret void
     }
     
    @@ -40,7 +40,7 @@ define amdgpu_ps void @global_fadd_saddr_v2f16_nortn(i8 addrspace(1)* inreg %sba
       %zext.offset = zext i32 %voffset to i64
       %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
       %cast.gep0 = bitcast i8 addrspace(1)* %gep0 to <2 x half> addrspace(1)*
    -  call void @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16(<2 x half> addrspace(1)* %cast.gep0, <2 x half> %data)
    +  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16(<2 x half> addrspace(1)* %cast.gep0, <2 x half> %data)
       ret void
     }
     
    @@ -53,11 +53,11 @@ define amdgpu_ps void @global_fadd_saddr_v2f16_nortn_neg128(i8 addrspace(1)* inr
       %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
       %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
       %cast.gep1 = bitcast i8 addrspace(1)* %gep1 to <2 x half> addrspace(1)*
    -  call void @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16(<2 x half> addrspace(1)* %cast.gep1, <2 x half> %data)
    +  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16(<2 x half> addrspace(1)* %cast.gep1, <2 x half> %data)
       ret void
     }
     
    -declare void @llvm.amdgcn.global.atomic.fadd.f32.p1f32(float addrspace(1)* nocapture, float) #0
    -declare void @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16(<2 x half> addrspace(1)* nocapture, <2 x half>) #0
    +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32(float addrspace(1)* nocapture, float) #0
    +declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16(<2 x half> addrspace(1)* nocapture, <2 x half>) #0
     
     attributes #0 = { argmemonly nounwind willreturn }
    diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
    index b46e01373aad0..aee44794ac89b 100644
    --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
    +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
    @@ -1,15 +1,15 @@
     ; RUN: llc < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs | FileCheck %s -check-prefix=GCN
     
    -declare void @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1)
    -declare void @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i1)
    -declare void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)*, float)
    -declare void @llvm.amdgcn.global.atomic.fadd.p1v2f16.v2f16(<2 x half> addrspace(1)*, <2 x half>)
    +declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1)
    +declare <2 x half> @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i1)
    +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)*, float)
    +declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)*, <2 x half>)
     
     ; GCN-LABEL: {{^}}buffer_atomic_add_f32:
     ; GCN: buffer_atomic_add_f32 v0, v1, s[0:3], 0 idxen
     define amdgpu_ps void @buffer_atomic_add_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
     main_body:
    -  call void @llvm.amdgcn.buffer.atomic.fadd.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
    +  %ret = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
       ret void
     }
     
    @@ -17,7 +17,7 @@ main_body:
     ; GCN: buffer_atomic_add_f32 v0, v1, s[0:3], 0 idxen offset:4 slc
     define amdgpu_ps void @buffer_atomic_add_f32_off4_slc(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
     main_body:
    -  call void @llvm.amdgcn.buffer.atomic.fadd.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)
    +  %ret = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)
       ret void
     }
     
    @@ -25,7 +25,7 @@ main_body:
     ; GCN: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 idxen
     define amdgpu_ps void @buffer_atomic_pk_add_v2f16(<4 x i32> inreg %rsrc, <2 x half> %data, i32 %vindex) {
     main_body:
    -  call void @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
    +  %ret = call <2 x half> @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
       ret void
     }
     
    @@ -33,7 +33,7 @@ main_body:
     ; GCN: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 idxen offset:4 slc
     define amdgpu_ps void @buffer_atomic_pk_add_v2f16_off4_slc(<4 x i32> inreg %rsrc, <2 x half> %data, i32 %vindex) {
     main_body:
    -  call void @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)
    +  %ret = call <2 x half> @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)
       ret void
     }
     
    @@ -41,7 +41,7 @@ main_body:
     ; GCN: global_atomic_add_f32 v[{{[0-9:]+}}], v{{[0-9]+}}, off
     define amdgpu_kernel void @global_atomic_add_f32(float addrspace(1)* %ptr, float %data) {
     main_body:
    -  call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %ptr, float %data)
    +  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data)
       ret void
     }
     
    @@ -50,7 +50,7 @@ main_body:
     define amdgpu_kernel void @global_atomic_add_f32_off4(float addrspace(1)* %ptr, float %data) {
     main_body:
       %p = getelementptr float, float addrspace(1)* %ptr, i64 1
    -  call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %p, float %data)
    +  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %p, float %data)
       ret void
     }
     
    @@ -59,7 +59,7 @@ main_body:
     define amdgpu_kernel void @global_atomic_add_f32_offneg4(float addrspace(1)* %ptr, float %data) {
     main_body:
       %p = getelementptr float, float addrspace(1)* %ptr, i64 -1
    -  call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %p, float %data)
    +  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %p, float %data)
       ret void
     }
     
    @@ -67,7 +67,7 @@ main_body:
     ; GCN: global_atomic_pk_add_f16 v[{{[0-9:]+}}], v{{[0-9]+}}, off
     define amdgpu_kernel void @global_atomic_pk_add_v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
     main_body:
    -  call void @llvm.amdgcn.global.atomic.fadd.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data)
    +  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data)
       ret void
     }
     
    @@ -76,7 +76,7 @@ main_body:
     define amdgpu_kernel void @global_atomic_pk_add_v2f16_off4(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
     main_body:
       %p = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 1
    -  call void @llvm.amdgcn.global.atomic.fadd.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)
    +  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)
       ret void
     }
     
    @@ -85,7 +85,7 @@ main_body:
     define amdgpu_kernel void @global_atomic_pk_add_v2f16_offneg4(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
     main_body:
       %p = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 -1
    -  call void @llvm.amdgcn.global.atomic.fadd.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)
    +  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)
       ret void
     }
     
    @@ -94,7 +94,7 @@ main_body:
     ; GCN-LABEL: {{^}}global_atomic_fadd_f32_wrong_subtarget:
     ; GCN: global_atomic_add_f32 v[{{[0-9:]+}}], v{{[0-9]+}}, off
     define amdgpu_kernel void @global_atomic_fadd_f32_wrong_subtarget(float addrspace(1)* %ptr, float %data) #0 {
    -  call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %ptr, float %data)
    +  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data)
       ret void
     }
     
    diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.fadd.ll
    index a48528caba1ba..90f805f2fc85f 100644
    --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.fadd.ll
    +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.fadd.ll
    @@ -10,7 +10,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
     ; CHECK-NEXT:    s_mov_b32 s8, s2
     ; CHECK-NEXT:    buffer_atomic_add_f32 v0, v1, s[8:11], s6 offen
     ; CHECK-NEXT:    s_endpgm
    -  call void @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 24)
    +  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 24)
       ret void
     }
     
    @@ -23,7 +23,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_v
     ; CHECK-NEXT:    s_mov_b32 s8, s2
     ; CHECK-NEXT:    buffer_atomic_add_f32 v0, off, s[8:11], s6
     ; CHECK-NEXT:    s_endpgm
    -  call void @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
    +  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -36,7 +36,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__v
     ; CHECK-NEXT:    s_mov_b32 s8, s2
     ; CHECK-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[8:11], s6 offen
     ; CHECK-NEXT:    s_endpgm
    -  call void @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
    +  %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -49,7 +49,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0
     ; CHECK-NEXT:    s_mov_b32 s8, s2
     ; CHECK-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[8:11], s6 offset:92
     ; CHECK-NEXT:    s_endpgm
    -  call void @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 92, i32 %soffset, i32 0)
    +  %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 92, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -62,11 +62,11 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
     ; CHECK-NEXT:    s_mov_b32 s8, s2
     ; CHECK-NEXT:    buffer_atomic_add_f32 v0, v1, s[8:11], s6 offen slc
     ; CHECK-NEXT:    s_endpgm
    -  call void @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
    +  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
       ret void
     }
     
    -declare void @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg) #0
    -declare void @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg) #0
    +declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg) #0
    +declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg) #0
     
     attributes #0 = { nounwind }
    diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.fadd.ll
    index ccd6dc912b66c..3df101ea6fdda 100644
    --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.fadd.ll
    +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.fadd.ll
    @@ -11,7 +11,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
     ; CHECK-NEXT:    s_mov_b32 s8, s2
     ; CHECK-NEXT:    buffer_atomic_add_f32 v0, v[1:2], s[8:11], s6 idxen offen
     ; CHECK-NEXT:    s_endpgm
    -  call void @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
    +  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -25,7 +25,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
     ; CHECK-NEXT:    s_mov_b32 s8, s2
     ; CHECK-NEXT:    buffer_atomic_add_f32 v0, v1, s[8:11], s6 idxen
     ; CHECK-NEXT:    s_endpgm
    -  call void @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
    +  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
       ret void
     }
     
    @@ -38,7 +38,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
     ; CHECK-NEXT:    s_mov_b32 s8, s2
     ; CHECK-NEXT:    buffer_atomic_add_f32 v0, v[1:2], s[8:11], s6 idxen offen slc
     ; CHECK-NEXT:    s_endpgm
    -  call void @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
    +  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
       ret void
     }
     
    @@ -51,11 +51,11 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc
     ; CHECK-NEXT:    s_mov_b32 s8, s2
     ; CHECK-NEXT:    buffer_atomic_pk_add_f16 v0, v[1:2], s[8:11], s6 idxen offen
     ; CHECK-NEXT:    s_endpgm
    -  call void @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
    +  %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
       ret void
     }
     
    -declare void @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) #0
    -declare void @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg) #0
    +declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) #0
    +declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg) #0
     
     attributes #0 = { nounwind }
    diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
    index fb74c0829fcde..d7fa172f501e7 100644
    --- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
    +++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
    @@ -29,12 +29,12 @@ define void @shl_base_global_ptr_global_atomic_fadd(i32 addrspace(1)* %out, i64
       %cast = ptrtoint i32 addrspace(1)* %arrayidx0 to i64
       %shl = shl i64 %cast, 2
       %castback = inttoptr i64 %shl to float addrspace(1)*
    -  call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %castback, float 100.0)
    +  call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %castback, float 100.0)
       store volatile i64 %cast, i64 addrspace(1)* %extra.use, align 4
       ret void
     }
     
    -declare void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* nocapture, float) #1
    +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* nocapture, float) #1
     
     attributes #0 = { nounwind }
     attributes #1 = { argmemonly nounwind willreturn }
    
    From a4e35cc2ec1036832e7626191f8b9f0e3169477c Mon Sep 17 00:00:00 2001
    From: Volkan Keles 
    Date: Tue, 15 Sep 2020 15:50:34 -0700
    Subject: [PATCH 0756/1079] GlobalISel: Add combines for G_TRUNC
    
    https://reviews.llvm.org/D87050
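
The two folds rest on simple width identities: trunc ([asz]ext x) collapses
to x (or to a single ext/trunc) when only the widths differ, and
trunc (shl x, K) can do the shift in the narrow type when K is known to be
smaller than the destination width. A minimal standalone C++ sketch of those
identities (plain integers stand in for virtual registers; illustrative
only, not the combiner code):

  #include <cassert>
  #include <cstdint>

  int main() {
    // trunc(s32 -> s16) of ext(s16 -> s32) gives back the original value.
    uint16_t X = 0xBEEF;
    uint32_t Ext = X; // zero extension; any [asz]ext preserves the low bits
    assert(static_cast<uint16_t>(Ext) == X);

    // trunc (shl x, K) == shl (trunc x), K when K < destination width.
    uint32_t Wide = 0x12345678;
    for (unsigned K = 0; K < 16; ++K) {
      uint16_t ShlThenTrunc = static_cast<uint16_t>(Wide << K);
      uint16_t TruncThenShl =
          static_cast<uint16_t>(static_cast<uint16_t>(Wide) << K);
      assert(ShlThenTrunc == TruncThenShl);
    }
    return 0;
  }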
    ---
     .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |  13 ++
     .../include/llvm/Target/GlobalISel/Combine.td |  22 ++-
     .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  77 ++++++++++
     .../AArch64/GlobalISel/arm64-fallback.ll      |   4 +-
     .../AArch64/GlobalISel/combine-trunc.mir      | 142 ++++++++++++++++++
     llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll    |  16 +-
     llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll   |   7 +-
     7 files changed, 264 insertions(+), 17 deletions(-)
     create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir
    
    diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    index 3fd55386b054b..faf9646ebf4f4 100644
    --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    @@ -298,6 +298,19 @@ class CombinerHelper {
       bool matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src);
       bool applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src);
     
    +  /// Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x).
+  bool matchCombineTruncOfExt(MachineInstr &MI,
+                              std::pair<Register, unsigned> &MatchInfo);
+  bool applyCombineTruncOfExt(MachineInstr &MI,
+                              std::pair<Register, unsigned> &MatchInfo);
    +
    +  /// Transform trunc (shl x, K) to shl (trunc x),
    +  /// K => K < VT.getScalarSizeInBits().
+  bool matchCombineTruncOfShl(MachineInstr &MI,
+                              std::pair<Register, Register> &MatchInfo);
+  bool applyCombineTruncOfShl(MachineInstr &MI,
+                              std::pair<Register, Register> &MatchInfo);
    +
       /// Return true if any explicit use operand on \p MI is defined by a
       /// G_IMPLICIT_DEF.
       bool matchAnyExplicitUseIsUndef(MachineInstr &MI);
    diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
    index fa75d7d95489b..902b250359900 100644
    --- a/llvm/include/llvm/Target/GlobalISel/Combine.td
    +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
    @@ -202,7 +202,7 @@ def binop_left_undef_to_zero: GICombineRule<
     // replaced with undef.
     def propagate_undef_any_op: GICombineRule<
       (defs root:$root),
    -  (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR):$root,
    +  (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC):$root,
              [{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]),
       (apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
     
    @@ -437,6 +437,24 @@ def unmerge_zext_to_zext : GICombineRule<
       (apply [{ return Helper.applyCombineUnmergeZExtToZExt(*${d}); }])
     >;
     
    +// Fold trunc ([asz]ext x) -> x or ([asz]ext x) or (trunc x).
+def trunc_ext_fold_matchinfo : GIDefMatchData<"std::pair<Register, unsigned>">;
    +def trunc_ext_fold: GICombineRule <
    +  (defs root:$root, trunc_ext_fold_matchinfo:$matchinfo),
    +  (match (wip_match_opcode G_TRUNC):$root,
    +         [{ return Helper.matchCombineTruncOfExt(*${root}, ${matchinfo}); }]),
    +  (apply [{ return Helper.applyCombineTruncOfExt(*${root}, ${matchinfo}); }])
    +>;
    +
    +// Fold trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits().
+def trunc_shl_matchinfo : GIDefMatchData<"std::pair<Register, Register>">;
    +def trunc_shl: GICombineRule <
    +  (defs root:$root, trunc_shl_matchinfo:$matchinfo),
    +  (match (wip_match_opcode G_TRUNC):$root,
    +         [{ return Helper.matchCombineTruncOfShl(*${root}, ${matchinfo}); }]),
    +  (apply [{ return Helper.applyCombineTruncOfShl(*${root}, ${matchinfo}); }])
    +>;
    +
     // FIXME: These should use the custom predicate feature once it lands.
     def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                          undef_to_negative_one,
    @@ -469,4 +487,4 @@ def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
         known_bits_simplifications, ext_ext_fold,
         not_cmp_fold, opt_brcond_by_inverting_cond,
         unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc,
    -    unmerge_zext_to_zext]>;
    +    unmerge_zext_to_zext, trunc_ext_fold, trunc_shl]>;
    diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    index 5eff975127d77..2b67f0785aeab 100644
    --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    @@ -2029,6 +2029,83 @@ bool CombinerHelper::applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
       return true;
     }
     
    +bool CombinerHelper::matchCombineTruncOfExt(
+    MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
    +  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
    +  Register SrcReg = MI.getOperand(1).getReg();
    +  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
    +  unsigned SrcOpc = SrcMI->getOpcode();
    +  if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
    +      SrcOpc == TargetOpcode::G_ZEXT) {
    +    MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
    +    return true;
    +  }
    +  return false;
    +}
    +
    +bool CombinerHelper::applyCombineTruncOfExt(
+    MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
    +  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
    +  Register SrcReg = MatchInfo.first;
    +  unsigned SrcExtOp = MatchInfo.second;
    +  Register DstReg = MI.getOperand(0).getReg();
    +  LLT SrcTy = MRI.getType(SrcReg);
    +  LLT DstTy = MRI.getType(DstReg);
    +  if (SrcTy == DstTy) {
    +    MI.eraseFromParent();
    +    replaceRegWith(MRI, DstReg, SrcReg);
    +    return true;
    +  }
    +  Builder.setInstrAndDebugLoc(MI);
    +  if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
    +    Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
    +  else
    +    Builder.buildTrunc(DstReg, SrcReg);
    +  MI.eraseFromParent();
    +  return true;
    +}
    +
    +bool CombinerHelper::matchCombineTruncOfShl(
+    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
    +  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
    +  Register DstReg = MI.getOperand(0).getReg();
    +  Register SrcReg = MI.getOperand(1).getReg();
    +  LLT DstTy = MRI.getType(DstReg);
    +  Register ShiftSrc;
    +  Register ShiftAmt;
    +
    +  if (MRI.hasOneNonDBGUse(SrcReg) &&
    +      mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) &&
    +      isLegalOrBeforeLegalizer(
    +          {TargetOpcode::G_SHL,
    +           {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
    +    KnownBits Known = KB->getKnownBits(ShiftAmt);
    +    unsigned Size = DstTy.getSizeInBits();
    +    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
    +      MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
    +      return true;
    +    }
    +  }
    +  return false;
    +}
    +
    +bool CombinerHelper::applyCombineTruncOfShl(
+    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
    +  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
    +  Register DstReg = MI.getOperand(0).getReg();
    +  Register SrcReg = MI.getOperand(1).getReg();
    +  LLT DstTy = MRI.getType(DstReg);
    +  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
    +
    +  Register ShiftSrc = MatchInfo.first;
    +  Register ShiftAmt = MatchInfo.second;
    +  Builder.setInstrAndDebugLoc(MI);
    +  Builder.buildShl(DstReg, Builder.buildTrunc(DstTy, ShiftSrc),
    +                   Builder.buildTrunc(DstTy, ShiftAmt), SrcMI->getFlags());
    +  MI.eraseFromParent();
    +  return true;
    +}
    +
     bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
       return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
         return MO.isReg() &&
    diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
    index 0b3371501ef89..a90d899ec3aa4 100644
    --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
    +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
    @@ -107,8 +107,8 @@ end:
     ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %{{[0-9]+}}:_(s96) = G_ADD %{{[0-9]+}}:_, %{{[0-9]+}}:_ (in function: nonpow2_add_narrowing)
     ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_add_narrowing
     ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_add_narrowing:
    -define void @nonpow2_add_narrowing() {
    -  %a = add i128 undef, undef
    +define void @nonpow2_add_narrowing(i128 %x, i128 %y) {
    +  %a = add i128 %x, %y
       %b = trunc i128 %a to i96
       %dummy = add i96 %b, %b
       store i96 %dummy, i96* undef
    diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir
    new file mode 100644
    index 0000000000000..eb1652cc0dba0
    --- /dev/null
    +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir
    @@ -0,0 +1,142 @@
    +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
    +# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs  %s | FileCheck %s
    +---
    +name:            test_combine_trunc_undef
    +body:             |
    +  bb.1:
    +    ; CHECK-LABEL: name: test_combine_trunc_undef
    +    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
    +    ; CHECK: $w0 = COPY [[DEF]](s32)
    +    %0:_(s64) = G_IMPLICIT_DEF
    +    %1:_(s32) = G_TRUNC %0(s64)
    +    $w0 = COPY %1(s32)
    +...
    +---
    +name:            test_combine_trunc_undef_vec
    +body:             |
    +  bb.1:
    +    ; CHECK-LABEL: name: test_combine_trunc_undef_vec
    +    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
    +    ; CHECK: $x0 = COPY [[DEF]](<2 x s32>)
    +    %0:_(<2 x s64>) = G_IMPLICIT_DEF
    +    %1:_(<2 x s32>) = G_TRUNC %0(<2 x s64>)
    +    $x0 = COPY %1(<2 x s32>)
    +...
    +---
    +name:            test_combine_trunc_anyext_s32_s16
    +body:             |
    +  bb.1:
    +  liveins: $h0
    +    ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s16
    +    ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
    +    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16)
    +    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
    +    %0:_(s16) = COPY $h0
    +    %1:_(s64) = G_ANYEXT %0(s16)
    +    %2:_(s32) = G_TRUNC %1(s64)
    +    $w0 = COPY %2(s32)
    +...
    +---
    +name:            test_combine_trunc_anyext_s32_s16_vec
    +body:             |
    +  bb.1:
    +  liveins: $s0
    +    ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s16_vec
    +    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $s0
    +    ; CHECK: [[ANYEXT:%[0-9]+]]:_(<2 x s32>) = G_ANYEXT [[COPY]](<2 x s16>)
    +    ; CHECK: $x0 = COPY [[ANYEXT]](<2 x s32>)
    +    %0:_(<2 x s16>) = COPY $s0
    +    %1:_(<2 x s64>) = G_ANYEXT %0(<2 x s16>)
    +    %2:_(<2 x s32>) = G_TRUNC %1(<2 x s64>)
    +    $x0 = COPY %2(<2 x s32>)
    +...
    +---
    +name:            test_combine_trunc_sext_s32_s16
    +body:             |
    +  bb.1:
    +  liveins: $h0
    +    ; CHECK-LABEL: name: test_combine_trunc_sext_s32_s16
    +    ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
    +    ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16)
    +    ; CHECK: $w0 = COPY [[SEXT]](s32)
    +    %0:_(s16) = COPY $h0
    +    %1:_(s64) = G_SEXT %0(s16)
    +    %2:_(s32) = G_TRUNC %1(s64)
    +    $w0 = COPY %2(s32)
    +...
    +---
    +name:            test_combine_trunc_zext_s32_s16
    +body:             |
    +  bb.1:
    +  liveins: $h0
    +    ; CHECK-LABEL: name: test_combine_trunc_zext_s32_s16
    +    ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
    +    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16)
    +    ; CHECK: $w0 = COPY [[ZEXT]](s32)
    +    %0:_(s16) = COPY $h0
    +    %1:_(s64) = G_ZEXT %0(s16)
    +    %2:_(s32) = G_TRUNC %1(s64)
    +    $w0 = COPY %2(s32)
    +...
    +---
    +name:            test_combine_trunc_anyext_s32_s32
    +body:             |
    +  bb.1:
    +  liveins: $w0
    +    ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s32
    +    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
    +    ; CHECK: $w0 = COPY [[COPY]](s32)
    +    %0:_(s32) = COPY $w0
    +    %1:_(s64) = G_ANYEXT %0(s32)
    +    %2:_(s32) = G_TRUNC %1(s64)
    +    $w0 = COPY %2(s32)
    +...
    +---
    +name:            test_combine_trunc_anyext_s32_s64
    +body:             |
    +  bb.1:
    +  liveins: $x0
    +    ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s64
    +    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
    +    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
    +    ; CHECK: $w0 = COPY [[TRUNC]](s32)
    +    %0:_(s64) = COPY $x0
    +    %1:_(s128) = G_ANYEXT %0(s64)
    +    %2:_(s32) = G_TRUNC %1(s128)
    +    $w0 = COPY %2(s32)
    +...
    +---
    +name:            test_combine_trunc_shl_s32_by_2
    +body:             |
    +  bb.1:
    +  liveins: $w0
    +    ; CHECK-LABEL: name: test_combine_trunc_shl_s32_by_2
    +    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
    +    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
    +    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    +    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
    +    ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
    +    ; CHECK: $h0 = COPY [[SHL]](s16)
    +    %0:_(s32) = COPY $w0
    +    %1:_(s32) = G_CONSTANT i32 2
    +    %2:_(s32) = G_SHL %0(s32), %1(s32)
    +    %3:_(s16) = G_TRUNC %2(s32)
    +    $h0 = COPY %3(s16)
    +...
    +---
    +name:            test_combine_trunc_shl_s32_by_17
    +body:             |
    +  bb.1:
    +  liveins: $w0
    +    ; CHECK-LABEL: name: test_combine_trunc_shl_s32_by_17
    +    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
    +    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
    +    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
    +    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
    +    ; CHECK: $h0 = COPY [[TRUNC]](s16)
    +    %0:_(s32) = COPY $w0
    +    %1:_(s32) = G_CONSTANT i32 17
    +    %2:_(s32) = G_SHL %0(s32), %1(s32)
    +    %3:_(s16) = G_TRUNC %2(s32)
    +    $h0 = COPY %3(s16)
    +...
    diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
    index f58e26604529e..ff16d8a6fffaa 100644
    --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
    +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
    @@ -82,14 +82,14 @@ define amdgpu_ps i8 @s_shl_i8_7(i8 inreg %value) {
     ;
     ; GFX8-LABEL: s_shl_i8_7:
     ; GFX8:       ; %bb.0:
    -; GFX8-NEXT:    s_and_b32 s0, s0, 0xff
    -; GFX8-NEXT:    s_lshl_b32 s0, s0, 7
    +; GFX8-NEXT:    s_bfe_u32 s1, 7, 0x100000
    +; GFX8-NEXT:    s_lshl_b32 s0, s0, s1
     ; GFX8-NEXT:    ; return to shader part epilog
     ;
     ; GFX9-LABEL: s_shl_i8_7:
     ; GFX9:       ; %bb.0:
    -; GFX9-NEXT:    s_and_b32 s0, s0, 0xff
    -; GFX9-NEXT:    s_lshl_b32 s0, s0, 7
    +; GFX9-NEXT:    s_bfe_u32 s1, 7, 0x100000
    +; GFX9-NEXT:    s_lshl_b32 s0, s0, s1
     ; GFX9-NEXT:    ; return to shader part epilog
       %result = shl i8 %value, 7
       ret i8 %result
    @@ -426,14 +426,14 @@ define amdgpu_ps i16 @s_shl_i16_15(i16 inreg %value) {
     ;
     ; GFX8-LABEL: s_shl_i16_15:
     ; GFX8:       ; %bb.0:
    -; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
    -; GFX8-NEXT:    s_lshl_b32 s0, s0, 15
    +; GFX8-NEXT:    s_bfe_u32 s1, 15, 0x100000
    +; GFX8-NEXT:    s_lshl_b32 s0, s0, s1
     ; GFX8-NEXT:    ; return to shader part epilog
     ;
     ; GFX9-LABEL: s_shl_i16_15:
     ; GFX9:       ; %bb.0:
    -; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
    -; GFX9-NEXT:    s_lshl_b32 s0, s0, 15
    +; GFX9-NEXT:    s_bfe_u32 s1, 15, 0x100000
    +; GFX9-NEXT:    s_lshl_b32 s0, s0, s1
     ; GFX9-NEXT:    ; return to shader part epilog
       %result = shl i16 %value, 15
       ret i16 %result
    diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
    index 4edc231fc1410..9139cd029adda 100644
    --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
    +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
    @@ -37,7 +37,6 @@ define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> in
     ; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
     ; GFX8-NEXT:    s_mov_b32 s3, s2
     ; GFX8-NEXT:    s_and_b32 s0, s0, s2
    -; GFX8-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
     ; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
     ; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
     ; GFX8-NEXT:    s_and_b32 s0, s0, s2
    @@ -121,10 +120,8 @@ define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> in
     ; GFX8-NEXT:    s_mov_b32 s5, s4
     ; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
     ; GFX8-NEXT:    s_and_b32 s6, s1, s4
    -; GFX8-NEXT:    s_and_b64 s[0:1], s[2:3], s[4:5]
    -; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[4:5]
    -; GFX8-NEXT:    s_and_b64 s[2:3], s[6:7], s[4:5]
    -; GFX8-NEXT:    s_xor_b64 s[2:3], s[2:3], s[4:5]
    +; GFX8-NEXT:    s_xor_b64 s[0:1], s[2:3], s[4:5]
    +; GFX8-NEXT:    s_xor_b64 s[2:3], s[6:7], s[4:5]
     ; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
     ; GFX8-NEXT:    s_and_b32 s0, s0, s4
     ; GFX8-NEXT:    s_or_b32 s0, s1, s0
    
    From ae726fecae9a1cc9c50de5a9f6e860056f82c556 Mon Sep 17 00:00:00 2001
    From: Jan Korous 
    Date: Tue, 18 Aug 2020 22:36:16 -0700
    Subject: [PATCH 0757/1079] [SourceManager] Explicitly check for potential
     iterator underflow
    
    Differential Revision: https://reviews.llvm.org/D86231
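
The hazard being guarded against: std::map::upper_bound returns begin()
when every key in the map is greater than the probed value, and
decrementing begin() is undefined behavior. A standalone illustration of
the guarded pattern the patch adopts (plain std::map with illustrative
names, not the SourceManager code):

  #include <cassert>
  #include <map>

  // Return the value of the greatest key <= Offset, or Offset itself when
  // no such key exists (mirroring the patch, which returns Loc unchanged).
  unsigned lookupFloorEntry(const std::map<unsigned, unsigned> &M,
                            unsigned Offset) {
    auto I = M.upper_bound(Offset);
    if (I == M.begin()) // every key > Offset: nothing to decrement to
      return Offset;
    --I;
    return I->second;
  }

  int main() {
    std::map<unsigned, unsigned> M{{10, 1}, {20, 2}};
    assert(lookupFloorEntry(M, 5) == 5);  // would underflow without the guard
    assert(lookupFloorEntry(M, 15) == 1); // normal case: floor key is 10
    return 0;
  }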
    ---
     clang/lib/Basic/SourceManager.cpp | 5 +++++
     1 file changed, 5 insertions(+)
    
    diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
    index 0a76c78cd44fb..0f194403bf04a 100644
    --- a/clang/lib/Basic/SourceManager.cpp
    +++ b/clang/lib/Basic/SourceManager.cpp
    @@ -1936,6 +1936,11 @@ SourceManager::getMacroArgExpandedLocation(SourceLocation Loc) const {
     
       assert(!MacroArgsCache->empty());
       MacroArgsMap::iterator I = MacroArgsCache->upper_bound(Offset);
+  // In case every element in MacroArgsCache is greater than Offset, we can't
+  // decrement the iterator.
    +  if (I == MacroArgsCache->begin())
    +    return Loc;
    +
       --I;
     
       unsigned MacroArgBeginOffs = I->first;
    
    From 61fc10d6a520f267e11009ce8fce88d73615796b Mon Sep 17 00:00:00 2001
    From: Mircea Trofin 
    Date: Mon, 14 Sep 2020 10:45:00 -0700
    Subject: [PATCH 0758/1079] [ThinLTO] add post-thinlto-merge option to
     -lto-embed-bitcode
    
This embeds bitcode after the (Thin)LTO merge, but before optimizations.
When the ThinLTO backend is called from clang, the .llvmcmd section is
also produced. Producing it when the caller is the linker has no
motivation yet, and would require plumbing command line args through.
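
A compileable model of the new gating in the backend's opt() step follows
(all types and names here are local stand-ins, not the LLVM API; the real
code calls llvm::EmbedBitcodeInModule as shown in the diff below):

  #include <cstdint>
  #include <vector>

  enum class LTOBitcodeEmbedding {
    DoNotEmbed,
    EmbedOptimized,
    EmbedPostMergePreOptimized
  };

  struct Module {}; // stand-in for llvm::Module

  // Stand-in for the embedding call; CmdArgs may be null when the caller
  // is the linker rather than clang.
  static void embedBitcode(Module &, const std::vector<uint8_t> *CmdArgs) {
    (void)CmdArgs;
  }

  static void runOpt(Module &M, LTOBitcodeEmbedding Mode,
                     const std::vector<uint8_t> *CmdArgs) {
    if (Mode == LTOBitcodeEmbedding::EmbedPostMergePreOptimized)
      embedBitcode(M, CmdArgs); // snapshot before the pass pipeline runs
    // ... run the usual (Thin)LTO optimization pipeline ...
  }

  int main() {
    Module M;
    runOpt(M, LTOBitcodeEmbedding::EmbedPostMergePreOptimized, nullptr);
    return 0;
  }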
    
    Differential Revision: https://reviews.llvm.org/D87636
    ---
     clang/lib/CodeGen/BackendUtil.cpp           |  7 +++--
     clang/test/CodeGen/Inputs/start-lib1.ll     |  9 ++++++
     clang/test/CodeGen/Inputs/start-lib2.ll     |  6 ++++
     clang/test/CodeGen/thinlto_embed_bitcode.ll | 30 ++++++++++++++++++
     llvm/include/llvm/LTO/LTOBackend.h          |  3 +-
     llvm/lib/LTO/LTOBackend.cpp                 | 34 ++++++++++++++++++---
     llvm/test/LTO/X86/Inputs/start-lib1.ll      |  1 +
     llvm/test/LTO/X86/embed-bitcode.ll          |  9 +++++-
     8 files changed, 90 insertions(+), 9 deletions(-)
     create mode 100644 clang/test/CodeGen/Inputs/start-lib1.ll
     create mode 100644 clang/test/CodeGen/Inputs/start-lib2.ll
     create mode 100644 clang/test/CodeGen/thinlto_embed_bitcode.ll
    
    diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
    index 5fc80d4fae71b..01f7e239f7909 100644
    --- a/clang/lib/CodeGen/BackendUtil.cpp
    +++ b/clang/lib/CodeGen/BackendUtil.cpp
    @@ -1647,9 +1647,10 @@ static void runThinLTOBackend(
         Conf.CGFileType = getCodeGenFileType(Action);
         break;
       }
    -  if (Error E = thinBackend(
    -          Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
    -          ModuleToDefinedGVSummaries[M->getModuleIdentifier()], ModuleMap)) {
    +  if (Error E =
    +          thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
    +                      ModuleToDefinedGVSummaries[M->getModuleIdentifier()],
    +                      ModuleMap, &CGOpts.CmdArgs)) {
         handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
           errs() << "Error running ThinLTO backend: " << EIB.message() << '\n';
         });
    diff --git a/clang/test/CodeGen/Inputs/start-lib1.ll b/clang/test/CodeGen/Inputs/start-lib1.ll
    new file mode 100644
    index 0000000000000..18b6ea25386f5
    --- /dev/null
    +++ b/clang/test/CodeGen/Inputs/start-lib1.ll
    @@ -0,0 +1,9 @@
    +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
    +target triple = "x86_64-unknown-linux-gnu"
    +
    +declare void @bar()
    +
    +define void @foo() {
    +  call void @bar()
    +  ret void
    +}
    diff --git a/clang/test/CodeGen/Inputs/start-lib2.ll b/clang/test/CodeGen/Inputs/start-lib2.ll
    new file mode 100644
    index 0000000000000..68b3c8362808e
    --- /dev/null
    +++ b/clang/test/CodeGen/Inputs/start-lib2.ll
    @@ -0,0 +1,6 @@
    +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
    +target triple = "x86_64-unknown-linux-gnu"
    +
    +define void @bar() {
    +  ret void
    +}
    diff --git a/clang/test/CodeGen/thinlto_embed_bitcode.ll b/clang/test/CodeGen/thinlto_embed_bitcode.ll
    new file mode 100644
    index 0000000000000..4efb525e5f3e6
    --- /dev/null
    +++ b/clang/test/CodeGen/thinlto_embed_bitcode.ll
    @@ -0,0 +1,30 @@
    +; REQUIRES: x86-registered-target
    +
+; check that -lto-embed-bitcode=post-merge-pre-opt does not perform optimizations:
+; we expect 't1' - i.e. start-lib1.ll's products - to have both foo and bar defined,
+; but the bar call to still be made from foo.
    +; RUN: opt -module-summary %p/Inputs/start-lib1.ll -o %t1.bc
    +; RUN: opt -module-summary %p/Inputs/start-lib2.ll -o %t2.bc
    +; RUN: llvm-lto -thinlto -o %t.o %t1.bc %t2.bc
    +
    +; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t.o -x ir %t1.bc -c -fthinlto-index=%t.o.thinlto.bc -mllvm -lto-embed-bitcode=post-merge-pre-opt
    +; RUN: llvm-readelf -S %t.o | FileCheck %s --check-prefixes=CHECK-ELF,CHECK-CMD
    +; RUN: llvm-objcopy --dump-section=.llvmbc=%t-embedded.bc %t.o /dev/null
    +; RUN: llvm-dis %t-embedded.bc -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOOPT
    +
+; For the optimized case, we expect the inlining of bar into foo to happen.
    +; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t.o -x ir %t1.bc -c -fthinlto-index=%t.o.thinlto.bc -mllvm -lto-embed-bitcode=optimized
    +; RUN: llvm-readelf -S %t.o | FileCheck %s --check-prefixes=CHECK-ELF,CHECK-NO-CMD
    +; RUN: llvm-objcopy --dump-section=.llvmbc=%t-embedded.bc %t.o /dev/null
    +; RUN: llvm-dis %t-embedded.bc -o - | FileCheck %s --check-prefixes=CHECK,CHECK-OPT
    +
    +; CHECK-ELF:      .text   PROGBITS 0000000000000000 [[#%x,OFF:]] [[#%x,SIZE:]] 00 AX 0
    +; CHECK-ELF-NEXT: .llvmbc PROGBITS 0000000000000000 [[#%x,OFF:]] [[#%x,SIZE:]] 00    0
+; CHECK-CMD:  .llvmcmd
+; CHECK-NO-CMD-NOT: .llvmcmd
    +
    +; CHECK:          define void @foo() 
    +; CHECK-OPT-NEXT:   ret void
    +; CHECK-NOOPT-NEXT: call void @bar()
    +; CHECK-NOOPT: define available_externally void @bar() !thinlto_src_module !0 {
    +; CHECK-NOOPT-NEXT: ret void
    diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h
    index 0226e4a3fbf56..735969c47039b 100644
    --- a/llvm/include/llvm/LTO/LTOBackend.h
    +++ b/llvm/include/llvm/LTO/LTOBackend.h
    @@ -44,7 +44,8 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream,
                       Module &M, const ModuleSummaryIndex &CombinedIndex,
                       const FunctionImporter::ImportMapTy &ImportList,
                       const GVSummaryMapTy &DefinedGlobals,
-                  MapVector<StringRef, BitcodeModule> &ModuleMap);
+                  MapVector<StringRef, BitcodeModule> &ModuleMap,
+                  const std::vector<uint8_t> *CmdArgs = nullptr);
     
     Error finalizeOptimizationRemarks(
     std::unique_ptr<ToolOutputFile> DiagOutputFile);
    diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
    index 00309b6d712f8..4c5778e81184e 100644
    --- a/llvm/lib/LTO/LTOBackend.cpp
    +++ b/llvm/lib/LTO/LTOBackend.cpp
    @@ -50,9 +50,12 @@
     using namespace llvm;
     using namespace lto;
     
    +#define DEBUG_TYPE "lto-backend"
    +
     enum class LTOBitcodeEmbedding {
       DoNotEmbed = 0,
       EmbedOptimized = 1,
    +  EmbedPostMergePreOptimized = 2
     };
     
 static cl::opt<LTOBitcodeEmbedding> EmbedBitcode(
    @@ -60,7 +63,10 @@ static cl::opt EmbedBitcode(
         cl::values(clEnumValN(LTOBitcodeEmbedding::DoNotEmbed, "none",
                               "Do not embed"),
                    clEnumValN(LTOBitcodeEmbedding::EmbedOptimized, "optimized",
    -                          "Embed after all optimization passes")),
    +                          "Embed after all optimization passes"),
    +               clEnumValN(LTOBitcodeEmbedding::EmbedPostMergePreOptimized,
    +                          "post-merge-pre-opt",
    +                          "Embed post merge, but before optimizations")),
         cl::desc("Embed LLVM bitcode in object files produced by LTO"));
     
     LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) {
    @@ -346,7 +352,25 @@ static void runOldPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
     
     bool opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
              bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
    -         const ModuleSummaryIndex *ImportSummary) {
    +         const ModuleSummaryIndex *ImportSummary,
+         const std::vector<uint8_t> *CmdArgs = nullptr) {
    +  if (EmbedBitcode == LTOBitcodeEmbedding::EmbedPostMergePreOptimized) {
    +    // FIXME: the motivation for capturing post-merge bitcode and command line
    +    // is replicating the compilation environment from bitcode, without needing
    +    // to understand the dependencies (the functions to be imported). This
    +    // assumes a clang - based invocation, case in which we have the command
    +    // line.
    +    // It's not very clear how the above motivation would map in the
    +    // linker-based case, so we currently don't plumb the command line args in
    +    // that case.
    +    if (CmdArgs == nullptr)
    +      LLVM_DEBUG(
    +          dbgs() << "Post-(Thin)LTO merge bitcode embedding was requested, but "
    +                    "command line arguments are not available");
    +    llvm::EmbedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
    +                               /*EmbedBitcode*/ true,
    +                               /*EmbedMarker*/ false, CmdArgs);
    +  }
       // FIXME: Plumb the combined index into the new pass manager.
       if (!Conf.OptPipeline.empty())
         runNewPMCustomPasses(Conf, Mod, TM, Conf.OptPipeline, Conf.AAPipeline,
    @@ -531,7 +555,8 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
                            Module &Mod, const ModuleSummaryIndex &CombinedIndex,
                            const FunctionImporter::ImportMapTy &ImportList,
                            const GVSummaryMapTy &DefinedGlobals,
-                       MapVector<StringRef, BitcodeModule> &ModuleMap) {
+                       MapVector<StringRef, BitcodeModule> &ModuleMap,
+                       const std::vector<uint8_t> *CmdArgs) {
   Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod);
       if (!TOrErr)
         return TOrErr.takeError();
    @@ -599,7 +624,8 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
         return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
     
       if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true,
    -           /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex))
    +           /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
    +           CmdArgs))
         return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
     
       codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex);
    diff --git a/llvm/test/LTO/X86/Inputs/start-lib1.ll b/llvm/test/LTO/X86/Inputs/start-lib1.ll
    index 9f42e6afff0f3..18b6ea25386f5 100644
    --- a/llvm/test/LTO/X86/Inputs/start-lib1.ll
    +++ b/llvm/test/LTO/X86/Inputs/start-lib1.ll
    @@ -4,5 +4,6 @@ target triple = "x86_64-unknown-linux-gnu"
     declare void @bar()
     
     define void @foo() {
    +  call void @bar()
       ret void
     }
    diff --git a/llvm/test/LTO/X86/embed-bitcode.ll b/llvm/test/LTO/X86/embed-bitcode.ll
    index c8b4d0faa7479..bdddd079d2265 100644
    --- a/llvm/test/LTO/X86/embed-bitcode.ll
    +++ b/llvm/test/LTO/X86/embed-bitcode.ll
    @@ -11,13 +11,20 @@
     ; RUN: llvm-lto2 run -r %t1.o,_start,px -r %t2.o,foo,px -r %t3.o,bar,px -r %t2.o,bar,lx -lto-embed-bitcode=optimized -o %t3 %t1.o %t2.o %t3.o
     ; RUN: llvm-readelf -S %t3.0 | FileCheck %s --check-prefix=CHECK-ELF
     ; RUN: llvm-objcopy --dump-section=.llvmbc=%t-embedded.bc %t3.0 /dev/null
    -; RUN: llvm-dis %t-embedded.bc -o - | FileCheck %s --check-prefix=CHECK-LL
    +; RUN: llvm-dis %t-embedded.bc -o - | FileCheck %s --check-prefixes=CHECK-LL,CHECK-OPT
    +
    +; RUN: llvm-lto2 run -r %t1.o,_start,px -r %t2.o,foo,px -r %t3.o,bar,px -r %t2.o,bar,lx -lto-embed-bitcode=post-merge-pre-opt -o %t3 %t1.o %t2.o %t3.o
    +; RUN: llvm-readelf -S %t3.0 | FileCheck %s --check-prefix=CHECK-ELF
    +; RUN: llvm-objcopy --dump-section=.llvmbc=%t-embedded.bc %t3.0 /dev/null
    +; RUN: llvm-dis %t-embedded.bc -o - | FileCheck %s --check-prefixes=CHECK-LL,CHECK-NOOPT
     
     ; CHECK-ELF:      .text   PROGBITS 0000000000000000 [[#%x,OFF:]] [[#%x,SIZE:]] 00 AX 0
     ; CHECK-ELF-NEXT: .llvmbc PROGBITS 0000000000000000 [[#%x,OFF:]] [[#%x,SIZE:]] 00    0
     
     ; CHECK-LL: @_start
     ; CHECK-LL: @foo
    +; CHECK-OPT-NEXT: ret void
    +; CHECK-NOOPT-NEXT: call void @bar
     ; CHECK-LL: @bar
     
     target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
    
    From 97203cfd6bae0388f9dd22ddca592737324a2c72 Mon Sep 17 00:00:00 2001
    From: Aditya Nandakumar 
    Date: Tue, 15 Sep 2020 16:06:55 -0700
    Subject: [PATCH 0759/1079] [GISel] Add new GISel combiners for G_MUL
    
    https://reviews.llvm.org/D87668
    
The patch adds two new GICombinerRules, one for G_MUL(X, 1) and another for G_MUL(X, -1).
G_MUL(X, 1) is an identity combine, and G_MUL(X, -1) is replaced with G_SUB(0, X).
The patch additionally adds combiner tests for the AArch64 target to exercise the
new rules, and updates the AMDGPU GISel tests.
    
    Patch by mkitzan
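
Both rewrites are exact in two's-complement arithmetic, which is why the
combiner can apply them unconditionally. A standalone check of the
identities (plain integers, illustrative only):

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint64_t Samples[] = {0, 1, 42, 0x8000000000000000ULL,
                                0xFFFFFFFFFFFFFFFFULL};
    const uint64_t NegOne = ~uint64_t(0); // all-ones is -1 in two's complement
    for (uint64_t X : Samples) {
      assert(X * 1 == X);          // G_MUL(X, 1)  -> X
      assert(X * NegOne == 0 - X); // G_MUL(X, -1) -> G_SUB(0, X)
    }
    return 0;
  }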
    ---
     .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |   3 +
     .../include/llvm/Target/GlobalISel/Combine.td |  22 ++-
     .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  13 ++
     .../AArch64/GlobalISel/combine-mul.mir        | 134 ++++++++++++++++++
     4 files changed, 170 insertions(+), 2 deletions(-)
     create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-mul.mir
    
    diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    index faf9646ebf4f4..87d5e6a18c8ad 100644
    --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    @@ -311,6 +311,9 @@ class CombinerHelper {
       bool applyCombineTruncOfShl(MachineInstr &MI,
                               std::pair<Register, Register> &MatchInfo);
     
    +  /// Transform G_MUL(x, -1) to G_SUB(0, x)
    +  bool applyCombineMulByNegativeOne(MachineInstr &MI);
    +
       /// Return true if any explicit use operand on \p MI is defined by a
       /// G_IMPLICIT_DEF.
       bool matchAnyExplicitUseIsUndef(MachineInstr &MI);
    diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
    index 902b250359900..847a861c6b725 100644
    --- a/llvm/include/llvm/Target/GlobalISel/Combine.td
    +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
    @@ -255,6 +255,14 @@ def right_identity_zero: GICombineRule<
       (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
     >;
     
    +// Fold x op 1 -> x
    +def right_identity_one: GICombineRule<
    +  (defs root:$root),
    +  (match (wip_match_opcode G_MUL):$root,
    +    [{ return Helper.matchConstantOp(${root}->getOperand(2), 1); }]),
    +  (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
    +>;
    +
     // Fold (x op x) - > x
     def binop_same_val: GICombineRule<
       (defs root:$root),
    @@ -455,6 +463,14 @@ def trunc_shl: GICombineRule <
       (apply [{ return Helper.applyCombineTruncOfShl(*${root}, ${matchinfo}); }])
     >;
     
    +// Transform (mul x, -1) -> (sub 0, x)
    +def mul_by_neg_one: GICombineRule <
    +  (defs root:$root),
    +  (match (wip_match_opcode G_MUL):$root,
    +         [{ return Helper.matchConstantOp(${root}->getOperand(2), -1); }]),
    +  (apply [{ return Helper.applyCombineMulByNegativeOne(*${root}); }])
    +>;
    +
     // FIXME: These should use the custom predicate feature once it lands.
     def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                          undef_to_negative_one,
    @@ -468,7 +484,7 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
                                             binop_same_val, binop_left_to_zero,
                                             binop_right_to_zero, p2i_to_i2p,
                                             i2p_to_p2i, anyext_trunc_fold,
    -                                        fneg_fneg_fold]>;
    +                                        fneg_fneg_fold, right_identity_one]>;
     
     def known_bits_simplifications : GICombineGroup<[
       and_trivial_mask, redundant_sext_inreg]>;
    @@ -477,7 +493,9 @@ def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend]>;
     
     def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp]>;
     
    -def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd]>;
    +def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
    +                                       mul_by_neg_one]>;
    +
     def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
         combines_for_extload, combine_indexed_load_store, undef_combines,
         identity_combines, simplify_add_to_sub,
    diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    index 2b67f0785aeab..74215999ea60a 100644
    --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    @@ -2008,6 +2008,19 @@ bool CombinerHelper::applyCombineExtOfExt(
       return false;
     }
     
    +bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
    +  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
    +  Register DstReg = MI.getOperand(0).getReg();
    +  Register SrcReg = MI.getOperand(1).getReg();
    +  LLT DstTy = MRI.getType(DstReg);
    +
    +  Builder.setInstrAndDebugLoc(MI);
    +  Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
    +                   MI.getFlags());
    +  MI.eraseFromParent();
    +  return true;
    +}
    +
     bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
       assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG");
       Register SrcReg = MI.getOperand(1).getReg();
    diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-mul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-mul.mir
    new file mode 100644
    index 0000000000000..2f911693fd244
    --- /dev/null
    +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-mul.mir
    @@ -0,0 +1,134 @@
    +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
    +# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
    +
    +---
    +name:            mul_by_zero
    +alignment:       4
    +tracksRegLiveness: true
    +frameInfo:
    +  maxAlignment:    1
    +machineFunctionInfo: {}
    +body:             |
    +  bb.0:
    +    liveins: $x0
    +    ; CHECK-LABEL: name: mul_by_zero
    +    ; CHECK: liveins: $x0
    +    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
    +    ; CHECK: $x0 = COPY [[C]](s64)
    +    %0:_(s64) = COPY $x0
    +    %1:_(s64) = G_CONSTANT i64 0
    +    %2:_(s64) = G_MUL %0, %1(s64)
    +    $x0 = COPY %2(s64)
    +...
    +---
    +name:            mul_vector_by_zero
    +alignment:       4
    +tracksRegLiveness: true
    +frameInfo:
    +  maxAlignment:    1
    +machineFunctionInfo: {}
    +body:             |
    +  bb.0:
    +    liveins: $q0
    +    ; Currently not implemented.
    +    ; CHECK-LABEL: name: mul_vector_by_zero
    +    ; CHECK: liveins: $q0
    +    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    +    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    +    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
    +    ; CHECK: [[MUL:%[0-9]+]]:_(<4 x s32>) = G_MUL [[COPY]], [[BUILD_VECTOR]]
    +    ; CHECK: $q0 = COPY [[MUL]](<4 x s32>)
    +    %0:_(<4 x s32>) = COPY $q0
    +    %1:_(s32) = G_CONSTANT i32 0
    +    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
    +    %3:_(<4 x s32>) = G_MUL %0, %2(<4 x s32>)
    +    $q0 = COPY %3(<4 x s32>)
    +...
    +---
    +name:            mul_by_one
    +alignment:       4
    +tracksRegLiveness: true
    +frameInfo:
    +  maxAlignment:    1
    +machineFunctionInfo: {}
    +body:             |
    +  bb.0:
    +    liveins: $x0
    +    ; CHECK-LABEL: name: mul_by_one
    +    ; CHECK: liveins: $x0
    +    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
    +    ; CHECK: $x0 = COPY [[COPY]](s64)
    +    %0:_(s64) = COPY $x0
    +    %1:_(s64) = G_CONSTANT i64 1
    +    %2:_(s64) = G_MUL %0, %1(s64)
    +    $x0 = COPY %2(s64)
    +...
    +---
    +name:            mul_vector_by_one
    +alignment:       4
    +tracksRegLiveness: true
    +frameInfo:
    +  maxAlignment:    1
    +machineFunctionInfo: {}
    +body:             |
    +  bb.0:
    +    liveins: $q0
    +    ; Currently not implemented.
    +    ; CHECK-LABEL: name: mul_vector_by_one
    +    ; CHECK: liveins: $q0
    +    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    +    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
    +    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
    +    ; CHECK: [[MUL:%[0-9]+]]:_(<4 x s32>) = G_MUL [[COPY]], [[BUILD_VECTOR]]
    +    ; CHECK: $q0 = COPY [[MUL]](<4 x s32>)
    +    %0:_(<4 x s32>) = COPY $q0
    +    %1:_(s32) = G_CONSTANT i32 1
    +    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
    +    %3:_(<4 x s32>) = G_MUL %0, %2(<4 x s32>)
    +    $q0 = COPY %3(<4 x s32>)
    +...
    +---
    +name:            mul_by_neg_one
    +alignment:       4
    +tracksRegLiveness: true
    +frameInfo:
    +  maxAlignment:    1
    +machineFunctionInfo: {}
    +body:             |
    +  bb.0:
    +    liveins: $x0
    +    ; CHECK-LABEL: name: mul_by_neg_one
    +    ; CHECK: liveins: $x0
    +    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
    +    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
    +    ; CHECK: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[C]], [[COPY]]
    +    ; CHECK: $x0 = COPY [[SUB]](s64)
    +    %0:_(s64) = COPY $x0
    +    %1:_(s64) = G_CONSTANT i64 -1
    +    %2:_(s64) = G_MUL %0, %1(s64)
    +    $x0 = COPY %2(s64)
    +...
    +---
    +name:            mul_vector_by_neg_one
    +alignment:       4
    +tracksRegLiveness: true
    +frameInfo:
    +  maxAlignment:    1
    +machineFunctionInfo: {}
    +body:             |
    +  bb.0:
    +    liveins: $q0
    +    ; Currently not implemented.
    +    ; CHECK-LABEL: name: mul_vector_by_neg_one
    +    ; CHECK: liveins: $q0
    +    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    +    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
    +    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
    +    ; CHECK: [[MUL:%[0-9]+]]:_(<4 x s32>) = G_MUL [[COPY]], [[BUILD_VECTOR]]
    +    ; CHECK: $q0 = COPY [[MUL]](<4 x s32>)
    +    %0:_(<4 x s32>) = COPY $q0
    +    %1:_(s32) = G_CONSTANT i32 -1
    +    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
    +    %3:_(<4 x s32>) = G_MUL %0, %2(<4 x s32>)
    +    $q0 = COPY %3(<4 x s32>)
    +...
    
    From 2ea4c2c598b7c6f95b5d4db747bdf72770e586df Mon Sep 17 00:00:00 2001
    From: Wenlei He 
    Date: Tue, 15 Sep 2020 16:09:30 -0700
    Subject: [PATCH 0760/1079] [BFI] Make BFI information available through loop
     passes inside LoopStandardAnalysisResults
    MIME-Version: 1.0
    Content-Type: text/plain; charset=UTF-8
    Content-Transfer-Encoding: 8bit
    
    ~~D65060 uncovered that trying to use BFI in loop passes can lead to non-deterministic behavior when blocks are re-used while retaining old BFI data.~~
    
~~To make sure BFI is preserved through loop passes, a Value Handle (VH) callback is registered on blocks themselves. When a block is freed, the callback now also wipes out the accompanying BFI entry, so that stale BFI data can no longer persist, resolving the determinism issue.~~
    
~~An optimistic approach would be to incrementally update BFI information throughout the loop passes rather than only invalidating it on removed blocks. The issues with that are:~~
~~1. It is not clear how BFI information should be incrementally updated: if a block is duplicated, does its BFI information come with it? What if it's split/modified/moved around?~~
~~2. Even assuming we can address these problems, the implementation would be a massive undertaking.~~
    
~~There's a known need for BFI in LICM analysis, which requires correct but not incrementally updated BFI data. A follow-up change can register BFI in all loop passes so that this preserved but potentially lossy data is available to any loop pass that wants it.~~
    
See D75341 for an identical implementation of preserving BFI via VH callbacks. The previous statements do still apply, but that part no longer has to be in this diff because it's already upstream 😄.
    
    This diff also moves BFI to be a part of LoopStandardAnalysisResults since the previous method using getCachedResults now (correctly!) statically asserts (D72893) that this data isn't static through the loop passes.
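
As a hedged illustration (a sketch, not code from this diff; the helper name
is made up), a loop pass can now consult profile counts directly from the
analysis results bundle:

    // Returns true when profile data is available and Block is at least as
    // hot as the loop header. AR.BFI is nullptr unless the adaptor was
    // created with UseBlockFrequencyInfo=true and the function has profile
    // data.
    static bool isAsHotAsHeader(const Loop &L, const BasicBlock *Block,
                                const LoopStandardAnalysisResults &AR) {
      if (!AR.BFI)
        return false;
      return AR.BFI->getBlockFreq(Block) >= AR.BFI->getBlockFreq(L.getHeader());
    }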
    
    Testing
    Ninja check
    
    Reviewed By: asbirlea, nikic
    
    Differential Revision: https://reviews.llvm.org/D86156
    ---
     .../llvm/Analysis/LoopAnalysisManager.h       |  1 +
     .../llvm/Transforms/Scalar/LoopPassManager.h  | 17 +++++--
     llvm/lib/Passes/PassBuilder.cpp               | 47 +++++++++++--------
     llvm/lib/Transforms/Scalar/LICM.cpp           | 41 +++++++++-------
     llvm/lib/Transforms/Scalar/LoopDistribute.cpp |  3 +-
     .../Transforms/Scalar/LoopLoadElimination.cpp |  3 +-
     llvm/lib/Transforms/Scalar/LoopUnswitch.cpp   |  5 ++
     llvm/lib/Transforms/Utils/LoopVersioning.cpp  |  3 +-
     .../Transforms/Vectorize/LoopVectorize.cpp    |  3 +-
     llvm/test/Other/opt-O2-pipeline.ll            |  8 +++-
     .../Other/opt-O3-pipeline-enable-matrix.ll    |  8 +++-
     llvm/test/Other/opt-O3-pipeline.ll            |  8 +++-
     llvm/test/Other/opt-Os-pipeline.ll            |  8 +++-
     .../Transforms/Scalar/LoopPassManagerTest.cpp |  6 +++
     14 files changed, 111 insertions(+), 50 deletions(-)
    
    diff --git a/llvm/include/llvm/Analysis/LoopAnalysisManager.h b/llvm/include/llvm/Analysis/LoopAnalysisManager.h
    index 0e162e03bde14..11dbd15c86783 100644
    --- a/llvm/include/llvm/Analysis/LoopAnalysisManager.h
    +++ b/llvm/include/llvm/Analysis/LoopAnalysisManager.h
    @@ -57,6 +57,7 @@ struct LoopStandardAnalysisResults {
       ScalarEvolution &SE;
       TargetLibraryInfo &TLI;
       TargetTransformInfo &TTI;
    +  BlockFrequencyInfo *BFI;
       MemorySSA *MSSA;
     };
     
    diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
    index 751c1832ba6c3..821de6c70aa01 100644
    --- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
    +++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
    @@ -41,6 +41,7 @@
     #include "llvm/ADT/STLExtras.h"
     #include "llvm/Analysis/AliasAnalysis.h"
     #include "llvm/Analysis/BasicAliasAnalysis.h"
    +#include "llvm/Analysis/BlockFrequencyInfo.h"
     #include "llvm/Analysis/GlobalsModRef.h"
     #include "llvm/Analysis/LoopAnalysisManager.h"
     #include "llvm/Analysis/LoopInfo.h"
    @@ -233,9 +234,11 @@ class FunctionToLoopPassAdaptor
     : public PassInfoMixin<FunctionToLoopPassAdaptor<LoopPassT>> {
     public:
       explicit FunctionToLoopPassAdaptor(LoopPassT Pass, bool UseMemorySSA = false,
    +                                     bool UseBlockFrequencyInfo = false,
                                          bool DebugLogging = false)
           : Pass(std::move(Pass)), LoopCanonicalizationFPM(DebugLogging),
    -        UseMemorySSA(UseMemorySSA) {
    +        UseMemorySSA(UseMemorySSA),
    +        UseBlockFrequencyInfo(UseBlockFrequencyInfo) {
         LoopCanonicalizationFPM.addPass(LoopSimplifyPass());
         LoopCanonicalizationFPM.addPass(LCSSAPass());
       }
    @@ -267,6 +270,9 @@ class FunctionToLoopPassAdaptor
     MemorySSA *MSSA = UseMemorySSA
                           ? (&AM.getResult<MemorySSAAnalysis>(F).getMSSA())
                           : nullptr;
+    BlockFrequencyInfo *BFI = UseBlockFrequencyInfo && F.hasProfileData()
+                                  ? (&AM.getResult<BlockFrequencyAnalysis>(F))
+                                  : nullptr;
     LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F),
                                        AM.getResult<AssumptionAnalysis>(F),
                                        AM.getResult<DominatorTreeAnalysis>(F),
    @@ -274,6 +280,7 @@ class FunctionToLoopPassAdaptor
                                        AM.getResult<ScalarEvolutionAnalysis>(F),
                                        AM.getResult<TargetLibraryAnalysis>(F),
                                        AM.getResult<TargetIRAnalysis>(F),
    +                                       BFI,
                                            MSSA};
     
         // Setup the loop analysis manager from its proxy. It is important that
    @@ -370,6 +377,8 @@ class FunctionToLoopPassAdaptor
     PA.preserve<DominatorTreeAnalysis>();
     PA.preserve<LoopAnalysis>();
     PA.preserve<ScalarEvolutionAnalysis>();
+    if (UseBlockFrequencyInfo && F.hasProfileData())
+      PA.preserve<BlockFrequencyAnalysis>();
     if (UseMemorySSA)
       PA.preserve<MemorySSAAnalysis>();
         // FIXME: What we really want to do here is preserve an AA category, but
    @@ -389,6 +398,7 @@ class FunctionToLoopPassAdaptor
       FunctionPassManager LoopCanonicalizationFPM;
     
       bool UseMemorySSA = false;
    +  bool UseBlockFrequencyInfo = false;
     };
     
     /// A function to deduce a loop pass type and wrap it in the templated
    @@ -396,9 +406,10 @@ class FunctionToLoopPassAdaptor
 template <typename LoopPassT>
 FunctionToLoopPassAdaptor<LoopPassT>
     createFunctionToLoopPassAdaptor(LoopPassT Pass, bool UseMemorySSA = false,
    +                                bool UseBlockFrequencyInfo = false,
                                     bool DebugLogging = false) {
-  return FunctionToLoopPassAdaptor<LoopPassT>(std::move(Pass), UseMemorySSA,
-                                              DebugLogging);
+  return FunctionToLoopPassAdaptor<LoopPassT>(
+      std::move(Pass), UseMemorySSA, UseBlockFrequencyInfo, DebugLogging);
     }
     
     /// Pass for printing a loop's contents as textual IR.
    diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
    index 03b31c233361d..ddbc7a2fb4d5a 100644
    --- a/llvm/lib/Passes/PassBuilder.cpp
    +++ b/llvm/lib/Passes/PassBuilder.cpp
    @@ -520,13 +520,15 @@ FunctionPassManager PassBuilder::buildO1FunctionSimplificationPipeline(
   FPM.addPass(
       RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
       FPM.addPass(createFunctionToLoopPassAdaptor(
    -      std::move(LPM1), EnableMSSALoopDependency, DebugLogging));
    +      std::move(LPM1), EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true,
    +      DebugLogging));
       FPM.addPass(SimplifyCFGPass());
       FPM.addPass(InstCombinePass());
       // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
       // *All* loop passes must preserve it, in order to be able to use it.
       FPM.addPass(createFunctionToLoopPassAdaptor(
    -      std::move(LPM2), /*UseMemorySSA=*/false, DebugLogging));
    +      std::move(LPM2), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false,
    +      DebugLogging));
     
       // Delete small array after loop unroll.
       FPM.addPass(SROA());
    @@ -677,14 +679,16 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   FPM.addPass(
       RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
       FPM.addPass(createFunctionToLoopPassAdaptor(
    -      std::move(LPM1), EnableMSSALoopDependency, DebugLogging));
    +      std::move(LPM1), EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true,
    +      DebugLogging));
       FPM.addPass(SimplifyCFGPass());
       FPM.addPass(InstCombinePass());
       // The loop passes in LPM2 (IndVarSimplifyPass, LoopIdiomRecognizePass,
       // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
       // *All* loop passes must preserve it, in order to be able to use it.
       FPM.addPass(createFunctionToLoopPassAdaptor(
    -      std::move(LPM2), /*UseMemorySSA=*/false, DebugLogging));
    +      std::move(LPM2), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false,
    +      DebugLogging));
     
       // Delete small array after loop unroll.
       FPM.addPass(SROA());
    @@ -721,7 +725,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
       FPM.addPass(DSEPass());
       FPM.addPass(createFunctionToLoopPassAdaptor(
           LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
    -      EnableMSSALoopDependency, DebugLogging));
    +      EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true, DebugLogging));
     
       if (PTO.Coroutines)
         FPM.addPass(CoroElidePass());
    @@ -799,7 +803,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
     
       FunctionPassManager FPM;
       FPM.addPass(createFunctionToLoopPassAdaptor(
    -      LoopRotatePass(), EnableMSSALoopDependency, DebugLogging));
    +      LoopRotatePass(), EnableMSSALoopDependency,
    +      /*UseBlockFrequencyInfo=*/false, DebugLogging));
       MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
     
       // Add the profile lowering pass.
    @@ -1129,7 +1134,8 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
     
       // First rotate loops that may have been un-rotated by prior passes.
       OptimizePM.addPass(createFunctionToLoopPassAdaptor(
    -      LoopRotatePass(), EnableMSSALoopDependency, DebugLogging));
    +      LoopRotatePass(), EnableMSSALoopDependency,
    +      /*UseBlockFrequencyInfo=*/false, DebugLogging));
     
       // Distribute loops to allow partial vectorization.  I.e. isolate dependences
       // into separate loop that would otherwise inhibit vectorization.  This is
    @@ -1196,7 +1202,7 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
   OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis,
                                          Function>());
       OptimizePM.addPass(createFunctionToLoopPassAdaptor(
           LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
    -      EnableMSSALoopDependency, DebugLogging));
    +      EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true, DebugLogging));
     
       // Now that we've vectorized and unrolled loops, we may have more refined
       // alignment information, try to re-derive it here.
    @@ -2261,8 +2267,9 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
       }
     #define LOOP_PASS(NAME, CREATE_PASS)                                           \
       if (Name == NAME) {                                                          \
    -    MPM.addPass(createModuleToFunctionPassAdaptor(                             \
    -        createFunctionToLoopPassAdaptor(CREATE_PASS, false, DebugLogging)));   \
    +    MPM.addPass(                                                               \
    +        createModuleToFunctionPassAdaptor(createFunctionToLoopPassAdaptor(     \
    +            CREATE_PASS, false, false, DebugLogging)));                        \
         return Error::success();                                                   \
       }
     #define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)                       \
    @@ -2272,7 +2279,7 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
           return Params.takeError();                                               \
         MPM.addPass(                                                               \
             createModuleToFunctionPassAdaptor(createFunctionToLoopPassAdaptor(     \
    -            CREATE_PASS(Params.get()), false, DebugLogging)));                 \
    +            CREATE_PASS(Params.get()), false, false, DebugLogging)));          \
         return Error::success();                                                   \
       }
     #include "PassRegistry.def"
    @@ -2373,8 +2380,9 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
       }
     #define LOOP_PASS(NAME, CREATE_PASS)                                           \
       if (Name == NAME) {                                                          \
    -    CGPM.addPass(createCGSCCToFunctionPassAdaptor(                             \
    -        createFunctionToLoopPassAdaptor(CREATE_PASS, false, DebugLogging)));   \
    +    CGPM.addPass(                                                              \
    +        createCGSCCToFunctionPassAdaptor(createFunctionToLoopPassAdaptor(      \
    +            CREATE_PASS, false, false, DebugLogging)));                        \
         return Error::success();                                                   \
       }
     #define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)                       \
    @@ -2384,7 +2392,7 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
           return Params.takeError();                                               \
         CGPM.addPass(                                                              \
             createCGSCCToFunctionPassAdaptor(createFunctionToLoopPassAdaptor(      \
    -            CREATE_PASS(Params.get()), false, DebugLogging)));                 \
    +            CREATE_PASS(Params.get()), false, false, DebugLogging)));          \
         return Error::success();                                                   \
       }
     #include "PassRegistry.def"
    @@ -2421,8 +2429,9 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
             return Err;
           // Add the nested pass manager with the appropriate adaptor.
           bool UseMemorySSA = (Name == "loop-mssa");
    -      FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA,
    -                                                  DebugLogging));
    +      FPM.addPass(createFunctionToLoopPassAdaptor(
    +          std::move(LPM), UseMemorySSA, /*UseBlockFrequencyInfo=*/false,
    +          DebugLogging));
           return Error::success();
         }
         if (auto Count = parseRepeatPassName(Name)) {
    @@ -2476,8 +2485,8 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
     //        The risk is that it may become obsolete if we're not careful.
     #define LOOP_PASS(NAME, CREATE_PASS)                                           \
       if (Name == NAME) {                                                          \
    -    FPM.addPass(                                                               \
    -        createFunctionToLoopPassAdaptor(CREATE_PASS, false, DebugLogging));    \
    +    FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false,     \
    +                                                DebugLogging));                \
         return Error::success();                                                   \
       }
     #define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)                       \
    @@ -2486,7 +2495,7 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
         if (!Params)                                                               \
           return Params.takeError();                                               \
         FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()),     \
    -                                                false, DebugLogging));         \
    +                                                false, false, DebugLogging));  \
         return Error::success();                                                   \
       }
     #include "PassRegistry.def"
    diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
    index b741d36e37bff..841badba08340 100644
    --- a/llvm/lib/Transforms/Scalar/LICM.cpp
    +++ b/llvm/lib/Transforms/Scalar/LICM.cpp
    @@ -39,6 +39,7 @@
     #include "llvm/Analysis/ConstantFolding.h"
     #include "llvm/Analysis/GlobalsModRef.h"
     #include "llvm/Analysis/GuardUtils.h"
    +#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
     #include "llvm/Analysis/Loads.h"
     #include "llvm/Analysis/LoopInfo.h"
     #include "llvm/Analysis/LoopIterator.h"
    @@ -171,8 +172,8 @@ static void moveInstructionBefore(Instruction &I, Instruction &Dest,
     namespace {
     struct LoopInvariantCodeMotion {
       bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
    -                 TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
    -                 ScalarEvolution *SE, MemorySSA *MSSA,
    +                 BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI,
    +                 TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
                      OptimizationRemarkEmitter *ORE);
     
       LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
    @@ -208,19 +209,23 @@ struct LegacyLICMPass : public LoopPass {
     MemorySSA *MSSA = EnableMSSALoopDependency
                           ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA())
                           : nullptr;
    +    bool hasProfileData = L->getHeader()->getParent()->hasProfileData();
+    BlockFrequencyInfo *BFI =
+        hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI()
+                       : nullptr;
         // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
    -    // pass.  Function analyses need to be preserved across loop transformations
    +    // pass. Function analyses need to be preserved across loop transformations
         // but ORE cannot be preserved (see comment before the pass definition).
         OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
-    return LICM.runOnLoop(L,
-                          &getAnalysis<AAResultsWrapperPass>().getAAResults(),
-                          &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
-                          &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
-                          &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
-                              *L->getHeader()->getParent()),
-                          &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
-                              *L->getHeader()->getParent()),
-                          SE ? &SE->getSE() : nullptr, MSSA, &ORE);
+    return LICM.runOnLoop(
+        L, &getAnalysis<AAResultsWrapperPass>().getAAResults(),
+        &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
+        &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), BFI,
+        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
+            *L->getHeader()->getParent()),
+        &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+            *L->getHeader()->getParent()),
+        SE ? &SE->getSE() : nullptr, MSSA, &ORE);
       }
     
       /// This transformation requires natural loop information & requires that
    @@ -236,6 +241,9 @@ struct LegacyLICMPass : public LoopPass {
         }
     AU.addRequired<TargetTransformInfoWrapperPass>();
         getLoopAnalysisUsage(AU);
    +    LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
+    AU.addPreserved<LazyBlockFrequencyInfoPass>();
+    AU.addPreserved<LazyBranchProbabilityInfoPass>();
       }
     
     private:
    @@ -251,8 +259,8 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
       OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
     
       LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
    -  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
    -                      AR.MSSA, &ORE))
    +  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,
    +                      &AR.SE, AR.MSSA, &ORE))
         return PreservedAnalyses::all();
     
       auto PA = getLoopPassPreservedAnalyses();
    @@ -272,6 +280,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopPass)
     INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
     INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
     INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
    +INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
     INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
                         false)
     
    @@ -286,8 +295,8 @@ Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
     /// times on one loop.
     bool LoopInvariantCodeMotion::runOnLoop(
         Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
    -    TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE,
    -    MemorySSA *MSSA, OptimizationRemarkEmitter *ORE) {
    +    BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
    +    ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE) {
       bool Changed = false;
     
       assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
    diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
    index 7867a5468891b..04b7254e4cdba 100644
    --- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
    +++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
    @@ -1058,7 +1058,8 @@ PreservedAnalyses LoopDistributePass::run(Function &F,
   auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
   std::function<const LoopAccessInfo &(Loop &)> GetLAA =
       [&](Loop &L) -> const LoopAccessInfo & {
    -    LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, nullptr};
    +    LoopStandardAnalysisResults AR = {AA,  AC,  DT,      LI,     SE,
    +                                      TLI, TTI, nullptr, nullptr};
     return LAM.getResult<LoopAccessAnalysis>(L, AR);
       };
     
    diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
    index e8473d6520254..ce010c9bacacf 100644
    --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
    +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
    @@ -720,7 +720,8 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
   auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
       bool Changed = eliminateLoadsAcrossLoops(
           F, LI, DT, BFI, PSI, [&](Loop &L) -> const LoopAccessInfo & {
    -        LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
    +        LoopStandardAnalysisResults AR = {AA,  AC,  DT,      LI,  SE,
    +                                          TLI, TTI, nullptr, MSSA};
         return LAM.getResult<LoopAccessAnalysis>(L, AR);
           });
     
    diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
    index d83b7b05f88b5..00b242c16f384 100644
    --- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
    +++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
    @@ -32,6 +32,7 @@
     #include "llvm/Analysis/AssumptionCache.h"
     #include "llvm/Analysis/CodeMetrics.h"
     #include "llvm/Analysis/InstructionSimplify.h"
    +#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
     #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
     #include "llvm/Analysis/LoopInfo.h"
     #include "llvm/Analysis/LoopIterator.h"
    @@ -217,6 +218,10 @@ namespace {
         /// loop preheaders be inserted into the CFG.
         ///
         void getAnalysisUsage(AnalysisUsage &AU) const override {
    +      // Lazy BFI and BPI are marked as preserved here so Loop Unswitching
    +      // can remain part of the same loop pass as LICM
+      AU.addPreserved<LazyBlockFrequencyInfoPass>();
+      AU.addPreserved<LazyBranchProbabilityInfoPass>();
       AU.addRequired<AssumptionCacheTracker>();
       AU.addRequired<TargetTransformInfoWrapperPass>();
           if (EnableMSSALoopDependency) {
    diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
    index b4925064bc6b9..fe8fb90d140ab 100644
    --- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
    +++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
    @@ -357,7 +357,8 @@ PreservedAnalyses LoopVersioningPass::run(Function &F,
     
   auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
       auto GetLAA = [&](Loop &L) -> const LoopAccessInfo & {
    -    LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
    +    LoopStandardAnalysisResults AR = {AA,  AC,  DT,      LI,  SE,
    +                                      TLI, TTI, nullptr, MSSA};
     return LAM.getResult<LoopAccessAnalysis>(L, AR);
       };
     
    diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    index 545540efc2841..b203dd88eb3dd 100644
    --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    @@ -8621,7 +8621,8 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
     auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
     std::function<const LoopAccessInfo &(Loop &)> GetLAA =
             [&](Loop &L) -> const LoopAccessInfo & {
    -      LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
    +      LoopStandardAnalysisResults AR = {AA,  AC,  DT,      LI,  SE,
    +                                        TLI, TTI, nullptr, MSSA};
       return LAM.getResult<LoopAccessAnalysis>(L, AR);
         };
     auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
    diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll
    index e606e7cfac171..58ed6b2a0820a 100644
    --- a/llvm/test/Other/opt-O2-pipeline.ll
    +++ b/llvm/test/Other/opt-O2-pipeline.ll
    @@ -111,6 +111,8 @@
     ; CHECK-NEXT:         Loop Pass Manager
     ; CHECK-NEXT:           Rotate Loops
     ; CHECK-NEXT:         Memory SSA
    +; CHECK-NEXT:         Lazy Branch Probability Analysis
    +; CHECK-NEXT:         Lazy Block Frequency Analysis
     ; CHECK-NEXT:         Loop Pass Manager
     ; CHECK-NEXT:           Loop Invariant Code Motion
     ; CHECK-NEXT:           Unswitch loops
    @@ -168,6 +170,8 @@
     ; CHECK-NEXT:         LCSSA Verifier
     ; CHECK-NEXT:         Loop-Closed SSA Form Pass
     ; CHECK-NEXT:         Scalar Evolution Analysis
    +; CHECK-NEXT:         Lazy Branch Probability Analysis
    +; CHECK-NEXT:         Lazy Block Frequency Analysis
     ; CHECK-NEXT:         Loop Pass Manager
     ; CHECK-NEXT:           Loop Invariant Code Motion
     ; CHECK-NEXT:         Post-Dominator Tree Construction
    @@ -270,10 +274,10 @@
     ; CHECK-NEXT:       LCSSA Verifier
     ; CHECK-NEXT:       Loop-Closed SSA Form Pass
     ; CHECK-NEXT:       Scalar Evolution Analysis
    -; CHECK-NEXT:       Loop Pass Manager
    -; CHECK-NEXT:         Loop Invariant Code Motion
     ; CHECK-NEXT:       Lazy Branch Probability Analysis
     ; CHECK-NEXT:       Lazy Block Frequency Analysis
    +; CHECK-NEXT:       Loop Pass Manager
    +; CHECK-NEXT:         Loop Invariant Code Motion
     ; CHECK-NEXT:       Optimization Remark Emitter
     ; CHECK-NEXT:       Warn about non-applied transformations
     ; CHECK-NEXT:       Alignment from assumptions
    diff --git a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
    index aaee6f786bac9..493957e865d4f 100644
    --- a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
    +++ b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
    @@ -116,6 +116,8 @@
     ; CHECK-NEXT:         Loop Pass Manager
     ; CHECK-NEXT:           Rotate Loops
     ; CHECK-NEXT:         Memory SSA
    +; CHECK-NEXT:         Lazy Branch Probability Analysis
    +; CHECK-NEXT:         Lazy Block Frequency Analysis
     ; CHECK-NEXT:         Loop Pass Manager
     ; CHECK-NEXT:           Loop Invariant Code Motion
     ; CHECK-NEXT:           Unswitch loops
    @@ -173,6 +175,8 @@
     ; CHECK-NEXT:         LCSSA Verifier
     ; CHECK-NEXT:         Loop-Closed SSA Form Pass
     ; CHECK-NEXT:         Scalar Evolution Analysis
    +; CHECK-NEXT:         Lazy Branch Probability Analysis
    +; CHECK-NEXT:         Lazy Block Frequency Analysis
     ; CHECK-NEXT:         Loop Pass Manager
     ; CHECK-NEXT:           Loop Invariant Code Motion
     ; CHECK-NEXT:         Post-Dominator Tree Construction
    @@ -282,10 +286,10 @@
     ; CHECK-NEXT:       LCSSA Verifier
     ; CHECK-NEXT:       Loop-Closed SSA Form Pass
     ; CHECK-NEXT:       Scalar Evolution Analysis
    -; CHECK-NEXT:       Loop Pass Manager
    -; CHECK-NEXT:         Loop Invariant Code Motion
     ; CHECK-NEXT:       Lazy Branch Probability Analysis
     ; CHECK-NEXT:       Lazy Block Frequency Analysis
    +; CHECK-NEXT:       Loop Pass Manager
    +; CHECK-NEXT:         Loop Invariant Code Motion
     ; CHECK-NEXT:       Optimization Remark Emitter
     ; CHECK-NEXT:       Warn about non-applied transformations
     ; CHECK-NEXT:       Alignment from assumptions
    diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll
    index b2d2f85ae21be..f674dabd52173 100644
    --- a/llvm/test/Other/opt-O3-pipeline.ll
    +++ b/llvm/test/Other/opt-O3-pipeline.ll
    @@ -116,6 +116,8 @@
     ; CHECK-NEXT:         Loop Pass Manager
     ; CHECK-NEXT:           Rotate Loops
     ; CHECK-NEXT:         Memory SSA
    +; CHECK-NEXT:         Lazy Branch Probability Analysis
    +; CHECK-NEXT:         Lazy Block Frequency Analysis
     ; CHECK-NEXT:         Loop Pass Manager
     ; CHECK-NEXT:           Loop Invariant Code Motion
     ; CHECK-NEXT:           Unswitch loops
    @@ -173,6 +175,8 @@
     ; CHECK-NEXT:         LCSSA Verifier
     ; CHECK-NEXT:         Loop-Closed SSA Form Pass
     ; CHECK-NEXT:         Scalar Evolution Analysis
    +; CHECK-NEXT:         Lazy Branch Probability Analysis
    +; CHECK-NEXT:         Lazy Block Frequency Analysis
     ; CHECK-NEXT:         Loop Pass Manager
     ; CHECK-NEXT:           Loop Invariant Code Motion
     ; CHECK-NEXT:         Post-Dominator Tree Construction
    @@ -275,10 +279,10 @@
     ; CHECK-NEXT:       LCSSA Verifier
     ; CHECK-NEXT:       Loop-Closed SSA Form Pass
     ; CHECK-NEXT:       Scalar Evolution Analysis
    -; CHECK-NEXT:       Loop Pass Manager
    -; CHECK-NEXT:         Loop Invariant Code Motion
     ; CHECK-NEXT:       Lazy Branch Probability Analysis
     ; CHECK-NEXT:       Lazy Block Frequency Analysis
    +; CHECK-NEXT:       Loop Pass Manager
    +; CHECK-NEXT:         Loop Invariant Code Motion
     ; CHECK-NEXT:       Optimization Remark Emitter
     ; CHECK-NEXT:       Warn about non-applied transformations
     ; CHECK-NEXT:       Alignment from assumptions
    diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll
    index cc91707c4b009..66df666a64c69 100644
    --- a/llvm/test/Other/opt-Os-pipeline.ll
    +++ b/llvm/test/Other/opt-Os-pipeline.ll
    @@ -97,6 +97,8 @@
     ; CHECK-NEXT:         Loop Pass Manager
     ; CHECK-NEXT:           Rotate Loops
     ; CHECK-NEXT:         Memory SSA
    +; CHECK-NEXT:         Lazy Branch Probability Analysis
    +; CHECK-NEXT:         Lazy Block Frequency Analysis
     ; CHECK-NEXT:         Loop Pass Manager
     ; CHECK-NEXT:           Loop Invariant Code Motion
     ; CHECK-NEXT:           Unswitch loops
    @@ -154,6 +156,8 @@
     ; CHECK-NEXT:         LCSSA Verifier
     ; CHECK-NEXT:         Loop-Closed SSA Form Pass
     ; CHECK-NEXT:         Scalar Evolution Analysis
    +; CHECK-NEXT:         Lazy Branch Probability Analysis
    +; CHECK-NEXT:         Lazy Block Frequency Analysis
     ; CHECK-NEXT:         Loop Pass Manager
     ; CHECK-NEXT:           Loop Invariant Code Motion
     ; CHECK-NEXT:         Post-Dominator Tree Construction
    @@ -256,10 +260,10 @@
     ; CHECK-NEXT:       LCSSA Verifier
     ; CHECK-NEXT:       Loop-Closed SSA Form Pass
     ; CHECK-NEXT:       Scalar Evolution Analysis
    -; CHECK-NEXT:       Loop Pass Manager
    -; CHECK-NEXT:         Loop Invariant Code Motion
     ; CHECK-NEXT:       Lazy Branch Probability Analysis
     ; CHECK-NEXT:       Lazy Block Frequency Analysis
    +; CHECK-NEXT:       Loop Pass Manager
    +; CHECK-NEXT:         Loop Invariant Code Motion
     ; CHECK-NEXT:       Optimization Remark Emitter
     ; CHECK-NEXT:       Warn about non-applied transformations
     ; CHECK-NEXT:       Alignment from assumptions
    diff --git a/llvm/unittests/Transforms/Scalar/LoopPassManagerTest.cpp b/llvm/unittests/Transforms/Scalar/LoopPassManagerTest.cpp
    index 8142eaf90de10..8bec9629c5540 100644
    --- a/llvm/unittests/Transforms/Scalar/LoopPassManagerTest.cpp
    +++ b/llvm/unittests/Transforms/Scalar/LoopPassManagerTest.cpp
    @@ -9,7 +9,10 @@
     #include "llvm/Transforms/Scalar/LoopPassManager.h"
     #include "llvm/Analysis/AliasAnalysis.h"
     #include "llvm/Analysis/AssumptionCache.h"
    +#include "llvm/Analysis/BlockFrequencyInfo.h"
    +#include "llvm/Analysis/BranchProbabilityInfo.h"
     #include "llvm/Analysis/MemorySSA.h"
    +#include "llvm/Analysis/PostDominators.h"
     #include "llvm/Analysis/ScalarEvolution.h"
     #include "llvm/Analysis/TargetLibraryInfo.h"
     #include "llvm/Analysis/TargetTransformInfo.h"
    @@ -294,6 +297,9 @@ class LoopPassManagerTest : public ::testing::Test {
         // those.
         FAM.registerPass([&] { return AAManager(); });
         FAM.registerPass([&] { return AssumptionAnalysis(); });
    +    FAM.registerPass([&] { return BlockFrequencyAnalysis(); });
    +    FAM.registerPass([&] { return BranchProbabilityAnalysis(); });
    +    FAM.registerPass([&] { return PostDominatorTreeAnalysis(); });
         FAM.registerPass([&] { return MemorySSAAnalysis(); });
         FAM.registerPass([&] { return ScalarEvolutionAnalysis(); });
         FAM.registerPass([&] { return TargetLibraryAnalysis(); });
    
    From 50ee05ab65db2ab262436ee0f92f7888607a89f3 Mon Sep 17 00:00:00 2001
    From: Alexandre Ganea 
    Date: Tue, 15 Sep 2020 19:18:24 -0400
    Subject: [PATCH 0761/1079] [llvm][cmake] Change LLVM_INTEGRATED_CRT_ALLOC to a
     path instead of a boolean
    
    Differential Revision: https://reviews.llvm.org/D87609
    ---
     llvm/CMakeLists.txt | 26 +++++++++++++-------------
     1 file changed, 13 insertions(+), 13 deletions(-)
    
    diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
    index 410103b0bfd68..4ae7bc14d3bb5 100644
    --- a/llvm/CMakeLists.txt
    +++ b/llvm/CMakeLists.txt
    @@ -514,6 +514,19 @@ if( WIN32 AND NOT CYGWIN )
       set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools")
     endif()
     
    +set(LLVM_INTEGRATED_CRT_ALLOC "" CACHE PATH "Replace the Windows CRT allocator with any of {rpmalloc|mimalloc|snmalloc}. Only works with /MT enabled.")
    +if(LLVM_INTEGRATED_CRT_ALLOC)
    +  if(NOT WIN32)
    +    message(FATAL_ERROR "LLVM_INTEGRATED_CRT_ALLOC is only supported on Windows.")
    +  endif()
    +  if(LLVM_USE_SANITIZER)
    +    message(FATAL_ERROR "LLVM_INTEGRATED_CRT_ALLOC cannot be used along with LLVM_USE_SANITIZER!")
    +  endif()
    +  if(CMAKE_BUILD_TYPE AND uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
    +    message(FATAL_ERROR "The Debug target isn't supported along with LLVM_INTEGRATED_CRT_ALLOC!")
    +  endif()
    +endif()
    +
     # Define options to control the inclusion and default build behavior for
     # components which may not strictly be necessary (tools, examples, and tests).
     #
    @@ -567,19 +580,6 @@ option (LLVM_BUILD_EXTERNAL_COMPILER_RT
     option (LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO
       "Show target and host info when tools are invoked with --version." ON)
     
    -option(LLVM_INTEGRATED_CRT_ALLOC "Replace the Windows CRT allocator with any of {rpmalloc|mimalloc|snmalloc}. Only works with /MT enabled." OFF)
    -if(LLVM_INTEGRATED_CRT_ALLOC)
    -  if(NOT WIN32)
    -    message(FATAL_ERROR "LLVM_INTEGRATED_CRT_ALLOC is only supported on Windows.")
    -  endif()
    -  if(LLVM_USE_SANITIZER)
    -    message(FATAL_ERROR "LLVM_INTEGRATED_CRT_ALLOC cannot be used along with LLVM_USE_SANITIZER!")
    -  endif()
    -  if(CMAKE_BUILD_TYPE AND uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
    -    message(FATAL_ERROR "The Debug target isn't supported along with LLVM_INTEGRATED_CRT_ALLOC!")
    -  endif()
    -endif()
    -
     # You can configure which libraries from LLVM you want to include in the
     # shared library by setting LLVM_DYLIB_COMPONENTS to a semi-colon delimited
     # list of LLVM components. All component names handled by llvm-config are valid.
    
    From 79378b1b757d5c981e60320f5a735f3e356557a0 Mon Sep 17 00:00:00 2001
    From: Volkan Keles 
    Date: Tue, 15 Sep 2020 16:40:38 -0700
    Subject: [PATCH 0762/1079] GlobalISel: Fix a failing combiner test
    
test/CodeGen/AArch64/GlobalISel/combine-trunc.mir was failing
because C++ leaves the evaluation order of function arguments
unspecified, so the two truncates could be built in either order.
This patch updates the related code to fix the issue.
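
For context, a minimal standalone sketch of the underlying C++ rule (not
part of this patch): calls nested in an argument list may run in either
order.

    #include <cstdio>

    static int Counter = 0;
    static int next() { return ++Counter; }
    static void use(int A, int B) { std::printf("%d %d\n", A, B); }

    int main() {
      use(next(), next()); // may print "1 2" or "2 1"; both are conforming
      return 0;
    }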
    ---
     llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 5 +++--
     1 file changed, 3 insertions(+), 2 deletions(-)
    
    diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    index 74215999ea60a..5e2b86200ce5e 100644
    --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    @@ -2113,8 +2113,9 @@ bool CombinerHelper::applyCombineTruncOfShl(
       Register ShiftSrc = MatchInfo.first;
       Register ShiftAmt = MatchInfo.second;
       Builder.setInstrAndDebugLoc(MI);
    -  Builder.buildShl(DstReg, Builder.buildTrunc(DstTy, ShiftSrc),
    -                   Builder.buildTrunc(DstTy, ShiftAmt), SrcMI->getFlags());
    +  auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc);
    +  auto TruncShiftAmt = Builder.buildTrunc(DstTy, ShiftAmt);
    +  Builder.buildShl(DstReg, TruncShiftSrc, TruncShiftAmt, SrcMI->getFlags());
       MI.eraseFromParent();
       return true;
     }
    
    From 91332c4dbb033f7d1ffa1a9632012d88b08661c4 Mon Sep 17 00:00:00 2001
    From: Arthur Eubanks 
    Date: Mon, 14 Sep 2020 11:06:36 -0700
    Subject: [PATCH 0763/1079] [CGSCC][NewPM] Fix adding mutually recursive new
     functions
    
When adding a new function via addNewFunctionIntoRefSCC(), LazyCallGraph
creates a new node and immediately populates its edges. Since
populateSlow() calls G->get() on all referenced functions, it will create
a node (but not populate it) for functions that haven't yet been added.
If we add two mutually recursive functions, the assert that the node
should never have been created fires when the second function is added.
So here we remove that assert, since the node may have already been
created (but not yet populated).
    
    createNode() is only called from addNewFunctionInto{,Ref}SCC().
    
    https://bugs.llvm.org/show_bug.cgi?id=47502
    
    Reviewed By: jdoerfert
    
    Differential Revision: https://reviews.llvm.org/D87623
    ---
     llvm/lib/Analysis/LazyCallGraph.cpp           |  2 -
     .../Analysis/CGSCCPassManagerTest.cpp         | 55 +++++++++++++++++++
     2 files changed, 55 insertions(+), 2 deletions(-)
    
    diff --git a/llvm/lib/Analysis/LazyCallGraph.cpp b/llvm/lib/Analysis/LazyCallGraph.cpp
    index efded17cef4e3..b3658999e7fef 100644
    --- a/llvm/lib/Analysis/LazyCallGraph.cpp
    +++ b/llvm/lib/Analysis/LazyCallGraph.cpp
    @@ -1595,8 +1595,6 @@ void LazyCallGraph::updateGraphPtrs() {
     }
     
     LazyCallGraph::Node &LazyCallGraph::createNode(Function &F) {
    -  assert(!lookup(F) && "node already exists");
    -
       Node &N = get(F);
       NodeMap[&F] = &N;
       N.DFSNumber = N.LowLink = -1;
    diff --git a/llvm/unittests/Analysis/CGSCCPassManagerTest.cpp b/llvm/unittests/Analysis/CGSCCPassManagerTest.cpp
    index 2dad605395c37..e0ff4e891ab65 100644
    --- a/llvm/unittests/Analysis/CGSCCPassManagerTest.cpp
    +++ b/llvm/unittests/Analysis/CGSCCPassManagerTest.cpp
    @@ -1766,5 +1766,60 @@ TEST_F(CGSCCPassManagerTest, TestInsertionOfNewRefSCC) {
       MPM.run(*M, MAM);
     }
     
    +TEST_F(CGSCCPassManagerTest, TestInsertionOfNewRefSCCMutuallyRecursive) {
+  std::unique_ptr<Module> M = parseIR("define void @f() {\n"
    +                                      "entry:\n"
    +                                      "  ret void\n"
    +                                      "}\n");
    +
    +  CGSCCPassManager CGPM(/*DebugLogging*/ true);
    +  CGPM.addPass(LambdaSCCPassNoPreserve([&](LazyCallGraph::SCC &C,
    +                                           CGSCCAnalysisManager &AM,
    +                                           LazyCallGraph &CG,
    +                                           CGSCCUpdateResult &UR) {
+    auto &FAM =
+        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
    +
    +    for (auto &N : C) {
    +      auto &F = N.getFunction();
    +      if (F.getName() != "f")
    +        continue;
    +
    +      // Create mutually recursive functions (ref only) 'h1' and 'h2'.
    +      auto *H1 = Function::Create(F.getFunctionType(), F.getLinkage(),
    +                                  F.getAddressSpace(), "h1", F.getParent());
    +      auto *H2 = Function::Create(F.getFunctionType(), F.getLinkage(),
    +                                  F.getAddressSpace(), "h2", F.getParent());
    +      BasicBlock *H1BB =
    +          BasicBlock::Create(F.getParent()->getContext(), "entry", H1);
    +      BasicBlock *H2BB =
    +          BasicBlock::Create(F.getParent()->getContext(), "entry", H2);
    +      (void)CastInst::CreatePointerCast(H2, Type::getInt8PtrTy(F.getContext()),
    +                                        "h2.ref", H1BB);
    +      (void)ReturnInst::Create(H1->getContext(), H1BB);
    +      (void)CastInst::CreatePointerCast(H1, Type::getInt8PtrTy(F.getContext()),
    +                                        "h1.ref", H2BB);
    +      (void)ReturnInst::Create(H2->getContext(), H2BB);
    +
    +      // Add 'f -> h1' ref edge.
    +      (void)CastInst::CreatePointerCast(H1, Type::getInt8PtrTy(F.getContext()),
    +                                        "h.ref", &F.getEntryBlock().front());
    +
    +      CG.addNewFunctionIntoRefSCC(*H1, C.getOuterRefSCC());
    +      CG.addNewFunctionIntoRefSCC(*H2, C.getOuterRefSCC());
    +
    +      ASSERT_NO_FATAL_FAILURE(
    +          updateCGAndAnalysisManagerForCGSCCPass(CG, C, N, AM, UR, FAM))
+          << "Updating the call graph with a demoted, self-referential "
+             "call edge 'f -> f', a newly inserted ref edge 'f -> h1', and "
+             "mutually recursive h1 <-> h2 caused a fatal failure";
    +    }
    +  }));
    +
    +  ModulePassManager MPM(/*DebugLogging*/ true);
    +  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
    +  MPM.run(*M, MAM);
    +}
    +
     #endif
     } // namespace
    
    From ffe9986de4297fdeddcd0b0b9bac2a28c45f661b Mon Sep 17 00:00:00 2001
    From: Jessica Paquette 
    Date: Thu, 10 Sep 2020 13:34:15 -0700
    Subject: [PATCH 0764/1079] [AArch64][GlobalISel] Refactor + improve CMN, ADDS,
     and ADD emit functions
    
    These functions were extremely similar:
    
    - `emitADD`
    - `emitADDS`
    - `emitCMN`
    
    Refactor them a little, introducing a more generic `emitInstr` function to
    do most of the work.
    
    Also add support for the immediate + shifted register addressing modes in each
    of them.
    
Update select-uaddo.mir to show that selecting ADDS now supports folding
    immediates + shifts. (I don't think this can impact CMN, because the CMN checks
    require a G_SUB with a non-constant on the RHS.)
    
    This is around a 0.02% code size improvement on CTMark at -O3.
    
    Differential Revision: https://reviews.llvm.org/D87529
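
As an aside, here is a self-contained sketch of the row/column lookup the
new emitBinOp performs (simplified: the real code renders the folded
immediate or shift through ComplexRendererFns rather than taking bools):

    #include <array>

    // Row = addressing mode (ri, rs, rr); column = register size (64/32).
    unsigned pickOpcode(const std::array<std::array<unsigned, 2>, 3> &Table,
                        bool Is32Bit, bool FoldedImm, bool FoldedShift) {
      if (FoldedImm)
        return Table[0][Is32Bit]; // arithmetic immediate form (Xri/Wri)
      if (FoldedShift)
        return Table[1][Is32Bit]; // shifted register form (Xrs/Wrs)
      return Table[2][Is32Bit];   // plain register form (Xrr/Wrr)
    }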
    ---
     .../GISel/AArch64InstructionSelector.cpp      | 146 +++++++++++++-----
     .../AArch64/GlobalISel/select-uaddo.mir       |  51 ++++++
     2 files changed, 155 insertions(+), 42 deletions(-)
    
    diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
    index ed31b336aa3e9..7307d5b7e1d0c 100644
    --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
    +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
    @@ -171,8 +171,57 @@ class AArch64InstructionSelector : public InstructionSelector {
       emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                          MachineOperand &Predicate,
                          MachineIRBuilder &MIRBuilder) const;
    -  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
    +  MachineInstr *emitInstr(unsigned Opcode,
+                          std::initializer_list<llvm::DstOp> DstOps,
+                          std::initializer_list<llvm::SrcOp> SrcOps,
    +                          MachineIRBuilder &MIRBuilder,
    +                          const ComplexRendererFns &RenderFns = None) const;
    +  /// Helper function to emit a binary operation such as an ADD, ADDS, etc.
    +  ///
    +  /// This is intended for instructions with the following opcode variants:
    +  ///
    +  ///  - Xri, Wri (arithmetic immediate form)
    +  ///  - Xrs, Wrs (shifted register form)
    +  ///  - Xrr, Wrr (register form)
    +  ///
    +  /// For example, for ADD, we have ADDXri, ADDWri, ADDXrs, etc.
    +  ///
    +  /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
    +  /// in a specific order.
    +  ///
    +  /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
    +  ///
    +  /// \code
+  ///   const std::array<std::array<unsigned, 2>, 3> Table {
    +  ///    {{AArch64::ADDXri, AArch64::ADDWri},
    +  ///     {AArch64::ADDXrs, AArch64::ADDWrs},
    +  ///     {AArch64::ADDXrr, AArch64::ADDWrr}}};
    +  /// \endcode
    +  ///
    +  /// Each row in the table corresponds to a different addressing mode. Each
    +  /// column corresponds to a different register size.
    +  ///
    +  /// \attention Rows must be structured as follows:
    +  ///   - Row 0: The ri opcode variants
    +  ///   - Row 1: The rs opcode variants
    +  ///   - Row 2: The rr opcode variants
    +  ///
    +  /// \attention Columns must be structured as follows:
    +  ///   - Column 0: The 64-bit opcode variants
    +  ///   - Column 1: The 32-bit opcode variants
    +  ///
    +  /// \p Dst is the destination register of the binop to emit.
    +  /// \p LHS is the left-hand operand of the binop to emit.
    +  /// \p RHS is the right-hand operand of the binop to emit.
    +  MachineInstr *emitBinOp(
+      const std::array<std::array<unsigned, 2>, 3> &AddrModeAndSizeToOpcode,
    +      Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    +      MachineIRBuilder &MIRBuilder) const;
    +  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
    +                        MachineOperand &RHS,
                             MachineIRBuilder &MIRBuilder) const;
    +  MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    +                         MachineIRBuilder &MIRBuilder) const;
       MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                             MachineIRBuilder &MIRBuilder) const;
       MachineInstr *emitTST(const Register &LHS, const Register &RHS,
    @@ -2462,11 +2511,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
         }
     
         // Add and set the set condition flag.
    -    unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
         MachineIRBuilder MIRBuilder(I);
    -    auto AddsMI = MIRBuilder.buildInstr(AddsOpc, {I.getOperand(0)},
    -                                        {I.getOperand(2), I.getOperand(3)});
    -    constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
    +    emitADDS(I.getOperand(0).getReg(), I.getOperand(2), I.getOperand(3),
    +             MIRBuilder);
     
         // Now, put the overflow result in the register given by the first operand
         // to the G_UADDO. CSINC increments the result when the predicate is false,
    @@ -3749,55 +3796,70 @@ getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
       return std::make_pair(Opc, SubregIdx);
     }
     
    +MachineInstr *AArch64InstructionSelector::emitInstr(
+    unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
+    std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
    +    const ComplexRendererFns &RenderFns) const {
    +  assert(Opcode && "Expected an opcode?");
    +  assert(!isPreISelGenericOpcode(Opcode) &&
    +         "Function should only be used to produce selected instructions!");
    +  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
    +  if (RenderFns)
    +    for (auto &Fn : *RenderFns)
    +      Fn(MI);
    +  constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
    +  return &*MI;
    +}
    +
    +MachineInstr *AArch64InstructionSelector::emitBinOp(
+    const std::array<std::array<unsigned, 2>, 3> &AddrModeAndSizeToOpcode,
    +    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    +    MachineIRBuilder &MIRBuilder) const {
    +  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
    +  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
    +  auto Ty = MRI.getType(LHS.getReg());
    +  assert(Ty.isScalar() && "Expected a scalar?");
    +  unsigned Size = Ty.getSizeInBits();
    +  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
    +  bool Is32Bit = Size == 32;
    +  if (auto Fns = selectArithImmed(RHS))
    +    return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
    +                     MIRBuilder, Fns);
    +  if (auto Fns = selectShiftedRegister(RHS))
    +    return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
    +                     MIRBuilder, Fns);
    +  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
    +                   MIRBuilder);
    +}
    +
     MachineInstr *
     AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
                                         MachineOperand &RHS,
                                         MachineIRBuilder &MIRBuilder) const {
    -  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
    -  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
    -  static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
    -                                       {AArch64::ADDWrr, AArch64::ADDWri}};
    -  bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
    -  auto ImmFns = selectArithImmed(RHS);
    -  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
    -  auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS});
    -
    -  // If we matched a valid constant immediate, add those operands.
    -  if (ImmFns) {
    -    for (auto &RenderFn : *ImmFns)
    -      RenderFn(AddMI);
    -  } else {
    -    AddMI.addUse(RHS.getReg());
    -  }
+  const std::array<std::array<unsigned, 2>, 3> OpcTable{
    +      {{AArch64::ADDXri, AArch64::ADDWri},
    +       {AArch64::ADDXrs, AArch64::ADDWrs},
    +       {AArch64::ADDXrr, AArch64::ADDWrr}}};
    +  return emitBinOp(OpcTable, DefReg, LHS, RHS, MIRBuilder);
    +}
     
    -  constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
    -  return &*AddMI;
    +MachineInstr *
    +AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
    +                                     MachineOperand &RHS,
    +                                     MachineIRBuilder &MIRBuilder) const {
+  const std::array<std::array<unsigned, 2>, 3> OpcTable{
    +      {{AArch64::ADDSXri, AArch64::ADDSWri},
    +       {AArch64::ADDSXrs, AArch64::ADDSWrs},
    +       {AArch64::ADDSXrr, AArch64::ADDSWrr}}};
    +  return emitBinOp(OpcTable, Dst, LHS, RHS, MIRBuilder);
     }
     
     MachineInstr *
     AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                                         MachineIRBuilder &MIRBuilder) const {
    -  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
       MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
    -  static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
    -                                       {AArch64::ADDSWrr, AArch64::ADDSWri}};
       bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
    -  auto ImmFns = selectArithImmed(RHS);
    -  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
    -  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    -
    -  auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
    -
    -  // If we matched a valid constant immediate, add those operands.
    -  if (ImmFns) {
    -    for (auto &RenderFn : *ImmFns)
    -      RenderFn(CmpMI);
    -  } else {
    -    CmpMI.addUse(RHS.getReg());
    -  }
    -
    -  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
    -  return &*CmpMI;
    +  return emitADDS(Is32Bit ? AArch64::WZR : AArch64::XZR, LHS, RHS, MIRBuilder);
     }
     
     MachineInstr *
    diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
    index 96f9ad2b0634e..135932bdfb0c4 100644
    --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
    +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
    @@ -60,3 +60,54 @@ body:             |
         RET_ReallyLR implicit $w0
     
     ...
    +---
    +name:            uaddo_s32_imm
    +alignment:       4
    +legalized:       true
    +regBankSelected: true
    +tracksRegLiveness: true
    +body:             |
    +  bb.1.entry:
    +    liveins: $w0, $w1, $x2
    +    ; Check that we get ADDSWri when we can fold in a constant.
    +    ;
    +    ; CHECK-LABEL: name: uaddo_s32_imm
    +    ; CHECK: liveins: $w0, $w1, $x2
    +    ; CHECK: %copy:gpr32sp = COPY $w0
    +    ; CHECK: %add:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv
    +    ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
    +    ; CHECK: $w0 = COPY %add
    +    ; CHECK: RET_ReallyLR implicit $w0
    +    %copy:gpr(s32) = COPY $w0
    +    %constant:gpr(s32) = G_CONSTANT i32 16
    +    %add:gpr(s32), %overflow:gpr(s1) = G_UADDO %copy, %constant
    +    $w0 = COPY %add(s32)
    +    RET_ReallyLR implicit $w0
    +
    +...
    +---
    +name:            uaddo_s32_shifted
    +alignment:       4
    +legalized:       true
    +regBankSelected: true
    +tracksRegLiveness: true
    +body:             |
    +  bb.1.entry:
    +    liveins: $w0, $w1, $x2
    +    ; Check that we get ADDSWrs when we can fold in a shift.
    +    ;
    +    ; CHECK-LABEL: name: uaddo_s32_shifted
    +    ; CHECK: liveins: $w0, $w1, $x2
    +    ; CHECK: %copy1:gpr32 = COPY $w0
    +    ; CHECK: %copy2:gpr32 = COPY $w1
    +    ; CHECK: %add:gpr32 = ADDSWrs %copy1, %copy2, 16, implicit-def $nzcv
    +    ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
    +    ; CHECK: $w0 = COPY %add
    +    ; CHECK: RET_ReallyLR implicit $w0
    +    %copy1:gpr(s32) = COPY $w0
    +    %copy2:gpr(s32) = COPY $w1
    +    %constant:gpr(s32) = G_CONSTANT i32 16
    +    %shift:gpr(s32) = G_SHL %copy2(s32), %constant(s32)
    +    %add:gpr(s32), %overflow:gpr(s1) = G_UADDO %copy1, %shift
    +    $w0 = COPY %add(s32)
    +    RET_ReallyLR implicit $w0
    
    From 2c391a5a14aeb34e970aba85c5aa540656fe47ca Mon Sep 17 00:00:00 2001
    From: Wenlei He 
    Date: Tue, 15 Sep 2020 17:21:32 -0700
    Subject: [PATCH 0765/1079] [LICM] Make Loop ICM profile aware again
    
    D65060 was reverted because it introduced non-determinism by using BFI counts from already freed blocks. The parent of this revision fixes that by using a VH callback on blocks to prevent this from happening and makes sure BFI data is passed correctly in LoopStandardAnalysisResults.
    
    This re-introduces the previous optimization of using BFI data to prevent LICM from hoisting/sinking if the instruction will end up moving to a colder block.
    
Internally at Facebook, this change results in a ~7% win in a CPU-related metric in one of our big services, by preventing cold code from being hoisted into a hot pre-header, as the added test case demonstrates.
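
A hedged sketch of the gating idea (the name and exact comparison are
approximations, not the patch verbatim):

    #include "llvm/Analysis/BlockFrequencyInfo.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // Refuse to move an instruction when the destination block is hotter
    // than the block it currently lives in; without profile data, keep the
    // old behavior.
    static bool worthMoving(const Instruction &I, const BasicBlock *Dst,
                            const BlockFrequencyInfo *BFI) {
      if (!BFI)
        return true;
      return BFI->getBlockFreq(Dst) <= BFI->getBlockFreq(I.getParent());
    }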
    
    Testing:
    ninja check
    
    Reviewed By: asbirlea
    
    Differential Revision: https://reviews.llvm.org/D87551
    ---
     .../include/llvm/Transforms/Utils/LoopUtils.h | 21 +++--
     llvm/lib/Passes/PassBuilder.cpp               |  8 +-
     llvm/lib/Transforms/Scalar/LICM.cpp           | 81 ++++++++++++++---
     .../Transforms/LICM/Inputs/no-hoist-prof.prof |  7 ++
     llvm/test/Transforms/LICM/no-hoist-prof.ll    | 88 +++++++++++++++++++
     llvm/test/Transforms/LICM/sink.ll             | 10 ++-
     6 files changed, 187 insertions(+), 28 deletions(-)
     create mode 100644 llvm/test/Transforms/LICM/Inputs/no-hoist-prof.prof
     create mode 100644 llvm/test/Transforms/LICM/no-hoist-prof.ll
    
    diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
    index 70c8c84c857bf..cf0982d270b89 100644
    --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
    +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
    @@ -26,6 +26,7 @@ class AAResults;
     class AliasSet;
     class AliasSetTracker;
     class BasicBlock;
    +class BlockFrequencyInfo;
     class IRBuilderBase;
     class Loop;
     class LoopInfo;
    @@ -123,12 +124,13 @@ struct SinkAndHoistLICMFlags {
     /// reverse depth first order w.r.t the DominatorTree. This allows us to visit
     /// uses before definitions, allowing us to sink a loop body in one pass without
     /// iteration. Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
    -/// TargetLibraryInfo, Loop, AliasSet information for all
    +/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
     /// instructions of the loop and loop safety information as
     /// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
     bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
    -                TargetLibraryInfo *, TargetTransformInfo *, Loop *,
    -                AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
    +                BlockFrequencyInfo *, TargetLibraryInfo *,
    +                TargetTransformInfo *, Loop *, AliasSetTracker *,
    +                MemorySSAUpdater *, ICFLoopSafetyInfo *,
                     SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
     
     /// Walk the specified region of the CFG (defined by all blocks
    @@ -136,13 +138,14 @@ bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
     /// first order w.r.t the DominatorTree.  This allows us to visit definitions
     /// before uses, allowing us to hoist a loop body in one pass without iteration.
     /// Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
    -/// TargetLibraryInfo, Loop, AliasSet information for all instructions of the
    -/// loop and loop safety information as arguments. Diagnostics is emitted via \p
    -/// ORE. It returns changed status.
    +/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
    +/// instructions of the loop and loop safety information as arguments.
+/// Diagnostics are emitted via \p ORE. It returns the changed status.
     bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
    -                 TargetLibraryInfo *, Loop *, AliasSetTracker *,
    -                 MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *,
    -                 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
    +                 BlockFrequencyInfo *, TargetLibraryInfo *, Loop *,
    +                 AliasSetTracker *, MemorySSAUpdater *, ScalarEvolution *,
    +                 ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
    +                 OptimizationRemarkEmitter *);
     
     /// This function deletes dead loops. The caller of this function needs to
     /// guarantee that the loop is infact dead.
    diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
    index ddbc7a2fb4d5a..1f43b5e6538e5 100644
    --- a/llvm/lib/Passes/PassBuilder.cpp
    +++ b/llvm/lib/Passes/PassBuilder.cpp
    @@ -2429,9 +2429,11 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
             return Err;
           // Add the nested pass manager with the appropriate adaptor.
           bool UseMemorySSA = (Name == "loop-mssa");
    -      FPM.addPass(createFunctionToLoopPassAdaptor(
    -          std::move(LPM), UseMemorySSA, /*UseBlockFrequencyInfo=*/false,
    -          DebugLogging));
    +      bool UseBFI =
    +          std::any_of(InnerPipeline.begin(), InnerPipeline.end(),
    +                      [](auto Pipeline) { return Pipeline.Name == "licm"; });
    +      FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA,
    +                                                  UseBFI, DebugLogging));
           return Error::success();
         }
         if (auto Count = parseRepeatPassName(Name)) {
    diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
    index 841badba08340..a8fe8280a9ce6 100644
    --- a/llvm/lib/Transforms/Scalar/LICM.cpp
    +++ b/llvm/lib/Transforms/Scalar/LICM.cpp
    @@ -35,6 +35,7 @@
     #include "llvm/Analysis/AliasAnalysis.h"
     #include "llvm/Analysis/AliasSetTracker.h"
     #include "llvm/Analysis/BasicAliasAnalysis.h"
    +#include "llvm/Analysis/BlockFrequencyInfo.h"
     #include "llvm/Analysis/CaptureTracking.h"
     #include "llvm/Analysis/ConstantFolding.h"
     #include "llvm/Analysis/GlobalsModRef.h"
@@ -99,6 +100,11 @@ static cl::opt<bool> ControlFlowHoisting(
         "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
         cl::desc("Enable control flow (and PHI) hoisting in LICM"));
     
+static cl::opt<unsigned> HoistSinkColdnessThreshold(
+    "licm-coldness-threshold", cl::Hidden, cl::init(4),
+    cl::desc("Relative coldness threshold of hoisting/sinking destination "
+             "block for LICM to be considered beneficial"));
    +
 static cl::opt<uint32_t> MaxNumUsesTraversed(
         "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
         cl::desc("Max num uses visited for identifying load "
    @@ -144,8 +150,9 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                       MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
                       OptimizationRemarkEmitter *ORE);
     static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
    -                 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
    -                 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
    +                 BlockFrequencyInfo *BFI, const Loop *CurLoop,
    +                 ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
    +                 OptimizationRemarkEmitter *ORE);
     static bool isSafeToExecuteUnconditionally(Instruction &Inst,
                                                const DominatorTree *DT,
                                                const Loop *CurLoop,
    @@ -356,12 +363,13 @@ bool LoopInvariantCodeMotion::runOnLoop(
                                      LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
                                      /*IsSink=*/true};
       if (L->hasDedicatedExits())
    -    Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
    -                          CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
    +    Changed |=
    +        sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L,
    +                   CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
       Flags.IsSink = false;
       if (Preheader)
         Changed |=
    -        hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
    +        hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
                         CurAST.get(), MSSAU.get(), SE, &SafetyInfo, Flags, ORE);
     
       // Now that all loop invariants have been removed from the loop, promote any
    @@ -458,10 +466,10 @@ bool LoopInvariantCodeMotion::runOnLoop(
     /// definitions, allowing us to sink a loop body in one pass without iteration.
     ///
     bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
    -                      DominatorTree *DT, TargetLibraryInfo *TLI,
    -                      TargetTransformInfo *TTI, Loop *CurLoop,
    -                      AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
    -                      ICFLoopSafetyInfo *SafetyInfo,
    +                      DominatorTree *DT, BlockFrequencyInfo *BFI,
    +                      TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
    +                      Loop *CurLoop, AliasSetTracker *CurAST,
    +                      MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
                           SinkAndHoistLICMFlags &Flags,
                           OptimizationRemarkEmitter *ORE) {
     
    @@ -510,7 +518,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
               isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
               canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
                                  ORE)) {
    -        if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
    +        if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
               if (!FreeInLoop) {
                 ++II;
                 salvageDebugInfo(I);
    @@ -755,13 +763,43 @@ class ControlFlowHoister {
     };
     } // namespace
     
+// Hoisting/sinking an instruction out of a loop isn't always beneficial. It's
+// only worthwhile if the destination block is actually colder than the
+// current block.
    +static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock,
    +                                 OptimizationRemarkEmitter *ORE,
    +                                 BlockFrequencyInfo *BFI) {
+  // Check block frequency only when a runtime profile is available,
+  // to avoid pathological cases. With a static profile, lean towards
+  // hoisting because it helps canonicalize the loop for the vectorizer.
    +  if (!DstBlock->getParent()->hasProfileData())
    +    return true;
    +
    +  if (!HoistSinkColdnessThreshold || !BFI)
    +    return true;
    +
    +  BasicBlock *SrcBlock = I.getParent();
    +  if (BFI->getBlockFreq(DstBlock).getFrequency() / HoistSinkColdnessThreshold >
    +      BFI->getBlockFreq(SrcBlock).getFrequency()) {
    +    ORE->emit([&]() {
    +      return OptimizationRemarkMissed(DEBUG_TYPE, "SinkHoistInst", &I)
    +             << "failed to sink or hoist instruction because containing block "
    +                "has lower frequency than destination block";
    +    });
    +    return false;
    +  }
    +
    +  return true;
    +}
    +
     /// Walk the specified region of the CFG (defined by all blocks dominated by
     /// the specified block, and that are in the current loop) in depth first
     /// order w.r.t the DominatorTree.  This allows us to visit definitions before
     /// uses, allowing us to hoist a loop body in one pass without iteration.
     ///
     bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
    -                       DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
    +                       DominatorTree *DT, BlockFrequencyInfo *BFI,
    +                       TargetLibraryInfo *TLI, Loop *CurLoop,
                            AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
                            ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo,
                            SinkAndHoistLICMFlags &Flags,
    @@ -812,13 +850,15 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
     
           // Try hoisting the instruction out to the preheader.  We can only do
           // this if all of the operands of the instruction are loop invariant and
    -      // if it is safe to hoist the instruction.
+      // if it is safe to hoist the instruction. We also check block frequency
+      // to make sure the instruction only gets hoisted into colder blocks.
           // TODO: It may be safe to hoist if we are hoisting to a conditional block
           // and we have accurately duplicated the control flow from the loop header
           // to that block.
           if (CurLoop->hasLoopInvariantOperands(&I) &&
               canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
                                  ORE) &&
    +          worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) &&
               isSafeToExecuteUnconditionally(
                   I, DT, CurLoop, SafetyInfo, ORE,
                   CurLoop->getLoopPreheader()->getTerminator())) {
    @@ -1554,8 +1594,9 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
     /// position, and may either delete it or move it to outside of the loop.
     ///
     static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
    -                 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
    -                 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE) {
    +                 BlockFrequencyInfo *BFI, const Loop *CurLoop,
    +                 ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
    +                 OptimizationRemarkEmitter *ORE) {
       LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
       ORE->emit([&]() {
         return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
    @@ -1631,7 +1672,10 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
       // If this instruction is only used outside of the loop, then all users are
       // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
       // the instruction.
+  // First check whether I is worth sinking for all uses. Sink only when it is
+  // worthwhile across all uses.
   SmallSetVector<User *, 8> Users(I.user_begin(), I.user_end());
+  SmallVector<PHINode *, 8> ExitPNs;
       for (auto *UI : Users) {
     auto *User = cast<Instruction>(UI);
     
    @@ -1641,6 +1685,15 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
     PHINode *PN = cast<PHINode>(User);
         assert(ExitBlockSet.count(PN->getParent()) &&
                "The LCSSA PHI is not in an exit block!");
    +    if (!worthSinkOrHoistInst(I, PN->getParent(), ORE, BFI)) {
    +      return Changed;
    +    }
    +
    +    ExitPNs.push_back(PN);
    +  }
    +
    +  for (auto *PN : ExitPNs) {
    +
         // The PHI must be trivially replaceable.
         Instruction *New = sinkThroughTriviallyReplaceablePHI(
             PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
    diff --git a/llvm/test/Transforms/LICM/Inputs/no-hoist-prof.prof b/llvm/test/Transforms/LICM/Inputs/no-hoist-prof.prof
    new file mode 100644
    index 0000000000000..c1b2ee0873c00
    --- /dev/null
    +++ b/llvm/test/Transforms/LICM/Inputs/no-hoist-prof.prof
    @@ -0,0 +1,7 @@
    +_Z3fooii:200:1
    + 0: 1
    + 1: 1 _Z3bari:1
    + 2: 200
    + 3: 200
    + 4: 0
    + 5: 1
    diff --git a/llvm/test/Transforms/LICM/no-hoist-prof.ll b/llvm/test/Transforms/LICM/no-hoist-prof.ll
    new file mode 100644
    index 0000000000000..1b18aa3c288e4
    --- /dev/null
    +++ b/llvm/test/Transforms/LICM/no-hoist-prof.ll
    @@ -0,0 +1,88 @@
    +; RUN: opt -enable-new-pm=1 -sample-profile -licm -S -sample-profile-file='%S/Inputs/no-hoist-prof.prof' < %s | FileCheck %s --check-prefix=CHECK-BFI-LICM
    +; RUN: opt -passes=licm -S < %s | FileCheck %s --check-prefix=CHECK-LICM
    +
    +; Original source code:
    +;
    +; int bar(int);
    +; int foo(int iter, int explode) {
    +;   int base = bar(explode);
    +;   for (int i = 0; i != iter; ++i)
    +;     if (i == explode)
    +;       iter = (base * base) + bar(iter);
    +;   return iter;
    +; }
    +
    +; We need debug information in this .ll in order to leverage the pgo file, so:
    +; .ll generated by running `clang++ -O3 -g -S -emit-llvm`, then:
    +;   - move hoisted mul back into cold section
    +;   - give labels names
    +;   - reindex variables
    +;   - remove metadata calls, attributes, module header
    +;   - remove unnecessary metadata
    +
    +; CHECK-LICM: .l.check.preheader:{{.*}}
    +; CHECK-LICM-NEXT: {{.*}} = mul {{.*}}
    +; CHECK-LICM-NEXT: br{{.*}}
    +
    +; CHECK-BFI-LICM: .l.cold:{{.*}}
    +; CHECK-BFI-LICM-NEXT: {{.*}} = mul {{.*}}
    +
    +define dso_local i32 @_Z3fooii(i32, i32) local_unnamed_addr #0 !dbg !7 {
    +  %3 = tail call i32 @_Z3bari(i32 %1), !dbg !19
    +  %4 = icmp eq i32 %0, 0, !dbg !22
    +  br i1 %4, label %.l.ret, label %.l.check.preheader, !dbg !24
    +
    +.l.check.preheader:
    +  br label %.l.check, !dbg !24
    +
    +.l.ret:
    +  %5 = phi i32 [ 0, %2 ], [ %12, %.l.iterate ]
    +  ret i32 %5, !dbg !25
    +
    +.l.check:
    +  %6 = phi i32 [ 0, %.l.check.preheader ], [ %13, %.l.iterate ]
    +  %7 = phi i32 [ %0, %.l.check.preheader ], [ %12, %.l.iterate ]
    +  %8 = icmp eq i32 %6, %1, !dbg !26
    +  br i1 %8, label %.l.cold, label %.l.iterate, !dbg !28
    +
    +.l.cold:
    +  %9 = mul nsw i32 %3, %3
    +  %10 = tail call i32 @_Z3bari(i32 %7), !dbg !29
    +  %11 = add nsw i32 %10, %9, !dbg !30
    +  br label %.l.iterate, !dbg !31
    +
    +.l.iterate:
    +  %12 = phi i32 [ %11, %.l.cold ], [ %7, %.l.check ]
    +  %13 = add nuw nsw i32 %6, 1, !dbg !32
    +  %14 = icmp eq i32 %13, %12, !dbg !22
    +  br i1 %14, label %.l.ret, label %.l.check, !dbg !24, !llvm.loop !33
    +}
    +
    +attributes #0 = { "use-sample-profile" }
    +
    +declare dso_local i32 @_Z3bari(i32) local_unnamed_addr #1
    +
    +!llvm.module.flags = !{!4}
    +
    +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 8.0.20181009 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, nameTableKind: None)
    +!1 = !DIFile(filename: "foo.cpp", directory: "/tmp/gather_pgo")
    +!4 = !{i32 2, !"Debug Info Version", i32 3}
    +!7 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooii", scope: !1, file: !1, line: 2, type: !8, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
    +!8 = !DISubroutineType(types: !9)
    +!9 = !{!10, !10, !10}
    +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
    +!16 = distinct !DILexicalBlock(scope: !7, file: !1, line: 4, column: 3)
    +!19 = !DILocation(line: 3, column: 14, scope: !7)
    +!22 = !DILocation(line: 4, column: 21, scope: !23)
    +!23 = distinct !DILexicalBlock(scope: !16, file: !1, line: 4, column: 3)
    +!24 = !DILocation(line: 4, column: 3, scope: !16)
    +!25 = !DILocation(line: 7, column: 3, scope: !7)
    +!26 = !DILocation(line: 5, column: 11, scope: !27)
    +!27 = distinct !DILexicalBlock(scope: !23, file: !1, line: 5, column: 9)
    +!28 = !DILocation(line: 5, column: 9, scope: !23)
    +!29 = !DILocation(line: 6, column: 30, scope: !27)
    +!30 = !DILocation(line: 6, column: 28, scope: !27)
    +!31 = !DILocation(line: 6, column: 7, scope: !27)
    +!32 = !DILocation(line: 4, column: 30, scope: !23)
    +!33 = distinct !{!33, !24, !34}
    +!34 = !DILocation(line: 6, column: 38, scope: !16)
    diff --git a/llvm/test/Transforms/LICM/sink.ll b/llvm/test/Transforms/LICM/sink.ll
    index 17170f5af1965..8a5da47847c86 100644
    --- a/llvm/test/Transforms/LICM/sink.ll
    +++ b/llvm/test/Transforms/LICM/sink.ll
    @@ -1,8 +1,10 @@
    -; RUN: opt -S -licm < %s | FileCheck %s --check-prefix=CHECK-LICM
    +; RUN: opt -S -licm -licm-coldness-threshold=0 < %s | FileCheck %s --check-prefix=CHECK-LICM
    +; RUN: opt -S -licm < %s | FileCheck %s --check-prefix=CHECK-BFI-LICM
     ; RUN: opt -S -licm < %s | opt -S -loop-sink | FileCheck %s --check-prefix=CHECK-SINK
 ; RUN: opt -S < %s -passes='require<opt-remark-emit>,loop(licm),loop-sink' \
     ; RUN:     | FileCheck %s --check-prefix=CHECK-SINK
    -; RUN: opt -S -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s --check-prefix=CHECK-LICM
    +; RUN: opt -S -licm -licm-coldness-threshold=0 -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s --check-prefix=CHECK-LICM
    +; RUN: opt -S -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s --check-prefix=CHECK-BFI-LICM
     
     ; Original source code:
     ; int g;
    @@ -29,6 +31,10 @@ define i32 @foo(i32, i32) #0 !prof !2 {
     ; CHECK-LICM: load i32, i32* @g
     ; CHECK-LICM: br label %.lr.ph
     
    +; CHECK-BFI-LICM: .lr.ph.preheader:
    +; CHECK-BFI-LICM-NOT: load i32, i32* @g
    +; CHECK-BFI-LICM: br label %.lr.ph
    +
     .lr.ph:
       %.03 = phi i32 [ %8, %.combine ], [ 0, %.lr.ph.preheader ]
       %.012 = phi i32 [ %.1, %.combine ], [ %1, %.lr.ph.preheader ]
    
    From 056534dc2b15ed1d276bead76f054cc7ac9d2bf1 Mon Sep 17 00:00:00 2001
    From: Wenlei He 
    Date: Tue, 15 Sep 2020 17:29:32 -0700
    Subject: [PATCH 0766/1079] SVML support for log10, sqrt
    
Although LLVM supports vectorization of loops containing log10/sqrt, it did not support using the SVML implementations of them. This adds that support, so that when clang is invoked with -fveclib=SVML, appropriate SVML library log10/sqrt implementations are now invoked.
    
    Follow up on: https://reviews.llvm.org/D77114
    
    Tests:
Added unit tests to svml-calls.ll and svml-calls-finite.ll; they can be run with llvm-lit.
Also created a simple C++ file that exercises log10/sqrt, built it with clang++, and inspected the final assembly.
    
    Reviewed By: craig.topper
    
    Differential Revision: https://reviews.llvm.org/D87169
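
As a concrete illustration, a loop like the one below is the kind of source
these mappings target. The flags and the chosen vector variant are
illustrative only; whether __svml_log104, __svml_log108, etc. is picked
depends on the target features and the vectorizer's cost model:

  // Built with something like: clang++ -O2 -fno-math-errno -fveclib=SVML -S example.cpp
  #include <cmath>

  void f(double *out, const double *in, int n) {
    for (int i = 0; i < n; ++i)
      out[i] = std::log10(in[i]) + std::sqrt(in[i]);
  }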
    ---
     llvm/include/llvm/Analysis/VecFuncs.def       |  48 +++++
     .../LoopVectorize/X86/svml-calls-finite.ll    | 114 ++++++++++
     .../LoopVectorize/X86/svml-calls.ll           | 194 ++++++++++++++++++
     llvm/test/Transforms/Util/add-TLI-mappings.ll |   7 +-
     4 files changed, 361 insertions(+), 2 deletions(-)
    
    diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
    index 9fdbf638078f4..a47ee3c147252 100644
    --- a/llvm/include/llvm/Analysis/VecFuncs.def
    +++ b/llvm/include/llvm/Analysis/VecFuncs.def
    @@ -269,6 +269,54 @@ TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f4", 4)
     TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f8", 8)
     TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f16", 16)
     
    +TLI_DEFINE_VECFUNC("log10", "__svml_log102", 2)
    +TLI_DEFINE_VECFUNC("log10", "__svml_log104", 4)
    +TLI_DEFINE_VECFUNC("log10", "__svml_log108", 8)
    +
    +TLI_DEFINE_VECFUNC("log10f", "__svml_log10f4", 4)
    +TLI_DEFINE_VECFUNC("log10f", "__svml_log10f8", 8)
    +TLI_DEFINE_VECFUNC("log10f", "__svml_log10f16", 16)
    +
    +TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log102", 2)
    +TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log104", 4)
    +TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log108", 8)
    +
    +TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f4", 4)
    +TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f8", 8)
    +TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f16", 16)
    +
    +TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log102", 2)
    +TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log104", 4)
    +TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log108", 8)
    +
    +TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f4", 4)
    +TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f8", 8)
    +TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f16", 16)
    +
    +TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt2", 2)
    +TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt4", 4)
    +TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt8", 8)
    +
    +TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf4", 4)
    +TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf8", 8)
    +TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf16", 16)
    +
    +TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt2", 2)
    +TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt4", 4)
    +TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt8", 8)
    +
    +TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf4", 4)
    +TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf8", 8)
    +TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf16", 16)
    +
    +TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt2", 2)
    +TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt4", 4)
    +TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt8", 8)
    +
    +TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf4", 4)
    +TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf8", 8)
    +TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf16", 16)
    +
     TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2)
     TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4)
     TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8)
    diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
    index dd6692d75e5f5..a6e191c3d6923 100644
    --- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
    +++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
    @@ -300,3 +300,117 @@ for.end:                                          ; preds = %for.body
     !91 = distinct !{!31, !32, !33}
     !92 = !{!"llvm.loop.vectorize.width", i32 4}
     !93 = !{!"llvm.loop.vectorize.enable", i1 true}
    +
    +declare float @__log10f_finite(float) #0
    +
    +; CHECK-LABEL: @log10_f32
    +; CHECK: <4 x float> @__svml_log10f4
    +; CHECK: ret
    +define void @log10_f32(float* nocapture %varray) {
    +entry:
    +  br label %for.body
    +
    +for.body:                                         ; preds = %for.body, %entry
    +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    +  %tmp = trunc i64 %indvars.iv to i32
    +  %conv = sitofp i32 %tmp to float
    +  %call = tail call fast float @__log10f_finite(float %conv)
    +  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
    +  store float %call, float* %arrayidx, align 4
    +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    +  %exitcond = icmp eq i64 %indvars.iv.next, 1000
    +  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
    +
    +for.end:                                          ; preds = %for.body
    +  ret void
    +}
    +
    +!101 = distinct !{!21, !22, !23}
    +!102 = !{!"llvm.loop.vectorize.width", i32 4}
    +!103 = !{!"llvm.loop.vectorize.enable", i1 true}
    +
    +
    +declare double @__log10_finite(double) #0
    +
    +; CHECK-LABEL: @log10_f64
    +; CHECK: <4 x double> @__svml_log104
    +; CHECK: ret
    +define void @log10_f64(double* nocapture %varray) {
    +entry:
    +  br label %for.body
    +
    +for.body:                                         ; preds = %for.body, %entry
    +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    +  %tmp = trunc i64 %indvars.iv to i32
    +  %conv = sitofp i32 %tmp to double
    +  %call = tail call fast double @__log10_finite(double %conv)
    +  %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv
    +  store double %call, double* %arrayidx, align 4
    +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    +  %exitcond = icmp eq i64 %indvars.iv.next, 1000
    +  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31
    +
    +for.end:                                          ; preds = %for.body
    +  ret void
    +}
    +
    +!111 = distinct !{!31, !32, !33}
    +!112 = !{!"llvm.loop.vectorize.width", i32 4}
    +!113 = !{!"llvm.loop.vectorize.enable", i1 true}
    +
    +declare float @__sqrtf_finite(float) #0
    +
    +; CHECK-LABEL: @sqrt_f32
    +; CHECK: <4 x float> @__svml_sqrtf4
    +; CHECK: ret
    +define void @sqrt_f32(float* nocapture %varray) {
    +entry:
    +  br label %for.body
    +
    +for.body:                                         ; preds = %for.body, %entry
    +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    +  %tmp = trunc i64 %indvars.iv to i32
    +  %conv = sitofp i32 %tmp to float
    +  %call = tail call fast float @__sqrtf_finite(float %conv)
    +  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
    +  store float %call, float* %arrayidx, align 4
    +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    +  %exitcond = icmp eq i64 %indvars.iv.next, 1000
    +  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
    +
    +for.end:                                          ; preds = %for.body
    +  ret void
    +}
    +
    +!121 = distinct !{!21, !22, !23}
    +!122 = !{!"llvm.loop.vectorize.width", i32 4}
    +!123 = !{!"llvm.loop.vectorize.enable", i1 true}
    +
    +
    +declare double @__sqrt_finite(double) #0
    +
    +; CHECK-LABEL: @sqrt_f64
    +; CHECK: <4 x double> @__svml_sqrt4
    +; CHECK: ret
    +define void @sqrt_f64(double* nocapture %varray) {
    +entry:
    +  br label %for.body
    +
    +for.body:                                         ; preds = %for.body, %entry
    +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    +  %tmp = trunc i64 %indvars.iv to i32
    +  %conv = sitofp i32 %tmp to double
    +  %call = tail call fast double @__sqrt_finite(double %conv)
    +  %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv
    +  store double %call, double* %arrayidx, align 4
    +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    +  %exitcond = icmp eq i64 %indvars.iv.next, 1000
    +  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31
    +
    +for.end:                                          ; preds = %for.body
    +  ret void
    +}
    +
    +!131 = distinct !{!31, !32, !33}
    +!132 = !{!"llvm.loop.vectorize.width", i32 4}
    +!133 = !{!"llvm.loop.vectorize.enable", i1 true}
    diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
    index c074830075521..da6b4696ba2ba 100644
    --- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
    +++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
    @@ -33,6 +33,16 @@ declare float @log2f(float) #0
     declare double @llvm.log2.f64(double) #0
     declare float @llvm.log2.f32(float) #0
     
    +declare double @log10(double) #0
    +declare float @log10f(float) #0
    +declare double @llvm.log10.f64(double) #0
    +declare float @llvm.log10.f32(float) #0
    +
    +declare double @sqrt(double) #0
    +declare float @sqrtf(float) #0
    +declare double @llvm.sqrt.f64(double) #0
    +declare float @llvm.sqrt.f32(float) #0
    +
     declare double @exp2(double) #0
     declare float @exp2f(float) #0
     declare double @llvm.exp2.f64(double) #0
    @@ -598,6 +608,190 @@ for.end:
       ret void
     }
     
    +define void @log10_f64(double* nocapture %varray) {
    +; CHECK-LABEL: @log10_f64(
    +; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]])
    +; CHECK:    ret void
    +;
    +entry:
    +  br label %for.body
    +
    +for.body:
    +  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
    +  %tmp = trunc i64 %iv to i32
    +  %conv = sitofp i32 %tmp to double
    +  %call = tail call double @log10(double %conv)
    +  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
    +  store double %call, double* %arrayidx, align 4
    +  %iv.next = add nuw nsw i64 %iv, 1
    +  %exitcond = icmp eq i64 %iv.next, 1000
    +  br i1 %exitcond, label %for.end, label %for.body
    +
    +for.end:
    +  ret void
    +}
    +
    +define void @log10_f32(float* nocapture %varray) {
    +; CHECK-LABEL: @log10_f32(
    +; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]])
    +; CHECK:    ret void
    +;
    +entry:
    +  br label %for.body
    +
    +for.body:
    +  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
    +  %tmp = trunc i64 %iv to i32
    +  %conv = sitofp i32 %tmp to float
    +  %call = tail call float @log10f(float %conv)
    +  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
    +  store float %call, float* %arrayidx, align 4
    +  %iv.next = add nuw nsw i64 %iv, 1
    +  %exitcond = icmp eq i64 %iv.next, 1000
    +  br i1 %exitcond, label %for.end, label %for.body
    +
    +for.end:
    +  ret void
    +}
    +
    +define void @log10_f64_intrinsic(double* nocapture %varray) {
    +; CHECK-LABEL: @log10_f64_intrinsic(
    +; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]])
    +; CHECK:    ret void
    +;
    +entry:
    +  br label %for.body
    +
    +for.body:
    +  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
    +  %tmp = trunc i64 %iv to i32
    +  %conv = sitofp i32 %tmp to double
    +  %call = tail call double @llvm.log10.f64(double %conv)
    +  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
    +  store double %call, double* %arrayidx, align 4
    +  %iv.next = add nuw nsw i64 %iv, 1
    +  %exitcond = icmp eq i64 %iv.next, 1000
    +  br i1 %exitcond, label %for.end, label %for.body
    +
    +for.end:
    +  ret void
    +}
    +
    +define void @log10_f32_intrinsic(float* nocapture %varray) {
    +; CHECK-LABEL: @log10_f32_intrinsic(
    +; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]])
    +; CHECK:    ret void
    +;
    +entry:
    +  br label %for.body
    +
    +for.body:
    +  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
    +  %tmp = trunc i64 %iv to i32
    +  %conv = sitofp i32 %tmp to float
    +  %call = tail call float @llvm.log10.f32(float %conv)
    +  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
    +  store float %call, float* %arrayidx, align 4
    +  %iv.next = add nuw nsw i64 %iv, 1
    +  %exitcond = icmp eq i64 %iv.next, 1000
    +  br i1 %exitcond, label %for.end, label %for.body
    +
    +for.end:
    +  ret void
    +}
    +
    +define void @sqrt_f64(double* nocapture %varray) {
    +; CHECK-LABEL: @sqrt_f64(
    +; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_sqrt4(<4 x double> [[TMP4:%.*]])
    +; CHECK:    ret void
    +;
    +entry:
    +  br label %for.body
    +
    +for.body:
    +  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
    +  %tmp = trunc i64 %iv to i32
    +  %conv = sitofp i32 %tmp to double
    +  %call = tail call double @sqrt(double %conv)
    +  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
    +  store double %call, double* %arrayidx, align 4
    +  %iv.next = add nuw nsw i64 %iv, 1
    +  %exitcond = icmp eq i64 %iv.next, 1000
    +  br i1 %exitcond, label %for.end, label %for.body
    +
    +for.end:
    +  ret void
    +}
    +
    +define void @sqrt_f32(float* nocapture %varray) {
    +; CHECK-LABEL: @sqrt_f32(
    +; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_sqrtf4(<4 x float> [[TMP4:%.*]])
    +; CHECK:    ret void
    +;
    +entry:
    +  br label %for.body
    +
    +for.body:
    +  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
    +  %tmp = trunc i64 %iv to i32
    +  %conv = sitofp i32 %tmp to float
    +  %call = tail call float @sqrtf(float %conv)
    +  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
    +  store float %call, float* %arrayidx, align 4
    +  %iv.next = add nuw nsw i64 %iv, 1
    +  %exitcond = icmp eq i64 %iv.next, 1000
    +  br i1 %exitcond, label %for.end, label %for.body
    +
    +for.end:
    +  ret void
    +}
    +
    +define void @sqrt_f64_intrinsic(double* nocapture %varray) {
    +; CHECK-LABEL: @sqrt_f64_intrinsic(
    +; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_sqrt4(<4 x double> [[TMP4:%.*]])
    +; CHECK:    ret void
    +;
    +entry:
    +  br label %for.body
    +
    +for.body:
    +  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
    +  %tmp = trunc i64 %iv to i32
    +  %conv = sitofp i32 %tmp to double
    +  %call = tail call double @llvm.sqrt.f64(double %conv)
    +  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
    +  store double %call, double* %arrayidx, align 4
    +  %iv.next = add nuw nsw i64 %iv, 1
    +  %exitcond = icmp eq i64 %iv.next, 1000
    +  br i1 %exitcond, label %for.end, label %for.body
    +
    +for.end:
    +  ret void
    +}
    +
    +define void @sqrt_f32_intrinsic(float* nocapture %varray) {
    +; CHECK-LABEL: @sqrt_f32_intrinsic(
    +; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_sqrtf4(<4 x float> [[TMP4:%.*]])
    +; CHECK:    ret void
    +;
    +entry:
    +  br label %for.body
    +
    +for.body:
    +  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
    +  %tmp = trunc i64 %iv to i32
    +  %conv = sitofp i32 %tmp to float
    +  %call = tail call float @llvm.sqrt.f32(float %conv)
    +  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
    +  store float %call, float* %arrayidx, align 4
    +  %iv.next = add nuw nsw i64 %iv, 1
    +  %exitcond = icmp eq i64 %iv.next, 1000
    +  br i1 %exitcond, label %for.end, label %for.body
    +
    +for.end:
    +  ret void
    +}
    +
     define void @exp2_f64(double* nocapture %varray) {
     ; CHECK-LABEL: @exp2_f64(
     ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])
    diff --git a/llvm/test/Transforms/Util/add-TLI-mappings.ll b/llvm/test/Transforms/Util/add-TLI-mappings.ll
    index c68a9c9a71c65..75e32528ac7c5 100644
    --- a/llvm/test/Transforms/Util/add-TLI-mappings.ll
    +++ b/llvm/test/Transforms/Util/add-TLI-mappings.ll
    @@ -9,10 +9,13 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
     target triple = "x86_64-unknown-linux-gnu"
     
     ; COMMON-LABEL: @llvm.compiler.used = appending global
    -; SVML-SAME:        [3 x i8*] [
    +; SVML-SAME:        [6 x i8*] [
     ; SVML-SAME:          i8* bitcast (<2 x double> (<2 x double>)* @__svml_sin2 to i8*),
     ; SVML-SAME:          i8* bitcast (<4 x double> (<4 x double>)* @__svml_sin4 to i8*),
    -; SVML-SAME:          i8* bitcast (<8 x double> (<8 x double>)* @__svml_sin8 to i8*)
    +; SVML-SAME:          i8* bitcast (<8 x double> (<8 x double>)* @__svml_sin8 to i8*),
    +; SVML-SAME:          i8* bitcast (<4 x float> (<4 x float>)* @__svml_log10f4 to i8*),
    +; SVML-SAME:          i8* bitcast (<8 x float> (<8 x float>)* @__svml_log10f8 to i8*),
    +; SVML-SAME:          i8* bitcast (<16 x float> (<16 x float>)* @__svml_log10f16 to i8*)
     ; MASSV-SAME:       [2 x i8*] [
     ; MASSV-SAME:         i8* bitcast (<2 x double> (<2 x double>)* @__sind2_massv to i8*),
     ; MASSV-SAME:         i8* bitcast (<4 x float> (<4 x float>)* @__log10f4_massv to i8*)
    
    From 7bc77c8526b6b2f0a2b2b780151bafc5e4094130 Mon Sep 17 00:00:00 2001
    From: Michael Kitzan 
    Date: Tue, 15 Sep 2020 17:50:48 -0700
    Subject: [PATCH 0767/1079] Test commit
    
    
    From f7aa1563eb5ff00416fba373073ba19832b6fc34 Mon Sep 17 00:00:00 2001
    From: Arthur Eubanks 
    Date: Tue, 15 Sep 2020 15:02:23 -0700
    Subject: [PATCH 0768/1079] [LowerSwitch][NewPM] Port lowerswitch to NPM
    
    Reviewed By: ychen
    
    Differential Revision: https://reviews.llvm.org/D87726
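
For context, the port follows the usual NewPM recipe: the transformation logic
moves into free functions, the legacy pass becomes a thin wrapper, and a
PassInfoMixin-based class exposes the pass to the new pass manager. A generic
sketch of that shape (MyPass is a hypothetical name; the LowerSwitchPass added
in this patch follows the same pattern):

  #include "llvm/IR/PassManager.h"

  namespace llvm {
  struct MyPass : PassInfoMixin<MyPass> {
    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
      bool Changed = false; // the actual transformation would go here
      return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
    }
  };
  } // namespace llvm

After the PassRegistry.def registration below, the pass is reachable under the
new pass manager as "opt -passes=lowerswitch".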
    ---
     llvm/include/llvm/InitializePasses.h          |   2 +-
     .../llvm/Transforms/Utils/LowerSwitch.h       |  26 ++
     llvm/lib/Passes/PassBuilder.cpp               |   1 +
     llvm/lib/Passes/PassRegistry.def              |   1 +
     llvm/lib/Transforms/Scalar/StructurizeCFG.cpp |   2 +-
     llvm/lib/Transforms/Utils/FixIrreducible.cpp  |   2 +-
     llvm/lib/Transforms/Utils/LowerSwitch.cpp     | 393 +++++++++---------
     llvm/lib/Transforms/Utils/UnifyLoopExits.cpp  |   2 +-
     llvm/lib/Transforms/Utils/Utils.cpp           |   2 +-
     llvm/test/Transforms/LowerSwitch/feature.ll   |   1 +
     10 files changed, 225 insertions(+), 207 deletions(-)
     create mode 100644 llvm/include/llvm/Transforms/Utils/LowerSwitch.h
    
    diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
    index 83385657ee969..c31231b9276bb 100644
    --- a/llvm/include/llvm/InitializePasses.h
    +++ b/llvm/include/llvm/InitializePasses.h
    @@ -264,7 +264,7 @@ void initializeLowerGuardIntrinsicLegacyPassPass(PassRegistry&);
     void initializeLowerWidenableConditionLegacyPassPass(PassRegistry&);
     void initializeLowerIntrinsicsPass(PassRegistry&);
     void initializeLowerInvokeLegacyPassPass(PassRegistry&);
    -void initializeLowerSwitchPass(PassRegistry&);
    +void initializeLowerSwitchLegacyPassPass(PassRegistry &);
     void initializeLowerTypeTestsPass(PassRegistry&);
     void initializeLowerMatrixIntrinsicsLegacyPassPass(PassRegistry &);
     void initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(PassRegistry &);
    diff --git a/llvm/include/llvm/Transforms/Utils/LowerSwitch.h b/llvm/include/llvm/Transforms/Utils/LowerSwitch.h
    new file mode 100644
    index 0000000000000..97086987ffcbd
    --- /dev/null
    +++ b/llvm/include/llvm/Transforms/Utils/LowerSwitch.h
    @@ -0,0 +1,26 @@
    +//===- LowerSwitch.h - Eliminate Switch instructions ----------------------===//
    +//
    +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
    +// See https://llvm.org/LICENSE.txt for license information.
    +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
    +//
    +//===----------------------------------------------------------------------===//
    +//
    +// The LowerSwitch transformation rewrites switch instructions with a sequence
    +// of branches, which allows targets to get away with not implementing the
    +// switch instruction until it is convenient.
    +//
    +//===----------------------------------------------------------------------===//
    +
    +#ifndef LLVM_TRANSFORMS_UTILS_LOWERSWITCH_H
    +#define LLVM_TRANSFORMS_UTILS_LOWERSWITCH_H
    +
    +#include "llvm/IR/PassManager.h"
    +
    +namespace llvm {
+struct LowerSwitchPass : public PassInfoMixin<LowerSwitchPass> {
    +  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
    +};
    +} // namespace llvm
    +
    +#endif // LLVM_TRANSFORMS_UTILS_LOWERSWITCH_H
    diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
    index 1f43b5e6538e5..7cd9722c7b6c5 100644
    --- a/llvm/lib/Passes/PassBuilder.cpp
    +++ b/llvm/lib/Passes/PassBuilder.cpp
    @@ -193,6 +193,7 @@
     #include "llvm/Transforms/Utils/LoopSimplify.h"
     #include "llvm/Transforms/Utils/LoopVersioning.h"
     #include "llvm/Transforms/Utils/LowerInvoke.h"
    +#include "llvm/Transforms/Utils/LowerSwitch.h"
     #include "llvm/Transforms/Utils/Mem2Reg.h"
     #include "llvm/Transforms/Utils/NameAnonGlobals.h"
     #include "llvm/Transforms/Utils/SymbolRewriter.h"
    diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
    index 1d70db3063470..0823988089270 100644
    --- a/llvm/lib/Passes/PassRegistry.def
    +++ b/llvm/lib/Passes/PassRegistry.def
    @@ -220,6 +220,7 @@ FUNCTION_PASS("loop-simplify", LoopSimplifyPass())
     FUNCTION_PASS("loop-sink", LoopSinkPass())
     FUNCTION_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass())
     FUNCTION_PASS("lowerinvoke", LowerInvokePass())
    +FUNCTION_PASS("lowerswitch", LowerSwitchPass())
     FUNCTION_PASS("mem2reg", PromotePass())
     FUNCTION_PASS("memcpyopt", MemCpyOptPass())
     FUNCTION_PASS("mergeicmps", MergeICmpsPass())
    diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
    index c20e57b02c1a5..688900a1c20f8 100644
    --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
    +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
    @@ -343,7 +343,7 @@ char StructurizeCFG::ID = 0;
     INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG",
                           false, false)
     INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
    -INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
    +INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
     INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
     INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
     INITIALIZE_PASS_END(StructurizeCFG, "structurizecfg", "Structurize the CFG",
    diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
    index 460ba9e97fc6e..8d75eea25ba85 100644
    --- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp
    +++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
    @@ -104,7 +104,7 @@ FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); }
     INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible",
                           "Convert irreducible control-flow into natural loops",
                           false /* Only looks at CFG */, false /* Analysis Pass */)
    -INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
    +INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
     INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
     INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
     INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible",
    diff --git a/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
    index 34e836d9660f3..10a4420b1753b 100644
    --- a/llvm/lib/Transforms/Utils/LowerSwitch.cpp
    +++ b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
    @@ -12,6 +12,7 @@
     //
     //===----------------------------------------------------------------------===//
     
    +#include "llvm/Transforms/Utils/LowerSwitch.h"
     #include "llvm/ADT/DenseMap.h"
     #include "llvm/ADT/STLExtras.h"
     #include "llvm/ADT/SmallPtrSet.h"
    @@ -26,6 +27,7 @@
     #include "llvm/IR/Function.h"
     #include "llvm/IR/InstrTypes.h"
     #include "llvm/IR/Instructions.h"
    +#include "llvm/IR/PassManager.h"
     #include "llvm/IR/Value.h"
     #include "llvm/InitializePasses.h"
     #include "llvm/Pass.h"
    @@ -55,9 +57,9 @@ namespace {
     
     } // end anonymous namespace
     
    +namespace {
     // Return true iff R is covered by Ranges.
-static bool IsInRanges(const IntRange &R,
-                       const std::vector<IntRange> &Ranges) {
+bool IsInRanges(const IntRange &R, const std::vector<IntRange> &Ranges) {
       // Note: Ranges must be sorted, non-overlapping and non-adjacent.
     
       // Find the first range whose High field is >= R.High,
    @@ -68,120 +70,34 @@ static bool IsInRanges(const IntRange &R,
       return I != Ranges.end() && I->Low <= R.Low;
     }
     
    -namespace {
    -
    -  /// Replace all SwitchInst instructions with chained branch instructions.
    -  class LowerSwitch : public FunctionPass {
    -  public:
    -    // Pass identification, replacement for typeid
    -    static char ID;
    -
    -    LowerSwitch() : FunctionPass(ID) {
    -      initializeLowerSwitchPass(*PassRegistry::getPassRegistry());
    -    }
    -
    -    bool runOnFunction(Function &F) override;
    -
    -    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<LazyValueInfoWrapperPass>();
    -    }
    -
    -    struct CaseRange {
    -      ConstantInt* Low;
    -      ConstantInt* High;
    -      BasicBlock* BB;
    -
    -      CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb)
    -          : Low(low), High(high), BB(bb) {}
    -    };
    -
-    using CaseVector = std::vector<CaseRange>;
-    using CaseItr = std::vector<CaseRange>::iterator;
    -
    -  private:
    -    void processSwitchInst(SwitchInst *SI,
-                           SmallPtrSetImpl<BasicBlock *> &DeleteList,
    -                           AssumptionCache *AC, LazyValueInfo *LVI);
    -
    -    BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
    -                              ConstantInt *LowerBound, ConstantInt *UpperBound,
    -                              Value *Val, BasicBlock *Predecessor,
    -                              BasicBlock *OrigBlock, BasicBlock *Default,
-                              const std::vector<IntRange> &UnreachableRanges);
    -    BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val,
    -                             ConstantInt *LowerBound, ConstantInt *UpperBound,
    -                             BasicBlock *OrigBlock, BasicBlock *Default);
    -    unsigned Clusterify(CaseVector &Cases, SwitchInst *SI);
    -  };
    -
    -  /// The comparison function for sorting the switch case values in the vector.
    -  /// WARNING: Case ranges should be disjoint!
    -  struct CaseCmp {
    -    bool operator()(const LowerSwitch::CaseRange& C1,
    -                    const LowerSwitch::CaseRange& C2) {
-      const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
-      const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
    -      return CI1->getValue().slt(CI2->getValue());
    -    }
    -  };
    -
    -} // end anonymous namespace
    -
    -char LowerSwitch::ID = 0;
    -
    -// Publicly exposed interface to pass...
    -char &llvm::LowerSwitchID = LowerSwitch::ID;
    -
    -INITIALIZE_PASS_BEGIN(LowerSwitch, "lowerswitch",
    -                      "Lower SwitchInst's to branches", false, false)
    -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
    -INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
    -INITIALIZE_PASS_END(LowerSwitch, "lowerswitch",
    -                    "Lower SwitchInst's to branches", false, false)
    -
    -// createLowerSwitchPass - Interface to this file...
    -FunctionPass *llvm::createLowerSwitchPass() {
    -  return new LowerSwitch();
    -}
    -
    -bool LowerSwitch::runOnFunction(Function &F) {
    -  LazyValueInfo *LVI = &getAnalysis().getLVI();
-  LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
-  auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>();
    -
    -  bool Changed = false;
-  SmallPtrSet<BasicBlock *, 8> DeleteList;
    -
    -  for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
    -    BasicBlock *Cur = &*I++; // Advance over block so we don't traverse new blocks
    -
    -    // If the block is a dead Default block that will be deleted later, don't
    -    // waste time processing it.
    -    if (DeleteList.count(Cur))
    -      continue;
    -
-    if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
    -      Changed = true;
    -      processSwitchInst(SI, DeleteList, AC, LVI);
    -    }
    -  }
    -
    -  for (BasicBlock* BB: DeleteList) {
    -    LVI->eraseBlock(BB);
    -    DeleteDeadBlock(BB);
    +struct CaseRange {
    +  ConstantInt *Low;
    +  ConstantInt *High;
    +  BasicBlock *BB;
    +
    +  CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb)
    +      : Low(low), High(high), BB(bb) {}
    +};
    +
+using CaseVector = std::vector<CaseRange>;
+using CaseItr = std::vector<CaseRange>::iterator;
    +
    +/// The comparison function for sorting the switch case values in the vector.
    +/// WARNING: Case ranges should be disjoint!
    +struct CaseCmp {
    +  bool operator()(const CaseRange &C1, const CaseRange &C2) {
+    const ConstantInt *CI1 = cast<ConstantInt>(C1.Low);
+    const ConstantInt *CI2 = cast<ConstantInt>(C2.High);
    +    return CI1->getValue().slt(CI2->getValue());
       }
    -
    -  return Changed;
    -}
    +};
     
     /// Used for debugging purposes.
     LLVM_ATTRIBUTE_USED
    -static raw_ostream &operator<<(raw_ostream &O,
    -                               const LowerSwitch::CaseVector &C) {
    +raw_ostream &operator<<(raw_ostream &O, const CaseVector &C) {
       O << "[";
     
    -  for (LowerSwitch::CaseVector::const_iterator B = C.begin(), E = C.end();
    -       B != E;) {
    +  for (CaseVector::const_iterator B = C.begin(), E = C.end(); B != E;) {
         O << "[" << B->Low->getValue() << ", " << B->High->getValue() << "]";
         if (++B != E)
           O << ", ";
    @@ -200,9 +116,9 @@ static raw_ostream &operator<<(raw_ostream &O,
     /// 2) Removed if subsequent incoming values now share the same case, i.e.,
     /// multiple outcome edges are condensed into one. This is necessary to keep the
     /// number of phi values equal to the number of branches to SuccBB.
    -static void
    -fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
-        const unsigned NumMergedCases = std::numeric_limits<unsigned>::max()) {
    +void FixPhis(
    +    BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
+    const unsigned NumMergedCases = std::numeric_limits<unsigned>::max()) {
       for (BasicBlock::iterator I = SuccBB->begin(),
                                 IE = SuccBB->getFirstNonPHI()->getIterator();
            I != IE; ++I) {
    @@ -233,17 +149,80 @@ fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
       }
     }
     
    +/// Create a new leaf block for the binary lookup tree. It checks if the
    +/// switch's value == the case's value. If not, then it jumps to the default
    +/// branch. At this point in the tree, the value can't be another valid case
    +/// value, so the jump to the "default" branch is warranted.
    +BasicBlock *NewLeafBlock(CaseRange &Leaf, Value *Val, ConstantInt *LowerBound,
    +                         ConstantInt *UpperBound, BasicBlock *OrigBlock,
    +                         BasicBlock *Default) {
    +  Function *F = OrigBlock->getParent();
    +  BasicBlock *NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
    +  F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf);
    +
    +  // Emit comparison
    +  ICmpInst *Comp = nullptr;
    +  if (Leaf.Low == Leaf.High) {
    +    // Make the seteq instruction...
    +    Comp =
    +        new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, Leaf.Low, "SwitchLeaf");
    +  } else {
    +    // Make range comparison
    +    if (Leaf.Low == LowerBound) {
    +      // Val >= Min && Val <= Hi --> Val <= Hi
    +      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
    +                          "SwitchLeaf");
    +    } else if (Leaf.High == UpperBound) {
    +      // Val <= Max && Val >= Lo --> Val >= Lo
    +      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SGE, Val, Leaf.Low,
    +                          "SwitchLeaf");
    +    } else if (Leaf.Low->isZero()) {
    +      // Val >= 0 && Val <= Hi --> Val <=u Hi
    +      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
    +                          "SwitchLeaf");
    +    } else {
    +      // Emit V-Lo <=u Hi-Lo
    +      Constant *NegLo = ConstantExpr::getNeg(Leaf.Low);
    +      Instruction *Add = BinaryOperator::CreateAdd(
    +          Val, NegLo, Val->getName() + ".off", NewLeaf);
    +      Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
    +      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound,
    +                          "SwitchLeaf");
    +    }
    +  }
    +
    +  // Make the conditional branch...
    +  BasicBlock *Succ = Leaf.BB;
    +  BranchInst::Create(Succ, Default, Comp, NewLeaf);
    +
    +  // If there were any PHI nodes in this successor, rewrite one entry
    +  // from OrigBlock to come from NewLeaf.
+  for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PN = cast<PHINode>(I);
    +    // Remove all but one incoming entries from the cluster
    +    uint64_t Range = Leaf.High->getSExtValue() - Leaf.Low->getSExtValue();
    +    for (uint64_t j = 0; j < Range; ++j) {
    +      PN->removeIncomingValue(OrigBlock);
    +    }
    +
    +    int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
    +    assert(BlockIdx != -1 && "Switch didn't go to this successor??");
    +    PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
    +  }
    +
    +  return NewLeaf;
    +}
    +
     /// Convert the switch statement into a binary lookup of the case values.
     /// The function recursively builds this tree. LowerBound and UpperBound are
     /// used to keep track of the bounds for Val that have already been checked by
     /// a block emitted by one of the previous calls to switchConvert in the call
     /// stack.
    -BasicBlock *
    -LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
    -                           ConstantInt *UpperBound, Value *Val,
    -                           BasicBlock *Predecessor, BasicBlock *OrigBlock,
    -                           BasicBlock *Default,
-                           const std::vector<IntRange> &UnreachableRanges) {
    +BasicBlock *SwitchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
    +                          ConstantInt *UpperBound, Value *Val,
    +                          BasicBlock *Predecessor, BasicBlock *OrigBlock,
    +                          BasicBlock *Default,
+                          const std::vector<IntRange> &UnreachableRanges) {
       assert(LowerBound && UpperBound && "Bounds must be initialized");
       unsigned Size = End - Begin;
     
    @@ -255,10 +234,10 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
         if (Begin->Low == LowerBound && Begin->High == UpperBound) {
           unsigned NumMergedCases = 0;
           NumMergedCases = UpperBound->getSExtValue() - LowerBound->getSExtValue();
    -      fixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases);
    +      FixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases);
           return Begin->BB;
         }
    -    return newLeafBlock(*Begin, Val, LowerBound, UpperBound, OrigBlock,
    +    return NewLeafBlock(*Begin, Val, LowerBound, UpperBound, OrigBlock,
                             Default);
       }
     
    @@ -305,12 +284,12 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
       ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT,
                                     Val, Pivot.Low, "Pivot");
     
    -  BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound,
    -                                      NewUpperBound, Val, NewNode, OrigBlock,
    -                                      Default, UnreachableRanges);
    -  BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound,
    -                                      UpperBound, Val, NewNode, OrigBlock,
    -                                      Default, UnreachableRanges);
    +  BasicBlock *LBranch =
    +      SwitchConvert(LHS.begin(), LHS.end(), LowerBound, NewUpperBound, Val,
    +                    NewNode, OrigBlock, Default, UnreachableRanges);
    +  BasicBlock *RBranch =
    +      SwitchConvert(RHS.begin(), RHS.end(), NewLowerBound, UpperBound, Val,
    +                    NewNode, OrigBlock, Default, UnreachableRanges);
     
       F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewNode);
       NewNode->getInstList().push_back(Comp);
    @@ -319,78 +298,10 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
       return NewNode;
     }
     
    -/// Create a new leaf block for the binary lookup tree. It checks if the
    -/// switch's value == the case's value. If not, then it jumps to the default
    -/// branch. At this point in the tree, the value can't be another valid case
    -/// value, so the jump to the "default" branch is warranted.
    -BasicBlock *LowerSwitch::newLeafBlock(CaseRange &Leaf, Value *Val,
    -                                      ConstantInt *LowerBound,
    -                                      ConstantInt *UpperBound,
    -                                      BasicBlock *OrigBlock,
    -                                      BasicBlock *Default) {
    -  Function* F = OrigBlock->getParent();
    -  BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
    -  F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf);
    -
    -  // Emit comparison
    -  ICmpInst* Comp = nullptr;
    -  if (Leaf.Low == Leaf.High) {
    -    // Make the seteq instruction...
    -    Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val,
    -                        Leaf.Low, "SwitchLeaf");
    -  } else {
    -    // Make range comparison
    -    if (Leaf.Low == LowerBound) {
    -      // Val >= Min && Val <= Hi --> Val <= Hi
    -      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
    -                          "SwitchLeaf");
    -    } else if (Leaf.High == UpperBound) {
    -      // Val <= Max && Val >= Lo --> Val >= Lo
    -      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SGE, Val, Leaf.Low,
    -                          "SwitchLeaf");
    -    } else if (Leaf.Low->isZero()) {
    -      // Val >= 0 && Val <= Hi --> Val <=u Hi
    -      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
    -                          "SwitchLeaf");
    -    } else {
    -      // Emit V-Lo <=u Hi-Lo
    -      Constant* NegLo = ConstantExpr::getNeg(Leaf.Low);
    -      Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo,
    -                                                   Val->getName()+".off",
    -                                                   NewLeaf);
    -      Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
    -      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound,
    -                          "SwitchLeaf");
    -    }
    -  }
    -
    -  // Make the conditional branch...
    -  BasicBlock* Succ = Leaf.BB;
    -  BranchInst::Create(Succ, Default, Comp, NewLeaf);
    -
    -  // If there were any PHI nodes in this successor, rewrite one entry
    -  // from OrigBlock to come from NewLeaf.
-  for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
-    PHINode* PN = cast<PHINode>(I);
    -    // Remove all but one incoming entries from the cluster
    -    uint64_t Range = Leaf.High->getSExtValue() -
    -                     Leaf.Low->getSExtValue();
    -    for (uint64_t j = 0; j < Range; ++j) {
    -      PN->removeIncomingValue(OrigBlock);
    -    }
    -
    -    int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
    -    assert(BlockIdx != -1 && "Switch didn't go to this successor??");
    -    PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
    -  }
    -
    -  return NewLeaf;
    -}
    -
     /// Transform simple list of \p SI's cases into list of CaseRange's \p Cases.
     /// \post \p Cases wouldn't contain references to \p SI's default BB.
     /// \returns Number of \p SI's cases that do not reference \p SI's default BB.
    -unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
    +unsigned Clusterify(CaseVector &Cases, SwitchInst *SI) {
       unsigned NumSimpleCases = 0;
     
       // Start with "simple" cases
    @@ -431,9 +342,9 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
     
     /// Replace the specified switch instruction with a sequence of chained if-then
     /// insts in a balanced binary search.
    -void LowerSwitch::processSwitchInst(SwitchInst *SI,
-                                    SmallPtrSetImpl<BasicBlock *> &DeleteList,
    -                                    AssumptionCache *AC, LazyValueInfo *LVI) {
    +void ProcessSwitchInst(SwitchInst *SI,
+                       SmallPtrSetImpl<BasicBlock *> &DeleteList,
    +                       AssumptionCache *AC, LazyValueInfo *LVI) {
       BasicBlock *OrigBlock = SI->getParent();
       Function *F = OrigBlock->getParent();
       Value *Val = SI->getCondition();  // The value we are switching on...
    @@ -458,7 +369,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
       if (Cases.empty()) {
         BranchInst::Create(Default, OrigBlock);
         // Remove all the references from Default's PHIs to OrigBlock, but one.
    -    fixPhis(Default, OrigBlock, OrigBlock);
    +    FixPhis(Default, OrigBlock, OrigBlock);
         SI->eraseFromParent();
         return;
       }
    @@ -592,12 +503,12 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
       BranchInst::Create(Default, NewDefault);
     
       BasicBlock *SwitchBlock =
    -      switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,
    +      SwitchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,
                         OrigBlock, OrigBlock, NewDefault, UnreachableRanges);
     
       // If there are entries in any PHI nodes for the default edge, make sure
       // to update them as well.
    -  fixPhis(Default, OrigBlock, NewDefault);
    +  FixPhis(Default, OrigBlock, NewDefault);
     
       // Branch to our shiny new if-then stuff...
       BranchInst::Create(SwitchBlock, OrigBlock);
    @@ -610,3 +521,81 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
       if (pred_begin(OldDefault) == pred_end(OldDefault))
         DeleteList.insert(OldDefault);
     }
    +
    +bool LowerSwitch(Function &F, LazyValueInfo *LVI, AssumptionCache *AC) {
    +  bool Changed = false;
+  SmallPtrSet<BasicBlock *, 8> DeleteList;
    +
    +  for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
    +    BasicBlock *Cur =
    +        &*I++; // Advance over block so we don't traverse new blocks
    +
    +    // If the block is a dead Default block that will be deleted later, don't
    +    // waste time processing it.
    +    if (DeleteList.count(Cur))
    +      continue;
    +
+    if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
    +      Changed = true;
    +      ProcessSwitchInst(SI, DeleteList, AC, LVI);
    +    }
    +  }
    +
    +  for (BasicBlock *BB : DeleteList) {
    +    LVI->eraseBlock(BB);
    +    DeleteDeadBlock(BB);
    +  }
    +
    +  return Changed;
    +}
    +
    +/// Replace all SwitchInst instructions with chained branch instructions.
    +class LowerSwitchLegacyPass : public FunctionPass {
    +public:
    +  // Pass identification, replacement for typeid
    +  static char ID;
    +
    +  LowerSwitchLegacyPass() : FunctionPass(ID) {
    +    initializeLowerSwitchLegacyPassPass(*PassRegistry::getPassRegistry());
    +  }
    +
    +  bool runOnFunction(Function &F) override;
    +
    +  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<LazyValueInfoWrapperPass>();
    +  }
    +};
    +
    +} // end anonymous namespace
    +
    +char LowerSwitchLegacyPass::ID = 0;
    +
    +// Publicly exposed interface to pass...
    +char &llvm::LowerSwitchID = LowerSwitchLegacyPass::ID;
    +
    +INITIALIZE_PASS_BEGIN(LowerSwitchLegacyPass, "lowerswitch",
    +                      "Lower SwitchInst's to branches", false, false)
    +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
    +INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
    +INITIALIZE_PASS_END(LowerSwitchLegacyPass, "lowerswitch",
    +                    "Lower SwitchInst's to branches", false, false)
    +
    +// createLowerSwitchPass - Interface to this file...
    +FunctionPass *llvm::createLowerSwitchPass() {
    +  return new LowerSwitchLegacyPass();
    +}
    +
    +bool LowerSwitchLegacyPass::runOnFunction(Function &F) {
+  LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
+  auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>();
    +  AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr;
    +  return LowerSwitch(F, LVI, AC);
    +}
    +
    +PreservedAnalyses LowerSwitchPass::run(Function &F,
    +                                       FunctionAnalysisManager &AM) {
+  LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F);
+  AssumptionCache *AC = AM.getCachedResult<AssumptionAnalysis>(F);
    +  return LowerSwitch(F, LVI, AC) ? PreservedAnalyses::none()
    +                                 : PreservedAnalyses::all();
    +}
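
The "Emit V-Lo <=u Hi-Lo" case in NewLeafBlock above relies on the usual
trick for folding a two-sided range check into a single unsigned compare;
a standalone C++ sketch (illustration only, not part of the patch):

  // If V < Lo, V - Lo wraps to a large unsigned value, so one unsigned
  // comparison tests Lo <= V && V <= Hi with a single branch.
  bool inRange(int V, int Lo, int Hi) {
    return (unsigned)(V - Lo) <= (unsigned)(Hi - Lo);
  }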
    diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
    index 6eacb9a20e4c0..7017ee7bea957 100644
    --- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
    +++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
    @@ -54,7 +54,7 @@ FunctionPass *llvm::createUnifyLoopExitsPass() { return new UnifyLoopExits(); }
     INITIALIZE_PASS_BEGIN(UnifyLoopExits, "unify-loop-exits",
                           "Fixup each natural loop to have a single exit block",
                           false /* Only looks at CFG */, false /* Analysis Pass */)
    -INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
    +INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
     INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
     INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
     INITIALIZE_PASS_END(UnifyLoopExits, "unify-loop-exits",
    diff --git a/llvm/lib/Transforms/Utils/Utils.cpp b/llvm/lib/Transforms/Utils/Utils.cpp
    index ce98a739bea88..1638635440a95 100644
    --- a/llvm/lib/Transforms/Utils/Utils.cpp
    +++ b/llvm/lib/Transforms/Utils/Utils.cpp
    @@ -34,7 +34,7 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {
       initializeLibCallsShrinkWrapLegacyPassPass(Registry);
       initializeLoopSimplifyPass(Registry);
       initializeLowerInvokeLegacyPassPass(Registry);
    -  initializeLowerSwitchPass(Registry);
    +  initializeLowerSwitchLegacyPassPass(Registry);
       initializeNameAnonGlobalLegacyPassPass(Registry);
       initializePromoteLegacyPassPass(Registry);
       initializeStripNonLineTableDebugInfoPass(Registry);
    diff --git a/llvm/test/Transforms/LowerSwitch/feature.ll b/llvm/test/Transforms/LowerSwitch/feature.ll
    index 09d25f0b06d44..55427af498eac 100644
    --- a/llvm/test/Transforms/LowerSwitch/feature.ll
    +++ b/llvm/test/Transforms/LowerSwitch/feature.ll
    @@ -1,4 +1,5 @@
     ; RUN: opt < %s -lowerswitch -S | FileCheck %s
    +; RUN: opt < %s -passes=lowerswitch -S | FileCheck %s
     
     ; We have switch on input.
     ; On output we should got binary comparison tree. Check that all is fine.
    
    From ba12e77ec16b38a4498610c6b8cdeb1a7e8a6aae Mon Sep 17 00:00:00 2001
    From: Arthur Eubanks 
    Date: Mon, 14 Sep 2020 14:37:46 -0700
    Subject: [PATCH 0769/1079] [NewPM] Port strip* passes to NPM
    
    strip-nondebug and strip-debug-declare have no existing associated tests
    
    Reviewed By: ychen
    
    Differential Revision: https://reviews.llvm.org/D87639
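
For reference, a minimal sketch of driving the newly-ported passes from
C++ (the surrounding boilerplate is standard new-PM setup, not part of
this patch; the pass names match the PassRegistry.def entries below):

  #include "llvm/Passes/PassBuilder.h"
  #include "llvm/Transforms/IPO/StripSymbols.h"
  using namespace llvm;

  // Sketch: build the analysis managers, then run the ported passes.
  void runStripPasses(Module &M) {
    PassBuilder PB;
    LoopAnalysisManager LAM;
    FunctionAnalysisManager FAM;
    CGSCCAnalysisManager CGAM;
    ModuleAnalysisManager MAM;
    PB.registerModuleAnalyses(MAM);
    PB.registerCGSCCAnalyses(CGAM);
    PB.registerFunctionAnalyses(FAM);
    PB.registerLoopAnalyses(LAM);
    PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

    ModulePassManager MPM;
    MPM.addPass(StripDebugDeclarePass()); // like -passes=strip-debug-declare
    MPM.addPass(StripSymbolsPass());      // like -passes=strip
    MPM.run(M, MAM);
  }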
    ---
     .../llvm/Transforms/IPO/StripSymbols.h        | 47 ++++++++++++++++
     llvm/lib/Passes/PassBuilder.cpp               |  1 +
     llvm/lib/Passes/PassRegistry.def              |  4 ++
     llvm/lib/Transforms/IPO/StripSymbols.cpp      | 56 +++++++++++++++----
     .../StripSymbols/2007-01-15-llvm.used.ll      |  1 +
     .../StripSymbols/strip-dead-debug-info.ll     |  1 +
     6 files changed, 99 insertions(+), 11 deletions(-)
     create mode 100644 llvm/include/llvm/Transforms/IPO/StripSymbols.h
    
    diff --git a/llvm/include/llvm/Transforms/IPO/StripSymbols.h b/llvm/include/llvm/Transforms/IPO/StripSymbols.h
    new file mode 100644
    index 0000000000000..dd76d481d668c
    --- /dev/null
    +++ b/llvm/include/llvm/Transforms/IPO/StripSymbols.h
    @@ -0,0 +1,47 @@
    +//===- StripSymbols.h - Strip symbols and debug info from a module --------===//
    +//
    +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
    +// See https://llvm.org/LICENSE.txt for license information.
    +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
    +//
    +//===----------------------------------------------------------------------===//
    +//
    +// The StripSymbols transformation implements code stripping. Specifically, it
    +// can delete:
    +//
    +//   * names for virtual registers
    +//   * symbols for internal globals and functions
    +//   * debug information
    +//
    +// Note that this transformation makes code much less readable, so it should
    +// only be used in situations where the 'strip' utility would be used, such as
    +// reducing code size or making it harder to reverse engineer code.
    +//
    +//===----------------------------------------------------------------------===//
    +
    +#ifndef LLVM_TRANSFORMS_IPO_STRIPSYMBOLS_H
    +#define LLVM_TRANSFORMS_IPO_STRIPSYMBOLS_H
    +
    +#include "llvm/IR/PassManager.h"
    +
    +namespace llvm {
    +
+struct StripSymbolsPass : PassInfoMixin<StripSymbolsPass> {
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+struct StripNonDebugSymbolsPass : PassInfoMixin<StripNonDebugSymbolsPass> {
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+struct StripDebugDeclarePass : PassInfoMixin<StripDebugDeclarePass> {
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+struct StripDeadDebugInfoPass : PassInfoMixin<StripDeadDebugInfoPass> {
    +  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
    +};
    +
    +} // end namespace llvm
    +
    +#endif // LLVM_TRANSFORMS_IPO_STRIPSYMBOLS_H
    diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
    index 7cd9722c7b6c5..2ecd6fb602cb5 100644
    --- a/llvm/lib/Passes/PassBuilder.cpp
    +++ b/llvm/lib/Passes/PassBuilder.cpp
    @@ -101,6 +101,7 @@
     #include "llvm/Transforms/IPO/SCCP.h"
     #include "llvm/Transforms/IPO/SampleProfile.h"
     #include "llvm/Transforms/IPO/StripDeadPrototypes.h"
    +#include "llvm/Transforms/IPO/StripSymbols.h"
     #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
     #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
     #include "llvm/Transforms/InstCombine/InstCombine.h"
    diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
    index 0823988089270..d006f86ea2fbb 100644
    --- a/llvm/lib/Passes/PassRegistry.def
    +++ b/llvm/lib/Passes/PassRegistry.def
    @@ -88,7 +88,11 @@ MODULE_PASS("scc-oz-module-inliner",
       buildInlinerPipeline(OptimizationLevel::Oz, ThinLTOPhase::None, DebugLogging))
     MODULE_PASS("oz-module-optimizer",
       buildModuleOptimizationPipeline(OptimizationLevel::Oz, DebugLogging, /*LTOPreLink*/false))
    +MODULE_PASS("strip", StripSymbolsPass())
    +MODULE_PASS("strip-dead-debug-info", StripDeadDebugInfoPass())
     MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass())
    +MODULE_PASS("strip-debug-declare", StripDebugDeclarePass())
    +MODULE_PASS("strip-nondebug", StripNonDebugSymbolsPass())
     MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
     MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr))
     MODULE_PASS("verify", VerifierPass())
    diff --git a/llvm/lib/Transforms/IPO/StripSymbols.cpp b/llvm/lib/Transforms/IPO/StripSymbols.cpp
    index 088091df770f9..4fc71847a0707 100644
    --- a/llvm/lib/Transforms/IPO/StripSymbols.cpp
    +++ b/llvm/lib/Transforms/IPO/StripSymbols.cpp
    @@ -19,18 +19,21 @@
     //
     //===----------------------------------------------------------------------===//
     
    +#include "llvm/Transforms/IPO/StripSymbols.h"
     #include "llvm/ADT/SmallPtrSet.h"
     #include "llvm/IR/Constants.h"
     #include "llvm/IR/DebugInfo.h"
     #include "llvm/IR/DerivedTypes.h"
     #include "llvm/IR/Instructions.h"
     #include "llvm/IR/Module.h"
    +#include "llvm/IR/PassManager.h"
     #include "llvm/IR/TypeFinder.h"
     #include "llvm/IR/ValueSymbolTable.h"
     #include "llvm/InitializePasses.h"
     #include "llvm/Pass.h"
     #include "llvm/Transforms/IPO.h"
     #include "llvm/Transforms/Utils/Local.h"
    +
     using namespace llvm;
     
     namespace {
    @@ -249,9 +252,7 @@ bool StripNonDebugSymbols::runOnModule(Module &M) {
       return StripSymbolNames(M, true);
     }
     
    -bool StripDebugDeclare::runOnModule(Module &M) {
    -  if (skipModule(M))
    -    return false;
    +static bool stripDebugDeclareImpl(Module &M) {
     
       Function *Declare = M.getFunction("llvm.dbg.declare");
   std::vector<Constant*> DeadConstants;
    @@ -289,17 +290,13 @@ bool StripDebugDeclare::runOnModule(Module &M) {
       return true;
     }
     
    -/// Remove any debug info for global variables/functions in the given module for
    -/// which said global variable/function no longer exists (i.e. is null).
    -///
    -/// Debugging information is encoded in llvm IR using metadata. This is designed
    -/// such a way that debug info for symbols preserved even if symbols are
    -/// optimized away by the optimizer. This special pass removes debug info for
    -/// such symbols.
    -bool StripDeadDebugInfo::runOnModule(Module &M) {
    +bool StripDebugDeclare::runOnModule(Module &M) {
       if (skipModule(M))
         return false;
    +  return stripDebugDeclareImpl(M);
    +}
     
    +static bool stripDeadDebugInfoImpl(Module &M) {
       bool Changed = false;
     
       LLVMContext &C = M.getContext();
    @@ -380,3 +377,40 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
     
       return Changed;
     }
    +
    +/// Remove any debug info for global variables/functions in the given module for
    +/// which said global variable/function no longer exists (i.e. is null).
    +///
    +/// Debugging information is encoded in llvm IR using metadata. This is designed
    +/// such a way that debug info for symbols preserved even if symbols are
    +/// optimized away by the optimizer. This special pass removes debug info for
    +/// such symbols.
    +bool StripDeadDebugInfo::runOnModule(Module &M) {
    +  if (skipModule(M))
    +    return false;
    +  return stripDeadDebugInfoImpl(M);
    +}
    +
    +PreservedAnalyses StripSymbolsPass::run(Module &M, ModuleAnalysisManager &AM) {
    +  StripDebugInfo(M);
    +  StripSymbolNames(M, false);
    +  return PreservedAnalyses::all();
    +}
    +
    +PreservedAnalyses StripNonDebugSymbolsPass::run(Module &M,
    +                                                ModuleAnalysisManager &AM) {
    +  StripSymbolNames(M, true);
    +  return PreservedAnalyses::all();
    +}
    +
    +PreservedAnalyses StripDebugDeclarePass::run(Module &M,
    +                                             ModuleAnalysisManager &AM) {
    +  stripDebugDeclareImpl(M);
    +  return PreservedAnalyses::all();
    +}
    +
    +PreservedAnalyses StripDeadDebugInfoPass::run(Module &M,
    +                                              ModuleAnalysisManager &AM) {
    +  stripDeadDebugInfoImpl(M);
    +  return PreservedAnalyses::all();
    +}
    diff --git a/llvm/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll b/llvm/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll
    index 438fa96b41ef3..81ccc422c2bd0 100644
    --- a/llvm/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll
    +++ b/llvm/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll
    @@ -1,4 +1,5 @@
     ; RUN: opt < %s -strip -S | FileCheck %s
    +; RUN: opt < %s -passes=strip -S | FileCheck %s
     
     ; CHECK: foo
     ; CHECK: bar
    diff --git a/llvm/test/Transforms/StripSymbols/strip-dead-debug-info.ll b/llvm/test/Transforms/StripSymbols/strip-dead-debug-info.ll
    index e13e02cb4b558..d9b21d4a60fd5 100644
    --- a/llvm/test/Transforms/StripSymbols/strip-dead-debug-info.ll
    +++ b/llvm/test/Transforms/StripSymbols/strip-dead-debug-info.ll
    @@ -1,4 +1,5 @@
     ; RUN: opt -strip-dead-debug-info -verify %s -S | FileCheck %s
    +; RUN: opt -passes='strip-dead-debug-info,verify' %s -S | FileCheck %s
     
     ; CHECK: ModuleID = '{{.*}}'
     ; CHECK-NOT: "bar"
    
    From 5f4abb7fab1c6a87f059ed8732fd12b237f4805d Mon Sep 17 00:00:00 2001
    From: Krzysztof Parzyszek 
    Date: Tue, 15 Sep 2020 20:32:09 -0500
    Subject: [PATCH 0770/1079] [Hexagon] Replace incorrect pattern for vpackl
     HWI32 -> HVi8
    
V6_vdealb4w is not correct for pairs; use V6_vpackeh/V6_vpackeb instead.
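
Illustratively (this mirrors the updated CHECK lines in the tests below;
the register numbers are arbitrary), the HWI32 -> HVi8 lowering now packs
the even (low) elements in two steps, with the first source of the final
vpacke left undefined since only half of the byte result is used:

  v2.h = vpacke(v1.w,v0.w)   // low halfword of each word in the pair
  v3.b = vpacke(v9.h,v2.h)   // low byte of each halfword; v9 is IMPLICIT_DEF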
    ---
     llvm/lib/Target/Hexagon/HexagonPatternsHVX.td                | 3 ++-
     .../test/CodeGen/Hexagon/autohvx/isel-widen-truncate-pair.ll | 5 +++--
     llvm/test/CodeGen/Hexagon/autohvx/widen-trunc.ll             | 5 +++--
     3 files changed, 8 insertions(+), 5 deletions(-)
    
    diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
    index 64e24f2466263..b84c6eb27fe2a 100644
    --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
    +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
    @@ -417,7 +417,8 @@ let Predicates = [UseHVX] in {
       def: Pat<(VecI8  (vpackl HVI32:$Vs)), (V6_vdealb4w (IMPLICIT_DEF), HvxVR:$Vs)>;
       def: Pat<(VecI16 (vpackl HVI32:$Vs)), (V6_vdealh HvxVR:$Vs)>;
       def: Pat<(VecI8  (vpackl HWI16:$Vs)), (V6_vpackeb (HiVec $Vs), (LoVec $Vs))>;
    -  def: Pat<(VecI8  (vpackl HWI32:$Vs)), (V6_vdealb4w (HiVec $Vs), (LoVec $Vs))>;
    +  def: Pat<(VecI8  (vpackl HWI32:$Vs)),
    +           (V6_vpackeb (IMPLICIT_DEF), (V6_vpackeh (HiVec $Vs), (LoVec $Vs)))>;
       def: Pat<(VecI16 (vpackl HWI32:$Vs)), (V6_vpackeh (HiVec $Vs), (LoVec $Vs))>;
     
       def: Pat<(VecI16  (vunpack   HVI8:$Vs)), (LoVec (VSxtb $Vs))>;
    diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-pair.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-pair.ll
    index 83d49fca03b88..23e8b590b2d8a 100644
    --- a/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-pair.ll
    +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-pair.ll
    @@ -2,10 +2,11 @@
     
     ; This has a v32i8 = truncate v16i32 (64b mode), which was legalized to
     ; 64i8 = vpackl v32i32, for which there were no selection patterns provided.
    -; Check that we generate vdeale for this.
    +; Check that we generate vpackeh->vpackeb for this.
     
     ; CHECK-LABEL: fred:
    -; CHECK: vdeale(v1.b,v0.b)
    +; CHECK: v[[V0:[0-9]+]].h = vpacke(v1.w,v0.w)
    +; CHECK:                  = vpacke({{.*}},v[[V0]].h)
     define void @fred(<32 x i8>* %a0, <32 x i32> %a1) #0 {
       %v0 = trunc <32 x i32> %a1 to <32 x i8>
       store <32 x i8> %v0, <32 x i8>* %a0, align 32
    diff --git a/llvm/test/CodeGen/Hexagon/autohvx/widen-trunc.ll b/llvm/test/CodeGen/Hexagon/autohvx/widen-trunc.ll
    index e23fcb0e427ae..71e24bd0d6c0d 100644
    --- a/llvm/test/CodeGen/Hexagon/autohvx/widen-trunc.ll
    +++ b/llvm/test/CodeGen/Hexagon/autohvx/widen-trunc.ll
    @@ -49,8 +49,9 @@ define void @f2(<64 x i16>* %a0, <64 x i8>* %a1) #0 {
     ; CHECK-DAG: v[[V0:[0-9]+]] = vmem(r0+#0)
     ; CHECK-DAG: v[[V1:[0-9]+]] = vmem(r0+#1)
     ; CHECK-DAG: q[[Q0:[0-3]]] = vsetq
    -; CHECK: v[[V2:[0-9]+]].b = vdeale(v[[V1]].b,v[[V0]].b)
    -; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V2]]
    +; CHECK: v[[V2:[0-9]+]].h = vpacke(v[[V1]].w,v[[V0]].w)
    +; CHECK: v[[V3:[0-9]+]].b = vpacke({{.*}},v[[V2]].h)
    +; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V3]]
     define void @f3(<64 x i32>* %a0, <64 x i8>* %a1) #0 {
       %v0 = load <64 x i32>, <64 x i32>* %a0, align 128
       %v1 = trunc <64 x i32> %v0 to <64 x i8>
    
    From 1b88845ce1b7731a062c3d1fcc80d201c70e4a44 Mon Sep 17 00:00:00 2001
    From: Reid Kleckner 
    Date: Tue, 15 Sep 2020 18:50:34 -0700
    Subject: [PATCH 0771/1079] [PDB] Drop LF_PRECOMP from debugTypes earlier
    
    This is a minor simplification to avoid firing up a BinaryStreamReader
    and CVType parser.
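
For reference, a sketch of the parsing this removes (taken from the
deleted hunk below); initializeDependencies already has firstType in
hand, so the equivalent drop_front can happen there without re-reading
the stream:

  // Removed from UsePrecompSource::mergeDebugT: re-parse the stream just
  // to measure the leading LF_PRECOMP record before dropping it.
  CVTypeArray types;
  BinaryStreamReader reader(file->debugTypes, support::little);
  cantFail(reader.readArray(types, reader.getLength()));
  file->debugTypes =
      file->debugTypes.drop_front(types.begin()->RecordData.size());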
    ---
     lld/COFF/DebugTypes.cpp | 10 ----------
     lld/COFF/InputFiles.cpp |  2 ++
     2 files changed, 2 insertions(+), 10 deletions(-)
    
    diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp
    index b8c488f26908a..3a9bd83036173 100644
    --- a/lld/COFF/DebugTypes.cpp
    +++ b/lld/COFF/DebugTypes.cpp
    @@ -447,16 +447,6 @@ UsePrecompSource::mergeDebugT(TypeMerger *m, CVIndexMap *indexMap) {
       if (!e)
         return e.takeError();
     
    -  // Drop LF_PRECOMP record from the input stream, as it has been replaced
    -  // with the precompiled headers Type stream in the mergeInPrecompHeaderObj()
    -  // call above. Note that we can't just call Types.drop_front(), as we
    -  // explicitly want to rebase the stream.
    -  CVTypeArray types;
    -  BinaryStreamReader reader(file->debugTypes, support::little);
    -  cantFail(reader.readArray(types, reader.getLength()));
    -  auto firstType = types.begin();
    -  file->debugTypes = file->debugTypes.drop_front(firstType->RecordData.size());
    -
       return TpiSource::mergeDebugT(m, indexMap);
     }
     
    diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
    index a692dfe95d6d9..6522d68d37e9c 100644
    --- a/lld/COFF/InputFiles.cpp
    +++ b/lld/COFF/InputFiles.cpp
    @@ -821,6 +821,8 @@ void ObjFile::initializeDependencies() {
         PrecompRecord precomp = cantFail(
        TypeDeserializer::deserializeAs<PrecompRecord>(firstType->data()));
         debugTypesObj = makeUsePrecompSource(this, precomp);
    +    // Drop the LF_PRECOMP record from the input stream.
    +    debugTypes = debugTypes.drop_front(firstType->RecordData.size());
         return;
       }
     
    
    From 3b3ca5c989f9f8e29e4b8b10e77eb08c2b822533 Mon Sep 17 00:00:00 2001
    From: Alina Sbirlea 
    Date: Tue, 15 Sep 2020 19:12:10 -0700
    Subject: [PATCH 0772/1079] Fix test after D86156.
    
    ---
     llvm/test/CodeGen/AMDGPU/opt-pipeline.ll | 22 ++++++++++++++++------
     1 file changed, 16 insertions(+), 6 deletions(-)
    
    diff --git a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
    index 31531a43fc3f2..50bc175bc24f2 100644
    --- a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
    +++ b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
    @@ -139,6 +139,8 @@
     ; GCN-O1-NEXT:       Loop Pass Manager
     ; GCN-O1-NEXT:         Rotate Loops
     ; GCN-O1-NEXT:       Memory SSA
    +; GCN-O1-NEXT:       Lazy Branch Probability Analysis
    +; GCN-O1-NEXT:       Lazy Block Frequency Analysis
     ; GCN-O1-NEXT:       Loop Pass Manager
     ; GCN-O1-NEXT:         Loop Invariant Code Motion
     ; GCN-O1-NEXT:       Post-Dominator Tree Construction
    @@ -270,10 +272,10 @@
     ; GCN-O1-NEXT:       LCSSA Verifier
     ; GCN-O1-NEXT:       Loop-Closed SSA Form Pass
     ; GCN-O1-NEXT:       Scalar Evolution Analysis
    -; GCN-O1-NEXT:       Loop Pass Manager
    -; GCN-O1-NEXT:         Loop Invariant Code Motion
     ; GCN-O1-NEXT:       Lazy Branch Probability Analysis
     ; GCN-O1-NEXT:       Lazy Block Frequency Analysis
    +; GCN-O1-NEXT:       Loop Pass Manager
    +; GCN-O1-NEXT:         Loop Invariant Code Motion
     ; GCN-O1-NEXT:       Optimization Remark Emitter
     ; GCN-O1-NEXT:       Warn about non-applied transformations
     ; GCN-O1-NEXT:       Alignment from assumptions
    @@ -459,6 +461,8 @@
     ; GCN-O2-NEXT:       Loop Pass Manager
     ; GCN-O2-NEXT:         Rotate Loops
     ; GCN-O2-NEXT:       Memory SSA
    +; GCN-O2-NEXT:       Lazy Branch Probability Analysis
    +; GCN-O2-NEXT:       Lazy Block Frequency Analysis
     ; GCN-O2-NEXT:       Loop Pass Manager
     ; GCN-O2-NEXT:         Loop Invariant Code Motion
     ; GCN-O2-NEXT:       Post-Dominator Tree Construction
    @@ -521,6 +525,8 @@
     ; GCN-O2-NEXT:       LCSSA Verifier
     ; GCN-O2-NEXT:       Loop-Closed SSA Form Pass
     ; GCN-O2-NEXT:       Scalar Evolution Analysis
    +; GCN-O2-NEXT:       Lazy Branch Probability Analysis
    +; GCN-O2-NEXT:       Lazy Block Frequency Analysis
     ; GCN-O2-NEXT:       Loop Pass Manager
     ; GCN-O2-NEXT:         Loop Invariant Code Motion
     ; GCN-O2-NEXT:       Post-Dominator Tree Construction
    @@ -623,10 +629,10 @@
     ; GCN-O2-NEXT:       LCSSA Verifier
     ; GCN-O2-NEXT:       Loop-Closed SSA Form Pass
     ; GCN-O2-NEXT:       Scalar Evolution Analysis
    -; GCN-O2-NEXT:       Loop Pass Manager
    -; GCN-O2-NEXT:         Loop Invariant Code Motion
     ; GCN-O2-NEXT:       Lazy Branch Probability Analysis
     ; GCN-O2-NEXT:       Lazy Block Frequency Analysis
    +; GCN-O2-NEXT:       Loop Pass Manager
    +; GCN-O2-NEXT:         Loop Invariant Code Motion
     ; GCN-O2-NEXT:       Optimization Remark Emitter
     ; GCN-O2-NEXT:       Warn about non-applied transformations
     ; GCN-O2-NEXT:       Alignment from assumptions
    @@ -819,6 +825,8 @@
     ; GCN-O3-NEXT:       Loop Pass Manager
     ; GCN-O3-NEXT:         Rotate Loops
     ; GCN-O3-NEXT:       Memory SSA
    +; GCN-O3-NEXT:       Lazy Branch Probability Analysis
    +; GCN-O3-NEXT:       Lazy Block Frequency Analysis
     ; GCN-O3-NEXT:       Loop Pass Manager
     ; GCN-O3-NEXT:         Loop Invariant Code Motion
     ; GCN-O3-NEXT:       Post-Dominator Tree Construction
    @@ -881,6 +889,8 @@
     ; GCN-O3-NEXT:       LCSSA Verifier
     ; GCN-O3-NEXT:       Loop-Closed SSA Form Pass
     ; GCN-O3-NEXT:       Scalar Evolution Analysis
    +; GCN-O3-NEXT:       Lazy Branch Probability Analysis
    +; GCN-O3-NEXT:       Lazy Block Frequency Analysis
     ; GCN-O3-NEXT:       Loop Pass Manager
     ; GCN-O3-NEXT:         Loop Invariant Code Motion
     ; GCN-O3-NEXT:       Post-Dominator Tree Construction
    @@ -983,10 +993,10 @@
     ; GCN-O3-NEXT:       LCSSA Verifier
     ; GCN-O3-NEXT:       Loop-Closed SSA Form Pass
     ; GCN-O3-NEXT:       Scalar Evolution Analysis
    -; GCN-O3-NEXT:       Loop Pass Manager
    -; GCN-O3-NEXT:         Loop Invariant Code Motion
     ; GCN-O3-NEXT:       Lazy Branch Probability Analysis
     ; GCN-O3-NEXT:       Lazy Block Frequency Analysis
    +; GCN-O3-NEXT:       Loop Pass Manager
    +; GCN-O3-NEXT:         Loop Invariant Code Motion
     ; GCN-O3-NEXT:       Optimization Remark Emitter
     ; GCN-O3-NEXT:       Warn about non-applied transformations
     ; GCN-O3-NEXT:       Alignment from assumptions
    
    From 2ce1a697f037469e737db1ad41dfa14ec653ec53 Mon Sep 17 00:00:00 2001
    From: Craig Topper 
    Date: Tue, 15 Sep 2020 19:31:48 -0700
    Subject: [PATCH 0773/1079] [X86] Always use 16-bit displacement in 16-bit mode
     when there is no base or index register.
    
    Previously we only did this if the immediate fit in 16 bits, but
    the GNU assembler seems to just truncate.
    
    Fixes PR46952
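
For illustration (a hypothetical symbol, not taken from the test below):
in 16-bit mode, a memory operand with no base or index register is
encoded with mod=00 r/m=110 and a 16-bit displacement, so a too-wide
address is now truncated to 16 bits rather than promoted to disp32:

  .code16
  movw  sym, %ax    # always disp16 now; GNU as truncates sym to 16 bits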
    ---
     llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 4 +---
     llvm/test/MC/X86/x86-16.s                             | 5 +++++
     2 files changed, 6 insertions(+), 3 deletions(-)
    
    diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
    index 0de94cda2d739..533145e57ca59 100644
    --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
    +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
    @@ -161,13 +161,11 @@ static bool is16BitMemOperand(const MCInst &MI, unsigned Op,
                                   const MCSubtargetInfo &STI) {
       const MCOperand &Base = MI.getOperand(Op + X86::AddrBaseReg);
       const MCOperand &Index = MI.getOperand(Op + X86::AddrIndexReg);
    -  const MCOperand &Disp = MI.getOperand(Op + X86::AddrDisp);
     
       unsigned BaseReg = Base.getReg();
       unsigned IndexReg = Index.getReg();
     
    -  if (STI.hasFeature(X86::Mode16Bit) && BaseReg == 0 && IndexReg == 0 &&
    -      Disp.isImm() && Disp.getImm() < 0x10000)
    +  if (STI.hasFeature(X86::Mode16Bit) && BaseReg == 0 && IndexReg == 0)
         return true;
       if ((BaseReg != 0 &&
            X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) ||
    diff --git a/llvm/test/MC/X86/x86-16.s b/llvm/test/MC/X86/x86-16.s
    index f92164e57314a..f1b4428703f10 100644
    --- a/llvm/test/MC/X86/x86-16.s
    +++ b/llvm/test/MC/X86/x86-16.s
    @@ -1056,3 +1056,8 @@ foo:
     // CHECK:  encoding: [0x0f,0x84,A,A]
     // CHECK:  fixup A - offset: 2, value: foo-2, kind: FK_PCRel_2
     {disp32} je foo
    +
    +// CHECK: movl nearer, %ebx
    +// CHECK:  encoding: [0x66,0x8b,0x1e,A,A]
    +// CHECK:  fixup A - offset: 3, value: nearer, kind: FK_Data_2
    +movl    nearer, %ebx
    
    From 3b38062d1c8b6965ded5b6bc686db63f1a59e818 Mon Sep 17 00:00:00 2001
    From: Arthur Eubanks 
    Date: Tue, 15 Sep 2020 20:21:45 -0700
    Subject: [PATCH 0774/1079] [NewPM] Fix 2003-02-19-LoopInfoNestingBug.ll under
     NPM
    
    Also move it to a more appropriate directory.
    ---
     .../LoopInfo}/2003-02-19-LoopInfoNestingBug.ll             | 7 ++++---
     1 file changed, 4 insertions(+), 3 deletions(-)
     rename llvm/test/{Other => Analysis/LoopInfo}/2003-02-19-LoopInfoNestingBug.ll (76%)
    
    diff --git a/llvm/test/Other/2003-02-19-LoopInfoNestingBug.ll b/llvm/test/Analysis/LoopInfo/2003-02-19-LoopInfoNestingBug.ll
    similarity index 76%
    rename from llvm/test/Other/2003-02-19-LoopInfoNestingBug.ll
    rename to llvm/test/Analysis/LoopInfo/2003-02-19-LoopInfoNestingBug.ll
    index b807c4440008c..caa27b3c58ffd 100644
    --- a/llvm/test/Other/2003-02-19-LoopInfoNestingBug.ll
    +++ b/llvm/test/Analysis/LoopInfo/2003-02-19-LoopInfoNestingBug.ll
    @@ -2,9 +2,10 @@
     ; figure out that loop "Inner" should be nested inside of leep "LoopHeader", 
     ; and instead nests it just inside loop "Top"
     ;
    -; RUN: opt < %s -analyze -loops | \
-; RUN:   grep "     Loop at depth 3 containing: %Inner"
-;
+; RUN: opt < %s -analyze -loops -enable-new-pm=0 | FileCheck %s
+; RUN: opt < %s -passes='print<loops>' -disable-output 2>&1 | FileCheck %s
+
+; CHECK: Loop at depth 3 containing: %Inner
 
 define void @test() {
   br label %Top

From b1b187a1386e5d7bfecb2a63dc8c654583684e22 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks 
Date: Tue, 15 Sep 2020 20:25:35 -0700
Subject: [PATCH 0775/1079] [NewPM][SCEV] Fix constant-fold-gep.ll under NPM

---
 llvm/test/Other/constant-fold-gep.ll | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/test/Other/constant-fold-gep.ll b/llvm/test/Other/constant-fold-gep.ll
index 8028b4fff9870..8be214713d5ce 100644
--- a/llvm/test/Other/constant-fold-gep.ll
+++ b/llvm/test/Other/constant-fold-gep.ll
@@ -11,7 +11,8 @@
 ; RUN: opt -S -o - -instcombine -globalopt -data-layout="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64" < %s | FileCheck --check-prefix=TO %s
 
 ; "SCEV" - ScalarEvolution with default target layout
-; RUN: opt -analyze -scalar-evolution < %s | FileCheck --check-prefix=SCEV %s
+; RUN: opt -analyze -scalar-evolution < %s -enable-new-pm=0 | FileCheck --check-prefix=SCEV %s
+; RUN: opt -passes='print<scalar-evolution>' < %s -disable-output 2>&1 | FileCheck --check-prefix=SCEV %s
 
 
 ; The automatic constant folder in opt does not have targetdata access, so

From bb371f8ce8c2fc77e0ab6c87d253a1d1db00d0eb Mon Sep 17 00:00:00 2001
From: Arthur Eubanks 
Date: Tue, 15 Sep 2020 20:29:20 -0700
Subject: [PATCH 0776/1079] [NewPM] Fix opt-hot-cold-split.ll under NPM

Pin to legacy PM, there are already NPM RUN lines.
---
 llvm/test/Other/opt-hot-cold-split.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/Other/opt-hot-cold-split.ll b/llvm/test/Other/opt-hot-cold-split.ll
index f43f3a3d893ce..cd01314f1f7e1 100644
--- a/llvm/test/Other/opt-hot-cold-split.ll
+++ b/llvm/test/Other/opt-hot-cold-split.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=x86_64-- -Os -hot-cold-split=true -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=DEFAULT-Os
+; RUN: opt -mtriple=x86_64-- -Os -hot-cold-split=true -debug-pass=Structure -enable-new-pm=0 < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=DEFAULT-Os
 ; RUN: opt -mtriple=x86_64-- -hot-cold-split=true -passes='lto-pre-link<Os>' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=LTO-PRELINK-Os
 ; RUN: opt -mtriple=x86_64-- -hot-cold-split=true -passes='thinlto-pre-link<Os>' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=THINLTO-PRELINK-Os
 ; RUN: opt -mtriple=x86_64-- -hot-cold-split=true -passes='lto<Os>' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=LTO-POSTLINK-Os

From 37c5dbb31a2fa9aa6618efe56ab0d6cd8f358957 Mon Sep 17 00:00:00 2001
From: Mehdi Amini 
Date: Wed, 16 Sep 2020 03:40:36 +0000
Subject: [PATCH 0777/1079] Fully qualify some more namespace in MLIR ODS to be
 more friendly to dialects not defined under the mlir namespace (NFC)

---
 .../mlir/Interfaces/SideEffectInterfaces.td   | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/mlir/include/mlir/Interfaces/SideEffectInterfaces.td b/mlir/include/mlir/Interfaces/SideEffectInterfaces.td
index 1ee623b613659..0f189fa8164ba 100644
--- a/mlir/include/mlir/Interfaces/SideEffectInterfaces.td
+++ b/mlir/include/mlir/Interfaces/SideEffectInterfaces.td
@@ -51,7 +51,7 @@ class EffectOpInterfaceBase
         Collects all of the operation's effects into `effects`.
       }],
       "void", "getEffects",
-      (ins "SmallVectorImpl<SideEffects::EffectInstance<"
+      (ins "SmallVectorImpl<::mlir::SideEffects::EffectInstance<"
            # baseEffect # ">> &":$effects)
     >,
     InterfaceMethod<[{
@@ -59,7 +59,7 @@ class EffectOpInterfaceBase
       }],
      "void", "getEffectsOnValue",
       (ins "Value":$value,
-           "SmallVectorImpl<SideEffects::EffectInstance<"
+           "SmallVectorImpl<::mlir::SideEffects::EffectInstance<"
            # baseEffect # ">> &":$effects), [{
         $_op.getEffects(effects);
         llvm::erase_if(effects, [&](auto &it) {
@@ -73,7 +73,7 @@ class EffectOpInterfaceBase
       }],
       "void", "getEffectsOnResource",
       (ins "SideEffects::Resource *":$resource,
-           "SmallVectorImpl<SideEffects::EffectInstance<"
+           "SmallVectorImpl<::mlir::SideEffects::EffectInstance<"
            # baseEffect # ">> &":$effects), [{
         $_op.getEffects(effects);
         llvm::erase_if(effects, [&](auto &it) {
@@ -87,7 +87,7 @@ class EffectOpInterfaceBase
     /// Collect all of the effect instances that correspond to the given
     /// `Effect` and place them in 'effects'.
     template <typename Effect> void getEffects(
-        SmallVectorImpl<SideEffects::EffectInstance<
+        SmallVectorImpl<::mlir::SideEffects::EffectInstance<
         }] # baseEffect # [{>> &effects) {
       getEffects(effects);
       llvm::erase_if(effects, [&](auto &it) {
@@ -115,7 +115,7 @@ class EffectOpInterfaceBase
 
     /// Returns true if this operation has no effects.
     bool hasNoEffect() {
-      SmallVector<SideEffects::EffectInstance<}] # baseEffect # [{>, 4> effects;
+      SmallVector<::mlir::SideEffects::EffectInstance<}] # baseEffect # [{>, 4> effects;
       getEffects(effects);
       return effects.empty();
     }
@@ -124,7 +124,7 @@ class EffectOpInterfaceBase
     static bool hasNoEffect(Operation *op) {
       if (auto interface = dyn_cast<}] # name # [{>(op))
         return interface.hasNoEffect();
-      return op->hasTrait<OpTrait::HasRecursiveSideEffects>();
+      return op->hasTrait<::mlir::OpTrait::HasRecursiveSideEffects>();
     }
   }];
 
@@ -178,7 +178,7 @@ class SideEffectsTraitBase
 def MemoryEffectsOpInterface
     : EffectOpInterfaceBase<"MemoryEffectOpInterface",
-                            "MemoryEffects::Effect"> {
+                            "::mlir::MemoryEffects::Effect"> {
   let description = [{
     An interface used to query information about the memory effects applied by
     an operation.

From 00f09dd4c13d7e86d07728ba03700a18e9013adf Mon Sep 17 00:00:00 2001
From: Serguei Katkov 
Date: Mon, 7 Sep 2020 12:56:34 +0700
Subject: [PATCH 0778/1079] [InstCombine] Add tests for statepoint simplification

This tests increase coverage for change introduced in D85959

Reviewers: reames, reames

Reviewed By: reames

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D87224
---
 .../InstCombine/statepoint-cleanup.ll         | 90 +++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/statepoint-cleanup.ll

diff --git a/llvm/test/Transforms/InstCombine/statepoint-cleanup.ll b/llvm/test/Transforms/InstCombine/statepoint-cleanup.ll
new file mode 100644
index 0000000000000..003f25b4ff7a9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/statepoint-cleanup.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -instcombine-max-iterations=1 -S | FileCheck %s
+; These tests check the optimizations specific to
+; pointers being relocated at a statepoint.
+
+
+declare void @func()
+
+define void @test(i32 addrspace(1)* %b) gc "statepoint-example" {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[D:%.*]] = getelementptr i32, i32 addrspace(1)* [[B:%.*]], i64 16
+; CHECK-NEXT:    [[SAFEPOINT_TOKEN:%.*]] = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* nonnull @func, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(i32 addrspace(1)* [[B]], i32 addrspace(1)* [[D]]) ]
+; CHECK-NEXT:    [[B_NEW_1:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 0)
+; CHECK-NEXT:    [[B_NEW_2:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 0)
+; CHECK-NEXT:    [[D_NEW_1:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 1)
+; CHECK-NEXT:    [[D_NEW_2:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 1)
+; CHECK-NEXT:    [[D_NEW_3:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 1)
+; CHECK-NEXT:    [[D_NEW_4:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 1)
+; CHECK-NEXT:    store i32 1, i32 addrspace(1)* [[B_NEW_1]], align 4
+; CHECK-NEXT:    store i32 1, i32 addrspace(1)* [[B_NEW_2]], align 4
+; CHECK-NEXT:    store i32 1, i32 addrspace(1)* [[D_NEW_1]], align 4
+; CHECK-NEXT:    store i32 1, i32 addrspace(1)* [[D_NEW_2]], align 4
+; CHECK-NEXT:    store i32 1, i32 addrspace(1)* [[D_NEW_3]], align 4
+; CHECK-NEXT:    store i32 1, i32 addrspace(1)* [[D_NEW_4]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %d = getelementptr i32, i32 addrspace(1)* %b, i64 16
+  %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %b, i32 addrspace(1)* %b, i32 addrspace(1)* %d, i32 addrspace(1)* %d)]
+  %b.new.1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 0)
+  %b.new.2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 1)
+  %d.new.1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 2)
+  %d.new.2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 3)
+  %d.new.3 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 1, i32 2)
+  %d.new.4 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 1, i32 3)
+  store i32 1, i32 addrspace(1)* %b.new.1
+  store i32 1, i32 addrspace(1)* %b.new.2
+  store i32 1, i32 addrspace(1)* %d.new.1
+  store i32 1, i32 addrspace(1)* %d.new.2
+  store i32 1, i32 addrspace(1)* %d.new.3
+  store i32 1, i32 addrspace(1)* %d.new.4
+  ret void
+}
+
+define void @test_no_derived_use(i32 addrspace(1)* %b) gc "statepoint-example" {
+; CHECK-LABEL: @test_no_derived_use(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SAFEPOINT_TOKEN:%.*]] = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* nonnull @func, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(i32 addrspace(1)* [[B:%.*]]) ]
+; CHECK-NEXT:    [[B_NEW_1:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 0)
+; CHECK-NEXT:    store i32 1, i32 addrspace(1)* [[B_NEW_1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %d = getelementptr i32, i32 addrspace(1)* %b, i64 16
+  %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %b, i32 addrspace(1)* %b, i32 addrspace(1)* %d, i32 addrspace(1)* %d)]
+  %b.new.1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 0)
+  %b.new.2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 1)
+  %d.new.1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 2)
+  %d.new.2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 3)
+  %d.new.3 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 1, i32 2)
+  %d.new.4 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 1, i32 3)
+  store i32 1, i32 addrspace(1)* %b.new.1
+  ret void
+}
+
+define void @test_no_base_use(i32 addrspace(1)* %b) gc "statepoint-example" {
+; CHECK-LABEL: @test_no_base_use(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[D:%.*]] = getelementptr i32, i32 addrspace(1)* [[B:%.*]], i64 16
+; CHECK-NEXT:    [[SAFEPOINT_TOKEN:%.*]] = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* nonnull @func, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(i32 addrspace(1)* [[B]], i32 addrspace(1)* [[D]]) ]
+; CHECK-NEXT:    [[D_NEW_1:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 1)
+; CHECK-NEXT:    store i32 1, i32 addrspace(1)* [[D_NEW_1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %d = getelementptr i32, i32 addrspace(1)* %b, i64 16
+  %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %b, i32 addrspace(1)* %b, i32 addrspace(1)* %d, i32 addrspace(1)* %d)]
+  %b.new.1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 0)
+  %b.new.2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 1)
+  %d.new.1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 2)
+  %d.new.2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 3)
+  %d.new.3 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 1, i32 2)
+  %d.new.4 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 1, i32 3)
+  store i32 1, i32 addrspace(1)* %d.new.1
+  ret void
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)

From 8a04cdb510c89b8c6419d6ce1e98967d7ac9abb2 Mon Sep 17 00:00:00 2001
From: Max Kazantsev 
Date: Wed, 16 Sep 2020 11:30:21 +0700
Subject: [PATCH 0779/1079] [Test] Add signed version of a test

---
 .../IndVarSimplify/predicated_ranges.ll       | 54 ++++++++++++++++++-
 1 file changed, 52 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll b/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
index 7956735922fea..62a0a1dcf8656 100644
--- a/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
+++ b/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
@@ -10,8 +10,8 @@
 ;    1 <= iv <= len [3];
 ; 4. iv.next = iv - 1 and [3], therefore
 ;    0 <= iv.next < len.
-define void @test_predicated_simple(i32* %p, i32* %arr) {
-; CHECK-LABEL: @test_predicated_simple(
+define void @test_predicated_simple_unsigned(i32* %p, i32* %arr) {
+; CHECK-LABEL: @test_predicated_simple_unsigned(
 ; CHECK-NEXT:  preheader:
 ; CHECK-NEXT:    [[LEN:%.*]] = load i32, i32* [[P:%.*]], align 4, [[RNG0:!range !.*]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
@@ -60,4 +60,54 @@ fail:
   unreachable
 }
 
+define void @test_predicated_simple_signed(i32* %p, i32* %arr) {
+; CHECK-LABEL: @test_predicated_simple_signed(
+; CHECK-NEXT:  preheader:
+; CHECK-NEXT:    [[LEN:%.*]] = load i32, i32* [[P:%.*]], align 4, [[RNG0]]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[LEN]], [[PREHEADER:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[ZERO_COND:%.*]] = icmp eq i32 [[IV]], 0
+; CHECK-NEXT:    br i1 [[ZERO_COND]], label [[EXIT:%.*]], label [[RANGE_CHECK_BLOCK:%.*]]
+; CHECK:       range_check_block:
+; CHECK-NEXT:    [[IV_NEXT]] = sub i32 [[IV]], 1
+; CHECK-NEXT:    [[RANGE_CHECK:%.*]] = icmp slt i32 [[IV_NEXT]], [[LEN]]
+; CHECK-NEXT:    br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[FAIL:%.*]]
+; CHECK:       backedge:
+; CHECK-NEXT:    [[EL_PTR:%.*]] = getelementptr i32, i32* [[P]], i32 [[IV]]
+; CHECK-NEXT:    [[EL:%.*]] = load i32, i32* [[EL_PTR]], align 4
+; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp eq i32 [[EL]], 0
+; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+; CHECK:       fail:
+; CHECK-NEXT:    unreachable
+;
+preheader:
+  %len = load i32, i32* %p, !range !0
+  br label %loop
+
+loop:
+  %iv = phi i32 [%len, %preheader], [%iv.next, %backedge]
+  %zero_cond = icmp eq i32 %iv, 0
+  br i1 %zero_cond, label %exit, label %range_check_block
+
+range_check_block:
+  %iv.next = sub i32 %iv, 1
+  %range_check = icmp slt i32 %iv.next, %len
+  br i1 %range_check, label %backedge, label %fail
+
+backedge:
+  %el.ptr = getelementptr i32, i32* %p, i32 %iv
+  %el = load i32, i32* %el.ptr
+  %loop.cond = icmp eq i32 %el, 0
+  br i1 %loop.cond, label %loop, label %exit
+
+exit:
+  ret void
+
+fail:
+  unreachable
+}
+
 !0 = !{i32 0, i32 2147483647}

From 96c6d012dfe2492891d0f0450dd7cd5f3c1ca88c Mon Sep 17 00:00:00 2001
From: Zinovy Nis 
Date: Mon, 14 Sep 2020 22:08:00 +0300
Subject: [PATCH 0780/1079] [clang-tidy] Fix crash in modernize-use-noexcept on
 uninstantiated throw class

Bug: https://bugs.llvm.org/show_bug.cgi?id=47446

Differential Revision: https://reviews.llvm.org/D87627
---
 clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.cpp | 5 ++++-
 .../test/clang-tidy/checkers/modernize-use-noexcept-opt.cpp | 6 ++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.cpp
index cc4bc05a35dd0..c4e7f12e74acb 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.cpp
@@ -77,13 +77,16 @@ void UseNoexceptCheck::check(const MatchFinder::MatchResult &Result) {
                 .getExceptionSpecRange();
   }
 
+  assert(FnTy && "FunctionProtoType is null.");
+  if (isUnresolvedExceptionSpec(FnTy->getExceptionSpecType()))
+    return;
+
   assert(Range.isValid() && "Exception Source Range is invalid.");
 
   CharSourceRange CRange = Lexer::makeFileCharRange(
       CharSourceRange::getTokenRange(Range), *Result.SourceManager,
       Result.Context->getLangOpts());
 
-  assert(FnTy && "FunctionProtoType is null.");
   bool IsNoThrow = FnTy->isNothrow();
   StringRef ReplacementStr =
       IsNoThrow
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize-use-noexcept-opt.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize-use-noexcept-opt.cpp
index 92c1387d64d66..b0f52a18edf51 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize-use-noexcept-opt.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize-use-noexcept-opt.cpp
@@ -4,6 +4,7 @@
 // This test is not run in C++17 or later because dynamic exception
 // specifications were removed in C++17.
 
+using size_t = __SIZE_TYPE__;
 class A {};
 class B {};
 
@@ -19,6 +20,11 @@ void k() throw(int(int));
 // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: dynamic exception specification 'throw(int(int))' is deprecated; consider removing it instead [modernize-use-noexcept]
 // CHECK-FIXES: void k() ;
 
+// Shouldn't crash due to llvm_unreachable in canThrow() on EST_Uninstantiated
+template <int N> class c { void *operator new(size_t) throw (int);};
+void s() { c<1> doesnt_crash; }
+// CHECK-MESSAGES: :[[@LINE-2]]:53: warning: dynamic exception specification 'throw (int)' is deprecated; consider removing it instead [modernize-use-noexcept]
+
 void foobar() throw(A, B)
 {}
 // CHECK-MESSAGES: :[[@LINE-2]]:15: warning: dynamic exception specification 'throw(A, B)' is deprecated; consider removing it instead [modernize-use-noexcept]

From 757ac4ccfb8b024454b4f445a2b5c8985da5dc8a Mon Sep 17 00:00:00 2001
From: Dave Lee 
Date: Mon, 14 Sep 2020 13:53:50 -0700
Subject: [PATCH 0781/1079] [lldb] Reword CompilerType docstring to not say
 "generic type"

Since "generic type" has a precise meaning in some languages, reword the
docstring of `CompilerType` to avoid ambiguity.

Differential Revision: https://reviews.llvm.org/D87633
---
 lldb/include/lldb/Symbol/CompilerType.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/include/lldb/Symbol/CompilerType.h b/lldb/include/lldb/Symbol/CompilerType.h
index c5e19773d51c7..6143739381659 100644
--- a/lldb/include/lldb/Symbol/CompilerType.h
+++ b/lldb/include/lldb/Symbol/CompilerType.h
@@ -20,7 +20,7 @@ namespace lldb_private {
 
 class DataExtractor;
 
-/// Represents a generic type in a programming language.
+/// Generic representation of a type in a programming language.
 ///
 /// This class serves as an abstraction for a type inside one of the TypeSystems
 /// implemented by the language plugins. It does not have any actual logic in it

From 9c40495a35a2cac89dd72db54892d6bd7a2abf0d Mon Sep 17 00:00:00 2001
From: Uday Bondhugula 
Date: Tue, 15 Sep 2020 10:58:45 +0530
Subject: [PATCH 0782/1079] [MLIR][NFC] Value print update for block arguments

Emit some more information when printing/dumping `Value`s of
`BlockArgument` kind. This is purely to help for debugging purposes.

Differential Revision: https://reviews.llvm.org/D87670
---
 mlir/lib/IR/AsmPrinter.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp
index 3deb7b477bea4..602138d3ada7c 100644
--- a/mlir/lib/IR/AsmPrinter.cpp
+++ b/mlir/lib/IR/AsmPrinter.cpp
@@ -2359,16 +2359,18 @@ void Value::print(raw_ostream &os) {
   if (auto *op = getDefiningOp())
     return op->print(os);
   // TODO: Improve this.
-  assert(isa<BlockArgument>());
-  os << "<block argument>\n";
+  BlockArgument arg = this->cast<BlockArgument>();
+  os << "<block argument> of type '" << arg.getType()
+     << "' at index: " << arg.getArgNumber() << '\n';
 }
 
 void Value::print(raw_ostream &os, AsmState &state) {
   if (auto *op = getDefiningOp())
     return op->print(os, state);
   // TODO: Improve this.
-  assert(isa<BlockArgument>());
-  os << "<block argument>\n";
+  BlockArgument arg = this->cast<BlockArgument>();
+  os << "<block argument> of type '" << arg.getType()
+     << "' at index: " << arg.getArgNumber() << '\n';
 }
 
 void Value::dump() {

From 291bfff5dbb70360730e91b4019f8080e4e3d7f5 Mon Sep 17 00:00:00 2001
From: Daniel Stone 
Date: Tue, 15 Sep 2020 13:01:04 -0400
Subject: [PATCH 0783/1079] libclc: Add a __builtin to let SPIRV targets select
 between SW and HW FMA

Reviewer: jenatali jvesely

Differential Revision: https://reviews.llvm.org/D85910
---
 libclc/generic/lib/math/math.h | 3 +++
 libclc/spirv/lib/math/fma.cl   | 5 +++++
 libclc/spirv64/lib/math/fma.cl | 5 +++++
 3 files changed, 13 insertions(+)

diff --git a/libclc/generic/lib/math/math.h b/libclc/generic/lib/math/math.h
index c931d19a380c1..351e37dc3f12c 100644
--- a/libclc/generic/lib/math/math.h
+++ b/libclc/generic/lib/math/math.h
@@ -40,6 +40,9 @@
 
 #if (defined __AMDGCN__ || defined __R600__) && !defined __HAS_FMAF__
 #define HAVE_HW_FMA32() (0)
+#elif defined CLC_SPIRV || defined CLC_SPIRV64
+bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void);
+#define HAVE_HW_FMA32() __clc_runtime_has_hw_fma32()
 #else
 #define HAVE_HW_FMA32() (1)
 #endif
diff --git a/libclc/spirv/lib/math/fma.cl b/libclc/spirv/lib/math/fma.cl
index 982ddc4374f35..79142425e52d2 100644
--- a/libclc/spirv/lib/math/fma.cl
+++ b/libclc/spirv/lib/math/fma.cl
@@ -4,3 +4,8 @@
 #define __CLC_BODY <fma.inc>
 #define __FLOAT_ONLY
 #include <clc/math/gentype.inc>
+
+bool __clc_runtime_has_hw_fma32()
+{
+	return false;
+}
diff --git a/libclc/spirv64/lib/math/fma.cl b/libclc/spirv64/lib/math/fma.cl
index 982ddc4374f35..79142425e52d2 100644
--- a/libclc/spirv64/lib/math/fma.cl
+++ b/libclc/spirv64/lib/math/fma.cl
@@ -4,3 +4,8 @@
 #define __CLC_BODY <fma.inc>
 #define __FLOAT_ONLY
 #include <clc/math/gentype.inc>
+
+bool __clc_runtime_has_hw_fma32()
+{
+	return false;
+}

From 8ea7ef8eda93aa144c339275fc6d9db2615a0118 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 15 Sep 2020 22:40:13 -0700
Subject: [PATCH 0784/1079] [ThinLTO] Relax thinlto_embed_bitcode.ll check

Fixes fuscia test [1] - the thinlto annotations may not always be there.

[1] http://lab.llvm.org:8011/builders/fuchsia-x86_64-linux/builds/11312
---
 clang/test/CodeGen/thinlto_embed_bitcode.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/CodeGen/thinlto_embed_bitcode.ll b/clang/test/CodeGen/thinlto_embed_bitcode.ll
index 4efb525e5f3e6..2d60e16e54e1e 100644
--- a/clang/test/CodeGen/thinlto_embed_bitcode.ll
+++ b/clang/test/CodeGen/thinlto_embed_bitcode.ll
@@ -26,5 +26,5 @@
 ; CHECK: define void @foo()
 ; CHECK-OPT-NEXT: ret void
 ; CHECK-NOOPT-NEXT: call void @bar()
-; CHECK-NOOPT: define available_externally void @bar() !thinlto_src_module !0 {
+; CHECK-NOOPT: define available_externally void @bar()
 ; CHECK-NOOPT-NEXT: ret void

From 3045b3c3b5dbc4192b9a4057ae165f238b84ddf6 Mon Sep 17 00:00:00 2001
From: Craig Topper 
Date: Tue, 15 Sep 2020 22:45:50 -0700
Subject: [PATCH 0785/1079] [X86] Add test case for non-power of 2 scatter.
 NFC

---
 .../test/CodeGen/X86/masked_gather_scatter.ll | 196 +++++++++++++++++-
 1 file changed, 188 insertions(+), 8 deletions(-)

diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index c82efa56655ea..6f2298c967e91 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -1812,6 +1812,186 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
   ret <3 x i32>%res
 }
 
+; Non-power of 2 scatter
+declare void @llvm.masked.scatter.v3i32.v3p0i32(<3 x i32>, <3 x i32*>, i32, <3 x i1>)
+define void @test30b(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) {
+; KNL_64-LABEL: test30b:
+; KNL_64:       # %bb.0:
+; KNL_64-NEXT:    andb $1, %dil
+; KNL_64-NEXT:    andb $1, %sil
+; KNL_64-NEXT:    addb %sil, %sil
+; KNL_64-NEXT:    orb %dil, %sil
+; KNL_64-NEXT:    andb $1, %dl
+; KNL_64-NEXT:    shlb $2, %dl
+; KNL_64-NEXT:    orb %sil, %dl
+; KNL_64-NEXT:    vpmovsxdq %xmm1, %ymm1
+; KNL_64-NEXT:    vpsllq $2, %ymm1, %ymm1
+; KNL_64-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
+; KNL_64-NEXT:    testb $1, %dl
+; KNL_64-NEXT:    jne .LBB32_1
+; KNL_64-NEXT:  # %bb.2: # %else
+; KNL_64-NEXT:    testb $2, %dl
+; KNL_64-NEXT:    jne .LBB32_3
+; KNL_64-NEXT:  .LBB32_4: # %else2
+; KNL_64-NEXT:    testb $4, %dl
+; KNL_64-NEXT:    jne .LBB32_5
+; KNL_64-NEXT:  .LBB32_6: # %else4
+; KNL_64-NEXT:    vzeroupper
+; KNL_64-NEXT:    retq
+; KNL_64-NEXT:  .LBB32_1: # %cond.store
+; KNL_64-NEXT:    vmovq %xmm0, %rax
+; KNL_64-NEXT:    vmovss %xmm2, (%rax)
+; KNL_64-NEXT:    testb $2, %dl
+; KNL_64-NEXT:    je .LBB32_4
+; KNL_64-NEXT:  .LBB32_3: # %cond.store1
+; KNL_64-NEXT:    vpextrq $1, %xmm0, %rax
+; KNL_64-NEXT:    vextractps $1, %xmm2, (%rax)
+; KNL_64-NEXT:    testb $4, %dl
+; KNL_64-NEXT:    je .LBB32_6
+; KNL_64-NEXT:  .LBB32_5: # %cond.store3
+; KNL_64-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; KNL_64-NEXT:    vmovq %xmm0, %rax
+; KNL_64-NEXT:    vextractps $2, %xmm2, (%rax)
+; KNL_64-NEXT:    vzeroupper
+; KNL_64-NEXT:    retq
+;
+; KNL_32-LABEL: test30b:
+; KNL_32:       # %bb.0:
+; KNL_32-NEXT:    pushl %eax
+; KNL_32-NEXT:    .cfi_def_cfa_offset 8
+; KNL_32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; KNL_32-NEXT:    andb $1, %al
+; KNL_32-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; KNL_32-NEXT:    andb $1, %cl
+; KNL_32-NEXT:    addb %cl, %cl
+; KNL_32-NEXT:    orb %al, %cl
+; KNL_32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; KNL_32-NEXT:    andb $1, %al
+; KNL_32-NEXT:    shlb $2, %al
+; KNL_32-NEXT:    orb %cl, %al
+; KNL_32-NEXT:    vpslld $2, %xmm1, %xmm1
+; KNL_32-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; KNL_32-NEXT:    testb $1, %al
+; KNL_32-NEXT:    jne .LBB32_1
+; KNL_32-NEXT:  # %bb.2: # %else
+; KNL_32-NEXT:    testb $2, %al
+; KNL_32-NEXT:    jne .LBB32_3
+; KNL_32-NEXT:  .LBB32_4: # %else2
+; KNL_32-NEXT:    testb $4, %al
+; KNL_32-NEXT:    jne .LBB32_5
+; KNL_32-NEXT:  .LBB32_6: # %else4
+; KNL_32-NEXT:    popl %eax
+; KNL_32-NEXT:    .cfi_def_cfa_offset 4
+; KNL_32-NEXT:    retl
+; KNL_32-NEXT:  .LBB32_1: # %cond.store
+; KNL_32-NEXT:    .cfi_def_cfa_offset 8
+; KNL_32-NEXT:    vmovd %xmm0, %ecx
+; KNL_32-NEXT:    vmovss %xmm2, (%ecx)
+; KNL_32-NEXT:    testb $2, %al
+; KNL_32-NEXT:    je .LBB32_4
+; KNL_32-NEXT:  .LBB32_3: # %cond.store1
+; KNL_32-NEXT:    vpextrd $1, %xmm0, %ecx
+; KNL_32-NEXT:    vextractps $1, %xmm2, (%ecx)
+; KNL_32-NEXT:    testb $4, %al
+; KNL_32-NEXT:    je .LBB32_6
+; KNL_32-NEXT:  .LBB32_5: # %cond.store3
+; KNL_32-NEXT:    vpextrd $2, %xmm0, %eax
+; KNL_32-NEXT:    vextractps $2, %xmm2, (%eax)
+; KNL_32-NEXT:    popl %eax
+; KNL_32-NEXT:    .cfi_def_cfa_offset 4
+; KNL_32-NEXT:    retl
+;
+; SKX-LABEL: test30b:
+; SKX:       # %bb.0:
+; SKX-NEXT:    andb $1, %dil
+; SKX-NEXT: andb $1, %sil +; SKX-NEXT: addb %sil, %sil +; SKX-NEXT: orb %dil, %sil +; SKX-NEXT: andb $1, %dl +; SKX-NEXT: shlb $2, %dl +; SKX-NEXT: orb %sil, %dl +; SKX-NEXT: vpmovsxdq %xmm1, %ymm1 +; SKX-NEXT: vpsllq $2, %ymm1, %ymm1 +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 +; SKX-NEXT: testb $1, %dl +; SKX-NEXT: jne .LBB32_1 +; SKX-NEXT: # %bb.2: # %else +; SKX-NEXT: testb $2, %dl +; SKX-NEXT: jne .LBB32_3 +; SKX-NEXT: .LBB32_4: # %else2 +; SKX-NEXT: testb $4, %dl +; SKX-NEXT: jne .LBB32_5 +; SKX-NEXT: .LBB32_6: # %else4 +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq +; SKX-NEXT: .LBB32_1: # %cond.store +; SKX-NEXT: vmovq %xmm0, %rax +; SKX-NEXT: vmovss %xmm2, (%rax) +; SKX-NEXT: testb $2, %dl +; SKX-NEXT: je .LBB32_4 +; SKX-NEXT: .LBB32_3: # %cond.store1 +; SKX-NEXT: vpextrq $1, %xmm0, %rax +; SKX-NEXT: vextractps $1, %xmm2, (%rax) +; SKX-NEXT: testb $4, %dl +; SKX-NEXT: je .LBB32_6 +; SKX-NEXT: .LBB32_5: # %cond.store3 +; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 +; SKX-NEXT: vmovq %xmm0, %rax +; SKX-NEXT: vextractps $2, %xmm2, (%rax) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq +; +; SKX_32-LABEL: test30b: +; SKX_32: # %bb.0: +; SKX_32-NEXT: pushl %eax +; SKX_32-NEXT: .cfi_def_cfa_offset 8 +; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al +; SKX_32-NEXT: andb $1, %al +; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %cl +; SKX_32-NEXT: andb $1, %cl +; SKX_32-NEXT: addb %cl, %cl +; SKX_32-NEXT: orb %al, %cl +; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al +; SKX_32-NEXT: andb $1, %al +; SKX_32-NEXT: shlb $2, %al +; SKX_32-NEXT: orb %cl, %al +; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1 +; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; SKX_32-NEXT: testb $1, %al +; SKX_32-NEXT: jne .LBB32_1 +; SKX_32-NEXT: # %bb.2: # %else +; SKX_32-NEXT: testb $2, %al +; SKX_32-NEXT: jne .LBB32_3 +; SKX_32-NEXT: .LBB32_4: # %else2 +; SKX_32-NEXT: testb $4, %al +; SKX_32-NEXT: jne .LBB32_5 +; SKX_32-NEXT: .LBB32_6: # %else4 +; SKX_32-NEXT: popl %eax +; SKX_32-NEXT: .cfi_def_cfa_offset 4 +; SKX_32-NEXT: retl +; SKX_32-NEXT: .LBB32_1: # %cond.store +; SKX_32-NEXT: .cfi_def_cfa_offset 8 +; SKX_32-NEXT: vmovd %xmm0, %ecx +; SKX_32-NEXT: vmovss %xmm2, (%ecx) +; SKX_32-NEXT: testb $2, %al +; SKX_32-NEXT: je .LBB32_4 +; SKX_32-NEXT: .LBB32_3: # %cond.store1 +; SKX_32-NEXT: vpextrd $1, %xmm0, %ecx +; SKX_32-NEXT: vextractps $1, %xmm2, (%ecx) +; SKX_32-NEXT: testb $4, %al +; SKX_32-NEXT: je .LBB32_6 +; SKX_32-NEXT: .LBB32_5: # %cond.store3 +; SKX_32-NEXT: vpextrd $2, %xmm0, %eax +; SKX_32-NEXT: vextractps $2, %xmm2, (%eax) +; SKX_32-NEXT: popl %eax +; SKX_32-NEXT: .cfi_def_cfa_offset 4 +; SKX_32-NEXT: retl + %sext_ind = sext <3 x i32> %ind to <3 x i64> + %gep.random = getelementptr i32, <3 x i32*> %base, <3 x i64> %sext_ind + call void @llvm.masked.scatter.v3i32.v3p0i32(<3 x i32> %src0, <3 x i32*> %gep.random, i32 4, <3 x i1> %mask) + ret void +} + declare <16 x float*> @llvm.masked.gather.v16p0f32.v16p0p0f32(<16 x float**>, i32, <16 x i1>, <16 x float*>) define <16 x float*> @test31(<16 x float**> %ptrs) { ; KNL_64-LABEL: test31: @@ -2483,41 +2663,41 @@ define void @v1_scatter(<1 x i32>%a1, <1 x i32*> %ptr, <1 x i1> %mask) { ; KNL_64-LABEL: v1_scatter: ; KNL_64: # %bb.0: ; KNL_64-NEXT: testb $1, %dl -; KNL_64-NEXT: je .LBB44_2 +; KNL_64-NEXT: je .LBB45_2 ; KNL_64-NEXT: # %bb.1: # %cond.store ; KNL_64-NEXT: movl %edi, (%rsi) -; KNL_64-NEXT: .LBB44_2: # %else +; KNL_64-NEXT: .LBB45_2: # %else ; KNL_64-NEXT: retq ; ; KNL_32-LABEL: v1_scatter: ; KNL_32: # %bb.0: ; KNL_32-NEXT: testb $1, {{[0-9]+}}(%esp) -; KNL_32-NEXT: je .LBB44_2 +; KNL_32-NEXT: je .LBB45_2 
 ; KNL_32-NEXT: # %bb.1: # %cond.store
 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; KNL_32-NEXT: movl %ecx, (%eax)
-; KNL_32-NEXT: .LBB44_2: # %else
+; KNL_32-NEXT: .LBB45_2: # %else
 ; KNL_32-NEXT: retl
 ;
 ; SKX-LABEL: v1_scatter:
 ; SKX: # %bb.0:
 ; SKX-NEXT: testb $1, %dl
-; SKX-NEXT: je .LBB44_2
+; SKX-NEXT: je .LBB45_2
 ; SKX-NEXT: # %bb.1: # %cond.store
 ; SKX-NEXT: movl %edi, (%rsi)
-; SKX-NEXT: .LBB44_2: # %else
+; SKX-NEXT: .LBB45_2: # %else
 ; SKX-NEXT: retq
 ;
 ; SKX_32-LABEL: v1_scatter:
 ; SKX_32: # %bb.0:
 ; SKX_32-NEXT: testb $1, {{[0-9]+}}(%esp)
-; SKX_32-NEXT: je .LBB44_2
+; SKX_32-NEXT: je .LBB45_2
 ; SKX_32-NEXT: # %bb.1: # %cond.store
 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; SKX_32-NEXT: movl %ecx, (%eax)
-; SKX_32-NEXT: .LBB44_2: # %else
+; SKX_32-NEXT: .LBB45_2: # %else
 ; SKX_32-NEXT: retl
   call void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32> %a1, <1 x i32*> %ptr, i32 4, <1 x i1> %mask)
   ret void

From 41f4cd60d54d94e8dac4bbd8d9961dc8ad4a64fc Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 15 Sep 2020 23:22:53 -0700
Subject: [PATCH 0786/1079] [X86] Don't scalarize gather/scatters with
 non-power of 2 element counts. Widen instead.

We can pad the mask with zeros in order to widen. We already do this
for power-of-2 types that are smaller than a legal type.
---
 .../lib/Target/X86/X86TargetTransformInfo.cpp |   2 +-
 .../test/CodeGen/X86/masked_gather_scatter.ll | 437 +++++++-----------
 2 files changed, 159 insertions(+), 280 deletions(-)

diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 03f8be094c252..8ce9749dc2d66 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -4283,7 +4283,7 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, Align Alignment) {
   // scalarize it.
   if (auto *DataVTy = dyn_cast<FixedVectorType>(DataTy)) {
     unsigned NumElts = DataVTy->getNumElements();
-    if (NumElts == 1 || !isPowerOf2_32(NumElts))
+    if (NumElts == 1)
       return false;
   }
   Type *ScalarTy = DataTy->getScalarType();
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 6f2298c967e91..948928099d38e 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -1629,182 +1629,122 @@ define <16 x float> @test29(float* %base, <16 x i32> %ind) {
   ret <16 x float>%res
 }
 
-; Check non-power-of-2 case. It should be scalarized.
declare <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*>, i32, <3 x i1>, <3 x i32>) define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) { ; KNL_64-LABEL: test30: ; KNL_64: # %bb.0: -; KNL_64-NEXT: andb $1, %dil -; KNL_64-NEXT: andb $1, %sil -; KNL_64-NEXT: addb %sil, %sil -; KNL_64-NEXT: orb %dil, %sil -; KNL_64-NEXT: andb $1, %dl -; KNL_64-NEXT: shlb $2, %dl -; KNL_64-NEXT: orb %sil, %dl +; KNL_64-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2 +; KNL_64-NEXT: movw $-3, %ax +; KNL_64-NEXT: kmovw %eax, %k0 +; KNL_64-NEXT: andl $1, %edi +; KNL_64-NEXT: kmovw %edi, %k1 +; KNL_64-NEXT: kandw %k0, %k1, %k0 +; KNL_64-NEXT: kmovw %esi, %k1 +; KNL_64-NEXT: kshiftlw $15, %k1, %k1 +; KNL_64-NEXT: kshiftrw $14, %k1, %k1 +; KNL_64-NEXT: korw %k1, %k0, %k0 +; KNL_64-NEXT: movw $-5, %ax +; KNL_64-NEXT: kmovw %eax, %k1 +; KNL_64-NEXT: kandw %k1, %k0, %k0 +; KNL_64-NEXT: kmovw %edx, %k1 +; KNL_64-NEXT: kshiftlw $15, %k1, %k1 +; KNL_64-NEXT: kshiftrw $13, %k1, %k1 +; KNL_64-NEXT: korw %k1, %k0, %k0 +; KNL_64-NEXT: kshiftlw $12, %k0, %k0 +; KNL_64-NEXT: kshiftrw $12, %k0, %k1 ; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1 ; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1 ; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm0 -; KNL_64-NEXT: testb $1, %dl -; KNL_64-NEXT: jne .LBB31_1 -; KNL_64-NEXT: # %bb.2: # %else -; KNL_64-NEXT: testb $2, %dl -; KNL_64-NEXT: jne .LBB31_3 -; KNL_64-NEXT: .LBB31_4: # %else2 -; KNL_64-NEXT: testb $4, %dl -; KNL_64-NEXT: jne .LBB31_5 -; KNL_64-NEXT: .LBB31_6: # %else5 -; KNL_64-NEXT: vmovdqa %xmm2, %xmm0 -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; KNL_64-NEXT: .LBB31_1: # %cond.load -; KNL_64-NEXT: vmovq %xmm0, %rax -; KNL_64-NEXT: vpinsrd $0, (%rax), %xmm2, %xmm2 -; KNL_64-NEXT: testb $2, %dl -; KNL_64-NEXT: je .LBB31_4 -; KNL_64-NEXT: .LBB31_3: # %cond.load1 -; KNL_64-NEXT: vpextrq $1, %xmm0, %rax -; KNL_64-NEXT: vpinsrd $1, (%rax), %xmm2, %xmm2 -; KNL_64-NEXT: testb $4, %dl -; KNL_64-NEXT: je .LBB31_6 -; KNL_64-NEXT: .LBB31_5: # %cond.load4 -; KNL_64-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL_64-NEXT: vmovq %xmm0, %rax -; KNL_64-NEXT: vpinsrd $2, (%rax), %xmm2, %xmm2 +; KNL_64-NEXT: vpgatherqd (,%zmm0), %ymm2 {%k1} ; KNL_64-NEXT: vmovdqa %xmm2, %xmm0 ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq ; ; KNL_32-LABEL: test30: ; KNL_32: # %bb.0: -; KNL_32-NEXT: pushl %eax -; KNL_32-NEXT: .cfi_def_cfa_offset 8 +; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 +; KNL_32-NEXT: movw $-3, %ax +; KNL_32-NEXT: kmovw %eax, %k0 ; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_32-NEXT: andb $1, %al -; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %cl -; KNL_32-NEXT: andb $1, %cl -; KNL_32-NEXT: addb %cl, %cl -; KNL_32-NEXT: orb %al, %cl +; KNL_32-NEXT: andl $1, %eax +; KNL_32-NEXT: kmovw %eax, %k1 +; KNL_32-NEXT: kandw %k0, %k1, %k0 ; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_32-NEXT: andb $1, %al -; KNL_32-NEXT: shlb $2, %al -; KNL_32-NEXT: orb %cl, %al +; KNL_32-NEXT: kmovw %eax, %k1 +; KNL_32-NEXT: kshiftlw $15, %k1, %k1 +; KNL_32-NEXT: kshiftrw $14, %k1, %k1 +; KNL_32-NEXT: korw %k1, %k0, %k0 +; KNL_32-NEXT: movw $-5, %ax +; KNL_32-NEXT: kmovw %eax, %k1 +; KNL_32-NEXT: kandw %k1, %k0, %k0 +; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al +; KNL_32-NEXT: kmovw %eax, %k1 +; KNL_32-NEXT: kshiftlw $15, %k1, %k1 +; KNL_32-NEXT: kshiftrw $13, %k1, %k1 +; KNL_32-NEXT: korw %k1, %k0, %k0 +; KNL_32-NEXT: kshiftlw $12, %k0, %k0 +; KNL_32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1 ; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; KNL_32-NEXT: testb $1, %al -; 
KNL_32-NEXT: jne .LBB31_1 -; KNL_32-NEXT: # %bb.2: # %else -; KNL_32-NEXT: testb $2, %al -; KNL_32-NEXT: jne .LBB31_3 -; KNL_32-NEXT: .LBB31_4: # %else2 -; KNL_32-NEXT: testb $4, %al -; KNL_32-NEXT: je .LBB31_6 -; KNL_32-NEXT: .LBB31_5: # %cond.load4 -; KNL_32-NEXT: vpextrd $2, %xmm0, %eax -; KNL_32-NEXT: vpinsrd $2, (%eax), %xmm2, %xmm2 -; KNL_32-NEXT: .LBB31_6: # %else5 +; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1} ; KNL_32-NEXT: vmovdqa %xmm2, %xmm0 -; KNL_32-NEXT: popl %eax -; KNL_32-NEXT: .cfi_def_cfa_offset 4 +; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl -; KNL_32-NEXT: .LBB31_1: # %cond.load -; KNL_32-NEXT: .cfi_def_cfa_offset 8 -; KNL_32-NEXT: vmovd %xmm0, %ecx -; KNL_32-NEXT: vpinsrd $0, (%ecx), %xmm2, %xmm2 -; KNL_32-NEXT: testb $2, %al -; KNL_32-NEXT: je .LBB31_4 -; KNL_32-NEXT: .LBB31_3: # %cond.load1 -; KNL_32-NEXT: vpextrd $1, %xmm0, %ecx -; KNL_32-NEXT: vpinsrd $1, (%ecx), %xmm2, %xmm2 -; KNL_32-NEXT: testb $4, %al -; KNL_32-NEXT: jne .LBB31_5 -; KNL_32-NEXT: jmp .LBB31_6 ; ; SKX-LABEL: test30: ; SKX: # %bb.0: -; SKX-NEXT: andb $1, %dil -; SKX-NEXT: andb $1, %sil -; SKX-NEXT: addb %sil, %sil -; SKX-NEXT: orb %dil, %sil -; SKX-NEXT: andb $1, %dl -; SKX-NEXT: shlb $2, %dl -; SKX-NEXT: orb %sil, %dl +; SKX-NEXT: movb $-3, %al +; SKX-NEXT: kmovw %eax, %k0 +; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $7, %k1, %k1 +; SKX-NEXT: kandw %k0, %k1, %k0 +; SKX-NEXT: kmovw %esi, %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $6, %k1, %k1 +; SKX-NEXT: korw %k1, %k0, %k0 +; SKX-NEXT: movb $-5, %al +; SKX-NEXT: kmovw %eax, %k1 +; SKX-NEXT: kandw %k1, %k0, %k0 +; SKX-NEXT: kmovw %edx, %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $5, %k1, %k1 +; SKX-NEXT: korw %k1, %k0, %k1 ; SKX-NEXT: vpmovsxdq %xmm1, %ymm1 ; SKX-NEXT: vpsllq $2, %ymm1, %ymm1 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 -; SKX-NEXT: testb $1, %dl -; SKX-NEXT: jne .LBB31_1 -; SKX-NEXT: # %bb.2: # %else -; SKX-NEXT: testb $2, %dl -; SKX-NEXT: jne .LBB31_3 -; SKX-NEXT: .LBB31_4: # %else2 -; SKX-NEXT: testb $4, %dl -; SKX-NEXT: jne .LBB31_5 -; SKX-NEXT: .LBB31_6: # %else5 -; SKX-NEXT: vmovdqa %xmm2, %xmm0 -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq -; SKX-NEXT: .LBB31_1: # %cond.load -; SKX-NEXT: vmovq %xmm0, %rax -; SKX-NEXT: vpinsrd $0, (%rax), %xmm2, %xmm2 -; SKX-NEXT: testb $2, %dl -; SKX-NEXT: je .LBB31_4 -; SKX-NEXT: .LBB31_3: # %cond.load1 -; SKX-NEXT: vpextrq $1, %xmm0, %rax -; SKX-NEXT: vpinsrd $1, (%rax), %xmm2, %xmm2 -; SKX-NEXT: testb $4, %dl -; SKX-NEXT: je .LBB31_6 -; SKX-NEXT: .LBB31_5: # %cond.load4 -; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 -; SKX-NEXT: vmovq %xmm0, %rax -; SKX-NEXT: vpinsrd $2, (%rax), %xmm2, %xmm2 +; SKX-NEXT: vpgatherqd (,%ymm0), %xmm2 {%k1} ; SKX-NEXT: vmovdqa %xmm2, %xmm0 ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; ; SKX_32-LABEL: test30: ; SKX_32: # %bb.0: -; SKX_32-NEXT: pushl %eax -; SKX_32-NEXT: .cfi_def_cfa_offset 8 +; SKX_32-NEXT: movb $-3, %al +; SKX_32-NEXT: kmovw %eax, %k0 +; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al +; SKX_32-NEXT: kmovw %eax, %k1 +; SKX_32-NEXT: kshiftlb $7, %k1, %k1 +; SKX_32-NEXT: kshiftrb $7, %k1, %k1 +; SKX_32-NEXT: kandw %k0, %k1, %k0 ; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al -; SKX_32-NEXT: andb $1, %al -; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %cl -; SKX_32-NEXT: andb $1, %cl -; SKX_32-NEXT: addb %cl, %cl -; SKX_32-NEXT: orb %al, %cl +; SKX_32-NEXT: kmovw %eax, %k1 +; SKX_32-NEXT: kshiftlb $7, %k1, %k1 +; SKX_32-NEXT: kshiftrb $6, %k1, %k1 +; SKX_32-NEXT: korw %k1, %k0, %k0 +; SKX_32-NEXT: movb 
$-5, %al +; SKX_32-NEXT: kmovw %eax, %k1 +; SKX_32-NEXT: kandw %k1, %k0, %k0 ; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al -; SKX_32-NEXT: andb $1, %al -; SKX_32-NEXT: shlb $2, %al -; SKX_32-NEXT: orb %cl, %al +; SKX_32-NEXT: kmovw %eax, %k1 +; SKX_32-NEXT: kshiftlb $7, %k1, %k1 +; SKX_32-NEXT: kshiftrb $5, %k1, %k1 +; SKX_32-NEXT: korw %k1, %k0, %k1 ; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1 ; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; SKX_32-NEXT: testb $1, %al -; SKX_32-NEXT: jne .LBB31_1 -; SKX_32-NEXT: # %bb.2: # %else -; SKX_32-NEXT: testb $2, %al -; SKX_32-NEXT: jne .LBB31_3 -; SKX_32-NEXT: .LBB31_4: # %else2 -; SKX_32-NEXT: testb $4, %al -; SKX_32-NEXT: je .LBB31_6 -; SKX_32-NEXT: .LBB31_5: # %cond.load4 -; SKX_32-NEXT: vpextrd $2, %xmm0, %eax -; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm2, %xmm2 -; SKX_32-NEXT: .LBB31_6: # %else5 +; SKX_32-NEXT: vpgatherdd (,%xmm0), %xmm2 {%k1} ; SKX_32-NEXT: vmovdqa %xmm2, %xmm0 -; SKX_32-NEXT: popl %eax -; SKX_32-NEXT: .cfi_def_cfa_offset 4 ; SKX_32-NEXT: retl -; SKX_32-NEXT: .LBB31_1: # %cond.load -; SKX_32-NEXT: .cfi_def_cfa_offset 8 -; SKX_32-NEXT: vmovd %xmm0, %ecx -; SKX_32-NEXT: vpinsrd $0, (%ecx), %xmm2, %xmm2 -; SKX_32-NEXT: testb $2, %al -; SKX_32-NEXT: je .LBB31_4 -; SKX_32-NEXT: .LBB31_3: # %cond.load1 -; SKX_32-NEXT: vpextrd $1, %xmm0, %ecx -; SKX_32-NEXT: vpinsrd $1, (%ecx), %xmm2, %xmm2 -; SKX_32-NEXT: testb $4, %al -; SKX_32-NEXT: jne .LBB31_5 -; SKX_32-NEXT: jmp .LBB31_6 %sext_ind = sext <3 x i32> %ind to <3 x i64> %gep.random = getelementptr i32, <3 x i32*> %base, <3 x i64> %sext_ind @@ -1817,174 +1757,113 @@ declare void @llvm.masked.scatter.v3i32.v3p0i32(<3 x i32>, <3 x i32*>, i32, <3 x define void @test30b(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) { ; KNL_64-LABEL: test30b: ; KNL_64: # %bb.0: -; KNL_64-NEXT: andb $1, %dil -; KNL_64-NEXT: andb $1, %sil -; KNL_64-NEXT: addb %sil, %sil -; KNL_64-NEXT: orb %dil, %sil -; KNL_64-NEXT: andb $1, %dl -; KNL_64-NEXT: shlb $2, %dl -; KNL_64-NEXT: orb %sil, %dl +; KNL_64-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2 +; KNL_64-NEXT: movw $-3, %ax +; KNL_64-NEXT: kmovw %eax, %k0 +; KNL_64-NEXT: andl $1, %edi +; KNL_64-NEXT: kmovw %edi, %k1 +; KNL_64-NEXT: kandw %k0, %k1, %k0 +; KNL_64-NEXT: kmovw %esi, %k1 +; KNL_64-NEXT: kshiftlw $15, %k1, %k1 +; KNL_64-NEXT: kshiftrw $14, %k1, %k1 +; KNL_64-NEXT: korw %k1, %k0, %k0 +; KNL_64-NEXT: movw $-5, %ax +; KNL_64-NEXT: kmovw %eax, %k1 +; KNL_64-NEXT: kandw %k1, %k0, %k0 +; KNL_64-NEXT: kmovw %edx, %k1 +; KNL_64-NEXT: kshiftlw $15, %k1, %k1 +; KNL_64-NEXT: kshiftrw $13, %k1, %k1 +; KNL_64-NEXT: korw %k1, %k0, %k0 +; KNL_64-NEXT: kshiftlw $12, %k0, %k0 +; KNL_64-NEXT: kshiftrw $12, %k0, %k1 ; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1 ; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1 ; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm0 -; KNL_64-NEXT: testb $1, %dl -; KNL_64-NEXT: jne .LBB32_1 -; KNL_64-NEXT: # %bb.2: # %else -; KNL_64-NEXT: testb $2, %dl -; KNL_64-NEXT: jne .LBB32_3 -; KNL_64-NEXT: .LBB32_4: # %else2 -; KNL_64-NEXT: testb $4, %dl -; KNL_64-NEXT: jne .LBB32_5 -; KNL_64-NEXT: .LBB32_6: # %else4 -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; KNL_64-NEXT: .LBB32_1: # %cond.store -; KNL_64-NEXT: vmovq %xmm0, %rax -; KNL_64-NEXT: vmovss %xmm2, (%rax) -; KNL_64-NEXT: testb $2, %dl -; KNL_64-NEXT: je .LBB32_4 -; KNL_64-NEXT: .LBB32_3: # %cond.store1 -; KNL_64-NEXT: vpextrq $1, %xmm0, %rax -; KNL_64-NEXT: vextractps $1, %xmm2, (%rax) -; KNL_64-NEXT: testb $4, %dl -; KNL_64-NEXT: je .LBB32_6 -; KNL_64-NEXT: .LBB32_5: # %cond.store3 -; KNL_64-NEXT: 
vextracti128 $1, %ymm0, %xmm0 -; KNL_64-NEXT: vmovq %xmm0, %rax -; KNL_64-NEXT: vextractps $2, %xmm2, (%rax) +; KNL_64-NEXT: vpscatterqd %ymm2, (,%zmm0) {%k1} ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq ; ; KNL_32-LABEL: test30b: ; KNL_32: # %bb.0: -; KNL_32-NEXT: pushl %eax -; KNL_32-NEXT: .cfi_def_cfa_offset 8 +; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 +; KNL_32-NEXT: movw $-3, %ax +; KNL_32-NEXT: kmovw %eax, %k0 ; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_32-NEXT: andb $1, %al -; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %cl -; KNL_32-NEXT: andb $1, %cl -; KNL_32-NEXT: addb %cl, %cl -; KNL_32-NEXT: orb %al, %cl +; KNL_32-NEXT: andl $1, %eax +; KNL_32-NEXT: kmovw %eax, %k1 +; KNL_32-NEXT: kandw %k0, %k1, %k0 ; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_32-NEXT: andb $1, %al -; KNL_32-NEXT: shlb $2, %al -; KNL_32-NEXT: orb %cl, %al +; KNL_32-NEXT: kmovw %eax, %k1 +; KNL_32-NEXT: kshiftlw $15, %k1, %k1 +; KNL_32-NEXT: kshiftrw $14, %k1, %k1 +; KNL_32-NEXT: korw %k1, %k0, %k0 +; KNL_32-NEXT: movw $-5, %ax +; KNL_32-NEXT: kmovw %eax, %k1 +; KNL_32-NEXT: kandw %k1, %k0, %k0 +; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al +; KNL_32-NEXT: kmovw %eax, %k1 +; KNL_32-NEXT: kshiftlw $15, %k1, %k1 +; KNL_32-NEXT: kshiftrw $13, %k1, %k1 +; KNL_32-NEXT: korw %k1, %k0, %k0 +; KNL_32-NEXT: kshiftlw $12, %k0, %k0 +; KNL_32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1 ; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; KNL_32-NEXT: testb $1, %al -; KNL_32-NEXT: jne .LBB32_1 -; KNL_32-NEXT: # %bb.2: # %else -; KNL_32-NEXT: testb $2, %al -; KNL_32-NEXT: jne .LBB32_3 -; KNL_32-NEXT: .LBB32_4: # %else2 -; KNL_32-NEXT: testb $4, %al -; KNL_32-NEXT: jne .LBB32_5 -; KNL_32-NEXT: .LBB32_6: # %else4 -; KNL_32-NEXT: popl %eax -; KNL_32-NEXT: .cfi_def_cfa_offset 4 -; KNL_32-NEXT: retl -; KNL_32-NEXT: .LBB32_1: # %cond.store -; KNL_32-NEXT: .cfi_def_cfa_offset 8 -; KNL_32-NEXT: vmovd %xmm0, %ecx -; KNL_32-NEXT: vmovss %xmm2, (%ecx) -; KNL_32-NEXT: testb $2, %al -; KNL_32-NEXT: je .LBB32_4 -; KNL_32-NEXT: .LBB32_3: # %cond.store1 -; KNL_32-NEXT: vpextrd $1, %xmm0, %ecx -; KNL_32-NEXT: vextractps $1, %xmm2, (%ecx) -; KNL_32-NEXT: testb $4, %al -; KNL_32-NEXT: je .LBB32_6 -; KNL_32-NEXT: .LBB32_5: # %cond.store3 -; KNL_32-NEXT: vpextrd $2, %xmm0, %eax -; KNL_32-NEXT: vextractps $2, %xmm2, (%eax) -; KNL_32-NEXT: popl %eax -; KNL_32-NEXT: .cfi_def_cfa_offset 4 +; KNL_32-NEXT: vpscatterdd %zmm2, (,%zmm0) {%k1} +; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl ; ; SKX-LABEL: test30b: ; SKX: # %bb.0: -; SKX-NEXT: andb $1, %dil -; SKX-NEXT: andb $1, %sil -; SKX-NEXT: addb %sil, %sil -; SKX-NEXT: orb %dil, %sil -; SKX-NEXT: andb $1, %dl -; SKX-NEXT: shlb $2, %dl -; SKX-NEXT: orb %sil, %dl +; SKX-NEXT: movb $-3, %al +; SKX-NEXT: kmovw %eax, %k0 +; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $7, %k1, %k1 +; SKX-NEXT: kandw %k0, %k1, %k0 +; SKX-NEXT: kmovw %esi, %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $6, %k1, %k1 +; SKX-NEXT: korw %k1, %k0, %k0 +; SKX-NEXT: movb $-5, %al +; SKX-NEXT: kmovw %eax, %k1 +; SKX-NEXT: kandw %k1, %k0, %k0 +; SKX-NEXT: kmovw %edx, %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $5, %k1, %k1 +; SKX-NEXT: korw %k1, %k0, %k1 ; SKX-NEXT: vpmovsxdq %xmm1, %ymm1 ; SKX-NEXT: vpsllq $2, %ymm1, %ymm1 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 -; SKX-NEXT: testb $1, %dl -; SKX-NEXT: jne .LBB32_1 -; SKX-NEXT: # %bb.2: # %else -; SKX-NEXT: testb $2, %dl -; SKX-NEXT: jne .LBB32_3 -; SKX-NEXT: .LBB32_4: # %else2 -; 
SKX-NEXT: testb $4, %dl -; SKX-NEXT: jne .LBB32_5 -; SKX-NEXT: .LBB32_6: # %else4 -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq -; SKX-NEXT: .LBB32_1: # %cond.store -; SKX-NEXT: vmovq %xmm0, %rax -; SKX-NEXT: vmovss %xmm2, (%rax) -; SKX-NEXT: testb $2, %dl -; SKX-NEXT: je .LBB32_4 -; SKX-NEXT: .LBB32_3: # %cond.store1 -; SKX-NEXT: vpextrq $1, %xmm0, %rax -; SKX-NEXT: vextractps $1, %xmm2, (%rax) -; SKX-NEXT: testb $4, %dl -; SKX-NEXT: je .LBB32_6 -; SKX-NEXT: .LBB32_5: # %cond.store3 -; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 -; SKX-NEXT: vmovq %xmm0, %rax -; SKX-NEXT: vextractps $2, %xmm2, (%rax) +; SKX-NEXT: vpscatterqd %xmm2, (,%ymm0) {%k1} ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; ; SKX_32-LABEL: test30b: ; SKX_32: # %bb.0: -; SKX_32-NEXT: pushl %eax -; SKX_32-NEXT: .cfi_def_cfa_offset 8 +; SKX_32-NEXT: movb $-3, %al +; SKX_32-NEXT: kmovw %eax, %k0 +; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al +; SKX_32-NEXT: kmovw %eax, %k1 +; SKX_32-NEXT: kshiftlb $7, %k1, %k1 +; SKX_32-NEXT: kshiftrb $7, %k1, %k1 +; SKX_32-NEXT: kandw %k0, %k1, %k0 ; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al -; SKX_32-NEXT: andb $1, %al -; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %cl -; SKX_32-NEXT: andb $1, %cl -; SKX_32-NEXT: addb %cl, %cl -; SKX_32-NEXT: orb %al, %cl +; SKX_32-NEXT: kmovw %eax, %k1 +; SKX_32-NEXT: kshiftlb $7, %k1, %k1 +; SKX_32-NEXT: kshiftrb $6, %k1, %k1 +; SKX_32-NEXT: korw %k1, %k0, %k0 +; SKX_32-NEXT: movb $-5, %al +; SKX_32-NEXT: kmovw %eax, %k1 +; SKX_32-NEXT: kandw %k1, %k0, %k0 ; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al -; SKX_32-NEXT: andb $1, %al -; SKX_32-NEXT: shlb $2, %al -; SKX_32-NEXT: orb %cl, %al +; SKX_32-NEXT: kmovw %eax, %k1 +; SKX_32-NEXT: kshiftlb $7, %k1, %k1 +; SKX_32-NEXT: kshiftrb $5, %k1, %k1 +; SKX_32-NEXT: korw %k1, %k0, %k1 ; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1 ; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; SKX_32-NEXT: testb $1, %al -; SKX_32-NEXT: jne .LBB32_1 -; SKX_32-NEXT: # %bb.2: # %else -; SKX_32-NEXT: testb $2, %al -; SKX_32-NEXT: jne .LBB32_3 -; SKX_32-NEXT: .LBB32_4: # %else2 -; SKX_32-NEXT: testb $4, %al -; SKX_32-NEXT: jne .LBB32_5 -; SKX_32-NEXT: .LBB32_6: # %else4 -; SKX_32-NEXT: popl %eax -; SKX_32-NEXT: .cfi_def_cfa_offset 4 -; SKX_32-NEXT: retl -; SKX_32-NEXT: .LBB32_1: # %cond.store -; SKX_32-NEXT: .cfi_def_cfa_offset 8 -; SKX_32-NEXT: vmovd %xmm0, %ecx -; SKX_32-NEXT: vmovss %xmm2, (%ecx) -; SKX_32-NEXT: testb $2, %al -; SKX_32-NEXT: je .LBB32_4 -; SKX_32-NEXT: .LBB32_3: # %cond.store1 -; SKX_32-NEXT: vpextrd $1, %xmm0, %ecx -; SKX_32-NEXT: vextractps $1, %xmm2, (%ecx) -; SKX_32-NEXT: testb $4, %al -; SKX_32-NEXT: je .LBB32_6 -; SKX_32-NEXT: .LBB32_5: # %cond.store3 -; SKX_32-NEXT: vpextrd $2, %xmm0, %eax -; SKX_32-NEXT: vextractps $2, %xmm2, (%eax) -; SKX_32-NEXT: popl %eax -; SKX_32-NEXT: .cfi_def_cfa_offset 4 +; SKX_32-NEXT: vpscatterdd %xmm2, (,%xmm0) {%k1} ; SKX_32-NEXT: retl %sext_ind = sext <3 x i32> %ind to <3 x i64> %gep.random = getelementptr i32, <3 x i32*> %base, <3 x i64> %sext_ind From fc82006331228b6b16ea47cd8093ac145739044b Mon Sep 17 00:00:00 2001 From: Alina Sbirlea Date: Tue, 15 Sep 2020 17:42:03 -0700 Subject: [PATCH 0787/1079] [MemorySSA] Set MustDominate to true for PhiTranslation. 
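
Upward phi translation could previously accept a translated address
that is available in, but does not dominate, the predecessor block.
Require dominance instead. The change is the single flag passed in
upward_defs_iterator, sketched here for context only:

  // Ask PHITranslateValue for a translation of the pointer that
  // dominates the phi-arg block, not just any available rewrite.
  if (!Translator.PHITranslateValue(OriginalAccess->getBlock(),
                                    DefIterator.getPhiArgBlock(), DT,
                                    /*MustDominate=*/true)) {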
--- llvm/include/llvm/Analysis/MemorySSA.h | 2 +- .../Analysis/MemorySSA/phi-translation.ll | 54 ++++++++++++++++++- 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h index 5878b53fa3726..ffd4b02593272 100644 --- a/llvm/include/llvm/Analysis/MemorySSA.h +++ b/llvm/include/llvm/Analysis/MemorySSA.h @@ -1225,7 +1225,7 @@ class upward_defs_iterator OriginalAccess->getBlock()->getModule()->getDataLayout(), nullptr); if (!Translator.PHITranslateValue(OriginalAccess->getBlock(), DefIterator.getPhiArgBlock(), DT, - false)) { + true)) { if (Translator.getAddr() != Location.Ptr) { CurrentPair.second = Location.getWithNewPtr(Translator.getAddr()); if (PerformedPhiTranslation) diff --git a/llvm/test/Analysis/MemorySSA/phi-translation.ll b/llvm/test/Analysis/MemorySSA/phi-translation.ll index 1274e365066d6..5b5516d8bf766 100644 --- a/llvm/test/Analysis/MemorySSA/phi-translation.ll +++ b/llvm/test/Analysis/MemorySSA/phi-translation.ll @@ -392,8 +392,9 @@ define void @dont_merge_noalias_complex_2(i32 %arg, i32 %arg1) { ; CHECK-NEXT: ; 3 = MemoryPhi({loop.1.header,4},{storebb,2}) ; CHECK-LABEL: storebb: -; NOLIMIT: ; MemoryUse(1) MayAlias -; LIMIT: ; MemoryUse(4) MayAlias +; CHECK-NEXT: %iv.add2 = add nuw nsw i64 %iv, 2 +; CHECK-NEXT: %p.2 = getelementptr inbounds [32 x i32], [32 x i32]* %tmp, i64 0, i64 %iv.add2 +; CHECK-NEXT: ; MemoryUse(4) MayAlias ; CHECK-NEXT: %l.2 = load i32, i32* %p.2, align 4 ; CHECK-NEXT: ; 2 = MemoryDef(4) ; CHECK-NEXT: store i32 10, i32* %p.1, align 4 @@ -424,3 +425,52 @@ storebb: exit: ret void } + +; CHECK-LABEL: define void @use_clobbered_by_def_in_loop() +define void @use_clobbered_by_def_in_loop() { +entry: + %nodeStack = alloca [12 x i32], align 4 + %0 = bitcast [12 x i32]* %nodeStack to i8* + call void @llvm.lifetime.start.p0i8(i64 48, i8* nonnull %0) + br i1 false, label %cleanup, label %while.cond + +; CHECK-LABEL: while.cond: +; CHECK-NEXT: ; [[NO6:.*]] = MemoryPhi({entry,1},{while.cond.backedge,5}) + +while.cond: ; preds = %entry, %while.cond.backedge + %depth.1 = phi i32 [ %depth.1.be, %while.cond.backedge ], [ 0, %entry ] + %cmp = icmp sgt i32 %depth.1, 0 + br i1 %cmp, label %land.rhs, label %while.end + +; CHECK-LABEL: land.rhs: +; CHECK-NEXT: %sub = add nsw i32 %depth.1, -1 +; CHECK-NEXT: %arrayidx = getelementptr inbounds [12 x i32], [12 x i32]* %nodeStack, i32 0, i32 %sub +; CHECK-NEXT: ; MemoryUse([[NO6]]) MayAlias +; CHECK-NEXT: %1 = load i32, i32* %arrayidx, align 4 + +land.rhs: ; preds = %while.cond + %sub = add nsw i32 %depth.1, -1 + %arrayidx = getelementptr inbounds [12 x i32], [12 x i32]* %nodeStack, i32 0, i32 %sub + %1 = load i32, i32* %arrayidx, align 4 + br i1 true, label %while.body, label %while.end + +while.body: ; preds = %land.rhs + br i1 true, label %cleanup, label %while.cond.backedge + +while.cond.backedge: ; preds = %while.body, %while.end + %depth.1.be = phi i32 [ %sub, %while.body ], [ %inc, %while.end ] + br label %while.cond + +while.end: ; preds = %while.cond, %land.rhs + %arrayidx10 = getelementptr inbounds [12 x i32], [12 x i32]* %nodeStack, i32 0, i32 %depth.1 + store i32 %depth.1, i32* %arrayidx10, align 4 + %inc = add nsw i32 %depth.1, 1 + br i1 true, label %cleanup, label %while.cond.backedge + +cleanup: ; preds = %while.body, %while.end, %entry + call void @llvm.lifetime.end.p0i8(i64 48, i8* nonnull %0) + ret void +} + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64 immarg, 
i8* nocapture)

From 794467b916e87e8fb09380c67d0d433a29d93a2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?=
Date: Sun, 13 Sep 2020 00:24:26 +0300
Subject: [PATCH 0788/1079] [llvm-rc] Allow omitting components from
 VERSIONINFO versions

MS rc.exe doesn't require specifying all 4 components.

Differential Revision: https://reviews.llvm.org/D87570
---
 llvm/test/tools/llvm-rc/Inputs/tag-versioninfo.rc | 2 +-
 llvm/test/tools/llvm-rc/tag-versioninfo.test      | 2 +-
 llvm/tools/llvm-rc/ResourceScriptParser.cpp       | 4 +++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/test/tools/llvm-rc/Inputs/tag-versioninfo.rc b/llvm/test/tools/llvm-rc/Inputs/tag-versioninfo.rc
index 54dbff55067cb..4b567dabcb2bc 100644
--- a/llvm/test/tools/llvm-rc/Inputs/tag-versioninfo.rc
+++ b/llvm/test/tools/llvm-rc/Inputs/tag-versioninfo.rc
@@ -1,6 +1,6 @@
 1 VERSIONINFO
 FILEVERSION 1, 2, 3, 4
-PRODUCTVERSION 5, 6, 7, 8
+PRODUCTVERSION 5, 6, 7
 FILEFLAGSMASK 50
 FILEFLAGS 555
 FILEOS 110
diff --git a/llvm/test/tools/llvm-rc/tag-versioninfo.test b/llvm/test/tools/llvm-rc/tag-versioninfo.test
index 92c91972a221f..3ce534b880960 100644
--- a/llvm/test/tools/llvm-rc/tag-versioninfo.test
+++ b/llvm/test/tools/llvm-rc/tag-versioninfo.test
@@ -14,7 +14,7 @@
 ; CHECK-NEXT: 0000: A0023400 00005600 53005F00 56004500 |..4...V.S._.V.E.|
 ; CHECK-NEXT: 0010: 52005300 49004F00 4E005F00 49004E00 |R.S.I.O.N._.I.N.|
 ; CHECK-NEXT: 0020: 46004F00 00000000 BD04EFFE 00000100 |F.O.............|
-; CHECK-NEXT: 0030: 02000100 04000300 06000500 08000700 |................|
+; CHECK-NEXT: 0030: 02000100 04000300 06000500 00000700 |................|
 ; CHECK-NEXT: 0040: 32000000 2B020000 6E000000 237A0800 |2...+...n...#z..|
 ; CHECK-NEXT: 0050: 0E000000 00000000 00000000 00020000 |................|
 ; CHECK-NEXT: 0060: 01005300 74007200 69006E00 67004600 |..S.t.r.i.n.g.F.|
diff --git a/llvm/tools/llvm-rc/ResourceScriptParser.cpp b/llvm/tools/llvm-rc/ResourceScriptParser.cpp
index 2155985c61b8b..5141ac0c3864f 100644
--- a/llvm/tools/llvm-rc/ResourceScriptParser.cpp
+++ b/llvm/tools/llvm-rc/ResourceScriptParser.cpp
@@ -777,8 +777,10 @@ RCParser::parseVersionInfoFixed() {
     // VERSION variations take multiple integers.
     size_t NumInts = RetType::isVersionType(FixedType) ? 4 : 1;
-    ASSIGN_OR_RETURN(ArgsResult, readIntsWithCommas(NumInts, NumInts));
+    ASSIGN_OR_RETURN(ArgsResult, readIntsWithCommas(1, NumInts));
     SmallVector<uint32_t, 4> ArgInts(ArgsResult->begin(), ArgsResult->end());
+    while (ArgInts.size() < NumInts)
+      ArgInts.push_back(0);
     Result.setValue(FixedType, ArgInts);
   }

From 74d7356fc63bd1f42bbb20b793f21decf3c98a6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?=
Date: Fri, 31 Jul 2020 17:41:20 +0300
Subject: [PATCH 0789/1079] [llvm-rc] Update a comment. NFC.

Fix a typo and mention one missing step.
---
 llvm/tools/llvm-rc/ResourceFileWriter.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-rc/ResourceFileWriter.cpp b/llvm/tools/llvm-rc/ResourceFileWriter.cpp
index 09b078c94cd29..c80605aed4465 100644
--- a/llvm/tools/llvm-rc/ResourceFileWriter.cpp
+++ b/llvm/tools/llvm-rc/ResourceFileWriter.cpp
@@ -138,7 +138,8 @@ enum class NullHandlingMethod {
 };
 
 // Parses an identifier or string and returns a processed version of it:
-// * String the string boundary quotes.
+// * Strip the string boundary quotes.
+// * Convert the input code page characters to UTF16.
 // * Squash "" to a single ".
 // * Replace the escape sequences with their processed version.
 // For identifiers, this is no-op.
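
(Illustrative example of the processing the updated comment describes,
not part of the patch: with the input code page set to Windows-1252,
the script string "a ""quoted"" word" is emitted as the UTF-16 string
  a "quoted" word
with the boundary quotes stripped and "" squashed to a single ".)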
From c913f6dce69513b430f705d5a1f4e745f5d0a27e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?=
Date: Wed, 5 Aug 2020 11:00:21 +0300
Subject: [PATCH 0790/1079] [llvm-rc] Lowercase the option definitions. NFC.

This matches how such options are most commonly defined in other tools.
It was pointed out in an earlier review a few months ago that the
llvm-rc td entries felt shouty.

The INCLUDE option is renamed to includepath, to avoid clashing with
the tablegen include directive.
---
 llvm/tools/llvm-rc/Opts.td     | 50 +++++++++++++++++-----------------
 llvm/tools/llvm-rc/llvm-rc.cpp | 26 +++++++++---------
 2 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/llvm/tools/llvm-rc/Opts.td b/llvm/tools/llvm-rc/Opts.td
index 873dd785b12bd..613f0a0db31ed 100644
--- a/llvm/tools/llvm-rc/Opts.td
+++ b/llvm/tools/llvm-rc/Opts.td
@@ -4,55 +4,55 @@ include "llvm/Option/OptParser.td"
 
 // These options seem to be important for the tool
 // and should be implemented.
-def FILEOUT : JoinedOrSeparate<[ "/", "-" ], "FO">,
+def fileout : JoinedOrSeparate<[ "/", "-" ], "FO">,
     HelpText<"Change the output file location.">;
 
-def DEFINE : Separate<[ "/", "-" ], "D">,
+def define : Separate<[ "/", "-" ], "D">,
     HelpText<"Define a symbol for the C preprocessor.">;
-def UNDEF : Separate<[ "/", "-" ], "U">,
+def undef : Separate<[ "/", "-" ], "U">,
    HelpText<"Undefine a symbol for the C preprocessor.">;
 
-def LANG_ID : JoinedOrSeparate<[ "/", "-" ], "L">,
+def lang_id : JoinedOrSeparate<[ "/", "-" ], "L">,
    HelpText<"Set the default language identifier.">;
-def LANG_NAME : Separate<[ "/", "-" ], "LN">,
+def lang_name : Separate<[ "/", "-" ], "LN">,
    HelpText<"Set the default language name.">;
 
-def INCLUDE : Separate<[ "/", "-" ], "I">, HelpText<"Add an include path.">;
-def NOINCLUDE : Flag<[ "/", "-" ], "X">, HelpText<"Ignore 'include' variable.">;
+def includepath : Separate<[ "/", "-" ], "I">, HelpText<"Add an include path.">;
+def noinclude : Flag<[ "/", "-" ], "X">, HelpText<"Ignore 'include' variable.">;
 
-def ADD_NULL : Flag<[ "/", "-" ], "N">,
+def add_null : Flag<[ "/", "-" ], "N">,
    HelpText<"Null-terminate all strings in the string table.">;
 
-def DUPID_NOWARN : Flag<[ "/", "-" ], "Y">,
+def dupid_nowarn : Flag<[ "/", "-" ], "Y">,
    HelpText<"Suppress warnings on duplicate resource IDs.">;
 
-def VERBOSE : Flag<[ "/", "-" ], "V">, HelpText<"Be verbose.">;
-def HELP : Flag<[ "/", "-" ], "?">, HelpText<"Display this help and exit.">;
-def H : Flag<[ "/", "-" ], "H">,
-    Alias<HELP>,
+def verbose : Flag<[ "/", "-" ], "V">, HelpText<"Be verbose.">;
+def help : Flag<[ "/", "-" ], "?">, HelpText<"Display this help and exit.">;
+def h : Flag<[ "/", "-" ], "H">,
+    Alias<help>,
     HelpText<"Display this help and exit.">;
 
-def DRY_RUN : Flag<[ "/", "-" ], "dry-run">,
+def dry_run : Flag<[ "/", "-" ], "dry-run">,
    HelpText<"Don't compile the input; only try to parse it.">;
 
-def CODEPAGE : JoinedOrSeparate<[ "/", "-" ], "C">,
+def codepage : JoinedOrSeparate<[ "/", "-" ], "C">,
    HelpText<"Set the codepage used for input strings.">;
 
 // Unused switches (at least for now). These will stay unimplemented
 // in an early stage of development and can be ignored. However, we need to
 // parse them in order to preserve the compatibility with the original tool.
-def NOLOGO : Flag<[ "/", "-" ], "NOLOGO">;
-def R : Flag<[ "/", "-" ], "R">;
-def SL : Flag<[ "/", "-" ], "SL">;
+def nologo : Flag<[ "/", "-" ], "NOLOGO">;
+def r : Flag<[ "/", "-" ], "R">;
+def sl : Flag<[ "/", "-" ], "SL">;
 
 // (Codepages support.)
-def W : Flag<[ "/", "-" ], "W">;
+def w : Flag<[ "/", "-" ], "W">;
 
 // (Support of MUI and similar.)
-def FM : Separate<[ "/", "-" ], "FM">;
-def Q : Separate<[ "/", "-" ], "Q">;
-def G : Flag<[ "/", "-" ], "G">;
-def GN : Flag<[ "/", "-" ], "GN">;
-def G1 : Flag<[ "/", "-" ], "G1">;
-def G2 : Flag<[ "/", "-" ], "G2">;
+def fm : Separate<[ "/", "-" ], "FM">;
+def q : Separate<[ "/", "-" ], "Q">;
+def g : Flag<[ "/", "-" ], "G">;
+def gn : Flag<[ "/", "-" ], "GN">;
+def g1 : Flag<[ "/", "-" ], "G1">;
+def g2 : Flag<[ "/", "-" ], "G2">;
diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp
index 71954804f2552..e9027a21d46b8 100644
--- a/llvm/tools/llvm-rc/llvm-rc.cpp
+++ b/llvm/tools/llvm-rc/llvm-rc.cpp
@@ -92,12 +92,12 @@ int main(int Argc, const char **Argv) {
   opt::InputArgList InputArgs = T.ParseArgs(ArgsArr, MAI, MAC);
 
   // The tool prints nothing when invoked with no command-line arguments.
-  if (InputArgs.hasArg(OPT_HELP)) {
+  if (InputArgs.hasArg(OPT_help)) {
     T.PrintHelp(outs(), "rc [options] file...", "Resource Converter", false);
     return 0;
   }
 
-  const bool BeVerbose = InputArgs.hasArg(OPT_VERBOSE);
+  const bool BeVerbose = InputArgs.hasArg(OPT_verbose);
 
   std::vector<std::string> InArgsInfo = InputArgs.getAllArgValues(OPT_INPUT);
   if (DashDash != Argv + Argc)
@@ -141,14 +141,14 @@ int main(int Argc, const char **Argv) {
   SmallString<128> InputFile(InArgsInfo[0]);
   llvm::sys::fs::make_absolute(InputFile);
   Params.InputFilePath = InputFile;
-  Params.Include = InputArgs.getAllArgValues(OPT_INCLUDE);
-  Params.NoInclude = InputArgs.getAllArgValues(OPT_NOINCLUDE);
+  Params.Include = InputArgs.getAllArgValues(OPT_includepath);
+  Params.NoInclude = InputArgs.getAllArgValues(OPT_noinclude);
 
-  if (InputArgs.hasArg(OPT_CODEPAGE)) {
-    if (InputArgs.getLastArgValue(OPT_CODEPAGE)
+  if (InputArgs.hasArg(OPT_codepage)) {
+    if (InputArgs.getLastArgValue(OPT_codepage)
         .getAsInteger(10, Params.CodePage))
       fatalError("Invalid code page: " +
-                 InputArgs.getLastArgValue(OPT_CODEPAGE));
+                 InputArgs.getLastArgValue(OPT_codepage));
     switch (Params.CodePage) {
     case CpAcp:
     case CpWin1252:
@@ -161,10 +161,10 @@ int main(int Argc, const char **Argv) {
   }
 
   std::unique_ptr<ResourceVisitor> Visitor;
-  bool IsDryRun = InputArgs.hasArg(OPT_DRY_RUN);
+  bool IsDryRun = InputArgs.hasArg(OPT_dry_run);
 
   if (!IsDryRun) {
-    auto OutArgsInfo = InputArgs.getAllArgValues(OPT_FILEOUT);
+    auto OutArgsInfo = InputArgs.getAllArgValues(OPT_fileout);
     if (OutArgsInfo.empty()) {
       SmallString<128> OutputFile = InputFile;
       llvm::sys::path::replace_extension(OutputFile, "res");
@@ -182,17 +182,17 @@ int main(int Argc, const char **Argv) {
       fatalError("Error opening output file '" + OutArgsInfo[0] +
                  "': " + EC.message());
     Visitor = std::make_unique<ResourceFileWriter>(Params, std::move(FOut));
-    Visitor->AppendNull = InputArgs.hasArg(OPT_ADD_NULL);
+    Visitor->AppendNull = InputArgs.hasArg(OPT_add_null);
 
     ExitOnErr(NullResource().visit(Visitor.get()));
 
    // Set the default language; choose en-US arbitrarily.
    unsigned PrimaryLangId = 0x09, SubLangId = 0x01;
-    if (InputArgs.hasArg(OPT_LANG_ID)) {
+    if (InputArgs.hasArg(OPT_lang_id)) {
       unsigned LangId;
-      if (InputArgs.getLastArgValue(OPT_LANG_ID).getAsInteger(16, LangId))
+      if (InputArgs.getLastArgValue(OPT_lang_id).getAsInteger(16, LangId))
         fatalError("Invalid language id: " +
-                   InputArgs.getLastArgValue(OPT_LANG_ID));
+                   InputArgs.getLastArgValue(OPT_lang_id));
       PrimaryLangId = LangId & 0x3ff;
       SubLangId = LangId >> 10;
     }

From 4171d5c30ad32282e6ca9027aeff01ef5ff2461b Mon Sep 17 00:00:00 2001
From: Xing GUO
Date: Wed, 16 Sep 2020 14:46:12 +0800
Subject: [PATCH 0791/1079] [obj2yaml] Add support for dumping the
 .debug_addr(v5) section.

This patch adds support for dumping the .debug_addr(v5) section to
obj2yaml.

Reviewed By: jhenderson

Differential Revision: https://reviews.llvm.org/D87601
---
 .../llvm/DebugInfo/DWARF/DWARFDebugAddr.h     |  18 ++
 .../tools/obj2yaml/ELF/DWARF/debug-addr.yaml  | 215 ++++++++++++++++++
 llvm/tools/obj2yaml/dwarf2yaml.cpp            |  33 +++
 llvm/tools/obj2yaml/elf2yaml.cpp              |   2 +
 llvm/tools/obj2yaml/obj2yaml.h                |   1 +
 5 files changed, 269 insertions(+)
 create mode 100644 llvm/test/tools/obj2yaml/ELF/DWARF/debug-addr.yaml

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
index 32844ffd570ff..69e67866946ce 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
@@ -74,6 +74,24 @@ class DWARFDebugAddrTable {
   /// Return the full length of this table, including the length field.
   /// Return None if the length cannot be identified reliably.
   Optional<uint64_t> getFullLength() const;
+
+  /// Return the DWARF format of this table.
+  dwarf::DwarfFormat getFormat() const { return Format; }
+
+  /// Return the length of this table.
+  uint64_t getLength() const { return Length; }
+
+  /// Return the version of this table.
+  uint16_t getVersion() const { return Version; }
+
+  /// Return the address size of this table.
+  uint8_t getAddressSize() const { return AddrSize; }
+
+  /// Return the segment selector size of this table.
+  uint8_t getSegmentSelectorSize() const { return SegSize; }
+
+  /// Return the parsed addresses of this table.
+  ArrayRef<uint64_t> getAddressEntries() const { return Addrs; }
 };
 
 } // end namespace llvm
diff --git a/llvm/test/tools/obj2yaml/ELF/DWARF/debug-addr.yaml b/llvm/test/tools/obj2yaml/ELF/DWARF/debug-addr.yaml
new file mode 100644
index 0000000000000..b294adff5cbd7
--- /dev/null
+++ b/llvm/test/tools/obj2yaml/ELF/DWARF/debug-addr.yaml
@@ -0,0 +1,215 @@
+## Test how we dump the .debug_addr section.
+
+## a) Dumping address tables from various object files.
+
+## Dumping address tables from a little endian 64-bit object file.
+# RUN: yaml2obj --docnum=1 %s -DADDRESS=0xFFFFFFFFFFFFFFFF \
+# RUN:   -DADDRSIZE=4 | obj2yaml | \
+# RUN:   FileCheck %s --check-prefix=BASIC --implicit-check-not=Sections: \
+# RUN:   -DLENGTH1=0x0000000000000014 \
+# RUN:   -DADDRSIZE1=0x08 \
+# RUN:   -DADDR=0xFFFFFFFFFFFFFFFF \
+# RUN:   -DLENGTH2=0x000000000000000C \
+# RUN:   -DADDRSIZE2=0x04
+
+## Dumping address tables from a big endian 64-bit object file.
+# RUN: yaml2obj --docnum=1 %s -DENDIAN=MSB -DADDRESS=0xFFFFFFFFFFFFFFFF \ +# RUN: -DADDRSIZE=4 | obj2yaml | \ +# RUN: FileCheck %s --check-prefix=BASIC --implicit-check-not=Sections: \ +# RUN: -DLENGTH1=0x0000000000000014 \ +# RUN: -DADDRSIZE1=0x08 \ +# RUN: -DADDR=0xFFFFFFFFFFFFFFFF \ +# RUN: -DLENGTH2=0x000000000000000C \ +# RUN: -DADDRSIZE2=0x04 + +## Dumping address tables from a little endian 32-bit object file. +# RUN: yaml2obj --docnum=1 %s -DBITS=32 -DADDRESS=0xFFFFFFFF \ +# RUN: -DADDRSIZE=8 | obj2yaml | \ +# RUN: FileCheck %s --check-prefix=BASIC --implicit-check-not=Sections: \ +# RUN: -DLENGTH1=0x000000000000000C \ +# RUN: -DADDRSIZE1=0x04 \ +# RUN: -DADDR=0x00000000FFFFFFFF \ +# RUN: -DLENGTH2=0x0000000000000014 \ +# RUN: -DADDRSIZE2=0x08 + +## Dumping address tables from a big endian 32-bit object file. +# RUN: yaml2obj --docnum=1 %s -DBITS=32 -DENDIAN=MSB -DADDRESS=0xFFFFFFFF \ +# RUN: -DADDRSIZE=8 | obj2yaml | \ +# RUN: FileCheck %s --check-prefix=BASIC --implicit-check-not=Sections: \ +# RUN: -DLENGTH1=0x000000000000000C \ +# RUN: -DADDRSIZE1=0x04 \ +# RUN: -DADDR=0x00000000FFFFFFFF \ +# RUN: -DLENGTH2=0x0000000000000014 \ +# RUN: -DADDRSIZE2=0x08 + +# BASIC: DWARF: +# BASIC-NEXT: debug_addr: +# BASIC-NEXT: - Length: [[LENGTH1]] +# BASIC-NEXT: Version: 0x0005 +# BASIC-NEXT: AddressSize: [[ADDRSIZE1]] +# BASIC-NEXT: Entries: +# BASIC-NEXT: - Address: 0x0000000000001234 +# BASIC-NEXT: - Address: 0x0000000000005678 +# BASIC-NEXT: - Format: DWARF64 +# BASIC-NEXT: Length: [[LENGTH1]] +# BASIC-NEXT: Version: 0x0005 +# BASIC-NEXT: AddressSize: [[ADDRSIZE1]] +# BASIC-NEXT: Entries: +# BASIC-NEXT: - Address: 0x0000000000001234 +# BASIC-NEXT: - Address: [[ADDR]] +# BASIC-NEXT: - Length: [[LENGTH2]] +# BASIC-NEXT: Version: 0x0005 +# BASIC-NEXT: AddressSize: [[ADDRSIZE2]] +# BASIC-NEXT: Entries: +# BASIC-NEXT: - Address: 0x0000000000001234 +# BASIC-NEXT: - Address: 0x0000000000005678 +# BASIC-NEXT: - Format: DWARF64 +# BASIC-NEXT: Length: [[LENGTH2]] +# BASIC-NEXT: Version: 0x0005 +# BASIC-NEXT: AddressSize: [[ADDRSIZE2]] +# BASIC-NEXT: Entries: +# BASIC-NEXT: - Address: 0x0000000000001234 +# BASIC-NEXT: - Address: 0x0000000000005678 +# BASIC-NEXT: ... + +--- !ELF +FileHeader: + Class: ELFCLASS[[BITS=64]] + Data: ELFDATA2[[ENDIAN=LSB]] + Type: ET_EXEC +DWARF: + debug_addr: + ## A DWARF32 address table. + - Version: 5 + Entries: + - Address: 0x1234 + - Address: 0x5678 + ## A DWARF64 address table. + - Format: DWARF64 + Version: 5 + Entries: + - Address: 0x1234 + - Address: [[ADDRESS]] + ## A DWARF32 address table with a mutable address size. + - Version: 5 + AddressSize: [[ADDRSIZE]] + Entries: + - Address: 0x1234 + - Address: 0x5678 + ## A DWARF64 address table with a mutable address size. + - Format: DWARF64 + Version: 5 + AddressSize: [[ADDRSIZE]] + Entries: + - Address: 0x1234 + - Address: 0x5678 + +## b) Test dumping a .debug_addr section whose section header properties are +## overridden. + +## Override the sh_type field. +# RUN: yaml2obj --docnum=2 %s -DTYPE=SHT_STRTAB | obj2yaml | \ +# RUN: FileCheck %s -DTYPE=SHT_STRTAB --check-prefix=COMMON + +## Override the sh_flags field. +# RUN: yaml2obj --docnum=2 %s -DFLAGS='[ SHF_ALLOC ]' | obj2yaml | \ +# RUN: FileCheck %s -DTYPE=SHT_PROGBITS --check-prefixes=COMMON,FLAGS + +## Override the sh_link field. +# RUN: yaml2obj --docnum=2 %s -DLINK=.sec | obj2yaml | \ +# RUN: FileCheck %s -DTYPE=SHT_PROGBITS --check-prefixes=COMMON,LINK + +## Override the sh_addr field. 
+# RUN: yaml2obj --docnum=2 %s -DADDRESS=0x2020 | obj2yaml | \ +# RUN: FileCheck %s -DTYPE=SHT_PROGBITS --check-prefixes=COMMON,ADDR + +## Override the sh_addralign field. +# RUN: yaml2obj --docnum=2 %s -DADDRALIGN=3 | obj2yaml | \ +# RUN: FileCheck %s -DTYPE=SHT_PROGBITS --check-prefixes=COMMON,ADDRALIGN + +## Override the sh_entsize field. +# RUN: yaml2obj --docnum=2 %s -DENTSIZE=3 | obj2yaml | \ +# RUN: FileCheck %s -DTYPE=SHT_PROGBITS --check-prefixes=COMMON,ENTSIZE + +## Override the sh_info field. +# RUN: yaml2obj --docnum=2 %s -DINFO=3 | obj2yaml | \ +# RUN: FileCheck %s -DTYPE=SHT_PROGBITS --check-prefixes=COMMON,INFO + +# COMMON: Sections: +# COMMON-NEXT: - Name: .debug_addr +# COMMON-NEXT: Type: [[TYPE]] +# FLAGS-NEXT: Flags: [ SHF_ALLOC ] +# LINK-NEXT: Link: .sec +# ADDR-NEXT: Address: 0x0000000000002020 +# ADDRALIGN-NEXT: AddressAlign: 0x0000000000000003 +# ENTSIZE-NEXT: EntSize: 0x0000000000000003 +# INFO-NEXT: Info: 0x0000000000000003 +# COMMON-NEXT: - Name: .sec +# COMMON-NEXT: Type: SHT_PROGBITS +# COMMON-NEXT: DWARF: +# COMMON-NEXT: debug_addr: +# COMMON-NEXT: - Length: 0x0000000000000014 +# COMMON-NEXT: Version: 0x0005 +# COMMON-NEXT: AddressSize: 0x08 +# COMMON-NEXT: Entries: +# COMMON-NEXT: - Address: 0x0000000000001234 +# COMMON-NEXT: - Address: 0x0000000000005678 +# COMMON-NEXT: ... + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .debug_addr + Type: [[TYPE=SHT_PROGBITS]] + Flags: [[FLAGS=]] + Link: [[LINK='']] + EntSize: [[ENTSIZE=]] + Info: [[INFO=]] + AddressAlign: [[ADDRALIGN=0]] + Address: [[ADDRESS=]] + - Name: .sec + Type: SHT_PROGBITS +DWARF: + debug_addr: + - Version: 5 + Entries: + - Address: 0x1234 + - Address: 0x5678 + +## c) Test dumping an address table whose version isn't 5. +## This causes the DWARF parser to fail to parse it and we will dump it as a raw +## content section. + +# RUN: yaml2obj --docnum=3 %s -DCONTENT="AABBCC" | obj2yaml | \ +# RUN: FileCheck %s --check-prefix=RAW --implicit-check-not=DWARF: + +# RAW: Sections: +# RAW-NEXT: - Name: .debug_addr +# RAW-NEXT: Type: SHT_PROGBITS +# RAW-NEXT: AddressAlign: 0x0000000000000001 +# RAW-NEXT: Content: AABBCC +# RAW-NEXT: ... + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .debug_addr + Type: SHT_PROGBITS + AddressAlign: 1 + Size: [[SIZE=]] + Content: [[CONTENT=]] + +## d) Test dumping an empty .debug_addr section. + +# RUN: yaml2obj --docnum=3 %s -DSIZE=0 | obj2yaml | \ +# RUN: FileCheck %s --check-prefix=EMPTY --implicit-check-not=Sections: + +# EMPTY: DWARF: +# EMPTY-NEXT: debug_addr: [] +# EMPTY-NEXT: ... 
diff --git a/llvm/tools/obj2yaml/dwarf2yaml.cpp b/llvm/tools/obj2yaml/dwarf2yaml.cpp
index 1dcf6d42d6ada..10e8ecaeec089 100644
--- a/llvm/tools/obj2yaml/dwarf2yaml.cpp
+++ b/llvm/tools/obj2yaml/dwarf2yaml.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
@@ -46,6 +47,38 @@ void dumpDebugAbbrev(DWARFContext &DCtx, DWARFYAML::Data &Y) {
   }
 }
 
+Error dumpDebugAddr(DWARFContext &DCtx, DWARFYAML::Data &Y) {
+  DWARFDebugAddrTable AddrTable;
+  DWARFDataExtractor AddrData(DCtx.getDWARFObj(),
+                              DCtx.getDWARFObj().getAddrSection(),
+                              DCtx.isLittleEndian(), /*AddrSize=*/0);
+  std::vector<DWARFYAML::AddrTableEntry> AddrTables;
+  uint64_t Offset = 0;
+  while (AddrData.isValidOffset(Offset)) {
+    // We ignore any errors that don't prevent parsing the section, since we
+    // can still represent such sections.
+    if (Error Err = AddrTable.extractV5(AddrData, &Offset, /*CUAddrSize=*/0,
+                                        consumeError))
+      return Err;
+    AddrTables.emplace_back();
+    for (uint64_t Addr : AddrTable.getAddressEntries()) {
+      // Currently, the parser doesn't support parsing an address table with
+      // non linear addresses (segment_selector_size != 0). The segment
+      // selectors are specified to be zero.
+      AddrTables.back().SegAddrPairs.push_back(
+          {/*SegmentSelector=*/0, /*Address=*/Addr});
+    }
+
+    AddrTables.back().Format = AddrTable.getFormat();
+    AddrTables.back().Length = AddrTable.getLength();
+    AddrTables.back().Version = AddrTable.getVersion();
+    AddrTables.back().AddrSize = AddrTable.getAddressSize();
+    AddrTables.back().SegSelectorSize = AddrTable.getSegmentSelectorSize();
+  }
+  Y.DebugAddr = std::move(AddrTables);
+  return Error::success();
+}
+
 Error dumpDebugStrings(DWARFContext &DCtx, DWARFYAML::Data &Y) {
   DataExtractor StrData = DCtx.getStringExtractor();
   uint64_t Offset = 0;
diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp
index a2c78b81a700b..3c3bef2dfbf4c 100644
--- a/llvm/tools/obj2yaml/elf2yaml.cpp
+++ b/llvm/tools/obj2yaml/elf2yaml.cpp
@@ -418,6 +418,8 @@ Optional<DWARFYAML::Data> ELFDumper<ELFT>::dumpDWARFSections(
       Err = dumpDebugStrings(*DWARFCtx.get(), DWARF);
     else if (RawSec->Name == ".debug_ranges")
       Err = dumpDebugRanges(*DWARFCtx.get(), DWARF);
+    else if (RawSec->Name == ".debug_addr")
+      Err = dumpDebugAddr(*DWARFCtx.get(), DWARF);
     else
       continue;
diff --git a/llvm/tools/obj2yaml/obj2yaml.h b/llvm/tools/obj2yaml/obj2yaml.h
index 66a2d2753622c..c41010f111b68 100644
--- a/llvm/tools/obj2yaml/obj2yaml.h
+++ b/llvm/tools/obj2yaml/obj2yaml.h
@@ -41,6 +41,7 @@ struct Data;
 }
 
 void dumpDebugAbbrev(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y);
+llvm::Error dumpDebugAddr(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y);
 llvm::Error dumpDebugARanges(llvm::DWARFContext &DCtx,
                              llvm::DWARFYAML::Data &Y);
 void dumpDebugPubSections(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y);

From d3d76039002cd879f7aba37f88fc7312cfc95531 Mon Sep 17 00:00:00 2001
From: Alina Sbirlea
Date: Tue, 15 Sep 2020 22:52:42 -0700
Subject: [PATCH 0792/1079] [MemorySSA] Report unoptimized as None, not
 MayAlias.
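
Before this change, getOptimizedAccessType() returned the stored
OptimizedAccessAlias even for an access that was never optimized,
which reads as MayAlias although nothing has been computed yet.
Return None in that case so callers can tell "not yet optimized"
apart from a computed MayAlias result. Hypothetical caller, sketched
for illustration only (processAliasKind is a made-up helper):

  MemoryUseOrDef *MUD = MSSA.getMemoryAccess(&I);
  // Only act on alias information that was actually computed.
  if (Optional<AliasResult> AR = MUD->getOptimizedAccessType())
    processAliasKind(*AR);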
---
 llvm/include/llvm/Analysis/MemorySSA.h        |  2 +-
 llvm/test/Analysis/MemorySSA/optimize-use.ll  |  8 ++---
 .../Analysis/MemorySSA/phi-translation.ll     | 30 +++++++++----------
 llvm/test/Analysis/MemorySSA/pr43427.ll       |  2 +-
 llvm/unittests/Analysis/MemorySSATest.cpp     |  4 +--
 5 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h
index 5878b53fa3726..0be2933dd3233 100644
--- a/llvm/include/llvm/Analysis/MemorySSA.h
+++ b/llvm/include/llvm/Analysis/MemorySSA.h
@@ -270,7 +270,7 @@ class MemoryUseOrDef : public MemoryAccess {
   // Retrieve AliasResult type of the optimized access. Ideally this would be
   // returned by the caching walker and may go away in the future.
   Optional<AliasResult> getOptimizedAccessType() const {
-    return OptimizedAccessAlias;
+    return isOptimized() ? OptimizedAccessAlias : None;
   }
 
   /// Reset the ID of what this MemoryUse was optimized to, causing it to
diff --git a/llvm/test/Analysis/MemorySSA/optimize-use.ll b/llvm/test/Analysis/MemorySSA/optimize-use.ll
index ec0d5c3df1a3f..38ec971dbf539 100644
--- a/llvm/test/Analysis/MemorySSA/optimize-use.ll
+++ b/llvm/test/Analysis/MemorySSA/optimize-use.ll
@@ -22,22 +22,22 @@ entry:
   store i32 7, i32* %1, align 4
 ; NOLIMIT: MemoryUse(3) MustAlias
 ; NOLIMIT-NEXT: %2 = load i32, i32* %0, align 4
-; LIMIT: MemoryUse(4) MayAlias
+; LIMIT: MemoryUse(4)
 ; LIMIT-NEXT: %2 = load i32, i32* %0, align 4
   %2 = load i32, i32* %0, align 4
 ; NOLIMIT: MemoryUse(4) MustAlias
 ; NOLIMIT-NEXT: %3 = load i32, i32* %1, align 4
-; LIMIT: MemoryUse(4) MayAlias
+; LIMIT: MemoryUse(4)
 ; LIMIT-NEXT: %3 = load i32, i32* %1, align 4
   %3 = load i32, i32* %1, align 4
 ; NOLIMIT: MemoryUse(3) MustAlias
 ; NOLIMIT-NEXT: %4 = load i32, i32* %0, align 4
-; LIMIT: MemoryUse(4) MayAlias
+; LIMIT: MemoryUse(4)
 ; LIMIT-NEXT: %4 = load i32, i32* %0, align 4
   %4 = load i32, i32* %0, align 4
 ; NOLIMIT: MemoryUse(4) MustAlias
 ; NOLIMIT-NEXT: %5 = load i32, i32* %1, align 4
-; LIMIT: MemoryUse(4) MayAlias
+; LIMIT: MemoryUse(4)
 ; LIMIT-NEXT: %5 = load i32, i32* %1, align 4
   %5 = load i32, i32* %1, align 4
   %add = add nsw i32 %3, %5
diff --git a/llvm/test/Analysis/MemorySSA/phi-translation.ll b/llvm/test/Analysis/MemorySSA/phi-translation.ll
index 1274e365066d6..7fa6e6c69057e 100644
--- a/llvm/test/Analysis/MemorySSA/phi-translation.ll
+++ b/llvm/test/Analysis/MemorySSA/phi-translation.ll
@@ -25,7 +25,7 @@ if.end:
 ; CHECK: 3 = MemoryPhi({entry,1},{if.then,2})
 ; NOLIMIT: MemoryUse(1) MayAlias
 ; NOLIMIT-NEXT: load i8, i8* %local, align 1
-; LIMIT: MemoryUse(3) MayAlias
+; LIMIT: MemoryUse(3)
 ; LIMIT-NEXT: load i8, i8* %local, align 1
   load i8, i8* %local, align 1
   ret void
@@ -68,7 +68,7 @@ phi.1:
 ; CHECK: 6 = MemoryPhi({phi.2,4},{phi.3,3})
 ; NOLIMIT: MemoryUse(1) MayAlias
 ; NOLIMIT-NEXT: load i8, i8* %local
-; LIMIT: MemoryUse(6) MayAlias
+; LIMIT: MemoryUse(6)
 ; LIMIT-NEXT: load i8, i8* %local
   load i8, i8* %local
   ret void
@@ -81,7 +81,7 @@ define void @cross_phi(i8* noalias %p1, i8* noalias %p2) {
   store i8 0, i8* %p1
 ; NOLIMIT: MemoryUse(1) MustAlias
 ; NOLIMIT-NEXT: load i8, i8* %p1
-; LIMIT: MemoryUse(1) MayAlias
+; LIMIT: MemoryUse(1)
 ; LIMIT-NEXT: load i8, i8* %p1
   load i8, i8* %p1
   br i1 undef, label %a, label %b
@@ -116,7 +116,7 @@ e:
 ; 8 = MemoryPhi({c,4},{d,5})
 ; NOLIMIT: MemoryUse(1) MustAlias
 ; NOLIMIT-NEXT: load i8, i8* %p1
-; LIMIT: MemoryUse(8) MayAlias
+; LIMIT: MemoryUse(8)
 ; LIMIT-NEXT: load i8, i8* %p1
   load i8, i8* %p1
   ret void
@@ -150,7 +150,7 @@ loop.3:
   store i8 2, i8* %p2
 ; NOLIMIT: MemoryUse(1)
MayAlias ; NOLIMIT-NEXT: load i8, i8* %p1 -; LIMIT: MemoryUse(4) MayAlias +; LIMIT: MemoryUse(4) ; LIMIT-NEXT: load i8, i8* %p1 load i8, i8* %p1 br i1 undef, label %loop.2, label %loop.1 @@ -179,7 +179,7 @@ if.then2: if.end: ; CHECK: 4 = MemoryPhi({while.cond,5},{if.then,1},{if.then2,2}) -; CHECK: MemoryUse(4) MayAlias +; CHECK: MemoryUse(4) ; CHECK-NEXT: load i8, i8* %p1 load i8, i8* %p1 ; CHECK: 3 = MemoryDef(4) @@ -187,7 +187,7 @@ if.end: store i8 2, i8* %p2 ; NOLIMIT: MemoryUse(4) MayAlias ; NOLIMIT-NEXT: load i8, i8* %p1 -; LIMIT: MemoryUse(3) MayAlias +; LIMIT: MemoryUse(3) ; LIMIT-NEXT: load i8, i8* %p1 load i8, i8* %p1 br label %while.cond @@ -212,11 +212,11 @@ for.body: ; preds = %entry, %for.inc %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %cmp1 = icmp eq i64 %indvars.iv, 0 %arrayidx2 = getelementptr inbounds i32, i32* %m_i_strides, i64 %indvars.iv -; CHECK: MemoryUse(4) MayAlias +; CHECK: MemoryUse(4) ; CHECK-NEXT: %0 = load i32, i32* %arrayidx2, align 4 %0 = load i32, i32* %arrayidx2, align 4 %arrayidx4 = getelementptr inbounds i32, i32* %eval_left_dims, i64 %indvars.iv -; CHECK: MemoryUse(4) MayAlias +; CHECK: MemoryUse(4) ; CHECK-NEXT: %1 = load i32, i32* %arrayidx4, align 4 %1 = load i32, i32* %arrayidx4, align 4 %mul = mul nsw i32 %1, %0 @@ -270,7 +270,7 @@ for.main.body: ; preds = %if.end220.if.then185_crit_edge, %for.bod %add199 = add nuw nsw i64 %nocontract_idx.0656, 1 %cmp200 = icmp eq i64 %nocontract_idx.0656, 0 %arrayidx.i559 = getelementptr inbounds %BigStruct, %BigStruct* %this, i64 0, i32 7, i32 0, i64 %nocontract_idx.0656 -; CHECK: MemoryUse(4) MayAlias +; CHECK: MemoryUse(4) ; CHECK-NEXT: %tmp21 = load i64, i64* %arrayidx.i559, align 8 %tmp21 = load i64, i64* %arrayidx.i559, align 8 %mul206 = mul nsw i64 %tmp21, %tmp21 @@ -298,7 +298,7 @@ define i32 @dont_merge_noalias_simple(i32* noalias %ptr) { ; CHECK-NEXT: store i16 1, i16* %s1.ptr, align 2 ; CHECK-LABEL: %for.body -; CHECK: ; MemoryUse(4) MayAlias +; CHECK: ; MemoryUse(4) ; CHECK-NEXT: %lv = load i16, i16* %arrayidx, align 2 entry: @@ -331,7 +331,7 @@ define i32 @dont_merge_noalias_complex(i32* noalias %ptr, i32* noalias %another) ; CHECK-NEXT: store i16 1, i16* %s1.ptr, align 2 ; CHECK-LABEL: %for.body -; CHECK: ; MemoryUse(7) MayAlias +; CHECK: ; MemoryUse(7) ; CHECK-NEXT: %lv = load i16, i16* %arrayidx, align 2 entry: @@ -385,7 +385,7 @@ define void @dont_merge_noalias_complex_2(i32 %arg, i32 %arg1) { ; CHECK-LABEL: loop.1.header: ; CHECK-NEXT: ; 4 = MemoryPhi({entry,1},{loop.1.latch,3}) -; CHECK: ; MemoryUse(4) MayAlias +; CHECK: ; MemoryUse(4) ; CHECK-NEXT: %l.1 = load i32, i32* %p.1, align 4 ; CHECK-LABEL: loop.1.latch: @@ -394,7 +394,7 @@ define void @dont_merge_noalias_complex_2(i32 %arg, i32 %arg1) { ; CHECK-LABEL: storebb: ; CHECK-NEXT: %iv.add2 = add nuw nsw i64 %iv, 2 ; CHECK-NEXT: %p.2 = getelementptr inbounds [32 x i32], [32 x i32]* %tmp, i64 0, i64 %iv.add2 -; CHECK-NEXT: ; MemoryUse(4) MayAlias +; CHECK-NEXT: ; MemoryUse(4) ; CHECK-NEXT: %l.2 = load i32, i32* %p.2, align 4 ; CHECK-NEXT: ; 2 = MemoryDef(4) ; CHECK-NEXT: store i32 10, i32* %p.1, align 4 @@ -445,7 +445,7 @@ while.cond: ; preds = %entry, %while.cond. 
; CHECK-LABEL: land.rhs:
; CHECK-NEXT:   %sub = add nsw i32 %depth.1, -1
; CHECK-NEXT:   %arrayidx = getelementptr inbounds [12 x i32], [12 x i32]* %nodeStack, i32 0, i32 %sub
-; CHECK-NEXT:   ; MemoryUse([[NO6]]) MayAlias
+; CHECK-NEXT:   ; MemoryUse([[NO6]])
 ; CHECK-NEXT:   %1 = load i32, i32* %arrayidx, align 4

 land.rhs:                                         ; preds = %while.cond
diff --git a/llvm/test/Analysis/MemorySSA/pr43427.ll b/llvm/test/Analysis/MemorySSA/pr43427.ll
index 3cb571505f730..00a015c98e8fd 100644
--- a/llvm/test/Analysis/MemorySSA/pr43427.ll
+++ b/llvm/test/Analysis/MemorySSA/pr43427.ll
@@ -20,7 +20,7 @@
 ; CHECK-NEXT: [[NO7]] = MemoryPhi({lbl2,[[NO8]]},{for.end,2})

 ; CHECK: cleanup:
-; CHECK-NEXT: MemoryUse([[NO7]]) MayAlias
+; CHECK-NEXT: MemoryUse([[NO7]])
 ; CHECK-NEXT: %cleanup.dest = load i32, i32* undef, align 1

 ; CHECK: lbl1.backedge:
diff --git a/llvm/unittests/Analysis/MemorySSATest.cpp b/llvm/unittests/Analysis/MemorySSATest.cpp
index b470f16261263..5c0c48b788310 100644
--- a/llvm/unittests/Analysis/MemorySSATest.cpp
+++ b/llvm/unittests/Analysis/MemorySSATest.cpp
@@ -1066,7 +1066,7 @@ TEST_F(MemorySSATest, TestStoreMustAlias) {
     MemoryDef *MemDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(V));
     EXPECT_EQ(MemDef->isOptimized(), false)
         << "Store " << I << " is optimized from the start?";
-    EXPECT_EQ(MemDef->getOptimizedAccessType(), MayAlias)
+    EXPECT_EQ(MemDef->getOptimizedAccessType(), None)
         << "Store " << I
         << " has correct alias information before being optimized?";
     if (V == SA1)
@@ -1170,7 +1170,7 @@ TEST_F(MemorySSATest, TestStoreMayAlias) {
     MemoryDef *MemDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(V));
     EXPECT_EQ(MemDef->isOptimized(), false)
         << "Store " << I << " is optimized from the start?";
-    EXPECT_EQ(MemDef->getOptimizedAccessType(), MayAlias)
+    EXPECT_EQ(MemDef->getOptimizedAccessType(), None)
        << "Store " << I
        << " has correct alias information before being optimized?";
     ++I;

From 94f7d3dba3c0a6ffd3e8a3f87ae849890578cd88 Mon Sep 17 00:00:00 2001
From: Max Kazantsev
Date: Wed, 16 Sep 2020 13:59:41 +0700
Subject: [PATCH 0793/1079] [Test] Some more potential range check elimination
 opportunities

---
 .../IndVarSimplify/predicated_ranges.ll       | 237 ++++++++++++++++++
 1 file changed, 237 insertions(+)

diff --git a/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll b/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
index 62a0a1dcf8656..9aa714c8a56b9 100644
--- a/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
+++ b/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
@@ -110,4 +110,241 @@ fail:
   unreachable
 }

+
+define void @predicated_outside_loop_signed(i32 %arg) nounwind #0 {
+; CHECK-LABEL: @predicated_outside_loop_signed(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK:       outer.preheader:
+; CHECK-NEXT:    br label [[OUTER:%.*]]
+; CHECK:       outer:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ]
+; CHECK-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]]
+; CHECK-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[INNER_PH:%.*]], label [[OUTER_INC]]
+; CHECK:       inner.ph:
+; CHECK-NEXT:    br label [[INNER:%.*]]
+; CHECK:       inner:
+; CHECK-NEXT:    br i1 false, label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]]
+; CHECK:       outer.inc.loopexit:
+; CHECK-NEXT:    br label [[OUTER_INC]]
+; CHECK:
outer.inc: +; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: br i1 false, label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %sub1 = sub nsw i32 %arg, 1 + %cmp1 = icmp slt i32 0, %sub1 + br i1 %cmp1, label %outer, label %exit + +outer: + %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ] + %sub2 = sub nsw i32 %arg, %i + %sub3 = sub nsw i32 %sub2, 1 + %cmp2 = icmp slt i32 0, %sub3 + br i1 %cmp2, label %inner.ph, label %outer.inc + +inner.ph: + br label %inner + +inner: + %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ] + %j.inc = add nsw i32 %j, 1 + %cmp3 = icmp slt i32 %j.inc, %sub3 + br i1 %cmp3, label %inner, label %outer.inc + +outer.inc: + %i.inc = add nsw i32 %i, 1 + %cmp4 = icmp slt i32 %i.inc, %arg + br i1 %cmp4, label %outer, label %exit + +exit: + ret void +} + +define void @predicated_outside_loop_unsigned(i32 %arg) nounwind #0 { +; CHECK-LABEL: @predicated_outside_loop_unsigned( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]] +; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: outer.preheader: +; CHECK-NEXT: br label [[OUTER:%.*]] +; CHECK: outer: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ] +; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]] +; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 0, [[SUB3]] +; CHECK-NEXT: br i1 [[CMP2]], label [[INNER_PH:%.*]], label [[OUTER_INC]] +; CHECK: inner.ph: +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: br i1 false, label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]] +; CHECK: outer.inc.loopexit: +; CHECK-NEXT: br label [[OUTER_INC]] +; CHECK: outer.inc: +; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: br i1 false, label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %sub1 = sub nsw i32 %arg, 1 + %cmp1 = icmp slt i32 0, %sub1 + br i1 %cmp1, label %outer, label %exit + +outer: + %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ] + %sub2 = sub nsw i32 %arg, %i + %sub3 = sub nsw i32 %sub2, 1 + %cmp2 = icmp ult i32 0, %sub3 + br i1 %cmp2, label %inner.ph, label %outer.inc + +inner.ph: + br label %inner + +inner: + %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ] + %j.inc = add nsw i32 %j, 1 + %cmp3 = icmp slt i32 %j.inc, %sub3 + br i1 %cmp3, label %inner, label %outer.inc + +outer.inc: + %i.inc = add nsw i32 %i, 1 + %cmp4 = icmp slt i32 %i.inc, %arg + br i1 %cmp4, label %outer, label %exit + +exit: + ret void +} + +define void @predicated_inside_loop_signed(i32 %arg) nounwind #0 { +; CHECK-LABEL: @predicated_inside_loop_signed( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[OUTER:%.*]] +; CHECK: outer: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ] +; CHECK-NEXT: [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]] +; CHECK-NEXT: br i1 [[CMP1]], label [[GUARDED:%.*]], label [[EXIT:%.*]] +; CHECK: guarded: +; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]] +; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1 +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]] +; CHECK-NEXT: br i1 [[CMP2]], label [[INNER_PH:%.*]], label [[OUTER_INC]] +; CHECK: inner.ph: +; 
CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: br i1 false, label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]] +; CHECK: outer.inc.loopexit: +; CHECK-NEXT: br label [[OUTER_INC]] +; CHECK: outer.inc: +; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i32 [[I_INC]], [[ARG]] +; CHECK-NEXT: br i1 [[CMP4]], label [[OUTER]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %outer + +outer: + %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ] + %sub1 = sub nsw i32 %arg, 1 + %cmp1 = icmp slt i32 0, %sub1 + br i1 %cmp1, label %guarded, label %exit + +guarded: + %sub2 = sub nsw i32 %arg, %i + %sub3 = sub nsw i32 %sub2, 1 + %cmp2 = icmp slt i32 0, %sub3 + br i1 %cmp2, label %inner.ph, label %outer.inc + +inner.ph: + br label %inner + +inner: + %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ] + %j.inc = add nsw i32 %j, 1 + %cmp3 = icmp slt i32 %j.inc, %sub3 + br i1 %cmp3, label %inner, label %outer.inc + +outer.inc: + %i.inc = add nsw i32 %i, 1 + %cmp4 = icmp slt i32 %i.inc, %arg + br i1 %cmp4, label %outer, label %exit + +exit: + ret void +} + +define void @predicated_inside_loop_unsigned(i32 %arg) nounwind #0 { +; CHECK-LABEL: @predicated_inside_loop_unsigned( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[OUTER:%.*]] +; CHECK: outer: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ] +; CHECK-NEXT: [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]] +; CHECK-NEXT: br i1 [[CMP1]], label [[GUARDED:%.*]], label [[EXIT:%.*]] +; CHECK: guarded: +; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]] +; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 0, [[SUB3]] +; CHECK-NEXT: br i1 [[CMP2]], label [[INNER_PH:%.*]], label [[OUTER_INC]] +; CHECK: inner.ph: +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: br i1 false, label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]] +; CHECK: outer.inc.loopexit: +; CHECK-NEXT: br label [[OUTER_INC]] +; CHECK: outer.inc: +; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i32 [[I_INC]], [[ARG]] +; CHECK-NEXT: br i1 [[CMP4]], label [[OUTER]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %outer + +outer: + %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ] + %sub1 = sub nsw i32 %arg, 1 + %cmp1 = icmp slt i32 0, %sub1 + br i1 %cmp1, label %guarded, label %exit + +guarded: + %sub2 = sub nsw i32 %arg, %i + %sub3 = sub nsw i32 %sub2, 1 + %cmp2 = icmp ult i32 0, %sub3 + br i1 %cmp2, label %inner.ph, label %outer.inc + +inner.ph: + br label %inner + +inner: + %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ] + %j.inc = add nsw i32 %j, 1 + %cmp3 = icmp slt i32 %j.inc, %sub3 + br i1 %cmp3, label %inner, label %outer.inc + +outer.inc: + %i.inc = add nsw i32 %i, 1 + %cmp4 = icmp slt i32 %i.inc, %arg + br i1 %cmp4, label %outer, label %exit + +exit: + ret void +} + !0 = !{i32 0, i32 2147483647} From af56be339f8c9660747794cc6755384154602535 Mon Sep 17 00:00:00 2001 From: Richard Barton Date: Wed, 16 Sep 2020 08:18:08 +0100 Subject: [PATCH 0794/1079] [flang] Fix docs build Apply a local fix to an issue with recommonmark's AutoStructify extension when used with certain versions of sphinx. 
See https://github.com/readthedocs/recommonmark/issues/93

Reviewed By: hans

Differential Revision: https://reviews.llvm.org/D87714
---
 flang/docs/conf.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/flang/docs/conf.py b/flang/docs/conf.py
index 851b233767a91..197721a4e4c80 100644
--- a/flang/docs/conf.py
+++ b/flang/docs/conf.py
@@ -50,6 +50,17 @@
     # Setup AutoStructify for inline .rst toctrees in index.md
     from recommonmark.transform import AutoStructify
+
+    # Stolen from https://github.com/readthedocs/recommonmark/issues/93
+    # Monkey patch to fix recommonmark 0.4 doc reference issues.
+    from recommonmark.states import DummyStateMachine
+    orig_run_role = DummyStateMachine.run_role
+    def run_role(self, name, options=None, content=None):
+        if name == 'doc':
+            name = 'any'
+        return orig_run_role(self, name, options, content)
+    DummyStateMachine.run_role = run_role
+
     def setup(app):
         # Disable inline math to avoid
         # https://github.com/readthedocs/recommonmark/issues/120 in Extensions.md

From 6985135a43b62db2defc95367432069c9fddd094 Mon Sep 17 00:00:00 2001
From: Max Kazantsev
Date: Wed, 16 Sep 2020 14:24:00 +0700
Subject: [PATCH 0795/1079] [Test] Add positive range check tests in addition
 to negative ones

---
 .../IndVarSimplify/predicated_ranges.ll       | 131 +++++++++++++++++-
 1 file changed, 126 insertions(+), 5 deletions(-)

diff --git a/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll b/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
index 9aa714c8a56b9..159caf014e3ce 100644
--- a/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
+++ b/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
@@ -110,9 +110,9 @@ fail:
   unreachable
 }

-
-define void @predicated_outside_loop_signed(i32 %arg) nounwind #0 {
-; CHECK-LABEL: @predicated_outside_loop_signed(
+; Cannot remove checks because the range check fails on the last iteration.
+define void @predicated_outside_loop_signed_neg(i32 %arg) nounwind #0 {
+; CHECK-LABEL: @predicated_outside_loop_signed_neg(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]]
@@ -169,6 +169,65 @@ exit:
   ret void
 }

+; Range check can be removed.
+define void @predicated_outside_loop_signed_pos(i32 %arg) nounwind #0 { +; CHECK-LABEL: @predicated_outside_loop_signed_pos( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]] +; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: outer.preheader: +; CHECK-NEXT: br label [[OUTER:%.*]] +; CHECK: outer: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ] +; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]] +; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1 +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]] +; CHECK-NEXT: br i1 [[CMP2]], label [[INNER_PH:%.*]], label [[OUTER_INC]] +; CHECK: inner.ph: +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: br i1 false, label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]] +; CHECK: outer.inc.loopexit: +; CHECK-NEXT: br label [[OUTER_INC]] +; CHECK: outer.inc: +; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: br i1 false, label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %sub1 = sub nsw i32 %arg, 1 + %cmp1 = icmp slt i32 0, %sub1 + br i1 %cmp1, label %outer, label %exit + +outer: + %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ] + %sub2 = sub nsw i32 %arg, %i + %sub3 = sub nsw i32 %sub2, 1 + %cmp2 = icmp slt i32 0, %sub3 + br i1 %cmp2, label %inner.ph, label %outer.inc + +inner.ph: + br label %inner + +inner: + %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ] + %j.inc = add nsw i32 %j, 1 + %cmp3 = icmp slt i32 %j.inc, %sub3 + br i1 %cmp3, label %inner, label %outer.inc + +outer.inc: + %i.inc = add nsw i32 %i, 1 + %cmp4 = icmp slt i32 %i.inc, %sub1 + br i1 %cmp4, label %outer, label %exit + +exit: + ret void +} + define void @predicated_outside_loop_unsigned(i32 %arg) nounwind #0 { ; CHECK-LABEL: @predicated_outside_loop_unsigned( ; CHECK-NEXT: entry: @@ -227,8 +286,9 @@ exit: ret void } -define void @predicated_inside_loop_signed(i32 %arg) nounwind #0 { -; CHECK-LABEL: @predicated_inside_loop_signed( +; Cannot remove checks because the range check fails on the last iteration. +define void @predicated_inside_loop_signed_neg(i32 %arg) nounwind #0 { +; CHECK-LABEL: @predicated_inside_loop_signed_neg( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[OUTER:%.*]] ; CHECK: outer: @@ -287,6 +347,67 @@ exit: ret void } +; Range check can be trivially removed. 
+define void @predicated_inside_loop_signed_pos(i32 %arg) nounwind #0 {
+; CHECK-LABEL: @predicated_inside_loop_signed_pos(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[OUTER:%.*]]
+; CHECK:       outer:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ]
+; CHECK-NEXT:    [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[GUARDED:%.*]], label [[EXIT:%.*]]
+; CHECK:       guarded:
+; CHECK-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]]
+; CHECK-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[INNER_PH:%.*]], label [[OUTER_INC]]
+; CHECK:       inner.ph:
+; CHECK-NEXT:    br label [[INNER:%.*]]
+; CHECK:       inner:
+; CHECK-NEXT:    br i1 false, label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]]
+; CHECK:       outer.inc.loopexit:
+; CHECK-NEXT:    br label [[OUTER_INC]]
+; CHECK:       outer.inc:
+; CHECK-NEXT:    [[I_INC]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP4:%.*]] = icmp slt i32 [[I_INC]], [[SUB1]]
+; CHECK-NEXT:    br i1 [[CMP4]], label [[OUTER]], label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %outer
+
+outer:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ]
+  %sub1 = sub nsw i32 %arg, 1
+  %cmp1 = icmp slt i32 0, %sub1
+  br i1 %cmp1, label %guarded, label %exit
+
+guarded:
+  %sub2 = sub nsw i32 %arg, %i
+  %sub3 = sub nsw i32 %sub2, 1
+  %cmp2 = icmp slt i32 0, %sub3
+  br i1 %cmp2, label %inner.ph, label %outer.inc
+
+inner.ph:
+  br label %inner
+
+inner:
+  %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ]
+  %j.inc = add nsw i32 %j, 1
+  %cmp3 = icmp slt i32 %j.inc, %sub3
+  br i1 %cmp3, label %inner, label %outer.inc
+
+outer.inc:
+  %i.inc = add nsw i32 %i, 1
+  %cmp4 = icmp slt i32 %i.inc, %sub1
+  br i1 %cmp4, label %outer, label %exit
+
+exit:
+  ret void
+}
+
 define void @predicated_inside_loop_unsigned(i32 %arg) nounwind #0 {
 ; CHECK-LABEL: @predicated_inside_loop_unsigned(
 ; CHECK-NEXT:  entry:

From b42fa0c040961b3704e826ddc969c0e98238c3ba Mon Sep 17 00:00:00 2001
From: Vitaly Buka
Date: Wed, 16 Sep 2020 00:03:07 -0700
Subject: [PATCH 0796/1079] Revert "[Asan] Fix false leak report"

Additional investigation confirmed that the issue is not about
AddrIsInside, but about missing registers.

This reverts commit 9d01612db48fa27d18c6320974b8d711572e5c67.

---
 compiler-rt/lib/asan/asan_allocator.cpp       | 14 ++++++----
 .../test/asan/TestCases/redzone_noleak.cpp    | 28 -------------------
 2 files changed, 9 insertions(+), 33 deletions(-)
 delete mode 100644 compiler-rt/test/asan/TestCases/redzone_noleak.cpp

diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp
index b1d99699a6e64..691f64c0ef362 100644
--- a/compiler-rt/lib/asan/asan_allocator.cpp
+++ b/compiler-rt/lib/asan/asan_allocator.cpp
@@ -158,6 +158,9 @@ enum {
 class AsanChunk : public ChunkBase {
  public:
   uptr Beg() { return reinterpret_cast<uptr>(this) + kChunkHeaderSize; }
+  bool AddrIsInside(uptr addr) {
+    return (addr >= Beg()) && (addr < Beg() + UsedSize());
+  }
 };

 class LargeChunkHeader {
@@ -1113,11 +1116,12 @@ uptr PointsIntoChunk(void *p) {
   if (!m || atomic_load(&m->chunk_state, memory_order_acquire) !=
                 __asan::CHUNK_ALLOCATED)
     return 0;
-  // AsanChunk presence means that we point into some block from underlying
-  // allocators. Don't check whether p points into user memory, since until
-  // the return from AsanAllocator::Allocator we may have no such
-  // pointer anywhere.
But we must already have a pointer to GetBlockBegin().
-  return m->Beg();
+  uptr chunk = m->Beg();
+  if (m->AddrIsInside(addr))
+    return chunk;
+  if (IsSpecialCaseOfOperatorNew0(chunk, m->UsedSize(), addr))
+    return chunk;
+  return 0;
 }

 uptr GetUserBegin(uptr chunk) {
diff --git a/compiler-rt/test/asan/TestCases/redzone_noleak.cpp b/compiler-rt/test/asan/TestCases/redzone_noleak.cpp
deleted file mode 100644
index f122c05e5108e..0000000000000
--- a/compiler-rt/test/asan/TestCases/redzone_noleak.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-// Test whether pointers into left redzone count memory are reachable.
-// If user thread is inside asan allocator code then we may have no
-// pointers into user part of memory yet. However we should have a pointer
-// into the allocated memory chunk.
-//
-// RUN: %clangxx_asan %s -o %t
-// RUN: %run %t 2>&1
-
-#include
-#include
-#include
-
-void *pointers[1000];
-void **cur = pointers;
-
-void leak(int n, int offset) {
-  printf("%d %d\n", n, offset);
-  for (int i = 0; i < 3; ++i)
-    *(cur++) = (new int[n]) + offset;
-}
-
-int main(int argc, char **argv) {
-  for (int n = 1; n < 10000000; n = n * 2) {
-    leak(n, 0);
-    leak(n, -1);
-  }
-  return 0;
-}

From af56be339f8c9660747794cc6755384154602535 Mon Sep 17 00:00:00 2001
From: Vitaly Buka
Date: Wed, 16 Sep 2020 00:27:13 -0700
Subject: [PATCH 0797/1079] Revert "[Asan] Accept __lsan_ignore_object for
 redzone pointer"

We still keep AddrIsInside.

This reverts commit 1d70984fa220f966ddcecd7906c5f10368fe1b93.

---
 compiler-rt/lib/asan/asan_allocator.cpp              | 6 ++++--
 compiler-rt/test/asan/TestCases/lsan_annotations.cpp | 7 ++-----
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp
index 691f64c0ef362..58b496a3ca4b1 100644
--- a/compiler-rt/lib/asan/asan_allocator.cpp
+++ b/compiler-rt/lib/asan/asan_allocator.cpp
@@ -1172,8 +1172,10 @@ void ForEachChunk(ForEachChunkCallback callback, void *arg) {
 IgnoreObjectResult IgnoreObjectLocked(const void *p) {
   uptr addr = reinterpret_cast<uptr>(p);
   __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddr(addr);
-  if (!m || (atomic_load(&m->chunk_state, memory_order_acquire) !=
-             __asan::CHUNK_ALLOCATED)) {
+  if (!m ||
+      (atomic_load(&m->chunk_state, memory_order_acquire) !=
+       __asan::CHUNK_ALLOCATED) ||
+      !m->AddrIsInside(addr)) {
     return kIgnoreObjectInvalid;
   }
   if (m->lsan_tag == kIgnored)
diff --git a/compiler-rt/test/asan/TestCases/lsan_annotations.cpp b/compiler-rt/test/asan/TestCases/lsan_annotations.cpp
index ce7c19b8f2d05..158c2fdf9f481 100644
--- a/compiler-rt/test/asan/TestCases/lsan_annotations.cpp
+++ b/compiler-rt/test/asan/TestCases/lsan_annotations.cpp
@@ -5,7 +5,7 @@
 #include
 #include

-int *x, *y, *z;
+int *x, *y;

 int main() {
   x = new int;
@@ -16,9 +16,6 @@ int main() {
     y = new int;
   }

-  z = new int;
-  __lsan_ignore_object(z - 1);
-
-  x = y = z = nullptr;
+  x = y = nullptr;
   return 0;
 }

From 070b96962f517772fff4bf3c27cc825b46a136b5 Mon Sep 17 00:00:00 2001
From: Yvan Roux
Date: Wed, 16 Sep 2020 09:54:26 +0200
Subject: [PATCH 0798/1079] [ARM][MachineOutliner] Add calls handling.

Handles calls inside outlined regions by saving and restoring the link
register.
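For illustration, an outlined function whose region contains a call now has
roughly the following shape (a sketch of the general idea rather than actual
output of this patch's tests; the precise save/restore instructions, stack
adjustment and CFI directives are chosen by saveLROnStack, restoreLRFromStack
and the frame-building code below):

OUTLINED_FUNCTION_0:
        push    {lr}        @ entered via BL, so LR holds the return address
        bl      bar         @ the call inside the region clobbers LR
        ...                 @ remaining instructions of the outlined region
        pop     {lr}        @ recover the saved return address
        bx      lr

Without the save and restore, the inner BL would overwrite LR and the outlined
function could not return to its caller.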
Differential Revision: https://reviews.llvm.org/D87136
---
 llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp      | 121 ++++++++++++++++--
 .../CodeGen/ARM/machine-outliner-default.mir  | 116 -----------------
 2 files changed, 112 insertions(+), 125 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index d7d51fdd29ca8..d81c8efa1597d 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -5678,6 +5678,7 @@ struct OutlinerCosts {
   const int FrameRegSave;
   const int CallDefault;
   const int FrameDefault;
+  const int SaveRestoreLROnStack;

   OutlinerCosts(const ARMSubtarget &target)
       : CallTailCall(target.isThumb() ? 4 : 4),
@@ -5689,7 +5690,8 @@ struct OutlinerCosts {
         CallRegSave(target.isThumb() ? 8 : 12),
         FrameRegSave(target.isThumb() ? 2 : 4),
         CallDefault(target.isThumb() ? 8 : 12),
-        FrameDefault(target.isThumb() ? 2 : 4) {}
+        FrameDefault(target.isThumb() ? 2 : 4),
+        SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
 };

 unsigned
@@ -5830,10 +5832,28 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
       C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
       SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
       CandidatesWithoutStackFixups.push_back(C);
-    }
-    else
+    } else
       return outliner::OutlinedFunction();
   }
+
+  // Does every candidate's MBB contain a call? If so, then we might have a
+  // call in the range.
+  if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
+    // Check if the range contains a call. These require a save + restore of
+    // the link register.
+    if (std::any_of(FirstCand.front(), FirstCand.back(),
+                    [](const MachineInstr &MI) { return MI.isCall(); }))
+      NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
+
+    // Handle the last instruction separately. If it is a tail call, then the
+    // last instruction is a call and we don't want to save + restore in this
+    // case. However, it could be possible that the last instruction is a
+    // call without it being valid to tail call this sequence. We should
+    // consider this as well.
+    else if (FrameID != MachineOutlinerThunk &&
+             FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
+      NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
+  }
   RepeatedSequenceLocs = CandidatesWithoutStackFixups;
 }
@@ -5973,6 +5993,23 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
     return outliner::InstrType::Illegal;

   if (MI.isCall()) {
+    // Get the function associated with the call. Look at each operand and find
+    // the one that represents the callee and get its name.
+    const Function *Callee = nullptr;
+    for (const MachineOperand &MOP : MI.operands()) {
+      if (MOP.isGlobal()) {
+        Callee = dyn_cast<Function>(MOP.getGlobal());
+        break;
+      }
+    }
+
+    // Don't outline calls to "mcount" like functions; in particular, Linux
+    // kernel function tracing relies on it.
+    if (Callee &&
+        (Callee->getName() == "\01__gnu_mcount_nc" ||
+         Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
+      return outliner::InstrType::Illegal;
+
     // If we don't know anything about the callee, assume it depends on the
     // stack layout of the caller. In that case, it's only legal to outline
    // as a tail-call.
Explicitly list the call instructions we know about so
@@ -5982,7 +6019,29 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
         Opc == ARM::tBLXr || Opc == ARM::tBLXi)
       UnknownCallOutlineType = outliner::InstrType::LegalTerminator;

-    return UnknownCallOutlineType;
+    if (!Callee)
+      return UnknownCallOutlineType;
+
+    // We have a function we have information about. Check if it's something we
+    // can safely outline.
+    MachineFunction *MF = MI.getParent()->getParent();
+    MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
+
+    // We don't know what's going on with the callee at all. Don't touch it.
+    if (!CalleeMF)
+      return UnknownCallOutlineType;
+
+    // Check if we know anything about the callee saves on the function. If we
+    // don't, then don't touch it, since that implies that we haven't computed
+    // anything about its stack frame yet.
+    MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
+    if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
+        MFI.getNumObjects() > 0)
+      return UnknownCallOutlineType;
+
+    // At this point, we can say that CalleeMF ought to not pass anything on the
+    // stack. Therefore, we can outline it.
+    return outliner::InstrType::Legal;
   }

   // Since calls are handled, don't touch LR or PC
@@ -6045,10 +6104,6 @@ void ARMBaseInstrInfo::restoreLRFromStack(
 void ARMBaseInstrInfo::buildOutlinedFrame(
     MachineBasicBlock &MBB, MachineFunction &MF,
     const outliner::OutlinedFunction &OF) const {
-  // Nothing is needed for tail-calls.
-  if (OF.FrameConstructionID == MachineOutlinerTailCall)
-    return;
-
   // For thunk outlining, rewrite the last instruction from a call to a
   // tail-call.
   if (OF.FrameConstructionID == MachineOutlinerThunk) {
@@ -6065,9 +6120,57 @@ void ARMBaseInstrInfo::buildOutlinedFrame(
     if (isThumb && !Call->getOperand(FuncOp).isReg())
       MIB.add(predOps(ARMCC::AL));
     Call->eraseFromParent();
-    return;
   }

+  // Is there a call in the outlined range?
+  auto IsNonTailCall = [](MachineInstr &MI) {
+    return MI.isCall() && !MI.isReturn();
+  };
+  if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
+    MachineBasicBlock::iterator It = MBB.begin();
+    MachineBasicBlock::iterator Et = MBB.end();
+
+    if (OF.FrameConstructionID == MachineOutlinerTailCall ||
+        OF.FrameConstructionID == MachineOutlinerThunk)
+      Et = std::prev(MBB.end());
+
+    // We have to save and restore LR, so we need to add it to the liveins if
+    // it is not already part of the set. This is sufficient since outlined
+    // functions only have one block.
+    if (!MBB.isLiveIn(ARM::LR))
+      MBB.addLiveIn(ARM::LR);
+
+    // Insert a save before the outlined region
+    saveLROnStack(MBB, It);
+
+    unsigned StackAlignment = Subtarget.getStackAlignment().value();
+    const TargetSubtargetInfo &STI = MF.getSubtarget();
+    const MCRegisterInfo *MRI = STI.getRegisterInfo();
+    unsigned DwarfReg = MRI->getDwarfRegNum(ARM::LR, true);
+    // Add a CFI saying the stack was moved down.
+    int64_t StackPosEntry = MF.addFrameInst(
+        MCCFIInstruction::cfiDefCfaOffset(nullptr, StackAlignment));
+    BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
+        .addCFIIndex(StackPosEntry)
+        .setMIFlags(MachineInstr::FrameSetup);
+
+    // Add a CFI saying that the LR that we want to find is now higher than
+    // before.
+ int64_t LRPosEntry = MF.addFrameInst( + MCCFIInstruction::createOffset(nullptr, DwarfReg, StackAlignment)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(LRPosEntry) + .setMIFlags(MachineInstr::FrameSetup); + + // Insert a restore before the terminator for the function. Restore LR. + restoreLRFromStack(MBB, Et); + } + + // If this is a tail call outlined function, then there's already a return. + if (OF.FrameConstructionID == MachineOutlinerTailCall || + OF.FrameConstructionID == MachineOutlinerThunk) + return; + // Here we have to insert the return ourselves. Get the correct opcode from // current feature set. BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode())) diff --git a/llvm/test/CodeGen/ARM/machine-outliner-default.mir b/llvm/test/CodeGen/ARM/machine-outliner-default.mir index 452d6a96c5393..9db4207d2df7a 100644 --- a/llvm/test/CodeGen/ARM/machine-outliner-default.mir +++ b/llvm/test/CodeGen/ARM/machine-outliner-default.mir @@ -5,8 +5,6 @@ --- | define void @outline_default_arm() #0 { ret void } define void @outline_default_thumb() #1 { ret void } - define void @outline_default_KO_call_arm() #0 { ret void } - define void @outline_default_KO_call_thumb() #1 { ret void } define void @outline_default_KO_stack_arm() #0 { ret void } define void @outline_default_KO_stack_thumb() #0 { ret void } declare void @bar() @@ -118,120 +116,6 @@ body: | ... --- -name: outline_default_KO_call_arm -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: outline_default_KO_call_arm - ; CHECK: bb.0: - ; CHECK: liveins: $lr - ; CHECK: BL @bar, implicit-def dead $lr, implicit $sp - ; CHECK: $r0 = MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $r1 = MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $r2 = MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $r3 = MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $r4 = MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: bb.1: - ; CHECK: liveins: $lr, $r5, $r6, $r7, $r8, $r9, $r10, $r11 - ; CHECK: BL @bar, implicit-def dead $lr, implicit $sp - ; CHECK: $r0 = MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $r1 = MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $r2 = MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $r3 = MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $r4 = MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: bb.2: - ; CHECK: liveins: $lr, $r5, $r6, $r7, $r8, $r9, $r10, $r11 - ; CHECK: BL @bar, implicit-def dead $lr, implicit $sp - ; CHECK: $r0 = MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $r1 = MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $r2 = MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $r3 = MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $r4 = MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: bb.3: - ; CHECK: liveins: $lr, $r5, $r6, $r7, $r8, $r9, $r10, $r11 - ; CHECK: $r2 = MOVr $lr, 14 /* CC::al */, $noreg, $noreg - ; CHECK: BX_RET 14 /* CC::al */, $noreg - bb.0: - liveins: $lr - BL @bar, implicit-def dead $lr, implicit $sp - $r0 = MOVi 2, 14, $noreg, $noreg - $r1 = MOVi 2, 14, $noreg, $noreg - $r2 = MOVi 2, 14, $noreg, $noreg - $r3 = MOVi 2, 14, $noreg, $noreg - $r4 = MOVi 2, 14, $noreg, $noreg - bb.1: - liveins: $lr, $r5, $r6, $r7, $r8, $r9, $r10, $r11 - BL @bar, implicit-def dead $lr, implicit $sp - $r0 = MOVi 2, 14, $noreg, $noreg - $r1 = MOVi 2, 14, $noreg, $noreg - $r2 = MOVi 2, 14, $noreg, $noreg - $r3 = MOVi 2, 14, $noreg, $noreg - $r4 = MOVi 2, 14, $noreg, $noreg - bb.2: - liveins: $lr, $r5, $r6, $r7, $r8, $r9, $r10, $r11 - BL @bar, implicit-def 
dead $lr, implicit $sp
-    $r0 = MOVi 2, 14, $noreg, $noreg
-    $r1 = MOVi 2, 14, $noreg, $noreg
-    $r2 = MOVi 2, 14, $noreg, $noreg
-    $r3 = MOVi 2, 14, $noreg, $noreg
-    $r4 = MOVi 2, 14, $noreg, $noreg
-  bb.3:
-    liveins: $lr, $r5, $r6, $r7, $r8, $r9, $r10, $r11
-    $r2 = MOVr $lr, 14, $noreg, $noreg
-    BX_RET 14, $noreg
-...
----
-
-name: outline_default_KO_call_thumb
-tracksRegLiveness: true
-body: |
-  ; CHECK-LABEL: name: outline_default_KO_call_thumb
-  ; CHECK: bb.0:
-  ; CHECK: liveins: $lr
-  ; CHECK: tBL 14 /* CC::al */, $noreg, @bar, implicit-def dead $lr, implicit $sp
-  ; CHECK: $r0 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK: $r1 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK: $r2 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK: bb.1:
-  ; CHECK: liveins: $lr, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11
-  ; CHECK: tBL 14 /* CC::al */, $noreg, @bar, implicit-def dead $lr, implicit $sp
-  ; CHECK: $r0 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK: $r1 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK: $r2 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK: bb.2:
-  ; CHECK: liveins: $lr, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11
-  ; CHECK: tBL 14 /* CC::al */, $noreg, @bar, implicit-def dead $lr, implicit $sp
-  ; CHECK: $r0 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK: $r1 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK: $r2 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK: bb.3:
-  ; CHECK: liveins: $lr, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11
-  ; CHECK: $r2 = tMOVr $lr, 14 /* CC::al */, $noreg
-  ; CHECK: tBX_RET 14 /* CC::al */, $noreg
-  bb.0:
-    liveins: $lr
-    tBL 14, $noreg, @bar, implicit-def dead $lr, implicit $sp
-    $r0 = t2MOVi 2, 14, $noreg, $noreg
-    $r1 = t2MOVi 2, 14, $noreg, $noreg
-    $r2 = t2MOVi 2, 14, $noreg, $noreg
-  bb.1:
-    liveins: $lr, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11
-    tBL 14, $noreg, @bar, implicit-def dead $lr, implicit $sp
-    $r0 = t2MOVi 2, 14, $noreg, $noreg
-    $r1 = t2MOVi 2, 14, $noreg, $noreg
-    $r2 = t2MOVi 2, 14, $noreg, $noreg
-  bb.2:
-    liveins: $lr, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11
-    tBL 14, $noreg, @bar, implicit-def dead $lr, implicit $sp
-    $r0 = t2MOVi 2, 14, $noreg, $noreg
-    $r1 = t2MOVi 2, 14, $noreg, $noreg
-    $r2 = t2MOVi 2, 14, $noreg, $noreg
-  bb.3:
-    liveins: $lr, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11
-    $r2 = tMOVr $lr, 14, $noreg
-    tBX_RET 14, $noreg
-...
----
-
 name: outline_default_KO_stack_arm
 tracksRegLiveness: true
 body: |

From d427df6369f1d229a9f498b4dc621433ada380d2 Mon Sep 17 00:00:00 2001
From: Aleksandr Platonov
Date: Wed, 16 Sep 2020 11:04:53 +0300
Subject: [PATCH 0799/1079] [clangd] Don't use zlib when it's unavailable.

Without this patch `clangd` crashes when trying to load a compressed
string table and `zlib` is not available.
Example:
 - Build `clangd` with MinGW (`zlib` found)
 - Build index
 - Build `clangd` with Visual Studio compiler (`zlib` not found)
 - Try to load index

Reviewed By: sammccall, adamcz

Differential Revision: https://reviews.llvm.org/D87673
---
 clang-tools-extra/clangd/index/Serialization.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp
index c099a30c4d348..e7f65f087b1c4 100644
--- a/clang-tools-extra/clangd/index/Serialization.cpp
+++ b/clang-tools-extra/clangd/index/Serialization.cpp
@@ -201,12 +201,13 @@ llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) {
   llvm::SmallString<1> UncompressedStorage;
   if (UncompressedSize == 0) // No compression
     Uncompressed = R.rest();
-  else {
+  else if (llvm::zlib::isAvailable()) {
     if (llvm::Error E = llvm::zlib::uncompress(R.rest(), UncompressedStorage,
                                                UncompressedSize))
       return std::move(E);
     Uncompressed = UncompressedStorage;
-  }
+  } else
+    return error("Compressed string table, but zlib is unavailable");

   StringTableIn Table;
   llvm::StringSaver Saver(Table.Arena);

From ef0b9f3307a1fa1c82b34098213ec854c1b5e608 Mon Sep 17 00:00:00 2001
From: Sam Tebbs
Date: Mon, 14 Sep 2020 15:44:54 +0100
Subject: [PATCH 0800/1079] [ARM][LowOverheadLoops] Combine a VCMP and VPST
 into a VPT

This patch combines a VCMP followed by a VPST into a VPT, which has the
same semantics as the combination of the former two.

---
 llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp   | 39 ++++++++++++---
 .../LowOverheadLoops/vcmp-vpst-combination.ll | 49 +++++++++++++++++++
 2 files changed, 81 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/vcmp-vpst-combination.ll

diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index 755c2e5eb6665..7acb70c5e7f53 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -1298,6 +1298,12 @@ void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
            E = ++MachineBasicBlock::iterator(Divergent->MI); I != E; ++I)
         RemovePredicate(&*I);

+      // Check if the instruction defining VPR is a VCMP so it can be combined
+      // with the VPST. This should be the divergent instruction.
+      MachineInstr *VCMP = VCMPOpcodeToVPT(Divergent->MI->getOpcode()) != 0
+                               ? Divergent->MI
+                               : nullptr;
+
       unsigned Size = 0;
       auto E = MachineBasicBlock::reverse_iterator(Divergent->MI);
       auto I = MachineBasicBlock::reverse_iterator(Insts.back().MI);
@@ -1307,13 +1313,32 @@ void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
         ++Size;
         ++I;
       }
-      // Create a VPST (with a null mask for now, we'll recompute it later).
-      MachineInstrBuilder MIB = BuildMI(*InsertAt->getParent(), InsertAt,
-                                        InsertAt->getDebugLoc(),
-                                        TII->get(ARM::MVE_VPST));
-      MIB.addImm(0);
-      LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getPredicateThen());
-      LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB);
+      MachineInstrBuilder MIB;
+      LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: "
+                        << *Block.getPredicateThen());
+      if (VCMP) {
+        // Combine the VPST and VCMP into a VPT
+        MIB =
+            BuildMI(*InsertAt->getParent(), InsertAt, InsertAt->getDebugLoc(),
+                    TII->get(VCMPOpcodeToVPT(VCMP->getOpcode())));
+        MIB.addImm(ARMVCC::Then);
+        // Register one
+        MIB.add(VCMP->getOperand(1));
+        // Register two
+        MIB.add(VCMP->getOperand(2));
+        // The comparison code, e.g.
ge, eq, lt
+        MIB.add(VCMP->getOperand(3));
+        LLVM_DEBUG(dbgs()
+                   << "ARM Loops: Combining with VCMP to VPT: " << *MIB);
+        LoLoop.ToRemove.insert(VCMP);
+      } else {
+        // Create a VPST (with a null mask for now, we'll recompute it later)
+        // or a VPT in case there was a VCMP right before it.
+        MIB = BuildMI(*InsertAt->getParent(), InsertAt,
+                      InsertAt->getDebugLoc(), TII->get(ARM::MVE_VPST));
+        MIB.addImm(0);
+        LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB);
+      }
       LoLoop.ToRemove.insert(Block.getPredicateThen());
       LoLoop.BlockMasksToRecompute.insert(MIB.getInstr());
     }
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vcmp-vpst-combination.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vcmp-vpst-combination.ll
new file mode 100644
index 0000000000000..222c2f036ca8b
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vcmp-vpst-combination.ll
@@ -0,0 +1,49 @@
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -tail-predication=force-enabled-no-reductions -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_vpst_combination(<16 x i8>* %pSrc, i16 zeroext %blockSize, i8* nocapture %pResult, i32* nocapture %pIndex) {
+; CHECK-LABEL: vcmp_vpst_combination:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    vmov.i8 q0, #0x7f
+; CHECK-NEXT:    dlstp.8 lr, r1
+; CHECK-NEXT:  .LBB0_1: @ %do.body
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vldrb.u8 q1, [r0]
+; CHECK-NEXT:    vpt.s8 ge, q0, q1
+; CHECK-NEXT:    vmovt q0, q1
+; CHECK-NEXT:    letp lr, .LBB0_1
+; CHECK-NEXT:  @ %bb.2: @ %do.end
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %conv = zext i16 %blockSize to i32
+  %0 = tail call { <16 x i8>, i32 } @llvm.arm.mve.vidup.v16i8(i32 0, i32 1)
+  %1 = extractvalue { <16 x i8>, i32 } %0, 0
+  br label %do.body
+
+do.body:                                          ; preds = %do.body, %entry
+  %indexVec.0 = phi <16 x i8> [ %1, %entry ], [ %add, %do.body ]
+  %curExtremIdxVec.0 = phi <16 x i8> [ zeroinitializer, %entry ], [ %6, %do.body ]
+  %curExtremValVec.0 = phi <16 x i8> [ <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>, %entry ], [ %6, %do.body ]
+  %blkCnt.0 = phi i32 [ %conv, %entry ], [ %sub2, %do.body ]
+  %2 = tail call <16 x i1> @llvm.arm.mve.vctp8(i32 %blkCnt.0)
+  %3 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %pSrc, i32 1, <16 x i1> %2, <16 x i8> zeroinitializer)
+  %4 = icmp sle <16 x i8> %3, %curExtremValVec.0
+  %5 = and <16 x i1> %4, %2
+  %6 = tail call <16 x i8> @llvm.arm.mve.orr.predicated.v16i8.v16i1(<16 x i8> %3, <16 x i8> %3, <16 x i1> %5, <16 x i8> %curExtremValVec.0)
+  %add = add <16 x i8> %indexVec.0, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+  %sub2 = add nsw i32 %blkCnt.0, -16
+  %cmp = icmp sgt i32 %blkCnt.0, 16
+  br i1 %cmp, label %do.body, label %do.end
+
+do.end:                                           ; preds = %do.body
+  ret <16 x i8> %6
+}
+
+declare { <16 x i8>, i32 } @llvm.arm.mve.vidup.v16i8(i32, i32)
+
+declare <16 x i1> @llvm.arm.mve.vctp8(i32)
+
+declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
+
+declare <16 x i8> @llvm.arm.mve.orr.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>)

From cb1ef0eaff8726a8c1fe4b8440f6734cbbe91630 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer
Date: Wed, 16 Sep 2020 09:34:31 +0100
Subject: [PATCH 0801/1079] Follow up rG635b87511ec3: forgot to add/commit the
 new test file. NFC.
---
 .../LowOverheadLoops/tail-pred-forced.ll      | 61 +++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-forced.ll

diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-forced.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-forced.ll
new file mode 100644
index 0000000000000..e2fa8ea77071d
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-forced.ll
@@ -0,0 +1,61 @@
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s --check-prefixes=CHECK,ENABLED
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=force-enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s --check-prefixes=CHECK,FORCED
+
+; CHECK-LABEL: set_iterations_not_rounded_up
+;
+; ENABLED: call <4 x i1> @llvm.get.active.lane.mask
+; ENABLED-NOT: vctp
+;
+; FORCED-NOT: call <4 x i1> @llvm.get.active.lane.mask
+; FORCED: vctp
+;
+; CHECK: ret void
+;
+define dso_local void @set_iterations_not_rounded_up(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
+entry:
+  %cmp8 = icmp sgt i32 %N, 0
+
+; Here, %v5 is used in set.loop.iterations. That value is usually rounded up
+; to the next multiple of the VF when emitted from the vectoriser, which means
+; a bound can be put on this expression. Without this, we can't, and we should
+; flag this as potential overflow behaviour.
+
+  %v5 = add nuw nsw i32 %N, 1
+  br i1 %cmp8, label %vector.ph, label %for.cond.cleanup
+
+vector.ph:                                        ; preds = %entry
+  %trip.count.minus.1 = add i32 %N, -1
+  call void @llvm.set.loop.iterations.i32(i32 %v5)
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %lsr.iv17 = phi i32* [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
+  %lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
+  %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ]
+  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %v6 = phi i32 [ %v5, %vector.ph ], [ %v8, %vector.body ]
+  %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>*
+  %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
+  %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>*
+  %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
+  %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+  %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+  %v7 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load
+  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v7, <4 x i32>* %lsr.iv1719, i32 4, <4 x i1> %active.lane.mask)
+  %index.next = add i32 %index, 4
+  %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
+  %scevgep15 = getelementptr i32, i32* %lsr.iv14, i32 4
+  %scevgep18 = getelementptr i32, i32* %lsr.iv17, i32 4
+  %v8 = call i32 @llvm.loop.decrement.reg.i32(i32 %v6, i32 1)
+  %v9 = icmp ne i32 %v8, 0
+  br i1 %v9, label %vector.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %vector.body, %entry
+  ret void
+}
+
+declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
+declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+declare void @llvm.set.loop.iterations.i32(i32)
+declare i32
@llvm.loop.decrement.reg.i32(i32, i32)
+declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)

From 159abe09d25b19c24bf23ce50757987c0f25abe4 Mon Sep 17 00:00:00 2001
From: Alok Kumar Sharma
Date: Thu, 10 Sep 2020 11:53:43 +0530
Subject: [PATCH 0802/1079] [DebugInfo][flang] DISubrange support for Fortran
 assumed size arrays

This is needed to support Fortran assumed size arrays, which can have a
missing upperBound/count, contrary to what the current DISubrange
validation expects.
Example:

subroutine sub (array1, array2)
  integer :: array1 (*)
  integer :: array2 (4:9, 10:*)

  array1(7:8) = 9
  array2(5, 10) = 10
end subroutine

Now the validation check is relaxed for Fortran.

Reviewed By: aprantl

Differential Revision: https://reviews.llvm.org/D87500
---
 llvm/include/llvm/BinaryFormat/Dwarf.h        |  67 +++++++++-
 llvm/lib/IR/Verifier.cpp                      |   9 +-
 llvm/test/DebugInfo/X86/assumed_size_array.ll | 122 ++++++++++++++++++
 3 files changed, 194 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/DebugInfo/X86/assumed_size_array.ll

diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h
index bcc447a84a4dc..28cbc2c6a0e4b 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.h
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.h
@@ -183,6 +183,7 @@ enum SourceLanguage {
 };

 inline bool isCPlusPlus(SourceLanguage S) {
+  bool result = false;
   // Deliberately enumerate all the language options so we get a warning when
   // new language options are added (-Wswitch) that'll hopefully help keep this
   // switch up-to-date when new C++ versions are added.
@@ -191,7 +192,8 @@ inline bool isCPlusPlus(SourceLanguage S) {
   case DW_LANG_C_plus_plus_03:
   case DW_LANG_C_plus_plus_11:
   case DW_LANG_C_plus_plus_14:
-    return true;
+    result = true;
+    break;
   case DW_LANG_C89:
   case DW_LANG_C:
   case DW_LANG_Ada83:
@@ -230,9 +232,68 @@ inline bool isCPlusPlus(SourceLanguage S) {
   case DW_LANG_BORLAND_Delphi:
   case DW_LANG_lo_user:
   case DW_LANG_hi_user:
-    return false;
+    result = false;
+    break;
+  }
+
+  return result;
+}
+
+inline bool isFortran(SourceLanguage S) {
+  bool result = false;
+  // Deliberately enumerate all the language options so we get a warning when
+  // new language options are added (-Wswitch) that'll hopefully help keep this
+  // switch up-to-date when new Fortran versions are added.
+ switch (S) { + case DW_LANG_Fortran77: + case DW_LANG_Fortran90: + case DW_LANG_Fortran95: + case DW_LANG_Fortran03: + case DW_LANG_Fortran08: + result = true; + break; + case DW_LANG_C89: + case DW_LANG_C: + case DW_LANG_Ada83: + case DW_LANG_C_plus_plus: + case DW_LANG_Cobol74: + case DW_LANG_Cobol85: + case DW_LANG_Pascal83: + case DW_LANG_Modula2: + case DW_LANG_Java: + case DW_LANG_C99: + case DW_LANG_Ada95: + case DW_LANG_PLI: + case DW_LANG_ObjC: + case DW_LANG_ObjC_plus_plus: + case DW_LANG_UPC: + case DW_LANG_D: + case DW_LANG_Python: + case DW_LANG_OpenCL: + case DW_LANG_Go: + case DW_LANG_Modula3: + case DW_LANG_Haskell: + case DW_LANG_C_plus_plus_03: + case DW_LANG_C_plus_plus_11: + case DW_LANG_OCaml: + case DW_LANG_Rust: + case DW_LANG_C11: + case DW_LANG_Swift: + case DW_LANG_Julia: + case DW_LANG_Dylan: + case DW_LANG_C_plus_plus_14: + case DW_LANG_RenderScript: + case DW_LANG_BLISS: + case DW_LANG_Mips_Assembler: + case DW_LANG_GOOGLE_RenderScript: + case DW_LANG_BORLAND_Delphi: + case DW_LANG_lo_user: + case DW_LANG_hi_user: + result = false; + break; } - llvm_unreachable("Invalid source language"); + + return result; } enum CaseSensitivity { diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index a5baa2bf16314..3fed0bf64b6e7 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -282,6 +282,9 @@ class Verifier : public InstVisitor, VerifierSupport { /// Whether the current function has a DISubprogram attached to it. bool HasDebugInfo = false; + /// The current source language. + dwarf::SourceLanguage CurrentSourceLang = dwarf::DW_LANG_lo_user; + /// Whether source was present on the first DIFile encountered in each CU. DenseMap HasSourceDebugInfo; @@ -895,7 +898,9 @@ void Verifier::visitDIScope(const DIScope &N) { void Verifier::visitDISubrange(const DISubrange &N) { AssertDI(N.getTag() == dwarf::DW_TAG_subrange_type, "invalid tag", &N); - AssertDI(N.getRawCountNode() || N.getRawUpperBound(), + bool HasAssumedSizedArraySupport = dwarf::isFortran(CurrentSourceLang); + AssertDI(HasAssumedSizedArraySupport || N.getRawCountNode() || + N.getRawUpperBound(), "Subrange must contain count or upperBound", &N); AssertDI(!N.getRawCountNode() || !N.getRawUpperBound(), "Subrange can have any one of count or upperBound", &N); @@ -1100,6 +1105,8 @@ void Verifier::visitDICompileUnit(const DICompileUnit &N) { AssertDI(!N.getFile()->getFilename().empty(), "invalid filename", &N, N.getFile()); + CurrentSourceLang = (dwarf::SourceLanguage)N.getSourceLanguage(); + verifySourceDebugInfo(N, *N.getFile()); AssertDI((N.getEmissionKind() <= DICompileUnit::LastEmissionKind), diff --git a/llvm/test/DebugInfo/X86/assumed_size_array.ll b/llvm/test/DebugInfo/X86/assumed_size_array.ll new file mode 100644 index 0000000000000..cad7afdd68b59 --- /dev/null +++ b/llvm/test/DebugInfo/X86/assumed_size_array.ll @@ -0,0 +1,122 @@ +;; Check whether fortran assumed size array is accepted +;; which has upperBound absent in DISubrange + +; RUN: llc -mtriple=x86_64-unknown-linux-gnu %s -filetype=obj -o %t.o +; RUN: llvm-dwarfdump %t.o | FileCheck %s + +; CHECK-LABEL: DW_TAG_formal_parameter +; CHECK: DW_AT_name ("array1") +; CHECK: DW_AT_type ([[type1:0x[0-9a-f]+]] +; CHECK-LABEL: DW_TAG_formal_parameter +; CHECK: DW_AT_name ("array2") +; CHECK: DW_AT_type ([[type2:0x[0-9a-f]+]] +; CHECK: [[type1]]: DW_TAG_array_type +; CHECK: DW_TAG_subrange_type +; CHECK: [[type2]]: DW_TAG_array_type +; CHECK: DW_TAG_subrange_type +; CHECK: DW_AT_lower_bound (4) +; CHECK: DW_AT_upper_bound (9) +; 
CHECK: DW_TAG_subrange_type +; CHECK: DW_AT_lower_bound (10) +; +; +;; original fortran program +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;subroutine sub (array1, array2) +;; integer :: array1 (*) +;; integer :: array2 (4:9, 10:*) +;; +;; array1(7:8) = 9 +;; array2(5, 10) = 10 +;;end subroutine +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; ModuleID = 'assumed_size_array.ll' +source_filename = "assumed_size_array.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@.C344_sub_ = internal constant i32 10 +@.C345_sub_ = internal constant i64 10 +@.C351_sub_ = internal constant i64 5 +@.C341_sub_ = internal constant i32 9 +@.C322_sub_ = internal constant i64 1 +@.C350_sub_ = internal constant i64 8 +@.C349_sub_ = internal constant i64 7 + +define void @sub_(i64* noalias %array1, i64* noalias %array2) #0 !dbg !5 { +L.entry: + %.dY0001_361 = alloca i64, align 8 + %"i$a_357" = alloca i64, align 8 + call void @llvm.dbg.declare(metadata i64* %array1, metadata !16, metadata !DIExpression()), !dbg !17 + call void @llvm.dbg.declare(metadata i64* %array2, metadata !18, metadata !DIExpression()), !dbg !17 + br label %L.LB1_364 + +L.LB1_364: ; preds = %L.entry + store i64 2, i64* %.dY0001_361, align 8, !dbg !19 + call void @llvm.dbg.declare(metadata i64* %"i$a_357", metadata !20, metadata !DIExpression()), !dbg !17 + store i64 7, i64* %"i$a_357", align 8, !dbg !19 + br label %L.LB1_359 + +L.LB1_359: ; preds = %L.LB1_359, %L.LB1_364 + %0 = load i64, i64* %"i$a_357", align 8, !dbg !19 + call void @llvm.dbg.value(metadata i64 %0, metadata !22, metadata !DIExpression()), !dbg !17 + %1 = bitcast i64* %array1 to i8*, !dbg !19 + %2 = getelementptr i8, i8* %1, i64 -4, !dbg !19 + %3 = bitcast i8* %2 to i32*, !dbg !19 + %4 = getelementptr i32, i32* %3, i64 %0, !dbg !19 + store i32 9, i32* %4, align 4, !dbg !19 + %5 = load i64, i64* %"i$a_357", align 8, !dbg !19 + call void @llvm.dbg.value(metadata i64 %5, metadata !23, metadata !DIExpression()), !dbg !17 + %6 = add nsw i64 %5, 1, !dbg !19 + store i64 %6, i64* %"i$a_357", align 8, !dbg !19 + %7 = load i64, i64* %.dY0001_361, align 8, !dbg !19 + %8 = sub nsw i64 %7, 1, !dbg !19 + store i64 %8, i64* %.dY0001_361, align 8, !dbg !19 + %9 = load i64, i64* %.dY0001_361, align 8, !dbg !19 + %10 = icmp sgt i64 %9, 0, !dbg !19 + br i1 %10, label %L.LB1_359, label %L.LB1_383, !dbg !19 + +L.LB1_383: ; preds = %L.LB1_359 + %11 = bitcast i64* %array2 to i8*, !dbg !24 + %12 = getelementptr i8, i8* %11, i64 4, !dbg !24 + %13 = bitcast i8* %12 to i32*, !dbg !24 + store i32 10, i32* %13, align 4, !dbg !24 + ret void, !dbg !25 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.module.flags = !{!0, !1} +!llvm.dbg.cu = !{!2} + +!0 = !{i32 2, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = distinct !DICompileUnit(language: DW_LANG_Fortran90, file: !3, producer: " F90 Flang - 1.5 2017-05-01", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !4, globals: !4, imports: !4) +!3 = !DIFile(filename: "assumed_size_array.f90", directory: "/tmp") +!4 = !{} +!5 = distinct !DISubprogram(name: "sub", scope: !2, file: !3, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !2) +!6 = !DISubroutineType(types: !7) +!7 = 
!{null, !8, !12}
!8 = !DICompositeType(tag: DW_TAG_array_type, baseType: !9, align: 32, elements: !10)
!9 = !DIBasicType(name: "integer", size: 32, align: 32, encoding: DW_ATE_signed)
!10 = !{!11}
!11 = !DISubrange(lowerBound: 1)
!12 = !DICompositeType(tag: DW_TAG_array_type, baseType: !9, align: 32, elements: !13)
!13 = !{!14, !15}
!14 = !DISubrange(lowerBound: 4, upperBound: 9)
!15 = !DISubrange(lowerBound: 10)
!16 = !DILocalVariable(name: "array1", arg: 1, scope: !5, file: !3, line: 1, type: !8)
!17 = !DILocation(line: 0, scope: !5)
!18 = !DILocalVariable(name: "array2", arg: 2, scope: !5, file: !3, line: 1, type: !12)
!19 = !DILocation(line: 5, column: 1, scope: !5)
!20 = distinct !DILocalVariable(scope: !5, file: !3, type: !21, flags: DIFlagArtificial)
!21 = !DIBasicType(name: "integer*8", size: 64, align: 64, encoding: DW_ATE_signed)
!22 = distinct !DILocalVariable(scope: !5, file: !3, type: !21, flags: DIFlagArtificial)
!23 = distinct !DILocalVariable(scope: !5, file: !3, type: !21, flags: DIFlagArtificial)
!24 = !DILocation(line: 6, column: 1, scope: !5)
!25 = !DILocation(line: 7, column: 1, scope: !5)

From ef4851742de5e64a1ba9de51e375ac503d2d7ecb Mon Sep 17 00:00:00 2001
From: Georgii Rymar
Date: Wed, 16 Sep 2020 11:50:14 +0300
Subject: [PATCH 0803/1079] [llvm-readobj][test] - Address a forgotten review
 comment for D86923.

It seems I forgot to address this bit, and this looks like the reason
for a failure on mac (http://45.33.8.238/mac/20491/step_11.txt).

---
 .../llvm-readobj/ELF/dyn-symbols-size-from-hash-table.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/tools/llvm-readobj/ELF/dyn-symbols-size-from-hash-table.test b/llvm/test/tools/llvm-readobj/ELF/dyn-symbols-size-from-hash-table.test
index bd862e2669a1d..dc421c14eae90 100644
--- a/llvm/test/tools/llvm-readobj/ELF/dyn-symbols-size-from-hash-table.test
+++ b/llvm/test/tools/llvm-readobj/ELF/dyn-symbols-size-from-hash-table.test
@@ -353,7 +353,7 @@ ProgramHeaders:
 # RUN: llvm-readobj --sections --dyn-relocations %t4.1 2>&1 >> %t4.out.llvm.txt 2>&1
 # RUN: FileCheck %s -DFILE=%t4.1 --input-file=%t4.out.llvm.txt --check-prefix=BROKEN-NCHAIN-LLVM

-# BROKEN-NCHAIN-LLVM: {{^}}[[#%u, FILESIZE:]]
+# BROKEN-NCHAIN-LLVM: [[#%u, FILESIZE:]]
 # BROKEN-NCHAIN-LLVM: warning: '[[FILE]]': the size (0x17ffffffe8) of the dynamic symbol table at 0x[[#%x, DYNSYMOFF:]], derived from the hash table, goes past the end of the file (0x[[#%x, FILESIZE]]) and will be ignored

 # BROKEN-NCHAIN-LLVM: Name: .dynsym

From 3a0a2a6347f5a79ebfba2cc2b763dd02001d9baa Mon Sep 17 00:00:00 2001
From: Kirill Bobyrev
Date: Wed, 16 Sep 2020 11:11:31 +0200
Subject: [PATCH 0804/1079] [clangd] Implement hot index reloading for
 clangd-index-server

This patch adds a mechanism to load new versions of the index into
clangd-index-server, using SwapIndex and FileStatus information about
the last modification time, without downtime.

Reviewed By: kadircet

Differential Revision: https://reviews.llvm.org/D87450
---
 .../clangd/index/remote/server/Server.cpp     | 96 +++++++++++++++----
 1 file changed, 78 insertions(+), 18 deletions(-)

diff --git a/clang-tools-extra/clangd/index/remote/server/Server.cpp b/clang-tools-extra/clangd/index/remote/server/Server.cpp
index e9838cce85e3d..d8cf542496627 100644
--- a/clang-tools-extra/clangd/index/remote/server/Server.cpp
+++ b/clang-tools-extra/clangd/index/remote/server/Server.cpp
@@ -12,15 +12,25 @@
 #include "index/Symbol.h"
 #include "index/remote/marshalling/Marshalling.h"
 #include "support/Logger.h"
+#include "support/Shutdown.h"
+#include "support/ThreadsafeFS.h"
 #include "support/Trace.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Chrono.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include
 #include
 #include
+#include
+#include

 #include "Index.grpc.pb.h"
@@ -63,15 +73,10 @@ llvm::cl::opt<std::string> ServerAddress(
     "server-address", llvm::cl::init("0.0.0.0:50051"),
     llvm::cl::desc("Address of the invoked server. Defaults to 0.0.0.0:50051"));

-std::unique_ptr<clangd::SymbolIndex> openIndex(llvm::StringRef Index) {
-  return loadIndex(Index, /*UseIndex=*/true);
-}
-
 class RemoteIndexServer final : public SymbolIndex::Service {
 public:
-  RemoteIndexServer(std::unique_ptr<clangd::SymbolIndex> Index,
-                    llvm::StringRef IndexRoot)
-      : Index(std::move(Index)) {
+  RemoteIndexServer(clangd::SymbolIndex &Index, llvm::StringRef IndexRoot)
+      : Index(Index) {
     llvm::SmallString<256> NativePath = IndexRoot;
     llvm::sys::path::native(NativePath);
     ProtobufMarshaller = std::unique_ptr<Marshaller>(new Marshaller(
@@ -91,7 +96,7 @@ class RemoteIndexServer final : public SymbolIndex::Service {
     }
     unsigned Sent = 0;
     unsigned FailedToSend = 0;
-    Index->lookup(*Req, [&](const clangd::Symbol &Item) {
+    Index.lookup(*Req, [&](const clangd::Symbol &Item) {
       auto SerializedItem = ProtobufMarshaller->toProtobuf(Item);
       if (!SerializedItem) {
         elog("Unable to convert Symbol to protobuf: {0}",
@@ -124,7 +129,7 @@ class RemoteIndexServer final : public SymbolIndex::Service {
     }
     unsigned Sent = 0;
     unsigned FailedToSend = 0;
-    bool HasMore = Index->fuzzyFind(*Req, [&](const clangd::Symbol &Item) {
+    bool HasMore = Index.fuzzyFind(*Req, [&](const clangd::Symbol &Item) {
       auto SerializedItem = ProtobufMarshaller->toProtobuf(Item);
       if (!SerializedItem) {
         elog("Unable to convert Symbol to protobuf: {0}",
@@ -155,7 +160,7 @@ class RemoteIndexServer final : public SymbolIndex::Service {
     }
     unsigned Sent = 0;
     unsigned FailedToSend = 0;
-    bool HasMore = Index->refs(*Req, [&](const clangd::Ref &Item) {
+    bool HasMore = Index.refs(*Req, [&](const clangd::Ref &Item) {
       auto SerializedItem = ProtobufMarshaller->toProtobuf(Item);
       if (!SerializedItem) {
         elog("Unable to convert Ref to protobuf: {0}",
@@ -188,7 +193,7 @@ class RemoteIndexServer final : public SymbolIndex::Service {
     }
     unsigned Sent = 0;
     unsigned FailedToSend = 0;
-    Index->relations(
+    Index.relations(
        *Req, [&](const SymbolID &Subject, const clangd::Symbol &Object) {
          auto SerializedItem = ProtobufMarshaller->toProtobuf(Subject, Object);
          if (!SerializedItem) {
@@ -210,22 +215,56 @@ class RemoteIndexServer final : public SymbolIndex::Service {
     return grpc::Status::OK;
   }

-  std::unique_ptr<clangd::SymbolIndex> Index;
-  std::unique_ptr<Marshaller> ProtobufMarshaller;
+  std::unique_ptr<Marshaller> ProtobufMarshaller;
+  clangd::SymbolIndex &Index;
 };

-void
Reviewed By: kadircet Differential Revision: https://reviews.llvm.org/D87450 --- .../clangd/index/remote/server/Server.cpp | 96 +++++++++++++++---- 1 file changed, 78 insertions(+), 18 deletions(-) diff --git a/clang-tools-extra/clangd/index/remote/server/Server.cpp b/clang-tools-extra/clangd/index/remote/server/Server.cpp index e9838cce85e3d..d8cf542496627 100644 --- a/clang-tools-extra/clangd/index/remote/server/Server.cpp +++ b/clang-tools-extra/clangd/index/remote/server/Server.cpp @@ -12,15 +12,25 @@ #include "index/Symbol.h" #include "index/remote/marshalling/Marshalling.h" #include "support/Logger.h" +#include "support/Shutdown.h" +#include "support/ThreadsafeFS.h" #include "support/Trace.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Chrono.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/VirtualFileSystem.h" +#include #include #include +#include +#include #include "Index.grpc.pb.h" @@ -63,15 +73,10 @@ llvm::cl::opt ServerAddress( "server-address", llvm::cl::init("0.0.0.0:50051"), llvm::cl::desc("Address of the invoked server. Defaults to 0.0.0.0:50051")); -std::unique_ptr openIndex(llvm::StringRef Index) { - return loadIndex(Index, /*UseIndex=*/true); -} - class RemoteIndexServer final : public SymbolIndex::Service { public: - RemoteIndexServer(std::unique_ptr Index, - llvm::StringRef IndexRoot) - : Index(std::move(Index)) { + RemoteIndexServer(clangd::SymbolIndex &Index, llvm::StringRef IndexRoot) + : Index(Index) { llvm::SmallString<256> NativePath = IndexRoot; llvm::sys::path::native(NativePath); ProtobufMarshaller = std::unique_ptr(new Marshaller( @@ -91,7 +96,7 @@ class RemoteIndexServer final : public SymbolIndex::Service { } unsigned Sent = 0; unsigned FailedToSend = 0; - Index->lookup(*Req, [&](const clangd::Symbol &Item) { + Index.lookup(*Req, [&](const clangd::Symbol &Item) { auto SerializedItem = ProtobufMarshaller->toProtobuf(Item); if (!SerializedItem) { elog("Unable to convert Symbol to protobuf: {0}", @@ -124,7 +129,7 @@ class RemoteIndexServer final : public SymbolIndex::Service { } unsigned Sent = 0; unsigned FailedToSend = 0; - bool HasMore = Index->fuzzyFind(*Req, [&](const clangd::Symbol &Item) { + bool HasMore = Index.fuzzyFind(*Req, [&](const clangd::Symbol &Item) { auto SerializedItem = ProtobufMarshaller->toProtobuf(Item); if (!SerializedItem) { elog("Unable to convert Symbol to protobuf: {0}", @@ -155,7 +160,7 @@ class RemoteIndexServer final : public SymbolIndex::Service { } unsigned Sent = 0; unsigned FailedToSend = 0; - bool HasMore = Index->refs(*Req, [&](const clangd::Ref &Item) { + bool HasMore = Index.refs(*Req, [&](const clangd::Ref &Item) { auto SerializedItem = ProtobufMarshaller->toProtobuf(Item); if (!SerializedItem) { elog("Unable to convert Ref to protobuf: {0}", @@ -188,7 +193,7 @@ class RemoteIndexServer final : public SymbolIndex::Service { } unsigned Sent = 0; unsigned FailedToSend = 0; - Index->relations( + Index.relations( *Req, [&](const SymbolID &Subject, const clangd::Symbol &Object) { auto SerializedItem = ProtobufMarshaller->toProtobuf(Subject, Object); if (!SerializedItem) { @@ -210,22 +215,56 @@ class RemoteIndexServer final : public SymbolIndex::Service { return grpc::Status::OK; } - std::unique_ptr Index; std::unique_ptr ProtobufMarshaller; + clangd::SymbolIndex &Index; }; -void 
runServer(std::unique_ptr Index, - const std::string &ServerAddress) { - RemoteIndexServer Service(std::move(Index), IndexRoot); +// Detect changes in \p IndexPath file and load new versions of the index +// whenever they become available. +void hotReload(clangd::SwapIndex &Index, llvm::StringRef IndexPath, + llvm::vfs::Status &LastStatus, + llvm::IntrusiveRefCntPtr &FS) { + auto Status = FS->status(IndexPath); + // Requested file is same as loaded index: no reload is needed. + if (!Status || (Status->getLastModificationTime() == + LastStatus.getLastModificationTime() && + Status->getSize() == LastStatus.getSize())) + return; + vlog("Found different index version: existing index was modified at {0}, new " + "index was modified at {1}. Attempting to reload.", + LastStatus.getLastModificationTime(), Status->getLastModificationTime()); + LastStatus = *Status; + std::unique_ptr NewIndex = loadIndex(IndexPath); + if (!NewIndex) { + elog("Failed to load new index. Old index will be served."); + return; + } + Index.reset(std::move(NewIndex)); + log("New index version loaded. Last modification time: {0}, size: {1} bytes.", + Status->getLastModificationTime(), Status->getSize()); +} + +void runServerAndWait(clangd::SymbolIndex &Index, llvm::StringRef ServerAddress, + llvm::StringRef IndexPath) { + RemoteIndexServer Service(Index, IndexRoot); grpc::EnableDefaultHealthCheckService(true); grpc::ServerBuilder Builder; - Builder.AddListeningPort(ServerAddress, grpc::InsecureServerCredentials()); + Builder.AddListeningPort(ServerAddress.str(), + grpc::InsecureServerCredentials()); Builder.RegisterService(&Service); std::unique_ptr Server(Builder.BuildAndStart()); log("Server listening on {0}", ServerAddress); + std::thread ServerShutdownWatcher([&]() { + static constexpr auto WatcherFrequency = std::chrono::seconds(5); + while (!clang::clangd::shutdownRequested()) + std::this_thread::sleep_for(WatcherFrequency); + Server->Shutdown(); + }); + Server->Wait(); + ServerShutdownWatcher.join(); } } // namespace @@ -239,6 +278,7 @@ int main(int argc, char *argv[]) { using namespace clang::clangd::remote; llvm::cl::ParseCommandLineOptions(argc, argv, Overview); llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); + llvm::sys::SetInterruptFunction(&clang::clangd::requestShutdown); if (!llvm::sys::path::is_absolute(IndexRoot)) { llvm::errs() << "Index root should be an absolute path.\n"; @@ -273,12 +313,32 @@ int main(int argc, char *argv[]) { if (Tracer) TracingSession.emplace(*Tracer); - std::unique_ptr Index = openIndex(IndexPath); + clang::clangd::RealThreadsafeFS TFS; + auto FS = TFS.view(llvm::None); + auto Status = FS->status(IndexPath); + if (!Status) { + elog("{0} does not exist.", IndexPath); + return Status.getError().value(); + } + + auto Index = std::make_unique( + clang::clangd::loadIndex(IndexPath)); if (!Index) { llvm::errs() << "Failed to open the index.\n"; return -1; } - runServer(std::move(Index), ServerAddress); + std::thread HotReloadThread([&Index, &Status, &FS]() { + llvm::vfs::Status LastStatus = *Status; + static constexpr auto RefreshFrequency = std::chrono::seconds(90); + while (!clang::clangd::shutdownRequested()) { + hotReload(*Index, llvm::StringRef(IndexPath), LastStatus, FS); + std::this_thread::sleep_for(RefreshFrequency); + } + }); + + runServerAndWait(*Index, ServerAddress, IndexPath); + + HotReloadThread.join(); } From 6040e2a6d97d9f9445715dfc468c3112f40e2588 Mon Sep 17 00:00:00 2001 From: Andrew Ng Date: Mon, 7 Sep 2020 13:22:12 +0100 Subject: [PATCH 0805/1079] [Support] Add 
GlobPattern::isTrivialMatchAll() GlobPattern::isTrivialMatchAll() returns true for the GlobPattern "*" which will match all inputs. This can be used to avoid performing expensive preparation of the input for match() when the result of the match will always be true. Differential Revision: https://reviews.llvm.org/D87468 --- llvm/include/llvm/Support/GlobPattern.h | 10 ++++++++++ llvm/unittests/Support/GlobPatternTest.cpp | 13 +++++++++++++ 2 files changed, 23 insertions(+) diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h index 3e5989d025007..b79de6f41c494 100644 --- a/llvm/include/llvm/Support/GlobPattern.h +++ b/llvm/include/llvm/Support/GlobPattern.h @@ -31,6 +31,16 @@ class GlobPattern { static Expected create(StringRef Pat); bool match(StringRef S) const; + // Returns true for glob pattern "*". Can be used to avoid expensive + // preparation/acquisition of the input for match(). + bool isTrivialMatchAll() const { + if (Prefix && Prefix->empty()) { + assert(!Suffix); + return true; + } + return false; + } + private: bool matchOne(ArrayRef Pat, StringRef S) const; diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp index 17d60b2b85087..7acd311b0bb92 100644 --- a/llvm/unittests/Support/GlobPatternTest.cpp +++ b/llvm/unittests/Support/GlobPatternTest.cpp @@ -133,4 +133,17 @@ TEST_F(GlobPatternTest, ExtSym) { EXPECT_TRUE((bool)Pat2); EXPECT_TRUE(Pat2->match("\xFF")); } + +TEST_F(GlobPatternTest, IsTrivialMatchAll) { + Expected Pat1 = GlobPattern::create("*"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->isTrivialMatchAll()); + + const char *NegativeCases[] = {"a*", "*a", "?*", "*?", "**", "\\*"}; + for (auto *P : NegativeCases) { + Expected Pat2 = GlobPattern::create(P); + EXPECT_TRUE((bool)Pat2); + EXPECT_FALSE(Pat2->isTrivialMatchAll()); + } +} } From 77152a6b7ac07ce65568d7c69305653e7cad4bb0 Mon Sep 17 00:00:00 2001 From: Andrew Ng Date: Wed, 9 Sep 2020 10:48:21 +0100 Subject: [PATCH 0806/1079] [LLD][ELF] Optimize linker script filename glob pattern matching NFC Optimize the filename glob pattern matching in LinkerScript::computeInputSections() and LinkerScript::shouldKeep(). Add InputFile::getNameForScript(), which returns and, if required, caches the InputFile's name used for linker script matching. This avoids the overhead of name creation that was in getFilename() in LinkerScript.cpp. Add InputSectionDescription::matchesFile() and SectionPattern::excludesFile() which perform the glob pattern matching for an InputFile and make use of a cache of the previous result. As both computeInputSections() and shouldKeep() process sections in order and the sections of the same InputFile are contiguous, these single-entry caches can significantly speed up matching for more complex glob patterns. These changes have been seen to reduce link time with --gc-sections by up to ~40% with linker scripts that contain KEEP filename glob patterns such as "*crtbegin*.o".
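The caching idea in miniature, as a rough self-contained sketch (illustrative types only; CachedFileMatcher and its toy glob test are not the real lld classes, which use SingleStringMatcher and GlobPattern):

  #include <optional>
  #include <string>
  #include <utility>

  struct CachedFileMatcher {
    std::string Pattern;
    // Single-entry cache of the previous (file, result) pair. Sections of
    // one input file are processed contiguously, so this collapses many
    // expensive glob matches per file into one.
    mutable std::optional<std::pair<const void *, bool>> Cache;

    bool expensiveGlobMatch(const std::string &Name) const {
      // Toy placeholder for real glob matching.
      return Pattern == "*" || Name.find(Pattern) != std::string::npos;
    }

    bool matchesFile(const void *File, const std::string &Name) const {
      if (Pattern == "*") // the isTrivialMatchAll() fast path
        return true;
      if (!Cache || Cache->first != File)
        Cache.emplace(File, expensiveGlobMatch(Name));
      return Cache->second;
    }
  };

The same shape appears twice in the patch: once for the positive file pattern (matchesFile) and once for EXCLUDE_FILE patterns (excludesFile).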
Differential Revision: https://reviews.llvm.org/D87469 --- lld/ELF/AArch64ErrataFix.h | 2 +- lld/ELF/ARMErrataFix.h | 2 +- lld/ELF/InputFiles.cpp | 10 +++++++++ lld/ELF/InputFiles.h | 6 ++++++ lld/ELF/LinkerScript.cpp | 37 +++++++++++++++++++++----------- lld/ELF/LinkerScript.h | 22 +++++++++++++++---- lld/ELF/Relocations.h | 2 +- lld/include/lld/Common/Strings.h | 7 +++++- 8 files changed, 67 insertions(+), 21 deletions(-) diff --git a/lld/ELF/AArch64ErrataFix.h b/lld/ELF/AArch64ErrataFix.h index 0548b58751ff9..dfe57b95dd996 100644 --- a/lld/ELF/AArch64ErrataFix.h +++ b/lld/ELF/AArch64ErrataFix.h @@ -18,7 +18,7 @@ namespace elf { class Defined; class InputSection; -struct InputSectionDescription; +class InputSectionDescription; class OutputSection; class Patch843419Section; diff --git a/lld/ELF/ARMErrataFix.h b/lld/ELF/ARMErrataFix.h index 5a39bcc75cd3b..a93609b35bafc 100644 --- a/lld/ELF/ARMErrataFix.h +++ b/lld/ELF/ARMErrataFix.h @@ -19,7 +19,7 @@ namespace elf { class Defined; class InputSection; -struct InputSectionDescription; +class InputSectionDescription; class OutputSection; class Patch657417Section; diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 63474b15e451e..bd079b41ac908 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -274,6 +274,16 @@ std::string InputFile::getSrcMsg(const Symbol &sym, InputSectionBase &sec, } } +StringRef InputFile::getNameForScript() const { + if (archiveName.empty()) + return getName(); + + if (nameForScriptCache.empty()) + nameForScriptCache = (archiveName + Twine(':') + getName()).str(); + + return nameForScriptCache; +} + template DWARFCache *ObjFile::getDwarf() { llvm::call_once(initDwarf, [this]() { dwarf = std::make_unique(std::make_unique( diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 7af85e417ca58..b1c83ddf384fb 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -92,6 +92,9 @@ class InputFile { return symbols; } + // Get filename to use for linker script processing. + StringRef getNameForScript() const; + // Filename of .a which contained this file. If this file was // not in an archive file, it is the empty string. We use this // string for creating error messages. @@ -147,6 +150,9 @@ class InputFile { private: const Kind fileKind; + + // Cache for getNameForScript(). + mutable std::string nameForScriptCache; }; class ELFFileBase : public InputFile { diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 11f0fc9d5fbe2..ba51a8b402fd1 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -320,20 +320,33 @@ void LinkerScript::assignSymbol(SymbolAssignment *cmd, bool inSec) { cmd->sym->type = v.type; } -static std::string getFilename(InputFile *file) { - if (!file) - return ""; - if (file->archiveName.empty()) - return std::string(file->getName()); - return (file->archiveName + ':' + file->getName()).str(); +static inline StringRef getFilename(const InputFile *file) { + return file ? 
file->getNameForScript() : StringRef(); } -bool LinkerScript::shouldKeep(InputSectionBase *s) { - if (keptSections.empty()) +bool InputSectionDescription::matchesFile(const InputFile *file) const { + if (filePat.isTrivialMatchAll()) + return true; + + if (!matchesFileCache || matchesFileCache->first != file) + matchesFileCache.emplace(file, filePat.match(getFilename(file))); + + return matchesFileCache->second; +} + +bool SectionPattern::excludesFile(const InputFile *file) const { + if (excludedFilePat.empty()) return false; - std::string filename = getFilename(s->file); + + if (!excludesFileCache || excludesFileCache->first != file) + excludesFileCache.emplace(file, excludedFilePat.match(getFilename(file))); + + return excludesFileCache->second; +} + +bool LinkerScript::shouldKeep(InputSectionBase *s) { for (InputSectionDescription *id : keptSections) - if (id->filePat.match(filename)) + if (id->matchesFile(s->file)) for (SectionPattern &p : id->sectionPatterns) if (p.sectionPat.match(s->name) && (s->flags & id->withFlags) == id->withFlags && @@ -433,9 +446,7 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd, if (!pat.sectionPat.match(sec->name)) continue; - std::string filename = getFilename(sec->file); - if (!cmd->filePat.match(filename) || - pat.excludedFilePat.match(filename) || + if (!cmd->matchesFile(sec->file) || pat.excludesFile(sec->file) || (sec->flags & cmd->withFlags) != cmd->withFlags || (sec->flags & cmd->withoutFlags) != 0) continue; diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index 4a1a5fd71b67f..efa473f45e308 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -29,6 +29,7 @@ namespace lld { namespace elf { class Defined; +class InputFile; class InputSection; class InputSectionBase; class OutputSection; @@ -146,19 +147,32 @@ struct MemoryRegion { // This struct represents one section match pattern in SECTIONS() command. // It can optionally have negative match pattern for EXCLUDED_FILE command. // Also it may be surrounded with SORT() command, so contains sorting rules. -struct SectionPattern { +class SectionPattern { + StringMatcher excludedFilePat; + + // Cache of the most recent input argument and result of excludesFile(). + mutable llvm::Optional> excludesFileCache; + +public: SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2) : excludedFilePat(pat1), sectionPat(pat2), sortOuter(SortSectionPolicy::Default), sortInner(SortSectionPolicy::Default) {} - StringMatcher excludedFilePat; + bool excludesFile(const InputFile *file) const; + StringMatcher sectionPat; SortSectionPolicy sortOuter; SortSectionPolicy sortInner; }; -struct InputSectionDescription : BaseCommand { +class InputSectionDescription : public BaseCommand { + SingleStringMatcher filePat; + + // Cache of the most recent input argument and result of matchesFile(). + mutable llvm::Optional> matchesFileCache; + +public: InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0, uint64_t withoutFlags = 0) : BaseCommand(InputSectionKind), filePat(filePattern), @@ -168,7 +182,7 @@ struct InputSectionDescription : BaseCommand { return c->kind == InputSectionKind; } - SingleStringMatcher filePat; + bool matchesFile(const InputFile *file) const; // Input sections that matches at least one of SectionPatterns // will be associated with this InputSectionDescription. 
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index 4f48082b8be9d..fccd56880718a 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -131,7 +131,7 @@ bool hexagonNeedsTLSSymbol(ArrayRef outputSections); class ThunkSection; class Thunk; -struct InputSectionDescription; +class InputSectionDescription; class ThunkCreator { public: diff --git a/lld/include/lld/Common/Strings.h b/lld/include/lld/Common/Strings.h index 3940d2443cd45..38d93e01c0b95 100644 --- a/lld/include/lld/Common/Strings.h +++ b/lld/include/lld/Common/Strings.h @@ -39,6 +39,11 @@ class SingleStringMatcher { // Match s against this pattern, exactly if ExactMatch is true. bool match(llvm::StringRef s) const; + // Returns true for pattern "*" which will match all inputs. + bool isTrivialMatchAll() const { + return !ExactMatch && GlobPatternMatcher.isTrivialMatchAll(); + } + private: // Whether to do an exact match irregardless of the presence of wildcard // character. @@ -69,7 +74,7 @@ class StringMatcher { // Add a new pattern to the existing ones to match against. void addPattern(SingleStringMatcher Matcher) { patterns.push_back(Matcher); } - bool empty() { return patterns.empty(); } + bool empty() const { return patterns.empty(); } // Match s against the patterns. bool match(llvm::StringRef s) const; From a8d02015fcb783d5fdf1e09edd1b9e152c5d19b7 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Mon, 14 Sep 2020 16:38:29 +0300 Subject: [PATCH 0807/1079] [llvm-readobj][test] - Improve section-symbols.test `section-symbols.test` tests how we print section symbols in different situations. We might have two different cases: 1) A named STT_SECTION symbol. 2) An unnamed STT_SECTION symbol. Usually section symbols have no name, and then `--symbols` uses their section names when printing them. If a symbol has a name, then that name is used. We probably also want this logic for `--relocations`, but currently we always ignore symbol names and always use section names. This is not consistent with GNU readelf or with our logic for `--symbols`. This patch refines the testing to document the existing behavior and improve coverage. Differential revision: https://reviews.llvm.org/D87612 --- .../llvm-readobj/ELF/section-symbols.test | 125 +++++++++++++++--- 1 file changed, 104 insertions(+), 21 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/ELF/section-symbols.test b/llvm/test/tools/llvm-readobj/ELF/section-symbols.test index 3b6a2eca4fc4e..1aac1e6f06e8f 100644 --- a/llvm/test/tools/llvm-readobj/ELF/section-symbols.test +++ b/llvm/test/tools/llvm-readobj/ELF/section-symbols.test @@ -1,35 +1,71 @@ -## ELF section symbols use the section names when printing. This test verifies -## this and also that appropriate things are printed if the section is somehow -## invalid. +## ELF section symbols use the corresponding section names when printing +## unnamed symbols. This test verifies this and also that appropriate things +## are printed if the section is somehow invalid. # RUN: yaml2obj %s -o %t1 -# RUN: llvm-readobj %t1 --symbols 2> %t.llvm.err1 | FileCheck %s --check-prefix=LLVM1 -# RUN: FileCheck %s --input-file %t.llvm.err1 --check-prefix=WARN1 --implicit-check-not=warning -# RUN: llvm-readelf %t1 --symbols 2> %t.gnu.err1 | FileCheck %s --check-prefix=GNU1 -# RUN: FileCheck %s --input-file %t.gnu.err1 --check-prefix=WARN1 --implicit-check-not=warning +## FIXME: 1) Relocations should print section symbol names when they are not empty.
+## 2) We should still print a relocation even when we are unable to lookup a symbol name. +# RUN: llvm-readobj %t1 --symbols --relocations 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t1 --check-prefix=LLVM1 --implicit-check-not="warning:" +# RUN: llvm-readelf %t1 --symbols --relocations 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t1 --check-prefix=GNU1 --implicit-check-not="warning:" + +# LLVM1: Relocations [ +# LLVM1-NEXT: Section (4) .rela.foo { +# LLVM1-NEXT: 0x1 R_X86_64_NONE .foo 0x0 +# LLVM1-NEXT: 0x2 R_X86_64_NONE .foo 0x0 +# LLVM1-NEXT: warning: '[[FILE]]': unable to print relocation 3 in SHT_RELA section with index 4: invalid section index: 67 +# LLVM1-NEXT: warning: '[[FILE]]': unable to print relocation 4 in SHT_RELA section with index 4: invalid section index: 67 +# LLVM1-NEXT: 0x5 R_X86_64_NONE .bar 0x0 +# LLVM1-NEXT: 0x6 R_X86_64_NONE .bar 0x0 +# LLVM1-NEXT: warning: '[[FILE]]': unable to print relocation 7 in SHT_RELA section with index 4: invalid section index: 66 +# LLVM1-NEXT: warning: '[[FILE]]': unable to print relocation 8 in SHT_RELA section with index 4: invalid section index: 66 +# LLVM1-NEXT: } +# LLVM1-NEXT: ] # LLVM1: Name: (0) # LLVM1: Name: .foo (0) +# LLVM1: Name: symbol1 (25) +# LLVM1: warning: '[[FILE]]': invalid section index: 67 # LLVM1: Name:
    (0) +# LLVM1: Name: symbol2 (17) # LLVM1: Name: .bar (0) +# LLVM1: Name: symbol3 (9) +# LLVM1: warning: '[[FILE]]': invalid section index: 66 # LLVM1: Name:
    (0) +# LLVM1: Name: symbol4 (1) + +# GNU1: Relocation section '.rela.foo' at offset 0x58 contains 8 entries: +# GNU1-NEXT: Offset Info Type Sym. Value Symbol's Name + Addend +# GNU1-NEXT: 00000001 00000100 R_X86_64_NONE 00000000 .foo + 0 +# GNU1-NEXT: 00000002 00000200 R_X86_64_NONE 00000000 .foo + 0 +# GNU1-NEXT: warning: '[[FILE]]': unable to print relocation 3 in SHT_RELA section with index 4: invalid section index: 67 +# GNU1-NEXT: warning: '[[FILE]]': unable to print relocation 4 in SHT_RELA section with index 4: invalid section index: 67 +# GNU1-NEXT: 00000005 00000500 R_X86_64_NONE 00000000 .bar + 0 +# GNU1-NEXT: 00000006 00000600 R_X86_64_NONE 00000000 .bar + 0 +# GNU1-NEXT: warning: '[[FILE]]': unable to print relocation 7 in SHT_RELA section with index 4: invalid section index: 66 +# GNU1-NEXT: warning: '[[FILE]]': unable to print relocation 8 in SHT_RELA section with index 4: invalid section index: 66 -# GNU1: Symbol table '.symtab' contains 5 entries: +# GNU1: Symbol table '.symtab' contains 9 entries: # GNU1-NEXT: Num: {{.*}} Type {{.*}} Ndx Name # GNU1-NEXT: 0: {{.*}} NOTYPE {{.*}} UND {{$}} -# GNU1-NEXT: 1: {{.*}} SECTION {{.*}} 1 .foo -# GNU1-NEXT: 2: {{.*}} SECTION {{.*}} 67
    -# GNU1-NEXT: 3: {{.*}} SECTION {{.*}} 2 .bar -# GNU1-NEXT: 4: {{.*}} SECTION {{.*}} 66
    - -# WARN1: warning: '{{.*}}.tmp1': invalid section index: 67 -# WARN1: warning: '{{.*}}.tmp1': invalid section index: 66 +# GNU1-NEXT: 1: {{.*}} SECTION {{.*}} 1 .foo +# GNU1-NEXT: 2: {{.*}} SECTION {{.*}} 1 symbol1 +# GNU1-NEXT: warning: '[[FILE]]': invalid section index: 67 +# GNU1-NEXT: 3: {{.*}} SECTION {{.*}} 67
    +# GNU1-NEXT: 4: {{.*}} SECTION {{.*}} 67 symbol2 +# GNU1-NEXT: 5: {{.*}} SECTION {{.*}} 2 .bar +# GNU1-NEXT: 6: {{.*}} SECTION {{.*}} 2 symbol3 +# GNU1-NEXT: warning: '[[FILE]]': invalid section index: 66 +# GNU1-NEXT: 7: {{.*}} SECTION {{.*}} 66
    +# GNU1-NEXT: 8: {{.*}} SECTION {{.*}} 66 symbol4 --- !ELF FileHeader: - Class: ELFCLASS32 - Data: ELFDATA2LSB - Type: ET_REL + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 Sections: - Name: .foo Type: SHT_PROGBITS @@ -38,22 +74,69 @@ Sections: - Name: .symtab_shndx Type: SHT_SYMTAB_SHNDX Link: .symtab - Entries: [ 0, 0, 0, 2, 0x42 ] + Entries: [ 0, 0, 0, 0, 0, 2, 2, 0x42, 0x42 ] + - Name: .rela.foo + Type: SHT_RELA + Link: .symtab + Info: .foo + Relocations: + - Offset: 0x1 + Symbol: 1 + Type: R_X86_64_NONE + - Offset: 0x2 + Symbol: 2 + Type: R_X86_64_NONE + - Offset: 0x3 + Symbol: 3 + Type: R_X86_64_NONE + - Offset: 0x4 + Symbol: 4 + Type: R_X86_64_NONE + - Offset: 0x5 + Symbol: 5 + Type: R_X86_64_NONE + - Offset: 0x6 + Symbol: 6 + Type: R_X86_64_NONE + - Offset: 0x7 + Symbol: 7 + Type: R_X86_64_NONE + - Offset: 0x8 + Symbol: 8 + Type: R_X86_64_NONE Symbols: +## Case 1: a valid unnamed section symbol. - Name: "" Section: .foo Type: STT_SECTION +## Case 2: a valid named section symbol. + - Name: "symbol1" + Section: .foo + Type: STT_SECTION +## Case 3: an unnamed section symbol with invalid index. - Name: "" Index: 0x43 Type: STT_SECTION - # Section symbol via SHT_SYMTAB_SHNDX. +## Case 4: a named section symbol with invalid index. + - Name: "symbol2" + Index: 0x43 + Type: STT_SECTION +## Case 5: a valid unnamed section symbol via SHT_SYMTAB_SHNDX. - Name: "" Index: SHN_XINDEX Type: STT_SECTION - # Section symbol via SHT_SYMTAB_SHNDX with invalid index. +## Case 6: a valid named section symbol via SHT_SYMTAB_SHNDX. + - Name: "symbol3" + Index: SHN_XINDEX + Type: STT_SECTION +## Case 7: a unnamed section symbol via SHT_SYMTAB_SHNDX with invalid index. - Name: "" Index: SHN_XINDEX Type: STT_SECTION +## Case 8: a named section symbol via SHT_SYMTAB_SHNDX with invalid index. + - Name: "symbol4" + Index: SHN_XINDEX + Type: STT_SECTION # RUN: yaml2obj %s --docnum=2 -o %t2 # RUN: llvm-readobj %t2 --symbols 2> %t.llvm.err2 | FileCheck %s --check-prefix=LLVM2 From ac2717bfdd0d36ce4b5c33661045a36db3c0cc45 Mon Sep 17 00:00:00 2001 From: Sam Tebbs Date: Wed, 16 Sep 2020 10:59:19 +0100 Subject: [PATCH 0808/1079] [ARM][LowOverheadLoops] Fix tests after ef0b9f3 ef0b9f3 didn't update the tests that it affected. 
--- .../Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll | 3 +-- llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir | 3 +-- llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir | 3 +-- llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll | 3 +-- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll index 459e2c8395997..522cce49f75a1 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll @@ -408,8 +408,7 @@ define dso_local void @continue_on_zero(i32* noalias nocapture %arg, i32* noalia ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 -; CHECK-NEXT: vcmp.i32 ne, q0, zr -; CHECK-NEXT: vpst +; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrwt.u32 q1, [r0] ; CHECK-NEXT: vmul.i32 q0, q1, q0 ; CHECK-NEXT: vpst diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir index f754559c4f264..29ebd7bd6cf13 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir @@ -118,8 +118,7 @@ body: | ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $q1, $q2, $q3, $r0, $r1 - ; CHECK: renamable $vpr = MVE_VCMPu32 renamable $q1, renamable $q0, 8, 0, killed $noreg - ; CHECK: MVE_VPST 2, implicit $vpr + ; CHECK: MVE_VPTv4u32 2, renamable $q1, renamable $q0, 8, implicit-def $vpr ; CHECK: renamable $vpr = MVE_VCMPu32 renamable $q0, renamable $q2, 2, 1, killed renamable $vpr ; CHECK: renamable $r1, renamable $q4 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv35, align 4) ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q4, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv12, align 4) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir index 5ec6079e6cbfd..a1a1e785672db 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir @@ -110,8 +110,7 @@ body: | ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $q1, $q2, $r0, $r1 - ; CHECK: renamable $vpr = MVE_VCMPu32 renamable $q1, renamable $q0, 8, 0, killed $noreg - ; CHECK: MVE_VPST 4, implicit $vpr + ; CHECK: MVE_VPTv4u32 4, renamable $q1, renamable $q0, 8, implicit-def $vpr ; CHECK: renamable $r1, renamable $q3 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv35, align 4) ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q3, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv12, align 4) ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q0, renamable $q2, 0, $noreg, undef renamable $q0 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll index 311a06a675771..2d890aaac331e 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll @@ -19,8 +19,7 @@ define void @arm_min_helium_f32(float* %pSrc, i32 %blockSize, float* nocapture % ; 
CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q4, [r0], #16 -; CHECK-NEXT: vcmp.f32 ge, q1, q4 -; CHECK-NEXT: vpstt +; CHECK-NEXT: vptt.f32 ge, q1, q4 ; CHECK-NEXT: vmovt q1, q4 ; CHECK-NEXT: vmovt q0, q2 ; CHECK-NEXT: vadd.i32 q2, q2, q3 From a909a84ef2d9220242512b8be1206ee3d9b3d8b9 Mon Sep 17 00:00:00 2001 From: Kirill Bobyrev Date: Wed, 16 Sep 2020 12:09:29 +0200 Subject: [PATCH 0809/1079] [clang-tidy] Improve documentation on Clangd integration The integration is already complete; this patch updates information as well as suggests using Clang-Tidy via Clangd integration that is vastly available in most editors through LSP client plugins. Reviewed By: hokein Differential Revision: https://reviews.llvm.org/D87686 --- .../docs/clang-tidy/Integrations.rst | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/clang-tools-extra/docs/clang-tidy/Integrations.rst b/clang-tools-extra/docs/clang-tidy/Integrations.rst index bdd012aec89ee..94851631fe1f6 100644 --- a/clang-tools-extra/docs/clang-tidy/Integrations.rst +++ b/clang-tools-extra/docs/clang-tidy/Integrations.rst @@ -2,12 +2,17 @@ Clang-tidy IDE/Editor Integrations ================================== -.. _Clangd: https://clang.llvm.org/extra/clangd.html +.. _clangd: http://clangd.llvm.org/ +.. _is available: https://clangd.llvm.org/installation.html#editor-plugins +.. _more: https://langserver.org/#implementations-client Apart from being a standalone tool, :program:`clang-tidy` is integrated into -various IDEs, code analyzers, and editors. Besides, it is currently being -integrated into Clangd_. The following table shows the most -well-known :program:`clang-tidy` integrations in detail. +various IDEs, code analyzers, and editors. We recommend using clangd_ which +integrates :program:`clang-tidy` and `is available`_ in most major editors +through plugins (Vim, Emacs, Visual Studio Code, Sublime Text and more_). + +The following table shows the most well-known :program:`clang-tidy` +integrations in detail. +--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ | | Feature | @@ -17,8 +22,8 @@ well-known :program:`clang-tidy` integrations in detail. |A.L.E. 
for Vim | \+\ | \-\ | \-\ | \-\ | \+\ | +--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ |Clang Power Tools for Visual Studio | \-\ | \+\ | \-\ | \+\ | \-\ | -+--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ -|Clangd | \+\ | \-\ | \-\ | \-\ | \-\ | ++-------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ +|Clangd | \+\ | \-\ | \-\ | \+\ | \-\ | +--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ |CLion IDE | \+\ | \+\ | \+\ | \+\ | \+\ | +--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ From 3f682611ab26442fb2a5fd49f44c6f032150a2e6 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 16 Sep 2020 11:02:09 +0100 Subject: [PATCH 0810/1079] [DAG] Remover getOperand() call. NFCI. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 909698ded4edc..9109aca880282 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10213,7 +10213,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); ISD::CondCode CC = cast(N0.getOperand(2))->get(); - EVT N00VT = N0.getOperand(0).getValueType(); + EVT N00VT = N00.getValueType(); // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. From 3e5a4ef51a1d0def10525b2059f5cdab0cb0ae8d Mon Sep 17 00:00:00 2001 From: Kirill Bobyrev Date: Wed, 16 Sep 2020 12:27:20 +0200 Subject: [PATCH 0811/1079] Fix table formatting after D87686 --- clang-tools-extra/docs/clang-tidy/Integrations.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/docs/clang-tidy/Integrations.rst b/clang-tools-extra/docs/clang-tidy/Integrations.rst index 94851631fe1f6..c81a00deb68ad 100644 --- a/clang-tools-extra/docs/clang-tidy/Integrations.rst +++ b/clang-tools-extra/docs/clang-tidy/Integrations.rst @@ -22,7 +22,7 @@ integrations in detail. |A.L.E. 
for Vim | \+\ | \-\ | \-\ | \-\ | \+\ | +--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ |Clang Power Tools for Visual Studio | \-\ | \+\ | \-\ | \+\ | \-\ | -+-------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ ++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ |Clangd | \+\ | \-\ | \-\ | \+\ | \-\ | +--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ |CLion IDE | \+\ | \+\ | \+\ | \+\ | \+\ | From 86172ce378169743bf82d9e69e6f428ec8ee81d1 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Wed, 16 Sep 2020 11:17:13 +0100 Subject: [PATCH 0812/1079] [ARM] Add more validForTailPredication Modify the unit test to inspect all MVE instructions and mark the load/store/move of vpr/p0 as valid, as well as the remaining scalar shifts. Differential Revision: https://reviews.llvm.org/D87753 --- llvm/lib/Target/ARM/ARMInstrMVE.td | 2 +- llvm/lib/Target/ARM/ARMInstrVFP.td | 20 ++++---- .../unittests/Target/ARM/MachineInstrTest.cpp | 49 ++++++++++++------- 3 files changed, 41 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 1d562c5702c62..6c3d3be58c72f 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -450,7 +450,7 @@ class MVE_ScalarShift { let Inst{31-20} = 0b111010100101; let Inst{8} = 0b1; - + let validForTailPredication=1; } class MVE_ScalarShiftSingleReg; } - let Predicates = [HasV8_1MMainline, HasMVEInt] in { + let Predicates = [HasV8_1MMainline, HasMVEInt], + D=MVEDomain, validForTailPredication=1 in { // System level VPR/P0 -> GPR let Uses = [VPR] in def VMRS_VPR : MovFromVFP<0b1100 /* vpr */, (outs GPR:$Rt), (ins), @@ -2845,12 +2846,19 @@ let Defs = [FPSCR] in { } } -let Predicates = [HasV8_1MMainline, HasMVEInt] in { +let Predicates = [HasV8_1MMainline, HasMVEInt], + D=MVEDomain, validForTailPredication=1 in { let Uses = [VPR] in { defm VSTR_VPR : vfp_vstrldr_sysreg<0b0,0b1100, "vpr">; } defm VSTR_P0 : vfp_vstrldr_sysreg<0b0,0b1101, "p0", (outs), (ins VCCR:$P0)>; + + let Defs = [VPR] in { + defm VLDR_VPR : vfp_vstrldr_sysreg<0b1,0b1100, "vpr">; + } + defm VLDR_P0 : vfp_vstrldr_sysreg<0b1,0b1101, "p0", + (outs VCCR:$P0), (ins)>; } let Uses = [FPSCR] in { @@ -2862,11 +2870,3 @@ let Uses = [FPSCR] in { defm VLDR_FPCXTS : vfp_vstrldr_sysreg<0b1,0b1111, "fpcxts">; } } - -let Predicates = [HasV8_1MMainline, HasMVEInt] in { - let Defs = [VPR] in { - defm VLDR_VPR : vfp_vstrldr_sysreg<0b1,0b1100, "vpr">; - } - defm VLDR_P0 : vfp_vstrldr_sysreg<0b1,0b1101, "p0", - (outs VCCR:$P0), (ins)>; -} diff --git a/llvm/unittests/Target/ARM/MachineInstrTest.cpp b/llvm/unittests/Target/ARM/MachineInstrTest.cpp index bc37f991c3081..08cc81860a166 100644 --- a/llvm/unittests/Target/ARM/MachineInstrTest.cpp +++ b/llvm/unittests/Target/ARM/MachineInstrTest.cpp @@ -383,12 +383,20 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_ASRLi: case MVE_ASRLr: case MVE_LSRL: + case MVE_LSLLi: + case MVE_LSLLr: case MVE_SQRSHR: + case 
MVE_SQRSHRL: case MVE_SQSHL: + case MVE_SQSHLL: case MVE_SRSHR: + case MVE_SRSHRL: case MVE_UQRSHL: + case MVE_UQRSHLL: case MVE_UQSHL: + case MVE_UQSHLL: case MVE_URSHR: + case MVE_URSHRL: case MVE_VABDf16: case MVE_VABDf32: case MVE_VABDs16: @@ -972,6 +980,20 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VSUBi16: case MVE_VSUBi32: case MVE_VSUBi8: + case VLDR_P0_off: + case VLDR_P0_post: + case VLDR_P0_pre: + case VLDR_VPR_off: + case VLDR_VPR_post: + case VLDR_VPR_pre: + case VSTR_P0_off: + case VSTR_P0_post: + case VSTR_P0_pre: + case VSTR_VPR_off: + case VSTR_VPR_post: + case VSTR_VPR_pre: + case VMRS_P0: + case VMRS_VPR: return true; } }; @@ -996,27 +1018,16 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { ARMSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()), std::string(TM->getTargetFeatureString()), *static_cast(TM.get()), false); - const ARMBaseInstrInfo *TII = ST.getInstrInfo(); - auto MII = TM->getMCInstrInfo(); + auto MII = TM->getMCInstrInfo(); for (unsigned i = 0; i < ARM::INSTRUCTION_LIST_END; ++i) { - const MCInstrDesc &Desc = TII->get(i); - - for (auto &Op : Desc.operands()) { - // Only check instructions that access the MQPR regs. - if ((Op.OperandType & MCOI::OPERAND_REGISTER) == 0 || - (Op.RegClass != ARM::MQPRRegClassID && - Op.RegClass != ARM::QQPRRegClassID && - Op.RegClass != ARM::QQQQPRRegClassID)) - continue; - - uint64_t Flags = MII->get(i).TSFlags; - bool Valid = (Flags & ARMII::ValidForTailPredication) != 0; - ASSERT_EQ(IsValidTPOpcode(i), Valid) - << MII->getName(i) - << ": mismatched expectation for tail-predicated safety\n"; - break; - } + uint64_t Flags = MII->get(i).TSFlags; + if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE) + continue; + bool Valid = (Flags & ARMII::ValidForTailPredication) != 0; + ASSERT_EQ(IsValidTPOpcode(i), Valid) + << MII->getName(i) + << ": mismatched expectation for tail-predicated safety\n"; } } From a63b2a4614b6b776cffcc0ab033e288024aa73b9 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Wed, 16 Sep 2020 11:47:26 +0100 Subject: [PATCH 0813/1079] [ARM] Fix tail predication predicate tracking Clear the CurrentPredicate when we find an instruction which would completely overwrite the VPR. This fix essentially means we're back to not really being able to handle VPT instructions when tail predicating. Differential Revision: https://reviews.llvm.org/D87610 --- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 15 ++++- .../cond-vector-reduce-mve-codegen.ll | 17 ++++-- .../Thumb2/LowOverheadLoops/disjoint-vcmp.mir | 15 +++-- .../Thumb2/LowOverheadLoops/remat-vctp.ll | 18 +++++- .../Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir | 18 ++++-- .../Thumb2/LowOverheadLoops/vpt-blocks.mir | 57 ++++++++++++++----- 6 files changed, 107 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 7acb70c5e7f53..38c2544bcee6d 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -874,6 +874,7 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) { if (MI->getOpcode() != ARM::MVE_VPST) { assert(MI->findRegisterDefOperandIdx(ARM::VPR) != -1 && "VPT does not implicitly define VPR?!"); + CurrentPredicate.clear(); CurrentPredicate.insert(MI); } @@ -913,6 +914,16 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) { } } + // If this instruction defines the VPR, update the predicate for the + // proceeding instructions. 
+ if (IsDef) { + // Clear the existing predicate when we're not in VPT Active state. + if (!isVectorPredicated(MI)) + CurrentPredicate.clear(); + CurrentPredicate.insert(MI); + LLVM_DEBUG(dbgs() << "ARM Loops: Adding Predicate: " << *MI); + } + // If we find a vpr def that is not already predicated on the vctp, we've // got disjoint predicates that may not be equivalent when we do the // conversion. @@ -928,9 +939,9 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) { // If we find an instruction that has been marked as not valid for tail // predication, only allow the instruction if it's contained within a valid // VPT block. - if ((Flags & ARMII::ValidForTailPredication) == 0 && !IsUse) { + if ((Flags & ARMII::ValidForTailPredication) == 0) { LLVM_DEBUG(dbgs() << "ARM Loops: Can't tail predicate: " << *MI); - return false; + return IsUse; } // If the instruction is already explicitly predicated, then the conversion diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll index 522cce49f75a1..a60ad09dd360d 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll @@ -464,19 +464,28 @@ define dso_local arm_aapcs_vfpcc void @range_test(i32* noalias nocapture %arg, i ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB5_1: @ %bb4 +; CHECK-NEXT: add.w r12, r3, #3 +; CHECK-NEXT: mov.w lr, #1 +; CHECK-NEXT: bic r12, r12, #3 +; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: add.w lr, lr, r12, lsr #2 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: dlstp.32 lr, r3 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB5_2: @ %bb12 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vptt.i32 ne, q0, zr +; CHECK-NEXT: vctp.32 r3 +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: vpttt.i32 ne, q0, zr ; CHECK-NEXT: vcmpt.s32 le, q0, r2 +; CHECK-NEXT: vctpt.32 r3 ; CHECK-NEXT: vldrwt.u32 q1, [r1], #16 ; CHECK-NEXT: add.w r12, r12, #4 +; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vmul.i32 q0, q1, q0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q0, [r0], #16 -; CHECK-NEXT: letp lr, .LBB5_2 +; CHECK-NEXT: le lr, .LBB5_2 ; CHECK-NEXT: @ %bb.3: @ %bb32 ; CHECK-NEXT: pop {r7, pc} bb: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/disjoint-vcmp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/disjoint-vcmp.mir index 37a7b7bd010dd..550972e4a4f45 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/disjoint-vcmp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/disjoint-vcmp.mir @@ -135,27 +135,34 @@ body: | ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: $r12 = t2MOVi16 target-flags(arm-lo16) @mask, 14 /* CC::al */, $noreg + ; CHECK: renamable $r4, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg ; CHECK: $r12 = t2MOVTi16 killed $r12, target-flags(arm-hi16) @mask, 14 /* CC::al */, $noreg + ; CHECK: renamable $r4 = t2BICri killed renamable $r4, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r5 = t2LDRHi12 killed renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 2 from %ir.mask.gep9) + ; CHECK: renamable $r12 = t2SUBri killed renamable $r4, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r4, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: $vpr = VMSR_P0 $r5, 14 /* CC::al */, $noreg + ; CHECK: renamable $lr = nuw nsw t2ADDrs 
killed renamable $r4, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 16, 14 /* CC::al */, $noreg, $noreg ; CHECK: VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) ; CHECK: renamable $q0 = MVE_VDUP32 killed renamable $r5, 0, $noreg, undef renamable $q0 ; CHECK: $r3 = tMOVr $r0, 14 /* CC::al */, $noreg - ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2 + ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: bb.2.bb9: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - ; CHECK: liveins: $lr, $q0, $r0, $r1, $r3, $r12 + ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r12 ; CHECK: renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) - ; CHECK: MVE_VPST 4, implicit $vpr + ; CHECK: MVE_VPST 2, implicit $vpr + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr ; CHECK: renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4) ; CHECK: renamable $r3, renamable $q2 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv1, align 4) + ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $r12, renamable $q2 = MVE_VLDRWU32_pre killed renamable $r12, 16, 0, $noreg :: (load 16 from %ir.scevgep2, align 8) ; CHECK: MVE_VPTv4u32 8, renamable $q0, killed renamable $q2, 2, implicit-def $vpr ; CHECK: MVE_VSTRWU32 killed renamable $q1, killed renamable $r0, 0, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) ; CHECK: $r0 = tMOVr $r3, 14 /* CC::al */, $noreg - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.bb27: ; CHECK: $sp = tADDspi $sp, 1, 14 /* CC::al */, $noreg ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $r5, def $r7, def $pc diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll index 198ec16af634c..6ce2b9f5f1c02 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll @@ -7,13 +7,23 @@ define void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: ldrd r5, r12, [sp, #80] +; CHECK-NEXT: cmp.w r12, #4 +; CHECK-NEXT: mov r4, r12 ; CHECK-NEXT: vmvn.i32 q0, #0x80000000 +; CHECK-NEXT: it ge +; CHECK-NEXT: movge r4, #4 ; CHECK-NEXT: vmov.i32 q1, #0x3f +; CHECK-NEXT: sub.w r4, r12, r4 ; CHECK-NEXT: vmov.i32 q2, #0x1 -; CHECK-NEXT: dlstp.32 lr, r12 +; CHECK-NEXT: add.w lr, r4, #3 +; CHECK-NEXT: movs r4, #1 +; CHECK-NEXT: add.w lr, r4, lr, lsr #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_1: @ %bb6 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q4, [r1], #16 +; CHECK-NEXT: vctp.32 r12 +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrwt.u32 q4, [r1], #16 ; CHECK-NEXT: vabs.s32 q5, q4 ; CHECK-NEXT: vcls.s32 q3, q5 ; CHECK-NEXT: vshl.u32 q5, q5, q3 @@ -31,13 +41,15 @@ define void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg ; CHECK-NEXT: vqshl.s32 q5, q5, #1 ; CHECK-NEXT: vpt.s32 lt, q4, zr ; CHECK-NEXT: vnegt.s32 q5, q5 +; CHECK-NEXT: vctp.32 
r12 +; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q4, [r0], #16 ; CHECK-NEXT: vqrdmulh.s32 q4, q4, q5 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vstrwt.32 q4, [r2], #16 ; CHECK-NEXT: vstrwt.32 q3, [r3], #16 -; CHECK-NEXT: letp lr, .LBB0_1 +; CHECK-NEXT: le lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %bb44 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: pop {r4, r5, r7, pc} diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir index 2f1641516a0d9..6df9702ca01dc 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir @@ -118,16 +118,24 @@ body: | ; CHECK: bb.1.bb3: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 + ; CHECK: renamable $r12 = t2ADDri renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $lr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: $vpr = VMSR_P0 killed $r3, 14 /* CC::al */, $noreg + ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) ; CHECK: $r3 = tMOVr $r0, 14 /* CC::al */, $noreg - ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2 + ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: bb.2.bb9: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - ; CHECK: liveins: $lr, $r0, $r1, $r3 + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4) + ; CHECK: MVE_VPST 4, implicit $vpr + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4) + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg + ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: MVE_VPST 4, implicit $vpr ; CHECK: renamable $vpr = MVE_VCMPi32r renamable $q0, $zr, 1, 1, killed renamable $vpr ; CHECK: renamable $r3, renamable $q1 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1, align 4) @@ -135,7 +143,7 @@ body: | ; CHECK: MVE_VPST 8, implicit $vpr ; CHECK: MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) ; CHECK: $r0 = tMOVr $r3, 14 /* CC::al */, $noreg - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.bb27: ; CHECK: $sp = tADDspi $sp, 1, 14 /* CC::al */, $noreg ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir index 60a578d81594f..74f1e05684449 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir @@ -215,17 
+215,26 @@ body: | ; CHECK: bb.1.vector.ph: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2 + ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg - ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r1 + ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - ; CHECK: liveins: $lr, $q0, $r0, $r2, $r3 - ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 0, killed $noreg - ; CHECK: MVE_VPTv4s32r 4, renamable $q1, renamable $r2, 11, implicit-def $vpr + ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3 + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, killed renamable $vpr + ; CHECK: MVE_VPTv4s32r 2, renamable $q1, renamable $r2, 11, implicit-def $vpr ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 12, 1, killed renamable $vpr + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 1, killed renamable $vpr ; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: @@ -593,17 +602,26 @@ body: | ; CHECK: bb.1.vector.ph: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2 + ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg - ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r1 + ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - ; CHECK: liveins: $lr, $q0, $r0, $r2, $r3 - ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 0, killed $noreg - ; CHECK: MVE_VPTv4s32r 12, renamable $q1, renamable $r2, 10, implicit-def $vpr + ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3 + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, killed renamable $vpr + ; CHECK: MVE_VPTv4s32r 14, renamable $q1, renamable $r2, 10, implicit-def $vpr ; CHECK: renamable $vpr = MVE_VCMPs32r killed 
renamable $q1, renamable $r3, 13, 1, killed renamable $vpr + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 2, killed renamable $vpr ; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 2, killed renamable $vpr - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ; @@ -713,17 +731,26 @@ body: | ; CHECK: bb.1.vector.ph: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2 + ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg - ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r1 + ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - ; CHECK: liveins: $lr, $q0, $r0, $r2, $r3 - ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 0, killed $noreg - ; CHECK: MVE_VPTv4s32r 4, renamable $q0, renamable $r2, 11, implicit-def $vpr + ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3 + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, killed renamable $vpr + ; CHECK: MVE_VPTv4s32r 2, renamable $q0, renamable $r2, 11, implicit-def $vpr ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 12, 1, killed renamable $vpr + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 1, killed renamable $vpr ; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: From 7029e5d4ca20d20982da8efe89de27acd8d7d75b Mon Sep 17 00:00:00 2001 From: Adam Czachorowski Date: Tue, 15 Sep 2020 20:13:00 +0200 Subject: [PATCH 0814/1079] [clangd] Actually parse Index section of the YAML file. This fixes a bug in dbf486c0de92c76df77c1a1f815cf16533ecbb3a, which introduced the Index section of the config, but did not register the parse method, so it didn't work in a YAML file (but did in a test). 
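Reduced to a toy (illustrative code, not clangd's actual DictParser), the failure mode is a key-dispatch parser that silently drops any section whose key was never registered, so the existing parse() overload for Index was simply unreachable from real YAML input:

  #include <functional>
  #include <map>
  #include <string>

  struct DictParser {
    std::map<std::string, std::function<void(const std::string &)>> Handlers;
    void handle(const std::string &Key,
                std::function<void(const std::string &)> Fn) {
      Handlers[Key] = std::move(Fn);
    }
    void parse(const std::map<std::string, std::string> &Doc) const {
      for (const auto &KV : Doc) {
        auto It = Handlers.find(KV.first);
        if (It != Handlers.end())
          It->second(KV.second); // unregistered keys fall through silently
      }
    }
  };

With only handle("CompileFlags", ...) registered, a document containing an "Index" key parses without error and without effect, which matches the symptom described above.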
Differential Revision: https://reviews.llvm.org/D87710
---
 clang-tools-extra/clangd/ConfigYAML.cpp         |  1 +
 .../clangd/unittests/ConfigYAMLTests.cpp        | 17 +++++++++++------
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/clang-tools-extra/clangd/ConfigYAML.cpp b/clang-tools-extra/clangd/ConfigYAML.cpp
index 16639f6649c2b..9988fe3766480 100644
--- a/clang-tools-extra/clangd/ConfigYAML.cpp
+++ b/clang-tools-extra/clangd/ConfigYAML.cpp
@@ -38,6 +38,7 @@ class Parser {
     DictParser Dict("Config", this);
     Dict.handle("If", [&](Node &N) { parse(F.If, N); });
     Dict.handle("CompileFlags", [&](Node &N) { parse(F.CompileFlags, N); });
+    Dict.handle("Index", [&](Node &N) { parse(F.Index, N); });
     Dict.parse(N);
     return !(N.failed() || HadError);
   }
diff --git a/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp b/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp
index a9526ce2367c4..27b1c0cfc56dd 100644
--- a/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp
@@ -47,16 +47,21 @@ CompileFlags: {  Add: [foo, bar] }
   Add: |
     b
     az
+---
+Index:
+  Background: Skip
 )yaml";
   auto Results = Fragment::parseYAML(YAML, "config.yaml", Diags.callback());
   EXPECT_THAT(Diags.Diagnostics, IsEmpty());
-  ASSERT_EQ(Results.size(), 2u);
-  EXPECT_FALSE(Results.front().If.HasUnrecognizedCondition);
-  EXPECT_THAT(Results.front().If.PathMatch, ElementsAre(Val("abc")));
-  EXPECT_THAT(Results.front().CompileFlags.Add,
-              ElementsAre(Val("foo"), Val("bar")));
+  ASSERT_EQ(Results.size(), 3u);
+  EXPECT_FALSE(Results[0].If.HasUnrecognizedCondition);
+  EXPECT_THAT(Results[0].If.PathMatch, ElementsAre(Val("abc")));
+  EXPECT_THAT(Results[0].CompileFlags.Add, ElementsAre(Val("foo"), Val("bar")));
+
+  EXPECT_THAT(Results[1].CompileFlags.Add, ElementsAre(Val("b\naz\n")));
 
-  EXPECT_THAT(Results.back().CompileFlags.Add, ElementsAre(Val("b\naz\n")));
+  ASSERT_TRUE(Results[2].Index.Background);
+  EXPECT_EQ("Skip", *Results[2].Index.Background.getValue());
 }
 
 TEST(ParseYAML, Locations) {

From 779a2a2edcea89ad5f5bf99eeac90516542159d9 Mon Sep 17 00:00:00 2001
From: Adam Balogh
Date: Tue, 15 Sep 2020 12:53:13 +0200
Subject: [PATCH 0815/1079] [clang-tidy] Crash fix for
 bugprone-misplaced-pointer-arithmetic-in-alloc

Placement new operators on non-object types cause a crash in
`bugprone-misplaced-pointer-arithmetic-in-alloc`. This patch fixes this
issue.
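A minimal sketch of the crashing input, adapted from the regression test
added below (`C` stands for any class type):

  void *operator new(std::size_t, void *);

  void placement_new_ptr(void *buf, C *old) {
    // Placement new of a pointer type has no CXXConstructExpr, so
    // getConstructExpr() returns null; the check used to dereference it
    // unconditionally.
    C **p = new (buf) C *(old) + 1;
  }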
Differential Revision: https://reviews.llvm.org/D87683
---
 .../MisplacedPointerArithmeticInAllocCheck.cpp        |  6 +++---
 ...bugprone-misplaced-pointer-arithmetic-in-alloc.cpp | 11 +++++++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/bugprone/MisplacedPointerArithmeticInAllocCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/MisplacedPointerArithmeticInAllocCheck.cpp
index 2a6a0ae53a4f3..6208cb5cfc9dc 100644
--- a/clang-tools-extra/clang-tidy/bugprone/MisplacedPointerArithmeticInAllocCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/MisplacedPointerArithmeticInAllocCheck.cpp
@@ -77,9 +77,9 @@ void MisplacedPointerArithmeticInAllocCheck::check(
       CallName = "operator new[]";
     } else {
       const auto *CtrE = New->getConstructExpr();
-      if (!CtrE->getArg(CtrE->getNumArgs() - 1)
-               ->getType()
-               ->isIntegralOrEnumerationType())
+      if (!CtrE || !CtrE->getArg(CtrE->getNumArgs() - 1)
+                        ->getType()
+                        ->isIntegralOrEnumerationType())
         return;
       CallName = "operator new";
     }
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-misplaced-pointer-arithmetic-in-alloc.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-misplaced-pointer-arithmetic-in-alloc.cpp
index 42250da2610df..00d12891cde88 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-misplaced-pointer-arithmetic-in-alloc.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-misplaced-pointer-arithmetic-in-alloc.cpp
@@ -51,3 +51,14 @@ void bad_new_array(int n, int m) {
   // CHECK-FIXES: p = new char[n - m] + 10;
   // FIXME: should be p = new char[n - m + 10];
 }
+
+namespace std {
+typedef decltype(sizeof(void*)) size_t;
+}
+
+void* operator new(std::size_t, void*);
+
+void placement_new_ptr(void *buf, C *old) {
+  C **p = new (buf) C*(old) + 1;
+  // CHECK-MESSAGES-NOT: :[[@LINE-1]]:11: warning: arithmetic operation is applied to the result of operator new() instead of its size-like argument
+}

From dbd45b2db8e0c396fa20d4c72734c4f31f54af96 Mon Sep 17 00:00:00 2001
From: Adam Balogh
Date: Fri, 11 Sep 2020 19:04:38 +0200
Subject: [PATCH 0816/1079] [ASTMatchers] Fix `hasBody` for the descendants of
 `FunctionDecl`

//AST Matcher// `hasBody` is a polymorphic matcher that behaves differently
for loop statements and function declarations. The main difference is that
for function declarations it does not only call `FunctionDecl::getBody()`
but first checks whether the declaration in question is the specific
declaration that has the body, by calling
`FunctionDecl::doesThisDeclarationHaveABody()`. This is achieved by
specialization of the template `GetBodyMatcher`. Unfortunately, template
specializations do not catch the descendants of the class for which the
template is specialized. Therefore it does not work correctly for the
descendants of `FunctionDecl`, such as `CXXMethodDecl`,
`CXXConstructorDecl`, `CXXDestructorDecl`, etc. This patch fixes this issue
by using a template metaprogram.

The patch also introduces a new matcher `hasAnyBody`, which matches
declarations that have a body present in the AST but not necessarily
belonging to that particular declaration.
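A sketch of the difference between the two matchers (derived from the
documentation added below):

  // Given:  void f();  void f() {}
  functionDecl(hasBody(compoundStmt()))     // matches only 'void f() {}'
  functionDecl(hasAnyBody(compoundStmt()))  // matches both declarations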
Differential Revision: https://reviews.llvm.org/D87527
---
 .../modernize/UseEqualsDeleteCheck.cpp        |  4 +-
 clang/include/clang/ASTMatchers/ASTMatchers.h | 40 ++++++++++++++++-
 .../clang/ASTMatchers/ASTMatchersInternal.h   | 14 +++---
 .../ASTMatchers/ASTMatchersTraversalTest.cpp  | 43 ++++++++++++++++++-
 4 files changed, 89 insertions(+), 12 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/modernize/UseEqualsDeleteCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseEqualsDeleteCheck.cpp
index ea4bf91b0d438..7d5ae89551731 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseEqualsDeleteCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/UseEqualsDeleteCheck.cpp
@@ -36,12 +36,12 @@ void UseEqualsDeleteCheck::registerMatchers(MatchFinder *Finder) {
   Finder->addMatcher(
       cxxMethodDecl(
           PrivateSpecialFn,
-          unless(anyOf(hasBody(stmt()), isDefaulted(), isDeleted(),
+          unless(anyOf(hasAnyBody(stmt()), isDefaulted(), isDeleted(),
                        ast_matchers::isTemplateInstantiation(),
                        // Ensure that all methods except private special member
                        // functions are defined.
                        hasParent(cxxRecordDecl(hasMethod(unless(
-                           anyOf(PrivateSpecialFn, hasBody(stmt()), isPure(),
+                           anyOf(PrivateSpecialFn, hasAnyBody(stmt()), isPure(),
                                  isDefaulted(), isDeleted()))))))))
           .bind(SpecialFunction),
       this);
diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index e670459fe8a2f..bd89906eadb0f 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -4879,7 +4879,9 @@ AST_MATCHER_P(ArraySubscriptExpr, hasBase,
 }
 
 /// Matches a 'for', 'while', 'do while' statement or a function
-/// definition that has a given body.
+/// definition that has a given body. Note that in case of functions
+/// this matcher only matches the definition itself and not the other
+/// declarations of the same function.
 ///
 /// Given
 /// \code
@@ -4889,6 +4891,18 @@ AST_MATCHER_P(ArraySubscriptExpr, hasBase,
 ///   matches 'for (;;) {}'
 /// with compoundStmt()
 ///   matching '{}'
+///
+/// Given
+/// \code
+///   void f();
+///   void f() {}
+/// \endcode
+/// hasBody(functionDecl())
+///   matches 'void f() {}'
+/// with compoundStmt()
+///   matching '{}'
+///   but does not match 'void f();'
+
 AST_POLYMORPHIC_MATCHER_P(hasBody,
                           AST_POLYMORPHIC_SUPPORTED_TYPES(DoStmt, ForStmt,
                                                           WhileStmt,
@@ -4900,6 +4914,30 @@ AST_POLYMORPHIC_MATCHER_P(hasBody,
           InnerMatcher.matches(*Statement, Finder, Builder));
 }
 
+/// Matches a function declaration that has a given body present in the AST.
+/// Note that this matcher matches all the declarations of a function whose
+/// body is present in the AST.
+///
+/// Given
+/// \code
+///   void f();
+///   void f() {}
+///   void g();
+/// \endcode
+/// hasAnyBody(functionDecl())
+///   matches both 'void f();'
+///   and 'void f() {}'
+/// with compoundStmt()
+///   matching '{}'
+///   but does not match 'void g();'
+AST_MATCHER_P(FunctionDecl, hasAnyBody,
+              internal::Matcher<Stmt>, InnerMatcher) {
+  const Stmt *const Statement = Node.getBody();
+  return (Statement != nullptr &&
+          InnerMatcher.matches(*Statement, Finder, Builder));
+}
+
+
 /// Matches compound statements where at least one substatement matches
 /// a given matcher. Also matches StmtExprs that have CompoundStmt as children.
///
diff --git a/clang/include/clang/ASTMatchers/ASTMatchersInternal.h b/clang/include/clang/ASTMatchers/ASTMatchersInternal.h
index 09774b3c912c7..2a3f503f99516 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchersInternal.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchersInternal.h
@@ -1835,18 +1835,18 @@ struct NotEqualsBoundNodePredicate {
   DynTypedNode Node;
 };
 
+template <typename Ty, typename Enabler = void> struct GetBodyMatcher {
+  static const Stmt *get(const Ty &Node) { return Node.getBody(); }
+};
+
 template <typename Ty>
-struct GetBodyMatcher {
+struct GetBodyMatcher<Ty, typename std::enable_if<
+                              std::is_base_of<FunctionDecl, Ty>::value>::type> {
   static const Stmt *get(const Ty &Node) {
-    return Node.getBody();
+    return Node.doesThisDeclarationHaveABody() ? Node.getBody() : nullptr;
   }
 };
 
-template <>
-inline const Stmt *GetBodyMatcher<FunctionDecl>::get(const FunctionDecl &Node) {
-  return Node.doesThisDeclarationHaveABody() ? Node.getBody() : nullptr;
-}
-
 template <typename Ty> struct HasSizeMatcher {
   static bool hasSize(const Ty &Node, unsigned int N) {
diff --git a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
index 72fbef5cdc175..39222fbe42491 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
@@ -1612,10 +1612,49 @@ TEST(HasBody, FindsBodyOfForWhileDoLoops) {
                       doStmt(hasBody(compoundStmt()))));
   EXPECT_TRUE(matches("void f() { int p[2]; for (auto x : p) {} }",
                       cxxForRangeStmt(hasBody(compoundStmt()))));
+}
+
+TEST(HasBody, FindsBodyOfFunctions) {
   EXPECT_TRUE(matches("void f() {}", functionDecl(hasBody(compoundStmt()))));
   EXPECT_TRUE(notMatches("void f();", functionDecl(hasBody(compoundStmt()))));
-  EXPECT_TRUE(matches("void f(); void f() {}",
-                      functionDecl(hasBody(compoundStmt()))));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      "void f(); void f() {}",
+      functionDecl(hasBody(compoundStmt())).bind("func"),
+      std::make_unique<VerifyIdIsBoundTo<FunctionDecl>>("func", 1)));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      "class C { void f(); }; void C::f() {}",
+      cxxMethodDecl(hasBody(compoundStmt())).bind("met"),
+      std::make_unique<VerifyIdIsBoundTo<CXXMethodDecl>>("met", 1)));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      "class C { C(); }; C::C() {}",
+      cxxConstructorDecl(hasBody(compoundStmt())).bind("ctr"),
+      std::make_unique<VerifyIdIsBoundTo<CXXConstructorDecl>>("ctr", 1)));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      "class C { ~C(); }; C::~C() {}",
+      cxxDestructorDecl(hasBody(compoundStmt())).bind("dtr"),
+      std::make_unique<VerifyIdIsBoundTo<CXXDestructorDecl>>("dtr", 1)));
+}
+
+TEST(HasAnyBody, FindsAnyBodyOfFunctions) {
+  EXPECT_TRUE(matches("void f() {}", functionDecl(hasAnyBody(compoundStmt()))));
+  EXPECT_TRUE(notMatches("void f();",
+                         functionDecl(hasAnyBody(compoundStmt()))));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      "void f(); void f() {}",
+      functionDecl(hasAnyBody(compoundStmt())).bind("func"),
+      std::make_unique<VerifyIdIsBoundTo<FunctionDecl>>("func", 2)));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      "class C { void f(); }; void C::f() {}",
+      cxxMethodDecl(hasAnyBody(compoundStmt())).bind("met"),
+      std::make_unique<VerifyIdIsBoundTo<CXXMethodDecl>>("met", 2)));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      "class C { C(); }; C::C() {}",
+      cxxConstructorDecl(hasAnyBody(compoundStmt())).bind("ctr"),
+      std::make_unique<VerifyIdIsBoundTo<CXXConstructorDecl>>("ctr", 2)));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      "class C { ~C(); }; C::~C() {}",
+      cxxDestructorDecl(hasAnyBody(compoundStmt())).bind("dtr"),
+      std::make_unique<VerifyIdIsBoundTo<CXXDestructorDecl>>("dtr", 2)));
 }
 
 TEST(HasAnySubstatement, MatchesForTopLevelCompoundStatement) {

From 4abb5cd83902f1351db473c720ee0b95ebdcb338 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 16 Sep 2020 12:11:29 +0100
Subject: [PATCH 0817/1079] CGBlocks.cpp - assert non-null CGF pointer. NFCI.
Fixes static analyzer warning.
---
 clang/lib/CodeGen/CGBlocks.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index ac5559a93d9cc..ee0c14641803b 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -580,7 +580,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
 
       // Since a __block variable cannot be captured by lambdas, its type and
       // the capture field type should always match.
-      assert(getCaptureFieldType(*CGF, CI) == variable->getType() &&
+      assert(CGF && getCaptureFieldType(*CGF, CI) == variable->getType() &&
              "capture type differs from the variable type");
       layout.push_back(BlockLayoutChunk(align, CGM.getPointerSize(),
                                         Qualifiers::OCL_None, &CI,

From aa1e15dda9e5941611f2183ba34087c2d02beb1a Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 16 Sep 2020 12:17:44 +0100
Subject: [PATCH 0818/1079] TokenAnnotator.cpp - remove useless pointer null
 test. NFCI.

We dereference the Left pointer throughout the parseParens() function apart
from this single case - just add a non-null assertion and drop the check.

Fixes clang static analyzer null dereference warning.
---
 clang/lib/Format/TokenAnnotator.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 841f0b41e9a7f..2fa3b28f3a390 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -198,8 +198,8 @@ class AnnotatingParser {
     if (!CurrentToken)
       return false;
     FormatToken *Left = CurrentToken->Previous;
-    FormatToken *PrevNonComment =
-        Left ? Left->getPreviousNonComment() : nullptr;
+    assert(Left && "Unknown previous token");
+    FormatToken *PrevNonComment = Left->getPreviousNonComment();
     Left->ParentBracket = Contexts.back().ContextKind;
     ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);

From 439f5749d978acfa69f1a2d20c797c3fc0d97989 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 16 Sep 2020 12:29:50 +0100
Subject: [PATCH 0819/1079] [AST] ASTReader::ReadModuleMapFileBlock - assert
 non-null Module. NFCI.

At this stage the Module* shouldn't be null - add an assert to fix a clang
static analyzer warning.
---
 clang/lib/Serialization/ASTReader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 6f5fa67117c09..f02c43f337674 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3950,7 +3950,7 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
       return OutOfDate;
     }
 
-    assert(M->Name == F.ModuleName && "found module with different name");
+    assert(M && M->Name == F.ModuleName && "found module with different name");
 
     // Check the primary module map file.
     auto StoredModMap = FileMgr.getFile(F.ModuleMapPath);

From f5c7102dbc7223e98ce5c0f02b343ed92062987c Mon Sep 17 00:00:00 2001
From: Sam McCall
Date: Wed, 16 Sep 2020 13:42:01 +0200
Subject: [PATCH 0820/1079] Update dead links to Itanium and ARM ABIs. NFC

---
 clang/lib/CodeGen/ItaniumCXXABI.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 3b752d306055f..69825a036a1e4 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -9,11 +9,11 @@
 // This provides C++ code generation targeting the Itanium C++ ABI.
The class
 // in this file generates structures that follow the Itanium C++ ABI, which is
 // documented at:
-//  http://www.codesourcery.com/public/cxx-abi/abi.html
-//  http://www.codesourcery.com/public/cxx-abi/abi-eh.html
+//  https://itanium-cxx-abi.github.io/cxx-abi/abi.html
+//  https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html
 //
 // It also supports the closely-related ARM ABI, documented at:
-// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0041c/IHI0041C_cppabi.pdf
+// https://developer.arm.com/documentation/ihi0041/g/
 //
 //===----------------------------------------------------------------------===//

From 0a0abc0ede0ff8015e30aae89a3f89c7dc5b3f0f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 16 Sep 2020 12:40:15 +0100
Subject: [PATCH 0821/1079] [Sema] isOpenMPCapturedDecl - assert we locate
 CapturedRegionScopeInfo. NFCI.

Fixes clang static analyzer null dereference warning.
---
 clang/lib/Sema/SemaOpenMP.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 1aeb52a213f6e..336f264229146 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -2194,6 +2194,7 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo,
         break;
       }
     }
+    assert(CSI && "Failed to find CapturedRegionScopeInfo");
     SmallVector<OpenMPDirectiveKind, 4> Regions;
     getOpenMPCaptureRegions(Regions, DSAStack->getDirective(CSI->OpenMPLevel));

From 1c421046d742102e7016567d41a9db6a1fb61906 Mon Sep 17 00:00:00 2001
From: Sam Parker
Date: Wed, 16 Sep 2020 12:42:58 +0100
Subject: [PATCH 0822/1079] [RDA] Fix getUniqueReachingDef for self loops

We've fixed the case where this could return an instruction after the given
instruction, but this also means that we could falsely return a 'unique'
def when there could be another one coming from the backedge of a loop.

Differential Revision: https://reviews.llvm.org/D87751
---
 llvm/lib/CodeGen/ReachingDefAnalysis.cpp      | 16 ++++++---------
 .../vctp-add-operand-liveout.mir              | 20 ++++++++++++++-----
 .../CodeGen/Thumb2/LowOverheadLoops/wlstp.mir | 18 ++++++++++++-----
 .../wrong-vctp-opcode-liveout.mir             | 10 ++++++----
 .../wrong-vctp-operand-liveout.mir            | 20 ++++++++++++++-----
 5 files changed, 55 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index 5a4837079bed9..86c2f63fd3aac 100644
--- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -397,7 +397,6 @@ ReachingDefAnalysis::getGlobalReachingDefs(MachineInstr *MI, int PhysReg,
     return;
   }
 
-  SmallPtrSet<MachineBasicBlock*, 2> Visited;
   for (auto *MBB : MI->getParent()->predecessors())
     getLiveOuts(MBB, PhysReg, Defs);
 }
@@ -437,18 +436,15 @@ MachineInstr *ReachingDefAnalysis::getUniqueReachingMIDef(MachineInstr *MI,
   SmallPtrSet<MachineBasicBlock*, 2> VisitedBBs;
   SmallPtrSet<MachineInstr*, 2> Incoming;
   MachineBasicBlock *Parent = MI->getParent();
-  VisitedBBs.insert(Parent);
   for (auto *Pred : Parent->predecessors())
-    getLiveOuts(Pred, PhysReg, Incoming, VisitedBBs);
+    getLiveOuts(Pred, PhysReg, Incoming);
 
-  // If we have a local def and an incoming instruction, then there's not a
-  // unique instruction def.
-  if (!Incoming.empty() && LocalDef)
-    return nullptr;
-  else if (Incoming.size() == 1)
+  // Check that we have a single incoming value and that it does not
+  // come from the same block as MI - since it would mean that the def
+  // is executed after MI.
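+  // (Minimal sketch of the backedge case: in a single-block loop
+  //      loop:
+  //        %r = def ...   ; live-out, reaching 'loop' again via the backedge
+  //        ... MI uses %r ...
+  //        br loop
+  //  the only incoming def of %r comes from the loop block itself and thus
+  //  executes after MI, so it must not be returned as a unique def.)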
+ if (Incoming.size() == 1 && (*Incoming.begin())->getParent() != Parent) return *Incoming.begin(); - else - return LocalDef; + return nullptr; } MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI, diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir index cdc9d7e7be9c6..4f80869de3ccb 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir @@ -122,18 +122,28 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q1 - ; CHECK: $lr = MVE_DLSTP_32 renamable $r2 + ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + ; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg + ; CHECK: dead $lr = t2DLS renamable $r12 + ; CHECK: $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - ; CHECK: liveins: $lr, $q1, $r0, $r1, $r2 + ; CHECK: liveins: $q1, $r0, $r1, $r2, $r3 + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg ; CHECK: $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, undef $q0 - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, $noreg :: (load 8 from %ir.lsr.iv17, align 2) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 0, killed $noreg :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: MVE_VPST 4, implicit $vpr + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: $lr = tMOVr $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 + ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.middle.block: ; CHECK: liveins: $q0, $q1, $r2 ; CHECK: renamable $r0, dead $cpsr = tADDi3 killed renamable $r2, 4, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir index 7578b429790be..23cdf73263b01 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir @@ -425,8 +425,13 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; 
CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg + ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr = MVE_WLSTP_32 $r2, %bb.1 + ; CHECK: $lr = t2WLS killed renamable $lr, %bb.1 ; CHECK: tB %bb.4, 14 /* CC::al */, $noreg ; CHECK: bb.1.vector.ph: ; CHECK: successors: %bb.2(0x80000000) @@ -436,15 +441,18 @@ body: | ; CHECK: successors: %bb.3(0x04000000), %bb.2(0x7c000000) ; CHECK: liveins: $lr, $q1, $r0, $r1, $r2 ; CHECK: $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, undef $q0 - ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 0, $noreg :: (load 16 from %ir.lsr.iv24, align 4) - ; CHECK: renamable $q2 = MVE_VLDRWU32 renamable $r1, 0, 0, $noreg :: (load 16 from %ir.lsr.iv1, align 4) + ; CHECK: renamable $vpr = MVE_VCTP32 $r2, 0, $noreg + ; CHECK: MVE_VPST 4, implicit $vpr + ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4) + ; CHECK: renamable $q2 = MVE_VLDRWU32 renamable $r1, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1, align 4) ; CHECK: $r3 = tMOVr $r2, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 16, 14 /* CC::al */, $noreg ; CHECK: renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 16, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed $r2, 4, 14 /* CC::al */, $noreg - ; CHECK: renamable $q1 = nsw MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $q1 = nsw MVE_VADDi32 killed renamable $q1, renamable $q0, 1, killed renamable $vpr, undef renamable $q1 + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.middle.block: ; CHECK: successors: %bb.4(0x80000000) ; CHECK: liveins: $q0, $q1, $r3 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir index e377b06fea9f8..d91556e3e70b9 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir @@ -133,21 +133,23 @@ body: | ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg + ; CHECK: dead $lr = t2DLS renamable $r3 ; CHECK: $r12 = tMOVr killed $r3, 14 /* CC::al */, $noreg ; CHECK: $r3 = tMOVr $r2, 14 /* CC::al */, $noreg - ; CHECK: dead $lr = MVE_DLSTP_32 renamable $r3 ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $q1, $r0, $r1, $r2, $r3, $r12 + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg ; CHECK: $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, undef $q0 - ; CHECK: renamable $r0, 
renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, $noreg :: (load 8 from %ir.lsr.iv17, align 2) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 0, killed $noreg :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: MVE_VPST 4, implicit $vpr + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) ; CHECK: $lr = tMOVr $r12, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $r12 = nsw t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 - ; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.middle.block: ; CHECK: liveins: $q0, $q1, $r2, $r3 ; CHECK: renamable $r0, dead $cpsr = tSUBi3 killed renamable $r2, 1, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir index 05bfdbb2fc0f8..337816146e5f0 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir @@ -119,18 +119,28 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q1 - ; CHECK: $lr = MVE_DLSTP_32 renamable $r2 + ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + ; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg + ; CHECK: dead $lr = t2DLS renamable $r12 + ; CHECK: $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - ; CHECK: liveins: $lr, $q1, $r0, $r1, $r2 + ; CHECK: liveins: $q1, $r0, $r1, $r2, $r3 + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg ; CHECK: $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, undef $q0 - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, $noreg :: (load 8 from %ir.lsr.iv17, align 2) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 0, killed $noreg :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: MVE_VPST 4, implicit $vpr + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: $lr = tMOVr $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = nsw 
MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 + ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.middle.block: ; CHECK: liveins: $q0, $q1, $r2 ; CHECK: renamable $vpr = MVE_VCTP32 killed renamable $r2, 0, $noreg From 158989184e9c6bfec25cefe55022dd41894a54dd Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 16 Sep 2020 07:40:15 -0400 Subject: [PATCH 0823/1079] [SLP] change poorly named variable; NFC 'V' shadows a function argument. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 3347419077e3f..7d85cf5f9bddd 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6844,8 +6844,9 @@ class HorizontalReduction { // so set it as externally used to prevent it from being deleted. ExternallyUsedValues[ReductionRoot]; SmallVector IgnoreList; - for (auto &V : ReductionOps) - IgnoreList.append(V.begin(), V.end()); + for (ReductionOpsType &RdxOp : ReductionOps) + IgnoreList.append(RdxOp.begin(), RdxOp.end()); + while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) { auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth); V.buildTree(VL, ExternallyUsedValues, IgnoreList); From bbad998bab52a1eabbb6a1ca16cc2129b3f99aa5 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 16 Sep 2020 07:44:03 -0400 Subject: [PATCH 0824/1079] [SLP] move loop index variable declaration to its use; NFC --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 7d85cf5f9bddd..62269d2e7b9e7 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6819,7 +6819,6 @@ class HorizontalReduction { FastMathFlags Unsafe; Unsafe.setFast(); Builder.setFastMathFlags(Unsafe); - unsigned i = 0; BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues; // The same extra argument may be used several time, so log each attempt @@ -6847,6 +6846,7 @@ class HorizontalReduction { for (ReductionOpsType &RdxOp : ReductionOps) IgnoreList.append(RdxOp.begin(), RdxOp.end()); + unsigned i = 0; while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) { auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth); V.buildTree(VL, ExternallyUsedValues, IgnoreList); From 0cee1bf5d17dd424c569df7e2604be10906bd515 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 16 Sep 2020 08:11:19 -0400 Subject: [PATCH 0825/1079] [SLP] remove redundant size check; NFC We bail out on small array size anyway. 
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 62269d2e7b9e7..0fc5d1a810b50 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6796,14 +6796,10 @@ class HorizontalReduction {
     return true;
   }
 
-  /// Attempt to vectorize the tree found by
-  /// matchAssociativeReduction.
+  /// Attempt to vectorize the tree found by matchAssociativeReduction.
   bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
-    if (ReducedVals.empty())
-      return false;
-
-    // If there is a sufficient number of reduction values, reduce
-    // to a nearby power-of-2. Can safely generate oversized
+    // If there are a sufficient number of reduction values, reduce
+    // to a nearby power-of-2. We can safely generate oversized
     // vectors and rely on the backend to split them to legal sizes.
     unsigned NumReducedVals = ReducedVals.size();
     if (NumReducedVals < 4)

From 6a23668e78b05703ccba552e09b09b8055924bb6 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Wed, 16 Sep 2020 08:26:21 -0400
Subject: [PATCH 0826/1079] [SLP] remove uses of 'auto' that obscure
 functionality; NFC

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0fc5d1a810b50..619964a6f457c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6817,9 +6817,9 @@ class HorizontalReduction {
     Builder.setFastMathFlags(Unsafe);
 
     BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
-    // The same extra argument may be used several time, so log each attempt
+    // The same extra argument may be used several times, so log each attempt
     // to use it.
-    for (auto &Pair : ExtraArgs) {
+    for (std::pair<Instruction *, Value *> &Pair : ExtraArgs) {
       assert(Pair.first && "DebugLoc must be set.");
       ExternallyUsedValues[Pair.second].push_back(Pair.first);
     }
@@ -6844,7 +6844,7 @@ class HorizontalReduction {
     unsigned i = 0;
     while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
-      auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
+      ArrayRef<Value *> VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
       V.buildTree(VL, ExternallyUsedValues, IgnoreList);
       Optional<ArrayRef<unsigned>> Order = V.bestOrder();
       // TODO: Handle orders of size less than number of elements in the vector.

From 3ce9ec0cfa9e3690df8a345636d6fa3e385610c3 Mon Sep 17 00:00:00 2001
From: Sam Parker
Date: Wed, 16 Sep 2020 13:38:36 +0100
Subject: [PATCH 0827/1079] [ARM] Reorder some logic

Re-order some checks in ValidateMVEInst.
--- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 34 +++++++++++---------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 38c2544bcee6d..abfd339903c22 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -854,6 +854,24 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) { if (CannotTailPredicate) return false; + const MCInstrDesc &MCID = MI->getDesc(); + uint64_t Flags = MCID.TSFlags; + if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE) + return true; + + if (MI->getOpcode() == ARM::MVE_VPSEL || + MI->getOpcode() == ARM::MVE_VPNOT) { + // TODO: Allow VPSEL and VPNOT, we currently cannot because: + // 1) It will use the VPR as a predicate operand, but doesn't have to be + // instead a VPT block, which means we can assert while building up + // the VPT block because we don't find another VPT or VPST to being a new + // one. + // 2) VPSEL still requires a VPR operand even after tail predicating, + // which means we can't remove it unless there is another + // instruction, such as vcmp, that can provide the VPR def. + return false; + } + if (isVCTP(MI)) { // If we find another VCTP, check whether it uses the same value as the main VCTP. // If it does, store it in the SecondaryVCTPs set, else refuse it. @@ -881,22 +899,10 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) { VPTBlocks.emplace_back(MI, CurrentPredicate); CurrentBlock = &VPTBlocks.back(); return true; - } else if (MI->getOpcode() == ARM::MVE_VPSEL || - MI->getOpcode() == ARM::MVE_VPNOT) { - // TODO: Allow VPSEL and VPNOT, we currently cannot because: - // 1) It will use the VPR as a predicate operand, but doesn't have to be - // instead a VPT block, which means we can assert while building up - // the VPT block because we don't find another VPT or VPST to being a new - // one. - // 2) VPSEL still requires a VPR operand even after tail predicating, - // which means we can't remove it unless there is another - // instruction, such as vcmp, that can provide the VPR def. - return false; } bool IsUse = false; bool IsDef = false; - const MCInstrDesc &MCID = MI->getDesc(); for (int i = MI->getNumOperands() - 1; i >= 0; --i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.getReg() != ARM::VPR) @@ -932,10 +938,6 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) { return false; } - uint64_t Flags = MCID.TSFlags; - if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE) - return true; - // If we find an instruction that has been marked as not valid for tail // predication, only allow the instruction if it's contained within a valid // VPT block. 
From 4dd9c709ef1b59f0ec8e71100c624ec946b95fe2 Mon Sep 17 00:00:00 2001
From: mydeveloperday
Date: Wed, 16 Sep 2020 13:45:45 +0100
Subject: [PATCH 0828/1079] [clang-format] [NFC] Fix spelling mistake in the
 documentation

Ensure ClangFormatStyleOptions.rst can be regenerated from Format.h

Patch By: YangZhihui

Reviewed By: MyDeveloperDay

Differential Revision: https://reviews.llvm.org/D87352
---
 clang/docs/ClangFormatStyleOptions.rst | 10 ++++++----
 clang/include/clang/Format/Format.h    |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index 72a25032151ff..20e829135b33c 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -758,10 +758,14 @@ the configuration (without a prefix: ``Auto``).
        int bbbbbbbbbbbbbbbbbbbbb) {
   }
 
+
+
 **AttributeMacros** (``std::vector<std::string>``)
   A vector of strings that should be interpreted as attributes/qualifiers
   instead of identifiers. This can be useful for language extensions or
-  static analyzer annotations:
+  static analyzer annotations.
+
+  For example:
 
   .. code-block:: c++
 
@@ -775,8 +779,6 @@ the configuration (without a prefix: ``Auto``).
 
     AttributeMacros: ['__capability', '__output', '__ununsed']
 
-  For example: __capability.
-
 **BinPackArguments** (``bool``)
   If ``false``, a function call's arguments will either be all on the same
   line or will have one line each.
@@ -2246,7 +2248,7 @@ the configuration (without a prefix: ``Auto``).
 
 **ObjCBreakBeforeNestedBlockParam** (``bool``)
   Break parameters list into lines when there is nested block
-  parameters in a fuction call.
+  parameters in a function call.
 
   .. code-block:: c++
 
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 6bb828d60071f..c6c182b7bdcef 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -1860,7 +1860,7 @@ struct FormatStyle {
   bool ObjCSpaceAfterProperty;
 
   /// Break parameters list into lines when there is nested block
-  /// parameters in a fuction call.
+  /// parameters in a function call.
   /// \code
   ///   false:
   ///   - (void)_aMethod

From 24238f09edb98b0f460aa41139874ae5d4e5cd8d Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Wed, 16 Sep 2020 08:47:35 -0400
Subject: [PATCH 0829/1079] [SLP] fix formatting; NFC

Also move variable declarations closer to usage and add code comments.

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 44 ++++++++++---------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 619964a6f457c..3d19e867b6c29 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6805,10 +6805,6 @@ class HorizontalReduction {
     if (NumReducedVals < 4)
      return false;
 
-    unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
-
-    Value *VectorizedTree = nullptr;
-
     // FIXME: Fast-math-flags should be set based on the instructions in the
     // reduction (not all of 'fast' are required).
    IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
@@ -6842,7 +6838,9 @@ class HorizontalReduction {
     for (ReductionOpsType &RdxOp : ReductionOps)
       IgnoreList.append(RdxOp.begin(), RdxOp.end());
 
+    Value *VectorizedTree = nullptr;
     unsigned i = 0;
+    unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
     while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
       ArrayRef<Value *> VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
       V.buildTree(VL, ExternallyUsedValues, IgnoreList);
@@ -6867,25 +6865,25 @@ class HorizontalReduction {
       int ReductionCost = getReductionCost(TTI, ReducedVals[i], ReduxWidth);
       int Cost = TreeCost + ReductionCost;
       if (Cost >= -SLPCostThreshold) {
-          V.getORE()->emit([&]() {
-              return OptimizationRemarkMissed(
-                         SV_NAME, "HorSLPNotBeneficial", cast<Instruction>(VL[0]))
-                     << "Vectorizing horizontal reduction is possible"
-                     << "but not beneficial with cost "
-                     << ore::NV("Cost", Cost) << " and threshold "
-                     << ore::NV("Threshold", -SLPCostThreshold);
-          });
-          break;
+        V.getORE()->emit([&]() {
+          return OptimizationRemarkMissed(SV_NAME, "HorSLPNotBeneficial",
+                                          cast<Instruction>(VL[0]))
+                 << "Vectorizing horizontal reduction is possible"
+                 << "but not beneficial with cost " << ore::NV("Cost", Cost)
+                 << " and threshold "
+                 << ore::NV("Threshold", -SLPCostThreshold);
+        });
+        break;
       }
 
       LLVM_DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:"
                         << Cost << ". (HorRdx)\n");
       V.getORE()->emit([&]() {
-          return OptimizationRemark(
-                     SV_NAME, "VectorizedHorizontalReduction", cast<Instruction>(VL[0]))
-                 << "Vectorized horizontal reduction with cost "
-                 << ore::NV("Cost", Cost) << " and with tree size "
-                 << ore::NV("TreeSize", V.getTreeSize());
+        return OptimizationRemark(SV_NAME, "VectorizedHorizontalReduction",
+                                  cast<Instruction>(VL[0]))
+               << "Vectorized horizontal reduction with cost "
+               << ore::NV("Cost", Cost) << " and with tree size "
+               << ore::NV("TreeSize", V.getTreeSize());
       });
 
       // Vectorize a tree.
@@ -6902,15 +6900,19 @@ class HorizontalReduction {
 
       Value *ReducedSubTree =
           emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
-      if (VectorizedTree) {
+
+      if (!VectorizedTree) {
+        // Initialize the final value in the reduction.
+        VectorizedTree = ReducedSubTree;
+      } else {
+        // Update the final value in the reduction.
         Builder.SetCurrentDebugLocation(Loc);
         OperationData VectReductionData(ReductionData.getOpcode(),
                                         VectorizedTree, ReducedSubTree,
                                         ReductionData.getKind());
         VectorizedTree =
             VectReductionData.createOp(Builder, "op.rdx", ReductionOps);
-      } else
-        VectorizedTree = ReducedSubTree;
+      }
       i += ReduxWidth;
       ReduxWidth = PowerOf2Floor(NumReducedVals - i);
     }

From 82687cf47b24a509ecd78e02fbc5666ba667ff4b Mon Sep 17 00:00:00 2001
From: "Paul C. Anagnostopoulos"
Date: Tue, 15 Sep 2020 14:18:51 -0400
Subject: [PATCH 0830/1079] Add section with details about DAGs.

---
 llvm/docs/TableGen/ProgRef.rst | 51 ++++++++++++++++++++++++++++------
 1 file changed, 43 insertions(+), 8 deletions(-)

diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst
index 07f0ba8a54dd0..7bc70c8f89e6d 100644
--- a/llvm/docs/TableGen/ProgRef.rst
+++ b/llvm/docs/TableGen/ProgRef.rst
@@ -285,10 +285,11 @@ wide range of records conveniently and compactly.
 ``dag``
     This type represents a nestable directed acyclic graph (DAG) of nodes.
-    Each node has an operator and one or more operands. A operand can be
- As an example, DAGs are used to represent code and patterns for use by - the code generator instruction selection algorithms. + As an example, DAGs are used to represent code patterns for use by + the code generator instruction selection algorithms. See `Directed + acyclic graphs (DAGs)`_ for more details; :token:`ClassID` Specifying a class name in a type context indicates @@ -374,6 +375,7 @@ sometimes not when the value is the empty list (``[]``). This represents a DAG initializer (note the parentheses). The first :token:`DagArg` is called the "operator" of the DAG and must be a record. +See `Directed acyclic graphs (DAGs)`_ for more details. .. productionlist:: SimpleValue6: `TokIdentifier` @@ -582,7 +584,7 @@ in a ``bit`` field. The ``defvar`` form defines a variable whose value can be used in other value expressions within the body. The variable is not a field: it does not become a field of the class or record being defined. Variables are provided -to hold temporary values while processing the body. See `Defvar in Record +to hold temporary values while processing the body. See `Defvar in a Record Body`_ for more details. When class ``C2`` inherits from class ``C1``, it acquires all the field @@ -1129,7 +1131,7 @@ the next iteration. The following ``defvar`` will not work:: defvar i = !add(i, 1) Variables can also be defined with ``defvar`` in a record body. See -`Defvar in Record Body`_ for more details. +`Defvar in a Record Body`_ for more details. ``foreach`` --- iterate over a sequence of statements ----------------------------------------------------- @@ -1193,7 +1195,7 @@ the usual way: in a case like ``if v1 then if v2 then {...} else {...}``, the The :token:`IfBody` of the then and else arms of the ``if`` establish an inner scope. Any ``defvar`` variables defined in the bodies go out of scope -when the bodies are finished (see `Defvar in Record Body`_ for more details). +when the bodies are finished (see `Defvar in a Record Body`_ for more details). The ``if`` statement can also be used in a record :token:`Body`. @@ -1201,8 +1203,41 @@ The ``if`` statement can also be used in a record :token:`Body`. Additional Details ================== -Defvar in record body ---------------------- +Directed acyclic graphs (DAGs) +------------------------------ + +A directed acyclic graph can be represented directly in TableGen using the +``dag`` datatype. A DAG node consists of an operator and zero or more +operands. Each operand can be of any desired type. By using another DAG node +as an operand, an arbitrary graph of DAG nodes can be built. + +The syntax of a ``dag`` instance is: + + ``(`` *operator* *operand1*\ ``,`` *operand2*\ ``,`` ... ``)`` + +The operator must be present and must be a record. There can be zero or more +operands, separated by commas. The operator and operands can have three +formats. + +====================== ============================================= +Format Meaning +====================== ============================================= +*value* operand value +*value*\ ``:``\ *name* operand value and associated name +*name* operand name with unset (uninitialized) value +====================== ============================================= + +The *value* can be any TableGen value. The *name*, if present, must be a +:token:`TokVarName`, which starts with a dollar sign (``$``). The purpose of +a name is to tag an operator or operand in a DAG with a particular meaning, +or to associate an operand in one DAG with a like-named operand in another +DAG. 
+The following bang operators manipulate DAGs: ``!con``, ``!dag``, ``!foreach``,
+``!getop``, ``!setop``.
+
+Defvar in a record body
+-----------------------
 
 In addition to defining global variables, the ``defvar`` statement can be
 used inside the :token:`Body` of a class or record definition to define

From 4341c6618decb4014a167bc83aeeed49ab49b34f Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Wed, 16 Sep 2020 08:10:55 -0400
Subject: [PATCH 0831/1079] [OPENMP] Do not allow threadprivates as base for
 array-like reduction.

The base must be shared between the threads; threadprivates are not allowed
to be bases for array-like reductions.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D85762
---
 clang/lib/Sema/SemaOpenMP.cpp                     | 11 +++++++++++
 clang/test/OpenMP/parallel_reduction_messages.cpp |  4 ++++
 2 files changed, 15 insertions(+)

diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 336f264229146..1a0470a9606d9 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -15120,6 +15120,17 @@ static bool actOnOMPReductionKindClause(
           continue;
         }
       }
+    } else {
+      // Threadprivates cannot be shared between threads, so diagnose if the
+      // base is a threadprivate variable.
+      DSAStackTy::DSAVarData DVar = Stack->getTopDSA(D, /*FromParent=*/false);
+      if (DVar.CKind == OMPC_threadprivate) {
+        S.Diag(ELoc, diag::err_omp_wrong_dsa)
+            << getOpenMPClauseName(DVar.CKind)
+            << getOpenMPClauseName(OMPC_reduction);
+        reportOriginalDsa(S, Stack, D, DVar);
+        continue;
+      }
     }
 
     // Try to find 'declare reduction' corresponding construct before using
diff --git a/clang/test/OpenMP/parallel_reduction_messages.cpp b/clang/test/OpenMP/parallel_reduction_messages.cpp
index b464bf5b96437..12b34a4de07ba 100644
--- a/clang/test/OpenMP/parallel_reduction_messages.cpp
+++ b/clang/test/OpenMP/parallel_reduction_messages.cpp
@@ -92,6 +92,8 @@ class S6 { // expected-note 3 {{candidate function (the implicit copy assignment
 S3 h, k;
 #pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}}
+int *gptr;
+#pragma omp threadprivate(gptr) // expected-note {{defined as threadprivate or thread local}}
 
 template <class T> // expected-note {{declared here}}
 T tmain(T argc) {
@@ -277,6 +279,8 @@ int main(int argc, char **argv) {
   m++;
 #pragma omp parallel reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}}
   m++;
+#pragma omp parallel reduction(+:gptr[:argc]) // expected-error {{threadprivate or thread local variable cannot be reduction}}
+  ;
 
   return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}}
 }

From cb9528a0420e01caf7f3dc8288a11258fcf1425d Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Wed, 16 Sep 2020 14:49:44 +0100
Subject: [PATCH 0832/1079] [DSE] Add another test case with loop carried
 dependence.
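The new test (sketched here from the IR added below) has a store in
%cond.store whose value can be read by the load in %cond.read on a later
iteration of the loop, so DSE must not eliminate it:

  cond.store:
    %ptr.2 = getelementptr inbounds [12 x i32], [12 x i32]* %nodeStack, i32 0, i32 %depth.1
    store i32 10, i32* %ptr.2, align 4   ; must be kept
  ...
  cond.read:
    %ptr.1 = getelementptr inbounds [12 x i32], [12 x i32]* %nodeStack, i32 0, i32 %sub
    %use = load i32, i32* %ptr.1, align 4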
--- .../multiblock-loop-carried-dependence.ll | 71 ++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loop-carried-dependence.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loop-carried-dependence.ll index b168dcaa859eb..b7a882a65bc15 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loop-carried-dependence.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loop-carried-dependence.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -S %s | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa -S %s | FileCheck %s target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512" @@ -141,3 +141,72 @@ exit: } declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + +; Make sure `store i32 10, i32* %ptr.2` in %cond.store is not removed. The +; stored value may be read by `%use = load i32, i32* %ptr.1` in a future +; iteration. +define void@test.3() { +; CHECK-LABEL: @test.3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NODESTACK:%.*]] = alloca [12 x i32], align 4 +; CHECK-NEXT: [[NODESTACK_CAST:%.*]] = bitcast [12 x i32]* [[NODESTACK]] to i8* +; CHECK-NEXT: [[C_1:%.*]] = call i1 @cond(i32 1) +; CHECK-NEXT: br i1 [[C_1]], label [[CLEANUP:%.*]], label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[DEPTH_1:%.*]] = phi i32 [ [[DEPTH_1_BE:%.*]], [[LOOP_LATCH:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[DEPTH_1]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[COND_READ:%.*]], label [[COND_STORE:%.*]] +; CHECK: cond.read: +; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[DEPTH_1]], -3 +; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr inbounds [12 x i32], [12 x i32]* [[NODESTACK]], i32 0, i32 [[SUB]] +; CHECK-NEXT: [[USE:%.*]] = load i32, i32* [[PTR_1]], align 4 +; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond(i32 [[USE]]) +; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_LATCH]], label [[COND_STORE]] +; CHECK: cond.store: +; CHECK-NEXT: [[PTR_2:%.*]] = getelementptr inbounds [12 x i32], [12 x i32]* [[NODESTACK]], i32 0, i32 [[DEPTH_1]] +; CHECK-NEXT: store i32 10, i32* [[PTR_2]], align 4 +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[DEPTH_1]], 1 +; CHECK-NEXT: [[C_3:%.*]] = call i1 @cond(i32 20) +; CHECK-NEXT: br i1 [[C_3]], label [[CLEANUP]], label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[DEPTH_1_BE]] = phi i32 [ [[SUB]], [[COND_READ]] ], [ [[INC]], [[COND_STORE]] ] +; CHECK-NEXT: br label [[LOOP_HEADER]] +; CHECK: cleanup: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 48, i8* nonnull [[NODESTACK_CAST]]) +; CHECK-NEXT: ret void +; +entry: + %nodeStack = alloca [12 x i32], align 4 + %nodeStack.cast = bitcast [12 x i32]* %nodeStack to i8* + %c.1 = call i1 @cond(i32 1) + br i1 %c.1, label %cleanup, label %loop.header + +loop.header: ; preds = %entry, %while.cond.backedge + %depth.1 = phi i32 [ %depth.1.be, %loop.latch ], [ 3, %entry ] + %cmp = icmp sgt i32 %depth.1, 0 + br i1 %cmp, label %cond.read, label %cond.store + +cond.read: ; preds = %while.cond + %sub = add nsw i32 %depth.1, -3 + %ptr.1 = getelementptr inbounds [12 x i32], [12 x i32]* %nodeStack, i32 0, i32 %sub + %use = load i32, i32* %ptr.1, align 4 + %c.2 = call i1 @cond(i32 %use) + br i1 %c.2, label %loop.latch, label %cond.store + +cond.store: + %ptr.2 = getelementptr inbounds [12 x i32], [12 x i32]* %nodeStack, i32 0, i32 %depth.1 + store i32 10, i32* %ptr.2, align 4 + %inc = add nsw i32 %depth.1, 1 + %c.3 = call 
i1 @cond(i32 20) + br i1 %c.3, label %cleanup, label %loop.latch + +loop.latch: + %depth.1.be = phi i32 [ %sub, %cond.read ], [ %inc, %cond.store ] + br label %loop.header + +cleanup: ; preds = %while.body, %while.end, %entry + call void @llvm.lifetime.end.p0i8(i64 48, i8* nonnull %nodeStack.cast) #3 + ret void +} + +declare i1 @cond(i32) From 855ec517a300daee6acb48474b6d3304c0914c60 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 15 Sep 2020 12:04:59 +0200 Subject: [PATCH 0833/1079] [mlir] Model StringRef in C API Numerous MLIR functions return instances of `StringRef` to refer to a non-owning fragment of a string (usually owned by the context). This is a relatively simple class that is defined in LLVM. Provide a simple wrapper in the MLIR C API that contains the pointer and length of the string fragment and use it for Standard attribute functions that return StringRef instead of the previous, callback-based mechanism. Reviewed By: stellaraccident Differential Revision: https://reviews.llvm.org/D87677 --- mlir/docs/CAPI.md | 51 ++++++++---------- mlir/include/mlir-c/StandardAttributes.h | 67 +++++++----------------- mlir/include/mlir-c/Support.h | 57 ++++++++++++++++++++ mlir/include/mlir/CAPI/Support.h | 31 +++++++++++ mlir/lib/Bindings/Python/IRModules.cpp | 6 +-- mlir/lib/CAPI/IR/CMakeLists.txt | 1 + mlir/lib/CAPI/IR/StandardAttributes.cpp | 53 +++++++------------ mlir/lib/CAPI/IR/Support.cpp | 15 ++++++ mlir/test/CAPI/ir.c | 34 +++++++----- 9 files changed, 187 insertions(+), 128 deletions(-) create mode 100644 mlir/include/mlir-c/Support.h create mode 100644 mlir/include/mlir/CAPI/Support.h create mode 100644 mlir/lib/CAPI/IR/Support.cpp diff --git a/mlir/docs/CAPI.md b/mlir/docs/CAPI.md index 2ec25d15747c7..e71dee0917744 100644 --- a/mlir/docs/CAPI.md +++ b/mlir/docs/CAPI.md @@ -97,37 +97,32 @@ as follows. its first argument is `Y`, and it is the responsibility of the caller to ensure it is indeed the case. -### Returning String References +### Auxiliary Types + +#### `StringRef` Numerous MLIR functions return instances of `StringRef` to refer to a non-owning segment of a string. This segment may or may not be null-terminated. In C API, -these functions take an additional callback argument of type -`MlirStringCallback` (pointer to a function with signature `void (*)(const char -*, intptr_t, void *)`) and a pointer to user-defined data. This callback is -invoked with a pointer to the string segment, its size and is forwarded the -user-defined data. The caller is in charge of managing the string segment -according to its memory model: for strings owned by the object (e.g., string -attributes), the caller can store the pointer and the size and use them directly -as long as the parent object is live or copy the string to a new location with a -null terminator if expected; for generated strings (e.g., in printing), the -caller is expected to copy the string segment if it intends to use it later. - -**Note:** this interface may be revised in the near future. - -### Conversion To String and Printing - -IR objects can be converted to a string representation, for example for -printing, using `mlirXPrint(MlirX, MlirStringCallback, void *)` functions. These -functions accept take arguments a callback with signature `void (*)(const char -*, intptr_t, void *)` and a pointer to user-defined data. They call the callback -and supply it with chunks of the string representation, provided as a pointer to -the first character and a length, and forward the user-defined data unmodified. 
-It is up to the caller to allocate memory if the string representation must be
-stored and perform the copy. There is no guarantee that the pointer supplied to
-the callback points to a null-terminated string, the size argument should be
-used to find the end of the string. The callback may be called multiple times
-with consecutive chunks of the string representation (the printing itself is
-buffered).
+these are represented as instances of `MlirStringRef` structure that contains a
+pointer to the first character of the string fragment (`str`) and the fragment
+length (`length`). Note that the fragment is _not necessarily_ null-terminated;
+the `length` field must be used to identify the last character. `MlirStringRef`
+is a non-owning pointer; the caller is in charge of performing the copy or
+ensuring that the pointee outlives all uses of `MlirStringRef`.
+
+### Printing
+
+IR objects can be printed using `mlirXPrint(MlirX, MlirStringCallback, void *)`
+functions. These functions take as arguments a callback with signature `void
+(*)(const char *, intptr_t, void *)` and a pointer to user-defined data. They
+call the callback and supply it with chunks of the string representation,
+provided as a pointer to the first character and a length, and forward the
+user-defined data unmodified. It is up to the caller to allocate memory if the
+string representation must be stored and perform the copy. There is no guarantee
+that the pointer supplied to the callback points to a null-terminated string;
+the size argument should be used to find the end of the string. The callback may
+be called multiple times with consecutive chunks of the string representation
+(the printing itself is buffered).
 
 *Rationale*: this approach allows the caller to have full control of the
 allocation and avoid unnecessary allocation and copying inside the printer.
diff --git a/mlir/include/mlir-c/StandardAttributes.h b/mlir/include/mlir-c/StandardAttributes.h
index ab8d837aeeb8b..2ea2ba7a2d4fa 100644
--- a/mlir/include/mlir-c/StandardAttributes.h
+++ b/mlir/include/mlir-c/StandardAttributes.h
@@ -16,6 +16,7 @@
 
 #include "mlir-c/AffineMap.h"
 #include "mlir-c/IR.h"
+#include "mlir-c/Support.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -152,13 +153,9 @@ MlirAttribute mlirOpaqueAttrGet(MlirContext ctx, const char *dialectNamespace,
  * is associated. The namespace string is owned by the context. */
 const char *mlirOpaqueAttrGetDialectNamespace(MlirAttribute attr);
 
-/** Calls the provided callback with the opaque byte data stored in the given
- * opaque attribute. The callback is invoked once, and the data it receives is
- * not necessarily null terminated. The data remains live as long as the context
- * in which the attribute lives. */
-/* TODO: consider exposing StringRef and using it instead of the callback. */
-void mlirOpaqueAttrGetData(MlirAttribute attr, MlirStringCallback callback,
-                           void *userData);
+/** Returns the raw data as a string reference. The data remains live as long as
+ * the context in which the attribute lives. */
+MlirStringRef mlirOpaqueAttrGetData(MlirAttribute attr);
 
 /*============================================================================*/
 /* String attribute. */
 /*============================================================================*/
@@ -178,13 +175,9 @@ MlirAttribute mlirStringAttrGet(MlirContext ctx, intptr_t length,
 MlirAttribute mlirStringAttrTypedGet(MlirType type, intptr_t length,
                                      const char *data);
 
-/** Calls the provided callback with the string stored in the given string
- * attribute. The callback is invoked once, and the data it receives is not
- * necessarily null terminated. The data remains live as long as the context in
- * which the attribute lives. */
-/* TODO: consider exposing StringRef and using it instead of the callback. */
-void mlirStringAttrGetValue(MlirAttribute attr, MlirStringCallback callback,
-                            void *userData);
+/** Returns the attribute value as a string reference. The data remains live as
+ * long as the context in which the attribute lives. */
+MlirStringRef mlirStringAttrGetValue(MlirAttribute attr);
 
 /*============================================================================*/
 /* SymbolRef attribute. */
 /*============================================================================*/
@@ -201,23 +194,13 @@ MlirAttribute mlirSymbolRefAttrGet(MlirContext ctx, intptr_t length,
                                    const char *symbol, intptr_t numReferences,
                                    MlirAttribute *references);
 
-/** Calls the provided callback with the string containing the root referenced
- * symbol. The callback is invoked once, and the data it receives is not
- * necessarily null terminated. The data remains live as long as the context in
- * which the attribute lives. */
-/* TODO: consider exposing StringRef and using it instead of the callback. */
-void mlirSymbolRefAttrGetRootReference(MlirAttribute attr,
-                                       MlirStringCallback callback,
-                                       void *userData);
-
-/** Calls the provided callback with the string containing the leaf referenced
- * symbol. The callback is invoked once, and the data it receives is not
- * necessarily null terminated. The data remains live as long as the context in
- * which the attribute lives. */
-/* TODO: consider exposing StringRef and using it instead of the callback. */
-void mlirSymbolRefAttrGetLeafReference(MlirAttribute attr,
-                                       MlirStringCallback callback,
-                                       void *userData);
+/** Returns the string reference to the root referenced symbol. The data remains
+ * live as long as the context in which the attribute lives. */
+MlirStringRef mlirSymbolRefAttrGetRootReference(MlirAttribute attr);
+
+/** Returns the string reference to the leaf referenced symbol. The data remains
+ * live as long as the context in which the attribute lives. */
+MlirStringRef mlirSymbolRefAttrGetLeafReference(MlirAttribute attr);
 
 /** Returns the number of references nested in the given symbol reference
  * attribute. */
@@ -240,14 +223,9 @@ int mlirAttributeIsAFlatSymbolRef(MlirAttribute attr);
 MlirAttribute mlirFlatSymbolRefAttrGet(MlirContext ctx, intptr_t length,
                                        const char *symbol);
 
-/** Calls the provided callback with the string containing the referenced
- * symbol. The callback is invoked once, and the data it receives is not
- * necessarily null terminated. The data remains live as long as the context in
- * which the attribute lives. */
-/* TODO: consider exposing StringRef and using it instead of the callback. */
-void mlirFloatSymbolRefAttrGetValue(MlirAttribute attr,
-                                    MlirStringCallback callback,
-                                    void *userData);
+/** Returns the referenced symbol as a string reference. The data remains live
+ * as long as the context in which the attribute lives. */
+MlirStringRef mlirFlatSymbolRefAttrGetValue(MlirAttribute attr);
 
 /*============================================================================*/
 /* Type attribute. */
@@ -383,10 +361,7 @@ int64_t mlirDenseElementsAttrGetInt64SplatValue(MlirAttribute attr);
 uint64_t mlirDenseElementsAttrGetUInt64SplatValue(MlirAttribute attr);
 float mlirDenseElementsAttrGetFloatSplatValue(MlirAttribute attr);
 double mlirDenseElementsAttrGetDoubleSplatValue(MlirAttribute attr);
-/* TODO: consider exposing StringRef and using it instead of the callback. */
-void mlirDenseElementsAttrGetStringSplatValue(MlirAttribute attr,
-                                              MlirStringCallback callback,
-                                              void *userData);
+MlirStringRef mlirDenseElementsAttrGetStringSplatValue(MlirAttribute attr);
 
 /** Returns the pos-th value (flat contiguous indexing) of a specific type
  * contained by the given dense elements attribute. */
@@ -397,10 +372,8 @@ int64_t mlirDenseElementsAttrGetInt64Value(MlirAttribute attr, intptr_t pos);
 uint64_t mlirDenseElementsAttrGetUInt64Value(MlirAttribute attr, intptr_t pos);
 float mlirDenseElementsAttrGetFloatValue(MlirAttribute attr, intptr_t pos);
 double mlirDenseElementsAttrGetDoubleValue(MlirAttribute attr, intptr_t pos);
-/* TODO: consider exposing StringRef and using it instead of the callback. */
-void mlirDenseElementsAttrGetStringValue(MlirAttribute attr, intptr_t pos,
-                                         MlirStringCallback callback,
-                                         void *userData);
+MlirStringRef mlirDenseElementsAttrGetStringValue(MlirAttribute attr,
+                                                  intptr_t pos);
 
 /*============================================================================*/
 /* Opaque elements attribute. */
diff --git a/mlir/include/mlir-c/Support.h b/mlir/include/mlir-c/Support.h
new file mode 100644
index 0000000000000..1039c68c09bf0
--- /dev/null
+++ b/mlir/include/mlir-c/Support.h
@@ -0,0 +1,57 @@
+/*===-- mlir-c/Support.h - Helpers for C API to Core MLIR ---------*- C -*-===*\
+|*                                                                            *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This header declares the auxiliary data structures used in C APIs to core *|
+|* MLIR functionality.                                                        *|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef MLIR_C_SUPPORT_H
+#define MLIR_C_SUPPORT_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*============================================================================*/
+/* MlirStringRef. */
+/*============================================================================*/
+
+/** A pointer to a sized fragment of a string, not necessarily null-terminated.
+ * Does not own the underlying string. This is equivalent to llvm::StringRef.
+ */
+struct MlirStringRef {
+  const char *data; /**< Pointer to the first symbol. */
+  size_t length;    /**< Length of the fragment. */
+};
+typedef struct MlirStringRef MlirStringRef;
+
+/** Constructs a string reference from the pointer and length. The pointer need
+ * not point to a null-terminated string.
+ */
+inline MlirStringRef mlirStringRefCreate(const char *str, size_t length) {
+  MlirStringRef result;
+  result.data = str;
+  result.length = length;
+  return result;
+}
+
+/** Constructs a string reference from a null-terminated C string. Prefer
+ * mlirStringRefCreate if the length of the string is known.
+ */
+MlirStringRef mlirStringRefCreateFromCString(const char *str);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // MLIR_C_SUPPORT_H
diff --git a/mlir/include/mlir/CAPI/Support.h b/mlir/include/mlir/CAPI/Support.h
new file mode 100644
index 0000000000000..0c2b069906657
--- /dev/null
+++ b/mlir/include/mlir/CAPI/Support.h
@@ -0,0 +1,31 @@
+//===- Support.h - C API Helpers Implementation -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains definitions for converting MLIR C++ objects into helper
+// C structures for the purpose of the C API. This file should not be included
+// from C++ code other than the C API implementation, nor from C code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_CAPI_SUPPORT_H
+#define MLIR_CAPI_SUPPORT_H
+
+#include "mlir-c/Support.h"
+#include "llvm/ADT/StringRef.h"
+
+/// Converts a StringRef into its MLIR C API equivalent.
+inline MlirStringRef wrap(llvm::StringRef ref) {
+  return mlirStringRefCreate(ref.data(), ref.size());
+}
+
+/// Creates a StringRef out of its MLIR C API equivalent.
+inline llvm::StringRef unwrap(MlirStringRef ref) {
+  return llvm::StringRef(ref.data, ref.length);
+}
+
+#endif // MLIR_CAPI_SUPPORT_H
diff --git a/mlir/lib/Bindings/Python/IRModules.cpp b/mlir/lib/Bindings/Python/IRModules.cpp
index bf1235a77d08c..527c530518cac 100644
--- a/mlir/lib/Bindings/Python/IRModules.cpp
+++ b/mlir/lib/Bindings/Python/IRModules.cpp
@@ -285,10 +285,8 @@ class PyStringAttribute : public PyConcreteAttribute<PyStringAttribute> {
     c.def_property_readonly(
         "value",
         [](PyStringAttribute &self) {
-          PySinglePartStringAccumulator accum;
-          mlirStringAttrGetValue(self.attr, accum.getCallback(),
-                                 accum.getUserData());
-          return accum.takeValue();
+          MlirStringRef stringRef = mlirStringAttrGetValue(self.attr);
+          return py::str(stringRef.data, stringRef.length);
         },
         "Returns the value of the string attribute");
   }
diff --git a/mlir/lib/CAPI/IR/CMakeLists.txt b/mlir/lib/CAPI/IR/CMakeLists.txt
index 3e2e3d6a22d82..4158a4c96efd0 100644
--- a/mlir/lib/CAPI/IR/CMakeLists.txt
+++ b/mlir/lib/CAPI/IR/CMakeLists.txt
@@ -4,6 +4,7 @@ add_mlir_library(MLIRCAPIIR
   IR.cpp
   StandardAttributes.cpp
   StandardTypes.cpp
+  Support.cpp
 
   EXCLUDE_FROM_LIBMLIR
diff --git a/mlir/lib/CAPI/IR/StandardAttributes.cpp b/mlir/lib/CAPI/IR/StandardAttributes.cpp
index cade603132dcf..77d5fcb8b33c2 100644
--- a/mlir/lib/CAPI/IR/StandardAttributes.cpp
+++ b/mlir/lib/CAPI/IR/StandardAttributes.cpp
@@ -9,6 +9,7 @@
 #include "mlir-c/StandardAttributes.h"
 #include "mlir/CAPI/AffineMap.h"
 #include "mlir/CAPI/IR.h"
+#include "mlir/CAPI/Support.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/StandardTypes.h"
 
@@ -165,10 +166,8 @@ const char *mlirOpaqueAttrGetDialectNamespace(MlirAttribute attr) {
   return unwrap(attr).cast<OpaqueAttr>().getDialectNamespace().c_str();
 }
 
-void mlirOpaqueAttrGetData(MlirAttribute attr, MlirStringCallback callback,
-                           void *userData) {
-  StringRef data = unwrap(attr).cast<OpaqueAttr>().getAttrData();
-  callback(data.data(), static_cast<intptr_t>(data.size()), userData);
+MlirStringRef mlirOpaqueAttrGetData(MlirAttribute attr) {
+  return wrap(unwrap(attr).cast<OpaqueAttr>().getAttrData());
 }
 
 /*============================================================================*/
@@ -189,10 +188,8 @@ MlirAttribute mlirStringAttrTypedGet(MlirType type, intptr_t length,
   return wrap(StringAttr::get(StringRef(data, length), unwrap(type)));
 }
 
-void mlirStringAttrGetValue(MlirAttribute attr, MlirStringCallback callback,
-                            void *userData) {
-  StringRef data = unwrap(attr).cast<StringAttr>().getValue();
-  callback(data.data(), static_cast<intptr_t>(data.size()), userData);
+MlirStringRef mlirStringAttrGetValue(MlirAttribute attr) {
+  return wrap(unwrap(attr).cast<StringAttr>().getValue());
 }
 
 /*============================================================================*/
@@ -213,18 +210,12 @@ MlirAttribute mlirSymbolRefAttrGet(MlirContext ctx, intptr_t length,
   return wrap(SymbolRefAttr::get(StringRef(symbol, length), refs, unwrap(ctx)));
 }
 
-void mlirSymbolRefAttrGetRootReference(MlirAttribute attr,
-                                       MlirStringCallback callback,
-                                       void *userData) {
-  StringRef ref = unwrap(attr).cast<SymbolRefAttr>().getRootReference();
-  callback(ref.data(), ref.size(), userData);
+MlirStringRef mlirSymbolRefAttrGetRootReference(MlirAttribute attr) {
+  return wrap(unwrap(attr).cast<SymbolRefAttr>().getRootReference());
 }
 
-void mlirSymbolRefAttrGetLeafReference(MlirAttribute attr,
-                                       MlirStringCallback callback,
-                                       void *userData) {
-  StringRef ref = unwrap(attr).cast<SymbolRefAttr>().getLeafReference();
-  callback(ref.data(), ref.size(), userData);
+MlirStringRef mlirSymbolRefAttrGetLeafReference(MlirAttribute attr) {
+  return wrap(unwrap(attr).cast<SymbolRefAttr>().getLeafReference());
 }
 
 intptr_t mlirSymbolRefAttrGetNumNestedReferences(MlirAttribute attr) {
@@ -250,11 +241,8 @@ MlirAttribute mlirFlatSymbolRefAttrGet(MlirContext ctx, intptr_t length,
   return wrap(FlatSymbolRefAttr::get(StringRef(symbol, length), unwrap(ctx)));
 }
 
-void mlirFloatSymbolRefAttrGetValue(MlirAttribute attr,
-                                    MlirStringCallback callback,
-                                    void *userData) {
-  StringRef symbol = unwrap(attr).cast<FlatSymbolRefAttr>().getValue();
-  callback(symbol.data(), symbol.size(), userData);
+MlirStringRef mlirFlatSymbolRefAttrGetValue(MlirAttribute attr) {
+  return wrap(unwrap(attr).cast<FlatSymbolRefAttr>().getValue());
 }
 
 /*============================================================================*/
@@ -477,12 +465,9 @@ float mlirDenseElementsAttrGetFloatSplatValue(MlirAttribute attr) {
 double mlirDenseElementsAttrGetDoubleSplatValue(MlirAttribute attr) {
   return unwrap(attr).cast<DenseElementsAttr>().getSplatValue<double>();
 }
-void mlirDenseElementsAttrGetStringSplatValue(MlirAttribute attr,
-                                              MlirStringCallback callback,
-                                              void *userData) {
-  StringRef str =
-      unwrap(attr).cast<DenseElementsAttr>().getSplatValue<StringRef>();
-  callback(str.data(), str.size(), userData);
+MlirStringRef mlirDenseElementsAttrGetStringSplatValue(MlirAttribute attr) {
+  return wrap(
+      unwrap(attr).cast<DenseElementsAttr>().getSplatValue<StringRef>());
 }
 
 //===----------------------------------------------------------------------===//
@@ -518,13 +503,11 @@ double mlirDenseElementsAttrGetDoubleValue(MlirAttribute attr, intptr_t pos) {
   return *(unwrap(attr).cast<DenseElementsAttr>().getValues<double>().begin() +
            pos);
 }
-void mlirDenseElementsAttrGetStringValue(MlirAttribute attr, intptr_t pos,
-                                         MlirStringCallback callback,
-                                         void *userData) {
-  StringRef str =
+MlirStringRef mlirDenseElementsAttrGetStringValue(MlirAttribute attr,
+                                                  intptr_t pos) {
+  return wrap(
       *(unwrap(attr).cast<DenseElementsAttr>().getValues<StringRef>().begin() +
-        pos);
-  callback(str.data(), str.size(), userData);
+        pos));
 }
 
 /*============================================================================*/
diff --git a/mlir/lib/CAPI/IR/Support.cpp b/mlir/lib/CAPI/IR/Support.cpp
new file mode 100644
index 0000000000000..e4b409906297d
--- /dev/null
+++ b/mlir/lib/CAPI/IR/Support.cpp
@@ -0,0 +1,15 @@
+//===- Support.cpp - Helpers for C interface to MLIR API ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir-c/Support.h"
+
+#include <cstring>
+
+MlirStringRef mlirStringRefCreateFromCString(const char *str) {
+  return mlirStringRefCreate(str, strlen(str));
+}
diff --git a/mlir/test/CAPI/ir.c b/mlir/test/CAPI/ir.c
index 0a8ebae4e19e0..ceb19ef730e48 100644
--- a/mlir/test/CAPI/ir.c
+++ b/mlir/test/CAPI/ir.c
@@ -408,31 +408,36 @@ int printStandardAttributes(MlirContext ctx) {
   mlirAttributeDump(boolean);
 
   const char data[] = "abcdefghijklmnopqestuvwxyz";
-  char buffer[10];
   MlirAttribute opaque =
       mlirOpaqueAttrGet(ctx, "std", 3, data, mlirNoneTypeGet(ctx));
   if (!mlirAttributeIsAOpaque(opaque) ||
       strcmp("std", mlirOpaqueAttrGetDialectNamespace(opaque)))
     return 4;
-  mlirOpaqueAttrGetData(opaque, callbackSetFixedLengthString, buffer);
-  if (buffer[0] != 'a' || buffer[1] != 'b' || buffer[2] != 'c')
+
+  MlirStringRef opaqueData = mlirOpaqueAttrGetData(opaque);
+  if (opaqueData.length != 3 ||
+      strncmp(data, opaqueData.data, opaqueData.length))
     return 5;
   mlirAttributeDump(opaque);
 
   MlirAttribute string = mlirStringAttrGet(ctx, 2, data + 3);
   if (!mlirAttributeIsAString(string))
     return 6;
-  mlirStringAttrGetValue(string, callbackSetFixedLengthString, buffer);
-  if (buffer[0] != 'd' || buffer[1] != 'e')
+
+  MlirStringRef stringValue = mlirStringAttrGetValue(string);
+  if (stringValue.length != 2 ||
+      strncmp(data + 3, stringValue.data, stringValue.length))
    return 7;
   mlirAttributeDump(string);
 
   MlirAttribute flatSymbolRef = mlirFlatSymbolRefAttrGet(ctx, 3, data + 5);
   if (!mlirAttributeIsAFlatSymbolRef(flatSymbolRef))
     return 8;
-  mlirFloatSymbolRefAttrGetValue(flatSymbolRef, callbackSetFixedLengthString,
-                                 buffer);
-  if (buffer[0] != 'f' || buffer[1] != 'g' || buffer[2] != 'h')
+
+  MlirStringRef flatSymbolRefValue =
+      mlirFlatSymbolRefAttrGetValue(flatSymbolRef);
+  if (flatSymbolRefValue.length != 3 ||
+      strncmp(data + 5, flatSymbolRefValue.data, flatSymbolRefValue.length))
     return 9;
   mlirAttributeDump(flatSymbolRef);
 
@@ -445,12 +450,13 @@ int printStandardAttributes(MlirContext ctx) {
       !mlirAttributeEqual(mlirSymbolRefAttrGetNestedReference(symbolRef, 1),
                           flatSymbolRef))
     return 10;
-  mlirSymbolRefAttrGetLeafReference(symbolRef, callbackSetFixedLengthString,
-                                    buffer);
-  mlirSymbolRefAttrGetRootReference(symbolRef, callbackSetFixedLengthString,
-                                    buffer + 3);
-  if (buffer[0] != 'f' || buffer[1] != 'g' || buffer[2] != 'h' ||
-      buffer[3] != 'i' || buffer[4] != 'j')
+
+  MlirStringRef symbolRefLeaf = mlirSymbolRefAttrGetLeafReference(symbolRef);
+  MlirStringRef symbolRefRoot = mlirSymbolRefAttrGetRootReference(symbolRef);
+  if (symbolRefLeaf.length != 3 ||
+      strncmp(data + 5, symbolRefLeaf.data, symbolRefLeaf.length) ||
+      symbolRefRoot.length != 2 ||
+      strncmp(data + 8, symbolRefRoot.data, symbolRefRoot.length))
     return 11;
   mlirAttributeDump(symbolRef);

From 01e2b394ee16502440dbbb5440502a1e2aaf1477 Mon Sep 17 00:00:00 2001
From: Dangeti Tharun kumar
Date: Wed, 16 Sep 2020 15:11:24 +0100
Subject: [PATCH 0834/1079] [Partial Inliner] Compute intrinsic cost through
 TTI

https://bugs.llvm.org/show_bug.cgi?id=45932

assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
"Outlined function cost should be no less than the outlined region")
getting triggered in computeBBInlineCost.
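Intrinsics like "assume" are considered regular function calls while
computing costs. This patch enables computeBBInlineCost to query TTI for
intrinsic call cost.

For context, a minimal standalone sketch of that TTI query (the helper name
is hypothetical; the TTI calls mirror the ones used in the diff below,
assuming the TargetTransformInfo API of this revision):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/IR/Operator.h"
    using namespace llvm;

    // Price an intrinsic call through TTI instead of treating it as an
    // ordinary call site.
    static int getIntrinsicCallCost(const IntrinsicInst *II,
                                    TargetTransformInfo &TTI) {
      // Collect the argument types of the call.
      SmallVector<Type *, 4> Tys;
      for (const Value *Arg : II->args())
        Tys.push_back(Arg->getType());
      // Fast-math flags can affect the cost of FP intrinsics.
      FastMathFlags FMF;
      if (const auto *FPMO = dyn_cast<FPMathOperator>(II))
        FMF = FPMO->getFastMathFlags();
      IntrinsicCostAttributes ICA(II->getIntrinsicID(), II->getType(), Tys,
                                  FMF);
      // Size-and-latency is the cost kind the partial inliner cares about.
      return TTI.getIntrinsicInstrCost(
          ICA, TargetTransformInfo::TCK_SizeAndLatency);
    }

With such a query, an intrinsic like llvm.assume should be priced as free
rather than as a call, which keeps the assertion above from firing.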
Reviewed By: fhahn

Differential Revision: https://reviews.llvm.org/D87132
---
 llvm/lib/Transforms/IPO/PartialInlining.cpp   | 64 +++++++++++++------
 .../PartialInlining/intrinsic-call-cost.ll    | 55 ++++++++++++++++
 2 files changed, 100 insertions(+), 19 deletions(-)
 create mode 100644 llvm/test/Transforms/PartialInlining/intrinsic-call-cost.ll

diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index e1dc036ae413c..a185e964d1b63 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -226,10 +226,13 @@ struct PartialInlinerImpl {
     // multi-region outlining.
     FunctionCloner(Function *F, FunctionOutliningInfo *OI,
                    OptimizationRemarkEmitter &ORE,
-                   function_ref<AssumptionCache *(Function &)> LookupAC);
+                   function_ref<AssumptionCache *(Function &)> LookupAC,
+                   function_ref<TargetTransformInfo &(Function &)> GetTTI);
     FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
                    OptimizationRemarkEmitter &ORE,
-                   function_ref<AssumptionCache *(Function &)> LookupAC);
+                   function_ref<AssumptionCache *(Function &)> LookupAC,
+                   function_ref<TargetTransformInfo &(Function &)> GetTTI);
+
     ~FunctionCloner();
 
     // Prepare for function outlining: making sure there is only
@@ -266,6 +269,7 @@ struct PartialInlinerImpl {
     std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
     OptimizationRemarkEmitter &ORE;
     function_ref<AssumptionCache *(Function &)> LookupAC;
+    function_ref<TargetTransformInfo &(Function &)> GetTTI;
   };
 
 private:
@@ -334,7 +338,7 @@ struct PartialInlinerImpl {
   // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
   // approximate both the size and runtime cost (Note that in the current
   // inline cost analysis, there is no clear distinction there either).
-  static int computeBBInlineCost(BasicBlock *BB);
+  static int computeBBInlineCost(BasicBlock *BB, TargetTransformInfo *TTI);
 
   std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
   std::unique_ptr<FunctionOutliningMultiRegionInfo>
@@ -448,9 +452,10 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
 
   // Use the same computeBBInlineCost function to compute the cost savings of
   // the outlining the candidate region.
+  TargetTransformInfo *FTTI = &GetTTI(*F);
   int OverallFunctionCost = 0;
   for (auto &BB : *F)
-    OverallFunctionCost += computeBBInlineCost(&BB);
+    OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
 
 #ifndef NDEBUG
   if (TracePartialInlining)
@@ -509,7 +514,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
         continue;
       int OutlineRegionCost = 0;
       for (auto *BB : DominateVector)
-        OutlineRegionCost += computeBBInlineCost(BB);
+        OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
 
 #ifndef NDEBUG
       if (TracePartialInlining)
@@ -843,7 +848,8 @@ bool PartialInlinerImpl::shouldPartialInline(
 // TODO: Ideally  we should share Inliner's InlineCost Analysis code.
 // For now use a simplified version. The returned 'InlineCost' will be used
 // to esimate the size cost as well as runtime cost of the BB.
-int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
+int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
+                                            TargetTransformInfo *TTI) {
   int InlineCost = 0;
   const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
   for (Instruction &I : BB->instructionsWithoutDebug()) {
@@ -866,6 +872,21 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
     if (I.isLifetimeStartOrEnd())
       continue;
 
+    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+      Intrinsic::ID IID = II->getIntrinsicID();
+      SmallVector<Type *, 4> Tys;
+      FastMathFlags FMF;
+      for (Value *Val : II->args())
+        Tys.push_back(Val->getType());
+
+      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
+        FMF = FPMO->getFastMathFlags();
+
+      IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
+      InlineCost += TTI->getIntrinsicInstrCost(ICA, TTI::TCK_SizeAndLatency);
+      continue;
+    }
+
     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
       InlineCost += getCallsiteCost(*CI, DL);
       continue;
@@ -893,11 +914,13 @@ PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
     BasicBlock* OutliningCallBB = FuncBBPair.second;
     // Now compute the cost of the call sequence to the outlined function
     // 'OutlinedFunction' in BB 'OutliningCallBB':
-    OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB);
+    auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
+    OutliningFuncCallCost +=
+        computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
 
     // Now compute the cost of the extracted/outlined function itself:
     for (BasicBlock &BB : *OutlinedFunc)
-      OutlinedFunctionCost += computeBBInlineCost(&BB);
+      OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
   }
   assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
          "Outlined function cost should be no less than the outlined region");
@@ -962,8 +985,9 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap(
 
 PartialInlinerImpl::FunctionCloner::FunctionCloner(
     Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
-    function_ref<AssumptionCache *(Function &)> LookupAC)
-    : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
+    function_ref<AssumptionCache *(Function &)> LookupAC,
+    function_ref<TargetTransformInfo &(Function &)> GetTTI)
+    : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
   ClonedOI = std::make_unique<FunctionOutliningInfo>();
 
   // Clone the function, so that we can hack away on it.
@@ -987,8 +1011,9 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
 PartialInlinerImpl::FunctionCloner::FunctionCloner(
     Function *F, FunctionOutliningMultiRegionInfo *OI,
     OptimizationRemarkEmitter &ORE,
-    function_ref<AssumptionCache *(Function &)> LookupAC)
-    : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
+    function_ref<AssumptionCache *(Function &)> LookupAC,
+    function_ref<TargetTransformInfo &(Function &)> GetTTI)
+    : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
   ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
 
   // Clone the function, so that we can hack away on it.
@@ -1099,10 +1124,10 @@ void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
 
 bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
 
-  auto ComputeRegionCost = [](SmallVectorImpl<BasicBlock *> &Region) {
+  auto ComputeRegionCost = [&](SmallVectorImpl<BasicBlock *> &Region) {
     int Cost = 0;
     for (BasicBlock* BB : Region)
-      Cost += computeBBInlineCost(BB);
+      Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
     return Cost;
   };
 
@@ -1196,9 +1221,10 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
 
   // Gather up the blocks that we're going to extract.
   std::vector<BasicBlock *> ToExtract;
+  auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
   ToExtract.push_back(ClonedOI->NonReturnBlock);
-  OutlinedRegionCost +=
-      PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
+  OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
+      ClonedOI->NonReturnBlock, ClonedFuncTTI);
   for (BasicBlock &BB : *ClonedFunc)
     if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
       ToExtract.push_back(&BB);
@@ -1206,7 +1232,7 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
       // into the outlined function which may make the outlining
       // overhead (the difference of the outlined function cost
      // and OutliningRegionCost) look larger.
-      OutlinedRegionCost += computeBBInlineCost(&BB);
+      OutlinedRegionCost += computeBBInlineCost(&BB, ClonedFuncTTI);
     }
 
   // Extract the body of the if.
@@ -1276,7 +1302,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
   std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
       computeOutliningColdRegionsInfo(F, ORE);
   if (OMRI) {
-    FunctionCloner Cloner(F, OMRI.get(), ORE, LookupAssumptionCache);
+    FunctionCloner Cloner(F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
 
 #ifndef NDEBUG
     if (TracePartialInlining) {
@@ -1309,7 +1335,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
   if (!OI)
     return {false, nullptr};
 
-  FunctionCloner Cloner(F, OI.get(), ORE, LookupAssumptionCache);
+  FunctionCloner Cloner(F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
   Cloner.NormalizeReturnBlock();
 
   Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
diff --git a/llvm/test/Transforms/PartialInlining/intrinsic-call-cost.ll b/llvm/test/Transforms/PartialInlining/intrinsic-call-cost.ll
new file mode 100644
index 0000000000000..8f5a92df8407c
--- /dev/null
+++ b/llvm/test/Transforms/PartialInlining/intrinsic-call-cost.ll
@@ -0,0 +1,55 @@
+; RUN: opt -partial-inliner -S < %s | FileCheck %s
+
+; Checks that valid costs are computed for intrinsic calls.
+; https://bugs.llvm.org/show_bug.cgi?id=45932 + + +@emit_notes = external global i8, align 2 + +; CHECK: var_reg_delete +; CHECK-NEXT: bb +; CHECK-NEXT: tail call void @delete_variable_part() +; CHECK-NEXT: ret void + +define void @var_reg_delete() { +bb: + tail call void @delete_variable_part() + ret void +} + +; CHECK: delete_variable_part +; CHECK-NEXT: bb +; CHECK-NEXT: %tmp1.i = tail call i32 @find_variable_location_part() +; CHECK-NEXT: %tmp3.i = icmp sgt i32 %tmp1.i, -1 +; CHECK-NEXT: br i1 %tmp3.i, label %bb4.i, label %delete_slot_part.exit + +; CHECK: bb4.i +; CHECK-NEXT: %tmp.i.i = load i8, i8* @emit_notes +; CHECK-NEXT: %tmp1.i.i = icmp ne i8 %tmp.i.i, 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 %tmp1.i.i) +; CHECK-NEXT: unreachable + +; CHECK: delete_slot_part.exit +; CHECK-NEXT: ret void + +define void @delete_variable_part() { +bb: + %tmp1.i = tail call i32 @find_variable_location_part() + %tmp3.i = icmp sgt i32 %tmp1.i, -1 + br i1 %tmp3.i, label %bb4.i, label %delete_slot_part.exit + +bb4.i: + %tmp.i.i = load i8, i8* @emit_notes, align 2 + %tmp1.i.i = icmp ne i8 %tmp.i.i, 0 + tail call void @llvm.assume(i1 %tmp1.i.i) + unreachable + +delete_slot_part.exit: + ret void +} + +; CHECK: declare i32 @find_variable_location_part +declare i32 @find_variable_location_part() + +; CHECK: declare void @llvm.assume(i1 noundef) +declare void @llvm.assume(i1 noundef) From 8c0dc1e38b6c1a2d35c66ac4b0c1ccd616dd1685 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Wed, 16 Sep 2020 10:03:35 -0400 Subject: [PATCH 0835/1079] Enable inlining for Linalg dialect Enable inlining for Linalg dialect. Differential Revision: https://reviews.llvm.org/D87567 --- mlir/lib/Dialect/Linalg/IR/LinalgTypes.cpp | 35 ++++++++++++++++++++++ mlir/test/Dialect/Linalg/inlining.mlir | 31 +++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 mlir/test/Dialect/Linalg/inlining.mlir diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgTypes.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgTypes.cpp index b8bffd35f5a12..abc82f300f633 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgTypes.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgTypes.cpp @@ -17,6 +17,7 @@ #include "mlir/IR/StandardTypes.h" #include "mlir/Parser.h" #include "mlir/Support/LLVM.h" +#include "mlir/Transforms/InliningUtils.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" @@ -24,6 +25,38 @@ using namespace mlir; using namespace mlir::linalg; +//===----------------------------------------------------------------------===// +// LinalgDialect Dialect Interfaces +//===----------------------------------------------------------------------===// + +namespace { + +struct LinalgInlinerInterface : public DialectInlinerInterface { + using DialectInlinerInterface::DialectInlinerInterface; + + // We don't have any special restrictions on what can be inlined into + // destination regions (e.g. while/conditional bodies). Always allow it. + bool isLegalToInline(Region *dest, Region *src, + BlockAndValueMapping &valueMapping) const final { + return true; + } + // Operations in Linalg dialect are always legal to inline. + bool isLegalToInline(Operation *, Region *, + BlockAndValueMapping &) const final { + return true; + } + // Handle the given inlined terminator by replacing it with a new operation + // as necessary. Required when the region has only one block. 
+  void handleTerminator(Operation *op,
+                        ArrayRef<Value> valuesToRepl) const final {}
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// LinalgDialect
+//===----------------------------------------------------------------------===//
+
 void mlir::linalg::LinalgDialect::initialize() {
   addTypes<RangeType>();
   addOperations<
@@ -34,7 +67,9 @@ void mlir::linalg::LinalgDialect::initialize() {
 #define GET_OP_LIST
 #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
       >();
+  addInterfaces<LinalgInlinerInterface>();
 }
+
 Type mlir::linalg::LinalgDialect::parseType(DialectAsmParser &parser) const {
   // Parse the main keyword for the type.
   StringRef keyword;
diff --git a/mlir/test/Dialect/Linalg/inlining.mlir b/mlir/test/Dialect/Linalg/inlining.mlir
new file mode 100644
index 0000000000000..1e5af263eb832
--- /dev/null
+++ b/mlir/test/Dialect/Linalg/inlining.mlir
@@ -0,0 +1,31 @@
+// RUN: mlir-opt %s -inline | FileCheck %s
+
+// These tests verify that regions with operations from the Linalg dialect
+// can be inlined.
+
+#accesses = [
+  affine_map<(i) -> (i)>,
+  affine_map<(i) -> (i)>
+]
+
+#trait = {
+  args_in = 1,
+  args_out = 1,
+  indexing_maps = #accesses,
+  iterator_types = ["parallel"]
+}
+
+func @inline_into(%arg0: memref<?xf32>) {
+  // CHECK: linalg.generic
+  call @inlined_fn(%arg0) : (memref<?xf32>) -> ()
+  return
+}
+
+func @inlined_fn(%arg0: memref<?xf32>) {
+  // CHECK: linalg.generic
+  linalg.generic #trait %arg0, %arg0 {
+    ^bb(%0 : f32, %1 : f32) :
+      linalg.yield %0 : f32
+  } : memref<?xf32>, memref<?xf32>
+  return
+}

From d9953d155493bf11a2276e202800f844a1d02396 Mon Sep 17 00:00:00 2001
From: Valentin Clement
Date: Wed, 16 Sep 2020 10:48:51 -0400
Subject: [PATCH 0836/1079] [mlir][openacc] Add missing operands for
 acc.parallel operation

Add missing operands to represent copyin with readonly modifier, copyout
with zero modifier, create with zero modifier and default clause.
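For illustration, a condensed sketch of the resulting syntax (hypothetical
values; the clause spellings and the `defaultAttr` attribute follow the
parser/printer and the ops.mlir test changes below):

    // copyin with readonly modifier and copyout with zero modifier:
    acc.parallel copyin_readonly(%a: memref<10xf32>) copyout_zero(%b: memref<10xf32>) {
    }
    // create with zero modifier and a default clause:
    acc.parallel create_zero(%a: memref<10xf32>) {
    } attributes {defaultAttr = "none"}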
Reviewed By: ftynse

Differential Revision: https://reviews.llvm.org/D87733
---
 .../mlir/Dialect/OpenACC/OpenACCOps.td        | 18 ++++-
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp       | 77 +++++++++++++++----
 mlir/test/Dialect/OpenACC/ops.mlir            | 50 ++++++++++--
 3 files changed, 123 insertions(+), 22 deletions(-)

diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 0d37215ea4e54..f6350dbdf0db9 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -64,6 +64,15 @@ def OpenACC_ReductionOpAttr : StrEnumAttr<"ReductionOpAttr",
 // 2.5.1 parallel Construct
 //===----------------------------------------------------------------------===//
 
+// Parallel op default enumeration
+def OpenACC_DefaultNone : StrEnumAttrCase<"none">;
+def OpenACC_DefaultPresent : StrEnumAttrCase<"present">;
+def OpenACC_DefaultAttr : StrEnumAttr<"DefaultAttr",
+    "default attribute value for parallel op",
+    [OpenACC_DefaultNone, OpenACC_DefaultPresent]> {
+  let cppNamespace = "::mlir::acc";
+}
+
 def OpenACC_ParallelOp : OpenACC_Op<"parallel",
     [AttrSizedOperandSegments]> {
   let summary = "parallel construct";
@@ -92,14 +101,18 @@ def OpenACC_ParallelOp : OpenACC_Op<"parallel",
       Variadic<AnyType>:$reductionOperands,
       Variadic<AnyType>:$copyOperands,
      Variadic<AnyType>:$copyinOperands,
+      Variadic<AnyType>:$copyinReadonlyOperands,
       Variadic<AnyType>:$copyoutOperands,
+      Variadic<AnyType>:$copyoutZeroOperands,
       Variadic<AnyType>:$createOperands,
+      Variadic<AnyType>:$createZeroOperands,
       Variadic<AnyType>:$noCreateOperands,
       Variadic<AnyType>:$presentOperands,
       Variadic<AnyType>:$devicePtrOperands,
       Variadic<AnyType>:$attachOperands,
       Variadic<AnyType>:$gangPrivateOperands,
-      Variadic<AnyType>:$gangFirstPrivateOperands);
+      Variadic<AnyType>:$gangFirstPrivateOperands,
+      OptionalAttr<OpenACC_DefaultAttr>:$defaultAttr);
 
   let regions = (region AnyRegion:$region);
 
@@ -114,8 +127,11 @@ def OpenACC_ParallelOp : OpenACC_Op<"parallel",
     static StringRef getReductionKeyword() { return "reduction"; }
     static StringRef getCopyKeyword() { return "copy"; }
     static StringRef getCopyinKeyword() { return "copyin"; }
+    static StringRef getCopyinReadonlyKeyword() { return "copyin_readonly"; }
    static StringRef getCopyoutKeyword() { return "copyout"; }
+    static StringRef getCopyoutZeroKeyword() { return "copyout_zero"; }
     static StringRef getCreateKeyword() { return "create"; }
+    static StringRef getCreateZeroKeyword() { return "create_zero"; }
     static StringRef getNoCreateKeyword() { return "no_create"; }
     static StringRef getPresentKeyword() { return "present"; }
     static StringRef getDevicePtrKeyword() { return "deviceptr"; }
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 3e4d1c3f0e7dc..6149512250422 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -116,8 +116,11 @@ static ParseResult parseOptionalOperand(OpAsmParser &parser, StringRef keyword,
 ///                             `reduction` `(` value-list `)`?
 ///                             `copy` `(` value-list `)`?
 ///                             `copyin` `(` value-list `)`?
+///                             `copyin_readonly` `(` value-list `)`?
 ///                             `copyout` `(` value-list `)`?
+///                             `copyout_zero` `(` value-list `)`?
 ///                             `create` `(` value-list `)`?
+///                             `create_zero` `(` value-list `)`?
 ///                             `no_create` `(` value-list `)`?
 ///                             `present` `(` value-list `)`?
 ///                             `deviceptr` `(` value-list `)`?
@@ -129,10 +132,16 @@ static ParseResult parseParallelOp(OpAsmParser &parser,
                                    OperationState &result) {
   Builder &builder = parser.getBuilder();
   SmallVector<OpAsmParser::OperandType, 8> privateOperands,
-      firstprivateOperands, createOperands, copyOperands, copyinOperands,
-      copyoutOperands, noCreateOperands, presentOperands, devicePtrOperands,
-      attachOperands, waitOperands, reductionOperands;
-  SmallVector<Type, 8> operandTypes;
+      firstprivateOperands, copyOperands, copyinOperands,
+      copyinReadonlyOperands, copyoutOperands, copyoutZeroOperands,
+      createOperands, createZeroOperands, noCreateOperands, presentOperands,
+      devicePtrOperands, attachOperands, waitOperands, reductionOperands;
+  SmallVector<Type, 8> waitOperandTypes, reductionOperandTypes,
+      copyOperandTypes, copyinOperandTypes, copyinReadonlyOperandTypes,
+      copyoutOperandTypes, copyoutZeroOperandTypes, createOperandTypes,
+      createZeroOperandTypes, noCreateOperandTypes, presentOperandTypes,
+      deviceptrOperandTypes, attachOperandTypes, privateOperandTypes,
+      firstprivateOperandTypes;
   OpAsmParser::OperandType async, numGangs, numWorkers, vectorLength, ifCond,
       selfCond;
   bool hasAsync = false, hasNumGangs = false, hasNumWorkers = false;
@@ -148,7 +157,7 @@ static ParseResult parseParallelOp(OpAsmParser &parser,
 
   // wait()?
   if (failed(parseOperandList(parser, ParallelOp::getWaitKeyword(),
-                              waitOperands, operandTypes, result)))
+                              waitOperands, waitOperandTypes, result)))
     return failure();
 
   // num_gangs(value)?
@@ -180,57 +189,78 @@ static ParseResult parseParallelOp(OpAsmParser &parser,
 
   // reduction()?
   if (failed(parseOperandList(parser, ParallelOp::getReductionKeyword(),
-                              reductionOperands, operandTypes, result)))
+                              reductionOperands, reductionOperandTypes,
+                              result)))
     return failure();
 
   // copy()?
   if (failed(parseOperandList(parser, ParallelOp::getCopyKeyword(),
-                              copyOperands, operandTypes, result)))
+                              copyOperands, copyOperandTypes, result)))
     return failure();
 
   // copyin()?
   if (failed(parseOperandList(parser, ParallelOp::getCopyinKeyword(),
-                              copyinOperands, operandTypes, result)))
+                              copyinOperands, copyinOperandTypes, result)))
+    return failure();
+
+  // copyin_readonly()?
+  if (failed(parseOperandList(parser, ParallelOp::getCopyinReadonlyKeyword(),
+                              copyinReadonlyOperands,
+                              copyinReadonlyOperandTypes, result)))
     return failure();
 
   // copyout()?
   if (failed(parseOperandList(parser, ParallelOp::getCopyoutKeyword(),
-                              copyoutOperands, operandTypes, result)))
+                              copyoutOperands, copyoutOperandTypes, result)))
+    return failure();
+
+  // copyout_zero()?
+  if (failed(parseOperandList(parser, ParallelOp::getCopyoutZeroKeyword(),
+                              copyoutZeroOperands, copyoutZeroOperandTypes,
+                              result)))
     return failure();
 
   // create()?
   if (failed(parseOperandList(parser, ParallelOp::getCreateKeyword(),
-                              createOperands, operandTypes, result)))
+                              createOperands, createOperandTypes, result)))
+    return failure();
+
+  // create_zero()?
+  if (failed(parseOperandList(parser, ParallelOp::getCreateZeroKeyword(),
+                              createZeroOperands, createZeroOperandTypes,
+                              result)))
     return failure();
 
   // no_create()?
   if (failed(parseOperandList(parser, ParallelOp::getNoCreateKeyword(),
-                              noCreateOperands, operandTypes, result)))
+                              noCreateOperands, noCreateOperandTypes, result)))
     return failure();
 
   // present()?
   if (failed(parseOperandList(parser, ParallelOp::getPresentKeyword(),
-                              presentOperands, operandTypes, result)))
+                              presentOperands, presentOperandTypes, result)))
     return failure();
 
   // deviceptr()?
   if (failed(parseOperandList(parser, ParallelOp::getDevicePtrKeyword(),
-                              devicePtrOperands, operandTypes, result)))
+                              devicePtrOperands, deviceptrOperandTypes,
+                              result)))
     return failure();
 
   // attach()?
   if (failed(parseOperandList(parser, ParallelOp::getAttachKeyword(),
-                              attachOperands, operandTypes, result)))
+                              attachOperands, attachOperandTypes, result)))
     return failure();
 
   // private()?
   if (failed(parseOperandList(parser, ParallelOp::getPrivateKeyword(),
-                              privateOperands, operandTypes, result)))
+                              privateOperands, privateOperandTypes, result)))
     return failure();
 
   // firstprivate()?
   if (failed(parseOperandList(parser, ParallelOp::getFirstPrivateKeyword(),
-                              firstprivateOperands, operandTypes, result)))
+                              firstprivateOperands, firstprivateOperandTypes,
+                              result)))
     return failure();
 
   // Parallel op region
@@ -249,8 +279,11 @@ static ParseResult parseParallelOp(OpAsmParser &parser,
       static_cast<int32_t>(reductionOperands.size()),
       static_cast<int32_t>(copyOperands.size()),
       static_cast<int32_t>(copyinOperands.size()),
+      static_cast<int32_t>(copyinReadonlyOperands.size()),
      static_cast<int32_t>(copyoutOperands.size()),
+      static_cast<int32_t>(copyoutZeroOperands.size()),
       static_cast<int32_t>(createOperands.size()),
+      static_cast<int32_t>(createZeroOperands.size()),
       static_cast<int32_t>(noCreateOperands.size()),
       static_cast<int32_t>(presentOperands.size()),
       static_cast<int32_t>(devicePtrOperands.size()),
@@ -309,14 +342,26 @@ static void print(OpAsmPrinter &printer, ParallelOp &op) {
   printOperandList(op.copyinOperands(), ParallelOp::getCopyinKeyword(),
                    printer);
 
+  // copyin_readonly()?
+  printOperandList(op.copyinReadonlyOperands(),
+                   ParallelOp::getCopyinReadonlyKeyword(), printer);
+
   // copyout()?
   printOperandList(op.copyoutOperands(), ParallelOp::getCopyoutKeyword(),
                    printer);
 
+  // copyout_zero()?
+  printOperandList(op.copyoutZeroOperands(),
+                   ParallelOp::getCopyoutZeroKeyword(), printer);
+
   // create()?
   printOperandList(op.createOperands(), ParallelOp::getCreateKeyword(),
                    printer);
 
+  // create_zero()?
+  printOperandList(op.createZeroOperands(), ParallelOp::getCreateZeroKeyword(),
+                   printer);
+
   // no_create()?
printOperandList(op.noCreateOperands(), ParallelOp::getNoCreateKeyword(), printer); diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir index b1a78c61d65d9..3398f95bf607a 100644 --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -265,14 +265,54 @@ func @testop(%a: memref<10xf32>) -> () { // CHECK-NEXT: acc.yield // CHECK-NEXT: } - -func @testparallelop() -> () { +func @testparallelop(%a: memref<10xf32>, %b: memref<10xf32>, %c: memref<10x10xf32>) -> () { %vectorLength = constant 128 : index acc.parallel vector_length(%vectorLength) { } + acc.parallel copyin(%a: memref<10xf32>, %b: memref<10xf32>) { + } + acc.parallel copyin_readonly(%a: memref<10xf32>, %b: memref<10xf32>) { + } + acc.parallel copyin(%a: memref<10xf32>) copyout_zero(%b: memref<10xf32>, %c: memref<10x10xf32>) { + } + acc.parallel copyout(%b: memref<10xf32>, %c: memref<10x10xf32>) create(%a: memref<10xf32>) { + } + acc.parallel copyout_zero(%b: memref<10xf32>, %c: memref<10x10xf32>) create_zero(%a: memref<10xf32>) { + } + acc.parallel no_create(%a: memref<10xf32>) present(%b: memref<10xf32>, %c: memref<10x10xf32>) { + } + acc.parallel deviceptr(%a: memref<10xf32>) attach(%b: memref<10xf32>, %c: memref<10x10xf32>) { + } + acc.parallel private(%a: memref<10xf32>, %c: memref<10x10xf32>) firstprivate(%b: memref<10xf32>) { + } + acc.parallel { + } attributes {defaultAttr = "none"} + acc.parallel { + } attributes {defaultAttr = "present"} return } -// CHECK: [[VECTORLENGTH:%.*]] = constant 128 : index -// CHECK-NEXT: acc.parallel vector_length([[VECTORLENGTH]]) { -// CHECK-NEXT: } +// CHECK: func @testparallelop([[ARGA:%.*]]: memref<10xf32>, [[ARGB:%.*]]: memref<10xf32>, [[ARGC:%.*]]: memref<10x10xf32>) { +// CHECK: [[VECTORLENGTH:%.*]] = constant 128 : index +// CHECK: acc.parallel vector_length([[VECTORLENGTH]]) { +// CHECK-NEXT: } +// CHECK: acc.parallel copyin([[ARGA]]: memref<10xf32>, [[ARGB]]: memref<10xf32>) { +// CHECK-NEXT: } +// CHECK: acc.parallel copyin_readonly([[ARGA]]: memref<10xf32>, [[ARGB]]: memref<10xf32>) { +// CHECK-NEXT: } +// CHECK: acc.parallel copyin([[ARGA]]: memref<10xf32>) copyout_zero([[ARGB]]: memref<10xf32>, [[ARGC]]: memref<10x10xf32>) { +// CHECK-NEXT: } +// CHECK: acc.parallel copyout([[ARGB]]: memref<10xf32>, [[ARGC]]: memref<10x10xf32>) create([[ARGA]]: memref<10xf32>) { +// CHECK-NEXT: } +// CHECK: acc.parallel copyout_zero([[ARGB]]: memref<10xf32>, [[ARGC]]: memref<10x10xf32>) create_zero([[ARGA]]: memref<10xf32>) { +// CHECK-NEXT: } +// CHECK: acc.parallel no_create([[ARGA]]: memref<10xf32>) present([[ARGB]]: memref<10xf32>, [[ARGC]]: memref<10x10xf32>) { +// CHECK-NEXT: } +// CHECK: acc.parallel deviceptr([[ARGA]]: memref<10xf32>) attach([[ARGB]]: memref<10xf32>, [[ARGC]]: memref<10x10xf32>) { +// CHECK-NEXT: } +// CHECK: acc.parallel private([[ARGA]]: memref<10xf32>, [[ARGC]]: memref<10x10xf32>) firstprivate([[ARGB]]: memref<10xf32>) { +// CHECK-NEXT: } +// CHECK: acc.parallel { +// CHECK-NEXT: } attributes {defaultAttr = "none"} +// CHECK: acc.parallel { +// CHECK-NEXT: } attributes {defaultAttr = "present"} From aa4b0b755a02d69f7f20fddf1d011b0f67a0d207 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 16 Sep 2020 15:46:23 +0100 Subject: [PATCH 0837/1079] [X86][SSE] Move VZEXT_MOVL(INSERT_SUBVECTOR(UNDEF,X,0)) handling into combineTargetShuffle. 
Now that we're getting better at combining shuffles of different vector widths, this can now be performed as part of the standard target shuffle combines and isn't required for cleanup. Exposed a minor issue in combineX86ShufflesRecursively where we failed to check if a shuffle's src ops were simple types. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 52 ++++++++++++------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 46295d10d2c28..6b316a3e5a71e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35913,9 +35913,9 @@ static SDValue combineX86ShufflesRecursively( SDValue Op = SrcOps[SrcOpIndex]; Op = peekThroughOneUseBitcasts(Op); - MVT VT = Op.getSimpleValueType(); - if (!VT.isVector()) - return SDValue(); // Bail if we hit a non-vector. + EVT VT = Op.getValueType(); + if (!VT.isVector() || !VT.isSimple()) + return SDValue(); // Bail if we hit a non-simple non-vector. assert(VT.getSizeInBits() == RootSizeInBits && "Can only combine shuffles of the same vector register size."); @@ -36718,6 +36718,27 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, } } + // Pull subvector inserts into undef through VZEXT_MOVL by making it an + // insert into a zero vector. This helps get VZEXT_MOVL closer to + // scalar_to_vectors where 256/512 are canonicalized to an insert and a + // 128-bit scalar_to_vector. This reduces the number of isel patterns. + if (!DCI.isBeforeLegalizeOps() && N0.hasOneUse()) { + SDValue V = peekThroughOneUseBitcasts(N0); + + if (V.getOpcode() == ISD::INSERT_SUBVECTOR && V.getOperand(0).isUndef() && + isNullConstant(V.getOperand(2))) { + SDValue In = V.getOperand(1); + MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), + In.getValueSizeInBits() / + VT.getScalarSizeInBits()); + In = DAG.getBitcast(SubVT, In); + SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, DL, SubVT, In); + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, + getZeroVector(VT, Subtarget, DAG, DL), Movl, + V.getOperand(2)); + } + } + return SDValue(); } case X86ISD::BLENDI: { @@ -37396,32 +37417,11 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, // TODO - merge this into combineX86ShufflesRecursively. APInt KnownUndef, KnownZero; APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); - if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, DCI)) + if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, + DCI)) return SDValue(N, 0); } - // Pull subvector inserts into undef through VZEXT_MOVL by making it an - // insert into a zero vector. This helps get VZEXT_MOVL closer to - // scalar_to_vectors where 256/512 are canonicalized to an insert and a - // 128-bit scalar_to_vector. This reduces the number of isel patterns. 
- if (N->getOpcode() == X86ISD::VZEXT_MOVL && !DCI.isBeforeLegalizeOps() && - N->getOperand(0).hasOneUse()) { - SDValue V = peekThroughOneUseBitcasts(N->getOperand(0)); - - if (V.getOpcode() == ISD::INSERT_SUBVECTOR && - V.getOperand(0).isUndef() && isNullConstant(V.getOperand(2))) { - SDValue In = V.getOperand(1); - MVT SubVT = - MVT::getVectorVT(VT.getSimpleVT().getVectorElementType(), - In.getValueSizeInBits() / VT.getScalarSizeInBits()); - In = DAG.getBitcast(SubVT, In); - SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, dl, SubVT, In); - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, - getZeroVector(VT.getSimpleVT(), Subtarget, DAG, dl), - Movl, V.getOperand(2)); - } - } - return SDValue(); } From 54bb9e86498010c631a40dbd82617c433beea712 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 15 Sep 2020 12:00:38 +0100 Subject: [PATCH 0838/1079] [AMDGPU] Add -show-mc-encoding to setreg tests This is a pre-commit for D87446 "[AMDGPU] Enable scheduling around FP MODE-setting instructions" --- .../AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll | 1033 +++++++++++------ .../CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll | 998 ++++++++++------ 2 files changed, 1314 insertions(+), 717 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll index da0455f3ed8f2..250458bbe29e7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=verde -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GFX6 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GFX789 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GFX789 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10 %s ; FIXME: This test has a DAG duplicate @@ -13,20 +13,27 @@ ; Set FP32 fp_round to round to zero define amdgpu_kernel void @test_setreg_f32_round_mode_rtz() { -; GFX6789-LABEL: test_setreg_f32_round_mode_rtz: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_f32_round_mode_rtz: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3 ; encoding: [0x01,0x08,0x80,0xba,0x03,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_f32_round_mode_rtz: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3 ; encoding: [0x01,0x08,0x00,0xba,0x03,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: 
[0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_f32_round_mode_rtz: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3 ; encoding: [0x01,0x08,0x80,0xba,0x03,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 2049, i32 3) call void asm sideeffect "", ""() ret void @@ -34,20 +41,27 @@ define amdgpu_kernel void @test_setreg_f32_round_mode_rtz() { ; Set FP64/FP16 fp_round to round to zero define amdgpu_kernel void @test_setreg_f64_round_mode_rtz() { -; GFX6789-LABEL: test_setreg_f64_round_mode_rtz: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_f64_round_mode_rtz: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3 ; encoding: [0x81,0x08,0x80,0xba,0x03,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_f64_round_mode_rtz: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3 ; encoding: [0x81,0x08,0x00,0xba,0x03,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_f64_round_mode_rtz: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3 ; encoding: [0x81,0x08,0x80,0xba,0x03,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 2177, i32 3) call void asm sideeffect "", ""() ret void @@ -55,20 +69,27 @@ define amdgpu_kernel void @test_setreg_f64_round_mode_rtz() { ; Set all fp_round to round to zero define amdgpu_kernel void @test_setreg_all_round_mode_rtz() { -; GFX6789-LABEL: test_setreg_all_round_mode_rtz: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 7 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_all_round_mode_rtz: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 7 ; encoding: [0x81,0x18,0x80,0xba,0x07,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_all_round_mode_rtz: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 7 ; encoding: [0x81,0x18,0x00,0xba,0x07,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_all_round_mode_rtz: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 7 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 7 ; encoding: [0x81,0x18,0x80,0xba,0x07,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6273, i32 7) call void asm sideeffect "", ""() ret void @@ -76,100 +97,135 @@ define 
amdgpu_kernel void @test_setreg_all_round_mode_rtz() { ; Set FP32 fp_round to dynamic mode define amdgpu_cs void @test_setreg_roundingmode_var(i32 inreg %var.mode) { -; GFX6789-LABEL: test_setreg_roundingmode_var: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 2), s0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_roundingmode_var: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 2), s0 ; encoding: [0x01,0x08,0x80,0xb9] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_roundingmode_var: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 2), s0 ; encoding: [0x01,0x08,0x00,0xb9] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_roundingmode_var: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 2), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 2), s0 ; encoding: [0x01,0x08,0x80,0xb9] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 2049, i32 %var.mode) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_ieee_mode_off() { -; GFX6789-LABEL: test_setreg_ieee_mode_off: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_ieee_mode_off: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 0 ; encoding: [0x41,0x02,0x80,0xba,0x00,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_ieee_mode_off: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 0 ; encoding: [0x41,0x02,0x00,0xba,0x00,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_ieee_mode_off: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 0 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 0 ; encoding: [0x41,0x02,0x80,0xba,0x00,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 577, i32 0) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_ieee_mode_on() { -; GFX6789-LABEL: test_setreg_ieee_mode_on: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 1 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_ieee_mode_on: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 1 ; encoding: [0x41,0x02,0x80,0xba,0x01,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_ieee_mode_on: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 1 ; encoding: [0x41,0x02,0x00,0xba,0x01,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: 
s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_ieee_mode_on: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 1 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 1 ; encoding: [0x41,0x02,0x80,0xba,0x01,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 577, i32 1) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_dx10_clamp_off() { -; GFX6789-LABEL: test_setreg_dx10_clamp_off: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_dx10_clamp_off: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 0 ; encoding: [0x01,0x02,0x80,0xba,0x00,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_dx10_clamp_off: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 0 ; encoding: [0x01,0x02,0x00,0xba,0x00,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_dx10_clamp_off: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 0 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 0 ; encoding: [0x01,0x02,0x80,0xba,0x00,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 513, i32 0) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_dx10_clamp_on() { -; GFX6789-LABEL: test_setreg_dx10_clamp_on: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 1 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_dx10_clamp_on: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 1 ; encoding: [0x01,0x02,0x80,0xba,0x01,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_dx10_clamp_on: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 1 ; encoding: [0x01,0x02,0x00,0xba,0x01,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_dx10_clamp_on: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 1 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 1 ; encoding: [0x01,0x02,0x80,0xba,0x01,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 513, i32 1) call void asm sideeffect "", ""() ret void @@ -177,20 +233,27 @@ define amdgpu_kernel void @test_setreg_dx10_clamp_on() { ; Sets full width of fp round and fp denorm fields, to a variable define amdgpu_cs void @test_setreg_full_both_round_mode_and_denorm_mode(i32 inreg %mode) { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode: 
-; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 8), s0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 8), s0 ; encoding: [0x01,0x38,0x80,0xb9] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 8), s0 ; encoding: [0x01,0x38,0x00,0xb9] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 8), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 8), s0 ; encoding: [0x01,0x38,0x80,0xb9] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 inreg %mode) call void asm sideeffect "", ""() ret void @@ -198,20 +261,27 @@ define amdgpu_cs void @test_setreg_full_both_round_mode_and_denorm_mode(i32 inre ; Does not cover last bit of denorm field define amdgpu_cs void @test_setreg_most_both_round_mode_and_denorm_mode() { -; GFX6789-LABEL: test_setreg_most_both_round_mode_and_denorm_mode: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 7), 6 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_most_both_round_mode_and_denorm_mode: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 7), 6 ; encoding: [0x01,0x30,0x80,0xba,0x06,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_most_both_round_mode_and_denorm_mode: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 7), 6 ; encoding: [0x01,0x30,0x00,0xba,0x06,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_most_both_round_mode_and_denorm_mode: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 7), 6 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 7), 6 ; encoding: [0x01,0x30,0x80,0xba,0x06,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 12289, i32 6) call void asm sideeffect "", ""() ret void @@ -219,200 +289,270 @@ define amdgpu_cs void @test_setreg_most_both_round_mode_and_denorm_mode() { ; Does not cover first bit of denorm field define amdgpu_cs void @test_setreg_most_both_round_mode_and_denorm_mode_6() { -; GFX6789-LABEL: test_setreg_most_both_round_mode_and_denorm_mode_6: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 1, 3), 6 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_most_both_round_mode_and_denorm_mode_6: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 1, 3), 6 ; encoding: [0x41,0x10,0x80,0xba,0x06,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: 
;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_most_both_round_mode_and_denorm_mode_6: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 1, 3), 6 ; encoding: [0x41,0x10,0x00,0xba,0x06,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_most_both_round_mode_and_denorm_mode_6: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 1, 3), 6 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 1, 3), 6 ; encoding: [0x41,0x10,0x80,0xba,0x06,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 4161, i32 6) call void asm sideeffect "", ""() ret void } define amdgpu_cs void @test_setreg_f32_denorm_mode(i32 inreg %val) { -; GFX6789-LABEL: test_setreg_f32_denorm_mode: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_f32_denorm_mode: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 ; encoding: [0x01,0x09,0x80,0xb9] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_f32_denorm_mode: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 ; encoding: [0x01,0x09,0x00,0xb9] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_f32_denorm_mode: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 ; encoding: [0x01,0x09,0x80,0xb9] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 2305, i32 %val) call void asm sideeffect "", ""() ret void } define amdgpu_cs void @test_setreg_f64_denorm_mode(i32 inreg %val) { -; GFX6789-LABEL: test_setreg_f64_denorm_mode: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 6, 2), s0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_f64_denorm_mode: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 6, 2), s0 ; encoding: [0x81,0x09,0x80,0xb9] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_f64_denorm_mode: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 6, 2), s0 ; encoding: [0x81,0x09,0x00,0xb9] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_f64_denorm_mode: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 6, 2), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 6, 2), s0 ; encoding: [0x81,0x09,0x80,0xb9] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 2433, i32 %val) call void asm sideeffect "", ""() ret void } define amdgpu_cs void @test_setreg_full_denorm_mode(i32 
inreg %val) { -; GFX6789-LABEL: test_setreg_full_denorm_mode: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_denorm_mode: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; encoding: [0x01,0x18,0x80,0xb9] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_denorm_mode: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; encoding: [0x01,0x18,0x00,0xb9] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_denorm_mode: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; encoding: [0x01,0x18,0x80,0xb9] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6145, i32 %val) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_round_mode_0() { -; GFX6789-LABEL: test_setreg_full_round_mode_0: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_round_mode_0: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 ; encoding: [0x01,0x18,0x80,0xba,0x00,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_round_mode_0: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 ; encoding: [0x01,0x18,0x00,0xba,0x00,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_round_mode_0: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_round_mode 0x0 +; GFX10-NEXT: s_round_mode 0x0 ; encoding: [0x00,0x00,0xa4,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6145, i32 0) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_round_mode_1() { -; GFX6789-LABEL: test_setreg_full_round_mode_1: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_round_mode_1: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1 ; encoding: [0x01,0x18,0x80,0xba,0x01,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_round_mode_1: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1 ; encoding: [0x01,0x18,0x00,0xba,0x01,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_round_mode_1: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_round_mode 0x1 +; GFX10-NEXT: 
s_round_mode 0x1 ; encoding: [0x01,0x00,0xa4,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6145, i32 1) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_round_mode_2() { -; GFX6789-LABEL: test_setreg_full_round_mode_2: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_round_mode_2: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2 ; encoding: [0x01,0x18,0x80,0xba,0x02,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_round_mode_2: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2 ; encoding: [0x01,0x18,0x00,0xba,0x02,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_round_mode_2: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_round_mode 0x2 +; GFX10-NEXT: s_round_mode 0x2 ; encoding: [0x02,0x00,0xa4,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6145, i32 2) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_round_mode_4() { -; GFX6789-LABEL: test_setreg_full_round_mode_4: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_round_mode_4: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4 ; encoding: [0x01,0x18,0x80,0xba,0x04,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_round_mode_4: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4 ; encoding: [0x01,0x18,0x00,0xba,0x04,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_round_mode_4: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_round_mode 0x4 +; GFX10-NEXT: s_round_mode 0x4 ; encoding: [0x04,0x00,0xa4,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6145, i32 4) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_round_mode_8() { -; GFX6789-LABEL: test_setreg_full_round_mode_8: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_round_mode_8: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8 ; encoding: [0x01,0x18,0x80,0xba,0x08,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_round_mode_8: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 
4), 8 ; encoding: [0x01,0x18,0x00,0xba,0x08,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_round_mode_8: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_round_mode 0x8 +; GFX10-NEXT: s_round_mode 0x8 ; encoding: [0x08,0x00,0xa4,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6145, i32 8) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_round_mode_15() { -; GFX6789-LABEL: test_setreg_full_round_mode_15: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_round_mode_15: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 ; encoding: [0x01,0x18,0x80,0xba,0x0f,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_round_mode_15: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 ; encoding: [0x01,0x18,0x00,0xba,0x0f,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_round_mode_15: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_round_mode 0xf +; GFX10-NEXT: s_round_mode 0xf ; encoding: [0x0f,0x00,0xa4,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6145, i32 15) call void asm sideeffect "", ""() ret void @@ -420,60 +560,81 @@ define amdgpu_kernel void @test_setreg_full_round_mode_15() { ; Should truncate set immediate value define amdgpu_kernel void @test_setreg_full_round_mode_42() { -; GFX6789-LABEL: test_setreg_full_round_mode_42: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 42 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_round_mode_42: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 42 ; encoding: [0x01,0x18,0x80,0xba,0x2a,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_round_mode_42: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 42 ; encoding: [0x01,0x18,0x00,0xba,0x2a,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_round_mode_42: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_round_mode 0xa +; GFX10-NEXT: s_round_mode 0xa ; encoding: [0x0a,0x00,0xa4,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6145, i32 42) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_denorm_mode_0() { -; GFX6789-LABEL: test_setreg_full_denorm_mode_0: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 0 -; GFX6789-NEXT: ;;#ASMSTART -; 
GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_denorm_mode_0: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 0 ; encoding: [0x01,0x19,0x80,0xba,0x00,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_denorm_mode_0: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 0 ; encoding: [0x01,0x19,0x00,0xba,0x00,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_denorm_mode_0: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 0 +; GFX10-NEXT: s_denorm_mode 0 ; encoding: [0x00,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6401, i32 0) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_denorm_mode_1() { -; GFX6789-LABEL: test_setreg_full_denorm_mode_1: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 1 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_denorm_mode_1: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 1 ; encoding: [0x01,0x19,0x80,0xba,0x01,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_denorm_mode_1: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 1 ; encoding: [0x01,0x19,0x00,0xba,0x01,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_denorm_mode_1: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 1 +; GFX10-NEXT: s_denorm_mode 1 ; encoding: [0x01,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6401, i32 1) call void asm sideeffect "", ""() ret void @@ -481,100 +642,135 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_1() { define amdgpu_kernel void @test_setreg_full_denorm_mode_2() { -; GFX6789-LABEL: test_setreg_full_denorm_mode_2: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 2 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_denorm_mode_2: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 2 ; encoding: [0x01,0x19,0x80,0xba,0x02,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_denorm_mode_2: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 2 ; encoding: [0x01,0x19,0x00,0xba,0x02,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_denorm_mode_2: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 2 +; GFX10-NEXT: s_denorm_mode 2 ; encoding: [0x02,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: 
;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6401, i32 2) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_denorm_mode_4() { -; GFX6789-LABEL: test_setreg_full_denorm_mode_4: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 4 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_denorm_mode_4: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 4 ; encoding: [0x01,0x19,0x80,0xba,0x04,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_denorm_mode_4: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 4 ; encoding: [0x01,0x19,0x00,0xba,0x04,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_denorm_mode_4: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 4 +; GFX10-NEXT: s_denorm_mode 4 ; encoding: [0x04,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6401, i32 4) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_denorm_mode_8() { -; GFX6789-LABEL: test_setreg_full_denorm_mode_8: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 8 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_denorm_mode_8: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 8 ; encoding: [0x01,0x19,0x80,0xba,0x08,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_denorm_mode_8: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 8 ; encoding: [0x01,0x19,0x00,0xba,0x08,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_denorm_mode_8: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 8 +; GFX10-NEXT: s_denorm_mode 8 ; encoding: [0x08,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6401, i32 8) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_denorm_mode_15() { -; GFX6789-LABEL: test_setreg_full_denorm_mode_15: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 15 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_denorm_mode_15: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 15 ; encoding: [0x01,0x19,0x80,0xba,0x0f,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_denorm_mode_15: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 15 ; encoding: [0x01,0x19,0x00,0xba,0x0f,0x00,0x00,0x00] +; GFX789-NEXT: 
;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_denorm_mode_15: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 15 +; GFX10-NEXT: s_denorm_mode 15 ; encoding: [0x0f,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6401, i32 15) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_denorm_mode_42() { -; GFX6789-LABEL: test_setreg_full_denorm_mode_42: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 42 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_denorm_mode_42: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 42 ; encoding: [0x01,0x19,0x80,0xba,0x2a,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_denorm_mode_42: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 42 ; encoding: [0x01,0x19,0x00,0xba,0x2a,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_denorm_mode_42: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 10 +; GFX10-NEXT: s_denorm_mode 10 ; encoding: [0x0a,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6401, i32 42) call void asm sideeffect "", ""() ret void @@ -582,231 +778,308 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_42() { ; Sets all fp round and fp denorm bits. 
define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_0() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_0: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_0: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0 ; encoding: [0x01,0x38,0x80,0xba,0x00,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_0: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0 ; encoding: [0x01,0x38,0x00,0xba,0x00,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_0: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0x0 +; GFX10-NEXT: s_round_mode 0x0 ; encoding: [0x00,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 0 +; GFX10-NEXT: s_denorm_mode 0 ; encoding: [0x00,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 0) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_1() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_1: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 1 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_1: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 1 ; encoding: [0x01,0x38,0x80,0xba,0x01,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_1: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 1 ; encoding: [0x01,0x38,0x00,0xba,0x01,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_1: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0x1 +; GFX10-NEXT: s_round_mode 0x1 ; encoding: [0x01,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 0 +; GFX10-NEXT: s_denorm_mode 0 ; encoding: [0x00,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 1) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_2() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_2: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 2 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_2: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 2 ; encoding: [0x01,0x38,0x80,0xba,0x02,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: 
;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_2: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 2 ; encoding: [0x01,0x38,0x00,0xba,0x02,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_2: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0x2 +; GFX10-NEXT: s_round_mode 0x2 ; encoding: [0x02,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 0 +; GFX10-NEXT: s_denorm_mode 0 ; encoding: [0x00,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 2) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_4() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_4: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 4 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_4: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 4 ; encoding: [0x01,0x38,0x80,0xba,0x04,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_4: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 4 ; encoding: [0x01,0x38,0x00,0xba,0x04,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_4: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0x4 +; GFX10-NEXT: s_round_mode 0x4 ; encoding: [0x04,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 0 +; GFX10-NEXT: s_denorm_mode 0 ; encoding: [0x00,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 4) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_8() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_8: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 8 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_8: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 8 ; encoding: [0x01,0x38,0x80,0xba,0x08,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_8: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 8 ; encoding: [0x01,0x38,0x00,0xba,0x08,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_8: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0x8 +; GFX10-NEXT: s_round_mode 0x8 ; encoding: 
[0x08,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 0 +; GFX10-NEXT: s_denorm_mode 0 ; encoding: [0x00,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 8) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_16() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_16: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 16 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 16 ; encoding: [0x01,0x38,0x80,0xba,0x10,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_16: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 16 ; encoding: [0x01,0x38,0x00,0xba,0x10,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_16: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0x0 +; GFX10-NEXT: s_round_mode 0x0 ; encoding: [0x00,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 1 +; GFX10-NEXT: s_denorm_mode 1 ; encoding: [0x01,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 16) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_32() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_32: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 32 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 32 ; encoding: [0x01,0x38,0x80,0xba,0x20,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_32: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 32 ; encoding: [0x01,0x38,0x00,0xba,0x20,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_32: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0x0 +; GFX10-NEXT: s_round_mode 0x0 ; encoding: [0x00,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 2 +; GFX10-NEXT: s_denorm_mode 2 ; encoding: [0x02,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 32) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_64() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_64: -; GFX6789: 
; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 64 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_64: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 64 ; encoding: [0x01,0x38,0x80,0xba,0x40,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_64: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 64 ; encoding: [0x01,0x38,0x00,0xba,0x40,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_64: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0x0 +; GFX10-NEXT: s_round_mode 0x0 ; encoding: [0x00,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 4 +; GFX10-NEXT: s_denorm_mode 4 ; encoding: [0x04,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 64) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_128() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_128: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x80 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_128: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x80 ; encoding: [0x01,0x38,0x80,0xba,0x80,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_128: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x80 ; encoding: [0x01,0x38,0x00,0xba,0x80,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0x0 +; GFX10-NEXT: s_round_mode 0x0 ; encoding: [0x00,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 8 +; GFX10-NEXT: s_denorm_mode 8 ; encoding: [0x08,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 128) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_15() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_15: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 15 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_15: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 15 ; encoding: [0x01,0x38,0x80,0xba,0x0f,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: 
test_setreg_full_both_round_mode_and_denorm_mode_15: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 15 ; encoding: [0x01,0x38,0x00,0xba,0x0f,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_15: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0xf +; GFX10-NEXT: s_round_mode 0xf ; encoding: [0x0f,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 0 +; GFX10-NEXT: s_denorm_mode 0 ; encoding: [0x00,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 15) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_255() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_255: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0xff -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_255: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0xff ; encoding: [0x01,0x38,0x80,0xba,0xff,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_255: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0xff ; encoding: [0x01,0x38,0x00,0xba,0xff,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_255: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0xf +; GFX10-NEXT: s_round_mode 0xf ; encoding: [0x0f,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 15 +; GFX10-NEXT: s_denorm_mode 15 ; encoding: [0x0f,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 255) call void asm sideeffect "", ""() ret void @@ -814,61 +1087,82 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_255( ; Truncate extra high bit define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_597() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_597: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x255 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_597: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x255 ; encoding: [0x01,0x38,0x80,0xba,0x55,0x02,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_597: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x255 ; encoding: [0x01,0x38,0x00,0xba,0x55,0x02,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_597: ; GFX10: ; %bb.0: 
-; GFX10-NEXT: s_round_mode 0x5 +; GFX10-NEXT: s_round_mode 0x5 ; encoding: [0x05,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 5 +; GFX10-NEXT: s_denorm_mode 5 ; encoding: [0x05,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 597) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_set_8_bits_straddles_round_and_denorm() { -; GFX6789-LABEL: test_setreg_set_8_bits_straddles_round_and_denorm: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 8), 0xff -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_set_8_bits_straddles_round_and_denorm: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 8), 0xff ; encoding: [0x81,0x38,0x80,0xba,0xff,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_set_8_bits_straddles_round_and_denorm: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 8), 0xff ; encoding: [0x81,0x38,0x00,0xba,0xff,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_set_8_bits_straddles_round_and_denorm: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 8), 0xff +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 8), 0xff ; encoding: [0x81,0x38,0x80,0xba,0xff,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14465, i32 255) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_set_4_bits_straddles_round_and_denorm() { -; GFX6789-LABEL: test_setreg_set_4_bits_straddles_round_and_denorm: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 15 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_set_4_bits_straddles_round_and_denorm: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 15 ; encoding: [0x81,0x18,0x80,0xba,0x0f,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_set_4_bits_straddles_round_and_denorm: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 15 ; encoding: [0x81,0x18,0x00,0xba,0x0f,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_set_4_bits_straddles_round_and_denorm: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 15 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 15 ; encoding: [0x81,0x18,0x80,0xba,0x0f,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6273, i32 15) call void asm sideeffect "", ""() ret void @@ -876,25 +1170,34 @@ define amdgpu_kernel void @test_setreg_set_4_bits_straddles_round_and_denorm() { ; FIXME: Broken for DAG 
define void @test_setreg_roundingmode_var_vgpr(i32 %var.mode) { -; GFX6789-LABEL: test_setreg_roundingmode_var_vgpr: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6789-NEXT: v_readfirstlane_b32 s4, v0 -; GFX6789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s4 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_setpc_b64 s[30:31] +; GFX6-LABEL: test_setreg_roundingmode_var_vgpr: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX6-NEXT: v_readfirstlane_b32 s4, v0 ; encoding: [0x00,0x05,0x08,0x7e] +; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s4 ; encoding: [0x01,0x10,0x84,0xb9] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] +; +; GFX789-LABEL: test_setreg_roundingmode_var_vgpr: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX789-NEXT: v_readfirstlane_b32 s4, v0 ; encoding: [0x00,0x05,0x08,0x7e] +; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s4 ; encoding: [0x01,0x10,0x04,0xb9] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] ; ; GFX10-LABEL: test_setreg_roundingmode_var_vgpr: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 ; encoding: [0x00,0x05,0x08,0x7e] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s4 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s4 ; encoding: [0x01,0x10,0x84,0xb9] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode) call void asm sideeffect "", ""() ret void diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll index 88bfa8a0b687d..758069023579a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GFX6 %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GFX789 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GFX789 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10 %s ; FIXME: This copy of the test is a subset of the -global-isel version, since the VGPR case doesn't work. 
@@ -13,20 +13,27 @@ ; Set FP32 fp_round to round to zero define amdgpu_kernel void @test_setreg_f32_round_mode_rtz() { -; GFX6789-LABEL: test_setreg_f32_round_mode_rtz: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_f32_round_mode_rtz: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3 ; encoding: [0x01,0x08,0x80,0xba,0x03,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_f32_round_mode_rtz: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3 ; encoding: [0x01,0x08,0x00,0xba,0x03,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_f32_round_mode_rtz: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3 ; encoding: [0x01,0x08,0x80,0xba,0x03,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 2049, i32 3) call void asm sideeffect "", ""() ret void @@ -34,20 +41,27 @@ define amdgpu_kernel void @test_setreg_f32_round_mode_rtz() { ; Set FP64/FP16 fp_round to round to zero define amdgpu_kernel void @test_setreg_f64_round_mode_rtz() { -; GFX6789-LABEL: test_setreg_f64_round_mode_rtz: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_f64_round_mode_rtz: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3 ; encoding: [0x81,0x08,0x80,0xba,0x03,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_f64_round_mode_rtz: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3 ; encoding: [0x81,0x08,0x00,0xba,0x03,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_f64_round_mode_rtz: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3 ; encoding: [0x81,0x08,0x80,0xba,0x03,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 2177, i32 3) call void asm sideeffect "", ""() ret void @@ -55,20 +69,27 @@ define amdgpu_kernel void @test_setreg_f64_round_mode_rtz() { ; Set all fp_round to round to zero define amdgpu_kernel void @test_setreg_all_round_mode_rtz() { -; GFX6789-LABEL: test_setreg_all_round_mode_rtz: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 7 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_all_round_mode_rtz: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 7 ; encoding: [0x81,0x18,0x80,0xba,0x07,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: 
;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_all_round_mode_rtz: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 7 ; encoding: [0x81,0x18,0x00,0xba,0x07,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_all_round_mode_rtz: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 7 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 7 ; encoding: [0x81,0x18,0x80,0xba,0x07,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6273, i32 7) call void asm sideeffect "", ""() ret void @@ -76,100 +97,135 @@ define amdgpu_kernel void @test_setreg_all_round_mode_rtz() { ; Set FP32 fp_round to dynamic mode define amdgpu_cs void @test_setreg_roundingmode_var(i32 inreg %var.mode) { -; GFX6789-LABEL: test_setreg_roundingmode_var: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 2), s0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_roundingmode_var: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 2), s0 ; encoding: [0x01,0x08,0x80,0xb9] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_roundingmode_var: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 2), s0 ; encoding: [0x01,0x08,0x00,0xb9] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_roundingmode_var: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 2), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 2), s0 ; encoding: [0x01,0x08,0x80,0xb9] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 2049, i32 %var.mode) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_ieee_mode_off() { -; GFX6789-LABEL: test_setreg_ieee_mode_off: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_ieee_mode_off: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 0 ; encoding: [0x41,0x02,0x80,0xba,0x00,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_ieee_mode_off: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 0 ; encoding: [0x41,0x02,0x00,0xba,0x00,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_ieee_mode_off: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 0 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 0 ; encoding: [0x41,0x02,0x80,0xba,0x00,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: 
[0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 577, i32 0) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_ieee_mode_on() { -; GFX6789-LABEL: test_setreg_ieee_mode_on: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 1 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_ieee_mode_on: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 1 ; encoding: [0x41,0x02,0x80,0xba,0x01,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_ieee_mode_on: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 1 ; encoding: [0x41,0x02,0x00,0xba,0x01,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_ieee_mode_on: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 1 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 9, 1), 1 ; encoding: [0x41,0x02,0x80,0xba,0x01,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 577, i32 1) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_dx10_clamp_off() { -; GFX6789-LABEL: test_setreg_dx10_clamp_off: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_dx10_clamp_off: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 0 ; encoding: [0x01,0x02,0x80,0xba,0x00,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_dx10_clamp_off: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 0 ; encoding: [0x01,0x02,0x00,0xba,0x00,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_dx10_clamp_off: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 0 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 0 ; encoding: [0x01,0x02,0x80,0xba,0x00,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 513, i32 0) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_dx10_clamp_on() { -; GFX6789-LABEL: test_setreg_dx10_clamp_on: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 1 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_dx10_clamp_on: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 1 ; encoding: [0x01,0x02,0x80,0xba,0x01,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_dx10_clamp_on: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 1 ; encoding: [0x01,0x02,0x00,0xba,0x01,0x00,0x00,0x00] +; 
GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_dx10_clamp_on: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 1 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 8, 1), 1 ; encoding: [0x01,0x02,0x80,0xba,0x01,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 513, i32 1) call void asm sideeffect "", ""() ret void @@ -177,20 +233,27 @@ define amdgpu_kernel void @test_setreg_dx10_clamp_on() { ; Sets full width of fp round and fp denorm fields, to a variable define amdgpu_cs void @test_setreg_full_both_round_mode_and_denorm_mode(i32 inreg %mode) { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 8), s0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 8), s0 ; encoding: [0x01,0x38,0x80,0xb9] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 8), s0 ; encoding: [0x01,0x38,0x00,0xb9] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 8), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 8), s0 ; encoding: [0x01,0x38,0x80,0xb9] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 inreg %mode) call void asm sideeffect "", ""() ret void @@ -198,20 +261,27 @@ define amdgpu_cs void @test_setreg_full_both_round_mode_and_denorm_mode(i32 inre ; Does not cover last bit of denorm field define amdgpu_cs void @test_setreg_most_both_round_mode_and_denorm_mode() { -; GFX6789-LABEL: test_setreg_most_both_round_mode_and_denorm_mode: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 7), 6 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_most_both_round_mode_and_denorm_mode: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 7), 6 ; encoding: [0x01,0x30,0x80,0xba,0x06,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_most_both_round_mode_and_denorm_mode: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 7), 6 ; encoding: [0x01,0x30,0x00,0xba,0x06,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_most_both_round_mode_and_denorm_mode: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 7), 6 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 7), 6 ; encoding: [0x01,0x30,0x80,0xba,0x06,0x00,0x00,0x00] ; GFX10-NEXT: 
;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 12289, i32 6) call void asm sideeffect "", ""() ret void @@ -219,200 +289,270 @@ define amdgpu_cs void @test_setreg_most_both_round_mode_and_denorm_mode() { ; Does not cover first bit of denorm field define amdgpu_cs void @test_setreg_most_both_round_mode_and_denorm_mode_6() { -; GFX6789-LABEL: test_setreg_most_both_round_mode_and_denorm_mode_6: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 1, 3), 6 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_most_both_round_mode_and_denorm_mode_6: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 1, 3), 6 ; encoding: [0x41,0x10,0x80,0xba,0x06,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_most_both_round_mode_and_denorm_mode_6: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 1, 3), 6 ; encoding: [0x41,0x10,0x00,0xba,0x06,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_most_both_round_mode_and_denorm_mode_6: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 1, 3), 6 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 1, 3), 6 ; encoding: [0x41,0x10,0x80,0xba,0x06,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 4161, i32 6) call void asm sideeffect "", ""() ret void } define amdgpu_cs void @test_setreg_f32_denorm_mode(i32 inreg %val) { -; GFX6789-LABEL: test_setreg_f32_denorm_mode: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_f32_denorm_mode: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 ; encoding: [0x01,0x09,0x80,0xb9] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_f32_denorm_mode: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 ; encoding: [0x01,0x09,0x00,0xb9] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_f32_denorm_mode: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 ; encoding: [0x01,0x09,0x80,0xb9] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 2305, i32 %val) call void asm sideeffect "", ""() ret void } define amdgpu_cs void @test_setreg_f64_denorm_mode(i32 inreg %val) { -; GFX6789-LABEL: test_setreg_f64_denorm_mode: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 6, 2), s0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_f64_denorm_mode: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 6, 2), s0 ; encoding: [0x81,0x09,0x80,0xb9] +; 
GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_f64_denorm_mode: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 6, 2), s0 ; encoding: [0x81,0x09,0x00,0xb9] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_f64_denorm_mode: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 6, 2), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 6, 2), s0 ; encoding: [0x81,0x09,0x80,0xb9] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 2433, i32 %val) call void asm sideeffect "", ""() ret void } define amdgpu_cs void @test_setreg_full_denorm_mode(i32 inreg %val) { -; GFX6789-LABEL: test_setreg_full_denorm_mode: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_denorm_mode: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; encoding: [0x01,0x18,0x80,0xb9] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_denorm_mode: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; encoding: [0x01,0x18,0x00,0xb9] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_denorm_mode: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; encoding: [0x01,0x18,0x80,0xb9] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6145, i32 %val) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_round_mode_0() { -; GFX6789-LABEL: test_setreg_full_round_mode_0: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_round_mode_0: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 ; encoding: [0x01,0x18,0x80,0xba,0x00,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_round_mode_0: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 ; encoding: [0x01,0x18,0x00,0xba,0x00,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_round_mode_0: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_round_mode 0x0 +; GFX10-NEXT: s_round_mode 0x0 ; encoding: [0x00,0x00,0xa4,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6145, i32 0) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_round_mode_1() { -; GFX6789-LABEL: test_setreg_full_round_mode_1: 
-; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_round_mode_1: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1 ; encoding: [0x01,0x18,0x80,0xba,0x01,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_round_mode_1: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1 ; encoding: [0x01,0x18,0x00,0xba,0x01,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_round_mode_1: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_round_mode 0x1 +; GFX10-NEXT: s_round_mode 0x1 ; encoding: [0x01,0x00,0xa4,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6145, i32 1) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_round_mode_2() { -; GFX6789-LABEL: test_setreg_full_round_mode_2: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_round_mode_2: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2 ; encoding: [0x01,0x18,0x80,0xba,0x02,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_round_mode_2: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2 ; encoding: [0x01,0x18,0x00,0xba,0x02,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_round_mode_2: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_round_mode 0x2 +; GFX10-NEXT: s_round_mode 0x2 ; encoding: [0x02,0x00,0xa4,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6145, i32 2) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_round_mode_4() { -; GFX6789-LABEL: test_setreg_full_round_mode_4: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_round_mode_4: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4 ; encoding: [0x01,0x18,0x80,0xba,0x04,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_round_mode_4: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4 ; encoding: [0x01,0x18,0x00,0xba,0x04,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_round_mode_4: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_round_mode 0x4 +; GFX10-NEXT: s_round_mode 0x4 ; encoding: [0x04,0x00,0xa4,0xbf] ; GFX10-NEXT: 
;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6145, i32 4) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_round_mode_8() { -; GFX6789-LABEL: test_setreg_full_round_mode_8: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_round_mode_8: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8 ; encoding: [0x01,0x18,0x80,0xba,0x08,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_round_mode_8: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8 ; encoding: [0x01,0x18,0x00,0xba,0x08,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_round_mode_8: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_round_mode 0x8 +; GFX10-NEXT: s_round_mode 0x8 ; encoding: [0x08,0x00,0xa4,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6145, i32 8) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_round_mode_15() { -; GFX6789-LABEL: test_setreg_full_round_mode_15: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_round_mode_15: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 ; encoding: [0x01,0x18,0x80,0xba,0x0f,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_round_mode_15: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 ; encoding: [0x01,0x18,0x00,0xba,0x0f,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_round_mode_15: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_round_mode 0xf +; GFX10-NEXT: s_round_mode 0xf ; encoding: [0x0f,0x00,0xa4,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6145, i32 15) call void asm sideeffect "", ""() ret void @@ -420,60 +560,81 @@ define amdgpu_kernel void @test_setreg_full_round_mode_15() { ; Should truncate set immediate value define amdgpu_kernel void @test_setreg_full_round_mode_42() { -; GFX6789-LABEL: test_setreg_full_round_mode_42: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 42 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_round_mode_42: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 42 ; encoding: [0x01,0x18,0x80,0xba,0x2a,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_round_mode_42: +; 
GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 42 ; encoding: [0x01,0x18,0x00,0xba,0x2a,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_round_mode_42: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_round_mode 0xa +; GFX10-NEXT: s_round_mode 0xa ; encoding: [0x0a,0x00,0xa4,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6145, i32 42) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_denorm_mode_0() { -; GFX6789-LABEL: test_setreg_full_denorm_mode_0: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_denorm_mode_0: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 0 ; encoding: [0x01,0x19,0x80,0xba,0x00,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_denorm_mode_0: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 0 ; encoding: [0x01,0x19,0x00,0xba,0x00,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_denorm_mode_0: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 0 +; GFX10-NEXT: s_denorm_mode 0 ; encoding: [0x00,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6401, i32 0) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_denorm_mode_1() { -; GFX6789-LABEL: test_setreg_full_denorm_mode_1: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 1 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_denorm_mode_1: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 1 ; encoding: [0x01,0x19,0x80,0xba,0x01,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_denorm_mode_1: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 1 ; encoding: [0x01,0x19,0x00,0xba,0x01,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_denorm_mode_1: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 1 +; GFX10-NEXT: s_denorm_mode 1 ; encoding: [0x01,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6401, i32 1) call void asm sideeffect "", ""() ret void @@ -481,100 +642,135 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_1() { define amdgpu_kernel void @test_setreg_full_denorm_mode_2() { -; GFX6789-LABEL: test_setreg_full_denorm_mode_2: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 2 -; 
GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_denorm_mode_2: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 2 ; encoding: [0x01,0x19,0x80,0xba,0x02,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_denorm_mode_2: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 2 ; encoding: [0x01,0x19,0x00,0xba,0x02,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_denorm_mode_2: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 2 +; GFX10-NEXT: s_denorm_mode 2 ; encoding: [0x02,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6401, i32 2) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_denorm_mode_4() { -; GFX6789-LABEL: test_setreg_full_denorm_mode_4: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 4 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_denorm_mode_4: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 4 ; encoding: [0x01,0x19,0x80,0xba,0x04,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_denorm_mode_4: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 4 ; encoding: [0x01,0x19,0x00,0xba,0x04,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_denorm_mode_4: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 4 +; GFX10-NEXT: s_denorm_mode 4 ; encoding: [0x04,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6401, i32 4) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_denorm_mode_8() { -; GFX6789-LABEL: test_setreg_full_denorm_mode_8: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 8 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_denorm_mode_8: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 8 ; encoding: [0x01,0x19,0x80,0xba,0x08,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_denorm_mode_8: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 8 ; encoding: [0x01,0x19,0x00,0xba,0x08,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_denorm_mode_8: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 8 +; GFX10-NEXT: s_denorm_mode 8 ; encoding: [0x08,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: 
s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6401, i32 8) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_denorm_mode_15() { -; GFX6789-LABEL: test_setreg_full_denorm_mode_15: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 15 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_denorm_mode_15: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 15 ; encoding: [0x01,0x19,0x80,0xba,0x0f,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_denorm_mode_15: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 15 ; encoding: [0x01,0x19,0x00,0xba,0x0f,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_denorm_mode_15: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 15 +; GFX10-NEXT: s_denorm_mode 15 ; encoding: [0x0f,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6401, i32 15) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_denorm_mode_42() { -; GFX6789-LABEL: test_setreg_full_denorm_mode_42: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 42 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_denorm_mode_42: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 42 ; encoding: [0x01,0x19,0x80,0xba,0x2a,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_denorm_mode_42: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 42 ; encoding: [0x01,0x19,0x00,0xba,0x2a,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_denorm_mode_42: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 10 +; GFX10-NEXT: s_denorm_mode 10 ; encoding: [0x0a,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6401, i32 42) call void asm sideeffect "", ""() ret void @@ -582,231 +778,308 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_42() { ; Sets all fp round and fp denorm bits. 
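These tests write hwreg(HW_REG_MODE, 0, 8), i.e. the full round-mode field in bits [3:0] of the value and the full denorm-mode field in bits [7:4]. That layout is what lets the GFX10 lowering replace a setreg of a known immediate with the dedicated s_round_mode and s_denorm_mode instructions, truncating any bits beyond the 8-bit field. A minimal sketch of the split, under those layout assumptions (splitModeImm is an illustrative name, not LLVM's):

#include <cstdio>

// Split an immediate destined for hwreg(HW_REG_MODE, 0, 8) into the two
// GFX10 mode-setting instructions: round mode in bits [3:0], denorm mode
// in bits [7:4]; higher bits fall outside the field and are dropped.
static void splitModeImm(unsigned ImmVal) {
  unsigned Round = ImmVal & 0xf;
  unsigned Denorm = (ImmVal >> 4) & 0xf;
  std::printf("s_round_mode 0x%x ; s_denorm_mode %u\n", Round, Denorm);
}

int main() {
  splitModeImm(16);  // s_round_mode 0x0 ; s_denorm_mode 1
  splitModeImm(255); // s_round_mode 0xf ; s_denorm_mode 15
  splitModeImm(597); // bit 9 truncated: s_round_mode 0x5 ; s_denorm_mode 5
}

The same arithmetic (ImmVal & 0xf, then ImmVal >>= 4) appears in SITargetLowering::EmitInstrWithCustomInserter in the scheduling patch later in this series, and the GFX10 checks below confirm it for each constant.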
define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_0() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_0: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_0: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0 ; encoding: [0x01,0x38,0x80,0xba,0x00,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_0: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0 ; encoding: [0x01,0x38,0x00,0xba,0x00,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_0: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0x0 +; GFX10-NEXT: s_round_mode 0x0 ; encoding: [0x00,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 0 +; GFX10-NEXT: s_denorm_mode 0 ; encoding: [0x00,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 0) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_1() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_1: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 1 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_1: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 1 ; encoding: [0x01,0x38,0x80,0xba,0x01,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_1: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 1 ; encoding: [0x01,0x38,0x00,0xba,0x01,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_1: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0x1 +; GFX10-NEXT: s_round_mode 0x1 ; encoding: [0x01,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 0 +; GFX10-NEXT: s_denorm_mode 0 ; encoding: [0x00,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 1) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_2() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_2: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 2 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_2: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 2 ; encoding: [0x01,0x38,0x80,0xba,0x02,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: 
;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_2: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 2 ; encoding: [0x01,0x38,0x00,0xba,0x02,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_2: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0x2 +; GFX10-NEXT: s_round_mode 0x2 ; encoding: [0x02,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 0 +; GFX10-NEXT: s_denorm_mode 0 ; encoding: [0x00,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 2) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_4() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_4: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 4 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_4: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 4 ; encoding: [0x01,0x38,0x80,0xba,0x04,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_4: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 4 ; encoding: [0x01,0x38,0x00,0xba,0x04,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_4: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0x4 +; GFX10-NEXT: s_round_mode 0x4 ; encoding: [0x04,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 0 +; GFX10-NEXT: s_denorm_mode 0 ; encoding: [0x00,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 4) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_8() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_8: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 8 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_8: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 8 ; encoding: [0x01,0x38,0x80,0xba,0x08,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_8: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 8 ; encoding: [0x01,0x38,0x00,0xba,0x08,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_8: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0x8 +; GFX10-NEXT: s_round_mode 0x8 ; encoding: 
[0x08,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 0 +; GFX10-NEXT: s_denorm_mode 0 ; encoding: [0x00,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 8) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_16() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_16: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 16 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 16 ; encoding: [0x01,0x38,0x80,0xba,0x10,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_16: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 16 ; encoding: [0x01,0x38,0x00,0xba,0x10,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_16: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0x0 +; GFX10-NEXT: s_round_mode 0x0 ; encoding: [0x00,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 1 +; GFX10-NEXT: s_denorm_mode 1 ; encoding: [0x01,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 16) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_32() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_32: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 32 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 32 ; encoding: [0x01,0x38,0x80,0xba,0x20,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_32: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 32 ; encoding: [0x01,0x38,0x00,0xba,0x20,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_32: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0x0 +; GFX10-NEXT: s_round_mode 0x0 ; encoding: [0x00,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 2 +; GFX10-NEXT: s_denorm_mode 2 ; encoding: [0x02,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 32) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_64() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_64: -; GFX6789: 
; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 64 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_64: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 64 ; encoding: [0x01,0x38,0x80,0xba,0x40,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_64: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 64 ; encoding: [0x01,0x38,0x00,0xba,0x40,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_64: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0x0 +; GFX10-NEXT: s_round_mode 0x0 ; encoding: [0x00,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 4 +; GFX10-NEXT: s_denorm_mode 4 ; encoding: [0x04,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 64) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_128() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_128: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x80 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_128: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x80 ; encoding: [0x01,0x38,0x80,0xba,0x80,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_128: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x80 ; encoding: [0x01,0x38,0x00,0xba,0x80,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0x0 +; GFX10-NEXT: s_round_mode 0x0 ; encoding: [0x00,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 8 +; GFX10-NEXT: s_denorm_mode 8 ; encoding: [0x08,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 128) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_15() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_15: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 15 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_15: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 15 ; encoding: [0x01,0x38,0x80,0xba,0x0f,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: 
test_setreg_full_both_round_mode_and_denorm_mode_15: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 15 ; encoding: [0x01,0x38,0x00,0xba,0x0f,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_15: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0xf +; GFX10-NEXT: s_round_mode 0xf ; encoding: [0x0f,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 0 +; GFX10-NEXT: s_denorm_mode 0 ; encoding: [0x00,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 15) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_255() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_255: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0xff -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_255: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0xff ; encoding: [0x01,0x38,0x80,0xba,0xff,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_255: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0xff ; encoding: [0x01,0x38,0x00,0xba,0xff,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_255: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_round_mode 0xf +; GFX10-NEXT: s_round_mode 0xf ; encoding: [0x0f,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 15 +; GFX10-NEXT: s_denorm_mode 15 ; encoding: [0x0f,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 255) call void asm sideeffect "", ""() ret void @@ -814,61 +1087,82 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_255( ; Truncate extra high bit define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_597() { -; GFX6789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_597: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x255 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_597: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x255 ; encoding: [0x01,0x38,0x80,0xba,0x55,0x02,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_597: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x255 ; encoding: [0x01,0x38,0x00,0xba,0x55,0x02,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_597: ; GFX10: ; %bb.0: 
-; GFX10-NEXT: s_round_mode 0x5 +; GFX10-NEXT: s_round_mode 0x5 ; encoding: [0x05,0x00,0xa4,0xbf] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_denorm_mode 5 +; GFX10-NEXT: s_denorm_mode 5 ; encoding: [0x05,0x00,0xa5,0xbf] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14337, i32 597) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_set_8_bits_straddles_round_and_denorm() { -; GFX6789-LABEL: test_setreg_set_8_bits_straddles_round_and_denorm: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 8), 0xff -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_set_8_bits_straddles_round_and_denorm: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 8), 0xff ; encoding: [0x81,0x38,0x80,0xba,0xff,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_set_8_bits_straddles_round_and_denorm: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 8), 0xff ; encoding: [0x81,0x38,0x00,0xba,0xff,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_set_8_bits_straddles_round_and_denorm: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 8), 0xff +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 8), 0xff ; encoding: [0x81,0x38,0x80,0xba,0xff,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 14465, i32 255) call void asm sideeffect "", ""() ret void } define amdgpu_kernel void @test_setreg_set_4_bits_straddles_round_and_denorm() { -; GFX6789-LABEL: test_setreg_set_4_bits_straddles_round_and_denorm: -; GFX6789: ; %bb.0: -; GFX6789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 15 -; GFX6789-NEXT: ;;#ASMSTART -; GFX6789-NEXT: ;;#ASMEND -; GFX6789-NEXT: s_endpgm +; GFX6-LABEL: test_setreg_set_4_bits_straddles_round_and_denorm: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 15 ; encoding: [0x81,0x18,0x80,0xba,0x0f,0x00,0x00,0x00] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX789-LABEL: test_setreg_set_4_bits_straddles_round_and_denorm: +; GFX789: ; %bb.0: +; GFX789-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 15 ; encoding: [0x81,0x18,0x00,0xba,0x0f,0x00,0x00,0x00] +; GFX789-NEXT: ;;#ASMSTART +; GFX789-NEXT: ;;#ASMEND +; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; ; GFX10-LABEL: test_setreg_set_4_bits_straddles_round_and_denorm: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 15 +; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 4), 15 ; encoding: [0x81,0x18,0x80,0xba,0x0f,0x00,0x00,0x00] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_endpgm +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] call void @llvm.amdgcn.s.setreg(i32 6273, i32 15) call void asm sideeffect "", ""() ret void From 90777e2924ec7f99a3f1b718a636f47036012514 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 9 Sep 2020 17:21:36 +0100 
Subject: [PATCH 0839/1079] [AMDGPU] Enable scheduling around FP MODE-setting instructions Pre-gfx10 all MODE-setting instructions were S_SETREG_B32 which is marked as having unmodeled side effects, which makes the machine scheduler treat it as a barrier. Now that we have proper implicit $mode operands we can use a no-side-effects S_SETREG_B32_mode pseudo instead for setregs that only touch the FP MODE bits, to give the scheduler more freedom. Differential Revision: https://reviews.llvm.org/D87446 --- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 9 +- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 15 +++- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 83 ++++++++++--------- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 3 - llvm/lib/Target/AMDGPU/SIModeRegister.cpp | 9 +- llvm/lib/Target/AMDGPU/SOPInstructions.td | 39 ++++++--- .../AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll | 2 +- llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll | 8 +- llvm/test/CodeGen/AMDGPU/frem.ll | 6 +- 9 files changed, 102 insertions(+), 72 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 67db397b19f63..432d951018d09 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -67,7 +67,14 @@ static bool isSGetReg(unsigned Opcode) { } static bool isSSetReg(unsigned Opcode) { - return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32; + switch (Opcode) { + case AMDGPU::S_SETREG_B32: + case AMDGPU::S_SETREG_B32_mode: + case AMDGPU::S_SETREG_IMM32_B32: + case AMDGPU::S_SETREG_IMM32_B32_mode: + return true; + } + return false; } static bool isRWLane(unsigned Opcode) { diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index b5f6765e85abb..a24394cdf795f 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -355,10 +355,17 @@ static bool tryAddToFoldList(SmallVectorImpl &FoldList, } // Special case for s_setreg_b32 - if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) { - MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32)); - appendFoldCandidate(FoldList, MI, OpNo, OpToFold); - return true; + if (OpToFold->isImm()) { + unsigned ImmOpc = 0; + if (Opc == AMDGPU::S_SETREG_B32) + ImmOpc = AMDGPU::S_SETREG_IMM32_B32; + else if (Opc == AMDGPU::S_SETREG_B32_mode) + ImmOpc = AMDGPU::S_SETREG_IMM32_B32_mode; + if (ImmOpc) { + MI->setDesc(TII->get(ImmOpc)); + appendFoldCandidate(FoldList, MI, OpNo, OpToFold); + return true; + } } // If we are already folding into another operand of MI, then diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 7a71c1d35526d..91f35fa770a80 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4235,9 +4235,6 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( return emitGWSMemViolTestLoop(MI, BB); case AMDGPU::S_SETREG_B32: { - if (!getSubtarget()->hasDenormModeInst()) - return BB; - // Try to optimize cases that only set the denormal mode or rounding mode. // // If the s_setreg_b32 fully sets all of the bits in the rounding mode or @@ -4247,9 +4244,6 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( // FIXME: This could be predicates on the immediate, but tablegen doesn't // allow you to have a no side effect instruction in the output of a // sideeffecting pattern. 
-
-    // TODO: Should also emit a no side effects pseudo if only FP bits are
-    // touched, even if not all of them or to a variable.
     unsigned ID, Offset, Width;
     AMDGPU::Hwreg::decodeHwreg(MI.getOperand(1).getImm(), ID, Offset, Width);
     if (ID != AMDGPU::Hwreg::ID_MODE)
@@ -4257,45 +4251,54 @@
     const unsigned WidthMask = maskTrailingOnes<unsigned>(Width);
     const unsigned SetMask = WidthMask << Offset;
-    unsigned SetDenormOp = 0;
-    unsigned SetRoundOp = 0;
-
-    // The dedicated instructions can only set the whole denorm or round mode at
-    // once, not a subset of bits in either.
-    if (SetMask ==
-        (AMDGPU::Hwreg::FP_ROUND_MASK | AMDGPU::Hwreg::FP_DENORM_MASK)) {
-      // If this fully sets both the round and denorm mode, emit the two
-      // dedicated instructions for these.
-      SetRoundOp = AMDGPU::S_ROUND_MODE;
-      SetDenormOp = AMDGPU::S_DENORM_MODE;
-    } else if (SetMask == AMDGPU::Hwreg::FP_ROUND_MASK) {
-      SetRoundOp = AMDGPU::S_ROUND_MODE;
-    } else if (SetMask == AMDGPU::Hwreg::FP_DENORM_MASK) {
-      SetDenormOp = AMDGPU::S_DENORM_MODE;
-    }
-
-    if (SetRoundOp || SetDenormOp) {
-      MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
-      MachineInstr *Def = MRI.getVRegDef(MI.getOperand(0).getReg());
-      if (Def && Def->isMoveImmediate() && Def->getOperand(1).isImm()) {
-        unsigned ImmVal = Def->getOperand(1).getImm();
-        if (SetRoundOp) {
-          BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetRoundOp))
-            .addImm(ImmVal & 0xf);
-
-          // If we also have the denorm mode, get just the denorm mode bits.
-          ImmVal >>= 4;
-        }
         if (SetDenormOp) {
-          BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetDenormOp))
-            .addImm(ImmVal & 0xf);
-        }
+    if (getSubtarget()->hasDenormModeInst()) {
+      unsigned SetDenormOp = 0;
+      unsigned SetRoundOp = 0;
+
+      // The dedicated instructions can only set the whole denorm or round mode
+      // at once, not a subset of bits in either.
+      if (SetMask ==
+          (AMDGPU::Hwreg::FP_ROUND_MASK | AMDGPU::Hwreg::FP_DENORM_MASK)) {
+        // If this fully sets both the round and denorm mode, emit the two
+        // dedicated instructions for these.
+        SetRoundOp = AMDGPU::S_ROUND_MODE;
+        SetDenormOp = AMDGPU::S_DENORM_MODE;
+      } else if (SetMask == AMDGPU::Hwreg::FP_ROUND_MASK) {
+        SetRoundOp = AMDGPU::S_ROUND_MODE;
+      } else if (SetMask == AMDGPU::Hwreg::FP_DENORM_MASK) {
+        SetDenormOp = AMDGPU::S_DENORM_MODE;
+      }
-        MI.eraseFromParent();
+      if (SetRoundOp || SetDenormOp) {
+        MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+        MachineInstr *Def = MRI.getVRegDef(MI.getOperand(0).getReg());
+        if (Def && Def->isMoveImmediate() && Def->getOperand(1).isImm()) {
+          unsigned ImmVal = Def->getOperand(1).getImm();
+          if (SetRoundOp) {
+            BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetRoundOp))
+              .addImm(ImmVal & 0xf);
+
+            // If we also have the denorm mode, get just the denorm mode bits.
+            ImmVal >>= 4;
+          }
+
+          if (SetDenormOp) {
+            BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetDenormOp))
+              .addImm(ImmVal & 0xf);
+          }
+
+          MI.eraseFromParent();
+          return BB;
+        }
+      }
     }
+    // If only FP bits are touched, use the no side effects pseudo.
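+    // For example, hwreg(HW_REG_MODE, 2, 4) gives SetMask = 0xf << 2 = 0x3c,
+    // which stays within FP_ROUND_MASK | FP_DENORM_MASK (MODE bits 0-7), so
+    // the _mode pseudo is safe, while hwreg(HW_REG_MODE, 2, 8) gives
+    // SetMask = 0xff << 2 = 0x3fc, which also touches non-FP MODE bits and
+    // must keep the side-effecting opcode.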
+ if ((SetMask & (AMDGPU::Hwreg::FP_ROUND_MASK | + AMDGPU::Hwreg::FP_DENORM_MASK)) == SetMask) + MI.setDesc(TII->get(AMDGPU::S_SETREG_B32_mode)); + return BB; } default: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 9aa28cff10868..21ad82d546612 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3070,9 +3070,6 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI, // Target-independent instructions do not have an implicit-use of EXEC, even // when they operate on VGPRs. Treating EXEC modifications as scheduling // boundaries prevents incorrect movements of such instructions. - - // TODO: Don't treat setreg with known constant that only changes MODE as - // barrier. return MI.modifiesRegister(AMDGPU::EXEC, &RI) || MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 || MI.getOpcode() == AMDGPU::S_SETREG_B32 || diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp index 0e162ac42c111..a2e1486e4b9a6 100644 --- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp +++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp @@ -242,8 +242,10 @@ void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB, Status IPChange; for (MachineInstr &MI : MBB) { Status InstrMode = getInstructionMode(MI, TII); - if ((MI.getOpcode() == AMDGPU::S_SETREG_B32) || - (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32)) { + if (MI.getOpcode() == AMDGPU::S_SETREG_B32 || + MI.getOpcode() == AMDGPU::S_SETREG_B32_mode || + MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 || + MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) { // We preserve any explicit mode register setreg instruction we encounter, // as we assume it has been inserted by a higher authority (this is // likely to be a very rare occurrence). @@ -267,7 +269,8 @@ void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB, // If this is an immediate then we know the value being set, but if it is // not an immediate then we treat the modified bits of the mode register // as unknown. - if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32) { + if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 || + MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) { unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm(); unsigned Mode = (Val << Offset) & Mask; Status Setreg = Status(Mask, Mode); diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index df2e18fd44146..e65096b7448b4 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -813,8 +813,6 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo < "$sdst, $simm16" >; -let hasSideEffects = 1 in { - let mayLoad = 1 in { // s_getreg_b32 should use hasSideEffects = 1 for tablegen to allow // its use in the readcyclecounter selection. @@ -825,40 +823,55 @@ def S_GETREG_B32 : SOPK_Pseudo < "$sdst, $simm16", [(set i32:$sdst, (int_amdgcn_s_getreg (i32 timm:$simm16)))]> { let SOPKZext = 1; + let hasSideEffects = 1; } -} +} // End mayLoad = 1 -let mayLoad = 0, mayStore =0 in { +let mayLoad = 0, mayStore = 0, Defs = [MODE], Uses = [MODE] in { // FIXME: Need to truncate immediate to 16-bits. 
-def S_SETREG_B32 : SOPK_Pseudo <
+class S_SETREG_B32_Pseudo <list<dag> pattern=[]> : SOPK_Pseudo <
   "s_setreg_b32",
   (outs), (ins SReg_32:$sdst, hwreg:$simm16),
   "$simm16, $sdst",
-  [(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> {
+  pattern>;
+def S_SETREG_B32 : S_SETREG_B32_Pseudo <
+  [(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> {
   // Use custom inserter to optimize some cases to
-  // S_DENORM_MODE/S_ROUND_MODE.
+  // S_DENORM_MODE/S_ROUND_MODE/S_SETREG_B32_mode.
   let usesCustomInserter = 1;
-  let Defs = [MODE];
-  let Uses = [MODE];
+  let hasSideEffects = 1;
+}
+
+// Variant of SETREG that is guaranteed to only touch FP bits in the MODE
+// register, so doesn't have unmodeled side effects.
+def S_SETREG_B32_mode : S_SETREG_B32_Pseudo {
+  let hasSideEffects = 0;
 }
 // FIXME: Not on SI?
 //def S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32">;
-def S_SETREG_IMM32_B32 : SOPK_Pseudo <
+class S_SETREG_IMM32_B32_Pseudo : SOPK_Pseudo <
   "s_setreg_imm32_b32",
   (outs), (ins i32imm:$imm, hwreg:$simm16),
   "$simm16, $imm"> {
   let Size = 8; // Unlike every other SOPK instruction.
   let has_sdst = 0;
-  let Defs = [MODE];
-  let Uses = [MODE];
 }
+def S_SETREG_IMM32_B32 : S_SETREG_IMM32_B32_Pseudo {
+  let hasSideEffects = 1;
 }
-} // End hasSideEffects = 1
+
+// Variant of SETREG_IMM32 that is guaranteed to only touch FP bits in the MODE
+// register, so doesn't have unmodeled side effects.
+def S_SETREG_IMM32_B32_mode : S_SETREG_IMM32_B32_Pseudo {
+  let hasSideEffects = 0;
+}
+
+} // End mayLoad = 0, mayStore = 0, Defs = [MODE], Uses = [MODE]
 class SOPK_WAITCNT<string opName, list<dag> pat=[]> : SOPK_Pseudo<
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
index 250458bbe29e7..d84282eb3ede3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
@@ -1194,9 +1194,9 @@ define void @test_setreg_roundingmode_var_vgpr(i32 %var.mode) {
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_readfirstlane_b32 s4, v0 ; encoding: [0x00,0x05,0x08,0x7e]
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s4 ; encoding: [0x01,0x10,0x84,0xb9]
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s4 ; encoding: [0x01,0x10,0x84,0xb9]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
 call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
 call void asm sideeffect "", ""()
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll b/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll
index 9286e91e09b2c..216ab53cb24e1 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll
@@ -17,14 +17,14 @@ define float @fdiv_f32(float %a, float %b) #0 {
  ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3
  ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
  ; GCN: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-  ; GCN: S_SETREG_B32 killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode
+  ; GCN: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode
  ; GCN: %14:vgpr_32 = nofpexcept V_FMA_F32 1, %8, 0, %10, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec
  ; GCN: %15:vgpr_32 = nofpexcept V_FMA_F32 0, killed %14, 0, %10, 0, %10, 0, 0, implicit $mode, implicit $exec
  ; GCN: %16:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %6, 0, %15, 0, 0, implicit $mode, implicit $exec
  ;
GCN: %17:vgpr_32 = nofpexcept V_FMA_F32 1, %8, 0, %16, 0, %6, 0, 0, implicit $mode, implicit $exec ; GCN: %18:vgpr_32 = nofpexcept V_FMA_F32 0, killed %17, 0, %15, 0, %16, 0, 0, implicit $mode, implicit $exec ; GCN: %19:vgpr_32 = nofpexcept V_FMA_F32 1, %8, 0, %18, 0, %6, 0, 0, implicit $mode, implicit $exec - ; GCN: S_SETREG_B32 killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode + ; GCN: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode ; GCN: $vcc = COPY %7 ; GCN: %20:vgpr_32 = nofpexcept V_DIV_FMAS_F32 0, killed %19, 0, %15, 0, %18, 0, 0, implicit $mode, implicit $vcc, implicit $exec ; GCN: %21:vgpr_32 = nofpexcept V_DIV_FIXUP_F32 0, killed %20, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec @@ -50,14 +50,14 @@ define float @fdiv_nnan_f32(float %a, float %b) #0 { ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3 ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216 ; GCN: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN: S_SETREG_B32 killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode + ; GCN: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode ; GCN: %14:vgpr_32 = nnan nofpexcept V_FMA_F32 1, %8, 0, %10, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec ; GCN: %15:vgpr_32 = nnan nofpexcept V_FMA_F32 0, killed %14, 0, %10, 0, %10, 0, 0, implicit $mode, implicit $exec ; GCN: %16:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, %6, 0, %15, 0, 0, implicit $mode, implicit $exec ; GCN: %17:vgpr_32 = nnan nofpexcept V_FMA_F32 1, %8, 0, %16, 0, %6, 0, 0, implicit $mode, implicit $exec ; GCN: %18:vgpr_32 = nnan nofpexcept V_FMA_F32 0, killed %17, 0, %15, 0, %16, 0, 0, implicit $mode, implicit $exec ; GCN: %19:vgpr_32 = nnan nofpexcept V_FMA_F32 1, %8, 0, %18, 0, %6, 0, 0, implicit $mode, implicit $exec - ; GCN: S_SETREG_B32 killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode + ; GCN: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode ; GCN: $vcc = COPY %7 ; GCN: %20:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32 0, killed %19, 0, %15, 0, %18, 0, 0, implicit $mode, implicit $vcc, implicit $exec ; GCN: %21:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32 0, killed %20, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll index 720e45b3c30f5..d5ee24a8bd1a7 100644 --- a/llvm/test/CodeGen/AMDGPU/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/frem.ll @@ -1040,9 +1040,9 @@ define amdgpu_kernel void @frem_v2f16(<2 x half> addrspace(1)* %out, <2 x half> ; CI-NEXT: v_trunc_f32_e32 v4, v4 ; CI-NEXT: v_fma_f32 v0, -v4, v2, v0 ; CI-NEXT: v_div_scale_f32 v4, s[4:5], v3, v3, v1 +; CI-NEXT: v_div_scale_f32 v2, vcc, v1, v3, v1 ; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_div_scale_f32 v2, vcc, v1, v3, v1 ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; CI-NEXT: v_rcp_f32_e32 v5, v4 ; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 @@ -1265,9 +1265,9 @@ define amdgpu_kernel void @frem_v4f16(<4 x half> addrspace(1)* %out, <4 x half> ; CI-NEXT: v_trunc_f32_e32 v8, v8 ; CI-NEXT: v_fma_f32 v1, -v8, v1, v5 ; CI-NEXT: v_div_scale_f32 v8, s[4:5], v7, v7, v4 +; CI-NEXT: v_div_scale_f32 v5, vcc, v4, v7, v4 ; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_div_scale_f32 v5, vcc, v4, v7, v4 ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; CI-NEXT: 
v_rcp_f32_e32 v9, v8
; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6
@@ -1300,8 +1300,8 @@ define amdgpu_kernel void @frem_v4f16(<4 x half> addrspace(1)* %out, <4 x half>
; CI-NEXT: v_trunc_f32_e32 v4, v4
; CI-NEXT: v_fma_f32 v0, -v4, v0, v3
; CI-NEXT: v_div_scale_f32 v4, s[4:5], v6, v6, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
; CI-NEXT: v_div_scale_f32 v3, vcc, v2, v6, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; CI-NEXT: v_rcp_f32_e32 v5, v4
; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6

From cd4615120233c54034b42bafc3d2bcc9f29db63d Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 16 Sep 2020 16:17:35 +0100
Subject: [PATCH 0840/1079] [X86] Assert that we've found a terminator instruction. NFCI.

Fixes clang static analyzer null dereference warning.
---
 .../Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp b/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
index 7e91c37367d2f..d57871130b0cb 100644
--- a/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
+++ b/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
@@ -161,6 +161,7 @@ bool X86SpeculativeExecutionSideEffectSuppression::runOnMachineFunction(
     // This branch requires adding an LFENCE.
     if (!PrevInstIsLFENCE) {
+      assert(FirstTerminator && "Unknown terminator instruction");
       BuildMI(MBB, FirstTerminator, DebugLoc(), TII->get(X86::LFENCE));
       NumLFENCEsInserted++;
       Modified = true;

From 833b3b0d3a2ff4b8243940eef1a960050ec48682 Mon Sep 17 00:00:00 2001
From: Sebastian Neubauer
Date: Thu, 23 Jul 2020 16:59:00 +0200
Subject: [PATCH 0841/1079] [AMDGPU] Add v3f16/v3i16 support to SDag

Fix lowering and instruction selection for v3x16 types and enable InstCombine to emit them.

This patch only implements it for the selection dag. GlobalISel tests in GlobalISel/llvm.amdgcn.image.load.1d.d16.ll and GlobalISel/llvm.amdgcn.image.store.2d.d16.ll still don't work.
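A minimal sketch of the kind of IR this enables (adapted from the tests added in this patch; the function name here is invented for illustration):

  declare <3 x half> @llvm.amdgcn.raw.buffer.load.format.v3f16(<4 x i32>, i32, i32, i32)

  define amdgpu_ps half @load_format_xyz_elt2(<4 x i32> inreg %rsrc) {
  main_body:
    ; Now selects buffer_load_format_d16_xyz; in the DAG the result is widened
    ; to v4f16 (v3i32 on unpacked-D16 subtargets) while the memory VT stays
    ; v3f16, so only three components are loaded.
    %data = call <3 x half> @llvm.amdgcn.raw.buffer.load.format.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
    %elt = extractelement <3 x half> %data, i32 2
    ret half %elt
  }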
Differential Revision: https://reviews.llvm.org/D84420 --- .../CodeGen/SelectionDAG/LegalizeTypes.cpp | 9 +- .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 5 - llvm/lib/Target/AMDGPU/BUFInstructions.td | 102 ++++++++------ llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 86 +++++++++--- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 42 ++++++ .../test/CodeGen/AMDGPU/image-load-d16-tfe.ll | 79 +++++++++-- .../llvm.amdgcn.buffer.load.format.d16.ll | 16 ++- .../llvm.amdgcn.buffer.store.format.d16.ll | 7 + .../AMDGPU/llvm.amdgcn.image.d16.dim.ll | 32 +++++ .../llvm.amdgcn.image.sample.d16.dim.ll | 128 ++++++++++++++++++ .../llvm.amdgcn.raw.buffer.load.format.d16.ll | 13 ++ ...llvm.amdgcn.raw.buffer.store.format.d16.ll | 26 ++++ .../llvm.amdgcn.raw.tbuffer.load.d16.ll | 17 ++- .../llvm.amdgcn.raw.tbuffer.store.d16.ll | 26 ++++ ...vm.amdgcn.struct.buffer.load.format.d16.ll | 14 ++ ...m.amdgcn.struct.buffer.store.format.d16.ll | 26 ++++ .../llvm.amdgcn.struct.tbuffer.load.d16.ll | 17 ++- .../llvm.amdgcn.struct.tbuffer.store.d16.ll | 25 ++++ .../AMDGPU/llvm.amdgcn.tbuffer.load.d16.ll | 14 ++ .../AMDGPU/llvm.amdgcn.tbuffer.store.d16.ll | 23 ++++ .../AMDGPU/amdgcn-demanded-vector-elts.ll | 10 +- 21 files changed, 632 insertions(+), 85 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index ae087d3bbd8cb..855d9f3c12a84 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -955,11 +955,12 @@ bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) { assert(Results.size() == N->getNumValues() && "Custom lowering returned the wrong number of results!"); for (unsigned i = 0, e = Results.size(); i != e; ++i) { - // If this is a chain output just replace it. - if (Results[i].getValueType() == MVT::Other) - ReplaceValueWith(SDValue(N, i), Results[i]); - else + // If this is a chain output or already widened just replace it. + bool WasWidened = SDValue(N, i).getValueType() != Results[i].getValueType(); + if (WasWidened) SetWidenedVector(SDValue(N, i), Results[i]); + else + ReplaceValueWith(SDValue(N, i), Results[i]); } return true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index c9be4e11cfc11..b441351211734 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -929,11 +929,6 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, if (!NewNumElts) return UndefValue::get(II.getType()); - // FIXME: Allow v3i16/v3f16 in buffer and image intrinsics when the types are - // fully supported. 
- if (II.getType()->getScalarSizeInBits() == 16 && NewNumElts == 3) - return nullptr; - if (NewNumElts >= VWidth && DemandedElts.isMask()) { if (DMaskIdx >= 0) II.setArgOperand(DMaskIdx, Args[DMaskIdx]); diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 480070505d62b..e1c9f1609a02a 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -529,21 +529,23 @@ multiclass MUBUF_Pseudo_Loads { - def _OFFSET : MUBUF_Load_Pseudo , + defvar legal_load_vt = !if(!eq(!cast(load_vt), !cast(v3f16)), v4f16, load_vt); + + def _OFFSET : MUBUF_Load_Pseudo , MUBUFAddr64Table<0, NAME # !if(isLds, "_LDS", "")>; - def _ADDR64 : MUBUF_Load_Pseudo , + def _ADDR64 : MUBUF_Load_Pseudo , MUBUFAddr64Table<1, NAME # !if(isLds, "_LDS", "")>; - def _OFFEN : MUBUF_Load_Pseudo ; - def _IDXEN : MUBUF_Load_Pseudo ; - def _BOTHEN : MUBUF_Load_Pseudo ; + def _OFFEN : MUBUF_Load_Pseudo ; + def _IDXEN : MUBUF_Load_Pseudo ; + def _BOTHEN : MUBUF_Load_Pseudo ; let DisableWQM = 1 in { - def _OFFSET_exact : MUBUF_Load_Pseudo ; - def _OFFEN_exact : MUBUF_Load_Pseudo ; - def _IDXEN_exact : MUBUF_Load_Pseudo ; - def _BOTHEN_exact : MUBUF_Load_Pseudo ; + def _OFFSET_exact : MUBUF_Load_Pseudo ; + def _OFFEN_exact : MUBUF_Load_Pseudo ; + def _IDXEN_exact : MUBUF_Load_Pseudo ; + def _BOTHEN_exact : MUBUF_Load_Pseudo ; } } @@ -577,25 +579,27 @@ multiclass MUBUF_Pseudo_Stores { - def _OFFSET : MUBUF_Store_Pseudo (store_vt), !cast(v3f16)), v4f16, store_vt); + + def _OFFSET : MUBUF_Store_Pseudo , MUBUFAddr64Table<0, NAME>; - def _ADDR64 : MUBUF_Store_Pseudo , MUBUFAddr64Table<1, NAME>; - def _OFFEN : MUBUF_Store_Pseudo ; - def _IDXEN : MUBUF_Store_Pseudo ; - def _BOTHEN : MUBUF_Store_Pseudo ; + def _OFFEN : MUBUF_Store_Pseudo ; + def _IDXEN : MUBUF_Store_Pseudo ; + def _BOTHEN : MUBUF_Store_Pseudo ; let DisableWQM = 1 in { - def _OFFSET_exact : MUBUF_Store_Pseudo ; - def _OFFEN_exact : MUBUF_Store_Pseudo ; - def _IDXEN_exact : MUBUF_Store_Pseudo ; - def _BOTHEN_exact : MUBUF_Store_Pseudo ; + def _OFFSET_exact : MUBUF_Store_Pseudo ; + def _OFFEN_exact : MUBUF_Store_Pseudo ; + def _IDXEN_exact : MUBUF_Store_Pseudo ; + def _BOTHEN_exact : MUBUF_Store_Pseudo ; } } @@ -1162,9 +1166,11 @@ let SubtargetPredicate = isGFX10Plus in { //===----------------------------------------------------------------------===// multiclass MUBUF_LoadIntrinsicPat { + string opcode, ValueType memoryVt = vt> { + defvar st = !if(!eq(!cast(memoryVt), !cast(vt)), name, mubuf_intrinsic_load); + def : GCNPat< - (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + (vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$auxiliary, 0)), (!cast(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), @@ -1172,7 +1178,7 @@ multiclass MUBUF_LoadIntrinsicPat; def : GCNPat< - (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + (vt (st v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, timm:$auxiliary, 0)), (!cast(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), @@ -1180,7 +1186,7 @@ multiclass MUBUF_LoadIntrinsicPat; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + (vt (st v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, timm:$auxiliary, timm)), (!cast(opcode # _IDXEN) 
VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), @@ -1188,7 +1194,7 @@ multiclass MUBUF_LoadIntrinsicPat; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, + (vt (st v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, timm:$auxiliary, timm)), (!cast(opcode # _BOTHEN) (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), @@ -1212,6 +1218,7 @@ let SubtargetPredicate = HasUnpackedD16VMem in { defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; + defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; } // End HasUnpackedD16VMem. @@ -1221,6 +1228,8 @@ let SubtargetPredicate = HasPackedD16VMem in { defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; + defm : MUBUF_LoadIntrinsicPat; + defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; } // End HasPackedD16VMem. @@ -1243,9 +1252,11 @@ defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; multiclass MUBUF_StoreIntrinsicPat { + string opcode, ValueType memoryVt = vt> { + defvar st = !if(!eq(!cast(memoryVt), !cast(vt)), name, mubuf_intrinsic_store); + def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + (st vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$auxiliary, 0), (!cast(opcode # _OFFSET_exact) getVregSrcForVT.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), @@ -1253,7 +1264,7 @@ multiclass MUBUF_StoreIntrinsicPat; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + (st vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, timm:$auxiliary, 0), (!cast(opcode # _OFFEN_exact) getVregSrcForVT.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_glc $auxiliary), @@ -1262,7 +1273,7 @@ multiclass MUBUF_StoreIntrinsicPat; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + (st vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, timm:$auxiliary, timm), (!cast(opcode # _IDXEN_exact) getVregSrcForVT.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_glc $auxiliary), @@ -1271,7 +1282,7 @@ multiclass MUBUF_StoreIntrinsicPat; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, + (st vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, timm:$auxiliary, timm), (!cast(opcode # _BOTHEN_exact) getVregSrcForVT.ret:$vdata, @@ -1296,6 +1307,7 @@ let SubtargetPredicate = HasUnpackedD16VMem in { defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; + defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; } // End HasUnpackedD16VMem. @@ -1305,6 +1317,8 @@ let SubtargetPredicate = HasPackedD16VMem in { defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; + defm : MUBUF_StoreIntrinsicPat; + defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; } // End HasPackedD16VMem. 
@@ -1694,9 +1708,11 @@ defm : MUBUFScratchStorePat { + string opcode, ValueType memoryVt = vt> { + defvar st = !if(!eq(!cast(memoryVt), !cast(vt)), name, mtbuf_intrinsic_load); + def : GCNPat< - (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + (vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$format, timm:$auxiliary, 0)), (!cast(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), @@ -1705,7 +1721,7 @@ multiclass MTBUF_LoadIntrinsicPat; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + (vt (st v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, timm:$format, timm:$auxiliary, timm)), (!cast(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), @@ -1714,7 +1730,7 @@ multiclass MTBUF_LoadIntrinsicPat; def : GCNPat< - (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + (vt (st v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, timm:$format, timm:$auxiliary, 0)), (!cast(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), @@ -1723,7 +1739,7 @@ multiclass MTBUF_LoadIntrinsicPat; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, + (vt (st v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, timm:$format, timm:$auxiliary, timm)), (!cast(opcode # _BOTHEN) (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), @@ -1747,6 +1763,7 @@ let SubtargetPredicate = HasUnpackedD16VMem in { defm : MTBUF_LoadIntrinsicPat; defm : MTBUF_LoadIntrinsicPat; defm : MTBUF_LoadIntrinsicPat; + defm : MTBUF_LoadIntrinsicPat; defm : MTBUF_LoadIntrinsicPat; } // End HasUnpackedD16VMem. @@ -1754,13 +1771,16 @@ let SubtargetPredicate = HasPackedD16VMem in { defm : MTBUF_LoadIntrinsicPat; defm : MTBUF_LoadIntrinsicPat; defm : MTBUF_LoadIntrinsicPat; + defm : MTBUF_LoadIntrinsicPat; defm : MTBUF_LoadIntrinsicPat; } // End HasPackedD16VMem. 
multiclass MTBUF_StoreIntrinsicPat { + string opcode, ValueType memoryVt = vt> { + defvar st = !if(!eq(!cast(memoryVt), !cast(vt)), name, mtbuf_intrinsic_store); + def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + (st vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$format, timm:$auxiliary, 0), (!cast(opcode # _OFFSET_exact) getVregSrcForVT.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), @@ -1769,7 +1789,7 @@ multiclass MTBUF_StoreIntrinsicPat; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + (st vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, timm:$format, timm:$auxiliary, timm), (!cast(opcode # _IDXEN_exact) getVregSrcForVT.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), @@ -1778,7 +1798,7 @@ multiclass MTBUF_StoreIntrinsicPat; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + (st vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, timm:$format, timm:$auxiliary, 0), (!cast(opcode # _OFFEN_exact) getVregSrcForVT.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), @@ -1787,7 +1807,7 @@ multiclass MTBUF_StoreIntrinsicPat; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, + (st vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, timm:$format, timm:$auxiliary, timm), (!cast(opcode # _BOTHEN_exact) getVregSrcForVT.ret:$vdata, @@ -1811,6 +1831,7 @@ let SubtargetPredicate = HasUnpackedD16VMem in { defm : MTBUF_StoreIntrinsicPat; defm : MTBUF_StoreIntrinsicPat; defm : MTBUF_StoreIntrinsicPat; + defm : MTBUF_StoreIntrinsicPat; defm : MTBUF_StoreIntrinsicPat; } // End HasUnpackedD16VMem. @@ -1818,6 +1839,7 @@ let SubtargetPredicate = HasPackedD16VMem in { defm : MTBUF_StoreIntrinsicPat; defm : MTBUF_StoreIntrinsicPat; defm : MTBUF_StoreIntrinsicPat; + defm : MTBUF_StoreIntrinsicPat; defm : MTBUF_StoreIntrinsicPat; } // End HasPackedD16VMem. 
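The memoryVt parameter threaded through the load/store pattern multiclasses above is what lets the widened register type still match a 3-component memory access: when memoryVt differs from vt, the plain intrinsic node is swapped for a PatFrag that pins the memory type. A condensed sketch of the mechanism (this restates the definitions added to SIInstrInfo.td further down, with the parameter lists spelled out; it is illustrative, not the literal source):

  class mubuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
    (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
     node:$auxiliary, node:$idxen),
    (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
     node:$auxiliary, node:$idxen)> {
    let IsLoad = 1;
    let MemoryVT = vt; // e.g. v3f16: only 48 bits are accessed in memory
  }

  // Inside MUBUF_LoadIntrinsicPat: match the raw node when the types agree,
  // otherwise match through the memory-size-checking PatFrag.
  defvar st = !if(!eq(!cast<string>(memoryVt), !cast<string>(vt)),
                  name, mubuf_intrinsic_load<name, memoryVt>);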
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 91f35fa770a80..7580a1fda6d5b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -806,6 +806,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom);
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2i16, Custom);
+  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v3f16, Custom);
+  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v3i16, Custom);
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom);
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4i16, Custom);
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v8f16, Custom);
@@ -817,6 +819,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom);
+  setOperationAction(ISD::INTRINSIC_VOID, MVT::v3i16, Custom);
+  setOperationAction(ISD::INTRINSIC_VOID, MVT::v3f16, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::v4f16, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::v4i16, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::f16, Custom);
@@ -4556,15 +4560,27 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   return SDValue();
 }
+// Used for D16: Casts the result of an instruction into the right vector,
+// packs values if loads return unpacked values.
 static SDValue adjustLoadValueTypeImpl(SDValue Result, EVT LoadVT,
                                        const SDLoc &DL,
                                        SelectionDAG &DAG, bool Unpacked) {
   if (!LoadVT.isVector())
     return Result;
+  // Cast back to the original packed type or to a larger type that is a
+  // multiple of 32 bits for D16. Widening the return type is required for
+  // legalization.
+  EVT FittingLoadVT = LoadVT;
+  if ((LoadVT.getVectorNumElements() % 2) == 1) {
+    FittingLoadVT =
+        EVT::getVectorVT(*DAG.getContext(), LoadVT.getVectorElementType(),
+                         LoadVT.getVectorNumElements() + 1);
+  }
+
   if (Unpacked) { // From v2i32/v4i32 back to v2f16/v4f16.
     // Truncate to v2i16/v4i16.
-    EVT IntLoadVT = LoadVT.changeTypeToInteger();
+    EVT IntLoadVT = FittingLoadVT.changeTypeToInteger();
     // Workaround legalizer not scalarizing truncate after vector op
     // legalization but not creating intermediate vector trunc.
@@ -4573,14 +4589,18 @@ static SDValue adjustLoadValueTypeImpl(SDValue Result, EVT LoadVT,
     for (SDValue &Elt : Elts)
       Elt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Elt);
+    // Pad illegal v1i16/v3f16 to v4i16
+    if ((LoadVT.getVectorNumElements() % 2) == 1)
+      Elts.push_back(DAG.getUNDEF(MVT::i16));
+
     Result = DAG.getBuildVector(IntLoadVT, DL, Elts);
     // Bitcast to original type (v2f16/v4f16).
-    return DAG.getNode(ISD::BITCAST, DL, LoadVT, Result);
+    return DAG.getNode(ISD::BITCAST, DL, FittingLoadVT, Result);
   }
   // Cast back to the original packed type.
-  return DAG.getNode(ISD::BITCAST, DL, LoadVT, Result);
+  return DAG.getNode(ISD::BITCAST, DL, FittingLoadVT, Result);
 }
 SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
@@ -4594,10 +4614,16 @@ SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
   EVT LoadVT = M->getValueType(0);
   EVT EquivLoadVT = LoadVT;
-  if (Unpacked && LoadVT.isVector()) {
-    EquivLoadVT = LoadVT.isVector() ?
- EVT::getVectorVT(*DAG.getContext(), MVT::i32, - LoadVT.getVectorNumElements()) : LoadVT; + if (LoadVT.isVector()) { + if (Unpacked) { + EquivLoadVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, + LoadVT.getVectorNumElements()); + } else if ((LoadVT.getVectorNumElements() % 2) == 1) { + // Widen v3f16 to legal type + EquivLoadVT = + EVT::getVectorVT(*DAG.getContext(), LoadVT.getVectorElementType(), + LoadVT.getVectorNumElements() + 1); + } } // Change from v4f16/v2f16 to EquivLoadVT. @@ -4608,8 +4634,6 @@ SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode, IsIntrinsic ? (unsigned)ISD::INTRINSIC_W_CHAIN : Opcode, DL, VTList, Ops, M->getMemoryVT(), M->getMemOperand()); - if (!Unpacked) // Just adjusted the opcode. - return Load; SDValue Adjusted = adjustLoadValueTypeImpl(Load, LoadVT, DL, DAG, Unpacked); @@ -4813,8 +4837,9 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N, if (SDValue Res = LowerINTRINSIC_W_CHAIN(SDValue(N, 0), DAG)) { if (Res.getOpcode() == ISD::MERGE_VALUES) { // FIXME: Hacky - Results.push_back(Res.getOperand(0)); - Results.push_back(Res.getOperand(1)); + for (unsigned I = 0; I < Res.getNumOperands(); I++) { + Results.push_back(Res.getOperand(I)); + } } else { Results.push_back(Res); Results.push_back(Res.getValue(1)); @@ -5844,10 +5869,18 @@ static SDValue constructRetValue(SelectionDAG &DAG, if (IsD16) Data = adjustLoadValueTypeImpl(Data, ReqRetVT, DL, DAG, Unpacked); - if (!ReqRetVT.isVector()) + EVT LegalReqRetVT = ReqRetVT; + if (!ReqRetVT.isVector()) { Data = DAG.getNode(ISD::TRUNCATE, DL, ReqRetVT.changeTypeToInteger(), Data); - - Data = DAG.getNode(ISD::BITCAST, DL, ReqRetVT, Data); + } else { + // We need to widen the return vector to a legal type + if ((ReqRetVT.getVectorNumElements() % 2) == 1) { + LegalReqRetVT = + EVT::getVectorVT(*DAG.getContext(), ReqRetVT.getVectorElementType(), + ReqRetVT.getVectorNumElements() + 1); + } + } + Data = DAG.getNode(ISD::BITCAST, DL, LegalReqRetVT, Data); if (TexFail) return DAG.getMergeValues({Data, TexFail, SDValue(Result, 1)}, DL); @@ -7315,17 +7348,28 @@ SDValue SITargetLowering::handleD16VData(SDValue VData, return VData; SDLoc DL(VData); - assert((StoreVT.getVectorNumElements() != 3) && "Handle v3f16"); + unsigned NumElements = StoreVT.getVectorNumElements(); if (Subtarget->hasUnpackedD16VMem()) { // We need to unpack the packed data to store. 
EVT IntStoreVT = StoreVT.changeTypeToInteger(); SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData); - EVT EquivStoreVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, - StoreVT.getVectorNumElements()); + EVT EquivStoreVT = + EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElements); SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, EquivStoreVT, IntVData); return DAG.UnrollVectorOp(ZExt.getNode()); + } else if (NumElements == 3) { + EVT IntStoreVT = + EVT::getIntegerVT(*DAG.getContext(), StoreVT.getStoreSizeInBits()); + SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData); + + EVT WidenedStoreVT = EVT::getVectorVT( + *DAG.getContext(), StoreVT.getVectorElementType(), NumElements + 1); + EVT WidenedIntVT = EVT::getIntegerVT(*DAG.getContext(), + WidenedStoreVT.getStoreSizeInBits()); + SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenedIntVT, IntVData); + return DAG.getNode(ISD::BITCAST, DL, WidenedStoreVT, ZExt); } assert(isTypeLegal(StoreVT)); @@ -7505,8 +7549,10 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, EVT VDataVT = VData.getValueType(); EVT EltType = VDataVT.getScalarType(); bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16); - if (IsD16) + if (IsD16) { VData = handleD16VData(VData, DAG); + VDataVT = VData.getValueType(); + } if (!isTypeLegal(VDataVT)) { VData = @@ -7550,8 +7596,10 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, EVT EltType = VDataVT.getScalarType(); bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16); - if (IsD16) + if (IsD16) { VData = handleD16VData(VData, DAG); + VDataVT = VData.getValueType(); + } if (!isTypeLegal(VDataVT)) { VData = diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 034563a0cbd11..7fdbe2afa033c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -538,6 +538,48 @@ def si_setcc_uniform : PatFrag < return true; }]>; +//===----------------------------------------------------------------------===// +// SDNodes PatFrags for a16 loads and stores with 3 components. +// v3f16/v3i16 is widened to v4f16/v4i16, so we need to match on the memory +// load/store size. 
+//===----------------------------------------------------------------------===// + +class mubuf_intrinsic_load : PatFrag < + (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, + node:$auxiliary, node:$idxen), + (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, + node:$auxiliary, node:$idxen)> { + let IsLoad = 1; + let MemoryVT = vt; +} + +class mubuf_intrinsic_store : PatFrag < + (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, + node:$auxiliary, node:$idxen), + (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, + node:$auxiliary, node:$idxen)> { + let IsStore = 1; + let MemoryVT = vt; +} + +class mtbuf_intrinsic_load : PatFrag < + (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, + node:$format, node:$auxiliary, node:$idxen), + (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, + node:$format, node:$auxiliary, node:$idxen)> { + let IsLoad = 1; + let MemoryVT = vt; +} + +class mtbuf_intrinsic_store : PatFrag < + (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, + node:$format, node:$auxiliary, node:$idxen), + (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, + node:$format, node:$auxiliary, node:$idxen)> { + let IsStore = 1; + let MemoryVT = vt; +} + //===----------------------------------------------------------------------===// // SDNodes PatFrags for d16 loads //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AMDGPU/image-load-d16-tfe.ll b/llvm/test/CodeGen/AMDGPU/image-load-d16-tfe.ll index 9e7cca3ded721..f52aa1e4dee1e 100644 --- a/llvm/test/CodeGen/AMDGPU/image-load-d16-tfe.ll +++ b/llvm/test/CodeGen/AMDGPU/image-load-d16-tfe.ll @@ -321,14 +321,77 @@ define amdgpu_ps void @load_1d_v2f16_tfe_dmask3(<8 x i32> inreg %rsrc, i32 %s) { ret void } -; define amdgpu_ps void @load_1d_v3f16_tfe_dmask7(<8 x i32> inreg %rsrc, i32 %s) { -; %v = call { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) -; %v.data = extractvalue { <3 x half>, i32 } %v, 0 -; %v.err = extractvalue { <3 x half>, i32 } %v, 1 -; store volatile <3 x half> %v.data, <3 x half> addrspace(1)* undef -; store volatile i32 %v.err, i32 addrspace(1)* undef -; ret void -; } +define amdgpu_ps void @load_1d_v3f16_tfe_dmask7(<8 x i32> inreg %rsrc, i32 %s) { +; GFX9-LABEL: load_1d_v3f16_tfe_dmask7: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: s_mov_b32 s11, s9 +; GFX9-NEXT: s_mov_b32 s10, s8 +; GFX9-NEXT: s_mov_b32 s9, s7 +; GFX9-NEXT: s_mov_b32 s8, s6 +; GFX9-NEXT: s_mov_b32 s7, s5 +; GFX9-NEXT: s_mov_b32 s6, s4 +; GFX9-NEXT: s_mov_b32 s5, s3 +; GFX9-NEXT: s_mov_b32 s4, s2 +; GFX9-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-NEXT: image_load v[1:3], v0, s[4:11] dmask:0x7 unorm tfe d16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_short v[0:1], v2, off +; GFX9-NEXT: global_store_dword v[0:1], v1, off +; GFX9-NEXT: global_store_dword v[0:1], v3, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: load_1d_v3f16_tfe_dmask7: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_mov_b32_e32 v1, 0 +; GFX10-NEXT: s_mov_b32 s11, s9 +; GFX10-NEXT: s_mov_b32 s10, s8 +; GFX10-NEXT: s_mov_b32 s9, s7 +; GFX10-NEXT: s_mov_b32 s8, s6 +; GFX10-NEXT: s_mov_b32 s7, s5 +; GFX10-NEXT: s_mov_b32 s6, s4 +; GFX10-NEXT: s_mov_b32 s5, s3 +; GFX10-NEXT: 
s_mov_b32 s4, s2 +; GFX10-NEXT: v_mov_b32_e32 v2, v1 +; GFX10-NEXT: v_mov_b32_e32 v3, v1 +; GFX10-NEXT: image_load v[1:3], v0, s[4:11] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe d16 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_short v[0:1], v2, off +; GFX10-NEXT: global_store_dword v[0:1], v1, off +; GFX10-NEXT: global_store_dword v[0:1], v3, off +; GFX10-NEXT: s_endpgm +; +; GFX8-UNPACKED-LABEL: load_1d_v3f16_tfe_dmask7: +; GFX8-UNPACKED: ; %bb.0: +; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v1, 0 +; GFX8-UNPACKED-NEXT: s_mov_b32 s11, s9 +; GFX8-UNPACKED-NEXT: s_mov_b32 s10, s8 +; GFX8-UNPACKED-NEXT: s_mov_b32 s9, s7 +; GFX8-UNPACKED-NEXT: s_mov_b32 s8, s6 +; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s5 +; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s4 +; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s3 +; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s2 +; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v2, v1 +; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v3, v1 +; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v4, v1 +; GFX8-UNPACKED-NEXT: image_load v[1:4], v0, s[4:11] dmask:0x7 unorm tfe d16 +; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0) +; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v0, 16, v2 +; GFX8-UNPACKED-NEXT: flat_store_short v[0:1], v3 +; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-UNPACKED-NEXT: flat_store_dword v[0:1], v0 +; GFX8-UNPACKED-NEXT: flat_store_dword v[0:1], v4 +; GFX8-UNPACKED-NEXT: s_endpgm + %v = call { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) + %v.data = extractvalue { <3 x half>, i32 } %v, 0 + %v.err = extractvalue { <3 x half>, i32 } %v, 1 + store volatile <3 x half> %v.data, <3 x half> addrspace(1)* undef + store volatile i32 %v.err, i32 addrspace(1)* undef + ret void +} define amdgpu_ps void @load_1d_v4f16_tfe_dmask15(<8 x i32> inreg %rsrc, i32 %s) { ; GFX9-LABEL: load_1d_v4f16_tfe_dmask15: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.d16.ll index 274a5b2f0a78b..b1c2a030ea9f5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.d16.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s ; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s ; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s @@ -23,6 +23,19 @@ main_body: ret half %elt } +; GCN-LABEL: {{^}}buffer_load_format_d16_xyz: +; UNPACKED: buffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 +; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] + +; PACKED: buffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 +; PACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] +define amdgpu_ps half @buffer_load_format_d16_xyz(<4 x i32> inreg %rsrc) { +main_body: + %data = call <3 x half> @llvm.amdgcn.buffer.load.format.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0) + %elt = extractelement <3 x half> %data, i32 2 + ret half %elt +} + ; GCN-LABEL: 
{{^}}buffer_load_format_d16_xyzw: ; UNPACKED: buffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 ; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] @@ -38,4 +51,5 @@ main_body: declare half @llvm.amdgcn.buffer.load.format.f16(<4 x i32>, i32, i32, i1, i1) declare <2 x half> @llvm.amdgcn.buffer.load.format.v2f16(<4 x i32>, i32, i32, i1, i1) +declare <3 x half> @llvm.amdgcn.buffer.load.format.v3f16(<4 x i32>, i32, i32, i1, i1) declare <4 x half> @llvm.amdgcn.buffer.load.format.v4f16(<4 x i32>, i32, i32, i1, i1) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.d16.ll index 5ece33f0195cd..aadd9a448a1b3 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.d16.ll @@ -28,6 +28,12 @@ main_body: ret void } +define amdgpu_kernel void @buffer_store_format_d16_xyz(<4 x i32> %rsrc, <3 x half> %data, i32 %index) { +main_body: + call void @llvm.amdgcn.buffer.store.format.v3f16(<3 x half> %data, <4 x i32> %rsrc, i32 %index, i32 0, i1 0, i1 0) + ret void +} + ; GCN-LABEL: {{^}}buffer_store_format_d16_xyzw: ; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10 @@ -54,4 +60,5 @@ main_body: declare void @llvm.amdgcn.buffer.store.format.f16(half, <4 x i32>, i32, i32, i1, i1) declare void @llvm.amdgcn.buffer.store.format.v2f16(<2 x half>, <4 x i32>, i32, i32, i1, i1) +declare void @llvm.amdgcn.buffer.store.format.v3f16(<3 x half>, <4 x i32>, i32, i32, i1, i1) declare void @llvm.amdgcn.buffer.store.format.v4f16(<4 x half>, <4 x i32>, i32, i32, i1, i1) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll index 9e6be563c383e..da1174d7eb860 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll @@ -23,6 +23,18 @@ main_body: ret float %r } +; GCN-LABEL: {{^}}image_load_v3f16: +; UNPACKED: image_load v[0:2], v[0:1], s[0:7] dmask:0x7 unorm d16{{$}} +; PACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0x7 unorm d16{{$}} +; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm d16{{$}} +define amdgpu_ps <2 x float> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { +main_body: + %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) + %ext = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32> + %r = bitcast <4 x half> %ext to <2 x float> + ret <2 x float> %r +} + ; GCN-LABEL: {{^}}image_load_v4f16: ; UNPACKED: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}} ; PACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm d16{{$}} @@ -56,6 +68,14 @@ main_body: ret float %x } +define amdgpu_ps <2 x float> @image_load_3d_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) { +main_body: + %tex = call <3 x half> @llvm.amdgcn.image.load.3d.v3f16.i32(i32 7, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0) + %ext = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32> + %res = bitcast <4 x half> %ext to <2 x float> + ret <2 x float> %res +} + ; GCN-LABEL: {{^}}image_store_f16 ; GFX89: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}} ; GFX10: image_store v2, v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm d16{{$}} @@ -78,6 +98,14 @@ main_body: ret void } +define amdgpu_ps void @image_store_v3f16(<8 x i32> 
inreg %rsrc, i32 %s, i32 %t, <2 x float> %in) { +main_body: + %r = bitcast <2 x float> %in to <4 x half> + %data = shufflevector <4 x half> %r, <4 x half> undef, <3 x i32> + call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %data, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + ; GCN-LABEL: {{^}}image_store_v4f16 ; UNPACKED: v_lshrrev_b32_e32 ; UNPACKED: v_and_b32_e32 @@ -110,15 +138,19 @@ main_body: declare half @llvm.amdgcn.image.load.2d.f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1 declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1 +declare <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1 declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1 declare <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 declare <2 x half> @llvm.amdgcn.image.load.3d.v2f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +declare <3 x half> @llvm.amdgcn.image.load.3d.v3f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32, i32, i32, <8 x i32>, i32, i32) #0 declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0 declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0 declare void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0 declare void @llvm.amdgcn.image.store.3d.v2f16.i32(<2 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.3d.v3f16.i32(<3 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 attributes #0 = { nounwind } attributes #1 = { nounwind readonly } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll index 8a358ee59c963..6843134f83932 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll @@ -206,6 +206,131 @@ main_body: ret <2 x float> %r } +define amdgpu_ps <2 x float> @image_sample_b_2d_v3f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { +; TONGA-LABEL: image_sample_b_2d_v3f16: +; TONGA: ; %bb.0: ; %main_body +; TONGA-NEXT: s_mov_b64 s[12:13], exec +; TONGA-NEXT: s_wqm_b64 exec, exec +; TONGA-NEXT: s_and_b64 exec, exec, s[12:13] +; TONGA-NEXT: image_sample_b v[0:2], v[0:2], s[0:7], s[8:11] dmask:0x7 d16 +; TONGA-NEXT: s_waitcnt vmcnt(0) +; TONGA-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; TONGA-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; TONGA-NEXT: v_mov_b32_e32 v1, v2 +; TONGA-NEXT: ; return to shader part epilog +; +; GFX81-LABEL: image_sample_b_2d_v3f16: +; GFX81: ; %bb.0: ; %main_body +; GFX81-NEXT: s_mov_b64 s[12:13], exec +; GFX81-NEXT: s_wqm_b64 exec, exec +; GFX81-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX81-NEXT: image_sample_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0x7 d16 +; GFX81-NEXT: s_waitcnt vmcnt(0) +; GFX81-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: image_sample_b_2d_v3f16: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b64 s[12:13], exec +; GFX9-NEXT: s_wqm_b64 exec, exec +; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX9-NEXT: image_sample_b v[0:1], v[0:2], s[0:7], s[8:11] 
dmask:0x7 d16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: image_sample_b_2d_v3f16:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s12, exec_lo
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT: image_sample_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0x7 dim:SQ_RSRC_IMG_2D d16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+  %tex = call <3 x half> @llvm.amdgcn.image.sample.b.2d.v3f16.f32.f32(i32 7, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  %tex_wide = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %r = bitcast <4 x half> %tex_wide to <2 x float>
+  ret <2 x float> %r
+}
+
+define amdgpu_ps <4 x float> @image_sample_b_2d_v3f16_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
+; TONGA-LABEL: image_sample_b_2d_v3f16_tfe:
+; TONGA: ; %bb.0: ; %main_body
+; TONGA-NEXT: s_mov_b64 s[12:13], exec
+; TONGA-NEXT: s_wqm_b64 exec, exec
+; TONGA-NEXT: v_mov_b32_e32 v3, 0
+; TONGA-NEXT: v_mov_b32_e32 v4, v3
+; TONGA-NEXT: v_mov_b32_e32 v5, v3
+; TONGA-NEXT: v_mov_b32_e32 v6, v3
+; TONGA-NEXT: s_and_b64 exec, exec, s[12:13]
+; TONGA-NEXT: image_sample_b v[3:6], v[0:2], s[0:7], s[8:11] dmask:0x7 tfe d16
+; TONGA-NEXT: s_waitcnt vmcnt(0)
+; TONGA-NEXT: v_lshlrev_b32_e32 v0, 16, v4
+; TONGA-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; TONGA-NEXT: v_mov_b32_e32 v1, v5
+; TONGA-NEXT: v_mov_b32_e32 v2, v6
+; TONGA-NEXT: ; return to shader part epilog
+;
+; GFX81-LABEL: image_sample_b_2d_v3f16_tfe:
+; GFX81: ; %bb.0: ; %main_body
+; GFX81-NEXT: s_mov_b64 s[12:13], exec
+; GFX81-NEXT: s_wqm_b64 exec, exec
+; GFX81-NEXT: v_mov_b32_e32 v3, 0
+; GFX81-NEXT: v_mov_b32_e32 v4, v3
+; GFX81-NEXT: v_mov_b32_e32 v5, v3
+; GFX81-NEXT: s_and_b64 exec, exec, s[12:13]
+; GFX81-NEXT: image_sample_b v[3:5], v[0:2], s[0:7], s[8:11] dmask:0x7 tfe d16
+; GFX81-NEXT: s_waitcnt vmcnt(0)
+; GFX81-NEXT: v_mov_b32_e32 v0, v3
+; GFX81-NEXT: v_mov_b32_e32 v1, v4
+; GFX81-NEXT: v_mov_b32_e32 v2, v5
+; GFX81-NEXT: ; return to shader part epilog
+;
+; GFX9-LABEL: image_sample_b_2d_v3f16_tfe:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: s_mov_b64 s[12:13], exec
+; GFX9-NEXT: s_wqm_b64 exec, exec
+; GFX9-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-NEXT: v_mov_b32_e32 v4, v3
+; GFX9-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT: image_sample_b v[3:5], v[0:2], s[0:7], s[8:11] dmask:0x7 tfe d16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v0, v3
+; GFX9-NEXT: v_mov_b32_e32 v1, v4
+; GFX9-NEXT: v_mov_b32_e32 v2, v5
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: image_sample_b_2d_v3f16_tfe:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s12, exec_lo
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT: v_mov_b32_e32 v3, v0
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: v_mov_b32_e32 v5, v2
+; GFX10-NEXT: v_mov_b32_e32 v4, v1
+; GFX10-NEXT: v_mov_b32_e32 v1, v0
+; GFX10-NEXT: v_mov_b32_e32 v2, v0
+; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT: image_sample_b v[0:2], v[3:5], s[0:7], s[8:11] dmask:0x7 dim:SQ_RSRC_IMG_2D tfe d16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+  %tex = call {<3 x half>,i32} @llvm.amdgcn.image.sample.b.2d.v3f16i32.f32.f32(i32 7, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
+  %tex.vec = extractvalue {<3 x half>, i32} %tex, 0
+  %tex.vec_wide = shufflevector <3 x half> %tex.vec, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %tex.err = extractvalue {<3 x half>, i32} %tex, 1
+  %tex.vecf = bitcast <4 x half> %tex.vec_wide to <2 x float>
+  %tex.vecf.0 = extractelement <2 x float> %tex.vecf, i32 0
+  %tex.vecf.1 = extractelement <2 x float> %tex.vecf, i32 1
+  %r.0 = insertelement <4 x float> undef, float %tex.vecf.0, i32 0
+  %r.1 = insertelement <4 x float> %r.0, float %tex.vecf.1, i32 1
+  %tex.errf = bitcast i32 %tex.err to float
+  %r = insertelement <4 x float> %r.1, float %tex.errf, i32 2
+  ret <4 x float> %r
+}
+
 define amdgpu_ps <2 x float> @image_sample_b_2d_v4f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
 ; TONGA-LABEL: image_sample_b_2d_v4f16:
 ; TONGA: ; %bb.0: ; %main_body
@@ -334,10 +459,13 @@ main_body:
 declare half @llvm.amdgcn.image.sample.2d.f16.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare {half,i32} @llvm.amdgcn.image.sample.2d.f16i32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <3 x half> @llvm.amdgcn.image.sample.2d.v3f16.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare {<2 x half>,i32} @llvm.amdgcn.image.sample.2d.v2f16i32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <2 x half> @llvm.amdgcn.image.sample.c.d.1d.v2f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare {<2 x half>,i32} @llvm.amdgcn.image.sample.c.d.1d.v2f16i32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <3 x half> @llvm.amdgcn.image.sample.b.2d.v3f16.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare {<3 x half>,i32} @llvm.amdgcn.image.sample.b.2d.v3f16i32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x half> @llvm.amdgcn.image.sample.b.2d.v4f16.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare {<4 x half>,i32} @llvm.amdgcn.image.sample.b.2d.v4f16i32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.format.d16.ll
index fb28bc0748b08..2ebf3f6633a97 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.format.d16.ll
@@ -23,6 +23,18 @@ main_body:
   ret half %elt
 }
 
+; GCN-LABEL: {{^}}buffer_load_format_d16_xyz:
+; UNPACKED: buffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0
+; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
+
+; PACKED: buffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0
+define amdgpu_ps half @buffer_load_format_d16_xyz(<4 x i32> inreg %rsrc) {
+main_body:
+  %data = call <3 x half> @llvm.amdgcn.raw.buffer.load.format.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
+  %elt = extractelement <3 x half> %data, i32 2
+  ret half %elt
+}
+
 ; GCN-LABEL: {{^}}buffer_load_format_d16_xyzw:
 ; UNPACKED: buffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0
 ; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
@@ -38,4 +50,5 @@ main_body:
 declare half @llvm.amdgcn.raw.buffer.load.format.f16(<4 x i32>, i32, i32, i32)
 declare <2 x half> @llvm.amdgcn.raw.buffer.load.format.v2f16(<4 x i32>, i32, i32, i32)
+declare <3 x half> @llvm.amdgcn.raw.buffer.load.format.v3f16(<4 x i32>, i32, i32, i32)
 declare <4 x half> @llvm.amdgcn.raw.buffer.load.format.v4f16(<4 x i32>, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll
index 139496282addf..68e77aff667c9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll
@@ -28,6 +28,31 @@ main_body:
   ret void
 }
 
+; GCN-LABEL: {{^}}buffer_store_format_d16_xyz:
+; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10
+
+; UNPACKED-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
+; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
+; UNPACKED-DAG: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_0]], [[K]]
+; UNPACKED-DAG: s_and_b32 [[MASKED1:s[0-9]+]], s[[S_DATA_1]], [[K]]
+
+; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
+; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED1]]
+
+; UNPACKED: buffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
+
+; PACKED: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
+; PACKED: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
+; PACKED: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED0]]
+
+; PACKED: buffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
+define amdgpu_kernel void @buffer_store_format_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %voffset) {
+main_body:
+  %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  call void @llvm.amdgcn.raw.buffer.store.format.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
+  ret void
+}
+
 ; GCN-LABEL: {{^}}buffer_store_format_d16_xyzw:
 ; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10
@@ -54,4 +79,5 @@ main_body:
 declare void @llvm.amdgcn.raw.buffer.store.format.f16(half, <4 x i32>, i32, i32, i32)
 declare void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half>, <4 x i32>, i32, i32, i32)
+declare void @llvm.amdgcn.raw.buffer.store.format.v3f16(<3 x half>, <4 x i32>, i32, i32, i32)
 declare void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half>, <4 x i32>, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll
index db7949f540964..0ebc4e67b4fbe 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll
@@ -26,6 +26,21 @@ main_body:
   ret half %elt
 }
 
+; GCN-LABEL: {{^}}tbuffer_load_d16_xyz:
+; PREGFX10-UNPACKED: tbuffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
+; GFX10-UNPACKED: tbuffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
+; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
+
+; PREGFX10-PACKED: tbuffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
+; GFX10-PACKED: tbuffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
+; PACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
+define amdgpu_ps half @tbuffer_load_d16_xyz(<4 x i32> inreg %rsrc) {
+main_body:
+  %data = call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
+  %elt = extractelement <3 x half> %data, i32 2
+  ret half %elt
+}
+
 ; GCN-LABEL: {{^}}tbuffer_load_d16_xyzw:
 ; PREGFX10-UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
 ; GFX10-UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
@@ -43,5 +58,5 @@ main_body:
 declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32)
 declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32)
+declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32)
 declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32)
-
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll
index 5041cf3197342..281c48513b6ae 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll
@@ -32,6 +32,31 @@ main_body:
   ret void
 }
 
+; GCN-LABEL: {{^}}tbuffer_store_d16_xyz:
+; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}},
+
+; UNPACKED-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
+; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
+; UNPACKED-DAG: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_0]], [[K]]
+; UNPACKED-DAG: s_and_b32 [[MASKED1:s[0-9]+]], s[[S_DATA_1]], [[K]]
+
+; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
+; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED1]]
+; PREGFX10-UNPACKED: tbuffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED]
+
+
+; PACKED-DAG: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
+; PACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
+; PACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED0]]
+; PREGFX10-PACKED: tbuffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED]
+; GFX10-PACKED: tbuffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_10_11_11_SSCALED]
+define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %data) {
+main_body:
+  %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  call void @llvm.amdgcn.raw.tbuffer.store.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
+  ret void
+}
+
 ; GCN-LABEL: {{^}}tbuffer_store_d16_xyzw:
 ; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}},
@@ -58,4 +83,5 @@ main_body:
 declare void @llvm.amdgcn.raw.tbuffer.store.f16(half, <4 x i32>, i32, i32, i32, i32)
 declare void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32)
+declare void @llvm.amdgcn.raw.tbuffer.store.v3f16(<3 x half>, <4 x i32>, i32, i32, i32, i32)
 declare void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.d16.ll
index 3e0d87bb6ef93..e6c90336724b5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.d16.ll
@@ -23,6 +23,19 @@ main_body:
   ret half %elt
 }
 
+; GCN-LABEL: {{^}}buffer_load_format_d16_xyz:
+; UNPACKED: buffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
+
+; PACKED: buffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+; PACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
+define amdgpu_ps half @buffer_load_format_d16_xyz(<4 x i32> inreg %rsrc) {
+main_body:
+  %data = call <3 x half> @llvm.amdgcn.struct.buffer.load.format.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
+  %elt = extractelement <3 x half> %data, i32 2
+  ret half %elt
+}
+
 ; GCN-LABEL: {{^}}buffer_load_format_d16_xyzw:
 ; UNPACKED: buffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
 ; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
@@ -47,5 +60,6 @@ main_body:
 declare half @llvm.amdgcn.struct.buffer.load.format.f16(<4 x i32>, i32, i32, i32, i32)
 declare <2 x half> @llvm.amdgcn.struct.buffer.load.format.v2f16(<4 x i32>, i32, i32, i32, i32)
+declare <3 x half> @llvm.amdgcn.struct.buffer.load.format.v3f16(<4 x i32>, i32, i32, i32, i32)
 declare <4 x half> @llvm.amdgcn.struct.buffer.load.format.v4f16(<4 x i32>, i32, i32, i32, i32)
 declare i16 @llvm.amdgcn.struct.buffer.load.format.i16(<4 x i32>, i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll
index 8ae753b59ab54..69c9a633db864 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll
@@ -28,6 +28,31 @@ main_body:
   ret void
 }
 
+; GCN-LABEL: {{^}}buffer_store_format_d16_xyz:
+; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10
+
+; UNPACKED-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
+; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
+; UNPACKED-DAG: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_0]], [[K]]
+; UNPACKED-DAG: s_and_b32 [[MASKED1:s[0-9]+]], s[[S_DATA_1]], [[K]]
+
+; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
+; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED1]]
+
+; UNPACKED: buffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+
+; PACKED: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
+; PACKED: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
+; PACKED: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED0]]
+
+; PACKED: buffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+define amdgpu_kernel void @buffer_store_format_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %index) {
+main_body:
+  %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  call void @llvm.amdgcn.struct.buffer.store.format.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
+  ret void
+}
+
 ; GCN-LABEL: {{^}}buffer_store_format_d16_xyzw:
 ; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10
@@ -64,5 +89,6 @@ main_body:
 declare void @llvm.amdgcn.struct.buffer.store.format.f16(half, <4 x i32>, i32, i32, i32, i32)
 declare void @llvm.amdgcn.struct.buffer.store.format.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32)
+declare void @llvm.amdgcn.struct.buffer.store.format.v3f16(<3 x half>, <4 x i32>, i32, i32, i32, i32)
 declare void @llvm.amdgcn.struct.buffer.store.format.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32)
 declare void @llvm.amdgcn.struct.buffer.store.format.i16(i16, <4 x i32>, i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll
index 2fd21a10564d4..ebf8940e034a4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll
@@ -28,6 +28,21 @@ main_body:
   ret half %elt
 }
 
+; GCN-LABEL: {{^}}tbuffer_load_d16_xyz:
+; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0
+; PREGFX10-UNPACKED: tbuffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
+; PREGFX10-UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
+
+; PREGFX10-PACKED: tbuffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
+; GFX10-PACKED: tbuffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] idxen
+; PACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
+define amdgpu_ps half @tbuffer_load_d16_xyz(<4 x i32> inreg %rsrc) {
+main_body:
+  %data = call <3 x half> @llvm.amdgcn.struct.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
+  %elt = extractelement <3 x half> %data, i32 2
+  ret half %elt
+}
+
 ; GCN-LABEL: {{^}}tbuffer_load_d16_xyzw:
 ; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0
 ; PREGFX10-UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
@@ -45,5 +60,5 @@ main_body:
 declare half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32, i32)
 declare <2 x half> @llvm.amdgcn.struct.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32, i32)
+declare <3 x half> @llvm.amdgcn.struct.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32, i32)
 declare <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32, i32)
-
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll
index ca78b29cc8f53..93634fbffb935 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll
@@ -32,6 +32,30 @@ main_body:
   ret void
 }
 
+; GCN-LABEL: {{^}}tbuffer_store_d16_xyz:
+; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10
+
+; UNPACKED-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
+; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
+; UNPACKED-DAG: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_0]], [[K]]
+; UNPACKED-DAG: s_and_b32 [[MASKED1:s[0-9]+]], s[[S_DATA_1]], [[K]]
+
+; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
+; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED1]]
+; PREGFX10-UNPACKED: tbuffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
+
+; PACKED-DAG: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
+; PACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
+; PACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED0]]
+; PREGFX10-PACKED: tbuffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
+; GFX10-PACKED: tbuffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
+define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %vindex) {
+main_body:
+  %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  call void @llvm.amdgcn.struct.tbuffer.store.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
+  ret void
+}
+
 ; GCN-LABEL: {{^}}tbuffer_store_d16_xyzw:
 ; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10
@@ -57,4 +81,5 @@ main_body:
 declare void @llvm.amdgcn.struct.tbuffer.store.f16(half, <4 x i32>, i32, i32, i32, i32, i32)
 declare void @llvm.amdgcn.struct.tbuffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32, i32)
+declare void @llvm.amdgcn.struct.tbuffer.store.v3f16(<3 x half>, <4 x i32>, i32, i32, i32, i32, i32)
 declare void @llvm.amdgcn.struct.tbuffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.d16.ll
index 205cc5f78d335..2839f92d2aae1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.d16.ll
@@ -23,6 +23,19 @@ main_body:
   ret half %elt
 }
 
+; GCN-LABEL: {{^}}tbuffer_load_d16_xyz:
+; UNPACKED: tbuffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
+; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
+
+; PACKED: tbuffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
+; PACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
+define amdgpu_ps half @tbuffer_load_d16_xyz(<4 x i32> inreg %rsrc) {
+main_body:
+  %data = call <3 x half> @llvm.amdgcn.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0, i32 6, i32 1, i1 0, i1 0)
+  %elt = extractelement <3 x half> %data, i32 2
+  ret half %elt
+}
+
 ; GCN-LABEL: {{^}}tbuffer_load_d16_xyzw:
 ; UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
 ; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
@@ -38,4 +51,5 @@ main_body:
 declare half @llvm.amdgcn.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
 declare <2 x half> @llvm.amdgcn.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
+declare <3 x half> @llvm.amdgcn.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
 declare <4 x half> @llvm.amdgcn.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.d16.ll
index 4dd76a3a632dc..a940df3540cfe 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.d16.ll
@@ -28,6 +28,28 @@ main_body:
   ret void
 }
 
+; GCN-LABEL: {{^}}tbuffer_store_d16_xyz:
+; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10
+
+; UNPACKED-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
+; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
+; UNPACKED-DAG: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_0]], [[K]]
+; UNPACKED-DAG: s_and_b32 [[SHR1:s[0-9]+]], s[[S_DATA_1]], [[K]]
+
+; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
+; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHR1]]
+; UNPACKED: tbuffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
+
+; PACKED-DAG: s_and_b32 [[SHR0:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
+; PACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
+; PACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHR0]]
+; PACKED: tbuffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
+define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <3 x half> %data, i32 %vindex) {
+main_body:
+  call void @llvm.amdgcn.tbuffer.store.v3f16(<3 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0, i32 1, i32 2, i1 0, i1 0)
+  ret void
+}
+
 ; GCN-LABEL: {{^}}tbuffer_store_d16_xyzw:
 ; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10
@@ -52,4 +74,5 @@ main_body:
 declare void @llvm.amdgcn.tbuffer.store.f16(half, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
 declare void @llvm.amdgcn.tbuffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
+declare void @llvm.amdgcn.tbuffer.store.v3f16(<3 x half>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
 declare void @llvm.amdgcn.tbuffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
index 1969056311f8c..f8e7789d5f021 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
@@ -2161,10 +2161,9 @@ define amdgpu_ps half @extract_elt3_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc
   ret half %elt1
 }
 
-; FIXME: Enable load shortening when full support for v3f16 has been added (should expect call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16).
 ; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v4f16(
-; CHECK-NEXT: %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
-; CHECK-NEXT: %elt1 = extractelement <4 x half> %data, i32 2
+; CHECK-NEXT: %data = call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <3 x half> %data, i32 2
 ; CHECK-NEXT: ret half %elt1
 define amdgpu_ps half @extract_elt2_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
   %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
@@ -2992,10 +2991,9 @@ define amdgpu_ps half @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(float %d
   ret half %elt0
 }
 
-; FIXME: Enable load shortening when full support for v3f16 has been added (should expect call <3 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v3f16.f32.f32).
 ; CHECK-LABEL: @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(
-; CHECK-NEXT: %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-; CHECK-NEXT: %res = shufflevector <4 x half> %data, <4 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+; CHECK-NEXT: %data = call <3 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v3f16.f32.f32(i32 7, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: %res = shufflevector <3 x half> %data, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
 ; CHECK-NEXT: ret <4 x half> %res
 define amdgpu_ps <4 x half> @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)

From 71131db6895430d1c027712677a99a573eb7545f Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Sun, 30 Aug 2020 17:28:48 -0400
Subject: [PATCH 0842/1079] AMDGPU: Improve <2 x i24> arguments and return
 value handling

This was asserting for GlobalISel. For SelectionDAG, these were
passed on the stack. Instead, scalarize them as if they were
32-bit vectors.

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 60 +++++---
 .../AMDGPU/GlobalISel/function-returns.ll | 121 +++++++++++++++
 .../GlobalISel/irtranslator-function-args.ll | 98 ++++++++++++
 llvm/test/CodeGen/AMDGPU/call-return-types.ll | 14 ++
 llvm/test/CodeGen/AMDGPU/fshr.ll | 142 +++++------------
 llvm/test/CodeGen/AMDGPU/function-args.ll | 10 ++
 6 files changed, 321 insertions(+), 124 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 7580a1fda6d5b..6350562ec4f95 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -921,15 +921,18 @@ MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
   if (VT.isVector()) {
     EVT ScalarVT = VT.getScalarType();
     unsigned Size = ScalarVT.getSizeInBits();
-    if (Size == 32)
-      return ScalarVT.getSimpleVT();
+    if (Size == 16) {
+      if (Subtarget->has16BitInsts())
+        return VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
+      return VT.isInteger() ? MVT::i32 : MVT::f32;
+    }
-    if (Size > 32)
-      return MVT::i32;
+    if (Size < 16)
+      return Subtarget->has16BitInsts() ? MVT::i16 : MVT::i32;
+    return Size == 32 ? ScalarVT.getSimpleVT() : MVT::i32;
+  }
-    if (Size == 16 && Subtarget->has16BitInsts())
-      return VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
-  } else if (VT.getSizeInBits() > 32)
+  if (VT.getSizeInBits() > 32)
     return MVT::i32;
 
   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
@@ -946,14 +949,15 @@ unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
     EVT ScalarVT = VT.getScalarType();
     unsigned Size = ScalarVT.getSizeInBits();
 
-    if (Size == 32)
+    // FIXME: Should probably promote 8-bit vectors to i16.
+    if (Size == 16 && Subtarget->has16BitInsts())
+      return (NumElts + 1) / 2;
+
+    if (Size <= 32)
       return NumElts;
 
     if (Size > 32)
      return NumElts * ((Size + 31) / 32);
-
-    if (Size == 16 && Subtarget->has16BitInsts())
-      return (NumElts + 1) / 2;
   } else if (VT.getSizeInBits() > 32)
     return (VT.getSizeInBits() + 31) / 32;
 
@@ -968,6 +972,16 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
     unsigned NumElts = VT.getVectorNumElements();
     EVT ScalarVT = VT.getScalarType();
     unsigned Size = ScalarVT.getSizeInBits();
+    // FIXME: We should fix the ABI to be the same on targets without 16-bit
+    // support, but unless we can properly handle 3-vectors, it will still be
+    // inconsistent.
+    if (Size == 16 && Subtarget->has16BitInsts()) {
+      RegisterVT = VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
+      IntermediateVT = RegisterVT;
+      NumIntermediates = (NumElts + 1) / 2;
+      return NumIntermediates;
+    }
+
     if (Size == 32) {
       RegisterVT = ScalarVT.getSimpleVT();
      IntermediateVT = RegisterVT;
@@ -975,20 +989,26 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
       return NumIntermediates;
     }
 
-    if (Size > 32) {
+    if (Size < 16 && Subtarget->has16BitInsts()) {
+      // FIXME: Should probably form v2i16 pieces
+      RegisterVT = MVT::i16;
+      IntermediateVT = ScalarVT;
+      NumIntermediates = NumElts;
+      return NumIntermediates;
+    }
+
+
+    if (Size != 16 && Size <= 32) {
       RegisterVT = MVT::i32;
-      IntermediateVT = RegisterVT;
-      NumIntermediates = NumElts * ((Size + 31) / 32);
+      IntermediateVT = ScalarVT;
+      NumIntermediates = NumElts;
       return NumIntermediates;
     }
 
-    // FIXME: We should fix the ABI to be the same on targets without 16-bit
-    // support, but unless we can properly handle 3-vectors, it will be still be
-    // inconsistent.
-    if (Size == 16 && Subtarget->has16BitInsts()) {
-      RegisterVT = VT.isInteger() ?
MVT::v2i16 : MVT::v2f16; + if (Size > 32) { + RegisterVT = MVT::i32; IntermediateVT = RegisterVT; - NumIntermediates = (NumElts + 1) / 2; + NumIntermediates = NumElts * ((Size + 31) / 32); return NumIntermediates; } } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll index acd71947aeeed..fa569b941c935 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -196,6 +196,89 @@ define half @f16_func_void() #0 { ret half %val } +define i24 @i24_func_void() #0 { + ; CHECK-LABEL: name: i24_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load 3 from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + %val = load i24, i24 addrspace(1)* undef + ret i24 %val +} + +define zeroext i24 @i24_zeroext_func_void() #0 { + ; CHECK-LABEL: name: i24_zeroext_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load 3 from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) + ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + %val = load i24, i24 addrspace(1)* undef + ret i24 %val +} + +define signext i24 @i24_signext_func_void() #0 { + ; CHECK-LABEL: name: i24_signext_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load 3 from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) + ; CHECK: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + %val = load i24, i24 addrspace(1)* undef + ret i24 %val +} + +define <2 x i24> @v2i24_func_void() #0 { + ; CHECK-LABEL: name: v2i24_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load 6 from `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + %val = load <2 x i24>, <2 x i24> addrspace(1)* undef + ret <2 x i24> %val +} + +define <3 x i24> @v3i24_func_void() #0 { + ; CHECK-LABEL: name: v3i24_func_void + ; CHECK: bb.1 
(%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load 9 from `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24), [[UV2:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<3 x s24>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s24) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %val = load <3 x i24>, <3 x i24> addrspace(1)* undef + ret <3 x i24> %val +} + define i32 @i32_func_void() #0 { ; CHECK-LABEL: name: i32_func_void ; CHECK: bb.1 (%ir-block.0): @@ -977,6 +1060,44 @@ define <16 x i8> @v16i8_func_void() #0 { ret <16 x i8> %val } +define <2 x i8> @v2i8_func_void() #0 { + ; CHECK-LABEL: name: v2i8_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load 2 from `<2 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<2 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + %val = load <2 x i8>, <2 x i8> addrspace(1)* undef + ret <2 x i8> %val +} + +define <3 x i8> @v3i8_func_void() #0 { + ; CHECK-LABEL: name: v3i8_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load 3 from `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<3 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %val = load <3 x i8>, <3 x i8> addrspace(1)* undef + ret <3 x i8> %val +} + define <4 x i8> @v4i8_func_void() #0 { ; CHECK-LABEL: name: v4i8_func_void ; CHECK: bb.1 (%ir-block.0): diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll index 28f60ca7528db..96d0c9d1d4a80 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -553,6 +553,104 @@ define void @void_func_v2i32(<2 x i32> %arg0) #0 { ret 
void } +define void @void_func_v2i24(<2 x i24> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v2i24 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s24>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: (store 6 into `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]] + store <2 x i24> %arg0, <2 x i24> addrspace(1)* undef + ret void +} + +define void @void_func_v3i24(<3 x i24> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v3i24 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s24>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store 9 into `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK: S_SETPC_B64_return [[COPY4]] + store <3 x i24> %arg0, <3 x i24> addrspace(1)* undef + ret void +} + +define void @void_func_v2i8(<2 x i8> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v2i8 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store 2 into `<2 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]] + store <2 x i8> %arg0, <2 x i8> addrspace(1)* undef + ret void +} + +define void @void_func_v3i8(<3 x i8> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v3i8 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s16>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = 
G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC3]](<3 x s8>), [[DEF]](p1) :: (store 3 into `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK: S_SETPC_B64_return [[COPY4]] + store <3 x i8> %arg0, <3 x i8> addrspace(1)* undef + ret void +} + +define void @void_func_v4i8(<4 x i8> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v4i8 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s16>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC4]](<4 x s8>), [[DEF]](p1) :: (store 4 into `<4 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]] + store <4 x i8> %arg0, <4 x i8> addrspace(1)* undef + ret void +} + define void @void_func_v2p3i8(<2 x i8 addrspace(3)*> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2p3i8 ; CHECK: bb.1 (%ir-block.0): diff --git a/llvm/test/CodeGen/AMDGPU/call-return-types.ll b/llvm/test/CodeGen/AMDGPU/call-return-types.ll index 8751c61dcd400..33b201bbe6d8e 100644 --- a/llvm/test/CodeGen/AMDGPU/call-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-return-types.ll @@ -30,6 +30,8 @@ declare <3 x float> @external_v3f32_func_void() #0 declare <5 x float> @external_v5f32_func_void() #0 declare <2 x double> @external_v2f64_func_void() #0 +declare <2 x i24> @external_v2i24_func_void() #0 + declare <2 x i32> @external_v2i32_func_void() #0 declare <3 x i32> @external_v3i32_func_void() #0 declare <4 x i32> @external_v4i32_func_void() #0 @@ -250,6 +252,18 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 { ret void } +; GCN-LABEL: {{^}}test_call_external_v2i24_func_void: +; GCN: s_swappc_b64 +; GCN: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}v0, v1 +define amdgpu_kernel void @test_call_external_v2i24_func_void() #0 { + %val = call <2 x i24> @external_v2i24_func_void() + %elt0 = extractelement <2 x i24> %val, i32 0 + %elt1 = extractelement <2 x i24> %val, i32 1 + %add = add i24 %elt0, %elt1 + store volatile i24 %add, i24 addrspace(1)* undef + ret void +} + ; GCN-LABEL: {{^}}test_call_external_v3f32_func_void: ; GCN: s_swappc ; GFX7-DAG: flat_store_dwordx3 {{.*}}, v[0:2] diff --git a/llvm/test/CodeGen/AMDGPU/fshr.ll b/llvm/test/CodeGen/AMDGPU/fshr.ll index 0733e2877bffc..96b609436da78 100644 --- a/llvm/test/CodeGen/AMDGPU/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/fshr.ll @@ -981,127 +981,61 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2 ; SI-LABEL: v_fshr_v2i24: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: buffer_load_dword v1, off, s[0:3], s32 -; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 -; SI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 -; SI-NEXT: buffer_load_dword 
v4, off, s[0:3], s32 offset:12 -; SI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:4 -; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:8 ; SI-NEXT: s_mov_b32 s4, 0xaaaaaaab -; SI-NEXT: v_add_i32_e32 v7, vcc, 3, v0 -; SI-NEXT: v_add_i32_e32 v8, vcc, 4, v0 -; SI-NEXT: v_add_i32_e32 v9, vcc, 5, v0 -; SI-NEXT: v_add_i32_e32 v10, vcc, 2, v0 -; SI-NEXT: s_waitcnt vmcnt(4) -; SI-NEXT: v_mul_hi_u32 v11, v2, s4 -; SI-NEXT: s_waitcnt vmcnt(3) -; SI-NEXT: v_mul_hi_u32 v12, v3, s4 -; SI-NEXT: s_waitcnt vmcnt(2) -; SI-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; SI-NEXT: v_lshrrev_b32_e32 v11, 4, v11 -; SI-NEXT: v_lshrrev_b32_e32 v12, 4, v12 -; SI-NEXT: v_mul_lo_u32 v11, v11, 24 -; SI-NEXT: v_mul_lo_u32 v12, v12, 24 -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_lshlrev_b32_e32 v6, 8, v6 -; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v11 -; SI-NEXT: v_sub_i32_e32 v3, vcc, v3, v12 -; SI-NEXT: v_add_i32_e32 v2, vcc, 8, v2 +; SI-NEXT: v_mul_hi_u32 v6, v4, s4 +; SI-NEXT: v_mul_hi_u32 v7, v5, s4 +; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; SI-NEXT: v_lshrrev_b32_e32 v6, 4, v6 +; SI-NEXT: v_mul_lo_u32 v6, v6, 24 +; SI-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 +; SI-NEXT: v_lshrrev_b32_e32 v6, 4, v7 +; SI-NEXT: v_mul_lo_u32 v6, v6, 24 +; SI-NEXT: v_add_i32_e32 v4, vcc, 8, v4 +; SI-NEXT: v_alignbit_b32 v0, v0, v2, v4 +; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v3 +; SI-NEXT: v_sub_i32_e32 v3, vcc, v5, v6 ; SI-NEXT: v_add_i32_e32 v3, vcc, 8, v3 -; SI-NEXT: v_alignbit_b32 v1, v1, v6, v2 -; SI-NEXT: v_alignbit_b32 v2, v5, v4, v3 -; SI-NEXT: buffer_store_byte v2, v7, s[0:3], 0 offen -; SI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen -; SI-NEXT: v_lshrrev_b32_e32 v0, 8, v2 -; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; SI-NEXT: buffer_store_byte v0, v8, s[0:3], 0 offen -; SI-NEXT: buffer_store_byte v2, v9, s[0:3], 0 offen -; SI-NEXT: buffer_store_byte v1, v10, s[0:3], 0 offen -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; SI-NEXT: v_alignbit_b32 v1, v1, v2, v3 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_fshr_v2i24: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 -; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 -; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:4 -; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:8 ; VI-NEXT: s_mov_b32 s4, 0xaaaaaaab -; VI-NEXT: v_add_u32_e32 v7, vcc, 3, v0 -; VI-NEXT: v_add_u32_e32 v8, vcc, 4, v0 -; VI-NEXT: v_add_u32_e32 v9, vcc, 5, v0 -; VI-NEXT: v_add_u32_e32 v10, vcc, 2, v0 -; VI-NEXT: s_waitcnt vmcnt(4) -; VI-NEXT: v_mul_hi_u32 v11, v2, s4 -; VI-NEXT: s_waitcnt vmcnt(3) -; VI-NEXT: v_mul_hi_u32 v12, v3, s4 -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; VI-NEXT: v_lshrrev_b32_e32 v11, 4, v11 -; VI-NEXT: v_lshrrev_b32_e32 v12, 4, v12 -; VI-NEXT: v_mul_lo_u32 v11, v11, 24 -; VI-NEXT: v_mul_lo_u32 v12, v12, 24 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_lshlrev_b32_e32 v6, 8, v6 -; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v11 -; VI-NEXT: v_sub_u32_e32 v3, vcc, v3, v12 -; VI-NEXT: v_add_u32_e32 v2, vcc, 8, v2 +; VI-NEXT: v_mul_hi_u32 v6, v4, s4 +; VI-NEXT: v_mul_hi_u32 v7, v5, s4 +; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; VI-NEXT: v_lshrrev_b32_e32 v6, 4, v6 +; VI-NEXT: v_mul_lo_u32 v6, v6, 24 +; VI-NEXT: v_sub_u32_e32 v4, vcc, v4, v6 +; VI-NEXT: 
v_lshrrev_b32_e32 v6, 4, v7 +; VI-NEXT: v_mul_lo_u32 v6, v6, 24 +; VI-NEXT: v_add_u32_e32 v4, vcc, 8, v4 +; VI-NEXT: v_alignbit_b32 v0, v0, v2, v4 +; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v3 +; VI-NEXT: v_sub_u32_e32 v3, vcc, v5, v6 ; VI-NEXT: v_add_u32_e32 v3, vcc, 8, v3 -; VI-NEXT: v_alignbit_b32 v1, v1, v6, v2 -; VI-NEXT: v_alignbit_b32 v2, v5, v4, v3 -; VI-NEXT: buffer_store_byte v2, v7, s[0:3], 0 offen -; VI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen -; VI-NEXT: v_lshrrev_b32_e32 v0, 8, v2 -; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; VI-NEXT: buffer_store_byte v0, v8, s[0:3], 0 offen -; VI-NEXT: buffer_store_byte v2, v9, s[0:3], 0 offen -; VI-NEXT: buffer_store_byte v1, v10, s[0:3], 0 offen -; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_alignbit_b32 v1, v1, v2, v3 ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_fshr_v2i24: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:20 -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:12 -; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 -; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:8 -; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab -; GFX9-NEXT: s_waitcnt vmcnt(5) -; GFX9-NEXT: v_mul_hi_u32 v6, v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(4) -; GFX9-NEXT: v_mul_hi_u32 v7, v2, s4 -; GFX9-NEXT: s_waitcnt vmcnt(3) -; GFX9-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX9-NEXT: v_mul_hi_u32 v6, v4, s4 +; GFX9-NEXT: v_mul_hi_u32 v7, v5, s4 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v6 -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 4, v7 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshlrev_b32_e32 v5, 8, v5 -; GFX9-NEXT: v_sub_u32_e32 v1, v1, v6 -; GFX9-NEXT: v_sub_u32_e32 v2, v2, v7 -; GFX9-NEXT: v_add_u32_e32 v1, 8, v1 -; GFX9-NEXT: v_add_u32_e32 v2, 8, v2 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_alignbit_b32 v1, v8, v5, v1 -; GFX9-NEXT: v_alignbit_b32 v2, v4, v3, v2 -; GFX9-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:3 -; GFX9-NEXT: buffer_store_byte_d16_hi v2, v0, s[0:3], 0 offen offset:5 -; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], 0 offen offset:2 -; GFX9-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v2 -; GFX9-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen offset:4 -; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v7 +; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX9-NEXT: v_add_u32_e32 v4, 8, v4 +; GFX9-NEXT: v_alignbit_b32 v0, v0, v2, v4 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v3 +; GFX9-NEXT: v_sub_u32_e32 v3, v5, v6 +; GFX9-NEXT: v_add_u32_e32 v3, 8, v3 +; GFX9-NEXT: v_alignbit_b32 v1, v1, v2, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_fshr_v2i24: diff --git a/llvm/test/CodeGen/AMDGPU/function-args.ll b/llvm/test/CodeGen/AMDGPU/function-args.ll index ded8d7ad55113..1f2657fe94d29 100644 --- a/llvm/test/CodeGen/AMDGPU/function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/function-args.ll @@ -344,6 +344,16 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 { ret void } +; GCN-LABEL: {{^}}void_func_v2i24: +; GCN: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}v0, v1 +define void @void_func_v2i24(<2 x i24> %arg0) #0 { + %elt0 = extractelement <2 x i24> %arg0, i32 0 + %elt1 = 
extractelement <2 x i24> %arg0, i32 1 + %add = add i24 %elt0, %elt1 + store i24 %add, i24 addrspace(1)* undef + ret void +} + ; GCN-LABEL: {{^}}void_func_v2f32: ; GCN-NOT: v[0:1] ; GCN-NOT: v0 From 6e85c3d5c786f0d3878d7f79503e8641d1b7030b Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 15 Sep 2020 14:54:38 -0700 Subject: [PATCH 0843/1079] [NFC][Regalloc] accessors for 'reg' and 'weight' Also renamed the fields to follow style guidelines. Accessors help with readability - weight mutation, in particular, is easier to follow this way. Differential Revision: https://reviews.llvm.org/D87725 --- llvm/include/llvm/CodeGen/LiveInterval.h | 20 +-- llvm/include/llvm/CodeGen/LiveRangeEdit.h | 2 +- llvm/lib/CodeGen/CalcSpillWeights.cpp | 18 +-- llvm/lib/CodeGen/InlineSpiller.cpp | 25 ++-- llvm/lib/CodeGen/LiveDebugVariables.cpp | 6 +- llvm/lib/CodeGen/LiveInterval.cpp | 23 ++-- llvm/lib/CodeGen/LiveIntervalCalc.cpp | 4 +- llvm/lib/CodeGen/LiveIntervalUnion.cpp | 6 +- llvm/lib/CodeGen/LiveIntervals.cpp | 16 +-- llvm/lib/CodeGen/LiveRangeEdit.cpp | 22 +-- llvm/lib/CodeGen/LiveRegMatrix.cpp | 20 +-- llvm/lib/CodeGen/MachineVerifier.cpp | 8 +- llvm/lib/CodeGen/RegAllocBase.cpp | 28 ++-- llvm/lib/CodeGen/RegAllocBasic.cpp | 8 +- llvm/lib/CodeGen/RegAllocGreedy.cpp | 125 +++++++++--------- llvm/lib/CodeGen/RegAllocPBQP.cpp | 20 +-- llvm/lib/CodeGen/RegisterCoalescer.cpp | 47 +++---- llvm/lib/CodeGen/RenameIndependentSubregs.cpp | 10 +- llvm/lib/CodeGen/SplitKit.cpp | 14 +- llvm/lib/CodeGen/StackSlotColoring.cpp | 17 +-- llvm/lib/CodeGen/TargetRegisterInfo.cpp | 2 +- llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp | 22 +-- llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp | 2 +- .../WebAssemblyOptimizeLiveIntervals.cpp | 2 +- .../WebAssembly/WebAssemblyRegColoring.cpp | 22 +-- 25 files changed, 250 insertions(+), 239 deletions(-) diff --git a/llvm/include/llvm/CodeGen/LiveInterval.h b/llvm/include/llvm/CodeGen/LiveInterval.h index 0764257125e6e..a63eaac44063b 100644 --- a/llvm/include/llvm/CodeGen/LiveInterval.h +++ b/llvm/include/llvm/CodeGen/LiveInterval.h @@ -704,12 +704,16 @@ namespace llvm { private: SubRange *SubRanges = nullptr; ///< Single linked list of subregister live /// ranges. + const unsigned Reg; // the register or stack slot of this interval. + float Weight = 0.0; // weight of this interval public: - const unsigned reg; // the register or stack slot of this interval. - float weight; // weight of this interval + unsigned reg() const { return Reg; } + float weight() const { return Weight; } + void incrementWeight(float Inc) { Weight += Inc; } + void setWeight(float Value) { Weight = Value; } - LiveInterval(unsigned Reg, float Weight) : reg(Reg), weight(Weight) {} + LiveInterval(unsigned Reg, float Weight) : Reg(Reg), Weight(Weight) {} ~LiveInterval() { clearSubRanges(); @@ -806,14 +810,10 @@ namespace llvm { unsigned getSize() const; /// isSpillable - Can this interval be spilled? - bool isSpillable() const { - return weight != huge_valf; - } + bool isSpillable() const { return Weight != huge_valf; } /// markNotSpillable - Mark interval as not spillable - void markNotSpillable() { - weight = huge_valf; - } + void markNotSpillable() { Weight = huge_valf; } /// For a given lane mask @p LaneMask, compute indexes at which the /// lane is marked undefined by subregister definitions. 
@@ -870,7 +870,7 @@ namespace llvm { bool operator<(const LiveInterval& other) const { const SlotIndex &thisIndex = beginIndex(); const SlotIndex &otherIndex = other.beginIndex(); - return std::tie(thisIndex, reg) < std::tie(otherIndex, other.reg); + return std::tie(thisIndex, Reg) < std::tie(otherIndex, other.Reg); } void print(raw_ostream &OS) const; diff --git a/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/llvm/include/llvm/CodeGen/LiveRangeEdit.h index 3c4273130ab2b..af8fe91431c88 100644 --- a/llvm/include/llvm/CodeGen/LiveRangeEdit.h +++ b/llvm/include/llvm/CodeGen/LiveRangeEdit.h @@ -152,7 +152,7 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate { return *Parent; } - Register getReg() const { return getParent().reg; } + Register getReg() const { return getParent().reg(); } /// Iterator for accessing the new registers added by this edit. using iterator = SmallVectorImpl::const_iterator; diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 254503673fd2b..75cf6a63dc9a7 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -86,7 +86,7 @@ static bool isRematerializable(const LiveInterval &LI, const LiveIntervals &LIS, VirtRegMap *VRM, const TargetInstrInfo &TII) { - unsigned Reg = LI.reg; + unsigned Reg = LI.reg(); unsigned Original = VRM ? VRM->getOriginal(Reg) : 0; for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); I != E; ++I) { @@ -140,7 +140,7 @@ void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { // Check if unspillable. if (weight < 0) return; - li.weight = weight; + li.setWeight(weight); } float VirtRegAuxInfo::futureWeight(LiveInterval &li, SlotIndex start, @@ -159,10 +159,10 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, unsigned numInstr = 0; // Number of instructions using li SmallPtrSet visited; - std::pair TargetHint = mri.getRegAllocationHint(li.reg); + std::pair TargetHint = mri.getRegAllocationHint(li.reg()); if (li.isSpillable() && VRM) { - Register Reg = li.reg; + Register Reg = li.reg(); Register Original = VRM->getOriginal(Reg); const LiveInterval &OrigInt = LIS.getInterval(Original); // li comes from a split of OrigInt. If OrigInt was marked @@ -215,7 +215,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, std::set CopyHints; for (MachineRegisterInfo::reg_instr_nodbg_iterator - I = mri.reg_instr_nodbg_begin(li.reg), + I = mri.reg_instr_nodbg_begin(li.reg()), E = mri.reg_instr_nodbg_end(); I != E;) { MachineInstr *mi = &*(I++); @@ -243,7 +243,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, // Calculate instr weight. bool reads, writes; - std::tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); + std::tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg()); weight = LiveIntervals::getSpillWeight(writes, reads, &MBFI, *mi); // Give extra weight to what looks like a loop induction variable update. @@ -256,7 +256,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, // Get allocation hints from copies. 
if (!mi->isCopy()) continue; - Register hint = copyHint(mi, li.reg, tri, mri); + Register hint = copyHint(mi, li.reg(), tri, mri); if (!hint) continue; // Force hweight onto the stack so that x86 doesn't add hidden precision, @@ -275,7 +275,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, if (updateLI && CopyHints.size()) { // Remove a generic hint if previously added by target. if (TargetHint.first == 0 && TargetHint.second) - mri.clearSimpleHint(li.reg); + mri.clearSimpleHint(li.reg()); std::set HintedRegs; for (auto &Hint : CopyHints) { @@ -283,7 +283,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, (TargetHint.first != 0 && Hint.Reg == TargetHint.second)) // Don't add the same reg twice or the target-type hint again. continue; - mri.addRegAllocationHint(li.reg, Hint.Reg); + mri.addRegAllocationHint(li.reg(), Hint.Reg); } // Weakly boost the spill weight of hinted registers. diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 59e8a5cea1c3c..911ac88c802fc 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -289,8 +289,9 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { // Check that all uses satisfy our criteria. for (MachineRegisterInfo::reg_instr_nodbg_iterator - RI = MRI.reg_instr_nodbg_begin(SnipLI.reg), - E = MRI.reg_instr_nodbg_end(); RI != E; ) { + RI = MRI.reg_instr_nodbg_begin(SnipLI.reg()), + E = MRI.reg_instr_nodbg_end(); + RI != E;) { MachineInstr &MI = *RI++; // Allow copies to/from Reg. @@ -299,11 +300,11 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { // Allow stack slot loads. int FI; - if (SnipLI.reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) + if (SnipLI.reg() == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) continue; // Allow stack slot stores. - if (SnipLI.reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) + if (SnipLI.reg() == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) continue; // Allow a single additional instruction. 
@@ -432,7 +433,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { do { LiveInterval *LI; std::tie(LI, VNI) = WorkList.pop_back_val(); - Register Reg = LI->reg; + Register Reg = LI->reg(); LLVM_DEBUG(dbgs() << "Checking redundant spills for " << VNI->id << '@' << VNI->def << " in " << *LI << '\n'); @@ -511,7 +512,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { if (!SnippetCopies.count(MI)) continue; LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg()); - assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy"); + assert(isRegToSpill(SnipLI.reg()) && "Unexpected register in copy"); VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true)); assert(SnipVNI && "Snippet undefined before copy"); WorkList.push_back(std::make_pair(&SnipLI, SnipVNI)); @@ -556,7 +557,7 @@ bool InlineSpiller::canGuaranteeAssignmentAfterRemat(Register VReg, bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { // Analyze instruction SmallVector, 8> Ops; - VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, VirtReg.reg, &Ops); + VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, VirtReg.reg(), &Ops); if (!RI.Reads) return false; @@ -568,7 +569,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { LLVM_DEBUG(dbgs() << "\tadding flags: "); for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); - if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) + if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg()) MO.setIsUndef(); } LLVM_DEBUG(dbgs() << UseIdx << '\t' << MI); @@ -608,7 +609,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { // If we can't guarantee that we'll be able to actually assign the new vreg, // we can't remat. - if (!canGuaranteeAssignmentAfterRemat(VirtReg.reg, MI)) { + if (!canGuaranteeAssignmentAfterRemat(VirtReg.reg(), MI)) { markValueUsed(&VirtReg, ParentVNI); LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI); return false; @@ -633,7 +634,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { // Replace operands for (const auto &OpPair : Ops) { MachineOperand &MO = OpPair.first->getOperand(OpPair.second); - if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) { + if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg()) { MO.setReg(NewVReg); MO.setIsKill(); } @@ -1171,7 +1172,7 @@ void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot, // save a copy of LiveInterval in StackSlotToOrigLI because the original // LiveInterval may be cleared after all its references are spilled. 
if (StackSlotToOrigLI.find(StackSlot) == StackSlotToOrigLI.end()) { - auto LI = std::make_unique<LiveInterval>(OrigLI.reg, OrigLI.weight); + auto LI = std::make_unique<LiveInterval>(OrigLI.reg(), OrigLI.weight()); LI->assign(OrigLI, Allocator); StackSlotToOrigLI[StackSlot] = std::move(LI); } @@ -1199,7 +1200,7 @@ bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr &Spill, bool HoistSpillHelper::isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI, MachineBasicBlock &BB, Register &LiveReg) { SlotIndex Idx; - Register OrigReg = OrigLI.reg; + Register OrigReg = OrigLI.reg(); MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, BB); if (MI != BB.end()) Idx = LIS.getInstructionIndex(*MI); diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp index 97cc7a0c30343..bfc6483db39a7 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -777,12 +777,12 @@ void UserValue::addDefsFromCopies( if (Kills.empty()) return; // Don't track copies from physregs, there are too many uses. - if (!Register::isVirtualRegister(LI->reg)) + if (!Register::isVirtualRegister(LI->reg())) return; // Collect all the (vreg, valno) pairs that are copies of LI. SmallVector<std::pair<LiveInterval *, const VNInfo *>, 8> CopyValues; - for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg)) { + for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg())) { MachineInstr *MI = MO.getParent(); // Copies of the full value. if (MO.getSubReg() || !MI->isCopy()) continue; @@ -1066,7 +1066,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<Register> NewRegs, LII->start < LocMapI.stop()) { // Overlapping correct location. Allocate NewLocNo now. if (NewLocNo == UndefLocNo) { - MachineOperand MO = MachineOperand::CreateReg(LI->reg, false); + MachineOperand MO = MachineOperand::CreateReg(LI->reg(), false); MO.setSubReg(locations[OldLocNo].getSubReg()); NewLocNo = getLocationNo(MO); DidChange = true; diff --git a/llvm/lib/CodeGen/LiveInterval.cpp b/llvm/lib/CodeGen/LiveInterval.cpp index 930dc116205a3..ce0e58772068a 100644 --- a/llvm/lib/CodeGen/LiveInterval.cpp +++ b/llvm/lib/CodeGen/LiveInterval.cpp @@ -951,9 +951,9 @@ void LiveInterval::refineSubRanges( MatchingRange = createSubRangeFrom(Allocator, Matching, SR); // Now that the subrange is split in half, make sure we // only keep in the subranges the VNIs that touch the related half. 
- stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI, + stripValuesNotDefiningMask(reg(), *MatchingRange, Matching, Indexes, TRI, ComposeSubRegIdx); - stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI, + stripValuesNotDefiningMask(reg(), SR, SR.LaneMask, Indexes, TRI, ComposeSubRegIdx); } Apply(*MatchingRange); @@ -977,11 +977,11 @@ void LiveInterval::computeSubRangeUndefs(SmallVectorImpl<SlotIndex> &Undefs, LaneBitmask LaneMask, const MachineRegisterInfo &MRI, const SlotIndexes &Indexes) const { - assert(Register::isVirtualRegister(reg)); - LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg); + assert(Register::isVirtualRegister(reg())); + LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg()); assert((VRegMask & LaneMask).any()); const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); - for (const MachineOperand &MO : MRI.def_operands(reg)) { + for (const MachineOperand &MO : MRI.def_operands(reg())) { if (!MO.isUndef()) continue; unsigned SubReg = MO.getSubReg(); @@ -1043,12 +1043,12 @@ void LiveInterval::SubRange::print(raw_ostream &OS) const { } void LiveInterval::print(raw_ostream &OS) const { - OS << printReg(reg) << ' '; + OS << printReg(reg()) << ' '; super::print(OS); // Print subranges for (const SubRange &SR : subranges()) OS << SR; - OS << " weight:" << weight; + OS << " weight:" << Weight; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1087,7 +1087,7 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const { // Make sure SubRanges are fine and LaneMasks are disjunct. LaneBitmask Mask; - LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) + LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg()) : LaneBitmask::getAll(); for (const SubRange &SR : subranges()) { // Subrange lanemask should be disjunct to any previous subrange masks. @@ -1361,8 +1361,9 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveRange &LR) { void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[], MachineRegisterInfo &MRI) { // Rewrite instructions. - for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg), - RE = MRI.reg_end(); RI != RE;) { + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg()), + RE = MRI.reg_end(); + RI != RE;) { MachineOperand &MO = *RI; MachineInstr *MI = RI->getParent(); ++RI; @@ -1382,7 +1383,7 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[], if (!VNI) continue; if (unsigned EqClass = getEqClass(VNI)) - MO.setReg(LIV[EqClass-1]->reg); + MO.setReg(LIV[EqClass - 1]->reg()); } // Distribute subregister liveranges. diff --git a/llvm/lib/CodeGen/LiveIntervalCalc.cpp b/llvm/lib/CodeGen/LiveIntervalCalc.cpp index 30c2d74a71c53..e8fd069d17a0a 100644 --- a/llvm/lib/CodeGen/LiveIntervalCalc.cpp +++ b/llvm/lib/CodeGen/LiveIntervalCalc.cpp @@ -60,7 +60,7 @@ void LiveIntervalCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { // Visit all def operands. If the same instruction has multiple defs of Reg, // createDeadDef() will deduplicate. 
const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); - unsigned Reg = LI.reg; + unsigned Reg = LI.reg(); for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { if (!MO.isDef() && !MO.readsReg()) continue; @@ -127,7 +127,7 @@ void LiveIntervalCalc::constructMainRangeFromSubranges(LiveInterval &LI) { } } resetLiveOutMap(); - extendToUses(MainRange, LI.reg, LaneBitmask::getAll(), &LI); + extendToUses(MainRange, LI.reg(), LaneBitmask::getAll(), &LI); } void LiveIntervalCalc::createDeadDefs(LiveRange &LR, Register Reg) { diff --git a/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/llvm/lib/CodeGen/LiveIntervalUnion.cpp index 43fa8f2d7157a..cccc14e4e8a44 100644 --- a/llvm/lib/CodeGen/LiveIntervalUnion.cpp +++ b/llvm/lib/CodeGen/LiveIntervalUnion.cpp @@ -85,8 +85,8 @@ LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { return; } for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) { - OS << " [" << SI.start() << ' ' << SI.stop() << "):" << printReg(SI.value()->reg, TRI); + OS << " [" << SI.start() << ' ' << SI.stop() + << "):" << printReg(SI.value()->reg(), TRI); } OS << '\n'; } @@ -95,7 +95,7 @@ LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { // Verify the live intervals in this union and add them to the visited set. void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) { for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI) - VisitedVRegs.set(SI.value()->reg); + VisitedVRegs.set(SI.value()->reg()); } #endif //!NDEBUG diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index b60fea6fb4e3d..d41b1f2b0adff 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -193,7 +193,7 @@ bool LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { assert(LICalc && "LICalc not initialized."); assert(LI.empty() && "Should only compute empty intervals."); LICalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); - LICalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg)); + LICalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg())); return computeDeadValues(LI, nullptr); } @@ -453,13 +453,13 @@ void LiveIntervals::extendSegmentsToUses(LiveRange &Segments, bool LiveIntervals::shrinkToUses(LiveInterval *li, SmallVectorImpl<MachineInstr*> *dead) { LLVM_DEBUG(dbgs() << "Shrink: " << *li << '\n'); - assert(Register::isVirtualRegister(li->reg) && + assert(Register::isVirtualRegister(li->reg()) && "Can only shrink virtual registers"); // Shrink subregister live ranges. bool NeedsCleanup = false; for (LiveInterval::SubRange &S : li->subranges()) { - shrinkToUses(S, li->reg); + shrinkToUses(S, li->reg()); if (S.empty()) NeedsCleanup = true; } @@ -469,8 +469,8 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // Find all the values used, including PHI kills. ShrinkToUsesWorkList WorkList; - // Visit all instructions reading li->reg. - unsigned Reg = li->reg; + // Visit all instructions reading li->reg(). + unsigned Reg = li->reg(); for (MachineInstr &UseMI : MRI->reg_instructions(Reg)) { if (UseMI.isDebugValue() || !UseMI.readsVirtualRegister(Reg)) continue; @@ -523,7 +523,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, // Is the register live before? Otherwise we may have to add a read-undef // flag for subregister defs. 
- unsigned VReg = LI.reg; + unsigned VReg = LI.reg(); if (MRI->shouldTrackSubRegLiveness(VReg)) { if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) { MachineInstr *MI = getInstructionFromIndex(Def); @@ -543,7 +543,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, // This is a dead def. Make sure the instruction knows. MachineInstr *MI = getInstructionFromIndex(Def); assert(MI && "No instruction defining live value"); - MI->addRegisterDead(LI.reg, TRI); + MI->addRegisterDead(LI.reg(), TRI); if (HaveDeadDef) MayHaveSplitComponents = true; HaveDeadDef = true; @@ -1716,7 +1716,7 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI, if (NumComp <= 1) return; LLVM_DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n'); - unsigned Reg = LI.reg; + unsigned Reg = LI.reg(); const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); for (unsigned I = 1; I < NumComp; ++I) { Register NewVReg = MRI->createVirtualRegister(RegClass); diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index 9de77c19a23a2..f269020af2219 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -188,7 +188,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, MachineInstr *DefMI = nullptr, *UseMI = nullptr; // Check that there is a single def and a single use. - for (MachineOperand &MO : MRI.reg_nodbg_operands(LI->reg)) { + for (MachineOperand &MO : MRI.reg_nodbg_operands(LI->reg())) { MachineInstr *MI = MO.getParent(); if (MO.isDef()) { if (DefMI && DefMI != MI) @@ -224,7 +224,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, << " into single use: " << *UseMI); SmallVector<unsigned, 8> Ops; - if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second) + if (UseMI->readsWritesVirtualRegister(LI->reg(), &Ops).second) return false; MachineInstr *FoldMI = TII.foldMemoryOperand(*UseMI, Ops, *DefMI, &LIS); @@ -236,7 +236,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, if (UseMI->shouldUpdateCallSiteInfo()) UseMI->getMF()->moveCallSiteInfo(UseMI, FoldMI); UseMI->eraseFromParent(); - DefMI->addRegisterDead(LI->reg, nullptr); + DefMI->addRegisterDead(LI->reg(), nullptr); Dead.push_back(DefMI); ++NumDCEFoldedLoads; return true; @@ -332,7 +332,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, // Remove defined value. if (MOI->isDef()) { if (TheDelegate && LI.getVNInfoAt(Idx) != nullptr) - TheDelegate->LRE_WillShrinkVirtReg(LI.reg); + TheDelegate->LRE_WillShrinkVirtReg(LI.reg()); LIS.removeVRegDefAt(LI, Idx); if (LI.empty()) RegsToErase.push_back(Reg); @@ -369,7 +369,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, pop_back(); DeadRemats->insert(MI); const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); - MI->substituteRegister(Dest, NewLI.reg, 0, TRI); + MI->substituteRegister(Dest, NewLI.reg(), 0, TRI); MI->getOperand(0).setIsDead(true); } else { if (TheDelegate) @@ -409,7 +409,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead, ToShrink.pop_back(); if (foldAsLoad(LI, Dead)) continue; - unsigned VReg = LI->reg; + unsigned VReg = LI->reg(); if (TheDelegate) TheDelegate->LRE_WillShrinkVirtReg(VReg); if (!LIS.shrinkToUses(LI, &Dead)) @@ -442,9 +442,9 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead, // intervals their own originals instead of referring to LI. The original // interval must contain all the split products, and LI doesn't. 
if (Original != VReg && Original != 0) - VRM->setIsSplitFromReg(SplitLI->reg, Original); + VRM->setIsSplitFromReg(SplitLI->reg(), Original); if (TheDelegate) - TheDelegate->LRE_DidCloneVirtReg(SplitLI->reg, VReg); + TheDelegate->LRE_DidCloneVirtReg(SplitLI->reg(), VReg); } } } @@ -466,11 +466,11 @@ LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, VirtRegAuxInfo VRAI(MF, LIS, VRM, Loops, MBFI); for (unsigned I = 0, Size = size(); I < Size; ++I) { LiveInterval &LI = LIS.getInterval(get(I)); - if (MRI.recomputeRegClass(LI.reg)) + if (MRI.recomputeRegClass(LI.reg())) LLVM_DEBUG({ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - dbgs() << "Inflated " << printReg(LI.reg) << " to " - << TRI->getRegClassName(MRI.getRegClass(LI.reg)) << '\n'; + dbgs() << "Inflated " << printReg(LI.reg()) << " to " + << TRI->getRegClassName(MRI.getRegClass(LI.reg())) << '\n'; }); VRAI.calculateSpillWeightAndHint(LI); } diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp index 08f046420fa1d..6b1775f28c045 100644 --- a/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -102,10 +102,10 @@ static bool foreachUnit(const TargetRegisterInfo *TRI, } void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { - LLVM_DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg, TRI) << " to " + LLVM_DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg(), TRI) << " to " << printReg(PhysReg, TRI) << ':'); - assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); - VRM->assignVirt2Phys(VirtReg.reg, PhysReg); + assert(!VRM->hasPhys(VirtReg.reg()) && "Duplicate VirtReg assignment"); + VRM->assignVirt2Phys(VirtReg.reg(), PhysReg); foreachUnit( TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) { @@ -119,10 +119,10 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { } void LiveRegMatrix::unassign(LiveInterval &VirtReg) { - Register PhysReg = VRM->getPhys(VirtReg.reg); - LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg, TRI) << " from " - << printReg(PhysReg, TRI) << ':'); - VRM->clearVirt(VirtReg.reg); + Register PhysReg = VRM->getPhys(VirtReg.reg()); + LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg(), TRI) + << " from " << printReg(PhysReg, TRI) << ':'); + VRM->clearVirt(VirtReg.reg()); foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) { @@ -148,8 +148,8 @@ bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg, // Check if the cached information is valid. // The same BitVector can be reused for all PhysRegs. // We could cache multiple VirtRegs if it becomes necessary. 
- if (RegMaskVirtReg != VirtReg.reg || RegMaskTag != UserTag) { - RegMaskVirtReg = VirtReg.reg; + if (RegMaskVirtReg != VirtReg.reg() || RegMaskTag != UserTag) { + RegMaskVirtReg = VirtReg.reg(); RegMaskTag = UserTag; RegMaskUsable.clear(); LIS->checkRegMaskInterference(VirtReg, RegMaskUsable); @@ -165,7 +165,7 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, unsigned PhysReg) { if (VirtReg.empty()) return false; - CoalescerPair CP(VirtReg.reg, PhysReg, *TRI); + CoalescerPair CP(VirtReg.reg(), PhysReg, *TRI); bool Result = foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) { diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 2aa14c8131edd..312429955021f 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -2529,7 +2529,7 @@ void MachineVerifier::verifyLiveIntervals() { } const LiveInterval &LI = LiveInts->getInterval(Reg); - assert(Reg == LI.reg && "Invalid reg to interval mapping"); + assert(Reg == LI.reg() && "Invalid reg to interval mapping"); verifyLiveInterval(LI); } @@ -2855,7 +2855,7 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg, } void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { - unsigned Reg = LI.reg; + unsigned Reg = LI.reg(); assert(Register::isVirtualRegister(Reg)); verifyLiveRange(LI, Reg); @@ -2872,10 +2872,10 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { } if (SR.empty()) { report("Subrange must not be empty", MF); - report_context(SR, LI.reg, SR.LaneMask); + report_context(SR, LI.reg(), SR.LaneMask); } Mask |= SR.LaneMask; - verifyLiveRange(SR, LI.reg, SR.LaneMask); + verifyLiveRange(SR, LI.reg(), SR.LaneMask); if (!LI.covers(SR)) { report("A Subrange is not covered by the main range", MF); report_context(LI); diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp index d228268536724..f7fe1063afeae 100644 --- a/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/llvm/lib/CodeGen/RegAllocBase.cpp @@ -87,13 +87,13 @@ void RegAllocBase::allocatePhysRegs() { // Continue assigning vregs one at a time to available physical registers. while (LiveInterval *VirtReg = dequeue()) { - assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned"); + assert(!VRM->hasPhys(VirtReg->reg()) && "Register already assigned"); // Unused registers can appear when the spiller coalesces snippets. - if (MRI->reg_nodbg_empty(VirtReg->reg)) { + if (MRI->reg_nodbg_empty(VirtReg->reg())) { LLVM_DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n'); aboutToRemoveInterval(*VirtReg); - LIS->removeInterval(VirtReg->reg); + LIS->removeInterval(VirtReg->reg()); continue; } @@ -104,8 +104,8 @@ void RegAllocBase::allocatePhysRegs() { // register if possible and populate a list of new live intervals that // result from splitting. LLVM_DEBUG(dbgs() << "\nselectOrSplit " - << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg)) - << ':' << *VirtReg << " w=" << VirtReg->weight << '\n'); + << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg())) + << ':' << *VirtReg << " w=" << VirtReg->weight() << '\n'); using VirtRegVec = SmallVector<Register, 4>; @@ -117,8 +117,9 @@ void RegAllocBase::allocatePhysRegs() { // Probably caused by an inline asm. 
MachineInstr *MI = nullptr; for (MachineRegisterInfo::reg_instr_iterator - I = MRI->reg_instr_begin(VirtReg->reg), E = MRI->reg_instr_end(); - I != E; ) { + I = MRI->reg_instr_begin(VirtReg->reg()), + E = MRI->reg_instr_end(); + I != E;) { MI = &*(I++); if (MI->isInlineAsm()) break; @@ -133,8 +134,9 @@ void RegAllocBase::allocatePhysRegs() { report_fatal_error("ran out of registers during register allocation"); } // Keep going after reporting the error. - VRM->assignVirt2Phys(VirtReg->reg, - RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front()); + VRM->assignVirt2Phys( + VirtReg->reg(), + RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg())).front()); continue; } @@ -145,16 +147,16 @@ void RegAllocBase::allocatePhysRegs() { assert(LIS->hasInterval(Reg)); LiveInterval *SplitVirtReg = &LIS->getInterval(Reg); - assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); - if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { + assert(!VRM->hasPhys(SplitVirtReg->reg()) && "Register already assigned"); + if (MRI->reg_nodbg_empty(SplitVirtReg->reg())) { assert(SplitVirtReg->empty() && "Non-empty but used interval"); LLVM_DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); aboutToRemoveInterval(*SplitVirtReg); - LIS->removeInterval(SplitVirtReg->reg); + LIS->removeInterval(SplitVirtReg->reg()); continue; } LLVM_DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); - assert(Register::isVirtualRegister(SplitVirtReg->reg) && + assert(Register::isVirtualRegister(SplitVirtReg->reg()) && "expect split value in virtual register"); enqueue(SplitVirtReg); ++NumNewQueued; diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index 5009bcc0a3973..a4ce9d70a270a 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -46,7 +46,7 @@ static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator", namespace { struct CompSpillWeight { bool operator()(LiveInterval *A, LiveInterval *B) const { - return A->weight < B->weight; + return A->weight() < B->weight(); } }; } @@ -213,7 +213,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, Register PhysReg, Q.collectInterferingVRegs(); for (unsigned i = Q.interferingVRegs().size(); i; --i) { LiveInterval *Intf = Q.interferingVRegs()[i - 1]; - if (!Intf->isSpillable() || Intf->weight > VirtReg.weight) + if (!Intf->isSpillable() || Intf->weight() > VirtReg.weight()) return false; Intfs.push_back(Intf); } @@ -227,7 +227,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, Register PhysReg, LiveInterval &Spill = *Intfs[i]; // Skip duplicates. - if (!VRM->hasPhys(Spill.reg)) + if (!VRM->hasPhys(Spill.reg())) continue; // Deallocate the interfering vreg by removing it from the union. @@ -259,7 +259,7 @@ Register RABasic::selectOrSplit(LiveInterval &VirtReg, SmallVector<Register, 8> PhysRegSpillCands; // Check for an available register in this class. 
- AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); + AllocationOrder Order(VirtReg.reg(), *VRM, RegClassInfo, Matrix); while (Register PhysReg = Order.next()) { // Check for interference in PhysReg switch (Matrix->checkInterference(VirtReg, PhysReg)) { diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 41cf002612654..dbb8f27cffcd8 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -247,12 +247,12 @@ class RAGreedy : public MachineFunctionPass, IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo; LiveRangeStage getStage(const LiveInterval &VirtReg) const { - return ExtraRegInfo[VirtReg.reg].Stage; + return ExtraRegInfo[VirtReg.reg()].Stage; } void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) { ExtraRegInfo.resize(MRI->getNumVirtRegs()); - ExtraRegInfo[VirtReg.reg].Stage = Stage; + ExtraRegInfo[VirtReg.reg()].Stage = Stage; } template<typename Iterator> @@ -677,7 +677,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Prioritize live ranges by size, assigning larger ranges first. // The queue holds (size, reg) pairs. const unsigned Size = LI->getSize(); - const unsigned Reg = LI->reg; + const unsigned Reg = LI->reg(); assert(Register::isVirtualRegister(Reg) && "Can only enqueue virtual registers"); unsigned Prio; @@ -768,7 +768,7 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg, // If we missed a simple hint, try to cheaply evict interference from the // preferred register. - if (Register Hint = MRI->getSimpleHint(VirtReg.reg)) + if (Register Hint = MRI->getSimpleHint(VirtReg.reg())) if (Order.isHint(Hint)) { LLVM_DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n'); EvictionCost MaxCost; @@ -800,7 +800,7 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg, //===----------------------------------------------------------------------===// Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) { - AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); + AllocationOrder Order(VirtReg.reg(), *VRM, RegClassInfo, Matrix); Register PhysReg; while ((PhysReg = Order.next())) { if (PhysReg == PrevReg) @@ -846,8 +846,8 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, if (CanSplit && IsHint && !BreaksHint) return true; - if (A.weight > B.weight) { - LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n'); + if (A.weight() > B.weight()) { + LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight() << '\n'); return true; } return false; @@ -878,7 +878,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg, // // This works out so a register without a cascade number is allowed to evict // anything, and it can be evicted by anything. - unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; + unsigned Cascade = ExtraRegInfo[VirtReg.reg()].Cascade; if (!Cascade) Cascade = NextCascade; @@ -892,13 +892,13 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg, // Check if any interfering live range is heavier than MaxWeight. for (unsigned i = Q.interferingVRegs().size(); i; --i) { LiveInterval *Intf = Q.interferingVRegs()[i - 1]; - assert(Register::isVirtualRegister(Intf->reg) && + assert(Register::isVirtualRegister(Intf->reg()) && "Only expecting virtual register interference from query"); // Do not allow eviction of a virtual register if we are in the middle // of last-chance recoloring and this virtual register is one that we // have scavenged a physical register for. 
- if (FixedRegisters.count(Intf->reg)) + if (FixedRegisters.count(Intf->reg())) return false; // Never evict spill products. They cannot split or spill. @@ -910,12 +910,14 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg, // // Also allow urgent evictions of unspillable ranges from a strictly // larger allocation order. - bool Urgent = !VirtReg.isSpillable() && - (Intf->isSpillable() || - RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) < - RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg))); + bool Urgent = + !VirtReg.isSpillable() && + (Intf->isSpillable() || + RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) < + RegClassInfo.getNumAllocatableRegs( + MRI->getRegClass(Intf->reg()))); // Only evict older cascades or live ranges without a cascade. - unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade; + unsigned IntfCascade = ExtraRegInfo[Intf->reg()].Cascade; if (Cascade <= IntfCascade) { if (!Urgent) return false; @@ -924,10 +926,10 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg, Cost.BrokenHints += 10; } // Would this break a satisfied hint? - bool BreaksHint = VRM->hasPreferredPhys(Intf->reg); + bool BreaksHint = VRM->hasPreferredPhys(Intf->reg()); // Update eviction cost. Cost.BrokenHints += BreaksHint; - Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight); + Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight()); // Abort if this would be too expensive. if (!(Cost < MaxCost)) return false; @@ -977,17 +979,17 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg, continue; // Cannot evict non virtual reg interference. - if (!Register::isVirtualRegister(Intf->reg)) + if (!Register::isVirtualRegister(Intf->reg())) return false; // Never evict spill products. They cannot split or spill. if (getStage(*Intf) == RS_Done) return false; // Would this break a satisfied hint? - bool BreaksHint = VRM->hasPreferredPhys(Intf->reg); + bool BreaksHint = VRM->hasPreferredPhys(Intf->reg()); // Update eviction cost. Cost.BrokenHints += BreaksHint; - Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight); + Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight()); // Abort if this would be too expensive. if (!(Cost < MaxCost)) return false; @@ -1018,7 +1020,7 @@ unsigned RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order, float *BestEvictweight) { EvictionCost BestEvictCost; BestEvictCost.setMax(); - BestEvictCost.MaxWeight = VirtReg.weight; + BestEvictCost.MaxWeight = VirtReg.weight(); unsigned BestEvicteePhys = 0; // Go over all physical registers and find the best candidate for eviction @@ -1043,9 +1045,9 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, Register PhysReg, // Make sure that VirtReg has a cascade number, and assign that cascade // number to every evicted register. These live ranges can then only be // evicted by a newer cascade, preventing infinite loops. 
- unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; + unsigned Cascade = ExtraRegInfo[VirtReg.reg()].Cascade; if (!Cascade) - Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++; + Cascade = ExtraRegInfo[VirtReg.reg()].Cascade = NextCascade++; LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI) << " interference: Cascade " << Cascade << '\n'); @@ -1067,18 +1069,18 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, Register PhysReg, for (unsigned i = 0, e = Intfs.size(); i != e; ++i) { LiveInterval *Intf = Intfs[i]; // The same VirtReg may be present in multiple RegUnits. Skip duplicates. - if (!VRM->hasPhys(Intf->reg)) + if (!VRM->hasPhys(Intf->reg())) continue; - LastEvicted.addEviction(PhysReg, VirtReg.reg, Intf->reg); + LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg()); Matrix->unassign(*Intf); - assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || + assert((ExtraRegInfo[Intf->reg()].Cascade < Cascade || VirtReg.isSpillable() < Intf->isSpillable()) && "Cannot decrease cascade number, illegal eviction"); - ExtraRegInfo[Intf->reg].Cascade = Cascade; + ExtraRegInfo[Intf->reg()].Cascade = Cascade; ++NumEvicted; - NewVRegs.push_back(Intf->reg); + NewVRegs.push_back(Intf->reg()); } } @@ -1114,10 +1116,10 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, // hints, and only evict smaller spill weights. if (CostPerUseLimit < ~0u) { BestCost.BrokenHints = 0; - BestCost.MaxWeight = VirtReg.weight; + BestCost.MaxWeight = VirtReg.weight(); // Check if any registers in RC are below CostPerUseLimit. - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg); + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg()); unsigned MinCost = RegClassInfo.getMinCost(RC); if (MinCost >= CostPerUseLimit) { LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " @@ -1578,7 +1580,7 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, bool *CanCauseEvictionChain) { BlockFrequency GlobalCost = 0; const BitVector &LiveBundles = Cand.LiveBundles; - unsigned VirtRegToSplit = SA->getParent().reg; + unsigned VirtRegToSplit = SA->getParent().reg(); ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; @@ -1679,7 +1681,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, // Isolate even single instructions when dealing with a proper sub-class. // That guarantees register class inflation for the stack interval because it // is all copies. - unsigned Reg = SA->getParent().reg; + unsigned Reg = SA->getParent().reg(); bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); // First handle all the blocks with uses. @@ -1942,7 +1944,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, // See splitCanCauseEvictionChain for detailed description of bad // eviction chain scenarios. 
LLVM_DEBUG(dbgs() << "Best split candidate of vreg " - << printReg(VirtReg.reg, TRI) << " may "); + << printReg(VirtReg.reg(), TRI) << " may "); if (!(*CanCauseEvictionChain)) LLVM_DEBUG(dbgs() << "not "); LLVM_DEBUG(dbgs() << "cause bad eviction chain\n"); @@ -2001,7 +2003,7 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl &NewVRegs) { assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); - Register Reg = VirtReg.reg; + Register Reg = VirtReg.reg(); bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); SE->reset(LREdit, SplitSpillMode); @@ -2067,7 +2069,7 @@ static unsigned getNumAllocatableRegsForConstraints( unsigned RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl &NewVRegs) { - const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); + const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg()); // There is no point to this if there are no larger sub-classes. if (!RegClassInfo.isProperSubClass(CurRC)) return 0; @@ -2095,8 +2097,8 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i])) if (MI->isFullCopy() || SuperRCNumAllocatableRegs == - getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII, - TRI, RCI)) { + getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC, + TII, TRI, RCI)) { LLVM_DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI); continue; } @@ -2113,7 +2115,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVector IntvMap; SE->finish(&IntvMap); - DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); + DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS); ExtraRegInfo.resize(MRI->getNumVirtRegs()); // Assign all new registers to RS_Spill. This was the last chance. @@ -2169,7 +2171,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, break; // Update the gaps covered by IntI. - const float weight = IntI.value()->weight; + const float weight = IntI.value()->weight(); for (; Gap != NumGaps; ++Gap) { GapWeight[Gap] = std::max(GapWeight[Gap], weight); if (Uses[Gap+1].getBaseIndex() >= IntI.stop()) @@ -2409,7 +2411,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, SE->useIntv(SegStart, SegStop); SmallVector IntvMap; SE->finish(&IntvMap); - DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); + DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS); // If the new range has the same number of instructions as before, mark it as // RS_Split2 so the next split will be forced to make progress. Otherwise, @@ -2511,7 +2513,7 @@ bool RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) { - const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); + const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg()); for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); @@ -2530,9 +2532,10 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, // However, if VirtReg has tied defs and Intf doesn't, then // there is still a point in examining if it can be recolorable. 
if (((getStage(*Intf) == RS_Done && - MRI->getRegClass(Intf->reg) == CurRC) && - !(hasTiedDef(MRI, VirtReg.reg) && !hasTiedDef(MRI, Intf->reg))) || - FixedRegisters.count(Intf->reg)) { + MRI->getRegClass(Intf->reg()) == CurRC) && + !(hasTiedDef(MRI, VirtReg.reg()) && + !hasTiedDef(MRI, Intf->reg()))) || + FixedRegisters.count(Intf->reg())) { LLVM_DEBUG( dbgs() << "Early abort: the interference is not recolorable.\n"); return false; @@ -2608,8 +2611,8 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, DenseMap<Register, Register> VirtRegToPhysReg; // Mark VirtReg as fixed, i.e., it will not be recolored past this point in // this recoloring "session". - assert(!FixedRegisters.count(VirtReg.reg)); - FixedRegisters.insert(VirtReg.reg); + assert(!FixedRegisters.count(VirtReg.reg())); + FixedRegisters.insert(VirtReg.reg()); SmallVector<Register, 4> CurrentNewVRegs; Order.rewind(); @@ -2644,7 +2647,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, for (SmallLISet::iterator It = RecoloringCandidates.begin(), EndIt = RecoloringCandidates.end(); It != EndIt; ++It) { - Register ItVirtReg = (*It)->reg; + Register ItVirtReg = (*It)->reg(); enqueue(RecoloringQueue, *It); assert(VRM->hasPhys(ItVirtReg) && "Interferences are supposed to be with allocated variables"); @@ -2697,7 +2700,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, for (SmallLISet::iterator It = RecoloringCandidates.begin(), EndIt = RecoloringCandidates.end(); It != EndIt; ++It) { - Register ItVirtReg = (*It)->reg; + Register ItVirtReg = (*It)->reg(); if (VRM->hasPhys(ItVirtReg)) Matrix->unassign(**It); Register ItPhysReg = VirtRegToPhysReg[ItVirtReg]; @@ -2743,7 +2746,7 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, << " succeeded with: " << printReg(PhysReg, TRI) << '\n'); Matrix->assign(*LI, PhysReg); - FixedRegisters.insert(LI->reg); + FixedRegisters.insert(LI->reg()); } return true; } @@ -2900,7 +2903,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { SmallSet<unsigned, 4> Visited; SmallVector<unsigned, 2> RecoloringCandidates; HintsInfo Info; - unsigned Reg = VirtReg.reg; + unsigned Reg = VirtReg.reg(); Register PhysReg = VRM->getPhys(Reg); // Start the recoloring algorithm from the input live-interval, then // it will propagate to the ones that are copy-related with it. @@ -3003,11 +3006,11 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { /// getting rid of 2 copies. void RAGreedy::tryHintsRecoloring() { for (LiveInterval *LI : SetOfBrokenHints) { - assert(Register::isVirtualRegister(LI->reg) && + assert(Register::isVirtualRegister(LI->reg()) && "Recoloring is possible only for virtual registers"); // Some dead defs may be around (e.g., because of debug uses). // Ignore those. - if (!VRM->hasPhys(LI->reg)) + if (!VRM->hasPhys(LI->reg())) continue; tryHintRecoloring(*LI); } @@ -3019,10 +3022,10 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, unsigned Depth) { unsigned CostPerUseLimit = ~0u; // First try assigning a free register. - AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); + AllocationOrder Order(VirtReg.reg(), *VRM, RegClassInfo, Matrix); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) { // If VirtReg got an assignment, the eviction info is no longer relevant. - LastEvicted.clearEvicteeInfo(VirtReg.reg); + LastEvicted.clearEvicteeInfo(VirtReg.reg()); // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical // register. 
@@ -3040,7 +3043,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, LiveRangeStage Stage = getStage(VirtReg); LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade " - << ExtraRegInfo[VirtReg.reg].Cascade << '\n'); + << ExtraRegInfo[VirtReg.reg()].Cascade << '\n'); // Try to evict a less worthy live range, but only for ranges from the primary // queue. The RS_Split ranges already failed to do this, and they should not @@ -3049,7 +3052,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, if (Register PhysReg = tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit, FixedRegisters)) { - Register Hint = MRI->getSimpleHint(VirtReg.reg); + Register Hint = MRI->getSimpleHint(VirtReg.reg()); // If VirtReg has a hint and that hint is broken record this // virtual register as a recoloring candidate for broken hint. // Indeed, since we evicted a variable in its neighborhood it is @@ -3059,7 +3062,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, SetOfBrokenHints.insert(&VirtReg); // If VirtReg evicted someone, the eviction info for it as an evictee is // no longer relevant. - LastEvicted.clearEvicteeInfo(VirtReg.reg); + LastEvicted.clearEvicteeInfo(VirtReg.reg()); return PhysReg; } @@ -3071,7 +3074,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, if (Stage < RS_Split) { setStage(VirtReg, RS_Split); LLVM_DEBUG(dbgs() << "wait for second round\n"); - NewVRegs.push_back(VirtReg.reg); + NewVRegs.push_back(VirtReg.reg()); return 0; } @@ -3081,7 +3084,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters); if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) { // If VirtReg got split, the eviction info is no longer relevant. - LastEvicted.clearEvicteeInfo(VirtReg.reg); + LastEvicted.clearEvicteeInfo(VirtReg.reg()); return PhysReg; } } @@ -3100,7 +3103,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // right thing here. Anyway, that is still good for early testing. setStage(VirtReg, RS_Memory); LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n"); - NewVRegs.push_back(VirtReg.reg); + NewVRegs.push_back(VirtReg.reg()); } else { NamedRegionTimer T("spill", "Spiller", TimerGroupName, TimerGroupDescription, TimePassesIsEnabled); @@ -3111,7 +3114,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // Tell LiveDebugVariables about the new ranges. Ranges not being covered by // the new regs are kept in LDV (still mapping to the old register), until // we rewrite spilled locations in LDV at a later stage. - DebugVars->splitRegister(VirtReg.reg, LRE.regs(), *LIS); + DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS); if (VerifyEnabled) MF->verify(this, "After spilling"); diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp index 34701b71f2816..0f848f62f7d1e 100644 --- a/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -199,7 +199,7 @@ class SpillCosts : public PBQPRAConstraint { for (auto NId : G.nodeIds()) { PBQP::PBQPNum SpillCost = - LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight; + LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight(); if (SpillCost == 0.0) SpillCost = std::numeric_limits<PBQP::PBQPNum>::min(); else @@ -290,7 +290,7 @@ class Interference : public PBQPRAConstraint { // If two intervals end at the same point, we need a way to break the tie or // the set will assume they're actually equal and refuse to insert a // "duplicate". 
Just compare the vregs - fast and guaranteed unique. - return std::get<0>(I1)->reg < std::get<0>(I2)->reg; + return std::get<0>(I1)->reg() < std::get<0>(I2)->reg(); } static bool isAtLastSegment(const IntervalInfo &I) { @@ -595,8 +595,8 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, // If this is an empty interval move it to the EmptyIntervalVRegs set then // continue. if (VRegLI.empty()) { - EmptyIntervalVRegs.insert(VRegLI.reg); - VRegsToAlloc.erase(VRegLI.reg); + EmptyIntervalVRegs.insert(VRegLI.reg()); + VRegsToAlloc.erase(VRegLI.reg()); continue; } @@ -684,7 +684,7 @@ void RegAllocPBQP::spillVReg(Register VReg, const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); (void)TRI; LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: " - << LRE.getParent().weight << ", New vregs: "); + << LRE.getParent().weight() << ", New vregs: "); // Copy any newly inserted live intervals into the list of regs to // allocate. @@ -692,8 +692,8 @@ void RegAllocPBQP::spillVReg(Register VReg, I != E; ++I) { const LiveInterval &LI = LIS.getInterval(*I); assert(!LI.empty() && "Empty spill range."); - LLVM_DEBUG(dbgs() << printReg(LI.reg, &TRI) << " "); - VRegsToAlloc.insert(LI.reg); + LLVM_DEBUG(dbgs() << printReg(LI.reg(), &TRI) << " "); + VRegsToAlloc.insert(LI.reg()); } LLVM_DEBUG(dbgs() << ")\n"); @@ -749,10 +749,10 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF, I != E; ++I) { LiveInterval &LI = LIS.getInterval(*I); - unsigned PReg = MRI.getSimpleHint(LI.reg); + unsigned PReg = MRI.getSimpleHint(LI.reg()); if (PReg == 0) { - const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg); + const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg()); const ArrayRef<MCPhysReg> RawPRegOrder = RC.getRawAllocationOrder(MF); for (unsigned CandidateReg : RawPRegOrder) { if (!VRM.getRegInfo().isReserved(CandidateReg)) { "No un-reserved physical registers in this register class"); } - VRM.assignVirt2Phys(LI.reg, PReg); + VRM.assignVirt2Phys(LI.reg(), PReg); } } diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 17160a9f42cd5..9bff32bb39166 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -649,7 +649,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // in IntB, we can merge them. if (ValS+1 != BS) return false; - LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI)); + LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg(), TRI)); SlotIndex FillerStart = ValS->end, FillerEnd = BS->start; // We are about to delete CopyMI, so need to remove it as the 'instruction @@ -692,13 +692,13 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // If the source instruction was killing the source register before the // merge, unset the isKill marker given the live range has been extended. - int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true); + int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg(), true); if (UIdx != -1) { ValSEndInst->getOperand(UIdx).setIsKill(false); } // Rewrite the copy. - CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); + CopyMI->substituteRegister(IntA.reg(), IntB.reg(), 0, *TRI); // If the copy instruction was killing the destination register or any // subrange before the merge trim the live range. 
bool RecomputeLiveRange = AS->end == CopyIdx; @@ -817,7 +817,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, return { false, false }; // If DefMI is a two-address instruction then commuting it will change the // destination register. - int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); + int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg()); assert(DefIdx != -1); unsigned UseOpIdx; if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) @@ -838,7 +838,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); Register NewReg = NewDstMO.getReg(); - if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill()) + if (NewReg != IntB.reg() || !IntB.Query(AValNo->def).isKill()) return { false, false }; // Make sure there are no other definitions of IntB that would reach the @@ -848,7 +848,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // If some of the uses of IntA.reg are already coalesced away, return false. // It's not possible to determine whether it's safe to perform the coalescing. - for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) { + for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg())) { MachineInstr *UseMI = MO.getParent(); unsigned OpNo = &MO - &UseMI->getOperand(0); SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI); @@ -870,9 +870,9 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx); if (!NewMI) return { false, false }; - if (Register::isVirtualRegister(IntA.reg) && - Register::isVirtualRegister(IntB.reg) && - !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg))) + if (Register::isVirtualRegister(IntA.reg()) && + Register::isVirtualRegister(IntB.reg()) && + !MRI->constrainRegClass(IntB.reg(), MRI->getRegClass(IntA.reg()))) return { false, false }; if (NewMI != DefMI) { LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI); @@ -891,9 +891,10 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // = B // Update uses of IntA of the specific Val# with IntB. 
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg), + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg()), UE = MRI->use_end(); - UI != UE; /* ++UI is below because of possible MI removal */) { + UI != UE; + /* ++UI is below because of possible MI removal */) { MachineOperand &UseMO = *UI; ++UI; if (UseMO.isUndef()) @@ -920,7 +921,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, continue; if (!UseMI->isCopy()) continue; - if (UseMI->getOperand(0).getReg() != IntB.reg || + if (UseMI->getOperand(0).getReg() != IntB.reg() || UseMI->getOperand(0).getSubReg()) continue; @@ -951,10 +952,10 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); if (IntA.hasSubRanges() || IntB.hasSubRanges()) { if (!IntA.hasSubRanges()) { - LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg); + LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg()); IntA.createSubRangeFrom(Allocator, Mask, IntA); } else if (!IntB.hasSubRanges()) { - LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg); + LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg()); IntB.createSubRangeFrom(Allocator, Mask, IntB); } SlotIndex AIdx = CopyIdx.getRegSlot(true); @@ -1100,8 +1101,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, continue; } // Check DefMI is a reverse copy and it is in BB Pred. - if (DefMI->getOperand(0).getReg() != IntA.reg || - DefMI->getOperand(1).getReg() != IntB.reg || + if (DefMI->getOperand(0).getReg() != IntA.reg() || + DefMI->getOperand(1).getReg() != IntB.reg() || DefMI->getParent() != Pred) { CopyLeftBB = Pred; continue; @@ -1158,8 +1159,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, // Insert new copy to CopyLeftBB. MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(), - TII->get(TargetOpcode::COPY), IntB.reg) - .addReg(IntA.reg); + TII->get(TargetOpcode::COPY), IntB.reg()) + .addReg(IntA.reg()); SlotIndex NewCopyIdx = LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot(); IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator()); @@ -1752,7 +1753,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg, if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) { if (!DstInt->hasSubRanges()) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); - LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg); + LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg()); LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx); LaneBitmask UnusedLanes = FullMask & ~UsedLanes; DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt); @@ -1991,7 +1992,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { continue; LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask) << ")\n"); - LIS->shrinkToUses(S, LI.reg); + LIS->shrinkToUses(S, LI.reg()); } LI.removeEmptySubRanges(); } @@ -3353,7 +3354,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) { if (LI.valnos.size() < LargeIntervalSizeThreshold) return false; - auto &Counter = LargeLIVisitCounter[LI.reg]; + auto &Counter = LargeLIVisitCounter[LI.reg()]; if (Counter < LargeIntervalFreqThreshold) { Counter++; return false; @@ -3456,8 +3457,8 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // Kill flags are going to be wrong if the live ranges were overlapping. 
// Eventually, we should simply clear all kill flags when computing live // ranges. They are reinserted after register allocation. - MRI->clearKillFlags(LHS.reg); - MRI->clearKillFlags(RHS.reg); + MRI->clearKillFlags(LHS.reg()); + MRI->clearKillFlags(RHS.reg()); if (!EndPoints.empty()) { // Recompute the parts of the live range we had to remove because of diff --git a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp index 4ee28d6bbb465..0872ec303460d 100644 --- a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp @@ -130,7 +130,7 @@ bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const { return false; // Create a new VReg for each class. - unsigned Reg = LI.reg; + unsigned Reg = LI.reg(); const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); SmallVector<LiveInterval *, 4> Intervals; Intervals.push_back(&LI); @@ -175,7 +175,7 @@ bool RenameIndependentSubregs::findComponents(IntEqClasses &Classes, // across subranges when they are affected by the same MachineOperand. const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); Classes.grow(NumComponents); - unsigned Reg = LI.reg; + unsigned Reg = LI.reg(); for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { if (!MO.isDef() && !MO.readsReg()) continue; @@ -212,7 +212,7 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes, const SmallVectorImpl<SubRangeInfo> &SubRangeInfos, const SmallVectorImpl<LiveInterval *> &Intervals) const { const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); - unsigned Reg = Intervals[0]->reg; + unsigned Reg = Intervals[0]->reg(); for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg), E = MRI->reg_nodbg_end(); I != E; ) { MachineOperand &MO = *I++; @@ -242,7 +242,7 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes, break; } - unsigned VReg = Intervals[ID]->reg; + unsigned VReg = Intervals[ID]->reg(); MO.setReg(VReg); if (MO.isTied() && Reg != VReg) { @@ -304,7 +304,7 @@ void RenameIndependentSubregs::computeMainRangesFixFlags( const SlotIndexes &Indexes = *LIS->getSlotIndexes(); for (size_t I = 0, E = Intervals.size(); I < E; ++I) { LiveInterval &LI = *Intervals[I]; - unsigned Reg = LI.reg; + unsigned Reg = LI.reg(); LI.removeEmptySubRanges(); diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp index 8ff1cffcd1e6a..372c7f8061295 100644 --- a/llvm/lib/CodeGen/SplitKit.cpp +++ b/llvm/lib/CodeGen/SplitKit.cpp @@ -168,7 +168,7 @@ void SplitAnalysis::analyzeUses() { // Get use slots from the use-def chain. 
const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (MachineOperand &MO : MRI.use_nodbg_operands(CurLI->reg)) + for (MachineOperand &MO : MRI.use_nodbg_operands(CurLI->reg())) if (!MO.isUndef()) UseSlots.push_back(LIS.getInstructionIndex(*MO.getParent()).getRegSlot()); @@ -333,7 +333,7 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { } bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const { - unsigned OrigReg = VRM.getOriginal(CurLI->reg); + unsigned OrigReg = VRM.getOriginal(CurLI->reg()); const LiveInterval &Orig = LIS.getInterval(OrigReg); assert(!Orig.empty() && "Splitting empty interval?"); LiveInterval::const_iterator I = Orig.find(Idx); @@ -433,7 +433,7 @@ void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) { LaneBitmask LM; for (const MachineOperand &DefOp : DefMI->defs()) { Register R = DefOp.getReg(); - if (R != LI.reg) + if (R != LI.reg()) continue; if (unsigned SR = DefOp.getSubReg()) LM |= TRI.getSubRegIndexLaneMask(SR); @@ -636,7 +636,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, LiveInterval &OrigLI = LIS.getInterval(Original); VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx); - unsigned Reg = LI->reg; + unsigned Reg = LI->reg(); bool DidRemat = false; if (OrigVNI) { LiveRangeEdit::Remat RM(ParentVNI); @@ -1329,7 +1329,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { // Rewrite to the mapped register at Idx. unsigned RegIdx = RegAssign.lookup(Idx); LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); - MO.setReg(LI.reg); + MO.setReg(LI.reg()); LLVM_DEBUG(dbgs() << " rewr " << printMBBReference(*MI->getParent()) << '\t' << Idx << ':' << RegIdx << '\t' << *MI); @@ -1411,7 +1411,7 @@ void SplitEditor::deleteRematVictims() { continue; MachineInstr *MI = LIS.getInstructionFromIndex(S.valno->def); assert(MI && "Missing instruction for dead def"); - MI->addRegisterDead(LI->reg, &TRI); + MI->addRegisterDead(LI->reg(), &TRI); if (!MI->allDefsAreDead()) continue; @@ -1531,7 +1531,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { LIS.splitSeparateComponents(LI, SplitLIs); unsigned Original = VRM.getOriginal(VReg); for (LiveInterval *SplitLI : SplitLIs) - VRM.setIsSplitFromReg(SplitLI->reg, Original); + VRM.setIsSplitFromReg(SplitLI->reg(), Original); // The new intervals all map back to i. if (LRMap) diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp index 3cc5d30ebad7d..a6f8974f33436 100644 --- a/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -145,7 +145,7 @@ namespace { // their weight. 
struct IntervalSorter { bool operator()(LiveInterval* LHS, LiveInterval* RHS) const { - return LHS->weight > RHS->weight; + return LHS->weight() > RHS->weight(); } }; @@ -174,7 +174,8 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { continue; LiveInterval &li = LS->getInterval(FI); if (!MI.isDebugValue()) - li.weight += LiveIntervals::getSpillWeight(false, true, MBFI, MI); + li.incrementWeight( + LiveIntervals::getSpillWeight(false, true, MBFI, MI)); } for (MachineInstr::mmo_iterator MMOI = MI.memoperands_begin(), EE = MI.memoperands_end(); @@ -222,7 +223,7 @@ void StackSlotColoring::InitializeSlots() { for (auto *I : Intervals) { LiveInterval &li = I->second; LLVM_DEBUG(li.dump()); - int FI = Register::stackSlot2Index(li.reg); + int FI = Register::stackSlot2Index(li.reg()); if (MFI->isDeadObjectIndex(FI)) continue; @@ -269,7 +270,7 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const { int StackSlotColoring::ColorSlot(LiveInterval *li) { int Color = -1; bool Share = false; - int FI = Register::stackSlot2Index(li->reg); + int FI = Register::stackSlot2Index(li->reg()); uint8_t StackID = MFI->getStackID(FI); if (!DisableSharing) { @@ -331,12 +332,12 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { bool Changed = false; for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; - int SS = Register::stackSlot2Index(li->reg); + int SS = Register::stackSlot2Index(li->reg()); int NewSS = ColorSlot(li); assert(NewSS >= 0 && "Stack coloring failed?"); SlotMapping[SS] = NewSS; RevMap[NewSS].push_back(SS); - SlotWeights[NewSS] += li->weight; + SlotWeights[NewSS] += li->weight(); UsedColors.set(NewSS); Changed |= (SS != NewSS); } @@ -344,8 +345,8 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "\nSpill slots after coloring:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; - int SS = Register::stackSlot2Index(li->reg); - li->weight = SlotWeights[SS]; + int SS = Register::stackSlot2Index(li->reg()); + li->setWeight(SlotWeights[SS]); } // Sort them by new weight. 
llvm::stable_sort(SSIntervals, IntervalSorter()); diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp index e2ef12d8ac77f..e89353c9ad276 100644 --- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -68,7 +68,7 @@ bool TargetRegisterInfo::shouldRegionSplitForVirtReg( const MachineFunction &MF, const LiveInterval &VirtReg) const { const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - MachineInstr *MI = MRI.getUniqueVRegDef(VirtReg.reg); + MachineInstr *MI = MRI.getUniqueVRegDef(VirtReg.reg()); if (MI && TII->isTriviallyReMaterializable(*MI) && VirtReg.size() > HugeSizeForSplit) return false; diff --git a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp index ff9228e2dea4a..1df86e7ca6b20 100644 --- a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp +++ b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp @@ -114,7 +114,7 @@ GCNNSAReassign::tryAssignRegisters(SmallVectorImpl &Intervals, unsigned NumRegs = Intervals.size(); for (unsigned N = 0; N < NumRegs; ++N) - if (VRM->hasPhys(Intervals[N]->reg)) + if (VRM->hasPhys(Intervals[N]->reg())) LRM->unassign(*Intervals[N]); for (unsigned N = 0; N < NumRegs; ++N) @@ -302,14 +302,15 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI << "\tOriginal allocation:\t"; - for(auto *LI : Intervals) - dbgs() << " " << llvm::printReg((VRM->getPhys(LI->reg)), TRI); + for (auto *LI + : Intervals) dbgs() + << " " << llvm::printReg((VRM->getPhys(LI->reg())), TRI); dbgs() << '\n'); bool Success = scavengeRegs(Intervals); if (!Success) { LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n"); - if (VRM->hasPhys(Intervals.back()->reg)) // Did not change allocation. + if (VRM->hasPhys(Intervals.back()->reg())) // Did not change allocation. continue; } else { // Check we did not make it worse for other instructions. @@ -328,7 +329,7 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) { if (!Success) { for (unsigned I = 0; I < Info->VAddrDwords; ++I) - if (VRM->hasPhys(Intervals[I]->reg)) + if (VRM->hasPhys(Intervals[I]->reg())) LRM->unassign(*Intervals[I]); for (unsigned I = 0; I < Info->VAddrDwords; ++I) @@ -339,11 +340,12 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) { C.second = true; ++NumNSAConverted; - LLVM_DEBUG(dbgs() << "\tNew allocation:\t\t [" - << llvm::printReg((VRM->getPhys(Intervals.front()->reg)), TRI) - << " : " - << llvm::printReg((VRM->getPhys(Intervals.back()->reg)), TRI) - << "]\n"); + LLVM_DEBUG( + dbgs() << "\tNew allocation:\t\t [" + << llvm::printReg((VRM->getPhys(Intervals.front()->reg())), TRI) + << " : " + << llvm::printReg((VRM->getPhys(Intervals.back()->reg())), TRI) + << "]\n"); Changed = true; } diff --git a/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp index 1c940428273cb..92d4a64624793 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp @@ -650,7 +650,7 @@ unsigned GCNRegBankReassign::computeStallCycles(Register SrcReg, Register Reg, unsigned GCNRegBankReassign::scavengeReg(LiveInterval &LI, unsigned Bank, unsigned SubReg) const { - const TargetRegisterClass *RC = MRI->getRegClass(LI.reg); + const TargetRegisterClass *RC = MRI->getRegClass(LI.reg()); unsigned MaxNumRegs = (Bank < NUM_VGPR_BANKS) ? 
MaxNumVGPRs : MaxNumSGPRs; unsigned MaxReg = MaxNumRegs + (Bank < NUM_VGPR_BANKS ? AMDGPU::VGPR0 diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp index a2da0ea849e04..6bfed1a7195c1 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp @@ -97,7 +97,7 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction( // values through live-range splitting and stackification, it will have to // do. MF.getInfo()->setFrameBaseVreg( - SplitLIs.back()->reg); + SplitLIs.back()->reg()); } SplitLIs.clear(); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp index 20fe2b2b7bfc5..fe127dec8aede 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp @@ -106,8 +106,8 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { continue; LiveInterval *LI = &Liveness->getInterval(VReg); - assert(LI->weight == 0.0f); - LI->weight = computeWeight(MRI, MBFI, VReg); + assert(LI->weight() == 0.0f); + LI->setWeight(computeWeight(MRI, MBFI, VReg)); LLVM_DEBUG(LI->dump()); SortedIntervals.push_back(LI); } @@ -118,10 +118,10 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { // TODO: Investigate more intelligent sorting heuristics. For starters, we // should try to coalesce adjacent live intervals before non-adjacent ones. llvm::sort(SortedIntervals, [MRI](LiveInterval *LHS, LiveInterval *RHS) { - if (MRI->isLiveIn(LHS->reg) != MRI->isLiveIn(RHS->reg)) - return MRI->isLiveIn(LHS->reg); - if (LHS->weight != RHS->weight) - return LHS->weight > RHS->weight; + if (MRI->isLiveIn(LHS->reg()) != MRI->isLiveIn(RHS->reg())) + return MRI->isLiveIn(LHS->reg()); + if (LHS->weight() != RHS->weight()) + return LHS->weight() > RHS->weight(); if (LHS->empty() || RHS->empty()) return !LHS->empty() && RHS->empty(); return *LHS < *RHS; @@ -135,14 +135,14 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; for (size_t I = 0, E = SortedIntervals.size(); I < E; ++I) { LiveInterval *LI = SortedIntervals[I]; - unsigned Old = LI->reg; + unsigned Old = LI->reg(); size_t Color = I; const TargetRegisterClass *RC = MRI->getRegClass(Old); // Check if it's possible to reuse any of the used colors. if (!MRI->isLiveIn(Old)) for (unsigned C : UsedColors.set_bits()) { - if (MRI->getRegClass(SortedIntervals[C]->reg) != RC) + if (MRI->getRegClass(SortedIntervals[C]->reg()) != RC) continue; for (LiveInterval *OtherLI : Assignments[C]) if (!OtherLI->empty() && OtherLI->overlaps(*LI)) @@ -152,7 +152,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { continue_outer:; } - unsigned New = SortedIntervals[Color]->reg; + unsigned New = SortedIntervals[Color]->reg(); SlotMapping[I] = New; Changed |= Old != New; UsedColors.set(Color); @@ -160,7 +160,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { // If we reassigned the stack pointer, update the debug frame base info. 
if (Old != New && MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Old) MFI.setFrameBaseVreg(New); - LLVM_DEBUG(dbgs() << "Assigning vreg" << Register::virtReg2Index(LI->reg) + LLVM_DEBUG(dbgs() << "Assigning vreg" << Register::virtReg2Index(LI->reg()) << " to vreg" << Register::virtReg2Index(New) << "\n"); } if (!Changed) @@ -168,7 +168,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { // Rewrite register operands. for (size_t I = 0, E = SortedIntervals.size(); I < E; ++I) { - unsigned Old = SortedIntervals[I]->reg; + unsigned Old = SortedIntervals[I]->reg(); unsigned New = SlotMapping[I]; if (Old != New) MRI->replaceRegWith(Old, New); From b2c931eff3cd6f88426ef26d233fab1fabaa0b7e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 16 Sep 2020 16:26:13 +0100 Subject: [PATCH 0844/1079] [X86] EmitInstrWithCustomInserter - remove redundant getDebugLoc() calls. NFCI. Use the same DebugLoc that is called at the top of the method. Fixes some Wshadow static analyzer warnings. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6b316a3e5a71e..f0c4206b012cc 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33717,7 +33717,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case X86::PTDPBUSD: case X86::PTDPBUUD: case X86::PTDPBF16PS: { - const DebugLoc &DL = MI.getDebugLoc(); unsigned Opc; switch (MI.getOpcode()) { case X86::PTDPBSSD: Opc = X86::TDPBSSD; break; @@ -33737,7 +33736,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, return BB; } case X86::PTILEZERO: { - const DebugLoc &DL = MI.getDebugLoc(); unsigned Imm = MI.getOperand(0).getImm(); BuildMI(*BB, MI, DL, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm)); MI.eraseFromParent(); // The pseudo is gone now. @@ -33746,7 +33744,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case X86::PTILELOADD: case X86::PTILELOADDT1: case X86::PTILESTORED: { - const DebugLoc &DL = MI.getDebugLoc(); unsigned Opc; switch (MI.getOpcode()) { case X86::PTILELOADD: Opc = X86::TILELOADD; break; From f0546173fa4bdde03ecb21a174fcaa8a6490adbd Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 16 Sep 2020 17:28:59 +0200 Subject: [PATCH 0845/1079] [ASTMatchers] Add missing definition for decompositionDecl Otherwise we'd get a linker error whenever decompositionDecl is ODR used. --- clang/lib/ASTMatchers/ASTMatchersInternal.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp index 6b17bd0cda0b3..4e4e43b2a94a6 100644 --- a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp +++ b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp @@ -710,6 +710,7 @@ const internal::VariadicDynCastAllOfMatcher typeAliasDecl; const internal::VariadicDynCastAllOfMatcher typeAliasTemplateDecl; const internal::VariadicAllOfMatcher decl; +const internal::VariadicAllOfMatcher decompositionDecl; const internal::VariadicDynCastAllOfMatcher linkageSpecDecl; const internal::VariadicDynCastAllOfMatcher namedDecl; From 06d058afecdf54021fbf8fece422dd04766227ea Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Wed, 16 Sep 2020 18:51:26 +0300 Subject: [PATCH 0846/1079] [AMDGPU] Corrected directive to use for ELF weak refs WeakRefDirective should specify a directive to declare "a global as being a weak undefined symbol". 
The directive used by AMDGPU was incorrect - ".weakref" was intended for other purposes. The correct directive is ".weak" and it is already defined as default for ELF. So the redefinition was removed. Reviewers: arsenm, rampitec Differential Revision: https://reviews.llvm.org/D87762 --- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 1 - llvm/test/CodeGen/AMDGPU/hsa-globals.ll | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 687cfef4559f3..1836237c8df56 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -40,7 +40,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT, HasAggressiveSymbolFolding = true; COMMDirectiveAlignmentIsInBytes = false; HasNoDeadStrip = true; - WeakRefDirective = ".weakref\t"; //===--- Dwarf Emission Directives -----------------------------------===// SupportsDebugInformation = true; DwarfRegNumForCFI = true; diff --git a/llvm/test/CodeGen/AMDGPU/hsa-globals.ll b/llvm/test/CodeGen/AMDGPU/hsa-globals.ll index 09c4b5f68a0b5..bbb96072dfaf5 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-globals.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-globals.ll @@ -13,6 +13,8 @@ define amdgpu_kernel void @test() { ret void } +@weak_global = extern_weak addrspace(1) global i32 + ; ASM: .type linkonce_odr_global_program,@object ; ASM: .section .bss,#alloc,#write ; ASM: .weak linkonce_odr_global_program @@ -48,3 +50,5 @@ define amdgpu_kernel void @test() { ; ASM: external_readonly: ; ASM: .long 0 ; ASM: .size external_readonly, 4 + +; ASM: .weak weak_global From 09c342493d89c2f32602f911e5c919742b837e10 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 15 Sep 2020 22:06:50 -0700 Subject: [PATCH 0847/1079] [NPM] Translate alias analysis into require<> as well 'require' is needed to make globals-aa work in NPM, since globals-aa is a module analysis but function passes cannot run module analyses on demand. So don't skip translating alias analyses to 'require<>'. 
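As an illustration of the intended behavior, here is a minimal sketch (the
wrapper function is hypothetical, written only for this description;
isAnalysisPassName is the real PassBuilder helper touched by this patch):

  // With this change, a legacy-style invocation such as "globals-aa,licm"
  // is effectively run as "require<globals-aa>,licm" under the NPM.
  std::string translateForNPM(PassBuilder &PB, StringRef PassName) {
    std::string Name = PassName.str();
    if (PB.isAnalysisPassName(PassName))
      Name = "require<" + Name + ">";
    return Name;
  }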
Reviewed By: asbirlea

Differential Revision: https://reviews.llvm.org/D87743
---
 llvm/lib/Passes/PassBuilder.cpp               | 6 ++++++
 llvm/test/Analysis/GlobalsModRef/no-escape.ll | 3 ++-
 llvm/tools/opt/NewPMDriver.cpp                | 5 +----
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 2ecd6fb602cb5..71e013f75d0a7 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -2823,6 +2823,12 @@ bool PassBuilder::isAnalysisPassName(StringRef PassName) {
 #define CGSSC_ANALYSIS(NAME, CREATE_PASS)                                      \
   if (PassName == NAME)                                                        \
     return true;
+#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS)                               \
+  if (PassName == NAME)                                                        \
+    return true;
+#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS)                             \
+  if (PassName == NAME)                                                        \
+    return true;
 #include "PassRegistry.def"
   return false;
 }
diff --git a/llvm/test/Analysis/GlobalsModRef/no-escape.ll b/llvm/test/Analysis/GlobalsModRef/no-escape.ll
index 9d0f1053902f0..fc95b6ad63147 100644
--- a/llvm/test/Analysis/GlobalsModRef/no-escape.ll
+++ b/llvm/test/Analysis/GlobalsModRef/no-escape.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -basic-aa -globals-aa -S -licm | FileCheck %s
+; RUN: opt < %s -basic-aa -globals-aa -S -licm -enable-new-pm=0 | FileCheck %s
+; RUN: opt < %s -basic-aa -globals-aa -S -licm -enable-new-pm=1 | FileCheck %s
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.10.0"
diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp
index a5c2a1bf1feeb..b38f67ac45197 100644
--- a/llvm/tools/opt/NewPMDriver.cpp
+++ b/llvm/tools/opt/NewPMDriver.cpp
@@ -336,15 +336,12 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
   }
   // For compatibility with legacy pass manager.
   // Alias analyses are not specially specified when using the legacy PM.
-  SmallVector NonAAPasses;
   for (auto PassName : Passes) {
     if (PB.isAAPassName(PassName)) {
       if (auto Err = PB.parseAAPipeline(AA, PassName)) {
         errs() << Arg0 << ": " << toString(std::move(Err)) << "\n";
         return false;
       }
-    } else {
-      NonAAPasses.push_back(PassName);
     }
   }
   // For compatibility with the legacy PM AA pipeline.
@@ -389,7 +386,7 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
       return false;
     }
   }
-  for (auto PassName : NonAAPasses) {
+  for (auto PassName : Passes) {
     std::string ModifiedPassName(PassName.begin(), PassName.end());
     if (PB.isAnalysisPassName(PassName))
       ModifiedPassName = "require<" + ModifiedPassName + ">";

From 15e9a6c2118fa3db2c80043e6679da5dcc72b3a7 Mon Sep 17 00:00:00 2001
From: Francesco Petrogalli
Date: Tue, 8 Sep 2020 08:08:59 +0000
Subject: [PATCH 0848/1079] [llvm][CodeGen] Do not scalarize
 `llvm.masked.[gather|scatter]` operating on scalable vectors.

This patch prevents the `llvm.masked.gather` and `llvm.masked.scatter`
intrinsics from being scalarized when invoked on scalable vectors.

The change in `Function.cpp` is needed to prevent the warning that is
raised when `getNumElements` is used in place of `getElementCount` on
`VectorType` instances.

The tests guard against regressions of this change. They make sure that
calls to `llvm.masked.[gather|scatter]` are still scalarized when:

1. the intrinsics are operating on fixed-size vectors, and
2. the compiler is not targeting fixed-length SVE code generation.
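In essence, the new bail-out is the following check (a sketch that mirrors
the ScalarizeMaskedMemIntrin.cpp hunk below; the helper name is invented
here for illustration):

  // Scalarization emits a compile-time loop over the vector lanes, which is
  // impossible when the lane count is only known at run time.
  static bool hasScalableTypes(IntrinsicInst *II) {
    return isa<ScalableVectorType>(II->getType()) ||
           any_of(II->arg_operands(), [](Value *V) {
             return isa<ScalableVectorType>(V->getType());
           });
  }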
Reviewed By: efriedma, sdesmalen

Differential Revision: https://reviews.llvm.org/D86249
---
 llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp |  6 ++
 llvm/lib/IR/Function.cpp                      |  3 +-
 .../llvm-masked-gather-legal-for-sve.ll       | 63 +++++++++++++++++++
 .../llvm-masked-scatter-legal-for-sve.ll      | 63 +++++++++++++++++++
 4 files changed, 133 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/llvm-masked-gather-legal-for-sve.ll
 create mode 100644 llvm/test/CodeGen/AArch64/llvm-masked-scatter-legal-for-sve.ll

diff --git a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index 15b67e3b69cc1..3443743a28c5f 100644
--- a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -865,6 +865,12 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
                                                 bool &ModifiedDT) {
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
   if (II) {
+    // The scalarization code below does not work for scalable vectors.
+    if (isa<ScalableVectorType>(II->getType()) ||
+        any_of(II->arg_operands(),
+               [](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
+      return false;
+
     switch (II->getIntrinsicID()) {
     default:
       break;
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index e701feae22562..d03ffbb8d008f 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -1400,8 +1400,7 @@ static bool matchIntrinsicType(
     auto *ReferenceType = dyn_cast<VectorType>(ArgTys[RefArgNumber]);
     auto *ThisArgVecTy = dyn_cast<VectorType>(Ty);
     if (!ThisArgVecTy || !ReferenceType ||
-        (cast<VectorType>(ReferenceType)->getNumElements() !=
-         cast<VectorType>(ThisArgVecTy)->getNumElements()))
+        (ReferenceType->getElementCount() != ThisArgVecTy->getElementCount()))
       return true;
     PointerType *ThisArgEltTy =
         dyn_cast<PointerType>(ThisArgVecTy->getElementType());
diff --git a/llvm/test/CodeGen/AArch64/llvm-masked-gather-legal-for-sve.ll b/llvm/test/CodeGen/AArch64/llvm-masked-gather-legal-for-sve.ll
new file mode 100644
index 0000000000000..1dffd76a11927
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llvm-masked-gather-legal-for-sve.ll
@@ -0,0 +1,63 @@
+; RUN: opt -mtriple=aarch64-linux-gnu -mattr=+sve -scalarize-masked-mem-intrin -S < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; Testing that masked gathers operating on scalable vectors that are
+; packed in SVE registers are not scalarized.
+
+; CHECK-LABEL: @masked_gather_nxv4i32(
+; CHECK: call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32
+define <vscale x 4 x i32> @masked_gather_nxv4i32(<vscale x 4 x i32*> %ld, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru) {
+  %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ld, i32 0, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
+  ret <vscale x 4 x i32> %res
+}
+
+; Testing that masked gathers operating on scalable vectors of FP data
+; that is packed in SVE registers are not scalarized.
+
+; CHECK-LABEL: @masked_gather_nxv2f64(
+; CHECK: call <vscale x 2 x double> @llvm.masked.gather.nxv2f64
+define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x double*> %ld, <vscale x 2 x i1> %masks, <vscale x 2 x double> %passthru) {
+  %res = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ld, i32 0, <vscale x 2 x i1> %masks, <vscale x 2 x double> %passthru)
+  ret <vscale x 2 x double> %res
+}
+
+; Testing that masked gathers operating on scalable vectors of FP data
+; that is unpacked in SVE registers are not scalarized.
+
+; CHECK-LABEL: @masked_gather_nxv2f16(
+; CHECK: call <vscale x 2 x half> @llvm.masked.gather.nxv2f16
+define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x half*> %ld, <vscale x 2 x i1> %masks, <vscale x 2 x half> %passthru) {
+  %res = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ld, i32 0, <vscale x 2 x i1> %masks, <vscale x 2 x half> %passthru)
+  ret <vscale x 2 x half> %res
+}
+
+; Testing that masked gathers operating on 64-bit fixed vectors are
+; scalarized because NEON doesn't have support for masked gather
+; instructions.
+
+; CHECK-LABEL: @masked_gather_v2f32(
+; CHECK-NOT: @llvm.masked.gather.v2f32(
+define <2 x float> @masked_gather_v2f32(<2 x float*> %ld, <2 x i1> %masks, <2 x float> %passthru) {
+  %res = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthru)
+  ret <2 x float> %res
+}
+
+; Testing that masked gathers operating on 128-bit fixed vectors are
+; scalarized because NEON doesn't have support for masked gather
+; instructions and because we are not targeting fixed width SVE.
+
+; CHECK-LABEL: @masked_gather_v4i32(
+; CHECK-NOT: @llvm.masked.gather.v4i32(
+define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ld, <4 x i1> %masks, <4 x i32> %passthru) {
+  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
+  ret <4 x i32> %res
+}
+
+declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ptrs, i32 %align, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
+declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 %align, <vscale x 2 x i1> %masks, <vscale x 2 x double> %passthru)
+declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 %align, <vscale x 2 x i1> %masks, <vscale x 2 x half> %passthru)
+declare <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %ptrs, i32 %align, <2 x i1> %masks, <2 x float> %passthru)
+declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i32> %passthru)
diff --git a/llvm/test/CodeGen/AArch64/llvm-masked-scatter-legal-for-sve.ll b/llvm/test/CodeGen/AArch64/llvm-masked-scatter-legal-for-sve.ll
new file mode 100644
index 0000000000000..caaa146aa9595
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llvm-masked-scatter-legal-for-sve.ll
@@ -0,0 +1,63 @@
+; RUN: opt -mtriple=aarch64-linux-gnu -mattr=+sve -scalarize-masked-mem-intrin -S < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; Testing that masked scatters operating on scalable vectors that are
+; packed in SVE registers are not scalarized.
+
+; CHECK-LABEL: @masked_scatter_nxv4i32(
+; CHECK: call void @llvm.masked.scatter.nxv4i32
+define void @masked_scatter_nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %masks) {
+  call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
+  ret void
+}
+
+; Testing that masked scatters operating on scalable vectors of FP
+; data that is packed in SVE registers are not scalarized.
+
+; CHECK-LABEL: @masked_scatter_nxv2f64(
+; CHECK: call void @llvm.masked.scatter.nxv2f64
+define void @masked_scatter_nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %masks) {
+  call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+  ret void
+}
+
+; Testing that masked scatters operating on scalable vectors of FP
+; data that is unpacked in SVE registers are not scalarized.
+
+; CHECK-LABEL: @masked_scatter_nxv2f16(
+; CHECK: call void @llvm.masked.scatter.nxv2f16
+define void @masked_scatter_nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %masks) {
+  call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+  ret void
+}
+
+; Testing that masked scatters operating on 64-bit fixed vectors are
+; scalarized because NEON doesn't have support for masked scatter
+; instructions.
+
+; CHECK-LABEL: @masked_scatter_v2f32(
+; CHECK-NOT: @llvm.masked.scatter.v2f32(
+define void @masked_scatter_v2f32(<2 x float> %data, <2 x float*> %ptrs, <2 x i1> %masks) {
+  call void @llvm.masked.scatter.v2f32(<2 x float> %data, <2 x float*> %ptrs, i32 0, <2 x i1> %masks)
+  ret void
+}
+
+; Testing that masked scatters operating on 128-bit fixed vectors are
+; scalarized because NEON doesn't have support for masked scatter
+; instructions and because we are not targeting fixed width SVE.
+
+; CHECK-LABEL: @masked_scatter_v4i32(
+; CHECK-NOT: @llvm.masked.scatter.v4i32(
+define void @masked_scatter_v4i32(<4 x i32> %data, <4 x i32*> %ptrs, <4 x i1> %masks) {
+  call void @llvm.masked.scatter.v4i32(<4 x i32> %data, <4 x i32*> %ptrs, i32 0, <4 x i1> %masks)
+  ret void
+}
+
+declare void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 %align, <vscale x 4 x i1> %masks)
+declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 %align, <vscale x 2 x i1> %masks)
+declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 %align, <vscale x 2 x i1> %masks)
+declare void @llvm.masked.scatter.v2f32(<2 x float> %data, <2 x float*> %ptrs, i32 %align, <2 x i1> %masks)
+declare void @llvm.masked.scatter.v4i32(<4 x i32> %data, <4 x i32*> %ptrs, i32 %align, <4 x i1> %masks)

From cb64455faa36d6ac12759fa4ec4dd05847cb1b90 Mon Sep 17 00:00:00 2001
From: Jay Foad
Date: Wed, 16 Sep 2020 17:02:42 +0100
Subject: [PATCH 0849/1079] [AMDGPU] Remove obsolete comment

Obsoleted by e4464bf3d45848461630e3771d66546d389f1ed5 "AMDGPU/GlobalISel:
Select scalar v2s16 G_BUILD_VECTOR"

---
 llvm/lib/Target/AMDGPU/SIInstructions.td | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 5f8f2a4e58479..47b27d63408dd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2040,8 +2040,6 @@ def : GCNPat <
   SRCMODS.NONE, $src2)
 >;

-// COPY is workaround tablegen bug from multiple outputs
-// from S_LSHL_B32's multiple outputs from implicit scc def.
 def : GCNPat <
   (v2i16 (build_vector (i16 0), (i16 SReg_32:$src1))),
   (S_LSHL_B32 SReg_32:$src1, (i16 16))

From b5c3efeb7bc9861dc04a1b00a4c0183bdfa9b582 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer
Date: Wed, 16 Sep 2020 16:35:53 +0100
Subject: [PATCH 0850/1079] [ARM][MVE] Tail-predication: predicate new
 elementcount checks on force-enabled

Additional sanity checks were added to get.active.lane.mask's second
argument, the loop tripcount/elementcount, in rG635b87511ec3. Like the
other (overflow) checks, skip this if tail-predication is forced.

Differential Revision: https://reviews.llvm.org/D87769
---
 llvm/lib/Target/ARM/MVETailPredication.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp
index 987df73970e57..a99fefefdf25d 100644
--- a/llvm/lib/Target/ARM/MVETailPredication.cpp
+++ b/llvm/lib/Target/ARM/MVETailPredication.cpp
@@ -411,7 +411,7 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
                       << TC2 << " from get.active.lane.mask\n");
       return false;
     }
-  } else {
+  } else if (!ForceTailPredication) {
     // Smoke tests if the element count is a runtime value. I.e., this isn't
     // fully generic because that would require a full SCEV visitor here. It
     // would require extracting the variable from the elementcount SCEV

From c27b64bbe1bf96642b5b1e0babde7886bb30c84f Mon Sep 17 00:00:00 2001
From: Arthur Eubanks
Date: Fri, 11 Sep 2020 19:57:17 -0700
Subject: [PATCH 0851/1079] [Coro][NewPM] Handle llvm.coro.prepare.retcon in
 NPM coro-split pass

Reviewed By: rjmccall

Differential Revision: https://reviews.llvm.org/D87731
---
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp  | 57 +++++++++++++++++--
 .../Coroutines/coro-retcon-frame.ll           |  1 +
 2 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 9c4392e7999b6..ad93ae7cf1aca 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -1563,6 +1563,42 @@ static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) {
   SCC.initialize(Nodes);
 }

+/// Replace a call to llvm.coro.prepare.retcon.
+static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG,
+                           LazyCallGraph::SCC &C) {
+  auto CastFn = Prepare->getArgOperand(0); // as an i8*
+  auto Fn = CastFn->stripPointerCasts();   // as its original type
+
+  // Attempt to peephole this pattern:
+  //    %0 = bitcast [[TYPE]] @some_function to i8*
+  //    %1 = call @llvm.coro.prepare.retcon(i8* %0)
+  //    %2 = bitcast %1 to [[TYPE]]
+  // ==>
+  //    %2 = @some_function
+  for (auto UI = Prepare->use_begin(), UE = Prepare->use_end(); UI != UE;) {
+    // Look for bitcasts back to the original function type.
+    auto *Cast = dyn_cast<BitCastInst>((UI++)->getUser());
+    if (!Cast || Cast->getType() != Fn->getType())
+      continue;
+
+    // Replace and remove the cast.
+    Cast->replaceAllUsesWith(Fn);
+    Cast->eraseFromParent();
+  }
+
+  // Replace any remaining uses with the function as an i8*.
+  // This can never directly be a callee, so we don't need to update CG.
+  Prepare->replaceAllUsesWith(CastFn);
+  Prepare->eraseFromParent();
+
+  // Kill dead bitcasts.
+  while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) {
+    if (!Cast->use_empty())
+      break;
+    CastFn = Cast->getOperand(0);
+    Cast->eraseFromParent();
+  }
+}
 /// Replace a call to llvm.coro.prepare.retcon.
 static void replacePrepare(CallInst *Prepare, CallGraph &CG) {
   auto CastFn = Prepare->getArgOperand(0); // as an i8*
@@ -1618,6 +1654,19 @@ static void replacePrepare(CallInst *Prepare, CallGraph &CG) {
   }
 }

+static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
+                               LazyCallGraph::SCC &C) {
+  bool Changed = false;
+  for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end(); PI != PE;) {
+    // Intrinsics can only be used in calls.
+    auto *Prepare = cast<CallInst>((PI++)->getUser());
+    replacePrepare(Prepare, CG, C);
+    Changed = true;
+  }
+
+  return Changed;
+}
+
 /// Remove calls to llvm.coro.prepare.retcon, a barrier meant to prevent
 /// IPO from operating on calls to a retcon coroutine before it's been
 /// split.  This is only safe to do after we've split all retcon
@@ -1656,7 +1705,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
     return PreservedAnalyses::all();

   // Check for uses of llvm.coro.prepare.retcon.
- const auto *PrepareFn = M.getFunction("llvm.coro.prepare.retcon"); + auto *PrepareFn = M.getFunction("llvm.coro.prepare.retcon"); if (PrepareFn && PrepareFn->use_empty()) PrepareFn = nullptr; @@ -1670,8 +1719,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, return PreservedAnalyses::all(); if (Coroutines.empty()) - llvm_unreachable("new pass manager cannot yet handle " - "'llvm.coro.prepare.retcon'"); + replaceAllPrepares(PrepareFn, CG, C); // Split all the coroutines. for (LazyCallGraph::Node *N : Coroutines) { @@ -1704,8 +1752,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, } if (PrepareFn) - llvm_unreachable("new pass manager cannot yet handle " - "'llvm.coro.prepare.retcon'"); + replaceAllPrepares(PrepareFn, CG, C); return PreservedAnalyses::none(); } diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-frame.ll b/llvm/test/Transforms/Coroutines/coro-retcon-frame.ll index c7ca8e3a01370..a1b83eeaee774 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon-frame.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon-frame.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -coro-split -S | FileCheck %s +; RUN: opt < %s -passes=coro-split -S | FileCheck %s target datalayout = "p:64:64:64" From 66df98945e08906ce4a057245fda81f631cfd3ae Mon Sep 17 00:00:00 2001 From: mhl Date: Wed, 16 Sep 2020 08:02:34 -0700 Subject: [PATCH 0852/1079] [libfuzzer] Reduce default verbosity when printing large mutation sequences When using a custom mutator (e.g. thrift mutator, similar to LPM) that calls back into libfuzzer's mutations via `LLVMFuzzerMutate`, the mutation sequences needed to achieve new coverage can get prohibitively large. Printing these large sequences has two downsides: 1) It makes the logs hard to understand for a human. 2) The performance cost slows down fuzzing. In this patch I change the `PrintMutationSequence` function to take a max number of entries, to achieve this goal. I also update `PrintStatusForNewUnit` to default to printing only 10 entries, in the default verbosity level (1), requiring the user to set verbosity to 2 if they want the full mutation sequence. For our use case, turning off verbosity is not an option, as that would also disable `PrintStats()` which is very useful for infrastructure that analyzes the logs in realtime. I imagine most users of libfuzzer always want those logs in the default. I built a fuzzer locally with this patch applied to libfuzzer. When running with the default verbosity, I see logs like this: #65 NEW cov: 4799 ft: 10443 corp: 41/1447Kb lim: 64000 exec/s: 1 rss: 575Mb L: 28658/62542 MS: 196 Custom-CrossOver-ChangeBit-EraseBytes-ChangeBit-ChangeBit-ChangeBit-CrossOver-ChangeBit-CrossOver- DE: "\xff\xff\xff\x0e"-"\xfe\xff\xff\x7f"-"\xfe\xff\xff\x7f"-"\x17\x00\x00\x00\x00\x00\x00\x00"-"\x00\x00\x00\xf9"-"\xff\xff\xff\xff"-"\xfa\xff\xff\xff"-"\xf7\xff\xff\xff"-"@\xff\xff\xff\xff\xff\xff\xff"-"E\x00"- #67 NEW cov: 4810 ft: 10462 corp: 42/1486Kb lim: 64000 exec/s: 1 rss: 577Mb L: 39823/62542 MS: 135 Custom-CopyPart-ShuffleBytes-ShuffleBytes-ChangeBit-ChangeBinInt-EraseBytes-ChangeBit-ChangeBinInt-ChangeBit- DE: "\x01\x00\x00\x00\x00\x00\x01\xf1"-"\x00\x00\x00\x07"-"\x00\x0d"-"\xfd\xff\xff\xff"-"\xfe\xff\xff\xf4"-"\xe3\xff\xff\xff"-"\xff\xff\xff\xf1"-"\xea\xff\xff\xff"-"\x00\x00\x00\xfd"-"\x01\x00\x00\x05"- Staring hard at the logs it's clear that the cap of 10 is applied. 
When running with verbosity level 2, the logs look like the below: #66 NEW cov: 4700 ft: 10188 corp: 37/1186Kb lim: 64000 exec/s: 2 rss: 509Mb L: 47616/61231 MS: 520 Custom-CopyPart-ChangeBinInt-ChangeBit-ChangeByte-EraseBytes-PersAutoDict-CopyPart-ShuffleBytes-ChangeBit-ShuffleBytes-CopyPart-EraseBytes-CopyPart-ChangeBinInt-CopyPart-ChangeByte-ShuffleBytes-ChangeBinInt-ShuffleBytes-ChangeBit-CMP-ShuffleBytes-ChangeBit-CrossOver-ChangeBinInt-ChangeByte-ShuffleBytes-CrossOver-EraseBytes-ChangeBinInt-InsertRepeatedBytes-PersAutoDict-InsertRepeatedBytes-InsertRepeatedBytes-CrossOver-ChangeByte-ShuffleBytes-CopyPart-ShuffleBytes-CopyPart-CrossOver-ChangeBit-ShuffleBytes-CrossOver-PersAutoDict-ChangeByte-ChangeBit-ShuffleBytes-CrossOver-ChangeByte-EraseBytes-CopyPart-ChangeBinInt-PersAutoDict-CrossOver-ShuffleBytes-CrossOver-CrossOver-EraseBytes-CrossOver-EraseBytes-CrossOver-ChangeBit-ChangeBinInt-ChangeByte-EraseBytes-ShuffleBytes-ShuffleBytes-ChangeBit-EraseBytes-ChangeBinInt-ChangeBit-ChangeBinInt-CopyPart-EraseBytes-PersAutoDict-EraseBytes-CopyPart-ChangeBinInt-ChangeByte-CrossOver-ChangeBinInt-ShuffleBytes-PersAutoDict-PersAutoDict-ChangeBinInt-CopyPart-ChangeBinInt-CrossOver-ChangeBit-ChangeBinInt-CopyPart-ChangeByte-ChangeBit-CopyPart-CrossOver-ChangeByte-ChangeBit-ChangeByte-ShuffleBytes-CMP-ChangeBit-CopyPart-ChangeBit-ChangeByte-ChangeBinInt-PersAutoDict-ChangeBinInt-CrossOver-ChangeBinInt-ChangeBit-ChangeBinInt-ChangeBinInt-PersAutoDict-ChangeBinInt-ChangeBinInt-ChangeByte-CopyPart-ShuffleBytes-ChangeByte-ChangeBit-ChangeByte-ChangeByte-EraseBytes-CrossOver-ChangeByte-ChangeByte-EraseBytes-EraseBytes-InsertRepeatedBytes-ShuffleBytes-CopyPart-CopyPart-ChangeBit-ShuffleBytes-PersAutoDict-ShuffleBytes-ChangeBit-ChangeByte-ChangeBit-ShuffleBytes-ChangeByte-ChangeBinInt-CrossOver-ChangeBinInt-ChangeBit-EraseBytes-CopyPart-ChangeByte-CrossOver-EraseBytes-CrossOver-ChangeByte-ShuffleBytes-ChangeByte-ChangeBinInt-CrossOver-ChangeByte-InsertRepeatedBytes-InsertByte-ShuffleBytes-PersAutoDict-ChangeBit-ChangeByte-ChangeBit-ShuffleBytes-ShuffleBytes-CopyPart-ShuffleBytes-EraseBytes-ShuffleBytes-ShuffleBytes-CrossOver-ChangeBinInt-CopyPart-CopyPart-CopyPart-EraseBytes-EraseBytes-ChangeByte-ChangeBinInt-ShuffleBytes-CMP-InsertByte-EraseBytes-ShuffleBytes-CopyPart-ChangeBit-CrossOver-CopyPart-CopyPart-ShuffleBytes-ChangeByte-ChangeByte-ChangeBinInt-EraseBytes-ChangeByte-ChangeBinInt-ChangeBit-ChangeBit-ChangeByte-ShuffleBytes-PersAutoDict-PersAutoDict-CMP-ChangeBit-ShuffleBytes-PersAutoDict-ChangeBinInt-EraseBytes-EraseBytes-ShuffleBytes-ChangeByte-ShuffleBytes-ChangeBit-EraseBytes-CMP-ShuffleBytes-ChangeByte-ChangeBinInt-EraseBytes-ChangeBinInt-ChangeByte-EraseBytes-ChangeByte-CrossOver-ShuffleBytes-EraseBytes-EraseBytes-ShuffleBytes-ChangeBit-EraseBytes-CopyPart-ShuffleBytes-ShuffleBytes-CrossOver-CopyPart-ChangeBinInt-ShuffleBytes-CrossOver-InsertByte-InsertByte-ChangeBinInt-ChangeBinInt-CopyPart-EraseBytes-ShuffleBytes-ChangeBit-ChangeBit-EraseBytes-ChangeByte-ChangeByte-ChangeBinInt-CrossOver-ChangeBinInt-ChangeBinInt-ShuffleBytes-ShuffleBytes-ChangeByte-ChangeByte-ChangeBinInt-ShuffleBytes-CrossOver-EraseBytes-CopyPart-CopyPart-CopyPart-ChangeBit-ShuffleBytes-ChangeByte-EraseBytes-ChangeByte-InsertRepeatedBytes-InsertByte-InsertRepeatedBytes-PersAutoDict-EraseBytes-ShuffleBytes-ChangeByte-ShuffleBytes-ChangeBinInt-ShuffleBytes-ChangeBinInt-ChangeBit-CrossOver-CrossOver-ShuffleBytes-CrossOver-CopyPart-CrossOver-CrossOver-CopyPart-ChangeByte-ChangeByte-CrossOver-ChangeBit-ChangeBinInt-EraseByte
s-ShuffleBytes-EraseBytes-CMP-PersAutoDict-PersAutoDict-InsertByte-ChangeBit-ChangeByte-CopyPart-CrossOver-ChangeByte-ChangeBit-ChangeByte-CopyPart-ChangeBinInt-EraseBytes-CrossOver-ChangeBit-CrossOver-PersAutoDict-CrossOver-ChangeByte-CrossOver-ChangeByte-ChangeByte-CrossOver-ShuffleBytes-CopyPart-CopyPart-ShuffleBytes-ChangeByte-ChangeByte-ChangeBinInt-ChangeBinInt-ChangeBinInt-ChangeBinInt-ShuffleBytes-CrossOver-ChangeBinInt-ShuffleBytes-ChangeBit-PersAutoDict-ChangeBinInt-ShuffleBytes-ChangeBinInt-ChangeByte-CrossOver-ChangeBit-CopyPart-ChangeBit-ChangeBit-CopyPart-ChangeByte-PersAutoDict-ChangeBit-ShuffleBytes-ChangeByte-ChangeBit-CrossOver-ChangeByte-CrossOver-ChangeByte-CrossOver-ChangeBit-ChangeByte-ChangeBinInt-PersAutoDict-CopyPart-ChangeBinInt-ChangeBit-CrossOver-ChangeBit-PersAutoDict-ShuffleBytes-EraseBytes-CrossOver-ChangeByte-ChangeBinInt-ShuffleBytes-ChangeBinInt-InsertRepeatedBytes-PersAutoDict-CrossOver-ChangeByte-Custom-PersAutoDict-CopyPart-CopyPart-ChangeBinInt-ShuffleBytes-ChangeBinInt-ChangeBit-ShuffleBytes-CrossOver-CMP-ChangeByte-CopyPart-ShuffleBytes-CopyPart-CopyPart-CrossOver-CrossOver-CrossOver-ShuffleBytes-ChangeByte-ChangeBinInt-ChangeBit-ChangeBit-ChangeBit-ChangeByte-EraseBytes-ChangeByte-ChangeBit-ChangeByte-ChangeByte-CopyPart-PersAutoDict-ChangeBinInt-PersAutoDict-PersAutoDict-PersAutoDict-CopyPart-CopyPart-CrossOver-ChangeByte-ChangeBinInt-ShuffleBytes-ChangeBit-CopyPart-EraseBytes-CopyPart-CopyPart-CrossOver-ChangeByte-EraseBytes-ShuffleBytes-ChangeByte-CopyPart-EraseBytes-CopyPart-CrossOver-ChangeBinInt-ChangeBinInt-InsertByte-ChangeBinInt-ChangeBit-ChangeByte-CopyPart-ChangeByte-EraseBytes-ChangeByte-ChangeBit-ChangeByte-ShuffleBytes-CopyPart-ChangeBinInt-EraseBytes-CrossOver-ChangeBit-ChangeBit-CrossOver-EraseBytes-ChangeBinInt-CopyPart-CopyPart-ChangeBinInt-ChangeBit-EraseBytes-InsertRepeatedBytes-EraseBytes-ChangeBit-CrossOver-CrossOver-EraseBytes-EraseBytes-ChangeByte-CopyPart-CopyPart-ShuffleBytes-ChangeByte-ChangeBit-ChangeByte-EraseBytes-ChangeBit-ChangeByte-ChangeByte-CrossOver-CopyPart-EraseBytes-ChangeByte-EraseBytes-ChangeByte-ShuffleBytes-ShuffleBytes-ChangeByte-CopyPart-ChangeByte-ChangeByte-ChangeBit-CopyPart-ChangeBit-ChangeBinInt-CopyPart-ShuffleBytes-ChangeBit-ChangeBinInt-ChangeBit-EraseBytes-CMP-CrossOver-CopyPart-ChangeBinInt-CrossOver-CrossOver-CopyPart-CrossOver-CrossOver-InsertByte-InsertByte-CopyPart-Custom- DE: "warn"-"\x00\x00\x00\x80"-"\xfe\xff\xff\xfb"-"\xff\xff"-"\x10\x00\x00\x00"-"\xfe\xff\xff\xff"-"\xff\xff\xff\xf6"-"U\x01\x00\x00\x00\x00\x00\x00"-"\xd9\xff\xff\xff"-"\xfe\xff\xff\xea"-"\xf0\xff\xff\xff"-"\xfc\xff\xff\xff"-"warn"-"\xff\xff\xff\xff"-"\xfe\xff\xff\xfb"-"\x00\x00\x00\x80"-"\xfe\xff\xff\xf1"-"\xfe\xff\xff\xea"-"\x00\x00\x00\x00\x00\x00\x012"-"\xe2\x00"-"\xfb\xff\xff\xff"-"\x00\x00\x00\x00"-"\xe9\xff\xff\xff"-"\xff\xff"-"\x00\x00\x00\x80"-"\x01\x00\x04\xc9"-"\xf0\xff\xff\xff"-"\xf9\xff\xff\xff"-"\xff\xff\xff\xff\xff\xff\xff\x12"-"\xe2\x00"-"\xfe\xff\xff\xff"-"\xfe\xff\xff\xea"-"\xff\xff\xff\xff"-"\xf4\xff\xff\xff"-"\xe9\xff\xff\xff"-"\xf1\xff\xff\xff"- #48 NEW cov: 4502 ft: 9151 corp: 27/750Kb lim: 64000 exec/s: 2 rss: 458Mb L: 50772/50772 MS: 259 
ChangeByte-ShuffleBytes-ChangeBinInt-ChangeByte-ChangeByte-ChangeByte-ChangeByte-ChangeBit-CopyPart-CrossOver-CopyPart-ChangeByte-CrossOver-CopyPart-ChangeBit-ChangeByte-EraseBytes-ChangeByte-CopyPart-CopyPart-CopyPart-ChangeBit-EraseBytes-ChangeBinInt-CrossOver-CopyPart-CrossOver-CopyPart-ChangeBit-ChangeByte-ChangeBit-InsertByte-CrossOver-InsertRepeatedBytes-InsertRepeatedBytes-InsertRepeatedBytes-ChangeBinInt-EraseBytes-InsertRepeatedBytes-InsertByte-ChangeBit-ShuffleBytes-ChangeBit-ChangeBit-CopyPart-ChangeBit-ChangeByte-CrossOver-ChangeBinInt-ChangeByte-CrossOver-CMP-ChangeByte-CrossOver-ChangeByte-ShuffleBytes-ShuffleBytes-ChangeByte-ChangeBinInt-CopyPart-EraseBytes-CrossOver-ChangeBit-ChangeBinInt-InsertByte-ChangeBit-CopyPart-ChangeBinInt-ChangeByte-CrossOver-ChangeBit-EraseBytes-CopyPart-ChangeBinInt-ChangeBit-ChangeBit-ChangeByte-CopyPart-ChangeBinInt-CrossOver-PersAutoDict-ChangeByte-ChangeBit-ChangeByte-ChangeBinInt-ChangeBinInt-EraseBytes-CopyPart-CopyPart-ChangeByte-ChangeByte-EraseBytes-PersAutoDict-CopyPart-ChangeByte-ChangeByte-EraseBytes-CrossOver-CopyPart-CopyPart-CopyPart-ChangeByte-ChangeBit-CMP-CopyPart-ChangeBinInt-ChangeBinInt-CrossOver-ChangeBit-ChangeBit-EraseBytes-ChangeByte-ShuffleBytes-ChangeBit-ChangeBinInt-CMP-InsertRepeatedBytes-CopyPart-Custom-ChangeByte-CrossOver-EraseBytes-ChangeBit-CopyPart-CrossOver-CMP-ShuffleBytes-EraseBytes-CrossOver-PersAutoDict-ChangeByte-CrossOver-CopyPart-CrossOver-CrossOver-ShuffleBytes-ChangeBinInt-CrossOver-ChangeBinInt-ShuffleBytes-PersAutoDict-ChangeByte-EraseBytes-ChangeBit-CrossOver-EraseBytes-CrossOver-ChangeBit-ChangeBinInt-EraseBytes-InsertByte-InsertRepeatedBytes-InsertByte-InsertByte-ChangeByte-ChangeBinInt-ChangeBit-CrossOver-ChangeByte-CrossOver-EraseBytes-ChangeByte-ShuffleBytes-ChangeBit-ChangeBit-ShuffleBytes-CopyPart-ChangeByte-PersAutoDict-ChangeBit-ChangeByte-InsertRepeatedBytes-CMP-CrossOver-ChangeByte-EraseBytes-ShuffleBytes-CrossOver-ShuffleBytes-ChangeBinInt-ChangeBinInt-CopyPart-PersAutoDict-ShuffleBytes-ChangeBit-CopyPart-ShuffleBytes-CopyPart-EraseBytes-ChangeByte-ChangeBit-ChangeBit-ChangeBinInt-ChangeByte-CopyPart-EraseBytes-ChangeBinInt-EraseBytes-EraseBytes-PersAutoDict-CMP-PersAutoDict-CrossOver-CrossOver-ChangeBit-CrossOver-PersAutoDict-CrossOver-CopyPart-ChangeByte-EraseBytes-ChangeByte-ShuffleBytes-ChangeByte-ChangeByte-CrossOver-ChangeBit-EraseBytes-ChangeByte-EraseBytes-ChangeBinInt-CrossOver-CrossOver-EraseBytes-ChangeBinInt-CrossOver-ChangeBit-ShuffleBytes-ChangeBit-ChangeByte-EraseBytes-ChangeBit-CrossOver-CrossOver-CrossOver-ChangeByte-ChangeBit-ShuffleBytes-ChangeBit-ChangeBit-EraseBytes-CrossOver-CrossOver-CopyPart-ShuffleBytes-ChangeByte-ChangeByte-CopyPart-CrossOver-CopyPart-CrossOver-CrossOver-EraseBytes-EraseBytes-ShuffleBytes-InsertRepeatedBytes-ChangeBit-CopyPart-Custom- DE: "\xfe\xff\xff\xfc"-"\x00\x00\x00\x00"-"F\x00"-"\xf3\xff\xff\xff"-"St9exception"-"_\x00\x00\x00"-"\xf6\xff\xff\xff"-"\xfe\xff\xff\xff"-"\x00\x00\x00\x00"-"p\x02\x00\x00\x00\x00\x00\x00"-"\xfe\xff\xff\xfb"-"\xff\xff"-"\xff\xff\xff\xff"-"\x01\x00\x00\x07"-"\xfe\xff\xff\xfe"- These are prohibitively large and of limited value in the default case (when someone is running the fuzzer, not debugging it), in my opinion. 
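The capping logic itself is small (a sketch of the core of the
FuzzerMutate.cpp change below; the same pattern is applied to the
dictionary-entry sequence):

  // Print at most kMaxMutationsToPrint (10) entries unless Verbose is set.
  size_t EntriesToPrint =
      Verbose ? CurrentMutatorSequence.size()
              : std::min(kMaxMutationsToPrint, CurrentMutatorSequence.size());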
Reviewed By: morehouse Differential Revision: https://reviews.llvm.org/D86658 --- compiler-rt/lib/fuzzer/FuzzerLoop.cpp | 2 +- compiler-rt/lib/fuzzer/FuzzerMutate.cpp | 17 +++++--- compiler-rt/lib/fuzzer/FuzzerMutate.h | 5 ++- .../CustomMutatorWithLongSequencesTest.cpp | 40 +++++++++++++++++++ .../test/fuzzer/fuzzer-custommutator.test | 14 +++++++ 5 files changed, 70 insertions(+), 8 deletions(-) create mode 100644 compiler-rt/test/fuzzer/CustomMutatorWithLongSequencesTest.cpp diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp index ce8c2fb747144..f1895ec2621a4 100644 --- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -636,7 +636,7 @@ void Fuzzer::PrintStatusForNewUnit(const Unit &U, const char *Text) { PrintStats(Text, ""); if (Options.Verbosity) { Printf(" L: %zd/%zd ", U.size(), Corpus.MaxInputSize()); - MD.PrintMutationSequence(); + MD.PrintMutationSequence(Options.Verbosity >= 2); Printf("\n"); } } diff --git a/compiler-rt/lib/fuzzer/FuzzerMutate.cpp b/compiler-rt/lib/fuzzer/FuzzerMutate.cpp index 121b450e8b8c5..cf34a9fe8e2e1 100644 --- a/compiler-rt/lib/fuzzer/FuzzerMutate.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerMutate.cpp @@ -18,6 +18,7 @@ namespace fuzzer { const size_t Dictionary::kMaxDictSize; +static const size_t kMaxMutationsToPrint = 10; static void PrintASCII(const Word &W, const char *PrintAfter) { PrintASCII(W.data(), W.size(), PrintAfter); @@ -481,15 +482,21 @@ void MutationDispatcher::PrintRecommendedDictionary() { Printf("###### End of recommended dictionary. ######\n"); } -void MutationDispatcher::PrintMutationSequence() { +void MutationDispatcher::PrintMutationSequence(bool Verbose) { Printf("MS: %zd ", CurrentMutatorSequence.size()); - for (auto M : CurrentMutatorSequence) - Printf("%s-", M.Name); + size_t EntriesToPrint = + Verbose ? CurrentMutatorSequence.size() + : std::min(kMaxMutationsToPrint, CurrentMutatorSequence.size()); + for (size_t i = 0; i < EntriesToPrint; i++) + Printf("%s-", CurrentMutatorSequence[i].Name); if (!CurrentDictionaryEntrySequence.empty()) { Printf(" DE: "); - for (auto DE : CurrentDictionaryEntrySequence) { + EntriesToPrint = Verbose ? CurrentDictionaryEntrySequence.size() + : std::min(kMaxMutationsToPrint, + CurrentDictionaryEntrySequence.size()); + for (size_t i = 0; i < EntriesToPrint; i++) { Printf("\""); - PrintASCII(DE->GetW(), "\"-"); + PrintASCII(CurrentDictionaryEntrySequence[i]->GetW(), "\"-"); } } } diff --git a/compiler-rt/lib/fuzzer/FuzzerMutate.h b/compiler-rt/lib/fuzzer/FuzzerMutate.h index 3ce3159f6893b..37fd6100dac33 100644 --- a/compiler-rt/lib/fuzzer/FuzzerMutate.h +++ b/compiler-rt/lib/fuzzer/FuzzerMutate.h @@ -24,8 +24,9 @@ class MutationDispatcher { ~MutationDispatcher() {} /// Indicate that we are about to start a new sequence of mutations. void StartMutationSequence(); - /// Print the current sequence of mutations. - void PrintMutationSequence(); + /// Print the current sequence of mutations. Only prints the full sequence + /// when Verbose is true. + void PrintMutationSequence(bool Verbose = true); /// Return the current sequence of mutations. std::string MutationSequence(); /// Indicate that the current sequence of mutations was successful. 
diff --git a/compiler-rt/test/fuzzer/CustomMutatorWithLongSequencesTest.cpp b/compiler-rt/test/fuzzer/CustomMutatorWithLongSequencesTest.cpp
new file mode 100644
index 0000000000000..4c9714788f569
--- /dev/null
+++ b/compiler-rt/test/fuzzer/CustomMutatorWithLongSequencesTest.cpp
@@ -0,0 +1,40 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Simple test for a custom mutator that results in long sequences of mutations.
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+
+#include "FuzzerInterface.h"
+
+static volatile int Sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  assert(Data);
+  if (Size > 0 && Data[0] == 'H') {
+    Sink = 1;
+    if (Size > 1 && Data[1] == 'i') {
+      Sink = 2;
+      if (Size > 2 && Data[2] == '!') {
+        std::cout << "BINGO; Found the target, exiting\n"
+                  << std::flush;
+        exit(1);
+      }
+    }
+  }
+  return 0;
+}
+
+extern "C" size_t LLVMFuzzerCustomMutator(uint8_t *Data, size_t Size,
+                                          size_t MaxSize, unsigned int Seed) {
+  // Run this 25 times to generate a large mutation sequence.
+  for (size_t i = 0; i < 25; i++) {
+    LLVMFuzzerMutate(Data, Size, MaxSize);
+  }
+  return LLVMFuzzerMutate(Data, Size, MaxSize);
+}
diff --git a/compiler-rt/test/fuzzer/fuzzer-custommutator.test b/compiler-rt/test/fuzzer/fuzzer-custommutator.test
index 25f5fe697b43f..7d94ae064bf96 100644
--- a/compiler-rt/test/fuzzer/fuzzer-custommutator.test
+++ b/compiler-rt/test/fuzzer/fuzzer-custommutator.test
@@ -11,3 +11,17 @@ LLVMFuzzerCustomMutatorWithLenControl: INFO: found LLVMFuzzerCustomMutator
 LLVMFuzzerCustomMutatorWithLenControl: In LLVMFuzzerCustomMutator
 LLVMFuzzerCustomMutatorWithLenControl: {{.*}} lim: {{[1-9][0-9]?}} {{.*}}
 LLVMFuzzerCustomMutatorWithLenControl: BINGO
+
+# sanity check: verify that we do get long lines with verbose printing on
+RUN: %cpp_compiler %S/CustomMutatorWithLongSequencesTest.cpp -o %t-CustomMutatorWithLongSequencesTest
+RUN: not %run %t-CustomMutatorWithLongSequencesTest -verbosity=2 2>&1 | FileCheck %s --check-prefix=LLVMFuzzerCustomMutatorLongSequence
+LLVMFuzzerCustomMutatorLongSequence: Flag: verbosity 2
+LLVMFuzzerCustomMutatorLongSequence: {{.*}} MS: {{[0-9]*}} {{(([a-zA-Z]*-){11,})}} {{.*}}
+LLVMFuzzerCustomMutatorLongSequence: BINGO
+
+# check a target that prints long mutation sequences and verifies the printed
+# output is capped at 10 entries
+RUN: not %run %t-CustomMutatorWithLongSequencesTest 2>&1 | FileCheck %s --check-prefix=LLVMFuzzerCustomMutatorLongSequenceTrimmed
+LLVMFuzzerCustomMutatorLongSequenceTrimmed-NOT: Flag: verbosity 2
+LLVMFuzzerCustomMutatorLongSequenceTrimmed-NOT: {{.*}} MS: {{[0-9]*}} {{(([a-zA-Z]*-){11,})}} {{.*}}
+LLVMFuzzerCustomMutatorLongSequenceTrimmed: BINGO

From 4cff1b40dacf6a5489b09657d94ea4757b8cd3b0 Mon Sep 17 00:00:00 2001
From: Elizabeth Andrews
Date: Mon, 14 Sep 2020 14:33:01 -0700
Subject: [PATCH 0853/1079] Do not apply calling conventions to MSVC entry
 points

Fix a link error for MSVC entry points when calling conventions are
specified. MSVC entry points should have the default calling convention.
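The fix rewrites the function type of a recognized entry point back to the
C calling convention (this is the core of the SemaDecl.cpp hunk below,
reproduced here for context):

  // In Sema::CheckMSVCRTEntryPoint: entry points such as wmain, WinMain,
  // wWinMain and DllMain must keep the default (C) calling convention even
  // under flags like -fdefault-calling-conv=fastcall.
  if (FT->getCallConv() != CC_C) {
    FT = Context.adjustFunctionType(FT, FT->getExtInfo().withCallingConv(CC_C));
    FD->setType(QualType(FT, 0));
  }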
Differential Revision: https://reviews.llvm.org/D87701 --- clang/lib/Sema/SemaDecl.cpp | 5 ++ .../test/CodeGenCXX/default_calling_conv.cpp | 48 ++++++++++++++++--- 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 4ede2f9192f4f..3e0d284bdf710 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -11095,6 +11095,11 @@ void Sema::CheckMSVCRTEntryPoint(FunctionDecl *FD) { if (FD->getName() != "DllMain") FD->setHasImplicitReturnZero(true); + if (FT->getCallConv() != CC_C) { + FT = Context.adjustFunctionType(FT, FT->getExtInfo().withCallingConv(CC_C)); + FD->setType(QualType(FT, 0)); + } + if (!FD->isInvalidDecl() && FD->getDescribedFunctionTemplate()) { Diag(FD->getLocation(), diag::err_mainlike_template_decl) << FD; FD->setInvalidDecl(); diff --git a/clang/test/CodeGenCXX/default_calling_conv.cpp b/clang/test/CodeGenCXX/default_calling_conv.cpp index b5b0f47ceb986..16b623c301971 100644 --- a/clang/test/CodeGenCXX/default_calling_conv.cpp +++ b/clang/test/CodeGenCXX/default_calling_conv.cpp @@ -1,10 +1,14 @@ -// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -fdefault-calling-conv=cdecl -emit-llvm -o - %s | FileCheck %s --check-prefix=CDECL --check-prefix=ALL -// RUN: %clang_cc1 -triple i786-unknown-linux-gnu -target-feature +sse4.2 -fdefault-calling-conv=fastcall -emit-llvm -o - %s | FileCheck %s --check-prefix=FASTCALL --check-prefix=ALL -// RUN: %clang_cc1 -triple i486-unknown-linux-gnu -fdefault-calling-conv=stdcall -emit-llvm -o - %s | FileCheck %s --check-prefix=STDCALL --check-prefix=ALL -// RUN: %clang_cc1 -triple i486-unknown-linux-gnu -mrtd -emit-llvm -o - %s | FileCheck %s --check-prefix=STDCALL --check-prefix=ALL -// RUN: %clang_cc1 -triple i986-unknown-linux-gnu -fdefault-calling-conv=vectorcall -emit-llvm -o - %s | FileCheck %s --check-prefix=VECTORCALL --check-prefix=ALL -// RUN: %clang_cc1 -triple i986-unknown-linux-gnu -fdefault-calling-conv=regcall -emit-llvm -o - %s | FileCheck %s --check-prefix=REGCALL --check-prefix=ALL - +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -fdefault-calling-conv=cdecl -emit-llvm -o - %s -DMAIN | FileCheck %s --check-prefix=CDECL --check-prefix=ALL +// RUN: %clang_cc1 -triple i786-unknown-linux-gnu -target-feature +sse4.2 -fdefault-calling-conv=fastcall -emit-llvm -o - %s -DMAIN | FileCheck %s --check-prefix=FASTCALL --check-prefix=ALL +// RUN: %clang_cc1 -triple i486-unknown-linux-gnu -fdefault-calling-conv=stdcall -emit-llvm -o - %s -DMAIN | FileCheck %s --check-prefix=STDCALL --check-prefix=ALL +// RUN: %clang_cc1 -triple i486-unknown-linux-gnu -mrtd -emit-llvm -o - %s -DMAIN | FileCheck %s --check-prefix=STDCALL --check-prefix=ALL +// RUN: %clang_cc1 -triple i986-unknown-linux-gnu -fdefault-calling-conv=vectorcall -emit-llvm -o - %s -DMAIN | FileCheck %s --check-prefix=VECTORCALL --check-prefix=ALL +// RUN: %clang_cc1 -triple i986-unknown-linux-gnu -fdefault-calling-conv=regcall -emit-llvm -o - %s -DMAIN | FileCheck %s --check-prefix=REGCALL --check-prefix=ALL +// RUN: %clang_cc1 -triple i386-pc-win32 -target-feature +sse4.2 -fdefault-calling-conv=fastcall -emit-llvm -o - %s -DWMAIN | FileCheck %s --check-prefix=WMAIN +// RUN: %clang_cc1 -triple i386-pc-win32 -target-feature +sse4.2 -fdefault-calling-conv=fastcall -emit-llvm -o - %s -DWINMAIN | FileCheck %s --check-prefix=WINMAIN +// RUN: %clang_cc1 -triple i386-pc-win32 -target-feature +sse4.2 -fdefault-calling-conv=fastcall -emit-llvm -o - %s -DWWINMAIN | FileCheck %s 
--check-prefix=WWINMAIN +// RUN: %clang_cc1 -triple i386-pc-win32 -target-feature +sse4.2 -fdefault-calling-conv=fastcall -emit-llvm -o - %s -DDLLMAIN | FileCheck %s --check-prefix=DLLMAIN +// // CDECL: define void @_Z5test1v // FASTCALL: define x86_fastcallcc void @_Z5test1v // STDCALL: define x86_stdcallcc void @_Z5test1v @@ -46,7 +50,37 @@ void test() { a.test_member(); } +#ifdef MAIN // ALL: define i32 @main int main() { return 1; } +#endif // main + +#ifdef WMAIN +// WMAIN: define dso_local i32 @wmain +int wmain() { + return 1; +} +#endif // wmain + +#ifdef WINMAIN +// WINMAIN: define dso_local i32 @WinMain +int WinMain() { + return 1; +} +#endif // WinMain + +#ifdef WWINMAIN +// WWINMAIN: define dso_local i32 @wWinMain +int wWinMain() { + return 1; +} +#endif // wWinMain + +#ifdef DLLMAIN +// DLLMAIN: define dso_local i32 @DllMain +int DllMain() { + return 1; +} +#endif // DllMain From 8d8a496356dbdf4fcc17caa69fe489d8d87068ac Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 10 Sep 2020 12:08:41 -0400 Subject: [PATCH 0854/1079] LocalStackSlotAllocation: Swap order of check --- llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index 204fb556d8105..ec3cce3fa1f15 100644 --- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -117,7 +117,7 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { // If the target doesn't want/need this pass, or if there are no locals // to consider, early exit. - if (!TRI->requiresVirtualBaseRegisters(MF) || LocalObjectCount == 0) + if (LocalObjectCount == 0 || !TRI->requiresVirtualBaseRegisters(MF)) return true; // Make sure we have enough space to store the local offsets. From deae5e567d65c49c40abc99d5ad53855c9872d5b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 10 Sep 2020 13:06:12 -0400 Subject: [PATCH 0855/1079] AMDGPU: Add baseline test for incorrect SP access --- .../local-stack-alloc-block-sp-reference.ll | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll new file mode 100644 index 0000000000000..a97b5dab5e503 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck -check-prefixes=GCN,GFX9 %s + +; Make sure we use the correct frame offset is used with the local +; frame area. +; +; %pin.low is allocated to offset 0. +; +; %local.area is assigned to the local frame offset by the +; LocalStackSlotAllocation pass at offset 4096. +; +; The %load1 access to %gep.large.offset initially used the stack +; pointer register and directly referenced the frame index. After +; LocalStackSlotAllocation, it would no longer refer to a frame index +; so eliminateFrameIndex would not adjust the access to use the +; correct FP offset. 
+ +define amdgpu_kernel void @local_stack_offset_uses_sp(i64 addrspace(1)* %out, i8 addrspace(1)* %in) { +; GCN-LABEL: local_stack_offset_uses_sp: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s9 +; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0 +; GCN-NEXT: s_add_u32 s0, s0, s9 +; GCN-NEXT: v_mov_b32_e32 v1, 0x3000 +; GCN-NEXT: s_addc_u32 s1, s1, 0 +; GCN-NEXT: v_add_u32_e32 v0, 64, v1 +; GCN-NEXT: v_mov_b32_e32 v2, 0 +; GCN-NEXT: v_mov_b32_e32 v3, 0x2000 +; GCN-NEXT: s_mov_b32 s6, 0 +; GCN-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen +; GCN-NEXT: BB0_1: ; %loadstoreloop +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: v_add_u32_e32 v3, s6, v1 +; GCN-NEXT: s_add_i32 s6, s6, 1 +; GCN-NEXT: s_cmpk_lt_u32 s6, 0x2120 +; GCN-NEXT: buffer_store_byte v2, v3, s[0:3], 0 offen +; GCN-NEXT: s_cbranch_scc1 BB0_1 +; GCN-NEXT: ; %bb.2: ; %split +; GCN-NEXT: v_mov_b32_e32 v1, 0x3000 +; GCN-NEXT: v_add_u32_e32 v1, 0x20d0, v1 +; GCN-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4 +; GCN-NEXT: buffer_load_dword v3, v0, s[0:3], s32 offen +; GCN-NEXT: buffer_load_dword v4, v0, s[0:3], s32 offen offset:4 +; GCN-NEXT: s_waitcnt vmcnt(1) +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, v2, v3 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v3, s5 +; GCN-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GCN-NEXT: s_endpgm +entry: + %pin.low = alloca i32, align 8192, addrspace(5) + %local.area = alloca [1060 x i64], align 4096, addrspace(5) + store volatile i32 0, i32 addrspace(5)* %pin.low + %local.area.cast = bitcast [1060 x i64] addrspace(5)* %local.area to i8 addrspace(5)* + call void @llvm.memset.p5i8.i32(i8 addrspace(5)* align 4 %local.area.cast, i8 0, i32 8480, i1 true) + %gep.large.offset = getelementptr inbounds [1060 x i64], [1060 x i64] addrspace(5)* %local.area, i64 0, i64 1050 + %gep.small.offset = getelementptr inbounds [1060 x i64], [1060 x i64] addrspace(5)* %local.area, i64 0, i64 8 + %load0 = load volatile i64, i64 addrspace(5)* %gep.large.offset + %load1 = load volatile i64, i64 addrspace(5)* %gep.small.offset + %add0 = add i64 %load0, %load1 + store volatile i64 %add0, i64 addrspace(1)* %out + ret void +} + +define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out, i8 addrspace(1)* %in) { +; GCN-LABEL: func_local_stack_offset_uses_sp: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_add_u32 s4, s32, 0x7ffc0 +; GCN-NEXT: s_mov_b32 s5, s33 +; GCN-NEXT: s_and_b32 s33, s4, 0xfff80000 +; GCN-NEXT: v_lshrrev_b32_e64 v3, 6, s33 +; GCN-NEXT: v_add_u32_e32 v3, 0x1000, v3 +; GCN-NEXT: v_mov_b32_e32 v4, 0 +; GCN-NEXT: v_add_u32_e32 v2, 64, v3 +; GCN-NEXT: s_mov_b32 s4, 0 +; GCN-NEXT: s_add_u32 s32, s32, 0x180000 +; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 +; GCN-NEXT: BB1_1: ; %loadstoreloop +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: v_add_u32_e32 v5, s4, v3 +; GCN-NEXT: s_add_i32 s4, s4, 1 +; GCN-NEXT: s_cmpk_lt_u32 s4, 0x2120 +; GCN-NEXT: buffer_store_byte v4, v5, s[0:3], 0 offen +; GCN-NEXT: s_cbranch_scc1 BB1_1 +; GCN-NEXT: ; %bb.2: ; %split +; GCN-NEXT: v_lshrrev_b32_e64 v3, 6, s33 +; GCN-NEXT: v_add_u32_e32 v3, 0x1000, v3 +; GCN-NEXT: v_add_u32_e32 v3, 0x20d0, v3 +; GCN-NEXT: buffer_load_dword v4, v3, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword 
v3, v3, s[0:3], 0 offen offset:4 +; GCN-NEXT: buffer_load_dword v5, v2, s[0:3], s32 offen +; GCN-NEXT: buffer_load_dword v6, v2, s[0:3], s32 offen offset:4 +; GCN-NEXT: s_sub_u32 s32, s32, 0x180000 +; GCN-NEXT: s_mov_b32 s33, s5 +; GCN-NEXT: s_waitcnt vmcnt(1) +; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v4, v5 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v6, vcc +; GCN-NEXT: global_store_dwordx2 v[0:1], v[2:3], off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] +entry: + %pin.low = alloca i32, align 8192, addrspace(5) + %local.area = alloca [1060 x i64], align 4096, addrspace(5) + store volatile i32 0, i32 addrspace(5)* %pin.low + %local.area.cast = bitcast [1060 x i64] addrspace(5)* %local.area to i8 addrspace(5)* + call void @llvm.memset.p5i8.i32(i8 addrspace(5)* align 4 %local.area.cast, i8 0, i32 8480, i1 true) + %gep.large.offset = getelementptr inbounds [1060 x i64], [1060 x i64] addrspace(5)* %local.area, i64 0, i64 1050 + %gep.small.offset = getelementptr inbounds [1060 x i64], [1060 x i64] addrspace(5)* %local.area, i64 0, i64 8 + %load0 = load volatile i64, i64 addrspace(5)* %gep.large.offset + %load1 = load volatile i64, i64 addrspace(5)* %gep.small.offset + %add0 = add i64 %load0, %load1 + store volatile i64 %add0, i64 addrspace(1)* %out + ret void +} + +declare void @llvm.memset.p5i8.i32(i8 addrspace(5)* nocapture writeonly, i8, i32, i1 immarg) #0 + +attributes #0 = { argmemonly nounwind willreturn writeonly } From 367248956e93982a73c0441868a562aeb85af5a0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 10 Sep 2020 12:11:53 -0400 Subject: [PATCH 0856/1079] AMDGPU: Clear offset register when using local stack area eliminateFrameIndex won't fix up the offset register when the direct frame index reference is moved to a separate move instruction. Switch the offset to a base 0 (which it probably should be to begin with). --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 10 ++++++++-- .../AMDGPU/local-stack-alloc-block-sp-reference.ll | 8 ++++---- .../AMDGPU/stack-pointer-offset-relative-frameindex.ll | 5 +++-- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 8a9899988b4c9..c3ffd5b7d6147 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -503,8 +503,10 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg, #endif assert(FIOp && FIOp->isFI() && "frame index must be address operand"); assert(TII->isMUBUF(MI)); - assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() == - MF->getInfo()->getStackPtrOffsetReg() && + + MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset); + assert(SOffset->getReg() == + MF->getInfo()->getStackPtrOffsetReg() && "should only be seeing stack pointer offset relative FrameIndex"); MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset); @@ -513,6 +515,10 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg, FIOp->ChangeToRegister(BaseReg, false); OffsetOp->setImm(NewOffset); + + // The move materializing the base address will be an absolute stack address, + // so clear the base offset. 
+ SOffset->ChangeToImmediate(0); } bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll index a97b5dab5e503..f390fadba1503 100644 --- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll +++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll @@ -41,8 +41,8 @@ define amdgpu_kernel void @local_stack_offset_uses_sp(i64 addrspace(1)* %out, i8 ; GCN-NEXT: v_add_u32_e32 v1, 0x20d0, v1 ; GCN-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen ; GCN-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4 -; GCN-NEXT: buffer_load_dword v3, v0, s[0:3], s32 offen -; GCN-NEXT: buffer_load_dword v4, v0, s[0:3], s32 offen offset:4 +; GCN-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:4 ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: v_add_co_u32_e32 v0, vcc, v2, v3 ; GCN-NEXT: s_waitcnt lgkmcnt(0) @@ -94,8 +94,8 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out, i8 addrspac ; GCN-NEXT: v_add_u32_e32 v3, 0x20d0, v3 ; GCN-NEXT: buffer_load_dword v4, v3, s[0:3], 0 offen ; GCN-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen offset:4 -; GCN-NEXT: buffer_load_dword v5, v2, s[0:3], s32 offen -; GCN-NEXT: buffer_load_dword v6, v2, s[0:3], s32 offen offset:4 +; GCN-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:4 ; GCN-NEXT: s_sub_u32 s32, s32, 0x180000 ; GCN-NEXT: s_mov_b32 s33, s5 ; GCN-NEXT: s_waitcnt vmcnt(1) diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll index e2d64c105d955..78e1402b1b022 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll @@ -41,8 +41,9 @@ define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, < ; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GCN-NEXT: s_cbranch_execz BB0_2 ; GCN-NEXT: ; %bb.1: ; %if.then4.i -; GCN-NEXT: buffer_load_dword v0, v40, s[36:39], s32 offen -; GCN-NEXT: buffer_load_dword v1, v40, s[36:39], s32 offen offset:4 +; GCN-NEXT: s_clause 0x1 +; GCN-NEXT: buffer_load_dword v0, v40, s[36:39], 0 offen +; GCN-NEXT: buffer_load_dword v1, v40, s[36:39], 0 offen offset:4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_add_nc_u32_e32 v0, v1, v0 ; GCN-NEXT: v_mul_lo_u32 v0, 0x41c64e6d, v0 From e47d2927de79767663f0a0ece0581522fbe40ac4 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Wed, 16 Sep 2020 09:55:22 -0700 Subject: [PATCH 0857/1079] Include (Type|Symbol)Record.h less Most clients only need CVType and CVSymbol, not structs for every type and symbol. Move CVSymbol and CVType to CVRecord.h to accomplish this. Update some of the common headers that need CVSymbol and CVType to use the new location. 
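
As a usage sketch (hypothetical client code, not part of this patch), walking a
type stream now only requires CVRecord.h instead of pulling in TypeRecord.h:

  #include "llvm/DebugInfo/CodeView/CVRecord.h"

  using namespace llvm;
  using namespace llvm::codeview;

  // Sum the sizes of all records in a type stream. CVType and
  // forEachCodeViewRecord are both available from CVRecord.h.
  static uint64_t countRecordBytes(ArrayRef<uint8_t> StreamBuffer) {
    uint64_t Bytes = 0;
    cantFail(forEachCodeViewRecord<CVType>(
        StreamBuffer, [&](const CVType &Rec) -> Error {
          Bytes += Rec.length(); // full record, including its header
          return Error::success();
        }));
    return Bytes;
  }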
---
 llvm/include/llvm/DebugInfo/CodeView/CVRecord.h    | 17 ++++++++++-------
 .../llvm/DebugInfo/CodeView/CVSymbolVisitor.h      |  3 ---
 .../llvm/DebugInfo/CodeView/CodeViewRecordIO.h     |  3 ++-
 .../DebugInfo/CodeView/DebugSymbolsSubsection.h    |  2 +-
 .../CodeView/LazyRandomTypeCollection.h            |  1 -
 .../llvm/DebugInfo/CodeView/RecordName.h           |  1 -
 .../llvm/DebugInfo/CodeView/SymbolDumper.h         |  2 +-
 .../llvm/DebugInfo/CodeView/SymbolRecord.h         |  3 ---
 .../DebugInfo/CodeView/SymbolRecordHelpers.h       |  3 ++-
 .../llvm/DebugInfo/CodeView/TypeCollection.h       |  3 +-
 .../DebugInfo/CodeView/TypeIndexDiscovery.h        |  4 ++--
 .../llvm/DebugInfo/CodeView/TypeRecord.h           |  6 ------
 .../llvm/DebugInfo/CodeView/TypeRecordHelpers.h    |  3 ++-
 .../llvm/DebugInfo/CodeView/TypeStreamMerger.h     |  2 +-
 .../llvm/DebugInfo/PDB/Native/TpiStream.h          |  2 +-
 .../DebugInfo/CodeView/TypeIndexDiscovery.cpp      |  3 ++-
 .../DebugInfo/CodeView/TypeHashingTest.cpp         |  1 +
 17 files changed, 26 insertions(+), 33 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h b/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
index 784c47e3bf5dc..bb29ef5f2ce82 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
@@ -11,9 +11,9 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
 #include "llvm/DebugInfo/CodeView/CodeViewError.h"
 #include "llvm/DebugInfo/CodeView/RecordSerialization.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/BinaryStreamRef.h"
 #include "llvm/Support/Endian.h"
@@ -61,12 +61,9 @@ template <typename Kind> class CVRecord {
   ArrayRef<uint8_t> RecordData;
 };
 
-template <typename Kind> struct RemappedRecord {
-  explicit RemappedRecord(const CVRecord<Kind> &R) : OriginalRecord(R) {}
-
-  CVRecord<Kind> OriginalRecord;
-  SmallVector<std::pair<uint32_t, TypeIndex>, 8> Mappings;
-};
+// There are two kinds of codeview records: type and symbol records.
+using CVType = CVRecord<TypeLeafKind>;
+using CVSymbol = CVRecord<SymbolKind>;
 
 template <typename Record, typename Func>
 Error forEachCodeViewRecord(ArrayRef<uint8_t> StreamBuffer, Func F) {
@@ -126,6 +123,12 @@ struct VarStreamArrayExtractor<codeview::CVRecord<Kind>> {
   }
 };
 
+namespace codeview {
+using CVSymbolArray = VarStreamArray<CVSymbol>;
+using CVTypeArray = VarStreamArray<CVType>;
+using CVTypeRange = iterator_range<CVTypeArray::Iterator>;
+} // namespace codeview
+
 } // end namespace llvm
 
 #endif // LLVM_DEBUGINFO_CODEVIEW_RECORDITERATOR_H
diff --git a/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h b/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
index 1615ff41df125..82ef8c173beec 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
@@ -10,9 +10,6 @@
 #define LLVM_DEBUGINFO_CODEVIEW_CVSYMBOLVISITOR_H
 
 #include "llvm/DebugInfo/CodeView/CVRecord.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h"
 #include "llvm/Support/ErrorOr.h"
 
 namespace llvm {
diff --git a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
index f26e80ebe2a94..d851dea0a27f4 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
@@ -15,7 +15,8 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/DebugInfo/CodeView/CodeViewError.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/GUID.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/BinaryStreamWriter.h"
 #include "llvm/Support/Error.h"
diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h b/llvm/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h
index 784fc59484b96..51b8523ed9697 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h
@@ -9,8 +9,8 @@
 #ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLSSUBSECTION_H
 #define LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLSSUBSECTION_H
 
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
 #include "llvm/DebugInfo/CodeView/DebugSubsection.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
 #include "llvm/Support/Error.h"
 
 namespace llvm {
diff --git a/llvm/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h b/llvm/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h
index 35eeef5a327e0..ddbb4e3c5e6c8 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h
@@ -14,7 +14,6 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/DebugInfo/CodeView/TypeCollection.h"
 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/BinaryStreamArray.h"
 #include "llvm/Support/Error.h"
diff --git a/llvm/include/llvm/DebugInfo/CodeView/RecordName.h b/llvm/include/llvm/DebugInfo/CodeView/RecordName.h
index cc09db8933bdb..8e06be9e41e8f 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/RecordName.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/RecordName.h
@@ -9,7 +9,6 @@
 #ifndef LLVM_DEBUGINFO_CODEVIEW_RECORDNAME_H
 #define LLVM_DEBUGINFO_CODEVIEW_RECORDNAME_H
 
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
 #include "llvm/DebugInfo/CodeView/TypeCollection.h"
 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
 
diff --git
a/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h index d832a48b12653..aaeffb2446ad8 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h +++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h @@ -11,8 +11,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringSet.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/SymbolDumpDelegate.h" -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" namespace llvm { diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h index 4383534b0db28..c37f6b4d5fa77 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h +++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h @@ -1003,9 +1003,6 @@ class AnnotationSym : public SymbolRecord { uint32_t RecordOffset = 0; }; -using CVSymbol = CVRecord; -using CVSymbolArray = VarStreamArray; - Expected readSymbolFromStream(BinaryStreamRef Stream, uint32_t Offset); diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h index 57dbc56c0769d..71bc70dde6ed1 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h +++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h @@ -9,7 +9,8 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_SYMBOLRECORDHELPERS_H #define LLVM_DEBUGINFO_CODEVIEW_SYMBOLRECORDHELPERS_H -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" namespace llvm { namespace codeview { diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h b/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h index 102d68c3fb2a9..bde5a8b3ab2fa 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h @@ -10,9 +10,8 @@ #define LLVM_DEBUGINFO_CODEVIEW_TYPECOLLECTION_H #include "llvm/ADT/StringRef.h" - +#include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" namespace llvm { namespace codeview { diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h b/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h index 469768787274d..f4f5835d8b57a 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h @@ -10,8 +10,8 @@ #define LLVM_DEBUGINFO_CODEVIEW_TYPEINDEXDISCOVERY_H #include "llvm/ADT/SmallVector.h" -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/Error.h" namespace llvm { diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h b/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h index 35f5c05611385..59bdd2a7c9f2c 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h @@ -14,7 +14,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/GUID.h" @@ -32,15 +31,10 @@ using support::little32_t; using support::ulittle16_t; using 
support::ulittle32_t; -using CVType = CVRecord; -using RemappedType = RemappedRecord; - struct CVMemberRecord { TypeLeafKind Kind; ArrayRef Data; }; -using CVTypeArray = VarStreamArray; -using CVTypeRange = iterator_range; /// Equvalent to CV_fldattr_t in cvinfo.h. struct MemberAttributes { diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h b/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h index 19492b93681cc..041f5214967c6 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h @@ -9,7 +9,8 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPERECORDHELPERS_H #define LLVM_DEBUGINFO_CODEVIEW_TYPERECORDHELPERS_H -#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" namespace llvm { namespace codeview { diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h b/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h index d0506cce81762..04d7c7b0420a8 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h @@ -11,7 +11,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/Support/Error.h" namespace llvm { diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h index 1b7fd2d54cb22..70288868ca21c 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h @@ -9,7 +9,7 @@ #ifndef LLVM_DEBUGINFO_PDB_RAW_PDBTPISTREAM_H #define LLVM_DEBUGINFO_PDB_RAW_PDBTPISTREAM_H -#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/PDB/Native/HashTable.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" diff --git a/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp index e84e1c9cea78e..682747a2b81fe 100644 --- a/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp @@ -5,8 +5,9 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" +#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/Endian.h" diff --git a/llvm/unittests/DebugInfo/CodeView/TypeHashingTest.cpp b/llvm/unittests/DebugInfo/CodeView/TypeHashingTest.cpp index 8b9dc7ab285e9..b4501c36fd2b9 100644 --- a/llvm/unittests/DebugInfo/CodeView/TypeHashingTest.cpp +++ b/llvm/unittests/DebugInfo/CodeView/TypeHashingTest.cpp @@ -8,6 +8,7 @@ #include "llvm/DebugInfo/CodeView/TypeHashing.h" #include "llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "gtest/gtest.h" From 738c73a454881ca78214816754c1b82941d0cd26 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 31 Aug 2020 15:09:50 -0400 Subject: [PATCH 0858/1079] RegAllocFast: Make self loop live-out heuristic more aggressive This currently has no impact on code, but prevents sizeable code size regressions after D52010. 
This prevents spilling and reloading all values inside blocks that loop back. Add a baseline test which would regress without this patch. --- llvm/lib/CodeGen/RegAllocFast.cpp | 37 +++- .../fastregalloc-self-loop-heuristic.mir | 185 ++++++++++++++++++ 2 files changed, 218 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index d93fd8f601c6b..db1b904fb2e6f 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -263,6 +263,20 @@ int RegAllocFast::getStackSpaceFor(Register VirtReg) { return FrameIdx; } +static bool dominates(MachineBasicBlock &MBB, + MachineBasicBlock::const_iterator A, + MachineBasicBlock::const_iterator B) { + auto MBBEnd = MBB.end(); + if (B == MBBEnd) + return true; + + MachineBasicBlock::const_iterator I = MBB.begin(); + for (; &*I != A && &*I != B; ++I) + ; + + return &*I == A; +} + /// Returns false if \p VirtReg is known to not live out of the current block. bool RegAllocFast::mayLiveOut(Register VirtReg) { if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) { @@ -270,11 +284,16 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) { return !MBB->succ_empty(); } - // If this block loops back to itself, it would be necessary to check whether - // the use comes after the def. + const MachineInstr *SelfLoopDef = nullptr; + + // If this block loops back to itself, it is necessary to check whether the + // use comes after the def. if (MBB->isSuccessor(MBB)) { - MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); - return true; + SelfLoopDef = MRI->getUniqueVRegDef(VirtReg); + if (!SelfLoopDef) { + MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); + return true; + } } // See if the first \p Limit uses of the register are all in the current @@ -287,6 +306,16 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) { // Cannot be live-out if there are no successors. return !MBB->succ_empty(); } + + if (SelfLoopDef) { + // Try to handle some simple cases to avoid spilling and reloading every + // value inside a self looping block. 
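+      // Note that dominates() above is a linear walk of the block, so this
+      // check costs O(block size) for each of the (at most Limit) uses
+      // inspected by this loop.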
+ if (SelfLoopDef == &UseInst || + !dominates(*MBB, SelfLoopDef->getIterator(), UseInst.getIterator())) { + MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); + return true; + } + } } return false; diff --git a/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir b/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir new file mode 100644 index 0000000000000..32de262837816 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir @@ -0,0 +1,185 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=regallocfast -o - %s | FileCheck -check-prefix=GCN %s + +--- +name: self_loop_single_def_use +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + ; GCN-LABEL: name: self_loop_single_def_use + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec + ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:vreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec + S_CBRANCH_EXECZ %bb.1, implicit $exec + + bb.2: + S_ENDPGM 0 + +... 
+ +--- +name: self_loop_multi_def +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + ; GCN-LABEL: name: self_loop_multi_def + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec + ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec + ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec + ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:vreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec + %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec + S_CBRANCH_EXECZ %bb.1, implicit $exec + + bb.2: + S_ENDPGM 0 + +... + +# There's a single def inside the self loop, but it's also a use. + +--- +name: self_loop_def_use_same_inst +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + ; GCN-LABEL: name: self_loop_def_use_same_inst + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: renamable $vgpr0 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec + ; GCN: $vgpr1_vgpr2 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:vreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:vgpr_32 = V_ADD_U32_e32 1, undef %1, implicit $exec + GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec + S_CBRANCH_EXECZ %bb.1, implicit $exec + + bb.2: + S_ENDPGM 0 + +... 
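+
+# The use of %1 inside the loop block precedes its def, so %1 is
+# conservatively treated as live across the back edge and is expected to
+# be spilled after its def.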
+ +--- +name: self_loop_def_after_use +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + ; GCN-LABEL: name: self_loop_def_after_use + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, undef renamable $vgpr0, 0, 0, 0, 0, implicit $exec + ; GCN: renamable $vgpr2 = V_ADD_U32_e64 1, 1, 0, implicit $exec + ; GCN: SI_SPILL_V32_SAVE killed $vgpr2, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:vreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + GLOBAL_STORE_DWORD %0, undef %1, 0, 0, 0, 0, implicit $exec + %1:vgpr_32 = V_ADD_U32_e64 1, 1, 0, implicit $exec + S_CBRANCH_EXECZ %bb.1, implicit $exec + + bb.2: + S_ENDPGM 0 + +... + +--- +name: self_loop_single_subreg_def_use +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + ; GCN-LABEL: name: self_loop_single_subreg_def_use + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: undef renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr2_vgpr3 + ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, undef renamable $vgpr3, 0, 0, 0, 0, implicit $exec + ; GCN: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.1, align 4, addrspace 5) + ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:vreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + undef %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD %0, undef %1.sub1, 0, 0, 0, 0, implicit $exec + S_CBRANCH_EXECZ %bb.1, implicit $exec + + bb.2: + S_ENDPGM 0 + +... From 39faf428164a28f3652370958ce893d9200927c8 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 14 May 2020 09:56:35 -0400 Subject: [PATCH 0859/1079] [libc++] Ensure streams are initialized early When statically linking libc++ on some systems, the streams are not initialized early enough, which causes all kinds of issues. This was reported e.g. in http://llvm.org/PR28954, but also in various open source projects that use libc++. Fixes http://llvm.org/PR28954. 
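
To make the failure mode concrete, here is an illustrative sketch (hypothetical
user code, not part of this patch): with a statically linked libc++, a static
constructor in user code could run before the library's stream initializer and
touch a not-yet-constructed std::cout:

  #include <iostream>

  struct Early {
    // If this constructor runs before libc++'s __start_std_streams is
    // initialized, it writes into an unconstructed stream object.
    Early() { std::cout << "constructed early\n"; }
  };
  static Early E; // initialization order relative to the streams is the bug

Giving __start_std_streams init_priority(101) makes it run ahead of ordinary
static constructors like the one above.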
Differential Revision: https://reviews.llvm.org/D31413 --- libcxx/src/iostream.cpp | 2 +- .../iostream.objects/init.pass.cpp | 88 +++++++++++++++++++ 2 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 libcxx/test/std/input.output/iostream.objects/init.pass.cpp diff --git a/libcxx/src/iostream.cpp b/libcxx/src/iostream.cpp index ad1920abc6572..d088593c4feda 100644 --- a/libcxx/src/iostream.cpp +++ b/libcxx/src/iostream.cpp @@ -77,7 +77,7 @@ __asm__("?wclog@" _LIBCPP_ABI_NAMESPACE_STR "@std@@3V?$basic_ostream@_WU?$char_t #endif ; -_LIBCPP_HIDDEN ios_base::Init __start_std_streams; +_LIBCPP_HIDDEN ios_base::Init __start_std_streams __attribute__((init_priority(101))); // On Windows the TLS storage for locales needs to be initialized before we create // the standard streams, otherwise it may not be alive during program termination diff --git a/libcxx/test/std/input.output/iostream.objects/init.pass.cpp b/libcxx/test/std/input.output/iostream.objects/init.pass.cpp new file mode 100644 index 0000000000000..62a9ffbca3ea3 --- /dev/null +++ b/libcxx/test/std/input.output/iostream.objects/init.pass.cpp @@ -0,0 +1,88 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: libcpp-has-no-stdin, libcpp-has-no-stdout + +// Make sure that the iostreams are initialized before everything else. +// This has been an issue when statically linking libc++ in some contexts. +// See https://llvm.org/PR28954 for details. +// +// This test works by checking that std::{cin,cout,cerr} is the same in a +// static object constructor and in the main function. It dumps the memory of +// each stream in the static object constructor and compares it with the memory +// in the main function. +// +// The assumption is that if there are no uses of the stream object (such as +// construction), then its memory must be the same. In the case where the test +// "fails" and we are actually accessing an uninitialized object when we perform +// the memcpy, the behavior is technically undefined (so the test could still +// pass). 
+
+#include <cassert>
+#include <cstring>
+#include <iostream>
+
+struct Checker {
+  char *cerr_mem_dump;
+  char *cin_mem_dump;
+  char *cout_mem_dump;
+  char *clog_mem_dump;
+
+  char *wcerr_mem_dump;
+  char *wcin_mem_dump;
+  char *wcout_mem_dump;
+  char *wclog_mem_dump;
+
+  Checker()
+    : cerr_mem_dump(new char[sizeof(std::cerr)])
+    , cin_mem_dump(new char[sizeof(std::cin)])
+    , cout_mem_dump(new char[sizeof(std::cout)])
+    , clog_mem_dump(new char[sizeof(std::clog)])
+
+    , wcerr_mem_dump(new char[sizeof(std::wcerr)])
+    , wcin_mem_dump(new char[sizeof(std::wcin)])
+    , wcout_mem_dump(new char[sizeof(std::wcout)])
+    , wclog_mem_dump(new char[sizeof(std::wclog)])
+  {
+    std::memcpy(cerr_mem_dump, (char*)&std::cerr, sizeof(std::cerr));
+    std::memcpy(cin_mem_dump, (char*)&std::cin, sizeof(std::cin));
+    std::memcpy(cout_mem_dump, (char*)&std::cout, sizeof(std::cout));
+    std::memcpy(clog_mem_dump, (char*)&std::clog, sizeof(std::clog));
+
+    std::memcpy(wcerr_mem_dump, (char*)&std::wcerr, sizeof(std::wcerr));
+    std::memcpy(wcin_mem_dump, (char*)&std::wcin, sizeof(std::wcin));
+    std::memcpy(wcout_mem_dump, (char*)&std::wcout, sizeof(std::wcout));
+    std::memcpy(wclog_mem_dump, (char*)&std::wclog, sizeof(std::wclog));
+  }
+
+  ~Checker() {
+    delete[] cerr_mem_dump;
+    delete[] cin_mem_dump;
+    delete[] cout_mem_dump;
+    delete[] clog_mem_dump;
+
+    delete[] wcerr_mem_dump;
+    delete[] wcin_mem_dump;
+    delete[] wcout_mem_dump;
+    delete[] wclog_mem_dump;
+  }
+};
+
+static Checker check;
+
+int main() {
+  assert(std::memcmp(check.cerr_mem_dump, (char const*)&std::cerr, sizeof(std::cerr)) == 0);
+  assert(std::memcmp(check.cin_mem_dump, (char const*)&std::cin, sizeof(std::cin)) == 0);
+  assert(std::memcmp(check.cout_mem_dump, (char const*)&std::cout, sizeof(std::cout)) == 0);
+  assert(std::memcmp(check.clog_mem_dump, (char const*)&std::clog, sizeof(std::clog)) == 0);
+
+  assert(std::memcmp(check.wcerr_mem_dump, (char const*)&std::wcerr, sizeof(std::wcerr)) == 0);
+  assert(std::memcmp(check.wcin_mem_dump, (char const*)&std::wcin, sizeof(std::wcin)) == 0);
+  assert(std::memcmp(check.wcout_mem_dump, (char const*)&std::wcout, sizeof(std::wcout)) == 0);
+  assert(std::memcmp(check.wclog_mem_dump, (char const*)&std::wclog, sizeof(std::wclog)) == 0);
+}

From f9e6d1edc0dad9afb26e773aa125ed62c58f7080 Mon Sep 17 00:00:00 2001
From: Jamie Schmeiser
Date: Wed, 16 Sep 2020 17:25:13 +0000
Subject: [PATCH 0860/1079] Re-land: Add new hidden option -print-changed which
 only reports changes to IR

A new hidden option -print-changed is added along with code to support
printing the IR as it passes through the opt pipeline in the new pass
manager. Only those passes that change the IR are reported, with others
only having the banner reported, indicating that they did not change the
IR, were filtered out or ignored.

Filtering of output via the -filter-print-funcs option is supported, and a
new supporting hidden option -filter-passes is added. The latter takes a
comma-separated list of pass names and filters the output to only show
those passes in the list that change the IR. The output can also be
modified via the -print-module-scope option.

The code introduces a template base class that generalizes the comparison
of IRs, taking the IR representation as a template parameter. The
constructor takes a series of lambdas that provide an event-based API
for generalized reporting of IRs as they are changed in the opt pipeline
through the new pass manager.
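
For example, an illustrative invocation (the test added below exercises these
flags) is:

  opt -S -passes=instsimplify -print-changed -filter-print-funcs=f in.ll -o /dev/null

which prints IR dumps only for the passes that actually changed function f,
and one-line banners for everything else.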
The first of several instantiations is provided that prints the IR in a
form similar to that produced by -print-after-all with the above-mentioned
filtering capabilities. This version, and the others to follow, will be
introduced at the upcoming developer's conference.

Reviewed By: aeubanks (Arthur Eubanks), yrouban (Yevgeny Rouban), ychen (Yuanfang Chen)

Differential Revision: https://reviews.llvm.org/D86360
---
 .../llvm/Passes/StandardInstrumentations.h    |  92 +++++++
 llvm/lib/IR/LegacyPassManager.cpp             |   4 +-
 llvm/lib/Passes/StandardInstrumentations.cpp  | 229 +++++++++++++++++-
 llvm/test/Other/change-printer.ll             | 109 +++++++++
 4 files changed, 427 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/Other/change-printer.ll

diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h
index 76e217c899745..8fc868bfa4c9e 100644
--- a/llvm/include/llvm/Passes/StandardInstrumentations.h
+++ b/llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -124,6 +124,97 @@ class PreservedCFGCheckerInstrumentation {
   void registerCallbacks(PassInstrumentationCallbacks &PIC);
 };
 
+// Base class for classes that report changes to the IR.
+// It presents an interface for such classes and provides calls
+// on various events as the new pass manager transforms the IR.
+// It also provides filtering of information based on hidden options
+// specifying which functions are interesting.
+// Calls are made for the following events/queries:
+// 1. The initial IR processed.
+// 2. To get the representation of the IR (of type \p IRUnitT).
+// 3. When a pass does not change the IR.
+// 4. When a pass changes the IR (given both before and after representations
+//    of type \p IRUnitT).
+// 5. When an IR is invalidated.
+// 6. When a pass is run on an IR that is not interesting (based on options).
+// 7. When a pass is ignored (pass manager or adapter pass).
+// 8. To compare two IR representations (of type \p IRUnitT).
+template <typename IRUnitT> class ChangePrinter {
+protected:
+  ChangePrinter() : InitialIR(true) {}
+
+public:
+  virtual ~ChangePrinter();
+
+  // Determine if this pass/IR is interesting and if so, save the IR;
+  // otherwise it is left on the stack without data.
+  void saveIRBeforePass(Any IR, StringRef PassID);
+  // Compare the IR from before the pass with the IR after the pass.
+  void handleIRAfterPass(Any IR, StringRef PassID);
+  // Handle the situation where a pass is invalidated.
+  void handleInvalidatedPass(StringRef PassID);
+
+protected:
+  // called on the first IR processed
+  virtual void handleInitialIR(Any IR) = 0;
+  // called before and after a pass to get the representation of the IR
+  virtual void generateIRRepresentation(Any IR, StringRef PassID,
+                                        IRUnitT &Output) = 0;
+  // called when the pass is not interesting
+  virtual void omitAfter(StringRef PassID, std::string &Name) = 0;
+  // called when an interesting IR has changed
+  virtual void handleAfter(StringRef PassID, std::string &Name,
+                           const IRUnitT &Before, const IRUnitT &After,
+                           Any) = 0;
+  // called when an interesting pass is invalidated
+  virtual void handleInvalidated(StringRef PassID) = 0;
+  // called when the IR or pass is not interesting
+  virtual void handleFiltered(StringRef PassID, std::string &Name) = 0;
+  // called when an ignored pass is encountered
+  virtual void handleIgnored(StringRef PassID, std::string &Name) = 0;
+  // called to compare the before and after representations of the IR
+  virtual bool same(const IRUnitT &Before, const IRUnitT &After) = 0;
+
+  // stack of IRs before passes
+  std::vector<IRUnitT> BeforeStack;
+  // Is this the first IR seen?
+  bool InitialIR;
+};
+
+// A change printer based on the string representation of the IR as created
+// by unwrapAndPrint. The string representation is stored in a std::string
+// to preserve it as the IR changes in each pass. Note that the banner is
+// included in this representation but it is massaged before reporting.
+class IRChangePrinter : public ChangePrinter<std::string> {
+public:
+  IRChangePrinter();
+  ~IRChangePrinter() override;
+  void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+protected:
+  // called on the first IR processed
+  void handleInitialIR(Any IR) override;
+  // called before and after a pass to get the representation of the IR
+  void generateIRRepresentation(Any IR, StringRef PassID,
+                                std::string &Output) override;
+  // called when the pass is not interesting
+  void omitAfter(StringRef PassID, std::string &Name) override;
+  // called when an interesting IR has changed
+  void handleAfter(StringRef PassID, std::string &Name,
+                   const std::string &Before, const std::string &After,
+                   Any) override;
+  // called when an interesting pass is invalidated
+  void handleInvalidated(StringRef PassID) override;
+  // called when the IR or pass is not interesting
+  void handleFiltered(StringRef PassID, std::string &Name) override;
+  // called when an ignored pass is encountered
+  void handleIgnored(StringRef PassID, std::string &Name) override;
+  // called to compare the before and after representations of the IR
+  bool same(const std::string &Before, const std::string &After) override;
+
+  raw_ostream &Out;
+};
+
 /// This class provides an interface to register all the standard pass
 /// instrumentations and manages their state (if any).
class StandardInstrumentations { @@ -132,6 +223,7 @@ class StandardInstrumentations { TimePassesHandler TimePasses; OptNoneInstrumentation OptNone; PreservedCFGCheckerInstrumentation PreservedCFGChecker; + IRChangePrinter PrintChangedIR; public: StandardInstrumentations(bool DebugLogging) : PrintPass(DebugLogging) {} diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp index 8d9ed917bb617..63886f4861708 100644 --- a/llvm/lib/IR/LegacyPassManager.cpp +++ b/llvm/lib/IR/LegacyPassManager.cpp @@ -87,14 +87,14 @@ static cl::opt PrintAfterAll("print-after-all", static cl::opt PrintModuleScope("print-module-scope", cl::desc("When printing IR for print-[before|after]{-all} " - "always print a module IR"), + "and change reporters always print a module IR"), cl::init(false), cl::Hidden); static cl::list PrintFuncsList("filter-print-funcs", cl::value_desc("function names"), cl::desc("Only print IR for functions whose name " "match this for all print-[before|after][-all] " - "options"), + "and change reporter options"), cl::CommaSeparated, cl::Hidden); /// This is a helper to determine whether to print IR before or diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index 2ee373b912be0..4755315ecfdb6 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" +#include #include using namespace llvm; @@ -51,6 +52,34 @@ static cl::opt cl::desc("Print all pass management debugging information. " "`-debug-pass-manager` must also be specified")); +// A hidden option that prints out the IR after passes, similar to +// -print-after-all except that it only prints the IR after passes that +// change the IR. Those passes that do not make changes to the IR are +// reported as not making any changes. In addition, the initial IR is +// also reported. Other hidden options affect the output from this +// option. -filter-passes will limit the output to the named passes +// that actually change the IR and other passes are reported as filtered out. +// The specified passes will either be reported as making no changes (with +// no IR reported) or the changed IR will be reported. Also, the +// -filter-print-funcs and -print-module-scope options will do similar +// filtering based on function name, reporting changed IRs as functions(or +// modules if -print-module-scope is specified) for a particular function +// or indicating that the IR has been filtered out. The extra options +// can be combined, allowing only changed IRs for certain passes on certain +// functions to be reported in different formats, with the rest being +// reported as filtered out. +static cl::opt PrintChanged("print-changed", + cl::desc("Print changed IRs"), + cl::init(false), cl::Hidden); +// A hidden option that supports the -print-changed option. See +// the description for -print-changed for an explanation of the use +// of this option. Note that this option has no effect without -print-changed. +static cl::list + PrintPassesList("filter-passes", cl::value_desc("pass names"), + cl::desc("Only consider IR changes for passes whose names " + "match for the print-changed option"), + cl::CommaSeparated, cl::Hidden); + namespace { /// Extracting Module out of \p IR unit. 
Also fills a textual description @@ -107,7 +136,8 @@ void printIR(raw_ostream &OS, const Function *F, StringRef Banner, } void printIR(raw_ostream &OS, const Module *M, StringRef Banner, - StringRef Extra = StringRef(), bool Brief = false) { + StringRef Extra = StringRef(), bool Brief = false, + bool ShouldPreserveUseListOrder = false) { if (Brief) { OS << M->getName() << '\n'; return; @@ -115,7 +145,7 @@ void printIR(raw_ostream &OS, const Module *M, StringRef Banner, if (llvm::isFunctionInPrintList("*") || llvm::forcePrintModuleIR()) { OS << Banner << Extra << "\n"; - M->print(OS, nullptr, false); + M->print(OS, nullptr, ShouldPreserveUseListOrder); } else { for (const auto &F : M->functions()) { printIR(OS, &F, Banner, Extra); @@ -159,17 +189,19 @@ void printIR(raw_ostream &OS, const Loop *L, StringRef Banner, /// Generic IR-printing helper that unpacks a pointer to IRUnit wrapped into /// llvm::Any and does actual print job. void unwrapAndPrint(raw_ostream &OS, Any IR, StringRef Banner, - bool ForceModule = false, bool Brief = false) { + bool ForceModule = false, bool Brief = false, + bool ShouldPreserveUseListOrder = false) { if (ForceModule) { if (auto UnwrappedModule = unwrapModule(IR)) - printIR(OS, UnwrappedModule->first, Banner, UnwrappedModule->second); + printIR(OS, UnwrappedModule->first, Banner, UnwrappedModule->second, + Brief, ShouldPreserveUseListOrder); return; } if (any_isa(IR)) { const Module *M = any_cast(IR); assert(M && "module should be valid for printing"); - printIR(OS, M, Banner, "", Brief); + printIR(OS, M, Banner, "", Brief, ShouldPreserveUseListOrder); return; } @@ -197,8 +229,194 @@ void unwrapAndPrint(raw_ostream &OS, Any IR, StringRef Banner, llvm_unreachable("Unknown wrapped IR type"); } +// Return true when this is a pass for which changes should be ignored +inline bool isIgnored(StringRef PassID) { + return isSpecialPass(PassID, + {"PassManager", "PassAdaptor", "AnalysisManagerProxy"}); +} + +// Return true when this is a defined function for which printing +// of changes is desired. +inline bool isInterestingFunction(const Function &F) { + return llvm::isFunctionInPrintList(F.getName()); +} + +// Return true when this is a pass for which printing of changes is desired. +inline bool isInterestingPass(StringRef PassID) { + if (isIgnored(PassID)) + return false; + + static std::unordered_set PrintPassNames(PrintPassesList.begin(), + PrintPassesList.end()); + return PrintPassNames.empty() || PrintPassNames.count(PassID.str()); +} + +// Return true when this is a pass on IR for which printing +// of changes is desired. +bool isInteresting(Any IR, StringRef PassID) { + if (!isInterestingPass(PassID)) + return false; + if (any_isa(IR)) + return isInterestingFunction(*any_cast(IR)); + return true; +} + } // namespace +template +void ChangePrinter::saveIRBeforePass(Any IR, StringRef PassID) { + // Always need to place something on the stack because invalidated passes + // are not given the IR so it cannot be determined whether the pass was for + // something that was filtered out. + BeforeStack.emplace_back(); + + if (!isInteresting(IR, PassID)) + return; + // Is this the initial IR? + if (InitialIR) { + InitialIR = false; + handleInitialIR(IR); + } + + // Save the IR representation on the stack. 
+ auto &Data = BeforeStack.back(); + generateIRRepresentation(IR, PassID, Data); +} + +template +void ChangePrinter::handleIRAfterPass(Any IR, StringRef PassID) { + assert(!BeforeStack.empty() && "Unexpected empty stack encountered."); + std::string Name; + + // unwrapModule has inconsistent handling of names for function IRs. + if (any_isa(IR)) { + const Function *F = any_cast(IR); + Name = formatv(" (function: {0})", F->getName()).str(); + } else { + if (auto UM = unwrapModule(IR)) + Name = UM->second; + } + if (Name == "") + Name = " (module)"; + + if (isIgnored(PassID)) + handleIgnored(PassID, Name); + else if (!isInteresting(IR, PassID)) + handleFiltered(PassID, Name); + else { + // Get the before rep from the stack + IRUnitT &Before = BeforeStack.back(); + // Create the after rep + IRUnitT After; + generateIRRepresentation(IR, PassID, After); + + // was there a change in IR? + if (same(Before, After)) + omitAfter(PassID, Name); + else + handleAfter(PassID, Name, Before, After, IR); + } + BeforeStack.pop_back(); +} + +template +void ChangePrinter::handleInvalidatedPass(StringRef PassID) { + assert(!BeforeStack.empty() && "Unexpected empty stack encountered."); + + // Always flag it as invalidated as we cannot determine when + // a pass for a filtered function is invalidated since we do not + // get the IR in the call. Also, the output is just alternate + // forms of the banner anyway. + handleInvalidated(PassID); + BeforeStack.pop_back(); +} + +template ChangePrinter::~ChangePrinter() { + assert(BeforeStack.empty() && "Problem with Change Printer stack."); +} + +IRChangePrinter::IRChangePrinter() : Out(dbgs()) {} + +IRChangePrinter::~IRChangePrinter() { + ChangePrinter::~ChangePrinter(); +} + +void IRChangePrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) { + if (!PrintChanged) + return; + + PIC.registerBeforePassCallback([this](StringRef P, Any IR) { + saveIRBeforePass(IR, P); + return true; + }); + + PIC.registerAfterPassCallback( + [this](StringRef P, Any IR, const PreservedAnalyses &) { + handleIRAfterPass(IR, P); + }); + PIC.registerAfterPassInvalidatedCallback( + [this](StringRef P, const PreservedAnalyses &) { + handleInvalidatedPass(P); + }); +} + +void IRChangePrinter::handleInitialIR(Any IR) { + StringRef Banner("*** IR Dump At Start: ***"); + unwrapAndPrint(Out, IR, Banner, true, + /*Brief*/ false, /*ShouldPreserveUseListOrder*/ true); +} + +void IRChangePrinter::generateIRRepresentation(Any IR, StringRef PassID, + std::string &Output) { + raw_string_ostream OS(Output); + // use the after banner for all cases so it will match + SmallString<20> Banner = formatv("*** IR Dump After {0} ***", PassID); + unwrapAndPrint(OS, IR, Banner, llvm::forcePrintModuleIR(), + /*Brief*/ false, /*ShouldPreserveUseListOrder*/ true); + OS.str(); +} + +void IRChangePrinter::omitAfter(StringRef PassID, std::string &Name) { + Out << formatv("*** IR Dump After {0}{1} omitted because no change ***\n", + PassID, Name); +} + +void IRChangePrinter::handleAfter(StringRef PassID, std::string &Name, + const std::string &Before, + const std::string &After, Any) { + assert(After.find("*** IR Dump") == 0 && "Unexpected banner format."); + StringRef AfterRef = After; + StringRef Banner = + AfterRef.take_until([](char C) -> bool { return C == '\n'; }); + Out << Banner; + + // LazyCallGraph::SCC already has "(scc:..." in banner so only add + // in the name if it isn't already there. 
+ if (Name.substr(0, 6).compare(" (scc:") != 0 && !llvm::forcePrintModuleIR()) + Out << Name; + + Out << After.substr(Banner.size()); +} + +void IRChangePrinter::handleInvalidated(StringRef PassID) { + Out << formatv("*** IR Pass {0} invalidated ***\n", PassID); +} + +void IRChangePrinter::handleFiltered(StringRef PassID, std::string &Name) { + SmallString<20> Banner = + formatv("*** IR Dump After {0}{1} filtered out ***\n", PassID, Name); + Out << Banner; +} + +void IRChangePrinter::handleIgnored(StringRef PassID, std::string &Name) { + Out << formatv("*** IR Pass {0}{1} ignored ***\n", PassID, Name); +} + +bool IRChangePrinter::same(const std::string &Before, + const std::string &After) { + return Before.compare(After) == 0; +}; + PrintIRInstrumentation::~PrintIRInstrumentation() { assert(ModuleDescStack.empty() && "ModuleDescStack is not empty at exit"); } @@ -508,4 +726,5 @@ void StandardInstrumentations::registerCallbacks( TimePasses.registerCallbacks(PIC); OptNone.registerCallbacks(PIC); PreservedCFGChecker.registerCallbacks(PIC); + PrintChangedIR.registerCallbacks(PIC); } diff --git a/llvm/test/Other/change-printer.ll b/llvm/test/Other/change-printer.ll new file mode 100644 index 0000000000000..54c941b293009 --- /dev/null +++ b/llvm/test/Other/change-printer.ll @@ -0,0 +1,109 @@ +; Simple checks of -print-changed functionality +; +; Note that (mostly) only the banners are checked. +; +; Simple functionality check. +; RUN: opt -S -print-changed -passes=instsimplify 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_SIMPLE +; +; Check that only the passes that change the IR are printed and that the +; others (including g) are filtered out. +; RUN: opt -S -print-changed -passes=instsimplify -filter-print-funcs=f 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_FUNC_FILTER +; +; Check that the reporting of IRs respects -print-module-scope +; RUN: opt -S -print-changed -passes=instsimplify -print-module-scope 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_PRINT_MOD_SCOPE +; +; Check that the reporting of IRs respects -print-module-scope +; RUN: opt -S -print-changed -passes=instsimplify -filter-print-funcs=f -print-module-scope 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_FUNC_FILTER_MOD_SCOPE +; +; Check that reporting of multiple functions happens +; RUN: opt -S -print-changed -passes=instsimplify -filter-print-funcs="f,g" 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_FILTER_MULT_FUNC +; +; Check that the reporting of IRs respects -filter-passes +; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass" 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_FILTER_PASSES +; +; Check that the reporting of IRs respects -filter-passes with multiple passes +; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_FILTER_MULT_PASSES +; +; Check that the reporting of IRs respects both -filter-passes and -filter-print-funcs +; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" -filter-print-funcs=f 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_FILTER_FUNC_PASSES +; +; Check that the reporting of IRs respects -filter-passes, -filter-print-funcs and -print-module-scope +; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" 
-filter-print-funcs=f -print-module-scope 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_FILTER_FUNC_PASSES_MOD_SCOPE +; +; Check that repeated passes that change the IR are printed and that the +; others (including g) are filtered out. Note that the second time +; instsimplify is run on f, it does not change the IR +; RUN: opt -S -print-changed -passes="instsimplify,instsimplify" -filter-print-funcs=f 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_MULT_PASSES_FILTER_FUNC + +define i32 @g() { +entry: + %a = add i32 2, 3 + ret i32 %a +} + +define i32 @f() { +entry: + %a = add i32 2, 3 + ret i32 %a +} + +; CHECK_SIMPLE: *** IR Dump At Start: *** +; CHECK_SIMPLE: ; ModuleID = '' +; CHECK_SIMPLE: *** IR Dump After VerifierPass (module) omitted because no change *** +; CHECK_SIMPLE: *** IR Dump After InstSimplifyPass *** (function: g) +; CHECK_SIMPLE: *** IR Pass PassManager (function: g) ignored *** +; CHECK_SIMPLE: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK_SIMPLE: *** IR Pass PassManager (function: f) ignored *** +; CHECK_SIMPLE: *** IR Pass ModuleToFunctionPassAdaptor{{ ?}}> (module) ignored *** +; CHECK_SIMPLE: *** IR Dump After VerifierPass (module) omitted because no change *** +; CHECK_SIMPLE: *** IR Dump After PrintModulePass (module) omitted because no change *** + +; CHECK_FUNC_FILTER: *** IR Dump At Start: *** +; CHECK_FUNC_FILTER: *** IR Dump After InstSimplifyPass (function: g) filtered out *** +; CHECK_FUNC_FILTER: *** IR Dump After InstSimplifyPass *** (function: f) + +; CHECK_PRINT_MOD_SCOPE: *** IR Dump At Start: *** +; CHECK_PRINT_MOD_SCOPE: *** IR Dump After InstSimplifyPass *** (function: g) +; CHECK_PRINT_MOD_SCOPE: ModuleID = '' +; CHECK_PRINT_MOD_SCOPE: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK_PRINT_MOD_SCOPE: ModuleID = '' + +; CHECK_FUNC_FILTER_MOD_SCOPE: *** IR Dump At Start: *** +; CHECK_FUNC_FILTER_MOD_SCOPE: *** IR Dump After InstSimplifyPass (function: g) filtered out *** +; CHECK_FUNC_FILTER_MOD_SCOPE: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK_FUNC_FILTER_MOD_SCOPE: ModuleID = '' + +; CHECK_FILTER_MULT_FUNC: *** IR Dump At Start: *** +; CHECK_FILTER_MULT_FUNC: *** IR Dump After InstSimplifyPass *** (function: g) +; CHECK_FILTER_MULT_FUNC: *** IR Dump After InstSimplifyPass *** (function: f) + +; CHECK_FILTER_PASSES: *** IR Dump After InstSimplifyPass (function: g) filtered out *** +; CHECK_FILTER_PASSES: *** IR Dump At Start: *** (function: g) +; CHECK_FILTER_PASSES: *** IR Dump After NoOpFunctionPass (function: g) omitted because no change *** +; CHECK_FILTER_PASSES: *** IR Dump After InstSimplifyPass (function: f) filtered out *** +; CHECK_FILTER_PASSES: *** IR Dump After NoOpFunctionPass (function: f) omitted because no change *** + +; CHECK_FILTER_MULT_PASSES: *** IR Dump At Start: *** (function: g) +; CHECK_FILTER_MULT_PASSES: *** IR Dump After InstSimplifyPass *** (function: g) +; CHECK_FILTER_MULT_PASSES: *** IR Dump After NoOpFunctionPass (function: g) omitted because no change *** +; CHECK_FILTER_MULT_PASSES: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK_FILTER_MULT_PASSES: *** IR Dump After NoOpFunctionPass (function: f) omitted because no change *** + +; CHECK_FILTER_FUNC_PASSES: *** IR Dump After InstSimplifyPass (function: g) filtered out *** +; CHECK_FILTER_FUNC_PASSES: *** IR Dump After NoOpFunctionPass (function: g) filtered out *** +; CHECK_FILTER_FUNC_PASSES: *** IR Dump At Start: *** (function: f) +; CHECK_FILTER_FUNC_PASSES: *** IR Dump After 
+; CHECK_FILTER_FUNC_PASSES: *** IR Dump After NoOpFunctionPass (function: f) omitted because no change ***
+
+; CHECK_FILTER_FUNC_PASSES_MOD_SCOPE: *** IR Dump After InstSimplifyPass (function: g) filtered out ***
+; CHECK_FILTER_FUNC_PASSES_MOD_SCOPE: *** IR Dump After NoOpFunctionPass (function: g) filtered out ***
+; CHECK_FILTER_FUNC_PASSES_MOD_SCOPE: *** IR Dump At Start: *** (function: f)
+; CHECK_FILTER_FUNC_PASSES_MOD_SCOPE: *** IR Dump After InstSimplifyPass *** (function: f)
+; CHECK_FILTER_FUNC_PASSES_MOD_SCOPE: ModuleID = '<stdin>'
+; CHECK_FILTER_FUNC_PASSES_MOD_SCOPE: *** IR Dump After NoOpFunctionPass (function: f) omitted because no change ***
+
+; CHECK_MULT_PASSES_FILTER_FUNC: *** IR Dump At Start: ***
+; CHECK_MULT_PASSES_FILTER_FUNC: *** IR Dump After InstSimplifyPass (function: g) filtered out ***
+; CHECK_MULT_PASSES_FILTER_FUNC: *** IR Dump After InstSimplifyPass (function: g) filtered out ***
+; CHECK_MULT_PASSES_FILTER_FUNC: *** IR Dump After InstSimplifyPass *** (function: f)
+; CHECK_MULT_PASSES_FILTER_FUNC: *** IR Dump After InstSimplifyPass (function: f) omitted because no change ***
From 50f4c7c785da87679fac1f483ef6a3e53dfca37a Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Wed, 16 Sep 2020 10:24:58 -0700
Subject: [PATCH 0861/1079] [llvm-nm] Use aggregate initialization instead of
 memset zero

---
 llvm/tools/llvm-nm/llvm-nm.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/tools/llvm-nm/llvm-nm.cpp b/llvm/tools/llvm-nm/llvm-nm.cpp
index ecd1e21e15bfb..a34352d1512c5 100644
--- a/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -1635,8 +1635,7 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
     }
     if (!found) {
       LastSymbolName = Entry.symbolName();
-      NMSymbol W;
-      memset(&W, '\0', sizeof(NMSymbol));
+      NMSymbol W = {};
       W.Name = Entry.symbolName();
       W.Address = 0;
       W.Size = 0;
From b011611e373c3d6dfddde5120ce7974cc8719d4a Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Wed, 16 Sep 2020 10:59:30 -0400
Subject: [PATCH 0862/1079] [SLP] add tests for reduction ordering; NFC

---
 .../SLPVectorizer/X86/compare-reduce.ll | 147 ++++++++++++++++++
 1 file changed, 147 insertions(+)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
index 3ac8c04774a4c..daa96bfa84aef 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
@@ -74,3 +74,150 @@ for.end: ; preds = %for.inc
 
 declare i32 @printf(i8* nocapture, ...)
 
+; PR41312 - the order of the reduction ops should not prevent forming a reduction.
+; The 'wrong' member of the reduction requires a greater cost if grouped with the
+; other candidates in the reduction because it does not have matching predicate
+; and/or constant operand.
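+;
+; As a sketch of the intended outcome (not asserted by these tests; the %c/%r
+; names are illustrative), if all four compares shared the same predicate and
+; constant, e.g.
+;   %c = fcmp ogt <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
+;   %r = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %c)
+; the chain would collapse to one vector compare plus an or-reduction, with the
+; 'wrong' member OR'd in as a scalar (see the _wrong_last case below).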
+
+define float @merge_anyof_v4f32_wrong_first(<4 x float> %x) {
+; CHECK-LABEL: @merge_anyof_v4f32_wrong_first(
+; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
+; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[X]], i32 1
+; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x float> [[X]], i32 2
+; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x float> [[X]], i32 3
+; CHECK-NEXT: [[CMP3WRONG:%.*]] = fcmp olt float [[X3]], 4.200000e+01
+; CHECK-NEXT: [[CMP0:%.*]] = fcmp ogt float [[X0]], 1.000000e+00
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[X1]], 1.000000e+00
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[X2]], 1.000000e+00
+; CHECK-NEXT: [[CMP3:%.*]] = fcmp ogt float [[X3]], 1.000000e+00
+; CHECK-NEXT: [[OR03:%.*]] = or i1 [[CMP0]], [[CMP3WRONG]]
+; CHECK-NEXT: [[OR031:%.*]] = or i1 [[OR03]], [[CMP1]]
+; CHECK-NEXT: [[OR0312:%.*]] = or i1 [[OR031]], [[CMP2]]
+; CHECK-NEXT: [[OR03123:%.*]] = or i1 [[OR0312]], [[CMP3]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[OR03123]], float -1.000000e+00, float 1.000000e+00
+; CHECK-NEXT: ret float [[R]]
+;
+ %x0 = extractelement <4 x float> %x, i32 0
+ %x1 = extractelement <4 x float> %x, i32 1
+ %x2 = extractelement <4 x float> %x, i32 2
+ %x3 = extractelement <4 x float> %x, i32 3
+ %cmp3wrong = fcmp olt float %x3, 42.0
+ %cmp0 = fcmp ogt float %x0, 1.0
+ %cmp1 = fcmp ogt float %x1, 1.0
+ %cmp2 = fcmp ogt float %x2, 1.0
+ %cmp3 = fcmp ogt float %x3, 1.0
+ %or03 = or i1 %cmp0, %cmp3wrong
+ %or031 = or i1 %or03, %cmp1
+ %or0312 = or i1 %or031, %cmp2
+ %or03123 = or i1 %or0312, %cmp3
+ %r = select i1 %or03123, float -1.0, float 1.0
+ ret float %r
+}
+
+define float @merge_anyof_v4f32_wrong_last(<4 x float> %x) {
+; CHECK-LABEL: @merge_anyof_v4f32_wrong_last(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
+; CHECK-NEXT: [[CMP3WRONG:%.*]] = fcmp olt float [[TMP1]], 4.200000e+01
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <4 x float> [[X]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[CMP3WRONG]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP4]], float -1.000000e+00, float 1.000000e+00
+; CHECK-NEXT: ret float [[R]]
+;
+ %x0 = extractelement <4 x float> %x, i32 0
+ %x1 = extractelement <4 x float> %x, i32 1
+ %x2 = extractelement <4 x float> %x, i32 2
+ %x3 = extractelement <4 x float> %x, i32 3
+ %cmp3wrong = fcmp olt float %x3, 42.0
+ %cmp0 = fcmp ogt float %x0, 1.0
+ %cmp1 = fcmp ogt float %x1, 1.0
+ %cmp2 = fcmp ogt float %x2, 1.0
+ %cmp3 = fcmp ogt float %x3, 1.0
+ %or03 = or i1 %cmp0, %cmp3
+ %or031 = or i1 %or03, %cmp1
+ %or0312 = or i1 %or031, %cmp2
+ %or03123 = or i1 %or0312, %cmp3wrong
+ %r = select i1 %or03123, float -1.0, float 1.0
+ ret float %r
+}
+
+define i32 @merge_anyof_v4i32_wrong_middle(<4 x i32> %x) {
+; CHECK-LABEL: @merge_anyof_v4i32_wrong_middle(
+; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
+; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
+; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
+; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
+; CHECK-NEXT: [[CMP3WRONG:%.*]] = icmp slt i32 [[X3]], 42
+; CHECK-NEXT: [[CMP0:%.*]] = icmp sgt i32 [[X0]], 1
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X1]], 1
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X2]], 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[X3]], 1
+; CHECK-NEXT: [[OR03:%.*]] = or i1 [[CMP0]], [[CMP3]]
+; CHECK-NEXT: [[OR033:%.*]] = or i1 [[OR03]], [[CMP3WRONG]]
+;
CHECK-NEXT: [[OR0332:%.*]] = or i1 [[OR033]], [[CMP2]] +; CHECK-NEXT: [[OR03321:%.*]] = or i1 [[OR0332]], [[CMP1]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[OR03321]], i32 -1, i32 1 +; CHECK-NEXT: ret i32 [[R]] +; + %x0 = extractelement <4 x i32> %x, i32 0 + %x1 = extractelement <4 x i32> %x, i32 1 + %x2 = extractelement <4 x i32> %x, i32 2 + %x3 = extractelement <4 x i32> %x, i32 3 + %cmp3wrong = icmp slt i32 %x3, 42 + %cmp0 = icmp sgt i32 %x0, 1 + %cmp1 = icmp sgt i32 %x1, 1 + %cmp2 = icmp sgt i32 %x2, 1 + %cmp3 = icmp sgt i32 %x3, 1 + %or03 = or i1 %cmp0, %cmp3 + %or033 = or i1 %or03, %cmp3wrong + %or0332 = or i1 %or033, %cmp2 + %or03321 = or i1 %or0332, %cmp1 + %r = select i1 %or03321, i32 -1, i32 1 + ret i32 %r +} + +define i32 @merge_anyof_v4i32_wrong_middle_better_rdx(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @merge_anyof_v4i32_wrong_middle_better_rdx( +; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 +; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 +; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 +; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 +; CHECK-NEXT: [[Y0:%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 0 +; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i32> [[Y]], i32 1 +; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i32> [[Y]], i32 2 +; CHECK-NEXT: [[Y3:%.*]] = extractelement <4 x i32> [[Y]], i32 3 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X1]], [[Y1]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[X0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[X3]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[Y3]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X2]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> undef, i32 [[Y0]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[Y3]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[X3]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[Y2]], i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i32> [[TMP4]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> [[TMP9]]) +; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP10]], [[CMP1]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP11]], i32 -1, i32 1 +; CHECK-NEXT: ret i32 [[R]] +; + %x0 = extractelement <4 x i32> %x, i32 0 + %x1 = extractelement <4 x i32> %x, i32 1 + %x2 = extractelement <4 x i32> %x, i32 2 + %x3 = extractelement <4 x i32> %x, i32 3 + %y0 = extractelement <4 x i32> %y, i32 0 + %y1 = extractelement <4 x i32> %y, i32 1 + %y2 = extractelement <4 x i32> %y, i32 2 + %y3 = extractelement <4 x i32> %y, i32 3 + %cmp3wrong = icmp slt i32 %x3, %y3 + %cmp0 = icmp sgt i32 %x0, %y0 + %cmp1 = icmp sgt i32 %x1, %y1 + %cmp2 = icmp sgt i32 %x2, %y2 + %cmp3 = icmp sgt i32 %x3, %y3 + %or03 = or i1 %cmp0, %cmp3 + %or033 = or i1 %or03, %cmp3wrong + %or0332 = or i1 %or033, %cmp2 + %or03321 = or i1 %or0332, %cmp1 + %r = select i1 %or03321, i32 -1, i32 1 + ret i32 %r +} From c6a82fdbf2ea691fdaf70fb07ae1f61d8452e1ac Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 16 Sep 2020 18:08:32 +0100 Subject: [PATCH 0863/1079] ValueEnumerator.cpp - remove duplicate includes. NFCI. 
Remove headers already included in ValueEnumerator.h --- llvm/lib/Bitcode/Writer/ValueEnumerator.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp index 8bdddc27e95ab..88279569bc028 100644 --- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -11,11 +11,9 @@ //===----------------------------------------------------------------------===// #include "ValueEnumerator.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Argument.h" -#include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -32,7 +30,6 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" -#include "llvm/IR/UseListOrder.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueSymbolTable.h" @@ -42,12 +39,9 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include -#include #include #include #include -#include -#include using namespace llvm; From 69682f993cc0545da30be32fab572a2a56074653 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 16 Sep 2020 18:09:30 +0100 Subject: [PATCH 0864/1079] InterferenceCache.cpp - remove duplicate includes. NFCI. Remove headers already included in InterferenceCache.h --- llvm/lib/CodeGen/InterferenceCache.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/lib/CodeGen/InterferenceCache.cpp b/llvm/lib/CodeGen/InterferenceCache.cpp index 7b50dac4cd1a7..617db0450d02e 100644 --- a/llvm/lib/CodeGen/InterferenceCache.cpp +++ b/llvm/lib/CodeGen/InterferenceCache.cpp @@ -12,19 +12,15 @@ #include "InterferenceCache.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include #include -#include #include using namespace llvm; From 73d02064d2533daecf6fe82b8608da8f6eed59a5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 16 Sep 2020 18:11:39 +0100 Subject: [PATCH 0865/1079] raw_ostream.cpp - remove duplicate includes. NFCI. Remove headers already included in raw_ostream.h --- llvm/lib/Support/raw_ostream.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp index c803724eb1cfa..48b42fec0acdf 100644 --- a/llvm/lib/Support/raw_ostream.cpp +++ b/llvm/lib/Support/raw_ostream.cpp @@ -12,7 +12,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" #include "llvm/Support/Compiler.h" @@ -30,7 +29,6 @@ #include #include #include -#include // may provide O_BINARY. #if defined(HAVE_FCNTL_H) From 8f7d6b2375618a79f621d5484e44870ede335a13 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 16 Sep 2020 18:32:03 +0100 Subject: [PATCH 0866/1079] DwarfUnit.h - remove unnecessary includes. NFCI. 
--- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 1 - llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 13 +++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index b469f91401f2c..8be6b889b8a99 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -13,7 +13,6 @@ #include "DwarfUnit.h" #include "AddressPool.h" #include "DwarfCompileUnit.h" -#include "DwarfDebug.h" #include "DwarfExpression.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 918e5045828d5..4cd66fb2cada8 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -16,22 +16,19 @@ #include "DwarfDebug.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringMap.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" -#include "llvm/IR/DIBuilder.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/MC/MCDwarf.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSection.h" +#include namespace llvm { -class MachineOperand; -class ConstantInt; class ConstantFP; +class ConstantInt; class DbgVariable; class DwarfCompileUnit; +class MachineOperand; +class MCDwarfDwoLineTable; +class MCSymbol; //===----------------------------------------------------------------------===// /// This dwarf writer support class manages information associated with a From c4e589b7954c4e202474ce4a2101f07014792835 Mon Sep 17 00:00:00 2001 From: Michael Kitzan Date: Fri, 21 Aug 2020 23:11:22 -0700 Subject: [PATCH 0867/1079] [GISel] Add new combines for unary FP instrs with constant operand https://reviews.llvm.org/D86393 Patch adds five new `GICombinerRules`, one for each of the following unary FP instrs: `G_FNEG`, `G_FABS`, `G_FPTRUNC`, `G_FSQRT`, and `G_FLOG2`. The combine rules perform the FP operation on the constant operand and replace the original instr with the result. Patch additionally adds new combiner tests for the AArch64 target to test these new combiner rules. 
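
As a hedged sketch of the effect (mirroring the added AArch64 MIR tests; the
virtual register numbers are illustrative), a unary FP instruction whose
operand is a G_FCONSTANT, such as

  %0:_(s32) = G_FCONSTANT float 5.500000e+00
  %1:_(s32) = G_FNEG %0

is folded by the combiner into a single constant definition:

  %1:_(s32) = G_FCONSTANT float -5.500000e+00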
---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 7 ++
 llvm/include/llvm/CodeGen/LowLevelType.h | 4 ++
 .../include/llvm/Target/GlobalISel/Combine.td | 12 +++-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 63 +++++++++++++++++
 llvm/lib/CodeGen/LowLevelType.cpp | 16 +++++
 .../AArch64/GlobalISel/combine-fabs.mir | 70 +++++++++++++++++++
 .../AArch64/GlobalISel/combine-flog2.mir | 36 ++++++++++
 .../AArch64/GlobalISel/combine-fneg.mir | 66 +++++++++++++++++
 .../AArch64/GlobalISel/combine-fptrunc.mir | 36 ++++++++++
 .../AArch64/GlobalISel/combine-fsqrt.mir | 39 +++++++++++
 10 files changed, 348 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-flog2.mir
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-fptrunc.mir
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-fsqrt.mir

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 87d5e6a18c8ad..8ee3b545815b2 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -17,6 +17,7 @@
 #ifndef LLVM_CODEGEN_GLOBALISEL_COMBINER_HELPER_H
 #define LLVM_CODEGEN_GLOBALISEL_COMBINER_HELPER_H
 
+#include "llvm/ADT/APFloat.h"
 #include "llvm/CodeGen/LowLevelType.h"
 #include "llvm/CodeGen/Register.h"
 #include "llvm/Support/Alignment.h"
@@ -266,6 +267,12 @@ class CombinerHelper {
   bool matchCombineUnmergeZExtToZExt(MachineInstr &MI);
   bool applyCombineUnmergeZExtToZExt(MachineInstr &MI);
 
+  /// Transform fp_instr(cst) to constant result of the fp operation.
+  bool matchCombineConstantFoldFpUnary(MachineInstr &MI,
+                                       Optional<APFloat> &Cst);
+  bool applyCombineConstantFoldFpUnary(MachineInstr &MI,
+                                       Optional<APFloat> &Cst);
+
   /// Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
   bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg);
   bool applyCombineI2PToP2I(MachineInstr &MI, Register &Reg);
diff --git a/llvm/include/llvm/CodeGen/LowLevelType.h b/llvm/include/llvm/CodeGen/LowLevelType.h
index 6295d86f749cb..402fa2ce61e74 100644
--- a/llvm/include/llvm/CodeGen/LowLevelType.h
+++ b/llvm/include/llvm/CodeGen/LowLevelType.h
@@ -23,6 +23,7 @@ namespace llvm {
 
 class DataLayout;
 class Type;
+struct fltSemantics;
 
 /// Construct a low-level type based on an LLVM type.
 LLT getLLTForType(Type &Ty, const DataLayout &DL);
@@ -35,6 +36,9 @@ MVT getMVTForLLT(LLT Ty);
 /// scalarable vector types, and will assert if used.
 LLT getLLTForMVT(MVT Ty);
 
+/// Get the appropriate floating point arithmetic semantic based on the bit size
+/// of the given scalar LLT.
+const llvm::fltSemantics &getFltSemanticForLLT(LLT Ty);
 }
 
 #endif // LLVM_CODEGEN_LOWLEVELTYPE_H
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 847a861c6b725..d3ccbb4049496 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -303,6 +303,15 @@ def simplify_add_to_sub: GICombineRule <
   (apply [{ return Helper.applySimplifyAddToSub(*${root}, ${info});}])
>;
 
+// Fold fp_op(cst) to the constant result of the floating point operation.
+def constant_fp_op_matchinfo: GIDefMatchData<"Optional<APFloat>">;
+def constant_fp_op: GICombineRule <
+  (defs root:$root, constant_fp_op_matchinfo:$info),
+  (match (wip_match_opcode G_FNEG, G_FABS, G_FPTRUNC, G_FSQRT, G_FLOG2):$root,
+    [{ return Helper.matchCombineConstantFoldFpUnary(*${root}, ${info}); }]),
+  (apply [{ return Helper.applyCombineConstantFoldFpUnary(*${root}, ${info}); }])
+>;
+
 // Fold int2ptr(ptr2int(x)) -> x
 def p2i_to_i2p_matchinfo: GIDefMatchData<"Register">;
 def p2i_to_i2p: GICombineRule<
@@ -505,4 +514,5 @@ def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
     known_bits_simplifications, ext_ext_fold,
     not_cmp_fold, opt_brcond_by_inverting_cond,
     unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc,
-    unmerge_zext_to_zext, trunc_ext_fold, trunc_shl]>;
+    unmerge_zext_to_zext, trunc_ext_fold, trunc_shl,
+    constant_fp_op]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 5e2b86200ce5e..938f55959d452 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1430,6 +1430,69 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
   return false;
 }
 
+static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
+                                             const Register Op,
+                                             const MachineRegisterInfo &MRI) {
+  const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
+  if (!MaybeCst)
+    return None;
+
+  APFloat V = MaybeCst->getValueAPF();
+  switch (Opcode) {
+  default:
+    llvm_unreachable("Unexpected opcode!");
+  case TargetOpcode::G_FNEG: {
+    V.changeSign();
+    return V;
+  }
+  case TargetOpcode::G_FABS: {
+    V.clearSign();
+    return V;
+  }
+  case TargetOpcode::G_FPTRUNC:
+    break;
+  case TargetOpcode::G_FSQRT: {
+    bool Unused;
+    V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
+    V = APFloat(sqrt(V.convertToDouble()));
+    break;
+  }
+  case TargetOpcode::G_FLOG2: {
+    bool Unused;
+    V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
+    V = APFloat(log2(V.convertToDouble()));
+    break;
+  }
+  }
+  // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
+  // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
+  // and `G_FLOG2` reach here.
+  bool Unused;
+  V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
+  return V;
+}
+
+bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
+                                                     Optional<APFloat> &Cst) {
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
+  return Cst.hasValue();
+}
+
+bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
+                                                     Optional<APFloat> &Cst) {
+  assert(Cst.hasValue() && "Optional is unexpectedly empty!");
+  Builder.setInstrAndDebugLoc(MI);
+  MachineFunction &MF = Builder.getMF();
+  auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
+  Register DstReg = MI.getOperand(0).getReg();
+  Builder.buildFConstant(DstReg, *FPVal);
+  MI.eraseFromParent();
+  return true;
+}
+
 bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
                                            PtrAddChain &MatchInfo) {
   // We're trying to match the following pattern:
diff --git a/llvm/lib/CodeGen/LowLevelType.cpp b/llvm/lib/CodeGen/LowLevelType.cpp
index 33752a1f9230f..2bda586db8c78 100644
--- a/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/llvm/lib/CodeGen/LowLevelType.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/ADT/APFloat.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/Support/raw_ostream.h"
@@ -58,3 +59,18 @@ LLT llvm::getLLTForMVT(MVT Ty) {
   return LLT::vector(Ty.getVectorNumElements(),
                      Ty.getVectorElementType().getSizeInBits());
 }
+
+const llvm::fltSemantics &llvm::getFltSemanticForLLT(LLT Ty) {
+  assert(Ty.isScalar() && "Expected a scalar type.");
+  switch (Ty.getSizeInBits()) {
+  case 16:
+    return APFloat::IEEEhalf();
+  case 32:
+    return APFloat::IEEEsingle();
+  case 64:
+    return APFloat::IEEEdouble();
+  case 128:
+    return APFloat::IEEEquad();
+  }
+  llvm_unreachable("Invalid FP type size.");
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fabs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fabs.mir
index 32aa60fe6045f..a543e7cd4c7e4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fabs.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fabs.mir
@@ -30,3 +30,73 @@ body: |
     %2:_(<2 x s32>) = G_FABS %1(<2 x s32>)
     $x0 = COPY %2(<2 x s32>)
 ...
+---
+name: test_combine_half_fabs_neg_constant
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_half_fabs_neg_constant
+    ; CHECK: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4580
+    ; CHECK: $h0 = COPY [[C]](s16)
+    %0:_(s16) = G_FCONSTANT half 0xHC580
+    %1:_(s16) = G_FABS %0
+    $h0 = COPY %1(s16)
+...
+---
+name: test_combine_half_fabs_pos_constant
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_half_fabs_pos_constant
+    ; CHECK: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4580
+    ; CHECK: $h0 = COPY [[C]](s16)
+    %0:_(s16) = G_FCONSTANT half 0xH4580
+    %1:_(s16) = G_FABS %0
+    $h0 = COPY %1(s16)
+...
+---
+name: test_combine_float_fabs_neg_constant
+body: |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: test_combine_float_fabs_neg_constant
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.500000e+00
+    ; CHECK: $w0 = COPY [[C]](s32)
+    %0:_(s32) = G_FCONSTANT float -5.500000e+00
+    %1:_(s32) = G_FABS %0
+    $w0 = COPY %1(s32)
+...
+--- +name: test_combine_float_fabs_pos_constant +body: | + bb.1: + liveins: $w0 + ; CHECK-LABEL: name: test_combine_float_fabs_pos_constant + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.500000e+00 + ; CHECK: $w0 = COPY [[C]](s32) + %0:_(s32) = G_FCONSTANT float -5.500000e+00 + %1:_(s32) = G_FABS %0 + $w0 = COPY %1(s32) +... +--- +name: test_combine_double_fabs_neg_constant +body: | + bb.1: + liveins: $x0 + ; CHECK-LABEL: name: test_combine_double_fabs_neg_constant + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.200000e+00 + ; CHECK: $x0 = COPY [[C]](s64) + %0:_(s64) = G_FCONSTANT double -4.200000e+00 + %1:_(s64) = G_FABS %0 + $x0 = COPY %1(s64) +... +--- +name: test_combine_double_fabs_pos_constant +body: | + bb.1: + liveins: $x0 + ; CHECK-LABEL: name: test_combine_double_fabs_pos_constant + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.200000e+00 + ; CHECK: $x0 = COPY [[C]](s64) + %0:_(s64) = G_FCONSTANT double 4.200000e+00 + %1:_(s64) = G_FABS %0 + $x0 = COPY %0(s64) +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-flog2.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-flog2.mir new file mode 100644 index 0000000000000..9e7e279e9e1a3 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-flog2.mir @@ -0,0 +1,36 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s + +--- +name: test_combine_half_flog2_constant +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_half_flog2_constant + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4000 + ; CHECK: $h0 = COPY [[C]](s16) + %0:_(s16) = G_FCONSTANT half 4.000000e+00 + %1:_(s16) = G_FLOG2 %0 + $h0 = COPY %1(s16) +... +--- +name: test_combine_float_flog2_constant +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_float_flog2_constant + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK: $w0 = COPY [[C]](s32) + %0:_(s32) = G_FCONSTANT float 4.000000e+00 + %1:_(s32) = G_FLOG2 %0 + $w0 = COPY %1(s32) +... +--- +name: test_combine_double_flog2_constant +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_double_flog2_constant + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 + ; CHECK: $x0 = COPY [[C]](s64) + %0:_(s64) = G_FCONSTANT double 4.000000e+00 + %1:_(s64) = G_FLOG2 %0 + $x0 = COPY %1(s64) +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fneg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fneg.mir index 2d0d23088770f..1b1077854b4c1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fneg.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fneg.mir @@ -26,3 +26,69 @@ body: | %2:_(<2 x s32>) = G_FNEG %1(<2 x s32>) $x0 = COPY %2(<2 x s32>) ... +--- +name: test_combine_half_fneg_neg_constant +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_half_fneg_neg_constant + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4580 + ; CHECK: $h0 = COPY [[C]](s16) + %0:_(s16) = G_FCONSTANT half 0xHC580 + %1:_(s16) = G_FNEG %0 + $h0 = COPY %1(s16) +... +--- +name: test_combine_half_fneg_pos_constant +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_half_fneg_pos_constant + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xHC580 + ; CHECK: $h0 = COPY [[C]](s16) + %0:_(s16) = G_FCONSTANT half 0xH4580 + %1:_(s16) = G_FNEG %0 + $h0 = COPY %1(s16) +... 
+--- +name: test_combine_float_fneg_neg_constant +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_float_fneg_neg_constant + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.500000e+00 + ; CHECK: $w0 = COPY [[C]](s32) + %0:_(s32) = G_FCONSTANT float -5.500000e+00 + %1:_(s32) = G_FNEG %0 + $w0 = COPY %1(s32) +... +--- +name: test_combine_float_fneg_pos_constant +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_float_fneg_pos_constant + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -5.500000e+00 + ; CHECK: $w0 = COPY [[C]](s32) + %0:_(s32) = G_FCONSTANT float 5.500000e+00 + %1:_(s32) = G_FNEG %0 + $w0 = COPY %1(s32) +... +--- +name: test_combine_double_fneg_neg_constant +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_double_fneg_neg_constant + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.200000e+00 + ; CHECK: $x0 = COPY [[C]](s64) + %0:_(s64) = G_FCONSTANT double -4.200000e+00 + %1:_(s64) = G_FNEG %0 + $x0 = COPY %1(s64) +... +--- +name: test_combine_double_fneg_pos_constant +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_double_fneg_pos_constant + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -4.200000e+00 + ; CHECK: $x0 = COPY [[C]](s64) + %0:_(s64) = G_FCONSTANT double 4.200000e+00 + %1:_(s64) = G_FNEG %0 + $x0 = COPY %1(s64) +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fptrunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fptrunc.mir new file mode 100644 index 0000000000000..1fd7f6f39caca --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fptrunc.mir @@ -0,0 +1,36 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s + +--- +name: test_combine_float_to_half_fptrunc_constant +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_float_to_half_fptrunc_constant + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4580 + ; CHECK: $h0 = COPY [[C]](s16) + %0:_(s32) = G_FCONSTANT float 5.500000e+00 + %1:_(s16) = G_FPTRUNC %0(s32) + $h0 = COPY %1(s16) +... +--- +name: test_combine_double_to_half_fptrunc_constant +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_double_to_half_fptrunc_constant + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4433 + ; CHECK: $h0 = COPY [[C]](s16) + %0:_(s64) = G_FCONSTANT double 4.200000e+00 + %1:_(s16) = G_FPTRUNC %0(s64) + $h0 = COPY %1(s16) +... +--- +name: test_combine_double_to_foat_fptrunc_constant +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_double_to_foat_fptrunc_constant + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x4010CCCCC0000000 + ; CHECK: $w0 = COPY [[C]](s32) + %0:_(s64) = G_FCONSTANT double 4.200000e+00 + %1:_(s32) = G_FPTRUNC %0(s64) + $w0 = COPY %1(s32) +... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fsqrt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fsqrt.mir new file mode 100644 index 0000000000000..e114d01793167 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fsqrt.mir @@ -0,0 +1,39 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s + +--- +name: test_combine_half_fsqrt_constant +body: | + bb.1: + liveins: + ; CHECK-LABEL: name: test_combine_half_fsqrt_constant + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4000 + ; CHECK: $h0 = COPY [[C]](s16) + %0:_(s16) = G_FCONSTANT half 4.000000e+00 + %1:_(s16) = G_FSQRT %0 + $h0 = COPY %1 +... +--- +name: test_combine_float_fsqrt_constant +body: | + bb.1: + liveins: + ; CHECK-LABEL: name: test_combine_float_fsqrt_constant + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK: $w0 = COPY [[C]](s32) + %0:_(s32) = G_FCONSTANT float 4.000000e+00 + %1:_(s32) = G_FSQRT %0 + $w0 = COPY %1 +... +--- +name: test_combine_double_fsqrt_constant +body: | + bb.1: + liveins: + ; CHECK-LABEL: name: test_combine_double_fsqrt_constant + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 + ; CHECK: $x0 = COPY [[C]](s64) + %0:_(s64) = G_FCONSTANT double 4.000000e+00 + %1:_(s64) = G_FSQRT %0 + $x0 = COPY %1 +... From ebf267b87d4b557dff488f87f66df3628e3da957 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Tue, 15 Sep 2020 13:44:22 -0700 Subject: [PATCH 0868/1079] [Sema][MSVC] warn at dynamic_cast/typeid when /GR- is given Differential Revision: https://reviews.llvm.org/D86369 --- clang/include/clang/Basic/DiagnosticGroups.td | 2 ++ .../clang/Basic/DiagnosticSemaKinds.td | 6 ++++ clang/lib/Sema/SemaCast.cpp | 12 +++++++ clang/lib/Sema/SemaExprCXX.cpp | 11 ++++++- clang/test/SemaCXX/ms-no-rtti-data.cpp | 32 +++++++++++++++++++ clang/test/SemaCXX/no-rtti-data.cpp | 32 +++++++++++++++++++ 6 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 clang/test/SemaCXX/ms-no-rtti-data.cpp create mode 100644 clang/test/SemaCXX/no-rtti-data.cpp diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 6b4dcc850612e..a9bd52b8afcdf 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1235,3 +1235,5 @@ in addition with the pragmas or -fmax-tokens flag to get any warnings. 
 }
 def WebAssemblyExceptionSpec : DiagGroup<"wasm-exception-spec">;
+
+def RTTI : DiagGroup<"rtti">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index e0d700c66724a..f6ded1b4ee266 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -7451,6 +7451,12 @@ def err_no_typeid_with_fno_rtti : Error<
   "use of typeid requires -frtti">;
 def err_no_dynamic_cast_with_fno_rtti : Error<
   "use of dynamic_cast requires -frtti">;
+def warn_no_dynamic_cast_with_rtti_disabled: Warning<
+  "dynamic_cast will not work since RTTI data is disabled by "
+  "%select{-fno-rtti-data|/GR-}0">, InGroup<RTTI>;
+def warn_no_typeid_with_rtti_disabled: Warning<
+  "typeid will not work since RTTI data is disabled by "
+  "%select{-fno-rtti-data|/GR-}0">, InGroup<RTTI>;
 def err_cannot_form_pointer_to_member_of_reference_type : Error<
   "cannot form a pointer-to-member to member %0 of reference type %1">;
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index f718154ce6db8..d59f1880a7fff 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -889,6 +889,18 @@ void CastOperation::CheckDynamicCast() {
     return;
   }
 
+  // Warns when dynamic_cast is used with RTTI data disabled.
+  if (!Self.getLangOpts().RTTIData) {
+    bool MicrosoftABI =
+        Self.getASTContext().getTargetInfo().getCXXABI().isMicrosoft();
+    bool isClangCL = Self.getDiagnostics().getDiagnosticOptions().getFormat() ==
+                     DiagnosticOptions::MSVC;
+    if (MicrosoftABI || !DestPointee->isVoidType())
+      Self.Diag(OpRange.getBegin(),
+                diag::warn_no_dynamic_cast_with_rtti_disabled)
+          << isClangCL;
+  }
+
   // Done. Everything else is run-time checks.
   Kind = CK_Dynamic;
 }
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index b5d4276f22b46..08b56413d8bff 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -663,7 +663,16 @@ Sema::ActOnCXXTypeid(SourceLocation OpLoc, SourceLocation LParenLoc,
   }
 
   // The operand is an expression.
-  return BuildCXXTypeId(TypeInfoType, OpLoc, (Expr*)TyOrExpr, RParenLoc);
+  ExprResult Result =
+      BuildCXXTypeId(TypeInfoType, OpLoc, (Expr *)TyOrExpr, RParenLoc);
+
+  if (!getLangOpts().RTTIData && !Result.isInvalid())
+    if (auto *CTE = dyn_cast<CXXTypeidExpr>(Result.get()))
+      if (CTE->isPotentiallyEvaluated() && !CTE->isMostDerived(Context))
+        Diag(OpLoc, diag::warn_no_typeid_with_rtti_disabled)
+            << (getDiagnostics().getDiagnosticOptions().getFormat() ==
+                DiagnosticOptions::MSVC);
+  return Result;
 }
 
 /// Grabs __declspec(uuid()) off a type, or returns 0 if we cannot resolve to
diff --git a/clang/test/SemaCXX/ms-no-rtti-data.cpp b/clang/test/SemaCXX/ms-no-rtti-data.cpp
new file mode 100644
index 0000000000000..aef167d8a3736
--- /dev/null
+++ b/clang/test/SemaCXX/ms-no-rtti-data.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 %s -triple x86_64-windows-msvc -fdiagnostics-format msvc -fno-rtti-data -fsyntax-only -verify
+
+namespace std {
+struct type_info {};
+} // namespace std
+class B {
+public:
+  virtual ~B() = default;
+};
+
+class D1 : public B {
+public:
+  ~D1() = default;
+};
+
+void f() {
+  B *b = new D1();
+  auto d = dynamic_cast<D1 *>(b); // expected-warning{{dynamic_cast will not work since RTTI data is disabled by /GR-}}
+  void *v = dynamic_cast<void *>(b); // expected-warning{{dynamic_cast will not work since RTTI data is disabled by /GR-}}
+
+  (void)typeid(int);
+  (void)typeid(b);
+  (void)typeid(*b); // expected-warning{{typeid will not work since RTTI data is disabled by /GR-}}
+  B b2 = *b;
+  (void)typeid(b2);
+  (void)typeid(*&b2); // expected-warning{{typeid will not work since RTTI data is disabled by /GR-}}
+  (void)typeid((B &)b2);
+
+  B &br = b2;
+  (void)typeid(br); // expected-warning{{typeid will not work since RTTI data is disabled by /GR-}}
+  (void)typeid(&br);
+}
\ No newline at end of file
diff --git a/clang/test/SemaCXX/no-rtti-data.cpp b/clang/test/SemaCXX/no-rtti-data.cpp
new file mode 100644
index 0000000000000..af0dc7c11bb81
--- /dev/null
+++ b/clang/test/SemaCXX/no-rtti-data.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux -fno-rtti-data -fsyntax-only -verify
+
+namespace std {
+struct type_info {};
+} // namespace std
+class B {
+public:
+  virtual ~B() = default;
+};
+
+class D1 : public B {
+public:
+  ~D1() = default;
+};
+
+void f() {
+  B *b = new D1();
+  auto d = dynamic_cast<D1 *>(b); // expected-warning{{dynamic_cast will not work since RTTI data is disabled by -fno-rtti-data}}
+  void *v = dynamic_cast<void *>(b);
+
+  (void)typeid(int);
+  (void)typeid(b);
+  (void)typeid(*b); // expected-warning{{typeid will not work since RTTI data is disabled by -fno-rtti-data}}
+  B b2 = *b;
+  (void)typeid(b2);
+  (void)typeid(*&b2); // expected-warning{{typeid will not work since RTTI data is disabled by -fno-rtti-data}}
+  (void)typeid((B &)b2);
+
+  B &br = b2;
+  (void)typeid(br); // expected-warning{{typeid will not work since RTTI data is disabled by -fno-rtti-data}}
+  (void)typeid(&br);
+}
\ No newline at end of file
From f3c2e0bcee64b0905addaefe9cd0c9ad4d20ac6f Mon Sep 17 00:00:00 2001
From: Matt Morehouse
Date: Tue, 15 Sep 2020 10:33:23 -0700
Subject: [PATCH 0869/1079] [libFuzzer] Enable entropic by default.

Entropic has performed at least on par with vanilla scheduling on Clusterfuzz, and has shown a slight coverage improvement on FuzzBench: https://www.fuzzbench.com/reports/2020-08-31/index.html Reviewed By: Dor1s Differential Revision: https://reviews.llvm.org/D87476 --- compiler-rt/lib/fuzzer/FuzzerDriver.cpp | 10 +++------- compiler-rt/lib/fuzzer/FuzzerFlags.def | 5 +++-- compiler-rt/lib/fuzzer/FuzzerOptions.h | 2 +- compiler-rt/test/fuzzer/cross_over_uniform_dist.test | 4 ++-- compiler-rt/test/fuzzer/keep-seed.test | 4 ++-- 5 files changed, 11 insertions(+), 14 deletions(-) diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp index 57df1238c398c..83ef642ceeb6e 100644 --- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -767,16 +767,12 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.EntropicNumberOfRarestFeatures = (size_t)Flags.entropic_number_of_rarest_features; Options.EntropicScalePerExecTime = Flags.entropic_scale_per_exec_time; - if (Options.Entropic) { - if (!Options.FocusFunction.empty()) { - Printf("ERROR: The parameters `--entropic` and `--focus_function` cannot " - "be used together.\n"); - exit(1); - } + if (!Options.FocusFunction.empty()) + Options.Entropic = false; // FocusFunction overrides entropic scheduling. + if (Options.Entropic) Printf("INFO: Running with entropic power schedule (0x%X, %d).\n", Options.EntropicFeatureFrequencyThreshold, Options.EntropicNumberOfRarestFeatures); - } struct EntropicOptions Entropic; Entropic.Enabled = Options.Entropic; Entropic.FeatureFrequencyThreshold = diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def index c9a787e03833d..4d4841b17ae42 100644 --- a/compiler-rt/lib/fuzzer/FuzzerFlags.def +++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def @@ -171,8 +171,9 @@ FUZZER_FLAG_INT(ignore_remaining_args, 0, "If 1, ignore all arguments passed " FUZZER_FLAG_STRING(focus_function, "Experimental. " "Fuzzing will focus on inputs that trigger calls to this function. " "If -focus_function=auto and -data_flow_trace is used, libFuzzer " - "will choose the focus functions automatically.") -FUZZER_FLAG_INT(entropic, 0, "Experimental. Enables entropic power schedule.") + "will choose the focus functions automatically. Disables -entropic when " + "specified.") +FUZZER_FLAG_INT(entropic, 1, "Enables entropic power schedule.") FUZZER_FLAG_INT(entropic_feature_frequency_threshold, 0xFF, "Experimental. 
If " "entropic is enabled, all features which are observed less often than " "the specified value are considered as rare.") diff --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h index 706e1c64c706c..20b810b2867fb 100644 --- a/compiler-rt/lib/fuzzer/FuzzerOptions.h +++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h @@ -46,7 +46,7 @@ struct FuzzingOptions { size_t MaxNumberOfRuns = -1L; int ReportSlowUnits = 10; bool OnlyASCII = false; - bool Entropic = false; + bool Entropic = true; size_t EntropicFeatureFrequencyThreshold = 0xFF; size_t EntropicNumberOfRarestFeatures = 100; bool EntropicScalePerExecTime = false; diff --git a/compiler-rt/test/fuzzer/cross_over_uniform_dist.test b/compiler-rt/test/fuzzer/cross_over_uniform_dist.test index 0dff5fd628f37..b5ae7e4659230 100644 --- a/compiler-rt/test/fuzzer/cross_over_uniform_dist.test +++ b/compiler-rt/test/fuzzer/cross_over_uniform_dist.test @@ -6,11 +6,11 @@ RUN: mkdir %t-corpus RUN: echo -n "@SELECT" > %t-corpus/A RUN: echo -n "@FROM WHERE" > %t-corpus/B -RUN: not %run %t-CrossOverUniformDistTest -keep_seed=1 -cross_over_uniform_dist=1 -seed=1 -runs=2000000 %t-corpus 2>&1 | FileCheck %s +RUN: not %run %t-CrossOverUniformDistTest -keep_seed=1 -cross_over_uniform_dist=1 -seed=1 -runs=5000000 %t-corpus 2>&1 | FileCheck %s CHECK: BINGO RUN: rm -rf %t-corpus RUN: mkdir %t-corpus RUN: echo -n "@SELECT" > %t-corpus/A RUN: echo -n "@FROM WHERE" > %t-corpus/B -RUN: %run %t-CrossOverUniformDistTest -keep_seed=1 -seed=1 -runs=2000000 %t-corpus 2>&1 +RUN: %run %t-CrossOverUniformDistTest -keep_seed=1 -seed=1 -runs=5000000 %t-corpus 2>&1 diff --git a/compiler-rt/test/fuzzer/keep-seed.test b/compiler-rt/test/fuzzer/keep-seed.test index 29212ac7c177c..a21cf46e8fe55 100644 --- a/compiler-rt/test/fuzzer/keep-seed.test +++ b/compiler-rt/test/fuzzer/keep-seed.test @@ -5,7 +5,7 @@ RUN: rm -rf %t-corpus RUN: mkdir %t-corpus RUN: echo -n SELECTxFROMxWHERE > %t-corpus/valid-fragments -RUN: not %run %t-KeepSeedTest -keep_seed=1 -seed=1 -runs=2000000 %t-corpus 2>&1 | FileCheck %s +RUN: not %run %t-KeepSeedTest -keep_seed=1 -seed=1 -runs=3000000 %t-corpus 2>&1 | FileCheck %s CHECK: BINGO RUN: rm -rf %t-corpus-baseline @@ -13,5 +13,5 @@ RUN: mkdir %t-corpus-baseline RUN: echo -n SELECTxFROMxWHERE > %t-corpus-baseline/valid-fragments # The following checks whether without -keep_seed=1 libFuzzer does not find the -# crashing input "SELECT FROM WHERE" even with 2x more runs. +# crashing input "SELECT FROM WHERE" even with more runs. RUN: %run %t-KeepSeedTest -seed=1 -runs=4000000 %t-corpus-baseline -print_final_stats=1 From 77a01d9498a79d2e6e3f366fdb363928f188ec11 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Wed, 9 Sep 2020 22:43:37 +0000 Subject: [PATCH 0870/1079] Sema: add support for `__attribute__((__swift_bridge__))` This extends semantic analysis of attributes for Swift interoperability by introducing the `swift_bridge` attribute. This attribute enables bridging Objective-C types to Swift specific types. 
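
For example (a sketch that mirrors the documentation added in this patch), the
attribute names the Swift type that an Objective-C interface bridges to:

  __attribute__((__swift_bridge__("BridgedI")))
  @interface I : Base
  @end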
This is based on the work of the original changes in
https://github.com/llvm/llvm-project-staging/commit/8afaf3aad2af43cfedca7a24cd817848c4e95c0c

Differential Revision: https://reviews.llvm.org/D87532
Reviewed By: Aaron Ballman
---
 clang/include/clang/Basic/Attr.td | 8 ++++++
 clang/include/clang/Basic/AttrDocs.td | 24 ++++++++++++++++++
 clang/lib/Sema/SemaDeclAttr.cpp | 19 ++++++++++++++
 clang/test/AST/attr-swift_bridge.m | 11 +++++++
 clang/test/SemaObjC/attr-swift_bridge.m | 33 +++++++++++++++++++++++++
 5 files changed, 95 insertions(+)
 create mode 100644 clang/test/AST/attr-swift_bridge.m
 create mode 100644 clang/test/SemaObjC/attr-swift_bridge.m

diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 6df3486182604..adef5b6a4495a 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -2130,6 +2130,14 @@ def Regparm : TypeAttr {
   let ASTNode = 0;
 }
 
+def SwiftBridge : InheritableAttr {
+  let Spellings = [GNU<"swift_bridge">];
+  let Args = [StringArgument<"SwiftType">];
+  let Subjects = SubjectList<[Tag, TypedefName, ObjCInterface, ObjCProtocol],
+                             ErrorDiag>;
+  let Documentation = [SwiftBridgeDocs];
+}
+
 def SwiftBridgedTypedef : InheritableAttr {
   let Spellings = [GNU<"swift_bridged_typedef">];
   let Subjects = SubjectList<[TypedefName], ErrorDiag>;
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 7aff443e9a12e..8706a3f4578c3 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -3476,6 +3476,30 @@ Swift.
   }];
 }
 
+def SwiftBridgeDocs : Documentation {
+  let Category = SwiftDocs;
+  let Heading = "swift_bridge";
+  let Content = [{
+The ``swift_bridge`` attribute indicates that the declaration to which the
+attribute appertains is bridged to the named Swift type.
+
+  .. code-block:: c
+
+    __attribute__((__objc_root_class__))
+    @interface Base
+    - (instancetype)init;
+    @end
+
+    __attribute__((__swift_bridge__("BridgedI")))
+    @interface I : Base
+    @end
+
+In this example, the Objective-C interface ``I`` will be made available to Swift
+with the name ``BridgedI``. It would be possible for the compiler to refer to
+``I`` still in order to bridge the type back to Objective-C.
+  }];
+}
+
 def SwiftBridgedTypedefDocs : Documentation {
   let Category = SwiftDocs;
   let Heading = "swift_bridged";
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 02ffd752233d1..5efc989db576d 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5524,6 +5524,22 @@ static void handleObjCPreciseLifetimeAttr(Sema &S, Decl *D,
   D->addAttr(::new (S.Context) ObjCPreciseLifetimeAttr(S.Context, AL));
 }
 
+static void handleSwiftBridge(Sema &S, Decl *D, const ParsedAttr &AL) {
+  // Make sure that there is a string literal as the annotation's single
+  // argument.
+  StringRef BT;
+  if (!S.checkStringLiteralArgumentAttr(AL, 0, BT))
+    return;
+
+  // Don't duplicate annotations that are already set.
+  if (D->hasAttr<SwiftBridgeAttr>()) {
+    S.Diag(AL.getLoc(), diag::warn_duplicate_attribute) << AL;
+    return;
+  }
+
+  D->addAttr(::new (S.Context) SwiftBridgeAttr(S.Context, AL, BT));
+}
+
 static bool isErrorParameter(Sema &S, QualType QT) {
   const auto *PT = QT->getAs<PointerType>();
   if (!PT)
@@ -7533,6 +7549,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
     break;
 
   // Swift attributes.
+  case ParsedAttr::AT_SwiftBridge:
+    handleSwiftBridge(S, D, AL);
+    break;
   case ParsedAttr::AT_SwiftBridgedTypedef:
     handleSimpleAttribute<SwiftBridgedTypedefAttr>(S, D, AL);
     break;
diff --git a/clang/test/AST/attr-swift_bridge.m b/clang/test/AST/attr-swift_bridge.m
new file mode 100644
index 0000000000000..2caa86bef4c0e
--- /dev/null
+++ b/clang/test/AST/attr-swift_bridge.m
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -fsyntax-only -ast-dump %s | FileCheck %s
+
+struct __attribute__((__swift_bridge__("BridgedS"))) S;
+// CHECK: RecordDecl {{.*}} struct S
+// CHECK: SwiftBridgeAttr {{.*}} "BridgedS"
+
+struct S {
+};
+
+// CHECK: RecordDecl {{.*}} struct S definition
+// CHECK: SwiftBridgeAttr {{.*}} Inherited "BridgedS"
diff --git a/clang/test/SemaObjC/attr-swift_bridge.m b/clang/test/SemaObjC/attr-swift_bridge.m
new file mode 100644
index 0000000000000..1c8259a6a2e7f
--- /dev/null
+++ b/clang/test/SemaObjC/attr-swift_bridge.m
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -verify -fsyntax-only %s
+
+// expected-error@+1 {{'__swift_bridge__' attribute takes one argument}}
+__attribute__((__swift_bridge__))
+@interface I
+@end
+
+// expected-error@+1 {{'__swift_bridge__' attribute requires a string}}
+__attribute__((__swift_bridge__(1)))
+@interface J
+@end
+
+// expected-error@+1 {{'__swift_bridge__' attribute takes one argument}}
+__attribute__((__swift_bridge__("K", 1)))
+@interface K
+@end
+
+@interface L
+// expected-error@+1 {{'__swift_bridge__' attribute only applies to tag types, typedefs, Objective-C interfaces, and Objective-C protocols}}
+- (void)method __attribute__((__swift_bridge__("method")));
+@end
+
+__attribute__((__swift_bridge__("Array")))
+@interface NSArray
+@end
+
+__attribute__((__swift_bridge__("ProtocolP")))
+@protocol P
+@end
+
+typedef NSArray *NSArrayAlias __attribute__((__swift_bridge__("ArrayAlias")));
+
+struct __attribute__((__swift_bridge__("StructT"))) T {};
From 4d437348d24d6342bdeb3ad84a64e57a889a0ea2 Mon Sep 17 00:00:00 2001
From: Zequan Wu
Date: Wed, 16 Sep 2020 11:03:04 -0700
Subject: [PATCH 0871/1079] fix test no-rtti.cpp

---
 clang/test/SemaCXX/no-rtti.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/SemaCXX/no-rtti.cpp b/clang/test/SemaCXX/no-rtti.cpp
index e0b57153c24c9..8082da219d5ad 100644
--- a/clang/test/SemaCXX/no-rtti.cpp
+++ b/clang/test/SemaCXX/no-rtti.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -fno-rtti %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -fsyntax-only -verify -fno-rtti %s
 
 namespace std {
   class type_info;
From 4d4f0922837de3f1aa9862ae8a8d941b3b6e5f78 Mon Sep 17 00:00:00 2001
From: Michael Liao
Date: Wed, 16 Sep 2020 08:52:02 -0400
Subject: [PATCH 0872/1079] [clang][codegen] Skip adding default function
 attributes on intrinsics.

- After loading builtin bitcode for linking, skip adding default function
  attributes on LLVM intrinsics as their attributes are well-defined and
  retrieved directly from internal definitions. Adding extra attributes on
  intrinsics produces inconsistent results when `-save-temps` is present.
  Also, that makes a few optimizations conservative.
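
For instance (a sketch based on the test added below), linked device library
bitcode may contain an intrinsic declaration such as

  declare float @llvm.fma.f32(float, float, float)

which must keep its well-defined intrinsic attributes instead of acquiring
frontend defaults like `convergent`.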
Differential Revision: https://reviews.llvm.org/D87761
---
 clang/lib/CodeGen/CodeGenAction.cpp | 7 ++++++-
 .../test/CodeGenCUDA/Inputs/device-lib-code.ll | 5 +++++
 .../dft-func-attr-skip-intrinsic.hip | 18 ++++++++++++++++++
 3 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CodeGenCUDA/Inputs/device-lib-code.ll
 create mode 100644 clang/test/CodeGenCUDA/dft-func-attr-skip-intrinsic.hip

diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index 5a6ce0f5dbd50..eda4beff78b7b 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -245,8 +245,13 @@ namespace clang {
     bool LinkInModules() {
       for (auto &LM : LinkModules) {
         if (LM.PropagateAttrs)
-          for (Function &F : *LM.Module)
+          for (Function &F : *LM.Module) {
+            // Skip intrinsics. Keep consistent with how intrinsics are created
+            // in LLVM IR.
+            if (F.isIntrinsic())
+              continue;
             Gen->CGM().addDefaultFunctionDefinitionAttributes(F);
+          }
 
         CurLinkModule = LM.Module.get();
 
diff --git a/clang/test/CodeGenCUDA/Inputs/device-lib-code.ll b/clang/test/CodeGenCUDA/Inputs/device-lib-code.ll
new file mode 100644
index 0000000000000..43ec911fb02cc
--- /dev/null
+++ b/clang/test/CodeGenCUDA/Inputs/device-lib-code.ll
@@ -0,0 +1,5 @@
+define linkonce_odr protected float @__ocml_fma_f32(float %0, float %1, float %2) local_unnamed_addr {
+  %4 = tail call float @llvm.fma.f32(float %0, float %1, float %2)
+  ret float %4
+}
+declare float @llvm.fma.f32(float, float, float)
diff --git a/clang/test/CodeGenCUDA/dft-func-attr-skip-intrinsic.hip b/clang/test/CodeGenCUDA/dft-func-attr-skip-intrinsic.hip
new file mode 100644
index 0000000000000..9e3e436200fc3
--- /dev/null
+++ b/clang/test/CodeGenCUDA/dft-func-attr-skip-intrinsic.hip
@@ -0,0 +1,18 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -x ir -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc -disable-llvm-passes -o %t.bc %S/Inputs/device-lib-code.ll
+// RUN: %clang_cc1 -x hip -fcuda-is-device -triple amdgcn-amd-amdhsa -mlink-builtin-bitcode %t.bc -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+extern "C" __device__ float __ocml_fma_f32(float x, float y, float z);
+
+__device__ float foo(float x) {
+  return __ocml_fma_f32(x, x, x);
+}
+
+// CHECK: {{^}}define{{.*}} @__ocml_fma_f32{{.*}} [[ATTR1:#[0-9]+]]
+// CHECK: {{^}}declare{{.*}} @llvm.fma.f32{{.*}} [[ATTR2:#[0-9]+]]
+// CHECK: attributes [[ATTR1]] = { convergent
+// CHECK: attributes [[ATTR2]] = {
+// CHECK-NOT: convergent
+// CHECK: }
From 6ad33d8360335143ef50e7f7b66ae1ce17aaa2a5 Mon Sep 17 00:00:00 2001
From: Amara Emerson
Date: Wed, 16 Sep 2020 11:19:08 -0700
Subject: [PATCH 0873/1079] [AArch64][GlobalISel] Make G_BUILD_VECTOR of
 <16 x s8> legal.
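
A minimal sketch taken from the test added below (the trailing operands are
elided here for brevity): a build vector of sixteen s8 elements, e.g.

  %2:_(<16 x s8>) = G_BUILD_VECTOR %0(s8), %1(s8), ... ; sixteen s8 operands

is now marked legal directly instead of requiring further legalization.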
--- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 3 ++- .../AArch64/GlobalISel/legalize-build-vector.mir | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 77e5f374c1af0..6b98e7a58328e 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -568,7 +568,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) }); getActionDefinitionsBuilder(G_BUILD_VECTOR) - .legalFor({{v4s16, s16}, + .legalFor({{v16s8, s8}, + {v4s16, s16}, {v8s16, s16}, {v2s32, s32}, {v4s32, s32}, diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir index 0b69a126f1ae0..bb2bc3372936f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir @@ -56,3 +56,19 @@ body: | $q0 = COPY %2(<2 x p0>) RET_ReallyLR ... +--- +name: legal_v16s8 +body: | + bb.0: + ; CHECK-LABEL: name: legal_v16s8 + ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF1]](s8), [[DEF]](s8), [[DEF1]](s8), [[DEF]](s8), [[DEF1]](s8), [[DEF]](s8), [[DEF1]](s8), [[DEF]](s8), [[DEF1]](s8), [[DEF]](s8), [[DEF1]](s8), [[DEF]](s8), [[DEF1]](s8), [[DEF]](s8), [[DEF1]](s8) + ; CHECK: $q0 = COPY [[BUILD_VECTOR]](<16 x s8>) + ; CHECK: RET_ReallyLR + %0:_(s8) = G_IMPLICIT_DEF + %1:_(s8) = G_IMPLICIT_DEF + %2:_(<16 x s8>) = G_BUILD_VECTOR %0(s8), %1(s8), %0(s8), %1(s8), %0(s8), %1(s8), %0(s8), %1(s8), %0(s8), %1(s8), %0(s8), %1(s8), %0(s8), %1(s8), %0(s8), %1(s8) + $q0 = COPY %2(<16 x s8>) + RET_ReallyLR +... From b3d33f5e838f8a181feb391fc96e74e3bb6be110 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 16 Sep 2020 14:21:14 -0400 Subject: [PATCH 0874/1079] [gn build] make "all" target build If you want to build everything, building the default target via just `ninja` is better, but `ninja all` shouldn't give you compile errors -- this fixes that. --- llvm/utils/gn/secondary/compiler-rt/lib/scudo/BUILD.gn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/scudo/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/scudo/BUILD.gn index 1143b265a3773..c8c057f85cd3c 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/scudo/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/scudo/BUILD.gn @@ -27,6 +27,8 @@ group("scudo") { # This target is unused, it only exists to satisfy # sync_source_lists_from_cmake.py. source_set("sources") { + configs -= [ "//llvm/utils/gn/build:llvm_code" ] + configs += [ "//llvm/utils/gn/build:crt_code" ] sources = [ "scudo_allocator.cpp", "scudo_allocator.h", From 88bdcbbf1aaef6ac99877cc511bf4b2a85343773 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 22 Aug 2020 12:34:38 -0400 Subject: [PATCH 0875/1079] GlobalISel: Lift store value widening restriction This doesn't change the memory size and doesn't need to worry about non-power-of-2 sizes. 
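
For example (a sketch matching the updated AMDGPU tests), a store of a
non-power-of-2 scalar such as s24

  %2:_(s24) = G_TRUNC %1
  G_STORE %2, %0 :: (store 3, align 4, addrspace 1)

can now have its value operand widened (here to s32) while the 3-byte memory
size of the G_STORE is preserved.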
--- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 2 +- .../GlobalISel/legalize-store-global.mir | 288 ++++++++++++++++++ .../AMDGPU/GlobalISel/legalize-store.mir | 112 ++++++- 3 files changed, 389 insertions(+), 13 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 347fe7b0ee98d..a8283e47acdd8 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2033,7 +2033,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return UnableToLegalize; LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (!isPowerOf2_32(Ty.getSizeInBits())) + if (!Ty.isScalar()) return UnableToLegalize; Observer.changingInstr(MI); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir index 8b607244eb8e7..80bd3e1f6ec8a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -44,6 +44,38 @@ body: | G_STORE %2, %0 :: (store 1, align 1, addrspace 1) ... +--- +name: test_store_global_s7_align1 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; SI-LABEL: name: test_store_global_s7_align1 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; CI-LABEL: name: test_store_global_s7_align1 + ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI-LABEL: name: test_store_global_s7_align1 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; GFX9-LABEL: name: test_store_global_s7_align1 + ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s7) = G_TRUNC %1 + G_STORE %2, %0 :: (store 1, align 1, addrspace 1) +... + --- name: test_store_global_s8_align1 body: | @@ -192,6 +224,262 @@ body: | G_STORE %2, %0 :: (store 2, align 4, addrspace 1) ... 
+--- +name: test_store_global_s24_align4 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; SI-LABEL: name: test_store_global_s24_align4 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1) + ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1) + ; CI-LABEL: name: test_store_global_s24_align4 + ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1) + ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1) + ; VI-LABEL: name: test_store_global_s24_align4 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1) + ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1) + ; GFX9-LABEL: name: test_store_global_s24_align4 + ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1) + ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s24) = G_TRUNC %1 + G_STORE %2, %0 :: (store 3, align 4, addrspace 1) +... 
+ +--- +name: test_store_global_s24_align2 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; SI-LABEL: name: test_store_global_s24_align2 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1) + ; CI-LABEL: name: test_store_global_s24_align2 + ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1) + ; VI-LABEL: name: test_store_global_s24_align2 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; VI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1) + ; GFX9-LABEL: name: test_store_global_s24_align2 + ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s24) = G_TRUNC %1 + G_STORE %2, %0 :: (store 3, align 2, addrspace 1) +... 
+ +--- +name: test_store_global_s24_align1 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; SI-LABEL: name: test_store_global_s24_align1 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C2]](s32) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; SI: G_STORE [[COPY6]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD]](p1) :: (store 1 + 1, addrspace 1) + ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD1]](p1) :: (store 1 + 2, addrspace 1) + ; CI-LABEL: name: test_store_global_s24_align1 + ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 1, addrspace 1) + ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, addrspace 1) + ; VI-LABEL: name: test_store_global_s24_align1 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) + ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; VI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store 1 + 1, addrspace 1) + ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: 
(store 1 + 2, addrspace 1) + ; GFX9-LABEL: name: test_store_global_s24_align1 + ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 1, addrspace 1) + ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, addrspace 1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s24) = G_TRUNC %1 + G_STORE %2, %0 :: (store 3, align 1, addrspace 1) +... + +--- +name: test_store_global_s25_align4 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; SI-LABEL: name: test_store_global_s25_align4 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + ; CI-LABEL: name: test_store_global_s25_align4 + ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + ; VI-LABEL: name: test_store_global_s25_align4 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + ; GFX9-LABEL: name: test_store_global_s25_align4 + ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s25) = G_TRUNC %1 + G_STORE %2, %0 :: (store 4, align 4, addrspace 1) +... + +# --- +# name: test_store_global_s25_align2 +# body: | +# bb.0: +# liveins: $vgpr0_vgpr1, $vgpr2 + +# %0:_(p1) = COPY $vgpr0_vgpr1 +# %1:_(s32) = COPY $vgpr2 +# %2:_(s25) = G_TRUNC %1 +# G_STORE %2, %0 :: (store 4, align 2, addrspace 1) +# ... + +# --- +# name: test_store_global_s25_align1 +# body: | +# bb.0: +# liveins: $vgpr0_vgpr1, $vgpr2 + +# %0:_(p1) = COPY $vgpr0_vgpr1 +# %1:_(s32) = COPY $vgpr2 +# %2:_(s25) = G_TRUNC %1 +# G_STORE %2, %0 :: (store 4, align 1, addrspace 1) +# ... 
+ --- name: test_store_global_s32_align1 body: | diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir index 758d5b01c9786..bba490ee57dad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -929,15 +929,59 @@ body: | ; SI-LABEL: name: test_truncstore_global_v3s8_to_1_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>) - ; SI: [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>) - ; SI: G_STORE [[BITCAST]](s24), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store 1, addrspace 1) ; VI-LABEL: name: test_truncstore_global_v3s8_to_1_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>) - ; VI: [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>) - ; VI: G_STORE [[BITCAST]](s24), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; VI: 
[[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) + ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s8>) = G_TRUNC %1 @@ -954,15 +998,59 @@ body: | ; SI-LABEL: name: test_truncstore_global_v3s8_to_2_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>) - ; SI: [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>) - ; SI: G_STORE [[BITCAST]](s24), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store 2, addrspace 1) ; VI-LABEL: name: test_truncstore_global_v3s8_to_2_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>) - ; VI: [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>) - ; VI: G_STORE [[BITCAST]](s24), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; VI: 
[[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) + ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s8>) = G_TRUNC %1 From 14e55f82980cf1342d4d3eea4885a5375e829496 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 16 Sep 2020 11:31:21 -0700 Subject: [PATCH 0876/1079] [obj2yaml] - Match ".stack_size" with the original section name, and not the uniquified name. Without this patch, obj2yaml decodes the content of only one ".stack_size" section. Other sections are dumped with their full contents. Reviewed By: grimar, MaskRay Differential Revision: https://reviews.llvm.org/D87727 --- llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml | 48 +++++++++++++++++++ llvm/tools/obj2yaml/elf2yaml.cpp | 2 +- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml b/llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml index 8e6c66729c4e0..98a5c5ae88aac 100644 --- a/llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml +++ b/llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml @@ -83,3 +83,51 @@ Sections: - Name: .stack_sizes Type: SHT_PROGBITS Content: "" + +## Check obj2yaml can dump multiple .stack_sizes. + +# RUN: yaml2obj --docnum=4 %s -o %t4 +# RUN: obj2yaml %t4 | FileCheck %s --check-prefix=MULTI + +# MULTI: --- !ELF +# MULTI-NEXT: FileHeader: +# MULTI-NEXT: Class: ELFCLASS64 +# MULTI-NEXT: Data: ELFDATA2LSB +# MULTI-NEXT: Type: ET_EXEC +# MULTI-NEXT: Machine: EM_NONE +# MULTI-NEXT: Sections: +# MULTI-NEXT: - Name: .stack_sizes +# MULTI-NEXT: Type: SHT_PROGBITS +# MULTI-NEXT: Entries: +# MULTI-NEXT: - Address: 0x0000000000000010 +# MULTI-NEXT: Size: 0x0000000000000020 +# MULTI-NEXT: - Address: 0x0000000000000030 +# MULTI-NEXT: Size: 0x0000000000000040 +# MULTI-NEXT: - Name: '.stack_sizes (1)' +# MULTI-NEXT: Type: SHT_PROGBITS +# MULTI-NEXT: Entries: +# MULTI-NEXT: - Address: 0x0000000000000050 +# MULTI-NEXT: Size: 0x0000000000000001 +# MULTI-NEXT: - Address: 0x0000000000000060 +# MULTI-NEXT: Size: 0x0000000000000002 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .stack_sizes + Type: SHT_PROGBITS + Entries: + - Address: 0x0000000000000010 + Size: 0x0000000000000020 + - Address: 0x0000000000000030 + Size: 0x0000000000000040 + - Name: '.stack_sizes (1)' + Type: SHT_PROGBITS + Entries: + - Address: 0x0000000000000050 + Size: 0x0000000000000001 + - Address: 0x0000000000000060 + Size: 0x0000000000000002 diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index 3c3bef2dfbf4c..d4bc135b4e0c2 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -522,7 +522,7 @@ ELFDumper::dumpSections() { // Recognize some special SHT_PROGBITS sections by name. 
if (Sec.sh_type == ELF::SHT_PROGBITS) { - auto NameOrErr = getUniquedSectionName(&Sec); + auto NameOrErr = Obj.getSectionName(&Sec); if (!NameOrErr) return NameOrErr.takeError(); From f723d193e2c92ea6903e3debfee32b13354808bc Mon Sep 17 00:00:00 2001 From: Patrick Beard Date: Thu, 30 Jul 2020 14:43:46 -0700 Subject: [PATCH 0877/1079] Add '<' meta command to read in code from external file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Perform all error handling in ReadCode() Add :help text describing “< path”, add extra line before Commands Differential Revision: https://reviews.llvm.org/D87640 --- lldb/source/Expression/REPL.cpp | 44 +++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/lldb/source/Expression/REPL.cpp b/lldb/source/Expression/REPL.cpp index fd7c39686921d..1f2b009c48935 100644 --- a/lldb/source/Expression/REPL.cpp +++ b/lldb/source/Expression/REPL.cpp @@ -123,10 +123,11 @@ const char *REPL::IOHandlerGetHelpPrologue() { "Valid statements, expressions, and declarations are immediately " "compiled and executed.\n\n" "The complete set of LLDB debugging commands are also available as " - "described below. Commands " + "described below.\n\nCommands " "must be prefixed with a colon at the REPL prompt (:quit for " "example.) Typing just a colon " - "followed by return will switch to the LLDB prompt.\n\n"; + "followed by return will switch to the LLDB prompt.\n\n" + "Type “< path” to read in code from a text file “path”.\n\n"; } bool REPL::IOHandlerIsInputComplete(IOHandler &io_handler, StringList &lines) { @@ -179,6 +180,36 @@ int REPL::IOHandlerFixIndentation(IOHandler &io_handler, return (int)desired_indent - actual_indent; } +static bool ReadCode(const std::string &path, std::string &code, + lldb::StreamFileSP &error_sp) { + auto &fs = FileSystem::Instance(); + llvm::Twine pathTwine(path); + if (!fs.Exists(pathTwine)) { + error_sp->Printf("no such file at path '%s'\n", path.c_str()); + return false; + } + if (!fs.Readable(pathTwine)) { + error_sp->Printf("could not read file at path '%s'\n", path.c_str()); + return false; + } + const size_t file_size = fs.GetByteSize(pathTwine); + const size_t max_size = code.max_size(); + if (file_size > max_size) { + error_sp->Printf("file at path '%s' too large: " + "file_size = %llu, max_size = %llu\n", + path.c_str(), file_size, max_size); + return false; + } + auto data_sp = fs.CreateDataBuffer(pathTwine); + if (data_sp == nullptr) { + error_sp->Printf("could not create buffer for file at path '%s'\n", + path.c_str()); + return false; + } + code.assign((const char *)data_sp->GetBytes(), data_sp->GetByteSize()); + return true; +} + void REPL::IOHandlerInputComplete(IOHandler &io_handler, std::string &code) { lldb::StreamFileSP output_sp(io_handler.GetOutputStreamFileSP()); lldb::StreamFileSP error_sp(io_handler.GetErrorStreamFileSP()); @@ -257,6 +288,15 @@ void REPL::IOHandlerInputComplete(IOHandler &io_handler, std::string &code) { } } } else { + if (code[0] == '<') { + // User wants to read code from a file. + // Interpret rest of line as a literal path. 
+ auto path = llvm::StringRef(code.substr(1)).trim().str(); + if (!ReadCode(path, code, error_sp)) { + return; + } + } + // Unwind any expression we might have been running in case our REPL // expression crashed and the user was looking around if (m_dedicated_repl_mode) { From dbde3969ba8e2b396333dc6b139a0b3a88dfbc80 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 13 Aug 2020 20:25:02 -0500 Subject: [PATCH 0878/1079] [UpdateTestChecks][NFC] Fix spelling --- llvm/utils/UpdateTestChecks/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index dd0e132969da3..a1759b40b524a 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -379,7 +379,7 @@ def get_value_use(var, match): return '[[' + get_value_name(var, match) + ']]' # Replace IR value defs and uses with FileCheck variables. -def genericize_check_lines(lines, is_analyze, vars_seen, global_vars_seen): +def generalize_check_lines(lines, is_analyze, vars_seen, global_vars_seen): # This gets called for each match that occurs in # a line. We transform variables we haven't seen # into defs, and variables we have seen into uses. @@ -466,7 +466,7 @@ def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, if attrs: output_lines.append('%s %s: Function Attrs: %s' % (comment_marker, checkprefix, attrs)) args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig) - args_and_sig = genericize_check_lines([args_and_sig], is_analyze, vars_seen, global_vars_seen)[0] + args_and_sig = generalize_check_lines([args_and_sig], is_analyze, vars_seen, global_vars_seen)[0] if '[[' in args_and_sig: output_lines.append(check_label_format % (checkprefix, func_name, '')) output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig)) @@ -486,7 +486,7 @@ def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, # For IR output, change all defs to FileCheck variables, so we're immune # to variable naming fashions. - func_body = genericize_check_lines(func_body, is_analyze, vars_seen, global_vars_seen) + func_body = generalize_check_lines(func_body, is_analyze, vars_seen, global_vars_seen) # This could be selectively enabled with an optional invocation argument. # Disabled for now: better to check everything. Be safe rather than sorry. 
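As a hedged illustration of what the renamed helper does (invented IR and
variable names, not taken from this patch): the first occurrence of each SSA
value is emitted as a FileCheck variable definition and every later
occurrence becomes a use, which is what makes the generated checks immune to
value-naming changes.

  ; Captured tool output for a function body:
    %sum = add i32 %a, %b
    ret i32 %sum

  ; Roughly what generalize_check_lines() emits into the test:
  ; CHECK: [[SUM:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
  ; CHECK-NEXT: ret i32 [[SUM]]
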
From 6a02932becaeaeb02eddfaed567f3dad3719dd1c Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 12 Aug 2020 19:44:25 -0500 Subject: [PATCH 0879/1079] [OpenMP][FIX] Do not crash trying to print a missing (demangled) user condition Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D85875 --- clang/lib/AST/OpenMPClause.cpp | 5 +- ...ast-dump-openmp-begin-declare-variant_13.c | 67 +++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 clang/test/AST/ast-dump-openmp-begin-declare-variant_13.c diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index e846d325560d0..ff9e9b2b34530 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -2201,7 +2201,10 @@ void OMPTraitInfo::print(llvm::raw_ostream &OS, OS << "("; if (Selector.Kind == TraitSelector::user_condition) { - Selector.ScoreOrCondition->printPretty(OS, nullptr, Policy); + if (Selector.ScoreOrCondition) + Selector.ScoreOrCondition->printPretty(OS, nullptr, Policy); + else + OS << "..."; } else { if (Selector.ScoreOrCondition) { diff --git a/clang/test/AST/ast-dump-openmp-begin-declare-variant_13.c b/clang/test/AST/ast-dump-openmp-begin-declare-variant_13.c new file mode 100644 index 0000000000000..93d847a077779 --- /dev/null +++ b/clang/test/AST/ast-dump-openmp-begin-declare-variant_13.c @@ -0,0 +1,67 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -verify -ast-dump %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -verify -ast-dump %s -x c++| FileCheck %s +// expected-no-diagnostics + +int also_before(void) { + return 1; +} + +#pragma omp begin declare variant match(user = {condition(1)}) +int also_after(void) { + return 0; +} +int also_before(void) { + return 0; +} +#pragma omp end declare variant + +int also_after(void) { + return 2; +} + +int test() { + // Should return 0. 
+ return also_after() + also_before(); +} + +// CHECK: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:7:1> line:5:5 used also_before 'int ({{.*}})' +// CHECK-NEXT: | |-CompoundStmt [[ADDR_1:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ReturnStmt [[ADDR_2:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_3:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | `-OMPDeclareVariantAttr [[ADDR_4:0x[a-z0-9]*]] <> Implicit user={condition(1)} +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_5:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_6:0x[a-z0-9]*]] 'also_before[user={condition(...)}]' 'int ({{.*}})' +// CHECK-NEXT: |-FunctionDecl [[ADDR_7:0x[a-z0-9]*]] col:5 implicit used also_after 'int ({{.*}})' +// CHECK-NEXT: | `-OMPDeclareVariantAttr [[ADDR_8:0x[a-z0-9]*]] <> Implicit user={condition(1)} +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_9:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_10:0x[a-z0-9]*]] 'also_after[user={condition(...)}]' 'int ({{.*}})' +// CHECK-NEXT: |-FunctionDecl [[ADDR_10]] line:10:1 also_after[user={condition(...)}] 'int ({{.*}})' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_11:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ReturnStmt [[ADDR_12:0x[a-z0-9]*]] +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_13:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: |-FunctionDecl [[ADDR_6]] line:13:1 also_before[user={condition(...)}] 'int ({{.*}})' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_14:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ReturnStmt [[ADDR_15:0x[a-z0-9]*]] +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_16:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: |-FunctionDecl [[ADDR_17:0x[a-z0-9]*]] prev [[ADDR_7]] line:18:5 used also_after 'int ({{.*}})' +// CHECK-NEXT: | |-CompoundStmt [[ADDR_18:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ReturnStmt [[ADDR_19:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_20:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: | `-OMPDeclareVariantAttr [[ADDR_21:0x[a-z0-9]*]] <> Inherited Implicit user={condition(1)} +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_9]] 'int ({{.*}})' {{.*}}Function [[ADDR_10]] 'also_after[user={condition(...)}]' 'int ({{.*}})' +// CHECK-NEXT: `-FunctionDecl [[ADDR_22:0x[a-z0-9]*]] line:22:5 test 'int ({{.*}})' +// CHECK-NEXT: `-CompoundStmt [[ADDR_23:0x[a-z0-9]*]] +// CHECK-NEXT: `-ReturnStmt [[ADDR_24:0x[a-z0-9]*]] +// CHECK-NEXT: `-BinaryOperator [[ADDR_25:0x[a-z0-9]*]] 'int' '+' +// CHECK-NEXT: |-PseudoObjectExpr [[ADDR_26:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | |-CallExpr [[ADDR_27:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_28:0x[a-z0-9]*]] 'int (*)({{.*}})' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_29:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_17]] 'also_after' 'int ({{.*}})' +// CHECK-NEXT: | `-CallExpr [[ADDR_30:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | `-ImplicitCastExpr [[ADDR_31:0x[a-z0-9]*]] 'int (*)({{.*}})' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_9]] 'int ({{.*}})' {{.*}}Function [[ADDR_10]] 'also_after[user={condition(...)}]' 'int ({{.*}})' +// CHECK-NEXT: `-PseudoObjectExpr [[ADDR_32:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: |-CallExpr [[ADDR_33:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | `-ImplicitCastExpr [[ADDR_34:0x[a-z0-9]*]] 'int (*)({{.*}})' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_35:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_0]] 'also_before' 'int ({{.*}})' +// CHECK-NEXT: `-CallExpr [[ADDR_36:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: `-ImplicitCastExpr [[ADDR_37:0x[a-z0-9]*]] 'int (*)({{.*}})' +// CHECK-NEXT: `-DeclRefExpr [[ADDR_5]] 'int ({{.*}})' {{.*}}Function [[ADDR_6]] 'also_before[user={condition(...)}]' 'int ({{.*}})' From 
05fd04eda4b22b09e33753132cbf037a1265c7e2 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 13 Aug 2020 01:12:31 -0500 Subject: [PATCH 0880/1079] [OpenMP][FIX] Do not drop a '$' while demangling declare variant names Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D85876 --- clang/lib/AST/OpenMPClause.cpp | 2 +- .../AST/ast-dump-openmp-declare-variant-extensions.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index ff9e9b2b34530..6590738268c60 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -2281,7 +2281,7 @@ OMPTraitInfo::OMPTraitInfo(StringRef MangledName) { Property.RawString = PropRestPair.first; Property.Kind = getOpenMPContextTraitPropertyKind( Set.Kind, Selector.Kind, PropRestPair.first); - MangledName = PropRestPair.second; + MangledName = MangledName.drop_front(PropRestPair.first.size()); } while (true); } while (true); } while (true); diff --git a/clang/test/AST/ast-dump-openmp-declare-variant-extensions.c b/clang/test/AST/ast-dump-openmp-declare-variant-extensions.c index 4a755282e39d3..577abbc5fe0b0 100644 --- a/clang/test/AST/ast-dump-openmp-declare-variant-extensions.c +++ b/clang/test/AST/ast-dump-openmp-declare-variant-extensions.c @@ -200,8 +200,8 @@ int test() { // CHECK-NEXT: | `-DeclRefExpr [[ADDR_111:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_18]] 'picked7' 'int ({{.*}})' non_odr_use_unevaluated // CHECK-NEXT: |-FunctionDecl [[ADDR_112:0x[a-z0-9]*]] col:5 implicit used overloaded1 'int ({{.*}})' // CHECK-NEXT: | `-OMPDeclareVariantAttr [[ADDR_113:0x[a-z0-9]*]] <> Implicit implementation={extension(match_any)}, device={kind(cpu, gpu)} -// CHECK-NEXT: | `-DeclRefExpr [[ADDR_114:0x[a-z0-9]*]] 'int ({{.*}})' Function [[ADDR_115:0x[a-z0-9]*]] 'overloaded1[implementation={extension(match_any)}]' 'int ({{.*}})' -// CHECK-NEXT: |-FunctionDecl [[ADDR_115]] col:1 overloaded1[implementation={extension(match_any)}] 'int ({{.*}})' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_114:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_115:0x[a-z0-9]*]] 'overloaded1[implementation={extension(match_any)}, device={kind(cpu, gpu)}]' 'int ({{.*}})' +// CHECK-NEXT: |-FunctionDecl [[ADDR_115]] col:1 overloaded1[implementation={extension(match_any)}, device={kind(cpu, gpu)}] 'int ({{.*}})' // CHECK-NEXT: | `-CompoundStmt [[ADDR_116:0x[a-z0-9]*]] // CHECK-NEXT: | `-ReturnStmt [[ADDR_117:0x[a-z0-9]*]] // CHECK-NEXT: | `-IntegerLiteral [[ADDR_118:0x[a-z0-9]*]] 'int' 0 @@ -210,8 +210,8 @@ int test() { // CHECK-NEXT: | | `-ReturnStmt [[ADDR_121:0x[a-z0-9]*]] // CHECK-NEXT: | | `-IntegerLiteral [[ADDR_122:0x[a-z0-9]*]] 'int' 1 // CHECK-NEXT: | `-OMPDeclareVariantAttr [[ADDR_123:0x[a-z0-9]*]] <> Implicit implementation={extension(match_none)}, device={kind(fpga, gpu)} -// CHECK-NEXT: | `-DeclRefExpr [[ADDR_124:0x[a-z0-9]*]] 'int ({{.*}})' Function [[ADDR_125:0x[a-z0-9]*]] 'overloaded2[implementation={extension(match_none)}]' 'int ({{.*}})' -// CHECK-NEXT: |-FunctionDecl [[ADDR_125]] col:1 overloaded2[implementation={extension(match_none)}] 'int ({{.*}})' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_124:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_125:0x[a-z0-9]*]] 'overloaded2[implementation={extension(match_none)}, device={kind(fpga, gpu)}]' 'int ({{.*}})' +// CHECK-NEXT: |-FunctionDecl [[ADDR_125]] col:1 overloaded2[implementation={extension(match_none)}, device={kind(fpga, gpu)}] 'int ({{.*}})' // CHECK-NEXT: | `-CompoundStmt 
[[ADDR_126:0x[a-z0-9]*]] // CHECK-NEXT: | `-ReturnStmt [[ADDR_127:0x[a-z0-9]*]] // CHECK-NEXT: | `-IntegerLiteral [[ADDR_128:0x[a-z0-9]*]] 'int' 0 @@ -333,11 +333,11 @@ int test() { // CHECK-NEXT: | | `-DeclRefExpr [[ADDR_236:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_112]] 'overloaded1' 'int ({{.*}})' // CHECK-NEXT: | `-CallExpr [[ADDR_237:0x[a-z0-9]*]] 'int' // CHECK-NEXT: | `-ImplicitCastExpr [[ADDR_238:0x[a-z0-9]*]] 'int (*)({{.*}})' -// CHECK-NEXT: | `-DeclRefExpr [[ADDR_114]] 'int ({{.*}})' Function [[ADDR_115]] 'overloaded1[implementation={extension(match_any)}]' 'int ({{.*}})' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_114]] 'int ({{.*}})' {{.*}}Function [[ADDR_115]] 'overloaded1[implementation={extension(match_any)}, device={kind(cpu, gpu)}]' 'int ({{.*}})' // CHECK-NEXT: `-PseudoObjectExpr [[ADDR_239:0x[a-z0-9]*]] 'int' // CHECK-NEXT: |-CallExpr [[ADDR_240:0x[a-z0-9]*]] 'int' // CHECK-NEXT: | `-ImplicitCastExpr [[ADDR_241:0x[a-z0-9]*]] 'int (*)({{.*}})' // CHECK-NEXT: | `-DeclRefExpr [[ADDR_242:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_119]] 'overloaded2' 'int ({{.*}})' // CHECK-NEXT: `-CallExpr [[ADDR_243:0x[a-z0-9]*]] 'int' // CHECK-NEXT: `-ImplicitCastExpr [[ADDR_244:0x[a-z0-9]*]] 'int (*)({{.*}})' -// CHECK-NEXT: `-DeclRefExpr [[ADDR_124]] 'int ({{.*}})' Function [[ADDR_125]] 'overloaded2[implementation={extension(match_none)}]' 'int ({{.*}})' +// CHECK-NEXT: `-DeclRefExpr [[ADDR_124]] 'int ({{.*}})' {{.*}}Function [[ADDR_125]] 'overloaded2[implementation={extension(match_none)}, device={kind(fpga, gpu)}]' 'int ({{.*}})' From 5c63ae156e96a20ce96570d4bd2c48a9c8170a9d Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 13 Aug 2020 01:05:51 -0500 Subject: [PATCH 0881/1079] [OpenMP] Support nested OpenMP context selectors (declare variant) Due to `omp begin/end declare variant`, OpenMP context selectors can be nested. This patch adds initial support for this so we can use it for target math variants. We should improve the detection of "equivalent" scores and user conditions, we should also revisit the data structures of the OMPTraitInfo object, however, both are not pressing issues right now. 
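As a hedged sketch of the nesting this patch enables (hypothetical function
name, mirroring the test added below): the selector of an inner directive is
merged with that of the enclosing one, so the variant is effectively guarded
by the conjunction of both.

  #pragma omp begin declare variant match(device = {kind(cpu)})
  #pragma omp begin declare variant match(implementation = {vendor(llvm)})
  int foo(void) { return 0; }
  // Effective, merged context selector for this variant of foo:
  //   device={kind(cpu)}, implementation={vendor(llvm)}
  #pragma omp end declare variant
  #pragma omp end declare variant
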
Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D85877 --- .../clang/Basic/DiagnosticParseKinds.td | 5 ++ .../clang/Basic/DiagnosticSemaKinds.td | 4 - clang/include/clang/Parse/Parser.h | 3 +- clang/include/clang/Sema/Sema.h | 6 ++ clang/lib/Parse/ParseOpenMP.cpp | 76 ++++++++++++++-- clang/lib/Sema/SemaOpenMP.cpp | 4 - ...dump-openmp-begin-declare-variant_nested.c | 87 +++++++++++++++++++ clang/test/OpenMP/declare_variant_messages.c | 14 +++ 8 files changed, 184 insertions(+), 15 deletions(-) create mode 100644 clang/test/AST/ast-dump-openmp-begin-declare-variant_nested.c diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 1c8d741ab54ff..1ac1e9d10a7a1 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1293,6 +1293,11 @@ def err_omp_mapper_expected_declarator : Error< "expected declarator on 'omp declare mapper' directive">; def err_omp_declare_variant_wrong_clause : Error< "expected '%0' clause on 'omp declare variant' directive">; +def err_omp_declare_variant_duplicate_nested_trait : Error< + "nested OpenMP context selector contains duplicated trait '%0'" + " in selector '%1' and set '%2' with different score">; +def err_omp_declare_variant_nested_user_condition : Error< + "nested user conditions in OpenMP context selector not supported (yet)">; def warn_omp_declare_variant_string_literal_or_identifier : Warning<"expected identifier or string literal describing a context " "%select{set|selector|property}0; " diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index f6ded1b4ee266..a9bd448ba0262 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10367,10 +10367,6 @@ def err_omp_non_lvalue_in_map_or_motion_clauses: Error< "expected addressable lvalue in '%0' clause">; def err_omp_var_expected : Error< "expected variable of the '%0' type%select{|, not %2}1">; -def warn_nested_declare_variant - : Warning<"nesting `omp begin/end declare variant` is not supported yet; " - "nested context ignored">, - InGroup; def warn_unknown_declare_variant_isa_trait : Warning<"isa trait '%0' is not known to the current target; verify the " "spelling or consider restricting the context selector with the " diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index af8cf47e56673..211827e99de84 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -3098,7 +3098,8 @@ class Parser : public CodeCompletionHandler { /// Parse a `match` clause for an '#pragma omp declare variant'. Return true /// if there was an error. - bool parseOMPDeclareVariantMatchClause(SourceLocation Loc, OMPTraitInfo &TI); + bool parseOMPDeclareVariantMatchClause(SourceLocation Loc, OMPTraitInfo &TI, + OMPTraitInfo *ParentTI); /// Parse clauses for '#pragma omp declare variant'. void ParseOMPDeclareVariantClauses(DeclGroupPtrTy Ptr, CachedTokens &Toks, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 129ac0355c87f..9502c104be68c 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -10019,6 +10019,12 @@ class Sema final { OMPDeclareVariantScope(OMPTraitInfo &TI); }; + /// Return the OMPTraitInfo for the surrounding scope, if any. 
+ OMPTraitInfo *getOMPTraitInfoForSurroundingScope() { + return OMPDeclareVariantScopes.empty() ? nullptr + : OMPDeclareVariantScopes.back().TI; + } + /// The current `omp begin/end declare variant` scopes. SmallVector OMPDeclareVariantScopes; diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index ceb91dce186c7..40124264fdb90 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -1385,8 +1385,10 @@ void Parser::ParseOMPDeclareVariantClauses(Parser::DeclGroupPtrTy Ptr, return; } - OMPTraitInfo &TI = Actions.getASTContext().getNewOMPTraitInfo(); - if (parseOMPDeclareVariantMatchClause(Loc, TI)) + OMPTraitInfo *ParentTI = Actions.getOMPTraitInfoForSurroundingScope(); + ASTContext &ASTCtx = Actions.getASTContext(); + OMPTraitInfo &TI = ASTCtx.getNewOMPTraitInfo(); + if (parseOMPDeclareVariantMatchClause(Loc, TI, ParentTI)) return; Optional> DeclVarData = @@ -1407,7 +1409,8 @@ void Parser::ParseOMPDeclareVariantClauses(Parser::DeclGroupPtrTy Ptr, } bool Parser::parseOMPDeclareVariantMatchClause(SourceLocation Loc, - OMPTraitInfo &TI) { + OMPTraitInfo &TI, + OMPTraitInfo *ParentTI) { // Parse 'match'. OpenMPClauseKind CKind = Tok.isAnnotation() ? OMPC_unknown @@ -1438,6 +1441,66 @@ bool Parser::parseOMPDeclareVariantMatchClause(SourceLocation Loc, // Parse ')' (void)T.consumeClose(); + + if (!ParentTI) + return false; + + // Merge the parent/outer trait info into the one we just parsed and diagnose + // problems. + // TODO: Keep some source location in the TI to provide better diagnostics. + // TODO: Perform some kind of equivalence check on the condition and score + // expressions. + for (const OMPTraitSet &ParentSet : ParentTI->Sets) { + bool MergedSet = false; + for (OMPTraitSet &Set : TI.Sets) { + if (Set.Kind != ParentSet.Kind) + continue; + MergedSet = true; + for (const OMPTraitSelector &ParentSelector : ParentSet.Selectors) { + bool MergedSelector = false; + for (OMPTraitSelector &Selector : Set.Selectors) { + if (Selector.Kind != ParentSelector.Kind) + continue; + MergedSelector = true; + for (const OMPTraitProperty &ParentProperty : + ParentSelector.Properties) { + bool MergedProperty = false; + for (OMPTraitProperty &Property : Selector.Properties) { + // Ignore "equivalent" properties. + if (Property.Kind != ParentProperty.Kind) + continue; + + // If the kind is the same but the raw string not, we don't want + // to skip out on the property. 
+ MergedProperty |= Property.RawString == ParentProperty.RawString; + + if (Property.RawString == ParentProperty.RawString && + Selector.ScoreOrCondition == ParentSelector.ScoreOrCondition) + continue; + + if (Selector.Kind == llvm::omp::TraitSelector::user_condition) { + Diag(Loc, diag::err_omp_declare_variant_nested_user_condition); + } else if (Selector.ScoreOrCondition != + ParentSelector.ScoreOrCondition) { + Diag(Loc, diag::err_omp_declare_variant_duplicate_nested_trait) + << getOpenMPContextTraitPropertyName( + ParentProperty.Kind, ParentProperty.RawString) + << getOpenMPContextTraitSelectorName(ParentSelector.Kind) + << getOpenMPContextTraitSetName(ParentSet.Kind); + } + } + if (!MergedProperty) + Selector.Properties.push_back(ParentProperty); + } + } + if (!MergedSelector) + Set.Selectors.push_back(ParentSelector); + } + } + if (!MergedSet) + TI.Sets.push_back(ParentSet); + } + return false; } @@ -1811,8 +1874,10 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( // { #pragma omp end declare variant } // ConsumeToken(); - OMPTraitInfo &TI = Actions.getASTContext().getNewOMPTraitInfo(); - if (parseOMPDeclareVariantMatchClause(Loc, TI)) + OMPTraitInfo *ParentTI = Actions.getOMPTraitInfoForSurroundingScope(); + ASTContext &ASTCtx = Actions.getASTContext(); + OMPTraitInfo &TI = ASTCtx.getNewOMPTraitInfo(); + if (parseOMPDeclareVariantMatchClause(Loc, TI, ParentTI)) break; // Skip last tokens. @@ -1821,7 +1886,6 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( ParsingOpenMPDirectiveRAII NormalScope(*this, /*Value=*/false); VariantMatchInfo VMI; - ASTContext &ASTCtx = Actions.getASTContext(); TI.getAsVariantMatchInfo(ASTCtx, VMI); std::function DiagUnknownTrait = [this, Loc]( diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 1a0470a9606d9..aef043b062997 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -2441,10 +2441,6 @@ void Sema::DestroyDataSharingAttributesStack() { delete DSAStack; } void Sema::ActOnOpenMPBeginDeclareVariant(SourceLocation Loc, OMPTraitInfo &TI) { - if (!OMPDeclareVariantScopes.empty()) { - Diag(Loc, diag::warn_nested_declare_variant); - return; - } OMPDeclareVariantScopes.push_back(OMPDeclareVariantScope(TI)); } diff --git a/clang/test/AST/ast-dump-openmp-begin-declare-variant_nested.c b/clang/test/AST/ast-dump-openmp-begin-declare-variant_nested.c new file mode 100644 index 0000000000000..e4b5b39ae87a0 --- /dev/null +++ b/clang/test/AST/ast-dump-openmp-begin-declare-variant_nested.c @@ -0,0 +1,87 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -verify -ast-dump %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -verify -ast-dump %s -x c++| FileCheck %s +// expected-no-diagnostics + +int also_before(void) { + return 1; +} + +#pragma omp begin declare variant match(user = {condition(1)}, device = {kind(cpu)}, implementation = {vendor(llvm)}) +#pragma omp begin declare variant match(device = {kind(cpu)}, implementation = {vendor(llvm, pgi), extension(match_any)}) +#pragma omp begin declare variant match(device = {kind(any)}, implementation = {dynamic_allocators}) +int also_after(void) { + return 0; +} +int also_before(void) { + return 0; +} +#pragma omp end declare variant +#pragma omp end declare variant +#pragma omp end declare variant + +int also_after(void) { + return 2; +} + +int test() { + // Should return 0. 
+ return also_after() + also_before(); +} + +#pragma omp begin declare variant match(device = {isa("sse")}) +#pragma omp declare variant(test) match(device = {isa(sse)}) +int equivalent_isa_trait(void); +#pragma omp end declare variant + +#pragma omp begin declare variant match(device = {isa("sse")}) +#pragma omp declare variant(test) match(device = {isa("sse2")}) +int non_equivalent_isa_trait(void); +#pragma omp end declare variant + +// CHECK: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:7:1> line:5:5 used also_before 'int ({{.*}})' +// CHECK-NEXT: | |-CompoundStmt [[ADDR_1:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ReturnStmt [[ADDR_2:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_3:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | `-OMPDeclareVariantAttr [[ADDR_4:0x[a-z0-9]*]] <> Implicit device={kind(any, cpu)}, implementation={dynamic_allocators, vendor(llvm, pgi), extension(match_any)}, user={condition(1)} +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_5:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_6:0x[a-z0-9]*]] 'also_before[device={kind(any, cpu)}, implementation={dynamic_allocators, vendor(llvm, pgi), extension(match_any)}, user={condition(...)}]' 'int ({{.*}})' +// CHECK-NEXT: |-FunctionDecl [[ADDR_7:0x[a-z0-9]*]] col:5 implicit used also_after 'int ({{.*}})' +// CHECK-NEXT: | `-OMPDeclareVariantAttr [[ADDR_8:0x[a-z0-9]*]] <> Implicit device={kind(any, cpu)}, implementation={dynamic_allocators, vendor(llvm, pgi), extension(match_any)}, user={condition(1)} +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_9:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_10:0x[a-z0-9]*]] 'also_after[device={kind(any, cpu)}, implementation={dynamic_allocators, vendor(llvm, pgi), extension(match_any)}, user={condition(...)}]' 'int ({{.*}})' +// CHECK-NEXT: |-FunctionDecl [[ADDR_10]] line:12:1 also_after[device={kind(any, cpu)}, implementation={dynamic_allocators, vendor(llvm, pgi), extension(match_any)}, user={condition(...)}] 'int ({{.*}})' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_11:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ReturnStmt [[ADDR_12:0x[a-z0-9]*]] +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_13:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: |-FunctionDecl [[ADDR_6]] line:15:1 also_before[device={kind(any, cpu)}, implementation={dynamic_allocators, vendor(llvm, pgi), extension(match_any)}, user={condition(...)}] 'int ({{.*}})' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_14:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ReturnStmt [[ADDR_15:0x[a-z0-9]*]] +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_16:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: |-FunctionDecl [[ADDR_17:0x[a-z0-9]*]] prev [[ADDR_7]] line:22:5 used also_after 'int ({{.*}})' +// CHECK-NEXT: | |-CompoundStmt [[ADDR_18:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ReturnStmt [[ADDR_19:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_20:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: | `-OMPDeclareVariantAttr [[ADDR_21:0x[a-z0-9]*]] <> Inherited Implicit device={kind(any, cpu)}, implementation={dynamic_allocators, vendor(llvm, pgi), extension(match_any)}, user={condition(1)} +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_9]] 'int ({{.*}})' {{.*}}Function [[ADDR_10]] 'also_after[device={kind(any, cpu)}, implementation={dynamic_allocators, vendor(llvm, pgi), extension(match_any)}, user={condition(...)}]' 'int ({{.*}})' +// CHECK-NEXT: |-FunctionDecl [[ADDR_22:0x[a-z0-9]*]] line:26:5 referenced test 'int ({{.*}})' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_23:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ReturnStmt [[ADDR_24:0x[a-z0-9]*]] +// CHECK-NEXT: | `-BinaryOperator [[ADDR_25:0x[a-z0-9]*]] 'int' '+' +// 
CHECK-NEXT: | |-PseudoObjectExpr [[ADDR_26:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-CallExpr [[ADDR_27:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_28:0x[a-z0-9]*]] 'int (*)({{.*}})' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_29:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_17]] 'also_after' 'int ({{.*}})' +// CHECK-NEXT: | | `-CallExpr [[ADDR_30:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_31:0x[a-z0-9]*]] 'int (*)({{.*}})' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_9]] 'int ({{.*}})' {{.*}}Function [[ADDR_10]] 'also_after[device={kind(any, cpu)}, implementation={dynamic_allocators, vendor(llvm, pgi), extension(match_any)}, user={condition(...)}]' 'int ({{.*}})' +// CHECK-NEXT: | `-PseudoObjectExpr [[ADDR_32:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | |-CallExpr [[ADDR_33:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_34:0x[a-z0-9]*]] 'int (*)({{.*}})' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_35:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_0]] 'also_before' 'int ({{.*}})' +// CHECK-NEXT: | `-CallExpr [[ADDR_36:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | `-ImplicitCastExpr [[ADDR_37:0x[a-z0-9]*]] 'int (*)({{.*}})' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_5]] 'int ({{.*}})' {{.*}}Function [[ADDR_6]] 'also_before[device={kind(any, cpu)}, implementation={dynamic_allocators, vendor(llvm, pgi), extension(match_any)}, user={condition(...)}]' 'int ({{.*}})' +// CHECK-NEXT: |-FunctionDecl [[ADDR_38:0x[a-z0-9]*]] col:5 equivalent_isa_trait 'int ({{.*}})' +// CHECK-NEXT: | `-OMPDeclareVariantAttr [[ADDR_39:0x[a-z0-9]*]] Implicit device={isa(sse)} +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_40:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_22]] 'test' 'int ({{.*}})' non_odr_use_unevaluated +// CHECK-NEXT: `-FunctionDecl [[ADDR_41:0x[a-z0-9]*]] col:5 non_equivalent_isa_trait 'int ({{.*}})' +// CHECK-NEXT: `-OMPDeclareVariantAttr [[ADDR_42:0x[a-z0-9]*]] Implicit device={isa(sse2, sse)} +// CHECK-NEXT: `-DeclRefExpr [[ADDR_43:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_22]] 'test' 'int ({{.*}})' non_odr_use_unevaluated diff --git a/clang/test/OpenMP/declare_variant_messages.c b/clang/test/OpenMP/declare_variant_messages.c index 84a56c5fd4094..2c63ca206fbbc 100644 --- a/clang/test/OpenMP/declare_variant_messages.c +++ b/clang/test/OpenMP/declare_variant_messages.c @@ -153,3 +153,17 @@ void caller() { #pragma omp declare variant // expected-error {{function declaration is expected after 'declare variant' directive}} #pragma omp declare variant // expected-error {{function declaration is expected after 'declare variant' directive}} + +// FIXME: If the scores are equivalent we should detect that and allow it. +#pragma omp begin declare variant match(implementation = {vendor(score(2) \ + : llvm)}) +#pragma omp declare variant(foo) match(implementation = {vendor(score(2) \ + : llvm)}) // expected-error@-1 {{nested OpenMP context selector contains duplicated trait 'llvm' in selector 'vendor' and set 'implementation' with different score}} +int conflicting_nested_score(void); +#pragma omp end declare variant + +// FIXME: We should build the conjuction of different conditions, see also the score fixme above. 
+#pragma omp begin declare variant match(user = {condition(1)}) +#pragma omp declare variant(foo) match(user = {condition(1)}) // expected-error {{nested user conditions in OpenMP context selector not supported (yet)}} +int conflicting_nested_condition(void); +#pragma omp end declare variant From c4b7a1da9d872ed075ce99c80a90b11a135577a0 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 12 Aug 2020 16:49:10 -0500 Subject: [PATCH 0882/1079] [OpenMP] Context selector extensions for return value overloading This extension allows to declare variants in between `omp begin/end declare variant` that do not match the type of the existing function with that name. Without this extension we would not find a base function (with a compatible type), therefore create a new one, which would cause conflicting declarations. With this extension we will not create "missing" base functions, which basically renders these specializations harmless. They will be generated but never called. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D85878 --- clang/include/clang/AST/OpenMPClause.h | 17 + clang/include/clang/Basic/AttrDocs.td | 6 + clang/lib/Parse/ParseOpenMP.cpp | 4 + clang/lib/Sema/SemaOpenMP.cpp | 8 +- ...nmp-begin-declare-variant-varying-return.c | 401 ++++++++++++++++++ .../include/llvm/Frontend/OpenMP/OMPKinds.def | 1 + 6 files changed, 435 insertions(+), 2 deletions(-) create mode 100644 clang/test/AST/ast-dump-openmp-begin-declare-variant-varying-return.c diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 35ab8ff39efa8..d101fcf214b5e 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -7856,6 +7856,23 @@ class OMPTraitInfo { /// Return a string representation identifying this context selector. std::string getMangledName() const; + /// Check the extension trait \p TP is active. + bool isExtensionActive(llvm::omp::TraitProperty TP) { + for (const OMPTraitSet &Set : Sets) { + if (Set.Kind != llvm::omp::TraitSet::implementation) + continue; + for (const OMPTraitSelector &Selector : Set.Selectors) { + if (Selector.Kind != llvm::omp::TraitSelector::implementation_extension) + continue; + for (const OMPTraitProperty &Property : Selector.Properties) { + if (Property.Kind == TP) + return true; + } + } + } + return false; + } + /// Print a human readable representation into \p OS. void print(llvm::raw_ostream &OS, const PrintingPolicy &Policy) const; }; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 8706a3f4578c3..e0f875a905b7e 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -3678,12 +3678,18 @@ Clang provides the following context selector extensions, used via match_all match_any match_none + disable_implicit_base The match extensions change when the *entire* context selector is considered a match for an OpenMP context. The default is ``all``, with ``none`` no trait in the selector is allowed to be in the OpenMP context, with ``any`` a single trait in both the selector and OpenMP context is sufficient. Only a single match extension trait is allowed per context selector. +The disable extensions remove default effects of the ``begin declare variant`` +applied to a definition. If ``disable_implicit_base`` is given, we will not +introduce an implicit base function for a variant if no base function was +found. 
The variant is still generated but will never be called, due to the +absence of a base function and consequently calls to a base function. }]; } diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 40124264fdb90..184dd48c391c2 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -935,6 +935,10 @@ static bool checkExtensionProperty(Parser &P, SourceLocation Loc, if (TIProperty.Kind == TraitProperty::invalid) return false; + if (TIProperty.Kind == + TraitProperty::implementation_extension_disable_implicit_base) + return true; + auto IsMatchExtension = [](OMPTraitProperty &TP) { return (TP.Kind == llvm::omp::TraitProperty::implementation_extension_match_all || diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index aef043b062997..36c257440a483 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -5871,6 +5871,7 @@ Sema::OMPDeclareVariantScope::OMPDeclareVariantScope(OMPTraitInfo &TI) FunctionDecl * Sema::ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope(Scope *S, Declarator &D) { + OMPDeclareVariantScope &DVScope = OMPDeclareVariantScopes.back(); IdentifierInfo *BaseII = D.getIdentifier(); LookupResult Lookup(*this, DeclarationName(BaseII), D.getIdentifierLoc(), LookupOrdinaryName); @@ -5905,12 +5906,15 @@ Sema::ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope(Scope *S, BaseFD = UDecl; break; } - if (!BaseFD) { + + bool UseImplicitBase = !DVScope.TI->isExtensionActive( + llvm::omp::TraitProperty::implementation_extension_disable_implicit_base); + // If no base was found we create a declaration that we use as base. + if (!BaseFD && UseImplicitBase) { BaseFD = cast(ActOnDeclarator(S, D)); BaseFD->setImplicit(true); } - OMPDeclareVariantScope &DVScope = OMPDeclareVariantScopes.back(); std::string MangledName; MangledName += D.getIdentifier()->getName(); MangledName += getOpenMPVariantManglingSeparatorStr(); diff --git a/clang/test/AST/ast-dump-openmp-begin-declare-variant-varying-return.c b/clang/test/AST/ast-dump-openmp-begin-declare-variant-varying-return.c new file mode 100644 index 0000000000000..dd81e2ee98c17 --- /dev/null +++ b/clang/test/AST/ast-dump-openmp-begin-declare-variant-varying-return.c @@ -0,0 +1,401 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -verify -ast-dump %s -DUSE_FLOAT | FileCheck %s --check-prefix=C_FLOAT +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -verify -ast-dump %s -x c++ -DUSE_FLOAT | FileCheck %s --check-prefix=CXX_FLOAT +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -verify -ast-dump %s | FileCheck %s --check-prefix=C_INT +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -verify -ast-dump %s -x c++ | FileCheck %s --check-prefix=CXX_INT +// expected-no-diagnostics + +#ifdef __cplusplus +#define OVERLOADABLE +#else +#define OVERLOADABLE __attribute__((overloadable)) +#endif + +#ifdef USE_FLOAT +#define RETURN_TY float +#define BEFORE_BASE_RETURN_VALUE 0 +#define BEFORE_VARIANT_RETURN_VALUE 1 +#define AFTER__BASE_RETURN_VALUE 1 +#define AFTER__VARIANT_RETURN_VALUE 0 +#else +#define RETURN_TY int +#define BEFORE_BASE_RETURN_VALUE 1 +#define BEFORE_VARIANT_RETURN_VALUE 0 +#define AFTER__BASE_RETURN_VALUE 0 +#define AFTER__VARIANT_RETURN_VALUE 1 +#endif + +OVERLOADABLE +RETURN_TY also_before(void) { + return BEFORE_BASE_RETURN_VALUE; +} +OVERLOADABLE +RETURN_TY also_before(int i) { + return BEFORE_BASE_RETURN_VALUE; +} + +#pragma omp begin declare variant match(implementation = 
{extension(disable_implicit_base)}) +OVERLOADABLE +int also_before(void) { + return BEFORE_VARIANT_RETURN_VALUE; +} +OVERLOADABLE +int also_before(int i) { + return BEFORE_VARIANT_RETURN_VALUE; +} + +OVERLOADABLE +int also_after(double d) { + return AFTER__VARIANT_RETURN_VALUE; +} +OVERLOADABLE +int also_after(long l) { + return AFTER__VARIANT_RETURN_VALUE; +} +#pragma omp end declare variant + +OVERLOADABLE +RETURN_TY also_after(double d) { + return AFTER__BASE_RETURN_VALUE; +} +OVERLOADABLE +RETURN_TY also_after(long l) { + return AFTER__BASE_RETURN_VALUE; +} + +int main() { + // Should return 0. + return also_before() + also_before(1) + also_before(2.0f) + also_after(3.0) + also_after(4L); +} + +// Make sure we see base calls in the FLOAT versions, that is no +// PseudoObjectExpr in those. In the INT versions we want PseudoObjectExpr (= +// variant calls) for the `*_before` functions but not the `*_after` ones +// (first 3 vs 2 last ones). + +// C_FLOAT: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:30:1> line:28:11 used also_before 'float ({{.*}})' +// C_FLOAT-NEXT: | |-CompoundStmt [[ADDR_1:0x[a-z0-9]*]] +// C_FLOAT-NEXT: | | `-ReturnStmt [[ADDR_2:0x[a-z0-9]*]] +// C_FLOAT-NEXT: | | `-ImplicitCastExpr [[ADDR_3:0x[a-z0-9]*]] 'float' +// C_FLOAT-NEXT: | | `-IntegerLiteral [[ADDR_4:0x[a-z0-9]*]] 'int' 0 +// C_FLOAT-NEXT: | `-OverloadableAttr [[ADDR_5:0x[a-z0-9]*]] +// C_FLOAT-NEXT: |-FunctionDecl [[ADDR_6:0x[a-z0-9]*]] line:32:11 used also_before 'float (int)' +// C_FLOAT-NEXT: | |-ParmVarDecl [[ADDR_7:0x[a-z0-9]*]] col:27 i 'int' +// C_FLOAT-NEXT: | |-CompoundStmt [[ADDR_8:0x[a-z0-9]*]] +// C_FLOAT-NEXT: | | `-ReturnStmt [[ADDR_9:0x[a-z0-9]*]] +// C_FLOAT-NEXT: | | `-ImplicitCastExpr [[ADDR_10:0x[a-z0-9]*]] 'float' +// C_FLOAT-NEXT: | | `-IntegerLiteral [[ADDR_11:0x[a-z0-9]*]] 'int' 0 +// C_FLOAT-NEXT: | `-OverloadableAttr [[ADDR_12:0x[a-z0-9]*]] +// C_FLOAT-NEXT: |-FunctionDecl [[ADDR_13:0x[a-z0-9]*]] line:10:22 also_before[implementation={extension(disable_implicit_base)}] 'int ({{.*}})' +// C_FLOAT-NEXT: | |-CompoundStmt [[ADDR_14:0x[a-z0-9]*]] +// C_FLOAT-NEXT: | | `-ReturnStmt [[ADDR_15:0x[a-z0-9]*]] +// C_FLOAT-NEXT: | | `-IntegerLiteral [[ADDR_16:0x[a-z0-9]*]] 'int' 1 +// C_FLOAT-NEXT: | `-OverloadableAttr [[ADDR_17:0x[a-z0-9]*]] +// C_FLOAT-NEXT: |-FunctionDecl [[ADDR_18:0x[a-z0-9]*]] line:10:22 also_before[implementation={extension(disable_implicit_base)}] 'int (int)' +// C_FLOAT-NEXT: | |-ParmVarDecl [[ADDR_19:0x[a-z0-9]*]] col:21 i 'int' +// C_FLOAT-NEXT: | |-CompoundStmt [[ADDR_20:0x[a-z0-9]*]] +// C_FLOAT-NEXT: | | `-ReturnStmt [[ADDR_21:0x[a-z0-9]*]] +// C_FLOAT-NEXT: | | `-IntegerLiteral [[ADDR_22:0x[a-z0-9]*]] 'int' 1 +// C_FLOAT-NEXT: | `-OverloadableAttr [[ADDR_23:0x[a-z0-9]*]] +// C_FLOAT-NEXT: |-FunctionDecl [[ADDR_24:0x[a-z0-9]*]] line:10:22 also_after[implementation={extension(disable_implicit_base)}] 'int (double)' +// C_FLOAT-NEXT: | |-ParmVarDecl [[ADDR_25:0x[a-z0-9]*]] col:23 d 'double' +// C_FLOAT-NEXT: | |-CompoundStmt [[ADDR_26:0x[a-z0-9]*]] +// C_FLOAT-NEXT: | | `-ReturnStmt [[ADDR_27:0x[a-z0-9]*]] +// C_FLOAT-NEXT: | | `-IntegerLiteral [[ADDR_28:0x[a-z0-9]*]] 'int' 0 +// C_FLOAT-NEXT: | `-OverloadableAttr [[ADDR_29:0x[a-z0-9]*]] +// C_FLOAT-NEXT: |-FunctionDecl [[ADDR_30:0x[a-z0-9]*]] line:10:22 also_after[implementation={extension(disable_implicit_base)}] 'int (long)' +// C_FLOAT-NEXT: | |-ParmVarDecl [[ADDR_31:0x[a-z0-9]*]] col:21 l 'long' +// C_FLOAT-NEXT: | |-CompoundStmt [[ADDR_32:0x[a-z0-9]*]] +// C_FLOAT-NEXT: | | `-ReturnStmt 
[[ADDR_33:0x[a-z0-9]*]] +// C_FLOAT-NEXT: | | `-IntegerLiteral [[ADDR_34:0x[a-z0-9]*]] 'int' 0 +// C_FLOAT-NEXT: | `-OverloadableAttr [[ADDR_35:0x[a-z0-9]*]] +// C_FLOAT-NEXT: |-FunctionDecl [[ADDR_36:0x[a-z0-9]*]] line:57:11 used also_after 'float (double)' +// C_FLOAT-NEXT: | |-ParmVarDecl [[ADDR_37:0x[a-z0-9]*]] col:29 d 'double' +// C_FLOAT-NEXT: | |-CompoundStmt [[ADDR_38:0x[a-z0-9]*]] +// C_FLOAT-NEXT: | | `-ReturnStmt [[ADDR_39:0x[a-z0-9]*]] +// C_FLOAT-NEXT: | | `-ImplicitCastExpr [[ADDR_40:0x[a-z0-9]*]] 'float' +// C_FLOAT-NEXT: | | `-IntegerLiteral [[ADDR_41:0x[a-z0-9]*]] 'int' 1 +// C_FLOAT-NEXT: | `-OverloadableAttr [[ADDR_42:0x[a-z0-9]*]] +// C_FLOAT-NEXT: |-FunctionDecl [[ADDR_43:0x[a-z0-9]*]] line:61:11 used also_after 'float (long)' +// C_FLOAT-NEXT: | |-ParmVarDecl [[ADDR_44:0x[a-z0-9]*]] col:27 l 'long' +// C_FLOAT-NEXT: | |-CompoundStmt [[ADDR_45:0x[a-z0-9]*]] +// C_FLOAT-NEXT: | | `-ReturnStmt [[ADDR_46:0x[a-z0-9]*]] +// C_FLOAT-NEXT: | | `-ImplicitCastExpr [[ADDR_47:0x[a-z0-9]*]] 'float' +// C_FLOAT-NEXT: | | `-IntegerLiteral [[ADDR_48:0x[a-z0-9]*]] 'int' 1 +// C_FLOAT-NEXT: | `-OverloadableAttr [[ADDR_49:0x[a-z0-9]*]] +// C_FLOAT-NEXT: `-FunctionDecl [[ADDR_50:0x[a-z0-9]*]] line:65:5 main 'int ({{.*}})' +// C_FLOAT-NEXT: `-CompoundStmt [[ADDR_51:0x[a-z0-9]*]] +// C_FLOAT-NEXT: `-ReturnStmt [[ADDR_52:0x[a-z0-9]*]] +// C_FLOAT-NEXT: `-ImplicitCastExpr [[ADDR_53:0x[a-z0-9]*]] 'int' +// C_FLOAT-NEXT: `-BinaryOperator [[ADDR_54:0x[a-z0-9]*]] 'float' '+' +// C_FLOAT-NEXT: |-BinaryOperator [[ADDR_55:0x[a-z0-9]*]] 'float' '+' +// C_FLOAT-NEXT: | |-BinaryOperator [[ADDR_56:0x[a-z0-9]*]] 'float' '+' +// C_FLOAT-NEXT: | | |-BinaryOperator [[ADDR_57:0x[a-z0-9]*]] 'float' '+' +// C_FLOAT-NEXT: | | | |-CallExpr [[ADDR_58:0x[a-z0-9]*]] 'float' +// C_FLOAT-NEXT: | | | | `-ImplicitCastExpr [[ADDR_59:0x[a-z0-9]*]] 'float (*)({{.*}})' +// C_FLOAT-NEXT: | | | | `-DeclRefExpr [[ADDR_60:0x[a-z0-9]*]] 'float ({{.*}})' {{.*}}Function [[ADDR_0]] 'also_before' 'float ({{.*}})' +// C_FLOAT-NEXT: | | | `-CallExpr [[ADDR_61:0x[a-z0-9]*]] 'float' +// C_FLOAT-NEXT: | | | |-ImplicitCastExpr [[ADDR_62:0x[a-z0-9]*]] 'float (*)(int)' +// C_FLOAT-NEXT: | | | | `-DeclRefExpr [[ADDR_63:0x[a-z0-9]*]] 'float (int)' {{.*}}Function [[ADDR_6]] 'also_before' 'float (int)' +// C_FLOAT-NEXT: | | | `-IntegerLiteral [[ADDR_64:0x[a-z0-9]*]] 'int' 1 +// C_FLOAT-NEXT: | | `-CallExpr [[ADDR_65:0x[a-z0-9]*]] 'float' +// C_FLOAT-NEXT: | | |-ImplicitCastExpr [[ADDR_66:0x[a-z0-9]*]] 'float (*)(int)' +// C_FLOAT-NEXT: | | | `-DeclRefExpr [[ADDR_67:0x[a-z0-9]*]] 'float (int)' {{.*}}Function [[ADDR_6]] 'also_before' 'float (int)' +// C_FLOAT-NEXT: | | `-ImplicitCastExpr [[ADDR_68:0x[a-z0-9]*]] 'int' +// C_FLOAT-NEXT: | | `-FloatingLiteral [[ADDR_69:0x[a-z0-9]*]] 'float' 2.000000e+00 +// C_FLOAT-NEXT: | `-CallExpr [[ADDR_70:0x[a-z0-9]*]] 'float' +// C_FLOAT-NEXT: | |-ImplicitCastExpr [[ADDR_71:0x[a-z0-9]*]] 'float (*)(double)' +// C_FLOAT-NEXT: | | `-DeclRefExpr [[ADDR_72:0x[a-z0-9]*]] 'float (double)' {{.*}}Function [[ADDR_36]] 'also_after' 'float (double)' +// C_FLOAT-NEXT: | `-FloatingLiteral [[ADDR_73:0x[a-z0-9]*]] 'double' 3.000000e+00 +// C_FLOAT-NEXT: `-CallExpr [[ADDR_74:0x[a-z0-9]*]] 'float' +// C_FLOAT-NEXT: |-ImplicitCastExpr [[ADDR_75:0x[a-z0-9]*]] 'float (*)(long)' +// C_FLOAT-NEXT: | `-DeclRefExpr [[ADDR_76:0x[a-z0-9]*]] 'float (long)' {{.*}}Function [[ADDR_43]] 'also_after' 'float (long)' +// C_FLOAT-NEXT: `-IntegerLiteral [[ADDR_77:0x[a-z0-9]*]] 'long' 4 + +// CXX_FLOAT: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] 
<{{.*}}, line:30:1> line:28:11 used also_before 'float ({{.*}})' +// CXX_FLOAT-NEXT: | `-CompoundStmt [[ADDR_1:0x[a-z0-9]*]] +// CXX_FLOAT-NEXT: | `-ReturnStmt [[ADDR_2:0x[a-z0-9]*]] +// CXX_FLOAT-NEXT: | `-ImplicitCastExpr [[ADDR_3:0x[a-z0-9]*]] 'float' +// CXX_FLOAT-NEXT: | `-IntegerLiteral [[ADDR_4:0x[a-z0-9]*]] 'int' 0 +// CXX_FLOAT-NEXT: |-FunctionDecl [[ADDR_5:0x[a-z0-9]*]] line:32:11 used also_before 'float (int)' +// CXX_FLOAT-NEXT: | |-ParmVarDecl [[ADDR_6:0x[a-z0-9]*]] col:27 i 'int' +// CXX_FLOAT-NEXT: | `-CompoundStmt [[ADDR_7:0x[a-z0-9]*]] +// CXX_FLOAT-NEXT: | `-ReturnStmt [[ADDR_8:0x[a-z0-9]*]] +// CXX_FLOAT-NEXT: | `-ImplicitCastExpr [[ADDR_9:0x[a-z0-9]*]] 'float' +// CXX_FLOAT-NEXT: | `-IntegerLiteral [[ADDR_10:0x[a-z0-9]*]] 'int' 0 +// CXX_FLOAT-NEXT: |-FunctionDecl [[ADDR_11:0x[a-z0-9]*]] line:38:1 also_before[implementation={extension(disable_implicit_base)}] 'int ({{.*}})' +// CXX_FLOAT-NEXT: | `-CompoundStmt [[ADDR_12:0x[a-z0-9]*]] +// CXX_FLOAT-NEXT: | `-ReturnStmt [[ADDR_13:0x[a-z0-9]*]] +// CXX_FLOAT-NEXT: | `-IntegerLiteral [[ADDR_14:0x[a-z0-9]*]] 'int' 1 +// CXX_FLOAT-NEXT: |-FunctionDecl [[ADDR_15:0x[a-z0-9]*]] line:42:1 also_before[implementation={extension(disable_implicit_base)}] 'int (int)' +// CXX_FLOAT-NEXT: | |-ParmVarDecl [[ADDR_16:0x[a-z0-9]*]] col:21 i 'int' +// CXX_FLOAT-NEXT: | `-CompoundStmt [[ADDR_17:0x[a-z0-9]*]] +// CXX_FLOAT-NEXT: | `-ReturnStmt [[ADDR_18:0x[a-z0-9]*]] +// CXX_FLOAT-NEXT: | `-IntegerLiteral [[ADDR_19:0x[a-z0-9]*]] 'int' 1 +// CXX_FLOAT-NEXT: |-FunctionDecl [[ADDR_20:0x[a-z0-9]*]] line:47:1 also_after[implementation={extension(disable_implicit_base)}] 'int (double)' +// CXX_FLOAT-NEXT: | |-ParmVarDecl [[ADDR_21:0x[a-z0-9]*]] col:23 d 'double' +// CXX_FLOAT-NEXT: | `-CompoundStmt [[ADDR_22:0x[a-z0-9]*]] +// CXX_FLOAT-NEXT: | `-ReturnStmt [[ADDR_23:0x[a-z0-9]*]] +// CXX_FLOAT-NEXT: | `-IntegerLiteral [[ADDR_24:0x[a-z0-9]*]] 'int' 0 +// CXX_FLOAT-NEXT: |-FunctionDecl [[ADDR_25:0x[a-z0-9]*]] line:51:1 also_after[implementation={extension(disable_implicit_base)}] 'int (long)' +// CXX_FLOAT-NEXT: | |-ParmVarDecl [[ADDR_26:0x[a-z0-9]*]] col:21 l 'long' +// CXX_FLOAT-NEXT: | `-CompoundStmt [[ADDR_27:0x[a-z0-9]*]] +// CXX_FLOAT-NEXT: | `-ReturnStmt [[ADDR_28:0x[a-z0-9]*]] +// CXX_FLOAT-NEXT: | `-IntegerLiteral [[ADDR_29:0x[a-z0-9]*]] 'int' 0 +// CXX_FLOAT-NEXT: |-FunctionDecl [[ADDR_30:0x[a-z0-9]*]] line:57:11 used also_after 'float (double)' +// CXX_FLOAT-NEXT: | |-ParmVarDecl [[ADDR_31:0x[a-z0-9]*]] col:29 d 'double' +// CXX_FLOAT-NEXT: | `-CompoundStmt [[ADDR_32:0x[a-z0-9]*]] +// CXX_FLOAT-NEXT: | `-ReturnStmt [[ADDR_33:0x[a-z0-9]*]] +// CXX_FLOAT-NEXT: | `-ImplicitCastExpr [[ADDR_34:0x[a-z0-9]*]] 'float' +// CXX_FLOAT-NEXT: | `-IntegerLiteral [[ADDR_35:0x[a-z0-9]*]] 'int' 1 +// CXX_FLOAT-NEXT: |-FunctionDecl [[ADDR_36:0x[a-z0-9]*]] line:61:11 used also_after 'float (long)' +// CXX_FLOAT-NEXT: | |-ParmVarDecl [[ADDR_37:0x[a-z0-9]*]] col:27 l 'long' +// CXX_FLOAT-NEXT: | `-CompoundStmt [[ADDR_38:0x[a-z0-9]*]] +// CXX_FLOAT-NEXT: | `-ReturnStmt [[ADDR_39:0x[a-z0-9]*]] +// CXX_FLOAT-NEXT: | `-ImplicitCastExpr [[ADDR_40:0x[a-z0-9]*]] 'float' +// CXX_FLOAT-NEXT: | `-IntegerLiteral [[ADDR_41:0x[a-z0-9]*]] 'int' 1 +// CXX_FLOAT-NEXT: `-FunctionDecl [[ADDR_42:0x[a-z0-9]*]] line:65:5 main 'int ({{.*}})' +// CXX_FLOAT-NEXT: `-CompoundStmt [[ADDR_43:0x[a-z0-9]*]] +// CXX_FLOAT-NEXT: `-ReturnStmt [[ADDR_44:0x[a-z0-9]*]] +// CXX_FLOAT-NEXT: `-ImplicitCastExpr [[ADDR_45:0x[a-z0-9]*]] 'int' +// CXX_FLOAT-NEXT: `-BinaryOperator [[ADDR_46:0x[a-z0-9]*]] 
'float' '+' +// CXX_FLOAT-NEXT: |-BinaryOperator [[ADDR_47:0x[a-z0-9]*]] 'float' '+' +// CXX_FLOAT-NEXT: | |-BinaryOperator [[ADDR_48:0x[a-z0-9]*]] 'float' '+' +// CXX_FLOAT-NEXT: | | |-BinaryOperator [[ADDR_49:0x[a-z0-9]*]] 'float' '+' +// CXX_FLOAT-NEXT: | | | |-CallExpr [[ADDR_50:0x[a-z0-9]*]] 'float' +// CXX_FLOAT-NEXT: | | | | `-ImplicitCastExpr [[ADDR_51:0x[a-z0-9]*]] 'float (*)({{.*}})' +// CXX_FLOAT-NEXT: | | | | `-DeclRefExpr [[ADDR_52:0x[a-z0-9]*]] 'float ({{.*}})' {{.*}}Function [[ADDR_0]] 'also_before' 'float ({{.*}})' +// CXX_FLOAT-NEXT: | | | `-CallExpr [[ADDR_53:0x[a-z0-9]*]] 'float' +// CXX_FLOAT-NEXT: | | | |-ImplicitCastExpr [[ADDR_54:0x[a-z0-9]*]] 'float (*)(int)' +// CXX_FLOAT-NEXT: | | | | `-DeclRefExpr [[ADDR_55:0x[a-z0-9]*]] 'float (int)' {{.*}}Function [[ADDR_5]] 'also_before' 'float (int)' +// CXX_FLOAT-NEXT: | | | `-IntegerLiteral [[ADDR_56:0x[a-z0-9]*]] 'int' 1 +// CXX_FLOAT-NEXT: | | `-CallExpr [[ADDR_57:0x[a-z0-9]*]] 'float' +// CXX_FLOAT-NEXT: | | |-ImplicitCastExpr [[ADDR_58:0x[a-z0-9]*]] 'float (*)(int)' +// CXX_FLOAT-NEXT: | | | `-DeclRefExpr [[ADDR_59:0x[a-z0-9]*]] 'float (int)' {{.*}}Function [[ADDR_5]] 'also_before' 'float (int)' +// CXX_FLOAT-NEXT: | | `-ImplicitCastExpr [[ADDR_60:0x[a-z0-9]*]] 'int' +// CXX_FLOAT-NEXT: | | `-FloatingLiteral [[ADDR_61:0x[a-z0-9]*]] 'float' 2.000000e+00 +// CXX_FLOAT-NEXT: | `-CallExpr [[ADDR_62:0x[a-z0-9]*]] 'float' +// CXX_FLOAT-NEXT: | |-ImplicitCastExpr [[ADDR_63:0x[a-z0-9]*]] 'float (*)(double)' +// CXX_FLOAT-NEXT: | | `-DeclRefExpr [[ADDR_64:0x[a-z0-9]*]] 'float (double)' {{.*}}Function [[ADDR_30]] 'also_after' 'float (double)' +// CXX_FLOAT-NEXT: | `-FloatingLiteral [[ADDR_65:0x[a-z0-9]*]] 'double' 3.000000e+00 +// CXX_FLOAT-NEXT: `-CallExpr [[ADDR_66:0x[a-z0-9]*]] 'float' +// CXX_FLOAT-NEXT: |-ImplicitCastExpr [[ADDR_67:0x[a-z0-9]*]] 'float (*)(long)' +// CXX_FLOAT-NEXT: | `-DeclRefExpr [[ADDR_68:0x[a-z0-9]*]] 'float (long)' {{.*}}Function [[ADDR_36]] 'also_after' 'float (long)' +// CXX_FLOAT-NEXT: `-IntegerLiteral [[ADDR_69:0x[a-z0-9]*]] 'long' 4 + +// C_INT: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:30:1> line:28:11 used also_before 'int ({{.*}})' +// C_INT-NEXT: | |-CompoundStmt [[ADDR_1:0x[a-z0-9]*]] +// C_INT-NEXT: | | `-ReturnStmt [[ADDR_2:0x[a-z0-9]*]] +// C_INT-NEXT: | | `-IntegerLiteral [[ADDR_3:0x[a-z0-9]*]] 'int' 1 +// C_INT-NEXT: | |-OverloadableAttr [[ADDR_4:0x[a-z0-9]*]] +// C_INT-NEXT: | `-OMPDeclareVariantAttr [[ADDR_5:0x[a-z0-9]*]] <> Implicit implementation={extension(disable_implicit_base)} +// C_INT-NEXT: | `-DeclRefExpr [[ADDR_6:0x[a-z0-9]*]] 'int ({{.*}})' Function [[ADDR_7:0x[a-z0-9]*]] 'also_before[implementation={extension(disable_implicit_base)}]' 'int ({{.*}})' +// C_INT-NEXT: |-FunctionDecl [[ADDR_8:0x[a-z0-9]*]] line:32:11 used also_before 'int (int)' +// C_INT-NEXT: | |-ParmVarDecl [[ADDR_9:0x[a-z0-9]*]] col:27 i 'int' +// C_INT-NEXT: | |-CompoundStmt [[ADDR_10:0x[a-z0-9]*]] +// C_INT-NEXT: | | `-ReturnStmt [[ADDR_11:0x[a-z0-9]*]] +// C_INT-NEXT: | | `-IntegerLiteral [[ADDR_12:0x[a-z0-9]*]] 'int' 1 +// C_INT-NEXT: | |-OverloadableAttr [[ADDR_13:0x[a-z0-9]*]] +// C_INT-NEXT: | `-OMPDeclareVariantAttr [[ADDR_14:0x[a-z0-9]*]] <> Implicit implementation={extension(disable_implicit_base)} +// C_INT-NEXT: | `-DeclRefExpr [[ADDR_15:0x[a-z0-9]*]] 'int (int)' Function [[ADDR_16:0x[a-z0-9]*]] 'also_before[implementation={extension(disable_implicit_base)}]' 'int (int)' +// C_INT-NEXT: |-FunctionDecl [[ADDR_7]] line:10:22 also_before[implementation={extension(disable_implicit_base)}] 
'int ({{.*}})' +// C_INT-NEXT: | |-CompoundStmt [[ADDR_17:0x[a-z0-9]*]] +// C_INT-NEXT: | | `-ReturnStmt [[ADDR_18:0x[a-z0-9]*]] +// C_INT-NEXT: | | `-IntegerLiteral [[ADDR_19:0x[a-z0-9]*]] 'int' 0 +// C_INT-NEXT: | `-OverloadableAttr [[ADDR_20:0x[a-z0-9]*]] +// C_INT-NEXT: |-FunctionDecl [[ADDR_16]] line:10:22 also_before[implementation={extension(disable_implicit_base)}] 'int (int)' +// C_INT-NEXT: | |-ParmVarDecl [[ADDR_21:0x[a-z0-9]*]] col:21 i 'int' +// C_INT-NEXT: | |-CompoundStmt [[ADDR_22:0x[a-z0-9]*]] +// C_INT-NEXT: | | `-ReturnStmt [[ADDR_23:0x[a-z0-9]*]] +// C_INT-NEXT: | | `-IntegerLiteral [[ADDR_24:0x[a-z0-9]*]] 'int' 0 +// C_INT-NEXT: | `-OverloadableAttr [[ADDR_25:0x[a-z0-9]*]] +// C_INT-NEXT: |-FunctionDecl [[ADDR_26:0x[a-z0-9]*]] line:10:22 also_after[implementation={extension(disable_implicit_base)}] 'int (double)' +// C_INT-NEXT: | |-ParmVarDecl [[ADDR_27:0x[a-z0-9]*]] col:23 d 'double' +// C_INT-NEXT: | |-CompoundStmt [[ADDR_28:0x[a-z0-9]*]] +// C_INT-NEXT: | | `-ReturnStmt [[ADDR_29:0x[a-z0-9]*]] +// C_INT-NEXT: | | `-IntegerLiteral [[ADDR_30:0x[a-z0-9]*]] 'int' 1 +// C_INT-NEXT: | `-OverloadableAttr [[ADDR_31:0x[a-z0-9]*]] +// C_INT-NEXT: |-FunctionDecl [[ADDR_32:0x[a-z0-9]*]] line:10:22 also_after[implementation={extension(disable_implicit_base)}] 'int (long)' +// C_INT-NEXT: | |-ParmVarDecl [[ADDR_33:0x[a-z0-9]*]] col:21 l 'long' +// C_INT-NEXT: | |-CompoundStmt [[ADDR_34:0x[a-z0-9]*]] +// C_INT-NEXT: | | `-ReturnStmt [[ADDR_35:0x[a-z0-9]*]] +// C_INT-NEXT: | | `-IntegerLiteral [[ADDR_36:0x[a-z0-9]*]] 'int' 1 +// C_INT-NEXT: | `-OverloadableAttr [[ADDR_37:0x[a-z0-9]*]] +// C_INT-NEXT: |-FunctionDecl [[ADDR_38:0x[a-z0-9]*]] line:57:11 used also_after 'int (double)' +// C_INT-NEXT: | |-ParmVarDecl [[ADDR_39:0x[a-z0-9]*]] col:29 d 'double' +// C_INT-NEXT: | |-CompoundStmt [[ADDR_40:0x[a-z0-9]*]] +// C_INT-NEXT: | | `-ReturnStmt [[ADDR_41:0x[a-z0-9]*]] +// C_INT-NEXT: | | `-IntegerLiteral [[ADDR_42:0x[a-z0-9]*]] 'int' 0 +// C_INT-NEXT: | `-OverloadableAttr [[ADDR_43:0x[a-z0-9]*]] +// C_INT-NEXT: |-FunctionDecl [[ADDR_44:0x[a-z0-9]*]] line:61:11 used also_after 'int (long)' +// C_INT-NEXT: | |-ParmVarDecl [[ADDR_45:0x[a-z0-9]*]] col:27 l 'long' +// C_INT-NEXT: | |-CompoundStmt [[ADDR_46:0x[a-z0-9]*]] +// C_INT-NEXT: | | `-ReturnStmt [[ADDR_47:0x[a-z0-9]*]] +// C_INT-NEXT: | | `-IntegerLiteral [[ADDR_48:0x[a-z0-9]*]] 'int' 0 +// C_INT-NEXT: | `-OverloadableAttr [[ADDR_49:0x[a-z0-9]*]] +// C_INT-NEXT: `-FunctionDecl [[ADDR_50:0x[a-z0-9]*]] line:65:5 main 'int ({{.*}})' +// C_INT-NEXT: `-CompoundStmt [[ADDR_51:0x[a-z0-9]*]] +// C_INT-NEXT: `-ReturnStmt [[ADDR_52:0x[a-z0-9]*]] +// C_INT-NEXT: `-BinaryOperator [[ADDR_53:0x[a-z0-9]*]] 'int' '+' +// C_INT-NEXT: |-BinaryOperator [[ADDR_54:0x[a-z0-9]*]] 'int' '+' +// C_INT-NEXT: | |-BinaryOperator [[ADDR_55:0x[a-z0-9]*]] 'int' '+' +// C_INT-NEXT: | | |-BinaryOperator [[ADDR_56:0x[a-z0-9]*]] 'int' '+' +// C_INT-NEXT: | | | |-PseudoObjectExpr [[ADDR_57:0x[a-z0-9]*]] 'int' +// C_INT-NEXT: | | | | |-CallExpr [[ADDR_58:0x[a-z0-9]*]] 'int' +// C_INT-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_59:0x[a-z0-9]*]] 'int (*)({{.*}})' +// C_INT-NEXT: | | | | | `-DeclRefExpr [[ADDR_60:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_0]] 'also_before' 'int ({{.*}})' +// C_INT-NEXT: | | | | `-CallExpr [[ADDR_61:0x[a-z0-9]*]] 'int' +// C_INT-NEXT: | | | | `-ImplicitCastExpr [[ADDR_62:0x[a-z0-9]*]] 'int (*)({{.*}})' +// C_INT-NEXT: | | | | `-DeclRefExpr [[ADDR_6]] 'int ({{.*}})' Function [[ADDR_7]] 
'also_before[implementation={extension(disable_implicit_base)}]' 'int ({{.*}})' +// C_INT-NEXT: | | | `-PseudoObjectExpr [[ADDR_63:0x[a-z0-9]*]] 'int' +// C_INT-NEXT: | | | |-CallExpr [[ADDR_64:0x[a-z0-9]*]] 'int' +// C_INT-NEXT: | | | | |-ImplicitCastExpr [[ADDR_65:0x[a-z0-9]*]] 'int (*)(int)' +// C_INT-NEXT: | | | | | `-DeclRefExpr [[ADDR_66:0x[a-z0-9]*]] 'int (int)' {{.*}}Function [[ADDR_8]] 'also_before' 'int (int)' +// C_INT-NEXT: | | | | `-IntegerLiteral [[ADDR_67:0x[a-z0-9]*]] 'int' 1 +// C_INT-NEXT: | | | `-CallExpr [[ADDR_68:0x[a-z0-9]*]] 'int' +// C_INT-NEXT: | | | |-ImplicitCastExpr [[ADDR_69:0x[a-z0-9]*]] 'int (*)(int)' +// C_INT-NEXT: | | | | `-DeclRefExpr [[ADDR_15]] 'int (int)' Function [[ADDR_16]] 'also_before[implementation={extension(disable_implicit_base)}]' 'int (int)' +// C_INT-NEXT: | | | `-IntegerLiteral [[ADDR_67]] 'int' 1 +// C_INT-NEXT: | | `-PseudoObjectExpr [[ADDR_70:0x[a-z0-9]*]] 'int' +// C_INT-NEXT: | | |-CallExpr [[ADDR_71:0x[a-z0-9]*]] 'int' +// C_INT-NEXT: | | | |-ImplicitCastExpr [[ADDR_72:0x[a-z0-9]*]] 'int (*)(int)' +// C_INT-NEXT: | | | | `-DeclRefExpr [[ADDR_73:0x[a-z0-9]*]] 'int (int)' {{.*}}Function [[ADDR_8]] 'also_before' 'int (int)' +// C_INT-NEXT: | | | `-ImplicitCastExpr [[ADDR_74:0x[a-z0-9]*]] 'int' +// C_INT-NEXT: | | | `-FloatingLiteral [[ADDR_75:0x[a-z0-9]*]] 'float' 2.000000e+00 +// C_INT-NEXT: | | `-CallExpr [[ADDR_76:0x[a-z0-9]*]] 'int' +// C_INT-NEXT: | | |-ImplicitCastExpr [[ADDR_77:0x[a-z0-9]*]] 'int (*)(int)' +// C_INT-NEXT: | | | `-DeclRefExpr [[ADDR_15]] 'int (int)' Function [[ADDR_16]] 'also_before[implementation={extension(disable_implicit_base)}]' 'int (int)' +// C_INT-NEXT: | | `-ImplicitCastExpr [[ADDR_78:0x[a-z0-9]*]] 'int' +// C_INT-NEXT: | | `-FloatingLiteral [[ADDR_75]] 'float' 2.000000e+00 +// C_INT-NEXT: | `-CallExpr [[ADDR_79:0x[a-z0-9]*]] 'int' +// C_INT-NEXT: | |-ImplicitCastExpr [[ADDR_80:0x[a-z0-9]*]] 'int (*)(double)' +// C_INT-NEXT: | | `-DeclRefExpr [[ADDR_81:0x[a-z0-9]*]] 'int (double)' {{.*}}Function [[ADDR_38]] 'also_after' 'int (double)' +// C_INT-NEXT: | `-FloatingLiteral [[ADDR_82:0x[a-z0-9]*]] 'double' 3.000000e+00 +// C_INT-NEXT: `-CallExpr [[ADDR_83:0x[a-z0-9]*]] 'int' +// C_INT-NEXT: |-ImplicitCastExpr [[ADDR_84:0x[a-z0-9]*]] 'int (*)(long)' +// C_INT-NEXT: | `-DeclRefExpr [[ADDR_85:0x[a-z0-9]*]] 'int (long)' {{.*}}Function [[ADDR_44]] 'also_after' 'int (long)' +// C_INT-NEXT: `-IntegerLiteral [[ADDR_86:0x[a-z0-9]*]] 'long' 4 + +// CXX_INT: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:30:1> line:28:11 used also_before 'int ({{.*}})' +// CXX_INT-NEXT: | |-CompoundStmt [[ADDR_1:0x[a-z0-9]*]] +// CXX_INT-NEXT: | | `-ReturnStmt [[ADDR_2:0x[a-z0-9]*]] +// CXX_INT-NEXT: | | `-IntegerLiteral [[ADDR_3:0x[a-z0-9]*]] 'int' 1 +// CXX_INT-NEXT: | `-OMPDeclareVariantAttr [[ADDR_4:0x[a-z0-9]*]] <> Implicit implementation={extension(disable_implicit_base)} +// CXX_INT-NEXT: | `-DeclRefExpr [[ADDR_5:0x[a-z0-9]*]] 'int ({{.*}})' Function [[ADDR_6:0x[a-z0-9]*]] 'also_before[implementation={extension(disable_implicit_base)}]' 'int ({{.*}})' +// CXX_INT-NEXT: |-FunctionDecl [[ADDR_7:0x[a-z0-9]*]] line:32:11 used also_before 'int (int)' +// CXX_INT-NEXT: | |-ParmVarDecl [[ADDR_8:0x[a-z0-9]*]] col:27 i 'int' +// CXX_INT-NEXT: | |-CompoundStmt [[ADDR_9:0x[a-z0-9]*]] +// CXX_INT-NEXT: | | `-ReturnStmt [[ADDR_10:0x[a-z0-9]*]] +// CXX_INT-NEXT: | | `-IntegerLiteral [[ADDR_11:0x[a-z0-9]*]] 'int' 1 +// CXX_INT-NEXT: | `-OMPDeclareVariantAttr [[ADDR_12:0x[a-z0-9]*]] <> Implicit 
implementation={extension(disable_implicit_base)} +// CXX_INT-NEXT: | `-DeclRefExpr [[ADDR_13:0x[a-z0-9]*]] 'int (int)' Function [[ADDR_14:0x[a-z0-9]*]] 'also_before[implementation={extension(disable_implicit_base)}]' 'int (int)' +// CXX_INT-NEXT: |-FunctionDecl [[ADDR_6]] line:38:1 also_before[implementation={extension(disable_implicit_base)}] 'int ({{.*}})' +// CXX_INT-NEXT: | `-CompoundStmt [[ADDR_15:0x[a-z0-9]*]] +// CXX_INT-NEXT: | `-ReturnStmt [[ADDR_16:0x[a-z0-9]*]] +// CXX_INT-NEXT: | `-IntegerLiteral [[ADDR_17:0x[a-z0-9]*]] 'int' 0 +// CXX_INT-NEXT: |-FunctionDecl [[ADDR_14]] line:42:1 also_before[implementation={extension(disable_implicit_base)}] 'int (int)' +// CXX_INT-NEXT: | |-ParmVarDecl [[ADDR_18:0x[a-z0-9]*]] col:21 i 'int' +// CXX_INT-NEXT: | `-CompoundStmt [[ADDR_19:0x[a-z0-9]*]] +// CXX_INT-NEXT: | `-ReturnStmt [[ADDR_20:0x[a-z0-9]*]] +// CXX_INT-NEXT: | `-IntegerLiteral [[ADDR_21:0x[a-z0-9]*]] 'int' 0 +// CXX_INT-NEXT: |-FunctionDecl [[ADDR_22:0x[a-z0-9]*]] line:47:1 also_after[implementation={extension(disable_implicit_base)}] 'int (double)' +// CXX_INT-NEXT: | |-ParmVarDecl [[ADDR_23:0x[a-z0-9]*]] col:23 d 'double' +// CXX_INT-NEXT: | `-CompoundStmt [[ADDR_24:0x[a-z0-9]*]] +// CXX_INT-NEXT: | `-ReturnStmt [[ADDR_25:0x[a-z0-9]*]] +// CXX_INT-NEXT: | `-IntegerLiteral [[ADDR_26:0x[a-z0-9]*]] 'int' 1 +// CXX_INT-NEXT: |-FunctionDecl [[ADDR_27:0x[a-z0-9]*]] line:51:1 also_after[implementation={extension(disable_implicit_base)}] 'int (long)' +// CXX_INT-NEXT: | |-ParmVarDecl [[ADDR_28:0x[a-z0-9]*]] col:21 l 'long' +// CXX_INT-NEXT: | `-CompoundStmt [[ADDR_29:0x[a-z0-9]*]] +// CXX_INT-NEXT: | `-ReturnStmt [[ADDR_30:0x[a-z0-9]*]] +// CXX_INT-NEXT: | `-IntegerLiteral [[ADDR_31:0x[a-z0-9]*]] 'int' 1 +// CXX_INT-NEXT: |-FunctionDecl [[ADDR_32:0x[a-z0-9]*]] line:57:11 used also_after 'int (double)' +// CXX_INT-NEXT: | |-ParmVarDecl [[ADDR_33:0x[a-z0-9]*]] col:29 d 'double' +// CXX_INT-NEXT: | `-CompoundStmt [[ADDR_34:0x[a-z0-9]*]] +// CXX_INT-NEXT: | `-ReturnStmt [[ADDR_35:0x[a-z0-9]*]] +// CXX_INT-NEXT: | `-IntegerLiteral [[ADDR_36:0x[a-z0-9]*]] 'int' 0 +// CXX_INT-NEXT: |-FunctionDecl [[ADDR_37:0x[a-z0-9]*]] line:61:11 used also_after 'int (long)' +// CXX_INT-NEXT: | |-ParmVarDecl [[ADDR_38:0x[a-z0-9]*]] col:27 l 'long' +// CXX_INT-NEXT: | `-CompoundStmt [[ADDR_39:0x[a-z0-9]*]] +// CXX_INT-NEXT: | `-ReturnStmt [[ADDR_40:0x[a-z0-9]*]] +// CXX_INT-NEXT: | `-IntegerLiteral [[ADDR_41:0x[a-z0-9]*]] 'int' 0 +// CXX_INT-NEXT: `-FunctionDecl [[ADDR_42:0x[a-z0-9]*]] line:65:5 main 'int ({{.*}})' +// CXX_INT-NEXT: `-CompoundStmt [[ADDR_43:0x[a-z0-9]*]] +// CXX_INT-NEXT: `-ReturnStmt [[ADDR_44:0x[a-z0-9]*]] +// CXX_INT-NEXT: `-BinaryOperator [[ADDR_45:0x[a-z0-9]*]] 'int' '+' +// CXX_INT-NEXT: |-BinaryOperator [[ADDR_46:0x[a-z0-9]*]] 'int' '+' +// CXX_INT-NEXT: | |-BinaryOperator [[ADDR_47:0x[a-z0-9]*]] 'int' '+' +// CXX_INT-NEXT: | | |-BinaryOperator [[ADDR_48:0x[a-z0-9]*]] 'int' '+' +// CXX_INT-NEXT: | | | |-PseudoObjectExpr [[ADDR_49:0x[a-z0-9]*]] 'int' +// CXX_INT-NEXT: | | | | |-CallExpr [[ADDR_50:0x[a-z0-9]*]] 'int' +// CXX_INT-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_51:0x[a-z0-9]*]] 'int (*)({{.*}})' +// CXX_INT-NEXT: | | | | | `-DeclRefExpr [[ADDR_52:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_0]] 'also_before' 'int ({{.*}})' +// CXX_INT-NEXT: | | | | `-CallExpr [[ADDR_53:0x[a-z0-9]*]] 'int' +// CXX_INT-NEXT: | | | | `-ImplicitCastExpr [[ADDR_54:0x[a-z0-9]*]] 'int (*)({{.*}})' +// CXX_INT-NEXT: | | | | `-DeclRefExpr [[ADDR_5]] 'int ({{.*}})' Function [[ADDR_6]] 
'also_before[implementation={extension(disable_implicit_base)}]' 'int ({{.*}})' +// CXX_INT-NEXT: | | | `-PseudoObjectExpr [[ADDR_55:0x[a-z0-9]*]] 'int' +// CXX_INT-NEXT: | | | |-CallExpr [[ADDR_56:0x[a-z0-9]*]] 'int' +// CXX_INT-NEXT: | | | | |-ImplicitCastExpr [[ADDR_57:0x[a-z0-9]*]] 'int (*)(int)' +// CXX_INT-NEXT: | | | | | `-DeclRefExpr [[ADDR_58:0x[a-z0-9]*]] 'int (int)' {{.*}}Function [[ADDR_7]] 'also_before' 'int (int)' +// CXX_INT-NEXT: | | | | `-IntegerLiteral [[ADDR_59:0x[a-z0-9]*]] 'int' 1 +// CXX_INT-NEXT: | | | `-CallExpr [[ADDR_60:0x[a-z0-9]*]] 'int' +// CXX_INT-NEXT: | | | |-ImplicitCastExpr [[ADDR_61:0x[a-z0-9]*]] 'int (*)(int)' +// CXX_INT-NEXT: | | | | `-DeclRefExpr [[ADDR_13]] 'int (int)' Function [[ADDR_14]] 'also_before[implementation={extension(disable_implicit_base)}]' 'int (int)' +// CXX_INT-NEXT: | | | `-IntegerLiteral [[ADDR_59]] 'int' 1 +// CXX_INT-NEXT: | | `-PseudoObjectExpr [[ADDR_62:0x[a-z0-9]*]] 'int' +// CXX_INT-NEXT: | | |-CallExpr [[ADDR_63:0x[a-z0-9]*]] 'int' +// CXX_INT-NEXT: | | | |-ImplicitCastExpr [[ADDR_64:0x[a-z0-9]*]] 'int (*)(int)' +// CXX_INT-NEXT: | | | | `-DeclRefExpr [[ADDR_65:0x[a-z0-9]*]] 'int (int)' {{.*}}Function [[ADDR_7]] 'also_before' 'int (int)' +// CXX_INT-NEXT: | | | `-ImplicitCastExpr [[ADDR_66:0x[a-z0-9]*]] 'int' +// CXX_INT-NEXT: | | | `-FloatingLiteral [[ADDR_67:0x[a-z0-9]*]] 'float' 2.000000e+00 +// CXX_INT-NEXT: | | `-CallExpr [[ADDR_68:0x[a-z0-9]*]] 'int' +// CXX_INT-NEXT: | | |-ImplicitCastExpr [[ADDR_69:0x[a-z0-9]*]] 'int (*)(int)' +// CXX_INT-NEXT: | | | `-DeclRefExpr [[ADDR_13]] 'int (int)' Function [[ADDR_14]] 'also_before[implementation={extension(disable_implicit_base)}]' 'int (int)' +// CXX_INT-NEXT: | | `-ImplicitCastExpr [[ADDR_70:0x[a-z0-9]*]] 'int' +// CXX_INT-NEXT: | | `-FloatingLiteral [[ADDR_67]] 'float' 2.000000e+00 +// CXX_INT-NEXT: | `-CallExpr [[ADDR_71:0x[a-z0-9]*]] 'int' +// CXX_INT-NEXT: | |-ImplicitCastExpr [[ADDR_72:0x[a-z0-9]*]] 'int (*)(double)' +// CXX_INT-NEXT: | | `-DeclRefExpr [[ADDR_73:0x[a-z0-9]*]] 'int (double)' {{.*}}Function [[ADDR_32]] 'also_after' 'int (double)' +// CXX_INT-NEXT: | `-FloatingLiteral [[ADDR_74:0x[a-z0-9]*]] 'double' 3.000000e+00 +// CXX_INT-NEXT: `-CallExpr [[ADDR_75:0x[a-z0-9]*]] 'int' +// CXX_INT-NEXT: |-ImplicitCastExpr [[ADDR_76:0x[a-z0-9]*]] 'int (*)(long)' +// CXX_INT-NEXT: | `-DeclRefExpr [[ADDR_77:0x[a-z0-9]*]] 'int (long)' {{.*}}Function [[ADDR_37]] 'also_after' 'int (long)' +// CXX_INT-NEXT: `-IntegerLiteral [[ADDR_78:0x[a-z0-9]*]] 'long' 4 diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 9ad7efff6ef56..821362c35826e 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -1118,6 +1118,7 @@ __OMP_TRAIT_SELECTOR(implementation, extension, true) __OMP_TRAIT_PROPERTY(implementation, extension, match_all) __OMP_TRAIT_PROPERTY(implementation, extension, match_any) __OMP_TRAIT_PROPERTY(implementation, extension, match_none) +__OMP_TRAIT_PROPERTY(implementation, extension, disable_implicit_base) __OMP_TRAIT_SET(user) From 97652202d1e6964d5d7a1c03a257452c7ad95233 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 12 Aug 2020 16:45:46 -0500 Subject: [PATCH 0883/1079] [OpenMP] Overload `std::isnan` and friends multiple times for the GPU `std::isnan` and friends can be found in two variants in the wild, one returns `bool`, as the standard defines it, one returns `int`, as the C macros do. 
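
To make the clash concrete, here is a minimal sketch (an editor's
illustration, not part of this patch; the declarations are assumed, not
taken from any particular header). In C++ the second declaration is
ill-formed because a function cannot be overloaded or redeclared on the
return type alone:

    // hypothetical system-header excerpt (assumption for illustration)
    bool isnan(float x); // conforming C++ signature
    int isnan(float x);  // macro-style signature found in older headers
                         // error: differs only in the return type
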
So far we have simply hoped that the system versions of these functions
would work for people, i.e., that they are definitions that can be
compiled for the target. We know that is not always the case, so we
leverage the `disable_implicit_base` OpenMP context extension to
specialize both versions of these functions without causing an invalid
redeclaration.

Reviewed By: JonChesterfield, tra

Differential Revision: https://reviews.llvm.org/D85879
---
 clang/lib/Headers/__clang_cuda_cmath.h        | 41 +++++++++++++++++--
 clang/test/Headers/Inputs/include/cmath       |  5 +++
 .../test/Headers/openmp_device_math_isnan.cpp | 30 ++++++++++++++
 3 files changed, 72 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/Headers/openmp_device_math_isnan.cpp

diff --git a/clang/lib/Headers/__clang_cuda_cmath.h b/clang/lib/Headers/__clang_cuda_cmath.h
index 8ba182689a4f9..f49463d72e042 100644
--- a/clang/lib/Headers/__clang_cuda_cmath.h
+++ b/clang/lib/Headers/__clang_cuda_cmath.h
@@ -66,10 +66,38 @@ __DEVICE__ float frexp(float __arg, int *__exp) {
 }
 
 // For inscrutable reasons, the CUDA headers define these functions for us on
-// Windows. For OpenMP we omit these as some old system headers have
-// non-conforming `isinf(float)` and `isnan(float)` implementations that return
-// an `int`. The system versions of these functions should be fine anyway.
-#if !defined(_MSC_VER) && !defined(__OPENMP_NVPTX__)
+// Windows.
+#if !defined(_MSC_VER) || defined(__OPENMP_NVPTX__)
+
+// For OpenMP we work around some old system headers that have non-conforming
+// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do
+// this by providing two versions of these functions, differing only in the
+// return type. To avoid conflicting definitions we disable implicit base
+// function generation. That means we will end up with two specializations, one
+// per type, but only one has a base function defined by the system header.
+#if defined(__OPENMP_NVPTX__)
+#pragma omp begin declare variant match( \
+    implementation = {extension(disable_implicit_base)})
+
+// FIXME: We lack an extension to customize the mangling of the variants, e.g.,
+//        add a suffix. This means we would clash with the names of the variants
+//        (note that we do not create implicit base functions here). To avoid
+//        this clash we add a new trait to some of them that is always true
+//        (this is LLVM after all ;)). It will only influence the mangled name
+//        of the variants inside the inner region and avoid the clash.
+#pragma omp begin declare variant match(implementation = {vendor(llvm)}) + +__DEVICE__ int isinf(float __x) { return ::__isinff(__x); } +__DEVICE__ int isinf(double __x) { return ::__isinf(__x); } +__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); } +__DEVICE__ int isfinite(double __x) { return ::__isfinited(__x); } +__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); } +__DEVICE__ int isnan(double __x) { return ::__isnan(__x); } + +#pragma omp end declare variant + +#endif + __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } @@ -79,6 +107,11 @@ __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); } __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } + +#if defined(__OPENMP_NVPTX__) +#pragma omp end declare variant +#endif + #endif __DEVICE__ bool isgreater(float __x, float __y) { diff --git a/clang/test/Headers/Inputs/include/cmath b/clang/test/Headers/Inputs/include/cmath index 5e4e8b67514f0..20e34898b5535 100644 --- a/clang/test/Headers/Inputs/include/cmath +++ b/clang/test/Headers/Inputs/include/cmath @@ -82,8 +82,13 @@ bool isless(float, float); bool islessgreater(double, double); bool islessgreater(float, float); bool isnan(long double); +#ifdef USE_ISNAN_WITH_INT_RETURN +int isnan(double); +int isnan(float); +#else bool isnan(double); bool isnan(float); +#endif bool isnormal(double); bool isnormal(float); bool isunordered(double, double); diff --git a/clang/test/Headers/openmp_device_math_isnan.cpp b/clang/test/Headers/openmp_device_math_isnan.cpp new file mode 100644 index 0000000000000..35443dbdebea6 --- /dev/null +++ b/clang/test/Headers/openmp_device_math_isnan.cpp @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=BOOL_RETURN +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=BOOL_RETURN +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -DUSE_ISNAN_WITH_INT_RETURN +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda 
-aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN +// expected-no-diagnostics + +#include + +double math(float f, double d) { + double r = 0; + // INT_RETURN: call i32 @__nv_isnanf(float + // BOOL_RETURN: call i32 @__nv_isnanf(float + r += std::isnan(f); + // INT_RETURN: call i32 @__nv_isnand(double + // BOOL_RETURN: call i32 @__nv_isnand(double + r += std::isnan(d); + return r; +} + +long double foo(float f, double d, long double ld) { + double r = ld; + r += math(f, d); +#pragma omp target map(r) + { r += math(f, d); } + return r; +} From 5c1084e8840b02d410ba125cbba466465242d820 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sun, 31 May 2020 11:40:09 -0500 Subject: [PATCH 0884/1079] [OpenMP] Context selector extensions for template functions With this extension the effects of `omp begin declare variant` will be applied to template function declarations. The behavior is opt-in and controlled by the `extension(allow_templates)` trait. While generally useful, this will enable us to implement complex math function calls by overloading the templates of the standard library with the ones in libc++. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D85735 --- clang/include/clang/Basic/AttrDocs.td | 6 + clang/include/clang/Sema/Sema.h | 14 +- clang/lib/Headers/openmp_wrappers/cmath | 5 +- clang/lib/Parse/ParseOpenMP.cpp | 4 + clang/lib/Sema/SemaDecl.cpp | 14 +- clang/lib/Sema/SemaOpenMP.cpp | 108 ++++--- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 37 ++- ...penmp-begin-declare-variant_template_2.cpp | 264 ++++++++++++++++++ .../include/llvm/Frontend/OpenMP/OMPKinds.def | 1 + 9 files changed, 393 insertions(+), 60 deletions(-) create mode 100644 clang/test/AST/ast-dump-openmp-begin-declare-variant_template_2.cpp diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index e0f875a905b7e..aab337a4e24ab 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -3679,6 +3679,7 @@ Clang provides the following context selector extensions, used via match_any match_none disable_implicit_base + allow_templates The match extensions change when the *entire* context selector is considered a match for an OpenMP context. The default is ``all``, with ``none`` no trait in the @@ -3690,6 +3691,11 @@ applied to a definition. If ``disable_implicit_base`` is given, we will not introduce an implicit base function for a variant if no base function was found. The variant is still generated but will never be called, due to the absence of a base function and consequently calls to a base function. 
+The allow extensions change when the ``begin declare variant`` effect is +applied to a definition. If ``allow_templates`` is given, template function +definitions are considered as specializations of existing or assumed template +declarations with the same name. The template parameters for the base functions +are used to instantiate the specialization. }]; } diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 9502c104be68c..9ee8e338e7329 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -10031,15 +10031,15 @@ class Sema final { /// The declarator \p D defines a function in the scope \p S which is nested /// in an `omp begin/end declare variant` scope. In this method we create a /// declaration for \p D and rename \p D according to the OpenMP context - /// selector of the surrounding scope. - FunctionDecl * - ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope(Scope *S, - Declarator &D); + /// selector of the surrounding scope. Return all base functions in \p Bases. + void ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope( + Scope *S, Declarator &D, MultiTemplateParamsArg TemplateParameterLists, + SmallVectorImpl &Bases); - /// Register \p FD as specialization of \p BaseFD in the current `omp - /// begin/end declare variant` scope. + /// Register \p D as specialization of all base functions in \p Bases in the + /// current `omp begin/end declare variant` scope. void ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope( - FunctionDecl *FD, FunctionDecl *BaseFD); + Decl *D, SmallVectorImpl &Bases); public: diff --git a/clang/lib/Headers/openmp_wrappers/cmath b/clang/lib/Headers/openmp_wrappers/cmath index bd6011eb6f6d5..1aff66af7d52d 100644 --- a/clang/lib/Headers/openmp_wrappers/cmath +++ b/clang/lib/Headers/openmp_wrappers/cmath @@ -24,8 +24,11 @@ // which might live in cstdlib. #include +// We need limits because __clang_cuda_cmath.h below uses `std::numeric_limit`. +#include + #pragma omp begin declare variant match( \ - device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) + device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any, allow_templates)}) #define __CUDA__ #define __OPENMP_NVPTX__ diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 184dd48c391c2..34bddd2e10d76 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -939,6 +939,10 @@ static bool checkExtensionProperty(Parser &P, SourceLocation Loc, TraitProperty::implementation_extension_disable_implicit_base) return true; + if (TIProperty.Kind == + TraitProperty::implementation_extension_allow_templates) + return true; + auto IsMatchExtension = [](OMPTraitProperty &TP) { return (TP.Kind == llvm::omp::TraitProperty::implementation_extension_match_all || diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 3e0d284bdf710..416a75fa4323b 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -13757,19 +13757,17 @@ Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Declarator &D, // variant` annotation which specifies the mangled definition as a // specialization function under the OpenMP context defined as part of the // `omp begin declare variant`. 
- FunctionDecl *BaseFD = nullptr; - if (LangOpts.OpenMP && isInOpenMPDeclareVariantScope() && - TemplateParameterLists.empty()) - BaseFD = ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope( - ParentScope, D); + SmallVector Bases; + if (LangOpts.OpenMP && isInOpenMPDeclareVariantScope()) + ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope( + ParentScope, D, TemplateParameterLists, Bases); D.setFunctionDefinitionKind(FDK_Definition); Decl *DP = HandleDeclarator(ParentScope, D, TemplateParameterLists); Decl *Dcl = ActOnStartOfFunctionDef(FnBodyScope, DP, SkipBody); - if (BaseFD) - ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope( - cast(Dcl), BaseFD); + if (!Bases.empty()) + ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope(Dcl, Bases); return Dcl; } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 36c257440a483..92f6141b6d389 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -5868,10 +5868,21 @@ static void setPrototype(Sema &S, FunctionDecl *FD, FunctionDecl *FDWithProto, Sema::OMPDeclareVariantScope::OMPDeclareVariantScope(OMPTraitInfo &TI) : TI(&TI), NameSuffix(TI.getMangledName()) {} -FunctionDecl * -Sema::ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope(Scope *S, - Declarator &D) { +void Sema::ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope( + Scope *S, Declarator &D, MultiTemplateParamsArg TemplateParamLists, + SmallVectorImpl &Bases) { + if (!D.getIdentifier()) + return; + OMPDeclareVariantScope &DVScope = OMPDeclareVariantScopes.back(); + + // Template specialization is an extension, check if we do it. + bool IsTemplated = !TemplateParamLists.empty(); + if (IsTemplated & + !DVScope.TI->isExtensionActive( + llvm::omp::TraitProperty::implementation_extension_allow_templates)) + return; + IdentifierInfo *BaseII = D.getIdentifier(); LookupResult Lookup(*this, DeclarationName(BaseII), D.getIdentifierLoc(), LookupOrdinaryName); @@ -5883,9 +5894,13 @@ Sema::ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope(Scope *S, bool IsConstexpr = D.getDeclSpec().getConstexprSpecifier() == CSK_constexpr; bool IsConsteval = D.getDeclSpec().getConstexprSpecifier() == CSK_consteval; - FunctionDecl *BaseFD = nullptr; for (auto *Candidate : Lookup) { - auto *UDecl = dyn_cast(Candidate->getUnderlyingDecl()); + auto *CandidateDecl = Candidate->getUnderlyingDecl(); + FunctionDecl *UDecl = nullptr; + if (IsTemplated && isa(CandidateDecl)) + UDecl = cast(CandidateDecl)->getTemplatedDecl(); + else if (!IsTemplated) + UDecl = dyn_cast(CandidateDecl); if (!UDecl) continue; @@ -5896,23 +5911,31 @@ Sema::ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope(Scope *S, if (UDecl->isConsteval() && !IsConsteval) continue; - QualType NewType = Context.mergeFunctionTypes( - FType, UDecl->getType(), /* OfBlockPointer */ false, - /* Unqualified */ false, /* AllowCXX */ true); - if (NewType.isNull()) - continue; + QualType UDeclTy = UDecl->getType(); + // TODO: Verify types for templates eventually. + if (!UDeclTy->isDependentType()) { + QualType NewType = Context.mergeFunctionTypes( + FType, UDeclTy, /* OfBlockPointer */ false, + /* Unqualified */ false, /* AllowCXX */ true); + if (NewType.isNull()) + continue; + } // Found a base! - BaseFD = UDecl; - break; + Bases.push_back(UDecl); } bool UseImplicitBase = !DVScope.TI->isExtensionActive( llvm::omp::TraitProperty::implementation_extension_disable_implicit_base); // If no base was found we create a declaration that we use as base. 
- if (!BaseFD && UseImplicitBase) { - BaseFD = cast(ActOnDeclarator(S, D)); - BaseFD->setImplicit(true); + if (Bases.empty() && UseImplicitBase) { + D.setFunctionDefinitionKind(FDK_Declaration); + Decl *BaseD = HandleDeclarator(S, D, TemplateParamLists); + BaseD->setImplicit(true); + if (auto *BaseTemplD = dyn_cast(BaseD)) + Bases.push_back(BaseTemplD->getTemplatedDecl()); + else + Bases.push_back(cast(BaseD)); } std::string MangledName; @@ -5923,17 +5946,21 @@ Sema::ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope(Scope *S, VariantII.setMangledOpenMPVariantName(true); D.SetIdentifier(&VariantII, D.getBeginLoc()); - return BaseFD; } void Sema::ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope( - FunctionDecl *FD, FunctionDecl *BaseFD) { + Decl *D, SmallVectorImpl &Bases) { // Do not mark function as is used to prevent its emission if this is the // only place where it is used. EnterExpressionEvaluationContext Unevaluated( *this, Sema::ExpressionEvaluationContext::Unevaluated); - Expr *VariantFuncRef = DeclRefExpr::Create( + FunctionDecl *FD = nullptr; + if (auto *UTemplDecl = dyn_cast(D)) + FD = UTemplDecl->getTemplatedDecl(); + else + FD = cast(D); + auto *VariantFuncRef = DeclRefExpr::Create( Context, NestedNameSpecifierLoc(), SourceLocation(), FD, /* RefersToEnclosingVariableOrCapture */ false, /* NameLoc */ FD->getLocation(), FD->getType(), ExprValueKind::VK_RValue); @@ -5941,7 +5968,8 @@ void Sema::ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope( OMPDeclareVariantScope &DVScope = OMPDeclareVariantScopes.back(); auto *OMPDeclareVariantA = OMPDeclareVariantAttr::CreateImplicit( Context, VariantFuncRef, DVScope.TI); - BaseFD->addAttr(OMPDeclareVariantA); + for (FunctionDecl *BaseFD : Bases) + BaseFD->addAttr(OMPDeclareVariantA); } ExprResult Sema::ActOnOpenMPCall(ExprResult Call, Scope *Scope, @@ -6129,7 +6157,7 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, // Convert VariantRef expression to the type of the original function to // resolve possible conflicts. - ExprResult VariantRefCast; + ExprResult VariantRefCast = VariantRef; if (LangOpts.CPlusPlus) { QualType FnPtrType; auto *Method = dyn_cast(FD); @@ -6154,25 +6182,27 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, } else { FnPtrType = Context.getPointerType(FD->getType()); } - ImplicitConversionSequence ICS = - TryImplicitConversion(VariantRef, FnPtrType.getUnqualifiedType(), - /*SuppressUserConversions=*/false, - AllowedExplicit::None, - /*InOverloadResolution=*/false, - /*CStyle=*/false, - /*AllowObjCWritebackConversion=*/false); - if (ICS.isFailure()) { - Diag(VariantRef->getExprLoc(), - diag::err_omp_declare_variant_incompat_types) - << VariantRef->getType() - << ((Method && !Method->isStatic()) ? FnPtrType : FD->getType()) - << VariantRef->getSourceRange(); - return None; + QualType VarianPtrType = Context.getPointerType(VariantRef->getType()); + if (VarianPtrType.getUnqualifiedType() != FnPtrType.getUnqualifiedType()) { + ImplicitConversionSequence ICS = TryImplicitConversion( + VariantRef, FnPtrType.getUnqualifiedType(), + /*SuppressUserConversions=*/false, AllowedExplicit::None, + /*InOverloadResolution=*/false, + /*CStyle=*/false, + /*AllowObjCWritebackConversion=*/false); + if (ICS.isFailure()) { + Diag(VariantRef->getExprLoc(), + diag::err_omp_declare_variant_incompat_types) + << VariantRef->getType() + << ((Method && !Method->isStatic()) ? 
FnPtrType : FD->getType()) + << VariantRef->getSourceRange(); + return None; + } + VariantRefCast = PerformImplicitConversion( + VariantRef, FnPtrType.getUnqualifiedType(), AA_Converting); + if (!VariantRefCast.isUsable()) + return None; } - VariantRefCast = PerformImplicitConversion( - VariantRef, FnPtrType.getUnqualifiedType(), AA_Converting); - if (!VariantRefCast.isUsable()) - return None; // Drop previously built artificial addr_of unary op for member functions. if (Method && !Method->isStatic()) { Expr *PossibleAddrOfVariantRef = VariantRefCast.get(); @@ -6180,8 +6210,6 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, PossibleAddrOfVariantRef->IgnoreImplicit())) VariantRefCast = UO->getSubExpr(); } - } else { - VariantRefCast = VariantRef; } ExprResult ER = CheckPlaceholderExpr(VariantRefCast.get()); diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index a5100dc99fcda..921d94036a2c6 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -417,7 +417,9 @@ static void instantiateOMPDeclareVariantAttr( if (TI.anyScoreOrCondition(SubstScoreOrConditionExpr)) return; - // Check function/variant ref. + Expr *E = VariantFuncRef.get(); + // Check function/variant ref for `omp declare variant` but not for `omp + // begin declare variant` (which use implicit attributes). Optional> DeclVarData = S.checkOpenMPDeclareVariantFunction(S.ConvertDeclToDeclGroup(New), VariantFuncRef.get(), TI, @@ -426,9 +428,36 @@ static void instantiateOMPDeclareVariantAttr( if (!DeclVarData) return; - S.ActOnOpenMPDeclareVariantDirective(DeclVarData.getValue().first, - DeclVarData.getValue().second, TI, - Attr.getRange()); + E = DeclVarData.getValue().second; + FD = DeclVarData.getValue().first; + + if (auto *VariantDRE = dyn_cast(E->IgnoreParenImpCasts())) { + if (auto *VariantFD = dyn_cast(VariantDRE->getDecl())) { + if (auto *VariantFTD = VariantFD->getDescribedFunctionTemplate()) { + if (!VariantFTD->isThisDeclarationADefinition()) + return; + Sema::TentativeAnalysisScope Trap(S); + const TemplateArgumentList *TAL = TemplateArgumentList::CreateCopy( + S.Context, TemplateArgs.getInnermost()); + + auto *SubstFD = S.InstantiateFunctionDeclaration(VariantFTD, TAL, + New->getLocation()); + if (!SubstFD) + return; + S.InstantiateFunctionDefinition( + New->getLocation(), SubstFD, /* Recursive */ true, + /* DefinitionRequired */ false, /* AtEndOfTU */ false); + SubstFD->setInstantiationIsPending(!SubstFD->isDefined()); + E = DeclRefExpr::Create(S.Context, NestedNameSpecifierLoc(), + SourceLocation(), SubstFD, + /* RefersToEnclosingVariableOrCapture */ false, + /* NameLoc */ SubstFD->getLocation(), + SubstFD->getType(), ExprValueKind::VK_RValue); + } + } + } + + S.ActOnOpenMPDeclareVariantDirective(FD, E, TI, Attr.getRange()); } static void instantiateDependentAMDGPUFlatWorkGroupSizeAttr( diff --git a/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_2.cpp b/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_2.cpp new file mode 100644 index 0000000000000..9613e86634927 --- /dev/null +++ b/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_2.cpp @@ -0,0 +1,264 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -verify -ast-dump %s -x c++ | FileCheck %s +// expected-no-diagnostics + +template +int also_before(T) { + return 1; +} +template +int also_before_mismatch(void) { + return 0; +} +int also_before_non_template(void) { + return 0; +} + 
+#pragma omp begin declare variant match(implementation = {extension(allow_templates)}) +template +int also_before(T) { + return 0; +} +template +int also_after(T) { + return 0; +} +template +int also_after_mismatch(T, Q) { + return 2; +} +template +int also_before_mismatch(T) { + return 3; +} +template +int also_before_non_template(T) { + return 4; +} +template +int only_def(void) { + return 0; +} +#pragma omp end declare variant + +template +int also_after(T) { + return 6; +} +template +int also_after_mismatch(T) { + return 0; +} + +int test() { + // Should return 0. + return also_before(0.) + also_before_mismatch<0>() + also_before_non_template() + also_after(0) + also_after_mismatch(0) + only_def<0>(); +} + +// CHECK: |-FunctionTemplateDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:7:1> line:5:5 also_before +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_1:0x[a-z0-9]*]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | |-FunctionDecl [[ADDR_2:0x[a-z0-9]*]] line:5:5 also_before 'int (T)' +// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_3:0x[a-z0-9]*]] col:18 'T' +// CHECK-NEXT: | | |-CompoundStmt [[ADDR_4:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-ReturnStmt [[ADDR_5:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_6:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | `-OMPDeclareVariantAttr [[ADDR_7:0x[a-z0-9]*]] <> Implicit implementation={extension(allow_templates)} +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_8:0x[a-z0-9]*]] 'int (T)' {{.*}}Function [[ADDR_9:0x[a-z0-9]*]] 'also_before[implementation={extension(allow_templates)}]' 'int (T)' +// CHECK-NEXT: | `-FunctionDecl [[ADDR_10:0x[a-z0-9]*]] line:5:5 used also_before 'int (double)' +// CHECK-NEXT: | |-TemplateArgument type 'double' +// CHECK-NEXT: | | `-BuiltinType [[ADDR_11:0x[a-z0-9]*]] 'double' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_12:0x[a-z0-9]*]] col:18 'double':'double' +// CHECK-NEXT: | |-CompoundStmt [[ADDR_13:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ReturnStmt [[ADDR_14:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_6]] 'int' 1 +// CHECK-NEXT: | `-OMPDeclareVariantAttr [[ADDR_15:0x[a-z0-9]*]] <> Implicit implementation={extension(allow_templates)} +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_16:0x[a-z0-9]*]] 'int (double)' {{.*}}Function [[ADDR_17:0x[a-z0-9]*]] 'also_before[implementation={extension(allow_templates)}]' 'int (double)' +// CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_18:0x[a-z0-9]*]] line:9:5 also_before_mismatch +// CHECK-NEXT: | |-NonTypeTemplateParmDecl [[ADDR_19:0x[a-z0-9]*]] col:15 'int' depth 0 index 0 V +// CHECK-NEXT: | |-FunctionDecl [[ADDR_20:0x[a-z0-9]*]] line:9:5 also_before_mismatch 'int ({{.*}})' +// CHECK-NEXT: | | `-CompoundStmt [[ADDR_21:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ReturnStmt [[ADDR_22:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_23:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: | `-FunctionDecl [[ADDR_24:0x[a-z0-9]*]] line:9:5 used also_before_mismatch 'int ({{.*}})' +// CHECK-NEXT: | |-TemplateArgument integral 0 +// CHECK-NEXT: | `-CompoundStmt [[ADDR_25:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ReturnStmt [[ADDR_26:0x[a-z0-9]*]] +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_23]] 'int' 0 +// CHECK-NEXT: |-FunctionDecl [[ADDR_27:0x[a-z0-9]*]] line:12:5 used also_before_non_template 'int ({{.*}})' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_28:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ReturnStmt [[ADDR_29:0x[a-z0-9]*]] +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_30:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_31:0x[a-z0-9]*]] line:18:1 
also_before[implementation={extension(allow_templates)}] +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_32:0x[a-z0-9]*]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | |-FunctionDecl [[ADDR_9]] line:18:1 referenced also_before[implementation={extension(allow_templates)}] 'int (T)' +// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_33:0x[a-z0-9]*]] col:18 'T' +// CHECK-NEXT: | | `-CompoundStmt [[ADDR_34:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ReturnStmt [[ADDR_35:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_36:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: | `-FunctionDecl [[ADDR_17]] line:18:1 also_before[implementation={extension(allow_templates)}] 'int (double)' +// CHECK-NEXT: | |-TemplateArgument type 'double' +// CHECK-NEXT: | | `-BuiltinType [[ADDR_11]] 'double' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_37:0x[a-z0-9]*]] col:18 'double':'double' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_38:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ReturnStmt [[ADDR_39:0x[a-z0-9]*]] +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_36]] 'int' 0 +// CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_40:0x[a-z0-9]*]] col:5 implicit also_after +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_41:0x[a-z0-9]*]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | |-FunctionDecl [[ADDR_42:0x[a-z0-9]*]] col:5 also_after 'int (T)' +// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_43:0x[a-z0-9]*]] col:17 'T' +// CHECK-NEXT: | | `-OMPDeclareVariantAttr [[ADDR_44:0x[a-z0-9]*]] <> Implicit implementation={extension(allow_templates)} +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_45:0x[a-z0-9]*]] 'int (T)' {{.*}}Function [[ADDR_46:0x[a-z0-9]*]] 'also_after[implementation={extension(allow_templates)}]' 'int (T)' +// CHECK-NEXT: | `-FunctionDecl [[ADDR_47:0x[a-z0-9]*]] line:44:5 used also_after 'int (char)' +// CHECK-NEXT: | |-TemplateArgument type 'char' +// CHECK-NEXT: | | `-BuiltinType [[ADDR_48:0x[a-z0-9]*]] 'char' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_49:0x[a-z0-9]*]] col:17 'char':'char' +// CHECK-NEXT: | |-CompoundStmt [[ADDR_50:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ReturnStmt [[ADDR_51:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_52:0x[a-z0-9]*]] 'int' 6 +// CHECK-NEXT: | `-OMPDeclareVariantAttr [[ADDR_53:0x[a-z0-9]*]] <> Implicit implementation={extension(allow_templates)} +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_54:0x[a-z0-9]*]] 'int (char)' {{.*}}Function [[ADDR_55:0x[a-z0-9]*]] 'also_after[implementation={extension(allow_templates)}]' 'int (char)' +// CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_56:0x[a-z0-9]*]] line:22:1 also_after[implementation={extension(allow_templates)}] +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_41]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | |-FunctionDecl [[ADDR_46]] line:22:1 referenced also_after[implementation={extension(allow_templates)}] 'int (T)' +// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_43]] col:17 'T' +// CHECK-NEXT: | | `-CompoundStmt [[ADDR_57:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ReturnStmt [[ADDR_58:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_59:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: | `-FunctionDecl [[ADDR_55]] line:22:1 also_after[implementation={extension(allow_templates)}] 'int (char)' +// CHECK-NEXT: | |-TemplateArgument type 'char' +// CHECK-NEXT: | | `-BuiltinType [[ADDR_48]] 'char' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_60:0x[a-z0-9]*]] col:17 'char':'char' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_61:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ReturnStmt [[ADDR_62:0x[a-z0-9]*]] +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_59]] 'int' 0 +// CHECK-NEXT: 
|-FunctionTemplateDecl [[ADDR_63:0x[a-z0-9]*]] col:5 implicit also_after_mismatch +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_64:0x[a-z0-9]*]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_65:0x[a-z0-9]*]] col:32 referenced typename depth 0 index 1 Q +// CHECK-NEXT: | `-FunctionDecl [[ADDR_66:0x[a-z0-9]*]] col:5 also_after_mismatch 'int (T, Q)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_67:0x[a-z0-9]*]] col:26 'T' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_68:0x[a-z0-9]*]] col:29 'Q' +// CHECK-NEXT: | `-OMPDeclareVariantAttr [[ADDR_69:0x[a-z0-9]*]] <> Implicit implementation={extension(allow_templates)} +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_70:0x[a-z0-9]*]] 'int (T, Q)' {{.*}}Function [[ADDR_71:0x[a-z0-9]*]] 'also_after_mismatch[implementation={extension(allow_templates)}]' 'int (T, Q)' +// CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_72:0x[a-z0-9]*]] line:26:1 also_after_mismatch[implementation={extension(allow_templates)}] +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_64]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_65]] col:32 referenced typename depth 0 index 1 Q +// CHECK-NEXT: | `-FunctionDecl [[ADDR_71]] line:26:1 also_after_mismatch[implementation={extension(allow_templates)}] 'int (T, Q)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_67]] col:26 'T' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_68]] col:29 'Q' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_73:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ReturnStmt [[ADDR_74:0x[a-z0-9]*]] +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_75:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_76:0x[a-z0-9]*]] col:5 implicit also_before_mismatch +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_77:0x[a-z0-9]*]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | `-FunctionDecl [[ADDR_78:0x[a-z0-9]*]] col:5 also_before_mismatch 'int (T)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_79:0x[a-z0-9]*]] col:27 'T' +// CHECK-NEXT: | `-OMPDeclareVariantAttr [[ADDR_80:0x[a-z0-9]*]] <> Implicit implementation={extension(allow_templates)} +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_81:0x[a-z0-9]*]] 'int (T)' {{.*}}Function [[ADDR_82:0x[a-z0-9]*]] 'also_before_mismatch[implementation={extension(allow_templates)}]' 'int (T)' +// CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_83:0x[a-z0-9]*]] line:30:1 also_before_mismatch[implementation={extension(allow_templates)}] +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_77]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | `-FunctionDecl [[ADDR_82]] line:30:1 also_before_mismatch[implementation={extension(allow_templates)}] 'int (T)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_79]] col:27 'T' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_84:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ReturnStmt [[ADDR_85:0x[a-z0-9]*]] +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_86:0x[a-z0-9]*]] 'int' 3 +// CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_87:0x[a-z0-9]*]] col:5 implicit also_before_non_template +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_88:0x[a-z0-9]*]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | `-FunctionDecl [[ADDR_89:0x[a-z0-9]*]] col:5 also_before_non_template 'int (T)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_90:0x[a-z0-9]*]] col:31 'T' +// CHECK-NEXT: | `-OMPDeclareVariantAttr [[ADDR_91:0x[a-z0-9]*]] <> Implicit implementation={extension(allow_templates)} +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_92:0x[a-z0-9]*]] 'int (T)' {{.*}}Function [[ADDR_93:0x[a-z0-9]*]] 'also_before_non_template[implementation={extension(allow_templates)}]' 
'int (T)' +// CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_94:0x[a-z0-9]*]] line:34:1 also_before_non_template[implementation={extension(allow_templates)}] +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_88]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | `-FunctionDecl [[ADDR_93]] line:34:1 also_before_non_template[implementation={extension(allow_templates)}] 'int (T)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_90]] col:31 'T' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_95:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ReturnStmt [[ADDR_96:0x[a-z0-9]*]] +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_97:0x[a-z0-9]*]] 'int' 4 +// CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_98:0x[a-z0-9]*]] col:5 implicit only_def +// CHECK-NEXT: | |-NonTypeTemplateParmDecl [[ADDR_99:0x[a-z0-9]*]] col:15 'int' depth 0 index 0 V +// CHECK-NEXT: | |-FunctionDecl [[ADDR_100:0x[a-z0-9]*]] col:5 only_def 'int ({{.*}})' +// CHECK-NEXT: | | `-OMPDeclareVariantAttr [[ADDR_101:0x[a-z0-9]*]] <> Implicit implementation={extension(allow_templates)} +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_102:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_103:0x[a-z0-9]*]] 'only_def[implementation={extension(allow_templates)}]' 'int ({{.*}})' +// CHECK-NEXT: | `-FunctionDecl [[ADDR_104:0x[a-z0-9]*]] col:5 used only_def 'int ({{.*}})' +// CHECK-NEXT: | |-TemplateArgument integral 0 +// CHECK-NEXT: | `-OMPDeclareVariantAttr [[ADDR_105:0x[a-z0-9]*]] <> Implicit implementation={extension(allow_templates)} +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_106:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_107:0x[a-z0-9]*]] 'only_def[implementation={extension(allow_templates)}]' 'int ({{.*}})' +// CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_108:0x[a-z0-9]*]] line:38:1 only_def[implementation={extension(allow_templates)}] +// CHECK-NEXT: | |-NonTypeTemplateParmDecl [[ADDR_99]] col:15 'int' depth 0 index 0 V +// CHECK-NEXT: | |-FunctionDecl [[ADDR_103]] line:38:1 referenced only_def[implementation={extension(allow_templates)}] 'int ({{.*}})' +// CHECK-NEXT: | | `-CompoundStmt [[ADDR_109:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ReturnStmt [[ADDR_110:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_111:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: | `-FunctionDecl [[ADDR_107]] line:38:1 only_def[implementation={extension(allow_templates)}] 'int ({{.*}})' +// CHECK-NEXT: | |-TemplateArgument integral 0 +// CHECK-NEXT: | `-CompoundStmt [[ADDR_112:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ReturnStmt [[ADDR_113:0x[a-z0-9]*]] +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_111]] 'int' 0 +// CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_114:0x[a-z0-9]*]] prev [[ADDR_40]] line:44:5 also_after +// CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_115:0x[a-z0-9]*]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | |-FunctionDecl [[ADDR_116:0x[a-z0-9]*]] prev [[ADDR_42]] line:44:5 also_after 'int (T)' +// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_117:0x[a-z0-9]*]] col:17 'T' +// CHECK-NEXT: | | |-CompoundStmt [[ADDR_118:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-ReturnStmt [[ADDR_119:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_52]] 'int' 6 +// CHECK-NEXT: | | `-OMPDeclareVariantAttr [[ADDR_120:0x[a-z0-9]*]] <> Inherited Implicit implementation={extension(allow_templates)} +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_45]] 'int (T)' {{.*}}Function [[ADDR_46]] 'also_after[implementation={extension(allow_templates)}]' 'int (T)' +// CHECK-NEXT: | `-Function [[ADDR_47]] 'also_after' 'int (char)' +// CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_121:0x[a-z0-9]*]] line:48:5 also_after_mismatch +// 
CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_122:0x[a-z0-9]*]] col:20 referenced typename depth 0 index 0 T +// CHECK-NEXT: | |-FunctionDecl [[ADDR_123:0x[a-z0-9]*]] line:48:5 also_after_mismatch 'int (T)' +// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_124:0x[a-z0-9]*]] col:26 'T' +// CHECK-NEXT: | | `-CompoundStmt [[ADDR_125:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ReturnStmt [[ADDR_126:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_127:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: | `-FunctionDecl [[ADDR_128:0x[a-z0-9]*]] line:48:5 used also_after_mismatch 'int (int)' +// CHECK-NEXT: | |-TemplateArgument type 'int' +// CHECK-NEXT: | | `-BuiltinType [[ADDR_129:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_130:0x[a-z0-9]*]] col:26 'int':'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_131:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ReturnStmt [[ADDR_132:0x[a-z0-9]*]] +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_127]] 'int' 0 +// CHECK-NEXT: `-FunctionDecl [[ADDR_133:0x[a-z0-9]*]] line:52:5 test 'int ({{.*}})' +// CHECK-NEXT: `-CompoundStmt [[ADDR_134:0x[a-z0-9]*]] +// CHECK-NEXT: `-ReturnStmt [[ADDR_135:0x[a-z0-9]*]] +// CHECK-NEXT: `-BinaryOperator [[ADDR_136:0x[a-z0-9]*]] 'int' '+' +// CHECK-NEXT: |-BinaryOperator [[ADDR_137:0x[a-z0-9]*]] 'int' '+' +// CHECK-NEXT: | |-BinaryOperator [[ADDR_138:0x[a-z0-9]*]] 'int' '+' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_139:0x[a-z0-9]*]] 'int' '+' +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_140:0x[a-z0-9]*]] 'int' '+' +// CHECK-NEXT: | | | | |-PseudoObjectExpr [[ADDR_141:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | |-CallExpr [[ADDR_142:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_143:0x[a-z0-9]*]] 'int (*)(double)' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_144:0x[a-z0-9]*]] 'int (double)' {{.*}}Function [[ADDR_10]] 'also_before' 'int (double)' (FunctionTemplate [[ADDR_0]] 'also_before') +// CHECK-NEXT: | | | | | | `-FloatingLiteral [[ADDR_145:0x[a-z0-9]*]] 'double' 0.000000e+00 +// CHECK-NEXT: | | | | | `-CallExpr [[ADDR_146:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_147:0x[a-z0-9]*]] 'int (*)(double)' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_16]] 'int (double)' {{.*}}Function [[ADDR_17]] 'also_before[implementation={extension(allow_templates)}]' 'int (double)' +// CHECK-NEXT: | | | | | `-FloatingLiteral [[ADDR_145]] 'double' 0.000000e+00 +// CHECK-NEXT: | | | | `-CallExpr [[ADDR_148:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_149:0x[a-z0-9]*]] 'int (*)({{.*}})' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_150:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_24]] 'also_before_mismatch' 'int ({{.*}})' (FunctionTemplate [[ADDR_18]] 'also_before_mismatch') +// CHECK-NEXT: | | | `-CallExpr [[ADDR_151:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_152:0x[a-z0-9]*]] 'int (*)({{.*}})' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_153:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_27]] 'also_before_non_template' 'int ({{.*}})' +// CHECK-NEXT: | | `-PseudoObjectExpr [[ADDR_154:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-CallExpr [[ADDR_155:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_156:0x[a-z0-9]*]] 'int (*)(char)' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_157:0x[a-z0-9]*]] 'int (char)' {{.*}}Function [[ADDR_47]] 'also_after' 'int (char)' (FunctionTemplate [[ADDR_114]] 'also_after') +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_158:0x[a-z0-9]*]] 'char':'char' +// CHECK-NEXT: | | | `-IntegerLiteral 
[[ADDR_159:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: | | `-CallExpr [[ADDR_160:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-ImplicitCastExpr [[ADDR_161:0x[a-z0-9]*]] 'int (*)(char)' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_54]] 'int (char)' {{.*}}Function [[ADDR_55]] 'also_after[implementation={extension(allow_templates)}]' 'int (char)' +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_162:0x[a-z0-9]*]] 'char':'char' +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_159]] 'int' 0 +// CHECK-NEXT: | `-CallExpr [[ADDR_163:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | |-ImplicitCastExpr [[ADDR_164:0x[a-z0-9]*]] 'int (*)(int)' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_165:0x[a-z0-9]*]] 'int (int)' {{.*}}Function [[ADDR_128]] 'also_after_mismatch' 'int (int)' (FunctionTemplate [[ADDR_121]] 'also_after_mismatch') +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_166:0x[a-z0-9]*]] 'int' 0 +// CHECK-NEXT: `-PseudoObjectExpr [[ADDR_167:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: |-CallExpr [[ADDR_168:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | `-ImplicitCastExpr [[ADDR_169:0x[a-z0-9]*]] 'int (*)({{.*}})' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_170:0x[a-z0-9]*]] 'int ({{.*}})' {{.*}}Function [[ADDR_104]] 'only_def' 'int ({{.*}})' (FunctionTemplate [[ADDR_98]] 'only_def') +// CHECK-NEXT: `-CallExpr [[ADDR_171:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: `-ImplicitCastExpr [[ADDR_172:0x[a-z0-9]*]] 'int (*)({{.*}})' +// CHECK-NEXT: `-DeclRefExpr [[ADDR_106]] 'int ({{.*}})' {{.*}}Function [[ADDR_107]] 'only_def[implementation={extension(allow_templates)}]' 'int ({{.*}})' diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 821362c35826e..1b39fff3edec4 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -1119,6 +1119,7 @@ __OMP_TRAIT_PROPERTY(implementation, extension, match_all) __OMP_TRAIT_PROPERTY(implementation, extension, match_any) __OMP_TRAIT_PROPERTY(implementation, extension, match_none) __OMP_TRAIT_PROPERTY(implementation, extension, disable_implicit_base) +__OMP_TRAIT_PROPERTY(implementation, extension, allow_templates) __OMP_TRAIT_SET(user) From 56069b5c71ca78749aa983c1e9de6f1e4c049f4b Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 6 Aug 2020 15:46:44 -0500 Subject: [PATCH 0885/1079] [OpenMP] Support `std::complex` math functions in target regions The last (big) missing piece to get "math" working in OpenMP target regions (that I know of) was complex math functions, e.g., `std::sin(std::complex)`. With this patch we overload the system template functions for these operations with versions that have been distilled from `libcxx/include/complex`. We use the same `omp begin/end declare variant` mechanism we use for other math functions before, except that we this time overload templates (via D85735). 
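For reference, a minimal sketch (not part of this patch) of the kind of user
code this enables, compiled with `-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda`:

  #include <complex>

  int main() {
    std::complex<double> z(0.5, 0.5);
  #pragma omp target map(tofrom : z)
    { z = std::sin(z) + std::exp(z); } // resolves to the distilled overloads
    return 0;
  }

The `allow_templates` trait extension is what lets the function templates in
the wrapper's `begin/end declare variant` block act as variants of the system
library's `std::complex` math templates.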
Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D85777
---
 clang/lib/Headers/CMakeLists.txt              |   1 +
 clang/lib/Headers/openmp_wrappers/complex     |  25 ++
 .../Headers/openmp_wrappers/complex_cmath.h   | 388 ++++++++++++++++++
 clang/test/Headers/Inputs/include/complex     | 111 +++++
 clang/test/Headers/Inputs/include/type_traits |  43 ++
 .../Headers/nvptx_device_math_complex.cpp     |  39 ++
 6 files changed, 607 insertions(+)
 create mode 100644 clang/lib/Headers/openmp_wrappers/complex_cmath.h
 create mode 100644 clang/test/Headers/Inputs/include/type_traits

diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 0692fe75a4417..a9761f0490675 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -154,6 +154,7 @@ set(openmp_wrapper_files
   openmp_wrappers/complex.h
   openmp_wrappers/complex
   openmp_wrappers/__clang_openmp_device_functions.h
+  openmp_wrappers/complex_cmath.h
   openmp_wrappers/new
 )
 
diff --git a/clang/lib/Headers/openmp_wrappers/complex b/clang/lib/Headers/openmp_wrappers/complex
index 1ed0b14879efb..306ffe2080534 100644
--- a/clang/lib/Headers/openmp_wrappers/complex
+++ b/clang/lib/Headers/openmp_wrappers/complex
@@ -23,3 +23,28 @@
 
 // Grab the host header too.
 #include_next <complex>
+
+
+#ifdef __cplusplus
+
+// If we are compiling against libc++, the macro _LIBCPP_STD_VER should be set
+// after including <cmath> above. Since the complex header we use is a
+// simplified version of the libc++ one, we don't need it in this case. If we
+// compile against libstdc++, or any other standard library, we will overload
+// the (hopefully template) functions in the <complex> header with the ones we
+// got from libc++, which decompose math functions, like `std::sin`, into
+// arithmetic and calls to non-complex functions, all of which we can then
+// handle.
+#ifndef _LIBCPP_STD_VER
+
+#pragma omp begin declare variant match(                                       \
+    device = {arch(nvptx, nvptx64)},                                           \
+    implementation = {extension(match_any, allow_templates)})
+
+#include <complex_cmath.h>
+
+#pragma omp end declare variant
+
+#endif
+
+#endif
diff --git a/clang/lib/Headers/openmp_wrappers/complex_cmath.h b/clang/lib/Headers/openmp_wrappers/complex_cmath.h
new file mode 100644
index 0000000000000..e3d9aebbbc243
--- /dev/null
+++ b/clang/lib/Headers/openmp_wrappers/complex_cmath.h
@@ -0,0 +1,388 @@
+//===------------------------- __complex_cmath.h --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// std::complex header copied from the libcxx source and simplified for use in
+// OpenMP target offload regions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _OPENMP
+#error "This file is for OpenMP compilation only."
+#endif
+
+#ifndef __cplusplus
+#error "This file is for C++ compilation only."
+#endif
+
+#ifndef _LIBCPP_COMPLEX
+#define _LIBCPP_COMPLEX
+
+#include <cmath>
+#include <type_traits>
+
+#define __DEVICE__ static constexpr __attribute__((nothrow))
+
+namespace std {
+
+// abs
+
+template <class _Tp> __DEVICE__ _Tp abs(const std::complex<_Tp> &__c) {
+  return hypot(__c.real(), __c.imag());
+}
+
+// arg
+
+template <class _Tp> __DEVICE__ _Tp arg(const std::complex<_Tp> &__c) {
+  return atan2(__c.imag(), __c.real());
+}
+
+template <class _Tp>
+typename enable_if<is_integral<_Tp>::value || is_same<_Tp, double>::value,
+                   double>::type
+arg(_Tp __re) {
+  return atan2(0., __re);
+}
+
+template <class _Tp>
+typename enable_if<is_same<_Tp, float>::value, float>::type arg(_Tp __re) {
+  return atan2f(0.F, __re);
+}
+
+// norm
+
+template <class _Tp> __DEVICE__ _Tp norm(const std::complex<_Tp> &__c) {
+  if (std::isinf(__c.real()))
+    return abs(__c.real());
+  if (std::isinf(__c.imag()))
+    return abs(__c.imag());
+  return __c.real() * __c.real() + __c.imag() * __c.imag();
+}
+
+// conj
+
+template <class _Tp> std::complex<_Tp> conj(const std::complex<_Tp> &__c) {
+  return std::complex<_Tp>(__c.real(), -__c.imag());
+}
+
+// proj
+
+template <class _Tp> std::complex<_Tp> proj(const std::complex<_Tp> &__c) {
+  std::complex<_Tp> __r = __c;
+  if (std::isinf(__c.real()) || std::isinf(__c.imag()))
+    __r = std::complex<_Tp>(INFINITY, copysign(_Tp(0), __c.imag()));
+  return __r;
+}
+
+// polar
+
+template <class _Tp>
+complex<_Tp> polar(const _Tp &__rho, const _Tp &__theta = _Tp()) {
+  if (std::isnan(__rho) || signbit(__rho))
+    return std::complex<_Tp>(_Tp(NAN), _Tp(NAN));
+  if (std::isnan(__theta)) {
+    if (std::isinf(__rho))
+      return std::complex<_Tp>(__rho, __theta);
+    return std::complex<_Tp>(__theta, __theta);
+  }
+  if (std::isinf(__theta)) {
+    if (std::isinf(__rho))
+      return std::complex<_Tp>(__rho, _Tp(NAN));
+    return std::complex<_Tp>(_Tp(NAN), _Tp(NAN));
+  }
+  _Tp __x = __rho * cos(__theta);
+  if (std::isnan(__x))
+    __x = 0;
+  _Tp __y = __rho * sin(__theta);
+  if (std::isnan(__y))
+    __y = 0;
+  return std::complex<_Tp>(__x, __y);
+}
+
+// log
+
+template <class _Tp> std::complex<_Tp> log(const std::complex<_Tp> &__x) {
+  return std::complex<_Tp>(log(abs(__x)), arg(__x));
+}
+
+// log10
+
+template <class _Tp> std::complex<_Tp> log10(const std::complex<_Tp> &__x) {
+  return log(__x) / log(_Tp(10));
+}
+
+// sqrt
+
+template <class _Tp>
+__DEVICE__ std::complex<_Tp> sqrt(const std::complex<_Tp> &__x) {
+  if (std::isinf(__x.imag()))
+    return std::complex<_Tp>(_Tp(INFINITY), __x.imag());
+  if (std::isinf(__x.real())) {
+    if (__x.real() > _Tp(0))
+      return std::complex<_Tp>(__x.real(), std::isnan(__x.imag())
+                                               ? __x.imag()
+                                               : copysign(_Tp(0), __x.imag()));
+    return std::complex<_Tp>(std::isnan(__x.imag()) ? __x.imag() : _Tp(0),
+                             copysign(__x.real(), __x.imag()));
+  }
+  return polar(sqrt(abs(__x)), arg(__x) / _Tp(2));
+}
+
+// exp
+
+template <class _Tp>
+__DEVICE__ std::complex<_Tp> exp(const std::complex<_Tp> &__x) {
+  _Tp __i = __x.imag();
+  if (std::isinf(__x.real())) {
+    if (__x.real() < _Tp(0)) {
+      if (!std::isfinite(__i))
+        __i = _Tp(1);
+    } else if (__i == 0 || !std::isfinite(__i)) {
+      if (std::isinf(__i))
+        __i = _Tp(NAN);
+      return std::complex<_Tp>(__x.real(), __i);
+    }
+  } else if (std::isnan(__x.real()) && __x.imag() == 0)
+    return __x;
+  _Tp __e = exp(__x.real());
+  return std::complex<_Tp>(__e * cos(__i), __e * sin(__i));
+}
+
+// pow
+
+template <class _Tp>
+std::complex<_Tp> pow(const std::complex<_Tp> &__x,
+                      const std::complex<_Tp> &__y) {
+  return exp(__y * log(__x));
+}
+
+// __sqr, computes pow(x, 2)
+
+template <class _Tp> std::complex<_Tp> __sqr(const std::complex<_Tp> &__x) {
+  return std::complex<_Tp>((__x.real() - __x.imag()) *
+                               (__x.real() + __x.imag()),
+                           _Tp(2) * __x.real() * __x.imag());
+}
+
+// asinh
+
+template <class _Tp>
+__DEVICE__ std::complex<_Tp> asinh(const std::complex<_Tp> &__x) {
+  const _Tp __pi(atan2(+0., -0.));
+  if (std::isinf(__x.real())) {
+    if (std::isnan(__x.imag()))
+      return __x;
+    if (std::isinf(__x.imag()))
+      return std::complex<_Tp>(__x.real(),
+                               copysign(__pi * _Tp(0.25), __x.imag()));
+    return std::complex<_Tp>(__x.real(), copysign(_Tp(0), __x.imag()));
+  }
+  if (std::isnan(__x.real())) {
+    if (std::isinf(__x.imag()))
+      return std::complex<_Tp>(__x.imag(), __x.real());
+    if (__x.imag() == 0)
+      return __x;
+    return std::complex<_Tp>(__x.real(), __x.real());
+  }
+  if (std::isinf(__x.imag()))
+    return std::complex<_Tp>(copysign(__x.imag(), __x.real()),
+                             copysign(__pi / _Tp(2), __x.imag()));
+  std::complex<_Tp> __z = log(__x + sqrt(__sqr(__x) + _Tp(1)));
+  return std::complex<_Tp>(copysign(__z.real(), __x.real()),
+                           copysign(__z.imag(), __x.imag()));
+}
+
+// acosh
+
+template <class _Tp>
+__DEVICE__ std::complex<_Tp> acosh(const std::complex<_Tp> &__x) {
+  const _Tp __pi(atan2(+0., -0.));
+  if (std::isinf(__x.real())) {
+    if (std::isnan(__x.imag()))
+      return std::complex<_Tp>(abs(__x.real()), __x.imag());
+    if (std::isinf(__x.imag())) {
+      if (__x.real() > 0)
+        return std::complex<_Tp>(__x.real(),
+                                 copysign(__pi * _Tp(0.25), __x.imag()));
+      else
+        return std::complex<_Tp>(-__x.real(),
+                                 copysign(__pi * _Tp(0.75), __x.imag()));
+    }
+    if (__x.real() < 0)
+      return std::complex<_Tp>(-__x.real(), copysign(__pi, __x.imag()));
+    return std::complex<_Tp>(__x.real(), copysign(_Tp(0), __x.imag()));
+  }
+  if (std::isnan(__x.real())) {
+    if (std::isinf(__x.imag()))
+      return std::complex<_Tp>(abs(__x.imag()), __x.real());
+    return std::complex<_Tp>(__x.real(), __x.real());
+  }
+  if (std::isinf(__x.imag()))
+    return std::complex<_Tp>(abs(__x.imag()),
+                             copysign(__pi / _Tp(2), __x.imag()));
+  std::complex<_Tp> __z = log(__x + sqrt(__sqr(__x) - _Tp(1)));
+  return std::complex<_Tp>(copysign(__z.real(), _Tp(0)),
+                           copysign(__z.imag(), __x.imag()));
+}
+
+// atanh
+
+template <class _Tp>
+__DEVICE__ std::complex<_Tp> atanh(const std::complex<_Tp> &__x) {
+  const _Tp __pi(atan2(+0., -0.));
+  if (std::isinf(__x.imag())) {
+    return std::complex<_Tp>(copysign(_Tp(0), __x.real()),
+                             copysign(__pi / _Tp(2), __x.imag()));
+  }
+  if (std::isnan(__x.imag())) {
+    if (std::isinf(__x.real()) || __x.real() == 0)
+      return std::complex<_Tp>(copysign(_Tp(0), __x.real()), __x.imag());
+    return std::complex<_Tp>(__x.imag(), __x.imag());
+  }
+  if (std::isnan(__x.real())) {
+    return std::complex<_Tp>(__x.real(), __x.real());
+  }
+  if (std::isinf(__x.real())) {
+    return std::complex<_Tp>(copysign(_Tp(0), __x.real()),
+                             copysign(__pi / _Tp(2), __x.imag()));
+  }
+  if (abs(__x.real()) == _Tp(1) && __x.imag() == _Tp(0)) {
+    return std::complex<_Tp>(copysign(_Tp(INFINITY), __x.real()),
+                             copysign(_Tp(0), __x.imag()));
+  }
+  std::complex<_Tp> __z = log((_Tp(1) + __x) / (_Tp(1) - __x)) / _Tp(2);
+  return std::complex<_Tp>(copysign(__z.real(), __x.real()),
+                           copysign(__z.imag(), __x.imag()));
+}
+
+// sinh
+
+template <class _Tp>
+__DEVICE__ std::complex<_Tp> sinh(const std::complex<_Tp> &__x) {
+  if (std::isinf(__x.real()) && !std::isfinite(__x.imag()))
+    return std::complex<_Tp>(__x.real(), _Tp(NAN));
+  if (__x.real() == 0 && !std::isfinite(__x.imag()))
+    return std::complex<_Tp>(__x.real(), _Tp(NAN));
+  if (__x.imag() == 0 && !std::isfinite(__x.real()))
+    return __x;
+  return std::complex<_Tp>(sinh(__x.real()) * cos(__x.imag()),
+                           cosh(__x.real()) * sin(__x.imag()));
+}
+
+// cosh
+
+template <class _Tp>
+__DEVICE__ std::complex<_Tp> cosh(const std::complex<_Tp> &__x) {
+  if (std::isinf(__x.real()) && !std::isfinite(__x.imag()))
+    return std::complex<_Tp>(abs(__x.real()), _Tp(NAN));
+  if (__x.real() == 0 && !std::isfinite(__x.imag()))
+    return std::complex<_Tp>(_Tp(NAN), __x.real());
+  if (__x.real() == 0 && __x.imag() == 0)
+    return std::complex<_Tp>(_Tp(1), __x.imag());
+  if (__x.imag() == 0 && !std::isfinite(__x.real()))
+    return std::complex<_Tp>(abs(__x.real()), __x.imag());
+  return std::complex<_Tp>(cosh(__x.real()) * cos(__x.imag()),
+                           sinh(__x.real()) * sin(__x.imag()));
+}
+
+// tanh
+
+template <class _Tp>
+__DEVICE__ std::complex<_Tp> tanh(const std::complex<_Tp> &__x) {
+  if (std::isinf(__x.real())) {
+    if (!std::isfinite(__x.imag()))
+      return std::complex<_Tp>(_Tp(1), _Tp(0));
+    return std::complex<_Tp>(_Tp(1),
+                             copysign(_Tp(0), sin(_Tp(2) * __x.imag())));
+  }
+  if (std::isnan(__x.real()) && __x.imag() == 0)
+    return __x;
+  _Tp __2r(_Tp(2) * __x.real());
+  _Tp __2i(_Tp(2) * __x.imag());
+  _Tp __d(cosh(__2r) + cos(__2i));
+  _Tp __2rsh(sinh(__2r));
+  if (std::isinf(__2rsh) && std::isinf(__d))
+    return std::complex<_Tp>(__2rsh > _Tp(0) ? _Tp(1) : _Tp(-1),
+                             __2i > _Tp(0) ? _Tp(0) : _Tp(-0.));
+  return std::complex<_Tp>(__2rsh / __d, sin(__2i) / __d);
+}
+
+// asin
+
+template <class _Tp>
+__DEVICE__ std::complex<_Tp> asin(const std::complex<_Tp> &__x) {
+  std::complex<_Tp> __z = asinh(complex<_Tp>(-__x.imag(), __x.real()));
+  return std::complex<_Tp>(__z.imag(), -__z.real());
+}
+
+// acos
+
+template <class _Tp>
+__DEVICE__ std::complex<_Tp> acos(const std::complex<_Tp> &__x) {
+  const _Tp __pi(atan2(+0., -0.));
+  if (std::isinf(__x.real())) {
+    if (std::isnan(__x.imag()))
+      return std::complex<_Tp>(__x.imag(), __x.real());
+    if (std::isinf(__x.imag())) {
+      if (__x.real() < _Tp(0))
+        return std::complex<_Tp>(_Tp(0.75) * __pi, -__x.imag());
+      return std::complex<_Tp>(_Tp(0.25) * __pi, -__x.imag());
+    }
+    if (__x.real() < _Tp(0))
+      return std::complex<_Tp>(__pi,
+                               signbit(__x.imag()) ? -__x.real() : __x.real());
+    return std::complex<_Tp>(_Tp(0),
+                             signbit(__x.imag()) ? __x.real() : -__x.real());
+  }
+  if (std::isnan(__x.real())) {
+    if (std::isinf(__x.imag()))
+      return std::complex<_Tp>(__x.real(), -__x.imag());
+    return std::complex<_Tp>(__x.real(), __x.real());
+  }
+  if (std::isinf(__x.imag()))
+    return std::complex<_Tp>(__pi / _Tp(2), -__x.imag());
+  if (__x.real() == 0 && (__x.imag() == 0 || isnan(__x.imag())))
+    return std::complex<_Tp>(__pi / _Tp(2), -__x.imag());
+  std::complex<_Tp> __z = log(__x + sqrt(__sqr(__x) - _Tp(1)));
+  if (signbit(__x.imag()))
+    return std::complex<_Tp>(abs(__z.imag()), abs(__z.real()));
+  return std::complex<_Tp>(abs(__z.imag()), -abs(__z.real()));
+}
+
+// atan
+
+template <class _Tp>
+__DEVICE__ std::complex<_Tp> atan(const std::complex<_Tp> &__x) {
+  std::complex<_Tp> __z = atanh(complex<_Tp>(-__x.imag(), __x.real()));
+  return std::complex<_Tp>(__z.imag(), -__z.real());
+}
+
+// sin
+
+template <class _Tp>
+__DEVICE__ std::complex<_Tp> sin(const std::complex<_Tp> &__x) {
+  std::complex<_Tp> __z = sinh(complex<_Tp>(-__x.imag(), __x.real()));
+  return std::complex<_Tp>(__z.imag(), -__z.real());
+}
+
+// cos
+
+template <class _Tp> std::complex<_Tp> cos(const std::complex<_Tp> &__x) {
+  return cosh(complex<_Tp>(-__x.imag(), __x.real()));
+}
+
+// tan
+
+template <class _Tp>
+__DEVICE__ std::complex<_Tp> tan(const std::complex<_Tp> &__x) {
+  std::complex<_Tp> __z = tanh(complex<_Tp>(-__x.imag(), __x.real()));
+  return std::complex<_Tp>(__z.imag(), -__z.real());
+}
+
+} // namespace std
+
+#endif
diff --git a/clang/test/Headers/Inputs/include/complex b/clang/test/Headers/Inputs/include/complex
index f3aefab7954be..bd43cd952d7cd 100644
--- a/clang/test/Headers/Inputs/include/complex
+++ b/clang/test/Headers/Inputs/include/complex
@@ -3,6 +3,7 @@
 #include <cmath>
 
 #define INFINITY (__builtin_inff())
+#define NAN (__builtin_nanf (""))
 
 namespace std {
 
@@ -298,4 +299,114 @@
 operator!=(const _Tp &__x, const complex<_Tp> &__y) {
   return !(__x == __y);
 }
 
+template <class _Tp> _Tp abs(const std::complex<_Tp> &__c);
+
+// arg
+
+template <class _Tp> _Tp arg(const std::complex<_Tp> &__c);
+
+// norm
+
+template <class _Tp> _Tp norm(const std::complex<_Tp> &__c);
+
+// conj
+
+template <class _Tp> std::complex<_Tp> conj(const std::complex<_Tp> &__c);
+
+// proj
+
+template <class _Tp> std::complex<_Tp> proj(const std::complex<_Tp> &__c);
+
+// polar
+
+template <class _Tp>
+complex<_Tp> polar(const _Tp &__rho, const _Tp &__theta = _Tp());
+
+// log
+
+template <class _Tp> std::complex<_Tp> log(const std::complex<_Tp> &__x);
+
+// log10
+
+template <class _Tp> std::complex<_Tp> log10(const std::complex<_Tp> &__x);
+
+// sqrt
+
+template <class _Tp>
+std::complex<_Tp> sqrt(const std::complex<_Tp> &__x);
+
+// exp
+
+template <class _Tp>
+std::complex<_Tp> exp(const std::complex<_Tp> &__x);
+
+// pow
+
+template <class _Tp>
+std::complex<_Tp> pow(const std::complex<_Tp> &__x,
+                      const std::complex<_Tp> &__y);
+
+// __sqr, computes pow(x, 2)
+
+template <class _Tp> std::complex<_Tp> __sqr(const std::complex<_Tp> &__x);
+
+// asinh
+
+template <class _Tp>
+std::complex<_Tp> asinh(const std::complex<_Tp> &__x);
+
+// acosh
+
+template <class _Tp>
+std::complex<_Tp> acosh(const std::complex<_Tp> &__x);
+
+// atanh
+
+template <class _Tp>
+std::complex<_Tp> atanh(const std::complex<_Tp> &__x);
+
+// sinh
+
+template <class _Tp>
+std::complex<_Tp> sinh(const std::complex<_Tp> &__x);
+
+// cosh
+
+template <class _Tp>
+std::complex<_Tp> cosh(const std::complex<_Tp> &__x);
+
+// tanh
+
+template <class _Tp>
+std::complex<_Tp> tanh(const std::complex<_Tp> &__x);
+
+// asin
+
+template <class _Tp>
+std::complex<_Tp> asin(const std::complex<_Tp> &__x);
+
+// acos
+
+template <class _Tp>
+std::complex<_Tp> acos(const std::complex<_Tp> &__x);
+
+// atan
+
+template <class _Tp>
+std::complex<_Tp> atan(const std::complex<_Tp> &__x);
+
+// sin
+
+template <class _Tp>
+std::complex<_Tp> sin(const std::complex<_Tp> &__x);
+
+// cos
+
+template <class _Tp> std::complex<_Tp> cos(const std::complex<_Tp> &__x);
+
+// tan
+
+template <class _Tp>
+std::complex<_Tp> tan(const std::complex<_Tp> &__x);
+
 } // namespace std
diff --git a/clang/test/Headers/Inputs/include/type_traits b/clang/test/Headers/Inputs/include/type_traits
new file mode 100644
index 0000000000000..9fd02d51eff13
--- /dev/null
+++ b/clang/test/Headers/Inputs/include/type_traits
@@ -0,0 +1,43 @@
+/// Copied from libcxx type_traits and simplified
+
+#pragma once
+
+namespace std {
+
+template <class _Tp, _Tp __v>
+struct integral_constant {
+  static const _Tp value = __v;
+  typedef _Tp value_type;
+  typedef integral_constant type;
+};
+
+typedef integral_constant<bool, true> true_type;
+typedef integral_constant<bool, false> false_type;
+
+// is_same, functional
+template <class _Tp, class _Up> struct is_same : public false_type {};
+template <class _Tp> struct is_same<_Tp, _Tp> : public true_type {};
+
+// is_integral, for some types.
+template <class _Tp> struct is_integral
+    : public integral_constant<bool, false> {};
+template <> struct is_integral<bool>
+    : public integral_constant<bool, true> {};
+template <> struct is_integral<char>
+    : public integral_constant<bool, true> {};
+template <> struct is_integral<short>
+    : public integral_constant<bool, true> {};
+template <> struct is_integral<int>
+    : public integral_constant<bool, true> {};
+template <> struct is_integral<long>
+    : public integral_constant<bool, true> {};
+template <> struct is_integral<long long>
+    : public integral_constant<bool, true> {};
+
+// enable_if, functional
+template <bool, class _Tp = void> struct enable_if{};
+template <class _Tp> struct enable_if<true, _Tp>{
+  using type = _Tp;
+};
+
+}
diff --git a/clang/test/Headers/nvptx_device_math_complex.cpp b/clang/test/Headers/nvptx_device_math_complex.cpp
index e4b78deb05d7b..688fd5d101eab 100644
--- a/clang/test/Headers/nvptx_device_math_complex.cpp
+++ b/clang/test/Headers/nvptx_device_math_complex.cpp
@@ -3,6 +3,7 @@
 // RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -aux-triple powerpc64le-unknown-unknown -o - | FileCheck %s
 // expected-no-diagnostics
 
+#include <cmath>
 #include <complex>
 
 // CHECK: define weak {{.*}} @__muldc3
@@ -33,6 +34,12 @@
 // CHECK-DAG: call float @__nv_fabsf(
 // CHECK-DAG: call float @__nv_logbf(
 
+// We actually check that there are no declarations of non-OpenMP functions.
+// That is, as long as we don't call an unknown function with a name that
+// doesn't start with '__' we are good :)
+
+// CHECK-NOT: declare.*@[^_]
+
 void test_scmplx(std::complex<float> a) {
 #pragma omp target
   {
@@ -46,3 +53,35 @@ void test_dcmplx(std::complex<double> a) {
     (void)(a * (a / a));
   }
 }
+
+template <typename T>
+std::complex<T> test_template_math_calls(std::complex<T> a) {
+  decltype(a) r = a;
+#pragma omp target
+  {
+    r = std::sin(r);
+    r = std::cos(r);
+    r = std::exp(r);
+    r = std::atan(r);
+    r = std::acos(r);
+  }
+  return r;
+}
+
+std::complex<float> test_scall(std::complex<float> a) {
+  decltype(a) r;
+#pragma omp target
+  {
+    r = std::sin(a);
+  }
+  return test_template_math_calls(r);
+}
+
+std::complex<double> test_dcall(std::complex<double> a) {
+  decltype(a) r;
+#pragma omp target
+  {
+    r = std::exp(a);
+  }
+  return test_template_math_calls(r);
+}

From 91f503c3af190e19974f8832871e363d232cd64c Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Wed, 16 Sep 2020 11:09:25 -0700
Subject: [PATCH 0886/1079] [AMDGPU] gfx1030 RT support

Differential Revision: https://reviews.llvm.org/D87782
---
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td      |   8 +
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |  22 ++-
 .../Disassembler/AMDGPUDisassembler.cpp       |  14 +-
 llvm/lib/Target/AMDGPU/MIMGInstructions.td    |  54 ++++++
 llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp       |   5 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  84 ++++++++-
 .../Target/AMDGPU/SILoadStoreOptimizer.cpp    |   9 +
 .../Target/AMDGPU/SIShrinkInstructions.cpp    |   4 +-
 .../AMDGPU/llvm.amdgcn.intersect_ray.ll       | 162 ++++++++++++++++++
 llvm/test/MC/AMDGPU/gfx1011_err.s             |   8 +-
 llvm/test/MC/AMDGPU/gfx1030_new.s             |  24 +++
 .../Disassembler/AMDGPU/gfx1030_dasm_new.txt  |  24 +++
 12 files changed, 403 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 2aff207ce0149..62f009b666d08 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1698,6 +1698,14 @@ class AMDGPUGlobalAtomicRtn<LLVMType vt> : Intrinsic <
 
 def int_amdgcn_global_atomic_csub : AMDGPUGlobalAtomicRtn<llvm_i32_ty>;
 
+// uint4 llvm.amdgcn.image.bvh.intersect.ray <node_ptr>, <ray_extent>, <ray_origin>,
+//                                           <ray_dir>, <ray_inv_dir>, <texture_descr>
+def int_amdgcn_image_bvh_intersect_ray :
+  Intrinsic<[llvm_v4i32_ty],
+            [llvm_anyint_ty, llvm_float_ty, llvm_v4f32_ty, llvm_anyvector_ty,
+             LLVMMatchType<1>, llvm_v4i32_ty],
+            [IntrReadMem]>;
+
 //===----------------------------------------------------------------------===//
 // Deep learning intrinsics.
//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 0460d861aebea..e1369e8f5c95f 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1444,6 +1444,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser { void cvtMIMG(MCInst &Inst, const OperandVector &Operands, bool IsAtomic = false); void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); + void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); OperandMatchResultTy parseDim(OperandVector &Operands); OperandMatchResultTy parseDPP8(OperandVector &Operands); @@ -3109,8 +3110,9 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); assert(VDataIdx != -1); - assert(DMaskIdx != -1); - assert(TFEIdx != -1); + + if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray + return true; unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; @@ -3137,6 +3139,7 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { return true; const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); + const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); @@ -3145,9 +3148,11 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { assert(VAddr0Idx != -1); assert(SrsrcIdx != -1); - assert(DimIdx != -1); assert(SrsrcIdx > VAddr0Idx); + if (DimIdx == -1) + return true; // intersect_ray + unsigned Dim = Inst.getOperand(DimIdx).getImm(); const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); bool IsNSA = SrsrcIdx - VAddr0Idx > 1; @@ -6466,6 +6471,17 @@ void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) cvtMIMG(Inst, Operands, true); } +void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, + const OperandVector &Operands) { + for (unsigned I = 1; I < Operands.size(); ++I) { + auto &Operand = (AMDGPUOperand &)*Operands[I]; + if (Operand.isReg()) + Operand.addRegOperands(Inst, 1); + } + + Inst.addOperand(MCOperand::createImm(1)); // a16 +} + //===----------------------------------------------------------------------===// // smrd //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 9c2f2e7eecd14..b7dde61f608bf 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -139,6 +139,8 @@ DECODE_OPERAND_REG(VS_128) DECODE_OPERAND_REG(VReg_64) DECODE_OPERAND_REG(VReg_96) DECODE_OPERAND_REG(VReg_128) +DECODE_OPERAND_REG(VReg_256) +DECODE_OPERAND_REG(VReg_512) DECODE_OPERAND_REG(SReg_32) DECODE_OPERAND_REG(SReg_32_XM0_XEXEC) @@ -499,8 +501,16 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { AMDGPU::OpName::d16); assert(VDataIdx != -1); - assert(DMaskIdx != -1); - assert(TFEIdx != -1); + if (DMaskIdx == -1 || TFEIdx == -1) {// intersect_ray + if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16) > -1) { + assert(MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa || + MI.getOpcode() == 
AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa ||
+             MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa ||
+             MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa);
+      addOperand(MI, MCOperand::createImm(1));
+    }
+    return MCDisassembler::Success;
+  }
 
   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
   bool IsAtomic = (VDstIdx != -1);
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index ba7d9ad2eda1a..c223e1a8bc265 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -708,6 +708,55 @@ multiclass MIMG_Gather <bits<8> op, AMDGPUSampleVariant sample, bit wqm = 0,
 multiclass MIMG_Gather_WQM <bits<8> op, AMDGPUSampleVariant sample>
     : MIMG_Gather<op, sample, 1>;
 
+class MIMG_IntersectRay_gfx10<int op, string opcode, RegisterClass AddrRC, bit A16>
+    : MIMG_gfx10<op, (outs VReg_128:$vdata), "GFX10"> {
+
+  let InOperandList = !con((ins AddrRC:$vaddr0, SReg_128:$srsrc),
+                           !if(!eq(A16,1), (ins GFX10A16:$a16), (ins)));
+  let AsmString = opcode#" $vdata, $vaddr0, $srsrc"#!if(!eq(A16,1), "$a16", "");
+
+  let nsa = 0;
+}
+
+class MIMG_IntersectRay_nsa_gfx10<int op, string opcode, int num_addrs, bit A16>
+    : MIMG_nsa_gfx10<op, (outs VReg_128:$vdata), num_addrs, "GFX10"> {
+  let InOperandList = !con(nsah.AddrIns,
+                           (ins SReg_128:$srsrc),
+                           !if(!eq(A16,1), (ins GFX10A16:$a16), (ins)));
+  let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(!eq(A16,1), "$a16", "");
+}
+
+multiclass MIMG_IntersectRay<int op, string opcode, int num_addrs, bit A16> {
+  def "" : MIMGBaseOpcode;
+  let SubtargetPredicate = HasGFX10_BEncoding,
+      AssemblerPredicate = HasGFX10_BEncoding,
+      AsmMatchConverter = !if(!eq(A16,1), "cvtIntersectRay", ""),
+      dmask = 0xf,
+      unorm = 1,
+      d16 = 0,
+      glc = 0,
+      slc = 0,
+      dlc = 0,
+      tfe = 0,
+      lwe = 0,
+      r128 = 1,
+      ssamp = 0,
+      dim = {0, 0, 0},
+      a16 = A16,
+      d16 = 0,
+      BaseOpcode = !cast<MIMGBaseOpcode>(NAME),
+      VDataDwords = 4 in {
+    // TODO: MIMGAddrSize will choose VReg_512 which is a 16 register tuple,
+    // when we only need 9, 11 or 12 depending on A16 field and ptr size.
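+    // (Those counts break down as: node_ptr takes 1 address dword, or 2 for
+    // bvh64; ray_extent takes 1; ray_origin takes 3; and ray_dir plus
+    // ray_inv_dir take 6 dwords as f32, or 3 when packed into f16 pairs
+    // with a16, giving 8, 9, 11 or 12 in total.)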
+    def "_sa" : MIMG_IntersectRay_gfx10<op, opcode, MIMGAddrSize<num_addrs, 0>.RegClass, A16> {
+      let VAddrDwords = !srl(MIMGAddrSize<num_addrs, 0>.RegClass.Size, 5);
+    }
+    def _nsa : MIMG_IntersectRay_nsa_gfx10<op, opcode, num_addrs, A16> {
+      let VAddrDwords = num_addrs;
+    }
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // MIMG Instructions
 //===----------------------------------------------------------------------===//
@@ -832,6 +881,11 @@ defm IMAGE_SAMPLE_C_CD_CL_O_G16 : MIMG_Sampler <0x000000ef, AMDGPUSample_c_cd_cl
 let SubtargetPredicate = HasGFX10_BEncoding in
 defm IMAGE_MSAA_LOAD : MIMG_NoSampler <0x00000080, "image_msaa_load", 1>;
 
+defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<0xe6, "image_bvh_intersect_ray", 11, 0>;
+defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay<0xe6, "image_bvh_intersect_ray", 8, 1>;
+defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<0xe7, "image_bvh64_intersect_ray", 12, 0>;
+defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<0xe7, "image_bvh64_intersect_ray", 9, 1>;
+
 /********** ========================================= **********/
 /********** Table of dimension-aware image intrinsics **********/
 /********** ========================================= **********/
diff --git a/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp b/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp
index 90e48c63b5dca..0a0532c629595 100644
--- a/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp
+++ b/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp
@@ -80,9 +80,8 @@ bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
       MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
       MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
 
-      // Check for instructions that don't have tfe or lwe fields
-      // There shouldn't be any at this point.
-      assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction");
+      if (!TFE && !LWE) // intersect_ray
+        continue;
 
       unsigned TFEVal = TFE->getImm();
       unsigned LWEVal = LWE->getImm();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 6350562ec4f95..e119d65a7f0ac 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1194,6 +1194,17 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                  MachineMemOperand::MOVolatile;
     return true;
   }
+  case Intrinsic::amdgcn_image_bvh_intersect_ray: {
+    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::getVT(CI.getType()); // XXX: what is correct VT?
+    Info.ptrVal = MFI->getImagePSV(
+        *MF.getSubtarget<GCNSubtarget>().getInstrInfo(), CI.getArgOperand(5));
+    Info.align.reset();
+    Info.flags = MachineMemOperand::MOLoad |
+                 MachineMemOperand::MODereferenceable;
+    return true;
+  }
   case Intrinsic::amdgcn_ds_gws_init:
   case Intrinsic::amdgcn_ds_gws_barrier:
   case Intrinsic::amdgcn_ds_gws_sema_v:
@@ -7318,6 +7329,76 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                    DAG.getVTList(VT, MVT::Other), Ops,
                                    M->getMemOperand());
   }
+  case Intrinsic::amdgcn_image_bvh_intersect_ray: {
+    SDLoc DL(Op);
+    MemSDNode *M = cast<MemSDNode>(Op);
+    SDValue NodePtr = M->getOperand(2);
+    SDValue RayExtent = M->getOperand(3);
+    SDValue RayOrigin = M->getOperand(4);
+    SDValue RayDir = M->getOperand(5);
+    SDValue RayInvDir = M->getOperand(6);
+    SDValue TDescr = M->getOperand(7);
+
+    assert(NodePtr.getValueType() == MVT::i32 ||
+           NodePtr.getValueType() == MVT::i64);
+    assert(RayDir.getValueType() == MVT::v4f16 ||
+           RayDir.getValueType() == MVT::v4f32);
+
+    bool IsA16 = RayDir.getValueType().getVectorElementType() == MVT::f16;
+    bool Is64 = NodePtr.getValueType() == MVT::i64;
+    unsigned Opcode = IsA16 ? Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa
+                                   : AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa
+                            : Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa
+                                   : AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa;
+
+    SmallVector<SDValue, 16> Ops;
+
+    auto packLanes = [&DAG, &Ops, &DL] (SDValue Op, bool IsAligned) {
+      SmallVector<SDValue, 3> Lanes;
+      DAG.ExtractVectorElements(Op, Lanes, 0, 3);
+      if (Lanes[0].getValueSizeInBits() == 32) {
+        for (unsigned I = 0; I < 3; ++I)
+          Ops.push_back(DAG.getBitcast(MVT::i32, Lanes[I]));
+      } else {
+        if (IsAligned) {
+          Ops.push_back(
+            DAG.getBitcast(MVT::i32,
+                           DAG.getBuildVector(MVT::v2f16, DL,
+                                              { Lanes[0], Lanes[1] })));
+          Ops.push_back(Lanes[2]);
+        } else {
+          SDValue Elt0 = Ops.pop_back_val();
+          Ops.push_back(
+            DAG.getBitcast(MVT::i32,
+                           DAG.getBuildVector(MVT::v2f16, DL,
+                                              { Elt0, Lanes[0] })));
+          Ops.push_back(
+            DAG.getBitcast(MVT::i32,
+                           DAG.getBuildVector(MVT::v2f16, DL,
+                                              { Lanes[1], Lanes[2] })));
+        }
+      }
+    };
+
+    if (Is64)
+      DAG.ExtractVectorElements(DAG.getBitcast(MVT::v2i32, NodePtr), Ops, 0, 2);
+    else
+      Ops.push_back(NodePtr);
+
+    Ops.push_back(DAG.getBitcast(MVT::i32, RayExtent));
+    packLanes(RayOrigin, true);
+    packLanes(RayDir, true);
+    packLanes(RayInvDir, false);
+    Ops.push_back(TDescr);
+    if (IsA16)
+      Ops.push_back(DAG.getTargetConstant(1, DL, MVT::i1));
+    Ops.push_back(M->getChain());
+
+    auto *NewNode = DAG.getMachineNode(Opcode, DL, M->getVTList(), Ops);
+    MachineMemOperand *MemRef = M->getMemOperand();
+    DAG.setNodeMemRefs(NewNode, {MemRef});
+    return SDValue(NewNode, 0);
+  }
   default:
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
             AMDGPU::getImageDimIntrinsicInfo(IntrID))
@@ -10963,7 +11044,8 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
   unsigned Opcode = Node->getMachineOpcode();
 
   if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
-      !TII->isGather4(Opcode)) {
+      !TII->isGather4(Opcode) &&
+      AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::dmask) != -1) {
     return adjustWritemask(Node, DAG);
   }
 
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 3d612d56a9663..576828c9c8dfd 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -393,6 +393,15 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
   case AMDGPU::DS_WRITE_B64:
   case AMDGPU::DS_WRITE_B64_gfx9:
     return DS_WRITE;
+  case AMDGPU::IMAGE_BVH_INTERSECT_RAY_sa:
+  case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_sa:
+  case AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa:
+  case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa:
+  case AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa:
+  case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa:
+  case AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa:
+  case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa:
+    return UNKNOWN;
   }
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 8f718ce6cb466..0be245f7698e6 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -272,8 +272,8 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) {
   // enabled
   int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
   int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
-  unsigned TFEVal = MI.getOperand(TFEIdx).getImm();
-  unsigned LWEVal = MI.getOperand(LWEIdx).getImm();
+  unsigned TFEVal = (TFEIdx == -1) ? 0 : MI.getOperand(TFEIdx).getImm();
+  unsigned LWEVal = (LWEIdx == -1) ? 0 : MI.getOperand(LWEIdx).getImm();
   int ToUntie = -1;
   if (TFEVal || LWEVal) {
     // TFE/LWE is enabled so we need to deal with an implicit tied operand
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
new file mode 100644
index 0000000000000..d726b9c306be2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
@@ -0,0 +1,162 @@
+; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; uint4 llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(uint node_ptr, float ray_extent, float4 ray_origin, float4 ray_dir, float4 ray_inv_dir, uint4 texture_descr)
+; uint4 llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(uint node_ptr, float ray_extent, float4 ray_origin, half4 ray_dir, half4 ray_inv_dir, uint4 texture_descr)
+; uint4 llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(ulong node_ptr, float ray_extent, float4 ray_origin, float4 ray_dir, float4 ray_inv_dir, uint4 texture_descr)
+; uint4 llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(ulong node_ptr, float ray_extent, float4 ray_origin, half4 ray_dir, half4 ray_inv_dir, uint4 texture_descr)
+
+declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32, float, <4 x float>, <4 x float>, <4 x float>, <4 x i32>)
+declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32, float, <4 x float>, <4 x half>, <4 x half>, <4 x i32>)
+declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64, float, <4 x float>, <4 x float>, <4 x float>, <4 x i32>)
+declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64, float, <4 x float>, <4 x half>, <4 x half>, <4 x i32>)
+
+; GCN-LABEL: {{^}}image_bvh_intersect_ray:
+; GCN: image_bvh_intersect_ray v[0:3], v[0:15], s[0:3]{{$}}
+; Arguments are flattened to represent the actual VGPR_A layout, so we have no
+; extra moves in the generated kernel.
+define amdgpu_ps <4 x float> @image_bvh_intersect_ray(i32 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) { +main_body: + %ray_origin0 = insertelement <4 x float> undef, float %ray_origin_x, i32 0 + %ray_origin1 = insertelement <4 x float> %ray_origin0, float %ray_origin_y, i32 1 + %ray_origin = insertelement <4 x float> %ray_origin1, float %ray_origin_z, i32 2 + %ray_dir0 = insertelement <4 x float> undef, float %ray_dir_x, i32 0 + %ray_dir1 = insertelement <4 x float> %ray_dir0, float %ray_dir_y, i32 1 + %ray_dir = insertelement <4 x float> %ray_dir1, float %ray_dir_z, i32 2 + %ray_inv_dir0 = insertelement <4 x float> undef, float %ray_inv_dir_x, i32 0 + %ray_inv_dir1 = insertelement <4 x float> %ray_inv_dir0, float %ray_inv_dir_y, i32 1 + %ray_inv_dir = insertelement <4 x float> %ray_inv_dir1, float %ray_inv_dir_z, i32 2 + %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr) + %r = bitcast <4 x i32> %v to <4 x float> + ret <4 x float> %r +} + +; GCN-LABEL: {{^}}image_bvh_intersect_ray_a16: +; GCN: image_bvh_intersect_ray v[0:3], v[{{[0-9:]+}}], s[{{[0-9:]+}}] a16{{$}} +define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16(i32 inreg %node_ptr, float inreg %ray_extent, <4 x float> inreg %ray_origin, <4 x half> inreg %ray_dir, <4 x half> inreg %ray_inv_dir, <4 x i32> inreg %tdescr) { +main_body: + %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr) + %r = bitcast <4 x i32> %v to <4 x float> + ret <4 x float> %r +} + +; GCN-LABEL: {{^}}image_bvh64_intersect_ray: +; GCN: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3]{{$}} +; Arguments are flattened to represent the actual VGPR_A layout, so we have no +; extra moves in the generated kernel. 
+define amdgpu_ps <4 x float> @image_bvh64_intersect_ray(<2 x i32> %node_ptr_vec, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) { +main_body: + %node_ptr = bitcast <2 x i32> %node_ptr_vec to i64 + %ray_origin0 = insertelement <4 x float> undef, float %ray_origin_x, i32 0 + %ray_origin1 = insertelement <4 x float> %ray_origin0, float %ray_origin_y, i32 1 + %ray_origin = insertelement <4 x float> %ray_origin1, float %ray_origin_z, i32 2 + %ray_dir0 = insertelement <4 x float> undef, float %ray_dir_x, i32 0 + %ray_dir1 = insertelement <4 x float> %ray_dir0, float %ray_dir_y, i32 1 + %ray_dir = insertelement <4 x float> %ray_dir1, float %ray_dir_z, i32 2 + %ray_inv_dir0 = insertelement <4 x float> undef, float %ray_inv_dir_x, i32 0 + %ray_inv_dir1 = insertelement <4 x float> %ray_inv_dir0, float %ray_inv_dir_y, i32 1 + %ray_inv_dir = insertelement <4 x float> %ray_inv_dir1, float %ray_inv_dir_z, i32 2 + %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr) + %r = bitcast <4 x i32> %v to <4 x float> + ret <4 x float> %r +} + +; GCN-LABEL: {{^}}image_bvh64_intersect_ray_a16: +; GCN: image_bvh64_intersect_ray v[0:3], v[{{[0-9:]+}}], s[{{[0-9:]+}}] a16{{$}} +define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16(i64 inreg %node_ptr, float inreg %ray_extent, <4 x float> inreg %ray_origin, <4 x half> inreg %ray_dir, <4 x half> inreg %ray_inv_dir, <4 x i32> inreg %tdescr) { +main_body: + %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr) + %r = bitcast <4 x i32> %v to <4 x float> + ret <4 x float> %r +} + +; TODO: NSA reassign is very limited and cannot work with VGPR tuples and subregs. 
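+; ("nsa" is the gfx10 non-sequential-address MIMG encoding, which takes each
+; address operand in an independently assigned VGPR; the "sa" forms instead
+; require one contiguous VGPR tuple.)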
+ +; GCN-LABEL: {{^}}image_bvh_intersect_ray_nsa_reassign: +; GCN: image_bvh_intersect_ray v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} +define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(i32* %p_node_ptr, float* %p_ray, <4 x i32> inreg %tdescr) { +main_body: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep_node_ptr = getelementptr inbounds i32, i32* %p_node_ptr, i32 %lid + %node_ptr = load i32, i32* %gep_node_ptr, align 4 + %gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid + %ray_extent = load float, float* %gep_ray, align 4 + %ray_origin0 = insertelement <4 x float> undef, float 0.0, i32 0 + %ray_origin1 = insertelement <4 x float> %ray_origin0, float 1.0, i32 1 + %ray_origin = insertelement <4 x float> %ray_origin1, float 2.0, i32 2 + %ray_dir0 = insertelement <4 x float> undef, float 3.0, i32 0 + %ray_dir1 = insertelement <4 x float> %ray_dir0, float 4.0, i32 1 + %ray_dir = insertelement <4 x float> %ray_dir1, float 5.0, i32 2 + %ray_inv_dir0 = insertelement <4 x float> undef, float 6.0, i32 0 + %ray_inv_dir1 = insertelement <4 x float> %ray_inv_dir0, float 7.0, i32 1 + %ray_inv_dir = insertelement <4 x float> %ray_inv_dir1, float 8.0, i32 2 + %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr) + store <4 x i32> %v, <4 x i32>* undef + ret void +} + +; GCN-LABEL: {{^}}image_bvh_intersect_ray_a16_nsa_reassign: +; GCN: image_bvh_intersect_ray v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] a16{{$}} +define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(i32* %p_node_ptr, float* %p_ray, <4 x i32> inreg %tdescr) { +main_body: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep_node_ptr = getelementptr inbounds i32, i32* %p_node_ptr, i32 %lid + %node_ptr = load i32, i32* %gep_node_ptr, align 4 + %gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid + %ray_extent = load float, float* %gep_ray, align 4 + %ray_origin0 = insertelement <4 x float> undef, float 0.0, i32 0 + %ray_origin1 = insertelement <4 x float> %ray_origin0, float 1.0, i32 1 + %ray_origin = insertelement <4 x float> %ray_origin1, float 2.0, i32 2 + %ray_dir0 = insertelement <4 x half> undef, half 3.0, i32 0 + %ray_dir1 = insertelement <4 x half> %ray_dir0, half 4.0, i32 1 + %ray_dir = insertelement <4 x half> %ray_dir1, half 5.0, i32 2 + %ray_inv_dir0 = insertelement <4 x half> undef, half 6.0, i32 0 + %ray_inv_dir1 = insertelement <4 x half> %ray_inv_dir0, half 7.0, i32 1 + %ray_inv_dir = insertelement <4 x half> %ray_inv_dir1, half 8.0, i32 2 + %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr) + store <4 x i32> %v, <4 x i32>* undef + ret void +} + +; GCN-LABEL: {{^}}image_bvh64_intersect_ray_nsa_reassign: +; GCN: image_bvh64_intersect_ray v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} +define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(float* %p_ray, <4 x i32> inreg %tdescr) { +main_body: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid + %ray_extent = load float, float* %gep_ray, align 4 + %ray_origin0 = insertelement <4 x float> undef, float 0.0, i32 0 + %ray_origin1 = insertelement <4 x float> %ray_origin0, float 1.0, i32 1 + %ray_origin = insertelement <4 x float> %ray_origin1, float 
2.0, i32 2 + %ray_dir0 = insertelement <4 x float> undef, float 3.0, i32 0 + %ray_dir1 = insertelement <4 x float> %ray_dir0, float 4.0, i32 1 + %ray_dir = insertelement <4 x float> %ray_dir1, float 5.0, i32 2 + %ray_inv_dir0 = insertelement <4 x float> undef, float 6.0, i32 0 + %ray_inv_dir1 = insertelement <4 x float> %ray_inv_dir0, float 7.0, i32 1 + %ray_inv_dir = insertelement <4 x float> %ray_inv_dir1, float 8.0, i32 2 + %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 1111111111111, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr) + store <4 x i32> %v, <4 x i32>* undef + ret void +} + +; GCN-LABEL: {{^}}image_bvh64_intersect_ray_a16_nsa_reassign: +; GCN: image_bvh64_intersect_ray v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] a16{{$}} +define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(float* %p_ray, <4 x i32> inreg %tdescr) { +main_body: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid + %ray_extent = load float, float* %gep_ray, align 4 + %ray_origin0 = insertelement <4 x float> undef, float 0.0, i32 0 + %ray_origin1 = insertelement <4 x float> %ray_origin0, float 1.0, i32 1 + %ray_origin = insertelement <4 x float> %ray_origin1, float 2.0, i32 2 + %ray_dir0 = insertelement <4 x half> undef, half 3.0, i32 0 + %ray_dir1 = insertelement <4 x half> %ray_dir0, half 4.0, i32 1 + %ray_dir = insertelement <4 x half> %ray_dir1, half 5.0, i32 2 + %ray_inv_dir0 = insertelement <4 x half> undef, half 6.0, i32 0 + %ray_inv_dir1 = insertelement <4 x half> %ray_inv_dir0, half 7.0, i32 1 + %ray_inv_dir = insertelement <4 x half> %ray_inv_dir1, half 8.0, i32 2 + %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 1111111111110, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr) + store <4 x i32> %v, <4 x i32>* undef + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/MC/AMDGPU/gfx1011_err.s b/llvm/test/MC/AMDGPU/gfx1011_err.s index 81c8c6254c037..4b5bc2e5887af 100644 --- a/llvm/test/MC/AMDGPU/gfx1011_err.s +++ b/llvm/test/MC/AMDGPU/gfx1011_err.s @@ -23,16 +23,16 @@ v_fma_legacy_f32 v0, v1, v2, v3 // GFX10: error: instruction not supported on this GPU image_bvh_intersect_ray v[4:7], v[9:24], s[4:7] -// GFX10: error: invalid instruction +// GFX10: error: instruction not supported on this GPU image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 -// GFX10: error: invalid instruction +// GFX10: error: invalid operand image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] -// GFX10: error: invalid instruction +// GFX10: error: instruction not supported on this GPU image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16 -// GFX10: error: invalid instruction +// GFX10: error: invalid operand image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D // GFX10: error: not a valid operand. 
diff --git a/llvm/test/MC/AMDGPU/gfx1030_new.s b/llvm/test/MC/AMDGPU/gfx1030_new.s index 1420f9a7c61eb..3f80bdf745b33 100644 --- a/llvm/test/MC/AMDGPU/gfx1030_new.s +++ b/llvm/test/MC/AMDGPU/gfx1030_new.s @@ -61,6 +61,30 @@ v_fma_legacy_f32 v0, v1, |v2|, -v3 v_fma_legacy_f32 v0, s1, 2.0, -v3 // GFX10: encoding: [0x00,0x00,0x40,0xd5,0x01,0xe8,0x0d,0x84] +image_bvh_intersect_ray v[4:7], v[9:24], s[4:7] +// GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00] + +image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 +// GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x40] + +image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] +// GFX10: encoding: [0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x00] + +image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16 +// GFX10: encoding: [0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x40] + +image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40], s[12:15] +// GFX10: encoding: [0x07,0x9f,0x98,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x00,0x00] + +image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12:15] a16 +// GFX10: encoding: [0x05,0x9f,0x98,0xf1,0x32,0x27,0x03,0x40,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x00] + +image_bvh64_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40, v42], s[12:15] +// GFX10: encoding: [0x07,0x9f,0x9c,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x2a,0x00] + +image_bvh64_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19], s[12:15] a16 +// GFX10: encoding: [0x05,0x9f,0x9c,0xf1,0x32,0x27,0x03,0x40,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13] + image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D // GFX10: encoding: [0x01,0x0f,0x00,0xf0,0x05,0x01,0x02,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt index 26c50ecc4cf0f..11e1f08be93f4 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt @@ -52,6 +52,30 @@ # GFX10: v_fma_legacy_f32 v0, s1, 2.0, -v3 0x00,0x00,0x40,0xd5,0x01,0xe8,0x0d,0x84 +# GFX10: image_bvh_intersect_ray v[4:7], v[9:24], s[4:7] +0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00 + +# GFX10: image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 +0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x40 + +# GFX10: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] +0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x00 + +# GFX10: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16 +0x01,0x9f,0x9c,0xf1,0x09,0x04,0x01,0x40 + +# GFX10: image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40], s[12:15] +0x07,0x9f,0x98,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x00,0x00 + +# GFX10: image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12:15] a16 +0x05,0x9f,0x98,0xf1,0x32,0x27,0x03,0x40,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x00 + +# GFX10: image_bvh64_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40, v42], s[12:15] +0x07,0x9f,0x9c,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x2a,0x00 + +# GFX10: image_bvh64_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19], s[12:15] a16 +0x05,0x9f,0x9c,0xf1,0x32,0x27,0x03,0x40,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13 + # GFX10: image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D 0x01,0x0f,0x00,0xf0,0x05,0x01,0x02,0x00 From 
f80f2516a2697218eeb7af80de3b13c38f342987 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 16 Sep 2020 11:41:54 -0700 Subject: [PATCH 0887/1079] Revert "[obj2yaml] - Match ".stack_size" with the original section name, and not the uniquified name." This reverts commit 14e55f82980cf1342d4d3eea4885a5375e829496. --- llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml | 48 ------------------- llvm/tools/obj2yaml/elf2yaml.cpp | 2 +- 2 files changed, 1 insertion(+), 49 deletions(-) diff --git a/llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml b/llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml index 98a5c5ae88aac..8e6c66729c4e0 100644 --- a/llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml +++ b/llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml @@ -83,51 +83,3 @@ Sections: - Name: .stack_sizes Type: SHT_PROGBITS Content: "" - -## Check obj2yaml can dump multiple .stack_sizes. - -# RUN: yaml2obj --docnum=4 %s -o %t4 -# RUN: obj2yaml %t4 | FileCheck %s --check-prefix=MULTI - -# MULTI: --- !ELF -# MULTI-NEXT: FileHeader: -# MULTI-NEXT: Class: ELFCLASS64 -# MULTI-NEXT: Data: ELFDATA2LSB -# MULTI-NEXT: Type: ET_EXEC -# MULTI-NEXT: Machine: EM_NONE -# MULTI-NEXT: Sections: -# MULTI-NEXT: - Name: .stack_sizes -# MULTI-NEXT: Type: SHT_PROGBITS -# MULTI-NEXT: Entries: -# MULTI-NEXT: - Address: 0x0000000000000010 -# MULTI-NEXT: Size: 0x0000000000000020 -# MULTI-NEXT: - Address: 0x0000000000000030 -# MULTI-NEXT: Size: 0x0000000000000040 -# MULTI-NEXT: - Name: '.stack_sizes (1)' -# MULTI-NEXT: Type: SHT_PROGBITS -# MULTI-NEXT: Entries: -# MULTI-NEXT: - Address: 0x0000000000000050 -# MULTI-NEXT: Size: 0x0000000000000001 -# MULTI-NEXT: - Address: 0x0000000000000060 -# MULTI-NEXT: Size: 0x0000000000000002 - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC -Sections: - - Name: .stack_sizes - Type: SHT_PROGBITS - Entries: - - Address: 0x0000000000000010 - Size: 0x0000000000000020 - - Address: 0x0000000000000030 - Size: 0x0000000000000040 - - Name: '.stack_sizes (1)' - Type: SHT_PROGBITS - Entries: - - Address: 0x0000000000000050 - Size: 0x0000000000000001 - - Address: 0x0000000000000060 - Size: 0x0000000000000002 diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index d4bc135b4e0c2..3c3bef2dfbf4c 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -522,7 +522,7 @@ ELFDumper::dumpSections() { // Recognize some special SHT_PROGBITS sections by name. if (Sec.sh_type == ELF::SHT_PROGBITS) { - auto NameOrErr = Obj.getSectionName(&Sec); + auto NameOrErr = getUniquedSectionName(&Sec); if (!NameOrErr) return NameOrErr.takeError(); From 2240ca0bd1502d7baa098da7cb4aca64a6f979d4 Mon Sep 17 00:00:00 2001 From: Fanbo Meng Date: Wed, 16 Sep 2020 13:52:28 -0400 Subject: [PATCH 0888/1079] [SystemZ][z/OS] Set aligned allocation unavailable by default for z/OS Aligned allocation is not supported on z/OS. This patch sets -faligned-alloc-unavailable as default in z/OS toolchain. 
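As a concrete illustration (an example added for this write-up, not part of
the change itself): the construct affected is C++17 over-aligned allocation.
With the new default, the z/OS driver rejects the 'new' below unless
-faligned-allocation or -fno-aligned-allocation is passed explicitly.

  #include <new>

  struct alignas(32) Overaligned {
    double data[4];
  };

  int main() {
    // In C++17 this lowers to operator new(std::size_t, std::align_val_t),
    // the aligned allocation function this patch marks unavailable on z/OS.
    Overaligned *p = new Overaligned;
    delete p;
  }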
Reviewed By: abhina.sreeskantharajan, hubert.reinterpretcast Differential Revision: https://reviews.llvm.org/D87611 --- clang/include/clang/Basic/AlignedAllocation.h | 2 + clang/include/clang/Basic/Attr.td | 1 + .../clang/Basic/DiagnosticSemaKinds.td | 4 +- clang/lib/Basic/Targets/OSTargets.h | 2 + clang/lib/Driver/ToolChains/ZOS.cpp | 10 ++++ clang/lib/Driver/ToolChains/ZOS.h | 4 ++ clang/lib/Sema/SemaExprCXX.cpp | 3 +- .../Driver/unavailable_aligned_allocation.cpp | 9 +++ clang/test/Lexer/aligned-allocation.cpp | 13 +++- .../unavailable_aligned_allocation.cpp | 59 +++++++++++-------- 10 files changed, 77 insertions(+), 30 deletions(-) diff --git a/clang/include/clang/Basic/AlignedAllocation.h b/clang/include/clang/Basic/AlignedAllocation.h index 88410c5cb51ff..ab9f19da5d598 100644 --- a/clang/include/clang/Basic/AlignedAllocation.h +++ b/clang/include/clang/Basic/AlignedAllocation.h @@ -33,6 +33,8 @@ inline llvm::VersionTuple alignedAllocMinVersion(llvm::Triple::OSType OS) { return llvm::VersionTuple(11U); case llvm::Triple::WatchOS: // Earliest supporting version is 4.0.0. return llvm::VersionTuple(4U); + case llvm::Triple::ZOS: + return llvm::VersionTuple(); // All z/OS versions have no support. } llvm_unreachable("Unexpected OS"); diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index adef5b6a4495a..628649a6998d5 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -825,6 +825,7 @@ static llvm::StringRef getPlatformNameSourceSpelling(llvm::StringRef Platform) { .Case("macos_app_extension", "macOSApplicationExtension") .Case("tvos_app_extension", "tvOSApplicationExtension") .Case("watchos_app_extension", "watchOSApplicationExtension") + .Case("zos", "z/OS") .Default(Platform); } static llvm::StringRef canonicalizePlatformName(llvm::StringRef Platform) { diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index a9bd448ba0262..2e265e114191c 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7219,8 +7219,8 @@ def warn_overaligned_type : Warning< "guarantees %2 bytes">, InGroup, DefaultIgnore; def err_aligned_allocation_unavailable : Error< - "aligned %select{allocation|deallocation}0 function of type '%1' is only " - "available on %2 %3 or newer">; + "aligned %select{allocation|deallocation}0 function of type '%1' is " + "%select{only|not}4 available on %2%select{ %3 or newer|}4">; def note_silence_aligned_allocation_unavailable : Note< "if you supply your own aligned allocation functions, use " "-faligned-allocation to silence this diagnostic">; diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h index 9c206fc7e6a42..0c06ac3cd0350 100644 --- a/clang/lib/Basic/Targets/OSTargets.h +++ b/clang/lib/Basic/Targets/OSTargets.h @@ -770,6 +770,8 @@ class LLVM_LIBRARY_VISIBILITY ZOSTargetInfo : public OSTargetInfo { // type is not declared as a typedef in system headers. 
Builder.defineMacro("__wchar_t"); } + + this->PlatformName = llvm::Triple::getOSTypeName(Triple.getOS()); } public: diff --git a/clang/lib/Driver/ToolChains/ZOS.cpp b/clang/lib/Driver/ToolChains/ZOS.cpp index d57686b8930a3..f921227076a5e 100644 --- a/clang/lib/Driver/ToolChains/ZOS.cpp +++ b/clang/lib/Driver/ToolChains/ZOS.cpp @@ -21,3 +21,13 @@ ZOS::ZOS(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) : ToolChain(D, Triple, Args) {} ZOS::~ZOS() {} + +void ZOS::addClangTargetOptions(const ArgList &DriverArgs, + ArgStringList &CC1Args, + Action::OffloadKind DeviceOffloadKind) const { + // Pass "-faligned-alloc-unavailable" only when the user hasn't manually + // enabled or disabled aligned allocations. + if (!DriverArgs.hasArgNoClaim(options::OPT_faligned_allocation, + options::OPT_fno_aligned_allocation)) + CC1Args.push_back("-faligned-alloc-unavailable"); +} diff --git a/clang/lib/Driver/ToolChains/ZOS.h b/clang/lib/Driver/ToolChains/ZOS.h index 3a90f4a12428a..cace85d6da772 100644 --- a/clang/lib/Driver/ToolChains/ZOS.h +++ b/clang/lib/Driver/ToolChains/ZOS.h @@ -27,6 +27,10 @@ class LLVM_LIBRARY_VISIBILITY ZOS : public ToolChain { bool isPICDefaultForced() const override { return false; } bool IsIntegratedAssemblerDefault() const override { return true; } + + void addClangTargetOptions( + const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, + Action::OffloadKind DeviceOffloadingKind) const override; }; } // end namespace toolchains diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 08b56413d8bff..5f4afb38bc253 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1843,12 +1843,13 @@ void Sema::diagnoseUnavailableAlignedAllocation(const FunctionDecl &FD, const llvm::Triple &T = getASTContext().getTargetInfo().getTriple(); StringRef OSName = AvailabilityAttr::getPlatformNameSourceSpelling( getASTContext().getTargetInfo().getPlatformName()); + VersionTuple OSVersion = alignedAllocMinVersion(T.getOS()); OverloadedOperatorKind Kind = FD.getDeclName().getCXXOverloadedOperator(); bool IsDelete = Kind == OO_Delete || Kind == OO_Array_Delete; Diag(Loc, diag::err_aligned_allocation_unavailable) << IsDelete << FD.getType().getAsString() << OSName - << alignedAllocMinVersion(T.getOS()).getAsString(); + << OSVersion.getAsString() << OSVersion.empty(); Diag(Loc, diag::note_silence_aligned_allocation_unavailable); } } diff --git a/clang/test/Driver/unavailable_aligned_allocation.cpp b/clang/test/Driver/unavailable_aligned_allocation.cpp index 131bc116be10c..7f5d8e2cc7d4b 100644 --- a/clang/test/Driver/unavailable_aligned_allocation.cpp +++ b/clang/test/Driver/unavailable_aligned_allocation.cpp @@ -22,6 +22,9 @@ // RUN: -c -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=UNAVAILABLE // +// RUN: %clang -target s390x-none-zos -c -### %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix=UNAVAILABLE + // UNAVAILABLE: "-faligned-alloc-unavailable" // RUN: %clang -target x86_64-apple-macosx10.14 -c -### %s 2>&1 \ @@ -59,5 +62,11 @@ // // RUN: %clang -target x86_64-apple-macosx10.13 -fno-aligned-allocation -c -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=AVAILABLE +// +// RUN: %clang -target s390x-none-zos -faligned-allocation -c -### %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix=AVAILABLE +// +// RUN: %clang -target s390x-none-zos -fno-aligned-allocation -c -### %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix=AVAILABLE // AVAILABLE-NOT: "-faligned-alloc-unavailable" diff --git 
a/clang/test/Lexer/aligned-allocation.cpp b/clang/test/Lexer/aligned-allocation.cpp index eef5d980a37b8..d92bb73ba1f9a 100644 --- a/clang/test/Lexer/aligned-allocation.cpp +++ b/clang/test/Lexer/aligned-allocation.cpp @@ -6,10 +6,19 @@ // // RUN: %clang_cc1 -triple x86_64-apple-macosx10.12.0 -fexceptions -std=c++17 -verify %s \ // RUN: -faligned-allocation -faligned-alloc-unavailable +// +// RUN: %clang_cc1 -triple s390x-none-zos -fexceptions -std=c++17 -verify %s \ +// RUN: -DEXPECT_DEFINED +// +// RUN: %clang_cc1 -triple s390x-none-zos -fexceptions -std=c++17 -verify %s \ +// RUN: -faligned-alloc-unavailable +// +// RUN: %clang_cc1 -triple s390x-none-zos -fexceptions -std=c++17 -verify %s \ +// RUN: -faligned-allocation -faligned-alloc-unavailable // Test that __cpp_aligned_new is not defined when CC1 is passed -// -faligned-alloc-unavailable by the Darwin driver, even when aligned -// allocation is actually enabled. +// -faligned-alloc-unavailable by the Darwin and the z/OS driver, even when +// aligned allocation is actually enabled. // expected-no-diagnostics #ifdef EXPECT_DEFINED diff --git a/clang/test/SemaCXX/unavailable_aligned_allocation.cpp b/clang/test/SemaCXX/unavailable_aligned_allocation.cpp index 2f0f8fe7a4b50..d4ac966be2dfc 100644 --- a/clang/test/SemaCXX/unavailable_aligned_allocation.cpp +++ b/clang/test/SemaCXX/unavailable_aligned_allocation.cpp @@ -1,12 +1,15 @@ -// RUN: %clang_cc1 -triple x86_64-apple-macosx10.13.0 -fexceptions -faligned-alloc-unavailable -std=c++1z -verify %s +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.13.0 -fexceptions -faligned-alloc-unavailable -std=c++1z -verify -DMACOS %s // RUN: %clang_cc1 -triple x86_64-apple-macosx10.13.0 -fexceptions -std=c++1z -verify -DNO_ERRORS %s -// RUN: %clang_cc1 -triple x86_64-apple-macosx10.13.0 -fexceptions -faligned-allocation -faligned-alloc-unavailable -std=c++14 -verify %s +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.13.0 -fexceptions -faligned-allocation -faligned-alloc-unavailable -std=c++14 -verify -DMACOS %s // RUN: %clang_cc1 -triple arm64-apple-ios10.0.0 -fexceptions -faligned-alloc-unavailable -std=c++1z -verify -DIOS %s // RUN: %clang_cc1 -triple arm64-apple-ios10.0.0 -fexceptions -std=c++1z -verify -DNO_ERRORS %s // RUN: %clang_cc1 -triple arm64-apple-tvos10.0.0 -fexceptions -faligned-alloc-unavailable -std=c++1z -verify -DTVOS %s // RUN: %clang_cc1 -triple arm64-apple-tvos10.0.0 -fexceptions -std=c++1z -verify -DNO_ERRORS %s // RUN: %clang_cc1 -triple armv7k-apple-watchos3.0.0 -fexceptions -faligned-alloc-unavailable -std=c++1z -verify -DWATCHOS %s // RUN: %clang_cc1 -triple armv7k-apple-watchos3.0.0 -fexceptions -std=c++1z -verify -DNO_ERRORS %s +// RUN: %clang_cc1 -triple s390x-none-zos -fexceptions -faligned-alloc-unavailable -std=c++1z -verify -DZOS %s +// RUN: %clang_cc1 -triple s390x-none-zos -fexceptions -std=c++1z -verify -DNO_ERRORS %s +// RUN: %clang_cc1 -triple s390x-none-zos -fexceptions -faligned-allocation -faligned-alloc-unavailable -std=c++14 -verify -DZOS %s namespace std { typedef decltype(sizeof(0)) size_t; @@ -62,40 +65,40 @@ void testOveraligned() { #ifdef NO_ERRORS // expected-no-diagnostics #else -// expected-error@-16 {{aligned allocation function of type 'void *(unsigned long, enum std::align_val_t)' is only available on}} +// expected-error-re@-16 {{aligned allocation function of type 'void *(unsigned long, enum std::align_val_t)' is {{only|not}} available on}} // expected-note@-17 {{if you supply your own aligned allocation functions}} -// expected-error@-18 
{{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is only available on}} +// expected-error-re@-18 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is {{only|not}} available on}} // expected-note@-19 {{if you supply your own aligned allocation functions}} -// expected-error@-20 {{aligned allocation function of type 'void *(unsigned long, enum std::align_val_t)' is only available on}} +// expected-error-re@-20 {{aligned allocation function of type 'void *(unsigned long, enum std::align_val_t)' is {{only|not}} available on}} // expected-note@-21 {{if you supply your own aligned allocation functions}} -// expected-error@-22 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is only available on}} +// expected-error-re@-22 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is {{only|not}} available on}} // expected-note@-23 {{if you supply your own aligned allocation functions}} -// expected-error@-24 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is only available on}} +// expected-error-re@-24 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is {{only|not}} available on}} // expected-note@-25 {{if you supply your own aligned allocation functions}} -// expected-error@-26 {{aligned allocation function of type 'void *(std::size_t, std::align_val_t, const std::nothrow_t &) noexcept' is only available on}} +// expected-error-re@-26 {{aligned allocation function of type 'void *(std::size_t, std::align_val_t, const std::nothrow_t &) noexcept' is {{only|not}} available on}} // expected-note@-27 {{if you supply your own aligned allocation functions}} -// expected-error@-28 {{aligned deallocation function of type 'void (void *, std::align_val_t, const std::nothrow_t &) noexcept' is only available on}} +// expected-error-re@-28 {{aligned deallocation function of type 'void (void *, std::align_val_t, const std::nothrow_t &) noexcept' is {{only|not}} available on}} // expected-note@-29 {{if you supply your own aligned allocation functions}} -// expected-error@-29 {{aligned allocation function of type 'void *(unsigned long, enum std::align_val_t)' is only available on}} +// expected-error-re@-29 {{aligned allocation function of type 'void *(unsigned long, enum std::align_val_t)' is {{only|not}} available on}} // expected-note@-30 {{if you supply your own aligned allocation functions}} -// expected-error@-31 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is only available on}} +// expected-error-re@-31 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is {{only|not}} available on}} // expected-note@-32 {{if you supply your own aligned allocation functions}} -// expected-error@-33 {{aligned allocation function of type 'void *(unsigned long, enum std::align_val_t)' is only available on}} +// expected-error-re@-33 {{aligned allocation function of type 'void *(unsigned long, enum std::align_val_t)' is {{only|not}} available on}} // expected-note@-34 {{if you supply your own aligned allocation functions}} -// expected-error@-35 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is only available on}} +// expected-error-re@-35 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is {{only|not}} available on}} // expected-note@-36 {{if 
you supply your own aligned allocation functions}} -// expected-error@-37 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is only available on}} +// expected-error-re@-37 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is {{only|not}} available on}} // expected-note@-38 {{if you supply your own aligned allocation functions}} -// expected-error@-39 {{aligned allocation function of type 'void *(std::size_t, std::align_val_t, const std::nothrow_t &) noexcept' is only available on}} +// expected-error-re@-39 {{aligned allocation function of type 'void *(std::size_t, std::align_val_t, const std::nothrow_t &) noexcept' is {{only|not}} available on}} // expected-note@-40 {{if you supply your own aligned allocation functions}} -// expected-error@-41 {{aligned deallocation function of type 'void (void *, std::align_val_t, const std::nothrow_t &) noexcept' is only available on}} +// expected-error-re@-41 {{aligned deallocation function of type 'void (void *, std::align_val_t, const std::nothrow_t &) noexcept' is {{only|not}} available on}} // expected-note@-42 {{if you supply your own aligned allocation functions}} #endif @@ -116,12 +119,15 @@ void testOveralignedCheckOS() { #elif defined(WATCHOS) // expected-error@-13 {{aligned allocation function of type 'void *(unsigned long, enum std::align_val_t)' is only available on watchOS 4 or newer}}} // expected-error@-14 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is only available on watchOS 4 or newer}}} -#else +#elif defined(MACOS) // expected-error@-16 {{aligned allocation function of type 'void *(unsigned long, enum std::align_val_t)' is only available on macOS 10.14 or newer}}} // expected-error@-17 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is only available on macOS 10.14 or newer}}} +#elif defined(ZOS) +// expected-error@-19 {{aligned allocation function of type 'void *(unsigned long, enum std::align_val_t)' is not available on z/OS}}} +// expected-error@-20 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is not available on z/OS}}} #endif -// expected-note@-20 2 {{if you supply your own aligned allocation functions}} +// expected-note@-23 2 {{if you supply your own aligned allocation functions}} #endif // Test that diagnostics are produced when an unavailable aligned deallocation @@ -145,9 +151,12 @@ OveralignedS2::~OveralignedS2() {} #elif defined(WATCHOS) // expected-error@-12 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is only available on watchOS 4 or newer}}} // expected-note@-13 {{if you supply your own aligned allocation functions}} -#else +#elif defined(MACOS) // expected-error@-15 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is only available on macOS 10.14 or newer}}} // expected-note@-16 {{if you supply your own aligned allocation functions}} +#elif defined(ZOS) +// expected-error@-18 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is not available on z/OS}}} +// expected-note@-19 {{if you supply your own aligned allocation functions}} #endif #endif @@ -172,22 +181,22 @@ void testExplicitOperatorNewDeleteOveraligned() { #ifdef NO_ERRORS // expected-no-diagnostics #else -// expected-error@-11 {{aligned allocation function of type 'void *(unsigned long, enum std::align_val_t)' is only available on}} +// 
expected-error-re@-11 {{aligned allocation function of type 'void *(unsigned long, enum std::align_val_t)' is {{only|not}} available on}} // expected-note@-12 {{if you supply your own aligned allocation functions}} -// expected-error@-13 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is only available on}} +// expected-error-re@-13 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is {{only|not}} available on}} // expected-note@-14 {{if you supply your own aligned allocation functions}} -// expected-error@-15 {{aligned allocation function of type 'void *(unsigned long, enum std::align_val_t)' is only available on}} +// expected-error-re@-15 {{aligned allocation function of type 'void *(unsigned long, enum std::align_val_t)' is {{only|not}} available on}} // expected-note@-16 {{if you supply your own aligned allocation functions}} -// expected-error@-17 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is only available on}} +// expected-error-re@-17 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is {{only|not}} available on}} // expected-note@-18 {{if you supply your own aligned allocation functions}} -// expected-error@-19 {{aligned allocation function of type 'void *(unsigned long, enum std::align_val_t)' is only available on}} +// expected-error-re@-19 {{aligned allocation function of type 'void *(unsigned long, enum std::align_val_t)' is {{only|not}} available on}} // expected-note@-20 {{if you supply your own aligned allocation functions}} -// expected-error@-21 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is only available on}} +// expected-error-re@-21 {{aligned deallocation function of type 'void (void *, enum std::align_val_t) noexcept' is {{only|not}} available on}} // expected-note@-22 {{if you supply your own aligned allocation functions}} #endif From 15c378f6e641f34bb9fd3582f9cb83ff686101dc Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 16 Sep 2020 14:50:29 -0400 Subject: [PATCH 0889/1079] [gn build] unconfuse sync script about "sources = []" in clang/lib/Headers/BUILD.gn --- llvm/utils/gn/build/sync_source_lists_from_cmake.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/utils/gn/build/sync_source_lists_from_cmake.py b/llvm/utils/gn/build/sync_source_lists_from_cmake.py index e0c550ed7085b..a54483da8e55d 100755 --- a/llvm/utils/gn/build/sync_source_lists_from_cmake.py +++ b/llvm/utils/gn/build/sync_source_lists_from_cmake.py @@ -29,6 +29,9 @@ def patch_gn_file(gn_file, add, remove): srcs_tok = 'sources = [' tokloc = gn_contents.find(srcs_tok) + while tokloc != -1 and tokloc + len(srcs_tok) < len(gn_contents) and \ + gn_contents[tokloc + len(srcs_tok)] == ']': + tokloc = gn_contents.find(srcs_tok, tokloc + 1) if tokloc == -1: raise ValueError(gn_file + ': Failed to find source list') if gn_contents.find(srcs_tok, tokloc + 1) != -1: From 6859d95ea2d0f3fe0de2923a3f642170e66a1a14 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Wed, 16 Sep 2020 14:43:08 -0400 Subject: [PATCH 0890/1079] Fix build. 
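(A sketch of the pitfall behind this fix, added here for context and assuming
the explicit base-destructor call removed below was the breakage: a derived
destructor must not invoke the base destructor by hand, since the compiler
already runs it after the derived destructor body.)

  struct Base {
    ~Base() {}
  };

  struct Derived : Base {
    ~Derived() {
      // Base::~Base(); // wrong: ~Base() would run again automatically
      //                // afterwards, destroying the base subobject twice.
    }
  };

(The stray ';' after a function body, also removed below, is a separate
cleanup; pedantic builds warn about the extra semicolon.)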
---
 llvm/lib/Passes/StandardInstrumentations.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index 4755315ecfdb6..e2cc19b34f3bc 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -338,7 +338,6 @@ template ChangePrinter::~ChangePrinter() {
 IRChangePrinter::IRChangePrinter() : Out(dbgs()) {}
 
 IRChangePrinter::~IRChangePrinter() {
-  ChangePrinter::~ChangePrinter();
 }
 
 void IRChangePrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) {
@@ -415,7 +414,7 @@ void IRChangePrinter::handleIgnored(StringRef PassID, std::string &Name) {
 bool IRChangePrinter::same(const std::string &Before,
                            const std::string &After) {
   return Before.compare(After) == 0;
-};
+}
 
 PrintIRInstrumentation::~PrintIRInstrumentation() {
   assert(ModuleDescStack.empty() && "ModuleDescStack is not empty at exit");

From 94d912021ff35d33cde96dacd6f1db925fe9f2b8 Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Wed, 16 Sep 2020 18:27:55 +0200
Subject: [PATCH 0891/1079] [InstCombine] Add test for infinite combine loop
 (NFC)

Test courtesy of bkramer for the infinite combine loop introduced by
D87480.
---
 llvm/test/Transforms/InstCombine/select.ll | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index d9a4f4bdbd473..6c3e577b4c71d 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -2683,5 +2683,20 @@ define i8 @select_replacement_loop(i8 %x, i8 %y, i8 %z) {
   ret i8 %sel
 }
 
+define i32 @select_replacement_loop2(i32 %arg, i32 %arg2) {
+; CHECK-LABEL: @select_replacement_loop2(
+; CHECK-NEXT:    [[DIV:%.*]] = udiv i32 [[ARG:%.*]], [[ARG2:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[DIV]], [[ARG2]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[MUL]], [[ARG]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i32 [[DIV]], i32 undef
+; CHECK-NEXT:    ret i32 [[SEL]]
+;
+  %div = udiv i32 %arg, %arg2
+  %mul = mul nsw i32 %div, %arg2
+  %cmp = icmp eq i32 %mul, %arg
+  %sel = select i1 %cmp, i32 %div, i32 undef
+  ret i32 %sel
+}
+
 declare void @use(i1)
 declare i32 @llvm.cttz.i32(i32, i1 immarg)

From 0bb06f297fe52a5125952cb6f1e264b4e7c48097 Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Wed, 16 Sep 2020 20:49:08 +0200
Subject: [PATCH 0892/1079] [InstSimplify] Clarify SimplifyWithOpReplaced()
 return value

If SimplifyWithOpReplaced() cannot simplify the value, null should be
returned. Make sure this really does happen in all cases, including those
where SimplifyBinOp() returns the original value. This does not matter for
existing users, but does matter for D87480, which would go into an infinite
loop otherwise.
---
 .../llvm/Analysis/InstructionSimplify.h  |  3 +-
 llvm/lib/Analysis/InstructionSimplify.cpp | 37 ++++++++++++++-----
 2 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h
index e0251e7c8bbfd..a4cee8b29d9e8 100644
--- a/llvm/include/llvm/Analysis/InstructionSimplify.h
+++ b/llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -292,7 +292,8 @@ Value *SimplifyFreezeInst(Value *Op, const SimplifyQuery &Q);
 Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q,
                            OptimizationRemarkEmitter *ORE = nullptr);
 
-/// See if V simplifies when its operand Op is replaced with RepOp.
+/// See if V simplifies when its operand Op is replaced with RepOp. If not, +/// return null. /// AllowRefinement specifies whether the simplification can be a refinement, /// or whether it needs to be strictly identical. Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 9e38a4d8595a2..7d939bb63a6b6 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -3796,15 +3796,30 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, if (!AllowRefinement && canCreatePoison(cast(I))) return nullptr; + // The simplification queries below may return the original value. Consider: + // %div = udiv i32 %arg, %arg2 + // %mul = mul nsw i32 %div, %arg2 + // %cmp = icmp eq i32 %mul, %arg + // %sel = select i1 %cmp, i32 %div, i32 undef + // Replacing %arg by %mul, %div becomes "udiv i32 %mul, %arg2", which + // simplifies back to %arg. This can only happen because %mul does not + // dominate %div. To ensure a consistent return value contract, we make sure + // that this case returns nullptr as well. + auto PreventSelfSimplify = [V](Value *Simplified) { + return Simplified != V ? Simplified : nullptr; + }; + // If this is a binary operator, try to simplify it with the replaced op. if (auto *B = dyn_cast(I)) { if (MaxRecurse) { if (B->getOperand(0) == Op) - return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), Q, - MaxRecurse - 1); + return PreventSelfSimplify(SimplifyBinOp(B->getOpcode(), RepOp, + B->getOperand(1), Q, + MaxRecurse - 1)); if (B->getOperand(1) == Op) - return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, Q, - MaxRecurse - 1); + return PreventSelfSimplify(SimplifyBinOp(B->getOpcode(), + B->getOperand(0), RepOp, Q, + MaxRecurse - 1)); } } @@ -3812,11 +3827,13 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, if (CmpInst *C = dyn_cast(I)) { if (MaxRecurse) { if (C->getOperand(0) == Op) - return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), Q, - MaxRecurse - 1); + return PreventSelfSimplify(SimplifyCmpInst(C->getPredicate(), RepOp, + C->getOperand(1), Q, + MaxRecurse - 1)); if (C->getOperand(1) == Op) - return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, Q, - MaxRecurse - 1); + return PreventSelfSimplify(SimplifyCmpInst(C->getPredicate(), + C->getOperand(0), RepOp, Q, + MaxRecurse - 1)); } } @@ -3826,8 +3843,8 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, SmallVector NewOps(GEP->getNumOperands()); transform(GEP->operands(), NewOps.begin(), [&](Value *V) { return V == Op ? RepOp : V; }); - return SimplifyGEPInst(GEP->getSourceElementType(), NewOps, Q, - MaxRecurse - 1); + return PreventSelfSimplify(SimplifyGEPInst(GEP->getSourceElementType(), + NewOps, Q, MaxRecurse - 1)); } } From 222bf3ffbc8419570fc2266a2e7d1c5f58cedaa7 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 10 Sep 2020 18:45:53 +0200 Subject: [PATCH 0893/1079] Reapply [InstCombine] Simplify select operand based on equality condition Reapply after fixing SimplifyWithOpReplaced() to never return the original value, which would lead to an infinite loop in this transform. ----- For selects of the type X == Y ? A : B, check if we can simplify A by using the X == Y equality and replace the operand if that's possible. 
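A source-level sketch of the fold (illustrative C++ only; the transform
itself runs on LLVM IR, as in the tests updated below):

  int before(int x, int y) { return x == 1 ? x + 1 : y; }
  // Under the x == 1 equality the true arm folds to a constant, while
  // the select itself is kept:
  int after(int x, int y) { return x == 1 ? 2 : y; }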
We already try to do this in InstSimplify, but will only fold if the result of the simplification is the same as B, in which case the select can be dropped entirely. Here the select will be retained, just one operand simplified. As we are performing an actual replacement here, we don't have problems with refinement / poison values. Differential Revision: https://reviews.llvm.org/D87480 --- .../InstCombine/InstCombineSelect.cpp | 30 ++++++++++++++----- llvm/test/Transforms/InstCombine/rem.ll | 3 +- .../InstCombine/select-binop-cmp.ll | 15 ++++------ llvm/test/Transforms/InstCombine/select.ll | 15 ++++------ 4 files changed, 35 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 378132011aba2..ce473410f4caf 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1165,15 +1165,32 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp, /// /// We can't replace %sel with %add unless we strip away the flags. /// TODO: Wrapping flags could be preserved in some cases with better analysis. -static Value *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp, - const SimplifyQuery &Q) { +static Instruction *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp, + const SimplifyQuery &Q, + InstCombiner &IC) { if (!Cmp.isEquality()) return nullptr; // Canonicalize the pattern to ICMP_EQ by swapping the select operands. Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue(); - if (Cmp.getPredicate() == ICmpInst::ICMP_NE) + bool Swapped = false; + if (Cmp.getPredicate() == ICmpInst::ICMP_NE) { std::swap(TrueVal, FalseVal); + Swapped = true; + } + + // In X == Y ? f(X) : Z, try to evaluate f(X) and replace the operand. + // Take care to avoid replacing X == Y ? X : Z with X == Y ? Y : Z, as that + // would lead to an infinite replacement cycle. + Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1); + if (TrueVal != CmpLHS) + if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, + /* AllowRefinement */ true)) + return IC.replaceOperand(Sel, Swapped ? 2 : 1, V); + if (TrueVal != CmpRHS) + if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, + /* AllowRefinement */ true)) + return IC.replaceOperand(Sel, Swapped ? 2 : 1, V); auto *FalseInst = dyn_cast(FalseVal); if (!FalseInst) @@ -1198,12 +1215,11 @@ static Value *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp, // We have an 'EQ' comparison, so the select's false value will propagate. // Example: // (X == 42) ? 43 : (X + 1) --> (X == 42) ? (X + 1) : (X + 1) --> X + 1 - Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1); if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, /* AllowRefinement */ false) == TrueVal || SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, /* AllowRefinement */ false) == TrueVal) { - return FalseVal; + return IC.replaceInstUsesWith(Sel, FalseVal); } // Restore poison-generating flags if the transform did not apply. @@ -1439,8 +1455,8 @@ tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp, /// Visit a SelectInst that has an ICmpInst as its first operand. 
Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI) { - if (Value *V = foldSelectValueEquivalence(SI, *ICI, SQ)) - return replaceInstUsesWith(SI, V); + if (Instruction *NewSel = foldSelectValueEquivalence(SI, *ICI, SQ, *this)) + return NewSel; if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, *this)) return NewSel; diff --git a/llvm/test/Transforms/InstCombine/rem.ll b/llvm/test/Transforms/InstCombine/rem.ll index 2b9f5326dd152..37d81f2ebf6a0 100644 --- a/llvm/test/Transforms/InstCombine/rem.ll +++ b/llvm/test/Transforms/InstCombine/rem.ll @@ -50,8 +50,7 @@ define i8 @big_divisor(i8 %x) { define i5 @biggest_divisor(i5 %x) { ; CHECK-LABEL: @biggest_divisor( ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i5 [[X:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[DOTNOT]] to i5 -; CHECK-NEXT: [[REM:%.*]] = add i5 [[TMP1]], [[X]] +; CHECK-NEXT: [[REM:%.*]] = select i1 [[DOTNOT]], i5 0, i5 [[X]] ; CHECK-NEXT: ret i5 [[REM]] ; %rem = urem i5 %x, -1 diff --git a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll index 4173c31b2acb1..aa450f8af8b7e 100644 --- a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll +++ b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll @@ -564,12 +564,10 @@ define <2 x i8> @select_xor_icmp_vec_bad(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) ret <2 x i8> %C } -; TODO: support for undefs, check for an identity constant does not handle them yet -define <2 x i8> @select_xor_icmp_vec_bad_2(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) { -; CHECK-LABEL: @select_xor_icmp_vec_bad_2( +define <2 x i8> @select_xor_icmp_vec_undef(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) { +; CHECK-LABEL: @select_xor_icmp_vec_undef( ; CHECK-NEXT: [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = xor <2 x i8> [[X]], [[Z:%.*]] -; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[B]], <2 x i8> [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[Z:%.*]], <2 x i8> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i8> [[C]] ; %A = icmp eq <2 x i8> %x, @@ -604,11 +602,10 @@ define i32 @select_add_icmp_bad(i32 %x, i32 %y, i32 %z) { ret i32 %C } -define i32 @select_and_icmp_bad(i32 %x, i32 %y, i32 %z) { -; CHECK-LABEL: @select_and_icmp_bad( +define i32 @select_and_icmp_zero(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @select_and_icmp_zero( ; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[B:%.*]] = and i32 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 0, i32 [[Y:%.*]] ; CHECK-NEXT: ret i32 [[C]] ; %A = icmp eq i32 %x, 0 diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 6c3e577b4c71d..b7c4cb5c6420b 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -2606,8 +2606,7 @@ define i32 @pr47322_more_poisonous_replacement(i32 %arg) { define i8 @select_replacement_add_eq(i8 %x, i8 %y) { ; CHECK-LABEL: @select_replacement_add_eq( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 1 -; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], 1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[ADD]], i8 [[Y:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 2, i8 [[Y:%.*]] ; CHECK-NEXT: ret i8 [[SEL]] ; %cmp = icmp eq i8 %x, 1 @@ -2620,8 +2619,7 @@ define i8 @select_replacement_add_ne(i8 %x, i8 %y) { ; CHECK-LABEL: @select_replacement_add_ne( ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[X:%.*]], 1 ; 
CHECK-NEXT:    call void @use(i1 [[CMP]])
-; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], 1
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i8 [[Y:%.*]], i8 [[ADD]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i8 [[Y:%.*]], i8 2
 ; CHECK-NEXT:    ret i8 [[SEL]]
 ;
   %cmp = icmp ne i8 %x, 1
@@ -2634,8 +2632,7 @@ define i8 @select_replacement_add_ne(i8 %x, i8 %y) {
 define i8 @select_replacement_add_nuw(i8 %x, i8 %y) {
 ; CHECK-LABEL: @select_replacement_add_nuw(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 1
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw i8 [[X]], 1
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i8 [[ADD]], i8 [[Y:%.*]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i8 2, i8 [[Y:%.*]]
 ; CHECK-NEXT:    ret i8 [[SEL]]
 ;
   %cmp = icmp eq i8 %x, 1
@@ -2647,8 +2644,7 @@ define i8 @select_replacement_add_nuw(i8 %x, i8 %y) {
 define i8 @select_replacement_sub(i8 %x, i8 %y, i8 %z) {
 ; CHECK-LABEL: @select_replacement_sub(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[X]], [[Y]]
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i8 [[SUB]], i8 [[Z:%.*]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i8 0, i8 [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[SEL]]
 ;
   %cmp = icmp eq i8 %x, %y
@@ -2661,8 +2657,7 @@ define i8 @select_replacement_shift(i8 %x, i8 %y, i8 %z) {
 ; CHECK-LABEL: @select_replacement_shift(
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr exact i8 [[X:%.*]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[SHR]], [[Y:%.*]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i8 [[Y]], 1
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i8 [[SHL]], i8 [[Z:%.*]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i8 [[X]], i8 [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[SEL]]
 ;
   %shr = lshr exact i8 %x, 1

From 2a078a977e90481954eef69b489fac650ddbdaf6 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot
Date: Wed, 16 Sep 2020 19:03:25 +0000
Subject: [PATCH 0894/1079] [gn build] Port 56069b5c71c

---
 llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
index d1fc6ad4d9799..c43e531fc7180 100644
--- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
@@ -159,6 +159,7 @@ copy("Headers") {
     "openmp_wrappers/__clang_openmp_device_functions.h",
     "openmp_wrappers/cmath",
     "openmp_wrappers/complex.h",
+    "openmp_wrappers/complex_cmath.h",
     "openmp_wrappers/math.h",
     "pconfigintrin.h",
     "pkuintrin.h",

From ce0eb81c72749d1e96cfc6fb68af3c24b63753cc Mon Sep 17 00:00:00 2001
From: David Greene
Date: Thu, 23 Jan 2020 14:30:32 -0600
Subject: [PATCH 0895/1079] [UpdateTestChecks] Allow $ in function names

Some compilers generate functions with '$' in their names, so recognize
those functions. This also requires recognizing function names inside
quotes in some contexts in order to escape certain characters.
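One known producer of such names (an assumption inferred from the test
inputs below, which all use "_Z54bar$ompvariant$bar") is OpenMP
'declare variant' function mangling in C++, e.g.:

  int bar_for_gpu() { return 1; }

  // The variant machinery can emit a copy of bar() whose mangled name
  // contains '$' (hypothetical sketch; the exact mangling is
  // implementation-defined).
  #pragma omp declare variant(bar_for_gpu) match(device = {kind(gpu)})
  int bar() { return 0; }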
Differential Revision: https://reviews.llvm.org/D82995 --- .../Inputs/aarch64_function_name.ll | 9 ++++++ .../Inputs/aarch64_function_name.ll.expected | 19 +++++++++++++ .../Inputs/amdgpu_function_name.ll | 8 ++++++ .../Inputs/amdgpu_function_name.ll.expected | 14 ++++++++++ .../Inputs/arm_function_name.ll | 10 +++++++ .../Inputs/arm_function_name.ll.expected | 15 ++++++++++ .../Inputs/hexagon_function_name.ll | 8 ++++++ .../Inputs/hexagon_function_name.ll.expected | 16 +++++++++++ .../Inputs/lanai_function_name.ll | 8 ++++++ .../Inputs/lanai_function_name.ll.expected | 18 ++++++++++++ .../Inputs/mips_function_name.ll | 8 ++++++ .../Inputs/mips_function_name.ll.expected | 13 +++++++++ .../Inputs/msp430_function_name.ll | 8 ++++++ .../Inputs/msp430_function_name.ll.expected | 14 ++++++++++ .../Inputs/ppc_function_name.ll | 8 ++++++ .../Inputs/ppc_function_name.ll.expected | 13 +++++++++ .../Inputs/riscv_function_name.ll | 8 ++++++ .../Inputs/riscv_function_name.ll.expected | 13 +++++++++ .../Inputs/sparc_function_name.ll | 8 ++++++ .../Inputs/sparc_function_name.ll.expected | 14 ++++++++++ .../Inputs/systemz_function_name.ll | 8 ++++++ .../Inputs/systemz_function_name.ll.expected | 13 +++++++++ .../Inputs/wasm_function_name.ll | 8 ++++++ .../Inputs/wasm_function_name.ll.expected | 14 ++++++++++ .../Inputs/x86_function_name.ll | 8 ++++++ .../Inputs/x86_function_name.ll.expected | 13 +++++++++ .../aarch64-function-name.test | 5 ++++ .../amdgpu-function-name.test | 5 ++++ .../arm-function-name.test | 5 ++++ .../hexagon-function-name.test | 5 ++++ .../lanai-function-name.test | 5 ++++ .../mips-function-name.test | 5 ++++ .../msp430-function-name.test | 5 ++++ .../ppc-function-name.test | 5 ++++ .../riscv-function-name.test | 5 ++++ .../sparc-function-name.test | 5 ++++ .../systemz-function-name.test | 5 ++++ .../wasm-function-name.test | 5 ++++ .../x86-function-name.test | 5 ++++ .../Inputs/function_name.ll | 8 ++++++ .../Inputs/function_name.ll.expected | 9 ++++++ .../update_test_checks/function-name.test | 7 +++++ llvm/utils/UpdateTestChecks/asm.py | 28 +++++++++---------- llvm/utils/UpdateTestChecks/common.py | 6 ++-- 44 files changed, 402 insertions(+), 17 deletions(-) create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_function_name.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_function_name.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_function_name.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_function_name.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/arm_function_name.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/arm_function_name.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/hexagon_function_name.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/hexagon_function_name.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/lanai_function_name.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/lanai_function_name.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/mips_function_name.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/mips_function_name.ll.expected create mode 100644 
llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/msp430_function_name.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/msp430_function_name.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_function_name.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_function_name.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/riscv_function_name.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/riscv_function_name.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/sparc_function_name.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/sparc_function_name.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/systemz_function_name.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/systemz_function_name.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/wasm_function_name.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/wasm_function_name.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_function_name.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_function_name.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/aarch64-function-name.test create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/amdgpu-function-name.test create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/arm-function-name.test create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/hexagon-function-name.test create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/lanai-function-name.test create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/mips-function-name.test create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/msp430-function-name.test create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/ppc-function-name.test create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/riscv-function-name.test create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/sparc-function-name.test create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/systemz-function-name.test create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/wasm-function-name.test create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-function-name.test create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/function_name.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/function_name.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/function-name.test diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_function_name.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_function_name.ll new file mode 100644 index 0000000000000..1ea9d20146f1e --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_function_name.ll @@ -0,0 +1,9 @@ +; Check that we accept functions with '$' in the name. 
+; +; RUN: llc -mtriple=aarch64-unknown-linux < %s | FileCheck --check-prefix=LINUX %s +; RUN: llc -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=DARWIN %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_function_name.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_function_name.ll.expected new file mode 100644 index 0000000000000..fbe1caeea72d0 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_function_name.ll.expected @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=aarch64-unknown-linux < %s | FileCheck --check-prefix=LINUX %s +; RUN: llc -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=DARWIN %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +; LINUX-LABEL: _Z54bar$ompvariant$bar: +; LINUX: // %bb.0: // %entry +; LINUX-NEXT: mov w0, #2 +; LINUX-NEXT: ret +; +; DARWIN-LABEL: _Z54bar$ompvariant$bar: +; DARWIN: ; %bb.0: ; %entry +; DARWIN-NEXT: mov w0, #2 +; DARWIN-NEXT: ret +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_function_name.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_function_name.ll new file mode 100644 index 0000000000000..b48607d2955f0 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_function_name.ll @@ -0,0 +1,8 @@ +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_function_name.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_function_name.ll.expected new file mode 100644 index 0000000000000..e13058f32450e --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_function_name.ll.expected @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +; CHECK-LABEL: _Z54bar$ompvariant$bar: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, 2 +; CHECK-NEXT: s_setpc_b64 s[30:31] +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/arm_function_name.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/arm_function_name.ll new file mode 100644 index 0000000000000..6c0f9e971035d --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/arm_function_name.ll @@ -0,0 +1,10 @@ +; Check that we accept functions with '$' in the name. 
+; +; RUN: llc -mtriple=arm64-unknown-linux < %s | FileCheck --prefi=LINUX %s +; RUN: llc -mtriple=armv7-apple-darwin < %s | FileCheck --prefix=DARWIN %s +; RUN: llc -mtriple=armv7-apple-ios < %s | FileCheck --prefix=IOS %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/arm_function_name.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/arm_function_name.ll.expected new file mode 100644 index 0000000000000..e191b0497f0a9 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/arm_function_name.ll.expected @@ -0,0 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=arm64-unknown-linux < %s | FileCheck --prefi=LINUX %s +; RUN: llc -mtriple=armv7-apple-darwin < %s | FileCheck --prefix=DARWIN %s +; RUN: llc -mtriple=armv7-apple-ios < %s | FileCheck --prefix=IOS %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +; CHECK-LABEL: _Z54bar$ompvariant$bar: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w0, #2 +; CHECK-NEXT: ret +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/hexagon_function_name.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/hexagon_function_name.ll new file mode 100644 index 0000000000000..526f6bd5d4615 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/hexagon_function_name.ll @@ -0,0 +1,8 @@ +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=hexagon-unknown-linux < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/hexagon_function_name.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/hexagon_function_name.ll.expected new file mode 100644 index 0000000000000..9033be4aefee2 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/hexagon_function_name.ll.expected @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=hexagon-unknown-linux < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +; CHECK-LABEL: _Z54bar$ompvariant$bar: +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: { +; CHECK-NEXT: r0 = #2 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/lanai_function_name.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/lanai_function_name.ll new file mode 100644 index 0000000000000..c1c7d4f612e3d --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/lanai_function_name.ll @@ -0,0 +1,8 @@ +; Check that we accept functions with '$' in the name. 
+; +; RUN: llc -mtriple=lanai-unknown-linux < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/lanai_function_name.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/lanai_function_name.ll.expected new file mode 100644 index 0000000000000..4f30c23976654 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/lanai_function_name.ll.expected @@ -0,0 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=lanai-unknown-linux < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +; CHECK-LABEL: _Z54bar$ompvariant$bar: +; CHECK: ! %bb.0: ! %entry +; CHECK-NEXT: st %fp, [--%sp] +; CHECK-NEXT: add %sp, 0x8, %fp +; CHECK-NEXT: sub %sp, 0x8, %sp +; CHECK-NEXT: mov 0x2, %rv +; CHECK-NEXT: ld -4[%fp], %pc ! return +; CHECK-NEXT: add %fp, 0x0, %sp +; CHECK-NEXT: ld -8[%fp], %fp +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/mips_function_name.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/mips_function_name.ll new file mode 100644 index 0000000000000..1cf2e3cfcc0cc --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/mips_function_name.ll @@ -0,0 +1,8 @@ +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=mips-unknown-linux < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/mips_function_name.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/mips_function_name.ll.expected new file mode 100644 index 0000000000000..c1c4577542e82 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/mips_function_name.ll.expected @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=mips-unknown-linux < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +; CHECK-LABEL: _Z54bar$ompvariant$bar: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 2 +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/msp430_function_name.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/msp430_function_name.ll new file mode 100644 index 0000000000000..1bf6ea93fbd1e --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/msp430_function_name.ll @@ -0,0 +1,8 @@ +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=msp430-unknown-linux < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/msp430_function_name.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/msp430_function_name.ll.expected new file mode 100644 index 0000000000000..2cb55cde0b76f --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/msp430_function_name.ll.expected @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Check that we accept functions with '$' in the name. 
+; +; RUN: llc -mtriple=msp430-unknown-linux < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +; CHECK-LABEL: _Z54bar$ompvariant$bar: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: mov #2, r12 +; CHECK-NEXT: clr r13 +; CHECK-NEXT: ret +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_function_name.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_function_name.ll new file mode 100644 index 0000000000000..d4d1c68fd0ac1 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_function_name.ll @@ -0,0 +1,8 @@ +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=ppc32-unknown-linux < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_function_name.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_function_name.ll.expected new file mode 100644 index 0000000000000..72edada3ff06c --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_function_name.ll.expected @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=ppc32-unknown-linux < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +; CHECK-LABEL: _Z54bar$ompvariant$bar: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 3, 2 +; CHECK-NEXT: blr +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/riscv_function_name.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/riscv_function_name.ll new file mode 100644 index 0000000000000..db4a1988a9b68 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/riscv_function_name.ll @@ -0,0 +1,8 @@ +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=riscv32-unknown-linux < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/riscv_function_name.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/riscv_function_name.ll.expected new file mode 100644 index 0000000000000..d2ec3e0f9fcc0 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/riscv_function_name.ll.expected @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=riscv32-unknown-linux < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +; CHECK-LABEL: _Z54bar$ompvariant$bar: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: ret +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/sparc_function_name.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/sparc_function_name.ll new file mode 100644 index 0000000000000..8b4ae66f764d5 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/sparc_function_name.ll @@ -0,0 +1,8 @@ +; Check that we accept functions with '$' in the name. 
+; +; RUN: llc -mtriple=sparc-unknown-linux < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/sparc_function_name.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/sparc_function_name.ll.expected new file mode 100644 index 0000000000000..72307c73a4298 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/sparc_function_name.ll.expected @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=sparc-unknown-linux < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +; CHECK-LABEL: _Z54bar$ompvariant$bar: +; CHECK: .cfi_startproc +; CHECK-NEXT: ! %bb.0: ! %entry +; CHECK-NEXT: retl +; CHECK-NEXT: mov 2, %o0 +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/systemz_function_name.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/systemz_function_name.ll new file mode 100644 index 0000000000000..101bec2f0456e --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/systemz_function_name.ll @@ -0,0 +1,8 @@ +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=s390x-unknown-linux < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/systemz_function_name.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/systemz_function_name.ll.expected new file mode 100644 index 0000000000000..c5dade171110b --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/systemz_function_name.ll.expected @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=s390x-unknown-linux < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +; CHECK-LABEL: _Z54bar$ompvariant$bar: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lhi %r2, 2 +; CHECK-NEXT: br %r14 +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/wasm_function_name.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/wasm_function_name.ll new file mode 100644 index 0000000000000..a55cd8efd60bd --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/wasm_function_name.ll @@ -0,0 +1,8 @@ +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=wasm32-unknown-linux < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/wasm_function_name.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/wasm_function_name.ll.expected new file mode 100644 index 0000000000000..e5a10a3e07c63 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/wasm_function_name.ll.expected @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Check that we accept functions with '$' in the name. 
+; +; RUN: llc -mtriple=wasm32-unknown-linux < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +; CHECK-LABEL: _Z54bar$ompvariant$bar: +; CHECK: .functype _Z54bar$ompvariant$bar () -> (i32) +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: i32.const 2 +; CHECK-NEXT: # fallthrough-return +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_function_name.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_function_name.ll new file mode 100644 index 0000000000000..231aa54d6978e --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_function_name.ll @@ -0,0 +1,8 @@ +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_function_name.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_function_name.ll.expected new file mode 100644 index 0000000000000..32b05fccf62bf --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_function_name.ll.expected @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Check that we accept functions with '$' in the name. +; +; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +; CHECK-LABEL: _Z54bar$ompvariant$bar: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl $2, %eax +; CHECK-NEXT: retq +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/aarch64-function-name.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/aarch64-function-name.test new file mode 100644 index 0000000000000..36c96cc329fdf --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/aarch64-function-name.test @@ -0,0 +1,5 @@ +# REQUIRES: aarch64-registered-target +## Check that functions names with '$' are processed correctly + +# RUN: cp -f %S/Inputs/aarch64_function_name.ll %t.ll && %update_llc_test_checks %t.ll +# RUN: diff -u %S/Inputs/aarch64_function_name.ll.expected %t.ll diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/amdgpu-function-name.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/amdgpu-function-name.test new file mode 100644 index 0000000000000..eb4092d5a460e --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/amdgpu-function-name.test @@ -0,0 +1,5 @@ +# REQUIRES: amdgpu-registered-target +## Check that functions names with '$' are processed correctly + +# RUN: cp -f %S/Inputs/amdgpu_function_name.ll %t.ll && %update_llc_test_checks %t.ll +# RUN: diff -u %S/Inputs/amdgpu_function_name.ll.expected %t.ll diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/arm-function-name.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/arm-function-name.test new file mode 100644 index 0000000000000..07455cbf13c0e --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/arm-function-name.test @@ -0,0 +1,5 @@ +# REQUIRES: arm-registered-target +## Check that functions names with '$' are processed correctly + +# RUN: cp -f %S/Inputs/arm_function_name.ll %t.ll && %update_llc_test_checks %t.ll +# RUN: diff -u %S/Inputs/arm_function_name.ll.expected %t.ll diff --git 
a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/hexagon-function-name.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/hexagon-function-name.test new file mode 100644 index 0000000000000..1e34074255fd5 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/hexagon-function-name.test @@ -0,0 +1,5 @@ +# REQUIRES: hexagon-registered-target +## Check that functions names with '$' are processed correctly + +# RUN: cp -f %S/Inputs/hexagon_function_name.ll %t.ll && %update_llc_test_checks %t.ll +# RUN: diff -u %S/Inputs/hexagon_function_name.ll.expected %t.ll diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/lanai-function-name.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/lanai-function-name.test new file mode 100644 index 0000000000000..cb5aa4e45ffae --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/lanai-function-name.test @@ -0,0 +1,5 @@ +# REQUIRES: lanai-registered-target +## Check that functions names with '$' are processed correctly + +# RUN: cp -f %S/Inputs/lanai_function_name.ll %t.ll && %update_llc_test_checks %t.ll +# RUN: diff -u %S/Inputs/lanai_function_name.ll.expected %t.ll diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/mips-function-name.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/mips-function-name.test new file mode 100644 index 0000000000000..03f9149d5c02b --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/mips-function-name.test @@ -0,0 +1,5 @@ +# REQUIRES: mips-registered-target +## Check that functions names with '$' are processed correctly + +# RUN: cp -f %S/Inputs/mips_function_name.ll %t.ll && %update_llc_test_checks %t.ll +# RUN: diff -u %S/Inputs/mips_function_name.ll.expected %t.ll diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/msp430-function-name.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/msp430-function-name.test new file mode 100644 index 0000000000000..8f676227aa324 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/msp430-function-name.test @@ -0,0 +1,5 @@ +# REQUIRES: msp430-registered-target +## Check that functions names with '$' are processed correctly + +# RUN: cp -f %S/Inputs/msp430_function_name.ll %t.ll && %update_llc_test_checks %t.ll +# RUN: diff -u %S/Inputs/msp430_function_name.ll.expected %t.ll diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/ppc-function-name.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/ppc-function-name.test new file mode 100644 index 0000000000000..824740cde6f58 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/ppc-function-name.test @@ -0,0 +1,5 @@ +# REQUIRES: powerpc-registered-target +## Check that functions names with '$' are processed correctly + +# RUN: cp -f %S/Inputs/ppc_function_name.ll %t.ll && %update_llc_test_checks %t.ll +# RUN: diff -u %S/Inputs/ppc_function_name.ll.expected %t.ll diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/riscv-function-name.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/riscv-function-name.test new file mode 100644 index 0000000000000..2e1e05d88f9a2 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/riscv-function-name.test @@ -0,0 +1,5 @@ +# REQUIRES: riscv-registered-target +## Check that functions names with '$' are processed correctly + +# RUN: cp -f %S/Inputs/riscv_function_name.ll %t.ll && %update_llc_test_checks %t.ll +# RUN: 
diff -u %S/Inputs/riscv_function_name.ll.expected %t.ll diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/sparc-function-name.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/sparc-function-name.test new file mode 100644 index 0000000000000..a223ee211da36 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/sparc-function-name.test @@ -0,0 +1,5 @@ +# REQUIRES: sparc-registered-target +## Check that functions names with '$' are processed correctly + +# RUN: cp -f %S/Inputs/sparc_function_name.ll %t.ll && %update_llc_test_checks %t.ll +# RUN: diff -u %S/Inputs/sparc_function_name.ll.expected %t.ll diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/systemz-function-name.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/systemz-function-name.test new file mode 100644 index 0000000000000..e6c47252d4541 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/systemz-function-name.test @@ -0,0 +1,5 @@ +# REQUIRES: systemz-registered-target +## Check that functions names with '$' are processed correctly + +# RUN: cp -f %S/Inputs/systemz_function_name.ll %t.ll && %update_llc_test_checks %t.ll +# RUN: diff -u %S/Inputs/systemz_function_name.ll.expected %t.ll diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/wasm-function-name.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/wasm-function-name.test new file mode 100644 index 0000000000000..fc45e28415dd3 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/wasm-function-name.test @@ -0,0 +1,5 @@ +# REQUIRES: webassembly-registered-target +## Check that functions names with '$' are processed correctly + +# RUN: cp -f %S/Inputs/wasm_function_name.ll %t.ll && %update_llc_test_checks %t.ll +# RUN: diff -u %S/Inputs/wasm_function_name.ll.expected %t.ll diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-function-name.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-function-name.test new file mode 100644 index 0000000000000..d395afb13971f --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-function-name.test @@ -0,0 +1,5 @@ +# REQUIRES: x86-registered-target +## Check that functions names with '$' are processed correctly + +# RUN: cp -f %S/Inputs/x86_function_name.ll %t.ll && %update_llc_test_checks %t.ll +# RUN: diff -u %S/Inputs/x86_function_name.ll.expected %t.ll diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/function_name.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/function_name.ll new file mode 100644 index 0000000000000..173e7219cb3f9 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/function_name.ll @@ -0,0 +1,8 @@ +; Check that we accept functions with '$' in the name. +; +; RUN: opt < %s -instsimplify -S | FileCheck %s +; +define hidden i32 @"_Z54bar$ompvariant$bar"() { +entry: + ret i32 2 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/function_name.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/function_name.ll.expected new file mode 100644 index 0000000000000..75e4235eb440e --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/function_name.ll.expected @@ -0,0 +1,9 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; Check that we accept functions with '$' in the name. 
+;
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+;
+define hidden i32 @"_Z54bar$ompvariant$bar"() {
+entry:
+  ret i32 2
+}
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/function-name.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/function-name.test
new file mode 100644
index 0000000000000..3d1a158e00bc7
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/function-name.test
@@ -0,0 +1,7 @@
+# REQUIRES: x86-registered-target
+## Basic test checking that update_test_checks.py works correctly
+# RUN: cp -f %S/Inputs/function_name.ll %t.ll && %update_test_checks %t.ll
+# RUN: diff -u %t.ll %S/Inputs/function_name.ll.expected
+## Check that running the script again does not change the result:
+# RUN: %update_test_checks %t.ll
+# RUN: diff -u %t.ll %S/Inputs/function_name.ll.expected
diff --git a/llvm/utils/UpdateTestChecks/asm.py b/llvm/utils/UpdateTestChecks/asm.py
index 588a2870b9895..dc35859606e0f 100644
--- a/llvm/utils/UpdateTestChecks/asm.py
+++ b/llvm/utils/UpdateTestChecks/asm.py
@@ -15,7 +15,7 @@ class string:
 ##### Assembly parser
 
 ASM_FUNCTION_X86_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n(?:\s*\.?Lfunc_begin[^:\n]*:\n)?[^:]*?'
+    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@"?(?P=func)"?\n(?:\s*\.?Lfunc_begin[^:\n]*:\n)?[^:]*?'
     r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
     r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section|#+ -- End function)',
     flags=(re.M | re.S))
@@ -28,7 +28,7 @@ class string:
     flags=(re.M | re.S))
 
 ASM_FUNCTION_AARCH64_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*\/\/[ \t]*@(?P=func)\n'
+    r'^_?(?P<func>[^:]+):[ \t]*\/\/[ \t]*@"?(?P=func)"?\n'
     r'(?:[ \t]+.cfi_startproc\n)?'  # drop optional cfi noise
     r'(?P<body>.*?)\n'
     # This list is incomplete
@@ -36,21 +36,21 @@ class string:
     flags=(re.M | re.S))
 
 ASM_FUNCTION_AMDGPU_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*;+[ \t]*@(?P=func)\n[^:]*?'
+    r'^_?(?P<func>[^:]+):[ \t]*;+[ \t]*@"?(?P=func)"?\n[^:]*?'
     r'(?P<body>.*?)\n'  # (body of the function)
     # This list is incomplete
     r'^\s*(\.Lfunc_end[0-9]+:\n|\.section)',
     flags=(re.M | re.S))
 
 ASM_FUNCTION_HEXAGON_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*//[ \t]*@(?P=func)\n[^:]*?'
+    r'^_?(?P<func>[^:]+):[ \t]*//[ \t]*@"?(?P=func)"?\n[^:]*?'
     r'(?P<body>.*?)\n'  # (body of the function)
     # This list is incomplete
     r'.Lfunc_end[0-9]+:\n',
     flags=(re.M | re.S))
 
 ASM_FUNCTION_MIPS_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'  # f: (name of func)
+    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@"?(?P=func)"?\n[^:]*?'  # f: (name of func)
     r'(?:^[ \t]+\.(frame|f?mask|set).*?\n)+'  # Mips+LLVM standard asm prologue
     r'(?P<body>.*?)\n'  # (body of the function)
     # Mips+LLVM standard asm epilogue
@@ -60,13 +60,13 @@ class string:
     flags=(re.M | re.S))
 
 ASM_FUNCTION_MSP430_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*;+[ \t]*@(?P=func)\n[^:]*?'
+    r'^_?(?P<func>[^:]+):[ \t]*;+[ \t]*@"?(?P=func)"?\n[^:]*?'
     r'(?P<body>.*?)\n'
     r'(\$|\.L)func_end[0-9]+:\n',  # $func_end0:
     flags=(re.M | re.S))
 
 ASM_FUNCTION_PPC_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n'
+    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@"?(?P=func)"?\n'
     r'.*?'
     r'\.Lfunc_begin[0-9]+:\n'
     r'(?:[ \t]+.cfi_startproc\n)?'
@@ -78,7 +78,7 @@ class string:
     flags=(re.M | re.S))
 
 ASM_FUNCTION_RISCV_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n'
+    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@"?(?P=func)"?\n'
     r'(?:\s*\.?L(?P=func)\$local:\n)?'  # optional .L$local: due to -fno-semantic-interposition
     r'(?:\s*\.?Lfunc_begin[^:\n]*:\n)?[^:]*?'
     r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
@@ -86,27 +86,27 @@ class string:
     flags=(re.M | re.S))
 
 ASM_FUNCTION_LANAI_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*!+[ \t]*@(?P=func)\n'
+    r'^_?(?P<func>[^:]+):[ \t]*!+[ \t]*@"?(?P=func)"?\n'
     r'(?:[ \t]+.cfi_startproc\n)?'  # drop optional cfi noise
     r'(?P<body>.*?)\s*'
     r'.Lfunc_end[0-9]+:\n',
     flags=(re.M | re.S))
 
 ASM_FUNCTION_SPARC_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*!+[ \t]*@(?P=func)\n'
+    r'^_?(?P<func>[^:]+):[ \t]*!+[ \t]*@"?(?P=func)"?\n'
     r'(?P<body>.*?)\s*'
     r'.Lfunc_end[0-9]+:\n',
     flags=(re.M | re.S))
 
 ASM_FUNCTION_SYSTEMZ_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n'
+    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@"?(?P=func)"?\n'
     r'[ \t]+.cfi_startproc\n'
     r'(?P<body>.*?)\n'
     r'.Lfunc_end[0-9]+:\n',
     flags=(re.M | re.S))
 
 ASM_FUNCTION_AARCH64_DARWIN_RE = re.compile(
-    r'^_(?P<func>[^:]+):[ \t]*;[ \t]@(?P=func)\n'
+    r'^_(?P<func>[^:]+):[ \t]*;[ \t]@"?(?P=func)"?\n'
     r'([ \t]*.cfi_startproc\n[\s]*)?'
     r'(?P<body>.*?)'
     r'([ \t]*.cfi_endproc\n[\s]*)?'
@@ -114,7 +114,7 @@ class string:
     flags=(re.M | re.S))
 
 ASM_FUNCTION_ARM_DARWIN_RE = re.compile(
-    r'^[ \t]*\.globl[ \t]*_(?P<func>[^ \t])[ \t]*@[ \t]--[ \t]Begin[ \t]function[ \t](?P=func)'
+    r'^[ \t]*\.globl[ \t]*_(?P<func>[^ \t])[ \t]*@[ \t]--[ \t]Begin[ \t]function[ \t]"?(?P=func)"?'
     r'(?P<directives>.*?)'
     r'^_(?P=func):\n[ \t]*'
     r'(?P<body>.*?)'
@@ -137,7 +137,7 @@ class string:
     flags=(re.M | re.S))
 
 ASM_FUNCTION_WASM32_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n'
+    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@"?(?P=func)"?\n'
     r'(?P<body>.*?)\n'
     r'^\s*(\.Lfunc_end[0-9]+:\n|end_function)',
     flags=(re.M | re.S))
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py
index a1759b40b524a..d49fe50e5b1c3 100644
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@@ -145,16 +145,16 @@ def invoke_tool(exe, cmd_args, ir):
 UTC_ADVERT = 'NOTE: Assertions have been autogenerated by '
 
 OPT_FUNCTION_RE = re.compile(
-    r'^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.-]+?)\s*'
+    r'^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.$-]+?)\s*'
     r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$',
     flags=(re.M | re.S))
 
 ANALYZE_FUNCTION_RE = re.compile(
-    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.-]+?)\':'
+    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':'
     r'\s*\n(?P<body>.*)$',
     flags=(re.X | re.S))
 
-IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w.-]+)\s*\(')
+IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
 TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
 TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
 MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')

From 7af4f44c3e3dfb4483fb4dcc200f9376e96d6208 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Wed, 16 Sep 2020 12:54:15 -0700
Subject: [PATCH 0896/1079] [aarch64][tests] Add tests which show current lack
 of implicit null support

I will be posting a patch which adds appropriate target support shortly;
landing the tests so that the diffs are clear.
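For readers new to the transform, these tests target LLVM's ImplicitNullChecks
machine pass (the -enable-implicit-null-checks flag in the RUN line below).
Instead of branching on an explicit comparison against null, the compiler lets
the guarded load itself fault and relies on the runtime to turn that fault into
a transfer to the null-handling block. A minimal C++ sketch of the pattern
every test below encodes follows; the function is invented for illustration,
while the !make.implicit metadata and the AArch64 sequences are exactly what
the tests check:

  // Source shape producing the IR in these tests; managed-language runtimes
  // tag the rarely taken null test with !make.implicit metadata.
  int loadOrDefault(int *x) {
    if (x == nullptr)   // lowers to: br i1 %c, ..., !make.implicit !0
      return 42;        // the %is_null block
    return *x;          // the %not_null block: the load that can fault
  }
  // Baseline AArch64 output (what the CHECK lines pin down for now):
  //   cbz x0, .LBB0_2      // explicit null test
  //   ldr w0, [x0]         // reached only when x0 != null
  // Once target support lands, the pass can delete the cbz and mark the ldr
  // as a faulting operation whose trap is redirected to the %is_null path.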
--- .../CodeGen/AArch64/implicit-null-check.ll | 422 ++++++++++++++++++ 1 file changed, 422 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/implicit-null-check.ll diff --git a/llvm/test/CodeGen/AArch64/implicit-null-check.ll b/llvm/test/CodeGen/AArch64/implicit-null-check.ll new file mode 100644 index 0000000000000..5e7bb6f5bba0d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/implicit-null-check.ll @@ -0,0 +1,422 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -O3 -mtriple=aarch64-unknown-unknown -enable-implicit-null-checks | FileCheck %s + +; Basic test for implicit null check conversion - this is analogous to the +; file with the same name in the X86 tree, but adjusted to remove patterns +; related to memory folding of arithmetic (since aarch64 doesn't), and add +; a couple of aarch64 specific tests. +; NOTE: Currently negative tests as these are being precommitted before +; the changes to enable. + +define i32 @imp_null_check_load_fallthrough(i32* %x) { +; CHECK-LABEL: imp_null_check_load_fallthrough: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cbz x0, .LBB0_2 +; CHECK-NEXT: // %bb.1: // %not_null +; CHECK-NEXT: ldr w0, [x0] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: // %is_null +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: ret + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + not_null: + %t = load i32, i32* %x + ret i32 %t + +is_null: + ret i32 42 +} + + +define i32 @imp_null_check_load_reorder(i32* %x) { +; CHECK-LABEL: imp_null_check_load_reorder: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cbz x0, .LBB1_2 +; CHECK-NEXT: // %bb.1: // %not_null +; CHECK-NEXT: ldr w0, [x0] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_2: // %is_null +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: ret + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i32 42 + + not_null: + %t = load i32, i32* %x + ret i32 %t +} + +define i32 @imp_null_check_unordered_load(i32* %x) { +; CHECK-LABEL: imp_null_check_unordered_load: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cbz x0, .LBB2_2 +; CHECK-NEXT: // %bb.1: // %not_null +; CHECK-NEXT: ldr w0, [x0] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_2: // %is_null +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: ret + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i32 42 + + not_null: + %t = load atomic i32, i32* %x unordered, align 4 + ret i32 %t +} + + +define i32 @imp_null_check_seq_cst_load(i32* %x) { +; CHECK-LABEL: imp_null_check_seq_cst_load: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cbz x0, .LBB3_2 +; CHECK-NEXT: // %bb.1: // %not_null +; CHECK-NEXT: ldar w0, [x0] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_2: // %is_null +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: ret + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i32 42 + + not_null: + %t = load atomic i32, i32* %x seq_cst, align 4 + ret i32 %t +} + +;; Might be memory mapped IO, so can't rely on fault behavior +define i32 @imp_null_check_volatile_load(i32* %x) { +; CHECK-LABEL: imp_null_check_volatile_load: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cbz x0, .LBB4_2 +; CHECK-NEXT: // %bb.1: // %not_null +; CHECK-NEXT: ldr w0, [x0] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB4_2: // %is_null +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: ret + entry: + %c = icmp eq i32* %x, null + br i1 %c, label 
%is_null, label %not_null, !make.implicit !0 + + is_null: + ret i32 42 + + not_null: + %t = load volatile i32, i32* %x, align 4 + ret i32 %t +} + + +define i8 @imp_null_check_load_i8(i8* %x) { +; CHECK-LABEL: imp_null_check_load_i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cbz x0, .LBB5_2 +; CHECK-NEXT: // %bb.1: // %not_null +; CHECK-NEXT: ldrb w0, [x0] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_2: // %is_null +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: ret + entry: + %c = icmp eq i8* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i8 42 + + not_null: + %t = load i8, i8* %x + ret i8 %t +} + +define i256 @imp_null_check_load_i256(i256* %x) { +; CHECK-LABEL: imp_null_check_load_i256: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cbz x0, .LBB6_2 +; CHECK-NEXT: // %bb.1: // %not_null +; CHECK-NEXT: ldp x8, x1, [x0] +; CHECK-NEXT: ldp x2, x3, [x0, #16] +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB6_2: // %is_null +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: mov x1, xzr +; CHECK-NEXT: mov x2, xzr +; CHECK-NEXT: mov x3, xzr +; CHECK-NEXT: ret + entry: + %c = icmp eq i256* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i256 42 + + not_null: + %t = load i256, i256* %x + ret i256 %t +} + + + +define i32 @imp_null_check_gep_load(i32* %x) { +; CHECK-LABEL: imp_null_check_gep_load: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cbz x0, .LBB7_2 +; CHECK-NEXT: // %bb.1: // %not_null +; CHECK-NEXT: ldr w0, [x0, #128] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB7_2: // %is_null +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: ret + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i32 42 + + not_null: + %x.gep = getelementptr i32, i32* %x, i32 32 + %t = load i32, i32* %x.gep + ret i32 %t +} + +define i32 @imp_null_check_add_result(i32* %x, i32 %p) { +; CHECK-LABEL: imp_null_check_add_result: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cbz x0, .LBB8_2 +; CHECK-NEXT: // %bb.1: // %not_null +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: add w0, w8, w1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB8_2: // %is_null +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: ret + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i32 42 + + not_null: + %t = load i32, i32* %x + %p1 = add i32 %t, %p + ret i32 %p1 +} + +; Can hoist over a potential faulting instruction as long as we don't +; change the conditions under which the instruction faults. 
+define i32 @imp_null_check_hoist_over_udiv(i32* %x, i32 %a, i32 %b) { +; CHECK-LABEL: imp_null_check_hoist_over_udiv: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cbz x0, .LBB9_2 +; CHECK-NEXT: // %bb.1: // %not_null +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: udiv w9, w1, w2 +; CHECK-NEXT: add w0, w8, w9 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB9_2: // %is_null +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: ret + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i32 42 + + not_null: + %p1 = udiv i32 %a, %b + %t = load i32, i32* %x + %res = add i32 %t, %p1 + ret i32 %res +} + + +define i32 @imp_null_check_hoist_over_unrelated_load(i32* %x, i32* %y, i32* %z) { +; CHECK-LABEL: imp_null_check_hoist_over_unrelated_load: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cbz x0, .LBB10_2 +; CHECK-NEXT: // %bb.1: // %not_null +; CHECK-NEXT: ldr w8, [x1] +; CHECK-NEXT: ldr w0, [x0] +; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB10_2: // %is_null +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: ret + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i32 42 + + not_null: + %t0 = load i32, i32* %y + %t1 = load i32, i32* %x + store i32 %t0, i32* %z + ret i32 %t1 +} + +define i32 @imp_null_check_gep_load_with_use_dep(i32* %x, i32 %a) { +; CHECK-LABEL: imp_null_check_gep_load_with_use_dep: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cbz x0, .LBB11_2 +; CHECK-NEXT: // %bb.1: // %not_null +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: add w9, w0, w1 +; CHECK-NEXT: add w8, w9, w8 +; CHECK-NEXT: add w0, w8, #4 // =4 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB11_2: // %is_null +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: ret + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i32 42 + + not_null: + %x.loc = getelementptr i32, i32* %x, i32 1 + %y = ptrtoint i32* %x.loc to i32 + %b = add i32 %a, %y + %t = load i32, i32* %x + %z = add i32 %t, %b + ret i32 %z +} + +define i32 @imp_null_check_load_fence1(i32* %x) { +; CHECK-LABEL: imp_null_check_load_fence1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cbz x0, .LBB12_2 +; CHECK-NEXT: // %bb.1: // %not_null +; CHECK-NEXT: dmb ishld +; CHECK-NEXT: ldr w0, [x0] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB12_2: // %is_null +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: ret +entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + +is_null: + ret i32 42 + +not_null: + fence acquire + %t = load i32, i32* %x + ret i32 %t +} + +define i32 @imp_null_check_load_fence2(i32* %x) { +; CHECK-LABEL: imp_null_check_load_fence2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cbz x0, .LBB13_2 +; CHECK-NEXT: // %bb.1: // %not_null +; CHECK-NEXT: dmb ish +; CHECK-NEXT: ldr w0, [x0] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB13_2: // %is_null +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: ret +entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + +is_null: + ret i32 42 + +not_null: + fence seq_cst + %t = load i32, i32* %x + ret i32 %t +} + +define void @imp_null_check_store(i32* %x) { +; CHECK-LABEL: imp_null_check_store: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cbz x0, .LBB14_2 +; CHECK-NEXT: // %bb.1: // %not_null +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: str w8, [x0] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB14_2: // %is_null +; CHECK-NEXT: ret + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, 
!make.implicit !0
+
+ is_null:
+  ret void
+
+ not_null:
+  store i32 1, i32* %x
+  ret void
+}
+
+define void @imp_null_check_unordered_store(i32* %x) {
+; CHECK-LABEL: imp_null_check_unordered_store:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cbz x0, .LBB15_2
+; CHECK-NEXT:  // %bb.1: // %not_null
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    str w8, [x0]
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB15_2: // %is_null
+; CHECK-NEXT:    ret
+ entry:
+  %c = icmp eq i32* %x, null
+  br i1 %c, label %is_null, label %not_null, !make.implicit !0
+
+ is_null:
+  ret void
+
+ not_null:
+  store atomic i32 1, i32* %x unordered, align 4
+  ret void
+}
+
+define i32 @imp_null_check_neg_gep_load(i32* %x) {
+; CHECK-LABEL: imp_null_check_neg_gep_load:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cbz x0, .LBB16_2
+; CHECK-NEXT:  // %bb.1: // %not_null
+; CHECK-NEXT:    ldur w0, [x0, #-128]
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB16_2: // %is_null
+; CHECK-NEXT:    mov w0, #42
+; CHECK-NEXT:    ret
+ entry:
+  %c = icmp eq i32* %x, null
+  br i1 %c, label %is_null, label %not_null, !make.implicit !0
+
+ is_null:
+  ret i32 42
+
+ not_null:
+  %x.gep = getelementptr i32, i32* %x, i32 -32
+  %t = load i32, i32* %x.gep
+  ret i32 %t
+}
+
+!0 = !{}

From dee4686227842aa0e8380c7925049a5df9c4f781 Mon Sep 17 00:00:00 2001
From: Michael Kruse
Date: Wed, 16 Sep 2020 14:58:29 -0500
Subject: [PATCH 0897/1079] [flang][msvc] Work around if constexpr (false)
 evaluation. NFC.

MSVC tries to expand templates that are in the false branch of an
`if constexpr` construct. In this case, the condition checks whether a tuple
has at least one element and then tries to access it using `std::get<0>`,
which fails when the tuple has 0 elements. The workaround is to extract that
case into a separate method.

This patch is part of the series to make flang compilable with MS Visual
Studio.

Reviewed By: klausler

Differential Revision: https://reviews.llvm.org/D87728
---
 flang/lib/Parser/basic-parsers.h | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/flang/lib/Parser/basic-parsers.h b/flang/lib/Parser/basic-parsers.h
index 56d9ff1b07069..c92ece0ef6777 100644
--- a/flang/lib/Parser/basic-parsers.h
+++ b/flang/lib/Parser/basic-parsers.h
@@ -729,13 +729,7 @@ template <typename RESULT, typename... PARSER> class ApplyConstructor {
       return RESULT{};
     } else {
       if constexpr (sizeof...(PARSER) == 1) {
-        if constexpr (std::is_same_v<RESULT, Success>) {
-          if (std::get<0>(parsers_).Parse(state)) {
-            return RESULT{};
-          }
-        } else if (auto arg{std::get<0>(parsers_).Parse(state)}) {
-          return RESULT{std::move(*arg)};
-        }
+        return ParseOne(state);
       } else {
         ApplyArgs<PARSER...> results;
         using Sequence = std::index_sequence_for<PARSER...>;
@@ -749,6 +743,17 @@ template <typename RESULT, typename... PARSER> class ApplyConstructor {
   }
 
 private:
+  std::optional<RESULT> ParseOne(ParseState &state) const {
+    if constexpr (std::is_same_v<RESULT, Success>) {
+      if (std::get<0>(parsers_).Parse(state)) {
+        return RESULT{};
+      }
+    } else if (auto arg{std::get<0>(parsers_).Parse(state)}) {
+      return RESULT{std::move(*arg)};
+    }
+    return std::nullopt;
+  }
+
   const std::tuple<PARSER...> parsers_;
 };

From 65ef2e50a29630f9f0fba4899045c0058dacfcb0 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 16 Sep 2020 12:20:38 -0700
Subject: [PATCH 0898/1079] [X86] Add test case for a masked load mask becoming
 all ones after type legalization.

We should be able to turn this into an unmasked load. X86 has an optimization
to detect that the first and last element aren't masked and then turn the
whole thing into an unmasked load and a blend. That transform is disabled on
avx512 though.
But if we know the blend isn't needed, then the unmasked load by itself should always be profitable. --- llvm/test/CodeGen/X86/masked_load.ll | 75 ++++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 4 deletions(-) diff --git a/llvm/test/CodeGen/X86/masked_load.ll b/llvm/test/CodeGen/X86/masked_load.ll index 75e41618263ea..d807fe96fb4e0 100644 --- a/llvm/test/CodeGen/X86/masked_load.ll +++ b/llvm/test/CodeGen/X86/masked_load.ll @@ -6573,6 +6573,72 @@ define <8 x double> @mload_constmask_v8f64(<8 x double>* %addr, <8 x double> %ds ret <8 x double> %res } +; FIXME: We should be able to detect the mask is all ones after type +; legalization to use an unmasked load for some of the avx512 instructions. +define <16 x double> @mload_constmask_v16f64_allones_split(<16 x double>* %addr, <16 x double> %dst) { +; SSE-LABEL: mload_constmask_v16f64_allones_split: +; SSE: ## %bb.0: +; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: movups (%rsi), %xmm0 +; SSE-NEXT: movups 16(%rsi), %xmm1 +; SSE-NEXT: movups 32(%rsi), %xmm2 +; SSE-NEXT: movups 48(%rsi), %xmm3 +; SSE-NEXT: movlps {{.*#+}} xmm4 = mem[0,1],xmm4[2,3] +; SSE-NEXT: movlps {{.*#+}} xmm5 = mem[0,1],xmm5[2,3] +; SSE-NEXT: movlps {{.*#+}} xmm6 = mem[0,1],xmm6[2,3] +; SSE-NEXT: movlps {{.*#+}} xmm7 = mem[0,1],xmm7[2,3] +; SSE-NEXT: movaps %xmm7, 112(%rdi) +; SSE-NEXT: movaps %xmm6, 96(%rdi) +; SSE-NEXT: movaps %xmm5, 80(%rdi) +; SSE-NEXT: movaps %xmm4, 64(%rdi) +; SSE-NEXT: movaps %xmm3, 48(%rdi) +; SSE-NEXT: movaps %xmm2, 32(%rdi) +; SSE-NEXT: movaps %xmm1, 16(%rdi) +; SSE-NEXT: movaps %xmm0, (%rdi) +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: mload_constmask_v16f64_allones_split: +; AVX1OR2: ## %bb.0: +; AVX1OR2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = [18446744073709551615,0,18446744073709551615,0] +; AVX1OR2-NEXT: ## ymm0 = mem[0,1,0,1] +; AVX1OR2-NEXT: vmaskmovpd 64(%rdi), %ymm0, %ymm1 +; AVX1OR2-NEXT: vblendpd {{.*#+}} ymm2 = ymm1[0],ymm2[1],ymm1[2],ymm2[3] +; AVX1OR2-NEXT: vmaskmovpd 96(%rdi), %ymm0, %ymm0 +; AVX1OR2-NEXT: vblendpd {{.*#+}} ymm3 = ymm0[0],ymm3[1],ymm0[2],ymm3[3] +; AVX1OR2-NEXT: vmovups (%rdi), %ymm0 +; AVX1OR2-NEXT: vmovups 32(%rdi), %ymm1 +; AVX1OR2-NEXT: retq +; +; AVX512F-LABEL: mload_constmask_v16f64_allones_split: +; AVX512F: ## %bb.0: +; AVX512F-NEXT: kxnorw %k0, %k0, %k1 +; AVX512F-NEXT: vmovupd (%rdi), %zmm0 {%k1} +; AVX512F-NEXT: movb $85, %al +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovupd 64(%rdi), %zmm1 {%k1} +; AVX512F-NEXT: retq +; +; AVX512VLDQ-LABEL: mload_constmask_v16f64_allones_split: +; AVX512VLDQ: ## %bb.0: +; AVX512VLDQ-NEXT: kxnorw %k0, %k0, %k1 +; AVX512VLDQ-NEXT: vmovupd (%rdi), %zmm0 {%k1} +; AVX512VLDQ-NEXT: movb $85, %al +; AVX512VLDQ-NEXT: kmovw %eax, %k1 +; AVX512VLDQ-NEXT: vmovupd 64(%rdi), %zmm1 {%k1} +; AVX512VLDQ-NEXT: retq +; +; AVX512VLBW-LABEL: mload_constmask_v16f64_allones_split: +; AVX512VLBW: ## %bb.0: +; AVX512VLBW-NEXT: kxnorw %k0, %k0, %k1 +; AVX512VLBW-NEXT: vmovupd (%rdi), %zmm0 {%k1} +; AVX512VLBW-NEXT: movb $85, %al +; AVX512VLBW-NEXT: kmovd %eax, %k1 +; AVX512VLBW-NEXT: vmovupd 64(%rdi), %zmm1 {%k1} +; AVX512VLBW-NEXT: retq + %res = call <16 x double> @llvm.masked.load.v16f64.p0v16f64(<16 x double>* %addr, i32 4, <16 x i1> , <16 x double> %dst) + ret <16 x double> %res +} + ; If the pass-through operand is undef, no blend is needed. 
 define <4 x double> @mload_constmask_v4f64_undef_passthrough(<4 x double>* %addr) {
@@ -6788,20 +6854,20 @@ define i32 @pr38986(i1 %c, i32* %p) {
 ; SSE:       ## %bb.0:
 ; SSE-NEXT:    testb $1, %dil
 ; SSE-NEXT:    ## implicit-def: $eax
-; SSE-NEXT:    je LBB43_2
+; SSE-NEXT:    je LBB44_2
 ; SSE-NEXT:  ## %bb.1: ## %cond.load
 ; SSE-NEXT:    movl (%rsi), %eax
-; SSE-NEXT:  LBB43_2: ## %else
+; SSE-NEXT:  LBB44_2: ## %else
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: pr38986:
 ; AVX:       ## %bb.0:
 ; AVX-NEXT:    testb $1, %dil
 ; AVX-NEXT:    ## implicit-def: $eax
-; AVX-NEXT:    je LBB43_2
+; AVX-NEXT:    je LBB44_2
 ; AVX-NEXT:  ## %bb.1: ## %cond.load
 ; AVX-NEXT:    movl (%rsi), %eax
-; AVX-NEXT:  LBB43_2: ## %else
+; AVX-NEXT:  LBB44_2: ## %else
 ; AVX-NEXT:    retq
  %vc = insertelement <1 x i1> undef, i1 %c, i32 0
  %vp = bitcast i32* %p to <1 x i32>*
@@ -6822,6 +6888,7 @@ define <2 x double> @zero_mask(<2 x double>* %addr, <2 x double> %dst) {
   ret <2 x double> %res
 }
 
+declare <16 x double> @llvm.masked.load.v16f64.p0v16f64(<16 x double>*, i32, <16 x i1>, <16 x double>)
 declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
 declare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
 declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)

From 89ee4c0314bd08143d954d80bf7678d3a3ecc15a Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 16 Sep 2020 13:21:15 -0700
Subject: [PATCH 0899/1079] [DAGCombiner] Teach visitMLOAD to replace an all
 ones mask with an unmasked load

If we have an all ones mask, we can just use a regular unmasked load.
InstCombine already gets this in IR. But the all ones mask can appear after
type legalization.

Only avx512 test cases are affected because the X86 backend already looks for
element 0 and the last element being 1. It replaces this with an unmasked
load and a blend. The all ones mask is a special case of that where the blend
will be removed. That transform is only enabled on avx2 targets. I believe
that's because a non-zero passthru on avx2 already requires a separate blend,
so it's more profitable to handle mixed constant masks.

This patch adds dedicated all-ones handling to the target independent DAG
combiner. I've skipped extending, expanding, and indexed loads for now. X86
doesn't use indexed loads, so I don't know much about them. Extending made me
nervous because I wasn't sure I could trust that the memory VT had the right
element count, due to some weirdness in vector splitting. For expanding I
wasn't sure if we needed different undef handling.

Differential Revision: https://reviews.llvm.org/D87788
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 ++++++
 llvm/test/CodeGen/X86/masked_load.ll          | 34 +++++--------
 2 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9109aca880282..276fe77978832 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9272,6 +9272,16 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
 
+  // If this is a masked load with an all ones mask, we can use an unmasked load.
+  // FIXME: Can we do this for indexed, expanding, or extending loads?
+  if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
+      MLD->isUnindexed() && !MLD->isExpandingLoad() &&
+      MLD->getExtensionType() == ISD::NON_EXTLOAD) {
+    SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
+                                MLD->getBasePtr(), MLD->getMemOperand());
+    return CombineTo(N, NewLd, NewLd.getValue(1));
+  }
+
   // Try transforming N to an indexed load.
   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
     return SDValue(N, 0);
diff --git a/llvm/test/CodeGen/X86/masked_load.ll b/llvm/test/CodeGen/X86/masked_load.ll
index d807fe96fb4e0..d15b7f4d0c649 100644
--- a/llvm/test/CodeGen/X86/masked_load.ll
+++ b/llvm/test/CodeGen/X86/masked_load.ll
@@ -6171,25 +6171,10 @@ define <4 x float> @mload_constmask_v4f32_all(<4 x float>* %addr) {
 ; SSE-NEXT:    movups (%rdi), %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1OR2-LABEL: mload_constmask_v4f32_all:
-; AVX1OR2:       ## %bb.0:
-; AVX1OR2-NEXT:    vmovups (%rdi), %xmm0
-; AVX1OR2-NEXT:    retq
-;
-; AVX512F-LABEL: mload_constmask_v4f32_all:
-; AVX512F:       ## %bb.0:
-; AVX512F-NEXT:    movw $15, %ax
-; AVX512F-NEXT:    kmovw %eax, %k1
-; AVX512F-NEXT:    vmovups (%rdi), %zmm0 {%k1} {z}
-; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512F-NEXT:    vzeroupper
-; AVX512F-NEXT:    retq
-;
-; AVX512VL-LABEL: mload_constmask_v4f32_all:
-; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    kxnorw %k0, %k0, %k1
-; AVX512VL-NEXT:    vmovups (%rdi), %xmm0 {%k1} {z}
-; AVX512VL-NEXT:    retq
+; AVX-LABEL: mload_constmask_v4f32_all:
+; AVX:       ## %bb.0:
+; AVX-NEXT:    vmovups (%rdi), %xmm0
+; AVX-NEXT:    retq
   %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
   ret <4 x float> %res
 }
@@ -6573,7 +6558,7 @@ define <8 x double> @mload_constmask_v8f64(<8 x double>* %addr, <8 x double> %ds
   ret <8 x double> %res
 }
 
-; FIXME: We should be able to detect the mask is all ones after type
+; Make sure we detect the mask is all ones after type
 ; legalization to use an unmasked load for some of the avx512 instructions.
define <16 x double> @mload_constmask_v16f64_allones_split(<16 x double>* %addr, <16 x double> %dst) { ; SSE-LABEL: mload_constmask_v16f64_allones_split: @@ -6611,29 +6596,26 @@ define <16 x double> @mload_constmask_v16f64_allones_split(<16 x double>* %addr, ; ; AVX512F-LABEL: mload_constmask_v16f64_allones_split: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: kxnorw %k0, %k0, %k1 -; AVX512F-NEXT: vmovupd (%rdi), %zmm0 {%k1} ; AVX512F-NEXT: movb $85, %al ; AVX512F-NEXT: kmovw %eax, %k1 ; AVX512F-NEXT: vmovupd 64(%rdi), %zmm1 {%k1} +; AVX512F-NEXT: vmovups (%rdi), %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VLDQ-LABEL: mload_constmask_v16f64_allones_split: ; AVX512VLDQ: ## %bb.0: -; AVX512VLDQ-NEXT: kxnorw %k0, %k0, %k1 -; AVX512VLDQ-NEXT: vmovupd (%rdi), %zmm0 {%k1} ; AVX512VLDQ-NEXT: movb $85, %al ; AVX512VLDQ-NEXT: kmovw %eax, %k1 ; AVX512VLDQ-NEXT: vmovupd 64(%rdi), %zmm1 {%k1} +; AVX512VLDQ-NEXT: vmovups (%rdi), %zmm0 ; AVX512VLDQ-NEXT: retq ; ; AVX512VLBW-LABEL: mload_constmask_v16f64_allones_split: ; AVX512VLBW: ## %bb.0: -; AVX512VLBW-NEXT: kxnorw %k0, %k0, %k1 -; AVX512VLBW-NEXT: vmovupd (%rdi), %zmm0 {%k1} ; AVX512VLBW-NEXT: movb $85, %al ; AVX512VLBW-NEXT: kmovd %eax, %k1 ; AVX512VLBW-NEXT: vmovupd 64(%rdi), %zmm1 {%k1} +; AVX512VLBW-NEXT: vmovups (%rdi), %zmm0 ; AVX512VLBW-NEXT: retq %res = call <16 x double> @llvm.masked.load.v16f64.p0v16f64(<16 x double>* %addr, i32 4, <16 x i1> , <16 x double> %dst) ret <16 x double> %res From c57df3dc09e8b59c55c83ba5c354569a82a5c3b8 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Wed, 16 Sep 2020 13:18:41 -0700 Subject: [PATCH 0900/1079] [lsan] Share platform allocator settings between ASan and LSan This moves the platform-specific parameter logic from asan into sanitizer_common so lsan can reuse it. Patch By: mcgrathr Differential Revision: https://reviews.llvm.org/D85930 --- compiler-rt/lib/asan/asan_allocator.h | 38 ++------------- compiler-rt/lib/lsan/lsan_allocator.h | 47 +++++++------------ .../sanitizer_common/sanitizer_allocator.h | 37 +++++++++++++++ 3 files changed, 57 insertions(+), 65 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.h b/compiler-rt/lib/asan/asan_allocator.h index 612799f90964a..4d4a7f1b135ce 100644 --- a/compiler-rt/lib/asan/asan_allocator.h +++ b/compiler-rt/lib/asan/asan_allocator.h @@ -118,43 +118,13 @@ struct AsanMapUnmapCallback { void OnUnmap(uptr p, uptr size) const; }; +using SizeClassMap = __sanitizer::AllocatorSizeClassMap; + #if SANITIZER_CAN_USE_ALLOCATOR64 -# if SANITIZER_FUCHSIA -const uptr kAllocatorSpace = ~(uptr)0; -const uptr kAllocatorSize = 0x40000000000ULL; // 4T. -typedef DefaultSizeClassMap SizeClassMap; -# elif defined(__powerpc64__) -const uptr kAllocatorSpace = ~(uptr)0; -const uptr kAllocatorSize = 0x20000000000ULL; // 2T. -typedef DefaultSizeClassMap SizeClassMap; -# elif defined(__aarch64__) && SANITIZER_ANDROID -// Android needs to support 39, 42 and 48 bit VMA. -const uptr kAllocatorSpace = ~(uptr)0; -const uptr kAllocatorSize = 0x2000000000ULL; // 128G. -typedef VeryCompactSizeClassMap SizeClassMap; -# elif defined(__aarch64__) -// AArch64/SANITIZER_CAN_USE_ALLOCATOR64 is only for 42-bit VMA -// so no need to different values for different VMA. -const uptr kAllocatorSpace = 0x10000000000ULL; -const uptr kAllocatorSize = 0x10000000000ULL; // 3T. -typedef DefaultSizeClassMap SizeClassMap; -#elif defined(__sparc__) -const uptr kAllocatorSpace = ~(uptr)0; -const uptr kAllocatorSize = 0x20000000000ULL; // 2T. 
-typedef DefaultSizeClassMap SizeClassMap; -# elif SANITIZER_WINDOWS -const uptr kAllocatorSpace = ~(uptr)0; -const uptr kAllocatorSize = 0x8000000000ULL; // 500G -typedef DefaultSizeClassMap SizeClassMap; -# else -const uptr kAllocatorSpace = 0x600000000000ULL; -const uptr kAllocatorSize = 0x40000000000ULL; // 4T. -typedef DefaultSizeClassMap SizeClassMap; -# endif template struct AP64 { // Allocator64 parameters. Deliberately using a short name. - static const uptr kSpaceBeg = kAllocatorSpace; - static const uptr kSpaceSize = kAllocatorSize; + static const uptr kSpaceBeg = __sanitizer::kAllocatorSpace; + static const uptr kSpaceSize = __sanitizer::kAllocatorSize; static const uptr kMetadataSize = 0; typedef __asan::SizeClassMap SizeClassMap; typedef AsanMapUnmapCallback MapUnmapCallback; diff --git a/compiler-rt/lib/lsan/lsan_allocator.h b/compiler-rt/lib/lsan/lsan_allocator.h index 17e13cd014ba4..b820dd15ecdb2 100644 --- a/compiler-rt/lib/lsan/lsan_allocator.h +++ b/compiler-rt/lib/lsan/lsan_allocator.h @@ -49,51 +49,36 @@ struct ChunkMetadata { u32 stack_trace_id; }; -#if defined(__mips64) || defined(__aarch64__) || defined(__i386__) || \ - defined(__arm__) +#if SANITIZER_CAN_USE_ALLOCATOR64 template -struct AP32 { - static const uptr kSpaceBeg = 0; - static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE; +struct AP64 { // Allocator64 parameters. Deliberately using a short name. + static const uptr kSpaceBeg = __sanitizer::kAllocatorSpace; + static const uptr kSpaceSize = __sanitizer::kAllocatorSize; static const uptr kMetadataSize = sizeof(ChunkMetadata); - typedef __sanitizer::CompactSizeClassMap SizeClassMap; - static const uptr kRegionSizeLog = 20; - using AddressSpaceView = AddressSpaceViewTy; + typedef __sanitizer::AllocatorSizeClassMap SizeClassMap; typedef NoOpMapUnmapCallback MapUnmapCallback; static const uptr kFlags = 0; + using AddressSpaceView = AddressSpaceViewTy; }; template -using PrimaryAllocatorASVT = SizeClassAllocator32>; +using PrimaryAllocatorASVT = SizeClassAllocator64>; using PrimaryAllocator = PrimaryAllocatorASVT; -#elif defined(__x86_64__) || defined(__powerpc64__) || defined(__s390x__) -# if SANITIZER_FUCHSIA -const uptr kAllocatorSpace = ~(uptr)0; -const uptr kAllocatorSize = 0x40000000000ULL; // 4T. -# elif defined(__powerpc64__) -const uptr kAllocatorSpace = 0xa0000000000ULL; -const uptr kAllocatorSize = 0x20000000000ULL; // 2T. -#elif defined(__s390x__) -const uptr kAllocatorSpace = 0x40000000000ULL; -const uptr kAllocatorSize = 0x40000000000ULL; // 4T. -# else -const uptr kAllocatorSpace = 0x600000000000ULL; -const uptr kAllocatorSize = 0x40000000000ULL; // 4T. -# endif +#else // !SANITIZER_CAN_USE_ALLOCATOR64 template -struct AP64 { // Allocator64 parameters. Deliberately using a short name. 
- static const uptr kSpaceBeg = kAllocatorSpace; - static const uptr kSpaceSize = kAllocatorSize; +struct AP32 { + static const uptr kSpaceBeg = 0; + static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE; static const uptr kMetadataSize = sizeof(ChunkMetadata); - typedef DefaultSizeClassMap SizeClassMap; + typedef __sanitizer::CompactSizeClassMap SizeClassMap; + static const uptr kRegionSizeLog = 20; + using AddressSpaceView = AddressSpaceViewTy; typedef NoOpMapUnmapCallback MapUnmapCallback; static const uptr kFlags = 0; - using AddressSpaceView = AddressSpaceViewTy; }; - template -using PrimaryAllocatorASVT = SizeClassAllocator64>; +using PrimaryAllocatorASVT = SizeClassAllocator32>; using PrimaryAllocator = PrimaryAllocatorASVT; -#endif +#endif // SANITIZER_CAN_USE_ALLOCATOR64 template using AllocatorASVT = CombinedAllocator>; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h index 23d589888d3b6..dd792de1effa7 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h @@ -76,6 +76,43 @@ INLINE void RandomShuffle(T *a, u32 n, u32 *rand_state) { #include "sanitizer_allocator_secondary.h" #include "sanitizer_allocator_combined.h" +// The platform-specific default parameters are shared by both +// asan_allocator.h and lsan_allocator.h. +#if SANITIZER_CAN_USE_ALLOCATOR64 +# if SANITIZER_FUCHSIA +const uptr kAllocatorSpace = ~(uptr)0; +const uptr kAllocatorSize = 0x40000000000ULL; // 4T. +using AllocatorSizeClassMap = DefaultSizeClassMap; +# elif defined(__powerpc64__) +const uptr kAllocatorSpace = ~(uptr)0; +const uptr kAllocatorSize = 0x20000000000ULL; // 2T. +using AllocatorSizeClassMap = DefaultSizeClassMap; +# elif defined(__aarch64__) && SANITIZER_ANDROID +// Android needs to support 39, 42 and 48 bit VMA. +const uptr kAllocatorSpace = ~(uptr)0; +const uptr kAllocatorSize = 0x2000000000ULL; // 128G. +using AllocatorSizeClassMap = VeryCompactSizeClassMap; +# elif defined(__aarch64__) +// AArch64/SANITIZER_CAN_USE_ALLOCATOR64 is only for 42-bit VMA +// so no need to different values for different VMA. +const uptr kAllocatorSpace = 0x10000000000ULL; +const uptr kAllocatorSize = 0x10000000000ULL; // 3T. +using AllocatorSizeClassMap = DefaultSizeClassMap; +#elif defined(__sparc__) +const uptr kAllocatorSpace = ~(uptr)0; +const uptr kAllocatorSize = 0x20000000000ULL; // 2T. +using AllocatorSizeClassMap = DefaultSizeClassMap; +# elif SANITIZER_WINDOWS +const uptr kAllocatorSpace = ~(uptr)0; +const uptr kAllocatorSize = 0x8000000000ULL; // 500G +using AllocatorSizeClassMap = DefaultSizeClassMap; +# else +const uptr kAllocatorSpace = 0x600000000000ULL; +const uptr kAllocatorSize = 0x40000000000ULL; // 4T. +using AllocatorSizeClassMap = DefaultSizeClassMap; +# endif +#endif // SANITIZER_CAN_USE_ALLOCATOR64 + } // namespace __sanitizer #endif // SANITIZER_ALLOCATOR_H From e3fe203ec7f766ad6028144d266557b0d89b77fe Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Wed, 16 Sep 2020 13:48:19 -0700 Subject: [PATCH 0901/1079] Revert "[lsan] Share platform allocator settings between ASan and LSan" This reverts commit c57df3dc09e8b59c55c83ba5c354569a82a5c3b8 which broke Windows sanitizer bots. 
---
 compiler-rt/lib/asan/asan_allocator.h         | 38 +++++++++++++--
 compiler-rt/lib/lsan/lsan_allocator.h         | 47 ++++++++++++-------
 .../sanitizer_common/sanitizer_allocator.h    | 37 ---------------
 3 files changed, 65 insertions(+), 57 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_allocator.h b/compiler-rt/lib/asan/asan_allocator.h
index 4d4a7f1b135ce..612799f90964a 100644
--- a/compiler-rt/lib/asan/asan_allocator.h
+++ b/compiler-rt/lib/asan/asan_allocator.h
@@ -118,13 +118,43 @@ struct AsanMapUnmapCallback {
   void OnUnmap(uptr p, uptr size) const;
 };
 
-using SizeClassMap = __sanitizer::AllocatorSizeClassMap;
-
 #if SANITIZER_CAN_USE_ALLOCATOR64
+# if SANITIZER_FUCHSIA
+const uptr kAllocatorSpace = ~(uptr)0;
+const uptr kAllocatorSize = 0x40000000000ULL; // 4T.
+typedef DefaultSizeClassMap SizeClassMap;
+# elif defined(__powerpc64__)
+const uptr kAllocatorSpace = ~(uptr)0;
+const uptr kAllocatorSize = 0x20000000000ULL; // 2T.
+typedef DefaultSizeClassMap SizeClassMap;
+# elif defined(__aarch64__) && SANITIZER_ANDROID
+// Android needs to support 39, 42 and 48 bit VMA.
+const uptr kAllocatorSpace = ~(uptr)0;
+const uptr kAllocatorSize = 0x2000000000ULL; // 128G.
+typedef VeryCompactSizeClassMap SizeClassMap;
+# elif defined(__aarch64__)
+// AArch64/SANITIZER_CAN_USE_ALLOCATOR64 is only for 42-bit VMA
+// so no need to different values for different VMA.
+const uptr kAllocatorSpace = 0x10000000000ULL;
+const uptr kAllocatorSize = 0x10000000000ULL; // 3T.
+typedef DefaultSizeClassMap SizeClassMap;
+#elif defined(__sparc__)
+const uptr kAllocatorSpace = ~(uptr)0;
+const uptr kAllocatorSize = 0x20000000000ULL; // 2T.
+typedef DefaultSizeClassMap SizeClassMap;
+# elif SANITIZER_WINDOWS
+const uptr kAllocatorSpace = ~(uptr)0;
+const uptr kAllocatorSize = 0x8000000000ULL; // 500G
+typedef DefaultSizeClassMap SizeClassMap;
+# else
+const uptr kAllocatorSpace = 0x600000000000ULL;
+const uptr kAllocatorSize = 0x40000000000ULL; // 4T.
+typedef DefaultSizeClassMap SizeClassMap;
+# endif
 template <typename AddressSpaceViewTy>
 struct AP64 { // Allocator64 parameters. Deliberately using a short name.
-  static const uptr kSpaceBeg = __sanitizer::kAllocatorSpace;
-  static const uptr kSpaceSize = __sanitizer::kAllocatorSize;
+  static const uptr kSpaceBeg = kAllocatorSpace;
+  static const uptr kSpaceSize = kAllocatorSize;
   static const uptr kMetadataSize = 0;
   typedef __asan::SizeClassMap SizeClassMap;
   typedef AsanMapUnmapCallback MapUnmapCallback;
diff --git a/compiler-rt/lib/lsan/lsan_allocator.h b/compiler-rt/lib/lsan/lsan_allocator.h
index b820dd15ecdb2..17e13cd014ba4 100644
--- a/compiler-rt/lib/lsan/lsan_allocator.h
+++ b/compiler-rt/lib/lsan/lsan_allocator.h
@@ -49,21 +49,8 @@ struct ChunkMetadata {
   u32 stack_trace_id;
 };
 
-#if SANITIZER_CAN_USE_ALLOCATOR64
-template <typename AddressSpaceViewTy>
-struct AP64 { // Allocator64 parameters. Deliberately using a short name.
-  static const uptr kSpaceBeg = __sanitizer::kAllocatorSpace;
-  static const uptr kSpaceSize = __sanitizer::kAllocatorSize;
-  static const uptr kMetadataSize = sizeof(ChunkMetadata);
-  typedef __sanitizer::AllocatorSizeClassMap SizeClassMap;
-  typedef NoOpMapUnmapCallback MapUnmapCallback;
-  static const uptr kFlags = 0;
-  using AddressSpaceView = AddressSpaceViewTy;
-};
-template <typename AddressSpaceView>
-using PrimaryAllocatorASVT = SizeClassAllocator64<AP64<AddressSpaceView>>;
-using PrimaryAllocator = PrimaryAllocatorASVT<LocalAddressSpaceView>;
-#else // !SANITIZER_CAN_USE_ALLOCATOR64
+#if defined(__mips64) || defined(__aarch64__) || defined(__i386__) || \
+    defined(__arm__)
 template <typename AddressSpaceViewTy>
 struct AP32 {
   static const uptr kSpaceBeg = 0;
@@ -78,7 +65,35 @@ struct AP32 {
 template <typename AddressSpaceView>
 using PrimaryAllocatorASVT = SizeClassAllocator32<AP32<AddressSpaceView>>;
 using PrimaryAllocator = PrimaryAllocatorASVT<LocalAddressSpaceView>;
-#endif // SANITIZER_CAN_USE_ALLOCATOR64
+#elif defined(__x86_64__) || defined(__powerpc64__) || defined(__s390x__)
+# if SANITIZER_FUCHSIA
+const uptr kAllocatorSpace = ~(uptr)0;
+const uptr kAllocatorSize = 0x40000000000ULL; // 4T.
+# elif defined(__powerpc64__)
+const uptr kAllocatorSpace = 0xa0000000000ULL;
+const uptr kAllocatorSize = 0x20000000000ULL; // 2T.
+#elif defined(__s390x__)
+const uptr kAllocatorSpace = 0x40000000000ULL;
+const uptr kAllocatorSize = 0x40000000000ULL; // 4T.
+# else
+const uptr kAllocatorSpace = 0x600000000000ULL;
+const uptr kAllocatorSize = 0x40000000000ULL; // 4T.
+# endif
+template <typename AddressSpaceViewTy>
+struct AP64 { // Allocator64 parameters. Deliberately using a short name.
+  static const uptr kSpaceBeg = kAllocatorSpace;
+  static const uptr kSpaceSize = kAllocatorSize;
+  static const uptr kMetadataSize = sizeof(ChunkMetadata);
+  typedef DefaultSizeClassMap SizeClassMap;
+  typedef NoOpMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+  using AddressSpaceView = AddressSpaceViewTy;
+};
+
+template <typename AddressSpaceView>
+using PrimaryAllocatorASVT = SizeClassAllocator64<AP64<AddressSpaceView>>;
+using PrimaryAllocator = PrimaryAllocatorASVT<LocalAddressSpaceView>;
+#endif
 
 template <typename AddressSpaceView>
 using AllocatorASVT = CombinedAllocator<PrimaryAllocatorASVT<AddressSpaceView>>;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h
index dd792de1effa7..23d589888d3b6 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h
@@ -76,6 +76,43 @@ INLINE void RandomShuffle(T *a, u32 n, u32 *rand_state) {
 #include "sanitizer_allocator_secondary.h"
 #include "sanitizer_allocator_combined.h"
 
-// The platform-specific default parameters are shared by both
-// asan_allocator.h and lsan_allocator.h.
-#if SANITIZER_CAN_USE_ALLOCATOR64
-# if SANITIZER_FUCHSIA
-const uptr kAllocatorSpace = ~(uptr)0;
-const uptr kAllocatorSize = 0x40000000000ULL; // 4T.
-using AllocatorSizeClassMap = DefaultSizeClassMap;
-# elif defined(__powerpc64__)
-const uptr kAllocatorSpace = ~(uptr)0;
-const uptr kAllocatorSize = 0x20000000000ULL; // 2T.
-using AllocatorSizeClassMap = DefaultSizeClassMap;
-# elif defined(__aarch64__) && SANITIZER_ANDROID
-// Android needs to support 39, 42 and 48 bit VMA.
-const uptr kAllocatorSpace = ~(uptr)0;
-const uptr kAllocatorSize = 0x2000000000ULL; // 128G.
-using AllocatorSizeClassMap = VeryCompactSizeClassMap;
-# elif defined(__aarch64__)
-// AArch64/SANITIZER_CAN_USE_ALLOCATOR64 is only for 42-bit VMA
-// so no need to different values for different VMA.
-const uptr kAllocatorSpace = 0x10000000000ULL;
-const uptr kAllocatorSize = 0x10000000000ULL; // 3T.
-using AllocatorSizeClassMap = DefaultSizeClassMap;
-#elif defined(__sparc__)
-const uptr kAllocatorSpace = ~(uptr)0;
-const uptr kAllocatorSize = 0x20000000000ULL; // 2T.
-using AllocatorSizeClassMap = DefaultSizeClassMap;
-# elif SANITIZER_WINDOWS
-const uptr kAllocatorSpace = ~(uptr)0;
-const uptr kAllocatorSize = 0x8000000000ULL; // 500G
-using AllocatorSizeClassMap = DefaultSizeClassMap;
-# else
-const uptr kAllocatorSpace = 0x600000000000ULL;
-const uptr kAllocatorSize = 0x40000000000ULL; // 4T.
-using AllocatorSizeClassMap = DefaultSizeClassMap;
-# endif
-#endif // SANITIZER_CAN_USE_ALLOCATOR64
-
 } // namespace __sanitizer
 
 #endif // SANITIZER_ALLOCATOR_H

From 9a0d1b66730c8761a5da59351bf1c7666958130b Mon Sep 17 00:00:00 2001
From: Lang Hames
Date: Wed, 16 Sep 2020 13:46:55 -0700
Subject: [PATCH 0902/1079] [ORC] Add operations to create and lookup
 JITDylibs to OrcV2 C bindings.

---
 llvm/include/llvm-c/Orc.h                     | 36 +++++++++++++++++++
 .../ExecutionEngine/Orc/OrcV2CBindings.cpp    | 23 ++++++++++++
 2 files changed, 59 insertions(+)

diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h
index 09a058846108a..6271ab689c8b1 100644
--- a/llvm/include/llvm-c/Orc.h
+++ b/llvm/include/llvm-c/Orc.h
@@ -112,6 +112,42 @@ LLVMOrcExecutionSessionIntern(LLVMOrcExecutionSessionRef ES, const char *Name);
  */
 void LLVMOrcReleaseSymbolStringPoolEntry(LLVMOrcSymbolStringPoolEntryRef S);
 
+/**
+ * Create a "bare" JITDylib.
+ *
+ * The client is responsible for ensuring that the JITDylib's name is unique,
+ * e.g. by calling LLVMOrcExecutionSessionGetJITDylibByName first.
+ *
+ * This call does not install any library code or symbols into the newly
+ * created JITDylib. The client is responsible for all configuration.
+ */
+LLVMOrcJITDylibRef
+LLVMOrcExecutionSessionCreateBareJITDylib(LLVMOrcExecutionSessionRef ES,
+                                          const char *Name);
+
+/**
+ * Create a JITDylib.
+ *
+ * The client is responsible for ensuring that the JITDylib's name is unique,
+ * e.g. by calling LLVMOrcExecutionSessionGetJITDylibByName first.
+ *
+ * If a Platform is attached to the ExecutionSession then
+ * Platform::setupJITDylib will be called to install standard platform symbols
+ * (e.g. standard library interposes). If no Platform is installed then this
+ * call is equivalent to LLVMOrcExecutionSessionCreateBareJITDylib and will
+ * always return success.
+ */
+LLVMErrorRef
+LLVMOrcExecutionSessionCreateJITDylib(LLVMOrcExecutionSessionRef ES,
+                                      LLVMOrcJITDylibRef *Result,
+                                      const char *Name);
+
+/**
+ * Returns the JITDylib with the given name, or NULL if no such JITDylib
+ * exists.
+ */
+LLVMOrcJITDylibRef LLVMOrcExecutionSessionGetJITDylibByName(
+    LLVMOrcExecutionSessionRef ES, const char *Name);
+
 /**
  * Dispose of a JITDylib::DefinitionGenerator. This should only be called if
  * ownership has not been passed to a JITDylib (e.g.
because some error diff --git a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp index 5933c2e666d1c..f6dd235b6edea 100644 --- a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp @@ -68,6 +68,29 @@ void LLVMOrcReleaseSymbolStringPoolEntry(LLVMOrcSymbolStringPoolEntryRef S) { OrcV2CAPIHelper::releasePoolEntry(unwrap(S)); } +LLVMOrcJITDylibRef +LLVMOrcExecutionSessionCreateBareJITDylib(LLVMOrcExecutionSessionRef ES, + const char *Name) { + return wrap(&unwrap(ES)->createBareJITDylib(Name)); +} + +LLVMErrorRef +LLVMOrcExecutionSessionCreateJITDylib(LLVMOrcExecutionSessionRef ES, + LLVMOrcJITDylibRef *Result, + const char *Name) { + auto JD = unwrap(ES)->createJITDylib(Name); + if (!JD) + return wrap(JD.takeError()); + *Result = wrap(&*JD); + return LLVMErrorSuccess; +} + +LLVMOrcJITDylibRef +LLVMOrcExecutionSessionGetJITDylibByName(LLVMOrcExecutionSessionRef ES, + const char *Name) { + return wrap(unwrap(ES)->getJITDylibByName(Name)); +} + void LLVMOrcDisposeJITDylibDefinitionGenerator( LLVMOrcJITDylibDefinitionGeneratorRef DG) { delete unwrap(DG); From bebfc3b92d5e8dd1b1d75d40d5d03975957eec14 Mon Sep 17 00:00:00 2001 From: Amy Huang Date: Wed, 16 Sep 2020 13:51:36 -0700 Subject: [PATCH 0903/1079] Revert "Do not apply calling conventions to MSVC entry points" This reverts commit 4cff1b40dacf6a5489b09657d94ea4757b8cd3b0. Caused "undefined symbol: _WinMain@16" link errors. --- clang/lib/Sema/SemaDecl.cpp | 5 -- .../test/CodeGenCXX/default_calling_conv.cpp | 48 +++---------------- 2 files changed, 7 insertions(+), 46 deletions(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 416a75fa4323b..f78f7ac246bb7 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -11095,11 +11095,6 @@ void Sema::CheckMSVCRTEntryPoint(FunctionDecl *FD) { if (FD->getName() != "DllMain") FD->setHasImplicitReturnZero(true); - if (FT->getCallConv() != CC_C) { - FT = Context.adjustFunctionType(FT, FT->getExtInfo().withCallingConv(CC_C)); - FD->setType(QualType(FT, 0)); - } - if (!FD->isInvalidDecl() && FD->getDescribedFunctionTemplate()) { Diag(FD->getLocation(), diag::err_mainlike_template_decl) << FD; FD->setInvalidDecl(); diff --git a/clang/test/CodeGenCXX/default_calling_conv.cpp b/clang/test/CodeGenCXX/default_calling_conv.cpp index 16b623c301971..b5b0f47ceb986 100644 --- a/clang/test/CodeGenCXX/default_calling_conv.cpp +++ b/clang/test/CodeGenCXX/default_calling_conv.cpp @@ -1,14 +1,10 @@ -// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -fdefault-calling-conv=cdecl -emit-llvm -o - %s -DMAIN | FileCheck %s --check-prefix=CDECL --check-prefix=ALL -// RUN: %clang_cc1 -triple i786-unknown-linux-gnu -target-feature +sse4.2 -fdefault-calling-conv=fastcall -emit-llvm -o - %s -DMAIN | FileCheck %s --check-prefix=FASTCALL --check-prefix=ALL -// RUN: %clang_cc1 -triple i486-unknown-linux-gnu -fdefault-calling-conv=stdcall -emit-llvm -o - %s -DMAIN | FileCheck %s --check-prefix=STDCALL --check-prefix=ALL -// RUN: %clang_cc1 -triple i486-unknown-linux-gnu -mrtd -emit-llvm -o - %s -DMAIN | FileCheck %s --check-prefix=STDCALL --check-prefix=ALL -// RUN: %clang_cc1 -triple i986-unknown-linux-gnu -fdefault-calling-conv=vectorcall -emit-llvm -o - %s -DMAIN | FileCheck %s --check-prefix=VECTORCALL --check-prefix=ALL -// RUN: %clang_cc1 -triple i986-unknown-linux-gnu -fdefault-calling-conv=regcall -emit-llvm -o - %s -DMAIN | FileCheck %s --check-prefix=REGCALL --check-prefix=ALL -// 
RUN: %clang_cc1 -triple i386-pc-win32 -target-feature +sse4.2 -fdefault-calling-conv=fastcall -emit-llvm -o - %s -DWMAIN | FileCheck %s --check-prefix=WMAIN -// RUN: %clang_cc1 -triple i386-pc-win32 -target-feature +sse4.2 -fdefault-calling-conv=fastcall -emit-llvm -o - %s -DWINMAIN | FileCheck %s --check-prefix=WINMAIN -// RUN: %clang_cc1 -triple i386-pc-win32 -target-feature +sse4.2 -fdefault-calling-conv=fastcall -emit-llvm -o - %s -DWWINMAIN | FileCheck %s --check-prefix=WWINMAIN -// RUN: %clang_cc1 -triple i386-pc-win32 -target-feature +sse4.2 -fdefault-calling-conv=fastcall -emit-llvm -o - %s -DDLLMAIN | FileCheck %s --check-prefix=DLLMAIN -// +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -fdefault-calling-conv=cdecl -emit-llvm -o - %s | FileCheck %s --check-prefix=CDECL --check-prefix=ALL +// RUN: %clang_cc1 -triple i786-unknown-linux-gnu -target-feature +sse4.2 -fdefault-calling-conv=fastcall -emit-llvm -o - %s | FileCheck %s --check-prefix=FASTCALL --check-prefix=ALL +// RUN: %clang_cc1 -triple i486-unknown-linux-gnu -fdefault-calling-conv=stdcall -emit-llvm -o - %s | FileCheck %s --check-prefix=STDCALL --check-prefix=ALL +// RUN: %clang_cc1 -triple i486-unknown-linux-gnu -mrtd -emit-llvm -o - %s | FileCheck %s --check-prefix=STDCALL --check-prefix=ALL +// RUN: %clang_cc1 -triple i986-unknown-linux-gnu -fdefault-calling-conv=vectorcall -emit-llvm -o - %s | FileCheck %s --check-prefix=VECTORCALL --check-prefix=ALL +// RUN: %clang_cc1 -triple i986-unknown-linux-gnu -fdefault-calling-conv=regcall -emit-llvm -o - %s | FileCheck %s --check-prefix=REGCALL --check-prefix=ALL + // CDECL: define void @_Z5test1v // FASTCALL: define x86_fastcallcc void @_Z5test1v // STDCALL: define x86_stdcallcc void @_Z5test1v @@ -50,37 +46,7 @@ void test() { a.test_member(); } -#ifdef MAIN // ALL: define i32 @main int main() { return 1; } -#endif // main - -#ifdef WMAIN -// WMAIN: define dso_local i32 @wmain -int wmain() { - return 1; -} -#endif // wmain - -#ifdef WINMAIN -// WINMAIN: define dso_local i32 @WinMain -int WinMain() { - return 1; -} -#endif // WinMain - -#ifdef WWINMAIN -// WWINMAIN: define dso_local i32 @wWinMain -int wWinMain() { - return 1; -} -#endif // wWinMain - -#ifdef DLLMAIN -// DLLMAIN: define dso_local i32 @DllMain -int DllMain() { - return 1; -} -#endif // DllMain From a45cdb311f6e71fdf5452a4be9037f3fb028f1d1 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Wed, 16 Sep 2020 13:43:45 -0700 Subject: [PATCH 0904/1079] [AMDGPU] gfx1030 test update. NFC. 
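The rename pattern in this diff is driven by FileCheck's prefix rule: a check
or error annotation applies to a given RUN line only if its label matches one
of that run's --check-prefix values, so widening a label such as NOSICI to
NOSICIGFX1030 is how gfx1030 joins the set of targets expected to produce the
diagnostic. A toy sketch of that selection rule, for illustration only (this
is not FileCheck's implementation):

// Illustrative only: a toy version of FileCheck-style check-prefix selection.
#include <cstdio>
#include <string>
#include <vector>

// A directive such as "// NOSICIGFX1030: error: ..." applies to a RUN line
// only when the label before ':' equals one of that run's check prefixes.
bool AppliesToRun(const std::string &label,
                  const std::vector<std::string> &run_prefixes) {
  for (const std::string &p : run_prefixes)
    if (label == p) return true;
  return false;
}

int main() {
  // Prefixes modeled on the new gfx1030 error-checking RUN line above.
  std::vector<std::string> gfx1030_run = {"NOSICIGFX1030", "NOSICIVIGFX1030",
                                          "NOSICIGFX10", "NOGFX9"};
  std::printf("NOSICI applies: %d\n",
              AppliesToRun("NOSICI", gfx1030_run)); // 0: line is ignored
  std::printf("NOSICIGFX1030 applies: %d\n",
              AppliesToRun("NOSICIGFX1030", gfx1030_run)); // 1: line is checked
}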
--- llvm/test/MC/AMDGPU/smem.s | 262 ++++++++++++++++++------------------- 1 file changed, 131 insertions(+), 131 deletions(-) diff --git a/llvm/test/MC/AMDGPU/smem.s b/llvm/test/MC/AMDGPU/smem.s index 3bae52d640282..5f00a820ee023 100644 --- a/llvm/test/MC/AMDGPU/smem.s +++ b/llvm/test/MC/AMDGPU/smem.s @@ -3,17 +3,19 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX1012 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOVI -check-prefix=NOSICIVIGFX10 --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=NOGFX9 --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck -check-prefix=NOSICIGFX10 -check-prefix=NOGFX9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1030 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 -check-prefix=NOSICIGFX1030 -check-prefix=NOSICIVIGFX1030 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 -check-prefix=NOSICIGFX1030 -check-prefix=NOSICIVIGFX1030 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 -check-prefix=NOSICIGFX1030 -check-prefix=NOSICIVIGFX1030 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOVI -check-prefix=NOSICIVIGFX10 -check-prefix=NOSICIVIGFX1030 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=NOGFX9 -check-prefix=NOGFX9GFX1012 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck -check-prefix=NOSICIGFX10 -check-prefix=NOGFX9 -check-prefix=NOGFX9GFX1012 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1030 %s 2>&1 | FileCheck -check-prefix=NOSICIGFX1030 -check-prefix=NOSICIVIGFX10 -check-prefix=NOSICIVIGFX1030 -check-prefix=NOSICIGFX10 -check-prefix=NOGFX9 --implicit-check-not=error: %s s_dcache_wb // GFX89: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00] // GFX1012: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xf4,0x00,0x00,0x00,0x00] -// 
NOSICI: error: instruction not supported on this GPU +// NOSICIGFX1030: error: instruction not supported on this GPU s_dcache_wb_vol // GFX89: s_dcache_wb_vol ; encoding: [0x00,0x00,0x8c,0xc0,0x00,0x00,0x00,0x00] @@ -64,22 +66,22 @@ s_memrealtime ttmp[0:1] s_store_dword s1, s[2:3], 0xfc // GFX89: s_store_dword s1, s[2:3], 0xfc ; encoding: [0x41,0x00,0x42,0xc0,0xfc,0x00,0x00,0x00] // GFX1012: s_store_dword s1, s[2:3], 0xfc ; encoding: [0x41,0x00,0x40,0xf4,0xfc,0x00,0x00,0xfa] -// NOSICI: error: instruction not supported on this GPU +// NOSICIGFX1030: error: instruction not supported on this GPU s_store_dword s1, s[2:3], 0xfc glc // GFX89: s_store_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x43,0xc0,0xfc,0x00,0x00,0x00] // GFX1012: s_store_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x41,0xf4,0xfc,0x00,0x00,0xfa] -// NOSICI: error: invalid operand for instruction +// NOSICIGFX1030: error: invalid operand for instruction s_store_dword s1, s[2:3], s4 // GFX89: s_store_dword s1, s[2:3], s4 ; encoding: [0x41,0x00,0x40,0xc0,0x04,0x00,0x00,0x00] // GFX1012: s_store_dword s1, s[2:3], s4 ; encoding: [0x41,0x00,0x40,0xf4,0x00,0x00,0x00,0x08] -// NOSICI: error: instruction not supported on this GPU +// NOSICIGFX1030: error: instruction not supported on this GPU s_store_dword s1, s[2:3], s4 glc // GFX89: s_store_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x41,0xc0,0x04,0x00,0x00,0x00] // GFX1012: s_store_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x41,0xf4,0x00,0x00,0x00,0x08] -// NOSICI: error: invalid operand for instruction +// NOSICIGFX1030: error: invalid operand for instruction s_store_dword tba_lo, s[2:3], s4 // VI: s_store_dword tba_lo, s[2:3], s4 ; encoding: [0x01,0x1b,0x40,0xc0,0x04,0x00,0x00,0x00] @@ -105,17 +107,16 @@ s_store_dword tma_hi, s[2:3], s4 s_load_dword s1, s[2:3], 0xfc glc // GFX89: s_load_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x03,0xc0,0xfc,0x00,0x00,0x00] // GFX10: s_load_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x01,0xf4,0xfc,0x00,0x00,0xfa] -// SICI: s_load_dword s1, s[2:3], 0xfc glc ; encoding: [0xfc,0x83,0x00,0xc0] +// SICI: s_load_dword s1, s[2:3], 0xfc glc ; encoding: [0xfc,0x83,0x00,0xc0 s_load_dword s1, s[2:3], s4 glc // GFX89: s_load_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x01,0xc0,0x04,0x00,0x00,0x00] // GFX10: s_load_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x01,0xf4,0x00,0x00,0x00,0x08] -// SICI: s_load_dword s1, s[2:3], s4 glc ; encoding: [0x04,0x82,0x00,0xc0] s_buffer_store_dword s10, s[92:95], m0 // GFX89: s_buffer_store_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x60,0xc0,0x7c,0x00,0x00,0x00] -// NOSICI: error: instruction not supported on this GPU -// GFX10: s_buffer_store_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x60,0xf4,0x00,0x00,0x00,0xf8] +// NOSICIGFX1030: error: instruction not supported on this GPU +// GFX1012: s_buffer_store_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x60,0xf4,0x00,0x00,0x00,0xf8] s_buffer_store_dword tba_lo, s[92:95], m0 // VI: s_buffer_store_dword tba_lo, s[92:95], m0 ; encoding: [0x2e,0x1b,0x60,0xc0,0x7c,0x00,0x00,0x00] @@ -140,18 +141,18 @@ s_buffer_store_dword tma_hi, s[92:95], m0 s_buffer_store_dword ttmp0, s[92:95], m0 // VI: s_buffer_store_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1c,0x60,0xc0,0x7c,0x00,0x00,0x00] // GFX9: s_buffer_store_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1b,0x60,0xc0,0x7c,0x00,0x00,0x00] -// NOSICI: error: instruction not supported on this GPU -// GFX10: s_buffer_store_dword ttmp0, s[92:95], m0 ; encoding: 
[0x2e,0x1b,0x60,0xf4,0x00,0x00,0x00,0xf8] +// NOSICIGFX1030: error: instruction not supported on this GPU +// GFX1012: s_buffer_store_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1b,0x60,0xf4,0x00,0x00,0x00,0xf8] s_buffer_store_dwordx2 s[10:11], s[92:95], m0 // GFX89: s_buffer_store_dwordx2 s[10:11], s[92:95], m0 ; encoding: [0xae,0x02,0x64,0xc0,0x7c,0x00,0x00,0x00] -// NOSICI: error: instruction not supported on this GPU -// GFX10: s_buffer_store_dwordx2 s[10:11], s[92:95], m0 ; encoding: [0xae,0x02,0x64,0xf4,0x00,0x00,0x00,0xf8] +// NOSICIGFX1030: error: instruction not supported on this GPU +// GFX1012: s_buffer_store_dwordx2 s[10:11], s[92:95], m0 ; encoding: [0xae,0x02,0x64,0xf4,0x00,0x00,0x00,0xf8] s_buffer_store_dwordx4 s[8:11], s[92:95], m0 glc // GFX89: s_buffer_store_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x69,0xc0,0x7c,0x00,0x00,0x00] -// NOSICI: error: invalid operand for instruction -// GFX10: s_buffer_store_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x69,0xf4,0x00,0x00,0x00,0xf8] +// NOSICIGFX1030: error: invalid operand for instruction +// GFX1012: s_buffer_store_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x69,0xf4,0x00,0x00,0x00,0xf8] s_buffer_store_dwordx2 tba, s[92:95], m0 glc // VI: s_buffer_store_dwordx2 tba, s[92:95], m0 glc ; encoding: [0x2e,0x1b,0x65,0xc0,0x7c,0x00,0x00,0x00] @@ -214,7 +215,6 @@ s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc // GFX89: s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x29,0xc0,0x7c,0x00,0x00,0x00] // GFX10: s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x29,0xf4,0x00,0x00,0x00,0xf8] -// SICI: s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x7c,0x5c,0x84,0xc2] //===----------------------------------------------------------------------===// // s_scratch instructions @@ -223,47 +223,47 @@ s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc s_scratch_load_dword s5, s[2:3], s101 // GFX9: s_scratch_load_dword s5, s[2:3], s101 ; encoding: [0x41,0x01,0x14,0xc0,0x65,0x00,0x00,0x00] // GFX1012: s_scratch_load_dword s5, s[2:3], s101 ; encoding: [0x41,0x01,0x14,0xf4,0x00,0x00,0x00,0xca] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_scratch_load_dword s5, s[2:3], s0 glc // GFX9: s_scratch_load_dword s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x15,0xc0,0x00,0x00,0x00,0x00] // GFX1012: s_scratch_load_dword s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x15,0xf4,0x00,0x00,0x00,0x00] -// NOSICIVI: error: invalid operand for instruction +// NOSICIVIGFX1030: error: invalid operand for instruction s_scratch_load_dwordx2 s[100:101], s[2:3], s0 // GFX9: s_scratch_load_dwordx2 s[100:101], s[2:3], s0 ; encoding: [0x01,0x19,0x18,0xc0,0x00,0x00,0x00,0x00] // GFX1012: s_scratch_load_dwordx2 s[100:101], s[2:3], s0 ; encoding: [0x01,0x19,0x18,0xf4,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_scratch_load_dwordx2 s[10:11], s[2:3], 0x1 glc // GFX9: s_scratch_load_dwordx2 s[10:11], s[2:3], 0x1 glc ; encoding: [0x81,0x02,0x1b,0xc0,0x01,0x00,0x00,0x00] // GFX1012: s_scratch_load_dwordx2 s[10:11], s[2:3], 0x1 glc ; encoding: [0x81,0x02,0x19,0xf4,0x01,0x00,0x00,0xfa] -// NOSICIVI: error: invalid operand for instruction +// NOSICIVIGFX1030: error: invalid operand for instruction s_scratch_load_dwordx4 s[20:23], s[4:5], s0 // GFX9: 
s_scratch_load_dwordx4 s[20:23], s[4:5], s0 ; encoding: [0x02,0x05,0x1c,0xc0,0x00,0x00,0x00,0x00] // GFX1012: s_scratch_load_dwordx4 s[20:23], s[4:5], s0 ; encoding: [0x02,0x05,0x1c,0xf4,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_scratch_store_dword s101, s[4:5], s0 // GFX9: s_scratch_store_dword s101, s[4:5], s0 ; encoding: [0x42,0x19,0x54,0xc0,0x00,0x00,0x00,0x00] // GFX1012: s_scratch_store_dword s101, s[4:5], s0 ; encoding: [0x42,0x19,0x54,0xf4,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_scratch_store_dword s1, s[4:5], 0x123 glc // GFX9: s_scratch_store_dword s1, s[4:5], 0x123 glc ; encoding: [0x42,0x00,0x57,0xc0,0x23,0x01,0x00,0x00] // GFX1012: s_scratch_store_dword s1, s[4:5], 0x123 glc ; encoding: [0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa] -// NOSICIVI: error: invalid operand for instruction +// NOSICIVIGFX1030: error: invalid operand for instruction s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc // GFX9: s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc ; encoding: [0x82,0x00,0x59,0xc0,0x65,0x00,0x00,0x00] // GFX1012: s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc ; encoding: [0x82,0x00,0x59,0xf4,0x00,0x00,0x00,0xca] -// NOSICIVI: error: invalid operand for instruction +// NOSICIVIGFX1030: error: invalid operand for instruction s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc // GFX9: s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc ; encoding: [0x02,0x01,0x5d,0xc0,0x00,0x00,0x00,0x00] // GFX1012: s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc ; encoding: [0x02,0x01,0x5d,0xf4,0x00,0x00,0x00,0x00] -// NOSICIVI: error: invalid operand for instruction +// NOSICIVIGFX1030: error: invalid operand for instruction //===----------------------------------------------------------------------===// // s_dcache_discard instructions @@ -272,22 +272,22 @@ s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc s_dcache_discard s[2:3], s0 // GFX9: s_dcache_discard s[2:3], s0 ; encoding: [0x01,0x00,0xa0,0xc0,0x00,0x00,0x00,0x00] // GFX1012: s_dcache_discard s[2:3], s0 ; encoding: [0x01,0x00,0xa0,0xf4,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_dcache_discard s[2:3], 0x0 // GFX9: s_dcache_discard s[2:3], 0x0 ; encoding: [0x01,0x00,0xa2,0xc0,0x00,0x00,0x00,0x00] // GFX1012: s_dcache_discard s[2:3], 0x0 ; encoding: [0x01,0x00,0xa0,0xf4,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_dcache_discard_x2 s[2:3], s101 // GFX9: s_dcache_discard_x2 s[2:3], s101 ; encoding: [0x01,0x00,0xa4,0xc0,0x65,0x00,0x00,0x00] // GFX1012: s_dcache_discard_x2 s[2:3], s101 ; encoding: [0x01,0x00,0xa4,0xf4,0x00,0x00,0x00,0xca] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_dcache_discard_x2 s[2:3], 0x0 // GFX9: s_dcache_discard_x2 s[2:3], 0x0 ; encoding: [0x01,0x00,0xa6,0xc0,0x00,0x00,0x00,0x00] // GFX1012: s_dcache_discard_x2 s[2:3], 0x0 ; encoding: [0x01,0x00,0xa4,0xf4,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU //===----------------------------------------------------------------------===// // s_atomic instructions @@ -296,162 +296,162 @@ 
s_dcache_discard_x2 s[2:3], 0x0 s_atomic_add s5, s[2:3], s101 // GFX9: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x01,0x08,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_add s5, s[2:3], 0x0 // GFX9: s_atomic_add s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x0a,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_add s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_add s5, s[2:3], s0 glc // GFX9: s_atomic_add s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x09,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_add s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x09,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_add_x2 s[10:11], s[2:3], s101 // GFX9: s_atomic_add_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x88,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_add_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x88,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_and s5, s[2:3], s101 // GFX9: s_atomic_and s5, s[2:3], s101 ; encoding: [0x41,0x01,0x20,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_and s5, s[2:3], s101 ; encoding: [0x41,0x01,0x20,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_and_x2 s[10:11], s[2:3], 0x0 // GFX9: s_atomic_and_x2 s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0xa2,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_and_x2 s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0xa0,0xf6,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_cmpswap s[10:11], s[2:3], s101 // GFX9: s_atomic_cmpswap s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x04,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_cmpswap s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x04,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_cmpswap s[10:11], s[2:3], 0x0 // GFX9: s_atomic_cmpswap s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0x06,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_cmpswap s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0x04,0xf6,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_cmpswap s[10:11], s[2:3], s0 glc // GFX9: s_atomic_cmpswap s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x05,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_cmpswap s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x05,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_cmpswap_x2 s[20:23], s[2:3], s101 // GFX9: s_atomic_cmpswap_x2 s[20:23], s[2:3], s101 ; encoding: [0x01,0x05,0x84,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_cmpswap_x2 s[20:23], s[2:3], s101 ; encoding: [0x01,0x05,0x84,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: 
instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_cmpswap_x2 s[20:23], s[2:3], 0x0 // GFX9: s_atomic_cmpswap_x2 s[20:23], s[2:3], 0x0 ; encoding: [0x01,0x05,0x86,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_cmpswap_x2 s[20:23], s[2:3], 0x0 ; encoding: [0x01,0x05,0x84,0xf6,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_cmpswap_x2 s[20:23], s[2:3], s0 glc // GFX9: s_atomic_cmpswap_x2 s[20:23], s[2:3], s0 glc ; encoding: [0x01,0x05,0x85,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_cmpswap_x2 s[20:23], s[2:3], s0 glc ; encoding: [0x01,0x05,0x85,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_dec s5, s[2:3], s0 glc // GFX9: s_atomic_dec s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x31,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_dec s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x31,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_dec_x2 s[10:11], s[2:3], s101 // GFX9: s_atomic_dec_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xb0,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_dec_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xb0,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_inc s5, s[2:3], s0 glc // GFX9: s_atomic_inc s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x2d,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_inc s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x2d,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_inc_x2 s[10:11], s[2:3], s101 // GFX9: s_atomic_inc_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xac,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_inc_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xac,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_or s5, s[2:3], 0x0 // GFX9: s_atomic_or s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x26,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_or s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x24,0xf6,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_or_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_or_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa5,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_or_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa5,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_smax s5, s[2:3], s101 // GFX9: s_atomic_smax s5, s[2:3], s101 ; encoding: [0x41,0x01,0x18,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_smax s5, s[2:3], s101 ; encoding: [0x41,0x01,0x18,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_smax_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_smax_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x99,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_smax_x2 
s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x99,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_smin s5, s[2:3], s101 // GFX9: s_atomic_smin s5, s[2:3], s101 ; encoding: [0x41,0x01,0x10,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_smin s5, s[2:3], s101 ; encoding: [0x41,0x01,0x10,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_smin_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_smin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x91,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_smin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x91,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_sub s5, s[2:3], s101 // GFX9: s_atomic_sub s5, s[2:3], s101 ; encoding: [0x41,0x01,0x0c,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_sub s5, s[2:3], s101 ; encoding: [0x41,0x01,0x0c,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_sub_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_sub_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x8d,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_sub_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x8d,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_swap s5, s[2:3], s101 // GFX9: s_atomic_swap s5, s[2:3], s101 ; encoding: [0x41,0x01,0x00,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_swap s5, s[2:3], s101 ; encoding: [0x41,0x01,0x00,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_swap_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_swap_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x81,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_swap_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x81,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_umax s5, s[2:3], s0 glc // GFX9: s_atomic_umax s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x1d,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_umax s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x1d,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_umax_x2 s[10:11], s[2:3], s101 // GFX9: s_atomic_umax_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x9c,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_umax_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x9c,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_umin s5, s[2:3], s101 // GFX9: s_atomic_umin s5, s[2:3], s101 ; encoding: [0x41,0x01,0x14,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_umin s5, s[2:3], s101 ; encoding: [0x41,0x01,0x14,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_umin_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_umin_x2 s[10:11], s[2:3], s0 glc ; 
encoding: [0x81,0x02,0x95,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_umin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x95,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_xor s5, s[2:3], s101 // GFX9: s_atomic_xor s5, s[2:3], s101 ; encoding: [0x41,0x01,0x28,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_xor s5, s[2:3], s101 ; encoding: [0x41,0x01,0x28,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_atomic_xor_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_xor_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa9,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_xor_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa9,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU //===----------------------------------------------------------------------===// // s_buffer_atomic instructions @@ -460,162 +460,162 @@ s_atomic_xor_x2 s[10:11], s[2:3], s0 glc s_buffer_atomic_add s5, s[4:7], s101 // GFX9: s_buffer_atomic_add s5, s[4:7], s101 ; encoding: [0x42,0x01,0x08,0xc1,0x65,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_add s5, s[4:7], s101 ; encoding: [0x42,0x01,0x08,0xf5,0x00,0x00,0x00,0xca] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_add s5, s[4:7], 0x0 // GFX9: s_buffer_atomic_add s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x0a,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_add s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x08,0xf5,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_add s5, s[4:7], s0 glc // GFX9: s_buffer_atomic_add s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x09,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_add s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x09,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_add_x2 s[10:11], s[4:7], s0 // GFX9: s_buffer_atomic_add_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x88,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_add_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x88,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_and s101, s[4:7], s0 // GFX9: s_buffer_atomic_and s101, s[4:7], s0 ; encoding: [0x42,0x19,0x20,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_and s101, s[4:7], s0 ; encoding: [0x42,0x19,0x20,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_and_x2 s[10:11], s[8:11], s0 // GFX9: s_buffer_atomic_and_x2 s[10:11], s[8:11], s0 ; encoding: [0x84,0x02,0xa0,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_and_x2 s[10:11], s[8:11], s0 ; encoding: [0x84,0x02,0xa0,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 // GFX9: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 ; encoding: 
[0x82,0x02,0x04,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x04,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_cmpswap s[10:11], s[4:7], 0x0 // GFX9: s_buffer_atomic_cmpswap s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0x06,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_cmpswap s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0x04,0xf5,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x05,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x05,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s101 // GFX9: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s101 ; encoding: [0x02,0x05,0x84,0xc1,0x65,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s101 ; encoding: [0x02,0x05,0x84,0xf5,0x00,0x00,0x00,0xca] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], 0x0 // GFX9: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], 0x0 ; encoding: [0x02,0x05,0x86,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], 0x0 ; encoding: [0x02,0x05,0x84,0xf5,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s0 glc // GFX9: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s0 glc ; encoding: [0x02,0x05,0x85,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s0 glc ; encoding: [0x02,0x05,0x85,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_dec s5, s[4:7], s0 // GFX9: s_buffer_atomic_dec s5, s[4:7], s0 ; encoding: [0x42,0x01,0x30,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_dec s5, s[4:7], s0 ; encoding: [0x42,0x01,0x30,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_dec_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_dec_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xb1,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_dec_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xb1,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_inc s101, s[4:7], s0 // GFX9: s_buffer_atomic_inc s101, s[4:7], s0 ; encoding: [0x42,0x19,0x2c,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_inc s101, s[4:7], s0 ; encoding: [0x42,0x19,0x2c,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_inc_x2 s[10:11], s[4:7], 0x0 // GFX9: s_buffer_atomic_inc_x2 s[10:11], s[4:7], 0x0 ; encoding: 
[0x82,0x02,0xae,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_inc_x2 s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0xac,0xf5,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_or s5, s[8:11], s0 // GFX9: s_buffer_atomic_or s5, s[8:11], s0 ; encoding: [0x44,0x01,0x24,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_or s5, s[8:11], s0 ; encoding: [0x44,0x01,0x24,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_or_x2 s[10:11], s[96:99], s0 // GFX9: s_buffer_atomic_or_x2 s[10:11], s[96:99], s0 ; encoding: [0xb0,0x02,0xa4,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_or_x2 s[10:11], s[96:99], s0 ; encoding: [0xb0,0x02,0xa4,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_smax s5, s[4:7], s101 // GFX9: s_buffer_atomic_smax s5, s[4:7], s101 ; encoding: [0x42,0x01,0x18,0xc1,0x65,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_smax s5, s[4:7], s101 ; encoding: [0x42,0x01,0x18,0xf5,0x00,0x00,0x00,0xca] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_smax_x2 s[100:101], s[4:7], s0 // GFX9: s_buffer_atomic_smax_x2 s[100:101], s[4:7], s0 ; encoding: [0x02,0x19,0x98,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_smax_x2 s[100:101], s[4:7], s0 ; encoding: [0x02,0x19,0x98,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_smin s5, s[4:7], 0x0 // GFX9: s_buffer_atomic_smin s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x12,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_smin s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x10,0xf5,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_smin_x2 s[12:13], s[4:7], s0 // GFX9: s_buffer_atomic_smin_x2 s[12:13], s[4:7], s0 ; encoding: [0x02,0x03,0x90,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_smin_x2 s[12:13], s[4:7], s0 ; encoding: [0x02,0x03,0x90,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_sub s5, s[4:7], s0 glc // GFX9: s_buffer_atomic_sub s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x0d,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_sub s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x0d,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_sub_x2 s[10:11], s[4:7], s0 // GFX9: s_buffer_atomic_sub_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x8c,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_sub_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x8c,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_swap s5, s[4:7], s0 // GFX9: s_buffer_atomic_swap s5, s[4:7], s0 ; encoding: [0x42,0x01,0x00,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_swap s5, s[4:7], s0 ; encoding: [0x42,0x01,0x00,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: 
error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_swap_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_swap_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x81,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_swap_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x81,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_umax s5, s[4:7], s0 // GFX9: s_buffer_atomic_umax s5, s[4:7], s0 ; encoding: [0x42,0x01,0x1c,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_umax s5, s[4:7], s0 ; encoding: [0x42,0x01,0x1c,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_umax_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_umax_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x9d,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_umax_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x9d,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_umin s5, s[4:7], s0 // GFX9: s_buffer_atomic_umin s5, s[4:7], s0 ; encoding: [0x42,0x01,0x14,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_umin s5, s[4:7], s0 ; encoding: [0x42,0x01,0x14,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_umin_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_umin_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x95,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_umin_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x95,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_xor s5, s[4:7], s0 // GFX9: s_buffer_atomic_xor s5, s[4:7], s0 ; encoding: [0x42,0x01,0x28,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_xor s5, s[4:7], s0 ; encoding: [0x42,0x01,0x28,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xa9,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xa9,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: instruction not supported on this GPU +// NOSICIVIGFX1030: error: instruction not supported on this GPU //===----------------------------------------------------------------------===// // Unsigned 20-bit offsets (VI+) @@ -632,23 +632,23 @@ s_atc_probe_buffer 0x1, s[8:11], 0xFFFFF // GFX10: s_atc_probe_buffer 1, s[8:11], 0xfffff ; encoding: [0x44,0x00,0x9c,0xf4,0xff,0xff,0x0f,0xfa] s_store_dword s1, s[2:3], 0xFFFFF -// NOSICI: error: instruction not supported on this GPU +// NOSICIGFX1030: error: instruction not supported on this GPU // GFX89: s_store_dword s1, s[2:3], 0xfffff ; encoding: [0x41,0x00,0x42,0xc0,0xff,0xff,0x0f,0x00] -// GFX10: s_store_dword s1, s[2:3], 0xfffff ; encoding: [0x41,0x00,0x40,0xf4,0xff,0xff,0x0f,0xfa] +// GFX1012: s_store_dword s1, s[2:3], 0xfffff ; encoding: [0x41,0x00,0x40,0xf4,0xff,0xff,0x0f,0xfa] s_buffer_store_dword s10, 
s[92:95], 0xFFFFF -// NOSICI: error: instruction not supported on this GPU +// NOSICIGFX1030: error: instruction not supported on this GPU // GFX89: s_buffer_store_dword s10, s[92:95], 0xfffff ; encoding: [0xae,0x02,0x62,0xc0,0xff,0xff,0x0f,0x00] -// GFX10: s_buffer_store_dword s10, s[92:95], 0xfffff ; encoding: [0xae,0x02,0x60,0xf4,0xff,0xff,0x0f,0xfa] +// GFX1012: s_buffer_store_dword s10, s[92:95], 0xfffff ; encoding: [0xae,0x02,0x60,0xf4,0xff,0xff,0x0f,0xfa] s_atomic_swap s5, s[2:3], 0xFFFFF -// NOSICIVI: error: instruction not supported on this GPU -// GFX10: s_atomic_swap s5, s[2:3], 0xfffff ; encoding: [0x41,0x01,0x00,0xf6,0xff,0xff,0x0f,0xfa] +// NOSICIVIGFX1030: error: instruction not supported on this GPU +// GFX1012: s_atomic_swap s5, s[2:3], 0xfffff ; encoding: [0x41,0x01,0x00,0xf6,0xff,0xff,0x0f,0xfa] // GFX9: s_atomic_swap s5, s[2:3], 0xfffff ; encoding: [0x41,0x01,0x02,0xc2,0xff,0xff,0x0f,0x00] s_buffer_atomic_swap s5, s[4:7], 0xFFFFF -// NOSICIVI: error: instruction not supported on this GPU -// GFX10: s_buffer_atomic_swap s5, s[4:7], 0xfffff ; encoding: [0x42,0x01,0x00,0xf5,0xff,0xff,0x0f,0xfa] +// NOSICIVIGFX1030: error: instruction not supported on this GPU +// GFX1012: s_buffer_atomic_swap s5, s[4:7], 0xfffff ; encoding: [0x42,0x01,0x00,0xf5,0xff,0xff,0x0f,0xfa] // GFX9: s_buffer_atomic_swap s5, s[4:7], 0xfffff ; encoding: [0x42,0x01,0x02,0xc1,0xff,0xff,0x0f,0x00] s_atc_probe 0x7, s[4:5], 0x1FFFFF @@ -662,22 +662,22 @@ s_atc_probe_buffer 0x1, s[8:11], 0x1FFFFF // NOVI: error: expected a 20-bit unsigned offset s_store_dword s1, s[2:3], 0x1FFFFF -// NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: expected a 21-bit signed offset +// NOSICIGFX1030: error: instruction not supported on this GPU +// NOGFX9GFX1012: error: expected a 21-bit signed offset // NOVI: error: expected a 20-bit unsigned offset s_buffer_store_dword s10, s[92:95], 0x1FFFFF -// NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: expected a 20-bit unsigned offset +// NOSICIGFX1030: error: instruction not supported on this GPU +// NOGFX9GFX1012: error: expected a 20-bit unsigned offset // NOVI: error: expected a 20-bit unsigned offset s_atomic_swap s5, s[2:3], 0x1FFFFF -// NOSICIVI: error: instruction not supported on this GPU -// NOGFX9: error: expected a 21-bit signed offset +// NOSICIVIGFX1030: error: instruction not supported on this GPU +// NOGFX9GFX1012: error: expected a 21-bit signed offset s_buffer_atomic_swap s5, s[4:7], 0x1FFFFF -// NOSICIVI: error: instruction not supported on this GPU -// NOGFX9: error: expected a 20-bit unsigned offset +// NOSICIVIGFX1030: error: instruction not supported on this GPU +// NOGFX9GFX1012: error: expected a 20-bit unsigned offset //===----------------------------------------------------------------------===// // Signed offsets (gfx9+) @@ -697,13 +697,13 @@ s_atc_probe_buffer 0x1, s[8:11], -1 s_store_dword s1, s[2:3], -1 // NOVI: error: expected a 20-bit unsigned offset // GFX9: s_store_dword s1, s[2:3], -0x1 ; encoding: [0x41,0x00,0x42,0xc0,0xff,0xff,0x1f,0x00] -// GFX10: s_store_dword s1, s[2:3], -0x1 ; encoding: [0x41,0x00,0x40,0xf4,0xff,0xff,0x1f,0xfa] -// NOSICI: error: instruction not supported on this GPU +// GFX1012: s_store_dword s1, s[2:3], -0x1 ; encoding: [0x41,0x00,0x40,0xf4,0xff,0xff,0x1f,0xfa] +// NOSICIGFX1030: error: instruction not supported on this GPU s_buffer_store_dword s10, s[92:95], -1 // NOVI: error: expected a 20-bit unsigned offset -// NOSICI: error: instruction not supported on this GPU -// 
NOGFX9: error: expected a 20-bit unsigned offset
+// NOSICIGFX1030: error: instruction not supported on this GPU
+// NOGFX9GFX1012: error: expected a 20-bit unsigned offset
 
 s_load_dword s1, s[2:3], -1
 // NOVI: error: expected a 20-bit unsigned offset
 
@@ -719,13 +719,13 @@ s_buffer_load_dword s10, s[92:95], -1
 
 s_atomic_swap s5, s[2:3], -1
 // NOVI: error: instruction not supported on this GPU
 // GFX9: s_atomic_swap s5, s[2:3], -0x1 ; encoding: [0x41,0x01,0x02,0xc2,0xff,0xff,0x1f,0x00]
-// GFX10: s_atomic_swap s5, s[2:3], -0x1 ; encoding: [0x41,0x01,0x00,0xf6,0xff,0xff,0x1f,0xfa]
-// NOSICI: error: instruction not supported on this GPU
+// GFX1012: s_atomic_swap s5, s[2:3], -0x1 ; encoding: [0x41,0x01,0x00,0xf6,0xff,0xff,0x1f,0xfa]
+// NOSICIGFX1030: error: instruction not supported on this GPU
 
 s_buffer_atomic_swap s5, s[4:7], -1
 // NOVI: error: instruction not supported on this GPU
-// NOSICI: error: instruction not supported on this GPU
-// NOGFX9: error: expected a 20-bit unsigned offset
+// NOSICIGFX1030: error: instruction not supported on this GPU
+// NOGFX9GFX1012: error: expected a 20-bit unsigned offset
 
 s_atc_probe 0x7, s[4:5], 0xFFFFFFFFFFF00000
 // NOSICI: error: instruction not supported on this GPU
@@ -739,14 +739,14 @@ s_atc_probe_buffer 0x1, s[8:11], 0xFFFFFFFFFFF00000
 // NOVI: error: expected a 20-bit unsigned offset
 
 s_store_dword s1, s[2:3], 0xFFFFFFFFFFF00000
-// NOSICI: error: instruction not supported on this GPU
-// GFX10: s_store_dword s1, s[2:3], -0x100000 ; encoding: [0x41,0x00,0x40,0xf4,0x00,0x00,0x10,0xfa]
+// NOSICIGFX1030: error: instruction not supported on this GPU
+// GFX1012: s_store_dword s1, s[2:3], -0x100000 ; encoding: [0x41,0x00,0x40,0xf4,0x00,0x00,0x10,0xfa]
 // GFX9: s_store_dword s1, s[2:3], -0x100000 ; encoding: [0x41,0x00,0x42,0xc0,0x00,0x00,0x10,0x00]
 // NOVI: error: expected a 20-bit unsigned offset
 
 s_buffer_store_dword s10, s[92:95], 0xFFFFFFFFFFF00000
-// NOSICI: error: instruction not supported on this GPU
-// NOGFX9: error: expected a 20-bit unsigned offset
+// NOSICIGFX1030: error: instruction not supported on this GPU
+// NOGFX9GFX1012: error: expected a 20-bit unsigned offset
 // NOVI: error: expected a 20-bit unsigned offset
 
 s_load_dword s1, s[2:3], 0xFFFFFFFFFFF00000
@@ -761,10 +761,10 @@ s_buffer_load_dword s10, s[92:95], 0xFFFFFFFFFFF00000
 // NOVI: error: expected a 20-bit unsigned offset
 
 s_atomic_swap s5, s[2:3], 0xFFFFFFFFFFF00000
-// NOSICIVI: error: instruction not supported on this GPU
-// GFX10: s_atomic_swap s5, s[2:3], -0x100000 ; encoding: [0x41,0x01,0x00,0xf6,0x00,0x00,0x10,0xfa]
+// NOSICIVIGFX1030: error: instruction not supported on this GPU
+// GFX1012: s_atomic_swap s5, s[2:3], -0x100000 ; encoding: [0x41,0x01,0x00,0xf6,0x00,0x00,0x10,0xfa]
 // GFX9: s_atomic_swap s5, s[2:3], -0x100000 ; encoding: [0x41,0x01,0x02,0xc2,0x00,0x00,0x10,0x00]
 
 s_buffer_atomic_swap s5, s[4:7], 0xFFFFFFFFFFF00000
-// NOSICIVI: error: instruction not supported on this GPU
-// NOGFX9: error: expected a 20-bit unsigned offset
+// NOSICIVIGFX1030: error: instruction not supported on this GPU
+// NOGFX9GFX1012: error: expected a 20-bit unsigned offset

From cd13476ab57b43b66831bba14206a350c5a4a81b Mon Sep 17 00:00:00 2001
From: Vitaly Buka
Date: Wed, 16 Sep 2020 01:14:55 -0700
Subject: [PATCH 0905/1079] [NFC][LSAN] Change SuspendedThreadsList interface

Remove RegisterCount and let GetRegistersAndSP resize the buffer as
needed.
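As a sketch of the new contract (a self-contained analogue, not the sanitizer
code itself; `Regs` and the std types stand in for the runtime's own types),
the callee now sizes the buffer, so callers no longer need a separate
RegisterCount() query:

```
#include <cstring>
#include <vector>

struct Regs { unsigned long pc, sp, gpr[30]; };  // stand-in register file

// The callee resizes the buffer to fit whatever it copies in.
bool GetRegistersAndSP(std::vector<unsigned long> *buffer, unsigned long *sp) {
  Regs regs = {};  // a real implementation would fill this via ptrace
  buffer->resize((sizeof(regs) + sizeof(unsigned long) - 1) / sizeof(unsigned long));
  std::memcpy(buffer->data(), &regs, sizeof(regs));
  *sp = regs.sp;
  return true;
}

int main() {
  std::vector<unsigned long> registers;  // note: no up-front sizing
  unsigned long sp;
  return GetRegistersAndSP(&registers, &sp) ? 0 : 1;
}
```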
Reviewed By: morehouse Differential Revision: https://reviews.llvm.org/D87747 --- compiler-rt/lib/lsan/lsan_common.cpp | 13 +++++++------ .../lib/sanitizer_common/sanitizer_stoptheworld.h | 6 ++---- .../sanitizer_stoptheworld_linux_libcdep.cpp | 12 +++++------- .../sanitizer_common/sanitizer_stoptheworld_mac.cpp | 12 +++++------- .../sanitizer_stoptheworld_netbsd_libcdep.cpp | 12 +++++------- 5 files changed, 24 insertions(+), 31 deletions(-) diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp index 41b5ae5483299..107d63ac9117c 100644 --- a/compiler-rt/lib/lsan/lsan_common.cpp +++ b/compiler-rt/lib/lsan/lsan_common.cpp @@ -218,10 +218,7 @@ static void ProcessThreads(SuspendedThreadsList const &, Frontier *) {} // Scans thread data (stacks and TLS) for heap pointers. static void ProcessThreads(SuspendedThreadsList const &suspended_threads, Frontier *frontier) { - InternalMmapVector registers(suspended_threads.RegisterCount()); - uptr registers_begin = reinterpret_cast(registers.data()); - uptr registers_end = - reinterpret_cast(registers.data() + registers.size()); + InternalMmapVector registers; for (uptr i = 0; i < suspended_threads.ThreadCount(); i++) { tid_t os_id = static_cast(suspended_threads.GetThreadID(i)); LOG_THREADS("Processing thread %d.\n", os_id); @@ -238,7 +235,7 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads, } uptr sp; PtraceRegistersStatus have_registers = - suspended_threads.GetRegistersAndSP(i, registers.data(), &sp); + suspended_threads.GetRegistersAndSP(i, ®isters, &sp); if (have_registers != REGISTERS_AVAILABLE) { Report("Unable to get registers from thread %d.\n", os_id); // If unable to get SP, consider the entire stack to be reachable unless @@ -247,9 +244,13 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads, sp = stack_begin; } - if (flags()->use_registers && have_registers) + if (flags()->use_registers && have_registers) { + uptr registers_begin = reinterpret_cast(registers.data()); + uptr registers_end = + reinterpret_cast(registers.data() + registers.size()); ScanRangeForPointers(registers_begin, registers_end, frontier, "REGISTERS", kReachable); + } if (flags()->use_stacks) { LOG_THREADS("Stack at %p-%p (SP = %p).\n", stack_begin, stack_end, sp); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld.h b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld.h index 4e42400571423..7eb7c7684af5e 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld.h @@ -32,13 +32,11 @@ class SuspendedThreadsList { // Can't declare pure virtual functions in sanitizer runtimes: // __cxa_pure_virtual might be unavailable. Use UNIMPLEMENTED() instead. - virtual PtraceRegistersStatus GetRegistersAndSP(uptr index, uptr *buffer, - uptr *sp) const { + virtual PtraceRegistersStatus GetRegistersAndSP( + uptr index, InternalMmapVector *buffer, uptr *sp) const { UNIMPLEMENTED(); } - // The buffer in GetRegistersAndSP should be at least this big. 
- virtual uptr RegisterCount() const { UNIMPLEMENTED(); } virtual uptr ThreadCount() const { UNIMPLEMENTED(); } virtual tid_t GetThreadID(uptr index) const { UNIMPLEMENTED(); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp index bd72c0ae00cbe..fd9ab6f49f273 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp @@ -94,9 +94,9 @@ class SuspendedThreadsListLinux : public SuspendedThreadsList { bool ContainsTid(tid_t thread_id) const; void Append(tid_t tid); - PtraceRegistersStatus GetRegistersAndSP(uptr index, uptr *buffer, + PtraceRegistersStatus GetRegistersAndSP(uptr index, + InternalMmapVector *buffer, uptr *sp) const override; - uptr RegisterCount() const override; private: InternalMmapVector thread_ids_; @@ -533,7 +533,7 @@ void SuspendedThreadsListLinux::Append(tid_t tid) { } PtraceRegistersStatus SuspendedThreadsListLinux::GetRegistersAndSP( - uptr index, uptr *buffer, uptr *sp) const { + uptr index, InternalMmapVector *buffer, uptr *sp) const { pid_t tid = GetThreadID(index); regs_struct regs; int pterrno; @@ -559,13 +559,11 @@ PtraceRegistersStatus SuspendedThreadsListLinux::GetRegistersAndSP( } *sp = regs.REG_SP; - internal_memcpy(buffer, ®s, sizeof(regs)); + buffer->resize(RoundUpTo(sizeof(regs), sizeof(uptr)) / sizeof(uptr)); + internal_memcpy(buffer->data(), ®s, sizeof(regs)); return REGISTERS_AVAILABLE; } -uptr SuspendedThreadsListLinux::RegisterCount() const { - return sizeof(regs_struct) / sizeof(uptr); -} } // namespace __sanitizer #endif // SANITIZER_LINUX && (defined(__x86_64__) || defined(__mips__) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp index 7f9529aa35562..a605d5b9ff6bd 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp @@ -37,9 +37,9 @@ class SuspendedThreadsListMac : public SuspendedThreadsList { bool ContainsThread(thread_t thread) const; void Append(thread_t thread); - PtraceRegistersStatus GetRegistersAndSP(uptr index, uptr *buffer, + PtraceRegistersStatus GetRegistersAndSP(uptr index, + InternalMmapVector *buffer, uptr *sp) const override; - uptr RegisterCount() const override; private: InternalMmapVector threads_; @@ -142,7 +142,7 @@ void SuspendedThreadsListMac::Append(thread_t thread) { } PtraceRegistersStatus SuspendedThreadsListMac::GetRegistersAndSP( - uptr index, uptr *buffer, uptr *sp) const { + uptr index, InternalMmapVector *buffer, uptr *sp) const { thread_t thread = GetThread(index); regs_struct regs; int err; @@ -159,7 +159,8 @@ PtraceRegistersStatus SuspendedThreadsListMac::GetRegistersAndSP( : REGISTERS_UNAVAILABLE; } - internal_memcpy(buffer, ®s, sizeof(regs)); + buffer->resize(RoundUpTo(sizeof(regs), sizeof(uptr)) / sizeof(uptr)); + internal_memcpy(buffer->data(), ®s, sizeof(regs)); #if defined(__aarch64__) && defined(arm_thread_state64_get_sp) *sp = arm_thread_state64_get_sp(regs); #else @@ -173,9 +174,6 @@ PtraceRegistersStatus SuspendedThreadsListMac::GetRegistersAndSP( return REGISTERS_AVAILABLE; } -uptr SuspendedThreadsListMac::RegisterCount() const { - return MACHINE_THREAD_STATE_COUNT; -} } // namespace __sanitizer #endif // SANITIZER_MAC && (defined(__x86_64__) || defined(__aarch64__)) || diff --git 
a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp index 63ef00d2750a3..70df31e6351cb 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp @@ -57,9 +57,9 @@ class SuspendedThreadsListNetBSD : public SuspendedThreadsList { bool ContainsTid(tid_t thread_id) const; void Append(tid_t tid); - PtraceRegistersStatus GetRegistersAndSP(uptr index, uptr *buffer, + PtraceRegistersStatus GetRegistersAndSP(uptr index, + InternalMmapVector *buffer, uptr *sp) const; - uptr RegisterCount() const; private: InternalMmapVector thread_ids_; @@ -335,7 +335,7 @@ void SuspendedThreadsListNetBSD::Append(tid_t tid) { } PtraceRegistersStatus SuspendedThreadsListNetBSD::GetRegistersAndSP( - uptr index, uptr *buffer, uptr *sp) const { + uptr index, InternalMmapVector *buffer, uptr *sp) const { lwpid_t tid = GetThreadID(index); pid_t ppid = internal_getppid(); struct reg regs; @@ -351,14 +351,12 @@ PtraceRegistersStatus SuspendedThreadsListNetBSD::GetRegistersAndSP( } *sp = PTRACE_REG_SP(®s); - internal_memcpy(buffer, ®s, sizeof(regs)); + buffer->resize(RoundUpTo(sizeof(regs), sizeof(uptr)) / sizeof(uptr)); + internal_memcpy(buffer->data(), ®s, sizeof(regs)); return REGISTERS_AVAILABLE; } -uptr SuspendedThreadsListNetBSD::RegisterCount() const { - return sizeof(struct reg) / sizeof(uptr); -} } // namespace __sanitizer #endif From 15f0ad2fa29beaf1dad1548ccb97c2c729ea53cd Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 16 Sep 2020 14:03:34 -0700 Subject: [PATCH 0906/1079] [ELF] Bump the limit of thunk creation passes from 10 to 15 I have noticed that a 374MiB powerpc64le 'ld.lld' requires 11 passes to link. There is a ThunkSection (whose parent OutputSection is ".text" of 169MiB) with 12867 thunks. --- lld/ELF/Writer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 5ef37e9ecb895..f42686f08e640 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1701,8 +1701,8 @@ template void Writer::finalizeAddressDependentContent() { bool changed = target->needsThunks && tc.createThunks(outputSections); // With Thunk Size much smaller than branch range we expect to - // converge quickly; if we get to 10 something has gone wrong. - if (changed && tc.pass >= 10) { + // converge quickly; if we get to 15 something has gone wrong. + if (changed && tc.pass >= 15) { error("thunk creation not converged"); break; } From aa2ba67a8137040b9146d0383c74f0b75ac9683a Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 16 Sep 2020 08:36:58 -0700 Subject: [PATCH 0907/1079] [NFC][regalloc] type LiveInterval::reg() as Register We have the Register type which precisely captures the role of this member. Storage-wise, it's an unsigned. This helps readability & maintainability. 
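For illustration only (this assumes llvm::Register's existing interface,
nothing added by this patch), typed call sites keep working while gaining
self-documenting queries:

```
#include "llvm/CodeGen/Register.h"

void demo(llvm::Register R) {
  unsigned Raw = R;           // implicit conversion keeps old callers building
  bool Virt = R.isVirtual();  // clearer than testing the raw unsigned by hand
  (void)Raw;
  (void)Virt;
}
```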
Differential Revision: https://reviews.llvm.org/D87768 --- llvm/include/llvm/CodeGen/LiveInterval.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/CodeGen/LiveInterval.h b/llvm/include/llvm/CodeGen/LiveInterval.h index a63eaac44063b..4fa7afaefc64f 100644 --- a/llvm/include/llvm/CodeGen/LiveInterval.h +++ b/llvm/include/llvm/CodeGen/LiveInterval.h @@ -25,6 +25,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Support/Allocator.h" @@ -704,11 +705,11 @@ namespace llvm { private: SubRange *SubRanges = nullptr; ///< Single linked list of subregister live /// ranges. - const unsigned Reg; // the register or stack slot of this interval. + const Register Reg; // the register or stack slot of this interval. float Weight = 0.0; // weight of this interval public: - unsigned reg() const { return Reg; } + Register reg() const { return Reg; } float weight() const { return Weight; } void incrementWeight(float Inc) { Weight += Inc; } void setWeight(float Value) { Weight = Value; } From b1cb9d6271263b197ba53cac28a0fc3bf27ec5b8 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 16 Sep 2020 14:17:02 -0700 Subject: [PATCH 0908/1079] [obj2yaml] - Match ".stack_size" with the original section name, and not the uniquified name. Without this patch, obj2yaml decodes the content of only one ".stack_size" section. Other sections are dumped with their full contents. Reviewed By: grimar, MaskRay Differential Revision: https://reviews.llvm.org/D87727 --- llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml | 48 +++++++++++++++++++ llvm/tools/obj2yaml/elf2yaml.cpp | 2 +- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml b/llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml index 8e6c66729c4e0..98a5c5ae88aac 100644 --- a/llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml +++ b/llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml @@ -83,3 +83,51 @@ Sections: - Name: .stack_sizes Type: SHT_PROGBITS Content: "" + +## Check obj2yaml can dump multiple .stack_sizes. 
+
+# RUN: yaml2obj --docnum=4 %s -o %t4
+# RUN: obj2yaml %t4 | FileCheck %s --check-prefix=MULTI
+
+# MULTI: --- !ELF
+# MULTI-NEXT: FileHeader:
+# MULTI-NEXT:   Class: ELFCLASS64
+# MULTI-NEXT:   Data: ELFDATA2LSB
+# MULTI-NEXT:   Type: ET_EXEC
+# MULTI-NEXT:   Machine: EM_NONE
+# MULTI-NEXT: Sections:
+# MULTI-NEXT:   - Name: .stack_sizes
+# MULTI-NEXT:     Type: SHT_PROGBITS
+# MULTI-NEXT:     Entries:
+# MULTI-NEXT:       - Address: 0x0000000000000010
+# MULTI-NEXT:         Size: 0x0000000000000020
+# MULTI-NEXT:       - Address: 0x0000000000000030
+# MULTI-NEXT:         Size: 0x0000000000000040
+# MULTI-NEXT:   - Name: '.stack_sizes (1)'
+# MULTI-NEXT:     Type: SHT_PROGBITS
+# MULTI-NEXT:     Entries:
+# MULTI-NEXT:       - Address: 0x0000000000000050
+# MULTI-NEXT:         Size: 0x0000000000000001
+# MULTI-NEXT:       - Address: 0x0000000000000060
+# MULTI-NEXT:         Size: 0x0000000000000002
+
+--- !ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data: ELFDATA2LSB
+  Type: ET_EXEC
+Sections:
+  - Name: .stack_sizes
+    Type: SHT_PROGBITS
+    Entries:
+      - Address: 0x0000000000000010
+        Size: 0x0000000000000020
+      - Address: 0x0000000000000030
+        Size: 0x0000000000000040
+  - Name: '.stack_sizes (1)'
+    Type: SHT_PROGBITS
+    Entries:
+      - Address: 0x0000000000000050
+        Size: 0x0000000000000001
+      - Address: 0x0000000000000060
+        Size: 0x0000000000000002
diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp
index 3c3bef2dfbf4c..d7ce08af1a9a9 100644
--- a/llvm/tools/obj2yaml/elf2yaml.cpp
+++ b/llvm/tools/obj2yaml/elf2yaml.cpp
@@ -522,7 +522,7 @@ ELFDumper::dumpSections() {
 
   // Recognize some special SHT_PROGBITS sections by name.
   if (Sec.sh_type == ELF::SHT_PROGBITS) {
-    auto NameOrErr = getUniquedSectionName(&Sec);
+    auto NameOrErr = Obj.getSectionName(Sec);
     if (!NameOrErr)
       return NameOrErr.takeError();
 
From dd67581407c1693e43ac8a90b3a20c597614bda8 Mon Sep 17 00:00:00 2001
From: Jordan Rupprecht
Date: Wed, 16 Sep 2020 14:26:40 -0700
Subject: [PATCH 0909/1079] [lldb/test] Enable faulthandler in dotest

Register the `faulthandler` module so we can see what lldb tests are
doing when they misbehave (e.g. run under a test runner that sets a
timeout).

This will print a stack trace for the following signals:
- `SIGSEGV`, `SIGFPE`, `SIGABRT`, `SIGBUS`, and `SIGILL` (via `faulthandler.enable()`)
- `SIGTERM` (via `faulthandler.register(SIGTERM)`) [This is what our test runner sends when it times out].

The only signal we currently handle is `SIGINT` (via
`unittest2.signals.installHandler()`), so there should be no overlap added
by this patch.

Because this import is not available until Python 3, and the `register()`
method is not available on Windows, this is enabled defensively.
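A standalone sketch of that defensive pattern (illustration only, not the
dotest.py diff itself):

```
import signal

try:
    import faulthandler
except ImportError:
    faulthandler = None  # Python 2: the module does not exist

if faulthandler:
    faulthandler.enable()  # covers SIGSEGV, SIGFPE, SIGABRT, SIGBUS, SIGILL
    if getattr(faulthandler, 'register', None):  # register() is missing on Windows
        faulthandler.register(signal.SIGTERM, chain=True)
```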
This should have absolutely no effect when tests are passing (or even normally failing), but can be observed by running this while ninja is running: ``` kill -s SIGTERM $(ps aux | grep dotest.py | head -1 | awk '{print $2}') ``` Reviewed By: JDevlieghere Differential Revision: https://reviews.llvm.org/D87637 --- lldb/packages/Python/lldbsuite/test/dotest.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py index 30d6afc231fda..b4eddda914033 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest.py +++ b/lldb/packages/Python/lldbsuite/test/dotest.py @@ -449,6 +449,18 @@ def parseOptionsAndInitTestdirs(): lldbtest_config.codesign_identity = args.codesign_identity +def registerFaulthandler(): + try: + import faulthandler + except ImportError: + # faulthandler is not available until python3 + return + + faulthandler.enable() + # faulthandler.register is not available on Windows. + if getattr(faulthandler, 'register', None): + faulthandler.register(signal.SIGTERM, chain=True) + def setupSysPath(): """ Add LLDB.framework/Resources/Python to the search paths for modules. @@ -875,6 +887,9 @@ def run_suite(): # parseOptionsAndInitTestdirs() + # Print a stack trace if the test hangs or is passed SIGTERM. + registerFaulthandler() + setupSysPath() import lldbconfig From ee5519d323571c4a9a7d92cb817023c9b95334cd Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Wed, 22 Jul 2020 15:31:53 -0400 Subject: [PATCH 0910/1079] [NFC] Refactor DiagnosticBuilder and PartialDiagnostic PartialDiagnostic misses some functions compared to DiagnosticBuilder. This patch refactors DiagnosticBuilder and PartialDiagnostic, extracts the common functionality so that the streaming << operators are shared. Differential Revision: https://reviews.llvm.org/D84362 --- clang/include/clang/AST/ASTContext.h | 5 +- clang/include/clang/AST/Attr.h | 11 +- clang/include/clang/AST/CanonicalType.h | 4 +- clang/include/clang/AST/Decl.h | 10 +- clang/include/clang/AST/DeclCXX.h | 7 +- clang/include/clang/AST/DeclarationName.h | 13 +- clang/include/clang/AST/NestedNameSpecifier.h | 4 +- clang/include/clang/AST/TemplateBase.h | 4 +- clang/include/clang/AST/TemplateName.h | 6 +- clang/include/clang/AST/Type.h | 39 +---- clang/include/clang/Basic/Diagnostic.h | 143 +++++++++++------- clang/include/clang/Basic/PartialDiagnostic.h | 98 +++--------- clang/include/clang/Sema/Ownership.h | 10 +- clang/include/clang/Sema/ParsedAttr.h | 22 +-- clang/include/clang/Sema/Sema.h | 11 ++ clang/lib/AST/ASTContext.cpp | 6 +- clang/lib/AST/DeclCXX.cpp | 9 +- clang/lib/AST/TemplateBase.cpp | 9 +- clang/lib/AST/TemplateName.cpp | 18 +-- clang/lib/Basic/Diagnostic.cpp | 9 +- 20 files changed, 182 insertions(+), 256 deletions(-) diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index de0d1198b6d40..397fee4d866be 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -3064,8 +3064,9 @@ OPT_LIST(V) }; /// Insertion operator for diagnostics. -const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - const ASTContext::SectionInfo &Section); +const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, + const ASTContext::SectionInfo &Section); /// Utility function for constructing a nullary selector. 
inline Selector GetNullarySelector(StringRef name, ASTContext &Ctx) { diff --git a/clang/include/clang/AST/Attr.h b/clang/include/clang/AST/Attr.h index b3729b2e0d995..b4dce8f41c672 100644 --- a/clang/include/clang/AST/Attr.h +++ b/clang/include/clang/AST/Attr.h @@ -350,19 +350,12 @@ struct ParsedTargetAttr { #include "clang/AST/Attrs.inc" -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - const Attr *At) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, const Attr *At) { DB.AddTaggedVal(reinterpret_cast(At), DiagnosticsEngine::ak_attr); return DB; } - -inline const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, - const Attr *At) { - PD.AddTaggedVal(reinterpret_cast(At), - DiagnosticsEngine::ak_attr); - return PD; -} } // end namespace clang #endif diff --git a/clang/include/clang/AST/CanonicalType.h b/clang/include/clang/AST/CanonicalType.h index 488284713bcec..b6d9b69db09af 100644 --- a/clang/include/clang/AST/CanonicalType.h +++ b/clang/include/clang/AST/CanonicalType.h @@ -215,8 +215,8 @@ inline CanQualType Type::getCanonicalTypeUnqualified() const { return CanQualType::CreateUnsafe(getCanonicalTypeInternal()); } -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - CanQualType T) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, CanQualType T) { DB << static_cast(T); return DB; } diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index c2511514fe726..852ba2316f82b 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -4513,14 +4513,8 @@ class EmptyDecl : public Decl { /// Insertion operator for diagnostics. This allows sending NamedDecl's /// into a diagnostic with <<. -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - const NamedDecl* ND) { - DB.AddTaggedVal(reinterpret_cast(ND), - DiagnosticsEngine::ak_nameddecl); - return DB; -} -inline const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, - const NamedDecl* ND) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &PD, const NamedDecl *ND) { PD.AddTaggedVal(reinterpret_cast(ND), DiagnosticsEngine::ak_nameddecl); return PD; diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index 20f058b87e7f3..065a7413e7e7d 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -4070,11 +4070,8 @@ class MSGuidDecl : public ValueDecl, /// Insertion operator for diagnostics. This allows sending an AccessSpecifier /// into a diagnostic with <<. -const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - AccessSpecifier AS); - -const PartialDiagnostic &operator<<(const PartialDiagnostic &DB, - AccessSpecifier AS); +const StreamableDiagnosticBase &operator<<(const StreamableDiagnosticBase &DB, + AccessSpecifier AS); } // namespace clang diff --git a/clang/include/clang/AST/DeclarationName.h b/clang/include/clang/AST/DeclarationName.h index a037e8b197bc3..b5692ec7684bc 100644 --- a/clang/include/clang/AST/DeclarationName.h +++ b/clang/include/clang/AST/DeclarationName.h @@ -811,19 +811,10 @@ struct DeclarationNameInfo { SourceLocation getEndLocPrivate() const; }; -/// Insertion operator for diagnostics. This allows sending DeclarationName's -/// into a diagnostic with <<. 
-inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - DeclarationName N) { - DB.AddTaggedVal(N.getAsOpaqueInteger(), - DiagnosticsEngine::ak_declarationname); - return DB; -} - /// Insertion operator for partial diagnostics. This allows binding /// DeclarationName's into a partial diagnostic with <<. -inline const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, - DeclarationName N) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &PD, DeclarationName N) { PD.AddTaggedVal(N.getAsOpaqueInteger(), DiagnosticsEngine::ak_declarationname); return PD; diff --git a/clang/include/clang/AST/NestedNameSpecifier.h b/clang/include/clang/AST/NestedNameSpecifier.h index b11cb5f6b86d0..70edcfe704232 100644 --- a/clang/include/clang/AST/NestedNameSpecifier.h +++ b/clang/include/clang/AST/NestedNameSpecifier.h @@ -519,8 +519,8 @@ class NestedNameSpecifierLocBuilder { /// Insertion operator for diagnostics. This allows sending /// NestedNameSpecifiers into a diagnostic with <<. -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - NestedNameSpecifier *NNS) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, NestedNameSpecifier *NNS) { DB.AddTaggedVal(reinterpret_cast(NNS), DiagnosticsEngine::ak_nestednamespec); return DB; diff --git a/clang/include/clang/AST/TemplateBase.h b/clang/include/clang/AST/TemplateBase.h index 51fd8ba51034e..5abf60cab4a4a 100644 --- a/clang/include/clang/AST/TemplateBase.h +++ b/clang/include/clang/AST/TemplateBase.h @@ -681,8 +681,8 @@ struct alignas(void *) ASTTemplateKWAndArgsInfo { TemplateArgumentListInfo &List) const; }; -const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - const TemplateArgument &Arg); +const StreamableDiagnosticBase &operator<<(const StreamableDiagnosticBase &DB, + const TemplateArgument &Arg); inline TemplateSpecializationType::iterator TemplateSpecializationType::end() const { diff --git a/clang/include/clang/AST/TemplateName.h b/clang/include/clang/AST/TemplateName.h index 9bcf2838dcf13..0f78d7976a469 100644 --- a/clang/include/clang/AST/TemplateName.h +++ b/clang/include/clang/AST/TemplateName.h @@ -342,10 +342,8 @@ class TemplateName { /// Insertion operator for diagnostics. This allows sending TemplateName's /// into a diagnostic with <<. -const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - TemplateName N); -const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, - TemplateName N); +const StreamableDiagnosticBase &operator<<(const StreamableDiagnosticBase &DB, + TemplateName N); /// A structure for storing the information associated with a /// substituted template template parameter. diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index d8eece10475a7..2bf17b6d7ab0e 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -7068,55 +7068,28 @@ inline const Type *Type::getPointeeOrArrayElementType() const { return type->getBaseElementTypeUnsafe(); return type; } -/// Insertion operator for diagnostics. This allows sending address spaces into -/// a diagnostic with <<. -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - LangAS AS) { - DB.AddTaggedVal(static_cast>(AS), - DiagnosticsEngine::ArgumentKind::ak_addrspace); - return DB; -} - /// Insertion operator for partial diagnostics. This allows sending adress /// spaces into a diagnostic with <<. 
-inline const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, - LangAS AS) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &PD, LangAS AS) { PD.AddTaggedVal(static_cast>(AS), DiagnosticsEngine::ArgumentKind::ak_addrspace); return PD; } -/// Insertion operator for diagnostics. This allows sending Qualifiers into a -/// diagnostic with <<. -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - Qualifiers Q) { - DB.AddTaggedVal(Q.getAsOpaqueValue(), - DiagnosticsEngine::ArgumentKind::ak_qual); - return DB; -} - /// Insertion operator for partial diagnostics. This allows sending Qualifiers /// into a diagnostic with <<. -inline const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, - Qualifiers Q) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &PD, Qualifiers Q) { PD.AddTaggedVal(Q.getAsOpaqueValue(), DiagnosticsEngine::ArgumentKind::ak_qual); return PD; } -/// Insertion operator for diagnostics. This allows sending QualType's into a -/// diagnostic with <<. -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - QualType T) { - DB.AddTaggedVal(reinterpret_cast(T.getAsOpaquePtr()), - DiagnosticsEngine::ak_qualtype); - return DB; -} - /// Insertion operator for partial diagnostics. This allows sending QualType's /// into a diagnostic with <<. -inline const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, - QualType T) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &PD, QualType T) { PD.AddTaggedVal(reinterpret_cast(T.getAsOpaquePtr()), DiagnosticsEngine::ak_qualtype); return PD; diff --git a/clang/include/clang/Basic/Diagnostic.h b/clang/include/clang/Basic/Diagnostic.h index 304207779c0f1..7ce418bbb9968 100644 --- a/clang/include/clang/Basic/Diagnostic.h +++ b/clang/include/clang/Basic/Diagnostic.h @@ -1043,6 +1043,35 @@ class DiagnosticErrorTrap { } }; +/// The streaming interface shared between DiagnosticBuilder and +/// PartialDiagnostic. +/// +/// Any new type of argument accepted by DiagnosticBuilder and PartialDiagnostic +/// should be implemented as a '<<' operator of StreamableDiagnosticBase, e.g. +/// +/// const StreamableDiagnosticBase& +/// operator<<(const StreamableDiagnosticBase&, NewArgType); +/// +class StreamableDiagnosticBase { +public: + virtual void AddString(StringRef S) const = 0; + virtual void AddTaggedVal(intptr_t V, + DiagnosticsEngine::ArgumentKind Kind) const = 0; + virtual void AddSourceRange(const CharSourceRange &R) const = 0; + virtual void AddFixItHint(const FixItHint &Hint) const = 0; + + /// Conversion of StreamableDiagnosticBase to bool always returns \c true. + /// + /// This allows is to be used in boolean error contexts (where \c true is + /// used to indicate that an error has occurred), like: + /// \code + /// return Diag(...); + /// \endcode + operator bool() const { return true; } + + virtual ~StreamableDiagnosticBase() {} +}; + //===----------------------------------------------------------------------===// // DiagnosticBuilder //===----------------------------------------------------------------------===// @@ -1059,7 +1088,7 @@ class DiagnosticErrorTrap { /// This ensures that compilers with somewhat reasonable optimizers will promote /// the common fields to registers, eliminating increments of the NumArgs field, /// for example. 
-class DiagnosticBuilder { +class DiagnosticBuilder : public StreamableDiagnosticBase { friend class DiagnosticsEngine; friend class PartialDiagnostic; @@ -1137,12 +1166,27 @@ class DiagnosticBuilder { NumArgs = D.NumArgs; } + template const DiagnosticBuilder &operator<<(const T &V) const { + const StreamableDiagnosticBase &DB = *this; + DB << V; + return *this; + } + + // It is necessary to limit this to rvalue reference to avoid calling this + // function with a bitfield lvalue argument since non-const reference to + // bitfield is not allowed. + template ::value>::type> + const DiagnosticBuilder &operator<<(T &&V) const { + const StreamableDiagnosticBase &DB = *this; + DB << std::move(V); + return *this; + } + DiagnosticBuilder &operator=(const DiagnosticBuilder &) = delete; /// Emits the diagnostic. - ~DiagnosticBuilder() { - Emit(); - } + virtual ~DiagnosticBuilder() { Emit(); } /// Forces the diagnostic to be emitted. const DiagnosticBuilder &setForceEmit() const { @@ -1150,16 +1194,7 @@ class DiagnosticBuilder { return *this; } - /// Conversion of DiagnosticBuilder to bool always returns \c true. - /// - /// This allows is to be used in boolean error contexts (where \c true is - /// used to indicate that an error has occurred), like: - /// \code - /// return Diag(...); - /// \endcode - operator bool() const { return true; } - - void AddString(StringRef S) const { + void AddString(StringRef S) const override { assert(isActive() && "Clients must not add to cleared diagnostic!"); assert(NumArgs < DiagnosticsEngine::MaxArguments && "Too many arguments to diagnostic!"); @@ -1167,7 +1202,8 @@ class DiagnosticBuilder { DiagObj->DiagArgumentsStr[NumArgs++] = std::string(S); } - void AddTaggedVal(intptr_t V, DiagnosticsEngine::ArgumentKind Kind) const { + void AddTaggedVal(intptr_t V, + DiagnosticsEngine::ArgumentKind Kind) const override { assert(isActive() && "Clients must not add to cleared diagnostic!"); assert(NumArgs < DiagnosticsEngine::MaxArguments && "Too many arguments to diagnostic!"); @@ -1175,12 +1211,12 @@ class DiagnosticBuilder { DiagObj->DiagArgumentsVal[NumArgs++] = V; } - void AddSourceRange(const CharSourceRange &R) const { + void AddSourceRange(const CharSourceRange &R) const override { assert(isActive() && "Clients must not add to cleared diagnostic!"); DiagObj->DiagRanges.push_back(R); } - void AddFixItHint(const FixItHint &Hint) const { + void AddFixItHint(const FixItHint &Hint) const override { assert(isActive() && "Clients must not add to cleared diagnostic!"); if (!Hint.isNull()) DiagObj->DiagFixItHints.push_back(Hint); @@ -1205,20 +1241,21 @@ inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, return DB; } -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - StringRef S) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, StringRef S) { DB.AddString(S); return DB; } -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - const char *Str) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, const char *Str) { DB.AddTaggedVal(reinterpret_cast(Str), DiagnosticsEngine::ak_c_string); return DB; } -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, int I) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, int I) { DB.AddTaggedVal(I, DiagnosticsEngine::ak_sint); return DB; } @@ -1226,26 +1263,27 @@ inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, int I) 
{ // We use enable_if here to prevent that this overload is selected for // pointers or other arguments that are implicitly convertible to bool. template -inline std::enable_if_t::value, const DiagnosticBuilder &> -operator<<(const DiagnosticBuilder &DB, T I) { +inline std::enable_if_t::value, + const StreamableDiagnosticBase &> +operator<<(const StreamableDiagnosticBase &DB, T I) { DB.AddTaggedVal(I, DiagnosticsEngine::ak_sint); return DB; } -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - unsigned I) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, unsigned I) { DB.AddTaggedVal(I, DiagnosticsEngine::ak_uint); return DB; } -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - tok::TokenKind I) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, tok::TokenKind I) { DB.AddTaggedVal(static_cast(I), DiagnosticsEngine::ak_tokenkind); return DB; } -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - const IdentifierInfo *II) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, const IdentifierInfo *II) { DB.AddTaggedVal(reinterpret_cast(II), DiagnosticsEngine::ak_identifierinfo); return DB; @@ -1258,63 +1296,64 @@ inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, template inline std::enable_if_t< std::is_same, DeclContext>::value, - const DiagnosticBuilder &> -operator<<(const DiagnosticBuilder &DB, T *DC) { + const StreamableDiagnosticBase &> +operator<<(const StreamableDiagnosticBase &DB, T *DC) { DB.AddTaggedVal(reinterpret_cast(DC), DiagnosticsEngine::ak_declcontext); return DB; } -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - SourceRange R) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, SourceRange R) { DB.AddSourceRange(CharSourceRange::getTokenRange(R)); return DB; } -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - ArrayRef Ranges) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, ArrayRef Ranges) { for (SourceRange R : Ranges) DB.AddSourceRange(CharSourceRange::getTokenRange(R)); return DB; } -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - const CharSourceRange &R) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, const CharSourceRange &R) { DB.AddSourceRange(R); return DB; } -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - const FixItHint &Hint) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, const FixItHint &Hint) { DB.AddFixItHint(Hint); return DB; } -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - ArrayRef Hints) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, ArrayRef Hints) { for (const FixItHint &Hint : Hints) DB.AddFixItHint(Hint); return DB; } -inline const DiagnosticBuilder & -operator<<(const DiagnosticBuilder &DB, +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, const llvm::Optional &Opt) { if (Opt) DB << *Opt; return DB; } -inline const DiagnosticBuilder & -operator<<(const DiagnosticBuilder &DB, +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, const llvm::Optional &Opt) { if (Opt) DB << *Opt; return DB; } -inline const DiagnosticBuilder & 
-operator<<(const DiagnosticBuilder &DB, const llvm::Optional &Opt) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, + const llvm::Optional &Opt) { if (Opt) DB << *Opt; return DB; @@ -1324,8 +1363,8 @@ operator<<(const DiagnosticBuilder &DB, const llvm::Optional &Opt) { /// context-sensitive keyword. using DiagNullabilityKind = std::pair; -const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - DiagNullabilityKind nullability); +const StreamableDiagnosticBase &operator<<(const StreamableDiagnosticBase &DB, + DiagNullabilityKind nullability); inline DiagnosticBuilder DiagnosticsEngine::Report(SourceLocation Loc, unsigned DiagID) { @@ -1337,8 +1376,8 @@ inline DiagnosticBuilder DiagnosticsEngine::Report(SourceLocation Loc, return DiagnosticBuilder(this); } -const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - llvm::Error &&E); +const StreamableDiagnosticBase &operator<<(const StreamableDiagnosticBase &DB, + llvm::Error &&E); inline DiagnosticBuilder DiagnosticsEngine::Report(unsigned DiagID) { return Report(SourceLocation(), DiagID); diff --git a/clang/include/clang/Basic/PartialDiagnostic.h b/clang/include/clang/Basic/PartialDiagnostic.h index 107d621f0dec5..5f2fa6efc2791 100644 --- a/clang/include/clang/Basic/PartialDiagnostic.h +++ b/clang/include/clang/Basic/PartialDiagnostic.h @@ -31,7 +31,7 @@ namespace clang { class DeclContext; class IdentifierInfo; -class PartialDiagnostic { +class PartialDiagnostic : public StreamableDiagnosticBase { public: enum { // The MaxArguments and MaxFixItHints member enum values from @@ -163,14 +163,15 @@ class PartialDiagnostic { DiagStorage = nullptr; } - void AddSourceRange(const CharSourceRange &R) const { +public: + void AddSourceRange(const CharSourceRange &R) const override { if (!DiagStorage) DiagStorage = getStorage(); DiagStorage->DiagRanges.push_back(R); } - void AddFixItHint(const FixItHint &Hint) const { + void AddFixItHint(const FixItHint &Hint) const override { if (Hint.isNull()) return; @@ -180,7 +181,6 @@ class PartialDiagnostic { DiagStorage->FixItHints.push_back(Hint); } -public: struct NullDiagnostic {}; /// Create a null partial diagnostic, which cannot carry a payload, @@ -198,6 +198,23 @@ class PartialDiagnostic { } } + template const PartialDiagnostic &operator<<(const T &V) const { + const StreamableDiagnosticBase &DB = *this; + DB << V; + return *this; + } + + // It is necessary to limit this to rvalue reference to avoid calling this + // function with a bitfield lvalue argument since non-const reference to + // bitfield is not allowed. 
+ template ::value>::type> + const PartialDiagnostic &operator<<(T &&V) const { + const StreamableDiagnosticBase &DB = *this; + DB << std::move(V); + return *this; + } + PartialDiagnostic(PartialDiagnostic &&Other) : DiagID(Other.DiagID), DiagStorage(Other.DiagStorage), Allocator(Other.Allocator) { @@ -255,9 +272,7 @@ class PartialDiagnostic { return *this; } - ~PartialDiagnostic() { - freeStorage(); - } + virtual ~PartialDiagnostic() { freeStorage(); } void swap(PartialDiagnostic &PD) { std::swap(DiagID, PD.DiagID); @@ -267,7 +282,8 @@ class PartialDiagnostic { unsigned getDiagID() const { return DiagID; } - void AddTaggedVal(intptr_t V, DiagnosticsEngine::ArgumentKind Kind) const { + void AddTaggedVal(intptr_t V, + DiagnosticsEngine::ArgumentKind Kind) const override { if (!DiagStorage) DiagStorage = getStorage(); @@ -277,7 +293,7 @@ class PartialDiagnostic { DiagStorage->DiagArgumentsVal[DiagStorage->NumDiagArgs++] = V; } - void AddString(StringRef V) const { + void AddString(StringRef V) const override { if (!DiagStorage) DiagStorage = getStorage(); @@ -340,70 +356,6 @@ class PartialDiagnostic { == DiagnosticsEngine::ak_std_string && "Not a string arg"); return DiagStorage->DiagArgumentsStr[I]; } - - friend const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, - unsigned I) { - PD.AddTaggedVal(I, DiagnosticsEngine::ak_uint); - return PD; - } - - friend const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, - int I) { - PD.AddTaggedVal(I, DiagnosticsEngine::ak_sint); - return PD; - } - - friend inline const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, - const char *S) { - PD.AddTaggedVal(reinterpret_cast(S), - DiagnosticsEngine::ak_c_string); - return PD; - } - - friend inline const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, - StringRef S) { - - PD.AddString(S); - return PD; - } - - friend inline const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, - const IdentifierInfo *II) { - PD.AddTaggedVal(reinterpret_cast(II), - DiagnosticsEngine::ak_identifierinfo); - return PD; - } - - // Adds a DeclContext to the diagnostic. The enable_if template magic is here - // so that we only match those arguments that are (statically) DeclContexts; - // other arguments that derive from DeclContext (e.g., RecordDecls) will not - // match. - template - friend inline std::enable_if_t::value, - const PartialDiagnostic &> - operator<<(const PartialDiagnostic &PD, T *DC) { - PD.AddTaggedVal(reinterpret_cast(DC), - DiagnosticsEngine::ak_declcontext); - return PD; - } - - friend inline const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, - SourceRange R) { - PD.AddSourceRange(CharSourceRange::getTokenRange(R)); - return PD; - } - - friend inline const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, - const CharSourceRange &R) { - PD.AddSourceRange(R); - return PD; - } - - friend const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, - const FixItHint &Hint) { - PD.AddFixItHint(Hint); - return PD; - } }; inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, diff --git a/clang/include/clang/Sema/Ownership.h b/clang/include/clang/Sema/Ownership.h index 7c7b1d35c9fd5..66c4e917c6497 100644 --- a/clang/include/clang/Sema/Ownership.h +++ b/clang/include/clang/Sema/Ownership.h @@ -133,7 +133,7 @@ namespace llvm { namespace clang { // Basic - class DiagnosticBuilder; + class StreamableDiagnosticBase; // Determines whether the low bit of the result pointer for the // given UID is always zero. 
If so, ActionResult will use that bit @@ -280,8 +280,12 @@ namespace clang { inline StmtResult StmtError() { return StmtResult(true); } inline TypeResult TypeError() { return TypeResult(true); } - inline ExprResult ExprError(const DiagnosticBuilder&) { return ExprError(); } - inline StmtResult StmtError(const DiagnosticBuilder&) { return StmtError(); } + inline ExprResult ExprError(const StreamableDiagnosticBase &) { + return ExprError(); + } + inline StmtResult StmtError(const StreamableDiagnosticBase &) { + return StmtError(); + } inline ExprResult ExprEmpty() { return ExprResult(false); } inline StmtResult StmtEmpty() { return StmtResult(false); } diff --git a/clang/include/clang/Sema/ParsedAttr.h b/clang/include/clang/Sema/ParsedAttr.h index 8946b12ee03fc..8b4d04afd1a85 100644 --- a/clang/include/clang/Sema/ParsedAttr.h +++ b/clang/include/clang/Sema/ParsedAttr.h @@ -1044,34 +1044,20 @@ enum AttributeDeclKind { ExpectedFunctionWithProtoType, }; -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - const ParsedAttr &At) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, const ParsedAttr &At) { DB.AddTaggedVal(reinterpret_cast(At.getAttrName()), DiagnosticsEngine::ak_identifierinfo); return DB; } -inline const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, - const ParsedAttr &At) { - PD.AddTaggedVal(reinterpret_cast(At.getAttrName()), - DiagnosticsEngine::ak_identifierinfo); - return PD; -} - -inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, - const ParsedAttr *At) { +inline const StreamableDiagnosticBase & +operator<<(const StreamableDiagnosticBase &DB, const ParsedAttr *At) { DB.AddTaggedVal(reinterpret_cast(At->getAttrName()), DiagnosticsEngine::ak_identifierinfo); return DB; } -inline const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, - const ParsedAttr *At) { - PD.AddTaggedVal(reinterpret_cast(At->getAttrName()), - DiagnosticsEngine::ak_identifierinfo); - return PD; -} - } // namespace clang #endif // LLVM_CLANG_SEMA_ATTRIBUTELIST_H diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 9ee8e338e7329..7080736325a75 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1511,6 +1511,17 @@ class Sema final { BaseDiag << Value; return Diag; } + + // It is necessary to limit this to rvalue reference to avoid calling this + // function with a bitfield lvalue argument since non-const reference to + // bitfield is not allowed. + template ::value>::type> + const SemaDiagnosticBuilder &operator<<(T &&V) const { + const StreamableDiagnosticBase &DB = *this; + DB << std::move(V); + return *this; + } }; /// Emit a diagnostic. 
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 2b411cd8e2210..20ea91c68d6d3 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -11294,9 +11294,9 @@ OMPTraitInfo &ASTContext::getNewOMPTraitInfo() { return *OMPTraitInfoVector.back(); } -const DiagnosticBuilder & -clang::operator<<(const DiagnosticBuilder &DB, - const ASTContext::SectionInfo &Section) { +const StreamableDiagnosticBase &clang:: +operator<<(const StreamableDiagnosticBase &DB, + const ASTContext::SectionInfo &Section) { if (Section.Decl) return DB << Section.Decl; return DB << "a prior #pragma section"; diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp index 59ae5cb300f72..9673fbfb5fec1 100644 --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -3301,12 +3301,7 @@ static const char *getAccessName(AccessSpecifier AS) { llvm_unreachable("Invalid access specifier!"); } -const DiagnosticBuilder &clang::operator<<(const DiagnosticBuilder &DB, - AccessSpecifier AS) { - return DB << getAccessName(AS); -} - -const PartialDiagnostic &clang::operator<<(const PartialDiagnostic &DB, - AccessSpecifier AS) { +const StreamableDiagnosticBase &clang:: +operator<<(const StreamableDiagnosticBase &DB, AccessSpecifier AS) { return DB << getAccessName(AS); } diff --git a/clang/lib/AST/TemplateBase.cpp b/clang/lib/AST/TemplateBase.cpp index 6a3d2b30e46ee..0ac84c2357e4b 100644 --- a/clang/lib/AST/TemplateBase.cpp +++ b/clang/lib/AST/TemplateBase.cpp @@ -448,8 +448,8 @@ SourceRange TemplateArgumentLoc::getSourceRange() const { llvm_unreachable("Invalid TemplateArgument Kind!"); } -const DiagnosticBuilder &clang::operator<<(const DiagnosticBuilder &DB, - const TemplateArgument &Arg) { +template +static const T &DiagTemplateArg(const T &DB, const TemplateArgument &Arg) { switch (Arg.getKind()) { case TemplateArgument::Null: // This is bad, but not as bad as crashing because of argument @@ -502,6 +502,11 @@ const DiagnosticBuilder &clang::operator<<(const DiagnosticBuilder &DB, llvm_unreachable("Invalid TemplateArgument Kind!"); } +const StreamableDiagnosticBase &clang:: +operator<<(const StreamableDiagnosticBase &DB, const TemplateArgument &Arg) { + return DiagTemplateArg(DB, Arg); +} + const ASTTemplateArgumentListInfo * ASTTemplateArgumentListInfo::Create(const ASTContext &C, const TemplateArgumentListInfo &List) { diff --git a/clang/lib/AST/TemplateName.cpp b/clang/lib/AST/TemplateName.cpp index 40a8736ae1afd..14e3da12db24c 100644 --- a/clang/lib/AST/TemplateName.cpp +++ b/clang/lib/AST/TemplateName.cpp @@ -254,8 +254,8 @@ TemplateName::print(raw_ostream &OS, const PrintingPolicy &Policy, } } -const DiagnosticBuilder &clang::operator<<(const DiagnosticBuilder &DB, - TemplateName N) { +const StreamableDiagnosticBase &clang:: +operator<<(const StreamableDiagnosticBase &DB, TemplateName N) { std::string NameStr; llvm::raw_string_ostream OS(NameStr); LangOptions LO; @@ -268,20 +268,6 @@ const DiagnosticBuilder &clang::operator<<(const DiagnosticBuilder &DB, return DB << NameStr; } -const PartialDiagnostic&clang::operator<<(const PartialDiagnostic &PD, - TemplateName N) { - std::string NameStr; - llvm::raw_string_ostream OS(NameStr); - LangOptions LO; - LO.CPlusPlus = true; - LO.Bool = true; - OS << '\''; - N.print(OS, PrintingPolicy(LO)); - OS << '\''; - OS.flush(); - return PD << NameStr; -} - void TemplateName::dump(raw_ostream &OS) const { LangOptions LO; // FIXME! 
LO.CPlusPlus = true;
diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp
index 661eabf9bc7cb..2673b9d3bea4f 100644
--- a/clang/lib/Basic/Diagnostic.cpp
+++ b/clang/lib/Basic/Diagnostic.cpp
@@ -40,8 +40,9 @@
 
 using namespace clang;
 
-const DiagnosticBuilder &clang::operator<<(const DiagnosticBuilder &DB,
-                                           DiagNullabilityKind nullability) {
+const StreamableDiagnosticBase &clang::
+operator<<(const StreamableDiagnosticBase &DB,
+           DiagNullabilityKind nullability) {
   StringRef string;
   switch (nullability.first) {
   case NullabilityKind::NonNull:
@@ -61,8 +62,8 @@ const DiagnosticBuilder &clang::operator<<(const DiagnosticBuilder &DB,
   return DB;
 }
 
-const DiagnosticBuilder &clang::operator<<(const DiagnosticBuilder &DB,
-                                           llvm::Error &&E) {
+const StreamableDiagnosticBase &clang::
+operator<<(const StreamableDiagnosticBase &DB, llvm::Error &&E) {
   DB.AddString(toString(std::move(E)));
   return DB;
 }

From 23bef7ee9923b1262326981960397e8cd95d6923 Mon Sep 17 00:00:00 2001
From: Daniel Kiss
Date: Wed, 16 Sep 2020 23:03:19 +0200
Subject: [PATCH 0911/1079] [libunwind] Support for leaf function unwinding.

Unwinding a leaf function is useful in cases where the backtrace finds a
leaf function, for example when it caused a signal.
This patch also adds support for DW_CFA_undefined, because it marks
the end of the frames.

Ryan Prichard provided code for the tests.

Reviewed By: #libunwind, mstorsjo

Differential Revision: https://reviews.llvm.org/D83573
---
 libunwind/src/DwarfInstructions.hpp         |  9 +++-
 libunwind/src/DwarfParser.hpp               |  3 +-
 libunwind/test/lit.site.cfg.in              |  4 ++
 libunwind/test/signal_unwind.pass.cpp       | 44 ++++++++++++++++++
 libunwind/test/unwind_leaffunction.pass.cpp | 50 +++++++++++++++++++++
 5 files changed, 108 insertions(+), 2 deletions(-)
 create mode 100644 libunwind/test/signal_unwind.pass.cpp
 create mode 100644 libunwind/test/unwind_leaffunction.pass.cpp

diff --git a/libunwind/src/DwarfInstructions.hpp b/libunwind/src/DwarfInstructions.hpp
index ee98f538d437e..c39cabe1f7830 100644
--- a/libunwind/src/DwarfInstructions.hpp
+++ b/libunwind/src/DwarfInstructions.hpp
@@ -93,7 +93,8 @@ typename A::pint_t DwarfInstructions::getSavedRegister(
 
   case CFI_Parser::kRegisterInRegister:
     return registers.getRegister((int)savedReg.value);
-
+  case CFI_Parser::kRegisterUndefined:
+    return 0;
   case CFI_Parser::kRegisterUnused:
   case CFI_Parser::kRegisterOffsetFromCFA:
     // FIX ME
@@ -117,6 +118,7 @@ double DwarfInstructions::getSavedFloatRegister(
 
   case CFI_Parser::kRegisterIsExpression:
   case CFI_Parser::kRegisterUnused:
+  case CFI_Parser::kRegisterUndefined:
   case CFI_Parser::kRegisterOffsetFromCFA:
   case CFI_Parser::kRegisterInRegister:
     // FIX ME
@@ -140,6 +142,7 @@ v128 DwarfInstructions::getSavedVectorRegister(
 
   case CFI_Parser::kRegisterIsExpression:
   case CFI_Parser::kRegisterUnused:
+  case CFI_Parser::kRegisterUndefined:
   case CFI_Parser::kRegisterOffsetFromCFA:
   case CFI_Parser::kRegisterInRegister:
     // FIX ME
@@ -190,6 +193,10 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc,
                                   prolog.savedRegisters[i]));
         else
           return UNW_EBADREG;
+      } else if (i == (int)cieInfo.returnAddressRegister) {
+        // A leaf function keeps the return address in a register and there
+        // are no explicit instructions for how to restore it.
+        returnAddress = registers.getRegister(cieInfo.returnAddressRegister);
       }
     }
 
diff --git a/libunwind/src/DwarfParser.hpp b/libunwind/src/DwarfParser.hpp
index c98c4f92a6ad3..1ce2cf2943a2f 100644
--- a/libunwind/src/DwarfParser.hpp
+++ b/libunwind/src/DwarfParser.hpp
@@ -69,6 +69,7 @@ class CFI_Parser {
   };
   enum RegisterSavedWhere {
     kRegisterUnused,
+    kRegisterUndefined,
     kRegisterInCFA,
     kRegisterOffsetFromCFA,
     kRegisterInRegister,
@@ -503,7 +504,7 @@ bool CFI_Parser::parseInstructions(A &addressSpace, pint_t instructions,
                 "malformed DW_CFA_undefined DWARF unwind, reg too big");
         return false;
       }
-      results->setRegisterLocation(reg, kRegisterUnused, initialState);
+      results->setRegisterLocation(reg, kRegisterUndefined, initialState);
       _LIBUNWIND_TRACE_DWARF("DW_CFA_undefined(reg=%" PRIu64 ")\n", reg);
       break;
     case DW_CFA_same_value:
diff --git a/libunwind/test/lit.site.cfg.in b/libunwind/test/lit.site.cfg.in
index 8ff770fe29bc8..84dae3c2bfb0d 100644
--- a/libunwind/test/lit.site.cfg.in
+++ b/libunwind/test/lit.site.cfg.in
@@ -44,6 +44,10 @@ config.test_source_root = os.path.join(config.libunwind_src_root, 'test')
 # Allow expanding substitutions that are based on other substitutions
 config.recursiveExpansionLimit = 10
 
+# Make symbols available in the tests.
+config.test_compiler_flags += " -funwind-tables "
+config.test_linker_flags += " -Wl,--export-dynamic "
+
 # Infer the test_exec_root from the build directory.
 config.test_exec_root = os.path.join(config.libunwind_obj_root, 'test')
 
diff --git a/libunwind/test/signal_unwind.pass.cpp b/libunwind/test/signal_unwind.pass.cpp
new file mode 100644
index 0000000000000..295dd75bb7264
--- /dev/null
+++ b/libunwind/test/signal_unwind.pass.cpp
@@ -0,0 +1,44 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Ensure that the unwinder can cope with the signal handler.
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+_Unwind_Reason_Code frame_handler(struct _Unwind_Context* ctx, void* arg) {
+  (void)arg;
+  Dl_info info = { 0, 0, 0, 0 };
+  assert(dladdr((void*)_Unwind_GetIP(ctx), &info));
+
+  // Unwind until main is reached; the frames above it depend on the platform and architecture.
+  if(info.dli_sname && !strcmp("main", info.dli_sname)) {
+    _Exit(0);
+  }
+  return _URC_NO_REASON;
+}
+
+void signal_handler(int signum) {
+  (void)signum;
+  _Unwind_Backtrace(frame_handler, NULL);
+  _Exit(-1);
+}
+
+int main() {
+  signal(SIGUSR1, signal_handler);
+  kill(getpid(), SIGUSR1);
+  return -2;
+}
diff --git a/libunwind/test/unwind_leaffunction.pass.cpp b/libunwind/test/unwind_leaffunction.pass.cpp
new file mode 100644
index 0000000000000..b8a114516d0a6
--- /dev/null
+++ b/libunwind/test/unwind_leaffunction.pass.cpp
@@ -0,0 +1,50 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Ensure that a leaf function can be unwound.
+
+#include <assert.h>
+#include <dlfcn.h>
+#include <libunwind.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+_Unwind_Reason_Code frame_handler(struct _Unwind_Context* ctx, void* arg) {
+  (void)arg;
+  Dl_info info = { 0, 0, 0, 0 };
+  assert(dladdr((void*)_Unwind_GetIP(ctx), &info));
+
+  // Unwind until main is reached; the frames above it depend on the platform
+  // and architecture.
+  if(info.dli_sname && !strcmp("main", info.dli_sname)) {
+    _Exit(0);
+  }
+  return _URC_NO_REASON;
+}
+
+void signal_handler(int signum) {
+  (void)signum;
+  _Unwind_Backtrace(frame_handler, NULL);
+  _Exit(-1);
+}
+
+int* faultyPointer = NULL;
+
+__attribute__((noinline)) void crashing_leaf_func(void) {
+  *faultyPointer = 0;
+}
+
+int main() {
+  signal(SIGSEGV, signal_handler);
+  crashing_leaf_func();
+  return -2;
+}
\ No newline at end of file

From dd3eb3f33239b23a12dd8864ae236390adf79550 Mon Sep 17 00:00:00 2001
From: Peter Steinfeld
Date: Wed, 16 Sep 2020 14:42:30 -0700
Subject: [PATCH 0912/1079] [flang] Substrings with lower bound greater than
 upper bound

According to section 9.4.1, paragraph 3,

  If the starting point is greater than the ending point, the substring
  has length zero

But the compiler's code for substring processing was failing a call to
`CHECK()` in this case.  I fixed this by just setting the number of items
in the resulting string to 0 for this situation.

Differential Revision: https://reviews.llvm.org/D87799
---
 flang/lib/Evaluate/variable.cpp    | 6 ++++--
 flang/test/Semantics/resolve49.f90 | 2 ++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/flang/lib/Evaluate/variable.cpp b/flang/lib/Evaluate/variable.cpp
index d87c71688f1af..c81f2b175ed5e 100644
--- a/flang/lib/Evaluate/variable.cpp
+++ b/flang/lib/Evaluate/variable.cpp
@@ -204,9 +204,11 @@ std::optional<Expr<SomeCharacter>> Substring::Fold(FoldingContext &context) {
     *ubi = *length;
   }
   if (lbi && literal) {
-    CHECK(*ubi >= *lbi);
     auto newStaticData{StaticDataObject::Create()};
-    auto items{*ubi - *lbi + 1};
+    auto items{0}; // If the lower bound is greater, the length is 0
+    if (*ubi >= *lbi) {
+      items = *ubi - *lbi + 1;
+    }
     auto width{(*literal)->itemBytes()};
     auto bytes{items * width};
     auto startByte{(*lbi - 1) * width};
diff --git a/flang/test/Semantics/resolve49.f90 b/flang/test/Semantics/resolve49.f90
index b0bca059c0412..5ead0784603b1 100644
--- a/flang/test/Semantics/resolve49.f90
+++ b/flang/test/Semantics/resolve49.f90
@@ -17,6 +17,7 @@ program p2
   end type
   character :: a(10)
   character :: b(5)
+  character :: c(0)
   integer :: n
   n = 3
   b = a(n:7)
@@ -26,6 +27,7 @@ program p2
   a(n+3:) = b
   a(:n+2) = b
   n = iachar(1_'ABCDEFGHIJ'(1:1))
+  c = 'ABCDEFGHIJ'(1:0)
 end
 
 ! Test pointer assignment with bounds

From 1321160a26e7e489baf9b10d6de90a342f898960 Mon Sep 17 00:00:00 2001
From: jasonliu
Date: Wed, 16 Sep 2020 21:51:41 +0000
Subject: [PATCH 0913/1079] Disable a large test for EXPENSIVE_CHECKS and debug
 build

Summary:
When running a large test in LLVM_ENABLE_EXPENSIVE_CHECKS=ON mode, the
buildbot could hit a timeout. Disable the test when this mode is on.
Also disable it for debug builds so that the test won't hang for too long.
Reviewed By: hubert.reinterpretcast Differential Revision: https://reviews.llvm.org/D87794 --- llvm/test/CMakeLists.txt | 1 + llvm/test/CodeGen/PowerPC/aix-overflow-toc.py | 2 +- llvm/test/lit.cfg.py | 6 +++++- llvm/test/lit.site.cfg.py.in | 1 + 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt index 772ff0fd5f780..12f564178af08 100644 --- a/llvm/test/CMakeLists.txt +++ b/llvm/test/CMakeLists.txt @@ -17,6 +17,7 @@ llvm_canonicalize_cmake_booleans( LLVM_BYE_LINK_INTO_TOOLS LLVM_HAVE_TF_AOT LLVM_HAVE_TF_API + LLVM_ENABLE_EXPENSIVE_CHECKS ) configure_lit_site_cfg( diff --git a/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py b/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py index 870f83739dc08..f2263a31be8b7 100644 --- a/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py +++ b/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py @@ -1,4 +1,4 @@ -# REQUIRES: system-aix || system-linux +# UNSUPPORTED: expensive_checks, debug # RUN: python %s > %t.ll # RUN: llc -mtriple powerpc-ibm-aix-xcoff -code-model=small -mcpu=pwr4 -mattr=-altivec -O0 < %t.ll | \ diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index 3c4cb9c32065b..9a1dd4ebc5a4e 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -330,7 +330,8 @@ def have_ld64_plugin_support(): # Ask llvm-config about asserts llvm_config.feature_config( - [('--assertion-mode', {'ON': 'asserts'})]) + [('--assertion-mode', {'ON': 'asserts'}), + ('--build-mode', {'[Dd][Ee][Bb][Uu][Gg]': 'debug'})]) if 'darwin' == sys.platform: cmd = ['sysctl', 'hw.optional.fma'] @@ -361,3 +362,6 @@ def have_ld64_plugin_support(): if config.have_opt_viewer_modules: config.available_features.add('have_opt_viewer_modules') + +if config.expensive_checks: + config.available_features.add('expensive_checks') diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in index 0e77c1087ac13..9765d498b50d6 100644 --- a/llvm/test/lit.site.cfg.py.in +++ b/llvm/test/lit.site.cfg.py.in @@ -50,6 +50,7 @@ config.has_plugins = @LLVM_ENABLE_PLUGINS@ config.linked_bye_extension = @LLVM_BYE_LINK_INTO_TOOLS@ config.have_tf_aot = @LLVM_HAVE_TF_AOT@ config.have_tf_api = @LLVM_HAVE_TF_API@ +config.expensive_checks = @LLVM_ENABLE_EXPENSIVE_CHECKS@ # Support substitution of the tools_dir with user parameters. This is # used when we can't determine the tool dir at configuration time. From 95e43f84b7b9c61011aece7583c0367297dd67d8 Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Wed, 16 Sep 2020 23:55:46 +0200 Subject: [PATCH 0914/1079] [AArch64] Add -mmark-bti-property flag. Writing the .note.gnu.property manually is error prone and hard to maintain in the assembly files. The -mmark-bti-property is for the assembler to emit the section with the GNU_PROPERTY_AARCH64_FEATURE_1_BTI. To be used when C/C++ is compiled with -mbranch-protection=bti. This patch refactors the .note.gnu.property handling. 
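For reference, the 32-byte note that gets emitted can be pictured as the
following C++ struct. This is a sketch assembled from the streamer calls and
the test's .long directives in the diffs below; the struct name and the use
of fixed-width fields here are illustrative only, not part of the patch.

    #include <cstdint>

    // One ELF note carrying a single AArch64 GNU property (values as emitted).
    struct GnuPropertyNote {
      uint32_t namesz = 4;          // strlen("GNU") + 1
      uint32_t descsz = 16;         // one Elf_Prop, 4 * 4 bytes
      uint32_t type = 5;            // ELF::NT_GNU_PROPERTY_TYPE_0
      char name[4] = {'G', 'N', 'U', '\0'};
      uint32_t prType = 0xc0000000; // GNU_PROPERTY_AARCH64_FEATURE_1_AND
      uint32_t prDatasz = 4;        // size of the feature bitmask
      uint32_t prData = 0x1;        // GNU_PROPERTY_AARCH64_FEATURE_1_BTI
      uint32_t prPad = 0;           // pad the desc to 8-byte alignment
    };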
Reviewed By: chill, nickdesaulniers Differential Revision: https://reviews.llvm.org/D81930 --- clang/include/clang/Driver/Options.td | 3 + clang/lib/Driver/ToolChains/Clang.cpp | 9 +++ clang/test/Driver/arm64-markbti.S | 24 ++++++++ llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 23 +------- .../MCTargetDesc/AArch64TargetStreamer.cpp | 57 ++++++++++++++++++- .../MCTargetDesc/AArch64TargetStreamer.h | 3 + 6 files changed, 97 insertions(+), 22 deletions(-) create mode 100644 clang/test/Driver/arm64-markbti.S diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 5b39ea513b243..d7c2496b8a5d8 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2385,6 +2385,9 @@ def mfix_cortex_a53_835769 : Flag<["-"], "mfix-cortex-a53-835769">, def mno_fix_cortex_a53_835769 : Flag<["-"], "mno-fix-cortex-a53-835769">, Group, HelpText<"Don't workaround Cortex-A53 erratum 835769 (AArch64 only)">; +def mmark_bti_property : Flag<["-"], "mmark-bti-property">, + Group, + HelpText<"Add .note.gnu.property with BTI to assembly files (AArch64 only)">; foreach i = {1-31} in def ffixed_x#i : Flag<["-"], "ffixed-x"#i>, Group, HelpText<"Reserve the x"#i#" register (AArch64/RISC-V only)">; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 51056960761da..e13ffe67af89f 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7018,6 +7018,15 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, } break; + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_32: + case llvm::Triple::aarch64_be: + if (Args.hasArg(options::OPT_mmark_bti_property)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-aarch64-mark-bti-property"); + } + break; + case llvm::Triple::riscv32: case llvm::Triple::riscv64: AddRISCVTargetArgs(Args, CmdArgs); diff --git a/clang/test/Driver/arm64-markbti.S b/clang/test/Driver/arm64-markbti.S new file mode 100644 index 0000000000000..68c81d31afa32 --- /dev/null +++ b/clang/test/Driver/arm64-markbti.S @@ -0,0 +1,24 @@ +// When -mmark-bti-property is passed the generated file object gets BTI marking. +// RUN: %clang -target arm64-linux-none -mmark-bti-property -c -o - %s | llvm-readobj -n - | FileCheck -check-prefix=CHECK -check-prefix=CHECK_GEN %s +// RUN: %clang -target arm64-linux-none -DNOTE_PRESENT -c %s -o - | llvm-readobj -n - | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PRESET %s +// RUN: %clang -target arm64-linux-none -mmark-bti-property -DNOTE_PRESENT -c %s -o - | llvm-readobj -n - | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PRESET %s +// RUN: %clang -target arm64-linux-none -mmark-bti-property -DNOTE_PRESENT -c %s -o - 2>&1 | FileCheck -check-prefix=CHECK_WARNING %s +// +// CHECK_WARNING: The .note.gnu.property is not emitted because it is already present. 
+// CHECK: Name: .note.gnu.property
+// CHECK: Type: NT_GNU_PROPERTY_TYPE_0
+// CHECK_GEN: aarch64 feature: BTI
+// CHECK_PRESET: aarch64 feature: BTI, PAC
+
+#ifdef NOTE_PRESENT
+  .section .note.gnu.property, "a";
+  .balign 8;
+  .long 4;
+  .long 0x10;
+  .long 0x5
+  .asciz "GNU"
+  .long 0xc0000000
+  .long 4
+  .long 3
+  .long 0
+#endif
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 8cbd60d749708..30ac7f4c0d2e7 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -223,26 +223,9 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) {
     return;
 
   // Emit a .note.gnu.property section with the flags.
-  MCSection *Cur = OutStreamer->getCurrentSectionOnly();
-  MCSection *Nt = MMI->getContext().getELFSection(
-      ".note.gnu.property", ELF::SHT_NOTE, ELF::SHF_ALLOC);
-  OutStreamer->SwitchSection(Nt);
-
-  // Emit the note header.
-  emitAlignment(Align(8));
-  OutStreamer->emitInt32(4);     // data size for "GNU\0"
-  OutStreamer->emitInt32(4 * 4); // Elf_Prop size
-  OutStreamer->emitInt32(ELF::NT_GNU_PROPERTY_TYPE_0);
-  OutStreamer->emitBytes(StringRef("GNU", 4)); // note name
-
-  // Emit the PAC/BTI properties.
-  OutStreamer->emitInt32(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND);
-  OutStreamer->emitInt32(4);     // data size
-  OutStreamer->emitInt32(Flags); // data
-  OutStreamer->emitInt32(0);     // pad
-
-  OutStreamer->endSection(Nt);
-  OutStreamer->SwitchSection(Cur);
+  if (auto *TS = static_cast<AArch64TargetStreamer *>(
+          OutStreamer->getTargetStreamer()))
+    TS->emitNoteSection(Flags);
 }
 
 void AArch64AsmPrinter::emitFunctionHeaderComment() {
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index 48ed68f492635..f32a8f15b8a54 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -11,12 +11,23 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64TargetStreamer.h"
+#include "AArch64MCAsmInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/ConstantPools.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
 
 using namespace llvm;
 
+static cl::opt<bool> MarkBTIProperty(
+    "aarch64-mark-bti-property", cl::Hidden,
+    cl::desc("Add .note.gnu.property with BTI to assembly files"),
+    cl::init(false));
+
 //
 // AArch64TargetStreamer Implementation
 //
@@ -37,8 +48,50 @@ void AArch64TargetStreamer::emitCurrentConstantPool() {
   ConstantPools->emitForCurrentSection(Streamer);
 }
 
-// finish() - write out any non-empty assembler constant pools.
-void AArch64TargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
+// finish() - write out any non-empty assembler constant pools and
+// write out the .note.gnu.property section if needed.
+void AArch64TargetStreamer::finish() {
+  ConstantPools->emitAll(Streamer);
+
+  if (MarkBTIProperty)
+    emitNoteSection(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
+}
+
+void AArch64TargetStreamer::emitNoteSection(unsigned Flags) {
+  if (Flags == 0)
+    return;
+
+  MCStreamer &OutStreamer = getStreamer();
+  MCContext &Context = OutStreamer.getContext();
+  // Emit a .note.gnu.property section with the flags.
+ MCSectionELF *Nt = Context.getELFSection(".note.gnu.property", ELF::SHT_NOTE, + ELF::SHF_ALLOC); + if (Nt->isRegistered()) { + SMLoc Loc; + Context.reportWarning( + Loc, + "The .note.gnu.property is not emitted because it is already present."); + return; + } + MCSection *Cur = OutStreamer.getCurrentSectionOnly(); + OutStreamer.SwitchSection(Nt); + + // Emit the note header. + OutStreamer.emitValueToAlignment(Align(8).value()); + OutStreamer.emitIntValue(4, 4); // data size for "GNU\0" + OutStreamer.emitIntValue(4 * 4, 4); // Elf_Prop size + OutStreamer.emitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4); + OutStreamer.emitBytes(StringRef("GNU", 4)); // note name + + // Emit the PAC/BTI properties. + OutStreamer.emitIntValue(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND, 4); + OutStreamer.emitIntValue(4, 4); // data size + OutStreamer.emitIntValue(Flags, 4); // data + OutStreamer.emitIntValue(0, 4); // pad + + OutStreamer.endSection(Nt); + OutStreamer.SwitchSection(Cur); +} void AArch64TargetStreamer::emitInst(uint32_t Inst) { char Buffer[4]; diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h index c0dee085caced..09953315bbd0d 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h @@ -33,6 +33,9 @@ class AArch64TargetStreamer : public MCTargetStreamer { /// Emit contents of constant pool for the current section. void emitCurrentConstantPool(); + /// Callback used to implement the .note.gnu.property section. + void emitNoteSection(unsigned Flags); + /// Callback used to implement the .inst directive. virtual void emitInst(uint32_t Inst); From 0c6a56e41dbeb9ffc47ca0b03357f15cb5d30689 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 16 Sep 2020 18:28:51 -0400 Subject: [PATCH 0915/1079] [gn build] (manually) port 1321160a2 --- llvm/utils/gn/secondary/llvm/test/BUILD.gn | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llvm/utils/gn/secondary/llvm/test/BUILD.gn b/llvm/utils/gn/secondary/llvm/test/BUILD.gn index df4c763f64cd6..1b48d08751212 100644 --- a/llvm/utils/gn/secondary/llvm/test/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/test/BUILD.gn @@ -167,6 +167,12 @@ write_lit_config("lit_site_cfg") { extra_values += [ "LLVM_ENABLE_LIBXML2=0" ] # Must be 0. } + if (llvm_enable_expensive_checks) { + extra_values += [ "LLVM_ENABLE_EXPENSIVE_CHECKS=1" ] + } else { + extra_values += [ "LLVM_ENABLE_EXPENSIVE_CHECKS=0" ] # Must be 0. + } + if (llvm_enable_threads) { extra_values += [ "LLVM_ENABLE_THREADS=1" ] } else { From 4e4c89b22c3fc1200ee0d6d1074173c7c53d87bc Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Wed, 16 Sep 2020 18:21:10 -0400 Subject: [PATCH 0916/1079] [EarlyCSE] Simplify max/min pattern matching. NFC. --- llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 27 +++++++++---------------- 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index e47ecb4fbb44a..86dd4d54d558d 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -191,25 +191,16 @@ static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A, Pred = ICmpInst::getSwappedPredicate(Pred); } - // Check for inverted variants of min/max by swapping operands. 
-  bool Inversed = false;
   switch (Pred) {
-  case CmpInst::ICMP_ULE:
-  case CmpInst::ICMP_UGE:
-  case CmpInst::ICMP_SLE:
-  case CmpInst::ICMP_SGE:
-    Pred = CmpInst::getInversePredicate(Pred);
-    Inversed = true;
-    break;
-  default:
-    break;
-  }
-
-  switch (Pred) {
-  case CmpInst::ICMP_UGT: Flavor = Inversed ? SPF_UMIN : SPF_UMAX; break;
-  case CmpInst::ICMP_ULT: Flavor = Inversed ? SPF_UMAX : SPF_UMIN; break;
-  case CmpInst::ICMP_SGT: Flavor = Inversed ? SPF_SMIN : SPF_SMAX; break;
-  case CmpInst::ICMP_SLT: Flavor = Inversed ? SPF_SMAX : SPF_SMIN; break;
+  case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break;
+  case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break;
+  case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break;
+  case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break;
+  // Non-strict inequalities.
+  case CmpInst::ICMP_ULE: Flavor = SPF_UMIN; break;
+  case CmpInst::ICMP_UGE: Flavor = SPF_UMAX; break;
+  case CmpInst::ICMP_SLE: Flavor = SPF_SMIN; break;
+  case CmpInst::ICMP_SGE: Flavor = SPF_SMAX; break;
   default: break;
   }

From d89c5ae8577264f5dd660906f12577c5fdadf49e Mon Sep 17 00:00:00 2001
From: Shilei Tian
Date: Wed, 16 Sep 2020 18:54:11 -0400
Subject: [PATCH 0917/1079] [Flang] Fixed installation permission of the
 "binary" flang

Under the current configuration, the permission of `flang` after installation
is 700. This is a problem for system administrators who build and install
flang for other users: only the user who built LLVM can execute it, and
others cannot. This patch removes the explicit permission setting from the
`install` command and lets CMake determine the permissions, as it does for
other components.

Reviewed By: DavidTruby

Differential Revision: https://reviews.llvm.org/D87783
---
 flang/tools/f18/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index b92733d8374e7..64ccf12505fea 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -84,4 +84,4 @@ set(FLANG_INTRINSIC_MODULES_DIR ${CMAKE_INSTALL_PREFIX}/include/flang)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/flang.sh.in ${FLANG_BINARY_DIR}/bin/flang-install.sh @ONLY)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/f18_version.h.in ${CMAKE_CURRENT_BINARY_DIR}/f18_version.h @ONLY)
 
-install(PROGRAMS ${FLANG_BINARY_DIR}/bin/flang-install.sh DESTINATION bin RENAME flang PERMISSIONS OWNER_EXECUTE OWNER_READ OWNER_WRITE)
+install(PROGRAMS ${FLANG_BINARY_DIR}/bin/flang-install.sh DESTINATION bin RENAME flang)

From 5b205ff474120e086435724dc04f784b784fdd1a Mon Sep 17 00:00:00 2001
From: ogiroux
Date: Wed, 16 Sep 2020 16:12:10 -0700
Subject: [PATCH 0918/1079] Commenting out atomics with padding to unbreak MSAN
 tests

---
 .../atomic_helpers.h                          | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h
index d06cca9bbe5ce..c248e3ab17585 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h
@@ -23,9 +23,13 @@ struct UserAtomicType
     { return x.i == y.i; }
 };
 
+/*
+
+Enable these once we have P0528
+
 struct WeirdUserAtomicType
 {
-    char i, j, k; /* the 3 chars of doom */
+    char i, j, k; // the 3 chars of doom
 
     explicit WeirdUserAtomicType(int d = 0)
TEST_NOEXCEPT : i(d) {} @@ -35,7 +39,7 @@ struct WeirdUserAtomicType struct PaddedUserAtomicType { - char i; int j; /* probably lock-free? */ + char i; int j; // probably lock-free? explicit PaddedUserAtomicType(int d = 0) TEST_NOEXCEPT : i(d) {} @@ -43,6 +47,8 @@ struct PaddedUserAtomicType { return x.i == y.i; } }; +*/ + struct LargeUserAtomicType { int i, j[127]; /* decidedly not lock-free */ @@ -89,15 +95,19 @@ struct TestEachAtomicType { void operator()() const { TestEachIntegralType()(); TestFunctor()(); - TestFunctor()(); #ifndef __APPLE__ /* These aren't going to be lock-free, so some libatomic.a is necessary. */ - //TestFunctor()(); //< Actually, nobody is ready for this until P0528 TestFunctor()(); #endif +/* + Enable these once we have P0528 + + TestFunctor()(); + TestFunctor()(); +*/ TestFunctor()(); TestFunctor()(); TestFunctor()(); From 60e244f82c1f97c1b7d65c06d2b0b4f634f8d696 Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Thu, 17 Sep 2020 01:17:23 +0200 Subject: [PATCH 0919/1079] Revert "[AArch64] Add -mmark-bti-property flag." This reverts commit 95e43f84b7b9c61011aece7583c0367297dd67d8. --- clang/include/clang/Driver/Options.td | 3 - clang/lib/Driver/ToolChains/Clang.cpp | 9 --- clang/test/Driver/arm64-markbti.S | 24 -------- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 23 +++++++- .../MCTargetDesc/AArch64TargetStreamer.cpp | 57 +------------------ .../MCTargetDesc/AArch64TargetStreamer.h | 3 - 6 files changed, 22 insertions(+), 97 deletions(-) delete mode 100644 clang/test/Driver/arm64-markbti.S diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index d7c2496b8a5d8..5b39ea513b243 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2385,9 +2385,6 @@ def mfix_cortex_a53_835769 : Flag<["-"], "mfix-cortex-a53-835769">, def mno_fix_cortex_a53_835769 : Flag<["-"], "mno-fix-cortex-a53-835769">, Group, HelpText<"Don't workaround Cortex-A53 erratum 835769 (AArch64 only)">; -def mmark_bti_property : Flag<["-"], "mmark-bti-property">, - Group, - HelpText<"Add .note.gnu.property with BTI to assembly files (AArch64 only)">; foreach i = {1-31} in def ffixed_x#i : Flag<["-"], "ffixed-x"#i>, Group, HelpText<"Reserve the x"#i#" register (AArch64/RISC-V only)">; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index e13ffe67af89f..51056960761da 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7018,15 +7018,6 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, } break; - case llvm::Triple::aarch64: - case llvm::Triple::aarch64_32: - case llvm::Triple::aarch64_be: - if (Args.hasArg(options::OPT_mmark_bti_property)) { - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back("-aarch64-mark-bti-property"); - } - break; - case llvm::Triple::riscv32: case llvm::Triple::riscv64: AddRISCVTargetArgs(Args, CmdArgs); diff --git a/clang/test/Driver/arm64-markbti.S b/clang/test/Driver/arm64-markbti.S deleted file mode 100644 index 68c81d31afa32..0000000000000 --- a/clang/test/Driver/arm64-markbti.S +++ /dev/null @@ -1,24 +0,0 @@ -// When -mmark-bti-property is passed the generated file object gets BTI marking. 
-// RUN: %clang -target arm64-linux-none -mmark-bti-property -c -o - %s | llvm-readobj -n - | FileCheck -check-prefix=CHECK -check-prefix=CHECK_GEN %s
-// RUN: %clang -target arm64-linux-none -DNOTE_PRESENT -c %s -o - | llvm-readobj -n - | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PRESET %s
-// RUN: %clang -target arm64-linux-none -mmark-bti-property -DNOTE_PRESENT -c %s -o - | llvm-readobj -n - | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PRESET %s
-// RUN: %clang -target arm64-linux-none -mmark-bti-property -DNOTE_PRESENT -c %s -o - 2>&1 | FileCheck -check-prefix=CHECK_WARNING %s
-//
-// CHECK_WARNING: The .note.gnu.property is not emitted because it is already present.
-// CHECK: Name: .note.gnu.property
-// CHECK: Type: NT_GNU_PROPERTY_TYPE_0
-// CHECK_GEN: aarch64 feature: BTI
-// CHECK_PRESET: aarch64 feature: BTI, PAC
-
-#ifdef NOTE_PRESENT
-  .section .note.gnu.property, "a";
-  .balign 8;
-  .long 4;
-  .long 0x10;
-  .long 0x5
-  .asciz "GNU"
-  .long 0xc0000000
-  .long 4
-  .long 3
-  .long 0
-#endif
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 30ac7f4c0d2e7..8cbd60d749708 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -223,9 +223,26 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) {
     return;
 
   // Emit a .note.gnu.property section with the flags.
-  if (auto *TS = static_cast<AArch64TargetStreamer *>(
-          OutStreamer->getTargetStreamer()))
-    TS->emitNoteSection(Flags);
+  MCSection *Cur = OutStreamer->getCurrentSectionOnly();
+  MCSection *Nt = MMI->getContext().getELFSection(
+      ".note.gnu.property", ELF::SHT_NOTE, ELF::SHF_ALLOC);
+  OutStreamer->SwitchSection(Nt);
+
+  // Emit the note header.
+  emitAlignment(Align(8));
+  OutStreamer->emitInt32(4);     // data size for "GNU\0"
+  OutStreamer->emitInt32(4 * 4); // Elf_Prop size
+  OutStreamer->emitInt32(ELF::NT_GNU_PROPERTY_TYPE_0);
+  OutStreamer->emitBytes(StringRef("GNU", 4)); // note name
+
+  // Emit the PAC/BTI properties.
+  OutStreamer->emitInt32(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND);
+  OutStreamer->emitInt32(4);     // data size
+  OutStreamer->emitInt32(Flags); // data
+  OutStreamer->emitInt32(0);     // pad
+
+  OutStreamer->endSection(Nt);
+  OutStreamer->SwitchSection(Cur);
 }
 
 void AArch64AsmPrinter::emitFunctionHeaderComment() {
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index f32a8f15b8a54..48ed68f492635 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -11,23 +11,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64TargetStreamer.h"
-#include "AArch64MCAsmInfo.h"
-#include "AArch64Subtarget.h"
-#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/ConstantPools.h"
-#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/CommandLine.h"
 
 using namespace llvm;
 
-static cl::opt<bool> MarkBTIProperty(
-    "aarch64-mark-bti-property", cl::Hidden,
-    cl::desc("Add .note.gnu.property with BTI to assembly files"),
-    cl::init(false));
-
 //
 // AArch64TargetStreamer Implementation
 //
@@ -48,50 +37,8 @@ void AArch64TargetStreamer::emitCurrentConstantPool() {
   ConstantPools->emitForCurrentSection(Streamer);
 }
 
-// finish() - write out any non-empty assembler constant pools and
-// write out the .note.gnu.property section if needed.
-void AArch64TargetStreamer::finish() {
-  ConstantPools->emitAll(Streamer);
-
-  if (MarkBTIProperty)
-    emitNoteSection(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
-}
-
-void AArch64TargetStreamer::emitNoteSection(unsigned Flags) {
-  if (Flags == 0)
-    return;
-
-  MCStreamer &OutStreamer = getStreamer();
-  MCContext &Context = OutStreamer.getContext();
-  // Emit a .note.gnu.property section with the flags.
-  MCSectionELF *Nt = Context.getELFSection(".note.gnu.property", ELF::SHT_NOTE,
-                                           ELF::SHF_ALLOC);
-  if (Nt->isRegistered()) {
-    SMLoc Loc;
-    Context.reportWarning(
-        Loc,
-        "The .note.gnu.property is not emitted because it is already present.");
-    return;
-  }
-  MCSection *Cur = OutStreamer.getCurrentSectionOnly();
-  OutStreamer.SwitchSection(Nt);
-
-  // Emit the note header.
-  OutStreamer.emitValueToAlignment(Align(8).value());
-  OutStreamer.emitIntValue(4, 4);     // data size for "GNU\0"
-  OutStreamer.emitIntValue(4 * 4, 4); // Elf_Prop size
-  OutStreamer.emitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4);
-  OutStreamer.emitBytes(StringRef("GNU", 4)); // note name
-
-  // Emit the PAC/BTI properties.
-  OutStreamer.emitIntValue(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND, 4);
-  OutStreamer.emitIntValue(4, 4);     // data size
-  OutStreamer.emitIntValue(Flags, 4); // data
-  OutStreamer.emitIntValue(0, 4);     // pad
-
-  OutStreamer.endSection(Nt);
-  OutStreamer.SwitchSection(Cur);
-}
+// finish() - write out any non-empty assembler constant pools.
+void AArch64TargetStreamer::finish() { ConstantPools->emitAll(Streamer); } void AArch64TargetStreamer::emitInst(uint32_t Inst) { char Buffer[4]; diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h index 09953315bbd0d..c0dee085caced 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h @@ -33,9 +33,6 @@ class AArch64TargetStreamer : public MCTargetStreamer { /// Emit contents of constant pool for the current section. void emitCurrentConstantPool(); - /// Callback used to implement the .note.gnu.property section. - void emitNoteSection(unsigned Flags); - /// Callback used to implement the .inst directive. virtual void emitInst(uint32_t Inst); From f70baaf71f62ba8623b3522345527271add74f6b Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Wed, 16 Sep 2020 23:55:46 +0200 Subject: [PATCH 0920/1079] [AArch64] Add -mmark-bti-property flag. Writing the .note.gnu.property manually is error prone and hard to maintain in the assembly files. The -mmark-bti-property is for the assembler to emit the section with the GNU_PROPERTY_AARCH64_FEATURE_1_BTI. To be used when C/C++ is compiled with -mbranch-protection=bti. This patch refactors the .note.gnu.property handling. Reviewed By: chill, nickdesaulniers Differential Revision: https://reviews.llvm.org/D81930 Reland with test dependency on aarch64 target. --- clang/include/clang/Driver/Options.td | 3 + clang/lib/Driver/ToolChains/Clang.cpp | 9 +++ clang/test/Driver/arm64-markbti.S | 26 +++++++++ llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 23 +------- .../MCTargetDesc/AArch64TargetStreamer.cpp | 57 ++++++++++++++++++- .../MCTargetDesc/AArch64TargetStreamer.h | 3 + 6 files changed, 99 insertions(+), 22 deletions(-) create mode 100644 clang/test/Driver/arm64-markbti.S diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 5b39ea513b243..d7c2496b8a5d8 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2385,6 +2385,9 @@ def mfix_cortex_a53_835769 : Flag<["-"], "mfix-cortex-a53-835769">, def mno_fix_cortex_a53_835769 : Flag<["-"], "mno-fix-cortex-a53-835769">, Group, HelpText<"Don't workaround Cortex-A53 erratum 835769 (AArch64 only)">; +def mmark_bti_property : Flag<["-"], "mmark-bti-property">, + Group, + HelpText<"Add .note.gnu.property with BTI to assembly files (AArch64 only)">; foreach i = {1-31} in def ffixed_x#i : Flag<["-"], "ffixed-x"#i>, Group, HelpText<"Reserve the x"#i#" register (AArch64/RISC-V only)">; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 51056960761da..e13ffe67af89f 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7018,6 +7018,15 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, } break; + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_32: + case llvm::Triple::aarch64_be: + if (Args.hasArg(options::OPT_mmark_bti_property)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-aarch64-mark-bti-property"); + } + break; + case llvm::Triple::riscv32: case llvm::Triple::riscv64: AddRISCVTargetArgs(Args, CmdArgs); diff --git a/clang/test/Driver/arm64-markbti.S b/clang/test/Driver/arm64-markbti.S new file mode 100644 index 0000000000000..8eeed74810d27 --- /dev/null +++ b/clang/test/Driver/arm64-markbti.S @@ -0,0 +1,26 @@ +// REQUIRES: 
aarch64-registered-target + +// When -mmark-bti-property is passed the generated file object gets BTI marking. +// RUN: %clang -target arm64-linux-none -mmark-bti-property -c -o - %s | llvm-readobj -n - | FileCheck -check-prefix=CHECK -check-prefix=CHECK_GEN %s +// RUN: %clang -target arm64-linux-none -DNOTE_PRESENT -c %s -o - | llvm-readobj -n - | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PRESET %s +// RUN: %clang -target arm64-linux-none -mmark-bti-property -DNOTE_PRESENT -c %s -o - | llvm-readobj -n - | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PRESET %s +// RUN: %clang -target arm64-linux-none -mmark-bti-property -DNOTE_PRESENT -c %s -o - 2>&1 | FileCheck -check-prefix=CHECK_WARNING %s +// +// CHECK_WARNING: The .note.gnu.property is not emitted because it is already present. +// CHECK: Name: .note.gnu.property +// CHECK: Type: NT_GNU_PROPERTY_TYPE_0 +// CHECK_GEN: aarch64 feature: BTI +// CHECK_PRESET: aarch64 feature: BTI, PAC + +#ifdef NOTE_PRESENT + .section .note.gnu.property, "a"; + .balign 8; + .long 4; + .long 0x10; + .long 0x5 + .asciz "GNU" + .long 0xc0000000 + .long 4 + .long 3 + .long 0 +#endif diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 8cbd60d749708..30ac7f4c0d2e7 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -223,26 +223,9 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) { return; // Emit a .note.gnu.property section with the flags. - MCSection *Cur = OutStreamer->getCurrentSectionOnly(); - MCSection *Nt = MMI->getContext().getELFSection( - ".note.gnu.property", ELF::SHT_NOTE, ELF::SHF_ALLOC); - OutStreamer->SwitchSection(Nt); - - // Emit the note header. - emitAlignment(Align(8)); - OutStreamer->emitInt32(4); // data size for "GNU\0" - OutStreamer->emitInt32(4 * 4); // Elf_Prop size - OutStreamer->emitInt32(ELF::NT_GNU_PROPERTY_TYPE_0); - OutStreamer->emitBytes(StringRef("GNU", 4)); // note name - - // Emit the PAC/BTI properties. 
-  OutStreamer->emitInt32(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND);
-  OutStreamer->emitInt32(4);     // data size
-  OutStreamer->emitInt32(Flags); // data
-  OutStreamer->emitInt32(0);     // pad
-
-  OutStreamer->endSection(Nt);
-  OutStreamer->SwitchSection(Cur);
+  if (auto *TS = static_cast<AArch64TargetStreamer *>(
+          OutStreamer->getTargetStreamer()))
+    TS->emitNoteSection(Flags);
 }
 
 void AArch64AsmPrinter::emitFunctionHeaderComment() {
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index 48ed68f492635..f32a8f15b8a54 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -11,12 +11,23 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64TargetStreamer.h"
+#include "AArch64MCAsmInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/ConstantPools.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
 
 using namespace llvm;
 
+static cl::opt<bool> MarkBTIProperty(
+    "aarch64-mark-bti-property", cl::Hidden,
+    cl::desc("Add .note.gnu.property with BTI to assembly files"),
+    cl::init(false));
+
 //
 // AArch64TargetStreamer Implementation
 //
@@ -37,8 +48,50 @@ void AArch64TargetStreamer::emitCurrentConstantPool() {
   ConstantPools->emitForCurrentSection(Streamer);
 }
 
-// finish() - write out any non-empty assembler constant pools.
-void AArch64TargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
+// finish() - write out any non-empty assembler constant pools and
+// write out the .note.gnu.property section if needed.
+void AArch64TargetStreamer::finish() {
+  ConstantPools->emitAll(Streamer);
+
+  if (MarkBTIProperty)
+    emitNoteSection(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
+}
+
+void AArch64TargetStreamer::emitNoteSection(unsigned Flags) {
+  if (Flags == 0)
+    return;
+
+  MCStreamer &OutStreamer = getStreamer();
+  MCContext &Context = OutStreamer.getContext();
+  // Emit a .note.gnu.property section with the flags.
+  MCSectionELF *Nt = Context.getELFSection(".note.gnu.property", ELF::SHT_NOTE,
+                                           ELF::SHF_ALLOC);
+  if (Nt->isRegistered()) {
+    SMLoc Loc;
+    Context.reportWarning(
+        Loc,
+        "The .note.gnu.property is not emitted because it is already present.");
+    return;
+  }
+  MCSection *Cur = OutStreamer.getCurrentSectionOnly();
+  OutStreamer.SwitchSection(Nt);
+
+  // Emit the note header.
+  OutStreamer.emitValueToAlignment(Align(8).value());
+  OutStreamer.emitIntValue(4, 4);     // data size for "GNU\0"
+  OutStreamer.emitIntValue(4 * 4, 4); // Elf_Prop size
+  OutStreamer.emitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4);
+  OutStreamer.emitBytes(StringRef("GNU", 4)); // note name
+
+  // Emit the PAC/BTI properties.
+  OutStreamer.emitIntValue(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND, 4);
+  OutStreamer.emitIntValue(4, 4);     // data size
+  OutStreamer.emitIntValue(Flags, 4); // data
+  OutStreamer.emitIntValue(0, 4);     // pad
+
+  OutStreamer.endSection(Nt);
+  OutStreamer.SwitchSection(Cur);
+}
 
 void AArch64TargetStreamer::emitInst(uint32_t Inst) {
   char Buffer[4];
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
index c0dee085caced..09953315bbd0d 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -33,6 +33,9 @@ class AArch64TargetStreamer : public MCTargetStreamer {
   /// Emit contents of constant pool for the current section.
   void emitCurrentConstantPool();
 
+  /// Callback used to implement the .note.gnu.property section.
+  void emitNoteSection(unsigned Flags);
+
   /// Callback used to implement the .inst directive.
   virtual void emitInst(uint32_t Inst);
 
From e30371d99d5157ac9718c803dd1101f9cbb1b224 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 16 Sep 2020 16:37:36 -0700
Subject: [PATCH 0921/1079] [DAGCombiner] Teach visitMSTORE to replace an all
 ones mask with an unmasked store.

Similar to what was done in D87788 for MLOAD. Again I've skipped indexed,
truncating, and compressing stores.

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |   8 +
 llvm/test/CodeGen/X86/masked_store.ll         | 344 +++++++++++-------
 2 files changed, 214 insertions(+), 138 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 276fe77978832..285bd2455b9f2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9244,6 +9244,14 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
     return Chain;
 
+  // If this is a masked store with an all-ones mask, we can use an unmasked store.
+  // FIXME: Can we do this for indexed, compressing, or truncating stores?
+  if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
+      MST->isUnindexed() && !MST->isCompressingStore() &&
+      !MST->isTruncatingStore())
+    return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
+                        MST->getBasePtr(), MST->getMemOperand());
+
   // Try transforming N to an indexed store.
   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
     return SDValue(N, 0);
diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll
index 380891847a5c2..992ef96fd2e87 100644
--- a/llvm/test/CodeGen/X86/masked_store.ll
+++ b/llvm/test/CodeGen/X86/masked_store.ll
@@ -4504,34 +4504,102 @@ define void @mstore_constmask_v4i32_v4i32(<4 x i32> %trigger, <4 x i32>* %addr,
 ; SSE-NEXT:    movups %xmm1, (%rdi)
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: mstore_constmask_v4i32_v4i32:
+; AVX-LABEL: mstore_constmask_v4i32_v4i32:
+; AVX:       ## %bb.0:
+; AVX-NEXT:    vmovups %xmm1, (%rdi)
+; AVX-NEXT:    retq
   %mask = icmp eq <4 x i32> %trigger, zeroinitializer
   call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
   ret void
 }
 
+; Make sure we are able to detect all ones constant mask after type legalization
+; to avoid masked stores.
+define void @mstore_constmask_allones_split(<16 x i32> %trigger, <16 x i32>* %addr, <16 x i32> %val) { +; SSE2-LABEL: mstore_constmask_allones_split: +; SSE2: ## %bb.0: +; SSE2-NEXT: movd %xmm4, (%rdi) +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,1,1] +; SSE2-NEXT: movd %xmm0, 4(%rdi) +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[3,3,3,3] +; SSE2-NEXT: movd %xmm0, 12(%rdi) +; SSE2-NEXT: movd %xmm5, 16(%rdi) +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,2,3] +; SSE2-NEXT: movd %xmm0, 24(%rdi) +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[3,3,3,3] +; SSE2-NEXT: movd %xmm0, 28(%rdi) +; SSE2-NEXT: movd %xmm6, 32(%rdi) +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,1,1] +; SSE2-NEXT: movd %xmm0, 36(%rdi) +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[2,3,2,3] +; SSE2-NEXT: movd %xmm0, 40(%rdi) +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[3,3,3,3] +; SSE2-NEXT: movd %xmm0, 44(%rdi) +; SSE2-NEXT: movd %xmm7, 48(%rdi) +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,1,1] +; SSE2-NEXT: movd %xmm0, 52(%rdi) +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[2,3,2,3] +; SSE2-NEXT: movd %xmm0, 56(%rdi) +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[3,3,3,3] +; SSE2-NEXT: movd %xmm0, 60(%rdi) +; SSE2-NEXT: retq +; +; SSE4-LABEL: mstore_constmask_allones_split: +; SSE4: ## %bb.0: +; SSE4-NEXT: movss %xmm4, (%rdi) +; SSE4-NEXT: extractps $1, %xmm4, 4(%rdi) +; SSE4-NEXT: extractps $3, %xmm4, 12(%rdi) +; SSE4-NEXT: movd %xmm5, 16(%rdi) +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: palignr {{.*#+}} xmm0 = xmm6[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] +; SSE4-NEXT: palignr {{.*#+}} xmm6 = xmm5[8,9,10,11,12,13,14,15],xmm6[0,1,2,3,4,5,6,7] +; SSE4-NEXT: movdqu %xmm6, 24(%rdi) +; SSE4-NEXT: movdqu %xmm0, 40(%rdi) +; SSE4-NEXT: pextrd $2, %xmm7, 56(%rdi) +; SSE4-NEXT: pextrd $3, %xmm7, 60(%rdi) +; SSE4-NEXT: retq +; +; AVX1-LABEL: mstore_constmask_allones_split: ; AVX1: ## %bb.0: -; AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) +; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967295,0,4294967295,4294967295,0,4294967295,4294967295] +; AVX1-NEXT: vmaskmovps %ymm2, %ymm0, (%rdi) +; AVX1-NEXT: vmovups %ymm3, 32(%rdi) +; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; -; AVX2-LABEL: mstore_constmask_v4i32_v4i32: +; AVX2-LABEL: mstore_constmask_allones_split: ; AVX2: ## %bb.0: -; AVX2-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) +; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967295,4294967295,0,4294967295,4294967295,0,4294967295,4294967295] +; AVX2-NEXT: vpmaskmovd %ymm2, %ymm0, (%rdi) +; AVX2-NEXT: vmovups %ymm3, 32(%rdi) +; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512F-LABEL: mstore_constmask_v4i32_v4i32: +; AVX512F-LABEL: mstore_constmask_allones_split: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512F-NEXT: movw $15, %ax +; AVX512F-NEXT: movw $-37, %ax ; AVX512F-NEXT: kmovw %eax, %k1 ; AVX512F-NEXT: vmovdqu32 %zmm1, (%rdi) {%k1} ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; -; AVX512VL-LABEL: mstore_constmask_v4i32_v4i32: -; AVX512VL: ## %bb.0: -; AVX512VL-NEXT: kxnorw %k0, %k0, %k1 -; AVX512VL-NEXT: vmovdqu32 %xmm1, (%rdi) {%k1} -; AVX512VL-NEXT: retq - %mask = icmp eq <4 x i32> %trigger, zeroinitializer - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1>) +; AVX512VLDQ-LABEL: mstore_constmask_allones_split: +; AVX512VLDQ: ## %bb.0: +; AVX512VLDQ-NEXT: movw $-37, %ax +; AVX512VLDQ-NEXT: kmovw %eax, %k1 +; AVX512VLDQ-NEXT: vmovdqu32 %zmm1, (%rdi) {%k1} +; AVX512VLDQ-NEXT: 
vzeroupper +; AVX512VLDQ-NEXT: retq +; +; AVX512VLBW-LABEL: mstore_constmask_allones_split: +; AVX512VLBW: ## %bb.0: +; AVX512VLBW-NEXT: movw $-37, %ax +; AVX512VLBW-NEXT: kmovd %eax, %k1 +; AVX512VLBW-NEXT: vmovdqu32 %zmm1, (%rdi) {%k1} +; AVX512VLBW-NEXT: vzeroupper +; AVX512VLBW-NEXT: retq + %mask = icmp eq <16 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %val, <16 x i32>* %addr, i32 4, <16 x i1>) ret void } @@ -4642,31 +4710,31 @@ define void @masked_store_bool_mask_demand_trunc_sext(<4 x double> %x, <4 x doub ; SSE-NEXT: pslld $31, %xmm2 ; SSE-NEXT: movmskps %xmm2, %eax ; SSE-NEXT: testb $1, %al -; SSE-NEXT: jne LBB23_1 +; SSE-NEXT: jne LBB24_1 ; SSE-NEXT: ## %bb.2: ## %else ; SSE-NEXT: testb $2, %al -; SSE-NEXT: jne LBB23_3 -; SSE-NEXT: LBB23_4: ## %else2 +; SSE-NEXT: jne LBB24_3 +; SSE-NEXT: LBB24_4: ## %else2 ; SSE-NEXT: testb $4, %al -; SSE-NEXT: jne LBB23_5 -; SSE-NEXT: LBB23_6: ## %else4 +; SSE-NEXT: jne LBB24_5 +; SSE-NEXT: LBB24_6: ## %else4 ; SSE-NEXT: testb $8, %al -; SSE-NEXT: jne LBB23_7 -; SSE-NEXT: LBB23_8: ## %else6 +; SSE-NEXT: jne LBB24_7 +; SSE-NEXT: LBB24_8: ## %else6 ; SSE-NEXT: retq -; SSE-NEXT: LBB23_1: ## %cond.store +; SSE-NEXT: LBB24_1: ## %cond.store ; SSE-NEXT: movlps %xmm0, (%rdi) ; SSE-NEXT: testb $2, %al -; SSE-NEXT: je LBB23_4 -; SSE-NEXT: LBB23_3: ## %cond.store1 +; SSE-NEXT: je LBB24_4 +; SSE-NEXT: LBB24_3: ## %cond.store1 ; SSE-NEXT: movhps %xmm0, 8(%rdi) ; SSE-NEXT: testb $4, %al -; SSE-NEXT: je LBB23_6 -; SSE-NEXT: LBB23_5: ## %cond.store3 +; SSE-NEXT: je LBB24_6 +; SSE-NEXT: LBB24_5: ## %cond.store3 ; SSE-NEXT: movlps %xmm1, 16(%rdi) ; SSE-NEXT: testb $8, %al -; SSE-NEXT: je LBB23_8 -; SSE-NEXT: LBB23_7: ## %cond.store5 +; SSE-NEXT: je LBB24_8 +; SSE-NEXT: LBB24_7: ## %cond.store5 ; SSE-NEXT: movhps %xmm1, 24(%rdi) ; SSE-NEXT: retq ; @@ -4728,35 +4796,35 @@ define void @one_mask_bit_set1_variable(<4 x float>* %addr, <4 x float> %val, <4 ; SSE2: ## %bb.0: ; SSE2-NEXT: movmskps %xmm1, %eax ; SSE2-NEXT: testb $1, %al -; SSE2-NEXT: jne LBB24_1 +; SSE2-NEXT: jne LBB25_1 ; SSE2-NEXT: ## %bb.2: ## %else ; SSE2-NEXT: testb $2, %al -; SSE2-NEXT: jne LBB24_3 -; SSE2-NEXT: LBB24_4: ## %else2 +; SSE2-NEXT: jne LBB25_3 +; SSE2-NEXT: LBB25_4: ## %else2 ; SSE2-NEXT: testb $4, %al -; SSE2-NEXT: jne LBB24_5 -; SSE2-NEXT: LBB24_6: ## %else4 +; SSE2-NEXT: jne LBB25_5 +; SSE2-NEXT: LBB25_6: ## %else4 ; SSE2-NEXT: testb $8, %al -; SSE2-NEXT: jne LBB24_7 -; SSE2-NEXT: LBB24_8: ## %else6 +; SSE2-NEXT: jne LBB25_7 +; SSE2-NEXT: LBB25_8: ## %else6 ; SSE2-NEXT: retq -; SSE2-NEXT: LBB24_1: ## %cond.store +; SSE2-NEXT: LBB25_1: ## %cond.store ; SSE2-NEXT: movss %xmm0, (%rdi) ; SSE2-NEXT: testb $2, %al -; SSE2-NEXT: je LBB24_4 -; SSE2-NEXT: LBB24_3: ## %cond.store1 +; SSE2-NEXT: je LBB25_4 +; SSE2-NEXT: LBB25_3: ## %cond.store1 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] ; SSE2-NEXT: movss %xmm1, 4(%rdi) ; SSE2-NEXT: testb $4, %al -; SSE2-NEXT: je LBB24_6 -; SSE2-NEXT: LBB24_5: ## %cond.store3 +; SSE2-NEXT: je LBB25_6 +; SSE2-NEXT: LBB25_5: ## %cond.store3 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: movss %xmm1, 8(%rdi) ; SSE2-NEXT: testb $8, %al -; SSE2-NEXT: je LBB24_8 -; SSE2-NEXT: LBB24_7: ## %cond.store5 +; SSE2-NEXT: je LBB25_8 +; SSE2-NEXT: LBB25_7: ## %cond.store5 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] ; SSE2-NEXT: movss %xmm0, 12(%rdi) ; SSE2-NEXT: retq @@ -4765,31 +4833,31 @@ define void 
@one_mask_bit_set1_variable(<4 x float>* %addr, <4 x float> %val, <4 ; SSE4: ## %bb.0: ; SSE4-NEXT: movmskps %xmm1, %eax ; SSE4-NEXT: testb $1, %al -; SSE4-NEXT: jne LBB24_1 +; SSE4-NEXT: jne LBB25_1 ; SSE4-NEXT: ## %bb.2: ## %else ; SSE4-NEXT: testb $2, %al -; SSE4-NEXT: jne LBB24_3 -; SSE4-NEXT: LBB24_4: ## %else2 +; SSE4-NEXT: jne LBB25_3 +; SSE4-NEXT: LBB25_4: ## %else2 ; SSE4-NEXT: testb $4, %al -; SSE4-NEXT: jne LBB24_5 -; SSE4-NEXT: LBB24_6: ## %else4 +; SSE4-NEXT: jne LBB25_5 +; SSE4-NEXT: LBB25_6: ## %else4 ; SSE4-NEXT: testb $8, %al -; SSE4-NEXT: jne LBB24_7 -; SSE4-NEXT: LBB24_8: ## %else6 +; SSE4-NEXT: jne LBB25_7 +; SSE4-NEXT: LBB25_8: ## %else6 ; SSE4-NEXT: retq -; SSE4-NEXT: LBB24_1: ## %cond.store +; SSE4-NEXT: LBB25_1: ## %cond.store ; SSE4-NEXT: movss %xmm0, (%rdi) ; SSE4-NEXT: testb $2, %al -; SSE4-NEXT: je LBB24_4 -; SSE4-NEXT: LBB24_3: ## %cond.store1 +; SSE4-NEXT: je LBB25_4 +; SSE4-NEXT: LBB25_3: ## %cond.store1 ; SSE4-NEXT: extractps $1, %xmm0, 4(%rdi) ; SSE4-NEXT: testb $4, %al -; SSE4-NEXT: je LBB24_6 -; SSE4-NEXT: LBB24_5: ## %cond.store3 +; SSE4-NEXT: je LBB25_6 +; SSE4-NEXT: LBB25_5: ## %cond.store3 ; SSE4-NEXT: extractps $2, %xmm0, 8(%rdi) ; SSE4-NEXT: testb $8, %al -; SSE4-NEXT: je LBB24_8 -; SSE4-NEXT: LBB24_7: ## %cond.store5 +; SSE4-NEXT: je LBB25_8 +; SSE4-NEXT: LBB25_7: ## %cond.store5 ; SSE4-NEXT: extractps $3, %xmm0, 12(%rdi) ; SSE4-NEXT: retq ; @@ -4834,25 +4902,25 @@ define void @widen_masked_store(<3 x i32> %v, <3 x i32>* %p, <3 x i1> %mask) { ; SSE2-NEXT: shlb $2, %cl ; SSE2-NEXT: orb %dl, %cl ; SSE2-NEXT: testb $1, %cl -; SSE2-NEXT: jne LBB25_1 +; SSE2-NEXT: jne LBB26_1 ; SSE2-NEXT: ## %bb.2: ## %else ; SSE2-NEXT: testb $2, %cl -; SSE2-NEXT: jne LBB25_3 -; SSE2-NEXT: LBB25_4: ## %else2 +; SSE2-NEXT: jne LBB26_3 +; SSE2-NEXT: LBB26_4: ## %else2 ; SSE2-NEXT: testb $4, %cl -; SSE2-NEXT: jne LBB25_5 -; SSE2-NEXT: LBB25_6: ## %else4 +; SSE2-NEXT: jne LBB26_5 +; SSE2-NEXT: LBB26_6: ## %else4 ; SSE2-NEXT: retq -; SSE2-NEXT: LBB25_1: ## %cond.store +; SSE2-NEXT: LBB26_1: ## %cond.store ; SSE2-NEXT: movd %xmm0, (%rdi) ; SSE2-NEXT: testb $2, %cl -; SSE2-NEXT: je LBB25_4 -; SSE2-NEXT: LBB25_3: ## %cond.store1 +; SSE2-NEXT: je LBB26_4 +; SSE2-NEXT: LBB26_3: ## %cond.store1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; SSE2-NEXT: movd %xmm1, 4(%rdi) ; SSE2-NEXT: testb $4, %cl -; SSE2-NEXT: je LBB25_6 -; SSE2-NEXT: LBB25_5: ## %cond.store3 +; SSE2-NEXT: je LBB26_6 +; SSE2-NEXT: LBB26_5: ## %cond.store3 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; SSE2-NEXT: movd %xmm0, 8(%rdi) ; SSE2-NEXT: retq @@ -4867,24 +4935,24 @@ define void @widen_masked_store(<3 x i32> %v, <3 x i32>* %p, <3 x i1> %mask) { ; SSE4-NEXT: shlb $2, %cl ; SSE4-NEXT: orb %dl, %cl ; SSE4-NEXT: testb $1, %cl -; SSE4-NEXT: jne LBB25_1 +; SSE4-NEXT: jne LBB26_1 ; SSE4-NEXT: ## %bb.2: ## %else ; SSE4-NEXT: testb $2, %cl -; SSE4-NEXT: jne LBB25_3 -; SSE4-NEXT: LBB25_4: ## %else2 +; SSE4-NEXT: jne LBB26_3 +; SSE4-NEXT: LBB26_4: ## %else2 ; SSE4-NEXT: testb $4, %cl -; SSE4-NEXT: jne LBB25_5 -; SSE4-NEXT: LBB25_6: ## %else4 +; SSE4-NEXT: jne LBB26_5 +; SSE4-NEXT: LBB26_6: ## %else4 ; SSE4-NEXT: retq -; SSE4-NEXT: LBB25_1: ## %cond.store +; SSE4-NEXT: LBB26_1: ## %cond.store ; SSE4-NEXT: movss %xmm0, (%rdi) ; SSE4-NEXT: testb $2, %cl -; SSE4-NEXT: je LBB25_4 -; SSE4-NEXT: LBB25_3: ## %cond.store1 +; SSE4-NEXT: je LBB26_4 +; SSE4-NEXT: LBB26_3: ## %cond.store1 ; SSE4-NEXT: extractps $1, %xmm0, 4(%rdi) ; SSE4-NEXT: testb $4, %cl -; SSE4-NEXT: je LBB25_6 -; SSE4-NEXT: LBB25_5: ## %cond.store3 
+; SSE4-NEXT: je LBB26_6 +; SSE4-NEXT: LBB26_5: ## %cond.store3 ; SSE4-NEXT: extractps $2, %xmm0, 8(%rdi) ; SSE4-NEXT: retq ; @@ -4998,68 +5066,68 @@ define void @PR11210(<4 x float> %x, <4 x float>* %ptr, <4 x float> %y, <2 x i64 ; SSE2: ## %bb.0: ; SSE2-NEXT: movmskps %xmm2, %eax ; SSE2-NEXT: testb $1, %al -; SSE2-NEXT: jne LBB27_1 +; SSE2-NEXT: jne LBB28_1 ; SSE2-NEXT: ## %bb.2: ## %else ; SSE2-NEXT: testb $2, %al -; SSE2-NEXT: jne LBB27_3 -; SSE2-NEXT: LBB27_4: ## %else2 +; SSE2-NEXT: jne LBB28_3 +; SSE2-NEXT: LBB28_4: ## %else2 ; SSE2-NEXT: testb $4, %al -; SSE2-NEXT: jne LBB27_5 -; SSE2-NEXT: LBB27_6: ## %else4 +; SSE2-NEXT: jne LBB28_5 +; SSE2-NEXT: LBB28_6: ## %else4 ; SSE2-NEXT: testb $8, %al -; SSE2-NEXT: jne LBB27_7 -; SSE2-NEXT: LBB27_8: ## %else6 +; SSE2-NEXT: jne LBB28_7 +; SSE2-NEXT: LBB28_8: ## %else6 ; SSE2-NEXT: testb $1, %al -; SSE2-NEXT: jne LBB27_9 -; SSE2-NEXT: LBB27_10: ## %else9 +; SSE2-NEXT: jne LBB28_9 +; SSE2-NEXT: LBB28_10: ## %else9 ; SSE2-NEXT: testb $2, %al -; SSE2-NEXT: jne LBB27_11 -; SSE2-NEXT: LBB27_12: ## %else11 +; SSE2-NEXT: jne LBB28_11 +; SSE2-NEXT: LBB28_12: ## %else11 ; SSE2-NEXT: testb $4, %al -; SSE2-NEXT: jne LBB27_13 -; SSE2-NEXT: LBB27_14: ## %else13 +; SSE2-NEXT: jne LBB28_13 +; SSE2-NEXT: LBB28_14: ## %else13 ; SSE2-NEXT: testb $8, %al -; SSE2-NEXT: jne LBB27_15 -; SSE2-NEXT: LBB27_16: ## %else15 +; SSE2-NEXT: jne LBB28_15 +; SSE2-NEXT: LBB28_16: ## %else15 ; SSE2-NEXT: retq -; SSE2-NEXT: LBB27_1: ## %cond.store +; SSE2-NEXT: LBB28_1: ## %cond.store ; SSE2-NEXT: movss %xmm0, (%rdi) ; SSE2-NEXT: testb $2, %al -; SSE2-NEXT: je LBB27_4 -; SSE2-NEXT: LBB27_3: ## %cond.store1 +; SSE2-NEXT: je LBB28_4 +; SSE2-NEXT: LBB28_3: ## %cond.store1 ; SSE2-NEXT: movaps %xmm0, %xmm2 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1] ; SSE2-NEXT: movss %xmm2, 4(%rdi) ; SSE2-NEXT: testb $4, %al -; SSE2-NEXT: je LBB27_6 -; SSE2-NEXT: LBB27_5: ## %cond.store3 +; SSE2-NEXT: je LBB28_6 +; SSE2-NEXT: LBB28_5: ## %cond.store3 ; SSE2-NEXT: movaps %xmm0, %xmm2 ; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE2-NEXT: movss %xmm2, 8(%rdi) ; SSE2-NEXT: testb $8, %al -; SSE2-NEXT: je LBB27_8 -; SSE2-NEXT: LBB27_7: ## %cond.store5 +; SSE2-NEXT: je LBB28_8 +; SSE2-NEXT: LBB28_7: ## %cond.store5 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] ; SSE2-NEXT: movss %xmm0, 12(%rdi) ; SSE2-NEXT: testb $1, %al -; SSE2-NEXT: je LBB27_10 -; SSE2-NEXT: LBB27_9: ## %cond.store8 +; SSE2-NEXT: je LBB28_10 +; SSE2-NEXT: LBB28_9: ## %cond.store8 ; SSE2-NEXT: movss %xmm1, (%rdi) ; SSE2-NEXT: testb $2, %al -; SSE2-NEXT: je LBB27_12 -; SSE2-NEXT: LBB27_11: ## %cond.store10 +; SSE2-NEXT: je LBB28_12 +; SSE2-NEXT: LBB28_11: ## %cond.store10 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[1,1] ; SSE2-NEXT: movss %xmm0, 4(%rdi) ; SSE2-NEXT: testb $4, %al -; SSE2-NEXT: je LBB27_14 -; SSE2-NEXT: LBB27_13: ## %cond.store12 +; SSE2-NEXT: je LBB28_14 +; SSE2-NEXT: LBB28_13: ## %cond.store12 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE2-NEXT: movss %xmm0, 8(%rdi) ; SSE2-NEXT: testb $8, %al -; SSE2-NEXT: je LBB27_16 -; SSE2-NEXT: LBB27_15: ## %cond.store14 +; SSE2-NEXT: je LBB28_16 +; SSE2-NEXT: LBB28_15: ## %cond.store14 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3] ; SSE2-NEXT: movss %xmm1, 12(%rdi) ; SSE2-NEXT: retq @@ -5068,59 +5136,59 @@ define void @PR11210(<4 x float> %x, <4 x float>* %ptr, <4 x float> %y, <2 x i64 ; SSE4: ## %bb.0: ; SSE4-NEXT: movmskps %xmm2, %eax ; SSE4-NEXT: testb $1, %al 
-; SSE4-NEXT: jne LBB27_1 +; SSE4-NEXT: jne LBB28_1 ; SSE4-NEXT: ## %bb.2: ## %else ; SSE4-NEXT: testb $2, %al -; SSE4-NEXT: jne LBB27_3 -; SSE4-NEXT: LBB27_4: ## %else2 +; SSE4-NEXT: jne LBB28_3 +; SSE4-NEXT: LBB28_4: ## %else2 ; SSE4-NEXT: testb $4, %al -; SSE4-NEXT: jne LBB27_5 -; SSE4-NEXT: LBB27_6: ## %else4 +; SSE4-NEXT: jne LBB28_5 +; SSE4-NEXT: LBB28_6: ## %else4 ; SSE4-NEXT: testb $8, %al -; SSE4-NEXT: jne LBB27_7 -; SSE4-NEXT: LBB27_8: ## %else6 +; SSE4-NEXT: jne LBB28_7 +; SSE4-NEXT: LBB28_8: ## %else6 ; SSE4-NEXT: testb $1, %al -; SSE4-NEXT: jne LBB27_9 -; SSE4-NEXT: LBB27_10: ## %else9 +; SSE4-NEXT: jne LBB28_9 +; SSE4-NEXT: LBB28_10: ## %else9 ; SSE4-NEXT: testb $2, %al -; SSE4-NEXT: jne LBB27_11 -; SSE4-NEXT: LBB27_12: ## %else11 +; SSE4-NEXT: jne LBB28_11 +; SSE4-NEXT: LBB28_12: ## %else11 ; SSE4-NEXT: testb $4, %al -; SSE4-NEXT: jne LBB27_13 -; SSE4-NEXT: LBB27_14: ## %else13 +; SSE4-NEXT: jne LBB28_13 +; SSE4-NEXT: LBB28_14: ## %else13 ; SSE4-NEXT: testb $8, %al -; SSE4-NEXT: jne LBB27_15 -; SSE4-NEXT: LBB27_16: ## %else15 +; SSE4-NEXT: jne LBB28_15 +; SSE4-NEXT: LBB28_16: ## %else15 ; SSE4-NEXT: retq -; SSE4-NEXT: LBB27_1: ## %cond.store +; SSE4-NEXT: LBB28_1: ## %cond.store ; SSE4-NEXT: movss %xmm0, (%rdi) ; SSE4-NEXT: testb $2, %al -; SSE4-NEXT: je LBB27_4 -; SSE4-NEXT: LBB27_3: ## %cond.store1 +; SSE4-NEXT: je LBB28_4 +; SSE4-NEXT: LBB28_3: ## %cond.store1 ; SSE4-NEXT: extractps $1, %xmm0, 4(%rdi) ; SSE4-NEXT: testb $4, %al -; SSE4-NEXT: je LBB27_6 -; SSE4-NEXT: LBB27_5: ## %cond.store3 +; SSE4-NEXT: je LBB28_6 +; SSE4-NEXT: LBB28_5: ## %cond.store3 ; SSE4-NEXT: extractps $2, %xmm0, 8(%rdi) ; SSE4-NEXT: testb $8, %al -; SSE4-NEXT: je LBB27_8 -; SSE4-NEXT: LBB27_7: ## %cond.store5 +; SSE4-NEXT: je LBB28_8 +; SSE4-NEXT: LBB28_7: ## %cond.store5 ; SSE4-NEXT: extractps $3, %xmm0, 12(%rdi) ; SSE4-NEXT: testb $1, %al -; SSE4-NEXT: je LBB27_10 -; SSE4-NEXT: LBB27_9: ## %cond.store8 +; SSE4-NEXT: je LBB28_10 +; SSE4-NEXT: LBB28_9: ## %cond.store8 ; SSE4-NEXT: movss %xmm1, (%rdi) ; SSE4-NEXT: testb $2, %al -; SSE4-NEXT: je LBB27_12 -; SSE4-NEXT: LBB27_11: ## %cond.store10 +; SSE4-NEXT: je LBB28_12 +; SSE4-NEXT: LBB28_11: ## %cond.store10 ; SSE4-NEXT: extractps $1, %xmm1, 4(%rdi) ; SSE4-NEXT: testb $4, %al -; SSE4-NEXT: je LBB27_14 -; SSE4-NEXT: LBB27_13: ## %cond.store12 +; SSE4-NEXT: je LBB28_14 +; SSE4-NEXT: LBB28_13: ## %cond.store12 ; SSE4-NEXT: extractps $2, %xmm1, 8(%rdi) ; SSE4-NEXT: testb $8, %al -; SSE4-NEXT: je LBB27_16 -; SSE4-NEXT: LBB27_15: ## %cond.store14 +; SSE4-NEXT: je LBB28_16 +; SSE4-NEXT: LBB28_15: ## %cond.store14 ; SSE4-NEXT: extractps $3, %xmm1, 12(%rdi) ; SSE4-NEXT: retq ; From 344a3d0bc0fb0868b519c3342b4982d6121eece3 Mon Sep 17 00:00:00 2001 From: Alina Sbirlea Date: Mon, 14 Sep 2020 18:07:44 -0700 Subject: [PATCH 0922/1079] [MemorySSA] Rename uses in blocks with Phis. Renaming should include blocks with existing Phis. Resolves PR45927. 
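A minimal sketch of the updater sequence that reaches the fixed path, under
the assumption that MSSA, BB and a freshly inserted store NewSI already exist
in the caller (the two API calls are the usual MemorySSAUpdater entry points):

    #include "llvm/Analysis/MemorySSA.h"
    #include "llvm/Analysis/MemorySSAUpdater.h"
    using namespace llvm;

    MemorySSAUpdater Updater(&MSSA);
    // Create the MemoryDef for the new store...
    MemoryAccess *NewMA = Updater.createMemoryAccessInBB(
        NewSI, /*Definition=*/nullptr, BB, MemorySSA::BeforeTerminator);
    // ...and let insertDef rename uses. Blocks that already had a MemoryPhi
    // must be renamed as well, which is what this change fixes.
    Updater.insertDef(cast<MemoryDef>(NewMA), /*RenameUses=*/true);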
Differential Revision: https://reviews.llvm.org/D87661 --- llvm/lib/Analysis/MemorySSAUpdater.cpp | 12 ++++ llvm/test/Analysis/MemorySSA/pr45927.ll | 73 +++++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 llvm/test/Analysis/MemorySSA/pr45927.ll diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp index 19f434f82cc66..f633fbe4e12b2 100644 --- a/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -342,6 +342,8 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { SmallVector FixupList(InsertedPHIs.begin(), InsertedPHIs.end()); + SmallSet ExistingPhis; + // Remember the index where we may insert new phis. unsigned NewPhiIndex = InsertedPHIs.size(); if (!DefBeforeSameBlock) { @@ -382,6 +384,8 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { if (!MPhi) { MPhi = MSSA->createMemoryPhi(BBIDF); NewInsertedPHIs.push_back(MPhi); + } else { + ExistingPhis.insert(MPhi); } // Add the phis created into the IDF blocks to NonOptPhis, so they are not // optimized out as trivial by the call to getPreviousDefFromEnd below. @@ -447,6 +451,13 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { if (Phi) MSSA->renamePass(Phi->getBlock(), nullptr, Visited); } + // Existing Phi blocks may need renaming too, if an access was previously + // optimized and the inserted Defs "covers" the Optimized value. + for (auto &MP : ExistingPhis) { + MemoryPhi *Phi = dyn_cast_or_null(MP); + if (Phi) + MSSA->renamePass(Phi->getBlock(), nullptr, Visited); + } } } @@ -1322,6 +1333,7 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA, bool OptimizePhis) { // Note: We assume MemorySSA is not used in metadata since it's not really // part of the IR. 
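+  // A NewDefTarget equal to MA itself would make the use-rewriting loop
+  // below spin forever; catch that early.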
+ assert(NewDefTarget != MA && "Going into an infinite loop"); while (!MA->use_empty()) { Use &U = *MA->use_begin(); if (auto *MUD = dyn_cast(U.getUser())) diff --git a/llvm/test/Analysis/MemorySSA/pr45927.ll b/llvm/test/Analysis/MemorySSA/pr45927.ll new file mode 100644 index 0000000000000..b6c1d6ba86c19 --- /dev/null +++ b/llvm/test/Analysis/MemorySSA/pr45927.ll @@ -0,0 +1,73 @@ +; RUN: opt -disable-output -loop-simplify -lcssa -licm -print-memoryssa < %s 2>&1 | FileCheck %s +; RUN: opt -disable-output -aa-pipeline=basic-aa -passes='loop-mssa(licm),print' < %s 2>&1 | FileCheck %s + + +@a = external dso_local global i16, align 1 +@c = external dso_local global i16, align 1 + +; CHECK-LABEL: @main() + +; CHECK: entry: +; CHECK-NEXT: %res.addr.i = alloca i16 +; CHECK-NEXT: ; MemoryUse(liveOnEntry) +; CHECK-NEXT: %c.promoted = load i16, i16* @c +; CHECK-NEXT: br label %for.cond.i + +; CHECK: for.cond.i: +; CHECK-NEXT: ; [[NO5:.*]] = MemoryPhi({entry,liveOnEntry},{f.exit.i,[[NO5]]}) +; CHECK-NEXT: %inc.i1 = phi i16 [ %inc.i, %f.exit.i ], [ %c.promoted, %entry ] +; CHECK-NEXT: %inc.i = add nsw i16 %inc.i1, 1 +; CHECK-NEXT: br i1 false, label %f.exit.thread.i, label %f.exit.i + +; CHECK: f.exit.thread.i: +; CHECK-NEXT: %inc.i.lcssa = phi i16 [ %inc.i, %for.cond.i ] +; CHECK-NEXT: ; [[NO6:.*]] = MemoryDef([[NO5]]) +; CHECK-NEXT: store i16 %inc.i.lcssa, i16* @c, align 1 +; CHECK-NEXT: ; [[NO2:.*]] = MemoryDef([[NO6]]) +; CHECK-NEXT: store i16 1, i16* @a, align 1 +; CHECK-NEXT: ; MemoryUse([[NO2]]) +; CHECK-NEXT: %tmp2 = load i16, i16* @c, align 1 +; CHECK-NEXT: br label %g.exit + +; CHECK: f.exit.i +; CHECK-NEXT: br i1 false, label %g.exit.loopexit, label %for.cond.i + +; CHECK: g.exit.loopexit: +; CHECK-NEXT: %inc.i.lcssa2 = phi i16 [ %inc.i, %f.exit.i ] +; CHECK-NEXT: ; [[NO7:.*]] = MemoryDef([[NO5]]) +; CHECK-NEXT: store i16 %inc.i.lcssa2, i16* @c, align 1 +; CHECK-NEXT: br label %g.exit + +; CHECK: g.exit +; CHECK-NEXT: ; [[NO4:.*]] = MemoryPhi({f.exit.thread.i,[[NO2]]},{g.exit.loopexit,[[NO7]]}) +; CHECK-NEXT: ; MemoryUse([[NO4]]) +; CHECK-NEXT: %tmp1 = load i16, i16* @c, align 1 +; CHECK-NEXT: ; [[NO3:.*]] = MemoryDef([[NO4]]) +; CHECK-NEXT: store i16 %tmp1, i16* %res.addr.i, align 1 +; CHECK-NEXT: ret void + +define dso_local void @main() { +entry: + %res.addr.i = alloca i16, align 1 + br label %for.cond.i + +for.cond.i: ; preds = %f.exit.i, %entry + %tmp0 = load i16, i16* @c, align 1 + %inc.i = add nsw i16 %tmp0, 1 + store i16 %inc.i, i16* @c, align 1 + br i1 false, label %f.exit.thread.i, label %f.exit.i + +f.exit.thread.i: ; preds = %for.cond.i + store i16 1, i16* @a, align 1 + %tmp2 = load i16, i16* @c, align 1 + br label %g.exit + +f.exit.i: ; preds = %for.cond.i + br i1 false, label %g.exit, label %for.cond.i + +g.exit: ; preds = %f.exit.i, %f.exit.thread.i + %tmp1 = load i16, i16* @c, align 1 + store i16 %tmp1, i16* %res.addr.i, align 1 + ret void +} + From 905b9ca26c94fa86339451a528cedde5004fc1bb Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Wed, 2 Sep 2020 14:42:37 -0700 Subject: [PATCH 0923/1079] Canonicalize declaration pointers when forming APValues. References to different declarations of the same entity aren't different values, so shouldn't have different representations. Recommit of e6393ee813178e9d3306b8e3c6949a4f32f8a2cb with fixed handling for weak declarations. We now look for attributes on the most recent declaration when determining whether a declaration is weak. 
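To make the invariant concrete, here is a minimal illustration (not taken
from the patch) of what canonicalizing the declaration pointer guarantees
during constant evaluation:

    int n;                  // definition: first declaration of the entity
    constexpr int *p = &n;
    extern int n;           // redeclaration: a distinct ValueDecl in the
                            // AST, but the same entity
    constexpr int *q = &n;  // lvalue base formed via the redeclaration
    // Both APValues store the canonical declaration, so the bases match:
    static_assert(p == q, "one entity, one representation");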
--- clang/include/clang/AST/APValue.h | 4 +-- clang/lib/AST/APValue.cpp | 26 +++++++++++++------ clang/lib/AST/Decl.cpp | 2 +- clang/lib/AST/DeclBase.cpp | 2 +- clang/lib/AST/ExprConstant.cpp | 18 +++++-------- .../CXX/dcl.dcl/dcl.spec/dcl.constexpr/p9.cpp | 3 +-- clang/test/OpenMP/ordered_messages.cpp | 5 +++- 7 files changed, 33 insertions(+), 27 deletions(-) diff --git a/clang/include/clang/AST/APValue.h b/clang/include/clang/AST/APValue.h index 5103cfa8604e5..6307f8a92e5a2 100644 --- a/clang/include/clang/AST/APValue.h +++ b/clang/include/clang/AST/APValue.h @@ -174,6 +174,7 @@ class APValue { return !(LHS == RHS); } friend llvm::hash_code hash_value(const LValueBase &Base); + friend struct llvm::DenseMapInfo; private: PtrTy Ptr; @@ -201,8 +202,7 @@ class APValue { public: LValuePathEntry() : Value() {} - LValuePathEntry(BaseOrMemberType BaseOrMember) - : Value{reinterpret_cast(BaseOrMember.getOpaqueValue())} {} + LValuePathEntry(BaseOrMemberType BaseOrMember); static LValuePathEntry ArrayIndex(uint64_t Index) { LValuePathEntry Result; Result.Value = Index; diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp index 08ae0ff3c67d3..32d3ff7ce1d08 100644 --- a/clang/lib/AST/APValue.cpp +++ b/clang/lib/AST/APValue.cpp @@ -38,7 +38,7 @@ static_assert( "Type is insufficiently aligned"); APValue::LValueBase::LValueBase(const ValueDecl *P, unsigned I, unsigned V) - : Ptr(P), Local{I, V} {} + : Ptr(P ? cast(P->getCanonicalDecl()) : nullptr), Local{I, V} {} APValue::LValueBase::LValueBase(const Expr *P, unsigned I, unsigned V) : Ptr(P), Local{I, V} {} @@ -82,13 +82,19 @@ bool operator==(const APValue::LValueBase &LHS, const APValue::LValueBase &RHS) { if (LHS.Ptr != RHS.Ptr) return false; - if (LHS.is()) + if (LHS.is() || LHS.is()) return true; return LHS.Local.CallIndex == RHS.Local.CallIndex && LHS.Local.Version == RHS.Local.Version; } } +APValue::LValuePathEntry::LValuePathEntry(BaseOrMemberType BaseOrMember) { + if (const Decl *D = BaseOrMember.getPointer()) + BaseOrMember.setPointer(D->getCanonicalDecl()); + Value = reinterpret_cast(BaseOrMember.getOpaqueValue()); +} + namespace { struct LVBase { APValue::LValueBase Base; @@ -113,14 +119,16 @@ APValue::LValueBase::operator bool () const { clang::APValue::LValueBase llvm::DenseMapInfo::getEmptyKey() { - return clang::APValue::LValueBase( - DenseMapInfo::getEmptyKey()); + clang::APValue::LValueBase B; + B.Ptr = DenseMapInfo::getEmptyKey(); + return B; } clang::APValue::LValueBase llvm::DenseMapInfo::getTombstoneKey() { - return clang::APValue::LValueBase( - DenseMapInfo::getTombstoneKey()); + clang::APValue::LValueBase B; + B.Ptr = DenseMapInfo::getTombstoneKey(); + return B; } namespace clang { @@ -773,8 +781,10 @@ void APValue::MakeMemberPointer(const ValueDecl *Member, bool IsDerivedMember, assert(isAbsent() && "Bad state change"); MemberPointerData *MPD = new ((void*)(char*)Data.buffer) MemberPointerData; Kind = MemberPointer; - MPD->MemberAndIsDerivedMember.setPointer(Member); + MPD->MemberAndIsDerivedMember.setPointer( + Member ? 
cast(Member->getCanonicalDecl()) : nullptr); MPD->MemberAndIsDerivedMember.setInt(IsDerivedMember); MPD->resizePath(Path.size()); - memcpy(MPD->getPath(), Path.data(), Path.size()*sizeof(const CXXRecordDecl*)); + for (unsigned I = 0; I != Path.size(); ++I) + MPD->getPath()[I] = Path[I]->getCanonicalDecl(); } diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 9815f0648ad76..b446bf0bef309 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -4706,7 +4706,7 @@ char *Buffer = new (getASTContext(), 1) char[Name.size() + 1]; void ValueDecl::anchor() {} bool ValueDecl::isWeak() const { - for (const auto *I : attrs()) + for (const auto *I : getMostRecentDecl()->attrs()) if (isa(I) || isa(I)) return true; diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index f4314d0bd9614..ab2b55c0762e7 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -720,7 +720,7 @@ bool Decl::isWeakImported() const { if (!canBeWeakImported(IsDefinition)) return false; - for (const auto *A : attrs()) { + for (const auto *A : getMostRecentDecl()->attrs()) { if (isa(A)) return true; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index e8f132dd48032..8e43b62662eef 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -1978,18 +1978,11 @@ static bool HasSameBase(const LValue &A, const LValue &B) { return false; if (A.getLValueBase().getOpaqueValue() != - B.getLValueBase().getOpaqueValue()) { - const Decl *ADecl = GetLValueBaseDecl(A); - if (!ADecl) - return false; - const Decl *BDecl = GetLValueBaseDecl(B); - if (!BDecl || ADecl->getCanonicalDecl() != BDecl->getCanonicalDecl()) - return false; - } + B.getLValueBase().getOpaqueValue()) + return false; - return IsGlobalLValue(A.getLValueBase()) || - (A.getLValueCallIndex() == B.getLValueCallIndex() && - A.getLValueVersion() == B.getLValueVersion()); + return A.getLValueCallIndex() == B.getLValueCallIndex() && + A.getLValueVersion() == B.getLValueVersion(); } static void NoteLValueLocation(EvalInfo &Info, APValue::LValueBase Base) { @@ -3108,7 +3101,8 @@ static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E, // If we're currently evaluating the initializer of this declaration, use that // in-flight value. 
- if (Info.EvaluatingDecl.dyn_cast() == VD) { + if (declaresSameEntity(Info.EvaluatingDecl.dyn_cast(), + VD)) { Result = Info.EvaluatingDeclValue; return true; } diff --git a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p9.cpp b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p9.cpp index 8d51dbde71776..3720b277af7a9 100644 --- a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p9.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p9.cpp @@ -24,11 +24,10 @@ constexpr double &ni3; // expected-error {{declaration of reference variable 'ni constexpr int nc1 = i; // expected-error {{constexpr variable 'nc1' must be initialized by a constant expression}} expected-note {{read of non-const variable 'i' is not allowed in a constant expression}} constexpr C nc2 = C(); // expected-error {{cannot have non-literal type 'const C'}} -int &f(); // expected-note {{declared here}} +int &f(); // expected-note 2{{declared here}} constexpr int &nc3 = f(); // expected-error {{constexpr variable 'nc3' must be initialized by a constant expression}} expected-note {{non-constexpr function 'f' cannot be used in a constant expression}} constexpr int nc4(i); // expected-error {{constexpr variable 'nc4' must be initialized by a constant expression}} expected-note {{read of non-const variable 'i' is not allowed in a constant expression}} constexpr C nc5((C())); // expected-error {{cannot have non-literal type 'const C'}} -int &f(); // expected-note {{here}} constexpr int &nc6(f()); // expected-error {{constexpr variable 'nc6' must be initialized by a constant expression}} expected-note {{non-constexpr function 'f'}} struct pixel { diff --git a/clang/test/OpenMP/ordered_messages.cpp b/clang/test/OpenMP/ordered_messages.cpp index f6b9dbd6d27fa..8a3a86443eb8c 100644 --- a/clang/test/OpenMP/ordered_messages.cpp +++ b/clang/test/OpenMP/ordered_messages.cpp @@ -16,6 +16,9 @@ void xxx(int argc) { } int foo(); +#if __cplusplus >= 201103L +// expected-note@-2 {{declared here}} +#endif template T foo() { @@ -176,7 +179,7 @@ T foo() { int foo() { #if __cplusplus >= 201103L -// expected-note@-2 2 {{declared here}} +// expected-note@-2 {{declared here}} #endif int k; #pragma omp for ordered From 7337f296194483e0959ff980049e2835e226f396 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Wed, 16 Sep 2020 18:08:03 -0700 Subject: [PATCH 0924/1079] PR47555: Inheriting constructors are implicitly definable. Don't forget to define them if they're constexpr and used inside a template; we might try to evaluate a call to them before the template is instantiated. 
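The shape of the failing case (this mirrors the regression test added
below; the names come from that test):

    struct A { constexpr A(int) {} };
    struct B : A { using A::A; };  // inheriting constructor: implicit,
                                   // constexpr, not user-provided
    template <typename T> void f() {
      constexpr B b = 0;           // forces evaluation of B::B(int), which
                                   // must therefore be implicitly defined
    }
    template void f<int>();        // instantiation triggers the evaluation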
--- clang/lib/Sema/SemaExpr.cpp | 9 +++++++-- clang/test/SemaCXX/cxx11-inheriting-ctors.cpp | 9 +++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 9a4b3e31e850c..c82febdbf3a71 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -16582,8 +16582,13 @@ static OdrUseContext isOdrUseContext(Sema &SemaRef) { } static bool isImplicitlyDefinableConstexprFunction(FunctionDecl *Func) { - return Func->isConstexpr() && - (Func->isImplicitlyInstantiable() || !Func->isUserProvided()); + if (!Func->isConstexpr()) + return false; + + if (Func->isImplicitlyInstantiable() || !Func->isUserProvided()) + return true; + auto *CCD = dyn_cast(Func); + return CCD && CCD->getInheritedConstructor(); } /// Mark a function referenced, and check whether it is odr-used diff --git a/clang/test/SemaCXX/cxx11-inheriting-ctors.cpp b/clang/test/SemaCXX/cxx11-inheriting-ctors.cpp index 7d6f4f09f09c4..5be428401fa01 100644 --- a/clang/test/SemaCXX/cxx11-inheriting-ctors.cpp +++ b/clang/test/SemaCXX/cxx11-inheriting-ctors.cpp @@ -133,3 +133,12 @@ namespace implicit_member_srcloc { S0 s0; } } + +namespace PR47555 { + struct A { constexpr A(int) {} }; + struct B : A { using A::A; }; + template void f() { + constexpr B b = 0; + }; + template void f(); +} From f4ea0f98142a97666cd0478757570e819923a829 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 14 Sep 2020 19:01:38 -0700 Subject: [PATCH 0925/1079] [NewPM] Port -print-alias-sets to NPM Really it should be named print, but for the sake of changing fewer tests, added a TODO to rename after NPM switch and test cleanup. Reviewed By: ychen Differential Revision: https://reviews.llvm.org/D87713 --- llvm/include/llvm/Analysis/AliasSetTracker.h | 9 ++++++++ llvm/lib/Analysis/AliasSetTracker.cpp | 23 +++++++++++++++----- llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 2 ++ llvm/test/Analysis/AliasSet/guards.ll | 1 + 5 files changed, 30 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/Analysis/AliasSetTracker.h b/llvm/include/llvm/Analysis/AliasSetTracker.h index 690a94d9cf2ce..1db657528d194 100644 --- a/llvm/include/llvm/Analysis/AliasSetTracker.h +++ b/llvm/include/llvm/Analysis/AliasSetTracker.h @@ -23,6 +23,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" #include @@ -457,6 +458,14 @@ inline raw_ostream& operator<<(raw_ostream &OS, const AliasSetTracker &AST) { return OS; } +class AliasSetsPrinterPass : public PassInfoMixin { + raw_ostream &OS; + +public: + explicit AliasSetsPrinterPass(raw_ostream &OS); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + } // end namespace llvm #endif // LLVM_ANALYSIS_ALIASSETTRACKER_H diff --git a/llvm/lib/Analysis/AliasSetTracker.cpp b/llvm/lib/Analysis/AliasSetTracker.cpp index 03f486477b4e1..6f8f192d0d968 100644 --- a/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/llvm/lib/Analysis/AliasSetTracker.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" @@ -734,8 +735,6 @@ AliasSetTracker::ASTCallbackVH::operator=(Value *V) { namespace { class AliasSetPrinter : public FunctionPass { - AliasSetTracker *Tracker; - public: static 
char ID; // Pass identification, replacement for typeid @@ -750,12 +749,11 @@ namespace { bool runOnFunction(Function &F) override { auto &AAWP = getAnalysis(); - Tracker = new AliasSetTracker(AAWP.getAAResults()); + AliasSetTracker Tracker(AAWP.getAAResults()); errs() << "Alias sets for function '" << F.getName() << "':\n"; for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) - Tracker->add(&*I); - Tracker->print(errs()); - delete Tracker; + Tracker.add(&*I); + Tracker.print(errs()); return false; } }; @@ -769,3 +767,16 @@ INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets", INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets", "Alias Set Printer", false, true) + +AliasSetsPrinterPass::AliasSetsPrinterPass(raw_ostream &OS) : OS(OS) {} + +PreservedAnalyses AliasSetsPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &AA = AM.getResult(F); + AliasSetTracker Tracker(AA); + OS << "Alias sets for function '" << F.getName() << "':\n"; + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + Tracker.add(&*I); + Tracker.print(OS); + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 71e013f75d0a7..83b2674e3cda4 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasAnalysisEvaluator.h" +#include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index d006f86ea2fbb..2dfe9fc60f1af 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -258,6 +258,8 @@ FUNCTION_PASS("print", PhiValuesPrinterPass(dbgs())) FUNCTION_PASS("print", RegionInfoPrinterPass(dbgs())) FUNCTION_PASS("print", ScalarEvolutionPrinterPass(dbgs())) FUNCTION_PASS("print", StackSafetyPrinterPass(dbgs())) +// TODO: rename to print after NPM switch +FUNCTION_PASS("print-alias-sets", AliasSetsPrinterPass(dbgs())) FUNCTION_PASS("print-predicateinfo", PredicateInfoPrinterPass(dbgs())) FUNCTION_PASS("reassociate", ReassociatePass()) FUNCTION_PASS("scalarizer", ScalarizerPass()) diff --git a/llvm/test/Analysis/AliasSet/guards.ll b/llvm/test/Analysis/AliasSet/guards.ll index 3a162b5c21c8d..f822290917c85 100644 --- a/llvm/test/Analysis/AliasSet/guards.ll +++ b/llvm/test/Analysis/AliasSet/guards.ll @@ -1,4 +1,5 @@ ; RUN: opt -basic-aa -print-alias-sets -S -o - < %s 2>&1 | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes=print-alias-sets -S -o - < %s 2>&1 | FileCheck %s declare void @llvm.experimental.guard(i1, ...) ; CHECK: Alias sets for function 'test0': From b04c1a9d3127730c05e8a22a0e931a12a39528df Mon Sep 17 00:00:00 2001 From: Andrew Litteken Date: Wed, 16 Sep 2020 20:24:29 -0500 Subject: [PATCH 0926/1079] [IRSim] Adding IR Instruction Mapper This introduces the IRInstructionMapper, and the associated wrapper for instructions, IRInstructionData, that maps IR level Instructions to unsigned integers. Mapping is done mainly by using the "isSameOperationAs" comparison between two instructions. If they return true, the opcode, result type, and operand types of the instruction are used to hash the instruction with an unsigned integer. 
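A minimal sketch of driving the mapper (modeled on the getVectors() helper
in the unit tests added below; this standalone function is hypothetical):

    #include "llvm/Analysis/IRSimilarityIdentifier.h"
    #include "llvm/Support/Allocator.h"
    #include <vector>
    using namespace llvm;
    using namespace llvm::IRSimilarity;

    // Map every BasicBlock of F onto the unsigned "numeric string". Two
    // positions hold the same integer exactly when the corresponding
    // instructions hash the same (opcode, result type, operand types).
    static void mapFunction(Function &F) {
      BumpPtrAllocator InstDataAllocator;
      IRInstructionMapper Mapper(&InstDataAllocator);
      std::vector<IRInstructionData *> InstrList;
      std::vector<unsigned> IntegerMapping;
      for (BasicBlock &BB : F)
        Mapper.convertToUnsignedVec(BB, InstrList, IntegerMapping);
    }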
The mapper accepts instruction ranges, and adds each resulting integer to a
list, and each wrapped instruction to a separate list. At present, branches
and phi nodes are not mapped, and exception handling is treated as illegal.
Debug instructions are not considered.

The different mapping schemes are tested in
unittests/Analysis/IRSimilarityIdentifierTest.cpp

Differential Revision: https://reviews.llvm.org/D86968
---
 .../llvm/Analysis/IRSimilarityIdentifier.h    |  357 +++++
 llvm/lib/Analysis/CMakeLists.txt              |    1 +
 llvm/lib/Analysis/IRSimilarityIdentifier.cpp  |  153 +++
 llvm/unittests/Analysis/CMakeLists.txt        |    1 +
 .../Analysis/IRSimilarityIdentifierTest.cpp   | 1177 +++++++++++++++++
 5 files changed, 1689 insertions(+)
 create mode 100644 llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
 create mode 100644 llvm/lib/Analysis/IRSimilarityIdentifier.cpp
 create mode 100644 llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp

diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
new file mode 100644
index 0000000000000..9e6d3aeec0304
--- /dev/null
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -0,0 +1,357 @@
+//===- IRSimilarityIdentifier.h - Find similarity in a module --------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// Interface file for the IRSimilarityIdentifier for identifying similarities in
+// IR including the IRInstructionMapper, which maps an Instruction to unsigned
+// integers.
+//
+// Two sequences of instructions are called "similar" if they perform the same
+// series of operations for all inputs.
+//
+// \code
+// %1 = add i32 %a, 10
+// %2 = add i32 %a, %1
+// %3 = icmp slt i32 %1, %2
+// \endcode
+//
+// and
+//
+// \code
+// %1 = add i32 11, %a
+// %2 = sub i32 %a, %1
+// %3 = icmp sgt i32 %2, %1
+// \endcode
+//
+// ultimately have the same result, even if the inputs and structure are
+// slightly different.
+//
+// For instructions, we do not worry about operands that do not have fixed
+// semantic meaning to the program. We consider the opcode that the instruction
+// has, the types, parameters, and extra information such as the function name,
+// or comparison predicate. These are used to create a hash to map instructions
+// to integers to be used in similarity matching in sequences of instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
+#define LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
+
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Allocator.h"
+
+namespace llvm {
+namespace IRSimilarity {
+
+/// This represents what is and is not supported when finding similarity in
+/// Instructions.
+///
+/// Legal Instructions are considered when looking at similarity between
+/// Instructions.
+///
+/// Illegal Instructions cannot be considered when looking for similarity
+/// between Instructions. They act as boundaries between similarity regions.
+///
+/// Invisible Instructions are skipped over during analysis.
+// TODO: Shared with MachineOutliner
+enum InstrType { Legal, Illegal, Invisible };
+
+/// This provides the utilities for hashing an Instruction to an unsigned
+/// integer. Two IRInstructionDatas produce the same hash value when their
+/// underlying Instructions perform the same operation (even if they don't have
+/// the same input operands).
+/// As a more concrete example, consider the following:
+///
+/// \code
+/// %add1 = add i32 %a, %b
+/// %add2 = add i32 %c, %d
+/// %add3 = add i64 %e, %f
+/// \endcode
+///
+/// Then the IRInstructionData wrappers for these Instructions may be hashed like
+/// so:
+///
+/// \code
+/// ; These two adds have the same types and operand types, so they hash to the
+/// ; same number.
+/// %add1 = add i32 %a, %b ; Hash: 1
+/// %add2 = add i32 %c, %d ; Hash: 1
+/// ; This add produces an i64. This differentiates it from %add1 and %add2. So,
+/// ; it hashes to a different number.
+/// %add3 = add i64 %e, %f ; Hash: 2
+/// \endcode
+///
+/// This hashing scheme will be used to represent the program as a very long
+/// string. This string can then be placed in a data structure which can be used
+/// for similarity queries.
+///
+/// TODO: Handle types of Instructions which can be equal even with different
+/// operands. (E.g. comparisons with swapped predicates.)
+/// TODO: Handle CallInsts, which are only checked for function type
+/// by \ref isSameOperationAs.
+/// TODO: Handle GetElementPtrInsts, as some of the operands have to be the
+/// exact same, and some do not.
+struct IRInstructionData : ilist_node<IRInstructionData> {
+
+  /// The source Instruction that is being wrapped.
+  Instruction *Inst = nullptr;
+  /// The values of the operands in the Instruction.
+  SmallVector<Value *, 4> OperVals;
+  /// The legality of the wrapped instruction. This is informed by InstrType,
+  /// and is used when checking whether two instructions are considered
+  /// similar. If either instruction is not legal, the instructions are
+  /// automatically not considered similar.
+  bool Legal;
+
+  /// Gather the information that is difficult to gather for an Instruction, or
+  /// is changed, i.e. the operands of an Instruction and the Types of those
+  /// operands. This extra information allows for similarity matching to make
+  /// assertions that allow for more flexibility when checking for whether an
+  /// Instruction performs the same operation.
+  IRInstructionData(Instruction &I, bool Legality);
+
+  /// Hashes \p ID based on its opcode, types, and operand types.
+  /// Two IRInstructionData instances produce the same hash when they perform
+  /// the same operation.
+  ///
+  /// As a simple example, consider the following instructions.
+  ///
+  /// \code
+  /// %add1 = add i32 %x1, %y1
+  /// %add2 = add i32 %x2, %y2
+  ///
+  /// %sub = sub i32 %x1, %y1
+  ///
+  /// %add_i64 = add i64 %x2, %y2
+  /// \endcode
+  ///
+  /// Because the first two adds operate on the same types, and are performing
+  /// the same action, they will be hashed to the same value.
+  ///
+  /// However, the subtraction instruction is not the same as an addition, and
+  /// will be hashed to a different value.
+  ///
+  /// Finally, the last add has a different type compared to the first two add
+  /// instructions, so it will also be hashed to a different value than any of
+  /// the previous instructions.
+  ///
+  /// \param [in] ID - The IRInstructionData instance to be hashed.
+  /// \returns A hash_value of the IRInstructionData.
+  friend hash_code hash_value(const IRInstructionData &ID) {
+    SmallVector<Type *, 4> OperTypes;
+    for (Value *V : ID.OperVals)
+      OperTypes.push_back(V->getType());
+
+    return hash_combine(
+        hash_value(ID.Inst->getOpcode()), hash_value(ID.Inst->getType()),
+        hash_combine_range(OperTypes.begin(), OperTypes.end()));
+  }
+};
+
+/// Compare one IRInstructionData class to another IRInstructionData class for
+/// whether they are performing the same operation, and can be mapped to the
+/// same value. For regular instructions, if the hash value is the same, then
+/// they will also be close.
+///
+/// \param A - The first IRInstructionData class to compare
+/// \param B - The second IRInstructionData class to compare
+/// \returns true if \p A and \p B are similar enough to be mapped to the same
+/// value.
+bool isClose(const IRInstructionData &A, const IRInstructionData &B);
+
+struct IRInstructionDataTraits : DenseMapInfo<IRInstructionData *> {
+  static inline IRInstructionData *getEmptyKey() { return nullptr; }
+  static inline IRInstructionData *getTombstoneKey() {
+    return reinterpret_cast<IRInstructionData *>(-1);
+  }
+
+  static unsigned getHashValue(const IRInstructionData *E) {
+    using llvm::hash_value;
+    assert(E && "IRInstructionData is a nullptr?");
+    return hash_value(*E);
+  }
+
+  static bool isEqual(const IRInstructionData *LHS,
+                      const IRInstructionData *RHS) {
+    if (RHS == getEmptyKey() || RHS == getTombstoneKey() ||
+        LHS == getEmptyKey() || LHS == getTombstoneKey())
+      return LHS == RHS;
+
+    assert(LHS && RHS && "nullptr should have been caught by getEmptyKey?");
+    return isClose(*LHS, *RHS);
+  }
+};
+
+/// Helper struct for converting the Instructions in a Module into a vector of
+/// unsigned integers. This vector of unsigned integers can be thought of as a
+/// "numeric string". This numeric string can then be queried by, for example,
+/// data structures that find repeated substrings.
+///
+/// This hashing is done per BasicBlock in the module. To hash Instructions
+/// based off of their operations, each Instruction is wrapped in an
+/// IRInstructionData struct. The unsigned integer for an IRInstructionData
+/// depends on:
+/// - The hash provided by the IRInstructionData.
+/// - Which member of InstrType the IRInstructionData is classified as.
+/// See InstrType for more details on the possible classifications, and how
+/// they manifest in the numeric string.
+///
+/// The numeric string for an individual BasicBlock is terminated by a unique
+/// unsigned integer. This prevents data structures which rely on repetition
+/// from matching across BasicBlocks. (For example, the SuffixTree.)
+/// As a concrete example, if we have the following two BasicBlocks:
+/// \code
+/// bb0:
+/// %add1 = add i32 %a, %b
+/// %add2 = add i32 %c, %d
+/// %add3 = add i64 %e, %f
+/// bb1:
+/// %sub = sub i32 %c, %d
+/// %add4 = add i32 %c, %d
+/// \endcode
+/// We may hash the Instructions like this (via IRInstructionData):
+/// \code
+/// bb0:
+/// %add1 = add i32 %a, %b ; Hash: 1
+/// %add2 = add i32 %c, %d ; Hash: 1
+/// %add3 = add i64 %e, %f ; Hash: 2
+/// bb1:
+/// %sub = sub i32 %c, %d ; Hash: 3
+/// %add4 = add i32 %c, %d ; Hash: 1
+/// \endcode
+/// And produce a "numeric string representation" like so:
+/// 1, 1, 2, unique_integer_1, 3, 1, unique_integer_2
+///
+/// TODO: This is very similar to the MachineOutliner, and should be
+/// consolidated into the same interface.
+struct IRInstructionMapper {
+  /// The starting illegal instruction number to map to.
+  ///
+  /// Set to -3 for compatibility with DenseMapInfo<unsigned>.
+  unsigned IllegalInstrNumber = static_cast<unsigned>(-3);
+
+  /// The next available integer to assign to a legal Instruction.
+  unsigned LegalInstrNumber = 0;
+
+  /// Correspondence from IRInstructionData to unsigned integers.
+  DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>
+      InstructionIntegerMap;
+
+  /// Set if we added an illegal number in the previous step.
+  /// Since each illegal number is unique, we only need one of them between
+  /// each range of legal numbers. This lets us make sure we don't add more
+  /// than one illegal number per range.
+  bool AddedIllegalLastTime = false;
+
+  /// Marks whether we found an illegal instruction in the previous step.
+  bool CanCombineWithPrevInstr = false;
+
+  /// Marks whether we have found a set of instructions that is long enough
+  /// to be considered for similarity.
+  bool HaveLegalRange = false;
+
+  /// This allocator pointer is in charge of holding on to the IRInstructionData
+  /// so it is not deallocated until whatever external tool is using it is done
+  /// with the information.
+  BumpPtrAllocator *InstDataAllocator = nullptr;
+
+  /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers
+  /// determined by \p InstrType. Two Instructions are mapped to the same value
+  /// if they are close as defined by the InstructionData class above.
+  ///
+  /// \param [in] BB - The BasicBlock to be mapped to integers.
+  /// \param [in,out] InstrList - Vector of IRInstructionData to append to.
+  /// \param [in,out] IntegerMapping - Vector of unsigned integers to append to.
+  void convertToUnsignedVec(BasicBlock &BB,
+                            std::vector<IRInstructionData *> &InstrList,
+                            std::vector<unsigned> &IntegerMapping);
+
+  /// Maps an Instruction to a legal integer.
+  ///
+  /// \param [in] It - The Instruction to be mapped to an integer.
+  /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to
+  /// append to.
+  /// \param [in,out] InstrListForBB - Vector of IRInstructionData to append to.
+  /// \returns The integer \p It was mapped to.
+  unsigned mapToLegalUnsigned(BasicBlock::iterator &It,
+                              std::vector<unsigned> &IntegerMappingForBB,
+                              std::vector<IRInstructionData *> &InstrListForBB);
+
+  /// Maps an Instruction to an illegal integer.
+  ///
+  /// \param [in] It - The \p Instruction to be mapped to an integer.
+  /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to
+  /// append to.
+  /// \param [in,out] InstrListForBB - Vector of IRInstructionData to append to.
+  /// \param End - true if creating a dummy IRInstructionData at the end of a
+  /// basic block.
+  /// \returns The integer \p It was mapped to.
+  unsigned mapToIllegalUnsigned(
+      BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
+      std::vector<IRInstructionData *> &InstrListForBB, bool End = false);
+
+  IRInstructionMapper(BumpPtrAllocator *IDA) : InstDataAllocator(IDA) {
+    // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
+    // changed.
+    assert(DenseMapInfo<unsigned>::getEmptyKey() == static_cast<unsigned>(-1) &&
+           "DenseMapInfo<unsigned>'s empty key isn't -1!");
+    assert(DenseMapInfo<unsigned>::getTombstoneKey() ==
+               static_cast<unsigned>(-2) &&
+           "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
+  }
+
+  /// Custom InstVisitor to classify different instructions for whether it can
+  /// be analyzed for similarity.
+  struct InstructionClassification
+      : public InstVisitor<InstructionClassification, InstrType> {
+    InstructionClassification() {}
+
+    // TODO: Determine a scheme to resolve when the label is similar enough.
+    InstrType visitBranchInst(BranchInst &BI) { return Illegal; }
+    // TODO: Determine a scheme to resolve when the labels are similar enough.
+    InstrType visitPHINode(PHINode &PN) { return Illegal; }
+    // TODO: Handle allocas.
+    InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; }
+    // We exclude variable argument instructions since variable arguments
+    // require extra checking of the argument list.
+    InstrType visitVAArgInst(VAArgInst &VI) { return Illegal; }
+    // We exclude all exception handling cases since they are so context
+    // dependent.
+    InstrType visitLandingPadInst(LandingPadInst &LPI) { return Illegal; }
+    InstrType visitFuncletPadInst(FuncletPadInst &FPI) { return Illegal; }
+    // DebugInfo should be included in the regions, but should not be
+    // analyzed for similarity as it has no bearing on the outcome of the
+    // program.
+    InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; }
+    // TODO: Handle GetElementPtrInsts
+    InstrType visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+      return Illegal;
+    }
+    // TODO: Handle specific intrinsics.
+    InstrType visitIntrinsicInst(IntrinsicInst &II) { return Illegal; }
+    // TODO: Handle CallInsts.
+    InstrType visitCallInst(CallInst &CI) { return Illegal; }
+    // TODO: We do not currently handle similarity that changes the control
+    // flow.
+    InstrType visitInvokeInst(InvokeInst &II) { return Illegal; }
+    // TODO: We do not currently handle similarity that changes the control
+    // flow.
+    InstrType visitCallBrInst(CallBrInst &CBI) { return Illegal; }
+    // TODO: Handle interblock similarity.
+    InstrType visitTerminator(Instruction &I) { return Illegal; }
+    InstrType visitInstruction(Instruction &I) { return Legal; }
+  };
+
+  /// Maps an Instruction to a member of InstrType.
+  InstructionClassification InstClassifier;
+};
+
+} // end namespace IRSimilarity
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
index 78cc764379e17..4bd45ead30d35 100644
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -54,6 +54,7 @@ add_llvm_component_library(LLVMAnalysis
   GlobalsModRef.cpp
   GuardUtils.cpp
   HeatUtils.cpp
+  IRSimilarityIdentifier.cpp
   IVDescriptors.cpp
   IVUsers.cpp
   IndirectCallPromotionAnalysis.cpp
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
new file mode 100644
index 0000000000000..050f5b1c0962c
--- /dev/null
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -0,0 +1,153 @@
+//===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// Implementation file for the IRSimilarityIdentifier for identifying
+// similarities in IR including the IRInstructionMapper.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IRSimilarityIdentifier.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/User.h"
+
+using namespace llvm;
+using namespace IRSimilarity;
+
+IRInstructionData::IRInstructionData(Instruction &I, bool Legality)
+    : Inst(&I), Legal(Legality) {
+  // Here we collect the operands to be used to determine whether two
+  // instructions are similar to one another.
+ for (Use &OI : I.operands()) + OperVals.push_back(OI.get()); +} + +bool IRSimilarity::isClose(const IRInstructionData &A, + const IRInstructionData &B) { + return A.Legal && A.Inst->isSameOperationAs(B.Inst); +} + +// TODO: This is the same as the MachineOutliner, and should be consolidated +// into the same interface. +void IRInstructionMapper::convertToUnsignedVec( + BasicBlock &BB, std::vector &InstrList, + std::vector &IntegerMapping) { + BasicBlock::iterator It = BB.begin(); + + std::vector IntegerMappingForBB; + std::vector InstrListForBB; + + HaveLegalRange = false; + CanCombineWithPrevInstr = false; + AddedIllegalLastTime = true; + + for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { + switch (InstClassifier.visit(*It)) { + case InstrType::Legal: + mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); + break; + case InstrType::Illegal: + mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); + break; + case InstrType::Invisible: + AddedIllegalLastTime = false; + break; + } + } + + if (HaveLegalRange) { + mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); + InstrList.insert(InstrList.end(), InstrListForBB.begin(), + InstrListForBB.end()); + IntegerMapping.insert(IntegerMapping.end(), IntegerMappingForBB.begin(), + IntegerMappingForBB.end()); + } +} + +// TODO: This is the same as the MachineOutliner, and should be consolidated +// into the same interface. +unsigned IRInstructionMapper::mapToLegalUnsigned( + BasicBlock::iterator &It, std::vector &IntegerMappingForBB, + std::vector &InstrListForBB) { + // We added something legal, so we should unset the AddedLegalLastTime + // flag. + AddedIllegalLastTime = false; + + // If we have at least two adjacent legal instructions (which may have + // invisible instructions in between), remember that. + if (CanCombineWithPrevInstr) + HaveLegalRange = true; + CanCombineWithPrevInstr = true; + + // Get the integer for this instruction or give it the current + // LegalInstrNumber. + IRInstructionData *ID = new (InstDataAllocator->Allocate()) + IRInstructionData(*It, true); + InstrListForBB.push_back(ID); + + // Add to the instruction list + bool WasInserted; + DenseMap::iterator + ResultIt; + std::tie(ResultIt, WasInserted) = + InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber)); + unsigned INumber = ResultIt->second; + + // There was an insertion. + if (WasInserted) + LegalInstrNumber++; + + IntegerMappingForBB.push_back(INumber); + + // Make sure we don't overflow or use any integers reserved by the DenseMap. + assert(LegalInstrNumber < IllegalInstrNumber && + "Instruction mapping overflow!"); + + assert(LegalInstrNumber != DenseMapInfo::getEmptyKey() && + "Tried to assign DenseMap tombstone or empty key to instruction."); + assert(LegalInstrNumber != DenseMapInfo::getTombstoneKey() && + "Tried to assign DenseMap tombstone or empty key to instruction."); + + return INumber; +} + +// TODO: This is the same as the MachineOutliner, and should be consolidated +// into the same interface. +unsigned IRInstructionMapper::mapToIllegalUnsigned( + BasicBlock::iterator &It, std::vector &IntegerMappingForBB, + std::vector &InstrListForBB, bool End) { + // Can't combine an illegal instruction. Set the flag. + CanCombineWithPrevInstr = false; + + // Only add one illegal number per range of legal numbers. 
+ if (AddedIllegalLastTime) + return IllegalInstrNumber; + + IRInstructionData *ID = nullptr; + if (!End) + ID = new (InstDataAllocator->Allocate()) + IRInstructionData(*It, false); + InstrListForBB.push_back(ID); + + // Remember that we added an illegal number last time. + AddedIllegalLastTime = true; + unsigned INumber = IllegalInstrNumber; + IntegerMappingForBB.push_back(IllegalInstrNumber--); + + assert(LegalInstrNumber < IllegalInstrNumber && + "Instruction mapping overflow!"); + + assert(IllegalInstrNumber != DenseMapInfo::getEmptyKey() && + "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); + + assert(IllegalInstrNumber != DenseMapInfo::getTombstoneKey() && + "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); + + return INumber; +} diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt index dfe570fd15749..0480649352214 100644 --- a/llvm/unittests/Analysis/CMakeLists.txt +++ b/llvm/unittests/Analysis/CMakeLists.txt @@ -29,6 +29,7 @@ add_llvm_unittest_with_input_files(AnalysisTests DomTreeUpdaterTest.cpp GlobalsModRefTest.cpp FunctionPropertiesAnalysisTest.cpp + IRSimilarityIdentifierTest.cpp IVDescriptorsTest.cpp LazyCallGraphTest.cpp LoadsTest.cpp diff --git a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp new file mode 100644 index 0000000000000..4cc81b29a630e --- /dev/null +++ b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp @@ -0,0 +1,1177 @@ +//===- IRSimilarityIdentifierTest.cpp - IRSimilarityIdentifier unit tests -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Tests for components for finding similarity such as the instruction mapper, +// suffix tree usage, and structural analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/IRSimilarityIdentifier.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/SourceMgr.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace IRSimilarity; + +static std::unique_ptr makeLLVMModule(LLVMContext &Context, + StringRef ModuleStr) { + SMDiagnostic Err; + std::unique_ptr M = parseAssemblyString(ModuleStr, Err, Context); + assert(M && "Bad LLVM IR?"); + return M; +} + +void getVectors(Module &M, std::vector &InstrList, + std::vector &UnsignedVec) { + BumpPtrAllocator InstDataAllocator; + IRInstructionMapper Mapper(&InstDataAllocator); + + for (Function &F : M) + for (BasicBlock &BB : F) + Mapper.convertToUnsignedVec(BB, InstrList, UnsignedVec); +} + +// Checks that different opcodes are mapped to different values. +TEST(IRInstructionMapper, OpcodeDifferentiation) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = mul i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + // Check that the size of the unsigned vector and the instruction list are the + // same as a safety check. 
+ ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + // Make sure that the unsigned vector is the expected size. + ASSERT_TRUE(UnsignedVec.size() == 3); + + // Check whether the instructions are not mapped to the same value. + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that the same opcodes and types are mapped to the same values. +TEST(IRInstructionMapper, OpcodeTypeSimilarity) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + + // Check whether the instructions are mapped to the same value. + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that the same opcode and different types are mapped to different +// values. +TEST(IRInstructionMapper, TypeDifferentiation) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b, i64 %c, i64 %d) { + bb0: + %0 = add i32 %a, %b + %1 = add i64 %c, %d + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that different predicates map to different values. +TEST(IRInstructionMapper, PredicateDifferentiation) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = icmp sge i32 %b, %a + %1 = icmp slt i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that predicates with the same swapped predicate map to different +// values. +TEST(IRInstructionMapper, PredicateIsomorphism) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = icmp sgt i32 %a, %b + %1 = icmp slt i32 %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that the same predicate maps to the same value. +TEST(IRInstructionMapper, PredicateSimilarity) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = icmp slt i32 %a, %b + %1 = icmp slt i32 %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that the same predicate maps to the same value for floating point +// CmpInsts. 
+TEST(IRInstructionMapper, FPPredicateSimilarity) { + StringRef ModuleString = R"( + define i32 @f(double %a, double %b) { + bb0: + %0 = fcmp olt double %a, %b + %1 = fcmp olt double %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that the different predicate maps to a different value for floating +// point CmpInsts. +TEST(IRInstructionMapper, FPPredicatDifference) { + StringRef ModuleString = R"( + define i32 @f(double %a, double %b) { + bb0: + %0 = fcmp olt double %a, %b + %1 = fcmp oge double %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that the zexts that have the same type parameters map to the same +// unsigned integer. +TEST(IRInstructionMapper, ZextTypeSimilarity) { + StringRef ModuleString = R"( + define i32 @f(i32 %a) { + bb0: + %0 = zext i32 %a to i64 + %1 = zext i32 %a to i64 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that the sexts that have the same type parameters map to the same +// unsigned integer. +TEST(IRInstructionMapper, SextTypeSimilarity) { + StringRef ModuleString = R"( + define i32 @f(i32 %a) { + bb0: + %0 = sext i32 %a to i64 + %1 = sext i32 %a to i64 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that the zexts that have the different type parameters map to the +// different unsigned integers. +TEST(IRInstructionMapper, ZextTypeDifference) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i8 %b) { + bb0: + %0 = zext i32 %a to i64 + %1 = zext i8 %b to i32 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + + +// Checks that the sexts that have the different type parameters map to the +// different unsigned integers. 
+TEST(IRInstructionMapper, SextTypeDifference) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i8 %b) { + bb0: + %0 = sext i32 %a to i64 + %1 = sext i8 %b to i32 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that loads that have the same type are mapped to the same unsigned +// integer. +TEST(IRInstructionMapper, LoadSimilarType) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + %0 = load i32, i32* %a + %1 = load i32, i32* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that loads that have the different types are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, LoadDifferentType) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i64* %b) { + bb0: + %0 = load i32, i32* %a + %1 = load i64, i64* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that loads that have the different aligns are mapped to different +// unsigned integers. +TEST(IRInstructionMapper, LoadDifferentAlign) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 8 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that loads that have the different volatile settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, LoadDifferentVolatile) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + %0 = load volatile i32, i32* %a + %1 = load i32, i32* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that loads that have the same volatile settings are mapped to +// different unsigned integers. 
+TEST(IRInstructionMapper, LoadSameVolatile) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + %0 = load volatile i32, i32* %a + %1 = load volatile i32, i32* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that loads that have the different atomicity settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, LoadDifferentAtomic) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + %0 = load atomic i32, i32* %a unordered, align 4 + %1 = load atomic i32, i32* %b monotonic, align 4 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that loads that have the same atomicity settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, LoadSameAtomic) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + %0 = load atomic i32, i32* %a unordered, align 4 + %1 = load atomic i32, i32* %b unordered, align 4 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that stores that have the same type are mapped to the same unsigned +// integer. +TEST(IRInstructionMapper, StoreSimilarType) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + store i32 1, i32* %a + store i32 2, i32* %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that stores that have the different types are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, StoreDifferentType) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i64* %b) { + bb0: + store i32 1, i32* %a + store i64 1, i64* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that stores that have the different aligns are mapped to different +// unsigned integers. 
+TEST(IRInstructionMapper, StoreDifferentAlign) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + store i32 1, i32* %a, align 4 + store i32 1, i32* %b, align 8 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that stores that have the different volatile settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, StoreDifferentVolatile) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + store volatile i32 1, i32* %a + store i32 1, i32* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that stores that have the same volatile settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, StoreSameVolatile) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + store volatile i32 1, i32* %a + store volatile i32 1, i32* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that loads that have the same atomicity settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, StoreSameAtomic) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + store atomic i32 1, i32* %a unordered, align 4 + store atomic i32 1, i32* %b unordered, align 4 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that loads that have the different atomicity settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, StoreDifferentAtomic) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + store atomic i32 1, i32* %a unordered, align 4 + store atomic i32 1, i32* %b monotonic, align 4 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// In most cases, the illegal instructions we are collecting don't require any +// sort of setup. In these cases, we can just only have illegal instructions, +// and the mapper will create 0 length vectors, and we can check that. 
+
+// In cases where legal instructions are needed to set up an illegal
+// instruction, we use the fact that illegal instructions are assigned
+// unsigned integers counting down from the maximum value, while legal
+// instructions count up from 0. So, to check that an instruction was mapped
+// as illegal, we place a legal instruction after it and check that the
+// illegal instruction's unsigned integer is greater than the legal
+// instruction's.
+
+// Checks that a branch is mapped to be illegal since there is extra checking
+// needed to ensure that a branch in one region is branching to an isomorphic
+// location in a different region.
+TEST(IRInstructionMapper, BranchIllegal) {
+  StringRef ModuleString = R"(
+    define i32 @f(i32 %a, i32 %b) {
+    bb0:
+      %0 = icmp slt i32 %a, %b
+      br i1 %0, label %bb0, label %bb1
+    bb1:
+      ret i32 0
+    })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+}
+
+// Checks that a PHINode is mapped to be illegal since there is extra checking
+// needed to ensure that a PHINode in one region is in an isomorphic
+// location in a different region.
+TEST(IRInstructionMapper, PhiIllegal) {
+  StringRef ModuleString = R"(
+    define i32 @f(i32 %a, i32 %b) {
+    bb0:
+      %0 = phi i1 [ 0, %bb0 ], [ %0, %bb1 ]
+      ret i32 0
+    bb1:
+      ret i32 1
+    })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+}
+
+// Checks that an alloca instruction is mapped to be illegal.
+TEST(IRInstructionMapper, AllocaIllegal) {
+  StringRef ModuleString = R"(
+    define i32 @f(i32 %a, i32 %b) {
+    bb0:
+      %0 = alloca i32
+      ret i32 0
+    })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+}
+
+// Checks that a getelementptr instruction is mapped to be illegal. There is
+// extra checking required for the parameters if a getelementptr has more than
+// two operands.
+TEST(IRInstructionMapper, GetElementPtrIllegal) {
+  StringRef ModuleString = R"(
+    %struct.RT = type { i8, [10 x [20 x i32]], i8 }
+    %struct.ST = type { i32, double, %struct.RT }
+    define i32 @f(%struct.ST* %s, i32 %a, i32 %b) {
+    bb0:
+      %0 = getelementptr inbounds %struct.ST, %struct.ST* %s, i64 1
+      ret i32 0
+    })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+}
+
+// Checks that a call instruction is mapped to be illegal. We have to perform
+// extra checks to ensure that both the name and function type are the same.
+TEST(IRInstructionMapper, CallIllegal) {
+  StringRef ModuleString = R"(
+    declare i32 @f1(i32, i32)
+    define i32 @f(i32 %a, i32 %b) {
+    bb0:
+      %0 = call i32 @f1(i32 %a, i32 %b)
+      ret i32 0
+    })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+}
+
+// Checks that an invoke instruction is mapped to be illegal. Invoke
+// instructions are considered to be illegal because they change the control
+// flow in a way that is currently not recognized.
+TEST(IRInstructionMapper, InvokeIllegal) {
+  StringRef ModuleString = R"(
+    define i32 @f(i8 *%gep1, i32 %b) {
+    then:
+      invoke i32 undef(i8* undef)
+         to label %invoke unwind label %lpad
+
+    invoke:
+      unreachable
+
+    lpad:
+      landingpad { i8*, i32 }
+         catch i8* null
+      unreachable
+    })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+}
+
+// Checks that callbr instructions are mapped to be illegal. Callbr
+// instructions are considered to be illegal because they change the control
+// flow in a way that is currently not recognized.
+TEST(IRInstructionMapper, CallBrInstIllegal) {
+  StringRef ModuleString = R"(
+    define void @test() {
+    fail:
+      ret void
+    }
+
+    define i32 @f(i32 %a, i32 %b) {
+    bb0:
+      callbr void asm "xorl $0, $0; jmp ${1:l}", "r,X,~{dirflag},~{fpsr},~{flags}"(i32 %a, i8* blockaddress(@test, %fail)) to label %normal [label %fail]
+    fail:
+      ret i32 0
+    normal:
+      ret i32 0
+    })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+}
+
+// Checks that debug info intrinsics are mapped to be invisible. Since they do
+// not semantically change the program, they can be recognized as similar. The
+// intrinsic contributes no entry, so only the remaining three instructions
+// are mapped, giving the expected vector size of 3.
+TEST(IRInstructionMapper, DebugInfoInvisible) {
+  StringRef ModuleString = R"(
+    define i32 @f(i32 %a, i32 %b) {
+    then:
+      %0 = add i32 %a, %b
+      call void @llvm.dbg.value(metadata !0)
+      %1 = add i32 %a, %b
+      ret i32 0
+    }
+
+    declare void @llvm.dbg.value(metadata)
+    !0 = distinct !{!"test\00", i32 10})";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(3));
+}
+
+// The following are all exception handling intrinsics. We do not currently
+// handle these instructions because they are very context dependent.
+
+// Checks that an eh.typeid.for intrinsic is mapped to be illegal.
+TEST(IRInstructionMapper, ExceptionHandlingTypeIdIllegal) {
+  StringRef ModuleString = R"(
+    @_ZTIi = external constant i8*
+    define i32 @f() {
+    then:
+      %0 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
+      ret i32 0
+    }
+
+    declare i32 @llvm.eh.typeid.for(i8*))";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+}
+
+// Checks that an eh.exceptioncode intrinsic is mapped to be illegal.
+TEST(IRInstructionMapper, ExceptionHandlingExceptionCodeIllegal) {
+  StringRef ModuleString = R"(
+    define i32 @f(i32 %a, i32 %b) {
+    entry:
+      %0 = catchswitch within none [label %__except] unwind to caller
+
+    __except:
+      %1 = catchpad within %0 [i8* null]
+      catchret from %1 to label %__except
+
+    then:
+      %2 = call i32 @llvm.eh.exceptioncode(token %1)
+      ret i32 0
+    }
+
+    declare i32 @llvm.eh.exceptioncode(token))";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+}
+
+// Checks that an eh.unwind intrinsic is mapped to be illegal.
+TEST(IRInstructionMapper, ExceptionHandlingUnwindIllegal) {
+  StringRef ModuleString = R"(
+    define i32 @f(i32 %a, i32 %b) {
+    entry:
+      call void @llvm.eh.unwind.init()
+      ret i32 0
+    }
+
+    declare void @llvm.eh.unwind.init())";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+}
+
+// Checks that an eh.exceptionpointer intrinsic is mapped to be illegal.
+TEST(IRInstructionMapper, ExceptionHandlingExceptionPointerIllegal) {
+  StringRef ModuleString = R"(
+    define i32 @f(i32 %a, i32 %b) {
+    entry:
+      %0 = call i8* @llvm.eh.exceptionpointer.p0i8(i32 0)
+      ret i32 0
+    }
+
+    declare i8* @llvm.eh.exceptionpointer.p0i8(i32))";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+}
+
+// Checks that a catchpad instruction is mapped to an illegal value.
+TEST(IRInstructionMapper, CatchpadIllegal) {
+  StringRef ModuleString = R"(
+    declare void @llvm.donothing() nounwind readnone
+
+    define void @function() personality i8 3 {
+    entry:
+      invoke void @llvm.donothing() to label %normal unwind label %exception
+    exception:
+      %cs1 = catchswitch within none [label %catchpad1] unwind to caller
+    catchpad1:
+      catchpad within %cs1 []
+      br label %normal
+    normal:
+      ret void
+    })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+}
+
+// Checks that a cleanuppad instruction is mapped to an illegal value.
+TEST(IRInstructionMapper, CleanuppadIllegal) {
+  StringRef ModuleString = R"(
+    declare void @llvm.donothing() nounwind readnone
+
+    define void @function() personality i8 3 {
+    entry:
+      invoke void @llvm.donothing() to label %normal unwind label %exception
+    exception:
+      %cs1 = catchswitch within none [label %catchpad1] unwind to caller
+    catchpad1:
+      %clean = cleanuppad within none []
+      br label %normal
+    normal:
+      ret void
+    })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+}
+
+// The following three instructions are memory transfer and setting based,
+// which are considered illegal since extra checking is needed to handle
+// address space checking.
+
+// Checks that a memset instruction is mapped to an illegal value.
+TEST(IRInstructionMapper, MemSetIllegal) {
+  StringRef ModuleString = R"(
+    declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
+
+    define i64 @function(i64 %x, i64 %z, i64 %n) {
+    entry:
+      %pool = alloca [59 x i64], align 4
+      %tmp = bitcast [59 x i64]* %pool to i8*
+      call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
+      %cmp3 = icmp eq i64 %n, 0
+      %a = add i64 %x, %z
+      %c = add i64 %x, %z
+      ret i64 0
+    })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(6));
+  ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]);
+}
+
+// Checks that a memcpy instruction is mapped to an illegal value.
+TEST(IRInstructionMapper, MemCpyIllegal) {
+  StringRef ModuleString = R"(
+    declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
+
+    define i64 @function(i64 %x, i64 %z, i64 %n) {
+    entry:
+      %pool = alloca [59 x i64], align 4
+      %tmp = bitcast [59 x i64]* %pool to i8*
+      call void @llvm.memcpy.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
+      %cmp3 = icmp eq i64 %n, 0
+      %a = add i64 %x, %z
+      %c = add i64 %x, %z
+      ret i64 0
+    })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(6));
+  ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]);
+}
+
+// Checks that a memmove instruction is mapped to an illegal value.
+TEST(IRInstructionMapper, MemMoveIllegal) {
+  StringRef ModuleString = R"(
+    declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
+
+    define i64 @function(i64 %x, i64 %z, i64 %n) {
+    entry:
+      %pool = alloca [59 x i64], align 4
+      %tmp = bitcast [59 x i64]* %pool to i8*
+      call void @llvm.memmove.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
+      %cmp3 = icmp eq i64 %n, 0
+      %a = add i64 %x, %z
+      %c = add i64 %x, %z
+      ret i64 0
+    })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(6));
+  ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]);
+}
+
+// Checks that variable argument instructions are mapped to an illegal value.
+// We exclude variable argument instructions since variable arguments require
+// extra checking of the argument list.
+TEST(IRInstructionMapper, VarArgsIllegal) {
+  StringRef ModuleString = R"(
+    declare void @llvm.va_start(i8*)
+    declare void @llvm.va_copy(i8*, i8*)
+    declare void @llvm.va_end(i8*)
+
+    define i32 @func1(i32 %a, double %b, i8* %v, ...) nounwind {
+    entry:
+      %a.addr = alloca i32, align 4
+      %b.addr = alloca double, align 8
+      %ap = alloca i8*, align 4
+      %c = alloca i32, align 4
+      store i32 %a, i32* %a.addr, align 4
+      store double %b, double* %b.addr, align 8
+      %ap1 = bitcast i8** %ap to i8*
+      call void @llvm.va_start(i8* %ap1)
+      store double %b, double* %b.addr, align 8
+      store double %b, double* %b.addr, align 8
+      %0 = va_arg i8** %ap, i32
+      store double %b, double* %b.addr, align 8
+      store double %b, double* %b.addr, align 8
+      call void @llvm.va_copy(i8* %v, i8* %ap1)
+      store double %b, double* %b.addr, align 8
+      store double %b, double* %b.addr, align 8
+      call void @llvm.va_end(i8* %ap1)
+      store i32 %0, i32* %c, align 4
+      %tmp = load i32, i32* %c, align 4
+      ret i32 %tmp
+    })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(16));
+  ASSERT_TRUE(UnsignedVec[4] < UnsignedVec[3]);
+  ASSERT_TRUE(UnsignedVec[7] < UnsignedVec[6]);
+  ASSERT_TRUE(UnsignedVec[10] < UnsignedVec[9]);
+  ASSERT_TRUE(UnsignedVec[13] < UnsignedVec[12]);
+}
+
+// Checks the length of adding two illegal instructions one after the other.
+// We should find that only one element is added for each illegal range.
+TEST(IRInstructionMapper, RepeatedIllegalLength) {
+  StringRef ModuleString = R"(
+    define i32 @f(i32 %a, i32 %b) {
+    bb0:
+      %0 = add i32 %a, %b
+      %1 = mul i32 %a, %b
+      %2 = call i32 @f(i32 %a, i32 %b)
+      %3 = call i32 @f(i32 %a, i32 %b)
+      %4 = add i32 %a, %b
+      %5 = mul i32 %a, %b
+      ret i32 0
+    })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  // Check that the size of the unsigned vector and the instruction list are
+  // the same as a safety check.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  // Make sure that the unsigned vector is the expected size.
+  ASSERT_TRUE(UnsignedVec.size() == 6);
+}

From b76f523be6ea606d9cf494e247546cec1cd7f209 Mon Sep 17 00:00:00 2001
From: zhanghb97
Date: Mon, 14 Sep 2020 22:52:22 +0800
Subject: [PATCH 0927/1079] [mlir] expose affine map to C API

This patch provides C API for MLIR affine map.
- Implement C API for AffineMap class.
- Add Utils.h to include/mlir/CAPI/, and move the definition of the
  CallbackOstream to Utils.h to make sure mlirAffineMapPrint works
  correctly.
- Add TODO for exposing the C API related to AffineExpr and mutable
  affine map.

Differential Revision: https://reviews.llvm.org/D87617
---
 mlir/include/mlir-c/AffineMap.h | 110 ++++++++++++++++++++++++++
 mlir/include/mlir/CAPI/Utils.h  |  48 ++++++++++++
 mlir/lib/CAPI/IR/AffineMap.cpp  | 116 +++++++++++++++++++++++++++-
 mlir/lib/CAPI/IR/IR.cpp         |  41 ++--------
 mlir/test/CAPI/ir.c             | 132 ++++++++++++++++++++++++++++++++
 5 files changed, 411 insertions(+), 36 deletions(-)
 create mode 100644 mlir/include/mlir/CAPI/Utils.h
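
For illustration only (this snippet is not part of the patch, and the helper
name demoAffineMap is hypothetical), a minimal caller of the new API, using
mlirContextCreate/mlirContextDestroy from mlir-c/IR.h plus functions added
below, might look like:

    #include "mlir-c/AffineMap.h"
    #include "mlir-c/IR.h"

    int demoAffineMap(void) {
      MlirContext ctx = mlirContextCreate();
      /* Build the identity map (d0, d1, d2) -> (d0, d1, d2) and query it. */
      MlirAffineMap id3 = mlirAffineMapMultiDimIdentityGet(ctx, 3);
      if (!mlirAffineMapIsIdentity(id3) || mlirAffineMapGetNumDims(id3) != 3)
        return 1;
      mlirAffineMapDump(id3); /* prints the map to stderr */
      mlirContextDestroy(ctx);
      return 0;
    }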
diff --git a/mlir/include/mlir-c/AffineMap.h b/mlir/include/mlir-c/AffineMap.h
index bef13fd0bfa84..a5d99185eaf40 100644
--- a/mlir/include/mlir-c/AffineMap.h
+++ b/mlir/include/mlir-c/AffineMap.h
@@ -18,6 +18,116 @@ extern "C" {
 
 DEFINE_C_API_STRUCT(MlirAffineMap, const void);
 
+/** Gets the context that the given affine map was created with. */
+MlirContext mlirAffineMapGetContext(MlirAffineMap affineMap);
+
+/** Checks whether an affine map is null. */
+inline int mlirAffineMapIsNull(MlirAffineMap affineMap) {
+  return !affineMap.ptr;
+}
+
+/** Checks if two affine maps are equal. */
+int mlirAffineMapEqual(MlirAffineMap a1, MlirAffineMap a2);
+
+/** Prints an affine map by sending chunks of the string representation and
+ * forwarding `userData` to `callback`. Note that the callback may be called
+ * several times with consecutive chunks of the string. */
+void mlirAffineMapPrint(MlirAffineMap affineMap, MlirStringCallback callback,
+                        void *userData);
+
+/** Prints the affine map to the standard error stream. */
+void mlirAffineMapDump(MlirAffineMap affineMap);
+
+/** Creates a zero result affine map with no dimensions or symbols in the
+ * context. The affine map is owned by the context. */
+MlirAffineMap mlirAffineMapEmptyGet(MlirContext ctx);
+
+/** Creates a zero result affine map of the given dimensions and symbols in the
+ * context. The affine map is owned by the context. */
+MlirAffineMap mlirAffineMapGet(MlirContext ctx, intptr_t dimCount,
+                               intptr_t symbolCount);
+
+/** Creates a single constant result affine map in the context. The affine map
+ * is owned by the context. */
+MlirAffineMap mlirAffineMapConstantGet(MlirContext ctx, int64_t val);
+
+/** Creates an identity affine map with 'numDims' dimensions in the context.
+ * The affine map is owned by the context. */
+MlirAffineMap mlirAffineMapMultiDimIdentityGet(MlirContext ctx,
+                                               intptr_t numDims);
+
+/** Creates an identity affine map on the most minor dimensions in the context.
+ * The affine map is owned by the context. The function asserts that the number
+ * of dimensions is greater than or equal to the number of results. */
+MlirAffineMap mlirAffineMapMinorIdentityGet(MlirContext ctx, intptr_t dims,
+                                            intptr_t results);
+
+/** Creates an affine map with a permutation expression and its size in the
+ * context. The permutation expression is a non-empty vector of integers.
+ * The elements of the permutation vector must be continuous from 0 and cannot
+ * be repeated (i.e. `[1,2,0]` is a valid permutation, while `[2,0]` or
+ * `[1,1,2]` is an invalid permutation). The affine map is owned by the
+ * context. */
+MlirAffineMap mlirAffineMapPermutationGet(MlirContext ctx, intptr_t size,
+                                          unsigned *permutation);
+
+/** Checks whether the given affine map is an identity affine map. The function
+ * asserts that the number of dimensions is greater than or equal to the number
+ * of results. */
+int mlirAffineMapIsIdentity(MlirAffineMap affineMap);
+
+/** Checks whether the given affine map is a minor identity affine map. */
+int mlirAffineMapIsMinorIdentity(MlirAffineMap affineMap);
+
+/** Checks whether the given affine map is an empty affine map. */
+int mlirAffineMapIsEmpty(MlirAffineMap affineMap);
+
+/** Checks whether the given affine map is a single result constant affine
+ * map. */
+int mlirAffineMapIsSingleConstant(MlirAffineMap affineMap);
+
+/** Returns the constant result of the given affine map. The function asserts
+ * that the map has a single constant result. */
+int64_t mlirAffineMapGetSingleConstantResult(MlirAffineMap affineMap);
+
+/** Returns the number of dimensions of the given affine map. */
+intptr_t mlirAffineMapGetNumDims(MlirAffineMap affineMap);
+
+/** Returns the number of symbols of the given affine map. */
+intptr_t mlirAffineMapGetNumSymbols(MlirAffineMap affineMap);
+
+/** Returns the number of results of the given affine map. */
+intptr_t mlirAffineMapGetNumResults(MlirAffineMap affineMap);
+
+/** Returns the number of inputs (dimensions + symbols) of the given affine
+ * map. */
+intptr_t mlirAffineMapGetNumInputs(MlirAffineMap affineMap);
+
+/** Checks whether the given affine map represents a subset of a symbol-less
+ * permutation map. */
+int mlirAffineMapIsProjectedPermutation(MlirAffineMap affineMap);
+
+/** Checks whether the given affine map represents a symbol-less permutation
+ * map. */
+int mlirAffineMapIsPermutation(MlirAffineMap affineMap);
+
+/** Returns the affine map consisting of the `resultPos` subset. */
+MlirAffineMap mlirAffineMapGetSubMap(MlirAffineMap affineMap, intptr_t size,
+                                     intptr_t *resultPos);
+
+/** Returns the affine map consisting of the most major `numResults` results.
+ * Returns the null AffineMap if `numResults` is equal to zero.
+ * Returns the `affineMap` if `numResults` is greater than or equal to the
+ * number of results of the given affine map. */
+MlirAffineMap mlirAffineMapGetMajorSubMap(MlirAffineMap affineMap,
+                                          intptr_t numResults);
+
+/** Returns the affine map consisting of the most minor `numResults` results.
+ * Returns the null AffineMap if `numResults` is equal to zero.
+ * Returns the `affineMap` if `numResults` is greater than or equal to the
+ * number of results of the given affine map. */
+MlirAffineMap mlirAffineMapGetMinorSubMap(MlirAffineMap affineMap,
+                                          intptr_t numResults);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/mlir/include/mlir/CAPI/Utils.h b/mlir/include/mlir/CAPI/Utils.h
new file mode 100644
index 0000000000000..022f09df6a5de
--- /dev/null
+++ b/mlir/include/mlir/CAPI/Utils.h
@@ -0,0 +1,48 @@
+//===- Utils.h - C API General Utilities ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines general utilities for the C API. This file should not be
+// included from C++ code other than the C API implementation, nor from C code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_CAPI_UTILS_H
+#define MLIR_CAPI_UTILS_H
+
+#include "llvm/Support/raw_ostream.h"
+
+/* ========================================================================== */
+/* Printing helper. */
+/* ========================================================================== */
+
+namespace mlir {
+namespace detail {
+/// A simple raw ostream subclass that forwards write_impl calls to the
+/// user-supplied callback together with opaque user-supplied data.
+class CallbackOstream : public llvm::raw_ostream {
+public:
+  CallbackOstream(std::function<void(const char *, intptr_t, void *)> callback,
+                  void *opaqueData)
+      : callback(callback), opaqueData(opaqueData), pos(0u) {}
+
+  void write_impl(const char *ptr, size_t size) override {
+    callback(ptr, size, opaqueData);
+    pos += size;
+  }
+
+  uint64_t current_pos() const override { return pos; }
+
+private:
+  std::function<void(const char *, intptr_t, void *)> callback;
+  void *opaqueData;
+  uint64_t pos;
+};
+} // end namespace detail
+} // end namespace mlir
+
+#endif // MLIR_CAPI_UTILS_H
diff --git a/mlir/lib/CAPI/IR/AffineMap.cpp b/mlir/lib/CAPI/IR/AffineMap.cpp
index d80d9e20486a0..6a87c269a4216 100644
--- a/mlir/lib/CAPI/IR/AffineMap.cpp
+++ b/mlir/lib/CAPI/IR/AffineMap.cpp
@@ -9,7 +9,119 @@
 #include "mlir-c/AffineMap.h"
 #include "mlir-c/IR.h"
 #include "mlir/CAPI/AffineMap.h"
+#include "mlir/CAPI/IR.h"
+#include "mlir/CAPI/Utils.h"
 #include "mlir/IR/AffineMap.h"
 
-// This is a placeholder for affine map bindings. The file is here to serve as a
-// compilation unit that includes the headers.
+// TODO: expose the C API related to `AffineExpr` and mutable affine map.
+
+using namespace mlir;
+
+MlirContext mlirAffineMapGetContext(MlirAffineMap affineMap) {
+  return wrap(unwrap(affineMap).getContext());
+}
+
+int mlirAffineMapEqual(MlirAffineMap a1, MlirAffineMap a2) {
+  return unwrap(a1) == unwrap(a2);
+}
+
+void mlirAffineMapPrint(MlirAffineMap affineMap, MlirStringCallback callback,
+                        void *userData) {
+  mlir::detail::CallbackOstream stream(callback, userData);
+  unwrap(affineMap).print(stream);
+  stream.flush();
+}
+
+void mlirAffineMapDump(MlirAffineMap affineMap) { unwrap(affineMap).dump(); }
+
+MlirAffineMap mlirAffineMapEmptyGet(MlirContext ctx) {
+  return wrap(AffineMap::get(unwrap(ctx)));
+}
+
+MlirAffineMap mlirAffineMapGet(MlirContext ctx, intptr_t dimCount,
+                               intptr_t symbolCount) {
+  return wrap(AffineMap::get(dimCount, symbolCount, unwrap(ctx)));
+}
+
+MlirAffineMap mlirAffineMapConstantGet(MlirContext ctx, int64_t val) {
+  return wrap(AffineMap::getConstantMap(val, unwrap(ctx)));
+}
+
+MlirAffineMap mlirAffineMapMultiDimIdentityGet(MlirContext ctx,
+                                               intptr_t numDims) {
+  return wrap(AffineMap::getMultiDimIdentityMap(numDims, unwrap(ctx)));
+}
+
+MlirAffineMap mlirAffineMapMinorIdentityGet(MlirContext ctx, intptr_t dims,
+                                            intptr_t results) {
+  return wrap(AffineMap::getMinorIdentityMap(dims, results, unwrap(ctx)));
+}
+
+MlirAffineMap mlirAffineMapPermutationGet(MlirContext ctx, intptr_t size,
+                                          unsigned *permutation) {
+  return wrap(AffineMap::getPermutationMap(
+      llvm::makeArrayRef(permutation, static_cast<size_t>(size)),
+      unwrap(ctx)));
+}
+
+int mlirAffineMapIsIdentity(MlirAffineMap affineMap) {
+  return unwrap(affineMap).isIdentity();
+}
+
+int mlirAffineMapIsMinorIdentity(MlirAffineMap affineMap) {
+  return unwrap(affineMap).isMinorIdentity();
+}
+
+int mlirAffineMapIsEmpty(MlirAffineMap affineMap) {
+  return unwrap(affineMap).isEmpty();
+}
+
+int mlirAffineMapIsSingleConstant(MlirAffineMap affineMap) {
+  return unwrap(affineMap).isSingleConstant();
+}
+
+int64_t mlirAffineMapGetSingleConstantResult(MlirAffineMap affineMap) {
+  return unwrap(affineMap).getSingleConstantResult();
+}
+
+intptr_t mlirAffineMapGetNumDims(MlirAffineMap affineMap) {
+  return unwrap(affineMap).getNumDims();
+}
+
+intptr_t mlirAffineMapGetNumSymbols(MlirAffineMap affineMap) {
+  return unwrap(affineMap).getNumSymbols();
+}
+
+intptr_t mlirAffineMapGetNumResults(MlirAffineMap affineMap) {
+  return unwrap(affineMap).getNumResults();
+}
+
+intptr_t mlirAffineMapGetNumInputs(MlirAffineMap affineMap) {
+  return unwrap(affineMap).getNumInputs();
+}
+
+int mlirAffineMapIsProjectedPermutation(MlirAffineMap affineMap) {
+  return unwrap(affineMap).isProjectedPermutation();
+}
+
+int mlirAffineMapIsPermutation(MlirAffineMap affineMap) {
+  return unwrap(affineMap).isPermutation();
+}
+
+MlirAffineMap mlirAffineMapGetSubMap(MlirAffineMap affineMap, intptr_t size,
+                                     intptr_t *resultPos) {
+  SmallVector<unsigned, 8> pos;
+  pos.reserve(size);
+  for (intptr_t i = 0; i < size; ++i)
+    pos.push_back(static_cast<unsigned>(resultPos[i]));
+  return wrap(unwrap(affineMap).getSubMap(pos));
+}
+
+MlirAffineMap mlirAffineMapGetMajorSubMap(MlirAffineMap affineMap,
+                                          intptr_t numResults) {
+  return wrap(unwrap(affineMap).getMajorSubMap(numResults));
+}
+
+MlirAffineMap mlirAffineMapGetMinorSubMap(MlirAffineMap affineMap,
+                                          intptr_t numResults) {
+  return wrap(unwrap(affineMap).getMinorSubMap(numResults));
+}
diff --git a/mlir/lib/CAPI/IR/IR.cpp b/mlir/lib/CAPI/IR/IR.cpp
index 2a008a2114d67..8611d6537371a 100644
--- a/mlir/lib/CAPI/IR/IR.cpp
+++ b/mlir/lib/CAPI/IR/IR.cpp
@@ -9,43 +9,16 @@
 #include "mlir-c/IR.h"
 #include "mlir/CAPI/IR.h"
+#include "mlir/CAPI/Utils.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/Module.h"
 #include "mlir/IR/Operation.h"
 #include "mlir/IR/Types.h"
 #include "mlir/Parser.h"
-#include "llvm/Support/raw_ostream.h"
 
 using namespace mlir;
 
-/* ========================================================================== */
-/* Printing helper. */
-/* ========================================================================== */
-
-namespace {
-/// A simple raw ostream subclass that forwards write_impl calls to the
-/// user-supplied callback together with opaque user-supplied data.
-class CallbackOstream : public llvm::raw_ostream {
-public:
-  CallbackOstream(std::function<void(const char *, intptr_t, void *)> callback,
-                  void *opaqueData)
-      : callback(callback), opaqueData(opaqueData), pos(0u) {}
-
-  void write_impl(const char *ptr, size_t size) override {
-    callback(ptr, size, opaqueData);
-    pos += size;
-  }
-
-  uint64_t current_pos() const override { return pos; }
-
-private:
-  std::function<void(const char *, intptr_t, void *)> callback;
-  void *opaqueData;
-  uint64_t pos;
-};
-} // end namespace
-
 /* ========================================================================== */
 /* Context API.
*/ /* ========================================================================== */ @@ -77,7 +50,7 @@ MlirLocation mlirLocationUnknownGet(MlirContext context) { void mlirLocationPrint(MlirLocation location, MlirStringCallback callback, void *userData) { - CallbackOstream stream(callback, userData); + detail::CallbackOstream stream(callback, userData); unwrap(location).print(stream); stream.flush(); } @@ -244,7 +217,7 @@ MlirAttribute mlirOperationGetAttributeByName(MlirOperation op, void mlirOperationPrint(MlirOperation op, MlirStringCallback callback, void *userData) { - CallbackOstream stream(callback, userData); + detail::CallbackOstream stream(callback, userData); unwrap(op)->print(stream); stream.flush(); } @@ -326,7 +299,7 @@ MlirValue mlirBlockGetArgument(MlirBlock block, intptr_t pos) { void mlirBlockPrint(MlirBlock block, MlirStringCallback callback, void *userData) { - CallbackOstream stream(callback, userData); + detail::CallbackOstream stream(callback, userData); unwrap(block)->print(stream); stream.flush(); } @@ -341,7 +314,7 @@ MlirType mlirValueGetType(MlirValue value) { void mlirValuePrint(MlirValue value, MlirStringCallback callback, void *userData) { - CallbackOstream stream(callback, userData); + detail::CallbackOstream stream(callback, userData); unwrap(value).print(stream); stream.flush(); } @@ -361,7 +334,7 @@ MlirContext mlirTypeGetContext(MlirType type) { int mlirTypeEqual(MlirType t1, MlirType t2) { return unwrap(t1) == unwrap(t2); } void mlirTypePrint(MlirType type, MlirStringCallback callback, void *userData) { - CallbackOstream stream(callback, userData); + detail::CallbackOstream stream(callback, userData); unwrap(type).print(stream); stream.flush(); } @@ -382,7 +355,7 @@ int mlirAttributeEqual(MlirAttribute a1, MlirAttribute a2) { void mlirAttributePrint(MlirAttribute attr, MlirStringCallback callback, void *userData) { - CallbackOstream stream(callback, userData); + detail::CallbackOstream stream(callback, userData); unwrap(attr).print(stream); stream.flush(); } diff --git a/mlir/test/CAPI/ir.c b/mlir/test/CAPI/ir.c index ceb19ef730e48..fa63c72bf4e84 100644 --- a/mlir/test/CAPI/ir.c +++ b/mlir/test/CAPI/ir.c @@ -10,6 +10,7 @@ /* RUN: mlir-capi-ir-test 2>&1 | FileCheck %s */ +#include "mlir-c/AffineMap.h" #include "mlir-c/IR.h" #include "mlir-c/Registration.h" #include "mlir-c/StandardAttributes.h" @@ -593,6 +594,121 @@ int printStandardAttributes(MlirContext ctx) { return 0; } +int printAffineMap(MlirContext ctx) { + MlirAffineMap emptyAffineMap = mlirAffineMapEmptyGet(ctx); + MlirAffineMap affineMap = mlirAffineMapGet(ctx, 3, 2); + MlirAffineMap constAffineMap = mlirAffineMapConstantGet(ctx, 2); + MlirAffineMap multiDimIdentityAffineMap = + mlirAffineMapMultiDimIdentityGet(ctx, 3); + MlirAffineMap minorIdentityAffineMap = + mlirAffineMapMinorIdentityGet(ctx, 3, 2); + unsigned permutation[] = {1, 2, 0}; + MlirAffineMap permutationAffineMap = mlirAffineMapPermutationGet( + ctx, sizeof(permutation) / sizeof(unsigned), permutation); + + mlirAffineMapDump(emptyAffineMap); + mlirAffineMapDump(affineMap); + mlirAffineMapDump(constAffineMap); + mlirAffineMapDump(multiDimIdentityAffineMap); + mlirAffineMapDump(minorIdentityAffineMap); + mlirAffineMapDump(permutationAffineMap); + + if (!mlirAffineMapIsIdentity(emptyAffineMap) || + mlirAffineMapIsIdentity(affineMap) || + mlirAffineMapIsIdentity(constAffineMap) || + !mlirAffineMapIsIdentity(multiDimIdentityAffineMap) || + mlirAffineMapIsIdentity(minorIdentityAffineMap) || + mlirAffineMapIsIdentity(permutationAffineMap)) 
+ return 1; + + if (!mlirAffineMapIsMinorIdentity(emptyAffineMap) || + mlirAffineMapIsMinorIdentity(affineMap) || + !mlirAffineMapIsMinorIdentity(multiDimIdentityAffineMap) || + !mlirAffineMapIsMinorIdentity(minorIdentityAffineMap) || + mlirAffineMapIsMinorIdentity(permutationAffineMap)) + return 2; + + if (!mlirAffineMapIsEmpty(emptyAffineMap) || + mlirAffineMapIsEmpty(affineMap) || + mlirAffineMapIsEmpty(constAffineMap) || + mlirAffineMapIsEmpty(multiDimIdentityAffineMap) || + mlirAffineMapIsEmpty(minorIdentityAffineMap) || + mlirAffineMapIsEmpty(permutationAffineMap)) + return 3; + + if (mlirAffineMapIsSingleConstant(emptyAffineMap) || + mlirAffineMapIsSingleConstant(affineMap) || + !mlirAffineMapIsSingleConstant(constAffineMap) || + mlirAffineMapIsSingleConstant(multiDimIdentityAffineMap) || + mlirAffineMapIsSingleConstant(minorIdentityAffineMap) || + mlirAffineMapIsSingleConstant(permutationAffineMap)) + return 4; + + if (mlirAffineMapGetSingleConstantResult(constAffineMap) != 2) + return 5; + + if (mlirAffineMapGetNumDims(emptyAffineMap) != 0 || + mlirAffineMapGetNumDims(affineMap) != 3 || + mlirAffineMapGetNumDims(constAffineMap) != 0 || + mlirAffineMapGetNumDims(multiDimIdentityAffineMap) != 3 || + mlirAffineMapGetNumDims(minorIdentityAffineMap) != 3 || + mlirAffineMapGetNumDims(permutationAffineMap) != 3) + return 6; + + if (mlirAffineMapGetNumSymbols(emptyAffineMap) != 0 || + mlirAffineMapGetNumSymbols(affineMap) != 2 || + mlirAffineMapGetNumSymbols(constAffineMap) != 0 || + mlirAffineMapGetNumSymbols(multiDimIdentityAffineMap) != 0 || + mlirAffineMapGetNumSymbols(minorIdentityAffineMap) != 0 || + mlirAffineMapGetNumSymbols(permutationAffineMap) != 0) + return 7; + + if (mlirAffineMapGetNumResults(emptyAffineMap) != 0 || + mlirAffineMapGetNumResults(affineMap) != 0 || + mlirAffineMapGetNumResults(constAffineMap) != 1 || + mlirAffineMapGetNumResults(multiDimIdentityAffineMap) != 3 || + mlirAffineMapGetNumResults(minorIdentityAffineMap) != 2 || + mlirAffineMapGetNumResults(permutationAffineMap) != 3) + return 8; + + if (mlirAffineMapGetNumInputs(emptyAffineMap) != 0 || + mlirAffineMapGetNumInputs(affineMap) != 5 || + mlirAffineMapGetNumInputs(constAffineMap) != 0 || + mlirAffineMapGetNumInputs(multiDimIdentityAffineMap) != 3 || + mlirAffineMapGetNumInputs(minorIdentityAffineMap) != 3 || + mlirAffineMapGetNumInputs(permutationAffineMap) != 3) + return 9; + + if (!mlirAffineMapIsProjectedPermutation(emptyAffineMap) || + !mlirAffineMapIsPermutation(emptyAffineMap) || + mlirAffineMapIsProjectedPermutation(affineMap) || + mlirAffineMapIsPermutation(affineMap) || + mlirAffineMapIsProjectedPermutation(constAffineMap) || + mlirAffineMapIsPermutation(constAffineMap) || + !mlirAffineMapIsProjectedPermutation(multiDimIdentityAffineMap) || + !mlirAffineMapIsPermutation(multiDimIdentityAffineMap) || + !mlirAffineMapIsProjectedPermutation(minorIdentityAffineMap) || + mlirAffineMapIsPermutation(minorIdentityAffineMap) || + !mlirAffineMapIsProjectedPermutation(permutationAffineMap) || + !mlirAffineMapIsPermutation(permutationAffineMap)) + return 10; + + intptr_t sub[] = {1}; + + MlirAffineMap subMap = mlirAffineMapGetSubMap( + multiDimIdentityAffineMap, sizeof(sub) / sizeof(intptr_t), sub); + MlirAffineMap majorSubMap = + mlirAffineMapGetMajorSubMap(multiDimIdentityAffineMap, 1); + MlirAffineMap minorSubMap = + mlirAffineMapGetMinorSubMap(multiDimIdentityAffineMap, 1); + + mlirAffineMapDump(subMap); + mlirAffineMapDump(majorSubMap); + mlirAffineMapDump(minorSubMap); + + return 0; +} + int main() { 
   MlirContext ctx = mlirContextCreate();
   mlirRegisterAllDialects(ctx);
@@ -704,6 +820,22 @@ int main() {
   errcode = printStandardAttributes(ctx);
   fprintf(stderr, "%d\n", errcode);
 
+  // clang-format off
+  // CHECK-LABEL: @affineMap
+  // CHECK: () -> ()
+  // CHECK: (d0, d1, d2)[s0, s1] -> ()
+  // CHECK: () -> (2)
+  // CHECK: (d0, d1, d2) -> (d0, d1, d2)
+  // CHECK: (d0, d1, d2) -> (d1, d2)
+  // CHECK: (d0, d1, d2) -> (d1, d2, d0)
+  // CHECK: (d0, d1, d2) -> (d1)
+  // CHECK: (d0, d1, d2) -> (d0)
+  // CHECK: (d0, d1, d2) -> (d2)
+  // CHECK: 0
+  fprintf(stderr, "@affineMap\n");
+  errcode = printAffineMap(ctx);
+  fprintf(stderr, "%d\n", errcode);
+
   mlirContextDestroy(ctx);
 
   return 0;

From 436a43afb2cf85ae6e61b4c1ac09e944a6566646 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot
Date: Thu, 17 Sep 2020 01:54:10 +0000
Subject: [PATCH 0928/1079] [gn build] Port b04c1a9d312

---
 llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn       | 1 +
 llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn | 1 +
 2 files changed, 2 insertions(+)

diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
index 335e54b4f68c5..8f86e7fdddcc3 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
@@ -52,6 +52,7 @@ static_library("Analysis") {
     "GlobalsModRef.cpp",
    "GuardUtils.cpp",
    "HeatUtils.cpp",
+    "IRSimilarityIdentifier.cpp",
    "IVDescriptors.cpp",
    "IVUsers.cpp",
    "IndirectCallPromotionAnalysis.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn
index 6adc9866e883f..50c02aa2214ef 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn
@@ -25,6 +25,7 @@ unittest("AnalysisTests") {
    "DomTreeUpdaterTest.cpp",
    "FunctionPropertiesAnalysisTest.cpp",
    "GlobalsModRefTest.cpp",
+    "IRSimilarityIdentifierTest.cpp",
    "IVDescriptorsTest.cpp",
    "LazyCallGraphTest.cpp",
    "LoadsTest.cpp",

From fb1abe00635c1ec28e55921709904d5ca2e86a74 Mon Sep 17 00:00:00 2001
From: Ryan Prichard
Date: Wed, 16 Sep 2020 01:22:55 -0700
Subject: [PATCH 0929/1079] [libunwind][DWARF] Fix end of .eh_frame calculation

 * When .eh_frame is located using .eh_frame_hdr (PT_GNU_EH_FRAME), the
   start of .eh_frame is known, but not the size. In this case, the
   unwinder must rely on a terminator present at the end of .eh_frame.
   Set dwarf_section_length to UINTPTR_MAX to indicate this.

 * Add a new field, text_segment_length, that the FrameHeaderCache uses
   to track the size of the PT_LOAD segment indicated by dso_base.

 * Compute ehSectionEnd by adding sectionLength to ehSectionStart, never
   to fdeHint.

Fixes PR46829.

Differential Revision: https://reviews.llvm.org/D87750
---
 libunwind/src/AddressSpace.hpp                | 13 ++++++++++---
 libunwind/src/DwarfParser.hpp                 | 12 +++++++-----
 libunwind/src/FrameHeaderCache.hpp            |  2 +-
 libunwind/src/UnwindCursor.hpp                |  6 +++---
 libunwind/test/frameheadercache_test.pass.cpp |  6 +++---
 5 files changed, 24 insertions(+), 15 deletions(-)
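
To make the new length convention concrete, here is a minimal sketch (names
taken from the diffs below; this is not code added by the patch itself) of
how findFDE now derives its scan limit:

    // If .eh_frame was found via .eh_frame_hdr, its true size is unknown and
    // dwarf_section_length holds the UINTPTR_MAX sentinel; the scan then runs
    // until the zero terminator rather than to a computed end address.
    const pint_t ehSectionEnd = (sectionLength == UINTPTR_MAX)
                                    ? static_cast<pint_t>(-1)
                                    : (ehSectionStart + sectionLength);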
diff --git a/libunwind/src/AddressSpace.hpp b/libunwind/src/AddressSpace.hpp
index eccc2153c6977..26397c28798e1 100644
--- a/libunwind/src/AddressSpace.hpp
+++ b/libunwind/src/AddressSpace.hpp
@@ -119,6 +119,10 @@ struct UnwindInfoSections {
   // No dso_base for SEH or ARM EHABI.
   uintptr_t dso_base;
 #endif
+#if defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) &&                                 \
+    defined(_LIBUNWIND_SUPPORT_DWARF_INDEX)
+  uintptr_t text_segment_length;
+#endif
 #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
   uintptr_t dwarf_section;
   uintptr_t dwarf_section_length;
@@ -410,7 +414,7 @@ static bool checkAddrInSegment(const Elf_Phdr *phdr, size_t image_base,
     uintptr_t end = begin + phdr->p_memsz;
     if (cbdata->targetAddr >= begin && cbdata->targetAddr < end) {
       cbdata->sects->dso_base = begin;
-      cbdata->sects->dwarf_section_length = phdr->p_memsz;
+      cbdata->sects->text_segment_length = phdr->p_memsz;
       return true;
     }
   }
@@ -450,8 +454,12 @@ static int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo,
       found_hdr = EHHeaderParser<LocalAddressSpace>::decodeEHHdr(
           *cbdata->addressSpace, eh_frame_hdr_start, phdr->p_memsz, hdrInfo);
-      if (found_hdr)
+      if (found_hdr) {
+        // .eh_frame_hdr records the start of .eh_frame, but not its size.
+        // Rely on a zero terminator to find the end of the section.
         cbdata->sects->dwarf_section = hdrInfo.eh_frame_ptr;
+        cbdata->sects->dwarf_section_length = UINTPTR_MAX;
+      }
     } else if (!found_obj) {
       found_obj = checkAddrInSegment(phdr, image_base, cbdata);
     }
@@ -462,7 +470,6 @@ static int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo,
       return 1;
     }
   }
-  cbdata->sects->dwarf_section_length = 0;
   return 0;
 }
 
diff --git a/libunwind/src/DwarfParser.hpp b/libunwind/src/DwarfParser.hpp
index 1ce2cf2943a2f..86c0522afd3ff 100644
--- a/libunwind/src/DwarfParser.hpp
+++ b/libunwind/src/DwarfParser.hpp
@@ -136,7 +136,7 @@ class CFI_Parser {
   };
 
   static bool findFDE(A &addressSpace, pint_t pc, pint_t ehSectionStart,
-                      uint32_t sectionLength, pint_t fdeHint, FDE_Info *fdeInfo,
+                      uintptr_t sectionLength, pint_t fdeHint, FDE_Info *fdeInfo,
                       CIE_Info *cieInfo);
   static const char *decodeFDE(A &addressSpace, pint_t fdeStart,
                                FDE_Info *fdeInfo, CIE_Info *cieInfo);
@@ -167,7 +167,7 @@ const char *CFI_Parser<A>::decodeFDE(A &addressSpace, pint_t fdeStart,
     p += 8;
   }
   if (cfiLength == 0)
-    return "FDE has zero length"; // end marker
+    return "FDE has zero length"; // zero terminator
   uint32_t ciePointer = addressSpace.get32(p);
   if (ciePointer == 0)
     return "FDE is really a CIE"; // this is a CIE not an FDE
@@ -212,11 +212,13 @@ const char *CFI_Parser<A>::decodeFDE(A &addressSpace, pint_t fdeStart,
 /// Scan an eh_frame section to find an FDE for a pc
 template <typename A>
 bool CFI_Parser<A>::findFDE(A &addressSpace, pint_t pc, pint_t ehSectionStart,
-                            uint32_t sectionLength, pint_t fdeHint,
+                            uintptr_t sectionLength, pint_t fdeHint,
                             FDE_Info *fdeInfo, CIE_Info *cieInfo) {
   //fprintf(stderr, "findFDE(0x%llX)\n", (long long)pc);
   pint_t p = (fdeHint != 0) ? fdeHint : ehSectionStart;
-  const pint_t ehSectionEnd = p + sectionLength;
+  const pint_t ehSectionEnd = (sectionLength == UINTPTR_MAX)
+                                  ? static_cast<pint_t>(-1)
+                                  : (ehSectionStart + sectionLength);
   while (p < ehSectionEnd) {
     pint_t currentCFI = p;
     //fprintf(stderr, "findFDE() CFI at 0x%llX\n", (long long)p);
@@ -228,7 +230,7 @@ bool CFI_Parser<A>::findFDE(A &addressSpace, pint_t pc, pint_t ehSectionStart,
     p += 8;
   }
   if (cfiLength == 0)
-    return false; // end marker
+    return false; // zero terminator
   uint32_t id = addressSpace.get32(p);
   if (id == 0) {
     // Skip over CIEs.
diff --git a/libunwind/src/FrameHeaderCache.hpp b/libunwind/src/FrameHeaderCache.hpp
index 813fcd408b262..54d5d33c3cd7e 100644
--- a/libunwind/src/FrameHeaderCache.hpp
+++ b/libunwind/src/FrameHeaderCache.hpp
@@ -32,7 +32,7 @@ class _LIBUNWIND_HIDDEN FrameHeaderCache {
 
   struct CacheEntry {
     uintptr_t LowPC() { return Info.dso_base; };
-    uintptr_t HighPC() { return Info.dso_base + Info.dwarf_section_length; };
+    uintptr_t HighPC() { return Info.dso_base + Info.text_segment_length; };
     UnwindInfoSections Info;
    CacheEntry *Next;
  };
diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
index 206b5e3983217..9f8fa65107b41 100644
--- a/libunwind/src/UnwindCursor.hpp
+++ b/libunwind/src/UnwindCursor.hpp
@@ -1517,7 +1517,7 @@ bool UnwindCursor<A, R>::getInfoFromDwarfSection(pint_t pc,
   // If compact encoding table gave offset into dwarf section, go directly there
   if (fdeSectionOffsetHint != 0) {
     foundFDE = CFI_Parser<A>::findFDE(_addressSpace, pc, sects.dwarf_section,
-                                      (uint32_t)sects.dwarf_section_length,
+                                      sects.dwarf_section_length,
                                       sects.dwarf_section + fdeSectionOffsetHint,
                                       &fdeInfo, &cieInfo);
   }
@@ -1534,7 +1534,7 @@ bool UnwindCursor<A, R>::getInfoFromDwarfSection(pint_t pc,
     if (cachedFDE != 0) {
       foundFDE =
           CFI_Parser<A>::findFDE(_addressSpace, pc, sects.dwarf_section,
-                                 (uint32_t)sects.dwarf_section_length,
+                                 sects.dwarf_section_length,
                                  cachedFDE, &fdeInfo, &cieInfo);
       foundInCache = foundFDE;
     }
@@ -1542,7 +1542,7 @@ bool UnwindCursor<A, R>::getInfoFromDwarfSection(pint_t pc,
     if (!foundFDE) {
       // Still not found, do full scan of __eh_frame section.
       foundFDE = CFI_Parser<A>::findFDE(_addressSpace, pc, sects.dwarf_section,
-                                        (uint32_t)sects.dwarf_section_length, 0,
+                                        sects.dwarf_section_length, 0,
                                        &fdeInfo, &cieInfo);
     }
     if (foundFDE) {
diff --git a/libunwind/test/frameheadercache_test.pass.cpp b/libunwind/test/frameheadercache_test.pass.cpp
index 7f2d8e22b9f57..15c7c67c58eae 100644
--- a/libunwind/test/frameheadercache_test.pass.cpp
+++ b/libunwind/test/frameheadercache_test.pass.cpp
@@ -16,7 +16,7 @@
 #include "../src/AddressSpace.hpp"
 
 #define kBaseAddr 0xFFF000
-#define kDwarfSectionLength 0xFF
+#define kTextSegmentLength 0xFF
 
 using namespace libunwind;
 
@@ -32,7 +32,7 @@ int main() {
 
   UnwindInfoSections UIS;
   UIS.dso_base = kBaseAddr;
-  UIS.dwarf_section_length = kDwarfSectionLength;
+  UIS.text_segment_length = kTextSegmentLength;
   dl_iterate_cb_data CBData;
   // Unused by the cache.
   CBData.addressSpace = nullptr;
@@ -58,7 +58,7 @@ int main() {
     abort();
 
   // Add enough things to the cache that the entry is evicted.
for (int i = 0; i < 9; i++) { - UIS.dso_base = kBaseAddr + (kDwarfSectionLength * i); + UIS.dso_base = kBaseAddr + (kTextSegmentLength * i); FHC.add(&UIS); } CBData.targetAddr = kBaseAddr; From 5782ab0f52db1b1914d8ee5fe3828b0a5de9d685 Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Wed, 16 Sep 2020 21:51:53 -0400 Subject: [PATCH 0930/1079] [MachineSink] add one more mir case - nfc --- .../PowerPC/sink-down-more-instructions-1.mir | 597 ++++++++++++++++++ 1 file changed, 597 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir diff --git a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir new file mode 100644 index 0000000000000..5e19b9d005e4e --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir @@ -0,0 +1,597 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple powerpc64le-unknown-linux-gnu -o - %s -verify-machineinstrs \ +# RUN: -run-pass=machine-sink | FileCheck %s + +--- | + ; ModuleID = 'sink-down-more-instructions-1.ll' + source_filename = "sink-down-more-instructions-1.c" + target datalayout = "e-m:e-i64:64-n32:64" + target triple = "powerpc64le-unknown-linux-gnu" + + ; Function Attrs: nofree norecurse nounwind + define dso_local signext i32 @foo(i32 signext %0, i32 signext %1, i32* nocapture readonly %2, i32* nocapture %3, i32 signext %4) local_unnamed_addr #0 { + %6 = icmp sgt i32 %4, 0 + br i1 %6, label %7, label %37 + + 7: ; preds = %5 + %8 = zext i32 %4 to i64 + %9 = icmp eq i32 %4, 1 + br i1 %9, label %17, label %10 + + 10: ; preds = %7 + %11 = and i64 %8, 4294967294 + %scevgep20 = getelementptr i32, i32* %2, i64 -2 + %scevgep2021 = bitcast i32* %scevgep20 to i8* + %scevgep22 = getelementptr i32, i32* %3, i64 -2 + %scevgep2223 = bitcast i32* %scevgep22 to i8* + %12 = add nsw i64 %11, -2 + %13 = lshr i64 %12, 1 + %14 = add nuw i64 %13, 1 + call void @llvm.set.loop.iterations.i64(i64 %14) + br label %38 + + 15: ; preds = %74 + %16 = add nuw i32 %tmp18, 102 + br label %17 + + 17: ; preds = %15, %7 + %18 = phi i64 [ 0, %7 ], [ %78, %15 ] + %19 = phi i32 [ 100, %7 ], [ %16, %15 ] + %20 = phi i32 [ 0, %7 ], [ %66, %15 ] + %21 = and i64 %8, 1 + %22 = icmp eq i64 %21, 0 + br i1 %22, label %37, label %23 + + 23: ; preds = %17 + %24 = getelementptr inbounds i32, i32* %2, i64 %18 + %25 = load i32, i32* %24, align 4, !tbaa !2 + %26 = add nsw i32 %25, %20 + switch i32 %0, label %30 [ + i32 1, label %27 + i32 3, label %33 + ] + + 27: ; preds = %23 + %28 = trunc i64 %18 to i32 + %29 = shl i32 %28, 1 + br label %33 + + 30: ; preds = %23 + %31 = trunc i64 %18 to i32 + %32 = urem i32 %31, 30 + br label %33 + + 33: ; preds = %30, %27, %23 + %34 = phi i32 [ %32, %30 ], [ %29, %27 ], [ %19, %23 ] + %35 = add nsw i32 %34, %26 + %36 = getelementptr inbounds i32, i32* %3, i64 %18 + store i32 %35, i32* %36, align 4, !tbaa !2 + br label %37 + + 37: ; preds = %33, %17, %5 + ret i32 undef + + 38: ; preds = %74, %10 + %39 = phi i64 [ 0, %10 ], [ %78, %74 ] + %40 = phi i32 [ 0, %10 ], [ %66, %74 ] + %41 = phi i8* [ %scevgep2021, %10 ], [ %45, %74 ] + %42 = phi i8* [ %scevgep2223, %10 ], [ %43, %74 ] + %43 = getelementptr i8, i8* %42, i64 8 + %44 = bitcast i8* %43 to i32* + %45 = getelementptr i8, i8* %41, i64 8 + %46 = bitcast i8* %45 to i32* + %lsr19 = trunc i64 %39 to i32 + %47 = udiv i32 %lsr19, 30 + %48 = mul nsw i32 %47, -30 + %49 = zext i32 %48 to i64 + %50 = add nuw nsw i64 %49, 1 + %51 = load i32, i32* %46, align 
4, !tbaa !2 + %52 = add nsw i32 %51, %40 + switch i32 %0, label %58 [ + i32 1, label %53 + i32 3, label %56 + ] + + 53: ; preds = %38 + %54 = trunc i64 %39 to i32 + %55 = shl i32 %54, 1 + br label %60 + + 56: ; preds = %38 + %57 = add nuw nsw i32 %lsr19, 100 + br label %60 + + 58: ; preds = %38 + %59 = add i64 %39, %49 + %tmp15 = trunc i64 %59 to i32 + br label %60 + + 60: ; preds = %58, %56, %53 + %61 = phi i32 [ %tmp15, %58 ], [ %57, %56 ], [ %55, %53 ] + %62 = add nsw i32 %61, %52 + store i32 %62, i32* %44, align 4, !tbaa !2 + %63 = or i64 %39, 1 + %64 = getelementptr i8, i8* %45, i64 4 + %uglygep1112.cast = bitcast i8* %64 to i32* + %65 = load i32, i32* %uglygep1112.cast, align 4, !tbaa !2 + %66 = add nsw i32 %65, %52 + switch i32 %0, label %72 [ + i32 1, label %69 + i32 3, label %67 + ] + + 67: ; preds = %60 + %68 = add nuw nsw i32 %lsr19, 101 + br label %74 + + 69: ; preds = %60 + %70 = trunc i64 %63 to i32 + %71 = shl i32 %70, 1 + br label %74 + + 72: ; preds = %60 + %73 = add i64 %39, %50 + %tmp = trunc i64 %73 to i32 + br label %74 + + 74: ; preds = %72, %69, %67 + %75 = phi i32 [ %tmp, %72 ], [ %68, %67 ], [ %71, %69 ] + %76 = add nsw i32 %75, %66 + %77 = getelementptr i8, i8* %43, i64 4 + %uglygep78.cast = bitcast i8* %77 to i32* + store i32 %76, i32* %uglygep78.cast, align 4, !tbaa !2 + %78 = add nuw nsw i64 %39, 2 + %79 = add i64 %78, -2 + %tmp18 = trunc i64 %79 to i32 + %80 = call i1 @llvm.loop.decrement.i64(i64 1) + br i1 %80, label %38, label %15 + } + + ; Function Attrs: noduplicate nounwind + declare void @llvm.set.loop.iterations.i64(i64) #1 + + ; Function Attrs: noduplicate nounwind + declare i1 @llvm.loop.decrement.i64(i64) #1 + + attributes #0 = { nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-spe" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { noduplicate nounwind } + + !llvm.module.flags = !{!0} + !llvm.ident = !{!1} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{!"clang version 12.0.0"} + !2 = !{!3, !3, i64 0} + !3 = !{!"int", !4, i64 0} + !4 = !{!"omnipotent char", !5, i64 0} + !5 = !{!"Simple C/C++ TBAA"} + +... 
+--- +name: foo +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: g8rc } + - { id: 1, class: g8rc } + - { id: 2, class: g8rc } + - { id: 3, class: gprc } + - { id: 4, class: g8rc } + - { id: 5, class: gprc } + - { id: 6, class: gprc } + - { id: 7, class: gprc } + - { id: 8, class: gprc_and_gprc_nor0 } + - { id: 9, class: gprc } + - { id: 10, class: gprc } + - { id: 11, class: g8rc_and_g8rc_nox0 } + - { id: 12, class: gprc } + - { id: 13, class: g8rc_and_g8rc_nox0 } + - { id: 14, class: g8rc_and_g8rc_nox0 } + - { id: 15, class: g8rc_and_g8rc_nox0 } + - { id: 16, class: g8rc_and_g8rc_nox0 } + - { id: 17, class: g8rc_and_g8rc_nox0 } + - { id: 18, class: gprc_and_gprc_nor0 } + - { id: 19, class: g8rc } + - { id: 20, class: g8rc } + - { id: 21, class: gprc } + - { id: 22, class: gprc_and_gprc_nor0 } + - { id: 23, class: gprc } + - { id: 24, class: gprc } + - { id: 25, class: gprc } + - { id: 26, class: g8rc } + - { id: 27, class: gprc } + - { id: 28, class: gprc } + - { id: 29, class: gprc } + - { id: 30, class: gprc } + - { id: 31, class: gprc } + - { id: 32, class: g8rc } + - { id: 33, class: gprc_and_gprc_nor0 } + - { id: 34, class: g8rc } + - { id: 35, class: g8rc } + - { id: 36, class: g8rc_and_g8rc_nox0 } + - { id: 37, class: g8rc_and_g8rc_nox0 } + - { id: 38, class: g8rc } + - { id: 39, class: gprc } + - { id: 40, class: gprc } + - { id: 41, class: crrc } + - { id: 42, class: g8rc } + - { id: 43, class: gprc } + - { id: 44, class: gprc } + - { id: 45, class: g8rc } + - { id: 46, class: g8rc } + - { id: 47, class: crrc } + - { id: 48, class: g8rc } + - { id: 49, class: gprc } + - { id: 50, class: g8rc_and_g8rc_nox0 } + - { id: 51, class: g8rc } + - { id: 52, class: g8rc_and_g8rc_nox0 } + - { id: 53, class: g8rc } + - { id: 54, class: gprc } + - { id: 55, class: g8rc_and_g8rc_nox0 } + - { id: 56, class: gprc } + - { id: 57, class: gprc } + - { id: 58, class: gprc } + - { id: 59, class: gprc } + - { id: 60, class: gprc } + - { id: 61, class: g8rc } + - { id: 62, class: g8rc } + - { id: 63, class: crrc } + - { id: 64, class: crrc } + - { id: 65, class: gprc } + - { id: 66, class: g8rc } + - { id: 67, class: gprc } + - { id: 68, class: gprc } + - { id: 69, class: crrc } + - { id: 70, class: crrc } + - { id: 71, class: gprc } + - { id: 72, class: g8rc } + - { id: 73, class: gprc } + - { id: 74, class: gprc_and_gprc_nor0 } + - { id: 75, class: crbitrc } + - { id: 76, class: g8rc } + - { id: 77, class: gprc } + - { id: 78, class: crrc } + - { id: 79, class: crrc } + - { id: 80, class: gprc } + - { id: 81, class: gprc } + - { id: 82, class: gprc } + - { id: 83, class: gprc } + - { id: 84, class: gprc } + - { id: 85, class: gprc } + - { id: 86, class: gprc } + - { id: 87, class: gprc } + - { id: 88, class: g8rc } + - { id: 89, class: g8rc } + - { id: 90, class: g8rc } + - { id: 91, class: gprc } + - { id: 92, class: gprc_nor0 } + - { id: 93, class: gprc } + - { id: 94, class: gprc_nor0 } + - { id: 95, class: crrc } +liveins: + - { reg: '$x3', virtual-reg: '%34' } + - { reg: '$x5', virtual-reg: '%36' } + - { reg: '$x6', virtual-reg: '%37' } + - { reg: '$x7', virtual-reg: '%38' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: foo + ; CHECK: bb.0 (%ir-block.5): + ; CHECK: successors: %bb.1(0x50000000), %bb.8(0x30000000) + ; CHECK: liveins: $x3, $x5, $x6, $x7 + ; CHECK: [[COPY:%[0-9]+]]:g8rc = COPY $x7 + ; CHECK: [[COPY1:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x6 + ; CHECK: [[COPY2:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x5 + ; CHECK: 
[[COPY3:%[0-9]+]]:g8rc = COPY $x3 + ; CHECK: [[COPY4:%[0-9]+]]:gprc = COPY [[COPY]].sub_32 + ; CHECK: [[CMPWI:%[0-9]+]]:crrc = CMPWI [[COPY4]], 1 + ; CHECK: BCC 12, killed [[CMPWI]], %bb.8 + ; CHECK: B %bb.1 + ; CHECK: bb.1 (%ir-block.7): + ; CHECK: successors: %bb.18(0x40000000), %bb.2(0x40000000) + ; CHECK: [[COPY5:%[0-9]+]]:gprc = COPY [[COPY3]].sub_32 + ; CHECK: [[DEF:%[0-9]+]]:g8rc = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:g8rc = INSERT_SUBREG [[DEF]], [[COPY4]], %subreg.sub_32 + ; CHECK: [[RLDICL:%[0-9]+]]:g8rc = RLDICL killed [[INSERT_SUBREG]], 0, 32 + ; CHECK: [[CMPLWI:%[0-9]+]]:crrc = CMPLWI [[COPY4]], 1 + ; CHECK: [[CMPLWI1:%[0-9]+]]:crrc = CMPLWI [[COPY5]], 3 + ; CHECK: BCC 68, killed [[CMPLWI]], %bb.2 + ; CHECK: bb.18: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: [[LI:%[0-9]+]]:gprc = LI 0 + ; CHECK: [[LI1:%[0-9]+]]:gprc = LI 100 + ; CHECK: [[LI8_:%[0-9]+]]:g8rc = LI8 0 + ; CHECK: B %bb.4 + ; CHECK: bb.2 (%ir-block.10): + ; CHECK: successors: %bb.9(0x80000000) + ; CHECK: [[RLWINM8_:%[0-9]+]]:g8rc_and_g8rc_nox0 = RLWINM8 [[RLDICL]], 0, 0, 30 + ; CHECK: [[ADDI8_:%[0-9]+]]:g8rc = ADDI8 [[COPY2]], -8 + ; CHECK: [[ADDI8_1:%[0-9]+]]:g8rc = ADDI8 [[COPY1]], -8 + ; CHECK: [[ADDI8_2:%[0-9]+]]:g8rc = nsw ADDI8 killed [[RLWINM8_]], -2 + ; CHECK: [[RLDICL1:%[0-9]+]]:g8rc_and_g8rc_nox0 = RLDICL [[ADDI8_2]], 63, 1 + ; CHECK: [[ADDI8_3:%[0-9]+]]:g8rc = nuw ADDI8 killed [[RLDICL1]], 1 + ; CHECK: MTCTR8loop killed [[ADDI8_3]], implicit-def dead $ctr8 + ; CHECK: [[LI2:%[0-9]+]]:gprc = LI 0 + ; CHECK: [[LI8_1:%[0-9]+]]:g8rc = LI8 0 + ; CHECK: [[LIS:%[0-9]+]]:gprc = LIS 34952 + ; CHECK: [[ORI:%[0-9]+]]:gprc = ORI [[LIS]], 34953 + ; CHECK: [[DEF1:%[0-9]+]]:g8rc = IMPLICIT_DEF + ; CHECK: [[CMPLWI2:%[0-9]+]]:crrc = CMPLWI [[COPY5]], 1 + ; CHECK: B %bb.9 + ; CHECK: bb.3 (%ir-block.15): + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: [[COPY6:%[0-9]+]]:gprc_and_gprc_nor0 = COPY %32.sub_32 + ; CHECK: [[ADDI:%[0-9]+]]:gprc_and_gprc_nor0 = ADDI [[COPY6]], -2 + ; CHECK: [[ADDI1:%[0-9]+]]:gprc = nuw ADDI [[ADDI]], 102 + ; CHECK: bb.4 (%ir-block.17): + ; CHECK: successors: %bb.8(0x40000000), %bb.5(0x40000000) + ; CHECK: [[PHI:%[0-9]+]]:g8rc = PHI [[LI8_]], %bb.18, %32, %bb.3 + ; CHECK: [[PHI1:%[0-9]+]]:gprc = PHI [[LI1]], %bb.18, [[ADDI1]], %bb.3 + ; CHECK: [[PHI2:%[0-9]+]]:gprc = PHI [[LI]], %bb.18, %27, %bb.3 + ; CHECK: [[ANDI8_rec:%[0-9]+]]:g8rc = ANDI8_rec [[RLDICL]], 1, implicit-def $cr0 + ; CHECK: [[COPY7:%[0-9]+]]:crbitrc = COPY $cr0gt + ; CHECK: BCn killed [[COPY7]], %bb.8 + ; CHECK: B %bb.5 + ; CHECK: bb.5 (%ir-block.23): + ; CHECK: successors: %bb.7(0x2aaaaaab), %bb.6(0x55555555) + ; CHECK: [[RLDICR:%[0-9]+]]:g8rc = RLDICR [[PHI]], 2, 61 + ; CHECK: [[LWZX:%[0-9]+]]:gprc = LWZX [[COPY2]], [[RLDICR]] :: (load 4 from %ir.24, !tbaa !2) + ; CHECK: [[ADD4_:%[0-9]+]]:gprc = nsw ADD4 killed [[LWZX]], [[PHI2]] + ; CHECK: BCC 76, [[CMPLWI1]], %bb.7 + ; CHECK: B %bb.6 + ; CHECK: bb.6 (%ir-block.23): + ; CHECK: successors: %bb.7(0x80000000) + ; CHECK: [[CMPLWI3:%[0-9]+]]:crrc = CMPLWI [[COPY5]], 1 + ; CHECK: [[COPY8:%[0-9]+]]:gprc = COPY [[PHI]].sub_32 + ; CHECK: [[LIS1:%[0-9]+]]:gprc = LIS 34952 + ; CHECK: [[ORI1:%[0-9]+]]:gprc = ORI killed [[LIS1]], 34953 + ; CHECK: [[MULHWU:%[0-9]+]]:gprc = MULHWU [[COPY8]], killed [[ORI1]] + ; CHECK: [[RLWINM:%[0-9]+]]:gprc = RLWINM [[MULHWU]], 28, 4, 31 + ; CHECK: [[MULLI:%[0-9]+]]:gprc = MULLI killed [[RLWINM]], 30 + ; CHECK: [[SUBF:%[0-9]+]]:gprc = SUBF killed [[MULLI]], [[COPY8]] + ; CHECK: [[COPY9:%[0-9]+]]:gprc = COPY [[PHI]].sub_32 + ; CHECK: 
[[RLWINM1:%[0-9]+]]:gprc_and_gprc_nor0 = RLWINM [[COPY9]], 1, 0, 30 + ; CHECK: [[ISEL:%[0-9]+]]:gprc = ISEL [[RLWINM1]], [[SUBF]], [[CMPLWI3]].sub_eq + ; CHECK: B %bb.7 + ; CHECK: bb.7 (%ir-block.33): + ; CHECK: successors: %bb.8(0x80000000) + ; CHECK: [[PHI3:%[0-9]+]]:gprc = PHI [[PHI1]], %bb.5, [[ISEL]], %bb.6 + ; CHECK: [[ADD4_1:%[0-9]+]]:gprc = nsw ADD4 [[PHI3]], [[ADD4_]] + ; CHECK: STWX killed [[ADD4_1]], [[COPY1]], [[RLDICR]] :: (store 4 into %ir.36, !tbaa !2) + ; CHECK: bb.8 (%ir-block.37): + ; CHECK: [[LI8_2:%[0-9]+]]:g8rc = LI8 0 + ; CHECK: $x3 = COPY [[LI8_2]] + ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit $x3 + ; CHECK: bb.9 (%ir-block.38): + ; CHECK: successors: %bb.11(0x2aaaaaab), %bb.10(0x55555555) + ; CHECK: [[PHI4:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[LI8_1]], %bb.2, %32, %bb.17 + ; CHECK: [[PHI5:%[0-9]+]]:gprc = PHI [[LI2]], %bb.2, %27, %bb.17 + ; CHECK: [[PHI6:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_]], %bb.2, %55, %bb.17 + ; CHECK: [[PHI7:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.2, %15, %bb.17 + ; CHECK: [[ADDI8_4:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADDI8 [[PHI7]], 8 + ; CHECK: [[LWZU:%[0-9]+]]:gprc, [[LWZU1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LWZU 8, [[PHI6]] :: (load 4 from %ir.46, !tbaa !2) + ; CHECK: [[COPY10:%[0-9]+]]:gprc_and_gprc_nor0 = COPY [[PHI4]].sub_32 + ; CHECK: [[MULHWU1:%[0-9]+]]:gprc = MULHWU [[COPY10]], [[ORI]] + ; CHECK: [[RLWINM2:%[0-9]+]]:gprc = RLWINM [[MULHWU1]], 28, 4, 31 + ; CHECK: [[MULLI1:%[0-9]+]]:gprc = nsw MULLI killed [[RLWINM2]], -30 + ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:g8rc = INSERT_SUBREG [[DEF1]], killed [[MULLI1]], %subreg.sub_32 + ; CHECK: [[RLDICL2:%[0-9]+]]:g8rc = RLDICL killed [[INSERT_SUBREG1]], 0, 32 + ; CHECK: [[ADD4_2:%[0-9]+]]:gprc = nsw ADD4 killed [[LWZU]], [[PHI5]] + ; CHECK: BCC 76, [[CMPLWI1]], %bb.11 + ; CHECK: B %bb.10 + ; CHECK: bb.10 (%ir-block.38): + ; CHECK: successors: %bb.12(0x80000000) + ; CHECK: [[ADD8_:%[0-9]+]]:g8rc = ADD8 [[PHI4]], [[RLDICL2]] + ; CHECK: [[COPY11:%[0-9]+]]:gprc = COPY [[ADD8_]].sub_32 + ; CHECK: [[COPY12:%[0-9]+]]:gprc = COPY [[PHI4]].sub_32 + ; CHECK: [[RLWINM3:%[0-9]+]]:gprc_and_gprc_nor0 = RLWINM [[COPY12]], 1, 0, 30 + ; CHECK: [[ISEL1:%[0-9]+]]:gprc = ISEL [[RLWINM3]], [[COPY11]], [[CMPLWI2]].sub_eq + ; CHECK: B %bb.12 + ; CHECK: bb.11 (%ir-block.56): + ; CHECK: successors: %bb.12(0x80000000) + ; CHECK: [[ADDI2:%[0-9]+]]:gprc = nuw nsw ADDI [[COPY10]], 100 + ; CHECK: B %bb.12 + ; CHECK: bb.12 (%ir-block.60): + ; CHECK: successors: %bb.15(0x2aaaaaab), %bb.13(0x55555555) + ; CHECK: [[PHI8:%[0-9]+]]:gprc = PHI [[ADDI2]], %bb.11, [[ISEL1]], %bb.10 + ; CHECK: [[COPY13:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[ADDI8_4]] + ; CHECK: [[ADD4_3:%[0-9]+]]:gprc = nsw ADD4 [[PHI8]], [[ADD4_2]] + ; CHECK: STW killed [[ADD4_3]], 0, [[ADDI8_4]] :: (store 4 into %ir.44, !tbaa !2) + ; CHECK: [[LWZ:%[0-9]+]]:gprc = LWZ 4, [[LWZU1]] :: (load 4 from %ir.uglygep1112.cast, !tbaa !2) + ; CHECK: [[ADD4_4:%[0-9]+]]:gprc = nsw ADD4 killed [[LWZ]], [[ADD4_2]] + ; CHECK: BCC 76, [[CMPLWI2]], %bb.15 + ; CHECK: B %bb.13 + ; CHECK: bb.13 (%ir-block.60): + ; CHECK: successors: %bb.14(0x40000001), %bb.16(0x3fffffff) + ; CHECK: BCC 68, [[CMPLWI1]], %bb.16 + ; CHECK: B %bb.14 + ; CHECK: bb.14 (%ir-block.67): + ; CHECK: successors: %bb.17(0x80000000) + ; CHECK: [[ADDI3:%[0-9]+]]:gprc = nuw nsw ADDI [[COPY10]], 101 + ; CHECK: B %bb.17 + ; CHECK: bb.15 (%ir-block.69): + ; CHECK: successors: %bb.17(0x80000000) + ; CHECK: [[ORI8_:%[0-9]+]]:g8rc = ORI8 [[PHI4]], 1 + ; CHECK: [[COPY14:%[0-9]+]]:gprc = COPY [[ORI8_]].sub_32 + 
; CHECK: [[RLWINM4:%[0-9]+]]:gprc = RLWINM [[COPY14]], 1, 0, 30 + ; CHECK: B %bb.17 + ; CHECK: bb.16 (%ir-block.72): + ; CHECK: successors: %bb.17(0x80000000) + ; CHECK: [[ORI8_1:%[0-9]+]]:g8rc = ORI8 [[RLDICL2]], 1 + ; CHECK: [[ADD8_1:%[0-9]+]]:g8rc = ADD8 [[PHI4]], [[ORI8_1]] + ; CHECK: [[COPY15:%[0-9]+]]:gprc = COPY [[ADD8_1]].sub_32 + ; CHECK: bb.17 (%ir-block.74): + ; CHECK: successors: %bb.9(0x7c000000), %bb.3(0x04000000) + ; CHECK: [[PHI9:%[0-9]+]]:gprc = PHI [[ADDI3]], %bb.14, [[RLWINM4]], %bb.15, [[COPY15]], %bb.16 + ; CHECK: [[ADD4_5:%[0-9]+]]:gprc = nsw ADD4 [[PHI9]], [[ADD4_4]] + ; CHECK: STW killed [[ADD4_5]], 4, [[COPY13]] :: (store 4 into %ir.uglygep78.cast, !tbaa !2) + ; CHECK: [[ADDI8_5:%[0-9]+]]:g8rc = nuw nsw ADDI8 [[PHI4]], 2 + ; CHECK: BDNZ8 %bb.9, implicit-def dead $ctr8, implicit $ctr8 + ; CHECK: B %bb.3 + bb.0 (%ir-block.5): + successors: %bb.1(0x50000000), %bb.9(0x30000000) + liveins: $x3, $x5, $x6, $x7 + + %38:g8rc = COPY $x7 + %37:g8rc_and_g8rc_nox0 = COPY $x6 + %36:g8rc_and_g8rc_nox0 = COPY $x5 + %34:g8rc = COPY $x3 + %39:gprc = COPY %34.sub_32 + %40:gprc = COPY %38.sub_32 + %41:crrc = CMPWI %40, 1 + BCC 12, killed %41, %bb.9 + B %bb.1 + + bb.1 (%ir-block.7): + %46:g8rc = IMPLICIT_DEF + %45:g8rc = INSERT_SUBREG %46, %40, %subreg.sub_32 + %0:g8rc = RLDICL killed %45, 0, 32 + %44:gprc = LI 0 + %43:gprc = LI 100 + %42:g8rc = LI8 0 + %47:crrc = CMPLWI %40, 1 + %95:crrc = CMPLWI %39, 3 + BCC 76, killed %47, %bb.4 + B %bb.2 + + bb.2 (%ir-block.10): + %50:g8rc_and_g8rc_nox0 = RLWINM8 %0, 0, 0, 30 + %1:g8rc = ADDI8 %36, -8 + %2:g8rc = ADDI8 %37, -8 + %51:g8rc = nsw ADDI8 killed %50, -2 + %52:g8rc_and_g8rc_nox0 = RLDICL %51, 63, 1 + %53:g8rc = nuw ADDI8 killed %52, 1 + MTCTR8loop killed %53, implicit-def dead $ctr8 + %49:gprc = LI 0 + %48:g8rc = LI8 0 + %56:gprc = LIS 34952 + %57:gprc = ORI %56, 34953 + %62:g8rc = IMPLICIT_DEF + %69:crrc = CMPLWI %39, 1 + B %bb.10 + + bb.3 (%ir-block.15): + %3:gprc = nuw ADDI %33, 102 + + bb.4 (%ir-block.17): + %4:g8rc = PHI %42, %bb.1, %32, %bb.3 + %5:gprc = PHI %43, %bb.1, %3, %bb.3 + %6:gprc = PHI %44, %bb.1, %27, %bb.3 + %90:g8rc = ANDI8_rec %0, 1, implicit-def $cr0 + %75:crbitrc = COPY $cr0gt + BCn killed %75, %bb.9 + B %bb.5 + + bb.5 (%ir-block.23): + successors: %bb.8(0x2aaaaaab), %bb.21(0x55555555) + + %76:g8rc = RLDICR %4, 2, 61 + %77:gprc = LWZX %36, %76 :: (load 4 from %ir.24, !tbaa !2) + %7:gprc = nsw ADD4 killed %77, %6 + BCC 76, %95, %bb.8 + B %bb.21 + + bb.21 (%ir-block.23): + %79:crrc = CMPLWI %39, 1 + %81:gprc = COPY %4.sub_32 + %82:gprc = LIS 34952 + %83:gprc = ORI killed %82, 34953 + %84:gprc = MULHWU %81, killed %83 + %85:gprc = RLWINM %84, 28, 4, 31 + %86:gprc = MULLI killed %85, 30 + %9:gprc = SUBF killed %86, %81 + %80:gprc = COPY %4.sub_32 + %8:gprc_and_gprc_nor0 = RLWINM %80, 1, 0, 30 + %91:gprc = ISEL %8, %9, %79.sub_eq + B %bb.8 + + bb.8 (%ir-block.33): + %10:gprc = PHI %5, %bb.5, %91, %bb.21 + %87:gprc = nsw ADD4 %10, %7 + STWX killed %87, %37, %76 :: (store 4 into %ir.36, !tbaa !2) + + bb.9 (%ir-block.37): + %89:g8rc = LI8 0 + $x3 = COPY %89 + BLR8 implicit $lr8, implicit $rm, implicit $x3 + + bb.10 (%ir-block.38): + successors: %bb.12(0x2aaaaaab), %bb.19(0x55555555) + + %11:g8rc_and_g8rc_nox0 = PHI %48, %bb.2, %32, %bb.18 + %12:gprc = PHI %49, %bb.2, %27, %bb.18 + %13:g8rc_and_g8rc_nox0 = PHI %1, %bb.2, %17, %bb.18 + %14:g8rc_and_g8rc_nox0 = PHI %2, %bb.2, %15, %bb.18 + %16:g8rc_and_g8rc_nox0 = ADDI8 %14, 8 + %15:g8rc_and_g8rc_nox0 = COPY %16 + %54:gprc, %55:g8rc_and_g8rc_nox0 = LWZU 8, %13 :: (load 4 from 
%ir.46, !tbaa !2) + %17:g8rc_and_g8rc_nox0 = COPY %55 + %18:gprc_and_gprc_nor0 = COPY %11.sub_32 + %58:gprc = MULHWU %18, %57 + %59:gprc = RLWINM %58, 28, 4, 31 + %60:gprc = nsw MULLI killed %59, -30 + %61:g8rc = INSERT_SUBREG %62, killed %60, %subreg.sub_32 + %19:g8rc = RLDICL killed %61, 0, 32 + %20:g8rc = ORI8 %19, 1 + %21:gprc = nsw ADD4 killed %54, %12 + BCC 76, %95, %bb.12 + B %bb.19 + + bb.19 (%ir-block.38): + %66:g8rc = ADD8 %11, %19 + %24:gprc = COPY %66.sub_32 + %65:gprc = COPY %11.sub_32 + %22:gprc_and_gprc_nor0 = RLWINM %65, 1, 0, 30 + %93:gprc = ISEL %22, %24, %69.sub_eq + B %bb.14 + + bb.12 (%ir-block.56): + %23:gprc = nuw nsw ADDI %18, 100 + B %bb.14 + + bb.14 (%ir-block.60): + successors: %bb.16(0x2aaaaaab), %bb.20(0x55555555) + + %25:gprc = PHI %23, %bb.12, %93, %bb.19 + %67:gprc = nsw ADD4 %25, %21 + STW killed %67, 0, %16 :: (store 4 into %ir.44, !tbaa !2) + %26:g8rc = ORI8 %11, 1 + %68:gprc = LWZ 4, %17 :: (load 4 from %ir.uglygep1112.cast, !tbaa !2) + %27:gprc = nsw ADD4 killed %68, %21 + BCC 76, %69, %bb.16 + B %bb.20 + + bb.20 (%ir-block.60): + successors: %bb.15(0x40000001), %bb.17(0x3fffffff) + + BCC 68, %95, %bb.17 + B %bb.15 + + bb.15 (%ir-block.67): + %28:gprc = nuw nsw ADDI %18, 101 + B %bb.18 + + bb.16 (%ir-block.69): + %71:gprc = COPY %26.sub_32 + %29:gprc = RLWINM %71, 1, 0, 30 + B %bb.18 + + bb.17 (%ir-block.72): + %72:g8rc = ADD8 %11, %20 + %30:gprc = COPY %72.sub_32 + + bb.18 (%ir-block.74): + successors: %bb.10(0x7c000000), %bb.3(0x04000000) + + %31:gprc = PHI %28, %bb.15, %29, %bb.16, %30, %bb.17 + %73:gprc = nsw ADD4 %31, %27 + STW killed %73, 4, %15 :: (store 4 into %ir.uglygep78.cast, !tbaa !2) + %32:g8rc = nuw nsw ADDI8 %11, 2 + %74:gprc_and_gprc_nor0 = COPY %32.sub_32 + %33:gprc_and_gprc_nor0 = ADDI killed %74, -2 + BDNZ8 %bb.10, implicit-def dead $ctr8, implicit $ctr8 + B %bb.3 + +... From ebfbdebe9678f4a42ec35396eb517eefd85d2b4c Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Thu, 17 Sep 2020 10:19:09 +0800 Subject: [PATCH 0931/1079] [PowerPC] Fix store-fptoi combine of f128 on Power8 llc would crash for (store (fptosi-f128-i32)) when -mcpu=pwr8, we should not generate FP_TO_(S|U)INT_IN_VSR for f128 types at this time. This patch fixes it. Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D86686 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 +- llvm/test/CodeGen/PowerPC/store_fptoi.ll | 76 +++++++++++++++++++++ 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 3b0acfa76ec82..6bdebf9111d6e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -14094,8 +14094,7 @@ SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N, EVT Op1VT = N->getOperand(1).getValueType(); EVT ResVT = Val.getValueType(); - // Floating point types smaller than 32 bits are not legal on Power. - if (ResVT.getScalarSizeInBits() < 32) + if (!isTypeLegal(ResVT)) return SDValue(); // Only perform combine for conversion to i64/i32 or power9 i16/i8. 
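For context, a source-level pattern that exercises the fixed path is a float128-to-int conversion whose result is stored straight back to memory, which is what the qpConv2sw/qpConv2uw tests below check at the IR level. A minimal sketch, assuming a powerpc64le target where __float128 lowers to fp128 (the function name is illustrative, not from the patch):

    // Hypothetical reproducer: at -mcpu=pwr8 the conversion below becomes
    // (store (fptosi f128 %x to i32)). Before this fix the combine would
    // form FP_TO_SINT_IN_VSR for the illegal f128 type and llc would crash;
    // on Power8 the conversion must instead go through the __fixkfsi
    // libcall, as the CHECK-PWR8 lines below verify.
    void qp_to_word(__float128 *a, int *b) {
      *b = (int)*a; // fptosi fp128 -> i32, then store i32
    }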
diff --git a/llvm/test/CodeGen/PowerPC/store_fptoi.ll b/llvm/test/CodeGen/PowerPC/store_fptoi.ll index e4f47ab7628fd..1e5b8414243b1 100644 --- a/llvm/test/CodeGen/PowerPC/store_fptoi.ll +++ b/llvm/test/CodeGen/PowerPC/store_fptoi.ll @@ -7,6 +7,82 @@ ; Tests for store of fp_to_sint converstions ; ========================================== +; Function Attrs: norecurse nounwind +define void @qpConv2sdw(fp128* nocapture readonly %a, i64* nocapture %b) { +entry: + %0 = load fp128, fp128* %a, align 16 + %conv = fptosi fp128 %0 to i64 + store i64 %conv, i64* %b, align 8 + ret void + +; CHECK-LABEL: qpConv2sdw +; CHECK: lxv [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvqpsdz [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsd [[CONV]], 0(4) +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: qpConv2sdw +; CHECK-PWR8: bl __fixkfdi +; CHECK-PWR8: blr +} + +; Function Attrs: norecurse nounwind +define void @qpConv2sw(fp128* nocapture readonly %a, i32* nocapture %b) { +entry: + %0 = load fp128, fp128* %a, align 16 + %conv = fptosi fp128 %0 to i32 + store i32 %conv, i32* %b, align 4 + ret void + +; CHECK-LABEL: qpConv2sw +; CHECK: lxv [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvqpswz [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsiwx [[CONV]], 0, 4 +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: qpConv2sw +; CHECK-PWR8: bl __fixkfsi +; CHECK-PWR8: blr +} + +; Function Attrs: norecurse nounwind +define void @qpConv2udw(fp128* nocapture readonly %a, i64* nocapture %b) { +entry: + %0 = load fp128, fp128* %a, align 16 + %conv = fptoui fp128 %0 to i64 + store i64 %conv, i64* %b, align 8 + ret void + +; CHECK-LABEL: qpConv2udw +; CHECK: lxv [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvqpudz [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsd [[CONV]], 0(4) +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: qpConv2udw +; CHECK-PWR8: bl __fixunskfdi +; CHECK-PWR8: blr +} + +; Function Attrs: norecurse nounwind +define void @qpConv2uw(fp128* nocapture readonly %a, i32* nocapture %b) { +entry: + %0 = load fp128, fp128* %a, align 16 + %conv = fptoui fp128 %0 to i32 + store i32 %conv, i32* %b, align 4 + ret void + +; CHECK-LABEL: qpConv2uw +; CHECK: lxv [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvqpuwz [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsiwx [[CONV]], 0, 4 +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: qpConv2uw +; CHECK-PWR8: bl __fixunskfsi +; CHECK-PWR8: blr +} + ; Function Attrs: norecurse nounwind define void @dpConv2sdw(double* nocapture readonly %a, i64* nocapture %b) { entry: From c140322819806cb292e079d62f2e9dbab697c08c Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 16 Sep 2020 15:52:50 -0700 Subject: [PATCH 0932/1079] Use zu rather than llu format specifier for size_t (-Wformat warning fix). 
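To illustrate the mismatch being silenced (a minimal sketch, not code from this patch): on LP64 platforms size_t is typically unsigned long, so handing it to a %llu conversion is exactly what -Wformat flags, while %zu is defined to match size_t on every target:

    #include <cstdio>
    #include <cstddef>

    int main() {
      size_t file_size = 4096;
      // std::printf("file_size = %llu\n", file_size);
      // ^ -Wformat warns: 'size_t' is not 'unsigned long long', even when
      //   the two types happen to have the same width.
      std::printf("file_size = %zu\n", file_size); // %zu is the size_t specifier
      return 0;
    }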
---
 lldb/source/Expression/REPL.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Expression/REPL.cpp b/lldb/source/Expression/REPL.cpp
index 1f2b009c48935..c3d14960f74c5 100644
--- a/lldb/source/Expression/REPL.cpp
+++ b/lldb/source/Expression/REPL.cpp
@@ -196,7 +196,7 @@ static bool ReadCode(const std::string &path, std::string &code,
   const size_t max_size = code.max_size();
   if (file_size > max_size) {
     error_sp->Printf("file at path '%s' too large: "
-                     "file_size = %llu, max_size = %llu\n",
+                     "file_size = %zu, max_size = %zu\n",
                      path.c_str(), file_size, max_size);
     return false;
   }

From 6a07f1edf8e6a172734286cd3ab5988313313d8f Mon Sep 17 00:00:00 2001
From: David Blaikie
Date: Tue, 15 Sep 2020 12:49:53 -0700
Subject: [PATCH 0933/1079] debug_rnglists/symbolizing: reduce memory usage by
 not caching rnglists

This matches the debug_ranges behavior - though is currently implemented
differently. (the debug_ranges parsing was handled by creating a new
ranges parser during DIE address querying, and just destroying it after
the query - whereas the rnglists parser is a member of the DWARFUnit
currently - so the API doesn't cache anymore)

I think this could/should be improved by not parsing debug_rnglists
headers at all when dumping debug_info or symbolizing - do it the way
DWARF (roughly) intended: take the rnglists_base, add addr*index to it,
read the offset, parse the list at rnglists_base+offset. This would have
no error checking for valid index (because the number of valid indexes
is stored in the header, which has a negative offset from rnglists_base -
and is sort of only intended for use by dumpers, not by parsers going
from debug_info to a rnglist) or out of contribution bounds access
(since it wouldn't know the length of the contribution, also in the
header) - nor any error-checking that the rnglist contribution was using
the same properties as the debug_info (version, DWARF32/64, address
size, etc).
---
 llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
index bcfc71381aeee..e54bed2d65d67 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
@@ -270,19 +270,13 @@ template <typename DWARFListType>
 Expected<DWARFListType>
 DWARFListTableBase<DWARFListType>::findList(DWARFDataExtractor Data,
                                             uint64_t Offset) {
-  auto Entry = ListMap.find(Offset);
-  if (Entry != ListMap.end())
-    return Entry->second;
-
   // Extract the list from the section and enter it into the list map.
   DWARFListType List;
   uint64_t End = getHeaderOffset() + Header.length();
-  uint64_t StartingOffset = Offset;
   if (Error E = List.extract(Data, getHeaderOffset(), End, &Offset,
                              Header.getSectionName(),
                              Header.getListTypeString()))
     return std::move(E);
-  ListMap[StartingOffset] = List;
   return List;
 }
 
From a895040eb022b8a621d8e85754f113d82e232ab1 Mon Sep 17 00:00:00 2001
From: Stella Stamenova
Date: Wed, 16 Sep 2020 20:00:43 -0700
Subject: [PATCH 0934/1079] Revert "[IRSim] Adding IR Instruction Mapper"

This reverts commit b04c1a9d3127730c05e8a22a0e931a12a39528df.
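Returning to the debug_rnglists commit above: the header-free lookup its message describes amounts to one offset read plus an addition. A rough sketch of that arithmetic, with hypothetical names rather than the actual DWARF parser API (assumes a little-endian section image and an in-bounds Index):

    #include <cstdint>

    // Read the offset stored at rnglists_base + index * offset_size, then
    // return rnglists_base + offset, the start of the range list itself.
    // As the commit message notes, skipping the header means there is no
    // index-bounds, contribution-length, or version checking here.
    uint64_t resolveRnglist(const uint8_t *Section, uint64_t RnglistsBase,
                            uint64_t Index, unsigned OffsetSize /* 4 or 8 */) {
      uint64_t EntryPos = RnglistsBase + Index * OffsetSize;
      uint64_t Offset = 0;
      for (unsigned I = 0; I < OffsetSize; ++I) // little-endian byte read
        Offset |= uint64_t(Section[EntryPos + I]) << (8 * I);
      return RnglistsBase + Offset;
    }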
---
 .../llvm/Analysis/IRSimilarityIdentifier.h    |  357 -----
 llvm/lib/Analysis/CMakeLists.txt              |    1 -
 llvm/lib/Analysis/IRSimilarityIdentifier.cpp  |  153 ---
 llvm/unittests/Analysis/CMakeLists.txt        |    1 -
 .../Analysis/IRSimilarityIdentifierTest.cpp   | 1177 -----------------
 5 files changed, 1689 deletions(-)
 delete mode 100644 llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
 delete mode 100644 llvm/lib/Analysis/IRSimilarityIdentifier.cpp
 delete mode 100644 llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp

diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
deleted file mode 100644
index 9e6d3aeec0304..0000000000000
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ /dev/null
@@ -1,357 +0,0 @@
-//===- IRSimilarityIdentifier.h - Find similarity in a module --------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// \file
-// Interface file for the IRSimilarityIdentifier for identifying similarities in
-// IR including the IRInstructionMapper, which maps an Instruction to unsigned
-// integers.
-//
-// Two sequences of instructions are called "similar" if they perform the same
-// series of operations for all inputs.
-//
-// \code
-// %1 = add i32 %a, 10
-// %2 = add i32 %a, %1
-// %3 = icmp slt icmp %1, %2
-// \endcode
-//
-// and
-//
-// \code
-// %1 = add i32 11, %a
-// %2 = sub i32 %a, %1
-// %3 = icmp sgt icmp %2, %1
-// \endcode
-//
-// ultimately have the same result, even if the inputs, and structure are
-// slightly different.
-//
-// For instructions, we do not worry about operands that do not have fixed
-// semantic meaning to the program. We consider the opcode that the instruction
-// has, the types, parameters, and extra information such as the function name,
-// or comparison predicate. These are used to create a hash to map instructions
-// to integers to be used in similarity matching in sequences of instructions
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
-#define LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
-
-#include "llvm/IR/InstVisitor.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Allocator.h"
-
-namespace llvm {
-namespace IRSimilarity {
-
-/// This represents what is and is not supported when finding similarity in
-/// Instructions.
-///
-/// Legal Instructions are considered when looking at similarity between
-/// Instructions.
-///
-/// Illegal Instructions cannot be considered when looking for similarity
-/// between Instructions. They act as boundaries between similarity regions.
-///
-/// Invisible Instructions are skipped over during analysis.
-// TODO: Shared with MachineOutliner
-enum InstrType { Legal, Illegal, Invisible };
-
-/// This provides the utilities for hashing an Instruction to an unsigned
-/// integer. Two IRInstructionDatas produce the same hash value when their
-/// underlying Instructions perform the same operation (even if they don't have
-/// the same input operands.)
-/// As a more concrete example, consider the following:
-///
-/// \code
-/// %add1 = add i32 %a, %b
-/// %add2 = add i32 %c, %d
-/// %add3 = add i64 %e, %f
-/// \endcode
-///
-// Then the IRInstructionData wrappers for these Instructions may be hashed like
-/// so:
-///
-/// \code
-/// ; These two adds have the same types and operand types, so they hash to the
-/// ; same number.
-/// %add1 = add i32 %a, %b ; Hash: 1
-/// %add2 = add i32 %c, %d ; Hash: 1
-/// ; This add produces an i64. This differentiates it from %add1 and %add2. So,
-/// ; it hashes to a different number.
-/// %add3 = add i64 %e, %f; Hash: 2
-/// \endcode
-///
-///
-/// This hashing scheme will be used to represent the program as a very long
-/// string. This string can then be placed in a data structure which can be used
-/// for similarity queries.
-///
-/// TODO: Handle types of Instructions which can be equal even with different
-/// operands. (E.g. comparisons with swapped predicates.)
-/// TODO: Handle CallInsts, which are only checked for function type
-/// by \ref isSameOperationAs.
-/// TODO: Handle GetElementPtrInsts, as some of the operands have to be the
-/// exact same, and some do not.
-struct IRInstructionData : ilist_node<IRInstructionData> {
-
-  /// The source Instruction that is being wrapped.
-  Instruction *Inst = nullptr;
-  /// The values of the operands in the Instruction.
-  SmallVector<Value *, 4> OperVals;
-  /// The legality of the wrapped instruction. This is informed by InstrType,
-  /// and is used when checking when two instructions are considered similar.
-  /// If either instruction is not legal, the instructions are automatically not
-  /// considered similar.
-  bool Legal;
-
-  /// Gather the information that is difficult to gather for an Instruction, or
-  /// is changed. i.e. the operands of an Instruction and the Types of those
-  /// operands. This extra information allows for similarity matching to make
-  /// assertions that allow for more flexibility when checking for whether an
-  /// Instruction performs the same operation.
-  IRInstructionData(Instruction &I, bool Legality);
-
-  /// Hashes \p Value based on its opcode, types, and operand types.
-  /// Two IRInstructionData instances produce the same hash when they perform
-  /// the same operation.
-  ///
-  /// As a simple example, consider the following instructions.
-  ///
-  /// \code
-  /// %add1 = add i32 %x1, %y1
-  /// %add2 = add i32 %x2, %y2
-  ///
-  /// %sub = sub i32 %x1, %y1
-  ///
-  /// %add_i64 = add i64 %x2, %y2
-  /// \endcode
-  ///
-  /// Because the first two adds operate the same types, and are performing the
-  /// same action, they will be hashed to the same value.
-  ///
-  /// However, the subtraction instruction is not the same as an addition, and
-  /// will be hashed to a different value.
-  ///
-  /// Finally, the last add has a different type compared to the first two add
-  /// instructions, so it will also be hashed to a different value that any of
-  /// the previous instructions.
-  ///
-  /// \param [in] Value - The IRInstructionData instance to be hashed.
-  /// \returns A hash_value of the IRInstructionData.
-  friend hash_code hash_value(const IRInstructionData &ID) {
-    SmallVector<Type *, 4> OperTypes;
-    for (Value *V : ID.OperVals)
-      OperTypes.push_back(V->getType());
-
-    return hash_combine(
-        hash_value(ID.Inst->getOpcode()), hash_value(ID.Inst->getType()),
-        hash_combine_range(OperTypes.begin(), OperTypes.end()));
-  }
-};
-
-/// Compare one IRInstructionData class to another IRInstructionData class for
-/// whether they are performing a the same operation, and can mapped to the
-/// same value. For regular instructions if the hash value is the same, then
-/// they will also be close.
-///
-/// \param A - The first IRInstructionData class to compare
-/// \param B - The second IRInstructionData class to compare
-/// \returns true if \p A and \p B are similar enough to be mapped to the same
-/// value.
-bool isClose(const IRInstructionData &A, const IRInstructionData &B);
-
-struct IRInstructionDataTraits : DenseMapInfo<IRInstructionData *> {
-  static inline IRInstructionData *getEmptyKey() { return nullptr; }
-  static inline IRInstructionData *getTombstoneKey() {
-    return reinterpret_cast<IRInstructionData *>(-1);
-  }
-
-  static unsigned getHashValue(const IRInstructionData *E) {
-    using llvm::hash_value;
-    assert(E && "IRInstructionData is a nullptr?");
-    return hash_value(*E);
-  }
-
-  static bool isEqual(const IRInstructionData *LHS,
-                      const IRInstructionData *RHS) {
-    if (RHS == getEmptyKey() || RHS == getTombstoneKey() ||
-        LHS == getEmptyKey() || LHS == getTombstoneKey())
-      return LHS == RHS;
-
-    assert(LHS && RHS && "nullptr should have been caught by getEmptyKey?");
-    return isClose(*LHS, *RHS);
-  }
-};
-
-/// Helper struct for converting the Instructions in a Module into a vector of
-/// unsigned integers. This vector of unsigned integers can be thought of as a
-/// "numeric string". This numeric string can then be queried by, for example,
-/// data structures that find repeated substrings.
-///
-/// This hashing is done per BasicBlock in the module. To hash Instructions
-/// based off of their operations, each Instruction is wrapped in an
-/// IRInstructionData struct. The unsigned integer for an IRInstructionData
-/// depends on:
-/// - The hash provided by the IRInstructionData.
-/// - Which member of InstrType the IRInstructionData is classified as.
-// See InstrType for more details on the possible classifications, and how they
-// manifest in the numeric string.
-///
-/// The numeric string for an individual BasicBlock is terminated by an unique
-/// unsigned integer. This prevents data structures which rely on repetition
-/// from matching across BasicBlocks. (For example, the SuffixTree.)
-/// As a concrete example, if we have the following two BasicBlocks:
-/// \code
-/// bb0:
-/// %add1 = add i32 %a, %b
-/// %add2 = add i32 %c, %d
-/// %add3 = add i64 %e, %f
-/// bb1:
-/// %sub = sub i32 %c, %d
-/// \endcode
-/// We may hash the Instructions like this (via IRInstructionData):
-/// \code
-/// bb0:
-/// %add1 = add i32 %a, %b ; Hash: 1
-/// %add2 = add i32 %c, %d; Hash: 1
-/// %add3 = add i64 %e, %f; Hash: 2
-/// bb1:
-/// %sub = sub i32 %c, %d; Hash: 3
-/// %add4 = add i32 %c, %d ; Hash: 1
-/// \endcode
-/// And produce a "numeric string representation" like so:
-/// 1, 1, 2, unique_integer_1, 3, 1, unique_integer_2
-///
-/// TODO: This is very similar to the MachineOutliner, and should be
-/// consolidated into the same interface.
-struct IRInstructionMapper {
-  /// The starting illegal instruction number to map to.
-  ///
-  /// Set to -3 for compatibility with DenseMapInfo<unsigned>.
-  unsigned IllegalInstrNumber = static_cast<unsigned>(-3);
-
-  /// The next available integer to assign to a legal Instruction to.
-  unsigned LegalInstrNumber = 0;
-
-  /// Correspondence from IRInstructionData to unsigned integers.
-  DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>
-      InstructionIntegerMap;
-
-  /// Set if we added an illegal number in the previous step.
-  /// Since each illegal number is unique, we only need one of them between
-  /// each range of legal numbers. This lets us make sure we don't add more
-  /// than one illegal number per range.
-  bool AddedIllegalLastTime = false;
-
-  /// Marks whether we found a illegal instruction in the previous step.
-  bool CanCombineWithPrevInstr = false;
-
-  /// Marks whether we have found a set of instructions that is long enough
-  /// to be considered for similarity.
-  bool HaveLegalRange = false;
-
-  /// This allocator pointer is in charge of holding on to the IRInstructionData
-  /// so it is not deallocated until whatever external tool is using it is done
-  /// with the information.
-  BumpPtrAllocator *InstDataAllocator = nullptr;
-
-  /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers
-  /// determined by \p InstrType. Two Instructions are mapped to the same value
-  /// if they are close as defined by the InstructionData class above.
-  ///
-  /// \param [in] BB - The BasicBlock to be mapped to integers.
-  /// \param [in,out] InstrList - Vector of IRInstructionData to append to.
-  /// \param [in,out] IntegerMapping - Vector of unsigned integers to append to.
-  void convertToUnsignedVec(BasicBlock &BB,
-                            std::vector<IRInstructionData *> &InstrList,
-                            std::vector<unsigned> &IntegerMapping);
-
-  /// Maps an Instruction to a legal integer.
-  ///
-  /// \param [in] It - The Instruction to be mapped to an integer.
-  /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to
-  /// append to.
-  /// \param [in,out] InstrList - Vector of InstructionData to append
-  /// to. \returns The integer \p It was mapped to.
-  unsigned mapToLegalUnsigned(BasicBlock::iterator &It,
-                              std::vector<unsigned> &IntegerMappingForBB,
-                              std::vector<IRInstructionData *> &InstrListForBB);
-
-  /// Maps an Instruction to an illegal integer.
-  ///
-  /// \param [in] It - The \p Instruction to be mapped to an integer.
-  /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to
-  /// append to.
-  /// \param [in,out] InstrList - Vector of IRInstructionData to append to.
-  /// \param End - true if creating a dummy IRInstructionData at the end of a
-  /// basic block.
-  /// \returns The integer \p It was mapped to.
-  unsigned mapToIllegalUnsigned(
-      BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
-      std::vector<IRInstructionData *> &InstrListForBB, bool End = false);
-
-  IRInstructionMapper(BumpPtrAllocator *IDA) : InstDataAllocator(IDA) {
-    // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
-    // changed.
-    assert(DenseMapInfo<unsigned>::getEmptyKey() == static_cast<unsigned>(-1) &&
-           "DenseMapInfo<unsigned>'s empty key isn't -1!");
-    assert(DenseMapInfo<unsigned>::getTombstoneKey() ==
-               static_cast<unsigned>(-2) &&
-           "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
-  }
-
-  /// Custom InstVisitor to classify different instructions for whether it can
-  /// be analyzed for similarity.
-  struct InstructionClassification
-      : public InstVisitor<InstructionClassification, InstrType> {
-    InstructionClassification() {}
-
-    // TODO: Determine a scheme to resolve when the label is similar enough.
-    InstrType visitBranchInst(BranchInst &BI) { return Illegal; }
-    // TODO: Determine a scheme to resolve when the labels are similar enough.
-    InstrType visitPHINode(PHINode &PN) { return Illegal; }
-    // TODO: Handle allocas.
-    InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; }
-    // We exclude variable argument instructions since variable arguments
-    // requires extra checking of the argument list.
-    InstrType visitVAArgInst(VAArgInst &VI) { return Illegal; }
-    // We exclude all exception handling cases since they are so context
-    // dependent.
-    InstrType visitLandingPadInst(LandingPadInst &LPI) { return Illegal; }
-    InstrType visitFuncletPadInst(FuncletPadInst &FPI) { return Illegal; }
-    // DebugInfo should be included in the regions, but should not be
-    // analyzed for similarity as it has no bearing on the outcome of the
-    // program.
-    InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; }
-    // TODO: Handle GetElementPtrInsts
-    InstrType visitGetElementPtrInst(GetElementPtrInst &GEPI) {
-      return Illegal;
-    }
-    // TODO: Handle specific intrinsics.
-    InstrType visitIntrinsicInst(IntrinsicInst &II) { return Illegal; }
-    // TODO: Handle CallInsts.
-    InstrType visitCallInst(CallInst &CI) { return Illegal; }
-    // TODO: We do not current handle similarity that changes the control flow.
-    InstrType visitInvokeInst(InvokeInst &II) { return Illegal; }
-    // TODO: We do not current handle similarity that changes the control flow.
-    InstrType visitCallBrInst(CallBrInst &CBI) { return Illegal; }
-    // TODO: Handle interblock similarity.
-    InstrType visitTerminator(Instruction &I) { return Illegal; }
-    InstrType visitInstruction(Instruction &I) { return Legal; }
-  };
-
-  /// Maps an Instruction to a member of InstrType.
-  InstructionClassification InstClassifier;
-};
-
-} // end namespace IRSimilarity
-} // end namespace llvm
-
-#endif // LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
index 4bd45ead30d35..78cc764379e17 100644
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -54,7 +54,6 @@ add_llvm_component_library(LLVMAnalysis
   GlobalsModRef.cpp
   GuardUtils.cpp
   HeatUtils.cpp
-  IRSimilarityIdentifier.cpp
   IVDescriptors.cpp
   IVUsers.cpp
   IndirectCallPromotionAnalysis.cpp
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
deleted file mode 100644
index 050f5b1c0962c..0000000000000
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-//===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// \file
-// Implementation file for the IRSimilarityIdentifier for identifying
-// similarities in IR including the IRInstructionMapper.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/IRSimilarityIdentifier.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/User.h"
-
-using namespace llvm;
-using namespace IRSimilarity;
-
-IRInstructionData::IRInstructionData(Instruction &I, bool Legality)
-    : Inst(&I), Legal(Legality) {
-  // Here we collect the operands to be used to determine whether two
-  // instructions are similar to one another.
-  for (Use &OI : I.operands())
-    OperVals.push_back(OI.get());
-}
-
-bool IRSimilarity::isClose(const IRInstructionData &A,
-                           const IRInstructionData &B) {
-  return A.Legal && A.Inst->isSameOperationAs(B.Inst);
-}
-
-// TODO: This is the same as the MachineOutliner, and should be consolidated
-// into the same interface.
-void IRInstructionMapper::convertToUnsignedVec(
-    BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
-    std::vector<unsigned> &IntegerMapping) {
-  BasicBlock::iterator It = BB.begin();
-
-  std::vector<unsigned> IntegerMappingForBB;
-  std::vector<IRInstructionData *> InstrListForBB;
-
-  HaveLegalRange = false;
-  CanCombineWithPrevInstr = false;
-  AddedIllegalLastTime = true;
-
-  for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
-    switch (InstClassifier.visit(*It)) {
-    case InstrType::Legal:
-      mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
-      break;
-    case InstrType::Illegal:
-      mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
-      break;
-    case InstrType::Invisible:
-      AddedIllegalLastTime = false;
-      break;
-    }
-  }
-
-  if (HaveLegalRange) {
-    mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
-    InstrList.insert(InstrList.end(), InstrListForBB.begin(),
-                     InstrListForBB.end());
-    IntegerMapping.insert(IntegerMapping.end(), IntegerMappingForBB.begin(),
-                          IntegerMappingForBB.end());
-  }
-}
-
-// TODO: This is the same as the MachineOutliner, and should be consolidated
-// into the same interface.
-unsigned IRInstructionMapper::mapToLegalUnsigned(
-    BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
-    std::vector<IRInstructionData *> &InstrListForBB) {
-  // We added something legal, so we should unset the AddedLegalLastTime
-  // flag.
-  AddedIllegalLastTime = false;
-
-  // If we have at least two adjacent legal instructions (which may have
-  // invisible instructions in between), remember that.
-  if (CanCombineWithPrevInstr)
-    HaveLegalRange = true;
-  CanCombineWithPrevInstr = true;
-
-  // Get the integer for this instruction or give it the current
-  // LegalInstrNumber.
-  IRInstructionData *ID = new (InstDataAllocator->Allocate<IRInstructionData>())
-      IRInstructionData(*It, true);
-  InstrListForBB.push_back(ID);
-
-  // Add to the instruction list
-  bool WasInserted;
-  DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
-      ResultIt;
-  std::tie(ResultIt, WasInserted) =
-      InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
-  unsigned INumber = ResultIt->second;
-
-  // There was an insertion.
-  if (WasInserted)
-    LegalInstrNumber++;
-
-  IntegerMappingForBB.push_back(INumber);
-
-  // Make sure we don't overflow or use any integers reserved by the DenseMap.
-  assert(LegalInstrNumber < IllegalInstrNumber &&
-         "Instruction mapping overflow!");
-
-  assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
-         "Tried to assign DenseMap tombstone or empty key to instruction.");
-  assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
-         "Tried to assign DenseMap tombstone or empty key to instruction.");
-
-  return INumber;
-}
-
-// TODO: This is the same as the MachineOutliner, and should be consolidated
-// into the same interface.
-unsigned IRInstructionMapper::mapToIllegalUnsigned(
-    BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
-    std::vector<IRInstructionData *> &InstrListForBB, bool End) {
-  // Can't combine an illegal instruction. Set the flag.
-  CanCombineWithPrevInstr = false;
-
-  // Only add one illegal number per range of legal numbers.
-  if (AddedIllegalLastTime)
-    return IllegalInstrNumber;
-
-  IRInstructionData *ID = nullptr;
-  if (!End)
-    ID = new (InstDataAllocator->Allocate<IRInstructionData>())
-        IRInstructionData(*It, false);
-  InstrListForBB.push_back(ID);
-
-  // Remember that we added an illegal number last time.
-  AddedIllegalLastTime = true;
-  unsigned INumber = IllegalInstrNumber;
-  IntegerMappingForBB.push_back(IllegalInstrNumber--);
-
-  assert(LegalInstrNumber < IllegalInstrNumber &&
-         "Instruction mapping overflow!");
-
-  assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
-         "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
-
-  assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
-         "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
-
-  return INumber;
-}
diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt
index 0480649352214..dfe570fd15749 100644
--- a/llvm/unittests/Analysis/CMakeLists.txt
+++ b/llvm/unittests/Analysis/CMakeLists.txt
@@ -29,7 +29,6 @@ add_llvm_unittest_with_input_files(AnalysisTests
   DomTreeUpdaterTest.cpp
   GlobalsModRefTest.cpp
   FunctionPropertiesAnalysisTest.cpp
-  IRSimilarityIdentifierTest.cpp
   IVDescriptorsTest.cpp
   LazyCallGraphTest.cpp
   LoadsTest.cpp
diff --git a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
deleted file mode 100644
index 4cc81b29a630e..0000000000000
--- a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
+++ /dev/null
@@ -1,1177 +0,0 @@
-//===- IRSimilarityIdentifierTest.cpp - IRSimilarityIdentifier unit tests -===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Tests for components for finding similarity such as the instruction mapper,
-// suffix tree usage, and structural analysis.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/IRSimilarityIdentifier.h"
-#include "llvm/AsmParser/Parser.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/SourceMgr.h"
-#include "gtest/gtest.h"
-
-using namespace llvm;
-using namespace IRSimilarity;
-
-static std::unique_ptr<Module> makeLLVMModule(LLVMContext &Context,
-                                              StringRef ModuleStr) {
-  SMDiagnostic Err;
-  std::unique_ptr<Module> M = parseAssemblyString(ModuleStr, Err, Context);
-  assert(M && "Bad LLVM IR?");
-  return M;
-}
-
-void getVectors(Module &M, std::vector<IRInstructionData *> &InstrList,
-                std::vector<unsigned> &UnsignedVec) {
-  BumpPtrAllocator InstDataAllocator;
-  IRInstructionMapper Mapper(&InstDataAllocator);
-
-  for (Function &F : M)
-    for (BasicBlock &BB : F)
-      Mapper.convertToUnsignedVec(BB, InstrList, UnsignedVec);
-}
-
-// Checks that different opcodes are mapped to different values.
-TEST(IRInstructionMapper, OpcodeDifferentiation) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a, i32 %b) {
-                          bb0:
-                             %0 = add i32 %a, %b
-                             %1 = mul i32 %a, %b
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  // Check that the size of the unsigned vector and the instruction list are the
-  // same as a safety check.
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-
-  // Make sure that the unsigned vector is the expected size.
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-
-  // Check whether the instructions are not mapped to the same value.
-  ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]);
-}
-
-// Checks that the same opcodes and types are mapped to the same values.
-TEST(IRInstructionMapper, OpcodeTypeSimilarity) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a, i32 %b) {
-                          bb0:
-                             %0 = add i32 %a, %b
-                             %1 = add i32 %b, %a
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-
-  // Check whether the instructions are mapped to the same value.
-  ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]);
-}
-
-// Checks that the same opcode and different types are mapped to different
-// values.
-TEST(IRInstructionMapper, TypeDifferentiation) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a, i32 %b, i64 %c, i64 %d) {
-                          bb0:
-                             %0 = add i32 %a, %b
-                             %1 = add i64 %c, %d
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]);
-}
-
-// Checks that different predicates map to different values.
-TEST(IRInstructionMapper, PredicateDifferentiation) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a, i32 %b) {
-                          bb0:
-                             %0 = icmp sge i32 %b, %a
-                             %1 = icmp slt i32 %a, %b
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]);
-}
-
-// Checks that predicates with the same swapped predicate map to different
-// values.
-TEST(IRInstructionMapper, PredicateIsomorphism) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a, i32 %b) {
-                          bb0:
-                             %0 = icmp sgt i32 %a, %b
-                             %1 = icmp slt i32 %b, %a
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]);
-}
-
-// Checks that the same predicate maps to the same value.
-TEST(IRInstructionMapper, PredicateSimilarity) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a, i32 %b) {
-                          bb0:
-                             %0 = icmp slt i32 %a, %b
-                             %1 = icmp slt i32 %b, %a
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]);
-}
-
-// Checks that the same predicate maps to the same value for floating point
-// CmpInsts.
-TEST(IRInstructionMapper, FPPredicateSimilarity) {
-  StringRef ModuleString = R"(
-                          define i32 @f(double %a, double %b) {
-                          bb0:
-                             %0 = fcmp olt double %a, %b
-                             %1 = fcmp olt double %b, %a
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]);
-}
-
-// Checks that the different predicate maps to a different value for floating
-// point CmpInsts.
-TEST(IRInstructionMapper, FPPredicatDifference) {
-  StringRef ModuleString = R"(
-                          define i32 @f(double %a, double %b) {
-                          bb0:
-                             %0 = fcmp olt double %a, %b
-                             %1 = fcmp oge double %b, %a
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]);
-}
-
-// Checks that the zexts that have the same type parameters map to the same
-// unsigned integer.
-TEST(IRInstructionMapper, ZextTypeSimilarity) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a) {
-                          bb0:
-                             %0 = zext i32 %a to i64
-                             %1 = zext i32 %a to i64
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]);
-}
-
-// Checks that the sexts that have the same type parameters map to the same
-// unsigned integer.
-TEST(IRInstructionMapper, SextTypeSimilarity) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a) {
-                          bb0:
-                             %0 = sext i32 %a to i64
-                             %1 = sext i32 %a to i64
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]);
-}
-
-// Checks that the zexts that have the different type parameters map to the
-// different unsigned integers.
-TEST(IRInstructionMapper, ZextTypeDifference) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a, i8 %b) {
-                          bb0:
-                             %0 = zext i32 %a to i64
-                             %1 = zext i8 %b to i32
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]);
-}
-
-
-// Checks that the sexts that have the different type parameters map to the
-// different unsigned integers.
-TEST(IRInstructionMapper, SextTypeDifference) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a, i8 %b) {
-                          bb0:
-                             %0 = sext i32 %a to i64
-                             %1 = sext i8 %b to i32
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]);
-}
-
-// Checks that loads that have the same type are mapped to the same unsigned
-// integer.
-TEST(IRInstructionMapper, LoadSimilarType) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32* %a, i32* %b) {
-                          bb0:
-                             %0 = load i32, i32* %a
-                             %1 = load i32, i32* %b
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]);
-}
-
-// Checks that loads that have the different types are mapped to
-// different unsigned integers.
-TEST(IRInstructionMapper, LoadDifferentType) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32* %a, i64* %b) {
-                          bb0:
-                             %0 = load i32, i32* %a
-                             %1 = load i64, i64* %b
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]);
-}
-
-// Checks that loads that have the different aligns are mapped to different
-// unsigned integers.
-TEST(IRInstructionMapper, LoadDifferentAlign) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32* %a, i32* %b) {
-                          bb0:
-                             %0 = load i32, i32* %a, align 4
-                             %1 = load i32, i32* %b, align 8
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]);
-}
-
-// Checks that loads that have the different volatile settings are mapped to
-// different unsigned integers.
-TEST(IRInstructionMapper, LoadDifferentVolatile) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32* %a, i32* %b) {
-                          bb0:
-                             %0 = load volatile i32, i32* %a
-                             %1 = load i32, i32* %b
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]);
-}
-
-// Checks that loads that have the same volatile settings are mapped to
-// different unsigned integers.
-TEST(IRInstructionMapper, LoadSameVolatile) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32* %a, i32* %b) {
-                          bb0:
-                             %0 = load volatile i32, i32* %a
-                             %1 = load volatile i32, i32* %b
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]);
-}
-
-// Checks that loads that have the different atomicity settings are mapped to
-// different unsigned integers.
-TEST(IRInstructionMapper, LoadDifferentAtomic) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32* %a, i32* %b) {
-                          bb0:
-                             %0 = load atomic i32, i32* %a unordered, align 4
-                             %1 = load atomic i32, i32* %b monotonic, align 4
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]);
-}
-
-// Checks that loads that have the same atomicity settings are mapped to
-// different unsigned integers.
-TEST(IRInstructionMapper, LoadSameAtomic) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32* %a, i32* %b) {
-                          bb0:
-                             %0 = load atomic i32, i32* %a unordered, align 4
-                             %1 = load atomic i32, i32* %b unordered, align 4
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]);
-}
-
-// Checks that stores that have the same type are mapped to the same unsigned
-// integer.
-TEST(IRInstructionMapper, StoreSimilarType) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32* %a, i32* %b) {
-                          bb0:
-                             store i32 1, i32* %a
-                             store i32 2, i32* %a
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]);
-}
-
-// Checks that stores that have the different types are mapped to
-// different unsigned integers.
-TEST(IRInstructionMapper, StoreDifferentType) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32* %a, i64* %b) {
-                          bb0:
-                             store i32 1, i32* %a
-                             store i64 1, i64* %b
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]);
-}
-
-// Checks that stores that have the different aligns are mapped to different
-// unsigned integers.
-TEST(IRInstructionMapper, StoreDifferentAlign) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32* %a, i32* %b) {
-                          bb0:
-                             store i32 1, i32* %a, align 4
-                             store i32 1, i32* %b, align 8
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]);
-}
-
-// Checks that stores that have the different volatile settings are mapped to
-// different unsigned integers.
-TEST(IRInstructionMapper, StoreDifferentVolatile) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32* %a, i32* %b) {
-                          bb0:
-                             store volatile i32 1, i32* %a
-                             store i32 1, i32* %b
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]);
-}
-
-// Checks that stores that have the same volatile settings are mapped to
-// different unsigned integers.
-TEST(IRInstructionMapper, StoreSameVolatile) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32* %a, i32* %b) {
-                          bb0:
-                             store volatile i32 1, i32* %a
-                             store volatile i32 1, i32* %b
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]);
-}
-
-// Checks that loads that have the same atomicity settings are mapped to
-// different unsigned integers.
-TEST(IRInstructionMapper, StoreSameAtomic) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32* %a, i32* %b) {
-                          bb0:
-                             store atomic i32 1, i32* %a unordered, align 4
-                             store atomic i32 1, i32* %b unordered, align 4
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]);
-}
-
-// Checks that loads that have the different atomicity settings are mapped to
-// different unsigned integers.
-TEST(IRInstructionMapper, StoreDifferentAtomic) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32* %a, i32* %b) {
-                          bb0:
-                             store atomic i32 1, i32* %a unordered, align 4
-                             store atomic i32 1, i32* %b monotonic, align 4
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-  ASSERT_TRUE(UnsignedVec.size() == 3);
-  ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]);
-}
-
-// In most cases, the illegal instructions we are collecting don't require any
-// sort of setup. In these cases, we can just only have illegal instructions,
-// and the mapper will create 0 length vectors, and we can check that.
-
-// In cases where we have legal instructions needed to set up the illegal
-// instruction, to check illegal instructions are assigned unsigned integers
-// from the maximum value decreasing to 0, it will be greater than a legal
-// instruction that comes after. So to check that we have an illegal
-// instruction, we place a legal instruction after an illegal instruction, and
-// check that the illegal unsigned integer is greater than the unsigned integer
-// of the legal instruction.
-
-// Checks that the branch is mapped to be illegal since there is extra checking
-// needed to ensure that a branch in one region is branching to an isomorphic
-// location in a different region.
-TEST(IRInstructionMapper, BranchIllegal) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a, i32 %b) {
-                          bb0:
-                             %0 = icmp slt i32 %a, %b
-                             br i1 %0, label %bb0, label %bb1
-                          bb1:
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
-}
-
-// Checks that a PHINode is mapped to be illegal since there is extra checking
-// needed to ensure that a branch in one region is bin an isomorphic
-// location in a different region.
-TEST(IRInstructionMapper, PhiIllegal) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a, i32 %b) {
-                          bb0:
-                             %0 = phi i1 [ 0, %bb0 ], [ %0, %bb1 ]
-                             ret i32 0
-                          bb1:
-                             ret i32 1
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
-}
-
-// Checks that an alloca instruction is mapped to be illegal.
-TEST(IRInstructionMapper, AllocaIllegal) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a, i32 %b) {
-                          bb0:
-                             %0 = alloca i32
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
-}
-
-// Checks that an getelementptr instruction is mapped to be illegal. There is
-// extra checking required for the parameters if a getelementptr has more than
-// two operands.
-TEST(IRInstructionMapper, GetElementPtrIllegal) {
-  StringRef ModuleString = R"(
-                          %struct.RT = type { i8, [10 x [20 x i32]], i8 }
-                          %struct.ST = type { i32, double, %struct.RT }
-                          define i32 @f(%struct.ST* %s, i32 %a, i32 %b) {
-                          bb0:
-                             %0 = getelementptr inbounds %struct.ST, %struct.ST* %s, i64 1
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
-}
-
-// Checks that a call instruction is mapped to be illegal. We have to perform
-// extra checks to ensure that both the name and function type are the same.
-TEST(IRInstructionMapper, CallIllegal) {
-  StringRef ModuleString = R"(
-                          declare i32 @f1(i32, i32)
-                          define i32 @f(i32 %a, i32 %b) {
-                          bb0:
-                             %0 = call i32 @f1(i32 %a, i32 %b)
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
-}
-
-// Checks that an invoke instruction is mapped to be illegal. Invoke
-// instructions are considered to be illegal because of the change in the
-// control flow that is currently not recognized.
-TEST(IRInstructionMapper, InvokeIllegal) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i8 *%gep1, i32 %b) {
-                          then:
-                            invoke i32 undef(i8* undef)
-                               to label %invoke unwind label %lpad
-
-                          invoke:
-                            unreachable
-
-                          lpad:
-                            landingpad { i8*, i32 }
-                               catch i8* null
-                            unreachable
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
-}
-
-// Checks that an callbr instructions are considered to be illegal. Callbr
-// instructions are considered to be illegal because of the change in the
-// control flow that is currently not recognized.
-TEST(IRInstructionMapper, CallBrInstIllegal) {
-  StringRef ModuleString = R"(
-                          define void @test() {
-                          fail:
-                            ret void
-                          }
-
-                          define i32 @f(i32 %a, i32 %b) {
-                          bb0:
-                            callbr void asm "xorl $0, $0; jmp ${1:l}", "r,X,~{dirflag},~{fpsr},~{flags}"(i32 %a, i8* blockaddress(@test, %fail)) to label %normal [label %fail]
-                          fail:
-                            ret i32 0
-                          normal:
-                            ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
-}
-
-// Checks that an debuginfo intrinsics are mapped to be invisible. Since they
-// do not semantically change the program, they can be recognized as similar.
-TEST(IRInstructionMapper, DebugInfoInvisible) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a, i32 %b) {
-                          then:
-                            %0 = add i32 %a, %b
-                            call void @llvm.dbg.value(metadata !0)
-                            %1 = add i32 %a, %b
-                            ret i32 0
-                          }
-
-                          declare void @llvm.dbg.value(metadata)
-                          !0 = distinct !{!"test\00", i32 10})";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(3));
-}
-
-// The following are all exception handling intrinsics. We do not currently
-// handle these instruction because they are very context dependent.
-
-// Checks that an eh.typeid.for intrinsic is mapped to be illegal.
-TEST(IRInstructionMapper, ExceptionHandlingTypeIdIllegal) {
-  StringRef ModuleString = R"(
-                          @_ZTIi = external constant i8*
-                          define i32 @f() {
-                          then:
-                            %0 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
-                            ret i32 0
-                          }
-
-                          declare i32 @llvm.eh.typeid.for(i8*))";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
-}
-
-// Checks that an eh.exceptioncode intrinsic is mapped to be illegal.
-TEST(IRInstructionMapper, ExceptionHandlingExceptionCodeIllegal) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a, i32 %b) {
-                          entry:
-                            %0 = catchswitch within none [label %__except] unwind to caller
-
-                          __except:
-                            %1 = catchpad within %0 [i8* null]
-                            catchret from %1 to label %__except
-
-                          then:
-                            %2 = call i32 @llvm.eh.exceptioncode(token %1)
-                            ret i32 0
-                          }
-
-                          declare i32 @llvm.eh.exceptioncode(token))";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
-}
-
-// Checks that an eh.unwind intrinsic is mapped to be illegal.
-TEST(IRInstructionMapper, ExceptionHandlingUnwindIllegal) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a, i32 %b) {
-                          entry:
-                            call void @llvm.eh.unwind.init()
-                            ret i32 0
-                          }
-
-                          declare void @llvm.eh.unwind.init())";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
-}
-
-// Checks that an eh.exceptionpointer intrinsic is mapped to be illegal.
-TEST(IRInstructionMapper, ExceptionHandlingExceptionPointerIllegal) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a, i32 %b) {
-                          entry:
-                            %0 = call i8* @llvm.eh.exceptionpointer.p0i8(i32 0)
-                            ret i32 0
-                          }
-
-                          declare i8* @llvm.eh.exceptionpointer.p0i8(i32))";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
-}
-
-// Checks that a catchpad instruction is mapped to an illegal value.
-TEST(IRInstructionMapper, CatchpadIllegal) {
-  StringRef ModuleString = R"(
-                          declare void @llvm.donothing() nounwind readnone
-
-                          define void @function() personality i8 3 {
-                          entry:
-                            invoke void @llvm.donothing() to label %normal unwind label %exception
-                          exception:
-                            %cs1 = catchswitch within none [label %catchpad1] unwind to caller
-                          catchpad1:
-                            catchpad within %cs1 []
-                            br label %normal
-                          normal:
-                            ret void
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  getVectors(*M, InstrList, UnsignedVec);
-
-  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
-}
-
-// Checks that a cleanuppad instruction is mapped to an illegal value.
-TEST(IRInstructionMapper, CleanuppadIllegal) { - StringRef ModuleString = R"( - declare void @llvm.donothing() nounwind readnone - - define void @function() personality i8 3 { - entry: - invoke void @llvm.donothing() to label %normal unwind label %exception - exception: - %cs1 = catchswitch within none [label %catchpad1] unwind to caller - catchpad1: - %clean = cleanuppad within none [] - br label %normal - normal: - ret void - })"; - LLVMContext Context; - std::unique_ptr M = makeLLVMModule(Context, ModuleString); - - std::vector InstrList; - std::vector UnsignedVec; - - getVectors(*M, InstrList, UnsignedVec); - - ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(0)); -} - -// The following three instructions are memory transfer and setting based, which -// are considered illegal since is extra checking needed to handle the address -// space checking. - -// Checks that a memset instruction is mapped to an illegal value. -TEST(IRInstructionMapper, MemSetIllegal) { - StringRef ModuleString = R"( - declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) - - define i64 @function(i64 %x, i64 %z, i64 %n) { - entry: - %pool = alloca [59 x i64], align 4 - %tmp = bitcast [59 x i64]* %pool to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) - %cmp3 = icmp eq i64 %n, 0 - %a = add i64 %x, %z - %c = add i64 %x, %z - ret i64 0 - })"; - LLVMContext Context; - std::unique_ptr M = makeLLVMModule(Context, ModuleString); - - std::vector InstrList; - std::vector UnsignedVec; - - getVectors(*M, InstrList, UnsignedVec); - - ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(6)); - ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]); -} - -// Checks that a memcpy instruction is mapped to an illegal value. -TEST(IRInstructionMapper, MemCpyIllegal) { - StringRef ModuleString = R"( - declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) - - define i64 @function(i64 %x, i64 %z, i64 %n) { - entry: - %pool = alloca [59 x i64], align 4 - %tmp = bitcast [59 x i64]* %pool to i8* - call void @llvm.memcpy.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) - %cmp3 = icmp eq i64 %n, 0 - %a = add i64 %x, %z - %c = add i64 %x, %z - ret i64 0 - })"; - LLVMContext Context; - std::unique_ptr M = makeLLVMModule(Context, ModuleString); - - std::vector InstrList; - std::vector UnsignedVec; - - getVectors(*M, InstrList, UnsignedVec); - - ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(6)); - ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]); -} - -// Checks that a memmove instruction is mapped to an illegal value. 
-TEST(IRInstructionMapper, MemMoveIllegal) { - StringRef ModuleString = R"( - declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) - - define i64 @function(i64 %x, i64 %z, i64 %n) { - entry: - %pool = alloca [59 x i64], align 4 - %tmp = bitcast [59 x i64]* %pool to i8* - call void @llvm.memmove.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) - %cmp3 = icmp eq i64 %n, 0 - %a = add i64 %x, %z - %c = add i64 %x, %z - ret i64 0 - })"; - LLVMContext Context; - std::unique_ptr M = makeLLVMModule(Context, ModuleString); - - std::vector InstrList; - std::vector UnsignedVec; - - getVectors(*M, InstrList, UnsignedVec); - - ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(6)); - ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]); -} - -// Checks that a variable argument instructions are mapped to an illegal value. -// We exclude variable argument instructions since variable arguments -// requires extra checking of the argument list. -TEST(IRInstructionMapper, VarArgsIllegal) { - StringRef ModuleString = R"( - declare void @llvm.va_start(i8*) - declare void @llvm.va_copy(i8*, i8*) - declare void @llvm.va_end(i8*) - - define i32 @func1(i32 %a, double %b, i8* %v, ...) nounwind { - entry: - %a.addr = alloca i32, align 4 - %b.addr = alloca double, align 8 - %ap = alloca i8*, align 4 - %c = alloca i32, align 4 - store i32 %a, i32* %a.addr, align 4 - store double %b, double* %b.addr, align 8 - %ap1 = bitcast i8** %ap to i8* - call void @llvm.va_start(i8* %ap1) - store double %b, double* %b.addr, align 8 - store double %b, double* %b.addr, align 8 - %0 = va_arg i8** %ap, i32 - store double %b, double* %b.addr, align 8 - store double %b, double* %b.addr, align 8 - call void @llvm.va_copy(i8* %v, i8* %ap1) - store double %b, double* %b.addr, align 8 - store double %b, double* %b.addr, align 8 - call void @llvm.va_end(i8* %ap1) - store i32 %0, i32* %c, align 4 - %tmp = load i32, i32* %c, align 4 - ret i32 %tmp - })"; - LLVMContext Context; - std::unique_ptr M = makeLLVMModule(Context, ModuleString); - - std::vector InstrList; - std::vector UnsignedVec; - - getVectors(*M, InstrList, UnsignedVec); - - ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(16)); - ASSERT_TRUE(UnsignedVec[4] < UnsignedVec[3]); - ASSERT_TRUE(UnsignedVec[7] < UnsignedVec[6]); - ASSERT_TRUE(UnsignedVec[10] < UnsignedVec[9]); - ASSERT_TRUE(UnsignedVec[13] < UnsignedVec[12]); -} - -// Check the length of adding two illegal instructions one after th other. We -// should find that only one element is added for each illegal range. -TEST(IRInstructionMapper, RepeatedIllegalLength) { - StringRef ModuleString = R"( - define i32 @f(i32 %a, i32 %b) { - bb0: - %0 = add i32 %a, %b - %1 = mul i32 %a, %b - %2 = call i32 @f(i32 %a, i32 %b) - %3 = call i32 @f(i32 %a, i32 %b) - %4 = add i32 %a, %b - %5 = mul i32 %a, %b - ret i32 0 - })"; - LLVMContext Context; - std::unique_ptr M = makeLLVMModule(Context, ModuleString); - - std::vector InstrList; - std::vector UnsignedVec; - - getVectors(*M, InstrList, UnsignedVec); - - // Check that the size of the unsigned vector and the instruction list are the - // same as a safety check. - ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); - - // Make sure that the unsigned vector is the expected size. 
-  ASSERT_TRUE(UnsignedVec.size() == 6);
-}

From 0dd4d70ec20cebb951bd2e0e6525b056fb8dc86c Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot
Date: Thu, 17 Sep 2020 03:02:00 +0000
Subject: [PATCH 0935/1079] [gn build] Port a895040eb02

---
 llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn       | 1 -
 llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn | 1 -
 2 files changed, 2 deletions(-)

diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
index 8f86e7fdddcc3..335e54b4f68c5 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
@@ -52,7 +52,6 @@ static_library("Analysis") {
     "GlobalsModRef.cpp",
     "GuardUtils.cpp",
     "HeatUtils.cpp",
-    "IRSimilarityIdentifier.cpp",
     "IVDescriptors.cpp",
     "IVUsers.cpp",
     "IndirectCallPromotionAnalysis.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn
index 50c02aa2214ef..6adc9866e883f 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn
@@ -25,7 +25,6 @@ unittest("AnalysisTests") {
     "DomTreeUpdaterTest.cpp",
     "FunctionPropertiesAnalysisTest.cpp",
     "GlobalsModRefTest.cpp",
-    "IRSimilarityIdentifierTest.cpp",
     "IVDescriptorsTest.cpp",
     "LazyCallGraphTest.cpp",
     "LoadsTest.cpp",

From 11201315d5881a135faa5aa87f415ce03f99eb96 Mon Sep 17 00:00:00 2001
From: Jianzhou Zhao
Date: Sat, 12 Sep 2020 19:35:17 +0000
Subject: [PATCH 0936/1079] Flush bitcode incrementally for LTO output

The bitcode writer does not flush its buffer until the end by default. This
is fine for small bitcode files, but when -flto,--plugin-opt=emit-llvm,-gmlt
are used, the final bitcode file is large, for example, >8G. Keeping all of
that data in memory consumes a lot of memory.

This change allows the bitcode writer to flush data to disk early, once the
buffered data size rises above a threshold. This is only enabled when lld
emits LLVM bitcode.

One issue to address is backpatching the bitcode: subblock lengths, function
body indexes, and metadata indexes need to be backfilled after the fact. To
allow the buffer to be flushed partially, we introduce raw_fd_stream, which
supports read/seek/write and so enables backpatching bitcode that has
already been flushed to disk.

Reviewed-by: tejohnson, MaskRay

Differential Revision: https://reviews.llvm.org/D86905
---
 lld/ELF/LTO.cpp                               |  16 ++-
 llvm/include/llvm/Bitcode/BitcodeWriter.h     |   2 +-
 llvm/include/llvm/Bitstream/BitstreamWriter.h | 100 ++++++++++++++++--
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp     |  12 ++-
 4 files changed, 115 insertions(+), 15 deletions(-)

diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index ae77fadcc78d3..30281a1541f1a 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -57,6 +57,19 @@ static std::unique_ptr<raw_fd_ostream> openFile(StringRef file) {
   return ret;
 }
 
+// The merged bitcode after LTO is large. Try opening a file stream that
+// supports reading, seeking and writing. Such a file allows BitcodeWriter to
+// flush buffered data to reduce memory consumption. If this fails, open a
+// file stream that only supports writing.
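+// (raw_fd_stream opens the file for both reading and writing, which is what
+// BitstreamWriter needs in order to backpatch length placeholders that have
+// already been flushed to disk.)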
+static std::unique_ptr<raw_fd_ostream> openLTOOutputFile(StringRef file) {
+  std::error_code ec;
+  std::unique_ptr<raw_fd_ostream> fs =
+      std::make_unique<raw_fd_stream>(file, ec);
+  if (!ec)
+    return fs;
+  return openFile(file);
+}
+
 static std::string getThinLTOOutputFile(StringRef modulePath) {
   return lto::getThinLTOOutputFile(
       std::string(modulePath), std::string(config->thinLTOPrefixReplace.first),
@@ -151,7 +164,8 @@ static lto::Config createConfig() {
 
   if (config->emitLLVM) {
     c.PostInternalizeModuleHook = [](size_t task, const Module &m) {
-      if (std::unique_ptr<raw_fd_ostream> os = openFile(config->outputFile))
+      if (std::unique_ptr<raw_fd_ostream> os =
+              openLTOOutputFile(config->outputFile))
         WriteBitcodeToFile(m, *os, false);
       return false;
     };
diff --git a/llvm/include/llvm/Bitcode/BitcodeWriter.h b/llvm/include/llvm/Bitcode/BitcodeWriter.h
index 5701c07a2c4ab..74e9d103b7f3b 100644
--- a/llvm/include/llvm/Bitcode/BitcodeWriter.h
+++ b/llvm/include/llvm/Bitcode/BitcodeWriter.h
@@ -47,7 +47,7 @@ class raw_ostream;
 
   public:
     /// Create a BitcodeWriter that writes to Buffer.
-    BitcodeWriter(SmallVectorImpl<char> &Buffer);
+    BitcodeWriter(SmallVectorImpl<char> &Buffer, raw_fd_stream *FS = nullptr);
 
     ~BitcodeWriter();
 
diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h
index 162a0fea09132..3faadf0095a67 100644
--- a/llvm/include/llvm/Bitstream/BitstreamWriter.h
+++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h
@@ -20,17 +20,27 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Bitstream/BitCodes.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/raw_ostream.h"
 #include <vector>
 
 namespace llvm {
 
 class BitstreamWriter {
+  /// Out - The buffer that keeps unflushed bytes.
   SmallVectorImpl<char> &Out;
 
+  /// FS - The file stream that Out flushes to. If FS is nullptr, it does not
+  /// support read or seek, and Out cannot be flushed until all data are
+  /// written.
+  raw_fd_stream *FS;
+
+  /// FlushThreshold - If FS is valid, this is the threshold (in bytes) at
+  /// which to flush to FS.
+  const uint64_t FlushThreshold;
+
   /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
   unsigned CurBit;
 
-  /// CurValue - The current value. Only bits < CurBit are valid.
+  /// CurValue - The current value.  Only bits < CurBit are valid.
   uint32_t CurValue;
 
   /// CurCodeSize - This is the declared size of code values used for the
@@ -64,15 +74,19 @@
 
   void WriteByte(unsigned char Value) {
     Out.push_back(Value);
+    FlushToFile();
   }
 
   void WriteWord(unsigned Value) {
     Value = support::endian::byte_swap<uint32_t, support::little>(Value);
     Out.append(reinterpret_cast<const char *>(&Value),
               reinterpret_cast<const char *>(&Value + 1));
+    FlushToFile();
   }
 
-  size_t GetBufferOffset() const { return Out.size(); }
+  uint64_t GetNumOfFlushedBytes() const { return FS ? FS->tell() : 0; }
+
+  size_t GetBufferOffset() const { return Out.size() + GetNumOfFlushedBytes(); }
 
   size_t GetWordIndex() const {
     size_t Offset = GetBufferOffset();
@@ -80,9 +94,29 @@
     return Offset / 4;
   }
 
+  /// If the related file stream supports reading, seeking and writing, flush
+  /// the buffer if its size is above a threshold.
+  void FlushToFile() {
+    if (!FS)
+      return;
+    if (Out.size() < FlushThreshold)
+      return;
+    FS->write((char *)&Out.front(), Out.size());
+    Out.clear();
+  }
+
 public:
-  explicit BitstreamWriter(SmallVectorImpl<char> &O)
-      : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {}
+  /// Create a BitstreamWriter that writes to Buffer \p O.
+  ///
+  /// \p FS is the file stream that \p O flushes to incrementally. If \p FS is
+  /// null, \p O does not flush incrementally, but writes to disk at the end.
+  ///
+  /// \p FlushThreshold is the threshold (in MiB) at which to flush \p O if
+  /// \p FS is valid.
+  BitstreamWriter(SmallVectorImpl<char> &O, raw_fd_stream *FS = nullptr,
+                  uint32_t FlushThreshold = 512)
+      : Out(O), FS(FS), FlushThreshold(FlushThreshold << 20), CurBit(0),
+        CurValue(0), CurCodeSize(2) {}
 
   ~BitstreamWriter() {
     assert(CurBit == 0 && "Unflushed data remaining");
@@ -104,11 +138,59 @@
   void BackpatchWord(uint64_t BitNo, unsigned NewWord) {
     using namespace llvm::support;
     uint64_t ByteNo = BitNo / 8;
-    assert((!endian::readAtBitAlignment<uint32_t, little, unaligned>(
-               &Out[ByteNo], BitNo & 7)) &&
-           "Expected to be patching over 0-value placeholders");
-    endian::writeAtBitAlignment<uint32_t, little, unaligned>(
-        &Out[ByteNo], NewWord, BitNo & 7);
+    uint64_t StartBit = BitNo & 7;
+    uint64_t NumOfFlushedBytes = GetNumOfFlushedBytes();
+
+    if (ByteNo >= NumOfFlushedBytes) {
+      assert((!endian::readAtBitAlignment<uint32_t, little, unaligned>(
+                 &Out[ByteNo - NumOfFlushedBytes], StartBit)) &&
+             "Expected to be patching over 0-value placeholders");
+      endian::writeAtBitAlignment<uint32_t, little, unaligned>(
+          &Out[ByteNo - NumOfFlushedBytes], NewWord, StartBit);
+      return;
+    }
+
+    // If the byte offset to backpatch is flushed, use seek to backfill data.
+    // First, save the file position to restore later.
+    uint64_t CurPos = FS->tell();
+
+    // Copy data to update into Bytes from the file FS and the buffer Out.
+    char Bytes[8];
+    size_t BytesNum = StartBit ? 8 : 4;
+    size_t BytesFromDisk = std::min(BytesNum, NumOfFlushedBytes - ByteNo);
+    size_t BytesFromBuffer = BytesNum - BytesFromDisk;
+
+    // When unaligned, copy existing data into Bytes from the file FS and the
+    // buffer Out so that it can be updated before writing. For debug builds,
+    // read bytes unconditionally in order to check that the existing value is
+    // 0 as expected.
+#ifdef NDEBUG
+    if (StartBit)
+#endif
+    {
+      FS->seek(ByteNo);
+      ssize_t BytesRead = FS->read(Bytes, BytesFromDisk);
+      (void)BytesRead; // silence warning
+      assert(BytesRead >= 0 && static_cast<size_t>(BytesRead) == BytesFromDisk);
+      for (size_t i = 0; i < BytesFromBuffer; ++i)
+        Bytes[BytesFromDisk + i] = Out[i];
+      assert((!endian::readAtBitAlignment<uint32_t, little, unaligned>(
+                 Bytes, StartBit)) &&
+             "Expected to be patching over 0-value placeholders");
+    }
+
+    // Update Bytes in terms of bit offset and value.
+    endian::writeAtBitAlignment<uint32_t, little, unaligned>(Bytes, NewWord,
+                                                             StartBit);
+
+    // Copy updated data back to the file FS and the buffer Out.
+    FS->seek(ByteNo);
+    FS->write(Bytes, BytesFromDisk);
+    for (size_t i = 0; i < BytesFromBuffer; ++i)
+      Out[i] = Bytes[BytesFromDisk + i];
+
+    // Restore the file position.
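+    // Seeking back to the position saved in CurPos keeps subsequent flushes
+    // appending at the end of the stream rather than overwriting data in the
+    // middle of it.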
+    FS->seek(CurPos);
   }
 
   void BackpatchWord64(uint64_t BitNo, uint64_t Val) {
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 28384bcb354fd..26874c9ac364f 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -86,6 +86,9 @@ static cl::opt<unsigned>
     IndexThreshold("bitcode-mdindex-threshold", cl::Hidden, cl::init(25),
                    cl::desc("Number of metadatas above which we emit an index "
                             "to enable lazy-loading"));
+static cl::opt<uint32_t> FlushThreshold(
+    "bitcode-flush-threshold", cl::Hidden, cl::init(512),
+    cl::desc("The threshold (in MiB) for flushing LLVM bitcode."));
 
 static cl::opt<bool> WriteRelBFToSummary(
     "write-relbf-to-summary", cl::Hidden, cl::init(false),
@@ -4453,8 +4456,8 @@ static void writeBitcodeHeader(BitstreamWriter &Stream) {
   Stream.Emit(0xD, 4);
 }
 
-BitcodeWriter::BitcodeWriter(SmallVectorImpl<char> &Buffer)
-    : Buffer(Buffer), Stream(new BitstreamWriter(Buffer)) {
+BitcodeWriter::BitcodeWriter(SmallVectorImpl<char> &Buffer, raw_fd_stream *FS)
+    : Buffer(Buffer), Stream(new BitstreamWriter(Buffer, FS, FlushThreshold)) {
   writeBitcodeHeader(*Stream);
 }
 
@@ -4565,7 +4568,7 @@ void llvm::WriteBitcodeToFile(const Module &M, raw_ostream &Out,
   if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
     Buffer.insert(Buffer.begin(), BWH_HeaderSize, 0);
 
-  BitcodeWriter Writer(Buffer);
+  BitcodeWriter Writer(Buffer, dyn_cast<raw_fd_stream>(&Out));
   Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash,
                      ModHash);
   Writer.writeSymtab();
@@ -4575,7 +4578,8 @@ void llvm::WriteBitcodeToFile(const Module &M, raw_ostream &Out,
   emitDarwinBCHeaderAndTrailer(Buffer, TT);
 
   // Write the generated bitstream to "Out".
-  Out.write((char*)&Buffer.front(), Buffer.size());
+  if (!Buffer.empty())
+    Out.write((char *)&Buffer.front(), Buffer.size());
 }
 
 void IndexBitcodeWriter::write() {

From 352a55ef06a9dcb3dfeb45302e9789da24b513c3 Mon Sep 17 00:00:00 2001
From: Jianzhou Zhao
Date: Thu, 17 Sep 2020 03:48:36 +0000
Subject: [PATCH 0937/1079] Add the header of std::min

fixing
https://github.com/llvm/llvm-project/commit/11201315d5881a135faa5aa87f415ce03f99eb96
---
 llvm/include/llvm/Bitstream/BitstreamWriter.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h
index 3faadf0095a67..d5593d6ea9f05 100644
--- a/llvm/include/llvm/Bitstream/BitstreamWriter.h
+++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h
@@ -21,6 +21,7 @@
 #include "llvm/Bitstream/BitCodes.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm>
 #include <vector>
 
 namespace llvm {

From aec80c5cfd1bda8e630fca0f3ed2a84659f68635 Mon Sep 17 00:00:00 2001
From: Jianzhou Zhao
Date: Thu, 17 Sep 2020 04:02:19 +0000
Subject: [PATCH 0938/1079] Fix the arguments of std::min

fixing
https://github.com/llvm/llvm-project/commit/11201315d5881a135faa5aa87f415ce03f99eb96
---
 llvm/include/llvm/Bitstream/BitstreamWriter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h
index d5593d6ea9f05..8dc135e6404da 100644
--- a/llvm/include/llvm/Bitstream/BitstreamWriter.h
+++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h
@@ -158,7 +158,7 @@ class BitstreamWriter {
     // Copy data to update into Bytes from the file FS and the buffer Out.
     char Bytes[8];
     size_t BytesNum = StartBit ? 8 : 4;
-    size_t BytesFromDisk = std::min(BytesNum, NumOfFlushedBytes - ByteNo);
+    size_t BytesFromDisk = std::min(static_cast<uint64_t>(BytesNum), NumOfFlushedBytes - ByteNo);
     size_t BytesFromBuffer = BytesNum - BytesFromDisk;
 
     // When unaligned, copy existing data into Bytes from the file FS and the

From 57dd92746a53526bd7a86c1cfc7c0dce57a2e170 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere
Date: Wed, 16 Sep 2020 21:11:40 -0700
Subject: [PATCH 0939/1079] [lldb] Return FileSP and StreamFileSP by value in
 IOHandler (NFC)

Smart pointers should be returned by value.
---
 lldb/include/lldb/Core/IOHandler.h | 6 +++---
 lldb/source/Core/IOHandler.cpp     | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/lldb/include/lldb/Core/IOHandler.h b/lldb/include/lldb/Core/IOHandler.h
index c96dc1cd18880..2e8f3225fd5f7 100644
--- a/lldb/include/lldb/Core/IOHandler.h
+++ b/lldb/include/lldb/Core/IOHandler.h
@@ -128,11 +128,11 @@ class IOHandler {
 
   FILE *GetErrorFILE();
 
-  lldb::FileSP &GetInputFileSP();
+  lldb::FileSP GetInputFileSP();
 
-  lldb::StreamFileSP &GetOutputStreamFileSP();
+  lldb::StreamFileSP GetOutputStreamFileSP();
 
-  lldb::StreamFileSP &GetErrorStreamFileSP();
+  lldb::StreamFileSP GetErrorStreamFileSP();
 
   Debugger &GetDebugger() { return m_debugger; }
 
diff --git a/lldb/source/Core/IOHandler.cpp b/lldb/source/Core/IOHandler.cpp
index 0648cf41f28aa..8c654d9d8a98b 100644
--- a/lldb/source/Core/IOHandler.cpp
+++ b/lldb/source/Core/IOHandler.cpp
@@ -103,11 +103,11 @@ FILE *IOHandler::GetErrorFILE() {
   return (m_error_sp ? m_error_sp->GetFile().GetStream() : nullptr);
 }
 
-FileSP &IOHandler::GetInputFileSP() { return m_input_sp; }
+FileSP IOHandler::GetInputFileSP() { return m_input_sp; }
 
-StreamFileSP &IOHandler::GetOutputStreamFileSP() { return m_output_sp; }
+StreamFileSP IOHandler::GetOutputStreamFileSP() { return m_output_sp; }
 
-StreamFileSP &IOHandler::GetErrorStreamFileSP() { return m_error_sp; }
+StreamFileSP IOHandler::GetErrorStreamFileSP() { return m_error_sp; }
 
 bool IOHandler::GetIsInteractive() {
   return GetInputFileSP() ? GetInputFileSP()->GetIsInteractive() : false;

From c9af34027bc9cb852a4e5e96154a7bd89531a6de Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 16 Sep 2020 21:56:01 -0700
Subject: [PATCH 0940/1079] Add __divmodti4 to match libgcc.

gcc has used this on x86-64 since at least version 7.
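
For reference, the new routine uses the same branch-free sign-handling idiom
as the existing __divmod* builtins: for a two's-complement integer x, the
arithmetic shift s = x >> (bits - 1) produces an all-ones mask when x is
negative and zero otherwise, so (x ^ s) - s conditionally negates x without a
branch. A minimal 64-bit sketch of that identity (illustrative only, not part
of this patch; like the real code, it leaves the minimum value undefined):

    // Branch-free conditional negate: s is -1 if x < 0, else 0.
    long long cond_abs(long long x) {
      long long s = x >> 63; // arithmetic shift yields the sign mask
      return (x ^ s) - s;    // equals -x when s == -1, x when s == 0
    }

__divmodti4 applies this to both operands, performs the unsigned division via
__udivmodti4, and then restores the signs of the quotient (s_a ^ s_b) and the
remainder (s_a) the same way.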
Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D80506 --- compiler-rt/lib/builtins/CMakeLists.txt | 1 + compiler-rt/lib/builtins/README.txt | 2 + compiler-rt/lib/builtins/divmodti4.c | 32 +++++++ .../test/builtins/Unit/divmodti4_test.c | 91 +++++++++++++++++++ 4 files changed, 126 insertions(+) create mode 100644 compiler-rt/lib/builtins/divmodti4.c create mode 100644 compiler-rt/test/builtins/Unit/divmodti4_test.c diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 8dbe15364ab8e..3c50df1797640 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -71,6 +71,7 @@ set(GENERIC_SOURCES divdi3.c divmoddi4.c divmodsi4.c + divmodti4.c divsc3.c divsf3.c divsi3.c diff --git a/compiler-rt/lib/builtins/README.txt b/compiler-rt/lib/builtins/README.txt index f9e1bc805092e..d66d725e7ab59 100644 --- a/compiler-rt/lib/builtins/README.txt +++ b/compiler-rt/lib/builtins/README.txt @@ -87,6 +87,8 @@ du_int __udivmoddi4(du_int a, du_int b, du_int* rem); // a / b, *rem = a % b u tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); // a / b, *rem = a % b unsigned su_int __udivmodsi4(su_int a, su_int b, su_int* rem); // a / b, *rem = a % b unsigned si_int __divmodsi4(si_int a, si_int b, si_int* rem); // a / b, *rem = a % b signed +di_int __divmoddi4(di_int a, di_int b, di_int* rem); // a / b, *rem = a % b signed +ti_int __divmodti4(ti_int a, ti_int b, ti_int* rem); // a / b, *rem = a % b signed diff --git a/compiler-rt/lib/builtins/divmodti4.c b/compiler-rt/lib/builtins/divmodti4.c new file mode 100644 index 0000000000000..b243ba4ef8537 --- /dev/null +++ b/compiler-rt/lib/builtins/divmodti4.c @@ -0,0 +1,32 @@ +//===-- divmodti4.c - Implement __divmodti4 -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __divmodti4 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +// Returns: a / b, *rem = a % b + +COMPILER_RT_ABI ti_int __divmodti4(ti_int a, ti_int b, ti_int *rem) { + const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1; + ti_int s_a = a >> bits_in_tword_m1; // s_a = a < 0 ? -1 : 0 + ti_int s_b = b >> bits_in_tword_m1; // s_b = b < 0 ? -1 : 0 + a = (a ^ s_a) - s_a; // negate if s_a == -1 + b = (b ^ s_b) - s_b; // negate if s_b == -1 + s_b ^= s_a; // sign of quotient + tu_int r; + ti_int q = (__udivmodti4(a, b, &r) ^ s_b) - s_b; // negate if s_b == -1 + *rem = (r ^ s_a) - s_a; // negate if s_a == -1 + return q; +} + +#endif // CRT_HAS_128BIT diff --git a/compiler-rt/test/builtins/Unit/divmodti4_test.c b/compiler-rt/test/builtins/Unit/divmodti4_test.c new file mode 100644 index 0000000000000..a9f70dcf1c1eb --- /dev/null +++ b/compiler-rt/test/builtins/Unit/divmodti4_test.c @@ -0,0 +1,91 @@ +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_divmodti4 +// REQUIRES: int128 +//===-- divmodti4_test.c - Test __divmodti4 -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file tests __divmodti4 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" +#include + +#ifdef CRT_HAS_128BIT + +// Effects: if rem != 0, *rem = a % b +// Returns: a / b + +COMPILER_RT_ABI ti_int __divmodti4(ti_int a, ti_int b, ti_int* rem); + +int test__divmodti4(ti_int a, ti_int b, ti_int expected_q, ti_int expected_r) { + ti_int r; + ti_int q = __divmodti4(a, b, &r); + if (q != expected_q || r != expected_r) + { + utwords at; + at.all = a; + utwords bt; + bt.all = b; + utwords expected_qt; + expected_qt.all = expected_q; + utwords expected_rt; + expected_rt.all = expected_r; + utwords qt; + qt.all = q; + utwords rt; + rt.all = r; + printf("error in __divmodti4: 0x%.16llX%.16llX / 0x%.16llX%.16llX = " + "0x%.16llX%.16llX, R = 0x%.16llX%.16llX, expected 0x%.16llX%.16llX, " + "0x%.16llX%.16llX\n", + at.s.high, at.s.low, bt.s.high, bt.s.low, qt.s.high, qt.s.low, + rt.s.high, rt.s.low, expected_qt.s.high, expected_qt.s.low, + expected_rt.s.high, expected_rt.s.low); + } + return !(q == expected_q && r == expected_r); +} + +char assumption_1[sizeof(ti_int) == 2*sizeof(di_int)] = {0}; + +tu_int tests[][4] = +{ +{ (ti_int) 0, (ti_int) 1, (ti_int) 0, (ti_int) 0 }, +{ (ti_int) 0, (ti_int)-1, (ti_int) 0, (ti_int) 0 }, +{ (ti_int) 2, (ti_int) 1, (ti_int) 2, (ti_int) 0 }, +{ (ti_int) 2, (ti_int)-1, (ti_int)-2, (ti_int) 0 }, +{ (ti_int)-2, (ti_int) 1, (ti_int)-2, (ti_int) 0 }, +{ (ti_int)-2, (ti_int)-1, (ti_int) 2, (ti_int) 0 }, +{ (ti_int) 5, (ti_int) 3, (ti_int) 1, (ti_int) 2 }, +{ (ti_int) 5, (ti_int)-3, (ti_int)-1, (ti_int) 2 }, +{ (ti_int)-5, (ti_int) 3, (ti_int)-1, (ti_int)-2 }, +{ (ti_int)-5, (ti_int)-3, (ti_int) 1, (ti_int)-2 }, +{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int) 1, (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)0x0LL }, +{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)-1, (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)0x0LL }, +{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)-2, (ti_int)0x4000000000000000LL << 64 | 0, (ti_int)0x0LL }, +{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int) 2, (ti_int)0xC000000000000000LL << 64 | 0, (ti_int)0x0LL }, +{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)-3, (ti_int)0x2AAAAAAAAAAAAAAALL << 64 | 0xAAAAAAAAAAAAAAAALL, (ti_int)-2 }, +{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int) 3, (ti_int)0xD555555555555555LL << 64 | 0x5555555555555556LL, (ti_int)-2 }, +}; + +#endif + +int main() +{ +#ifdef CRT_HAS_128BIT + const unsigned N = sizeof(tests) / sizeof(tests[0]); + unsigned i; + for (i = 0; i < N; ++i) + if (test__divmodti4(tests[i][0], tests[i][1], tests[i][2], tests[i][3])) + return 1; + + +#else + printf("skipped\n"); +#endif + return 0; +} From e69092be5247937213865289013185811d0fbc5e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 16 Sep 2020 22:41:30 -0700 Subject: [PATCH 0941/1079] [llvm-cov gcov][test] Move tests to gcov/ And rename llvm-cov.test (misnomer) to basic.test --- .../tools/llvm-cov/{ => gcov}/Inputs/gcov-4.7.gcda | Bin .../tools/llvm-cov/{ => gcov}/Inputs/gcov-4.7.gcno | Bin .../tools/llvm-cov/{ => gcov}/Inputs/gcov-8.gcda | Bin .../tools/llvm-cov/{ => gcov}/Inputs/gcov-8.gcno | Bin .../tools/llvm-cov/{ => gcov}/Inputs/gcov-9.gcda | Bin .../tools/llvm-cov/{ => gcov}/Inputs/gcov-9.gcno | Bin .../llvm-cov/{ => gcov}/Inputs/gcov-fake-4.2.gcda | Bin .../llvm-cov/{ => 
gcov}/Inputs/gcov-fake-4.2.gcno | Bin llvm/test/tools/llvm-cov/{ => gcov}/Inputs/test.cpp | 0 .../test/tools/llvm-cov/{ => gcov}/Inputs/test.gcda | Bin .../test/tools/llvm-cov/{ => gcov}/Inputs/test.gcno | Bin llvm/test/tools/llvm-cov/{ => gcov}/Inputs/test.h | 0 .../{ => gcov}/Inputs/test_file_checksum_fail.gcda | Bin .../{ => gcov}/Inputs/test_func_checksum_fail.gcda | Bin .../{ => gcov}/Inputs/test_no_gcda.cpp.gcov | 0 .../llvm-cov/{ => gcov}/Inputs/test_no_gcda.h.gcov | 0 .../{ => gcov}/Inputs/test_no_options.cpp.gcov | 0 .../{ => gcov}/Inputs/test_no_options.h.gcov | 0 .../llvm-cov/{ => gcov}/Inputs/test_paths.gcda | Bin .../llvm-cov/{ => gcov}/Inputs/test_paths.gcno | Bin .../llvm-cov/{ => gcov}/Inputs/test_read_fail.gcno | Bin .../llvm-cov/{llvm-cov.test => gcov/basic.test} | 0 llvm/test/tools/llvm-cov/{ => gcov}/gcov-4.7.c | 0 llvm/test/tools/llvm-cov/{ => gcov}/gcov-8.c | 0 llvm/test/tools/llvm-cov/{ => gcov}/gcov-9.c | 0 llvm/test/tools/llvm-cov/{ => gcov}/gcov-fake-4.2.c | 0 .../intermediate-format.test} | 0 27 files changed, 0 insertions(+), 0 deletions(-) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/gcov-4.7.gcda (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/gcov-4.7.gcno (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/gcov-8.gcda (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/gcov-8.gcno (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/gcov-9.gcda (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/gcov-9.gcno (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/gcov-fake-4.2.gcda (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/gcov-fake-4.2.gcno (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/test.cpp (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/test.gcda (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/test.gcno (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/test.h (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/test_file_checksum_fail.gcda (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/test_func_checksum_fail.gcda (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/test_no_gcda.cpp.gcov (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/test_no_gcda.h.gcov (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/test_no_options.cpp.gcov (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/test_no_options.h.gcov (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/test_paths.gcda (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/test_paths.gcno (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/Inputs/test_read_fail.gcno (100%) rename llvm/test/tools/llvm-cov/{llvm-cov.test => gcov/basic.test} (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/gcov-4.7.c (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/gcov-8.c (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/gcov-9.c (100%) rename llvm/test/tools/llvm-cov/{ => gcov}/gcov-fake-4.2.c (100%) rename llvm/test/tools/llvm-cov/{gcov-intermediate-format.test => gcov/intermediate-format.test} (100%) diff --git a/llvm/test/tools/llvm-cov/Inputs/gcov-4.7.gcda b/llvm/test/tools/llvm-cov/gcov/Inputs/gcov-4.7.gcda similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/gcov-4.7.gcda rename to llvm/test/tools/llvm-cov/gcov/Inputs/gcov-4.7.gcda diff --git a/llvm/test/tools/llvm-cov/Inputs/gcov-4.7.gcno b/llvm/test/tools/llvm-cov/gcov/Inputs/gcov-4.7.gcno similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/gcov-4.7.gcno rename to 
llvm/test/tools/llvm-cov/gcov/Inputs/gcov-4.7.gcno diff --git a/llvm/test/tools/llvm-cov/Inputs/gcov-8.gcda b/llvm/test/tools/llvm-cov/gcov/Inputs/gcov-8.gcda similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/gcov-8.gcda rename to llvm/test/tools/llvm-cov/gcov/Inputs/gcov-8.gcda diff --git a/llvm/test/tools/llvm-cov/Inputs/gcov-8.gcno b/llvm/test/tools/llvm-cov/gcov/Inputs/gcov-8.gcno similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/gcov-8.gcno rename to llvm/test/tools/llvm-cov/gcov/Inputs/gcov-8.gcno diff --git a/llvm/test/tools/llvm-cov/Inputs/gcov-9.gcda b/llvm/test/tools/llvm-cov/gcov/Inputs/gcov-9.gcda similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/gcov-9.gcda rename to llvm/test/tools/llvm-cov/gcov/Inputs/gcov-9.gcda diff --git a/llvm/test/tools/llvm-cov/Inputs/gcov-9.gcno b/llvm/test/tools/llvm-cov/gcov/Inputs/gcov-9.gcno similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/gcov-9.gcno rename to llvm/test/tools/llvm-cov/gcov/Inputs/gcov-9.gcno diff --git a/llvm/test/tools/llvm-cov/Inputs/gcov-fake-4.2.gcda b/llvm/test/tools/llvm-cov/gcov/Inputs/gcov-fake-4.2.gcda similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/gcov-fake-4.2.gcda rename to llvm/test/tools/llvm-cov/gcov/Inputs/gcov-fake-4.2.gcda diff --git a/llvm/test/tools/llvm-cov/Inputs/gcov-fake-4.2.gcno b/llvm/test/tools/llvm-cov/gcov/Inputs/gcov-fake-4.2.gcno similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/gcov-fake-4.2.gcno rename to llvm/test/tools/llvm-cov/gcov/Inputs/gcov-fake-4.2.gcno diff --git a/llvm/test/tools/llvm-cov/Inputs/test.cpp b/llvm/test/tools/llvm-cov/gcov/Inputs/test.cpp similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/test.cpp rename to llvm/test/tools/llvm-cov/gcov/Inputs/test.cpp diff --git a/llvm/test/tools/llvm-cov/Inputs/test.gcda b/llvm/test/tools/llvm-cov/gcov/Inputs/test.gcda similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/test.gcda rename to llvm/test/tools/llvm-cov/gcov/Inputs/test.gcda diff --git a/llvm/test/tools/llvm-cov/Inputs/test.gcno b/llvm/test/tools/llvm-cov/gcov/Inputs/test.gcno similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/test.gcno rename to llvm/test/tools/llvm-cov/gcov/Inputs/test.gcno diff --git a/llvm/test/tools/llvm-cov/Inputs/test.h b/llvm/test/tools/llvm-cov/gcov/Inputs/test.h similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/test.h rename to llvm/test/tools/llvm-cov/gcov/Inputs/test.h diff --git a/llvm/test/tools/llvm-cov/Inputs/test_file_checksum_fail.gcda b/llvm/test/tools/llvm-cov/gcov/Inputs/test_file_checksum_fail.gcda similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/test_file_checksum_fail.gcda rename to llvm/test/tools/llvm-cov/gcov/Inputs/test_file_checksum_fail.gcda diff --git a/llvm/test/tools/llvm-cov/Inputs/test_func_checksum_fail.gcda b/llvm/test/tools/llvm-cov/gcov/Inputs/test_func_checksum_fail.gcda similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/test_func_checksum_fail.gcda rename to llvm/test/tools/llvm-cov/gcov/Inputs/test_func_checksum_fail.gcda diff --git a/llvm/test/tools/llvm-cov/Inputs/test_no_gcda.cpp.gcov b/llvm/test/tools/llvm-cov/gcov/Inputs/test_no_gcda.cpp.gcov similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/test_no_gcda.cpp.gcov rename to llvm/test/tools/llvm-cov/gcov/Inputs/test_no_gcda.cpp.gcov diff --git a/llvm/test/tools/llvm-cov/Inputs/test_no_gcda.h.gcov b/llvm/test/tools/llvm-cov/gcov/Inputs/test_no_gcda.h.gcov similarity 
index 100% rename from llvm/test/tools/llvm-cov/Inputs/test_no_gcda.h.gcov rename to llvm/test/tools/llvm-cov/gcov/Inputs/test_no_gcda.h.gcov diff --git a/llvm/test/tools/llvm-cov/Inputs/test_no_options.cpp.gcov b/llvm/test/tools/llvm-cov/gcov/Inputs/test_no_options.cpp.gcov similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/test_no_options.cpp.gcov rename to llvm/test/tools/llvm-cov/gcov/Inputs/test_no_options.cpp.gcov diff --git a/llvm/test/tools/llvm-cov/Inputs/test_no_options.h.gcov b/llvm/test/tools/llvm-cov/gcov/Inputs/test_no_options.h.gcov similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/test_no_options.h.gcov rename to llvm/test/tools/llvm-cov/gcov/Inputs/test_no_options.h.gcov diff --git a/llvm/test/tools/llvm-cov/Inputs/test_paths.gcda b/llvm/test/tools/llvm-cov/gcov/Inputs/test_paths.gcda similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/test_paths.gcda rename to llvm/test/tools/llvm-cov/gcov/Inputs/test_paths.gcda diff --git a/llvm/test/tools/llvm-cov/Inputs/test_paths.gcno b/llvm/test/tools/llvm-cov/gcov/Inputs/test_paths.gcno similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/test_paths.gcno rename to llvm/test/tools/llvm-cov/gcov/Inputs/test_paths.gcno diff --git a/llvm/test/tools/llvm-cov/Inputs/test_read_fail.gcno b/llvm/test/tools/llvm-cov/gcov/Inputs/test_read_fail.gcno similarity index 100% rename from llvm/test/tools/llvm-cov/Inputs/test_read_fail.gcno rename to llvm/test/tools/llvm-cov/gcov/Inputs/test_read_fail.gcno diff --git a/llvm/test/tools/llvm-cov/llvm-cov.test b/llvm/test/tools/llvm-cov/gcov/basic.test similarity index 100% rename from llvm/test/tools/llvm-cov/llvm-cov.test rename to llvm/test/tools/llvm-cov/gcov/basic.test diff --git a/llvm/test/tools/llvm-cov/gcov-4.7.c b/llvm/test/tools/llvm-cov/gcov/gcov-4.7.c similarity index 100% rename from llvm/test/tools/llvm-cov/gcov-4.7.c rename to llvm/test/tools/llvm-cov/gcov/gcov-4.7.c diff --git a/llvm/test/tools/llvm-cov/gcov-8.c b/llvm/test/tools/llvm-cov/gcov/gcov-8.c similarity index 100% rename from llvm/test/tools/llvm-cov/gcov-8.c rename to llvm/test/tools/llvm-cov/gcov/gcov-8.c diff --git a/llvm/test/tools/llvm-cov/gcov-9.c b/llvm/test/tools/llvm-cov/gcov/gcov-9.c similarity index 100% rename from llvm/test/tools/llvm-cov/gcov-9.c rename to llvm/test/tools/llvm-cov/gcov/gcov-9.c diff --git a/llvm/test/tools/llvm-cov/gcov-fake-4.2.c b/llvm/test/tools/llvm-cov/gcov/gcov-fake-4.2.c similarity index 100% rename from llvm/test/tools/llvm-cov/gcov-fake-4.2.c rename to llvm/test/tools/llvm-cov/gcov/gcov-fake-4.2.c diff --git a/llvm/test/tools/llvm-cov/gcov-intermediate-format.test b/llvm/test/tools/llvm-cov/gcov/intermediate-format.test similarity index 100% rename from llvm/test/tools/llvm-cov/gcov-intermediate-format.test rename to llvm/test/tools/llvm-cov/gcov/intermediate-format.test From 027d47d1c7ce1708294f5273cde09b24c7cbab77 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Thu, 17 Sep 2020 12:47:38 +0700 Subject: [PATCH 0942/1079] [DebugInfo] Simplify DIEInteger::SizeOf(). An AsmPrinter should always be provided to the method because some forms depend on its parameters. The only place in the codebase which passed a nullptr value was found in the unit tests, so the patch updates it to use some dummy AsmPrinter instead. 
Differential Revision: https://reviews.llvm.org/D85293 --- llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 8 ++++---- llvm/unittests/CodeGen/DIEHashTest.cpp | 23 ++++++++++++++++++++++- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 9b074c89aa93d..39b0b027c7657 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -428,10 +428,10 @@ void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const { /// SizeOf - Determine size of integer value in bytes. /// unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { - dwarf::FormParams Params = {0, 0, dwarf::DWARF32}; - if (AP) - Params = {AP->getDwarfVersion(), uint8_t(AP->getPointerSize()), - AP->OutStreamer->getContext().getDwarfFormat()}; + assert(AP && "AsmPrinter is required to set FormParams"); + dwarf::FormParams Params = {AP->getDwarfVersion(), + uint8_t(AP->getPointerSize()), + AP->OutStreamer->getContext().getDwarfFormat()}; if (Optional FixedSize = dwarf::getFixedFormByteSize(Form, Params)) return *FixedSize; diff --git a/llvm/unittests/CodeGen/DIEHashTest.cpp b/llvm/unittests/CodeGen/DIEHashTest.cpp index 649e13208f0c1..03bb7de5a0ae1 100644 --- a/llvm/unittests/CodeGen/DIEHashTest.cpp +++ b/llvm/unittests/CodeGen/DIEHashTest.cpp @@ -7,12 +7,15 @@ //===----------------------------------------------------------------------===// #include "../lib/CodeGen/AsmPrinter/DIEHash.h" +#include "TestAsmPrinter.h" #include "llvm/ADT/STLExtras.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/DwarfStringPoolEntry.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" +#include "llvm/Support/Host.h" +#include "llvm/Testing/Support/Error.h" #include "gtest/gtest.h" using namespace llvm; @@ -26,6 +29,14 @@ class DIEHashTest : public testing::Test { private: StringMap Pool; + std::unique_ptr TestPrinter; + + void setupTestPrinter() { + auto ExpectedTestPrinter = TestAsmPrinter::create( + sys::getDefaultTargetTriple(), /*DwarfVersion=*/4, dwarf::DWARF32); + ASSERT_THAT_EXPECTED(ExpectedTestPrinter, Succeeded()); + TestPrinter = std::move(ExpectedTestPrinter.get()); + } public: DIEString getString(StringRef S) { @@ -33,6 +44,12 @@ class DIEHashTest : public testing::Test { return DIEString(DwarfStringPoolEntryRef( *Pool.insert(std::make_pair(S, Entry)).first, Entry.isIndexed())); } + + AsmPrinter *getAsmPrinter() { + if (!TestPrinter) + setupTestPrinter(); + return TestPrinter ? TestPrinter->getAP() : nullptr; + } }; TEST_F(DIEHashTest, Data1) { @@ -644,6 +661,10 @@ TEST_F(DIEHashTest, MemberSdata) { // }; // A a; TEST_F(DIEHashTest, MemberBlock) { + if (!this->getAsmPrinter()) + // TODO: Use GTEST_SKIP() when GTest is updated to version 1.10.0 + return; + DIE &A = *DIE::get(Alloc, dwarf::DW_TAG_structure_type); DIEInteger One(1); DIEString AStr = getString("A"); @@ -692,7 +713,7 @@ TEST_F(DIEHashTest, MemberBlock) { A.addChild(std::move(PI)); - uint64_t MD5Res = DIEHash().computeTypeSignature(A); + uint64_t MD5Res = DIEHash(this->getAsmPrinter()).computeTypeSignature(A); ASSERT_EQ(0x493af53ad3d3f651ULL, MD5Res); } } From 4ce84b0e704ee7b8b13e236e65b3bf49da27a91c Mon Sep 17 00:00:00 2001 From: Artur Bialas Date: Wed, 16 Sep 2020 22:53:52 -0700 Subject: [PATCH 0943/1079] [mlir][spirv] Add GroupNonUniformBroadcastOp Added GroupNonUniformBroadcastOp to spirv dialect. 
Differential Revision: https://reviews.llvm.org/D87688 --- mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td | 21 +++--- .../mlir/Dialect/SPIRV/SPIRVNonUniformOps.td | 75 ++++++++++++++++++- mlir/lib/Dialect/SPIRV/SPIRVOps.cpp | 27 +++++++ .../SPIRV/Serialization/non-uniform-ops.mlir | 8 ++ mlir/test/Dialect/SPIRV/non-uniform-ops.mlir | 39 ++++++++++ 5 files changed, 158 insertions(+), 12 deletions(-) diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td index 1fa72bf4dcaba..83150dad514db 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td @@ -3256,6 +3256,7 @@ def SPV_OC_OpGroupBroadcast : I32EnumAttrCase<"OpGroupBroadcast", 263 def SPV_OC_OpNoLine : I32EnumAttrCase<"OpNoLine", 317>; def SPV_OC_OpModuleProcessed : I32EnumAttrCase<"OpModuleProcessed", 330>; def SPV_OC_OpGroupNonUniformElect : I32EnumAttrCase<"OpGroupNonUniformElect", 333>; +def SPV_OC_OpGroupNonUniformBroadcast : I32EnumAttrCase<"OpGroupNonUniformBroadcast", 337>; def SPV_OC_OpGroupNonUniformBallot : I32EnumAttrCase<"OpGroupNonUniformBallot", 339>; def SPV_OC_OpGroupNonUniformIAdd : I32EnumAttrCase<"OpGroupNonUniformIAdd", 349>; def SPV_OC_OpGroupNonUniformFAdd : I32EnumAttrCase<"OpGroupNonUniformFAdd", 350>; @@ -3323,16 +3324,16 @@ def SPV_OpcodeAttr : SPV_OC_OpBranch, SPV_OC_OpBranchConditional, SPV_OC_OpReturn, SPV_OC_OpReturnValue, SPV_OC_OpUnreachable, SPV_OC_OpGroupBroadcast, SPV_OC_OpNoLine, SPV_OC_OpModuleProcessed, SPV_OC_OpGroupNonUniformElect, - SPV_OC_OpGroupNonUniformBallot, SPV_OC_OpGroupNonUniformIAdd, - SPV_OC_OpGroupNonUniformFAdd, SPV_OC_OpGroupNonUniformIMul, - SPV_OC_OpGroupNonUniformFMul, SPV_OC_OpGroupNonUniformSMin, - SPV_OC_OpGroupNonUniformUMin, SPV_OC_OpGroupNonUniformFMin, - SPV_OC_OpGroupNonUniformSMax, SPV_OC_OpGroupNonUniformUMax, - SPV_OC_OpGroupNonUniformFMax, SPV_OC_OpSubgroupBallotKHR, - SPV_OC_OpTypeCooperativeMatrixNV, SPV_OC_OpCooperativeMatrixLoadNV, - SPV_OC_OpCooperativeMatrixStoreNV, SPV_OC_OpCooperativeMatrixMulAddNV, - SPV_OC_OpCooperativeMatrixLengthNV, SPV_OC_OpSubgroupBlockReadINTEL, - SPV_OC_OpSubgroupBlockWriteINTEL + SPV_OC_OpGroupNonUniformBroadcast, SPV_OC_OpGroupNonUniformBallot, + SPV_OC_OpGroupNonUniformIAdd, SPV_OC_OpGroupNonUniformFAdd, + SPV_OC_OpGroupNonUniformIMul, SPV_OC_OpGroupNonUniformFMul, + SPV_OC_OpGroupNonUniformSMin, SPV_OC_OpGroupNonUniformUMin, + SPV_OC_OpGroupNonUniformFMin, SPV_OC_OpGroupNonUniformSMax, + SPV_OC_OpGroupNonUniformUMax, SPV_OC_OpGroupNonUniformFMax, + SPV_OC_OpSubgroupBallotKHR, SPV_OC_OpTypeCooperativeMatrixNV, + SPV_OC_OpCooperativeMatrixLoadNV, SPV_OC_OpCooperativeMatrixStoreNV, + SPV_OC_OpCooperativeMatrixMulAddNV, SPV_OC_OpCooperativeMatrixLengthNV, + SPV_OC_OpSubgroupBlockReadINTEL, SPV_OC_OpSubgroupBlockWriteINTEL ]>; // End opcode section. Generated from SPIR-V spec; DO NOT MODIFY! diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVNonUniformOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVNonUniformOps.td index 34be336bb2a56..da3da3050efce 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVNonUniformOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVNonUniformOps.td @@ -105,6 +105,77 @@ def SPV_GroupNonUniformBallotOp : SPV_Op<"GroupNonUniformBallot", []> { // ----- +def SPV_GroupNonUniformBroadcastOp : SPV_Op<"GroupNonUniformBroadcast", + [NoSideEffect, AllTypesMatch<["value", "result"]>]> { + let summary = [{ + Return the Value of the invocation identified by the id Id to all active + invocations in the group. 
+ }]; + + let description = [{ + Result Type must be a scalar or vector of floating-point type, integer + type, or Boolean type. + + Execution must be Workgroup or Subgroup Scope. + + The type of Value must be the same as Result Type. + + Id must be a scalar of integer type, whose Signedness operand is 0. + + Before version 1.5, Id must come from a constant instruction. Starting + with version 1.5, Id must be dynamically uniform. + + The resulting value is undefined if Id is an inactive invocation, or is + greater than or equal to the size of the group. + + + + ``` + scope ::= `"Workgroup"` | `"Subgroup"` + integer-float-scalar-vector-type ::= integer-type | float-type | + `vector<` integer-literal `x` integer-type `>` | + `vector<` integer-literal `x` float-type `>` + group-non-uniform-broadcast-op ::= ssa-id `=` + `spv.GroupNonUniformBroadcast` scope ssa_use, + ssa_use `:` integer-float-scalar-vector-type `,` integer-type + ```mlir + + #### Example: + + ``` + %scalar_value = ... : f32 + %vector_value = ... : vector<4xf32> + %id = ... : i32 + %0 = spv.GroupNonUniformBroadcast "Subgroup" %scalar_value, %id : f32, i32 + %1 = spv.GroupNonUniformBroadcast "Workgroup" %vector_value, %id : + vector<4xf32>, i32 + ``` + }]; + + let availability = [ + MinVersion, + MaxVersion, + Extension<[]>, + Capability<[SPV_C_GroupNonUniformBallot]> + ]; + + let arguments = (ins + SPV_ScopeAttr:$execution_scope, + SPV_Type:$value, + SPV_Integer:$id + ); + + let results = (outs + SPV_Type:$result + ); + + let assemblyFormat = [{ + $execution_scope operands attr-dict `:` type($value) `,` type($id) + }]; +} + +// ----- + def SPV_GroupNonUniformElectOp : SPV_Op<"GroupNonUniformElect", []> { let summary = [{ Result is true only in the active invocation with the lowest id in the @@ -368,8 +439,8 @@ def SPV_GroupNonUniformFMulOp : def SPV_GroupNonUniformIAddOp : SPV_GroupNonUniformArithmeticOp<"GroupNonUniformIAdd", SPV_Integer, []> { let summary = [{ - An integer add group operation of all Value operands contributed active - by invocations in the group. + An integer add group operation of all Value operands contributed by + active invocations in the group. }]; let description = [{ diff --git a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp index a16dc1c8bc35d..a01177132b27b 100644 --- a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp @@ -16,6 +16,7 @@ #include "mlir/Dialect/SPIRV/SPIRVAttributes.h" #include "mlir/Dialect/SPIRV/SPIRVDialect.h" #include "mlir/Dialect/SPIRV/SPIRVTypes.h" +#include "mlir/Dialect/SPIRV/TargetAndABI.h" #include "mlir/IR/Builders.h" #include "mlir/IR/Function.h" #include "mlir/IR/FunctionImplementation.h" @@ -2043,6 +2044,32 @@ static LogicalResult verify(spirv::GroupNonUniformBallotOp ballotOp) { return success(); } +//===----------------------------------------------------------------------===// +// spv.GroupNonUniformBroadcast +//===----------------------------------------------------------------------===// + +static LogicalResult verify(spirv::GroupNonUniformBroadcastOp broadcastOp) { + spirv::Scope scope = broadcastOp.execution_scope(); + if (scope != spirv::Scope::Workgroup && scope != spirv::Scope::Subgroup) + return broadcastOp.emitOpError( + "execution scope must be 'Workgroup' or 'Subgroup'"); + + // SPIR-V spec: "Before version 1.5, Id must come from a + // constant instruction. 
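+  // Starting with version 1.5, Id must be dynamically uniform." The version
+  // to check against is taken from the enclosing module's target environment
+  // (or the default one) below.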
+ auto targetEnv = spirv::getDefaultTargetEnv(broadcastOp.getContext()); + if (auto spirvModule = broadcastOp.getParentOfType()) + targetEnv = spirv::lookupTargetEnvOrDefault(spirvModule); + + if (targetEnv.getVersion() < spirv::Version::V_1_5) { + auto *idOp = broadcastOp.id().getDefiningOp(); + if (!idOp || !isa(idOp)) // for spec constant + return broadcastOp.emitOpError("id must be the result of a constant op"); + } + + return success(); +} + //===----------------------------------------------------------------------===// // spv.SubgroupBlockReadINTEL //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/SPIRV/Serialization/non-uniform-ops.mlir b/mlir/test/Dialect/SPIRV/Serialization/non-uniform-ops.mlir index ab714dfbaa008..f7b8f6cfc1858 100644 --- a/mlir/test/Dialect/SPIRV/Serialization/non-uniform-ops.mlir +++ b/mlir/test/Dialect/SPIRV/Serialization/non-uniform-ops.mlir @@ -8,6 +8,14 @@ spv.module Logical GLSL450 requires #spv.vce { spv.ReturnValue %0: vector<4xi32> } + // CHECK-LABEL: @group_non_uniform_broadcast + spv.func @group_non_uniform_broadcast(%value: f32) -> f32 "None" { + %one = spv.constant 1 : i32 + // CHECK: spv.GroupNonUniformBroadcast "Subgroup" %{{.*}}, %{{.*}} : f32, i32 + %0 = spv.GroupNonUniformBroadcast "Subgroup" %value, %one : f32, i32 + spv.ReturnValue %0: f32 + } + // CHECK-LABEL: @group_non_uniform_elect spv.func @group_non_uniform_elect() -> i1 "None" { // CHECK: %{{.+}} = spv.GroupNonUniformElect "Workgroup" : i1 diff --git a/mlir/test/Dialect/SPIRV/non-uniform-ops.mlir b/mlir/test/Dialect/SPIRV/non-uniform-ops.mlir index 86c3c2886a4fe..5839ee7c56276 100644 --- a/mlir/test/Dialect/SPIRV/non-uniform-ops.mlir +++ b/mlir/test/Dialect/SPIRV/non-uniform-ops.mlir @@ -28,6 +28,45 @@ func @group_non_uniform_ballot(%predicate: i1) -> vector<4xsi32> { // ----- +//===----------------------------------------------------------------------===// +// spv.NonUniformGroupBroadcast +//===----------------------------------------------------------------------===// + +func @group_non_uniform_broadcast_scalar(%value: f32) -> f32 { + %one = spv.constant 1 : i32 + // CHECK: spv.GroupNonUniformBroadcast "Workgroup" %{{.*}}, %{{.*}} : f32, i32 + %0 = spv.GroupNonUniformBroadcast "Workgroup" %value, %one : f32, i32 + return %0: f32 +} + +// ----- + +func @group_non_uniform_broadcast_vector(%value: vector<4xf32>) -> vector<4xf32> { + %one = spv.constant 1 : i32 + // CHECK: spv.GroupNonUniformBroadcast "Subgroup" %{{.*}}, %{{.*}} : vector<4xf32>, i32 + %0 = spv.GroupNonUniformBroadcast "Subgroup" %value, %one : vector<4xf32>, i32 + return %0: vector<4xf32> +} + +// ----- + +func @group_non_uniform_broadcast_negative_scope(%value: f32, %localid: i32 ) -> f32 { + %one = spv.constant 1 : i32 + // expected-error @+1 {{execution scope must be 'Workgroup' or 'Subgroup'}} + %0 = spv.GroupNonUniformBroadcast "Device" %value, %one : f32, i32 + return %0: f32 +} + +// ----- + +func @group_non_uniform_broadcast_negative_non_const(%value: f32, %localid: i32) -> f32 { + // expected-error @+1 {{id must be the result of a constant op}} + %0 = spv.GroupNonUniformBroadcast "Subgroup" %value, %localid : f32, i32 + return %0: f32 +} + +// ----- + //===----------------------------------------------------------------------===// // spv.GroupNonUniformElect //===----------------------------------------------------------------------===// From c16417f65f9a9eb3718efa3ece63ba910f91f77b Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 16 Sep 2020 
From c16417f65f9a9eb3718efa3ece63ba910f91f77b Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Wed, 16 Sep 2020 23:18:46 -0700
Subject: [PATCH 0944/1079] [llvm-cov gcov] Add --demangled-names (-m)

gcov 4.9 introduced the option.
---
 llvm/include/llvm/ProfileData/GCOV.h          | 10 ++++---
 llvm/lib/ProfileData/GCOV.cpp                 | 30 ++++++++++++++++---
 llvm/lib/ProfileData/LLVMBuild.txt            |  2 +-
 .../tools/llvm-cov/gcov/demangled-names.test  | 10 +++++++
 llvm/tools/llvm-cov/gcov.cpp                  |  9 ++++--
 5 files changed, 50 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/tools/llvm-cov/gcov/demangled-names.test

diff --git a/llvm/include/llvm/ProfileData/GCOV.h b/llvm/include/llvm/ProfileData/GCOV.h
index 452cf458f4e98..2766ff52e4a09 100644
--- a/llvm/include/llvm/ProfileData/GCOV.h
+++ b/llvm/include/llvm/ProfileData/GCOV.h
@@ -47,11 +47,11 @@ enum GCOVVersion { V304, V407, V408, V800, V900 };
 /// A struct for passing gcov options between functions.
 struct Options {
   Options(bool A, bool B, bool C, bool F, bool P, bool U, bool I, bool L,
-          bool N, bool R, bool T, bool X, std::string SourcePrefix)
+          bool M, bool N, bool R, bool T, bool X, std::string SourcePrefix)
       : AllBlocks(A), BranchInfo(B), BranchCount(C), FuncCoverage(F),
         PreservePaths(P), UncondBranch(U), Intermediate(I), LongFileNames(L),
-        NoOutput(N), RelativeOnly(R), UseStdout(T), HashFilenames(X),
-        SourcePrefix(std::move(SourcePrefix)) {}
+        Demangle(M), NoOutput(N), RelativeOnly(R), UseStdout(T),
+        HashFilenames(X), SourcePrefix(std::move(SourcePrefix)) {}

   bool AllBlocks;
   bool BranchInfo;
@@ -61,6 +61,7 @@ struct Options {
   bool UncondBranch;
   bool Intermediate;
   bool LongFileNames;
+  bool Demangle;
   bool NoOutput;
   bool RelativeOnly;
   bool UseStdout;
@@ -232,7 +233,7 @@ class GCOVFunction {

   GCOVFunction(GCOVFile &file) : file(file) {}

-  StringRef getName() const { return Name; }
+  StringRef getName(bool demangle) const;
   StringRef getFilename() const;
   uint64_t getEntryCount() const;
   GCOVBlock &getExitBlock() const;
@@ -255,6 +256,7 @@ class GCOVFunction {
   uint32_t endColumn = 0;
   uint8_t artificial = 0;
   StringRef Name;
+  mutable SmallString<0> demangled;
   unsigned srcIdx;
   SmallVector<std::unique_ptr<GCOVBlock>, 0> blocks;
   SmallVector<std::unique_ptr<GCOVArc>, 0> arcs, treeArcs;
diff --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp
index 0597797c6561b..1d8aec08c0eed 100644
--- a/llvm/lib/ProfileData/GCOV.cpp
+++ b/llvm/lib/ProfileData/GCOV.cpp
@@ -14,6 +14,7 @@
 #include "llvm/ProfileData/GCOV.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Config/llvm-config.h"
+#include "llvm/Demangle/Demangle.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Format.h"
@@ -316,6 +317,26 @@ bool GCOVArc::onTree() const { return flags & GCOV_ARC_ON_TREE; }
 //===----------------------------------------------------------------------===//
 // GCOVFunction implementation.

+StringRef GCOVFunction::getName(bool demangle) const {
+  if (!demangle)
+    return Name;
+  if (demangled.empty()) {
+    do {
+      if (Name.startswith("_Z")) {
+        int status = 0;
+        // Name is guaranteed to be NUL-terminated.
+ char *res = itaniumDemangle(Name.data(), nullptr, nullptr, &status); + if (status == 0) { + demangled = res; + free(res); + break; + } + } + demangled = Name; + } while (0); + } + return demangled; +} StringRef GCOVFunction::getFilename() const { return file.filenames[srcIdx]; } /// getEntryCount - Get the number of times the function was called by @@ -785,7 +806,7 @@ void Context::printSourceToIntermediate(const SourceInfo &si, for (const auto &fs : si.startLineToFunctions) for (const GCOVFunction *f : fs) os << "function:" << f->startLine << ',' << f->getEntryCount() << ',' - << f->Name << '\n'; + << f->getName(options.Demangle) << '\n'; for (size_t lineNum = 1, size = si.lines.size(); lineNum < size; ++lineNum) { const LineInfo &line = si.lines[lineNum]; if (line.blocks.empty()) @@ -832,7 +853,7 @@ void Context::print(StringRef filename, StringRef gcno, StringRef gcda, raw_ostream &os = llvm::outs(); for (GCOVFunction &f : make_pointee_range(file.functions)) { - Summary summary(f.Name); + Summary summary(f.getName(options.Demangle)); collectFunction(f, summary); if (options.FuncCoverage && !options.UseStdout) { os << "Function '" << summary.Name << "'\n"; @@ -900,8 +921,9 @@ void Context::printFunctionDetails(const GCOVFunction &f, if (b.number != 0 && &b != &exitBlock && b.getCount()) ++blocksExec; - os << "function " << f.getName() << " called " << entryCount << " returned " - << formatPercentage(exitCount, entryCount) << "% blocks executed " + os << "function " << f.getName(options.Demangle) << " called " << entryCount + << " returned " << formatPercentage(exitCount, entryCount) + << "% blocks executed " << formatPercentage(blocksExec, f.blocks.size() - 2) << "%\n"; } diff --git a/llvm/lib/ProfileData/LLVMBuild.txt b/llvm/lib/ProfileData/LLVMBuild.txt index 335c2260a0029..2fffab24579b1 100644 --- a/llvm/lib/ProfileData/LLVMBuild.txt +++ b/llvm/lib/ProfileData/LLVMBuild.txt @@ -21,4 +21,4 @@ subdirectories = Coverage type = Library name = ProfileData parent = Libraries -required_libraries = Core Support +required_libraries = Core Support Demangle diff --git a/llvm/test/tools/llvm-cov/gcov/demangled-names.test b/llvm/test/tools/llvm-cov/gcov/demangled-names.test new file mode 100644 index 0000000000000..31cb05fdca574 --- /dev/null +++ b/llvm/test/tools/llvm-cov/gcov/demangled-names.test @@ -0,0 +1,10 @@ +# Test --demangled-names (-m). +RUN: rm -rf %t && mkdir %t && cd %t +RUN: cp %S/Inputs/test.cpp %S/Inputs/test.gcno %S/Inputs/test.gcda . 
+
+RUN: llvm-cov gcov -b -f -m test.gcda | FileCheck %s
+RUN: llvm-cov gcov -b -f --demangled-names test.gcda | FileCheck %s
+RUN: FileCheck %s --check-prefix=BRANCH < test.cpp.gcov
+
+CHECK: Function 'A::B()'
+BRANCH: function A::B() called
diff --git a/llvm/tools/llvm-cov/gcov.cpp b/llvm/tools/llvm-cov/gcov.cpp
index 8d2876b6f42ee..d42e7cd3b551e 100644
--- a/llvm/tools/llvm-cov/gcov.cpp
+++ b/llvm/tools/llvm-cov/gcov.cpp
@@ -115,6 +115,11 @@ int gcovMain(int argc, const char *argv[]) {
                          cl::Grouping, cl::NotHidden,
                          cl::aliasopt(Intermediate));

+  cl::opt<bool> Demangle("demangled-names", cl::init(false),
+                         cl::desc("Demangle function names"));
+  cl::alias DemangleA("m", cl::desc("Alias for --demangled-names"),
+                      cl::Grouping, cl::NotHidden, cl::aliasopt(Demangle));
+
   cl::opt<bool> NoOutput("n", cl::Grouping, cl::init(false),
                          cl::desc("Do not output any .gcov files"));
   cl::alias NoOutputA("no-output", cl::aliasopt(NoOutput));
@@ -163,8 +168,8 @@ int gcovMain(int argc, const char *argv[]) {

   GCOV::Options Options(AllBlocks, BranchProb, BranchCount, FuncSummary,
                         PreservePaths, UncondBranch, Intermediate, LongNames,
-                        NoOutput, RelativeOnly, UseStdout, HashFilenames,
-                        SourcePrefix);
+                        Demangle, NoOutput, RelativeOnly, UseStdout,
+                        HashFilenames, SourcePrefix);

   for (const auto &SourceFile : SourceFiles)
     reportCoverage(SourceFile, ObjectDir, InputGCNO, InputGCDA, DumpGCOV,
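For reference, a typical use of the new flag against the inputs from the
test above; the printed summary lines are abbreviated and illustrative,
not captured output:

  $ llvm-cov gcov -b -f test.gcda                    # mangled: Function '_ZN1A1BEv'
  $ llvm-cov gcov -b -f --demangled-names test.gcda  # demangled: Function 'A::B()'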
From b05629230e9c7e90a2e70a761f7800afb1a8eefd Mon Sep 17 00:00:00 2001
From: Tres Popp
Date: Tue, 15 Sep 2020 18:28:59 +0200
Subject: [PATCH 0945/1079] [mlir] Remove redundant shape.cstr_broadcastable
 canonicalization.

These canonicalizations are already handled by folding, which occurs in
a superset of those situations, so they are being removed.

Differential Revision: https://reviews.llvm.org/D87706
---
 mlir/lib/Dialect/Shape/IR/Shape.cpp | 43 +----------------------------
 1 file changed, 1 insertion(+), 42 deletions(-)

diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp
index cd722870f5072..3be53ee2a833a 100644
--- a/mlir/lib/Dialect/Shape/IR/Shape.cpp
+++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp
@@ -399,46 +399,6 @@ LogicalResult getShapeVec(Value input, SmallVectorImpl<int64_t> &shapeValues) {
     return failure();
   }
 }
-
-// For shapes that were created by some operations, we can obtain partial
-// information on the shapes and sometimes determine if they will be
-// broadcastable with that.
-struct CstrBroadcastablePartialInfo
-    : public OpRewritePattern<CstrBroadcastableOp> {
-  using OpRewritePattern<CstrBroadcastableOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(CstrBroadcastableOp op,
-                                PatternRewriter &rewriter) const override {
-    SmallVector<int64_t, 6> lhsShape, rhsShape;
-    if (failed(getShapeVec(op.lhs(), lhsShape)))
-      return failure();
-    if (failed(getShapeVec(op.rhs(), rhsShape)))
-      return failure();
-    if (!OpTrait::util::staticallyKnownBroadcastable(lhsShape, rhsShape))
-      return failure();
-
-    rewriter.replaceOpWithNewOp<ConstWitnessOp>(op.getOperation(), true);
-    return success();
-  }
-};
-
-// Scalars are always broadcastable.
-struct CstrBroadcastableScalar : public OpRewritePattern<CstrBroadcastableOp> {
-  using OpRewritePattern<CstrBroadcastableOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(CstrBroadcastableOp op,
-                                PatternRewriter &rewriter) const override {
-    SmallVector<int64_t, 6> shape;
-    if (failed(getShapeVec(op.lhs(), shape)) || shape.size() > 0)
-      return failure();
-    if (failed(getShapeVec(op.rhs(), shape)) || shape.size() > 0)
-      return failure();
-
-    rewriter.replaceOpWithNewOp<ConstWitnessOp>(op.getOperation(), true);
-    return success();
-  }
-};
-
 } // namespace

 void CstrBroadcastableOp::getCanonicalizationPatterns(
@@ -446,8 +406,7 @@ void CstrBroadcastableOp::getCanonicalizationPatterns(
   // Canonicalization patterns have overlap with the considerations during
   // folding in case additional shape information is inferred at some point
   // that does not result in folding.
-  patterns.insert<CstrBroadcastableEqOps, CstrBroadcastablePartialInfo,
-                  CstrBroadcastableScalar>(context);
+  patterns.insert<CstrBroadcastableEqOps>(context);
 }

 OpFoldResult CstrBroadcastableOp::fold(ArrayRef<Attribute> operands) {

From a2fb5446be960ad164060b3c05fc268f7f72d67a Mon Sep 17 00:00:00 2001
From: Qiu Chaofan
Date: Thu, 17 Sep 2020 16:00:54 +0800
Subject: [PATCH 0946/1079] [SelectionDAG] Check any use of negation result
 before removal

2508ef01 fixed a bug about constant removal in negation, but it was
reverted after a sanitizer check uncovered a remaining issue.

Temporary nodes are removed if they turn out to be useless during
negation. Before the removal, they are checked for uses by any other
node, which is why the removal was moved to after the getNode call.
However, in rare cases the node to be removed is the same as the
result of getNode. We missed that case; this patch fixes it.

Reviewed By: steven.zhang

Differential Revision: https://reviews.llvm.org/D87614
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 22 ++++++++++-----
 llvm/test/CodeGen/X86/pr47517.ll              | 28 +++++++++++++++++++
 2 files changed, 43 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/pr47517.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 3446ee0efc450..5c9273150014f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5773,8 +5773,10 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,

     // If we already have the use of the negated floating constant, it is free
     // to negate it even it has multiple uses.
- if (!Op.hasOneUse() && CFP.use_empty()) + if (!Op.hasOneUse() && CFP.use_empty()) { + RemoveDeadNode(CFP); break; + } Cost = NegatibleCost::Neutral; return CFP; } @@ -5832,7 +5834,8 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, if (NegX && (CostX <= CostY)) { Cost = CostX; SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags); - RemoveDeadNode(NegY); + if (NegY != N) + RemoveDeadNode(NegY); return N; } @@ -5840,7 +5843,8 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, if (NegY) { Cost = CostY; SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags); - RemoveDeadNode(NegX); + if (NegX != N) + RemoveDeadNode(NegX); return N; } break; @@ -5879,7 +5883,8 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, if (NegX && (CostX <= CostY)) { Cost = CostX; SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags); - RemoveDeadNode(NegY); + if (NegY != N) + RemoveDeadNode(NegY); return N; } @@ -5892,7 +5897,8 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, if (NegY) { Cost = CostY; SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags); - RemoveDeadNode(NegX); + if (NegX != N) + RemoveDeadNode(NegX); return N; } break; @@ -5923,7 +5929,8 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, if (NegX && (CostX <= CostY)) { Cost = std::min(CostX, CostZ); SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags); - RemoveDeadNode(NegY); + if (NegY != N) + RemoveDeadNode(NegY); return N; } @@ -5931,7 +5938,8 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, if (NegY) { Cost = std::min(CostY, CostZ); SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags); - RemoveDeadNode(NegX); + if (NegX != N) + RemoveDeadNode(NegX); return N; } break; diff --git a/llvm/test/CodeGen/X86/pr47517.ll b/llvm/test/CodeGen/X86/pr47517.ll new file mode 100644 index 0000000000000..5672fbc69a41d --- /dev/null +++ b/llvm/test/CodeGen/X86/pr47517.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple x86_64 < %s | FileCheck %s + +; To ensure unused floating point constant is correctly removed +define float @test(float %src, float* %p) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq $0, (%rdi) +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: retq +entry: + %a0 = getelementptr inbounds float, float* %p, i32 0 + %a1 = getelementptr inbounds float, float* %p, i32 1 + store float 0.000000e+00, float* %a0 + store float 0.000000e+00, float* %a1 + %zero = load float, float* %a0 + %fmul1 = fmul fast float %zero, %src + %fadd1 = fadd fast float %fmul1, %zero + %fmul2 = fmul fast float %fadd1, 2.000000e+00 + %fmul3 = fmul fast float %fmul2, %fmul2 + %fmul4 = fmul fast float %fmul2, 2.000000e+00 + %fadd2 = fadd fast float %fmul4, -3.000000e+00 + %fmul5 = fmul fast float %fadd2, %fmul2 + %fadd3 = fadd fast float %fmul2, %src + %fadd4 = fadd fast float %fadd3, %fmul5 + %fmul6 = fmul fast float %fmul3, %fadd4 + ret float %fmul6 +} From 6637d72ddd3cf4cf3a7e6dfc227a86999137badb Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Thu, 17 Sep 2020 08:47:39 +0100 Subject: [PATCH 0947/1079] [Lint] Add check for intrinsic get.active.lane.mask As @efriedma pointed out in D86301, this "not equal to 0 check" of get.active.lane.mask's second operand needs to live here in Lint and not the Verifier. 
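To make the division of labor concrete, here is a minimal sketch (not part
of the patch; it mirrors the @t1 and @t4 tests added below). The trip count
is a dynamic-correctness condition that the Verifier has to accept, so Lint
only warns on the one statically decidable case, a literal zero:

  %bad = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %IV, i32 0)   ; Lint warns
  %ok  = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %IV, i32 %TC) ; non-constant: no warning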
Differential Revision: https://reviews.llvm.org/D87228
---
 llvm/lib/Analysis/Lint.cpp                    |  5 +++
 .../Analysis/Lint/get-active-lane-mask.ll     | 39 +++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 llvm/test/Analysis/Lint/get-active-lane-mask.ll

diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp
index 04e04a8053e87..75b8f31c8a312 100644
--- a/llvm/lib/Analysis/Lint.cpp
+++ b/llvm/lib/Analysis/Lint.cpp
@@ -365,6 +365,11 @@ void Lint::visitCallBase(CallBase &I) {
     visitMemoryReference(I, I.getArgOperand(0), MemoryLocation::UnknownSize,
                          None, nullptr, MemRef::Read | MemRef::Write);
     break;
+  case Intrinsic::get_active_lane_mask:
+    if (auto *TripCount = dyn_cast<ConstantInt>(I.getArgOperand(1)))
+      Assert(!TripCount->isZero(), "get_active_lane_mask: operand #2 "
+             "must be greater than 0", &I);
+    break;
   }
 }

diff --git a/llvm/test/Analysis/Lint/get-active-lane-mask.ll b/llvm/test/Analysis/Lint/get-active-lane-mask.ll
new file mode 100644
index 0000000000000..4ee344afe6665
--- /dev/null
+++ b/llvm/test/Analysis/Lint/get-active-lane-mask.ll
@@ -0,0 +1,39 @@
+; RUN: opt -lint -disable-output < %s 2>&1 | FileCheck %s
+
+define <4 x i1> @t1(i32 %IV) {
+;
+; CHECK: get_active_lane_mask: operand #2 must be greater than 0
+; CHECK-NEXT: %res = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %IV, i32 0)
+;
+  %res = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %IV, i32 0)
+  ret <4 x i1> %res
+}
+
+define <4 x i1> @t2(i32 %IV) {
+;
+; CHECK-NOT: get_active_lane_mask
+; CHECK-NOT: call <4 x i1> @llvm.get.active.lane.mask
+;
+  %res = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %IV, i32 1)
+  ret <4 x i1> %res
+}
+
+define <4 x i1> @t3(i32 %IV) {
+;
+; CHECK-NOT: get_active_lane_mask
+; CHECK-NOT: call <4 x i1> @llvm.get.active.lane.mask
+;
+  %res = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %IV, i32 -1)
+  ret <4 x i1> %res
+}
+
+define <4 x i1> @t4(i32 %IV, i32 %TC) {
+;
+; CHECK-NOT: get_active_lane_mask
+; CHECK-NOT: call <4 x i1> @llvm.get.active.lane.mask
+;
+  %res = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %IV, i32 %TC)
+  ret <4 x i1> %res
+}
+
+declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)

From d49707cf4b288e8d3cad00a78cfa45ec4c376496 Mon Sep 17 00:00:00 2001
From: Jay Foad
Date: Wed, 16 Sep 2020 20:28:02 +0100
Subject: [PATCH 0948/1079] [AMDGPU] Generate test checks for
 splitkit-copy-bundle.mir

This is a pre-commit for D87757 "[SplitKit] Only copy live lanes".
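The CHECK lines below are produced by the UpdateTestChecks tooling rather
than written by hand. A plausible invocation from an LLVM source checkout
is shown here; the --llc-binary path depends on the local build layout and
is only an assumption:

  $ llvm/utils/update_mir_test_checks.py --llc-binary=build/bin/llc \
      llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir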
--- .../CodeGen/AMDGPU/splitkit-copy-bundle.mir | 198 +++++++++++++++--- 1 file changed, 167 insertions(+), 31 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir index dca3150b404cd..c02b9a001fbbe 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir @@ -1,42 +1,178 @@ -# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=greedy -o - -verify-machineinstrs %s | FileCheck -check-prefixes=MIR,RA %s -# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=greedy,virtregrewriter,post-RA-sched -o - -verify-machineinstrs %s | FileCheck -check-prefixes=MIR,VR %s -# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before=greedy -o - -verify-machineinstrs %s | FileCheck -check-prefix=ASM %s +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=greedy -o - -verify-machineinstrs %s | FileCheck -check-prefix=RA %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=greedy,virtregrewriter,post-RA-sched -o - -verify-machineinstrs %s | FileCheck -check-prefix=VR %s --- -# MIR-LABEL: name: splitkit_copy_bundle - -# RA: undef %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %5.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 { -# RA-NEXT: internal %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %5.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27 -# RA-NEXT: internal %4.sub28_sub29:sgpr_1024 = COPY %5.sub28_sub29 -# RA-NEXT: } - -# RA: undef %6.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 { -# RA-NEXT: internal %6.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27 -# RA-NEXT: internal %6.sub28_sub29:sgpr_1024 = COPY %4.sub28_sub29 -# RA-NEXT: } - - -# RA: undef %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %6.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 { -# RA-NEXT: internal %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %6.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27 -# RA-NEXT: internal %4.sub28_sub29:sgpr_1024 = COPY %6.sub28_sub29 -# RA-NEXT: } - - -# VR: renamable $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = KILL undef renamable $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 -# VR-NEXT: renamable $sgpr96_sgpr97 = KILL undef renamable $sgpr96_sgpr97 - -# ASM-LABEL: {{^}}splitkit_copy_bundle: -# ASM: ; implicit-def: $sgpr34_sgpr35 -# ASM-NEXT: ; implicit-def: $sgpr98_sgpr99 -# ASM-NEXT: ; kill: def $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 killed $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 -# ASM-NEXT: ; kill: def 
$sgpr96_sgpr97 killed $sgpr96_sgpr97 - name: splitkit_copy_bundle tracksRegLiveness: true machineFunctionInfo: scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' stackPtrOffsetReg: '$sgpr32' body: | + ; RA-LABEL: name: splitkit_copy_bundle + ; RA: bb.0: + ; RA: successors: %bb.1(0x80000000) + ; RA: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; RA: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; RA: undef %5.sub1:sgpr_1024 = S_MOV_B32 -1 + ; RA: %5.sub0:sgpr_1024 = S_MOV_B32 -1 + ; RA: undef %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %5.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 { + ; RA: internal %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %5.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27 + ; RA: internal %4.sub28_sub29:sgpr_1024 = COPY %5.sub28_sub29 + ; RA: } + ; RA: undef %3.sub0:sgpr_1024 = S_MOV_B32 0 + ; RA: bb.1: + ; RA: successors: %bb.2(0x80000000) + ; RA: undef %6.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 { + ; RA: internal %6.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27 + ; RA: internal %6.sub28_sub29:sgpr_1024 = COPY %4.sub28_sub29 + ; RA: } + ; RA: %6.sub2:sgpr_1024 = COPY %6.sub0 + ; RA: %6.sub3:sgpr_1024 = COPY %6.sub1 + ; RA: %6.sub4:sgpr_1024 = COPY %6.sub0 + ; RA: %6.sub5:sgpr_1024 = COPY %6.sub1 + ; RA: %6.sub6:sgpr_1024 = COPY %6.sub0 + ; RA: %6.sub7:sgpr_1024 = COPY %6.sub1 + ; RA: %6.sub8:sgpr_1024 = COPY %6.sub0 + ; RA: %6.sub9:sgpr_1024 = COPY %6.sub1 + ; RA: %6.sub10:sgpr_1024 = COPY %6.sub0 + ; RA: %6.sub11:sgpr_1024 = COPY %6.sub1 + ; RA: %6.sub12:sgpr_1024 = COPY %6.sub0 + ; RA: %6.sub13:sgpr_1024 = COPY %6.sub1 + ; RA: %6.sub14:sgpr_1024 = COPY %6.sub0 + ; RA: %6.sub15:sgpr_1024 = COPY %6.sub1 + ; RA: %6.sub16:sgpr_1024 = COPY %6.sub0 + ; RA: %6.sub17:sgpr_1024 = COPY %6.sub1 + ; RA: %6.sub18:sgpr_1024 = COPY %6.sub0 + ; RA: %6.sub19:sgpr_1024 = COPY %6.sub1 + ; RA: %6.sub20:sgpr_1024 = COPY %6.sub0 + ; RA: %6.sub21:sgpr_1024 = COPY %6.sub1 + ; RA: %6.sub22:sgpr_1024 = COPY %6.sub0 + ; RA: %6.sub23:sgpr_1024 = COPY %6.sub1 + ; RA: %6.sub24:sgpr_1024 = COPY %6.sub0 + ; RA: %6.sub25:sgpr_1024 = COPY %6.sub1 + ; RA: %6.sub26:sgpr_1024 = COPY %6.sub0 + ; RA: %6.sub27:sgpr_1024 = COPY %6.sub1 + ; RA: %6.sub28:sgpr_1024 = COPY %6.sub0 + ; RA: %6.sub29:sgpr_1024 = COPY %6.sub1 + ; RA: undef %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %6.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 { + ; RA: internal %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %6.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27 + ; RA: internal %4.sub28_sub29:sgpr_1024 = COPY %6.sub28_sub29 + ; RA: } + ; RA: %3.sub1:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub2:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub3:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub4:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub5:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub6:sgpr_1024 = COPY %3.sub0 + ; RA: 
%3.sub7:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub8:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub9:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub10:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub11:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub12:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub13:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub14:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub15:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub16:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub17:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub18:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub19:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub20:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub21:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub22:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub23:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub24:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub25:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub26:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub27:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub28:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub29:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub30:sgpr_1024 = COPY %3.sub0 + ; RA: %3.sub31:sgpr_1024 = COPY %3.sub0 + ; RA: bb.2: + ; RA: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; RA: S_NOP 0, csr_amdgpu_highregs, implicit [[DEF]], implicit [[DEF1]] + ; RA: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc + ; RA: S_BRANCH %bb.2 + ; VR-LABEL: name: splitkit_copy_bundle + ; VR: bb.0: + ; VR: successors: %bb.1(0x80000000) + ; VR: renamable $sgpr69 = S_MOV_B32 -1 + ; VR: renamable $sgpr68 = S_MOV_B32 -1 + ; VR: renamable $sgpr36 = S_MOV_B32 0 + ; VR: renamable $sgpr34_sgpr35 = IMPLICIT_DEF + ; VR: renamable $sgpr98_sgpr99 = IMPLICIT_DEF + ; VR: renamable $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = KILL undef renamable $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; VR: renamable $sgpr96_sgpr97 = KILL undef renamable $sgpr96_sgpr97 + ; VR: bb.1: + ; VR: successors: %bb.2(0x80000000) + ; VR: liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x0000000000000003, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0FFFFFFFFFFFFFFF, $sgpr34_sgpr35, $sgpr98_sgpr99 + ; VR: renamable $sgpr70 = COPY renamable $sgpr68 + ; VR: renamable $sgpr71 = COPY renamable $sgpr69 + ; VR: renamable $sgpr72 = COPY renamable $sgpr68 + ; VR: renamable $sgpr73 = COPY renamable $sgpr69 + ; VR: renamable $sgpr74 = COPY renamable $sgpr68 + ; VR: renamable $sgpr75 = COPY renamable $sgpr69 + ; VR: renamable $sgpr76 = COPY renamable $sgpr68 + ; VR: renamable $sgpr77 = COPY renamable $sgpr69 + ; VR: renamable $sgpr78 = COPY renamable $sgpr68 + ; VR: renamable $sgpr79 = COPY renamable $sgpr69 + ; VR: renamable $sgpr80 = COPY renamable $sgpr68 + ; VR: renamable $sgpr81 = COPY renamable $sgpr69 + ; VR: renamable $sgpr82 = COPY renamable $sgpr68 + ; VR: renamable $sgpr83 = COPY renamable $sgpr69 + ; VR: renamable $sgpr84 = COPY renamable $sgpr68 + ; VR: renamable $sgpr85 = COPY renamable $sgpr69 + ; VR: renamable $sgpr86 = COPY renamable $sgpr68 + ; VR: renamable $sgpr87 = COPY renamable $sgpr69 + ; VR: renamable $sgpr88 = COPY renamable $sgpr68 + ; VR: renamable $sgpr89 = COPY renamable $sgpr69 + ; VR: renamable $sgpr90 = COPY 
renamable $sgpr68 + ; VR: renamable $sgpr91 = COPY renamable $sgpr69 + ; VR: renamable $sgpr92 = COPY renamable $sgpr68 + ; VR: renamable $sgpr93 = COPY renamable $sgpr69 + ; VR: renamable $sgpr94 = COPY renamable $sgpr68 + ; VR: renamable $sgpr95 = COPY renamable $sgpr69 + ; VR: renamable $sgpr96 = COPY renamable $sgpr68 + ; VR: renamable $sgpr97 = COPY renamable $sgpr69 + ; VR: renamable $sgpr37 = COPY renamable $sgpr36 + ; VR: renamable $sgpr38 = COPY renamable $sgpr36 + ; VR: renamable $sgpr39 = COPY renamable $sgpr36 + ; VR: renamable $sgpr40 = COPY renamable $sgpr36 + ; VR: renamable $sgpr41 = COPY renamable $sgpr36 + ; VR: renamable $sgpr42 = COPY renamable $sgpr36 + ; VR: renamable $sgpr43 = COPY renamable $sgpr36 + ; VR: renamable $sgpr44 = COPY renamable $sgpr36 + ; VR: renamable $sgpr45 = COPY renamable $sgpr36 + ; VR: renamable $sgpr46 = COPY renamable $sgpr36 + ; VR: renamable $sgpr47 = COPY renamable $sgpr36 + ; VR: renamable $sgpr48 = COPY renamable $sgpr36 + ; VR: renamable $sgpr49 = COPY renamable $sgpr36 + ; VR: renamable $sgpr50 = COPY renamable $sgpr36 + ; VR: renamable $sgpr51 = COPY renamable $sgpr36 + ; VR: renamable $sgpr52 = COPY renamable $sgpr36 + ; VR: renamable $sgpr53 = COPY renamable $sgpr36 + ; VR: renamable $sgpr54 = COPY renamable $sgpr36 + ; VR: renamable $sgpr55 = COPY renamable $sgpr36 + ; VR: renamable $sgpr56 = COPY renamable $sgpr36 + ; VR: renamable $sgpr57 = COPY renamable $sgpr36 + ; VR: renamable $sgpr58 = COPY renamable $sgpr36 + ; VR: renamable $sgpr59 = COPY renamable $sgpr36 + ; VR: renamable $sgpr60 = COPY renamable $sgpr36 + ; VR: renamable $sgpr61 = COPY renamable $sgpr36 + ; VR: renamable $sgpr62 = COPY renamable $sgpr36 + ; VR: renamable $sgpr63 = COPY renamable $sgpr36 + ; VR: renamable $sgpr64 = COPY renamable $sgpr36 + ; VR: renamable $sgpr65 = COPY renamable $sgpr36 + ; VR: renamable $sgpr66 = COPY renamable $sgpr36 + ; VR: renamable $sgpr67 = COPY renamable $sgpr36 + ; VR: bb.2: + ; VR: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; VR: liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x0000000000000003, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0FFFFFFFFFFFFFFF, $sgpr34_sgpr35, $sgpr98_sgpr99 + ; VR: S_NOP 0, csr_amdgpu_highregs, implicit renamable $sgpr34_sgpr35, implicit renamable $sgpr98_sgpr99 + ; VR: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc + ; VR: S_BRANCH %bb.2 bb.0: %0:sreg_64 = IMPLICIT_DEF %1:sreg_64 = IMPLICIT_DEF From 6f6d389da5c37e5e9a900902f03dc649d57919b7 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 16 Sep 2020 11:13:45 +0100 Subject: [PATCH 0949/1079] [SplitKit] Only copy live lanes When splitting a live interval with subranges, only insert copies for the lanes that are live at the point of the split. This avoids some unnecessary copies and fixes a problem where copying dead lanes was generating MIR that failed verification. The test case for this is test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir. Without this fix, some earlier live range splitting would create %430: %430 [256r,848r:0)[848r,2584r:1) 0@256r 1@848r L0000000000000003 [848r,2584r:0) 0@848r L0000000000000030 [256r,2584r:0) 0@256r weight:1.480938e-03 ... 
256B undef %430.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %20.sub1:vreg_128, implicit $exec ... 848B %430.sub0:vreg_128 = V_AND_B32_e32 %92:sreg_32, %20.sub1:vreg_128, implicit $exec ... 2584B %431:vreg_128 = COPY %430:vreg_128 Then RAGreedy::tryLocalSplit would split %430 into %432 and %433 just before 848B giving: %432 [256r,844r:0) 0@256r L0000000000000030 [256r,844r:0) 0@256r weight:3.066802e-03 %433 [844r,848r:0)[848r,2584r:1) 0@844r 1@848r L0000000000000030 [844r,2584r:0) 0@844r L0000000000000003 [844r,844d:0)[848r,2584r:1) 0@844r 1@848r weight:2.831776e-03 ... 256B undef %432.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %20.sub1:vreg_128, implicit $exec ... 844B undef %433.sub0:vreg_128 = COPY %432.sub0:vreg_128 { internal %433.sub2:vreg_128 = COPY %432.sub2:vreg_128 848B } %433.sub0:vreg_128 = V_AND_B32_e32 %92:sreg_32, %20.sub1:vreg_128, implicit $exec ... 2584B %431:vreg_128 = COPY %433:vreg_128 Note that the copy from %432 to %433 at 844B is a curious bundle-without-a-BUNDLE-instruction that SplitKit creates deliberately, and it includes a copy of .sub0 which is not live at this point, and that causes it to fail verification: *** Bad machine code: No live subrange at use *** - function: zextload_global_v64i16_to_v64i64 - basic block: %bb.0 (0x7faed48) [0B;2848B) - instruction: 844B undef %433.sub0:vreg_128 = COPY %432.sub0:vreg_128 - operand 1: %432.sub0:vreg_128 - interval: %432 [256r,844r:0) 0@256r L0000000000000030 [256r,844r:0) 0@256r weight:3.066802e-03 - at: 844B Using real bundles with a BUNDLE instruction might also fix this problem, but the current fix is less invasive and also avoids some unnecessary copies. https://bugs.llvm.org/show_bug.cgi?id=47492 Differential Revision: https://reviews.llvm.org/D87757 --- llvm/lib/CodeGen/SplitKit.cpp | 9 +- .../CodeGen/AMDGPU/spill-scavenge-offset.ll | 2 +- .../CodeGen/AMDGPU/splitkit-copy-bundle.mir | 83 ++- .../AMDGPU/splitkit-copy-live-lanes.mir | 525 ++++++++++++++++++ .../AMDGPU/subreg-split-live-in-error.mir | 6 +- 5 files changed, 572 insertions(+), 53 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp index 372c7f8061295..4029c855c910e 100644 --- a/llvm/lib/CodeGen/SplitKit.cpp +++ b/llvm/lib/CodeGen/SplitKit.cpp @@ -649,10 +649,13 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, } if (!DidRemat) { LaneBitmask LaneMask; - if (LI->hasSubRanges()) { + if (OrigLI.hasSubRanges()) { LaneMask = LaneBitmask::getNone(); - for (LiveInterval::SubRange &S : LI->subranges()) - LaneMask |= S.LaneMask; + for (LiveInterval::SubRange &S : OrigLI.subranges()) { + if (S.liveAt(UseIdx)) + LaneMask |= S.LaneMask; + } + assert(LaneMask.any() && "Interval has no live subranges"); } else { LaneMask = LaneBitmask::getAll(); } diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll index d2434682eebc9..5695487d58d88 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll @@ -39,7 +39,7 @@ entry: ; GFX6-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9:]+}}], s32 ; GFX6-NEXT: s_sub_u32 s32, s32, 0x[[OFFSET:[0-9]+]] ; GFX6: NumSgprs: 48 -; GFX6: ScratchSize: 8624 +; GFX6: ScratchSize: 8608 define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64 x i32> addrspace(1)* %in) #0 { entry: %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir 
b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir index c02b9a001fbbe..c9f3a82cf695f 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir @@ -16,17 +16,11 @@ body: | ; RA: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; RA: undef %5.sub1:sgpr_1024 = S_MOV_B32 -1 ; RA: %5.sub0:sgpr_1024 = S_MOV_B32 -1 - ; RA: undef %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %5.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 { - ; RA: internal %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %5.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27 - ; RA: internal %4.sub28_sub29:sgpr_1024 = COPY %5.sub28_sub29 - ; RA: } + ; RA: undef %4.sub0_sub1:sgpr_1024 = COPY %5.sub0_sub1 ; RA: undef %3.sub0:sgpr_1024 = S_MOV_B32 0 ; RA: bb.1: ; RA: successors: %bb.2(0x80000000) - ; RA: undef %6.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 { - ; RA: internal %6.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27 - ; RA: internal %6.sub28_sub29:sgpr_1024 = COPY %4.sub28_sub29 - ; RA: } + ; RA: undef %6.sub0_sub1:sgpr_1024 = COPY %4.sub0_sub1 ; RA: %6.sub2:sgpr_1024 = COPY %6.sub0 ; RA: %6.sub3:sgpr_1024 = COPY %6.sub1 ; RA: %6.sub4:sgpr_1024 = COPY %6.sub0 @@ -55,10 +49,7 @@ body: | ; RA: %6.sub27:sgpr_1024 = COPY %6.sub1 ; RA: %6.sub28:sgpr_1024 = COPY %6.sub0 ; RA: %6.sub29:sgpr_1024 = COPY %6.sub1 - ; RA: undef %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %6.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 { - ; RA: internal %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %6.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27 - ; RA: internal %4.sub28_sub29:sgpr_1024 = COPY %6.sub28_sub29 - ; RA: } + ; RA: undef %4.sub0_sub1:sgpr_1024 = COPY %6.sub0_sub1 ; RA: %3.sub1:sgpr_1024 = COPY %3.sub0 ; RA: %3.sub2:sgpr_1024 = COPY %3.sub0 ; RA: %3.sub3:sgpr_1024 = COPY %3.sub0 @@ -102,40 +93,40 @@ body: | ; VR: renamable $sgpr68 = S_MOV_B32 -1 ; VR: renamable $sgpr36 = S_MOV_B32 0 ; VR: renamable $sgpr34_sgpr35 = IMPLICIT_DEF - ; VR: renamable $sgpr98_sgpr99 = IMPLICIT_DEF - ; VR: renamable $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = KILL undef renamable $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; VR: renamable $sgpr96_sgpr97 = KILL undef renamable $sgpr96_sgpr97 + ; VR: renamable $sgpr70_sgpr71 = IMPLICIT_DEF ; VR: bb.1: ; VR: successors: %bb.2(0x80000000) - ; VR: liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x0000000000000003, 
$sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0FFFFFFFFFFFFFFF, $sgpr34_sgpr35, $sgpr98_sgpr99 - ; VR: renamable $sgpr70 = COPY renamable $sgpr68 - ; VR: renamable $sgpr71 = COPY renamable $sgpr69 - ; VR: renamable $sgpr72 = COPY renamable $sgpr68 - ; VR: renamable $sgpr73 = COPY renamable $sgpr69 - ; VR: renamable $sgpr74 = COPY renamable $sgpr68 - ; VR: renamable $sgpr75 = COPY renamable $sgpr69 - ; VR: renamable $sgpr76 = COPY renamable $sgpr68 - ; VR: renamable $sgpr77 = COPY renamable $sgpr69 - ; VR: renamable $sgpr78 = COPY renamable $sgpr68 - ; VR: renamable $sgpr79 = COPY renamable $sgpr69 - ; VR: renamable $sgpr80 = COPY renamable $sgpr68 - ; VR: renamable $sgpr81 = COPY renamable $sgpr69 - ; VR: renamable $sgpr82 = COPY renamable $sgpr68 - ; VR: renamable $sgpr83 = COPY renamable $sgpr69 - ; VR: renamable $sgpr84 = COPY renamable $sgpr68 - ; VR: renamable $sgpr85 = COPY renamable $sgpr69 - ; VR: renamable $sgpr86 = COPY renamable $sgpr68 - ; VR: renamable $sgpr87 = COPY renamable $sgpr69 - ; VR: renamable $sgpr88 = COPY renamable $sgpr68 - ; VR: renamable $sgpr89 = COPY renamable $sgpr69 - ; VR: renamable $sgpr90 = COPY renamable $sgpr68 - ; VR: renamable $sgpr91 = COPY renamable $sgpr69 - ; VR: renamable $sgpr92 = COPY renamable $sgpr68 - ; VR: renamable $sgpr93 = COPY renamable $sgpr69 - ; VR: renamable $sgpr94 = COPY renamable $sgpr68 - ; VR: renamable $sgpr95 = COPY renamable $sgpr69 - ; VR: renamable $sgpr96 = COPY renamable $sgpr68 - ; VR: renamable $sgpr97 = COPY renamable $sgpr69 + ; VR: liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x0000000000000003, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x000000000000000F, $sgpr34_sgpr35, $sgpr70_sgpr71 + ; VR: renamable $sgpr40_sgpr41 = COPY killed renamable $sgpr68_sgpr69 + ; VR: renamable $sgpr42 = COPY renamable $sgpr40 + ; VR: renamable $sgpr43 = COPY renamable $sgpr41 + ; VR: renamable $sgpr44 = COPY renamable $sgpr40 + ; VR: renamable $sgpr45 = COPY renamable $sgpr41 + ; VR: renamable $sgpr46 = COPY renamable $sgpr40 + ; VR: renamable $sgpr47 = COPY renamable $sgpr41 + ; VR: renamable $sgpr48 = COPY renamable $sgpr40 + ; VR: renamable $sgpr49 = COPY renamable $sgpr41 + ; VR: renamable $sgpr50 = COPY renamable $sgpr40 + ; VR: renamable $sgpr51 = COPY renamable $sgpr41 + ; VR: renamable $sgpr52 = COPY renamable $sgpr40 + ; VR: renamable $sgpr53 = COPY renamable $sgpr41 + ; VR: renamable $sgpr54 = COPY renamable $sgpr40 + ; VR: renamable $sgpr55 = COPY renamable $sgpr41 + ; VR: renamable $sgpr56 = COPY renamable $sgpr40 + ; VR: renamable $sgpr57 = COPY renamable $sgpr41 + ; VR: renamable $sgpr58 = COPY renamable $sgpr40 + ; VR: renamable $sgpr59 = COPY renamable $sgpr41 + ; VR: renamable $sgpr60 = COPY renamable $sgpr40 + ; VR: renamable $sgpr61 = COPY renamable $sgpr41 + ; VR: renamable $sgpr62 = COPY renamable $sgpr40 + ; VR: renamable $sgpr63 = COPY renamable $sgpr41 + ; VR: renamable $sgpr64 = COPY renamable $sgpr40 + ; VR: renamable $sgpr65 = COPY renamable $sgpr41 + ; VR: 
renamable $sgpr66 = COPY renamable $sgpr40 + ; VR: renamable $sgpr67 = COPY renamable $sgpr41 + ; VR: renamable $sgpr68 = COPY renamable $sgpr40 + ; VR: renamable $sgpr69 = COPY renamable $sgpr41 + ; VR: renamable $sgpr68_sgpr69 = COPY killed renamable $sgpr40_sgpr41 ; VR: renamable $sgpr37 = COPY renamable $sgpr36 ; VR: renamable $sgpr38 = COPY renamable $sgpr36 ; VR: renamable $sgpr39 = COPY renamable $sgpr36 @@ -169,8 +160,8 @@ body: | ; VR: renamable $sgpr67 = COPY renamable $sgpr36 ; VR: bb.2: ; VR: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; VR: liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x0000000000000003, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0FFFFFFFFFFFFFFF, $sgpr34_sgpr35, $sgpr98_sgpr99 - ; VR: S_NOP 0, csr_amdgpu_highregs, implicit renamable $sgpr34_sgpr35, implicit renamable $sgpr98_sgpr99 + ; VR: liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x0000000000000003, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x000000000000000F, $sgpr34_sgpr35, $sgpr70_sgpr71 + ; VR: S_NOP 0, csr_amdgpu_highregs, implicit renamable $sgpr34_sgpr35, implicit renamable $sgpr70_sgpr71 ; VR: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc ; VR: S_BRANCH %bb.2 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir new file mode 100644 index 0000000000000..56ebf9305dbd5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir @@ -0,0 +1,525 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -verify-regalloc -run-pass=greedy %s -o - | FileCheck %s + +--- +name: zextload_global_v64i16_to_v64i64 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: zextload_global_v64i16_to_v64i64 + ; CHECK: liveins: $sgpr0_sgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]](p4), 9, 0, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4) + ; CHECK: undef %2.sub3:sgpr_128 = S_MOV_B32 61440 + ; CHECK: %2.sub2:sgpr_128 = S_MOV_B32 -1 + ; CHECK: %2.sub0:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub0 + ; CHECK: %2.sub1:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub1 + ; CHECK: undef %3.sub0:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub2 + ; CHECK: %3.sub1:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub3 + ; CHECK: %3.sub2:sgpr_128 = COPY %2.sub2 + ; CHECK: %3.sub3:sgpr_128 = COPY %2.sub3 + ; CHECK: early-clobber %4:vreg_128, early-clobber %5:vreg_128, early-clobber %6:vreg_128, early-clobber %7:vreg_128 = BUNDLE %3, implicit $exec { + ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET 
%3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 128, addrspace 1) + ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 16, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1) + ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 32, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1) + ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 48, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1) + ; CHECK: } + ; CHECK: undef %47.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %47, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5) + ; CHECK: undef %52.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %52, %stack.1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.1, align 4, addrspace 5) + ; CHECK: undef %57.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %57, %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.2, align 4, addrspace 5) + ; CHECK: undef %62.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %62, %stack.3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.3, align 4, addrspace 5) + ; CHECK: undef %67.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub1, implicit $exec + ; CHECK: undef %71.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub0, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %71, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.4, align 4, addrspace 5) + ; CHECK: undef %76.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub3, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %76, %stack.5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.5, align 4, addrspace 5) + ; CHECK: undef %81.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub2, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %81, %stack.6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.6, align 4, addrspace 5) + ; CHECK: undef %86.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec + ; CHECK: undef %90.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %90, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.7, align 4, addrspace 5) + ; CHECK: undef %95.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %95, %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.8, align 4, addrspace 5) + ; CHECK: undef %100.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %100, %stack.9, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.9, align 4, addrspace 5) + ; CHECK: undef %105.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec + ; CHECK: 
undef %109.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec + ; CHECK: undef %113.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec + ; CHECK: undef %117.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %117, %stack.10, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.10, align 4, addrspace 5) + ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 64, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 64, addrspace 1) + ; CHECK: undef %122.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec + ; CHECK: undef %126.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec + ; CHECK: undef %130.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %130, %stack.11, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.11, align 4, addrspace 5) + ; CHECK: undef %135.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %135, %stack.12, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.12, align 4, addrspace 5) + ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 80, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1) + ; CHECK: undef %140.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec + ; CHECK: undef %144.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %144, %stack.13, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.13, align 4, addrspace 5) + ; CHECK: undef %149.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %149, %stack.14, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.14, align 4, addrspace 5) + ; CHECK: undef %154.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec + ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 96, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1) + ; CHECK: undef %158.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec + ; CHECK: undef %36.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub0, implicit $exec + ; CHECK: undef %37.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub3, implicit $exec + ; CHECK: undef %38.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub2, implicit $exec + ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 112, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1) + ; CHECK: undef %40.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub1, implicit $exec + ; CHECK: undef %41.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub0, implicit $exec + ; CHECK: undef %42.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub3, implicit $exec + ; CHECK: undef %43.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub2, implicit $exec + ; 
CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 + ; CHECK: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE]], %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.1, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE1]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE1]], %stack.1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.1, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.2, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE2]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE2]], %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.2, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.3, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE3]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE3]], %stack.3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.3, align 4, addrspace 5) + ; CHECK: undef %68.sub2:vreg_128 = COPY %67.sub2 + ; CHECK: %68.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub1, implicit $exec + ; CHECK: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.4, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE4]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub0, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE4]], %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.4, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.5, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE5]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub3, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE5]], %stack.5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.5, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.6, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE6]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], 
[[BUFFER_LOAD_DWORDX4_OFFSET1]].sub2, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE6]], %stack.6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.6, align 4, addrspace 5) + ; CHECK: undef %87.sub2:vreg_128 = COPY %86.sub2 + ; CHECK: %87.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec + ; CHECK: [[SI_SPILL_V128_RESTORE7:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.7, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE7]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE7]], %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.7, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE8:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.8, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE8]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE8]], %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.8, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE9:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.9, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE9]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE9]], %stack.9, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.9, align 4, addrspace 5) + ; CHECK: undef %106.sub2:vreg_128 = COPY %105.sub2 + ; CHECK: %106.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec + ; CHECK: undef %110.sub2:vreg_128 = COPY %109.sub2 + ; CHECK: %110.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec + ; CHECK: undef %114.sub2:vreg_128 = COPY %113.sub2 + ; CHECK: %114.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec + ; CHECK: [[SI_SPILL_V128_RESTORE10:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.10, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE10]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE10]], %stack.10, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.10, align 4, addrspace 5) + ; CHECK: undef %123.sub2:vreg_128 = COPY %122.sub2 + ; CHECK: %123.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec + ; CHECK: undef %127.sub2:vreg_128 = COPY %126.sub2 + ; CHECK: %127.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec + ; CHECK: [[SI_SPILL_V128_RESTORE11:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.11, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE11]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], 
[[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE11]], %stack.11, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.11, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE12:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.12, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.12, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE12]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE12]], %stack.12, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.12, align 4, addrspace 5) + ; CHECK: undef %141.sub2:vreg_128 = COPY %140.sub2 + ; CHECK: %141.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec + ; CHECK: [[SI_SPILL_V128_RESTORE13:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.13, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.13, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE13]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE13]], %stack.13, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.13, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE14:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.14, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.14, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE14]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE14]], %stack.14, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.14, align 4, addrspace 5) + ; CHECK: undef %155.sub2:vreg_128 = COPY %154.sub2 + ; CHECK: %155.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec + ; CHECK: undef %159.sub2:vreg_128 = COPY %158.sub2 + ; CHECK: %159.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec + ; CHECK: %36.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub0, implicit $exec + ; CHECK: %37.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub3, implicit $exec + ; CHECK: %38.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub2, implicit $exec + ; CHECK: %40.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub1, implicit $exec + ; CHECK: %41.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub0, implicit $exec + ; CHECK: %42.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub3, implicit $exec + ; CHECK: %43.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub2, implicit $exec + ; CHECK: %43.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: %43.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %43, %2, 0, 480, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + ; CHECK: %42.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %42.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %42, %2, 0, 496, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: %41.sub1:vreg_128 = COPY %43.sub1 
+ ; CHECK: %41.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %41, %2, 0, 448, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1) + ; CHECK: %40.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %40.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %40, %2, 0, 464, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: %38.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %38.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %38, %2, 0, 416, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + ; CHECK: %37.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %37.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %37, %2, 0, 432, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: %36.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %36.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1) + ; CHECK: undef %157.sub0:vreg_128 = COPY %159.sub0 { + ; CHECK: internal %157.sub2:vreg_128 = COPY %159.sub2 + ; CHECK: } + ; CHECK: %157.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %157.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %157, %2, 0, 400, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: undef %153.sub0:vreg_128 = COPY %155.sub0 { + ; CHECK: internal %153.sub2:vreg_128 = COPY %155.sub2 + ; CHECK: } + ; CHECK: %153.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %153.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %153, %2, 0, 352, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE15:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.14, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.14, align 4, addrspace 5) + ; CHECK: undef %148.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE15]].sub0 { + ; CHECK: internal %148.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE15]].sub2 + ; CHECK: } + ; CHECK: %148.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %148.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %148, %2, 0, 368, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE16:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.13, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.13, align 4, addrspace 5) + ; CHECK: undef %143.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE16]].sub0 { + ; CHECK: internal %143.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE16]].sub2 + ; CHECK: } + ; CHECK: %143.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %143.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %143, %2, 0, 320, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1) + ; CHECK: undef %139.sub0:vreg_128 = COPY %141.sub0 { + ; CHECK: internal %139.sub2:vreg_128 = COPY %141.sub2 + ; CHECK: } + ; CHECK: %139.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %139.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %139, %2, 0, 336, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE17:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.12, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.12, align 4, addrspace 5) + ; CHECK: undef %134.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE17]].sub0 { + ; CHECK: internal %134.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE17]].sub2 + ; CHECK: } + ; CHECK: 
%134.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %134.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %134, %2, 0, 288, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE18:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.11, align 4, addrspace 5) + ; CHECK: undef %129.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE18]].sub0 { + ; CHECK: internal %129.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE18]].sub2 + ; CHECK: } + ; CHECK: %129.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %129.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %129, %2, 0, 304, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: undef %125.sub0:vreg_128 = COPY %127.sub0 { + ; CHECK: internal %125.sub2:vreg_128 = COPY %127.sub2 + ; CHECK: } + ; CHECK: %125.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %125.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %125, %2, 0, 256, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 256, addrspace 1) + ; CHECK: undef %121.sub0:vreg_128 = COPY %123.sub0 { + ; CHECK: internal %121.sub2:vreg_128 = COPY %123.sub2 + ; CHECK: } + ; CHECK: %121.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %121.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %121, %2, 0, 272, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE19:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.10, align 4, addrspace 5) + ; CHECK: undef %116.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE19]].sub0 { + ; CHECK: internal %116.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE19]].sub2 + ; CHECK: } + ; CHECK: %116.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %116.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %116, %2, 0, 224, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + ; CHECK: undef %112.sub0:vreg_128 = COPY %114.sub0 { + ; CHECK: internal %112.sub2:vreg_128 = COPY %114.sub2 + ; CHECK: } + ; CHECK: %112.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %112.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %112, %2, 0, 240, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: undef %108.sub0:vreg_128 = COPY %110.sub0 { + ; CHECK: internal %108.sub2:vreg_128 = COPY %110.sub2 + ; CHECK: } + ; CHECK: %108.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %108.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %108, %2, 0, 192, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1) + ; CHECK: undef %104.sub0:vreg_128 = COPY %106.sub0 { + ; CHECK: internal %104.sub2:vreg_128 = COPY %106.sub2 + ; CHECK: } + ; CHECK: %104.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %104.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %104, %2, 0, 208, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE20:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.9, align 4, addrspace 5) + ; CHECK: undef %99.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE20]].sub0 { + ; CHECK: internal %99.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE20]].sub2 + ; CHECK: } + ; CHECK: %99.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %99.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %99, %2, 0, 160, 0, 0, 0, 
0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE21:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.8, align 4, addrspace 5) + ; CHECK: undef %94.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE21]].sub0 { + ; CHECK: internal %94.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE21]].sub2 + ; CHECK: } + ; CHECK: %94.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %94.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %94, %2, 0, 176, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE22:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.7, align 4, addrspace 5) + ; CHECK: undef %89.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE22]].sub0 { + ; CHECK: internal %89.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE22]].sub2 + ; CHECK: } + ; CHECK: %89.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %89.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %89, %2, 0, 128, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1) + ; CHECK: undef %85.sub0:vreg_128 = COPY %87.sub0 { + ; CHECK: internal %85.sub2:vreg_128 = COPY %87.sub2 + ; CHECK: } + ; CHECK: %85.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %85.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %85, %2, 0, 144, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE23:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.6, align 4, addrspace 5) + ; CHECK: undef %80.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE23]].sub0 { + ; CHECK: internal %80.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE23]].sub2 + ; CHECK: } + ; CHECK: %80.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %80.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %80, %2, 0, 96, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE24:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.5, align 4, addrspace 5) + ; CHECK: undef %75.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE24]].sub0 { + ; CHECK: internal %75.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE24]].sub2 + ; CHECK: } + ; CHECK: %75.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %75.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %75, %2, 0, 112, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE25:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.4, align 4, addrspace 5) + ; CHECK: undef %70.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE25]].sub0 { + ; CHECK: internal %70.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE25]].sub2 + ; CHECK: } + ; CHECK: %70.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %70.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %70, %2, 0, 64, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1) + ; CHECK: undef %66.sub0:vreg_128 = COPY %68.sub0 { + ; CHECK: internal %66.sub2:vreg_128 = COPY %68.sub2 + ; CHECK: } + ; CHECK: %66.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %66.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %66, %2, 0, 80, 0, 0, 0, 0, 0, implicit $exec :: (store 16, 
addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE26:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.3, align 4, addrspace 5) + ; CHECK: undef %61.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE26]].sub0 { + ; CHECK: internal %61.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE26]].sub2 + ; CHECK: } + ; CHECK: %61.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %61.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %61, %2, 0, 32, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE27:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.2, align 4, addrspace 5) + ; CHECK: undef %56.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE27]].sub0 { + ; CHECK: internal %56.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE27]].sub2 + ; CHECK: } + ; CHECK: %56.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %56.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %56, %2, 0, 48, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE28:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.1, align 4, addrspace 5) + ; CHECK: undef %51.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE28]].sub0 { + ; CHECK: internal %51.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE28]].sub2 + ; CHECK: } + ; CHECK: %51.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %51.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %51, %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 512, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE29:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5) + ; CHECK: undef %46.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE29]].sub0 { + ; CHECK: internal %46.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE29]].sub2 + ; CHECK: } + ; CHECK: %46.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %46.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %46, %2, 0, 16, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: S_ENDPGM 0 + %0:sgpr_64(p4) = COPY $sgpr0_sgpr1 + %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0(p4), 9, 0, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4) + undef %2.sub3:sgpr_128 = S_MOV_B32 61440 + %2.sub2:sgpr_128 = S_MOV_B32 -1 + %2.sub0:sgpr_128 = COPY %1.sub0 + %2.sub1:sgpr_128 = COPY %1.sub1 + undef %3.sub0:sgpr_128 = COPY %1.sub2 + %3.sub1:sgpr_128 = COPY %1.sub3 + %3.sub2:sgpr_128 = COPY %2.sub2 + %3.sub3:sgpr_128 = COPY %2.sub3 + early-clobber %4:vreg_128, early-clobber %5:vreg_128, early-clobber %6:vreg_128, early-clobber %7:vreg_128 = BUNDLE %3, implicit $exec { + %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 128, addrspace 1) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 16, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1) + %4:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 32, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1) + %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 48, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1) + } + undef %8.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %7.sub1, implicit $exec + undef %9.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %7.sub0, implicit $exec + undef %10.sub2:vreg_128 = V_LSHRREV_B32_e32 16, 
%7.sub3, implicit $exec + undef %11.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %7.sub2, implicit $exec + undef %12.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %5.sub1, implicit $exec + undef %13.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %5.sub0, implicit $exec + undef %14.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %5.sub3, implicit $exec + undef %15.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %5.sub2, implicit $exec + undef %16.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %4.sub1, implicit $exec + undef %17.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %4.sub0, implicit $exec + undef %18.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %4.sub3, implicit $exec + undef %19.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %4.sub2, implicit $exec + undef %20.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %6.sub1, implicit $exec + undef %21.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %6.sub0, implicit $exec + undef %22.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %6.sub3, implicit $exec + undef %23.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %6.sub2, implicit $exec + %24:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 64, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 64, addrspace 1) + undef %25.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub1, implicit $exec + undef %26.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub0, implicit $exec + undef %27.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub3, implicit $exec + undef %28.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub2, implicit $exec + %29:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 80, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1) + undef %30.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub1, implicit $exec + undef %31.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub0, implicit $exec + undef %32.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub3, implicit $exec + undef %33.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub2, implicit $exec + %34:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 96, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1) + undef %35.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub1, implicit $exec + undef %36.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub0, implicit $exec + undef %37.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub3, implicit $exec + undef %38.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub2, implicit $exec + %39:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 112, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1) + undef %40.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %39.sub1, implicit $exec + undef %41.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %39.sub0, implicit $exec + undef %42.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %39.sub3, implicit $exec + undef %43.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %39.sub2, implicit $exec + %44:sreg_32 = S_MOV_B32 65535 + %8.sub0:vreg_128 = V_AND_B32_e32 %44, %7.sub1, implicit $exec + %9.sub0:vreg_128 = V_AND_B32_e32 %44, %7.sub0, implicit $exec + %10.sub0:vreg_128 = V_AND_B32_e32 %44, %7.sub3, implicit $exec + %11.sub0:vreg_128 = V_AND_B32_e32 %44, %7.sub2, implicit $exec + %12.sub0:vreg_128 = V_AND_B32_e32 %44, %5.sub1, implicit $exec + %13.sub0:vreg_128 = V_AND_B32_e32 %44, %5.sub0, implicit $exec + %14.sub0:vreg_128 = V_AND_B32_e32 %44, %5.sub3, implicit $exec + %15.sub0:vreg_128 = V_AND_B32_e32 %44, %5.sub2, implicit $exec + %16.sub0:vreg_128 = V_AND_B32_e32 %44, %4.sub1, implicit $exec + %17.sub0:vreg_128 = V_AND_B32_e32 %44, %4.sub0, implicit $exec + %18.sub0:vreg_128 = V_AND_B32_e32 %44, %4.sub3, implicit $exec + %19.sub0:vreg_128 = V_AND_B32_e32 %44, %4.sub2, implicit $exec + %20.sub0:vreg_128 = V_AND_B32_e32 %44, %6.sub1, implicit $exec + %21.sub0:vreg_128 = 
V_AND_B32_e32 %44, %6.sub0, implicit $exec + %22.sub0:vreg_128 = V_AND_B32_e32 %44, %6.sub3, implicit $exec + %23.sub0:vreg_128 = V_AND_B32_e32 %44, %6.sub2, implicit $exec + %25.sub0:vreg_128 = V_AND_B32_e32 %44, %24.sub1, implicit $exec + %26.sub0:vreg_128 = V_AND_B32_e32 %44, %24.sub0, implicit $exec + %27.sub0:vreg_128 = V_AND_B32_e32 %44, %24.sub3, implicit $exec + %28.sub0:vreg_128 = V_AND_B32_e32 %44, %24.sub2, implicit $exec + %30.sub0:vreg_128 = V_AND_B32_e32 %44, %29.sub1, implicit $exec + %31.sub0:vreg_128 = V_AND_B32_e32 %44, %29.sub0, implicit $exec + %32.sub0:vreg_128 = V_AND_B32_e32 %44, %29.sub3, implicit $exec + %33.sub0:vreg_128 = V_AND_B32_e32 %44, %29.sub2, implicit $exec + %35.sub0:vreg_128 = V_AND_B32_e32 %44, %34.sub1, implicit $exec + %36.sub0:vreg_128 = V_AND_B32_e32 %44, %34.sub0, implicit $exec + %37.sub0:vreg_128 = V_AND_B32_e32 %44, %34.sub3, implicit $exec + %38.sub0:vreg_128 = V_AND_B32_e32 %44, %34.sub2, implicit $exec + %40.sub0:vreg_128 = V_AND_B32_e32 %44, %39.sub1, implicit $exec + %41.sub0:vreg_128 = V_AND_B32_e32 %44, %39.sub0, implicit $exec + %42.sub0:vreg_128 = V_AND_B32_e32 %44, %39.sub3, implicit $exec + %43.sub0:vreg_128 = V_AND_B32_e32 %44, %39.sub2, implicit $exec + %43.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec + %43.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %43, %2, 0, 480, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + %42.sub1:vreg_128 = COPY %43.sub1 + %42.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %42, %2, 0, 496, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + %41.sub1:vreg_128 = COPY %43.sub1 + %41.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %41, %2, 0, 448, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1) + %40.sub1:vreg_128 = COPY %43.sub1 + %40.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %40, %2, 0, 464, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + %38.sub1:vreg_128 = COPY %43.sub1 + %38.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %38, %2, 0, 416, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + %37.sub1:vreg_128 = COPY %43.sub1 + %37.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %37, %2, 0, 432, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + %36.sub1:vreg_128 = COPY %43.sub1 + %36.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1) + %35.sub1:vreg_128 = COPY %43.sub1 + %35.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %35, %2, 0, 400, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + %33.sub1:vreg_128 = COPY %43.sub1 + %33.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %33, %2, 0, 352, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + %32.sub1:vreg_128 = COPY %43.sub1 + %32.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %32, %2, 0, 368, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + %31.sub1:vreg_128 = COPY %43.sub1 + %31.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %31, %2, 0, 320, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1) + %30.sub1:vreg_128 = COPY %43.sub1 + %30.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %30, %2, 0, 336, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + %28.sub1:vreg_128 = COPY %43.sub1 + %28.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %28, %2, 0, 288, 0, 0, 0, 0, 0, 
implicit $exec :: (store 16, align 32, addrspace 1) + %27.sub1:vreg_128 = COPY %43.sub1 + %27.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %27, %2, 0, 304, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + %26.sub1:vreg_128 = COPY %43.sub1 + %26.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %26, %2, 0, 256, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 256, addrspace 1) + %25.sub1:vreg_128 = COPY %43.sub1 + %25.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %25, %2, 0, 272, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + %23.sub1:vreg_128 = COPY %43.sub1 + %23.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %23, %2, 0, 224, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + %22.sub1:vreg_128 = COPY %43.sub1 + %22.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %22, %2, 0, 240, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + %21.sub1:vreg_128 = COPY %43.sub1 + %21.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %21, %2, 0, 192, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1) + %20.sub1:vreg_128 = COPY %43.sub1 + %20.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %20, %2, 0, 208, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + %19.sub1:vreg_128 = COPY %43.sub1 + %19.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %19, %2, 0, 160, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + %18.sub1:vreg_128 = COPY %43.sub1 + %18.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %18, %2, 0, 176, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + %17.sub1:vreg_128 = COPY %43.sub1 + %17.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %17, %2, 0, 128, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1) + %16.sub1:vreg_128 = COPY %43.sub1 + %16.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %16, %2, 0, 144, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + %15.sub1:vreg_128 = COPY %43.sub1 + %15.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %15, %2, 0, 96, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + %14.sub1:vreg_128 = COPY %43.sub1 + %14.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %14, %2, 0, 112, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + %13.sub1:vreg_128 = COPY %43.sub1 + %13.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %13, %2, 0, 64, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1) + %12.sub1:vreg_128 = COPY %43.sub1 + %12.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %12, %2, 0, 80, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + %11.sub1:vreg_128 = COPY %43.sub1 + %11.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %11, %2, 0, 32, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + %10.sub1:vreg_128 = COPY %43.sub1 + %10.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %10, %2, 0, 48, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + %9.sub1:vreg_128 = COPY %43.sub1 + %9.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %9, %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 512, addrspace 1) + %8.sub1:vreg_128 = COPY %43.sub1 + %8.sub3:vreg_128 = COPY %43.sub1 + BUFFER_STORE_DWORDX4_OFFSET %8, %2, 0, 16, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + S_ENDPGM 0 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
index 0fa0ddab4e11f..6759cd1040f85 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
+++ b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
@@ -110,7 +110,7 @@ body: |
     ; and inserting a spill. Here we just check that the point where the error
     ; occurs we see a correctly generated spill.
     ; GCN-LABEL: bb.7:
-    ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec
+    ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec
     undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
     %15.sub1:vreg_128 = COPY %15.sub0
@@ -126,7 +126,7 @@ body: |
     successors: %bb.12(0x80000000)

    ; GCN-LABEL: bb.9:
-    ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec
+    ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec
     undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
     %15.sub1:vreg_128 = COPY %15.sub0
@@ -137,7 +137,7 @@ body: |
     successors: %bb.12(0x80000000)

    ; GCN-LABEL: bb.10:
-    ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec
+    ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec
     undef %15.sub0:vreg_128 = V_MOV_B32_e32 2143289344, implicit $exec
     %15.sub1:vreg_128 = COPY %15.sub0

From aadf55d1cea24a4e5384ab8546c3d794cb1ec724 Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Thu, 17 Sep 2020 11:08:26 +0300
Subject: [PATCH 0950/1079] [NFC] EliminateDuplicatePHINodes(): small-size
 optimization: if there are <= 32 PHI's, O(n^2) algo is faster (geomean
 -0.08%)

This is functionally equivalent to the old implementation.

As per
https://llvm-compile-time-tracker.com/compare.php?from=5f4e9bf6416e45eba483a4e5e263749989fdb3b3&to=4739e6e4eb54d3736e6457249c0919b30f6c855a&stat=instructions
this is a clear geomean compile-time regression-free win, with an overall
geomean of `-0.08%`.

32 PHI's appears to be the sweet spot; both 16 and 64 performed worse:
https://llvm-compile-time-tracker.com/compare.php?from=5f4e9bf6416e45eba483a4e5e263749989fdb3b3&to=c4efe1fbbfdf0305ac26cd19eacb0c7774cdf60e&stat=instructions
https://llvm-compile-time-tracker.com/compare.php?from=5f4e9bf6416e45eba483a4e5e263749989fdb3b3&to=e4989d1c67010d3339d1a40ff5286a31f10cfe82&stat=instructions

If we have more PHI's than that, we fall back to the original
DenseSet-based implementation, so the not-so-fast cases will still be
handled.

However, compile time isn't the main motivation here. I can name at
least 3 limitations of this CSE:
1. Assumes that all PHI nodes have incoming basic blocks in the same
   order (can be fixed while keeping the DenseMap)
2. Does not special-handle `undef` incoming values (I don't see how we
   can do this with hashing)
3. Does not special-handle backedge incoming values (maybe can be fixed
   by hashing backedge as some magical value)
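For illustration only (not part of this patch; the wrapper name
cseAllPHIs is hypothetical): the dispatch is invisible to callers, since
EliminateDuplicatePHINodes() declared in llvm/Transforms/Utils/Local.h
remains the single public entry point. A minimal sketch of a caller:

  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/Function.h"
  #include "llvm/Transforms/Utils/Local.h"

  // Run PHI CSE over every block of a function. Each call internally
  // picks the naive O(n^2) scan or the hashed-set implementation,
  // based on the block's PHI count.
  static bool cseAllPHIs(llvm::Function &F) {
    bool Changed = false;
    for (llvm::BasicBlock &BB : F)
      Changed |= llvm::EliminateDuplicatePHINodes(&BB);
    return Changed;
  }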
Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D87408
---
 llvm/lib/Transforms/Utils/Local.cpp | 55 +++++++++++++++++++++++++++--
 1 file changed, 52 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 0b848feddf8ee..51e8251b22800 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -104,6 +104,12 @@ static cl::opt<bool> PHICSEDebugHash(
     cl::desc("Perform extra assertion checking to verify that PHINodes's hash "
              "function is well-behaved w.r.t. its isEqual predicate"));

+static cl::opt<unsigned> PHICSENumPHISmallSize(
+    "phicse-num-phi-smallsize", cl::init(32), cl::Hidden,
+    cl::desc(
+        "When the basic block contains not more than this number of PHI nodes, "
+        "perform a (faster!) exhaustive search instead of set-driven one."));
+
 // Max recursion depth for collectBitParts used when detecting bswap and
 // bitreverse idioms
 static const unsigned BitPartRecursionMaxDepth = 64;
@@ -1132,9 +1138,39 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
   return true;
 }

-// WARNING: this logic must be kept in sync with
-//   Instruction::isIdenticalToWhenDefined()!
-bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+static bool EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB) {
+  // This implementation doesn't currently consider undef operands
+  // specially. Theoretically, two phis which are identical except for
+  // one having an undef where the other doesn't could be collapsed.
+
+  bool Changed = false;
+
+  // Examine each PHI.
+  // Note that increment of I must *NOT* be in the iteration_expression, since
+  // we don't want to immediately advance when we restart from the beginning.
+  for (auto I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I);) {
+    ++I;
+    // Is there an identical PHI node in this basic block?
+    // Note that we only look in the upper square's triangle,
+    // we already checked that the lower triangle PHI's aren't identical.
+    for (auto J = I; PHINode *DuplicatePN = dyn_cast<PHINode>(J); ++J) {
+      if (!DuplicatePN->isIdenticalToWhenDefined(PN))
+        continue;
+      // A duplicate. Replace this PHI with the base PHI.
+      ++NumPHICSEs;
+      DuplicatePN->replaceAllUsesWith(PN);
+      DuplicatePN->eraseFromParent();
+      Changed = true;
+
+      // The RAUW can change PHIs that we already visited.
+      I = BB->begin();
+      break; // Start over from the beginning.
+    }
+  }
+  return Changed;
+}
+
+static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) {
   // This implementation doesn't currently consider undef operands
   // specially. Theoretically, two phis which are identical except for
   // one having an undef where the other doesn't could be collapsed.
@@ -1152,6 +1188,8 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
       return PN == getEmptyKey() || PN == getTombstoneKey();
     }

+    // WARNING: this logic must be kept in sync with
+    //   Instruction::isIdenticalToWhenDefined()!
     static unsigned getHashValueImpl(PHINode *PN) {
       // Compute a hash value on the operands. Instcombine will likely have
       // sorted them, which helps expose duplicates, but we have to check all
@@ -1191,6 +1229,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {

   // Set of unique PHINodes.
   DenseSet<PHINode *, PHIDenseMapInfo> PHISet;
+  PHISet.reserve(4 * PHICSENumPHISmallSize);

   // Examine each PHI.
   bool Changed = false;
@@ -1213,6 +1252,16 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
   return Changed;
 }

+bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+  if (
+#ifndef NDEBUG
+      !PHICSEDebugHash &&
+#endif
+      hasNItemsOrLess(BB->phis(), PHICSENumPHISmallSize))
+    return EliminateDuplicatePHINodesNaiveImpl(BB);
+  return EliminateDuplicatePHINodesSetBasedImpl(BB);
+}
+
 /// enforceKnownAlignment - If the specified pointer points to an object that
 /// we control, modify the object's alignment to PrefAlign. This isn't
 /// often possible though. If alignment is important, a more reliable approach

From b03c2b8395ba94fb53f1e73a6473faedf628bbd9 Mon Sep 17 00:00:00 2001
From: Douglas Yung
Date: Thu, 17 Sep 2020 01:28:32 -0700
Subject: [PATCH 0951/1079] Revert "Re-land: Add new hidden option
 -print-changed which only reports changes to IR"

The test added in this commit is failing on Windows bots:

http://lab.llvm.org:8011/builders/llvm-clang-win-x-armv7l/builds/1269

This reverts commit f9e6d1edc0dad9afb26e773aa125ed62c58f7080 and
follow-up commit 6859d95ea2d0f3fe0de2923a3f642170e66a1a14.
---
 .../llvm/Passes/StandardInstrumentations.h    |  92 -------
 llvm/lib/IR/LegacyPassManager.cpp             |   4 +-
 llvm/lib/Passes/StandardInstrumentations.cpp  | 228 +-----------------
 llvm/test/Other/change-printer.ll             | 109 ---------
 4 files changed, 7 insertions(+), 426 deletions(-)
 delete mode 100644 llvm/test/Other/change-printer.ll

diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h
index 8fc868bfa4c9e..76e217c899745 100644
--- a/llvm/include/llvm/Passes/StandardInstrumentations.h
+++ b/llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -124,97 +124,6 @@ class PreservedCFGCheckerInstrumentation {
   void registerCallbacks(PassInstrumentationCallbacks &PIC);
 };

-// Base class for classes that report changes to the IR.
-// It presents an interface for such classes and provides calls
-// on various events as the new pass manager transforms the IR.
-// It also provides filtering of information based on hidden options
-// specifying which functions are interesting.
-// Calls are made for the following events/queries:
-// 1. The initial IR processed.
-// 2. To get the representation of the IR (of type \p T).
-// 3. When a pass does not change the IR.
-// 4. When a pass changes the IR (given both before and after representations
-//    of type \p T).
-// 5. When an IR is invalidated.
-// 6. When a pass is run on an IR that is not interesting (based on options).
-// 7. When a pass is ignored (pass manager or adapter pass).
-// 8. To compare two IR representations (of type \p T).
-template <typename IRUnitT> class ChangePrinter {
-protected:
-  ChangePrinter() : InitialIR(true) {}
-
-public:
-  virtual ~ChangePrinter();
-
-  // Determine if this pass/IR is interesting and if so, save the IR
-  // otherwise it is left on the stack without data
-  void saveIRBeforePass(Any IR, StringRef PassID);
-  // Compare the IR from before the pass after the pass.
-  void handleIRAfterPass(Any IR, StringRef PassID);
-  // Handle the situation where a pass is invalidated.
-  void handleInvalidatedPass(StringRef PassID);
-
-protected:
-  // called on the first IR processed
-  virtual void handleInitialIR(Any IR) = 0;
-  // called before and after a pass to get the representation of the IR
-  virtual void generateIRRepresentation(Any IR, StringRef PassID,
-                                        IRUnitT &Output) = 0;
-  // called when the pass is not iteresting
-  virtual void omitAfter(StringRef PassID, std::string &Name) = 0;
-  // called when an interesting IR has changed
-  virtual void handleAfter(StringRef PassID, std::string &Name,
-                           const IRUnitT &Before, const IRUnitT &After,
-                           Any) = 0;
-  // called when an interesting pass is invalidated
-  virtual void handleInvalidated(StringRef PassID) = 0;
-  // called when the IR or pass is not interesting
-  virtual void handleFiltered(StringRef PassID, std::string &Name) = 0;
-  // called when an ignored pass is encountered
-  virtual void handleIgnored(StringRef PassID, std::string &Name) = 0;
-  // called to compare the before and after representations of the IR
-  virtual bool same(const IRUnitT &Before, const IRUnitT &After) = 0;
-
-  // stack of IRs before passes
-  std::vector<IRUnitT> BeforeStack;
-  // Is this the first IR seen?
-  bool InitialIR;
-};
-
-// A change printer based on the string representation of the IR as created
-// by unwrapAndPrint. The string representation is stored in a std::string
-// to preserve it as the IR changes in each pass. Note that the banner is
-// included in this representation but it is massaged before reporting.
-class IRChangePrinter : public ChangePrinter<std::string> {
-public:
-  IRChangePrinter();
-  ~IRChangePrinter() override;
-  void registerCallbacks(PassInstrumentationCallbacks &PIC);
-
-protected:
-  // called on the first IR processed
-  void handleInitialIR(Any IR) override;
-  // called before and after a pass to get the representation of the IR
-  void generateIRRepresentation(Any IR, StringRef PassID,
-                                std::string &Output) override;
-  // called when the pass is not iteresting
-  void omitAfter(StringRef PassID, std::string &Name) override;
-  // called when an interesting IR has changed
-  void handleAfter(StringRef PassID, std::string &Name,
-                   const std::string &Before, const std::string &After,
-                   Any) override;
-  // called when an interesting pass is invalidated
-  void handleInvalidated(StringRef PassID) override;
-  // called when the IR or pass is not interesting
-  void handleFiltered(StringRef PassID, std::string &Name) override;
-  // called when an ignored pass is encountered
-  void handleIgnored(StringRef PassID, std::string &Name) override;
-  // called to compare the before and after representations of the IR
-  bool same(const std::string &Before, const std::string &After) override;
-
-  raw_ostream &Out;
-};
-
 /// This class provides an interface to register all the standard pass
 /// instrumentations and manages their state (if any).
 class StandardInstrumentations {
@@ -223,7 +132,6 @@ class StandardInstrumentations {
   TimePassesHandler TimePasses;
   OptNoneInstrumentation OptNone;
   PreservedCFGCheckerInstrumentation PreservedCFGChecker;
-  IRChangePrinter PrintChangedIR;

 public:
   StandardInstrumentations(bool DebugLogging) : PrintPass(DebugLogging) {}
diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp
index 63886f4861708..8d9ed917bb617 100644
--- a/llvm/lib/IR/LegacyPassManager.cpp
+++ b/llvm/lib/IR/LegacyPassManager.cpp
@@ -87,14 +87,14 @@ static cl::opt<bool> PrintAfterAll("print-after-all",
 static cl::opt<bool>
     PrintModuleScope("print-module-scope",
                      cl::desc("When printing IR for print-[before|after]{-all} "
-                              "and change reporters always print a module IR"),
+                              "always print a module IR"),
                      cl::init(false), cl::Hidden);

 static cl::list<std::string>
     PrintFuncsList("filter-print-funcs", cl::value_desc("function names"),
                    cl::desc("Only print IR for functions whose name "
                             "match this for all print-[before|after][-all] "
-                            "and change reporter options"),
+                            "options"),
                    cl::CommaSeparated, cl::Hidden);

 /// This is a helper to determine whether to print IR before or
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index e2cc19b34f3bc..2ee373b912be0 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -26,7 +26,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/raw_ostream.h"
-#include <unordered_set>
 #include <vector>

 using namespace llvm;
@@ -52,34 +51,6 @@ static cl::opt<bool>
     cl::desc("Print all pass management debugging information. "
              "`-debug-pass-manager` must also be specified"));

-// A hidden option that prints out the IR after passes, similar to
-// -print-after-all except that it only prints the IR after passes that
-// change the IR. Those passes that do not make changes to the IR are
-// reported as not making any changes. In addition, the initial IR is
-// also reported. Other hidden options affect the output from this
-// option. -filter-passes will limit the output to the named passes
-// that actually change the IR and other passes are reported as filtered out.
-// The specified passes will either be reported as making no changes (with
-// no IR reported) or the changed IR will be reported. Also, the
-// -filter-print-funcs and -print-module-scope options will do similar
-// filtering based on function name, reporting changed IRs as functions(or
-// modules if -print-module-scope is specified) for a particular function
-// or indicating that the IR has been filtered out. The extra options
-// can be combined, allowing only changed IRs for certain passes on certain
-// functions to be reported in different formats, with the rest being
-// reported as filtered out.
-static cl::opt<bool> PrintChanged("print-changed",
-                                  cl::desc("Print changed IRs"),
-                                  cl::init(false), cl::Hidden);
-// A hidden option that supports the -print-changed option. See
-// the description for -print-changed for an explanation of the use
-// of this option. Note that this option has no effect without -print-changed.
-static cl::list<std::string>
-    PrintPassesList("filter-passes", cl::value_desc("pass names"),
-                    cl::desc("Only consider IR changes for passes whose names "
-                             "match for the print-changed option"),
-                    cl::CommaSeparated, cl::Hidden);
-
 namespace {

 /// Extracting Module out of \p IR unit. Also fills a textual description
@@ -136,8 +107,7 @@ void printIR(raw_ostream &OS, const Function *F, StringRef Banner,
 }

 void printIR(raw_ostream &OS, const Module *M, StringRef Banner,
-             StringRef Extra = StringRef(), bool Brief = false,
-             bool ShouldPreserveUseListOrder = false) {
+             StringRef Extra = StringRef(), bool Brief = false) {
   if (Brief) {
     OS << M->getName() << '\n';
     return;
@@ -145,7 +115,7 @@ void printIR(raw_ostream &OS, const Module *M, StringRef Banner,

   if (llvm::isFunctionInPrintList("*") || llvm::forcePrintModuleIR()) {
     OS << Banner << Extra << "\n";
-    M->print(OS, nullptr, ShouldPreserveUseListOrder);
+    M->print(OS, nullptr, false);
   } else {
     for (const auto &F : M->functions()) {
       printIR(OS, &F, Banner, Extra);
@@ -189,19 +159,17 @@ void printIR(raw_ostream &OS, const Loop *L, StringRef Banner,
 /// Generic IR-printing helper that unpacks a pointer to IRUnit wrapped into
 /// llvm::Any and does actual print job.
 void unwrapAndPrint(raw_ostream &OS, Any IR, StringRef Banner,
-                    bool ForceModule = false, bool Brief = false,
-                    bool ShouldPreserveUseListOrder = false) {
+                    bool ForceModule = false, bool Brief = false) {
   if (ForceModule) {
     if (auto UnwrappedModule = unwrapModule(IR))
-      printIR(OS, UnwrappedModule->first, Banner, UnwrappedModule->second,
-              Brief, ShouldPreserveUseListOrder);
+      printIR(OS, UnwrappedModule->first, Banner, UnwrappedModule->second);
     return;
   }

   if (any_isa<const Module *>(IR)) {
     const Module *M = any_cast<const Module *>(IR);
     assert(M && "module should be valid for printing");
-    printIR(OS, M, Banner, "", Brief, ShouldPreserveUseListOrder);
+    printIR(OS, M, Banner, "", Brief);
     return;
   }

@@ -229,193 +197,8 @@ void unwrapAndPrint(raw_ostream &OS, Any IR, StringRef Banner,
   llvm_unreachable("Unknown wrapped IR type");
 }

-// Return true when this is a pass for which changes should be ignored
-inline bool isIgnored(StringRef PassID) {
-  return isSpecialPass(PassID,
-                       {"PassManager", "PassAdaptor", "AnalysisManagerProxy"});
-}
-
-// Return true when this is a defined function for which printing
-// of changes is desired.
-inline bool isInterestingFunction(const Function &F) {
-  return llvm::isFunctionInPrintList(F.getName());
-}
-
-// Return true when this is a pass for which printing of changes is desired.
-inline bool isInterestingPass(StringRef PassID) {
-  if (isIgnored(PassID))
-    return false;
-
-  static std::unordered_set<std::string> PrintPassNames(PrintPassesList.begin(),
-                                                        PrintPassesList.end());
-  return PrintPassNames.empty() || PrintPassNames.count(PassID.str());
-}
-
-// Return true when this is a pass on IR for which printing
-// of changes is desired.
-bool isInteresting(Any IR, StringRef PassID) {
-  if (!isInterestingPass(PassID))
-    return false;
-  if (any_isa<const Function *>(IR))
-    return isInterestingFunction(*any_cast<const Function *>(IR));
-  return true;
-}
-
 } // namespace

-template <typename IRUnitT>
-void ChangePrinter<IRUnitT>::saveIRBeforePass(Any IR, StringRef PassID) {
-  // Always need to place something on the stack because invalidated passes
-  // are not given the IR so it cannot be determined whether the pass was for
-  // something that was filtered out.
-  BeforeStack.emplace_back();
-
-  if (!isInteresting(IR, PassID))
-    return;
-  // Is this the initial IR?
-  if (InitialIR) {
-    InitialIR = false;
-    handleInitialIR(IR);
-  }
-
-  // Save the IR representation on the stack.
-  auto &Data = BeforeStack.back();
-  generateIRRepresentation(IR, PassID, Data);
-}
-
-template <typename IRUnitT>
-void ChangePrinter<IRUnitT>::handleIRAfterPass(Any IR, StringRef PassID) {
-  assert(!BeforeStack.empty() && "Unexpected empty stack encountered.");
-  std::string Name;
-
-  // unwrapModule has inconsistent handling of names for function IRs.
-  if (any_isa<const Function *>(IR)) {
-    const Function *F = any_cast<const Function *>(IR);
-    Name = formatv(" (function: {0})", F->getName()).str();
-  } else {
-    if (auto UM = unwrapModule(IR))
-      Name = UM->second;
-  }
-  if (Name == "")
-    Name = " (module)";
-
-  if (isIgnored(PassID))
-    handleIgnored(PassID, Name);
-  else if (!isInteresting(IR, PassID))
-    handleFiltered(PassID, Name);
-  else {
-    // Get the before rep from the stack
-    IRUnitT &Before = BeforeStack.back();
-    // Create the after rep
-    IRUnitT After;
-    generateIRRepresentation(IR, PassID, After);
-
-    // was there a change in IR?
-    if (same(Before, After))
-      omitAfter(PassID, Name);
-    else
-      handleAfter(PassID, Name, Before, After, IR);
-  }
-  BeforeStack.pop_back();
-}
-
-template <typename IRUnitT>
-void ChangePrinter<IRUnitT>::handleInvalidatedPass(StringRef PassID) {
-  assert(!BeforeStack.empty() && "Unexpected empty stack encountered.");
-
-  // Always flag it as invalidated as we cannot determine when
-  // a pass for a filtered function is invalidated since we do not
-  // get the IR in the call. Also, the output is just alternate
-  // forms of the banner anyway.
-  handleInvalidated(PassID);
-  BeforeStack.pop_back();
-}
-
-template <typename IRUnitT> ChangePrinter<IRUnitT>::~ChangePrinter() {
-  assert(BeforeStack.empty() && "Problem with Change Printer stack.");
-}
-
-IRChangePrinter::IRChangePrinter() : Out(dbgs()) {}
-
-IRChangePrinter::~IRChangePrinter() {
-}
-
-void IRChangePrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) {
-  if (!PrintChanged)
-    return;
-
-  PIC.registerBeforePassCallback([this](StringRef P, Any IR) {
-    saveIRBeforePass(IR, P);
-    return true;
-  });
-
-  PIC.registerAfterPassCallback(
-      [this](StringRef P, Any IR, const PreservedAnalyses &) {
-        handleIRAfterPass(IR, P);
-      });
-  PIC.registerAfterPassInvalidatedCallback(
-      [this](StringRef P, const PreservedAnalyses &) {
-        handleInvalidatedPass(P);
-      });
-}
-
-void IRChangePrinter::handleInitialIR(Any IR) {
-  StringRef Banner("*** IR Dump At Start: ***");
-  unwrapAndPrint(Out, IR, Banner, true,
-                 /*Brief*/ false, /*ShouldPreserveUseListOrder*/ true);
-}
-
-void IRChangePrinter::generateIRRepresentation(Any IR, StringRef PassID,
-                                               std::string &Output) {
-  raw_string_ostream OS(Output);
-  // use the after banner for all cases so it will match
-  SmallString<20> Banner = formatv("*** IR Dump After {0} ***", PassID);
-  unwrapAndPrint(OS, IR, Banner, llvm::forcePrintModuleIR(),
-                 /*Brief*/ false, /*ShouldPreserveUseListOrder*/ true);
-  OS.str();
-}
-
-void IRChangePrinter::omitAfter(StringRef PassID, std::string &Name) {
-  Out << formatv("*** IR Dump After {0}{1} omitted because no change ***\n",
-                 PassID, Name);
-}
-
-void IRChangePrinter::handleAfter(StringRef PassID, std::string &Name,
-                                  const std::string &Before,
-                                  const std::string &After, Any) {
-  assert(After.find("*** IR Dump") == 0 && "Unexpected banner format.");
-  StringRef AfterRef = After;
-  StringRef Banner =
-      AfterRef.take_until([](char C) -> bool { return C == '\n'; });
-  Out << Banner;
-
-  // LazyCallGraph::SCC already has "(scc:..." in banner so only add
-  // in the name if it isn't already there.
-  if (Name.substr(0, 6).compare(" (scc:") != 0 && !llvm::forcePrintModuleIR())
-    Out << Name;
-
-  Out << After.substr(Banner.size());
-}
-
-void IRChangePrinter::handleInvalidated(StringRef PassID) {
-  Out << formatv("*** IR Pass {0} invalidated ***\n", PassID);
-}
-
-void IRChangePrinter::handleFiltered(StringRef PassID, std::string &Name) {
-  SmallString<20> Banner =
-      formatv("*** IR Dump After {0}{1} filtered out ***\n", PassID, Name);
-  Out << Banner;
-}
-
-void IRChangePrinter::handleIgnored(StringRef PassID, std::string &Name) {
-  Out << formatv("*** IR Pass {0}{1} ignored ***\n", PassID, Name);
-}
-
-bool IRChangePrinter::same(const std::string &Before,
-                           const std::string &After) {
-  return Before.compare(After) == 0;
-}
-
 PrintIRInstrumentation::~PrintIRInstrumentation() {
   assert(ModuleDescStack.empty() && "ModuleDescStack is not empty at exit");
 }
@@ -725,5 +508,4 @@ void StandardInstrumentations::registerCallbacks(
   TimePasses.registerCallbacks(PIC);
   OptNone.registerCallbacks(PIC);
   PreservedCFGChecker.registerCallbacks(PIC);
-  PrintChangedIR.registerCallbacks(PIC);
 }
diff --git a/llvm/test/Other/change-printer.ll b/llvm/test/Other/change-printer.ll
deleted file mode 100644
index 54c941b293009..0000000000000
--- a/llvm/test/Other/change-printer.ll
+++ /dev/null
@@ -1,109 +0,0 @@
-; Simple checks of -print-changed functionality
-;
-; Note that (mostly) only the banners are checked.
-;
-; Simple functionality check.
-; RUN: opt -S -print-changed -passes=instsimplify 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_SIMPLE
-;
-; Check that only the passes that change the IR are printed and that the
-; others (including g) are filtered out.
-; RUN: opt -S -print-changed -passes=instsimplify -filter-print-funcs=f 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_FUNC_FILTER
-;
-; Check that the reporting of IRs respects -print-module-scope
-; RUN: opt -S -print-changed -passes=instsimplify -print-module-scope 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_PRINT_MOD_SCOPE
-;
-; Check that the reporting of IRs respects -print-module-scope
-; RUN: opt -S -print-changed -passes=instsimplify -filter-print-funcs=f -print-module-scope 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_FUNC_FILTER_MOD_SCOPE
-;
-; Check that reporting of multiple functions happens
-; RUN: opt -S -print-changed -passes=instsimplify -filter-print-funcs="f,g" 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_FILTER_MULT_FUNC
-;
-; Check that the reporting of IRs respects -filter-passes
-; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass" 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_FILTER_PASSES
-;
-; Check that the reporting of IRs respects -filter-passes with multiple passes
-; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_FILTER_MULT_PASSES
-;
-; Check that the reporting of IRs respects both -filter-passes and -filter-print-funcs
-; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" -filter-print-funcs=f 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_FILTER_FUNC_PASSES
-;
-; Check that the reporting of IRs respects -filter-passes, -filter-print-funcs and -print-module-scope
-; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" -filter-print-funcs=f -print-module-scope 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_FILTER_FUNC_PASSES_MOD_SCOPE
-;
-; Check that repeated passes that change the IR are printed and that the
-; others (including g) are filtered out. Note that the second time
-; instsimplify is run on f, it does not change the IR
-; RUN: opt -S -print-changed -passes="instsimplify,instsimplify" -filter-print-funcs=f 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK_MULT_PASSES_FILTER_FUNC
-
-define i32 @g() {
-entry:
-  %a = add i32 2, 3
-  ret i32 %a
-}
-
-define i32 @f() {
-entry:
-  %a = add i32 2, 3
-  ret i32 %a
-}
-
-; CHECK_SIMPLE: *** IR Dump At Start: ***
-; CHECK_SIMPLE: ; ModuleID = '<stdin>'
-; CHECK_SIMPLE: *** IR Dump After VerifierPass (module) omitted because no change ***
-; CHECK_SIMPLE: *** IR Dump After InstSimplifyPass *** (function: g)
-; CHECK_SIMPLE: *** IR Pass PassManager (function: g) ignored ***
-; CHECK_SIMPLE: *** IR Dump After InstSimplifyPass *** (function: f)
-; CHECK_SIMPLE: *** IR Pass PassManager (function: f) ignored ***
-; CHECK_SIMPLE: *** IR Pass ModuleToFunctionPassAdaptor{{ ?}}<{{.*}}> (module) ignored ***
-; CHECK_SIMPLE: *** IR Dump After VerifierPass (module) omitted because no change ***
-; CHECK_SIMPLE: *** IR Dump After PrintModulePass (module) omitted because no change ***
-
-; CHECK_FUNC_FILTER: *** IR Dump At Start: ***
-; CHECK_FUNC_FILTER: *** IR Dump After InstSimplifyPass (function: g) filtered out ***
-; CHECK_FUNC_FILTER: *** IR Dump After InstSimplifyPass *** (function: f)
-
-; CHECK_PRINT_MOD_SCOPE: *** IR Dump At Start: ***
-; CHECK_PRINT_MOD_SCOPE: *** IR Dump After InstSimplifyPass *** (function: g)
-; CHECK_PRINT_MOD_SCOPE: ModuleID = '<stdin>'
-; CHECK_PRINT_MOD_SCOPE: *** IR Dump After InstSimplifyPass *** (function: f)
-; CHECK_PRINT_MOD_SCOPE: ModuleID = '<stdin>'
-
-; CHECK_FUNC_FILTER_MOD_SCOPE: *** IR Dump At Start: ***
-; CHECK_FUNC_FILTER_MOD_SCOPE: *** IR Dump After InstSimplifyPass (function: g) filtered out ***
-; CHECK_FUNC_FILTER_MOD_SCOPE: *** IR Dump After InstSimplifyPass *** (function: f)
-; CHECK_FUNC_FILTER_MOD_SCOPE: ModuleID = '<stdin>'
-
-; CHECK_FILTER_MULT_FUNC: *** IR Dump At Start: ***
-; CHECK_FILTER_MULT_FUNC: *** IR Dump After InstSimplifyPass *** (function: g)
-; CHECK_FILTER_MULT_FUNC: *** IR Dump After InstSimplifyPass *** (function: f)
-
-; CHECK_FILTER_PASSES: *** IR Dump After InstSimplifyPass (function: g) filtered out ***
-; CHECK_FILTER_PASSES: *** IR Dump At Start: *** (function: g)
-; CHECK_FILTER_PASSES: *** IR Dump After NoOpFunctionPass (function: g) omitted because no change ***
-; CHECK_FILTER_PASSES: *** IR Dump After InstSimplifyPass (function: f) filtered out ***
-; CHECK_FILTER_PASSES: *** IR Dump After NoOpFunctionPass (function: f) omitted because no change ***
-
-; CHECK_FILTER_MULT_PASSES: *** IR Dump At Start: *** (function: g)
-; CHECK_FILTER_MULT_PASSES: *** IR Dump After InstSimplifyPass *** (function: g)
-; CHECK_FILTER_MULT_PASSES: *** IR Dump After NoOpFunctionPass (function: g) omitted because no change ***
-; CHECK_FILTER_MULT_PASSES: *** IR Dump After InstSimplifyPass *** (function: f)
-; CHECK_FILTER_MULT_PASSES: *** IR Dump After NoOpFunctionPass (function: f) omitted because no change ***
-
-; CHECK_FILTER_FUNC_PASSES: *** IR Dump After InstSimplifyPass (function: g) filtered out ***
-; CHECK_FILTER_FUNC_PASSES: *** IR Dump After NoOpFunctionPass (function: g) filtered out ***
-; CHECK_FILTER_FUNC_PASSES: *** IR Dump At Start: *** (function: f)
-; CHECK_FILTER_FUNC_PASSES: *** IR Dump After InstSimplifyPass *** (function: f)
InstSimplifyPass *** (function: f) -; CHECK_FILTER_FUNC_PASSES: *** IR Dump After NoOpFunctionPass (function: f) omitted because no change *** - -; CHECK_FILTER_FUNC_PASSES_MOD_SCOPE: *** IR Dump After InstSimplifyPass (function: g) filtered out *** -; CHECK_FILTER_FUNC_PASSES_MOD_SCOPE: *** IR Dump After NoOpFunctionPass (function: g) filtered out *** -; CHECK_FILTER_FUNC_PASSES_MOD_SCOPE: *** IR Dump At Start: *** (function: f) -; CHECK_FILTER_FUNC_PASSES_MOD_SCOPE: *** IR Dump After InstSimplifyPass *** (function: f) -; CHECK_FILTER_FUNC_PASSES_MOD_SCOPE: ModuleID = '' -; CHECK_FILTER_FUNC_PASSES_MOD_SCOPE: *** IR Dump After NoOpFunctionPass (function: f) omitted because no change *** - -; CHECK_MULT_PASSES_FILTER_FUNC: *** IR Dump At Start: *** -; CHECK_MULT_PASSES_FILTER_FUNC: *** IR Dump After InstSimplifyPass (function: g) filtered out *** -; CHECK_MULT_PASSES_FILTER_FUNC: *** IR Dump After InstSimplifyPass (function: g) filtered out *** -; CHECK_MULT_PASSES_FILTER_FUNC: *** IR Dump After InstSimplifyPass *** (function: f) -; CHECK_MULT_PASSES_FILTER_FUNC: *** IR Dump After InstSimplifyPass (function: f) omitted because no change *** From a9cbe5cf30e386a4f44981f5bf9e1862ad36574d Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Thu, 17 Sep 2020 11:17:11 +0200 Subject: [PATCH 0952/1079] [X86] Fix stack alignment on 32-bit Solaris/x86 On Solaris/x86, several hundred 32-bit tests `FAIL`, all in the same way: env ASAN_OPTIONS=halt_on_error=false ./halt_on_error_suppress_equal_pcs.cpp.tmp Segmentation Fault (core dumped) They segfault during startup: Thread 2 received signal SIGSEGV, Segmentation fault. [Switching to Thread 1 (LWP 1)] 0x080f21f0 in __sanitizer::internal_mmap(void*, unsigned long, int, int, int, unsigned long long) () at /vol/llvm/src/llvm-project/dist/compiler-rt/lib/sanitizer_common/sanitizer_solaris.cpp:65 65 int prot, int flags, int fd, OFF_T offset) { 1: x/i $pc => 0x80f21f0 <_ZN11__sanitizer13internal_mmapEPvmiiiy+16>: movaps 0x30(%esp),%xmm0 (gdb) p/x $esp $3 = 0xfeffd488 The problem is that `movaps` expects 16-byte alignment, while 32-bit Solaris/x86 only guarantees 4-byte alignment following the i386 psABI. This patch updates `X86Subtarget::initSubtargetFeatures` accordingly, handles Solaris/x86 in the corresponding testcase, and allows for some variation in address alignment in `compiler-rt/test/ubsan/TestCases/TypeCheck/vptr.cpp`. Tested on `amd64-pc-solaris2.11` and `x86_64-pc-linux-gnu`. Differential Revision: https://reviews.llvm.org/D87615 --- compiler-rt/test/ubsan/TestCases/TypeCheck/vptr.cpp | 6 +++--- llvm/lib/Target/X86/X86Subtarget.cpp | 9 +++++---- llvm/test/CodeGen/X86/stack-align2.ll | 7 ++++++- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/compiler-rt/test/ubsan/TestCases/TypeCheck/vptr.cpp b/compiler-rt/test/ubsan/TestCases/TypeCheck/vptr.cpp index 67239e82d340d..ac35e42275710 100644 --- a/compiler-rt/test/ubsan/TestCases/TypeCheck/vptr.cpp +++ b/compiler-rt/test/ubsan/TestCases/TypeCheck/vptr.cpp @@ -162,7 +162,7 @@ int access_p(T *p, char type) { case 'm': // CHECK-MEMBER: vptr.cpp:[[@LINE+6]]:15: runtime error: member access within address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'T' // CHECK-MEMBER-NEXT: [[PTR]]: note: object is of type [[DYN_TYPE:'S'|'U']] - // CHECK-MEMBER-NEXT: {{^ .. .. .. .. .. .. .. .. .. .. .. .. }} + // CHECK-MEMBER-NEXT: {{^ ?.. .. .. .. ?.. .. .. .. ?.. .. .. .. ?}} // CHECK-MEMBER-NEXT: {{^ \^~~~~~~~~~~(~~~~~~~~~~~~)? 
*$}} // CHECK-MEMBER-NEXT: {{^ vptr for}} [[DYN_TYPE]] // CHECK-Linux-MEMBER: #0 {{.*}}access_p{{.*}}vptr.cpp:[[@LINE+1]] @@ -178,7 +178,7 @@ int access_p(T *p, char type) { case 'f': // CHECK-MEMFUN: vptr.cpp:[[@LINE+6]]:15: runtime error: member call on address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'T' // CHECK-MEMFUN-NEXT: [[PTR]]: note: object is of type [[DYN_TYPE:'S'|'U']] - // CHECK-MEMFUN-NEXT: {{^ .. .. .. .. .. .. .. .. .. .. .. .. }} + // CHECK-MEMFUN-NEXT: {{^ ?.. .. .. .. ?.. .. .. .. ?.. .. .. .. ?}} // CHECK-MEMFUN-NEXT: {{^ \^~~~~~~~~~~(~~~~~~~~~~~~)? *$}} // CHECK-MEMFUN-NEXT: {{^ vptr for}} [[DYN_TYPE]] // TODO: Add check for stacktrace here. @@ -196,7 +196,7 @@ int access_p(T *p, char type) { case 'c': // CHECK-DOWNCAST: vptr.cpp:[[@LINE+6]]:11: runtime error: downcast of address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'T' // CHECK-DOWNCAST-NEXT: [[PTR]]: note: object is of type [[DYN_TYPE:'S'|'U']] - // CHECK-DOWNCAST-NEXT: {{^ .. .. .. .. .. .. .. .. .. .. .. .. }} + // CHECK-DOWNCAST-NEXT: {{^ ?.. .. .. .. ?.. .. .. .. ?.. .. .. .. ?}} // CHECK-DOWNCAST-NEXT: {{^ \^~~~~~~~~~~(~~~~~~~~~~~~)? *$}} // CHECK-DOWNCAST-NEXT: {{^ vptr for}} [[DYN_TYPE]] // CHECK-Linux-DOWNCAST: #0 {{.*}}access_p{{.*}}vptr.cpp:[[@LINE+1]] diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp index 4cf17e46a598a..d50c552a65b6f 100644 --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -258,12 +258,13 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, report_fatal_error("64-bit code requested on a subtarget that doesn't " "support it!"); - // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both - // 32 and 64 bit) and for all 64-bit targets. + // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and for all + // 64-bit targets. On Solaris (32-bit), stack alignment is 4 bytes + // following the i386 psABI, while on Illumos it is always 16 bytes. if (StackAlignOverride) stackAlignment = *StackAlignOverride; - else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() || - isTargetKFreeBSD() || In64BitMode) + else if (isTargetDarwin() || isTargetLinux() || isTargetKFreeBSD() || + In64BitMode) stackAlignment = Align(16); // Consume the vector width attribute or apply any target specific limit. 
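To see the ABI mismatch in isolation, here is a minimal standalone sketch
(illustrative only, not part of the patch): `movaps` needs a 16-byte-aligned
memory operand, but the i386 psABI's 4-byte stack alignment gives a 16-byte
stack object no guarantee of landing on such a boundary, so an aligned SSE
access can fault exactly as in the sanitizer backtrace above.

  #include <cstdint>
  #include <cstdio>

  int main() {
    float buf[4]; // 16 bytes of stack; only 4-byte alignment is guaranteed
                  // by the 32-bit Solaris/x86 (i386 psABI) stack contract.
    bool aligned16 = (reinterpret_cast<std::uintptr_t>(buf) % 16) == 0;
    std::printf("buf = %p, 16-byte aligned: %d\n",
                static_cast<void *>(buf), aligned16);
    // An instruction like `movaps 0x30(%esp), %xmm0` raises SIGSEGV whenever
    // its operand is not 16-byte aligned, which is what the failing tests hit.
    return 0;
  }
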
diff --git a/llvm/test/CodeGen/X86/stack-align2.ll b/llvm/test/CodeGen/X86/stack-align2.ll
index 7239198000c99..095a9090ed08f 100644
--- a/llvm/test/CodeGen/X86/stack-align2.ll
+++ b/llvm/test/CodeGen/X86/stack-align2.ll
@@ -2,10 +2,12 @@
 ; RUN: llc < %s -mcpu=generic -mtriple=i386-kfreebsd | FileCheck %s -check-prefix=KFREEBSD-I386
 ; RUN: llc < %s -mcpu=generic -mtriple=i386-netbsd | FileCheck %s -check-prefix=NETBSD-I386
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-apple-darwin8 | FileCheck %s -check-prefix=DARWIN-I386
+; RUN: llc < %s -mcpu=generic -mtriple=i386-pc-solaris2.11 | FileCheck %s -check-prefix=SOLARIS-I386
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s -check-prefix=LINUX-X86_64
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-kfreebsd | FileCheck %s -check-prefix=KFREEBSD-X86_64
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-netbsd | FileCheck %s -check-prefix=NETBSD-X86_64
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin8 | FileCheck %s -check-prefix=DARWIN-X86_64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-pc-solaris2.11 | FileCheck %s -check-prefix=SOLARIS-X86_64

 define i32 @test() nounwind {
 entry:
@@ -15,7 +17,8 @@ entry:
 ; LINUX-I386: subl $12, %esp
 ; KFREEBSD-I386: subl $12, %esp
 ; DARWIN-I386: subl $12, %esp
-; NETBSD-I386-NOT: subl {{.*}}, %esp
+; NETBSD-I386-NOT: subl {{.*}}, %esp
+; SOLARIS-I386-NOT: subl {{.*}}, %esp

 ; LINUX-X86_64: pushq %{{.*}}
 ; LINUX-X86_64-NOT: subq {{.*}}, %rsp
@@ -23,6 +26,8 @@ entry:
 ; DARWIN-X86_64-NOT: subq {{.*}}, %rsp
 ; NETBSD-X86_64: pushq %{{.*}}
 ; NETBSD-X86_64-NOT: subq {{.*}}, %rsp
+; SOLARIS-X86_64: pushq %{{.*}}
+; SOLARIS-X86_64-NOT: subq {{.*}}, %rsp
 ; KFREEBSD-X86_64: pushq %{{.*}}
 ; KFREEBSD-X86_64-NOT: subq {{.*}}, %rsp
 }

From c687af0c30b4dbdc9f614d5e061c888238e0f9c5 Mon Sep 17 00:00:00 2001
From: David Spickett
Date: Tue, 15 Sep 2020 14:49:48 +0100
Subject: [PATCH 0953/1079] [lldb] Don't send invalid region addresses to lldb server

Previously, when the address expression in a "memory region" command didn't
parse correctly, we'd print an error and then still ask lldb-server for a
region containing LLDB_INVALID_ADDRESS:

(lldb) memory region not_an_address
error: invalid address argument "not_an_address"...
error: Server returned invalid range

Now the command is only sent to lldb-server if the address parsed correctly:

(lldb) memory region not_an_address
error: invalid address argument "not_an_address"...
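The fix is the classic fail-fast guard. As a minimal sketch of the pattern
(hypothetical helper names, not the actual lldb sources):

  #include <cstdint>

  constexpr std::uint64_t kInvalidAddress = ~0ULL; // stands in for LLDB_INVALID_ADDRESS

  bool ParseAddress(const char *arg, std::uint64_t &addr); // assumed helper
  void ReportError(const char *arg);                       // assumed helper
  bool QueryServerForRegion(std::uint64_t addr);           // assumed helper

  bool HandleMemoryRegion(const char *arg) {
    std::uint64_t addr = kInvalidAddress;
    if (!ParseAddress(arg, addr)) {
      ReportError(arg);
      return false; // the added early return: without it, control fell
                    // through and queried the server with kInvalidAddress
    }
    return QueryServerForRegion(addr);
  }
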
Reviewed By: labath Differential Revision: https://reviews.llvm.org/D87694 --- lldb/source/Commands/CommandObjectMemory.cpp | 1 + .../API/functionalities/memory-region/TestMemoryRegion.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/lldb/source/Commands/CommandObjectMemory.cpp b/lldb/source/Commands/CommandObjectMemory.cpp index 474c377101493..d918937994981 100644 --- a/lldb/source/Commands/CommandObjectMemory.cpp +++ b/lldb/source/Commands/CommandObjectMemory.cpp @@ -1707,6 +1707,7 @@ class CommandObjectMemoryRegion : public CommandObjectParsed { "invalid address argument \"%s\": %s\n", command[0].c_str(), error.AsCString()); result.SetStatus(eReturnStatusFailed); + return false; } } diff --git a/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py b/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py index 283cc945ed09a..61e64d44e7945 100644 --- a/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py +++ b/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py @@ -41,6 +41,12 @@ def test(self): self.assertFalse(result.Succeeded()) self.assertRegexpMatches(result.GetError(), "Usage: memory region ADDR") + # Test that when the address fails to parse, we show an error and do not continue + interp.HandleCommand("memory region not_an_address", result) + self.assertFalse(result.Succeeded()) + self.assertEqual(result.GetError(), + "error: invalid address argument \"not_an_address\": address expression \"not_an_address\" evaluation failed\n") + # Now let's print the memory region starting at 0 which should always work. interp.HandleCommand("memory region 0x0", result) self.assertTrue(result.Succeeded()) From 9218f9283802b2d1ff33c490761fdb925b1e56d9 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Fri, 11 Sep 2020 15:18:44 +0000 Subject: [PATCH 0954/1079] [clang][aarch64] ACLE: Support implicit casts between GNU and SVE vectors This patch adds support for implicit casting between GNU vectors and SVE vectors when `__ARM_FEATURE_SVE_BITS==N`, as defined by the Arm C Language Extensions (ACLE, version 00bet5, section 3.7.3.3) for SVE [1]. This behavior makes it possible to use GNU vectors with ACLE functions that operate on VLAT. For example: typedef int8_t vec __attribute__((vector_size(32))); vec f(vec x) { return svasrd_x(svptrue_b8(), x, 1); } Tests are also added for implicit casting between GNU and fixed-length SVE vectors created by the 'arm_sve_vector_bits' attribute. This behavior makes it possible to use VLST with existing interfaces that operate on GNUT. For example: typedef int8_t vec1 __attribute__((vector_size(32))); void f(vec1); #if __ARM_FEATURE_SVE_BITS==256 && __ARM_FEATURE_SVE_VECTOR_OPERATORS typedef svint8_t vec2 __attribute__((arm_sve_vector_bits(256))); void g(vec2 x) { f(x); } // OK #endif The `__ARM_FEATURE_SVE_VECTOR_OPERATORS` feature macro indicates interoperability with the GNU vector extension. This is the first patch providing support for this feature, which once complete will be enabled by the `-msve-vector-bits` flag, as the `__ARM_FEATURE_SVE_BITS` feature currently is. 
[1] https://developer.arm.com/documentation/100987/latest Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D87607 --- clang/lib/AST/ASTContext.cpp | 4 + .../CodeGen/attr-arm-sve-vector-bits-cast.c | 53 +++++++++++ clang/test/Sema/attr-arm-sve-vector-bits.c | 92 +++++++++++++------ .../test/SemaCXX/attr-arm-sve-vector-bits.cpp | 14 ++- 4 files changed, 134 insertions(+), 29 deletions(-) diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 20ea91c68d6d3..84f747361235a 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -8516,6 +8516,10 @@ bool ASTContext::areCompatibleSveTypes(QualType FirstType, else if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector) return VT->getElementType().getCanonicalType() == FirstType->getSveEltType(*this); + else if (VT->getVectorKind() == VectorType::GenericVector) + return getTypeSize(SecondType) == getLangOpts().ArmSveVectorBits && + hasSameType(VT->getElementType(), + getBuiltinVectorTypeInfo(BT).ElementType); } } return false; diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c index 18a7e1f1496cf..e65537cead104 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c @@ -9,6 +9,7 @@ typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N))); typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N))); typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N))); +typedef int32_t gnu_int32_t __attribute__((vector_size(N / 8))); // CHECK-LABEL: @to_svint32_t( // CHECK-NEXT: entry: @@ -107,3 +108,55 @@ svbool_t to_svbool_t(fixed_bool_t type) { fixed_bool_t from_svbool_t(svbool_t type) { return type; } + +// CHECK-LABEL: @to_svint32_t__from_gnu_int32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca <16 x i32>, align 16 +// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0:%.*]], align 16, [[TBAA2]] +// CHECK-NEXT: store <16 x i32> [[TYPE]], <16 x i32>* [[TYPE_ADDR]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[TYPE_ADDR]] to * +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA2]] +// CHECK-NEXT: ret [[TMP2]] +// +svint32_t to_svint32_t__from_gnu_int32_t(gnu_int32_t type) { + return type; +} + +// CHECK-LABEL: @from_svint32_t__to_gnu_int32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca , align 16 +// CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, [[TBAA5]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast * [[TYPE_ADDR]] to <16 x i32>* +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0]], align 16, [[TBAA2]] +// CHECK-NEXT: store <16 x i32> [[TMP1]], <16 x i32>* [[AGG_RESULT:%.*]], align 16, [[TBAA2]] +// CHECK-NEXT: ret void +// +gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) { + return type; +} + +// CHECK-LABEL: @to_fixed_int32_t__from_gnu_int32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0:%.*]], align 16, [[TBAA2]] +// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* +// CHECK-NEXT: store <16 x i32> [[TYPE]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP1]] +// +fixed_int32_t to_fixed_int32_t__from_gnu_int32_t(gnu_int32_t type) { + return type; +} + 
+// CHECK-LABEL: @from_fixed_int32_t__to_gnu_int32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TYPE:%.*]] = alloca <16 x i32>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32>* [[TYPE]] to * +// CHECK-NEXT: store [[TYPE_COERCE:%.*]], * [[TMP0]], align 16 +// CHECK-NEXT: [[TYPE1:%.*]] = load <16 x i32>, <16 x i32>* [[TYPE]], align 16, [[TBAA2]] +// CHECK-NEXT: store <16 x i32> [[TYPE1]], <16 x i32>* [[AGG_RESULT:%.*]], align 16, [[TBAA2]] +// CHECK-NEXT: ret void +// +gnu_int32_t from_fixed_int32_t__to_gnu_int32_t(fixed_int32_t type) { + return type; +} diff --git a/clang/test/Sema/attr-arm-sve-vector-bits.c b/clang/test/Sema/attr-arm-sve-vector-bits.c index 1bcbfa360c976..7cc2d4f4e0b5e 100644 --- a/clang/test/Sema/attr-arm-sve-vector-bits.c +++ b/clang/test/Sema/attr-arm-sve-vector-bits.c @@ -1,11 +1,16 @@ -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fsyntax-only -verify -msve-vector-bits=128 -fallow-half-arguments-and-returns %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fsyntax-only -verify -msve-vector-bits=256 -fallow-half-arguments-and-returns %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fsyntax-only -verify -msve-vector-bits=512 -fallow-half-arguments-and-returns %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fsyntax-only -verify -msve-vector-bits=1024 -fallow-half-arguments-and-returns %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fsyntax-only -verify -msve-vector-bits=2048 -fallow-half-arguments-and-returns %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -msve-vector-bits=128 -fallow-half-arguments-and-returns %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -msve-vector-bits=256 -fallow-half-arguments-and-returns %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -msve-vector-bits=512 -fallow-half-arguments-and-returns %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -msve-vector-bits=1024 -fallow-half-arguments-and-returns %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -msve-vector-bits=2048 -fallow-half-arguments-and-returns %s + +#include #define N __ARM_FEATURE_SVE_BITS +typedef __fp16 float16_t; +typedef float float32_t; +typedef double float64_t; typedef __SVInt8_t svint8_t; typedef __SVInt16_t svint16_t; typedef __SVInt32_t svint32_t; @@ -19,6 +24,7 @@ typedef __SVFloat32_t svfloat32_t; typedef __SVFloat64_t svfloat64_t; #if defined(__ARM_FEATURE_SVE_BF16) +typedef __bf16 bfloat16_t; typedef __SVBFloat16_t svbfloat16_t; #endif @@ -43,6 +49,23 @@ typedef svbfloat16_t fixed_bfloat16_t __attribute__((arm_sve_vector_bits(N))); typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N))); +// GNU vector types +typedef int8_t gnu_int8_t __attribute__((vector_size(N / 8))); +typedef int16_t gnu_int16_t __attribute__((vector_size(N / 8))); +typedef int32_t gnu_int32_t __attribute__((vector_size(N / 8))); +typedef int64_t gnu_int64_t __attribute__((vector_size(N / 8))); + +typedef uint8_t 
gnu_uint8_t __attribute__((vector_size(N / 8))); +typedef uint16_t gnu_uint16_t __attribute__((vector_size(N / 8))); +typedef uint32_t gnu_uint32_t __attribute__((vector_size(N / 8))); +typedef uint64_t gnu_uint64_t __attribute__((vector_size(N / 8))); + +typedef float16_t gnu_float16_t __attribute__((vector_size(N / 8))); +typedef float32_t gnu_float32_t __attribute__((vector_size(N / 8))); +typedef float64_t gnu_float64_t __attribute__((vector_size(N / 8))); + +typedef bfloat16_t gnu_bfloat16_t __attribute__((vector_size(N / 8))); + // Attribute must have a single argument typedef svint8_t no_argument __attribute__((arm_sve_vector_bits)); // expected-error {{'arm_sve_vector_bits' attribute takes one argument}} typedef svint8_t two_arguments __attribute__((arm_sve_vector_bits(2, 4))); // expected-error {{'arm_sve_vector_bits' attribute takes one argument}} @@ -176,38 +199,51 @@ union union_bool { fixed_bool_t x, y[5]; }; // --------------------------------------------------------------------------// // Implicit casts -#define TEST_CAST(TYPE) \ - sv##TYPE##_t to_sv##TYPE##_t(fixed_##TYPE##_t x) { return x; } \ - fixed_##TYPE##_t from_sv##TYPE##_t(sv##TYPE##_t x) { return x; } - -TEST_CAST(int8) -TEST_CAST(int16) -TEST_CAST(int32) -TEST_CAST(int64) -TEST_CAST(uint8) -TEST_CAST(uint16) -TEST_CAST(uint32) -TEST_CAST(uint64) -TEST_CAST(float16) -TEST_CAST(float32) -TEST_CAST(float64) -TEST_CAST(bfloat16) -TEST_CAST(bool) +#define TEST_CAST_COMMON(TYPE) \ + sv##TYPE##_t to_sv##TYPE##_t_from_fixed(fixed_##TYPE##_t x) { return x; } \ + fixed_##TYPE##_t from_sv##TYPE##_t_to_fixed(sv##TYPE##_t x) { return x; } + +#define TEST_CAST_GNU(PREFIX, TYPE) \ + gnu_##TYPE##_t to_gnu_##TYPE##_t_from_##PREFIX##TYPE##_t(PREFIX##TYPE##_t x) { return x; } \ + PREFIX##TYPE##_t from_gnu_##TYPE##_t_to_##PREFIX##TYPE##_t(gnu_##TYPE##_t x) { return x; } + +#define TEST_CAST_VECTOR(TYPE) \ + TEST_CAST_COMMON(TYPE) \ + TEST_CAST_GNU(sv, TYPE) \ + TEST_CAST_GNU(fixed_, TYPE) + +TEST_CAST_VECTOR(int8) +TEST_CAST_VECTOR(int16) +TEST_CAST_VECTOR(int32) +TEST_CAST_VECTOR(int64) +TEST_CAST_VECTOR(uint8) +TEST_CAST_VECTOR(uint16) +TEST_CAST_VECTOR(uint32) +TEST_CAST_VECTOR(uint64) +TEST_CAST_VECTOR(float16) +TEST_CAST_VECTOR(float32) +TEST_CAST_VECTOR(float64) +TEST_CAST_VECTOR(bfloat16) +TEST_CAST_COMMON(bool) // Test the implicit conversion only applies to valid types fixed_int8_t to_fixed_int8_t__from_svuint8_t(svuint8_t x) { return x; } // expected-error-re {{returning 'svuint8_t' (aka '__SVUint8_t') from a function with incompatible result type 'fixed_int8_t' (vector of {{[0-9]+}} 'signed char' values)}} fixed_bool_t to_fixed_bool_t__from_svint32_t(svint32_t x) { return x; } // expected-error-re {{returning 'svint32_t' (aka '__SVInt32_t') from a function with incompatible result type 'fixed_bool_t' (vector of {{[0-9]+}} 'unsigned char' values)}} +svint64_t to_svint64_t__from_gnu_int32_t(gnu_int32_t x) { return x; } // expected-error-re {{returning 'gnu_int32_t' (vector of {{[0-9]+}} 'int32_t' values) from a function with incompatible result type 'svint64_t' (aka '__SVInt64_t')}} +gnu_int32_t from_svint64_t__to_gnu_int32_t(svint64_t x) { return x; } // expected-error-re {{returning 'svint64_t' (aka '__SVInt64_t') from a function with incompatible result type 'gnu_int32_t' (vector of {{[0-9]+}} 'int32_t' values)}} + +// Test implicit conversion between SVE and GNU vector is invalid when +// __ARM_FEATURE_SVE_BITS != N +#if defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS == 512 +typedef int32_t int4 
__attribute__((vector_size(16)));
+svint32_t badcast(int4 x) { return x; } // expected-error {{returning 'int4' (vector of 4 'int32_t' values) from a function with incompatible result type 'svint32_t' (aka '__SVInt32_t')}}
+#endif
+
 // Test conversion between predicate and uint8 is invalid, both have the same
 // memory representation.
 fixed_bool_t to_fixed_bool_t__from_svuint8_t(svuint8_t x) { return x; } // expected-error-re {{returning 'svuint8_t' (aka '__SVUint8_t') from a function with incompatible result type 'fixed_bool_t' (vector of {{[0-9]+}} 'unsigned char' values)}}

-// Test the implicit conversion only applies to fixed-length types
-typedef signed int vSInt32 __attribute__((__vector_size__(16)));
-svint32_t to_svint32_t_from_gnut(vSInt32 x) { return x; } // expected-error-re {{returning 'vSInt32' (vector of {{[0-9]+}} 'int' values) from a function with incompatible result type 'svint32_t' (aka '__SVInt32_t')}}
-
-vSInt32 to_gnut_from_svint32_t(svint32_t x) { return x; } // expected-error-re {{returning 'svint32_t' (aka '__SVInt32_t') from a function with incompatible result type 'vSInt32' (vector of {{[0-9]+}} 'int' values)}}
-
 // --------------------------------------------------------------------------//
 // Test the scalable and fixed-length types can be used interchangeably
diff --git a/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp b/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
index ea7c4778db0ea..5e796b7c8995f 100644
--- a/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
+++ b/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
@@ -1,14 +1,26 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fsyntax-only -verify -std=c++11 -msve-vector-bits=512 -fallow-half-arguments-and-returns %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 -msve-vector-bits=512 -fallow-half-arguments-and-returns %s

 // expected-no-diagnostics

+#include
+
 #define N __ARM_FEATURE_SVE_BITS

 typedef __SVInt8_t svint8_t;
 typedef svint8_t fixed_int8_t __attribute__((arm_sve_vector_bits(N)));
+typedef int8_t gnu_int8_t __attribute__((vector_size(N / 8)));

 template struct S { T var; };

 S s;

+// Test implicit casts between VLA and VLS vectors
 svint8_t to_svint8_t(fixed_int8_t x) { return x; }
 fixed_int8_t from_svint8_t(svint8_t x) { return x; }
+
+// Test implicit casts between GNU and VLA vectors
+svint8_t to_svint8_t__from_gnu_int8_t(gnu_int8_t x) { return x; }
+gnu_int8_t from_svint8_t__to_gnu_int8_t(svint8_t x) { return x; }
+
+// Test implicit casts between GNU and VLS vectors
+fixed_int8_t to_fixed_int8_t__from_gnu_int8_t(gnu_int8_t x) { return x; }
+gnu_int8_t from_fixed_int8_t__to_gnu_int8_t(fixed_int8_t x) { return x; }

From 347d59b16c71194d7a9372dd69d3e41ebeca3113 Mon Sep 17 00:00:00 2001
From: Jakub Lichman
Date: Thu, 17 Sep 2020 09:26:30 +0000
Subject: [PATCH 0955/1079] [mlir][Linalg] Convolution tiling added to ConvOp vectorization pass

Until now, ConvOp vectorization supported only convolutions of static shape
whose dimensions were of size either 3 (vectorized) or 1 (not vectorized),
because the underlying vectors must themselves have static shape. This commit
adds support for convolutions of any size, as well as dynamic shapes, by
leveraging the existing matmul infrastructure to tile both the input and the
kernel down to sizes accepted by the previous version of ConvOp vectorization.
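The tiling strategy itself can be sketched outside MLIR. A minimal scalar C++
analogue (an illustration of the decomposition only, not the pass): tile the
reduction loop of a 1-D convolution into fixed chunks of 3, so every full
chunk has exactly the static shape the existing size-3 vectorizer handles,
with a scalar loop covering any remainder. In the actual pass the same
decomposition is expressed as a linalg tiling pattern followed by promotion
and vectorization, as the staged pattern lists below show.

  #include <cstddef>
  #include <vector>

  // Illustrative only; assumes in.size() >= kernel.size().
  std::vector<float> conv1dTiled(const std::vector<float> &in,
                                 const std::vector<float> &kernel) {
    const std::size_t tile = 3; // matches ConvOpVectorization::tileSize
    std::vector<float> out(in.size() - kernel.size() + 1, 0.0f);
    for (std::size_t o = 0; o < out.size(); ++o) {
      float acc = 0.0f;
      std::size_t j = 0;
      for (; j + tile <= kernel.size(); j += tile) // full tiles: this inner
        for (std::size_t t = 0; t < tile; ++t)     // loop is what becomes a
          acc += in[o + j + t] * kernel[j + t];    // 3-wide vector contract
      for (; j < kernel.size(); ++j)               // remainder stays scalar
        acc += in[o + j] * kernel[j];
      out[o] = acc;
    }
    return out;
  }
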
In the future this pass can be extended to take "tiling mask" as a user input which will enable vectorization of user specified dimensions. Differential Revision: https://reviews.llvm.org/D87676 --- .../Dialect/Linalg/Transforms/Transforms.h | 8 +- .../Dialect/Linalg/CPU/test-conv-1d-call.mlir | 10 +- .../Linalg/CPU/test-conv-1d-ncw-call.mlir | 10 +- .../Linalg/CPU/test-conv-1d-nwc-call.mlir | 10 +- .../Dialect/Linalg/CPU/test-conv-2d-call.mlir | 10 +- .../Linalg/CPU/test-conv-2d-nchw-call.mlir | 10 +- .../Linalg/CPU/test-conv-2d-nhwc-call.mlir | 10 +- .../Dialect/Linalg/CPU/test-conv-3d-call.mlir | 10 +- .../Linalg/CPU/test-conv-3d-ncdhw-call.mlir | 10 +- .../Linalg/CPU/test-conv-3d-ndhwc-call.mlir | 10 +- .../Linalg/Transforms/Vectorization.cpp | 87 ++++++-- .../LinalgToVector/linalg-to-vector.mlir | 203 ++++-------------- .../lib/Transforms/TestConvVectorization.cpp | 79 ++++++- 13 files changed, 214 insertions(+), 253 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index b55c429a9d02d..a34ea00fdf5df 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -31,8 +31,8 @@ struct TiledLinalgOp { }; /// Populates patterns for vectorization of all ConvN-D ops. -void populateConvVectorizationPatterns(MLIRContext *context, - OwningRewritePatternList &patterns); +void populateConvVectorizationPatterns( + MLIRContext *context, SmallVectorImpl &patterns); /// Performs standalone tiling of a single LinalgOp by `tileSizes`. /// and permute the loop nest according to `interchangeVector` @@ -589,6 +589,10 @@ class ConvOpVectorization : public OpRewritePattern { LogicalResult matchAndRewrite(ConvOp minOp, PatternRewriter &rewriter) const override; + + // TODO: Make these pass arguments. 
+ static const int tileSize = 3; + static const int noTile = 1; }; //===----------------------------------------------------------------------===// diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-call.mlir index 1b3ee65f13d96..8f3c6df79f904 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-call.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-call.mlir @@ -9,17 +9,13 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1" -test-conv-vectorization \ -// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ -// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -test-conv-vectorization -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=4" -linalg-tile="linalg-tile-sizes=1" \ -// RUN: -test-conv-vectorization -convert-linalg-to-loops \ -// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ -// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=4" \ +// RUN: -test-conv-vectorization -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir index 2647ee3d663c3..46634a7e5921c 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir @@ -9,17 +9,13 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1" -test-conv-vectorization \ -// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ -// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -test-conv-vectorization -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" -linalg-tile="linalg-tile-sizes=1,1,1" \ -// RUN: -test-conv-vectorization -convert-linalg-to-loops \ -// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ -// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" \ +// RUN: -test-conv-vectorization -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir index 5cc4de3844aa6..a6aeb30fc153b 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir @@ -9,17 +9,13 @@ // RUN: 
-shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1" -test-conv-vectorization \ -// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ -// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -test-conv-vectorization -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,4" -linalg-tile="linalg-tile-sizes=1,1,1" \ -// RUN: -test-conv-vectorization -convert-linalg-to-loops \ -// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ -// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,4" \ +// RUN: -test-conv-vectorization -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-call.mlir index 38420974ad983..819d95ef5da0c 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-call.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-call.mlir @@ -9,17 +9,13 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1" -test-conv-vectorization \ -// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ -// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -test-conv-vectorization -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,2" -linalg-tile="linalg-tile-sizes=1,1" \ -// RUN: -test-conv-vectorization -convert-linalg-to-loops \ -// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ -// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,2" \ +// RUN: -test-conv-vectorization -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir index fbd831f6801a9..fb0e70861864b 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir @@ -9,17 +9,13 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1,1" -test-conv-vectorization \ -// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ -// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -test-conv-vectorization -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: 
-shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" -linalg-tile="linalg-tile-sizes=1,1,1,1" \ -// RUN: -test-conv-vectorization -convert-linalg-to-loops \ -// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ -// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" \ +// RUN: -test-conv-vectorization -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir index 422720da429ef..5888eec7d67a4 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir @@ -9,17 +9,13 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1,1" -test-conv-vectorization \ -// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ -// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -test-conv-vectorization -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,3,2" -linalg-tile="linalg-tile-sizes=1,1,1,1" \ -// RUN: -test-conv-vectorization -convert-linalg-to-loops \ -// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ -// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,3,2" \ +// RUN: -test-conv-vectorization -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-call.mlir index 8f38962acf8bb..f0ca37f86fcd0 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-call.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-call.mlir @@ -9,17 +9,13 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1" -test-conv-vectorization \ -// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ -// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -test-conv-vectorization -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,2,2" -linalg-tile="linalg-tile-sizes=1,1,1" \ -// RUN: -test-conv-vectorization -convert-linalg-to-loops \ -// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ -// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,2,2" \ +// RUN: -test-conv-vectorization -convert-linalg-to-llvm | \ // RUN: 
mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir index 2ad2b4fc3465e..a56a260b9cd8a 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir @@ -9,17 +9,13 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1,1,1" -test-conv-vectorization \ -// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ -// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -test-conv-vectorization -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,5,5,5" -linalg-tile="linalg-tile-sizes=1,1,1,1,1" \ -// RUN: -test-conv-vectorization -convert-linalg-to-loops \ -// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ -// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,5,5,5" \ +// RUN: -test-conv-vectorization -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir index 4f1392363bb2d..37fc6453e5dd0 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir @@ -9,17 +9,13 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1,1,1" -test-conv-vectorization \ -// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ -// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -test-conv-vectorization -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,5,5,5" -linalg-tile="linalg-tile-sizes=1,1,1,1,1" \ -// RUN: -test-conv-vectorization -convert-linalg-to-loops \ -// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ -// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,5,5,5" \ +// RUN: -test-conv-vectorization -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index a8b11a48df174..9a225dd81c79c 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -371,7 +371,6 @@ LogicalResult 
LinalgCopyVTWForwardingPattern::matchAndRewrite( template LogicalResult ConvOpVectorization::matchAndRewrite( ConvOp op, PatternRewriter &rewriter) const { - unsigned dimSize = 3; Location loc = op.getLoc(); MLIRContext *context = op.getContext(); edsc::ScopedContext scope(rewriter, loc); @@ -391,7 +390,7 @@ LogicalResult ConvOpVectorization::matchAndRewrite( for (unsigned i = 0; i < N; i++) { if (!mask[i] && (inShape[i] != 1 || kShape[i] != 1)) return failure(); - if (mask[i] && (inShape[i] != dimSize || kShape[i] != dimSize)) + if (mask[i] && (inShape[i] != tileSize || kShape[i] != tileSize)) return failure(); if (mask[i]) @@ -409,7 +408,7 @@ LogicalResult ConvOpVectorization::matchAndRewrite( auto map = AffineMap::get(rank, 0, mapping, context); SmallVector zeros(rank, std_constant_index(0)); auto vecType = - VectorType::get(SmallVector(numDims, dimSize), elemType); + VectorType::get(SmallVector(numDims, tileSize), elemType); auto inputVec = vector_transfer_read(vecType, input, zeros, map); auto kernelVec = vector_transfer_read(vecType, kernel, zeros, map); @@ -433,32 +432,76 @@ LogicalResult ConvOpVectorization::matchAndRewrite( return success(); } +using ConvOpConst = ConvOpVectorization; + +/// Inserts tiling, promotion and vectorization pattern for ConvOp +/// conversion into corresponding pattern lists. +template +static void +populateVectorizationPatterns(OwningRewritePatternList &tilingPatterns, + OwningRewritePatternList &promotionPatterns, + OwningRewritePatternList &vectorizationPatterns, + ArrayRef tileSizes, + MLIRContext *context) { + constexpr static StringRef kTiledMarker = "TILED"; + constexpr static StringRef kPromotedMarker = "PROMOTED"; + tilingPatterns.insert>( + context, LinalgTilingOptions().setTileSizes(tileSizes), + LinalgMarker({}, Identifier::get(kTiledMarker, context))); + + promotionPatterns.insert>( + context, LinalgPromotionOptions().setUseFullTileBuffersByDefault(true), + LinalgMarker(Identifier::get(kTiledMarker, context), + Identifier::get(kPromotedMarker, context))); + + SmallVector mask(N); + int offset = tileSizes.size() - N; + std::transform(tileSizes.begin() + offset, tileSizes.end(), mask.begin(), + [](int64_t i) -> bool { return i != ConvOpConst::noTile; }); + + vectorizationPatterns.insert>(context, mask); +} + void mlir::linalg::populateConvVectorizationPatterns( - MLIRContext *context, OwningRewritePatternList &patterns) { - patterns.insert>( - context, SmallVector{true}); + MLIRContext *context, SmallVectorImpl &patterns) { + const int64_t tileSize = ConvOpConst::tileSize; + const int64_t noTile = ConvOpConst::noTile; + auto makeTileSizes = [&](unsigned numNoTile, unsigned numTile) { + SmallVector result(numNoTile, noTile); + result.append(numTile, tileSize); + return result; + }; + + OwningRewritePatternList tiling, promotion, vectorization; + populateVectorizationPatterns( + tiling, promotion, vectorization, + makeTileSizes(/*numNoTile=*/1, /*numTile*/ 1), context); + + populateVectorizationPatterns(tiling, promotion, vectorization, + makeTileSizes(3, 2), context); - patterns.insert>( - context, SmallVector{false, true, true}); + populateVectorizationPatterns(tiling, promotion, vectorization, + makeTileSizes(3, 2), context); - patterns.insert>( - context, SmallVector{false, true, true}); + populateVectorizationPatterns(tiling, promotion, vectorization, + makeTileSizes(2, 2), context); - patterns.insert>( - context, SmallVector{true, true}); + populateVectorizationPatterns(tiling, promotion, vectorization, + makeTileSizes(4, 3), context); - 
patterns.insert>( - context, SmallVector{false, true, true, true}); + populateVectorizationPatterns(tiling, promotion, vectorization, + makeTileSizes(4, 3), context); - patterns.insert>( - context, SmallVector{false, true, true, true}); + populateVectorizationPatterns(tiling, promotion, vectorization, + makeTileSizes(3, 3), context); - patterns.insert>( - context, SmallVector{true, true, true}); + populateVectorizationPatterns( + tiling, promotion, vectorization, makeTileSizes(5, 4), context); - patterns.insert>( - context, SmallVector{false, true, true, true, true}); + populateVectorizationPatterns( + tiling, promotion, vectorization, makeTileSizes(5, 4), context); - patterns.insert>( - context, SmallVector{false, true, true, true, true}); + patterns.push_back(std::move(tiling)); + patterns.push_back(std::move(promotion)); + patterns.push_back(std::move(vectorization)); } diff --git a/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir b/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir index 487718301d005..c2e8a31eb443c 100644 --- a/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir +++ b/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir @@ -1,167 +1,52 @@ // RUN: mlir-opt %s -test-conv-vectorization --cse | FileCheck %s -// CHECK-DAG: #[[$map0:.*]] = affine_map<(d0) -> (d0)> -// CHECK-DAG: #[[$map1:.*]] = affine_map<(d0) -> ()> -// CHECK-DAG: #[[$map2:.*]] = affine_map<(d0, d1, d2) -> (d1, d2)> -// CHECK-DAG: #[[$map3:.*]] = affine_map<(d0, d1) -> (d0, d1)> -// CHECK-DAG: #[[$map4:.*]] = affine_map<(d0, d1) -> ()> -// CHECK-DAG: #[[$map5:.*]] = affine_map<(d0, d1, d2, d3) -> (d1, d2, d3)> -// CHECK-DAG: #[[$map6:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> -// CHECK-DAG: #[[$map7:.*]] = affine_map<(d0, d1, d2) -> ()> -// CHECK-DAG: #[[$map8:.*]] = affine_map<(d0, d1, d2, d3, d4) -> (d1, d2, d3, d4)> -// CHECK-DAG: #[[$map9:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -// CHECK-DAG: #[[$map10:.*]] = affine_map<(d0, d1, d2, d3) -> ()> +// CHECK-DAG: #[[$map0:.*]] = affine_map<(d0)[s0] -> (1, -d0 + s0)> +// CHECK-DAG: #[[$map1:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> +// CHECK-DAG: #[[$map2:.*]] = affine_map<(d0, d1) -> (d0 + d1)> +// CHECK-DAG: #[[$map3:.*]] = affine_map<(d0, d1)[s0] -> (3, -d0 - d1 + s0)> +// CHECK-DAG: #[[$map4:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)> +// CHECK-DAG: #[[$map5:.*]] = affine_map<(d0) -> (d0)> -func @conv_1d(%arg0: memref<3xf32>, %arg1: memref<3xf32>, %arg2: memref) { - linalg.conv_1d %arg0, %arg1, %arg2 : (memref<3xf32>, memref<3xf32>, memref) +func @conv_1d(%arg0: memref, %arg1: memref, %arg2: memref) { + linalg.conv_1d %arg0, %arg1, %arg2 : (memref, memref, memref) return } // CHECK-LABEL: @conv_1d -// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<3xf32> -// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<3xf32> +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref // CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3xf32> -// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]]], %[[cst]] : memref<3xf32>, vector<3xf32> -// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map0]], #[[$map0]], #[[$map1]]], iterator_types = ["reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3xf32>, vector<3xf32> into f32 -// CHECK: store %[[v2]], %[[arg2]][%[[c0]]] : memref -// CHECK: return - -func @conv_1d_ncw(%arg0: memref<1x3x3xf32>, %arg1: memref<1x3x3xf32>, %arg2: memref) { - linalg.conv_1d_ncw %arg0, %arg1, %arg2 : (memref<1x3x3xf32>, memref<1x3x3xf32>, memref) - return -} 
- -// CHECK-LABEL: @conv_1d_ncw -// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3xf32> -// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3xf32> -// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3xf32> -// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3xf32>, vector<3x3xf32> -// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map3]], #[[$map3]], #[[$map4]]], iterator_types = ["reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3xf32>, vector<3x3xf32> into f32 -// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]]] : memref -// CHECK: return - - -func @conv_1d_nwc(%arg0: memref<1x3x3xf32>, %arg1: memref<1x3x3xf32>, %arg2: memref) { - linalg.conv_1d_nwc %arg0, %arg1, %arg2 : (memref<1x3x3xf32>, memref<1x3x3xf32>, memref) - return -} - -// CHECK-LABEL: @conv_1d_nwc -// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3xf32> -// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3xf32> -// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3xf32> -// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3xf32>, vector<3x3xf32> -// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map3]], #[[$map3]], #[[$map4]]], iterator_types = ["reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3xf32>, vector<3x3xf32> into f32 -// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]]] : memref -// CHECK: return - -func @conv_2d(%arg0: memref<3x3xf32>, %arg1: memref<3x3xf32>, %arg2: memref) { - linalg.conv_2d %arg0, %arg1, %arg2 : (memref<3x3xf32>, memref<3x3xf32>, memref) - return -} - -// CHECK-LABEL: @conv_2d -// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<3x3xf32> -// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<3x3xf32> -// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3xf32> -// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]]], %[[cst]] : memref<3x3xf32>, vector<3x3xf32> -// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map3]], #[[$map3]], #[[$map4]]], iterator_types = ["reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3xf32>, vector<3x3xf32> into f32 -// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]]] : memref -// CHECK: return - -func @conv_2d_nchw(%arg0: memref<1x3x3x3xf32>, %arg1: memref<1x3x3x3xf32>, %arg2: memref) { - linalg.conv_2d_nchw %arg0, %arg1, %arg2 : (memref<1x3x3x3xf32>, memref<1x3x3x3xf32>, memref) - return -} - -// CHECK-LABEL: @conv_2d_nchw -// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32> -// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32> -// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3xf32> -// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3xf32>, vector<3x3x3xf32> -// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map6]], #[[$map6]], #[[$map7]]], iterator_types = ["reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3xf32>, vector<3x3x3xf32> into f32 -// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref -// CHECK: return - -func @conv_2d_nhwc(%arg0: memref<1x3x3x3xf32>, %arg1: memref<1x3x3x3xf32>, %arg2: memref) { - linalg.conv_2d_nhwc %arg0, %arg1, %arg2 : (memref<1x3x3x3xf32>, memref<1x3x3x3xf32>, memref) - return -} - -// CHECK-LABEL: @conv_2d_nhwc -// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32> -// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32> -// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: 
memref, vector<3x3x3xf32> -// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3xf32>, vector<3x3x3xf32> -// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map6]], #[[$map6]], #[[$map7]]], iterator_types = ["reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3xf32>, vector<3x3x3xf32> into f32 -// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref -// CHECK: return - -func @conv_3d(%arg0: memref<3x3x3xf32>, %arg1: memref<3x3x3xf32>, %arg2: memref) { - linalg.conv_3d %arg0, %arg1, %arg2 : (memref<3x3x3xf32>, memref<3x3x3xf32>, memref) - return -} - -// CHECK-LABEL: @conv_3d -// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<3x3x3xf32> -// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<3x3x3xf32> -// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3xf32> -// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<3x3x3xf32>, vector<3x3x3xf32> -// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map6]], #[[$map6]], #[[$map7]]], iterator_types = ["reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3xf32>, vector<3x3x3xf32> into f32 -// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]]] : memref -// CHECK: return - -func @conv_3d_ncdhw(%arg0: memref<1x3x3x3x3xf32>, %arg1: memref<1x3x3x3x3xf32>, %arg2: memref) { - linalg.conv_3d_ncdhw %arg0, %arg1, %arg2 : (memref<1x3x3x3x3xf32>, memref<1x3x3x3x3xf32>, memref) - return -} - -// CHECK-LABEL: @conv_3d_ncdhw -// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32> -// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32> -// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3x3xf32> -// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3x3xf32>, vector<3x3x3x3xf32> -// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map9]], #[[$map9]], #[[$map10]]], iterator_types = ["reduction", "reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3x3xf32>, vector<3x3x3x3xf32> into f32 -// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref -// CHECK: return - -func @conv_3d_ndhwc(%arg0: memref<1x3x3x3x3xf32>, %arg1: memref<1x3x3x3x3xf32>, %arg2: memref) { - linalg.conv_3d_ndhwc %arg0, %arg1, %arg2 : (memref<1x3x3x3x3xf32>, memref<1x3x3x3x3xf32>, memref) - return -} - -// CHECK-LABEL: @conv_3d_ndhwc -// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32> -// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32> -// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3x3xf32> -// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3x3xf32>, vector<3x3x3x3xf32> -// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map9]], #[[$map9]], #[[$map10]]], iterator_types = ["reduction", "reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3x3xf32>, vector<3x3x3x3xf32> into f32 -// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref -// CHECK: return +// CHECK-DAG: %[[c12:.*]] = constant 12 : index +// CHECK-DAG: %[[c4:.*]] = constant 4 : index +// CHECK-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32 +// CHECK-DAG: %[[c3:.*]] = constant 3 : index +// CHECK-DAG: %[[c0:.*]] = constant 0 : index +// CHECK-DAG: %[[c1:.*]] = constant 1 : index +// CHECK: %[[v0:.*]] = dim 
%[[arg1]], %[[c0]] : memref +// CHECK: %[[v1:.*]] = dim %[[arg2]], %[[c0]] : memref +// CHECK: %[[v2:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECK: %[[v3:.*]] = alloc(%[[c12]]) : memref +// CHECK: %[[v4:.*]] = alloc(%[[c12]]) : memref +// CHECK: %[[v5:.*]] = alloc(%[[c4]]) : memref +// CHECK: %[[v6:.*]] = std.view %[[v3]][%[[c0]]][] : memref to memref<3xf32> +// CHECK: %[[v7:.*]] = std.view %[[v4]][%[[c0]]][] : memref to memref<3xf32> +// CHECK: %[[v8:.*]] = std.view %[[v5]][%[[c0]]][] : memref to memref<1xf32> +// CHECK: scf.for %[[arg3:.*]] = %[[c0]] to %[[v1]] step %[[c1]] { +// CHECK: %[[v9:.*]] = affine.min #[[$map0]](%[[arg3]])[%[[v1]]] +// CHECK: %[[v10:.*]] = subview %[[arg2]][%[[arg3]]] [%[[v9]]] [1] : memref to memref +// CHECK: %[[v11:.*]] = subview %[[v8]][0] [%[[v9]]] [1] : memref<1xf32> to memref +// CHECK: scf.for %[[arg4:.*]] = %[[c0]] to %[[v0]] step %[[c3]] { +// CHECK: %[[v12:.*]] = affine.apply #[[$map2]](%[[arg3]], %[[arg4]]) +// CHECK: %[[v13:.*]] = affine.min #[[$map3]](%[[arg3]], %[[arg4]])[%[[v2]]] +// CHECK: %[[v14:.*]] = subview %arg0[%12] [%13] [1] : memref to memref +// CHECK: %[[v15:.*]] = affine.min #[[$map4]](%arg4)[%0] +// CHECK: %[[v16:.*]] = subview %[[arg1]][%[[arg4]]] [%[[v15]]] [1] : memref to memref +// CHECK: %[[v17:.*]] = subview %[[v6]][0] [%[[v13]]] [1] : memref<3xf32> to memref +// CHECK: %[[v19:.*]] = vector.transfer_read %[[v6]][%[[c0]]], %[[cst]] {masked = [false]} : memref<3xf32>, vector<3xf32> +// CHECK: %[[v20:.*]] = vector.transfer_read %[[v7]][%[[c0]]], %[[cst]] {masked = [false]} : memref<3xf32>, vector<3xf32> +// CHECK: %[[v21:.*]] = mulf %[[v19]], %[[v20]] : vector<3xf32> +// CHECK: %[[v22:.*]] = vector.reduction "add", %[[v21]], %[[cst]] : vector<3xf32> into f32 +// CHECK: store %[[v22]], %[[v8]][%[[c0]]] : memref<1xf32> +// CHECK: scf.for %[[arg5:.*]] = %[[c0]] to %[[v9]] step %[[c1]] { +// CHECK: %[[v23:.*]] = load %[[v11]][%[[arg5]]] : memref +// CHECK: store %[[v23]], %[[v10]][%[[arg5]]] : memref diff --git a/mlir/test/lib/Transforms/TestConvVectorization.cpp b/mlir/test/lib/Transforms/TestConvVectorization.cpp index 37e509cbbbe1b..c90d8058de329 100644 --- a/mlir/test/lib/Transforms/TestConvVectorization.cpp +++ b/mlir/test/lib/Transforms/TestConvVectorization.cpp @@ -1,4 +1,4 @@ -//===- TestConvVectorization.cpp - Linalg to Vector dialect conversion ----===// +//===- TestConvVectorization.cpp - Vectorization of Conv ops --------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,11 +6,19 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Conversion/VectorToSCF/VectorToSCF.h" +#include "mlir/Dialect/Linalg/Passes.h" +#include "mlir/Dialect/Linalg/Transforms/Hoisting.h" #include "mlir/Dialect/Linalg/Transforms/Transforms.h" +#include "mlir/Dialect/Vector/VectorTransforms.h" #include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" #include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/LoopUtils.h" +#include "mlir/Transforms/Passes.h" using namespace mlir; +using namespace vector; namespace { /// A pass converting MLIR Linalg ops into Vector ops. 
@@ -19,8 +27,10 @@ class TestConvVectorization
   void runOnOperation() override;
 
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<scf::SCFDialect>();
+    registry.insert<AffineDialect>();
     registry.insert<linalg::LinalgDialect>();
+    registry.insert<scf::SCFDialect>();
+    registry.insert<StandardOpsDialect>();
     registry.insert<VectorDialect>();
   }
 };
@@ -32,15 +42,70 @@ void TestConvVectorization::runOnOperation() {
 
   ConversionTarget target(*context);
-  target.addLegalDialect<scf::SCFDialect, StandardOpsDialect, VectorDialect>();
+  target.addLegalDialect<AffineDialect, scf::SCFDialect, StandardOpsDialect,
+                         VectorDialect>();
   target.addLegalOp<ModuleOp, FuncOp, ModuleTerminatorOp, ReturnOp>();
   target.addLegalOp<linalg::FillOp, linalg::YieldOp>();
 
-  OwningRewritePatternList patterns;
-  linalg::populateConvVectorizationPatterns(context, patterns);
+  SmallVector<OwningRewritePatternList, 4> stage1Patterns;
+  linalg::populateConvVectorizationPatterns(context, stage1Patterns);
 
-  if (failed(applyPartialConversion(module, target, patterns)))
-    return signalPassFailure();
+  OwningRewritePatternList stage2Patterns =
+      linalg::getLinalgTilingCanonicalizationPatterns(context);
+  stage2Patterns.insert<AffineMinSCFCanonicalizationPattern>(context);
+
+  auto stage3Transforms = [](Operation *op) {
+    PassManager pm(op->getContext());
+    pm.addPass(createLoopInvariantCodeMotionPass());
+    if (failed(pm.run(cast<ModuleOp>(op))))
+      llvm_unreachable("Unexpected failure in cleanup pass pipeline.");
+    op->walk([](FuncOp func) {
+      promoteSingleIterationLoops(func);
+      linalg::hoistViewAllocOps(func);
+      linalg::hoistRedundantVectorTransfers(func);
+    });
+    return success();
+  };
+
+  linalg::applyStagedPatterns(module, stage1Patterns, stage2Patterns,
+                              stage3Transforms);
+
+  //===--------------------------------------------------------------------===//
+  // Post staged patterns transforms
+  //===--------------------------------------------------------------------===//
+
+  VectorTransformsOptions vectorTransformsOptions{
+      VectorContractLowering::Dot, VectorTransposeLowering::EltWise};
+
+  OwningRewritePatternList vectorTransferPatterns;
+  // Pattern is not applied because rank-reducing vector transfer is not yet
+  // supported as can be seen in splitFullAndPartialTransferPrecondition,
+  // VectorTransforms.cpp
+  vectorTransferPatterns.insert<VectorTransferFullPartialRewriter>(
+      context, vectorTransformsOptions);
+  applyPatternsAndFoldGreedily(module, vectorTransferPatterns);
+
+  // Programmatic controlled lowering of linalg.copy and linalg.fill.
+  PassManager pm(context);
+  pm.addPass(createConvertLinalgToLoopsPass());
+  if (failed(pm.run(module)))
+    llvm_unreachable("Unexpected failure in linalg to loops pass.");
+
+  // Programmatic controlled lowering of vector.contract only.
+  OwningRewritePatternList vectorContractLoweringPatterns;
+  populateVectorContractLoweringPatterns(vectorContractLoweringPatterns,
+                                         context, vectorTransformsOptions);
+  applyPatternsAndFoldGreedily(module, vectorContractLoweringPatterns);
+
+  // Programmatic controlled lowering of vector.transfer only.
+  OwningRewritePatternList vectorToLoopsPatterns;
+  populateVectorToSCFConversionPatterns(vectorToLoopsPatterns, context,
+                                        VectorTransferToSCFOptions());
+  applyPatternsAndFoldGreedily(module, vectorToLoopsPatterns);
+
+  // Ensure we drop the marker in the end.
+  module.walk([](linalg::LinalgOp op) {
+    op.removeAttr(linalg::LinalgTransforms::kLinalgTransformMarker);
+  });
 }
 
 namespace mlir {

From 4ae1bb193a596d5dab8e4e6acfcc081972b166a3 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 16 Sep 2020 18:52:28 +0100
Subject: [PATCH 0956/1079] [AsmPrinter] Remove orphan
 DwarfUnit::shareAcrossDWOCUs declaration. NFCI.

Method implementation no longer exists.
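For context: the usual way to confirm a declaration is truly orphaned is a tree-wide search for the symbol; if the only remaining hit is the declaration itself, the out-of-line definition is gone and the removal is safe. A sketch of such a check (invocation illustrative, not part of the patch):

    grep -rn "shareAcrossDWOCUs" llvm/lib llvm/include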
--- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 4cd66fb2cada8..63a1e5a4780f1 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -74,7 +74,6 @@ class DwarfUnit : public DIEUnit { bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie); - bool shareAcrossDWOCUs() const; bool isShareableAcrossCUs(const DINode *D) const; public: From 8adf92e2d11ad23c946ae5bc10fc17505389e956 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 16 Sep 2020 19:01:42 +0100 Subject: [PATCH 0957/1079] [AMDGPU] Remove orphan SITargetLowering::LowerINT_TO_FP declaration. NFCI. Method implementation no longer exists. --- llvm/lib/Target/AMDGPU/SIISelLowering.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 3e8220ad9db22..6bfa33cef7ced 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -90,7 +90,6 @@ class SITargetLowering final : public AMDGPUTargetLowering { SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool Signed) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; From 550b1a6fd46f59134b2629ce23ca6a7874b45585 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 16 Sep 2020 19:02:20 +0100 Subject: [PATCH 0958/1079] [AsmPrinter] DwarfDebug - use DebugLoc const references where possible. NFC. Avoid unnecessary copies. --- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5a97e321ab1a2..94bf94c296cb0 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -218,8 +218,8 @@ static DbgValueLoc getDebugLocValue(const MachineInstr *MI) { const DIExpression *Expr = MI->getDebugExpression(); assert(MI->getNumOperands() == 4); if (MI->getDebugOperand(0).isReg()) { - auto RegOp = MI->getDebugOperand(0); - auto Op1 = MI->getDebugOffset(); + const auto &RegOp = MI->getDebugOperand(0); + const auto &Op1 = MI->getDebugOffset(); // If the second operand is an immediate, this is a // register-indirect address. 
assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset"); @@ -227,7 +227,7 @@ static DbgValueLoc getDebugLocValue(const MachineInstr *MI) { return DbgValueLoc(Expr, MLoc); } if (MI->getDebugOperand(0).isTargetIndex()) { - auto Op = MI->getDebugOperand(0); + const auto &Op = MI->getDebugOperand(0); return DbgValueLoc(Expr, TargetIndexLocation(Op.getIndex(), Op.getOffset())); } @@ -2506,7 +2506,7 @@ void DebugLocEntry::finalize(const AsmPrinter &AP, }) && "all values are expected to be fragments"); assert(llvm::is_sorted(Values) && "fragments are expected to be sorted"); - for (auto Fragment : Values) + for (const auto &Fragment : Values) DwarfDebug::emitDebugLocValue(AP, BT, Fragment, DwarfExpr); } else { From f108e71437c47cc5172af4a7f704bb3f69d392f2 Mon Sep 17 00:00:00 2001 From: Vincent Zhao Date: Wed, 16 Sep 2020 16:04:09 +0100 Subject: [PATCH 0959/1079] [MLIR] Turns swapId into a FlatAffineConstraints member func `swapId` used to be a static function in `AffineStructures.cpp`. This diff makes it accessible from the external world by turning it into a member function of `FlatAffineConstraints`. This will be very helpful for other projects that need to manipulate the content of `FlatAffineConstraints`. Differential Revision: https://reviews.llvm.org/D87766 --- mlir/include/mlir/Analysis/AffineStructures.h | 3 ++ mlir/lib/Analysis/AffineStructures.cpp | 39 +++++++++---------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h index e7b10c37825bd..d64a24e713d13 100644 --- a/mlir/include/mlir/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Analysis/AffineStructures.h @@ -307,6 +307,9 @@ class FlatAffineConstraints { /// otherwise. bool containsId(Value id) const; + /// Swap the posA^th identifier with the posB^th identifier. + void swapId(unsigned posA, unsigned posB); + // Add identifiers of the specified kind - specified positions are relative to // the kind of identifier. The coefficient column corresponding to the added // identifier is initialized to zero. 'id' is the Value corresponding to the diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp index 546dfa4ba7db2..5b7f4d4982d02 100644 --- a/mlir/lib/Analysis/AffineStructures.cpp +++ b/mlir/lib/Analysis/AffineStructures.cpp @@ -366,23 +366,6 @@ areIdsUnique(const FlatAffineConstraints &cst) { return true; } -// Swap the posA^th identifier with the posB^th identifier. 
-static void swapId(FlatAffineConstraints *A, unsigned posA, unsigned posB) { - assert(posA < A->getNumIds() && "invalid position A"); - assert(posB < A->getNumIds() && "invalid position B"); - - if (posA == posB) - return; - - for (unsigned r = 0, e = A->getNumInequalities(); r < e; r++) { - std::swap(A->atIneq(r, posA), A->atIneq(r, posB)); - } - for (unsigned r = 0, e = A->getNumEqualities(); r < e; r++) { - std::swap(A->atEq(r, posA), A->atEq(r, posB)); - } - std::swap(A->getId(posA), A->getId(posB)); -} - /// Merge and align the identifiers of A and B starting at 'offset', so that /// both constraint systems get the union of the contained identifiers that is /// dimension-wise and symbol-wise unique; both constraint systems are updated @@ -429,7 +412,7 @@ static void mergeAndAlignIds(unsigned offset, FlatAffineConstraints *A, assert(loc >= offset && "A's dim appears in B's aligned range"); assert(loc < B->getNumDimIds() && "A's dim appears in B's non-dim position"); - swapId(B, d, loc); + B->swapId(d, loc); } else { B->addDimId(d); B->setIdValue(d, aDimValue); @@ -451,7 +434,7 @@ static void mergeAndAlignIds(unsigned offset, FlatAffineConstraints *A, if (B->findId(aSymValue, &loc)) { assert(loc >= B->getNumDimIds() && loc < B->getNumDimAndSymbolIds() && "A's symbol appears in B's non-symbol position"); - swapId(B, s, loc); + B->swapId(s, loc); } else { B->addSymbolId(s - B->getNumDimIds()); B->setIdValue(s, aSymValue); @@ -619,7 +602,7 @@ LogicalResult FlatAffineConstraints::composeMatchingMap(AffineMap other) { static void turnDimIntoSymbol(FlatAffineConstraints *cst, Value id) { unsigned pos; if (cst->findId(id, &pos) && pos < cst->getNumDimIds()) { - swapId(cst, pos, cst->getNumDimIds() - 1); + cst->swapId(pos, cst->getNumDimIds() - 1); cst->setDimSymbolSeparation(cst->getNumSymbolIds() + 1); } } @@ -629,7 +612,7 @@ static void turnSymbolIntoDim(FlatAffineConstraints *cst, Value id) { unsigned pos; if (cst->findId(id, &pos) && pos >= cst->getNumDimIds() && pos < cst->getNumDimAndSymbolIds()) { - swapId(cst, pos, cst->getNumDimIds()); + cst->swapId(pos, cst->getNumDimIds()); cst->setDimSymbolSeparation(cst->getNumSymbolIds() - 1); } } @@ -1964,6 +1947,20 @@ bool FlatAffineConstraints::containsId(Value id) const { }); } +void FlatAffineConstraints::swapId(unsigned posA, unsigned posB) { + assert(posA < getNumIds() && "invalid position A"); + assert(posB < getNumIds() && "invalid position B"); + + if (posA == posB) + return; + + for (unsigned r = 0, e = getNumInequalities(); r < e; r++) + std::swap(atIneq(r, posA), atIneq(r, posB)); + for (unsigned r = 0, e = getNumEqualities(); r < e; r++) + std::swap(atEq(r, posA), atEq(r, posB)); + std::swap(getId(posA), getId(posB)); +} + void FlatAffineConstraints::setDimSymbolSeparation(unsigned newSymbolCount) { assert(newSymbolCount <= numDims + numSymbols && "invalid separation position"); From 504697e6f40ecad3da44aa43568b869780644353 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 17 Sep 2020 06:33:24 -0400 Subject: [PATCH 0960/1079] [gn build] (manually) port c9af34027bc --- llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn index 024a2aa0dfbc6..5ce3cba59ac46 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn @@ -64,6 +64,7 @@ static_library("builtins") { "divdi3.c", "divmoddi4.c", 
"divmodsi4.c", + "divmodti4.c", "divsc3.c", "divsf3.c", "divsi3.c", From 68cfb02668550e3398c8ee8915732daf132f2652 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Thu, 17 Sep 2020 12:59:57 +0200 Subject: [PATCH 0961/1079] [mlir] turn clang-format back on in C API test C API test uses FileCheck comments inside C code and needs to temporarily switch off clang-format to prevent it from messing with FileCheck directives. A recently landed commit forgot to turn it back on after a block of FileCheck comments. Fix that. --- mlir/test/CAPI/ir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/test/CAPI/ir.c b/mlir/test/CAPI/ir.c index fa63c72bf4e84..01b007e717835 100644 --- a/mlir/test/CAPI/ir.c +++ b/mlir/test/CAPI/ir.c @@ -832,6 +832,7 @@ int main() { // CHECK: (d0, d1, d2) -> (d0) // CHECK: (d0, d1, d2) -> (d2) // CHECK: 0 + // clang-format on fprintf(stderr, "@affineMap\n"); errcode = printAffineMap(ctx); fprintf(stderr, "%d\n", errcode); From a615226743d0e986593961418efec76aedfa32b1 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 17 Sep 2020 12:10:23 +0100 Subject: [PATCH 0962/1079] [ARM] Extra fp16 bitcast tests. NFC --- llvm/test/CodeGen/ARM/fp16-bitcast.ll | 63 +++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/llvm/test/CodeGen/ARM/fp16-bitcast.ll b/llvm/test/CodeGen/ARM/fp16-bitcast.ll index d26c2d96614a4..4d450e86d46fe 100644 --- a/llvm/test/CodeGen/ARM/fp16-bitcast.ll +++ b/llvm/test/CodeGen/ARM/fp16-bitcast.ll @@ -129,3 +129,66 @@ entry: %add = add i16 %hc, 1 ret i16 %add } + +define half @constcall() { +; CHECK-VFPV4-SOFT-LABEL: constcall: +; CHECK-VFPV4-SOFT: @ %bb.0: @ %entry +; CHECK-VFPV4-SOFT-NEXT: mov.w r0, #18688 +; CHECK-VFPV4-SOFT-NEXT: b ccc +; +; CHECK-FP16-SOFT-LABEL: constcall: +; CHECK-FP16-SOFT: @ %bb.0: @ %entry +; CHECK-FP16-SOFT-NEXT: vmov.f16 s0, #1.000000e+01 +; CHECK-FP16-SOFT-NEXT: vmov.f16 r0, s0 +; CHECK-FP16-SOFT-NEXT: b ccc +; +; CHECK-VFPV4-HARD-LABEL: constcall: +; CHECK-VFPV4-HARD: @ %bb.0: @ %entry +; CHECK-VFPV4-HARD-NEXT: vldr s0, .LCPI4_0 +; CHECK-VFPV4-HARD-NEXT: b ccc +; CHECK-VFPV4-HARD-NEXT: .p2align 2 +; CHECK-VFPV4-HARD-NEXT: @ %bb.1: +; CHECK-VFPV4-HARD-NEXT: .LCPI4_0: +; CHECK-VFPV4-HARD-NEXT: .long 0x00004900 @ float 2.61874657E-41 +; +; CHECK-FP16-HARD-LABEL: constcall: +; CHECK-FP16-HARD: @ %bb.0: @ %entry +; CHECK-FP16-HARD-NEXT: vmov.f16 s0, #1.000000e+01 +; CHECK-FP16-HARD-NEXT: vmov.f16 r0, s0 +; CHECK-FP16-HARD-NEXT: vmov s0, r0 +; CHECK-FP16-HARD-NEXT: b ccc +entry: + %call = tail call fast half @ccc(half 0xH4900) + ret half %call +} + +define half @constret() { +; CHECK-VFPV4-SOFT-LABEL: constret: +; CHECK-VFPV4-SOFT: @ %bb.0: @ %entry +; CHECK-VFPV4-SOFT-NEXT: mov.w r0, #18688 +; CHECK-VFPV4-SOFT-NEXT: bx lr +; +; CHECK-FP16-SOFT-LABEL: constret: +; CHECK-FP16-SOFT: @ %bb.0: @ %entry +; CHECK-FP16-SOFT-NEXT: vmov.f16 s0, #1.000000e+01 +; CHECK-FP16-SOFT-NEXT: vmov r0, s0 +; CHECK-FP16-SOFT-NEXT: bx lr +; +; CHECK-VFPV4-HARD-LABEL: constret: +; CHECK-VFPV4-HARD: @ %bb.0: @ %entry +; CHECK-VFPV4-HARD-NEXT: vldr s0, .LCPI5_0 +; CHECK-VFPV4-HARD-NEXT: bx lr +; CHECK-VFPV4-HARD-NEXT: .p2align 2 +; CHECK-VFPV4-HARD-NEXT: @ %bb.1: +; CHECK-VFPV4-HARD-NEXT: .LCPI5_0: +; CHECK-VFPV4-HARD-NEXT: .long 0x00004900 @ float 2.61874657E-41 +; +; CHECK-FP16-HARD-LABEL: constret: +; CHECK-FP16-HARD: @ %bb.0: @ %entry +; CHECK-FP16-HARD-NEXT: vmov.f16 s0, #1.000000e+01 +; CHECK-FP16-HARD-NEXT: bx lr +entry: + ret half 0xH4900 +} + +declare half @ccc(half) From 71f237506b8fc06753eb733422d2fad20f622e2d Mon Sep 17 
00:00:00 2001
From: Simon Pilgrim
Date: Thu, 17 Sep 2020 12:12:00 +0100
Subject: [PATCH 0963/1079] DwarfFile.h - remove unnecessary includes. NFCI.

Use forward declarations where possible, move includes down to DwarfFile.cpp
and avoid duplicate includes.
---
 llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp | 3 +--
 llvm/lib/CodeGen/AsmPrinter/DwarfFile.h   | 3 ++-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index dee032304b683..838e1c9a10be6 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -10,10 +10,9 @@
 #include "DwarfCompileUnit.h"
 #include "DwarfDebug.h"
 #include "DwarfUnit.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/DIE.h"
 #include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/MC/MCStreamer.h"
 #include <algorithm>
 #include <cstdint>
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index cf293d7534d04..79a6ce7801b70 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -14,7 +14,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/DIE.h"
-#include "llvm/IR/Metadata.h"
 #include "llvm/Support/Allocator.h"
 #include <map>
 #include <memory>
@@ -26,10 +25,12 @@
 class AsmPrinter;
 class DbgEntity;
 class DbgVariable;
 class DbgLabel;
+class DINode;
 class DwarfCompileUnit;
 class DwarfUnit;
 class LexicalScope;
 class MCSection;
+class MDNode;
 
 // Data structure to hold a range for range lists.
 struct RangeSpan {

From 572e542c5e5fe2727502ab775a6b8c3d238c01b5 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 17 Sep 2020 12:18:27 +0100
Subject: [PATCH 0964/1079] DwarfStringPool.cpp - remove unnecessary StringRef
 include. NFCI.

Already included in DwarfStringPool.h
---
 llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index 1e2c218eaec29..a876f8ccace94 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -8,7 +8,6 @@
 
 #include "DwarfStringPool.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/MC/MCAsmInfo.h"

From fece1489d10bb189fe46bd08385ff6b8954dc39c Mon Sep 17 00:00:00 2001
From: David Green
Date: Thu, 17 Sep 2020 12:39:21 +0100
Subject: [PATCH 0965/1079] [ARM] Additional tests for qr intrinsics in loops.
NFC --- llvm/test/CodeGen/Thumb2/mve-qrintr.ll | 709 +++++++++++++++++++++++++ 1 file changed, 709 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/mve-qrintr.ll diff --git a/llvm/test/CodeGen/Thumb2/mve-qrintr.ll b/llvm/test/CodeGen/Thumb2/mve-qrintr.ll new file mode 100644 index 0000000000000..4fcfe37b89e59 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-qrintr.ll @@ -0,0 +1,709 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s + +define void @vadd(i32* %s1, i32 %c0, i32 %N) { +; CHECK-LABEL: vadd: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB0_1: @ %while.body.lr.ph +; CHECK-NEXT: vdup.32 q0, r1 +; CHECK-NEXT: dlstp.32 lr, r2 +; CHECK-NEXT: .LBB0_2: @ %while.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vadd.i32 q1, q1, q0 +; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: letp lr, .LBB0_2 +; CHECK-NEXT: @ %bb.3: @ %while.end +; CHECK-NEXT: pop {r7, pc} +entry: + %cmp11 = icmp sgt i32 %N, 0 + br i1 %cmp11, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: ; preds = %entry + %.splatinsert = insertelement <4 x i32> undef, i32 %c0, i32 0 + %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.body + %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] + %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) + %1 = bitcast i32* %s1.addr.013 to <4 x i32>* + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %3 = tail call <4 x i32> @llvm.arm.mve.add.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, <4 x i1> %0, <4 x i32> %2) + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + %sub = add nsw i32 %N.addr.012, -4 + %cmp = icmp sgt i32 %N.addr.012, 4 + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body, %entry + ret void +} + +define void @vsub(i32* %s1, i32 %c0, i32 %N) { +; CHECK-LABEL: vsub: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB1_1: @ %while.body.lr.ph +; CHECK-NEXT: vdup.32 q0, r1 +; CHECK-NEXT: dlstp.32 lr, r2 +; CHECK-NEXT: .LBB1_2: @ %while.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vsub.i32 q1, q1, q0 +; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: letp lr, .LBB1_2 +; CHECK-NEXT: @ %bb.3: @ %while.end +; CHECK-NEXT: pop {r7, pc} +entry: + %cmp11 = icmp sgt i32 %N, 0 + br i1 %cmp11, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: ; preds = %entry + %.splatinsert = insertelement <4 x i32> undef, i32 %c0, i32 0 + %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.body + %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %N.addr.012 = 
phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] + %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) + %1 = bitcast i32* %s1.addr.013 to <4 x i32>* + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %3 = tail call <4 x i32> @llvm.arm.mve.sub.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, <4 x i1> %0, <4 x i32> %2) + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + %sub = add nsw i32 %N.addr.012, -4 + %cmp = icmp sgt i32 %N.addr.012, 4 + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body, %entry + ret void +} + +define void @vmul(i32* %s1, i32 %c0, i32 %N) { +; CHECK-LABEL: vmul: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB2_1: @ %while.body.lr.ph +; CHECK-NEXT: vdup.32 q0, r1 +; CHECK-NEXT: dlstp.32 lr, r2 +; CHECK-NEXT: .LBB2_2: @ %while.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmul.i32 q1, q1, q0 +; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: letp lr, .LBB2_2 +; CHECK-NEXT: @ %bb.3: @ %while.end +; CHECK-NEXT: pop {r7, pc} +entry: + %cmp11 = icmp sgt i32 %N, 0 + br i1 %cmp11, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: ; preds = %entry + %.splatinsert = insertelement <4 x i32> undef, i32 %c0, i32 0 + %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.body + %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] + %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) + %1 = bitcast i32* %s1.addr.013 to <4 x i32>* + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %3 = tail call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, <4 x i1> %0, <4 x i32> %2) + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + %sub = add nsw i32 %N.addr.012, -4 + %cmp = icmp sgt i32 %N.addr.012, 4 + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body, %entry + ret void +} + +define void @vqadd(i32* %s1, i32 %c0, i32 %N) { +; CHECK-LABEL: vqadd: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB3_1: @ %while.body.lr.ph +; CHECK-NEXT: vdup.32 q0, r1 +; CHECK-NEXT: dlstp.32 lr, r2 +; CHECK-NEXT: .LBB3_2: @ %while.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vqadd.s32 q1, q1, q0 +; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: letp lr, .LBB3_2 +; CHECK-NEXT: @ %bb.3: @ %while.end +; CHECK-NEXT: pop {r7, pc} +entry: + %cmp11 = icmp sgt i32 %N, 0 + br i1 %cmp11, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: ; preds = %entry + %.splatinsert = insertelement <4 x i32> undef, i32 %c0, i32 0 + %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> 
zeroinitializer + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.body + %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] + %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) + %1 = bitcast i32* %s1.addr.013 to <4 x i32>* + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %3 = tail call <4 x i32> @llvm.arm.mve.qadd.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, i32 0, <4 x i1> %0, <4 x i32> %2) + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + %sub = add nsw i32 %N.addr.012, -4 + %cmp = icmp sgt i32 %N.addr.012, 4 + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body, %entry + ret void +} + +define void @vqsub(i32* %s1, i32 %c0, i32 %N) { +; CHECK-LABEL: vqsub: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB4_1: @ %while.body.lr.ph +; CHECK-NEXT: vdup.32 q0, r1 +; CHECK-NEXT: dlstp.32 lr, r2 +; CHECK-NEXT: .LBB4_2: @ %while.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vqsub.s32 q1, q1, q0 +; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: letp lr, .LBB4_2 +; CHECK-NEXT: @ %bb.3: @ %while.end +; CHECK-NEXT: pop {r7, pc} +entry: + %cmp11 = icmp sgt i32 %N, 0 + br i1 %cmp11, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: ; preds = %entry + %.splatinsert = insertelement <4 x i32> undef, i32 %c0, i32 0 + %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.body + %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] + %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) + %1 = bitcast i32* %s1.addr.013 to <4 x i32>* + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %3 = tail call <4 x i32> @llvm.arm.mve.qsub.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, i32 0, <4 x i1> %0, <4 x i32> %2) + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + %sub = add nsw i32 %N.addr.012, -4 + %cmp = icmp sgt i32 %N.addr.012, 4 + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body, %entry + ret void +} + +define void @vhadd(i32* %s1, i32 %c0, i32 %N) { +; CHECK-LABEL: vhadd: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB5_1: @ %while.body.lr.ph +; CHECK-NEXT: vdup.32 q0, r1 +; CHECK-NEXT: dlstp.32 lr, r2 +; CHECK-NEXT: .LBB5_2: @ %while.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vhadd.s32 q1, q1, q0 +; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: letp lr, .LBB5_2 +; CHECK-NEXT: @ %bb.3: @ %while.end +; CHECK-NEXT: pop {r7, pc} +entry: + %cmp11 = icmp sgt i32 %N, 0 + br i1 %cmp11, label %while.body.lr.ph, 
label %while.end + +while.body.lr.ph: ; preds = %entry + %.splatinsert = insertelement <4 x i32> undef, i32 %c0, i32 0 + %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.body + %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] + %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) + %1 = bitcast i32* %s1.addr.013 to <4 x i32>* + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %3 = tail call <4 x i32> @llvm.arm.mve.hadd.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, i32 0, <4 x i1> %0, <4 x i32> %2) + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + %sub = add nsw i32 %N.addr.012, -4 + %cmp = icmp sgt i32 %N.addr.012, 4 + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body, %entry + ret void +} + +define void @vhsub(i32* %s1, i32 %c0, i32 %N) { +; CHECK-LABEL: vhsub: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB6_1: @ %while.body.lr.ph +; CHECK-NEXT: vdup.32 q0, r1 +; CHECK-NEXT: dlstp.32 lr, r2 +; CHECK-NEXT: .LBB6_2: @ %while.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vhsub.s32 q1, q1, q0 +; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: letp lr, .LBB6_2 +; CHECK-NEXT: @ %bb.3: @ %while.end +; CHECK-NEXT: pop {r7, pc} +entry: + %cmp11 = icmp sgt i32 %N, 0 + br i1 %cmp11, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: ; preds = %entry + %.splatinsert = insertelement <4 x i32> undef, i32 %c0, i32 0 + %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.body + %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] + %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) + %1 = bitcast i32* %s1.addr.013 to <4 x i32>* + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %3 = tail call <4 x i32> @llvm.arm.mve.hsub.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, i32 0, <4 x i1> %0, <4 x i32> %2) + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + %sub = add nsw i32 %N.addr.012, -4 + %cmp = icmp sgt i32 %N.addr.012, 4 + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body, %entry + ret void +} + +define void @vqdmull(i32* %s1, i32 %c0, i32 %N) { +; CHECK-LABEL: vqdmull: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB7_1: @ %while.body.lr.ph +; CHECK-NEXT: vdup.16 q0, r1 +; CHECK-NEXT: dlstp.32 lr, r2 +; CHECK-NEXT: .LBB7_2: @ %while.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrh.s32 q1, [r0] +; CHECK-NEXT: vqdmullb.s16 q1, q1, q0 +; 
CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: letp lr, .LBB7_2 +; CHECK-NEXT: @ %bb.3: @ %while.end +; CHECK-NEXT: pop {r7, pc} +entry: + %cmp11 = icmp sgt i32 %N, 0 + br i1 %cmp11, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: ; preds = %entry + %conv = trunc i32 %c0 to i16 + %.splatinsert = insertelement <8 x i16> undef, i16 %conv, i32 0 + %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.body + %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] + %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) + %1 = bitcast i32* %s1.addr.013 to <4 x i16>* + %2 = tail call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %1, i32 2, <4 x i1> %0, <4 x i16> zeroinitializer) + %3 = sext <4 x i16> %2 to <4 x i32> + %4 = bitcast <4 x i32> %3 to <8 x i16> + %5 = tail call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> %4, <8 x i16> %.splat, i32 0, <4 x i1> %0, <4 x i32> %3) + %6 = bitcast i32* %s1.addr.013 to <4 x i32>* + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %5, <4 x i32>* %6, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + %sub = add nsw i32 %N.addr.012, -4 + %cmp = icmp sgt i32 %N.addr.012, 4 + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body, %entry + ret void +} + +define void @vqdmulh(i32* %s1, i32 %c0, i32 %N) { +; CHECK-LABEL: vqdmulh: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB8_1: @ %while.body.lr.ph +; CHECK-NEXT: vdup.32 q0, r1 +; CHECK-NEXT: dlstp.32 lr, r2 +; CHECK-NEXT: .LBB8_2: @ %while.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vqdmulh.s32 q1, q1, q0 +; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: letp lr, .LBB8_2 +; CHECK-NEXT: @ %bb.3: @ %while.end +; CHECK-NEXT: pop {r7, pc} +entry: + %cmp11 = icmp sgt i32 %N, 0 + br i1 %cmp11, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: ; preds = %entry + %.splatinsert = insertelement <4 x i32> undef, i32 %c0, i32 0 + %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.body + %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] + %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) + %1 = bitcast i32* %s1.addr.013 to <4 x i32>* + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %3 = tail call <4 x i32> @llvm.arm.mve.qdmulh.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, <4 x i1> %0, <4 x i32> %2) + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + %sub = add nsw i32 %N.addr.012, -4 + %cmp = icmp sgt i32 %N.addr.012, 4 + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body, %entry + ret void +} + +define void @vqrdmulh(i32* %s1, i32 %c0, i32 %N) { +; CHECK-LABEL: vqrdmulh: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; 
CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB9_1: @ %while.body.lr.ph +; CHECK-NEXT: vdup.32 q0, r1 +; CHECK-NEXT: dlstp.32 lr, r2 +; CHECK-NEXT: .LBB9_2: @ %while.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vqrdmulh.s32 q1, q1, q0 +; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: letp lr, .LBB9_2 +; CHECK-NEXT: @ %bb.3: @ %while.end +; CHECK-NEXT: pop {r7, pc} +entry: + %cmp11 = icmp sgt i32 %N, 0 + br i1 %cmp11, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: ; preds = %entry + %.splatinsert = insertelement <4 x i32> undef, i32 %c0, i32 0 + %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.body + %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] + %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) + %1 = bitcast i32* %s1.addr.013 to <4 x i32>* + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %3 = tail call <4 x i32> @llvm.arm.mve.qrdmulh.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, <4 x i1> %0, <4 x i32> %2) + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + %sub = add nsw i32 %N.addr.012, -4 + %cmp = icmp sgt i32 %N.addr.012, 4 + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body, %entry + ret void +} + +define void @vaddf(float* %s1, float %c0, i32 %N) { +; CHECK-LABEL: vaddf: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB10_1: @ %while.body.lr.ph +; CHECK-NEXT: vdup.32 q0, r1 +; CHECK-NEXT: dlstp.32 lr, r2 +; CHECK-NEXT: .LBB10_2: @ %while.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vadd.f32 q1, q1, q0 +; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: letp lr, .LBB10_2 +; CHECK-NEXT: @ %bb.3: @ %while.end +; CHECK-NEXT: pop {r7, pc} +entry: + %cmp11 = icmp sgt i32 %N, 0 + br i1 %cmp11, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: ; preds = %entry + %.splatinsert = insertelement <4 x float> undef, float %c0, i32 0 + %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.body + %s1.addr.013 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] + %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) + %1 = bitcast float* %s1.addr.013 to <4 x float>* + %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) + %3 = tail call fast <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %2, <4 x float> %.splat, <4 x i1> %0, <4 x float> %2) + tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds float, float* %s1.addr.013, i32 4 + %sub = add nsw i32 %N.addr.012, -4 + %cmp = icmp sgt i32 %N.addr.012, 4 + br 
i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body, %entry + ret void +} + +define void @vsubf(float* %s1, float %c0, i32 %N) { +; CHECK-LABEL: vsubf: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB11_1: @ %while.body.lr.ph +; CHECK-NEXT: vdup.32 q0, r1 +; CHECK-NEXT: dlstp.32 lr, r2 +; CHECK-NEXT: .LBB11_2: @ %while.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vsub.f32 q1, q1, q0 +; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: letp lr, .LBB11_2 +; CHECK-NEXT: @ %bb.3: @ %while.end +; CHECK-NEXT: pop {r7, pc} +entry: + %cmp11 = icmp sgt i32 %N, 0 + br i1 %cmp11, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: ; preds = %entry + %.splatinsert = insertelement <4 x float> undef, float %c0, i32 0 + %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.body + %s1.addr.013 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] + %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) + %1 = bitcast float* %s1.addr.013 to <4 x float>* + %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) + %3 = tail call fast <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> %2, <4 x float> %.splat, <4 x i1> %0, <4 x float> %2) + tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds float, float* %s1.addr.013, i32 4 + %sub = add nsw i32 %N.addr.012, -4 + %cmp = icmp sgt i32 %N.addr.012, 4 + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body, %entry + ret void +} + +define void @vmulf(float* %s1, float %c0, i32 %N) { +; CHECK-LABEL: vmulf: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB12_1: @ %while.body.lr.ph +; CHECK-NEXT: vdup.32 q0, r1 +; CHECK-NEXT: dlstp.32 lr, r2 +; CHECK-NEXT: .LBB12_2: @ %while.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmul.f32 q1, q1, q0 +; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: letp lr, .LBB12_2 +; CHECK-NEXT: @ %bb.3: @ %while.end +; CHECK-NEXT: pop {r7, pc} +entry: + %cmp11 = icmp sgt i32 %N, 0 + br i1 %cmp11, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: ; preds = %entry + %.splatinsert = insertelement <4 x float> undef, float %c0, i32 0 + %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.body + %s1.addr.013 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] + %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) + %1 = bitcast float* %s1.addr.013 to <4 x float>* + %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) + %3 = tail call fast <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float> %2, <4 x float> 
%.splat, <4 x i1> %0, <4 x float> %2) + tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds float, float* %s1.addr.013, i32 4 + %sub = add nsw i32 %N.addr.012, -4 + %cmp = icmp sgt i32 %N.addr.012, 4 + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body, %entry + ret void +} + +define void @vfma(float* %s1, float* %s2, float %c0, i32 %N) { +; CHECK-LABEL: vfma: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: cmp r3, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB13_1: @ %while.body.lr.ph +; CHECK-NEXT: vdup.32 q0, r2 +; CHECK-NEXT: dlstp.32 lr, r3 +; CHECK-NEXT: .LBB13_2: @ %while.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q1, [r1] +; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vfma.f32 q2, q1, q0 +; CHECK-NEXT: vstrw.32 q2, [r0], #16 +; CHECK-NEXT: letp lr, .LBB13_2 +; CHECK-NEXT: @ %bb.3: @ %while.end +; CHECK-NEXT: pop {r7, pc} +entry: + %cmp12 = icmp sgt i32 %N, 0 + br i1 %cmp12, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: ; preds = %entry + %0 = bitcast float* %s2 to <4 x float>* + %.splatinsert = insertelement <4 x float> undef, float %c0, i32 0 + %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.body + %s1.addr.014 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %N.addr.013 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] + %1 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.013) + %2 = bitcast float* %s1.addr.014 to <4 x float>* + %3 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> zeroinitializer) + %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %1, <4 x float> zeroinitializer) + %5 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %4, <4 x float> %.splat, <4 x float> %3, <4 x i1> %1) + tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %5, <4 x float>* %2, i32 4, <4 x i1> %1) + %add.ptr = getelementptr inbounds float, float* %s1.addr.014, i32 4 + %sub = add nsw i32 %N.addr.013, -4 + %cmp = icmp sgt i32 %N.addr.013, 4 + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body, %entry + ret void +} + +define void @vfmas(float* %s1, float* %s2, float %c0, i32 %N) { +; CHECK-LABEL: vfmas: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: cmp r3, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB14_1: @ %while.body.lr.ph +; CHECK-NEXT: vdup.32 q0, r2 +; CHECK-NEXT: dlstp.32 lr, r3 +; CHECK-NEXT: .LBB14_2: @ %while.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vmov q3, q0 +; CHECK-NEXT: vldrw.u32 q1, [r1] +; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vfma.f32 q3, q2, q1 +; CHECK-NEXT: vstrw.32 q3, [r0], #16 +; CHECK-NEXT: letp lr, .LBB14_2 +; CHECK-NEXT: @ %bb.3: @ %while.end +; CHECK-NEXT: pop {r7, pc} +entry: + %cmp12 = icmp sgt i32 %N, 0 + br i1 %cmp12, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: ; preds = %entry + %0 = bitcast float* %s2 to <4 x float>* + %.splatinsert = insertelement <4 x float> undef, float %c0, i32 0 + %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> 
undef, <4 x i32> zeroinitializer + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.body + %s1.addr.014 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %N.addr.013 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] + %1 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.013) + %2 = bitcast float* %s1.addr.014 to <4 x float>* + %3 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> zeroinitializer) + %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %1, <4 x float> zeroinitializer) + %5 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %3, <4 x float> %4, <4 x float> %.splat, <4 x i1> %1) + tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %5, <4 x float>* %2, i32 4, <4 x i1> %1) + %add.ptr = getelementptr inbounds float, float* %s1.addr.014, i32 4 + %sub = add nsw i32 %N.addr.013, -4 + %cmp = icmp sgt i32 %N.addr.013, 4 + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body, %entry + ret void +} + +declare <4 x i1> @llvm.arm.mve.vctp32(i32) +declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.arm.mve.add.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) +declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) +declare <4 x i32> @llvm.arm.mve.sub.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.arm.mve.qadd.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.arm.mve.qsub.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.arm.mve.hadd.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.arm.mve.hsub.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) +declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) +declare <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16>, <8 x i16>, i32, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.arm.mve.qdmulh.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.arm.mve.qrdmulh.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) +declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) +declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) +declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>) +declare <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) +declare <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) +declare <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x float>, <4 x i1>) From c65627a1fe3be7521fc232d633bb6df577f55269 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 17 Sep 2020 13:07:44 +0100 Subject: [PATCH 0966/1079] Revert "[lldb] Don't send invalid region addresses to lldb server" This reverts commit c687af0c30b4dbdc9f614d5e061c888238e0f9c5 due to a test failure on Windows. 
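For reference: revert commits with a quoted subject and a "This reverts commit <hash>" body are conventionally generated by git rather than written by hand, which keeps the reference to the original change machine-readable. A sketch of the likely invocation (assumed, not recorded in the patch):

    git revert c687af0c30b4dbdc9f614d5e061c888238e0f9c5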
--- lldb/source/Commands/CommandObjectMemory.cpp | 1 - .../API/functionalities/memory-region/TestMemoryRegion.py | 6 ------ 2 files changed, 7 deletions(-) diff --git a/lldb/source/Commands/CommandObjectMemory.cpp b/lldb/source/Commands/CommandObjectMemory.cpp index d918937994981..474c377101493 100644 --- a/lldb/source/Commands/CommandObjectMemory.cpp +++ b/lldb/source/Commands/CommandObjectMemory.cpp @@ -1707,7 +1707,6 @@ class CommandObjectMemoryRegion : public CommandObjectParsed { "invalid address argument \"%s\": %s\n", command[0].c_str(), error.AsCString()); result.SetStatus(eReturnStatusFailed); - return false; } } diff --git a/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py b/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py index 61e64d44e7945..283cc945ed09a 100644 --- a/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py +++ b/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py @@ -41,12 +41,6 @@ def test(self): self.assertFalse(result.Succeeded()) self.assertRegexpMatches(result.GetError(), "Usage: memory region ADDR") - # Test that when the address fails to parse, we show an error and do not continue - interp.HandleCommand("memory region not_an_address", result) - self.assertFalse(result.Succeeded()) - self.assertEqual(result.GetError(), - "error: invalid address argument \"not_an_address\": address expression \"not_an_address\" evaluation failed\n") - # Now let's print the memory region starting at 0 which should always work. interp.HandleCommand("memory region 0x0", result) self.assertTrue(result.Succeeded()) From 97a476eb56726ef09bdd9c7f8c46d7e1c456d46b Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Thu, 17 Sep 2020 13:07:46 +0100 Subject: [PATCH 0967/1079] [NFC][ARM] Tail fold test changes Run update script on one test and add another. 
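As background: the update script referred to here is utils/update_test_checks.py, the tool named in the autogenerated NOTE line of the tests below; it re-runs each test's RUN line through opt and regenerates the CHECK lines in place. A sketch of a typical invocation (build directory path assumed):

    llvm/utils/update_test_checks.py --opt-binary=build/bin/opt \
        llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll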
---
 .../ARM/tail-fold-multiple-icmps.ll           |  84 +++
 .../ARM/tail-folding-not-allowed.ll           | 575 ++++++++++++++++--
 2 files changed, 611 insertions(+), 48 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll

diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll
new file mode 100644
index 0000000000000..cdcb81ec2dc28
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -tail-predication=enabled -loop-vectorize -instcombine -simplifycfg %s -S -o - | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define arm_aapcs_vfpcc i32 @minmaxval4(i32* nocapture readonly %x, i32* nocapture %minp, i32 %N) {
+; CHECK-LABEL: @minmaxval4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP26_NOT:%.*]] = icmp eq i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP26_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N]], -4
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[VEC_PHI1]]
+; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[VEC_PHI1]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> [[TMP3]])
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[N]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 2147483647, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ -2147483648, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: [[MAX_0_LCSSA:%.*]] = phi i32 [ -2147483648, [[ENTRY:%.*]] ], [ [[COND:%.*]], [[FOR_BODY]] ], [ [[TMP7]],
[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[MIN_0_LCSSA:%.*]] = phi i32 [ 2147483647, [[ENTRY]] ], [ [[COND9:%.*]], [[FOR_BODY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: store i32 [[MIN_0_LCSSA]], i32* [[MINP:%.*]], align 4 +; CHECK-NEXT: ret i32 [[MAX_0_LCSSA]] +; CHECK: for.body: +; CHECK-NEXT: [[I_029:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[MIN_028:%.*]] = phi i32 [ [[COND9]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[MAX_027:%.*]] = phi i32 [ [[COND]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX2]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[I_029]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP9]], [[MAX_027]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP1]], i32 [[TMP9]], i32 [[MAX_027]] +; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP9]], [[MIN_028]] +; CHECK-NEXT: [[COND9]] = select i1 [[CMP4]], i32 [[TMP9]], i32 [[MIN_028]] +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_029]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]] +; +entry: + %cmp26.not = icmp eq i32 %N, 0 + br i1 %cmp26.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %max.0.lcssa = phi i32 [ -2147483648, %entry ], [ %cond, %for.body ] + %min.0.lcssa = phi i32 [ 2147483647, %entry ], [ %cond9, %for.body ] + store i32 %min.0.lcssa, i32* %minp, align 4 + ret i32 %max.0.lcssa + +for.body: ; preds = %entry, %for.body + %i.029 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %min.028 = phi i32 [ %cond9, %for.body ], [ 2147483647, %entry ] + %max.027 = phi i32 [ %cond, %for.body ], [ -2147483648, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.029 + %0 = load i32, i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, %max.027 + %cond = select i1 %cmp1, i32 %0, i32 %max.027 + %cmp4 = icmp slt i32 %0, %min.028 + %cond9 = select i1 %cmp4, i32 %0, i32 %min.028 + %inc = add nuw i32 %i.029, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll index baedc0a23daa2..95b22eb9660ad 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll @@ -1,13 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp \ ; RUN: -tail-predication=enabled -loop-vectorize -S < %s | \ ; RUN: FileCheck %s define void @trunc_not_allowed_different_vec_elemns(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i16* noalias nocapture %D) #0 { -; CHECK-LABEL: trunc_not_allowed_different_vec_elemns( +; CHECK-LABEL: @trunc_not_allowed_different_vec_elemns( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: llvm.masked.load -; CHECK-NOT: llvm.masked.store -; CHECK: br i1 %{{.*}}, label %{{.*}}, label %vector.body +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; 
CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP10]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = shl <4 x i16> [[TMP11]], +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, i16* [[D:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, i16* [[TMP13]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i16* [[TMP14]] to <4 x i16>* +; CHECK-NEXT: store <4 x i16> [[TMP12]], <4 x i16>* [[TMP15]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 428 +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 431, 428 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 428, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[I_021:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD9:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[I_021]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 [[I_021]] +; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_021]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD_TR:%.*]] = trunc i32 [[ADD]] to i16 +; CHECK-NEXT: [[CONV7:%.*]] = shl i16 [[ADD_TR]], 1 +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[D]], i32 [[I_021]] +; CHECK-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2 +; CHECK-NEXT: [[ADD9]] = add nuw nsw i32 [[I_021]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[ADD9]], 431 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]] +; entry: br label %for.body @@ -33,11 +84,24 @@ for.body: } define void @unsupported_i64_type(i64* noalias nocapture %A, i64* noalias nocapture readonly %B, i64* noalias nocapture readonly %C) #0 { -; CHECK-LABEL: unsupported_i64_type( -; CHECK-NOT: vector.body: -; CHECK-NOT: llvm.masked.load -; CHECK-NOT: llvm.masked.store +; 
CHECK-LABEL: @unsupported_i64_type( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void ; CHECK: for.body: +; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD3:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i32 [[I_09]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[C:%.*]], i32 [[I_09]] +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i32 [[I_09]] +; CHECK-NEXT: store i64 [[ADD]], i64* [[ARRAYIDX2]], align 8 +; CHECK-NEXT: [[ADD3]] = add nuw nsw i32 [[I_09]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[ADD3]], 431 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; entry: br label %for.body @@ -59,11 +123,53 @@ for.body: } define void @narrowing_load_not_allowed(i8* noalias nocapture %A, i8* noalias nocapture readonly %B, i16* noalias nocapture readonly %C) #0 { -; CHECK-LABEL: narrowing_load_not_allowed( +; CHECK-LABEL: @narrowing_load_not_allowed( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: llvm.masked.load -; CHECK-NOT: llvm.masked.store -; CHECK: br i1 %{{.*}}, label %{{.*}}, label %vector.body +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[C:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <8 x i16>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[B:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <8 x i8>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP6]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = trunc <8 x i16> [[WIDE_LOAD]] to <8 x i8> +; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i8> [[WIDE_LOAD1]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <8 x i8>* +; CHECK-NEXT: store <8 x i8> [[TMP8]], <8 x i8>* [[TMP11]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 424 +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 431, 424 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 424, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[I_012:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] 
= getelementptr inbounds i16, i16* [[C]], i32 [[I_012]] +; CHECK-NEXT: [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[B]], i32 [[I_012]] +; CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CONV3:%.*]] = trunc i16 [[TMP13]] to i8 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[TMP14]], [[CONV3]] +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 [[I_012]] +; CHECK-NEXT: store i8 [[ADD]], i8* [[ARRAYIDX5]], align 1 +; CHECK-NEXT: [[ADD6]] = add nuw nsw i32 [[I_012]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[ADD6]], 431 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]] +; entry: br label %for.body @@ -91,11 +197,54 @@ for.body: ; preds = %for.body, %entry ; we could allow this case. ; define void @trunc_not_allowed(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C) #0 { -; CHECK-LABEL: trunc_not_allowed( -; CHECK: vector.body: -; CHECK-NOT: llvm.masked.load -; CHECK-NOT: llvm.masked.store -; CHECK: br i1 %{{.*}}, label %{{.*}}, label %vector.body +; CHECK-LABEL: @trunc_not_allowed( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP10]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i16 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 428 +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 431, 428 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 428, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD3:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[I_09]] +; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* 
[[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 [[I_09]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP14]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_09]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD3]] = add nuw nsw i32 [[I_09]], 1 +; CHECK-NEXT: [[ADD_IV:%.*]] = trunc i32 [[ADD3]] to i16 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i16 [[ADD_IV]], 431 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], [[LOOP7:!llvm.loop !.*]] +; entry: br label %for.body @@ -123,11 +272,67 @@ for.body: ; force vectorisation with a loop hint. ; define void @strides_different_direction(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) #0 { -; CHECK-LABEL: strides_different_direction( +; CHECK-LABEL: @strides_different_direction( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 430) +; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], [[MUL_RESULT]] +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[N]], [[MUL_RESULT]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], [[N]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP0]], [[N]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 true, i1 [[TMP2]], i1 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP6:%.*]] = or i1 false, [[TMP5]] +; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: llvm.masked.load -; CHECK-NOT: llvm.masked.store -; CHECK: br i1 %{{.*}}, label %{{.*}}, label %vector.body +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = sub nsw i32 [[N]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i32 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 -3 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP15]], align 4 +; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD1]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = add nsw <4 x i32> [[REVERSE]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP7]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP16]], <4 x i32>* [[TMP19]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add 
i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], 428 +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 431, 428 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 428, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD3:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[I_09]] +; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[N]], [[I_09]] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 [[SUB]] +; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_09]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD3]] = add nuw nsw i32 [[I_09]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[ADD3]], 431 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], [[LOOP9:!llvm.loop !.*]] +; entry: br label %for.body @@ -150,11 +355,53 @@ for.body: } define void @too_many_loop_blocks(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C) #0 { -; CHECK-LABEL: too_many_loop_blocks( +; CHECK-LABEL: @too_many_loop_blocks( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: llvm.masked.load -; CHECK-NOT: llvm.masked.store -; CHECK: br i1 %{{.*}}, label %{{.*}}, label %vector.body +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP10]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 428 +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 
431, 428 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 428, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD3:%.*]], [[LOOPINCR:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[I_09]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 [[I_09]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_09]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: br label [[LOOPINCR]] +; CHECK: loopincr: +; CHECK-NEXT: [[ADD3]] = add nuw nsw i32 [[I_09]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[ADD3]], 431 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], [[LOOP11:!llvm.loop !.*]] +; entry: br label %for.body @@ -179,9 +426,24 @@ loopincr: } define void @double(double* noalias nocapture %A, double* noalias nocapture readonly %B, double* noalias nocapture readonly %C) #0 { -; CHECK-LABEL: double( +; CHECK-LABEL: @double( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void ; CHECK: for.body: -; CHECK-NOT: vector.body: +; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD3:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i32 [[I_09]] +; CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i32 [[I_09]] +; CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = fadd fast double [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i32 [[I_09]] +; CHECK-NEXT: store double [[ADD]], double* [[ARRAYIDX2]], align 8 +; CHECK-NEXT: [[ADD3]] = add nuw nsw i32 [[I_09]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[ADD3]], 431 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; entry: br label %for.body @@ -203,11 +465,28 @@ for.body: } define void @fptrunc_not_allowed(float* noalias nocapture %A, float* noalias nocapture readonly %B, float* noalias nocapture readonly %C, half* noalias nocapture %D) #0 { -; CHECK-LABEL: fptrunc_not_allowed( -; CHECK-NOT: vector.body: -; CHECK-NOT: llvm.masked.load -; CHECK-NOT: llvm.masked.store -; CHECK: br i1 %{{.*}}, label %{{.*}}, label %for.body +; CHECK-LABEL: @fptrunc_not_allowed( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[I_017:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i32 [[I_017]] +; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i32 [[I_017]] +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = 
fadd fast float [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 [[I_017]] +; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[CONV:%.*]] = fptrunc float [[ADD]] to half +; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast half [[CONV]], 0xH4000 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds half, half* [[D:%.*]], i32 [[I_017]] +; CHECK-NEXT: store half [[FACTOR]], half* [[ARRAYIDX5]], align 2 +; CHECK-NEXT: [[ADD6]] = add nuw nsw i32 [[I_017]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[ADD6]], 431 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; entry: br label %for.body @@ -238,6 +517,30 @@ for.body: ; to be reverted which is expensive and what we would like to avoid. ; define dso_local void @select_not_allowed(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N, i32* noalias nocapture readonly %Cond) { +; CHECK-LABEL: @select_not_allowed( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i32 [[I_011]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: [[C_B:%.*]] = select i1 [[TOBOOL_NOT]], i32* [[C:%.*]], i32* [[B:%.*]] +; CHECK-NEXT: [[COND_IN:%.*]] = getelementptr inbounds i32, i32* [[C_B]], i32 [[I_011]] +; CHECK-NEXT: [[COND:%.*]] = load i32, i32* [[COND_IN]], align 4 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_011]] +; CHECK-NEXT: store i32 [[COND]], i32* [[ARRAYIDX3]], align 4 +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_011]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] +; entry: %cmp10 = icmp sgt i32 %N, 0 br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup @@ -267,11 +570,55 @@ for.body: ; preds = %for.body.preheader, } define i32 @i32_smin_reduction(i32* nocapture readonly %x, i32 %n) #0 { -; CHECK-LABEL: i32_smin_reduction( +; CHECK-LABEL: @i32_smin_reduction( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: @llvm.masked.load -; CHECK-NOT: @llvm.masked.store -; CHECK: br i1 %{{.*}}, label {{.*}}, label %vector.body +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ 
[[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 2147483647, [[FOR_BODY_PREHEADER]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[I_08]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[R_07]], [[TMP8]] +; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP8]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], [[LOOP13:!llvm.loop !.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 2147483647, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[R_0_LCSSA]] +; entry: %cmp6 = icmp sgt i32 %n, 0 br i1 %cmp6, label %for.body, label %for.cond.cleanup @@ -293,11 +640,55 @@ for.cond.cleanup: ; preds = %for.body, %entry } define i32 @i32_smax_reduction(i32* nocapture readonly %x, i32 %n) #0 { -; CHECK-LABEL: i32_smax_reduction( +; CHECK-LABEL: @i32_smax_reduction( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: @llvm.masked.load -; CHECK-NOT: @llvm.masked.store -; CHECK: br i1 %{{.*}}, label {{.*}}, label %vector.body +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], 
[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ -2147483648, [[FOR_BODY_PREHEADER]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[I_08]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[R_07]], [[TMP8]] +; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP8]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], [[LOOP15:!llvm.loop !.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ -2147483648, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[R_0_LCSSA]] +; entry: %cmp6 = icmp sgt i32 %n, 0 br i1 %cmp6, label %for.body, label %for.cond.cleanup @@ -319,11 +710,55 @@ for.cond.cleanup: ; preds = %for.body, %entry } define i32 @i32_umin_reduction(i32* nocapture readonly %x, i32 %n) #0 { -; CHECK-LABEL: i32_umin_reduction( +; CHECK-LABEL: @i32_umin_reduction( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: @llvm.masked.load -; CHECK-NOT: @llvm.masked.store -; CHECK: br i1 %{{.*}}, label {{.*}}, label 
%vector.body +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP16:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ -1, [[FOR_BODY_PREHEADER]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[I_08]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[R_07]], [[TMP8]] +; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP8]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], [[LOOP17:!llvm.loop !.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[R_0_LCSSA]] +; entry: %cmp6 = icmp sgt i32 %n, 0 br i1 %cmp6, label %for.body, label %for.cond.cleanup @@ -345,11 +780,55 @@ for.cond.cleanup: ; preds = %for.body, %entry } define i32 @i32_umax_reduction(i32* nocapture readonly %x, i32 %n) #0 { -; CHECK-LABEL: i32_umax_reduction( +; CHECK-LABEL: @i32_umax_reduction( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: @llvm.masked.load -; 
CHECK-NOT: @llvm.masked.store -; CHECK: br i1 %{{.*}}, label {{.*}}, label %vector.body +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP18:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[I_08]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[R_07]], [[TMP8]] +; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP8]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], [[LOOP19:!llvm.loop !.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[R_0_LCSSA]] +; entry: %cmp6 = icmp sgt i32 %n, 0 br i1 %cmp6, label %for.body, label %for.cond.cleanup From ed53ff4cde331e0ffeb492dca6281aaeea2cd8cf Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 17 Sep 2020 12:52:23 +0100 Subject: [PATCH 0968/1079] SymbolizableObjectFile.h - remove unnecessary includes. NFCI. Use forward declarations where possible, move includes down to SymbolizableObjectFile.cpp and avoid duplicate includes. 
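As a quick illustration of the pattern being applied here (a minimal, hypothetical sketch — `Widget` and `Context` are invented names, not code from this patch): when a header only uses a type through a pointer or reference, a forward declaration suffices there, and the full #include can move to the single .cpp file that needs the complete definition.

```
// Widget.h -- only a pointer to Context is used, so a forward
// declaration is enough and no #include is required here.
class Context; // forward declaration

class Widget {
  Context *Ctx; // pointer member: the complete type is not needed
public:
  explicit Widget(Context *C) : Ctx(C) {}
  int run(); // defined in Widget.cpp, where the full type is needed
};

// Widget.cpp -- the one translation unit that pays for the include.
// #include "Context.h" // full definition needed to use *Ctx
int Widget::run() { return 0; }
```

Every consumer of Widget.h then stops pulling in Context.h transitively, which is the compile-time saving this kind of cleanup is after.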
--- llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp | 9 --------- llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h | 6 +++--- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp index 84524195fa8af..93d05e4e27bf8 100644 --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -12,24 +12,15 @@ #include "SymbolizableObjectFile.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" -#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/Object/COFF.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolSize.h" #include "llvm/Support/Casting.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Error.h" #include -#include -#include -#include -#include -#include -#include using namespace llvm; using namespace object; diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h index 0ba304ee4c61c..be3c66df056f0 100644 --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h @@ -15,12 +15,12 @@ #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" -#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/Error.h" #include -#include #include #include -#include +#include +#include namespace llvm { From abe0d8551da52ea1d0d8ad5f9ad71d22a7cd9928 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 17 Sep 2020 13:08:42 +0100 Subject: [PATCH 0969/1079] MetadataLoader.cpp - remove unnecessary StringRef include. NFCI. Already included in MetadataLoader.h --- llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index 821185e46c046..874bb84170df2 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -17,7 +17,6 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" @@ -63,7 +62,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" @@ -75,7 +73,6 @@ #include #include #include -#include #include #include #include From 40e771c1c0d33c687230111271060c2ba761269f Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Thu, 17 Sep 2020 13:22:26 +0100 Subject: [PATCH 0970/1079] [clang-format][regression][PR47461] ifdef causes catch to be seen as a function https://bugs.llvm.org/show_bug.cgi?id=47461 The following change {D80940} caused a regression: in code that wraps the try and catch in an #ifdef, the brace around the catch is placed incorrectly ``` try { } catch (...) 
{ // This is not a small function bar = 1; } } ``` The brace after the catch will be placed on a newline Reviewed By: curdeius Differential Revision: https://reviews.llvm.org/D87291 --- clang/lib/Format/FormatTokenLexer.cpp | 2 +- clang/unittests/Format/FormatTest.cpp | 37 +++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index f6db58acd8dbe..c1466196b4d64 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -401,7 +401,7 @@ bool FormatTokenLexer::tryTransformTryUsageForC() { if (!Try->is(tok::kw_try)) return false; auto &Next = *(Tokens.end() - 1); - if (Next->isOneOf(tok::l_brace, tok::colon)) + if (Next->isOneOf(tok::l_brace, tok::colon, tok::hash, tok::comment)) return false; if (Tokens.size() > 2) { diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 98e002003159c..eae7b24fae7cd 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -2743,6 +2743,43 @@ TEST_F(FormatTest, FormatTryAsAVariable) { verifyFormat("int catch, size;"); verifyFormat("catch = foo();"); verifyFormat("if (catch < size) {\n return true;\n}"); + + FormatStyle Style = getLLVMStyle(); + Style.BreakBeforeBraces = FormatStyle::BS_Custom; + Style.BraceWrapping.AfterFunction = true; + Style.BraceWrapping.BeforeCatch = true; + verifyFormat("try {\n" + " int bar = 1;\n" + "}\n" + "catch (...) {\n" + " int bar = 1;\n" + "}", + Style); + verifyFormat("#if NO_EX\n" + "try\n" + "#endif\n" + "{\n" + "}\n" + "#if NO_EX\n" + "catch (...) {\n" + "}", + Style); + verifyFormat("try /* abc */ {\n" + " int bar = 1;\n" + "}\n" + "catch (...) {\n" + " int bar = 1;\n" + "}", + Style); + verifyFormat("try\n" + "// abc\n" + "{\n" + " int bar = 1;\n" + "}\n" + "catch (...) {\n" + " int bar = 1;\n" + "}", + Style); } TEST_F(FormatTest, FormatSEHTryCatch) { From bb037c2a7625d9d13a86b18d9b8b0c75eb8c91cb Mon Sep 17 00:00:00 2001 From: Mikael Holmen Date: Thu, 17 Sep 2020 14:20:34 +0200 Subject: [PATCH 0971/1079] [ConstraintSystem] Remove local variable that is set but not read [NFC] gcc 7.4 warns about it. --- llvm/lib/Analysis/ConstraintSystem.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Analysis/ConstraintSystem.cpp b/llvm/lib/Analysis/ConstraintSystem.cpp index 818cfe0a171eb..d5b15e7587b37 100644 --- a/llvm/lib/Analysis/ConstraintSystem.cpp +++ b/llvm/lib/Analysis/ConstraintSystem.cpp @@ -46,7 +46,6 @@ bool ConstraintSystem::eliminateUsingFM() { } // FIXME do not use copy - bool EliminatedInRow = false; for (unsigned R2 = R1 + 1; R2 < NumConstraints; R2++) { if (R1 == R2) continue; @@ -85,7 +84,6 @@ bool ConstraintSystem::eliminateUsingFM() { .getZExtValue(); } NewSystem.push_back(std::move(NR)); - EliminatedInRow = true; } } Constraints = std::move(NewSystem); From aa896a0b3a9d93df818fbe9b68644ad90bcda831 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 17 Sep 2020 13:28:14 +0100 Subject: [PATCH 0972/1079] Remove unnecessary forward declarations. NFCI. All of these forward declarations are fully defined in headers that are directly included. 
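For context, the shape of the redundancy being deleted (a sketch, not a line lifted from the patch; `StringRef` really is defined in the named header): once the defining header is included, a forward declaration of the same class adds nothing.

```
#include "llvm/ADT/StringRef.h" // brings in the full definition

namespace llvm {
class StringRef; // redundant: already fully defined by the include above
} // namespace llvm
```

Re-declaring a class after its definition is legal C++, so these declarations compile silently; removing them, as done across the nine headers below, loses nothing.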
--- llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h | 1 - llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h | 2 -- llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h | 1 - llvm/include/llvm/IR/LegacyPassManagers.h | 1 - llvm/include/llvm/MC/MCELFObjectWriter.h | 1 - llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h | 1 - llvm/include/llvm/ProfileData/SampleProf.h | 2 -- llvm/include/llvm/Transforms/Utils/LoopUtils.h | 1 - llvm/include/llvm/Transforms/Utils/LoopVersioning.h | 1 - 9 files changed, 11 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h index 2982146f960c9..88849d024c233 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h @@ -42,7 +42,6 @@ class StringRef; class raw_ostream; namespace pdb { -class IPDBRawSymbol; class IPDBSession; #define DECLARE_PDB_SYMBOL_CONCRETE_TYPE(TagValue) \ diff --git a/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h index 8376d163d57a5..c7ba57228ab71 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h @@ -28,8 +28,6 @@ class TargetMachine; namespace orc { -class JITTargetMachineBuilder; - IRSymbolMapper::ManglingOptions irManglingOptionsFromTargetOptions(const TargetOptions &Opts); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h index a4e43d4e1c9c2..943404262bd04 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h @@ -22,7 +22,6 @@ namespace llvm { class Module; -class JITSymbolResolver; namespace orc { diff --git a/llvm/include/llvm/IR/LegacyPassManagers.h b/llvm/include/llvm/IR/LegacyPassManagers.h index 6b1ddd4d79f8f..498e736a0100c 100644 --- a/llvm/include/llvm/IR/LegacyPassManagers.h +++ b/llvm/include/llvm/IR/LegacyPassManagers.h @@ -88,7 +88,6 @@ namespace llvm { template class ArrayRef; class Module; -class Pass; class StringRef; class Value; class Timer; diff --git a/llvm/include/llvm/MC/MCELFObjectWriter.h b/llvm/include/llvm/MC/MCELFObjectWriter.h index 8f78b99d37949..5d99c494b11eb 100644 --- a/llvm/include/llvm/MC/MCELFObjectWriter.h +++ b/llvm/include/llvm/MC/MCELFObjectWriter.h @@ -23,7 +23,6 @@ namespace llvm { class MCAssembler; class MCContext; class MCFixup; -class MCObjectWriter; class MCSymbol; class MCSymbolELF; class MCValue; diff --git a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h index 5d6511372f6e1..0a1e50d501e93 100644 --- a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h @@ -24,7 +24,6 @@ namespace llvm { class MCInst; -class MCParsedAsmOperand; class MCStreamer; class MCSubtargetInfo; template class SmallVectorImpl; diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index aca941b2da15a..3707f980ccca0 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -37,8 +37,6 @@ namespace llvm { -class raw_ostream; - const std::error_category &sampleprof_category(); enum class sampleprof_error { diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index cf0982d270b89..d741b5142e5bf 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h 
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -39,7 +39,6 @@ class ScalarEvolution; class SCEV; class SCEVExpander; class TargetLibraryInfo; -class TargetTransformInfo; class LPPassManager; class Instruction; struct RuntimeCheckingPtrGroup; diff --git a/llvm/include/llvm/Transforms/Utils/LoopVersioning.h b/llvm/include/llvm/Transforms/Utils/LoopVersioning.h index ac6cee637a46d..13321e498c97f 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopVersioning.h +++ b/llvm/include/llvm/Transforms/Utils/LoopVersioning.h @@ -25,7 +25,6 @@ namespace llvm { class Loop; class LoopAccessInfo; class LoopInfo; -class ScalarEvolution; struct RuntimeCheckingPtrGroup; typedef std::pair From 788c7d2ec11dfc868a5b03478c922dc9699c6d47 Mon Sep 17 00:00:00 2001 From: Jessica Clarke Date: Thu, 17 Sep 2020 13:44:01 +0100 Subject: [PATCH 0973/1079] [clang][docs] Fix documentation of -O D79916 changed the behaviour from -O2 to -O1 but the documentation was not updated to reflect this. --- clang/docs/CommandGuide/clang.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/CommandGuide/clang.rst b/clang/docs/CommandGuide/clang.rst index 11169e3528940..a24e138e86a7d 100644 --- a/clang/docs/CommandGuide/clang.rst +++ b/clang/docs/CommandGuide/clang.rst @@ -385,7 +385,7 @@ Code Generation Options :option:`-Og` Like :option:`-O1`. In future versions, this option might disable different optimizations in order to improve debuggability. - :option:`-O` Equivalent to :option:`-O2`. + :option:`-O` Equivalent to :option:`-O1`. :option:`-O4` and higher From 03783f19dc78fc45fd987f892c314578b5e52d78 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 17 Sep 2020 08:39:23 -0400 Subject: [PATCH 0974/1079] [SLP] sort candidates to increase chance of optimal compare reduction This is one (small) part of improving PR41312: https://llvm.org/PR41312 As shown there and in the smaller tests here, if we have some member of the reduction values that does not match the others, we want to push it to the end (bring the matching members forward and together). In the regression tests, we have 5 candidates for the 4 slots of the reduction. If the one "wrong" compare is grouped with the others, it prevents forming the ideal v4i1 compare reduction. Differential Revision: https://reviews.llvm.org/D87772 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 30 +++++++- .../SLPVectorizer/X86/compare-reduce.ll | 71 ++++++------------- 2 files changed, 51 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 3d19e867b6c29..c487301177c14 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6838,9 +6838,37 @@ class HorizontalReduction { for (ReductionOpsType &RdxOp : ReductionOps) IgnoreList.append(RdxOp.begin(), RdxOp.end()); + unsigned ReduxWidth = PowerOf2Floor(NumReducedVals); + if (NumReducedVals > ReduxWidth) { + // In the loop below, we are building a tree based on a window of + // 'ReduxWidth' values. + // If the operands of those values have common traits (compare predicate, + // constant operand, etc), then we want to group those together to + // minimize the cost of the reduction. + + // TODO: This should be extended to count common operands for + // compares and binops. + + // Step 1: Count the number of times each compare predicate occurs. 
+ SmallDenseMap<CmpInst::Predicate, unsigned> PredCountMap; + for (Value *RdxVal : ReducedVals) { + CmpInst::Predicate Pred; + if (match(RdxVal, m_Cmp(Pred, m_Value(), m_Value()))) + ++PredCountMap[Pred]; + } + // Step 2: Sort the values so the most common predicates come first. + stable_sort(ReducedVals, [&PredCountMap](Value *A, Value *B) { + CmpInst::Predicate PredA, PredB; + if (match(A, m_Cmp(PredA, m_Value(), m_Value())) && + match(B, m_Cmp(PredB, m_Value(), m_Value()))) { + return PredCountMap[PredA] > PredCountMap[PredB]; + } + return false; + }); + } + Value *VectorizedTree = nullptr; unsigned i = 0; - unsigned ReduxWidth = PowerOf2Floor(NumReducedVals); while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) { ArrayRef<Value *> VL = makeArrayRef(&ReducedVals[i], ReduxWidth); V.buildTree(VL, ExternallyUsedValues, IgnoreList); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll index daa96bfa84aef..b0971dd804501 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll @@ -81,20 +81,12 @@ declare i32 @printf(i8* nocapture, ...) define float @merge_anyof_v4f32_wrong_first(<4 x float> %x) { ; CHECK-LABEL: @merge_anyof_v4f32_wrong_first( -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0 -; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[X]], i32 1 -; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x float> [[X]], i32 2 -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x float> [[X]], i32 3 -; CHECK-NEXT: [[CMP3WRONG:%.*]] = fcmp olt float [[X3]], 4.200000e+01 -; CHECK-NEXT: [[CMP0:%.*]] = fcmp ogt float [[X0]], 1.000000e+00 -; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[X1]], 1.000000e+00 -; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[X2]], 1.000000e+00 -; CHECK-NEXT: [[CMP3:%.*]] = fcmp ogt float [[X3]], 1.000000e+00 -; CHECK-NEXT: [[OR03:%.*]] = or i1 [[CMP0]], [[CMP3WRONG]] -; CHECK-NEXT: [[OR031:%.*]] = or i1 [[OR03]], [[CMP1]] -; CHECK-NEXT: [[OR0312:%.*]] = or i1 [[OR031]], [[CMP2]] -; CHECK-NEXT: [[OR03123:%.*]] = or i1 [[OR0312]], [[CMP3]] -; CHECK-NEXT: [[R:%.*]] = select i1 [[OR03123]], float -1.000000e+00, float 1.000000e+00 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3 +; CHECK-NEXT: [[CMP3WRONG:%.*]] = fcmp olt float [[TMP1]], 4.200000e+01 +; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <4 x float> [[X]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[CMP3WRONG]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP4]], float -1.000000e+00, float 1.000000e+00 ; CHECK-NEXT: ret float [[R]] ; %x0 = extractelement <4 x float> %x, i32 0 @@ -143,20 +135,12 @@ define float @merge_anyof_v4f32_wrong_last(<4 x float> %x) { define i32 @merge_anyof_v4i32_wrong_middle(<4 x i32> %x) { ; CHECK-LABEL: @merge_anyof_v4i32_wrong_middle( -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 -; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 -; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 -; CHECK-NEXT: [[CMP3WRONG:%.*]] = icmp slt i32 [[X3]], 42 -; CHECK-NEXT: [[CMP0:%.*]] = icmp sgt i32 [[X0]], 1 -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X1]], 1 -; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X2]], 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[X3]], 1 -; CHECK-NEXT: [[OR03:%.*]] = or i1 [[CMP0]], [[CMP3]] -; CHECK-NEXT: [[OR033:%.*]] = 
or i1 [[OR03]], [[CMP3WRONG]] -; CHECK-NEXT: [[OR0332:%.*]] = or i1 [[OR033]], [[CMP2]] -; CHECK-NEXT: [[OR03321:%.*]] = or i1 [[OR0332]], [[CMP1]] -; CHECK-NEXT: [[R:%.*]] = select i1 [[OR03321]], i32 -1, i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 3 +; CHECK-NEXT: [[CMP3WRONG:%.*]] = icmp slt i32 [[TMP1]], 42 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], <i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[CMP3WRONG]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP4]], i32 -1, i32 1 ; CHECK-NEXT: ret i32 [[R]] ; %x0 = extractelement <4 x i32> %x, i32 0 @@ -176,29 +160,18 @@ define i32 @merge_anyof_v4i32_wrong_middle(<4 x i32> %x) { ret i32 %r } +; Operand/predicate swapping allows forming a reduction, but the +; ideal reduction groups all of the original 'sgt' ops together. + define i32 @merge_anyof_v4i32_wrong_middle_better_rdx(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @merge_anyof_v4i32_wrong_middle_better_rdx( -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 -; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 -; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 -; CHECK-NEXT: [[Y0:%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 0 -; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i32> [[Y]], i32 1 -; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i32> [[Y]], i32 2 -; CHECK-NEXT: [[Y3:%.*]] = extractelement <4 x i32> [[Y]], i32 3 -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X1]], [[Y1]] -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[X0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[X3]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[Y3]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X2]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> undef, i32 [[Y0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[Y3]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[X3]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[Y2]], i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i32> [[TMP4]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP10]], [[CMP1]] -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP11]], i32 -1, i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 3 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 3 +; CHECK-NEXT: [[CMP3WRONG:%.*]] = icmp slt i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[X]], [[Y]] +; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[CMP3WRONG]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP5]], i32 -1, i32 1 ; CHECK-NEXT: ret i32 [[R]] ; %x0 = extractelement <4 x i32> %x, i32 0 From 0dca1ac617d802c0806f57f67eb830c4f5f3fffb Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 15 Sep 2020 16:17:08 +0300 Subject: [PATCH 0975/1079] [llvm-readelf/obj][test] - Document what we print in various places for unnamed section symbols. 
We have an issue with `ELFDumper::getSymbolSectionName`:

1) It is used deeply for both LLVM/GNU styles and might return
   LLVM-style-only values to describe symbols: "Undefined",
   "Processor Specific", "Absolute", etc.
2) `getSymbolSectionName` is used by `getFullSymbolName`, so these
   special values might appear instead of symbol names in many places.

This happens for unnamed section symbols. It went unnoticed because, in
most cases I have found, an unnamed section symbol is unexpected.

This patch documents the existing behavior and adds tests and FIXMEs.

Differential revision: https://reviews.llvm.org/D87763
---
 .../tools/llvm-readobj/ELF/dyn-symbols.test   | 28 ++++++-
 .../tools/llvm-readobj/ELF/hash-symbols.test  | 35 +++++++--
 .../test/tools/llvm-readobj/ELF/mips-got.test | 55 ++++++++++++++
 .../test/tools/llvm-readobj/ELF/mips-plt.test | 72 ++++++++++++++++++
 .../tools/llvm-readobj/ELF/symbol-shndx.test  | 75 +++++++++++++++++--
 5 files changed, 250 insertions(+), 15 deletions(-)

diff --git a/llvm/test/tools/llvm-readobj/ELF/dyn-symbols.test b/llvm/test/tools/llvm-readobj/ELF/dyn-symbols.test
index f57b21cb6e974..a438535cc1c8d 100644
--- a/llvm/test/tools/llvm-readobj/ELF/dyn-symbols.test
+++ b/llvm/test/tools/llvm-readobj/ELF/dyn-symbols.test
@@ -322,8 +322,32 @@ Sections:
     - NonDefault
 DynamicSymbols:
   - Name: foo
-  - Name: bar
-  - Name: zed
+  - Name:  [[NAME=bar]]
+    Type:  [[TYPE=STT_NOTYPE]]
+    Index: [[INDEX=<none>]]
+  - Name: [[NAME=zed]]
+    Type: [[TYPE=STT_NOTYPE]]
+
+## Check the behavior for unnamed versioned section symbols.
+## TODO: we should print proper symbol names instead of descriptions.
+# RUN: yaml2obj %s -DTYPE=STT_SECTION -DNAME="''" -DINDEX=SHN_ABS --docnum=6 -o %t6.sec.sym
+# RUN: llvm-readobj -V --dyn-symbols %t6.sec.sym | FileCheck %s --check-prefix=VERSIONED-SEC-SYM-LLVM
+# RUN: llvm-readelf -V --dyn-symbols %t6.sec.sym | FileCheck %s --check-prefix=VERSIONED-SEC-SYM-GNU
+
+# VERSIONED-SEC-SYM-LLVM: DynamicSymbols [
+# VERSIONED-SEC-SYM-LLVM:   Name: foo (12)
+# VERSIONED-SEC-SYM-LLVM:   Name: Absolute (0)
+# VERSIONED-SEC-SYM-LLVM:   Name: Undefined (0)
+# VERSIONED-SEC-SYM-LLVM: VersionSymbols [
+# VERSIONED-SEC-SYM-LLVM:   Name: foo
+# VERSIONED-SEC-SYM-LLVM:   Name: Absolute
+# VERSIONED-SEC-SYM-LLVM:   Name: Undefined
+
+# VERSIONED-SEC-SYM-GNU: Symbol table '.dynsym' contains 4 entries:
+# VERSIONED-SEC-SYM-GNU:      Num: {{.*}} Ndx Name
+# VERSIONED-SEC-SYM-GNU:        1: {{.*}} UND foo
+# VERSIONED-SEC-SYM-GNU-NEXT:   2: {{.*}} ABS Absolute
+# VERSIONED-SEC-SYM-GNU-NEXT:   3: {{.*}} UND Undefined

 ## Case 8: Check what we print when:
 ## a) The dynamic symbol table does not exist.
diff --git a/llvm/test/tools/llvm-readobj/ELF/hash-symbols.test b/llvm/test/tools/llvm-readobj/ELF/hash-symbols.test index 5b9904bf442ca..7488bd5514e5a 100644 --- a/llvm/test/tools/llvm-readobj/ELF/hash-symbols.test +++ b/llvm/test/tools/llvm-readobj/ELF/hash-symbols.test @@ -81,23 +81,28 @@ Sections: - Tag: DT_NULL Value: 0x0000000000000000 DynamicSymbols: - - Name: ccc + - Name: [[NAME=ccc]] Binding: STB_GLOBAL - - Name: aaa + Type: [[TYPE=STT_NOTYPE]] + - Name: [[NAME=aaa]] Section: .hash Binding: STB_GLOBAL Value: 0x0000000000001000 - - Name: ddd + Type: [[TYPE=STT_NOTYPE]] + - Name: [[NAME=ddd]] Index: SHN_ABS Binding: STB_GLOBAL Value: 0x0000000000000001 - - Name: eee + Type: [[TYPE=STT_NOTYPE]] + - Name: [[NAME=eee]] Section: .gnu.hash Binding: STB_GLOBAL - - Name: bbb + Type: [[TYPE=STT_NOTYPE]] + - Name: [[NAME=bbb]] Section: .hash Binding: STB_WEAK Value: 0x0000000000001001 + Type: [[TYPE=STT_NOTYPE]] ProgramHeaders: - Type: PT_LOAD Flags: [ PF_R, PF_X ] @@ -106,6 +111,26 @@ ProgramHeaders: - Section: .gnu.hash - Section: .dynamic +## Check what we print for unnamed section symbols. +## TODO: we should print proper symbol names instead of descriptions. +# RUN: yaml2obj --docnum=1 -DBITS=64 -DTYPE=STT_SECTION -DNAME="''" %s -o %t1-sec-syms.so +# RUN: llvm-readelf --hash-symbols %t1-sec-syms.so | FileCheck %s --check-prefix=UNNAMED-SEC-SYMS + +# UNNAMED-SEC-SYMS: Symbol table of .hash for image: +# UNNAMED-SEC-SYMS-NEXT: Num {{.*}} Ndx Name +# UNNAMED-SEC-SYMS-NEXT: 1 {{.*}} UND Undefined +# UNNAMED-SEC-SYMS-NEXT: 5 {{.*}} 1 .hash +# UNNAMED-SEC-SYMS-NEXT: 3 {{.*}} ABS Absolute +# UNNAMED-SEC-SYMS-NEXT: 2 {{.*}} 1 .hash +# UNNAMED-SEC-SYMS-NEXT: 4 {{.*}} 2 .gnu.hash +# UNNAMED-SEC-SYMS-EMPTY: +# UNNAMED-SEC-SYMS: Symbol table of .gnu.hash for image: +# UNNAMED-SEC-SYMS-NEXT: Num {{.*}} Ndx Name +# UNNAMED-SEC-SYMS-NEXT: 2 {{.*}} 1 .hash +# UNNAMED-SEC-SYMS-NEXT: 3 {{.*}} ABS Absolute +# UNNAMED-SEC-SYMS-NEXT: 4 {{.*}} 2 .gnu.hash +# UNNAMED-SEC-SYMS-NEXT: 5 {{.*}} 1 .hash + ## Check the output when only .hash section is present. # RUN: yaml2obj --docnum=2 %s -o %t2-32.so diff --git a/llvm/test/tools/llvm-readobj/ELF/mips-got.test b/llvm/test/tools/llvm-readobj/ELF/mips-got.test index 24a06dd2b3bbd..f1c3e4d1fc224 100644 --- a/llvm/test/tools/llvm-readobj/ELF/mips-got.test +++ b/llvm/test/tools/llvm-readobj/ELF/mips-got.test @@ -651,3 +651,58 @@ Sections: Value: 0x1122 DynamicSymbols: - Name: foo + +## Check how we print global GOT entries when they are unnamed section symbols. +## TODO: we should print proper symbol names instead of descriptions. 
+# RUN: yaml2obj --docnum=5 %s -o %t.err8.o +# RUN: llvm-readobj -A %t.err8.o 2>&1 | FileCheck %s -DFILE=%t.err8.o --check-prefix=SEC-SYMS-LLVM +# RUN: llvm-readelf -A %t.err8.o 2>&1 | FileCheck %s -DFILE=%t.err8.o --check-prefix=SEC-SYMS-GNU + +# SEC-SYMS-LLVM: Global entries [ +# SEC-SYMS-LLVM-NEXT: Entry { +# SEC-SYMS-LLVM: Section: Absolute (0xFFF1) +# SEC-SYMS-LLVM-NEXT: Name: Absolute (0) +# SEC-SYMS-LLVM-NEXT: } +# SEC-SYMS-LLVM-NEXT: Entry { +# SEC-SYMS-LLVM: Section: .got (0x1) +# SEC-SYMS-LLVM-NEXT: Name: .got (0) +# SEC-SYMS-LLVM-NEXT: } +# SEC-SYMS-LLVM-NEXT: Entry { +# SEC-SYMS-LLVM: Section: Common (0xFFF2) +# SEC-SYMS-LLVM-NEXT: Name: Common (0) +# SEC-SYMS-LLVM-NEXT: } +# SEC-SYMS-LLVM-NEXT: ] + +# SEC-SYMS-GNU: Global entries: +# SEC-SYMS-GNU-NEXT: {{.*}} Ndx Name +# SEC-SYMS-GNU-NEXT: {{.*}} ABS Absolute +# SEC-SYMS-GNU-NEXT: {{.*}} 1 .got +# SEC-SYMS-GNU-NEXT: {{.*}} COM Common + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_MIPS +Sections: + - Name: .got + Type: SHT_PROGBITS + Address: 0x1122 + Size: 48 + - Name: .dynamic + Type: SHT_DYNAMIC + Entries: + - Tag: DT_MIPS_LOCAL_GOTNO + Value: 1 + - Tag: DT_MIPS_GOTSYM + Value: 1 + - Tag: DT_PLTGOT + Value: 0x1122 +DynamicSymbols: + - Type: STT_SECTION + Index: SHN_ABS + - Type: STT_SECTION + Section: .got + - Type: STT_SECTION + Index: SHN_COMMON diff --git a/llvm/test/tools/llvm-readobj/ELF/mips-plt.test b/llvm/test/tools/llvm-readobj/ELF/mips-plt.test index 95b310ba664c1..7f3fd0897747f 100644 --- a/llvm/test/tools/llvm-readobj/ELF/mips-plt.test +++ b/llvm/test/tools/llvm-readobj/ELF/mips-plt.test @@ -140,3 +140,75 @@ DynamicSymbols: [] # RUN: not llvm-readobj -A %t.err7.o 2>&1 | FileCheck %s -DFILE=%t.err7.o -check-prefix ERR7 # ERR7: error: '[[FILE]]': unable to get a string table for the SHT_DYNAMIC section with index 1: invalid sh_type for symbol table, expected SHT_SYMTAB or SHT_DYNSYM + +## Check how we print PLT entries when they are unnamed section symbols. +## TODO: we should print proper symbol names instead of descriptions. 
+# RUN: yaml2obj --docnum=3 %s -o %t.3 +# RUN: llvm-readobj -A %t.3 2>&1 | FileCheck %s -DFILE=%t.err8.o --check-prefix=SEC-SYMS-LLVM +# RUN: llvm-readelf -A %t.3 2>&1 | FileCheck %s -DFILE=%t.err8.o --check-prefix=SEC-SYMS-GNU + +# SEC-SYMS-LLVM: PLT GOT { +# SEC-SYMS-LLVM: Entries [ +# SEC-SYMS-LLVM: Entry { +# SEC-SYMS-LLVM: Section: Absolute (0xFFF1) +# SEC-SYMS-LLVM-NEXT: Name: Absolute (0) +# SEC-SYMS-LLVM-NEXT: } +# SEC-SYMS-LLVM-NEXT: Entry { +# SEC-SYMS-LLVM: Section: .got.plt (0x2) +# SEC-SYMS-LLVM-NEXT: Name: .got.plt (0) +# SEC-SYMS-LLVM-NEXT: } +# SEC-SYMS-LLVM-NEXT: Entry { +# SEC-SYMS-LLVM: Section: Common (0xFFF2) +# SEC-SYMS-LLVM-NEXT: Name: Common (0) +# SEC-SYMS-LLVM-NEXT: } +# SEC-SYMS-LLVM-NEXT: ] +# SEC-SYMS-LLVM-NEXT: } + +# SEC-SYMS-GNU: PLT GOT: +# SEC-SYMS-GNU: Entries: +# SEC-SYMS-GNU-NEXT: Address {{.*}} Ndx Name +# SEC-SYMS-GNU-NEXT: 0000000000002010 {{.*}} ABS Absolute +# SEC-SYMS-GNU-NEXT: 0000000000002018 {{.*}} 2 .got.plt +# SEC-SYMS-GNU-NEXT: 0000000000002020 {{.*}} COM Common + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_MIPS +Sections: + - Name: .rel.plt + Type: SHT_REL + Flags: [ SHF_ALLOC ] + Address: 0x1000 + Link: .dynsym + Relocations: + - Offset: 0x1 + Symbol: 1 + Type: R_MIPS_JUMP_SLOT + - Offset: 0x2 + Symbol: 2 + Type: R_MIPS_JUMP_SLOT + - Offset: 0x2 + Symbol: 3 + Type: R_MIPS_JUMP_SLOT + - Name: .got.plt + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x2000 + Size: 40 ## (dynamic symbols number + 2) * 8 + - Name: .dynamic + Type: SHT_DYNAMIC + Entries: + - Tag: DT_JMPREL + Value: 0x1000 + - Tag: DT_MIPS_PLTGOT + Value: 0x2000 +DynamicSymbols: + - Type: STT_SECTION + Index: SHN_ABS + - Type: STT_SECTION + Section: .got.plt + - Type: STT_SECTION + Index: SHN_COMMON diff --git a/llvm/test/tools/llvm-readobj/ELF/symbol-shndx.test b/llvm/test/tools/llvm-readobj/ELF/symbol-shndx.test index 0d9c225c99fd2..b2d1e2f6d2ecd 100644 --- a/llvm/test/tools/llvm-readobj/ELF/symbol-shndx.test +++ b/llvm/test/tools/llvm-readobj/ELF/symbol-shndx.test @@ -57,29 +57,88 @@ Sections: Link: .symtab Entries: [ 0, 0, 0, 0, 0, 0, 0, 0, 1 ] Symbols: - - Name: undef + - Name: [[NAME=undef]] Binding: STB_GLOBAL - - Name: normal + Type: [[TYPE=STT_NOTYPE]] + - Name: [[NAME=normal]] Section: .text Binding: STB_GLOBAL - - Name: common + Type: [[TYPE=STT_NOTYPE]] + - Name: [[NAME=common]] Index: SHN_COMMON Binding: STB_GLOBAL - - Name: absolute + Type: [[TYPE=STT_NOTYPE]] + - Name: [[NAME=absolute]] Index: SHN_ABS Binding: STB_GLOBAL - - Name: proc + Type: [[TYPE=STT_NOTYPE]] + - Name: [[NAME=proc]] Index: 0xff01 Binding: STB_GLOBAL - - Name: os + Type: [[TYPE=STT_NOTYPE]] + - Name: [[NAME=os]] Index: 0xff21 Binding: STB_GLOBAL - - Name: reserved + Type: [[TYPE=STT_NOTYPE]] + - Name: [[NAME=reserved]] Index: 0xfffe Binding: STB_GLOBAL - - Name: xindex + Type: [[TYPE=STT_NOTYPE]] + - Name: [[NAME=xindex]] Index: SHN_XINDEX Binding: STB_GLOBAL + Type: [[TYPE=STT_NOTYPE]] + +## Check the behavior for section symbols. +# RUN: yaml2obj --docnum=1 -DTYPE=STT_SECTION %s -o %t1-sec +# RUN: llvm-readobj --symbols %t1-sec | FileCheck %s --check-prefix=LLVM1 +# RUN: llvm-readelf --symbols %t1-sec | FileCheck %s --check-prefix=GNU1 + +## Check the behavior for unnamed section symbols. +## TODO: we should print proper symbol names instead of descriptions. 
+# RUN: yaml2obj --docnum=1 -DTYPE=STT_SECTION -DNAME="''" %s -o %t1-sec-unnamed +# RUN: llvm-readobj --symbols %t1-sec-unnamed | FileCheck %s --check-prefix=LLVM1-SEC-SYMS +# RUN: llvm-readelf --symbols %t1-sec-unnamed | FileCheck %s --check-prefix=GNU1-SEC-SYMS + +# LLVM1-SEC-SYMS: Symbols [ +# LLVM1-SEC-SYMS-NEXT: Symbol { +# LLVM1-SEC-SYMS-NEXT: Name: (0) +# LLVM1-SEC-SYMS: Section: Undefined (0x0) +# LLVM1-SEC-SYMS: Symbol { +# LLVM1-SEC-SYMS-NEXT: Name: Undefined (0) +# LLVM1-SEC-SYMS: Section: Undefined (0x0) +# LLVM1-SEC-SYMS: Symbol { +# LLVM1-SEC-SYMS-NEXT: Name: .text (0) +# LLVM1-SEC-SYMS: Section: .text (0x1) +# LLVM1-SEC-SYMS: Symbol { +# LLVM1-SEC-SYMS-NEXT: Name: Common (0) +# LLVM1-SEC-SYMS: Section: Common (0xFFF2) +# LLVM1-SEC-SYMS: Symbol { +# LLVM1-SEC-SYMS-NEXT: Name: Absolute (0) +# LLVM1-SEC-SYMS: Section: Absolute (0xFFF1) +# LLVM1-SEC-SYMS: Symbol { +# LLVM1-SEC-SYMS-NEXT: Name: Processor Specific (0) +# LLVM1-SEC-SYMS: Section: Processor Specific (0xFF01) +# LLVM1-SEC-SYMS: Symbol { +# LLVM1-SEC-SYMS-NEXT: Name: Operating System Specific (0) +# LLVM1-SEC-SYMS: Section: Operating System Specific (0xFF21) +# LLVM1-SEC-SYMS: Symbol { +# LLVM1-SEC-SYMS-NEXT: Name: Reserved (0) +# LLVM1-SEC-SYMS: Section: Reserved (0xFFFE) +# LLVM1-SEC-SYMS: Symbol { +# LLVM1-SEC-SYMS-NEXT: Name: .text (0) +# LLVM1-SEC-SYMS: Section: .text (0x1) + +# GNU1-SEC-SYMS: Num: {{.*}} Ndx Name +# GNU1-SEC-SYMS-NEXT: 0: {{.*}} UND +# GNU1-SEC-SYMS-NEXT: 1: {{.*}} UND Undefined +# GNU1-SEC-SYMS-NEXT: 2: {{.*}} 1 .text +# GNU1-SEC-SYMS-NEXT: 3: {{.*}} COM Common +# GNU1-SEC-SYMS-NEXT: 4: {{.*}} ABS Absolute +# GNU1-SEC-SYMS-NEXT: 5: {{.*}} PRC[0xff01] Processor Specific +# GNU1-SEC-SYMS-NEXT: 6: {{.*}} OS[0xff21] Operating System Specific +# GNU1-SEC-SYMS-NEXT: 7: {{.*}} RSV[0xfffe] Reserved +# GNU1-SEC-SYMS-NEXT: 8: {{.*}} 1 .text ## In this case, the index does not correspond to a real section. Check that GNU ## style just prints the section index as normal and LLVM style prints a warning From 279943edf87887403fce72c505f9760764e416f0 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Thu, 17 Sep 2020 15:36:06 +0300 Subject: [PATCH 0976/1079] [obj2yaml] - Don't emit EM_NONE. When ELF header's `e_machine == 0`, we emit: ``` Machine: EM_NONE ``` We can avoid doing this, because yaml2obj sets the `e_machine` field to `EM_NONE` by default. 
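A minimal sketch of the resulting round trip (illustrative input, not taken
from this patch's test files): obj2yaml now prints a header like

```
--- !ELF
FileHeader:
  Class: ELFCLASS64
  Data:  ELFDATA2LSB
  Type:  ET_REL
```

with no `Machine` key at all, and feeding that YAML back to yaml2obj still
produces an object whose `e_machine` is `EM_NONE`.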
Differential revision: https://reviews.llvm.org/D87829 --- .../ELF/call-graph-profile-section.yaml | 7 ++- .../duplicate-symbol-and-section-names.yaml | 7 ++- llvm/test/tools/obj2yaml/ELF/emachine.yaml | 44 +++++++++---------- .../obj2yaml/ELF/gnu-unique-symbols.yaml | 9 ++-- .../obj2yaml/ELF/implicit-sections-order.yaml | 14 +++--- .../obj2yaml/ELF/invalid-section-name.yaml | 7 ++- llvm/test/tools/obj2yaml/ELF/no-symtab.yaml | 14 +++--- .../test/tools/obj2yaml/ELF/null-section.yaml | 28 +++++------- .../tools/obj2yaml/ELF/sht-symtab-shndx.yaml | 7 ++- llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml | 28 +++++------- .../tools/obj2yaml/ELF/symbol-visibility.yaml | 7 ++- .../tools/obj2yaml/ELF/versym-section.yaml | 9 ++-- llvm/tools/obj2yaml/elf2yaml.cpp | 3 +- 13 files changed, 82 insertions(+), 102 deletions(-) diff --git a/llvm/test/tools/obj2yaml/ELF/call-graph-profile-section.yaml b/llvm/test/tools/obj2yaml/ELF/call-graph-profile-section.yaml index bc8b631beea83..2e3fcd98065be 100644 --- a/llvm/test/tools/obj2yaml/ELF/call-graph-profile-section.yaml +++ b/llvm/test/tools/obj2yaml/ELF/call-graph-profile-section.yaml @@ -51,10 +51,9 @@ Symbols: # INVALID: --- !ELF # INVALID-NEXT: FileHeader: -# INVALID-NEXT: Class: ELFCLASS32 -# INVALID-NEXT: Data: ELFDATA2MSB -# INVALID-NEXT: Type: ET_DYN -# INVALID-NEXT: Machine: EM_NONE +# INVALID-NEXT: Class: ELFCLASS32 +# INVALID-NEXT: Data: ELFDATA2MSB +# INVALID-NEXT: Type: ET_DYN # INVALID-NEXT: Sections: # INVALID-NEXT: - Name: .empty # INVALID-NEXT: Type: SHT_LLVM_CALL_GRAPH_PROFILE diff --git a/llvm/test/tools/obj2yaml/ELF/duplicate-symbol-and-section-names.yaml b/llvm/test/tools/obj2yaml/ELF/duplicate-symbol-and-section-names.yaml index bea942327a5bb..9e6b8fca67ac4 100644 --- a/llvm/test/tools/obj2yaml/ELF/duplicate-symbol-and-section-names.yaml +++ b/llvm/test/tools/obj2yaml/ELF/duplicate-symbol-and-section-names.yaml @@ -24,10 +24,9 @@ # CASE1: --- !ELF # CASE1-NEXT: FileHeader: -# CASE1-NEXT: Class: ELFCLASS64 -# CASE1-NEXT: Data: ELFDATA2LSB -# CASE1-NEXT: Type: ET_REL -# CASE1-NEXT: Machine: EM_NONE +# CASE1-NEXT: Class: ELFCLASS64 +# CASE1-NEXT: Data: ELFDATA2LSB +# CASE1-NEXT: Type: ET_REL # CASE1-NEXT: Sections: # CASE1-NEXT: - Name: .foo # CASE1-NEXT: Type: SHT_PROGBITS diff --git a/llvm/test/tools/obj2yaml/ELF/emachine.yaml b/llvm/test/tools/obj2yaml/ELF/emachine.yaml index d351505aa2845..10d72bed87f4e 100644 --- a/llvm/test/tools/obj2yaml/ELF/emachine.yaml +++ b/llvm/test/tools/obj2yaml/ELF/emachine.yaml @@ -2,38 +2,36 @@ ## Check it dumps an unknown e_machine as a number. -# RUN: yaml2obj --docnum=1 %s -o %t1 -# RUN: obj2yaml %t1 | FileCheck %s --check-prefix=UNKNOWN +# RUN: yaml2obj -DMACHINE=0x1234 %s -o %t1 +# RUN: obj2yaml %t1 | FileCheck %s -DMACHINE=0x1234 -# UNKNOWN: --- !ELF -# UNKNOWN-NEXT: FileHeader: -# UNKNOWN-NEXT: Class: ELFCLASS64 -# UNKNOWN-NEXT: Data: ELFDATA2MSB -# UNKNOWN-NEXT: Type: ET_REL -# UNKNOWN-NEXT: Machine: 0x1234 +# CHECK: --- !ELF +# CHECK-NEXT: FileHeader: +# CHECK-NEXT: Class: ELFCLASS64 +# CHECK-NEXT: Data: ELFDATA2MSB +# CHECK-NEXT: Type: ET_REL +# CHECK-NEXT: Machine: [[MACHINE]] --- !ELF FileHeader: Class: ELFCLASS64 Data: ELFDATA2MSB Type: ET_REL - Machine: 0x1234 + Machine: [[MACHINE]] ## Check it dumps a known e_machine value as an enum string. 
-# RUN: yaml2obj --docnum=2 %s -o %t2 -# RUN: obj2yaml %t2 | FileCheck %s --check-prefix=KNOWN +# RUN: yaml2obj %s -DMACHINE=0x1 -o %t2 +# RUN: obj2yaml %t2 | FileCheck %s -DMACHINE=EM_M32 -# KNOWN: --- !ELF -# KNOWN-NEXT: FileHeader: -# KNOWN-NEXT: Class: ELFCLASS64 -# KNOWN-NEXT: Data: ELFDATA2MSB -# KNOWN-NEXT: Type: ET_REL -# KNOWN-NEXT: Machine: EM_NONE +## Check it doesn't dump e_machine when it is EM_NONE (0). ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2MSB - Type: ET_REL - Machine: 0 +# RUN: yaml2obj %s -DMACHINE=0x0 -o %t3 +# RUN: obj2yaml %t3 | FileCheck %s --check-prefix=DEFAULT + +# DEFAULT: --- !ELF +# DEFAULT-NEXT: FileHeader: +# DEFAULT-NEXT: Class: ELFCLASS64 +# DEFAULT-NEXT: Data: ELFDATA2MSB +# DEFAULT-NEXT: Type: ET_REL +# DEFAULT-NEXT: ... diff --git a/llvm/test/tools/obj2yaml/ELF/gnu-unique-symbols.yaml b/llvm/test/tools/obj2yaml/ELF/gnu-unique-symbols.yaml index 2668dad25fb4b..c34ab3e3fc0ad 100644 --- a/llvm/test/tools/obj2yaml/ELF/gnu-unique-symbols.yaml +++ b/llvm/test/tools/obj2yaml/ELF/gnu-unique-symbols.yaml @@ -5,11 +5,10 @@ # CHECK: --- !ELF # CHECK-NEXT: FileHeader: -# CHECK-NEXT: Class: ELFCLASS64 -# CHECK-NEXT: Data: ELFDATA2LSB -# CHECK-NEXT: OSABI: ELFOSABI_GNU -# CHECK-NEXT: Type: ET_REL -# CHECK-NEXT: Machine: EM_NONE +# CHECK-NEXT: Class: ELFCLASS64 +# CHECK-NEXT: Data: ELFDATA2LSB +# CHECK-NEXT: OSABI: ELFOSABI_GNU +# CHECK-NEXT: Type: ET_REL # CHECK-NEXT: Symbols: # CHECK-NEXT: - Name: foo # CHECK-NEXT: Type: STT_OBJECT diff --git a/llvm/test/tools/obj2yaml/ELF/implicit-sections-order.yaml b/llvm/test/tools/obj2yaml/ELF/implicit-sections-order.yaml index 502b8e62688b1..e400d00eb5418 100644 --- a/llvm/test/tools/obj2yaml/ELF/implicit-sections-order.yaml +++ b/llvm/test/tools/obj2yaml/ELF/implicit-sections-order.yaml @@ -34,10 +34,9 @@ # OUTPUT: --- !ELF # OUTPUT-NEXT: FileHeader: -# OUTPUT-NEXT: Class: ELFCLASS64 -# OUTPUT-NEXT: Data: ELFDATA2LSB -# OUTPUT-NEXT: Type: ET_DYN -# OUTPUT-NEXT: Machine: EM_NONE +# OUTPUT-NEXT: Class: ELFCLASS64 +# OUTPUT-NEXT: Data: ELFDATA2LSB +# OUTPUT-NEXT: Type: ET_DYN # OUTPUT-NEXT: Sections: # OUTPUT-NEXT: - Name: .foo.1 # OUTPUT-NEXT: Type: SHT_PROGBITS @@ -124,10 +123,9 @@ DynamicSymbols: ## SHT_STRTAB/SHT_SYMTAB/SHT_DYNSYM sections. 
# OUTPUT2: --- !ELF # OUTPUT2-NEXT: FileHeader: -# OUTPUT2-NEXT: Class: ELFCLASS64 -# OUTPUT2-NEXT: Data: ELFDATA2LSB -# OUTPUT2-NEXT: Type: ET_DYN -# OUTPUT2-NEXT: Machine: EM_NONE +# OUTPUT2-NEXT: Class: ELFCLASS64 +# OUTPUT2-NEXT: Data: ELFDATA2LSB +# OUTPUT2-NEXT: Type: ET_DYN # OUTPUT2-NEXT: Sections: # OUTPUT2-NEXT: - Name: .foo.1 # OUTPUT2-NEXT: Type: SHT_PROGBITS diff --git a/llvm/test/tools/obj2yaml/ELF/invalid-section-name.yaml b/llvm/test/tools/obj2yaml/ELF/invalid-section-name.yaml index 3f46563b980a5..40667b57a9749 100644 --- a/llvm/test/tools/obj2yaml/ELF/invalid-section-name.yaml +++ b/llvm/test/tools/obj2yaml/ELF/invalid-section-name.yaml @@ -8,10 +8,9 @@ # CHECK: --- !ELF # CHECK-NEXT: FileHeader: -# CHECK-NEXT: Class: ELFCLASS64 -# CHECK-NEXT: Data: ELFDATA2LSB -# CHECK-NEXT: Type: ET_REL -# CHECK-NEXT: Machine: EM_NONE +# CHECK-NEXT: Class: ELFCLASS64 +# CHECK-NEXT: Data: ELFDATA2LSB +# CHECK-NEXT: Type: ET_REL # CHECK-NEXT: Sections: # CHECK-NEXT: - Name: "{{.*}}" # CHECK-NEXT: Type: SHT_PROGBITS diff --git a/llvm/test/tools/obj2yaml/ELF/no-symtab.yaml b/llvm/test/tools/obj2yaml/ELF/no-symtab.yaml index 1566693339cda..8f9fb82856452 100644 --- a/llvm/test/tools/obj2yaml/ELF/no-symtab.yaml +++ b/llvm/test/tools/obj2yaml/ELF/no-symtab.yaml @@ -6,10 +6,9 @@ # NOSYMTAB: --- !ELF # NOSYMTAB-NEXT: FileHeader: -# NOSYMTAB-NEXT: Class: ELFCLASS64 -# NOSYMTAB-NEXT: Data: ELFDATA2LSB -# NOSYMTAB-NEXT: Type: ET_DYN -# NOSYMTAB-NEXT: Machine: EM_NONE +# NOSYMTAB-NEXT: Class: ELFCLASS64 +# NOSYMTAB-NEXT: Data: ELFDATA2LSB +# NOSYMTAB-NEXT: Type: ET_DYN # NOSYMTAB-NEXT: ... --- !ELF @@ -26,10 +25,9 @@ FileHeader: # SYMTAB: --- !ELF # SYMTAB-NEXT: FileHeader: -# SYMTAB-NEXT: Class: ELFCLASS64 -# SYMTAB-NEXT: Data: ELFDATA2LSB -# SYMTAB-NEXT: Type: ET_DYN -# SYMTAB-NEXT: Machine: EM_NONE +# SYMTAB-NEXT: Class: ELFCLASS64 +# SYMTAB-NEXT: Data: ELFDATA2LSB +# SYMTAB-NEXT: Type: ET_DYN # SYMTAB-NEXT: Symbols: [] # SYMTAB-NEXT: ... 
diff --git a/llvm/test/tools/obj2yaml/ELF/null-section.yaml b/llvm/test/tools/obj2yaml/ELF/null-section.yaml index 4d1e6ee1e7dbd..abba576fb4c78 100644 --- a/llvm/test/tools/obj2yaml/ELF/null-section.yaml +++ b/llvm/test/tools/obj2yaml/ELF/null-section.yaml @@ -6,10 +6,9 @@ # FIRST-SEC: --- !ELF # FIRST-SEC-NEXT: FileHeader: -# FIRST-SEC-NEXT: Class: ELFCLASS64 -# FIRST-SEC-NEXT: Data: ELFDATA2LSB -# FIRST-SEC-NEXT: Type: ET_REL -# FIRST-SEC-NEXT: Machine: EM_NONE +# FIRST-SEC-NEXT: Class: ELFCLASS64 +# FIRST-SEC-NEXT: Data: ELFDATA2LSB +# FIRST-SEC-NEXT: Type: ET_REL # FIRST-SEC-NEXT: Sections: # FIRST-SEC-NEXT: - Type: SHT_NULL # FIRST-SEC-NEXT: Flags: [ SHF_ALLOC ] @@ -48,10 +47,9 @@ Sections: # SECOND-SEC: --- !ELF # SECOND-SEC-NEXT: FileHeader: -# SECOND-SEC-NEXT: Class: ELFCLASS64 -# SECOND-SEC-NEXT: Data: ELFDATA2LSB -# SECOND-SEC-NEXT: Type: ET_REL -# SECOND-SEC-NEXT: Machine: EM_NONE +# SECOND-SEC-NEXT: Class: ELFCLASS64 +# SECOND-SEC-NEXT: Data: ELFDATA2LSB +# SECOND-SEC-NEXT: Type: ET_REL # SECOND-SEC-NEXT: Sections: # SECOND-SEC-NEXT: - Name: .foo # SECOND-SEC-NEXT: Type: SHT_PROGBITS @@ -91,10 +89,9 @@ Sections: # NULL-SEC: --- !ELF # NULL-SEC-NEXT: FileHeader: -# NULL-SEC-NEXT: Class: ELFCLASS64 -# NULL-SEC-NEXT: Data: ELFDATA2LSB -# NULL-SEC-NEXT: Type: ET_REL -# NULL-SEC-NEXT: Machine: EM_NONE +# NULL-SEC-NEXT: Class: ELFCLASS64 +# NULL-SEC-NEXT: Data: ELFDATA2LSB +# NULL-SEC-NEXT: Type: ET_REL # NULL-SEC-NEXT: Sections: # NULL-SEC-NEXT: - Name: .foo # NULL-SEC-NEXT: Type: SHT_PROGBITS @@ -118,10 +115,9 @@ Sections: # NULL-SEC-MIDDLE: --- !ELF # NULL-SEC-MIDDLE-NEXT: FileHeader: -# NULL-SEC-MIDDLE-NEXT: Class: ELFCLASS64 -# NULL-SEC-MIDDLE-NEXT: Data: ELFDATA2LSB -# NULL-SEC-MIDDLE-NEXT: Type: ET_REL -# NULL-SEC-MIDDLE-NEXT: Machine: EM_NONE +# NULL-SEC-MIDDLE-NEXT: Class: ELFCLASS64 +# NULL-SEC-MIDDLE-NEXT: Data: ELFDATA2LSB +# NULL-SEC-MIDDLE-NEXT: Type: ET_REL # NULL-SEC-MIDDLE-NEXT: Sections: # NULL-SEC-MIDDLE-NEXT: - Name: .foo # NULL-SEC-MIDDLE-NEXT: Type: SHT_PROGBITS diff --git a/llvm/test/tools/obj2yaml/ELF/sht-symtab-shndx.yaml b/llvm/test/tools/obj2yaml/ELF/sht-symtab-shndx.yaml index cc20a036daaaf..27decbe76d926 100644 --- a/llvm/test/tools/obj2yaml/ELF/sht-symtab-shndx.yaml +++ b/llvm/test/tools/obj2yaml/ELF/sht-symtab-shndx.yaml @@ -7,10 +7,9 @@ # CASE1: --- !ELF # CASE1-NEXT: FileHeader: -# CASE1-NEXT: Class: ELFCLASS64 -# CASE1-NEXT: Data: ELFDATA2LSB -# CASE1-NEXT: Type: ET_REL -# CASE1-NEXT: Machine: EM_NONE +# CASE1-NEXT: Class: ELFCLASS64 +# CASE1-NEXT: Data: ELFDATA2LSB +# CASE1-NEXT: Type: ET_REL # CASE1-NEXT: Sections: # CASE1-NEXT: - Name: bar # CASE1-NEXT: Type: SHT_PROGBITS diff --git a/llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml b/llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml index 98a5c5ae88aac..a2ef5f1f3770f 100644 --- a/llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml +++ b/llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml @@ -8,10 +8,9 @@ # VALID: --- !ELF # VALID-NEXT: FileHeader: -# VALID-NEXT: Class: ELFCLASS64 -# VALID-NEXT: Data: ELFDATA2LSB -# VALID-NEXT: Type: ET_EXEC -# VALID-NEXT: Machine: EM_NONE +# VALID-NEXT: Class: ELFCLASS64 +# VALID-NEXT: Data: ELFDATA2LSB +# VALID-NEXT: Type: ET_EXEC # VALID-NEXT: Sections: # VALID-NEXT: - Name: .stack_sizes # VALID-NEXT: Type: SHT_PROGBITS @@ -39,10 +38,9 @@ Sections: # INVALID: --- !ELF # INVALID-NEXT: FileHeader: -# INVALID-NEXT: Class: ELFCLASS64 -# INVALID-NEXT: Data: ELFDATA2LSB -# INVALID-NEXT: Type: ET_EXEC -# INVALID-NEXT: Machine: EM_NONE +# INVALID-NEXT: Class: ELFCLASS64 +# INVALID-NEXT: 
Data: ELFDATA2LSB +# INVALID-NEXT: Type: ET_EXEC # INVALID-NEXT: Sections: # INVALID-NEXT: - Name: .stack_sizes # INVALID-NEXT: Type: SHT_PROGBITS @@ -65,10 +63,9 @@ Sections: # EMPTY: --- !ELF # EMPTY-NEXT: FileHeader: -# EMPTY-NEXT: Class: ELFCLASS64 -# EMPTY-NEXT: Data: ELFDATA2LSB -# EMPTY-NEXT: Type: ET_EXEC -# EMPTY-NEXT: Machine: EM_NONE +# EMPTY-NEXT: Class: ELFCLASS64 +# EMPTY-NEXT: Data: ELFDATA2LSB +# EMPTY-NEXT: Type: ET_EXEC # EMPTY-NEXT: Sections: # EMPTY-NEXT: - Name: .stack_sizes # EMPTY-NEXT: Type: SHT_PROGBITS @@ -91,10 +88,9 @@ Sections: # MULTI: --- !ELF # MULTI-NEXT: FileHeader: -# MULTI-NEXT: Class: ELFCLASS64 -# MULTI-NEXT: Data: ELFDATA2LSB -# MULTI-NEXT: Type: ET_EXEC -# MULTI-NEXT: Machine: EM_NONE +# MULTI-NEXT: Class: ELFCLASS64 +# MULTI-NEXT: Data: ELFDATA2LSB +# MULTI-NEXT: Type: ET_EXEC # MULTI-NEXT: Sections: # MULTI-NEXT: - Name: .stack_sizes # MULTI-NEXT: Type: SHT_PROGBITS diff --git a/llvm/test/tools/obj2yaml/ELF/symbol-visibility.yaml b/llvm/test/tools/obj2yaml/ELF/symbol-visibility.yaml index 7659def7eb9f8..0c6020062fab2 100644 --- a/llvm/test/tools/obj2yaml/ELF/symbol-visibility.yaml +++ b/llvm/test/tools/obj2yaml/ELF/symbol-visibility.yaml @@ -4,10 +4,9 @@ # CHECK: --- !ELF # CHECK-NEXT: FileHeader: -# CHECK-NEXT: Class: ELFCLASS64 -# CHECK-NEXT: Data: ELFDATA2LSB -# CHECK-NEXT: Type: ET_REL -# CHECK-NEXT: Machine: EM_NONE +# CHECK-NEXT: Class: ELFCLASS64 +# CHECK-NEXT: Data: ELFDATA2LSB +# CHECK-NEXT: Type: ET_REL # CHECK-NEXT: Symbols: # CHECK-NEXT: - Name: default # CHECK-NEXT: - Name: internal diff --git a/llvm/test/tools/obj2yaml/ELF/versym-section.yaml b/llvm/test/tools/obj2yaml/ELF/versym-section.yaml index e394c325af0f2..fd63f553dc401 100644 --- a/llvm/test/tools/obj2yaml/ELF/versym-section.yaml +++ b/llvm/test/tools/obj2yaml/ELF/versym-section.yaml @@ -5,11 +5,10 @@ # CHECK: --- !ELF # CHECK-NEXT: FileHeader: -# CHECK-NEXT: Class: ELFCLASS64 -# CHECK-NEXT: Data: ELFDATA2LSB -# CHECK-NEXT: Type: ET_EXEC -# CHECK-NEXT: Machine: EM_NONE -# CHECK-NEXT: Entry: 0x0000000000201000 +# CHECK-NEXT: Class: ELFCLASS64 +# CHECK-NEXT: Data: ELFDATA2LSB +# CHECK-NEXT: Type: ET_EXEC +# CHECK-NEXT: Entry: 0x0000000000201000 # CHECK-NEXT: Sections: # CHECK-NEXT: - Name: .gnu.version # CHECK-NEXT: Type: SHT_GNU_versym diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index d7ce08af1a9a9..75f63795cb08b 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -240,7 +240,8 @@ template Expected ELFDumper::dump() { Y->Header.OSABI = Obj.getHeader().e_ident[ELF::EI_OSABI]; Y->Header.ABIVersion = Obj.getHeader().e_ident[ELF::EI_ABIVERSION]; Y->Header.Type = Obj.getHeader().e_type; - Y->Header.Machine = ELFYAML::ELF_EM(Obj.getHeader().e_machine); + if (Obj.getHeader().e_machine != 0) + Y->Header.Machine = ELFYAML::ELF_EM(Obj.getHeader().e_machine); Y->Header.Flags = Obj.getHeader().e_flags; Y->Header.Entry = Obj.getHeader().e_entry; From f7185b271f5b3010c82a56417b437f2a44a79230 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Thu, 17 Sep 2020 11:52:14 +0100 Subject: [PATCH 0977/1079] [SVE][CodeGen] Lower floating point -> integer conversions This patch adds new ISD nodes, FCVTZS_MERGE_PASSTHRU & FCVTZU_MERGE_PASSTHRU, which are used to lower scalable vector FP_TO_SINT/FP_TO_UINT operations and the following intrinsics: - llvm.aarch64.sve.fcvtzu - llvm.aarch64.sve.fcvtzs Reviewed By: efriedma, paulwalker-arm Differential Revision: https://reviews.llvm.org/D87232 --- 
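A short illustration of the new lowering; the function below is hypothetical
and not one of the committed tests, but the expected assembly mirrors the
sve-fcvt.ll checks added in this patch:

```llvm
define <vscale x 4 x i32> @cvt_example(<vscale x 4 x float> %v) {
  %r = fptosi <vscale x 4 x float> %v to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %r
}
; expected AArch64 code:
;   ptrue  p0.s
;   fcvtzs z0.s, p0/m, z0.s
;   ret
```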
.../Target/AArch64/AArch64ISelLowering.cpp | 22 ++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 76 +++-- llvm/lib/Target/AArch64/SVEInstrFormats.td | 13 +- llvm/test/CodeGen/AArch64/sve-fcvt.ll | 296 ++++++++++++++++++ llvm/test/CodeGen/AArch64/sve-split-fcvt.ll | 97 ++++++ 6 files changed, 470 insertions(+), 36 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-fcvt.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-split-fcvt.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index b961e5a30cd0f..c4f02d36c7a79 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -145,6 +145,8 @@ static bool isMergePassthruOpcode(unsigned Opc) { case AArch64ISD::FROUND_MERGE_PASSTHRU: case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU: case AArch64ISD::FTRUNC_MERGE_PASSTHRU: + case AArch64ISD::FCVTZU_MERGE_PASSTHRU: + case AArch64ISD::FCVTZS_MERGE_PASSTHRU: case AArch64ISD::FSQRT_MERGE_PASSTHRU: return true; } @@ -945,6 +947,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, for (MVT VT : MVT::integer_scalable_vector_valuetypes()) { if (isTypeLegal(VT)) { setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::FP_TO_UINT, VT, Custom); + setOperationAction(ISD::FP_TO_SINT, VT, Custom); setOperationAction(ISD::MUL, VT, Custom); setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); @@ -1504,6 +1508,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO) MAKE_CASE(AArch64ISD::ADC) @@ -2870,6 +2876,14 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op, // in the cost tables. EVT InVT = Op.getOperand(0).getValueType(); EVT VT = Op.getValueType(); + + if (VT.isScalableVector()) { + unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT + ? AArch64ISD::FCVTZU_MERGE_PASSTHRU + : AArch64ISD::FCVTZS_MERGE_PASSTHRU; + return LowerToPredicatedOp(Op, DAG, Opcode); + } + unsigned NumElts = InVT.getVectorNumElements(); // f16 conversions are promoted to f32 when full fp16 is not supported. 
@@ -3388,6 +3402,14 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::aarch64_sve_frintz: return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_fcvtzu: + return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl, + Op.getValueType(), Op.getOperand(2), Op.getOperand(3), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_fcvtzs: + return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl, + Op.getValueType(), Op.getOperand(2), Op.getOperand(3), + Op.getOperand(1)); case Intrinsic::aarch64_sve_fsqrt: return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index e34caacd272d1..3c113101c510d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -104,6 +104,8 @@ enum NodeType : unsigned { FROUNDEVEN_MERGE_PASSTHRU, FSQRT_MERGE_PASSTHRU, FTRUNC_MERGE_PASSTHRU, + FCVTZU_MERGE_PASSTHRU, + FCVTZS_MERGE_PASSTHRU, SIGN_EXTEND_INREG_MERGE_PASSTHRU, ZERO_EXTEND_INREG_MERGE_PASSTHRU, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 63545d30b2d11..fbe4b01a259af 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -211,6 +211,14 @@ def AArch64frintn_mt : SDNode<"AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU", SDT_AArch def AArch64frintz_mt : SDNode<"AArch64ISD::FTRUNC_MERGE_PASSTHRU", SDT_AArch64Arith>; def AArch64fsqrt_mt : SDNode<"AArch64ISD::FSQRT_MERGE_PASSTHRU", SDT_AArch64Arith>; +def SDT_AArch64FCVT : SDTypeProfile<1, 3, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, + SDTCVecEltisVT<1,i1> +]>; + +def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>; +def AArch64fcvtzs_mt : SDNode<"AArch64ISD::FCVTZS_MERGE_PASSTHRU", SDT_AArch64FCVT>; + def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>; def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>; @@ -1388,40 +1396,40 @@ multiclass sve_prefetch; defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>; - defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, nxv8f16, nxv4i1, nxv4f32, ElementSizeS>; - defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, nxv4f32, nxv4i1, nxv8f16, ElementSizeS>; - defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, int_aarch64_sve_scvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; - defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, int_aarch64_sve_scvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; - defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, int_aarch64_sve_ucvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; - defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, int_aarch64_sve_ucvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; - defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, int_aarch64_sve_fcvtzs, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; - defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, 
int_aarch64_sve_fcvtzs, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; - defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, int_aarch64_sve_fcvtzu, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; - defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, int_aarch64_sve_fcvtzu, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; - defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, nxv8f16, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, nxv2f64, nxv2i1, nxv8f16, ElementSizeD>; - defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, nxv4f32, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, nxv2f64, nxv2i1, nxv4f32, ElementSizeD>; - defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; - defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; - defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, nxv8f16, nxv4i1, nxv4i32, ElementSizeS>; - defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, nxv4f32, nxv2i1, nxv2i64, ElementSizeD>; - defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, nxv8f16, nxv4i1, nxv4i32, ElementSizeS>; - defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, nxv8f16, nxv2i1, nxv2i64, ElementSizeD>; - defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, nxv4f32, nxv2i1, nxv2i64, ElementSizeD>; - defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, nxv8f16, nxv2i1, nxv2i64, ElementSizeD>; - defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, int_aarch64_sve_scvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; - defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, int_aarch64_sve_ucvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; - defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, nxv2i64, nxv2i1, nxv4f32, ElementSizeD>; - defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, nxv4i32, nxv4i1, nxv8f16, ElementSizeS>; - defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>; - defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, nxv4i32, nxv4i1, nxv8f16, ElementSizeS>; - defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>; - defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, nxv2i64, nxv2i1, nxv4f32, 
ElementSizeD>; - defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, int_aarch64_sve_fcvtzs, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, null_frag, nxv8f16, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, null_frag, nxv4f32, nxv4i1, nxv8f16, ElementSizeS>; + defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, int_aarch64_sve_scvtf, null_frag, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; + defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, int_aarch64_sve_scvtf, null_frag, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; + defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, int_aarch64_sve_ucvtf, null_frag, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; + defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, int_aarch64_sve_ucvtf, null_frag, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; + defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, null_frag, AArch64fcvtzs_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; + defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, null_frag, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, null_frag, AArch64fcvtzu_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; + defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, null_frag, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, null_frag, nxv8f16, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, null_frag, nxv2f64, nxv2i1, nxv8f16, ElementSizeD>; + defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, null_frag, nxv4f32, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, null_frag, nxv2f64, nxv2i1, nxv4f32, ElementSizeD>; + defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, null_frag, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; + defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, null_frag, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; + defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, null_frag, nxv8f16, nxv4i1, nxv4i32, ElementSizeS>; + defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, null_frag, nxv4f32, nxv2i1, nxv2i64, ElementSizeD>; + defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, null_frag, nxv8f16, nxv4i1, nxv4i32, ElementSizeS>; + defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, null_frag, nxv8f16, nxv2i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, null_frag, nxv4f32, nxv2i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoH : 
sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, null_frag, nxv8f16, nxv2i1, nxv2i64, ElementSizeD>;
+  defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, int_aarch64_sve_scvtf, null_frag, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
+  defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, int_aarch64_sve_ucvtf, null_frag, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
+  defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
+  defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
+  defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>;
+  defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>;
+  defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>;
+  defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>;
+  defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>;
+  defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>;
+  defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
+  defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;

   defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", null_frag, AArch64frintn_mt>;
   defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", null_frag, AArch64frintp_mt>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 0f135c3e80593..66d8759e4d081 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2279,11 +2279,20 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,

 multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
                            RegisterOperand i_zprtype,
                            RegisterOperand o_zprtype,
-                           SDPatternOperator op, ValueType vt1,
+                           SDPatternOperator int_op,
+                           SDPatternOperator ir_op, ValueType vt1,
                            ValueType vt2, ValueType vt3,
                            ElementSizeEnum Sz> {
   def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;
-  def : SVE_3_Op_Pat<vt1, op, vt2, vt3, !cast<Instruction>(NAME)>;
+  // convert vt3 to a packed type for the intrinsic patterns
+  defvar packedvt3 = !cond(!eq(!cast<string>(vt3), "nxv2f16"): nxv8f16,
+                           !eq(!cast<string>(vt3), "nxv4f16"): nxv8f16,
+                           !eq(!cast<string>(vt3), "nxv2f32"): nxv4f32,
+                           1 : vt3);
+
+  def : SVE_3_Op_Pat<vt1, int_op, vt2, packedvt3, !cast<Instruction>(NAME)>;
+
+  def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
 }

 multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op_merge,
diff --git a/llvm/test/CodeGen/AArch64/sve-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-fcvt.ll
new file mode 100644
index 0000000000000..28eaab21a9fe2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fcvt.ll
@@ -0,0 +1,296 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc 
-mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. +; WARN-NOT: warning + +; +; FP_TO_SINT +; + +define @fcvtzs_h_nxv2f16( %a) { +; CHECK-LABEL: fcvtzs_h_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +define @fcvtzs_h_nxv2f32( %a) { +; CHECK-LABEL: fcvtzs_h_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +define @fcvtzs_h_nxv2f64( %a) { +; CHECK-LABEL: fcvtzs_h_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +define @fcvtzs_h_nxv4f16( %a) { +; CHECK-LABEL: fcvtzs_h_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +define @fcvtzs_h_nxv4f32( %a) { +; CHECK-LABEL: fcvtzs_h_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +define @fcvtzs_h_nxv8f16( %a) { +; CHECK-LABEL: fcvtzs_h_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +define @fcvtzs_s_nxv2f16( %a) { +; CHECK-LABEL: fcvtzs_s_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +define @fcvtzs_s_nxv2f32( %a) { +; CHECK-LABEL: fcvtzs_s_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +define @fcvtzs_s_nxv2f64( %a) { +; CHECK-LABEL: fcvtzs_s_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +define @fcvtzs_s_nxv4f16( %a) { +; CHECK-LABEL: fcvtzs_s_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +define @fcvtzs_s_nxv4f32( %a) { +; CHECK-LABEL: fcvtzs_s_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +define @fcvtzs_d_nxv2f16( %a) { +; CHECK-LABEL: fcvtzs_d_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +define @fcvtzs_d_nxv2f32( %a) { +; CHECK-LABEL: fcvtzs_d_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +define @fcvtzs_d_nxv2f64( %a) { +; CHECK-LABEL: fcvtzs_d_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +; +; FP_TO_UINT +; + +; NOTE: Using fcvtzs is safe as fptoui overflow is considered poison and a +; 64bit signed value encompasses the entire range of a 16bit unsigned value +define @fcvtzu_h_nxv2f16( %a) { +; CHECK-LABEL: fcvtzu_h_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: ret + %res = 
fptoui %a to + ret %res +} + +define @fcvtzu_h_nxv2f32( %a) { +; CHECK-LABEL: fcvtzu_h_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: ret + %res = fptoui %a to + ret %res +} + +define @fcvtzu_h_nxv2f64( %a) { +; CHECK-LABEL: fcvtzu_h_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = fptoui %a to + ret %res +} + +define @fcvtzu_h_nxv4f16( %a) { +; CHECK-LABEL: fcvtzu_h_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h +; CHECK-NEXT: ret + %res = fptoui %a to + ret %res +} + +define @fcvtzu_h_nxv4f32( %a) { +; CHECK-LABEL: fcvtzu_h_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +define @fcvtzu_h_nxv8f16( %a) { +; CHECK-LABEL: fcvtzu_h_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = fptoui %a to + ret %res +} + +define @fcvtzu_s_nxv2f16( %a) { +; CHECK-LABEL: fcvtzu_s_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: ret + %res = fptoui %a to + ret %res +} + +define @fcvtzu_s_nxv2f32( %a) { +; CHECK-LABEL: fcvtzu_s_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: ret + %res = fptoui %a to + ret %res +} + +define @fcvtzu_s_nxv2f64( %a) { +; CHECK-LABEL: fcvtzu_s_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = fptoui %a to + ret %res +} + +define @fcvtzu_s_nxv4f16( %a) { +; CHECK-LABEL: fcvtzu_s_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h +; CHECK-NEXT: ret + %res = fptoui %a to + ret %res +} + +define @fcvtzu_s_nxv4f32( %a) { +; CHECK-LABEL: fcvtzu_s_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = fptoui %a to + ret %res +} + +define @fcvtzu_d_nxv2f16( %a) { +; CHECK-LABEL: fcvtzu_d_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h +; CHECK-NEXT: ret + %res = fptoui %a to + ret %res +} + +define @fcvtzu_d_nxv2f32( %a) { +; CHECK-LABEL: fcvtzu_d_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s +; CHECK-NEXT: ret + %res = fptoui %a to + ret %res +} + +define @fcvtzu_d_nxv2f64( %a) { +; CHECK-LABEL: fcvtzu_d_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = fptoui %a to + ret %res +} diff --git a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll new file mode 100644 index 0000000000000..fbd9beceaa1f0 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
+; WARN-NOT: warning + +; FP_TO_SINT + +; Split operand +define @fcvtzs_s_nxv4f64( %a) { +; CHECK-LABEL: fcvtzs_s_nxv4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +define @fcvtzs_h_nxv8f64( %a) { +; CHECK-LABEL: fcvtzs_h_nxv8f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +; Split result +define @fcvtzs_d_nxv4f32( %a) { +; CHECK-LABEL: fcvtzs_d_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s +; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.s +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +define @fcvtzs_s_nxv16f16( %a) { +; CHECK-LABEL: fcvtzs_s_nxv16f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z2.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: uunpklo z4.s, z1.h +; CHECK-NEXT: uunpkhi z5.s, z1.h +; CHECK-NEXT: fcvtzs z0.s, p0/m, z2.h +; CHECK-NEXT: fcvtzs z1.s, p0/m, z3.h +; CHECK-NEXT: fcvtzs z2.s, p0/m, z4.h +; CHECK-NEXT: fcvtzs z3.s, p0/m, z5.h +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + +; FP_TO_UINT + +; Split operand +define @fcvtzu_s_nxv4f64( %a) { +; CHECK-LABEL: fcvtzu_s_nxv4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %res = fptoui %a to + ret %res +} + +; Split result +define @fcvtzu_d_nxv4f32( %a) { +; CHECK-LABEL: fcvtzu_d_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.s +; CHECK-NEXT: fcvtzu z1.d, p0/m, z2.s +; CHECK-NEXT: ret + %res = fptoui %a to + ret %res +} From 9dc1e53787abbf4f2624c73272bf00e23fdffba0 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 16 Sep 2020 18:44:40 +0100 Subject: [PATCH 0978/1079] [MemorySSA] Add another loop clobber test case. --- .../Analysis/MemorySSA/phi-translation.ll | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/llvm/test/Analysis/MemorySSA/phi-translation.ll b/llvm/test/Analysis/MemorySSA/phi-translation.ll index 7fa6e6c69057e..5e065a27baff4 100644 --- a/llvm/test/Analysis/MemorySSA/phi-translation.ll +++ b/llvm/test/Analysis/MemorySSA/phi-translation.ll @@ -474,3 +474,45 @@ cleanup: ; preds = %while.body, %while. 
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + +define void @another_loop_clobber() { +; CHECK-LABEL: void @another_loop_clobber +; CHECK-LABEL: loop.header: +; CHECK-NEXT: ; 4 = MemoryPhi({entry,1},{cond.read,3}) + +; CHECK-LABEL: cond.read: +; NOLIMIT: ; MemoryUse(liveOnEntry) +; LIMIT: ; MemoryUse(4) +; CHECK-NEXT: %use = load i32, i32* %ptr.1, align 4 +; CHECK-NEXT: ; 2 = MemoryDef(4) +; CHECK-NEXT: %c.2 = call i1 @cond(i32 %use) +; CHECK-NEXT: %ptr.10 = getelementptr inbounds [12 x i32], [12 x i32]* %nodeStack, i32 0, i32 %inc +; CHECK-NEXT: ; 3 = MemoryDef(2) +; CHECK-NEXT: store i32 10, i32* %ptr.2, align 4 + +entry: + %nodeStack = alloca [12 x i32], align 4 + %c.1 = call i1 @cond(i32 1) + br i1 %c.1, label %cleanup, label %loop.header + +loop.header: ; preds = %entry, %while.cond.backedge + %depth.1 = phi i32 [ %inc, %cond.read], [ 1, %entry ] + %cmp = icmp sgt i32 %depth.1, 0 + %inc = add nsw i32 %depth.1, 3 + %inc2 = add nsw i32 %depth.1, 6 + br i1 %cmp, label %cond.read, label %cleanup + +cond.read: ; preds = %while.cond + %ptr.1 = getelementptr inbounds [12 x i32], [12 x i32]* %nodeStack, i32 0, i32 %depth.1 + %ptr.2 = getelementptr inbounds [12 x i32], [12 x i32]* %nodeStack, i32 0, i32 %inc2 + %use = load i32, i32* %ptr.1, align 4 + %c.2 = call i1 @cond(i32 %use) + %ptr.10 = getelementptr inbounds [12 x i32], [12 x i32]* %nodeStack, i32 0, i32 %inc + store i32 10, i32* %ptr.2, align 4 + br i1 %c.2, label %loop.header, label %cleanup + +cleanup: + ret void +} + +declare i1 @cond(i32) From deb8f8bcf31540c657716ea5242183b0792702a1 Mon Sep 17 00:00:00 2001 From: Yvan Roux Date: Thu, 17 Sep 2020 15:13:55 +0200 Subject: [PATCH 0979/1079] [ARM][MachineOutliner] Add missing testcase for calls. --- .../CodeGen/ARM/machine-outliner-calls.mir | 360 ++++++++++++++++++ 1 file changed, 360 insertions(+) create mode 100644 llvm/test/CodeGen/ARM/machine-outliner-calls.mir diff --git a/llvm/test/CodeGen/ARM/machine-outliner-calls.mir b/llvm/test/CodeGen/ARM/machine-outliner-calls.mir new file mode 100644 index 0000000000000..7880ddfb0051c --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-outliner-calls.mir @@ -0,0 +1,360 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=arm-- -run-pass=prologepilog -run-pass=machine-outliner \ +# RUN: -verify-machineinstrs %s -o - | FileCheck %s + +--- | + define void @outline_call_arm() #0 { ret void } + define void @outline_call_thumb() #1 { ret void } + define void @outline_call_tailcall_arm() #0 { ret void } + define void @outline_call_tailcall_thumb() #1 { ret void } + define void @outline_call_KO_mcount() #0 { ret void } + define void @bar() #0 { ret void } + declare void @"\01mcount"() + + attributes #0 = { minsize optsize } + attributes #1 = { minsize optsize "target-features"="+armv7-a,+thumb-mode" } +... 
+--- + +name: outline_call_arm +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: outline_call_arm + ; CHECK: bb.0: + ; CHECK: liveins: $r4, $lr + ; CHECK: $sp = frame-setup STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $lr + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8 + ; CHECK: BL @OUTLINED_FUNCTION_0 + ; CHECK: bb.1: + ; CHECK: BL @OUTLINED_FUNCTION_0 + ; CHECK: bb.2: + ; CHECK: BL @OUTLINED_FUNCTION_0 + ; CHECK: bb.3: + ; CHECK: BL @OUTLINED_FUNCTION_0 + ; CHECK: bb.4: + ; CHECK: BL @OUTLINED_FUNCTION_0 + ; CHECK: bb.5: + ; CHECK: $sp = frame-destroy LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4, def $lr + ; CHECK: BX_RET 14 /* CC::al */, $noreg + bb.0: + BL @bar, implicit-def dead $lr, implicit $sp + $r0 = MOVi 1, 14, $noreg, $noreg + $r1 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r3 = MOVi 1, 14, $noreg, $noreg + $r4 = MOVi 1, 14, $noreg, $noreg + bb.1: + BL @bar, implicit-def dead $lr, implicit $sp + $r0 = MOVi 1, 14, $noreg, $noreg + $r1 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r3 = MOVi 1, 14, $noreg, $noreg + $r4 = MOVi 1, 14, $noreg, $noreg + bb.2: + BL @bar, implicit-def dead $lr, implicit $sp + $r0 = MOVi 1, 14, $noreg, $noreg + $r1 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r3 = MOVi 1, 14, $noreg, $noreg + $r4 = MOVi 1, 14, $noreg, $noreg + bb.3: + BL @bar, implicit-def dead $lr, implicit $sp + $r0 = MOVi 1, 14, $noreg, $noreg + $r1 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r3 = MOVi 1, 14, $noreg, $noreg + $r4 = MOVi 1, 14, $noreg, $noreg + bb.4: + BL @bar, implicit-def dead $lr, implicit $sp + $r0 = MOVi 1, 14, $noreg, $noreg + $r1 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r3 = MOVi 1, 14, $noreg, $noreg + $r4 = MOVi 1, 14, $noreg, $noreg + bb.5: + BX_RET 14, $noreg +... 
+--- + +name: outline_call_thumb +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: outline_call_thumb + ; CHECK: bb.0: + ; CHECK: liveins: $r7, $lr + ; CHECK: $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_3 + ; CHECK: bb.1: + ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_3 + ; CHECK: bb.2: + ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_3 + ; CHECK: bb.3: + ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_3 + ; CHECK: bb.4: + ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_3 + ; CHECK: bb.5: + ; CHECK: $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r7, def $pc + bb.0: + tBL 14, $noreg, @bar, implicit-def dead $lr, implicit $sp + $r0 = t2MOVi 1, 14, $noreg, $noreg + $r1 = t2MOVi 1, 14, $noreg, $noreg + $r2 = t2MOVi 1, 14, $noreg, $noreg + bb.1: + tBL 14, $noreg, @bar, implicit-def dead $lr, implicit $sp + $r0 = t2MOVi 1, 14, $noreg, $noreg + $r1 = t2MOVi 1, 14, $noreg, $noreg + $r2 = t2MOVi 1, 14, $noreg, $noreg + bb.2: + tBL 14, $noreg, @bar, implicit-def dead $lr, implicit $sp + $r0 = t2MOVi 1, 14, $noreg, $noreg + $r1 = t2MOVi 1, 14, $noreg, $noreg + $r2 = t2MOVi 1, 14, $noreg, $noreg + bb.3: + tBL 14, $noreg, @bar, implicit-def dead $lr, implicit $sp + $r0 = t2MOVi 1, 14, $noreg, $noreg + $r1 = t2MOVi 1, 14, $noreg, $noreg + $r2 = t2MOVi 1, 14, $noreg, $noreg + bb.4: + tBL 14, $noreg, @bar, implicit-def dead $lr, implicit $sp + $r0 = t2MOVi 1, 14, $noreg, $noreg + $r1 = t2MOVi 1, 14, $noreg, $noreg + $r2 = t2MOVi 1, 14, $noreg, $noreg + bb.5: + tBX_RET 14, $noreg +... +--- + +name: outline_call_tailcall_arm +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: outline_call_tailcall_arm + ; CHECK: bb.0: + ; CHECK: liveins: $r4, $lr + ; CHECK: $sp = frame-setup STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $lr + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8 + ; CHECK: BL @OUTLINED_FUNCTION_2 + ; CHECK: bb.1: + ; CHECK: BL @OUTLINED_FUNCTION_2 + ; CHECK: bb.2: + ; CHECK: BL @OUTLINED_FUNCTION_2 + ; CHECK: bb.3: + ; CHECK: $sp = frame-destroy LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4, def $lr + ; CHECK: BX_RET 14 /* CC::al */, $noreg + bb.0: + BL @bar, implicit-def dead $lr, implicit $sp + $r0 = MOVi 2, 14, $noreg, $noreg + $r1 = MOVi 2, 14, $noreg, $noreg + $r2 = MOVi 2, 14, $noreg, $noreg + $r3 = MOVi 2, 14, $noreg, $noreg + $r4 = MOVi 2, 14, $noreg, $noreg + BL @bar, implicit-def dead $lr, implicit $sp + bb.1: + BL @bar, implicit-def dead $lr, implicit $sp + $r0 = MOVi 2, 14, $noreg, $noreg + $r1 = MOVi 2, 14, $noreg, $noreg + $r2 = MOVi 2, 14, $noreg, $noreg + $r3 = MOVi 2, 14, $noreg, $noreg + $r4 = MOVi 2, 14, $noreg, $noreg + BL @bar, implicit-def dead $lr, implicit $sp + bb.2: + BL @bar, implicit-def dead $lr, implicit $sp + $r0 = MOVi 2, 14, $noreg, $noreg + $r1 = MOVi 2, 14, $noreg, $noreg + $r2 = MOVi 2, 14, $noreg, $noreg + $r3 = MOVi 2, 14, $noreg, $noreg + $r4 = MOVi 2, 14, $noreg, $noreg + BL @bar, implicit-def dead $lr, implicit $sp + bb.3: + BX_RET 14, $noreg +... 
+--- + +name: outline_call_tailcall_thumb +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: outline_call_tailcall_thumb + ; CHECK: bb.0: + ; CHECK: liveins: $r7, $lr + ; CHECK: $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_4 + ; CHECK: bb.1: + ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_4 + ; CHECK: bb.2: + ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_4 + ; CHECK: bb.3: + ; CHECK: $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r7, def $pc + bb.0: + tBL 14, $noreg, @bar, implicit-def dead $lr, implicit $sp + $r0 = t2MOVi 2, 14, $noreg, $noreg + $r1 = t2MOVi 2, 14, $noreg, $noreg + $r2 = t2MOVi 2, 14, $noreg, $noreg + tBL 14, $noreg, @bar, implicit-def dead $lr, implicit $sp + bb.1: + tBL 14, $noreg, @bar, implicit-def dead $lr, implicit $sp + $r0 = t2MOVi 2, 14, $noreg, $noreg + $r1 = t2MOVi 2, 14, $noreg, $noreg + $r2 = t2MOVi 2, 14, $noreg, $noreg + tBL 14, $noreg, @bar, implicit-def dead $lr, implicit $sp + bb.2: + tBL 14, $noreg, @bar, implicit-def dead $lr, implicit $sp + $r0 = t2MOVi 2, 14, $noreg, $noreg + $r1 = t2MOVi 2, 14, $noreg, $noreg + $r2 = t2MOVi 2, 14, $noreg, $noreg + tBL 14, $noreg, @bar, implicit-def dead $lr, implicit $sp + bb.3: + tBX_RET 14, $noreg +... +--- + +name: outline_call_KO_mcount +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: outline_call_KO_mcount + ; CHECK: bb.0: + ; CHECK: liveins: $r4, $lr + ; CHECK: $sp = frame-setup STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $lr + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8 + ; CHECK: BL @"\01mcount", csr_aapcs, implicit-def dead $lr, implicit $sp + ; CHECK: BL @OUTLINED_FUNCTION_1 + ; CHECK: bb.1: + ; CHECK: BL @"\01mcount", csr_aapcs, implicit-def dead $lr, implicit $sp + ; CHECK: BL @OUTLINED_FUNCTION_1 + ; CHECK: bb.2: + ; CHECK: BL @"\01mcount", csr_aapcs, implicit-def dead $lr, implicit $sp + ; CHECK: BL @OUTLINED_FUNCTION_1 + ; CHECK: bb.3: + ; CHECK: BL @"\01mcount", csr_aapcs, implicit-def dead $lr, implicit $sp + ; CHECK: BL @OUTLINED_FUNCTION_1 + ; CHECK: bb.4: + ; CHECK: BL @"\01mcount", csr_aapcs, implicit-def dead $lr, implicit $sp + ; CHECK: BL @OUTLINED_FUNCTION_1 + ; CHECK: bb.5: + ; CHECK: $sp = frame-destroy LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4, def $lr + ; CHECK: BX_RET 14 /* CC::al */, $noreg + bb.0: + BL @"\01mcount", csr_aapcs, implicit-def dead $lr, implicit $sp + $r0 = MOVi 3, 14, $noreg, $noreg + $r1 = MOVi 3, 14, $noreg, $noreg + $r2 = MOVi 3, 14, $noreg, $noreg + $r3 = MOVi 3, 14, $noreg, $noreg + $r4 = MOVi 3, 14, $noreg, $noreg + bb.1: + BL @"\01mcount", csr_aapcs, implicit-def dead $lr, implicit $sp + $r0 = MOVi 3, 14, $noreg, $noreg + $r1 = MOVi 3, 14, $noreg, $noreg + $r2 = MOVi 3, 14, $noreg, $noreg + $r3 = MOVi 3, 14, $noreg, $noreg + $r4 = MOVi 3, 14, $noreg, $noreg + bb.2: + BL @"\01mcount", csr_aapcs, implicit-def dead $lr, implicit $sp + $r0 = MOVi 3, 14, $noreg, $noreg + $r1 = MOVi 3, 14, $noreg, $noreg + $r2 = MOVi 3, 14, $noreg, $noreg + $r3 = MOVi 3, 14, $noreg, $noreg + $r4 = MOVi 3, 14, $noreg, $noreg + bb.3: + BL @"\01mcount", csr_aapcs, implicit-def dead $lr, implicit $sp + $r0 = MOVi 3, 14, $noreg, $noreg + $r1 = 
MOVi 3, 14, $noreg, $noreg + $r2 = MOVi 3, 14, $noreg, $noreg + $r3 = MOVi 3, 14, $noreg, $noreg + $r4 = MOVi 3, 14, $noreg, $noreg + bb.4: + BL @"\01mcount", csr_aapcs, implicit-def dead $lr, implicit $sp + $r0 = MOVi 3, 14, $noreg, $noreg + $r1 = MOVi 3, 14, $noreg, $noreg + $r2 = MOVi 3, 14, $noreg, $noreg + $r3 = MOVi 3, 14, $noreg, $noreg + $r4 = MOVi 3, 14, $noreg, $noreg + bb.5: + BX_RET 14, $noreg +... +--- + +name: bar +tracksRegLiveness: true +body: | + bb.0: + BX_RET 14, $noreg + + + ; CHECK-LABEL: name: OUTLINED_FUNCTION_0 + ; CHECK: bb.0: + ; CHECK: liveins: $r11, $r10, $r9, $r8, $r7, $r6, $r5, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8, $lr + ; CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, 8 + ; CHECK: BL @bar, implicit-def dead $lr, implicit $sp + ; CHECK: $r0 = MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r1 = MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r2 = MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r3 = MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r4 = MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ; CHECK: MOVPCLR 14 /* CC::al */, $noreg + + ; CHECK-LABEL: name: OUTLINED_FUNCTION_1 + ; CHECK: bb.0: + ; CHECK: liveins: $r11, $r10, $r9, $r8, $r7, $r6, $r5, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8 + ; CHECK: $r0 = MOVi 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r1 = MOVi 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r2 = MOVi 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r3 = MOVi 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r4 = MOVi 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: MOVPCLR 14 /* CC::al */, $noreg + + ; CHECK-LABEL: name: OUTLINED_FUNCTION_2 + ; CHECK: bb.0: + ; CHECK: liveins: $r11, $r10, $r9, $r8, $r7, $r6, $r5, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8, $lr + ; CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, 8 + ; CHECK: BL @bar, implicit-def dead $lr, implicit $sp + ; CHECK: $r0 = MOVi 2, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r1 = MOVi 2, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r2 = MOVi 2, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r3 = MOVi 2, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r4 = MOVi 2, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ; CHECK: TAILJMPd @bar, implicit $sp + + ; CHECK-LABEL: name: OUTLINED_FUNCTION_3 + ; CHECK: bb.0: + ; CHECK: liveins: $r11, $r10, $r9, $r8, $r6, $r5, $r4, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8, $lr + ; CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, 8 + ; CHECK: tBL 14 /* CC::al */, $noreg, @bar, implicit-def dead $lr, implicit $sp + ; CHECK: $r0 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r1 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r2 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ; CHECK: tBX_RET 14 /* CC::al */, $noreg + + ; CHECK-LABEL: name: OUTLINED_FUNCTION_4 + ; CHECK: bb.0: + ; CHECK: liveins: $r11, $r10, $r9, $r8, $r6, $r5, $r4, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8, $lr + ; CHECK: 
early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg
+  ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, 8
+  ; CHECK:   tBL 14 /* CC::al */, $noreg, @bar, implicit-def dead $lr, implicit $sp
+  ; CHECK:   $r0 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   $r1 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   $r2 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg
+  ; CHECK:   tTAILJMPdND @bar, 14 /* CC::al */, $noreg, implicit $sp
+
+

From f026812110878484d003f18660492e9321ef2df1 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 17 Sep 2020 14:27:15 +0100
Subject: [PATCH 0980/1079] InstCombiner.h - remove unnecessary KnownBits.h include. NFCI.

Move the include down to cpp files with an implicit dependency.

---
 llvm/include/llvm/Transforms/InstCombine/InstCombiner.h | 1 -
 llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp   | 1 +
 llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp          | 1 +
 llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp      | 1 +
 llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp         | 1 +
 5 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index 2f412cb3ddacc..409a217a73abe 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -24,7 +24,6 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
 #include <cassert>

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index b441351211734..209f932536541 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -15,6 +15,7 @@
 //===----------------------------------------------------------------------===//

 #include "AMDGPUTargetTransformInfo.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"

 using namespace llvm;

diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 2f89e807c1c5d..ce3910754e5b2 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -26,6 +26,7 @@
 #include "llvm/IR/Type.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MachineValueType.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index f3529718b8653..5db5ab47f29e4 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -17,6 +17,7 @@
 #include "llvm/IR/IntrinsicsPowerPC.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
 #include "llvm/Transforms/Utils/Local.h"

diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index 2390a98183692..94ee799010756 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -16,6 +16,7 @@
 #include "X86TargetTransformInfo.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/IntrinsicsX86.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"

 using namespace llvm;

From e4a198eeee3ca96ff324d5b786e44c4915334054 Mon Sep 17 00:00:00 2001
From: jerryyin
Date: Wed, 16 Sep 2020 08:57:37 -0700
Subject: [PATCH 0981/1079] [AMDGPU] Bump to ROCm 3.7 dependency hip_hcc->amdhip64

Differential Revision: https://reviews.llvm.org/D87773

---
 mlir/tools/mlir-rocm-runner/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/tools/mlir-rocm-runner/CMakeLists.txt b/mlir/tools/mlir-rocm-runner/CMakeLists.txt
index 9b07d00d80961..2c0791d7a5c1d 100644
--- a/mlir/tools/mlir-rocm-runner/CMakeLists.txt
+++ b/mlir/tools/mlir-rocm-runner/CMakeLists.txt
@@ -38,7 +38,7 @@ if(MLIR_ROCM_RUNNER_ENABLED)
   add_definitions(-D__ROCM_PATH__="${ROCM_PATH}")

   # Locate HIP runtime library.
-  find_library(ROCM_RUNTIME_LIBRARY hip_hcc
+  find_library(ROCM_RUNTIME_LIBRARY amdhip64
                PATHS "${HIP_PATH}/lib")
   if (NOT ROCM_RUNTIME_LIBRARY)
     message(SEND_ERROR "Could not locate ROCm HIP runtime library")

From 67ae46c820fa680e7f5828b4d8b94a562f51c9bf Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 17 Sep 2020 14:45:46 +0100
Subject: [PATCH 0982/1079] SafeStackLayout.cpp - remove unnecessary StackLifetime.h include. NFCI.

Already included in SafeStackLayout.h

---
 llvm/lib/CodeGen/SafeStackLayout.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SafeStackLayout.cpp b/llvm/lib/CodeGen/SafeStackLayout.cpp
index c823454f825cd..f333e5046ec62 100644
--- a/llvm/lib/CodeGen/SafeStackLayout.cpp
+++ b/llvm/lib/CodeGen/SafeStackLayout.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//

 #include "SafeStackLayout.h"
-#include "llvm/Analysis/StackLifetime.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"

From 69516ddd028e8314f575a90bfca1724818fb5ca6 Mon Sep 17 00:00:00 2001
From: Kamil Rytarowski
Date: Thu, 17 Sep 2020 16:02:59 +0200
Subject: [PATCH 0983/1079] [compiler-rt] Avoid pulling libatomic to sanitizer tests

Avoid falling back to the software-emulated compiler atomics that are
usually provided by libatomic, which is not always present. When
ATOMIC_LLONG_LOCK_FREE == 2, 64-bit atomics are guaranteed lock-free, so
the compiler can emit native instructions instead of __atomic_* libcalls;
the 64-bit test cases are now guarded on that condition.

This fixes the test on NetBSD, which does not provide libatomic in base.

Reviewed By: vitalybuka

Differential Revision: https://reviews.llvm.org/D87568

---
 .../tests/sanitizer_atomic_test.cpp | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_atomic_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_atomic_test.cpp
index 9a3078b25d762..3136886854fa5 100644
--- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_atomic_test.cpp
+++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_atomic_test.cpp
@@ -12,6 +12,18 @@
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "gtest/gtest.h"

+#ifndef __has_extension
+#define __has_extension(x) 0
+#endif
+
+#if __has_extension(c_atomic) || __has_extension(cxx_atomic)
+#define ATOMIC_LLONG_LOCK_FREE __CLANG_ATOMIC_LLONG_LOCK_FREE
+#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
+#define ATOMIC_LLONG_LOCK_FREE __GCC_ATOMIC_LLONG_LOCK_FREE
+#else
+#error Unsupported compiler.
+#endif
+
 namespace __sanitizer {

 template
@@ -69,11 +81,15 @@ TEST(SanitizerCommon, AtomicStoreLoad) {
   CheckStoreLoad();
   CheckStoreLoad();
+  // Avoid falling back to the software-emulated compiler atomics that are
+  // usually provided by libatomic, which is not always present.
+#if ATOMIC_LLONG_LOCK_FREE == 2
   CheckStoreLoad();
   CheckStoreLoad();
   CheckStoreLoad();
   CheckStoreLoad();
   CheckStoreLoad();
+#endif
   CheckStoreLoad ();
@@ -119,7 +135,9 @@ TEST(SanitizerCommon, AtomicCompareExchangeTest) {
   CheckAtomicCompareExchange();
   CheckAtomicCompareExchange();
   CheckAtomicCompareExchange();
+#if ATOMIC_LLONG_LOCK_FREE == 2
   CheckAtomicCompareExchange();
+#endif
   CheckAtomicCompareExchange();
 }
 #endif //!SANITIZER_ANDROID

From d566771779cd408bbe4985ea56e9b3c2ba247ed3 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 17 Sep 2020 15:00:11 +0100
Subject: [PATCH 0984/1079] ValueList.cpp - remove unnecessary includes. NFCI.

Already included in ValueList.h

---
 llvm/lib/Bitcode/Reader/ValueList.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/llvm/lib/Bitcode/Reader/ValueList.cpp b/llvm/lib/Bitcode/Reader/ValueList.cpp
index 63a206eeb022c..ddfa28c6b1e44 100644
--- a/llvm/lib/Bitcode/Reader/ValueList.cpp
+++ b/llvm/lib/Bitcode/Reader/ValueList.cpp
@@ -16,14 +16,11 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
-#include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
-#include <cstddef>
 #include <limits>
 #include <utility>
-#include <vector>

 using namespace llvm;

From 46e59062a0e25be6e29d3fb342402f69b0e470b1 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 17 Sep 2020 15:03:53 +0100
Subject: [PATCH 0985/1079] DwarfExpression.cpp - remove unnecessary includes. NFCI.

Already included in DwarfExpression.h

---
 llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index b0fa8645de248..a2bd35d232daf 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -18,11 +18,8 @@
 #include "llvm/CodeGen/Register.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
-#include <cassert>
-#include <cstdint>

 using namespace llvm;

From 85ba2f16633638e55ebc8e84bfbd0aaaa2f72b7a Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 17 Sep 2020 15:05:45 +0100
Subject: [PATCH 0986/1079] LiveDebugVariables.cpp - remove unnecessary Compiler.h include. NFCI.

Already included in LiveDebugVariables.h

---
 llvm/lib/CodeGen/LiveDebugVariables.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp
index bfc6483db39a7..bd7024e8f483c 100644
--- a/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -54,7 +54,6 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>

From 85e578f53ad1ba21771470dc9516068a259d29cf Mon Sep 17 00:00:00 2001
From: Kamil Rytarowski
Date: Thu, 17 Sep 2020 16:04:50 +0200
Subject: [PATCH 0987/1079] [compiler-rt] Replace INLINE with inline

This fixes the clash with BSD headers.
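As a rough illustration of the clash being avoided, here is a minimal
sketch (hypothetical header contents; the real INLINE definitions live in
the BSD system headers and in sanitizer_internal_defs.h, whose
"#ifndef INLINE / #define INLINE inline" block is removed below):

  // Stand-in for a BSD system header that defines its own INLINE macro.
  #define INLINE static __inline

  // What sanitizer_internal_defs.h used to do. If the system macro is seen
  // first, the #ifndef guard keeps INLINE as "static __inline", so every
  // sanitizer "INLINE" function silently changes linkage; if the sanitizer
  // definition comes first, platform code has to "#undef INLINE" before
  // including kernel headers, as sanitizer_platform_limits_freebsd.cpp did.
  #ifndef INLINE
  #define INLINE inline
  #endif

  INLINE bool IsPowerOfTwo(unsigned long x) { return x && !(x & (x - 1)); }

  int main() { return IsPowerOfTwo(8) ? 0 : 1; }

Spelling the C++ keyword directly sidesteps the macro ownership question
entirely.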
Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D87562 --- compiler-rt/lib/asan/asan_malloc_linux.cpp | 8 +-- compiler-rt/lib/asan/asan_malloc_local.h | 2 +- compiler-rt/lib/asan/asan_report.cpp | 2 +- compiler-rt/lib/msan/tests/msan_test.cpp | 18 +++--- .../sanitizer_common/sanitizer_allocator.h | 6 +- .../sanitizer_allocator_checks.h | 10 ++-- .../sanitizer_allocator_secondary.h | 8 +-- .../lib/sanitizer_common/sanitizer_atomic.h | 4 +- .../sanitizer_common/sanitizer_atomic_clang.h | 14 ++--- .../sanitizer_atomic_clang_mips.h | 10 ++-- .../sanitizer_atomic_clang_other.h | 6 +- .../sanitizer_atomic_clang_x86.h | 6 +- .../sanitizer_common/sanitizer_atomic_msvc.h | 36 ++++++------ .../lib/sanitizer_common/sanitizer_common.h | 56 +++++++++---------- .../sanitizer_internal_defs.h | 3 - .../lib/sanitizer_common/sanitizer_linux.h | 2 +- .../sanitizer_linux_libcdep.cpp | 2 +- .../lib/sanitizer_common/sanitizer_mac.h | 2 +- .../sanitizer_platform_limits_freebsd.cpp | 2 - .../sanitizer_symbolizer_report.cpp | 4 +- compiler-rt/lib/scudo/scudo_allocator.cpp | 24 ++++---- compiler-rt/lib/scudo/scudo_crc32.h | 2 +- compiler-rt/lib/scudo/scudo_tsd.h | 8 +-- compiler-rt/lib/scudo/scudo_utils.cpp | 2 +- compiler-rt/lib/scudo/scudo_utils.h | 2 +- compiler-rt/lib/tsan/rtl/tsan_interceptors.h | 2 +- compiler-rt/lib/tsan/rtl/tsan_rtl.h | 10 ++-- 27 files changed, 123 insertions(+), 128 deletions(-) diff --git a/compiler-rt/lib/asan/asan_malloc_linux.cpp b/compiler-rt/lib/asan/asan_malloc_linux.cpp index cb6c0ced0494b..9c3f0a5338ee5 100644 --- a/compiler-rt/lib/asan/asan_malloc_linux.cpp +++ b/compiler-rt/lib/asan/asan_malloc_linux.cpp @@ -34,7 +34,7 @@ static uptr last_dlsym_alloc_size_in_words; static const uptr kDlsymAllocPoolSize = SANITIZER_RTEMS ? 4096 : 1024; static uptr alloc_memory_for_dlsym[kDlsymAllocPoolSize]; -static INLINE bool IsInDlsymAllocPool(const void *ptr) { +static inline bool IsInDlsymAllocPool(const void *ptr) { uptr off = (uptr)ptr - (uptr)alloc_memory_for_dlsym; return off < allocated_for_dlsym * sizeof(alloc_memory_for_dlsym[0]); } @@ -95,12 +95,12 @@ bool IsFromLocalPool(const void *ptr) { } #endif -static INLINE bool MaybeInDlsym() { +static inline bool MaybeInDlsym() { // Fuchsia doesn't use dlsym-based interceptors. 
return !SANITIZER_FUCHSIA && asan_init_is_running; } -static INLINE bool UseLocalPool() { +static inline bool UseLocalPool() { return EarlyMalloc() || MaybeInDlsym(); } @@ -304,4 +304,4 @@ void ReplaceSystemMalloc() { #endif // SANITIZER_ANDROID #endif // SANITIZER_FREEBSD || SANITIZER_FUCHSIA || SANITIZER_LINUX || - // SANITIZER_NETBSD || SANITIZER_SOLARIS \ No newline at end of file + // SANITIZER_NETBSD || SANITIZER_SOLARIS diff --git a/compiler-rt/lib/asan/asan_malloc_local.h b/compiler-rt/lib/asan/asan_malloc_local.h index 3f784b90c739c..e2c9be0379f2f 100644 --- a/compiler-rt/lib/asan/asan_malloc_local.h +++ b/compiler-rt/lib/asan/asan_malloc_local.h @@ -17,7 +17,7 @@ #include "sanitizer_common/sanitizer_platform.h" #include "asan_internal.h" -static INLINE bool EarlyMalloc() { +static inline bool EarlyMalloc() { return SANITIZER_RTEMS && (!__asan::asan_inited || __asan::asan_init_is_running); } diff --git a/compiler-rt/lib/asan/asan_report.cpp b/compiler-rt/lib/asan/asan_report.cpp index 99e8678aa7857..4b4db1db6dc9c 100644 --- a/compiler-rt/lib/asan/asan_report.cpp +++ b/compiler-rt/lib/asan/asan_report.cpp @@ -411,7 +411,7 @@ static bool IsInvalidPointerPair(uptr a1, uptr a2) { return false; } -static INLINE void CheckForInvalidPointerPair(void *p1, void *p2) { +static inline void CheckForInvalidPointerPair(void *p1, void *p2) { switch (flags()->detect_invalid_pointer_pairs) { case 0: return; diff --git a/compiler-rt/lib/msan/tests/msan_test.cpp b/compiler-rt/lib/msan/tests/msan_test.cpp index 4c98bb4861f20..6306b3dbfb82d 100644 --- a/compiler-rt/lib/msan/tests/msan_test.cpp +++ b/compiler-rt/lib/msan/tests/msan_test.cpp @@ -139,7 +139,7 @@ typedef signed short S2; typedef signed int S4; typedef signed long long S8; #define NOINLINE __attribute__((noinline)) -#define INLINE __attribute__((always_inline)) +#define ALWAYS_INLINE __attribute__((always_inline)) static bool TrackingOrigins() { S8 x; @@ -4312,7 +4312,7 @@ TEST(MemorySanitizerOrigins, InitializedStoreDoesNotChangeOrigin) { } // namespace template -INLINE +ALWAYS_INLINE void BinaryOpOriginTest(BinaryOp op) { U4 ox = rand(); //NOLINT U4 oy = rand(); //NOLINT @@ -4345,12 +4345,12 @@ void BinaryOpOriginTest(BinaryOp op) { EXPECT_ORIGIN(ox, __msan_get_origin(z)); } -template INLINE T XOR(const T &a, const T&b) { return a ^ b; } -template INLINE T ADD(const T &a, const T&b) { return a + b; } -template INLINE T SUB(const T &a, const T&b) { return a - b; } -template INLINE T MUL(const T &a, const T&b) { return a * b; } -template INLINE T AND(const T &a, const T&b) { return a & b; } -template INLINE T OR (const T &a, const T&b) { return a | b; } +template ALWAYS_INLINE T XOR(const T &a, const T&b) { return a ^ b; } +template ALWAYS_INLINE T ADD(const T &a, const T&b) { return a + b; } +template ALWAYS_INLINE T SUB(const T &a, const T&b) { return a - b; } +template ALWAYS_INLINE T MUL(const T &a, const T&b) { return a * b; } +template ALWAYS_INLINE T AND(const T &a, const T&b) { return a & b; } +template ALWAYS_INLINE T OR (const T &a, const T&b) { return a | b; } TEST(MemorySanitizerOrigins, BinaryOp) { if (!TrackingOrigins()) return; @@ -4704,7 +4704,7 @@ static void TestBZHI() { __builtin_ia32_bzhi_di(0xABCDABCDABCDABCD, Poisoned(1, 0xFFFFFFFF00000000ULL))); } -inline U4 bextr_imm(U4 start, U4 len) { +ALWAYS_INLINE U4 bextr_imm(U4 start, U4 len) { start &= 0xFF; len &= 0xFF; return (len << 8) | start; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h index 
23d589888d3b6..5ec47416fe0c9 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h @@ -52,14 +52,14 @@ struct NoOpMapUnmapCallback { // Callback type for iterating over chunks. typedef void (*ForEachChunkCallback)(uptr chunk, void *arg); -INLINE u32 Rand(u32 *state) { // ANSI C linear congruential PRNG. +inline u32 Rand(u32 *state) { // ANSI C linear congruential PRNG. return (*state = *state * 1103515245 + 12345) >> 16; } -INLINE u32 RandN(u32 *state, u32 n) { return Rand(state) % n; } // [0, n) +inline u32 RandN(u32 *state, u32 n) { return Rand(state) % n; } // [0, n) template -INLINE void RandomShuffle(T *a, u32 n, u32 *rand_state) { +inline void RandomShuffle(T *a, u32 n, u32 *rand_state) { if (n <= 1) return; u32 state = *rand_state; for (u32 i = n - 1; i > 0; i--) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_checks.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_checks.h index fc426f0e74f48..1cc3992c4c9fa 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_checks.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_checks.h @@ -27,7 +27,7 @@ namespace __sanitizer { void SetErrnoToENOMEM(); // A common errno setting logic shared by almost all sanitizer allocator APIs. -INLINE void *SetErrnoOnNull(void *ptr) { +inline void *SetErrnoOnNull(void *ptr) { if (UNLIKELY(!ptr)) SetErrnoToENOMEM(); return ptr; @@ -41,7 +41,7 @@ INLINE void *SetErrnoOnNull(void *ptr) { // two and that the size is a multiple of alignment for POSIX implementation, // and a bit relaxed requirement for non-POSIX ones, that the size is a multiple // of alignment. -INLINE bool CheckAlignedAllocAlignmentAndSize(uptr alignment, uptr size) { +inline bool CheckAlignedAllocAlignmentAndSize(uptr alignment, uptr size) { #if SANITIZER_POSIX return alignment != 0 && IsPowerOfTwo(alignment) && (size & (alignment - 1)) == 0; @@ -52,13 +52,13 @@ INLINE bool CheckAlignedAllocAlignmentAndSize(uptr alignment, uptr size) { // Checks posix_memalign() parameters, verifies that alignment is a power of two // and a multiple of sizeof(void *). -INLINE bool CheckPosixMemalignAlignment(uptr alignment) { +inline bool CheckPosixMemalignAlignment(uptr alignment) { return alignment != 0 && IsPowerOfTwo(alignment) && (alignment % sizeof(void *)) == 0; } // Returns true if calloc(size, n) call overflows on size*n calculation. -INLINE bool CheckForCallocOverflow(uptr size, uptr n) { +inline bool CheckForCallocOverflow(uptr size, uptr n) { if (!size) return false; uptr max = (uptr)-1L; @@ -67,7 +67,7 @@ INLINE bool CheckForCallocOverflow(uptr size, uptr n) { // Returns true if the size passed to pvalloc overflows when rounded to the next // multiple of page_size. -INLINE bool CheckForPvallocOverflow(uptr size, uptr page_size) { +inline bool CheckForPvallocOverflow(uptr size, uptr page_size) { return RoundUpTo(size, page_size) < size; } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_secondary.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_secondary.h index 1d128f55de05a..61fb98742373a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_secondary.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_secondary.h @@ -18,8 +18,8 @@ // (currently, 32 bits and internal allocator). 
class LargeMmapAllocatorPtrArrayStatic { public: - INLINE void *Init() { return &p_[0]; } - INLINE void EnsureSpace(uptr n) { CHECK_LT(n, kMaxNumChunks); } + inline void *Init() { return &p_[0]; } + inline void EnsureSpace(uptr n) { CHECK_LT(n, kMaxNumChunks); } private: static const int kMaxNumChunks = 1 << 15; uptr p_[kMaxNumChunks]; @@ -31,14 +31,14 @@ class LargeMmapAllocatorPtrArrayStatic { // same functionality in Fuchsia case, which does not support MAP_NORESERVE. class LargeMmapAllocatorPtrArrayDynamic { public: - INLINE void *Init() { + inline void *Init() { uptr p = address_range_.Init(kMaxNumChunks * sizeof(uptr), SecondaryAllocatorName); CHECK(p); return reinterpret_cast(p); } - INLINE void EnsureSpace(uptr n) { + inline void EnsureSpace(uptr n) { CHECK_LT(n, kMaxNumChunks); DCHECK(n <= n_reserved_); if (UNLIKELY(n == n_reserved_)) { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_atomic.h b/compiler-rt/lib/sanitizer_common/sanitizer_atomic.h index a798a0cf25d9c..46f06957228c9 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_atomic.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_atomic.h @@ -72,12 +72,12 @@ namespace __sanitizer { // Clutter-reducing helpers. template -INLINE typename T::Type atomic_load_relaxed(const volatile T *a) { +inline typename T::Type atomic_load_relaxed(const volatile T *a) { return atomic_load(a, memory_order_relaxed); } template -INLINE void atomic_store_relaxed(volatile T *a, typename T::Type v) { +inline void atomic_store_relaxed(volatile T *a, typename T::Type v) { atomic_store(a, v, memory_order_relaxed); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang.h b/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang.h index c40461ebc3bf6..fc13ca52dda74 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang.h @@ -34,16 +34,16 @@ namespace __sanitizer { // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html // for mappings of the memory model to different processors. 
-INLINE void atomic_signal_fence(memory_order) { +inline void atomic_signal_fence(memory_order) { __asm__ __volatile__("" ::: "memory"); } -INLINE void atomic_thread_fence(memory_order) { +inline void atomic_thread_fence(memory_order) { __sync_synchronize(); } template -INLINE typename T::Type atomic_fetch_add(volatile T *a, +inline typename T::Type atomic_fetch_add(volatile T *a, typename T::Type v, memory_order mo) { (void)mo; DCHECK(!((uptr)a % sizeof(*a))); @@ -51,7 +51,7 @@ INLINE typename T::Type atomic_fetch_add(volatile T *a, } template -INLINE typename T::Type atomic_fetch_sub(volatile T *a, +inline typename T::Type atomic_fetch_sub(volatile T *a, typename T::Type v, memory_order mo) { (void)mo; DCHECK(!((uptr)a % sizeof(*a))); @@ -59,7 +59,7 @@ INLINE typename T::Type atomic_fetch_sub(volatile T *a, } template -INLINE typename T::Type atomic_exchange(volatile T *a, +inline typename T::Type atomic_exchange(volatile T *a, typename T::Type v, memory_order mo) { DCHECK(!((uptr)a % sizeof(*a))); if (mo & (memory_order_release | memory_order_acq_rel | memory_order_seq_cst)) @@ -71,7 +71,7 @@ INLINE typename T::Type atomic_exchange(volatile T *a, } template -INLINE bool atomic_compare_exchange_strong(volatile T *a, typename T::Type *cmp, +inline bool atomic_compare_exchange_strong(volatile T *a, typename T::Type *cmp, typename T::Type xchg, memory_order mo) { typedef typename T::Type Type; @@ -84,7 +84,7 @@ INLINE bool atomic_compare_exchange_strong(volatile T *a, typename T::Type *cmp, } template -INLINE bool atomic_compare_exchange_weak(volatile T *a, +inline bool atomic_compare_exchange_weak(volatile T *a, typename T::Type *cmp, typename T::Type xchg, memory_order mo) { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang_mips.h b/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang_mips.h index d369aeb9935c6..59155e9883ebe 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang_mips.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang_mips.h @@ -37,7 +37,7 @@ static struct { } __attribute__((aligned(32))) lock = {0, {0}}; template <> -INLINE atomic_uint64_t::Type atomic_fetch_add(volatile atomic_uint64_t *ptr, +inline atomic_uint64_t::Type atomic_fetch_add(volatile atomic_uint64_t *ptr, atomic_uint64_t::Type val, memory_order mo) { DCHECK(mo & @@ -55,14 +55,14 @@ INLINE atomic_uint64_t::Type atomic_fetch_add(volatile atomic_uint64_t *ptr, } template <> -INLINE atomic_uint64_t::Type atomic_fetch_sub(volatile atomic_uint64_t *ptr, +inline atomic_uint64_t::Type atomic_fetch_sub(volatile atomic_uint64_t *ptr, atomic_uint64_t::Type val, memory_order mo) { return atomic_fetch_add(ptr, -val, mo); } template <> -INLINE bool atomic_compare_exchange_strong(volatile atomic_uint64_t *ptr, +inline bool atomic_compare_exchange_strong(volatile atomic_uint64_t *ptr, atomic_uint64_t::Type *cmp, atomic_uint64_t::Type xchg, memory_order mo) { @@ -87,7 +87,7 @@ INLINE bool atomic_compare_exchange_strong(volatile atomic_uint64_t *ptr, } template <> -INLINE atomic_uint64_t::Type atomic_load(const volatile atomic_uint64_t *ptr, +inline atomic_uint64_t::Type atomic_load(const volatile atomic_uint64_t *ptr, memory_order mo) { DCHECK(mo & (memory_order_relaxed | memory_order_releasae | memory_order_seq_cst)); @@ -100,7 +100,7 @@ INLINE atomic_uint64_t::Type atomic_load(const volatile atomic_uint64_t *ptr, } template <> -INLINE void atomic_store(volatile atomic_uint64_t *ptr, atomic_uint64_t::Type v, +inline void atomic_store(volatile atomic_uint64_t *ptr, 
atomic_uint64_t::Type v, memory_order mo) { DCHECK(mo & (memory_order_relaxed | memory_order_releasae | memory_order_seq_cst)); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang_other.h b/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang_other.h index b8685a8542676..7580ac2dc5889 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang_other.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang_other.h @@ -17,12 +17,12 @@ namespace __sanitizer { -INLINE void proc_yield(int cnt) { +inline void proc_yield(int cnt) { __asm__ __volatile__("" ::: "memory"); } template -INLINE typename T::Type atomic_load( +inline typename T::Type atomic_load( const volatile T *a, memory_order mo) { DCHECK(mo & (memory_order_relaxed | memory_order_consume | memory_order_acquire | memory_order_seq_cst)); @@ -60,7 +60,7 @@ INLINE typename T::Type atomic_load( } template -INLINE void atomic_store(volatile T *a, typename T::Type v, memory_order mo) { +inline void atomic_store(volatile T *a, typename T::Type v, memory_order mo) { DCHECK(mo & (memory_order_relaxed | memory_order_release | memory_order_seq_cst)); DCHECK(!((uptr)a % sizeof(*a))); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang_x86.h b/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang_x86.h index f2ce553baa7a1..51597b4927412 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang_x86.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang_x86.h @@ -16,7 +16,7 @@ namespace __sanitizer { -INLINE void proc_yield(int cnt) { +inline void proc_yield(int cnt) { __asm__ __volatile__("" ::: "memory"); for (int i = 0; i < cnt; i++) __asm__ __volatile__("pause"); @@ -24,7 +24,7 @@ INLINE void proc_yield(int cnt) { } template -INLINE typename T::Type atomic_load( +inline typename T::Type atomic_load( const volatile T *a, memory_order mo) { DCHECK(mo & (memory_order_relaxed | memory_order_consume | memory_order_acquire | memory_order_seq_cst)); @@ -70,7 +70,7 @@ INLINE typename T::Type atomic_load( } template -INLINE void atomic_store(volatile T *a, typename T::Type v, memory_order mo) { +inline void atomic_store(volatile T *a, typename T::Type v, memory_order mo) { DCHECK(mo & (memory_order_relaxed | memory_order_release | memory_order_seq_cst)); DCHECK(!((uptr)a % sizeof(*a))); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_atomic_msvc.h b/compiler-rt/lib/sanitizer_common/sanitizer_atomic_msvc.h index 6a7c5465dcbbc..31317adcdfc99 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_atomic_msvc.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_atomic_msvc.h @@ -54,21 +54,21 @@ extern "C" long long _InterlockedExchangeAdd64(long long volatile *Addend, namespace __sanitizer { -INLINE void atomic_signal_fence(memory_order) { +inline void atomic_signal_fence(memory_order) { _ReadWriteBarrier(); } -INLINE void atomic_thread_fence(memory_order) { +inline void atomic_thread_fence(memory_order) { _mm_mfence(); } -INLINE void proc_yield(int cnt) { +inline void proc_yield(int cnt) { for (int i = 0; i < cnt; i++) _mm_pause(); } template -INLINE typename T::Type atomic_load( +inline typename T::Type atomic_load( const volatile T *a, memory_order mo) { DCHECK(mo & (memory_order_relaxed | memory_order_consume | memory_order_acquire | memory_order_seq_cst)); @@ -86,7 +86,7 @@ INLINE typename T::Type atomic_load( } template -INLINE void atomic_store(volatile T *a, typename T::Type v, memory_order mo) { +inline void atomic_store(volatile T *a, typename T::Type v, memory_order 
mo) { DCHECK(mo & (memory_order_relaxed | memory_order_release | memory_order_seq_cst)); DCHECK(!((uptr)a % sizeof(*a))); @@ -102,7 +102,7 @@ INLINE void atomic_store(volatile T *a, typename T::Type v, memory_order mo) { atomic_thread_fence(memory_order_seq_cst); } -INLINE u32 atomic_fetch_add(volatile atomic_uint32_t *a, +inline u32 atomic_fetch_add(volatile atomic_uint32_t *a, u32 v, memory_order mo) { (void)mo; DCHECK(!((uptr)a % sizeof(*a))); @@ -110,7 +110,7 @@ INLINE u32 atomic_fetch_add(volatile atomic_uint32_t *a, (long)v); } -INLINE uptr atomic_fetch_add(volatile atomic_uintptr_t *a, +inline uptr atomic_fetch_add(volatile atomic_uintptr_t *a, uptr v, memory_order mo) { (void)mo; DCHECK(!((uptr)a % sizeof(*a))); @@ -123,7 +123,7 @@ INLINE uptr atomic_fetch_add(volatile atomic_uintptr_t *a, #endif } -INLINE u32 atomic_fetch_sub(volatile atomic_uint32_t *a, +inline u32 atomic_fetch_sub(volatile atomic_uint32_t *a, u32 v, memory_order mo) { (void)mo; DCHECK(!((uptr)a % sizeof(*a))); @@ -131,7 +131,7 @@ INLINE u32 atomic_fetch_sub(volatile atomic_uint32_t *a, -(long)v); } -INLINE uptr atomic_fetch_sub(volatile atomic_uintptr_t *a, +inline uptr atomic_fetch_sub(volatile atomic_uintptr_t *a, uptr v, memory_order mo) { (void)mo; DCHECK(!((uptr)a % sizeof(*a))); @@ -144,28 +144,28 @@ INLINE uptr atomic_fetch_sub(volatile atomic_uintptr_t *a, #endif } -INLINE u8 atomic_exchange(volatile atomic_uint8_t *a, +inline u8 atomic_exchange(volatile atomic_uint8_t *a, u8 v, memory_order mo) { (void)mo; DCHECK(!((uptr)a % sizeof(*a))); return (u8)_InterlockedExchange8((volatile char*)&a->val_dont_use, v); } -INLINE u16 atomic_exchange(volatile atomic_uint16_t *a, +inline u16 atomic_exchange(volatile atomic_uint16_t *a, u16 v, memory_order mo) { (void)mo; DCHECK(!((uptr)a % sizeof(*a))); return (u16)_InterlockedExchange16((volatile short*)&a->val_dont_use, v); } -INLINE u32 atomic_exchange(volatile atomic_uint32_t *a, +inline u32 atomic_exchange(volatile atomic_uint32_t *a, u32 v, memory_order mo) { (void)mo; DCHECK(!((uptr)a % sizeof(*a))); return (u32)_InterlockedExchange((volatile long*)&a->val_dont_use, v); } -INLINE bool atomic_compare_exchange_strong(volatile atomic_uint8_t *a, +inline bool atomic_compare_exchange_strong(volatile atomic_uint8_t *a, u8 *cmp, u8 xchgv, memory_order mo) { @@ -191,7 +191,7 @@ INLINE bool atomic_compare_exchange_strong(volatile atomic_uint8_t *a, return false; } -INLINE bool atomic_compare_exchange_strong(volatile atomic_uintptr_t *a, +inline bool atomic_compare_exchange_strong(volatile atomic_uintptr_t *a, uptr *cmp, uptr xchg, memory_order mo) { @@ -204,7 +204,7 @@ INLINE bool atomic_compare_exchange_strong(volatile atomic_uintptr_t *a, return false; } -INLINE bool atomic_compare_exchange_strong(volatile atomic_uint16_t *a, +inline bool atomic_compare_exchange_strong(volatile atomic_uint16_t *a, u16 *cmp, u16 xchg, memory_order mo) { @@ -217,7 +217,7 @@ INLINE bool atomic_compare_exchange_strong(volatile atomic_uint16_t *a, return false; } -INLINE bool atomic_compare_exchange_strong(volatile atomic_uint32_t *a, +inline bool atomic_compare_exchange_strong(volatile atomic_uint32_t *a, u32 *cmp, u32 xchg, memory_order mo) { @@ -230,7 +230,7 @@ INLINE bool atomic_compare_exchange_strong(volatile atomic_uint32_t *a, return false; } -INLINE bool atomic_compare_exchange_strong(volatile atomic_uint64_t *a, +inline bool atomic_compare_exchange_strong(volatile atomic_uint64_t *a, u64 *cmp, u64 xchg, memory_order mo) { @@ -244,7 +244,7 @@ INLINE bool 
atomic_compare_exchange_strong(volatile atomic_uint64_t *a, } template -INLINE bool atomic_compare_exchange_weak(volatile T *a, +inline bool atomic_compare_exchange_weak(volatile T *a, typename T::Type *cmp, typename T::Type xchg, memory_order mo) { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h index 86e19d96e0369..c8575a984c0c3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h @@ -53,25 +53,25 @@ const u64 kExternalPCBit = 1ULL << 60; extern const char *SanitizerToolName; // Can be changed by the tool. extern atomic_uint32_t current_verbosity; -INLINE void SetVerbosity(int verbosity) { +inline void SetVerbosity(int verbosity) { atomic_store(¤t_verbosity, verbosity, memory_order_relaxed); } -INLINE int Verbosity() { +inline int Verbosity() { return atomic_load(¤t_verbosity, memory_order_relaxed); } #if SANITIZER_ANDROID -INLINE uptr GetPageSize() { +inline uptr GetPageSize() { // Android post-M sysconf(_SC_PAGESIZE) crashes if called from .preinit_array. return 4096; } -INLINE uptr GetPageSizeCached() { +inline uptr GetPageSizeCached() { return 4096; } #else uptr GetPageSize(); extern uptr PageSizeCached; -INLINE uptr GetPageSizeCached() { +inline uptr GetPageSizeCached() { if (!PageSizeCached) PageSizeCached = GetPageSize(); return PageSizeCached; @@ -91,7 +91,7 @@ void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size, // Memory management void *MmapOrDie(uptr size, const char *mem_type, bool raw_report = false); -INLINE void *MmapOrDieQuietly(uptr size, const char *mem_type) { +inline void *MmapOrDieQuietly(uptr size, const char *mem_type) { return MmapOrDie(size, mem_type, /*raw_report*/ true); } void UnmapOrDie(void *addr, uptr size); @@ -374,7 +374,7 @@ unsigned char _BitScanReverse64(unsigned long *index, unsigned __int64 mask); } #endif -INLINE uptr MostSignificantSetBitIndex(uptr x) { +inline uptr MostSignificantSetBitIndex(uptr x) { CHECK_NE(x, 0U); unsigned long up; #if !SANITIZER_WINDOWS || defined(__clang__) || defined(__GNUC__) @@ -391,7 +391,7 @@ INLINE uptr MostSignificantSetBitIndex(uptr x) { return up; } -INLINE uptr LeastSignificantSetBitIndex(uptr x) { +inline uptr LeastSignificantSetBitIndex(uptr x) { CHECK_NE(x, 0U); unsigned long up; #if !SANITIZER_WINDOWS || defined(__clang__) || defined(__GNUC__) @@ -408,11 +408,11 @@ INLINE uptr LeastSignificantSetBitIndex(uptr x) { return up; } -INLINE bool IsPowerOfTwo(uptr x) { +inline bool IsPowerOfTwo(uptr x) { return (x & (x - 1)) == 0; } -INLINE uptr RoundUpToPowerOfTwo(uptr size) { +inline uptr RoundUpToPowerOfTwo(uptr size) { CHECK(size); if (IsPowerOfTwo(size)) return size; @@ -422,20 +422,20 @@ INLINE uptr RoundUpToPowerOfTwo(uptr size) { return 1ULL << (up + 1); } -INLINE uptr RoundUpTo(uptr size, uptr boundary) { +inline uptr RoundUpTo(uptr size, uptr boundary) { RAW_CHECK(IsPowerOfTwo(boundary)); return (size + boundary - 1) & ~(boundary - 1); } -INLINE uptr RoundDownTo(uptr x, uptr boundary) { +inline uptr RoundDownTo(uptr x, uptr boundary) { return x & ~(boundary - 1); } -INLINE bool IsAligned(uptr a, uptr alignment) { +inline bool IsAligned(uptr a, uptr alignment) { return (a & (alignment - 1)) == 0; } -INLINE uptr Log2(uptr x) { +inline uptr Log2(uptr x) { CHECK(IsPowerOfTwo(x)); return LeastSignificantSetBitIndex(x); } @@ -451,14 +451,14 @@ template void Swap(T& a, T& b) { } // Char handling -INLINE bool IsSpace(int c) { +inline bool IsSpace(int c) { 
return (c == ' ') || (c == '\n') || (c == '\t') || (c == '\f') || (c == '\r') || (c == '\v'); } -INLINE bool IsDigit(int c) { +inline bool IsDigit(int c) { return (c >= '0') && (c <= '9'); } -INLINE int ToLower(int c) { +inline int ToLower(int c) { return (c >= 'A' && c <= 'Z') ? (c + 'a' - 'A') : c; } @@ -840,15 +840,15 @@ void WriteToSyslog(const char *buffer); #if SANITIZER_MAC || SANITIZER_WIN_TRACE void LogFullErrorReport(const char *buffer); #else -INLINE void LogFullErrorReport(const char *buffer) {} +inline void LogFullErrorReport(const char *buffer) {} #endif #if SANITIZER_LINUX || SANITIZER_MAC void WriteOneLineToSyslog(const char *s); void LogMessageOnPrintf(const char *str); #else -INLINE void WriteOneLineToSyslog(const char *s) {} -INLINE void LogMessageOnPrintf(const char *str) {} +inline void WriteOneLineToSyslog(const char *s) {} +inline void LogMessageOnPrintf(const char *str) {} #endif #if SANITIZER_LINUX || SANITIZER_WIN_TRACE @@ -856,21 +856,21 @@ INLINE void LogMessageOnPrintf(const char *str) {} void AndroidLogInit(); void SetAbortMessage(const char *); #else -INLINE void AndroidLogInit() {} +inline void AndroidLogInit() {} // FIXME: MacOS implementation could use CRSetCrashLogMessage. -INLINE void SetAbortMessage(const char *) {} +inline void SetAbortMessage(const char *) {} #endif #if SANITIZER_ANDROID void SanitizerInitializeUnwinder(); AndroidApiLevel AndroidGetApiLevel(); #else -INLINE void AndroidLogWrite(const char *buffer_unused) {} -INLINE void SanitizerInitializeUnwinder() {} -INLINE AndroidApiLevel AndroidGetApiLevel() { return ANDROID_NOT_ANDROID; } +inline void AndroidLogWrite(const char *buffer_unused) {} +inline void SanitizerInitializeUnwinder() {} +inline AndroidApiLevel AndroidGetApiLevel() { return ANDROID_NOT_ANDROID; } #endif -INLINE uptr GetPthreadDestructorIterations() { +inline uptr GetPthreadDestructorIterations() { #if SANITIZER_ANDROID return (AndroidGetApiLevel() == ANDROID_LOLLIPOP_MR1) ? 8 : 4; #elif SANITIZER_POSIX @@ -976,7 +976,7 @@ RunOnDestruction at_scope_exit(Fn fn) { #if SANITIZER_LINUX && SANITIZER_S390_64 void AvoidCVE_2016_2143(); #else -INLINE void AvoidCVE_2016_2143() {} +inline void AvoidCVE_2016_2143() {} #endif struct StackDepotStats { @@ -997,7 +997,7 @@ bool GetRandom(void *buffer, uptr length, bool blocking = true); // Returns the number of logical processors on the system. u32 GetNumberOfCPUs(); extern u32 NumberOfCPUsCached; -INLINE u32 GetNumberOfCPUsCached() { +inline u32 GetNumberOfCPUsCached() { if (!NumberOfCPUsCached) NumberOfCPUsCached = GetNumberOfCPUs(); return NumberOfCPUsCached; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h index 84973eedda60a..a6c5514870528 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h @@ -196,9 +196,6 @@ typedef u64 tid_t; // This header should NOT include any other headers to avoid portability issues. // Common defs. 
-#ifndef INLINE -#define INLINE inline -#endif #define INTERFACE_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE #define SANITIZER_WEAK_DEFAULT_IMPL \ extern "C" SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE NOINLINE diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.h b/compiler-rt/lib/sanitizer_common/sanitizer_linux.h index c162d1ca5d285..1adc120815d14 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.h @@ -109,7 +109,7 @@ void ForEachMappedRegion(link_map *map, void (*cb)(const void *, uptr)); // Releases memory pages entirely within the [beg, end] address range. // The pages no longer count toward RSS; reads are guaranteed to return 0. // Requires (but does not verify!) that pages are MAP_PRIVATE. -INLINE void ReleaseMemoryPagesToOSAndZeroFill(uptr beg, uptr end) { +inline void ReleaseMemoryPagesToOSAndZeroFill(uptr beg, uptr end) { // man madvise on Linux promises zero-fill for anonymous private pages. // Testing shows the same behaviour for private (but not anonymous) mappings // of shm_open() files, as long as the underlying file is untouched. diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp index 86918a51a2460..28c14f2717be9 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -772,7 +772,7 @@ void LogMessageOnPrintf(const char *str) { // initialized after the vDSO function pointers, so if it exists, is not null // and is not empty, we can use clock_gettime. extern "C" SANITIZER_WEAK_ATTRIBUTE char *__progname; -INLINE bool CanUseVDSO() { +inline bool CanUseVDSO() { // Bionic is safe, it checks for the vDSO function pointers to be initialized. if (SANITIZER_ANDROID) return true; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.h b/compiler-rt/lib/sanitizer_common/sanitizer_mac.h index f61ebe2566e5f..023071e4f11de 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.h @@ -75,7 +75,7 @@ asm(".desc ___crashreporter_info__, 0x10"); namespace __sanitizer { static BlockingMutex crashreporter_info_mutex(LINKER_INITIALIZED); -INLINE void CRAppendCrashLogMessage(const char *msg) { +inline void CRAppendCrashLogMessage(const char *msg) { BlockingMutexLock l(&crashreporter_info_mutex); internal_strlcat(__crashreporter_info_buff__, msg, sizeof(__crashreporter_info_buff__)); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.cpp index dcc6c71c07d8a..b1c15be58deaa 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.cpp @@ -81,8 +81,6 @@ #include #undef _KERNEL -#undef INLINE // to avoid clashes with sanitizers' definitions - #undef IOC_DIRMASK // Include these after system headers to avoid name clashes and ambiguities. 
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp
index c26724ceb7a7d..c8eb781dfc845 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp
@@ -47,14 +47,14 @@ bool ReportFile::SupportsColors() {
   return SupportsColoredOutput(fd);
 }

-static INLINE bool ReportSupportsColors() {
+static inline bool ReportSupportsColors() {
   return report_file.SupportsColors();
 }

 #else // SANITIZER_FUCHSIA

 // Fuchsia's logs always go through post-processing that handles colorization.
-static INLINE bool ReportSupportsColors() { return true; }
+static inline bool ReportSupportsColors() { return true; }

 #endif // !SANITIZER_FUCHSIA

diff --git a/compiler-rt/lib/scudo/scudo_allocator.cpp b/compiler-rt/lib/scudo/scudo_allocator.cpp
index 343f85a4ef88b..53f6479a3bfff 100644
--- a/compiler-rt/lib/scudo/scudo_allocator.cpp
+++ b/compiler-rt/lib/scudo/scudo_allocator.cpp
@@ -44,7 +44,7 @@ static u32 Cookie;
 // at compilation or at runtime.
 static atomic_uint8_t HashAlgorithm = { CRC32Software };

-INLINE u32 computeCRC32(u32 Crc, uptr Value, uptr *Array, uptr ArraySize) {
+inline u32 computeCRC32(u32 Crc, uptr Value, uptr *Array, uptr ArraySize) {
   // If the hardware CRC32 feature is defined here, it was enabled everywhere,
   // as opposed to only for scudo_crc32.cpp. This means that other hardware
   // specific instructions were likely emitted at other places, and as a
@@ -71,31 +71,31 @@ INLINE u32 computeCRC32(u32 Crc, uptr Value, uptr *Array, uptr ArraySize) {
 static BackendT &getBackend();

 namespace Chunk {
-  static INLINE AtomicPackedHeader *getAtomicHeader(void *Ptr) {
+  static inline AtomicPackedHeader *getAtomicHeader(void *Ptr) {
     return reinterpret_cast<AtomicPackedHeader *>(reinterpret_cast<uptr>(Ptr) -
         getHeaderSize());
   }
-  static INLINE
+  static inline
   const AtomicPackedHeader *getConstAtomicHeader(const void *Ptr) {
     return reinterpret_cast<const AtomicPackedHeader *>(
         reinterpret_cast<uptr>(Ptr) - getHeaderSize());
   }

-  static INLINE bool isAligned(const void *Ptr) {
+  static inline bool isAligned(const void *Ptr) {
     return IsAligned(reinterpret_cast<uptr>(Ptr), MinAlignment);
   }

   // We can't use the offset member of the chunk itself, as we would double
   // fetch it without any warranty that it wouldn't have been tampered. To
   // prevent this, we work with a local copy of the header.
-  static INLINE void *getBackendPtr(const void *Ptr, UnpackedHeader *Header) {
+  static inline void *getBackendPtr(const void *Ptr, UnpackedHeader *Header) {
     return reinterpret_cast<void *>(reinterpret_cast<uptr>(Ptr) -
         getHeaderSize() - (Header->Offset << MinAlignmentLog));
   }

   // Returns the usable size for a chunk, meaning the amount of bytes from the
   // beginning of the user data to the end of the backend allocated chunk.
-  static INLINE uptr getUsableSize(const void *Ptr, UnpackedHeader *Header) {
+  static inline uptr getUsableSize(const void *Ptr, UnpackedHeader *Header) {
     const uptr ClassId = Header->ClassId;
     if (ClassId)
       return PrimaryT::ClassIdToSize(ClassId) - getHeaderSize() -
@@ -105,7 +105,7 @@ namespace Chunk {
   }

   // Returns the size the user requested when allocating the chunk.
-  static INLINE uptr getSize(const void *Ptr, UnpackedHeader *Header) {
+  static inline uptr getSize(const void *Ptr, UnpackedHeader *Header) {
     const uptr SizeOrUnusedBytes = Header->SizeOrUnusedBytes;
     if (Header->ClassId)
       return SizeOrUnusedBytes;
@@ -114,7 +114,7 @@ namespace Chunk {
   }

   // Compute the checksum of the chunk pointer and its header.
-  static INLINE u16 computeChecksum(const void *Ptr, UnpackedHeader *Header) {
+  static inline u16 computeChecksum(const void *Ptr, UnpackedHeader *Header) {
     UnpackedHeader ZeroChecksumHeader = *Header;
     ZeroChecksumHeader.Checksum = 0;
     uptr HeaderHolder[sizeof(UnpackedHeader) / sizeof(uptr)];
@@ -126,7 +126,7 @@ namespace Chunk {

   // Checks the validity of a chunk by verifying its checksum. It doesn't
   // incur termination in the event of an invalid chunk.
-  static INLINE bool isValid(const void *Ptr) {
+  static inline bool isValid(const void *Ptr) {
     PackedHeader NewPackedHeader =
         atomic_load_relaxed(getConstAtomicHeader(Ptr));
     UnpackedHeader NewUnpackedHeader =
@@ -140,7 +140,7 @@ namespace Chunk {
   COMPILER_CHECK(ChunkAvailable == 0);

   // Loads and unpacks the header, verifying the checksum in the process.
-  static INLINE
+  static inline
   void loadHeader(const void *Ptr, UnpackedHeader *NewUnpackedHeader) {
     PackedHeader NewPackedHeader =
         atomic_load_relaxed(getConstAtomicHeader(Ptr));
@@ -151,7 +151,7 @@ namespace Chunk {
   }

   // Packs and stores the header, computing the checksum in the process.
-  static INLINE void storeHeader(void *Ptr, UnpackedHeader *NewUnpackedHeader) {
+  static inline void storeHeader(void *Ptr, UnpackedHeader *NewUnpackedHeader) {
     NewUnpackedHeader->Checksum = computeChecksum(Ptr, NewUnpackedHeader);
     PackedHeader NewPackedHeader = bit_cast<PackedHeader>(*NewUnpackedHeader);
     atomic_store_relaxed(getAtomicHeader(Ptr), NewPackedHeader);
@@ -160,7 +160,7 @@ namespace Chunk {
   // Packs and stores the header, computing the checksum in the process. We
   // compare the current header with the expected provided one to ensure that
   // we are not being raced by a corruption occurring in another thread.
- static INLINE void compareExchangeHeader(void *Ptr, + static inline void compareExchangeHeader(void *Ptr, UnpackedHeader *NewUnpackedHeader, UnpackedHeader *OldUnpackedHeader) { NewUnpackedHeader->Checksum = computeChecksum(Ptr, NewUnpackedHeader); diff --git a/compiler-rt/lib/scudo/scudo_crc32.h b/compiler-rt/lib/scudo/scudo_crc32.h index bad15a929a3e0..ef40595a56d1f 100644 --- a/compiler-rt/lib/scudo/scudo_crc32.h +++ b/compiler-rt/lib/scudo/scudo_crc32.h @@ -85,7 +85,7 @@ static const u32 CRC32Table[] = { 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d }; -INLINE u32 computeSoftwareCRC32(u32 Crc, uptr Data) { +inline u32 computeSoftwareCRC32(u32 Crc, uptr Data) { for (uptr i = 0; i < sizeof(Data); i++) { Crc = CRC32Table[(Crc ^ Data) & 0xff] ^ (Crc >> 8); Data >>= 8; diff --git a/compiler-rt/lib/scudo/scudo_tsd.h b/compiler-rt/lib/scudo/scudo_tsd.h index 1d4e4e6f126e5..ec8dabc1f8a7d 100644 --- a/compiler-rt/lib/scudo/scudo_tsd.h +++ b/compiler-rt/lib/scudo/scudo_tsd.h @@ -29,7 +29,7 @@ struct ALIGNED(SANITIZER_CACHE_LINE_SIZE) ScudoTSD { void init(); void commitBack(); - INLINE bool tryLock() { + inline bool tryLock() { if (Mutex.TryLock()) { atomic_store_relaxed(&Precedence, 0); return true; @@ -40,14 +40,14 @@ struct ALIGNED(SANITIZER_CACHE_LINE_SIZE) ScudoTSD { return false; } - INLINE void lock() { + inline void lock() { atomic_store_relaxed(&Precedence, 0); Mutex.Lock(); } - INLINE void unlock() { Mutex.Unlock(); } + inline void unlock() { Mutex.Unlock(); } - INLINE uptr getPrecedence() { return atomic_load_relaxed(&Precedence); } + inline uptr getPrecedence() { return atomic_load_relaxed(&Precedence); } private: StaticSpinMutex Mutex; diff --git a/compiler-rt/lib/scudo/scudo_utils.cpp b/compiler-rt/lib/scudo/scudo_utils.cpp index f31d68058acbc..b7ce8f9158172 100644 --- a/compiler-rt/lib/scudo/scudo_utils.cpp +++ b/compiler-rt/lib/scudo/scudo_utils.cpp @@ -121,7 +121,7 @@ bool hasHardwareCRC32ARMPosix() { return false; } // initialized after the other globals, so we can check its value to know if // calling getauxval is safe. 
extern "C" SANITIZER_WEAK_ATTRIBUTE char *__progname; -INLINE bool areBionicGlobalsInitialized() { +inline bool areBionicGlobalsInitialized() { return !SANITIZER_ANDROID || (&__progname && __progname); } diff --git a/compiler-rt/lib/scudo/scudo_utils.h b/compiler-rt/lib/scudo/scudo_utils.h index a8dfbdeb3b708..b657c69d9baff 100644 --- a/compiler-rt/lib/scudo/scudo_utils.h +++ b/compiler-rt/lib/scudo/scudo_utils.h @@ -20,7 +20,7 @@ namespace __scudo { template -INLINE Dest bit_cast(const Source& source) { +inline Dest bit_cast(const Source& source) { static_assert(sizeof(Dest) == sizeof(Source), "Sizes are not equal!"); Dest dest; memcpy(&dest, &source, sizeof(dest)); diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors.h b/compiler-rt/lib/tsan/rtl/tsan_interceptors.h index 88d1edd775d37..29576ea2d49ad 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors.h +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors.h @@ -22,7 +22,7 @@ class ScopedInterceptor { LibIgnore *libignore(); #if !SANITIZER_GO -INLINE bool in_symbolizer() { +inline bool in_symbolizer() { cur_thread_init(); return UNLIKELY(cur_thread()->in_symbolizer); } diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h index d3bb61ff87d3f..efdc53a1e9252 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h @@ -458,22 +458,22 @@ struct ThreadState { ThreadState *cur_thread(); void set_cur_thread(ThreadState *thr); void cur_thread_finalize(); -INLINE void cur_thread_init() { } +inline void cur_thread_init() { } #else __attribute__((tls_model("initial-exec"))) extern THREADLOCAL char cur_thread_placeholder[]; -INLINE ThreadState *cur_thread() { +inline ThreadState *cur_thread() { return reinterpret_cast(cur_thread_placeholder)->current; } -INLINE void cur_thread_init() { +inline void cur_thread_init() { ThreadState *thr = reinterpret_cast(cur_thread_placeholder); if (UNLIKELY(!thr->current)) thr->current = thr; } -INLINE void set_cur_thread(ThreadState *thr) { +inline void set_cur_thread(ThreadState *thr) { reinterpret_cast(cur_thread_placeholder)->current = thr; } -INLINE void cur_thread_finalize() { } +inline void cur_thread_finalize() { } #endif // SANITIZER_MAC || SANITIZER_ANDROID #endif // SANITIZER_GO From 9339f68f21facc34fb0901045d571c818e1fa84a Mon Sep 17 00:00:00 2001 From: Kamil Rytarowski Date: Thu, 17 Sep 2020 16:27:48 +0200 Subject: [PATCH 0988/1079] [compiler-rt] [tsan] [netbsd] Catch unsupported LONG_JMP_SP_ENV_SLOT Error out during build for unsupported CPU. 
Reviewed By: vitalybuka

Differential Revision: https://reviews.llvm.org/D87602
---
 compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
index 645152a06c399..710e7ec97b703 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
@@ -384,12 +384,16 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) {
 #endif
 }
 
-#ifdef __powerpc__
+#if SANITIZER_NETBSD
+# ifdef __x86_64__
+#  define LONG_JMP_SP_ENV_SLOT 6
+# else
+#  error unsupported
+# endif
+#elif defined(__powerpc__)
 # define LONG_JMP_SP_ENV_SLOT 0
 #elif SANITIZER_FREEBSD
 # define LONG_JMP_SP_ENV_SLOT 2
-#elif SANITIZER_NETBSD
-# define LONG_JMP_SP_ENV_SLOT 6
 #elif SANITIZER_LINUX
 # ifdef __aarch64__
 #  define LONG_JMP_SP_ENV_SLOT 13

From 0efbb70b719e990fe153373eda5a604344ae36bb Mon Sep 17 00:00:00 2001
From: alex-t
Date: Wed, 16 Sep 2020 19:54:29 +0300
Subject: [PATCH 0989/1079] [AMDGPU] Expand ROTL i16 to shifts.

The instruction combining pass turns a library rotl implementation into
llvm.fshl.i16. In the selection DAG the intrinsic becomes an ISD::ROTL node,
which cannot be selected, so it needs to be expanded into shifts again.

Reviewed By: rampitec, arsenm

Differential Revision: https://reviews.llvm.org/D87618
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |  4 ++--
 llvm/test/CodeGen/AMDGPU/rotl.ll          | 25 +++++++++++++++++++++++
 llvm/test/CodeGen/AMDGPU/rotr.ll          | 25 +++++++++++++++++++++++
 3 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e119d65a7f0ac..ed0a3a17e71af 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -546,8 +546,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Promote);
   AddPromotedToType(ISD::SIGN_EXTEND, MVT::i16, MVT::i32);
 
-  setOperationAction(ISD::ROTR, MVT::i16, Promote);
-  setOperationAction(ISD::ROTL, MVT::i16, Promote);
+  setOperationAction(ISD::ROTR, MVT::i16, Expand);
+  setOperationAction(ISD::ROTL, MVT::i16, Expand);
 
   setOperationAction(ISD::SDIV, MVT::i16, Promote);
   setOperationAction(ISD::UDIV, MVT::i16, Promote);
diff --git a/llvm/test/CodeGen/AMDGPU/rotl.ll b/llvm/test/CodeGen/AMDGPU/rotl.ll
index c4bc8cdaabf5b..12c46d3605289 100644
--- a/llvm/test/CodeGen/AMDGPU/rotl.ll
+++ b/llvm/test/CodeGen/AMDGPU/rotl.ll
@@ -55,3 +55,28 @@ entry:
   store <4 x i32> %3, <4 x i32> addrspace(1)* %in
   ret void
 }
+
+; GCN-LABEL: @test_rotl_i16
+; GCN: global_load_ushort [[X:v[0-9]+]]
+; GCN: global_load_ushort [[D:v[0-9]+]]
+; GCN: v_sub_nc_u16_e64 [[NX:v[0-9]+]], 0, [[X]]
+; GCN: v_and_b32_e32 [[XAND:v[0-9]+]], 15, [[X]]
+; GCN: v_and_b32_e32 [[NXAND:v[0-9]+]], 15, [[NX]]
+; GCN: v_lshlrev_b16_e64 [[LO:v[0-9]+]], [[XAND]], [[D]]
+; GCN: v_lshrrev_b16_e64 [[HI:v[0-9]+]], [[NXAND]], [[D]]
+; GCN: v_or_b32_e32 [[RES:v[0-9]+]], [[LO]], [[HI]]
+; GCN: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RES]]
+
+declare i16 @llvm.fshl.i16(i16, i16, i16)
+
+define void @test_rotl_i16(i16 addrspace(1)* nocapture readonly %sourceA, i16 addrspace(1)* nocapture readonly %sourceB, i16 addrspace(1)* nocapture %destValues) {
+entry:
+  %arrayidx = getelementptr inbounds i16, i16 addrspace(1)* %sourceA, i64 16
+  %a = load i16, i16 addrspace(1)* %arrayidx
+  %arrayidx2 = getelementptr inbounds i16, i16 addrspace(1)*
%sourceB, i64 24 + %b = load i16, i16 addrspace(1)* %arrayidx2 + %c = tail call i16 @llvm.fshl.i16(i16 %a, i16 %a, i16 %b) + %arrayidx5 = getelementptr inbounds i16, i16 addrspace(1)* %destValues, i64 4 + store i16 %c, i16 addrspace(1)* %arrayidx5 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/rotr.ll b/llvm/test/CodeGen/AMDGPU/rotr.ll index b4e2c2b67ce14..84f277bcc0870 100644 --- a/llvm/test/CodeGen/AMDGPU/rotr.ll +++ b/llvm/test/CodeGen/AMDGPU/rotr.ll @@ -51,3 +51,28 @@ entry: store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %in ret void } + +; GCN-LABEL: @test_rotr_i16 +; GCN: global_load_ushort [[X:v[0-9]+]] +; GCN: global_load_ushort [[D:v[0-9]+]] +; GCN: v_sub_nc_u16_e64 [[NX:v[0-9]+]], 0, [[X]] +; GCN: v_and_b32_e32 [[XAND:v[0-9]+]], 15, [[X]] +; GCN: v_and_b32_e32 [[NXAND:v[0-9]+]], 15, [[NX]] +; GCN: v_lshrrev_b16_e64 [[LO:v[0-9]+]], [[XAND]], [[D]] +; GCN: v_lshlrev_b16_e64 [[HI:v[0-9]+]], [[NXAND]], [[D]] +; GCN: v_or_b32_e32 [[RES:v[0-9]+]], [[LO]], [[HI]] +; GCN: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RES]] + +declare i16 @llvm.fshr.i16(i16, i16, i16) + +define void @test_rotr_i16(i16 addrspace(1)* nocapture readonly %sourceA, i16 addrspace(1)* nocapture readonly %sourceB, i16 addrspace(1)* nocapture %destValues) { +entry: + %arrayidx = getelementptr inbounds i16, i16 addrspace(1)* %sourceA, i64 16 + %a = load i16, i16 addrspace(1)* %arrayidx + %arrayidx2 = getelementptr inbounds i16, i16 addrspace(1)* %sourceB, i64 24 + %b = load i16, i16 addrspace(1)* %arrayidx2 + %c = tail call i16 @llvm.fshr.i16(i16 %a, i16 %a, i16 %b) + %arrayidx5 = getelementptr inbounds i16, i16 addrspace(1)* %destValues, i64 4 + store i16 %c, i16 addrspace(1)* %arrayidx5 + ret void +} From 72c5feeed8d8d570e1c971ef069483491463a003 Mon Sep 17 00:00:00 2001 From: Kamil Rytarowski Date: Thu, 17 Sep 2020 16:34:59 +0200 Subject: [PATCH 0990/1079] [compiler-rt] [netbsd] Include Fixes build on NetBSD/sparc64. --- .../lib/sanitizer_common/sanitizer_platform_limits_netbsd.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cpp index dc1f5a6616f33..c8f2aa5dba4af 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include From e7de267910e935ab885dae22b5191bfb118ca5f9 Mon Sep 17 00:00:00 2001 From: Kamil Rytarowski Date: Thu, 17 Sep 2020 16:46:32 +0200 Subject: [PATCH 0991/1079] [compiler-rt] [hwasan] Replace INLINE with inline Fixes the build after landing D87562. 
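As a side note, a hedged illustration (this snippet is ours, not from
compiler-rt) of why the mechanical INLINE -> inline replacements are worth
it: a macro with such a generic name can collide with definitions in system
headers (the FreeBSD platform-limits file above could drop its
"#undef INLINE" workaround for exactly this kind of clash), while the bare
keyword cannot collide with anything.

    #define INLINE inline            // old sanitizer-style convenience macro
    // If a platform header later does its own '#define INLINE __inline',
    // the two definitions clash and the build breaks.

    static INLINE int twice(int x) { return 2 * x; }   // fragile spelling
    static inline int thrice(int x) { return 3 * x; }  // robust: plain keyword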
--- compiler-rt/lib/hwasan/hwasan_malloc_bisect.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/hwasan/hwasan_malloc_bisect.h b/compiler-rt/lib/hwasan/hwasan_malloc_bisect.h index eaf124aab7ddc..7d134e8c4b7fa 100644 --- a/compiler-rt/lib/hwasan/hwasan_malloc_bisect.h +++ b/compiler-rt/lib/hwasan/hwasan_malloc_bisect.h @@ -28,7 +28,7 @@ static u32 malloc_hash(StackTrace *stack, uptr orig_size) { return H.get(); } -static INLINE bool malloc_bisect(StackTrace *stack, uptr orig_size) { +static inline bool malloc_bisect(StackTrace *stack, uptr orig_size) { uptr left = flags()->malloc_bisect_left; uptr right = flags()->malloc_bisect_right; if (LIKELY(left == 0 && right == 0)) From 5e0ded268929b87ddf2c5e077c9185554342f602 Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Wed, 16 Sep 2020 10:01:54 +0200 Subject: [PATCH 0992/1079] [mlir][Standard] Canonicalize chains of tensor_cast operations Adds a pattern that replaces a chain of two tensor_cast operations by a single tensor_cast operation if doing so will not remove constraints on the shapes. --- .../mlir/Dialect/StandardOps/IR/Ops.td | 2 + mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 81 +++++++++++++++++++ mlir/test/Transforms/canonicalize.mlir | 48 +++++++++++ 3 files changed, 131 insertions(+) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index b0aa9b9e3c76a..2113dfeb4c089 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -2997,6 +2997,8 @@ def TensorCastOp : CastOp<"tensor_cast"> { /// The result of a tensor_cast is always a tensor. TensorType getType() { return getResult().getType().cast(); } }]; + + let hasCanonicalizer = 1; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 0c86c87384d33..c0dc87210a3f1 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -3163,6 +3163,87 @@ OpFoldResult TensorCastOp::fold(ArrayRef operands) { return impl::foldCastOp(*this); } +/// Compute a TensorType that has the joined shape knowledge of the two +/// given TensorTypes. The element types need to match. +static TensorType joinShapes(TensorType one, TensorType two) { + assert(one.getElementType() == two.getElementType()); + + if (!one.hasRank()) + return two; + if (!two.hasRank()) + return one; + + int64_t rank = one.getRank(); + if (rank != two.getRank()) + return {}; + + SmallVector join; + join.reserve(rank); + for (int64_t i = 0; i < rank; ++i) { + if (one.isDynamicDim(i)) { + join.push_back(two.getDimSize(i)); + continue; + } + if (two.isDynamicDim(i)) { + join.push_back(one.getDimSize(i)); + continue; + } + if (one.getDimSize(i) != two.getDimSize(i)) + return {}; + join.push_back(one.getDimSize(i)); + } + return RankedTensorType::get(join, one.getElementType()); +} + +namespace { + +/// Replaces chains of two tensor_cast operations by a single tensor_cast +/// operation if doing so does not remove runtime constraints. 
+struct ChainedTensorCast : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(TensorCastOp tensorCast, + PatternRewriter &rewriter) const final { + auto tensorCastOperand = + tensorCast.getOperand().getDefiningOp(); + + if (!tensorCastOperand) + return failure(); + + auto sourceType = + tensorCastOperand.getOperand().getType().cast(); + auto intermediateType = tensorCastOperand.getType().cast(); + auto resultType = tensorCast.getType().cast(); + + // We can remove the intermediate cast if joining all three produces the + // same result as just joining the source and result shapes. + auto firstJoin = + joinShapes(joinShapes(sourceType, intermediateType), resultType); + + // The join might not exist if the cast sequence would fail at runtime. + if (!firstJoin) + return failure(); + + // The newJoin always exists if the above join exists, it might just contain + // less information. If so, we cannot drop the intermediate cast, as doing + // so would remove runtime checks. + auto newJoin = joinShapes(sourceType, resultType); + if (firstJoin != newJoin) + return failure(); + + rewriter.replaceOpWithNewOp(tensorCast, resultType, + tensorCastOperand.getOperand()); + return success(); + } +}; + +} // namespace + +void TensorCastOp::getCanonicalizationPatterns( + OwningRewritePatternList &results, MLIRContext *context) { + results.insert(context); +} + //===----------------------------------------------------------------------===// // Helpers for Tensor[Load|Store]Op //===----------------------------------------------------------------------===// diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir index 320418545893e..3603c473a1fd7 100644 --- a/mlir/test/Transforms/canonicalize.mlir +++ b/mlir/test/Transforms/canonicalize.mlir @@ -1062,3 +1062,51 @@ func @static_dynamic_tensor_from_elements(%size1: index, %size4: index) -> tenso return %0 : tensor<3x?x?x7x?xindex> } +// ----- + +// CHECK-LABEL: @tensor_cast_chain_ok +// CHECK-SAME: %[[IN:.*]]: tensor<*xi32> +func @tensor_cast_chain_ok(%input: tensor<*xi32>) -> tensor<4x8xi32> { + // CHECK-NEXT: %[[RES:.*]] = tensor_cast %[[IN]] : tensor<*xi32> to tensor<4x8xi32> + %0 = tensor_cast %input : tensor<*xi32> to tensor<4x?xi32> + %1 = tensor_cast %0 : tensor<4x?xi32> to tensor<4x8xi32> + // CHECK-NEXT: return %[[RES]] + return %1 : tensor<4x8xi32> +} + +// ----- + +// CHECK-LABEL: @tensor_cast_chain_regain +// CHECK-SAME: %[[IN:.*]]: tensor<4xi32> +func @tensor_cast_chain_regain(%input: tensor<4xi32>) -> tensor<4xi32> { + %0 = tensor_cast %input : tensor<4xi32> to tensor + %1 = tensor_cast %0 : tensor to tensor<4xi32> + // CHECK-NEXT: return %[[IN]] + return %1 : tensor<4xi32> +} + +// ----- + +// CHECK-LABEL: @tensor_cast_chain_keep +// CHECK-SAME: %[[IN:.*]]: tensor +func @tensor_cast_chain_keep(%input: tensor) -> tensor { + // CHECK-NEXT: %[[C1:.*]] = tensor_cast %[[IN]] + %0 = tensor_cast %input : tensor to tensor<4x?xi32> + // CHECK-NEXT: %[[C2:.*]] = tensor_cast %[[C1]] + %1 = tensor_cast %0 : tensor<4x?xi32> to tensor + // CHECK-NEXT: return %[[C2]] + return %1 : tensor +} + +// ----- + +// CHECK-LABEL: @tensor_cast_chain_invalid +// CHECK-SAME: %[[IN:.*]]: tensor<4x8xi32> +func @tensor_cast_chain_invalid(%input: tensor<4x8xi32>) -> tensor<8x4xi32> { + // CHECK-NEXT: %[[C1:.*]] = tensor_cast %[[IN]] + %0 = tensor_cast %input : tensor<4x8xi32> to tensor + // CHECK-NEXT: %[[C2:.*]] = tensor_cast %[[C1]] + %1 = tensor_cast %0 : tensor to tensor<8x4xi32> + 
// CHECK-NEXT: return %[[C2]] return %1 : tensor<8x4xi32> +}

From 7b2dd58eb09d3ead649bdd0a67f69d8776a636ff Mon Sep 17 00:00:00 2001
From: Kamil Rytarowski
Date: Thu, 17 Sep 2020 16:57:30 +0200
Subject: [PATCH 0993/1079] [compiler-rt] [scudo] Fix typo in function attribute

Fixes the build after landing https://reviews.llvm.org/D87562
---
 compiler-rt/lib/scudo/scudo_allocator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/scudo/scudo_allocator.cpp b/compiler-rt/lib/scudo/scudo_allocator.cpp
index 343f85a4ef88b..c6a3309cb925b 100644
--- a/compiler-rt/lib/scudo/scudo_allocator.cpp
+++ b/compiler-rt/lib/scudo/scudo_allocator.cpp
@@ -44,7 +44,7 @@ static u32 Cookie;
 // at compilation or at runtime.
 static atomic_uint8_t HashAlgorithm = { CRC32Software };
 
-ATTR_inline u32 computeCRC32(u32 Crc, uptr Value, uptr *Array, uptr ArraySize) {
+inline u32 computeCRC32(u32 Crc, uptr Value, uptr *Array, uptr ArraySize) {
 // If the hardware CRC32 feature is defined here, it was enabled everywhere,
 // as opposed to only for scudo_crc32.cpp. This means that other hardware
 // specific instructions were likely emitted at other places, and as a

From 34b27b9441d27ef886ea22b3bb75b357a5ec707b Mon Sep 17 00:00:00 2001
From: David Green
Date: Thu, 17 Sep 2020 16:00:51 +0100
Subject: [PATCH 0994/1079] [ARM] Sink splats to MVE intrinsics

The predicated MVE intrinsics are generated as, for example,
llvm.arm.mve.add.predicated(x, splat(y), p). We need to sink the splat
value back into the loop, like we do for other instructions, so we can
re-select qr variants.

Differential Revision: https://reviews.llvm.org/D87693
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp     |  13 ++
 .../Thumb2/LowOverheadLoops/mov-operand.ll  |  13 +-
 llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll | 188 +++++++++---------
 llvm/test/CodeGen/Thumb2/mve-qrintr.ll      | 110 +++++-----
 4 files changed, 162 insertions(+), 162 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index cfb77f466cd19..d2e755b38ca97 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -16446,6 +16446,19 @@ bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
     switch (II->getIntrinsicID()) {
     case Intrinsic::fma:
       return !IsFMS(I);
+    case Intrinsic::arm_mve_add_predicated:
+    case Intrinsic::arm_mve_mul_predicated:
+    case Intrinsic::arm_mve_qadd_predicated:
+    case Intrinsic::arm_mve_hadd_predicated:
+    case Intrinsic::arm_mve_vqdmull_predicated:
+    case Intrinsic::arm_mve_qdmulh_predicated:
+    case Intrinsic::arm_mve_qrdmulh_predicated:
+    case Intrinsic::arm_mve_fma_predicated:
+      return true;
+    case Intrinsic::arm_mve_sub_predicated:
+    case Intrinsic::arm_mve_qsub_predicated:
+    case Intrinsic::arm_mve_hsub_predicated:
+      return Operand == 1;
     default:
       return false;
     }
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
index 1cf101ea5d5f1..3cd24f8f52471 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
@@ -17,19 +17,18 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
 ; CHECK-NEXT: letp lr, .LBB0_1
 ; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit
 ; CHECK-NEXT: vmov s4, r1
-; CHECK-NEXT: dlstp.32 lr, r1
+; CHECK-NEXT: mov r3, r1
+; CHECK-NEXT: dlstp.32 lr, r3
 ; CHECK-NEXT: vadd.f32 s0, s3, s3
 ; CHECK-NEXT: vcvt.f32.u32 s4, s4
 ; CHECK-NEXT: vdiv.f32 s0, s0, s4
-;
CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: vdup.32 q1, r3 -; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: .LBB0_3: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q2, [r0], #16 -; CHECK-NEXT: vsub.f32 q2, q2, q1 -; CHECK-NEXT: vfma.f32 q0, q2, q2 +; CHECK-NEXT: vldrw.u32 q1, [r0], #16 +; CHECK-NEXT: vsub.f32 q1, q1, r12 +; CHECK-NEXT: vfma.f32 q0, q1, q1 ; CHECK-NEXT: letp lr, .LBB0_3 ; CHECK-NEXT: @ %bb.4: @ %do.end ; CHECK-NEXT: subs r0, r1, #1 diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll index 646124e0cf983..0f3e893fd8017 100644 --- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll @@ -683,84 +683,86 @@ define i8* @signext(i8* %input_row, i8* %input_col, i16 zeroext %output_ch, i16 ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #20 -; CHECK-NEXT: sub sp, #20 +; CHECK-NEXT: .pad #24 +; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: add.w r12, sp, #12 ; CHECK-NEXT: cmp r3, #4 -; CHECK-NEXT: strd r0, r1, [sp, #12] @ 8-byte Folded Spill +; CHECK-NEXT: stm.w r12, {r0, r1, r2} @ 12-byte Folded Spill ; CHECK-NEXT: bne .LBB5_8 ; CHECK-NEXT: @ %bb.1: @ %for.cond.preheader -; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: beq .LBB5_8 ; CHECK-NEXT: @ %bb.2: @ %for.body.lr.ph -; CHECK-NEXT: ldr r7, [sp, #84] -; CHECK-NEXT: mov.w r11, #0 -; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldr r0, [sp, #68] -; CHECK-NEXT: add.w r1, r3, r7, lsl #1 -; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: adds r1, r3, r7 -; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: add.w r1, r7, r7, lsl #1 -; CHECK-NEXT: vdup.16 q0, r0 -; CHECK-NEXT: adds r0, r3, r1 +; CHECK-NEXT: ldr r2, [sp, #88] +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r4, [sp, #72] +; CHECK-NEXT: add.w r0, r1, r2, lsl #1 +; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: adds r0, r1, r2 +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: add.w r0, r2, r2, lsl #1 +; CHECK-NEXT: add r0, r1 ; CHECK-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-NEXT: adds r0, r7, #7 -; CHECK-NEXT: lsr.w r9, r0, #3 +; CHECK-NEXT: adds r0, r2, #7 +; CHECK-NEXT: lsrs r2, r0, #3 ; CHECK-NEXT: b .LBB5_5 ; CHECK-NEXT: .LBB5_3: @ in Loop: Header=BB5_5 Depth=1 -; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: .LBB5_4: @ %for.cond.cleanup23 ; CHECK-NEXT: @ in Loop: Header=BB5_5 Depth=1 -; CHECK-NEXT: ldr r1, [sp, #92] -; CHECK-NEXT: add.w r0, r8, r10 +; CHECK-NEXT: add.w r0, r10, r8 +; CHECK-NEXT: ldr r1, [sp, #96] ; CHECK-NEXT: add r0, r6 ; CHECK-NEXT: add r0, r12 -; CHECK-NEXT: strb.w r0, [r1, r11] -; CHECK-NEXT: add.w r11, r11, #1 -; CHECK-NEXT: cmp r11, r2 +; CHECK-NEXT: strb.w r0, [r1, r9] +; CHECK-NEXT: add.w r9, r9, #1 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: cmp r9, r0 ; CHECK-NEXT: beq .LBB5_8 ; CHECK-NEXT: .LBB5_5: @ %for.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB5_7 Depth 2 -; CHECK-NEXT: ldr r0, [sp, #88] -; CHECK-NEXT: subs.w lr, r9, r9 -; CHECK-NEXT: ldr.w r12, [r0, r11, lsl #2] +; CHECK-NEXT: ldr r0, [sp, #92] +; CHECK-NEXT: subs.w lr, r2, r2 +; CHECK-NEXT: ldr.w r12, 
[r0, r9, lsl #2] ; CHECK-NEXT: ble .LBB5_3 ; CHECK-NEXT: @ %bb.6: @ %for.body24.preheader ; CHECK-NEXT: @ in Loop: Header=BB5_5 Depth=1 -; CHECK-NEXT: ldr r3, [sp, #84] +; CHECK-NEXT: ldr.w r11, [sp, #88] ; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: dlstp.16 lr, r3 +; CHECK-NEXT: dlstp.16 lr, r11 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r8, r12 -; CHECK-NEXT: mla r5, r11, r3, r0 -; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: ldrd r4, r7, [sp] @ 8-byte Folded Reload ; CHECK-NEXT: mov r10, r12 +; CHECK-NEXT: mla r3, r9, r11, r0 +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldrd r7, r0, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: mov r8, r12 ; CHECK-NEXT: .LBB5_7: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB5_5 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vldrb.s16 q1, [r4], #8 -; CHECK-NEXT: vadd.i16 q2, q1, q0 +; CHECK-NEXT: vldrb.s16 q0, [r7], #8 +; CHECK-NEXT: vadd.i16 q1, q0, r4 +; CHECK-NEXT: vldrb.s16 q0, [r3], #8 +; CHECK-NEXT: vmlava.s16 r12, q0, q1 ; CHECK-NEXT: vldrb.s16 q1, [r5], #8 -; CHECK-NEXT: vmlava.s16 r12, q1, q2 -; CHECK-NEXT: vldrb.s16 q2, [r0], #8 -; CHECK-NEXT: vadd.i16 q2, q2, q0 -; CHECK-NEXT: vmlava.s16 r6, q1, q2 -; CHECK-NEXT: vldrb.s16 q2, [r7], #8 -; CHECK-NEXT: vadd.i16 q2, q2, q0 -; CHECK-NEXT: vmlava.s16 r8, q1, q2 -; CHECK-NEXT: vldrb.s16 q2, [r1], #8 -; CHECK-NEXT: vadd.i16 q2, q2, q0 -; CHECK-NEXT: vmlava.s16 r10, q1, q2 +; CHECK-NEXT: vadd.i16 q1, q1, r4 +; CHECK-NEXT: vmlava.s16 r6, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r0], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r4 +; CHECK-NEXT: vmlava.s16 r10, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r1], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r4 +; CHECK-NEXT: vmlava.s16 r8, q0, q1 ; CHECK-NEXT: letp lr, .LBB5_7 ; CHECK-NEXT: b .LBB5_4 ; CHECK-NEXT: .LBB5_8: @ %if.end -; CHECK-NEXT: ldr r0, [sp, #92] -; CHECK-NEXT: add sp, #20 +; CHECK-NEXT: ldr r0, [sp, #96] +; CHECK-NEXT: add sp, #24 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %cmp = icmp eq i16 %num_cols, 4 @@ -869,83 +871,85 @@ define i8* @signext_optsize(i8* %input_row, i8* %input_col, i16 zeroext %output_ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #20 -; CHECK-NEXT: sub sp, #20 +; CHECK-NEXT: .pad #24 +; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: add.w r12, sp, #12 ; CHECK-NEXT: cmp r3, #4 -; CHECK-NEXT: strd r0, r1, [sp, #12] @ 8-byte Folded Spill +; CHECK-NEXT: stm.w r12, {r0, r1, r2} @ 12-byte Folded Spill ; CHECK-NEXT: bne .LBB6_8 ; CHECK-NEXT: @ %bb.1: @ %for.cond.preheader -; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: beq .LBB6_8 ; CHECK-NEXT: @ %bb.2: @ %for.body.lr.ph -; CHECK-NEXT: ldr r7, [sp, #84] -; CHECK-NEXT: mov.w r11, #0 -; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldr r0, [sp, #68] -; CHECK-NEXT: add.w r1, r3, r7, lsl #1 -; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: adds r1, r3, r7 -; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: add.w r1, r7, r7, lsl #1 -; CHECK-NEXT: vdup.16 q0, r0 -; CHECK-NEXT: adds r0, r3, r1 +; CHECK-NEXT: ldr r2, [sp, #88] +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r4, [sp, #72] +; CHECK-NEXT: add.w r0, r1, r2, lsl #1 +; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: adds r0, r1, r2 +; 
CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: add.w r0, r2, r2, lsl #1 +; CHECK-NEXT: add r0, r1 ; CHECK-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-NEXT: adds r0, r7, #7 -; CHECK-NEXT: lsr.w r9, r0, #3 +; CHECK-NEXT: adds r0, r2, #7 +; CHECK-NEXT: lsrs r2, r0, #3 ; CHECK-NEXT: .LBB6_3: @ %for.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB6_5 Depth 2 -; CHECK-NEXT: ldr r0, [sp, #88] -; CHECK-NEXT: subs.w lr, r9, r9 -; CHECK-NEXT: ldr.w r12, [r0, r11, lsl #2] +; CHECK-NEXT: ldr r0, [sp, #92] +; CHECK-NEXT: subs.w lr, r2, r2 +; CHECK-NEXT: ldr.w r12, [r0, r9, lsl #2] ; CHECK-NEXT: ble .LBB6_6 ; CHECK-NEXT: @ %bb.4: @ %for.body24.preheader ; CHECK-NEXT: @ in Loop: Header=BB6_3 Depth=1 -; CHECK-NEXT: ldr r3, [sp, #84] +; CHECK-NEXT: ldr.w r11, [sp, #88] ; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: dlstp.16 lr, r3 +; CHECK-NEXT: dlstp.16 lr, r11 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r8, r12 -; CHECK-NEXT: mla r5, r11, r3, r0 -; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: ldrd r4, r7, [sp] @ 8-byte Folded Reload ; CHECK-NEXT: mov r10, r12 +; CHECK-NEXT: mla r3, r9, r11, r0 +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldrd r7, r0, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: mov r8, r12 ; CHECK-NEXT: .LBB6_5: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB6_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vldrb.s16 q1, [r4], #8 -; CHECK-NEXT: vadd.i16 q2, q1, q0 +; CHECK-NEXT: vldrb.s16 q0, [r7], #8 +; CHECK-NEXT: vadd.i16 q1, q0, r4 +; CHECK-NEXT: vldrb.s16 q0, [r3], #8 +; CHECK-NEXT: vmlava.s16 r12, q0, q1 ; CHECK-NEXT: vldrb.s16 q1, [r5], #8 -; CHECK-NEXT: vmlava.s16 r12, q1, q2 -; CHECK-NEXT: vldrb.s16 q2, [r0], #8 -; CHECK-NEXT: vadd.i16 q2, q2, q0 -; CHECK-NEXT: vmlava.s16 r6, q1, q2 -; CHECK-NEXT: vldrb.s16 q2, [r7], #8 -; CHECK-NEXT: vadd.i16 q2, q2, q0 -; CHECK-NEXT: vmlava.s16 r8, q1, q2 -; CHECK-NEXT: vldrb.s16 q2, [r1], #8 -; CHECK-NEXT: vadd.i16 q2, q2, q0 -; CHECK-NEXT: vmlava.s16 r10, q1, q2 +; CHECK-NEXT: vadd.i16 q1, q1, r4 +; CHECK-NEXT: vmlava.s16 r6, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r0], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r4 +; CHECK-NEXT: vmlava.s16 r10, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r1], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r4 +; CHECK-NEXT: vmlava.s16 r8, q0, q1 ; CHECK-NEXT: letp lr, .LBB6_5 ; CHECK-NEXT: b .LBB6_7 ; CHECK-NEXT: .LBB6_6: @ in Loop: Header=BB6_3 Depth=1 -; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: .LBB6_7: @ %for.cond.cleanup23 ; CHECK-NEXT: @ in Loop: Header=BB6_3 Depth=1 -; CHECK-NEXT: ldr r1, [sp, #92] -; CHECK-NEXT: add.w r0, r8, r10 +; CHECK-NEXT: add.w r0, r10, r8 +; CHECK-NEXT: ldr r1, [sp, #96] ; CHECK-NEXT: add r0, r6 ; CHECK-NEXT: add r0, r12 -; CHECK-NEXT: strb.w r0, [r1, r11] -; CHECK-NEXT: add.w r11, r11, #1 -; CHECK-NEXT: cmp r11, r2 +; CHECK-NEXT: strb.w r0, [r1, r9] +; CHECK-NEXT: add.w r9, r9, #1 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: cmp r9, r0 ; CHECK-NEXT: bne .LBB6_3 ; CHECK-NEXT: .LBB6_8: @ %if.end -; CHECK-NEXT: ldr r0, [sp, #92] -; CHECK-NEXT: add sp, #20 +; CHECK-NEXT: ldr r0, [sp, #96] +; CHECK-NEXT: add sp, #24 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %cmp = icmp eq i16 %num_cols, 4 diff --git a/llvm/test/CodeGen/Thumb2/mve-qrintr.ll b/llvm/test/CodeGen/Thumb2/mve-qrintr.ll index 4fcfe37b89e59..31f3378fc23fc 100644 --- 
a/llvm/test/CodeGen/Thumb2/mve-qrintr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-qrintr.ll @@ -10,13 +10,12 @@ define void @vadd(i32* %s1, i32 %c0, i32 %N) { ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB0_1: @ %while.body.lr.ph -; CHECK-NEXT: vdup.32 q0, r1 ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB0_2: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vadd.i32 q1, q1, q0 -; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vadd.i32 q0, q0, r1 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB0_2 ; CHECK-NEXT: @ %bb.3: @ %while.end ; CHECK-NEXT: pop {r7, pc} @@ -55,13 +54,12 @@ define void @vsub(i32* %s1, i32 %c0, i32 %N) { ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB1_1: @ %while.body.lr.ph -; CHECK-NEXT: vdup.32 q0, r1 ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB1_2: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vsub.i32 q1, q1, q0 -; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vsub.i32 q0, q0, r1 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB1_2 ; CHECK-NEXT: @ %bb.3: @ %while.end ; CHECK-NEXT: pop {r7, pc} @@ -100,13 +98,12 @@ define void @vmul(i32* %s1, i32 %c0, i32 %N) { ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB2_1: @ %while.body.lr.ph -; CHECK-NEXT: vdup.32 q0, r1 ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB2_2: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vmul.i32 q1, q1, q0 -; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vmul.i32 q0, q0, r1 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB2_2 ; CHECK-NEXT: @ %bb.3: @ %while.end ; CHECK-NEXT: pop {r7, pc} @@ -145,13 +142,12 @@ define void @vqadd(i32* %s1, i32 %c0, i32 %N) { ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB3_1: @ %while.body.lr.ph -; CHECK-NEXT: vdup.32 q0, r1 ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB3_2: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vqadd.s32 q1, q1, q0 -; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vqadd.s32 q0, q0, r1 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB3_2 ; CHECK-NEXT: @ %bb.3: @ %while.end ; CHECK-NEXT: pop {r7, pc} @@ -190,13 +186,12 @@ define void @vqsub(i32* %s1, i32 %c0, i32 %N) { ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB4_1: @ %while.body.lr.ph -; CHECK-NEXT: vdup.32 q0, r1 ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB4_2: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vqsub.s32 q1, q1, q0 -; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vqsub.s32 q0, q0, r1 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB4_2 ; CHECK-NEXT: @ %bb.3: @ %while.end ; CHECK-NEXT: pop {r7, pc} @@ -235,13 +230,12 @@ define void @vhadd(i32* %s1, i32 %c0, i32 %N) { ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB5_1: @ %while.body.lr.ph -; CHECK-NEXT: vdup.32 q0, r1 ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB5_2: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vhadd.s32 q1, q1, q0 -; 
CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vhadd.s32 q0, q0, r1 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB5_2 ; CHECK-NEXT: @ %bb.3: @ %while.end ; CHECK-NEXT: pop {r7, pc} @@ -280,13 +274,12 @@ define void @vhsub(i32* %s1, i32 %c0, i32 %N) { ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB6_1: @ %while.body.lr.ph -; CHECK-NEXT: vdup.32 q0, r1 ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB6_2: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vhsub.s32 q1, q1, q0 -; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vhsub.s32 q0, q0, r1 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB6_2 ; CHECK-NEXT: @ %bb.3: @ %while.end ; CHECK-NEXT: pop {r7, pc} @@ -325,13 +318,12 @@ define void @vqdmull(i32* %s1, i32 %c0, i32 %N) { ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB7_1: @ %while.body.lr.ph -; CHECK-NEXT: vdup.16 q0, r1 ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB7_2: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrh.s32 q1, [r0] -; CHECK-NEXT: vqdmullb.s16 q1, q1, q0 -; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: vldrh.s32 q0, [r0] +; CHECK-NEXT: vqdmullb.s16 q0, q0, r1 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB7_2 ; CHECK-NEXT: @ %bb.3: @ %while.end ; CHECK-NEXT: pop {r7, pc} @@ -374,13 +366,12 @@ define void @vqdmulh(i32* %s1, i32 %c0, i32 %N) { ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB8_1: @ %while.body.lr.ph -; CHECK-NEXT: vdup.32 q0, r1 ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB8_2: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vqdmulh.s32 q1, q1, q0 -; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vqdmulh.s32 q0, q0, r1 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB8_2 ; CHECK-NEXT: @ %bb.3: @ %while.end ; CHECK-NEXT: pop {r7, pc} @@ -419,13 +410,12 @@ define void @vqrdmulh(i32* %s1, i32 %c0, i32 %N) { ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB9_1: @ %while.body.lr.ph -; CHECK-NEXT: vdup.32 q0, r1 ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB9_2: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vqrdmulh.s32 q1, q1, q0 -; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vqrdmulh.s32 q0, q0, r1 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB9_2 ; CHECK-NEXT: @ %bb.3: @ %while.end ; CHECK-NEXT: pop {r7, pc} @@ -464,13 +454,12 @@ define void @vaddf(float* %s1, float %c0, i32 %N) { ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB10_1: @ %while.body.lr.ph -; CHECK-NEXT: vdup.32 q0, r1 ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB10_2: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vadd.f32 q1, q1, q0 -; CHECK-NEXT: vstrw.32 q1, [r0], #16 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vadd.f32 q0, q0, r1 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB10_2 ; CHECK-NEXT: @ %bb.3: @ %while.end ; CHECK-NEXT: pop {r7, pc} @@ -509,13 +498,12 @@ define void @vsubf(float* %s1, float %c0, i32 %N) { ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB11_1: @ %while.body.lr.ph -; CHECK-NEXT: vdup.32 q0, 
r1
 ; CHECK-NEXT: dlstp.32 lr, r2
 ; CHECK-NEXT: .LBB11_2: @ %while.body
 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrw.u32 q1, [r0]
-; CHECK-NEXT: vsub.f32 q1, q1, q0
-; CHECK-NEXT: vstrw.32 q1, [r0], #16
+; CHECK-NEXT: vldrw.u32 q0, [r0]
+; CHECK-NEXT: vsub.f32 q0, q0, r1
+; CHECK-NEXT: vstrw.32 q0, [r0], #16
 ; CHECK-NEXT: letp lr, .LBB11_2
 ; CHECK-NEXT: @ %bb.3: @ %while.end
 ; CHECK-NEXT: pop {r7, pc}
@@ -554,13 +542,12 @@ define void @vmulf(float* %s1, float %c0, i32 %N) {
 ; CHECK-NEXT: it lt
 ; CHECK-NEXT: poplt {r7, pc}
 ; CHECK-NEXT: .LBB12_1: @ %while.body.lr.ph
-; CHECK-NEXT: vdup.32 q0, r1
 ; CHECK-NEXT: dlstp.32 lr, r2
 ; CHECK-NEXT: .LBB12_2: @ %while.body
 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrw.u32 q1, [r0]
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vstrw.32 q1, [r0], #16
+; CHECK-NEXT: vldrw.u32 q0, [r0]
+; CHECK-NEXT: vmul.f32 q0, q0, r1
+; CHECK-NEXT: vstrw.32 q0, [r0], #16
 ; CHECK-NEXT: letp lr, .LBB12_2
 ; CHECK-NEXT: @ %bb.3: @ %while.end
 ; CHECK-NEXT: pop {r7, pc}
@@ -599,14 +586,13 @@ define void @vfma(float* %s1, float* %s2, float %c0, i32 %N) {
 ; CHECK-NEXT: it lt
 ; CHECK-NEXT: poplt {r7, pc}
 ; CHECK-NEXT: .LBB13_1: @ %while.body.lr.ph
-; CHECK-NEXT: vdup.32 q0, r2
 ; CHECK-NEXT: dlstp.32 lr, r3
 ; CHECK-NEXT: .LBB13_2: @ %while.body
 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrw.u32 q1, [r1]
-; CHECK-NEXT: vldrw.u32 q2, [r0]
-; CHECK-NEXT: vfma.f32 q2, q1, q0
-; CHECK-NEXT: vstrw.32 q2, [r0], #16
+; CHECK-NEXT: vldrw.u32 q0, [r1]
+; CHECK-NEXT: vldrw.u32 q1, [r0]
+; CHECK-NEXT: vfma.f32 q1, q0, r2
+; CHECK-NEXT: vstrw.32 q1, [r0], #16
 ; CHECK-NEXT: letp lr, .LBB13_2
 ; CHECK-NEXT: @ %bb.3: @ %while.end
 ; CHECK-NEXT: pop {r7, pc}
@@ -647,15 +633,13 @@ define void @vfmas(float* %s1, float* %s2, float %c0, i32 %N) {
 ; CHECK-NEXT: it lt
 ; CHECK-NEXT: poplt {r7, pc}
 ; CHECK-NEXT: .LBB14_1: @ %while.body.lr.ph
-; CHECK-NEXT: vdup.32 q0, r2
 ; CHECK-NEXT: dlstp.32 lr, r3
 ; CHECK-NEXT: .LBB14_2: @ %while.body
 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vmov q3, q0
-; CHECK-NEXT: vldrw.u32 q1, [r1]
-; CHECK-NEXT: vldrw.u32 q2, [r0]
-; CHECK-NEXT: vfma.f32 q3, q2, q1
-; CHECK-NEXT: vstrw.32 q3, [r0], #16
+; CHECK-NEXT: vldrw.u32 q0, [r1]
+; CHECK-NEXT: vldrw.u32 q1, [r0]
+; CHECK-NEXT: vfmas.f32 q1, q0, r2
+; CHECK-NEXT: vstrw.32 q1, [r0], #16
 ; CHECK-NEXT: letp lr, .LBB14_2
 ; CHECK-NEXT: @ %bb.3: @ %while.end
 ; CHECK-NEXT: pop {r7, pc}

From c3492a1aa1b98c8d81b0969d52cea7681f0624c2 Mon Sep 17 00:00:00 2001
From: Michael Liao
Date: Wed, 9 Sep 2020 16:48:03 -0400
Subject: [PATCH 0995/1079] [amdgpu] Lower SGPR-to-VGPR copy in the final phase of ISel.

- COPY from SGPR to VGPR needs to be lowered to a real instruction: the
  standard COPY is only used where source and destination are from the same
  register bank, so that the two registers can potentially be coalesced
  together and one COPY saved. Because of that, backend optimizations such
  as CSE won't handle plain COPYs. A copy from SGPR to VGPR, however, always
  needs to be materialized as a native instruction, so it should be lowered
  into a real one before the other backend optimizations run.
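To make the CSE point concrete, a hedged sketch in pseudo-MIR (ours, not
actual compiler output): MachineCSE deliberately ignores COPY-like
instructions, so duplicate cross-bank copies survive until the COPY becomes
a real move.

    ; Before: two equivalent SGPR-to-VGPR transfers are kept as COPYs.
    ;   %0:vgpr_32 = COPY %2:sgpr_32
    ;   %1:vgpr_32 = COPY %2:sgpr_32          ; not a CSE candidate
    ; After lowering in finalizeLowering():
    ;   %0:vgpr_32 = V_MOV_B32_e32 %2:sgpr_32
    ;   %1:vgpr_32 = V_MOV_B32_e32 %2:sgpr_32 ; ordinary instruction, CSE folds it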
Differential Revision: https://reviews.llvm.org/D87556 --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 5 ++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 61 +++++++++++++++++++++++ llvm/test/CodeGen/AMDGPU/fabs.ll | 6 +-- llvm/test/CodeGen/AMDGPU/fneg-fabs.ll | 6 +-- llvm/test/CodeGen/AMDGPU/sgpr-copy-cse.ll | 26 ++++++++++ llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll | 4 +- llvm/test/CodeGen/AMDGPU/wqm.ll | 4 +- 7 files changed, 103 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/sgpr-copy-cse.ll diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index a24394cdf795f..4df7fd85a5dde 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1244,6 +1244,11 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI, foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace); } else { + // Skip updating literal use if it's used in the same REQ_SQUENCE as, + // if that literal could be inlined, it's just a single use. + if (NonInlineUse && NonInlineUse->getParent() == UseMI && + UseMI->isRegSequence()) + continue; if (++NumLiteralUses == 1) { NonInlineUse = &*Use; NonInlineUseOpNo = OpNo; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ed0a3a17e71af..b446ac3af9b13 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -102,6 +102,10 @@ static cl::opt UseDivergentRegisterIndexing( cl::desc("Use indirect register addressing for divergent indexes"), cl::init(false)); +static cl::opt EnableLowerSGPRToVGPRCopy( + "lower-sgpr-to-vgpr-copy", cl::Hidden, + cl::desc("Enable lowering copy from SGPR to VGPR"), cl::init(true)); + static bool hasFP32Denormals(const MachineFunction &MF) { const SIMachineFunctionInfo *Info = MF.getInfo(); return Info->getMode().allFP32Denormals(); @@ -11485,6 +11489,59 @@ bool SITargetLowering::checkAsmConstraintValA(SDValue Op, return false; } +// Lower COPY from SGPR to VGPR to real one as they are real transfer instead +// of COPY. +static void lowerSGPRToVGPRCopy(MachineFunction &MF, MachineRegisterInfo &MRI, + const SIRegisterInfo &TRI, + const SIInstrInfo &TII) { + for (MachineBasicBlock &MBB : MF) { + for (auto BI = MBB.begin(), BE = MBB.end(); BI != BE; /*EMPTY*/) { + MachineInstr &MI = *BI++; + + auto IsSGPRToVGPRCopy = [&MRI, &TRI](const MachineInstr &MI) { + if (!MI.isCopy()) + return false; + + auto DstReg = MI.getOperand(0).getReg(); + auto SrcReg = MI.getOperand(1).getReg(); + const auto *DstRC = DstReg.isVirtual() ? MRI.getRegClass(DstReg) + : TRI.getPhysRegClass(DstReg); + const auto *SrcRC = SrcReg.isVirtual() ? MRI.getRegClass(SrcReg) + : TRI.getPhysRegClass(SrcReg); + return (DstRC == &AMDGPU::VGPR_32RegClass || + DstRC == &AMDGPU::VReg_64RegClass) && + (SrcRC == &AMDGPU::SGPR_32RegClass || + SrcRC == &AMDGPU::SGPR_64RegClass); + }; + + // Skip if it's not a copy from SGPR to VGPR. + if (!IsSGPRToVGPRCopy(MI)) + continue; + + const MachineOperand &Src = MI.getOperand(1); + // FIXME: Need subreg support. + if (Src.getSubReg() != AMDGPU::NoSubRegister) + continue; + // FIXME: Need undef support. + if (Src.getReg().isVirtual()) { + auto *DefMI = MRI.getVRegDef(Src.getReg()); + if (!DefMI || DefMI->isImplicitDef()) + continue; + } + + LLVM_DEBUG(dbgs() << "Lower COPY: " << MI); + unsigned Opcode = (TRI.getRegSizeInBits(Src.getReg(), MRI) == 64) + ? 
AMDGPU::V_MOV_B64_PSEUDO + : AMDGPU::V_MOV_B32_e32; + auto DstReg = MI.getOperand(0).getReg(); + auto MIB = BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(Opcode), DstReg) + .add(MI.getOperand(1)); + LLVM_DEBUG(dbgs() << " to: " << *MIB.getInstr()); + MI.eraseFromParent(); + } + } +} + // Figure out which registers should be reserved for stack access. Only after // the function is legalized do we know all of the non-spill stack objects or if // calls are present. @@ -11493,6 +11550,10 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const { SIMachineFunctionInfo *Info = MF.getInfo(); const GCNSubtarget &ST = MF.getSubtarget(); const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); + const SIInstrInfo *TII = Subtarget->getInstrInfo(); + + if (EnableLowerSGPRToVGPRCopy) + lowerSGPRToVGPRCopy(MF, MRI, *TRI, *TII); if (Info->isEntryFunction()) { // Callable functions have fixed registers used for stack access. diff --git a/llvm/test/CodeGen/AMDGPU/fabs.ll b/llvm/test/CodeGen/AMDGPU/fabs.ll index badaa16bbfcc5..05f0bafb47c74 100644 --- a/llvm/test/CodeGen/AMDGPU/fabs.ll +++ b/llvm/test/CodeGen/AMDGPU/fabs.ll @@ -11,7 +11,7 @@ ; R600-NOT: AND ; R600: |PV.{{[XYZW]}}| -; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7fffffff +; SI: s_bitset0_b32 s{{[0-9]+}}, 31 ; VI: s_bitset0_b32 s{{[0-9]+}}, 31 define amdgpu_kernel void @s_fabs_fn_free(float addrspace(1)* %out, i32 %in) { %bc= bitcast i32 %in to float @@ -24,7 +24,7 @@ define amdgpu_kernel void @s_fabs_fn_free(float addrspace(1)* %out, i32 %in) { ; R600-NOT: AND ; R600: |PV.{{[XYZW]}}| -; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7fffffff +; SI: s_bitset0_b32 s{{[0-9]+}}, 31 ; VI: s_bitset0_b32 s{{[0-9]+}}, 31 define amdgpu_kernel void @s_fabs_free(float addrspace(1)* %out, i32 %in) { %bc= bitcast i32 %in to float @@ -36,7 +36,7 @@ define amdgpu_kernel void @s_fabs_free(float addrspace(1)* %out, i32 %in) { ; FUNC-LABEL: {{^}}s_fabs_f32: ; R600: |{{(PV|T[0-9])\.[XYZW]}}| -; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7fffffff +; SI: s_bitset0_b32 s{{[0-9]+}}, 31 ; VI: s_bitset0_b32 s{{[0-9]+}}, 31 define amdgpu_kernel void @s_fabs_f32(float addrspace(1)* %out, float %in) { %fabs = call float @llvm.fabs.f32(float %in) diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll index a621b04a346c0..afae6b43ee587 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll @@ -34,7 +34,7 @@ define amdgpu_kernel void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x ; R600: |PV.{{[XYZW]}}| ; R600: -PV -; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 +; SI: s_bitset1_b32 s{{[0-9]+}}, 31 ; VI: s_bitset1_b32 s{{[0-9]+}}, 31 define amdgpu_kernel void @fneg_fabs_free_f32(float addrspace(1)* %out, i32 %in) { %bc = bitcast i32 %in to float @@ -49,7 +49,7 @@ define amdgpu_kernel void @fneg_fabs_free_f32(float addrspace(1)* %out, i32 %in) ; R600: |PV.{{[XYZW]}}| ; R600: -PV -; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 +; SI: s_bitset1_b32 s{{[0-9]+}}, 31 define amdgpu_kernel void @fneg_fabs_fn_free_f32(float addrspace(1)* %out, i32 %in) { %bc = bitcast i32 %in to float %fabs = call float @fabs(float %bc) @@ -59,7 +59,7 @@ define amdgpu_kernel void @fneg_fabs_fn_free_f32(float addrspace(1)* %out, i32 % } ; FUNC-LABEL: {{^}}fneg_fabs_f32: -; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 +; SI: s_bitset1_b32 s{{[0-9]+}}, 31 define amdgpu_kernel void @fneg_fabs_f32(float addrspace(1)* %out, float %in) { %fabs = call float @llvm.fabs.f32(float %in) %fsub = fsub float 
-0.000000e+00, %fabs diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-copy-cse.ll b/llvm/test/CodeGen/AMDGPU/sgpr-copy-cse.ll new file mode 100644 index 0000000000000..f032f170e3b4c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sgpr-copy-cse.ll @@ -0,0 +1,26 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -o - %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" +target triple = "amdgcn-amd-amdhsa" + +; CHECK-LABEL: {{^}}t0: +; CHECK: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]], s[4:5], 0x0 +; CHECK-COUNT-1: v_mov_b32_e32 v{{[0-9]+}}, s[[PTR_HI]] +; CHECK-NOT: v_mov_b32_e32 v{{[0-9]+}}, s[[PTR_HI]] +define protected amdgpu_kernel void @t0(float addrspace(1)* %p, i32 %i0, i32 %j0, i32 %k0) { +entry: + %0 = tail call i32 @llvm.amdgcn.workitem.id.x() + %i = add i32 %0, %i0 + %j = add i32 %0, %j0 + %k = add i32 %0, %k0 + %pi = getelementptr float, float addrspace(1)* %p, i32 %i + %vi = load float, float addrspace(1)* %pi + %pj = getelementptr float, float addrspace(1)* %p, i32 %j + %vj = load float, float addrspace(1)* %pj + %sum = fadd float %vi, %vj + %pk = getelementptr float, float addrspace(1)* %p, i32 %k + store float %sum, float addrspace(1)* %pk + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll index 4cbd89147722b..4d9c6a9a540fd 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll @@ -153,7 +153,9 @@ bb: ; GCN-LABEL: barrier_vmcnt_vscnt_flat_workgroup: ; GCN: flat_load_dword -; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX8_9: s_waitcnt lgkmcnt(0){{$}} +; GFX8_9: s_waitcnt vmcnt(0){{$}} +; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} ; GFX10: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: s_barrier define amdgpu_kernel void @barrier_vmcnt_vscnt_flat_workgroup(i32* %arg) { diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll index 127d0bc0fc686..860e58d33abf4 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -650,12 +650,12 @@ main_body: ; CHECK: image_store ; CHECK: s_wqm_b64 exec, exec ; CHECK-DAG: v_mov_b32_e32 [[CTR:v[0-9]+]], 0 -; CHECK-DAG: s_mov_b32 [[SEVEN:s[0-9]+]], 0x40e00000 +; CHECK-DAG: v_mov_b32_e32 [[SEVEN:v[0-9]+]], 0x40e00000 ; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %body ; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]] ; CHECK: [[LOOP:BB[0-9]+_[0-9]+]]: ; %loop -; CHECK: v_cmp_lt_f32_e32 vcc, [[SEVEN]], [[CTR]] +; CHECK: v_cmp_gt_f32_e32 vcc, [[CTR]], [[SEVEN]] ; CHECK: s_cbranch_vccz [[LOOPHDR]] ; CHECK: ; %break From a3c28ccd49391931acd8b3b27dc98d7c606051e0 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 30 Jul 2020 10:00:53 -0400 Subject: [PATCH 0996/1079] [libc++] Remove some workarounds for missing variadic templates We don't support GCC in C++03 mode, and Clang provides variadic templates even in C++03 mode. So there's effectively no supported compiler that doesn't support variadic templates. This effectively gets rid of all uses of _LIBCPP_HAS_NO_VARIADICS, but some workarounds for the lack of variadics remain. 
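As a hedged illustration (ours, not from the patch) of why the guard was
dead weight: Clang accepts variadic templates even under -std=c++03,
diagnosing them merely as a C++11 extension, so the guard only ever fired
for configurations libc++ no longer supports.

    // clang++ -std=c++03 -c demo.cpp compiles; at most a
    // -Wc++11-extensions warning is emitted.
    template <class _Tp, class... _Args>
    struct __first_of { typedef _Tp type; };

    typedef __first_of<int, long, double>::type __int_t;  // __int_t is int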
--- libcxx/include/__config | 4 - libcxx/include/future | 6 +- libcxx/include/memory | 157 +----------- libcxx/include/type_traits | 237 ++++++------------ ...ber_function_pointer_no_variadics.pass.cpp | 84 ------- 5 files changed, 92 insertions(+), 396 deletions(-) delete mode 100644 libcxx/test/std/utilities/meta/meta.unary/meta.unary.cat/member_function_pointer_no_variadics.pass.cpp diff --git a/libcxx/include/__config b/libcxx/include/__config index 17e6bfe207aaf..c29fd4267f323 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -456,10 +456,6 @@ typedef __char32_t char32_t; #define _LIBCPP_HAS_NO_AUTO_TYPE #endif -#if !(__has_feature(cxx_variadic_templates)) -#define _LIBCPP_HAS_NO_VARIADICS -#endif - // Objective-C++ features (opt-in) #if __has_feature(objc_arc) #define _LIBCPP_HAS_OBJC_ARC diff --git a/libcxx/include/future b/libcxx/include/future index 483266dddec4e..295b6ac5d6ee7 100644 --- a/libcxx/include/future +++ b/libcxx/include/future @@ -1605,8 +1605,6 @@ template struct _LIBCPP_TEMPLATE_VIS uses_allocator, _Alloc> : public true_type {}; -#ifndef _LIBCPP_HAS_NO_VARIADICS - // packaged_task template class __packaged_task_base; @@ -2158,6 +2156,8 @@ __make_async_assoc_state(_Fp&& __f) return future<_Rp>(__h.get()); } +#ifndef _LIBCPP_CXX03_LANG + template class _LIBCPP_HIDDEN __async_func { @@ -2225,7 +2225,7 @@ async(_Fp&& __f, _Args&&... __args) _VSTD::forward<_Args>(__args)...); } -#endif // _LIBCPP_HAS_NO_VARIADICS +#endif // C++03 // shared_future diff --git a/libcxx/include/memory b/libcxx/include/memory index ebb0a723a162a..0ce7d092a2e11 100644 --- a/libcxx/include/memory +++ b/libcxx/include/memory @@ -762,8 +762,6 @@ struct __pointer_traits_element_type<_Ptr, true> typedef _LIBCPP_NODEBUG_TYPE typename _Ptr::element_type type; }; -#ifndef _LIBCPP_HAS_NO_VARIADICS - template